[Opentrep-svn] SF.net SVN: opentrep:[160] trunk/opentrep
Status: Beta
Brought to you by:
denis_arnaud
From: <den...@us...> - 2009-07-23 05:13:33
|
Revision: 160 http://opentrep.svn.sourceforge.net/opentrep/?rev=160&view=rev Author: denis_arnaud Date: 2009-07-23 05:13:30 +0000 (Thu, 23 Jul 2009) Log Message: ----------- [Dev] Second attempt to fix a matching bug. Modified Paths: -------------- trunk/opentrep/db/data/ref_place_names.csv trunk/opentrep/opentrep/bom/ResultHolder.cpp trunk/opentrep/opentrep/bom/ResultHolder.hpp trunk/opentrep/opentrep/bom/StringMatcher.cpp trunk/opentrep/opentrep/bom/StringMatcher.hpp Modified: trunk/opentrep/db/data/ref_place_names.csv =================================================================== --- trunk/opentrep/db/data/ref_place_names.csv 2009-07-23 04:35:16 UTC (rev 159) +++ trunk/opentrep/db/data/ref_place_names.csv 2009-07-23 05:13:30 UTC (rev 160) @@ -3550,7 +3550,7 @@ en,cdd,cauquira,cauquira,cauquira/hn:cauquira airport en,cde,caledonia,caledonia,caledonia/pa en,cdf,cortina d'ampezzo,cortina d'ampez,cortina d'ampezzo/it:fiames -en,cdg,paris cdg,paris cdg,paris/fr:charles de gaulle,cdg,cdg +en,cdg,paris cdg,paris cdg,paris/fr:charles de gaulle,cdg,cdg,charles de gaulle en,cdh,camden,camden,camden/ar/us:harrell fld en,cdi,cachoeiro,cachoeiro,cachoeiro de i/es/br:cachoeiro en,cdj,conceicao do arag,conceicao do ar,conceicao do arag/pa/br Modified: trunk/opentrep/opentrep/bom/ResultHolder.cpp =================================================================== --- trunk/opentrep/opentrep/bom/ResultHolder.cpp 2009-07-23 04:35:16 UTC (rev 159) +++ trunk/opentrep/opentrep/bom/ResultHolder.cpp 2009-07-23 05:13:30 UTC (rev 160) @@ -68,30 +68,53 @@ } // ////////////////////////////////////////////////////////////////////// - std::string ResultHolder:: - searchString (Xapian::MSet& ioMatchingSet, - TravelQuery_T& ioPartialQueryString, - NbOfErrors_T& ioMaxEditDistance, - bool ioHasReachedMaximalAllowableEditDistance, - Document& ioMatchingDocument) { + std::string ResultHolder::searchString (Xapian::MSet& ioMatchingSet, + TravelQuery_T& ioPartialQueryString, + Document& ioMatchingDocument) { std::string oMatchedString; // Catch any Xapian::Error exceptions thrown try { + /** + The query string must first be checked, without allowing any + spelling errors, but by removing the furthest right word at + every step. + <br>If no match is found, the maximal allowable edit + distance/error becomes 1, and the process (trying to match + the whole sentence, then by removing the furthest right word, + etc.) is re-performed. + <br>If no match is found, the maximal allowable edit + distance/error becomes 2. + <br>And so on until the maximum of the edit distance/error + becomes greater than the maximal allowable distance/error. + reached. + */ + NbOfErrors_T lMaxEditDistance = 0; + bool hasReachedMaximalAllowableEditDistance = false; bool shouldStop = false; while (shouldStop == false) { - // Retrieve the list of documents matching the query string + + // DEBUG + OPENTREP_LOG_DEBUG ("--------"); + OPENTREP_LOG_DEBUG ("Current query string: `" + << ioPartialQueryString + << "', with a maximal edit distance of " + << lMaxEditDistance << "."); + + // Retrieve the list of Xapian documents matching the query string oMatchedString = StringMatcher::searchString (ioMatchingSet, ioPartialQueryString, - ioMaxEditDistance, - ioHasReachedMaximalAllowableEditDistance, + lMaxEditDistance, + hasReachedMaximalAllowableEditDistance, _database); // DEBUG - OPENTREP_LOG_DEBUG ("Current initial query string: `" + OPENTREP_LOG_DEBUG ("---- Current query string: `" << ioPartialQueryString << "' --- Kept query: `" - << oMatchedString << "' for " + << oMatchedString + << "', with a maximal edit distance of " + << lMaxEditDistance << ", for " << ioMatchingSet.size() << " matches."); if (ioMatchingSet.empty() == false) { @@ -106,22 +129,15 @@ break; } - // Since the query, as is, yield no match, the furthest right - // word must be removed from the query string. - StringMatcher::removeFurthestRightWord (ioPartialQueryString); - + // Allow for one more spelling error + ++lMaxEditDistance; + /** - Stop when the resulting string gets empty. - - <br>Note that whether maximal allowable edit distance/error - has been reached is not checked at that stage. That - algorithm is performed independently for each level of - maximal allowable edit distance/error. Only the caller - (below) retriggers this process by changing the level of - maximal allowable edit distance/error, until that latter be - reached. + Stop when it is no longer necessary to increase the maximal + allowable edit distance, as it is already greater than the + maximum of the calculated edit distance. */ - if (ioPartialQueryString.empty() == true) { + if (hasReachedMaximalAllowableEditDistance == true) { shouldStop = true; } } @@ -141,39 +157,26 @@ // Catch any Xapian::Error exceptions thrown try { - bool shouldStop = false; - NbOfErrors_T lMaxEditDistance = 0; - /** - The query string must first be checked, without allowing any - spelling errors, but by removing the furthest right word at - every step. - <br>If no match is found, the maximal allowable edit - distance/error becomes 1, and the process (trying to match - the whole sentence, then by removing the furthest right word, - etc.) is re-performed. - <br>If no match is found, the maximal allowable edit - distance/error becomes 2. - <br>And so on until the maximum of the edit distance/error - becomes greater than the maximal allowable distance/error. - reached. + A copy of the query is made, as that copy will be altered by + the below process, whereas a clean copy needs to be reprocessed + for each level of maximal edit distance/error. + <br>However, in case of match, the modifications on the query + string (lPartialQueryString) must be replicated on the + original one (ioPartialQueryString). */ + TravelQuery_T lPartialQueryString (ioPartialQueryString); + + bool shouldStop = false; while (shouldStop == false) { - /** - A copy of the query is made, as that copy will be altered by - the below process, whereas a clean copy needs to be reprocessed - for each level of maximal edit distance/error. - <br>However, at the end, the modifications on the query - string (lPartialQueryString) must be replicated on the - original one (ioPartialQueryString). - */ - TravelQuery_T lPartialQueryString (ioPartialQueryString); + // DEBUG + OPENTREP_LOG_DEBUG ("----------------"); + OPENTREP_LOG_DEBUG ("Current query string: `" + << lPartialQueryString << "'"); + Xapian::MSet lMatchingSet; - bool hasReachedMaximalAllowableEditDistance = false; oMatchedString = searchString (lMatchingSet, lPartialQueryString, - lMaxEditDistance, - hasReachedMaximalAllowableEditDistance, ioMatchingDocument); if (oMatchedString.empty() == false) { @@ -189,17 +192,28 @@ break; } - // Allow for one more spelling error - ++lMaxEditDistance; + // Since the query, as is, yields no match, the furthest right + // word must be removed from the query string. + StringMatcher::removeFurthestRightWord (lPartialQueryString); + + /** + Stop when the resulting string gets empty. - /** - Stop when it is no longer necessary to increase the maximal - allowable edit distance, as it is already greater than the - maximum of the calculated edit distance. + <br>Note that whether maximal allowable edit distance/error + has been reached is not checked at that stage. That + algorithm is performed independently for each level of + maximal allowable edit distance/error. Only the caller + (below) retriggers this process by changing the level of + maximal allowable edit distance/error, until that latter be + reached. */ - if (hasReachedMaximalAllowableEditDistance == true) { - ioPartialQueryString = lPartialQueryString; + if (lPartialQueryString.empty() == true) { shouldStop = true; + + // DEBUG + OPENTREP_LOG_DEBUG ("----------------"); + OPENTREP_LOG_DEBUG ("Still no match for current query string: `" + << ioPartialQueryString << "'"); } } @@ -221,7 +235,7 @@ bool shouldStop = false; while (shouldStop == false) { // DEBUG - OPENTREP_LOG_DEBUG ("---------------------") + OPENTREP_LOG_DEBUG ("+++++++++++++++++++++") OPENTREP_LOG_DEBUG ("Remaining part of the query string: `" << lRemainingQueryString << "'"); Modified: trunk/opentrep/opentrep/bom/ResultHolder.hpp =================================================================== --- trunk/opentrep/opentrep/bom/ResultHolder.hpp 2009-07-23 04:35:16 UTC (rev 159) +++ trunk/opentrep/opentrep/bom/ResultHolder.hpp 2009-07-23 05:13:30 UTC (rev 160) @@ -48,16 +48,10 @@ /** Retrieve the document best matching the query string. @param Xapian::MSet& The Xapian matching set. It can be empty. @param TravelQuery_T& The partial query string. - @param NbOfErrors_T& The maximal allowable edit distance/error. - @param bool Whether or not the maximal allowable edit distance/error - has become greater than the maximum of the edit distance/errors - calculated on the phrase. @param MatchingDocument_T& The best matching Xapian document (if found). @return bool Whether such a best matching document has been found. */ std::string searchString (Xapian::MSet& ioMatchingSet, TravelQuery_T& ioPartialQueryString, - NbOfErrors_T& ioMaxEditDistance, - bool ioHasReachedMaximalAllowableEditDistance, Document& ioMatchingDocument); /** Retrieve the document best matching the query string. Modified: trunk/opentrep/opentrep/bom/StringMatcher.cpp =================================================================== --- trunk/opentrep/opentrep/bom/StringMatcher.cpp 2009-07-23 04:35:16 UTC (rev 159) +++ trunk/opentrep/opentrep/bom/StringMatcher.cpp 2009-07-23 05:13:30 UTC (rev 160) @@ -92,7 +92,7 @@ searchString (Xapian::MSet& ioMatchingSet, const std::string& iSearchString, NbOfErrors_T& ioMaxEditDistance, - bool ioHasReachedMaximalAllowableEditDistance, + bool& ioHasReachedMaximalAllowableEditDistance, const Xapian::Database& ioDatabase) { NbOfErrors_T lMaxEditDistance = std::numeric_limits<EditDistance_T>::min(); @@ -176,9 +176,11 @@ int nbMatches = ioMatchingSet.size(); // DEBUG + /* OPENTREP_LOG_DEBUG ("Original query `" << lOriginalQueryString << "', i.e., `" << lOriginalQuery.get_description() << "' => " << nbMatches << " results found"); + */ /** When no match is found, we search on the corrected phrase/string @@ -241,10 +243,12 @@ nbMatches = ioMatchingSet.size(); // DEBUG + /* OPENTREP_LOG_DEBUG ("Corrected query `" << lCorrectedQueryString << "', i.e., `" << lCorrectedQuery.get_description() << "' => " << nbMatches << " results found on corrected string"); + */ if (nbMatches != 0) { /** @@ -300,6 +304,7 @@ nbMatches = ioMatchingSet.size(); // DEBUG + /* OPENTREP_LOG_DEBUG ("Query corrected as a full sentence `" << lFullWordCorrectedString << "' with an allowable maximal edit distance of " @@ -308,6 +313,7 @@ << ", i.e., `"<< lFullQueryCorrected.get_description() << "' => " << nbMatches << " results found on corrected full string"); + */ if (nbMatches != 0) { oMatchedString = lFullWordCorrectedString; @@ -329,7 +335,7 @@ of the calculated edit distance, it becomes useless to go on increasing the maximal allowable edit distance. */ - if (lMaxEditDistance <= ioMaxEditDistance) { + if (ioMaxEditDistance >= lMaxEditDistance) { ioHasReachedMaximalAllowableEditDistance = true; } Modified: trunk/opentrep/opentrep/bom/StringMatcher.hpp =================================================================== --- trunk/opentrep/opentrep/bom/StringMatcher.hpp 2009-07-23 04:35:16 UTC (rev 159) +++ trunk/opentrep/opentrep/bom/StringMatcher.hpp 2009-07-23 05:13:30 UTC (rev 160) @@ -29,7 +29,7 @@ @param Xapian::MSet& The Xapian matching set. It can be empty. @param const std::string& The query string. @param NbOfErrors_T& The maximal allowable edit distance/error. - @param bool Whether or not the maximal allowable edit distance/error + @param bool& Whether or not the maximal allowable edit distance/error has become greater than the maximum of the edit distance/errors calculated on the phrase. @param const Xapian::Database& The Xapian index/database. @@ -38,7 +38,7 @@ static std::string searchString (Xapian::MSet&, const std::string& iSearchString, NbOfErrors_T& ioMaxEditDistance, - bool ioHasReachedMaximalAllowableEditDistance, + bool& ioHasReachedMaximalAllowableEditDistance, const Xapian::Database&); /** Extract the best matching Xapian document. This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |