[Opentrep-svn] SF.net SVN: opentrep:[170] trunk/opentrep/opentrep
Status: Beta
Brought to you by:
denis_arnaud
From: <den...@us...> - 2009-07-27 05:56:55
|
Revision: 170 http://opentrep.svn.sourceforge.net/opentrep/?rev=170&view=rev Author: denis_arnaud Date: 2009-07-27 05:56:43 +0000 (Mon, 27 Jul 2009) Log Message: ----------- [Dev] Prepared the code to dig out the edit distance and extra and alternate locations. Modified Paths: -------------- trunk/opentrep/opentrep/bom/Document.cpp trunk/opentrep/opentrep/bom/Document.hpp trunk/opentrep/opentrep/bom/Place.cpp trunk/opentrep/opentrep/bom/Place.hpp trunk/opentrep/opentrep/bom/ResultHolder.cpp trunk/opentrep/opentrep/bom/StringMatcher.cpp trunk/opentrep/opentrep/bom/StringMatcher.hpp trunk/opentrep/opentrep/command/RequestInterpreter.cpp trunk/opentrep/opentrep/factory/FacPlace.cpp trunk/opentrep/opentrep/factory/FacPlace.hpp Modified: trunk/opentrep/opentrep/bom/Document.cpp =================================================================== --- trunk/opentrep/opentrep/bom/Document.cpp 2009-07-25 22:27:45 UTC (rev 169) +++ trunk/opentrep/opentrep/bom/Document.cpp 2009-07-27 05:56:43 UTC (rev 170) @@ -25,7 +25,8 @@ std::ostringstream oStr; oStr << "`" << describeShortKey() << "'"; if (_correctedQueryString.empty() == false) { - oStr << " (corrected into `" << _correctedQueryString << "')"; + oStr << " (corrected into `" << _correctedQueryString + << "' with an edit distance/error of " << _editDistance << ")"; } return oStr.str(); } @@ -37,11 +38,12 @@ const Xapian::docid& lDocID = _document.get_docid(); oStr << " => Document ID " << lDocID << " matching at " << _percentage - << "% [" << _document.get_data() << "]"; + << "% (edit distance of " << _editDistance << ") [" + << _document.get_data() << "]"; if (_documentList.empty() == false) { oStr << " along with " << _documentList.size() - << " other matching document(s) ("; + << " other equivalent matching document(s) ("; unsigned short idx = 0; for (XapianDocumentList_T::const_iterator itDoc = _documentList.begin(); @@ -53,6 +55,25 @@ } oStr << lDocID; } + oStr << ")"; + } + + if (_alternateDocumentList.empty() == 
false) { + oStr << " and with still " << _alternateDocumentList.size() + << " other less matching document(s) ("; + + unsigned short idx = 0; + for (XapianAlternateDocumentList_T::const_iterator itDoc = + _alternateDocumentList.begin(); + itDoc != _alternateDocumentList.end(); ++itDoc, ++idx) { + const Xapian::percent& lPercentage = itDoc->first; + const Xapian::Document& lXapianDoc = itDoc->second; + const Xapian::docid& lDocID = lXapianDoc.get_docid(); + if (idx != 0) { + oStr << ", "; + } + oStr << lDocID << " / " << lPercentage << "%"; + } oStr << ")." << std::endl; } else { @@ -73,7 +94,7 @@ if (_documentList.empty() == false) { oStr << " along with " << _documentList.size() - << " other matching document(s) { "; + << " other equivalent matching document(s) { "; unsigned short idx = 0; for (XapianDocumentList_T::const_iterator itDoc = _documentList.begin(); @@ -85,6 +106,26 @@ } oStr << "Doc ID " << lDocID << " [" << lXapianDoc.get_data() << "]"; } + oStr << " }"; + } + + if (_alternateDocumentList.empty() == false) { + oStr << " and with still " << _alternateDocumentList.size() + << " other less matching document(s) { "; + + unsigned short idx = 0; + for (XapianAlternateDocumentList_T::const_iterator itDoc = + _alternateDocumentList.begin(); + itDoc != _alternateDocumentList.end(); ++itDoc, ++idx) { + const Xapian::percent& lPercentage = itDoc->first; + const Xapian::Document& lXapianDoc = itDoc->second; + const Xapian::docid& lDocID = lXapianDoc.get_docid(); + if (idx != 0) { + oStr << ", "; + } + oStr << lDocID << " / " << lPercentage << "% [" + << lXapianDoc.get_data() << "]"; + } oStr << " }." 
<< std::endl; } else { Modified: trunk/opentrep/opentrep/bom/Document.hpp =================================================================== --- trunk/opentrep/opentrep/bom/Document.hpp 2009-07-25 22:27:45 UTC (rev 169) +++ trunk/opentrep/opentrep/bom/Document.hpp 2009-07-27 05:56:43 UTC (rev 170) @@ -17,6 +17,12 @@ // //////////////// Type definitions ///////////////// /** List of Xapian documents. */ typedef std::list<Xapian::Document> XapianDocumentList_T; + + /** Pair of a Xapian document and its associated matching percentage. */ + typedef std::pair<Xapian::percent, Xapian::Document> XapianDocumentPair_T; + + /** List of Xapian documents. */ + typedef std::list<XapianDocumentPair_T> XapianAlternateDocumentList_T; // //////////////// Main Class ///////////////// @@ -48,12 +54,19 @@ return _percentage; } - /** Get the extra list of matching Xapian documents. */ + /** Get the extra list of matching Xapian documents (i.e., those + having matched with the same weight as the main one). */ const XapianDocumentList_T& getExtraDocumentList() const { return _documentList; } + /** Get the alternate list of matching Xapian documents (i.e., those + having matched with a lower weight than the main one). */ + const XapianAlternateDocumentList_T& getAlternateDocumentList() const { + return _alternateDocumentList; + } + // ////////////////// Setters //////////////// /** Set the query string. */ void setQueryString (const TravelQuery_T& iQueryString) { @@ -75,12 +88,25 @@ _percentage = iPercentage; } + /** Set the edit distance/error, with which the matching has been made. */ + void setEditDistance (const NbOfErrors_T& iEditDistance) { + _editDistance = iEditDistance; + } + /** Add a matching Xapian document (having the same matching percentage). */ void addExtraDocument (const Xapian::Document& iMatchingDocument) { _documentList.push_back (iMatchingDocument); } + /** Add a matching Xapian document (having a lower matching percentage). 
*/ + void addAlternateDocument (const Xapian::percent& iMatchingPercentage, + const Xapian::Document& iMatchingDocument) { + _alternateDocumentList. + push_back (XapianDocumentPair_T (iMatchingPercentage, + iMatchingDocument)); + } + public: // /////////// Business methods ///////// /** Retrieve the number of extra matches for the given query string, @@ -143,10 +169,18 @@ /** Matching document, as returned by the Xapian full text search. */ Xapian::Document _document; + /** Edit distance/error, with which the matching has been made. */ + NbOfErrors_T _editDistance; + /** List of Xapian documents having the same matching percentage. <br>Hence, any of those other Xapian documents could have been chosen, instead of the main one. */ XapianDocumentList_T _documentList; + + /** List of Xapian documents having a lower matching percentage. + <br>Those alternate matches can be suggested (in the famous + "Did you mean Xxx?" question) to the end user. */ + XapianAlternateDocumentList_T _alternateDocumentList; }; } Modified: trunk/opentrep/opentrep/bom/Place.cpp =================================================================== --- trunk/opentrep/opentrep/bom/Place.cpp 2009-07-25 22:27:45 UTC (rev 169) +++ trunk/opentrep/opentrep/bom/Place.cpp 2009-07-27 05:56:43 UTC (rev 170) @@ -10,12 +10,13 @@ namespace OPENTREP { // ////////////////////////////////////////////////////////////////////// - Place::Place () : _world (NULL), _placeHolder (NULL) { + Place::Place () : _world (NULL), _placeHolder (NULL), _mainPlace (NULL) { } // ////////////////////////////////////////////////////////////////////// Place::Place (const Place& iPlace) : _world (iPlace._world), _placeHolder (iPlace._placeHolder), + _mainPlace (iPlace._mainPlace), _placeCode (iPlace._placeCode), _cityCode (iPlace._cityCode), _stateCode (iPlace._stateCode), _countryCode (iPlace._countryCode), _regionCode (iPlace._regionCode), _continentCode (iPlace._continentCode), Modified: trunk/opentrep/opentrep/bom/Place.hpp 
=================================================================== --- trunk/opentrep/opentrep/bom/Place.hpp 2009-07-25 22:27:45 UTC (rev 169) +++ trunk/opentrep/opentrep/bom/Place.hpp 2009-07-27 05:56:43 UTC (rev 170) @@ -14,6 +14,7 @@ #include <opentrep/Location.hpp> #include <opentrep/bom/BomAbstract.hpp> #include <opentrep/bom/Names.hpp> +#include <opentrep/bom/PlaceList.hpp> namespace OPENTREP { @@ -93,6 +94,16 @@ language. */ bool getNameList (const Language::EN_Language&, NameList_T&) const; + /** Get the list of extra matching (similar) places. */ + const PlaceOrderedList_T& getExtraPlaceList() const { + return _extraPlaceList; + } + + /** Get the list of alternate matching (less similar) places. */ + const PlaceOrderedList_T& getAlternatePlaceList() const { + return _alternatePlaceList; + } + // ///////// Setters //////// /** Set the Place code. */ @@ -206,9 +217,14 @@ /** Parent World. */ World* _world; - /** Parent PlaceHolder. */ + /** Parent PlaceHolder (not always defined, for instance if the + current Place object is an extra or alternate one). */ PlaceHolder* _placeHolder; + /** Parent (main) Place (not always defined, for instance if the + current Place object is itself a main one). */ + Place* _mainPlace; + private: // /////// Attributes ///////// /** Place code. */ @@ -233,6 +249,12 @@ NameMatrix_T _nameMatrix; /** Xapian document ID. */ XapianDocID_T _docID; + + /** List of extra matching (similar) places. */ + PlaceOrderedList_T _extraPlaceList; + + /** List of alternate matching (less similar) places. 
*/ + PlaceOrderedList_T _alternatePlaceList; }; } Modified: trunk/opentrep/opentrep/bom/ResultHolder.cpp =================================================================== --- trunk/opentrep/opentrep/bom/ResultHolder.cpp 2009-07-25 22:27:45 UTC (rev 169) +++ trunk/opentrep/opentrep/bom/ResultHolder.cpp 2009-07-27 05:56:43 UTC (rev 170) @@ -110,26 +110,26 @@ << lMaxEditDistance << "."); // Retrieve the list of Xapian documents matching the query string + NbOfErrors_T lCalculatedEditDistance = 0; oMatchedString = - StringMatcher::searchString (ioMatchingSet, ioPartialQueryString, - lMaxEditDistance, - hasReachedMaximalAllowableEditDistance, - _database); + StringMatcher::searchString(ioMatchingSet, ioPartialQueryString, + lCalculatedEditDistance, lMaxEditDistance, + hasReachedMaximalAllowableEditDistance, + _database); // DEBUG OPENTREP_LOG_DEBUG ("---- Current query string: `" << ioPartialQueryString << "' --- Kept query: `" << oMatchedString - << "', with a maximal edit distance of " - << lMaxEditDistance << ", for " + << "', with an edit distance of a maximum of " + << lCalculatedEditDistance << " (over " + << lMaxEditDistance << "), for " << ioMatchingSet.size() << " matches."); if (ioMatchingSet.empty() == false) { - // Create the corresponding list of documents - StringMatcher:: - extractBestMatchingDocumentFromMSet (ioMatchingSet, - ioMatchingDocument); - + // Store the calculated (and applied) edit distance/error + ioMatchingDocument.setEditDistance (lCalculatedEditDistance); + // Since a result has been found, the search can be stopped // for that part of the query. 
shouldStop = true; Modified: trunk/opentrep/opentrep/bom/StringMatcher.cpp =================================================================== --- trunk/opentrep/opentrep/bom/StringMatcher.cpp 2009-07-25 22:27:45 UTC (rev 169) +++ trunk/opentrep/opentrep/bom/StringMatcher.cpp 2009-07-27 05:56:43 UTC (rev 170) @@ -114,12 +114,13 @@ // /////////////////////////////////////////////////////////////////// void checkAndAlterIfNeeded (TravelQuery_T& ioSuggestedString, const TravelQuery_T& iOriginalString, + NbOfErrors_T& ioCalculatedEditDistance, const NbOfErrors_T& iMaxEditDistance, const Xapian::Database& iDatabase) { /** - Store a copy of the suggested string, as it will me altered by - the below method. + Store a copy of the suggested string, as it will be altered by + the below method, i.e., removeFurthestLeftWord(). */ TravelQuery_T lOriginalStringCopy (iOriginalString); StringMatcher::removeFurthestLeftWord (lOriginalStringCopy); @@ -128,15 +129,14 @@ Get a spell-corrected suggestion for the reduced original string. <br>Limit the edit distance to the given maximal one. 
*/ - NbOfErrors_T lCalculatedEditDistance = - calculateEditDistance (lOriginalStringCopy); + ioCalculatedEditDistance = calculateEditDistance (lOriginalStringCopy); - lCalculatedEditDistance = std::min (lCalculatedEditDistance, - iMaxEditDistance); + ioCalculatedEditDistance = std::min (ioCalculatedEditDistance, + iMaxEditDistance); std::string lSuggestionForReducedOriginalString = iDatabase.get_spelling_suggestion (lOriginalStringCopy, - lCalculatedEditDistance); + ioCalculatedEditDistance); /** Note that if the suggestion on the reduced-original string is @@ -154,7 +154,7 @@ OPENTREP_LOG_DEBUG ("The suggestion (`" << ioSuggestedString << "') for `" << iOriginalString << "', with an edit distance/error of " - << lCalculatedEditDistance + << ioCalculatedEditDistance << " over " << iMaxEditDistance << " allowable" << ", is the same as the suggestion for the reduced " << "original string (`" << lOriginalStringCopy @@ -184,14 +184,14 @@ Get a spell-corrected suggestion for the reduced original string. <br>Limit the edit distance to the given maximal one. 
*/ - lCalculatedEditDistance = calculateEditDistance (lOriginalStringCopy); + ioCalculatedEditDistance = calculateEditDistance (lOriginalStringCopy); - lCalculatedEditDistance = std::min (lCalculatedEditDistance, - iMaxEditDistance); + ioCalculatedEditDistance = std::min (ioCalculatedEditDistance, + iMaxEditDistance); lSuggestionForReducedOriginalString = iDatabase.get_spelling_suggestion (lOriginalStringCopy, - lCalculatedEditDistance); + ioCalculatedEditDistance); /** Note that if the suggestion on the reduced-original string is @@ -209,7 +209,7 @@ OPENTREP_LOG_DEBUG ("The suggestion (`" << ioSuggestedString << "') for `" << iOriginalString << "', with an edit distance/error of " - << lCalculatedEditDistance + << ioCalculatedEditDistance << " over " << iMaxEditDistance << " allowable" << ", is the same as the suggestion for the reduced " << "original string (`" << lOriginalStringCopy @@ -228,6 +228,7 @@ std::string StringMatcher:: searchString (Xapian::MSet& ioMatchingSet, const TravelQuery_T& iSearchString, + NbOfErrors_T& ioCalculatedEditDistance, NbOfErrors_T& ioMaxEditDistance, bool& ioHasReachedMaximalAllowableEditDistance, const Xapian::Database& iDatabase) { @@ -407,26 +408,26 @@ phrase/string. With the above example, 'sna francisco' yields the suggestion 'san francisco'. 
*/ - NbOfErrors_T lCalculatedEditDistance = - calculateEditDistance (lOriginalQueryString); + ioCalculatedEditDistance = calculateEditDistance (lOriginalQueryString); // Store the greatest edit distance/error - lMaxEditDistance = std::max (lMaxEditDistance, lCalculatedEditDistance); + lMaxEditDistance = std::max (lMaxEditDistance, ioCalculatedEditDistance); // Limit the edit distance to the given maximal one - lCalculatedEditDistance = std::min (lCalculatedEditDistance, - ioMaxEditDistance); + ioCalculatedEditDistance = std::min (ioCalculatedEditDistance, + ioMaxEditDistance); std::string lFullWordCorrectedString = iDatabase.get_spelling_suggestion (lOriginalQueryString, - lCalculatedEditDistance); + ioCalculatedEditDistance); /** Check that the suggestion does not encompass extra words, which will be otherwise/rather recognised in another step. */ checkAndAlterIfNeeded (lFullWordCorrectedString, lOriginalQueryString, - ioMaxEditDistance, iDatabase); + ioCalculatedEditDistance, ioMaxEditDistance, + iDatabase); /** Since there is still no match, we search on the string @@ -528,7 +529,7 @@ NbOfMatches_T idx = 1; for ( ; itDoc != iMatchingSet.end(); ++itDoc, ++idx) { const Xapian::percent& lPercentage = itDoc.get_percent(); - // const Xapian::Document& lDocument = itDoc.get_document(); + const Xapian::Document& lDocument = itDoc.get_document(); // DEBUG /* @@ -536,12 +537,15 @@ << lDocument.get_docid() << " matching at " << lPercentage << "%."); */ - + + /** If the matching percentage is the same as for the main + (chosen) Xapian document, then add it to the dedicated + list. Otherwise, add it to the alternative choices. 
*/ if (lPercentage == lBestPercentage) { - ioMatchingDocument.addExtraDocument (itDoc.get_document()); + ioMatchingDocument.addExtraDocument (lDocument); } else { - break; + ioMatchingDocument.addAlternateDocument (lPercentage, lDocument); } } } Modified: trunk/opentrep/opentrep/bom/StringMatcher.hpp =================================================================== --- trunk/opentrep/opentrep/bom/StringMatcher.hpp 2009-07-25 22:27:45 UTC (rev 169) +++ trunk/opentrep/opentrep/bom/StringMatcher.hpp 2009-07-27 05:56:43 UTC (rev 170) @@ -28,6 +28,7 @@ words of the search string. @param Xapian::MSet& The Xapian matching set. It can be empty. @param const std::string& The query string. + @param NbOfErrors_T& The calculated (and applied) edit distance/error. @param NbOfErrors_T& The maximal allowable edit distance/error. @param bool& Whether or not the maximal allowable edit distance/error has become greater than the maximum of the edit distance/errors @@ -37,6 +38,7 @@ which has yielded matches. 
*/ static std::string searchString (Xapian::MSet&, const std::string& iSearchString, + NbOfErrors_T& ioCalculatedEditDistance, NbOfErrors_T& ioMaxEditDistance, bool& ioHasReachedMaximalAllowableEditDistance, const Xapian::Database&); Modified: trunk/opentrep/opentrep/command/RequestInterpreter.cpp =================================================================== --- trunk/opentrep/opentrep/command/RequestInterpreter.cpp 2009-07-25 22:27:45 UTC (rev 169) +++ trunk/opentrep/opentrep/command/RequestInterpreter.cpp 2009-07-27 05:56:43 UTC (rev 170) @@ -72,6 +72,13 @@ const Result* lResult_ptr = *itResult; assert (lResult_ptr != NULL); + /** + TODO: Add a loop for retrieving both extra and alternate Documents + Use FacPlace::initLinkWithExtraPlace() and + FacPlace::initLinkWithAlternatePlace() + */ + + // Retrieve the parameters of the best matching document const Xapian::Document& lDocument = lResult_ptr->getXapianDocument(); const Xapian::percent& lDocPercentage = Modified: trunk/opentrep/opentrep/factory/FacPlace.cpp =================================================================== --- trunk/opentrep/opentrep/factory/FacPlace.cpp 2009-07-25 22:27:45 UTC (rev 169) +++ trunk/opentrep/opentrep/factory/FacPlace.cpp 2009-07-27 05:56:43 UTC (rev 170) @@ -63,4 +63,26 @@ return *oPlace_ptr; } + // ////////////////////////////////////////////////////////////////////// + void FacPlace::initLinkWithExtraPlace (Place& ioMainPlace, + Place& ioExtraPlace) { + // Link the main Place to the extra Place, and vice versa + ioExtraPlace._mainPlace = &ioMainPlace; + + // Add the extra Place to the main Place internal map (of extra + // Place objects) + ioMainPlace._extraPlaceList.push_back (&ioExtraPlace); + } + + // ////////////////////////////////////////////////////////////////////// + void FacPlace::initLinkWithAlternatePlace (Place& ioMainPlace, + Place& ioAlternatePlace) { + // Link the main Place to the alternate Place, and vice versa + ioAlternatePlace._mainPlace = 
&ioMainPlace; + + // Add the alternate Place to the main Place internal map (of + // alternate Place objects) + ioMainPlace._extraPlaceList.push_back (&ioAlternatePlace); + } + } Modified: trunk/opentrep/opentrep/factory/FacPlace.hpp =================================================================== --- trunk/opentrep/opentrep/factory/FacPlace.hpp 2009-07-25 22:27:45 UTC (rev 169) +++ trunk/opentrep/opentrep/factory/FacPlace.hpp 2009-07-27 05:56:43 UTC (rev 170) @@ -36,6 +36,20 @@ @return Place& The newly created object. */ Place& clone (const Place&); + /** Initialise the link between a Place and an extra Place. + @param Place& Main Place object. + @param Place& Extra Place object. + @exception FacExceptionNullPointer + @exception FacException.*/ + static void initLinkWithExtraPlace (Place&, Place&); + + /** Initialise the link between a Place and an alternate Place. + @param Place& Main Place object. + @param Place& Alternate Place object. + @exception FacExceptionNullPointer + @exception FacException.*/ + static void initLinkWithAlternatePlace (Place&, Place&); + private: /** Default Constructor. <br>This constructor is private in order to ensure the singleton This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |