opentrep-svn Mailing List for Open Travel Request Parser (Page 5)
Status: Beta
Brought to you by:
denis_arnaud
You can subscribe to this list here.
2009 | Jan | Feb | Mar | Apr | May | Jun | Jul (52) | Aug (19) | Sep (4) | Oct (10) | Nov (2) | Dec (4) |
---|---|---|---|---|---|---|---|---|---|---|---|---|
2010 | Jan (3) | Feb | Mar (3) | Apr | May | Jun (1) | Jul (2) | Aug (1) | Sep (9) | Oct | Nov (1) | Dec |
2011 | Jan | Feb (8) | Mar | Apr | May | Jun | Jul (2) | Aug | Sep | Oct | Nov | Dec |
From: <den...@us...> - 2009-07-18 20:30:12
|
Revision: 139 http://opentrep.svn.sourceforge.net/opentrep/?rev=139&view=rev Author: denis_arnaud Date: 2009-07-18 20:30:08 +0000 (Sat, 18 Jul 2009) Log Message: ----------- [Dev] Fixed a bug causing an infinite loop when no-matching words were part of the query string. Modified Paths: -------------- trunk/opentrep/opentrep/batches/searcher.cpp trunk/opentrep/opentrep/bom/ResultHolder.cpp trunk/opentrep/opentrep/bom/StringMatcher.cpp trunk/opentrep/opentrep/bom/StringMatcher.hpp trunk/opentrep/opentrep/command/RequestInterpreter.cpp Modified: trunk/opentrep/opentrep/batches/searcher.cpp =================================================================== --- trunk/opentrep/opentrep/batches/searcher.cpp 2009-07-18 18:32:04 UTC (rev 138) +++ trunk/opentrep/opentrep/batches/searcher.cpp 2009-07-18 20:30:08 UTC (rev 139) @@ -232,11 +232,11 @@ const OPENTREP::NbOfMatches_T nbOfMatches = opentrepService.interpretTravelRequest (lTravelQuery, lLocationList); - if (nbOfMatches != 0) { std::cout << nbOfMatches << " (geographical) location(s) have been found " << "matching your query (`" << lTravelQuery << "´)." << std::endl; + if (nbOfMatches != 0) { OPENTREP::NbOfMatches_T idx = 1; for (OPENTREP::LocationList_T::const_iterator itLocation = lLocationList.begin(); Modified: trunk/opentrep/opentrep/bom/ResultHolder.cpp =================================================================== --- trunk/opentrep/opentrep/bom/ResultHolder.cpp 2009-07-18 18:32:04 UTC (rev 138) +++ trunk/opentrep/opentrep/bom/ResultHolder.cpp 2009-07-18 20:30:08 UTC (rev 139) @@ -96,8 +96,8 @@ break; } - // Remove a word from the query string - StringMatcher::removeOneWord (ioPartialQueryString); + // Remove the furthest right word from the query string + StringMatcher::removeFurthestRightWord (ioPartialQueryString); // Stop when the resulting string gets empty. 
if (ioPartialQueryString.empty() == true) { @@ -150,22 +150,40 @@ // DEBUG OPENTREP_LOG_DEBUG ("==> Matching of the query string: `" << lQueryString << "'"); + + /** + Remove, from the lRemainingQueryString string, the part + which has been already successfully parsed. <br>For + instance, when 'sna francisco rio de janeiro' is the + initial full clean query string, the searchString() + method first reduce the query string to 'sna francisco', + which successfully matches against SFO (San Francisco + airport). <br>Then, the remaining part of the query + string to be parsed is 'rio de janeiro'. So, the already + parsed part, namely 'sna francisco', must be subtracted + from the initial query string. + */ + StringMatcher::subtractParsedToRemaining (lQueryString, + lRemainingQueryString); + } else { + // DEBUG + OPENTREP_LOG_DEBUG ("==> No matching of the query string: `" + << lRemainingQueryString + << "'. Skip the beginning word."); + assert (lQueryString.empty() == true); + + /** + We must suppress (at least) the furthest left word, as it + hinders the remaining of the query string to be + matched. If that furthest left word is the only word of + the query string, the remaining query string will + therefore be empty, and the loop will therefore be exited + in the next step below. + */ + // Remove the furthest right word from the query string + StringMatcher::removeFurthestLeftWord (lRemainingQueryString); } - /** - Remove, from the lRemainingQueryString string, the part which - has been already successfully parsed. - <br>For instance, when 'sna francisco rio de janeiro' is the - initial full clean query string, the searchString() method - first reduce the query string to 'sna francisco', which - successfully matches against SFO (San Francisco airport). - <br>Then, the remaining part of the query string to be parsed is - 'rio de janeiro'. So, the already parsed part, namely - 'sna francisco', must be subtracted from the initial query string. 
- */ - StringMatcher::subtractParsedToRemaining (lQueryString, - lRemainingQueryString); - // If there is nothing left to be parsed, we have then finished // to parse the initial string. if (lRemainingQueryString.empty() == true) { Modified: trunk/opentrep/opentrep/bom/StringMatcher.cpp =================================================================== --- trunk/opentrep/opentrep/bom/StringMatcher.cpp 2009-07-18 18:32:04 UTC (rev 138) +++ trunk/opentrep/opentrep/bom/StringMatcher.cpp 2009-07-18 20:30:08 UTC (rev 139) @@ -312,7 +312,7 @@ } // ////////////////////////////////////////////////////////////////////// - void StringMatcher::removeOneWord (std::string& ioQueryString) { + void StringMatcher::removeFurthestRightWord (std::string& ioQueryString) { assert (ioQueryString.empty() == false); WordList_T lWordList; @@ -328,6 +328,22 @@ } // ////////////////////////////////////////////////////////////////////// + void StringMatcher::removeFurthestLeftWord (std::string& ioQueryString) { + assert (ioQueryString.empty() == false); + + WordList_T lWordList; + WordHolder::tokeniseStringIntoWordList (ioQueryString, lWordList); + assert (lWordList.empty() == false); + + // Remove the furthest left word + lWordList.pop_front(); + + const std::string& lReducedString = + WordHolder::createStringFromWordList (lWordList); + ioQueryString = lReducedString; + } + + // ////////////////////////////////////////////////////////////////////// void StringMatcher:: subtractParsedToRemaining (const std::string& iAlreadyParsedQueryString, std::string& ioRemainingQueryString) { Modified: trunk/opentrep/opentrep/bom/StringMatcher.hpp =================================================================== --- trunk/opentrep/opentrep/bom/StringMatcher.hpp 2009-07-18 18:32:04 UTC (rev 138) +++ trunk/opentrep/opentrep/bom/StringMatcher.hpp 2009-07-18 20:30:08 UTC (rev 139) @@ -41,8 +41,11 @@ extractBestMatchingDocumentFromMSet (const Xapian::MSet&, Document&); /** Remove the word furthest at right. 
*/ - static void removeOneWord (std::string& ioQueryString); + static void removeFurthestRightWord (std::string& ioQueryString); + /** Remove the word furthest at left. */ + static void removeFurthestLeftWord (std::string& ioQueryString); + /** Remove, from a string, the part corresponding to the one given as parameter. */ static void Modified: trunk/opentrep/opentrep/command/RequestInterpreter.cpp =================================================================== --- trunk/opentrep/opentrep/command/RequestInterpreter.cpp 2009-07-18 18:32:04 UTC (rev 138) +++ trunk/opentrep/opentrep/command/RequestInterpreter.cpp 2009-07-18 20:30:08 UTC (rev 139) @@ -159,6 +159,7 @@ of the Place objects. Those (Location) structures are passed back to the caller of the service. */ lPlaceHolder.createLocations (ioLocationList); + oNbOfMatches = ioLocationList.size(); return oNbOfMatches; } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <den...@us...> - 2009-07-18 18:32:11
|
Revision: 138 http://opentrep.svn.sourceforge.net/opentrep/?rev=138&view=rev Author: denis_arnaud Date: 2009-07-18 18:32:04 +0000 (Sat, 18 Jul 2009) Log Message: ----------- [Dev] Improved the interface, so that the travel search now returns a list of Location structures, that the caller can then benefit from. Modified Paths: -------------- trunk/opentrep/opentrep/OPENTREP_Service.hpp trunk/opentrep/opentrep/batches/indexer.cpp trunk/opentrep/opentrep/batches/searcher.cpp trunk/opentrep/opentrep/bom/BomAbstract.hpp trunk/opentrep/opentrep/bom/Document.cpp trunk/opentrep/opentrep/bom/Document.hpp trunk/opentrep/opentrep/bom/Names.cpp trunk/opentrep/opentrep/bom/Names.hpp trunk/opentrep/opentrep/bom/Place.cpp trunk/opentrep/opentrep/bom/Place.hpp trunk/opentrep/opentrep/bom/PlaceHolder.cpp trunk/opentrep/opentrep/bom/PlaceHolder.hpp trunk/opentrep/opentrep/bom/Result.cpp trunk/opentrep/opentrep/bom/Result.hpp trunk/opentrep/opentrep/bom/ResultHolder.cpp trunk/opentrep/opentrep/bom/ResultHolder.hpp trunk/opentrep/opentrep/bom/World.cpp trunk/opentrep/opentrep/bom/World.hpp trunk/opentrep/opentrep/command/RequestInterpreter.cpp trunk/opentrep/opentrep/command/RequestInterpreter.hpp trunk/opentrep/opentrep/service/OPENTREP_Service.cpp trunk/opentrep/opentrep/sources.mk Added Paths: ----------- trunk/opentrep/opentrep/Location.hpp trunk/opentrep/opentrep/OPENTREP_Abstract.hpp Added: trunk/opentrep/opentrep/Location.hpp =================================================================== --- trunk/opentrep/opentrep/Location.hpp (rev 0) +++ trunk/opentrep/opentrep/Location.hpp 2009-07-18 18:32:04 UTC (rev 138) @@ -0,0 +1,217 @@ +#ifndef __OPENTREP_LOCATION_HPP +#define __OPENTREP_LOCATION_HPP + +// ////////////////////////////////////////////////////////////////////// +// Import section +// ////////////////////////////////////////////////////////////////////// +// STL +#include <istream> +#include <ostream> +#include <string> +#include <list> +// OpenTrep +#include 
<opentrep/OPENTREP_Types.hpp> +#include <opentrep/OPENTREP_Abstract.hpp> + +namespace OPENTREP { + + /** List of names for a given (geographical) location. */ + typedef std::list<std::string> LocationNameList_T; + + + /** Structure modelling a (geographical) location. */ + struct Location : public OPENTREP_Abstract { + public: + // ///////// Getters //////// + /** Get the Location code. */ + std::string getLocationCode() const { + return _locationCode; + } + + /** Get the City code. */ + std::string getCityCode() const { + return _cityCode; + } + + /** Get the State code. */ + std::string getStateCode() const { + return _stateCode; + } + + /** Get the Country code. */ + std::string getCountryCode() const { + return _countryCode; + } + + /** Get the Region code. */ + std::string getRegionCode() const { + return _regionCode; + } + + /** Get the Continent code. */ + std::string getContinentCode() const { + return _continentCode; + } + + /** Get the Time-zone group. */ + std::string getTimeZoneGroup() const { + return _timeZoneGroup; + } + + /** Get the Longitude. */ + double getLongitude() const { + return _longitude; + } + + /** Get the Latitude. */ + double getLatitude() const { + return _latitude; + } + + /** Get the list of (American) English names for that location. */ + const LocationNameList_T& getNameList () const { + return _nameList; + } + + + // ///////// Setters ////////// + /** Set the Location code. */ + void setLocationCode (const std::string& iLocationCode) { + _locationCode = iLocationCode; + } + + /** Set the City code. */ + void setCityCode (const std::string& iCityCode) { + _cityCode = iCityCode; + } + + /** Set the State code. */ + void setStateCode (const std::string& iStateCode) { + _stateCode = iStateCode; + } + + /** Set the Country code. */ + void setCountryCode (const std::string& iCountryCode) { + _countryCode = iCountryCode; + } + + /** Set the Region code. 
*/ + void setRegionCode (const std::string& iRegionCode) { + _regionCode = iRegionCode; + } + + /** Set the Continent code. */ + void setContinentCode (const std::string& iContinentCode) { + _continentCode = iContinentCode; + } + + /** Set the Time-zone group. */ + void setTimeZoneGroup (const std::string& iTimeZoneGroup) { + _timeZoneGroup = iTimeZoneGroup; + } + + /** Set the Longitude. */ + void setLongitude (const double& iLongitude) { + _longitude = iLongitude; + } + + /** Set the Latitude. */ + void setLatitude (const double& iLatitude) { + _latitude = iLatitude; + } + + /** Set the list of (American) English names for that location. */ + void getNameList (const LocationNameList_T& iNameList) { + _nameList = iNameList; + } + + + public: + // ///////// Display methods //////// + /** Dump a structure into an output stream. + @param ostream& the output stream. */ + void toStream (std::ostream& ioOut) const { + ioOut << toString(); + } + + /** Read a structure from an input stream. + @param istream& the input stream. */ + void fromStream (std::istream&) { + } + + /** Get a short display of the Location structure. */ + std::string toShortString() const { + std::ostringstream oStr; + oStr << _locationCode << ", " << _cityCode << ", " << _stateCode + << ", " << _countryCode << ", " << _regionCode + << ", " << _continentCode << ", " << _timeZoneGroup + << ", " << _longitude << ", " << _latitude; + return oStr.str(); + } + + /** Get the serialised version of the Location structure. */ + std::string toString() const { + std::ostringstream oStr; + oStr << toShortString(); + for (LocationNameList_T::const_iterator itName = _nameList.begin(); + itName != _nameList.end(); ++itName) { + oStr << ", " << *itName; + } + return oStr.str(); + } + + + public: + /** Main Constructor. 
*/ + Location (const std::string& iPlaceCode, const std::string& iCityCode, + const std::string& iStateCode, const std::string& iCountryCode, + const std::string& iRegionCode, const std::string& iContinentCode, + const std::string& iTimeZoneGroup, + const double iLongitude, const double iLatitude, + const LocationNameList_T& iNameList) + : _locationCode (iPlaceCode), _cityCode (iCityCode), + _stateCode (iStateCode), _countryCode (iCountryCode), + _regionCode (iRegionCode), _continentCode (iContinentCode), + _timeZoneGroup (iTimeZoneGroup), _longitude (iLongitude), + _latitude (iLatitude), _nameList (iNameList) { + } + + /** Default Constructor. */ + // Location (); + /** Default copy constructor. */ + // Location (const Location&); + + /** Destructor. */ + virtual ~Location() {} + + + private: + // /////// Attributes ///////// + /** Location code. */ + std::string _locationCode; + /** City code. */ + std::string _cityCode; + /** State code. */ + std::string _stateCode; + /** Country code. */ + std::string _countryCode; + /** Region code. */ + std::string _regionCode; + /** Continent code. */ + std::string _continentCode; + /** Time-zone group. */ + std::string _timeZoneGroup; + /** Longitude. */ + double _longitude; + /** Latitude. */ + double _latitude; + /** List of (American) English names. */ + LocationNameList_T _nameList; + }; + + + /** List of (geographical) location structures. 
*/ + typedef std::list<Location> LocationList_T; + +} +#endif // __OPENTREP_LOCATION_HPP Added: trunk/opentrep/opentrep/OPENTREP_Abstract.hpp =================================================================== --- trunk/opentrep/opentrep/OPENTREP_Abstract.hpp (rev 0) +++ trunk/opentrep/opentrep/OPENTREP_Abstract.hpp 2009-07-18 18:32:04 UTC (rev 138) @@ -0,0 +1,84 @@ +#ifndef __OPENTREP_OPENTREP_ABSTRACT_HPP +#define __OPENTREP_OPENTREP_ABSTRACT_HPP + +// ////////////////////////////////////////////////////////////////////// +// Import section +// ////////////////////////////////////////////////////////////////////// +// STL +#include <istream> +#include <ostream> +#include <sstream> +#include <string> + +namespace OPENTREP { + + /** Base class for the OPENTREP interface structures. */ + struct OPENTREP_Abstract { + public: + // /////////// Display support methods ///////// + /** Dump a structure into an output stream. + @param ostream& the output stream. */ + virtual void toStream (std::ostream& ioOut) const = 0; + + /** Read a structure from an input stream. + @param istream& the input stream. */ + virtual void fromStream (std::istream& ioIn) = 0; + + /** Get the serialised version of the structure. */ + virtual std::string toString() const = 0; + + + protected: + /** Protected Default Constructor to ensure this class is abtract. */ + OPENTREP_Abstract () {} + OPENTREP_Abstract (const OPENTREP_Abstract&) {} + + /** Destructor. */ + virtual ~OPENTREP_Abstract() {} + }; +} + +/** + Piece of code given by Nicolai M. Josuttis, Section 13.12.1 "Implementing + Output Operators" (p653) of his book "The C++ Standard Library: A Tutorial + and Reference", published by Addison-Wesley. 
+ */ +template <class charT, class traits> +inline +std::basic_ostream<charT, traits>& +operator<< (std::basic_ostream<charT, traits>& ioOut, + const OPENTREP::OPENTREP_Abstract& iStructure) { + /** + string stream: + - with same format + - without special field width + */ + std::basic_ostringstream<charT,traits> ostr; + ostr.copyfmt (ioOut); + ostr.width (0); + + // Fill string stream + iStructure.toStream (ostr); + + // Print string stream + ioOut << ostr.str(); + + return ioOut; +} + +/** + Piece of code given by Nicolai M. Josuttis, Section 13.12.1 "Implementing + Output Operators" (pp655-657) of his book "The C++ Standard Library: + A Tutorial and Reference", published by Addison-Wesley. + */ +template <class charT, class traits> +inline +std::basic_istream<charT, traits>& +operator>> (std::basic_istream<charT, traits>& ioIn, + OPENTREP::OPENTREP_Abstract& ioStucture) { + // Fill Bom object with input stream + ioStucture.fromStream (ioIn); + return ioIn; +} + +#endif // __OPENTREP_OPENTREP_ABSTRACT_HPP Modified: trunk/opentrep/opentrep/OPENTREP_Service.hpp =================================================================== --- trunk/opentrep/opentrep/OPENTREP_Service.hpp 2009-07-18 15:15:40 UTC (rev 137) +++ trunk/opentrep/opentrep/OPENTREP_Service.hpp 2009-07-18 18:32:04 UTC (rev 138) @@ -7,30 +7,44 @@ // STL #include <ostream> #include <string> -// OPENTREP +// OpenTREP #include <opentrep/OPENTREP_Types.hpp> +#include <opentrep/Location.hpp> namespace OPENTREP { - /** Forward declaration. */ + // Forward declaration class OPENTREP_ServiceContext; + /** Interface for the OPENTREP Services. */ class OPENTREP_Service { public: - /** Constructor. */ + // /////////// Business Methods ///////////// + /** Build the Xapian database (index) on the BOM held in memory. */ + void buildSearchIndex (); + + /** Match the given string, thanks to a full-text search on the + underlying Xapian index (named "database"). 
+ @param const std::string& (Travel-related) query string (e.g., + "sna francicso rio de janero lso angles reykyavki nce iev mow"). + @param LocationList_T& List of (geographical) locations, if any, + matching the given query string. + @return NbOfMatches_T Number of matches. */ + NbOfMatches_T interpretTravelRequest (const std::string& iTravelQuery, + LocationList_T&); + + + // ////////// Constructors and destructors ////////// + /** Constructor. + @param std::ostream& Output log stream (for instance, std::cout). + @param const std::string& Filepath of the Xapian index/database. */ OPENTREP_Service (std::ostream& ioLogStream, const std::string& iXapianDatabaseFilepath); /** Destructor. */ ~OPENTREP_Service(); - /** Build the Xapian database (index) on the BOM held in memory. */ - void buildSearchIndex (); - - /** Perform the query, thanks to the underlying Xapian database - (index) name. */ - void interpretTravelRequest (const std::string& iTravelQuery); private: // /////// Construction and Destruction helper methods /////// @@ -43,12 +57,10 @@ void init (std::ostream& ioLogStream, const std::string& iXapianDatabaseFilepath); - /** Initilise the log. */ - void logInit (const LOG::EN_LogLevel iLogLevel, std::ostream& ioLogStream); - - /** Finaliser. */ + /** Finalise. */ void finalise (); + private: // ///////// Service Context ///////// /** Opentrep context. */ Modified: trunk/opentrep/opentrep/batches/indexer.cpp =================================================================== --- trunk/opentrep/opentrep/batches/indexer.cpp 2009-07-18 15:15:40 UTC (rev 137) +++ trunk/opentrep/opentrep/batches/indexer.cpp 2009-07-18 18:32:04 UTC (rev 138) @@ -139,6 +139,11 @@ logOutputFile.open (lLogFilename.c_str()); logOutputFile.clear(); + // + std::cout << "Creating the Xapian index/database may take a few minutes " + << "on some architectures (and a few seconds on fastest ones)..." 
+ << std::endl; + // Initialise the context OPENTREP::OPENTREP_Service opentrepService (logOutputFile, lXapianDatabaseName); Modified: trunk/opentrep/opentrep/batches/searcher.cpp =================================================================== --- trunk/opentrep/opentrep/batches/searcher.cpp 2009-07-18 15:15:40 UTC (rev 137) +++ trunk/opentrep/opentrep/batches/searcher.cpp 2009-07-18 18:32:04 UTC (rev 138) @@ -228,7 +228,23 @@ lXapianDatabaseName); // Query the Xapian database (index) - opentrepService.interpretTravelRequest (lTravelQuery); + OPENTREP::LocationList_T lLocationList; + const OPENTREP::NbOfMatches_T nbOfMatches = + opentrepService.interpretTravelRequest (lTravelQuery, lLocationList); + + if (nbOfMatches != 0) { + std::cout << nbOfMatches << " (geographical) location(s) have been found " + << "matching your query (`" << lTravelQuery << "´)." + << std::endl; + + OPENTREP::NbOfMatches_T idx = 1; + for (OPENTREP::LocationList_T::const_iterator itLocation = + lLocationList.begin(); + itLocation != lLocationList.end(); ++itLocation, ++idx) { + const OPENTREP::Location& lLocation = *itLocation; + std::cout << " [" << idx << "]: " << lLocation << std::endl; + } + } // Close the Log outputFile logOutputFile.close(); Modified: trunk/opentrep/opentrep/bom/BomAbstract.hpp =================================================================== --- trunk/opentrep/opentrep/bom/BomAbstract.hpp 2009-07-18 15:15:40 UTC (rev 137) +++ trunk/opentrep/opentrep/bom/BomAbstract.hpp 2009-07-18 18:32:04 UTC (rev 138) @@ -9,8 +9,6 @@ #include <ostream> #include <sstream> #include <string> -// OpenTrep -#include <opentrep/bom/Language.hpp> namespace OPENTREP { @@ -32,11 +30,11 @@ /** Get a string describing the whole key (differentiating two objects at any level). */ - virtual const std::string describeKey() const = 0; + virtual std::string describeKey() const = 0; /** Get a string describing the short key (differentiating two objects at the same level). 
*/ - virtual const std::string describeShortKey() const = 0; + virtual std::string describeShortKey() const = 0; protected: Modified: trunk/opentrep/opentrep/bom/Document.cpp =================================================================== --- trunk/opentrep/opentrep/bom/Document.cpp 2009-07-18 15:15:40 UTC (rev 137) +++ trunk/opentrep/opentrep/bom/Document.cpp 2009-07-18 18:32:04 UTC (rev 138) @@ -24,14 +24,14 @@ } // ////////////////////////////////////////////////////////////////////// - const std::string Document::describeShortKey() const { + std::string Document::describeShortKey() const { std::ostringstream oStr; oStr << _queryString; return oStr.str(); } // ////////////////////////////////////////////////////////////////////// - const std::string Document::describeKey() const { + std::string Document::describeKey() const { return describeShortKey(); } Modified: trunk/opentrep/opentrep/bom/Document.hpp =================================================================== --- trunk/opentrep/opentrep/bom/Document.hpp 2009-07-18 15:15:40 UTC (rev 137) +++ trunk/opentrep/opentrep/bom/Document.hpp 2009-07-18 18:32:04 UTC (rev 138) @@ -84,11 +84,11 @@ /** Get a string describing the whole key (differentiating two objects at any level). */ - const std::string describeKey() const; + std::string describeKey() const; /** Get a string describing the short key (differentiating two objects at the same level). 
*/ - const std::string describeShortKey() const; + std::string describeShortKey() const; public: Modified: trunk/opentrep/opentrep/bom/Names.cpp =================================================================== --- trunk/opentrep/opentrep/bom/Names.cpp 2009-07-18 15:15:40 UTC (rev 137) +++ trunk/opentrep/opentrep/bom/Names.cpp 2009-07-18 18:32:04 UTC (rev 138) @@ -2,7 +2,7 @@ // Import section // ////////////////////////////////////////////////////////////////////// // C -#include <assert.h> +#include <cassert> // STL #include <sstream> // OpenTrep @@ -12,6 +12,7 @@ // ////////////////////////////////////////////////////////////////////// Names::Names() : _languageCode (Language::en_US) { + assert (false); } // ////////////////////////////////////////////////////////////////////// @@ -41,14 +42,14 @@ } // ////////////////////////////////////////////////////////////////////// - const std::string Names::describeShortKey() const { + std::string Names::describeShortKey() const { std::ostringstream oStr; oStr << "[" << Language::getLongLabel (_languageCode) << "]: "; return oStr.str(); } // ////////////////////////////////////////////////////////////////////// - const std::string Names::describeKey() const { + std::string Names::describeKey() const { return describeShortKey(); } Modified: trunk/opentrep/opentrep/bom/Names.hpp =================================================================== --- trunk/opentrep/opentrep/bom/Names.hpp 2009-07-18 15:15:40 UTC (rev 137) +++ trunk/opentrep/opentrep/bom/Names.hpp 2009-07-18 18:32:04 UTC (rev 138) @@ -33,6 +33,7 @@ private: /** Default constructor: should not be used. */ Names(); + public: // /////////// Getters /////////////// @@ -78,14 +79,15 @@ /** Get a string describing the whole key (differentiating two objects at any level). */ - const std::string describeKey() const; + std::string describeKey() const; /** Get a string describing the short key (differentiating two objects at the same level). 
*/ - const std::string describeShortKey() const; + std::string describeShortKey() const; - // Attributes + private: + // //////////// Attributes ////////////// /** Language code (e.g., en_US, fr_FR, etc.). */ Language::EN_Language _languageCode; @@ -93,7 +95,11 @@ NameList_T _nameList; }; - // Type definitions + // ////////////// Type definitions //////////// + /** Matrix of place names: for each of the language, the place gets a + corresponding list of names. + <br>For instance, MUC corresponds to Munich in English, München + in German, Munique in French, Мюнхен in Russian, etc. */ typedef std::map<Language::EN_Language, Names> NameMatrix_T; } Modified: trunk/opentrep/opentrep/bom/Place.cpp =================================================================== --- trunk/opentrep/opentrep/bom/Place.cpp 2009-07-18 15:15:40 UTC (rev 137) +++ trunk/opentrep/opentrep/bom/Place.cpp 2009-07-18 18:32:04 UTC (rev 138) @@ -27,16 +27,40 @@ // ////////////////////////////////////////////////////////////////////// Place::~Place () { } + + // ////////////////////////////////////////////////////////////////////// + std::string Place::getCityCode() const { + std::string oCityCode (_cityCode); + if (oCityCode.empty() == true) { + oCityCode = _placeCode; + } + return oCityCode; + } + + // ////////////////////////////////////////////////////////////////////// + bool Place::getNameList (const Language::EN_Language& iLanguageCode, + NameList_T& ioNameList) const { + bool oFoundNameList = false; + + NameMatrix_T::const_iterator itNameList = _nameMatrix.find (iLanguageCode); + if (itNameList != _nameMatrix.end()) { + const Names& lNameList = itNameList->second; + ioNameList = lNameList.getNameList(); + oFoundNameList = true; + } + + return oFoundNameList; + } // ////////////////////////////////////////////////////////////////////// - const std::string Place::describeShortKey() const { + std::string Place::describeShortKey() const { std::ostringstream oStr; oStr << _placeCode; return 
oStr.str(); } // ////////////////////////////////////////////////////////////////////// - const std::string Place::describeKey() const { + std::string Place::describeKey() const { return describeShortKey(); } @@ -46,13 +70,10 @@ not an airport. The city code is thus the same as the place code itself. */ std::ostringstream oStr; - oStr << describeShortKey() << ", "; - if (_cityCode.empty()) { - oStr << _placeCode << ", "; - } else { - oStr << _cityCode << ", "; - } - oStr << _stateCode + oStr << describeShortKey(); + + const std::string& lCityCode = getCityCode(); + oStr << ", " << lCityCode << ", " << _stateCode << ", " << _countryCode << ", " << _regionCode << ", " << _continentCode << ", " << _timeZoneGroup << ", " << _longitude << ", " << _latitude << ", " << _docID << ". "; @@ -72,22 +93,23 @@ not an airport. The city code is thus the same as the place code itself. */ std::ostringstream oStr; - oStr << describeShortKey() << ", "; - if (_cityCode.empty()) { - oStr << _placeCode << ", "; - } else { - oStr << _cityCode << ", "; - } - oStr << _stateCode + oStr << describeShortKey(); + + const std::string& lCityCode = getCityCode(); + oStr << ", " << lCityCode << ", " << _stateCode << ", " << _countryCode << ", " << _regionCode << ", " << _continentCode << ", " << _timeZoneGroup << ", " << _longitude << ", " << _latitude << ", " << _docID; NameMatrix_T::const_iterator itNameHolder = _nameMatrix.begin(); - const Names& lNameHolder = itNameHolder->second; - const std::string& lFirstName = lNameHolder.getFirstName(); - if (lFirstName.empty() == false) { - oStr << ", " << lFirstName << "."; + if (itNameHolder != _nameMatrix.end()) { + + const Names& lNameHolder = itNameHolder->second; + const std::string& lFirstName = lNameHolder.getFirstName(); + + if (lFirstName.empty() == false) { + oStr << ", " << lFirstName << "."; + } } return oStr.str(); @@ -103,18 +125,16 @@ } // ////////////////////////////////////////////////////////////////////// - const std::string 
Place::shortDisplay() const { + std::string Place::shortDisplay() const { /* When the city code is empty, it means that the place is a city and not an airport. The city code is thus the same as the place code itself. */ std::ostringstream oStr; oStr << describeKey(); - if (_cityCode.empty()) { - oStr << ", city code = " << _placeCode; - } else { - oStr << ", city code = " << _cityCode; - } - oStr << ", state code = " << _stateCode + + const std::string& lCityCode = getCityCode(); + oStr << ", city code = " << lCityCode + << ", state code = " << _stateCode << ", country code = " << _countryCode << ", region code = " << _regionCode << ", continent code = " << _continentCode @@ -127,7 +147,7 @@ } // ////////////////////////////////////////////////////////////////////// - const std::string Place::display() const { + std::string Place::display() const { std::ostringstream oStr; oStr << shortDisplay(); for (NameMatrix_T::const_iterator itNameList = _nameMatrix.begin(); @@ -174,4 +194,27 @@ _nameMatrix.clear(); } + // ////////////////////////////////////////////////////////////////////// + Location Place::createLocation() const { + + const std::string& lCityCode = getCityCode(); + + NameList_T lNameList; + const bool hasFoundNameList = getNameList (Language::en_US, lNameList); + + if (hasFoundNameList == false) { + // + OPENTREP_LOG_ERROR ("No list of names in (American) English (en_US " + << "locale) can be found for the following place: " + << toShortString()); + throw LanguageCodeNotDefinedInNameTableException(); + } + assert (hasFoundNameList == true); + + // Copy the parameters from the Place object to the Location structure + Location oLocation (_placeCode, lCityCode, _stateCode, _countryCode, + _regionCode, _continentCode, _timeZoneGroup, + _longitude, _latitude, lNameList); + return oLocation; + } } Modified: trunk/opentrep/opentrep/bom/Place.hpp =================================================================== --- trunk/opentrep/opentrep/bom/Place.hpp 
2009-07-18 15:15:40 UTC (rev 137) +++ trunk/opentrep/opentrep/bom/Place.hpp 2009-07-18 18:32:04 UTC (rev 138) @@ -11,6 +11,7 @@ #include <map> // OpenTrep Bom #include <opentrep/OPENTREP_Types.hpp> +#include <opentrep/Location.hpp> #include <opentrep/bom/BomAbstract.hpp> #include <opentrep/bom/Names.hpp> @@ -20,7 +21,7 @@ class World; class PlaceHolder; - /** Structure modelling an place. */ + /** Class modelling a place. */ class Place : public BomAbstract { friend class FacWorld; friend class FacPlace; @@ -29,37 +30,38 @@ public: // ///////// Getters //////// /** Get the Place code. */ - std::string getPlaceCode() const { + const std::string& getPlaceCode() const { return _placeCode; } - /** Get the City code. */ - std::string getCityCode() const { - return _cityCode; - } - + /** Get the City code. + <br>When the city code is empty, it means that the place is a + city and not an airport. The city code is thus the same as the + place code itself. */ + std::string getCityCode() const; + /** Get the State code. */ - std::string getStateCode() const { + const std::string& getStateCode() const { return _stateCode; } /** Get the Country code. */ - std::string getCountryCode() const { + const std::string& getCountryCode() const { return _countryCode; } /** Get the Region code. */ - std::string getRegionCode() const { + const std::string& getRegionCode() const { return _regionCode; } /** Get the Continent code. */ - std::string getContinentCode() const { + const std::string& getContinentCode() const { return _continentCode; } /** Get the Time-zone group. */ - std::string getTimeZoneGroup() const { + const std::string& getTimeZoneGroup() const { return _timeZoneGroup; } @@ -83,6 +85,14 @@ return _nameMatrix; } + /** Get, for a given language (code), the corresponding list of names. + @param const Language::EN_Language& Language code. + @param NameList_T& Empty list of names, which will be filled by the + method if a list exists for that language code. 
+ @return bool Whether or not such a list exists for the given + language. */ + bool getNameList (const Language::EN_Language&, NameList_T&) const; + // ///////// Setters //////// /** Set the Place code. */ @@ -146,6 +156,14 @@ public: + // /////////// Business methods ///////// + /** Create a Location structure, which is a light copy + of the Place object. That (Location) structure is passed + back to the caller of the service. */ + Location createLocation() const; + + + public: // ///////// Display methods //////// /** Dump a Business Object into an output stream. @param ostream& the output stream. */ @@ -163,17 +181,17 @@ /** Get a string describing the whole key (differentiating two objects at any level). */ - const std::string describeKey() const; + std::string describeKey() const; /** Get a string describing the short key (differentiating two objects at the same level). */ - const std::string describeShortKey() const; + std::string describeShortKey() const; /** Display the full Place context. */ - const std::string display() const; + std::string display() const; /** Display a short Place context. 
*/ - const std::string shortDisplay() const; + std::string shortDisplay() const; private: Modified: trunk/opentrep/opentrep/bom/PlaceHolder.cpp =================================================================== --- trunk/opentrep/opentrep/bom/PlaceHolder.cpp 2009-07-18 15:15:40 UTC (rev 137) +++ trunk/opentrep/opentrep/bom/PlaceHolder.cpp 2009-07-18 18:32:04 UTC (rev 138) @@ -27,13 +27,13 @@ } // ////////////////////////////////////////////////////////////////////// - const std::string PlaceHolder::describeShortKey() const { + std::string PlaceHolder::describeShortKey() const { std::ostringstream oStr; return oStr.str(); } // ////////////////////////////////////////////////////////////////////// - const std::string PlaceHolder::describeKey() const { + std::string PlaceHolder::describeKey() const { return describeShortKey(); } @@ -78,4 +78,18 @@ void PlaceHolder::fromStream (std::istream& ioIn) { } + // ////////////////////////////////////////////////////////////////////// + void PlaceHolder::createLocations (LocationList_T& ioLocationList) const { + + for (PlaceOrderedList_T::const_iterator itPlace = _placeOrderedList.begin(); + itPlace != _placeOrderedList.end(); ++itPlace) { + const Place* lPlace_ptr = *itPlace; + assert (lPlace_ptr != NULL); + + const Location& lLocation = lPlace_ptr->createLocation(); + ioLocationList.push_back (lLocation); + } + } + + } Modified: trunk/opentrep/opentrep/bom/PlaceHolder.hpp =================================================================== --- trunk/opentrep/opentrep/bom/PlaceHolder.hpp 2009-07-18 15:15:40 UTC (rev 137) +++ trunk/opentrep/opentrep/bom/PlaceHolder.hpp 2009-07-18 18:32:04 UTC (rev 138) @@ -6,6 +6,7 @@ // ////////////////////////////////////////////////////////////////////// // OpenTREP #include <opentrep/OPENTREP_Types.hpp> +#include <opentrep/Location.hpp> #include <opentrep/bom/BomAbstract.hpp> #include <opentrep/bom/PlaceList.hpp> @@ -27,8 +28,12 @@ public: // /////////// Business methods ///////// - + /** 
Create the list of Location structures, which are light copies + of the Place objects. Those (Location) structures are passed + back to the caller of the service. */ + void createLocations (LocationList_T&) const; + public: // /////////// Display support methods ///////// /** Dump a Business Object into an output stream. @@ -47,11 +52,11 @@ /** Get a string describing the whole key (differentiating two objects at any level). */ - const std::string describeKey() const; + std::string describeKey() const; /** Get a string describing the short key (differentiating two objects at the same level). */ - const std::string describeShortKey() const; + std::string describeShortKey() const; private: Modified: trunk/opentrep/opentrep/bom/Result.cpp =================================================================== --- trunk/opentrep/opentrep/bom/Result.cpp 2009-07-18 15:15:40 UTC (rev 137) +++ trunk/opentrep/opentrep/bom/Result.cpp 2009-07-18 18:32:04 UTC (rev 138) @@ -27,14 +27,14 @@ } // ////////////////////////////////////////////////////////////////////// - const std::string Result::describeShortKey() const { + std::string Result::describeShortKey() const { std::ostringstream oStr; oStr << _queryString; return oStr.str(); } // ////////////////////////////////////////////////////////////////////// - const std::string Result::describeKey() const { + std::string Result::describeKey() const { return describeShortKey(); } Modified: trunk/opentrep/opentrep/bom/Result.hpp =================================================================== --- trunk/opentrep/opentrep/bom/Result.hpp 2009-07-18 15:15:40 UTC (rev 137) +++ trunk/opentrep/opentrep/bom/Result.hpp 2009-07-18 18:32:04 UTC (rev 138) @@ -71,11 +71,11 @@ /** Get a string describing the whole key (differentiating two objects at any level). */ - const std::string describeKey() const; + std::string describeKey() const; /** Get a string describing the short key (differentiating two objects at the same level). 
*/ - const std::string describeShortKey() const; + std::string describeShortKey() const; private: Modified: trunk/opentrep/opentrep/bom/ResultHolder.cpp =================================================================== --- trunk/opentrep/opentrep/bom/ResultHolder.cpp 2009-07-18 15:15:40 UTC (rev 137) +++ trunk/opentrep/opentrep/bom/ResultHolder.cpp 2009-07-18 18:32:04 UTC (rev 138) @@ -31,14 +31,14 @@ } // ////////////////////////////////////////////////////////////////////// - const std::string ResultHolder::describeShortKey() const { + std::string ResultHolder::describeShortKey() const { std::ostringstream oStr; oStr << _queryString; return oStr.str(); } // ////////////////////////////////////////////////////////////////////// - const std::string ResultHolder::describeKey() const { + std::string ResultHolder::describeKey() const { return describeShortKey(); } Modified: trunk/opentrep/opentrep/bom/ResultHolder.hpp =================================================================== --- trunk/opentrep/opentrep/bom/ResultHolder.hpp 2009-07-18 15:15:40 UTC (rev 137) +++ trunk/opentrep/opentrep/bom/ResultHolder.hpp 2009-07-18 18:32:04 UTC (rev 138) @@ -65,11 +65,11 @@ /** Get a string describing the whole key (differentiating two objects at any level). */ - const std::string describeKey() const; + std::string describeKey() const; /** Get a string describing the short key (differentiating two objects at the same level). 
*/ - const std::string describeShortKey() const; + std::string describeShortKey() const; private: Modified: trunk/opentrep/opentrep/bom/World.cpp =================================================================== --- trunk/opentrep/opentrep/bom/World.cpp 2009-07-18 15:15:40 UTC (rev 137) +++ trunk/opentrep/opentrep/bom/World.cpp 2009-07-18 18:32:04 UTC (rev 138) @@ -25,13 +25,13 @@ } // ////////////////////////////////////////////////////////////////////// - const std::string World::describeShortKey() const { + std::string World::describeShortKey() const { std::ostringstream oStr; return oStr.str(); } // ////////////////////////////////////////////////////////////////////// - const std::string World::describeKey() const { + std::string World::describeKey() const { return describeShortKey(); } @@ -50,14 +50,14 @@ } // ////////////////////////////////////////////////////////////////////// - const std::string World::shortDisplay() const { + std::string World::shortDisplay() const { std::ostringstream oStr; oStr << describeKey() << " one world " << std::endl; return oStr.str(); } // ////////////////////////////////////////////////////////////////////// - const std::string World::display() const { + std::string World::display() const { std::ostringstream oStr; oStr << shortDisplay(); Modified: trunk/opentrep/opentrep/bom/World.hpp =================================================================== --- trunk/opentrep/opentrep/bom/World.hpp 2009-07-18 15:15:40 UTC (rev 137) +++ trunk/opentrep/opentrep/bom/World.hpp 2009-07-18 18:32:04 UTC (rev 138) @@ -50,17 +50,17 @@ /** Get a string describing the whole key (differentiating two objects at any level). */ - const std::string describeKey() const; + std::string describeKey() const; /** Get a string describing the short key (differentiating two objects at the same level). */ - const std::string describeShortKey() const; + std::string describeShortKey() const; /** Display the full World context. 
*/ - const std::string display() const; + std::string display() const; /** Display a short World context. */ - const std::string shortDisplay() const; + std::string shortDisplay() const; /** Retrieve a generic BOM object from the dedicated list. */ GenericBom_T getGenericBom (const XapianDocID_T& iDocID) const; Modified: trunk/opentrep/opentrep/command/RequestInterpreter.cpp =================================================================== --- trunk/opentrep/opentrep/command/RequestInterpreter.cpp 2009-07-18 15:15:40 UTC (rev 137) +++ trunk/opentrep/opentrep/command/RequestInterpreter.cpp 2009-07-18 18:32:04 UTC (rev 138) @@ -1,6 +1,8 @@ // ////////////////////////////////////////////////////////////////////// // Import section // ////////////////////////////////////////////////////////////////////// +// C +#include <cassert> // STL #include <istream> #include <ostream> @@ -23,63 +25,47 @@ #include <xapian.h> namespace OPENTREP { - + // ////////////////////////////////////////////////////////////////////// - void RequestInterpreter:: - interpretTravelRequest (soci::session& ioSociSession, - const TravelDatabaseName_T& iTravelDatabaseName, - const TravelQuery_T& iTravelQuery) { + void createResults (const DocumentList_T& iDocumentList, + const Xapian::Database& iXapianDatabase, + ResultHolder& ioResultHolder) { - try { - - // Make the database - Xapian::Database lXapianDatabase (iTravelDatabaseName); - - // Create a ResultHolder object - ResultHolder& lResultHolder = - FacResultHolder::instance().create (iTravelQuery, lXapianDatabase); - - // DEBUG - OPENTREP_LOG_DEBUG (std::endl - << "========================================="); + // Back-up the (retrieved) matching Xapian documents into still + // to-be-created Result objects. 
+ for (DocumentList_T::const_iterator itDoc = iDocumentList.begin(); + itDoc != iDocumentList.end(); ++itDoc) { + // Retrieve both the Xapian document object and the corresponding + // matching percentage (most of the time, it is 100%) + const Document& lMatchingDocument = *itDoc; - // Main algorithm - DocumentList_T lDocumentList; - lResultHolder.searchString (lDocumentList); - - // Back-up the (retrieved) matching Xapian documents into still - // to-be-created Result objects. - for (DocumentList_T::const_iterator itDoc = lDocumentList.begin(); - itDoc != lDocumentList.end(); ++itDoc) { - // Retrieve both the Xapian document object and the corresponding - // matching percentage (most of the time, it is 100%) - const Document& lMatchingDocument = *itDoc; - - // Create a Result object - Result& lResult = FacResult::instance().create (lXapianDatabase); - - // Fill the Result object with both the corresponding Document object - // and its associated query string - lResult.setMatchingDocument (lMatchingDocument); - - // Add the Result object (holding the list of matching - // documents) to the dedicated list. - FacResultHolder::initLinkWithResult (lResultHolder, lResult); - } - - // DEBUG - OPENTREP_LOG_DEBUG (std::endl - << "=========================================" - << std::endl << "Matching list: " << std::endl - << lResultHolder.toString() - << "=========================================" - << std::endl << std::endl); - - // Create a PlaceHolder object, to collect the matching Place objects - PlaceHolder& lPlaceHolder = FacPlaceHolder::instance().create(); + // Create a Result object + Result& lResult = FacResult::instance().create (iXapianDatabase); + + // Fill the Result object with both the corresponding Document object + // and its associated query string + lResult.setMatchingDocument (lMatchingDocument); + + // Add the Result object (holding the list of matching + // documents) to the dedicated list. 
+ FacResultHolder::initLinkWithResult (ioResultHolder, lResult); + } + // DEBUG + OPENTREP_LOG_DEBUG (std::endl + << "=========================================" + << std::endl << "Matching list: " << std::endl + << ioResultHolder.toString() + << "=========================================" + << std::endl << std::endl); + } + + // ////////////////////////////////////////////////////////////////////// + void createPlaces (const ResultHolder& iResultHolder, + soci::session& ioSociSession, PlaceHolder& ioPlaceHolder) { + // Browse the list of result objects - const ResultList_T& lResultList = lResultHolder.getResultList(); + const ResultList_T& lResultList = iResultHolder.getResultList(); for (ResultList_T::const_iterator itResult = lResultList.begin(); itResult != lResultList.end(); ++itResult) { // Retrieve the result object @@ -108,7 +94,7 @@ if (hasRetrievedPlace == true) { // Insert the Place object within the PlaceHolder object - FacPlaceHolder::initLinkWithPlace (lPlaceHolder, lPlace); + FacPlaceHolder::initLinkWithPlace (ioPlaceHolder, lPlace); // DEBUG OPENTREP_LOG_DEBUG ("Retrieved Document: " << lPlace.toString()); @@ -118,18 +104,63 @@ OPENTREP_LOG_DEBUG ("No retrieved Document for ID = " << lDocID); } } + } + + // ////////////////////////////////////////////////////////////////////// + NbOfMatches_T RequestInterpreter:: + interpretTravelRequest (soci::session& ioSociSession, + const TravelDatabaseName_T& iTravelDatabaseName, + const TravelQuery_T& iTravelQuery, + LocationList_T& ioLocationList) { + NbOfMatches_T oNbOfMatches = 0; + // Create a PlaceHolder object, to collect the matching Place objects + PlaceHolder& lPlaceHolder = FacPlaceHolder::instance().create(); + + try { + + // Make the database + Xapian::Database lXapianDatabase (iTravelDatabaseName); + + // Create a ResultHolder object + ResultHolder& lResultHolder = + FacResultHolder::instance().create (iTravelQuery, lXapianDatabase); + // DEBUG - OPENTREP_LOG_NOTIFICATION (std::endl - << 
"=========================================" - << std::endl << "Summary:" << std::endl - << lPlaceHolder.toShortString() << std::endl - << "=========================================" - << std::endl); + OPENTREP_LOG_DEBUG (std::endl + << "========================================="); + // Main algorithm + DocumentList_T lDocumentList; + lResultHolder.searchString (lDocumentList); + + /** Create the list of Result objects corresponding to the list + of documents. */ + createResults (lDocumentList, lXapianDatabase, lResultHolder); + + /** Create the list of Place objects, for each of which a + look-up is made in the SQL database (e.g., MySQL or Oracle) + to retrieve complementary data. */ + createPlaces (lResultHolder, ioSociSession, lPlaceHolder); + } catch (const Xapian::Error& error) { OPENTREP_LOG_ERROR ("Exception: " << error.get_msg()); } + + // DEBUG + OPENTREP_LOG_DEBUG (std::endl + << "=========================================" + << std::endl << "Summary:" << std::endl + << lPlaceHolder.toShortString() << std::endl + << "=========================================" + << std::endl); + + /** Create the list of Location structures, which are light copies + of the Place objects. Those (Location) structures are passed + back to the caller of the service. */ + lPlaceHolder.createLocations (ioLocationList); + + return oNbOfMatches; } } Modified: trunk/opentrep/opentrep/command/RequestInterpreter.hpp =================================================================== --- trunk/opentrep/opentrep/command/RequestInterpreter.hpp 2009-07-18 15:15:40 UTC (rev 137) +++ trunk/opentrep/opentrep/command/RequestInterpreter.hpp 2009-07-18 18:32:04 UTC (rev 138) @@ -8,6 +8,7 @@ #include <string> // OpenTrep #include <opentrep/OPENTREP_Types.hpp> +#include <opentrep/Location.hpp> // Forward declarations namespace soci { @@ -23,10 +24,19 @@ class RequestInterpreter { friend class OPENTREP_Service; private: - /** Interpret a search query. 
*/ - static void interpretTravelRequest (soci::session&, - const TravelDatabaseName_T&, - const TravelQuery_T&); + /** Match the given string, thanks to a full-text search on the + underlying Xapian index (named "database"). + @param soci::session& SQL Database (e.g., MySQL, Oracle) session. + @param const TravelDatabaseName_T& Filepath to the Xapian database. + @param const std::string& (Travel-related) query string (e.g., + "sna francicso rio de janero lso angles reykyavki nce iev mow"). + @param LocationList_T& List of (geographical) locations, if any, + matching the given query string. + @return NbOfMatches_T Number of matches. */ + static NbOfMatches_T interpretTravelRequest (soci::session&, + const TravelDatabaseName_T&, + const TravelQuery_T&, + LocationList_T&); private: /** Constructors. */ Modified: trunk/opentrep/opentrep/service/OPENTREP_Service.cpp =================================================================== --- trunk/opentrep/opentrep/service/OPENTREP_Service.cpp 2009-07-18 15:15:40 UTC (rev 137) +++ trunk/opentrep/opentrep/service/OPENTREP_Service.cpp 2009-07-18 18:32:04 UTC (rev 138) @@ -19,8 +19,9 @@ namespace OPENTREP { // ////////////////////////////////////////////////////////////////////// - OPENTREP_Service::OPENTREP_Service (std::ostream& ioLogStream, - const std::string& iXapianDatabaseFilepath) + OPENTREP_Service:: + OPENTREP_Service (std::ostream& ioLogStream, + const std::string& iXapianDatabaseFilepath) : _opentrepServiceContext (NULL) { init (ioLogStream, iXapianDatabaseFilepath); } @@ -43,6 +44,12 @@ } // ////////////////////////////////////////////////////////////////////// + void logInit (const LOG::EN_LogLevel iLogLevel, + std::ostream& ioLogOutputFile) { + Logger::instance().setLogParameters (iLogLevel, ioLogOutputFile); + } + + // ////////////////////////////////////////////////////////////////////// void OPENTREP_Service::init (std::ostream& ioLogStream, const std::string& iTravelDatabaseName) { // Set the log file @@ 
-65,12 +72,6 @@ } // ////////////////////////////////////////////////////////////////////// - void OPENTREP_Service::logInit (const LOG::EN_LogLevel iLogLevel, - std::ostream& ioLogOutputFile) { - Logger::instance().setLogParameters (iLogLevel, ioLogOutputFile); - } - - // ////////////////////////////////////////////////////////////////////// void OPENTREP_Service::finalise () { assert (_opentrepServiceContext != NULL); @@ -109,8 +110,9 @@ } // ////////////////////////////////////////////////////////////////////// - void OPENTREP_Service:: - interpretTravelRequest (const std::string& iTravelQuery) { + NbOfMatches_T OPENTREP_Service:: + interpretTravelRequest (const std::string& iTravelQuery, + LocationList_T& ioLocationList) { if (_opentrepServiceContext == NULL) { throw NonInitialisedServiceException(); } @@ -128,9 +130,10 @@ // Delegate the query execution to the dedicated command BasChronometer lRequestInterpreterChronometer; lRequestInterpreterChronometer.start(); - RequestInterpreter::interpretTravelRequest (lSociSession, - lTravelDatabaseName, - iTravelQuery); + const NbOfMatches_T nbOfMatches = + RequestInterpreter::interpretTravelRequest (lSociSession, + lTravelDatabaseName, + iTravelQuery, ioLocationList); const double lRequestInterpreterMeasure = lRequestInterpreterChronometer.elapsed(); @@ -138,6 +141,8 @@ OPENTREP_LOG_DEBUG ("Match query on Xapian database (index): " << lRequestInterpreterMeasure << " - " << lOPENTREP_ServiceContext.display()); + + return nbOfMatches; } } Modified: trunk/opentrep/opentrep/sources.mk =================================================================== --- trunk/opentrep/opentrep/sources.mk 2009-07-18 15:15:40 UTC (rev 137) +++ trunk/opentrep/opentrep/sources.mk 2009-07-18 18:32:04 UTC (rev 138) @@ -1,4 +1,6 @@ service_h_sources = \ $(top_srcdir)/opentrep/OPENTREP_Types.hpp \ + $(top_srcdir)/opentrep/OPENTREP_Abstract.hpp \ + $(top_srcdir)/opentrep/Location.hpp \ $(top_srcdir)/opentrep/OPENTREP_Service.hpp 
service_cc_sources = This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <den...@us...> - 2009-07-18 15:15:47
|
Revision: 137 http://opentrep.svn.sourceforge.net/opentrep/?rev=137&view=rev Author: denis_arnaud Date: 2009-07-18 15:15:40 +0000 (Sat, 18 Jul 2009) Log Message: ----------- [Dev] Better debugging logs. Modified Paths: -------------- trunk/opentrep/opentrep/bom/DocumentList.hpp trunk/opentrep/opentrep/bom/PlaceList.hpp trunk/opentrep/opentrep/bom/Result.cpp trunk/opentrep/opentrep/bom/Result.hpp trunk/opentrep/opentrep/bom/ResultHolder.cpp trunk/opentrep/opentrep/bom/ResultHolder.hpp trunk/opentrep/opentrep/bom/StringMatcher.cpp trunk/opentrep/opentrep/bom/StringMatcher.hpp trunk/opentrep/opentrep/bom/sources.mk trunk/opentrep/opentrep/command/RequestInterpreter.cpp trunk/opentrep/opentrep/factory/FacPlaceHolder.cpp Added Paths: ----------- trunk/opentrep/opentrep/bom/Document.cpp trunk/opentrep/opentrep/bom/Document.hpp Added: trunk/opentrep/opentrep/bom/Document.cpp =================================================================== --- trunk/opentrep/opentrep/bom/Document.cpp (rev 0) +++ trunk/opentrep/opentrep/bom/Document.cpp 2009-07-18 15:15:40 UTC (rev 137) @@ -0,0 +1,59 @@ +// ////////////////////////////////////////////////////////////////////// +// Import section +// ////////////////////////////////////////////////////////////////////// +// C +#include <cassert> +// OpenTREP +#include <opentrep/bom/Document.hpp> + +namespace OPENTREP { + + // ////////////////////////////////////////////////////////////////////// + Document::Document () { + } + + // ////////////////////////////////////////////////////////////////////// + Document::Document (const Document& iDocument) + : _queryString (iDocument._queryString), + _document (iDocument._document), + _documentList (iDocument._documentList) { + } + + // ////////////////////////////////////////////////////////////////////// + Document::~Document () { + } + + // ////////////////////////////////////////////////////////////////////// + const std::string Document::describeShortKey() const { + std::ostringstream oStr; 
+ oStr << _queryString; + return oStr.str(); + } + + // ////////////////////////////////////////////////////////////////////// + const std::string Document::describeKey() const { + return describeShortKey(); + } + + // ////////////////////////////////////////////////////////////////////// + std::string Document::toString() const { + std::ostringstream oStr; + oStr << describeShortKey() << std::endl; + + const Xapian::docid& lDocID = _document.get_docid(); + oStr << "Document ID " << lDocID << "\t" << _percentage + << "% [" << _document.get_data() << "]"; + + return oStr.str(); + } + + // ////////////////////////////////////////////////////////////////////// + void Document::toStream (std::ostream& ioOut) const { + ioOut << toString(); + } + + // ////////////////////////////////////////////////////////////////////// + void Document::fromStream (std::istream& ioIn) { + } + +} Added: trunk/opentrep/opentrep/bom/Document.hpp =================================================================== --- trunk/opentrep/opentrep/bom/Document.hpp (rev 0) +++ trunk/opentrep/opentrep/bom/Document.hpp 2009-07-18 15:15:40 UTC (rev 137) @@ -0,0 +1,123 @@ +#ifndef __OPENTREP_BOM_DOCUMENT_HPP +#define __OPENTREP_BOM_DOCUMENT_HPP + +// ////////////////////////////////////////////////////////////////////// +// Import section +// ////////////////////////////////////////////////////////////////////// +// STL +#include <list> +// OpenTREP +#include <opentrep/OPENTREP_Types.hpp> +#include <opentrep/bom/BomAbstract.hpp> +// Xapian +#include <xapian.h> + +namespace OPENTREP { + + // //////////////// Type definitions ///////////////// + /** List of Xapian documents. */ + typedef std::list<Xapian::Document> XapianDocumentList_T; + + + // //////////////// Main Class ///////////////// + /** Structure wrapping a Xapian document having matched part of a + given query string. + <br>It is a structure, as it is aimed to be temporary, the time + a Result object be created with the corresponding content. 
*/ + struct Document : public BomAbstract { + public: + // ////////////////// Getters //////////////// + /** Get the query string. */ + const TravelQuery_T& getTravelQuery() { + return _queryString; + } + + /** Get the matching Xapian document. */ + const Xapian::Document& getXapianDocument() const { + return _document; + } + + /** Get the matching percentage associated to the Xapian document. */ + const Xapian::percent& getXapianPercentage() const { + return _percentage; + } + + /** Get the extra list of matching Xapian documents. */ + const XapianDocumentList_T& getExtraDocumentList() const { + return _documentList; + } + + + // ////////////////// Setters //////////////// + void setQueryString (const TravelQuery_T& iQueryString) { + _queryString = iQueryString; + } + + /** Set the matching Xapian document. */ + void setXapianDocument (const Xapian::Document& iMatchingDocument) { + _document = iMatchingDocument; + } + + /** Set the matching percentage associated to the Xapian document. */ + void setXapianPercentage (const Xapian::percent& iPercentage) { + _percentage = iPercentage; + } + + /** Add a matching Xapian document (having the same matching percentage). */ + void addExtraDocument (const Xapian::Document& iMatchingDocument) { + _documentList.push_back (iMatchingDocument); + } + + + public: + // /////////// Display support methods ///////// + /** Dump a Business Object into an output stream. + @param ostream& the output stream. */ + void toStream (std::ostream& ioOut) const; + + /** Read a Business Object from an input stream. + @param istream& the input stream. */ + void fromStream (std::istream& ioIn); + + /** Get the serialised version of the Business Object. */ + std::string toString() const; + + /** Get a string describing the whole key (differentiating two objects + at any level). */ + const std::string describeKey() const; + + /** Get a string describing the short key (differentiating two objects + at the same level). 
*/ + const std::string describeShortKey() const; + + + public: + // //////////////// Constructors and Destructors ///////////// + /** Default constructor. */ + Document (); + /** Default copy constructor. */ + Document (const Document&); + /** Default destructor. */ + ~Document (); + + + private: + // ///////////////// Attributes ////////////////// + /** Query string with which a Xapian full text search is done. */ + TravelQuery_T _queryString; + + /** Matching percentage, as returned by the Xapian full text search. + <br>Generally, that percentage is equal to, or close to, 100%. */ + Xapian::percent _percentage; + + /** Matching document, as returned by the Xapian full text search. */ + Xapian::Document _document; + + /** List of Xapian documents having the same matching percentage. + <br>Hence, any of those other Xapian documents could have been + chosen, instead of the main one. */ + XapianDocumentList_T _documentList; + }; + +} +#endif // __OPENTREP_BOM_DOCUMENT_HPP Modified: trunk/opentrep/opentrep/bom/DocumentList.hpp =================================================================== --- trunk/opentrep/opentrep/bom/DocumentList.hpp 2009-07-18 09:33:50 UTC (rev 136) +++ trunk/opentrep/opentrep/bom/DocumentList.hpp 2009-07-18 15:15:40 UTC (rev 137) @@ -8,20 +8,14 @@ #include <list> // OpenTREP #include <opentrep/OPENTREP_Types.hpp> +#include <opentrep/bom/Document.hpp> // Xapian #include <xapian.h> namespace OPENTREP { - /** Xapian document and its associated matching percentage. */ - typedef std::pair<Xapian::percent, Xapian::Document> MatchingDocument_T; - - /** A matching Xapian document, along with the query string which it - matches. */ - typedef std::pair<TravelQuery_T, MatchingDocument_T> QueryAndDocument_T; - /** List of matching Xapian documents. 
*/ - typedef std::list<QueryAndDocument_T> DocumentList_T; + typedef std::list<Document> DocumentList_T; } #endif // __OPENTREP_BOM_DOCUMENTLIST_HPP Modified: trunk/opentrep/opentrep/bom/PlaceList.hpp =================================================================== --- trunk/opentrep/opentrep/bom/PlaceList.hpp 2009-07-18 09:33:50 UTC (rev 136) +++ trunk/opentrep/opentrep/bom/PlaceList.hpp 2009-07-18 15:15:40 UTC (rev 137) @@ -14,9 +14,10 @@ class Place; // ///////////// Type definitions //////////////////// - typedef std::size_t PlaceID_T; + // typedef std::size_t PlaceID_T; // typedef std::map<PlaceID_T, Place*> PlaceDirectList_T; - typedef std::map<std::string, Place*> PlaceList_T; + + typedef std::multimap<std::string, Place*> PlaceList_T; typedef std::list<Place*> PlaceOrderedList_T; } Modified: trunk/opentrep/opentrep/bom/Result.cpp =================================================================== --- trunk/opentrep/opentrep/bom/Result.cpp 2009-07-18 09:33:50 UTC (rev 136) +++ trunk/opentrep/opentrep/bom/Result.cpp 2009-07-18 15:15:40 UTC (rev 137) @@ -41,14 +41,7 @@ // ////////////////////////////////////////////////////////////////////// std::string Result::toString() const { std::ostringstream oStr; - oStr << describeShortKey() << std::endl; - - const Xapian::percent& lPercentage = _matchingDocument.first; - const Xapian::Document& lDocument = _matchingDocument.second; - const Xapian::docid& lDocID = lDocument.get_docid(); - oStr << "Document ID " << lDocID << "\t" << lPercentage - << "% [" << lDocument.get_data() << "]" << std::endl; - + oStr << _matchingDocument.toString(); return oStr.str(); } Modified: trunk/opentrep/opentrep/bom/Result.hpp =================================================================== --- trunk/opentrep/opentrep/bom/Result.hpp 2009-07-18 09:33:50 UTC (rev 136) +++ trunk/opentrep/opentrep/bom/Result.hpp 2009-07-18 15:15:40 UTC (rev 137) @@ -7,7 +7,7 @@ // OpenTREP #include <opentrep/OPENTREP_Types.hpp> #include 
<opentrep/bom/BomAbstract.hpp> -#include <opentrep/bom/DocumentList.hpp> +#include <opentrep/bom/Document.hpp> namespace OPENTREP { @@ -27,19 +27,19 @@ /** Get the Matching Xapian document object, along with its corresponding matching percentage. */ - const MatchingDocument_T& getMatchingDocument() const { + const Document& getMatchingDocument() const { return _matchingDocument; } /** Retrieve the percentage corresponding to the matching Xapian document object. */ - const Xapian::percent& getPercentage() const { - return _matchingDocument.first; + const Xapian::percent& getXapianPercentage() const { + return _matchingDocument.getXapianPercentage(); } /** Retrieve the matching Xapian document object. */ - const Xapian::Document& getDocument() const { - return _matchingDocument.second; + const Xapian::Document& getXapianDocument() const { + return _matchingDocument.getXapianDocument(); } @@ -51,17 +51,10 @@ /** Set the matching Xapian document object and its corresponding matching percentage. */ - void setMatchingDocument (const MatchingDocument_T& iMatchingDocument) { + void setMatchingDocument (const Document& iMatchingDocument) { _matchingDocument = iMatchingDocument; } - /** Set the matching Xapian document object and its corresponding - matching percentage. */ - void setQueryAndDocument (const QueryAndDocument_T& iQueryAndDocument) { - _queryString = iQueryAndDocument.first; - _matchingDocument = iQueryAndDocument.second; - } - public: // /////////// Display support methods ///////// @@ -112,7 +105,7 @@ /** Matching Xapian document object, along with its corresponding matching percentage. 
*/ - MatchingDocument_T _matchingDocument; + Document _matchingDocument; }; } Modified: trunk/opentrep/opentrep/bom/ResultHolder.cpp =================================================================== --- trunk/opentrep/opentrep/bom/ResultHolder.cpp 2009-07-18 09:33:50 UTC (rev 136) +++ trunk/opentrep/opentrep/bom/ResultHolder.cpp 2009-07-18 15:15:40 UTC (rev 137) @@ -69,7 +69,7 @@ // ////////////////////////////////////////////////////////////////////// bool ResultHolder::searchString (TravelQuery_T& ioPartialQueryString, - MatchingDocument_T& ioMatchingDocument) { + Document& ioMatchingDocument) { bool oFoundDocument = false; // Catch any Xapian::Error exceptions thrown @@ -78,11 +78,8 @@ bool shouldStop = false; while (shouldStop == false) { // DEBUG - /* - OPENTREP_LOG_DEBUG (std::endl << "--------------------------------" - << std::endl << "Current query string: `" << ioPartialQueryString - << "'"); - */ + OPENTREP_LOG_DEBUG ("Current query string: `" + << ioPartialQueryString << "'"); // Retrieve the list of documents matching the query string Xapian::MSet lMatchingSet; @@ -125,11 +122,10 @@ bool shouldStop = false; while (shouldStop == false) { // DEBUG - /* - OPENTREP_LOG_DEBUG (std::endl - << "================================" << std::endl - << "Current query string: `" << lRemainingQueryString << "'"); - */ + OPENTREP_LOG_DEBUG ("---------------------") + OPENTREP_LOG_DEBUG ("Remaining part of the query string: `" + << lRemainingQueryString << "'"); + /** Search with the initial full string, then by removing a word if there was no result, then by removing another word if there was @@ -143,14 +139,17 @@ furthest right words, so that the remaining left part be matched against the Xapian database). 
*/ - MatchingDocument_T lMatchingDocument; + Document lMatchingDocument; const bool hasFoundDocument = searchString (lQueryString, lMatchingDocument); if (hasFoundDocument == true) { - const QueryAndDocument_T lQueryAndDocument (lQueryString, - lMatchingDocument); - ioDocumentList.push_back (lQueryAndDocument); + lMatchingDocument.setQueryString (lQueryString); + ioDocumentList.push_back (lMatchingDocument); + + // DEBUG + OPENTREP_LOG_DEBUG ("==> Matching of the query string: `" + << lQueryString << "'"); } /** Modified: trunk/opentrep/opentrep/bom/ResultHolder.hpp =================================================================== --- trunk/opentrep/opentrep/bom/ResultHolder.hpp 2009-07-18 09:33:50 UTC (rev 136) +++ trunk/opentrep/opentrep/bom/ResultHolder.hpp 2009-07-18 15:15:40 UTC (rev 137) @@ -47,7 +47,7 @@ @param TravelQuery_T& The partial query string. @param MatchingDocument_T& The best matching Xapian document (if found). @return bool Whether such a best matching document has been found. 
*/ - bool searchString(TravelQuery_T& ioPartialQueryString, MatchingDocument_T&); + bool searchString (TravelQuery_T& ioPartialQueryString, Document&); public: Modified: trunk/opentrep/opentrep/bom/StringMatcher.cpp =================================================================== --- trunk/opentrep/opentrep/bom/StringMatcher.cpp 2009-07-18 09:33:50 UTC (rev 136) +++ trunk/opentrep/opentrep/bom/StringMatcher.cpp 2009-07-18 15:15:40 UTC (rev 137) @@ -4,7 +4,8 @@ // C #include <cassert> // STL -#include <iostream> +#include <istream> +#include <ostream> #include <sstream> #include <string> #include <list> @@ -73,7 +74,7 @@ } } catch (const Xapian::Error& error) { - std::cerr << "Exception: " << error.get_msg() << std::endl; + OPENTREP_LOG_ERROR ("Exception: " << error.get_msg()); } } @@ -267,14 +268,14 @@ */ } catch (const Xapian::Error& error) { - std::cerr << "Exception: " << error.get_msg() << std::endl; + OPENTREP_LOG_ERROR ("Exception: " << error.get_msg()); } } // ////////////////////////////////////////////////////////////////////// bool StringMatcher:: extractBestMatchingDocumentFromMSet (const Xapian::MSet& iMatchingSet, - MatchingDocument_T& ioMatchingDocument) { + Document& ioMatchingDocument) { bool oFoundDocument = false; if (iMatchingSet.empty() == true) { @@ -290,9 +291,23 @@ same: it appears random). */ Xapian::MSetIterator itDoc = iMatchingSet.begin(); - ioMatchingDocument.first = itDoc.get_percent(); - ioMatchingDocument.second = itDoc.get_document(); + const Xapian::percent& lBestPercentage = itDoc.get_percent(); + ioMatchingDocument.setXapianPercentage (lBestPercentage); + ioMatchingDocument.setXapianDocument (itDoc.get_document()); + /** Add all the Xapian documents having reached the same matching + percentage. 
*/ + for ( ; itDoc != iMatchingSet.end(); ++itDoc) { + const Xapian::percent& lPercentage = itDoc.get_percent(); + + if (lPercentage == lBestPercentage) { + ioMatchingDocument.addExtraDocument (itDoc.get_document()); + + } else { + break; + } + } + return oFoundDocument; } Modified: trunk/opentrep/opentrep/bom/StringMatcher.hpp =================================================================== --- trunk/opentrep/opentrep/bom/StringMatcher.hpp 2009-07-18 09:33:50 UTC (rev 136) +++ trunk/opentrep/opentrep/bom/StringMatcher.hpp 2009-07-18 15:15:40 UTC (rev 137) @@ -9,7 +9,7 @@ // OpenTREP #include <opentrep/bom/BomAbstract.hpp> #include <opentrep/bom/WordList.hpp> -#include <opentrep/bom/DocumentList.hpp> +#include <opentrep/bom/Document.hpp> // Forward declarations namespace Xapian { @@ -38,8 +38,7 @@ @return bool Whether or not there was a matching document. */ static bool - extractBestMatchingDocumentFromMSet (const Xapian::MSet&, - MatchingDocument_T&); + extractBestMatchingDocumentFromMSet (const Xapian::MSet&, Document&); /** Remove the word furthest at right. 
*/ static void removeOneWord (std::string& ioQueryString); Modified: trunk/opentrep/opentrep/bom/sources.mk =================================================================== --- trunk/opentrep/opentrep/bom/sources.mk 2009-07-18 09:33:50 UTC (rev 136) +++ trunk/opentrep/opentrep/bom/sources.mk 2009-07-18 15:15:40 UTC (rev 137) @@ -9,6 +9,7 @@ $(top_srcdir)/opentrep/bom/Place.hpp \ $(top_srcdir)/opentrep/bom/PlaceList.hpp \ $(top_srcdir)/opentrep/bom/PlaceHolder.hpp \ + $(top_srcdir)/opentrep/bom/Document.hpp \ $(top_srcdir)/opentrep/bom/DocumentList.hpp \ $(top_srcdir)/opentrep/bom/Result.hpp \ $(top_srcdir)/opentrep/bom/ResultList.hpp \ @@ -22,6 +23,7 @@ $(top_srcdir)/opentrep/bom/Names.cpp \ $(top_srcdir)/opentrep/bom/Place.cpp \ $(top_srcdir)/opentrep/bom/PlaceHolder.cpp \ + $(top_srcdir)/opentrep/bom/Document.cpp \ $(top_srcdir)/opentrep/bom/Result.cpp \ $(top_srcdir)/opentrep/bom/ResultHolder.cpp \ $(top_srcdir)/opentrep/bom/StringMatcher.cpp Modified: trunk/opentrep/opentrep/command/RequestInterpreter.cpp =================================================================== --- trunk/opentrep/opentrep/command/RequestInterpreter.cpp 2009-07-18 09:33:50 UTC (rev 136) +++ trunk/opentrep/opentrep/command/RequestInterpreter.cpp 2009-07-18 15:15:40 UTC (rev 137) @@ -39,6 +39,10 @@ ResultHolder& lResultHolder = FacResultHolder::instance().create (iTravelQuery, lXapianDatabase); + // DEBUG + OPENTREP_LOG_DEBUG (std::endl + << "========================================="); + // Main algorithm DocumentList_T lDocumentList; lResultHolder.searchString (lDocumentList); @@ -49,14 +53,14 @@ itDoc != lDocumentList.end(); ++itDoc) { // Retrieve both the Xapian document object and the corresponding // matching percentage (most of the time, it is 100%) - const QueryAndDocument_T& lQueryAndDocument = *itDoc; + const Document& lMatchingDocument = *itDoc; // Create a Result object Result& lResult = FacResult::instance().create (lXapianDatabase); // Fill the Result object with both 
the corresponding Document object // and its associated query string - lResult.setQueryAndDocument (lQueryAndDocument); + lResult.setMatchingDocument (lMatchingDocument); // Add the Result object (holding the list of matching // documents) to the dedicated list. @@ -83,8 +87,9 @@ assert (lResult_ptr != NULL); // Retrieve the parameters of the best matching document - const Xapian::Document& lDocument = lResult_ptr->getDocument(); - const Xapian::percent& lDocPercentage = lResult_ptr->getPercentage(); + const Xapian::Document& lDocument = lResult_ptr->getXapianDocument(); + const Xapian::percent& lDocPercentage = + lResult_ptr->getXapianPercentage(); const Xapian::docid& lDocID = lDocument.get_docid(); const std::string& lDocData = lDocument.get_data(); Modified: trunk/opentrep/opentrep/factory/FacPlaceHolder.cpp =================================================================== --- trunk/opentrep/opentrep/factory/FacPlaceHolder.cpp 2009-07-18 09:33:50 UTC (rev 136) +++ trunk/opentrep/opentrep/factory/FacPlaceHolder.cpp 2009-07-18 15:15:40 UTC (rev 137) @@ -59,15 +59,16 @@ ioPlace._placeHolder = &ioPlaceHolder; // Add the Place to the PlaceHolder internal map (of Place objects) - const bool insertSucceeded = ioPlaceHolder._placeList. - insert (PlaceList_T::value_type (ioPlace.describeShortKey(), - &ioPlace)).second; - if (insertSucceeded == false) { - OPENTREP_LOG_ERROR ("Insertion failed for " - << ioPlaceHolder.describeKey() - << " and " << ioPlace.describeShortKey()); - assert (insertSucceeded == true); - } + // const bool insertSucceeded = + ioPlaceHolder._placeList. 
+ insert (PlaceList_T::value_type (ioPlace.describeShortKey(), &ioPlace)); + +// if (insertSucceeded == false) { +// OPENTREP_LOG_ERROR ("Insertion failed for " +// << ioPlaceHolder.describeKey() +// << " and " << ioPlace.describeShortKey()); +// assert (insertSucceeded == true); +// } // Add the Place to the PlaceHolder internal list (of Place objects) ioPlaceHolder._placeOrderedList.push_back (&ioPlace); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <den...@us...> - 2009-07-18 09:33:57
|
Revision: 136 http://opentrep.svn.sourceforge.net/opentrep/?rev=136&view=rev Author: denis_arnaud Date: 2009-07-18 09:33:50 +0000 (Sat, 18 Jul 2009) Log Message: ----------- [DB] Fixed the issue with location of the CSV files. Modified Paths: -------------- trunk/opentrep/db/data/ref_place_names.csv trunk/opentrep/db/maintenance/create_and_fill_mysql_db.sh trunk/opentrep/db/maintenance/tables/create_and_fill_mysql_db.sql trunk/opentrep/opentrep/batches/searcher.cpp trunk/opentrep/opentrep/bom/StringMatcher.cpp Modified: trunk/opentrep/db/data/ref_place_names.csv =================================================================== --- trunk/opentrep/db/data/ref_place_names.csv 2009-07-18 00:50:00 UTC (rev 135) +++ trunk/opentrep/db/data/ref_place_names.csv 2009-07-18 09:33:50 UTC (rev 136) @@ -5057,7 +5057,7 @@ en,reg,reggio calabria,reggio calabria,reggio calabria/it:t menniti en,reh,rehoboth beach,rehoboth beach,rehoboth beach/de/us en,rei,regina,regina,regina/gf -en,rek,reykjavik,reykjavik,reykjavik/is +en,rek,reykjavik,reykjavik,reykjavik/is,reykjavik main,reykjavik city en,rel,trelew,trelew,trelew/cb/ar en,ren,orenburg,orenburg,orenburg/ru en,reo,rome,rome,rome/or/us:state Modified: trunk/opentrep/db/maintenance/create_and_fill_mysql_db.sh =================================================================== --- trunk/opentrep/db/maintenance/create_and_fill_mysql_db.sh 2009-07-18 00:50:00 UTC (rev 135) +++ trunk/opentrep/db/maintenance/create_and_fill_mysql_db.sh 2009-07-18 09:33:50 UTC (rev 136) @@ -39,20 +39,34 @@ # Database Name DB_NAME="opentrep" +# Check file existence +function checkSQLFile() { + if [ ! 
-r ${SQL_FILE} ]; then + echo + echo "The ${SQL_FILE} SQL file can not be found" + echo + exit -1; + fi +} + # Create the database function createDatabase() { + checkSQLFile echo "The '${DB_NAME}' database will be created:" mysql -u ${DB_USER} --password=${DB_PASSWD} -P ${DB_PORT} -h ${DB_HOST} mysql < ${SQL_FILE} } # Scan a SQL script for the names of (database) tables function createTable() { + checkSQLFile echo "The ref_place_details and ref_place_names tables will be created:" mysql -u ${DB_USER} --password=${DB_PASSWD} -P ${DB_PORT} -h ${DB_HOST} ${DB_NAME} < ${SQL_FILE} } # function loadData() { + SQL_FILE=${SQL_LOADER_FILE} + checkSQLFile echo "The ref_place_details and ref_place_names tables will be filled from ../data/*.csv files:" mysql -u ${DB_USER} --password=${DB_PASSWD} -P ${DB_PORT} -h ${DB_HOST} ${DB_NAME} < ${SQL_LOADER_FILE} echo "Done" @@ -74,15 +88,15 @@ } # Database -SQL_FILE="../data_structure/ref_db.sql" +SQL_FILE="tables/ref_db.sql" createDatabase # Table: Airport and City -SQL_FILE="../data_structure/ref_city.sql" +SQL_FILE="tables/ref_city.sql" createTable # Load data into the table -SQL_LOADER_FILE="create_and_fill_mysql_db.sql" +SQL_LOADER_FILE="tables/create_and_fill_mysql_db.sql" loadData # Trim the spaces from the state_code field of the ref_place_details table Modified: trunk/opentrep/db/maintenance/tables/create_and_fill_mysql_db.sql =================================================================== --- trunk/opentrep/db/maintenance/tables/create_and_fill_mysql_db.sql 2009-07-18 00:50:00 UTC (rev 135) +++ trunk/opentrep/db/maintenance/tables/create_and_fill_mysql_db.sql 2009-07-18 09:33:50 UTC (rev 136) @@ -1,3 +1,8 @@ +-- +-- Note: that file is expected to be launched from the +-- $(top_srcdir)/db/maintenance sub-directory, as the CSV files are +-- to be found in $(top_srcdir)/db/data sub-directory +-- -- -- Load the Airport and City geographical details into the MySQL table Modified: trunk/opentrep/opentrep/batches/searcher.cpp 
=================================================================== --- trunk/opentrep/opentrep/batches/searcher.cpp 2009-07-18 00:50:00 UTC (rev 135) +++ trunk/opentrep/opentrep/batches/searcher.cpp 2009-07-18 09:33:50 UTC (rev 136) @@ -28,7 +28,7 @@ const std::string K_OPENTREP_DEFAULT_DATABSE_FILEPATH("/tmp/opentrep/traveldb"); /** Default travel query string, to be seached against the Xapian database. */ -const std::string K_OPENTREP_DEFAULT_QUERY_STRING ("sna francicso rio de janero lso anglese reykyavki"); +const std::string K_OPENTREP_DEFAULT_QUERY_STRING ("sna francicso rio de janero lso angles reykyavki"); /** Default error distance for spelling corrections. */ const unsigned short K_OPENTREP_DEFAULT_SPELLING_ERROR_DISTANCE = 3; Modified: trunk/opentrep/opentrep/bom/StringMatcher.cpp =================================================================== --- trunk/opentrep/opentrep/bom/StringMatcher.cpp 2009-07-18 00:50:00 UTC (rev 135) +++ trunk/opentrep/opentrep/bom/StringMatcher.cpp 2009-07-18 09:33:50 UTC (rev 136) @@ -34,7 +34,7 @@ const EditDistance_T lQueryStringSize = iPhrase.size(); - oEditDistance = lQueryStringSize / 3; + oEditDistance = lQueryStringSize / 4; return oEditDistance; } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <den...@us...> - 2009-07-18 00:50:11
|
Revision: 135 http://opentrep.svn.sourceforge.net/opentrep/?rev=135&view=rev Author: denis_arnaud Date: 2009-07-18 00:50:00 +0000 (Sat, 18 Jul 2009) Log Message: ----------- 1. Re-worked the searcher, so that the Result objects be created by the command (and no longer by the BOM itself). 2. Added building makefiles for database handling. Modified Paths: -------------- trunk/opentrep/Makefile.am trunk/opentrep/configure.ac trunk/opentrep/opentrep/OPENTREP_Types.hpp trunk/opentrep/opentrep/batches/indexer.cpp trunk/opentrep/opentrep/batches/searcher.cpp trunk/opentrep/opentrep/bom/DocumentList.hpp trunk/opentrep/opentrep/bom/Result.cpp trunk/opentrep/opentrep/bom/Result.hpp trunk/opentrep/opentrep/bom/ResultHolder.cpp trunk/opentrep/opentrep/bom/ResultHolder.hpp trunk/opentrep/opentrep/bom/StringMatcher.cpp trunk/opentrep/opentrep/bom/StringMatcher.hpp trunk/opentrep/opentrep/command/RequestInterpreter.cpp trunk/opentrep/opentrep/command/RequestInterpreter.hpp trunk/opentrep/opentrep/factory/FacResult.cpp trunk/opentrep/opentrep/factory/FacResult.hpp trunk/opentrep/opentrep/service/OPENTREP_Service.cpp Added Paths: ----------- trunk/opentrep/db/ trunk/opentrep/db/Makefile.am trunk/opentrep/db/admin/ trunk/opentrep/db/admin/Makefile.am trunk/opentrep/db/admin/create_opentrep_user.sh trunk/opentrep/db/admin/create_opentrep_user.sql trunk/opentrep/db/data/Makefile.am trunk/opentrep/db/data/sources.mk trunk/opentrep/db/maintenance/ trunk/opentrep/db/maintenance/Makefile.am trunk/opentrep/db/maintenance/create_and_fill_mysql_db.sh trunk/opentrep/db/maintenance/drop_tables_from_mysql_db.sh trunk/opentrep/db/maintenance/tables/ trunk/opentrep/db/maintenance/tables/Makefile.am trunk/opentrep/db/maintenance/tables/create_and_fill_mysql_db.sql trunk/opentrep/db/maintenance/tables/ref_city.sql trunk/opentrep/db/maintenance/tables/ref_db.sql trunk/opentrep/db/maintenance/tables/sources.mk Removed Paths: ------------- trunk/opentrep/db/data_structure/ 
trunk/opentrep/db/maintenance/ref_city.sql trunk/opentrep/db/maintenance/ref_db.sql trunk/opentrep/db/mysql/create_and_fill_mysql_db.sh trunk/opentrep/db/mysql/create_and_fill_mysql_db.sql trunk/opentrep/db/mysql/create_opentrep_user.sh trunk/opentrep/db/mysql/create_opentrep_user.sql trunk/opentrep/db/mysql/drop_tables_from_mysql_db.sh trunk/opentrep/refdata/ Property Changed: ---------------- trunk/opentrep/db/data/ trunk/opentrep/opentrep/batches/ Modified: trunk/opentrep/Makefile.am =================================================================== --- trunk/opentrep/Makefile.am 2009-07-17 17:19:13 UTC (rev 134) +++ trunk/opentrep/Makefile.am 2009-07-18 00:50:00 UTC (rev 135) @@ -24,7 +24,7 @@ EXTRA_DIST = @PACKAGE@.spec @PACKAGE@.m4 @PACKAGE@.pc Makefile.common # Build in these directories: -SUBDIRS = opentrep win32 po man $(INFO_DOC_DIR) $(HTML_DOC_DIR) $(TEST_DIR) +SUBDIRS = opentrep win32 po man $(INFO_DOC_DIR) $(HTML_DOC_DIR) db $(TEST_DIR) # Configuration helpers Modified: trunk/opentrep/configure.ac =================================================================== --- trunk/opentrep/configure.ac 2009-07-17 17:19:13 UTC (rev 134) +++ trunk/opentrep/configure.ac 2009-07-18 00:50:00 UTC (rev 135) @@ -233,6 +233,11 @@ doc/doxygen_html.cfg doc/sourceforge/howto_release_opentrep.html po/Makefile.in + db/Makefile + db/admin/Makefile + db/maintenance/Makefile + db/maintenance/tables/Makefile + db/data/Makefile test/com/Makefile test/parsers/Makefile test/Makefile Property changes on: trunk/opentrep/db ___________________________________________________________________ Added: svn:ignore + Makefile Makefile.in Added: trunk/opentrep/db/Makefile.am =================================================================== --- trunk/opentrep/db/Makefile.am (rev 0) +++ trunk/opentrep/db/Makefile.am 2009-07-18 00:50:00 UTC (rev 135) @@ -0,0 +1,6 @@ +## db sub-directory +include $(top_srcdir)/Makefile.common + +MAINTAINERCLEANFILES = Makefile.in + +SUBDIRS = admin data 
maintenance Property changes on: trunk/opentrep/db/admin ___________________________________________________________________ Added: svn:ignore + Makefile Makefile.in Added: trunk/opentrep/db/admin/Makefile.am =================================================================== --- trunk/opentrep/db/admin/Makefile.am (rev 0) +++ trunk/opentrep/db/admin/Makefile.am 2009-07-18 00:50:00 UTC (rev 135) @@ -0,0 +1,8 @@ +## db sub-directory +include $(top_srcdir)/Makefile.common + +MAINTAINERCLEANFILES = Makefile.in + +MYSQL_ADMIN_FILES = create_opentrep_user.sh create_opentrep_user.sql + +EXTRA_DIST = $(MYSQL_ADMIN_FILES) Copied: trunk/opentrep/db/admin/create_opentrep_user.sh (from rev 134, trunk/opentrep/refdata/mysql/create_opentrep_user.sh) =================================================================== --- trunk/opentrep/db/admin/create_opentrep_user.sh (rev 0) +++ trunk/opentrep/db/admin/create_opentrep_user.sh 2009-07-18 00:50:00 UTC (rev 135) @@ -0,0 +1,52 @@ +#!/bin/sh +# +# Two parameters are required for this script: +# - the administrator username +# - the administrator password +# +# Two parameters are optional: +# - the host server of the database +# - the port of the database +# + +if [ "$1" = "" -o "$2" = "" -o "$1" = "-h" -o "$1" = "--help" ]; +then + echo "Usage: $0 <Admin Username> <Admin password> [<Database Server Hostname> [<Database Server Port>]]" + echo "" + exit -1 +fi + +## +# Database Server Hostname +DB_HOST="localhost" +if [ "$3" != "" ]; +then + DB_HOST="$3" +fi + +# Database Server Port +DB_PORT="3306" +if [ "$4" != "" ]; +then + DB_PORT="$4" +fi + +# Database User +DB_USER="$1" + +# Database Password +DB_PASSWD="$2" + +# Database Name +DB_NAME="mysql" + +function createOpenTrepUser() { + echo "Creating the opentrep user within the database:" + mysql -u ${DB_USER} --password=${DB_PASSWD} -P ${DB_PORT} -h ${DB_HOST} ${DB_NAME} < ${SQL_FILE} + mysql -u ${DB_USER} --password=${DB_PASSWD} -P ${DB_PORT} -h ${DB_HOST} -e "flush privileges" +} 
+ +# Creating the opentrep user +SQL_FILE="create_opentrep_user.sql" +createOpenTrepUser + Copied: trunk/opentrep/db/admin/create_opentrep_user.sql (from rev 134, trunk/opentrep/refdata/mysql/create_opentrep_user.sql) =================================================================== --- trunk/opentrep/db/admin/create_opentrep_user.sql (rev 0) +++ trunk/opentrep/db/admin/create_opentrep_user.sql 2009-07-18 00:50:00 UTC (rev 135) @@ -0,0 +1,7 @@ + +insert into `user` (`Host`, `User`, `Password`, `Select_priv`, `Insert_priv`, `Update_priv`, `Delete_priv`, `Create_priv`, `Drop_priv`, `Reload_priv`, `Shutdown_priv`, `Process_priv`, `File_priv`, `Grant_priv`, `References_priv`, `Index_priv`, `Alter_priv`, `Show_db_priv`, `Super_priv`, `Create_tmp_table_priv`, `Lock_tables_priv`, `Execute_priv`, `Repl_slave_priv`, `Repl_client_priv`, `Create_view_priv`, `Show_view_priv`, `Create_routine_priv`, `Alter_routine_priv`, `Create_user_priv`, `ssl_type`, `ssl_cipher`, `x509_issuer`, `x509_subject`, `max_questions`, `max_updates`, `max_connections`, `max_user_connections`) values +('%', 'opentrep', '*C21B5F0DB6BBABAA20B5496E75D652982A6AC65C', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'N', 'N', 'N', 'Y', 'N', 'N', 'Y', 'Y', 'N', 'N', 'Y', 'N', 'Y', 'N', 'N', 'Y', 'Y', 'Y', 'Y', 'N', '', '', '', '', 0, 0, 0, 0), +('localhost', 'opentrep', '*C21B5F0DB6BBABAA20B5496E75D652982A6AC65C', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'N', 'N', 'N', 'Y', 'N', 'N', 'Y', 'Y', 'N', 'N', 'Y', 'N', 'Y', 'N', 'N', 'Y', 'Y', 'Y', 'Y', 'N', '', '', '', '', 0, 0, 0, 0); + +flush privileges; + Property changes on: trunk/opentrep/db/data ___________________________________________________________________ Added: svn:ignore + Makefile Makefile.in Added: trunk/opentrep/db/data/Makefile.am =================================================================== --- trunk/opentrep/db/data/Makefile.am (rev 0) +++ trunk/opentrep/db/data/Makefile.am 2009-07-18 00:50:00 UTC (rev 135) @@ -0,0 +1,26 @@ +## db/data sub-directory +include 
$(top_srcdir)/Makefile.common +include $(top_srcdir)/db/data/sources.mk + +datadir = @datadir@ +pkgdatadir = $(datadir)/@PACKAGE@ +dbscriptsdir = $(pkgdatadir)/db/mysql/fill_tables + +MAINTAINERCLEANFILES = Makefile.in Makefile + +noinst_DATA = $(data_mysql_sources) +EXTRA_DIST = $(noinst_DATA) + + +# Targets +install-data-local: + $(mkinstalldirs) $(DESTDIR)$(dbscriptsdir); \ + for f in $(data_mysql_sources); do \ + $(INSTALL_DATA) $$f $(DESTDIR)$(dbscriptsdir); \ + done + +uninstall-local: + rm -rf $(DESTDIR)$(dbscriptsdir) + +clean-local: + rm -rf *.log *.tag Added: trunk/opentrep/db/data/sources.mk =================================================================== --- trunk/opentrep/db/data/sources.mk (rev 0) +++ trunk/opentrep/db/data/sources.mk 2009-07-18 00:50:00 UTC (rev 135) @@ -0,0 +1,4 @@ +data_mysql_sources = \ + $(top_srcdir)/db/data/ref_city.csv \ + $(top_srcdir)/db/data/ref_place_details.csv \ + $(top_srcdir)/db/data/ref_place_names.csv Property changes on: trunk/opentrep/db/maintenance ___________________________________________________________________ Added: svn:ignore + Makefile Makefile.in Added: trunk/opentrep/db/maintenance/Makefile.am =================================================================== --- trunk/opentrep/db/maintenance/Makefile.am (rev 0) +++ trunk/opentrep/db/maintenance/Makefile.am 2009-07-18 00:50:00 UTC (rev 135) @@ -0,0 +1,10 @@ +## db/maintenance sub-directory +include $(top_srcdir)/Makefile.common + +MAINTAINERCLEANFILES = Makefile.in + +SUBDIRS = tables + +TABLE_MAINT_FILES = create_and_fill_mysql_db.sh drop_tables_from_mysql_db.sh + +EXTRA_DIST = $(TABLE_MAINT_FILES) Copied: trunk/opentrep/db/maintenance/create_and_fill_mysql_db.sh (from rev 134, trunk/opentrep/refdata/mysql/create_and_fill_mysql_db.sh) =================================================================== --- trunk/opentrep/db/maintenance/create_and_fill_mysql_db.sh (rev 0) +++ trunk/opentrep/db/maintenance/create_and_fill_mysql_db.sh 2009-07-18 
00:50:00 UTC (rev 135) @@ -0,0 +1,98 @@ +#!/bin/sh +# +# One parameter is required for this script: +# - the username +# +# Two parameters are optional: +# - the host server of the database +# - the port of the database +# + +if [ "$1" = "" -o "$1" = "-h" -o "$1" = "--help" ]; +then + echo "Usage: $0 <Database Username> [<Database Server Hostname> [<Database Server Port>]]" + echo "" + exit -1 +fi + +## +# Database Server Hostname +DB_HOST="localhost" +if [ "$2" != "" ]; +then + DB_HOST="$2" +fi + +# Database Server Port +DB_PORT="3306" +if [ "$3" != "" ]; +then + DB_PORT="$3" +fi + +# Database User +DB_USER="$1" + +# Database Password +DB_PASSWD="${DB_USER}" + +# Database Name +DB_NAME="opentrep" + +# Create the database +function createDatabase() { + echo "The '${DB_NAME}' database will be created:" + mysql -u ${DB_USER} --password=${DB_PASSWD} -P ${DB_PORT} -h ${DB_HOST} mysql < ${SQL_FILE} +} + +# Scan a SQL script for the names of (database) tables +function createTable() { + echo "The ref_place_details and ref_place_names tables will be created:" + mysql -u ${DB_USER} --password=${DB_PASSWD} -P ${DB_PORT} -h ${DB_HOST} ${DB_NAME} < ${SQL_FILE} +} + +# +function loadData() { + echo "The ref_place_details and ref_place_names tables will be filled from ../data/*.csv files:" + mysql -u ${DB_USER} --password=${DB_PASSWD} -P ${DB_PORT} -h ${DB_HOST} ${DB_NAME} < ${SQL_LOADER_FILE} + echo "Done" +} + +# +function trimStateCode() { + echo "Triming the spaces from the state_code field of the ${TABLE} table:" + mysql -u ${DB_USER} --password=${DB_PASSWD} -P ${DB_PORT} -h ${DB_HOST} ${DB_NAME} -e "update ${TABLE} set city_code=NULL where city_code='';" + mysql -u ${DB_USER} --password=${DB_PASSWD} -P ${DB_PORT} -h ${DB_HOST} ${DB_NAME} -e "update ${TABLE} set state_code=NULL where state_code like '%null%';" + mysql -u ${DB_USER} --password=${DB_PASSWD} -P ${DB_PORT} -h ${DB_HOST} ${DB_NAME} -e "update ${TABLE} set state_code=NULL where length(state_code)=2;" + mysql -u 
${DB_USER} --password=${DB_PASSWD} -P ${DB_PORT} -h ${DB_HOST} ${DB_NAME} -e "update ${TABLE} set state_code=substring(state_code,2,2) where length(state_code)=4;" +} + +# +function countRows() { + echo "Counting the rows from the ${TABLE} table:" + mysql -u ${DB_USER} --password=${DB_PASSWD} -P ${DB_PORT} -h ${DB_HOST} ${DB_NAME} -e "select count(*) from ${TABLE};" +} + +# Database +SQL_FILE="../data_structure/ref_db.sql" +createDatabase + +# Table: Airport and City +SQL_FILE="../data_structure/ref_city.sql" +createTable + +# Load data into the table +SQL_LOADER_FILE="create_and_fill_mysql_db.sql" +loadData + +# Trim the spaces from the state_code field of the ref_place_details table +TABLE=ref_place_details +trimStateCode + +# Count the rows from the ref_place_details table +TABLE=ref_place_details +countRows + +# Count the rows from the ref_place_names table +TABLE=ref_place_names +countRows Copied: trunk/opentrep/db/maintenance/drop_tables_from_mysql_db.sh (from rev 134, trunk/opentrep/refdata/mysql/drop_tables_from_mysql_db.sh) =================================================================== --- trunk/opentrep/db/maintenance/drop_tables_from_mysql_db.sh (rev 0) +++ trunk/opentrep/db/maintenance/drop_tables_from_mysql_db.sh 2009-07-18 00:50:00 UTC (rev 135) @@ -0,0 +1,54 @@ +#!/bin/sh +# +# One parameter is required for this script: +# - the username +# +# Two parameters are optional: +# - the host server of the database +# - the port of the database +# + +if [ "$1" = "" -o "$1" = "-h" -o "$1" = "--help" ]; +then + echo "Usage: $0 <Database Username> [<Database Server Hostname> [<Database Server Port>]]" + echo "" + exit -1 +fi + +## +# Database Server Hostname +DB_HOST="localhost" +if [ "$2" != "" ]; +then + DB_HOST="$2" +fi + +# Database Server Port +DB_PORT="3306" +if [ "$3" != "" ]; +then + DB_PORT="$3" +fi + +# Database User +DB_USER="$1" + +# Database Password +DB_PASSWD="${DB_USER}" + +# Database Name +DB_NAME="opentrep" + +# Drop a table +function 
dropTable() { + echo "The ${TABLE} table will be dropped:" + mysql -u ${DB_USER} --password=${DB_PASSWD} -P ${DB_PORT} -h ${DB_HOST} ${DB_NAME} -e "drop table ${DB_NAME}.${TABLE}" +} + +# Table: drop the ref_place_details table +TABLE=ref_place_details +dropTable + +# Table: drop the ref_place_names table +TABLE=ref_place_names +dropTable Deleted: trunk/opentrep/db/maintenance/ref_city.sql =================================================================== --- trunk/opentrep/refdata/data_structure/ref_city.sql 2009-07-17 17:19:13 UTC (rev 134) +++ trunk/opentrep/db/maintenance/ref_city.sql 2009-07-18 00:50:00 UTC (rev 135) @@ -1,46 +0,0 @@ --- --- Place details --- Does not depend on language --- -create table if not exists ref_place_details ( - code char(3) collate utf8_unicode_ci not null, - city_code char(3) collate utf8_unicode_ci, - xapian_docid integer, - is_airport char(1) collate utf8_unicode_ci not null, - is_city char(1) collate utf8_unicode_ci not null, - is_main char(1) collate utf8_unicode_ci not null default 'N', - is_commercial char(1) collate utf8_unicode_ci not null, - state_code varchar(5) collate utf8_unicode_ci, - country_code char(2) collate utf8_unicode_ci not null, - region_code varchar(5) collate utf8_unicode_ci not null, - continent_code varchar(4) collate utf8_unicode_ci not null, - time_zone_grp varchar(5) collate utf8_unicode_ci not null, - longitude float(20), - latitude float(20), - primary key (code), - key `geographical codes`(city_code, continent_code, country_code, region_code, time_zone_grp) -) engine=myisam default charset=utf8 collate=utf8_unicode_ci; - --- --- Place names --- Depends on language --- -create table if not exists ref_place_names ( - language_code char(2) collate utf8_unicode_ci not null, - code char(3) collate utf8_unicode_ci not null, - classical_name varchar(30) collate utf8_unicode_ci not null, - classical_name2 varchar(50) collate utf8_unicode_ci not null, - extended_name varchar(100) collate utf8_unicode_ci 
not null, - alternate_name1 varchar(60) collate utf8_unicode_ci, - alternate_name2 varchar(60) collate utf8_unicode_ci, - alternate_name3 varchar(60) collate utf8_unicode_ci, - alternate_name4 varchar(60) collate utf8_unicode_ci, - alternate_name5 varchar(60) collate utf8_unicode_ci, - alternate_name6 varchar(60) collate utf8_unicode_ci, - alternate_name7 varchar(60) collate utf8_unicode_ci, - alternate_name8 varchar(60) collate utf8_unicode_ci, - alternate_name9 varchar(60) collate utf8_unicode_ci, - alternate_name10 varchar(60) collate utf8_unicode_ci, - primary key (language_code, code) -) engine=myisam default charset=utf8 collate=utf8_unicode_ci; - Deleted: trunk/opentrep/db/maintenance/ref_db.sql =================================================================== --- trunk/opentrep/refdata/data_structure/ref_db.sql 2009-07-17 17:19:13 UTC (rev 134) +++ trunk/opentrep/db/maintenance/ref_db.sql 2009-07-18 00:50:00 UTC (rev 135) @@ -1,6 +0,0 @@ - --- --- Create the opentrep database --- -create database if not exists opentrep -default character set utf8 collate utf8_unicode_ci; Property changes on: trunk/opentrep/db/maintenance/tables ___________________________________________________________________ Added: svn:ignore + Makefile Makefile.in Added: trunk/opentrep/db/maintenance/tables/Makefile.am =================================================================== --- trunk/opentrep/db/maintenance/tables/Makefile.am (rev 0) +++ trunk/opentrep/db/maintenance/tables/Makefile.am 2009-07-18 00:50:00 UTC (rev 135) @@ -0,0 +1,26 @@ +## db/maintenance/tables sub-directory +include $(top_srcdir)/Makefile.common +include $(top_srcdir)/db/maintenance/tables/sources.mk + +datadir = @datadir@ +pkgdatadir = $(datadir)/@PACKAGE@ +dbscriptsdir = $(pkgdatadir)/db/mysql/create_tables + +MAINTAINERCLEANFILES = Makefile.in Makefile + +noinst_DATA = $(dbscript_mysql_sources) +EXTRA_DIST = $(noinst_DATA) + + +# Targets +install-data-local: + $(mkinstalldirs) 
$(DESTDIR)$(dbscriptsdir); \ + for f in $(dbscript_mysql_sources); do \ + $(INSTALL_DATA) $$f $(DESTDIR)$(dbscriptsdir); \ + done + +uninstall-local: + rm -rf $(DESTDIR)$(dbscriptsdir) + +clean-local: + rm -rf *.log *.tag Copied: trunk/opentrep/db/maintenance/tables/create_and_fill_mysql_db.sql (from rev 134, trunk/opentrep/refdata/mysql/create_and_fill_mysql_db.sql) =================================================================== --- trunk/opentrep/db/maintenance/tables/create_and_fill_mysql_db.sql (rev 0) +++ trunk/opentrep/db/maintenance/tables/create_and_fill_mysql_db.sql 2009-07-18 00:50:00 UTC (rev 135) @@ -0,0 +1,16 @@ + +-- +-- Load the Airport and City geographical details into the MySQL table +-- +load data local infile '../data/ref_place_details.csv' ignore +into table ref_place_details +fields terminated by ',' enclosed by '' escaped by '\\' +ignore 1 lines; + +-- +-- Load the Airport and City names into the MySQL table +-- +load data local infile '../data/ref_place_names.csv' ignore +into table ref_place_names +fields terminated by ',' enclosed by '' escaped by '\\' +ignore 1 lines; Copied: trunk/opentrep/db/maintenance/tables/ref_city.sql (from rev 134, trunk/opentrep/refdata/data_structure/ref_city.sql) =================================================================== --- trunk/opentrep/db/maintenance/tables/ref_city.sql (rev 0) +++ trunk/opentrep/db/maintenance/tables/ref_city.sql 2009-07-18 00:50:00 UTC (rev 135) @@ -0,0 +1,46 @@ +-- +-- Place details +-- Does not depend on language +-- +create table if not exists ref_place_details ( + code char(3) collate utf8_unicode_ci not null, + city_code char(3) collate utf8_unicode_ci, + xapian_docid integer, + is_airport char(1) collate utf8_unicode_ci not null, + is_city char(1) collate utf8_unicode_ci not null, + is_main char(1) collate utf8_unicode_ci not null default 'N', + is_commercial char(1) collate utf8_unicode_ci not null, + state_code varchar(5) collate utf8_unicode_ci, + country_code 
char(2) collate utf8_unicode_ci not null, + region_code varchar(5) collate utf8_unicode_ci not null, + continent_code varchar(4) collate utf8_unicode_ci not null, + time_zone_grp varchar(5) collate utf8_unicode_ci not null, + longitude float(20), + latitude float(20), + primary key (code), + key `geographical codes`(city_code, continent_code, country_code, region_code, time_zone_grp) +) engine=myisam default charset=utf8 collate=utf8_unicode_ci; + +-- +-- Place names +-- Depends on language +-- +create table if not exists ref_place_names ( + language_code char(2) collate utf8_unicode_ci not null, + code char(3) collate utf8_unicode_ci not null, + classical_name varchar(30) collate utf8_unicode_ci not null, + classical_name2 varchar(50) collate utf8_unicode_ci not null, + extended_name varchar(100) collate utf8_unicode_ci not null, + alternate_name1 varchar(60) collate utf8_unicode_ci, + alternate_name2 varchar(60) collate utf8_unicode_ci, + alternate_name3 varchar(60) collate utf8_unicode_ci, + alternate_name4 varchar(60) collate utf8_unicode_ci, + alternate_name5 varchar(60) collate utf8_unicode_ci, + alternate_name6 varchar(60) collate utf8_unicode_ci, + alternate_name7 varchar(60) collate utf8_unicode_ci, + alternate_name8 varchar(60) collate utf8_unicode_ci, + alternate_name9 varchar(60) collate utf8_unicode_ci, + alternate_name10 varchar(60) collate utf8_unicode_ci, + primary key (language_code, code) +) engine=myisam default charset=utf8 collate=utf8_unicode_ci; + Copied: trunk/opentrep/db/maintenance/tables/ref_db.sql (from rev 134, trunk/opentrep/refdata/data_structure/ref_db.sql) =================================================================== --- trunk/opentrep/db/maintenance/tables/ref_db.sql (rev 0) +++ trunk/opentrep/db/maintenance/tables/ref_db.sql 2009-07-18 00:50:00 UTC (rev 135) @@ -0,0 +1,6 @@ + +-- +-- Create the opentrep database +-- +create database if not exists opentrep +default character set utf8 collate utf8_unicode_ci; Added: 
trunk/opentrep/db/maintenance/tables/sources.mk =================================================================== --- trunk/opentrep/db/maintenance/tables/sources.mk (rev 0) +++ trunk/opentrep/db/maintenance/tables/sources.mk 2009-07-18 00:50:00 UTC (rev 135) @@ -0,0 +1,4 @@ +dbscript_mysql_sources = \ + $(top_srcdir)/db/maintenance/tables/create_and_fill_mysql_db.sql \ + $(top_srcdir)/db/maintenance/tables/ref_db.sql \ + $(top_srcdir)/db/maintenance/tables/ref_city.sql Deleted: trunk/opentrep/db/mysql/create_and_fill_mysql_db.sh =================================================================== --- trunk/opentrep/refdata/mysql/create_and_fill_mysql_db.sh 2009-07-17 17:19:13 UTC (rev 134) +++ trunk/opentrep/db/mysql/create_and_fill_mysql_db.sh 2009-07-18 00:50:00 UTC (rev 135) @@ -1,98 +0,0 @@ -#!/bin/sh -# -# One parameter is required for this script: -# - the username -# -# Two parameters are optional: -# - the host server of the database -# - the port of the database -# - -if [ "$1" = "" -o "$1" = "-h" -o "$1" = "--help" ]; -then - echo "Usage: $0 <Database Username> [<Database Server Hostname> [<Database Server Port>]]" - echo "" - exit -1 -fi - -## -# Database Server Hostname -DB_HOST="localhost" -if [ "$2" != "" ]; -then - DB_HOST="$2" -fi - -# Database Server Port -DB_PORT="3306" -if [ "$3" != "" ]; -then - DB_PORT="$3" -fi - -# Database User -DB_USER="$1" - -# Database Password -DB_PASSWD="${DB_USER}" - -# Database Name -DB_NAME="opentrep" - -# Create the database -function createDatabase() { - echo "The '${DB_NAME}' database will be created:" - mysql -u ${DB_USER} --password=${DB_PASSWD} -P ${DB_PORT} -h ${DB_HOST} mysql < ${SQL_FILE} -} - -# Scan a SQL script for the names of (database) tables -function createTable() { - echo "The ref_place_details and ref_place_names tables will be created:" - mysql -u ${DB_USER} --password=${DB_PASSWD} -P ${DB_PORT} -h ${DB_HOST} ${DB_NAME} < ${SQL_FILE} -} - -# -function loadData() { - echo "The ref_place_details 
and ref_place_names tables will be filled from ../data/*.csv files:" - mysql -u ${DB_USER} --password=${DB_PASSWD} -P ${DB_PORT} -h ${DB_HOST} ${DB_NAME} < ${SQL_LOADER_FILE} - echo "Done" -} - -# -function trimStateCode() { - echo "Triming the spaces from the state_code field of the ${TABLE} table:" - mysql -u ${DB_USER} --password=${DB_PASSWD} -P ${DB_PORT} -h ${DB_HOST} ${DB_NAME} -e "update ${TABLE} set city_code=NULL where city_code='';" - mysql -u ${DB_USER} --password=${DB_PASSWD} -P ${DB_PORT} -h ${DB_HOST} ${DB_NAME} -e "update ${TABLE} set state_code=NULL where state_code like '%null%';" - mysql -u ${DB_USER} --password=${DB_PASSWD} -P ${DB_PORT} -h ${DB_HOST} ${DB_NAME} -e "update ${TABLE} set state_code=NULL where length(state_code)=2;" - mysql -u ${DB_USER} --password=${DB_PASSWD} -P ${DB_PORT} -h ${DB_HOST} ${DB_NAME} -e "update ${TABLE} set state_code=substring(state_code,2,2) where length(state_code)=4;" -} - -# -function countRows() { - echo "Counting the rows from the ${TABLE} table:" - mysql -u ${DB_USER} --password=${DB_PASSWD} -P ${DB_PORT} -h ${DB_HOST} ${DB_NAME} -e "select count(*) from ${TABLE};" -} - -# Database -SQL_FILE="../data_structure/ref_db.sql" -createDatabase - -# Table: Airport and City -SQL_FILE="../data_structure/ref_city.sql" -createTable - -# Load data into the table -SQL_LOADER_FILE="create_and_fill_mysql_db.sql" -loadData - -# Trim the spaces from the state_code field of the ref_place_details table -TABLE=ref_place_details -trimStateCode - -# Count the rows from the ref_place_details table -TABLE=ref_place_details -countRows - -# Count the rows from the ref_place_names table -TABLE=ref_place_names -countRows Deleted: trunk/opentrep/db/mysql/create_and_fill_mysql_db.sql =================================================================== --- trunk/opentrep/refdata/mysql/create_and_fill_mysql_db.sql 2009-07-17 17:19:13 UTC (rev 134) +++ trunk/opentrep/db/mysql/create_and_fill_mysql_db.sql 2009-07-18 00:50:00 UTC (rev 135) @@ 
-1,16 +0,0 @@ - --- --- Load the Airport and City geographical details into the MySQL table --- -load data local infile '../data/ref_place_details.csv' ignore -into table ref_place_details -fields terminated by ',' enclosed by '' escaped by '\\' -ignore 1 lines; - --- --- Load the Airport and City names into the MySQL table --- -load data local infile '../data/ref_place_names.csv' ignore -into table ref_place_names -fields terminated by ',' enclosed by '' escaped by '\\' -ignore 1 lines; Deleted: trunk/opentrep/db/mysql/create_opentrep_user.sh =================================================================== --- trunk/opentrep/refdata/mysql/create_opentrep_user.sh 2009-07-17 17:19:13 UTC (rev 134) +++ trunk/opentrep/db/mysql/create_opentrep_user.sh 2009-07-18 00:50:00 UTC (rev 135) @@ -1,52 +0,0 @@ -#!/bin/sh -# -# Two parameters are required for this script: -# - the administrator username -# - the administrator password -# -# Two parameters are optional: -# - the host server of the database -# - the port of the database -# - -if [ "$1" = "" -o "$2" = "" -o "$1" = "-h" -o "$1" = "--help" ]; -then - echo "Usage: $0 <Admin Username> <Admin password> [<Database Server Hostname> [<Database Server Port>]]" - echo "" - exit -1 -fi - -## -# Database Server Hostname -DB_HOST="localhost" -if [ "$3" != "" ]; -then - DB_HOST="$3" -fi - -# Database Server Port -DB_PORT="3306" -if [ "$4" != "" ]; -then - DB_PORT="$4" -fi - -# Database User -DB_USER="$1" - -# Database Password -DB_PASSWD="$2" - -# Database Name -DB_NAME="mysql" - -function createOpenTrepUser() { - echo "Creating the opentrep user within the database:" - mysql -u ${DB_USER} --password=${DB_PASSWD} -P ${DB_PORT} -h ${DB_HOST} ${DB_NAME} < ${SQL_FILE} - mysql -u ${DB_USER} --password=${DB_PASSWD} -P ${DB_PORT} -h ${DB_HOST} -e "flush privileges" -} - -# Creating the opentrep user -SQL_FILE="create_opentrep_user.sql" -createOpenTrepUser - Deleted: trunk/opentrep/db/mysql/create_opentrep_user.sql 
=================================================================== --- trunk/opentrep/refdata/mysql/create_opentrep_user.sql 2009-07-17 17:19:13 UTC (rev 134) +++ trunk/opentrep/db/mysql/create_opentrep_user.sql 2009-07-18 00:50:00 UTC (rev 135) @@ -1,7 +0,0 @@ - -insert into `user` (`Host`, `User`, `Password`, `Select_priv`, `Insert_priv`, `Update_priv`, `Delete_priv`, `Create_priv`, `Drop_priv`, `Reload_priv`, `Shutdown_priv`, `Process_priv`, `File_priv`, `Grant_priv`, `References_priv`, `Index_priv`, `Alter_priv`, `Show_db_priv`, `Super_priv`, `Create_tmp_table_priv`, `Lock_tables_priv`, `Execute_priv`, `Repl_slave_priv`, `Repl_client_priv`, `Create_view_priv`, `Show_view_priv`, `Create_routine_priv`, `Alter_routine_priv`, `Create_user_priv`, `ssl_type`, `ssl_cipher`, `x509_issuer`, `x509_subject`, `max_questions`, `max_updates`, `max_connections`, `max_user_connections`) values -('%', 'opentrep', '*C21B5F0DB6BBABAA20B5496E75D652982A6AC65C', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'N', 'N', 'N', 'Y', 'N', 'N', 'Y', 'Y', 'N', 'N', 'Y', 'N', 'Y', 'N', 'N', 'Y', 'Y', 'Y', 'Y', 'N', '', '', '', '', 0, 0, 0, 0), -('localhost', 'opentrep', '*C21B5F0DB6BBABAA20B5496E75D652982A6AC65C', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'N', 'N', 'N', 'Y', 'N', 'N', 'Y', 'Y', 'N', 'N', 'Y', 'N', 'Y', 'N', 'N', 'Y', 'Y', 'Y', 'Y', 'N', '', '', '', '', 0, 0, 0, 0); - -flush privileges; - Deleted: trunk/opentrep/db/mysql/drop_tables_from_mysql_db.sh =================================================================== --- trunk/opentrep/refdata/mysql/drop_tables_from_mysql_db.sh 2009-07-17 17:19:13 UTC (rev 134) +++ trunk/opentrep/db/mysql/drop_tables_from_mysql_db.sh 2009-07-18 00:50:00 UTC (rev 135) @@ -1,54 +0,0 @@ -#!/bin/sh -# -# One parameter is required for this script: -# - the username -# -# Two parameters are optional: -# - the host server of the database -# - the port of the database -# - -if [ "$1" = "" -o "$1" = "-h" -o "$1" = "--help" ]; -then - echo "Usage: $0 <Database Username> [<Database 
Server Hostname> [<Database Server Port>]]" - echo "" - exit -1 -fi - -## -# Database Server Hostname -DB_HOST="localhost" -if [ "$2" != "" ]; -then - DB_HOST="$2" -fi - -# Database Server Port -DB_PORT="3306" -if [ "$3" != "" ]; -then - DB_PORT="$3" -fi - -# Database User -DB_USER="$1" - -# Database Password -DB_PASSWD="${DB_USER}" - -# Database Name -DB_NAME="opentrep" - -# Drop a table -function dropTable() { - echo "The ${TABLE} table will be dropped:" - mysql -u ${DB_USER} --password=${DB_PASSWD} -P ${DB_PORT} -h ${DB_HOST} ${DB_NAME} -e "drop table ${DB_NAME}.${TABLE}" -} - -# Table: drop the ref_place_details table -TABLE=ref_place_details -dropTable - -# Table: drop the ref_place_names table -TABLE=ref_place_names -dropTable Modified: trunk/opentrep/opentrep/OPENTREP_Types.hpp =================================================================== --- trunk/opentrep/opentrep/OPENTREP_Types.hpp 2009-07-17 17:19:13 UTC (rev 134) +++ trunk/opentrep/opentrep/OPENTREP_Types.hpp 2009-07-18 00:50:00 UTC (rev 135) @@ -56,8 +56,11 @@ /** Xapian document ID. */ typedef int XapianDocID_T; - /** Travel Search Query. */ + /** Travel search query. */ typedef std::string TravelQuery_T; + + /** Number of matching documents. 
*/ + typedef unsigned short NbOfMatches_T; } #endif // __OPENTREP_OPENTREP_TYPES_HPP Property changes on: trunk/opentrep/opentrep/batches ___________________________________________________________________ Modified: svn:ignore - .deps .libs Makefile Makefile.in opentrep_indexer opentrep_searcher + .deps .libs Makefile Makefile.in opentrep_indexer* opentrep_searcher* Modified: trunk/opentrep/opentrep/batches/indexer.cpp =================================================================== --- trunk/opentrep/opentrep/batches/indexer.cpp 2009-07-17 17:19:13 UTC (rev 134) +++ trunk/opentrep/opentrep/batches/indexer.cpp 2009-07-18 00:50:00 UTC (rev 135) @@ -12,43 +12,56 @@ #include <boost/program_options.hpp> // OPENTREP #include <opentrep/OPENTREP_Service.hpp> +#include <opentrep/config/opentrep-paths.hpp> + +// //////// Type definitions /////// +typedef std::vector<std::string> WordList_T; + + +// //////// Constants ////// +/** Default name and location for the log file. */ +const std::string K_OPENTREP_DEFAULT_LOG_FILENAME ("opentrep_indexer.log"); + +/** Default name and location for the Xapian database. */ +const std::string K_OPENTREP_DEFAULT_DATABSE_FILEPATH("/tmp/opentrep/traveldb"); + + // ///////// Parsing of Options & Configuration ///////// -// A helper function to simplify the main part. -template<class T> std::ostream& operator<< (std::ostream& os, - const std::vector<T>& v) { - std::copy (v.begin(), v.end(), std::ostream_iterator<T> (std::cout, " ")); - return os; -} +/** Early return status (so that it can be differentiated from an error). */ +const int K_OPENTREP_EARLY_RETURN_STATUS = 99; -int readConfiguration (int argc, char* argv[]) { - int opt; - - // Declare a group of options that will be - // allowed only on command line - boost::program_options::options_description generic("Generic options"); +/** Read and parse the command line options. 
*/ +int readConfiguration (int argc, char* argv[], + std::string& ioDatabaseFilepath, + std::string& ioLogFilename) { + + // Declare a group of options that will be allowed only on command line + boost::program_options::options_description generic ("Generic options"); generic.add_options() + ("prefix", "print installation prefix") ("version,v", "print version string") ("help,h", "produce help message"); - // Declare a group of options that will be allowed both on command line and in - // config file - boost::program_options::options_description config("Configuration"); + // Declare a group of options that will be allowed both on command + // line and in config file + boost::program_options::options_description config ("Configuration"); config.add_options() - ("optimization", - boost::program_options::value<int>(&opt)->default_value(10), - "optimization level") - ("include-path,I", - boost::program_options::value< std::vector<std::string> >()->composing(), - "include path"); + ("database,d", + boost::program_options::value< std::string >(&ioDatabaseFilepath)->default_value(K_OPENTREP_DEFAULT_DATABSE_FILEPATH), + "Xapian database filepath (e.g., /tmp/opentrep/traveldb)") + ("log,l", + boost::program_options::value< std::string >(&ioLogFilename)->default_value(K_OPENTREP_DEFAULT_LOG_FILENAME), + "Filepath for the logs") + ; // Hidden options, will be allowed both on command line and // in config file, but will not be shown to the user. 
- boost::program_options::options_description hidden("Hidden options"); + boost::program_options::options_description hidden ("Hidden options"); hidden.add_options() - ("input-file", + ("copyright", boost::program_options::value< std::vector<std::string> >(), - "input file"); + "Show the copyright (license)"); boost::program_options::options_description cmdline_options; cmdline_options.add(generic).add(config).add(hidden); @@ -56,46 +69,48 @@ boost::program_options::options_description config_file_options; config_file_options.add(config).add(hidden); - boost::program_options::options_description visible("Allowed options"); + boost::program_options::options_description visible ("Allowed options"); visible.add(generic).add(config); boost::program_options::positional_options_description p; - p.add("input-file", -1); + p.add ("copyright", -1); boost::program_options::variables_map vm; boost::program_options:: - store (boost::program_options::command_line_parser(argc, argv). - options (cmdline_options).positional(p).run(), vm); + store (boost::program_options::command_line_parser (argc, argv). 
+ options (cmdline_options).positional(p).run(), vm); - std::ifstream ifs ("request_parser.cfg"); + std::ifstream ifs ("opentrep_indexer.cfg"); boost::program_options::store (parse_config_file (ifs, config_file_options), vm); boost::program_options::notify (vm); if (vm.count ("help")) { std::cout << visible << std::endl; - return 0; + return K_OPENTREP_EARLY_RETURN_STATUS; } if (vm.count ("version")) { - std::cout << "Open Travel Request Parser, version 1.0" << std::endl; - return 0; + std::cout << PACKAGE_NAME << ", version " << PACKAGE_VERSION << std::endl; + return K_OPENTREP_EARLY_RETURN_STATUS; } - if (vm.count ("include-path")) { - std::cout << "Include paths are: " - << vm["include-path"].as< std::vector<std::string> >() - << std::endl; + if (vm.count ("prefix")) { + std::cout << "Installation prefix: " << PREFIXDIR << std::endl; + return K_OPENTREP_EARLY_RETURN_STATUS; } - if (vm.count ("input-file")) { - std::cout << "Input files are: " - << vm["input-file"].as< std::vector<std::string> >() + if (vm.count ("database")) { + ioDatabaseFilepath = vm["database"].as< std::string >(); + std::cout << "Xapian database filepath is: " << ioDatabaseFilepath << std::endl; } - std::cout << "Optimization level is " << opt << std::endl; - + if (vm.count ("log")) { + ioLogFilename = vm["log"].as< std::string >(); + std::cout << "Log filename is: " << ioLogFilename << std::endl; + } + return 0; } @@ -105,19 +120,17 @@ try { // Output log File - std::string lLogFilename ("indexer.log"); + std::string lLogFilename; // Xapian database name (directory of the index) - OPENTREP::TravelDatabaseName_T lXapianDatabaseName ("traveldb"); + OPENTREP::TravelDatabaseName_T lXapianDatabaseName; - if (argc >= 1 && argv[1] != NULL) { - std::istringstream istr (argv[1]); - istr >> lLogFilename; - } + // Call the command-line option parser + const int lOptionParserStatus = + readConfiguration (argc, argv, lXapianDatabaseName, lLogFilename); - if (argc >= 2 && argv[2] != NULL) { - 
std::istringstream istr (argv[2]); - istr >> lXapianDatabaseName; + if (lOptionParserStatus == K_OPENTREP_EARLY_RETURN_STATUS) { + return 0; } // Set the log parameters @@ -135,7 +148,6 @@ // Close the Log outputFile logOutputFile.close(); - } catch (const OPENTREP::RootException& otexp) { std::cerr << "Standard exception: " << otexp.what() << std::endl; Modified: trunk/opentrep/opentrep/batches/searcher.cpp =================================================================== --- trunk/opentrep/opentrep/batches/searcher.cpp 2009-07-17 17:19:13 UTC (rev 134) +++ trunk/opentrep/opentrep/batches/searcher.cpp 2009-07-18 00:50:00 UTC (rev 135) @@ -15,15 +15,17 @@ #include <opentrep/OPENTREP_Service.hpp> #include <opentrep/config/opentrep-paths.hpp> + // //////// Type definitions /////// typedef std::vector<std::string> WordList_T; + // //////// Constants ////// /** Default name and location for the log file. */ const std::string K_OPENTREP_DEFAULT_LOG_FILENAME ("opentrep_searcher.log"); /** Default name and location for the Xapian database. */ -const std::string K_OPENTREP_DEFAULT_DATABSE_FILEPATH ("/tmp/opentrep/traveldb"); +const std::string K_OPENTREP_DEFAULT_DATABSE_FILEPATH("/tmp/opentrep/traveldb"); /** Default travel query string, to be seached against the Xapian database. */ const std::string K_OPENTREP_DEFAULT_QUERY_STRING ("sna francicso rio de janero lso anglese reykyavki"); @@ -31,6 +33,7 @@ /** Default error distance for spelling corrections. 
*/ const unsigned short K_OPENTREP_DEFAULT_SPELLING_ERROR_DISTANCE = 3; + // ////////////////////////////////////////////////////////////////////// void tokeniseStringIntoWordList (const std::string& iPhrase, WordList_T& ioWordList) { Modified: trunk/opentrep/opentrep/bom/DocumentList.hpp =================================================================== --- trunk/opentrep/opentrep/bom/DocumentList.hpp 2009-07-17 17:19:13 UTC (rev 134) +++ trunk/opentrep/opentrep/bom/DocumentList.hpp 2009-07-18 00:50:00 UTC (rev 135) @@ -5,14 +5,23 @@ // Import section // ////////////////////////////////////////////////////////////////////// // STL -#include <map> +#include <list> +// OpenTREP +#include <opentrep/OPENTREP_Types.hpp> // Xapian #include <xapian.h> namespace OPENTREP { - /** List of Xapian documents. */ - typedef std::multimap<Xapian::percent, Xapian::Document> DocumentList_T; + /** Xapian document and its associated matching percentage. */ + typedef std::pair<Xapian::percent, Xapian::Document> MatchingDocument_T; + /** A matching Xapian document, along with the query string which it + matches. */ + typedef std::pair<TravelQuery_T, MatchingDocument_T> QueryAndDocument_T; + + /** List of matching Xapian documents. 
*/ + typedef std::list<QueryAndDocument_T> DocumentList_T; + } #endif // __OPENTREP_BOM_DOCUMENTLIST_HPP Modified: trunk/opentrep/opentrep/bom/Result.cpp =================================================================== --- trunk/opentrep/opentrep/bom/Result.cpp 2009-07-17 17:19:13 UTC (rev 134) +++ trunk/opentrep/opentrep/bom/Result.cpp 2009-07-18 00:50:00 UTC (rev 135) @@ -5,7 +5,7 @@ #include <cassert> #include <string> #include <sstream> -// OPENTREP +// OpenTREP #include <opentrep/bom/StringMatcher.hpp> #include <opentrep/bom/Result.hpp> #include <opentrep/service/Logger.hpp> @@ -24,7 +24,6 @@ // ////////////////////////////////////////////////////////////////////// void Result::init () { - _documentList.clear(); } // ////////////////////////////////////////////////////////////////////// @@ -44,14 +43,11 @@ std::ostringstream oStr; oStr << describeShortKey() << std::endl; - for (DocumentList_T::const_iterator itDoc = _documentList.begin(); - itDoc != _documentList.end(); ++itDoc) { - const Xapian::percent& lPercent = itDoc->first; - const Xapian::Document& lDocument = itDoc->second; - const Xapian::docid& lDocID = lDocument.get_docid(); - oStr << "Document ID " << lDocID << "\t" << lPercent - << "% [" << lDocument.get_data() << "]" << std::endl; - } + const Xapian::percent& lPercentage = _matchingDocument.first; + const Xapian::Document& lDocument = _matchingDocument.second; + const Xapian::docid& lDocID = lDocument.get_docid(); + oStr << "Document ID " << lDocID << "\t" << lPercentage + << "% [" << lDocument.get_data() << "]" << std::endl; return oStr.str(); } @@ -65,67 +61,4 @@ void Result::fromStream (std::istream& ioIn) { } - // ////////////////////////////////////////////////////////////////////// - const Xapian::Document& Result::getBestMatchingDocument() const { - /** - Retrieve the best matching document. 
As the document list (STL map) - is sorted by ascending order of the matching percentage, the best - matching one is located at the end (back) of the list (STL map). - */ - DocumentList_T::const_reverse_iterator itDocument = _documentList.rbegin(); - return itDocument->second; - } - - // ////////////////////////////////////////////////////////////////////// - const Xapian::percent& Result::getBestMatchingPercentage() const { - /** - Retrieve the best matching document. As the document list (STL map) - is sorted by ascending order of the matching percentage, the best - matching one is located at the end (back) of the list (STL map). - */ - DocumentList_T::const_reverse_iterator itDocument = _documentList.rbegin(); - return itDocument->first; - } - - // ////////////////////////////////////////////////////////////////////// - void Result::searchString () { - - // Catch any Xapian::Error exceptions thrown - try { - - bool shouldStop = false; - while (shouldStop == false) { - // DEBUG - /* - OPENTREP_LOG_DEBUG (std::endl << "--------------------------------" - << std::endl << "Current query string: `" << ioQueryString << "'"); - */ - - // Retrieve the list of documents matching the query string - Xapian::MSet lMatchingSet; - StringMatcher::searchString (lMatchingSet, _queryString, _database); - - // Create the corresponding list of documents - StringMatcher::createDocumentListFromMSet (lMatchingSet, _documentList); - - // Stop if a result is found. - if (_documentList.empty() == false) { - shouldStop = true; - break; - } - - // Remove a word from the query string - StringMatcher::removeOneWord (_queryString); - - // Stop when the resulting string gets empty. 
- if (_queryString.empty() == true) { - shouldStop = true; - } - } - - } catch (const Xapian::Error& error) { - OPENTREP_LOG_ERROR ("Exception: " << error.get_msg()); - } - } - } Modified: trunk/opentrep/opentrep/bom/Result.hpp =================================================================== --- trunk/opentrep/opentrep/bom/Result.hpp 2009-07-17 17:19:13 UTC (rev 134) +++ trunk/opentrep/opentrep/bom/Result.hpp 2009-07-18 00:50:00 UTC (rev 135) @@ -25,31 +25,43 @@ return _queryString; } - /** Get the list of Xapian document objects. */ - const DocumentList_T& getDocumentList() const { - return _documentList; + /** Get the Matching Xapian document object, along with its + corresponding matching percentage. */ + const MatchingDocument_T& getMatchingDocument() const { + return _matchingDocument; } - /** Retrieve the best matching Xapian document object. */ - const Xapian::Document& getBestMatchingDocument() const; + /** Retrieve the percentage corresponding to the matching Xapian + document object. */ + const Xapian::percent& getPercentage() const { + return _matchingDocument.first; + } + + /** Retrieve the matching Xapian document object. */ + const Xapian::Document& getDocument() const { + return _matchingDocument.second; + } - /** Retrieve the percentage corresponding to the best matching - Xapian document object. */ - const Xapian::percent& getBestMatchingPercentage() const; - // ////////////// Setters ///////////// /** Set the query string. */ void setQueryString (const TravelQuery_T& iQueryString) { _queryString = iQueryString; } - - public: - // /////////// Business methods ///////// - /** Retrieve the list of documents matching the query string. */ - void searchString (); + /** Set the matching Xapian document object and its corresponding + matching percentage. 
*/ + void setMatchingDocument (const MatchingDocument_T& iMatchingDocument) { + _matchingDocument = iMatchingDocument; + } + /** Set the matching Xapian document object and its corresponding + matching percentage. */ + void setQueryAndDocument (const QueryAndDocument_T& iQueryAndDocument) { + _queryString = iQueryAndDocument.first; + _matchingDocument = iQueryAndDocument.second; + } + public: // /////////// Display support methods ///////// @@ -98,8 +110,9 @@ /** Xapian database. */ const Xapian::Database& _database; - /** List of Xapian document objects. */ - DocumentList_T _documentList; + /** Matching Xapian document object, along with its corresponding + matching percentage. */ + MatchingDocument_T _matchingDocument; }; } Modified: trunk/opentrep/opentrep/bom/ResultHolder.cpp =================================================================== --- trunk/opentrep/opentrep/bom/ResultHolder.cpp 2009-07-17 17:19:13 UTC (rev 134) +++ trunk/opentrep/opentrep/bom/ResultHolder.cpp 2009-07-18 00:50:00 UTC (rev 135) @@ -10,9 +10,6 @@ #include <opentrep/bom/StringMatcher.hpp> #include <opentrep/bom/Result.hpp> #include <opentrep/bom/ResultHolder.hpp> -// TODO: move that out of the BOM layer -#include <opentrep/factory/FacResultHolder.hpp> -#include <opentrep/factory/FacResult.hpp> #include <opentrep/service/Logger.hpp> namespace OPENTREP { @@ -71,11 +68,59 @@ } // ////////////////////////////////////////////////////////////////////// - void ResultHolder::searchString () { + bool ResultHolder::searchString (TravelQuery_T& ioPartialQueryString, + MatchingDocument_T& ioMatchingDocument) { + bool oFoundDocument = false; // Catch any Xapian::Error exceptions thrown try { + bool shouldStop = false; + while (shouldStop == false) { + // DEBUG + /* + OPENTREP_LOG_DEBUG (std::endl << "--------------------------------" + << std::endl << "Current query string: `" << ioPartialQueryString + << "'"); + */ + + // Retrieve the list of documents matching the query string + Xapian::MSet 
lMatchingSet; + StringMatcher::searchString (lMatchingSet, ioPartialQueryString, + _database); + + // Create the corresponding list of documents + oFoundDocument = StringMatcher:: + extractBestMatchingDocumentFromMSet(lMatchingSet, ioMatchingDocument); + + // Stop if a result is found. + if (oFoundDocument == true) { + shouldStop = true; + break; + } + + // Remove a word from the query string + StringMatcher::removeOneWord (ioPartialQueryString); + + // Stop when the resulting string gets empty. + if (ioPartialQueryString.empty() == true) { + shouldStop = true; + } + } + + } catch (const Xapian::Error& error) { + OPENTREP_LOG_ERROR ("Exception: " << error.get_msg()); + } + + return oFoundDocument; + } + + // ////////////////////////////////////////////////////////////////////// + void ResultHolder::searchString (DocumentList_T& ioDocumentList) { + + // Catch any Xapian::Error exceptions thrown + try { + std::string lRemainingQueryString (_queryString); bool shouldStop = false; while (shouldStop == false) { @@ -91,21 +136,23 @@ again no result, until either a result is found or the resulting string gets empty. */ - DocumentList_T lDocumentList; - // TODO: move that out of the BOM layer - Result& lResult = FacResult::instance().create (_database); - std::string lQueryString (lRemainingQueryString); - // - lResult.setQueryString (lQueryString); - lResult.searchString (); + /** + Main algorithm, altering the query string (suppressing the + furthest right words, so that the remaining left part be matched + against the Xapian database). + */ + MatchingDocument_T lMatchingDocument; + const bool hasFoundDocument = searchString (lQueryString, + lMatchingDocument); + + if (hasFoundDocument == true) { + const QueryAndDocument_T lQueryAndDocument (lQueryString, + lMatchingDocument); + ioDocumentList.push_back (lQueryAndDocument); + } - // Add the Result object (holding the list of matching - // documents) to the dedicated list. 
- // TODO: move that out of the BOM layer - FacResultHolder::initLinkWithResult (*this, lResult); - /** Remove, from the lRemainingQueryString string, the part which has been already successfully parsed. @@ -117,7 +164,6 @@ 'rio de janeiro'. So, the already parsed part, namely 'sna francisco', must be subtracted from the initial query string. */ - lQueryString = lResult.getQueryString(); StringMatcher::subtractParsedToRemaining (lQueryString, lRemainingQueryString); Modified: trunk/opentrep/opentrep/bom/ResultHolder.hpp =================================================================== --- trunk/opentrep/opentrep/bom/ResultHolder.hpp 2009-07-17 17:19:13 UTC (rev 134) +++ trunk/opentrep/opentrep/bom/ResultHolder.hpp 2009-07-18 00:50:00 UTC (rev 135) @@ -7,6 +7,7 @@ // OpenTREP #include <opentrep/OPENTREP_Types.hpp> #include <opentrep/bom/BomAbstract.hpp> +#include <opentrep/bom/DocumentList.hpp> #include <opentrep/bom/ResultList.hpp> // Forward declarations @@ -38,9 +39,17 @@ public: // /////////// Business methods ///////// /** Retrieve the list of documents matching the query string. */ - void searchString (); + void searchString (DocumentList_T&); + private: + /** Retrieve the document best matching the query string. + @param TravelQuery_T& The partial query string. + @param MatchingDocument_T& The best matching Xapian document (if found). + @return bool Whether such a best matching document has been found. */ + bool searchString(TravelQuery_T& ioPartialQueryString, MatchingDocument_T&); + + public: // /////////// Display support methods ///////// /** Dump a Business Object into an output stream. 
Modified: trunk/opentrep/opentrep/bom/StringMatcher.cpp =================================================================== --- trunk/opentrep/opentrep/bom/StringMatcher.cpp 2009-07-17 17:19:13 UTC (rev 134) +++ trunk/opentrep/opentrep/bom/StringMatcher.cpp 2009-07-18 00:50:00 UTC (rev 135) @@ -272,21 +272,30 @@ } // ////////////////////////////////////////////////////////////////////// - void StringMatcher:: - createDocumentListFromMSet (const Xapian::MSet& iMatchingSet, - DocumentList_T& ioDocumentList) { - // Empty the list of documents - ioDocumentList.clear(); + bool StringMatcher:: + extractBestMatchingDocumentFromMSet (const Xapian::MSet& iMatchingSet, + MatchingDocument_T& ioMatchingDocument) { + bool oFoundDocument = false; - for (Xapian::MSetIterator itDoc = iMatchingSet.begin(); - itDoc != iMatchingSet.end(); ++itDoc) { - const Xapian::Document& lDocument = itDoc.get_document(); + if (iMatchingSet.empty() == true) { + return oFoundDocument; + } + oFoundDocument = true; - ioDocumentList.insert (DocumentList_T::value_type (itDoc.get_percent(), - lDocument)); - } + /** + Retrieve the best matching document. If there are several such + best matching documents (for instance, several at, say, 100%), + one is taken randomly (well, we take the first one of the STL + multimap, so it is not exactly randomly, but the result is the + same: it appears random). 
+ */ + Xapian::MSetIterator itDoc = iMatchingSet.begin(); + ioMatchingDocument.first = itDoc.get_percent(); + ioMatchingDocument.second = itDoc.get_document(); + + return oFoundDocument; } - + // ////////////////////////////////////////////////////////////////////// void StringMatcher::removeOneWord (std::string& ioQueryString) { assert (ioQueryString.empty() == false); Modified: trunk/opentrep/opentrep/bom/StringMatcher.hpp =================================================================== --- trunk/opentrep/opentrep/bom/StringMatcher.hpp 2009-07-17 17:19:13 UTC (rev 134) +++ trunk/opentrep/opentrep/bom/StringMatcher.hpp 2009-07-18 00:50:00 UTC (rev 135) @@ -29,10 +29,17 @@ static void searchString (Xapian::MSet&, const std::string& iSearchString, const Xapian::Database&); - /** Copy the Xapian MSet (matching set) object into a document - list object. */ - static void createDocumentListFromMSet (const Xapian::MSet&, - DocumentList_T&); + /** + Extract the best matching Xapian document. + <br>If there are several such best matching documents (for + instance, several at, say, 100%), one is taken randomly. Well, + as we take the first one of the STL multimap, it is not exactly + randomly, but the result is the same: it appears to be random. + @return bool Whether or not there was a matching document. + */ + static bool + extractBestMatchingDocumentFromMSet (const Xapian::MSet&, + MatchingDocument_T&); /** Remove the word furthest at right. 
*/ static void removeOneWord (std::string& ioQueryString); Modified: trunk/opentrep/opentrep/command/RequestInterpreter.cpp =================================================================== --- trunk/opentrep/opentrep/command/RequestInterpreter.cpp 2009-07-17 17:19:13 UTC (rev 134) +++ trunk/opentrep/opentrep/command/RequestInterpreter.cpp 2009-07-18 00:50:00 UTC (rev 135) @@ -15,6 +15,7 @@ #include <opentrep/factory/FacPlaceHolder.hpp> #include <opentrep/factory/FacPlace.hpp> #include <opentrep/factory/FacResultHolder.hpp> +#include <opentrep/factory/FacResult.hpp> #include <opentrep/command/DBManager.hpp> #include <opentrep/command/RequestInterpreter.hpp> #include <opentrep/service/Logger.hpp> @@ -27,8 +28,7 @@ void RequestInterpreter:: interpretTravelRequest (soci::session& ioSociSession, const TravelDatabaseName_T& iTravelDatabaseName, - const TravelQuery_T& iTravelQuery, - PlaceHolder& ioPlaceHolder) { + const TravelQuery_T& iTravelQuery) { try { @@ -39,9 +39,30 @@ ResultHolder& lResultHolder = FacResultHolder::instance().create (iTravelQuery, lXapianDatabase); - // - lResultHolder.searchString(); + // Main algorithm + DocumentList_T lDocumentList; + lResultHolder.searchString (lDocumentList); + // Back-up the (retrieved) matching Xapian documents into still + // to-be-created Result objects. + for (DocumentList_T::const_iterator itDoc = lDocumentList.begin(); + itDoc != lDocumentList.end(); ++itDoc) { + // Retrieve both the Xapian document object and the corresponding + // matching percentage (most of the time, it is 100%) + const QueryAndDocument_T& lQueryAndDocument = *itDoc; + + // Create a Result object + Result& lResult = FacResult::instance().create (lXapianDatabase); + + // Fill the Result object with both the corresponding Document object + // and its associated query string + lResult.setQueryAndDocument (lQueryAndDocument); + + // Add the Result object (holding the list of matching + // documents) to the dedicated list. 
+ FacResultHolder::initLinkWithResult (lResultHolder, lResult); + } + // DEBUG OPENTREP_LOG_DEBUG (std::endl << "=========================================" @@ -50,6 +71,9 @@ << "=========================================" << std::endl << std::endl); + // Create a PlaceHolder object, to collect the matching Place objects + PlaceHolder& lPlaceHolder = FacPlaceHolder::instance().create(); + // Browse the list of result objects const ResultList_T& lResultList = lResultHolder.getResultList(); for (ResultList_T::const_iterator itResult = lResultList.begin(); @@ -59,10 +83,8 @@ assert (lResult_ptr != NULL); // Retrieve the parameters of the best matching document - const Xapian::Document& lDocument = - lResult_ptr->getBestMatchingDocumen... [truncated message content] |
From: <den...@us...> - 2009-07-17 17:19:16
|
Revision: 134 http://opentrep.svn.sourceforge.net/opentrep/?rev=134&view=rev Author: denis_arnaud Date: 2009-07-17 17:19:13 +0000 (Fri, 17 Jul 2009) Log Message: ----------- [Dev] Added a few factory classes. Modified Paths: -------------- trunk/opentrep/opentrep/OPENTREP_Service.hpp trunk/opentrep/opentrep/batches/indexer.cpp trunk/opentrep/opentrep/batches/searcher.cpp trunk/opentrep/opentrep/bom/Names.cpp trunk/opentrep/opentrep/bom/Names.hpp trunk/opentrep/opentrep/bom/Place.cpp trunk/opentrep/opentrep/bom/Place.hpp trunk/opentrep/opentrep/bom/PlaceList.hpp trunk/opentrep/opentrep/bom/Result.cpp trunk/opentrep/opentrep/bom/Result.hpp trunk/opentrep/opentrep/bom/ResultHolder.cpp trunk/opentrep/opentrep/bom/ResultHolder.hpp trunk/opentrep/opentrep/bom/StringMatcher.cpp trunk/opentrep/opentrep/bom/World.cpp trunk/opentrep/opentrep/bom/World.hpp trunk/opentrep/opentrep/bom/sources.mk trunk/opentrep/opentrep/command/DBManager.cpp trunk/opentrep/opentrep/command/RequestInterpreter.cpp trunk/opentrep/opentrep/command/RequestInterpreter.hpp trunk/opentrep/opentrep/factory/FacPlace.cpp trunk/opentrep/opentrep/factory/FacPlace.hpp trunk/opentrep/opentrep/factory/FacWorld.cpp trunk/opentrep/opentrep/factory/sources.mk trunk/opentrep/opentrep/service/OPENTREP_Service.cpp trunk/opentrep/refdata/data/ref_place_names.csv trunk/opentrep/test/parsers/search_string_parser.cpp Added Paths: ----------- trunk/opentrep/opentrep/bom/PlaceHolder.cpp trunk/opentrep/opentrep/bom/PlaceHolder.hpp trunk/opentrep/opentrep/factory/FacPlaceHolder.cpp trunk/opentrep/opentrep/factory/FacPlaceHolder.hpp trunk/opentrep/opentrep/factory/FacResult.cpp trunk/opentrep/opentrep/factory/FacResult.hpp trunk/opentrep/opentrep/factory/FacResultHolder.cpp trunk/opentrep/opentrep/factory/FacResultHolder.hpp Modified: trunk/opentrep/opentrep/OPENTREP_Service.hpp =================================================================== --- trunk/opentrep/opentrep/OPENTREP_Service.hpp 2009-07-17 00:10:38 UTC (rev 
133) +++ trunk/opentrep/opentrep/OPENTREP_Service.hpp 2009-07-17 17:19:13 UTC (rev 134) @@ -6,6 +6,7 @@ // ////////////////////////////////////////////////////////////////////// // STL #include <ostream> +#include <string> // OPENTREP #include <opentrep/OPENTREP_Types.hpp> @@ -18,14 +19,12 @@ class OPENTREP_Service { public: /** Constructor. */ - OPENTREP_Service (); + OPENTREP_Service (std::ostream& ioLogStream, + const std::string& iXapianDatabaseFilepath); + /** Destructor. */ ~OPENTREP_Service(); - /** Initialise. */ - void init (std::ostream& ioLogStream, - const std::string& iTravelDatabaseName); - /** Build the Xapian database (index) on the BOM held in memory. */ void buildSearchIndex (); @@ -35,9 +34,15 @@ private: // /////// Construction and Destruction helper methods /////// - /** Default Constructor. */ + /** Default constructor. */ + OPENTREP_Service (); + /** Default copy constructor. */ OPENTREP_Service (const OPENTREP_Service&); + /** Initialise. */ + void init (std::ostream& ioLogStream, + const std::string& iXapianDatabaseFilepath); + /** Initilise the log. 
*/ void logInit (const LOG::EN_LogLevel iLogLevel, std::ostream& ioLogStream); Modified: trunk/opentrep/opentrep/batches/indexer.cpp =================================================================== --- trunk/opentrep/opentrep/batches/indexer.cpp 2009-07-17 00:10:38 UTC (rev 133) +++ trunk/opentrep/opentrep/batches/indexer.cpp 2009-07-17 17:19:13 UTC (rev 134) @@ -127,8 +127,8 @@ logOutputFile.clear(); // Initialise the context - OPENTREP::OPENTREP_Service opentrepService; - opentrepService.init (logOutputFile, lXapianDatabaseName); + OPENTREP::OPENTREP_Service opentrepService (logOutputFile, + lXapianDatabaseName); // Launch the indexation opentrepService.buildSearchIndex(); Modified: trunk/opentrep/opentrep/batches/searcher.cpp =================================================================== --- trunk/opentrep/opentrep/batches/searcher.cpp 2009-07-17 00:10:38 UTC (rev 133) +++ trunk/opentrep/opentrep/batches/searcher.cpp 2009-07-17 17:19:13 UTC (rev 134) @@ -4,15 +4,73 @@ #include <iostream> #include <sstream> #include <fstream> -#include <map> #include <vector> +#include <string> // Boost (Extended STL) #include <boost/date_time/posix_time/posix_time.hpp> #include <boost/date_time/gregorian/gregorian.hpp> +#include <boost/tokenizer.hpp> #include <boost/program_options.hpp> // OPENTREP #include <opentrep/OPENTREP_Service.hpp> +#include <opentrep/config/opentrep-paths.hpp> +// //////// Type definitions /////// +typedef std::vector<std::string> WordList_T; + +// //////// Constants ////// +/** Default name and location for the log file. */ +const std::string K_OPENTREP_DEFAULT_LOG_FILENAME ("opentrep_searcher.log"); + +/** Default name and location for the Xapian database. */ +const std::string K_OPENTREP_DEFAULT_DATABSE_FILEPATH ("/tmp/opentrep/traveldb"); + +/** Default travel query string, to be seached against the Xapian database. 
*/ +const std::string K_OPENTREP_DEFAULT_QUERY_STRING ("sna francicso rio de janero lso anglese reykyavki"); + +/** Default error distance for spelling corrections. */ +const unsigned short K_OPENTREP_DEFAULT_SPELLING_ERROR_DISTANCE = 3; + +// ////////////////////////////////////////////////////////////////////// +void tokeniseStringIntoWordList (const std::string& iPhrase, + WordList_T& ioWordList) { + // Empty the word list + ioWordList.clear(); + + // Boost Tokeniser + typedef boost::tokenizer<boost::char_separator<char> > Tokeniser_T; + + // Define the separators + const boost::char_separator<char> lSepatorList(" .,;:|+-*/_=!@#$%`~^&(){}[]?'<>\""); + + // Initialise the phrase to be tokenised + Tokeniser_T lTokens (iPhrase, lSepatorList); + for (Tokeniser_T::const_iterator tok_iter = lTokens.begin(); + tok_iter != lTokens.end(); ++tok_iter) { + const std::string& lTerm = *tok_iter; + ioWordList.push_back (lTerm); + } + +} + +// ////////////////////////////////////////////////////////////////////// +std::string createStringFromWordList (const WordList_T& iWordList) { + std::ostringstream oStr; + + unsigned short idx = iWordList.size(); + for (WordList_T::const_iterator itWord = iWordList.begin(); + itWord != iWordList.end(); ++itWord, --idx) { + const std::string& lWord = *itWord; + oStr << lWord; + if (idx > 1) { + oStr << " "; + } + } + + return oStr.str(); +} + + // ///////// Parsing of Options & Configuration ///////// // A helper function to simplify the main part. template<class T> std::ostream& operator<< (std::ostream& os, @@ -21,34 +79,57 @@ return os; } -int readConfiguration (int argc, char* argv[]) { - int opt; - - // Declare a group of options that will be - // allowed only on command line - boost::program_options::options_description generic("Generic options"); +/** Early return status (so that it can be differentiated from an error). */ +const int K_OPENTREP_EARLY_RETURN_STATUS = 99; + +/** Read and parse the command line options. 
*/ +int readConfiguration (int argc, char* argv[], + unsigned short& ioSpellingErrorDistance, + std::string& ioQueryString, + std::string& ioDatabaseFilepath, + std::string& ioLogFilename) { + + // Initialise the travel query string, if that one is empty + if (ioQueryString.empty() == true) { + ioQueryString = K_OPENTREP_DEFAULT_QUERY_STRING; + } + + // Transform the query string into a list of words (STL strings) + WordList_T lWordList; + tokeniseStringIntoWordList (ioQueryString, lWordList); + + // Declare a group of options that will be allowed only on command line + boost::program_options::options_description generic ("Generic options"); generic.add_options() + ("prefix", "print installation prefix") ("version,v", "print version string") ("help,h", "produce help message"); - // Declare a group of options that will be allowed both on command line and in - // config file - boost::program_options::options_description config("Configuration"); + // Declare a group of options that will be allowed both on command + // line and in config file + boost::program_options::options_description config ("Configuration"); config.add_options() - ("optimization", - boost::program_options::value<int>(&opt)->default_value(10), - "optimization level") - ("include-path,I", - boost::program_options::value< std::vector<std::string> >()->composing(), - "include path"); + ("error,e", + boost::program_options::value< unsigned short >(&ioSpellingErrorDistance)->default_value(K_OPENTREP_DEFAULT_SPELLING_ERROR_DISTANCE), + "Spelling error distance (e.g., 3)") + ("query,q", + boost::program_options::value< WordList_T >(&lWordList)->multitoken(), + "Traval query word list (e.g. 
sna francicso rio de janero lso anglese reykyavki") + ("database,d", + boost::program_options::value< std::string >(&ioDatabaseFilepath)->default_value(K_OPENTREP_DEFAULT_DATABSE_FILEPATH), + "Xapian database filepath (e.g., /tmp/opentrep/traveldb)") + ("log,l", + boost::program_options::value< std::string >(&ioLogFilename)->default_value(K_OPENTREP_DEFAULT_LOG_FILENAME), + "Filepath for the logs") + ; // Hidden options, will be allowed both on command line and // in config file, but will not be shown to the user. - boost::program_options::options_description hidden("Hidden options"); + boost::program_options::options_description hidden ("Hidden options"); hidden.add_options() - ("input-file", + ("copyright", boost::program_options::value< std::vector<std::string> >(), - "input file"); + "Show the copyright (license)"); boost::program_options::options_description cmdline_options; cmdline_options.add(generic).add(config).add(hidden); @@ -56,45 +137,53 @@ boost::program_options::options_description config_file_options; config_file_options.add(config).add(hidden); - boost::program_options::options_description visible("Allowed options"); + boost::program_options::options_description visible ("Allowed options"); visible.add(generic).add(config); boost::program_options::positional_options_description p; - p.add("input-file", -1); + p.add ("copyright", -1); boost::program_options::variables_map vm; boost::program_options:: - store (boost::program_options::command_line_parser(argc, argv). - options (cmdline_options).positional(p).run(), vm); + store (boost::program_options::command_line_parser (argc, argv). 
+ options (cmdline_options).positional(p).run(), vm); - std::ifstream ifs ("request_parser.cfg"); + std::ifstream ifs ("opentrep_searcher.cfg"); boost::program_options::store (parse_config_file (ifs, config_file_options), vm); boost::program_options::notify (vm); if (vm.count ("help")) { std::cout << visible << std::endl; - return 0; + return K_OPENTREP_EARLY_RETURN_STATUS; } if (vm.count ("version")) { - std::cout << "Open Travel Request Parser, version 1.0" << std::endl; - return 0; + std::cout << PACKAGE_NAME << ", version " << PACKAGE_VERSION << std::endl; + return K_OPENTREP_EARLY_RETURN_STATUS; } - if (vm.count ("include-path")) { - std::cout << "Include paths are: " - << vm["include-path"].as< std::vector<std::string> >() - << std::endl; + if (vm.count ("prefix")) { + std::cout << "Installation prefix: " << PREFIXDIR << std::endl; + return K_OPENTREP_EARLY_RETURN_STATUS; } - if (vm.count ("input-file")) { - std::cout << "Input files are: " - << vm["input-file"].as< std::vector<std::string> >() + if (vm.count ("database")) { + ioDatabaseFilepath = vm["database"].as< std::string >(); + std::cout << "Xapian database filepath is: " << ioDatabaseFilepath << std::endl; } - std::cout << "Optimization level is " << opt << std::endl; + if (vm.count ("log")) { + ioLogFilename = vm["log"].as< std::string >(); + std::cout << "Log filename is: " << ioLogFilename << std::endl; + } + + std::cout << "The spelling error distance is: " << ioSpellingErrorDistance + << std::endl; + + ioQueryString = createStringFromWordList (lWordList); + std::cout << "The travel query string is: " << ioQueryString << std::endl; return 0; } @@ -105,29 +194,26 @@ try { // Travel query - OPENTREP::TravelQuery_T lTravelQuery ("sna francisco rio de janero lso angeles"); + OPENTREP::TravelQuery_T lTravelQuery; // Output log File - std::string lLogFilename ("searcher.log"); + std::string lLogFilename; // Xapian database name (directory of the index) - OPENTREP::TravelDatabaseName_T 
lXapianDatabaseName ("traveldb"); + OPENTREP::TravelDatabaseName_T lXapianDatabaseName; - if (argc >= 1 && argv[1] != NULL) { - std::istringstream istr (argv[1]); - istr >> lTravelQuery; - } + // Xapian spelling error distance + unsigned short lSpellingErrorDistance; - if (argc >= 2 && argv[2] != NULL) { - std::istringstream istr (argv[2]); - istr >> lLogFilename; + // Call the command-line option parser + const int lOptionParserStatus = + readConfiguration (argc, argv, lSpellingErrorDistance, lTravelQuery, + lXapianDatabaseName, lLogFilename); + + if (lOptionParserStatus == K_OPENTREP_EARLY_RETURN_STATUS) { + return 0; } - if (argc >= 3 && argv[3] != NULL) { - std::istringstream istr (argv[3]); - istr >> lXapianDatabaseName; - } - // Set the log parameters std::ofstream logOutputFile; // open and clean the log outputfile @@ -135,8 +221,8 @@ logOutputFile.clear(); // Initialise the context - OPENTREP::OPENTREP_Service opentrepService; - opentrepService.init (logOutputFile, lXapianDatabaseName); + OPENTREP::OPENTREP_Service opentrepService (logOutputFile, + lXapianDatabaseName); // Query the Xapian database (index) opentrepService.interpretTravelRequest (lTravelQuery); Modified: trunk/opentrep/opentrep/bom/Names.cpp =================================================================== --- trunk/opentrep/opentrep/bom/Names.cpp 2009-07-17 00:10:38 UTC (rev 133) +++ trunk/opentrep/opentrep/bom/Names.cpp 2009-07-17 17:19:13 UTC (rev 134) @@ -28,6 +28,17 @@ // ////////////////////////////////////////////////////////////////////// Names::~Names() { } + + // ////////////////////////////////////////////////////////////////////// + std::string Names::getFirstName() const { + if (_nameList.empty() == true) { + return ""; + } + NameList_T::const_iterator itName = _nameList.begin(); + assert (itName != _nameList.end()); + const std::string& lName = *itName; + return lName; + } // ////////////////////////////////////////////////////////////////////// const std::string 
Names::describeShortKey() const { Modified: trunk/opentrep/opentrep/bom/Names.hpp =================================================================== --- trunk/opentrep/opentrep/bom/Names.hpp 2009-07-17 00:10:38 UTC (rev 133) +++ trunk/opentrep/opentrep/bom/Names.hpp 2009-07-17 17:19:13 UTC (rev 134) @@ -46,6 +46,11 @@ return _nameList; } + /** Get the first name of the list. + <br>Note that it can be empty (when the list is itself empty). */ + std::string getFirstName() const; + + // /////////// Setters /////////////// /** Set the language code. */ void setLanguageCode (const Language::EN_Language& iLanguageCode) { Modified: trunk/opentrep/opentrep/bom/Place.cpp =================================================================== --- trunk/opentrep/opentrep/bom/Place.cpp 2009-07-17 00:10:38 UTC (rev 133) +++ trunk/opentrep/opentrep/bom/Place.cpp 2009-07-17 17:19:13 UTC (rev 134) @@ -2,7 +2,7 @@ // Import section // ////////////////////////////////////////////////////////////////////// // C -#include <assert.h> +#include <cassert> // OpenTrep BOM #include <opentrep/bom/Place.hpp> #include <opentrep/service/Logger.hpp> @@ -10,12 +10,12 @@ namespace OPENTREP { // ////////////////////////////////////////////////////////////////////// - Place::Place () : _world (NULL) { + Place::Place () : _world (NULL), _placeHolder (NULL) { } // ////////////////////////////////////////////////////////////////////// Place::Place (const Place& iPlace) : - _world (iPlace._world), + _world (iPlace._world), _placeHolder (iPlace._placeHolder), _placeCode (iPlace._placeCode), _cityCode (iPlace._cityCode), _stateCode (iPlace._stateCode), _countryCode (iPlace._countryCode), _regionCode (iPlace._regionCode), _continentCode (iPlace._continentCode), @@ -67,6 +67,33 @@ } // ////////////////////////////////////////////////////////////////////// + std::string Place::toShortString() const { + /* When the city code is empty, it means that the place is a city and + not an airport. 
The city code is thus the same as the place code + itself. */ + std::ostringstream oStr; + oStr << describeShortKey() << ", "; + if (_cityCode.empty()) { + oStr << _placeCode << ", "; + } else { + oStr << _cityCode << ", "; + } + oStr << _stateCode + << ", " << _countryCode << ", " << _regionCode + << ", " << _continentCode << ", " << _timeZoneGroup + << ", " << _longitude << ", " << _latitude << ", " << _docID; + + NameMatrix_T::const_iterator itNameHolder = _nameMatrix.begin(); + const Names& lNameHolder = itNameHolder->second; + const std::string& lFirstName = lNameHolder.getFirstName(); + if (lFirstName.empty() == false) { + oStr << ", " << lFirstName << "."; + } + + return oStr.str(); + } + + // ////////////////////////////////////////////////////////////////////// void Place::toStream (std::ostream& ioOut) const { ioOut << toString(); } Modified: trunk/opentrep/opentrep/bom/Place.hpp =================================================================== --- trunk/opentrep/opentrep/bom/Place.hpp 2009-07-17 00:10:38 UTC (rev 133) +++ trunk/opentrep/opentrep/bom/Place.hpp 2009-07-17 17:19:13 UTC (rev 134) @@ -18,11 +18,13 @@ // Forward declarations class World; + class PlaceHolder; /** Structure modelling an place. */ class Place : public BomAbstract { friend class FacWorld; friend class FacPlace; + friend class FacPlaceHolder; friend class DbaPlace; public: // ///////// Getters //////// @@ -141,6 +143,7 @@ /** Reset the map of name lists. */ void resetMatrix(); + public: // ///////// Display methods //////// @@ -155,6 +158,9 @@ /** Get the serialised version of the Place object. */ std::string toString() const; + /** Get a short display of the Business Object. */ + std::string toShortString() const; + /** Get a string describing the whole key (differentiating two objects at any level). */ const std::string describeKey() const; @@ -182,6 +188,9 @@ /** Parent World. */ World* _world; + /** Parent PlaceHolder. 
*/ + PlaceHolder* _placeHolder; + private: // /////// Attributes ///////// /** Place code. */ Added: trunk/opentrep/opentrep/bom/PlaceHolder.cpp =================================================================== --- trunk/opentrep/opentrep/bom/PlaceHolder.cpp (rev 0) +++ trunk/opentrep/opentrep/bom/PlaceHolder.cpp 2009-07-17 17:19:13 UTC (rev 134) @@ -0,0 +1,81 @@ +// ////////////////////////////////////////////////////////////////////// +// Import section +// ////////////////////////////////////////////////////////////////////// +// C +#include <cassert> +#include <sstream> +// OpenTREP +#include <opentrep/bom/Place.hpp> +#include <opentrep/bom/PlaceHolder.hpp> +#include <opentrep/service/Logger.hpp> + +namespace OPENTREP { + + // ////////////////////////////////////////////////////////////////////// + PlaceHolder::PlaceHolder () { + init(); + } + + // ////////////////////////////////////////////////////////////////////// + PlaceHolder::~PlaceHolder () { + } + + // ////////////////////////////////////////////////////////////////////// + void PlaceHolder::init () { + _placeList.clear(); + _placeOrderedList.clear(); + } + + // ////////////////////////////////////////////////////////////////////// + const std::string PlaceHolder::describeShortKey() const { + std::ostringstream oStr; + return oStr.str(); + } + + // ////////////////////////////////////////////////////////////////////// + const std::string PlaceHolder::describeKey() const { + return describeShortKey(); + } + + // ////////////////////////////////////////////////////////////////////// + std::string PlaceHolder::toString() const { + std::ostringstream oStr; + oStr << describeShortKey() << std::endl; + + for (PlaceOrderedList_T::const_iterator itPlace = _placeOrderedList.begin(); + itPlace != _placeOrderedList.end(); ++itPlace) { + const Place* lPlace_ptr = *itPlace; + assert (lPlace_ptr != NULL); + + oStr << lPlace_ptr->toString() << std::endl; + } + + return oStr.str(); + } + + // 
////////////////////////////////////////////////////////////////////// + std::string PlaceHolder::toShortString() const { + std::ostringstream oStr; + oStr << describeShortKey() << std::endl; + + for (PlaceOrderedList_T::const_iterator itPlace = _placeOrderedList.begin(); + itPlace != _placeOrderedList.end(); ++itPlace) { + const Place* lPlace_ptr = *itPlace; + assert (lPlace_ptr != NULL); + + oStr << lPlace_ptr->toShortString() << std::endl; + } + + return oStr.str(); + } + + // ////////////////////////////////////////////////////////////////////// + void PlaceHolder::toStream (std::ostream& ioOut) const { + ioOut << toString(); + } + + // ////////////////////////////////////////////////////////////////////// + void PlaceHolder::fromStream (std::istream& ioIn) { + } + +} Added: trunk/opentrep/opentrep/bom/PlaceHolder.hpp =================================================================== --- trunk/opentrep/opentrep/bom/PlaceHolder.hpp (rev 0) +++ trunk/opentrep/opentrep/bom/PlaceHolder.hpp 2009-07-17 17:19:13 UTC (rev 134) @@ -0,0 +1,80 @@ +#ifndef __OPENTREP_BOM_PLACEHOLDER_HPP +#define __OPENTREP_BOM_PLACEHOLDER_HPP + +// ////////////////////////////////////////////////////////////////////// +// Import section +// ////////////////////////////////////////////////////////////////////// +// OpenTREP +#include <opentrep/OPENTREP_Types.hpp> +#include <opentrep/bom/BomAbstract.hpp> +#include <opentrep/bom/PlaceList.hpp> + +namespace OPENTREP { + + /** Class wrapping functions on a list of Place objects. */ + class PlaceHolder : public BomAbstract { + friend class FacPlaceHolder; + public: + // ////////////// Getters ///////////// + /** Retrieve the list of place objects. */ + const PlaceList_T& getPlaceList() const { + return _placeList; + } + + + // ////////////// Setters ///////////// + + + public: + // /////////// Business methods ///////// + + + public: + // /////////// Display support methods ///////// + /** Dump a Business Object into an output stream. 
+ @param ostream& the output stream. */ + void toStream (std::ostream& ioOut) const; + + /** Read a Business Object from an input stream. + @param istream& the input stream. */ + void fromStream (std::istream& ioIn); + + /** Get the serialised version of the Business Object. */ + std::string toString() const; + + /** Get a short display of the Business Object. */ + std::string toShortString() const; + + /** Get a string describing the whole key (differentiating two objects + at any level). */ + const std::string describeKey() const; + + /** Get a string describing the short key (differentiating two objects + at the same level). */ + const std::string describeShortKey() const; + + + private: + // ////////////// Constructors and Destructors ///////////// + /** Default constructor. */ + PlaceHolder (); + /** Default copy constructor. */ + PlaceHolder (const PlaceHolder&); + /** Destructor. */ + ~PlaceHolder (); + /** Initialise (reset the list of documents). */ + void init (); + + + private: + // /////////////// Attributes //////////////// + /** List of place objects, sorted by Place ID. */ + PlaceList_T _placeList; + + /** List of place objects, the sort order corresponding to their + insertion order. 
*/ + PlaceOrderedList_T _placeOrderedList; + }; + +} +#endif // __OPENTREP_BOM_PLACEHOLDER_HPP Modified: trunk/opentrep/opentrep/bom/PlaceList.hpp =================================================================== --- trunk/opentrep/opentrep/bom/PlaceList.hpp 2009-07-17 00:10:38 UTC (rev 133) +++ trunk/opentrep/opentrep/bom/PlaceList.hpp 2009-07-17 17:19:13 UTC (rev 134) @@ -15,8 +15,9 @@ // ///////////// Type definitions //////////////////// typedef std::size_t PlaceID_T; - typedef std::map<PlaceID_T, Place*> PlaceList_T; - typedef std::list<Place*> SimplePlaceList_T; + // typedef std::map<PlaceID_T, Place*> PlaceDirectList_T; + typedef std::map<std::string, Place*> PlaceList_T; + typedef std::list<Place*> PlaceOrderedList_T; } #endif // __OPENTREP_BOM_PLACELIST_HPP Modified: trunk/opentrep/opentrep/bom/Result.cpp =================================================================== --- trunk/opentrep/opentrep/bom/Result.cpp 2009-07-17 00:10:38 UTC (rev 133) +++ trunk/opentrep/opentrep/bom/Result.cpp 2009-07-17 17:19:13 UTC (rev 134) @@ -14,7 +14,7 @@ // ////////////////////////////////////////////////////////////////////// Result::Result (const Xapian::Database& iDatabase) - : _database (iDatabase) { + : _resultHolder (NULL), _database (iDatabase) { init(); } Modified: trunk/opentrep/opentrep/bom/Result.hpp =================================================================== --- trunk/opentrep/opentrep/bom/Result.hpp 2009-07-17 00:10:38 UTC (rev 133) +++ trunk/opentrep/opentrep/bom/Result.hpp 2009-07-17 17:19:13 UTC (rev 134) @@ -11,10 +11,13 @@ namespace OPENTREP { + // Forward declarations + class ResultHolder; + /** Class wrapping functions on a list of Xapian Document objects. */ class Result : public BomAbstract { + friend class FacResultHolder; friend class FacResult; - friend class ResultHolder; public: // ////////////// Getters ///////////// /** Get the query string. 
*/ @@ -86,6 +89,9 @@ private: // /////////////// Attributes //////////////// + /** Parent ResultHolder. */ + ResultHolder* _resultHolder; + /** Query string having generated the list of document. */ TravelQuery_T _queryString; Modified: trunk/opentrep/opentrep/bom/ResultHolder.cpp =================================================================== --- trunk/opentrep/opentrep/bom/ResultHolder.cpp 2009-07-17 00:10:38 UTC (rev 133) +++ trunk/opentrep/opentrep/bom/ResultHolder.cpp 2009-07-17 17:19:13 UTC (rev 134) @@ -10,6 +10,9 @@ #include <opentrep/bom/StringMatcher.hpp> #include <opentrep/bom/Result.hpp> #include <opentrep/bom/ResultHolder.hpp> +// TODO: move that out of the BOM layer +#include <opentrep/factory/FacResultHolder.hpp> +#include <opentrep/factory/FacResult.hpp> #include <opentrep/service/Logger.hpp> namespace OPENTREP { @@ -89,18 +92,19 @@ resulting string gets empty. */ DocumentList_T lDocumentList; - Result* lResult_ptr = new Result (_database); - assert (lResult_ptr != NULL); + // TODO: move that out of the BOM layer + Result& lResult = FacResult::instance().create (_database); std::string lQueryString (lRemainingQueryString); // - lResult_ptr->setQueryString (lQueryString); - lResult_ptr->searchString (); + lResult.setQueryString (lQueryString); + lResult.searchString (); // Add the Result object (holding the list of matching // documents) to the dedicated list. - _resultList.push_back (lResult_ptr); + // TODO: move that out of the BOM layer + FacResultHolder::initLinkWithResult (*this, lResult); /** Remove, from the lRemainingQueryString string, the part which @@ -113,7 +117,7 @@ 'rio de janeiro'. So, the already parsed part, namely 'sna francisco', must be subtracted from the initial query string. 
*/ - lQueryString = lResult_ptr->getQueryString(); + lQueryString = lResult.getQueryString(); StringMatcher::subtractParsedToRemaining (lQueryString, lRemainingQueryString); Modified: trunk/opentrep/opentrep/bom/ResultHolder.hpp =================================================================== --- trunk/opentrep/opentrep/bom/ResultHolder.hpp 2009-07-17 00:10:38 UTC (rev 133) +++ trunk/opentrep/opentrep/bom/ResultHolder.hpp 2009-07-17 17:19:13 UTC (rev 134) @@ -19,7 +19,6 @@ /** Class wrapping functions on a list of Result objects. */ class ResultHolder : public BomAbstract { friend class FacResultHolder; - friend class RequestInterpreter; public: // ////////////// Getters ///////////// /** Get the query string. */ Modified: trunk/opentrep/opentrep/bom/StringMatcher.cpp =================================================================== --- trunk/opentrep/opentrep/bom/StringMatcher.cpp 2009-07-17 00:10:38 UTC (rev 133) +++ trunk/opentrep/opentrep/bom/StringMatcher.cpp 2009-07-17 17:19:13 UTC (rev 134) @@ -20,13 +20,31 @@ namespace OPENTREP { + // ///////////// Type definitions ////////// + /** Edit distance (e.g., 2 or 3). */ + typedef unsigned int EditDistance_T; + + /** Given the size of the phrase, determine the allowed edit distance for + spelling purpose. For instance, an edit distance of 1 will be allowed + on a 4-letter word, while an edit distance of 3 will be allowed on an + 11-letter word. */ + // ////////////////////////////////////////////////////////////////////// + static unsigned int calculateEditDistance (const std::string& iPhrase) { + EditDistance_T oEditDistance = 2; + + const EditDistance_T lQueryStringSize = iPhrase.size(); + + oEditDistance = lQueryStringSize / 3; + return oEditDistance; + } + /** For each of the word in the given list, perform spelling corrections. If the word is correctly spelled, it is copied as is. Otherwise, a corrected version is stored. 
*/ // ////////////////////////////////////////////////////////////////////// static void createCorrectedWordList (const WordList_T& iOriginalWordList, - WordList_T& ioCorrectedWordList, - const Xapian::Database& iDatabase) { + WordList_T& ioCorrectedWordList, + const Xapian::Database& iDatabase) { // Empty the target list ioCorrectedWordList.clear(); @@ -36,8 +54,9 @@ for (WordList_T::const_iterator itWord = iOriginalWordList.begin(); itWord != iOriginalWordList.end(); ++itWord) { const std::string& lOriginalWord = *itWord; + const EditDistance_T lEditDistance= calculateEditDistance(lOriginalWord); const std::string& lSuggestedWord = - iDatabase.get_spelling_suggestion (lOriginalWord, 3); + iDatabase.get_spelling_suggestion (lOriginalWord, lEditDistance); if (lSuggestedWord.empty() == true) { ioCorrectedWordList.push_back (lOriginalWord); @@ -110,8 +129,10 @@ phrase/string. With the above example, 'sna francisco' yields the suggestion 'san francisco'. */ + const EditDistance_T lEditDistance = + calculateEditDistance (lOriginalQueryString); const std::string lFullWordCorrectedString = - ioDatabase.get_spelling_suggestion (lOriginalQueryString, 3); + ioDatabase.get_spelling_suggestion (lOriginalQueryString, lEditDistance); // DEBUG /* Modified: trunk/opentrep/opentrep/bom/World.cpp =================================================================== --- trunk/opentrep/opentrep/bom/World.cpp 2009-07-17 00:10:38 UTC (rev 133) +++ trunk/opentrep/opentrep/bom/World.cpp 2009-07-17 17:19:13 UTC (rev 134) @@ -62,8 +62,8 @@ oStr << shortDisplay(); - for (SimplePlaceList_T::const_iterator itPlace = _simplePlaceList.begin(); - itPlace != _simplePlaceList.end(); ++itPlace) { + for (PlaceOrderedList_T::const_iterator itPlace = _placeOrderedList.begin(); + itPlace != _placeOrderedList.end(); ++itPlace) { const Place* lPlace_ptr = *itPlace; assert (lPlace_ptr != NULL); Modified: trunk/opentrep/opentrep/bom/World.hpp 
=================================================================== --- trunk/opentrep/opentrep/bom/World.hpp 2009-07-17 00:10:38 UTC (rev 133) +++ trunk/opentrep/opentrep/bom/World.hpp 2009-07-17 17:19:13 UTC (rev 134) @@ -23,14 +23,14 @@ public: // //////////// Getters ///////////// /** Get the list of Place objects. - const PlaceList_T& getPlaceList () const { + const PlaceDirectList_T& getPlaceList () const { return _placeList; } */ /** Get the list of Place objects. */ - const SimplePlaceList_T& getSimplePlaceList () const { - return _simplePlaceList; + const PlaceOrderedList_T& getSimplePlaceList () const { + return _placeOrderedList; } // //////////// Setters ///////////// @@ -80,12 +80,12 @@ /** List of Place objects. <br>That list is actually a STL map, indexed on the Xapian document ID. */ - // PlaceList_T _placeList; + // PlaceDirectList_T _placeList; /** List of Place objects. <br>That list is actually a STL list, to store temporarily Place objects when indexing the Xapian database. 
*/ - SimplePlaceList_T _simplePlaceList; + PlaceOrderedList_T _placeOrderedList; }; // ///////////// Type definitions //////////////////// Modified: trunk/opentrep/opentrep/bom/sources.mk =================================================================== --- trunk/opentrep/opentrep/bom/sources.mk 2009-07-17 00:10:38 UTC (rev 133) +++ trunk/opentrep/opentrep/bom/sources.mk 2009-07-17 17:19:13 UTC (rev 134) @@ -3,11 +3,12 @@ $(top_srcdir)/opentrep/bom/Language.hpp \ $(top_srcdir)/opentrep/bom/GenericBom.hpp \ $(top_srcdir)/opentrep/bom/World.hpp \ + $(top_srcdir)/opentrep/bom/WordList.hpp \ + $(top_srcdir)/opentrep/bom/WordHolder.hpp \ $(top_srcdir)/opentrep/bom/Names.hpp \ $(top_srcdir)/opentrep/bom/Place.hpp \ $(top_srcdir)/opentrep/bom/PlaceList.hpp \ - $(top_srcdir)/opentrep/bom/WordList.hpp \ - $(top_srcdir)/opentrep/bom/WordHolder.hpp \ + $(top_srcdir)/opentrep/bom/PlaceHolder.hpp \ $(top_srcdir)/opentrep/bom/DocumentList.hpp \ $(top_srcdir)/opentrep/bom/Result.hpp \ $(top_srcdir)/opentrep/bom/ResultList.hpp \ @@ -17,9 +18,10 @@ $(top_srcdir)/opentrep/bom/BomType.cpp \ $(top_srcdir)/opentrep/bom/Language.cpp \ $(top_srcdir)/opentrep/bom/World.cpp \ + $(top_srcdir)/opentrep/bom/WordHolder.cpp \ $(top_srcdir)/opentrep/bom/Names.cpp \ $(top_srcdir)/opentrep/bom/Place.cpp \ - $(top_srcdir)/opentrep/bom/WordHolder.cpp \ + $(top_srcdir)/opentrep/bom/PlaceHolder.cpp \ $(top_srcdir)/opentrep/bom/Result.cpp \ $(top_srcdir)/opentrep/bom/ResultHolder.cpp \ $(top_srcdir)/opentrep/bom/StringMatcher.cpp Modified: trunk/opentrep/opentrep/command/DBManager.cpp =================================================================== --- trunk/opentrep/opentrep/command/DBManager.cpp 2009-07-17 00:10:38 UTC (rev 133) +++ trunk/opentrep/opentrep/command/DBManager.cpp 2009-07-17 17:19:13 UTC (rev 134) @@ -185,7 +185,7 @@ hasStillData = iterateOnStatement (lSelectStatement, ioPlace, shouldNotDoReset); if (hasStillData == true) { - throw new MultipleRowsForASingleDocIDException(); + 
throw MultipleRowsForASingleDocIDException(); } // Debug Modified: trunk/opentrep/opentrep/command/RequestInterpreter.cpp =================================================================== --- trunk/opentrep/opentrep/command/RequestInterpreter.cpp 2009-07-17 00:10:38 UTC (rev 133) +++ trunk/opentrep/opentrep/command/RequestInterpreter.cpp 2009-07-17 17:19:13 UTC (rev 134) @@ -11,7 +11,10 @@ #include <opentrep/bom/Place.hpp> #include <opentrep/bom/ResultHolder.hpp> #include <opentrep/bom/Result.hpp> +#include <opentrep/bom/PlaceHolder.hpp> +#include <opentrep/factory/FacPlaceHolder.hpp> #include <opentrep/factory/FacPlace.hpp> +#include <opentrep/factory/FacResultHolder.hpp> #include <opentrep/command/DBManager.hpp> #include <opentrep/command/RequestInterpreter.hpp> #include <opentrep/service/Logger.hpp> @@ -24,40 +27,31 @@ void RequestInterpreter:: interpretTravelRequest (soci::session& ioSociSession, const TravelDatabaseName_T& iTravelDatabaseName, - const TravelQuery_T& iTravelQuery) { + const TravelQuery_T& iTravelQuery, + PlaceHolder& ioPlaceHolder) { try { // Make the database Xapian::Database lXapianDatabase (iTravelDatabaseName); - // TODO: Use FacResultHolder for the following - ResultHolder* lResultHolder_ptr = new ResultHolder (iTravelQuery, - lXapianDatabase); - assert (lResultHolder_ptr != NULL); + // Create a ResultHolder object + ResultHolder& lResultHolder = + FacResultHolder::instance().create (iTravelQuery, lXapianDatabase); // - lResultHolder_ptr->searchString(); + lResultHolder.searchString(); // DEBUG OPENTREP_LOG_DEBUG (std::endl - << "_________________________________________" - << std::endl << "=========================================" - << std::endl - << "-----------------------------------------" - << std::endl - << "Matching list: " << std::endl - << lResultHolder_ptr->toString() - << "_________________________________________" - << std::endl + << std::endl << "Matching list: " << std::endl + << lResultHolder.toString() << 
"=========================================" - << std::endl - << "-----------------------------------------" << std::endl << std::endl); // Browse the list of result objects - const ResultList_T& lResultList = lResultHolder_ptr->getResultList(); + const ResultList_T& lResultList = lResultHolder.getResultList(); for (ResultList_T::const_iterator itResult = lResultList.begin(); itResult != lResultList.end(); ++itResult) { // Retrieve the result object @@ -86,8 +80,14 @@ DBManager::retrievePlace (ioSociSession, lDocID, lPlace); if (hasRetrievedPlace == true) { + // Insert the Place object within the PlaceHolder object + FacPlaceHolder::initLinkWithPlace (ioPlaceHolder, lPlace); + + // DEBUG OPENTREP_LOG_DEBUG ("Retrieved Document: " << lPlace.toString()); + } else { + // DEBUG OPENTREP_LOG_DEBUG ("No retrieved Document for ID = " << lDocID); } } Modified: trunk/opentrep/opentrep/command/RequestInterpreter.hpp =================================================================== --- trunk/opentrep/opentrep/command/RequestInterpreter.hpp 2009-07-17 00:10:38 UTC (rev 133) +++ trunk/opentrep/opentrep/command/RequestInterpreter.hpp 2009-07-17 17:19:13 UTC (rev 134) @@ -16,6 +16,9 @@ namespace OPENTREP { + // Forward declarations + class PlaceHolder; + /** Command wrapping the travel request process. */ class RequestInterpreter { friend class OPENTREP_Service; @@ -23,7 +26,7 @@ /** Interpret a search query. */ static void interpretTravelRequest (soci::session&, const TravelDatabaseName_T&, - const TravelQuery_T&); + const TravelQuery_T&, PlaceHolder&); private: /** Constructors. 
*/ Modified: trunk/opentrep/opentrep/factory/FacPlace.cpp =================================================================== --- trunk/opentrep/opentrep/factory/FacPlace.cpp 2009-07-17 00:10:38 UTC (rev 133) +++ trunk/opentrep/opentrep/factory/FacPlace.cpp 2009-07-17 17:19:13 UTC (rev 134) @@ -2,7 +2,7 @@ // Import section // ////////////////////////////////////////////////////////////////////// // C -#include <assert.h> +#include <cassert> // OPENTREP #include <opentrep/bom/Place.hpp> #include <opentrep/factory/FacSupervisor.hpp> @@ -13,6 +13,14 @@ FacPlace* FacPlace::_instance = NULL; // ////////////////////////////////////////////////////////////////////// + FacPlace::FacPlace () { + } + + // ////////////////////////////////////////////////////////////////////// + FacPlace::FacPlace (const FacPlace&) { + } + + // ////////////////////////////////////////////////////////////////////// FacPlace::~FacPlace () { _instance = NULL; } Modified: trunk/opentrep/opentrep/factory/FacPlace.hpp =================================================================== --- trunk/opentrep/opentrep/factory/FacPlace.hpp 2009-07-17 00:10:38 UTC (rev 133) +++ trunk/opentrep/opentrep/factory/FacPlace.hpp 2009-07-17 17:19:13 UTC (rev 134) @@ -40,8 +40,8 @@ /** Default Constructor. 
<br>This constructor is private in order to ensure the singleton pattern.*/ - FacPlace () {} - FacPlace (const FacPlace&) {} + FacPlace (); + FacPlace (const FacPlace&); private: /** The unique instance.*/ Added: trunk/opentrep/opentrep/factory/FacPlaceHolder.cpp =================================================================== --- trunk/opentrep/opentrep/factory/FacPlaceHolder.cpp (rev 0) +++ trunk/opentrep/opentrep/factory/FacPlaceHolder.cpp 2009-07-17 17:19:13 UTC (rev 134) @@ -0,0 +1,76 @@ +// ////////////////////////////////////////////////////////////////////// +// Import section +// ////////////////////////////////////////////////////////////////////// +// C +#include <cassert> +// OPENTREP +#include <opentrep/bom/PlaceHolder.hpp> +#include <opentrep/bom/Place.hpp> +#include <opentrep/factory/FacSupervisor.hpp> +#include <opentrep/factory/FacPlaceHolder.hpp> +#include <opentrep/service/Logger.hpp> + +namespace OPENTREP { + + FacPlaceHolder* FacPlaceHolder::_instance = NULL; + + // ////////////////////////////////////////////////////////////////////// + FacPlaceHolder::FacPlaceHolder () { + } + + // ////////////////////////////////////////////////////////////////////// + FacPlaceHolder::FacPlaceHolder (const FacPlaceHolder&) { + } + + // ////////////////////////////////////////////////////////////////////// + FacPlaceHolder::~FacPlaceHolder () { + _instance = NULL; + } + + // ////////////////////////////////////////////////////////////////////// + FacPlaceHolder& FacPlaceHolder::instance () { + + if (_instance == NULL) { + _instance = new FacPlaceHolder(); + assert (_instance != NULL); + + FacSupervisor::instance().registerBomFactory (_instance); + } + return *_instance; + } + + // ////////////////////////////////////////////////////////////////////// + PlaceHolder& FacPlaceHolder::create () { + PlaceHolder* oPlaceHolder_ptr = NULL; + + oPlaceHolder_ptr = new PlaceHolder (); + assert (oPlaceHolder_ptr != NULL); + + // The new object is added to the Bom pool 
+ _pool.push_back (oPlaceHolder_ptr); + + return *oPlaceHolder_ptr; + } + + // ////////////////////////////////////////////////////////////////////// + void FacPlaceHolder::initLinkWithPlace (PlaceHolder& ioPlaceHolder, + Place& ioPlace) { + // Link the PlaceHolder to the Place, and vice versa + ioPlace._placeHolder = &ioPlaceHolder; + + // Add the Place to the PlaceHolder internal map (of Place objects) + const bool insertSucceeded = ioPlaceHolder._placeList. + insert (PlaceList_T::value_type (ioPlace.describeShortKey(), + &ioPlace)).second; + if (insertSucceeded == false) { + OPENTREP_LOG_ERROR ("Insertion failed for " + << ioPlaceHolder.describeKey() + << " and " << ioPlace.describeShortKey()); + assert (insertSucceeded == true); + } + + // Add the Place to the PlaceHolder internal list (of Place objects) + ioPlaceHolder._placeOrderedList.push_back (&ioPlace); + } + +} Added: trunk/opentrep/opentrep/factory/FacPlaceHolder.hpp =================================================================== --- trunk/opentrep/opentrep/factory/FacPlaceHolder.hpp (rev 0) +++ trunk/opentrep/opentrep/factory/FacPlaceHolder.hpp 2009-07-17 17:19:13 UTC (rev 134) @@ -0,0 +1,56 @@ +#ifndef __OPENTREP_FAC_FACPLACEHOLDER_HPP +#define __OPENTREP_FAC_FACPLACEHOLDER_HPP + +// ////////////////////////////////////////////////////////////////////// +// Import section +// ////////////////////////////////////////////////////////////////////// +// OPENTREP +#include <opentrep/factory/FacBomAbstract.hpp> + +namespace OPENTREP { + + /** Forward declarations. */ + class PlaceHolder; + class Place; + + /** Factory for Place. */ + class FacPlaceHolder : public FacBomAbstract { + public: + + /** Provide the unique instance. + <br> The singleton is instantiated when first used + @return FacPlaceHolder& */ + static FacPlaceHolder& instance(); + + /** Destructor. 
+ <br> The Destruction put the _instance to NULL + in order to be clean for the next FacPlaceHolder::instance() */ + virtual ~FacPlaceHolder(); + + /** Create a new PlaceHolder object. + <br>This new object is added to the list of instantiated objects. + @return PlaceHolder& The newly created object. */ + PlaceHolder& create (); + + /** Initialise the link between a PlaceHolder and a Place. + @param PlaceHolder& + @param Place& + @exception FacExceptionNullPointer + @exception FacException.*/ + static void initLinkWithPlace (PlaceHolder&, Place&); + + + private: + /** Default Constructor. + <br>This constructor is private in order to ensure the singleton + pattern.*/ + FacPlaceHolder (); + FacPlaceHolder (const FacPlaceHolder&); + + private: + /** The unique instance.*/ + static FacPlaceHolder* _instance; + + }; +} +#endif // __OPENTREP_FAC_FACPLACEHOLDER_HPP Added: trunk/opentrep/opentrep/factory/FacResult.cpp =================================================================== --- trunk/opentrep/opentrep/factory/FacResult.cpp (rev 0) +++ trunk/opentrep/opentrep/factory/FacResult.cpp 2009-07-17 17:19:13 UTC (rev 134) @@ -0,0 +1,66 @@ +// ////////////////////////////////////////////////////////////////////// +// Import section +// ////////////////////////////////////////////////////////////////////// +// C +#include <cassert> +// OPENTREP +#include <opentrep/bom/Result.hpp> +#include <opentrep/factory/FacSupervisor.hpp> +#include <opentrep/factory/FacResult.hpp> + +namespace OPENTREP { + + FacResult* FacResult::_instance = NULL; + + // ////////////////////////////////////////////////////////////////////// + FacResult::FacResult () { + } + + // ////////////////////////////////////////////////////////////////////// + FacResult::FacResult (const FacResult&) { + } + + // ////////////////////////////////////////////////////////////////////// + FacResult::~FacResult () { + _instance = NULL; + } + + // ////////////////////////////////////////////////////////////////////// 
+ FacResult& FacResult::instance () { + + if (_instance == NULL) { + _instance = new FacResult(); + assert (_instance != NULL); + + FacSupervisor::instance().registerBomFactory (_instance); + } + return *_instance; + } + + // ////////////////////////////////////////////////////////////////////// + Result& FacResult::create (const Xapian::Database& iXapianDatabase) { + Result* oResult_ptr = NULL; + + oResult_ptr = new Result (iXapianDatabase); + assert (oResult_ptr != NULL); + + // The new object is added to the Bom pool + _pool.push_back (oResult_ptr); + + return *oResult_ptr; + } + + // ////////////////////////////////////////////////////////////////////// + Result& FacResult::clone (const Result& iResult) { + Result* oResult_ptr = NULL; + + oResult_ptr = new Result (iResult); + assert (oResult_ptr != NULL); + + // The new object is added to the Bom pool + _pool.push_back (oResult_ptr); + + return *oResult_ptr; + } + +} Added: trunk/opentrep/opentrep/factory/FacResult.hpp =================================================================== --- trunk/opentrep/opentrep/factory/FacResult.hpp (rev 0) +++ trunk/opentrep/opentrep/factory/FacResult.hpp 2009-07-17 17:19:13 UTC (rev 134) @@ -0,0 +1,52 @@ +#ifndef __OPENTREP_FAC_FACRESULT_HPP +#define __OPENTREP_FAC_FACRESULT_HPP + +// ////////////////////////////////////////////////////////////////////// +// Import section +// ////////////////////////////////////////////////////////////////////// +// OPENTREP +#include <opentrep/factory/FacBomAbstract.hpp> + +namespace OPENTREP { + + /** Forward declarations. */ + class Result; + + /** Factory for Result. */ + class FacResult : public FacBomAbstract { + public: + + /** Provide the unique instance. + <br> The singleton is instantiated when first used + @return FacResult& */ + static FacResult& instance(); + + /** Destructor. 
+ <br> The Destruction put the _instance to NULL + in order to be clean for the next FacResult::instance() */ + virtual ~FacResult(); + + /** Create a new Result object. + <br>This new object is added to the list of instantiated objects. + @return Result& The newly created object. */ + Result& create (const Xapian::Database&); + + /** Create a copy of a Result object. + <br>This new object is added to the list of instantiated objects. + @return Result& The newly created object. */ + Result& clone (const Result&); + + private: + /** Default Constructor. + <br>This constructor is private in order to ensure the singleton + pattern.*/ + FacResult (); + FacResult (const FacResult&); + + private: + /** The unique instance.*/ + static FacResult* _instance; + + }; +} +#endif // __OPENTREP_FAC_FACRESULT_HPP Added: trunk/opentrep/opentrep/factory/FacResultHolder.cpp =================================================================== --- trunk/opentrep/opentrep/factory/FacResultHolder.cpp (rev 0) +++ trunk/opentrep/opentrep/factory/FacResultHolder.cpp 2009-07-17 17:19:13 UTC (rev 134) @@ -0,0 +1,66 @@ +// ////////////////////////////////////////////////////////////////////// +// Import section +// ////////////////////////////////////////////////////////////////////// +// C +#include <cassert> +// OPENTREP +#include <opentrep/bom/ResultHolder.hpp> +#include <opentrep/bom/Result.hpp> +#include <opentrep/factory/FacSupervisor.hpp> +#include <opentrep/factory/FacResultHolder.hpp> +#include <opentrep/service/Logger.hpp> + +namespace OPENTREP { + + FacResultHolder* FacResultHolder::_instance = NULL; + + // ////////////////////////////////////////////////////////////////////// + FacResultHolder::FacResultHolder () { + } + + // ////////////////////////////////////////////////////////////////////// + FacResultHolder::FacResultHolder (const FacResultHolder&) { + } + + // ////////////////////////////////////////////////////////////////////// + FacResultHolder::~FacResultHolder () { + 
_instance = NULL; + } + + // ////////////////////////////////////////////////////////////////////// + FacResultHolder& FacResultHolder::instance () { + + if (_instance == NULL) { + _instance = new FacResultHolder(); + assert (_instance != NULL); + + FacSupervisor::instance().registerBomFactory (_instance); + } + return *_instance; + } + + // ////////////////////////////////////////////////////////////////////// + ResultHolder& FacResultHolder::create (const TravelQuery_T& iQueryString, + const Xapian::Database& iDatabase) { + ResultHolder* oResultHolder_ptr = NULL; + + oResultHolder_ptr = new ResultHolder (iQueryString, iDatabase); + assert (oResultHolder_ptr != NULL); + + // The new object is added to the Bom pool + _pool.push_back (oResultHolder_ptr); + + return *oResultHolder_ptr; + } + + // ////////////////////////////////////////////////////////////////////// + void FacResultHolder::initLinkWithResult (ResultHolder& ioResultHolder, + Result& ioResult) { + // Link the ResultHolder to the Result, and vice versa + ioResult._resultHolder = &ioResultHolder; + + // Add the Result to the ResultHolder internal list (of Result objects) + ioResultHolder._resultList.push_back (&ioResult); + } + +} Added: trunk/opentrep/opentrep/factory/FacResultHolder.hpp =================================================================== --- trunk/opentrep/opentrep/factory/FacResultHolder.hpp (rev 0) +++ trunk/opentrep/opentrep/factory/FacResultHolder.hpp 2009-07-17 17:19:13 UTC (rev 134) @@ -0,0 +1,63 @@ +#ifndef __OPENTREP_FAC_FACRESULTHOLDER_HPP +#define __OPENTREP_FAC_FACRESULTHOLDER_HPP + +// ////////////////////////////////////////////////////////////////////// +// Import section +// ////////////////////////////////////////////////////////////////////// +// OpenTREP +#include <opentrep/factory/FacBomAbstract.hpp> +#include <opentrep/OPENTREP_Types.hpp> + +// Forward declarations +namespace Xapian { + class Database; +} + +namespace OPENTREP { + + /** Forward declarations. 
*/ + class ResultHolder; + class Result; + + /** Factory for Result. */ + class FacResultHolder : public FacBomAbstract { + public: + + /** Provide the unique instance. + <br> The singleton is instantiated when first used + @return FacResultHolder& */ + static FacResultHolder& instance(); + + /** Destructor. + <br> The Destruction put the _instance to NULL + in order to be clean for the next FacResultHolder::instance() */ + virtual ~FacResultHolder(); + + /** Create a new ResultHolder object. + <br>This new object is added to the list of instantiated objects. + @return ResultHolder& The newly created object. */ + ResultHolder& create (const TravelQuery_T& iQueryString, + const Xapian::Database& iDatabase); + + /** Initialise the link between a ResultHolder and a Result. + @param ResultHolder& + @param Result& + @exception FacExceptionNullPointer + @exception FacException.*/ + static void initLinkWithResult (ResultHolder&, Result&); + + + private: + /** Default Constructor. + <br>This constructor is private in order to ensure the singleton + pattern.*/ + FacResultHolder (); + FacResultHolder (const FacResultHolder&); + + private: + /** The unique instance.*/ + static FacResultHolder* _instance; + + }; +} +#endif // __OPENTREP_FAC_FACRESULTHOLDER_HPP Modified: trunk/opentrep/opentrep/factory/FacWorld.cpp =================================================================== --- trunk/opentrep/opentrep/factory/FacWorld.cpp 2009-07-17 00:10:38 UTC (rev 133) +++ trunk/opentrep/opentrep/factory/FacWorld.cpp 2009-07-17 17:19:13 UTC (rev 134) @@ -71,8 +71,8 @@ // Add the Place pointer to the dedicated list within the World object /* - const bool insertSucceeded2 = ioWorld._placeList. - insert (PlaceList_T::value_type (lPlaceID, &ioPlace)).second; + const bool insertSucceeded2 = ioWorld._placeDirectList. 
+ insert (PlaceDirectList_T::value_type (lPlaceID, &ioPlace)).second; if (insertSucceeded2 == false) { OPENTREP_LOG_ERROR ("Insertion failed for " << ioWorld.describeKey() << " and " << ioPlace.describeShortKey()); @@ -83,7 +83,7 @@ } // Add the Place pointer to the dedicated list within the World object - ioWorld._simplePlaceList.push_back (&ioPlace); + ioWorld._placeOrderedList.push_back (&ioPlace); } // ////////////////////////////////////////////////////////////////////// Modified: trunk/opentrep/opentrep/factory/sources.mk =================================================================== --- trunk/opentrep/opentrep/factory/sources.mk 2009-07-17 00:10:38 UTC (rev 133) +++ trunk/opentrep/opentrep/factory/sources.mk 2009-07-17 17:19:13 UTC (rev 134) @@ -3,10 +3,16 @@ $(top_srcdir)/opentrep/factory/FacSupervisor.hpp \ $(top_srcdir)/opentrep/factory/FacOpenTrepServiceContext.hpp \ $(top_srcdir)/opentrep/factory/FacWorld.hpp \ - $(top_srcdir)/opentrep/factory/FacPlace.hpp + $(top_srcdir)/opentrep/factory/FacPlaceHolder.hpp \ + $(top_srcdir)/opentrep/factory/FacPlace.hpp \ + $(top_srcdir)/opentrep/factory/FacResultHolder.hpp \ + $(top_srcdir)/opentrep/factory/FacResult.hpp fac_cc_sources = $(top_srcdir)/opentrep/factory/FacBomAbstract.cpp \ $(top_srcdir)/opentrep/factory/FacServiceAbstract.cpp \ $(top_srcdir)/opentrep/factory/FacSupervisor.cpp \ $(top_srcdir)/opentrep/factory/FacOpenTrepServiceContext.cpp \ $(top_srcdir)/opentrep/factory/FacWorld.cpp \ - $(top_srcdir)/opentrep/factory/FacPlace.cpp + $(top_srcdir)/opentrep/factory/FacPlaceHolder.cpp \ + $(top_srcdir)/opentrep/factory/FacPlace.cpp \ + $(top_srcdir)/opentrep/factory/FacResultHolder.cpp \ + $(top_srcdir)/opentrep/factory/FacResult.cpp Modified: trunk/opentrep/opentrep/service/OPENTREP_Service.cpp =================================================================== --- trunk/opentrep/opentrep/service/OPENTREP_Service.cpp 2009-07-17 00:10:38 UTC (rev 133) +++ 
trunk/opentrep/opentrep/service/OPENTREP_Service.cpp 2009-07-17 17:19:13 UTC (rev 134) @@ -2,11 +2,13 @@ // Import section // ////////////////////////////////////////////////////////////////////// // C -#include <assert.h> +#include <cassert> // OpenTrep #include <opentrep/basic/BasConst_OPENTREP_Service.hpp> #include <opentrep/basic/BasChronometer.hpp> +#include <opentrep/bom/PlaceHolder.hpp> #include <opentrep/factory/FacWorld.hpp> +#include <opentrep/factory/FacPlaceHolder.hpp> #include <opentrep/command/SociSessionManager.hpp> #include <opentrep/command/DBManager.hpp> #include <opentrep/command/IndexBuilder.hpp> @@ -19,12 +21,21 @@ namespace OPENTREP { // ////////////////////////////////////////////////////////////////////// + OPENTREP_Service::OPENTREP_Service (std::ostream& ioLogStream, + const std::string& iXapianDatabaseFilepath) + : _opentrepSer... [truncated message content] |
From: <den...@us...> - 2009-07-17 00:10:41
|
Revision: 133 http://opentrep.svn.sourceforge.net/opentrep/?rev=133&view=rev Author: denis_arnaud Date: 2009-07-17 00:10:38 +0000 (Fri, 17 Jul 2009) Log Message: ----------- [Dev] Retrofitted the code of test/xapian/string_search.cpp into the OpenTREP library, and created the corresponding objects and support classes. Modified Paths: -------------- trunk/opentrep/opentrep/batches/indexer.cpp trunk/opentrep/opentrep/batches/searcher.cpp trunk/opentrep/opentrep/bom/BomAbstract.hpp trunk/opentrep/opentrep/bom/sources.mk trunk/opentrep/opentrep/command/RequestInterpreter.cpp Added Paths: ----------- trunk/opentrep/opentrep/bom/DocumentList.hpp trunk/opentrep/opentrep/bom/Result.cpp trunk/opentrep/opentrep/bom/Result.hpp trunk/opentrep/opentrep/bom/ResultHolder.cpp trunk/opentrep/opentrep/bom/ResultHolder.hpp trunk/opentrep/opentrep/bom/ResultList.hpp trunk/opentrep/opentrep/bom/StringMatcher.cpp trunk/opentrep/opentrep/bom/StringMatcher.hpp trunk/opentrep/opentrep/bom/WordHolder.cpp trunk/opentrep/opentrep/bom/WordHolder.hpp trunk/opentrep/opentrep/bom/WordList.hpp Modified: trunk/opentrep/opentrep/batches/indexer.cpp =================================================================== --- trunk/opentrep/opentrep/batches/indexer.cpp 2009-07-16 16:45:48 UTC (rev 132) +++ trunk/opentrep/opentrep/batches/indexer.cpp 2009-07-17 00:10:38 UTC (rev 133) @@ -1,153 +1,153 @@ -// C -#include <assert.h> -// STL -#include <iostream> -#include <sstream> -#include <fstream> -#include <map> -#include <vector> -// Boost (Extended STL) -#include <boost/date_time/posix_time/posix_time.hpp> -#include <boost/date_time/gregorian/gregorian.hpp> -#include <boost/program_options.hpp> -// OPENTREP -#include <opentrep/OPENTREP_Service.hpp> - -// ///////// Parsing of Options & Configuration ///////// -// A helper function to simplify the main part. 
-template<class T> std::ostream& operator<< (std::ostream& os, - const std::vector<T>& v) { - std::copy (v.begin(), v.end(), std::ostream_iterator<T> (std::cout, " ")); - return os; -} - -int readConfiguration (int argc, char* argv[]) { - int opt; - - // Declare a group of options that will be - // allowed only on command line - boost::program_options::options_description generic("Generic options"); - generic.add_options() - ("version,v", "print version string") - ("help,h", "produce help message"); - - // Declare a group of options that will be allowed both on command line and in - // config file - boost::program_options::options_description config("Configuration"); - config.add_options() - ("optimization", - boost::program_options::value<int>(&opt)->default_value(10), - "optimization level") - ("include-path,I", - boost::program_options::value< std::vector<std::string> >()->composing(), - "include path"); - - // Hidden options, will be allowed both on command line and - // in config file, but will not be shown to the user. - boost::program_options::options_description hidden("Hidden options"); - hidden.add_options() - ("input-file", - boost::program_options::value< std::vector<std::string> >(), - "input file"); - - boost::program_options::options_description cmdline_options; - cmdline_options.add(generic).add(config).add(hidden); - - boost::program_options::options_description config_file_options; - config_file_options.add(config).add(hidden); - - boost::program_options::options_description visible("Allowed options"); - visible.add(generic).add(config); - - boost::program_options::positional_options_description p; - p.add("input-file", -1); - - boost::program_options::variables_map vm; - boost::program_options:: - store (boost::program_options::command_line_parser(argc, argv). 
- options (cmdline_options).positional(p).run(), vm); - - std::ifstream ifs ("request_parser.cfg"); - boost::program_options::store (parse_config_file (ifs, config_file_options), - vm); - boost::program_options::notify (vm); - - if (vm.count ("help")) { - std::cout << visible << std::endl; - return 0; - } - - if (vm.count ("version")) { - std::cout << "Open Travel Request Parser, version 1.0" << std::endl; - return 0; - } - - if (vm.count ("include-path")) { - std::cout << "Include paths are: " - << vm["include-path"].as< std::vector<std::string> >() - << std::endl; - } - - if (vm.count ("input-file")) { - std::cout << "Input files are: " - << vm["input-file"].as< std::vector<std::string> >() - << std::endl; - } - - std::cout << "Optimization level is " << opt << std::endl; - - return 0; -} - - -// /////////////// M A I N ///////////////// -int main (int argc, char* argv[]) { - try { - - // Output log File - std::string lLogFilename ("indexer.log"); - - // Xapian database name (directory of the index) - OPENTREP::TravelDatabaseName_T lXapianDatabaseName ("traveldb"); - - if (argc >= 1 && argv[1] != NULL) { - std::istringstream istr (argv[1]); - istr >> lLogFilename; - } - - if (argc >= 2 && argv[2] != NULL) { - std::istringstream istr (argv[2]); - istr >> lXapianDatabaseName; - } - - // Set the log parameters - std::ofstream logOutputFile; - // open and clean the log outputfile - logOutputFile.open (lLogFilename.c_str()); - logOutputFile.clear(); - - // Initialise the context - OPENTREP::OPENTREP_Service opentrepService; - opentrepService.init (logOutputFile, lXapianDatabaseName); - - // Launch the indexation - opentrepService.buildSearchIndex(); - - // Close the Log outputFile - logOutputFile.close(); - - - } catch (const OPENTREP::RootException& otexp) { - std::cerr << "Standard exception: " << otexp.what() << std::endl; - return -1; - - } catch (const std::exception& stde) { - std::cerr << "Standard exception: " << stde.what() << std::endl; - return -1; - - } 
catch (...) { - return -1; - } - - return 0; -} +// C +#include <assert.h> +// STL +#include <iostream> +#include <sstream> +#include <fstream> +#include <map> +#include <vector> +// Boost (Extended STL) +#include <boost/date_time/posix_time/posix_time.hpp> +#include <boost/date_time/gregorian/gregorian.hpp> +#include <boost/program_options.hpp> +// OPENTREP +#include <opentrep/OPENTREP_Service.hpp> + +// ///////// Parsing of Options & Configuration ///////// +// A helper function to simplify the main part. +template<class T> std::ostream& operator<< (std::ostream& os, + const std::vector<T>& v) { + std::copy (v.begin(), v.end(), std::ostream_iterator<T> (std::cout, " ")); + return os; +} + +int readConfiguration (int argc, char* argv[]) { + int opt; + + // Declare a group of options that will be + // allowed only on command line + boost::program_options::options_description generic("Generic options"); + generic.add_options() + ("version,v", "print version string") + ("help,h", "produce help message"); + + // Declare a group of options that will be allowed both on command line and in + // config file + boost::program_options::options_description config("Configuration"); + config.add_options() + ("optimization", + boost::program_options::value<int>(&opt)->default_value(10), + "optimization level") + ("include-path,I", + boost::program_options::value< std::vector<std::string> >()->composing(), + "include path"); + + // Hidden options, will be allowed both on command line and + // in config file, but will not be shown to the user. 
+ boost::program_options::options_description hidden("Hidden options"); + hidden.add_options() + ("input-file", + boost::program_options::value< std::vector<std::string> >(), + "input file"); + + boost::program_options::options_description cmdline_options; + cmdline_options.add(generic).add(config).add(hidden); + + boost::program_options::options_description config_file_options; + config_file_options.add(config).add(hidden); + + boost::program_options::options_description visible("Allowed options"); + visible.add(generic).add(config); + + boost::program_options::positional_options_description p; + p.add("input-file", -1); + + boost::program_options::variables_map vm; + boost::program_options:: + store (boost::program_options::command_line_parser(argc, argv). + options (cmdline_options).positional(p).run(), vm); + + std::ifstream ifs ("request_parser.cfg"); + boost::program_options::store (parse_config_file (ifs, config_file_options), + vm); + boost::program_options::notify (vm); + + if (vm.count ("help")) { + std::cout << visible << std::endl; + return 0; + } + + if (vm.count ("version")) { + std::cout << "Open Travel Request Parser, version 1.0" << std::endl; + return 0; + } + + if (vm.count ("include-path")) { + std::cout << "Include paths are: " + << vm["include-path"].as< std::vector<std::string> >() + << std::endl; + } + + if (vm.count ("input-file")) { + std::cout << "Input files are: " + << vm["input-file"].as< std::vector<std::string> >() + << std::endl; + } + + std::cout << "Optimization level is " << opt << std::endl; + + return 0; +} + + +// /////////////// M A I N ///////////////// +int main (int argc, char* argv[]) { + try { + + // Output log File + std::string lLogFilename ("indexer.log"); + + // Xapian database name (directory of the index) + OPENTREP::TravelDatabaseName_T lXapianDatabaseName ("traveldb"); + + if (argc >= 1 && argv[1] != NULL) { + std::istringstream istr (argv[1]); + istr >> lLogFilename; + } + + if (argc >= 2 && argv[2] != NULL) { + 
std::istringstream istr (argv[2]); + istr >> lXapianDatabaseName; + } + + // Set the log parameters + std::ofstream logOutputFile; + // open and clean the log outputfile + logOutputFile.open (lLogFilename.c_str()); + logOutputFile.clear(); + + // Initialise the context + OPENTREP::OPENTREP_Service opentrepService; + opentrepService.init (logOutputFile, lXapianDatabaseName); + + // Launch the indexation + opentrepService.buildSearchIndex(); + + // Close the Log outputFile + logOutputFile.close(); + + + } catch (const OPENTREP::RootException& otexp) { + std::cerr << "Standard exception: " << otexp.what() << std::endl; + return -1; + + } catch (const std::exception& stde) { + std::cerr << "Standard exception: " << stde.what() << std::endl; + return -1; + + } catch (...) { + return -1; + } + + return 0; +} Modified: trunk/opentrep/opentrep/batches/searcher.cpp =================================================================== --- trunk/opentrep/opentrep/batches/searcher.cpp 2009-07-16 16:45:48 UTC (rev 132) +++ trunk/opentrep/opentrep/batches/searcher.cpp 2009-07-17 00:10:38 UTC (rev 133) @@ -1,160 +1,160 @@ -// C -#include <assert.h> -// STL -#include <iostream> -#include <sstream> -#include <fstream> -#include <map> -#include <vector> -// Boost (Extended STL) -#include <boost/date_time/posix_time/posix_time.hpp> -#include <boost/date_time/gregorian/gregorian.hpp> -#include <boost/program_options.hpp> -// OPENTREP -#include <opentrep/OPENTREP_Service.hpp> - -// ///////// Parsing of Options & Configuration ///////// -// A helper function to simplify the main part. 
-template<class T> std::ostream& operator<< (std::ostream& os, - const std::vector<T>& v) { - std::copy (v.begin(), v.end(), std::ostream_iterator<T> (std::cout, " ")); - return os; -} - -int readConfiguration (int argc, char* argv[]) { - int opt; - - // Declare a group of options that will be - // allowed only on command line - boost::program_options::options_description generic("Generic options"); - generic.add_options() - ("version,v", "print version string") - ("help,h", "produce help message"); - - // Declare a group of options that will be allowed both on command line and in - // config file - boost::program_options::options_description config("Configuration"); - config.add_options() - ("optimization", - boost::program_options::value<int>(&opt)->default_value(10), - "optimization level") - ("include-path,I", - boost::program_options::value< std::vector<std::string> >()->composing(), - "include path"); - - // Hidden options, will be allowed both on command line and - // in config file, but will not be shown to the user. - boost::program_options::options_description hidden("Hidden options"); - hidden.add_options() - ("input-file", - boost::program_options::value< std::vector<std::string> >(), - "input file"); - - boost::program_options::options_description cmdline_options; - cmdline_options.add(generic).add(config).add(hidden); - - boost::program_options::options_description config_file_options; - config_file_options.add(config).add(hidden); - - boost::program_options::options_description visible("Allowed options"); - visible.add(generic).add(config); - - boost::program_options::positional_options_description p; - p.add("input-file", -1); - - boost::program_options::variables_map vm; - boost::program_options:: - store (boost::program_options::command_line_parser(argc, argv). 
- options (cmdline_options).positional(p).run(), vm); - - std::ifstream ifs ("request_parser.cfg"); - boost::program_options::store (parse_config_file (ifs, config_file_options), - vm); - boost::program_options::notify (vm); - - if (vm.count ("help")) { - std::cout << visible << std::endl; - return 0; - } - - if (vm.count ("version")) { - std::cout << "Open Travel Request Parser, version 1.0" << std::endl; - return 0; - } - - if (vm.count ("include-path")) { - std::cout << "Include paths are: " - << vm["include-path"].as< std::vector<std::string> >() - << std::endl; - } - - if (vm.count ("input-file")) { - std::cout << "Input files are: " - << vm["input-file"].as< std::vector<std::string> >() - << std::endl; - } - - std::cout << "Optimization level is " << opt << std::endl; - - return 0; -} - - -// /////////////// M A I N ///////////////// -int main (int argc, char* argv[]) { - try { - - // Travel query - OPENTREP::TravelQuery_T lTravelQuery ("cdg"); - - // Output log File - std::string lLogFilename ("searcher.log"); - - // Xapian database name (directory of the index) - OPENTREP::TravelDatabaseName_T lXapianDatabaseName ("traveldb"); - - if (argc >= 1 && argv[1] != NULL) { - std::istringstream istr (argv[1]); - istr >> lTravelQuery; - } - - if (argc >= 2 && argv[2] != NULL) { - std::istringstream istr (argv[2]); - istr >> lLogFilename; - } - - if (argc >= 3 && argv[3] != NULL) { - std::istringstream istr (argv[3]); - istr >> lXapianDatabaseName; - } - - // Set the log parameters - std::ofstream logOutputFile; - // open and clean the log outputfile - logOutputFile.open (lLogFilename.c_str()); - logOutputFile.clear(); - - // Initialise the context - OPENTREP::OPENTREP_Service opentrepService; - opentrepService.init (logOutputFile, lXapianDatabaseName); - - // Query the Xapian database (index) - opentrepService.interpretTravelRequest (lTravelQuery); - - // Close the Log outputFile - logOutputFile.close(); - - } catch (const OPENTREP::RootException& otexp) { - 
std::cerr << "Standard exception: " << otexp.what() << std::endl; - return -1; - - } catch (const std::exception& stde) { - std::cerr << "Standard exception: " << stde.what() << std::endl; - return -1; - - } catch (...) { - return -1; - } - - return 0; -} +// C +#include <assert.h> +// STL +#include <iostream> +#include <sstream> +#include <fstream> +#include <map> +#include <vector> +// Boost (Extended STL) +#include <boost/date_time/posix_time/posix_time.hpp> +#include <boost/date_time/gregorian/gregorian.hpp> +#include <boost/program_options.hpp> +// OPENTREP +#include <opentrep/OPENTREP_Service.hpp> + +// ///////// Parsing of Options & Configuration ///////// +// A helper function to simplify the main part. +template<class T> std::ostream& operator<< (std::ostream& os, + const std::vector<T>& v) { + std::copy (v.begin(), v.end(), std::ostream_iterator<T> (std::cout, " ")); + return os; +} + +int readConfiguration (int argc, char* argv[]) { + int opt; + + // Declare a group of options that will be + // allowed only on command line + boost::program_options::options_description generic("Generic options"); + generic.add_options() + ("version,v", "print version string") + ("help,h", "produce help message"); + + // Declare a group of options that will be allowed both on command line and in + // config file + boost::program_options::options_description config("Configuration"); + config.add_options() + ("optimization", + boost::program_options::value<int>(&opt)->default_value(10), + "optimization level") + ("include-path,I", + boost::program_options::value< std::vector<std::string> >()->composing(), + "include path"); + + // Hidden options, will be allowed both on command line and + // in config file, but will not be shown to the user. 
+ boost::program_options::options_description hidden("Hidden options"); + hidden.add_options() + ("input-file", + boost::program_options::value< std::vector<std::string> >(), + "input file"); + + boost::program_options::options_description cmdline_options; + cmdline_options.add(generic).add(config).add(hidden); + + boost::program_options::options_description config_file_options; + config_file_options.add(config).add(hidden); + + boost::program_options::options_description visible("Allowed options"); + visible.add(generic).add(config); + + boost::program_options::positional_options_description p; + p.add("input-file", -1); + + boost::program_options::variables_map vm; + boost::program_options:: + store (boost::program_options::command_line_parser(argc, argv). + options (cmdline_options).positional(p).run(), vm); + + std::ifstream ifs ("request_parser.cfg"); + boost::program_options::store (parse_config_file (ifs, config_file_options), + vm); + boost::program_options::notify (vm); + + if (vm.count ("help")) { + std::cout << visible << std::endl; + return 0; + } + + if (vm.count ("version")) { + std::cout << "Open Travel Request Parser, version 1.0" << std::endl; + return 0; + } + + if (vm.count ("include-path")) { + std::cout << "Include paths are: " + << vm["include-path"].as< std::vector<std::string> >() + << std::endl; + } + + if (vm.count ("input-file")) { + std::cout << "Input files are: " + << vm["input-file"].as< std::vector<std::string> >() + << std::endl; + } + + std::cout << "Optimization level is " << opt << std::endl; + + return 0; +} + + +// /////////////// M A I N ///////////////// +int main (int argc, char* argv[]) { + try { + + // Travel query + OPENTREP::TravelQuery_T lTravelQuery ("sna francisco rio de janero lso angeles"); + + // Output log File + std::string lLogFilename ("searcher.log"); + + // Xapian database name (directory of the index) + OPENTREP::TravelDatabaseName_T lXapianDatabaseName ("traveldb"); + + if (argc >= 1 && argv[1] != NULL) { + 
std::istringstream istr (argv[1]); + istr >> lTravelQuery; + } + + if (argc >= 2 && argv[2] != NULL) { + std::istringstream istr (argv[2]); + istr >> lLogFilename; + } + + if (argc >= 3 && argv[3] != NULL) { + std::istringstream istr (argv[3]); + istr >> lXapianDatabaseName; + } + + // Set the log parameters + std::ofstream logOutputFile; + // open and clean the log outputfile + logOutputFile.open (lLogFilename.c_str()); + logOutputFile.clear(); + + // Initialise the context + OPENTREP::OPENTREP_Service opentrepService; + opentrepService.init (logOutputFile, lXapianDatabaseName); + + // Query the Xapian database (index) + opentrepService.interpretTravelRequest (lTravelQuery); + + // Close the Log outputFile + logOutputFile.close(); + + } catch (const OPENTREP::RootException& otexp) { + std::cerr << "Standard exception: " << otexp.what() << std::endl; + return -1; + + } catch (const std::exception& stde) { + std::cerr << "Standard exception: " << stde.what() << std::endl; + return -1; + + } catch (...) 
{ + return -1; + } + + return 0; +} Modified: trunk/opentrep/opentrep/bom/BomAbstract.hpp =================================================================== --- trunk/opentrep/opentrep/bom/BomAbstract.hpp 2009-07-16 16:45:48 UTC (rev 132) +++ trunk/opentrep/opentrep/bom/BomAbstract.hpp 2009-07-17 00:10:38 UTC (rev 133) @@ -1,5 +1,5 @@ -#ifndef __OPENTREP_BOMABSTRACT_HPP -#define __OPENTREP_BOMABSTRACT_HPP +#ifndef __OPENTREP_BOM_BOMABSTRACT_HPP +#define __OPENTREP_BOM_BOMABSTRACT_HPP // ////////////////////////////////////////////////////////////////////// // Import section @@ -92,4 +92,4 @@ return ioIn; } -#endif // __OPENTREP_BOMABSTRACT_HPP +#endif // __OPENTREP_BOM_BOMABSTRACT_HPP Added: trunk/opentrep/opentrep/bom/DocumentList.hpp =================================================================== --- trunk/opentrep/opentrep/bom/DocumentList.hpp (rev 0) +++ trunk/opentrep/opentrep/bom/DocumentList.hpp 2009-07-17 00:10:38 UTC (rev 133) @@ -0,0 +1,18 @@ +#ifndef __OPENTREP_BOM_DOCUMENTLIST_HPP +#define __OPENTREP_BOM_DOCUMENTLIST_HPP + +// ////////////////////////////////////////////////////////////////////// +// Import section +// ////////////////////////////////////////////////////////////////////// +// STL +#include <map> +// Xapian +#include <xapian.h> + +namespace OPENTREP { + + /** List of Xapian documents. 
*/ + typedef std::multimap<Xapian::percent, Xapian::Document> DocumentList_T; + +} +#endif // __OPENTREP_BOM_DOCUMENTLIST_HPP Added: trunk/opentrep/opentrep/bom/Result.cpp =================================================================== --- trunk/opentrep/opentrep/bom/Result.cpp (rev 0) +++ trunk/opentrep/opentrep/bom/Result.cpp 2009-07-17 00:10:38 UTC (rev 133) @@ -0,0 +1,131 @@ +// ////////////////////////////////////////////////////////////////////// +// Import section +// ////////////////////////////////////////////////////////////////////// +// C +#include <cassert> +#include <string> +#include <sstream> +// OPENTREP +#include <opentrep/bom/StringMatcher.hpp> +#include <opentrep/bom/Result.hpp> +#include <opentrep/service/Logger.hpp> + +namespace OPENTREP { + + // ////////////////////////////////////////////////////////////////////// + Result::Result (const Xapian::Database& iDatabase) + : _database (iDatabase) { + init(); + } + + // ////////////////////////////////////////////////////////////////////// + Result::~Result () { + } + + // ////////////////////////////////////////////////////////////////////// + void Result::init () { + _documentList.clear(); + } + + // ////////////////////////////////////////////////////////////////////// + const std::string Result::describeShortKey() const { + std::ostringstream oStr; + oStr << _queryString; + return oStr.str(); + } + + // ////////////////////////////////////////////////////////////////////// + const std::string Result::describeKey() const { + return describeShortKey(); + } + + // ////////////////////////////////////////////////////////////////////// + std::string Result::toString() const { + std::ostringstream oStr; + oStr << describeShortKey() << std::endl; + + for (DocumentList_T::const_iterator itDoc = _documentList.begin(); + itDoc != _documentList.end(); ++itDoc) { + const Xapian::percent& lPercent = itDoc->first; + const Xapian::Document& lDocument = itDoc->second; + const Xapian::docid& lDocID = 
lDocument.get_docid(); + oStr << "Document ID " << lDocID << "\t" << lPercent + << "% [" << lDocument.get_data() << "]" << std::endl; + } + + return oStr.str(); + } + + // ////////////////////////////////////////////////////////////////////// + void Result::toStream (std::ostream& ioOut) const { + ioOut << toString(); + } + + // ////////////////////////////////////////////////////////////////////// + void Result::fromStream (std::istream& ioIn) { + } + + // ////////////////////////////////////////////////////////////////////// + const Xapian::Document& Result::getBestMatchingDocument() const { + /** + Retrieve the best matching document. As the document list (STL map) + is sorted by ascending order of the matching percentage, the best + matching one is located at the end (back) of the list (STL map). + */ + DocumentList_T::const_reverse_iterator itDocument = _documentList.rbegin(); + return itDocument->second; + } + + // ////////////////////////////////////////////////////////////////////// + const Xapian::percent& Result::getBestMatchingPercentage() const { + /** + Retrieve the best matching document. As the document list (STL map) + is sorted by ascending order of the matching percentage, the best + matching one is located at the end (back) of the list (STL map). 
+ */ + DocumentList_T::const_reverse_iterator itDocument = _documentList.rbegin(); + return itDocument->first; + } + + // ////////////////////////////////////////////////////////////////////// + void Result::searchString () { + + // Catch any Xapian::Error exceptions thrown + try { + + bool shouldStop = false; + while (shouldStop == false) { + // DEBUG + /* + OPENTREP_LOG_DEBUG (std::endl << "--------------------------------" + << std::endl << "Current query string: `" << ioQueryString << "'"); + */ + + // Retrieve the list of documents matching the query string + Xapian::MSet lMatchingSet; + StringMatcher::searchString (lMatchingSet, _queryString, _database); + + // Create the corresponding list of documents + StringMatcher::createDocumentListFromMSet (lMatchingSet, _documentList); + + // Stop if a result is found. + if (_documentList.empty() == false) { + shouldStop = true; + break; + } + + // Remove a word from the query string + StringMatcher::removeOneWord (_queryString); + + // Stop when the resulting string gets empty. + if (_queryString.empty() == true) { + shouldStop = true; + } + } + + } catch (const Xapian::Error& error) { + OPENTREP_LOG_ERROR ("Exception: " << error.get_msg()); + } + } + +} Added: trunk/opentrep/opentrep/bom/Result.hpp =================================================================== --- trunk/opentrep/opentrep/bom/Result.hpp (rev 0) +++ trunk/opentrep/opentrep/bom/Result.hpp 2009-07-17 00:10:38 UTC (rev 133) @@ -0,0 +1,100 @@ +#ifndef __OPENTREP_BOM_RESULT_HPP +#define __OPENTREP_BOM_RESULT_HPP + +// ////////////////////////////////////////////////////////////////////// +// Import section +// ////////////////////////////////////////////////////////////////////// +// OpenTREP +#include <opentrep/OPENTREP_Types.hpp> +#include <opentrep/bom/BomAbstract.hpp> +#include <opentrep/bom/DocumentList.hpp> + +namespace OPENTREP { + + /** Class wrapping functions on a list of Xapian Document objects. 
*/ + class Result : public BomAbstract { + friend class FacResult; + friend class ResultHolder; + public: + // ////////////// Getters ///////////// + /** Get the query string. */ + const TravelQuery_T& getQueryString () const { + return _queryString; + } + + /** Get the list of Xapian document objects. */ + const DocumentList_T& getDocumentList() const { + return _documentList; + } + + /** Retrieve the best matching Xapian document object. */ + const Xapian::Document& getBestMatchingDocument() const; + + /** Retrieve the percentage corresponding to the best matching + Xapian document object. */ + const Xapian::percent& getBestMatchingPercentage() const; + + + // ////////////// Setters ///////////// + /** Set the query string. */ + void setQueryString (const TravelQuery_T& iQueryString) { + _queryString = iQueryString; + } + + + public: + // /////////// Business methods ///////// + /** Retrieve the list of documents matching the query string. */ + void searchString (); + + + public: + // /////////// Display support methods ///////// + /** Dump a Business Object into an output stream. + @param ostream& the output stream. */ + void toStream (std::ostream& ioOut) const; + + /** Read a Business Object from an input stream. + @param istream& the input stream. */ + void fromStream (std::istream& ioIn); + + /** Get the serialised version of the Business Object. */ + std::string toString() const; + + /** Get a string describing the whole key (differentiating two objects + at any level). */ + const std::string describeKey() const; + + /** Get a string describing the short key (differentiating two objects + at the same level). */ + const std::string describeShortKey() const; + + + private: + // ////////////// Constructors and Destructors ///////////// + /** Main constructor. */ + Result (const Xapian::Database&); + /** Default constructor. */ + Result (); + /** Default copy constructor. */ + Result (const Result&); + /** Destructor. 
*/ + ~Result (); + /** Initialise (reset the list of documents). */ + void init (); + + + private: + // /////////////// Attributes //////////////// + /** Query string having generated the list of document. */ + TravelQuery_T _queryString; + + /** Xapian database. */ + const Xapian::Database& _database; + + /** List of Xapian document objects. */ + DocumentList_T _documentList; + }; + +} +#endif // __OPENTREP_BOM_RESULT_HPP Added: trunk/opentrep/opentrep/bom/ResultHolder.cpp =================================================================== --- trunk/opentrep/opentrep/bom/ResultHolder.cpp (rev 0) +++ trunk/opentrep/opentrep/bom/ResultHolder.cpp 2009-07-17 00:10:38 UTC (rev 133) @@ -0,0 +1,133 @@ +// ////////////////////////////////////////////////////////////////////// +// Import section +// ////////////////////////////////////////////////////////////////////// +// C +#include <cassert> +#include <sstream> +// Xapian +#include <xapian.h> +// OpenTREP +#include <opentrep/bom/StringMatcher.hpp> +#include <opentrep/bom/Result.hpp> +#include <opentrep/bom/ResultHolder.hpp> +#include <opentrep/service/Logger.hpp> + +namespace OPENTREP { + + // ////////////////////////////////////////////////////////////////////// + ResultHolder::ResultHolder (const TravelQuery_T& iQueryString, + const Xapian::Database& iDatabase) + : _queryString (iQueryString), _database (iDatabase) { + init(); + } + + // ////////////////////////////////////////////////////////////////////// + ResultHolder::~ResultHolder () { + } + + // ////////////////////////////////////////////////////////////////////// + void ResultHolder::init () { + _resultList.clear(); + } + + // ////////////////////////////////////////////////////////////////////// + const std::string ResultHolder::describeShortKey() const { + std::ostringstream oStr; + oStr << _queryString; + return oStr.str(); + } + + // ////////////////////////////////////////////////////////////////////// + const std::string ResultHolder::describeKey() 
const { + return describeShortKey(); + } + + // ////////////////////////////////////////////////////////////////////// + std::string ResultHolder::toString() const { + std::ostringstream oStr; + oStr << describeShortKey() << std::endl; + + for (ResultList_T::const_iterator itResult = _resultList.begin(); + itResult != _resultList.end(); ++itResult) { + const Result* lResult_ptr = *itResult; + assert (lResult_ptr != NULL); + + oStr << " ==> " << std::endl << lResult_ptr->toString(); + } + + return oStr.str(); + } + + // ////////////////////////////////////////////////////////////////////// + void ResultHolder::toStream (std::ostream& ioOut) const { + ioOut << toString(); + } + + // ////////////////////////////////////////////////////////////////////// + void ResultHolder::fromStream (std::istream& ioIn) { + } + + // ////////////////////////////////////////////////////////////////////// + void ResultHolder::searchString () { + + // Catch any Xapian::Error exceptions thrown + try { + + std::string lRemainingQueryString (_queryString); + bool shouldStop = false; + while (shouldStop == false) { + // DEBUG + /* + OPENTREP_LOG_DEBUG (std::endl + << "================================" << std::endl + << "Current query string: `" << lRemainingQueryString << "'"); + */ + /** + Search with the initial full string, then by removing a word if + there was no result, then by removing another word if there was + again no result, until either a result is found or the + resulting string gets empty. + */ + DocumentList_T lDocumentList; + Result* lResult_ptr = new Result (_database); + assert (lResult_ptr != NULL); + + std::string lQueryString (lRemainingQueryString); + + // + lResult_ptr->setQueryString (lQueryString); + lResult_ptr->searchString (); + + // Add the Result object (holding the list of matching + // documents) to the dedicated list. 
+ _resultList.push_back (lResult_ptr); + + /** + Remove, from the lRemainingQueryString string, the part which + has been already successfully parsed. + <br>For instance, when 'sna francisco rio de janeiro' is the + initial full clean query string, the searchString() method + first reduce the query string to 'sna francisco', which + successfully matches against SFO (San Francisco airport). + <br>Then, the remaining part of the query string to be parsed is + 'rio de janeiro'. So, the already parsed part, namely + 'sna francisco', must be subtracted from the initial query string. + */ + lQueryString = lResult_ptr->getQueryString(); + StringMatcher::subtractParsedToRemaining (lQueryString, + lRemainingQueryString); + + // If there is nothing left to be parsed, we have then finished + // to parse the initial string. + if (lRemainingQueryString.empty() == true) { + shouldStop = true; + break; + } + } + + } catch (const Xapian::Error& error) { + OPENTREP_LOG_ERROR ("Exception: " << error.get_msg()); + } + } + +} Added: trunk/opentrep/opentrep/bom/ResultHolder.hpp =================================================================== --- trunk/opentrep/opentrep/bom/ResultHolder.hpp (rev 0) +++ trunk/opentrep/opentrep/bom/ResultHolder.hpp 2009-07-17 00:10:38 UTC (rev 133) @@ -0,0 +1,94 @@ +#ifndef __OPENTREP_BOM_RESULTHOLDER_HPP +#define __OPENTREP_BOM_RESULTHOLDER_HPP + +// ////////////////////////////////////////////////////////////////////// +// Import section +// ////////////////////////////////////////////////////////////////////// +// OpenTREP +#include <opentrep/OPENTREP_Types.hpp> +#include <opentrep/bom/BomAbstract.hpp> +#include <opentrep/bom/ResultList.hpp> + +// Forward declarations +namespace Xapian { + class Database; +} + +namespace OPENTREP { + + /** Class wrapping functions on a list of Result objects. 
*/ + class ResultHolder : public BomAbstract { + friend class FacResultHolder; + friend class RequestInterpreter; + public: + // ////////////// Getters ///////////// + /** Get the query string. */ + const TravelQuery_T& getQueryString () const { + return _queryString; + } + + /** Retrieve the list of result objects. */ + const ResultList_T& getResultList() const { + return _resultList; + } + + + // ////////////// Setters ///////////// + + + public: + // /////////// Business methods ///////// + /** Retrieve the list of documents matching the query string. */ + void searchString (); + + + public: + // /////////// Display support methods ///////// + /** Dump a Business Object into an output stream. + @param ostream& the output stream. */ + void toStream (std::ostream& ioOut) const; + + /** Read a Business Object from an input stream. + @param istream& the input stream. */ + void fromStream (std::istream& ioIn); + + /** Get the serialised version of the Business Object. */ + std::string toString() const; + + /** Get a string describing the whole key (differentiating two objects + at any level). */ + const std::string describeKey() const; + + /** Get a string describing the short key (differentiating two objects + at the same level). */ + const std::string describeShortKey() const; + + + private: + // ////////////// Constructors and Destructors ///////////// + /** Main constructor. */ + ResultHolder (const TravelQuery_T&, const Xapian::Database&); + /** Default constructor. */ + ResultHolder (); + /** Default copy constructor. */ + ResultHolder (const ResultHolder&); + /** Destructor. */ + ~ResultHolder (); + /** Initialise (reset the list of documents). */ + void init (); + + + private: + // /////////////// Attributes //////////////// + /** Query string having generated the list of document. */ + const TravelQuery_T _queryString; + + /** Xapian database. */ + const Xapian::Database& _database; + + /** List of result objects. 
*/ + ResultList_T _resultList; + }; + +} +#endif // __OPENTREP_BOM_RESULTHOLDER_HPP Added: trunk/opentrep/opentrep/bom/ResultList.hpp =================================================================== --- trunk/opentrep/opentrep/bom/ResultList.hpp (rev 0) +++ trunk/opentrep/opentrep/bom/ResultList.hpp 2009-07-17 00:10:38 UTC (rev 133) @@ -0,0 +1,19 @@ +#ifndef __OPENTREP_BOM_RESULTLIST_HPP +#define __OPENTREP_BOM_RESULTLIST_HPP + +// ////////////////////////////////////////////////////////////////////// +// Import section +// ////////////////////////////////////////////////////////////////////// +// STL +#include <list> + +namespace OPENTREP { + + // Forward declarations + class Result; + + /** List of result objects. */ + typedef std::list<Result*> ResultList_T; + +} +#endif // __OPENTREP_BOM_RESULTLIST_HPP Added: trunk/opentrep/opentrep/bom/StringMatcher.cpp =================================================================== --- trunk/opentrep/opentrep/bom/StringMatcher.cpp (rev 0) +++ trunk/opentrep/opentrep/bom/StringMatcher.cpp 2009-07-17 00:10:38 UTC (rev 133) @@ -0,0 +1,319 @@ +// ////////////////////////////////////////////////////////////////////// +// Import section +// ////////////////////////////////////////////////////////////////////// +// C +#include <cassert> +// STL +#include <iostream> +#include <sstream> +#include <string> +#include <list> +#include <map> +// Boost +#include <boost/tokenizer.hpp> +// Xapian +#include <xapian.h> +// OpenTREP +#include <opentrep/bom/WordHolder.hpp> +#include <opentrep/bom/StringMatcher.hpp> +#include <opentrep/service/Logger.hpp> + +namespace OPENTREP { + + /** For each of the word in the given list, perform spelling + corrections. If the word is correctly spelled, it is copied as + is. Otherwise, a corrected version is stored. 
*/ + // ////////////////////////////////////////////////////////////////////// + static void createCorrectedWordList (const WordList_T& iOriginalWordList, + WordList_T& ioCorrectedWordList, + const Xapian::Database& iDatabase) { + // Empty the target list + ioCorrectedWordList.clear(); + + // Catch any Xapian::Error exceptions thrown + try { + + for (WordList_T::const_iterator itWord = iOriginalWordList.begin(); + itWord != iOriginalWordList.end(); ++itWord) { + const std::string& lOriginalWord = *itWord; + const std::string& lSuggestedWord = + iDatabase.get_spelling_suggestion (lOriginalWord, 3); + + if (lSuggestedWord.empty() == true) { + ioCorrectedWordList.push_back (lOriginalWord); + + } else { + ioCorrectedWordList.push_back (lSuggestedWord); + } + + // DEBUG + /* + OPENTREP_LOG_DEBUG ("Original word: `" << lOriginalWord + << "' ==> corrected word: `" << lSuggestedWord << "'"); + */ + } + + } catch (const Xapian::Error& error) { + std::cerr << "Exception: " << error.get_msg() << std::endl; + } + } + + // /////////////////////////////////////////////////////////////////// + void StringMatcher::searchString (Xapian::MSet& ioMatchingSet, + const std::string& iSearchString, + const Xapian::Database& ioDatabase) { + + // Catch any Xapian::Error exceptions thrown + try { + + /** + Build another string, in addition to the original one. Overall, + there are thus two strings: + <br><ul> + <li>One with the original words given by the user</li> + <li>One with the orthographic-corrected words, wherever + relevant (otherwise, the original word is taken)</li> + </ul> + <br>For instance, 'sna francisco' would give the following + two strings: + <br><ul> + <li>'sna francicso' (original)</li> + <li>'sna francisco' (corrected, where relevant, word by word)</li> + </ul> + <br>Note that, as 'sna' exists in the dictionary (Santa Ana, CA, USA), + it is not replaced. We shall take care of the whole string in a + further step below. 
+ */ + WordList_T lOriginalWordList; + WordHolder::tokeniseStringIntoWordList (iSearchString, lOriginalWordList); + + /** + We rebuild a clean query string from the word list. Indeed, the original + string may have contained a few separators (e.g., '/', ';', etc.), which + have been removed by the tokeniseStringIntoWordList() method. All those + separators are thus replaced by spaces. + For instance, the 'san francisco, ca, us' initial string would be + replaced by 'san francisco ca us'. + */ + const std::string lOriginalQueryString = + WordHolder::createStringFromWordList (lOriginalWordList); + + WordList_T lCorrectedWordList; + createCorrectedWordList (lOriginalWordList, lCorrectedWordList, + ioDatabase); + + const std::string lCorrectedQueryString = + WordHolder::createStringFromWordList (lCorrectedWordList); + + /** + Try to find, if relevant, an orthographic suggestion for the whole + phrase/string. With the above example, 'sna francisco' yields the + suggestion 'san francisco'. + */ + const std::string lFullWordCorrectedString = + ioDatabase.get_spelling_suggestion (lOriginalQueryString, 3); + + // DEBUG + /* + OPENTREP_LOG_DEBUG ("Query string `" << lOriginalQueryString + << "' ==> corrected query string: `" << lCorrectedQueryString + << "' and correction for the full query string: `" + << lFullWordCorrectedString << "'"); + */ + + // Build the query object + Xapian::QueryParser lQueryParser; + lQueryParser.set_database (ioDatabase); + /** + As explained in http://www.xapian.org/docs/queryparser.html, + Xapian::Query::OP_ADJ is better than Xapian::Query::OP_PHRASE, + but only available from version 1.0.13 of Xapian. 
+ */ + // lQueryParser.set_default_op (Xapian::Query::OP_ADJ); + lQueryParser.set_default_op (Xapian::Query::OP_PHRASE); + + // DEBUG + /* + OPENTREP_LOG_DEBUG ("Query parser `" << lQueryParser.get_description() + << "'"); + */ + + /** + The Xapian::QueryParser::parse_query() method aggregates all the words + with operators inbetween them (here, the "PHRASE" operator). + With the above example ('sna francicso'), it yields + "sna PHRASE 2 francicso". + */ + Xapian::Query lQuery = + lQueryParser.parse_query (lOriginalQueryString, + Xapian::QueryParser::FLAG_BOOLEAN + | Xapian::QueryParser::FLAG_PHRASE + | Xapian::QueryParser::FLAG_LOVEHATE + | Xapian::QueryParser::FLAG_SPELLING_CORRECTION); + /** + Strangely enough (is it?), the corrected query given by the Xapian + QueryParser corresponds to the full original string, where words + have been corrected one by one, but considered as a single block. + With the above example, 'sna francicso' yields (wrongly) + 'sna francisco', instead of "sna PHRASE 2 francisco", as generated + by the following code. + */ + // Xapian::Query lCorrectedQuery = + // lQueryParser.get_corrected_query_string(); + Xapian::Query lCorrectedQuery = + lQueryParser.parse_query (lCorrectedQueryString, + Xapian::QueryParser::FLAG_BOOLEAN + | Xapian::QueryParser::FLAG_PHRASE + | Xapian::QueryParser::FLAG_LOVEHATE); + + /** + As, with the above example, the full corrected string is + 'san francisco', it yields the query "san PHRASE 2 francisco", + which is eventually right. 
+ */ + Xapian::Query lFullQueryCorrected = + lQueryParser.parse_query (lFullWordCorrectedString, + Xapian::QueryParser::FLAG_BOOLEAN + | Xapian::QueryParser::FLAG_PHRASE + | Xapian::QueryParser::FLAG_LOVEHATE); + + // DEBUG + /* + OPENTREP_LOG_DEBUG ("Query `" << lQuery.get_description() + << "', corrected query `" << lCorrectedQuery.get_description() + << "' and corrected for full query `" + << lFullQueryCorrected.get_description() << "' "); + */ + + // Start an enquire session + Xapian::Enquire enquire (ioDatabase); + + // Give the query object to the enquire session + enquire.set_query (lQuery); + + // Get the top 10 results of the query + ioMatchingSet = enquire.get_mset (0, 10); + + // Display the results + int nbMatches = ioMatchingSet.size(); + + // DEBUG + /* + OPENTREP_LOG_DEBUG (nbMatches << " results found"); + */ + + /** + When no match is found, we search on the corrected phrase/string + (where the words have been corrected one by one). + */ + if (nbMatches == 0) { + enquire.set_query (lCorrectedQuery); + ioMatchingSet = enquire.get_mset (0, 10); + + // Display the results + nbMatches = ioMatchingSet.size(); + + // DEBUG + /* + OPENTREP_LOG_DEBUG(nbMatches << " results found on corrected string"); + */ + } + + /** + If there is still no match, we search on the string corrected + as a whole. 
+ */ + if (nbMatches == 0) { + enquire.set_query (lFullQueryCorrected); + ioMatchingSet = enquire.get_mset (0, 10); + + // Display the results + nbMatches = ioMatchingSet.size(); + + // DEBUG + /* + OPENTREP_LOG_DEBUG (nbMatches + << " results found on corrected full string"); + */ + } + + + // DEBUG + /* + const Xapian::Query& lActualQuery = enquire.get_query(); + OPENTREP_LOG_DEBUG ("Actual query `" << lActualQuery.get_description() + << "'"); + */ + + } catch (const Xapian::Error& error) { + std::cerr << "Exception: " << error.get_msg() << std::endl; + } + } + + // ////////////////////////////////////////////////////////////////////// + void StringMatcher:: + createDocumentListFromMSet (const Xapian::MSet& iMatchingSet, + DocumentList_T& ioDocumentList) { + // Empty the list of documents + ioDocumentList.clear(); + + for (Xapian::MSetIterator itDoc = iMatchingSet.begin(); + itDoc != iMatchingSet.end(); ++itDoc) { + const Xapian::Document& lDocument = itDoc.get_document(); + + ioDocumentList.insert (DocumentList_T::value_type (itDoc.get_percent(), + lDocument)); + } + } + + // ////////////////////////////////////////////////////////////////////// + void StringMatcher::removeOneWord (std::string& ioQueryString) { + assert (ioQueryString.empty() == false); + + WordList_T lWordList; + WordHolder::tokeniseStringIntoWordList (ioQueryString, lWordList); + assert (lWordList.empty() == false); + + // Remove the furthest right word + lWordList.pop_back(); + + const std::string& lReducedString = + WordHolder::createStringFromWordList (lWordList); + ioQueryString = lReducedString; + } + + // ////////////////////////////////////////////////////////////////////// + void StringMatcher:: + subtractParsedToRemaining (const std::string& iAlreadyParsedQueryString, + std::string& ioRemainingQueryString) { + /** + Remove, from the lRemainingQueryString string, the part which + has been already successfully parsed. 
+ <br>For instance, when 'sna francisco rio de janeiro' is the + initial full clean query string, the searchString() method + first reduce the query string to 'sna francisco', which + successfully matches against SFO (San Francisco airport). + <br>Then, the remaining part of the query string to be parsed is + 'rio de janeiro'. So, the already parsed part must be subtracted + from the initial query string. + */ + WordList_T lRemainingWordList; + WordHolder::tokeniseStringIntoWordList (ioRemainingQueryString, + lRemainingWordList); + + WordList_T lParsedWordList; + WordHolder::tokeniseStringIntoWordList (iAlreadyParsedQueryString, + lParsedWordList); + + unsigned int idx = lParsedWordList.size(); + for ( ; idx != 0 && lRemainingWordList.empty() == false; --idx) { + lRemainingWordList.pop_front(); + } + + // Build the remaining part of the string still to be parsed. + // <br>Note that that part may be empty. + ioRemainingQueryString = + WordHolder::createStringFromWordList (lRemainingWordList); + } + +} Added: trunk/opentrep/opentrep/bom/StringMatcher.hpp =================================================================== --- trunk/opentrep/opentrep/bom/StringMatcher.hpp (rev 0) +++ trunk/opentrep/opentrep/bom/StringMatcher.hpp 2009-07-17 00:10:38 UTC (rev 133) @@ -0,0 +1,52 @@ +#ifndef __OPENTREP_BOM_STRINGMATCHER_HPP +#define __OPENTREP_BOM_STRINGMATCHER_HPP + +// ////////////////////////////////////////////////////////////////////// +// Import section +// ////////////////////////////////////////////////////////////////////// +// STL +#include <string> +// OpenTREP +#include <opentrep/bom/BomAbstract.hpp> +#include <opentrep/bom/WordList.hpp> +#include <opentrep/bom/DocumentList.hpp> + +// Forward declarations +namespace Xapian { + class MSet; + class Database; +} + +namespace OPENTREP { + + /** Class grouping a few utility methods based on the Xapian library. + <br>See <a href="http://www.xapian.org">Xapian's Web site</a> + for more information. 
*/ + class StringMatcher : public BomAbstract { + public: + /** Search, within the Xapian database, for occurrences of the + words of the search string. */ + static void searchString (Xapian::MSet&, const std::string& iSearchString, + const Xapian::Database&); + + /** Copy the Xapian MSet (matching set) object into a document + list object. */ + static void createDocumentListFromMSet (const Xapian::MSet&, + DocumentList_T&); + + /** Remove the word furthest at right. */ + static void removeOneWord (std::string& ioQueryString); + + /** Remove, from a string, the part corresponding to the one given + as parameter. */ + static void + subtractParsedToRemaining (const std::string& iAlreadyParsedQueryString, + std::string& ioRemainingQueryString); + + + private: + + }; + +} +#endif // __OPENTREP_BOM_STRINGMATCHER_HPP Added: trunk/opentrep/opentrep/bom/WordHolder.cpp =================================================================== --- trunk/opentrep/opentrep/bom/WordHolder.cpp (rev 0) +++ trunk/opentrep/opentrep/bom/WordHolder.cpp 2009-07-17 00:10:38 UTC (rev 133) @@ -0,0 +1,53 @@ +// ////////////////////////////////////////////////////////////////////// +// Import section +// ////////////////////////////////////////////////////////////////////// +// C +#include <cassert> +// Boost +#include <boost/tokenizer.hpp> +// OpenTREP +#include <opentrep/bom/WordHolder.hpp> + +namespace OPENTREP { + + // Define the separators + static const boost::char_separator<char> SepatorList (" .,;:|+-*/_=!@#$%`~^&(){}[]?'<>\""); + + // ////////////////////////////////////////////////////////////////////// + void WordHolder::tokeniseStringIntoWordList (const std::string& iPhrase, + WordList_T& ioWordList) { + // Empty the word list + ioWordList.clear(); + + // Boost Tokeniser + typedef boost::tokenizer<boost::char_separator<char> > Tokeniser_T; + + // Initialise the phrase to be tokenised + Tokeniser_T lTokens (iPhrase, SepatorList); + for (Tokeniser_T::const_iterator tok_iter = 
lTokens.begin(); + tok_iter != lTokens.end(); ++tok_iter) { + const std::string& lTerm = *tok_iter; + ioWordList.push_back (lTerm); + } + + } + + // ////////////////////////////////////////////////////////////////////// + std::string WordHolder:: + createStringFromWordList (const WordList_T& iWordList) { + std::ostringstream oStr; + + unsigned short idx = iWordList.size(); + for (WordList_T::const_iterator itWord = iWordList.begin(); + itWord != iWordList.end(); ++itWord, --idx) { + const std::string& lWord = *itWord; + oStr << lWord; + if (idx > 1) { + oStr << " "; + } + } + + return oStr.str(); + } + +} Added: trunk/opentrep/opentrep/bom/WordHolder.hpp =================================================================== --- trunk/opentrep/opentrep/bom/WordHolder.hpp (rev 0) +++ trunk/opentrep/opentrep/bom/WordHolder.hpp 2009-07-17 00:10:38 UTC (rev 133) @@ -0,0 +1,43 @@ +#ifndef __OPENTREP_BOM_WORDHOLDER_HPP +#define __OPENTREP_BOM_WORDHOLDER_HPP + +// ////////////////////////////////////////////////////////////////////// +// Import section +// ////////////////////////////////////////////////////////////////////// +// OpenTREP +#include <opentrep/bom/BomAbstract.hpp> +#include <opentrep/bom/WordList.hpp> + +namespace OPENTREP { + + /** Class wrapping utility functions to transform back and forth + strings from and into a list of words. */ + class WordHolder : public BomAbstract { + friend class FacWordHolder; + public: + + // /////////////// Business Methods //////////////// + /** Tokenise a string into a list of words (STL strings). + <br>The Boost.Tokenizer library is used. */ + static void tokeniseStringIntoWordList (const std::string& iPhrase, + WordList_T& ioWordList); + + /** Serialise a list of words (STL strings) into a single (STL) string. */ + static std::string createStringFromWordList (const WordList_T& iWordList); + + private: + // ////////////// Constructors and Destructors ///////////// + /** Default constructor. 
*/ + WordHolder (); + /** Default copy constructor. */ + WordHolder (const WordHolder&); + /** Destructor. */ + ~WordHolder (); + + + private: + // /////////////// Attributes //////////////// + }; + +} +#endif // __OPENTREP_BOM_WORDHOLDER_HPP Added: trunk/opentrep/opentrep/bom/WordList.hpp =================================================================== --- trunk/opentrep/opentrep/bom/WordList.hpp (rev 0) +++ trunk/opentrep/opentrep/bom/WordList.hpp 2009-07-17 00:10:38 UTC (rev 133) @@ -0,0 +1,18 @@ +#ifndef __OPENTREP_BOM_WORDLIST_HPP +#define __OPENTREP_BOM_WORDLIST_HPP + +// ////////////////////////////////////////////////////////////////////// +// Import section +// ////////////////////////////////////////////////////////////////////// +// STL +#include <string> +#include <list> + +namespace OPENTREP { + + /** List of simple words (STL strings). */ + typedef std::list<std::string> WordList_T; + +} +#endif // __OPENTREP_BOM_WORDLIST_HPP + Modified: trunk/opentrep/opentrep/bom/sources.mk =================================================================== --- trunk/opentrep/opentrep/bom/sources.mk 2009-07-16 16:45:48 UTC (rev 132) +++ trunk/opentrep/opentrep/bom/sources.mk 2009-07-17 00:10:38 UTC (rev 133) @@ -5,10 +5,21 @@ $(top_srcdir)/opentrep/bom/World.hpp \ $(top_srcdir)/opentrep/bom/Names.hpp \ $(top_srcdir)/opentrep/bom/Place.hpp \ - $(top_srcdir)/opentrep/bom/PlaceList.hpp + $(top_srcdir)/opentrep/bom/PlaceList.hpp \ + $(top_srcdir)/opentrep/bom/WordList.hpp \ + $(top_srcdir)/opentrep/bom/WordHolder.hpp \ + $(top_srcdir)/opentrep/bom/DocumentList.hpp \ + $(top_srcdir)/opentrep/bom/Result.hpp \ + $(top_srcdir)/opentrep/bom/ResultList.hpp \ + $(top_srcdir)/opentrep/bom/ResultHolder.hpp \ + $(top_srcdir)/opentrep/bom/StringMatcher.hpp bom_cc_sources = $(top_srcdir)/opentrep/bom/BomAbstract.cpp \ $(top_srcdir)/opentrep/bom/BomType.cpp \ $(top_srcdir)/opentrep/bom/Language.cpp \ $(top_srcdir)/opentrep/bom/World.cpp \ $(top_srcdir)/opentrep/bom/Names.cpp \ - 
$(top_srcdir)/opentrep/bom/Place.cpp + $(top_srcdir)/opentrep/bom/Place.cpp \ + $(top_srcdir)/opentrep/bom/WordHolder.cpp \ + $(top_srcdir)/opentrep/bom/Result.cpp \ + $(top_srcdir)/opentrep/bom/ResultHolder.cpp \ + $(top_srcdir)/opentrep/bom/StringMatcher.cpp Modified: trunk/opentrep/opentrep/command/RequestInterpreter.cpp =================================================================== --- trunk/opentrep/opentrep/command/RequestInterpreter.cpp 2009-07-16 16:45:48 UTC (rev 132) +++ trunk/opentrep/opentrep/command/RequestInterpreter.cpp 2009-07-17 00:10:38 UTC (rev 133) @@ -9,6 +9,8 @@ #include <exception> // OpenTrep #include <opentrep/bom/Place.hpp> +#include <opentrep/bom/ResultHolder.hpp> +#include <opentrep/bom/Result.hpp> #include <opentrep/factory/FacPlace.hpp> #include <opentrep/command/DBManager.hpp> #include <opentrep/command/RequestInterpreter.hpp> @@ -23,44 +25,51 @@ interpretTravelRequest (soci::session& ioSociSession, const TravelDatabaseName_T& iTravelDatabaseName, ... [truncated message content] |
From: <den...@us...> - 2009-07-16 16:45:50
|
Revision: 132 http://opentrep.svn.sourceforge.net/opentrep/?rev=132&view=rev Author: denis_arnaud Date: 2009-07-16 16:45:48 +0000 (Thu, 16 Jul 2009) Log Message: ----------- [Dev] The (testing) string search fully works now. Modified Paths: -------------- trunk/opentrep/test/xapian/Makefile trunk/opentrep/test/xapian/string_search.cpp Modified: trunk/opentrep/test/xapian/Makefile =================================================================== --- trunk/opentrep/test/xapian/Makefile 2009-07-15 23:42:07 UTC (rev 131) +++ trunk/opentrep/test/xapian/Makefile 2009-07-16 16:45:48 UTC (rev 132) @@ -1,7 +1,7 @@ # Xapian -XAPIAN_DIR=/opt/xapian-1.0.10 -#XAPIAN_DIR=/usr +#XAPIAN_DIR=/opt/xapian-1.0.11 +XAPIAN_DIR=/usr XAPIAN_CFLAGS=-I${XAPIAN_DIR}/include XAPIAN_LIBS=-L${XAPIAN_DIR}/lib -lxapian Modified: trunk/opentrep/test/xapian/string_search.cpp =================================================================== --- trunk/opentrep/test/xapian/string_search.cpp 2009-07-15 23:42:07 UTC (rev 131) +++ trunk/opentrep/test/xapian/string_search.cpp 2009-07-16 16:45:48 UTC (rev 132) @@ -5,6 +5,7 @@ #include <sstream> #include <string> #include <list> +#include <map> // Boost #include <boost/tokenizer.hpp> // Xapian @@ -15,12 +16,25 @@ typedef std::list<std::string> WordList_T; /** List of Xapian documents. */ -typedef std::list<Xapian::Document> DocumentList_T; +typedef std::multimap<Xapian::percent, Xapian::Document> DocumentList_T; +/** List of results, organised by sets, for a given full string. */ +typedef std::map<std::string, DocumentList_T> ResultList_T; +/** Memory allocation error. 
*/ +class MemoryError : public std::exception { +}; + + +// ///////////////////////// Forward declarations /////////////////////// +std::string display (const Xapian::MSet& iMatchingSet); +std::string display (const DocumentList_T& iDocumentList); +std::string display (const ResultList_T& iResultList); + + // ////////////////////////////////////////////////////////////////////// -void tokeniseAndAddToDocument (const std::string& iPhrase, - WordList_T& ioWordList) { +void tokeniseStringIntoWordList (const std::string& iPhrase, + WordList_T& ioWordList) { // Empty the word list ioWordList.clear(); @@ -36,9 +50,8 @@ tok_iter != lTokens.end(); ++tok_iter) { const std::string& lTerm = *tok_iter; ioWordList.push_back (lTerm); - - // OPENTREP_LOG_DEBUG ("Added term: " << lTerm); } + } // ////////////////////////////////////////////////////////////////////// @@ -82,9 +95,11 @@ } // DEBUG + /* std::cout << "Original word: `" << lOriginalWord << "' ==> corrected word: `" << lSuggestedWord << "'" << std::endl; + */ } } catch (const Xapian::Error& error) { @@ -93,9 +108,8 @@ } // /////////////////////////////////////////////////////////////////// -void searchString (Xapian::MSet& ioMatchingSet, - const std::string& iSearchString, - Xapian::Database& ioDatabase) { +void searchString (Xapian::MSet& ioMatchingSet, const std::string& iSearchString, + Xapian::Database& ioDatabase, std::ostream& ioStream) { // Catch any Xapian::Error exceptions thrown try { @@ -119,8 +133,16 @@ further step below. */ WordList_T lOriginalWordList; - tokeniseAndAddToDocument (iSearchString, lOriginalWordList); - + tokeniseStringIntoWordList (iSearchString, lOriginalWordList); + + /** + We rebuild a clean query string from the word list. Indeed, the original + string may have contained a few separators (e.g., '/', ';', etc.), which + have been removed by the tokeniseStringIntoWordList() method. All those + separators are thus replaced by spaces. 
+ For instance, the 'san francisco, ca, us' initial string would be + replaced by 'san francisco ca us'. + */ const std::string lOriginalQueryString = createStringFromWordList (lOriginalWordList); @@ -137,11 +159,14 @@ */ const std::string lFullWordCorrectedString = ioDatabase.get_spelling_suggestion (lOriginalQueryString, 3); - - std::cout << "Query string `" << lOriginalQueryString + + // DEBUG + /* + ioStream << "Query string `" << lOriginalQueryString << "' ==> corrected query string: `" << lCorrectedQueryString << "' and correction for the full query string: `" << lFullWordCorrectedString << "'" << std::endl; + */ // Build the query object Xapian::QueryParser lQueryParser; @@ -153,9 +178,12 @@ */ // lQueryParser.set_default_op (Xapian::Query::OP_ADJ); lQueryParser.set_default_op (Xapian::Query::OP_PHRASE); - - std::cout << "Query parser `" << lQueryParser.get_description() << "'" + + // DEBUG + /* + ioStream << "Query parser `" << lQueryParser.get_description() << "'" << std::endl; + */ /** The Xapian::QueryParser::parse_query() method aggregates all the words @@ -196,10 +224,13 @@ | Xapian::QueryParser::FLAG_PHRASE | Xapian::QueryParser::FLAG_LOVEHATE); - std::cout << "Query `" << lQuery.get_description() + // DEBUG + /* + ioStream << "Query `" << lQuery.get_description() << "', corrected query `" << lCorrectedQuery.get_description() << "' and corrected for full query `" << lFullQueryCorrected.get_description() << "' " << std::endl; + */ // Start an enquire session Xapian::Enquire enquire (ioDatabase); @@ -212,8 +243,12 @@ // Display the results int nbMatches = ioMatchingSet.size(); - std::cout << nbMatches << " results found" << std::endl; + // DEBUG + /* + ioStream << nbMatches << " results found" << std::endl; + */ + /** When no match is found, we search on the corrected phrase/string (where the words have been corrected one by one). 
@@ -224,8 +259,12 @@ // Display the results nbMatches = ioMatchingSet.size(); - std::cout << nbMatches << " results found on corrected string" + + // DEBUG + /* + ioStream << nbMatches << " results found on corrected string" << std::endl; + */ } /** @@ -238,13 +277,21 @@ // Display the results nbMatches = ioMatchingSet.size(); - std::cout << nbMatches << " results found on corrected full string" + + // DEBUG + /* + ioStream << nbMatches << " results found on corrected full string" << std::endl; + */ } const Xapian::Query& lActualQuery = enquire.get_query(); - std::cout << "Actual query `" << lActualQuery.get_description() + + // DEBUG + /* + ioStream << "Actual query `" << lActualQuery.get_description() << "'" << std::endl; + */ } catch (const Xapian::Error& error) { std::cerr << "Exception: " << error.get_msg() << std::endl; @@ -254,11 +301,15 @@ // ////////////////////////////////////////////////////////////////////// void createDocumentListFromMSet (const Xapian::MSet& iMatchingSet, DocumentList_T& ioDocumentList) { + // Empty the list of documents + ioDocumentList.clear(); for (Xapian::MSetIterator itDoc = iMatchingSet.begin(); itDoc != iMatchingSet.end(); ++itDoc) { const Xapian::Document& lDocument = itDoc.get_document(); - ioDocumentList.push_back (lDocument); + + ioDocumentList.insert (DocumentList_T::value_type (itDoc.get_percent(), + lDocument)); } } @@ -267,7 +318,7 @@ assert (ioQueryString.empty() == false); WordList_T lWordList; - tokeniseAndAddToDocument (ioQueryString, lWordList); + tokeniseStringIntoWordList (ioQueryString, lWordList); assert (lWordList.empty() == false); // Remove the furthest right word @@ -278,6 +329,169 @@ } // ////////////////////////////////////////////////////////////////////// +void subtractParsedToRemaining (std::string& ioAlreadyParsedQueryString, + std::string& ioRemainingQueryString, + std::ostream& ioStream) { + /** + Remove, from the lRemainingQueryString string, the part which + has been already successfully parsed. 
+ <br>For instance, when 'sna francisco rio de janeiro' is the + initial full clean query string, the searchString() method + first reduce the query string to 'sna francisco', which + successfully matches against SFO (San Francisco airport). + <br>Then, the remaining part of the query string to be parsed is + 'rio de janeiro'. So, the already parsed part must be subtracted + from the initial query string. + */ + WordList_T lRemainingWordList; + tokeniseStringIntoWordList (ioRemainingQueryString, lRemainingWordList); + + WordList_T lParsedWordList; + tokeniseStringIntoWordList (ioAlreadyParsedQueryString, lParsedWordList); + + unsigned int idx = lParsedWordList.size(); + for ( ; idx != 0 && lRemainingWordList.empty() == false; --idx) { + lRemainingWordList.pop_front(); + } + + // Build the remaining part of the string still to be parsed. + // <br>Note that that part may be empty. + ioRemainingQueryString = createStringFromWordList (lRemainingWordList); +} + +// ////////////////////////////////////////////////////////////////////// +void searchString (DocumentList_T& ioDocumentList, std::string& ioQueryString, + Xapian::Database& ioDatabase, std::ostream& ioStream) { + + // Catch any Xapian::Error exceptions thrown + try { + + bool shouldStop = false; + while (shouldStop == false) { + // DEBUG + /* + ioStream << std::endl << "--------------------------------" << std::endl + << "Current query string: `" << ioQueryString << "'" << std::endl; + */ + + // Retrieve the list of documents matching the query string + Xapian::MSet lMatchingSet; + searchString (lMatchingSet, ioQueryString, ioDatabase, ioStream); + + // DEBUG + /* + ioStream << "Matching set (" << lMatchingSet.size() << " elements): " + << display (lMatchingSet); + */ + + // Create the corresponding list of documents + createDocumentListFromMSet (lMatchingSet, ioDocumentList); + + // Stop if a result is found. 
+ if (ioDocumentList.empty() == false) { + shouldStop = true; + break; + } + + // Remove a word from the query string + removeOneWord (ioQueryString); + + // Stop when the resulting string gets empty. + if (ioQueryString.empty() == true) { + shouldStop = true; + } + } + + } catch (const Xapian::Error& error) { + std::cerr << "Exception: " << error.get_msg() << std::endl; + } +} + +// ////////////////////////////////////////////////////////////////////// +void searchString (const std::string& iFullCleanQueryString, + ResultList_T& ioResultList, Xapian::Database& ioDatabase, + std::ostream& ioStream) { + + // Catch any Xapian::Error exceptions thrown + try { + + std::string lRemainingQueryString (iFullCleanQueryString); + bool shouldStop = false; + while (shouldStop == false) { + // DEBUG + /* + ioStream << std::endl << "================================" << std::endl + << "Current query string: `" << lRemainingQueryString << "'" + << std::endl; + */ + /** + Search with the initial full string, then by removing a word if + there was no result, then by removing another word if there was + again no result, until either a result is found or the + resulting string gets empty. + */ + DocumentList_T lDocumentList; + std::string lQueryString (lRemainingQueryString); + searchString (lDocumentList, lQueryString, ioDatabase, ioStream); + + // Add the list of matching documents to the result list + const bool hasInsertionBeenSuccessfull = + ioResultList.insert (ResultList_T::value_type (lQueryString, + lDocumentList)).second; + if (hasInsertionBeenSuccessfull == false) { + std::cerr << "Insertion of document list failed for: `" + << display (lDocumentList) << "\xB4" << std::endl; + throw MemoryError(); + } + + /** + Remove, from the lRemainingQueryString string, the part which + has been already successfully parsed. 
+ <br>For instance, when 'sna francisco rio de janeiro' is the + initial full clean query string, the searchString() method + first reduce the query string to 'sna francisco', which + successfully matches against SFO (San Francisco airport). + <br>Then, the remaining part of the query string to be parsed is + 'rio de janeiro'. So, the already parsed part, namely 'sna francisco', + must be subtracted from the initial query string. + */ + subtractParsedToRemaining (lQueryString, lRemainingQueryString, ioStream); + + // If there is nothing left to be parsed, we have then finished + // to parse the initial string. + if (lRemainingQueryString.empty() == true) { + shouldStop = true; + break; + } + } + + } catch (const Xapian::Error& error) { + std::cerr << "Exception: " << error.get_msg() << std::endl; + } +} + +// ////////////////////////////////////////////////////////////////////// +void extractResults (const ResultList_T& iResultList, + DocumentList_T& ioDocumentList) { + + for (ResultList_T::const_reverse_iterator itResult = iResultList.rbegin(); + itResult != iResultList.rend(); ++itResult) { + const std::string& lQueryString = itResult->first; + const DocumentList_T& lDocumentList = itResult->second; + + // Retrieve the best matching document. As the document list (STL map) + // is sorted by ascending order of the matching percentage, the best + // matching one is located at the end (back) of the list (STL map). 
+ DocumentList_T::const_reverse_iterator itDocument = lDocumentList.rbegin(); + const Xapian::percent& lBestMatchingPercentage = itDocument->first; + const Xapian::Document& lBestMatchingDocument = itDocument->second; + + ioDocumentList.insert (DocumentList_T::value_type (lBestMatchingPercentage, + lBestMatchingDocument)); + } +} + +// ////////////////////////////////////////////////////////////////////// std::string display (const Xapian::MSet& iMatchingSet) { std::ostringstream oStr; @@ -299,16 +513,33 @@ for (DocumentList_T::const_iterator itDoc = iDocumentList.begin(); itDoc != iDocumentList.end(); ++itDoc) { - const Xapian::Document& lDocument = *itDoc; + const Xapian::percent& lPercent = itDoc->first; + const Xapian::Document& lDocument = itDoc->second; const Xapian::docid& lDocID = lDocument.get_docid(); - oStr << "Document ID " << lDocID << "\t[" - << lDocument.get_data() << "]" << std::endl; + oStr << "Document ID " << lDocID << "\t" << lPercent + << "% [" << lDocument.get_data() << "]" << std::endl; } return oStr.str(); } +// ////////////////////////////////////////////////////////////////////// +std::string display (const ResultList_T& iResultList) { + std::ostringstream oStr; + + for (ResultList_T::const_iterator itResult = iResultList.begin(); + itResult != iResultList.end(); ++itResult) { + const std::string& lQueryString = itResult->first; + const DocumentList_T& lDocumentList = itResult->second; + + oStr << "Result for query (sub-)string: `" << lQueryString << "\xB4:" + << std::endl << display (lDocumentList); + } + + return oStr.str(); +} + // //////////////////////////// M A I N ////////////////////////////// int main (int argc, char* argv[]) { @@ -324,7 +555,8 @@ try { // Make the database - Xapian::Database lDatabase (argv[1]); + const std::string lXapianDatabaseFilepath (argv[1]); + Xapian::Database lDatabase (lXapianDatabaseFilepath); /** Build a query string from the command line parameters. 
@@ -339,45 +571,34 @@ const std::string lWord (argv[idx]); lQueryStringStr << lWord; } - const std::string& lCommandLineQueryString = lQueryStringStr.str(); + const std::string& lFullCleanQueryString = lQueryStringStr.str(); - /** - Search with the initial full string, then by removing a word if - no there was result, then by removing another word if there was - again no result, until either a result is found or the - resulting string gets empty. - */ - std::string lQueryString (lCommandLineQueryString); - bool shouldStop = false; - while (shouldStop == false) { - // DEBUG - std::cout << std::endl << "================================" << std::endl - << "New query string: `" << lQueryString << "'" << std::endl; - - // Retrieve the list of documents matching the query string - Xapian::MSet lMatchingSet; - searchString (lMatchingSet, lQueryString, lDatabase); - std::cout << display (lMatchingSet); - - // Create the corresponding list of documents - DocumentList_T lDocumentList; - createDocumentListFromMSet (lMatchingSet, lDocumentList); + // + ResultList_T lResultList; + searchString (lFullCleanQueryString, lResultList, lDatabase, std::cout); - // Stop if a result is found. - if (lDocumentList.empty() == false) { - shouldStop = true; - break; - } + // DEBUG + /* + std::cout << std::endl << "Result list: " << std::endl + << display (lResultList); + */ - // Remove a word from the query string - removeOneWord (lQueryString); - - // Stop when the resulting string gets empty. 
- if (lQueryString.empty() == true) { - shouldStop = true; - } - } + // Extract the best matching results from each matching set (document list) + DocumentList_T lDocumentList; + extractResults (lResultList, lDocumentList); + // DEBUG + std::cout << std::endl + << "_________________________________________" << std::endl + << "=========================================" << std::endl + << "-----------------------------------------" << std::endl + << "Matching list: " << std::endl + << display (lDocumentList) + << "_________________________________________" << std::endl + << "=========================================" << std::endl + << "-----------------------------------------" << std::endl + << std::endl; + } catch (const Xapian::Error& error) { std::cerr << "Exception: " << error.get_msg() << std::endl; } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <den...@us...> - 2009-07-15 23:42:10
|
Revision: 131 http://opentrep.svn.sourceforge.net/opentrep/?rev=131&view=rev Author: denis_arnaud Date: 2009-07-15 23:42:07 +0000 (Wed, 15 Jul 2009) Log Message: ----------- [Dev] Improved the string search, by allowing several cities at once. There is still some work to do. Modified Paths: -------------- trunk/opentrep/test/xapian/string_search.cpp Modified: trunk/opentrep/test/xapian/string_search.cpp =================================================================== --- trunk/opentrep/test/xapian/string_search.cpp 2009-07-15 16:59:19 UTC (rev 130) +++ trunk/opentrep/test/xapian/string_search.cpp 2009-07-15 23:42:07 UTC (rev 131) @@ -1,140 +1,386 @@ +// C +#include <cassert> // STL #include <iostream> #include <sstream> +#include <string> +#include <list> +// Boost +#include <boost/tokenizer.hpp> // Xapian #include <xapian.h> -// ////////////// M A I N ////////////// -int main (int argc, char* argv[]) { +// /////////////////////// Type definitions ///////////////////////////// +/** List of simple words (STL strings). */ +typedef std::list<std::string> WordList_T; - // Simplest possible options parsing: we just require two or more - // parameters. - if (argc < 3) { - std::cout << "Usage: " << argv[0] - << " <path to database> <search terms>" << std::endl; - return -1; +/** List of Xapian documents. 
*/ +typedef std::list<Xapian::Document> DocumentList_T; + + +// ////////////////////////////////////////////////////////////////////// +void tokeniseAndAddToDocument (const std::string& iPhrase, + WordList_T& ioWordList) { + // Empty the word list + ioWordList.clear(); + + // Boost Tokeniser + typedef boost::tokenizer<boost::char_separator<char> > Tokeniser_T; + + // Define the separators + const boost::char_separator<char> lSepatorList(" .,;:|+-*/_=!@#$%`~^&(){}[]?'<>\""); + + // Initialise the phrase to be tokenised + Tokeniser_T lTokens (iPhrase, lSepatorList); + for (Tokeniser_T::const_iterator tok_iter = lTokens.begin(); + tok_iter != lTokens.end(); ++tok_iter) { + const std::string& lTerm = *tok_iter; + ioWordList.push_back (lTerm); + + // OPENTREP_LOG_DEBUG ("Added term: " << lTerm); + } +} + +// ////////////////////////////////////////////////////////////////////// +std::string createStringFromWordList (const WordList_T& iWordList) { + std::ostringstream oStr; + + unsigned short idx = iWordList.size(); + for (WordList_T::const_iterator itWord = iWordList.begin(); + itWord != iWordList.end(); ++itWord, --idx) { + const std::string& lWord = *itWord; + oStr << lWord; + if (idx > 1) { + oStr << " "; } + } + + return oStr.str(); +} - // Catch any Xapian::Error exceptions thrown - try { +// ////////////////////////////////////////////////////////////////////// +void createCorrectedWordList (const WordList_T& iOriginalWordList, + WordList_T& ioCorrectedWordList, + const Xapian::Database& iDatabase) { + // Empty the target list + ioCorrectedWordList.clear(); + + // Catch any Xapian::Error exceptions thrown + try { - // Make the database - Xapian::Database db (argv[1]); + for (WordList_T::const_iterator itWord = iOriginalWordList.begin(); + itWord != iOriginalWordList.end(); ++itWord) { + const std::string& lOriginalWord = *itWord; + const std::string& lSuggestedWord = + iDatabase.get_spelling_suggestion (lOriginalWord, 3); - // Start an enquire session - 
Xapian::Enquire enquire (db); + if (lSuggestedWord.empty() == true) { + ioCorrectedWordList.push_back (lOriginalWord); - std::ostringstream oOriginalStr; - std::ostringstream oCorrectedStr; - for (int idx=2; idx != argc; ++idx) { - if (idx != 2) { - oOriginalStr << " "; - oCorrectedStr << " "; - } - const std::string lWord (argv[idx]); - const std::string lSuggestedWord = db.get_spelling_suggestion(lWord, 3); - std::cout << "Word `" << lWord << "' ==> Suggested word `" - << lSuggestedWord << "'" << std::endl; - oOriginalStr << lWord; + } else { + ioCorrectedWordList.push_back (lSuggestedWord); + } - if (lSuggestedWord.empty() == true) { - oCorrectedStr << lWord; - - } else { - oCorrectedStr << lSuggestedWord; - } - } + // DEBUG + std::cout << "Original word: `" << lOriginalWord + << "' ==> corrected word: `" << lSuggestedWord << "'" + << std::endl; + } + + } catch (const Xapian::Error& error) { + std::cerr << "Exception: " << error.get_msg() << std::endl; + } +} + +// /////////////////////////////////////////////////////////////////// +void searchString (Xapian::MSet& ioMatchingSet, + const std::string& iSearchString, + Xapian::Database& ioDatabase) { + + // Catch any Xapian::Error exceptions thrown + try { - const std::string lOriginalQueryString = oOriginalStr.str(); - const std::string lCorrectedQueryString = oCorrectedStr.str(); - const std::string lFullWordCorrectedString = - db.get_spelling_suggestion (lOriginalQueryString, 4); + /** + Build another string, in addition to the original one. 
Overall, + there are thus two strings: + <br><ul> + <li>One with the original words given by the user</li> + <li>One with the orthographic-corrected words, wherever + relevant (otherwise, the original word is taken)</li> + </ul> + <br>For instance, 'sna francisco' would give the following + two strings: + <br><ul> + <li>'sna francicso' (original)</li> + <li>'sna francisco' (corrected, where relevant, word by word)</li> + </ul> + <br>Note that, as 'sna' exists in the dictionary (Santa Ana, CA, USA), + it is not replaced. We shall take care of the whole string in a + further step below. + */ + WordList_T lOriginalWordList; + tokeniseAndAddToDocument (iSearchString, lOriginalWordList); - std::cout << "Query string `" << lOriginalQueryString - << "' ==> corrected query string: `" << lCorrectedQueryString - << "' and correction for the full query string: `" - << lFullWordCorrectedString << "'" << std::endl; + const std::string lOriginalQueryString = + createStringFromWordList (lOriginalWordList); + + WordList_T lCorrectedWordList; + createCorrectedWordList (lOriginalWordList, lCorrectedWordList, ioDatabase); + + const std::string lCorrectedQueryString = + createStringFromWordList (lCorrectedWordList); - // Build the query object - Xapian::QueryParser lQueryParser; - lQueryParser.set_database (db); - // As explained in http://www.xapian.org/docs/queryparser.html, - // Xapian::Query::OP_ADJ is better than Xapian::Query::OP_PHRASE, - // but only available from version 1.0.13 of Xapian. - // lQueryParser.set_default_op (Xapian::Query::OP_ADJ); - lQueryParser.set_default_op (Xapian::Query::OP_PHRASE); + /** + Try to find, if relevant, an orthographic suggestion for the whole + phrase/string. With the above example, 'sna francisco' yields the + suggestion 'san francisco'. 
+ */ + const std::string lFullWordCorrectedString = + ioDatabase.get_spelling_suggestion (lOriginalQueryString, 3); - std::cout << "Query parser `" << lQueryParser.get_description() << "'" - << std::endl; + std::cout << "Query string `" << lOriginalQueryString + << "' ==> corrected query string: `" << lCorrectedQueryString + << "' and correction for the full query string: `" + << lFullWordCorrectedString << "'" << std::endl; - Xapian::Query lQuery = - lQueryParser.parse_query (lOriginalQueryString, - Xapian::QueryParser::FLAG_BOOLEAN - | Xapian::QueryParser::FLAG_PHRASE - | Xapian::QueryParser::FLAG_LOVEHATE - | Xapian::QueryParser::FLAG_SPELLING_CORRECTION); - //Xapian::Query lCorrectedQuery= lQueryParser.get_corrected_query_string(); - Xapian::Query lCorrectedQuery = - lQueryParser.parse_query (lCorrectedQueryString, - Xapian::QueryParser::FLAG_BOOLEAN - | Xapian::QueryParser::FLAG_PHRASE - | Xapian::QueryParser::FLAG_LOVEHATE); + // Build the query object + Xapian::QueryParser lQueryParser; + lQueryParser.set_database (ioDatabase); + /** + As explained in http://www.xapian.org/docs/queryparser.html, + Xapian::Query::OP_ADJ is better than Xapian::Query::OP_PHRASE, + but only available from version 1.0.13 of Xapian. + */ + // lQueryParser.set_default_op (Xapian::Query::OP_ADJ); + lQueryParser.set_default_op (Xapian::Query::OP_PHRASE); + + std::cout << "Query parser `" << lQueryParser.get_description() << "'" + << std::endl; - Xapian::Query lFullQueryCorrected = - lQueryParser.parse_query (lFullWordCorrectedString, - Xapian::QueryParser::FLAG_BOOLEAN - | Xapian::QueryParser::FLAG_PHRASE - | Xapian::QueryParser::FLAG_LOVEHATE); + /** + The Xapian::QueryParser::parse_query() method aggregates all the words + with operators inbetween them (here, the "PHRASE" operator). + With the above example ('sna francicso'), it yields + "sna PHRASE 2 francicso". 
+ */ + Xapian::Query lQuery = + lQueryParser.parse_query (lOriginalQueryString, + Xapian::QueryParser::FLAG_BOOLEAN + | Xapian::QueryParser::FLAG_PHRASE + | Xapian::QueryParser::FLAG_LOVEHATE + | Xapian::QueryParser::FLAG_SPELLING_CORRECTION); + /** + Strangely enough (is it?), the corrected query given by the Xapian + QueryParser corresponds to the full original string, where words + have been corrected one by one, but considered as a single block. + With the above example, 'sna francicso' yields (wrongly) + 'sna francisco', instead of "sna PHRASE 2 francisco", as generated + by the following code. + */ + // Xapian::Query lCorrectedQuery = + // lQueryParser.get_corrected_query_string(); + Xapian::Query lCorrectedQuery = + lQueryParser.parse_query (lCorrectedQueryString, + Xapian::QueryParser::FLAG_BOOLEAN + | Xapian::QueryParser::FLAG_PHRASE + | Xapian::QueryParser::FLAG_LOVEHATE); - std::cout << "Query `" << lQuery.get_description() - << "', corrected query `" << lCorrectedQuery.get_description() - << "' and corrected for full query `" - << lFullQueryCorrected.get_description() << "' " << std::endl; + /** + As, with the above example, the full corrected string is + 'san francisco', it yields the query "san PHRASE 2 francisco", + which is eventually right. 
+ */ + Xapian::Query lFullQueryCorrected = + lQueryParser.parse_query (lFullWordCorrectedString, + Xapian::QueryParser::FLAG_BOOLEAN + | Xapian::QueryParser::FLAG_PHRASE + | Xapian::QueryParser::FLAG_LOVEHATE); + + std::cout << "Query `" << lQuery.get_description() + << "', corrected query `" << lCorrectedQuery.get_description() + << "' and corrected for full query `" + << lFullQueryCorrected.get_description() << "' " << std::endl; - // Give the query object to the enquire session - enquire.set_query (lQuery); + // Start an enquire session + Xapian::Enquire enquire (ioDatabase); - // Get the top 10 results of the query - Xapian::MSet matches = enquire.get_mset (0, 10); + // Give the query object to the enquire session + enquire.set_query (lQuery); + // Get the top 10 results of the query + ioMatchingSet = enquire.get_mset (0, 10); + + // Display the results + int nbMatches = ioMatchingSet.size(); + std::cout << nbMatches << " results found" << std::endl; + + /** + When no match is found, we search on the corrected phrase/string + (where the words have been corrected one by one). + */ + if (nbMatches == 0) { + enquire.set_query (lCorrectedQuery); + ioMatchingSet = enquire.get_mset (0, 10); + // Display the results - int nbMatches = matches.size(); - std::cout << nbMatches << " results found" << std::endl; + nbMatches = ioMatchingSet.size(); + std::cout << nbMatches << " results found on corrected string" + << std::endl; + } - if (nbMatches == 0) { - enquire.set_query (lCorrectedQuery); - matches = enquire.get_mset (0, 10); + /** + If there is still no match, we search on the string corrected + as a whole. 
+ */ + if (nbMatches == 0) { + enquire.set_query (lFullQueryCorrected); + ioMatchingSet = enquire.get_mset (0, 10); + + // Display the results + nbMatches = ioMatchingSet.size(); + std::cout << nbMatches << " results found on corrected full string" + << std::endl; + } - // Display the results - nbMatches = matches.size(); - std::cout << nbMatches << " results found on corrected string" - << std::endl; + const Xapian::Query& lActualQuery = enquire.get_query(); + std::cout << "Actual query `" << lActualQuery.get_description() + << "'" << std::endl; + + } catch (const Xapian::Error& error) { + std::cerr << "Exception: " << error.get_msg() << std::endl; + } +} - if (nbMatches == 0) { - enquire.set_query (lFullQueryCorrected); - matches = enquire.get_mset (0, 10); +// ////////////////////////////////////////////////////////////////////// +void createDocumentListFromMSet (const Xapian::MSet& iMatchingSet, + DocumentList_T& ioDocumentList) { - // Display the results - nbMatches = matches.size(); - std::cout << nbMatches << " results found on corrected full string" - << std::endl; - } + for (Xapian::MSetIterator itDoc = iMatchingSet.begin(); + itDoc != iMatchingSet.end(); ++itDoc) { + const Xapian::Document& lDocument = itDoc.get_document(); + ioDocumentList.push_back (lDocument); + } +} + +// ////////////////////////////////////////////////////////////////////// +void removeOneWord (std::string& ioQueryString) { + assert (ioQueryString.empty() == false); + + WordList_T lWordList; + tokeniseAndAddToDocument (ioQueryString, lWordList); + assert (lWordList.empty() == false); + + // Remove the furthest right word + lWordList.pop_back(); + + const std::string& lReducedString = createStringFromWordList (lWordList); + ioQueryString = lReducedString; +} + +// ////////////////////////////////////////////////////////////////////// +std::string display (const Xapian::MSet& iMatchingSet) { + std::ostringstream oStr; + + for (Xapian::MSetIterator itDoc = iMatchingSet.begin(); + itDoc 
!= iMatchingSet.end(); ++itDoc) { + const Xapian::Document& lDocument = itDoc.get_document(); + const Xapian::docid& lDocID = lDocument.get_docid(); + + oStr << "Document ID " << lDocID << "\t" << itDoc.get_percent() + << "% [" << lDocument.get_data() << "]" << std::endl; + } + + return oStr.str(); +} + +// ////////////////////////////////////////////////////////////////////// +std::string display (const DocumentList_T& iDocumentList) { + std::ostringstream oStr; + + for (DocumentList_T::const_iterator itDoc = iDocumentList.begin(); + itDoc != iDocumentList.end(); ++itDoc) { + const Xapian::Document& lDocument = *itDoc; + const Xapian::docid& lDocID = lDocument.get_docid(); + + oStr << "Document ID " << lDocID << "\t[" + << lDocument.get_data() << "]" << std::endl; + } + + return oStr.str(); +} + +// //////////////////////////// M A I N ////////////////////////////// +int main (int argc, char* argv[]) { + + // Simplest possible options parsing: we just require two or more + // parameters. + if (argc < 3) { + std::cout << "Usage: " << argv[0] + << " <path to database> <search terms>" << std::endl; + return -1; + } + + // Catch any Xapian::Error exceptions thrown + try { + + // Make the database + Xapian::Database lDatabase (argv[1]); + + /** + Build a query string from the command line parameters. + That way, any other front end producing a query string will + be fine. 
+ */ + std::ostringstream lQueryStringStr; + for (unsigned int idx = 2; idx != argc; ++idx) { + if (idx != 2) { + lQueryStringStr << " "; } + const std::string lWord (argv[idx]); + lQueryStringStr << lWord; + } + const std::string& lCommandLineQueryString = lQueryStringStr.str(); - const Xapian::Query& lActualQuery = enquire.get_query(); - std::cout << "Actual query `" << lActualQuery.get_description() - << "'" << std::endl; + /** + Search with the initial full string, then by removing a word if + no there was result, then by removing another word if there was + again no result, until either a result is found or the + resulting string gets empty. + */ + std::string lQueryString (lCommandLineQueryString); + bool shouldStop = false; + while (shouldStop == false) { + // DEBUG + std::cout << std::endl << "================================" << std::endl + << "New query string: `" << lQueryString << "'" << std::endl; - for (Xapian::MSetIterator i = matches.begin(); i != matches.end(); ++i) { - Xapian::Document doc = i.get_document(); - std::cout << "Document ID " << *i << "\t" << - i.get_percent() << "% [" << - doc.get_data() << "]" << std::endl; + // Retrieve the list of documents matching the query string + Xapian::MSet lMatchingSet; + searchString (lMatchingSet, lQueryString, lDatabase); + std::cout << display (lMatchingSet); + + // Create the corresponding list of documents + DocumentList_T lDocumentList; + createDocumentListFromMSet (lMatchingSet, lDocumentList); + + // Stop if a result is found. + if (lDocumentList.empty() == false) { + shouldStop = true; + break; } - } catch (const Xapian::Error& error) { - std::cerr << "Exception: " << error.get_msg() << std::endl; + // Remove a word from the query string + removeOneWord (lQueryString); + + // Stop when the resulting string gets empty. 
+ if (lQueryString.empty() == true) { + shouldStop = true; + } } + + } catch (const Xapian::Error& error) { + std::cerr << "Exception: " << error.get_msg() << std::endl; + } - return 0; + return 0; } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <den...@us...> - 2009-07-15 16:59:26
|
Revision: 130 http://opentrep.svn.sourceforge.net/opentrep/?rev=130&view=rev Author: denis_arnaud Date: 2009-07-15 16:59:19 +0000 (Wed, 15 Jul 2009) Log Message: ----------- [TREP] Improved the string search (command line utility). Modified Paths: -------------- trunk/opentrep/opentrep/command/IndexBuilder.cpp trunk/opentrep/test/xapian/string_search.cpp Modified: trunk/opentrep/opentrep/command/IndexBuilder.cpp =================================================================== --- trunk/opentrep/opentrep/command/IndexBuilder.cpp 2009-07-15 14:45:43 UTC (rev 129) +++ trunk/opentrep/opentrep/command/IndexBuilder.cpp 2009-07-15 16:59:19 UTC (rev 130) @@ -33,7 +33,7 @@ typedef boost::tokenizer<boost::char_separator<char> > Tokeniser_T; // Define the separators - boost::char_separator<char> lSepatorList(" .,;:|+-*/_=!@#$%`~^&(){}[]?'<>\""); + const boost::char_separator<char> lSepatorList(" .,;:|+-*/_=!@#$%`~^&(){}[]?'<>\""); // Initialise the phrase to be tokenised Tokeniser_T lTokens (iPhrase, lSepatorList); @@ -44,7 +44,7 @@ ioDatabase.add_spelling (lTerm); ioDocument.add_term (lTerm); - OPENTREP_LOG_DEBUG ("Added term: " << lTerm); + // OPENTREP_LOG_DEBUG ("Added term: " << lTerm); } } @@ -115,10 +115,14 @@ // Add the place name (it can be the classical one, or // extended, alternate, etc.) if (lName.empty() == false) { + // Add the full name (potentially containing spaces, e.g., + // 'san francisco'), as well as each word + // within it (with the example above, 'san' and 'francisco'). 
+ lDocument.add_term (lName); ++idx; + ioDatabase.add_spelling (lName); + tokeniseAndAddToDocument (lName, lDocument, ioDatabase); + // OPENTREP_LOG_DEBUG ("Added name: " << lName); - // lDocument.add_term (lName); ++idx; - // ioDatabase.add_spelling (lName); - tokeniseAndAddToDocument (lName, lDocument, ioDatabase); } } } Modified: trunk/opentrep/test/xapian/string_search.cpp =================================================================== --- trunk/opentrep/test/xapian/string_search.cpp 2009-07-15 14:45:43 UTC (rev 129) +++ trunk/opentrep/test/xapian/string_search.cpp 2009-07-15 16:59:19 UTC (rev 130) @@ -24,26 +24,43 @@ // Start an enquire session Xapian::Enquire enquire (db); - std::ostringstream oStr; + std::ostringstream oOriginalStr; + std::ostringstream oCorrectedStr; for (int idx=2; idx != argc; ++idx) { if (idx != 2) { - oStr << " "; + oOriginalStr << " "; + oCorrectedStr << " "; } const std::string lWord (argv[idx]); const std::string lSuggestedWord = db.get_spelling_suggestion(lWord, 3); std::cout << "Word `" << lWord << "' ==> Suggested word `" << lSuggestedWord << "'" << std::endl; - oStr << lWord; + oOriginalStr << lWord; + + if (lSuggestedWord.empty() == true) { + oCorrectedStr << lWord; + + } else { + oCorrectedStr << lSuggestedWord; + } } - const std::string lQueryString = oStr.str(); - std::cout << "QueryString `" << lQueryString << "'" << std::endl; + + const std::string lOriginalQueryString = oOriginalStr.str(); + const std::string lCorrectedQueryString = oCorrectedStr.str(); + const std::string lFullWordCorrectedString = + db.get_spelling_suggestion (lOriginalQueryString, 4); + + std::cout << "Query string `" << lOriginalQueryString + << "' ==> corrected query string: `" << lCorrectedQueryString + << "' and correction for the full query string: `" + << lFullWordCorrectedString << "'" << std::endl; // Build the query object Xapian::QueryParser lQueryParser; lQueryParser.set_database (db); // As explained in 
http://www.xapian.org/docs/queryparser.html, // Xapian::Query::OP_ADJ is better than Xapian::Query::OP_PHRASE, - // but only available from version 1.0.13 of Xapian + // but only available from version 1.0.13 of Xapian. // lQueryParser.set_default_op (Xapian::Query::OP_ADJ); lQueryParser.set_default_op (Xapian::Query::OP_PHRASE); @@ -51,17 +68,28 @@ << std::endl; Xapian::Query lQuery = - lQueryParser.parse_query (lQueryString, + lQueryParser.parse_query (lOriginalQueryString, Xapian::QueryParser::FLAG_BOOLEAN | Xapian::QueryParser::FLAG_PHRASE | Xapian::QueryParser::FLAG_LOVEHATE | Xapian::QueryParser::FLAG_SPELLING_CORRECTION); - Xapian::Query lCorrectedQuery = lQueryParser.get_corrected_query_string(); + //Xapian::Query lCorrectedQuery= lQueryParser.get_corrected_query_string(); + Xapian::Query lCorrectedQuery = + lQueryParser.parse_query (lCorrectedQueryString, + Xapian::QueryParser::FLAG_BOOLEAN + | Xapian::QueryParser::FLAG_PHRASE + | Xapian::QueryParser::FLAG_LOVEHATE); + Xapian::Query lFullQueryCorrected = + lQueryParser.parse_query (lFullWordCorrectedString, + Xapian::QueryParser::FLAG_BOOLEAN + | Xapian::QueryParser::FLAG_PHRASE + | Xapian::QueryParser::FLAG_LOVEHATE); + std::cout << "Query `" << lQuery.get_description() - << "', Corrected query `" << lCorrectedQuery.get_description() - << "' " - << std::endl; + << "', corrected query `" << lCorrectedQuery.get_description() + << "' and corrected for full query `" + << lFullQueryCorrected.get_description() << "' " << std::endl; // Give the query object to the enquire session enquire.set_query (lQuery); @@ -70,17 +98,25 @@ Xapian::MSet matches = enquire.get_mset (0, 10); // Display the results - const int nbMatches = matches.size(); + int nbMatches = matches.size(); std::cout << nbMatches << " results found" << std::endl; - - // if (true) { if (nbMatches == 0) { enquire.set_query (lCorrectedQuery); matches = enquire.get_mset (0, 10); - //const Xapian::MSet matchesAll = enquire.get_mset (); - if 
(matches.size() == matches.max_size()) { - std::cout << "Corrected string matches all the documents" + + // Display the results + nbMatches = matches.size(); + std::cout << nbMatches << " results found on corrected string" + << std::endl; + + if (nbMatches == 0) { + enquire.set_query (lFullQueryCorrected); + matches = enquire.get_mset (0, 10); + + // Display the results + nbMatches = matches.size(); + std::cout << nbMatches << " results found on corrected full string" << std::endl; } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <den...@us...> - 2009-07-15 14:45:49
|
Revision: 129 http://opentrep.svn.sourceforge.net/opentrep/?rev=129&view=rev Author: denis_arnaud Date: 2009-07-15 14:45:43 +0000 (Wed, 15 Jul 2009) Log Message: ----------- [Indexer] Fixed a bug in the indexer (where terms were inserted with spaces). Modified Paths: -------------- trunk/opentrep/opentrep/command/IndexBuilder.cpp trunk/opentrep/refdata/data/ref_place_names.csv trunk/opentrep/test/xapian/simple_search.cpp trunk/opentrep/test/xapian/string_search.cpp Modified: trunk/opentrep/opentrep/command/IndexBuilder.cpp =================================================================== --- trunk/opentrep/opentrep/command/IndexBuilder.cpp 2009-07-14 22:34:14 UTC (rev 128) +++ trunk/opentrep/opentrep/command/IndexBuilder.cpp 2009-07-15 14:45:43 UTC (rev 129) @@ -7,6 +7,8 @@ #include <string> #include <vector> #include <exception> +// Boost +#include <boost/tokenizer.hpp> // OpenTrep #include <opentrep/bom/World.hpp> #include <opentrep/bom/Place.hpp> @@ -21,6 +23,30 @@ #include <xapian.h> namespace OPENTREP { + + // ////////////////////////////////////////////////////////////////////// + void tokeniseAndAddToDocument (const std::string& iPhrase, + Xapian::Document& ioDocument, + Xapian::WritableDatabase& ioDatabase) { + + // Boost Tokeniser + typedef boost::tokenizer<boost::char_separator<char> > Tokeniser_T; + + // Define the separators + boost::char_separator<char> lSepatorList(" .,;:|+-*/_=!@#$%`~^&(){}[]?'<>\""); + + // Initialise the phrase to be tokenised + Tokeniser_T lTokens (iPhrase, lSepatorList); + for (Tokeniser_T::const_iterator tok_iter = lTokens.begin(); + tok_iter != lTokens.end(); ++tok_iter) { + const std::string& lTerm = *tok_iter; + + ioDatabase.add_spelling (lTerm); + ioDocument.add_term (lTerm); + + OPENTREP_LOG_DEBUG ("Added term: " << lTerm); + } + } // ////////////////////////////////////////////////////////////////////// void IndexBuilder:: @@ -90,8 +116,9 @@ // extended, alternate, etc.) 
if (lName.empty() == false) { // OPENTREP_LOG_DEBUG ("Added name: " << lName); - lDocument.add_term (lName); ++idx; - ioDatabase.add_spelling (lName); + // lDocument.add_term (lName); ++idx; + // ioDatabase.add_spelling (lName); + tokeniseAndAddToDocument (lName, lDocument, ioDatabase); } } } Modified: trunk/opentrep/refdata/data/ref_place_names.csv =================================================================== --- trunk/opentrep/refdata/data/ref_place_names.csv 2009-07-14 22:34:14 UTC (rev 128) +++ trunk/opentrep/refdata/data/ref_place_names.csv 2009-07-15 14:45:43 UTC (rev 129) @@ -1826,7 +1826,7 @@ en,jbt,bethel jbt,bethel jbt,bethel/ak/us:city landing en,jca,cannes jca,cannes jca,cannes/fr:croisette hpt en,jcb,joacaba,joacaba,joacaba/sc/br -en,jcc,sanfrancisco jcc,sanfrancisco jc,san francisco/ca/us:china hpt +en,jcc,san francisco jcc,san francisco jc,san francisco/ca/us:china hpt en,jcd,st croix is jcd,st croix is jcd,st croix is/vi:downtown hpt en,jce,convention,convention,convention/ca/us:heliport en,jch,qasigiannguit,qasigiannguit,qasigiannguit/gl Modified: trunk/opentrep/test/xapian/simple_search.cpp =================================================================== --- trunk/opentrep/test/xapian/simple_search.cpp 2009-07-14 22:34:14 UTC (rev 128) +++ trunk/opentrep/test/xapian/simple_search.cpp 2009-07-15 14:45:43 UTC (rev 129) @@ -1,5 +1,6 @@ // STL #include <iostream> +#include <string> // Xapian #include <xapian.h> @@ -7,46 +8,59 @@ int main (int argc, char* argv[]) { // Simplest possible options parsing: we just require two or more - // parameters. - if (argc < 3) { - std::cout << "Usage: " << argv[0] - << " <path to database> <search terms>" << std::endl; - return -1; - } + // parameters. 
+ if (argc < 3) { + std::cout << "Usage: " << argv[0] + << " <path to database> <search terms>" << std::endl; + return -1; + } - // Catch any Xapian::Error exceptions thrown - try { + // Catch any Xapian::Error exceptions thrown + try { - // Make the database - Xapian::Database db (argv[1]); + // Open the database for searching. + Xapian::Database db (argv[1]); - // Start an enquire session - Xapian::Enquire enquire (db); + // Start an enquire session + Xapian::Enquire enquire (db); - // Build the query object - Xapian::Query query (Xapian::Query::OP_AND, argv + 2, argv + argc); - std::cout << "Performing query `" << query.get_description() << "'" - << std::endl; - - // Give the query object to the enquire session - enquire.set_query (query); + // Combine the rest of the command line arguments with spaces between + // them, so that simple queries don't have to be quoted at the shell + // level. + std::string query_string (argv[2]); + argv += 3; + while (*argv) { + query_string += ' '; + query_string += *argv++; + } - // Get the top 10 results of the query - Xapian::MSet matches = enquire.get_mset (0, 10); + // Parse the query string to produce a Xapian::Query object. + Xapian::QueryParser qp; + Xapian::Stem stemmer ("english"); + qp.set_stemmer (stemmer); + qp.set_database (db); + qp.set_stemming_strategy (Xapian::QueryParser::STEM_SOME); + Xapian::Query query = qp.parse_query (query_string); + std::cout << "Parsed query is: " << query.get_description() << std::endl; - // Display the results - std::cout << matches.size() << " results found" << std::endl; + // Find the top 10 results for the query. + enquire.set_query (query); + Xapian::MSet matches = enquire.get_mset(0, 10); - for (Xapian::MSetIterator i = matches.begin(); i != matches.end(); ++i) { - Xapian::Document doc = i.get_document(); - std::cout << "Document ID " << *i << "\t" << - i.get_percent() << "% [" << - doc.get_data() << "]" << std::endl; - } + // Display the results. 
+ std::cout << matches.get_matches_estimated() << " results found." + << std::endl; + std::cout << "Matches 1-" << matches.size() << ":" << std::endl << std::endl; + + for (Xapian::MSetIterator i = matches.begin(); i != matches.end(); ++i) { + std::cout << i.get_rank() + 1 << ": " << i.get_percent() << "% docid=" + << *i << " [" << i.get_document().get_data() << "]" + << std::endl << std::endl; + } - } catch (const Xapian::Error& error) { - std::cerr << "Exception: " << error.get_msg() << std::endl; - } + } catch (const Xapian::Error& error) { + std::cerr << "Exception: " << error.get_msg() << std::endl; + } - return 0; + return 0; } Modified: trunk/opentrep/test/xapian/string_search.cpp =================================================================== --- trunk/opentrep/test/xapian/string_search.cpp 2009-07-14 22:34:14 UTC (rev 128) +++ trunk/opentrep/test/xapian/string_search.cpp 2009-07-15 14:45:43 UTC (rev 129) @@ -41,7 +41,11 @@ // Build the query object Xapian::QueryParser lQueryParser; lQueryParser.set_database (db); - lQueryParser.set_default_op (Xapian::Query::OP_NEAR); + // As explained in http://www.xapian.org/docs/queryparser.html, + // Xapian::Query::OP_ADJ is better than Xapian::Query::OP_PHRASE, + // but only available from version 1.0.13 of Xapian + // lQueryParser.set_default_op (Xapian::Query::OP_ADJ); + lQueryParser.set_default_op (Xapian::Query::OP_PHRASE); std::cout << "Query parser `" << lQueryParser.get_description() << "'" << std::endl; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <den...@us...> - 2009-07-14 22:34:18
|
Revision: 128 http://opentrep.svn.sourceforge.net/opentrep/?rev=128&view=rev Author: denis_arnaud Date: 2009-07-14 22:34:14 +0000 (Tue, 14 Jul 2009) Log Message: ----------- [Test] Tested a few variations for Xapian string search. Modified Paths: -------------- trunk/opentrep/test/xapian/string_search.cpp Property Changed: ---------------- trunk/opentrep/ternary_tree/ Property changes on: trunk/opentrep/ternary_tree ___________________________________________________________________ Modified: svn:ignore - .libs .deps Makefile Makefile.in + .libs .deps Makefile Makefile.in simple_tst Modified: trunk/opentrep/test/xapian/string_search.cpp =================================================================== --- trunk/opentrep/test/xapian/string_search.cpp 2009-07-14 14:07:29 UTC (rev 127) +++ trunk/opentrep/test/xapian/string_search.cpp 2009-07-14 22:34:14 UTC (rev 128) @@ -28,20 +28,20 @@ for (int idx=2; idx != argc; ++idx) { if (idx != 2) { oStr << " "; -// oStr << " AND "; } const std::string lWord (argv[idx]); - const std::string lSuggestedWord = - db.get_spelling_suggestion (lWord, 3); + const std::string lSuggestedWord = db.get_spelling_suggestion(lWord, 3); std::cout << "Word `" << lWord << "' ==> Suggested word `" << lSuggestedWord << "'" << std::endl; oStr << lWord; } const std::string lQueryString = oStr.str(); + std::cout << "QueryString `" << lQueryString << "'" << std::endl; // Build the query object Xapian::QueryParser lQueryParser; lQueryParser.set_database (db); + lQueryParser.set_default_op (Xapian::Query::OP_NEAR); std::cout << "Query parser `" << lQueryParser.get_description() << "'" << std::endl; @@ -53,10 +53,11 @@ | Xapian::QueryParser::FLAG_LOVEHATE | Xapian::QueryParser::FLAG_SPELLING_CORRECTION); Xapian::Query lCorrectedQuery = lQueryParser.get_corrected_query_string(); - + std::cout << "Query `" << lQuery.get_description() << "', Corrected query `" << lCorrectedQuery.get_description() - << "'" << std::endl; + << "' " + << std::endl; // Give 
the query object to the enquire session enquire.set_query (lQuery); @@ -65,12 +66,19 @@ Xapian::MSet matches = enquire.get_mset (0, 10); // Display the results - int nbMatches = matches.size(); + const int nbMatches = matches.size(); std::cout << nbMatches << " results found" << std::endl; + + // if (true) { if (nbMatches == 0) { enquire.set_query (lCorrectedQuery); matches = enquire.get_mset (0, 10); + //const Xapian::MSet matchesAll = enquire.get_mset (); + if (matches.size() == matches.max_size()) { + std::cout << "Corrected string matches all the documents" + << std::endl; + } } const Xapian::Query& lActualQuery = enquire.get_query(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <den...@us...> - 2009-07-14 14:07:33
|
Revision: 127 http://opentrep.svn.sourceforge.net/opentrep/?rev=127&view=rev Author: denis_arnaud Date: 2009-07-14 14:07:29 +0000 (Tue, 14 Jul 2009) Log Message: ----------- [TST] Updated the Ternary Structure Tree (TST). It still does not compile. Modified Paths: -------------- trunk/opentrep/configure.ac trunk/opentrep/opentrep/Makefile.am trunk/opentrep/ternary_tree/examples.cpp trunk/opentrep/ternary_tree/iterator_compile_test.cpp trunk/opentrep/ternary_tree/iterator_wrapper.hpp trunk/opentrep/ternary_tree/readme.txt trunk/opentrep/ternary_tree/structured_map.hpp trunk/opentrep/ternary_tree/structured_set.hpp trunk/opentrep/ternary_tree/ternary_tree.hpp trunk/opentrep/ternary_tree/tst_concept_checks.cpp trunk/opentrep/ternary_tree/tst_detail/iteration_impl.hpp trunk/opentrep/ternary_tree/tst_detail/new_iterator_base.ipp trunk/opentrep/ternary_tree/tst_detail/tst_implementation.ipp trunk/opentrep/ternary_tree/tst_detail/tst_iterator_base.ipp trunk/opentrep/ternary_tree/tst_detail/tst_iterator_facade.hpp trunk/opentrep/ternary_tree/tst_detail/tst_node.hpp trunk/opentrep/ternary_tree/tst_detail/tst_search_results.ipp Added Paths: ----------- trunk/opentrep/ternary_tree/Makefile.am trunk/opentrep/ternary_tree/fill_dictionary.hpp trunk/opentrep/ternary_tree/simple_tst.cpp trunk/opentrep/ternary_tree/sources.mk Removed Paths: ------------- trunk/opentrep/ternary_tree/fill_dictionary.cpp Property Changed: ---------------- trunk/opentrep/ternary_tree/ Modified: trunk/opentrep/configure.ac =================================================================== --- trunk/opentrep/configure.ac 2009-07-14 10:45:19 UTC (rev 126) +++ trunk/opentrep/configure.ac 2009-07-14 14:07:29 UTC (rev 127) @@ -211,6 +211,7 @@ opentrep.pc opentrep.spec opentrep.m4 + ternary_tree/Makefile opentrep/Makefile opentrep/basic/Makefile opentrep/bom/Makefile Modified: trunk/opentrep/opentrep/Makefile.am =================================================================== --- 
trunk/opentrep/opentrep/Makefile.am 2009-07-14 10:45:19 UTC (rev 126) +++ trunk/opentrep/opentrep/Makefile.am 2009-07-14 14:07:29 UTC (rev 127) @@ -3,8 +3,6 @@ ## Source directory -DISTCLEANFILES = @PACKAGE@-paths.h - MAINTAINERCLEANFILES = Makefile.in SUBDIRS = basic bom factory dbadaptor command service core config batches Property changes on: trunk/opentrep/ternary_tree ___________________________________________________________________ Added: svn:ignore + .libs .deps Makefile Makefile.in Added: trunk/opentrep/ternary_tree/Makefile.am =================================================================== --- trunk/opentrep/ternary_tree/Makefile.am (rev 0) +++ trunk/opentrep/ternary_tree/Makefile.am 2009-07-14 14:07:29 UTC (rev 127) @@ -0,0 +1,46 @@ +include $(top_srcdir)/Makefile.common +include $(srcdir)/sources.mk + +## +# Source directory + +DISTCLEANFILES = + +MAINTAINERCLEANFILES = Makefile.in + +SUBDIRS = + +EXTRA_DIST = + + +## +# Library +lib_LTLIBRARIES = libtst.la + +libtst_la_SOURCES = $(tst_h_sources) $(tst_cc_sources) +#libtst_la_LIBADD = +libtst_la_LDFLAGS = -version-info $(GENERIC_LIBRARY_VERSION) + +# Header files +nobase_pkginclude_HEADERS = $(ttree_h_sources) +#nobase_nodist_pkginclude_HEADERS = $(top_builddir)/@PACKAGE@/config.h + + +## +# Binaries (batches) +bin_PROGRAMS = simple_tst + +simple_tst_SOURCES = simple_tst.cpp +#simple_tst_CXXFLAGS = +#simple_tst_LDADD = +#simple_tst_LDFLAGS = + +## +# Test binaries +#check_PROGRAMS = iterator_compile_test tst_concept_checks + +#iterator_compile_test_SOURCES = iterator_compile_test.cpp +#iterator_compile_test_LDFLAGS = + +#tst_concept_checks_SOURCES = tst_concept_checks.cpp +#tst_concept_checks_LDFLAGS = Modified: trunk/opentrep/ternary_tree/examples.cpp =================================================================== --- trunk/opentrep/ternary_tree/examples.cpp 2009-07-14 10:45:19 UTC (rev 126) +++ trunk/opentrep/ternary_tree/examples.cpp 2009-07-14 14:07:29 UTC (rev 127) @@ -1,231 +1,231 @@ -/** 
\file - * Usage examples for Structured Containers. - */ - -#include <iostream> -#include "structured_set.hpp" -#include "structured_map.hpp" - -#include <iostream> -#include <string> -#include <set> -#include <functional> -//#include <boost/scoped_ptr.hpp> - -// -// Basic use of structured_set -// - -void basic() -{ - typedef containers::structured_set<std::string> Set; - typedef Set::iterator SetIter; - typedef std::pair<SetIter, SetIter> IterPair; - - Set names; - names.insert("apps"); - names.insert("applets"); - names.insert("banana"); - - std::cout << "The set contains\n\t"; - for (SetIter it = names.begin(); it != names.end(); ++it) - std::cout << *it << ", "; - - IterPair p = names.prefix_range("app"); - std::cout << "\nprefix_range(\"app\") returns:\n\t"; - while (p.first != p.second) { - std::cout << *p.first++ << ", "; - } - std::cout << "\np.second points to " << *p.second; - - std::cout << "\nequal_range(\"app\") returns:\n\t"; - p = names.equal_range("app"); - if (p.first == p.second) - std::cout << "empty range"; - std::cout << "\np.second points to " << *p.second; -} - - -//############################################################################# -// -// prefix_range example (compile-only, does not run as is) -// -typedef containers::structured_set<std::string> SymbolSet; -SymbolSet symbols; -bool is_defined_in_scope(std::string scope, std::string name) -{ - typedef std::pair<SymbolSet::iterator, SymbolSet::iterator> Range; - Range r = symbols.prefix_range(scope + "::"); - SymbolSet::iterator n = symbols.find(name); - return n != symbols.end() && *n >= *r.first && *n < *r.second; -} - - -//############################################################################# -// -// Case-insensitive structured containers -// -template<class CharT> -struct nocase_less : public std::binary_function<CharT, CharT, bool> -{ - bool operator()(CharT a, CharT b) const { return tolower(a) < tolower(b); } -}; - -void caseless_set() -{ - typedef 
containers::structured_multiset<std::string, nocase_less<char> > CaselessSet; - typedef containers::structured_multimap<std::string, double, nocase_less<char> > CaselessMap; - - CaselessMap uncased; - uncased.insert(std::make_pair("NoCase", 0.1)); - CaselessSet caseless; - caseless.insert("NoCase"); - caseless.insert("nocase"); - caseless.insert("noCase"); - caseless.insert("NOCASE"); - - std::cout << "nocase = " << (int)caseless.count("nocase"); - - CaselessSet::const_iterator endit = caseless.end(); - for(CaselessSet::const_iterator it = caseless.begin(); it != endit; ++it) { - std::cout << ", " << *it; - } - -} - -//############################################################################# -// -// Localization comparator -// - -#include "examples/locale_less.hpp" - -void localized_comparator() -{ - typedef containers::structured_set<std::string, utility::locale_less<char> > LocalSet; - - typedef containers::structured_set<std::string> DefaultSet; - - if (utility::swedish_locale_name == "C") - std::cout << "No locale to test\n"; - else - std::cout << "Attempt to set Swedish locale \"" << utility::swedish_locale_name << "\"\n"; - - try { - // use comparator constructor, create Swedish locale - LocalSet se_names(utility::locale_less<char>::locale_less(utility::swedish_locale_name)); - DefaultSet anynames; - - se_names.insert("\xC4ska"); - se_names.insert("\xC5m\xE5l"); - se_names.insert("\xD6dla"); - se_names.insert("Adam"); - - anynames.insert("\xC4ska"); - anynames.insert("\xC5m\xE5l"); - anynames.insert("\xD6dla"); - anynames.insert("Adam"); - - - for(LocalSet::iterator sit = se_names.begin(); sit != se_names.end(); ++sit) { - std::cout << *sit << ", "; - } - std::cout << "not:\n"; - for(DefaultSet::iterator dit = anynames.begin(); dit != anynames.end(); ++dit) { - std::cout << *dit << ", "; - } - } catch(std::exception& x) { - std::cout << "...failed - skip test\n" << x.what() << "\n"; - } -} - 
-//############################################################################# -// -// longest_match example -// -#include <fstream> - -typedef containers::structured_map<std::string, int, nocase_less<char> > Vocabulary; - -void fill_wordlist(const char* filename, Vocabulary& wordlist) -{ - std::ifstream wordstream(filename); - if (!wordstream.is_open()) { - std::cerr << "Could not open dictionary " << filename << "\n"; - return; - } - char buf[300]; - int linecount = 0; - while(wordstream.getline(buf, 300, '\n').good()) - wordlist[buf] = ++linecount; -} - -std::streamsize get_filesize(std::ifstream& str) -{ - std::streamsize pos = str.tellg(); - str.seekg(0, std::ios_base::end); - std::streamsize result = str.tellg(); - str.seekg(pos, std::ios_base::beg); - return result; -} - -namespace { - template<class T> - struct scoped_array - { - scoped_array(size_t count) : buf(new T[count]) {} - ~scoped_array() { delete[] buf; } - T* get() { return buf; } - private: - T* buf; - }; -} - -void longest_match_example(const char* dictfile, const char* parsefile) -{ - Vocabulary english; - // Read dictionary from disk - fill_wordlist(dictfile, english); - if (english.empty()) - return; - - std::ifstream infile(parsefile); - if (!infile.is_open()) - return; - - // longest_match does not work with istream_iterator, so must fill buffer - size_t filesize = (size_t)get_filesize(infile); - // instead of boost::scoped_array - scoped_array<char> bytes(filesize); - infile.read(bytes.get(), filesize); - - const char *first = bytes.get(); - const char *last = first + infile.gcount(); - - while (first != last) - { - Vocabulary::iterator word = english.longest_match(first, last); - if (word != english.end()) - std::cout << (*word).first << " "; //= " << (*word).second << "\n"; - else { - // No key; try next char - ++first; - } - } -} - -//############################################################################# - -int main() -{ - std::cout << "*** basic usage ***\n"; - basic(); - 
std::cout << "\n\n*** custom comparator ***\n"; - caseless_set(); - std::cout << "\n\n*** locale comparator ***\n"; - localized_comparator(); - std::cout << "\n\n*** longest_match ***\n"; - // You need to supply files, not included in ternary_tree distribution - longest_match_example("../english-150k.txt", "../shakequotes.txt"); - return 0; -} +/** \file + * Usage examples for Structured Containers. + */ + +#include <iostream> +#include "structured_set.hpp" +#include "structured_map.hpp" + +#include <iostream> +#include <string> +#include <set> +#include <functional> +//#include <boost/scoped_ptr.hpp> + +// +// Basic use of structured_set +// + +void basic() +{ + typedef containers::structured_set<std::string> Set; + typedef Set::iterator SetIter; + typedef std::pair<SetIter, SetIter> IterPair; + + Set names; + names.insert("apps"); + names.insert("applets"); + names.insert("banana"); + + std::cout << "The set contains\n\t"; + for (SetIter it = names.begin(); it != names.end(); ++it) + std::cout << *it << ", "; + + IterPair p = names.prefix_range("app"); + std::cout << "\nprefix_range(\"app\") returns:\n\t"; + while (p.first != p.second) { + std::cout << *p.first++ << ", "; + } + std::cout << "\np.second points to " << *p.second; + + std::cout << "\nequal_range(\"app\") returns:\n\t"; + p = names.equal_range("app"); + if (p.first == p.second) + std::cout << "empty range"; + std::cout << "\np.second points to " << *p.second; +} + + +//############################################################################# +// +// prefix_range example (compile-only, does not run as is) +// +typedef containers::structured_set<std::string> SymbolSet; +SymbolSet symbols; +bool is_defined_in_scope(std::string scope, std::string name) +{ + typedef std::pair<SymbolSet::iterator, SymbolSet::iterator> Range; + Range r = symbols.prefix_range(scope + "::"); + SymbolSet::iterator n = symbols.find(name); + return n != symbols.end() && *n >= *r.first && *n < *r.second; +} + + 
+//############################################################################# +// +// Case-insensitive structured containers +// +template<class CharT> +struct nocase_less : public std::binary_function<CharT, CharT, bool> +{ + bool operator()(CharT a, CharT b) const { return tolower(a) < tolower(b); } +}; + +void caseless_set() +{ + typedef containers::structured_multiset<std::string, nocase_less<char> > CaselessSet; + typedef containers::structured_multimap<std::string, double, nocase_less<char> > CaselessMap; + + CaselessMap uncased; + uncased.insert(std::make_pair("NoCase", 0.1)); + CaselessSet caseless; + caseless.insert("NoCase"); + caseless.insert("nocase"); + caseless.insert("noCase"); + caseless.insert("NOCASE"); + + std::cout << "nocase = " << (int)caseless.count("nocase"); + + CaselessSet::const_iterator endit = caseless.end(); + for(CaselessSet::const_iterator it = caseless.begin(); it != endit; ++it) { + std::cout << ", " << *it; + } + +} + +//############################################################################# +// +// Localization comparator +// + +#include "examples/locale_less.hpp" + +void localized_comparator() +{ + typedef containers::structured_set<std::string, utility::locale_less<char> > LocalSet; + + typedef containers::structured_set<std::string> DefaultSet; + + if (utility::swedish_locale_name == "C") + std::cout << "No locale to test\n"; + else + std::cout << "Attempt to set Swedish locale \"" << utility::swedish_locale_name << "\"\n"; + + try { + // use comparator constructor, create Swedish locale + LocalSet se_names(utility::locale_less<char>::locale_less(utility::swedish_locale_name)); + DefaultSet anynames; + + se_names.insert("\xC4ska"); + se_names.insert("\xC5m\xE5l"); + se_names.insert("\xD6dla"); + se_names.insert("Adam"); + + anynames.insert("\xC4ska"); + anynames.insert("\xC5m\xE5l"); + anynames.insert("\xD6dla"); + anynames.insert("Adam"); + + + for(LocalSet::iterator sit = se_names.begin(); sit != se_names.end(); 
++sit) { + std::cout << *sit << ", "; + } + std::cout << "not:\n"; + for(DefaultSet::iterator dit = anynames.begin(); dit != anynames.end(); ++dit) { + std::cout << *dit << ", "; + } + } catch(std::exception& x) { + std::cout << "...failed - skip test\n" << x.what() << "\n"; + } +} + +//############################################################################# +// +// longest_match example +// +#include <fstream> + +typedef containers::structured_map<std::string, int, nocase_less<char> > Vocabulary; + +void fill_wordlist(const char* filename, Vocabulary& wordlist) +{ + std::ifstream wordstream(filename); + if (!wordstream.is_open()) { + std::cerr << "Could not open dictionary " << filename << "\n"; + return; + } + char buf[300]; + int linecount = 0; + while(wordstream.getline(buf, 300, '\n').good()) + wordlist[buf] = ++linecount; +} + +std::streamsize get_filesize(std::ifstream& str) +{ + std::streamsize pos = str.tellg(); + str.seekg(0, std::ios_base::end); + std::streamsize result = str.tellg(); + str.seekg(pos, std::ios_base::beg); + return result; +} + +namespace { + template<class T> + struct scoped_array + { + scoped_array(size_t count) : buf(new T[count]) {} + ~scoped_array() { delete[] buf; } + T* get() { return buf; } + private: + T* buf; + }; +} + +void longest_match_example(const char* dictfile, const char* parsefile) +{ + Vocabulary english; + // Read dictionary from disk + fill_wordlist(dictfile, english); + if (english.empty()) + return; + + std::ifstream infile(parsefile); + if (!infile.is_open()) + return; + + // longest_match does not work with istream_iterator, so must fill buffer + size_t filesize = (size_t)get_filesize(infile); + // instead of boost::scoped_array + scoped_array<char> bytes(filesize); + infile.read(bytes.get(), filesize); + + const char *first = bytes.get(); + const char *last = first + infile.gcount(); + + while (first != last) + { + Vocabulary::iterator word = english.longest_match(first, last); + if (word != english.end()) 
+ std::cout << (*word).first << " "; //= " << (*word).second << "\n"; + else { + // No key; try next char + ++first; + } + } +} + +//############################################################################# + +int main() +{ + std::cout << "*** basic usage ***\n"; + basic(); + std::cout << "\n\n*** custom comparator ***\n"; + caseless_set(); + std::cout << "\n\n*** locale comparator ***\n"; + localized_comparator(); + std::cout << "\n\n*** longest_match ***\n"; + // You need to supply files, not included in ternary_tree distribution + longest_match_example("../english-150k.txt", "../shakequotes.txt"); + return 0; +} Deleted: trunk/opentrep/ternary_tree/fill_dictionary.cpp =================================================================== --- trunk/opentrep/ternary_tree/fill_dictionary.cpp 2009-07-14 10:45:19 UTC (rev 126) +++ trunk/opentrep/ternary_tree/fill_dictionary.cpp 2009-07-14 14:07:29 UTC (rev 127) @@ -1,66 +0,0 @@ -#include <vector> -#include <string> -#include <iostream> -#include <fstream> -#include <algorithm> -#include <stdexcept> - -typedef std::vector<std::string> Dictionary; - -//template<class Container> -size_t fill_dictionary(const char* filename, Dictionary& dictionary, size_t maxsize, size_t line_length = 0) -{ - std::ifstream input(filename); - size_t longest_in_file = 0; - size_t linecount = 0; - if (!input.is_open()) { - std::cerr << filename << ": file open fail\n"; - throw std::runtime_error("fill_dictionary failed"); - } - if (input.is_open() && !line_length) - { - std::vector<char> next; - next.resize(std::max(line_length, size_t(300))); - while(input.good() && linecount < maxsize) { - input.getline(&next[0], next.capacity()); - std::string s(next.begin(), next.begin() + (size_t)input.gcount()); - if (!s.empty()) { - dictionary.push_back(s.c_str()); - ++linecount; - if (s.size() > longest_in_file) - longest_in_file = s.size(); - //std::cerr << s.c_str() << "\n"; - } - //next.clear(); - } - //std::cerr << "Read " << dictionary.size() 
<< " lines from wordlist.txt\n"; - } - - // If file not long enough, fill up with some random alphabetic strings - if (line_length || (dictionary.size() < maxsize && (maxsize < size_t(-1)) ) ) - { - std::string next; - if (!line_length) { - line_length = linecount? longest_in_file : 10; - std::cerr << "zero-length line, we'll have trouble"; - } - next.reserve(line_length + 1); - for (size_t i = dictionary.size(); i < maxsize; ++i) - { - size_t length = 1 + (rand() % line_length); - next.resize(length--); - while(length--) { - next[length] = rand() % (127-' ') + ' '; - } - //std::cerr << next << '\n'; - dictionary.push_back(next.c_str()); - } - if (line_length > longest_in_file) - longest_in_file = line_length; - } - return longest_in_file; -} - - - - Copied: trunk/opentrep/ternary_tree/fill_dictionary.hpp (from rev 126, trunk/opentrep/ternary_tree/fill_dictionary.cpp) =================================================================== --- trunk/opentrep/ternary_tree/fill_dictionary.hpp (rev 0) +++ trunk/opentrep/ternary_tree/fill_dictionary.hpp 2009-07-14 14:07:29 UTC (rev 127) @@ -0,0 +1,66 @@ +#include <vector> +#include <string> +#include <iostream> +#include <fstream> +#include <algorithm> +#include <stdexcept> + +typedef std::vector<std::string> Dictionary; + +//template<class Container> +size_t fill_dictionary(const char* filename, Dictionary& dictionary, size_t maxsize, size_t line_length = 0) +{ + std::ifstream input(filename); + size_t longest_in_file = 0; + size_t linecount = 0; + if (!input.is_open()) { + std::cerr << filename << ": file open fail\n"; + throw std::runtime_error("fill_dictionary failed"); + } + if (input.is_open() && !line_length) + { + std::vector<char> next; + next.resize(std::max(line_length, size_t(300))); + while(input.good() && linecount < maxsize) { + input.getline(&next[0], next.capacity()); + std::string s(next.begin(), next.begin() + (size_t)input.gcount()); + if (!s.empty()) { + dictionary.push_back(s.c_str()); + ++linecount; + 
if (s.size() > longest_in_file) + longest_in_file = s.size(); + //std::cerr << s.c_str() << "\n"; + } + //next.clear(); + } + //std::cerr << "Read " << dictionary.size() << " lines from wordlist.txt\n"; + } + + // If file not long enough, fill up with some random alphabetic strings + if (line_length || (dictionary.size() < maxsize && (maxsize < size_t(-1)) ) ) + { + std::string next; + if (!line_length) { + line_length = linecount? longest_in_file : 10; + std::cerr << "zero-length line, we'll have trouble"; + } + next.reserve(line_length + 1); + for (size_t i = dictionary.size(); i < maxsize; ++i) + { + size_t length = 1 + (rand() % line_length); + next.resize(length--); + while(length--) { + next[length] = rand() % (127-' ') + ' '; + } + //std::cerr << next << '\n'; + dictionary.push_back(next.c_str()); + } + if (line_length > longest_in_file) + longest_in_file = line_length; + } + return longest_in_file; +} + + + + Modified: trunk/opentrep/ternary_tree/iterator_compile_test.cpp =================================================================== --- trunk/opentrep/ternary_tree/iterator_compile_test.cpp 2009-07-14 10:45:19 UTC (rev 126) +++ trunk/opentrep/ternary_tree/iterator_compile_test.cpp 2009-07-14 14:07:29 UTC (rev 127) @@ -1,163 +1,163 @@ -/** Pure compilation/header test - * \file - * This file checks interoperability requirements for iterator_wrapper.hpp - * The problem cases are those that should fail: they cannot be checked - * automatically by compiler. - * So to use, you must define TEST_COMPILATION_FAILURE or - * CHECK_SPECIAL_COMP_FAILURE below, and then inspect compiler warnings - * to see that you get an error for every SHOULD_FAIL line (#1-F, #20-21). - * To simplify this, uncomment one statement at a time in the function - * iterator_interop_checks_main() - * at the end of this file, then try to compile. 
- * - * Construction/assignment to reverse_iterator from const_reverse_iterator - * generates more complicated error messages from compilers, - * so were broken out to allow separate runs. - * Define the macro CHECK_SPECIAL_COMP_FAILURE and look for - * SPECIAL_FAIL_1 and _2 in the compiler output. - * - * This (un)works as required with MSVC and Comeau online tryitout compiler. - */ - -#ifdef _MSC_VER -# pragma warning(disable: 4245 4127 4189 4700) -#endif - - -//#define TEST_COMPILATION_FAILURE -// Two cases must be inspected in references on MSVC -//#define CHECK_SPECIAL_COMP_FAILURE - -#ifdef TEST_COMPILATION_FAILURE -# define SHOULD_FAIL( Pred ) Pred -#else -# define SHOULD_FAIL( Pred ) -#endif - -#ifdef CHECK_SPECIAL_COMP_FAILURE -# define SPECIAL_FAIL_1( Pred ) Pred -# define SPECIAL_FAIL_2( Pred ) Pred -#else -# define SPECIAL_FAIL_1( Pred ) -# define SPECIAL_FAIL_2( Pred ) -#endif - - - -template<class Container> -void iterator_interop_checks() -{ - typedef Container C; - typedef typename C::const_iterator c_t; - typedef typename C::iterator i_t; - typedef typename C::const_reverse_iterator cr_t; - typedef typename C::reverse_iterator r_t; - c_t c; - i_t i; - cr_t cr; - r_t r; -///// COPY-CONSTRUCTORS - // Should work - c_t i1( i ); - c_t i2( cr.base() ); - c_t i3( r.base() ); - i_t i4( r.base() ); - cr_t i5( r ); - cr_t i6( c ); - cr_t i7( i ); - r_t i8( i ); - - SHOULD_FAIL( c_t i101( cr ); ) // #1 - SHOULD_FAIL( c_t i102( r ); ) // #2 - SHOULD_FAIL( i_t i103( c ); ) // #3 - SHOULD_FAIL( i_t i104( cr ); ) // #4 - SHOULD_FAIL( i_t i105( r ); ) // #5 - SHOULD_FAIL( r_t i106( c ); ) // #6 - -///// ASSIGNMENT - // Should work - c = i; - c = cr.base(); - c = r.base(); - i = r.base(); - cr = r; - - SHOULD_FAIL( c = cr; ) // #7 - SHOULD_FAIL( c = r; ) // #8 - SHOULD_FAIL( i = c; ) // #9 - SHOULD_FAIL( i = cr; ) // #A - SHOULD_FAIL( i = r; ) // #B - SHOULD_FAIL( cr = c; ) // #C - SHOULD_FAIL( cr = i; ) // #D - SHOULD_FAIL( r = c; ) // #E - SHOULD_FAIL( r = i; ) 
// #F - -// these fail in 2nd pass or something, compile separately - SPECIAL_FAIL_1( r_t i107( cr ); ) // #10 - SPECIAL_FAIL_2( r = cr; ) // #11 - -///// ADVANCE - ++c; --c; - ++i; --i; - ++cr; --cr; - ++r; --r; - c++; c--; - i++; i--; - cr++; cr--; - r++; r--; - -///// COMPARE - if (c == i && i == c) c = c; - if (c == cr.base() && cr.base() == c) c = c; - if (i == r.base() && r.base() == i) c = c; - // Should this fail? - Dinkum nor StlPort don't seem to prevent it - //if (r == cr && cr == r) c = c; - if (r.base() == cr.base() && cr.base() == r.base()) c = c; - -///// DEREFERENCE - typedef typename C::value_type val_t; - typedef typename C::pointer ptr_t; - typedef typename C::reference ref_t; - typedef typename C::const_reference cref_t; - - val_t val1 = *c; - val_t val2 = *i; - val_t val3 = *cr; - val_t val4 = *r; - - ref_t ref2 = *i; - ref_t ref4 = *r; - - cref_t cref1 = *c; - cref_t cref2 = *i; - cref_t cref3 = *cr; - cref_t cref4 = *r; - - SHOULD_FAIL( ref_t ref1 = *c ); // #20 - SHOULD_FAIL( ref_t ref3 = *cr ); // #21 - -} - -#include <vector> -#include "ternary_tree.hpp" -#include "structured_set.hpp" -#include "structured_map.hpp" - -void iterator_interop_checks_main() -{ - typedef std::vector<int> Cont; -// iterator_interop_checks<Cont>(); -/* typedef containers::ternary_tree<std::string, int> Tst; - iterator_interop_checks<Tst>(); - typedef containers::structured_set<std::string> StrucSet; - iterator_interop_checks<StrucSet>(); -*/ typedef containers::structured_multiset<std::string> MStrucSet; - iterator_interop_checks<MStrucSet>(); -/* typedef containers::structured_map<std::string, int> StrucMap; - iterator_interop_checks<StrucMap>(); - typedef containers::structured_multimap<std::string, int> MStrucMap; - iterator_interop_checks<MStrucMap>(); */ -} - - - +/** Pure compilation/header test + * \file + * This file checks interoperability requirements for iterator_wrapper.hpp + * The problem cases are those that should fail: they cannot be checked + * 
automatically by compiler. + * So to use, you must define TEST_COMPILATION_FAILURE or + * CHECK_SPECIAL_COMP_FAILURE below, and then inspect compiler warnings + * to see that you get an error for every SHOULD_FAIL line (#1-F, #20-21). + * To simplify this, uncomment one statement at a time in the function + * iterator_interop_checks_main() + * at the end of this file, then try to compile. + * + * Construction/assignment to reverse_iterator from const_reverse_iterator + * generates more complicated error messages from compilers, + * so were broken out to allow separate runs. + * Define the macro CHECK_SPECIAL_COMP_FAILURE and look for + * SPECIAL_FAIL_1 and _2 in the compiler output. + * + * This (un)works as required with MSVC and Comeau online tryitout compiler. + */ + +#ifdef _MSC_VER +# pragma warning(disable: 4245 4127 4189 4700) +#endif + + +//#define TEST_COMPILATION_FAILURE +// Two cases must be inspected in references on MSVC +//#define CHECK_SPECIAL_COMP_FAILURE + +#ifdef TEST_COMPILATION_FAILURE +# define SHOULD_FAIL( Pred ) Pred +#else +# define SHOULD_FAIL( Pred ) +#endif + +#ifdef CHECK_SPECIAL_COMP_FAILURE +# define SPECIAL_FAIL_1( Pred ) Pred +# define SPECIAL_FAIL_2( Pred ) Pred +#else +# define SPECIAL_FAIL_1( Pred ) +# define SPECIAL_FAIL_2( Pred ) +#endif + + + +template<class Container> +void iterator_interop_checks() +{ + typedef Container C; + typedef typename C::const_iterator c_t; + typedef typename C::iterator i_t; + typedef typename C::const_reverse_iterator cr_t; + typedef typename C::reverse_iterator r_t; + c_t c; + i_t i; + cr_t cr; + r_t r; +///// COPY-CONSTRUCTORS + // Should work + c_t i1( i ); + c_t i2( cr.base() ); + c_t i3( r.base() ); + i_t i4( r.base() ); + cr_t i5( r ); + cr_t i6( c ); + cr_t i7( i ); + r_t i8( i ); + + SHOULD_FAIL( c_t i101( cr ); ) // #1 + SHOULD_FAIL( c_t i102( r ); ) // #2 + SHOULD_FAIL( i_t i103( c ); ) // #3 + SHOULD_FAIL( i_t i104( cr ); ) // #4 + SHOULD_FAIL( i_t i105( r ); ) // #5 + SHOULD_FAIL( r_t 
i106( c ); ) // #6 + +///// ASSIGNMENT + // Should work + c = i; + c = cr.base(); + c = r.base(); + i = r.base(); + cr = r; + + SHOULD_FAIL( c = cr; ) // #7 + SHOULD_FAIL( c = r; ) // #8 + SHOULD_FAIL( i = c; ) // #9 + SHOULD_FAIL( i = cr; ) // #A + SHOULD_FAIL( i = r; ) // #B + SHOULD_FAIL( cr = c; ) // #C + SHOULD_FAIL( cr = i; ) // #D + SHOULD_FAIL( r = c; ) // #E + SHOULD_FAIL( r = i; ) // #F + +// these fail in 2nd pass or something, compile separately + SPECIAL_FAIL_1( r_t i107( cr ); ) // #10 + SPECIAL_FAIL_2( r = cr; ) // #11 + +///// ADVANCE + ++c; --c; + ++i; --i; + ++cr; --cr; + ++r; --r; + c++; c--; + i++; i--; + cr++; cr--; + r++; r--; + +///// COMPARE + if (c == i && i == c) c = c; + if (c == cr.base() && cr.base() == c) c = c; + if (i == r.base() && r.base() == i) c = c; + // Should this fail? - Dinkum nor StlPort don't seem to prevent it + //if (r == cr && cr == r) c = c; + if (r.base() == cr.base() && cr.base() == r.base()) c = c; + +///// DEREFERENCE + typedef typename C::value_type val_t; + typedef typename C::pointer ptr_t; + typedef typename C::reference ref_t; + typedef typename C::const_reference cref_t; + + val_t val1 = *c; + val_t val2 = *i; + val_t val3 = *cr; + val_t val4 = *r; + + ref_t ref2 = *i; + ref_t ref4 = *r; + + cref_t cref1 = *c; + cref_t cref2 = *i; + cref_t cref3 = *cr; + cref_t cref4 = *r; + + SHOULD_FAIL( ref_t ref1 = *c ); // #20 + SHOULD_FAIL( ref_t ref3 = *cr ); // #21 + +} + +#include <vector> +#include "ternary_tree.hpp" +#include "structured_set.hpp" +#include "structured_map.hpp" + +void iterator_interop_checks_main() +{ + typedef std::vector<int> Cont; +// iterator_interop_checks<Cont>(); +/* typedef containers::ternary_tree<std::string, int> Tst; + iterator_interop_checks<Tst>(); + typedef containers::structured_set<std::string> StrucSet; + iterator_interop_checks<StrucSet>(); +*/ typedef containers::structured_multiset<std::string> MStrucSet; + iterator_interop_checks<MStrucSet>(); +/* typedef 
containers::structured_map<std::string, int> StrucMap; + iterator_interop_checks<StrucMap>(); + typedef containers::structured_multimap<std::string, int> MStrucMap; + iterator_interop_checks<MStrucMap>(); */ +} + + + Modified: trunk/opentrep/ternary_tree/iterator_wrapper.hpp =================================================================== --- trunk/opentrep/ternary_tree/iterator_wrapper.hpp 2009-07-14 10:45:19 UTC (rev 126) +++ trunk/opentrep/ternary_tree/iterator_wrapper.hpp 2009-07-14 14:07:29 UTC (rev 127) @@ -1,233 +1,233 @@ -// Created Mon Feb 06 13:20:01 2006 -#ifndef ITERATOR_WRAPPER_HPP_INCLUDED -#define ITERATOR_WRAPPER_HPP_INCLUDED - -#include <iterator> - -namespace iterators { - - // This is mostly a lame ripoff from Boost.Iterator, to avoid the dependency... - - //! Standard type traits for const_iterators. \see iterator_wrapper - template <class T> - struct const_traits { - typedef T value_type; - typedef const T* pointer; - typedef const T* const_pointer; - typedef const T& reference; - typedef const T& const_reference; - }; - - //! Standard type traits for (non-const) iterators. \see iterator_wrapper - template <class T> - struct nonconst_traits { - typedef T value_type; - typedef T* pointer; - typedef const T* const_pointer; - typedef T& reference; - typedef const T& const_reference; - }; - - /** Creates a bidirectional iterator from a base implementation, - * which is required to supply the interface \code - * struct iter_impl_sample - * { - * typedef /impl-defined/ reference; - * iter_impl_sample(); - * iter_impl_sample(/args/); - * void increment(); - * void decrement(); - * reference dereference() const; - * template<class OtherIter> bool equal(const OtherIter& rhs); - * void swap(this_type& rhs); - * }; \endcode - * (This class is meant for iterators you control - if you need to adapt an existing iterator - * with different interface, something like boost::iterator_facade is needed.) 
- * - * The first template parameter is the iterator implementation class. - * iterator_wrapper does not inherit from this. The second parameter is either const_traits <T> - * or nonconst_traits <T>, which provide the basic value_type related definitions. - * - * Note that Boost.Iterator will do the same job better, this was provided to avoid the dependency. - * Future versions may move to Boost instead. - * - * \ingroup utilities - */ - template< class BaseIterT - , class TraitsT - , class IterCatT = std::bidirectional_iterator_tag - > - struct iterator_wrapper - { - typedef BaseIterT base_iter; - typedef TraitsT traits_type; - typedef iterator_wrapper<BaseIterT, TraitsT, IterCatT> this_type; - - typedef typename TraitsT::value_type value_type; - typedef typename TraitsT::pointer pointer; - typedef typename TraitsT::reference reference; - typedef typename TraitsT::const_reference const_reference; - - typedef IterCatT iterator_category; - typedef ptrdiff_t difference_type; - typedef size_t size_type; - - iterator_wrapper() {} - - //! Copy constructor for iterator and constructor from (non-const) iterator for const_iterator - template<class SameBase> - iterator_wrapper(const iterator_wrapper<SameBase, nonconst_traits<value_type>, IterCatT>& it) - : m_iter(it.iter_base()) - {} - - iterator_wrapper(const iterator_wrapper& it) : m_iter(it.iter_base()) {} - - iterator_wrapper(const base_iter& it) : m_iter(it) {} - - reference operator*() const { return m_iter.dereference(); } - - pointer operator->() const { return &m_iter.dereference(); } - - this_type& operator++() { m_iter.increment(); return *this; } - - this_type operator++(int) { - this_type tmp(*this); - this->operator++(); - return tmp; - } - - this_type& operator--() { m_iter.decrement(); return *this; } - - this_type operator--(int) { - this_type tmp(*this); - this->operator--(); - return tmp; - } - - //! 
Assignment from non-const to const_iterator - template<class SameBase> - this_type& operator=(const iterator_wrapper<SameBase, nonconst_traits<value_type> >& rhs) { - this_type(rhs).swap(*this); - return *this; - } - - this_type& operator=(const iterator_wrapper<BaseIterT, TraitsT>& rhs) { - this_type(rhs).swap(*this); - return *this; - } - - template<class Base, class Constness> - void swap(iterator_wrapper<Base, Constness>& other) { - iter_base().swap(other.iter_base()); - } - - base_iter& iter_base() { return m_iter; } - const base_iter& iter_base() const { return m_iter; } - - private: - base_iter m_iter; - }; - - // \relates iterator_wrapper - template<class Base, class Val, class Val2, class Cat> - bool operator== (const iterator_wrapper<Base, Val, Cat>& lhs, const iterator_wrapper<Base, Val2, Cat>& rhs) { - return lhs.iter_base().equal(rhs.iter_base()); - } - - // \relates iterator_wrapper - template<class Base, class Val, class Val2, class Cat> - bool operator!= (const iterator_wrapper<Base, Val, Cat>& lhs, const iterator_wrapper<Base, Val2, Cat>& rhs) { - return ! (lhs == rhs); - } - -//! 
\def provide equality operator for reverse const/nonconst iterators \relates iterator_wrapper -#define INTEROPERABLE_REVERSE_ITERATOR_WRAPPERS(ConstTraits, NonConstTraits) \ - template<class Base, class Val, class Cat> \ - bool operator== (const std::reverse_iterator<iterator_wrapper<Base, ConstTraits<Val>, Cat> >& lhs, \ - const std::reverse_iterator<iterator_wrapper<Base, NonConstTraits<Val>, Cat> >& rhs) { \ - return lhs.base() == rhs.base(); \ - } \ - template<class Base, class Val, class Cat> \ - bool operator!= (const std::reverse_iterator<iterator_wrapper<Base, NonConstTraits<Val>, Cat> >& lhs, \ - const std::reverse_iterator<iterator_wrapper<Base, ConstTraits<Val>, Cat> >& rhs) { \ - return !(lhs.base() == rhs.base()); \ - } - -INTEROPERABLE_REVERSE_ITERATOR_WRAPPERS(const_traits, nonconst_traits) -INTEROPERABLE_REVERSE_ITERATOR_WRAPPERS(nonconst_traits, const_traits) - -#undef INTEROPERABLE_REVERSE_ITERATOR_WRAPPERS - - -/** \relates iterator_wrapper - * @{ - */ -template<class Base, class Val, class Val2, class Cat> -bool operator< (const iterator_wrapper<Base, Val, Cat>& lhs, const iterator_wrapper<Base, Val2, Cat>& rhs) { - return lhs.iter_base().less(rhs.iter_base()); -} - - -template<class Base, class Val, class Val2, class Cat> -bool operator> (const iterator_wrapper<Base, Val, Cat>& lhs, const iterator_wrapper<Base, Val2, Cat>& rhs) { - return rhs < lhs; -} - - -template<class Base, class Val, class Val2, class Cat> -bool operator>= (const iterator_wrapper<Base, Val, Cat>& lhs, const iterator_wrapper<Base, Val2, Cat>& rhs) { - return ! (lhs < rhs); -} - -template<class Base, class Val, class Val2, class Cat> -bool operator<= (const iterator_wrapper<Base, Val, Cat>& lhs, const iterator_wrapper<Base, Val2, Cat>& rhs) { - return ! 
(rhs > lhs); -} - -// random access iter operations (+= -= etc) - -template<class Base, class Val, class Dist> -iterator_wrapper<Base, Val, std::random_access_iterator_tag>& -operator+= (iterator_wrapper<Base, Val, std::random_access_iterator_tag>& it, Dist n) { - it.iter_base().advance(n); - return it; -} - -template<class Base, class Val, class Dist> -iterator_wrapper<Base, Val, std::random_access_iterator_tag>& -operator-= (iterator_wrapper<Base, Val, std::random_access_iterator_tag>& it, Dist n) { - it.iter_base().advance(-n); - return it; -} - -template<class Base, class Val, class Dist> -iterator_wrapper<Base, Val, std::random_access_iterator_tag> -operator+ (const iterator_wrapper<Base, Val, std::random_access_iterator_tag>& it, Dist n) { - iterator_wrapper<Base, Val, std::random_access_iterator_tag> tmp(it); - tmp += n; - return tmp; -} - -template<class Base, class Val, class Dist> -iterator_wrapper<Base, Val, std::random_access_iterator_tag> -operator- (const iterator_wrapper<Base, Val, std::random_access_iterator_tag>& it, Dist n) { - iterator_wrapper<Base, Val, std::random_access_iterator_tag> tmp(it); - tmp -= n; - return tmp; -} - -template<class Base, class Val, class Dist> -Dist -operator- (const iterator_wrapper<Base, Val, std::random_access_iterator_tag>& lhs, - const iterator_wrapper<Base, Val, std::random_access_iterator_tag>& rhs) { - return lhs - rhs; -} - - -/*@}*/ - -} // namespace iterators - - - -#endif // ITERATOR_WRAPPER_HPP_INCLUDED +// Created Mon Feb 06 13:20:01 2006 +#ifndef ITERATOR_WRAPPER_HPP_INCLUDED +#define ITERATOR_WRAPPER_HPP_INCLUDED + +#include <iterator> + +namespace iterators { + + // This is mostly a lame ripoff from Boost.Iterator, to avoid the dependency... + + //! Standard type traits for const_iterators. 
\see iterator_wrapper + template <class T> + struct const_traits { + typedef T value_type; + typedef const T* pointer; + typedef const T* const_pointer; + typedef const T& reference; + typedef const T& const_reference; + }; + + //! Standard type traits for (non-const) iterators. \see iterator_wrapper + template <class T> + struct nonconst_traits { + typedef T value_type; + typedef T* pointer; + typedef const T* const_pointer; + typedef T& reference; + typedef const T& const_reference; + }; + + /** Creates a bidirectional iterator from a base implementation, + * which is required to supply the interface \code + * struct iter_impl_sample + * { + * typedef /impl-defined/ reference; + * iter_impl_sample(); + * iter_impl_sample(/args/); + * void increment(); + * void decrement(); + * reference dereference() const; + * template<class OtherIter> bool equal(const OtherIter& rhs); + * void swap(this_type& rhs); + * }; \endcode + * (This class is meant for iterators you control - if you need to adapt an existing iterator + * with different interface, something like boost::iterator_facade is needed.) + * + * The first template parameter is the iterator implementation class. + * iterator_wrapper does not inherit from this. The second parameter is either const_traits <T> + * or nonconst_traits <T>, which provide the basic value_type related definitions. + * + * Note that Boost.Iterator will do the same job better, this was provided to avoid the dependency. + * Future versions may move to Boost instead. 
+ * + * \ingroup utilities + */ + template< class BaseIterT + , class TraitsT + , class IterCatT = std::bidirectional_iterator_tag + > + struct iterator_wrapper + { + typedef BaseIterT base_iter; + typedef TraitsT traits_type; + typedef iterator_wrapper<BaseIterT, TraitsT, IterCatT> this_type; + + typedef typename TraitsT::value_type value_type; + typedef typename TraitsT::pointer pointer; + typedef typename TraitsT::reference reference; + typedef typename TraitsT::const_reference const_reference; + + typedef IterCatT iterator_category; + typedef ptrdiff_t difference_type; + typedef size_t size_type; + + iterator_wrapper() {} + + //! Copy constructor for iterator and constructor from (non-const) iterator for const_iterator + template<class SameBase> + iterator_wrapper(const iterator_wrapper<SameBase, nonconst_traits<value_type>, IterCatT>& it) + : m_iter(it.iter_base()) + {} + + iterator_wrapper(const iterator_wrapper& it) : m_iter(it.iter_base()) {} + + iterator_wrapper(const base_iter& it) : m_iter(it) {} + + reference operator*() const { return m_iter.dereference(); } + + pointer operator->() const { return &m_iter.dereference(); } + + this_type& operator++() { m_iter.increment(); return *this; } + + this_type operator++(int) { + this_type tmp(*this); + this->operator++(); + return tmp; + } + + this_type& operator--() { m_iter.decrement(); return *this; } + + this_type operator--(int) { + this_type tmp(*this); + this->operator--(); + return tmp; + } + + //! 
Assignment from non-const to const_iterator + template<class SameBase> + this_type& operator=(const iterator_wrapper<SameBase, nonconst_traits<value_type> >& rhs) { + this_type(rhs).swap(*this); + return *this; + } + + this_type& operator=(const iterator_wrapper<BaseIterT, TraitsT>& rhs) { + this_type(rhs).swap(*this); + return *this; + } + + template<class Base, class Constness> + void swap(iterator_wrapper<Base, Constness>& other) { + iter_base().swap(other.iter_base()); + } + + base_iter& iter_base() { return m_iter; } + const base_iter& iter_base() const { return m_iter; } + + private: + base_iter m_iter; + }; + + // \relates iterator_wrapper + template<class Base, class Val, class Val2, class Cat> + bool operator== (const iterator_wrapper<Base, Val, Cat>& lhs, const iterator_wrapper<Base, Val2, Cat>& rhs) { + return lhs.iter_base().equal(rhs.iter_base()); + } + + // \relates iterator_wrapper + template<class Base, class Val, class Val2, class Cat> + bool operator!= (const iterator_wrapper<Base, Val, Cat>& lhs, const iterator_wrapper<Base, Val2, Cat>& rhs) { + return ! (lhs == rhs); + } + +//! 
\def provide equality operator for reverse const/nonconst iterators \relates iterator_wrapper +#define INTEROPERABLE_REVERSE_ITERATOR_WRAPPERS(ConstTraits, NonConstTraits) \ + template<class Base, class Val, class Cat> \ + bool operator== (const std::reverse_iterator<iterator_wrapper<Base, ConstTraits<Val>, Cat> >& lhs, \ + const std::reverse_iterator<iterator_wrapper<Base, NonConstTraits<Val>, Cat> >& rhs) { \ + return lhs.base() == rhs.base(); \ + } \ + template<class Base, class Val, class Cat> \ + bool operator!= (const std::reverse_iterator<iterator_wrapper<Base, NonConstTraits<Val>, Cat> >& lhs, \ + const std::reverse_iterator<iterator_wrapper<Base, ConstTraits<Val>, Cat> >& rhs) { \ + return !(lhs.base() == rhs.base()); \ + } + +INTEROPERABLE_REVERSE_ITERATOR_WRAPPERS(const_traits, nonconst_traits) +INTEROPERABLE_REVERSE_ITERATOR_WRAPPERS(nonconst_traits, const_traits) + +#undef INTEROPERABLE_REVERSE_ITERATOR_WRAPPERS + + +/** \relates iterator_wrapper + * @{ + */ +template<class Base, class Val, class Val2, class Cat> +bool operator< (const iterator_wrapper<Base, Val, Cat>& lhs, const iterator_wrapper<Base, Val2, Cat>& rhs) { + return lhs.iter_base().less(rhs.iter_base()); +} + + +template<class Base, class Val, class Val2, class Cat> +bool operator> (const iterator_wrapper<Base, Val, Cat>& lhs, const iterator_wrapper<Base, Val2, Cat>& rhs) { + return rhs < lhs; +} + + +template<class Base, class Val, class Val2, class Cat> +bool operator>= (const iterator_wrapper<Base, Val, Cat>& lhs, const iterator_wrapper<Base, Val2, Cat>& rhs) { + return ! (lhs < rhs); +} + +template<class Base, class Val, class Val2, class Cat> +bool operator<= (const iterator_wrapper<Base, Val, Cat>& lhs, const iterator_wrapper<Base, Val2, Cat>& rhs) { + return ! 
(rhs > lhs); +} + +// random access iter operations (+= -= etc) + +template<class Base, class Val, class Dist> +iterator_wrapper<Base, Val, std::random_access_iterator_tag>& +operator+= (iterator_wrapper<Base, Val, std::random_access_iterator_tag>& it, Dist n) { + it.iter_base().advance(n); + return it; +} + +template<class Base, class Val, class Dist> +iterator_wrapper<Base, Val, std::random_access_iterator_tag>& +operator-= (iterator_wrapper<Base, Val, std::random_access_iterator_tag>& it, Dist n) { + it.iter_base().advance(-n); + return it; +} + +template<class Base, class Val, class Dist> +iterator_wrapper<Base, Val, std::random_access_iterator_tag> +operator+ (const iterator_wrapper<Base, Val, std::random_access_iterator_tag>& it, Dist n) { + iterator_wrapper<Base, Val, std::random_access_iterator_tag> tmp(it); + tmp += n; + return tmp; +} + +template<class Base, class Val, class Dist> +iterator_wrapper<Base, Val, std::random_access_iterator_tag> +operator- (const iterator_wrapper<Base, Val, std::random_access_iterator_tag>& it, Dist n) { + iterator_wrapper<Base, Val, std::random_access_iterator_tag> tmp(it); + tmp -= n; + return tmp; +} + +template<class Base, class Val, class Dist> +Dist +operator- (const iterator_wrapper<Base, Val, std::random_access_iterator_tag>& lhs, + const iterator_wrapper<Base, Val, std::random_access_iterator_tag>& rhs) { + return lhs - rhs; +} + + +/*@}*/ + +} // namespace iterators + + + +#endif // ITERATOR_WRAPPER_HPP_INCLUDED Modified: trunk/opentrep/ternary_tree/readme.txt =================================================================== --- trunk/opentrep/ternary_tree/readme.txt 2009-07-14 10:45:19 UTC (rev 126) +++ trunk/opentrep/ternary_tree/readme.txt 2009-07-14 14:07:29 UTC (rev 127) @@ -1,71 +1,71 @@ -LIBRARY -Ternary Search Tree C++ implementation by rasmus ekman -A header-only library of fast string containers with advanced search features. 
- -version 0.67, 14 May 2006 - -Please send bug reports, suggestions or questions to ras...@ab... -Get latest version at http://abc.se/~re/code/tst/ - -REQUIREMENTS -Library files tested with g++ 3.4.3 and MSVC 7.1 (Visual Studio 2003). -Visual Studio 6 will not work, but may only need moving the template -methods of ternary_tree and structured_* classes inline. -To generate documentation, you need Doxygen. See http://doxygen.org. - - -USAGE -Container classes: - structured_set<Key [, Comp, Alloc]> - structured_multiset<Key [, Comp, Alloc]> - structured_map<Key, Value [, Comp, Alloc]> - structured_multimap<Key, Value [, Comp, Alloc]> -- Key is a std::string-like type (a Forward Container), -- Value is any type, -- Comp is a less-like sort operation on Key::value_type (eg char/wchar_t) -- Alloc is std::allocator<Key [, Value]> or has same interface. - -These containers can be used as nearly drop-in replacments for std::set, -multiset, map, multimap or unordered_* containers on string-like types. - -There is one difference in interface: -If you used non-default comparator template argument with a set or map type, -it must be changed to operate on character type, not string. - -To use standard set, map features: See documentation of these classes. -See included documentation for information about the advanced key search -facilities in all structured_* containers. - - -FILES ---- l i b r a r y c o d e --- -structured_map.hpp - classes structured_map and -multimap. -structured_set.hpp - classes structured_set and multiset. -ternary_tree.hpp - implementation backend class. -./tst_detail/.* ternary_tree implementation files -iterator_wrapper.hpp - iterator interface, included by all containers. - ---- d o c s --- -tst_public.doxy - doxygen config file, generates public interface of library. -tst.doxy - doxygen config file, generates public and private interface docs. -./doxygen_input/* - extra documentation sources used by Doxygen. 
- -./html/* - generated public and private documentation -full-docs-index.html - redirects to html directory -index.html - redirects to tst_docs directory, only useful if doxygen is used - with tst_public.doxy - ---- t e s t s --- -tst_concept_checks.cpp - requires Boost concept_check header, portable. -iterator_compile_test.cpp - checks iterator_wrapper interoperability, portable. - -test_tst.cpp - test suite; relies on non-portable wstring.hpp (but see below) -fill_dictionary.cpp - (sloppy old support file for test_tst) - fills a vector with strings from file. -wstring.hpp - string/wstring conversion, uses Windows API - if the MultiByteToWideChar and WideCharToMultiByte API calls are replaced, - test_tst may run on your platform... - ---- -rasmus ekman -May 14, 2006 +LIBRARY +Ternary Search Tree C++ implementation by rasmus ekman +A header-only library of fast string containers with advanced search features. + +version 0.67, 14 May 2006 + +Please send bug reports, suggestions or questions to ras...@ab... +Get latest version at http://abc.se/~re/code/tst/ + +REQUIREMENTS +Library files tested with g++ 3.4.3 and MSVC 7.1 (Visual Studio 2003). +Visual Studio 6 will not work, but may only need moving the template +methods of ternary_tree and structured_* classes inline. +To generate documentation, you need Doxygen. See http://doxygen.org. + + +USAGE +Container classes: + structured_set<Key [, Comp, Alloc]> + structured_multiset<Key [, Comp, Alloc]> + structured_map<Key, Value [, Comp, Alloc]> + structured_multimap<Key, Value [, Comp, Alloc]> +- Key is a std::string-like type (a Forward Container), +- Value is any type, +- Comp is a less-like sort operation on Key::value_type (eg char/wchar_t) +- Alloc is std::allocator<Key [, Value]> or has same interface. + +These containers can be used as nearly drop-in replacments for std::set, +multiset, map, multimap or unordered_* containers on string-like types. 
+ +There is one difference in interface: +If you used non-default comparator template argument with a set or map type, +it must be changed to operate on character type, not string. + +To use standard set, map features: See documentation of these classes. +See included documentation for information about the advanced key search +facilities in all structured_* containers. + + +FILES +--- l i b r a r y c o d e --- +structured_map.hpp - classes structured_map and -multimap. +structured_set.hpp - classes structured_set and multiset. +ternary_tree.hpp - implementation backend class. +./tst_detail/.* ternary_tree implementation files +iterator_wrapper.hpp - iterator interface, included by all containers. + +--- d o c s --- +tst_public.doxy - doxygen config file, generates public interface of library. +tst.doxy - doxygen config file, generates public and private interface docs. +./doxygen_input/* - extra documentation sources used by Doxygen. + +./html/* - generated public and private documentation +full-docs-index.html - redirects to html directory +index.html - redirects to tst_docs directory, only useful if doxygen is used + with tst_public.doxy + +--- t e s t s --- +tst_concept_checks.cpp - requires Boost concept_check header, portable. +iterator_compile_test.cpp - checks iterator_wrapper interoperability, portable. + +test_tst.cpp - test suite; relies on non-portable wstring.hpp (but see below) +fill_dictionary.cpp - (sloppy old support file for test_tst) + fills a vector with strings from file. +wstring.hpp - string/wstring conversion, uses Windows API + if the MultiByteToWideChar and WideCharToMultiByte API calls are replaced, + test_tst may run on your platform... 
+ +--- +rasmus ekman +May 14, 2006 Added: trunk/opentrep/ternary_tree/simple_tst.cpp =================================================================== --- trunk/opentrep/ternary_tree/simple_tst.cpp (rev 0) +++ trunk/opentrep/ternary_tree/simple_tst.cpp 2009-07-14 14:07:29 UTC (rev 127) @@ -0,0 +1,15 @@ +// C +#include <cassert> +// STL +#include <iostream> +// Ternary Tree Structure (TST) +//#include <structured_set.hpp> + +// /////////////// M A I N /////////////// +int main (int argc, char* argv[]) { + + std::cout << "Hello TST!" << std::endl; + + return 0; +} + Added: trunk/opentrep/ternary_tree/sources.mk =================================================================== --- trunk/opentrep/ternary_tree/sources.mk (rev 0) +++ trunk/opentrep/ternary_tree/sources.mk 2009-07-14 14:07:29 UTC (rev 127) @@ -0,0 +1,6 @@ +tst_h_sources = \ + $(top_srcdir)/ternary_tree/ternary_tree.hpp \ + $(top_srcdir)/ternary_tree/structured_set.hpp \ + $(top_srcdir)/ternary_tree/structured_map.hpp \ + $(top_srcdir)/ternary_tree/iterator_wrapper.hpp +tst_cc_sources = Modified: trunk/opentrep/ternary_tree/structured_map.hpp =================================================================== --- trunk/opentrep/ternary_tree/structured_map.hpp 2009-07-14 10:45:19 UTC (rev 126) +++ trunk/opentrep/ternary_tree/structured_map.hpp 2009-07-14 14:07:29 UTC (rev 127) @@ -1,938 +1,938 @@ -#ifndef STRUCTURED_MAP_HPP_INCLUDED -#define STRUCTURED_MAP_HPP_INCLUDED - -#define TST_NO_STANDALONE_ITERATOR_FACADE -#include "ternary_tree.hpp" -#undef TST_NO_STANDALONE_ITERATOR_FACADE -// note: also #include <list> in structured_multimap section - -namespace containers { - - -/** Structured Map is a Sorted Associative Container that stores objects of type pair<Key, Data>. 
- * Structured Map is a Structured Container, meaning that its key type is required to be - * a Forward Container, and that the map uses a comparator to establish - * a strict weak ordering among key::value_type elements (rather than on whole keys). - * This allows searches in the set involving parts of keys, ie with shared prefix - * or with shared middle parts. - * - * Structured Map is a Pair Associative Container, meaning that its value type - * is pair<const Key, T>. - * It is also a Unique Associative Container, meaning that no two elements are the same. - * - * A std::map is normally backed by a binary tree. A structured map is instead backed - * by a ternary_tree, which manages structured ordering of keys. - * For string-like keys, a ternary tree is typically as fast as an unordered_map, - * and several times faster than most std::map implementations. - * \ingroup containers - */ -template<class KeyT, class DataT, class CompT = std::less<typename KeyT::value_type>, - class AllocT = std::allocator<std::pair<const KeyT, DataT> > > -class structured_map -{ -public: - typedef KeyT key_type; - typedef DataT mapped_type; - typedef std::pair<const KeyT, DataT> value_type; - typedef typename KeyT::value_type char_type; - typedef CompT char_compare; - - typedef AllocT allocator_type; - typedef typename AllocT::difference_type difference_type; - typedef typename AllocT::size_type size_type; - typedef typename AllocT::pointer pointer; - typedef typename AllocT::const_pointer const_pointer; - typedef typename AllocT::reference reference; - typedef typename AllocT::const_reference const_reference; - -private: - // Internal value type is pair<Key, Value> (non-const Key). 
- typedef ternary_tree< KeyT, value_type, CompT, - typename AllocT::template rebind<value_type>::other - > ternary_tree; - typedef typename ternary_tree::iterator tst_iterator; - typedef typename ternary_tree::iterator::base_iter tst_iterator_base; - typedef typename ternary_tree::const_iterator tst_const_iterator; - - enum { invalid_index = size_type(-1) }; - -public: - - typedef typename ternary_tree::key_compare key_compare; - - - typedef iterators::iterator_wrapper < tst_iterator_base - , iterators::nonconst_traits<value_type> - > iterator; - - typedef iterators::iterator_wrapper < tst_iterator_base - , iterators::const_traits<value_type> - > const_iterator; - - typedef std::reverse_iterator<iterator> reverse_iterator; - typedef std::reverse_iterator<const_iterator> const_reverse_iterator; - - - /** \name Construct, copy, destroy - * @{ - */ - structured_map() : m_tree(char_compare(), allocator_type()) {} - - explicit structured_map(const char_compare& comp) - : m_tree(comp, allocator_type()) - {} - - structured_map(const char_compare& comp, const allocator_type& alloc) - : m_tree(comp, alloc) - {} - - template<class InputIterator> - structured_map( InputIterator first, InputIterator last, - const char_compare& comp = char_compare(), - const allocator_type& alloc = allocator_type()) - : m_tree(comp, alloc) - { - insert(first, last); - } - - structured_map(const structured_map& other) - : m_tree(other.m_tree) - {} - - ~structured_map() {} - - structured_map& operator= (const structured_map& other) { - structured_map(other).swap(*this); - return *this; - } - - allocator_type get_allocator() const { return m_tree.get_allocator(); } - /* @} */ - - /** \name Iterators - * Includes C++0x methods cbegin, cend, crbegin, crend to make it easier - * to access const iterators. 
- * @{ - */ - iterator begin() { return iterator(m_tree.begin()); } - const_iterator begin() const { return const_iterator(m_tree.begin()); } - iterator end() { return iterator(m_tree.end()); } - const_iterator end() const { return const_iterator(m_tree.end()); } - - reverse_iterator rbegin() { return reverse_iterator(end()); } - const_reverse_iterator rbegin() const { return const_reverse_iterator(end()); } - reverse_iterator rend() { return reverse_iterator(begin()); } - const_reverse_iterator rend() const { return const_reverse_iterator(begin()); } - - // C++0x additions - const_iterator cbegin() const { return const_iterator(m_tree.begin()); } - const_iterator cend() const { return const_iterator(m_... [truncated message content] |
From: <den...@us...> - 2009-07-14 10:45:31
|
Revision: 126 http://opentrep.svn.sourceforge.net/opentrep/?rev=126&view=rev Author: denis_arnaud Date: 2009-07-14 10:45:19 +0000 (Tue, 14 Jul 2009) Log Message: ----------- [Ternary Trees] Added the ternary trees structure. Added Paths: ----------- trunk/opentrep/ternary_tree/ trunk/opentrep/ternary_tree/README trunk/opentrep/ternary_tree/doxygen_input/ trunk/opentrep/ternary_tree/doxygen_input/blather.hpp trunk/opentrep/ternary_tree/doxygen_input/concepts.txt trunk/opentrep/ternary_tree/doxygen_input/doxygen-old.css trunk/opentrep/ternary_tree/doxygen_input/doxygen.css trunk/opentrep/ternary_tree/doxygen_input/external.png trunk/opentrep/ternary_tree/doxygen_input/featuretable.html trunk/opentrep/ternary_tree/doxygen_input/footer_inc.html trunk/opentrep/ternary_tree/doxygen_input/header_inc.html trunk/opentrep/ternary_tree/doxygen_input/performancetable.html trunk/opentrep/ternary_tree/doxygen_input/tree - trie concepts.txt trunk/opentrep/ternary_tree/doxygen_input/usage.hpp trunk/opentrep/ternary_tree/examples/ trunk/opentrep/ternary_tree/examples/examples.vcproj trunk/opentrep/ternary_tree/examples/locale_less.hpp trunk/opentrep/ternary_tree/examples.cpp trunk/opentrep/ternary_tree/fill_dictionary.cpp trunk/opentrep/ternary_tree/full-docs-index.html trunk/opentrep/ternary_tree/html/ trunk/opentrep/ternary_tree/html/annotated.html trunk/opentrep/ternary_tree/html/blather_8hpp.html trunk/opentrep/ternary_tree/html/class_data_t.html trunk/opentrep/ternary_tree/html/class_data_t_01_5.html trunk/opentrep/ternary_tree/html/classcontainers_1_1search__results__list-members.html trunk/opentrep/ternary_tree/html/classcontainers_1_1search__results__list.html trunk/opentrep/ternary_tree/html/classcontainers_1_1search__results__list_1_1iterator-members.html trunk/opentrep/ternary_tree/html/classcontainers_1_1search__results__list_1_1iterator.html trunk/opentrep/ternary_tree/html/classcontainers_1_1structured__map-members.html 
trunk/opentrep/ternary_tree/html/classcontainers_1_1structured__map.html trunk/opentrep/ternary_tree/html/classcontainers_1_1structured__map_1_1value__compare-members.html trunk/opentrep/ternary_tree/html/classcontainers_1_1structured__map_1_1value__compare.html trunk/opentrep/ternary_tree/html/classcontainers_1_1structured__multimap-members.html trunk/opentrep/ternary_tree/html/classcontainers_1_1structured__multimap.html trunk/opentrep/ternary_tree/html/classcontainers_1_1structured__multimap_1_1value__compare-members.html trunk/opentrep/ternary_tree/html/classcontainers_1_1structured__multimap_1_1value__compare.html trunk/opentrep/ternary_tree/html/classcontainers_1_1structured__multiset-members.html trunk/opentrep/ternary_tree/html/classcontainers_1_1structured__multiset.html trunk/opentrep/ternary_tree/html/classcontainers_1_1structured__set-members.html trunk/opentrep/ternary_tree/html/classcontainers_1_1structured__set.html trunk/opentrep/ternary_tree/html/classcontainers_1_1ternary__tree-members.html trunk/opentrep/ternary_tree/html/classcontainers_1_1ternary__tree.html trunk/opentrep/ternary_tree/html/classcontainers_1_1ternary__tree_1_1key__compare-members.html trunk/opentrep/ternary_tree/html/classcontainers_1_1ternary__tree_1_1key__compare.html trunk/opentrep/ternary_tree/html/classcontainers_1_1tst__detail_1_1_base_t.html trunk/opentrep/ternary_tree/html/classcontainers_1_1tst__detail_1_1tst__iterator__base-members.html trunk/opentrep/ternary_tree/html/classcontainers_1_1tst__detail_1_1tst__iterator__base.html trunk/opentrep/ternary_tree/html/classstd_1_1back__insert__iterator.html trunk/opentrep/ternary_tree/html/classstd_1_1binary__function.html trunk/opentrep/ternary_tree/html/dir_0df55976ff011c1ef61da79183e9e28f.html trunk/opentrep/ternary_tree/html/dir_59457a7c227558cb0e28f31428e14f54.html trunk/opentrep/ternary_tree/html/dirs.html trunk/opentrep/ternary_tree/html/doxygen.css trunk/opentrep/ternary_tree/html/doxygen.png 
trunk/opentrep/ternary_tree/html/files.html trunk/opentrep/ternary_tree/html/functions.html trunk/opentrep/ternary_tree/html/functions_0x62.html trunk/opentrep/ternary_tree/html/functions_0x63.html trunk/opentrep/ternary_tree/html/functions_0x64.html trunk/opentrep/ternary_tree/html/functions_0x65.html trunk/opentrep/ternary_tree/html/functions_0x66.html trunk/opentrep/ternary_tree/html/functions_0x67.html trunk/opentrep/ternary_tree/html/functions_0x68.html trunk/opentrep/ternary_tree/html/functions_0x69.html trunk/opentrep/ternary_tree/html/functions_0x6b.html trunk/opentrep/ternary_tree/html/functions_0x6c.html trunk/opentrep/ternary_tree/html/functions_0x6d.html trunk/opentrep/ternary_tree/html/functions_0x6e.html trunk/opentrep/ternary_tree/html/functions_0x6f.html trunk/opentrep/ternary_tree/html/functions_0x70.html trunk/opentrep/ternary_tree/html/functions_0x72.html trunk/opentrep/ternary_tree/html/functions_0x73.html trunk/opentrep/ternary_tree/html/functions_0x74.html trunk/opentrep/ternary_tree/html/functions_0x75.html trunk/opentrep/ternary_tree/html/functions_0x76.html trunk/opentrep/ternary_tree/html/functions_0x77.html trunk/opentrep/ternary_tree/html/functions_0x7e.html trunk/opentrep/ternary_tree/html/functions_enum.html trunk/opentrep/ternary_tree/html/functions_eval.html trunk/opentrep/ternary_tree/html/functions_func.html trunk/opentrep/ternary_tree/html/functions_func_0x62.html trunk/opentrep/ternary_tree/html/functions_func_0x63.html trunk/opentrep/ternary_tree/html/functions_func_0x64.html trunk/opentrep/ternary_tree/html/functions_func_0x65.html trunk/opentrep/ternary_tree/html/functions_func_0x66.html trunk/opentrep/ternary_tree/html/functions_func_0x67.html trunk/opentrep/ternary_tree/html/functions_func_0x68.html trunk/opentrep/ternary_tree/html/functions_func_0x69.html trunk/opentrep/ternary_tree/html/functions_func_0x6b.html trunk/opentrep/ternary_tree/html/functions_func_0x6c.html 
trunk/opentrep/ternary_tree/html/functions_func_0x6d.html trunk/opentrep/ternary_tree/html/functions_func_0x6e.html trunk/opentrep/ternary_tree/html/functions_func_0x6f.html trunk/opentrep/ternary_tree/html/functions_func_0x70.html trunk/opentrep/ternary_tree/html/functions_func_0x72.html trunk/opentrep/ternary_tree/html/functions_func_0x73.html trunk/opentrep/ternary_tree/html/functions_func_0x74.html trunk/opentrep/ternary_tree/html/functions_func_0x75.html trunk/opentrep/ternary_tree/html/functions_func_0x76.html trunk/opentrep/ternary_tree/html/functions_func_0x7e.html trunk/opentrep/ternary_tree/html/functions_rela.html trunk/opentrep/ternary_tree/html/functions_type.html trunk/opentrep/ternary_tree/html/functions_type_0x62.html trunk/opentrep/ternary_tree/html/functions_type_0x63.html trunk/opentrep/ternary_tree/html/functions_type_0x64.html trunk/opentrep/ternary_tree/html/functions_type_0x66.html trunk/opentrep/ternary_tree/html/functions_type_0x68.html trunk/opentrep/ternary_tree/html/functions_type_0x69.html trunk/opentrep/ternary_tree/html/functions_type_0x6b.html trunk/opentrep/ternary_tree/html/functions_type_0x6c.html trunk/opentrep/ternary_tree/html/functions_type_0x6d.html trunk/opentrep/ternary_tree/html/functions_type_0x6e.html trunk/opentrep/ternary_tree/html/functions_type_0x70.html trunk/opentrep/ternary_tree/html/functions_type_0x72.html trunk/opentrep/ternary_tree/html/functions_type_0x73.html trunk/opentrep/ternary_tree/html/functions_type_0x74.html trunk/opentrep/ternary_tree/html/functions_type_0x76.html trunk/opentrep/ternary_tree/html/functions_vars.html trunk/opentrep/ternary_tree/html/globals.html trunk/opentrep/ternary_tree/html/globals_defs.html trunk/opentrep/ternary_tree/html/globals_func.html trunk/opentrep/ternary_tree/html/graph_legend.dot trunk/opentrep/ternary_tree/html/graph_legend.html trunk/opentrep/ternary_tree/html/graph_legend.png trunk/opentrep/ternary_tree/html/hierarchy.html trunk/opentrep/ternary_tree/html/index.html 
trunk/opentrep/ternary_tree/html/iteration__impl_8hpp.html trunk/opentrep/ternary_tree/html/iterator__wrapper_8hpp.html trunk/opentrep/ternary_tree/html/namespacecontainers.html trunk/opentrep/ternary_tree/html/namespacecontainers_1_1smap__detail.html trunk/opentrep/ternary_tree/html/namespacecontainers_1_1sset__detail.html trunk/opentrep/ternary_tree/html/namespacecontainers_1_1tst__detail.html trunk/opentrep/ternary_tree/html/namespacecontainers_1_1tst__detail_1_1mpl__detail.html trunk/opentrep/ternary_tree/html/namespacecontainers_1_1tst__erase__impl__detail.html trunk/opentrep/ternary_tree/html/namespacecontainers_1_1util.html trunk/opentrep/ternary_tree/html/namespaceiterators.html trunk/opentrep/ternary_tree/html/namespacemembers.html trunk/opentrep/ternary_tree/html/namespacemembers_func.html trunk/opentrep/ternary_tree/html/namespaces.html trunk/opentrep/ternary_tree/html/namespacestd.html trunk/opentrep/ternary_tree/html/new__iterator__base_8ipp.html trunk/opentrep/ternary_tree/html/pages.html trunk/opentrep/ternary_tree/html/perf_notes.html trunk/opentrep/ternary_tree/html/structcontainers_1_1smap__detail_1_1multimap__iterator-members.html trunk/opentrep/ternary_tree/html/structcontainers_1_1smap__detail_1_1multimap__iterator.html trunk/opentrep/ternary_tree/html/structcontainers_1_1sset__detail_1_1multiset__iterator-members.html trunk/opentrep/ternary_tree/html/structcontainers_1_1sset__detail_1_1multiset__iterator.html trunk/opentrep/ternary_tree/html/structcontainers_1_1ternary__tree_1_1find__result-members.html trunk/opentrep/ternary_tree/html/structcontainers_1_1ternary__tree_1_1find__result.html trunk/opentrep/ternary_tree/html/structcontainers_1_1tst__detail_1_1always__heap__node-members.html trunk/opentrep/ternary_tree/html/structcontainers_1_1tst__detail_1_1always__heap__node.html trunk/opentrep/ternary_tree/html/structcontainers_1_1tst__detail_1_1back__push__pop-members.html 
trunk/opentrep/ternary_tree/html/structcontainers_1_1tst__detail_1_1back__push__pop.html trunk/opentrep/ternary_tree/html/structcontainers_1_1tst__detail_1_1dummy__sequence-members.html trunk/opentrep/ternary_tree/html/structcontainers_1_1tst__detail_1_1dummy__sequence.html trunk/opentrep/ternary_tree/html/structcontainers_1_1tst__detail_1_1heap__node-members.html trunk/opentrep/ternary_tree/html/structcontainers_1_1tst__detail_1_1heap__node.html trunk/opentrep/ternary_tree/html/structcontainers_1_1tst__detail_1_1inorder__seek-members.html trunk/opentrep/ternary_tree/html/structcontainers_1_1tst__detail_1_1inorder__seek.html trunk/opentrep/ternary_tree/html/structcontainers_1_1tst__detail_1_1inplace__node-members.html trunk/opentrep/ternary_tree/html/structcontainers_1_1tst__detail_1_1inplace__node.html trunk/opentrep/ternary_tree/html/structcontainers_1_1tst__detail_1_1iter__method__forward-members.html trunk/opentrep/ternary_tree/html/structcontainers_1_1tst__detail_1_1iter__method__forward.html trunk/opentrep/ternary_tree/html/structcontainers_1_1tst__detail_1_1key__access-members.html trunk/opentrep/ternary_tree/html/structcontainers_1_1tst__detail_1_1key__access.html trunk/opentrep/ternary_tree/html/structcontainers_1_1tst__detail_1_1levenshtein__search__info-members.html trunk/opentrep/ternary_tree/html/structcontainers_1_1tst__detail_1_1levenshtein__search__info.html trunk/opentrep/ternary_tree/html/structcontainers_1_1tst__detail_1_1levenshtein__search__info_1_1search-members.html trunk/opentrep/ternary_tree/html/structcontainers_1_1tst__detail_1_1levenshtein__search__info_1_1search.html trunk/opentrep/ternary_tree/html/structcontainers_1_1tst__detail_1_1mpl__detail_1_1if__c-members.html trunk/opentrep/ternary_tree/html/structcontainers_1_1tst__detail_1_1mpl__detail_1_1if__c.html trunk/opentrep/ternary_tree/html/structcontainers_1_1tst__detail_1_1mpl__detail_1_1if__c_3_01false_00_01_t1_00_01_t2_01_4-members.html 
trunk/opentrep/ternary_tree/html/structcontainers_1_1tst__detail_1_1mpl__detail_1_1if__c_3_01false_00_01_t1_00_01_t2_01_4.html trunk/opentrep/ternary_tree/html/structcontainers_1_1tst__detail_1_1node__base-members.html trunk/opentrep/ternary_tree/html/structcontainers_1_1tst__detail_1_1node__base.html trunk/opentrep/ternary_tree/html/structcontainers_1_1tst__detail_1_1size__policy__node-members.html trunk/opentrep/ternary_tree/html/structcontainers_1_1tst__detail_1_1size__policy__node.html trunk/opentrep/ternary_tree/html/structiterators_1_1const__traits-members.html trunk/opentrep/ternary_tree/html/structiterators_1_1const__traits.html trunk/opentrep/ternary_tree/html/structiterators_1_1iterator__wrapper-members.html trunk/opentrep/ternary_tree/html/structiterators_1_1iterator__wrapper.html trunk/opentrep/ternary_tree/html/structiterators_1_1nonconst__traits-members.html trunk/opentrep/ternary_tree/html/structiterators_1_1nonconst__traits.html trunk/opentrep/ternary_tree/html/structured__map_8hpp.html trunk/opentrep/ternary_tree/html/structured__set_8hpp.html trunk/opentrep/ternary_tree/html/structured_concept.html trunk/opentrep/ternary_tree/html/tab_b.gif trunk/opentrep/ternary_tree/html/tab_l.gif trunk/opentrep/ternary_tree/html/tab_r.gif trunk/opentrep/ternary_tree/html/tabs.css trunk/opentrep/ternary_tree/html/ternary__tree_8hpp.html trunk/opentrep/ternary_tree/html/todo.html trunk/opentrep/ternary_tree/html/tst__implementation_8ipp.html trunk/opentrep/ternary_tree/html/tst__iterator__base_8ipp.html trunk/opentrep/ternary_tree/html/tst__iterator__facade_8hpp.html trunk/opentrep/ternary_tree/html/tst__node_8hpp.html trunk/opentrep/ternary_tree/html/tst__search__results_8ipp.html trunk/opentrep/ternary_tree/html/tst_impl.html trunk/opentrep/ternary_tree/html/tst_links.html trunk/opentrep/ternary_tree/html/tst_reference.html trunk/opentrep/ternary_tree/html/tst_tests.html trunk/opentrep/ternary_tree/html/tst_usage.html 
trunk/opentrep/ternary_tree/html/usage_8hpp.html trunk/opentrep/ternary_tree/index.html trunk/opentrep/ternary_tree/iterator_compile_test.cpp trunk/opentrep/ternary_tree/iterator_wrapper.hpp trunk/opentrep/ternary_tree/readme.txt trunk/opentrep/ternary_tree/structured_map.hpp trunk/opentrep/ternary_tree/structured_set.hpp trunk/opentrep/ternary_tree/ternary_tree.hpp trunk/opentrep/ternary_tree/test/ trunk/opentrep/ternary_tree/test/basic_insertion_test.hpp trunk/opentrep/ternary_tree/test/check_iteration.hpp trunk/opentrep/ternary_tree/test/copy_test.hpp trunk/opentrep/ternary_tree/test/element_range_test.hpp trunk/opentrep/ternary_tree/test/erase_test.cpp trunk/opentrep/ternary_tree/test/hamming_search_test.cpp trunk/opentrep/ternary_tree/test/iterator_test.cpp trunk/opentrep/ternary_tree/test/localization_test.cpp trunk/opentrep/ternary_tree/test/longest_match_test.cpp trunk/opentrep/ternary_tree/test/mapped_value_test.cpp trunk/opentrep/ternary_tree/test/partial_match_test.cpp trunk/opentrep/ternary_tree/test/prefix_range_test.cpp trunk/opentrep/ternary_tree/test/scrabble_search_test.cpp trunk/opentrep/ternary_tree/test/test.vcproj trunk/opentrep/ternary_tree/test/test_tst.cpp trunk/opentrep/ternary_tree/test/tests_common.hpp trunk/opentrep/ternary_tree/tst.doxy trunk/opentrep/ternary_tree/tst_concept_checks.cpp trunk/opentrep/ternary_tree/tst_detail/ trunk/opentrep/ternary_tree/tst_detail/iteration_impl.hpp trunk/opentrep/ternary_tree/tst_detail/new_iterator_base.ipp trunk/opentrep/ternary_tree/tst_detail/tst_implementation.ipp trunk/opentrep/ternary_tree/tst_detail/tst_iterator_base.ipp trunk/opentrep/ternary_tree/tst_detail/tst_iterator_facade.hpp trunk/opentrep/ternary_tree/tst_detail/tst_node.hpp trunk/opentrep/ternary_tree/tst_detail/tst_search_results.ipp trunk/opentrep/ternary_tree/tst_public.doxy trunk/opentrep/test/ternary/ Added: trunk/opentrep/ternary_tree/README =================================================================== --- 
trunk/opentrep/ternary_tree/README (rev 0) +++ trunk/opentrep/ternary_tree/README 2009-07-14 10:45:19 UTC (rev 126) @@ -0,0 +1,3 @@ + +Source: http://abc.se/~re/code/tst and http://abc.se/~re/code/tst/ternary_tree.zip + Added: trunk/opentrep/ternary_tree/doxygen_input/blather.hpp =================================================================== --- trunk/opentrep/ternary_tree/doxygen_input/blather.hpp (rev 0) +++ trunk/opentrep/ternary_tree/doxygen_input/blather.hpp 2009-07-14 10:45:19 UTC (rev 126) @@ -0,0 +1,558 @@ +/** \mainpage Structured Associative Containers + +<h2>Ternary Search Tree containers to replace \c set<string> and \c map<string, Value> </h2> + +<center><table bgcolor="#fbf9e5" style="border: thin dotted #808000;" width="95%" border=0> +<tr> +<td> +<h3>Table of contents</h3> +<dl> + <dt>\ref introduction "Introduction"</dt> + <dt>\ref subkey_search_overview "Advanced searches overview"</dt> + <dt>\ref tst_usage "Tutorial"</dt> + <dt>\ref tst_reference "Reference"</dt> + <dd>\ref structured_concept "Structured Container concept" \n + Class \ref containers::structured_set "structured_set" \n + Class \ref containers::structured_map "structured_map" \n + Class \ref containers::structured_multiset "structured_multiset" \n + Class \ref containers::structured_multimap "structured_multimap" \n + Implementation class \ref containers::ternary_tree "ternary_tree" + </dd> + <dt>\ref perf_notes "Performance notes"</dt> + <dt>\ref tst_impl "Implementation details"</dt> + <dt>\ref tst_links "Links"</dt> + <dt>\ref tst_tests "Test Suite"</dt> +</dl> +</td> +</tr></table></center> + +Download: Latest version (0.684) http://abc.se/~re/code/tst/ternary_tree.zip\n + +Copyleft: <a href="mailto:rasmus%20point%20ekman%20at%20abc%20point%20se?subject=Structured Containers suck/rule"> +rasmus ekman</a> 2007-2009 \n +Weblink: http://abc.se/~re/code/tst + +\anchor introduction <hr> +<h2>Introduction</h2> +<b>Structured containers</b> are \c map and \c set -like 
containers specialized for strings. +They are commonly used for dictionaries.\n +Structured containers have two major benefits: +- They offer near-match searches (wildcard search, partial match etc) that are hard to implement + with other containers. +- Lookup performance is on a par with hashed containers for many common applications, +and 2-5 times faster than standard maps and sets (with string-like keys). + +Of course there is a price to pay: structured containers use much more memory than +other containers: Around 6-8 bytes <b>per letter</b> inserted (whether \c char or \c wchar_t); +an English 150 k word dictionary uses eg 7.3 MB to store 1.2 MB words (2.4 MB of \c wchar_t words). + +The container classes in this library can be used as drop-in replacements for \c set and \c map +(or \c unordered_set, \c unordered_map): + - \ref containers::structured_set "structured_set": This stores unique keys and allows structured key searches. + - \ref containers::structured_multiset "structured_multiset": This stores non-unique keys. + - \ref containers::structured_map "structured_map": This is a + <a target="sgi" href="http://www.sgi.com/tech/stl/PairAssociativeContainer.html">Pair Associative Container</a>, + as it allows associating a value with each key. + - \ref containers::structured_multimap "structured_multimap": Technically, a + <a target="sgi" href="http://www.sgi.com/tech/stl/MultipleSortedAssociativeContainer.html">Multiple, Sorted, + Pair Associative Container</a> - it allows storing several values with each key. + +While the STL standard associative containers are normally backed by a binary tree structure, +Structured Containers are backed by a Ternary Search Tree, as presented by +\ref note_1 "Jon Bentley and Robert Sedgewick in [1]". + +Class \ref containers::ternary_tree "ternary_tree<Key, Value, Comp, Alloc>" provides the implementation backend. 
+Due to its internals, its interface cannot easily be made to conform with standard STL concepts, +so it is used internally by the structured* wrapper classes (much like STL's internal \c rb_tree class). + +Basically, if you have code using sets or maps, you have code to use structured containers. +And with 1-3 lines of code, you're ready to make advanced imprecise searches in your dictionaries.\n +See \ref tst_usage "the usage section" for examples of how to use these classes. + +<table bgcolor="#f0f0ff" style="border: thin dotted #808000;" border=0> +<tr><th>Library status</th></tr> +<tr><td valign="top" align="right">Compatibility:</td> +<td>Note that the file \b tst_concept_checks.cpp is currently broken. Will investigate.\n +<!-- This used to compile with Mingw GCC 3.4.2 and with MSVC7.1 (with STLport 5). Requires Boost 1.33. +Not sure what happened in Boost 1.36-37 or if I've mangled something. \n +Due to recent changes, ternary tree does not support stateful allocators (earlier versions did this by implication) --> +</td> +<tr><td valign="top" align="right">version 0.684: (Jan 2009)</td> +<td>Fix standard-breakage in multimap/multiset return from <code>insert(const value_type&)</code>.<br> +Added <code>operator-></code> to iterator wrapper for C++0x compatibility. +Thanks to Geoffrey Noel for reports.</td> +</tr> +<tr><td valign="top" align="right">version 0.683: (March 2007)</td> +<td>Fix portability issues for GCC and non-STLport libraries. Fix longest_match.<br> +Thanks to Arjen Wagenaar for several reports, fixes and encouragement. Thanks also to Michel Tourn for reports.</td> +</tr> +<tr><td valign="top" align="right">version 0.68: (Dec 2006)</td> +<td>Implement TST_NODE_COUNT_TYPE macro, which can be used to control node size on 64-bit systems. + See \ref containers::ternary_tree "class ternary_tree"</td> +</tr> +<tr><td valign="top" align="right">version 0.68 (alpha):</td> +<td>Reimplemented node type. 
Do proper management of value type (was inconsistent, partly unimplemented - duh!)</td> +</tr> +<!--tr> +<tr><td valign="top" align="right">version 0.676:</td> +<td>Modified containers to follow C++0x draft standard: \n +Added \c cbegin, \c cend methods returning \c const_iterator, and \c crbegin, \c crend +returning \c const_reverse_iterator, to make it easier to code with const-correctness. \n +\c erase(iterator pos); and \c erase(iterator first, iterator last); methods now return iterators.</td> +<tr><td valign="top" align="right">version 0.675:</td> +<td>All Structured Container classes implemented. Structured search interface TBD. +</td--> +</table> + + +\anchor subkey_search_overview <hr> +<h2>Sub-key, or Structure Searches</h2> +<span style="color:#905050;">(a new interface for these searches will be specified in the future)</span> + +Ternary trees allow searches that match parts of keys and ignore mismatches in other parts.\n +In the current interface we specify a small number of searches facilitated by the tree structure; +the Partial Match and Hamming searches are defined in several other implementations +(showcased in \ref note_1 "Bentley and Sedgewick" code). +The Levenshtein and combinatorial searches are not found in other ternary trees (that I know of). + +<table border="1" cellspacing="0"> + <tr><th bgcolor="#f0f0ff">Name (function name)</th><th bgcolor="#f0f0ff">Description</th></tr> + <tr><th> + Prefix match (\ref containers::ternary_tree::prefix_range "prefix_range")</th><td> + Finds keys sharing a common prefix, returns a pair of iterators.</td></tr> + <tr><th> + Longest match (\ref containers::ternary_tree::longest_match "longest_match")</th><td> + Finds the longest key that matches the beginning of the search string. 
+ A typical application is to tokenize a string using the ternary tree as a dictionary.</td></tr> + <tr><th> + Partial match, or wildcard search (\ref containers::ternary_tree::partial_match_search "partial_match_search")</th><td> + Accepts a search string with wildcard characters that will match any letter, + eg "b?nd" would match "band", "bend", "bind", "bond" in an English dictionary.</td></tr> + <tr><th> + Search allowing \c N mismatches, + (\ref containers::ternary_tree::hamming_search "hamming_search"<span style="font-weight:normal;"></span>)</th><td> + Accepts a search string and an integer \c dist indicating how many non-matching letters are allowed, + then finds keys matching the search string that have at most \a dist mismatches. + This works like a partial match search with all combinations of \a dist + wildcards in the search string.\n + \c hamming_search("band", 1) matches the wildcard search plus "bald", "bane" and "wand", etc. \n + The version here, following DDJ code, extends the strict Hamming search by also allowing shorter and longer + strings; a search for "band", \a dist = 1, also finds "ban" and "bandy" etc.\n + See also http://wikipedia.org/wiki/Hamming_distance</td></tr> + <tr><th> + Levenshtein distance search (\ref containers::ternary_tree::levenshtein_search "levenshtein_search" + <span style="font-weight:normal;">- consider descriptive name</span>)</th><td> + + Hamming search matches characters in fixed position, allowing substitution of \a dist chars. + Levenshtein search also allows shifting parts of the search string by insertion or skipping chars (in \a dist places). + So <code>levenshtein_search("band", 1) </code> extends the hamming_search set with "and" and "bland", etc. 
+ A typical application is to match misspelt words.\n + See also http://wikipedia.org/wiki/Levenshtein_distance</td></tr> + <tr><th> + Combinatorial or "scrabble" search (\ref containers::ternary_tree::combinatorial_search "combinatorial_search")</th><td> + Finds all keys using the characters in the search string. \c combinatorial_search("band") finds + "ad", "and", "bad", "dab", "nab", etc. A count of wildcards can be added, also allowing + nonmatching characters (use with care, values over 10% of average key length + may cause the algorithm to traverse a large part of the tree).</td></tr> +</table> + +See \ref usage_imprecise_searches "advanced search overview" in the tutorial. + +These searches are defined for all containers in this library. +But they are also marked as deprecated (to be replaced by generic algorithms with the same interface). +For a relative performance comparison of imprecise searches, see the second table in \ref perf_notes. + +<h3>Future directions</h3> +The searches currently defined are clearly special cases in a sea of search possibilities. +We have only defined searches that are relatively efficient, compared to other combinations of containers and algorithms. +But there can be many variations on the available searches: increasing Hamming/Levenshtein distance +at the end of words, or matching limited ranges of characters (eg allowing mismatches only in vowels), etc. + +The next step for this project is to support a more flexible low-level interface for +traversing and filtering tree nodes. +The interface for these "structured searches" is open for consideration, but it +will basically define sub-key iterators, conversion of full-key from sub-key iterators, +and a small collection of algorithms operating on these sub-key iterators. 
+ +At least the following operations are needed: + + - sub-key match: matching a part of a key (prefix, or starting from current char position) + - key element range increment: from a sub-key position, match a range of characters + in next position (returns a list of sub-key iterators? - or iterator-like operation?) + - conversion from sub-key iterator to full-key iterator range (nearest and post-furthest + keys in the subtree) + - \c is_key(subkey_iterator pos): true if end-of-key exists at iterator position. + - \c count_elements(subkey_iterator pos): returns number of available key elements at position. + - In all predefined algorithms above, either a specific, or any char is matched, + we would also support arbitrary char sets (possibly with special case for char ranges). + + */ + +/** \page tst_reference Reference +<center><table bgcolor="#fbf9e5" style="border: thin dotted #808000;" width="95%" border=0> +<tr> +<td> +<dl> + <dt>\ref structured_concept "Structured Container concept"</dt> + <dt>\ref ref_sethpp "Header < structured_set.hpp >"</dt> + <dt>\ref ref_maphpp "Header < structured_map.hpp >"</dt> + <dt>\ref ref_tsthpp "Header < ternary_tree.hpp >"</dt> + <dt>\ref ref_iterhpp "Header < iterator_wrapper.hpp >"</dt> +</dl> +</td> +</tr></table></center> + +<hr> + +\anchor ref_sethpp +<h2>Header < <a href="../structured_set.hpp">%structured_set.hpp</a> > synopsis</h2> +<pre> +\b namespace containers { + \b template <\b class Key, + \b class Comp = std::less<\b typename Key::value_type>, + \b class Alloc = std::allocator<Key> > + \b class \ref containers::structured_set "structured_set"; + + \b template <\b class Key, + \b class Comp = std::less<\b typename Key::value_type>, + \b class Alloc = std::allocator<Key> > + \b class \ref containers::structured_multiset "structured_multiset"; +} +</pre> + +\anchor ref_maphpp +<h2>Header < <a href="../structured_map.hpp">%structured_map.hpp</a> > synopsis</h2> +<pre> +\b namespace containers { + \b template <\b class 
Key, + \b class T, + \b class Comp = std::less<\b typename Key::value_type>, + \b class Alloc = std::allocator<std::pair<\b const Key, T> > > + \b class \ref containers::structured_map "structured_map"; + + \b template <\b class Key, + \b class T, + \b class Comp = std::less<\b typename Key::value_type>, + \b class Alloc = std::allocator<std::pair<\b const Key, T> > > + \b class \ref containers::structured_multimap "structured_multimap"; +} +</pre> + +<hr> +Supplementary header files needed to support structured_set and -map classes. + + +\anchor ref_tsthpp +<h2>Header < <a href="../ternary_tree.hpp">%ternary_tree.hpp</a> > synopsis</h2> +<pre> +\b namespace containers { + + \b template <\b class Key, + \b class T, + \b class Comp = std::less<\b typename Key::value_type>, + \b class Alloc = std::allocator<std::pair<\b const Key, T> > > + \b class \ref containers::ternary_tree "ternary_tree"; + + \b template <\b class TreeT, \b class IteratorT> + \b class \ref containers::search_results_list "search_results_list"; + +} +</pre> + + +\anchor ref_iterhpp +<h2>Header < <a href="../iterator_wrapper.hpp">%iterator_wrapper.hpp</a> > synopsis</h2> +<pre> +\b namespace iterators { + + \b template <\b class T> \b struct const_traits; + \b template <\b class T> \b struct nonconst_traits; + + \b template <\b class BaseIterT, + \b class TraitsT, // either const_traits<T> or nonconst_traits<T> + \b class IterCatT = std::bidirectional_iterator_tag > + \b class \ref iterators::iterator_wrapper "iterator_wrapper"; +} +</pre> + +*/ + +/** +\page structured_concept Structured Associative Container Concept + +<span style="color:#905050;">(a preliminary sketch of the formal technical concept description)</span> + +A Structured Associative Container is a specialization of the C++ 98 standard concept +<a target="sgi" href="http://www.sgi.com/tech/stl/SortedAssociativeContainer.html">Sorted Associative Container</a>, +with extended interface. 
+ +The template parameters are similar to those of the Associative Containers: + +<code> structured_set<Key, Comp, Alloc>; </code>\n +<code> structured_map<Key, Value, Comp, Alloc>; </code>\n + +where: + - \c <b>Key</b> type is itself a container (eg a \c std::string or \c std::wstring) + - \c <b>Comp</b> is a comparison operator that imposes a sort order on \c Key::value_type elements \n + (so if \c Key is string, \c Comp compares \c char, if \c Key is \c wstring, \c Comp applies to \c wchar_t). + - \c <b>Value</b> can be any Assignable type + - \c <b>Alloc</b> is an allocator that manages all memory allocation for the container. + +The \c Comp and the \c Alloc types have default template arguments. + +In other words Structured containers are like Sorted Associative Containers, BUT + - add the requirement on Key template type to be a + <a target="sgi" href="http://www.sgi.com/tech/stl/ForwardContainer.html">Forward Container</a>.\n + For example, \c std::basic_string<CharT> is compatible with this requirement. + - change the requirement on the \c Comp (comparator) template argument to operate on + \c key_type::value_type elements (rather than on \c key_type itself). + Like the Sorted Associative Container comparator, the \c Comp type shall define a less-like comparison, a + <a target="sgi" href="http://www.sgi.com/tech/stl/StrictWeakOrdering.html">Strict Weak Ordering</a> + of key-elements. + +<b>Associated types</b> + - \b char_compare: less-like comparison of key elements (establishing a Strict Weak Ordering). + The <a target="sgi" href="http://www.sgi.com/tech/stl/AssociativeContainer.html">Associative Container</a> + \c key_compare type is also provided, but is defined in terms of \c char_compare. \n + - \b subkey_iterator: Used in structure searches. Convertible to iterator (TBD). + +In consequence it allows searches involving subparts of keys, ie with shared prefix and/or +with shared middle parts. 
+ +<hr> +<h3>Deprecated search interface</h3> + +In the first iteration, additional searches are provided as methods on the containers. +This will be changed to use free functions operating on \c subkey_iterator. +The deprecated search methods will still be provided as convenience functions; +migrating your code from the present version to the new interface will mean moving +the object name to the first argument, and also respecifying the search_results_list type. +(This sloppy-hackish type is by itself reason not to keep the method interface) + +See \ref subkey_search_overview "Structured search overview" +and \ref tst_structsearch "ternary_tree Structure search section". +*/ + +/* + +\b Notation \n +<table border=0> +<tr><td>\c X </td><td>A type that is a model of Associative Container </td></tr> +<tr><td>\c a </td><td>Object of type \c X </td></tr> +<tr><td>\c k </td><td>Object of type \c X::key_type </td></tr> +<tr><td>\c p, \c q </td><td>Object of type \c X::char_iterator </td></tr> +<tr><td>\c c </td><td>Object of type \c X::char_type </td></tr> +<tr><td>\c o </td><td>Object modelling output iterator </td></tr> +<tr><td>\c i </td><td>Object of type \c X::size_type </td></tr> +</table> + +<table border=1> +<tr><th>Name</th><th>Expression</th><th>Return value</th></tr> +<tr><td>Prefix match</td><td><code>a.prefix_range(k)</code></td><td> + \c std::pair<iterator, iterator> if \c a is mutable, otherwise <br>\c std::pair<const_iterator, const_iterator></td></tr> +<tr><td>Longest match</td><td><code>a.longest_match(p, q)</code></td><td> + \c iterator if \c a is mutable, otherwise \c const_iterator</td></tr> +<tr><td>Partial match, or <br>wildcard search</td><td><code>a.partial_match_search(k, o, c)</code></td><td> + The output iterator \c o</td></tr> +<tr><td>Hamming search</td><td><code>a.hamming_search(k, o, i)</code></td><td> + The output iterator \c o</td></tr> +<tr><td>Levenshtein search</td><td><code>a.levenshtein_search(k, o, i)</code></td><td> + The output iterator \c o</td></tr> +<tr><td>Combinatorial or <br>"scrabble" 
search</td><td><code>a.combinatorial_search(k, o, i)</code></td><td> + The output iterator \c o</td></tr> +</table> + +*/ + +/** \page tst_impl Implementation Details + * (In the following, "original" and "DDJ" code refers to the article by Bentley/Sedgewick + * published in Dr Dobb's Journal, and the accompanying C source code - see \ref tst_links) + * + * In most implementations, a ternary tree node has the following members: \code + * struct node { + * char splitchar; // key letter, or 0 at end of key + * node *hikid; // subtree of keys with higher value than splitchar + * node *eqkid; // subtree matching splitchar (pointer to mapped value at end-of-key node) + * node *lokid; // subtree less than splitchar + * node *parent; // necessary for iteration (not needed for insert/find) + * }; \endcode + * + * This means that each node is 1 char plus three or four pointers size. + * On many systems, struct member alignment makes the char member consume size of one pointer + * as well, so we have 4 (or 5) x sizeof(pointer) per node in the tree. + * With several kinds of dictionaries, the node count ends up at around 0.3-0.5 times + * total key length (since keys share nodes). + * This is even more expensive on 64-bit machines. + * + * There are several variation points in the node class: + * -# the DDJ C code designates an invalid value of zero to indicate end-of-string. We want to + * allow any string as key, so the end-of-string representation should change. + * We note that on many platforms, C/C++ struct member alignment leaves a "hole" + * in the binary representation of the node, between the char and the first pointer ("hikid"). + * On such systems there is no space cost to use another char-sized value to indicate end node. + * This also works for \c wchar_t strings on 32- or 64-bit systems. + * -# The original code stores a value for each string in the terminal node's "equal" pointer. + * The value in DDJ code is always a pointer to the terminated string. 
This is used to make + * advanced searches work (they return an array of pointers to strings stored in end-nodes). + * In reality this means that strings may need to be copied on insertion (not reflected in DDJ timings). + * -# Original DDJ code does not support iterating over strings in the tree. + * Idiomatic STL-like container style strongly suggests that iteration should be supported. + * This is fairly simple to implement if a parent pointer is added to the node struct: + * Because when an end-node is reached, the iterator must backtrack to find the previous + * branch point. + * + * The parent pointer also makes it possible to recover the inserted string by walking nodes + * backward from a terminal node to the root. Complexity is key length, plus log(tree.size), + * but it means inserted keys do not \b have to be copied to the end node. + * We opt to cache keys in iterators, at no measurable extra cost in iteration. + + * Instead of the key, an arbitrary value can be associated with endnodes. + * However, it should not be allowed to increase node size, since most nodes in the tree are not endnodes. + * In this library we store the mapped value directly in end-node if it is <tt> <= \c sizeof(void*). </tt> + * Larger objects are allocated on the heap, and a pointer to the copy is stored in end-node + * (the copy is managed by the tree). + * + * <h4>Now for some optimization</h4> + * We use a \c vector<node> as pool allocator, and record eq-hi-lo links as vector index instead of pointers. + * The pool allocation essentially follows original C code insert2() principle. + * For us, it also simplifies reallocation, since pointers do not have to be rebound; + * the indices are always valid. + * This has the following consequences: + * - allow the option of 4-byte indices also on 64-bit systems (with obvious resulting tree size limit) + * - When a new key is inserted, the last part (unique to the key) is always allocated in a batch. 
+ * This means that one node member, \c "eqkid", becomes redundant, as it is always the next index + * (except after terminal nodes of course). + * - in DDJ code the end-node value is stored in union with the eqkid. We note that the \c lokid node index + * is also unused by end-nodes (as no char should be lower than zero), so all endnode children + * are linked to the hi node. + * + * (In our binary-cognizant version where zero is a regular char value, this still holds, + * we just change the end-node test accordingly) + * + * In the final cut, our node struct data members appear roughly like this: \code + * struct node { + * CharType splitchar; // key letter, or 0 at end of key (to make sure lokid is never allowed) + * CharType endflag; // zero on normal nodes, 1 at end nodes, 2 at erased nodes. + * node_index hikid; // subtree of keys with higher value than splitchar + * node_index lokid; // subtree less than splitchar + * node_index parent; // necessary for iteration (not needed for insert/find) + * }; \endcode + * + * where \c CharType is defined by template \c Key::value_type, and treated as an unsigned type + * (so 0 is the lowest value); and \c node_index is a \c size_t -like type used by the node + * storage backend (currently \c std::vector). + * + * This optimization could also be applied to C version, trimming space requirement in DDJ code + * to 3-word nodes. + */ + +/** \page perf_notes Performance Notes + * + * <h3>Space considerations</h3> + * + * Ternary trees are notably larger than hash maps or most binary tree designs. + * Each node holds only one character (instead of a whole key), and use 3-5 pointers. + * Our nodes consist of 4 \c size_t values (16 bytes) regardless of platform pointer size, + * or char type (if at most 2-byte like \c wchar_t). 
+ * + * The shared parts of strings save space: In a typical English dictionary, + * each key shares over half its nodes with other keys, so the allocated space is about half + * of total key-length times 16. In a scrabble dictionary like the one reported below, + * which contains all valid word endings, most nodes are shared, so its storage cost is "only" + * total key length times 0.35 times 16, or less than 6 bytes per char. + * With \c wchar_t type, the storage cost cannot be considered overly large. + * + * See also \ref tst_impl + * + * <h3>Lookup speed</h3> + * + * The complexity of ternary tree operations is basically the same as for binary trees, + * (logarithmic in tree size) but with quite different constant factors. See \ref note_1 "[1]". + * + * Overall lookup and iteration speed depends on application factors - ie + * whether strings are inserted in random order or not, etc. + * + * Rough speed estimates (compared to Stlport hash_map and map). + * - insertion is a bit slower (>30% to 0%) than hash_map, ~30% faster than map. + * - finding a key is ~0-50% slower than hash_map (equal on failure, with short keys). + * - finding a key is 1.5-3 times faster than map (again with short keys). + * + * Compared to C versions (DDJ and libtst), + * - find and insert are slower, by factors ranging from 1.5-4. + * - partial_match and neighbour searches are 5-20% faster than published DDJ code - + * the code is essentially the same, but our implementation rolls out some recursion. + * This is easily back-ported, so in effect they should be considered to run at same speed. + * This by itself is good news though, since eg single-key lookup is always slower. + * + * Since each character in a key is at a separate node in the internal tree, + * iterating over values is a little slower than for other tree-based containers. + * + * For detailed test, see performance table below. 
+ * + * <hr style="height: 3px; border-top: 0px; background-color: #e09060;"> + * \htmlinclude performancetable.html + */ + + +/** \page tst_links Links + * ternary_tree by rasmus ekman, see http://abc.se/~re/code/tst <br> + * Download: http://abc.se/~re/code/tst/ternary_tree.zip + * + * Some other TST implementations. + * - <b>DDJ code:</b> Original C implementation by Jon Bentley and Robert Sedgewick. + * Article in Dr Dobb's Journal, 1998 #4: http://www.ddj.com/documents/s=921/ddj9804a/9804a.htm \anchor note_1 \n + * See http://www.cs.princeton.edu/~rs/strings/ for C code and article on TST complexity. + * - \b libtst: Worked-out version of DDJ code by Peter A. Friend 2002. Version 1.3. \n + * See http://www.octavian.org/cs/software.html \anchor note_2 \n + * - \b Boost.Spirit version: C++ reimplementation by Joel de Guzman. \anchor note_3 \n + * See http://spirit.sourceforge.net/ internal file ./boost/spirit/symbols/impl/tst.h + * - <b>Hartmut Kaiser version:</b> C++ reimplementation intended for generalization of tst. + * Currently abandoned, available in Spirit CVS. (interesting for interface design) \n + * See http://lists.boost.org/Archives/boost/2005/09/93316.php \n + * and http://article.gmane.org/gmane.comp.parsers.spirit.general/6959 + * - \b pytst: C++ version by Nicolas Lehuen, with SWIG wrappers for use from other languages. Version 0.97. \n + * See http://nicolas.lehuen.com/download/pytst/ + * (not yet tested) \anchor note_4 + * + * <h2>Feature chart</h2> + * All versions have insert and plain search, other features available as tabled below: + * \htmlinclude featuretable.html + */ + +/** \page tst_tests Test Suite + +All tests require the <a href="http://boost.org">Boost library</a> to compile. + +<h3>Concept checks</h3> + +The file <a href="../tst_concept_checks.cpp">tst_concept_checks.cpp</a> +performs a compile-time test of structured containers. 
\n +A class \c StructuredAssociativeContainer is defined, which contains +prototypes of all required methods for structured containers (also class ternary_tree). +Relevant concepts from \c boost/concept_check.hpp are used to check the structured set/map +containers. + +<h3>Correctness tests</h3> + +The subdirectory \b test in the distribution contains a bunch of files hacked up during development. +All these tests are performed by a single main test file <a href="../test/test_tst.cpp">test_tst.cpp</a>. +This file includes individual .cpp files, since we use a simplified (hacked) version +of the Boost.Test harness. + +Each test prints a single line to \c std::cerr saying whether the test was "OK" or "FAIL". +A line is added if an exception was thrown. + +These are runtime tests, several of which require a file name to a dictionary-type file, +a plain-text file with one word per line. +The file \c fill_dictionary.cpp must be compiled with test projects; +it reads a dictionary file and fills a std::vector with strings. + +Dictionary files can be found by an internet search (try eg "dictionary file"). + +<h3>To do</h3> + +Proper organization and cleanup of this part of our library will be required before 1.0 release. + +*/ + + Added: trunk/opentrep/ternary_tree/doxygen_input/concepts.txt =================================================================== --- trunk/opentrep/ternary_tree/doxygen_input/concepts.txt (rev 0) +++ trunk/opentrep/ternary_tree/doxygen_input/concepts.txt 2009-07-14 10:45:19 UTC (rev 126) @@ -0,0 +1,122 @@ +Tutorial + +In programming, a Concept is a set of formal requirements on input/output of a subsystem, or pre/post-conditions of +an operation, of complexity constraints and exceptional behaviour. +Note ye well the "complexity" bit. Since the specification of C++ STL, the complexity of operations on +a type has been introduced as a proper feature of its concept, a full-citizenship part of type specification. 
+(This diverges from the mathematical roots of programming, which defines types/concepts in purely structural terms +-- ie as long as an operation does not transgress countability or infinity boundaries, it doesn't matter a damn bit whether +it requires zero overhead or would enrol half the atoms in the universe to encode intermediate information. +Maths is not about bean counting.) + +Here we will discuss tree concepts in the common sense. +The following are some definitions of terms as used in documentation of the Structured Containers library. +The definitions given are stipulative, in that they do not purely document an existing usage, but unless an expert +tells me otherwise, I believe they should be adopted when discussing trees and tries. + + +Tree =df a directed acyclic graph of single-parented, multi-childed Nodes. Usually single-rooted, but this is not essential. + Stipulative: tree nodes have a fixed maximum number of children. + An implementation constraint that has become ingrained in most programmers' understanding of the concept. + All trees can be reduced to (easily and naturally implemented by) a binary tree. + +We assume common terminology for the parts of Trees: + Root - a node designated as start point, from which other nodes are reachable as children, or children of children etc. + Single-rooted Tree - a Tree where all nodes are reachable from a single Root node. + Multi-rooted tree, or Forest - a Tree where several start nodes are designated. + Level N - the set of nodes that are at the same distance from Root. Every node at level 3 is reachable + from the/a Root node by following exactly 3 child-node links. + Sibling - relation between any two nodes on the same Level. + Leaf - a node without children. + Fanout - the number of children that a node can have. This defines the maximum number of nodes at each level. + +Trie =df A Tree where the nodes have an "alphabet"-sized (max) number of children, for some alphabet. 
+ Typical alphabets are the English letters, Unicode, or the ACGT genetic bases. + +Tree nodes represent a full "key" of any [less-comparable] type. +Tries store string-like keys; a Trie node does not store a full key, only a part of it. +A full key is represented by a leaf node and its path back to the/a root node. +The reason for using Tries is that access to string-like keys is very fast - in principle linear in key length. +Binary Trees over the same key is O(log n) where n is count of keys in the tree, with average key length +as a constant factor. + + +Ternary Search Tree (TST) is a space optimization for Tries. + +Because each path to a child of a Tree node takes up memory space, Trie nodes are very expensive +if the alphabet is large. From the 3rd or 4th level on, most child-node links are empty. +A TST constructs exactly the number of links existing at each level of a Trie. + +Graphics: +Tree + root==node==node==node + \\node==node \\node==node + +Trie (6-letter alphabet: 123456) + root + ______________||______________ + || || || || || || + node1 (empty) node3 node4 node5 (empty) + ______________||_____________ + || || || || || || + node1 node2 node3 node4 node5 node6 + +Here we see the root node with 4/6 child links populated. Each child has 6 empty links, except the 5th child. +The 5th child has 6/6 links filled, and each of its children has 0/6 children. +In all there are 4+6 = 10 nodes, and 10*6 = 60 links. +Since a Trie only stores part of a key, the substantial information in each trie node is small, and the +structure overhead - the links - is very large. + +This cries out for optimization. Several kinds of variable-sized nodes have been tried, but they usually +end up with complex code to use and maintain, and thus squander the search speed which was the rationale +for constructing Tries in the first place. + + +TST is one such optimization. 
Here each trie node is constructed from much smaller nodes, but the +code to use and maintain nodes is still fairly simple, so search speed is not badly compromised. +Let's see the structure of the above tree: +The exact runtime layout depends on insert order. If child 3 is inserted before child 1, child 1 may become a +"lower-child" child of child 3. +Here we assume insertion order 4, 3, 5, 1 for the root + + //node1 + //node3 +root=node4 + \\node5==(level2) + +level 2: assume insertion order 3,4,5,1,2,6 + + //node1 + // \\node2 +level2.root=node3 + \\node4 + \\node5 + \\node6 + + +Here we see 10 nodes, each with 3 child links. This means 30 links, ie half the link count of Trie. +The space savings are of course even better for larger alphabets. +(And in the Structured Containers implementation, the middle child link is omitted, so only 2*10=20 links are needed.) +Given an English-alphabet Trie with the above sparse population, there would be 26*10 = 260 links in +the Trie (each Trie node has 26 child links), and still the same count of TST node-links (30, or 20). +A Unicode Trie would have 10*2^16 links= 10*65536= 655 thousand links. A TST for Unicode again uses 30 (20) links only. + +Important points wrt TSTs and TST nodes. +A. TST nodes have two different kinds of child links: + (1) Two same-level sibling links + (2) One next-level "proper" child link. + +B. TSTs generalize Trie implementations. + Tries with any alphabet can be implemented by the same TST node type - no new type needs to be defined for new alphabets. + (However a specialization is still often needed, since there must be a comparison function for the alphabet letters) + +C. TSTs are a hybrid of (binary) Tree and Trie. + In consequence of (A), TSTs combine the features of binary trees with Tries. + TST nodes can be viewed as binary nodes with associated data, where the data is a link to a next-level binary tree + (that implements a trie node). 
+ - Against this view one may note that the binary treelets have absolute size constraints (defined by + count of letters in the implemented alphabet) - this is an "unnatural" constraint on a tree type. + - In support of the view one may note that search complexity is more like binary trees than pure Trie implementation. + + + Added: trunk/opentrep/ternary_tree/doxygen_input/doxygen-old.css =================================================================== --- trunk/opentrep/ternary_tree/doxygen_input/doxygen-old.css (rev 0) +++ trunk/opentrep/ternary_tree/doxygen_input/doxygen-old.css 2009-07-14 10:45:19 UTC (rev 126) @@ -0,0 +1,311 @@ +BODY,H1,H2,H3,H4,H5,H6,P,CENTER,TD,TH,UL,DL,DIV { + font-family: Geneva, Arial, Helvetica, sans-serif; +} +/*BODY,TD { font-size: 90%; } +H1 { + font-size: 150%; + background-color: #eeeeff; + width: 100%; + border: 1px solid #b00000; + margin: 2px; + padding: 2px; +} +H2 { font-size: 140%; } +H3 { font-size: 100%; } */ + +CAPTION { font-weight: bold } +DIV.qindex { + width: 100%; + background-color: #eeeeff; + border: 1px solid #b0b0b0; + text-align: center; + margin: 2px; + padding: 2px; + line-height: 140%; +} +DIV.nav { + width: 100%; + background-color: #eeeeff; + border: 1px solid #b0b0b0; + text-align: center; + margin: 2px; + padding: 2px; + line-height: 140%; +} +DIV.navtab { + background-color: #eeeeff; + border: 1px solid #b0b0b0; + text-align: center; + margin: 2px; + margin-right: 15px; + padding: 2px; +} +TD.navtab { + font-size: 90%; +} +A.qindex { + text-decoration: none; + font-weight: bold; + color: #1A419D; +} +A.qindex:visited { + text-decoration: none; + font-weight: bold; + color: #1A419D +} +A.qindex:hover { + text-decoration: none; + background-color: #ddddff; +} +A.qindexHL { + text-decoration: none; + font-weight: bold; + background-color: #6666cc; + color: #ffffff; + border: 1px double #9295C2; +} +A.qindexHL:hover { + text-decoration: none; + background-color: #6666cc; + color: #ffffff; +} 
+A.qindexHL:visited { text-decoration: none; background-color: #6666cc; color: #ffffff } +A.el { text-decoration: none; font-weight: bold } +A.elRef { font-weight: bold } +A.code:link { text-decoration: none; font-weight: normal; color: #0000a0 } +A.code:visited { text-decoration: none; font-weight: normal; color: #0000a0 } +A.codeRef:link { font-weight: normal; color: #0000FF} +A.codeRef:visited { font-weight: normal; color: #0000FF} +A:hover { text-decoration: none; background-color: #f2f2ff } +DL.el { margin-left: -1cm } +.fragment { + font-family: Fixed, monospace + font-size: 100%; +} +PRE.fragment { + font-size: normal; + border: 1px solid #CCCCCC; + background-color: #f5f5f5; + margin-top: 4px; + margin-bottom: 4px; + margin-left: 2px; + margin-right: 8px; + padding-left: 6px; + padding-right: 6px; + padding-top: 4px; + padding-bottom: 4px; +} +DIV.ah { background-color: black; font-weight: bold; color: #ffffff; margin-bottom: 3px; margin-top: 3px } +TD.md { background-color: #F4F4FB; font-weight: bold; } +TD.mdPrefix { + background-color: #F4F4FB; + color: #606060; + font-size: 90%; +} +TD.mdname1 { background-color: #F4F4FB; font-weight: bold; color: #602020; } +TD.mdname { background-color: #F4F4FB; font-weight: bold; color: #602020; width: 600px; } +DIV.groupHeader { + margin-left: 16px; + margin-top: 12px; + margin-bottom: 6px; + padding: 3px; + font-weight: bold; + font-size: 110%; + background-color: #d0d0ff; +} +DIV.groupText { margin-left: 16px; font-style: italic; font-size: 90%; } +BODY { + background: white; + color: black; + margin-right: 20px; + margin-left: 20px; +} +TD.indexkey { + background-color: #eeeeff; + font-weight: bold; + padding-right : 10px; + padding-top : 2px; + padding-left : 10px; + padding-bottom : 2px; + margin-left : 0px; + margin-right : 0px; + margin-top : 2px; + margin-bottom : 2px; + border: 1px solid #CCCCCC; +} +TD.indexvalue { + background-color: #eeeeff; + font-style: italic; + padding-right : 10px; + padding-top : 
2px; + padding-left : 10px; + padding-bottom : 2px; + margin-left : 0px; + margin-right : 0px; + margin-top : 2px; + margin-bottom : 2px; + border: 1px solid #CCCCCC; +} +TR.memlist { + background-color: #f0f0f0; +} +P.formulaDsp { text-align: center; } +IMG.formulaDsp { } +IMG.formulaInl { vertical-align: middle; } +SPAN.keyword { color: #0000ff } +SPAN.keywordtype { color: #0000ff } +SPAN.keywordflow { color: #0000ff } +SPAN.comment { color: #008000 } +SPAN.preprocessor { color: #806020 } +SPAN.stringliteral { color: #800000 } +SPAN.charliteral { color: #800000 } +.mdTable { + border: 1px solid #868686; + background-color: #F4F4FB; +} +.mdRow { + padding: 8px 10px; +} +.mdescLeft { + padding: 0px 8px 4px 8px; + font-size: 12px; + font-style: italic; + background-color: #FAFAFA; + border-top: 1px none #E0E0E0; + border-right: 1px none #E0E0E0; + border-bottom: 1px none #E0E0E0; + border-left: 1px none #E0E0E0; + margin: 0px; +} +.mdescRight { + padding: 0px 8px 4px 8px; + font-size: 12px; + font-style: italic; + background-color: #FAFAFA; + border-top: 1px none #E0E0E0; + border-right: 1px none #E0E0E0; + border-bottom: 1px none #E0E0E0; + border-left: 1px none #E0E0E0; + margin: 0px; +} +.memItemLeft { + padding: 1px 0px 0px 8px; + margin: 4px; + border-top-width: 1px; + border-right-width: 1px; + border-bottom-width: 1px; + border-left-width: 1px; + border-top-color: #E0E0E0; + border-right-color: #E0E0E0; + border-bottom-color: #E0E0E0; + border-left-color: #E0E0E0; + border-top-style: solid; + border-right-style: none; + border-bottom-style: none; + border-left-style: none; + background-color: #FAFAFA; + font-size: 90%; +} +.memItemRight { + padding: 1px 8px 0px 8px; + margin: 4px; + border-top-width: 1px; + border-right-width: 1px; + border-bottom-width: 1px; + border-left-width: 1px; + border-top-color: #E0E0E0; + border-right-color: #E0E0E0; + border-bottom-color: #E0E0E0; + border-left-color: #E0E0E0; + border-top-style: solid; + border-right-style: none; 
+ border-bottom-style: none; + border-left-style: none; + background-color: #FAFAFA; + font-size: 100%; +} +.memTemplItemLeft { + padding: 1px 0px 0px 8px; + margin: 4px; + border-top-width: 1px; + border-right-width: 1px; + border-bottom-width: 1px; + border-left-width: 1px; + border-top-color: #E0E0E0; + border-right-color: #E0E0E0; + border-bottom-color: #E0E0E0; + border-left-color: #E0E0E0; + border-top-style: none; + border-right-style: none; + border-bottom-style: none; + border-left-style: none; + background-color: #FAFAFA; + font-size: 90%; +} +.memTemplItemRight { + padding: 1px 8px 0px 8px; + margin: 4px; + border-top-width: 1px; + border-right-width: 1px; + border-bottom-width: 1px; + borde... [truncated message content] |
From: <den...@us...> - 2009-07-13 19:55:07
|
Revision: 125 http://opentrep.svn.sourceforge.net/opentrep/?rev=125&view=rev Author: denis_arnaud Date: 2009-07-13 19:55:04 +0000 (Mon, 13 Jul 2009) Log Message: ----------- [Indexer] The words are no longer indexed with positions within documents. Modified Paths: -------------- trunk/opentrep/opentrep/command/IndexBuilder.cpp trunk/opentrep/test/xapian/string_search.cpp Modified: trunk/opentrep/opentrep/command/IndexBuilder.cpp =================================================================== --- trunk/opentrep/opentrep/command/IndexBuilder.cpp 2009-07-13 18:03:01 UTC (rev 124) +++ trunk/opentrep/opentrep/command/IndexBuilder.cpp 2009-07-13 19:55:04 UTC (rev 125) @@ -48,15 +48,18 @@ (lCityCode.empty())?lPlaceCode:lCityCode; const std::string& lStateCode = ioPlace.getStateCode(); const std::string lDBStateCode = (lStateCode.empty())?"NA":lStateCode; + + // Word index/position within the Xapian document + unsigned short idx = 1; // Add indexing terms - lDocument.add_posting (lPlaceCode, 1); - lDocument.add_posting (lDBCityCode, 2); - lDocument.add_posting (lDBStateCode, 3); - lDocument.add_posting (ioPlace.getCountryCode(), 4); - lDocument.add_posting (ioPlace.getRegionCode(), 5); - lDocument.add_posting (ioPlace.getContinentCode(), 6); - lDocument.add_posting (ioPlace.getTimeZoneGroup(), 7); + lDocument.add_term (lPlaceCode); ++idx; + lDocument.add_term (lDBCityCode); ++idx; + lDocument.add_term (lDBStateCode); ++idx; + lDocument.add_term (ioPlace.getCountryCode()); ++idx; + lDocument.add_term (ioPlace.getRegionCode()); ++idx; + lDocument.add_term (ioPlace.getContinentCode()); ++idx; + lDocument.add_term (ioPlace.getTimeZoneGroup()); ++idx; // Add terms to the spelling dictionnary ioDatabase.add_spelling (lPlaceCode); @@ -65,18 +68,20 @@ ioDatabase.add_spelling (lStateCode); } - // Retrieve the map of name lists - unsigned int i = 1; - const NameMatrix_T& lNameMatrix = ioPlace.getNameMatrix(); + // Retrieve the place names in all the available languages + const 
NameMatrix_T& lNameMatrix = ioPlace.getNameMatrix (); for (NameMatrix_T::const_iterator itNameList = lNameMatrix.begin(); - itNameList != lNameMatrix.end(); ++itNameList, ++i) { + itNameList != lNameMatrix.end(); ++itNameList) { + // Retrieve the language code and locale + const Language::EN_Language& lLanguage = itNameList->first; const Names& lNames = itNameList->second; - // Add the language code (e.g., en_US) - lDocument.add_posting (lNames.describeShortKey(), 7+i); - ++i; + // Add that language code and locale to the Xapian document + lDocument.add_term (Language::getLongLabel (lLanguage)); ++idx; + // For a given language, retrieve the list of place names const NameList_T& lNameList = lNames.getNameList(); + for (NameList_T::const_iterator itName = lNameList.begin(); itName != lNameList.end(); ++itName) { const std::string& lName = *itName; @@ -84,9 +89,9 @@ // Add the place name (it can be the classical one, or // extended, alternate, etc.) if (lName.empty() == false) { - lDocument.add_posting (lName, 8+i); + // OPENTREP_LOG_DEBUG ("Added name: " << lName); + lDocument.add_term (lName); ++idx; ioDatabase.add_spelling (lName); - ++i; } } } Modified: trunk/opentrep/test/xapian/string_search.cpp =================================================================== --- trunk/opentrep/test/xapian/string_search.cpp 2009-07-13 18:03:01 UTC (rev 124) +++ trunk/opentrep/test/xapian/string_search.cpp 2009-07-13 19:55:04 UTC (rev 125) @@ -28,6 +28,7 @@ for (int idx=2; idx != argc; ++idx) { if (idx != 2) { oStr << " "; +// oStr << " AND "; } const std::string lWord (argv[idx]); const std::string lSuggestedWord = This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <den...@us...> - 2009-07-13 18:03:11
|
Revision: 124 http://opentrep.svn.sourceforge.net/opentrep/?rev=124&view=rev Author: denis_arnaud Date: 2009-07-13 18:03:01 +0000 (Mon, 13 Jul 2009) Log Message: ----------- [DB] Updated the MySQL database scripts for the user creation. Modified Paths: -------------- trunk/opentrep/refdata/mysql/create_opentrep_user.sh trunk/opentrep/refdata/mysql/create_opentrep_user.sql Modified: trunk/opentrep/refdata/mysql/create_opentrep_user.sh =================================================================== --- trunk/opentrep/refdata/mysql/create_opentrep_user.sh 2009-07-13 00:11:02 UTC (rev 123) +++ trunk/opentrep/refdata/mysql/create_opentrep_user.sh 2009-07-13 18:03:01 UTC (rev 124) @@ -43,8 +43,10 @@ function createOpenTrepUser() { echo "Creating the opentrep user within the database:" mysql -u ${DB_USER} --password=${DB_PASSWD} -P ${DB_PORT} -h ${DB_HOST} ${DB_NAME} < ${SQL_FILE} + mysql -u ${DB_USER} --password=${DB_PASSWD} -P ${DB_PORT} -h ${DB_HOST} -e "flush privileges" } # Creating the opentrep user SQL_FILE="create_opentrep_user.sql" createOpenTrepUser + Modified: trunk/opentrep/refdata/mysql/create_opentrep_user.sql =================================================================== --- trunk/opentrep/refdata/mysql/create_opentrep_user.sql 2009-07-13 00:11:02 UTC (rev 123) +++ trunk/opentrep/refdata/mysql/create_opentrep_user.sql 2009-07-13 18:03:01 UTC (rev 124) @@ -1,3 +1,7 @@ + insert into `user` (`Host`, `User`, `Password`, `Select_priv`, `Insert_priv`, `Update_priv`, `Delete_priv`, `Create_priv`, `Drop_priv`, `Reload_priv`, `Shutdown_priv`, `Process_priv`, `File_priv`, `Grant_priv`, `References_priv`, `Index_priv`, `Alter_priv`, `Show_db_priv`, `Super_priv`, `Create_tmp_table_priv`, `Lock_tables_priv`, `Execute_priv`, `Repl_slave_priv`, `Repl_client_priv`, `Create_view_priv`, `Show_view_priv`, `Create_routine_priv`, `Alter_routine_priv`, `Create_user_priv`, `ssl_type`, `ssl_cipher`, `x509_issuer`, `x509_subject`, `max_questions`, `max_updates`, 
`max_connections`, `max_user_connections`) values ('%', 'opentrep', '*C21B5F0DB6BBABAA20B5496E75D652982A6AC65C', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'N', 'N', 'N', 'Y', 'N', 'N', 'Y', 'Y', 'N', 'N', 'Y', 'N', 'Y', 'N', 'N', 'Y', 'Y', 'Y', 'Y', 'N', '', '', '', '', 0, 0, 0, 0), ('localhost', 'opentrep', '*C21B5F0DB6BBABAA20B5496E75D652982A6AC65C', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'N', 'N', 'N', 'Y', 'N', 'N', 'Y', 'Y', 'N', 'N', 'Y', 'N', 'Y', 'N', 'N', 'Y', 'Y', 'Y', 'Y', 'N', '', '', '', '', 0, 0, 0, 0); + +flush privileges; + This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <den...@us...> - 2009-07-13 00:11:12
|
Revision: 123 http://opentrep.svn.sourceforge.net/opentrep/?rev=123&view=rev Author: denis_arnaud Date: 2009-07-13 00:11:02 +0000 (Mon, 13 Jul 2009) Log Message: ----------- [Dev] Added the Ternary-Tree project, and a few examples within the parsers sub-directory. Modified Paths: -------------- trunk/opentrep/test/parsers/search_string_parser.cpp Added Paths: ----------- trunk/opentrep/test/parsers/levenshtein_new.cpp trunk/opentrep/tools/ trunk/ternary_tree/ trunk/ternary_tree/README trunk/ternary_tree/doxygen_input/ trunk/ternary_tree/doxygen_input/blather.hpp trunk/ternary_tree/doxygen_input/concepts.txt trunk/ternary_tree/doxygen_input/doxygen-old.css trunk/ternary_tree/doxygen_input/doxygen.css trunk/ternary_tree/doxygen_input/external.png trunk/ternary_tree/doxygen_input/featuretable.html trunk/ternary_tree/doxygen_input/footer_inc.html trunk/ternary_tree/doxygen_input/header_inc.html trunk/ternary_tree/doxygen_input/performancetable.html trunk/ternary_tree/doxygen_input/tree - trie concepts.txt trunk/ternary_tree/doxygen_input/usage.hpp trunk/ternary_tree/examples/ trunk/ternary_tree/examples/examples.vcproj trunk/ternary_tree/examples/locale_less.hpp trunk/ternary_tree/examples.cpp trunk/ternary_tree/fill_dictionary.cpp trunk/ternary_tree/full-docs-index.html trunk/ternary_tree/html/ trunk/ternary_tree/html/annotated.html trunk/ternary_tree/html/blather_8hpp.html trunk/ternary_tree/html/class_data_t.html trunk/ternary_tree/html/class_data_t_01_5.html trunk/ternary_tree/html/classcontainers_1_1search__results__list-members.html trunk/ternary_tree/html/classcontainers_1_1search__results__list.html trunk/ternary_tree/html/classcontainers_1_1search__results__list_1_1iterator-members.html trunk/ternary_tree/html/classcontainers_1_1search__results__list_1_1iterator.html trunk/ternary_tree/html/classcontainers_1_1structured__map-members.html trunk/ternary_tree/html/classcontainers_1_1structured__map.html 
trunk/ternary_tree/html/classcontainers_1_1structured__map_1_1value__compare-members.html trunk/ternary_tree/html/classcontainers_1_1structured__map_1_1value__compare.html trunk/ternary_tree/html/classcontainers_1_1structured__multimap-members.html trunk/ternary_tree/html/classcontainers_1_1structured__multimap.html trunk/ternary_tree/html/classcontainers_1_1structured__multimap_1_1value__compare-members.html trunk/ternary_tree/html/classcontainers_1_1structured__multimap_1_1value__compare.html trunk/ternary_tree/html/classcontainers_1_1structured__multiset-members.html trunk/ternary_tree/html/classcontainers_1_1structured__multiset.html trunk/ternary_tree/html/classcontainers_1_1structured__set-members.html trunk/ternary_tree/html/classcontainers_1_1structured__set.html trunk/ternary_tree/html/classcontainers_1_1ternary__tree-members.html trunk/ternary_tree/html/classcontainers_1_1ternary__tree.html trunk/ternary_tree/html/classcontainers_1_1ternary__tree_1_1key__compare-members.html trunk/ternary_tree/html/classcontainers_1_1ternary__tree_1_1key__compare.html trunk/ternary_tree/html/classcontainers_1_1tst__detail_1_1_base_t.html trunk/ternary_tree/html/classcontainers_1_1tst__detail_1_1tst__iterator__base-members.html trunk/ternary_tree/html/classcontainers_1_1tst__detail_1_1tst__iterator__base.html trunk/ternary_tree/html/classstd_1_1back__insert__iterator.html trunk/ternary_tree/html/classstd_1_1binary__function.html trunk/ternary_tree/html/dir_0df55976ff011c1ef61da79183e9e28f.html trunk/ternary_tree/html/dir_59457a7c227558cb0e28f31428e14f54.html trunk/ternary_tree/html/dirs.html trunk/ternary_tree/html/doxygen.css trunk/ternary_tree/html/doxygen.png trunk/ternary_tree/html/files.html trunk/ternary_tree/html/functions.html trunk/ternary_tree/html/functions_0x62.html trunk/ternary_tree/html/functions_0x63.html trunk/ternary_tree/html/functions_0x64.html trunk/ternary_tree/html/functions_0x65.html trunk/ternary_tree/html/functions_0x66.html 
trunk/ternary_tree/html/functions_0x67.html trunk/ternary_tree/html/functions_0x68.html trunk/ternary_tree/html/functions_0x69.html trunk/ternary_tree/html/functions_0x6b.html trunk/ternary_tree/html/functions_0x6c.html trunk/ternary_tree/html/functions_0x6d.html trunk/ternary_tree/html/functions_0x6e.html trunk/ternary_tree/html/functions_0x6f.html trunk/ternary_tree/html/functions_0x70.html trunk/ternary_tree/html/functions_0x72.html trunk/ternary_tree/html/functions_0x73.html trunk/ternary_tree/html/functions_0x74.html trunk/ternary_tree/html/functions_0x75.html trunk/ternary_tree/html/functions_0x76.html trunk/ternary_tree/html/functions_0x77.html trunk/ternary_tree/html/functions_0x7e.html trunk/ternary_tree/html/functions_enum.html trunk/ternary_tree/html/functions_eval.html trunk/ternary_tree/html/functions_func.html trunk/ternary_tree/html/functions_func_0x62.html trunk/ternary_tree/html/functions_func_0x63.html trunk/ternary_tree/html/functions_func_0x64.html trunk/ternary_tree/html/functions_func_0x65.html trunk/ternary_tree/html/functions_func_0x66.html trunk/ternary_tree/html/functions_func_0x67.html trunk/ternary_tree/html/functions_func_0x68.html trunk/ternary_tree/html/functions_func_0x69.html trunk/ternary_tree/html/functions_func_0x6b.html trunk/ternary_tree/html/functions_func_0x6c.html trunk/ternary_tree/html/functions_func_0x6d.html trunk/ternary_tree/html/functions_func_0x6e.html trunk/ternary_tree/html/functions_func_0x6f.html trunk/ternary_tree/html/functions_func_0x70.html trunk/ternary_tree/html/functions_func_0x72.html trunk/ternary_tree/html/functions_func_0x73.html trunk/ternary_tree/html/functions_func_0x74.html trunk/ternary_tree/html/functions_func_0x75.html trunk/ternary_tree/html/functions_func_0x76.html trunk/ternary_tree/html/functions_func_0x7e.html trunk/ternary_tree/html/functions_rela.html trunk/ternary_tree/html/functions_type.html trunk/ternary_tree/html/functions_type_0x62.html 
trunk/ternary_tree/html/functions_type_0x63.html trunk/ternary_tree/html/functions_type_0x64.html trunk/ternary_tree/html/functions_type_0x66.html trunk/ternary_tree/html/functions_type_0x68.html trunk/ternary_tree/html/functions_type_0x69.html trunk/ternary_tree/html/functions_type_0x6b.html trunk/ternary_tree/html/functions_type_0x6c.html trunk/ternary_tree/html/functions_type_0x6d.html trunk/ternary_tree/html/functions_type_0x6e.html trunk/ternary_tree/html/functions_type_0x70.html trunk/ternary_tree/html/functions_type_0x72.html trunk/ternary_tree/html/functions_type_0x73.html trunk/ternary_tree/html/functions_type_0x74.html trunk/ternary_tree/html/functions_type_0x76.html trunk/ternary_tree/html/functions_vars.html trunk/ternary_tree/html/globals.html trunk/ternary_tree/html/globals_defs.html trunk/ternary_tree/html/globals_func.html trunk/ternary_tree/html/graph_legend.dot trunk/ternary_tree/html/graph_legend.html trunk/ternary_tree/html/graph_legend.png trunk/ternary_tree/html/hierarchy.html trunk/ternary_tree/html/index.html trunk/ternary_tree/html/iteration__impl_8hpp.html trunk/ternary_tree/html/iterator__wrapper_8hpp.html trunk/ternary_tree/html/namespacecontainers.html trunk/ternary_tree/html/namespacecontainers_1_1smap__detail.html trunk/ternary_tree/html/namespacecontainers_1_1sset__detail.html trunk/ternary_tree/html/namespacecontainers_1_1tst__detail.html trunk/ternary_tree/html/namespacecontainers_1_1tst__detail_1_1mpl__detail.html trunk/ternary_tree/html/namespacecontainers_1_1tst__erase__impl__detail.html trunk/ternary_tree/html/namespacecontainers_1_1util.html trunk/ternary_tree/html/namespaceiterators.html trunk/ternary_tree/html/namespacemembers.html trunk/ternary_tree/html/namespacemembers_func.html trunk/ternary_tree/html/namespaces.html trunk/ternary_tree/html/namespacestd.html trunk/ternary_tree/html/new__iterator__base_8ipp.html trunk/ternary_tree/html/pages.html trunk/ternary_tree/html/perf_notes.html 
trunk/ternary_tree/html/structcontainers_1_1smap__detail_1_1multimap__iterator-members.html trunk/ternary_tree/html/structcontainers_1_1smap__detail_1_1multimap__iterator.html trunk/ternary_tree/html/structcontainers_1_1sset__detail_1_1multiset__iterator-members.html trunk/ternary_tree/html/structcontainers_1_1sset__detail_1_1multiset__iterator.html trunk/ternary_tree/html/structcontainers_1_1ternary__tree_1_1find__result-members.html trunk/ternary_tree/html/structcontainers_1_1ternary__tree_1_1find__result.html trunk/ternary_tree/html/structcontainers_1_1tst__detail_1_1always__heap__node-members.html trunk/ternary_tree/html/structcontainers_1_1tst__detail_1_1always__heap__node.html trunk/ternary_tree/html/structcontainers_1_1tst__detail_1_1back__push__pop-members.html trunk/ternary_tree/html/structcontainers_1_1tst__detail_1_1back__push__pop.html trunk/ternary_tree/html/structcontainers_1_1tst__detail_1_1dummy__sequence-members.html trunk/ternary_tree/html/structcontainers_1_1tst__detail_1_1dummy__sequence.html trunk/ternary_tree/html/structcontainers_1_1tst__detail_1_1heap__node-members.html trunk/ternary_tree/html/structcontainers_1_1tst__detail_1_1heap__node.html trunk/ternary_tree/html/structcontainers_1_1tst__detail_1_1inorder__seek-members.html trunk/ternary_tree/html/structcontainers_1_1tst__detail_1_1inorder__seek.html trunk/ternary_tree/html/structcontainers_1_1tst__detail_1_1inplace__node-members.html trunk/ternary_tree/html/structcontainers_1_1tst__detail_1_1inplace__node.html trunk/ternary_tree/html/structcontainers_1_1tst__detail_1_1iter__method__forward-members.html trunk/ternary_tree/html/structcontainers_1_1tst__detail_1_1iter__method__forward.html trunk/ternary_tree/html/structcontainers_1_1tst__detail_1_1key__access-members.html trunk/ternary_tree/html/structcontainers_1_1tst__detail_1_1key__access.html trunk/ternary_tree/html/structcontainers_1_1tst__detail_1_1levenshtein__search__info-members.html 
trunk/ternary_tree/html/structcontainers_1_1tst__detail_1_1levenshtein__search__info.html trunk/ternary_tree/html/structcontainers_1_1tst__detail_1_1levenshtein__search__info_1_1search-members.html trunk/ternary_tree/html/structcontainers_1_1tst__detail_1_1levenshtein__search__info_1_1search.html trunk/ternary_tree/html/structcontainers_1_1tst__detail_1_1mpl__detail_1_1if__c-members.html trunk/ternary_tree/html/structcontainers_1_1tst__detail_1_1mpl__detail_1_1if__c.html trunk/ternary_tree/html/structcontainers_1_1tst__detail_1_1mpl__detail_1_1if__c_3_01false_00_01_t1_00_01_t2_01_4-members.html trunk/ternary_tree/html/structcontainers_1_1tst__detail_1_1mpl__detail_1_1if__c_3_01false_00_01_t1_00_01_t2_01_4.html trunk/ternary_tree/html/structcontainers_1_1tst__detail_1_1node__base-members.html trunk/ternary_tree/html/structcontainers_1_1tst__detail_1_1node__base.html trunk/ternary_tree/html/structcontainers_1_1tst__detail_1_1size__policy__node-members.html trunk/ternary_tree/html/structcontainers_1_1tst__detail_1_1size__policy__node.html trunk/ternary_tree/html/structiterators_1_1const__traits-members.html trunk/ternary_tree/html/structiterators_1_1const__traits.html trunk/ternary_tree/html/structiterators_1_1iterator__wrapper-members.html trunk/ternary_tree/html/structiterators_1_1iterator__wrapper.html trunk/ternary_tree/html/structiterators_1_1nonconst__traits-members.html trunk/ternary_tree/html/structiterators_1_1nonconst__traits.html trunk/ternary_tree/html/structured__map_8hpp.html trunk/ternary_tree/html/structured__set_8hpp.html trunk/ternary_tree/html/structured_concept.html trunk/ternary_tree/html/tab_b.gif trunk/ternary_tree/html/tab_l.gif trunk/ternary_tree/html/tab_r.gif trunk/ternary_tree/html/tabs.css trunk/ternary_tree/html/ternary__tree_8hpp.html trunk/ternary_tree/html/todo.html trunk/ternary_tree/html/tst__implementation_8ipp.html trunk/ternary_tree/html/tst__iterator__base_8ipp.html trunk/ternary_tree/html/tst__iterator__facade_8hpp.html 
trunk/ternary_tree/html/tst__node_8hpp.html trunk/ternary_tree/html/tst__search__results_8ipp.html trunk/ternary_tree/html/tst_impl.html trunk/ternary_tree/html/tst_links.html trunk/ternary_tree/html/tst_reference.html trunk/ternary_tree/html/tst_tests.html trunk/ternary_tree/html/tst_usage.html trunk/ternary_tree/html/usage_8hpp.html trunk/ternary_tree/index.html trunk/ternary_tree/iterator_compile_test.cpp trunk/ternary_tree/iterator_wrapper.hpp trunk/ternary_tree/readme.txt trunk/ternary_tree/structured_map.hpp trunk/ternary_tree/structured_set.hpp trunk/ternary_tree/ternary_tree.hpp trunk/ternary_tree/test/ trunk/ternary_tree/test/basic_insertion_test.hpp trunk/ternary_tree/test/check_iteration.hpp trunk/ternary_tree/test/copy_test.hpp trunk/ternary_tree/test/element_range_test.hpp trunk/ternary_tree/test/erase_test.cpp trunk/ternary_tree/test/hamming_search_test.cpp trunk/ternary_tree/test/iterator_test.cpp trunk/ternary_tree/test/localization_test.cpp trunk/ternary_tree/test/longest_match_test.cpp trunk/ternary_tree/test/mapped_value_test.cpp trunk/ternary_tree/test/partial_match_test.cpp trunk/ternary_tree/test/prefix_range_test.cpp trunk/ternary_tree/test/scrabble_search_test.cpp trunk/ternary_tree/test/test.vcproj trunk/ternary_tree/test/test_tst.cpp trunk/ternary_tree/test/tests_common.hpp trunk/ternary_tree/tst.doxy trunk/ternary_tree/tst_concept_checks.cpp trunk/ternary_tree/tst_detail/ trunk/ternary_tree/tst_detail/iteration_impl.hpp trunk/ternary_tree/tst_detail/new_iterator_base.ipp trunk/ternary_tree/tst_detail/tst_implementation.ipp trunk/ternary_tree/tst_detail/tst_iterator_base.ipp trunk/ternary_tree/tst_detail/tst_iterator_facade.hpp trunk/ternary_tree/tst_detail/tst_node.hpp trunk/ternary_tree/tst_detail/tst_search_results.ipp trunk/ternary_tree/tst_public.doxy Added: trunk/opentrep/test/parsers/levenshtein_new.cpp =================================================================== --- trunk/opentrep/test/parsers/levenshtein_new.cpp (rev 0) +++ 
trunk/opentrep/test/parsers/levenshtein_new.cpp 2009-07-13 00:11:02 UTC (rev 123) @@ -0,0 +1,316 @@ +// +// Source: http://www.codeproject.com/KB/recipes/Levenshtein.aspx +// +using System; +using System.Collections.Generic; +using System.Text; + +namespace Test +{ + class Program + { + static void Main(string[] args) + { + string sNew = @"GAMBOL +0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 +0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 +0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 +0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 +0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 +0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 +0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 +0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 +0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 +0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 +"; + string sOld = @"GUMBO +0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 +0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 +0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 +0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 +0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 
+0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 +0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 +0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 +0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 +0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 +"; + + + try + { + Levenshtein l = new Levenshtein(); + + + while (true) + { + /// Original version + /// + /// + /* Read the initial time. */ + DateTime startTime = DateTime.Now; + + l.LD(sNew, sOld); + + /* Read the end time. */ + DateTime stopTime = DateTime.Now; + + /* Compute the duration between the initial and the end time. */ + TimeSpan duration = stopTime - startTime; + Console.WriteLine("Original:"+duration); + + + /// New version + /// + /// + /* Read the initial time. */ + startTime = DateTime.Now; + + l.iLD(sNew, sOld); + + /* Read the end time. */ + stopTime = DateTime.Now; + + /* Compute the duration between the initial and the end time. 
*/ + duration = stopTime - startTime; + Console.WriteLine("New :"+duration); + + Console.WriteLine("----------------"); + } + } + catch (Exception e) + { + System.Console.WriteLine( e.ToString()); + } + + } + } + + public class Levenshtein + { + + + ///***************************** + /// Compute Levenshtein distance + /// Memory efficient version + ///***************************** + public int iLD(String sRow, String sCol) + { + int RowLen = sRow.Length; // length of sRow + int ColLen = sCol.Length; // length of sCol + int RowIdx; // iterates through sRow + int ColIdx; // iterates through sCol + char Row_i; // ith character of sRow + char Col_j; // jth character of sCol + int cost; // cost + + /// Test string length + if (Math.Max(sRow.Length, sCol.Length) > Math.Pow(2, 31)) + throw (new Exception("\nMaximum string length in Levenshtein.iLD is " + Math.Pow(2, 31) + ".\nYours is " + Math.Max(sRow.Length, sCol.Length) + ".")); + + // Step 1 + + if (RowLen == 0) + { + return ColLen; + } + + if (ColLen == 0) + { + return RowLen; + } + + /// Create the two vectors + int[] v0 = new int[RowLen + 1]; + int[] v1 = new int[RowLen + 1]; + int[] vTmp; + + + + /// Step 2 + /// Initialize the first vector + for (RowIdx = 1; RowIdx <= RowLen; RowIdx++) + { + v0[RowIdx] = RowIdx; + } + + // Step 3 + + /// Fore each column + for (ColIdx = 1; ColIdx <= ColLen; ColIdx++) + { + /// Set the 0'th element to the column number + v1[0] = ColIdx; + + Col_j = sCol[ColIdx - 1]; + + + // Step 4 + + /// Fore each row + for (RowIdx = 1; RowIdx <= RowLen; RowIdx++) + { + Row_i = sRow[RowIdx - 1]; + + + // Step 5 + + if (Row_i == Col_j) + { + cost = 0; + } + else + { + cost = 1; + } + + // Step 6 + + /// Find minimum + int m_min = v0[RowIdx] + 1; + int b = v1[RowIdx - 1] + 1; + int c = v0[RowIdx - 1] + cost; + + if (b < m_min) + { + m_min = b; + } + if (c < m_min) + { + m_min = c; + } + + v1[RowIdx] = m_min; + } + + /// Swap the vectors + vTmp = v0; + v0 = v1; + v1 = vTmp; + + } + + + // Step 7 + 
+ /// Value between 0 - 100 + /// 0==perfect match 100==totaly different + /// + /// The vectors where swaped one last time at the end of the last loop, + /// that is why the result is now in v0 rather than in v1 + System.Console.WriteLine("iDist=" + v0[RowLen]); + int max = System.Math.Max(RowLen, ColLen); + return ((100 * v0[RowLen]) / max); + } + + + + + + ///***************************** + /// Compute the min + ///***************************** + + private int Minimum(int a, int b, int c) + { + int mi = a; + + if (b < mi) + { + mi = b; + } + if (c < mi) + { + mi = c; + } + + return mi; + } + + ///***************************** + /// Compute Levenshtein distance + ///***************************** + + public int LD(String sNew, String sOld) + { + int[,] matrix; // matrix + int sNewLen = sNew.Length; // length of sNew + int sOldLen = sOld.Length; // length of sOld + int sNewIdx; // iterates through sNew + int sOldIdx; // iterates through sOld + char sNew_i; // ith character of sNew + char sOld_j; // jth character of sOld + int cost; // cost + + /// Test string length + if (Math.Max(sNew.Length, sOld.Length) > Math.Pow(2, 31)) + throw (new Exception("\nMaximum string length in Levenshtein.LD is " + Math.Pow(2, 31) + ".\nYours is " + Math.Max(sNew.Length, sOld.Length) + ".")); + + // Step 1 + + if (sNewLen == 0) + { + return sOldLen; + } + + if (sOldLen == 0) + { + return sNewLen; + } + + matrix = new int[sNewLen + 1, sOldLen + 1]; + + // Step 2 + + for (sNewIdx = 0; sNewIdx <= sNewLen; sNewIdx++) + { + matrix[sNewIdx, 0] = sNewIdx; + } + + for (sOldIdx = 0; sOldIdx <= sOldLen; sOldIdx++) + { + matrix[0, sOldIdx] = sOldIdx; + } + + // Step 3 + + for (sNewIdx = 1; sNewIdx <= sNewLen; sNewIdx++) + { + sNew_i = sNew[sNewIdx - 1]; + + // Step 4 + + for (sOldIdx = 1; sOldIdx <= sOldLen; sOldIdx++) + { + sOld_j = sOld[sOldIdx - 1]; + + // Step 5 + + if (sNew_i == sOld_j) + { + cost = 0; + } + else + { + cost = 1; + } + + // Step 6 + + matrix[sNewIdx, sOldIdx] = 
Minimum(matrix[sNewIdx - 1, sOldIdx] + 1, matrix[sNewIdx, sOldIdx - 1] + 1, matrix[sNewIdx - 1, sOldIdx - 1] + cost); + + } + } + + // Step 7 + + /// Value between 0 - 100 + /// 0==perfect match 100==totaly different + System.Console.WriteLine("Dist=" + matrix[sNewLen, sOldLen]); + int max = System.Math.Max(sNewLen, sOldLen); + return (100 * matrix[sNewLen, sOldLen]) / max; + } + } + + +} Modified: trunk/opentrep/test/parsers/search_string_parser.cpp =================================================================== --- trunk/opentrep/test/parsers/search_string_parser.cpp 2009-07-12 16:27:02 UTC (rev 122) +++ trunk/opentrep/test/parsers/search_string_parser.cpp 2009-07-13 00:11:02 UTC (rev 123) @@ -81,7 +81,9 @@ Airline_T () : _isPreferred (true), _name(""), _code("") {} /* Display. */ void display() const { - std::cout << "Airline: " << _name << " (" << _code << ")" << std::endl; + const std::string isPreferredStr = (_isPreferred)?"+":"-"; + std::cout << "Airline: " << isPreferredStr << _name << " (" << _code << ")" + << std::endl; } }; @@ -210,7 +212,7 @@ : _searchString (ioSearchString) {} void operator() (bool iAirlineSign) const { - _searchString._tmpAirline._isPreferred = iAirlineSign; + _searchString._tmpAirline._isPreferred = !iAirlineSign; // std::cout << "Airline is preferred: " << iAirlineSign << std::endl; } @@ -281,6 +283,40 @@ SearchString_T& _searchString; }; + /** Store the parsed passenger type. 
*/ + struct store_child_passenger_type { + store_child_passenger_type (SearchString_T& ioSearchString) + : _searchString (ioSearchString) {} + + void operator() (iterator_t iStr, iterator_t iStrEnd) const { + std::string lPassengerType (iStr, iStrEnd); + _searchString._tmpPassenger._type = Passenger_T::CHILD; + // std::cout << "Passenger type: " << lPassengerType << std::endl; + + // Add the parsed passenger to the list + _searchString._passengerList.push_back (_searchString._tmpPassenger); + } + + SearchString_T& _searchString; + }; + + /** Store the parsed passenger type. */ + struct store_pet_passenger_type { + store_pet_passenger_type (SearchString_T& ioSearchString) + : _searchString (ioSearchString) {} + + void operator() (iterator_t iStr, iterator_t iStrEnd) const { + std::string lPassengerType (iStr, iStrEnd); + _searchString._tmpPassenger._type = Passenger_T::PET; + // std::cout << "Passenger type: " << lPassengerType << std::endl; + + // Add the parsed passenger to the list + _searchString._passengerList.push_back (_searchString._tmpPassenger); + } + + SearchString_T& _searchString; + }; + } // /////////// Utilities ///////////// @@ -288,8 +324,12 @@ boost::spirit::int_parser<unsigned int, 10, 1, 1> int1_p; /** 1-digit-integer parser */ boost::spirit::uint_parser<unsigned int, 10, 1, 1> uint1_p; +/** Up-to-2-digit-integer parser */ +boost::spirit::uint_parser<unsigned int, 10, 1, 2> uint1_2_p; /** 2-digit-integer parser */ boost::spirit::uint_parser<int, 10, 2, 2> uint2_p; +/** Up-to-4-digit-integer parser */ +boost::spirit::uint_parser<int, 10, 2, 4> uint2_4_p; /** 4-digit-integer parser */ boost::spirit::uint_parser<int, 10, 4, 4> uint4_p; /** Up-to-4-digit-integer parser */ @@ -360,13 +400,26 @@ ; date = - lexeme_d[ limit_d(1u,12u)[uint2_p][assign_a(self._searchString._tmpDate._month)] - >> boost::spirit::chset_p("/-") - >> limit_d(1u,31u)[uint2_p][assign_a(self._searchString._tmpDate._day)] - >> boost::spirit::chset_p("/-") - >> 
limit_d(2000u,2099u)[uint4_p][assign_a(self._searchString._tmpDate._year)]] + ( month | day ) + >> boost::spirit::chset_p("/-") + >> ( day | month ) + >> ! ( boost::spirit::chset_p("/-") + >> year ) ; + day = + lexeme_d[ limit_d(1u,31u)[uint1_2_p][assign_a(self._searchString._tmpDate._day)] ] + ; + + month = + lexeme_d[ limit_d(1u,12u)[uint1_2_p][assign_a(self._searchString._tmpDate._month)] ] + ; + + year = + lexeme_d[ limit_d(2000u,2099u)[uint4_p][assign_a(self._searchString._tmpDate._year)] ] + | lexeme_d[ limit_d(0u,99u)[uint2_p][assign_a(self._searchString._tmpDate._year)] ] + ; + preferred_airlines = !(boost::spirit::sign_p)[store_airline_sign(self._searchString)] >> airline_code | airline_name @@ -389,25 +442,47 @@ ; passenger_type = - lexeme_d[ (repeat_p(3,20)[chset_p("0-9a-z")])[store_adult_passenger_type(self._searchString)] ] + passenger_adult_type[store_adult_passenger_type(self._searchString)] + | passenger_child_type[store_child_passenger_type(self._searchString)] + | passenger_pet_type[store_pet_passenger_type(self._searchString)] ; + passenger_adult_type = + lexeme_d[ as_lower_d [ str_p("adult") >> !ch_p('s') ] ] + ; + + passenger_child_type = + lexeme_d[ as_lower_d [ str_p("child") >> !str_p("ren") ] ] + ; + + passenger_pet_type = + lexeme_d[ as_lower_d [ str_p("dog") | str_p("cat") >> !ch_p('s') ] ] + ; + BOOST_SPIRIT_DEBUG_NODE (search_string); BOOST_SPIRIT_DEBUG_NODE (places); BOOST_SPIRIT_DEBUG_NODE (place_element); BOOST_SPIRIT_DEBUG_NODE (dates); BOOST_SPIRIT_DEBUG_NODE (date); + BOOST_SPIRIT_DEBUG_NODE (day); + BOOST_SPIRIT_DEBUG_NODE (month); + BOOST_SPIRIT_DEBUG_NODE (year); BOOST_SPIRIT_DEBUG_NODE (preferred_airlines); BOOST_SPIRIT_DEBUG_NODE (airline_code); BOOST_SPIRIT_DEBUG_NODE (airline_name); BOOST_SPIRIT_DEBUG_NODE (passengers); BOOST_SPIRIT_DEBUG_NODE (passenger_number); BOOST_SPIRIT_DEBUG_NODE (passenger_type); + BOOST_SPIRIT_DEBUG_NODE (passenger_adult_type); + BOOST_SPIRIT_DEBUG_NODE (passenger_child_type); + 
BOOST_SPIRIT_DEBUG_NODE (passenger_pet_type); } boost::spirit::rule<ScannerT> search_string, places, place_element, - dates, date, preferred_airlines, airline_code, airline_name, - passengers, passenger_number, passenger_type; + dates, date, month, day, year, + preferred_airlines, airline_code, airline_name, + passengers, passenger_number, passenger_type, passenger_adult_type, + passenger_child_type, passenger_pet_type; boost::spirit::rule<ScannerT> const& start() const { return search_string; } }; @@ -415,42 +490,93 @@ SearchString_T& _searchString; }; +// ////////////////////////////////////////////////////////// +bool parseSearchString (std::ostream& oStream, + const std::string& iSearchString, + SearchString_T& ioSearchStringStruct) { + bool oHasBeenParsingSuccessful = false; + + // Read the search string + iterator_t lStringIterator = iSearchString.c_str(); + + // Instantiate the structure that will hold the result of the parsing. + SearchStringParser lSearchStringParser (ioSearchStringStruct); + boost::spirit::parse_info<iterator_t> info = + boost::spirit::parse (lStringIterator, lSearchStringParser, + boost::spirit::space_p); + + oStream << "-------------------------" << std::endl; + + oHasBeenParsingSuccessful = info.full; + if (oHasBeenParsingSuccessful == true) { + oStream << "Parsing succeeded" << std::endl; + + } else { + oStream << "Parsing failed" << std::endl; + } + oStream << "-------------------------" << std::endl; + + return oHasBeenParsingSuccessful; +} + // /////////////// M A I N ///////////////// int main (int argc, char* argv[]) { try { - // File to be parsed - std::string lSearchString ("guadeloupe rio de janeiro 07/22/2009 +aa -ua 2 adults 1 dog"); - - // Read the command-line parameters - if (argc >= 1 && argv[1] != NULL) { - std::istringstream istr (argv[1]); - istr >> lSearchString; - } + // String to be parsed + const std::string lSearchString1 ("guadeloupe rio de janeiro 07/22/2009 +aa -ua 2 adults 3 children 1 dog"); + const 
std::string lSearchString2 ("nce rekjavik 07-22-2009 +aa -ua 2 ADULTS 3 CHILDREN 1 DOG"); + const std::string lSearchString3 ("sna francisco ods 07.22.2009 +aa"); + const std::string lSearchString4 ("nce ods 07/22 +aa"); + const std::string lSearchString5 ("nce ods 22/07 +aa"); - // Read the search string - iterator_t lStringIterator = lSearchString.c_str(); + // Parse the search string + SearchString_T lSearchStringStruct1; + bool hasBeenParsingSuccessful = parseSearchString (std::cout, + lSearchString1, + lSearchStringStruct1); - // Instantiate the structure that will hold the result of the parsing. - SearchString_T lSearchStringStruct; - SearchStringParser lSearchStringParser (lSearchStringStruct); - boost::spirit::parse_info<iterator_t> info = - boost::spirit::parse (lStringIterator, lSearchStringParser, - boost::spirit::space_p); + // Parse the search string + SearchString_T lSearchStringStruct2; + hasBeenParsingSuccessful = parseSearchString (std::cout, lSearchString2, + lSearchStringStruct2); + // Parse the search string + SearchString_T lSearchStringStruct3; + hasBeenParsingSuccessful = parseSearchString (std::cout, lSearchString3, + lSearchStringStruct3); + + // Parse the search string + SearchString_T lSearchStringStruct4; + hasBeenParsingSuccessful = parseSearchString (std::cout, lSearchString4, + lSearchStringStruct4); + + // Parse the search string + SearchString_T lSearchStringStruct5; + hasBeenParsingSuccessful = parseSearchString (std::cout, lSearchString1, + lSearchStringStruct5); + // DEBUG - std::cout << "Search string:" << std::endl; - lSearchStringStruct.display(); + std::cout << "--------------------------------------------" << std::endl; + std::cout << "Search string1: " << lSearchString1 << std::endl; + lSearchStringStruct1.display(); - std::cout << "-------------------------" << std::endl; - if (info.full) { - std::cout << "Parsing succeeded" << std::endl; - - } else { - std::cout << "Parsing failed" << std::endl; - } - std::cout << 
"-------------------------" << std::endl; + std::cout << "--------------------------------------------" << std::endl; + std::cout << "Search string2: " << lSearchString2 << std::endl; + lSearchStringStruct2.display(); + std::cout << "--------------------------------------------" << std::endl; + std::cout << "Search string3: " << lSearchString3 << std::endl; + lSearchStringStruct3.display(); + + std::cout << "--------------------------------------------" << std::endl; + std::cout << "Search string4: " << lSearchString4 << std::endl; + lSearchStringStruct4.display(); + + std::cout << "--------------------------------------------" << std::endl; + std::cout << "Search string5: " << lSearchString5 << std::endl; + lSearchStringStruct5.display(); + } catch (const std::exception& stde) { std::cerr << "Standard exception: " << stde.what() << std::endl; return -1; Added: trunk/ternary_tree/README =================================================================== --- trunk/ternary_tree/README (rev 0) +++ trunk/ternary_tree/README 2009-07-13 00:11:02 UTC (rev 123) @@ -0,0 +1,3 @@ + +Source: http://abc.se/~re/code/tst and http://abc.se/~re/code/tst/ternary_tree.zip + Added: trunk/ternary_tree/doxygen_input/blather.hpp =================================================================== --- trunk/ternary_tree/doxygen_input/blather.hpp (rev 0) +++ trunk/ternary_tree/doxygen_input/blather.hpp 2009-07-13 00:11:02 UTC (rev 123) @@ -0,0 +1,558 @@ +/** \mainpage Structured Associative Containers + +Ternary Search Tree containers to replace \c set<string> and \c map<string, Value> </h2> + +<center><table bgcolor="#fbf9e5" style="border: thin dotted #808000;" width="95%" border=0> +<tr> +<td> +<h3>Table of contents</h3> +<dl> + <dt>\ref introduction "Introduction"</dt> + <dt>\ref subkey_search_overview "Advanced searches overview"</dt> + <dt>\ref tst_usage "Tutorial"</dt> + <dt>\ref tst_reference "Reference"</dt> <dd> + <dd>\ref structured_concept "Structured Container concept" \n + 
Class \ref containers::structured_set "structured_set" \n + Class \ref containers::structured_map "structured_map" \n + Class \ref containers::structured_multiset "structured_multiset" \n + Class \ref containers::structured_multimap "structured_multimap" \n + Implementation class \ref containers::ternary_tree "ternary_tree" + </dd></dt> + <dt>\ref perf_notes "Performance notes"</dt> + <dt>\ref tst_impl "Implementation details"</dt> + <dt>\ref tst_links "Links"</dt> + <dt>\ref tst_tests "Test Suite"</dt> +</dl> +</td> +</tr></table></center> + +Download: Latest version (0.684) http://abc.se/~re/code/tst/ternary_tree.zip\n + +Copyleft: <a href="mailto:rasmus%20point%20ekman%20at%20abc%20point%20se?subject=Structured Containers suck/rule"> +rasmus ekman</a> 2007-2009 \n +Weblink: http://abc.se/~re/code/tst + +\anchor introduction <hr> +<h2>Introduction</h2> +<b>Structured containers</b> are \c map and \c set -like containers specialized for strings. +They are commonly used for dictionaries.\n +Structured containers have two major benefits: +- They offer near-match searches (wildcard search, partial match etc) that are hard to implement + with other containers. +- Lookup performance is on a par with hashed containers for many common applications, +and 2-5 times faster than standard maps and sets (with string-like keys). + +Of course there is a price to pay: structured containers use much more memory than +other containers: Around 6-8 bytes <b>per letter</b> inserted (whether \c char or \c wchar_t); +an English 150 k word dictionary uses eg 7.3 MB to store 1.2 MB words (2.4 MB of \c wchar_t words). + +The container classes in this library can be used as drop-in replacements for \c set and \c map +(or \c unordered_set, \c unordered_map): + - \ref containers::structured_set "structured_set": This stores unique keys and allows structured key searches. + - \ref containers::structured_multiset "structured_multiset": This stores non-unique keys. 
+ - \ref containers::structured_map "structured_map": This is a + <a target="sgi" href="http://www.sgi.com/tech/stl/PairAssociativeContainer.html">Pair Associative Container</a>, + as it allows associating a value with each key. + - \ref containers::structured_multimap "structured_multimap": Technically, a + <a target="sgi" href="http://www.sgi.com/tech/stl/MultipleSortedAssociativeContainer.html">Multiple, Sorted, + Pair Associative Container</a> - it allows storing several values with each key. + +While the STL standard associative containers are normally backed by a binary tree structure, +Structured Containers are backed by a Ternary Search Tree, as presented by +\ref note_1 "Jon Bentley and Robert Sedgewick in [1]". + +Class \ref containers::ternary_tree "ternary_tree<Key, Value, Comp, Alloc>" provides the implementation backend. +Due to its internals, its interface cannot easily be made to conform with standard STL concepts, +so it is used internally by the structured* wrapper classes (much like STL's internal \c rb_tree class). + +Basically, if you have code using sets or maps, you have code to use structured containers. +And with 1-3 lines of code, you're ready to make advanced imprecise searches in your dictionaries.\n +See \ref tst_usage "the usage section" for examples of how to use these classes. + +<table bgcolor="#f0f0ff" style="border: thin dotted #808000;" border=0> +<tr><th>Library status</th></tr> +<tr><td valign="top" align="right">Compatibility:</td> +<td>Note that the file \b tst_concept_checks.cpp is currently broken. Will investigate.\n +<!-- This used to compile with Mingw GCC 3.4.2 and with MSVC7.1 (with STLport 5). Requires Boost 1.33. +Not sure what happened in Boost 1.36-37 or if I've mangled something. 
\n +Due to recent changes, ternary tree does not support stateful allocators (earlier versions did this by implication) --> +</td> +<tr><td valign="top" align="right">version 0.684: (Jan 2009)</td> +<td>Fix standard-breakage in multimap/multiset return from <code>insert(const value_type&)</code>.<br> +Added <code>operator-></code> to iterator wrapper for C++0x compatibility. +Thanks to Geoffrey Noel for reports.</td> +</tr> +<tr><td valign="top" align="right">version 0.683: (March 2007)</td> +<td>Fix portability issues for GCC and non-STLport libraries. Fix longest_match.<br> +Thanks to Arjen Wagenaar for several reports, fixes and encouragement. Thanks also to Michel Tourn for reports.</td> +</tr> +<tr><td valign="top" align="right">version 0.68: (Dec 2006)</td> +<td>Implement TST_NODE_COUNT_TYPE macro, which can be used to control node size on 64-bit systems. + See \ref containers::ternary_tree "class ternary_tree"</td> +</tr> +<tr><td valign="top" align="right">version 0.68 (alpha):</td> +<td>Reimplemented node type. Do proper management of value type (was inconsistent, partly unimplemented - duh!)</td> +</tr> +<!--tr> +<tr><td valign="top" align="right">version 0.676:</td> +<td>Modified containers to follow C++0x draft standard: \n +Added \c cbegin, \c cend methods returning \c const_iterator, and \c crbegin, \c crend +returning \c const_reverse_iterator, to make it easier to code with const-correctness. \n +\c erase(iterator pos); and \c erase(iterator first, iterator last); methods now return iterators.</td> +<tr><td valign="top" align="right">version 0.675:</td> +<td>All Structured Container classes implemented. Structured search interface TBD. 
+</td--> +</table> + + +\anchor subkey_search_overview <hr> +<h2>Sub-key, or Structure Searches</h2> +<span style="color:#905050;">(a new interface for these searches will be specified in the future)</span> + +Ternary trees allow searches that match parts of keys and ignore mismatches in other parts.\n +In the current interface we specify a small number of searches facilitated by the tree structure; +the Partial Match and Hamming searches are defined in several other implementations +(showcased in \ref note_1 "Bentley and Sedgewick" code). +The Levenshtein and combinatorial searches are not found in other ternary trees (that I know of). + +<table border="1" cellspacing="0"> + <tr><th bgcolor="#f0f0ff">Name (function name)</th><th bgcolor="#f0f0ff">Description</th></tr> + <tr><th> + Prefix match (\ref containers::ternary_tree::prefix_range "prefix_range")</th><td> + Finds keys sharing a common prefix, returns a pair of iterators.</td></tr> + <tr><th> + Longest match (\ref containers::ternary_tree::longest_match "longest_match")</th><td> + Finds the longest key that matches beginning of search string. + A typical application is to tokenize a string using the ternary tree as dictionary.</td></tr> + <tr><th> + Partial match, or wildcard search (\ref containers::ternary_tree::partial_match_search "partial_match_search")</th><td> + Accepts a search string with wildcard characters that will match any letter, + eg "b?nd" would match "band", "bend", "bind", "bond" in an English dictionary.</td></tr> + <tr><th> + Search allowing \c N mismatches, + (\ref containers::ternary_tree::hamming_search "hamming_search"<span style="font-weight:normal;"></span>)</th><td> + Accepts a search string and an integer \c dist indicating how many non-matching letters are allowed, + then finds keys matching search string that have at most \a dist mismatches. 
+ This works like a partial match search with all combinations of \a dist + wildcards in the search string.\n + \c hamming_search("band", 1) matches the wildcard search plus "bald", "bane" and "wand", etc. \n + The version here, following DDJ code, extends the strict Hamming search by also allowing shorter and longer + strings; a search for "band", \a dist = 1, also finds "ban" and "bandy" etc.\n + See also http://wikipedia.org/wiki/Hamming_distance</td></tr> + <tr><th> + Levenshtein distance search (\ref containers::ternary_tree::levenshtein_search "levenshtein_search" + <span style="font-weight:normal;">- consider descriptive name</span>)</th><td> + + Hamming search matches characters in fixed position, allowing substitution of \a dist chars. + Levenshtein search also allows shifting parts of the search string by insertion or skipping chars (in \a dist places). + So <code>levenshtein_search("band", 1) </code> extends the hamming_search set with "and" and "bland", etc. + A typical application is to match misspelt words.\n + See also http://wikipedia.org/wiki/Levenshtein_distance</td></tr> + <tr><th> + Combinatorial or "scrabble" search (\ref containers::ternary_tree::combinatorial_search "combinatorial_search")</th><td> + Finds all keys using the characters in search string. \c combinatorial_search("band") finds + "ad", "and", "bad", "dab", "nab", etc. A count of wildcards can be added, also allowing + nonmatching characters (use with care, values over 10% of average key length + may cause the algorithm to traverse a large part of the tree).</td></tr> +</table> + +See \ref usage_imprecise_searches "advanced search overview" in the tutorial. + +These searches are defined for all containers in this library. +But they are also marked as deprecated (to be replaced by generic algorithms with same interface). +For a relative performance comparison of imprecise searches, see the second table in \ref perf_notes. 
+ +<h3>Future directions</h3> +The searches currently defined are clearly special cases in a sea of search possibilities. +We have only defined searches that are relatively efficient, compared to other combinations of containers and algorithms. +But there can be many variations on the available searches: increasing Hamming/Levenshtein distance +at the end of words, or matching limited ranges of characters (eg allowing mismatches only in vowels), etc. + +The next step for this project is to support a more flexible low-level interface for +traversing and filtering tree nodes. +The interface for these "structured searches" is open for consideration, but it +will basically define sub-key iterators, conversion of full-key from sub-key iterators, +and a small collection of algorithms operating on these sub-key iterators. + +At least the following operations are needed: + + - sub-key match: matching a part of a key (prefix, or starting from current char position) + - key element range increment: from a sub-key position, match a range of characters + in next position (returns a list of sub-key iterators? - or iterator-like operation?) + - conversion from sub-key iterator to full-key iterator range (nearest and post-furthest + keys in the subtree) + - \c is_key(subkey_iterator pos): true if end-of-key exists at iterator position. + - \c count_elements(subkey_iterator pos): returns number of available key elements at position. + - In all predefined algorithms above, either a specific, or any char is matched, + we would also support arbitrary char sets (possibly with special case for char ranges). 
+ + */ + +/** \page tst_reference Reference +<center><table bgcolor="#fbf9e5" style="border: thin dotted #808000;" width="95%" border=0> +<tr> +<td> +<dl> + <dt>\ref structured_concept "Structured Container concept"</dt> + <dt>\ref ref_sethpp "Header < structured_set.hpp >"</dt> + <dt>\ref ref_maphpp "Header < structured_map.hpp >"</dt> + <dt>\ref ref_tsthpp "Header < ternary_tree.hpp >"</dt> + <dt>\ref ref_iterhpp "Header < iterator_wrapper.hpp >"</dt> +</dl> +</td> +</tr></table></center> + +<hr> + +\anchor ref_sethpp +<h2>Header < <a href="../structured_set.hpp">%structured_set.hpp</a> > synopsis</h2> +<pre> +\b namespace containers { + \b template <\b class Key, + \b class Comp = std::less<\b typename Key::value_type>, + \b class Alloc = std::allocator<Key> > + \b class \ref containers::structured_set "structured_set"; + + \b template <\b class Key, + \b class Comp = std::less<\b typename Key::value_type>, + \b class Alloc = std::allocator<Key> > + \b class \ref containers::structured_multiset "structured_multiset"; +} +</pre> + +\anchor ref_maphpp +<h2>Header < <a href="../structured_map.hpp">%structured_map.hpp</a> > synopsis</h2> +<pre> +\b namespace containers { + \b template <\b class Key, + \b class T, + \b class Comp = std::less<\b typename Key::value_type>, + \b class Alloc = std::allocator<std::pair<\b const Key, T> > > + \b class \ref containers::structured_map "structured_map"; + + \b template <\b class Key, + \b class T, + \b class Comp = std::less<\b typename Key::value_type>, + \b class Alloc = std::allocator<std::pair<\b const Key, T> > > + \b class \ref containers::structured_multimap "structured_multimap"; +} +</pre> + +<hr> +Supplementary header files needed to support structured_set and -map classes. 
+ + +\anchor ref_tsthpp +<h2>Header < <a href="../ternary_tree.hpp">%ternary_tree.hpp</a> > synopsis</h2> +<pre> +\b namespace containers { + + \b template <\b class Key, + \b class T, + \b class Comp = std::less<\b typename Key::value_type>, + \b class Alloc = std::allocator<std::pair<\b const Key, T> > > + \b class \ref containers::ternary_tree "ternary_tree"; + + \b template <\b class TreeT, \b class IteratorT> + \b class \ref containers::search_results_list "search_results_list"; + +} +</pre> + + +\anchor ref_iterhpp +<h2>Header < <a href="../iterator_wrapper.hpp">%iterator_wrapper.hpp</a> > synopsis</h2> +<pre> +\b namespace iterators { + + \b template <\b class T> \b struct const_traits; + \b template <\b class T> \b struct nonconst_traits; + + \b template <\b class BaseIterT, + \b class TraitsT, // either const_traits<T> or nonconst_traits<T> + \b class IterCatT = std::bidirectional_iterator_tag > + \b class \ref iterators::iterator_wrapper "iterator_wrapper"; +} +</pre> + +*/ + +/** +\page structured_concept Structured Associative Container Concept + +<span style="color:#905050;">(a preliminary sketch of the formal technical concept description)</span> + +A Structured Associative Container is a specialization of the C++ 98 standard concept +<a target="sgi" href="http://www.sgi.com/tech/stl/SortedAssociativeContainer.html">Sorted Associative Container</a>, +with extended interface. + +The template parameters are similar to that of the Associated Containers: + +<code> structured_set<Key, Comp, Alloc>; </code>\n +<code> structured_map<Key, Value, Comp, Alloc>; </code>\n + +where: + - \c <b>Key</b> type is itself a container (eg a \c std::string or \c std::wstring) + - \c <b>Comp</b> is a comparison operator that imposes a sort order on \c Key::value_type elements \n + (so if \c Key is string, \c Comp compares \c char, if \c Key is \c wstring, \c Comp applies to \c wchar_t). 
+ - \c <b>Value</b> can be any Assignable type + - \c <b>Alloc</b> is an allocator that manages all memory allocation for the container. + +The \c Comp and the \c Alloc types have default template arguments. + +In other words Structured containers are like Sorted Associative Containers, BUT + - add the requirement on Key template type to be a + <a target="sgi" href="http://www.sgi.com/tech/stl/ForwardContainer.html">Forward Container</a>.\n + For example, \c std::basic_string<CharT> is compatible with this requirement. + - change the requirement on the \c Comp (comparator) template argument to operate on + \c key_type::value_type elements (rather than on \c key_type itself). + Like Sorted Associative comparator, the \c Comp type shall define a less-like comparison, a + <a target="sgi" href="http://www.sgi.com/tech/stl/StrictWeakOrdering.html">Strict Weak Ordering</a> + of key-elements. + +<b>Associated types</b> + - \b char_compare: less-like comparison of key elements (establishing a Strict Weak Ordering). + The <a target="sgi" href="http://www.sgi.com/tech/stl/AssociativeContainer.html">Associative Container</a> + \c key_compare type is also provided, but is defined in terms of \c char_compare. \n + - \b subkey_iterator: Used in structure searches. Convertible to iterator (TBD). + +In consequence it allows searches involving subparts of keys, ie with shared prefix and/or +with shared middle parts. + +<hr> +<h3>Deprecated search interface</h3> + +In the first iteration, additional searches are provided as methods on the containers. +This will be changed to use free functions operating on \c subkey_iterator. +The deprecated search methods will still be provided as convenience functions; +to migrate your code from present version to the new interface, will mean moving +the object name to the first argument, but also to respecify the search_results_list type. 
+(This sloppy-hackish type is by itself reason not to keep the method interface) + +See \ref subkey_search_overview "Structured search overview" +and \ref tst_structsearch "ternary_tree Structure search section". +*/ + +/* + +\b Notation \n +<table border=0> +<tr><td>\c X <td>A type that is a model of Associative Container </td></tr> +<tr><td>\c a <td>Object of type \c X </tr> +<tr><td>\c k <td>Object of type \c X::key_type </tr> +<tr><td>\c p, \c q <td>Object of type \c X::char_iterator </tr> +<tr><td>\c c <td>Object of type \c X::char_type </tr> +<tr><td>\c o <td>Object modelling output iterator </tr> +<tr><td>\c i <td>Object of type \c X::size_type </tr> +</dl> + +<table border=1> +<tr><th>Name</th><th>Expression</th><th>Return value</th> +<tr><td>Prefix match</td><td><code>a.prefix_range(k)</code></td><td> + \c std::pair<iterator, iterator> if \c a is mutable, otherwise <br>\c std::pair<const_iterator, const_iterator></td></tr> +<tr><td>Longest match</td><td><code>a.longest_match(p, q)</code></td><td> + \c iterator if \c a is mutable, otherwise \c const_iterator</td></tr> +<tr><td>Partial match, or <br>wildcard search</td><td><code>a.partial_match_search(k, o, c)</code></td><td> + The output iterator \c o</td></tr> +<tr><td>Hamming search</td><td><code>a.hamming_search(k, o, i)</code></td><td> + The output iterator \c o</td></tr> +<tr><td>Levenshtein search</td><td><code>a.levenshtein_search(k, o, i)</code></td><td> + The output iterator \c o</td></tr> +<tr><td>Combinatorial or <br>"scrabble" search</td><td><code>a.combinatorial_search(k, o, i)</code></td><td> + The output iterator \c o</td></tr> +</table> + +*/ + +/** \page tst_impl Implementation Details + * (In the following, "original" and "DDJ" code refers to the article by Bentley/Sedgewick + * published in Dr Dobb's Journal, and the accompanying C source code - see \ref tst_links) + * + * In most implementations, a ternary tree node has the following members: \code + * struct node { + * char splitchar; 
// key letter, or 0 at end of key + * node *hikid; // subtree of keys with higher value than splitchar + * node *eqkid; // subtree matching splitchar (pointer to mapped value at end-of-key node) + * node *lokid; // subtree less than splitchar + * node *parent; // necessary for iteration (not needed for insert/find) + * }; \endcode + * + * This means that each node is 1 char plus three or four pointers size. + * On many systems, struct member alignment makes the char member consume size of one pointer + * as well, so we have 4 (or 5) x sizeof(pointer) per node in the tree. + * With several kinds of dictionaries, the node count ends up at around 0.3-0.5 times + * total key length (since keys share nodes). + * This is even more expensive on 64-bit machines. + * + * There are several variation points in the node class: + * -# the DDJ C code designates an invalid value of zero to indicate end-of-string. We want to + * allow any string as key, so the end-of-string representation should change. + * We note that on many platforms, C/C++ struct member alignment leaves a "hole" + * in the binary representation of the node, between the char and the first pointer ("hikid"). + * On such systems there is no space cost to use another char-sized value to indicate end node. + * This also works for \c wchar_t strings on 32- or 64-bit systems. + * -# The original code stores a value for each string in the terminal node's "equal" pointer. + * The value in DDJ code is always a pointer to the terminated string. This is used to make + * advanced searches work (they return an array of pointers to strings stored in end-nodes). + * In reality this means that strings may need to be copied on insertion (not reflected in DDJ timings). + * -# Original DDJ code does not support iterating over strings in the tree. + * Idiomatic STL-like container style strongly suggests that iteration should be supported. 
+ * This is fairly simple to implement if a parent pointer is added to the node struct: + * Because when an end-node is reached, the iterator must backtrack to find the previous + * branch point. + * + * The parent pointer also makes it possible to recover the inserted string by walking nodes + * backward from a terminal node to the root. Complexity is key length, plus log(tree.size), + * but it means inserted keys do not \b have to be copied to the end node. + * We opt to cache keys in iterators, at no measurable extra cost in iteration. + + * Instead of the key, an arbitrary value can be associated with endnodes. + * However, it should not be allowed to increase node size, since most nodes in the tree are not endnodes. + * In this library we store the mapped value directly in end-node if it is <tt> <= \c sizeof(void*). </tt> + * Larger objects are allocated on the heap, and a pointer to the copy is stored in end-node + * (the copy is managed by the tree). + * + * <h4>Now for some optimization</h4> + * We use a \c vector<node> as pool allocator, and record eq-hi-lo links as vector index instead of pointers. + * The pool allocation essentially follows original C code insert2() principle. + * For us, it also simplifies reallocation, since pointers do not have to be rebound; + * the indices are always valid. + * This has the following consequences: + * - allow the option of 4-byte indices also on 64-bit systems (with obvious resulting tree size limit) + * - When a new key is inserted, the last part (unique to the key) is always allocated in a batch. + * This means that one node member, \c "eqkid", becomes redundant, as it is always the next index + * (except after terminal nodes of course). + * - in DDJ code the end-node value is stored in union with the eqkid. We note that the \c lokid node index + * is also unused by end-nodes (as no char should be lower than zero), so all endnode children + * are linked to the hi node. 
+ * + * (In our binary-cognizant version where zero is a regular char value, this still holds, + * we just change the end-node test accordingly) + * + * In the final cut, our node struct data members appear roughly like this: \code + * struct node { + * CharType splitchar; // key letter, or 0 at end of key (to make sure lokid is never allowed) + ... [truncated message content] |
From: <den...@us...> - 2009-07-12 16:27:06
|
Revision: 122 http://opentrep.svn.sourceforge.net/opentrep/?rev=122&view=rev Author: denis_arnaud Date: 2009-07-12 16:27:02 +0000 (Sun, 12 Jul 2009) Log Message: ----------- [Test] First basic working version of the travel search string. Modified Paths: -------------- trunk/opentrep/test/parsers/search_string_parser.cpp Modified: trunk/opentrep/test/parsers/search_string_parser.cpp =================================================================== --- trunk/opentrep/test/parsers/search_string_parser.cpp 2009-07-12 16:13:18 UTC (rev 121) +++ trunk/opentrep/test/parsers/search_string_parser.cpp 2009-07-12 16:27:02 UTC (rev 122) @@ -361,8 +361,10 @@ date = lexeme_d[ limit_d(1u,12u)[uint2_p][assign_a(self._searchString._tmpDate._month)] - >> '-' >> limit_d(1u,31u)[uint2_p][assign_a(self._searchString._tmpDate._day)] - >> '-' >> limit_d(2000u,2099u)[uint4_p][assign_a(self._searchString._tmpDate._year)]] + >> boost::spirit::chset_p("/-") + >> limit_d(1u,31u)[uint2_p][assign_a(self._searchString._tmpDate._day)] + >> boost::spirit::chset_p("/-") + >> limit_d(2000u,2099u)[uint4_p][assign_a(self._searchString._tmpDate._year)]] ; preferred_airlines = @@ -383,11 +385,11 @@ ; passenger_number = - lexeme_d[ limit_d(0u, 20u)[uint2_p][store_passenger_number(self._searchString)] ] + lexeme_d[ limit_d(1u, 9u)[uint1_p][store_passenger_number(self._searchString)] ] ; passenger_type = - lexeme_d[ (repeat_p(4,20)[chset_p("0-9A-Z")])[store_adult_passenger_type(self._searchString)] ] + lexeme_d[ (repeat_p(3,20)[chset_p("0-9a-z")])[store_adult_passenger_type(self._searchString)] ] ; BOOST_SPIRIT_DEBUG_NODE (search_string); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <den...@us...> - 2009-07-12 16:13:23
|
Revision: 121 http://opentrep.svn.sourceforge.net/opentrep/?rev=121&view=rev Author: denis_arnaud Date: 2009-07-12 16:13:18 +0000 (Sun, 12 Jul 2009) Log Message: ----------- [Test] First compiling version of the travel search string. Modified Paths: -------------- trunk/opentrep/test/parsers/Makefile.am Added Paths: ----------- trunk/opentrep/test/parsers/search_string_parser.cpp Property Changed: ---------------- trunk/opentrep/test/parsers/ Property changes on: trunk/opentrep/test/parsers ___________________________________________________________________ Modified: svn:ignore - .libs .deps Makefile Makefile.in parameter_parser* full_calculator* schedule_parser* levenshtein* + .libs .deps Makefile Makefile.in parameter_parser* full_calculator* schedule_parser* search_string_parser* levenshtein* Modified: trunk/opentrep/test/parsers/Makefile.am =================================================================== --- trunk/opentrep/test/parsers/Makefile.am 2009-07-12 13:36:35 UTC (rev 120) +++ trunk/opentrep/test/parsers/Makefile.am 2009-07-12 16:13:18 UTC (rev 121) @@ -3,7 +3,8 @@ MAINTAINERCLEANFILES = Makefile.in -check_PROGRAMS = full_calculator parameter_parser schedule_parser levenshtein +check_PROGRAMS = full_calculator parameter_parser schedule_parser \ + search_string_parser levenshtein full_calculator_SOURCES = full_calculator.cpp full_calculator_CXXFLAGS = $(BOOST_CFLAGS) @@ -17,6 +18,10 @@ schedule_parser_CXXFLAGS = $(BOOST_CFLAGS) schedule_parser_LDADD = $(BOOST_LIBS) $(BOOST_DATE_TIME_LIB) +search_string_parser_SOURCES = search_string_parser.cpp +search_string_parser_CXXFLAGS = $(BOOST_CFLAGS) +search_string_parser_LDADD = $(BOOST_LIBS) $(BOOST_DATE_TIME_LIB) + levenshtein_SOURCES = levenshtein.cpp levenshtein_LDADD = Added: trunk/opentrep/test/parsers/search_string_parser.cpp =================================================================== --- trunk/opentrep/test/parsers/search_string_parser.cpp (rev 0) +++ 
trunk/opentrep/test/parsers/search_string_parser.cpp 2009-07-12 16:13:18 UTC (rev 121) @@ -0,0 +1,461 @@ +// C +#include <assert.h> +// STL +#include <iostream> +#include <sstream> +#include <fstream> +#include <string> +#include <map> +#include <set> +#include <vector> +// Boost (Extended STL) +#include <boost/date_time/posix_time/posix_time.hpp> +#include <boost/date_time/gregorian/gregorian.hpp> +// Boost Spirit (Parsing) +#define BOOST_SPIRIT_DEBUG +#include <boost/spirit/core.hpp> +#include <boost/spirit/attribute.hpp> +#include <boost/spirit/utility/functor_parser.hpp> +#include <boost/spirit/utility/loops.hpp> +#include <boost/spirit/utility/chset.hpp> +#include <boost/spirit/utility/confix.hpp> +#include <boost/spirit/iterator/file_iterator.hpp> +#include <boost/spirit/actor/push_back_actor.hpp> +#include <boost/spirit/actor/assign_actor.hpp> + +// Type definitions +typedef char char_t; +typedef char const* iterator_t; +//typedef boost::spirit::file_iterator<char_t> iterator_t; +typedef boost::spirit::scanner<iterator_t> scanner_t; +typedef boost::spirit::rule<scanner_t> rule_t; + +/** Place. */ +struct Place_T { + // Attributes + std::string _name; + std::string _code; + /** Constructor. */ + Place_T () : _name (""), _code ("") {} + /* Display. */ + void display() const { + std::cout << "Place: " << _name << " (" << _code << ")" << std::endl; + } +}; + +/** List of Place strucutres. */ +typedef std::vector<Place_T> PlaceList_T; + +/** Date. */ +struct Date_T { + // Attributes + boost::gregorian::date _date; + unsigned int _reldays; + unsigned int _day; + unsigned int _month; + unsigned int _year; + /** Constructor. */ + Date_T () : _reldays (14), _day(1), _month(1), _year(1970) {} + /* Display. */ + void display() const { + std::cout << "Date: " << _date << " (" << _day << "/" << _month + << "/" << _year << "), i.e. in " << _reldays << " days" + << std::endl; + } + /** Set the date from the staging details. 
*/ + boost::gregorian::date getDate() const { + return boost::gregorian::date (_year, _month, _day); + } +}; + +/** List of Date strucutres. */ +typedef std::vector<Date_T> DateList_T; + +/** Airline. */ +struct Airline_T { + // Attributes + bool _isPreferred; + std::string _name; + std::string _code; + /** Constructor. */ + Airline_T () : _isPreferred (true), _name(""), _code("") {} + /* Display. */ + void display() const { + std::cout << "Airline: " << _name << " (" << _code << ")" << std::endl; + } +}; + +/** List of Airline strucutres. */ +typedef std::vector<Airline_T> AirlineList_T; + +/** Passenger. */ +struct Passenger_T { + // Attributes + typedef enum { ADULT = 0, CHILD, PET, LAST_VALUE } PassengerType_T; + static const std::string _labels[LAST_VALUE]; + PassengerType_T _type; + unsigned short _number; + /** Constructor. */ + Passenger_T () : _type(ADULT), _number(1) {} + /* Display. */ + void display() const { + std::cout << "Passenger: " << _number << " (" << _labels[_type] << ")" + << std::endl; + } +}; + +/** Passenger type labels. */ +const std::string Passenger_T::_labels[Passenger_T::LAST_VALUE] = + { "Adult", "Child", "Pet" }; + +/** List of Passenger strucutres. */ +typedef std::vector<Passenger_T> PassengerList_T; + +/** Search string. */ +struct SearchString_T { + // Attributes + PlaceList_T _placeList; + DateList_T _dateList; + AirlineList_T _airlineList; + PassengerList_T _passengerList; + + /** Constructor. */ + SearchString_T () {} + + /* Display. 
*/ + void display() const { + std::cout << std::endl; + + for (PlaceList_T::const_iterator itPlace = _placeList.begin(); + itPlace != _placeList.end(); ++itPlace) { + const Place_T& lPlace = *itPlace; + lPlace.display(); + } + + for (DateList_T::const_iterator itDate = _dateList.begin(); + itDate != _dateList.end(); ++itDate) { + const Date_T& lDate = *itDate; + lDate.display(); + } + + for (AirlineList_T::const_iterator itAirline = _airlineList.begin(); + itAirline != _airlineList.end(); ++itAirline) { + const Airline_T& lAirline = *itAirline; + lAirline.display(); + } + + for (PassengerList_T::const_iterator itPassenger = _passengerList.begin(); + itPassenger != _passengerList.end(); ++itPassenger) { + const Passenger_T& lPassenger = *itPassenger; + lPassenger.display(); + } + + std::cout << "-- Staging --" << std::endl; + _tmpPlace.display(); + } + + // //// Staging //// + Place_T _tmpPlace; + Date_T _tmpDate; + Airline_T _tmpAirline; + Passenger_T _tmpPassenger; +}; + +/////////////////////////////////////////////////////////////////////////////// +// +// Semantic actions +// +/////////////////////////////////////////////////////////////////////////////// +namespace { + + /** Store the parsed place element. */ + struct store_place_element { + store_place_element (SearchString_T& ioSearchString) + : _searchString (ioSearchString) {} + + void operator() (iterator_t iStr, iterator_t iStrEnd) const { + std::string lPlace (iStr, iStrEnd); + // std::cout << "Place: " << lPlace << std::endl; + + // Set the place + _searchString._tmpPlace._name += " " + lPlace; + + // Add the parsed place to the list + // _searchString._placeList.push_back (_searchString._tmpPlace); + } + + SearchString_T& _searchString; + }; + + /** Store a parsed date. 
*/ + struct store_date { + store_date (SearchString_T& ioSearchString) + : _searchString (ioSearchString) {} + + void operator() (iterator_t iStr, iterator_t iStrEnd) const { + _searchString._tmpDate._date = _searchString._tmpDate.getDate(); + // std::cout << "Board date: " + // << _searchString._date << std::endl; + + // Add the parsed date to the list + _searchString._dateList.push_back (_searchString._tmpDate); + } + + SearchString_T& _searchString; + }; + + /** Store the airline sign (+/-). */ + struct store_airline_sign { + store_airline_sign (SearchString_T& ioSearchString) + : _searchString (ioSearchString) {} + + void operator() (bool iAirlineSign) const { + _searchString._tmpAirline._isPreferred = iAirlineSign; + // std::cout << "Airline is preferred: " << iAirlineSign << std::endl; + } + + SearchString_T& _searchString; + }; + + /** Store the parsed airline code. */ + struct store_airline_code { + store_airline_code (SearchString_T& ioSearchString) + : _searchString (ioSearchString) {} + + void operator() (iterator_t iStr, iterator_t iStrEnd) const { + std::string lAirlineCode (iStr, iStrEnd); + _searchString._tmpAirline._code = lAirlineCode; + // std::cout << "Airline code: " << lAirlineCode << std::endl; + + // Add the parsed airline to the list + _searchString._airlineList.push_back (_searchString._tmpAirline); + } + + SearchString_T& _searchString; + }; + + /** Store the parsed airline name. */ + struct store_airline_name { + store_airline_name (SearchString_T& ioSearchString) + : _searchString (ioSearchString) {} + + void operator() (iterator_t iStr, iterator_t iStrEnd) const { + std::string lAirlineName (iStr, iStrEnd); + _searchString._tmpAirline._name = lAirlineName; + // std::cout << "Airline: " << lAirlineName << std::endl; + + // Add the parsed airline to the list + _searchString._airlineList.push_back (_searchString._tmpAirline); + } + + SearchString_T& _searchString; + }; + + /** Store the parsed number of passengers. 
*/ + struct store_passenger_number { + store_passenger_number (SearchString_T& ioSearchString) + : _searchString (ioSearchString) {} + + void operator() (unsigned int iNumber) const { + _searchString._tmpPassenger._number = iNumber; + // std::cout << "Number of passengers: " << iNumber << std::endl; + } + + SearchString_T& _searchString; + }; + + /** Store the parsed passenger type. */ + struct store_adult_passenger_type { + store_adult_passenger_type (SearchString_T& ioSearchString) + : _searchString (ioSearchString) {} + + void operator() (iterator_t iStr, iterator_t iStrEnd) const { + std::string lPassengerType (iStr, iStrEnd); + _searchString._tmpPassenger._type = Passenger_T::ADULT; + // std::cout << "Passenger type: " << lPassengerType << std::endl; + + // Add the parsed passenger to the list + _searchString._passengerList.push_back (_searchString._tmpPassenger); + } + + SearchString_T& _searchString; + }; + +} + +// /////////// Utilities ///////////// +/** 1-digit-integer parser */ +boost::spirit::int_parser<unsigned int, 10, 1, 1> int1_p; +/** 1-digit-integer parser */ +boost::spirit::uint_parser<unsigned int, 10, 1, 1> uint1_p; +/** 2-digit-integer parser */ +boost::spirit::uint_parser<int, 10, 2, 2> uint2_p; +/** 4-digit-integer parser */ +boost::spirit::uint_parser<int, 10, 4, 4> uint4_p; +/** Up-to-4-digit-integer parser */ +boost::spirit::uint_parser<int, 10, 1, 4> uint1_4_p; + +/////////////////////////////////////////////////////////////////////////////// +// +// Our calculator grammar (using subrules) +// +/////////////////////////////////////////////////////////////////////////////// + /** + Sample + guadeloupe rio de janeiro 07/22/2009 +aa -ua 2 adults 1 dog + + Grammar: + search_string ::= places + [dates] + *(preferred_airlines) + *(passengers) + dates ::= board_date [off_date] + places ::= [board_place] off_place + board_place ::= place_elements + off_place ::= place_elements + place_elements ::= country | city | airport + country ::= 
country_code | country_name + city ::= city_code | city_name + airport ::= airport_code | airport_name + preferred_airlines ::= [+|-] airline_code | airline_name + passengers ::= adult_number adult_description + [child_number child_description] + [pet_number pet_description] + adult_description ::= 'adult' | 'adults' | 'pax' | 'passengers' + child_description ::= 'child' | 'children' | 'kid' | 'kids' + pet_description ::= 'dog' | 'dogs' | 'cat' | 'cats' + */ + +using namespace boost::spirit; + +/** Grammar for the search string parser. */ +struct SearchStringParser : + public boost::spirit::grammar<SearchStringParser> { + + SearchStringParser (SearchString_T& ioSearchString) + : _searchString (ioSearchString) { + } + + template <typename ScannerT> + struct definition { + definition (SearchStringParser const& self) { + + search_string = places + >> !( dates ) + >> *( preferred_airlines ) + >> *( passengers ) + ; + + places = + +( place_element ) + ; + + place_element = + lexeme_d[ (repeat_p(1,20)[chset_p("a-z")])[store_place_element(self._searchString)] ] + ; + + dates = + date[store_date(self._searchString)] + >> !date[store_date(self._searchString)] + ; + + date = + lexeme_d[ limit_d(1u,12u)[uint2_p][assign_a(self._searchString._tmpDate._month)] + >> '-' >> limit_d(1u,31u)[uint2_p][assign_a(self._searchString._tmpDate._day)] + >> '-' >> limit_d(2000u,2099u)[uint4_p][assign_a(self._searchString._tmpDate._year)]] + ; + + preferred_airlines = + !(boost::spirit::sign_p)[store_airline_sign(self._searchString)] + >> airline_code | airline_name + ; + + airline_code = + lexeme_d[ (repeat_p(2,3)[chset_p("0-9a-z")])[store_airline_code(self._searchString)] ] + ; + + airline_name = + lexeme_d[ (repeat_p(4,20)[chset_p("0-9a-z")])[store_airline_name(self._searchString)] ] + ; + + passengers = + passenger_number >> passenger_type + ; + + passenger_number = + lexeme_d[ limit_d(0u, 20u)[uint2_p][store_passenger_number(self._searchString)] ] + ; + + passenger_type = + lexeme_d[ 
(repeat_p(4,20)[chset_p("0-9A-Z")])[store_adult_passenger_type(self._searchString)] ] + ; + + BOOST_SPIRIT_DEBUG_NODE (search_string); + BOOST_SPIRIT_DEBUG_NODE (places); + BOOST_SPIRIT_DEBUG_NODE (place_element); + BOOST_SPIRIT_DEBUG_NODE (dates); + BOOST_SPIRIT_DEBUG_NODE (date); + BOOST_SPIRIT_DEBUG_NODE (preferred_airlines); + BOOST_SPIRIT_DEBUG_NODE (airline_code); + BOOST_SPIRIT_DEBUG_NODE (airline_name); + BOOST_SPIRIT_DEBUG_NODE (passengers); + BOOST_SPIRIT_DEBUG_NODE (passenger_number); + BOOST_SPIRIT_DEBUG_NODE (passenger_type); + } + + boost::spirit::rule<ScannerT> search_string, places, place_element, + dates, date, preferred_airlines, airline_code, airline_name, + passengers, passenger_number, passenger_type; + + boost::spirit::rule<ScannerT> const& start() const { return search_string; } + }; + + SearchString_T& _searchString; +}; + +// /////////////// M A I N ///////////////// +int main (int argc, char* argv[]) { + try { + + // File to be parsed + std::string lSearchString ("guadeloupe rio de janeiro 07/22/2009 +aa -ua 2 adults 1 dog"); + + // Read the command-line parameters + if (argc >= 1 && argv[1] != NULL) { + std::istringstream istr (argv[1]); + istr >> lSearchString; + } + + // Read the search string + iterator_t lStringIterator = lSearchString.c_str(); + + // Instantiate the structure that will hold the result of the parsing. 
+ SearchString_T lSearchStringStruct; + SearchStringParser lSearchStringParser (lSearchStringStruct); + boost::spirit::parse_info<iterator_t> info = + boost::spirit::parse (lStringIterator, lSearchStringParser, + boost::spirit::space_p); + + // DEBUG + std::cout << "Search string:" << std::endl; + lSearchStringStruct.display(); + + std::cout << "-------------------------" << std::endl; + if (info.full) { + std::cout << "Parsing succeeded" << std::endl; + + } else { + std::cout << "Parsing failed" << std::endl; + } + std::cout << "-------------------------" << std::endl; + + } catch (const std::exception& stde) { + std::cerr << "Standard exception: " << stde.what() << std::endl; + return -1; + + } catch (...) { + return -1; + } + + return 0; +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <den...@us...> - 2009-07-12 13:36:46
|
Revision: 120 http://opentrep.svn.sourceforge.net/opentrep/?rev=120&view=rev Author: denis_arnaud Date: 2009-07-12 13:36:35 +0000 (Sun, 12 Jul 2009) Log Message: ----------- [Test] Added some tests on parsers. Modified Paths: -------------- trunk/opentrep/configure.ac trunk/opentrep/test/Makefile.am Added Paths: ----------- trunk/opentrep/test/parsers/ trunk/opentrep/test/parsers/Makefile.am trunk/opentrep/test/parsers/full_calculator.cpp trunk/opentrep/test/parsers/levenshtein.cpp trunk/opentrep/test/parsers/parameter_parser.cpp trunk/opentrep/test/parsers/schedule_parser.cpp trunk/opentrep/test/parsers/test_full_calculator.sh trunk/opentrep/test/parsers/test_parameter_parser.sh trunk/opentrep/test/parsers/test_schedule_parser.sh trunk/opentrep/test/parsers/world_schedule.csv Modified: trunk/opentrep/configure.ac =================================================================== --- trunk/opentrep/configure.ac 2009-07-12 12:59:01 UTC (rev 119) +++ trunk/opentrep/configure.ac 2009-07-12 13:36:35 UTC (rev 120) @@ -233,6 +233,7 @@ doc/sourceforge/howto_release_opentrep.html po/Makefile.in test/com/Makefile + test/parsers/Makefile test/Makefile win32/Makefile) AC_OUTPUT Modified: trunk/opentrep/test/Makefile.am =================================================================== --- trunk/opentrep/test/Makefile.am 2009-07-12 12:59:01 UTC (rev 119) +++ trunk/opentrep/test/Makefile.am 2009-07-12 13:36:35 UTC (rev 120) @@ -4,7 +4,7 @@ MAINTAINERCLEANFILES = Makefile.in ## -SUBDIRS = com +SUBDIRS = com parsers ## check_PROGRAMS = IndexBuildingTestSuite Property changes on: trunk/opentrep/test/parsers ___________________________________________________________________ Added: svn:ignore + .libs .deps Makefile Makefile.in parameter_parser* full_calculator* schedule_parser* levenshtein* Added: trunk/opentrep/test/parsers/Makefile.am =================================================================== --- trunk/opentrep/test/parsers/Makefile.am (rev 0) +++ 
trunk/opentrep/test/parsers/Makefile.am 2009-07-12 13:36:35 UTC (rev 120) @@ -0,0 +1,24 @@ +## command sub-directory +include $(top_srcdir)/Makefile.common + +MAINTAINERCLEANFILES = Makefile.in + +check_PROGRAMS = full_calculator parameter_parser schedule_parser levenshtein + +full_calculator_SOURCES = full_calculator.cpp +full_calculator_CXXFLAGS = $(BOOST_CFLAGS) +full_calculator_LDADD = $(BOOST_LIB) + +parameter_parser_SOURCES = parameter_parser.cpp +parameter_parser_CXXFLAGS = $(BOOST_CFLAGS) +parameter_parser_LDADD = $(BOOST_LIB) + +schedule_parser_SOURCES = schedule_parser.cpp +schedule_parser_CXXFLAGS = $(BOOST_CFLAGS) +schedule_parser_LDADD = $(BOOST_LIBS) $(BOOST_DATE_TIME_LIB) + +levenshtein_SOURCES = levenshtein.cpp +levenshtein_LDADD = + +EXTRA_DIST = test_full_calculator.sh test_parameter_parser.sh \ + test_schedule_parser.sh Added: trunk/opentrep/test/parsers/full_calculator.cpp =================================================================== --- trunk/opentrep/test/parsers/full_calculator.cpp (rev 0) +++ trunk/opentrep/test/parsers/full_calculator.cpp 2009-07-12 13:36:35 UTC (rev 120) @@ -0,0 +1,131 @@ +/*============================================================================= + Copyright (c) 2002-2003 Joel de Guzman + http://spirit.sourceforge.net/ + + Use, modification and distribution is subject to the Boost Software + License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) + =============================================================================*/ +//////////////////////////////////////////////////////////////////////////// +// +// Full calculator example demonstrating Phoenix +// This is discussed in the "Closures" chapter in the Spirit User's Guide. 
+// +// [ JDG 6/29/2002 ] +// +//////////////////////////////////////////////////////////////////////////// +#include <boost/spirit/core.hpp> +#include <boost/spirit/attribute.hpp> +#include <iostream> +#include <string> + +//////////////////////////////////////////////////////////////////////////// +using namespace std; +using namespace boost::spirit; +using namespace phoenix; + +//////////////////////////////////////////////////////////////////////////// +// +// Our calculator grammar using phoenix to do the semantics +// +// Note: The top rule propagates the expression result (value) upwards +// to the calculator grammar self.val closure member which is +// then visible outside the grammar (i.e. since self.val is the +// member1 of the closure, it becomes the attribute passed by +// the calculator to an attached semantic action. See the +// driver code that uses the calculator below). +// +//////////////////////////////////////////////////////////////////////////// +struct calc_closure : boost::spirit::closure<calc_closure, double> +{ + member1 val; +}; + +struct calculator : public grammar<calculator, calc_closure::context_t> +{ + template <typename ScannerT> + struct definition + { + definition(calculator const& self) + { + top = expression[self.val = arg1]; + + expression + = term[expression.val = arg1] + // Each further term is added ('+') or subtracted ('-') + >> *( ('+' >> term[expression.val += arg1]) + | ('-' >> term[expression.val -= arg1]) + ) + ; + + term + = factor[term.val = arg1] + >> *( ('*' >> factor[term.val *= arg1]) + | ('/' >> factor[term.val /= arg1]) + ) + ; + + factor + = ureal_p[factor.val = arg1] + | '(' >> expression[factor.val = arg1] >> ')' + | ('-' >> factor[factor.val = -arg1]) + | ('+' >> factor[factor.val = arg1]) + ; + } + + typedef rule<ScannerT, calc_closure::context_t> rule_t; + rule_t expression, term, factor; + rule<ScannerT> top; + + rule<ScannerT> const& + start() const { return top; } + }; +}; + 
+//////////////////////////////////////////////////////////////////////////// +// +// Main program +// +//////////////////////////////////////////////////////////////////////////// +int +main() +{ + cout << "/////////////////////////////////////////////////////////\n\n"; + cout << "\t\tExpression parser using Phoenix...\n\n"; + cout << "/////////////////////////////////////////////////////////\n\n"; + cout << "Type an expression...or [q or Q] to quit\n\n"; + + calculator calc; // Our parser + + string str; + while (getline(cin, str)) + { + if (str.empty() || str[0] == 'q' || str[0] == 'Q') + break; + + double n = 0; + parse_info<> info = parse(str.c_str(), calc[var(n) = arg1], space_p); + + // calc[var(n) = arg1] invokes the calculator and extracts + // the result of the computation. See calculator grammar + // note above. + + if (info.full) + { + cout << "-------------------------\n"; + cout << "Parsing succeeded\n"; + cout << "result = " << n << endl; + cout << "-------------------------\n"; + } + else + { + cout << "-------------------------\n"; + cout << "Parsing failed\n"; + cout << "stopped at: \": " << info.stop << "\"\n"; + cout << "-------------------------\n"; + } + } + + cout << "Bye... 
:-) \n\n"; + return 0; +} Added: trunk/opentrep/test/parsers/levenshtein.cpp =================================================================== --- trunk/opentrep/test/parsers/levenshtein.cpp (rev 0) +++ trunk/opentrep/test/parsers/levenshtein.cpp 2009-07-12 13:36:35 UTC (rev 120) @@ -0,0 +1,131 @@ +// Levenshtein Distance Algorithm: C++ Implementation by Anders Sewerin Johansen +// STL +#include <iostream> +#include <string> +#include <vector> + +// ////////////////////////////////////////////////////////////////// +int getLevenshteinDistance (const std::string& source, + const std::string& target) { + + // Step 1 + + const int n = source.length(); + const int m = target.length(); + if (n == 0) { + return m; + } + if (m == 0) { + return n; + } + + // Definition of Matrix Type + typedef std::vector< std::vector<int> > Matrix_T; + + Matrix_T matrix (n+1); + + // Size the vectors in the 2.nd dimension. Unfortunately C++ doesn't + // allow for allocation on declaration of 2.nd dimension of vec of vec + + for (int i = 0; i <= n; i++) { + matrix[i].resize(m+1); + } + + // Step 2 + + for (int i = 0; i <= n; i++) { + matrix[i][0]=i; + } + + for (int j = 0; j <= m; j++) { + matrix[0][j]=j; + } + + // Step 3 + + for (int i = 1; i <= n; i++) { + + const char s_i = source[i-1]; + + // Step 4 + + for (int j = 1; j <= m; j++) { + + const char t_j = target[j-1]; + + // Step 5 + + int cost; + if (s_i == t_j) { + cost = 0; + } + else { + cost = 1; + } + + // Step 6 + + const int above = matrix[i-1][j]; + const int left = matrix[i][j-1]; + const int diag = matrix[i-1][j-1]; + int cell = std::min ( above + 1, std::min (left + 1, diag + cost)); + + // Step 6A: Cover transposition, in addition to deletion, + // insertion and substitution. 
This step is taken from: + // Berghel, Hal ; Roach, David : "An Extension of Ukkonen's + // Enhanced Dynamic Programming ASM Algorithm" + // (http://www.acm.org/~hlb/publications/asm/asm.html) + + if (i>1 && j>1) { + int trans = matrix[i-2][j-2] + 1; + if (source[i-2] != t_j) { + trans++; + } + if (s_i != target[j-2]) { + trans++; + } + if (cell > trans) { + cell = trans; + } + } + + matrix[i][j] = cell; + } + } + + // Step 7 + + return matrix[n][m]; +} + + +// /////////// M A I N //////////////// +int main (int argc, char* argv[]) { + + const std::string lLax1Str = "los angeles"; + const std::string lLax2Str = "lso angeles"; + const std::string lRio1Str = "rio de janeiro"; + const std::string lRio2Str = "rio de janero"; + const std::string lRek1Str = "reikjavik"; + const std::string lRek2Str = "rekyavik"; + const std::string lSfoRio1Str = "san francisco rio de janeiro"; + const std::string lSfoRio2Str = "san francicso rio de janero"; + + std::cout << "Distance between '" << lLax1Str + << "' and '" << lLax2Str << "' is: " + << getLevenshteinDistance (lLax1Str, lLax2Str) << std::endl; + + std::cout << "Distance between '" << lRio1Str + << "' and '" << lRio2Str << "' is: " + << getLevenshteinDistance (lRio1Str, lRio2Str) << std::endl; + + std::cout << "Distance between '" << lRek1Str + << "' and '" << lRek2Str << "' is: " + << getLevenshteinDistance (lRek1Str, lRek2Str) << std::endl; + + std::cout << "Distance between '" << lSfoRio1Str + << "' and '" << lSfoRio2Str << "' is: " + << getLevenshteinDistance (lSfoRio1Str, lSfoRio2Str) << std::endl; + + return 0; +} Added: trunk/opentrep/test/parsers/parameter_parser.cpp =================================================================== --- trunk/opentrep/test/parsers/parameter_parser.cpp (rev 0) +++ trunk/opentrep/test/parsers/parameter_parser.cpp 2009-07-12 13:36:35 UTC (rev 120) @@ -0,0 +1,216 @@ +/*============================================================================= + Copyright (c) 2001-2003 Hartmut Kaiser + 
http://spirit.sourceforge.net/ + + Use, modification and distribution is subject to the Boost Software + License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) + =============================================================================*/ +/////////////////////////////////////////////////////////////////////////////// +// This sample show the usage of parser parameters. +// +// Parser parameters are used to pass some values from the outer parsing scope +// to the next inner scope. They can be imagined as the opposite to the return +// value paradigm, which returns some value from the inner to the next outer +// scope. See the "Closures" chapter in the User's Guide. + +#include <string> +#include <iostream> +#include <cassert> + +#if defined(_MSC_VER) /*&& !defined(__COMO__)*/ +#pragma warning(disable: 4244) +#pragma warning(disable: 4355) +#endif // defined(_MSC_VER) && !defined(__COMO__) + +#include <boost/spirit/core.hpp> +#include <boost/spirit/symbols/symbols.hpp> + +#include <boost/spirit/phoenix/tuples.hpp> +#include <boost/spirit/phoenix/tuple_helpers.hpp> +#include <boost/spirit/phoenix/primitives.hpp> +#include <boost/spirit/attribute/closure.hpp> + +/////////////////////////////////////////////////////////////////////////////// +// used namespaces +using namespace boost::spirit; +using namespace phoenix; +using namespace std; + +/////////////////////////////////////////////////////////////////////////////// +// Helper class for encapsulation of the type for the parsed variable names +class declaration_type +{ +public: + enum vartype { + vartype_unknown = 0, // unknown variable type + vartype_int = 1, // 'int' + vartype_real = 2 // 'real' + }; + + declaration_type() : type(vartype_unknown) + { + } + template <typename ItT> + declaration_type(ItT const &first, ItT const &last) + { + init(string(first, last-first-1)); + } + declaration_type(declaration_type const &type_) : type(type_.type) + { + } + 
declaration_type(string const &type_) : type(vartype_unknown) + { + init(type_); + } + + // access to the variable type + operator vartype const &() const { return type; } + operator string () + { + switch(type) { + default: + case vartype_unknown: break; + case vartype_int: return string("int"); + case vartype_real: return string("real"); + } + return string ("unknown"); + } + + void swap(declaration_type &s) { std::swap(type, s.type); } + +protected: + void init (string const &type_) + { + if (type_ == "int") + type = vartype_int; + else if (type_ == "real") + type = vartype_real; + else + type = vartype_unknown; + } + +private: + vartype type; +}; + +/////////////////////////////////////////////////////////////////////////////// +// +// used closure type +// +/////////////////////////////////////////////////////////////////////////////// +struct var_decl_closure : boost::spirit::closure<var_decl_closure, declaration_type> +{ + member1 val; +}; + +/////////////////////////////////////////////////////////////////////////////// +// +// symbols_with_data +// +// Helper class for inserting an item with data into a symbol table +// +/////////////////////////////////////////////////////////////////////////////// +template <typename T, typename InitT> +class symbols_with_data +{ +public: + typedef + symbol_inserter<T, boost::spirit::impl::tst<T, char> > + symbol_inserter_t; + + symbols_with_data(symbol_inserter_t const &add_, InitT const &data_) : + add(add_), data(as_actor<InitT>::convert(data_)) + { + } + + template <typename IteratorT> + symbol_inserter_t const & + operator()(IteratorT const &first_, IteratorT const &last) const + { + IteratorT first = first_; + return add(first, last, data()); + } + +private: + symbol_inserter_t const &add; + typename as_actor<InitT>::type data; +}; + +template <typename T, typename CharT, typename InitT> +inline +symbols_with_data<T, InitT> +symbols_gen(symbol_inserter<T, boost::spirit::impl::tst<T, CharT> > const &add_, + InitT 
const &data_) +{ + return symbols_with_data<T, InitT>(add_, data_); +} + +/////////////////////////////////////////////////////////////////////////////// +// The var_decl_list grammar parses variable declaration list + +struct var_decl_list : + public grammar<var_decl_list, var_decl_closure::context_t> +{ + template <typename ScannerT> + struct definition + { + definition(var_decl_list const &self) + { + // pass variable type returned from 'type' to list closure member 0 + decl = type[self.val = arg1] >> +space_p >> list(self.val); + + // m0 to access arg 0 of list --> passing variable type down to ident + list = ident(list.val) >> *(',' >> ident(list.val)); + + // store identifier and type into the symbol table + ident = (*alnum_p)[symbols_gen(symtab.add, ident.val)]; + + // the type of the decl is returned in type's closure member 0 + type = + str_p("int")[type.val = construct_<string>(arg1, arg2)] + | str_p("real")[type.val = construct_<string>(arg1, arg2)] + ; + + BOOST_SPIRIT_DEBUG_RULE(decl); + BOOST_SPIRIT_DEBUG_RULE(list); + BOOST_SPIRIT_DEBUG_RULE(ident); + BOOST_SPIRIT_DEBUG_RULE(type); + } + + rule<ScannerT> const& + start() const { return decl; } + + private: + typedef rule<ScannerT, var_decl_closure::context_t> rule_t; + rule_t type; + rule_t list; + rule_t ident; + symbols<declaration_type> symtab; + + rule<ScannerT> decl; // start rule + }; +}; + +/////////////////////////////////////////////////////////////////////////////// +// main entry point +int main() +{ + var_decl_list decl; + declaration_type type; + char const *pbegin = "int var1"; + + if (parse (pbegin, decl[assign(type)]).full) { + cout << endl + << "Parsed variable declarations successfully!" << endl + << "Detected type: " << declaration_type::vartype(type) + << " (" << string(type) << ")" + << endl; + } else { + cout << endl + << "Parsing the input stream failed!" 
+ << endl; + } + return 0; +} + Added: trunk/opentrep/test/parsers/schedule_parser.cpp =================================================================== --- trunk/opentrep/test/parsers/schedule_parser.cpp (rev 0) +++ trunk/opentrep/test/parsers/schedule_parser.cpp 2009-07-12 13:36:35 UTC (rev 120) @@ -0,0 +1,911 @@ +// C +#include <assert.h> +// STL +#include <iostream> +#include <sstream> +#include <fstream> +#include <string> +#include <map> +#include <set> +#include <vector> +// Boost (Extended STL) +#include <boost/date_time/posix_time/posix_time.hpp> +#include <boost/date_time/gregorian/gregorian.hpp> +// Boost Spirit (Parsing) +//#define BOOST_SPIRIT_DEBUG +#include <boost/spirit/core.hpp> +#include <boost/spirit/attribute.hpp> +#include <boost/spirit/utility/functor_parser.hpp> +#include <boost/spirit/utility/loops.hpp> +#include <boost/spirit/utility/chset.hpp> +#include <boost/spirit/utility/confix.hpp> +#include <boost/spirit/iterator/file_iterator.hpp> +#include <boost/spirit/actor/push_back_actor.hpp> +#include <boost/spirit/actor/assign_actor.hpp> + +// Type definitions +typedef char char_t; +//typedef char const* iterator_t; +typedef boost::spirit::file_iterator<char_t> iterator_t; +typedef boost::spirit::scanner<iterator_t> scanner_t; +typedef boost::spirit::rule<scanner_t> rule_t; + +/** LegCabin-Details. */ +struct Cabin_T { + // Attributes + std::string _cabinCode; + double _capacity; + + void display() const { + std::cout << " " << _cabinCode << " " << _capacity << ", "; + } +}; + +/** List of Cabin-Detail structures. */ +typedef std::vector<Cabin_T> CabinList_T; + +/** Leg. */ +struct Leg_T { + // Attributes + std::string _boardPoint; + boost::posix_time::time_duration _boardTime; + boost::gregorian::date_duration _boardDateOffSet; + std::string _offPoint; + boost::posix_time::time_duration _offTime; + boost::gregorian::date_duration _offDateOffSet; + boost::posix_time::time_duration _elapsed; + CabinList_T _cabinList; + + /** Constructor. 
*/ + Leg_T () : _boardDateOffSet (0), _offDateOffSet (0) {} + + void display() const { + std::cout << " " << _boardPoint << " / " + << boost::posix_time::to_simple_string (_boardTime) + << " -- " << _offPoint << " / " + << boost::posix_time::to_simple_string (_offTime) + << " --> " << boost::posix_time::to_simple_string (_elapsed) + << std::endl; + for (CabinList_T::const_iterator itCabin = _cabinList.begin(); + itCabin != _cabinList.end(); itCabin++) { + const Cabin_T& lCabin = *itCabin; + lCabin.display(); + } + std::cout << std::endl; + } +}; + +/** List of Leg structures. */ +typedef std::vector<Leg_T> LegList_T; + +/** SegmentCabin-Details. */ +struct SegmentCabin_T { + // Attributes + std::string _cabinCode; + std::string _classes; + + void display() const { + std::cout << " " << _cabinCode << " " << _classes << ", "; + } +}; + +/** List of SegmentCabin-Detail structures. */ +typedef std::vector<SegmentCabin_T> SegmentCabinList_T; + +/** Segment. */ +struct Segment_T { + // Attributes + std::string _boardPoint; + boost::posix_time::time_duration _boardTime; + boost::gregorian::date_duration _boardDateOffSet; + std::string _offPoint; + boost::posix_time::time_duration _offTime; + boost::gregorian::date_duration _offDateOffSet; + boost::posix_time::time_duration _elapsed; + SegmentCabinList_T _cabinList; + + /** Constructor. */ + Segment_T () : _boardDateOffSet (0), _offDateOffSet (0) {} + + void display() const { + std::cout << " " << _boardPoint << " / " + << boost::posix_time::to_simple_string (_boardTime) + << " -- " << _offPoint << " / " + << boost::posix_time::to_simple_string (_offTime) + << " --> " << boost::posix_time::to_simple_string (_elapsed) + << std::endl; + for (SegmentCabinList_T::const_iterator itCabin = _cabinList.begin(); + itCabin != _cabinList.end(); itCabin++) { + const SegmentCabin_T& lCabin = *itCabin; + lCabin.display(); + } + std::cout << std::endl; + } +}; + +/** List of Segment structures. 
*/ +typedef std::vector<Segment_T> SegmentList_T; + +/** Flight-Period. */ +struct FlightPeriod_T { + // Attributes + std::string _airlineCode; + unsigned int _flightNumber; + boost::gregorian::date _dateRangeStart; + boost::gregorian::date _dateRangeEnd; + std::string _dow; + LegList_T _legList; + SegmentList_T _segmentList; + + /** Constructor. */ + FlightPeriod_T () : _legAlreadyDefined (false), _itSeconds (0) {} + + /** Get the date built from the staging details. */ + boost::gregorian::date getDate() const { + return boost::gregorian::date (_itYear, _itMonth, _itDay); + } + + /** Get the time built from the staging details. */ + boost::posix_time::time_duration getTime() const { + return boost::posix_time::hours (_itHours) + + boost::posix_time::minutes (_itMinutes) + + boost::posix_time::seconds (_itSeconds); + } + + void display() const { + std::cout << _airlineCode << _flightNumber + << ", " << boost::gregorian::to_simple_string (_dateRangeStart) + << " - " << boost::gregorian::to_simple_string (_dateRangeEnd) + << " - " << _dow + << std::endl; + + for (LegList_T::const_iterator itLeg = _legList.begin(); + itLeg != _legList.end(); itLeg++) { + const Leg_T& lLeg = *itLeg; + lLeg.display(); + } + + for (SegmentList_T::const_iterator itSegment = _segmentList.begin(); + itSegment != _segmentList.end(); itSegment++) { + const Segment_T& lSegment = *itSegment; + lSegment.display(); + } + + //std::cout << "[Debug] - Staging Leg: "; + //_itLeg.display(); + //std::cout << "[Debug] - Staging Cabin: "; + //_itCabin.display(); + //std::cout << "[Debug] - Staging Segment: "; + //_itSegment.display(); + } + + /** Add the given airport to the internal lists (if not already existing). 
*/ + void addAirport (const std::string& iAirport) { + std::set<std::string>::const_iterator itAirport = + _airportList.find (iAirport); + if (itAirport == _airportList.end()) { + // Add the airport code to the airport set + const bool insertSuccessful = _airportList.insert (iAirport).second; + + if (insertSuccessful == false) { + // TODO: throw an exception + } + // Add the airport code to the airport vector + _airportOrderedList.push_back (iAirport); + } + } + + /** Build the routing (segments). */ + void buildSegments () { + // The list of airports encompasses all the airports on which + // the flight takes off or lands. Moreover, that list is + // time-ordered: the first airport is the initial departure of + // the flight, and the last airport is the eventual point of + // rest of the flight. + // Let l be the size of the ordered list of airports. + // We want to generate all the segment combinations from the legs + // and, hence, from all the possible (time-ordered) airport pairs. + // Thus, we iterate on both i=0...l-2 and j=i+1...l-1 + assert (_airportOrderedList.size() >= 2); + + _segmentList.clear(); + for (std::vector<std::string>::const_iterator itAirport_i = + _airportOrderedList.begin(); + itAirport_i != _airportOrderedList.end()-1; ++itAirport_i) { + for (std::vector<std::string>::const_iterator itAirport_j = + itAirport_i + 1; + itAirport_j != _airportOrderedList.end(); ++itAirport_j) { + Segment_T lSegment; + lSegment._boardPoint = *itAirport_i; + lSegment._offPoint = *itAirport_j; + + _segmentList.push_back (lSegment); + } + } + + // Clear the lists of airports, so that they are ready for the next flight + _airportList.clear(); + _airportOrderedList.clear(); + } + + /** Add, to the Segment whose key corresponds to the + given (board point, off point) pair, the specific segment cabin + details (mainly, the list of the class codes). 
+ <br>Note that the Segment structure is retrieved from the internal + list, already filled by a previous step (the buildSegments() + method). */ + void addSegmentCabin (const Segment_T& iSegment, + const SegmentCabin_T& iCabin) { + // Retrieve the Segment structure corresponding to the (board, off) point + // pair. + SegmentList_T::iterator itSegment = _segmentList.begin(); + for ( ; itSegment != _segmentList.end(); ++itSegment) { + const Segment_T& lSegment = *itSegment; + + const std::string& lBoardPoint = iSegment._boardPoint; + const std::string& lOffPoint = iSegment._offPoint; + if (lSegment._boardPoint == lBoardPoint + && lSegment._offPoint == lOffPoint) { + break; + } + } + + // If the segment key (airport pair) given in the schedule input file + // does not correspond to the leg (board, off) points, throw an exception + // so that the user knows the schedule input file is corrupted. + if (itSegment == _segmentList.end()) { + std::cerr << "Within the schedule input file, there is a flight for which the airports of segments and those of the legs do not correspond."; + throw std::exception(); + } + + // Add the Cabin structure to the Segment Cabin structure. + assert (itSegment != _segmentList.end()); + Segment_T& lSegment = *itSegment; + lSegment._cabinList.push_back (iCabin); + } + + /** Add, to all the Segment, the general segment cabin details + (mainly, the list of the class codes). + <br>Note that the Segment structures are stored within the internal + list, already filled by a previous step (the buildSegments() + method). */ + void addSegmentCabin (const SegmentCabin_T& iCabin) { + // Iterate on all the Segment (as they get the same cabin definitions) + for (SegmentList_T::iterator itSegment = _segmentList.begin(); + itSegment != _segmentList.end(); ++itSegment) { + Segment_T& lSegment = *itSegment; + lSegment._cabinList.push_back (iCabin); + } + } + + /** Staging Leg (resp. Cabin) structure, gathering the result of the iteration + on one leg (resp. 
cabin). */ + bool _legAlreadyDefined; + Leg_T _itLeg; + Cabin_T _itCabin; + + /** Staging Date. */ + unsigned int _itYear; + unsigned int _itMonth; + unsigned int _itDay; + + /** Staging Time. */ + long _itHours; + long _itMinutes; + long _itSeconds; + int _dateOffSet; + + /** Staging Airport List (helper to derive the list of Segment + structures). */ + std::set<std::string> _airportList; + std::vector<std::string> _airportOrderedList; + + /** Staging Segment-related attributes. */ + bool _areSegmentDefinitionsSpecific; + Segment_T _itSegment; + SegmentCabin_T _itSegmentCabin; +}; + +/////////////////////////////////////////////////////////////////////////////// +// +// Semantic actions +// +/////////////////////////////////////////////////////////////////////////////// +namespace { + + /** Store the parsed airline code. */ + struct store_airline_code { + store_airline_code (FlightPeriod_T& ioFlightPeriod) + : _flightPeriod (ioFlightPeriod) {} + + void operator() (iterator_t iStr, iterator_t iStrEnd) const { + std::string lAirlineCode (iStr, iStrEnd); + _flightPeriod._airlineCode = lAirlineCode; + // std::cout << "Airline code: " << lAirlineCode << std::endl; + + // As that's the beginning of a new flight, the list of legs must be reset + _flightPeriod._legList.clear(); + } + + FlightPeriod_T& _flightPeriod; + }; + + /** Store the parsed flight number. */ + struct store_flight_number { + store_flight_number (FlightPeriod_T& ioFlightPeriod) + : _flightPeriod (ioFlightPeriod) {} + + void operator() (unsigned int iNumber) const { + _flightPeriod._flightNumber = iNumber; + // std::cout << "Flight number: " << iNumber << std::endl; + } + + FlightPeriod_T& _flightPeriod; + }; + + /** Store the start of the date range. 
*/ + struct store_date_range_start { + store_date_range_start (FlightPeriod_T& ioFlightPeriod) + : _flightPeriod (ioFlightPeriod) {} + + void operator() (iterator_t iStr, iterator_t iStrEnd) const { + _flightPeriod._dateRangeStart = _flightPeriod.getDate(); + // std::cout << "Date Range Start: " + // << _flightPeriod._dateRangeStart << std::endl; + + // Reset the number of seconds + _flightPeriod._itSeconds = 0; + } + + FlightPeriod_T& _flightPeriod; + }; + + /** Store the end of the date range. */ + struct store_date_range_end { + store_date_range_end (FlightPeriod_T& ioFlightPeriod) + : _flightPeriod (ioFlightPeriod) {} + + void operator() (iterator_t iStr, iterator_t iStrEnd) const { + _flightPeriod._dateRangeEnd = _flightPeriod.getDate(); + // std::cout << "Date Range End: " + // << _flightPeriod._dateRangeEnd << std::endl; + + // Reset the number of seconds + _flightPeriod._itSeconds = 0; + } + + FlightPeriod_T& _flightPeriod; + }; + + /** Store the DOW (day of the Week). */ + struct store_dow { + store_dow (FlightPeriod_T& ioFlightPeriod) + : _flightPeriod (ioFlightPeriod) {} + + void operator() (iterator_t iStr, iterator_t iStrEnd) const { + std::string lDow (iStr, iStrEnd); + _flightPeriod._dow = lDow; + // std::cout << "DOW: " << lDow << std::endl; + } + + FlightPeriod_T& _flightPeriod; + }; + + /** Store the parsed board point. 
*/ + struct store_board_point { + store_board_point (FlightPeriod_T& ioFlightPeriod) + : _flightPeriod (ioFlightPeriod) {} + + void operator() (iterator_t iStr, iterator_t iStrEnd) const { + std::string lBoardPoint (iStr, iStrEnd); + // std::cout << "Board point: " << lBoardPoint << std::endl; + + // If a leg has already been parsed, add it to the FlightPeriod + if (_flightPeriod._legAlreadyDefined == true) { + _flightPeriod._legList.push_back (_flightPeriod._itLeg); + } else { + _flightPeriod._legAlreadyDefined = true; + } + + // Set the (new) board point + _flightPeriod._itLeg._boardPoint = lBoardPoint; + + // As that's the beginning of a new leg, the list of cabins must be reset + _flightPeriod._itLeg._cabinList.clear(); + + // Add the airport code if it is not already stored in the airport lists + _flightPeriod.addAirport (lBoardPoint); + } + + FlightPeriod_T& _flightPeriod; + }; + + /** Store the parsed off point. */ + struct store_off_point { + store_off_point (FlightPeriod_T& ioFlightPeriod) + : _flightPeriod (ioFlightPeriod) {} + + void operator() (iterator_t iStr, iterator_t iStrEnd) const { + std::string lOffPoint (iStr, iStrEnd); + _flightPeriod._itLeg._offPoint = lOffPoint; + // std::cout << "Off point: " << lOffPoint << std::endl; + + // Add the airport code if it is not already stored in the airport lists + _flightPeriod.addAirport (lOffPoint); + } + + FlightPeriod_T& _flightPeriod; + }; + + /** Store the board time. */ + struct store_board_time { + store_board_time (FlightPeriod_T& ioFlightPeriod) + : _flightPeriod (ioFlightPeriod) {} + + void operator() (iterator_t iStr, iterator_t iStrEnd) const { + _flightPeriod._itLeg._boardTime = _flightPeriod.getTime(); + + // Reset the number of seconds + _flightPeriod._itSeconds = 0; + + // Reset the date off-set + _flightPeriod._dateOffSet = 0; + } + + FlightPeriod_T& _flightPeriod; + }; + + /** Store the off time. 
*/ + struct store_off_time { + store_off_time (FlightPeriod_T& ioFlightPeriod) + : _flightPeriod (ioFlightPeriod) {} + + void operator() (iterator_t iStr, iterator_t iStrEnd) const { + _flightPeriod._itLeg._offTime = _flightPeriod.getTime(); + + // Reset the number of seconds + _flightPeriod._itSeconds = 0; + + // As the board date off set is optional, it can be set only afterwards, + // based on the staging date off-set value (_flightPeriod._dateOffSet). + const boost::gregorian::date_duration lDateOffSet (_flightPeriod._dateOffSet); + _flightPeriod._itLeg._boardDateOffSet = lDateOffSet; + } + + FlightPeriod_T& _flightPeriod; + }; + + /** Store the elapsed time. */ + struct store_elapsed_time { + store_elapsed_time (FlightPeriod_T& ioFlightPeriod) + : _flightPeriod (ioFlightPeriod) {} + + void operator() (iterator_t iStr, iterator_t iStrEnd) const { + _flightPeriod._itLeg._elapsed = _flightPeriod.getTime(); + + // Reset the number of seconds + _flightPeriod._itSeconds = 0; + + // As the board date off set is optional, it can be set only afterwards, + // based on the staging date off-set value (_flightPeriod._dateOffSet). + const boost::gregorian::date_duration lDateOffSet (_flightPeriod._dateOffSet); + _flightPeriod._itLeg._offDateOffSet = lDateOffSet; + } + + FlightPeriod_T& _flightPeriod; + }; + + /** Store the parsed cabin code. */ + struct store_cabin_code { + store_cabin_code (FlightPeriod_T& ioFlightPeriod) + : _flightPeriod (ioFlightPeriod) {} + + void operator() (char iChar) const { + _flightPeriod._itCabin._cabinCode = iChar; + // std::cout << "Cabin code: " << iChar << std::endl; + } + + FlightPeriod_T& _flightPeriod; + }; + + /** Store the parsed capacity. 
*/ + struct store_capacity { + store_capacity (FlightPeriod_T& ioFlightPeriod) + : _flightPeriod (ioFlightPeriod) {} + + void operator() (double iReal) const { + _flightPeriod._itCabin._capacity = iReal; + // std::cout << "Capacity: " << iReal << std::endl; + + // The capacity is the last (according to arrival order) detail + // of the cabin. Hence, when a capacity is parsed, it means that + // the full cabin details have already been parsed as well: the + // cabin can thus be added to the leg. + _flightPeriod._itLeg._cabinList.push_back (_flightPeriod._itCabin); + } + + FlightPeriod_T& _flightPeriod; + }; + + /** Store whether or not all the segments are the same. */ + struct store_segment_specificity { + store_segment_specificity (FlightPeriod_T& ioFlightPeriod) + : _flightPeriod (ioFlightPeriod) { + } + + void operator() (char iChar) const { + if (iChar == '0') { + _flightPeriod._areSegmentDefinitionsSpecific = false; + } else { + _flightPeriod._areSegmentDefinitionsSpecific = true; + } + + // Do a few sanity checks: the two lists should get exactly the same + // content (in terms of airport codes). The only difference is that one + // is a STL set, and the other a STL vector. + assert (_flightPeriod._airportList.size() + == _flightPeriod._airportOrderedList.size()); + assert (_flightPeriod._airportList.size() >= 2); + + // Since all the legs have now been parsed, we get all the airports + // and the segments may be built. + _flightPeriod.buildSegments(); + } + + FlightPeriod_T& _flightPeriod; + }; + + /** Store the board point of the segment. 
*/ + struct store_segment_board_point { + store_segment_board_point (FlightPeriod_T& ioFlightPeriod) + : _flightPeriod (ioFlightPeriod) { + } + + void operator() (iterator_t iStr, iterator_t iStrEnd) const { + std::string lBoardPoint (iStr, iStrEnd); + _flightPeriod._itSegment._boardPoint = lBoardPoint; + // std::cout << "Board point: " << lBoardPoint << std::endl; + } + + FlightPeriod_T& _flightPeriod; + }; + + /** Store the off point of the segment. */ + struct store_segment_off_point { + store_segment_off_point (FlightPeriod_T& ioFlightPeriod) + : _flightPeriod (ioFlightPeriod) { + } + + void operator() (iterator_t iStr, iterator_t iStrEnd) const { + std::string lOffPoint (iStr, iStrEnd); + _flightPeriod._itSegment._offPoint = lOffPoint; + // std::cout << "Off point: " << lOffPoint << std::endl; + } + + FlightPeriod_T& _flightPeriod; + }; + + /** Store the off point of the segment. */ + struct store_segment_cabin_code { + store_segment_cabin_code (FlightPeriod_T& ioFlightPeriod) + : _flightPeriod (ioFlightPeriod) { + } + + void operator() (char iChar) const { + _flightPeriod._itSegmentCabin._cabinCode = iChar; + // std::cout << "Cabin code: " << iChar << std::endl; + } + + FlightPeriod_T& _flightPeriod; + }; + + /** Store the classes of the segment-cabin. */ + struct store_classes { + store_classes (FlightPeriod_T& ioFlightPeriod) + : _flightPeriod (ioFlightPeriod) { + } + + void operator() (iterator_t iStr, iterator_t iStrEnd) const { + std::string lClasses (iStr, iStrEnd); + _flightPeriod._itSegmentCabin._classes = lClasses; + // std::cout << "Classes: " << lClasses << std::endl; + + // The list of classes is the last (according to the arrival order + // within the schedule input file) detail of the segment cabin. Hence, + // when a list of classes is parsed, it means that the full segment + // cabin details have already been parsed as well: the segment cabin + // can thus be added to the segment. 
+ if (_flightPeriod._areSegmentDefinitionsSpecific == true) { + _flightPeriod.addSegmentCabin (_flightPeriod._itSegment, + _flightPeriod._itSegmentCabin); + } else { + _flightPeriod.addSegmentCabin (_flightPeriod._itSegmentCabin); + } + } + + FlightPeriod_T& _flightPeriod; + }; + + /** Mark the end of the flight-period parsing. */ + struct do_end_flight { + do_end_flight (FlightPeriod_T& ioFlightPeriod) + : _flightPeriod (ioFlightPeriod) {} + + // void operator() (char iChar) const { + void operator() (iterator_t iStr, iterator_t iStrEnd) const { + // std::cout << "End of Flight-Period " << std::endl; + + assert (_flightPeriod._legAlreadyDefined == true); + _flightPeriod._legList.push_back (_flightPeriod._itLeg); + + // The lists of legs and cabins must be reset + _flightPeriod._legAlreadyDefined = false; + _flightPeriod._itLeg._cabinList.clear(); + + // Display the result + _flightPeriod.display(); + } + + FlightPeriod_T& _flightPeriod; + }; +} + +// /////////// Utilities ///////////// +/** 1-digit-integer parser */ +boost::spirit::int_parser<unsigned int, 10, 1, 1> int1_p; +/** 2-digit-integer parser */ +boost::spirit::uint_parser<int, 10, 2, 2> uint2_p; +/** 4-digit-integer parser */ +boost::spirit::uint_parser<int, 10, 4, 4> uint4_p; +/** Up-to-4-digit-integer parser */ +boost::spirit::uint_parser<int, 10, 1, 4> uint1_4_p; + +/////////////////////////////////////////////////////////////////////////////// +// +// Our calculator grammar (using subrules) +// +/////////////////////////////////////////////////////////////////////////////// + /** + AirlineCode; FlightNumber; DateRangeStart; DateRangeEnd; DOW; + (list) BoardPoint; OffPoint; BoardTime; DateOffSet; OffTime; + ElapsedTime; + (list) CabinCode; Capacity; + SegmentSpecificty (0 or 1); + (list) (optional BoardPoint; OffPoint); CabinCode; Classes + + BA; 9; 2007-04-20; 2007-06-30; 0000011; + LHR; BKK; 22:00; 15:15 / +1; 11:15; F; 5; J; 12; W; 20; Y; 300; + BKK; SYD; 18:10 / +1; 06:05 / +2; 08:55; F; 5; J; 12; 
W; 20; Y; 300; + 0; F; FA; J; JCDI; W; WT; Y; YBHKMLSQ; + BA; 9; 2007-04-20; 2007-06-30; 1111100; + LHR; BKK; 22:00; 15:15 / +1; 11:15; F; 5; J; 12; W; 20; Y; 300; + BKK; SYD; 18:10 / +1; 06:05 / +2; 08:55; F; 5; J; 12; W; 20; Y; 300; + 1; LHR; BKK; F; FA; J; JCDI; W; WT; Y; YBHKMLSQ; + BKK; SYD; F; FA; J; JCDI; W; WT; Y; YBHKMLSQ; + LHR; SYD; F; FA; J; JCDI; W; WT; Y; YBHKMLSQ; + + Grammar: + DOW ::= int + FlightKey ::= AirlineCode ';' FlightNumber + ';' DateRangeStart ';' DateRangeEnd ';' DOW + LegKey ::= BoardPoint ';' OffPoint + LegDetails ::= BoardTime ['/' BoardDateOffSet] + ';' OffTime ['/' BoardDateOffSet] + ';' Elapsed + LegCabinDetails ::= CabinCode ';' Capacity + Leg ::= LegKey ';' LegDetails (';' CabinDetails)+ + SegmentKey ::= BoardPoint ';' OffPoint + SegmentCabinDetails ::= CabinCode ';' Classes + FullSegmentCabinDetails::= (';' SegmentCabinDetails)+ + GeneralSegments ::= '0' (';' SegmentCabinDetails)+ + SpecificSegments ::= '1' (';' SegmentKey + ';' FullSegmentCabinDetails)+ + Segment ::= GeneralSegment | SpecificSegment + FlightPeriod ::= FlightKey (';' Leg)+ + (';' Segment)+ ';' EndOfFlight + EndOfFlight ::= ';' + */ + +using namespace boost::spirit; + +/** Grammar for the Flight-Period parser. 
*/ +struct FlightPeriodParser : + public boost::spirit::grammar<FlightPeriodParser> { + + FlightPeriodParser (FlightPeriod_T& ioFlightPeriod) + : _flightPeriod (ioFlightPeriod) { + } + + template <typename ScannerT> + struct definition { + definition (FlightPeriodParser const& self) { + + flight_period_list = *( boost::spirit::comment_p("//") + | boost::spirit::comment_p("/*", "*/") + | flight_period ) + ; + + flight_period = flight_key + >> +( ';' >> leg ) + >> +( ';' >> segment ) + >> flight_period_end[do_end_flight(self._flightPeriod)] + ; + + flight_period_end = + boost::spirit::ch_p(';') + ; + + flight_key = airline_code + >> ';' >> flight_number + >> ';' >> date[store_date_range_start(self._flightPeriod)] + >> ';' >> date[store_date_range_end(self._flightPeriod)] + >> ';' >> dow[store_dow(self._flightPeriod)] + ; + + airline_code = + lexeme_d[ (repeat_p(2,3)[chset_p("0-9A-Z")])[store_airline_code(self._flightPeriod)] ] + ; + + flight_number = + lexeme_d[ limit_d(0u, 9999u)[uint1_4_p][store_flight_number(self._flightPeriod)] ] + ; + + date = + lexeme_d[ limit_d(2000u,2099u)[uint4_p][assign_a(self._flightPeriod._itYear)] + >> '-' >> limit_d(1u,12u)[uint2_p][assign_a(self._flightPeriod._itMonth)] + >> '-' >> limit_d(1u,31u)[uint2_p][assign_a(self._flightPeriod._itDay)] ] + ; + + dow = + lexeme_d[ repeat_p(7)[chset_p("0-1")] ] + ; + + leg = leg_key >> ';' >> leg_details >> +( ';' >> cabin_details ) + ; + + leg_key = + (repeat_p(3)[chset_p("0-9A-Z")])[store_board_point(self._flightPeriod)] + >> ';' + >> (repeat_p(3)[chset_p("0-9A-Z")])[store_off_point(self._flightPeriod)] + ; + + leg_details = + time[store_board_time(self._flightPeriod)] + >> !(date_offset) + >> ';' + >> time[store_off_time(self._flightPeriod)] + >> !(date_offset) + >> ';' + >> time[store_elapsed_time(self._flightPeriod)] + ; + + time = lexeme_d[ limit_d(0u,23u)[uint2_p][assign_a(self._flightPeriod._itHours)] + >> ':' >> limit_d(0u,59u)[uint2_p][assign_a(self._flightPeriod._itMinutes)] + >> !(':' 
>> limit_d(0u,59u)[uint2_p][assign_a(self._flightPeriod._itSeconds)]) ] + ; + + date_offset = + boost::spirit::ch_p('/') + >> (int1_p)[boost::spirit::assign_a(self._flightPeriod._dateOffSet)] + ; + + cabin_details = (chset_p("A-Z"))[store_cabin_code(self._flightPeriod)] + >> ';' >> (boost::spirit::ureal_p)[store_capacity(self._flightPeriod)] + ; + + segment_key = + (repeat_p(3)[chset_p("0-9A-Z")])[store_segment_board_point(self._flightPeriod)] + >> ';' + >> (repeat_p(3)[chset_p("0-9A-Z")])[store_segment_off_point(self._flightPeriod)] + ; + + segment = + general_segments | specific_segments + ; + + general_segments = + boost::spirit::ch_p('0')[store_segment_specificity(self._flightPeriod)] + >> +(';' >> segment_cabin_details) + ; + + specific_segments = + boost::spirit::ch_p('1')[store_segment_specificity(self._flightPeriod)] + >> +(';' >> segment_key >> full_segment_cabin_details) + ; + + full_segment_cabin_details = + +(';' >> segment_cabin_details) + ; + + segment_cabin_details = + (chset_p("A-Z"))[store_segment_cabin_code(self._flightPeriod)] + >> ';' >> (repeat_p(1,26)[chset_p("A-Z")])[store_classes(self._flightPeriod)] + ; + + BOOST_SPIRIT_DEBUG_NODE (flight_period_list); + BOOST_SPIRIT_DEBUG_NODE (flight_period); + BOOST_SPIRIT_DEBUG_NODE (flight_period_end); + BOOST_SPIRIT_DEBUG_NODE (flight_key); + BOOST_SPIRIT_DEBUG_NODE (airline_code); + BOOST_SPIRIT_DEBUG_NODE (flight_number); + BOOST_SPIRIT_DEBUG_NODE (date); + BOOST_SPIRIT_DEBUG_NODE (dow); + BOOST_SPIRIT_DEBUG_NODE (leg); + BOOST_SPIRIT_DEBUG_NODE (leg_key); + BOOST_SPIRIT_DEBUG_NODE (leg_details); + BOOST_SPIRIT_DEBUG_NODE (time); + BOOST_SPIRIT_DEBUG_NODE (date_offset); + BOOST_SPIRIT_DEBUG_NODE (cabin_details); + BOOST_SPIRIT_DEBUG_NODE (segment); + BOOST_SPIRIT_DEBUG_NODE (segment_key); + BOOST_SPIRIT_DEBUG_NODE (general_segments); + BOOST_SPIRIT_DEBUG_NODE (specific_segments); + BOOST_SPIRIT_DEBUG_NODE (full_segment_cabin_details); + BOOST_SPIRIT_DEBUG_NODE (segment_cabin_details); + } + + 
boost::spirit::rule<ScannerT> flight_period_list, flight_period, + flight_period_end, flight_key, airline_code, flight_number, + date, dow, leg, leg_key, leg_details, time, date_offset, cabin_details, + segment, segment_key, general_segments, specific_segments, + full_segment_cabin_details, segment_cabin_details; + + boost::spirit::rule<ScannerT> const& start() const { return flight_period_list; } + }; + + FlightPeriod_T& _flightPeriod; +}; + +// /////////////// M A I N ///////////////// +int main (int argc, char* argv[]) { + try { + + // File to be parsed + std::string lFilename ("world_schedule.csv"); + + // Read the command-line parameters + if (argc >= 1 && argv[1] != NULL) { + std::istringstream istr (argv[1]); + istr >> lFilename; + } + + // Open the file + iterator_t lFileIterator (lFilename); + if (!lFileIterator) { + std::cerr << "The file " << lFilename << " can not be open." << std::endl; + } + + // Create an EOF iterator + iterator_t lFileIteratorEnd = lFileIterator.make_end(); + + // Instantiate the structure that will hold the result of the parsing. + FlightPeriod_T lFlightPeriod; + FlightPeriodParser lFlightPeriodParser (lFlightPeriod); + boost::spirit::parse_info<iterator_t> info = + boost::spirit::parse (lFileIterator, lFileIteratorEnd, + lFlightPeriodParser, + boost::spirit::space_p); + + // DEBUG + std::cout << "Flight Period:" << std::endl; + lFlightPeriod.display(); + + std::cout << "-------------------------" << std::endl; + if (info.full) { + std::cout << "Parsing succeeded" << std::endl; + + } else { + std::cout << "Parsing failed" << std::endl; + } + std::cout << "-------------------------" << std::endl; + + } catch (const std::exception& stde) { + std::cerr << "Standard exception: " << stde.what() << std::endl; + return -1; + + } catch (...) 
{ + return -1; + } + + return 0; +} Added: trunk/opentrep/test/parsers/test_full_calculator.sh =================================================================== --- trunk/opentrep/test/parsers/test_full_calculator.sh (rev 0) +++ trunk/opentrep/test/parsers/test_full_calculator.sh 2009-07-12 13:36:35 UTC (rev 120) @@ -0,0 +1,25 @@ +#!/bin/sh + +INSTALL_DIR=`grep "^prefix =" ../Makefile | cut -d"=" -d" " -f3` +TST_PROG=./full_calculator +LATUS_API_VERSION=`grep "^LATUS_API_VERSION =" ../Makefile | cut -d"=" -d" " -f3` +LATUS_LIBRARY_NAME=`grep "^LATUS_LIBRARY_NAME =" ../Makefile | cut -d"=" -d" " -f3` +LATUS_LIB=lib${LATUS_LIBRARY_NAME}-${LATUS_API_VERSION}.so + +if [ ! -x ${TST_PROG} ]; +then + echo "The sample program does not seem to have been compiled. Try 'make check' first." + exit -1 +fi + +if [ "$1" = "-h" -o "$1" = "-H" -o "$1" = "--h" -o "$1" = "--help" ]; +then + echo "Usage: $0 [<String to be parsed>]" + echo " The list to be parsed should contain floating point numbers" + echo " separated by commas, and should not contain spaces." + echo " Example: 10.2,5.4" + echo "The program parses a line and fills a flight-period structure." 
+ exit 0 +fi + +${TST_PROG} $1 Property changes on: trunk/opentrep/test/parsers/test_full_calculator.sh ___________________________________________________________________ Added: svn:executable + * Added: trunk/opentrep/test/parsers/test_parameter_parser.sh =================================================================== --- trunk/opentrep/test/parsers/test_parameter_parser.sh (rev 0) +++ trunk/opentrep/test/parsers/test_parameter_parser.sh 2009-07-12 13:36:35 UTC (rev 120) @@ -0,0 +1,25 @@ +#!/bin/sh + +INSTALL_DIR=`grep "^prefix =" ../Makefile | cut -d"=" -d" " -f3` +TST_PROG=./parameter_parser +LATUS_API_VERSION=`grep "^LATUS_API_VERSION =" ../Makefile | cut -d"=" -d" " -f3` +LATUS_LIBRARY_NAME=`grep "^LATUS_LIBRARY_NAME =" ../Makefile | cut -d"=" -d" " -f3` +LATUS_LIB=lib${LATUS_LIBRARY_NAME}-${LATUS_API_VERSION}.so + +if [ ! -x ${TST_PROG} ]; +then + echo "The sample program does not seem to have been compiled. Try 'make check' first." + exit -1 +fi + +if [ "$1" = "-h" -o "$1" = "-H" -o "$1" = "--h" -o "$1" = "--help" ]; +then + echo "Usage: $0 [<String to be parsed>]" + echo " The list to be parsed should contain floating point numbers" + echo " separated by commas, and should not contain spaces." + echo " Example: 10.2,5.4" + echo "The program parses a line and fills a flight-period structure." 
+ exit 0 +fi + +${TST_PROG} $1 Property changes on: trunk/opentrep/test/parsers/test_parameter_parser.sh ___________________________________________________________________ Added: svn:executable + * Added: trunk/opentrep/test/parsers/test_schedule_parser.sh =================================================================== --- trunk/opentrep/test/parsers/test_schedule_parser.sh (rev 0) +++ trunk/opentrep/test/parsers/test_schedule_parser.sh 2009-07-12 13:36:35 UTC (rev 120) @@ -0,0 +1,25 @@ +#!/bin/sh + +INSTALL_DIR=`grep "^prefix =" ../Makefile | cut -d"=" -d" " -f3` +TST_PROG=./schedule_parser +LATUS_API_VERSION=`grep "^LATUS_API_VERSION =" ../Makefile | cut -d"=" -d" " -f3` +LATUS_LIBRARY_NAME=`grep "^LATUS_LIBRARY_NAME =" ../Makefile | cut -d"=" -d" " -f3` +LATUS_LIB=lib${LATUS_LIBRARY_NAME}-${LATUS_API_VERSION}.so + +if [ ! -x ${TST_PROG} ]; +then + echo "The sample program does not seem to have been compiled. Try 'make check' first." + exit -1 +fi + +if [ "$1" = "-h" -o "$1" = "-H" -o "$1" = "--h" -o "$1" = "--help" ]; +then + echo "Usage: $0 [<String to be parsed>]" + echo " The list to be parsed should contain floating point numbers" + echo " separated by commas, and should not contain spaces." + echo " Example: 10.2,5.4" + echo "The program parses a line and fills a flight-period structure." 
+ exit 0 +fi + +${TST_PROG} $1 Property changes on: trunk/opentrep/test/parsers/test_schedule_parser.sh ___________________________________________________________________ Added: svn:executable + * Added: trunk/opentrep/test/parsers/world_schedule.csv =================================================================== --- trunk/opentrep/test/parsers/world_schedule.csv (rev 0) +++ trunk/opentrep/test/parsers/world_schedule.csv 2009-07-12 13:36:35 UTC (rev 120) @@ -0,0 +1,21 @@ +// Flights: AirlineCode; FlightNumber; Date-Range; ; DOW; Legs; Segments; +// Legs: BoardPoint; OffPoint; BoardTime; ArrivalDateOffSet; ArrivalTime; +// ElapsedTime; LegCabins; +// LegCabins: CabinCode; Capacity; +// Segments: Specific; +BA; 9; 2007-04-20; 2007-06-30; 0000011; LHR; BKK; 22:00; 15:15 / +1; 11:15; F; 5; J; 12; W; 20; Y; 300; BKK; SYD; 18:10 / +1; 06:05 / +2; 08:55; F; 5; J; 12; W; 20; Y; 300; 0; F; FA; J; JCDI; W; WT; Y; YBHKMLSQ; +BA; 9; 2007-04-20; 2007-06-30; 1111100; LHR; BKK; 22:00; 15:15 / +1; 11:15; F; 5; J; 12; W; 20; Y; 300; BKK; SYD; 18:10 / +1; 06:05 / +2; 08:55; F; 5; J; 12; W; 20; Y; 300; 1; LHR; BKK; F; FA; J; JCDI; W; WT; Y; YBHKMLSQ; BKK; SYD; F; FA; J; JCDI; W; WT; Y; YBHKMLSQ; LHR; SYD; F; FA; J; JCDI; W; WT; Y; YBHKMLSQ; +BA; 117; 2007-04-20; 2007-06-30; 1111111; LHR; JFK; 08:20; 11:00; 07:40; F; 5; J; 12; W; 20; Y; 300; 0; F; FA; J; JCDI; W; WT; Y; YBHKM; +BA; 175; 2007-04-20; 2007-06-30; 1111111; LHR; JFK; 10:55; 13:35; 07:40; F; 5; J; 12; W; 20; Y; 300; 0; F; FA; J; JCDI; W; WT; Y; YBHKMRL; +BA; 179; 2007-04-20; 2007-06-30; 1111111; LHR; JFK; 18:05; 20:45; 07:40; F; 5; J; 12; W; 20; Y; 300; 0; F; FA; J; JCDI; W; WT; Y; YBHKMRVNELSQO; +BA; 207; 2007-04-20; 2007-06-30; 1111111; LHR; MIA; 09:40; 14:25; 09:45; F; 5; J; 12; W; 20; Y; 300; 0; F; FA; J; JCDI; W; WT; Y; YBHKMRVNELSQO; +BA; 279; 2007-04-20; 2007-06-30; 1111111; LHR; LAX; 10:05; 13:10; 11:05; F; 5; J; 12; W; 20; Y; 300; 0; F; FA; J; JCDI; W; WT; Y; YBHKMRVNELSQO; +BA; 295; 2007-04-20; 2007-06-30; 
1111111; LHR; ORD; 11:35; 14:00; 08:25; F; 5; J; 12; W; 20; Y; 300; 0; F; FA; J; JCDI; W; WT; Y; YBHKMRVNELSQO; +BA; 341; 2007-04-20; 2007-06-30; 1111111; NCE; LHR; 08:55; 10:05; 02:10; J; 12; Y; 300; 0; J; JCDI; Y; YBHKMRVNEQLSO; +BA; 343; 2007-04-20; 2007-06-30; 1111111; NCE; LHR; 11:00; 12:15; 02:15; J; 12; Y; 300; 0; J; JCDI; Y; YBHKMRVNEQLSO; +BA; 345; 2007-04-20; 2007-06-30; 1111111; NCE; LHR; 16:20; 17:25; 02:05; J; 12; Y; 300; 0; J; JCDI; Y; YBHKMRVNEQLSO; +BA; 347; 2007-04-20; 2007-06-30; 1111111; NCE; LHR; 13:55; 15:00; 02:05; J; 12; Y; 300; 0; J; JCDI; Y; YBHKMRVNEQLSO; +AA; 101; 2007-04-20; 2007-06-30; 1111111; LHR; JFK; 09:55; 12:50; 07:55; G; 300; 0; G; GHQKLMVSOWN; +AA; 117; 2007-04-20; 2007-06-30; 1111111; JFK; LAX; 14:20; 17:25; 06:05; F; 12; J; 20; Y; 300; 0; F; FA; J; JDI; Y; YBGHQKLMVSOWN; +AA; 181; 2007-04-20; 2007-06-30; 1111111; JFK; LAX; 17:00; 20:00; 06:00; F; 12; J; 20; Y; 300; 0; F; FA; J; JDI; Y; YBHKMLWVGSNOQ; +AA; 585; 2007-04-20; 2007-06-30; 1111111; JFK; MIA; 15:40; 18:50; 03:10; F; 12; Y; 300; 0; F; FAP; Y; YBHKMLWVGSONQ; \ No newline at end of file This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <den...@us...> - 2009-07-12 12:59:06
|
Revision: 119 http://opentrep.svn.sourceforge.net/opentrep/?rev=119&view=rev Author: denis_arnaud Date: 2009-07-12 12:59:01 +0000 (Sun, 12 Jul 2009) Log Message: ----------- [Structure] Re-organised a little bit the hierarchy of directories, so as to add core, config and batches. Modified Paths: -------------- trunk/opentrep/config/soci.m4 trunk/opentrep/configure.ac trunk/opentrep/opentrep/Makefile.am trunk/opentrep/opentrep/dbadaptor/Makefile.am trunk/opentrep/opentrep/sources.mk trunk/opentrep/po/POTFILES.in trunk/opentrep/test/testIndexer.sh trunk/opentrep/test/testSearcher.sh Added Paths: ----------- trunk/opentrep/opentrep/batches/ trunk/opentrep/opentrep/batches/Makefile.am trunk/opentrep/opentrep/batches/indexer.cpp trunk/opentrep/opentrep/batches/searcher.cpp trunk/opentrep/opentrep/batches/sources.mk trunk/opentrep/opentrep/config/ trunk/opentrep/opentrep/config/Makefile.am trunk/opentrep/opentrep/core/ trunk/opentrep/opentrep/core/Makefile.am trunk/opentrep/opentrep/core/sources.mk Removed Paths: ------------- trunk/opentrep/opentrep/indexer.cpp trunk/opentrep/opentrep/searcher.cpp Property Changed: ---------------- trunk/opentrep/config/ trunk/opentrep/opentrep/ Property changes on: trunk/opentrep/config ___________________________________________________________________ Modified: svn:ignore - install-sh missing depcomp mdate-sh texinfo.tex ltmain.sh lib-ld.m4 lib-link.m4 lib-prefix.m4 config.rpath config.sub config.guess mkinstalldirs printf-posix.m4 uintmax_t.m4 signed.m4 iconv.m4 longlong.m4 inttypes.m4 glibc21.m4 codeset.m4 inttypes_h.m4 longdouble.m4 nls.m4 po.m4 intmax.m4 xsize.m4 lcmessage.m4 wint_t.m4 ulonglong.m4 progtest.m4 inttypes-pri.m4 stdint_h.m4 intdiv0.m4 isc-posix.m4 size_max.m4 gettext.m4 wchar_t.m4 + install-sh missing depcomp mdate-sh texinfo.tex ltmain.sh libtool.m4 lt~obsolete.m4 ltsugar.m4 ltversion.m4 ltoptions.m4 lib-ld.m4 lib-link.m4 lib-prefix.m4 config.rpath config.sub config.guess mkinstalldirs printf-posix.m4 uintmax_t.m4 
signed.m4 iconv.m4 longlong.m4 inttypes.m4 glibc21.m4 codeset.m4 inttypes_h.m4 longdouble.m4 nls.m4 po.m4 intmax.m4 xsize.m4 lcmessage.m4 wint_t.m4 ulonglong.m4 progtest.m4 inttypes-pri.m4 stdint_h.m4 intdiv0.m4 isc-posix.m4 size_max.m4 gettext.m4 wchar_t.m4 Modified: trunk/opentrep/config/soci.m4 =================================================================== --- trunk/opentrep/config/soci.m4 2009-05-30 16:41:07 UTC (rev 118) +++ trunk/opentrep/config/soci.m4 2009-07-12 12:59:01 UTC (rev 119) @@ -4,115 +4,137 @@ dnl dnl We define the following configure script flags: dnl -dnl --with-soci: Give prefix for both library and headers, and try -dnl to guess subdirectory names for each. (e.g. add /lib and -dnl /include onto given dir names, and other common schemes.) +dnl --with-soci: Give prefix for both library and headers, and try +dnl to guess subdirectory names for each. (e.g. Tack /lib and +dnl /include onto given dir name, and other common schemes.) +dnl --with-soci-lib: Similar to --with-soci, but for library only. +dnl --with-soci-include: Similar to --with-soci, but for headers +dnl only. 
dnl -dnl @version 1.3, 2009/05/02 -dnl @author Denis Arnaud <den...@us...> -dnl dnl @version 1.2, 2007/02/20 dnl @author Warren Young <so...@et...> AC_DEFUN([AX_SOCI], [ -# -# Set up configure script macros -# -AC_ARG_WITH(soci, - [--with-soci=<path> root directory path of Soci installation], - [SOCI_lib_check="$with_soci/lib64/soci $with_soci/lib/soci $with_soci/lib64 $with_soci/lib" - SOCI_inc_check="$with_soci/include $with_soci/include/soci"], - [SOCI_lib_check="/usr/lib64 /usr/lib /usr/lib64/soci /usr/lib/soci /usr/local/lib64 /usr/local/lib /opt/soci/lib64 /opt/soci/lib /usr/local/lib64/soci /usr/local/lib/soci /usr/local/soci/lib64 /usr/local/soci/lib /opt/soci/lib64/soci /opt/soci/lib/soci" - SOCI_inc_check="/usr/include /usr/include/soci /usr/local/include /opt/soci/include /usr/local/include/soci /usr/local/soci/include /usr/local/soci/include/soci /opt/soci/include/soci"]) + # + # Set up configure script macros + # + AC_ARG_WITH(soci, + [ --with-soci=<path> root directory path of Soci installation], + [SOCI_lib_check="$with_soci/lib64/soci $with_soci/lib/soci $with_soci/lib64 $with_soci/lib" + SOCI_inc_check="$with_soci/include $with_soci/include/soci"], + [SOCI_lib_check="/usr/lib64 /usr/lib /usr/lib64/soci /usr/lib/soci /usr/local/lib64 /usr/local/lib /usr/local/lib/soci /usr/local/soci/lib /usr/local/soci/lib/soci /opt/soci/lib /opt/soci/lib/soci" + SOCI_inc_check="/usr/include /usr/include/soci /usr/local/include/soci /usr/local/soci/include /usr/local/soci/include/soci /opt/soci/include /opt/soci/include/soci"]) -# SOCI library -SOCI_CORE_LIB=soci_core -SOCI_MYSQL_LIB=soci_mysql + AC_ARG_WITH(soci-lib, + [ --with-soci-lib=<path> directory path of Soci library installation], + [SOCI_lib_check="$with_soci_lib $with_soci_lib/lib64 $with_soci_lib/lib $with_soci_lib/lib64/soci $with_soci_lib/lib/soci"]) -# -# Look for Soci Core API library -# -AC_MSG_CHECKING([for Soci library directory]) -SOCI_libdir= -for m in $SOCI_lib_check -do - if test -d "$m" && \ 
- (test -f "$m/lib$SOCI_CORE_LIB.so" || test -f "$m/lib$SOCI_CORE_LIB.a") + AC_ARG_WITH(soci-include, + [ --with-soci-include=<path> directory path of Soci header installation], + [SOCI_inc_check="$with_soci_include $with_soci_include/include $with_soci_include/include/soci"]) + + # SOCI library + SOCI_CORE_LIB=soci_core + SOCI_MYSQL_LIB=soci_mysql + SOCI_LIB_SUFFIX=gcc-3_0 + + # + # Look for Soci Core API library + # + AC_MSG_CHECKING([for Soci library directory]) + SOCI_libdir= + for m in $SOCI_lib_check + do + if test -d "$m" + then + for socilib in "$SOCI_CORE_LIB $SOCI_CORE_LIB-${SOCI_LIB_SUFFIX}" + do + if (test -f "$m/lib$SOCI_CORE_LIB.so" || test -f "$m/lib$SOCI_CORE_LIB.a") + then + SOCI_libdir=$m + fi + if (test -f "$m/lib${SOCI_CORE_LIB}-${SOCI_LIB_SUFFIX}.so" \ + || test -f "$m/lib${SOCI_CORE_LIB}-${SOCI_LIB_SUFFIX}.a") + then + SOCI_CORE_LIB=${SOCI_CORE_LIB}-${SOCI_LIB_SUFFIX} + SOCI_MYSQL_LIB=${SOCI_MYSQL_LIB}-${SOCI_LIB_SUFFIX} + SOCI_libdir=$m + fi + done + break + fi + done + + if test -z "$SOCI_libdir" then - SOCI_libdir=$m - break + AC_MSG_ERROR([Didn't find $SOCI_CORE_LIB library in '$SOCI_lib_check']) fi -done -if test -z "$SOCI_libdir" -then - AC_MSG_ERROR([Didn't find $SOCI_CORE_LIB library in '$SOCI_lib_check']) -fi + case "$SOCI_libdir" in + /* ) ;; + * ) AC_MSG_ERROR([The Soci library directory ($SOCI_libdir) must be an absolute path.]) ;; + esac -case "$SOCI_libdir" in - /* ) ;; - * ) AC_MSG_ERROR([The Soci library directory ($SOCI_libdir) must be an absolute path.]) ;; -esac + AC_MSG_RESULT([$SOCI_libdir]) -AC_MSG_RESULT([$SOCI_libdir]) + case "$SOCI_libdir" in + /usr/lib) ;; + *) LDFLAGS="$LDFLAGS -L${SOCI_libdir}" ;; + esac -case "$SOCI_libdir" in - /usr/lib64) ;; - /usr/lib) ;; - *) SOCI_LIBS="-L${SOCI_libdir}" ;; -esac -LDFLAGS="$LDFLAGS ${SOCI_LIBS}" + # + # Look for Soci Core API headers + # + AC_MSG_CHECKING([for Soci include directory]) + SOCI_incdir= + for m in $SOCI_inc_check + do + if test -d "$m" && (test -f 
"$m/soci/core/soci.h" || test -f "$m/soci/soci.h") + then + SOCI_incdir=$m + break + fi + done -# -# Look for Soci Core API headers -# -AC_MSG_CHECKING([for Soci include directory]) -SOCI_incdir= -for m in $SOCI_inc_check -do - if test -d "$m" && test -f "$m/soci/core/soci.h" + if test -z "$SOCI_incdir" then - SOCI_incdir=$m - break + AC_MSG_ERROR([Didn't find the Soci include dir in '$SOCI_inc_check']) fi -done -if test -z "$SOCI_incdir" -then - AC_MSG_ERROR([Didn't find the Soci include dir in '$SOCI_inc_check']) -fi + case "$SOCI_incdir" in + /* ) ;; + * ) AC_MSG_ERROR([The Soci include directory ($SOCI_incdir) must be an absolute path.]) ;; + esac -case "$SOCI_incdir" in - /* ) ;; - * ) AC_MSG_ERROR([The Soci include directory ($SOCI_incdir) must be an absolute path.]) ;; -esac + AC_MSG_RESULT([$SOCI_incdir]) -AC_MSG_RESULT([$SOCI_incdir]) + if test "$SOCI_incdir" != "/usr/include" + then + SOCI_CFLAGS="-I${SOCI_incdir}" + fi + if test "$SOCI_libdir" != "/usr/lib" -a "$SOCI_libdir" != "/usr/lib64" + then + SOCI_LIBS="-L${SOCI_libdir}" + fi + SOCI_CFLAGS="-DSOCI_HEADERS_BURIED -DSOCI_MYSQL_HEADERS_BURIED $SOCI_CFLAGS" + SOCI_LIBS="$SOCI_LIBS -l${SOCI_CORE_LIB} -l${SOCI_MYSQL_LIB} -ldl" + AC_SUBST(SOCI_CFLAGS) + AC_SUBST(SOCI_LIBS) -case "$SOCI_incdir" in - /usr/include) ;; - *) SOCI_CFLAGS="-I${SOCI_incdir}" ;; -esac - -SOCI_LIBS="${SOCI_LIBS} -l${SOCI_CORE_LIB} -l${SOCI_MYSQL_LIB} -ldl" - -AC_SUBST(SOCI_CFLAGS) -AC_SUBST(SOCI_LIBS) - # Test linking with soci (note that it needs MySQL client to have been defined # before) -save_LIBS="$LIBS" -if test -z "$MYSQL_LIBS" -then - MYSQL_LIBS="-L/usr/lib64/mysql -L/usr/lib/mysql -lmysqlclient" -fi -LIBS="$LIBS $MYSQL_LIBS $SOCI_LIBS" -AC_CHECK_LIB($SOCI_CORE_LIB, soci_begin, - [], - [AC_MSG_ERROR([Could not find working Soci client library!])] - ) -LIBS="$save_LIBS" -AC_SUBST(SOCI_CORE_LIB) + save_LIBS="$LIBS" + if test -z "$MYSQL_LIBS" + then + MYSQL_LIBS="-L/usr/lib64/mysql -L/usr/lib/mysql -lmysqlclient" + fi + 
LIBS="$LIBS $MYSQL_LIBS $SOCI_LIBS" + AC_CHECK_LIB($SOCI_CORE_LIB, soci_begin, + [], + [AC_MSG_ERROR([Could not find working Soci client library!])] + ) + LIBS="$save_LIBS" + AC_SUBST(SOCI_CORE_LIB) ]) dnl AX_SOCI Modified: trunk/opentrep/configure.ac =================================================================== --- trunk/opentrep/configure.ac 2009-05-30 16:41:07 UTC (rev 118) +++ trunk/opentrep/configure.ac 2009-07-12 12:59:01 UTC (rev 119) @@ -218,6 +218,9 @@ opentrep/dbadaptor/Makefile opentrep/command/Makefile opentrep/service/Makefile + opentrep/config/Makefile + opentrep/core/Makefile + opentrep/batches/Makefile man/Makefile info/Makefile doc/Makefile Property changes on: trunk/opentrep/opentrep ___________________________________________________________________ Modified: svn:ignore - .libs .deps stamp-h1 config.h config.h.in Makefile Makefile.in opentrep-paths.hpp opentrep_indexer opentrep_searcher + .libs .deps stamp-h1 config.h config.h.in Makefile Makefile.in Modified: trunk/opentrep/opentrep/Makefile.am =================================================================== --- trunk/opentrep/opentrep/Makefile.am 2009-05-30 16:41:07 UTC (rev 118) +++ trunk/opentrep/opentrep/Makefile.am 2009-07-12 12:59:01 UTC (rev 119) @@ -7,7 +7,7 @@ MAINTAINERCLEANFILES = Makefile.in -SUBDIRS = basic bom factory dbadaptor command service +SUBDIRS = basic bom factory dbadaptor command service core config batches EXTRA_DIST = config_msvc.h @@ -24,33 +24,6 @@ $(top_builddir)/@PACKAGE@/service/libsvc.la lib@PACKAGE@_la_LDFLAGS = -version-info $(GENERIC_LIBRARY_VERSION) -# Binaries -bin_PROGRAMS = opentrep_indexer opentrep_searcher - -opentrep_indexer_SOURCES = $(bin_idx_h_sources) $(bin_idx_cc_sources) -opentrep_indexer_CXXFLAGS = $(BOOST_CFLAGS) -opentrep_indexer_LDFLAGS = $(BOOST_PROGRAM_OPTIONS_LIB) $(SOCI_LIBS) -opentrep_indexer_LDADD = lib@PACKAGE@.la - -opentrep_searcher_SOURCES = $(bin_srh_h_sources) $(bin_srh_cc_sources) -opentrep_searcher_CXXFLAGS = 
$(BOOST_CFLAGS) -opentrep_searcher_LDFLAGS = $(BOOST_PROGRAM_OPTIONS_LIB) $(SOCI_LIBS) -opentrep_searcher_LDADD = lib@PACKAGE@.la - # Header files nobase_pkginclude_HEADERS = $(service_h_sources) nobase_nodist_pkginclude_HEADERS = $(top_builddir)/@PACKAGE@/config.h - - -# Targets -all-local: @PACKAGE@-paths.hpp - -@PACKAGE@-paths.hpp: Makefile - @echo '#ifndef __OPENTREP_PATHS_HPP' > $@ - @echo '#define __OPENTREP_PATHS_HPP' >> $@ - @echo '#define PREFIXDIR "$(prefix)"' >> $@ - @echo '#define BINDIR "$(bindir)"' >> $@ - @echo '#define LIBEXECDIR "$(libexecdir)"' >> $@ - @echo '#define DATADIR "$(datadir)"' >> $@ - @echo '#define DOCDIR "$(docdir)"' >> $@ - @echo '#endif // __OPENTREP_PATHS_HPP' >> $@ Property changes on: trunk/opentrep/opentrep/batches ___________________________________________________________________ Added: svn:ignore + .deps .libs Makefile Makefile.in opentrep_indexer opentrep_searcher Added: trunk/opentrep/opentrep/batches/Makefile.am =================================================================== --- trunk/opentrep/opentrep/batches/Makefile.am (rev 0) +++ trunk/opentrep/opentrep/batches/Makefile.am 2009-07-12 12:59:01 UTC (rev 119) @@ -0,0 +1,23 @@ +include $(top_srcdir)/Makefile.common +include $(srcdir)/sources.mk + +## Source directory + +MAINTAINERCLEANFILES = Makefile.in + + +# Binaries (batches) +bin_PROGRAMS = opentrep_indexer opentrep_searcher + +opentrep_indexer_SOURCES = $(batches_idx_h_sources) $(batches_idx_cc_sources) +opentrep_indexer_CXXFLAGS = $(BOOST_CFLAGS) +#opentrep_indexer_LDADD = +opentrep_indexer_LDFLAGS = $(BOOST_PROGRAM_OPTIONS_LIB) $(SOCI_LIBS) \ + $(top_builddir)/@PACKAGE@/core/lib@PACKAGE@.la + + +opentrep_searcher_SOURCES = $(batches_srh_h_sources) $(batches_srh_cc_sources) +opentrep_searcher_CXXFLAGS = $(BOOST_CFLAGS) +#opentrep_searcher_LDADD = +opentrep_searcher_LDFLAGS = $(BOOST_PROGRAM_OPTIONS_LIB) $(SOCI_LIBS) \ + $(top_builddir)/@PACKAGE@/core/lib@PACKAGE@.la Copied: 
trunk/opentrep/opentrep/batches/indexer.cpp (from rev 114, trunk/opentrep/opentrep/indexer.cpp) =================================================================== --- trunk/opentrep/opentrep/batches/indexer.cpp (rev 0) +++ trunk/opentrep/opentrep/batches/indexer.cpp 2009-07-12 12:59:01 UTC (rev 119) @@ -0,0 +1,153 @@ +// C +#include <assert.h> +// STL +#include <iostream> +#include <sstream> +#include <fstream> +#include <map> +#include <vector> +// Boost (Extended STL) +#include <boost/date_time/posix_time/posix_time.hpp> +#include <boost/date_time/gregorian/gregorian.hpp> +#include <boost/program_options.hpp> +// OPENTREP +#include <opentrep/OPENTREP_Service.hpp> + +// ///////// Parsing of Options & Configuration ///////// +// A helper function to simplify the main part. +template<class T> std::ostream& operator<< (std::ostream& os, + const std::vector<T>& v) { + std::copy (v.begin(), v.end(), std::ostream_iterator<T> (std::cout, " ")); + return os; +} + +int readConfiguration (int argc, char* argv[]) { + int opt; + + // Declare a group of options that will be + // allowed only on command line + boost::program_options::options_description generic("Generic options"); + generic.add_options() + ("version,v", "print version string") + ("help,h", "produce help message"); + + // Declare a group of options that will be allowed both on command line and in + // config file + boost::program_options::options_description config("Configuration"); + config.add_options() + ("optimization", + boost::program_options::value<int>(&opt)->default_value(10), + "optimization level") + ("include-path,I", + boost::program_options::value< std::vector<std::string> >()->composing(), + "include path"); + + // Hidden options, will be allowed both on command line and + // in config file, but will not be shown to the user. 
+ boost::program_options::options_description hidden("Hidden options"); + hidden.add_options() + ("input-file", + boost::program_options::value< std::vector<std::string> >(), + "input file"); + + boost::program_options::options_description cmdline_options; + cmdline_options.add(generic).add(config).add(hidden); + + boost::program_options::options_description config_file_options; + config_file_options.add(config).add(hidden); + + boost::program_options::options_description visible("Allowed options"); + visible.add(generic).add(config); + + boost::program_options::positional_options_description p; + p.add("input-file", -1); + + boost::program_options::variables_map vm; + boost::program_options:: + store (boost::program_options::command_line_parser(argc, argv). + options (cmdline_options).positional(p).run(), vm); + + std::ifstream ifs ("request_parser.cfg"); + boost::program_options::store (parse_config_file (ifs, config_file_options), + vm); + boost::program_options::notify (vm); + + if (vm.count ("help")) { + std::cout << visible << std::endl; + return 0; + } + + if (vm.count ("version")) { + std::cout << "Open Travel Request Parser, version 1.0" << std::endl; + return 0; + } + + if (vm.count ("include-path")) { + std::cout << "Include paths are: " + << vm["include-path"].as< std::vector<std::string> >() + << std::endl; + } + + if (vm.count ("input-file")) { + std::cout << "Input files are: " + << vm["input-file"].as< std::vector<std::string> >() + << std::endl; + } + + std::cout << "Optimization level is " << opt << std::endl; + + return 0; +} + + +// /////////////// M A I N ///////////////// +int main (int argc, char* argv[]) { + try { + + // Output log File + std::string lLogFilename ("indexer.log"); + + // Xapian database name (directory of the index) + OPENTREP::TravelDatabaseName_T lXapianDatabaseName ("traveldb"); + + if (argc >= 1 && argv[1] != NULL) { + std::istringstream istr (argv[1]); + istr >> lLogFilename; + } + + if (argc >= 2 && argv[2] != NULL) { + 
std::istringstream istr (argv[2]); + istr >> lXapianDatabaseName; + } + + // Set the log parameters + std::ofstream logOutputFile; + // open and clean the log outputfile + logOutputFile.open (lLogFilename.c_str()); + logOutputFile.clear(); + + // Initialise the context + OPENTREP::OPENTREP_Service opentrepService; + opentrepService.init (logOutputFile, lXapianDatabaseName); + + // Launch the indexation + opentrepService.buildSearchIndex(); + + // Close the Log outputFile + logOutputFile.close(); + + + } catch (const OPENTREP::RootException& otexp) { + std::cerr << "Standard exception: " << otexp.what() << std::endl; + return -1; + + } catch (const std::exception& stde) { + std::cerr << "Standard exception: " << stde.what() << std::endl; + return -1; + + } catch (...) { + return -1; + } + + return 0; +} Copied: trunk/opentrep/opentrep/batches/searcher.cpp (from rev 114, trunk/opentrep/opentrep/searcher.cpp) =================================================================== --- trunk/opentrep/opentrep/batches/searcher.cpp (rev 0) +++ trunk/opentrep/opentrep/batches/searcher.cpp 2009-07-12 12:59:01 UTC (rev 119) @@ -0,0 +1,160 @@ +// C +#include <assert.h> +// STL +#include <iostream> +#include <sstream> +#include <fstream> +#include <map> +#include <vector> +// Boost (Extended STL) +#include <boost/date_time/posix_time/posix_time.hpp> +#include <boost/date_time/gregorian/gregorian.hpp> +#include <boost/program_options.hpp> +// OPENTREP +#include <opentrep/OPENTREP_Service.hpp> + +// ///////// Parsing of Options & Configuration ///////// +// A helper function to simplify the main part. 
+template<class T> std::ostream& operator<< (std::ostream& os, + const std::vector<T>& v) { + std::copy (v.begin(), v.end(), std::ostream_iterator<T> (std::cout, " ")); + return os; +} + +int readConfiguration (int argc, char* argv[]) { + int opt; + + // Declare a group of options that will be + // allowed only on command line + boost::program_options::options_description generic("Generic options"); + generic.add_options() + ("version,v", "print version string") + ("help,h", "produce help message"); + + // Declare a group of options that will be allowed both on command line and in + // config file + boost::program_options::options_description config("Configuration"); + config.add_options() + ("optimization", + boost::program_options::value<int>(&opt)->default_value(10), + "optimization level") + ("include-path,I", + boost::program_options::value< std::vector<std::string> >()->composing(), + "include path"); + + // Hidden options, will be allowed both on command line and + // in config file, but will not be shown to the user. + boost::program_options::options_description hidden("Hidden options"); + hidden.add_options() + ("input-file", + boost::program_options::value< std::vector<std::string> >(), + "input file"); + + boost::program_options::options_description cmdline_options; + cmdline_options.add(generic).add(config).add(hidden); + + boost::program_options::options_description config_file_options; + config_file_options.add(config).add(hidden); + + boost::program_options::options_description visible("Allowed options"); + visible.add(generic).add(config); + + boost::program_options::positional_options_description p; + p.add("input-file", -1); + + boost::program_options::variables_map vm; + boost::program_options:: + store (boost::program_options::command_line_parser(argc, argv). 
+ options (cmdline_options).positional(p).run(), vm); + + std::ifstream ifs ("request_parser.cfg"); + boost::program_options::store (parse_config_file (ifs, config_file_options), + vm); + boost::program_options::notify (vm); + + if (vm.count ("help")) { + std::cout << visible << std::endl; + return 0; + } + + if (vm.count ("version")) { + std::cout << "Open Travel Request Parser, version 1.0" << std::endl; + return 0; + } + + if (vm.count ("include-path")) { + std::cout << "Include paths are: " + << vm["include-path"].as< std::vector<std::string> >() + << std::endl; + } + + if (vm.count ("input-file")) { + std::cout << "Input files are: " + << vm["input-file"].as< std::vector<std::string> >() + << std::endl; + } + + std::cout << "Optimization level is " << opt << std::endl; + + return 0; +} + + +// /////////////// M A I N ///////////////// +int main (int argc, char* argv[]) { + try { + + // Travel query + OPENTREP::TravelQuery_T lTravelQuery ("cdg"); + + // Output log File + std::string lLogFilename ("searcher.log"); + + // Xapian database name (directory of the index) + OPENTREP::TravelDatabaseName_T lXapianDatabaseName ("traveldb"); + + if (argc >= 1 && argv[1] != NULL) { + std::istringstream istr (argv[1]); + istr >> lTravelQuery; + } + + if (argc >= 2 && argv[2] != NULL) { + std::istringstream istr (argv[2]); + istr >> lLogFilename; + } + + if (argc >= 3 && argv[3] != NULL) { + std::istringstream istr (argv[3]); + istr >> lXapianDatabaseName; + } + + // Set the log parameters + std::ofstream logOutputFile; + // open and clean the log outputfile + logOutputFile.open (lLogFilename.c_str()); + logOutputFile.clear(); + + // Initialise the context + OPENTREP::OPENTREP_Service opentrepService; + opentrepService.init (logOutputFile, lXapianDatabaseName); + + // Query the Xapian database (index) + opentrepService.interpretTravelRequest (lTravelQuery); + + // Close the Log outputFile + logOutputFile.close(); + + } catch (const OPENTREP::RootException& otexp) { + 
std::cerr << "Standard exception: " << otexp.what() << std::endl; + return -1; + + } catch (const std::exception& stde) { + std::cerr << "Standard exception: " << stde.what() << std::endl; + return -1; + + } catch (...) { + return -1; + } + + return 0; +} Added: trunk/opentrep/opentrep/batches/sources.mk =================================================================== --- trunk/opentrep/opentrep/batches/sources.mk (rev 0) +++ trunk/opentrep/opentrep/batches/sources.mk 2009-07-12 12:59:01 UTC (rev 119) @@ -0,0 +1,4 @@ +batches_idx_h_sources = +batches_idx_cc_sources = $(top_srcdir)/opentrep/batches/indexer.cpp +batches_srh_h_sources = +batches_srh_cc_sources = $(top_srcdir)/opentrep/batches/searcher.cpp Property changes on: trunk/opentrep/opentrep/config ___________________________________________________________________ Added: svn:ignore + .deps .libs Makefile Makefile.in opentrep-paths.hpp Added: trunk/opentrep/opentrep/config/Makefile.am =================================================================== --- trunk/opentrep/opentrep/config/Makefile.am (rev 0) +++ trunk/opentrep/opentrep/config/Makefile.am 2009-07-12 12:59:01 UTC (rev 119) @@ -0,0 +1,25 @@ +include $(top_srcdir)/Makefile.common + +## Source directory + +DISTCLEANFILES = @PACKAGE@-paths.hpp + +MAINTAINERCLEANFILES = Makefile.in + +EXTRA_DIST = @PACKAGE@-paths.hpp + +# Targets +all-local: @PACKAGE@-paths.hpp + +@PACKAGE@-paths.hpp: Makefile + @echo '#ifndef __@PACKAGE_NAME@_PATHS_HPP' > $@ + @echo '#define __@PACKAGE_NAME@_PATHS_HPP' >> $@ + @echo '#define PACKAGE "@PACKAGE@"' >> $@ + @echo '#define PACKAGE_NAME "@PACKAGE_NAME@"' >> $@ + @echo '#define PACKAGE_VERSION "@VERSION@"' >> $@ + @echo '#define PREFIXDIR "$(prefix)"' >> $@ + @echo '#define BINDIR "$(bindir)"' >> $@ + @echo '#define LIBEXECDIR "$(libexecdir)"' >> $@ + @echo '#define DATADIR "$(datadir)"' >> $@ + @echo '#define DOCDIR "$(docdir)"' >> $@ + @echo '#endif // __@PACKAGE_NAME@_PATHS_HPP' >> $@ Property changes on: 
trunk/opentrep/opentrep/core ___________________________________________________________________ Added: svn:ignore + .deps .libs Makefile Makefile.in Added: trunk/opentrep/opentrep/core/Makefile.am =================================================================== --- trunk/opentrep/opentrep/core/Makefile.am (rev 0) +++ trunk/opentrep/opentrep/core/Makefile.am 2009-07-12 12:59:01 UTC (rev 119) @@ -0,0 +1,24 @@ +include $(top_srcdir)/Makefile.common +include $(srcdir)/sources.mk + +## Source directory + +MAINTAINERCLEANFILES = Makefile.in + +SUBDIRS = + + +# Library +lib_LTLIBRARIES = lib@PACKAGE@.la + +lib@PACKAGE@_la_SOURCES = $(service_h_sources) $(service_cc_sources) +lib@PACKAGE@_la_LIBADD = \ + $(top_builddir)/@PACKAGE@/basic/libbas.la \ + $(top_builddir)/@PACKAGE@/bom/libbom.la \ + $(top_builddir)/@PACKAGE@/factory/libfac.la \ + $(top_builddir)/@PACKAGE@/dbadaptor/libdba.la \ + $(top_builddir)/@PACKAGE@/command/libcmd.la \ + $(top_builddir)/@PACKAGE@/service/libsvc.la +lib@PACKAGE@_la_LDFLAGS = \ + $(BOOST_DATE_TIME_LIB) $(BOOST_PROGRAM_OPTIONS_LIB) \ + $(SOCI_LIBS) -version-info $(GENERIC_LIBRARY_VERSION) Added: trunk/opentrep/opentrep/core/sources.mk =================================================================== --- trunk/opentrep/opentrep/core/sources.mk (rev 0) +++ trunk/opentrep/opentrep/core/sources.mk 2009-07-12 12:59:01 UTC (rev 119) @@ -0,0 +1,3 @@ +service_h_sources = $(top_srcdir)/opentrep/OPENTREP_Types.hpp \ + $(top_srcdir)/opentrep/OPENTREP_Service.hpp +service_cc_sources = Modified: trunk/opentrep/opentrep/dbadaptor/Makefile.am =================================================================== --- trunk/opentrep/opentrep/dbadaptor/Makefile.am 2009-05-30 16:41:07 UTC (rev 118) +++ trunk/opentrep/opentrep/dbadaptor/Makefile.am 2009-07-12 12:59:01 UTC (rev 119) @@ -3,16 +3,10 @@ include $(srcdir)/sources.mk noinst_LTLIBRARIES= libdba.la -if ENABLE_DEBUG -noinst_LTLIBRARIES += libdba_debug.la -endif + libdba_la_SOURCES= $(dba_h_sources) 
$(dba_cc_sources) libdba_la_CXXFLAGS = $(CXXFLAGS_OPT) $(SOCI_CFLAGS) libdba_la_LIBADD = $(SOCI_LIBS) -libdba_debug_la_SOURCES = $(dba_h_sources) $(dba_cc_sources) -libdba_debug_la_CXXFLAGS = $(CXXFLAGS_DEBUG) $(SOCI_CFLAGS) -libdba_debug_la_LIBADD = $(SOCI_LIBS) - #pkgincludedir = $(includedir)/@PACKAGE@/dba #pkginclude_HEADERS = $(dba_h_sources) Deleted: trunk/opentrep/opentrep/indexer.cpp =================================================================== --- trunk/opentrep/opentrep/indexer.cpp 2009-05-30 16:41:07 UTC (rev 118) +++ trunk/opentrep/opentrep/indexer.cpp 2009-07-12 12:59:01 UTC (rev 119) @@ -1,153 +0,0 @@ -// C -#include <assert.h> -// STL -#include <iostream> -#include <sstream> -#include <fstream> -#include <map> -#include <vector> -// Boost (Extended STL) -#include <boost/date_time/posix_time/posix_time.hpp> -#include <boost/date_time/gregorian/gregorian.hpp> -#include <boost/program_options.hpp> -// OPENTREP -#include <opentrep/OPENTREP_Service.hpp> - -// ///////// Parsing of Options & Configuration ///////// -// A helper function to simplify the main part. 
-template<class T> std::ostream& operator<< (std::ostream& os, - const std::vector<T>& v) { - std::copy (v.begin(), v.end(), std::ostream_iterator<T> (std::cout, " ")); - return os; -} - -int readConfiguration (int argc, char* argv[]) { - int opt; - - // Declare a group of options that will be - // allowed only on command line - boost::program_options::options_description generic("Generic options"); - generic.add_options() - ("version,v", "print version string") - ("help,h", "produce help message"); - - // Declare a group of options that will be allowed both on command line and in - // config file - boost::program_options::options_description config("Configuration"); - config.add_options() - ("optimization", - boost::program_options::value<int>(&opt)->default_value(10), - "optimization level") - ("include-path,I", - boost::program_options::value< std::vector<std::string> >()->composing(), - "include path"); - - // Hidden options, will be allowed both on command line and - // in config file, but will not be shown to the user. - boost::program_options::options_description hidden("Hidden options"); - hidden.add_options() - ("input-file", - boost::program_options::value< std::vector<std::string> >(), - "input file"); - - boost::program_options::options_description cmdline_options; - cmdline_options.add(generic).add(config).add(hidden); - - boost::program_options::options_description config_file_options; - config_file_options.add(config).add(hidden); - - boost::program_options::options_description visible("Allowed options"); - visible.add(generic).add(config); - - boost::program_options::positional_options_description p; - p.add("input-file", -1); - - boost::program_options::variables_map vm; - boost::program_options:: - store (boost::program_options::command_line_parser(argc, argv). 
- options (cmdline_options).positional(p).run(), vm); - - std::ifstream ifs ("request_parser.cfg"); - boost::program_options::store (parse_config_file (ifs, config_file_options), - vm); - boost::program_options::notify (vm); - - if (vm.count ("help")) { - std::cout << visible << std::endl; - return 0; - } - - if (vm.count ("version")) { - std::cout << "Open Travel Request Parser, version 1.0" << std::endl; - return 0; - } - - if (vm.count ("include-path")) { - std::cout << "Include paths are: " - << vm["include-path"].as< std::vector<std::string> >() - << std::endl; - } - - if (vm.count ("input-file")) { - std::cout << "Input files are: " - << vm["input-file"].as< std::vector<std::string> >() - << std::endl; - } - - std::cout << "Optimization level is " << opt << std::endl; - - return 0; -} - - -// /////////////// M A I N ///////////////// -int main (int argc, char* argv[]) { - try { - - // Output log File - std::string lLogFilename ("indexer.log"); - - // Xapian database name (directory of the index) - OPENTREP::TravelDatabaseName_T lXapianDatabaseName ("traveldb"); - - if (argc >= 1 && argv[1] != NULL) { - std::istringstream istr (argv[1]); - istr >> lLogFilename; - } - - if (argc >= 2 && argv[2] != NULL) { - std::istringstream istr (argv[2]); - istr >> lXapianDatabaseName; - } - - // Set the log parameters - std::ofstream logOutputFile; - // open and clean the log outputfile - logOutputFile.open (lLogFilename.c_str()); - logOutputFile.clear(); - - // Initialise the context - OPENTREP::OPENTREP_Service opentrepService; - opentrepService.init (logOutputFile, lXapianDatabaseName); - - // Launch the indexation - opentrepService.buildSearchIndex(); - - // Close the Log outputFile - logOutputFile.close(); - - - } catch (const OPENTREP::RootException& otexp) { - std::cerr << "Standard exception: " << otexp.what() << std::endl; - return -1; - - } catch (const std::exception& stde) { - std::cerr << "Standard exception: " << stde.what() << std::endl; - return -1; - - } 
catch (...) { - return -1; - } - - return 0; -} Deleted: trunk/opentrep/opentrep/searcher.cpp =================================================================== --- trunk/opentrep/opentrep/searcher.cpp 2009-05-30 16:41:07 UTC (rev 118) +++ trunk/opentrep/opentrep/searcher.cpp 2009-07-12 12:59:01 UTC (rev 119) @@ -1,160 +0,0 @@ -// C -#include <assert.h> -// STL -#include <iostream> -#include <sstream> -#include <fstream> -#include <map> -#include <vector> -// Boost (Extended STL) -#include <boost/date_time/posix_time/posix_time.hpp> -#include <boost/date_time/gregorian/gregorian.hpp> -#include <boost/program_options.hpp> -// OPENTREP -#include <opentrep/OPENTREP_Service.hpp> - -// ///////// Parsing of Options & Configuration ///////// -// A helper function to simplify the main part. -template<class T> std::ostream& operator<< (std::ostream& os, - const std::vector<T>& v) { - std::copy (v.begin(), v.end(), std::ostream_iterator<T> (std::cout, " ")); - return os; -} - -int readConfiguration (int argc, char* argv[]) { - int opt; - - // Declare a group of options that will be - // allowed only on command line - boost::program_options::options_description generic("Generic options"); - generic.add_options() - ("version,v", "print version string") - ("help,h", "produce help message"); - - // Declare a group of options that will be allowed both on command line and in - // config file - boost::program_options::options_description config("Configuration"); - config.add_options() - ("optimization", - boost::program_options::value<int>(&opt)->default_value(10), - "optimization level") - ("include-path,I", - boost::program_options::value< std::vector<std::string> >()->composing(), - "include path"); - - // Hidden options, will be allowed both on command line and - // in config file, but will not be shown to the user. 
- boost::program_options::options_description hidden("Hidden options"); - hidden.add_options() - ("input-file", - boost::program_options::value< std::vector<std::string> >(), - "input file"); - - boost::program_options::options_description cmdline_options; - cmdline_options.add(generic).add(config).add(hidden); - - boost::program_options::options_description config_file_options; - config_file_options.add(config).add(hidden); - - boost::program_options::options_description visible("Allowed options"); - visible.add(generic).add(config); - - boost::program_options::positional_options_description p; - p.add("input-file", -1); - - boost::program_options::variables_map vm; - boost::program_options:: - store (boost::program_options::command_line_parser(argc, argv). - options (cmdline_options).positional(p).run(), vm); - - std::ifstream ifs ("request_parser.cfg"); - boost::program_options::store (parse_config_file (ifs, config_file_options), - vm); - boost::program_options::notify (vm); - - if (vm.count ("help")) { - std::cout << visible << std::endl; - return 0; - } - - if (vm.count ("version")) { - std::cout << "Open Travel Request Parser, version 1.0" << std::endl; - return 0; - } - - if (vm.count ("include-path")) { - std::cout << "Include paths are: " - << vm["include-path"].as< std::vector<std::string> >() - << std::endl; - } - - if (vm.count ("input-file")) { - std::cout << "Input files are: " - << vm["input-file"].as< std::vector<std::string> >() - << std::endl; - } - - std::cout << "Optimization level is " << opt << std::endl; - - return 0; -} - - -// /////////////// M A I N ///////////////// -int main (int argc, char* argv[]) { - try { - - // Travel query - OPENTREP::TravelQuery_T lTravelQuery ("cdg"); - - // Output log File - std::string lLogFilename ("searcher.log"); - - // Xapian database name (directory of the index) - OPENTREP::TravelDatabaseName_T lXapianDatabaseName ("traveldb"); - - if (argc >= 1 && argv[1] != NULL) { - std::istringstream istr (argv[1]); 
- istr >> lTravelQuery; - } - - if (argc >= 2 && argv[2] != NULL) { - std::istringstream istr (argv[2]); - istr >> lLogFilename; - } - - if (argc >= 3 && argv[3] != NULL) { - std::istringstream istr (argv[3]); - istr >> lXapianDatabaseName; - } - - // Set the log parameters - std::ofstream logOutputFile; - // open and clean the log outputfile - logOutputFile.open (lLogFilename.c_str()); - logOutputFile.clear(); - - // Initialise the context - OPENTREP::OPENTREP_Service opentrepService; - opentrepService.init (logOutputFile, lXapianDatabaseName); - - // Query the Xapian database (index) - opentrepService.interpretTravelRequest (lTravelQuery); - - // Close the Log outputFile - logOutputFile.close(); - - } catch (const OPENTREP::RootException& otexp) { - std::cerr << "Standard exception: " << otexp.what() << std::endl; - return -1; - - } catch (const std::exception& stde) { - std::cerr << "Standard exception: " << stde.what() << std::endl; - return -1; - - } catch (...) { - return -1; - } - - return 0; -} Modified: trunk/opentrep/opentrep/sources.mk =================================================================== --- trunk/opentrep/opentrep/sources.mk 2009-05-30 16:41:07 UTC (rev 118) +++ trunk/opentrep/opentrep/sources.mk 2009-07-12 12:59:01 UTC (rev 119) @@ -1,7 +1,4 @@ -service_h_sources = $(top_srcdir)/opentrep/OPENTREP_Types.hpp \ +service_h_sources = \ + $(top_srcdir)/opentrep/OPENTREP_Types.hpp \ $(top_srcdir)/opentrep/OPENTREP_Service.hpp service_cc_sources = -bin_idx_h_sources = -bin_idx_cc_sources = $(top_srcdir)/opentrep/indexer.cpp -bin_srh_h_sources = -bin_srh_cc_sources = $(top_srcdir)/opentrep/searcher.cpp Modified: trunk/opentrep/po/POTFILES.in =================================================================== --- trunk/opentrep/po/POTFILES.in 2009-05-30 16:41:07 UTC (rev 118) +++ trunk/opentrep/po/POTFILES.in 2009-07-12 12:59:01 UTC (rev 119) @@ -7,8 +7,8 @@ opentrep/service/Logger.hpp opentrep/service/OPENTREP_ServiceContext.cpp 
opentrep/service/OPENTREP_ServiceContext.hpp -opentrep/indexer.cpp -opentrep/searcher.cpp +opentrep/batches/indexer.cpp +opentrep/batches/searcher.cpp opentrep/dbadaptor/DbaPlace.hpp opentrep/dbadaptor/DbaAbstract.cpp opentrep/dbadaptor/DbaPlace.cpp Modified: trunk/opentrep/test/testIndexer.sh =================================================================== --- trunk/opentrep/test/testIndexer.sh 2009-05-30 16:41:07 UTC (rev 118) +++ trunk/opentrep/test/testIndexer.sh 2009-07-12 12:59:01 UTC (rev 119) @@ -1,7 +1,7 @@ #!/bin/sh INSTALL_DIR=`grep "^prefix =" ../Makefile | cut -d"=" -d" " -f3` -TST_PROG=../opentrep/opentrep_indexer +TST_PROG=../opentrep/batches/opentrep_indexer OPENTREP=`grep "^PACKAGE_VERSION =" ../Makefile | cut -d"=" -d" " -f3` OPENTREP_LIBRARY_NAME=`grep "^PACKAGE =" ../Makefile | cut -d"=" -d" " -f3` OPENTREP_LIB=lib${OPENTREP_LIBRARY_NAME}-${OPENTREP_API_VERSION}.so Modified: trunk/opentrep/test/testSearcher.sh =================================================================== --- trunk/opentrep/test/testSearcher.sh 2009-05-30 16:41:07 UTC (rev 118) +++ trunk/opentrep/test/testSearcher.sh 2009-07-12 12:59:01 UTC (rev 119) @@ -1,7 +1,7 @@ #!/bin/sh INSTALL_DIR=`grep "^prefix =" ../Makefile | cut -d"=" -d" " -f3` -TST_PROG=../opentrep/opentrep_searcher +TST_PROG=../opentrep/batches/opentrep_searcher OPENTREP=`grep "^PACKAGE_VERSION =" ../Makefile | cut -d"=" -d" " -f3` OPENTREP_LIBRARY_NAME=`grep "^PACKAGE =" ../Makefile | cut -d"=" -d" " -f3` OPENTREP_LIB=lib${OPENTREP_LIBRARY_NAME}-${OPENTREP_API_VERSION}.so This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |