Thread: [Opentrep-svn] SF.net SVN: opentrep:[133] trunk/opentrep/opentrep
Status: Beta
Brought to you by:
denis_arnaud
From: <den...@us...> - 2009-07-17 00:10:41
|
Revision: 133 http://opentrep.svn.sourceforge.net/opentrep/?rev=133&view=rev Author: denis_arnaud Date: 2009-07-17 00:10:38 +0000 (Fri, 17 Jul 2009) Log Message: ----------- [Dev] Retrofitted the code of test/xapian/string_search.cpp into the OpenTREP library, and created the corresponding objects and support classes. Modified Paths: -------------- trunk/opentrep/opentrep/batches/indexer.cpp trunk/opentrep/opentrep/batches/searcher.cpp trunk/opentrep/opentrep/bom/BomAbstract.hpp trunk/opentrep/opentrep/bom/sources.mk trunk/opentrep/opentrep/command/RequestInterpreter.cpp Added Paths: ----------- trunk/opentrep/opentrep/bom/DocumentList.hpp trunk/opentrep/opentrep/bom/Result.cpp trunk/opentrep/opentrep/bom/Result.hpp trunk/opentrep/opentrep/bom/ResultHolder.cpp trunk/opentrep/opentrep/bom/ResultHolder.hpp trunk/opentrep/opentrep/bom/ResultList.hpp trunk/opentrep/opentrep/bom/StringMatcher.cpp trunk/opentrep/opentrep/bom/StringMatcher.hpp trunk/opentrep/opentrep/bom/WordHolder.cpp trunk/opentrep/opentrep/bom/WordHolder.hpp trunk/opentrep/opentrep/bom/WordList.hpp Modified: trunk/opentrep/opentrep/batches/indexer.cpp =================================================================== --- trunk/opentrep/opentrep/batches/indexer.cpp 2009-07-16 16:45:48 UTC (rev 132) +++ trunk/opentrep/opentrep/batches/indexer.cpp 2009-07-17 00:10:38 UTC (rev 133) @@ -1,153 +1,153 @@ -// C -#include <assert.h> -// STL -#include <iostream> -#include <sstream> -#include <fstream> -#include <map> -#include <vector> -// Boost (Extended STL) -#include <boost/date_time/posix_time/posix_time.hpp> -#include <boost/date_time/gregorian/gregorian.hpp> -#include <boost/program_options.hpp> -// OPENTREP -#include <opentrep/OPENTREP_Service.hpp> - -// ///////// Parsing of Options & Configuration ///////// -// A helper function to simplify the main part. 
-template<class T> std::ostream& operator<< (std::ostream& os, - const std::vector<T>& v) { - std::copy (v.begin(), v.end(), std::ostream_iterator<T> (std::cout, " ")); - return os; -} - -int readConfiguration (int argc, char* argv[]) { - int opt; - - // Declare a group of options that will be - // allowed only on command line - boost::program_options::options_description generic("Generic options"); - generic.add_options() - ("version,v", "print version string") - ("help,h", "produce help message"); - - // Declare a group of options that will be allowed both on command line and in - // config file - boost::program_options::options_description config("Configuration"); - config.add_options() - ("optimization", - boost::program_options::value<int>(&opt)->default_value(10), - "optimization level") - ("include-path,I", - boost::program_options::value< std::vector<std::string> >()->composing(), - "include path"); - - // Hidden options, will be allowed both on command line and - // in config file, but will not be shown to the user. - boost::program_options::options_description hidden("Hidden options"); - hidden.add_options() - ("input-file", - boost::program_options::value< std::vector<std::string> >(), - "input file"); - - boost::program_options::options_description cmdline_options; - cmdline_options.add(generic).add(config).add(hidden); - - boost::program_options::options_description config_file_options; - config_file_options.add(config).add(hidden); - - boost::program_options::options_description visible("Allowed options"); - visible.add(generic).add(config); - - boost::program_options::positional_options_description p; - p.add("input-file", -1); - - boost::program_options::variables_map vm; - boost::program_options:: - store (boost::program_options::command_line_parser(argc, argv). 
- options (cmdline_options).positional(p).run(), vm); - - std::ifstream ifs ("request_parser.cfg"); - boost::program_options::store (parse_config_file (ifs, config_file_options), - vm); - boost::program_options::notify (vm); - - if (vm.count ("help")) { - std::cout << visible << std::endl; - return 0; - } - - if (vm.count ("version")) { - std::cout << "Open Travel Request Parser, version 1.0" << std::endl; - return 0; - } - - if (vm.count ("include-path")) { - std::cout << "Include paths are: " - << vm["include-path"].as< std::vector<std::string> >() - << std::endl; - } - - if (vm.count ("input-file")) { - std::cout << "Input files are: " - << vm["input-file"].as< std::vector<std::string> >() - << std::endl; - } - - std::cout << "Optimization level is " << opt << std::endl; - - return 0; -} - - -// /////////////// M A I N ///////////////// -int main (int argc, char* argv[]) { - try { - - // Output log File - std::string lLogFilename ("indexer.log"); - - // Xapian database name (directory of the index) - OPENTREP::TravelDatabaseName_T lXapianDatabaseName ("traveldb"); - - if (argc >= 1 && argv[1] != NULL) { - std::istringstream istr (argv[1]); - istr >> lLogFilename; - } - - if (argc >= 2 && argv[2] != NULL) { - std::istringstream istr (argv[2]); - istr >> lXapianDatabaseName; - } - - // Set the log parameters - std::ofstream logOutputFile; - // open and clean the log outputfile - logOutputFile.open (lLogFilename.c_str()); - logOutputFile.clear(); - - // Initialise the context - OPENTREP::OPENTREP_Service opentrepService; - opentrepService.init (logOutputFile, lXapianDatabaseName); - - // Launch the indexation - opentrepService.buildSearchIndex(); - - // Close the Log outputFile - logOutputFile.close(); - - - } catch (const OPENTREP::RootException& otexp) { - std::cerr << "Standard exception: " << otexp.what() << std::endl; - return -1; - - } catch (const std::exception& stde) { - std::cerr << "Standard exception: " << stde.what() << std::endl; - return -1; - - } 
catch (...) { - return -1; - } - - return 0; -} +// C +#include <assert.h> +// STL +#include <iostream> +#include <sstream> +#include <fstream> +#include <map> +#include <vector> +// Boost (Extended STL) +#include <boost/date_time/posix_time/posix_time.hpp> +#include <boost/date_time/gregorian/gregorian.hpp> +#include <boost/program_options.hpp> +// OPENTREP +#include <opentrep/OPENTREP_Service.hpp> + +// ///////// Parsing of Options & Configuration ///////// +// A helper function to simplify the main part. +template<class T> std::ostream& operator<< (std::ostream& os, + const std::vector<T>& v) { + std::copy (v.begin(), v.end(), std::ostream_iterator<T> (std::cout, " ")); + return os; +} + +int readConfiguration (int argc, char* argv[]) { + int opt; + + // Declare a group of options that will be + // allowed only on command line + boost::program_options::options_description generic("Generic options"); + generic.add_options() + ("version,v", "print version string") + ("help,h", "produce help message"); + + // Declare a group of options that will be allowed both on command line and in + // config file + boost::program_options::options_description config("Configuration"); + config.add_options() + ("optimization", + boost::program_options::value<int>(&opt)->default_value(10), + "optimization level") + ("include-path,I", + boost::program_options::value< std::vector<std::string> >()->composing(), + "include path"); + + // Hidden options, will be allowed both on command line and + // in config file, but will not be shown to the user. 
+ boost::program_options::options_description hidden("Hidden options"); + hidden.add_options() + ("input-file", + boost::program_options::value< std::vector<std::string> >(), + "input file"); + + boost::program_options::options_description cmdline_options; + cmdline_options.add(generic).add(config).add(hidden); + + boost::program_options::options_description config_file_options; + config_file_options.add(config).add(hidden); + + boost::program_options::options_description visible("Allowed options"); + visible.add(generic).add(config); + + boost::program_options::positional_options_description p; + p.add("input-file", -1); + + boost::program_options::variables_map vm; + boost::program_options:: + store (boost::program_options::command_line_parser(argc, argv). + options (cmdline_options).positional(p).run(), vm); + + std::ifstream ifs ("request_parser.cfg"); + boost::program_options::store (parse_config_file (ifs, config_file_options), + vm); + boost::program_options::notify (vm); + + if (vm.count ("help")) { + std::cout << visible << std::endl; + return 0; + } + + if (vm.count ("version")) { + std::cout << "Open Travel Request Parser, version 1.0" << std::endl; + return 0; + } + + if (vm.count ("include-path")) { + std::cout << "Include paths are: " + << vm["include-path"].as< std::vector<std::string> >() + << std::endl; + } + + if (vm.count ("input-file")) { + std::cout << "Input files are: " + << vm["input-file"].as< std::vector<std::string> >() + << std::endl; + } + + std::cout << "Optimization level is " << opt << std::endl; + + return 0; +} + + +// /////////////// M A I N ///////////////// +int main (int argc, char* argv[]) { + try { + + // Output log File + std::string lLogFilename ("indexer.log"); + + // Xapian database name (directory of the index) + OPENTREP::TravelDatabaseName_T lXapianDatabaseName ("traveldb"); + + if (argc >= 1 && argv[1] != NULL) { + std::istringstream istr (argv[1]); + istr >> lLogFilename; + } + + if (argc >= 2 && argv[2] != NULL) { + 
std::istringstream istr (argv[2]); + istr >> lXapianDatabaseName; + } + + // Set the log parameters + std::ofstream logOutputFile; + // open and clean the log outputfile + logOutputFile.open (lLogFilename.c_str()); + logOutputFile.clear(); + + // Initialise the context + OPENTREP::OPENTREP_Service opentrepService; + opentrepService.init (logOutputFile, lXapianDatabaseName); + + // Launch the indexation + opentrepService.buildSearchIndex(); + + // Close the Log outputFile + logOutputFile.close(); + + + } catch (const OPENTREP::RootException& otexp) { + std::cerr << "Standard exception: " << otexp.what() << std::endl; + return -1; + + } catch (const std::exception& stde) { + std::cerr << "Standard exception: " << stde.what() << std::endl; + return -1; + + } catch (...) { + return -1; + } + + return 0; +} Modified: trunk/opentrep/opentrep/batches/searcher.cpp =================================================================== --- trunk/opentrep/opentrep/batches/searcher.cpp 2009-07-16 16:45:48 UTC (rev 132) +++ trunk/opentrep/opentrep/batches/searcher.cpp 2009-07-17 00:10:38 UTC (rev 133) @@ -1,160 +1,160 @@ -// C -#include <assert.h> -// STL -#include <iostream> -#include <sstream> -#include <fstream> -#include <map> -#include <vector> -// Boost (Extended STL) -#include <boost/date_time/posix_time/posix_time.hpp> -#include <boost/date_time/gregorian/gregorian.hpp> -#include <boost/program_options.hpp> -// OPENTREP -#include <opentrep/OPENTREP_Service.hpp> - -// ///////// Parsing of Options & Configuration ///////// -// A helper function to simplify the main part. 
-template<class T> std::ostream& operator<< (std::ostream& os, - const std::vector<T>& v) { - std::copy (v.begin(), v.end(), std::ostream_iterator<T> (std::cout, " ")); - return os; -} - -int readConfiguration (int argc, char* argv[]) { - int opt; - - // Declare a group of options that will be - // allowed only on command line - boost::program_options::options_description generic("Generic options"); - generic.add_options() - ("version,v", "print version string") - ("help,h", "produce help message"); - - // Declare a group of options that will be allowed both on command line and in - // config file - boost::program_options::options_description config("Configuration"); - config.add_options() - ("optimization", - boost::program_options::value<int>(&opt)->default_value(10), - "optimization level") - ("include-path,I", - boost::program_options::value< std::vector<std::string> >()->composing(), - "include path"); - - // Hidden options, will be allowed both on command line and - // in config file, but will not be shown to the user. - boost::program_options::options_description hidden("Hidden options"); - hidden.add_options() - ("input-file", - boost::program_options::value< std::vector<std::string> >(), - "input file"); - - boost::program_options::options_description cmdline_options; - cmdline_options.add(generic).add(config).add(hidden); - - boost::program_options::options_description config_file_options; - config_file_options.add(config).add(hidden); - - boost::program_options::options_description visible("Allowed options"); - visible.add(generic).add(config); - - boost::program_options::positional_options_description p; - p.add("input-file", -1); - - boost::program_options::variables_map vm; - boost::program_options:: - store (boost::program_options::command_line_parser(argc, argv). 
- options (cmdline_options).positional(p).run(), vm); - - std::ifstream ifs ("request_parser.cfg"); - boost::program_options::store (parse_config_file (ifs, config_file_options), - vm); - boost::program_options::notify (vm); - - if (vm.count ("help")) { - std::cout << visible << std::endl; - return 0; - } - - if (vm.count ("version")) { - std::cout << "Open Travel Request Parser, version 1.0" << std::endl; - return 0; - } - - if (vm.count ("include-path")) { - std::cout << "Include paths are: " - << vm["include-path"].as< std::vector<std::string> >() - << std::endl; - } - - if (vm.count ("input-file")) { - std::cout << "Input files are: " - << vm["input-file"].as< std::vector<std::string> >() - << std::endl; - } - - std::cout << "Optimization level is " << opt << std::endl; - - return 0; -} - - -// /////////////// M A I N ///////////////// -int main (int argc, char* argv[]) { - try { - - // Travel query - OPENTREP::TravelQuery_T lTravelQuery ("cdg"); - - // Output log File - std::string lLogFilename ("searcher.log"); - - // Xapian database name (directory of the index) - OPENTREP::TravelDatabaseName_T lXapianDatabaseName ("traveldb"); - - if (argc >= 1 && argv[1] != NULL) { - std::istringstream istr (argv[1]); - istr >> lTravelQuery; - } - - if (argc >= 2 && argv[2] != NULL) { - std::istringstream istr (argv[2]); - istr >> lLogFilename; - } - - if (argc >= 3 && argv[3] != NULL) { - std::istringstream istr (argv[3]); - istr >> lXapianDatabaseName; - } - - // Set the log parameters - std::ofstream logOutputFile; - // open and clean the log outputfile - logOutputFile.open (lLogFilename.c_str()); - logOutputFile.clear(); - - // Initialise the context - OPENTREP::OPENTREP_Service opentrepService; - opentrepService.init (logOutputFile, lXapianDatabaseName); - - // Query the Xapian database (index) - opentrepService.interpretTravelRequest (lTravelQuery); - - // Close the Log outputFile - logOutputFile.close(); - - } catch (const OPENTREP::RootException& otexp) { - 
std::cerr << "Standard exception: " << otexp.what() << std::endl; - return -1; - - } catch (const std::exception& stde) { - std::cerr << "Standard exception: " << stde.what() << std::endl; - return -1; - - } catch (...) { - return -1; - } - - return 0; -} +// C +#include <assert.h> +// STL +#include <iostream> +#include <sstream> +#include <fstream> +#include <map> +#include <vector> +// Boost (Extended STL) +#include <boost/date_time/posix_time/posix_time.hpp> +#include <boost/date_time/gregorian/gregorian.hpp> +#include <boost/program_options.hpp> +// OPENTREP +#include <opentrep/OPENTREP_Service.hpp> + +// ///////// Parsing of Options & Configuration ///////// +// A helper function to simplify the main part. +template<class T> std::ostream& operator<< (std::ostream& os, + const std::vector<T>& v) { + std::copy (v.begin(), v.end(), std::ostream_iterator<T> (std::cout, " ")); + return os; +} + +int readConfiguration (int argc, char* argv[]) { + int opt; + + // Declare a group of options that will be + // allowed only on command line + boost::program_options::options_description generic("Generic options"); + generic.add_options() + ("version,v", "print version string") + ("help,h", "produce help message"); + + // Declare a group of options that will be allowed both on command line and in + // config file + boost::program_options::options_description config("Configuration"); + config.add_options() + ("optimization", + boost::program_options::value<int>(&opt)->default_value(10), + "optimization level") + ("include-path,I", + boost::program_options::value< std::vector<std::string> >()->composing(), + "include path"); + + // Hidden options, will be allowed both on command line and + // in config file, but will not be shown to the user. 
+ boost::program_options::options_description hidden("Hidden options"); + hidden.add_options() + ("input-file", + boost::program_options::value< std::vector<std::string> >(), + "input file"); + + boost::program_options::options_description cmdline_options; + cmdline_options.add(generic).add(config).add(hidden); + + boost::program_options::options_description config_file_options; + config_file_options.add(config).add(hidden); + + boost::program_options::options_description visible("Allowed options"); + visible.add(generic).add(config); + + boost::program_options::positional_options_description p; + p.add("input-file", -1); + + boost::program_options::variables_map vm; + boost::program_options:: + store (boost::program_options::command_line_parser(argc, argv). + options (cmdline_options).positional(p).run(), vm); + + std::ifstream ifs ("request_parser.cfg"); + boost::program_options::store (parse_config_file (ifs, config_file_options), + vm); + boost::program_options::notify (vm); + + if (vm.count ("help")) { + std::cout << visible << std::endl; + return 0; + } + + if (vm.count ("version")) { + std::cout << "Open Travel Request Parser, version 1.0" << std::endl; + return 0; + } + + if (vm.count ("include-path")) { + std::cout << "Include paths are: " + << vm["include-path"].as< std::vector<std::string> >() + << std::endl; + } + + if (vm.count ("input-file")) { + std::cout << "Input files are: " + << vm["input-file"].as< std::vector<std::string> >() + << std::endl; + } + + std::cout << "Optimization level is " << opt << std::endl; + + return 0; +} + + +// /////////////// M A I N ///////////////// +int main (int argc, char* argv[]) { + try { + + // Travel query + OPENTREP::TravelQuery_T lTravelQuery ("sna francisco rio de janero lso angeles"); + + // Output log File + std::string lLogFilename ("searcher.log"); + + // Xapian database name (directory of the index) + OPENTREP::TravelDatabaseName_T lXapianDatabaseName ("traveldb"); + + if (argc >= 1 && argv[1] != NULL) { + 
std::istringstream istr (argv[1]); + istr >> lTravelQuery; + } + + if (argc >= 2 && argv[2] != NULL) { + std::istringstream istr (argv[2]); + istr >> lLogFilename; + } + + if (argc >= 3 && argv[3] != NULL) { + std::istringstream istr (argv[3]); + istr >> lXapianDatabaseName; + } + + // Set the log parameters + std::ofstream logOutputFile; + // open and clean the log outputfile + logOutputFile.open (lLogFilename.c_str()); + logOutputFile.clear(); + + // Initialise the context + OPENTREP::OPENTREP_Service opentrepService; + opentrepService.init (logOutputFile, lXapianDatabaseName); + + // Query the Xapian database (index) + opentrepService.interpretTravelRequest (lTravelQuery); + + // Close the Log outputFile + logOutputFile.close(); + + } catch (const OPENTREP::RootException& otexp) { + std::cerr << "Standard exception: " << otexp.what() << std::endl; + return -1; + + } catch (const std::exception& stde) { + std::cerr << "Standard exception: " << stde.what() << std::endl; + return -1; + + } catch (...) 
{ + return -1; + } + + return 0; +} Modified: trunk/opentrep/opentrep/bom/BomAbstract.hpp =================================================================== --- trunk/opentrep/opentrep/bom/BomAbstract.hpp 2009-07-16 16:45:48 UTC (rev 132) +++ trunk/opentrep/opentrep/bom/BomAbstract.hpp 2009-07-17 00:10:38 UTC (rev 133) @@ -1,5 +1,5 @@ -#ifndef __OPENTREP_BOMABSTRACT_HPP -#define __OPENTREP_BOMABSTRACT_HPP +#ifndef __OPENTREP_BOM_BOMABSTRACT_HPP +#define __OPENTREP_BOM_BOMABSTRACT_HPP // ////////////////////////////////////////////////////////////////////// // Import section @@ -92,4 +92,4 @@ return ioIn; } -#endif // __OPENTREP_BOMABSTRACT_HPP +#endif // __OPENTREP_BOM_BOMABSTRACT_HPP Added: trunk/opentrep/opentrep/bom/DocumentList.hpp =================================================================== --- trunk/opentrep/opentrep/bom/DocumentList.hpp (rev 0) +++ trunk/opentrep/opentrep/bom/DocumentList.hpp 2009-07-17 00:10:38 UTC (rev 133) @@ -0,0 +1,18 @@ +#ifndef __OPENTREP_BOM_DOCUMENTLIST_HPP +#define __OPENTREP_BOM_DOCUMENTLIST_HPP + +// ////////////////////////////////////////////////////////////////////// +// Import section +// ////////////////////////////////////////////////////////////////////// +// STL +#include <map> +// Xapian +#include <xapian.h> + +namespace OPENTREP { + + /** List of Xapian documents. 
*/ + typedef std::multimap<Xapian::percent, Xapian::Document> DocumentList_T; + +} +#endif // __OPENTREP_BOM_DOCUMENTLIST_HPP Added: trunk/opentrep/opentrep/bom/Result.cpp =================================================================== --- trunk/opentrep/opentrep/bom/Result.cpp (rev 0) +++ trunk/opentrep/opentrep/bom/Result.cpp 2009-07-17 00:10:38 UTC (rev 133) @@ -0,0 +1,131 @@ +// ////////////////////////////////////////////////////////////////////// +// Import section +// ////////////////////////////////////////////////////////////////////// +// C +#include <cassert> +#include <string> +#include <sstream> +// OPENTREP +#include <opentrep/bom/StringMatcher.hpp> +#include <opentrep/bom/Result.hpp> +#include <opentrep/service/Logger.hpp> + +namespace OPENTREP { + + // ////////////////////////////////////////////////////////////////////// + Result::Result (const Xapian::Database& iDatabase) + : _database (iDatabase) { + init(); + } + + // ////////////////////////////////////////////////////////////////////// + Result::~Result () { + } + + // ////////////////////////////////////////////////////////////////////// + void Result::init () { + _documentList.clear(); + } + + // ////////////////////////////////////////////////////////////////////// + const std::string Result::describeShortKey() const { + std::ostringstream oStr; + oStr << _queryString; + return oStr.str(); + } + + // ////////////////////////////////////////////////////////////////////// + const std::string Result::describeKey() const { + return describeShortKey(); + } + + // ////////////////////////////////////////////////////////////////////// + std::string Result::toString() const { + std::ostringstream oStr; + oStr << describeShortKey() << std::endl; + + for (DocumentList_T::const_iterator itDoc = _documentList.begin(); + itDoc != _documentList.end(); ++itDoc) { + const Xapian::percent& lPercent = itDoc->first; + const Xapian::Document& lDocument = itDoc->second; + const Xapian::docid& lDocID = 
lDocument.get_docid(); + oStr << "Document ID " << lDocID << "\t" << lPercent + << "% [" << lDocument.get_data() << "]" << std::endl; + } + + return oStr.str(); + } + + // ////////////////////////////////////////////////////////////////////// + void Result::toStream (std::ostream& ioOut) const { + ioOut << toString(); + } + + // ////////////////////////////////////////////////////////////////////// + void Result::fromStream (std::istream& ioIn) { + } + + // ////////////////////////////////////////////////////////////////////// + const Xapian::Document& Result::getBestMatchingDocument() const { + /** + Retrieve the best matching document. As the document list (STL map) + is sorted by ascending order of the matching percentage, the best + matching one is located at the end (back) of the list (STL map). + */ + DocumentList_T::const_reverse_iterator itDocument = _documentList.rbegin(); + return itDocument->second; + } + + // ////////////////////////////////////////////////////////////////////// + const Xapian::percent& Result::getBestMatchingPercentage() const { + /** + Retrieve the best matching document. As the document list (STL map) + is sorted by ascending order of the matching percentage, the best + matching one is located at the end (back) of the list (STL map). 
+ */ + DocumentList_T::const_reverse_iterator itDocument = _documentList.rbegin(); + return itDocument->first; + } + + // ////////////////////////////////////////////////////////////////////// + void Result::searchString () { + + // Catch any Xapian::Error exceptions thrown + try { + + bool shouldStop = false; + while (shouldStop == false) { + // DEBUG + /* + OPENTREP_LOG_DEBUG (std::endl << "--------------------------------" + << std::endl << "Current query string: `" << ioQueryString << "'"); + */ + + // Retrieve the list of documents matching the query string + Xapian::MSet lMatchingSet; + StringMatcher::searchString (lMatchingSet, _queryString, _database); + + // Create the corresponding list of documents + StringMatcher::createDocumentListFromMSet (lMatchingSet, _documentList); + + // Stop if a result is found. + if (_documentList.empty() == false) { + shouldStop = true; + break; + } + + // Remove a word from the query string + StringMatcher::removeOneWord (_queryString); + + // Stop when the resulting string gets empty. + if (_queryString.empty() == true) { + shouldStop = true; + } + } + + } catch (const Xapian::Error& error) { + OPENTREP_LOG_ERROR ("Exception: " << error.get_msg()); + } + } + +} Added: trunk/opentrep/opentrep/bom/Result.hpp =================================================================== --- trunk/opentrep/opentrep/bom/Result.hpp (rev 0) +++ trunk/opentrep/opentrep/bom/Result.hpp 2009-07-17 00:10:38 UTC (rev 133) @@ -0,0 +1,100 @@ +#ifndef __OPENTREP_BOM_RESULT_HPP +#define __OPENTREP_BOM_RESULT_HPP + +// ////////////////////////////////////////////////////////////////////// +// Import section +// ////////////////////////////////////////////////////////////////////// +// OpenTREP +#include <opentrep/OPENTREP_Types.hpp> +#include <opentrep/bom/BomAbstract.hpp> +#include <opentrep/bom/DocumentList.hpp> + +namespace OPENTREP { + + /** Class wrapping functions on a list of Xapian Document objects. 
*/ + class Result : public BomAbstract { + friend class FacResult; + friend class ResultHolder; + public: + // ////////////// Getters ///////////// + /** Get the query string. */ + const TravelQuery_T& getQueryString () const { + return _queryString; + } + + /** Get the list of Xapian document objects. */ + const DocumentList_T& getDocumentList() const { + return _documentList; + } + + /** Retrieve the best matching Xapian document object. */ + const Xapian::Document& getBestMatchingDocument() const; + + /** Retrieve the percentage corresponding to the best matching + Xapian document object. */ + const Xapian::percent& getBestMatchingPercentage() const; + + + // ////////////// Setters ///////////// + /** Set the query string. */ + void setQueryString (const TravelQuery_T& iQueryString) { + _queryString = iQueryString; + } + + + public: + // /////////// Business methods ///////// + /** Retrieve the list of documents matching the query string. */ + void searchString (); + + + public: + // /////////// Display support methods ///////// + /** Dump a Business Object into an output stream. + @param ostream& the output stream. */ + void toStream (std::ostream& ioOut) const; + + /** Read a Business Object from an input stream. + @param istream& the input stream. */ + void fromStream (std::istream& ioIn); + + /** Get the serialised version of the Business Object. */ + std::string toString() const; + + /** Get a string describing the whole key (differentiating two objects + at any level). */ + const std::string describeKey() const; + + /** Get a string describing the short key (differentiating two objects + at the same level). */ + const std::string describeShortKey() const; + + + private: + // ////////////// Constructors and Destructors ///////////// + /** Main constructor. */ + Result (const Xapian::Database&); + /** Default constructor. */ + Result (); + /** Default copy constructor. */ + Result (const Result&); + /** Destructor. 
*/ + ~Result (); + /** Initialise (reset the list of documents). */ + void init (); + + + private: + // /////////////// Attributes //////////////// + /** Query string having generated the list of document. */ + TravelQuery_T _queryString; + + /** Xapian database. */ + const Xapian::Database& _database; + + /** List of Xapian document objects. */ + DocumentList_T _documentList; + }; + +} +#endif // __OPENTREP_BOM_RESULT_HPP Added: trunk/opentrep/opentrep/bom/ResultHolder.cpp =================================================================== --- trunk/opentrep/opentrep/bom/ResultHolder.cpp (rev 0) +++ trunk/opentrep/opentrep/bom/ResultHolder.cpp 2009-07-17 00:10:38 UTC (rev 133) @@ -0,0 +1,133 @@ +// ////////////////////////////////////////////////////////////////////// +// Import section +// ////////////////////////////////////////////////////////////////////// +// C +#include <cassert> +#include <sstream> +// Xapian +#include <xapian.h> +// OpenTREP +#include <opentrep/bom/StringMatcher.hpp> +#include <opentrep/bom/Result.hpp> +#include <opentrep/bom/ResultHolder.hpp> +#include <opentrep/service/Logger.hpp> + +namespace OPENTREP { + + // ////////////////////////////////////////////////////////////////////// + ResultHolder::ResultHolder (const TravelQuery_T& iQueryString, + const Xapian::Database& iDatabase) + : _queryString (iQueryString), _database (iDatabase) { + init(); + } + + // ////////////////////////////////////////////////////////////////////// + ResultHolder::~ResultHolder () { + } + + // ////////////////////////////////////////////////////////////////////// + void ResultHolder::init () { + _resultList.clear(); + } + + // ////////////////////////////////////////////////////////////////////// + const std::string ResultHolder::describeShortKey() const { + std::ostringstream oStr; + oStr << _queryString; + return oStr.str(); + } + + // ////////////////////////////////////////////////////////////////////// + const std::string ResultHolder::describeKey() 
const { + return describeShortKey(); + } + + // ////////////////////////////////////////////////////////////////////// + std::string ResultHolder::toString() const { + std::ostringstream oStr; + oStr << describeShortKey() << std::endl; + + for (ResultList_T::const_iterator itResult = _resultList.begin(); + itResult != _resultList.end(); ++itResult) { + const Result* lResult_ptr = *itResult; + assert (lResult_ptr != NULL); + + oStr << " ==> " << std::endl << lResult_ptr->toString(); + } + + return oStr.str(); + } + + // ////////////////////////////////////////////////////////////////////// + void ResultHolder::toStream (std::ostream& ioOut) const { + ioOut << toString(); + } + + // ////////////////////////////////////////////////////////////////////// + void ResultHolder::fromStream (std::istream& ioIn) { + } + + // ////////////////////////////////////////////////////////////////////// + void ResultHolder::searchString () { + + // Catch any Xapian::Error exceptions thrown + try { + + std::string lRemainingQueryString (_queryString); + bool shouldStop = false; + while (shouldStop == false) { + // DEBUG + /* + OPENTREP_LOG_DEBUG (std::endl + << "================================" << std::endl + << "Current query string: `" << lRemainingQueryString << "'"); + */ + /** + Search with the initial full string, then by removing a word if + there was no result, then by removing another word if there was + again no result, until either a result is found or the + resulting string gets empty. + */ + DocumentList_T lDocumentList; + Result* lResult_ptr = new Result (_database); + assert (lResult_ptr != NULL); + + std::string lQueryString (lRemainingQueryString); + + // + lResult_ptr->setQueryString (lQueryString); + lResult_ptr->searchString (); + + // Add the Result object (holding the list of matching + // documents) to the dedicated list. 
+ _resultList.push_back (lResult_ptr); + + /** + Remove, from the lRemainingQueryString string, the part which + has been already successfully parsed. + <br>For instance, when 'sna francisco rio de janeiro' is the + initial full clean query string, the searchString() method + first reduce the query string to 'sna francisco', which + successfully matches against SFO (San Francisco airport). + <br>Then, the remaining part of the query string to be parsed is + 'rio de janeiro'. So, the already parsed part, namely + 'sna francisco', must be subtracted from the initial query string. + */ + lQueryString = lResult_ptr->getQueryString(); + StringMatcher::subtractParsedToRemaining (lQueryString, + lRemainingQueryString); + + // If there is nothing left to be parsed, we have then finished + // to parse the initial string. + if (lRemainingQueryString.empty() == true) { + shouldStop = true; + break; + } + } + + } catch (const Xapian::Error& error) { + OPENTREP_LOG_ERROR ("Exception: " << error.get_msg()); + } + } + +} Added: trunk/opentrep/opentrep/bom/ResultHolder.hpp =================================================================== --- trunk/opentrep/opentrep/bom/ResultHolder.hpp (rev 0) +++ trunk/opentrep/opentrep/bom/ResultHolder.hpp 2009-07-17 00:10:38 UTC (rev 133) @@ -0,0 +1,94 @@ +#ifndef __OPENTREP_BOM_RESULTHOLDER_HPP +#define __OPENTREP_BOM_RESULTHOLDER_HPP + +// ////////////////////////////////////////////////////////////////////// +// Import section +// ////////////////////////////////////////////////////////////////////// +// OpenTREP +#include <opentrep/OPENTREP_Types.hpp> +#include <opentrep/bom/BomAbstract.hpp> +#include <opentrep/bom/ResultList.hpp> + +// Forward declarations +namespace Xapian { + class Database; +} + +namespace OPENTREP { + + /** Class wrapping functions on a list of Result objects. 
*/ + class ResultHolder : public BomAbstract { + friend class FacResultHolder; + friend class RequestInterpreter; + public: + // ////////////// Getters ///////////// + /** Get the query string. */ + const TravelQuery_T& getQueryString () const { + return _queryString; + } + + /** Retrieve the list of result objects. */ + const ResultList_T& getResultList() const { + return _resultList; + } + + + // ////////////// Setters ///////////// + + + public: + // /////////// Business methods ///////// + /** Retrieve the list of documents matching the query string. */ + void searchString (); + + + public: + // /////////// Display support methods ///////// + /** Dump a Business Object into an output stream. + @param ostream& the output stream. */ + void toStream (std::ostream& ioOut) const; + + /** Read a Business Object from an input stream. + @param istream& the input stream. */ + void fromStream (std::istream& ioIn); + + /** Get the serialised version of the Business Object. */ + std::string toString() const; + + /** Get a string describing the whole key (differentiating two objects + at any level). */ + const std::string describeKey() const; + + /** Get a string describing the short key (differentiating two objects + at the same level). */ + const std::string describeShortKey() const; + + + private: + // ////////////// Constructors and Destructors ///////////// + /** Main constructor. */ + ResultHolder (const TravelQuery_T&, const Xapian::Database&); + /** Default constructor. */ + ResultHolder (); + /** Default copy constructor. */ + ResultHolder (const ResultHolder&); + /** Destructor. */ + ~ResultHolder (); + /** Initialise (reset the list of documents). */ + void init (); + + + private: + // /////////////// Attributes //////////////// + /** Query string having generated the list of document. */ + const TravelQuery_T _queryString; + + /** Xapian database. */ + const Xapian::Database& _database; + + /** List of result objects. 
*/ + ResultList_T _resultList; + }; + +} +#endif // __OPENTREP_BOM_RESULTHOLDER_HPP Added: trunk/opentrep/opentrep/bom/ResultList.hpp =================================================================== --- trunk/opentrep/opentrep/bom/ResultList.hpp (rev 0) +++ trunk/opentrep/opentrep/bom/ResultList.hpp 2009-07-17 00:10:38 UTC (rev 133) @@ -0,0 +1,19 @@ +#ifndef __OPENTREP_BOM_RESULTLIST_HPP +#define __OPENTREP_BOM_RESULTLIST_HPP + +// ////////////////////////////////////////////////////////////////////// +// Import section +// ////////////////////////////////////////////////////////////////////// +// STL +#include <list> + +namespace OPENTREP { + + // Forward declarations + class Result; + + /** List of result objects. */ + typedef std::list<Result*> ResultList_T; + +} +#endif // __OPENTREP_BOM_RESULTLIST_HPP Added: trunk/opentrep/opentrep/bom/StringMatcher.cpp =================================================================== --- trunk/opentrep/opentrep/bom/StringMatcher.cpp (rev 0) +++ trunk/opentrep/opentrep/bom/StringMatcher.cpp 2009-07-17 00:10:38 UTC (rev 133) @@ -0,0 +1,319 @@ +// ////////////////////////////////////////////////////////////////////// +// Import section +// ////////////////////////////////////////////////////////////////////// +// C +#include <cassert> +// STL +#include <iostream> +#include <sstream> +#include <string> +#include <list> +#include <map> +// Boost +#include <boost/tokenizer.hpp> +// Xapian +#include <xapian.h> +// OpenTREP +#include <opentrep/bom/WordHolder.hpp> +#include <opentrep/bom/StringMatcher.hpp> +#include <opentrep/service/Logger.hpp> + +namespace OPENTREP { + + /** For each of the word in the given list, perform spelling + corrections. If the word is correctly spelled, it is copied as + is. Otherwise, a corrected version is stored. 
*/ + // ////////////////////////////////////////////////////////////////////// + static void createCorrectedWordList (const WordList_T& iOriginalWordList, + WordList_T& ioCorrectedWordList, + const Xapian::Database& iDatabase) { + // Empty the target list + ioCorrectedWordList.clear(); + + // Catch any Xapian::Error exceptions thrown + try { + + for (WordList_T::const_iterator itWord = iOriginalWordList.begin(); + itWord != iOriginalWordList.end(); ++itWord) { + const std::string& lOriginalWord = *itWord; + const std::string& lSuggestedWord = + iDatabase.get_spelling_suggestion (lOriginalWord, 3); + + if (lSuggestedWord.empty() == true) { + ioCorrectedWordList.push_back (lOriginalWord); + + } else { + ioCorrectedWordList.push_back (lSuggestedWord); + } + + // DEBUG + /* + OPENTREP_LOG_DEBUG ("Original word: `" << lOriginalWord + << "' ==> corrected word: `" << lSuggestedWord << "'"); + */ + } + + } catch (const Xapian::Error& error) { + std::cerr << "Exception: " << error.get_msg() << std::endl; + } + } + + // /////////////////////////////////////////////////////////////////// + void StringMatcher::searchString (Xapian::MSet& ioMatchingSet, + const std::string& iSearchString, + const Xapian::Database& ioDatabase) { + + // Catch any Xapian::Error exceptions thrown + try { + + /** + Build another string, in addition to the original one. Overall, + there are thus two strings: + <br><ul> + <li>One with the original words given by the user</li> + <li>One with the orthographic-corrected words, wherever + relevant (otherwise, the original word is taken)</li> + </ul> + <br>For instance, 'sna francisco' would give the following + two strings: + <br><ul> + <li>'sna francicso' (original)</li> + <li>'sna francisco' (corrected, where relevant, word by word)</li> + </ul> + <br>Note that, as 'sna' exists in the dictionary (Santa Ana, CA, USA), + it is not replaced. We shall take care of the whole string in a + further step below. 
+ */ + WordList_T lOriginalWordList; + WordHolder::tokeniseStringIntoWordList (iSearchString, lOriginalWordList); + + /** + We rebuild a clean query string from the word list. Indeed, the original + string may have contained a few separators (e.g., '/', ';', etc.), which + have been removed by the tokeniseStringIntoWordList() method. All those + separators are thus replaced by spaces. + For instance, the 'san francisco, ca, us' initial string would be + replaced by 'san francisco ca us'. + */ + const std::string lOriginalQueryString = + WordHolder::createStringFromWordList (lOriginalWordList); + + WordList_T lCorrectedWordList; + createCorrectedWordList (lOriginalWordList, lCorrectedWordList, + ioDatabase); + + const std::string lCorrectedQueryString = + WordHolder::createStringFromWordList (lCorrectedWordList); + + /** + Try to find, if relevant, an orthographic suggestion for the whole + phrase/string. With the above example, 'sna francisco' yields the + suggestion 'san francisco'. + */ + const std::string lFullWordCorrectedString = + ioDatabase.get_spelling_suggestion (lOriginalQueryString, 3); + + // DEBUG + /* + OPENTREP_LOG_DEBUG ("Query string `" << lOriginalQueryString + << "' ==> corrected query string: `" << lCorrectedQueryString + << "' and correction for the full query string: `" + << lFullWordCorrectedString << "'"); + */ + + // Build the query object + Xapian::QueryParser lQueryParser; + lQueryParser.set_database (ioDatabase); + /** + As explained in http://www.xapian.org/docs/queryparser.html, + Xapian::Query::OP_ADJ is better than Xapian::Query::OP_PHRASE, + but only available from version 1.0.13 of Xapian. 
+ */ + // lQueryParser.set_default_op (Xapian::Query::OP_ADJ); + lQueryParser.set_default_op (Xapian::Query::OP_PHRASE); + + // DEBUG + /* + OPENTREP_LOG_DEBUG ("Query parser `" << lQueryParser.get_description() + << "'"); + */ + + /** + The Xapian::QueryParser::parse_query() method aggregates all the words + with operators inbetween them (here, the "PHRASE" operator). + With the above example ('sna francicso'), it yields + "sna PHRASE 2 francicso". + */ + Xapian::Query lQuery = + lQueryParser.parse_query (lOriginalQueryString, + Xapian::QueryParser::FLAG_BOOLEAN + | Xapian::QueryParser::FLAG_PHRASE + | Xapian::QueryParser::FLAG_LOVEHATE + | Xapian::QueryParser::FLAG_SPELLING_CORRECTION); + /** + Strangely enough (is it?), the corrected query given by the Xapian + QueryParser corresponds to the full original string, where words + have been corrected one by one, but considered as a single block. + With the above example, 'sna francicso' yields (wrongly) + 'sna francisco', instead of "sna PHRASE 2 francisco", as generated + by the following code. + */ + // Xapian::Query lCorrectedQuery = + // lQueryParser.get_corrected_query_string(); + Xapian::Query lCorrectedQuery = + lQueryParser.parse_query (lCorrectedQueryString, + Xapian::QueryParser::FLAG_BOOLEAN + | Xapian::QueryParser::FLAG_PHRASE + | Xapian::QueryParser::FLAG_LOVEHATE); + + /** + As, with the above example, the full corrected string is + 'san francisco', it yields the query "san PHRASE 2 francisco", + which is eventually right. 
+ */ + Xapian::Query lFullQueryCorrected = + lQueryParser.parse_query (lFullWordCorrectedString, + Xapian::QueryParser::FLAG_BOOLEAN + | Xapian::QueryParser::FLAG_PHRASE + | Xapian::QueryParser::FLAG_LOVEHATE); + + // DEBUG + /* + OPENTREP_LOG_DEBUG ("Query `" << lQuery.get_description() + << "', corrected query `" << lCorrectedQuery.get_description() + << "' and corrected for full query `" + << lFullQueryCorrected.get_description() << "' "); + */ + + // Start an enquire session + Xapian::Enquire enquire (ioDatabase); + + // Give the query object to the enquire session + enquire.set_query (lQuery); + + // Get the top 10 results of the query + ioMatchingSet = enquire.get_mset (0, 10); + + // Display the results + int nbMatches = ioMatchingSet.size(); + + // DEBUG + /* + OPENTREP_LOG_DEBUG (nbMatches << " results found"); + */ + + /** + When no match is found, we search on the corrected phrase/string + (where the words have been corrected one by one). + */ + if (nbMatches == 0) { + enquire.set_query (lCorrectedQuery); + ioMatchingSet = enquire.get_mset (0, 10); + + // Display the results + nbMatches = ioMatchingSet.size(); + + // DEBUG + /* + OPENTREP_LOG_DEBUG(nbMatches << " results found on corrected string"); + */ + } + + /** + If there is still no match, we search on the string corrected + as a whole. 
+ */ + if (nbMatches == 0) { + enquire.set_query (lFullQueryCorrected); + ioMatchingSet = enquire.get_mset (0, 10); + + // Display the results + nbMatches = ioMatchingSet.size(); + + // DEBUG + /* + OPENTREP_LOG_DEBUG (nbMatches + << " results found on corrected full string"); + */ + } + + + // DEBUG + /* + const Xapian::Query& lActualQuery = enquire.get_query(); + OPENTREP_LOG_DEBUG ("Actual query `" << lActualQuery.get_description() + << "'"); + */ + + } catch (const Xapian::Error& error) { + std::cerr << "Exception: " << error.get_msg() << std::endl; + } + } + + // ////////////////////////////////////////////////////////////////////// + void StringMatcher:: + createDocumentListFromMSet (const Xapian::MSet& iMatchingSet, + DocumentList_T& ioDocumentList) { + // Empty the list of documents + ioDocumentList.clear(); + + for (Xapian::MSetIterator itDoc = iMatchingSet.begin(); + itDoc != iMatchingSet.end(); ++itDoc) { + const Xapian::Document& lDocument = itDoc.get_document(); + + ioDocumentList.insert (DocumentList_T::value_type (itDoc.get_percent(), + lDocument)); + } + } + + // ////////////////////////////////////////////////////////////////////// + void StringMatcher::removeOneWord (std::string& ioQueryString) { + assert (ioQueryString.empty() == false); + + WordList_T lWordList; + WordHolder::tokeniseStringIntoWordList (ioQueryString, lWordList); + assert (lWordList.empty() == false); + + // Remove the furthest right word + lWordList.pop_back(); + + const std::string& lReducedString = + WordHolder::createStringFromWordList (lWordList); + ioQueryString = lReducedString; + } + + // ////////////////////////////////////////////////////////////////////// + void StringMatcher:: + subtractParsedToRemaining (const std::string& iAlreadyParsedQueryString, + std::string& ioRemainingQueryString) { + /** + Remove, from the lRemainingQueryString string, the part which + has been already successfully parsed. 
+ <br>For instance, when 'sna francisco rio de janeiro' is the + initial full clean query string, the searchString() method + first reduce the query string to 'sna francisco', which + successfully matches against SFO (San Francisco airport). + <br>Then, the remaining part of the query string to be parsed is + 'rio de janeiro'. So, the already parsed part must be subtracted + from the initial query string. + */ + WordList_T lRemainingWordList; + WordHolder::tokeniseStringIntoWordList (ioRemainingQueryString, + lRemainingWordList); + + WordList_T lParsedWordList; + WordHolder::tokeniseStringIntoWordList (iAlreadyParsedQueryString, + lParsedWordList); + + unsigned int idx = lParsedWordList.size(); + for ( ; idx != 0 && lRemainingWordList.empty() == false; --idx) { + lRemainingWordList.pop_front(); + } + + // Build the remaining part of the string still to be parsed. + // <br>Note that that part may be empty. + ioRemainingQueryString = + WordHolder::createStringFromWordList (lRemainingWordList); + } + +} Added: trunk/opentrep/opentrep/bom/StringMatcher.hpp =================================================================== --- trunk/opentrep/opentrep/bom/StringMatcher.hpp (rev 0) +++ trunk/opentrep/opentrep/bom/StringMatcher.hpp 2009-07-17 00:10:38 UTC (rev 133) @@ -0,0 +1,52 @@ +#ifndef __OPENTREP_BOM_STRINGMATCHER_HPP +#define __OPENTREP_BOM_STRINGMATCHER_HPP + +// ////////////////////////////////////////////////////////////////////// +// Import section +// ////////////////////////////////////////////////////////////////////// +// STL +#include <string> +// OpenTREP +#include <opentrep/bom/BomAbstract.hpp> +#include <opentrep/bom/WordList.hpp> +#include <opentrep/bom/DocumentList.hpp> + +// Forward declarations +namespace Xapian { + class MSet; + class Database; +} + +namespace OPENTREP { + + /** Class grouping a few utility methods based on the Xapian library. + <br>See <a href="http://www.xapian.org">Xapian's Web site</a> + for more information. 
*/ + class StringMatcher : public BomAbstract { + public: + /** Search, within the Xapian database, for occurrences of the + words of the search string. */ + static void searchString (Xapian::MSet&, const std::string& iSearchString, + const Xapian::Database&); + + /** Copy the Xapian MSet (matching set) object into a document + list object. */ + static void createDocumentListFromMSet (const Xapian::MSet&, + DocumentList_T&); + + /** Remove the word furthest at right. */ + static void removeOneWord (std::string& ioQueryString); + + /** Remove, from a string, the part corresponding to the one given + as parameter. */ + static void + subtractParsedToRemaining (const std::string& iAlreadyParsedQueryString, + std::string& ioRemainingQueryString); + + + private: + + }; + +} +#endif // __OPENTREP_BOM_STRINGMATCHER_HPP Added: trunk/opentrep/opentrep/bom/WordHolder.cpp =================================================================== --- trunk/opentrep/opentrep/bom/WordHolder.cpp (rev 0) +++ trunk/opentrep/opentrep/bom/WordHolder.cpp 2009-07-17 00:10:38 UTC (rev 133) @@ -0,0 +1,53 @@ +// ////////////////////////////////////////////////////////////////////// +// Import section +// ////////////////////////////////////////////////////////////////////// +// C +#include <cassert> +// Boost +#include <boost/tokenizer.hpp> +// OpenTREP +#include <opentrep/bom/WordHolder.hpp> + +namespace OPENTREP { + + // Define the separators + static const boost::char_separator<char> SepatorList (" .,;:|+-*/_=!@#$%`~^&(){}[]?'<>\""); + + // ////////////////////////////////////////////////////////////////////// + void WordHolder::tokeniseStringIntoWordList (const std::string& iPhrase, + WordList_T& ioWordList) { + // Empty the word list + ioWordList.clear(); + + // Boost Tokeniser + typedef boost::tokenizer<boost::char_separator<char> > Tokeniser_T; + + // Initialise the phrase to be tokenised + Tokeniser_T lTokens (iPhrase, SepatorList); + for (Tokeniser_T::const_iterator tok_iter = 
lTokens.begin(); + tok_iter != lTokens.end(); ++tok_iter) { + const std::string& lTerm = *tok_iter; + ioWordList.push_back (lTerm); + } + + } + + // ////////////////////////////////////////////////////////////////////// + std::string WordHolder:: + createStringFromWordList (const WordList_T& iWordList) { + std::ostringstream oStr; + + unsigned short idx = iWordList.size(); + for (WordList_T::const_iterator itWord = iWordList.begin(); + itWord != iWordList.end(); ++itWord, --idx) { + const std::string& lWord = *itWord; + oStr << lWord; + if (idx > 1) { + oStr << " "; + } + } + + return oStr.str(); + } + +} Added: trunk/opentrep/opentrep/bom/WordHolder.hpp =================================================================== --- trunk/opentrep/opentrep/bom/WordHolder.hpp (rev 0) +++ trunk/opentrep/opentrep/bom/WordHolder.hpp 2009-07-17 00:10:38 UTC (rev 133) @@ -0,0 +1,43 @@ +#ifndef __OPENTREP_BOM_WORDHOLDER_HPP +#define __OPENTREP_BOM_WORDHOLDER_HPP + +// ////////////////////////////////////////////////////////////////////// +// Import section +// ////////////////////////////////////////////////////////////////////// +// OpenTREP +#include <opentrep/bom/BomAbstract.hpp> +#include <opentrep/bom/WordList.hpp> + +namespace OPENTREP { + + /** Class wrapping utility functions to transform back and forth + strings from and into a list of words. */ + class WordHolder : public BomAbstract { + friend class FacWordHolder; + public: + + // /////////////// Business Methods //////////////// + /** Tokenise a string into a list of words (STL strings). + <br>The Boost.Tokenizer library is used. */ + static void tokeniseStringIntoWordList (const std::string& iPhrase, + WordList_T& ioWordList); + + /** Serialise a list of words (STL strings) into a single (STL) string. */ + static std::string createStringFromWordList (const WordList_T& iWordList); + + private: + // ////////////// Constructors and Destructors ///////////// + /** Default constructor. 
*/ + WordHolder (); + /** Default copy constructor. */ + WordHolder (const WordHolder&); + /** Destructor. */ + ~WordHolder (); + + + private: + // /////////////// Attributes //////////////// + }; + +} +#endif // __OPENTREP_BOM_WORDHOLDER_HPP Added: trunk/opentrep/opentrep/bom/WordList.hpp =================================================================== --- trunk/opentrep/opentrep/bom/WordList.hpp (rev 0) +++ trunk/opentrep/opentrep/bom/WordList.hpp 2009-07-17 00:10:38 UTC (rev 133) @@ -0,0 +1,18 @@ +#ifndef __OPENTREP_BOM_WORDLIST_HPP +#define __OPENTREP_BOM_WORDLIST_HPP + +// ////////////////////////////////////////////////////////////////////// +// Import section +// ////////////////////////////////////////////////////////////////////// +// STL +#include <string> +#include <list> + +namespace OPENTREP { + + /** List of simple words (STL strings). */ + typedef std::list<std::string> WordList_T; + +} +#endif // __OPENTREP_BOM_WORDLIST_HPP + Modified: trunk/opentrep/opentrep/bom/sources.mk =================================================================== --- trunk/opentrep/opentrep/bom/sources.mk 2009-07-16 16:45:48 UTC (rev 132) +++ trunk/opentrep/opentrep/bom/sources.mk 2009-07-17 00:10:38 UTC (rev 133) @@ -5,10 +5,21 @@ $(top_srcdir)/opentrep/bom/World.hpp \ $(top_srcdir)/opentrep/bom/Names.hpp \ $(top_srcdir)/opentrep/bom/Place.hpp \ - $(top_srcdir)/opentrep/bom/PlaceList.hpp + $(top_srcdir)/opentrep/bom/PlaceList.hpp \ + $(top_srcdir)/opentrep/bom/WordList.hpp \ + $(top_srcdir)/opentrep/bom/WordHolder.hpp \ + $(top_srcdir)/opentrep/bom/DocumentList.hpp \ + $(top_srcdir)/opentrep/bom/Result.hpp \ + $(top_srcdir)/opentrep/bom/ResultList.hpp \ + $(top_srcdir)/opentrep/bom/ResultHolder.hpp \ + $(top_srcdir)/opentrep/bom/StringMatcher.hpp bom_cc_sources = $(top_srcdir)/opentrep/bom/BomAbstract.cpp \ $(top_srcdir)/opentrep/bom/BomType.cpp \ $(top_srcdir)/opentrep/bom/Language.cpp \ $(top_srcdir)/opentrep/bom/World.cpp \ $(top_srcdir)/opentrep/bom/Names.cpp \ - 
$(top_srcdir)/opentrep/bom/Place.cpp + $(top_srcdir)/opentrep/bom/Place.cpp \ + $(top_srcdir)/opentrep/bom/WordHolder.cpp \ + $(top_srcdir)/opentrep/bom/Result.cpp \ + $(top_srcdir)/opentrep/bom/ResultHolder.cpp \ + $(top_srcdir)/opentrep/bom/StringMatcher.cpp Modified: trunk/opentrep/opentrep/command/RequestInterpreter.cpp =================================================================== --- trunk/opentrep/opentrep/command/RequestInterpreter.cpp 2009-07-16 16:45:48 UTC (rev 132) +++ trunk/opentrep/opentrep/command/RequestInterpreter.cpp 2009-07-17 00:10:38 UTC (rev 133) @@ -9,6 +9,8 @@ #include <exception> // OpenTrep #include <opentrep/bom/Place.hpp> +#include <opentrep/bom/ResultHolder.hpp> +#include <opentrep/bom/Result.hpp> #include <opentrep/factory/FacPlace.hpp> #include <opentrep/command/DBManager.hpp> #include <opentrep/command/RequestInterpreter.hpp> @@ -23,44 +25,51 @@ interpretTravelRequest (soci::session& ioSociSession, const TravelDatabaseName_T& iTravelDatabaseName, ... [truncated message content] |
From: <den...@us...> - 2009-07-18 15:15:47
|
Revision: 137 http://opentrep.svn.sourceforge.net/opentrep/?rev=137&view=rev Author: denis_arnaud Date: 2009-07-18 15:15:40 +0000 (Sat, 18 Jul 2009) Log Message: ----------- [Dev] Better debugging logs. Modified Paths: -------------- trunk/opentrep/opentrep/bom/DocumentList.hpp trunk/opentrep/opentrep/bom/PlaceList.hpp trunk/opentrep/opentrep/bom/Result.cpp trunk/opentrep/opentrep/bom/Result.hpp trunk/opentrep/opentrep/bom/ResultHolder.cpp trunk/opentrep/opentrep/bom/ResultHolder.hpp trunk/opentrep/opentrep/bom/StringMatcher.cpp trunk/opentrep/opentrep/bom/StringMatcher.hpp trunk/opentrep/opentrep/bom/sources.mk trunk/opentrep/opentrep/command/RequestInterpreter.cpp trunk/opentrep/opentrep/factory/FacPlaceHolder.cpp Added Paths: ----------- trunk/opentrep/opentrep/bom/Document.cpp trunk/opentrep/opentrep/bom/Document.hpp Added: trunk/opentrep/opentrep/bom/Document.cpp =================================================================== --- trunk/opentrep/opentrep/bom/Document.cpp (rev 0) +++ trunk/opentrep/opentrep/bom/Document.cpp 2009-07-18 15:15:40 UTC (rev 137) @@ -0,0 +1,59 @@ +// ////////////////////////////////////////////////////////////////////// +// Import section +// ////////////////////////////////////////////////////////////////////// +// C +#include <cassert> +// OpenTREP +#include <opentrep/bom/Document.hpp> + +namespace OPENTREP { + + // ////////////////////////////////////////////////////////////////////// + Document::Document () { + } + + // ////////////////////////////////////////////////////////////////////// + Document::Document (const Document& iDocument) + : _queryString (iDocument._queryString), + _document (iDocument._document), + _documentList (iDocument._documentList) { + } + + // ////////////////////////////////////////////////////////////////////// + Document::~Document () { + } + + // ////////////////////////////////////////////////////////////////////// + const std::string Document::describeShortKey() const { + std::ostringstream oStr; 
+ oStr << _queryString; + return oStr.str(); + } + + // ////////////////////////////////////////////////////////////////////// + const std::string Document::describeKey() const { + return describeShortKey(); + } + + // ////////////////////////////////////////////////////////////////////// + std::string Document::toString() const { + std::ostringstream oStr; + oStr << describeShortKey() << std::endl; + + const Xapian::docid& lDocID = _document.get_docid(); + oStr << "Document ID " << lDocID << "\t" << _percentage + << "% [" << _document.get_data() << "]"; + + return oStr.str(); + } + + // ////////////////////////////////////////////////////////////////////// + void Document::toStream (std::ostream& ioOut) const { + ioOut << toString(); + } + + // ////////////////////////////////////////////////////////////////////// + void Document::fromStream (std::istream& ioIn) { + } + +} Added: trunk/opentrep/opentrep/bom/Document.hpp =================================================================== --- trunk/opentrep/opentrep/bom/Document.hpp (rev 0) +++ trunk/opentrep/opentrep/bom/Document.hpp 2009-07-18 15:15:40 UTC (rev 137) @@ -0,0 +1,123 @@ +#ifndef __OPENTREP_BOM_DOCUMENT_HPP +#define __OPENTREP_BOM_DOCUMENT_HPP + +// ////////////////////////////////////////////////////////////////////// +// Import section +// ////////////////////////////////////////////////////////////////////// +// STL +#include <list> +// OpenTREP +#include <opentrep/OPENTREP_Types.hpp> +#include <opentrep/bom/BomAbstract.hpp> +// Xapian +#include <xapian.h> + +namespace OPENTREP { + + // //////////////// Type definitions ///////////////// + /** List of Xapian documents. */ + typedef std::list<Xapian::Document> XapianDocumentList_T; + + + // //////////////// Main Class ///////////////// + /** Structure wrapping a Xapian document having matched part of a + given query string. + <br>It is a structure, as it is aimed to be temporary, the time + a Result object be created with the corresponding content. 
*/ + struct Document : public BomAbstract { + public: + // ////////////////// Getters //////////////// + /** Get the query string. */ + const TravelQuery_T& getTravelQuery() { + return _queryString; + } + + /** Get the matching Xapian document. */ + const Xapian::Document& getXapianDocument() const { + return _document; + } + + /** Get the matching percentage associated to the Xapian document. */ + const Xapian::percent& getXapianPercentage() const { + return _percentage; + } + + /** Get the extra list of matching Xapian documents. */ + const XapianDocumentList_T& getExtraDocumentList() const { + return _documentList; + } + + + // ////////////////// Setters //////////////// + void setQueryString (const TravelQuery_T& iQueryString) { + _queryString = iQueryString; + } + + /** Set the matching Xapian document. */ + void setXapianDocument (const Xapian::Document& iMatchingDocument) { + _document = iMatchingDocument; + } + + /** Set the matching percentage associated to the Xapian document. */ + void setXapianPercentage (const Xapian::percent& iPercentage) { + _percentage = iPercentage; + } + + /** Add a matching Xapian document (having the same matching percentage). */ + void addExtraDocument (const Xapian::Document& iMatchingDocument) { + _documentList.push_back (iMatchingDocument); + } + + + public: + // /////////// Display support methods ///////// + /** Dump a Business Object into an output stream. + @param ostream& the output stream. */ + void toStream (std::ostream& ioOut) const; + + /** Read a Business Object from an input stream. + @param istream& the input stream. */ + void fromStream (std::istream& ioIn); + + /** Get the serialised version of the Business Object. */ + std::string toString() const; + + /** Get a string describing the whole key (differentiating two objects + at any level). */ + const std::string describeKey() const; + + /** Get a string describing the short key (differentiating two objects + at the same level). 
*/ + const std::string describeShortKey() const; + + + public: + // //////////////// Constructors and Destructors ///////////// + /** Default constructor. */ + Document (); + /** Default copy constructor. */ + Document (const Document&); + /** Default destructor. */ + ~Document (); + + + private: + // ///////////////// Attributes ////////////////// + /** Query string with which a Xapian full text search is done. */ + TravelQuery_T _queryString; + + /** Matching percentage, as returned by the Xapian full text search. + <br>Generally, that percentage is equal to, or close to, 100%. */ + Xapian::percent _percentage; + + /** Matching document, as returned by the Xapian full text search. */ + Xapian::Document _document; + + /** List of Xapian documents having the same matching percentage. + <br>Hence, any of those other Xapian documents could have been + chosen, instead of the main one. */ + XapianDocumentList_T _documentList; + }; + +} +#endif // __OPENTREP_BOM_DOCUMENT_HPP Modified: trunk/opentrep/opentrep/bom/DocumentList.hpp =================================================================== --- trunk/opentrep/opentrep/bom/DocumentList.hpp 2009-07-18 09:33:50 UTC (rev 136) +++ trunk/opentrep/opentrep/bom/DocumentList.hpp 2009-07-18 15:15:40 UTC (rev 137) @@ -8,20 +8,14 @@ #include <list> // OpenTREP #include <opentrep/OPENTREP_Types.hpp> +#include <opentrep/bom/Document.hpp> // Xapian #include <xapian.h> namespace OPENTREP { - /** Xapian document and its associated matching percentage. */ - typedef std::pair<Xapian::percent, Xapian::Document> MatchingDocument_T; - - /** A matching Xapian document, along with the query string which it - matches. */ - typedef std::pair<TravelQuery_T, MatchingDocument_T> QueryAndDocument_T; - /** List of matching Xapian documents. 
*/ - typedef std::list<QueryAndDocument_T> DocumentList_T; + typedef std::list<Document> DocumentList_T; } #endif // __OPENTREP_BOM_DOCUMENTLIST_HPP Modified: trunk/opentrep/opentrep/bom/PlaceList.hpp =================================================================== --- trunk/opentrep/opentrep/bom/PlaceList.hpp 2009-07-18 09:33:50 UTC (rev 136) +++ trunk/opentrep/opentrep/bom/PlaceList.hpp 2009-07-18 15:15:40 UTC (rev 137) @@ -14,9 +14,10 @@ class Place; // ///////////// Type definitions //////////////////// - typedef std::size_t PlaceID_T; + // typedef std::size_t PlaceID_T; // typedef std::map<PlaceID_T, Place*> PlaceDirectList_T; - typedef std::map<std::string, Place*> PlaceList_T; + + typedef std::multimap<std::string, Place*> PlaceList_T; typedef std::list<Place*> PlaceOrderedList_T; } Modified: trunk/opentrep/opentrep/bom/Result.cpp =================================================================== --- trunk/opentrep/opentrep/bom/Result.cpp 2009-07-18 09:33:50 UTC (rev 136) +++ trunk/opentrep/opentrep/bom/Result.cpp 2009-07-18 15:15:40 UTC (rev 137) @@ -41,14 +41,7 @@ // ////////////////////////////////////////////////////////////////////// std::string Result::toString() const { std::ostringstream oStr; - oStr << describeShortKey() << std::endl; - - const Xapian::percent& lPercentage = _matchingDocument.first; - const Xapian::Document& lDocument = _matchingDocument.second; - const Xapian::docid& lDocID = lDocument.get_docid(); - oStr << "Document ID " << lDocID << "\t" << lPercentage - << "% [" << lDocument.get_data() << "]" << std::endl; - + oStr << _matchingDocument.toString(); return oStr.str(); } Modified: trunk/opentrep/opentrep/bom/Result.hpp =================================================================== --- trunk/opentrep/opentrep/bom/Result.hpp 2009-07-18 09:33:50 UTC (rev 136) +++ trunk/opentrep/opentrep/bom/Result.hpp 2009-07-18 15:15:40 UTC (rev 137) @@ -7,7 +7,7 @@ // OpenTREP #include <opentrep/OPENTREP_Types.hpp> #include 
<opentrep/bom/BomAbstract.hpp> -#include <opentrep/bom/DocumentList.hpp> +#include <opentrep/bom/Document.hpp> namespace OPENTREP { @@ -27,19 +27,19 @@ /** Get the Matching Xapian document object, along with its corresponding matching percentage. */ - const MatchingDocument_T& getMatchingDocument() const { + const Document& getMatchingDocument() const { return _matchingDocument; } /** Retrieve the percentage corresponding to the matching Xapian document object. */ - const Xapian::percent& getPercentage() const { - return _matchingDocument.first; + const Xapian::percent& getXapianPercentage() const { + return _matchingDocument.getXapianPercentage(); } /** Retrieve the matching Xapian document object. */ - const Xapian::Document& getDocument() const { - return _matchingDocument.second; + const Xapian::Document& getXapianDocument() const { + return _matchingDocument.getXapianDocument(); } @@ -51,17 +51,10 @@ /** Set the matching Xapian document object and its corresponding matching percentage. */ - void setMatchingDocument (const MatchingDocument_T& iMatchingDocument) { + void setMatchingDocument (const Document& iMatchingDocument) { _matchingDocument = iMatchingDocument; } - /** Set the matching Xapian document object and its corresponding - matching percentage. */ - void setQueryAndDocument (const QueryAndDocument_T& iQueryAndDocument) { - _queryString = iQueryAndDocument.first; - _matchingDocument = iQueryAndDocument.second; - } - public: // /////////// Display support methods ///////// @@ -112,7 +105,7 @@ /** Matching Xapian document object, along with its corresponding matching percentage. 
*/ - MatchingDocument_T _matchingDocument; + Document _matchingDocument; }; } Modified: trunk/opentrep/opentrep/bom/ResultHolder.cpp =================================================================== --- trunk/opentrep/opentrep/bom/ResultHolder.cpp 2009-07-18 09:33:50 UTC (rev 136) +++ trunk/opentrep/opentrep/bom/ResultHolder.cpp 2009-07-18 15:15:40 UTC (rev 137) @@ -69,7 +69,7 @@ // ////////////////////////////////////////////////////////////////////// bool ResultHolder::searchString (TravelQuery_T& ioPartialQueryString, - MatchingDocument_T& ioMatchingDocument) { + Document& ioMatchingDocument) { bool oFoundDocument = false; // Catch any Xapian::Error exceptions thrown @@ -78,11 +78,8 @@ bool shouldStop = false; while (shouldStop == false) { // DEBUG - /* - OPENTREP_LOG_DEBUG (std::endl << "--------------------------------" - << std::endl << "Current query string: `" << ioPartialQueryString - << "'"); - */ + OPENTREP_LOG_DEBUG ("Current query string: `" + << ioPartialQueryString << "'"); // Retrieve the list of documents matching the query string Xapian::MSet lMatchingSet; @@ -125,11 +122,10 @@ bool shouldStop = false; while (shouldStop == false) { // DEBUG - /* - OPENTREP_LOG_DEBUG (std::endl - << "================================" << std::endl - << "Current query string: `" << lRemainingQueryString << "'"); - */ + OPENTREP_LOG_DEBUG ("---------------------") + OPENTREP_LOG_DEBUG ("Remaining part of the query string: `" + << lRemainingQueryString << "'"); + /** Search with the initial full string, then by removing a word if there was no result, then by removing another word if there was @@ -143,14 +139,17 @@ furthest right words, so that the remaining left part be matched against the Xapian database). 
*/ - MatchingDocument_T lMatchingDocument; + Document lMatchingDocument; const bool hasFoundDocument = searchString (lQueryString, lMatchingDocument); if (hasFoundDocument == true) { - const QueryAndDocument_T lQueryAndDocument (lQueryString, - lMatchingDocument); - ioDocumentList.push_back (lQueryAndDocument); + lMatchingDocument.setQueryString (lQueryString); + ioDocumentList.push_back (lMatchingDocument); + + // DEBUG + OPENTREP_LOG_DEBUG ("==> Matching of the query string: `" + << lQueryString << "'"); } /** Modified: trunk/opentrep/opentrep/bom/ResultHolder.hpp =================================================================== --- trunk/opentrep/opentrep/bom/ResultHolder.hpp 2009-07-18 09:33:50 UTC (rev 136) +++ trunk/opentrep/opentrep/bom/ResultHolder.hpp 2009-07-18 15:15:40 UTC (rev 137) @@ -47,7 +47,7 @@ @param TravelQuery_T& The partial query string. @param MatchingDocument_T& The best matching Xapian document (if found). @return bool Whether such a best matching document has been found. 
*/ - bool searchString(TravelQuery_T& ioPartialQueryString, MatchingDocument_T&); + bool searchString (TravelQuery_T& ioPartialQueryString, Document&); public: Modified: trunk/opentrep/opentrep/bom/StringMatcher.cpp =================================================================== --- trunk/opentrep/opentrep/bom/StringMatcher.cpp 2009-07-18 09:33:50 UTC (rev 136) +++ trunk/opentrep/opentrep/bom/StringMatcher.cpp 2009-07-18 15:15:40 UTC (rev 137) @@ -4,7 +4,8 @@ // C #include <cassert> // STL -#include <iostream> +#include <istream> +#include <ostream> #include <sstream> #include <string> #include <list> @@ -73,7 +74,7 @@ } } catch (const Xapian::Error& error) { - std::cerr << "Exception: " << error.get_msg() << std::endl; + OPENTREP_LOG_ERROR ("Exception: " << error.get_msg()); } } @@ -267,14 +268,14 @@ */ } catch (const Xapian::Error& error) { - std::cerr << "Exception: " << error.get_msg() << std::endl; + OPENTREP_LOG_ERROR ("Exception: " << error.get_msg()); } } // ////////////////////////////////////////////////////////////////////// bool StringMatcher:: extractBestMatchingDocumentFromMSet (const Xapian::MSet& iMatchingSet, - MatchingDocument_T& ioMatchingDocument) { + Document& ioMatchingDocument) { bool oFoundDocument = false; if (iMatchingSet.empty() == true) { @@ -290,9 +291,23 @@ same: it appears random). */ Xapian::MSetIterator itDoc = iMatchingSet.begin(); - ioMatchingDocument.first = itDoc.get_percent(); - ioMatchingDocument.second = itDoc.get_document(); + const Xapian::percent& lBestPercentage = itDoc.get_percent(); + ioMatchingDocument.setXapianPercentage (lBestPercentage); + ioMatchingDocument.setXapianDocument (itDoc.get_document()); + /** Add all the Xapian documents having reached the same matching + percentage. 
*/ + for ( ; itDoc != iMatchingSet.end(); ++itDoc) { + const Xapian::percent& lPercentage = itDoc.get_percent(); + + if (lPercentage == lBestPercentage) { + ioMatchingDocument.addExtraDocument (itDoc.get_document()); + + } else { + break; + } + } + return oFoundDocument; } Modified: trunk/opentrep/opentrep/bom/StringMatcher.hpp =================================================================== --- trunk/opentrep/opentrep/bom/StringMatcher.hpp 2009-07-18 09:33:50 UTC (rev 136) +++ trunk/opentrep/opentrep/bom/StringMatcher.hpp 2009-07-18 15:15:40 UTC (rev 137) @@ -9,7 +9,7 @@ // OpenTREP #include <opentrep/bom/BomAbstract.hpp> #include <opentrep/bom/WordList.hpp> -#include <opentrep/bom/DocumentList.hpp> +#include <opentrep/bom/Document.hpp> // Forward declarations namespace Xapian { @@ -38,8 +38,7 @@ @return bool Whether or not there was a matching document. */ static bool - extractBestMatchingDocumentFromMSet (const Xapian::MSet&, - MatchingDocument_T&); + extractBestMatchingDocumentFromMSet (const Xapian::MSet&, Document&); /** Remove the word furthest at right. 
*/ static void removeOneWord (std::string& ioQueryString); Modified: trunk/opentrep/opentrep/bom/sources.mk =================================================================== --- trunk/opentrep/opentrep/bom/sources.mk 2009-07-18 09:33:50 UTC (rev 136) +++ trunk/opentrep/opentrep/bom/sources.mk 2009-07-18 15:15:40 UTC (rev 137) @@ -9,6 +9,7 @@ $(top_srcdir)/opentrep/bom/Place.hpp \ $(top_srcdir)/opentrep/bom/PlaceList.hpp \ $(top_srcdir)/opentrep/bom/PlaceHolder.hpp \ + $(top_srcdir)/opentrep/bom/Document.hpp \ $(top_srcdir)/opentrep/bom/DocumentList.hpp \ $(top_srcdir)/opentrep/bom/Result.hpp \ $(top_srcdir)/opentrep/bom/ResultList.hpp \ @@ -22,6 +23,7 @@ $(top_srcdir)/opentrep/bom/Names.cpp \ $(top_srcdir)/opentrep/bom/Place.cpp \ $(top_srcdir)/opentrep/bom/PlaceHolder.cpp \ + $(top_srcdir)/opentrep/bom/Document.cpp \ $(top_srcdir)/opentrep/bom/Result.cpp \ $(top_srcdir)/opentrep/bom/ResultHolder.cpp \ $(top_srcdir)/opentrep/bom/StringMatcher.cpp Modified: trunk/opentrep/opentrep/command/RequestInterpreter.cpp =================================================================== --- trunk/opentrep/opentrep/command/RequestInterpreter.cpp 2009-07-18 09:33:50 UTC (rev 136) +++ trunk/opentrep/opentrep/command/RequestInterpreter.cpp 2009-07-18 15:15:40 UTC (rev 137) @@ -39,6 +39,10 @@ ResultHolder& lResultHolder = FacResultHolder::instance().create (iTravelQuery, lXapianDatabase); + // DEBUG + OPENTREP_LOG_DEBUG (std::endl + << "========================================="); + // Main algorithm DocumentList_T lDocumentList; lResultHolder.searchString (lDocumentList); @@ -49,14 +53,14 @@ itDoc != lDocumentList.end(); ++itDoc) { // Retrieve both the Xapian document object and the corresponding // matching percentage (most of the time, it is 100%) - const QueryAndDocument_T& lQueryAndDocument = *itDoc; + const Document& lMatchingDocument = *itDoc; // Create a Result object Result& lResult = FacResult::instance().create (lXapianDatabase); // Fill the Result object with both 
the corresponding Document object // and its associated query string - lResult.setQueryAndDocument (lQueryAndDocument); + lResult.setMatchingDocument (lMatchingDocument); // Add the Result object (holding the list of matching // documents) to the dedicated list. @@ -83,8 +87,9 @@ assert (lResult_ptr != NULL); // Retrieve the parameters of the best matching document - const Xapian::Document& lDocument = lResult_ptr->getDocument(); - const Xapian::percent& lDocPercentage = lResult_ptr->getPercentage(); + const Xapian::Document& lDocument = lResult_ptr->getXapianDocument(); + const Xapian::percent& lDocPercentage = + lResult_ptr->getXapianPercentage(); const Xapian::docid& lDocID = lDocument.get_docid(); const std::string& lDocData = lDocument.get_data(); Modified: trunk/opentrep/opentrep/factory/FacPlaceHolder.cpp =================================================================== --- trunk/opentrep/opentrep/factory/FacPlaceHolder.cpp 2009-07-18 09:33:50 UTC (rev 136) +++ trunk/opentrep/opentrep/factory/FacPlaceHolder.cpp 2009-07-18 15:15:40 UTC (rev 137) @@ -59,15 +59,16 @@ ioPlace._placeHolder = &ioPlaceHolder; // Add the Place to the PlaceHolder internal map (of Place objects) - const bool insertSucceeded = ioPlaceHolder._placeList. - insert (PlaceList_T::value_type (ioPlace.describeShortKey(), - &ioPlace)).second; - if (insertSucceeded == false) { - OPENTREP_LOG_ERROR ("Insertion failed for " - << ioPlaceHolder.describeKey() - << " and " << ioPlace.describeShortKey()); - assert (insertSucceeded == true); - } + // const bool insertSucceeded = + ioPlaceHolder._placeList. 
+ insert (PlaceList_T::value_type (ioPlace.describeShortKey(), &ioPlace)); + +// if (insertSucceeded == false) { +// OPENTREP_LOG_ERROR ("Insertion failed for " +// << ioPlaceHolder.describeKey() +// << " and " << ioPlace.describeShortKey()); +// assert (insertSucceeded == true); +// } // Add the Place to the PlaceHolder internal list (of Place objects) ioPlaceHolder._placeOrderedList.push_back (&ioPlace); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <den...@us...> - 2009-07-18 18:32:11
|
Revision: 138 http://opentrep.svn.sourceforge.net/opentrep/?rev=138&view=rev Author: denis_arnaud Date: 2009-07-18 18:32:04 +0000 (Sat, 18 Jul 2009) Log Message: ----------- [Dev] Improved the interface, so that the travel search now returns a list of Location structures, that the caller can then benefit from. Modified Paths: -------------- trunk/opentrep/opentrep/OPENTREP_Service.hpp trunk/opentrep/opentrep/batches/indexer.cpp trunk/opentrep/opentrep/batches/searcher.cpp trunk/opentrep/opentrep/bom/BomAbstract.hpp trunk/opentrep/opentrep/bom/Document.cpp trunk/opentrep/opentrep/bom/Document.hpp trunk/opentrep/opentrep/bom/Names.cpp trunk/opentrep/opentrep/bom/Names.hpp trunk/opentrep/opentrep/bom/Place.cpp trunk/opentrep/opentrep/bom/Place.hpp trunk/opentrep/opentrep/bom/PlaceHolder.cpp trunk/opentrep/opentrep/bom/PlaceHolder.hpp trunk/opentrep/opentrep/bom/Result.cpp trunk/opentrep/opentrep/bom/Result.hpp trunk/opentrep/opentrep/bom/ResultHolder.cpp trunk/opentrep/opentrep/bom/ResultHolder.hpp trunk/opentrep/opentrep/bom/World.cpp trunk/opentrep/opentrep/bom/World.hpp trunk/opentrep/opentrep/command/RequestInterpreter.cpp trunk/opentrep/opentrep/command/RequestInterpreter.hpp trunk/opentrep/opentrep/service/OPENTREP_Service.cpp trunk/opentrep/opentrep/sources.mk Added Paths: ----------- trunk/opentrep/opentrep/Location.hpp trunk/opentrep/opentrep/OPENTREP_Abstract.hpp Added: trunk/opentrep/opentrep/Location.hpp =================================================================== --- trunk/opentrep/opentrep/Location.hpp (rev 0) +++ trunk/opentrep/opentrep/Location.hpp 2009-07-18 18:32:04 UTC (rev 138) @@ -0,0 +1,217 @@ +#ifndef __OPENTREP_LOCATION_HPP +#define __OPENTREP_LOCATION_HPP + +// ////////////////////////////////////////////////////////////////////// +// Import section +// ////////////////////////////////////////////////////////////////////// +// STL +#include <istream> +#include <ostream> +#include <string> +#include <list> +// OpenTrep +#include 
<opentrep/OPENTREP_Types.hpp> +#include <opentrep/OPENTREP_Abstract.hpp> + +namespace OPENTREP { + + /** List of names for a given (geographical) location. */ + typedef std::list<std::string> LocationNameList_T; + + + /** Structure modelling a (geographical) location. */ + struct Location : public OPENTREP_Abstract { + public: + // ///////// Getters //////// + /** Get the Location code. */ + std::string getLocationCode() const { + return _locationCode; + } + + /** Get the City code. */ + std::string getCityCode() const { + return _cityCode; + } + + /** Get the State code. */ + std::string getStateCode() const { + return _stateCode; + } + + /** Get the Country code. */ + std::string getCountryCode() const { + return _countryCode; + } + + /** Get the Region code. */ + std::string getRegionCode() const { + return _regionCode; + } + + /** Get the Continent code. */ + std::string getContinentCode() const { + return _continentCode; + } + + /** Get the Time-zone group. */ + std::string getTimeZoneGroup() const { + return _timeZoneGroup; + } + + /** Get the Longitude. */ + double getLongitude() const { + return _longitude; + } + + /** Get the Latitude. */ + double getLatitude() const { + return _latitude; + } + + /** Get the list of (American) English names for that location. */ + const LocationNameList_T& getNameList () const { + return _nameList; + } + + + // ///////// Setters ////////// + /** Set the Location code. */ + void setLocationCode (const std::string& iLocationCode) { + _locationCode = iLocationCode; + } + + /** Set the City code. */ + void setCityCode (const std::string& iCityCode) { + _cityCode = iCityCode; + } + + /** Set the State code. */ + void setStateCode (const std::string& iStateCode) { + _stateCode = iStateCode; + } + + /** Set the Country code. */ + void setCountryCode (const std::string& iCountryCode) { + _countryCode = iCountryCode; + } + + /** Set the Region code. 
*/ + void setRegionCode (const std::string& iRegionCode) { + _regionCode = iRegionCode; + } + + /** Set the Continent code. */ + void setContinentCode (const std::string& iContinentCode) { + _continentCode = iContinentCode; + } + + /** Set the Time-zone group. */ + void setTimeZoneGroup (const std::string& iTimeZoneGroup) { + _timeZoneGroup = iTimeZoneGroup; + } + + /** Set the Longitude. */ + void setLongitude (const double& iLongitude) { + _longitude = iLongitude; + } + + /** Set the Latitude. */ + void setLatitude (const double& iLatitude) { + _latitude = iLatitude; + } + + /** Set the list of (American) English names for that location. */ + void getNameList (const LocationNameList_T& iNameList) { + _nameList = iNameList; + } + + + public: + // ///////// Display methods //////// + /** Dump a structure into an output stream. + @param ostream& the output stream. */ + void toStream (std::ostream& ioOut) const { + ioOut << toString(); + } + + /** Read a structure from an input stream. + @param istream& the input stream. */ + void fromStream (std::istream&) { + } + + /** Get a short display of the Location structure. */ + std::string toShortString() const { + std::ostringstream oStr; + oStr << _locationCode << ", " << _cityCode << ", " << _stateCode + << ", " << _countryCode << ", " << _regionCode + << ", " << _continentCode << ", " << _timeZoneGroup + << ", " << _longitude << ", " << _latitude; + return oStr.str(); + } + + /** Get the serialised version of the Location structure. */ + std::string toString() const { + std::ostringstream oStr; + oStr << toShortString(); + for (LocationNameList_T::const_iterator itName = _nameList.begin(); + itName != _nameList.end(); ++itName) { + oStr << ", " << *itName; + } + return oStr.str(); + } + + + public: + /** Main Constructor. 
*/ + Location (const std::string& iPlaceCode, const std::string& iCityCode, + const std::string& iStateCode, const std::string& iCountryCode, + const std::string& iRegionCode, const std::string& iContinentCode, + const std::string& iTimeZoneGroup, + const double iLongitude, const double iLatitude, + const LocationNameList_T& iNameList) + : _locationCode (iPlaceCode), _cityCode (iCityCode), + _stateCode (iStateCode), _countryCode (iCountryCode), + _regionCode (iRegionCode), _continentCode (iContinentCode), + _timeZoneGroup (iTimeZoneGroup), _longitude (iLongitude), + _latitude (iLatitude), _nameList (iNameList) { + } + + /** Default Constructor. */ + // Location (); + /** Default copy constructor. */ + // Location (const Location&); + + /** Destructor. */ + virtual ~Location() {} + + + private: + // /////// Attributes ///////// + /** Location code. */ + std::string _locationCode; + /** City code. */ + std::string _cityCode; + /** State code. */ + std::string _stateCode; + /** Country code. */ + std::string _countryCode; + /** Region code. */ + std::string _regionCode; + /** Continent code. */ + std::string _continentCode; + /** Time-zone group. */ + std::string _timeZoneGroup; + /** Longitude. */ + double _longitude; + /** Latitude. */ + double _latitude; + /** List of (American) English names. */ + LocationNameList_T _nameList; + }; + + + /** List of (geographical) location structures. 
*/ + typedef std::list<Location> LocationList_T; + +} +#endif // __OPENTREP_LOCATION_HPP Added: trunk/opentrep/opentrep/OPENTREP_Abstract.hpp =================================================================== --- trunk/opentrep/opentrep/OPENTREP_Abstract.hpp (rev 0) +++ trunk/opentrep/opentrep/OPENTREP_Abstract.hpp 2009-07-18 18:32:04 UTC (rev 138) @@ -0,0 +1,84 @@ +#ifndef __OPENTREP_OPENTREP_ABSTRACT_HPP +#define __OPENTREP_OPENTREP_ABSTRACT_HPP + +// ////////////////////////////////////////////////////////////////////// +// Import section +// ////////////////////////////////////////////////////////////////////// +// STL +#include <istream> +#include <ostream> +#include <sstream> +#include <string> + +namespace OPENTREP { + + /** Base class for the OPENTREP interface structures. */ + struct OPENTREP_Abstract { + public: + // /////////// Display support methods ///////// + /** Dump a structure into an output stream. + @param ostream& the output stream. */ + virtual void toStream (std::ostream& ioOut) const = 0; + + /** Read a structure from an input stream. + @param istream& the input stream. */ + virtual void fromStream (std::istream& ioIn) = 0; + + /** Get the serialised version of the structure. */ + virtual std::string toString() const = 0; + + + protected: + /** Protected Default Constructor to ensure this class is abstract. */ + OPENTREP_Abstract () {} + OPENTREP_Abstract (const OPENTREP_Abstract&) {} + + /** Destructor. */ + virtual ~OPENTREP_Abstract() {} + }; +} + +/** + Piece of code given by Nicolai M. Josuttis, Section 13.12.1 "Implementing + Output Operators" (p653) of his book "The C++ Standard Library: A Tutorial + and Reference", published by Addison-Wesley. 
+ */ +template <class charT, class traits> +inline +std::basic_ostream<charT, traits>& +operator<< (std::basic_ostream<charT, traits>& ioOut, + const OPENTREP::OPENTREP_Abstract& iStructure) { + /** + string stream: + - with same format + - without special field width + */ + std::basic_ostringstream<charT,traits> ostr; + ostr.copyfmt (ioOut); + ostr.width (0); + + // Fill string stream + iStructure.toStream (ostr); + + // Print string stream + ioOut << ostr.str(); + + return ioOut; +} + +/** + Piece of code given by Nicolai M. Josuttis, Section 13.12.1 "Implementing + Output Operators" (pp655-657) of his book "The C++ Standard Library: + A Tutorial and Reference", published by Addison-Wesley. + */ +template <class charT, class traits> +inline +std::basic_istream<charT, traits>& +operator>> (std::basic_istream<charT, traits>& ioIn, + OPENTREP::OPENTREP_Abstract& ioStucture) { + // Fill Bom object with input stream + ioStucture.fromStream (ioIn); + return ioIn; +} + +#endif // __OPENTREP_OPENTREP_ABSTRACT_HPP Modified: trunk/opentrep/opentrep/OPENTREP_Service.hpp =================================================================== --- trunk/opentrep/opentrep/OPENTREP_Service.hpp 2009-07-18 15:15:40 UTC (rev 137) +++ trunk/opentrep/opentrep/OPENTREP_Service.hpp 2009-07-18 18:32:04 UTC (rev 138) @@ -7,30 +7,44 @@ // STL #include <ostream> #include <string> -// OPENTREP +// OpenTREP #include <opentrep/OPENTREP_Types.hpp> +#include <opentrep/Location.hpp> namespace OPENTREP { - /** Forward declaration. */ + // Forward declaration class OPENTREP_ServiceContext; + /** Interface for the OPENTREP Services. */ class OPENTREP_Service { public: - /** Constructor. */ + // /////////// Business Methods ///////////// + /** Build the Xapian database (index) on the BOM held in memory. */ + void buildSearchIndex (); + + /** Match the given string, thanks to a full-text search on the + underlying Xapian index (named "database"). 
+ @param const std::string& (Travel-related) query string (e.g., + "sna francicso rio de janero lso angles reykyavki nce iev mow"). + @param LocationList_T& List of (geographical) locations, if any, + matching the given query string. + @return NbOfMatches_T Number of matches. */ + NbOfMatches_T interpretTravelRequest (const std::string& iTravelQuery, + LocationList_T&); + + + // ////////// Constructors and destructors ////////// + /** Constructor. + @param std::ostream& Output log stream (for instance, std::cout). + @param const std::string& Filepath of the Xapian index/database. */ OPENTREP_Service (std::ostream& ioLogStream, const std::string& iXapianDatabaseFilepath); /** Destructor. */ ~OPENTREP_Service(); - /** Build the Xapian database (index) on the BOM held in memory. */ - void buildSearchIndex (); - - /** Perform the query, thanks to the underlying Xapian database - (index) name. */ - void interpretTravelRequest (const std::string& iTravelQuery); private: // /////// Construction and Destruction helper methods /////// @@ -43,12 +57,10 @@ void init (std::ostream& ioLogStream, const std::string& iXapianDatabaseFilepath); - /** Initilise the log. */ - void logInit (const LOG::EN_LogLevel iLogLevel, std::ostream& ioLogStream); - - /** Finaliser. */ + /** Finalise. */ void finalise (); + private: // ///////// Service Context ///////// /** Opentrep context. */ Modified: trunk/opentrep/opentrep/batches/indexer.cpp =================================================================== --- trunk/opentrep/opentrep/batches/indexer.cpp 2009-07-18 15:15:40 UTC (rev 137) +++ trunk/opentrep/opentrep/batches/indexer.cpp 2009-07-18 18:32:04 UTC (rev 138) @@ -139,6 +139,11 @@ logOutputFile.open (lLogFilename.c_str()); logOutputFile.clear(); + // + std::cout << "Creating the Xapian index/database may take a few minutes " + << "on some architectures (and a few seconds on fastest ones)..." 
+ << std::endl; + // Initialise the context OPENTREP::OPENTREP_Service opentrepService (logOutputFile, lXapianDatabaseName); Modified: trunk/opentrep/opentrep/batches/searcher.cpp =================================================================== --- trunk/opentrep/opentrep/batches/searcher.cpp 2009-07-18 15:15:40 UTC (rev 137) +++ trunk/opentrep/opentrep/batches/searcher.cpp 2009-07-18 18:32:04 UTC (rev 138) @@ -228,7 +228,23 @@ lXapianDatabaseName); // Query the Xapian database (index) - opentrepService.interpretTravelRequest (lTravelQuery); + OPENTREP::LocationList_T lLocationList; + const OPENTREP::NbOfMatches_T nbOfMatches = + opentrepService.interpretTravelRequest (lTravelQuery, lLocationList); + + if (nbOfMatches != 0) { + std::cout << nbOfMatches << " (geographical) location(s) have been found " + << "matching your query (`" << lTravelQuery << "´)." + << std::endl; + + OPENTREP::NbOfMatches_T idx = 1; + for (OPENTREP::LocationList_T::const_iterator itLocation = + lLocationList.begin(); + itLocation != lLocationList.end(); ++itLocation, ++idx) { + const OPENTREP::Location& lLocation = *itLocation; + std::cout << " [" << idx << "]: " << lLocation << std::endl; + } + } // Close the Log outputFile logOutputFile.close(); Modified: trunk/opentrep/opentrep/bom/BomAbstract.hpp =================================================================== --- trunk/opentrep/opentrep/bom/BomAbstract.hpp 2009-07-18 15:15:40 UTC (rev 137) +++ trunk/opentrep/opentrep/bom/BomAbstract.hpp 2009-07-18 18:32:04 UTC (rev 138) @@ -9,8 +9,6 @@ #include <ostream> #include <sstream> #include <string> -// OpenTrep -#include <opentrep/bom/Language.hpp> namespace OPENTREP { @@ -32,11 +30,11 @@ /** Get a string describing the whole key (differentiating two objects at any level). */ - virtual const std::string describeKey() const = 0; + virtual std::string describeKey() const = 0; /** Get a string describing the short key (differentiating two objects at the same level). 
*/ - virtual const std::string describeShortKey() const = 0; + virtual std::string describeShortKey() const = 0; protected: Modified: trunk/opentrep/opentrep/bom/Document.cpp =================================================================== --- trunk/opentrep/opentrep/bom/Document.cpp 2009-07-18 15:15:40 UTC (rev 137) +++ trunk/opentrep/opentrep/bom/Document.cpp 2009-07-18 18:32:04 UTC (rev 138) @@ -24,14 +24,14 @@ } // ////////////////////////////////////////////////////////////////////// - const std::string Document::describeShortKey() const { + std::string Document::describeShortKey() const { std::ostringstream oStr; oStr << _queryString; return oStr.str(); } // ////////////////////////////////////////////////////////////////////// - const std::string Document::describeKey() const { + std::string Document::describeKey() const { return describeShortKey(); } Modified: trunk/opentrep/opentrep/bom/Document.hpp =================================================================== --- trunk/opentrep/opentrep/bom/Document.hpp 2009-07-18 15:15:40 UTC (rev 137) +++ trunk/opentrep/opentrep/bom/Document.hpp 2009-07-18 18:32:04 UTC (rev 138) @@ -84,11 +84,11 @@ /** Get a string describing the whole key (differentiating two objects at any level). */ - const std::string describeKey() const; + std::string describeKey() const; /** Get a string describing the short key (differentiating two objects at the same level). 
*/ - const std::string describeShortKey() const; + std::string describeShortKey() const; public: Modified: trunk/opentrep/opentrep/bom/Names.cpp =================================================================== --- trunk/opentrep/opentrep/bom/Names.cpp 2009-07-18 15:15:40 UTC (rev 137) +++ trunk/opentrep/opentrep/bom/Names.cpp 2009-07-18 18:32:04 UTC (rev 138) @@ -2,7 +2,7 @@ // Import section // ////////////////////////////////////////////////////////////////////// // C -#include <assert.h> +#include <cassert> // STL #include <sstream> // OpenTrep @@ -12,6 +12,7 @@ // ////////////////////////////////////////////////////////////////////// Names::Names() : _languageCode (Language::en_US) { + assert (false); } // ////////////////////////////////////////////////////////////////////// @@ -41,14 +42,14 @@ } // ////////////////////////////////////////////////////////////////////// - const std::string Names::describeShortKey() const { + std::string Names::describeShortKey() const { std::ostringstream oStr; oStr << "[" << Language::getLongLabel (_languageCode) << "]: "; return oStr.str(); } // ////////////////////////////////////////////////////////////////////// - const std::string Names::describeKey() const { + std::string Names::describeKey() const { return describeShortKey(); } Modified: trunk/opentrep/opentrep/bom/Names.hpp =================================================================== --- trunk/opentrep/opentrep/bom/Names.hpp 2009-07-18 15:15:40 UTC (rev 137) +++ trunk/opentrep/opentrep/bom/Names.hpp 2009-07-18 18:32:04 UTC (rev 138) @@ -33,6 +33,7 @@ private: /** Default constructor: should not be used. */ Names(); + public: // /////////// Getters /////////////// @@ -78,14 +79,15 @@ /** Get a string describing the whole key (differentiating two objects at any level). */ - const std::string describeKey() const; + std::string describeKey() const; /** Get a string describing the short key (differentiating two objects at the same level). 
*/ - const std::string describeShortKey() const; + std::string describeShortKey() const; - // Attributes + private: + // //////////// Attributes ////////////// /** Language code (e.g., en_US, fr_FR, etc.). */ Language::EN_Language _languageCode; @@ -93,7 +95,11 @@ NameList_T _nameList; }; - // Type definitions + // ////////////// Type definitions //////////// + /** Matrix of place names: for each of the language, the place gets a + corresponding list of names. + <br>For instance, MUC corresponds to Munich in English, München + in German, Munique in French, Мюнхен in Russian, etc. */ typedef std::map<Language::EN_Language, Names> NameMatrix_T; } Modified: trunk/opentrep/opentrep/bom/Place.cpp =================================================================== --- trunk/opentrep/opentrep/bom/Place.cpp 2009-07-18 15:15:40 UTC (rev 137) +++ trunk/opentrep/opentrep/bom/Place.cpp 2009-07-18 18:32:04 UTC (rev 138) @@ -27,16 +27,40 @@ // ////////////////////////////////////////////////////////////////////// Place::~Place () { } + + // ////////////////////////////////////////////////////////////////////// + std::string Place::getCityCode() const { + std::string oCityCode (_cityCode); + if (oCityCode.empty() == true) { + oCityCode = _placeCode; + } + return oCityCode; + } + + // ////////////////////////////////////////////////////////////////////// + bool Place::getNameList (const Language::EN_Language& iLanguageCode, + NameList_T& ioNameList) const { + bool oFoundNameList = false; + + NameMatrix_T::const_iterator itNameList = _nameMatrix.find (iLanguageCode); + if (itNameList != _nameMatrix.end()) { + const Names& lNameList = itNameList->second; + ioNameList = lNameList.getNameList(); + oFoundNameList = true; + } + + return oFoundNameList; + } // ////////////////////////////////////////////////////////////////////// - const std::string Place::describeShortKey() const { + std::string Place::describeShortKey() const { std::ostringstream oStr; oStr << _placeCode; return 
oStr.str(); } // ////////////////////////////////////////////////////////////////////// - const std::string Place::describeKey() const { + std::string Place::describeKey() const { return describeShortKey(); } @@ -46,13 +70,10 @@ not an airport. The city code is thus the same as the place code itself. */ std::ostringstream oStr; - oStr << describeShortKey() << ", "; - if (_cityCode.empty()) { - oStr << _placeCode << ", "; - } else { - oStr << _cityCode << ", "; - } - oStr << _stateCode + oStr << describeShortKey(); + + const std::string& lCityCode = getCityCode(); + oStr << ", " << lCityCode << ", " << _stateCode << ", " << _countryCode << ", " << _regionCode << ", " << _continentCode << ", " << _timeZoneGroup << ", " << _longitude << ", " << _latitude << ", " << _docID << ". "; @@ -72,22 +93,23 @@ not an airport. The city code is thus the same as the place code itself. */ std::ostringstream oStr; - oStr << describeShortKey() << ", "; - if (_cityCode.empty()) { - oStr << _placeCode << ", "; - } else { - oStr << _cityCode << ", "; - } - oStr << _stateCode + oStr << describeShortKey(); + + const std::string& lCityCode = getCityCode(); + oStr << ", " << lCityCode << ", " << _stateCode << ", " << _countryCode << ", " << _regionCode << ", " << _continentCode << ", " << _timeZoneGroup << ", " << _longitude << ", " << _latitude << ", " << _docID; NameMatrix_T::const_iterator itNameHolder = _nameMatrix.begin(); - const Names& lNameHolder = itNameHolder->second; - const std::string& lFirstName = lNameHolder.getFirstName(); - if (lFirstName.empty() == false) { - oStr << ", " << lFirstName << "."; + if (itNameHolder != _nameMatrix.end()) { + + const Names& lNameHolder = itNameHolder->second; + const std::string& lFirstName = lNameHolder.getFirstName(); + + if (lFirstName.empty() == false) { + oStr << ", " << lFirstName << "."; + } } return oStr.str(); @@ -103,18 +125,16 @@ } // ////////////////////////////////////////////////////////////////////// - const std::string 
Place::shortDisplay() const { + std::string Place::shortDisplay() const { /* When the city code is empty, it means that the place is a city and not an airport. The city code is thus the same as the place code itself. */ std::ostringstream oStr; oStr << describeKey(); - if (_cityCode.empty()) { - oStr << ", city code = " << _placeCode; - } else { - oStr << ", city code = " << _cityCode; - } - oStr << ", state code = " << _stateCode + + const std::string& lCityCode = getCityCode(); + oStr << ", city code = " << lCityCode + << ", state code = " << _stateCode << ", country code = " << _countryCode << ", region code = " << _regionCode << ", continent code = " << _continentCode @@ -127,7 +147,7 @@ } // ////////////////////////////////////////////////////////////////////// - const std::string Place::display() const { + std::string Place::display() const { std::ostringstream oStr; oStr << shortDisplay(); for (NameMatrix_T::const_iterator itNameList = _nameMatrix.begin(); @@ -174,4 +194,27 @@ _nameMatrix.clear(); } + // ////////////////////////////////////////////////////////////////////// + Location Place::createLocation() const { + + const std::string& lCityCode = getCityCode(); + + NameList_T lNameList; + const bool hasFoundNameList = getNameList (Language::en_US, lNameList); + + if (hasFoundNameList == false) { + // + OPENTREP_LOG_ERROR ("No list of names in (American) English (en_US " + << "locale) can be found for the following place: " + << toShortString()); + throw LanguageCodeNotDefinedInNameTableException(); + } + assert (hasFoundNameList == true); + + // Copy the parameters from the Place object to the Location structure + Location oLocation (_placeCode, lCityCode, _stateCode, _countryCode, + _regionCode, _continentCode, _timeZoneGroup, + _longitude, _latitude, lNameList); + return oLocation; + } } Modified: trunk/opentrep/opentrep/bom/Place.hpp =================================================================== --- trunk/opentrep/opentrep/bom/Place.hpp 
2009-07-18 15:15:40 UTC (rev 137) +++ trunk/opentrep/opentrep/bom/Place.hpp 2009-07-18 18:32:04 UTC (rev 138) @@ -11,6 +11,7 @@ #include <map> // OpenTrep Bom #include <opentrep/OPENTREP_Types.hpp> +#include <opentrep/Location.hpp> #include <opentrep/bom/BomAbstract.hpp> #include <opentrep/bom/Names.hpp> @@ -20,7 +21,7 @@ class World; class PlaceHolder; - /** Structure modelling an place. */ + /** Class modelling a place. */ class Place : public BomAbstract { friend class FacWorld; friend class FacPlace; @@ -29,37 +30,38 @@ public: // ///////// Getters //////// /** Get the Place code. */ - std::string getPlaceCode() const { + const std::string& getPlaceCode() const { return _placeCode; } - /** Get the City code. */ - std::string getCityCode() const { - return _cityCode; - } - + /** Get the City code. + <br>When the city code is empty, it means that the place is a + city and not an airport. The city code is thus the same as the + place code itself. */ + std::string getCityCode() const; + /** Get the State code. */ - std::string getStateCode() const { + const std::string& getStateCode() const { return _stateCode; } /** Get the Country code. */ - std::string getCountryCode() const { + const std::string& getCountryCode() const { return _countryCode; } /** Get the Region code. */ - std::string getRegionCode() const { + const std::string& getRegionCode() const { return _regionCode; } /** Get the Continent code. */ - std::string getContinentCode() const { + const std::string& getContinentCode() const { return _continentCode; } /** Get the Time-zone group. */ - std::string getTimeZoneGroup() const { + const std::string& getTimeZoneGroup() const { return _timeZoneGroup; } @@ -83,6 +85,14 @@ return _nameMatrix; } + /** Get, for a given language (code), the corresponding list of names. + @param const Language::EN_Language& Language code. + @param NameList_T& Empty list of names, which will be filled by the + method if a list exists for that language code. 
+ @return bool Whether or not such a list exists for the given + language. */ + bool getNameList (const Language::EN_Language&, NameList_T&) const; + // ///////// Setters //////// /** Set the Place code. */ @@ -146,6 +156,14 @@ public: + // /////////// Business methods ///////// + /** Create a Location structure, which is a light copy + of the Place object. That (Location) structure is passed + back to the caller of the service. */ + Location createLocation() const; + + + public: // ///////// Display methods //////// /** Dump a Business Object into an output stream. @param ostream& the output stream. */ @@ -163,17 +181,17 @@ /** Get a string describing the whole key (differentiating two objects at any level). */ - const std::string describeKey() const; + std::string describeKey() const; /** Get a string describing the short key (differentiating two objects at the same level). */ - const std::string describeShortKey() const; + std::string describeShortKey() const; /** Display the full Place context. */ - const std::string display() const; + std::string display() const; /** Display a short Place context. 
*/ - const std::string shortDisplay() const; + std::string shortDisplay() const; private: Modified: trunk/opentrep/opentrep/bom/PlaceHolder.cpp =================================================================== --- trunk/opentrep/opentrep/bom/PlaceHolder.cpp 2009-07-18 15:15:40 UTC (rev 137) +++ trunk/opentrep/opentrep/bom/PlaceHolder.cpp 2009-07-18 18:32:04 UTC (rev 138) @@ -27,13 +27,13 @@ } // ////////////////////////////////////////////////////////////////////// - const std::string PlaceHolder::describeShortKey() const { + std::string PlaceHolder::describeShortKey() const { std::ostringstream oStr; return oStr.str(); } // ////////////////////////////////////////////////////////////////////// - const std::string PlaceHolder::describeKey() const { + std::string PlaceHolder::describeKey() const { return describeShortKey(); } @@ -78,4 +78,18 @@ void PlaceHolder::fromStream (std::istream& ioIn) { } + // ////////////////////////////////////////////////////////////////////// + void PlaceHolder::createLocations (LocationList_T& ioLocationList) const { + + for (PlaceOrderedList_T::const_iterator itPlace = _placeOrderedList.begin(); + itPlace != _placeOrderedList.end(); ++itPlace) { + const Place* lPlace_ptr = *itPlace; + assert (lPlace_ptr != NULL); + + const Location& lLocation = lPlace_ptr->createLocation(); + ioLocationList.push_back (lLocation); + } + } + + } Modified: trunk/opentrep/opentrep/bom/PlaceHolder.hpp =================================================================== --- trunk/opentrep/opentrep/bom/PlaceHolder.hpp 2009-07-18 15:15:40 UTC (rev 137) +++ trunk/opentrep/opentrep/bom/PlaceHolder.hpp 2009-07-18 18:32:04 UTC (rev 138) @@ -6,6 +6,7 @@ // ////////////////////////////////////////////////////////////////////// // OpenTREP #include <opentrep/OPENTREP_Types.hpp> +#include <opentrep/Location.hpp> #include <opentrep/bom/BomAbstract.hpp> #include <opentrep/bom/PlaceList.hpp> @@ -27,8 +28,12 @@ public: // /////////// Business methods ///////// - + /** 
Create the list of Location structures, which are light copies + of the Place objects. Those (Location) structures are passed + back to the caller of the service. */ + void createLocations (LocationList_T&) const; + public: // /////////// Display support methods ///////// /** Dump a Business Object into an output stream. @@ -47,11 +52,11 @@ /** Get a string describing the whole key (differentiating two objects at any level). */ - const std::string describeKey() const; + std::string describeKey() const; /** Get a string describing the short key (differentiating two objects at the same level). */ - const std::string describeShortKey() const; + std::string describeShortKey() const; private: Modified: trunk/opentrep/opentrep/bom/Result.cpp =================================================================== --- trunk/opentrep/opentrep/bom/Result.cpp 2009-07-18 15:15:40 UTC (rev 137) +++ trunk/opentrep/opentrep/bom/Result.cpp 2009-07-18 18:32:04 UTC (rev 138) @@ -27,14 +27,14 @@ } // ////////////////////////////////////////////////////////////////////// - const std::string Result::describeShortKey() const { + std::string Result::describeShortKey() const { std::ostringstream oStr; oStr << _queryString; return oStr.str(); } // ////////////////////////////////////////////////////////////////////// - const std::string Result::describeKey() const { + std::string Result::describeKey() const { return describeShortKey(); } Modified: trunk/opentrep/opentrep/bom/Result.hpp =================================================================== --- trunk/opentrep/opentrep/bom/Result.hpp 2009-07-18 15:15:40 UTC (rev 137) +++ trunk/opentrep/opentrep/bom/Result.hpp 2009-07-18 18:32:04 UTC (rev 138) @@ -71,11 +71,11 @@ /** Get a string describing the whole key (differentiating two objects at any level). */ - const std::string describeKey() const; + std::string describeKey() const; /** Get a string describing the short key (differentiating two objects at the same level). 
*/ - const std::string describeShortKey() const; + std::string describeShortKey() const; private: Modified: trunk/opentrep/opentrep/bom/ResultHolder.cpp =================================================================== --- trunk/opentrep/opentrep/bom/ResultHolder.cpp 2009-07-18 15:15:40 UTC (rev 137) +++ trunk/opentrep/opentrep/bom/ResultHolder.cpp 2009-07-18 18:32:04 UTC (rev 138) @@ -31,14 +31,14 @@ } // ////////////////////////////////////////////////////////////////////// - const std::string ResultHolder::describeShortKey() const { + std::string ResultHolder::describeShortKey() const { std::ostringstream oStr; oStr << _queryString; return oStr.str(); } // ////////////////////////////////////////////////////////////////////// - const std::string ResultHolder::describeKey() const { + std::string ResultHolder::describeKey() const { return describeShortKey(); } Modified: trunk/opentrep/opentrep/bom/ResultHolder.hpp =================================================================== --- trunk/opentrep/opentrep/bom/ResultHolder.hpp 2009-07-18 15:15:40 UTC (rev 137) +++ trunk/opentrep/opentrep/bom/ResultHolder.hpp 2009-07-18 18:32:04 UTC (rev 138) @@ -65,11 +65,11 @@ /** Get a string describing the whole key (differentiating two objects at any level). */ - const std::string describeKey() const; + std::string describeKey() const; /** Get a string describing the short key (differentiating two objects at the same level). 
*/ - const std::string describeShortKey() const; + std::string describeShortKey() const; private: Modified: trunk/opentrep/opentrep/bom/World.cpp =================================================================== --- trunk/opentrep/opentrep/bom/World.cpp 2009-07-18 15:15:40 UTC (rev 137) +++ trunk/opentrep/opentrep/bom/World.cpp 2009-07-18 18:32:04 UTC (rev 138) @@ -25,13 +25,13 @@ } // ////////////////////////////////////////////////////////////////////// - const std::string World::describeShortKey() const { + std::string World::describeShortKey() const { std::ostringstream oStr; return oStr.str(); } // ////////////////////////////////////////////////////////////////////// - const std::string World::describeKey() const { + std::string World::describeKey() const { return describeShortKey(); } @@ -50,14 +50,14 @@ } // ////////////////////////////////////////////////////////////////////// - const std::string World::shortDisplay() const { + std::string World::shortDisplay() const { std::ostringstream oStr; oStr << describeKey() << " one world " << std::endl; return oStr.str(); } // ////////////////////////////////////////////////////////////////////// - const std::string World::display() const { + std::string World::display() const { std::ostringstream oStr; oStr << shortDisplay(); Modified: trunk/opentrep/opentrep/bom/World.hpp =================================================================== --- trunk/opentrep/opentrep/bom/World.hpp 2009-07-18 15:15:40 UTC (rev 137) +++ trunk/opentrep/opentrep/bom/World.hpp 2009-07-18 18:32:04 UTC (rev 138) @@ -50,17 +50,17 @@ /** Get a string describing the whole key (differentiating two objects at any level). */ - const std::string describeKey() const; + std::string describeKey() const; /** Get a string describing the short key (differentiating two objects at the same level). */ - const std::string describeShortKey() const; + std::string describeShortKey() const; /** Display the full World context. 
*/ - const std::string display() const; + std::string display() const; /** Display a short World context. */ - const std::string shortDisplay() const; + std::string shortDisplay() const; /** Retrieve a generic BOM object from the dedicated list. */ GenericBom_T getGenericBom (const XapianDocID_T& iDocID) const; Modified: trunk/opentrep/opentrep/command/RequestInterpreter.cpp =================================================================== --- trunk/opentrep/opentrep/command/RequestInterpreter.cpp 2009-07-18 15:15:40 UTC (rev 137) +++ trunk/opentrep/opentrep/command/RequestInterpreter.cpp 2009-07-18 18:32:04 UTC (rev 138) @@ -1,6 +1,8 @@ // ////////////////////////////////////////////////////////////////////// // Import section // ////////////////////////////////////////////////////////////////////// +// C +#include <cassert> // STL #include <istream> #include <ostream> @@ -23,63 +25,47 @@ #include <xapian.h> namespace OPENTREP { - + // ////////////////////////////////////////////////////////////////////// - void RequestInterpreter:: - interpretTravelRequest (soci::session& ioSociSession, - const TravelDatabaseName_T& iTravelDatabaseName, - const TravelQuery_T& iTravelQuery) { + void createResults (const DocumentList_T& iDocumentList, + const Xapian::Database& iXapianDatabase, + ResultHolder& ioResultHolder) { - try { - - // Make the database - Xapian::Database lXapianDatabase (iTravelDatabaseName); - - // Create a ResultHolder object - ResultHolder& lResultHolder = - FacResultHolder::instance().create (iTravelQuery, lXapianDatabase); - - // DEBUG - OPENTREP_LOG_DEBUG (std::endl - << "========================================="); + // Back-up the (retrieved) matching Xapian documents into still + // to-be-created Result objects. 
+ for (DocumentList_T::const_iterator itDoc = iDocumentList.begin(); + itDoc != iDocumentList.end(); ++itDoc) { + // Retrieve both the Xapian document object and the corresponding + // matching percentage (most of the time, it is 100%) + const Document& lMatchingDocument = *itDoc; - // Main algorithm - DocumentList_T lDocumentList; - lResultHolder.searchString (lDocumentList); - - // Back-up the (retrieved) matching Xapian documents into still - // to-be-created Result objects. - for (DocumentList_T::const_iterator itDoc = lDocumentList.begin(); - itDoc != lDocumentList.end(); ++itDoc) { - // Retrieve both the Xapian document object and the corresponding - // matching percentage (most of the time, it is 100%) - const Document& lMatchingDocument = *itDoc; - - // Create a Result object - Result& lResult = FacResult::instance().create (lXapianDatabase); - - // Fill the Result object with both the corresponding Document object - // and its associated query string - lResult.setMatchingDocument (lMatchingDocument); - - // Add the Result object (holding the list of matching - // documents) to the dedicated list. - FacResultHolder::initLinkWithResult (lResultHolder, lResult); - } - - // DEBUG - OPENTREP_LOG_DEBUG (std::endl - << "=========================================" - << std::endl << "Matching list: " << std::endl - << lResultHolder.toString() - << "=========================================" - << std::endl << std::endl); - - // Create a PlaceHolder object, to collect the matching Place objects - PlaceHolder& lPlaceHolder = FacPlaceHolder::instance().create(); + // Create a Result object + Result& lResult = FacResult::instance().create (iXapianDatabase); + + // Fill the Result object with both the corresponding Document object + // and its associated query string + lResult.setMatchingDocument (lMatchingDocument); + + // Add the Result object (holding the list of matching + // documents) to the dedicated list. 
+ FacResultHolder::initLinkWithResult (ioResultHolder, lResult); + } + // DEBUG + OPENTREP_LOG_DEBUG (std::endl + << "=========================================" + << std::endl << "Matching list: " << std::endl + << ioResultHolder.toString() + << "=========================================" + << std::endl << std::endl); + } + + // ////////////////////////////////////////////////////////////////////// + void createPlaces (const ResultHolder& iResultHolder, + soci::session& ioSociSession, PlaceHolder& ioPlaceHolder) { + // Browse the list of result objects - const ResultList_T& lResultList = lResultHolder.getResultList(); + const ResultList_T& lResultList = iResultHolder.getResultList(); for (ResultList_T::const_iterator itResult = lResultList.begin(); itResult != lResultList.end(); ++itResult) { // Retrieve the result object @@ -108,7 +94,7 @@ if (hasRetrievedPlace == true) { // Insert the Place object within the PlaceHolder object - FacPlaceHolder::initLinkWithPlace (lPlaceHolder, lPlace); + FacPlaceHolder::initLinkWithPlace (ioPlaceHolder, lPlace); // DEBUG OPENTREP_LOG_DEBUG ("Retrieved Document: " << lPlace.toString()); @@ -118,18 +104,63 @@ OPENTREP_LOG_DEBUG ("No retrieved Document for ID = " << lDocID); } } + } + + // ////////////////////////////////////////////////////////////////////// + NbOfMatches_T RequestInterpreter:: + interpretTravelRequest (soci::session& ioSociSession, + const TravelDatabaseName_T& iTravelDatabaseName, + const TravelQuery_T& iTravelQuery, + LocationList_T& ioLocationList) { + NbOfMatches_T oNbOfMatches = 0; + // Create a PlaceHolder object, to collect the matching Place objects + PlaceHolder& lPlaceHolder = FacPlaceHolder::instance().create(); + + try { + + // Make the database + Xapian::Database lXapianDatabase (iTravelDatabaseName); + + // Create a ResultHolder object + ResultHolder& lResultHolder = + FacResultHolder::instance().create (iTravelQuery, lXapianDatabase); + // DEBUG - OPENTREP_LOG_NOTIFICATION (std::endl - << 
"=========================================" - << std::endl << "Summary:" << std::endl - << lPlaceHolder.toShortString() << std::endl - << "=========================================" - << std::endl); + OPENTREP_LOG_DEBUG (std::endl + << "========================================="); + // Main algorithm + DocumentList_T lDocumentList; + lResultHolder.searchString (lDocumentList); + + /** Create the list of Result objects corresponding to the list + of documents. */ + createResults (lDocumentList, lXapianDatabase, lResultHolder); + + /** Create the list of Place objects, for each of which a + look-up is made in the SQL database (e.g., MySQL or Oracle) + to retrieve complementary data. */ + createPlaces (lResultHolder, ioSociSession, lPlaceHolder); + } catch (const Xapian::Error& error) { OPENTREP_LOG_ERROR ("Exception: " << error.get_msg()); } + + // DEBUG + OPENTREP_LOG_DEBUG (std::endl + << "=========================================" + << std::endl << "Summary:" << std::endl + << lPlaceHolder.toShortString() << std::endl + << "=========================================" + << std::endl); + + /** Create the list of Location structures, which are light copies + of the Place objects. Those (Location) structures are passed + back to the caller of the service. */ + lPlaceHolder.createLocations (ioLocationList); + + return oNbOfMatches; } } Modified: trunk/opentrep/opentrep/command/RequestInterpreter.hpp =================================================================== --- trunk/opentrep/opentrep/command/RequestInterpreter.hpp 2009-07-18 15:15:40 UTC (rev 137) +++ trunk/opentrep/opentrep/command/RequestInterpreter.hpp 2009-07-18 18:32:04 UTC (rev 138) @@ -8,6 +8,7 @@ #include <string> // OpenTrep #include <opentrep/OPENTREP_Types.hpp> +#include <opentrep/Location.hpp> // Forward declarations namespace soci { @@ -23,10 +24,19 @@ class RequestInterpreter { friend class OPENTREP_Service; private: - /** Interpret a search query. 
*/ - static void interpretTravelRequest (soci::session&, - const TravelDatabaseName_T&, - const TravelQuery_T&); + /** Match the given string, thanks to a full-text search on the + underlying Xapian index (named "database"). + @param soci::session& SQL Database (e.g., MySQL, Oracle) session. + @param const TravelDatabaseName_T& Filepath to the Xapian database. + @param const std::string& (Travel-related) query string (e.g., + "sna francicso rio de janero lso angles reykyavki nce iev mow"). + @param LocationList_T& List of (geographical) locations, if any, + matching the given query string. + @return NbOfMatches_T Number of matches. */ + static NbOfMatches_T interpretTravelRequest (soci::session&, + const TravelDatabaseName_T&, + const TravelQuery_T&, + LocationList_T&); private: /** Constructors. */ Modified: trunk/opentrep/opentrep/service/OPENTREP_Service.cpp =================================================================== --- trunk/opentrep/opentrep/service/OPENTREP_Service.cpp 2009-07-18 15:15:40 UTC (rev 137) +++ trunk/opentrep/opentrep/service/OPENTREP_Service.cpp 2009-07-18 18:32:04 UTC (rev 138) @@ -19,8 +19,9 @@ namespace OPENTREP { // ////////////////////////////////////////////////////////////////////// - OPENTREP_Service::OPENTREP_Service (std::ostream& ioLogStream, - const std::string& iXapianDatabaseFilepath) + OPENTREP_Service:: + OPENTREP_Service (std::ostream& ioLogStream, + const std::string& iXapianDatabaseFilepath) : _opentrepServiceContext (NULL) { init (ioLogStream, iXapianDatabaseFilepath); } @@ -43,6 +44,12 @@ } // ////////////////////////////////////////////////////////////////////// + void logInit (const LOG::EN_LogLevel iLogLevel, + std::ostream& ioLogOutputFile) { + Logger::instance().setLogParameters (iLogLevel, ioLogOutputFile); + } + + // ////////////////////////////////////////////////////////////////////// void OPENTREP_Service::init (std::ostream& ioLogStream, const std::string& iTravelDatabaseName) { // Set the log file @@ 
-65,12 +72,6 @@ } // ////////////////////////////////////////////////////////////////////// - void OPENTREP_Service::logInit (const LOG::EN_LogLevel iLogLevel, - std::ostream& ioLogOutputFile) { - Logger::instance().setLogParameters (iLogLevel, ioLogOutputFile); - } - - // ////////////////////////////////////////////////////////////////////// void OPENTREP_Service::finalise () { assert (_opentrepServiceContext != NULL); @@ -109,8 +110,9 @@ } // ////////////////////////////////////////////////////////////////////// - void OPENTREP_Service:: - interpretTravelRequest (const std::string& iTravelQuery) { + NbOfMatches_T OPENTREP_Service:: + interpretTravelRequest (const std::string& iTravelQuery, + LocationList_T& ioLocationList) { if (_opentrepServiceContext == NULL) { throw NonInitialisedServiceException(); } @@ -128,9 +130,10 @@ // Delegate the query execution to the dedicated command BasChronometer lRequestInterpreterChronometer; lRequestInterpreterChronometer.start(); - RequestInterpreter::interpretTravelRequest (lSociSession, - lTravelDatabaseName, - iTravelQuery); + const NbOfMatches_T nbOfMatches = + RequestInterpreter::interpretTravelRequest (lSociSession, + lTravelDatabaseName, + iTravelQuery, ioLocationList); const double lRequestInterpreterMeasure = lRequestInterpreterChronometer.elapsed(); @@ -138,6 +141,8 @@ OPENTREP_LOG_DEBUG ("Match query on Xapian database (index): " << lRequestInterpreterMeasure << " - " << lOPENTREP_ServiceContext.display()); + + return nbOfMatches; } } Modified: trunk/opentrep/opentrep/sources.mk =================================================================== --- trunk/opentrep/opentrep/sources.mk 2009-07-18 15:15:40 UTC (rev 137) +++ trunk/opentrep/opentrep/sources.mk 2009-07-18 18:32:04 UTC (rev 138) @@ -1,4 +1,6 @@ service_h_sources = \ $(top_srcdir)/opentrep/OPENTREP_Types.hpp \ + $(top_srcdir)/opentrep/OPENTREP_Abstract.hpp \ + $(top_srcdir)/opentrep/Location.hpp \ $(top_srcdir)/opentrep/OPENTREP_Service.hpp 
service_cc_sources = This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <den...@us...> - 2009-07-18 20:30:12
|
Revision: 139 http://opentrep.svn.sourceforge.net/opentrep/?rev=139&view=rev Author: denis_arnaud Date: 2009-07-18 20:30:08 +0000 (Sat, 18 Jul 2009) Log Message: ----------- [Dev] Fixed a bug causing an infinite loop when no-matching words were part of the query string. Modified Paths: -------------- trunk/opentrep/opentrep/batches/searcher.cpp trunk/opentrep/opentrep/bom/ResultHolder.cpp trunk/opentrep/opentrep/bom/StringMatcher.cpp trunk/opentrep/opentrep/bom/StringMatcher.hpp trunk/opentrep/opentrep/command/RequestInterpreter.cpp Modified: trunk/opentrep/opentrep/batches/searcher.cpp =================================================================== --- trunk/opentrep/opentrep/batches/searcher.cpp 2009-07-18 18:32:04 UTC (rev 138) +++ trunk/opentrep/opentrep/batches/searcher.cpp 2009-07-18 20:30:08 UTC (rev 139) @@ -232,11 +232,11 @@ const OPENTREP::NbOfMatches_T nbOfMatches = opentrepService.interpretTravelRequest (lTravelQuery, lLocationList); - if (nbOfMatches != 0) { std::cout << nbOfMatches << " (geographical) location(s) have been found " << "matching your query (`" << lTravelQuery << "´)." << std::endl; + if (nbOfMatches != 0) { OPENTREP::NbOfMatches_T idx = 1; for (OPENTREP::LocationList_T::const_iterator itLocation = lLocationList.begin(); Modified: trunk/opentrep/opentrep/bom/ResultHolder.cpp =================================================================== --- trunk/opentrep/opentrep/bom/ResultHolder.cpp 2009-07-18 18:32:04 UTC (rev 138) +++ trunk/opentrep/opentrep/bom/ResultHolder.cpp 2009-07-18 20:30:08 UTC (rev 139) @@ -96,8 +96,8 @@ break; } - // Remove a word from the query string - StringMatcher::removeOneWord (ioPartialQueryString); + // Remove the furthest right word from the query string + StringMatcher::removeFurthestRightWord (ioPartialQueryString); // Stop when the resulting string gets empty. 
if (ioPartialQueryString.empty() == true) { @@ -150,22 +150,40 @@ // DEBUG OPENTREP_LOG_DEBUG ("==> Matching of the query string: `" << lQueryString << "'"); + + /** + Remove, from the lRemainingQueryString string, the part + which has been already successfully parsed. <br>For + instance, when 'sna francisco rio de janeiro' is the + initial full clean query string, the searchString() + method first reduce the query string to 'sna francisco', + which successfully matches against SFO (San Francisco + airport). <br>Then, the remaining part of the query + string to be parsed is 'rio de janeiro'. So, the already + parsed part, namely 'sna francisco', must be subtracted + from the initial query string. + */ + StringMatcher::subtractParsedToRemaining (lQueryString, + lRemainingQueryString); + } else { + // DEBUG + OPENTREP_LOG_DEBUG ("==> No matching of the query string: `" + << lRemainingQueryString + << "'. Skip the beginning word."); + assert (lQueryString.empty() == true); + + /** + We must suppress (at least) the furthest left word, as it + hinders the remaining of the query string to be + matched. If that furthest left word is the only word of + the query string, the remaining query string will + therefore be empty, and the loop will therefore be exited + in the next step below. + */ + // Remove the furthest left word from the query string + StringMatcher::removeFurthestLeftWord (lRemainingQueryString); } - /** - Remove, from the lRemainingQueryString string, the part which - has been already successfully parsed. - <br>For instance, when 'sna francisco rio de janeiro' is the - initial full clean query string, the searchString() method - first reduce the query string to 'sna francisco', which - successfully matches against SFO (San Francisco airport). - <br>Then, the remaining part of the query string to be parsed is - 'rio de janeiro'. So, the already parsed part, namely - 'sna francisco', must be subtracted from the initial query string. 
- */ - StringMatcher::subtractParsedToRemaining (lQueryString, - lRemainingQueryString); - // If there is nothing left to be parsed, we have then finished // to parse the initial string. if (lRemainingQueryString.empty() == true) { Modified: trunk/opentrep/opentrep/bom/StringMatcher.cpp =================================================================== --- trunk/opentrep/opentrep/bom/StringMatcher.cpp 2009-07-18 18:32:04 UTC (rev 138) +++ trunk/opentrep/opentrep/bom/StringMatcher.cpp 2009-07-18 20:30:08 UTC (rev 139) @@ -312,7 +312,7 @@ } // ////////////////////////////////////////////////////////////////////// - void StringMatcher::removeOneWord (std::string& ioQueryString) { + void StringMatcher::removeFurthestRightWord (std::string& ioQueryString) { assert (ioQueryString.empty() == false); WordList_T lWordList; @@ -328,6 +328,22 @@ } // ////////////////////////////////////////////////////////////////////// + void StringMatcher::removeFurthestLeftWord (std::string& ioQueryString) { + assert (ioQueryString.empty() == false); + + WordList_T lWordList; + WordHolder::tokeniseStringIntoWordList (ioQueryString, lWordList); + assert (lWordList.empty() == false); + + // Remove the furthest left word + lWordList.pop_front(); + + const std::string& lReducedString = + WordHolder::createStringFromWordList (lWordList); + ioQueryString = lReducedString; + } + + // ////////////////////////////////////////////////////////////////////// void StringMatcher:: subtractParsedToRemaining (const std::string& iAlreadyParsedQueryString, std::string& ioRemainingQueryString) { Modified: trunk/opentrep/opentrep/bom/StringMatcher.hpp =================================================================== --- trunk/opentrep/opentrep/bom/StringMatcher.hpp 2009-07-18 18:32:04 UTC (rev 138) +++ trunk/opentrep/opentrep/bom/StringMatcher.hpp 2009-07-18 20:30:08 UTC (rev 139) @@ -41,8 +41,11 @@ extractBestMatchingDocumentFromMSet (const Xapian::MSet&, Document&); /** Remove the word furthest at right. 
*/ - static void removeOneWord (std::string& ioQueryString); + static void removeFurthestRightWord (std::string& ioQueryString); + /** Remove the word furthest at left. */ + static void removeFurthestLeftWord (std::string& ioQueryString); + /** Remove, from a string, the part corresponding to the one given as parameter. */ static void Modified: trunk/opentrep/opentrep/command/RequestInterpreter.cpp =================================================================== --- trunk/opentrep/opentrep/command/RequestInterpreter.cpp 2009-07-18 18:32:04 UTC (rev 138) +++ trunk/opentrep/opentrep/command/RequestInterpreter.cpp 2009-07-18 20:30:08 UTC (rev 139) @@ -159,6 +159,7 @@ of the Place objects. Those (Location) structures are passed back to the caller of the service. */ lPlaceHolder.createLocations (ioLocationList); + oNbOfMatches = ioLocationList.size(); return oNbOfMatches; } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <den...@us...> - 2009-07-20 15:17:38
|
Revision: 148 http://opentrep.svn.sourceforge.net/opentrep/?rev=148&view=rev Author: denis_arnaud Date: 2009-07-20 15:17:31 +0000 (Mon, 20 Jul 2009) Log Message: ----------- [Dev] Created a DBParams structure in the interface (API), so that MySQL parameters can be set by the caller. Modified Paths: -------------- trunk/opentrep/opentrep/Location.hpp trunk/opentrep/opentrep/OPENTREP_Service.hpp trunk/opentrep/opentrep/OPENTREP_Types.hpp trunk/opentrep/opentrep/batches/indexer.cpp trunk/opentrep/opentrep/batches/searcher.cpp trunk/opentrep/opentrep/bom/StringMatcher.cpp trunk/opentrep/opentrep/command/SociSessionManager.cpp trunk/opentrep/opentrep/command/SociSessionManager.hpp trunk/opentrep/opentrep/service/OPENTREP_Service.cpp trunk/opentrep/opentrep/sources.mk Added Paths: ----------- trunk/opentrep/opentrep/DBParams.hpp trunk/opentrep/opentrep/batches/opentrep_indexer.cfg trunk/opentrep/opentrep/batches/opentrep_searcher.cfg Added: trunk/opentrep/opentrep/DBParams.hpp =================================================================== --- trunk/opentrep/opentrep/DBParams.hpp (rev 0) +++ trunk/opentrep/opentrep/DBParams.hpp 2009-07-20 15:17:31 UTC (rev 148) @@ -0,0 +1,140 @@ +#ifndef __OPENTREP_DBPARAMS_HPP +#define __OPENTREP_DBPARAMS_HPP + +// ////////////////////////////////////////////////////////////////////// +// Import section +// ////////////////////////////////////////////////////////////////////// +// STL +#include <istream> +#include <ostream> +#include <sstream> +#include <string> +// OpenTrep +#include <opentrep/OPENTREP_Types.hpp> +#include <opentrep/OPENTREP_Abstract.hpp> + +namespace OPENTREP { + + /** List of names for a given (geographical) dbparams. */ + typedef std::list<std::string> DBParamsNameList_T; + + + /** Structure modelling a (geographical) dbparams. */ + struct DBParams : public OPENTREP_Abstract { + public: + // ///////// Getters //////// + /** Get the database user name. 
*/ + std::string getUser() const { + return _user; + } + + /** Get the database user password. */ + std::string getPassword() const { + return _passwd; + } + + /** Get the database host name. */ + std::string getHost() const { + return _host; + } + + /** Get the database port number. */ + std::string getPort() const { + return _port; + } + + /** Get the database name. */ + std::string getDBName() const { + return _dbname; + } + + + // ///////// Setters ////////// + /** Set the database user name. */ + void setUser (const std::string& iUser) { + _user = iUser; + } + + /** Set the database password. */ + void setPassword (const std::string& iPasswd) { + _passwd = iPasswd; + } + + /** Set the database host name. */ + void setHost (const std::string& iHost) { + _host = iHost; + } + + /** Set the database port number. */ + void setPort (const std::string& iPort) { + _port = iPort; + } + + /** Set the database name. */ + void setDBName (const std::string& iDBName) { + _dbname = iDBName; + } + + + public: + // ///////// Display methods //////// + /** Dump a structure into an output stream. + @param ostream& the output stream. */ + void toStream (std::ostream& ioOut) const { + ioOut << toString(); + } + + /** Read a structure from an input stream. + @param istream& the input stream. */ + void fromStream (std::istream&) { + } + + /** Get a short display of the DBParams structure. */ + std::string toShortString() const { + std::ostringstream oStr; + oStr << _dbname << "." << _user << "@" << _host << ":" << _port; + return oStr.str(); + } + + /** Get the serialised version of the DBParams structure. */ + std::string toString() const { + std::ostringstream oStr; + oStr << _dbname << "." << _user << "@" << _host << ":" << _port; + return oStr.str(); + } + + + public: + /** Main Constructor. 
*/ + DBParams (const std::string& iDBUser, const std::string& iDBPasswd, + const std::string& iDBHost, const std::string& iDBPort, + const std::string& iDBName) + : _user (iDBUser), _passwd (iDBPasswd), _host (iDBHost), _port (iDBPort), + _dbname (iDBName) { + } + + /** Default Constructor. */ + // DBParams (); + /** Default copy constructor. */ + // DBParams (const DBParams&); + + /** Destructor. */ + virtual ~DBParams() {} + + + private: + // /////// Attributes ///////// + /** Database user name. */ + std::string _user; + /** Database user password. */ + std::string _passwd; + /** Database host name. */ + std::string _host; + /** Database port number. */ + std::string _port; + /** Database name. */ + std::string _dbname; + }; + +} +#endif // __OPENTREP_DBPARAMS_HPP Modified: trunk/opentrep/opentrep/Location.hpp =================================================================== --- trunk/opentrep/opentrep/Location.hpp 2009-07-20 14:03:18 UTC (rev 147) +++ trunk/opentrep/opentrep/Location.hpp 2009-07-20 15:17:31 UTC (rev 148) @@ -121,7 +121,7 @@ } /** Set the list of (American) English names for that location. */ - void getNameList (const LocationNameList_T& iNameList) { + void setNameList (const LocationNameList_T& iNameList) { _nameList = iNameList; } Modified: trunk/opentrep/opentrep/OPENTREP_Service.hpp =================================================================== --- trunk/opentrep/opentrep/OPENTREP_Service.hpp 2009-07-20 14:03:18 UTC (rev 147) +++ trunk/opentrep/opentrep/OPENTREP_Service.hpp 2009-07-20 15:17:31 UTC (rev 148) @@ -9,6 +9,7 @@ #include <string> // OpenTREP #include <opentrep/OPENTREP_Types.hpp> +#include <opentrep/DBParams.hpp> #include <opentrep/Location.hpp> namespace OPENTREP { @@ -22,7 +23,7 @@ public: // /////////// Business Methods ///////////// /** Build the Xapian database (index) on the BOM held in memory. 
*/ - void buildSearchIndex (); + void buildSearchIndex(); /** Match the given string, thanks to a full-text search on the underlying Xapian index (named "database"). @@ -39,8 +40,9 @@ // ////////// Constructors and destructors ////////// /** Constructor. @param std::ostream& Output log stream (for instance, std::cout). + @param const DBParams& The SQL database parameters. @param const std::string& Filepath of the Xapian index/database. */ - OPENTREP_Service (std::ostream& ioLogStream, + OPENTREP_Service (std::ostream& ioLogStream, const DBParams&, const std::string& iXapianDatabaseFilepath); /** Destructor. */ @@ -55,7 +57,7 @@ OPENTREP_Service (const OPENTREP_Service&); /** Initialise. */ - void init (std::ostream& ioLogStream, + void init (std::ostream& ioLogStream, const DBParams&, const std::string& iXapianDatabaseFilepath); /** Finalise. */ Modified: trunk/opentrep/opentrep/OPENTREP_Types.hpp =================================================================== --- trunk/opentrep/opentrep/OPENTREP_Types.hpp 2009-07-20 14:03:18 UTC (rev 147) +++ trunk/opentrep/opentrep/OPENTREP_Types.hpp 2009-07-20 15:17:31 UTC (rev 148) @@ -33,6 +33,9 @@ class DocumentNotFoundException : public RootException { }; + class SQLDatabaseConnectionImpossibleException : public RootException { + }; + // /////////////// Log ///////////// /** Level of logs. 
*/ Modified: trunk/opentrep/opentrep/batches/indexer.cpp =================================================================== --- trunk/opentrep/opentrep/batches/indexer.cpp 2009-07-20 14:03:18 UTC (rev 147) +++ trunk/opentrep/opentrep/batches/indexer.cpp 2009-07-20 15:17:31 UTC (rev 148) @@ -4,14 +4,15 @@ #include <iostream> #include <sstream> #include <fstream> -#include <map> #include <vector> +#include <string> // Boost (Extended STL) #include <boost/date_time/posix_time/posix_time.hpp> #include <boost/date_time/gregorian/gregorian.hpp> #include <boost/program_options.hpp> -// OPENTREP +// OpenTREP #include <opentrep/OPENTREP_Service.hpp> +#include <opentrep/DBParams.hpp> #include <opentrep/config/opentrep-paths.hpp> @@ -26,7 +27,14 @@ /** Default name and location for the Xapian database. */ const std::string K_OPENTREP_DEFAULT_DATABSE_FILEPATH("/tmp/opentrep/traveldb"); +/** Default name and location for the Xapian database. */ +const std::string K_OPENTREP_DEFAULT_DB_USER ("opentrep"); +const std::string K_OPENTREP_DEFAULT_DB_PASSWD ("opentrep"); +const std::string K_OPENTREP_DEFAULT_DB_DBNAME ("opentrep"); +const std::string K_OPENTREP_DEFAULT_DB_HOST ("localhost"); +const std::string K_OPENTREP_DEFAULT_DB_PORT ("3306"); + // ///////// Parsing of Options & Configuration ///////// /** Early return status (so that it can be differentiated from an error). */ const int K_OPENTREP_EARLY_RETURN_STATUS = 99; @@ -34,7 +42,10 @@ /** Read and parse the command line options. 
*/ int readConfiguration (int argc, char* argv[], std::string& ioDatabaseFilepath, - std::string& ioLogFilename) { + std::string& ioLogFilename, + std::string& ioDBUser, std::string& ioDBPasswd, + std::string& ioDBHost, std::string& ioDBPort, + std::string& ioDBDBName) { // Declare a group of options that will be allowed only on command line boost::program_options::options_description generic ("Generic options"); @@ -42,7 +53,7 @@ ("prefix", "print installation prefix") ("version,v", "print version string") ("help,h", "produce help message"); - + // Declare a group of options that will be allowed both on command // line and in config file boost::program_options::options_description config ("Configuration"); @@ -53,6 +64,21 @@ ("log,l", boost::program_options::value< std::string >(&ioLogFilename)->default_value(K_OPENTREP_DEFAULT_LOG_FILENAME), "Filepath for the logs") + ("user,u", + boost::program_options::value< std::string >(&ioDBUser)->default_value(K_OPENTREP_DEFAULT_DB_USER), + "SQL database hostname (e.g., opentrep)") + ("passwd,p", + boost::program_options::value< std::string >(&ioDBPasswd)->default_value(K_OPENTREP_DEFAULT_DB_PASSWD), + "SQL database hostname (e.g., opentrep)") + ("host,h", + boost::program_options::value< std::string >(&ioDBHost)->default_value(K_OPENTREP_DEFAULT_DB_HOST), + "SQL database hostname (e.g., localhost)") + ("port,P", + boost::program_options::value< std::string >(&ioDBPort)->default_value(K_OPENTREP_DEFAULT_DB_PORT), + "SQL database port (e.g., 3306)") + ("dbname,m", + boost::program_options::value< std::string >(&ioDBDBName)->default_value(K_OPENTREP_DEFAULT_DB_DBNAME), + "SQL database name (e.g., opentrep)") ; // Hidden options, will be allowed both on command line and @@ -111,6 +137,31 @@ std::cout << "Log filename is: " << ioLogFilename << std::endl; } + if (vm.count ("user")) { + ioDBUser = vm["user"].as< std::string >(); + std::cout << "SQL database user name is: " << ioDBUser << std::endl; + } + + if (vm.count 
("passwd")) { + ioDBPasswd = vm["passwd"].as< std::string >(); + // std::cout << "SQL database user password is: " << ioDBPasswd << std::endl; + } + + if (vm.count ("host")) { + ioDBHost = vm["host"].as< std::string >(); + std::cout << "SQL database host name is: " << ioDBHost << std::endl; + } + + if (vm.count ("port")) { + ioDBPort = vm["port"].as< std::string >(); + std::cout << "SQL database port number is: " << ioDBPort << std::endl; + } + + if (vm.count ("dbname")) { + ioDBDBName = vm["dbname"].as< std::string >(); + std::cout << "SQL database name is: " << ioDBDBName << std::endl; + } + return 0; } @@ -124,14 +175,26 @@ // Xapian database name (directory of the index) OPENTREP::TravelDatabaseName_T lXapianDatabaseName; - + + // SQL database parameters + std::string lDBUser; + std::string lDBPasswd; + std::string lDBHost; + std::string lDBPort; + std::string lDBDBName; + // Call the command-line option parser const int lOptionParserStatus = - readConfiguration (argc, argv, lXapianDatabaseName, lLogFilename); + readConfiguration (argc, argv, lXapianDatabaseName, lLogFilename, + lDBUser, lDBPasswd, lDBHost, lDBPort, lDBDBName); if (lOptionParserStatus == K_OPENTREP_EARLY_RETURN_STATUS) { return 0; } + + // Set the database parameters + OPENTREP::DBParams lDBParams (lDBUser, lDBPasswd, lDBHost, lDBPort, + lDBDBName); // Set the log parameters std::ofstream logOutputFile; @@ -145,7 +208,7 @@ << std::endl; // Initialise the context - OPENTREP::OPENTREP_Service opentrepService (logOutputFile, + OPENTREP::OPENTREP_Service opentrepService (logOutputFile, lDBParams, lXapianDatabaseName); // Launch the indexation Added: trunk/opentrep/opentrep/batches/opentrep_indexer.cfg =================================================================== --- trunk/opentrep/opentrep/batches/opentrep_indexer.cfg (rev 0) +++ trunk/opentrep/opentrep/batches/opentrep_indexer.cfg 2009-07-20 15:17:31 UTC (rev 148) @@ -0,0 +1,7 @@ +database=../../test/traveldb +log=opentrep_indexer.log 
+user=opentrep +passwd=opentrep +host=localhost +port=3306 +dbname=opentrep Added: trunk/opentrep/opentrep/batches/opentrep_searcher.cfg =================================================================== --- trunk/opentrep/opentrep/batches/opentrep_searcher.cfg (rev 0) +++ trunk/opentrep/opentrep/batches/opentrep_searcher.cfg 2009-07-20 15:17:31 UTC (rev 148) @@ -0,0 +1,9 @@ +database=../../test/traveldb +log=opentrep_indexer.log +user=opentrep +passwd=opentrep +host=localhost +port=3306 +dbname=opentrep +error=3 +query="sna francicso rio de janero lso angles reykyavki" \ No newline at end of file Modified: trunk/opentrep/opentrep/batches/searcher.cpp =================================================================== --- trunk/opentrep/opentrep/batches/searcher.cpp 2009-07-20 14:03:18 UTC (rev 147) +++ trunk/opentrep/opentrep/batches/searcher.cpp 2009-07-20 15:17:31 UTC (rev 148) @@ -13,6 +13,7 @@ #include <boost/program_options.hpp> // OpenTREP #include <opentrep/OPENTREP_Service.hpp> +#include <opentrep/DBParams.hpp> #include <opentrep/config/opentrep-paths.hpp> @@ -30,6 +31,13 @@ /** Default travel query string, to be seached against the Xapian database. */ const std::string K_OPENTREP_DEFAULT_QUERY_STRING ("sna francicso rio de janero lso angles reykyavki"); +/** Default name and location for the Xapian database. */ +const std::string K_OPENTREP_DEFAULT_DB_USER ("opentrep"); +const std::string K_OPENTREP_DEFAULT_DB_PASSWD ("opentrep"); +const std::string K_OPENTREP_DEFAULT_DB_DBNAME ("opentrep"); +const std::string K_OPENTREP_DEFAULT_DB_HOST ("localhost"); +const std::string K_OPENTREP_DEFAULT_DB_PORT ("3306"); + /** Default error distance for spelling corrections. 
*/ const unsigned short K_OPENTREP_DEFAULT_SPELLING_ERROR_DISTANCE = 3; @@ -90,7 +98,10 @@ unsigned short& ioSpellingErrorDistance, std::string& ioQueryString, std::string& ioDatabaseFilepath, - std::string& ioLogFilename) { + std::string& ioLogFilename, + std::string& ioDBUser, std::string& ioDBPasswd, + std::string& ioDBHost, std::string& ioDBPort, + std::string& ioDBDBName) { // Initialise the travel query string, if that one is empty if (ioQueryString.empty() == true) { @@ -124,6 +135,21 @@ ("log,l", boost::program_options::value< std::string >(&ioLogFilename)->default_value(K_OPENTREP_DEFAULT_LOG_FILENAME), "Filepath for the logs") + ("user,u", + boost::program_options::value< std::string >(&ioDBUser)->default_value(K_OPENTREP_DEFAULT_DB_USER), + "SQL database hostname (e.g., opentrep)") + ("passwd,p", + boost::program_options::value< std::string >(&ioDBPasswd)->default_value(K_OPENTREP_DEFAULT_DB_PASSWD), + "SQL database hostname (e.g., opentrep)") + ("host,h", + boost::program_options::value< std::string >(&ioDBHost)->default_value(K_OPENTREP_DEFAULT_DB_HOST), + "SQL database hostname (e.g., localhost)") + ("port,P", + boost::program_options::value< std::string >(&ioDBPort)->default_value(K_OPENTREP_DEFAULT_DB_PORT), + "SQL database port (e.g., 3306)") + ("dbname,m", + boost::program_options::value< std::string >(&ioDBDBName)->default_value(K_OPENTREP_DEFAULT_DB_DBNAME), + "SQL database name (e.g., opentrep)") ; // Hidden options, will be allowed both on command line and @@ -182,6 +208,31 @@ std::cout << "Log filename is: " << ioLogFilename << std::endl; } + if (vm.count ("user")) { + ioDBUser = vm["user"].as< std::string >(); + std::cout << "SQL database user name is: " << ioDBUser << std::endl; + } + + if (vm.count ("passwd")) { + ioDBPasswd = vm["passwd"].as< std::string >(); + // std::cout << "SQL database user password is: " << ioDBPasswd << std::endl; + } + + if (vm.count ("host")) { + ioDBHost = vm["host"].as< std::string >(); + std::cout << "SQL 
database host name is: " << ioDBHost << std::endl; + } + + if (vm.count ("port")) { + ioDBPort = vm["port"].as< std::string >(); + std::cout << "SQL database port number is: " << ioDBPort << std::endl; + } + + if (vm.count ("dbname")) { + ioDBDBName = vm["dbname"].as< std::string >(); + std::cout << "SQL database name is: " << ioDBDBName << std::endl; + } + std::cout << "The spelling error distance is: " << ioSpellingErrorDistance << std::endl; @@ -205,18 +256,30 @@ // Xapian database name (directory of the index) OPENTREP::TravelDatabaseName_T lXapianDatabaseName; + // SQL database parameters + std::string lDBUser; + std::string lDBPasswd; + std::string lDBHost; + std::string lDBPort; + std::string lDBDBName; + // Xapian spelling error distance unsigned short lSpellingErrorDistance; // Call the command-line option parser const int lOptionParserStatus = readConfiguration (argc, argv, lSpellingErrorDistance, lTravelQuery, - lXapianDatabaseName, lLogFilename); + lXapianDatabaseName, lLogFilename, + lDBUser, lDBPasswd, lDBHost, lDBPort, lDBDBName); if (lOptionParserStatus == K_OPENTREP_EARLY_RETURN_STATUS) { return 0; } + // Set the database parameters + OPENTREP::DBParams lDBParams (lDBUser, lDBPasswd, lDBHost, lDBPort, + lDBDBName); + // Set the log parameters std::ofstream logOutputFile; // open and clean the log outputfile @@ -224,7 +287,7 @@ logOutputFile.clear(); // Initialise the context - OPENTREP::OPENTREP_Service opentrepService (logOutputFile, + OPENTREP::OPENTREP_Service opentrepService (logOutputFile, lDBParams, lXapianDatabaseName); // Query the Xapian database (index) Modified: trunk/opentrep/opentrep/bom/StringMatcher.cpp =================================================================== --- trunk/opentrep/opentrep/bom/StringMatcher.cpp 2009-07-20 14:03:18 UTC (rev 147) +++ trunk/opentrep/opentrep/bom/StringMatcher.cpp 2009-07-20 15:17:31 UTC (rev 148) @@ -322,7 +322,7 @@ NbOfMatches_T idx = 1; for ( ; itDoc != iMatchingSet.end(); ++itDoc, ++idx) { 
const Xapian::percent& lPercentage = itDoc.get_percent(); - const Xapian::Document& lDocument = itDoc.get_document(); + // const Xapian::Document& lDocument = itDoc.get_document(); // DEBUG /* Modified: trunk/opentrep/opentrep/command/SociSessionManager.cpp =================================================================== --- trunk/opentrep/opentrep/command/SociSessionManager.cpp 2009-07-20 14:03:18 UTC (rev 147) +++ trunk/opentrep/opentrep/command/SociSessionManager.cpp 2009-07-20 15:17:31 UTC (rev 148) @@ -2,7 +2,7 @@ // Import section // ////////////////////////////////////////////////////////////////////// // C -#include <assert.h> +#include <cassert> // STL #include <string> #include <sstream> @@ -10,23 +10,21 @@ #include <soci/core/soci.h> #include <soci/backends/mysql/soci-mysql.h> // OpenTrep +#include <opentrep/DBParams.hpp> #include <opentrep/command/SociSessionManager.hpp> #include <opentrep/service/Logger.hpp> namespace OPENTREP { // ////////////////////////////////////////////////////////////////////// - void SociSessionManager::init (soci::session*& ioSociSession_ptr) { + void SociSessionManager::init (soci::session*& ioSociSession_ptr, + const DBParams& iDBParams) { + // Database parameters - const std::string lUserName ("opentrep"); - const std::string lPassword ("opentrep"); - const std::string lDBName ("opentrep"); - const std::string lDBPort ("3306"); - const std::string lDBHost ("localhost"); std::ostringstream oStr; - oStr << "db=" << lDBName << " user=" << lUserName - << " password=" << lPassword << " port=" << lDBPort - << " host=" << lDBHost; + oStr << "db=" << iDBParams.getDBName() << " user=" << iDBParams.getUser() + << " password=" << iDBParams.getPassword() + << " port=" << iDBParams.getPort() << " host=" << iDBParams.getHost(); const std::string lSociSessionConnectionString (oStr.str()); // Instanciate a SOCI Session: nothing is performed at that stage @@ -40,6 +38,12 @@ } catch (std::exception const& lException) { OPENTREP_LOG_ERROR 
("Error while opening a connection to database: " << lException.what()); + OPENTREP_LOG_ERROR ("Database parameters used:" + << " db=" << iDBParams.getDBName() + << " user=" << iDBParams.getUser() + << " port=" << iDBParams.getPort() + << " host=" << iDBParams.getHost()); + throw SQLDatabaseConnectionImpossibleException(); } } Modified: trunk/opentrep/opentrep/command/SociSessionManager.hpp =================================================================== --- trunk/opentrep/opentrep/command/SociSessionManager.hpp 2009-07-20 14:03:18 UTC (rev 147) +++ trunk/opentrep/opentrep/command/SociSessionManager.hpp 2009-07-20 15:17:31 UTC (rev 148) @@ -11,13 +11,16 @@ } namespace OPENTREP { + + // Forward declarations + struct DBParams; /** Class handling the SOCI session. */ class SociSessionManager { friend class OPENTREP_Service; private: /** Initialise (MySQL) database connection. */ - static void init (soci::session*&); + static void init (soci::session*&, const DBParams&); /** Finalise (MySQL) database connection. 
*/ static void finalise (soci::session*&); Modified: trunk/opentrep/opentrep/service/OPENTREP_Service.cpp =================================================================== --- trunk/opentrep/opentrep/service/OPENTREP_Service.cpp 2009-07-20 14:03:18 UTC (rev 147) +++ trunk/opentrep/opentrep/service/OPENTREP_Service.cpp 2009-07-20 15:17:31 UTC (rev 148) @@ -20,10 +20,10 @@ // ////////////////////////////////////////////////////////////////////// OPENTREP_Service:: - OPENTREP_Service (std::ostream& ioLogStream, + OPENTREP_Service (std::ostream& ioLogStream, const DBParams& iDBParams, const std::string& iXapianDatabaseFilepath) : _opentrepServiceContext (NULL) { - init (ioLogStream, iXapianDatabaseFilepath); + init (ioLogStream, iDBParams, iXapianDatabaseFilepath); } // ////////////////////////////////////////////////////////////////////// @@ -51,6 +51,7 @@ // ////////////////////////////////////////////////////////////////////// void OPENTREP_Service::init (std::ostream& ioLogStream, + const DBParams& iDBParams, const std::string& iTravelDatabaseName) { // Set the log file logInit (LOG::DEBUG, ioLogStream); @@ -62,7 +63,7 @@ // Initialise the SOCI Session soci::session* lSociSession_ptr = lOPENTREP_ServiceContext.getSociSession(); - SociSessionManager::init (lSociSession_ptr); + SociSessionManager::init (lSociSession_ptr, iDBParams); assert (lSociSession_ptr != NULL); lOPENTREP_ServiceContext.setSociSession (*lSociSession_ptr); Modified: trunk/opentrep/opentrep/sources.mk =================================================================== --- trunk/opentrep/opentrep/sources.mk 2009-07-20 14:03:18 UTC (rev 147) +++ trunk/opentrep/opentrep/sources.mk 2009-07-20 15:17:31 UTC (rev 148) @@ -1,6 +1,7 @@ service_h_sources = \ $(top_srcdir)/opentrep/OPENTREP_Types.hpp \ $(top_srcdir)/opentrep/OPENTREP_Abstract.hpp \ + $(top_srcdir)/opentrep/DBParams.hpp \ $(top_srcdir)/opentrep/Location.hpp \ $(top_srcdir)/opentrep/OPENTREP_Service.hpp service_cc_sources = This was sent by 
the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <den...@us...> - 2009-07-21 05:54:19
|
Revision: 154 http://opentrep.svn.sourceforge.net/opentrep/?rev=154&view=rev Author: denis_arnaud Date: 2009-07-21 05:54:17 +0000 (Tue, 21 Jul 2009) Log Message: ----------- [Dev] Added a structure for the scale of distance errors. Modified Paths: -------------- trunk/opentrep/opentrep/OPENTREP_Service.hpp trunk/opentrep/opentrep/sources.mk Added Paths: ----------- trunk/opentrep/opentrep/DistanceErrorRule.hpp Added: trunk/opentrep/opentrep/DistanceErrorRule.hpp =================================================================== --- trunk/opentrep/opentrep/DistanceErrorRule.hpp (rev 0) +++ trunk/opentrep/opentrep/DistanceErrorRule.hpp 2009-07-21 05:54:17 UTC (rev 154) @@ -0,0 +1,98 @@ +#ifndef __OPENTREP_DISTANCEERRORRULE_HPP +#define __OPENTREP_DISTANCEERRORRULE_HPP + +// ////////////////////////////////////////////////////////////////////// +// Import section +// ////////////////////////////////////////////////////////////////////// +// STL +#include <istream> +#include <ostream> +#include <string> +#include <map> +// OpenTrep +#include <opentrep/OPENTREP_Types.hpp> +#include <opentrep/OPENTREP_Abstract.hpp> + +namespace OPENTREP { + + /** Number of letters in a word or phrase. */ + typedef unsigned int NbOfLetters_T; + + /** Number of errors. */ + typedef unsigned short NbOfErrors_T; + + /** List of names for a given (geographical) distanceerrorrule. */ + typedef std::map<NbOfLetters_T, NbOfErrors_T> DistanceErrorScale_T; + + + /** Structure modelling a (geographical) distanceerrorrule. */ + struct DistanceErrorRule : public OPENTREP_Abstract { + public: + // ///////// Getters //////// + /** Get the DistanceErrorRule code. */ + const DistanceErrorScale_T& getDistanceErrorScale() const { + return _scale; + } + + + // ///////// Setters ////////// + + + public: + // ///////// Display methods //////// + /** Dump a structure into an output stream. + @param ostream& the output stream. 
*/ + void toStream (std::ostream& ioOut) const { + ioOut << toString(); + } + + /** Read a structure from an input stream. + @param istream& the input stream. */ + void fromStream (std::istream&) { + } + + /** Get a short display of the DistanceErrorRule structure. */ + std::string toShortString() const { + std::ostringstream oStr; + NbOfLetters_T idx = 0; + for (DistanceErrorScale_T::const_iterator itError = _scale.begin(); + itError != _scale.end(); ++itError, ++idx) { + if (idx != 0) { + oStr << ", "; + } + oStr << itError->second << ": " << itError->first; + } + return oStr.str(); + } + + /** Get the serialised version of the DistanceErrorRule structure. */ + std::string toString() const { + std::ostringstream oStr; + oStr << toShortString(); + return oStr.str(); + } + + + public: + /** Main Constructor. */ + DistanceErrorRule (const DistanceErrorScale_T& iScale) + : _scale (iScale) { + } + + /** Default Constructor. */ + // DistanceErrorRule (); + /** Default copy constructor. */ + // DistanceErrorRule (const DistanceErrorRule&); + + /** Destructor. */ + virtual ~DistanceErrorRule() {} + + + private: + // /////// Attributes ///////// + /** Scale of distance errors. 
*/ + DistanceErrorScale_T _scale; + }; + +} +#endif // __OPENTREP_DISTANCEERRORRULE_HPP Modified: trunk/opentrep/opentrep/OPENTREP_Service.hpp =================================================================== --- trunk/opentrep/opentrep/OPENTREP_Service.hpp 2009-07-20 22:41:55 UTC (rev 153) +++ trunk/opentrep/opentrep/OPENTREP_Service.hpp 2009-07-21 05:54:17 UTC (rev 154) @@ -11,6 +11,7 @@ #include <opentrep/OPENTREP_Types.hpp> #include <opentrep/DBParams.hpp> #include <opentrep/Location.hpp> +#include <opentrep/DistanceErrorRule.hpp> namespace OPENTREP { Modified: trunk/opentrep/opentrep/sources.mk =================================================================== --- trunk/opentrep/opentrep/sources.mk 2009-07-20 22:41:55 UTC (rev 153) +++ trunk/opentrep/opentrep/sources.mk 2009-07-21 05:54:17 UTC (rev 154) @@ -3,5 +3,6 @@ $(top_srcdir)/opentrep/OPENTREP_Abstract.hpp \ $(top_srcdir)/opentrep/DBParams.hpp \ $(top_srcdir)/opentrep/Location.hpp \ + $(top_srcdir)/opentrep/DistanceErrorRule.hpp \ $(top_srcdir)/opentrep/OPENTREP_Service.hpp service_cc_sources = This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <den...@us...> - 2009-07-21 21:56:59
|
Revision: 157 http://opentrep.svn.sourceforge.net/opentrep/?rev=157&view=rev Author: denis_arnaud Date: 2009-07-21 21:56:56 +0000 (Tue, 21 Jul 2009) Log Message: ----------- [Dev] Worked on the edit distance calculation. Modified Paths: -------------- trunk/opentrep/opentrep/batches/opentrep_searcher.cfg trunk/opentrep/opentrep/batches/searcher.cpp trunk/opentrep/opentrep/bom/ResultHolder.cpp trunk/opentrep/opentrep/bom/StringMatcher.cpp Modified: trunk/opentrep/opentrep/batches/opentrep_searcher.cfg =================================================================== --- trunk/opentrep/opentrep/batches/opentrep_searcher.cfg 2009-07-21 16:57:52 UTC (rev 156) +++ trunk/opentrep/opentrep/batches/opentrep_searcher.cfg 2009-07-21 21:56:56 UTC (rev 157) @@ -1,9 +1,9 @@ database=../../test/traveldb -log=opentrep_indexer.log +log=opentrep_searcher.log user=opentrep passwd=opentrep host=localhost port=3306 dbname=trep_opentrep error=3 -query="sna francicso rio de janero lso angles reykyavki" \ No newline at end of file +query="sna francicso rio de janero lso angles reykyavki" Modified: trunk/opentrep/opentrep/batches/searcher.cpp =================================================================== --- trunk/opentrep/opentrep/batches/searcher.cpp 2009-07-21 16:57:52 UTC (rev 156) +++ trunk/opentrep/opentrep/batches/searcher.cpp 2009-07-21 21:56:56 UTC (rev 157) @@ -247,6 +247,7 @@ int main (int argc, char* argv[]) { try { + /* const OPENTREP::NbOfLetters_T lScaleArray[5] = {3, 6, 9, 14, 19}; const OPENTREP::DistanceErrorScaleArray_T lScaleBoostArray = @@ -264,6 +265,7 @@ } return 0; + */ // Travel query OPENTREP::TravelQuery_T lTravelQuery; @@ -316,7 +318,7 @@ lNonMatchedWordList); std::cout << nbOfMatches << " (geographical) location(s) have been found " - << "matching your query (`" << lTravelQuery << "\xB4). " + << "matching your query (`" << lTravelQuery << "'). " << lNonMatchedWordList.size() << " words were left unmatched." 
<< std::endl; Modified: trunk/opentrep/opentrep/bom/ResultHolder.cpp =================================================================== --- trunk/opentrep/opentrep/bom/ResultHolder.cpp 2009-07-21 16:57:52 UTC (rev 156) +++ trunk/opentrep/opentrep/bom/ResultHolder.cpp 2009-07-21 21:56:56 UTC (rev 157) @@ -85,8 +85,8 @@ // DEBUG OPENTREP_LOG_DEBUG ("Current initial query string: `" - << ioPartialQueryString << "´ --- Kept query: `" - << oMatchedString << "´ for " + << ioPartialQueryString << "' --- Kept query: `" + << oMatchedString << "' for " << lMatchingSet.size() << " matches."); if (lMatchingSet.empty() == false) { @@ -160,8 +160,8 @@ lMatchingDocument.notifyIfExtraMatch(); OPENTREP_LOG_DEBUG ("==> " << lNbOfMatches << " matches for the query string: `" - << lMatchedString << "´ (from `" - << lQueryString << "´)"); + << lMatchedString << "' (from `" + << lQueryString << "')"); /** Remove, from the lRemainingQueryString string, the part Modified: trunk/opentrep/opentrep/bom/StringMatcher.cpp =================================================================== --- trunk/opentrep/opentrep/bom/StringMatcher.cpp 2009-07-21 16:57:52 UTC (rev 156) +++ trunk/opentrep/opentrep/bom/StringMatcher.cpp 2009-07-21 21:56:56 UTC (rev 157) @@ -7,6 +7,7 @@ #include <istream> #include <ostream> #include <sstream> +#include <limits> #include <string> #include <list> #include <map> @@ -43,9 +44,13 @@ corrections. If the word is correctly spelled, it is copied as is. Otherwise, a corrected version is stored. 
*/ // ////////////////////////////////////////////////////////////////////// - static void createCorrectedWordList (const WordList_T& iOriginalWordList, + static bool createCorrectedWordList (const WordList_T& iOriginalWordList, WordList_T& ioCorrectedWordList, + EditDistance_T& ioMaxEditDistance, const Xapian::Database& iDatabase) { + bool hasReachedMaximalAllowableEditDistance = false; + EditDistance_T lMaxEditDistance= std::numeric_limits<EditDistance_T>::min(); + // Empty the target list ioCorrectedWordList.clear(); @@ -55,9 +60,22 @@ for (WordList_T::const_iterator itWord = iOriginalWordList.begin(); itWord != iOriginalWordList.end(); ++itWord) { const std::string& lOriginalWord = *itWord; - const EditDistance_T lEditDistance= calculateEditDistance(lOriginalWord); + + // Calculate the distance, depending on the length of the word + EditDistance_T lCalculatedEditDistance = + calculateEditDistance (lOriginalWord); + + // Store the greatest edit distance/error + lMaxEditDistance = std::max (lMaxEditDistance, lCalculatedEditDistance); + + // Limit the edit distance to the given maximal one + lCalculatedEditDistance = std::min (lCalculatedEditDistance, + ioMaxEditDistance); + + // Get a spelling suggestion for that word const std::string& lSuggestedWord = - iDatabase.get_spelling_suggestion (lOriginalWord, lEditDistance); + iDatabase.get_spelling_suggestion (lOriginalWord, + lCalculatedEditDistance); if (lSuggestedWord.empty() == true) { ioCorrectedWordList.push_back (lOriginalWord); @@ -68,14 +86,32 @@ // DEBUG /* - OPENTREP_LOG_DEBUG ("Original word: `" << lOriginalWord - << "' ==> corrected word: `" << lSuggestedWord << "'"); + OPENTREP_LOG_DEBUG ("Original word: `" << lOriginalWord + << "' ==> corrected word: `" + << lSuggestedWord + << "'. 
Maximal allowable distance: " + << ioMaxEditDistance + << ", maximum of edit distance: " + << lMaxEditDistance + << ", calculated edit distance: " + << lCalculatedEditDistance); */ } + /** + When the maximal allowable edit distance reaches the maximum + of the calculated edit distance, it becomes useless to go on + increasing the maximal allowable edit distance. + */ + if (lMaxEditDistance <= ioMaxEditDistance) { + hasReachedMaximalAllowableEditDistance = true; + } + } catch (const Xapian::Error& error) { OPENTREP_LOG_ERROR ("Exception: " << error.get_msg()); } + + return hasReachedMaximalAllowableEditDistance; } // /////////////////////////////////////////////////////////////////// @@ -109,43 +145,17 @@ WordHolder::tokeniseStringIntoWordList (iSearchString, lOriginalWordList); /** - We rebuild a clean query string from the word list. Indeed, + A clean query string is rebuilt from the word list. Indeed, the original string may have contained a few separators (e.g., '/', ';', etc.), which have been removed by the tokeniseStringIntoWordList() method. All those separators are thus replaced by spaces. - For instance, the 'san francisco, ca, us' initial string would be - replaced by 'san francisco ca us'. + <br>For instance, the 'san francisco, ca, us' initial string + would be replaced by 'san francisco ca us'. */ const std::string lOriginalQueryString = WordHolder::createStringFromWordList (lOriginalWordList); - - WordList_T lCorrectedWordList; - createCorrectedWordList (lOriginalWordList, lCorrectedWordList, - ioDatabase); - - const std::string lCorrectedQueryString = - WordHolder::createStringFromWordList (lCorrectedWordList); - /** - Try to find, if relevant, an orthographic suggestion for the whole - phrase/string. With the above example, 'sna francisco' yields the - suggestion 'san francisco'. 
- */ - const EditDistance_T lEditDistance = - calculateEditDistance (lOriginalQueryString); - const std::string lFullWordCorrectedString = - ioDatabase.get_spelling_suggestion(lOriginalQueryString, lEditDistance); - - // DEBUG - /* - OPENTREP_LOG_DEBUG ("Query string `" << lOriginalQueryString - << "' ==> corrected query string: `" - << lCorrectedQueryString - << "' and correction for the full query string: `" - << lFullWordCorrectedString << "'"); - */ - // Build the query object Xapian::QueryParser lQueryParser; lQueryParser.set_database (ioDatabase); @@ -163,58 +173,23 @@ << "'"); */ + // Start an enquire session + Xapian::Enquire enquire (ioDatabase); + /** - The Xapian::QueryParser::parse_query() method aggregates all the words - with operators inbetween them (here, the "PHRASE" operator). - With the above example ('sna francicso'), it yields - "sna PHRASE 2 francicso". + The Xapian::QueryParser::parse_query() method aggregates all + the words with operators inbetween them (here, the "PHRASE" + operator). With the above example ('sna francicso'), it + yields "sna PHRASE 2 francicso". */ - Xapian::Query lQuery = + Xapian::Query lOriginalQuery = lQueryParser.parse_query (lOriginalQueryString, Xapian::QueryParser::FLAG_BOOLEAN | Xapian::QueryParser::FLAG_PHRASE - | Xapian::QueryParser::FLAG_LOVEHATE - | Xapian::QueryParser::FLAG_SPELLING_CORRECTION); - /** - Strangely enough (is it?), the corrected query given by the Xapian - QueryParser corresponds to the full original string, where words - have been corrected one by one, but considered as a single block. - With the above example, 'sna francicso' yields (wrongly) - 'sna francisco', instead of "sna PHRASE 2 francisco", as generated - by the following code. 
- */ - // Xapian::Query lCorrectedQuery = - // lQueryParser.get_corrected_query_string(); - Xapian::Query lCorrectedQuery = - lQueryParser.parse_query (lCorrectedQueryString, - Xapian::QueryParser::FLAG_BOOLEAN - | Xapian::QueryParser::FLAG_PHRASE | Xapian::QueryParser::FLAG_LOVEHATE); - /** - As, with the above example, the full corrected string is - 'san francisco', it yields the query "san PHRASE 2 francisco", - which is eventually right. - */ - Xapian::Query lFullQueryCorrected = - lQueryParser.parse_query (lFullWordCorrectedString, - Xapian::QueryParser::FLAG_BOOLEAN - | Xapian::QueryParser::FLAG_PHRASE - | Xapian::QueryParser::FLAG_LOVEHATE); - - // DEBUG - /* - OPENTREP_LOG_DEBUG ("Query `" << lQuery.get_description() - << "', corrected query `" << lCorrectedQuery.get_description() - << "' and corrected for full query `" - << lFullQueryCorrected.get_description() << "' "); - */ - - // Start an enquire session - Xapian::Enquire enquire (ioDatabase); - // Give the query object to the enquire session - enquire.set_query (lQuery); + enquire.set_query (lOriginalQuery); // Get the top 10 results of the query ioMatchingSet = enquire.get_mset (0, 10); @@ -223,13 +198,60 @@ int nbMatches = ioMatchingSet.size(); // DEBUG - // OPENTREP_LOG_DEBUG (nbMatches << " results found"); + OPENTREP_LOG_DEBUG ("Original query `" << lOriginalQueryString + << ", i.e., `" << lOriginalQuery.get_description() + << "' => " << nbMatches << " results found"); /** When no match is found, we search on the corrected phrase/string (where the words have been corrected one by one). */ - if (nbMatches == 0) { + if (nbMatches != 0) { + oMatchedString = lOriginalQueryString; + return oMatchedString; + } + assert (nbMatches == 0 && oMatchedString.empty() == true); + + bool shouldStop = false; + EditDistance_T lMaxEditDistance = 0; + /** + From the clean list of original words, another list of + spell-corrected words is built. 
+ <br>The original query string must first be checked, without + allowing any spelling errors. If no match is found, the list + of spell-corrected words must be rebuilt allowing an + distance/error of 1. If again no match is found, the list is + rebuilt allowing a distance/error of 2. And so on until the + maximal allowable distance/error is reached. + */ + while (shouldStop == false) { + WordList_T lCorrectedWordList; + const bool hasReachedMaximalAllowableEditDistance = + createCorrectedWordList (lOriginalWordList, lCorrectedWordList, + lMaxEditDistance, ioDatabase); + + const std::string lCorrectedQueryString = + WordHolder::createStringFromWordList (lCorrectedWordList); + + /** + Strangely enough (is it?), the corrected query given by the Xapian + QueryParser corresponds to the full original string, where words + have been corrected one by one, but considered as a single block. + <br>With the above example, 'sna francicso' yields (wrongly) + 'sna francisco', instead of "sna PHRASE 2 francisco", as generated + by the following (uncommented) code. + <br>Just to be clear, the two lines of commented code below + are wrong. Those two commented lines are kept, just to be + sure that nobody gets the idea to use them. 
+ */ + // Xapian::Query lCorrectedQuery = + // lQueryParser.get_corrected_query_string(); + Xapian::Query lCorrectedQuery = + lQueryParser.parse_query (lCorrectedQueryString, + Xapian::QueryParser::FLAG_BOOLEAN + | Xapian::QueryParser::FLAG_PHRASE + | Xapian::QueryParser::FLAG_LOVEHATE); + enquire.set_query (lCorrectedQuery); ioMatchingSet = enquire.get_mset (0, 10); @@ -237,32 +259,71 @@ nbMatches = ioMatchingSet.size(); // DEBUG - //OPENTREP_LOG_DEBUG(nbMatches << " results found on corrected string"); + OPENTREP_LOG_DEBUG ("Corrected query `" << lCorrectedQueryString + << "' with maximal edit distance of " + << lMaxEditDistance + << ", i.e., `" << lCorrectedQuery.get_description() + << "' => " << nbMatches + << " results found on corrected string"); - } else { - oMatchedString = lOriginalQueryString; + if (nbMatches != 0) { + oMatchedString = lCorrectedQueryString; + return oMatchedString; + } + + // Allow for one more spelling error + ++lMaxEditDistance; + + /** + Stop when it is no longer necessary to increase the maximal + allowable edit distance, as it is already greater than the + maximum of the calculated edit distance. + */ + if (hasReachedMaximalAllowableEditDistance == true) { + shouldStop = true; + } } + assert (nbMatches == 0 && oMatchedString.empty() == true); /** - If there is still no match, we search on the string corrected - as a whole. + Try to find, if relevant, an orthographic suggestion for the whole + phrase/string. With the above example, 'sna francisco' yields the + suggestion 'san francisco'. 
*/ - if (nbMatches == 0) { - enquire.set_query (lFullQueryCorrected); - ioMatchingSet = enquire.get_mset (0, 10); - - // Display the results - nbMatches = ioMatchingSet.size(); + const EditDistance_T lCalculatedEditDistance = + calculateEditDistance (lOriginalQueryString); - // DEBUG - // OPENTREP_LOG_DEBUG (nbMatches - // << " results found on corrected full string"); - - } else { - oMatchedString = lCorrectedQueryString; - } + const std::string lFullWordCorrectedString = + ioDatabase.get_spelling_suggestion (lOriginalQueryString, + lCalculatedEditDistance); - if (nbMatches != 0 && oMatchedString.empty() == true) { + /** + Since there is still no match, we search on the string + corrected as a whole. + <br>As, with the above example, the full corrected string is + 'san francisco', it yields the query "san PHRASE 2 + francisco", which is eventually right. + */ + Xapian::Query lFullQueryCorrected = + lQueryParser.parse_query (lFullWordCorrectedString, + Xapian::QueryParser::FLAG_BOOLEAN + | Xapian::QueryParser::FLAG_PHRASE + | Xapian::QueryParser::FLAG_LOVEHATE); + + enquire.set_query (lFullQueryCorrected); + ioMatchingSet = enquire.get_mset (0, 10); + + // Display the results + nbMatches = ioMatchingSet.size(); + + // DEBUG + OPENTREP_LOG_DEBUG ("Query corrected as a full sentence `" + << lFullWordCorrectedString << "', i.e., `" + << lFullQueryCorrected.get_description() << "' => " + << nbMatches + << " results found on corrected full string"); + + if (nbMatches != 0) { oMatchedString = lFullWordCorrectedString; } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <den...@us...> - 2009-07-22 06:06:29
|
Revision: 158 http://opentrep.svn.sourceforge.net/opentrep/?rev=158&view=rev Author: denis_arnaud Date: 2009-07-22 06:06:26 +0000 (Wed, 22 Jul 2009) Log Message: ----------- [Dev] Re-worked the algorithm for the search with maximal allowable edit distance. Modified Paths: -------------- trunk/opentrep/opentrep/DistanceErrorRule.hpp trunk/opentrep/opentrep/OPENTREP_Types.hpp trunk/opentrep/opentrep/bom/ResultHolder.cpp trunk/opentrep/opentrep/bom/ResultHolder.hpp trunk/opentrep/opentrep/bom/StringMatcher.cpp trunk/opentrep/opentrep/bom/StringMatcher.hpp Modified: trunk/opentrep/opentrep/DistanceErrorRule.hpp =================================================================== --- trunk/opentrep/opentrep/DistanceErrorRule.hpp 2009-07-21 21:56:56 UTC (rev 157) +++ trunk/opentrep/opentrep/DistanceErrorRule.hpp 2009-07-22 06:06:26 UTC (rev 158) @@ -17,19 +17,6 @@ namespace OPENTREP { - // ////////////////////////////////////////////////////////////////// - /** Number of letters in a word or phrase. */ - typedef unsigned int NbOfLetters_T; - - /** Number of errors. */ - typedef unsigned short NbOfErrors_T; - - /** Number of (distance) errors allowed for a given number of letters. */ - typedef std::map<NbOfLetters_T, NbOfErrors_T> DistanceErrorScale_T; - - /** Number of (distance) errors allowed for a given number of letters. */ - typedef boost::array<NbOfLetters_T, 5> DistanceErrorScaleArray_T; - // //////////////////////////////////////////////////////////////////// /** Default distance error scale. 
<br>Allowed error for a given number of letters: Modified: trunk/opentrep/opentrep/OPENTREP_Types.hpp =================================================================== --- trunk/opentrep/opentrep/OPENTREP_Types.hpp 2009-07-21 21:56:56 UTC (rev 157) +++ trunk/opentrep/opentrep/OPENTREP_Types.hpp 2009-07-22 06:06:26 UTC (rev 158) @@ -8,6 +8,9 @@ #include <exception> #include <string> #include <list> +#include <map> +// Boost Arry +#include <boost/array.hpp> namespace OPENTREP { @@ -87,15 +90,27 @@ /** Travel search query. */ typedef std::string TravelQuery_T; - /** Number of matching documents. */ - typedef unsigned short NbOfMatches_T; - /** Word, which is the atomic element of a query string. */ typedef std::string Word_T; /** List of words. */ typedef std::list<Word_T> WordList_T; + /** Number of letters in a word or phrase. */ + typedef unsigned int NbOfLetters_T; + + /** Number of matching documents. */ + typedef unsigned short NbOfMatches_T; + + /** Number of errors. */ + typedef unsigned short NbOfErrors_T; + + /** Number of (distance) errors allowed for a given number of letters. */ + typedef std::map<NbOfLetters_T, NbOfErrors_T> DistanceErrorScale_T; + + /** Number of (distance) errors allowed for a given number of letters. 
*/ + typedef boost::array<NbOfLetters_T, 5> DistanceErrorScaleArray_T; + } #endif // __OPENTREP_OPENTREP_TYPES_HPP Modified: trunk/opentrep/opentrep/bom/ResultHolder.cpp =================================================================== --- trunk/opentrep/opentrep/bom/ResultHolder.cpp 2009-07-21 21:56:56 UTC (rev 157) +++ trunk/opentrep/opentrep/bom/ResultHolder.cpp 2009-07-22 06:06:26 UTC (rev 158) @@ -68,8 +68,12 @@ } // ////////////////////////////////////////////////////////////////////// - std::string ResultHolder::searchString (TravelQuery_T& ioPartialQueryString, - Document& ioMatchingDocument) { + std::string ResultHolder:: + searchString (Xapian::MSet& ioMatchingSet, + TravelQuery_T& ioPartialQueryString, + NbOfErrors_T& ioMaxEditDistance, + bool ioHasReachedMaximalAllowableEditDistance, + Document& ioMatchingDocument) { std::string oMatchedString; // Catch any Xapian::Error exceptions thrown @@ -78,21 +82,22 @@ bool shouldStop = false; while (shouldStop == false) { // Retrieve the list of documents matching the query string - Xapian::MSet lMatchingSet; - oMatchedString = StringMatcher::searchString (lMatchingSet, - ioPartialQueryString, - _database); + oMatchedString = + StringMatcher::searchString (ioMatchingSet, ioPartialQueryString, + ioMaxEditDistance, + ioHasReachedMaximalAllowableEditDistance, + _database); // DEBUG OPENTREP_LOG_DEBUG ("Current initial query string: `" << ioPartialQueryString << "' --- Kept query: `" << oMatchedString << "' for " - << lMatchingSet.size() << " matches."); + << ioMatchingSet.size() << " matches."); - if (lMatchingSet.empty() == false) { + if (ioMatchingSet.empty() == false) { // Create the corresponding list of documents StringMatcher:: - extractBestMatchingDocumentFromMSet (lMatchingSet, + extractBestMatchingDocumentFromMSet (ioMatchingSet, ioMatchingDocument); // Since a result has been found, the search can be stopped @@ -105,7 +110,17 @@ // word must be removed from the query string. 
StringMatcher::removeFurthestRightWord (ioPartialQueryString); - // Stop when the resulting string gets empty. + /** + Stop when the resulting string gets empty. + + <br>Note that whether maximal allowable edit distance/error + has been reached is not checked at that stage. That + algorithm is performed independently for each level of + maximal allowable edit distance/error. Only the caller + (below) retriggers this process by changing the level of + maximal allowable edit distance/error, until that latter be + reached. + */ if (ioPartialQueryString.empty() == true) { shouldStop = true; } @@ -119,6 +134,78 @@ } // ////////////////////////////////////////////////////////////////////// + std::string ResultHolder::searchString (TravelQuery_T& ioPartialQueryString, + Document& ioMatchingDocument) { + std::string oMatchedString; + + // Catch any Xapian::Error exceptions thrown + try { + + bool shouldStop = false; + NbOfErrors_T lMaxEditDistance = 0; + + /** + The query string must first be checked, without allowing any + spelling errors, but by removing the furthest right word at + every step. + <br>If no match is found, the maximal allowable edit + distance/error becomes 1, and the process (trying to match + the whole sentence, then by removing the furthest right word, + etc.) is re-performed. + <br>If no match is found, the maximal allowable edit + distance/error becomes 2. + <br>And so on until the maximum of the edit distance/error + becomes greater than the maximal allowable distance/error. + reached. + */ + while (shouldStop == false) { + /** + A copy of the query is made, as it the copy will be altered by + the above process, whereas a clean copy needs to be reprocessed + for each level of maximal edit distance/error. 
+ */ + TravelQuery_T lPartialQueryString (ioPartialQueryString); + + Xapian::MSet lMatchingSet; + bool hasReachedMaximalAllowableEditDistance = false; + oMatchedString = searchString (lMatchingSet, lPartialQueryString, + lMaxEditDistance, + hasReachedMaximalAllowableEditDistance, + ioMatchingDocument); + + if (oMatchedString.empty() == false) { + // Create the corresponding list of documents + StringMatcher:: + extractBestMatchingDocumentFromMSet (lMatchingSet, + ioMatchingDocument); + + // Since a result has been found, the search can be stopped + // for that part of the query. + shouldStop = true; + break; + } + + // Allow for one more spelling error + ++lMaxEditDistance; + + /** + Stop when it is no longer necessary to increase the maximal + allowable edit distance, as it is already greater than the + maximum of the calculated edit distance. + */ + if (hasReachedMaximalAllowableEditDistance == true) { + shouldStop = true; + } + } + + } catch (const Xapian::Error& error) { + OPENTREP_LOG_ERROR ("Exception: " << error.get_msg()); + } + + return oMatchedString; + } + + // ////////////////////////////////////////////////////////////////////// void ResultHolder::searchString (DocumentList_T& ioDocumentList, WordList_T& ioWordList) { Modified: trunk/opentrep/opentrep/bom/ResultHolder.hpp =================================================================== --- trunk/opentrep/opentrep/bom/ResultHolder.hpp 2009-07-21 21:56:56 UTC (rev 157) +++ trunk/opentrep/opentrep/bom/ResultHolder.hpp 2009-07-22 06:06:26 UTC (rev 158) @@ -46,9 +46,24 @@ private: /** Retrieve the document best matching the query string. + @param Xapian::MSet& The Xapian matching set. It can be empty. @param TravelQuery_T& The partial query string. + @param NbOfErrors_T& The maximal allowable edit distance/error. + @param bool Whether or not the maximal allowable edit distance/error + has become greater than the maximum of the edit distance/errors + calculated on the phrase. 
@param MatchingDocument_T& The best matching Xapian document (if found). @return bool Whether such a best matching document has been found. */ + std::string searchString (Xapian::MSet& ioMatchingSet, + TravelQuery_T& ioPartialQueryString, + NbOfErrors_T& ioMaxEditDistance, + bool ioHasReachedMaximalAllowableEditDistance, + Document& ioMatchingDocument); + + /** Retrieve the document best matching the query string. + @param TravelQuery_T& The partial query string. + @param MatchingDocument_T& The best matching Xapian document (if found). + @return bool Whether such a best matching document has been found. */ std::string searchString (TravelQuery_T& ioPartialQueryString, Document&); Modified: trunk/opentrep/opentrep/bom/StringMatcher.cpp =================================================================== --- trunk/opentrep/opentrep/bom/StringMatcher.cpp 2009-07-21 21:56:56 UTC (rev 157) +++ trunk/opentrep/opentrep/bom/StringMatcher.cpp 2009-07-22 06:06:26 UTC (rev 158) @@ -44,12 +44,9 @@ corrections. If the word is correctly spelled, it is copied as is. Otherwise, a corrected version is stored. 
*/ // ////////////////////////////////////////////////////////////////////// - static bool createCorrectedWordList (const WordList_T& iOriginalWordList, + static void createCorrectedWordList (const WordList_T& iOriginalWordList, WordList_T& ioCorrectedWordList, - EditDistance_T& ioMaxEditDistance, const Xapian::Database& iDatabase) { - bool hasReachedMaximalAllowableEditDistance = false; - EditDistance_T lMaxEditDistance= std::numeric_limits<EditDistance_T>::min(); // Empty the target list ioCorrectedWordList.clear(); @@ -62,16 +59,9 @@ const std::string& lOriginalWord = *itWord; // Calculate the distance, depending on the length of the word - EditDistance_T lCalculatedEditDistance = + const EditDistance_T lCalculatedEditDistance = calculateEditDistance (lOriginalWord); - // Store the greatest edit distance/error - lMaxEditDistance = std::max (lMaxEditDistance, lCalculatedEditDistance); - - // Limit the edit distance to the given maximal one - lCalculatedEditDistance = std::min (lCalculatedEditDistance, - ioMaxEditDistance); - // Get a spelling suggestion for that word const std::string& lSuggestedWord = iDatabase.get_spelling_suggestion (lOriginalWord, @@ -88,36 +78,24 @@ /* OPENTREP_LOG_DEBUG ("Original word: `" << lOriginalWord << "' ==> corrected word: `" - << lSuggestedWord - << "'. Maximal allowable distance: " - << ioMaxEditDistance - << ", maximum of edit distance: " - << lMaxEditDistance - << ", calculated edit distance: " - << lCalculatedEditDistance); + << lSuggestedWord << "'"); */ } - /** - When the maximal allowable edit distance reaches the maximum - of the calculated edit distance, it becomes useless to go on - increasing the maximal allowable edit distance. 
- */ - if (lMaxEditDistance <= ioMaxEditDistance) { - hasReachedMaximalAllowableEditDistance = true; - } - } catch (const Xapian::Error& error) { OPENTREP_LOG_ERROR ("Exception: " << error.get_msg()); } - - return hasReachedMaximalAllowableEditDistance; } // /////////////////////////////////////////////////////////////////// - std::string StringMatcher::searchString (Xapian::MSet& ioMatchingSet, - const std::string& iSearchString, - const Xapian::Database& ioDatabase) { + std::string StringMatcher:: + searchString (Xapian::MSet& ioMatchingSet, + const std::string& iSearchString, + NbOfErrors_T& ioMaxEditDistance, + bool ioHasReachedMaximalAllowableEditDistance, + const Xapian::Database& ioDatabase) { + NbOfErrors_T lMaxEditDistance = std::numeric_limits<EditDistance_T>::min(); + std::string oMatchedString; // Catch any Xapian::Error exceptions thrown @@ -207,81 +185,79 @@ (where the words have been corrected one by one). */ if (nbMatches != 0) { + /** + When the maximal allowable edit distance reaches the maximum + of the calculated edit distance, it becomes useless to go on + increasing the maximal allowable edit distance. + */ + if (lMaxEditDistance <= ioMaxEditDistance) { + ioHasReachedMaximalAllowableEditDistance = true; + } + oMatchedString = lOriginalQueryString; return oMatchedString; } assert (nbMatches == 0 && oMatchedString.empty() == true); - bool shouldStop = false; - EditDistance_T lMaxEditDistance = 0; /** From the clean list of original words, another list of - spell-corrected words is built. - <br>The original query string must first be checked, without - allowing any spelling errors. If no match is found, the list - of spell-corrected words must be rebuilt allowing an - distance/error of 1. If again no match is found, the list is - rebuilt allowing a distance/error of 2. And so on until the - maximal allowable distance/error is reached. + spell-corrected words is built, where every word is + spell-corrected one by one, according to its own length. 
+ <br>For instance, "sna" can not be corrected into "san", as + "sna" has a length of 3 letters only, but "francicso" will be + corrected into "francisco". So, "sna francicso" will be + corrected into "sna francisco". */ - while (shouldStop == false) { - WordList_T lCorrectedWordList; - const bool hasReachedMaximalAllowableEditDistance = - createCorrectedWordList (lOriginalWordList, lCorrectedWordList, - lMaxEditDistance, ioDatabase); + WordList_T lCorrectedWordList; + createCorrectedWordList (lOriginalWordList, lCorrectedWordList, + ioDatabase); - const std::string lCorrectedQueryString = - WordHolder::createStringFromWordList (lCorrectedWordList); + const std::string lCorrectedQueryString = + WordHolder::createStringFromWordList (lCorrectedWordList); - /** - Strangely enough (is it?), the corrected query given by the Xapian - QueryParser corresponds to the full original string, where words - have been corrected one by one, but considered as a single block. - <br>With the above example, 'sna francicso' yields (wrongly) - 'sna francisco', instead of "sna PHRASE 2 francisco", as generated - by the following (uncommented) code. - <br>Just to be clear, the two lines of commented code below - are wrong. Those two commented lines are kept, just to be - sure that nobody gets the idea to use them. - */ - // Xapian::Query lCorrectedQuery = - // lQueryParser.get_corrected_query_string(); - Xapian::Query lCorrectedQuery = - lQueryParser.parse_query (lCorrectedQueryString, - Xapian::QueryParser::FLAG_BOOLEAN - | Xapian::QueryParser::FLAG_PHRASE - | Xapian::QueryParser::FLAG_LOVEHATE); + /** + Strangely enough (is it?), the corrected query given by the Xapian + QueryParser corresponds to the full original string, where words + have been corrected one by one, but considered as a single block. + <br>With the above example, 'sna francicso' yields (wrongly) + 'sna francisco', instead of "sna PHRASE 2 francisco", as generated + by the following (uncommented) code. 
+ <br>Just to be clear, the two lines of commented code below + are wrong. Those two commented lines are kept, just to be + sure that nobody gets the idea to use them. + */ + // Xapian::Query lCorrectedQuery = + // lQueryParser.get_corrected_query_string(); + Xapian::Query lCorrectedQuery = + lQueryParser.parse_query (lCorrectedQueryString, + Xapian::QueryParser::FLAG_BOOLEAN + | Xapian::QueryParser::FLAG_PHRASE + | Xapian::QueryParser::FLAG_LOVEHATE); + + enquire.set_query (lCorrectedQuery); + ioMatchingSet = enquire.get_mset (0, 10); - enquire.set_query (lCorrectedQuery); - ioMatchingSet = enquire.get_mset (0, 10); - - // Display the results - nbMatches = ioMatchingSet.size(); - - // DEBUG - OPENTREP_LOG_DEBUG ("Corrected query `" << lCorrectedQueryString - << "' with maximal edit distance of " - << lMaxEditDistance - << ", i.e., `" << lCorrectedQuery.get_description() - << "' => " << nbMatches - << " results found on corrected string"); - - if (nbMatches != 0) { - oMatchedString = lCorrectedQueryString; - return oMatchedString; - } - - // Allow for one more spelling error - ++lMaxEditDistance; - + // Display the results + nbMatches = ioMatchingSet.size(); + + // DEBUG + OPENTREP_LOG_DEBUG ("Corrected query `" << lCorrectedQueryString + << "', i.e., `" << lCorrectedQuery.get_description() + << "' => " << nbMatches + << " results found on corrected string"); + + if (nbMatches != 0) { /** - Stop when it is no longer necessary to increase the maximal - allowable edit distance, as it is already greater than the - maximum of the calculated edit distance. + When the maximal allowable edit distance reaches the maximum + of the calculated edit distance, it becomes useless to go on + increasing the maximal allowable edit distance. 
*/ - if (hasReachedMaximalAllowableEditDistance == true) { - shouldStop = true; + if (lMaxEditDistance <= ioMaxEditDistance) { + ioHasReachedMaximalAllowableEditDistance = true; } + + oMatchedString = lCorrectedQueryString; + return oMatchedString; } assert (nbMatches == 0 && oMatchedString.empty() == true); @@ -290,13 +266,20 @@ phrase/string. With the above example, 'sna francisco' yields the suggestion 'san francisco'. */ - const EditDistance_T lCalculatedEditDistance = + NbOfErrors_T lCalculatedEditDistance = calculateEditDistance (lOriginalQueryString); + + // Store the greatest edit distance/error + lMaxEditDistance = std::max (lMaxEditDistance, lCalculatedEditDistance); + // Limit the edit distance to the given maximal one + lCalculatedEditDistance = std::min (lCalculatedEditDistance, + ioMaxEditDistance); + const std::string lFullWordCorrectedString = ioDatabase.get_spelling_suggestion (lOriginalQueryString, lCalculatedEditDistance); - + /** Since there is still no match, we search on the string corrected as a whole. 
@@ -309,24 +292,26 @@ Xapian::QueryParser::FLAG_BOOLEAN | Xapian::QueryParser::FLAG_PHRASE | Xapian::QueryParser::FLAG_LOVEHATE); - + enquire.set_query (lFullQueryCorrected); ioMatchingSet = enquire.get_mset (0, 10); // Display the results nbMatches = ioMatchingSet.size(); - + // DEBUG OPENTREP_LOG_DEBUG ("Query corrected as a full sentence `" - << lFullWordCorrectedString << "', i.e., `" - << lFullQueryCorrected.get_description() << "' => " - << nbMatches + << lFullWordCorrectedString + << "' with a maximal edit distance of " + << lMaxEditDistance + << ", i.e., `"<< lFullQueryCorrected.get_description() + << "' => " << nbMatches << " results found on corrected full string"); - + if (nbMatches != 0) { oMatchedString = lFullWordCorrectedString; } - + // DEBUG /* const Xapian::Query& lActualQuery = enquire.get_query(); @@ -338,6 +323,15 @@ OPENTREP_LOG_ERROR ("Exception: " << error.get_msg()); } + /** + When the maximal allowable edit distance reaches the maximum + of the calculated edit distance, it becomes useless to go on + increasing the maximal allowable edit distance. + */ + if (lMaxEditDistance <= ioMaxEditDistance) { + ioHasReachedMaximalAllowableEditDistance = true; + } + return oMatchedString; } Modified: trunk/opentrep/opentrep/bom/StringMatcher.hpp =================================================================== --- trunk/opentrep/opentrep/bom/StringMatcher.hpp 2009-07-21 21:56:56 UTC (rev 157) +++ trunk/opentrep/opentrep/bom/StringMatcher.hpp 2009-07-22 06:06:26 UTC (rev 158) @@ -28,11 +28,17 @@ words of the search string. @param Xapian::MSet& The Xapian matching set. It can be empty. @param const std::string& The query string. + @param NbOfErrors_T& The maximal allowable edit distance/error. + @param bool Whether or not the maximal allowable edit distance/error + has become greater than the maximum of the edit distance/errors + calculated on the phrase. @param const Xapian::Database& The Xapian index/database. 
@return std::string The query string, potentially corrected, which has yielded matches. */ static std::string searchString (Xapian::MSet&, const std::string& iSearchString, + NbOfErrors_T& ioMaxEditDistance, + bool ioHasReachedMaximalAllowableEditDistance, const Xapian::Database&); /** Extract the best matching Xapian document. This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <den...@us...> - 2009-07-23 09:54:14
|
Revision: 162 http://opentrep.svn.sourceforge.net/opentrep/?rev=162&view=rev Author: denis_arnaud Date: 2009-07-23 09:54:10 +0000 (Thu, 23 Jul 2009) Log Message: ----------- Fixed the matching bug (until some other is discovered...). Modified Paths: -------------- trunk/opentrep/opentrep/bom/Document.cpp trunk/opentrep/opentrep/bom/ResultHolder.cpp trunk/opentrep/opentrep/bom/StringMatcher.cpp trunk/opentrep/opentrep/service/OPENTREP_Service.cpp Modified: trunk/opentrep/opentrep/bom/Document.cpp =================================================================== --- trunk/opentrep/opentrep/bom/Document.cpp 2009-07-23 06:20:10 UTC (rev 161) +++ trunk/opentrep/opentrep/bom/Document.cpp 2009-07-23 09:54:10 UTC (rev 162) @@ -23,9 +23,9 @@ // ////////////////////////////////////////////////////////////////////// std::string Document::describeKey() const { std::ostringstream oStr; - oStr << "`" << describeShortKey() << "´"; + oStr << "`" << describeShortKey() << "'"; if (_correctedQueryString.empty() == false) { - oStr << " (corrected into `" << _correctedQueryString << "´)"; + oStr << " (corrected into `" << _correctedQueryString << "')"; } return oStr.str(); } Modified: trunk/opentrep/opentrep/bom/ResultHolder.cpp =================================================================== --- trunk/opentrep/opentrep/bom/ResultHolder.cpp 2009-07-23 06:20:10 UTC (rev 161) +++ trunk/opentrep/opentrep/bom/ResultHolder.cpp 2009-07-23 09:54:10 UTC (rev 162) @@ -4,6 +4,7 @@ // C #include <cassert> #include <sstream> +#include <limits> // Xapian #include <xapian.h> // OpenTREP @@ -89,8 +90,14 @@ <br>And so on until the maximum of the edit distance/error becomes greater than the maximal allowable distance/error. reached. + + <br>NOTE: that feature is de-activated, as it seems it does + not bring any added value. To re-activate it, just initialise + the lMaxEditDistance to 0, instead of to the positive infinite. 
*/ - NbOfErrors_T lMaxEditDistance = 0; + // NbOfErrors_T lMaxEditDistance = 0; + NbOfErrors_T lMaxEditDistance = std::numeric_limits<NbOfErrors_T>::max(); + bool hasReachedMaximalAllowableEditDistance = false; bool shouldStop = false; while (shouldStop == false) { Modified: trunk/opentrep/opentrep/bom/StringMatcher.cpp =================================================================== --- trunk/opentrep/opentrep/bom/StringMatcher.cpp 2009-07-23 06:20:10 UTC (rev 161) +++ trunk/opentrep/opentrep/bom/StringMatcher.cpp 2009-07-23 09:54:10 UTC (rev 162) @@ -31,7 +31,7 @@ on a 4-letter word, while an edit distance of 3 will be allowed on an 11-letter word. */ // ////////////////////////////////////////////////////////////////////// - static unsigned int calculateEditDistance (const std::string& iPhrase) { + static unsigned int calculateEditDistance (const TravelQuery_T& iPhrase) { EditDistance_T oEditDistance = 2; const EditDistance_T lQueryStringSize = iPhrase.size(); @@ -87,13 +87,113 @@ } } + /** + That algorithm checks whether the first (resp. the last) word of + the original string "belongs" to the spell-corrected suggestion + (i.e., "recognised" by the Xapian database/index). + + <br>For instance, let assume that the original string is 'nce + petropavlosk' and that the spell-corrected suggestion (for the + full original string) is 'petropavlosk', meaning that an edit + distance/error of 3 letters ('nce') has been allowed. + + <br>We check whether the left-reduced original string, namely + 'petropavlosk', returns the same suggestion as the non-reduced + original string, namely 'nce petropavlosk'. In that case, both + the reduced and non-reduced original strings yield the same + spell-corrected suggestions, namely 'petropavlosk'. Hence, the + spell-corrected suggestion (namely 'petropavlosk') should not be + taken into account, i.e., the spell-corrected suggestion should + be empty. 
+ + <br>In other words, 'nce' does not belong to the spell-corrected + suggestion 'petropavlosk'. So, 'petropavlosk' is not a good + suggestion at that stage (because if accepted, it would enduce + the removal of 'nce' from the remaining string). + */ // /////////////////////////////////////////////////////////////////// + void checkAndAlterIfNeeded (TravelQuery_T& ioSuggestedString, + const TravelQuery_T& iOriginalString, + const NbOfErrors_T& iEditDistance, + const Xapian::Database& iDatabase) { + + /** + Store a copy of the suggested string, as it will me altered by + the below method. + */ + TravelQuery_T lOriginalStringCopy (iOriginalString); + StringMatcher::removeFurthestLeftWord (lOriginalStringCopy); + + /** + Get a spell-corrected suggestion for the reduced original string. + */ + std::string lSuggestionForReducedOriginalString = + iDatabase.get_spelling_suggestion (lOriginalStringCopy, iEditDistance); + + if (lSuggestionForReducedOriginalString.empty() == true + || lSuggestionForReducedOriginalString == ioSuggestedString) { + /** + The suggestion on the reduced-original string is either empty + or the same as the suggestion on the original string. Either + way, the suggestion is not valid. It must be discarded + (emptied). + <br>Note that if the suggestion on the reduced-original + string is empty, it normally means that the reduced-original + string is correct (in spelling terms), as the non-reduced + original string yielded a suggestion. + <br>There may be side effects with the edit distance/error, + though, but it has been unobserved, as of now. + */ + ioSuggestedString = ""; + + // DEBUG + OPENTREP_LOG_DEBUG ("The suggestion (`" << ioSuggestedString + << "') for `" << iOriginalString + << "' is the same as the suggestion for the reduced " + << "original string (`" << lOriginalStringCopy + << "') -> discarded."); + + return; + } + + /** + Store a copy of the suggested string, as it will me altered by + the below method. 
+ */ + lOriginalStringCopy = iOriginalString; + StringMatcher::removeFurthestRightWord (lOriginalStringCopy); + + /** + Get a spell-corrected suggestion for the reduced original string. + */ + lSuggestionForReducedOriginalString = + iDatabase.get_spelling_suggestion (lOriginalStringCopy, iEditDistance); + + if (lSuggestionForReducedOriginalString.empty() == true + || lSuggestionForReducedOriginalString == ioSuggestedString) { + /** + See the remark above. + */ + ioSuggestedString = ""; + + // DEBUG + OPENTREP_LOG_DEBUG ("The suggestion (`" << ioSuggestedString + << "') for `" << iOriginalString + << "' is the same as the suggestion for the reduced " + << "original string (`" << lOriginalStringCopy + << "') -> discarded."); + + return; + } + } + + // /////////////////////////////////////////////////////////////////// std::string StringMatcher:: searchString (Xapian::MSet& ioMatchingSet, - const std::string& iSearchString, + const TravelQuery_T& iSearchString, NbOfErrors_T& ioMaxEditDistance, bool& ioHasReachedMaximalAllowableEditDistance, - const Xapian::Database& ioDatabase) { + const Xapian::Database& iDatabase) { NbOfErrors_T lMaxEditDistance = std::numeric_limits<EditDistance_T>::min(); std::string oMatchedString; @@ -136,7 +236,7 @@ // Build the query object Xapian::QueryParser lQueryParser; - lQueryParser.set_database (ioDatabase); + lQueryParser.set_database (iDatabase); /** As explained in http://www.xapian.org/docs/queryparser.html, Xapian::Query::OP_ADJ is better than Xapian::Query::OP_PHRASE, @@ -152,7 +252,7 @@ */ // Start an enquire session - Xapian::Enquire enquire (ioDatabase); + Xapian::Enquire enquire (iDatabase); /** The Xapian::QueryParser::parse_query() method aggregates all @@ -212,7 +312,7 @@ */ WordList_T lCorrectedWordList; createCorrectedWordList (lOriginalWordList, lCorrectedWordList, - ioDatabase); + iDatabase); const std::string lCorrectedQueryString = WordHolder::createStringFromWordList (lCorrectedWordList); @@ -280,11 +380,18 @@ 
lCalculatedEditDistance = std::min (lCalculatedEditDistance, ioMaxEditDistance); - const std::string lFullWordCorrectedString = - ioDatabase.get_spelling_suggestion (lOriginalQueryString, - lCalculatedEditDistance); - + std::string lFullWordCorrectedString = + iDatabase.get_spelling_suggestion (lOriginalQueryString, + lCalculatedEditDistance); + /** + Check that the suggestion does not encompass extra words, which + will be otherwise/rather recognised in another step. + */ + checkAndAlterIfNeeded (lFullWordCorrectedString, lOriginalQueryString, + lCalculatedEditDistance, iDatabase); + + /** Since there is still no match, we search on the string corrected as a whole. <br>As, with the above example, the full corrected string is Modified: trunk/opentrep/opentrep/service/OPENTREP_Service.cpp =================================================================== --- trunk/opentrep/opentrep/service/OPENTREP_Service.cpp 2009-07-23 06:20:10 UTC (rev 161) +++ trunk/opentrep/opentrep/service/OPENTREP_Service.cpp 2009-07-23 09:54:10 UTC (rev 162) @@ -3,6 +3,8 @@ // ////////////////////////////////////////////////////////////////////// // C #include <cassert> +// Boost +#include <boost/date_time/gregorian/gregorian.hpp> // OpenTrep #include <opentrep/basic/BasConst_OPENTREP_Service.hpp> #include <opentrep/basic/BasChronometer.hpp> @@ -144,6 +146,13 @@ assert (_opentrepServiceContext != NULL); OPENTREP_ServiceContext& lOPENTREP_ServiceContext= *_opentrepServiceContext; + // Get the date-time for the present time + // boost::gregorian::date lNowDateTime = boost::gregorian::now.date(); + + // DEBUG + // OPENTREP_LOG_DEBUG (lNowDateTime << " - Match query '" << iTravelQuery + // << " ' on Xapian database (index)"); + // Check that the travel request is not empty if (iTravelQuery.empty() == true) { OPENTREP_LOG_ERROR ("The travel request is empty."); @@ -174,10 +183,8 @@ // DEBUG OPENTREP_LOG_DEBUG ("Match query on Xapian database (index): " - << lRequestInterpreterMeasure << " - " - 
<< lOPENTREP_ServiceContext.display()); + << lRequestInterpreterMeasure); - } catch (const std::exception& error) { OPENTREP_LOG_ERROR ("Exception: " << error.what()); throw InterpreteTravelRequestException(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <den...@us...> - 2009-07-23 17:47:43
|
Revision: 163 http://opentrep.svn.sourceforge.net/opentrep/?rev=163&view=rev Author: denis_arnaud Date: 2009-07-23 17:47:31 +0000 (Thu, 23 Jul 2009) Log Message: ----------- [Dev] Better protected the wrappers (e.g., Python) against unexpected exceptions. Modified Paths: -------------- trunk/opentrep/opentrep/batches/indexer.cpp trunk/opentrep/opentrep/batches/searcher.cpp trunk/opentrep/opentrep/bom/StringMatcher.cpp trunk/opentrep/opentrep/python/pyopentrep.cpp trunk/opentrep/opentrep/service/OPENTREP_Service.cpp Modified: trunk/opentrep/opentrep/batches/indexer.cpp =================================================================== --- trunk/opentrep/opentrep/batches/indexer.cpp 2009-07-23 09:54:10 UTC (rev 162) +++ trunk/opentrep/opentrep/batches/indexer.cpp 2009-07-23 17:47:31 UTC (rev 163) @@ -70,7 +70,7 @@ ("passwd,p", boost::program_options::value< std::string >(&ioDBPasswd)->default_value(K_OPENTREP_DEFAULT_DB_PASSWD), "SQL database hostname (e.g., opentrep)") - ("host,h", + ("host,H", boost::program_options::value< std::string >(&ioDBHost)->default_value(K_OPENTREP_DEFAULT_DB_HOST), "SQL database hostname (e.g., localhost)") ("port,P", Modified: trunk/opentrep/opentrep/batches/searcher.cpp =================================================================== --- trunk/opentrep/opentrep/batches/searcher.cpp 2009-07-23 09:54:10 UTC (rev 162) +++ trunk/opentrep/opentrep/batches/searcher.cpp 2009-07-23 17:47:31 UTC (rev 163) @@ -126,9 +126,6 @@ ("error,e", boost::program_options::value< unsigned short >(&ioSpellingErrorDistance)->default_value(K_OPENTREP_DEFAULT_SPELLING_ERROR_DISTANCE), "Spelling error distance (e.g., 3)") - ("query,q", - boost::program_options::value< WordList_T >(&lWordList)->multitoken(), - "Traval query word list (e.g. 
sna francicso rio de janero lso anglese reykyavki") ("database,d", boost::program_options::value< std::string >(&ioDatabaseFilepath)->default_value(K_OPENTREP_DEFAULT_DATABSE_FILEPATH), "Xapian database filepath (e.g., /tmp/opentrep/traveldb)") @@ -141,7 +138,7 @@ ("passwd,p", boost::program_options::value< std::string >(&ioDBPasswd)->default_value(K_OPENTREP_DEFAULT_DB_PASSWD), "SQL database hostname (e.g., opentrep)") - ("host,h", + ("host,H", boost::program_options::value< std::string >(&ioDBHost)->default_value(K_OPENTREP_DEFAULT_DB_HOST), "SQL database hostname (e.g., localhost)") ("port,P", @@ -150,6 +147,9 @@ ("dbname,m", boost::program_options::value< std::string >(&ioDBDBName)->default_value(K_OPENTREP_DEFAULT_DB_DBNAME), "SQL database name (e.g., opentrep)") + ("query,q", + boost::program_options::value< WordList_T >(&lWordList)->multitoken(), + "Travel query word list (e.g. sna francicso rio de janero lso anglese reykyavki), which sould be located at the end of the command line (otherwise, the other options would be interpreted as part of that travel query word list)") ; // Hidden options, will be allowed both on command line and Modified: trunk/opentrep/opentrep/bom/StringMatcher.cpp =================================================================== --- trunk/opentrep/opentrep/bom/StringMatcher.cpp 2009-07-23 09:54:10 UTC (rev 162) +++ trunk/opentrep/opentrep/bom/StringMatcher.cpp 2009-07-23 17:47:31 UTC (rev 163) @@ -114,7 +114,7 @@ // /////////////////////////////////////////////////////////////////// void checkAndAlterIfNeeded (TravelQuery_T& ioSuggestedString, const TravelQuery_T& iOriginalString, - const NbOfErrors_T& iEditDistance, + const NbOfErrors_T& iMaxEditDistance, const Xapian::Database& iDatabase) { /** @@ -126,33 +126,50 @@ /** Get a spell-corrected suggestion for the reduced original string. + <br>Limit the edit distance to the given maximal one. 
*/ + NbOfErrors_T lCalculatedEditDistance = + calculateEditDistance (lOriginalStringCopy); + + lCalculatedEditDistance = std::min (lCalculatedEditDistance, + iMaxEditDistance); + std::string lSuggestionForReducedOriginalString = - iDatabase.get_spelling_suggestion (lOriginalStringCopy, iEditDistance); + iDatabase.get_spelling_suggestion (lOriginalStringCopy, + lCalculatedEditDistance); + + /** + Note that if the suggestion on the reduced-original string is + empty, it normally means that the reduced-original string is + correct (in spelling terms), as the non-reduced original string + yielded a suggestion. + */ + if (lSuggestionForReducedOriginalString.empty() == true) { + lSuggestionForReducedOriginalString = lOriginalStringCopy; + } + + if (lSuggestionForReducedOriginalString == ioSuggestedString) { - if (lSuggestionForReducedOriginalString.empty() == true - || lSuggestionForReducedOriginalString == ioSuggestedString) { + // DEBUG + OPENTREP_LOG_DEBUG ("The suggestion (`" << ioSuggestedString + << "') for `" << iOriginalString + << "', with an edit distance/error of " + << lCalculatedEditDistance + << " over " << iMaxEditDistance << " allowable" + << ", is the same as the suggestion for the reduced " + << "original string (`" << lOriginalStringCopy + << "') -> discarded."); + /** The suggestion on the reduced-original string is either empty or the same as the suggestion on the original string. Either way, the suggestion is not valid. It must be discarded (emptied). - <br>Note that if the suggestion on the reduced-original - string is empty, it normally means that the reduced-original - string is correct (in spelling terms), as the non-reduced - original string yielded a suggestion. <br>There may be side effects with the edit distance/error, though, but it has been unobserved, as of now. 
*/ ioSuggestedString = ""; - // DEBUG - OPENTREP_LOG_DEBUG ("The suggestion (`" << ioSuggestedString - << "') for `" << iOriginalString - << "' is the same as the suggestion for the reduced " - << "original string (`" << lOriginalStringCopy - << "') -> discarded."); - return; } @@ -165,24 +182,44 @@ /** Get a spell-corrected suggestion for the reduced original string. + <br>Limit the edit distance to the given maximal one. */ + lCalculatedEditDistance = calculateEditDistance (lOriginalStringCopy); + + lCalculatedEditDistance = std::min (lCalculatedEditDistance, + iMaxEditDistance); + lSuggestionForReducedOriginalString = - iDatabase.get_spelling_suggestion (lOriginalStringCopy, iEditDistance); + iDatabase.get_spelling_suggestion (lOriginalStringCopy, + lCalculatedEditDistance); - if (lSuggestionForReducedOriginalString.empty() == true - || lSuggestionForReducedOriginalString == ioSuggestedString) { - /** - See the remark above. - */ - ioSuggestedString = ""; + /** + Note that if the suggestion on the reduced-original string is + empty, it normally means that the reduced-original string is + correct (in spelling terms), as the non-reduced original string + yielded a suggestion. + */ + if (lSuggestionForReducedOriginalString.empty() == true) { + lSuggestionForReducedOriginalString = lOriginalStringCopy; + } + + if (lSuggestionForReducedOriginalString == ioSuggestedString) { // DEBUG OPENTREP_LOG_DEBUG ("The suggestion (`" << ioSuggestedString << "') for `" << iOriginalString - << "' is the same as the suggestion for the reduced " + << "', with an edit distance/error of " + << lCalculatedEditDistance + << " over " << iMaxEditDistance << " allowable" + << ", is the same as the suggestion for the reduced " << "original string (`" << lOriginalStringCopy << "') -> discarded."); + /** + See the remark above. + */ + ioSuggestedString = ""; + return; } } @@ -389,7 +426,7 @@ will be otherwise/rather recognised in another step. 
*/ checkAndAlterIfNeeded (lFullWordCorrectedString, lOriginalQueryString, - lCalculatedEditDistance, iDatabase); + ioMaxEditDistance, iDatabase); /** Since there is still no match, we search on the string Modified: trunk/opentrep/opentrep/python/pyopentrep.cpp =================================================================== --- trunk/opentrep/opentrep/python/pyopentrep.cpp 2009-07-23 09:54:10 UTC (rev 162) +++ trunk/opentrep/opentrep/python/pyopentrep.cpp 2009-07-23 17:47:31 UTC (rev 163) @@ -85,10 +85,17 @@ // DEBUG *_logOutputStream << oStr.str() << std::endl; - } catch (const std::exception& error) { - *_logOutputStream << "Exception: " << error.what() << std::endl; - } + } catch (const RootException& eOpenTrepError) { + *_logOutputStream << "OpenTrep error: " << eOpenTrepError.what() + << std::endl; + } catch (const std::exception& eStdError) { + *_logOutputStream << "Error: " << eStdError.what() << std::endl; + + } catch (...) { + *_logOutputStream << "Unknown error" << std::endl; + } + return oStr.str(); } @@ -145,9 +152,16 @@ // DEBUG *_logOutputStream << "Python wrapper initialised" << std::endl; - } catch (const std::exception& error) { - *_logOutputStream << "Exception: " << error.what() << std::endl; - } + } catch (const RootException& eOpenTrepError) { + *_logOutputStream << "OpenTrep error: " << eOpenTrepError.what() + << std::endl; + + } catch (const std::exception& eStdError) { + *_logOutputStream << "Error: " << eStdError.what() << std::endl; + + } catch (...) 
{ + *_logOutputStream << "Unknown error" << std::endl; + } return isEverythingOK; } Modified: trunk/opentrep/opentrep/service/OPENTREP_Service.cpp =================================================================== --- trunk/opentrep/opentrep/service/OPENTREP_Service.cpp 2009-07-23 09:54:10 UTC (rev 162) +++ trunk/opentrep/opentrep/service/OPENTREP_Service.cpp 2009-07-23 17:47:31 UTC (rev 163) @@ -5,6 +5,7 @@ #include <cassert> // Boost #include <boost/date_time/gregorian/gregorian.hpp> +#include <boost/date_time/posix_time/ptime.hpp> // OpenTrep #include <opentrep/basic/BasConst_OPENTREP_Service.hpp> #include <opentrep/basic/BasChronometer.hpp> @@ -147,11 +148,16 @@ OPENTREP_ServiceContext& lOPENTREP_ServiceContext= *_opentrepServiceContext; // Get the date-time for the present time - // boost::gregorian::date lNowDateTime = boost::gregorian::now.date(); + boost::posix_time::ptime lNowDateTime = + boost::posix_time::second_clock::local_time(); + boost::gregorian::date lNowDate = lNowDateTime.date(); // DEBUG - // OPENTREP_LOG_DEBUG (lNowDateTime << " - Match query '" << iTravelQuery - // << " ' on Xapian database (index)"); + OPENTREP_LOG_DEBUG (std::endl + << "===================================================" + << std::endl + << lNowDateTime << " - Match query '" << iTravelQuery + << "' on Xapian database (index)"); // Check that the travel request is not empty if (iTravelQuery.empty() == true) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <den...@us...> - 2009-07-27 05:56:55
|
Revision: 170 http://opentrep.svn.sourceforge.net/opentrep/?rev=170&view=rev Author: denis_arnaud Date: 2009-07-27 05:56:43 +0000 (Mon, 27 Jul 2009) Log Message: ----------- [Dev] Prepared the code to dig out the edit distance and extra and alternate locations. Modified Paths: -------------- trunk/opentrep/opentrep/bom/Document.cpp trunk/opentrep/opentrep/bom/Document.hpp trunk/opentrep/opentrep/bom/Place.cpp trunk/opentrep/opentrep/bom/Place.hpp trunk/opentrep/opentrep/bom/ResultHolder.cpp trunk/opentrep/opentrep/bom/StringMatcher.cpp trunk/opentrep/opentrep/bom/StringMatcher.hpp trunk/opentrep/opentrep/command/RequestInterpreter.cpp trunk/opentrep/opentrep/factory/FacPlace.cpp trunk/opentrep/opentrep/factory/FacPlace.hpp Modified: trunk/opentrep/opentrep/bom/Document.cpp =================================================================== --- trunk/opentrep/opentrep/bom/Document.cpp 2009-07-25 22:27:45 UTC (rev 169) +++ trunk/opentrep/opentrep/bom/Document.cpp 2009-07-27 05:56:43 UTC (rev 170) @@ -25,7 +25,8 @@ std::ostringstream oStr; oStr << "`" << describeShortKey() << "'"; if (_correctedQueryString.empty() == false) { - oStr << " (corrected into `" << _correctedQueryString << "')"; + oStr << " (corrected into `" << _correctedQueryString + << "' with an edit distance/error of " << _editDistance << ")"; } return oStr.str(); } @@ -37,11 +38,12 @@ const Xapian::docid& lDocID = _document.get_docid(); oStr << " => Document ID " << lDocID << " matching at " << _percentage - << "% [" << _document.get_data() << "]"; + << "% (edit distance of " << _editDistance << ") [" + << _document.get_data() << "]"; if (_documentList.empty() == false) { oStr << " along with " << _documentList.size() - << " other matching document(s) ("; + << " other equivalent matching document(s) ("; unsigned short idx = 0; for (XapianDocumentList_T::const_iterator itDoc = _documentList.begin(); @@ -53,6 +55,25 @@ } oStr << lDocID; } + oStr << ")"; + } + + if (_alternateDocumentList.empty() == 
false) { + oStr << " and with still " << _alternateDocumentList.size() + << " other less matching document(s) ("; + + unsigned short idx = 0; + for (XapianAlternateDocumentList_T::const_iterator itDoc = + _alternateDocumentList.begin(); + itDoc != _alternateDocumentList.end(); ++itDoc, ++idx) { + const Xapian::percent& lPercentage = itDoc->first; + const Xapian::Document& lXapianDoc = itDoc->second; + const Xapian::docid& lDocID = lXapianDoc.get_docid(); + if (idx != 0) { + oStr << ", "; + } + oStr << lDocID << " / " << lPercentage << "%"; + } oStr << ")." << std::endl; } else { @@ -73,7 +94,7 @@ if (_documentList.empty() == false) { oStr << " along with " << _documentList.size() - << " other matching document(s) { "; + << " other equivalent matching document(s) { "; unsigned short idx = 0; for (XapianDocumentList_T::const_iterator itDoc = _documentList.begin(); @@ -85,6 +106,26 @@ } oStr << "Doc ID " << lDocID << " [" << lXapianDoc.get_data() << "]"; } + oStr << " }"; + } + + if (_alternateDocumentList.empty() == false) { + oStr << " and with still " << _alternateDocumentList.size() + << " other less matching document(s) { "; + + unsigned short idx = 0; + for (XapianAlternateDocumentList_T::const_iterator itDoc = + _alternateDocumentList.begin(); + itDoc != _alternateDocumentList.end(); ++itDoc, ++idx) { + const Xapian::percent& lPercentage = itDoc->first; + const Xapian::Document& lXapianDoc = itDoc->second; + const Xapian::docid& lDocID = lXapianDoc.get_docid(); + if (idx != 0) { + oStr << ", "; + } + oStr << lDocID << " / " << lPercentage << "% [" + << lXapianDoc.get_data() << "]"; + } oStr << " }." 
<< std::endl; } else { Modified: trunk/opentrep/opentrep/bom/Document.hpp =================================================================== --- trunk/opentrep/opentrep/bom/Document.hpp 2009-07-25 22:27:45 UTC (rev 169) +++ trunk/opentrep/opentrep/bom/Document.hpp 2009-07-27 05:56:43 UTC (rev 170) @@ -17,6 +17,12 @@ // //////////////// Type definitions ///////////////// /** List of Xapian documents. */ typedef std::list<Xapian::Document> XapianDocumentList_T; + + /** Pair of a Xapian document and its associated matching percentage. */ + typedef std::pair<Xapian::percent, Xapian::Document> XapianDocumentPair_T; + + /** List of Xapian documents. */ + typedef std::list<XapianDocumentPair_T> XapianAlternateDocumentList_T; // //////////////// Main Class ///////////////// @@ -48,12 +54,19 @@ return _percentage; } - /** Get the extra list of matching Xapian documents. */ + /** Get the extra list of matching Xapian documents (i.e., those + having matched with the same weight as the main one). */ const XapianDocumentList_T& getExtraDocumentList() const { return _documentList; } + /** Get the alternate list of matching Xapian documents (i.e., those + having matched with a lower weight than the main one). */ + const XapianAlternateDocumentList_T& getAlternateDocumentList() const { + return _alternateDocumentList; + } + // ////////////////// Setters //////////////// /** Set the query string. */ void setQueryString (const TravelQuery_T& iQueryString) { @@ -75,12 +88,25 @@ _percentage = iPercentage; } + /** Set the edit distance/error, with which the matching has been made. */ + void setEditDistance (const NbOfErrors_T& iEditDistance) { + _editDistance = iEditDistance; + } + /** Add a matching Xapian document (having the same matching percentage). */ void addExtraDocument (const Xapian::Document& iMatchingDocument) { _documentList.push_back (iMatchingDocument); } + /** Add a matching Xapian document (having a lower matching percentage). 
*/ + void addAlternateDocument (const Xapian::percent& iMatchingPercentage, + const Xapian::Document& iMatchingDocument) { + _alternateDocumentList. + push_back (XapianDocumentPair_T (iMatchingPercentage, + iMatchingDocument)); + } + public: // /////////// Business methods ///////// /** Retrieve the number of extra matches for the given query string, @@ -143,10 +169,18 @@ /** Matching document, as returned by the Xapian full text search. */ Xapian::Document _document; + /** Edit distance/error, with which the matching has been made. */ + NbOfErrors_T _editDistance; + /** List of Xapian documents having the same matching percentage. <br>Hence, any of those other Xapian documents could have been chosen, instead of the main one. */ XapianDocumentList_T _documentList; + + /** List of Xapian documents having the a lower matching percentage. + <br>Those alternate matches can be suggested (in the famous + "Did you mean Xxx?" question) to the end user. */ + XapianAlternateDocumentList_T _alternateDocumentList; }; } Modified: trunk/opentrep/opentrep/bom/Place.cpp =================================================================== --- trunk/opentrep/opentrep/bom/Place.cpp 2009-07-25 22:27:45 UTC (rev 169) +++ trunk/opentrep/opentrep/bom/Place.cpp 2009-07-27 05:56:43 UTC (rev 170) @@ -10,12 +10,13 @@ namespace OPENTREP { // ////////////////////////////////////////////////////////////////////// - Place::Place () : _world (NULL), _placeHolder (NULL) { + Place::Place () : _world (NULL), _placeHolder (NULL), _mainPlace (NULL) { } // ////////////////////////////////////////////////////////////////////// Place::Place (const Place& iPlace) : _world (iPlace._world), _placeHolder (iPlace._placeHolder), + _mainPlace (iPlace._mainPlace), _placeCode (iPlace._placeCode), _cityCode (iPlace._cityCode), _stateCode (iPlace._stateCode), _countryCode (iPlace._countryCode), _regionCode (iPlace._regionCode), _continentCode (iPlace._continentCode), Modified: trunk/opentrep/opentrep/bom/Place.hpp 
=================================================================== --- trunk/opentrep/opentrep/bom/Place.hpp 2009-07-25 22:27:45 UTC (rev 169) +++ trunk/opentrep/opentrep/bom/Place.hpp 2009-07-27 05:56:43 UTC (rev 170) @@ -14,6 +14,7 @@ #include <opentrep/Location.hpp> #include <opentrep/bom/BomAbstract.hpp> #include <opentrep/bom/Names.hpp> +#include <opentrep/bom/PlaceList.hpp> namespace OPENTREP { @@ -93,6 +94,16 @@ language. */ bool getNameList (const Language::EN_Language&, NameList_T&) const; + /** Get the list of extra matching (similar) places. */ + const PlaceOrderedList_T& getExtraPlaceList() const { + return _extraPlaceList; + } + + /** Get the list of alternate matching (less similar) places. */ + const PlaceOrderedList_T& getAlternatePlaceList() const { + return _alternatePlaceList; + } + // ///////// Setters //////// /** Set the Place code. */ @@ -206,9 +217,14 @@ /** Parent World. */ World* _world; - /** Parent PlaceHolder. */ + /** Parent PlaceHolder (not always defined,for instance if the + current Place object is an extra or alternate one). */ PlaceHolder* _placeHolder; + /** Parent (main) Place (not always defined,for instance if the + current Place object is itself a main one). */ + Place* _mainPlace; + private: // /////// Attributes ///////// /** Place code. */ @@ -233,6 +249,12 @@ NameMatrix_T _nameMatrix; /** Xapian document ID. */ XapianDocID_T _docID; + + /** List of extra matching (similar) places. */ + PlaceOrderedList_T _extraPlaceList; + + /** List of alternate matching (less similar) places. 
*/ + PlaceOrderedList_T _alternatePlaceList; }; } Modified: trunk/opentrep/opentrep/bom/ResultHolder.cpp =================================================================== --- trunk/opentrep/opentrep/bom/ResultHolder.cpp 2009-07-25 22:27:45 UTC (rev 169) +++ trunk/opentrep/opentrep/bom/ResultHolder.cpp 2009-07-27 05:56:43 UTC (rev 170) @@ -110,26 +110,26 @@ << lMaxEditDistance << "."); // Retrieve the list of Xapian documents matching the query string + NbOfErrors_T lCalculatedEditDistance = 0; oMatchedString = - StringMatcher::searchString (ioMatchingSet, ioPartialQueryString, - lMaxEditDistance, - hasReachedMaximalAllowableEditDistance, - _database); + StringMatcher::searchString(ioMatchingSet, ioPartialQueryString, + lCalculatedEditDistance, lMaxEditDistance, + hasReachedMaximalAllowableEditDistance, + _database); // DEBUG OPENTREP_LOG_DEBUG ("---- Current query string: `" << ioPartialQueryString << "' --- Kept query: `" << oMatchedString - << "', with a maximal edit distance of " - << lMaxEditDistance << ", for " + << "', with an edit distance of a maximum of " + << lCalculatedEditDistance << " (over " + << lMaxEditDistance << "), for " << ioMatchingSet.size() << " matches."); if (ioMatchingSet.empty() == false) { - // Create the corresponding list of documents - StringMatcher:: - extractBestMatchingDocumentFromMSet (ioMatchingSet, - ioMatchingDocument); - + // Store the calculated (and applied) edit distance/erro + ioMatchingDocument.setEditDistance (lCalculatedEditDistance); + // Since a result has been found, the search can be stopped // for that part of the query. 
shouldStop = true; Modified: trunk/opentrep/opentrep/bom/StringMatcher.cpp =================================================================== --- trunk/opentrep/opentrep/bom/StringMatcher.cpp 2009-07-25 22:27:45 UTC (rev 169) +++ trunk/opentrep/opentrep/bom/StringMatcher.cpp 2009-07-27 05:56:43 UTC (rev 170) @@ -114,12 +114,13 @@ // /////////////////////////////////////////////////////////////////// void checkAndAlterIfNeeded (TravelQuery_T& ioSuggestedString, const TravelQuery_T& iOriginalString, + NbOfErrors_T& ioCalculatedEditDistance, const NbOfErrors_T& iMaxEditDistance, const Xapian::Database& iDatabase) { /** - Store a copy of the suggested string, as it will me altered by - the below method. + Store a copy of the suggested string, as it will be altered by + the below method, i.e., removeFurthestLeftWord(). */ TravelQuery_T lOriginalStringCopy (iOriginalString); StringMatcher::removeFurthestLeftWord (lOriginalStringCopy); @@ -128,15 +129,14 @@ Get a spell-corrected suggestion for the reduced original string. <br>Limit the edit distance to the given maximal one. 
*/ - NbOfErrors_T lCalculatedEditDistance = - calculateEditDistance (lOriginalStringCopy); + ioCalculatedEditDistance = calculateEditDistance (lOriginalStringCopy); - lCalculatedEditDistance = std::min (lCalculatedEditDistance, - iMaxEditDistance); + ioCalculatedEditDistance = std::min (ioCalculatedEditDistance, + iMaxEditDistance); std::string lSuggestionForReducedOriginalString = iDatabase.get_spelling_suggestion (lOriginalStringCopy, - lCalculatedEditDistance); + ioCalculatedEditDistance); /** Note that if the suggestion on the reduced-original string is @@ -154,7 +154,7 @@ OPENTREP_LOG_DEBUG ("The suggestion (`" << ioSuggestedString << "') for `" << iOriginalString << "', with an edit distance/error of " - << lCalculatedEditDistance + << ioCalculatedEditDistance << " over " << iMaxEditDistance << " allowable" << ", is the same as the suggestion for the reduced " << "original string (`" << lOriginalStringCopy @@ -184,14 +184,14 @@ Get a spell-corrected suggestion for the reduced original string. <br>Limit the edit distance to the given maximal one. 
*/ - lCalculatedEditDistance = calculateEditDistance (lOriginalStringCopy); + ioCalculatedEditDistance = calculateEditDistance (lOriginalStringCopy); - lCalculatedEditDistance = std::min (lCalculatedEditDistance, - iMaxEditDistance); + ioCalculatedEditDistance = std::min (ioCalculatedEditDistance, + iMaxEditDistance); lSuggestionForReducedOriginalString = iDatabase.get_spelling_suggestion (lOriginalStringCopy, - lCalculatedEditDistance); + ioCalculatedEditDistance); /** Note that if the suggestion on the reduced-original string is @@ -209,7 +209,7 @@ OPENTREP_LOG_DEBUG ("The suggestion (`" << ioSuggestedString << "') for `" << iOriginalString << "', with an edit distance/error of " - << lCalculatedEditDistance + << ioCalculatedEditDistance << " over " << iMaxEditDistance << " allowable" << ", is the same as the suggestion for the reduced " << "original string (`" << lOriginalStringCopy @@ -228,6 +228,7 @@ std::string StringMatcher:: searchString (Xapian::MSet& ioMatchingSet, const TravelQuery_T& iSearchString, + NbOfErrors_T& ioCalculatedEditDistance, NbOfErrors_T& ioMaxEditDistance, bool& ioHasReachedMaximalAllowableEditDistance, const Xapian::Database& iDatabase) { @@ -407,26 +408,26 @@ phrase/string. With the above example, 'sna francisco' yields the suggestion 'san francisco'. 
*/ - NbOfErrors_T lCalculatedEditDistance = - calculateEditDistance (lOriginalQueryString); + ioCalculatedEditDistance = calculateEditDistance (lOriginalQueryString); // Store the greatest edit distance/error - lMaxEditDistance = std::max (lMaxEditDistance, lCalculatedEditDistance); + lMaxEditDistance = std::max (lMaxEditDistance, ioCalculatedEditDistance); // Limit the edit distance to the given maximal one - lCalculatedEditDistance = std::min (lCalculatedEditDistance, - ioMaxEditDistance); + ioCalculatedEditDistance = std::min (ioCalculatedEditDistance, + ioMaxEditDistance); std::string lFullWordCorrectedString = iDatabase.get_spelling_suggestion (lOriginalQueryString, - lCalculatedEditDistance); + ioCalculatedEditDistance); /** Check that the suggestion does not encompass extra words, which will be otherwise/rather recognised in another step. */ checkAndAlterIfNeeded (lFullWordCorrectedString, lOriginalQueryString, - ioMaxEditDistance, iDatabase); + ioCalculatedEditDistance, ioMaxEditDistance, + iDatabase); /** Since there is still no match, we search on the string @@ -528,7 +529,7 @@ NbOfMatches_T idx = 1; for ( ; itDoc != iMatchingSet.end(); ++itDoc, ++idx) { const Xapian::percent& lPercentage = itDoc.get_percent(); - // const Xapian::Document& lDocument = itDoc.get_document(); + const Xapian::Document& lDocument = itDoc.get_document(); // DEBUG /* @@ -536,12 +537,15 @@ << lDocument.get_docid() << " matching at " << lPercentage << "%."); */ - + + /** If the matching percentage is the same as for the main + (chosen) Xapian document, then add it to the dedicated + list. Otherwise, add it to the alternative choices. 
*/ if (lPercentage == lBestPercentage) { - ioMatchingDocument.addExtraDocument (itDoc.get_document()); + ioMatchingDocument.addExtraDocument (lDocument); } else { - break; + ioMatchingDocument.addAlternateDocument (lPercentage, lDocument); } } } Modified: trunk/opentrep/opentrep/bom/StringMatcher.hpp =================================================================== --- trunk/opentrep/opentrep/bom/StringMatcher.hpp 2009-07-25 22:27:45 UTC (rev 169) +++ trunk/opentrep/opentrep/bom/StringMatcher.hpp 2009-07-27 05:56:43 UTC (rev 170) @@ -28,6 +28,7 @@ words of the search string. @param Xapian::MSet& The Xapian matching set. It can be empty. @param const std::string& The query string. + @param NbOfErrors_T& The calculated (and applied) edit distance/error. @param NbOfErrors_T& The maximal allowable edit distance/error. @param bool& Whether or not the maximal allowable edit distance/error has become greater than the maximum of the edit distance/errors @@ -37,6 +38,7 @@ which has yielded matches. 
*/ static std::string searchString (Xapian::MSet&, const std::string& iSearchString, + NbOfErrors_T& ioCalculatedEditDistance, NbOfErrors_T& ioMaxEditDistance, bool& ioHasReachedMaximalAllowableEditDistance, const Xapian::Database&); Modified: trunk/opentrep/opentrep/command/RequestInterpreter.cpp =================================================================== --- trunk/opentrep/opentrep/command/RequestInterpreter.cpp 2009-07-25 22:27:45 UTC (rev 169) +++ trunk/opentrep/opentrep/command/RequestInterpreter.cpp 2009-07-27 05:56:43 UTC (rev 170) @@ -72,6 +72,13 @@ const Result* lResult_ptr = *itResult; assert (lResult_ptr != NULL); + /** + TODO: Add a loop for retrieving both extra and alternate Documents + Use FacPlace::initLinkWithExtraPlace() and + FacPlace::initLinkWithAlternatePlace() + */ + + // Retrieve the parameters of the best matching document const Xapian::Document& lDocument = lResult_ptr->getXapianDocument(); const Xapian::percent& lDocPercentage = Modified: trunk/opentrep/opentrep/factory/FacPlace.cpp =================================================================== --- trunk/opentrep/opentrep/factory/FacPlace.cpp 2009-07-25 22:27:45 UTC (rev 169) +++ trunk/opentrep/opentrep/factory/FacPlace.cpp 2009-07-27 05:56:43 UTC (rev 170) @@ -63,4 +63,26 @@ return *oPlace_ptr; } + // ////////////////////////////////////////////////////////////////////// + void FacPlace::initLinkWithExtraPlace (Place& ioMainPlace, + Place& ioExtraPlace) { + // Link the main Place to the extra Place, and vice versa + ioExtraPlace._mainPlace = &ioMainPlace; + + // Add the extra Place to the main Place internal map (of extra + // Place objects) + ioMainPlace._extraPlaceList.push_back (&ioExtraPlace); + } + + // ////////////////////////////////////////////////////////////////////// + void FacPlace::initLinkWithAlternatePlace (Place& ioMainPlace, + Place& ioAlternatePlace) { + // Link the main Place to the alternate Place, and vice versa + ioAlternatePlace._mainPlace = 
&ioMainPlace; + + // Add the alternate Place to the main Place internal map (of + // alternate Place objects) + ioMainPlace._extraPlaceList.push_back (&ioAlternatePlace); + } + } Modified: trunk/opentrep/opentrep/factory/FacPlace.hpp =================================================================== --- trunk/opentrep/opentrep/factory/FacPlace.hpp 2009-07-25 22:27:45 UTC (rev 169) +++ trunk/opentrep/opentrep/factory/FacPlace.hpp 2009-07-27 05:56:43 UTC (rev 170) @@ -36,6 +36,20 @@ @return Place& The newly created object. */ Place& clone (const Place&); + /** Initialise the link between a Place and an extra Place. + @param Place& Main Place object. + @param Place& Extra Place object. + @exception FacExceptionNullPointer + @exception FacException.*/ + static void initLinkWithExtraPlace (Place&, Place&); + + /** Initialise the link between a Place and an alternate Place. + @param Place& Main Place object. + @param Place& Alternate Place object. + @exception FacExceptionNullPointer + @exception FacException.*/ + static void initLinkWithAlternatePlace (Place&, Place&); + private: /** Default Constructor. <br>This constructor is private in order to ensure the singleton This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <den...@us...> - 2009-08-10 16:23:43
|
Revision: 173 http://opentrep.svn.sourceforge.net/opentrep/?rev=173&view=rev Author: denis_arnaud Date: 2009-08-10 16:23:33 +0000 (Mon, 10 Aug 2009) Log Message: ----------- [Dev] The edit distance figures are now reported within the interface (Location structure). That work is finished, but not fully tested yet. Modified Paths: -------------- trunk/opentrep/opentrep/Location.hpp trunk/opentrep/opentrep/bom/Place.cpp trunk/opentrep/opentrep/bom/Place.hpp trunk/opentrep/opentrep/bom/ResultHolder.cpp trunk/opentrep/opentrep/bom/StringMatcher.cpp trunk/opentrep/opentrep/bom/StringMatcher.hpp trunk/opentrep/opentrep/bom/sources.mk trunk/opentrep/opentrep/command/RequestInterpreter.cpp trunk/opentrep/opentrep/python/pyopentrep.cpp trunk/opentrep/opentrep/python/pyopentrep.py Added Paths: ----------- trunk/opentrep/opentrep/bom/Levenshtein.cpp trunk/opentrep/opentrep/bom/Levenshtein.hpp Modified: trunk/opentrep/opentrep/Location.hpp =================================================================== --- trunk/opentrep/opentrep/Location.hpp 2009-08-10 12:17:27 UTC (rev 172) +++ trunk/opentrep/opentrep/Location.hpp 2009-08-10 16:23:33 UTC (rev 173) @@ -84,6 +84,12 @@ return _editDistance; } + /** Get the maximal allowable edit distance/error, with which the + matching has been made. */ + const NbOfErrors_T& getAllowableEditDistance () const { + return _allowableEditDistance; + } + /** Get the list of extra matching (similar) locations. */ const LocationList_T& getExtraLocationList() const { return _extraLocationList; @@ -156,6 +162,12 @@ _editDistance = iEditDistance; } + /** Set the maxiaml allowable edit distance/error, with which the + matching has been made. */ + void setAllowableEditDistance (const NbOfErrors_T& iAllowableEditDistance) { + _allowableEditDistance = iAllowableEditDistance; + } + /** Add an extra matching location. 
*/ void addExtraLocation (const Location& iExtraLocation) { _extraLocationList.push_back (iExtraLocation); @@ -186,8 +198,8 @@ oStr << _locationCode << ", " << _cityCode << ", " << _stateCode << ", " << _countryCode << ", " << _regionCode << ", " << _continentCode << ", " << _timeZoneGroup - << ", " << _longitude << ", " << _latitude - << ", " << _percentage << ", " << _editDistance; + << ", " << _longitude << ", " << _latitude << ", " << _percentage + << ", " << _editDistance << ", " << _allowableEditDistance; if (_extraLocationList.empty() == false) { oStr << " " << _extraLocationList.size() << " extra match(es)"; @@ -251,13 +263,15 @@ const double iLongitude, const double iLatitude, const LocationNameList_T& iNameList, const MatchingPercentage_T& iPercentage, - const NbOfErrors_T& iEditDistance) + const NbOfErrors_T& iEditDistance, + const NbOfErrors_T& iAllowableEditDistance) : _locationCode (iPlaceCode), _cityCode (iCityCode), _stateCode (iStateCode), _countryCode (iCountryCode), _regionCode (iRegionCode), _continentCode (iContinentCode), _timeZoneGroup (iTimeZoneGroup), _longitude (iLongitude), _latitude (iLatitude), _nameList (iNameList), - _percentage (iPercentage), _editDistance (iEditDistance) { + _percentage (iPercentage), _editDistance (iEditDistance), + _allowableEditDistance (iAllowableEditDistance) { } /** Default Constructor. */ @@ -298,6 +312,10 @@ /** Allowed edit error/distance. */ NbOfErrors_T _editDistance; + /** Maximum allowable edit distance/error, with which the matching + has been made. */ + NbOfErrors_T _allowableEditDistance; + /** List of extra matching (similar) locations. 
*/ LocationList_T _extraLocationList; Added: trunk/opentrep/opentrep/bom/Levenshtein.cpp =================================================================== --- trunk/opentrep/opentrep/bom/Levenshtein.cpp (rev 0) +++ trunk/opentrep/opentrep/bom/Levenshtein.cpp 2009-08-10 16:23:33 UTC (rev 173) @@ -0,0 +1,111 @@ +// ////////////////////////////////////////////////////////////////////// +// Import section +// ////////////////////////////////////////////////////////////////////// +// STL +#include <string> +#include <vector> +// OpenTREP +#include <opentrep/bom/Levenshtein.hpp> + +namespace OPENTREP { + + // ////////////////////////////////////////////////////////////////// + int Levenshtein::getDistance (const std::string& iSource, + const std::string& iTarget) { + + // Step 1 + + const int n = iSource.length(); + const int m = iTarget.length(); + + if (n == 0) { + return m; + } + + if (m == 0) { + return n; + } + + // Definition of Matrix Type + typedef std::vector<std::vector<int> > Matrix_T; + + Matrix_T matrix (n+1); + + // Size the vectors in the 2.nd dimension. Unfortunately C++ doesn't + // allow for allocation on declaration of 2.nd dimension of vec of vec + + for (int i = 0; i <= n; i++) { + matrix[i].resize(m+1); + } + + // Step 2 + + for (int i = 0; i <= n; i++) { + matrix[i][0]=i; + } + + for (int j = 0; j <= m; j++) { + matrix[0][j]=j; + } + + // Step 3 + + for (int i = 1; i <= n; i++) { + + const char s_i = iSource[i-1]; + + // Step 4 + + for (int j = 1; j <= m; j++) { + + const char t_j = iTarget[j-1]; + + // Step 5 + + int cost; + if (s_i == t_j) { + cost = 0; + + } else { + cost = 1; + } + + // Step 6 + + const int above = matrix[i-1][j]; + const int left = matrix[i][j-1]; + const int diag = matrix[i-1][j-1]; + int cell = std::min ( above + 1, std::min (left + 1, diag + cost)); + + // Step 6A: Cover transposition, in addition to deletion, + // insertion and substitution. 
This step is taken from: + // Berghel, Hal ; Roach, David : "An Extension of Ukkonen's + // Enhanced Dynamic Programming ASM Algorithm" + // (http://www.acm.org/~hlb/publications/asm/asm.html) + + if (i>2 && j>2) { + int trans = matrix[i-2][j-2] + 1; + + if (iSource[i-2] != t_j) { + trans++; + } + + if (s_i != iTarget[j-2]) { + trans++; + } + + if (cell > trans) { + cell = trans; + } + } + + matrix[i][j] = cell; + } + } + + // Step 7 + + return matrix[n][m]; + } + +} Added: trunk/opentrep/opentrep/bom/Levenshtein.hpp =================================================================== --- trunk/opentrep/opentrep/bom/Levenshtein.hpp (rev 0) +++ trunk/opentrep/opentrep/bom/Levenshtein.hpp 2009-08-10 16:23:33 UTC (rev 173) @@ -0,0 +1,28 @@ +// +// Levenshtein Distance Algorithm: C++ Implementation by Anders Sewerin Johansen +// +#ifndef __OPENTREP_BOM_LEVENSHTEIN_HPP +#define __OPENTREP_BOM_LEVENSHTEIN_HPP + +// ////////////////////////////////////////////////////////////////////// +// Import section +// ////////////////////////////////////////////////////////////////////// +// STL +#include <string> +// OpenTREP +#include <opentrep/OPENTREP_Types.hpp> +#include <opentrep/bom/BomAbstract.hpp> + +namespace OPENTREP { + + /** Class aggregating utilities around the Levenshtein edit + distance/error. */ + class Levenshtein : public BomAbstract { + public: + /** Calculate the edit distance between two strings. 
*/ + static int getDistance (const std::string& iSource, + const std::string& iTarget); + }; + +} +#endif // __OPENTREP_BOM_LEVENSHTEIN_HPP Modified: trunk/opentrep/opentrep/bom/Place.cpp =================================================================== --- trunk/opentrep/opentrep/bom/Place.cpp 2009-08-10 12:17:27 UTC (rev 172) +++ trunk/opentrep/opentrep/bom/Place.cpp 2009-08-10 16:23:33 UTC (rev 173) @@ -23,7 +23,8 @@ _timeZoneGroup (iPlace._timeZoneGroup), _longitude (iPlace._longitude), _latitude (iPlace._latitude), _nameMatrix (iPlace._nameMatrix), _docID (iPlace._docID), _percentage (iPlace._percentage), - _editDistance (iPlace._editDistance) { + _editDistance (iPlace._editDistance), + _allowableEditDistance (iPlace._allowableEditDistance) { } // ////////////////////////////////////////////////////////////////////// @@ -80,7 +81,8 @@ << ", " << _continentCode << ", " << _timeZoneGroup << ", " << _longitude << ", " << _latitude << ", " << _docID << ", " << _percentage - << ", " << _editDistance << ". "; + << ", " << _editDistance << ", " << _allowableEditDistance + << ". 
"; for (NameMatrix_T::const_iterator itNameList = _nameMatrix.begin(); itNameList != _nameMatrix.end(); ++itNameList) { @@ -136,7 +138,7 @@ << ", " << _continentCode << ", " << _timeZoneGroup << ", " << _longitude << ", " << _latitude << ", " << _docID << ", " << _percentage - << ", " << _editDistance; + << ", " << _editDistance << ", " << _allowableEditDistance; NameMatrix_T::const_iterator itNameHolder = _nameMatrix.begin(); if (itNameHolder != _nameMatrix.end()) { @@ -189,6 +191,7 @@ << ", docID = " << _docID << ", percentage = " << _percentage << "%" << ", edit distance = " << _editDistance + << ", allowable edit distance = " << _allowableEditDistance << std::endl; return oStr.str(); } @@ -262,7 +265,7 @@ Location oLocation (_placeCode, lCityCode, _stateCode, _countryCode, _regionCode, _continentCode, _timeZoneGroup, _longitude, _latitude, lNameList, - _percentage, _editDistance); + _percentage, _editDistance, _allowableEditDistance); // Add extra matching locations, whenever they exist if (_extraPlaceList.empty() == false) { Modified: trunk/opentrep/opentrep/bom/Place.hpp =================================================================== --- trunk/opentrep/opentrep/bom/Place.hpp 2009-08-10 12:17:27 UTC (rev 172) +++ trunk/opentrep/opentrep/bom/Place.hpp 2009-08-10 16:23:33 UTC (rev 173) @@ -91,6 +91,12 @@ return _editDistance; } + /** Get the maximal allowable edit distance/error, with which the + matching has been made. */ + const NbOfErrors_T& getAllowableEditDistance () const { + return _allowableEditDistance; + } + /** Get the map of name lists. */ const NameMatrix_T& getNameMatrix () const { return _nameMatrix; @@ -176,6 +182,12 @@ _editDistance = iEditDistance; } + /** Set the maxiaml allowable edit distance/error, with which the + matching has been made. 
*/ + void setAllowableEditDistance (const NbOfErrors_T& iAllowableEditDistance) { + _allowableEditDistance = iAllowableEditDistance; + } + public: // ////////// Setters in underlying names //////// @@ -277,6 +289,10 @@ /** Allowed edit error/distance. */ NbOfErrors_T _editDistance; + /** Maximum allowable edit distance/error, with which the matching + has been made. */ + NbOfErrors_T _allowableEditDistance; + /** List of extra matching (similar) places. */ PlaceOrderedList_T _extraPlaceList; Modified: trunk/opentrep/opentrep/bom/ResultHolder.cpp =================================================================== --- trunk/opentrep/opentrep/bom/ResultHolder.cpp 2009-08-10 12:17:27 UTC (rev 172) +++ trunk/opentrep/opentrep/bom/ResultHolder.cpp 2009-08-10 16:23:33 UTC (rev 173) @@ -69,95 +69,6 @@ } // ////////////////////////////////////////////////////////////////////// - std::string ResultHolder::searchString (Xapian::MSet& ioMatchingSet, - TravelQuery_T& ioPartialQueryString, - Document& ioMatchingDocument) { - std::string oMatchedString; - - // Catch any Xapian::Error exceptions thrown - try { - - /** - The query string must first be checked, without allowing any - spelling errors, but by removing the furthest right word at - every step. - <br>If no match is found, the maximal allowable edit - distance/error becomes 1, and the process (trying to match - the whole sentence, then by removing the furthest right word, - etc.) is re-performed. - <br>If no match is found, the maximal allowable edit - distance/error becomes 2. - <br>And so on until the maximum of the edit distance/error - becomes greater than the maximal allowable distance/error. - reached. - - <br>NOTE: that feature is de-activated, as it seems it does - not bring any added value. To re-activate it, just initialise - the lMaxEditDistance to 0, instead of to the positive infinite. 
- */ - // NbOfErrors_T lMaxEditDistance = 0; - NbOfErrors_T lMaxEditDistance = std::numeric_limits<NbOfErrors_T>::max(); - - bool hasReachedMaximalAllowableEditDistance = false; - bool shouldStop = false; - while (shouldStop == false) { - - // DEBUG - OPENTREP_LOG_DEBUG ("--------"); - OPENTREP_LOG_DEBUG ("Current query string: `" - << ioPartialQueryString - << "', with a maximal edit distance of " - << lMaxEditDistance << "."); - - // Retrieve the list of Xapian documents matching the query string - NbOfErrors_T lCalculatedEditDistance = 0; - oMatchedString = - StringMatcher::searchString(ioMatchingSet, ioPartialQueryString, - lCalculatedEditDistance, lMaxEditDistance, - hasReachedMaximalAllowableEditDistance, - _database); - - // DEBUG - OPENTREP_LOG_DEBUG ("---- Current query string: `" - << ioPartialQueryString << "' --- Kept query: `" - << oMatchedString - << "', with an edit distance of a maximum of " - << lCalculatedEditDistance << " (over " - << lMaxEditDistance << "), for " - << ioMatchingSet.size() << " matches."); - - if (ioMatchingSet.empty() == false) { - // Store the calculated (and applied) edit distance/erro - ioMatchingDocument.setEditDistance (lCalculatedEditDistance); - - // Since a result has been found, the search can be stopped - // for that part of the query. - shouldStop = true; - break; - } - - // Allow for one more spelling error - ++lMaxEditDistance; - - /** - Stop when it is no longer necessary to increase the maximal - allowable edit distance, as it is already greater than the - maximum of the calculated edit distance. 
- */ - if (hasReachedMaximalAllowableEditDistance == true) { - shouldStop = true; - } - } - - } catch (const Xapian::Error& error) { - OPENTREP_LOG_ERROR ("Exception: " << error.get_msg()); - throw XapianException(); - } - - return oMatchedString; - } - - // ////////////////////////////////////////////////////////////////////// std::string ResultHolder::searchString (TravelQuery_T& ioPartialQueryString, Document& ioMatchingDocument) { std::string oMatchedString; @@ -186,8 +97,12 @@ << lPartialQueryString << "'"); Xapian::MSet lMatchingSet; - oMatchedString = searchString (lMatchingSet, lPartialQueryString, - ioMatchingDocument); + NbOfErrors_T lEditDistance; + NbOfErrors_T lAllowableEditDistance; + oMatchedString = + StringMatcher::searchString (lMatchingSet, lPartialQueryString, + ioMatchingDocument, lEditDistance, + lAllowableEditDistance, _database); if (oMatchedString.empty() == false) { // Create the corresponding document (from the Xapian MSet object) @@ -195,6 +110,11 @@ extractBestMatchingDocumentFromMSet (lMatchingSet, ioMatchingDocument); + // Note: the allowable edit distance/error, as well as the + // effective (Levenshtein) edit distance/error, have been + // set, in the Document object, by the above call to the + // searchString() method. + // Since a result has been found, the search can be stopped // for that part of the query. 
ioPartialQueryString = lPartialQueryString; @@ -277,12 +197,18 @@ ioDocumentList.push_back (lMatchingDocument); // DEBUG - const NbOfMatches_T lNbOfMatches = + const NbOfMatches_T& lNbOfMatches = lMatchingDocument.notifyIfExtraMatch(); + const NbOfErrors_T& lEditDistance = + lMatchingDocument.getEditDistance(); + const NbOfErrors_T& lAllowableEditDistance = + lMatchingDocument.getAllowableEditDistance(); OPENTREP_LOG_DEBUG ("==> " << lNbOfMatches << " main matches for the query string: `" - << lMatchedString << "' (from `" - << lQueryString << "')"); + << lMatchedString << "' (from `" << lQueryString + << "' -> Levenshtein edit distance of " + << lEditDistance << " over allowable " + << lAllowableEditDistance << ")"); /** Remove, from the lRemainingQueryString string, the part Modified: trunk/opentrep/opentrep/bom/StringMatcher.cpp =================================================================== --- trunk/opentrep/opentrep/bom/StringMatcher.cpp 2009-08-10 12:17:27 UTC (rev 172) +++ trunk/opentrep/opentrep/bom/StringMatcher.cpp 2009-08-10 16:23:33 UTC (rev 173) @@ -18,23 +18,20 @@ // OpenTREP #include <opentrep/bom/WordHolder.hpp> #include <opentrep/bom/StringMatcher.hpp> +#include <opentrep/bom/Levenshtein.hpp> #include <opentrep/service/Logger.hpp> namespace OPENTREP { - // ///////////// Type definitions ////////// - /** Edit distance (e.g., 2 or 3). */ - typedef unsigned int EditDistance_T; - /** Given the size of the phrase, determine the allowed edit distance for spelling purpose. For instance, an edit distance of 1 will be allowed on a 4-letter word, while an edit distance of 3 will be allowed on an 11-letter word. 
*/ // ////////////////////////////////////////////////////////////////////// static unsigned int calculateEditDistance (const TravelQuery_T& iPhrase) { - EditDistance_T oEditDistance = 2; + NbOfErrors_T oEditDistance = 2; - const EditDistance_T lQueryStringSize = iPhrase.size(); + const NbOfErrors_T lQueryStringSize = iPhrase.size(); oEditDistance = lQueryStringSize / 4; return oEditDistance; @@ -46,11 +43,17 @@ // ////////////////////////////////////////////////////////////////////// static void createCorrectedWordList (const WordList_T& iOriginalWordList, WordList_T& ioCorrectedWordList, + NbOfErrors_T& ioEditDistance, + NbOfErrors_T& ioAllowableEditDistance, const Xapian::Database& iDatabase) { // Empty the target list ioCorrectedWordList.clear(); - + + // Re-set the edit distances/errors + ioEditDistance = 0; + ioAllowableEditDistance = 0; + // Catch any Xapian::Error exceptions thrown try { @@ -59,9 +62,13 @@ const std::string& lOriginalWord = *itWord; // Calculate the distance, depending on the length of the word - const EditDistance_T lCalculatedEditDistance = + const NbOfErrors_T lCalculatedEditDistance = calculateEditDistance (lOriginalWord); + // The allowable edit distance/error is considered to be the + // cumulated allowable edit distance/error over all the words + ioAllowableEditDistance += lCalculatedEditDistance; + // Get a spelling suggestion for that word const std::string& lSuggestedWord = iDatabase.get_spelling_suggestion (lOriginalWord, @@ -69,9 +76,16 @@ if (lSuggestedWord.empty() == true) { ioCorrectedWordList.push_back (lOriginalWord); - + // The edit distance is not modified (as the spelling was correct) + } else { ioCorrectedWordList.push_back (lSuggestedWord); + + // The edit distance/error increases from the Levenshtein + // edit distance/error + const NbOfErrors_T& lLevenshteinDistance = + Levenshtein::getDistance (lOriginalWord, lSuggestedWord); + ioAllowableEditDistance += lLevenshteinDistance; } // DEBUG @@ -114,7 +128,6 @@ // 
/////////////////////////////////////////////////////////////////// void checkAndAlterIfNeeded (TravelQuery_T& ioSuggestedString, const TravelQuery_T& iOriginalString, - NbOfErrors_T& ioCalculatedEditDistance, const NbOfErrors_T& iMaxEditDistance, const Xapian::Database& iDatabase) { @@ -129,15 +142,13 @@ Get a spell-corrected suggestion for the reduced original string. <br>Limit the edit distance to the given maximal one. */ - ioCalculatedEditDistance = calculateEditDistance (lOriginalStringCopy); + NbOfErrors_T lEditDistance = calculateEditDistance (lOriginalStringCopy); - ioCalculatedEditDistance = std::min (ioCalculatedEditDistance, - iMaxEditDistance); + lEditDistance = std::min (lEditDistance, iMaxEditDistance); std::string lSuggestionForReducedOriginalString = - iDatabase.get_spelling_suggestion (lOriginalStringCopy, - ioCalculatedEditDistance); - + iDatabase.get_spelling_suggestion (lOriginalStringCopy, lEditDistance); + /** Note that if the suggestion on the reduced-original string is empty, it normally means that the reduced-original string is @@ -153,9 +164,9 @@ // DEBUG OPENTREP_LOG_DEBUG ("The suggestion (`" << ioSuggestedString << "') for `" << iOriginalString - << "', with an edit distance/error of " - << ioCalculatedEditDistance - << " over " << iMaxEditDistance << " allowable" + << "', with an allowable edit distance/error of " + << lEditDistance + << " over a maximum of " << iMaxEditDistance << ", is the same as the suggestion for the reduced " << "original string (`" << lOriginalStringCopy << "') -> discarded."); @@ -174,7 +185,7 @@ } /** - Store a copy of the suggested string, as it will me altered by + Store a copy of the suggested string, as it will be altered by the below method. */ lOriginalStringCopy = iOriginalString; @@ -184,14 +195,13 @@ Get a spell-corrected suggestion for the reduced original string. <br>Limit the edit distance to the given maximal one. 
*/ - ioCalculatedEditDistance = calculateEditDistance (lOriginalStringCopy); + lEditDistance = calculateEditDistance (lOriginalStringCopy); - ioCalculatedEditDistance = std::min (ioCalculatedEditDistance, - iMaxEditDistance); + lEditDistance = std::min (lEditDistance, iMaxEditDistance); lSuggestionForReducedOriginalString = iDatabase.get_spelling_suggestion (lOriginalStringCopy, - ioCalculatedEditDistance); + lEditDistance); /** Note that if the suggestion on the reduced-original string is @@ -208,9 +218,9 @@ // DEBUG OPENTREP_LOG_DEBUG ("The suggestion (`" << ioSuggestedString << "') for `" << iOriginalString - << "', with an edit distance/error of " - << ioCalculatedEditDistance - << " over " << iMaxEditDistance << " allowable" + << "', with an allowable edit distance/error of " + << lEditDistance + << " over a maximum of " << iMaxEditDistance << ", is the same as the suggestion for the reduced " << "original string (`" << lOriginalStringCopy << "') -> discarded."); @@ -225,15 +235,16 @@ } // /////////////////////////////////////////////////////////////////// - std::string StringMatcher:: - searchString (Xapian::MSet& ioMatchingSet, - const TravelQuery_T& iSearchString, - NbOfErrors_T& ioCalculatedEditDistance, - NbOfErrors_T& ioMaxEditDistance, - bool& ioHasReachedMaximalAllowableEditDistance, - const Xapian::Database& iDatabase) { - NbOfErrors_T lMaxEditDistance = std::numeric_limits<EditDistance_T>::min(); + std::string searchStringIter (Xapian::MSet& ioMatchingSet, + const TravelQuery_T& iSearchString, + NbOfErrors_T& ioEditDistance, + NbOfErrors_T& ioAllowableEditDistance, + const NbOfErrors_T& iMaxEditDistance, + bool& ioHasReachedMaximalAllowableEditDistance, + const Xapian::Database& iDatabase) { + NbOfErrors_T lMaxEditDistance = std::numeric_limits<NbOfErrors_T>::min(); + // Initialisation std::string oMatchedString; // Catch any Xapian::Error exceptions thrown @@ -289,6 +300,13 @@ << "'"); */ + // By default, as there can be a match without changing + 
// anything, the edit distance is null, and the allowable edit + // distance could be anything. It makes sense, though, to set it + // at the maximum. + ioEditDistance = 0; + ioAllowableEditDistance = iMaxEditDistance; + // Start an enquire session Xapian::Enquire enquire (iDatabase); @@ -330,10 +348,10 @@ of the calculated edit distance, it becomes useless to go on increasing the maximal allowable edit distance. */ - if (lMaxEditDistance <= ioMaxEditDistance) { + if (lMaxEditDistance <= iMaxEditDistance) { ioHasReachedMaximalAllowableEditDistance = true; } - + oMatchedString = lOriginalQueryString; return oMatchedString; } @@ -350,6 +368,7 @@ */ WordList_T lCorrectedWordList; createCorrectedWordList (lOriginalWordList, lCorrectedWordList, + ioEditDistance, ioAllowableEditDistance, iDatabase); const std::string lCorrectedQueryString = @@ -394,7 +413,7 @@ of the calculated edit distance, it becomes useless to go on increasing the maximal allowable edit distance. */ - if (lMaxEditDistance <= ioMaxEditDistance) { + if (lMaxEditDistance <= iMaxEditDistance) { ioHasReachedMaximalAllowableEditDistance = true; } @@ -408,27 +427,40 @@ phrase/string. With the above example, 'sna francisco' yields the suggestion 'san francisco'. 
*/ - ioCalculatedEditDistance = calculateEditDistance (lOriginalQueryString); + ioEditDistance = calculateEditDistance (lOriginalQueryString); // Store the greatest edit distance/error - lMaxEditDistance = std::max (lMaxEditDistance, ioCalculatedEditDistance); + lMaxEditDistance = std::max (lMaxEditDistance, ioEditDistance); // Limit the edit distance to the given maximal one - ioCalculatedEditDistance = std::min (ioCalculatedEditDistance, - ioMaxEditDistance); + ioEditDistance = std::min (ioEditDistance, iMaxEditDistance); + + // Store the allowable edit distance/error + ioAllowableEditDistance = ioEditDistance; + // Let Xapian find a spelling correction (if any) std::string lFullWordCorrectedString = iDatabase.get_spelling_suggestion (lOriginalQueryString, - ioCalculatedEditDistance); + ioEditDistance); /** Check that the suggestion does not encompass extra words, which - will be otherwise/rather recognised in another step. + will be otherwise recognised in another step. + <br>See the comment of the checkAndAlterIfNeeded() function + for more details. */ checkAndAlterIfNeeded (lFullWordCorrectedString, lOriginalQueryString, - ioCalculatedEditDistance, ioMaxEditDistance, - iDatabase); + iMaxEditDistance, iDatabase); + + if (lFullWordCorrectedString.empty() == true) { + ioEditDistance = 0; + } else { + // Store the effective (Levenshtein) edit distance/error + ioEditDistance = Levenshtein::getDistance (lOriginalQueryString, + lFullWordCorrectedString); + } + /** Since there is still no match, we search on the string corrected as a whole. 
@@ -452,8 +484,12 @@ /* OPENTREP_LOG_DEBUG ("Query corrected as a full sentence `" << lFullWordCorrectedString - << "' with an allowable maximal edit distance of " - << ioMaxEditDistance + << "' with a Levenshtein edit distance of " + << ioEditDistance + << " over an allowable edit distance of " + << ioAllowableEditDistance + << " over a maximum of " + << iMaxEditDistance << " on a potential of " << lMaxEditDistance << ", i.e., `"<< lFullQueryCorrected.get_description() << "' => " << nbMatches @@ -480,7 +516,7 @@ of the calculated edit distance, it becomes useless to go on increasing the maximal allowable edit distance. */ - if (ioMaxEditDistance >= lMaxEditDistance) { + if (iMaxEditDistance >= lMaxEditDistance) { ioHasReachedMaximalAllowableEditDistance = true; } @@ -488,6 +524,104 @@ } // ////////////////////////////////////////////////////////////////////// + std::string StringMatcher:: + searchString (Xapian::MSet& ioMatchingSet, + const TravelQuery_T& iPartialQueryString, + Document& ioMatchingDocument, + NbOfErrors_T& ioEditDistance, + NbOfErrors_T& ioAllowableEditDistance, + const Xapian::Database& iDatabase) { + std::string oMatchedString; + + // Catch any Xapian::Error exceptions thrown + try { + + /** + The query string must first be checked, without allowing any + spelling errors, but by removing the furthest right word at + every step. + <br>If no match is found, the maximal allowable edit + distance/error becomes 1, and the process (trying to match + the whole sentence, then by removing the furthest right word, + etc.) is re-performed. + <br>If no match is found, the maximal allowable edit + distance/error becomes 2. + <br>And so on until the maximum of the edit distance/error + becomes greater than the maximal allowable distance/error. + reached. + + <br>NOTE: that feature is de-activated, as it seems it does + not bring any added value. To re-activate it, just initialise + the lMaxEditDistance to 0, instead of to the positive infinite. 
+ */ + // NbOfErrors_T lMaxEditDistance = 0; + NbOfErrors_T lMaxEditDistance = std::numeric_limits<NbOfErrors_T>::max(); + + bool hasReachedMaximalAllowableEditDistance = false; + bool shouldStop = false; + while (shouldStop == false) { + + // DEBUG + OPENTREP_LOG_DEBUG ("--------"); + OPENTREP_LOG_DEBUG ("Current query string: `" + << iPartialQueryString + << "', with a maximal edit distance of " + << lMaxEditDistance << "."); + + // Retrieve the list of Xapian documents matching the query string + NbOfErrors_T lEditDistance; + NbOfErrors_T lAllowableEditDistance; + oMatchedString = + searchStringIter (ioMatchingSet, iPartialQueryString, lEditDistance, + lAllowableEditDistance, lMaxEditDistance, + hasReachedMaximalAllowableEditDistance, iDatabase); + + // DEBUG + OPENTREP_LOG_DEBUG ("---- Current query string: `" + << iPartialQueryString << "' --- Kept query: `" + << oMatchedString + << "', with a Levenshtein edit distance of " + << lEditDistance + << " over an allowable edit distance of " + << lAllowableEditDistance << " (over a maximum of " + << lMaxEditDistance << "), for " + << ioMatchingSet.size() << " matches."); + + if (ioMatchingSet.empty() == false) { + // Store the effective (Levenshtein) edit distance/error + ioMatchingDocument.setEditDistance (lEditDistance); + + // Store the allowable edit distance/error + ioMatchingDocument.setAllowableEditDistance (lAllowableEditDistance); + + // Since a result has been found, the search can be stopped + // for that part of the query. + shouldStop = true; + break; + } + + // Allow for one more spelling error + ++lMaxEditDistance; + + /** + Stop when it is no longer necessary to increase the maximal + allowable edit distance, as it is already greater than the + maximum of the calculated edit distance. 
+ */ + if (hasReachedMaximalAllowableEditDistance == true) { + shouldStop = true; + } + } + + } catch (const Xapian::Error& error) { + OPENTREP_LOG_ERROR ("Exception: " << error.get_msg()); + throw XapianException(); + } + + return oMatchedString; + } + + // ////////////////////////////////////////////////////////////////////// void StringMatcher:: extractBestMatchingDocumentFromMSet (const Xapian::MSet& iMatchingSet, Document& ioMatchingDocument) { Modified: trunk/opentrep/opentrep/bom/StringMatcher.hpp =================================================================== --- trunk/opentrep/opentrep/bom/StringMatcher.hpp 2009-08-10 12:17:27 UTC (rev 172) +++ trunk/opentrep/opentrep/bom/StringMatcher.hpp 2009-08-10 16:23:33 UTC (rev 173) @@ -29,9 +29,13 @@ /** Search, within the Xapian database, for occurrences of the words of the search string. @param Xapian::MSet& The Xapian matching set. It can be empty. - @param const std::string& The query string. - @param NbOfErrors_T& The calculated (and applied) edit distance/error. - @param NbOfErrors_T& The maximal allowable edit distance/error. + @param const TravelQuery_T& The query string. + @param NbOfErrors_T& The Levenshtein edit distance/error. + @param NbOfErrors_T& The effective allowable edit distance/error, + as calculated as a function of the number of letters. + @param NbOfErrors_T& The maximal allowable edit distance/error for + that step/call. We always have: + ioEditDistance <= ioAllowableEditDistance <= iMaxEditDistance @param bool& Whether or not the maximal allowable edit distance/error has become greater than the maximum of the edit distance/errors calculated on the phrase. @@ -39,12 +43,12 @@ @return std::string The query string, potentially corrected, which has yielded matches. 
*/ static std::string searchString (Xapian::MSet&, - const std::string& iSearchString, - NbOfErrors_T& ioCalculatedEditDistance, - NbOfErrors_T& ioMaxEditDistance, - bool& ioHasReachedMaximalAllowableEditDistance, + const TravelQuery_T& iQueryString, + Document& ioMatchingDocument, + NbOfErrors_T& ioEditDistance, + NbOfErrors_T& ioAllowableEditDistance, const Xapian::Database&); - + /** Extract the best matching Xapian document. <br>If there are several such best matching documents (for instance, several at, say, 100%), one is taken randomly. Well, Modified: trunk/opentrep/opentrep/bom/sources.mk =================================================================== --- trunk/opentrep/opentrep/bom/sources.mk 2009-08-10 12:17:27 UTC (rev 172) +++ trunk/opentrep/opentrep/bom/sources.mk 2009-08-10 16:23:33 UTC (rev 173) @@ -13,6 +13,7 @@ $(top_srcdir)/opentrep/bom/Result.hpp \ $(top_srcdir)/opentrep/bom/ResultList.hpp \ $(top_srcdir)/opentrep/bom/ResultHolder.hpp \ + $(top_srcdir)/opentrep/bom/Levenshtein.hpp \ $(top_srcdir)/opentrep/bom/StringMatcher.hpp bom_cc_sources = $(top_srcdir)/opentrep/bom/BomAbstract.cpp \ $(top_srcdir)/opentrep/bom/BomType.cpp \ @@ -25,4 +26,5 @@ $(top_srcdir)/opentrep/bom/Document.cpp \ $(top_srcdir)/opentrep/bom/Result.cpp \ $(top_srcdir)/opentrep/bom/ResultHolder.cpp \ + $(top_srcdir)/opentrep/bom/Levenshtein.cpp \ $(top_srcdir)/opentrep/bom/StringMatcher.cpp Modified: trunk/opentrep/opentrep/command/RequestInterpreter.cpp =================================================================== --- trunk/opentrep/opentrep/command/RequestInterpreter.cpp 2009-08-10 12:17:27 UTC (rev 172) +++ trunk/opentrep/opentrep/command/RequestInterpreter.cpp 2009-08-10 16:23:33 UTC (rev 173) @@ -143,6 +143,15 @@ bool hasRetrievedPlace = retrieveAndFillPlace (lDocument, ioSociSession, lPlace); + // Retrieve the effective (Levenshtein) edit distance/error, as + // well as the allowable edit distance/error, and store them in + // the Place object. 
+ const NbOfErrors_T& lEditDistance = lDocument.getEditDistance(); + const NbOfErrors_T& lAllowableEditDistance = + lDocument.getAllowableEditDistance(); + lPlace.setEditDistance (lEditDistance); + lPlace.setAllowableEditDistance (lAllowableEditDistance); + // If there was no place corresponding to the place code with // the SQL database, an exception is thrown. Hence, here, by // construction, the place has been retrieved from the SQL @@ -181,6 +190,12 @@ // Same remark as above assert (hasRetrievedPlace == true); + // The extra matching Place object has the very same effective + // (Levenshtein) and allowable edit distances/errors as the + // main Place object. + lExtraPlace.setEditDistance (lEditDistance); + lExtraPlace.setAllowableEditDistance (lAllowableEditDistance); + // Insert the extra matching Place object within the dedicated // list within the main Place object FacPlace::initLinkWithExtraPlace (lPlace, lExtraPlace); @@ -214,6 +229,12 @@ // Same remark as above assert (hasRetrievedPlace == true); + // The extra matching Place object has the very same effective + // (Levenshtein) and allowable edit distances/errors as the + // main Place object. + lAlterPlace.setEditDistance (lEditDistance); + lAlterPlace.setAllowableEditDistance (lAllowableEditDistance); + // Insert the alternate matching Place object within the dedicated // list within the main Place object FacPlace::initLinkWithAlternatePlace (lPlace, lAlterPlace); Modified: trunk/opentrep/opentrep/python/pyopentrep.cpp =================================================================== --- trunk/opentrep/opentrep/python/pyopentrep.cpp 2009-08-10 12:17:27 UTC (rev 172) +++ trunk/opentrep/opentrep/python/pyopentrep.cpp 2009-08-10 16:23:33 UTC (rev 173) @@ -17,15 +17,30 @@ struct OpenTrepSearcher { public: - + /** Wrapper around the search use case. 
*/ std::string search (const std::string& iTravelQuery) { - std::ostringstream oStr; + const bool areFullDetailsRequired = false; + return searchImpl (iTravelQuery, areFullDetailsRequired); + } + /** Wrapper around the search use case. */ + std::string searchWithFullDetails (const std::string& iTravelQuery) { + const bool areFullDetailsRequired = true; + return searchImpl (iTravelQuery, areFullDetailsRequired); + } + + private: + /** Wrapper around the search use case. */ + std::string searchImpl (const std::string& iTravelQuery, + const bool areFullDetailsRequired) { + std::ostringstream oNoDetailedStr; + std::ostringstream oDetailedStr; + // Sanity check if (_logOutputStream == NULL) { - oStr << "The log filepath is not valid." << std::endl; - return oStr.str(); + oNoDetailedStr << "The log filepath is not valid." << std::endl; + return oNoDetailedStr.str(); } assert (_logOutputStream != NULL); @@ -36,11 +51,13 @@ << std::endl; if (_opentrepService == NULL) { - oStr << "The OpenTREP service has not been initialised, i.e., " - << "the init() method has not been called correctly on the " - << "OpenTrepSearcher object. Please check that all the " - << "parameters are not empty and point to actual files."; - return oStr.str(); + oNoDetailedStr << "The OpenTREP service has not been initialised, " + << "i.e., the init() method has not been called " + << "correctly on the OpenTrepSearcher object. 
Please " + << "check that all the parameters are not empty and " + << "point to actual files."; + *_logOutputStream << oNoDetailedStr.str(); + return oNoDetailedStr.str(); } assert (_opentrepService != NULL); @@ -57,59 +74,79 @@ if (nbOfMatches != 0) { NbOfMatches_T idx = 0; + for(LocationList_T::const_iterator itLocation = lLocationList.begin(); itLocation != lLocationList.end(); ++itLocation, ++idx) { const Location& lLocation = *itLocation; + if (idx != 0) { - oStr << ","; + oNoDetailedStr << ","; } - oStr << lLocation.getLocationCode(); + + oNoDetailedStr << lLocation.getLocationCode(); + oDetailedStr << idx+1 << ". " << lLocation.toShortString() + << std::endl; // List of extra matching locations (those with the same // matching weight/percentage) const LocationList_T& lExtraLocationList = lLocation.getExtraLocationList(); if (lExtraLocationList.empty() == false) { + oDetailedStr << " Extra matches: " << std::endl; + + NbOfMatches_T idxExtra = 0; for (LocationList_T::const_iterator itLoc = lExtraLocationList.begin(); - itLoc != lExtraLocationList.end(); ++itLoc) { - oStr << ":"; + itLoc != lExtraLocationList.end(); ++itLoc, ++idxExtra) { + oNoDetailedStr << ":"; + oDetailedStr << " " << idx+1 << "." << idxExtra+1 << ". 
"; + const Location& lExtraLocation = *itLoc; - oStr << lExtraLocation.getLocationCode(); + oNoDetailedStr << lExtraLocation.getLocationCode(); + oDetailedStr << lExtraLocation << std::endl; } } // The matching weight/percentage is the same for the main // and the extra matching locations - oStr << "/" << lLocation.getPercentage(); + oNoDetailedStr << "/" << lLocation.getPercentage(); // List of alternate matching locations (those with a lower // matching weight/percentage) const LocationList_T& lAlternateLocationList = lLocation.getAlternateLocationList(); if (lAlternateLocationList.empty() == false) { + oDetailedStr << " Alternate matches: " << std::endl; + + NbOfMatches_T idxAlter = 0; for (LocationList_T::const_iterator itLoc = lAlternateLocationList.begin(); - itLoc != lAlternateLocationList.end(); ++itLoc) { - oStr << "-"; + itLoc != lAlternateLocationList.end(); ++itLoc, ++idxAlter) { + oNoDetailedStr << "-"; + oDetailedStr << " " << idx+1 << "." << idxAlter+1 << ". "; + const Location& lAlternateLocation = *itLoc; - oStr << lAlternateLocation.getLocationCode() - << "/" << lAlternateLocation.getPercentage(); + oNoDetailedStr << lAlternateLocation.getLocationCode() + << "/" << lAlternateLocation.getPercentage(); + oDetailedStr << lAlternateLocation << std::endl; } } } } if (lNonMatchedWordList.empty() == false) { - oStr << ";"; + oNoDetailedStr << ";"; + oDetailedStr << "Not recognised words:" << std::endl; NbOfMatches_T idx = 0; for (WordList_T::const_iterator itWord = lNonMatchedWordList.begin(); itWord != lNonMatchedWordList.end(); ++itWord, ++idx) { const Word_T& lWord = *itWord; if (idx != 0) { - oStr << ","; + oNoDetailedStr << ","; + oDetailedStr << idx+1 << "." 
<< std::endl; } - oStr << lWord; + oNoDetailedStr << lWord; + oDetailedStr << lWord; } } @@ -118,7 +155,10 @@ << "' yielded:" << std::endl; // DEBUG - *_logOutputStream << oStr.str() << std::endl; + *_logOutputStream << "Short version: " + << oNoDetailedStr.str() << std::endl; + *_logOutputStream << "Long version: " + << oDetailedStr.str() << std::endl; } catch (const RootException& eOpenTrepError) { *_logOutputStream << "OpenTrep error: " << eOpenTrepError.what() @@ -130,8 +170,14 @@ } catch (...) { *_logOutputStream << "Unknown error" << std::endl; } - - return oStr.str(); + + // Return the string corresponding to the request (either with + // or without details). + if (areFullDetailsRequired == true) { + return oDetailedStr.str(); + } else { + return oNoDetailedStr.str(); + } } public: @@ -213,5 +259,6 @@ BOOST_PYTHON_MODULE(libpyopentrep) { boost::python::class_<OPENTREP::OpenTrepSearcher> ("OpenTrepSearcher") .def ("search", &OPENTREP::OpenTrepSearcher::search) + .def ("searchWithFullDetails", &OPENTREP::OpenTrepSearcher::searchWithFullDetails) .def ("init", &OPENTREP::OpenTrepSearcher::init); } Modified: trunk/opentrep/opentrep/python/pyopentrep.py =================================================================== --- trunk/opentrep/opentrep/python/pyopentrep.py 2009-08-10 12:17:27 UTC (rev 172) +++ trunk/opentrep/opentrep/python/pyopentrep.py 2009-08-10 16:23:33 UTC (rev 173) @@ -13,13 +13,14 @@ # If no search string was supplied as arguments of the command-line, # ask the user for some -searchString = sys.argv[1:] +searchString = ' '.join(sys.argv[1:]) if searchString == '': # Ask for the user input searchString = raw_input('Enter a search string, or just Enter for the default one (' + defaultSearchString + '): ') if searchString == '' : searchString = defaultSearchString # Call the OpenTrep C++ library -result = openTrepLibrary.search(searchString) +#result = openTrepLibrary.search(searchString) +result = 
openTrepLibrary.searchWithFullDetails(searchString) print 'Result:' print result This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <den...@us...> - 2009-08-24 20:05:52
|
Revision: 187 http://opentrep.svn.sourceforge.net/opentrep/?rev=187&view=rev Author: denis_arnaud Date: 2009-08-24 20:05:41 +0000 (Mon, 24 Aug 2009) Log Message: ----------- [Dev] 1. Fixed a bug in the edit distance/error storage. 2. The Location structure now gets the original and corrected keywords. Modified Paths: -------------- trunk/opentrep/opentrep/Location.hpp trunk/opentrep/opentrep/bom/Document.hpp trunk/opentrep/opentrep/bom/Place.cpp trunk/opentrep/opentrep/bom/Place.hpp trunk/opentrep/opentrep/bom/StringMatcher.cpp trunk/opentrep/opentrep/command/RequestInterpreter.cpp trunk/opentrep/opentrep/python/pyopentrep.py Modified: trunk/opentrep/opentrep/Location.hpp =================================================================== --- trunk/opentrep/opentrep/Location.hpp 2009-08-23 16:50:59 UTC (rev 186) +++ trunk/opentrep/opentrep/Location.hpp 2009-08-24 20:05:41 UTC (rev 187) @@ -74,6 +74,16 @@ return _nameList; } + /** Get the original keywords. */ + std::string getOriginalKeywords() const { + return _originalKeywords; + } + + /** Get the corrected keywords. */ + std::string getCorrectedKeywords() const { + return _correctedKeywords; + } + /** Get the matching percentage. */ const MatchingPercentage_T& getPercentage() const { return _percentage; @@ -152,6 +162,16 @@ _nameList = iNameList; } + /** Set the original keywords. */ + void setOriginalKeywords (const std::string& iOriginalKeywords) { + _originalKeywords = iOriginalKeywords; + } + + /** Set the corrected keywords. */ + void setCorrectedKeywords (const std::string& iCorrectedKeywords) { + _correctedKeywords = iCorrectedKeywords; + } + /** Set the Xapian matching percentage. 
*/ void setPercentage (const MatchingPercentage_T& iPercentage) { _percentage = iPercentage; @@ -198,15 +218,18 @@ oStr << _locationCode << ", " << _cityCode << ", " << _stateCode << ", " << _countryCode << ", " << _regionCode << ", " << _continentCode << ", " << _timeZoneGroup - << ", " << _longitude << ", " << _latitude << ", " << _percentage + << ", " << _longitude << ", " << _latitude + << ", " << _originalKeywords << ", " << _correctedKeywords + << ", " << _percentage << ", " << _editDistance << ", " << _allowableEditDistance; if (_extraLocationList.empty() == false) { - oStr << " " << _extraLocationList.size() << " extra match(es)"; + oStr << " with " << _extraLocationList.size() << " extra match(es)"; } if (_alternateLocationList.empty() == false) { - oStr << " " << _alternateLocationList.size() << " alternate match(es)"; + oStr << " with " << _alternateLocationList.size() + << " alternate match(es)"; } return oStr.str(); @@ -262,6 +285,8 @@ const std::string& iTimeZoneGroup, const double iLongitude, const double iLatitude, const LocationNameList_T& iNameList, + const std::string& iOriginalKeywords, + const std::string& iCorrectedKeywords, const MatchingPercentage_T& iPercentage, const NbOfErrors_T& iEditDistance, const NbOfErrors_T& iAllowableEditDistance) @@ -270,6 +295,8 @@ _regionCode (iRegionCode), _continentCode (iContinentCode), _timeZoneGroup (iTimeZoneGroup), _longitude (iLongitude), _latitude (iLatitude), _nameList (iNameList), + _originalKeywords (iOriginalKeywords), + _correctedKeywords (iCorrectedKeywords), _percentage (iPercentage), _editDistance (iEditDistance), _allowableEditDistance (iAllowableEditDistance) { } @@ -306,6 +333,12 @@ /** List of (American) English names. */ LocationNameList_T _nameList; + /** Original keywords. */ + std::string _originalKeywords; + + /** Corrected keywords. */ + std::string _correctedKeywords; + /** Matching percentage. 
*/ MatchingPercentage_T _percentage; Modified: trunk/opentrep/opentrep/bom/Document.hpp =================================================================== --- trunk/opentrep/opentrep/bom/Document.hpp 2009-08-23 16:50:59 UTC (rev 186) +++ trunk/opentrep/opentrep/bom/Document.hpp 2009-08-24 20:05:41 UTC (rev 187) @@ -34,13 +34,13 @@ public: // ////////////////// Getters //////////////// /** Get the query string. */ - const TravelQuery_T& getTravelQuery() { + const TravelQuery_T& getTravelQuery() const { return _queryString; } /** Get the corrected query string. <br>When empty, it means that no correction was necessary. */ - const TravelQuery_T& getCorrectedTravelQuery() { + const TravelQuery_T& getCorrectedTravelQuery() const { return _correctedQueryString; } Modified: trunk/opentrep/opentrep/bom/Place.cpp =================================================================== --- trunk/opentrep/opentrep/bom/Place.cpp 2009-08-23 16:50:59 UTC (rev 186) +++ trunk/opentrep/opentrep/bom/Place.cpp 2009-08-24 20:05:41 UTC (rev 187) @@ -22,6 +22,8 @@ _regionCode (iPlace._regionCode), _continentCode (iPlace._continentCode), _timeZoneGroup (iPlace._timeZoneGroup), _longitude (iPlace._longitude), _latitude (iPlace._latitude), _nameMatrix (iPlace._nameMatrix), + _originalKeywords (iPlace._originalKeywords), + _correctedKeywords (iPlace._correctedKeywords), _docID (iPlace._docID), _percentage (iPlace._percentage), _editDistance (iPlace._editDistance), _allowableEditDistance (iPlace._allowableEditDistance) { @@ -80,6 +82,7 @@ << ", " << _countryCode << ", " << _regionCode << ", " << _continentCode << ", " << _timeZoneGroup << ", " << _longitude << ", " << _latitude + << ", " << _originalKeywords << ", " << _correctedKeywords << ", " << _docID << ", " << _percentage << ", " << _editDistance << ", " << _allowableEditDistance << ". 
"; @@ -137,6 +140,7 @@ << ", " << _countryCode << ", " << _regionCode << ", " << _continentCode << ", " << _timeZoneGroup << ", " << _longitude << ", " << _latitude + << ", " << _originalKeywords << ", " << _correctedKeywords << ", " << _docID << ", " << _percentage << ", " << _editDistance << ", " << _allowableEditDistance; @@ -188,6 +192,8 @@ << ", time zone group = " << _timeZoneGroup << ", longitude = " << _longitude << ", latitude = " << _latitude + << ", original keywords = " << _originalKeywords + << ", corrected keywords = " << _correctedKeywords << ", docID = " << _docID << ", percentage = " << _percentage << "%" << ", edit distance = " << _editDistance @@ -265,6 +271,7 @@ Location oLocation (_placeCode, lCityCode, _stateCode, _countryCode, _regionCode, _continentCode, _timeZoneGroup, _longitude, _latitude, lNameList, + _originalKeywords, _correctedKeywords, _percentage, _editDistance, _allowableEditDistance); // Add extra matching locations, whenever they exist Modified: trunk/opentrep/opentrep/bom/Place.hpp =================================================================== --- trunk/opentrep/opentrep/bom/Place.hpp 2009-08-23 16:50:59 UTC (rev 186) +++ trunk/opentrep/opentrep/bom/Place.hpp 2009-08-24 20:05:41 UTC (rev 187) @@ -76,6 +76,16 @@ return _latitude; } + /** Get the original keywords. */ + std::string getOriginalKeywords() const { + return _originalKeywords; + } + + /** Get the corrected keywords. */ + std::string getCorrectedKeywords() const { + return _correctedKeywords; + } + /** Get the Xapian document ID. */ const XapianDocID_T& getDocID() const { return _docID; @@ -167,6 +177,16 @@ _latitude = iLatitude; } + /** Set the original keywords. */ + void setOriginalKeywords (const std::string& iOriginalKeywords) { + _originalKeywords = iOriginalKeywords; + } + + /** Set the corrected keywords. */ + void setCorrectedKeywords (const std::string& iCorrectedKeywords) { + _correctedKeywords = iCorrectedKeywords; + } + /** Set the Xapian document ID. 
*/ void setDocID (const XapianDocID_T& iDocID) { _docID = iDocID; @@ -280,6 +300,12 @@ /** List of names, for each given language. */ NameMatrix_T _nameMatrix; + /** Original keywords. */ + std::string _originalKeywords; + + /** Corrected keywords. */ + std::string _correctedKeywords; + /** Xapian document ID. */ XapianDocID_T _docID; Modified: trunk/opentrep/opentrep/bom/StringMatcher.cpp =================================================================== --- trunk/opentrep/opentrep/bom/StringMatcher.cpp 2009-08-23 16:50:59 UTC (rev 186) +++ trunk/opentrep/opentrep/bom/StringMatcher.cpp 2009-08-24 20:05:41 UTC (rev 187) @@ -398,7 +398,7 @@ // Display the results nbMatches = ioMatchingSet.size(); - + // DEBUG /* OPENTREP_LOG_DEBUG ("Corrected query `" << lCorrectedQueryString @@ -417,6 +417,10 @@ ioHasReachedMaximalAllowableEditDistance = true; } + // Store the effective (Levenshtein) edit distance/error + ioEditDistance = Levenshtein::getDistance (lOriginalQueryString, + lCorrectedQueryString); + oMatchedString = lCorrectedQueryString; return oMatchedString; } Modified: trunk/opentrep/opentrep/command/RequestInterpreter.cpp =================================================================== --- trunk/opentrep/opentrep/command/RequestInterpreter.cpp 2009-08-23 16:50:59 UTC (rev 186) +++ trunk/opentrep/opentrep/command/RequestInterpreter.cpp 2009-08-24 20:05:41 UTC (rev 187) @@ -63,11 +63,17 @@ /** Helper function. 
*/ // ////////////////////////////////////////////////////////////////////// - bool retrieveAndFillPlace (const Xapian::Document& iDocument, + bool retrieveAndFillPlace (const std::string& iOriginalKeywords, + const std::string& iCorrectedKeywords, + const Xapian::Document& iDocument, const Xapian::percent& iDocPercentage, soci::session& ioSociSession, Place& ioPlace) { bool hasRetrievedPlace = false; + // Set the original and corrected/suggested keywords + ioPlace.setOriginalKeywords (iOriginalKeywords); + ioPlace.setCorrectedKeywords (iCorrectedKeywords); + // Set the matching percentage ioPlace.setPercentage (iDocPercentage); @@ -111,10 +117,16 @@ // ////////////////////////////////////////////////////////////////////// bool retrieveAndFillPlace (const Document& iDocument, soci::session& ioSociSession, Place& ioPlace) { + // Note that Document::getTravelQuery() returns a TravelQuery_T, which + // is actually a std::string + const std::string& lOriginalKeywords = iDocument.getTravelQuery(); + const std::string& lCorrectedKeywords = iDocument.getCorrectedTravelQuery(); + // Delegate const Xapian::Document& lXapianDocument = iDocument.getXapianDocument(); const Xapian::percent& lDocPercentage = iDocument.getXapianPercentage(); - return retrieveAndFillPlace (lXapianDocument, lDocPercentage, + return retrieveAndFillPlace (lOriginalKeywords, lCorrectedKeywords, + lXapianDocument, lDocPercentage, ioSociSession, ioPlace); } @@ -166,6 +178,9 @@ // Retrieve the list of extra matching documents (documents // matching with the same weight/percentage) + const std::string& lOriginalKeywords = lDocument.getTravelQuery(); + const std::string& lCorrectedKeywords = + lDocument.getCorrectedTravelQuery(); const Xapian::percent& lExtraDocPercentage = lDocument.getXapianPercentage(); const XapianDocumentList_T& lExtraDocumentList = @@ -183,9 +198,10 @@ // Retrieve, in the MySQL database, the place corresponding to // the place code located as the first word of the Xapian // document 
data. - hasRetrievedPlace = retrieveAndFillPlace (lExtraDocument, - lExtraDocPercentage, - ioSociSession, lExtraPlace); + hasRetrievedPlace = + retrieveAndFillPlace (lOriginalKeywords, lCorrectedKeywords, + lExtraDocument, lExtraDocPercentage, + ioSociSession, lExtraPlace); // Same remark as above assert (hasRetrievedPlace == true); @@ -222,9 +238,10 @@ // Retrieve, in the MySQL database, the place corresponding to // the place code located as the first word of the Xapian // document data. - hasRetrievedPlace = retrieveAndFillPlace (lAlterDocument, - lAlterDocPercentage, - ioSociSession, lAlterPlace); + hasRetrievedPlace = + retrieveAndFillPlace (lOriginalKeywords, lCorrectedKeywords, + lAlterDocument, lAlterDocPercentage, + ioSociSession, lAlterPlace); // Same remark as above assert (hasRetrievedPlace == true); Modified: trunk/opentrep/opentrep/python/pyopentrep.py =================================================================== --- trunk/opentrep/opentrep/python/pyopentrep.py 2009-08-23 16:50:59 UTC (rev 186) +++ trunk/opentrep/opentrep/python/pyopentrep.py 2009-08-24 20:05:41 UTC (rev 187) @@ -5,6 +5,8 @@ # Default search string defaultSearchString = 'sna francicso rio de janero lso angles reykyavki' +needDetails = True + # Parser helpers def getMain(locations): return locations[:3] @@ -23,12 +25,24 @@ if searchString == '' : searchString = defaultSearchString # Call the OpenTrep C++ library -result = openTrepLibrary.search(searchString) -#result = openTrepLibrary.searchWithFullDetails(searchString) +if needDetails == True: + result = openTrepLibrary.searchWithFullDetails (searchString) +else: + result = openTrepLibrary.search (searchString) + print 'Raw result from the OpenTrep library:' print result -# defaults +# If we have requested the detailed display, the result string is +# potentially big and complex, and is not aimed to be parsed. So, it +# is better to stop here. 
+if needDetails == True: + quit() + +# As we have requested no details, the result string is aimed to be +# parsed, so as to get the whole meaning of it. + +# Defaults msg, form_value, original_form_value, unrecognized = '', '', '', '' # Sample of result string to be parsed: This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <den...@us...> - 2009-10-04 18:10:38
|
Revision: 201 http://opentrep.svn.sourceforge.net/opentrep/?rev=201&view=rev Author: denis_arnaud Date: 2009-10-04 18:10:32 +0000 (Sun, 04 Oct 2009) Log Message: ----------- [Dev] The number of (database) entries is now shown after the indexation. Modified Paths: -------------- trunk/opentrep/opentrep/OPENTREP_Service.hpp trunk/opentrep/opentrep/OPENTREP_Types.hpp trunk/opentrep/opentrep/batches/indexer.cpp trunk/opentrep/opentrep/command/IndexBuilder.cpp trunk/opentrep/opentrep/command/IndexBuilder.hpp trunk/opentrep/opentrep/service/OPENTREP_Service.cpp Modified: trunk/opentrep/opentrep/OPENTREP_Service.hpp =================================================================== --- trunk/opentrep/opentrep/OPENTREP_Service.hpp 2009-10-04 17:50:46 UTC (rev 200) +++ trunk/opentrep/opentrep/OPENTREP_Service.hpp 2009-10-04 18:10:32 UTC (rev 201) @@ -24,7 +24,7 @@ public: // /////////// Business Methods ///////////// /** Build the Xapian database (index) on the BOM held in memory. */ - void buildSearchIndex(); + NbOfDBEntries_T buildSearchIndex(); /** Match the given string, thanks to a full-text search on the underlying Xapian index (named "database"). Modified: trunk/opentrep/opentrep/OPENTREP_Types.hpp =================================================================== --- trunk/opentrep/opentrep/OPENTREP_Types.hpp 2009-10-04 17:50:46 UTC (rev 200) +++ trunk/opentrep/opentrep/OPENTREP_Types.hpp 2009-10-04 18:10:32 UTC (rev 201) @@ -96,6 +96,9 @@ /** Travel search query. */ typedef std::string TravelQuery_T; + /** Number of entries in the Xapian database. */ + typedef unsigned int NbOfDBEntries_T; + /** Word, which is the atomic element of a query string. 
*/ typedef std::string Word_T; Modified: trunk/opentrep/opentrep/batches/indexer.cpp =================================================================== --- trunk/opentrep/opentrep/batches/indexer.cpp 2009-10-04 17:50:46 UTC (rev 200) +++ trunk/opentrep/opentrep/batches/indexer.cpp 2009-10-04 18:10:32 UTC (rev 201) @@ -212,11 +212,15 @@ lXapianDatabaseName); // Launch the indexation - opentrepService.buildSearchIndex(); + const OPENTREP::NbOfDBEntries_T lNbOfEntries = + opentrepService.buildSearchIndex(); // Close the Log outputFile logOutputFile.close(); + // + std::cout << lNbOfEntries << " entries have been processed" << std::endl; + } catch (const OPENTREP::RootException& otexp) { std::cerr << "Standard exception: " << otexp.what() << std::endl; return -1; Modified: trunk/opentrep/opentrep/command/IndexBuilder.cpp =================================================================== --- trunk/opentrep/opentrep/command/IndexBuilder.cpp 2009-10-04 17:50:46 UTC (rev 200) +++ trunk/opentrep/opentrep/command/IndexBuilder.cpp 2009-10-04 18:10:32 UTC (rev 201) @@ -141,9 +141,10 @@ } // ////////////////////////////////////////////////////////////////////// - void IndexBuilder:: + NbOfDBEntries_T IndexBuilder:: buildSearchIndex (soci::session& ioSociSession, const TravelDatabaseName_T& iTravelDatabaseName) { + NbOfDBEntries_T oNbOfEntries = 0; try { @@ -164,7 +165,6 @@ lDatabase.begin_transaction(); // Iterate through the retrieved database rows - int idx = 1; const bool shouldDoReset = true; bool hasStillData = DBManager::iterateOnStatement (lSelectStatement, lPlace, @@ -174,11 +174,13 @@ // Xapian index IndexBuilder::addDocumentToIndex (lDatabase, lPlace); + // + ++oNbOfEntries; + // DEBUG - OPENTREP_LOG_DEBUG ("[" << idx << "] " << lPlace); + OPENTREP_LOG_DEBUG ("[" << oNbOfEntries << "] " << lPlace); - // Index incrementing - ++idx; + // Iterate on the MySQL database cursor hasStillData = DBManager::iterateOnStatement (lSelectStatement, lPlace, shouldDoReset); } @@ 
-198,6 +200,8 @@ OPENTREP_LOG_ERROR ("Error: " << lStdError.what()); throw RootException(); } + + return oNbOfEntries; } } Modified: trunk/opentrep/opentrep/command/IndexBuilder.hpp =================================================================== --- trunk/opentrep/opentrep/command/IndexBuilder.hpp 2009-10-04 17:50:46 UTC (rev 200) +++ trunk/opentrep/opentrep/command/IndexBuilder.hpp 2009-10-04 18:10:32 UTC (rev 201) @@ -29,7 +29,8 @@ static void addDocumentToIndex (Xapian::WritableDatabase&, Place&); /** Build Xapian database. */ - static void buildSearchIndex (soci::session&, const TravelDatabaseName_T&); + static NbOfDBEntries_T buildSearchIndex (soci::session&, + const TravelDatabaseName_T&); private: /** Constructors. */ Modified: trunk/opentrep/opentrep/service/OPENTREP_Service.cpp =================================================================== --- trunk/opentrep/opentrep/service/OPENTREP_Service.cpp 2009-10-04 17:50:46 UTC (rev 200) +++ trunk/opentrep/opentrep/service/OPENTREP_Service.cpp 2009-10-04 18:10:32 UTC (rev 201) @@ -99,7 +99,9 @@ } // ////////////////////////////////////////////////////////////////////// - void OPENTREP_Service::buildSearchIndex () { + NbOfDBEntries_T OPENTREP_Service::buildSearchIndex () { + NbOfDBEntries_T oNbOfEntries = 0; + if (_opentrepServiceContext == NULL) { throw NonInitialisedServiceException(); } @@ -119,7 +121,8 @@ // Delegate the index building to the dedicated command BasChronometer lBuildSearchIndexChronometer; lBuildSearchIndexChronometer.start(); - IndexBuilder::buildSearchIndex (lSociSession, lTravelDatabaseName); + oNbOfEntries = IndexBuilder::buildSearchIndex (lSociSession, + lTravelDatabaseName); const double lBuildSearchIndexMeasure = lBuildSearchIndexChronometer.elapsed(); @@ -131,7 +134,9 @@ } catch (const std::exception& error) { OPENTREP_LOG_ERROR ("Exception: " << error.what()); throw BuildIndexException(); - } + } + + return oNbOfEntries; } // 
////////////////////////////////////////////////////////////////////// This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <den...@us...> - 2009-11-02 14:44:12
|
Revision: 204 http://opentrep.svn.sourceforge.net/opentrep/?rev=204&view=rev Author: denis_arnaud Date: 2009-11-02 14:43:52 +0000 (Mon, 02 Nov 2009) Log Message: ----------- [Struct] Deleted duplicated build of the core library. Modified Paths: -------------- trunk/opentrep/opentrep/Makefile.am trunk/opentrep/opentrep/core/Makefile.am trunk/opentrep/opentrep/core/sources.mk Modified: trunk/opentrep/opentrep/Makefile.am =================================================================== --- trunk/opentrep/opentrep/Makefile.am 2009-10-31 00:09:00 UTC (rev 203) +++ trunk/opentrep/opentrep/Makefile.am 2009-11-02 14:43:52 UTC (rev 204) @@ -10,18 +10,7 @@ EXTRA_DIST = config_msvc.h -# Library -lib_LTLIBRARIES = lib@PACKAGE@.la - -lib@PACKAGE@_la_SOURCES = $(service_h_sources) $(service_cc_sources) -lib@PACKAGE@_la_LIBADD = $(top_builddir)/@PACKAGE@/basic/libbas.la \ - $(top_builddir)/@PACKAGE@/bom/libbom.la \ - $(top_builddir)/@PACKAGE@/factory/libfac.la \ - $(top_builddir)/@PACKAGE@/dbadaptor/libdba.la \ - $(top_builddir)/@PACKAGE@/command/libcmd.la \ - $(top_builddir)/@PACKAGE@/service/libsvc.la -lib@PACKAGE@_la_LDFLAGS = -version-info $(GENERIC_LIBRARY_VERSION) - # Header files -nobase_pkginclude_HEADERS = $(service_h_sources) -nobase_nodist_pkginclude_HEADERS = $(top_builddir)/@PACKAGE@/config.h +#nobase_pkginclude_HEADERS = $(service_h_sources) +#nobase_nodist_pkginclude_HEADERS = $(top_builddir)/@PACKAGE@/config.h + Modified: trunk/opentrep/opentrep/core/Makefile.am =================================================================== --- trunk/opentrep/opentrep/core/Makefile.am 2009-10-31 00:09:00 UTC (rev 203) +++ trunk/opentrep/opentrep/core/Makefile.am 2009-11-02 14:43:52 UTC (rev 204) @@ -22,3 +22,8 @@ lib@PACKAGE@_la_LDFLAGS = \ $(BOOST_DATE_TIME_LIB) $(BOOST_PROGRAM_OPTIONS_LIB) \ $(SOCI_LIBS) -version-info $(GENERIC_LIBRARY_VERSION) + +# Header files +pkginclude_HEADERS = $(service_h_sources) +nobase_nodist_pkginclude_HEADERS = 
$(top_builddir)/@PACKAGE@/config.h + Modified: trunk/opentrep/opentrep/core/sources.mk =================================================================== --- trunk/opentrep/opentrep/core/sources.mk 2009-10-31 00:09:00 UTC (rev 203) +++ trunk/opentrep/opentrep/core/sources.mk 2009-11-02 14:43:52 UTC (rev 204) @@ -1,3 +1,9 @@ -service_h_sources = $(top_srcdir)/opentrep/OPENTREP_Types.hpp \ - $(top_srcdir)/opentrep/OPENTREP_Service.hpp +service_h_sources = \ + $(top_srcdir)/opentrep/OPENTREP_Types.hpp \ + $(top_srcdir)/opentrep/OPENTREP_Abstract.hpp \ + $(top_srcdir)/opentrep/DBParams.hpp \ + $(top_srcdir)/opentrep/Location.hpp \ + $(top_srcdir)/opentrep/DistanceErrorRule.hpp \ + $(top_srcdir)/opentrep/OPENTREP_Service.hpp service_cc_sources = + This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <den...@us...> - 2010-09-05 18:56:18
|
Revision: 227 http://opentrep.svn.sourceforge.net/opentrep/?rev=227&view=rev Author: denis_arnaud Date: 2010-09-05 18:56:12 +0000 (Sun, 05 Sep 2010) Log Message: ----------- [Dev] Added a SQL request for getting closest places to some given coordinates (there is still some work to do). Modified Paths: -------------- trunk/opentrep/opentrep/Location.hpp trunk/opentrep/opentrep/bom/Place.hpp trunk/opentrep/opentrep/command/DBManager.cpp trunk/opentrep/opentrep/command/DBManager.hpp trunk/opentrep/opentrep/command/IndexBuilder.cpp Modified: trunk/opentrep/opentrep/Location.hpp =================================================================== --- trunk/opentrep/opentrep/Location.hpp 2010-09-05 16:25:14 UTC (rev 226) +++ trunk/opentrep/opentrep/Location.hpp 2010-09-05 18:56:12 UTC (rev 227) @@ -5,8 +5,7 @@ // Import section // ////////////////////////////////////////////////////////////////////// // STL -#include <istream> -#include <ostream> +#include <iosfwd> #include <string> #include <list> // OpenTrep Modified: trunk/opentrep/opentrep/bom/Place.hpp =================================================================== --- trunk/opentrep/opentrep/bom/Place.hpp 2010-09-05 16:25:14 UTC (rev 226) +++ trunk/opentrep/opentrep/bom/Place.hpp 2010-09-05 18:56:12 UTC (rev 227) @@ -5,8 +5,7 @@ // Import section // ////////////////////////////////////////////////////////////////////// // STL -#include <istream> -#include <ostream> +#include <iosfwd> #include <string> #include <map> // OpenTrep Bom Modified: trunk/opentrep/opentrep/command/DBManager.cpp =================================================================== --- trunk/opentrep/opentrep/command/DBManager.cpp 2010-09-05 16:25:14 UTC (rev 226) +++ trunk/opentrep/opentrep/command/DBManager.cpp 2010-09-05 18:56:12 UTC (rev 227) @@ -1,15 +1,14 @@ // ////////////////////////////////////////////////////////////////////// // Import section // ////////////////////////////////////////////////////////////////////// -// C 
-#include <assert.h> +// STL +#include <cassert> // SOCI #include <soci/core/soci.h> #include <soci/backends/mysql/soci-mysql.h> // OpenTrep #include <opentrep/bom/World.hpp> #include <opentrep/bom/Place.hpp> -#include <opentrep/factory/FacWorld.hpp> #include <opentrep/factory/FacPlace.hpp> #include <opentrep/dbadaptor/DbaPlace.hpp> #include <opentrep/command/DBManager.hpp> @@ -18,9 +17,10 @@ namespace OPENTREP { // ////////////////////////////////////////////////////////////////////// - void DBManager::prepareSelectStatement (soci::session& ioSociSession, - soci::statement& ioSelectStatement, - Place& ioPlace) { + void DBManager:: + prepareSelectFromCodeStatement (soci::session& ioSociSession, + soci::statement& ioSelectStatement, + Place& ioPlace) { try { @@ -60,6 +60,78 @@ // ////////////////////////////////////////////////////////////////////// void DBManager:: + prepareSelectFromCoordStatement (soci::session& ioSociSession, + soci::statement& ioSelectStatement, + const double& iLatitude, + const double& iLongitude) { + + try { + + // Instanciate a SQL statement (no request is performed at that stage) + /** + select rpd.code AS code, city_code, xapian_docid, is_airport, is_city, + is_main, is_commercial, state_code, country_code, region_code, + continent_code, time_zone_grp, longitude, latitude, language_code, + classical_name, extended_name, alternate_name1, alternate_name2, + alternate_name3, alternate_name4, alternate_name5, alternate_name6, + alternate_name7, alternate_name8, alternate_name9, alternate_name10 + from ref_place_details rpd, ref_place_names rpn + where rpd.code = iPlaceCode + and rpn.code = rpd.code; + + select (airpop.tpax)/1000 as 'popularity', + places.code as 'airport_code', places.code as 'city_code', + places.longitude as 'longitude', places.latitude as 'latitude' + from airport_popularity AS airpop, ref_place_details AS places + WHERE places.longitude >= ${PL_LON_LOWER} + AND places.longitude <= ${PL_LON_UPPER} + AND places.latitude >= 
${PL_LAT_LOWER} + AND places.latitude <= ${PL_LAT_UPPER} + AND airpop.airport_code = places.code + AND places.is_city = 'y' + AND names.code = places.code + ORDER BY airpop.tpax DESC + */ + Place& lPlace = FacPlace::instance().create(); + const double K_ERROR = 2.0; + const double lLowerBoundLatitude = iLatitude - K_ERROR; + const double lUpperBoundLatitude = iLatitude + K_ERROR; + const double lLowerBoundLongitude = iLongitude - K_ERROR; + const double lUpperBoundLongitude = iLongitude + K_ERROR; + + ioSelectStatement = + (ioSociSession.prepare + << "select rpd.code AS code, city_code, xapian_docid, is_airport, " + << "is_city, is_main, is_commercial, state_code, country_code, " + << "region_code, continent_code, time_zone_grp, longitude, latitude, " + << "language_code, classical_name, extended_name, " + << "alternate_name1, alternate_name2, alternate_name3, " + << "alternate_name4, alternate_name5, alternate_name6, " + << "alternate_name7, alternate_name8, alternate_name9, " + << "alternate_name10 " + << "from ref_place_details rpd, ref_place_names rpn, " + << " popularity pop " + << "where latitude >= :lower_latitude " + << " and latitude <= :upper_latitude " + << " and longitude >= :lower_longitude " + << " and longitude <= :upper_longitude " + << " and rpn.code = rpd.code" + << " and pop.airport_code = rpd.code", + soci::into (lPlace), soci::use (lLowerBoundLatitude), + soci::use (lUpperBoundLatitude), soci::use (lLowerBoundLongitude), + soci::use (lUpperBoundLongitude)); + + // Execute the SQL query + ioSelectStatement.execute(); + + } catch (std::exception const& lException) { + OPENTREP_LOG_ERROR ("Error: " << lException.what()); + throw SQLDatabaseException(); + } + } + + // ////////////////////////////////////////////////////////////////////// + void DBManager:: prepareSelectOnPlaceCodeStatement (soci::session& ioSociSession, soci::statement& ioSelectStatement, const std::string& iPlaceCode, @@ -248,4 +320,22 @@ return oHasRetrievedPlace; } + // 
////////////////////////////////////////////////////////////////////// + bool DBManager::retrieveClosestPlaces (soci::session& ioSociSession, + const double& iLatitude, + const double& iLongitude, + PlaceOrderedList_T& ioPlaceList) { + bool oHasRetrievedPlace = false; + + try { + + + } catch (std::exception const& lException) { + OPENTREP_LOG_ERROR ("Error: " << lException.what()); + throw SQLDatabaseException(); + } + + return oHasRetrievedPlace; + } + } Modified: trunk/opentrep/opentrep/command/DBManager.hpp =================================================================== --- trunk/opentrep/opentrep/command/DBManager.hpp 2010-09-05 16:25:14 UTC (rev 226) +++ trunk/opentrep/opentrep/command/DBManager.hpp 2010-09-05 18:56:12 UTC (rev 227) @@ -6,6 +6,7 @@ // ////////////////////////////////////////////////////////////////////// // OpenTrep #include <opentrep/OPENTREP_Types.hpp> +#include <opentrep/bom/PlaceList.hpp> // Forward declarations namespace soci { @@ -15,45 +16,89 @@ namespace OPENTREP { - // Forward declarations - class Place; - /** Class building the Business Object Model (BOM) from data retrieved from the database. */ class DBManager { public: /** Update the Xapian document ID field of the database row - corresponding to the given Place object. */ + corresponding to the given Place object. + @param soci::session& SOCI session handler. + @param const Place& The place to be updated. */ static void updatePlaceInDB (soci::session&, const Place&); /** Retrieve, from the (MySQL) database, the row corresponding to the given place code (e.g., 'sfo' for San Francisco Intl airport), and fill the given Place object with that retrieved - data. */ + data. + @param soci::session& SOCI session handler. + @param const std::string& The code of the place to be retrieved. + @param Place& The object corresponding to the place to be retrieved. + It has to be given empty, and is filled by the method. 
*/ static bool retrievePlace (soci::session&, const std::string& iPlaceCode, Place&); + /** Retrieve, from the (MySQL) database, the row corresponding to + the given place code (e.g., 'sfo' for San Francisco Intl + airport), and fill the given Place object with that retrieved + data. + @param soci::session& SOCI session handler. + @param const std::double& The latitude of the place to be retrieved. + @param const std::double& The longitude of the place to be retrieved. + @param PlaceOrderedList_T& The list corresponding to the places to be + retrieved. It has to be given empty, and is filled by the + method. */ + static bool retrieveClosestPlaces (soci::session&, const double& iLatitude, + const double& iLongitude, + PlaceOrderedList_T&); + public: - /** Prepare (parse and put in cache) the SQL statement. */ - static void prepareSelectStatement (soci::session&, soci::statement&, - Place&); + /** Prepare (parse and put in cache) the SQL statement. + @param soci::session& SOCI session handler. + @param soci::statement& SOCI SQL statement handler. + @param Place& The object corresponding to the place to be retrieved. + It has to be given empty, and is filled by the method. */ + static void prepareSelectFromCodeStatement (soci::session&, + soci::statement&, Place&); + /** Prepare (parse and put in cache) the SQL statement. + @param soci::session& SOCI session handler. + @param soci::statement& SOCI SQL statement handler. + @param Place& The object corresponding to the place to be retrieved. + It has to be given empty, and is filled by the method. */ + static void prepareSelectFromCoordStatement (soci::session&, + soci::statement&, + const double& iLatitude, + const double& iLongitude); + /** Iterate on the SQL statement. <br>The SQL has to be already prepared. - @parameter const bool Tells whether the Place object should be reset. */ + @param soci::statement& SOCI SQL statement handler. + @param Place& The object corresponding to the place to be retrieved. 
+ It has to be given empty, and is filled by the method. + @param const bool Tells whether the Place object should be reset. */ static bool iterateOnStatement (soci::statement&, Place&, const bool iShouldDoReset); private: - /** Prepare (parse and put in cache) the SQL statement. */ + /** Prepare (parse and put in cache) the SQL statement. + @param soci::session& SOCI session handler. + @param soci::statement& SOCI SQL statement handler. + @param const std::string& The code of the place to be retrieved. + @param Place& The object corresponding to the place to be retrieved. + It has to be given empty, and is filled by the method. */ static void prepareSelectOnPlaceCodeStatement(soci::session&, soci::statement&, const std::string& iPlaceCode, Place&); - /** Prepare (parse and put in cache) the SQL statement. */ + /** Prepare (parse and put in cache) the SQL statement. + @param soci::session& SOCI session handler. + @param soci::statement& SOCI SQL statement handler. + @param const XapianDocID_T& The code of the place to be retrieved. + @param Place& The object corresponding to the place to be retrieved. + It has to be given empty, and is filled by the method. 
*/ static void prepareSelectOnDocIDStatement (soci::session&, soci::statement&, const XapianDocID_T&, Place&); Modified: trunk/opentrep/opentrep/command/IndexBuilder.cpp =================================================================== --- trunk/opentrep/opentrep/command/IndexBuilder.cpp 2010-09-05 16:25:14 UTC (rev 226) +++ trunk/opentrep/opentrep/command/IndexBuilder.cpp 2010-09-05 18:56:12 UTC (rev 227) @@ -154,8 +154,8 @@ // Prepare and execute the select statement soci::statement lSelectStatement (ioSociSession); - DBManager::prepareSelectStatement (ioSociSession, lSelectStatement, - lPlace); + DBManager::prepareSelectFromCodeStatement (ioSociSession, + lSelectStatement, lPlace); // Create the Xapian database (index) Xapian::WritableDatabase lDatabase (iTravelDatabaseName, This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |