Below are some small example applications.
Each example below assumes that an Indri index already exists and that the path to it is passed as the first argument on the command line.
The following example shows a method for retrieving field text for the title and any "person" named entities in an Indri index. This example assumes that an Indri index has been built with "title" and "person" tagged as fields at the time the index was built. It also assumes that the path to the index is the first argument to the program.
#include <vector> #include "indri/Repository.hpp" // a global repository object for our index indri::collection::Repository repository; using namespace indri::api; void getFieldText(int documentID, std::string field) { // get the index from the repository // in our case it will be the first index indri::collection::Repository::index_state repIndexState = repository.indexes(); indri::index::Index *thisIndex=(*repIndexState)[0]; // get the field ID int fieldID=thisIndex->field(field); // is this a valid field? If not, exit if (fieldID < 1) { return; } // retrieve the document vector for this document const indri::index::TermList *termList=thisIndex->termList(documentID); // ensure we have a valid term list! if (!termList) { return; } // get the vector of fields indri::utility::greedy_vector< indri::index::FieldExtent > fieldVec=termList->fields(); // go through the fields (create an iterator) indri::utility::greedy_vector< indri::index::FieldExtent >::iterator fIter=fieldVec.begin(); while (fIter!=fieldVec.end()) { // does the ID of this field extent match our field ID? if ((*fIter).id==fieldID) { // yes! We can print out the text here int beginTerm=(*fIter).begin; int endTerm=(*fIter).end; // note that the text is inclusive of the beginning // but exclusive of the ending for (int t=beginTerm; t < endTerm; t++) { // get this term ID int thisTermID=termList->terms()[t]; // convert the term ID to its string representation cout << thisIndex->term(thisTermID) << " "; } cout << endl; } // increment the iterator fIter++; } // destroy the term list object delete termList; termList=NULL; } void main(int argc, char *argv[]) { // we assume the index path is the first argument and the query is second char *indexPath=argv[1]; // open our repository object (read-only!!) repository.openRead(indexPath); // retrieve the title field for document 5 getFieldText(5, "title"); // retrieve any person fields for document 5 getFieldText(5, "person"); // close the repository... 
repository.close(); }
This example takes two command-line arguments: the path to the index and a query. It opens the index, runs the query, and then prints out a two-column listing consisting of each document ID and the snippet of text that matched the query.
#include <vector> #include "indri/QueryEnvironment.hpp" #include "indri/SnippetBuilder.hpp" using namespace indri::api; void main(int argc, char *argv[]) { // we assume the index path is the first argument and the query is second char *indexPath=argv[1]; char *query=argv[2]; // our builder object - false in the constructor means no HTML output. SnippetBuilder builder(false); // create a query environment QueryEnvironment indriEnvironment; // open the index indriEnvironment.addIndex(indexPath); // run the query, max of 1000 results QueryAnnotation *results=indriEnvironment.runAnnotatedQuery(query, 1000); // extract the results as a vector of ScoredExtentResult items std::vector<indri::api::ScoredExtentResult> resultVector=results->getResults(); // get the number of results int totalNumResults=resultVector.size(); // get the parsed documents for the results std::vector<ParsedDocument*> parsedDocs=indriEnvironment.documents(resultVector); // for each result, print out the document ID and the snippet... for (int i=0; i < totalNumResults; i++) { // get the document ID int thisResultDocID=resultVector[i].document; // get this document's parsed doc representation ParsedDocument* parsedDoc=parsedDocs[i]; // print the document ID and the snippet cout << thisResultDocID << "\t"; cout << builder.build(thisResultDocID, parsedDoc, results) << "\n"; } // note that we do not need to explicitly delete the // QueryEnvironment object here to close the index. It will // automatically be removed when it goes out of scope. }