Below are some small example applications.
Each example below assumes that an Indri index already exists and that the path to it is passed as the first argument on the command line.
The following example shows how to retrieve the field text for the title and for any "person" named entities in an Indri index. It assumes that the index was built with "title" and "person" tagged as fields, and that the path to the index is the first argument to the program.
#include <iostream>
#include <string>
#include <vector>
#include "indri/Repository.hpp"
// A global repository object for our index. main() opens it read-only and
// closes it; getFieldText() reads the index through it.
indri::collection::Repository repository;
using namespace indri::api;
void getFieldText(int documentID, std::string field) {
// get the index from the repository
// in our case it will be the first index
indri::collection::Repository::index_state repIndexState = repository.indexes();
indri::index::Index *thisIndex=(*repIndexState)[0];
// get the field ID
int fieldID=thisIndex->field(field);
// is this a valid field? If not, exit
if (fieldID < 1) { return; }
// retrieve the document vector for this document
const indri::index::TermList *termList=thisIndex->termList(documentID);
// ensure we have a valid term list!
if (!termList) { return; }
// get the vector of fields
indri::utility::greedy_vector< indri::index::FieldExtent > fieldVec=termList->fields();
// go through the fields (create an iterator)
indri::utility::greedy_vector< indri::index::FieldExtent >::iterator fIter=fieldVec.begin();
while (fIter!=fieldVec.end()) {
// does the ID of this field extent match our field ID?
if ((*fIter).id==fieldID) {
// yes! We can print out the text here
int beginTerm=(*fIter).begin;
int endTerm=(*fIter).end;
// note that the text is inclusive of the beginning
// but exclusive of the ending
for (int t=beginTerm; t < endTerm; t++) {
// get this term ID
int thisTermID=termList->terms()[t];
// convert the term ID to its string representation
cout << thisIndex->term(thisTermID) << " ";
}
cout << endl;
}
// increment the iterator
fIter++;
}
// destroy the term list object
delete termList;
termList=NULL;
}
// Entry point for the field-text example.
//
// Opens the index whose path is the first command-line argument and prints
// the "title" and "person" field text of document 5.
//
// Returns 0 on success, or 1 after printing a usage message when the index
// path argument is missing.
int main(int argc, char *argv[]) {
  // the index path is the only argument this program takes
  if (argc < 2) {
    std::cerr << "usage: " << (argc > 0 ? argv[0] : "program")
              << " <index-path>" << std::endl;
    return 1;
  }
  char *indexPath = argv[1];

  // open our repository object (read-only!!)
  repository.openRead(indexPath);

  // retrieve the title field for document 5
  getFieldText(5, "title");
  // retrieve any person fields for document 5
  getFieldText(5, "person");

  // close the repository...
  repository.close();
  return 0;
}
This example takes two command-line arguments: the path to the index and a query. It opens the index, runs the query, and then prints out a two-column listing consisting of the document ID and the snippet of text that matched the query.
#include <vector>
#include "indri/QueryEnvironment.hpp"
#include "indri/SnippetBuilder.hpp"
using namespace indri::api;
void main(int argc, char *argv[]) {
// we assume the index path is the first argument and the query is second
char *indexPath=argv[1];
char *query=argv[2];
// our builder object - false in the constructor means no HTML output.
SnippetBuilder builder(false);
// create a query environment
QueryEnvironment indriEnvironment;
// open the index
indriEnvironment.addIndex(indexPath);
// run the query, max of 1000 results
QueryAnnotation *results=indriEnvironment.runAnnotatedQuery(query, 1000);
// extract the results as a vector of ScoredExtentResult items
std::vector<indri::api::ScoredExtentResult> resultVector=results->getResults();
// get the number of results
int totalNumResults=resultVector.size();
// get the parsed documents for the results
std::vector<ParsedDocument*> parsedDocs=indriEnvironment.documents(resultVector);
// for each result, print out the document ID and the snippet...
for (int i=0; i < totalNumResults; i++) {
// get the document ID
int thisResultDocID=resultVector[i].document;
// get this document's parsed doc representation
ParsedDocument* parsedDoc=parsedDocs[i];
// print the document ID and the snippet
cout << thisResultDocID << "\t";
cout << builder.build(thisResultDocID, parsedDoc, results) << "\n";
}
// note that we do not need to explicitly delete the
// QueryEnvironment object here to close the index. It will
// automatically be removed when it goes out of scope.
}