From: Jimmy Z. <cra...@co...> - 2006-11-06 18:11:35
|
Can you provide a few sample XML files that you used for the testing? I am sure there can be additional performance tuning for performance evaluation... So far the response we got compares VTD-XML favorably with Xerces... for the Java version.. ----- Original Message ----- From: "John Kraal - Kewill Interchain NL" <jk...@in...> To: <vtd...@li...> Sent: Monday, November 06, 2006 4:23 AM Subject: [Vtd-xml-users] Performance in comparison to libxml2 > Hello, > > I've been playing with vtdxml for a while now, and my latest experience > is not very encouraging to go on :(... > > I have created a file with _lots_ of recurring structures and amounts of > recurring data (about 137 MB), eventually I created an xpath to select > the name of every relation in the 4th role (doesn't really matter). > > So, I executed it, with the code included for vtdxml (see vtdxml.c), and > it performed like this: > > unims@gxvm1:~/src/xmltest$ time ./test ./jkl.files.xml > '//relation[@tsroln=04]/tsnam1' >> /dev/null > > real 0m4.940s > user 0m4.625s > sys 0m0.256s > > With 162450 bytes of terminal output. > > Then, the horrible thing happened, I used libxml2 with an adjusted > reference xpath-program. (xpath1.c from their website, only with content > retrieval): > > unims@gxvm1:~/src/libxmltest$ time ./test ./jkl.files.xml > '//relation[@tsroln=04]/tsnam1' >> /dev/null > > real 0m2.545s > user 0m2.201s > sys 0m0.321s > > The size of the output was 150808 bytes. > > This is an incredible difference, am I doing something wrong? Or did I > have too many expectations of vtd? > > Regards, > John Kraal > -------------------------------------------------------------------------------- > /** > * section: XPath > * synopsis: Evaluate XPath expression and prints result node set. > * purpose: Shows how to evaluate XPath expression and register > * known namespaces in XPath context. 
> * usage: xpath1 <xml-file> <xpath-expr> [<known-ns-list>] > * test: ./xpath1 test3.xml '//child2' > xpath1.tmp ; diff xpath1.tmp > xpath1.res ; rm xpath1.tmp > * author: Aleksey Sanin > * copy: see Copyright for the status of this software. > */ > #include <stdlib.h> > #include <stdio.h> > #include <string.h> > #include <assert.h> > > #include <libxml/tree.h> > #include <libxml/parser.h> > #include <libxml/xpath.h> > #include <libxml/xpathInternals.h> > > #if defined(LIBXML_XPATH_ENABLED) && defined(LIBXML_SAX1_ENABLED) > > > static void usage(const char *name); > int execute_xpath_expression(const char* filename, const xmlChar* > xpathExpr, const xmlChar* nsList); > int register_namespaces(xmlXPathContextPtr xpathCtx, const xmlChar* > nsList); > void print_xpath_nodes(xmlNodeSetPtr nodes, FILE* output); > > int > main(int argc, char **argv) { > /* Parse command line and process file */ > if((argc < 3) || (argc > 4)) { > fprintf(stderr, "Error: wrong number of arguments.\n"); > usage(argv[0]); > return(-1); > } > > /* Init libxml */ > xmlInitParser(); > LIBXML_TEST_VERSION > > /* Do the main job */ > if(execute_xpath_expression(argv[1], BAD_CAST argv[2], (argc > 3) ? > BAD_CAST argv[3] : NULL) < 0) { > usage(argv[0]); > return(-1); > } > > /* Shutdown libxml */ > xmlCleanupParser(); > > /* > * this is to debug memory for regression tests > */ > xmlMemoryDump(); > return 0; > } > > /** > * usage: > * @name: the program name. > * > * Prints usage information. > */ > static void > usage(const char *name) { > assert(name); > > fprintf(stderr, "Usage: %s <xml-file> <xpath-expr> > [<known-ns-list>]\n", name); > fprintf(stderr, "where <known-ns-list> is a list of known > namespaces\n"); > fprintf(stderr, "in \"<prefix1>=<href1> <prefix2>=href2> ...\" > format\n"); > } > > /** > * execute_xpath_expression: > * @filename: the input XML filename. > * @xpathExpr: the xpath expression for evaluation. 
> * @nsList: the optional list of known namespaces in > * "<prefix1>=<href1> <prefix2>=href2> ..." format. > * > * Parses input XML file, evaluates XPath expression and prints results. > * > * Returns 0 on success and a negative value otherwise. > */ > int > execute_xpath_expression(const char* filename, const xmlChar* xpathExpr, > const xmlChar* nsList) { > xmlDocPtr doc; > xmlXPathContextPtr xpathCtx; > xmlXPathObjectPtr xpathObj; > > assert(filename); > assert(xpathExpr); > > /* Load XML document */ > doc = xmlParseFile(filename); > if (doc == NULL) { > fprintf(stderr, "Error: unable to parse file \"%s\"\n", filename); > return(-1); > } > > /* Create xpath evaluation context */ > xpathCtx = xmlXPathNewContext(doc); > if(xpathCtx == NULL) { > fprintf(stderr,"Error: unable to create new XPath context\n"); > xmlFreeDoc(doc); > return(-1); > } > > /* Register namespaces from list (if any) */ > if((nsList != NULL) && (register_namespaces(xpathCtx, nsList) < 0)) { > fprintf(stderr,"Error: failed to register namespaces list > \"%s\"\n", nsList); > xmlXPathFreeContext(xpathCtx); > xmlFreeDoc(doc); > return(-1); > } > > /* Evaluate xpath expression */ > xpathObj = xmlXPathEvalExpression(xpathExpr, xpathCtx); > if(xpathObj == NULL) { > fprintf(stderr,"Error: unable to evaluate xpath expression > \"%s\"\n", xpathExpr); > xmlXPathFreeContext(xpathCtx); > xmlFreeDoc(doc); > return(-1); > } > > /* Print results */ > print_xpath_nodes(xpathObj->nodesetval, stdout); > > /* Cleanup */ > xmlXPathFreeObject(xpathObj); > xmlXPathFreeContext(xpathCtx); > xmlFreeDoc(doc); > > return(0); > } > > /** > * register_namespaces: > * @xpathCtx: the pointer to an XPath context. > * @nsList: the list of known namespaces in > * "<prefix1>=<href1> <prefix2>=href2> ..." format. > * > * Registers namespaces from @nsList in @xpathCtx. > * > * Returns 0 on success and a negative value otherwise. 
> */ > int > register_namespaces(xmlXPathContextPtr xpathCtx, const xmlChar* nsList) { > xmlChar* nsListDup; > xmlChar* prefix; > xmlChar* href; > xmlChar* next; > > assert(xpathCtx); > assert(nsList); > > nsListDup = xmlStrdup(nsList); > if(nsListDup == NULL) { > fprintf(stderr, "Error: unable to strdup namespaces list\n"); > return(-1); > } > > next = nsListDup; > while(next != NULL) { > /* skip spaces */ > while((*next) == ' ') next++; > if((*next) == '\0') break; > > /* find prefix */ > prefix = next; > next = (xmlChar*)xmlStrchr(next, '='); > if(next == NULL) { > fprintf(stderr,"Error: invalid namespaces list format\n"); > xmlFree(nsListDup); > return(-1); > } > *(next++) = '\0'; > > /* find href */ > href = next; > next = (xmlChar*)xmlStrchr(next, ' '); > if(next != NULL) { > *(next++) = '\0'; > } > > /* do register namespace */ > if(xmlXPathRegisterNs(xpathCtx, prefix, href) != 0) { > fprintf(stderr,"Error: unable to register NS with prefix=\"%s\" and > href=\"%s\"\n", prefix, href); > xmlFree(nsListDup); > return(-1); > } > } > > xmlFree(nsListDup); > return(0); > } > > /** > * print_xpath_nodes: > * @nodes: the nodes set. > * @output: the output file handle. > * > * Prints the @nodes content to @output. > */ > void > print_xpath_nodes(xmlNodeSetPtr nodes, FILE* output) { > xmlNodePtr cur; > xmlChar* content; > int size; > int i; > > assert(output); > size = (nodes) ? 
nodes->nodeNr : 0; > > fprintf(output, "Result (%d nodes):\n", size); > for(i = 0; i < size; ++i) { > assert(nodes->nodeTab[i]); > > if(nodes->nodeTab[i]->type == XML_NAMESPACE_DECL) { > xmlNsPtr ns; > > ns = (xmlNsPtr)nodes->nodeTab[i]; > cur = (xmlNodePtr)ns->next; > if(cur->ns) { > fprintf(output, "= namespace \"%s\"=\"%s\" for node %s:%s\n", > ns->prefix, ns->href, cur->ns->href, cur->name); > } else { > fprintf(output, "= namespace \"%s\"=\"%s\" for node %s\n", > ns->prefix, ns->href, cur->name); > } > } else if(nodes->nodeTab[i]->type == XML_ELEMENT_NODE) { > cur = nodes->nodeTab[i]; > content = xmlNodeGetContent(cur); > if(cur->ns) { > fprintf(output, "= element node \"%s:%s\": \"%s\"\n", > cur->ns->href, cur->name, content); > } else { > fprintf(output, "= element node \"%s\": \"%s\"\n", > cur->name, content); > } > xmlFree(content); > } else { > cur = nodes->nodeTab[i]; > content = xmlNodeGetContent(cur); > fprintf(output, "= node \"%s\": type %d: value \"%s\"\n", cur->name, > cur->type, content); > xmlFree(content); > } > } > } > > #else > int main(void) { > fprintf(stderr, "XPath support not compiled in\n"); > exit(1); > } > #endif > -------------------------------------------------------------------------------- > #include <string.h> > #include <stdio.h> > #include <wchar.h> > #include <stdlib.h> > #include <fcntl.h> > #include <sys/types.h> > #include <sys/stat.h> > #include <vtdxml/xpath1.h> > #include <vtdxml/helper.h> > #include <vtdxml/vtdGen.h> > > struct exception_context the_exception_context[1]; > > int main(int argc, char **argv, char **envv) > { > exception e; > FILE *f = NULL; > int i=0,t,result,count=0; > wchar_t *tmpString, *tmpString2; > char *filename; > wchar_t *xpath; > struct stat s; > UByte *xml = NULL; > VTDGen *vg = NULL; > VTDNav *vn = NULL; > AutoPilot *ap = NULL; > > Try > { > if (argc > 2) > { > filename = (char *)malloc(strlen(argv[1]) + 1); > xpath = (wchar_t *)malloc((strlen(argv[2]) + 1) * sizeof(wchar_t)); > 
strcpy(filename, argv[1]); > mbstowcs(xpath , argv[2], strlen(argv[2])+1); > wprintf(L"Using xpath: %ls\n", xpath); > } > else > { > filename = "test.xml"; > xpath = L"//*"; > } > f = fopen(filename, "r"); > stat(filename, &s); > i = (int)s.st_size; > wprintf(L"size of the file is %d bytes\n", i); > xml = (UByte *)malloc(sizeof(UByte) * i); > i = fread(xml,sizeof(UByte),i,f); > > vg = createVTDGen(); > setDoc(vg,xml,i); > parse(vg,TRUE); > > vn = getNav(vg); > ap = createAutoPilot2(); > declareXPathNameSpace(ap,L"ns1",L""); > > if (selectXPath(ap, xpath)) > { > bind(ap, vn); > while ((result = evalXPath(ap)) != -1) > { > tmpString = toString(vn, result); > t = getText(vn); > if (t != -1) > { > tmpString2 = toNormalizedString(vn,t); > wprintf(L"r: %d e: %ls (%d): %ls \n", > result, > tmpString, > t, > tmpString2); > free(tmpString); > free(tmpString2); > } > count++; > } > } > wprintf(L"\nTotal number of elements %d \n", count); > fclose(f); > > freeVTDNav(vn); > freeVTDGen(vg); > freeAutoPilot(ap); > } Catch (e) { > wprintf (L"exception occurred\nerr: %s\nmoreerr: %s\n", e.msg, e.sub_msg); > return 1; > } > > return 0; > } > -------------------------------------------------------------------------------- > ------------------------------------------------------------------------- > Using Tomcat but need to do more? Need to support web services, security? > Get stuff done quickly with pre-integrated technology to make your job > easier > Download IBM WebSphere Application Server v.1.0.1 based on Apache Geronimo > http://sel.as-us.falkag.net/sel?cmd=lnk&kid=120709&bid=263057&dat=121642 -------------------------------------------------------------------------------- > _______________________________________________ > Vtd-xml-users mailing list > Vtd...@li... > https://lists.sourceforge.net/lists/listinfo/vtd-xml-users > |