Revision: 3345
          http://archive-access.svn.sourceforge.net/archive-access/?rev=3345&view=rev
Author:   binzino
Date:     2010-11-23 00:31:27 +0000 (Tue, 23 Nov 2010)

Log Message:
-----------
Remove bogus debug message. Add config for path to pdftotext executable.

Modified Paths:
--------------
    tags/nutchwax-0_13-JIRA-WAX-75/archive/src/plugin/parse-pdf2/src/java/org/archive/nutchwax/parse/pdf/PDFParser.java

Modified: tags/nutchwax-0_13-JIRA-WAX-75/archive/src/plugin/parse-pdf2/src/java/org/archive/nutchwax/parse/pdf/PDFParser.java
===================================================================
--- tags/nutchwax-0_13-JIRA-WAX-75/archive/src/plugin/parse-pdf2/src/java/org/archive/nutchwax/parse/pdf/PDFParser.java	2010-11-22 22:44:48 UTC (rev 3344)
+++ tags/nutchwax-0_13-JIRA-WAX-75/archive/src/plugin/parse-pdf2/src/java/org/archive/nutchwax/parse/pdf/PDFParser.java	2010-11-23 00:31:27 UTC (rev 3345)
@@ -64,8 +64,6 @@
   public ParseResult getParse( Content content )
   {
-    System.out.println( "PDFParser" );
-
     Metadata metadata = new Metadata();
     String title = "";
     String text = "";
@@ -83,7 +81,7 @@
       fos.close();
       // Now create a Process to call 'pdftotext' to extract the metadata.
-      ProcessBuilder pb = new ProcessBuilder( "/usr/bin/pdftotext", "-htmlmeta", "-f", "1", "-l", "1", tmpfile.toString(), "-" );
+      ProcessBuilder pb = new ProcessBuilder( this.conf.get( "org.archive.nutchwax.parse.pdf.pdftotext.path", "/usr/bin/pdftotext" ), "-htmlmeta", "-f", "1", "-l", "1", tmpfile.toString(), "-" );
       Process p = pb.start();

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.