From: <my...@us...> - 2009-11-12 14:52:42
|
Revision: 2122 http://aperture.svn.sourceforge.net/aperture/?rev=2122&view=rev Author: mylka Date: 2009-11-12 14:52:30 +0000 (Thu, 12 Nov 2009) Log Message: ----------- 2840003 - committed a first draft of the way to have more than one extractor on a single stream. The solution simply reads the stream into a byte array and then passes two ByteArrayInputStreams to two extractors, it's bad but simple and much more resilient to faulty extractors Modified Paths: -------------- aperture/trunk/example/cli/src/main/java/org/semanticdesktop/aperture/examples/handler/SimpleCrawlerHandler.java Modified: aperture/trunk/example/cli/src/main/java/org/semanticdesktop/aperture/examples/handler/SimpleCrawlerHandler.java =================================================================== --- aperture/trunk/example/cli/src/main/java/org/semanticdesktop/aperture/examples/handler/SimpleCrawlerHandler.java 2009-11-12 14:50:00 UTC (rev 2121) +++ aperture/trunk/example/cli/src/main/java/org/semanticdesktop/aperture/examples/handler/SimpleCrawlerHandler.java 2009-11-12 14:52:30 UTC (rev 2122) @@ -6,7 +6,9 @@ */ package org.semanticdesktop.aperture.examples.handler; +import java.io.BufferedInputStream; import java.io.BufferedOutputStream; +import java.io.ByteArrayInputStream; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; @@ -39,6 +41,8 @@ import org.semanticdesktop.aperture.extractor.FileExtractor; import org.semanticdesktop.aperture.extractor.FileExtractorFactory; import org.semanticdesktop.aperture.extractor.impl.DefaultExtractorRegistry; +import org.semanticdesktop.aperture.extractor.util.ThreadedExtractorWrapper; +import org.semanticdesktop.aperture.extractor.xmp.XMPExtractorFactory; import org.semanticdesktop.aperture.mime.identifier.MimeTypeIdentifier; import org.semanticdesktop.aperture.mime.identifier.magic.MagicMimeTypeIdentifier; import org.semanticdesktop.aperture.rdf.RDFContainer; @@ -95,6 +99,8 @@ private ExtractorRegistry extractorRegistry; + private XMPExtractorFactory xmpExtractorFactory; + private SubCrawlerRegistry subCrawlerRegistry; ////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -191,6 +197,7 @@ } if (l_extractingContents) { extractorRegistry = new DefaultExtractorRegistry(); + xmpExtractorFactory = new XMPExtractorFactory(); subCrawlerRegistry = new DefaultSubCrawlerRegistry(); } @@ -467,20 +474,52 @@ @SuppressWarnings("unchecked") private boolean applyExtractor(URI id, InputStream contentStream, String mimeType, RDFContainer metadata) - throws ExtractorException { + throws ExtractorException, IOException { Set extractors = extractorRegistry.getExtractorFactories(mimeType); + boolean supportedByXmp = xmpExtractorFactory.getSupportedMimeTypes().contains(mimeType); + boolean result = false; + byte [] buffer = null; + + if (!extractors.isEmpty() && supportedByXmp) { + buffer = IOUtil.readBytes(contentStream); + } + if (!extractors.isEmpty()) { ExtractorFactory factory = (ExtractorFactory) extractors.iterator().next(); Extractor extractor = factory.get(); + ThreadedExtractorWrapper wrapper = new ThreadedExtractorWrapper(extractor); if (verbose) { System.out.print("|ex:" + extractor.getClass().getName()); } - extractor.extract(id, contentStream, null, mimeType, metadata); - return true; + if (buffer != null) { + contentStream = new BufferedInputStream(new ByteArrayInputStream(buffer)); + } + try { + wrapper.extract(id, contentStream, null, mimeType, metadata); + result = true; + } catch (Exception e) { + e.printStackTrace(); + } } - else { - return false; + + if (supportedByXmp) { + Extractor extractor = xmpExtractorFactory.get(); + ThreadedExtractorWrapper wrapper = new ThreadedExtractorWrapper(extractor); + if (verbose) { + System.out.print("|ex:" + extractor.getClass().getName()); + } + if (buffer != null) { + contentStream = new BufferedInputStream(new ByteArrayInputStream(buffer)); + } + try { + wrapper.extract(id, contentStream, null, mimeType, metadata); + result = true; + } catch (Exception e) { + e.printStackTrace(); + } } + + return result; } @SuppressWarnings("unchecked") This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |