From: <my...@us...> - 2009-11-26 16:11:36
|
Revision: 2129 http://aperture.svn.sourceforge.net/aperture/?rev=2129&view=rev Author: mylka Date: 2009-11-26 16:11:22 +0000 (Thu, 26 Nov 2009) Log Message: ----------- updated the addon crawlers to report the error cause Modified Paths: -------------- aperture-addons/trunk/src/main/java/org/semanticdesktop/aperture/webdav/crawler/WebdavCrawler.java aperture-addons/trunk/src/main/java/org/semanticdesktop/aperture/websites/bibsonomy/BibsonomyCrawler.java Modified: aperture-addons/trunk/src/main/java/org/semanticdesktop/aperture/webdav/crawler/WebdavCrawler.java =================================================================== --- aperture-addons/trunk/src/main/java/org/semanticdesktop/aperture/webdav/crawler/WebdavCrawler.java 2009-11-19 16:17:16 UTC (rev 2128) +++ aperture-addons/trunk/src/main/java/org/semanticdesktop/aperture/webdav/crawler/WebdavCrawler.java 2009-11-26 16:11:22 UTC (rev 2129) @@ -127,8 +127,7 @@ protected ExitCode crawlObjects() { DataSource dataSource = getDataSource(); if(!(dataSource instanceof WebDataSource)){ - logger.error("wrong data source type"); - return ExitCode.FATAL_ERROR; + return reportFatalErrorCause("wrong data source type"); } source = (WebDataSource)dataSource; try { @@ -152,13 +151,10 @@ //sets the path relative to root root.setPath(root.getPath() + path); } else { - logger.error("Unknown Protocol"); - return ExitCode.FATAL_ERROR; + return reportFatalErrorCause("Unknown Protocol: " + url.getProtocol()); } } catch (Exception e) { - logger.error("WebdavResource Accessfailure"); - e.printStackTrace(); - return ExitCode.FATAL_ERROR; + return reportFatalErrorCause("WebdavResource Accessfailure",e); } Modified: aperture-addons/trunk/src/main/java/org/semanticdesktop/aperture/websites/bibsonomy/BibsonomyCrawler.java =================================================================== --- aperture-addons/trunk/src/main/java/org/semanticdesktop/aperture/websites/bibsonomy/BibsonomyCrawler.java 2009-11-19 16:17:16 UTC (rev 2128) +++ 
aperture-addons/trunk/src/main/java/org/semanticdesktop/aperture/websites/bibsonomy/BibsonomyCrawler.java 2009-11-26 16:11:22 UTC (rev 2129) @@ -99,16 +99,17 @@ } } } else { - logger.warn("Couldn't get data from Bibsonomy. Status code: " + return reportFatalErrorCause("Couldn't get data from Bibsonomy. Status code: " + q.getHttpStatusCode() + " error: " + q.getError()); - return ExitCode.FATAL_ERROR; } } catch (IllegalStateException e) { - logger.warn("Couldn't get data from Bibsonomy", e); + reportFatalErrorCause("Couldn't get data from Bibsonomy", e); + return ExitCode.FATAL_ERROR; } catch (ErrorPerformingRequestException e) { - logger.warn("Couldn't get data from Bibsonomy", e); + reportFatalErrorCause("Couldn't get data from Bibsonomy", e); + return ExitCode.FATAL_ERROR; } } while (found == 50); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <my...@us...> - 2010-07-05 23:09:15
|
Revision: 2381 http://aperture.svn.sourceforge.net/aperture/?rev=2381&view=rev Author: mylka Date: 2010-07-05 21:43:10 +0000 (Mon, 05 Jul 2010) Log Message: ----------- committed an expanded version of the ApertureRuntime tests, I did this in the aperture-addons folder to gather some feedback before thinking about committing it to the core Added Paths: ----------- aperture-addons/trunk/src/main/java/org/semanticdesktop/aperture/util/ aperture-addons/trunk/src/main/java/org/semanticdesktop/aperture/util/ExpandedApertureRuntime.java aperture-addons/trunk/src/main/java/org/semanticdesktop/aperture/util/ExpandedApertureRuntimeMain.java Added: aperture-addons/trunk/src/main/java/org/semanticdesktop/aperture/util/ExpandedApertureRuntime.java =================================================================== --- aperture-addons/trunk/src/main/java/org/semanticdesktop/aperture/util/ExpandedApertureRuntime.java (rev 0) +++ aperture-addons/trunk/src/main/java/org/semanticdesktop/aperture/util/ExpandedApertureRuntime.java 2010-07-05 21:43:10 UTC (rev 2381) @@ -0,0 +1,494 @@ +/** + * Copyright (c) 2010 Aduna and Deutsches Forschungszentrum fuer Kuenstliche Intelligenz DFKI GmbH. + * All rights reserved. + * + * Licensed under the Aperture BSD-style license. 
+ */ +package org.semanticdesktop.aperture.util; + +import java.io.BufferedInputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.Set; +import java.util.regex.Pattern; + +import org.ontoware.rdf2go.RDF2Go; +import org.ontoware.rdf2go.model.Model; +import org.ontoware.rdf2go.model.node.URI; +import org.ontoware.rdf2go.model.node.impl.URIImpl; +import org.ontoware.rdf2go.vocabulary.RDF; +import org.semanticdesktop.aperture.accessor.DataAccessor; +import org.semanticdesktop.aperture.accessor.DataAccessorFactory; +import org.semanticdesktop.aperture.accessor.DataAccessorRegistry; +import org.semanticdesktop.aperture.accessor.DataObject; +import org.semanticdesktop.aperture.accessor.FileDataObject; +import org.semanticdesktop.aperture.accessor.RDFContainerFactory; +import org.semanticdesktop.aperture.accessor.UrlNotFoundException; +import org.semanticdesktop.aperture.accessor.base.DataObjectBase; +import org.semanticdesktop.aperture.accessor.base.RDFContainerFactoryImpl; +import org.semanticdesktop.aperture.accessor.impl.DefaultDataAccessorRegistry; +import org.semanticdesktop.aperture.extractor.Extractor; +import org.semanticdesktop.aperture.extractor.ExtractorException; +import org.semanticdesktop.aperture.extractor.ExtractorFactory; +import org.semanticdesktop.aperture.extractor.ExtractorRegistry; +import org.semanticdesktop.aperture.extractor.FileExtractor; +import org.semanticdesktop.aperture.extractor.FileExtractorFactory; +import org.semanticdesktop.aperture.extractor.impl.DefaultExtractorRegistry; +import org.semanticdesktop.aperture.mime.identifier.MimeTypeIdentifier; +import org.semanticdesktop.aperture.mime.identifier.magic.MagicMimeTypeIdentifier; +import org.semanticdesktop.aperture.rdf.RDFContainer; +import org.semanticdesktop.aperture.rdf.impl.RDFContainerImpl; +import org.semanticdesktop.aperture.runtime.ApertureRuntime; +import 
org.semanticdesktop.aperture.subcrawler.SubCrawler; +import org.semanticdesktop.aperture.subcrawler.SubCrawlerException; +import org.semanticdesktop.aperture.subcrawler.SubCrawlerFactory; +import org.semanticdesktop.aperture.subcrawler.SubCrawlerHandler; +import org.semanticdesktop.aperture.subcrawler.SubCrawlerRegistry; +import org.semanticdesktop.aperture.subcrawler.SubCrawlerUtil; +import org.semanticdesktop.aperture.subcrawler.impl.DefaultSubCrawlerRegistry; +import org.semanticdesktop.aperture.vocabulary.NID3; +import org.semanticdesktop.aperture.vocabulary.NIE; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * @author Antoni + * + */ +public class ExpandedApertureRuntime { + + private static final Pattern URI_START_PATTERN = Pattern.compile("^[a-z]+:.*$"); + + private Logger logger = LoggerFactory.getLogger(ApertureRuntime.class); + + private DataAccessorRegistry accessorRegistry; + private SubCrawlerRegistry subCrawlerRegistry; + private ExtractorRegistry extractorRegistry; + private MimeTypeIdentifier identifier; + private RDFContainerFactory factory; + + public interface TextProcessor { + public void process(URI uri, String text); + } + + public ExpandedApertureRuntime() { + this.extractorRegistry = new DefaultExtractorRegistry(); + this.identifier = new MagicMimeTypeIdentifier(); + this.accessorRegistry = new DefaultDataAccessorRegistry(); + this.subCrawlerRegistry = new DefaultSubCrawlerRegistry(); + this.factory = new RDFContainerFactoryImpl(); + } + + /** + * Tries to extract as much information from the given URI as possible. + * + * @param uri the uri from which information is to be extracted. Only URIs with schemes supported by + * the {@link DefaultDataAccessorRegistry} registry and {@link DefaultSubCrawlerRegistry} can be accessed and extracted. 
In most cases + * the resulting InputStream will be read in its entirety, and the method may try to download the + * content into a temporary file (created with {@link File#createTempFile(String, String)}). The + * file will be deleted before this method returns, yet there must be enough room on the partition + * that houses the temporary folder. + * + * @return an {@link RDFContainer} instance containing the data extracted from the URL. It must be disposed + * properly by the caller of this method, with a call to {@link RDFContainer#dispose()}. This method + * may return null if an object with this URI has not been found. + * + * @throws IllegalArgumentException if the uriString is invalid + * @throws IOException if an I/O error occurs during processing + */ + public RDFContainer extractFrom(String uriString) throws IOException { + if (uriString == null) { + throw new NullPointerException("The URL cannot be null"); + } + RDFContainerFactory fac = new RDFContainerFactoryImpl(); + URI uri = new URIImpl(uriString); + DataObject obj = null; + if (SubCrawlerUtil.isSubcrawledObjectUri(uri)) { + URI topLevelUri = SubCrawlerUtil.getRootObjectUri(uri); + DataObject rootObj = null; + try { + rootObj = accessUri(topLevelUri,fac); + if (rootObj == null) { + return null; + } else if (!(rootObj instanceof FileDataObject)) { + return null; + } + FileDataObject fobj = (FileDataObject)rootObj; + InputStream contentStream = fobj.getContent(); + if (contentStream == null) { + return null; + } + obj = SubCrawlerUtil.getDataObject(uri, contentStream, null, null, null, new RDFContainerFactoryImpl(), subCrawlerRegistry); + } catch (Exception e) { + if (rootObj != null) { + rootObj.dispose(); + } + return null; + } + } else { + obj = accessUri(uri,fac); + } + + if (obj == null) { + return null; + } else if (!(obj instanceof FileDataObject)) { + RDFContainer containerToReturn = obj.getMetadata(); + safelyDispose(obj); + return containerToReturn; + } else { + FileDataObject fobj = 
(FileDataObject)obj; + InputStream stream = fobj.getContent(); + RDFContainer container = fobj.getMetadata(); + if (stream == null) { + return container; + } + String mimeType = identifyMimeType(stream, uri); + tryToApplyExtractors(stream, uri, container, mimeType); + safelyDispose(obj); + return container; + } + } + + /** + * Tries to extract as much information from the given input stream as possible. + * + * @param stream the stream from which information is to be extracted. In most cases the stream will be + * read in its entirety, so it will probably be unusable after this method returns. THIS METHOD + * may try to download the entire file to a temporary file + * @param nameOrPathOrUrl (obligatory) this argument has two purposes: firstly, the file name and + * extension will be extracted from it which in turn will be used to aid the MIME type + * detection. Secondly it will serve as the basis for the creation of the URI for the returned + * RDFContainer. If this argument is already a valid URI it will be used unchanged. Otherwise a + * new uri will be created, with a 'file:' prefix, and the name or path appended to that prefix, + * correctly URL-encoded. The process is heuristic, so in a general case the user should make + * no assumptions what will the URI exactly look like. It should always be checked by calling + * {@link RDFContainer#getDescribedUri()} on the returned {@link RDFContainer} instance. + * @return an {@link RDFContainer} instance containing the data extracted from the InputStream. + * @throws IOException if an I/O error occurs in the process. 
+ */ + public RDFContainer extractFrom(InputStream stream, String nameOrPathOrUrl) throws IOException { + if (stream == null) { + throw new NullPointerException("stream cannot be null"); + } + + if (nameOrPathOrUrl == null) { + throw new NullPointerException("nameOrPathOrUrl cannot be null"); + } + + if (!stream.markSupported()) { + stream = new BufferedInputStream(stream); + } + + URI uri = tryToConvertToUri(nameOrPathOrUrl); + + Model model = RDF2Go.getModelFactory().createModel(); + model.open(); + RDFContainer container = new RDFContainerImpl(model,uri); + String mimeType = identifyMimeType(stream, uri); + tryToApplyExtractors(stream, uri, container, mimeType); + return container; + } + + /** + * Tries to extract as much information from the given file as possible. + * + * @param file the file from which information is to be extracted + * @return an {@link RDFContainer} instance containing the data extracted from the file. + * @throws IOException if an I/O error occurs in the process. 
+ */ + public RDFContainer extractFrom(File file) throws IOException { + if (file == null) { + throw new NullPointerException("stream cannot be null"); + } + + if (!file.canRead()) { + throw new IOException("The file: " + file.getAbsolutePath() + " is unreadable"); + } + + InputStream stream = new FileInputStream(file); + return extractFrom(stream, file.toURI().toString()); + } + + public void extractAllText(InputStream stream, String nameOrPathOrUrl, TextProcessor processor) throws IOException, SubCrawlerException { + if (stream == null) { + throw new NullPointerException("stream cannot be null"); + } + + if (processor == null) { + throw new NullPointerException("processor cannot be null"); + } + + if (nameOrPathOrUrl == null) { + throw new NullPointerException("nameOrPathOrUrl cannot be null"); + } + + if (!stream.markSupported()) { + stream = new BufferedInputStream(stream); + } + + URI uri = tryToConvertToUri(nameOrPathOrUrl); + + Model model = RDF2Go.getModelFactory().createModel(); + model.open(); + RDFContainer container = new RDFContainerImpl(model,uri); + String mimeType = identifyMimeType(stream, uri); + boolean ok = tryToApplyExtractors(stream, uri, container, mimeType); + if (ok) { + String text = getFullText(container); + if (text != null) { + processor.process(uri, text); + } + container.dispose(); + } else { + Set subcrawlers = subCrawlerRegistry.get(mimeType); + if (!subcrawlers.isEmpty()) { + SubCrawlerFactory factory = (SubCrawlerFactory) subcrawlers.iterator().next(); + SubCrawler subcrawler = factory.get(); + subcrawler.subCrawl(uri, stream, new TextExtractingSubCrawlerHandler(processor), null, null, null, mimeType, container); + String text = getFullText(container); + if (text != null) { + processor.process(uri, text); + } + container.dispose(); + } + } + } + + private class TextExtractingSubCrawlerHandler implements SubCrawlerHandler { + + private TextProcessor processor; + public TextExtractingSubCrawlerHandler(TextProcessor processor) { + 
this.processor = processor; + } + public RDFContainerFactory getRDFContainerFactory(String url) {return factory;} + + public void objectNew(DataObject object) { + if (object instanceof FileDataObject) { + try { + InputStream contentStream = ((FileDataObject)object).getContent(); + extractAllText(contentStream, object.getID().toString(), processor); + } catch (RuntimeException e) { + throw e; + } catch (Exception e) { + throw new RuntimeException(e); + } + } + object.dispose(); + } + + public void objectNotModified(String url) {} // can't happen + public void objectChanged(DataObject object) {} // can't happen + } + + /** + * Tries to convert a string into something resembling an URI. No guarantees are made. The goals were: + * <ul> + * <li>strings that already resemble uris are unchanged</li> + * <li>simple file names are converted to uris of the form uri:file+name.pdf. The name of the file is + * URL-encoded.</li> + * <li>file paths are converted to uri:C:/Documents+Settings/Antoni/My+Documents/file.pdf</li> + * </ul> + * + * Package access has been chosen to allow for unit testing. 
+ * + * @param nameOrPathOrUrl + * @return + */ + URI tryToConvertToUri(String nameOrPathOrUrl) { + URI uri = null; + + // first replace backslashes into slashes, it won't affect proper URI's but will fix + // most of the normal windows paths + nameOrPathOrUrl = nameOrPathOrUrl.replaceAll("\\\\", "/"); + + // then see if the string begins with a uri scheme, this will also not affect proper + // URI's but will fix most of the paths and/or simple file names + if (!URI_START_PATTERN.matcher(nameOrPathOrUrl).matches()) { + nameOrPathOrUrl = "file:" + nameOrPathOrUrl; + } + + try { + // leave some typical URL characters, so that normal proper URL's will come unchanged from + // this treatment, + uri = new URIImpl(HttpClientUtil.formUrlEncode(nameOrPathOrUrl, "/:%!?&+.=")); + } + catch (Exception e) { + // something is wrong with the URI, let's apply proper formUriEncoding, it's plain impossible + // that uri:<url_encoded_string> may be considered invalid + uri = new URIImpl("file:" + HttpClientUtil.formUrlEncode(nameOrPathOrUrl)); + } + + return uri; + } + + /** + * Tries to identify the MIME type of the given input stream. + * + * @param stream the input stream you'd like to identify. If the stream supports mark() and reset(), i.e. its + * {@link InputStream#markSupported()} method returns true, the stream is marked and then reset + * at the end. If not, then it's not. If you would like to use that stream afterwards, make + * sure it does support {@link InputStream#mark(int)} or wrap it in a + * {@link BufferedInputStream} if it doesn't. Closing the stream is YOUR responsibility, so make + * sure to remember this. You can also leave this argument as NULL in which case the + * identifier will try to guess the MIME type from the file name or url. + * @param nameOrPathOrUrl an optional argument, that may help with checking. Many MIME types can be + * detected by looking at the extension of that file. It can be the name of the file, its path + * or an URL. 
+ * + * @return a string with the MIME type or null if the MIME type has not been recognized + * @throws IOException if an I/O error occurs while reading from the stream + */ + public String identifyMimeType(InputStream stream, String nameOrPathOrUrl) throws IOException { + URI uri = null; + if (nameOrPathOrUrl != null) { + uri = tryToConvertToUri(nameOrPathOrUrl); + } + return identifyMimeType(stream, uri); + } + + private DataObject accessUri(URI topLevelUri, RDFContainerFactory fac) throws UrlNotFoundException, IOException { + int colonIndex = topLevelUri.toString().indexOf(":"); + if (colonIndex < 0) { + throw new IllegalArgumentException("The URI " + topLevelUri + "doesn't contain a colon"); + } + String scheme = topLevelUri.toString().substring(0, colonIndex); + Set dafSet = accessorRegistry.get(scheme); + if (dafSet == null || dafSet.isEmpty()) { + return null; + } + DataAccessorFactory daf = (DataAccessorFactory)dafSet.iterator().next(); + if (daf == null) { + return null; + } + DataAccessor da = daf.get(); + DataObject dob = da.getDataObject(topLevelUri.toString(), null, null, fac); + return dob; + } + + + + private boolean tryToApplyExtractors(InputStream stream, URI uri, RDFContainer container, String mimeType) throws IOException { + boolean ok = false; + try { + ok = applyExtractor(uri, stream, mimeType, container); + if (ok) { + return true; + } + + ok = applyFileExtractor(uri, stream, mimeType, container); + if (ok) { + return true; + } + } + catch (Exception e) { // this should cover both ExtractorExceptions and IOExceptions + logger.warn("Couldn't extract information from: " + uri.toString(), e); + } + return false; + } + + private String identifyMimeType(InputStream stream, URI uri) throws IOException { + byte[] bytes = null; + if (stream != null) { + int minimumArrayLength = identifier.getMinArrayLength(); + if (stream.markSupported()) { + stream.mark(minimumArrayLength + 10); // add some for safety + } + bytes = IOUtil.readBytes(stream, 
minimumArrayLength); + if (stream.markSupported()) { + stream.reset(); + } + } + return identifier.identify(bytes, null, uri); + } + + @SuppressWarnings("unchecked") + private boolean applyExtractor(URI id, InputStream contentStream, String mimeType, RDFContainer metadata) + throws ExtractorException { + Set extractors = extractorRegistry.getExtractorFactories(mimeType); + if (!extractors.isEmpty()) { + ExtractorFactory factory = (ExtractorFactory) extractors.iterator().next(); + Extractor extractor = factory.get(); + extractor.extract(id, contentStream, null, mimeType, metadata); + return true; + } + else { + return false; + } + } + + @SuppressWarnings("unchecked") + private boolean applyFileExtractor(URI id, InputStream stream, String mimeType, RDFContainer metadata) + throws ExtractorException, IOException { + Set fileextractors = extractorRegistry.getFileExtractorFactories(mimeType); + if (!fileextractors.isEmpty()) { + FileExtractorFactory factory = (FileExtractorFactory) fileextractors.iterator().next(); + FileExtractor extractor = factory.get(); + File tempFile = null; + try { + tempFile = File.createTempFile("aperture", "tmp"); + IOUtil.writeStream(stream, tempFile); + extractor.extract(id, tempFile, null, mimeType, metadata); + return true; + } + finally { + if (tempFile != null) { + tempFile.delete(); + } + } + } + else { + return false; + } + } + + private void closeClosable(InputStream stream) { + if (stream != null) { + try { + stream.close(); + } catch (Exception e) { + // nothing to be done... 
+ } + } + } + + private String getFullText(RDFContainer container) { + String string = container.getString(NIE.plainTextContent); + if (string == null) { + string = container.getString(org.semanticdesktop.aperture.vocabulary.NMO.plainTextMessageContent); + if (string == null) { + string = container.getString(NID3.unsynchronizedTextContent); + } + } + return string; + } + + /** + * This is a hack that allows us to return the metadata RDFContainers while their + * parent data objects are disposed properly and don't cause warnings + * @param ob + */ + private void safelyDispose(DataObject ob) { + DataObjectBase obj = (DataObjectBase)ob; + obj.setMetadata(new RDFContainerImpl(null, RDF.Bag, true)); + obj.dispose(); + } + + /** + * Returns the extractor registry. + * @return the extractor registry. + */ + public ExtractorRegistry getExtractorRegistry() { + return this.extractorRegistry; + } + + /** + * Returns the mime type identifier. + * @return the mime type identifier. + */ + public MimeTypeIdentifier getMimeTypeIdentifier() { + return this.identifier; + } + +} Property changes on: aperture-addons/trunk/src/main/java/org/semanticdesktop/aperture/util/ExpandedApertureRuntime.java ___________________________________________________________________ Added: svn:mime-type + text/plain Added: aperture-addons/trunk/src/main/java/org/semanticdesktop/aperture/util/ExpandedApertureRuntimeMain.java =================================================================== --- aperture-addons/trunk/src/main/java/org/semanticdesktop/aperture/util/ExpandedApertureRuntimeMain.java (rev 0) +++ aperture-addons/trunk/src/main/java/org/semanticdesktop/aperture/util/ExpandedApertureRuntimeMain.java 2010-07-05 21:43:10 UTC (rev 2381) @@ -0,0 +1,47 @@ +/** + * Copyright (c) 2010 Aduna and Deutsches Forschungszentrum fuer Kuenstliche Intelligenz DFKI GmbH. + * All rights reserved. + * + * Licensed under the Aperture BSD-style license. 
+ */ +package org.semanticdesktop.aperture.util; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; + +import org.ontoware.rdf2go.model.node.URI; +import org.semanticdesktop.aperture.subcrawler.SubCrawlerException; +import org.semanticdesktop.aperture.util.ExpandedApertureRuntime.TextProcessor; + +/** + * @author Antoni + * + */ +public class ExpandedApertureRuntimeMain { + + public static void main(String [] args) throws IOException, SubCrawlerException { + if (args.length < 1) { + usage(); + } + File file = new File(args[0]); + FileInputStream st = new FileInputStream(file); + ExpandedApertureRuntime rt = new ExpandedApertureRuntime(); + rt.extractAllText(st, file.toURI().toString(), new TextProcessor() { + + public void process(URI uri, String text) { + System.out.println("----------------------"); + System.out.println(uri.toString()); + System.out.println("----------------------"); + System.out.println(text); + System.out.println("----------------------"); + } + }); + + } + + private static void usage() { + System.out.println("Usage: java -cp <classpath> <package>.ExpandedApertureRuntimeMain <fileToCrawl>"); + System.exit(-1); + } +} Property changes on: aperture-addons/trunk/src/main/java/org/semanticdesktop/aperture/util/ExpandedApertureRuntimeMain.java ___________________________________________________________________ Added: svn:mime-type + text/plain This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <my...@us...> - 2010-08-10 18:04:20
|
Revision: 2407 http://aperture.svn.sourceforge.net/aperture/?rev=2407&view=rev Author: mylka Date: 2010-08-10 18:04:14 +0000 (Tue, 10 Aug 2010) Log Message: ----------- [3035317] applied the patch to ExpandedApertureRuntime submitted by cbamford Modified Paths: -------------- aperture-addons/trunk/src/main/java/org/semanticdesktop/aperture/accessor/impl/defaults.xml aperture-addons/trunk/src/main/java/org/semanticdesktop/aperture/util/ExpandedApertureRuntime.java aperture-addons/trunk/src/main/java/org/semanticdesktop/aperture/util/ExpandedApertureRuntimeMain.java Modified: aperture-addons/trunk/src/main/java/org/semanticdesktop/aperture/accessor/impl/defaults.xml =================================================================== --- aperture-addons/trunk/src/main/java/org/semanticdesktop/aperture/accessor/impl/defaults.xml 2010-08-10 10:47:41 UTC (rev 2406) +++ aperture-addons/trunk/src/main/java/org/semanticdesktop/aperture/accessor/impl/defaults.xml 2010-08-10 18:04:14 UTC (rev 2407) @@ -5,6 +5,7 @@ <name>org.semanticdesktop.aperture.accessor.file.FileAccessorFactory</name> </dataAccessorFactory> <dataAccessorFactory> - <name>dfki.km.medico.aperture.accessor.webdav.WebdavAccessorFactory</name> + <!--name>dfki.km.medico.aperture.accessor.webdav.WebdavAccessorFactory</name --> + <name>org.semanticdesktop.aperture.webdav.accessor.WebdavAccessorFactory</name> </dataAccessorFactory> </dataAccessorFactories> \ No newline at end of file Modified: aperture-addons/trunk/src/main/java/org/semanticdesktop/aperture/util/ExpandedApertureRuntime.java =================================================================== --- aperture-addons/trunk/src/main/java/org/semanticdesktop/aperture/util/ExpandedApertureRuntime.java 2010-08-10 10:47:41 UTC (rev 2406) +++ aperture-addons/trunk/src/main/java/org/semanticdesktop/aperture/util/ExpandedApertureRuntime.java 2010-08-10 18:04:14 UTC (rev 2407) @@ -211,7 +211,7 @@ return extractFrom(stream, file.toURI().toString()); } - public void 
extractAllText(InputStream stream, String nameOrPathOrUrl, TextProcessor processor) throws IOException, SubCrawlerException { + public void extractAllText(InputStream stream, String nameOrPathOrUrl, TextProcessor processor, String mimeType) throws IOException, SubCrawlerException { if (stream == null) { throw new NullPointerException("stream cannot be null"); } @@ -233,7 +233,9 @@ Model model = RDF2Go.getModelFactory().createModel(); model.open(); RDFContainer container = new RDFContainerImpl(model,uri); - String mimeType = identifyMimeType(stream, uri); + if (mimeType == null) { + mimeType = identifyMimeType(stream, uri); + } boolean ok = tryToApplyExtractors(stream, uri, container, mimeType); if (ok) { String text = getFullText(container); @@ -268,7 +270,7 @@ if (object instanceof FileDataObject) { try { InputStream contentStream = ((FileDataObject)object).getContent(); - extractAllText(contentStream, object.getID().toString(), processor); + extractAllText(contentStream, object.getID().toString(), processor, null); } catch (RuntimeException e) { throw e; } catch (Exception e) { Modified: aperture-addons/trunk/src/main/java/org/semanticdesktop/aperture/util/ExpandedApertureRuntimeMain.java =================================================================== --- aperture-addons/trunk/src/main/java/org/semanticdesktop/aperture/util/ExpandedApertureRuntimeMain.java 2010-08-10 10:47:41 UTC (rev 2406) +++ aperture-addons/trunk/src/main/java/org/semanticdesktop/aperture/util/ExpandedApertureRuntimeMain.java 2010-08-10 18:04:14 UTC (rev 2407) @@ -36,7 +36,7 @@ System.out.println(text); System.out.println("----------------------"); } - }); + }, null); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |