From: <my...@us...> - 2010-05-26 16:46:10
|
Revision: 2345 http://aperture.svn.sourceforge.net/aperture/?rev=2345&view=rev Author: mylka Date: 2010-05-26 16:46:03 +0000 (Wed, 26 May 2010) Log Message: ----------- [2932901] added proper isPartOf links between mbox emails and the mbox file in the mbox subcrawler Modified Paths: -------------- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/mbox/MboxSubCrawler.java aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/mbox/MboxSubCrawlerTest.java Modified: aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/mbox/MboxSubCrawler.java =================================================================== --- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/mbox/MboxSubCrawler.java 2010-05-26 16:38:56 UTC (rev 2344) +++ aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/mbox/MboxSubCrawler.java 2010-05-26 16:46:03 UTC (rev 2345) @@ -26,6 +26,7 @@ import org.semanticdesktop.aperture.subcrawler.SubCrawlerException; import org.semanticdesktop.aperture.subcrawler.SubCrawlerHandler; import org.semanticdesktop.aperture.subcrawler.base.AbstractSubCrawler; +import org.semanticdesktop.aperture.vocabulary.NIE; /** * <p> @@ -56,9 +57,10 @@ MimeMessage message = new MimeMessage(null,new ByteArrayInputStream(bytes)); URI attachmentUriPrefix = createChildUri(parentMetadata.getDescribedUri(), MailUtil.getMessageId(message)); RDFContainerFactory myFac = handler.getRDFContainerFactory(parentMetadata.getDescribedUri().toString()); - fac = new DataObjectFactory(message,myFac,null,dataSource,attachmentUriPrefix,null,"/",attachmentUriPrefix.toString()); + fac = new DataObjectFactory(message,myFac,null,dataSource, + attachmentUriPrefix,null,"/",attachmentUriPrefix.toString()); DataObject object = null; - + boolean first = true; /* * Note that the stopRequested check is BEFORE getObject(). Otherwise if the crawler is stopped * the object is obtained and only AFTER this loop is stopped. This object is not disposed by the @@ -70,6 +72,12 @@ // first of all get a string version of the message uri String queuedUri = object.getID().toString(); + if (first) { + object.getMetadata().add(NIE.isPartOf,parentMetadata.getDescribedUri()); + first = false; + } + + /* * See if this url has been accessed before so that we can stop immediately. Note that no * check on message date is done as messages are immutable. Therefore we only have to check @@ -106,6 +114,8 @@ * added to the objects metadata RDFContainer */ MailUtil.registerParentRelationshipInAccessData(object, accessData); + + /* * Report this object as a new object (assumption: objects are always new, never changed, Modified: aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/mbox/MboxSubCrawlerTest.java =================================================================== --- aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/mbox/MboxSubCrawlerTest.java 2010-05-26 16:38:56 UTC (rev 2344) +++ aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/mbox/MboxSubCrawlerTest.java 2010-05-26 16:46:03 UTC (rev 2345) @@ -11,6 +11,7 @@ import org.ontoware.rdf2go.exception.ModelException; import org.ontoware.rdf2go.model.Model; +import org.ontoware.rdf2go.model.node.URI; import org.ontoware.rdf2go.model.node.impl.URIImpl; import org.semanticdesktop.aperture.accessor.AccessData; import org.semanticdesktop.aperture.accessor.DataObject; @@ -23,6 +24,7 @@ import org.semanticdesktop.aperture.subcrawler.SubCrawlerUtil; import org.semanticdesktop.aperture.test.subcrawler.TestBasicSubCrawlerHandler; import org.semanticdesktop.aperture.util.ResourceUtil; +import org.semanticdesktop.aperture.vocabulary.NIE; /** * Tests for the {@link MboxSubCrawler} @@ -143,13 +145,21 @@ SubCrawler accessor = new MboxSubCrawler(); for (String ob : handler.getNewObjects()) { + URI obUri = new URIImpl(ob); InputStream stream = ResourceUtil.getInputStream(DOCS_PATH + "mbox-testfolder", getClass()); DataObject obj = accessor.getDataObject(new URIImpl("uri:dummyuri"), - SubCrawlerUtil.getSubCrawledObjectPath(new URIImpl(ob)), stream, null,null,null, + SubCrawlerUtil.getSubCrawledObjectPath(obUri), stream, null,null,null, new RDFContainerFactoryImpl()); assertDataObjectOK(obj); obj.dispose(); stream.close(); + if (!ob.endsWith("/1")) { + // emails should have isPartOf links to their parent files + checkStatement(obUri, NIE.isPartOf, new URIImpl("uri:dummyuri"), handler.getModel()); + } else { + // attachments shouldn't + assertFalse(handler.getModel().contains(obUri, NIE.isPartOf, new URIImpl("uri:dummyuri"))); + } } handler.close(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |