From: <my...@us...> - 2010-04-06 13:39:46
|
Revision: 2317 http://aperture.svn.sourceforge.net/aperture/?rev=2317&view=rev Author: mylka Date: 2010-04-06 13:39:38 +0000 (Tue, 06 Apr 2010) Log Message: ----------- fixes in VcardSubCrawler (support missing N property components, support relative URL property values, add proper isPartOf links to everything) and in IcalCrawler (add proper isPartOf links to everything, both in normal and attachmentsOnlyMode) Modified Paths: -------------- aperture/trunk/core/pom.xml aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/crawler/ical/IcalCrawler.java aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/vcard/VcardSubCrawler.java aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/vcard/VcardSubCrawlerTest.java aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/test/subcrawler/TestBasicSubCrawlerHandler.java Added Paths: ----------- aperture/trunk/core/src/test/resources/org/semanticdesktop/aperture/docs/vcard-illegalurl.vcf aperture/trunk/core/src/test/resources/org/semanticdesktop/aperture/docs/vcard-incompletenproperty.vcf Modified: aperture/trunk/core/pom.xml =================================================================== --- aperture/trunk/core/pom.xml 2010-04-02 17:54:09 UTC (rev 2316) +++ aperture/trunk/core/pom.xml 2010-04-06 13:39:38 UTC (rev 2317) @@ -277,7 +277,7 @@ <dependency> <groupId>net.fortuna.ical4j</groupId> <artifactId>ical4j-vcard</artifactId> - <version>0.9.3.ant20100401</version> + <version>0.9.3.ant20100406</version> <exclusions> <exclusion> <groupId>commons-io</groupId> Modified: aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/crawler/ical/IcalCrawler.java =================================================================== --- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/crawler/ical/IcalCrawler.java 2010-04-02 17:54:09 UTC (rev 2316) +++ aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/crawler/ical/IcalCrawler.java 2010-04-06 
13:39:38 UTC (rev 2317) @@ -141,6 +141,8 @@ private SubCrawlerHandler subCrawlerHandler; private boolean attachmentsOnlyMode; + + private Resource calendarResource; /** Default constructor. */ public IcalCrawler() { @@ -227,6 +229,7 @@ */ parentMetadata.add(RDF.type, NCAL.Calendar); parentMetadata.add(NIE.plainTextContent, new String(bytes, Charset.forName("UTF-8"))); + this.calendarResource = parentMetadata.getDescribedUri(); builder = new CalendarBuilder(); calendar = builder.build(new ByteArrayInputStream(bytes)); @@ -1263,6 +1266,13 @@ RDFContainer attachmentContainer = prepareDataObjectRDFContainer(attachmentUri); addStatement(rdfContainer, parentNode, NCAL.attach, attachmentUri); addStatement(attachmentContainer, attachmentUri, RDF.type, NCAL.Attachment); + + if (attachmentsOnlyMode) { + addStatement(attachmentContainer, attachmentUri, NIE.isPartOf, calendarResource); + } else { + addStatement(attachmentContainer, attachmentUri, NIE.isPartOf, parentNode); + } + crawlParameterList(property, attachmentContainer); passAttachmentToHandler(attachmentContainer, attach.getBinary()); Modified: aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/vcard/VcardSubCrawler.java =================================================================== --- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/vcard/VcardSubCrawler.java 2010-04-02 17:54:09 UTC (rev 2316) +++ aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/vcard/VcardSubCrawler.java 2010-04-06 13:39:38 UTC (rev 2317) @@ -259,6 +259,7 @@ processContact(contact, container, contactUri, handler, accessData, source, out); parentMetadata.add(NCO.containsContact, contactUri); container.add(RDF.type, NCO.ContactListDataObject); + container.add(NIE.isPartOf, parentMetadata.getDescribedUri()); passMetadataToHandler(container, handler, contactHash, accessData, source); } catch (Exception e) { logger.warn("Failed to process vcard",e); @@ -341,7 +342,7 @@ for 
(int i = 0; i < length(name.getAdditionalNames()); i++) { String addName = name.getAdditionalNames()[i]; - model.addStatement(contactResource, NCO.nameAdditional, addName); + addStringProperty(model, contactResource, NCO.nameAdditional, addName); } BDay bday = (BDay)vc.getProperty(Id.BDAY); if (bday != null) { @@ -751,7 +752,8 @@ RDFContainerFactory factory = handler.getRDFContainerFactory(attachmentUri.toString()); RDFContainer container = factory.getRDFContainer(attachmentUri); parentMetadata.add(property, attachmentUri); - container.add(RDF.type, NFO.Audio); + container.add(NIE.isPartOf, contactResource); + container.add(RDF.type, NFO.Audio); container.add(RDF.type, NFO.Attachment); addStringProperty(container.getModel(), container.getDescribedUri(), NIE.mimeType, mimeType); passAttachmentToHandler(container, handler, attachmentHash, accessData, source, bytes); @@ -766,6 +768,7 @@ RDFContainer container = factory.getRDFContainer(attachmentUri); parentMetadata.add(property, attachmentUri); container.add(RDF.type, NIE.InformationElement); + container.add(NIE.isPartOf, contactResource); container.add(RDF.type, NFO.Attachment); addStringProperty(container.getModel(), container.getDescribedUri(), NIE.mimeType, mimeType); passAttachmentToHandler(container, handler, attachmentHash, accessData, source, bytes); @@ -784,11 +787,23 @@ } } - private void addUriProperty(Model model, Resource contactResource, URI property, String uriString) { + private void addUriProperty(Model model, Resource contactResource, URI property, String uriString) { if (uriString != null) { - URI uri = model.createURI(uriString); - model.addStatement(contactResource, property, uri); - model.addStatement(uri, RDF.type, RDFS.Resource); + if (uriString.startsWith("www.")) { + // a little fallback, for a very common case of an uri + // that starts with www. 
but doesn't contain the http:// prefix + // this causes an error on some model implementations that require + // absolute URIs + uriString = "http://" + uriString; + } + URI uri = null; + try { + uri = model.createURI(uriString); + model.addStatement(contactResource, property, uri); + model.addStatement(uri, RDF.type, RDFS.Resource); + } catch (Exception e) { + logger.warn("Couldn't add an uri property",e); + } } } Modified: aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/vcard/VcardSubCrawlerTest.java =================================================================== --- aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/vcard/VcardSubCrawlerTest.java 2010-04-02 17:54:09 UTC (rev 2316) +++ aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/vcard/VcardSubCrawlerTest.java 2010-04-06 13:39:38 UTC (rev 2317) @@ -7,18 +7,22 @@ package org.semanticdesktop.aperture.subcrawler.vcard; import java.io.InputStream; +import java.net.URISyntaxException; import java.util.Iterator; import java.util.Set; import org.ontoware.aifbcommons.collection.ClosableIterator; +import org.ontoware.rdf2go.exception.ModelRuntimeException; import org.ontoware.rdf2go.model.Model; import org.ontoware.rdf2go.model.QueryResultTable; import org.ontoware.rdf2go.model.QueryRow; import org.ontoware.rdf2go.model.Statement; +import org.ontoware.rdf2go.model.impl.DelegatingModel; import org.ontoware.rdf2go.model.node.Node; import org.ontoware.rdf2go.model.node.NodeOrVariable; import org.ontoware.rdf2go.model.node.Resource; import org.ontoware.rdf2go.model.node.ResourceOrVariable; +import org.ontoware.rdf2go.model.node.URI; import org.ontoware.rdf2go.model.node.UriOrVariable; import org.ontoware.rdf2go.model.node.Variable; import org.ontoware.rdf2go.model.node.impl.PlainLiteralImpl; @@ -26,9 +30,11 @@ import org.ontoware.rdf2go.vocabulary.RDF; import org.semanticdesktop.aperture.accessor.AccessData; import 
org.semanticdesktop.aperture.accessor.base.AccessDataImpl; +import org.semanticdesktop.aperture.extractor.ExtractorRegistry; import org.semanticdesktop.aperture.extractor.impl.DefaultExtractorRegistry; import org.semanticdesktop.aperture.rdf.RDFContainer; import org.semanticdesktop.aperture.rdf.impl.RDFContainerImpl; +import org.semanticdesktop.aperture.subcrawler.SubCrawlerRegistry; import org.semanticdesktop.aperture.test.subcrawler.SubCrawlerTestBase; import org.semanticdesktop.aperture.test.subcrawler.TestBasicSubCrawlerHandler; import org.semanticdesktop.aperture.util.DateUtil; @@ -139,18 +145,89 @@ assertNewModUnmod(handler, 1, 0, 0); assertTrue(metadata.getString(NIE.plainTextContent).contains("ORG:AM Services;Police")); - metadata.getModel().dump(); assertTrue(metadata.getModel().contains( new URIImpl("vcard:uri:dummyuri!/d6bb8c38b78663b2aeef0b30538968660caf95c9"), new URIImpl("http://www.semanticdesktop.org/ontologies/2007/01/19/nie#mimeType"), new PlainLiteralImpl("image/jpeg"))); + // we need a proper isPartOf triple + assertTrue(metadata.getModel().contains( + new URIImpl("vcard:uri:dummyuri!/d6bb8c38b78663b2aeef0b30538968660caf95c9"), + NIE.isPartOf, + metadata.getDescribedUri())); + validate(metadata); metadata.dispose(); metadata = null; } + /** + * A line like this: + * + * <pre> + * N;LANGUAGE=en-au:Mylka;Antoni;;Mr. 
+ * </pre> + * + * Should yield a proper N property and not a parse error due to a + * missing semicolon at the end + * + * @throws Exception + */ + public void testMissingNComponents() throws Exception { + VcardSubCrawler subCrawler = new VcardSubCrawler(); + metadata = subCrawl(DOCS_PATH + "vcard-incompletenproperty.vcf", subCrawler); + // no additional data objects + assertNewModUnmod(handler, 0, 0, 0); + + assertTrue(metadata.getString(NIE.plainTextContent).contains("NOTE;ENCODING=QUOTED-PRINTABLE:Antoni Mylka from Aperture project=0D=0A")); + + checkStatement(NCO.nameFamily, "Mylka", metadata); + checkStatement(NCO.nameGiven, "Antoni", metadata); + + assertNull(metadata.getString(NCO.nameAdditional)); + checkStatement(NCO.nameHonorificPrefix, "Mr.", metadata); + assertNull(metadata.getString(NCO.nameHonorificSuffix)); + + validate(metadata); + metadata.dispose(); + metadata = null; + } + + /** + * A line like this: + * + * <pre> + * URL;WORK:www.mylka.pl + * </pre> + * + * Should yield a proper URI and not a parse error. The problem is that some + * {@link Model} implementations accept 'www.mylka.pl' as a proper URI and some + * don't. The {@link VcardSubCrawler} should accommodate for those that don't. + * + * The case with an URL property beginning with www. is very common and could + * be treated specially. + * + * In order to enforce uri checking we hacked a little special RDFContainerFactory. + * + * Viva la anonymous inner classes. 
+ * + * @throws Exception + */ + public void testIllegalURLProperty() throws Exception { + VcardSubCrawler subCrawler = new VcardSubCrawler(); + + metadata = subCrawl(DOCS_PATH + "vcard-illegalurl.vcf", subCrawler); + assertNewModUnmod(handler, 0, 0, 0); + assertTrue(metadata.getString(NIE.plainTextContent).contains("URL;WORK:www.mylka.pl")); + + checkStatement(NCO.url, new URIImpl("http://www.mylka.pl"), metadata); + + validate(metadata); + metadata.dispose(); + metadata = null; + } + public void testKontactExampleExtraction() throws Exception { VcardSubCrawler subCrawler = new VcardSubCrawler(); metadata = subCrawl(DOCS_PATH + "vcard-antoni-kontact.vcf", subCrawler); @@ -216,6 +293,7 @@ for (int i = 0; i < 30; i++) { String st = id.next(); assertTrue(st.startsWith("vcard:uri:dummyuri!/")); + assertEquals(metadata.getDescribedUri(),findSingleObjectResource(metadata.getModel(), new URIImpl(st), NIE.isPartOf)); } metadata.dispose(); metadata = null; Modified: aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/test/subcrawler/TestBasicSubCrawlerHandler.java =================================================================== --- aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/test/subcrawler/TestBasicSubCrawlerHandler.java 2010-04-02 17:54:09 UTC (rev 2316) +++ aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/test/subcrawler/TestBasicSubCrawlerHandler.java 2010-04-06 13:39:38 UTC (rev 2317) @@ -75,7 +75,7 @@ initialize(exReg,subReg); } - private void initialize(ExtractorRegistry registry, SubCrawlerRegistry subCrawlerReg) { + protected void initialize(ExtractorRegistry registry, SubCrawlerRegistry subCrawlerReg) { model = RDF2Go.getModelFactory().createModel(); model.open(); newObjects = new HashSet<String>(); Added: aperture/trunk/core/src/test/resources/org/semanticdesktop/aperture/docs/vcard-illegalurl.vcf =================================================================== --- 
aperture/trunk/core/src/test/resources/org/semanticdesktop/aperture/docs/vcard-illegalurl.vcf (rev 0) +++ aperture/trunk/core/src/test/resources/org/semanticdesktop/aperture/docs/vcard-illegalurl.vcf 2010-04-06 13:39:38 UTC (rev 2317) @@ -0,0 +1,10 @@ +BEGIN:VCARD +VERSION:2.1 +N;LANGUAGE=en-au:Mylka;Antoni;;Mr. +FN:Antoni Mylka +ADR;WORK:;Szczecin +URL;WORK:www.mylka.pl +NOTE;ENCODING=QUOTED-PRINTABLE:Antoni Mylka from Aperture project=0D=0A +EMAIL;PREF;INTERNET:an...@my... +REV:20071109T104602Z +END:VCARD Added: aperture/trunk/core/src/test/resources/org/semanticdesktop/aperture/docs/vcard-incompletenproperty.vcf =================================================================== --- aperture/trunk/core/src/test/resources/org/semanticdesktop/aperture/docs/vcard-incompletenproperty.vcf (rev 0) +++ aperture/trunk/core/src/test/resources/org/semanticdesktop/aperture/docs/vcard-incompletenproperty.vcf 2010-04-06 13:39:38 UTC (rev 2317) @@ -0,0 +1,9 @@ +BEGIN:VCARD +VERSION:2.1 +N;LANGUAGE=en-au:Mylka;Antoni;;Mr. +FN:Antoni Mylka +ADR;WORK:;Szczecin +NOTE;ENCODING=QUOTED-PRINTABLE:Antoni Mylka from Aperture project=0D=0A +EMAIL;PREF;INTERNET:an...@my... +REV:20071109T104602Z +END:VCARD This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |