From: <my...@us...> - 2010-03-12 10:52:58
|
Revision: 2295 http://aperture.svn.sourceforge.net/aperture/?rev=2295&view=rev Author: mylka Date: 2010-03-12 10:52:52 +0000 (Fri, 12 Mar 2010) Log Message: ----------- 2969249 - added a new method, that extracts information from a URL, with the help of accessors. This fixes the problem because HTTPAccessor reads the Charset from HTTP headers, something which the normal HtmlExtractor can't Modified Paths: -------------- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/runtime/ApertureRuntime.java aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/runtime/ApertureRuntimeTest.java Modified: aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/runtime/ApertureRuntime.java =================================================================== --- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/runtime/ApertureRuntime.java 2010-03-12 09:02:32 UTC (rev 2294) +++ aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/runtime/ApertureRuntime.java 2010-03-12 10:52:52 UTC (rev 2295) @@ -11,6 +11,7 @@ import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; +import java.net.MalformedURLException; import java.util.Set; import java.util.regex.Pattern; @@ -18,6 +19,15 @@ import org.ontoware.rdf2go.model.Model; import org.ontoware.rdf2go.model.node.URI; import org.ontoware.rdf2go.model.node.impl.URIImpl; +import org.semanticdesktop.aperture.accessor.DataAccessor; +import org.semanticdesktop.aperture.accessor.DataAccessorFactory; +import org.semanticdesktop.aperture.accessor.DataAccessorRegistry; +import org.semanticdesktop.aperture.accessor.DataObject; +import org.semanticdesktop.aperture.accessor.FileDataObject; +import org.semanticdesktop.aperture.accessor.RDFContainerFactory; +import org.semanticdesktop.aperture.accessor.UrlNotFoundException; +import org.semanticdesktop.aperture.accessor.base.RDFContainerFactoryImpl; +import 
org.semanticdesktop.aperture.accessor.impl.DefaultDataAccessorRegistry; import org.semanticdesktop.aperture.extractor.Extractor; import org.semanticdesktop.aperture.extractor.ExtractorException; import org.semanticdesktop.aperture.extractor.ExtractorFactory; @@ -29,6 +39,9 @@ import org.semanticdesktop.aperture.mime.identifier.magic.MagicMimeTypeIdentifier; import org.semanticdesktop.aperture.rdf.RDFContainer; import org.semanticdesktop.aperture.rdf.impl.RDFContainerImpl; +import org.semanticdesktop.aperture.subcrawler.SubCrawlerRegistry; +import org.semanticdesktop.aperture.subcrawler.SubCrawlerUtil; +import org.semanticdesktop.aperture.subcrawler.impl.DefaultSubCrawlerRegistry; import org.semanticdesktop.aperture.util.HttpClientUtil; import org.semanticdesktop.aperture.util.IOUtil; import org.slf4j.Logger; @@ -44,15 +57,124 @@ private Logger logger = LoggerFactory.getLogger(ApertureRuntime.class); + private DataAccessorRegistry accessorRegistry; + private SubCrawlerRegistry subCrawlerRegistry; private ExtractorRegistry extractorRegistry; private MimeTypeIdentifier identifier; public ApertureRuntime() { this.extractorRegistry = new DefaultExtractorRegistry(); this.identifier = new MagicMimeTypeIdentifier(); + this.accessorRegistry = new DefaultDataAccessorRegistry(); + this.subCrawlerRegistry = new DefaultSubCrawlerRegistry(); } + + /** + * Tries to extract as much information from the given URI as possible. + * + * @param uri the uri from which information is to be extracted. Only URIs with schemes supported by + * the {@link DefaultDataAccessorRegistry} registry and {@link DefaultSubCrawlerRegistry} can be accessed and extracted. In most cases + * the resulting InputStream will be read in its entirety, and the method may try to download the + * content into a temporary file (created with {@link File#createTempFile(String, String)}). 
The + * file will be deleted before this method returns, yet there must be enough room on the partition + * that houses the temporary folder. + * + * @return an {@link RDFContainer} instance containing the data extracted from the URL. It must be disposed + * properly by the caller of this method, with a call to {@link RDFContainer#dispose()}. This method + * may return null if an object with this URI has not been found. + * + * @throws IllegalArgumentException if the uriString is invalid + * @throws IOException if an I/O error occurs during processing + */ + public RDFContainer extractFrom(String uriString) throws IOException { + if (uriString == null) { + throw new NullPointerException("The URL cannot be null"); + } + RDFContainerFactory fac = new RDFContainerFactoryImpl(); + URI uri = new URIImpl(uriString); + DataObject obj = null; + if (SubCrawlerUtil.isSubcrawledObjectUri(uri)) { + URI topLevelUri = SubCrawlerUtil.getRootObjectUri(uri); + DataObject rootObj = null; + try { + rootObj = accessUri(topLevelUri,fac); + if (rootObj == null) { + return null; + } else if (!(rootObj instanceof FileDataObject)) { + return null; + } + FileDataObject fobj = (FileDataObject)rootObj; + InputStream contentStream = fobj.getContent(); + if (contentStream == null) { + return null; + } + obj = SubCrawlerUtil.getDataObject(uri, contentStream, null, null, null, new RDFContainerFactoryImpl(), subCrawlerRegistry); + } catch (Exception e) { + if (rootObj != null) { + rootObj.dispose(); + } + return null; + } + } else { + obj = accessUri(uri,fac); + } + + if (obj == null) { + return null; + } else if (!(obj instanceof FileDataObject)) { + return obj.getMetadata(); + } else { + FileDataObject fobj = (FileDataObject)obj; + InputStream stream = fobj.getContent(); + RDFContainer container = fobj.getMetadata(); + if (stream == null) { + return container; + } + tryToApplyExtractors(stream, uri, container); + return container; + } + } + + private void tryToApplyExtractors(InputStream stream, 
URI uri, RDFContainer container) throws IOException { + boolean ok = false; + try { + String mimeType = identifyMimeType(stream, uri); + + ok = applyExtractor(uri, stream, mimeType, container); + if (ok) { + return; + } + + ok = applyFileExtractor(uri, stream, mimeType, container); + if (ok) { + return; + } + } + catch (Exception e) { // this should cover both ExtractorExceptions and IOExceptions + logger.warn("Couldn't extract information from: " + uri.toString(), e); + } + } - /** + private DataObject accessUri(URI topLevelUri, RDFContainerFactory fac) throws UrlNotFoundException, IOException { + int colonIndex = topLevelUri.toString().indexOf(":"); + if (colonIndex < 0) { + throw new IllegalArgumentException("The URI " + topLevelUri + "doesn't contain a colon"); + } + String scheme = topLevelUri.toString().substring(0, colonIndex); + Set dafSet = accessorRegistry.get(scheme); + if (dafSet == null || dafSet.isEmpty()) { + return null; + } + DataAccessorFactory daf = (DataAccessorFactory)dafSet.iterator().next(); + if (daf == null) { + return null; + } + DataAccessor da = daf.get(); + DataObject dob = da.getDataObject(topLevelUri.toString(), null, null, fac); + return dob; + } + + /** * Tries to extract as much information from the given input stream as possible. * * @param stream the stream from which information is to be extracted. 
In most cases the stream will be Modified: aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/runtime/ApertureRuntimeTest.java =================================================================== --- aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/runtime/ApertureRuntimeTest.java 2010-03-12 09:02:32 UTC (rev 2294) +++ aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/runtime/ApertureRuntimeTest.java 2010-03-12 10:52:52 UTC (rev 2295) @@ -81,6 +81,22 @@ FileUtil.deltree(tmpDir); } } + + public void testGetInfoFromFileUrl() throws Exception { + File tmpDir = new File(System.getProperty("java.io.tmpdir"), "ApertureRuntimeTest.tmpDir").getCanonicalFile(); + try { + FileUtil.deltree(tmpDir); + tmpDir.mkdir(); + checkFullTextFromFileUrl(tmpDir,"plain-text.txt", "normal plain text"); + checkFullTextFromFileUrl(tmpDir,"plain-text-without-extension", "normal plain text"); + checkFullTextFromFileUrl(tmpDir,"html-handwritten.html", "example text."); + checkFullTextFromFileUrl(tmpDir,"xml-handwritten.xml", "handwritten XML"); + checkFullTextFromFileUrl(tmpDir,"rtf-word-2000.rtf", "example RTF"); + checkMP3FromUrl(tmpDir,"jingle3.mp3","The Aperture test album"); + } finally { + FileUtil.deltree(tmpDir); + } + } private void checkFullTextFromStream(String filename, String fullTextPart) throws IOException { InputStream stream = ResourceUtil.getInputStream(DOCS_PATH + filename, ApertureTestBase.class); @@ -100,6 +116,16 @@ cont.dispose(); } + private void checkFullTextFromFileUrl(File tempFolder, String filename, String fullTextPart) throws IOException { + InputStream stream = ResourceUtil.getInputStream(DOCS_PATH + filename, ApertureTestBase.class); + File newFile = new File(tempFolder,filename); + IOUtil.writeStream(stream, newFile); + RDFContainer cont = ar.extractFrom(newFile.toURI().toString()); + String text = cont.getString(NIE.plainTextContent); + assertTrue(text.contains(fullTextPart)); + cont.dispose(); + } + private void 
checkMP3(File tempFolder, String filename, String album) throws IOException { InputStream stream = ResourceUtil.getInputStream(DOCS_PATH + filename, ApertureTestBase.class); File newFile = new File(tempFolder,filename); @@ -110,6 +136,16 @@ cont.dispose(); } + private void checkMP3FromUrl(File tempFolder, String filename, String album) throws IOException { + InputStream stream = ResourceUtil.getInputStream(DOCS_PATH + filename, ApertureTestBase.class); + File newFile = new File(tempFolder,filename); + IOUtil.writeStream(stream, newFile); + RDFContainer cont = ar.extractFrom(newFile.toURI().toString()); + String fileAlbum = cont.getString(NID3.albumTitle); + assertEquals(fileAlbum,album); + cont.dispose(); + } + private void checkMimeType(String filename, String mimetype) throws IOException { assertEquals(mimetype, ar.identifyMimeType( ResourceUtil.getInputStream(DOCS_PATH + filename, ApertureTestBase.class),null)); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <my...@us...> - 2010-03-25 09:12:21
|
Revision: 2299 http://aperture.svn.sourceforge.net/aperture/?rev=2299&view=rev Author: mylka Date: 2010-03-25 09:12:14 +0000 (Thu, 25 Mar 2010) Log Message: ----------- a little fix in the MagicMimeTypeIdentifier that makes it work with URIs produced by crawlers that use a different subcrawled uri convention Modified Paths: -------------- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/mime/identifier/magic/MagicMimeTypeIdentifier.java aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/mime/identifier/magic/TestMagicMimeTypeIdentifier.java Modified: aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/mime/identifier/magic/MagicMimeTypeIdentifier.java =================================================================== --- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/mime/identifier/magic/MagicMimeTypeIdentifier.java 2010-03-23 19:14:24 UTC (rev 2298) +++ aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/mime/identifier/magic/MagicMimeTypeIdentifier.java 2010-03-25 09:12:14 UTC (rev 2299) @@ -372,13 +372,6 @@ } public String identify(byte[] firstBytes, String fileName, URI uri) { - // Knowledge we gained through the years: - if (fileName != null && fileName.toLowerCase().endsWith(".pdf.part")) - { - logger.debug("File "+fileName+" is considered as a part, therefore faulty. 
No mimetype returned."); - return null; - } - // see if the file is some kind of UTF file char[] firstChars = null; byte[] realBom = null; @@ -430,8 +423,10 @@ if (extension == null && uri != null) { extension = uri.toString(); - if (!uri.toString().contains("!")) { + if (!extension.contains("!/") && !extension.contains("?/")) { // don't remove the query on a subcrawled uri + // the ?/ occurs in uris produced by some crawlers + // even though it is against the convention extension = removeFragment('?', extension); extension = removeFragment('#', extension); } Modified: aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/mime/identifier/magic/TestMagicMimeTypeIdentifier.java =================================================================== --- aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/mime/identifier/magic/TestMagicMimeTypeIdentifier.java 2010-03-23 19:14:24 UTC (rev 2298) +++ aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/mime/identifier/magic/TestMagicMimeTypeIdentifier.java 2010-03-25 09:12:14 UTC (rev 2299) @@ -6,6 +6,7 @@ */ package org.semanticdesktop.aperture.mime.identifier.magic; +import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; @@ -136,6 +137,24 @@ String determinedType = identifier.identify(bytes, null, new URIImpl(uri)); assertEquals("application/vnd.openxmlformats-officedocument.wordprocessingml", determinedType); } + + /** + * Some crawlers generate incorrect subcrawled URIs that use the question mark, instead of + * an exclamation mark to separate the top-level uri from the internal path. 
+ * + * @throws Exception + */ + public void testQuestionMarkSubCrawledURI() throws Exception { + MagicMimeTypeIdentifierFactory factory = new MagicMimeTypeIdentifierFactory(); + MimeTypeIdentifier identifier = factory.get(); + // let's create some weird file that will definitely not be recognized + byte [] bytes = new byte [] {127,127,127,127,127,127,127,127,127,127,127,127}; + String uri = "pff:S:%5cwork%5ccompany%5cpst%5cinvalid_mime_test.pst?/1/1/1#1"; + // this uri should be identified as NULL, and not fallback to extension-based + // identification, because PST is not the extension of the object + String determinedType = identifier.identify(bytes, null, new URIImpl(uri)); + assertNull(determinedType); + } private void checkMimeType(String resourceName, String mimeType, MimeTypeIdentifier identifier) throws IOException { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <my...@us...> - 2010-03-26 15:49:32
|
Revision: 2300 http://aperture.svn.sourceforge.net/aperture/?rev=2300&view=rev Author: mylka Date: 2010-03-26 15:49:26 +0000 (Fri, 26 Mar 2010) Log Message: ----------- made the vcard subcrawler report the photos and sounds as separate data objects Modified Paths: -------------- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/vcard/VcardSubCrawler.java aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/TestSubCrawlerUtilIntegration.java aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/vcard/VcardSubCrawlerTest.java aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/test/ApertureTestBase.java aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/test/subcrawler/SubCrawlerTestBase.java Modified: aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/vcard/VcardSubCrawler.java =================================================================== --- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/vcard/VcardSubCrawler.java 2010-03-25 09:12:14 UTC (rev 2299) +++ aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/vcard/VcardSubCrawler.java 2010-03-26 15:49:26 UTC (rev 2300) @@ -6,11 +6,13 @@ */ package org.semanticdesktop.aperture.subcrawler.vcard; +import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.Reader; +import java.io.StringWriter; import java.nio.charset.Charset; import java.util.Date; import java.util.List; @@ -33,12 +35,14 @@ import net.fortuna.ical4j.vcard.property.BDay; import net.fortuna.ical4j.vcard.property.Email; import net.fortuna.ical4j.vcard.property.Geo; +import net.fortuna.ical4j.vcard.property.Key; import net.fortuna.ical4j.vcard.property.Logo; import net.fortuna.ical4j.vcard.property.N; import net.fortuna.ical4j.vcard.property.Nickname; import 
net.fortuna.ical4j.vcard.property.Org; import net.fortuna.ical4j.vcard.property.Photo; import net.fortuna.ical4j.vcard.property.Revision; +import net.fortuna.ical4j.vcard.property.Sound; import net.fortuna.ical4j.vcard.property.Telephone; import org.apache.commons.codec.DecoderException; @@ -53,12 +57,14 @@ import org.semanticdesktop.aperture.accessor.DataObject; import org.semanticdesktop.aperture.accessor.RDFContainerFactory; import org.semanticdesktop.aperture.accessor.base.DataObjectBase; +import org.semanticdesktop.aperture.accessor.base.FileDataObjectBase; import org.semanticdesktop.aperture.datasource.DataSource; import org.semanticdesktop.aperture.rdf.RDFContainer; import org.semanticdesktop.aperture.rdf.util.ModelUtil; import org.semanticdesktop.aperture.subcrawler.SubCrawler; import org.semanticdesktop.aperture.subcrawler.SubCrawlerException; import org.semanticdesktop.aperture.subcrawler.SubCrawlerHandler; +import org.semanticdesktop.aperture.subcrawler.SubCrawlerUtil; import org.semanticdesktop.aperture.subcrawler.base.AbstractSubCrawler; import org.semanticdesktop.aperture.util.DateUtil; import org.semanticdesktop.aperture.util.StringUtil; @@ -71,13 +77,13 @@ import org.slf4j.LoggerFactory; /** - * An Extractor Implementation working with VCard documents. + * A {@link SubCrawler} Implementation working with VCard documents. * <p> * Known issues: * <ul> * <li>The preferred contact media aren't marked as such in the output, because the NCO doesn't cover this * <li>Theoretically the email addresses can have the TYPE=x400, this is not supported, all email addresses - * are treated as internet addresses. + * are treated as Internet addresses. * <li>The VCARD specification doesn't distinguish between private and business email addresses, so this * extractor doesn't do it either. * <li>The REV property defined in RFC 2426 sec. 
3.6.4 doesn't have any direct equivalent in NCO, therefore @@ -85,16 +91,16 @@ * <li>NCO doesn't allow to preserve the order of the additional names, so this crawler discards that order. * Every additional name receives a separate nco:nameAdditional triple and the triples themselves are * unordered by definition.</li> - * <li>The above consideration also applies to nicknames. Nicknames can be ordered in the vcard but they are - * left unordered in the rdf data extracted from it.</li> - * <li>The ORG type in the vcard can specify an entity within an organization at an arbitrary level of nesting. + * <li>The above consideration also applies to nicknames. Nicknames can be ordered in the VCard but they are + * left unordered in the RDF data extracted from it.</li> + * <li>The ORG type in the VCard can specify an entity within an organization at an arbitrary level of nesting. * E.g a team within a project, within a department, within a division, within a company within a corporation. * NCO only allows for a single nco:department property of the affiliation, therefore supporting only a single * level of nesting. If more than one organizational unit is specified in the ORG element, the information about - * which unit is nested within which is lost, all units are recorded in the rdf at the same level with separate + * which unit is nested within which is lost, all units are recorded in the RDF at the same level with separate * nco:department triples attached to the affiliation resource. 
* </li> - * <li>Other elements of the vcard specification that aren't supported by NCO: (they are supported by JPIM) + * <li>Other elements of the VCard specification that aren't supported by NCO: (they are supported by JPIM) * <ul> * <li>ACCESS</li> * <li>CATEGORY</li> @@ -106,8 +112,8 @@ * <p> * <b>URIs for VCARDS</b><br/><br/> This crawler uses following conventions to generate URIS: * <ol> - * <li>If the UID parameter is present, it is concatenated to the stream id (preceeded by a hash)</li> - * <li>If it's not, then the contact is serialized to a string and a hash of that string is contactenated. to + * <li>If the UID parameter is present, it is concatenated to the stream id (preceded by a hash)</li> + * <li>If it's not, then the contact is serialized to a string and a hash of that string is concatenated. to * the stream id.</li> * </ol> * This guarantees that an unmodified contact will be detected and reported as unmodified. (Which is not the @@ -147,7 +153,7 @@ List<VCard> cards = builder.buildAll(); VCardOutputter outputter = new VCardOutputter(false); if (cards.size() == 1) { - processContact(cards.get(0), parentMetadata.getModel(), parentMetadata.getDescribedUri()); + processContact(cards.get(0), parentMetadata, parentMetadata.getDescribedUri(), handler, accessData, dataSource, outputter); } else { processAddressBook(cards, parentMetadata, handler, outputter, accessData, dataSource); @@ -196,7 +202,7 @@ URI contactUri = generateURIForContact(contact, parentMetadata, contactHash); RDFContainerFactory factory = handler.getRDFContainerFactory(contactUri.toString()); RDFContainer container = factory.getRDFContainer(contactUri); - processContact(contact, container.getModel(), contactUri); + processContact(contact, container, contactUri, handler, accessData, source, out); parentMetadata.add(NCO.containsContact, contactUri); container.add(RDF.type, NCO.ContactListDataObject); passMetadataToHandler(container, handler, contactHash, accessData, source); @@ -225,17 
+231,50 @@ } } } + + private void passAttachmentToHandler(RDFContainer container, SubCrawlerHandler handler, + String attachmentHash, AccessData accessData, DataSource source, byte [] bytes) { + URI uri = container.getDescribedUri(); + DataObject object = new FileDataObjectBase(uri, source, container, new ByteArrayInputStream(bytes)); + if (accessData == null) { + handler.objectNew(object); + } else if (!accessData.isKnownId(uri.toString())) { + accessData.put(uri.toString(), OBJECT_HASH_KEY, attachmentHash); + handler.objectNew(object); + } else { + String oldHash = accessData.get(uri.toString(), OBJECT_HASH_KEY); + if (oldHash == null || !oldHash.equals(attachmentHash)) { + accessData.put(uri.toString(), OBJECT_HASH_KEY, attachmentHash); + handler.objectChanged(object); + } else { + handler.objectNotModified(uri.toString()); + } + } + } - private void processContact(VCard contact, Model model, Resource contactResource) { + private void processContact(VCard contact, RDFContainer container, URI contactResource, SubCrawlerHandler handler, + AccessData accessData, DataSource dataSource, VCardOutputter out) { + Model model = container.getModel(); model.addStatement(contactResource, RDF.type, NCO.Contact); - processPersonalIdentity(contact, model, contactResource); - Resource affiliationResource = processOrganizationIdentity(contact, model, contactResource); - processCommonProperties(contact, model, contactResource, affiliationResource); + processPersonalIdentity(contact, container, contactResource, handler, accessData, dataSource); + Resource affiliationResource = processOrganizationIdentity(contact, container, contactResource, handler, accessData, dataSource); + processCommonProperties(contact, container, contactResource, affiliationResource, handler, accessData, dataSource); + // add the fulltext + StringWriter sw = new StringWriter(); + try { + out.output(contact, sw); + container.add(NIE.plainTextContent, sw.toString()); + } catch (Exception e) { + 
logger.warn("Couldn't serialize the vcard",e); + } + } - private void processPersonalIdentity(VCard vc, Model model, - Resource contactResource) { + private void processPersonalIdentity(VCard vc, RDFContainer parentMetadata, + URI contactResource, SubCrawlerHandler handler, AccessData accessData, DataSource dataSource) { + + Model model = parentMetadata.getModel(); // this property is present in all contacts, regardless of whether they are PersonContacts // or OrganizationContacts, the presence of this property cannot tell us anything interesting @@ -274,8 +313,9 @@ } Photo photo = (Photo)vc.getProperty(Id.PHOTO); if (photo != null) { - processImage(model, contactResource, NCO.photo, - getParameterValue(photo, net.fortuna.ical4j.vcard.Parameter.Id.TYPE)); + processImage(photo.getBinary(), parentMetadata, contactResource, NCO.photo, + getParameterValue(photo, net.fortuna.ical4j.vcard.Parameter.Id.TYPE), handler, + accessData, dataSource); } for (int i = 0; i < length(name.getPrefixes()); i++) { String prefix = name.getPrefixes()[i]; @@ -288,9 +328,11 @@ } - private Resource processOrganizationIdentity(VCard organizationalIdentity, Model model, - Resource contactResource) { + private Resource processOrganizationIdentity(VCard organizationalIdentity, RDFContainer parentMetadata, + URI contactResource, SubCrawlerHandler handler, AccessData accessData, DataSource dataSource) { + Model model = parentMetadata.getModel(); + // first some sanity checking if (organizationalIdentity == null) { return null; @@ -323,15 +365,16 @@ } // now we know we have to create an organization resource - Resource organizationResource = ModelUtil.generateRandomResource(model); + URI organizationResource = ModelUtil.generateRandomURI(model); model.addStatement(organizationResource, RDF.type, NCO.OrganizationContact); model.addStatement(affiliationResource, NCO.org, organizationResource); Logo logo = (Logo)organizationalIdentity.getProperty(Id.LOGO); if (logo != null) { - processImage(model, 
organizationResource, NCO.logo, - getParameterValue(logo, net.fortuna.ical4j.vcard.Parameter.Id.TYPE)); + processImage(logo.getBinary(), parentMetadata, organizationResource, NCO.logo, + getParameterValue(logo, net.fortuna.ical4j.vcard.Parameter.Id.TYPE), handler, + accessData, dataSource); } Org org = (Org) organizationalIdentity.getProperty(Id.ORG); @@ -349,11 +392,12 @@ return affiliationResource; } - private void processCommonProperties(VCard contact, Model model, Resource contactResource, - Resource affiliationResource) { + private void processCommonProperties(VCard contact, RDFContainer parentMetadata, URI contactResource, + Resource affiliationResource, SubCrawlerHandler handler, AccessData accessData, + DataSource dataSource) { // so, first the addresses List<Property> adrs = contact.getProperties(Id.ADR); - + Model model = parentMetadata.getModel(); for (Property address : adrs) { // let's hope this simple comparison will work as desired... String type = getParameterValue(address, net.fortuna.ical4j.vcard.Parameter.Id.TYPE); @@ -370,13 +414,17 @@ processGeographicalInformation(model, contactResource, NCO.hasLocation, contact); Property key = contact.getProperty(Id.KEY); if (key != null) { - processPublicKey(model, contactResource, NCO.key, - getParameterValue(key, net.fortuna.ical4j.vcard.Parameter.Id.TYPE)); + Key keyProp = (Key)key; + processPublicKey(keyProp.getBinary(), parentMetadata, contactResource, NCO.key, + getParameterValue(key, net.fortuna.ical4j.vcard.Parameter.Id.TYPE), + handler, accessData, dataSource); } Property sound = contact.getProperty(Id.SOUND); if (sound != null) { - processSound(model, contactResource, NCO.sound, - getParameterValue(sound, net.fortuna.ical4j.vcard.Parameter.Id.TYPE)); + Sound soundProp = (Sound)sound; + processSound(soundProp.getBinary(), parentMetadata, contactResource, NCO.sound, + getParameterValue(key, net.fortuna.ical4j.vcard.Parameter.Id.TYPE), + handler, accessData, dataSource); } // and then the simple 
properties @@ -390,7 +438,7 @@ } } - private void processAddress(Model model, Address address, Resource contactResource, + private void processAddress(Model model, Address address, Resource contactResource, Resource affiliationResource, boolean preferred) { if (address != null) { Resource addressResource = ModelUtil.generateRandomResource(model); @@ -602,28 +650,57 @@ } } - private void processImage(Model model, Resource contactResource, URI property, String mimeType) { - Resource imageResource = ModelUtil.generateRandomResource(model); - model.addStatement(imageResource, RDF.type, NEXIF.Photo); - model.addStatement(imageResource, RDF.type, NFO.Attachment); - model.addStatement(contactResource, property, imageResource); - addStringProperty(model, imageResource, NIE.mimeType, mimeType); + private void processImage(byte [] bytes, RDFContainer parentMetadata, URI contactResource, + URI property, String mimeType, SubCrawlerHandler handler, AccessData accessData, + DataSource source) { + String attachmentHash = StringUtil.sha1Hash(bytes); + URI attachmentUri = generateURIForAttachment(contactResource, attachmentHash); + RDFContainerFactory factory = handler.getRDFContainerFactory(attachmentUri.toString()); + RDFContainer container = factory.getRDFContainer(attachmentUri); + parentMetadata.add(property, attachmentUri); + container.add(NIE.isPartOf, contactResource); + container.add(RDF.type, NCO.ContactListDataObject); + container.add(RDF.type, NEXIF.Photo); + container.add(RDF.type, NFO.Attachment); + addStringProperty(container.getModel(), container.getDescribedUri(), NIE.mimeType, mimeType); + passAttachmentToHandler(container, handler, attachmentHash, accessData, source, bytes); + } - private void processSound(Model model, Resource contactResource, URI property, String mimeType) { - Resource soundResource = ModelUtil.generateRandomResource(model); - model.addStatement(soundResource, RDF.type, NFO.Audio); - model.addStatement(soundResource, RDF.type, NFO.Attachment); 
- model.addStatement(contactResource, property, soundResource); - addStringProperty(model, soundResource, NIE.mimeType, mimeType); + private URI generateURIForAttachment(URI contactResource, String attachmentHash) { + if (!SubCrawlerUtil.isSubcrawledObjectUri(contactResource)) { + return createChildUri(contactResource, attachmentHash); + } else { + return new org.ontoware.rdf2go.model.node.impl.URIImpl(contactResource.toString() + "/" + attachmentHash); + } + } + + private void processSound(byte [] bytes, RDFContainer parentMetadata, URI contactResource, + URI property, String mimeType, SubCrawlerHandler handler, AccessData accessData, + DataSource source) { + String attachmentHash = StringUtil.sha1Hash(bytes); + URI attachmentUri = generateURIForAttachment(contactResource, attachmentHash); + RDFContainerFactory factory = handler.getRDFContainerFactory(attachmentUri.toString()); + RDFContainer container = factory.getRDFContainer(attachmentUri); + parentMetadata.add(property, attachmentUri); + container.add(RDF.type, NFO.Audio); + container.add(RDF.type, NFO.Attachment); + addStringProperty(container.getModel(), container.getDescribedUri(), NIE.mimeType, mimeType); + passAttachmentToHandler(container, handler, attachmentHash, accessData, source, bytes); } - private void processPublicKey(Model model, Resource contactResource, URI property, String mimeType) { - Resource keyResource = ModelUtil.generateRandomResource(model); - model.addStatement(keyResource, RDF.type, NIE.InformationElement); - model.addStatement(keyResource, RDF.type, NFO.Attachment); - model.addStatement(contactResource, property, keyResource); - addStringProperty(model, keyResource, NIE.mimeType, mimeType); + private void processPublicKey(byte [] bytes, RDFContainer parentMetadata, URI contactResource, + URI property, String mimeType, SubCrawlerHandler handler, AccessData accessData, + DataSource source) { + String attachmentHash = StringUtil.sha1Hash(bytes); + URI attachmentUri = 
generateURIForAttachment(contactResource, attachmentHash); + RDFContainerFactory factory = handler.getRDFContainerFactory(attachmentUri.toString()); + RDFContainer container = factory.getRDFContainer(attachmentUri); + parentMetadata.add(property, attachmentUri); + container.add(RDF.type, NIE.InformationElement); + container.add(RDF.type, NFO.Attachment); + addStringProperty(container.getModel(), container.getDescribedUri(), NIE.mimeType, mimeType); + passAttachmentToHandler(container, handler, attachmentHash, accessData, source, bytes); } private void addStringProperty(Model model, Resource resource, URI property, String value) { @@ -632,13 +709,6 @@ } } - private void addDateProperty(Model model, Resource resource, URI property, Date date) { - if (date != null) { - String dateString = DateUtil.date2String(date); - model.addStatement(resource, property, model.createDatatypeLiteral(dateString, XSD._date)); - } - } - private void addDateTimeProperty(Model model, Resource resource, URI property, Date date) { if (date != null) { String dateString = DateUtil.dateTime2String(date); Modified: aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/TestSubCrawlerUtilIntegration.java =================================================================== --- aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/TestSubCrawlerUtilIntegration.java 2010-03-25 09:12:14 UTC (rev 2299) +++ aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/TestSubCrawlerUtilIntegration.java 2010-03-26 15:49:26 UTC (rev 2300) @@ -53,6 +53,25 @@ } } + public void testVCardAttachment() throws Exception { + InputStream stream = ResourceUtil.getInputStream(DOCS_PATH + "vcard-antoni-kontact.vcf", + getClass()); + URI uri = new URIImpl( + "vcard:" + + "file:///C:/somefolder/somevcard.vcf" + + "!/d6bb8c38b78663b2aeef0b30538968660caf95c9"); + TestRDFContainerFactory fac = new TestRDFContainerFactory(); + DataObject obj = 
SubCrawlerUtil.getDataObject(uri, stream, null, null, null, fac, + new DefaultSubCrawlerRegistry()); + assertNotNull(obj); + assertTrue(obj instanceof FileDataObject); + assertMimeType("image/jpeg", uri, ((FileDataObject)obj).getContent()); + obj.dispose(); + for (Map.Entry<String, RDFContainer> entry : fac.returnedContainers.entrySet()) { + assertFalse(entry.getValue().getModel().isOpen()); + } + } + /** * Tests if the method can extract a file whose name contains a space from inside a ZIP archive. * @throws Exception Modified: aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/vcard/VcardSubCrawlerTest.java =================================================================== --- aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/vcard/VcardSubCrawlerTest.java 2010-03-25 09:12:14 UTC (rev 2299) +++ aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/vcard/VcardSubCrawlerTest.java 2010-03-26 15:49:26 UTC (rev 2300) @@ -26,6 +26,7 @@ import org.ontoware.rdf2go.vocabulary.XSD; import org.semanticdesktop.aperture.accessor.AccessData; import org.semanticdesktop.aperture.accessor.base.AccessDataImpl; +import org.semanticdesktop.aperture.extractor.impl.DefaultExtractorRegistry; import org.semanticdesktop.aperture.rdf.RDFContainer; import org.semanticdesktop.aperture.rdf.impl.RDFContainerImpl; import org.semanticdesktop.aperture.test.subcrawler.SubCrawlerTestBase; @@ -88,8 +89,14 @@ VcardSubCrawler subCrawler = new VcardSubCrawler(); metadata = subCrawl(DOCS_PATH + "vcard-antoni-kontact.vcf", subCrawler); // note that NO additional data objects have been reported, this - // file contains only one contact - assertNewModUnmod(handler, 0, 0, 0); + // file contains only one contact, but this contact has a photo, + // which is returned as a separate object + assertNewModUnmod(handler, 1, 0, 0); + + // we should get the fulltext too + String fullText = metadata.getString(NIE.plainTextContent); + 
assertTrue(fullText.contains("Antoni")); + validate(metadata); metadata.dispose(); metadata = null; @@ -420,7 +427,7 @@ private RDFContainer subCrawl(String string, VcardSubCrawler subCrawler) throws Exception { InputStream stream = org.semanticdesktop.aperture.util.ResourceUtil.getInputStream(string, this.getClass()); - handler = new TestBasicSubCrawlerHandler(); + handler = new TestBasicSubCrawlerHandler(new DefaultExtractorRegistry()); RDFContainer parentMetadata = new RDFContainerImpl(handler.getModel(),new URIImpl("uri:dummyuri")); subCrawler.subCrawl(null, stream, handler, null, null, null, null, parentMetadata); return parentMetadata; Modified: aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/test/ApertureTestBase.java =================================================================== --- aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/test/ApertureTestBase.java 2010-03-25 09:12:14 UTC (rev 2299) +++ aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/test/ApertureTestBase.java 2010-03-26 15:49:26 UTC (rev 2300) @@ -612,7 +612,7 @@ stream.mark(minimumArrayLength + 10); // add some for safety byte[] bytes = IOUtil.readBytes(stream, minimumArrayLength); String mimeType = mimeTypeIdentifier.identify(bytes, null, uri); - assertEquals(mimeType, desiredMimeType); + assertEquals(desiredMimeType, mimeType); stream.reset(); } Modified: aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/test/subcrawler/SubCrawlerTestBase.java =================================================================== --- aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/test/subcrawler/SubCrawlerTestBase.java 2010-03-25 09:12:14 UTC (rev 2299) +++ aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/test/subcrawler/SubCrawlerTestBase.java 2010-03-26 15:49:26 UTC (rev 2300) @@ -127,9 +127,9 @@ */ public void assertNewModUnmod(TestBasicSubCrawlerHandler handler, int newObjects, int changedObjects, int unchangedObjects) 
{ - assertEquals(handler.getNewObjects().size(), newObjects); - assertEquals(handler.getChangedObjects().size(), changedObjects); - assertEquals(handler.getUnchangedObjects().size(), unchangedObjects); + assertEquals(newObjects, handler.getNewObjects().size()); + assertEquals(changedObjects, handler.getChangedObjects().size()); + assertEquals(unchangedObjects, handler.getUnchangedObjects().size()); } protected class CompressorSubCrawlerHandler extends TestBasicSubCrawlerHandler { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <my...@us...> - 2010-03-31 01:19:39
|
Revision: 2303 http://aperture.svn.sourceforge.net/aperture/?rev=2303&view=rev Author: mylka Date: 2010-03-31 01:19:31 +0000 (Wed, 31 Mar 2010) Log Message: ----------- initial version of the IcalSubCrawler, with associated unit tests Modified Paths: -------------- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/crawler/base/CrawlerBase.java aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/crawler/ical/IcalCrawler.java aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/SubCrawlerUtil.java aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/base/AbstractSubCrawler.java aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/crawler/ical/TestIcalCrawler.java aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/test/ApertureTestBase.java Added Paths: ----------- aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/crawler/ical/IcalSubCrawlerTest.java aperture/trunk/core/src/test/resources/org/semanticdesktop/aperture/docs/zip-somedocs.zip Modified: aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/crawler/base/CrawlerBase.java =================================================================== --- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/crawler/base/CrawlerBase.java 2010-03-30 19:42:05 UTC (rev 2302) +++ aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/crawler/base/CrawlerBase.java 2010-03-31 01:19:31 UTC (rev 2303) @@ -362,7 +362,9 @@ } protected void reportAccessingObject(String url) { - handler.accessingObject(this, url); + if (handler != null) { + handler.accessingObject(this, url); + } } protected void reportNewDataObject(DataObject object) { @@ -371,7 +373,7 @@ handler.objectNew(this, object); } - private void touchObject(String string) { + protected void touchObject(String string) { if (accessData != null) { accessData.touch(string); } Modified: 
aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/crawler/ical/IcalCrawler.java =================================================================== --- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/crawler/ical/IcalCrawler.java 2010-03-30 19:42:05 UTC (rev 2302) +++ aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/crawler/ical/IcalCrawler.java 2010-03-31 01:19:31 UTC (rev 2303) @@ -7,11 +7,13 @@ package org.semanticdesktop.aperture.crawler.ical; import java.io.ByteArrayInputStream; +import java.io.Closeable; import java.io.File; import java.io.FileNotFoundException; import java.io.FileReader; import java.io.IOException; import java.io.InputStream; +import java.nio.charset.Charset; import java.util.Collection; import java.util.Iterator; import java.util.LinkedList; @@ -20,6 +22,7 @@ import java.util.regex.Pattern; import net.fortuna.ical4j.data.CalendarBuilder; +import net.fortuna.ical4j.data.CalendarOutputter; import net.fortuna.ical4j.data.ParserException; import net.fortuna.ical4j.model.Calendar; import net.fortuna.ical4j.model.Component; @@ -65,6 +68,7 @@ import org.ontoware.rdf2go.vocabulary.RDF; import org.ontoware.rdf2go.vocabulary.RDFS; import org.ontoware.rdf2go.vocabulary.XSD; +import org.semanticdesktop.aperture.accessor.AccessData; import org.semanticdesktop.aperture.accessor.DataObject; import org.semanticdesktop.aperture.accessor.FileDataObject; import org.semanticdesktop.aperture.accessor.RDFContainerFactory; @@ -72,9 +76,15 @@ import org.semanticdesktop.aperture.accessor.base.FileDataObjectBase; import org.semanticdesktop.aperture.crawler.ExitCode; import org.semanticdesktop.aperture.crawler.base.CrawlerBase; +import org.semanticdesktop.aperture.datasource.DataSource; import org.semanticdesktop.aperture.datasource.ical.IcalDataSource; import org.semanticdesktop.aperture.rdf.RDFContainer; import org.semanticdesktop.aperture.rdf.util.ModelUtil; +import 
org.semanticdesktop.aperture.subcrawler.PathNotFoundException; +import org.semanticdesktop.aperture.subcrawler.SubCrawler; +import org.semanticdesktop.aperture.subcrawler.SubCrawlerException; +import org.semanticdesktop.aperture.subcrawler.SubCrawlerHandler; +import org.semanticdesktop.aperture.subcrawler.SubCrawlerUtil; import org.semanticdesktop.aperture.util.StringUtil; import org.semanticdesktop.aperture.util.UriUtil; import org.semanticdesktop.aperture.vocabulary.GEO; @@ -92,7 +102,7 @@ * {@link UriUtil#generateRandomResource(Model)} method, which may or may not be blank nodes. See the * documentation for {@link UriUtil#generateRandomResource(Model)} for more details. */ -public class IcalCrawler extends CrawlerBase { +public class IcalCrawler extends CrawlerBase implements SubCrawler { /** * URI of the xsd:yearMonthDuration datatype. This constant will be removed when an appropriate constant @@ -119,10 +129,24 @@ * @see #createBaseUri(File) */ private String baseuri; + + /** + * A little flag that is set to true if we're operating in the subcrawler mode. It enables some + * hacks that squeeae the IcalCrawler into the SubCrawler corset. + */ + private boolean subCrawlerMode; + private SubCrawlerHandler subCrawlerHandler; /** Default constructor. */ public IcalCrawler() { - // empty... for the time being. 
+ // This is necessary to support files where content lines are split between multiple + // physical lines + System.setProperty("ical4j.unfolding.relaxed", "true"); + + // This is necessary to support files generated by Lotus Notes + // They contain uris with '<' and '>' that have to be removed + // before conversion to java.net.URI + CompatibilityHints.setHintEnabled(CompatibilityHints.KEY_NOTES_COMPATIBILITY, true); } /** @@ -167,13 +191,60 @@ return crawlIcalFile(icalFile); } + public void subCrawl(URI id, InputStream stream, SubCrawlerHandler handler, + DataSource dataSource, AccessData accessData, Charset charset, + String mimeType, RDFContainer parentMetadata) + throws SubCrawlerException { + subCrawlerMode = true; + subCrawlerHandler = handler; + baseuri = parentMetadata.getDescribedUri().toString(); + setDataSource(dataSource); + setAccessData(accessData); + + + CalendarBuilder builder = null; + Calendar calendar = null; + try { + builder = new CalendarBuilder(); + calendar = builder.build(stream); + parentMetadata.add(RDF.type, NCAL.Calendar); + PropertyList propertyList = calendar.getProperties(); + crawlPropertyList(propertyList, parentMetadata.getDescribedUri(), parentMetadata, null); + ComponentList componentList = calendar.getComponents(); + crawlComponentList(componentList, parentMetadata); + } + catch (ParserException pe) { + throw new SubCrawlerException(pe); + } + catch (IOException ioe) { + throw new SubCrawlerException(ioe); + } + finally { + closeClosable(stream); + } + } + + public DataObject getDataObject(URI parentUri, String path, + InputStream stream, DataSource dataSource, Charset charset, + String mimeType, RDFContainerFactory factory) + throws SubCrawlerException, PathNotFoundException { + // TODO Auto-generated method stub + return null; + } + + public void stopSubCrawler() { + // not implemented yet + } + + + /** * Creates the base URI from the ical file. 
* @param icalFile the file with the ical information * @return the string with the base uri * @throws IOException if a canonical path cannot be generated for this file */ - private String createBaseUri(File icalFile) throws IOException + private String createBaseUri(File icalFile) throws IOException { //String result = "file:///"; //result += icalFile.getCanonicalPath(); @@ -201,15 +272,6 @@ CalendarBuilder builder = null; Calendar calendar = null; try { - // This is necessary to support files where content lines are split between multiple - // physical lines - System.setProperty("ical4j.unfolding.relaxed", "true"); - - // This is necessary to support files generated by Lotus Notes - // They contain uris with '<' and '>' that have to be removed - // before conversion to java.net.URI - CompatibilityHints.setHintEnabled(CompatibilityHints.KEY_NOTES_COMPATIBILITY, true); - fin = new FileReader(icalFile); builder = new CalendarBuilder(); calendar = builder.build(fin); @@ -1152,22 +1214,24 @@ * @param rdfContainer the container to store the generated statements in * */ - public void crawlAttachProperty(Property property, Resource parentNode, RDFContainer rdfContainer) { - URI attachmentURI = generateAttachmentUri(rdfContainer.getDescribedUri()); + public void crawlAttachProperty(Property property, Resource parentNode, RDFContainer rdfContainer) { Parameter valueParameter = property.getParameter(Parameter.VALUE); String propertyValue = property.getValue(); - addStatement(rdfContainer, parentNode, NCAL.attach, attachmentURI); if (valueParameter == null || valueParameter.equals(Value.URI)) { URI uri = tryToCreateAnUri(rdfContainer,propertyValue); - addStatement(rdfContainer, attachmentURI, RDF.type, NCAL.Attachment); - addStatement(rdfContainer, attachmentURI, NCAL.attachmentUri, uri); + addStatement(rdfContainer, parentNode, NCAL.attach, uri); + addStatement(rdfContainer, uri, RDF.type, NCAL.Attachment); addStatement(rdfContainer, uri, RDF.type, RDFS.Resource); } else if 
(valueParameter.equals(Value.BINARY)) { - RDFContainer attachmentContainer = prepareDataObjectRDFContainer(attachmentURI); - addStatement(attachmentContainer, attachmentURI, RDF.type, NCAL.Attachment); + Attach attach = (Attach) property; + byte [] bytes = attach.getBinary(); + URI attachmentUri = new URIImpl(parentNode.toString() + "/attachment-" + StringUtil.sha1Hash(bytes)); + RDFContainer attachmentContainer = prepareDataObjectRDFContainer(attachmentUri); + addStatement(rdfContainer, parentNode, NCAL.attach, attachmentUri); + addStatement(attachmentContainer, attachmentUri, RDF.type, NCAL.Attachment); crawlParameterList(property, attachmentContainer); - Attach attach = (Attach) property; + passAttachmentToHandler(attachmentContainer, attach.getBinary()); } } @@ -3247,7 +3311,14 @@ // remove it from the deprecated URI's list, so it won't be reported as removed after crawling //deprecatedUrls.remove(uri.toString()); //RDFContainerFactory containerFactory = handler.getRDFContainerFactory(this, uri.toString()); - RDFContainerFactory containerFactory = getRDFContainerFactory(uri.toString()); + RDFContainerFactory containerFactory = null; + + if (subCrawlerMode) { + containerFactory = subCrawlerHandler.getRDFContainerFactory(uri.toString()); + } else { + containerFactory = getRDFContainerFactory(uri.toString()); + } + RDFContainer rdfContainer = containerFactory.getRDFContainer(uri); rdfContainer.add(RDF.type,NCAL.CalendarDataObject); return rdfContainer; @@ -3356,6 +3427,14 @@ */ private void passComponentToHandler(RDFContainer metadata, Component component) { DataObject dataObject = new DataObjectBase(metadata.getDescribedUri(), getDataSource(), metadata); + + /* + * add the fulltext + */ + if (component != null) { + metadata.add(NIE.plainTextContent,component.toString()); + } + String id = metadata.getDescribedUri().toString(); if (accessData == null) { //handler.objectNew(this, dataObject); @@ -3415,8 +3494,40 @@ dataObject.dispose(); } } + + - /** + @Override + 
protected void reportModifiedDataObject(DataObject object) { + if (subCrawlerMode) { + touchObject(object.getID().toString()); + subCrawlerHandler.objectChanged(object); + } else { + super.reportModifiedDataObject(object); + } + } + + @Override + protected void reportNewDataObject(DataObject object) { + if (subCrawlerMode) { + touchObject(object.getID().toString()); + subCrawlerHandler.objectNew(object); + } else { + super.reportNewDataObject(object); + } + } + + @Override + protected void reportUnmodifiedDataObject(String url) { + if (subCrawlerMode) { + accessData.touchRecursively(url); + subCrawlerHandler.objectNotModified(url); + } else { + super.reportUnmodifiedDataObject(url); + } + } + + /** * Updates the accessData with the current state of the given object. * * @param metadata The RDFContainer with metadata about the object to be updated. @@ -3537,7 +3648,11 @@ } Property uidProperty = component.getProperty(Property.UID); if (uidProperty != null) { - return new URIImpl(baseuri + uidProperty.getValue()); + if (subCrawlerMode) { + return SubCrawlerUtil.createChildUri(new URIImpl(baseuri), uidProperty.getValue(), "ical"); + } else { + return new URIImpl(baseuri + uidProperty.getValue()); + } } else { return generateSumOfAllPropertiesURI(component); @@ -3575,8 +3690,12 @@ Property property = (Property) it.next(); sumOfAllProperties.append(property.getValue()); } - String result = baseuri + StringUtil.sha1Hash(sumOfAllProperties.toString()); - return new URIImpl(result); + + if (subCrawlerMode) { + return SubCrawlerUtil.createChildUri(new URIImpl(baseuri), StringUtil.sha1Hash(sumOfAllProperties.toString()), "ical"); + } else { + return new URIImpl(baseuri + StringUtil.sha1Hash(sumOfAllProperties.toString())); + } } /** @@ -3586,21 +3705,24 @@ * @return a URI for an anonymous calendar componetn (Valarm or a timezone observance). 
*/ private URI generateAnonymousComponentUri(Component component) { - String result = baseuri + component.getName() + "-" + java.util.UUID.randomUUID().toString(); - return new URIImpl(result); + if (subCrawlerMode) { + return SubCrawlerUtil.createChildUri(new URIImpl(baseuri), component.getName() + "-" + java.util.UUID.randomUUID().toString(), "ical"); + } else { + return new URIImpl(baseuri + component.getName() + "-" + java.util.UUID.randomUUID().toString()); + } } private URI createTimeZoneURI(String tzidParamValue) { - return new URIImpl(baseuri + tzidParamValue); + if (subCrawlerMode) { + return SubCrawlerUtil.createChildUri(new URIImpl(baseuri), tzidParamValue, "ical"); + } else { + return new URIImpl(baseuri + tzidParamValue); + } } private Resource generateAnonymousNode(RDFContainer rdfContainer) { return ModelUtil.generateRandomResource(rdfContainer.getModel()); } - - private URI generateAttachmentUri(URI describedUri) { - return new URIImpl(describedUri.toString() + "/attachment"); - } ////////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////// CONVERSION OF ICAL PROPERTY VALUES INTO RDF NODES ///////////////////////////// @@ -3942,5 +4064,13 @@ return datatypeURI; } - + private void closeClosable(Closeable stream) { + if (stream != null) { + try { + stream.close(); + } catch (Exception e) { + logger.warn("Couldn't close the stream"); + } + } + } } Modified: aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/SubCrawlerUtil.java =================================================================== --- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/SubCrawlerUtil.java 2010-03-30 19:42:05 UTC (rev 2302) +++ aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/SubCrawlerUtil.java 2010-03-31 01:19:31 UTC (rev 2303) @@ -15,6 +15,8 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; +import 
org.ontoware.rdf2go.RDF2Go; +import org.ontoware.rdf2go.model.Model; import org.ontoware.rdf2go.model.node.URI; import org.ontoware.rdf2go.model.node.impl.URIImpl; import org.semanticdesktop.aperture.accessor.DataObject; @@ -22,7 +24,11 @@ import org.semanticdesktop.aperture.accessor.RDFContainerFactory; import org.semanticdesktop.aperture.accessor.base.DataObjectBase; import org.semanticdesktop.aperture.datasource.DataSource; +import org.semanticdesktop.aperture.rdf.RDFContainer; +import org.semanticdesktop.aperture.rdf.impl.RDFContainerImpl; import org.semanticdesktop.aperture.util.HttpClientUtil; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * A utility class containing some methods useful when working with subcrawlers and subcrawled resources. @@ -31,6 +37,8 @@ private static final Pattern uriSchemePattern = Pattern.compile("\\w\\w+:"); + private static Logger logger = LoggerFactory.getLogger(SubCrawlerUtil.class); + /** * <p> * Tries to access a DataObject that is hidden in a stream. This method can get the desired object @@ -320,5 +328,78 @@ } else { return false; } - } + } + + + /** + * Creates a URI for a subcrawled entity. Uses a scheme invented within the apache commons VFS project. + * @param objectUri the uri of the parent data object + * @param childPath the path within the the child object + * @return a uri for a subcrawled entity. 
+ * @see <a href="http://commons.apache.org/vfs/filesystems.html">VFS Filesystems Documentation</a> + */ + public static URI createChildUri(URI objectUri, String childPath, String prefix) { + return new URIImpl(prefix + ":" + + objectUri.toString() + "!/" + + HttpClientUtil.formUrlEncode(childPath,"/-_.")); + } + + public static DataObject getDataObject(URI parentUri, String path, InputStream stream, DataSource dataSource, Charset charset, + String mimeType, RDFContainerFactory factory, String prefix, SubCrawler sc) throws SubCrawlerException, PathNotFoundException { + Model model = RDF2Go.getModelFactory().createModel(); + model.open(); + RDFContainer parentMetadata = new RDFContainerImpl(model,parentUri); + URI childUri = createChildUri(parentUri, path.startsWith("/") ? path.substring(1) : path, prefix); + GetDataObjectSubCrawlerHandler handler = new GetDataObjectSubCrawlerHandler(factory,childUri, sc); + sc.subCrawl(parentUri, stream, handler, dataSource, null, charset, mimeType, parentMetadata); + parentMetadata.dispose(); + DataObject result = handler.getObjectToReturn(); + if (result != null) { + return result; + } else { + throw new PathNotFoundException(sc.getClass().getName(), parentUri, path); + } + } + + private static class GetDataObjectSubCrawlerHandler implements SubCrawlerHandler { + + private RDFContainerFactory fac; + private URI requiredUri; + private SubCrawler sc; + private DataObject objectToReturn; + + public GetDataObjectSubCrawlerHandler(RDFContainerFactory fac, URI requiredUri, SubCrawler sc) { + this.fac = fac; + this.requiredUri = requiredUri; + this.sc = sc; + this.objectToReturn = null; + } + + public DataObject getObjectToReturn() { + return objectToReturn; + } + + public RDFContainerFactory getRDFContainerFactory(String url) { + return fac; + } + + public void objectNew(DataObject object) { + if (object.getID().equals(requiredUri) && objectToReturn == null) { + this.objectToReturn = object; + sc.stopSubCrawler(); + } else { + 
object.dispose(); + } + } + + public void objectChanged(DataObject object) { + logger.warn("Got an \"objectChanged\" call inside a getDataObject method, uri:" + object.getID()); + object.dispose(); + } + + public void objectNotModified(String url) { + logger.warn("Got an \"objectNotModified\" call inside a getDataObject method, uri:" + url); + } + } + } Modified: aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/base/AbstractSubCrawler.java =================================================================== --- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/base/AbstractSubCrawler.java 2010-03-30 19:42:05 UTC (rev 2302) +++ aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/base/AbstractSubCrawler.java 2010-03-31 01:19:31 UTC (rev 2303) @@ -22,6 +22,7 @@ import org.semanticdesktop.aperture.subcrawler.SubCrawler; import org.semanticdesktop.aperture.subcrawler.SubCrawlerException; import org.semanticdesktop.aperture.subcrawler.SubCrawlerHandler; +import org.semanticdesktop.aperture.subcrawler.SubCrawlerUtil; import org.semanticdesktop.aperture.util.HttpClientUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -49,64 +50,11 @@ * @see <a href="http://commons.apache.org/vfs/filesystems.html">VFS Filesystems Documentation</a> */ protected URI createChildUri(URI objectUri, String childPath) { - return new URIImpl(getUriPrefix() + ":" + - objectUri.toString() + "!/" + - HttpClientUtil.formUrlEncode(childPath,"/-_.")); + return SubCrawlerUtil.createChildUri(objectUri, childPath, getUriPrefix()); } public DataObject getDataObject(URI parentUri, String path, InputStream stream, DataSource dataSource, Charset charset, String mimeType, RDFContainerFactory factory) throws SubCrawlerException, PathNotFoundException { - Model model = RDF2Go.getModelFactory().createModel(); - model.open(); - RDFContainer parentMetadata = new RDFContainerImpl(model,parentUri); - URI childUri = 
createChildUri(parentUri, path.startsWith("/") ? path.substring(1) : path); - GetDataObjectSubCrawlerHandler handler = new GetDataObjectSubCrawlerHandler(factory,childUri); - subCrawl(parentUri, stream, handler, dataSource, null, charset, mimeType, parentMetadata); - parentMetadata.dispose(); - DataObject result = handler.getObjectToReturn(); - if (result != null) { - return result; - } else { - throw new PathNotFoundException(this.getClass().getName(), parentUri, path); - } + return SubCrawlerUtil.getDataObject(parentUri, path, stream, dataSource, charset, mimeType, factory, getUriPrefix(), this); } - - private class GetDataObjectSubCrawlerHandler implements SubCrawlerHandler { - - private RDFContainerFactory fac; - private URI requiredUri; - private DataObject objectToReturn; - - public GetDataObjectSubCrawlerHandler(RDFContainerFactory fac, URI requiredUri) { - this.fac = fac; - this.requiredUri = requiredUri; - this.objectToReturn = null; - } - - public DataObject getObjectToReturn() { - return objectToReturn; - } - - public RDFContainerFactory getRDFContainerFactory(String url) { - return fac; - } - - public void objectNew(DataObject object) { - if (object.getID().equals(requiredUri)) { - this.objectToReturn = object; - stopSubCrawler(); - } else { - object.dispose(); - } - } - - public void objectChanged(DataObject object) { - logger.warn("Got an \"objectChanged\" call inside a getDataObject method, uri:" + object.getID()); - object.dispose(); - } - - public void objectNotModified(String url) { - logger.warn("Got an \"objectNotModified\" call inside a getDataObject method, uri:" + url); - } - } } Added: aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/crawler/ical/IcalSubCrawlerTest.java =================================================================== --- aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/crawler/ical/IcalSubCrawlerTest.java (rev 0) +++ 
aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/crawler/ical/IcalSubCrawlerTest.java 2010-03-31 01:19:31 UTC (rev 2303) @@ -0,0 +1,215 @@ +/* + * Copyright (c) 2010 Aduna and Deutsches Forschungszentrum fuer Kuenstliche Intelligenz DFKI GmbH. + * All rights reserved. + * + * Licensed under the Aperture BSD-style license. + */ +package org.semanticdesktop.aperture.crawler.ical; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; + +import org.ontoware.aifbcommons.collection.ClosableIterator; +import org.ontoware.rdf2go.exception.ModelException; +import org.ontoware.rdf2go.model.Model; +import org.ontoware.rdf2go.model.Statement; +import org.ontoware.rdf2go.model.Syntax; +import org.ontoware.rdf2go.model.node.Resource; +import org.ontoware.rdf2go.model.node.Variable; +import org.ontoware.rdf2go.model.node.impl.URIImpl; +import org.semanticdesktop.aperture.accessor.AccessData; +import org.semanticdesktop.aperture.accessor.base.AccessDataImpl; +import org.semanticdesktop.aperture.datasource.ical.IcalDataSource; +import org.semanticdesktop.aperture.rdf.RDFContainer; +import org.semanticdesktop.aperture.rdf.impl.RDFContainerImpl; +import org.semanticdesktop.aperture.subcrawler.SubCrawler; +import org.semanticdesktop.aperture.test.TestIncrementalCrawlerHandler; +import org.semanticdesktop.aperture.test.subcrawler.SubCrawlerTestBase; +import org.semanticdesktop.aperture.test.subcrawler.TestBasicSubCrawlerHandler; +import org.semanticdesktop.aperture.util.ResourceUtil; +import org.semanticdesktop.aperture.vocabulary.NCAL; +import org.semanticdesktop.aperture.vocabulary.NIE; + +/** + * A set of tests of the {@link SubCrawler} functionality of the {@link IcalCrawler} class. 
+ * @author Antoni + * + */ +public class IcalSubCrawlerTest extends SubCrawlerTestBase { + + + public static final String ICAL_TESTDATA_PATH = DOCS_PATH + "icaltestdata/"; + + private AccessData accessData; + + public void setUp() { + accessData = new AccessDataImpl(); + try { + accessData.initialize(); + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + } + + + /** + * All events are returned as separate data objects. They have proper subcrawler uris. + * The fulltext of the .ics file should be attached to the parent {@link RDFContainer} + */ + public void testNormalSubcrawlCal01() throws Exception { + SubCrawler sc = new IcalCrawler(); + TestBasicSubCrawlerHandler handler = new TestBasicSubCrawlerHandler(); + InputStream stream = org.semanticdesktop.aperture.util.ResourceUtil.getInputStream(DOCS_PATH + "icaltestdata/cal01.ics", this.getClass()); + RDFContainer parentMetadata = new RDFContainerImpl(handler.getModel(),new URIImpl("uri:dummyuri/icaltestdata/cal01.ics")); + sc.subCrawl(null, stream, handler, null, null, null, null, parentMetadata); + + /* + * there is one VTIMEZONE and 3 VEVENTS + */ + assertNewModUnmod(handler, 4, 0, 0); + + Resource federalReserveMeeting = findComponentByUid(handler.getModel(), "20020630T230600Z-3895-69-1-16@jammer"); + + /** + * The fulltext of the federal reserve meeting should be there + */ + checkStatement(handler.getModel(), federalReserveMeeting, NIE.plainTextContent, + "ATTENDEE;CUTYPE=INDIVIDUAL;ROLE=REQ-PARTICIPANT;PARTSTAT=NEEDS-ACTION;RSVP=TRUE;LANGUAGE=en:MAILTO:hoopy@frood.example"); + handler.close(); + } + + + + public void testIncrementalCrawlerHandler() throws Exception { + TestBasicSubCrawlerHandler handler = readIcalFile("cal01.ics", accessData); + assertNewModUnmod(handler, 4, 0, 0); + handler.close(); + } + + public void testOneChangedObject() throws Exception { + TestBasicSubCrawlerHandler handler = readIcalFile("cal01.ics", accessData); + assertNewModUnmod(handler, 
4, 0, 0); + accessData.store(); + accessData.initialize(); + TestBasicSubCrawlerHandler handler2 = readIcalFile("cal01-1.ics", accessData); + // the event is reported as changed (new sequence number) + // all other three components are unchanged + assertNewModUnmod(handler2, 0, 1, 3); + handler.close(); + handler2.close(); + } + + public void testOneLetterChangedInTimezone() throws Exception { + TestBasicSubCrawlerHandler handler = readIcalFile("cal01.ics", accessData); + accessData.store(); + accessData.initialize(); + TestBasicSubCrawlerHandler handler2 = readIcalFile("cal01-2.ics", accessData); + + assertNewModUnmod(handler, 4, 0, 0); + assertNewModUnmod(handler2, 0, 1, 3); + handler.close(); + handler2.close(); + } + + public void testBymonthChangedInTimezone() throws Exception { + TestBasicSubCrawlerHandler handler = readIcalFile("cal01.ics", accessData); + assertNewModUnmod(handler, 4, 0, 0); + accessData.store(); + accessData.initialize(); + TestBasicSubCrawlerHandler handler2 = readIcalFile("cal01-5.ics", accessData); + assertNewModUnmod(handler2, 0, 1, 3); + handler.close(); + handler2.close(); + } + + public void testDtstartChangedInTimezone() throws Exception { + TestBasicSubCrawlerHandler handler = readIcalFile("cal01.ics", accessData); + assertNewModUnmod(handler, 4, 0, 0); + accessData.store(); + accessData.initialize(); + TestBasicSubCrawlerHandler handler2 = readIcalFile("cal01-6.ics", accessData); + assertNewModUnmod(handler2, 0, 1, 3); + handler.close(); + handler2.close(); + } + + public void testOneNewComponentAddition() throws Exception { + TestBasicSubCrawlerHandler handler = readIcalFile("cal01.ics", accessData); + assertNewModUnmod(handler, 4, 0, 0); + accessData.store(); + accessData.initialize(); + TestBasicSubCrawlerHandler handler2 = readIcalFile("cal01-3.ics", accessData); + // we added a new component, other should be unchanged + assertNewModUnmod(handler2, 1, 0, 4); + handler.close(); + handler2.close(); + } + + public void 
testOneComponentDeletion() throws Exception { + TestBasicSubCrawlerHandler handler = readIcalFile("cal01.ics", accessData); + assertNewModUnmod(handler, 4, 0, 0); + accessData.store(); + accessData.initialize(); + TestBasicSubCrawlerHandler handler2 = readIcalFile("cal01-4.ics", accessData); + // we have removed a component, 3 other should be unchanged + // the subcrawler can't detect the object deletion + assertNewModUnmod(handler2, 0, 0, 3); + + // .. but we can count the untouched ids + int counter = 0; + ClosableIterator<String> iter = accessData.getUntouchedIDsIterator(); + while (iter.hasNext()) { + iter.next(); + counter++; + } + // there should be one untouched id + assertEquals(1,counter); + + handler.close(); + handler2.close(); + } + + + /** + * Crawls the ICAL file and returns the crawler handler. + */ + private TestBasicSubCrawlerHandler readIcalFile(String fileName, AccessData accessData) + throws Exception { + InputStream fileStream = ResourceUtil.getInputStream(ICAL_TESTDATA_PATH + fileName,this.getClass()); + assertNotNull(fileStream); + + TestBasicSubCrawlerHandler testCrawlerHandler = new TestBasicSubCrawlerHandler(); + + SubCrawler sc = new IcalCrawler(); + RDFContainer parentMetadata = new RDFContainerImpl(testCrawlerHandler.getModel(),new URIImpl("uri:dummyuri/icaltestdata/testfile.ics")); + sc.subCrawl(null, fileStream, testCrawlerHandler, null, accessData, null, null, parentMetadata); + + + + return testCrawlerHandler; + } + + + private Resource findComponentByUid(Model model, String uid) throws ModelException { + ClosableIterator<? 
extends Statement> iterator = null; + try { + iterator = model.findStatements(Variable.ANY, NCAL.uid, Variable.ANY); + boolean found = false; + Statement statement = null; + while (iterator.hasNext()) { + statement = iterator.next(); + if (statement.getObject().toString().equals(uid)) { + found = true; + break; + } + } + iterator.close(); + assertTrue(found); + return statement.getSubject(); + } finally { + closeIterator(iterator); + } + } +} Property changes on: aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/crawler/ical/IcalSubCrawlerTest.java ___________________________________________________________________ Added: svn:mime-type + text/plain Modified: aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/crawler/ical/TestIcalCrawler.java =================================================================== --- aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/crawler/ical/TestIcalCrawler.java 2010-03-30 19:42:05 UTC (rev 2302) +++ aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/crawler/ical/TestIcalCrawler.java 2010-03-31 01:19:31 UTC (rev 2303) @@ -26,6 +26,8 @@ import org.ontoware.rdf2go.vocabulary.RDF; import org.ontoware.rdf2go.vocabulary.RDFS; import org.ontoware.rdf2go.vocabulary.XSD; +import org.semanticdesktop.aperture.crawler.CrawlReport; +import org.semanticdesktop.aperture.crawler.ExitCode; import org.semanticdesktop.aperture.datasource.ical.IcalDataSource; import org.semanticdesktop.aperture.rdf.RDFContainer; import org.semanticdesktop.aperture.test.ApertureTestBase; @@ -123,7 +125,7 @@ Resource dtEndDatetimeNode = findSingleObjectResource(model, veventNode, NCAL.dtend); Resource recurBlankNode = findSingleObjectResource(model, veventNode, NCAL.rrule); assertEquals(countOutgoingTriples(model, veventNode, RDF.type), 3); - assertEquals(countOutgoingTriples(model, veventNode),12); + assertEquals(countOutgoingTriples(model, veventNode),13); assertSingleValueProperty(model, dtStartNcalDatetimeNode, 
NCAL.dateTime, "2002-06-30T09:00:00",XSD._dateTime); Resource timezoneNode = findSingleObjectResource(model, dtStartNcalDatetimeNode, NCAL.ncalTimezone); @@ -202,7 +204,7 @@ "http://host2.com/pub/busy/jpublic-01.ifb"); assertSingleValueProperty(model, vfreebusyNode, NCAL.comment, "This iCalendar file contains busy time information forthe next three months."); - assertEquals(countOutgoingTriples(model, vfreebusyNode),11); + assertEquals(countOutgoingTriples(model, vfreebusyNode),12); } private void assertMultiValueIntermediateNodeProperty(Model model3, Resource vfreebusyNode, URI firstProperty, @@ -236,7 +238,7 @@ findSingleObjectResource(model, vjournalNode, NCAL.organizer); assertSingleValueProperty(model, vjournalNode, NCAL.description, "journal\n"); assertSingleValueProperty(model, vjournalNode, NCAL.class_, NCAL.publicClassification); - assertEquals(countOutgoingTriples(model,vjournalNode),11); + assertEquals(12,countOutgoingTriples(model,vjournalNode)); } public void testVTimezoneComponent() throws Exception { @@ -255,7 +257,7 @@ "http://timezones.r.us.net/tz/US-California-Los_Angeles"); Resource standardObservanceNode = findSingleObjectResource(model, vtimezoneNode, NCAL.standard); Resource daylightObservanceNode = findSingleObjectResource(model, vtimezoneNode, NCAL.daylight); - assertEquals(countOutgoingTriples(model, vtimezoneNode),7); + assertEquals(countOutgoingTriples(model, vtimezoneNode),8); assertSingleValueProperty(model,standardObservanceNode,RDF.type,NCAL.TimezoneObservance); assertSingleValueProperty(model,standardObservanceNode,NCAL.tzoffsetfrom,"-0400"); @@ -305,9 +307,7 @@ Resource veventNode = findComponentByUid(model, "EB825E41-23CE-11D7-B93D-003065B0C95E"); Resource valarmNode = findSingleObjectResource(model, veventNode, NCAL.hasAlarm); Resource attachmentNode = findSingleObjectResource(model, valarmNode, NCAL.attach); - assertSingleValueURIProperty(model, attachmentNode, NCAL.attachmentUri, "http://www.w3.org/index.html"); - Resource 
pingUri = findSingleObjectResource(model,attachmentNode, NCAL.attachmentUri); - assertSingleValueURIProperty(model,pingUri, RDF.type, RDFS.Resource.toString()); + assertEquals("http://www.w3.org/index.html",attachmentNode.toString()); } public void testAttendeeProperty() throws Exception { @@ -916,7 +916,13 @@ icalCrawler.setCrawlerHandler(testCrawlerHandler); icalCrawler.crawl(); - + + CrawlReport report = icalCrawler.getCrawlReport(); + if (report.getExitCode() == ExitCode.FATAL_ERROR) { + report.getFatalErrorCause().printStackTrace(); + fail(); + } + assertTrue(file.delete()); model = testCrawlerHandler.getModel(); model2 = configurationContainer.getModel(); Modified: aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/test/ApertureTestBase.java =================================================================== --- aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/test/ApertureTestBase.java 2010-03-30 19:42:05 UTC (rev 2302) +++ aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/test/ApertureTestBase.java 2010-03-31 01:19:31 UTC (rev 2303) @@ -285,7 +285,7 @@ } /** - * Returns a list of subjects of all triples with the given subject and predicate + * Returns a list of subjects of all triples with the given object and predicate * @param model the model to check in * @param predicate the predicate of the triple to be found * @param object the object predicate @@ -319,13 +319,26 @@ */ public void checkStatement(URI property, String substring, RDFContainer container) throws ModelException { + checkStatement(container.getModel(), container.getDescribedUri(), property, substring); + } + + /** + * Asserts that the given container contains the given property, and that one of the values of that + * property is a literal, whose label contains the given substring. 
+ * + * @param property the property to look for + * @param substring the substring to look for + * @param container the container to look in + * @throws ModelException if something goes wrong + */ + public void checkStatement(Model model, Resource subject, URI property, String substring) + throws ModelException { // setup some info - Model model = container.getModel(); boolean encounteredSubstring = false; // loop over all statements that have the specified property uri as predicate ClosableIterator<? extends Statement> statements = model.findStatements( - container.getDescribedUri(), property, Variable.ANY); + subject, property, Variable.ANY); try { while (statements.hasNext()) { // check the property type Added: aperture/trunk/core/src/test/resources/org/semanticdesktop/aperture/docs/zip-somedocs.zip =================================================================== (Binary files differ) Property changes on: aperture/trunk/core/src/test/resources/org/semanticdesktop/aperture/docs/zip-somedocs.zip ___________________________________________________________________ Added: svn:mime-type + application/octet-stream This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <my...@us...> - 2010-03-31 02:01:13
|
Revision: 2304 http://aperture.svn.sourceforge.net/aperture/?rev=2304&view=rev Author: mylka Date: 2010-03-31 02:01:07 +0000 (Wed, 31 Mar 2010) Log Message: ----------- moved the fulltext of .ics files to the parent data object, not to the child objects with events Modified Paths: -------------- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/crawler/ical/IcalCrawler.java aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/crawler/ical/IcalSubCrawlerTest.java aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/crawler/ical/TestIcalCrawler.java Modified: aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/crawler/ical/IcalCrawler.java =================================================================== --- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/crawler/ical/IcalCrawler.java 2010-03-31 01:19:31 UTC (rev 2303) +++ aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/crawler/ical/IcalCrawler.java 2010-03-31 02:01:07 UTC (rev 2304) @@ -13,6 +13,7 @@ import java.io.FileReader; import java.io.IOException; import java.io.InputStream; +import java.io.StringWriter; import java.nio.charset.Charset; import java.util.Collection; import java.util.Iterator; @@ -32,6 +33,7 @@ import net.fortuna.ical4j.model.Property; import net.fortuna.ical4j.model.PropertyList; import net.fortuna.ical4j.model.Recur; +import net.fortuna.ical4j.model.ValidationException; import net.fortuna.ical4j.model.component.Observance; import net.fortuna.ical4j.model.component.VAlarm; import net.fortuna.ical4j.model.component.VEvent; @@ -85,6 +87,7 @@ import org.semanticdesktop.aperture.subcrawler.SubCrawlerException; import org.semanticdesktop.aperture.subcrawler.SubCrawlerHandler; import org.semanticdesktop.aperture.subcrawler.SubCrawlerUtil; +import org.semanticdesktop.aperture.util.IOUtil; import org.semanticdesktop.aperture.util.StringUtil; import org.semanticdesktop.aperture.util.UriUtil; import 
org.semanticdesktop.aperture.vocabulary.GEO; @@ -201,13 +204,16 @@ setDataSource(dataSource); setAccessData(accessData); - CalendarBuilder builder = null; Calendar calendar = null; try { + byte [] bytes = IOUtil.readBytes(stream); + builder = new CalendarBuilder(); - calendar = builder.build(stream); + calendar = builder.build(new ByteArrayInputStream(bytes)); parentMetadata.add(RDF.type, NCAL.Calendar); + parentMetadata.add(NIE.plainTextContent, new String(bytes, Charset.forName("UTF-8"))); + PropertyList propertyList = calendar.getProperties(); crawlPropertyList(propertyList, parentMetadata.getDescribedUri(), parentMetadata, null); ComponentList componentList = calendar.getComponents(); @@ -320,12 +326,24 @@ rdfContainer.add(NIE.rootElementOf,getDataSource().getID()); + storeFulltext(calendar, rdfContainer); passComponentToHandler(rdfContainer, null); ComponentList componentList = calendar.getComponents(); crawlComponentList(componentList, rdfContainer); } + private void storeFulltext(Calendar calendar, RDFContainer rdfContainer) { + try { + CalendarOutputter out = new CalendarOutputter(false); + StringWriter sw = new StringWriter(); + out.output(calendar, sw); + rdfContainer.add(NIE.plainTextContent,sw.toString()); + } catch (Exception e) { + logger.warn("Couldn't store the ical fulltext",e); + } + } + /** * Crawls a single calendar component. Checks the name of the component and dispatches it to the proper * component-handling method. 
@@ -3428,13 +3446,6 @@ private void passComponentToHandler(RDFContainer metadata, Component component) { DataObject dataObject = new DataObjectBase(metadata.getDescribedUri(), getDataSource(), metadata); - /* - * add the fulltext - */ - if (component != null) { - metadata.add(NIE.plainTextContent,component.toString()); - } - String id = metadata.getDescribedUri().toString(); if (accessData == null) { //handler.objectNew(this, dataObject); Modified: aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/crawler/ical/IcalSubCrawlerTest.java =================================================================== --- aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/crawler/ical/IcalSubCrawlerTest.java 2010-03-31 01:19:31 UTC (rev 2303) +++ aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/crawler/ical/IcalSubCrawlerTest.java 2010-03-31 02:01:07 UTC (rev 2304) @@ -75,12 +75,13 @@ /** * The fulltext of the federal reserve meeting should be there */ - checkStatement(handler.getModel(), federalReserveMeeting, NIE.plainTextContent, - "ATTENDEE;CUTYPE=INDIVIDUAL;ROLE=REQ-PARTICIPANT;PARTSTAT=NEEDS-ACTION;RSVP=TRUE;LANGUAGE=en:MAILTO:hoopy@frood.example"); + checkStatement(parentMetadata.getModel(), parentMetadata.getDescribedUri(), NIE.plainTextContent, + "ATTENDEE;CUTYPE=INDIVIDUAL;ROLE=REQ-PARTICIPANT;PARTSTAT=NEEDS-ACTION;"); + + + handler.close(); } - - public void testIncrementalCrawlerHandler() throws Exception { TestBasicSubCrawlerHandler handler = readIcalFile("cal01.ics", accessData); Modified: aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/crawler/ical/TestIcalCrawler.java =================================================================== --- aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/crawler/ical/TestIcalCrawler.java 2010-03-31 01:19:31 UTC (rev 2303) +++ aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/crawler/ical/TestIcalCrawler.java 2010-03-31 02:01:07 UTC (rev 2304) @@ -87,7 
+87,7 @@ assertMultiValueProperty(model, calendarNode, RDF.type, NCAL.Calendar); assertEquals(countOutgoingTriples(model, calendarNode, RDF.type), 3); - assertEquals(countStatements(model), 8); + assertEquals(countStatements(model), 9); } public void testAlarmComponent() throws Exception { @@ -125,7 +125,7 @@ Resource dtEndDatetimeNode = findSingleObjectResource(model, veventNode, NCAL.dtend); Resource recurBlankNode = findSingleObjectResource(model, veventNode, NCAL.rrule); assertEquals(countOutgoingTriples(model, veventNode, RDF.type), 3); - assertEquals(countOutgoingTriples(model, veventNode),13); + assertEquals(countOutgoingTriples(model, veventNode),12); assertSingleValueProperty(model, dtStartNcalDatetimeNode, NCAL.dateTime, "2002-06-30T09:00:00",XSD._dateTime); Resource timezoneNode = findSingleObjectResource(model, dtStartNcalDatetimeNode, NCAL.ncalTimezone); @@ -204,7 +204,7 @@ "http://host2.com/pub/busy/jpublic-01.ifb"); assertSingleValueProperty(model, vfreebusyNode, NCAL.comment, "This iCalendar file contains busy time information forthe next three months."); - assertEquals(countOutgoingTriples(model, vfreebusyNode),12); + assertEquals(countOutgoingTriples(model, vfreebusyNode),11); } private void assertMultiValueIntermediateNodeProperty(Model model3, Resource vfreebusyNode, URI firstProperty, @@ -238,7 +238,7 @@ findSingleObjectResource(model, vjournalNode, NCAL.organizer); assertSingleValueProperty(model, vjournalNode, NCAL.description, "journal\n"); assertSingleValueProperty(model, vjournalNode, NCAL.class_, NCAL.publicClassification); - assertEquals(12,countOutgoingTriples(model,vjournalNode)); + assertEquals(11,countOutgoingTriples(model,vjournalNode)); } public void testVTimezoneComponent() throws Exception { @@ -257,7 +257,7 @@ "http://timezones.r.us.net/tz/US-California-Los_Angeles"); Resource standardObservanceNode = findSingleObjectResource(model, vtimezoneNode, NCAL.standard); Resource daylightObservanceNode = findSingleObjectResource(model, 
vtimezoneNode, NCAL.daylight); - assertEquals(countOutgoingTriples(model, vtimezoneNode),8); + assertEquals(countOutgoingTriples(model, vtimezoneNode),7); assertSingleValueProperty(model,standardObservanceNode,RDF.type,NCAL.TimezoneObservance); assertSingleValueProperty(model,standardObservanceNode,NCAL.tzoffsetfrom,"-0400"); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <my...@us...> - 2010-03-31 11:13:34
|
Revision: 2306 http://aperture.svn.sourceforge.net/aperture/?rev=2306&view=rev Author: mylka Date: 2010-03-31 11:13:27 +0000 (Wed, 31 Mar 2010) Log Message: ----------- made the vcard and ical subcrawlers put in the fulltext regardless of parsing errors, Modified Paths: -------------- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/crawler/ical/IcalCrawler.java aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/vcard/VcardSubCrawler.java aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/crawler/ical/IcalSubCrawlerTest.java aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/vcard/VcardSubCrawlerTest.java Added Paths: ----------- aperture/trunk/core/src/test/resources/org/semanticdesktop/aperture/docs/icaltestdata/cal01-corrupted.ics aperture/trunk/core/src/test/resources/org/semanticdesktop/aperture/docs/vcard-dirk-corrupted.vcf Modified: aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/crawler/ical/IcalCrawler.java =================================================================== --- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/crawler/ical/IcalCrawler.java 2010-03-31 02:10:23 UTC (rev 2305) +++ aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/crawler/ical/IcalCrawler.java 2010-03-31 11:13:27 UTC (rev 2306) @@ -209,22 +209,24 @@ try { byte [] bytes = IOUtil.readBytes(stream); + /* + * fulltext before parsing + */ + parentMetadata.add(RDF.type, NCAL.Calendar); + parentMetadata.add(NIE.plainTextContent, new String(bytes, Charset.forName("UTF-8"))); + builder = new CalendarBuilder(); calendar = builder.build(new ByteArrayInputStream(bytes)); - parentMetadata.add(RDF.type, NCAL.Calendar); - parentMetadata.add(NIE.plainTextContent, new String(bytes, Charset.forName("UTF-8"))); + PropertyList propertyList = calendar.getProperties(); crawlPropertyList(propertyList, parentMetadata.getDescribedUri(), parentMetadata, null); ComponentList componentList = 
calendar.getComponents(); crawlComponentList(componentList, parentMetadata); } - catch (ParserException pe) { - throw new SubCrawlerException(pe); + catch (Exception e) { + logger.warn("error while parsing the ical file: " + parentMetadata.getDescribedUri(),e); } - catch (IOException ioe) { - throw new SubCrawlerException(ioe); - } finally { closeClosable(stream); } Modified: aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/vcard/VcardSubCrawler.java =================================================================== --- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/vcard/VcardSubCrawler.java 2010-03-31 02:10:23 UTC (rev 2305) +++ aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/vcard/VcardSubCrawler.java 2010-03-31 11:13:27 UTC (rev 2306) @@ -148,7 +148,9 @@ byte [] bytes = baos.toByteArray(); - // add the fulltext + // add the fulltext, before the parser is invoked, so that + // the fulltext is added regardless of the whether the file + // is parseable or not try { String fulltext = new String(bytes, Charset.forName("UTF-8")); parentMetadata.add(NIE.plainTextContent,fulltext); @@ -179,7 +181,7 @@ } } catch (Exception e) { - throw new SubCrawlerException(e); + logger.warn("Error while parsing vcard: " + parentMetadata.getDescribedUri(),e); } } Modified: aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/crawler/ical/IcalSubCrawlerTest.java =================================================================== --- aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/crawler/ical/IcalSubCrawlerTest.java 2010-03-31 02:10:23 UTC (rev 2305) +++ aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/crawler/ical/IcalSubCrawlerTest.java 2010-03-31 11:13:27 UTC (rev 2306) @@ -70,15 +70,39 @@ */ assertNewModUnmod(handler, 4, 0, 0); - Resource federalReserveMeeting = findComponentByUid(handler.getModel(), "20020630T230600Z-3895-69-1-16@jammer"); - /** - * The fulltext of 
the federal reserve meeting should be there + * The fulltext should be there */ checkStatement(parentMetadata.getModel(), parentMetadata.getDescribedUri(), NIE.plainTextContent, "ATTENDEE;CUTYPE=INDIVIDUAL;ROLE=REQ-PARTICIPANT;PARTSTAT=NEEDS-ACTION;"); + handler.close(); + } + + /** + * A corrupted file should still yield the fulltext, just like a text file. + */ + public void testCorruptedSubcrawlCal01() throws Exception { + SubCrawler sc = new IcalCrawler(); + TestBasicSubCrawlerHandler handler = new TestBasicSubCrawlerHandler(); + InputStream stream = org.semanticdesktop.aperture.util.ResourceUtil.getInputStream(DOCS_PATH + "icaltestdata/cal01-corrupted.ics", this.getClass()); + RDFContainer parentMetadata = new RDFContainerImpl(handler.getModel(),new URIImpl("uri:dummyuri/icaltestdata/cal01-corrupted.ics")); + sc.subCrawl(null, stream, handler, null, null, null, null, parentMetadata); + + /* + * there are no objects, the file is corrupted + */ + assertNewModUnmod(handler, 0, 0, 0); + /** + * The fulltext should be there + */ + checkStatement(parentMetadata.getModel(), parentMetadata.getDescribedUri(), NIE.plainTextContent, + "ATTENDEE;CUTYPE=INDIVIDUAL;ROLE=REQ-PARTICIPANT;PARTSTAT=NEEDS-ACTION;"); + checkStatement(parentMetadata.getModel(), parentMetadata.getDescribedUri(), NIE.plainTextContent, + "THIS IS A COMPLETELY CORRUPTED STRING IN THE MIDDLE OF A ICAL"); + checkStatement(parentMetadata.getModel(), parentMetadata.getDescribedUri(), NIE.plainTextContent, + "THIS SHOULD BREAK THE PARSER BUT THE FULLTEXT SHOULD STILL BE PRESERVED"); handler.close(); } Modified: aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/vcard/VcardSubCrawlerTest.java =================================================================== --- aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/vcard/VcardSubCrawlerTest.java 2010-03-31 02:10:23 UTC (rev 2305) +++ 
aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/vcard/VcardSubCrawlerTest.java 2010-03-31 11:13:27 UTC (rev 2306) @@ -42,37 +42,37 @@ private RDFContainer metadata; private TestBasicSubCrawlerHandler handler; -// public void testRfc2426ExampleExtraction() throws Exception { -// VcardSubCrawler subCrawler = new VcardSubCrawler(); -// metadata = subCrawl(DOCS_PATH + "vcard-rfc2426.vcf", subCrawler); -// Model model = metadata.getModel(); -// assertStatementCount(2, model, Variable.ANY, RDF.type, NCO.PersonContact); -// assertNewModUnmod(handler, 2, 0, 0); -// -// validate(metadata); -// metadata.dispose(); -// metadata = null; -// } -// -// /** -// * The vcard-rfc2426.vcf contains more than one vcard, therefore the vcards inside will get a proper -// * vcard: uri. This test checks this. It uses an iterator because at the time of writing the jpim library -// * generated its own uids in a really crappy way that changed with each crawl. -// * -// * @throws Exception -// */ -// public void testRfc2426VcardUris() throws Exception { -// VcardSubCrawler subCrawler = new VcardSubCrawler(); -// metadata = subCrawl(DOCS_PATH + "vcard-rfc2426.vcf", subCrawler); -// Iterator<String> id = handler.getNewObjects().iterator(); -// String st = id.next(); -// assertTrue(st.startsWith("vcard:uri:dummyuri!/")); -// st = id.next(); -// assertTrue(st.startsWith("vcard:uri:dummyuri!/")); -// metadata.dispose(); -// metadata = null; -// } + public void testRfc2426ExampleExtraction() throws Exception { + VcardSubCrawler subCrawler = new VcardSubCrawler(); + metadata = subCrawl(DOCS_PATH + "vcard-rfc2426.vcf", subCrawler); + Model model = metadata.getModel(); + assertStatementCount(2, model, Variable.ANY, RDF.type, NCO.PersonContact); + assertNewModUnmod(handler, 2, 0, 0); + + validate(metadata); + metadata.dispose(); + metadata = null; + } + /** + * The vcard-rfc2426.vcf contains more than one vcard, therefore the vcards inside will get a proper + * vcard: uri. 
This test checks this. It uses an iterator because at the time of writing the jpim library + * generated its own uids in a really crappy way that changed with each crawl. + * + * @throws Exception + */ + public void testRfc2426VcardUris() throws Exception { + VcardSubCrawler subCrawler = new VcardSubCrawler(); + metadata = subCrawl(DOCS_PATH + "vcard-rfc2426.vcf", subCrawler); + Iterator<String> id = handler.getNewObjects().iterator(); + String st = id.next(); + assertTrue(st.startsWith("vcard:uri:dummyuri!/")); + st = id.next(); + assertTrue(st.startsWith("vcard:uri:dummyuri!/")); + metadata.dispose(); + metadata = null; + } + public void testOutlookExampleExtraction() throws Exception { VcardSubCrawler subCrawler = new VcardSubCrawler(); metadata = subCrawl(DOCS_PATH + "vcard-antoni-outlook2003.vcf", subCrawler); @@ -91,373 +91,387 @@ metadata = null; } -// /** -// * The trick is that a line like this: -// * -// * <pre> -// * PHOTO;VALUE=URL:https://sourceforge.net/apps/trac/aperture/raw-attachment/wiki/MiscWikiFiles/gunnar.jpg -// * </pre> -// * -// * Should not yield a separate data object -// * -// * @throws Exception -// */ -// public void testOutlookExampleWithUrlPhoto() throws Exception { -// VcardSubCrawler subCrawler = new VcardSubCrawler(); -// metadata = subCrawl(DOCS_PATH + "vcard-antoni-outlook2003-urlphoto.vcf", subCrawler); -// // note that NO additional data objects have been reported, this -// // file contains only one contact, the photo has VALUE=URL, therefore -// // it should NOT be reported as a separate data object -// assertNewModUnmod(handler, 0, 0, 0); -// assertTrue(metadata.getString(NIE.plainTextContent).contains("Trippstadter Str.")); -// -// // moreover we need to make sure that the url of the photo is there -// checkStatement(NCO.photo, new URIImpl("https://sourceforge.net/apps/trac/aperture/raw-attachment/wiki/MiscWikiFiles/gunnar.jpg"), metadata); -// -// validate(metadata); -// metadata.dispose(); -// metadata = null; -// } -// 
-// public void testKontactExampleExtraction() throws Exception { -// VcardSubCrawler subCrawler = new VcardSubCrawler(); -// metadata = subCrawl(DOCS_PATH + "vcard-antoni-kontact.vcf", subCrawler); -// // note that NO additional data objects have been reported, this -// // file contains only one contact, but this contact has a photo, -// // which is returned as a separate object -// assertNewModUnmod(handler, 1, 0, 0); -// -// // we should get the fulltext too -// String fullText = metadata.getString(NIE.plainTextContent); -// assertTrue(fullText.contains("Antoni")); -// -// validate(metadata); -// metadata.dispose(); -// metadata = null; -// } -// -// public void testDirkExtraction() throws Exception { -// VcardSubCrawler subCrawler = new VcardSubCrawler(); -// metadata = subCrawl(DOCS_PATH + "vcard-dirk.vcf", subCrawler); -// // note that NO additional data objects have been reported, this -// // file contains only one contact -// assertNewModUnmod(handler, 0, 0, 0); -// validate(metadata); -// metadata.dispose(); -// metadata = null; -// } -// -// public void testSapVcardsExtraction() throws Exception { -// VcardSubCrawler subCrawler = new VcardSubCrawler(); -// metadata = subCrawl(DOCS_PATH + "vcard-vCards-SAP.vcf", subCrawler); -// assertNewModUnmod(handler, 30, 0, 0); -// validate(metadata); -// metadata.dispose(); -// metadata = null; -// } -// -// /** -// * The vcard-vCards-SAP.vcf contains more than one vcard, therefore the vcards inside will get a proper -// * vcard: uri. This test checks this. It uses an iterator because at the time of writing the jpim library -// * generated its own uids in a really crappy way that changed with each crawl. 
-// * -// * @throws Exception -// */ -// public void testSapVcardsUris() throws Exception { -// VcardSubCrawler subCrawler = new VcardSubCrawler(); -// metadata = subCrawl(DOCS_PATH + "vcard-vCards-SAP.vcf", subCrawler); -// Iterator<String> id = handler.getNewObjects().iterator(); -// for (int i = 0; i < 30; i++) { -// String st = id.next(); -// assertTrue(st.startsWith("vcard:uri:dummyuri!/")); -// } -// metadata.dispose(); -// metadata = null; -// } -// -// /** -// * This case tests if the issue 2475980 is solved. Originally the vcard crawler used a library -// * called jpim which was crappy. Then we switched to ical4j-vcard which seems to be better. -// * -// * The vCards-SAP file contains many vcards, if the file doesn't change, the crawler should -// * report that all encountered objects are unchanged. -// * -// * @throws Exception -// */ -// public void testSapVcardsUrisDontChange() throws Exception { -// AccessData ad = new AccessDataImpl(); -// ad.initialize(); -// -// VcardSubCrawler subCrawler = new VcardSubCrawler(); -// InputStream stream = org.semanticdesktop.aperture.util.ResourceUtil.getInputStream(DOCS_PATH + "vcard-vCards-SAP.vcf", this.getClass()); -// TestBasicSubCrawlerHandler chandler = new TestBasicSubCrawlerHandler(); -// RDFContainer parentMetadata = new RDFContainerImpl(chandler.getModel(),new URIImpl("uri:dummyuri")); -// subCrawler.subCrawl(null, stream, chandler, null, ad, null, null, parentMetadata); -// assertNewModUnmod(chandler, 30, 0, 0); -// chandler.close(); -// -// subCrawler = new VcardSubCrawler(); -// stream = org.semanticdesktop.aperture.util.ResourceUtil.getInputStream(DOCS_PATH + "vcard-vCards-SAP.vcf", this.getClass()); -// chandler = new TestBasicSubCrawlerHandler(); -// parentMetadata = new RDFContainerImpl(chandler.getModel(),new URIImpl("uri:dummyuri")); -// subCrawler.subCrawl(null, stream, chandler, null, ad, null, null, parentMetadata); -// assertNewModUnmod(chandler, 0, 0, 30); -// chandler.close(); -// -// 
subCrawler = new VcardSubCrawler(); -// stream = org.semanticdesktop.aperture.util.ResourceUtil.getInputStream(DOCS_PATH + "vcard-vCards-SAP-onemodified.vcf", this.getClass()); -// chandler = new TestBasicSubCrawlerHandler(); -// parentMetadata = new RDFContainerImpl(chandler.getModel(),new URIImpl("uri:dummyuri")); -// subCrawler.subCrawl(null, stream, chandler, null, ad, null, null, parentMetadata); -// assertNewModUnmod(chandler, 1, 0, 29); -// chandler.close(); -// } -// -// public void testFrankDawsonNames() throws Exception { -// VcardSubCrawler subCrawler = new VcardSubCrawler(); -// metadata = subCrawl(DOCS_PATH + "vcard-rfc2426.vcf", subCrawler); -// Model model = metadata.getModel(); -// assertStatementCount(2, model, Variable.ANY, RDF.type, NCO.PersonContact); -// Resource frankDawsonContact = findContact(model, "Frank Dawson"); -// assertSingleValueProperty(model, frankDawsonContact, NCO.nameFamily, "Dawson"); -// assertSingleValueProperty(model, frankDawsonContact, NCO.nameGiven, "Frank"); -// } -// -// public void testAntoniNames() throws Exception { -// VcardSubCrawler subCrawler = new VcardSubCrawler(); -// metadata = subCrawl(DOCS_PATH + "vcard-antoni-outlook2003.vcf", subCrawler); -// Model model = metadata.getModel(); -// Resource antoniContact = findContact(model, "Antoni Jozef Mylka jun."); -// assertSingleValueProperty(model, antoniContact, NCO.nameFamily, "Mylka"); -// assertSingleValueProperty(model, antoniContact, NCO.nameGiven, "Antoni"); -// assertSingleValueProperty(model, antoniContact, NCO.nameAdditional, "Jozef"); -// assertSingleValueProperty(model, antoniContact, NCO.nameHonorificPrefix, "Herr"); -// assertSingleValueProperty(model, antoniContact, NCO.nameHonorificSuffix, "jun."); -// } -// -// public void testUrl() throws Exception { -// VcardSubCrawler subCrawler = new VcardSubCrawler(); -// metadata = subCrawl(DOCS_PATH + "vcard-rfc2426.vcf", subCrawler); -// Model model = metadata.getModel(); -// Resource frankDawsonContact = 
findContact(model, "Frank Dawson"); -// Resource url = findSingleObjectResource(model, frankDawsonContact, NCO.url); -// assertTrue(url.toString().equals("http://home.earthlink.net/~fdawson")); -// } -// -// public void testTelephoneNumbers() throws Exception { -// VcardSubCrawler subCrawler = new VcardSubCrawler(); -// metadata = subCrawl(DOCS_PATH + "vcard-rfc2426.vcf", subCrawler); -// Model model = metadata.getModel(); -// Resource frankDawsonContact = findContact(model, "Frank Dawson"); -// Resource affiliation = findSingleObjectResource(model, frankDawsonContact, NCO.hasAffiliation); -// assertSingleValueProperty(model, affiliation, RDF.type, NCO.Affiliation); -// -// -// Set<Resource> telephoneNumbers = findObjectResourceSet(model, affiliation, NCO.hasPhoneNumber); -// assertEquals(2, telephoneNumbers.size()); -// assertSparqlQuery(model, -// "PREFIX nco: <" + NCO.NS_NCO + "> " + -// "SELECT ?number " + -// "WHERE" + -// " { " + affiliation.toSPARQL() + " nco:hasPhoneNumber ?phoneNumber . " + -// " ?phoneNumber a nco:PhoneNumber ." + -// " ?phoneNumber nco:phoneNumber ?number ." + -// " FILTER (regex(?number,\"\\\\+1-919-676-9515\"))" + // weird, four slashes are necessary... -// " }"); -// assertSparqlQuery(model, -// "PREFIX nco: <" + NCO.NS_NCO + "> " + -// "SELECT ?number " + -// "WHERE" + -// " { " + affiliation.toSPARQL() + " nco:hasPhoneNumber ?phoneNumber . " + -// " ?phoneNumber a nco:PhoneNumber ." + -// " ?phoneNumber nco:phoneNumber ?number ." 
+ -// " FILTER (regex(?number,\"\\\\+1-919-676-9564\"))" + -// " }"); -// } -// -// public void testEmailAddresses() throws Exception { -// VcardSubCrawler subCrawler = new VcardSubCrawler(); -// metadata = subCrawl(DOCS_PATH + "vcard-rfc2426.vcf", subCrawler); -// Model model = metadata.getModel(); -// Resource frankDawsonContact = findContact(model, "Frank Dawson"); -// Set<Resource> emails = findObjectResourceSet(model,frankDawsonContact, NCO.hasEmailAddress); -// assertEquals(2, emails.size()); -// assertSparqlQuery(model, -// "PREFIX nco: <" + NCO.NS_NCO + "> " + -// "SELECT ?email " + -// "WHERE" + -// " { " + frankDawsonContact.toSPARQL() + " nco:hasEmailAddress ?email . " + -// " ?email a nco:EmailAddress ." + -// " ?email nco:emailAddress ?address ." + -// " FILTER (regex(?address,\"Frank_Dawson@Lotus.com\"))" + -// " }"); -// assertSparqlQuery(model, -// "PREFIX nco: <" + NCO.NS_NCO + "> " + -// "SELECT ?email " + -// "WHERE" + -// " { " + frankDawsonContact.toSPARQL() + " nco:hasEmailAddress ?email . " + -// " ?email a nco:EmailAddress ." + -// " ?email nco:emailAddress ?address ." 
+ -// " FILTER (regex(?address,\"fdawson@earthlink.net\"))" + -// " }"); -// } -// -// public void testWorkPostalAddress() throws Exception { -// VcardSubCrawler subCrawler = new VcardSubCrawler(); -// metadata = subCrawl(DOCS_PATH + "vcard-rfc2426.vcf", subCrawler); -// Model model = metadata.getModel(); -// Resource frankDawsonContact = findContact(model, "Frank Dawson"); -// assertTrue(frankDawsonContact.toString().startsWith("vcard:")); -// Resource affiliation = findSingleObjectResource(model, frankDawsonContact, NCO.hasAffiliation); -// assertSingleValueProperty(model, affiliation, RDF.type, NCO.Affiliation); -// Resource address = findSingleObjectResource(model, affiliation, NCO.hasPostalAddress); -// assertMultiValueProperty(model, address, RDF.type, NCO.PostalAddress); -// assertMultiValueProperty(model, address, RDF.type, NCO.ParcelDeliveryAddress); -// assertSingleValueProperty(model, address, NCO.streetAddress, "6544 Battleford Drive"); -// assertSingleValueProperty(model, address, NCO.locality, "Raleigh"); -// assertSingleValueProperty(model, address, NCO.region, "NC"); -// } -// -// public void testHomePostalAddress() throws Exception { -// VcardSubCrawler subCrawler = new VcardSubCrawler(); -// metadata = subCrawl(DOCS_PATH + "vcard-antoni-outlook2003.vcf", subCrawler); -// Model model = metadata.getModel(); -// Resource antoniContact = findContact(model, "Antoni Jozef Mylka jun."); -// Resource address = findSingleObjectResource(model, antoniContact, NCO.hasPostalAddress); -// assertMultiValueProperty(model, address, RDF.type, NCO.PostalAddress); -// -// assertSingleValueProperty(model, address, NCO.streetAddress, "Budryka 2/1110"); -// assertSingleValueProperty(model, address, NCO.locality, "Krakow"); -// assertSingleValueProperty(model, address, NCO.region, "malopolskie"); -// assertSingleValueProperty(model, address, NCO.postalcode, "30-072"); -// assertSingleValueProperty(model, address, NCO.country, "Polen"); -// } -// -// public void testRole() throws 
Exception { -// VcardSubCrawler subCrawler = new VcardSubCrawler(); -// metadata = subCrawl(DOCS_PATH + "vcard-antoni-outlook2003.vcf", subCrawler); -// Model model = metadata.getModel(); -// Resource antoniContact = findContact(model, "Antoni Jozef Mylka jun."); -// Resource affiliation = findSingleObjectResource(model, antoniContact, NCO.hasAffiliation); -// assertSingleValueProperty(model, affiliation, NCO.role, "Software-Developer"); -// } -// -// public void testTitle() throws Exception { -// VcardSubCrawler subCrawler = new VcardSubCrawler(); -// metadata = subCrawl(DOCS_PATH + "vcard-antoni-outlook2003.vcf", subCrawler); -// Model model = metadata.getModel(); -// Resource antoniContact = findContact(model, "Antoni Jozef Mylka jun."); -// Resource affiliation = findSingleObjectResource(model, antoniContact, NCO.hasAffiliation); -// assertSingleValueProperty(model, affiliation, NCO.title, "Intern"); -// } -// -// public void testNickname() throws Exception { -// VcardSubCrawler subCrawler = new VcardSubCrawler(); -// metadata = subCrawl(DOCS_PATH + "vcard-antoni-outlook2003.vcf", subCrawler); -// Model model = metadata.getModel(); -// Resource antoniContact = findContact(model, "Antoni Jozef Mylka jun."); -// assertSingleValueProperty(model, antoniContact, NCO.nickname, "Ant"); -// } -// -// public void testBday() throws Exception { -// VcardSubCrawler subCrawler = new VcardSubCrawler(); -// metadata = subCrawl(DOCS_PATH + "vcard-antoni-outlook2003.vcf", subCrawler); -// Model model = metadata.getModel(); -// Resource antoniContact = findContact(model, "Antoni Jozef Mylka jun."); -// String dateString = findSingleObjectNode(model, antoniContact, NCO.birthDate).asLiteral().getValue(); -// assertTrue(DateUtil.dateTimeStringEqualToUTCString(dateString, "1980-01-18T00:00:00Z")); -// } -// -// public void testOrganization() throws Exception { -// VcardSubCrawler subCrawler = new VcardSubCrawler(); -// metadata = subCrawl(DOCS_PATH + "vcard-antoni-outlook2003.vcf", 
subCrawler); -// Model model = metadata.getModel(); -// Resource antoniContact = findContact(model, "Antoni Jozef Mylka jun."); -// Resource affiliation = findSingleObjectResource(model, antoniContact, NCO.hasAffiliation); -// assertSingleValueProperty(model, affiliation, RDF.type, NCO.Affiliation); -// assertSingleValueProperty(model, affiliation, NCO.department, "Knowledge-Management"); -// Resource organization = findSingleObjectResource(model, affiliation, NCO.org); -// assertSingleValueProperty(model, organization, RDF.type, NCO.OrganizationContact); -// assertSingleValueProperty(model, organization, NCO.fullname, "DFKI"); -// } -// -// public void testNote() throws Exception { -// VcardSubCrawler subCrawler = new VcardSubCrawler(); -// metadata = subCrawl(DOCS_PATH + "vcard-dirk.vcf", subCrawler); -// Model model = metadata.getModel(); -// Resource dirkContact = findContact(model, "Dirk"); -// assertEquals("uri:dummyuri",dirkContact.toString()); -// assertSingleValueProperty(model, dirkContact, NCO.note, "The canonical Dirk\r\n"); -// metadata.dispose(); -// metadata = null; -// } -// -// public void testRev() throws Exception { -// VcardSubCrawler subCrawler = new VcardSubCrawler(); -// metadata = subCrawl(DOCS_PATH + "vcard-dirk.vcf", subCrawler); -// Model model = metadata.getModel(); -// Resource dirkContact = findContact(model, "Dirk"); -// // this property is not testable, since the value is timezone-dependent -// //assertSingleValueProperty(model, dirkContact, NIE.contentLastModified, "2007-11-09T10:46:02Z", XSD._dateTime); -// String dateString = findSingleObjectNode(model, dirkContact, NIE.contentLastModified).asLiteral().getValue(); -// assertTrue(dateString.startsWith("2007-11")); -// metadata.dispose(); -// metadata = null; -// } -// -// public void testRev2() throws Exception { -// VcardSubCrawler subCrawler = new VcardSubCrawler(); -// metadata = subCrawl(DOCS_PATH + "vcard-antoni-kontact.vcf", subCrawler); -// Model model = metadata.getModel(); 
-// Resource dirkContact = findContact(model, "Antoni Mylka"); -// // this property is not testable, since the value is timezone-dependent -// //assertSingleValueProperty(model, dirkContact, NIE.contentLastModified, "2008-01-28T15:50:16Z", XSD._dateTime); -// String dateString = findSingleObjectNode(model, dirkContact, NIE.contentLastModified).asLiteral().getValue(); -// assertTrue(dateString.startsWith("2008-01")); -// metadata.dispose(); -// metadata = null; -// } -// -// /** -// * This is supposed to test a solution to the issue number 2475957. -// * -// * RFC 2426 Sec. 3.6.4 http://tools.ietf.org/html/rfc2426#section-3.6.4 -// * -// * State that the value of the REV property can be of date-time type, which in turn is defined -// * in RFC 2425 sec. 5.8.4 -// * -// * http://tools.ietf.org/html/rfc2425#section-5.8.4 -// * -// * That definition allows for a timezone component to appear. At one point the -// * vcard crawler ignored that timezone definition, took the date-time string without -// * the timezone and treated it as one from the default timezone for the current locale -// * which was obviously wrong. -// * -// * The vcard-antoni-kontact.vcf has a REV property that denotes a timestamp in the UTC timezone. -// * This test checks if the date is recorded correctly. 
-// * -// * @throws Exception -// */ -// public void testRev3() throws Exception { -// VcardSubCrawler subCrawler = new VcardSubCrawler(); -// metadata = subCrawl(DOCS_PATH + "vcard-antoni-kontact.vcf", subCrawler); -// Model model = metadata.getModel(); -// Resource antoniContact = findContact(model, "Antoni Mylka"); -// String dateString = findSingleObjectNode(model, antoniContact, NIE.contentLastModified).asLiteral().getValue(); -// DateUtil.dateTimeStringEqualToUTCString(dateString, "2008-01-28T15:50:16Z"); -// metadata.dispose(); -// metadata = null; -// } -// -// public void testUid() throws Exception { -// VcardSubCrawler subCrawler = new VcardSubCrawler(); -// metadata = subCrawl(DOCS_PATH + "vcard-antoni-kontact.vcf", subCrawler); -// Model model = metadata.getModel(); -// Resource dirkContact = findContact(model, "Antoni Mylka"); -// assertSingleValueProperty(model, dirkContact, NCO.contactUID, "BHTRsCvcmd"); -// metadata.dispose(); -// metadata = null; -// } + /** + * The trick is that a line like this: + * + * <pre> + * PHOTO;VALUE=URL:https://sourceforge.net/apps/trac/aperture/raw-attachment/wiki/MiscWikiFiles/gunnar.jpg + * </pre> + * + * Should not yield a separate data object + * + * @throws Exception + */ + public void testOutlookExampleWithUrlPhoto() throws Exception { + VcardSubCrawler subCrawler = new VcardSubCrawler(); + metadata = subCrawl(DOCS_PATH + "vcard-antoni-outlook2003-urlphoto.vcf", subCrawler); + // note that NO additional data objects have been reported, this + // file contains only one contact, the photo has VALUE=URL, therefore + // it should NOT be reported as a separate data object + assertNewModUnmod(handler, 0, 0, 0); + assertTrue(metadata.getString(NIE.plainTextContent).contains("Trippstadter Str.")); + + // moreover we need to make sure that the url of the photo is there + checkStatement(NCO.photo, new URIImpl("https://sourceforge.net/apps/trac/aperture/raw-attachment/wiki/MiscWikiFiles/gunnar.jpg"), metadata); + + 
validate(metadata); + metadata.dispose(); + metadata = null; + } + + public void testKontactExampleExtraction() throws Exception { + VcardSubCrawler subCrawler = new VcardSubCrawler(); + metadata = subCrawl(DOCS_PATH + "vcard-antoni-kontact.vcf", subCrawler); + // note that NO additional data objects have been reported, this + // file contains only one contact, but this contact has a photo, + // which is returned as a separate object + assertNewModUnmod(handler, 1, 0, 0); + + // we should get the fulltext too + String fullText = metadata.getString(NIE.plainTextContent); + assertTrue(fullText.contains("Antoni")); + + validate(metadata); + metadata.dispose(); + metadata = null; + } + + public void testDirkExtraction() throws Exception { + VcardSubCrawler subCrawler = new VcardSubCrawler(); + metadata = subCrawl(DOCS_PATH + "vcard-dirk.vcf", subCrawler); + // note that NO additional data objects have been reported, this + // file contains only one contact + assertNewModUnmod(handler, 0, 0, 0); + validate(metadata); + metadata.dispose(); + metadata = null; + } + + public void testCorruptedDirkExtraction() throws Exception { + VcardSubCrawler subCrawler = new VcardSubCrawler(); + metadata = subCrawl(DOCS_PATH + "vcard-dirk-corrupted.vcf", subCrawler); + assertNewModUnmod(handler, 0, 0, 0); + // the fulltext should be there, even though the file is corrupted + assertTrue(metadata.getString(NIE.plainTextContent).contains( + "THIS IS A COMPLETELY CORRUPTED STRING IN THE MIDDLE OF A VCARD")); + assertTrue(metadata.getString(NIE.plainTextContent).contains( + "THIS SHOULD BREAK THE PARSER BUT THE FULLTEXT SHOULD STILL BE PRESERVED")); + validate(metadata); + metadata.dispose(); + metadata = null; + } + + public void testSapVcardsExtraction() throws Exception { + VcardSubCrawler subCrawler = new VcardSubCrawler(); + metadata = subCrawl(DOCS_PATH + "vcard-vCards-SAP.vcf", subCrawler); + assertNewModUnmod(handler, 30, 0, 0); + validate(metadata); + metadata.dispose(); + metadata 
= null; + } + + /** + * The vcard-vCards-SAP.vcf contains more than one vcard, therefore the vcards inside will get a proper + * vcard: uri. This test checks this. It uses an iterator because at the time of writing the jpim library + * generated its own uids in a really crappy way that changed with each crawl. + * + * @throws Exception + */ + public void testSapVcardsUris() throws Exception { + VcardSubCrawler subCrawler = new VcardSubCrawler(); + metadata = subCrawl(DOCS_PATH + "vcard-vCards-SAP.vcf", subCrawler); + Iterator<String> id = handler.getNewObjects().iterator(); + for (int i = 0; i < 30; i++) { + String st = id.next(); + assertTrue(st.startsWith("vcard:uri:dummyuri!/")); + } + metadata.dispose(); + metadata = null; + } + + /** + * This case tests if the issue 2475980 is solved. Originally the vcard crawler used a library + * called jpim which was crappy. Then we switched to ical4j-vcard which seems to be better. + * + * The vCards-SAP file contains many vcards, if the file doesn't change, the crawler should + * report that all encountered objects are unchanged. 
+ * + * @throws Exception + */ + public void testSapVcardsUrisDontChange() throws Exception { + AccessData ad = new AccessDataImpl(); + ad.initialize(); + + VcardSubCrawler subCrawler = new VcardSubCrawler(); + InputStream stream = org.semanticdesktop.aperture.util.ResourceUtil.getInputStream(DOCS_PATH + "vcard-vCards-SAP.vcf", this.getClass()); + TestBasicSubCrawlerHandler chandler = new TestBasicSubCrawlerHandler(); + RDFContainer parentMetadata = new RDFContainerImpl(chandler.getModel(),new URIImpl("uri:dummyuri")); + subCrawler.subCrawl(null, stream, chandler, null, ad, null, null, parentMetadata); + assertNewModUnmod(chandler, 30, 0, 0); + chandler.close(); + + subCrawler = new VcardSubCrawler(); + stream = org.semanticdesktop.aperture.util.ResourceUtil.getInputStream(DOCS_PATH + "vcard-vCards-SAP.vcf", this.getClass()); + chandler = new TestBasicSubCrawlerHandler(); + parentMetadata = new RDFContainerImpl(chandler.getModel(),new URIImpl("uri:dummyuri")); + subCrawler.subCrawl(null, stream, chandler, null, ad, null, null, parentMetadata); + assertNewModUnmod(chandler, 0, 0, 30); + chandler.close(); + + subCrawler = new VcardSubCrawler(); + stream = org.semanticdesktop.aperture.util.ResourceUtil.getInputStream(DOCS_PATH + "vcard-vCards-SAP-onemodified.vcf", this.getClass()); + chandler = new TestBasicSubCrawlerHandler(); + parentMetadata = new RDFContainerImpl(chandler.getModel(),new URIImpl("uri:dummyuri")); + subCrawler.subCrawl(null, stream, chandler, null, ad, null, null, parentMetadata); + assertNewModUnmod(chandler, 1, 0, 29); + chandler.close(); + } + + public void testFrankDawsonNames() throws Exception { + VcardSubCrawler subCrawler = new VcardSubCrawler(); + metadata = subCrawl(DOCS_PATH + "vcard-rfc2426.vcf", subCrawler); + Model model = metadata.getModel(); + assertStatementCount(2, model, Variable.ANY, RDF.type, NCO.PersonContact); + Resource frankDawsonContact = findContact(model, "Frank Dawson"); + assertSingleValueProperty(model, 
frankDawsonContact, NCO.nameFamily, "Dawson"); + assertSingleValueProperty(model, frankDawsonContact, NCO.nameGiven, "Frank"); + } + + public void testAntoniNames() throws Exception { + VcardSubCrawler subCrawler = new VcardSubCrawler(); + metadata = subCrawl(DOCS_PATH + "vcard-antoni-outlook2003.vcf", subCrawler); + Model model = metadata.getModel(); + Resource antoniContact = findContact(model, "Antoni Jozef Mylka jun."); + assertSingleValueProperty(model, antoniContact, NCO.nameFamily, "Mylka"); + assertSingleValueProperty(model, antoniContact, NCO.nameGiven, "Antoni"); + assertSingleValueProperty(model, antoniContact, NCO.nameAdditional, "Jozef"); + assertSingleValueProperty(model, antoniContact, NCO.nameHonorificPrefix, "Herr"); + assertSingleValueProperty(model, antoniContact, NCO.nameHonorificSuffix, "jun."); + } + + public void testUrl() throws Exception { + VcardSubCrawler subCrawler = new VcardSubCrawler(); + metadata = subCrawl(DOCS_PATH + "vcard-rfc2426.vcf", subCrawler); + Model model = metadata.getModel(); + Resource frankDawsonContact = findContact(model, "Frank Dawson"); + Resource url = findSingleObjectResource(model, frankDawsonContact, NCO.url); + assertTrue(url.toString().equals("http://home.earthlink.net/~fdawson")); + } + + public void testTelephoneNumbers() throws Exception { + VcardSubCrawler subCrawler = new VcardSubCrawler(); + metadata = subCrawl(DOCS_PATH + "vcard-rfc2426.vcf", subCrawler); + Model model = metadata.getModel(); + Resource frankDawsonContact = findContact(model, "Frank Dawson"); + Resource affiliation = findSingleObjectResource(model, frankDawsonContact, NCO.hasAffiliation); + assertSingleValueProperty(model, affiliation, RDF.type, NCO.Affiliation); + + + Set<Resource> telephoneNumbers = findObjectResourceSet(model, affiliation, NCO.hasPhoneNumber); + assertEquals(2, telephoneNumbers.size()); + assertSparqlQuery(model, + "PREFIX nco: <" + NCO.NS_NCO + "> " + + "SELECT ?number " + + "WHERE" + + " { " + 
affiliation.toSPARQL() + " nco:hasPhoneNumber ?phoneNumber . " + + " ?phoneNumber a nco:PhoneNumber ." + + " ?phoneNumber nco:phoneNumber ?number ." + + " FILTER (regex(?number,\"\\\\+1-919-676-9515\"))" + // weird, four slashes are necessary... + " }"); + assertSparqlQuery(model, + "PREFIX nco: <" + NCO.NS_NCO + "> " + + "SELECT ?number " + + "WHERE" + + " { " + affiliation.toSPARQL() + " nco:hasPhoneNumber ?phoneNumber . " + + " ?phoneNumber a nco:PhoneNumber ." + + " ?phoneNumber nco:phoneNumber ?number ." + + " FILTER (regex(?number,\"\\\\+1-919-676-9564\"))" + + " }"); + } + + public void testEmailAddresses() throws Exception { + VcardSubCrawler subCrawler = new VcardSubCrawler(); + metadata = subCrawl(DOCS_PATH + "vcard-rfc2426.vcf", subCrawler); + Model model = metadata.getModel(); + Resource frankDawsonContact = findContact(model, "Frank Dawson"); + Set<Resource> emails = findObjectResourceSet(model,frankDawsonContact, NCO.hasEmailAddress); + assertEquals(2, emails.size()); + assertSparqlQuery(model, + "PREFIX nco: <" + NCO.NS_NCO + "> " + + "SELECT ?email " + + "WHERE" + + " { " + frankDawsonContact.toSPARQL() + " nco:hasEmailAddress ?email . " + + " ?email a nco:EmailAddress ." + + " ?email nco:emailAddress ?address ." + + " FILTER (regex(?address,\"Frank_Dawson@Lotus.com\"))" + + " }"); + assertSparqlQuery(model, + "PREFIX nco: <" + NCO.NS_NCO + "> " + + "SELECT ?email " + + "WHERE" + + " { " + frankDawsonContact.toSPARQL() + " nco:hasEmailAddress ?email . " + + " ?email a nco:EmailAddress ." + + " ?email nco:emailAddress ?address ." 
+ + " FILTER (regex(?address,\"fdawson@earthlink.net\"))" + + " }"); + } + + public void testWorkPostalAddress() throws Exception { + VcardSubCrawler subCrawler = new VcardSubCrawler(); + metadata = subCrawl(DOCS_PATH + "vcard-rfc2426.vcf", subCrawler); + Model model = metadata.getModel(); + Resource frankDawsonContact = findContact(model, "Frank Dawson"); + assertTrue(frankDawsonContact.toString().startsWith("vcard:")); + Resource affiliation = findSingleObjectResource(model, frankDawsonContact, NCO.hasAffiliation); + assertSingleValueProperty(model, affiliation, RDF.type, NCO.Affiliation); + Resource address = findSingleObjectResource(model, affiliation, NCO.hasPostalAddress); + assertMultiValueProperty(model, address, RDF.type, NCO.PostalAddress); + assertMultiValueProperty(model, address, RDF.type, NCO.ParcelDeliveryAddress); + assertSingleValueProperty(model, address, NCO.streetAddress, "6544 Battleford Drive"); + assertSingleValueProperty(model, address, NCO.locality, "Raleigh"); + assertSingleValueProperty(model, address, NCO.region, "NC"); + } + + public void testHomePostalAddress() throws Exception { + VcardSubCrawler subCrawler = new VcardSubCrawler(); + metadata = subCrawl(DOCS_PATH + "vcard-antoni-outlook2003.vcf", subCrawler); + Model model = metadata.getModel(); + Resource antoniContact = findContact(model, "Antoni Jozef Mylka jun."); + Resource address = findSingleObjectResource(model, antoniContact, NCO.hasPostalAddress); + assertMultiValueProperty(model, address, RDF.type, NCO.PostalAddress); + + assertSingleValueProperty(model, address, NCO.streetAddress, "Budryka 2/1110"); + assertSingleValueProperty(model, address, NCO.locality, "Krakow"); + assertSingleValueProperty(model, address, NCO.region, "malopolskie"); + assertSingleValueProperty(model, address, NCO.postalcode, "30-072"); + assertSingleValueProperty(model, address, NCO.country, "Polen"); + } + + public void testRole() throws Exception { + VcardSubCrawler subCrawler = new VcardSubCrawler(); +
metadata = subCrawl(DOCS_PATH + "vcard-antoni-outlook2003.vcf", subCrawler); + Model model = metadata.getModel(); + Resource antoniContact = findContact(model, "Antoni Jozef Mylka jun."); + Resource affiliation = findSingleObjectResource(model, antoniContact, NCO.hasAffiliation); + assertSingleValueProperty(model, affiliation, NCO.role, "Software-Developer"); + } + + public void testTitle() throws Exception { + VcardSubCrawler subCrawler = new VcardSubCrawler(); + metadata = subCrawl(DOCS_PATH + "vcard-antoni-outlook2003.vcf", subCrawler); + Model model = metadata.getModel(); + Resource antoniContact = findContact(model, "Antoni Jozef Mylka jun."); + Resource affiliation = findSingleObjectResource(model, antoniContact, NCO.hasAffiliation); + assertSingleValueProperty(model, affiliation, NCO.title, "Intern"); + } + + public void testNickname() throws Exception { + VcardSubCrawler subCrawler = new VcardSubCrawler(); + metadata = subCrawl(DOCS_PATH + "vcard-antoni-outlook2003.vcf", subCrawler); + Model model = metadata.getModel(); + Resource antoniContact = findContact(model, "Antoni Jozef Mylka jun."); + assertSingleValueProperty(model, antoniContact, NCO.nickname, "Ant"); + } + + public void testBday() throws Exception { + VcardSubCrawler subCrawler = new VcardSubCrawler(); + metadata = subCrawl(DOCS_PATH + "vcard-antoni-outlook2003.vcf", subCrawler); + Model model = metadata.getModel(); + Resource antoniContact = findContact(model, "Antoni Jozef Mylka jun."); + String dateString = findSingleObjectNode(model, antoniContact, NCO.birthDate).asLiteral().getValue(); + assertTrue(DateUtil.dateTimeStringEqualToUTCString(dateString, "1980-01-18T00:00:00Z")); + } + + public void testOrganization() throws Exception { + VcardSubCrawler subCrawler = new VcardSubCrawler(); + metadata = subCrawl(DOCS_PATH + "vcard-antoni-outlook2003.vcf", subCrawler); + Model model = metadata.getModel(); + Resource antoniContact = findContact(model, "Antoni Jozef Mylka jun."); + Resource 
affiliation = findSingleObjectResource(model, antoniContact, NCO.hasAffiliation); + assertSingleValueProperty(model, affiliation, RDF.type, NCO.Affiliation); + assertSingleValueProperty(model, affiliation, NCO.department, "Knowledge-Management"); + Resource organization = findSingleObjectResource(model, affiliation, NCO.org); + assertSingleValueProperty(model, organization, RDF.type, NCO.OrganizationContact); + assertSingleValueProperty(model, organization, NCO.fullname, "DFKI"); + } + + public void testNote() throws Exception { + VcardSubCrawler subCrawler = new VcardSubCrawler(); + metadata = subCrawl(DOCS_PATH + "vcard-dirk.vcf", subCrawler); + Model model = metadata.getModel(); + Resource dirkContact = findContact(model, "Dirk"); + assertEquals("uri:dummyuri",dirkContact.toString()); + assertSingleValueProperty(model, dirkContact, NCO.note, "The canonical Dirk\r\n"); + metadata.dispose(); + metadata = null; + } + + public void testRev() throws Exception { + VcardSubCrawler subCrawler = new VcardSubCrawler(); + metadata = subCrawl(DOCS_PATH + "vcard-dirk.vcf", subCrawler); + Model model = metadata.getModel(); + Resource dirkContact = findContact(model, "Dirk"); + // this property is not testable, since the value is timezone-dependent + //assertSingleValueProperty(model, dirkContact, NIE.contentLastModified, "2007-11-09T10:46:02Z", XSD._dateTime); + String dateString = findSingleObjectNode(model, dirkContact, NIE.contentLastModified).asLiteral().getValue(); + assertTrue(dateString.startsWith("2007-11")); + metadata.dispose(); + metadata = null; + } + + public void testRev2() throws Exception { + VcardSubCrawler subCrawler = new VcardSubCrawler(); + metadata = subCrawl(DOCS_PATH + "vcard-antoni-kontact.vcf", subCrawler); + Model model = metadata.getModel(); + Resource dirkContact = findContact(model, "Antoni Mylka"); + // this property is not testable, since the value is timezone-dependent + //assertSingleValueProperty(model, dirkContact, NIE.contentLastModified, 
"2008-01-28T15:50:16Z", XSD._dateTime); + String dateString = findSingleObjectNode(model, dirkContact, NIE.contentLastModified).asLiteral().getValue(); + assertTrue(dateString.startsWith("2008-01")); + metadata.dispose(); + metadata = null; + } + + /** + * This is supposed to test a solution to the issue number 2475957. + * + * RFC 2426 Sec. 3.6.4 http://tools.ietf.org/html/rfc2426#section-3.6.4 + * + * State that the value of the REV property can be of date-time type, which in turn is defined + * in RFC 2425 sec. 5.8.4 + * + * http://tools.ietf.org/html/rfc2425#section-5.8.4 + * + * That definition allows for a timezone component to appear. At one point the + * vcard crawler ignored that timezone definition, took the date-time string without + * the timezone and treated it as one from the default timezone for the current locale + * which was obviously wrong. + * + * The vcard-antoni-kontact.vcf has a REV property that denotes a timestamp in the UTC timezone. + * This test checks if the date is recorded correctly. 
+ * + * @throws Exception + */ + public void testRev3() throws Exception { + VcardSubCrawler subCrawler = new VcardSubCrawler(); + metadata = subCrawl(DOCS_PATH + "vcard-antoni-kontact.vcf", subCrawler); + Model model = metadata.getModel(); + Resource antoniContact = findContact(model, "Antoni Mylka"); + String dateString = findSingleObjectNode(model, antoniContact, NIE.contentLastModified).asLiteral().getValue(); + DateUtil.dateTimeStringEqualToUTCString(dateString, "2008-01-28T15:50:16Z"); + metadata.dispose(); + metadata = null; + } + + public void testUid() throws Exception { + VcardSubCrawler subCrawler = new VcardSubCrawler(); + metadata = subCrawl(DOCS_PATH + "vcard-antoni-kontact.vcf", subCrawler); + Model model = metadata.getModel(); + Resource dirkContact = findContact(model, "Antoni Mylka"); + assertSingleValueProperty(model, dirkContact, NCO.contactUID, "BHTRsCvcmd"); + metadata.dispose(); + metadata = null; + } private RDFContainer subCrawl(String string, VcardSubCrawler subCrawler) throws Exception { InputStream stream = org.semanticdesktop.aperture.util.ResourceUtil.getInputStream(string, this.getClass()); Added: aperture/trunk/core/src/test/resources/org/semanticdesktop/aperture/docs/icaltestdata/cal01-corrupted.ics =================================================================== --- aperture/trunk/core/src/test/resources/org/semanticdesktop/aperture/docs/icaltestdata/cal01-corrupted.ics (rev 0) +++ aperture/trunk/core/src/test/resources/org/semanticdesktop/aperture/docs/icaltestdata/cal01-corrupted.ics 2010-03-31 11:13:27 UTC (rev 2306) @@ -0,0 +1,84 @@ +BEGIN:VCALENaDAR +CALSCdALE:GREGORIAN +PRODID:-//Ximian//NONSGML Evolution Calendar//EN +VERSION:2.0 +BEGIN:VTIMEZONE +TZID:/softwarestudio.org/Olson_20011030_5/America/New_York +TZURL:http://timezones.r.us.net/tz/US-California-Los_Angeles +BEGIN:STANDARD +TZOFFSETFROM:-0400 +TZOFFSETTO:-0asdf500 +TZNAME:EST +DTSTART:19701025T020000 +RRULE:FREQ=YEARLY;INTERVAL=1;BYDAY=-1SU;BYMONTH=10 
+END:STANDARD +BEGIN:DAYLIGHT +TZOFFSETFROM:-0500 +TZOFFSETTO:-0400 +TZNAME:EDT + +THIS IS A COMPLETELY CORRUPTED STRING IN THE MIDDLE OF A ICAL +THIS SHOULD BREAK THE PARSER BUT THE FULLTEXT SHOULD STILL BE PRESERVED + + +DTSTART:19700405T020000 +RRULE:FREQ=YEARLY;INTERVAL=1;BYDAY=1SU;BYMONTH=4 +END:DAYLIGHT +END:VTIMEZONE +BEGIN:VEVENT +UID:20020630T230353Z-3895-69-1-0@jammer +DTSTAMP:20020630T230353Z +DTSTART;TZID=/softwarestudio.org/Olson_20011030_5/America/New_York: + 20020630T090000 +DTEND;TZID=/softwarestudio.org/Olson_20011030_5/America/New_York: + 20020630T103000 +TRANSP:OPAQUE +SEQUENCE:2 +SUMMARY:Church +CLASS:PRIVATE +RRULE:FREQ=WEEKLY;INTERVAL=1;BYDAY=SU +END:VEVENT +BEGIN:VEVENT +UID:20020630T230445Z-3895-69-1-7@jammer +DTSTAMP:20020630T230445Z +DTSTART;VALUE=DATE:20020703 +DTEND;VALUE=DATE:20020706 +TRANSP:OPAQUE +SEQUENCE:2 +SUMMARY:Scooby Conference +LOCAT/%$#@%!_ION:Saan Francisco +CONTACT:Jim Dolittle\, ABC Industries\, +1-919-555-1234 +DESCRIPTION:can't wait!\n +CATEGORIES:Miscellaneous +CLASS:PUBLIC +BEGIN:VALARM +X-EVOLUTION-ALARM-UID:20020701T033628Z-3895-69-1-48@jammer +TRIGGER;RELATED=START:-PT30M +ACTION:DISPLAY +DESCRIPTION:Federal Reserve Board Meeting +END:VALARM +END:VEVENT +BEGIN:VEVENT +UID:20020630T230600Z-3895-69-1-16@jammer +DTSTAMP:20020630T230600Z +DTSTART;TZID=/softwarestudio.org/Olson_20011030_5/America/New_York: + 20020718T090000 +DTEND;TZID=/softwarestudio.org/Olson_20011030_5/America/New_York: + 20020718T093000 +TRANSP:OPAQUE +SEQUENCE:3 +SUMMARY:Federal Reserve Board Meeting +CLASS:PUBLIC +ORGANIZER;CN=Dan Connolly:MAILTO:con...@w3... 
+ATTENDEE;CUTYPE=INDIVIDUAL;ROLE=REQ-PARTICIPANT;PARTSTAT=NEEDS-ACTION; + RSVP=TRUE;LANGUAGE=en:MAILTO:hoopy@frood.example +ATTENDEE;CUTYPE=INDIVIDUAL;ROLE=OPT-PARTICIPANT;PARTSTAT=NEEDS-ACTION; + RSVP=TRUE;LANGUAGE=en:MAILTO:prefect@ford.example +BEGIN:VALARM +X-EVOLUTION-ALARM-UID:20020701T033628Z-3895-69-1-22@jammer +TRIGfGER;VALUE=DURATION;RELATED=START:-PT15M +ACTIdON:DISPLAY +DESCRIPTION:Federal Reserve Board Meeting +END:VALARdM +END:VEVENTa +END:VCALasENDAR Added: aperture/trunk/core/src/test/resources/org/semanticdesktop/aperture/docs/vcard-dirk-corrupted.vcf =================================================================== --- aperture/trunk/core/src/test/resources/org/semanticdesktop/aperture/docs/vcard-dirk-corrupted.vcf (rev 0) +++ aperture/trunk/core/src/test/resources/org/semanticdesktop/aperture/docs/vcard-dirk-corrupted.vcf 2010-03-31 11:13:27 UTC (rev 2306) @@ -0,0 +1,12 @@ +BEGIN:VCdARD +VERSION:2.1 +N:dirk +FN:Dirk +NOTE;EN_CORRUPTED_CODING=QUOTED-PRINTABLE:The canonical Dirk=0D=0A + +THIS IS A COMPLETELY CORRUPTED STRING IN THE MIDDLE OF A VCARD +THIS SHOULD BREAK THE PARSER BUT THE FULLTEXT SHOULD STILL BE PRESERVED + +EMAIL.$$$$$;PREF;INTERNET:di...@sa... +REV:2007110as9T104602Z +END:VCAhRD This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <my...@us...> - 2010-04-01 13:01:01
|
Revision: 2309 http://aperture.svn.sourceforge.net/aperture/?rev=2309&view=rev Author: mylka Date: 2010-04-01 13:00:53 +0000 (Thu, 01 Apr 2010) Log Message: ----------- applied Igor's patch to SubCrawlerUtil and added some unit tests Modified Paths: -------------- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/SubCrawlerUtil.java aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/TestSubCrawlerUtilIntegration.java Added Paths: ----------- aperture/trunk/core/src/test/resources/org/semanticdesktop/aperture/docs/zip-mail-attachment.zip aperture/trunk/core/src/test/resources/org/semanticdesktop/aperture/docs/zip-mail-forwarded-message.zip Modified: aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/SubCrawlerUtil.java =================================================================== --- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/SubCrawlerUtil.java 2010-04-01 12:54:37 UTC (rev 2308) +++ aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/SubCrawlerUtil.java 2010-04-01 13:00:53 UTC (rev 2309) @@ -112,10 +112,13 @@ bad = true; DataObject newObject = subCrawler.getDataObject(new URIImpl(current[2]), current[1], currentStream, dataSource, charset, mimeType, containerFactory); - if (!stack.isEmpty() && !(newObject instanceof FileDataObject)) { - throw new SubCrawlerException("an intermediate DataObject has no stream: " + currentUri); - } else { - currentStream = ((FileDataObject)newObject).getContent(); + + if (!stack.isEmpty()) { + if (newObject instanceof FileDataObject) { + currentStream = ((FileDataObject)newObject).getContent(); + } else { + throw new SubCrawlerException("an intermediate DataObject has no stream: " + currentUri); + } } ((DataObjectBase)newObject).setWrappedDataObject(object); object = newObject; Modified: aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/TestSubCrawlerUtilIntegration.java 
=================================================================== --- aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/TestSubCrawlerUtilIntegration.java 2010-04-01 12:54:37 UTC (rev 2308) +++ aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/TestSubCrawlerUtilIntegration.java 2010-04-01 13:00:53 UTC (rev 2309) @@ -13,10 +13,12 @@ import org.ontoware.rdf2go.model.node.impl.URIImpl; import org.semanticdesktop.aperture.accessor.DataObject; import org.semanticdesktop.aperture.accessor.FileDataObject; +import org.semanticdesktop.aperture.crawler.mail.MessageDataObject; import org.semanticdesktop.aperture.rdf.RDFContainer; import org.semanticdesktop.aperture.subcrawler.impl.DefaultSubCrawlerRegistry; import org.semanticdesktop.aperture.test.ApertureTestBase; import org.semanticdesktop.aperture.util.ResourceUtil; +import org.semanticdesktop.aperture.vocabulary.NMO; /** * An integration test for the {@link SubCrawlerUtil#getDataObject} method. It is in a separate class @@ -73,6 +75,82 @@ } /** + * An attachment inside an .eml file inside a zip. The attachment is a jpeg file. + * @throws Exception + */ + public void testZipWithMail() throws Exception { + InputStream stream = ResourceUtil.getInputStream(DOCS_PATH + "zip-mail-attachment.zip", + getClass()); + URI uri = new URIImpl( + "zip:" + + "file:///C:/somefolder/somezip.zip" + + "!/mail-attachment.eml"); + TestRDFContainerFactory fac = new TestRDFContainerFactory(); + DataObject obj = SubCrawlerUtil.getDataObject(uri, stream, null, null, null, fac, + new DefaultSubCrawlerRegistry()); + assertNotNull(obj); + assertTrue(obj instanceof FileDataObject); + assertMimeType("message/rfc822", uri, ((FileDataObject)obj).getContent()); + obj.dispose(); + for (Map.Entry<String, RDFContainer> entry : fac.returnedContainers.entrySet()) { + assertFalse(entry.getValue().getModel().isOpen()); + } + } + + /** + * An attachment inside an .eml file inside a zip. 
The attachment is a jpeg file. + * @throws Exception + */ + public void testZipWithMailWithAttachment() throws Exception { + InputStream stream = ResourceUtil.getInputStream(DOCS_PATH + "zip-mail-attachment.zip", + getClass()); + URI uri = new URIImpl( + "mime:" + + "zip:" + + "file:///C:/somefolder/somezip.zip" + + "!/mail-attachment.eml" + + "!/1"); + TestRDFContainerFactory fac = new TestRDFContainerFactory(); + DataObject obj = SubCrawlerUtil.getDataObject(uri, stream, null, null, null, fac, + new DefaultSubCrawlerRegistry()); + assertNotNull(obj); + assertTrue(obj instanceof FileDataObject); + assertMimeType("image/jpeg", uri, ((FileDataObject)obj).getContent()); + obj.dispose(); + for (Map.Entry<String, RDFContainer> entry : fac.returnedContainers.entrySet()) { + assertFalse(entry.getValue().getModel().isOpen()); + } + } + + /** + * A forwarded message attached to an .eml file inside a zip. The attachment is a jpeg file. + * @throws Exception + */ + public void testZipWithMailWithForwardedMessage() throws Exception { + InputStream stream = ResourceUtil.getInputStream(DOCS_PATH + "zip-mail-forwarded-message.zip", + getClass()); + URI uri = new URIImpl( + "mime:" + + "zip:" + + "file:///C:/somefolder/somezip.zip" + + "!/mail-forwarded-message.eml" + + "!/1"); + TestRDFContainerFactory fac = new TestRDFContainerFactory(); + DataObject obj = SubCrawlerUtil.getDataObject(uri, stream, null, null, null, fac, + new DefaultSubCrawlerRegistry()); + assertNotNull(obj); + assertTrue(obj instanceof MessageDataObject); + + assertTrue(obj.getMetadata().getString(NMO.plainTextMessageContent).contains( + "It compiles and works under java 1.5 and java 1.6, both in plain jvm and")); + + obj.dispose(); + for (Map.Entry<String, RDFContainer> entry : fac.returnedContainers.entrySet()) { + assertFalse(entry.getValue().getModel().isOpen()); + } + } + + /** * Tests if the method can extract a file whose name contains a space from inside a ZIP archive. 
* @throws Exception */ Added: aperture/trunk/core/src/test/resources/org/semanticdesktop/aperture/docs/zip-mail-attachment.zip =================================================================== (Binary files differ) Property changes on: aperture/trunk/core/src/test/resources/org/semanticdesktop/aperture/docs/zip-mail-attachment.zip ___________________________________________________________________ Added: svn:mime-type + application/octet-stream Added: aperture/trunk/core/src/test/resources/org/semanticdesktop/aperture/docs/zip-mail-forwarded-message.zip =================================================================== (Binary files differ) Property changes on: aperture/trunk/core/src/test/resources/org/semanticdesktop/aperture/docs/zip-mail-forwarded-message.zip ___________________________________________________________________ Added: svn:mime-type + application/octet-stream This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <my...@us...> - 2010-04-21 19:21:42
|
Revision: 2329 http://aperture.svn.sourceforge.net/aperture/?rev=2329&view=rev Author: mylka Date: 2010-04-21 19:21:36 +0000 (Wed, 21 Apr 2010) Log Message: ----------- converted the FileExtractor into an extractor (PoiOOXML), added ExractorFactories Modified Paths: -------------- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/extractor/poiooxml/PoiOOXmlExtractor.java aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/extractor/poiooxml/AllExtendedPropertiesTest.java aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/extractor/poiooxml/OOXMLMain.java aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/extractor/poiooxml/PoiOOXmlExtractorTest.java Added Paths: ----------- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/extractor/poiooxml/OOXMLExcelExtractorFactory.java aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/extractor/poiooxml/OOXMLPowerPointExtractorFactory.java aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/extractor/poiooxml/OOXMLWordExtractorFactory.java Added: aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/extractor/poiooxml/OOXMLExcelExtractorFactory.java =================================================================== --- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/extractor/poiooxml/OOXMLExcelExtractorFactory.java (rev 0) +++ aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/extractor/poiooxml/OOXMLExcelExtractorFactory.java 2010-04-21 19:21:36 UTC (rev 2329) @@ -0,0 +1,43 @@ +/** + * Copyright (c) 2010 Aduna and Deutsches Forschungszentrum fuer Kuenstliche Intelligenz DFKI GmbH. + * All rights reserved. + * + * Licensed under the Aperture BSD-style license. 
+ */ +package org.semanticdesktop.aperture.extractor.poiooxml; + +import java.util.Collections; +import java.util.HashSet; +import java.util.Set; + +import org.semanticdesktop.aperture.extractor.Extractor; +import org.semanticdesktop.aperture.extractor.ExtractorFactory; +import org.semanticdesktop.aperture.extractor.openxml.OpenXmlExtractor; +import org.semanticdesktop.aperture.extractor.poiooxml.PoiOOXmlExtractor.Type; + +/** + * An {@link ExtractorFactory} implementation for Open XML Spreadsheets. + * + * @author Antoni + * + */ +public class OOXMLExcelExtractorFactory implements ExtractorFactory { + + private static final Set MIME_TYPES; + + static { + HashSet set = new HashSet(); + set.add("application/vnd.openxmlformats-officedocument.spreadsheetml"); + + MIME_TYPES = Collections.unmodifiableSet(set); + } + + public Extractor get() { + return new PoiOOXmlExtractor(Type.EXCEL); + } + + public Set getSupportedMimeTypes() { + return MIME_TYPES; + } + +} Property changes on: aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/extractor/poiooxml/OOXMLExcelExtractorFactory.java ___________________________________________________________________ Added: svn:mime-type + text/plain Added: aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/extractor/poiooxml/OOXMLPowerPointExtractorFactory.java =================================================================== --- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/extractor/poiooxml/OOXMLPowerPointExtractorFactory.java (rev 0) +++ aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/extractor/poiooxml/OOXMLPowerPointExtractorFactory.java 2010-04-21 19:21:36 UTC (rev 2329) @@ -0,0 +1,42 @@ +/** + * Copyright (c) 2010 Aduna and Deutsches Forschungszentrum fuer Kuenstliche Intelligenz DFKI GmbH. + * All rights reserved. + * + * Licensed under the Aperture BSD-style license. 
+ */ +package org.semanticdesktop.aperture.extractor.poiooxml; + +import java.util.Collections; +import java.util.HashSet; +import java.util.Set; + +import org.semanticdesktop.aperture.extractor.Extractor; +import org.semanticdesktop.aperture.extractor.ExtractorFactory; +import org.semanticdesktop.aperture.extractor.openxml.OpenXmlExtractor; +import org.semanticdesktop.aperture.extractor.poiooxml.PoiOOXmlExtractor.Type; + +/** + * An {@link ExtractorFactory} implementation for Open XML Presentations. + * + * @author Antoni + * + */ +public class OOXMLPowerPointExtractorFactory implements ExtractorFactory { + + private static final Set MIME_TYPES; + + static { + HashSet set = new HashSet(); + set.add("application/vnd.openxmlformats-officedocument.presentationml"); + MIME_TYPES = Collections.unmodifiableSet(set); + } + + public Extractor get() { + return new PoiOOXmlExtractor(Type.POWERPOINT); + } + + public Set getSupportedMimeTypes() { + return MIME_TYPES; + } + +} Property changes on: aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/extractor/poiooxml/OOXMLPowerPointExtractorFactory.java ___________________________________________________________________ Added: svn:mime-type + text/plain Added: aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/extractor/poiooxml/OOXMLWordExtractorFactory.java =================================================================== --- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/extractor/poiooxml/OOXMLWordExtractorFactory.java (rev 0) +++ aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/extractor/poiooxml/OOXMLWordExtractorFactory.java 2010-04-21 19:21:36 UTC (rev 2329) @@ -0,0 +1,42 @@ +/** + * Copyright (c) 2010 Aduna and Deutsches Forschungszentrum fuer Kuenstliche Intelligenz DFKI GmbH. + * All rights reserved. + * + * Licensed under the Aperture BSD-style license. 
+ */ +package org.semanticdesktop.aperture.extractor.poiooxml; + +import java.util.Collections; +import java.util.HashSet; +import java.util.Set; + +import org.semanticdesktop.aperture.extractor.Extractor; +import org.semanticdesktop.aperture.extractor.ExtractorFactory; +import org.semanticdesktop.aperture.extractor.openxml.OpenXmlExtractor; +import org.semanticdesktop.aperture.extractor.poiooxml.PoiOOXmlExtractor.Type; + +/** + * An {@link ExtractorFactory} implementation for Open XML Word Processing documents. + * + * @author Antoni + */ +public class OOXMLWordExtractorFactory implements ExtractorFactory { + + private static final Set MIME_TYPES; + + static { + HashSet set = new HashSet(); + set.add("application/vnd.openxmlformats-officedocument.wordprocessingml"); + + MIME_TYPES = Collections.unmodifiableSet(set); + } + + public Extractor get() { + return new PoiOOXmlExtractor(Type.WORD); + } + + public Set getSupportedMimeTypes() { + return MIME_TYPES; + } + +} Property changes on: aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/extractor/poiooxml/OOXMLWordExtractorFactory.java ___________________________________________________________________ Added: svn:mime-type + text/plain Modified: aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/extractor/poiooxml/PoiOOXmlExtractor.java =================================================================== --- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/extractor/poiooxml/PoiOOXmlExtractor.java 2010-04-20 23:36:28 UTC (rev 2328) +++ aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/extractor/poiooxml/PoiOOXmlExtractor.java 2010-04-21 19:21:36 UTC (rev 2329) @@ -6,9 +6,10 @@ */ package org.semanticdesktop.aperture.extractor.poiooxml; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileNotFoundException; +import java.io.BufferedInputStream; +import java.io.FilterInputStream; +import java.io.IOException; +import java.io.InputStream; import 
java.nio.charset.Charset; import java.util.Date; import java.util.StringTokenizer; @@ -32,7 +33,7 @@ import org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties; import org.semanticdesktop.aperture.extractor.Extractor; import org.semanticdesktop.aperture.extractor.ExtractorException; -import org.semanticdesktop.aperture.extractor.FileExtractor; +import org.semanticdesktop.aperture.extractor.microsoft.util.PoiUtil; import org.semanticdesktop.aperture.extractor.openxml.OpenXmlExtractor; import org.semanticdesktop.aperture.rdf.RDFContainer; import org.semanticdesktop.aperture.rdf.util.ModelUtil; @@ -49,7 +50,7 @@ * * @author Antoni */ -public class PoiOOXmlExtractor implements FileExtractor { +public class PoiOOXmlExtractor implements Extractor { private static final String EXTENDED_PROPERTIES_NS = "http://schemas.openxmlformats.org/officeDocument/2006/extended-properties/"; @@ -66,18 +67,24 @@ this.type = type; } - public void extract(URI id, File file, Charset charset, String mimeType, + public void extract(URI id, InputStream stream, Charset charset, String mimeType, RDFContainer container) throws ExtractorException { String text = null; POIXMLDocument document = null; POIXMLTextExtractor extractor = null; - if (!file.exists()) { - throw new ExtractorException("file " + file.getPath() + " doesn't exist"); + OPCPackage pack = null; + + int bufferSize = PoiUtil.getBufferSize(); + if (!stream.markSupported()) { + stream = new BufferedInputStream(stream, bufferSize); } + stream = new UncloseableInputStream(stream); + stream.mark(bufferSize); + try { - OPCPackage pack = OPCPackage.openOrCreate(file); + pack = OPCPackage.open(stream); switch (this.type) { case WORD: document = new XWPFDocument(pack); @@ -102,8 +109,10 @@ } catch (Exception e) { // this means that we can't open this file, this happens on documents created // with beta versions of office, we need to fallback to the old OpenXmlExtractor - 
tryWithOldOpenXmlExtractor(id,file,charset,mimeType,container); - } + tryWithOldOpenXmlExtractor(id,stream,charset,mimeType,container); + } finally { + closeOPCPackage(pack); + } // store the full-text, if any if (text != null) { @@ -116,6 +125,19 @@ container.add(RDF.type, NFO.PaginatedTextDocument); } + /** + * @param pack + */ + private void closeOPCPackage(OPCPackage pack) { + if (pack != null) { + try { + pack.close(); + } catch (IOException e) { + logger.warn("Couldn't close the package: ", e); + } + } + } + private void extractCommonMetadta(CoreProperties props, RDFContainer container) { //props.getCategory(); // no equivalent in NIE //props.getContentStatus(); // no equivalent in NIE @@ -237,16 +259,17 @@ container.add(uri,value); } - private void tryWithOldOpenXmlExtractor(URI id, File file, Charset charset, String mimeType, RDFContainer container) + private void tryWithOldOpenXmlExtractor(URI id, InputStream stream, Charset charset, String mimeType, RDFContainer container) throws ExtractorException { OpenXmlExtractor ex = new OpenXmlExtractor(); try { - ex.extract(id, new FileInputStream(file), charset, mimeType, container); - } catch (FileNotFoundException e) { - throw new ExtractorException(e); + stream.reset(); + ex.extract(id, stream, charset, mimeType, container); } catch (ExtractorException e) { throw e; - } + } catch (IOException e) { + logger.warn("Couldn't use the fall-back OpenXmlExtractor",e); + } } private void addStringProperty(RDFContainer container, URI predicate, String value) { @@ -285,4 +308,14 @@ extractCommonMetadta(document.getProperties().getCoreProperties(), container); extractExtendedProperties(document.getProperties().getExtendedProperties(), container); } + + private static class UncloseableInputStream extends FilterInputStream { + protected UncloseableInputStream(InputStream in) { + super(in); + } + @Override + public void close() throws IOException { + // do nothing + } + } } Modified: 
aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/extractor/poiooxml/AllExtendedPropertiesTest.java =================================================================== --- aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/extractor/poiooxml/AllExtendedPropertiesTest.java 2010-04-20 23:36:28 UTC (rev 2328) +++ aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/extractor/poiooxml/AllExtendedPropertiesTest.java 2010-04-21 19:21:36 UTC (rev 2329) @@ -23,10 +23,10 @@ import org.ontoware.rdf2go.model.node.impl.DatatypeLiteralImpl; import org.ontoware.rdf2go.model.node.impl.URIImpl; import org.ontoware.rdf2go.vocabulary.XSD; -import org.semanticdesktop.aperture.extractor.FileExtractor; +import org.semanticdesktop.aperture.extractor.Extractor; import org.semanticdesktop.aperture.extractor.poiooxml.PoiOOXmlExtractor.Type; import org.semanticdesktop.aperture.rdf.RDFContainer; -import org.semanticdesktop.aperture.test.extractor.FileExtractorTestBase; +import org.semanticdesktop.aperture.test.extractor.ExtractorTestBase; import org.semanticdesktop.aperture.vocabulary.APERTURE_NIE_EXTENSIONS; import org.semanticdesktop.aperture.vocabulary.NFO; import org.semanticdesktop.aperture.vocabulary.NIE; @@ -44,7 +44,7 @@ * @author Antoni Mylka * */ -public final class AllExtendedPropertiesTest extends FileExtractorTestBase { +public final class AllExtendedPropertiesTest extends ExtractorTestBase { private static final String WORD_2010_DOC = DOCS_PATH + "microsoft-word-2010beta.docx"; private static final String EXTENDED_PROPERTIES_NS = "http://schemas.openxmlformats.org/officeDocument/2006/extended-properties/"; @@ -52,7 +52,7 @@ return new URIImpl(EXTENDED_PROPERTIES_NS + name); } public void testGetAllExtendedProperties() throws Exception{ - FileExtractor fex = new PoiOOXmlExtractor(Type.WORD); + Extractor fex = new PoiOOXmlExtractor(Type.WORD); RDFContainer cont = extract(WORD_2010_DOC,fex); checkStatement(u("Application"), "Microsoft Office 
Word", cont); checkStatement(NIE.generator, "Microsoft Office Word", cont); Modified: aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/extractor/poiooxml/OOXMLMain.java =================================================================== --- aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/extractor/poiooxml/OOXMLMain.java 2010-04-20 23:36:28 UTC (rev 2328) +++ aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/extractor/poiooxml/OOXMLMain.java 2010-04-21 19:21:36 UTC (rev 2329) @@ -17,7 +17,6 @@ import org.ontoware.rdf2go.model.node.URI; import org.ontoware.rdf2go.model.node.Variable; import org.semanticdesktop.aperture.extractor.Extractor; -import org.semanticdesktop.aperture.extractor.FileExtractor; import org.semanticdesktop.aperture.extractor.openxml.OpenXmlExtractor; import org.semanticdesktop.aperture.extractor.poiooxml.PoiOOXmlExtractor.Type; import org.semanticdesktop.aperture.rdf.RDFContainer; @@ -33,7 +32,7 @@ public static void main(String [] args) throws Exception { File folder = new File("D:\\intella\\docx1048"); File [] files = folder.listFiles(); - FileExtractor exNew = new PoiOOXmlExtractor(Type.WORD); + Extractor exNew = new PoiOOXmlExtractor(Type.WORD); Extractor exOld = new OpenXmlExtractor(); int newNulls = 0; int newTotalContent = 0; @@ -56,7 +55,7 @@ RDFContainer contOld = new RDFContainerImpl(modelOld, uri); try { - exNew.extract(null, f, null, null, contNew); + exNew.extract(null, new FileInputStream(f), null, null, contNew); String content = contNew.getString(NIE.plainTextContent); if (content == null) { newNulls++; Modified: aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/extractor/poiooxml/PoiOOXmlExtractorTest.java =================================================================== --- aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/extractor/poiooxml/PoiOOXmlExtractorTest.java 2010-04-20 23:36:28 UTC (rev 2328) +++ 
aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/extractor/poiooxml/PoiOOXmlExtractorTest.java 2010-04-21 19:21:36 UTC (rev 2329) @@ -10,16 +10,16 @@ import org.ontoware.rdf2go.exception.ModelException; import org.ontoware.rdf2go.model.node.impl.URIImpl; +import org.semanticdesktop.aperture.extractor.Extractor; import org.semanticdesktop.aperture.extractor.ExtractorException; -import org.semanticdesktop.aperture.extractor.FileExtractor; import org.semanticdesktop.aperture.extractor.poiooxml.PoiOOXmlExtractor.Type; import org.semanticdesktop.aperture.rdf.RDFContainer; -import org.semanticdesktop.aperture.test.extractor.FileExtractorTestBase; +import org.semanticdesktop.aperture.test.extractor.ExtractorTestBase; import org.semanticdesktop.aperture.vocabulary.NCO; import org.semanticdesktop.aperture.vocabulary.NFO; import org.semanticdesktop.aperture.vocabulary.NIE; -public class PoiOOXmlExtractorTest extends FileExtractorTestBase { +public class PoiOOXmlExtractorTest extends ExtractorTestBase { private static final String WORD_PREFIX = "microsoft-word-2007beta2."; @@ -73,7 +73,7 @@ private RDFContainer getStatements(String resourceName, PoiOOXmlExtractor.Type type) throws ExtractorException, IOException { - FileExtractor extractor = new PoiOOXmlExtractor(type); + Extractor extractor = new PoiOOXmlExtractor(type); return extract(resourceName, extractor); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <my...@us...> - 2010-05-26 16:39:03
|
Revision: 2344 http://aperture.svn.sourceforge.net/aperture/?rev=2344&view=rev Author: mylka Date: 2010-05-26 16:38:56 +0000 (Wed, 26 May 2010) Log Message: ----------- [2932901] committed the mbox subcrawler Modified Paths: -------------- aperture/trunk/core/src/main/resources/org/semanticdesktop/aperture/subcrawler/impl/defaults.xml aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/crawler/mbox/TestMboxCrawler.java aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/impl/TestDefaultSubCrawlerRegistry.java Added Paths: ----------- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/mbox/ aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/mbox/FromLineFilterStream.java aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/mbox/MboxSubCrawler.java aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/mbox/MboxSubCrawlerFactory.java aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/mbox/bundle/ aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/mbox/bundle/MboxSubCrawlerActivator.java aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/mbox/ aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/mbox/FromLineFilterStreamTest.java aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/mbox/MboxSubCrawlerTest.java aperture/trunk/core/src/test/resources/org/semanticdesktop/aperture/docs/mail-mbox-aperture-inc1-mail1.eml aperture/trunk/core/src/test/resources/org/semanticdesktop/aperture/docs/mail-mbox-aperture-inc1-mail2.eml aperture/trunk/core/src/test/resources/org/semanticdesktop/aperture/docs/mail-mbox-aperture-inc1-mail3.eml aperture/trunk/core/src/test/resources/org/semanticdesktop/aperture/docs/mail-mbox-aperture-inc1-mail4.eml Added: aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/mbox/FromLineFilterStream.java 
=================================================================== --- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/mbox/FromLineFilterStream.java (rev 0) +++ aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/mbox/FromLineFilterStream.java 2010-05-26 16:38:56 UTC (rev 2344) @@ -0,0 +1,151 @@ +/** + * Copyright (c) 2010 Aduna and Deutsches Forschungszentrum fuer Kuenstliche Intelligenz DFKI GmbH. + * All rights reserved. + * + * Licensed under the Aperture BSD-style license. + */ +package org.semanticdesktop.aperture.subcrawler.mbox; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; + +import javax.mail.internet.MimeMessage; + +/** + * + * A stream that filters out all occurences of the 'From -' lines. These lines mark the beginning + * of each message in an mbox file and interfere with the parsing done inside {@link MimeMessage} + * class. + * + * @author Antoni + * + */ +public class FromLineFilterStream { + + private static enum State { + BEFORE_FROM, + IN_FROM, + CONTENT, + END_OF_STREAM + } + + private static final byte [] FROM_LINE = new byte[] {0x46, 0x72, 0x6f, 0x6d, 0x20}; + private static final byte CARRIAGE_RETURN = 0x0A; + + + private InputStream in; + private State state; + + + private byte [] lastFiveBytes; + private byte sixthByte; + private byte [] buffer; + private int bufferSize; + private int bufferPointer; + + private ByteArrayOutputStream out; + + public FromLineFilterStream(InputStream in) { + this.in = in; + } + + public byte [] getNextMessageBytes() throws IOException { + if (buffer == null) { + buffer = new byte[4096]; + lastFiveBytes = new byte[5]; + bufferSize = in.read(buffer); + bufferPointer = 0; + this.state = State.BEFORE_FROM; + } + + out = new ByteArrayOutputStream(); + + while (true) { + switch (state) { + case END_OF_STREAM: + return null; + case BEFORE_FROM: + readByte(); + if (foundFrom()) { + state = State.IN_FROM; + } + 
break; + case IN_FROM: + readByte(); + if (lastFiveBytes[4] == CARRIAGE_RETURN) { + state = State.CONTENT; + readByte(); + readByte(); + readByte(); + readByte(); + readByte(); + } + break; + case CONTENT: + readByte(); + if (foundFrom()) { + out.write(sixthByte); + state = State.IN_FROM; + byte [] res = out.toByteArray(); + out = new ByteArrayOutputStream(); + return res; + } + if (state == State.END_OF_STREAM) { + out.write(sixthByte); + out.write(lastFiveBytes[0]); + out.write(lastFiveBytes[1]); + out.write(lastFiveBytes[2]); + out.write(lastFiveBytes[3]); + //out.write(lastFiveBytes[4]); // it's end of stream, no byte has been read + byte [] res = out.toByteArray(); + out = new ByteArrayOutputStream(); + return res; + } else { + out.write(sixthByte); + } + break; + } + } + } + + private boolean foundFrom() { + return lastFiveBytes[0] == FROM_LINE[0] && + lastFiveBytes[1] == FROM_LINE[1] && + lastFiveBytes[2] == FROM_LINE[2] && + lastFiveBytes[3] == FROM_LINE[3] && + lastFiveBytes[4] == FROM_LINE[4]; + } + + /** + * @param in2 + * @throws IOException + */ + private void readByte() throws IOException { + + if (state == State.END_OF_STREAM) { + return; + } + + sixthByte = lastFiveBytes[0]; + lastFiveBytes[0] = lastFiveBytes[1]; + lastFiveBytes[1] = lastFiveBytes[2]; + lastFiveBytes[2] = lastFiveBytes[3]; + lastFiveBytes[3] = lastFiveBytes[4]; + + if (bufferPointer < bufferSize) { + lastFiveBytes[4] = buffer[bufferPointer]; + bufferPointer++; + } else { + bufferSize = in.read(buffer); + bufferPointer = 0; + if (bufferSize == -1) { + state = State.END_OF_STREAM; + return; + } else { + lastFiveBytes[4] = buffer[bufferPointer]; + bufferPointer++; + } + } + } +} Property changes on: aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/mbox/FromLineFilterStream.java ___________________________________________________________________ Added: svn:mime-type + text/plain Added: 
aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/mbox/MboxSubCrawler.java =================================================================== --- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/mbox/MboxSubCrawler.java (rev 0) +++ aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/mbox/MboxSubCrawler.java 2010-05-26 16:38:56 UTC (rev 2344) @@ -0,0 +1,141 @@ +/* + * Copyright (c) 2010 Aduna and Deutsches Forschungszentrum fuer Kuenstliche Intelligenz DFKI GmbH. + * All rights reserved. + * + * Licensed under the Aperture BSD-style license. + */ +package org.semanticdesktop.aperture.subcrawler.mbox; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.Charset; + +import javax.mail.MessagingException; +import javax.mail.internet.MimeMessage; + +import org.ontoware.rdf2go.model.node.URI; +import org.semanticdesktop.aperture.accessor.AccessData; +import org.semanticdesktop.aperture.accessor.DataObject; +import org.semanticdesktop.aperture.accessor.RDFContainerFactory; +import org.semanticdesktop.aperture.crawler.mail.AbstractJavaMailCrawler; +import org.semanticdesktop.aperture.crawler.mail.DataObjectFactory; +import org.semanticdesktop.aperture.crawler.mail.MailUtil; +import org.semanticdesktop.aperture.datasource.DataSource; +import org.semanticdesktop.aperture.rdf.RDFContainer; +import org.semanticdesktop.aperture.subcrawler.SubCrawlerException; +import org.semanticdesktop.aperture.subcrawler.SubCrawlerHandler; +import org.semanticdesktop.aperture.subcrawler.base.AbstractSubCrawler; + +/** + * <p> + * A SubCrawer implementation for mbox files + * </p> + * + * <p> + * It is basically a thin wrapper around the DataObjectFactory. 
+ * </p> + */ +public class MboxSubCrawler extends AbstractSubCrawler { + + private boolean stopRequested; + + public void subCrawl(URI id, InputStream stream, SubCrawlerHandler handler, DataSource dataSource, + AccessData accessData, Charset charset, String mimeType, RDFContainer parentMetadata) + throws SubCrawlerException { + DataObjectFactory fac = null; + FromLineFilterStream fromLineFilterStream = new FromLineFilterStream(stream); + stopRequested = false; + while (!stopRequested) { + try { + byte [] bytes = fromLineFilterStream.getNextMessageBytes(); + if (bytes == null) { + return; + } + + MimeMessage message = new MimeMessage(null,new ByteArrayInputStream(bytes)); + URI attachmentUriPrefix = createChildUri(parentMetadata.getDescribedUri(), MailUtil.getMessageId(message)); + RDFContainerFactory myFac = handler.getRDFContainerFactory(parentMetadata.getDescribedUri().toString()); + fac = new DataObjectFactory(message,myFac,null,dataSource,attachmentUriPrefix,null,"/",attachmentUriPrefix.toString()); + DataObject object = null; + + /* + * Note that the stopRequested check is BEFORE getObject(). Otherwise if the crawler is stopped + * the object is obtained and only AFTER this loop is stopped. This object is not disposed by the + * dataObjectFactory.disposeRemainingObjects() in the finally clause, yields a warning message and + * can potentially lead to problems. + */ + while (!stopRequested && (object = fac.getObject()) != null) { + + // first of all get a string version of the message uri + String queuedUri = object.getID().toString(); + + /* + * See if this url has been accessed before so that we can stop immediately. Note that no + * check on message date is done as messages are immutable. Therefore we only have to check + * whether the AccessData knows this ID. 
+ */ + if (accessData != null && accessData.get(queuedUri, AbstractJavaMailCrawler.ACCESSED_KEY) != null) { + object.dispose(); + + if (accessData.isTouched(queuedUri)) { + // do nothing we see the same URI for the second time + } else { + /* + * We use the MailUtil.getMessageId, if the same uri occurs twice - this means that the + * message is unchanged. + */ + accessData.touch(queuedUri); + handler.objectNotModified(queuedUri); + } + + continue; + } + + /* + * store the information in the access data that we have met this object, + */ + if (accessData != null) { + accessData.put(queuedUri, AbstractJavaMailCrawler.ACCESSED_KEY, ""); + accessData.touch(queuedUri); + } + + /* + * register parent child relationship (necessary in order to be able to report unmodified or + * deleted attachments). This relationship is recorded in the accessdata no new information is + * added to the objects metadata RDFContainer + */ + MailUtil.registerParentRelationshipInAccessData(object, accessData); + + /* + * Report this object as a new object (assumption: objects are always new, never changed, + * since mails are immutable). This MUST happen last because the CrawlerHandler will probably + * dispose of it. 
+ */ + handler.objectNew(object); + } + + } + catch (MessagingException e) { + throw new SubCrawlerException(e); + } + catch (IOException e) { + throw new SubCrawlerException(e); + } + finally { + if (fac != null) { + fac.disposeRemainingObjects(); + } + } + } + } + + public void stopSubCrawler() { + stopRequested = true; + } + + @Override + public String getUriPrefix() { + return MboxSubCrawlerFactory.MBOX_URI_PREFIX; + } +} Property changes on: aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/mbox/MboxSubCrawler.java ___________________________________________________________________ Added: svn:mime-type + text/plain Added: aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/mbox/MboxSubCrawlerFactory.java =================================================================== --- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/mbox/MboxSubCrawlerFactory.java (rev 0) +++ aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/mbox/MboxSubCrawlerFactory.java 2010-05-26 16:38:56 UTC (rev 2344) @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2010 Aduna and Deutsches Forschungszentrum fuer Kuenstliche Intelligenz DFKI GmbH. + * All rights reserved. + * + * Licensed under the Aperture BSD-style license. 
+ */ +package org.semanticdesktop.aperture.subcrawler.mbox; + +import java.util.Collections; +import java.util.HashSet; +import java.util.Set; + +import org.semanticdesktop.aperture.subcrawler.SubCrawler; +import org.semanticdesktop.aperture.subcrawler.SubCrawlerFactory; + +/** + * A factory for {@link MboxSubCrawler}s + */ +public class MboxSubCrawlerFactory implements SubCrawlerFactory { + + private static final Set MIME_TYPES; + + public static String MBOX_URI_PREFIX = "mbox"; + + static { + HashSet set = new HashSet(); + set.add("application/mbox"); + + MIME_TYPES = Collections.unmodifiableSet(set); + } + + public SubCrawler get() { + return new MboxSubCrawler(); + } + + public Set getSupportedMimeTypes() { + return MIME_TYPES; + } + + public String getUriPrefix() { + return MBOX_URI_PREFIX; + } +} Property changes on: aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/mbox/MboxSubCrawlerFactory.java ___________________________________________________________________ Added: svn:mime-type + text/plain Added: aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/mbox/bundle/MboxSubCrawlerActivator.java =================================================================== --- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/mbox/bundle/MboxSubCrawlerActivator.java (rev 0) +++ aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/mbox/bundle/MboxSubCrawlerActivator.java 2010-05-26 16:38:56 UTC (rev 2344) @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2006 - 2008 Aduna and Deutsches Forschungszentrum fuer Kuenstliche Intelligenz DFKI GmbH. + * All rights reserved. + * + * Licensed under the Aperture BSD-style license. 
+ */ +package org.semanticdesktop.aperture.subcrawler.mbox.bundle; + +import java.util.Hashtable; + +import org.osgi.framework.BundleActivator; +import org.osgi.framework.BundleContext; +import org.osgi.framework.ServiceRegistration; +import org.semanticdesktop.aperture.subcrawler.SubCrawlerFactory; +import org.semanticdesktop.aperture.subcrawler.mbox.MboxSubCrawlerFactory; +import org.semanticdesktop.aperture.subcrawler.mime.MimeSubCrawlerFactory; + +public class MboxSubCrawlerActivator implements BundleActivator { + + private ServiceRegistration registration; + + public void start(BundleContext context) throws Exception { + registration = context.registerService(SubCrawlerFactory.class + .getName(), new MboxSubCrawlerFactory(), new Hashtable()); + } + + public void stop(BundleContext context) throws Exception { + registration.unregister(); + } +} Property changes on: aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/mbox/bundle/MboxSubCrawlerActivator.java ___________________________________________________________________ Added: svn:mime-type + text/plain Modified: aperture/trunk/core/src/main/resources/org/semanticdesktop/aperture/subcrawler/impl/defaults.xml =================================================================== --- aperture/trunk/core/src/main/resources/org/semanticdesktop/aperture/subcrawler/impl/defaults.xml 2010-05-26 15:21:04 UTC (rev 2343) +++ aperture/trunk/core/src/main/resources/org/semanticdesktop/aperture/subcrawler/impl/defaults.xml 2010-05-26 16:38:56 UTC (rev 2344) @@ -22,4 +22,7 @@ <subCrawlerFactory> <name>org.semanticdesktop.aperture.subcrawler.mime.MimeSubCrawlerFactory</name> </subCrawlerFactory> + <subCrawlerFactory> + <name>org.semanticdesktop.aperture.subcrawler.mbox.MboxSubCrawlerFactory</name> + </subCrawlerFactory> </subCrawlerFactories> \ No newline at end of file Modified: aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/crawler/mbox/TestMboxCrawler.java 
=================================================================== --- aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/crawler/mbox/TestMboxCrawler.java 2010-05-26 15:21:04 UTC (rev 2343) +++ aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/crawler/mbox/TestMboxCrawler.java 2010-05-26 16:38:56 UTC (rev 2344) @@ -473,9 +473,4 @@ fos.close(); return outFile; } - - - private URI toURI(File file) { - return URIImpl.createURIWithoutChecking(file.toURI().toString()); - } } Modified: aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/impl/TestDefaultSubCrawlerRegistry.java =================================================================== --- aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/impl/TestDefaultSubCrawlerRegistry.java 2010-05-26 15:21:04 UTC (rev 2343) +++ aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/impl/TestDefaultSubCrawlerRegistry.java 2010-05-26 16:38:56 UTC (rev 2344) @@ -12,7 +12,7 @@ public void testRegistry() { DefaultSubCrawlerRegistry registry = new DefaultSubCrawlerRegistry(); - assertEquals(7, registry.getAll().size()); + assertEquals(8, registry.getAll().size()); assertEquals(1, registry.get("text/x-vcard").size()); assertEquals(1, registry.get("application/zip").size()); assertEquals(1, registry.get("application/gzip").size()); @@ -21,5 +21,6 @@ assertEquals(1, registry.get("application/x-compress").size()); assertEquals(1, registry.get("message/rfc822").size()); assertEquals(1, registry.get("message/news").size()); + assertEquals(1, registry.get("application/mbox").size()); } } Added: aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/mbox/FromLineFilterStreamTest.java =================================================================== --- aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/mbox/FromLineFilterStreamTest.java (rev 0) +++ 
aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/mbox/FromLineFilterStreamTest.java 2010-05-26 16:38:56 UTC (rev 2344) @@ -0,0 +1,51 @@ +/** + * Copyright (c) 2010 Aduna and Deutsches Forschungszentrum fuer Kuenstliche Intelligenz DFKI GmbH. + * All rights reserved. + * + * Licensed under the Aperture BSD-style license. + */ +package org.semanticdesktop.aperture.subcrawler.mbox; + +import info.aduna.io.IOUtil; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; + +import org.junit.Assert; +import org.semanticdesktop.aperture.test.ApertureTestBase; +import org.semanticdesktop.aperture.util.ResourceUtil; + +/** + * @author Antoni + * + */ +public class FromLineFilterStreamTest extends ApertureTestBase { + + public void testFilter() throws IOException { + byte [] mboxBytes = bytes("mbox-aperture-inc1"); + InputStream stream = new ByteArrayInputStream(mboxBytes); + FromLineFilterStream flfs = new FromLineFilterStream(stream); + + byte [] msg1 = flfs.getNextMessageBytes(); + byte [] msg2 = flfs.getNextMessageBytes(); + byte [] msg3 = flfs.getNextMessageBytes(); + byte [] msg4 = flfs.getNextMessageBytes(); + assertNull(flfs.getNextMessageBytes()); + + byte [] exMsg1 = bytes("mail-mbox-aperture-inc1-mail1.eml"); + byte [] exMsg2 = bytes("mail-mbox-aperture-inc1-mail2.eml"); + byte [] exMsg3 = bytes("mail-mbox-aperture-inc1-mail3.eml"); + byte [] exMsg4 = bytes("mail-mbox-aperture-inc1-mail4.eml"); + + Assert.assertArrayEquals(exMsg1, msg1); + Assert.assertArrayEquals(exMsg2, msg2); + Assert.assertArrayEquals(exMsg3, msg3); + Assert.assertArrayEquals(exMsg4, msg4); + + } + + private byte [] bytes(String resourceName) throws IOException { + return IOUtil.readBytes(ResourceUtil.getInputStream(DOCS_PATH + resourceName, getClass())); + } +} Property changes on: aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/mbox/FromLineFilterStreamTest.java 
___________________________________________________________________ Added: svn:mime-type + text/plain Added: aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/mbox/MboxSubCrawlerTest.java =================================================================== --- aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/mbox/MboxSubCrawlerTest.java (rev 0) +++ aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/mbox/MboxSubCrawlerTest.java 2010-05-26 16:38:56 UTC (rev 2344) @@ -0,0 +1,175 @@ +/* + * Copyright (c) 2010 Aduna and Deutsches Forschungszentrum fuer Kuenstliche Intelligenz DFKI GmbH. + * All rights reserved. + * + * Licensed under the Aperture BSD-style license. + */ +package org.semanticdesktop.aperture.subcrawler.mbox; + +import java.io.File; +import java.io.InputStream; + +import org.ontoware.rdf2go.exception.ModelException; +import org.ontoware.rdf2go.model.Model; +import org.ontoware.rdf2go.model.node.impl.URIImpl; +import org.semanticdesktop.aperture.accessor.AccessData; +import org.semanticdesktop.aperture.accessor.DataObject; +import org.semanticdesktop.aperture.accessor.base.AccessDataImpl; +import org.semanticdesktop.aperture.accessor.base.RDFContainerFactoryImpl; +import org.semanticdesktop.aperture.crawler.mbox.MboxTestBase; +import org.semanticdesktop.aperture.rdf.RDFContainer; +import org.semanticdesktop.aperture.rdf.impl.RDFContainerImpl; +import org.semanticdesktop.aperture.subcrawler.SubCrawler; +import org.semanticdesktop.aperture.subcrawler.SubCrawlerUtil; +import org.semanticdesktop.aperture.test.subcrawler.TestBasicSubCrawlerHandler; +import org.semanticdesktop.aperture.util.ResourceUtil; + +/** + * Tests for the {@link MboxSubCrawler} + */ +public class MboxSubCrawlerTest extends MboxTestBase { + + + /** + * This tests if a crawler crawls a file and what messages have been extracted from it. 
+ * @throws ModelException + */ + public void testCrawler() throws Exception { + TestBasicSubCrawlerHandler crawlerHandler = crawl("mbox-aperture-dev",null, null); + Model model = crawlerHandler.getModel(); + assertNewModUnmod(crawlerHandler, 144, 0, 0); + validate(model); + model.close(); + } + + public void testAddedMail() throws Exception { + AccessData accessData = new AccessDataImpl(); + accessData.initialize(); + TestBasicSubCrawlerHandler handler1 = crawl("mbox-aperture-inc1",accessData, null); + accessData.store(); + // four mails, everything is new + assertNewModUnmod(handler1, 4, 0, 0); + accessData.initialize(); + TestBasicSubCrawlerHandler handler2 = crawl("mbox-aperture-inc2",accessData, null); + accessData.store(); + // one new mail, while all other four mails are unchanged + assertNewModUnmod(handler2, 1, 0, 4); + handler1.close(); + handler2.close(); + } + + public void testDeletedMail() throws Exception { + AccessData accessData = new AccessDataImpl(); + accessData.initialize(); + TestBasicSubCrawlerHandler handler1 = crawl("mbox-aperture-inc1",accessData, null); + accessData.store(); + // four mails, everything is new + assertNewModUnmod(handler1, 4, 0, 0); + accessData.initialize(); + TestBasicSubCrawlerHandler handler2 = crawl("mbox-aperture-inc3",accessData, null); + accessData.store(); + // no new mails, three unchanged emails + // the one deleted email is simply not reported + assertNewModUnmod(handler2, 0, 0, 3); + handler1.close(); + handler2.close(); + } + + public void testModifiedMail() throws Exception { + AccessData accessData = new AccessDataImpl(); + accessData.initialize(); + TestBasicSubCrawlerHandler handler1 = crawl("mbox-aperture-inc1",accessData, null); + accessData.store(); + // four mails, everything is new + assertNewModUnmod(handler1, 4, 0, 0); + accessData.initialize(); + TestBasicSubCrawlerHandler handler2 = crawl("mbox-aperture-inc4",accessData, null); + accessData.store(); + // no new mails, one has been modified + // 
this means that the modified email is reported as new + // while the old version would be reported as deleted, but it can't be done + // by the subcrawler itself + assertNewModUnmod(handler2, 1, 0, 3); + handler1.close(); + handler2.close(); + } + + public void testNoBlankLineBetweenMails() throws Exception { + TestBasicSubCrawlerHandler handler1 = crawl("mbox-noblanklinebetweenmails.mbox",null, null); + assertNewModUnmod(handler1, 4, 0, 0); + } + + /** + * The crawler should treat multiple occurrences of the same email in a single folder correctly. + * + * mbox-identicalemails contains three emails where the first and the third are the same. Three new + * data objects must be reported, two of them for emails. The third data object is the one + * for the folder itself. The second occurrence of the same URI should not be reported. + * + * @throws Exception + */ + public void testTwoIdenticalEmailsInASingleMbox() throws Exception { + + AccessData accessData = new AccessDataImpl(); + accessData.initialize(); + TestBasicSubCrawlerHandler handler1 = crawl("mbox-identicalemails",accessData, null); + accessData.store(); + assertNewModUnmod(handler1, 2, 0, 0); + accessData.initialize(); + TestBasicSubCrawlerHandler handler2 = crawl("mbox-identicalemails",accessData, null); + accessData.store(); + assertNewModUnmod(handler2, 0, 0, 2); + handler1.close(); + handler2.close(); + } + + /** + * There are two emails: the first one has a small plain text attachment, the second email has an mp3 + * attachment. Together with the folder itself this makes five objects. 
+ * <pre> + * parentFolder + * \- mboxFile + * |- mail1 + * | \- attachment1 + * \- mail2 + * \- attachment2 + * </pre> + * We want to access all five objects one by one + * @throws Exception + */ + public void testBasicSingleObjectAccessor() throws Exception { + + TestBasicSubCrawlerHandler handler = crawl("mbox-testfolder", null, null); + SubCrawler accessor = new MboxSubCrawler(); + + for (String ob : handler.getNewObjects()) { + InputStream stream = ResourceUtil.getInputStream(DOCS_PATH + "mbox-testfolder", getClass()); + DataObject obj = accessor.getDataObject(new URIImpl("uri:dummyuri"), + SubCrawlerUtil.getSubCrawledObjectPath(new URIImpl(ob)), stream, null,null,null, + new RDFContainerFactoryImpl()); + assertDataObjectOK(obj); + obj.dispose(); + stream.close(); + } + + handler.close(); + } + + private TestBasicSubCrawlerHandler crawl(String fileName, AccessData data, File oldTempFile) throws Exception { + InputStream stream = ResourceUtil.getInputStream(DOCS_PATH + fileName, this.getClass()); + + TestBasicSubCrawlerHandler handler = new TestBasicSubCrawlerHandler(); + MboxSubCrawler sc = new MboxSubCrawler(); + RDFContainer parentMetadata = new RDFContainerImpl(handler.getModel(),new URIImpl("uri:dummyuri")); + sc.subCrawl(null, stream, handler, null, data, null, null, parentMetadata); + + return handler; + } + + public void assertNewModUnmod(TestBasicSubCrawlerHandler handler, int newObjects, + int changedObjects, int unchangedObjects) { + assertEquals(newObjects, handler.getNewObjects().size()); + assertEquals(changedObjects, handler.getChangedObjects().size()); + assertEquals(unchangedObjects, handler.getUnchangedObjects().size()); + } +} Property changes on: aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/mbox/MboxSubCrawlerTest.java ___________________________________________________________________ Added: svn:mime-type + text/plain Added: 
aperture/trunk/core/src/test/resources/org/semanticdesktop/aperture/docs/mail-mbox-aperture-inc1-mail1.eml =================================================================== --- aperture/trunk/core/src/test/resources/org/semanticdesktop/aperture/docs/mail-mbox-aperture-inc1-mail1.eml (rev 0) +++ aperture/trunk/core/src/test/resources/org/semanticdesktop/aperture/docs/mail-mbox-aperture-inc1-mail1.eml 2010-05-26 16:38:56 UTC (rev 2344) @@ -0,0 +1,57 @@ +X-Account-Key: account3 +X-UIDL: 0000ed56033dd35e +X-Mozilla-Status: 0001 +X-Mozilla-Status2: 00000000 +Received: from lists-outbound.sourceforge.net ([66.35.250.225]:33996 "EHLO + lists-outbound.sourceforge.net") by ps11.test.onet.pl with ESMTP + id <S1007234AbWJDQwG>; Wed, 4 Oct 2006 18:52:06 +0200 +Received: from sc8-sf-list2-new.sourceforge.net (unknown [10.3.1.94]) + by sc8-sf-spam2.sourceforge.net (Postfix) with ESMTP id A388B126A9 + for <ant...@po...>; Wed, 4 Oct 2006 09:52:03 -0700 (PDT) +MIME-Version: 1.0 +Content-Type: text/plain; charset="us-ascii" +Content-Transfer-Encoding: 7bit +From: ape...@li... +To: ant...@po... +Subject: confirm c6989dd83a4acb9a0ebecff87c309211496a8e4d +Reply-To: ape...@li... +Message-ID: <mai...@li...> +Date: Wed, 04 Oct 2006 09:52:01 -0700 +Precedence: bulk +X-BeenThere: ape...@li... +X-Mailman-Version: 2.1.8 +List-Id: <aperture-devel.lists.sourceforge.net> +X-List-Administrivia: yes +Sender: ape...@li... +Errors-To: ape...@li... +X-OnetAntySpam: NIE, to nie jest SPAM +X-OrigFrom: ape...@li... +X-ZA0: unknown (-1,0) +Status: + +Mailing list subscription confirmation notice for mailing list +Aperture-devel + +We have received a request from 10.3.1.94 for subscription of your +email address, "ant...@po...", to the +ap...@li... mailing list. To confirm that +you want to be added to this mailing list, simply reply to this +message, keeping the Subject: header intact. 
Or visit this web page: + + https://lists.sourceforge.net/lists/confirm/aperture-devel/c6989dd83a4acb9a0ebecff87c309211496a8e4d + + +Or include the following line -- and only the following line -- in a +message to ape...@li...: + + confirm c6989dd83a4acb9a0ebecff87c309211496a8e4d + +Note that simply sending a `reply' to this message should work from +most mail readers, since that usually leaves the Subject: line in the +right form (additional "Re:" text in the Subject: is okay). + +If you do not wish to be subscribed to this list, please simply +disregard this message. If you think you are being maliciously +subscribed to the list, or have any other questions, send them to +ap...@li.... + Added: aperture/trunk/core/src/test/resources/org/semanticdesktop/aperture/docs/mail-mbox-aperture-inc1-mail2.eml =================================================================== --- aperture/trunk/core/src/test/resources/org/semanticdesktop/aperture/docs/mail-mbox-aperture-inc1-mail2.eml (rev 0) +++ aperture/trunk/core/src/test/resources/org/semanticdesktop/aperture/docs/mail-mbox-aperture-inc1-mail2.eml 2010-05-26 16:38:56 UTC (rev 2344) @@ -0,0 +1,58 @@ +X-Account-Key: account3 +X-UIDL: 0000ed56033dd7fc +X-Mozilla-Status: 0001 +X-Mozilla-Status2: 00000000 +Received: from lists-outbound.sourceforge.net ([66.35.250.225]:24806 "EHLO + lists-outbound.sourceforge.net") by ps26.test.onet.pl with ESMTP + id <S4817394AbWJDRCf>; Wed, 4 Oct 2006 19:02:35 +0200 +Received: from sc8-sf-list2-new.sourceforge.net (unknown [10.3.1.94]) + by sc8-sf-spam2.sourceforge.net (Postfix) with ESMTP id 29E6D1224D + for <ant...@po...>; Wed, 4 Oct 2006 10:02:22 -0700 (PDT) +MIME-Version: 1.0 +Content-Type: text/plain; charset="us-ascii" +Content-Transfer-Encoding: 7bit +Subject: Ape...@li... mailing list reminder +From: ape...@li... +To: ant...@po... +X-No-Archive: yes +Message-ID: <mai...@li...> +Date: Wed, 04 Oct 2006 10:02:19 -0700 +Precedence: bulk +X-BeenThere: ape...@li... 
+X-Mailman-Version: 2.1.8 +List-Id: <aperture-devel.lists.sourceforge.net> +X-List-Administrivia: yes +Sender: ape...@li... +Errors-To: ape...@li... +X-OnetAntySpam: NIE, to nie jest SPAM +X-OrigFrom: ape...@li... +X-ZA0: unknown (-1,0) +Status: + +You, or someone posing as you, has requested a password reminder for +your membership on the mailing list +ap...@li.... You will need this password in +order to change your membership options (e.g. do you want regular +delivery or digest delivery), and having this password makes it easier +for you to unsubscribe from the mailing list. + +You are subscribed with the address: ant...@po... + +Your Aperture-devel password is: kochanaalinka + +To make changes to your membership options, log in and visit your +options web page: + + https://lists.sourceforge.net/lists/options/aperture-devel/antoni_mylka%40poczta.onet.pl + + +You can also make such changes via email by sending a message to: + + ape...@li... + +with the text "help" in the subject or body. The automatic reply will +contain more detailed instructions. + +Questions or comments? Please send them to the Aperture-devel mailing +list administrator at ape...@li.... 
+ Added: aperture/trunk/core/src/test/resources/org/semanticdesktop/aperture/docs/mail-mbox-aperture-inc1-mail3.eml =================================================================== --- aperture/trunk/core/src/test/resources/org/semanticdesktop/aperture/docs/mail-mbox-aperture-inc1-mail3.eml (rev 0) +++ aperture/trunk/core/src/test/resources/org/semanticdesktop/aperture/docs/mail-mbox-aperture-inc1-mail3.eml 2010-05-26 16:38:56 UTC (rev 2344) @@ -0,0 +1,58 @@ +X-Account-Key: account3 +X-UIDL: 0000ed56033de1f5 +X-Mozilla-Status: 0001 +X-Mozilla-Status2: 00000000 +Received: from lists-outbound.sourceforge.net ([66.35.250.225]:59622 "EHLO + lists-outbound.sourceforge.net") by ps15.test.onet.pl with ESMTP + id <S6371102AbWJDRDx>; Wed, 4 Oct 2006 19:03:53 +0200 +Received: from sc8-sf-list2-new.sourceforge.net (unknown [10.3.1.94]) + by sc8-sf-spam2.sourceforge.net (Postfix) with ESMTP id B42EA12065 + for <ant...@po...>; Wed, 4 Oct 2006 10:03:51 -0700 (PDT) +MIME-Version: 1.0 +Content-Type: text/plain; charset="us-ascii" +Content-Transfer-Encoding: 7bit +Subject: Ape...@li... mailing list reminder +From: ape...@li... +To: ant...@po... +X-No-Archive: yes +Message-ID: <mai...@li...> +Date: Wed, 04 Oct 2006 10:03:48 -0700 +Precedence: bulk +X-BeenThere: ape...@li... +X-Mailman-Version: 2.1.8 +List-Id: <aperture-devel.lists.sourceforge.net> +X-List-Administrivia: yes +Sender: ape...@li... +Errors-To: ape...@li... +X-OnetAntySpam: NIE, to nie jest SPAM +X-OrigFrom: ape...@li... +X-ZA0: unknown (-1,0) +Status: + +You, or someone posing as you, has requested a password reminder for +your membership on the mailing list +ap...@li.... You will need this password in +order to change your membership options (e.g. do you want regular +delivery or digest delivery), and having this password makes it easier +for you to unsubscribe from the mailing list. + +You are subscribed with the address: ant...@po... 
+ +Your Aperture-devel password is: kochanaalinka + +To make changes to your membership options, log in and visit your +options web page: + + https://lists.sourceforge.net/lists/options/aperture-devel/antoni_mylka%40poczta.onet.pl + + +You can also make such changes via email by sending a message to: + + ape...@li... + +with the text "help" in the subject or body. The automatic reply will +contain more detailed instructions. + +Questions or comments? Please send them to the Aperture-devel mailing +list administrator at ape...@li.... + Added: aperture/trunk/core/src/test/resources/org/semanticdesktop/aperture/docs/mail-mbox-aperture-inc1-mail4.eml =================================================================== --- aperture/trunk/core/src/test/resources/org/semanticdesktop/aperture/docs/mail-mbox-aperture-inc1-mail4.eml (rev 0) +++ aperture/trunk/core/src/test/resources/org/semanticdesktop/aperture/docs/mail-mbox-aperture-inc1-mail4.eml 2010-05-26 16:38:56 UTC (rev 2344) @@ -0,0 +1,135 @@ +X-Account-Key: account3 +X-UIDL: 0000ed56034ba39d +X-Mozilla-Status: 0011 +X-Mozilla-Status2: 00000000 +Received: from lists-outbound.sourceforge.net ([66.35.250.225]:897 "EHLO + lists-outbound.sourceforge.net") by ps28.test.onet.pl with ESMTP + id <S2245883AbWJIPFq>; Mon, 9 Oct 2006 17:05:46 +0200 +Received: from sc8-sf-list2-new.sourceforge.net (unknown [10.3.1.94]) + by sc8-sf-spam2.sourceforge.net (Postfix) with ESMTP + id 4FF26160DA; Mon, 9 Oct 2006 08:05:45 -0700 (PDT) +Received: from sc8-sf-mx2-b.sourceforge.net ([10.3.1.92] + helo=mail.sourceforge.net) + by sc8-sf-list2-new.sourceforge.net with esmtp (Exim 4.43) + id 1GWwhI-0007BE-Hy for ape...@li...; + Mon, 09 Oct 2006 08:05:40 -0700 +Received: from www4.aduna.biz ([66.98.176.99] helo=mail.aduna.biz) + by mail.sourceforge.net with esmtps (TLSv1:AES256-SHA:256) + (Exim 4.44) id 1GWwhJ-0000OE-40 + for ape...@li...; + Mon, 09 Oct 2006 08:05:41 -0700 +Received: (qmail 19084 invoked from network); 9 Oct 2006 17:05:31 +0200 
+Received: from home.aduna-software.com (HELO ?10.1.0.11?) (213.84.59.34) + by www4.aduna.biz with (DHE-RSA-AES256-SHA encrypted) SMTP; + 9 Oct 2006 17:05:31 +0200 +Message-ID: <452...@ad...> +Date: Mon, 09 Oct 2006 17:05:27 +0200 +From: Christiaan Fluit <chr...@ad...> +User-Agent: Thunderbird 1.5.0.7 (Windows/20060909) +MIME-Version: 1.0 +To: Aperture Developers <ape...@li...> +References: <452...@df...> <452...@ad...> + <452...@df...> +In-Reply-To: <452...@df...> +X-Spam-Score: 0.0 (/) +X-Spam-Report: Spam Filtering performed by sourceforge.net. + See http://spamassassin.org/tag/ for more details. + Report problems to + http://sf.net/tracker/?func=add&group_id=1&atid=200001 +Cc: Antoni Mylka <Ant...@df...> +Subject: Re: [Aperture-devel] New Aperture Developer +X-BeenThere: ape...@li... +X-Mailman-Version: 2.1.8 +Precedence: list +List-Id: <aperture-devel.lists.sourceforge.net> +List-Unsubscribe: <https://lists.sourceforge.net/lists/listinfo/aperture-devel>, + <mailto:ape...@li...?subject=unsubscribe> +List-Archive: <http://sourceforge.net/mailarchive/forum.php?forum=aperture-devel> +List-Post: <mailto:ape...@li...> +List-Help: <mailto:ape...@li...?subject=help> +List-Subscribe: <https://lists.sourceforge.net/lists/listinfo/aperture-devel>, + <mailto:ape...@li...?subject=subscribe> +Content-Type: text/plain; charset="us-ascii" +Content-Transfer-Encoding: 7bit +Sender: ape...@li... +Errors-To: ape...@li... +X-OnetAntySpam: NIE, to nie jest SPAM +X-OrigFrom: ape...@li... +X-ZA0: unknown (-1,0) +Status: + +Leo Sauermann wrote: +> I would go for DC and other popular vocabs as well. +> +> note we have a wikipage to discuss these things here: +> https://gnowsis.opendfki.de/wiki/ApertureDiscussion +> +> if you have any ideas about DC, add them here: +> https://gnowsis.opendfki.de/wiki/ApertureDiscussion#Vocabulary:useDCinsteadofdata + +I have made some notes during the last weeks, I'll add them here soon. + +> I would keep the RDFContainer. Its a good thing. 
I would only build it +> on top of RDF2GO, so that it can be used on both jena and sesame. +> https://gnowsis.opendfki.de/wiki/ApertureDiscussion#RDFContainerbasedonRDF2GO + +I see. I'm still considering this, not sure whether it's a good approach: + +pros: + +- bindings for various RDF stores that we get for free + +cons: + +- is RDF2GO still using java.net.URIs? That would mean a lot of +conversions that are potentially not necessary, e.g. when using a Sesame +Repository: org.openrdf.model.URIs get translated to java.net.URIs and +back to org.openrdf.model.URIs. + +- RDFContainer lacks full RDF graph access. A simple getStatements +method with a subject parameter would solve this though. I've also read +comments by Gunnar about having to cast RDFContainer to +SesameRDFContainer in code he wrote, I guess he had the same problem? + +>> Once we have completed this move, we might also look at the AccessData +>> API. +>> [snip] +> hm, I would think that using an rdfgraph here is overkill. +> I always thought we can implement the AccessData interface on top of the +> store you are actually crawling to. + +That's what I did with RepositoryAccessData. It wraps a Repository, +which *can* be the same as used by the CrawlerHandler. + +I'm already using this code in AutoFocus, which has dramatically +improved the memory requirements of WebCrawler. This crawler used to +keep the entire hypertext graph in main memory. Both the AccessData and +the CrawlerHandler use the same Repository, using contexts to keep +information separate (you really want to enforce that AccessData only +reads and writes its own data). + +However, the AccessData API limits the data that it can hold to simple +key-value pairs, with the exception of the notion of "referred IDs", +which are used for indicating links, nested objects, etc. Whenever you +have some other graph-like data, you cannot store it properly in the +AccessData, unless you're willing to encode multiple values in a single +string. 
Also, the value is often a primitive value but can also be +another URI (e.g. a redirectsTo property between two URLs). Right now +these URIs are stored as literals by RepositoryAccessData. + + +Regards, + +Chris +-- + +------------------------------------------------------------------------- +Take Surveys. Earn Cash. Influence the Future of IT +Join SourceForge.net's Techsay panel and you'll get the chance to share your +opinions on IT & business topics through brief surveys -- and earn cash +http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=DEVDEV +_______________________________________________ +Aperture-devel mailing list +Ap...@li... +https://lists.sourceforge.net/lists/listinfo/aperture-devel + This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <my...@us...> - 2010-05-26 16:46:10
|
Revision: 2345 http://aperture.svn.sourceforge.net/aperture/?rev=2345&view=rev Author: mylka Date: 2010-05-26 16:46:03 +0000 (Wed, 26 May 2010) Log Message: ----------- [2932901] added proper isPartOf links between mbox emails and the mbox file in the mbox subcrawler Modified Paths: -------------- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/mbox/MboxSubCrawler.java aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/mbox/MboxSubCrawlerTest.java Modified: aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/mbox/MboxSubCrawler.java =================================================================== --- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/mbox/MboxSubCrawler.java 2010-05-26 16:38:56 UTC (rev 2344) +++ aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/mbox/MboxSubCrawler.java 2010-05-26 16:46:03 UTC (rev 2345) @@ -26,6 +26,7 @@ import org.semanticdesktop.aperture.subcrawler.SubCrawlerException; import org.semanticdesktop.aperture.subcrawler.SubCrawlerHandler; import org.semanticdesktop.aperture.subcrawler.base.AbstractSubCrawler; +import org.semanticdesktop.aperture.vocabulary.NIE; /** * <p> @@ -56,9 +57,10 @@ MimeMessage message = new MimeMessage(null,new ByteArrayInputStream(bytes)); URI attachmentUriPrefix = createChildUri(parentMetadata.getDescribedUri(), MailUtil.getMessageId(message)); RDFContainerFactory myFac = handler.getRDFContainerFactory(parentMetadata.getDescribedUri().toString()); - fac = new DataObjectFactory(message,myFac,null,dataSource,attachmentUriPrefix,null,"/",attachmentUriPrefix.toString()); + fac = new DataObjectFactory(message,myFac,null,dataSource, + attachmentUriPrefix,null,"/",attachmentUriPrefix.toString()); DataObject object = null; - + boolean first = true; /* * Note that the stopRequested check is BEFORE getObject(). 
Otherwise if the crawler is stopped * the object is obtained and only AFTER this loop is stopped. This object is not disposed by the @@ -70,6 +72,12 @@ // first of all get a string version of the message uri String queuedUri = object.getID().toString(); + if (first) { + object.getMetadata().add(NIE.isPartOf,parentMetadata.getDescribedUri()); + first = false; + } + + /* * See if this url has been accessed before so that we can stop immediately. Note that no * check on message date is done as messages are immutable. Therefore we only have to check @@ -106,6 +114,8 @@ * added to the objects metadata RDFContainer */ MailUtil.registerParentRelationshipInAccessData(object, accessData); + + /* * Report this object as a new object (assumption: objects are always new, never changed, Modified: aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/mbox/MboxSubCrawlerTest.java =================================================================== --- aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/mbox/MboxSubCrawlerTest.java 2010-05-26 16:38:56 UTC (rev 2344) +++ aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/mbox/MboxSubCrawlerTest.java 2010-05-26 16:46:03 UTC (rev 2345) @@ -11,6 +11,7 @@ import org.ontoware.rdf2go.exception.ModelException; import org.ontoware.rdf2go.model.Model; +import org.ontoware.rdf2go.model.node.URI; import org.ontoware.rdf2go.model.node.impl.URIImpl; import org.semanticdesktop.aperture.accessor.AccessData; import org.semanticdesktop.aperture.accessor.DataObject; @@ -23,6 +24,7 @@ import org.semanticdesktop.aperture.subcrawler.SubCrawlerUtil; import org.semanticdesktop.aperture.test.subcrawler.TestBasicSubCrawlerHandler; import org.semanticdesktop.aperture.util.ResourceUtil; +import org.semanticdesktop.aperture.vocabulary.NIE; /** * Tests for the {@link MboxSubCrawler} @@ -143,13 +145,21 @@ SubCrawler accessor = new MboxSubCrawler(); for (String ob : handler.getNewObjects()) { + URI 
obUri = new URIImpl(ob); InputStream stream = ResourceUtil.getInputStream(DOCS_PATH + "mbox-testfolder", getClass()); DataObject obj = accessor.getDataObject(new URIImpl("uri:dummyuri"), - SubCrawlerUtil.getSubCrawledObjectPath(new URIImpl(ob)), stream, null,null,null, + SubCrawlerUtil.getSubCrawledObjectPath(obUri), stream, null,null,null, new RDFContainerFactoryImpl()); assertDataObjectOK(obj); obj.dispose(); stream.close(); + if (!ob.endsWith("/1")) { + // emails should have isPartOf links to their parent files + checkStatement(obUri, NIE.isPartOf, new URIImpl("uri:dummyuri"), handler.getModel()); + } else { + // attachments shouldn't + assertFalse(handler.getModel().contains(obUri, NIE.isPartOf, new URIImpl("uri:dummyuri"))); + } } handler.close(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <my...@us...> - 2010-06-01 13:38:46
|
Revision: 2349 http://aperture.svn.sourceforge.net/aperture/?rev=2349&view=rev Author: mylka Date: 2010-06-01 13:38:40 +0000 (Tue, 01 Jun 2010) Log Message: ----------- 3008473 - added a MailUtil.parseAddressHeader method, which supports some of the non-standard atrocities found in emails "in the field" see the MailUtilTest class for an overview of the monster we're facing Modified Paths: -------------- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/crawler/mail/DataObjectFactory.java aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/crawler/mail/MailUtil.java aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/crawler/mail/MailUtilTest.java Modified: aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/crawler/mail/DataObjectFactory.java =================================================================== --- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/crawler/mail/DataObjectFactory.java 2010-05-27 11:03:50 UTC (rev 2348) +++ aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/crawler/mail/DataObjectFactory.java 2010-06-01 13:38:40 UTC (rev 2349) @@ -20,13 +20,11 @@ import java.util.Map; import java.util.regex.Pattern; -import javax.mail.Address; import javax.mail.BodyPart; import javax.mail.Message; import javax.mail.MessagingException; import javax.mail.Multipart; import javax.mail.Part; -import javax.mail.Message.RecipientType; import javax.mail.internet.ContentType; import javax.mail.internet.InternetAddress; import javax.mail.internet.MimeMessage; @@ -43,6 +41,7 @@ import org.semanticdesktop.aperture.accessor.DataObject; import org.semanticdesktop.aperture.accessor.RDFContainerFactory; import org.semanticdesktop.aperture.accessor.base.FileDataObjectBase; +import org.semanticdesktop.aperture.crawler.mail.MailUtil.LiberalInternetAddress; import org.semanticdesktop.aperture.crawler.mail.base.MessageDataObjectBase; import org.semanticdesktop.aperture.datasource.DataSource; import 
org.semanticdesktop.aperture.helper.html.HtmlParserException; @@ -832,7 +831,7 @@ addObjectIfNotNull(NMO.messageId, localMessage.getHeader("Message-ID"), result); - Address[] from = MailUtil.getAddressHeader(localMessage, "From"); + List<LiberalInternetAddress> from = MailUtil.getAddressHeader(localMessage, "From"); if (from == null) { from = MailUtil.getAddressHeader(localMessage, "Sender"); } @@ -1452,6 +1451,12 @@ for (int i = 0; i < array.length; i++) { MailUtil.addAddressMetadata(array[i], predicate, metadata); } + } else if (value instanceof List) { + List list = (List)value; + for (Object obj : list) { + LiberalInternetAddress a = (LiberalInternetAddress)obj; + MailUtil.addAddressMetadata(a, predicate, metadata); + } } else if (value != null) { logger.warn("Unknown address class: " + value.getClass().getName()); @@ -1494,7 +1499,7 @@ return string; } - private void addContactArrayIfNotNull(URI predicate, Address[] addresses, HashMap result) { + private void addContactArrayIfNotNull(URI predicate, List<LiberalInternetAddress> addresses, HashMap result) { if (addresses != null) { result.put(predicate, addresses); } Modified: aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/crawler/mail/MailUtil.java =================================================================== --- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/crawler/mail/MailUtil.java 2010-05-27 11:03:50 UTC (rev 2348) +++ aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/crawler/mail/MailUtil.java 2010-06-01 13:38:40 UTC (rev 2349) @@ -24,9 +24,11 @@ import java.util.Date; import java.util.Enumeration; import java.util.HashSet; +import java.util.List; import java.util.Locale; import java.util.Set; import java.util.UUID; +import java.util.regex.Pattern; import javax.mail.Header; import javax.mail.Message; @@ -71,6 +73,15 @@ } private static final Charset normal = Charset.forName("ISO-8859-1"); + // a pretty liberal email address pattern + private static final 
Pattern EMAIL_ADDRESS_PATTERN = Pattern.compile(".+@.+\\.\\w+"); + + private static final Character SEMICOLON = new Character(';'); + private static final Character QUOTATION_MARK = new Character('"' ); + private static final Character LT_BRACKET = new Character('<'); + private static final Character COMMA = new Character(','); + private static final Character BACKSLASH = new Character('\\'); + /** * Converts a string (possibly containing non-ascii characters) to it's representation in the * UTF7-IMAP encoding. E.g for 'Böser' 'B&APY-ser' is returned. @@ -111,56 +122,329 @@ } return null; } + + public static class LiberalInternetAddress { + private String name; + private List<String> addresses = new ArrayList<String>(); + public LiberalInternetAddress(String name, String address, String ... additionalAddresses) { + this.setName(name); + if (address != null) { + this.getAddresses().add(address); + } + if (additionalAddresses != null) { + for (String a : additionalAddresses) { + if (a != null && a.trim().length() > 0) { + this.getAddresses().add(a.trim()); + } + } + } + } + public void setName(String name) { + this.name = name; + } + public String getName() { + return name; + } + public List<String> getAddresses() { + return addresses; + } + } - public static InternetAddress[] getAddressHeader(Message msg, String name) + public static List<LiberalInternetAddress> getAddressHeader(Message msg, String name) throws MessagingException - { + { String[] headers = msg.getHeader(name); if (headers == null) { return null; } - - - ArrayList<InternetAddress> addrs = new ArrayList<InternetAddress>(); + ArrayList<LiberalInternetAddress> addrs = new ArrayList<LiberalInternetAddress>(); for (int i = 0; i < headers.length; i++) { String header = headers[i]; + addrs.addAll(parseAddressHeaderValue(header)); - String [] values = header.split(";"); - for (String value : values) { - value = MimeUtility.unfold(value); - value = decodeText(value); - try { - if (value.contains("@") || 
value.contains("<")) { - InternetAddress [] iAddrs = InternetAddress.parseHeader(value, false); - if (iAddrs != null && iAddrs.length > 0) { - for (InternetAddress iAddr : iAddrs) { - addrs.add(iAddr); - } - } - } else { - // this is for addresses that only contain the name - // like To: John Smith - // if a string doesn't have anything that resembles an email - // it must recorded as the NAME of the contact, not as the address - InternetAddress iAddr = new InternetAddress(); - iAddr.setPersonal(value); - addrs.add(iAddr); - } + } + + return addrs; + } + + /** + * Parses a value of an address header and extracts email addresses. + * + * <p> + * + * </p> + * + * @param header + * @return + */ + public static List<LiberalInternetAddress> parseAddressHeaderValue(String header) { + List<LiberalInternetAddress> addrs = new ArrayList<LiberalInternetAddress>(); + + /* + * First "tokenize" the string into chunks and separators + */ + ArrayList<String> chunks = new ArrayList<String>(); + ArrayList<Character> separators = new ArrayList<Character>(); + tokenize(header, chunks, separators); + + /* + * Then use some heuristics on the chunks and their separators in order to guess which chunk is a name and which + * is an address + * + * These heuristics are based on the examples we've found, if you find an example that doesn't fit those + * heuristics - feel free to tweak them (as long as your changes don't break any of our current known cases). + */ + int chunkIndex = 0; + while (chunkIndex < chunks.size()) { + String chunk = chunks.get(chunkIndex).trim(); + + /* + * empty chunks happen before the first separator in a block + * they also happen between separators, e.g. 
between " and < in a normal address, but those are covered by + * the functions for their respective cases + */ + if (chunk.length() == 0) { + chunkIndex++; + continue; + } + + LiberalInternetAddress newAddress = new LiberalInternetAddress(null, null); + + int newChunkIndex = 0; + + if ((newChunkIndex = processSingleChunkCase( + newAddress, chunk, chunks, separators, chunkIndex)) > chunkIndex) { + chunkIndex = newChunkIndex; + addrs.add(newAddress); + continue; + } - } catch (Exception e) { - // manual parsing + if ((newChunkIndex = processNameWithQuotationMarksAndAddress( + newAddress, chunk, chunks, separators, chunkIndex)) > chunkIndex) { + chunkIndex = newChunkIndex; + addrs.add(newAddress); + continue; + } + + if ((newChunkIndex = processNameWithoutQuotationMarksAndAddress( + newAddress, chunk, chunks, separators, chunkIndex)) > chunkIndex) { + chunkIndex = newChunkIndex; + addrs.add(newAddress); + continue; + } + + + // if no heuristics fit, just disregard this block and move the index forward, + // to prevent endless loops on really broken cases + chunkIndex++; + + } + return addrs; + } + + /** + * Normal single address + * <pre>To: ant...@do...</pre> + * + * Broken, single name + * + * <pre>To: Antoni Mylka</pre> + */ + private static int processSingleChunkCase(LiberalInternetAddress newAddress, String chunk, + ArrayList<String> chunks, ArrayList<Character> separators, int chunkIndex) { + Character leftSeparator = getLeftSeparator(separators, chunkIndex); + Character rightSeparator = getRightSeparator(separators, chunkIndex); + + if (LT_BRACKET.equals(leftSeparator)) { + // this has to be an email address + if (isAddress(chunk)) { + newAddress.getAddresses().add(chunk); + while (SEMICOLON.equals(getRightSeparator(separators, chunkIndex)) && chunkIndex < chunks.size() - 1) { + chunkIndex++; + newAddress.getAddresses().add(chunks.get(chunkIndex)); } + return chunkIndex + 1; } + } else if (rightSeparator == null || COMMA.equals(rightSeparator) || 
SEMICOLON.equals(rightSeparator)) { + if (isAddress(chunk)) { + newAddress.getAddresses().add(chunk); + } else { + newAddress.setName(decodeText(chunk)); + } + return chunkIndex + 1; } - - InternetAddress [] result = new InternetAddress[addrs.size()]; - addrs.toArray(result); - return result; + return 0; } + + /** + * Normal name in quotation marks and the address in < > + * <pre>To: "Antoni Mylka" <ant...@do...></pre> + * + * Broken, name in quotation marks + multiple addreses in < > + * <pre>To: "Antoni Mylka" <ant...@do...;an...@co...></pre> + */ + private static int processNameWithQuotationMarksAndAddress(LiberalInternetAddress newAddress, String chunk, + ArrayList<String> chunks, ArrayList<Character> separators, int chunkIndex) { + Character leftSeparator = getLeftSeparator(separators, chunkIndex); + Character rightSeparator = getRightSeparator(separators, chunkIndex); + if (QUOTATION_MARK.equals(leftSeparator) && QUOTATION_MARK.equals(rightSeparator)) { + newAddress.setName(decodeText(chunk)); + // we skip the empty chunk betweem " and < + chunkIndex++; + // and we move to the address chunk + chunkIndex++; + + if (chunkIndex < chunks.size()) { + String address = chunks.get(chunkIndex); + // let's assume some sanity, this is an address and we don't need to decode it + newAddress.getAddresses().add(address); + while (SEMICOLON.equals(getRightSeparator(separators, chunkIndex)) && chunkIndex < chunks.size() - 1) { + chunkIndex++; + newAddress.getAddresses().add(chunks.get(chunkIndex)); + } + } + // move to the next chunk - the first chunk in the next recipient block + chunkIndex++; + return chunkIndex; + } + return 0; + } + + /** + * Name without quotation marks and the address in < > + * <pre>To: Antoni Mylka <ant...@do...></pre> + * + * Broken, name without + multiple addreses in < > + * <pre>To: Antoni Mylka <ant...@do...;an...@co...></pre> + */ + private static int processNameWithoutQuotationMarksAndAddress(LiberalInternetAddress newAddress, String chunk, + 
ArrayList<String> chunks, ArrayList<Character> separators, int chunkIndex) { + Character rightSeparator = getRightSeparator(separators, chunkIndex); + if (LT_BRACKET.equals(rightSeparator)) { + String name = chunks.get(chunkIndex); + newAddress.setName(decodeText(name)); + + // and we move to the address chunk + chunkIndex++; + + if (chunkIndex < chunks.size()) { + String address = chunks.get(chunkIndex); + // let's assume some sanity, this is an address and we don't need to decode it + newAddress.getAddresses().add(address); + while (SEMICOLON.equals(getRightSeparator(separators, chunkIndex)) && chunkIndex < chunks.size() - 1) { + chunkIndex++; + newAddress.getAddresses().add(chunks.get(chunkIndex)); + } + } + // move to the next chunk - the first chunk in the next recipient block + chunkIndex++; + return chunkIndex; + } + return 0; + } + + private static Character getRightSeparator(ArrayList<Character> separators, int chunkIndex) { + Character rightSeparator = + (chunkIndex < separators.size()) + ? separators.get(chunkIndex) + : null; + return rightSeparator; + } + + private static Character getLeftSeparator(ArrayList<Character> separators, int chunkIndex) { + Character leftSeparator = + (chunkIndex > 0 && separators.size() >= chunkIndex ) + ? 
separators.get(chunkIndex - 1) + : null; + return leftSeparator; + } + + private static void tokenize(String header, ArrayList<String> chunks, ArrayList<Character> separators) { + tokenize(header, chunks, separators, true); + + // now a little hack, some mailers use semicolons as separators and commas as normal content characters + // To: Mylka, Antoni; Fluit, Christiaan; Reuschling, Christian + // if we spot a comma just next to a semicolon on the separator list, we need to repeat the tokenization + // but this time a comma is not a separator + // as I said, this is to accommodate a particularly + boolean repeat = false; + for (int i = 0 ; i < separators.size(); i++) { + Character s = separators.get(i); + if (COMMA.equals(s) && (i > 0 && SEMICOLON.equals(separators.get(i-1)) || + i < separators.size() - 1 && SEMICOLON.equals(separators.get(i+1)))) { + repeat = true; + break; + } + } + if (repeat) { + chunks.clear(); + separators.clear(); + tokenize(header, chunks, separators, false); + } + } + private static void tokenize(String header, ArrayList<String> chunks, ArrayList<Character> separators, + boolean commaIsASeparator) { + StringBuilder currentChunk = new StringBuilder(); + boolean insideQuotes = false; + int i = 0; + while (i < header.length()) { + char c = header.charAt(i); + if (BACKSLASH.equals(c)) { + // this is used for escaped characters + i++; + if (i < header.length()) { + char c2 = header.charAt(i); + currentChunk.append(c2); + } + } else if (QUOTATION_MARK.equals(c)) { + chunks.add(currentChunk.toString().trim()); + separators.add(c); + currentChunk = new StringBuilder(); + if (insideQuotes) { + insideQuotes = false; + } else { + insideQuotes = true; + } + } else if (!insideQuotes && isSeparator(c,commaIsASeparator)) { + // inside quotes we disregard all separators + chunks.add(currentChunk.toString().trim()); + separators.add(c); + currentChunk = new StringBuilder(); + } else if (isLineBreak(c)) { + // do nothing, swallow line breaks + } else { + 
currentChunk.append(c); + } + i++; + } + chunks.add(currentChunk.toString().trim()); + } + /** + * @param chunk + * @return + */ + private static boolean isAddress(String chunk) { + return EMAIL_ADDRESS_PATTERN.matcher(chunk).matches(); + } + + private static boolean isSeparator(char s, boolean commaIsASeparator) { + if (commaIsASeparator) { + return s == '<' || s == '>' || s == ',' || s == ';'; + } else { + return s == '<' || s == '>' || s == ';'; + } + } + + private static boolean isLineBreak(char c) { + return c == '\n' || c == '\r'; + } + + + /** * Alternative for {@link MimeUtility#decodeText(String)} with a less strict * parsing algorithm. The algorithm is (and will be) tuned to what we see * "in the wild". @@ -296,7 +580,51 @@ } } } + + /** + * Add statements modeling the specified address metadata to the RDFContainer, using the specified + * predicate to connect the address resource to the mail resource. + * + * @param address The InternetAddress that will be encoded in the RDF model. + * @param predicate The property URI that will be used to connect the address metadata to the mail + * resource. + * @param metadata The RDFContainer that will receive the RDF statements and whose described URI is + * expected to represent the mail resource. 
+ */ + public static void addAddressMetadata(LiberalInternetAddress address, URI predicate, RDFContainer metadata) { + // fetch the name + String name = address.getName(); + if (name != null) { + name = name.trim(); + } + List<String> addrs = address.getAddresses(); + + // proceed when at least one has a reasonable value + if (hasRealValue(name) || addrs.size() > 0) {// create a URI for this address + URI person = metadata.getModel().createURI(getPersonURI(null,null)); + + // connect the person resource to the mail resource + metadata.add(predicate, person); + metadata.getModel().addStatement(person, RDF.type, NCO.Contact); + // add name and address details + if (hasRealValue(name)) { + Literal literal = metadata.getModel().createPlainLiteral(name); + metadata.getModel().addStatement(person, NCO.fullname, literal); + } + + for (String emailAddress : addrs) { + if (hasRealValue(emailAddress)) { + Literal literal = metadata.getModel().createPlainLiteral(emailAddress); + Resource emailResource = metadata.getModel().createURI(getEmailURI("mailto:", emailAddress)); + metadata.getModel().addStatement(person, NCO.hasEmailAddress, emailResource); + metadata.getModel().addStatement(emailResource, RDF.type, NCO.EmailAddress); + metadata.getModel().addStatement(emailResource, NCO.emailAddress, literal); + } + } + } + } + /** * Derive a URI for a person based on an email address and a name that can be used in an RDF graph. At * least one of these properties has to have a real value. 
Modified: aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/crawler/mail/MailUtilTest.java =================================================================== --- aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/crawler/mail/MailUtilTest.java 2010-05-27 11:03:50 UTC (rev 2348) +++ aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/crawler/mail/MailUtilTest.java 2010-06-01 13:38:40 UTC (rev 2349) @@ -10,13 +10,21 @@ import java.io.InputStream; import java.text.ParseException; import java.util.Date; +import java.util.List; +import java.util.Set; import javax.mail.MessagingException; import javax.mail.internet.MimeMessage; +import org.ontoware.rdf2go.model.Model; +import org.ontoware.rdf2go.model.node.Resource; +import org.semanticdesktop.aperture.crawler.mail.MailUtil.LiberalInternetAddress; +import org.semanticdesktop.aperture.rdf.RDFContainer; import org.semanticdesktop.aperture.test.ApertureTestBase; import org.semanticdesktop.aperture.util.DateUtil; import org.semanticdesktop.aperture.util.ResourceUtil; +import org.semanticdesktop.aperture.vocabulary.NCO; +import org.semanticdesktop.aperture.vocabulary.NMO; /** * A test case for the MailUtil class. @@ -114,9 +122,107 @@ assertTrue(!res.equals(res2)); } + public void testAddLiberalAddressMetadata() throws Exception { + RDFContainer container = createRDFContainer("uri:dummyuri"); + LiberalInternetAddress a = new LiberalInternetAddress("Antoni Mylka", "ant...@do...", + "oth...@ot..."); + Model model = container.getModel(); + + MailUtil.addAddressMetadata(a, NMO.to, container); + assertSparqlQuery(model, "SELECT ?c WHERE { " + + container.getDescribedUri().toSPARQL() + " " + NMO.to.toSPARQL() + " ?c . " + + "?c " + NCO.hasEmailAddress.toSPARQL() + " ?e1 . " + + "?e1 " + NCO.emailAddress.toSPARQL() + " \"ant...@do...\" . " + + "?c " + NCO.hasEmailAddress.toSPARQL() + " ?e2 . 
" + + "?e2 " + NCO.emailAddress.toSPARQL() + " \"oth...@ot...\" }"); + } + + public void testAddressHeadersParsing() { + assertHeaders("Antoni Mylka; Christiaan Fluit", + a("Antoni Mylka",null), + a("Christiaan Fluit",null)); + assertHeaders("Antoni Mylka, Christiaan Fluit", + a("Antoni Mylka",null), + a("Christiaan Fluit",null)); + assertHeaders("Mylka, Antoni; Fluit, Christiaan", + a("Mylka, Antoni",null), + a("Fluit, Christiaan",null)); + assertHeaders("Antoni Mylka <ant...@do...>", + a("Antoni Mylka","ant...@do...")); + assertHeaders("\"Antoni Mylka\" <ant...@do...>", + a("Antoni Mylka","ant...@do...")); + assertHeaders("\"Mylka, Antoni\" <ant...@do...>", + a("Mylka, Antoni","ant...@do...")); + assertHeaders("my...@us...", + a(null, "my...@us...")); + /* + * three addresses, + * first normal (quotation marks and pointy brackets) + * a separator is missing between the first and the second, there is an unescaped comma inside the "name" part of the second recipient + * the third recipient has his name given without the quotation marks + */ + assertHeaders("\"Antoni Mylka\" <am...@do...>\"Fluit, Christiaan\" <fl...@do...>, Arjohn Kampman <ar...@do...>", + a("Antoni Mylka","am...@do..."), + a("Fluit, Christiaan","fl...@do..."), + a("Arjohn Kampman","ar...@do...")); + + /* + * A recipient with two email addresses in a single recipient block + */ + assertHeaders("Antoni Mylka <am...@do...;an...@do...>", + a("Antoni Mylka","am...@do...","an...@do...")); + + assertHeaders("<my...@us...>", + a(null, "my...@us...")); + + assertHeaders("<my...@us...;oth...@us...>", + a(null, "my...@us...","oth...@us...")); + } + private void check(String st, String utcString) throws ParseException { Date date = MailUtil.parseReceivedHeader(st); assertTrue(DateUtil.dateTimeEqualToUTCString(date, utcString)); } + + private void assertHeaders(String string, LiberalInternetAddress ... 
expectedAddresses) { + List<LiberalInternetAddress> actualAddresses = MailUtil.parseAddressHeaderValue(string); + if (expectedAddresses == null || expectedAddresses.length == 0) { + assertEquals(0,actualAddresses.size()); + } else { + assertEquals(expectedAddresses.length,actualAddresses.size()); + } + for (int i = 0; i < expectedAddresses.length; i++) { + LiberalInternetAddress actualAddress = actualAddresses.get(i); + LiberalInternetAddress expectedAddress = expectedAddresses[i]; + if (expectedAddress.getName() == null) { + assertNull(actualAddress.getName()); + } else { + assertEquals(expectedAddress.getName(),actualAddress.getName()); + } + assertEquals(expectedAddress.getAddresses().size(), actualAddress.getAddresses().size()); + for (int j = 0; j < expectedAddress.getAddresses().size(); j++) { + assertEquals(expectedAddress.getAddresses().get(j),actualAddress.getAddresses().get(j)); + } + } + } + + /** + * @param string + * @param string2 + * @return + */ + private MailUtil.LiberalInternetAddress a(String name, String a1) { + return a(name,a1,(String[])null); + } + + /** + * @param string + * @param string2 + * @return + */ + private MailUtil.LiberalInternetAddress a(String name, String a1, String ... addrs) { + LiberalInternetAddress a = new LiberalInternetAddress(name,a1,addrs); + return a; + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <my...@us...> - 2010-06-07 11:40:55
|
Revision: 2351 http://aperture.svn.sourceforge.net/aperture/?rev=2351&view=rev Author: mylka Date: 2010-06-07 11:40:48 +0000 (Mon, 07 Jun 2010) Log Message: ----------- [3008473] added another little weird To header to the list of supported cases (To: antoni <>, christiaan <>). It should yield a name WITHOUT an address, and not with an address which is an empty string Modified Paths: -------------- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/crawler/mail/MailUtil.java aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/crawler/mail/MailUtilTest.java Modified: aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/crawler/mail/MailUtil.java =================================================================== --- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/crawler/mail/MailUtil.java 2010-06-04 12:08:07 UTC (rev 2350) +++ aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/crawler/mail/MailUtil.java 2010-06-07 11:40:48 UTC (rev 2351) @@ -332,10 +332,15 @@ if (chunkIndex < chunks.size()) { String address = chunks.get(chunkIndex); // let's assume some sanity, this is an address and we don't need to decode it - newAddress.getAddresses().add(address); + if (address.trim().length() > 0) { + newAddress.getAddresses().add(address); + } while (SEMICOLON.equals(getRightSeparator(separators, chunkIndex)) && chunkIndex < chunks.size() - 1) { chunkIndex++; - newAddress.getAddresses().add(chunks.get(chunkIndex)); + String addressChunk = chunks.get(chunkIndex); + if (addressChunk.trim().length() > 0) { + newAddress.getAddresses().add(addressChunk); + } } } // move to the next chunk - the first chunk in the next recipient block Modified: aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/crawler/mail/MailUtilTest.java =================================================================== --- aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/crawler/mail/MailUtilTest.java 2010-06-04 12:08:07 UTC 
(rev 2350) +++ aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/crawler/mail/MailUtilTest.java 2010-06-07 11:40:48 UTC (rev 2351) @@ -155,6 +155,10 @@ a("Mylka, Antoni","ant...@do...")); assertHeaders("my...@us...", a(null, "my...@us...")); + assertHeaders("antoni <>, christiaan fluit <>", + a("antoni",null), + a("christiaan fluit",null)); + /* * three addresses, * first normal (quotation marks and pointy brackets) This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <my...@us...> - 2010-06-07 15:17:57
|
Revision: 2352 http://aperture.svn.sourceforge.net/aperture/?rev=2352&view=rev Author: mylka Date: 2010-06-07 15:17:51 +0000 (Mon, 07 Jun 2010) Log Message: ----------- added support for emails, who store their message ids not in the normal Message-ID header, but in some proprietary Mapi-Smtp-Message-ID or Mapi-125-Message-ID headers Modified Paths: -------------- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/crawler/mail/DataObjectFactory.java aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/crawler/mail/DataObjectFactoryTest.java Added Paths: ----------- aperture/trunk/core/src/test/resources/org/semanticdesktop/aperture/docs/mail-mapi125messageid.eml Modified: aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/crawler/mail/DataObjectFactory.java =================================================================== --- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/crawler/mail/DataObjectFactory.java 2010-06-07 11:40:48 UTC (rev 2351) +++ aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/crawler/mail/DataObjectFactory.java 2010-06-07 15:17:51 UTC (rev 2352) @@ -829,7 +829,7 @@ result.put(NMO.messageSubject, subject); } - addObjectIfNotNull(NMO.messageId, localMessage.getHeader("Message-ID"), result); + addObjectIfNotNull(NMO.messageId, getMessageId(localMessage), result); List<LiberalInternetAddress> from = MailUtil.getAddressHeader(localMessage, "From"); if (from == null) { @@ -859,7 +859,57 @@ } } - private void addBlankMessageArrayIfNotNull(URI uri, String[] headers, HashMap result) { + /** + * @param localMessage + * @return + * @throws MessagingException + */ + private String [] getMessageId(Message localMessage) throws MessagingException { + ArrayList<String> result = new ArrayList<String>(); + String [] ids = localMessage.getHeader("Message-ID"); + if (ids != null && ids.length > 0) { + for (String id : ids) { + if (id != null) { + String trimmedId = id.trim(); + if (trimmedId.length() > 
0 && !result.contains(trimmedId)) { + result.add(trimmedId); + } + } + } + } + + ids = localMessage.getHeader("Mapi-Smtp-Message-Id"); + if (ids != null && ids.length > 0) { + for (String id : ids) { + if (id != null) { + String trimmedId = id.trim(); + if (trimmedId.length() > 0 && !result.contains(trimmedId)) { + result.add(trimmedId); + } + } + } + } + + ids = localMessage.getHeader("Mapi-125-Message-Id"); + if (ids != null && ids.length > 0) { + for (String id : ids) { + if (id != null) { + String trimmedId = id.trim(); + if (trimmedId.length() > 0 && !result.contains(trimmedId)) { + result.add(trimmedId); + } + } + } + } + + if (result.size() > 0) { + return result.toArray(new String []{}); + } else { + return null; + } + } + + private void addBlankMessageArrayIfNotNull(URI uri, String[] headers, HashMap result) { if (headers == null || headers.length == 0) { return; } Modified: aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/crawler/mail/DataObjectFactoryTest.java =================================================================== --- aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/crawler/mail/DataObjectFactoryTest.java 2010-06-07 11:40:48 UTC (rev 2351) +++ aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/crawler/mail/DataObjectFactoryTest.java 2010-06-07 15:17:51 UTC (rev 2352) @@ -816,6 +816,17 @@ o3.dispose(); } + public void testMapiHeadersEmail() throws Exception { + DataObjectFactory fac = wrapEmail("mail-mapi125messageid.eml"); + DataObject o1 = fac.getObject(); + assertNull(fac.getObject()); + + assertEquals("<200...@po...>", + o1.getMetadata().getString(NMO.messageId)); + + o1.dispose(); + } + ////////////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////// BASIC PLUMBING ///////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////// Added: 
aperture/trunk/core/src/test/resources/org/semanticdesktop/aperture/docs/mail-mapi125messageid.eml =================================================================== --- aperture/trunk/core/src/test/resources/org/semanticdesktop/aperture/docs/mail-mapi125messageid.eml (rev 0) +++ aperture/trunk/core/src/test/resources/org/semanticdesktop/aperture/docs/mail-mapi125messageid.eml 2010-06-07 15:17:51 UTC (rev 2352) @@ -0,0 +1,123 @@ +Date: Fri, 4 Jan 2008 16:13:18 +0100 (CET) +From: Andrea Colangelo <wa...@li...> +To: ubu...@li... +Subject: [Bug 179555] Re: startupmanager crashed with IndexError + inget_default_boot() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Mapi-125-List-Id: Ubuntu bug tracker changes - HIGH VOLUME + <ubuntu-bugs.lists.ubuntu.com> +Mapi-Internet-Cpid: 20127 +Mapi-125-Subject: [Bug 179555] Re: startupmanager crashed with IndexError in + get_default_boot() +Mapi-2003-Unicode-32921: ubu...@li... +Mapi-125-To: ubu...@li... +Mapi-Client-Submit-Time: Sat, 05 Jan 2008 02:13:18 +1100 +Mapi-2003-Unicode-32920: 2.1.8 +Mapi-Internet-References: <200...@ga...> +Mapi-Sent-Representing-Name: Andrea Colangelo +Mapi-125-X-Mailman-Version: 2.1.8 +Mapi-125-Message-Id: <200...@po...> +Mapi-Message-Delivery-Time: Sat, 05 Jan 2008 02:20:34 +1100 +Mapi-Last-Modification-Time: Sun, 30 Mar 2008 21:12:37 +1100 +Mapi-Message-Size: 12327 +Mapi-2003-Unicode-32958: Bug Contact (Ubuntu) @ubuntu-bugs +Mapi-2003-Unicode-32957: Launchpad (canonical.com) +Mapi-Message-Flags: 0 +Mapi-2003-Unicode-32959: distribution=ubuntu; sourcepackage=startupmanager;component=universe; status=Confirmed; importance=Medium;assignee=None; +Mapi-125-List-Help: <mailto:ubu...@li...?subject=help> +Mapi-125-Errors-To: ubu...@li... +Mapi-List-Unsubscribe: <https://lists.ubuntu.com/mailman/listinfo/ubuntu-bugs>,<mailto:ubu...@li...?subject=unsubscribe> +Mapi-Sender-Name: ubu...@li... 
+Mapi-125-Return-Path: <ubu...@li...> +Mapi-125-Received: by 10.142.170.1 with SMTP id s1cs493751wfe; + Fri, 4 Jan 2008 07:20:34 -0800 (PST) by 10.101.68.19 with SMTP id v19mr35642043ank.104.1199460033693; + Fri, 04 Jan 2008 07:20:33 -0800 (PST) from chlorine.canonical.com (chlorine.canonical.com [91.189.94.204]) + by mx.google.com with ESMTP id i4si273578nfh.36.2008.01.04.07.20.21; + Fri, 04 Jan 2008 07:20:33 -0800 (PST) from localhost ([127.0.0.1] helo=chlorine.canonical.com) + by chlorine.canonical.com with esmtp (Exim 4.60) + (envelope-from <ubu...@li...>) + id 1JAoLM-0006za-Tf; Fri, 04 Jan 2008 15:20:20 +0000 from adelie.canonical.com ([91.189.90.139]) + by chlorine.canonical.com with esmtp (Exim 4.60) + (envelope-from <bo...@ca...>) id 1JAoLK-0006yN-LF + for ubu...@li...; Fri, 04 Jan 2008 15:20:18 +0000 from [82.211.81.190] (helo=forster.canonical.com) + by adelie.canonical.com with esmtp (Exim 4.60 #1 (Debian)) + id 1JAoLJ-0004A5-9z + for <ubu...@li...>; Fri, 04 Jan 2008 15:20:18 +0000 from forster.canonical.com (localhost [127.0.0.1]) + by forster.canonical.com (Postfix) with ESMTP id 92DCD4680B3 + for <ubu...@li...>; + Fri, 4 Jan 2008 15:20:15 +0000 (GMT) +Mapi-125-X-Launchpad-Message-Rationale: Bug Contact (Ubuntu) @ubuntu-bugs +Mapi-125-X-Launchpad-Bug: distribution=ubuntu; sourcepackage=startupmanager; + component=universe; status=Confirmed; importance=Medium; + assignee=None; +Mapi-2003-Unicode-3625: =?UTF-8?Q?00000003=01t...@gm...=01pop.gmail.com?= +Mapi-2003-Unicode-3624: =?UTF-8?Q?00000003=01t...@gm...=01pop.gmail.com?= +Mapi-List-Help: <mailto:ubu...@li...?subject=help> +Mapi-125-Content-Transfer-Encoding: 7bit +Mapi-125-MIME-Version: 1.0 +Mapi-125-Date: Fri, 04 Jan 2008 15:13:18 -0000 +Mapi-125-Content-Type: text/plain; charset="us-ascii" +Mapi-Conversation-Topic: [Bug 179555] Re: startupmanager crashed with IndexError inget_default_boot() +Mapi-Sent-Representing-Addrtype: SMTP +Mapi-125-References: <200...@ga...> +Mapi-125-List-Post: 
<mailto:ubu...@li...> +Mapi-Creation-Time: Sun, 30 Mar 2008 21:12:36 +1100 +Mapi-125-Received-SPF: neutral (google.com: 91.189.94.204 is neither permitted nor denied by best guess record for domain of ubu...@li...) client-ip=91.189.94.204; +Mapi-2003-Unicode-32836: =?UTF-8?Q?00000003=01t...@gm...?= +Mapi-Sender-Addrtype: SMTP +Mapi-125-Reply-To: Bug 179555 <17...@bu...> +Mapi-Body-Summary: Debdiff based on Andrea (Corbellini)'s patch. Built, installed and tested. + I'll take care to submit a bug report and the patch to Debian. + + ** Attachment added: "startupmanager_1.9.9-1ubuntu1.debdiff" + http://launchpadlibrarian.net/11153909/startupma +Mapi-Received-By-Name: OSI-Mail Test +Mapi-125-Precedence: bulk +PST-Slackspace-Item: false +Mapi-2003-Unicode-32835: pop.gmail.com +Mapi-Reply-Recipient-Names: Bug 179555 +Mapi-125-List-Subscribe: <https://lists.ubuntu.com/mailman/listinfo/ubuntu-bugs>, + <mailto:ubu...@li...?subject=subscribe> +Mapi-2003-I4-26128: 542 +Mapi-125-X-BeenThere: ubu...@li... +Mapi-List-Subscribe: <https://lists.ubuntu.com/mailman/listinfo/ubuntu-bugs>,<mailto:ubu...@li...?subject=subscribe> +Mapi-Sent-Representing-Email-Address: wa...@li... +Mapi-Smtp-Message-Id: <200...@po...> +Mapi-125-X-Generated-By: Launchpad (canonical.com) +Mapi-2003-I4-32792: 1188400 +Mapi-Message-Class: IPM.Note +Mapi-Subject: [Bug 179555] Re: startupmanager crashed with IndexError inget_default_boot() +Mapi-125-List-Unsubscribe: <https://lists.ubuntu.com/mailman/listinfo/ubuntu-bugs>, + <mailto:ubu...@li...?subject=unsubscribe> +Mapi-Display-To: ubu...@li... +Mapi-125-Sender: ubu...@li... +Mapi-Received-By-Email-Address: tri...@gm... +Mapi-125-From: Andrea Colangelo <wa...@li...> +Mapi-125-Authentication-Results: mx.google.com; spf=neutral (google.com: 91.189.94.204 is neither permitted nor denied by best guess record for domain of ubu...@li...) smtp.mail=ubu...@li... +Mapi-Return-Path: ubu...@li... +Mapi-125-Delivered-To: tri...@gm... 
+Mapi-Sender-Email-Address: ubu...@li... +Mapi-Received-By-Addrtype: SMTP +Content-Transfer-Encoding: 7bit + +Debdiff based on Andrea (Corbellini)'s patch. Built, installed and tested. +I'll take care to submit a bug report and the patch to Debian. + +** Attachment added: "startupmanager_1.9.9-1ubuntu1.debdiff" + http://launchpadlibrarian.net/11153909/startupmanager_1.9.9-1ubuntu1.debdiff + +** Changed in: startupmanager (Ubuntu) + Assignee: Andrea Colangelo (warp10) => (unassigned) + Status: In Progress => Confirmed + +-- +startupmanager crashed with IndexError in get_default_boot() +https://bugs.launchpad.net/bugs/179555 +You received this bug notification because you are a member of Ubuntu +Bugs, which is the bug contact for Ubuntu. + +-- +ubuntu-bugs mailing list +ub...@li... +https://lists.ubuntu.com/mailman/listinfo/ubuntu-bugs \ No newline at end of file This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <my...@us...> - 2010-06-10 14:14:42
|
Revision: 2353 http://aperture.svn.sourceforge.net/aperture/?rev=2353&view=rev Author: mylka Date: 2010-06-10 14:14:35 +0000 (Thu, 10 Jun 2010) Log Message: ----------- made the address header yield the same results regardless of the location of linebreaks Modified Paths: -------------- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/crawler/mail/MailUtil.java aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/crawler/mail/MailUtilTest.java Modified: aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/crawler/mail/MailUtil.java =================================================================== --- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/crawler/mail/MailUtil.java 2010-06-07 15:17:51 UTC (rev 2352) +++ aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/crawler/mail/MailUtil.java 2010-06-10 14:14:35 UTC (rev 2353) @@ -419,7 +419,16 @@ separators.add(c); currentChunk = new StringBuilder(); } else if (isLineBreak(c)) { - // do nothing, swallow line breaks + /* + * a line is broken, a linebreak + whitespace at the beginning of a subsequent line + * should be collapsed into a single space, we don't worry about adding any + * superfluous spaces at the end of a chunk, because we use trim() anyway + * before the chunk is added onto the list of chunks + */ + while (i+1 < header.length() && Character.isWhitespace(header.charAt(i+1))) { + i++; + } + currentChunk.append(" "); } else { currentChunk.append(c); } Modified: aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/crawler/mail/MailUtilTest.java =================================================================== --- aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/crawler/mail/MailUtilTest.java 2010-06-07 15:17:51 UTC (rev 2352) +++ aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/crawler/mail/MailUtilTest.java 2010-06-10 14:14:35 UTC (rev 2353) @@ -181,6 +181,22 @@ assertHeaders("<my...@us...;oth...@us...>", 
a(null, "my...@us...","oth...@us...")); + + /* + * It shouldn't make any difference where the line is broken, if we have a + * line break and the next line begins with some whitespace, the tokenizer + * should interpret every occurrence of such a string as a single space + */ + assertHeaders("Antoni Mylka <Ant...@ad...>, Christiaan\n" + + "\t Fluit <Chr...@ad...>", + a("Antoni Mylka","Ant...@ad..."), + // now we should have a single space between 'Christiaan' and 'Fluit' + a("Christiaan Fluit","Chr...@ad...")); + assertHeaders("Antoni Mylka <Ant...@ad...>, Christiaan Fluit\n" + + "\t <Chr...@ad...>", + a("Antoni Mylka","Ant...@ad..."), + // now we should have NO space at the end of Christiaan Fluit + a("Christiaan Fluit","Chr...@ad...")); } private void check(String st, String utcString) throws ParseException { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <my...@us...> - 2010-06-24 13:35:17
|
Revision: 2359 http://aperture.svn.sourceforge.net/aperture/?rev=2359&view=rev Author: mylka Date: 2010-06-24 13:35:09 +0000 (Thu, 24 Jun 2010) Log Message: ----------- [3020798] added a fix for text/rfc822-headers messages. Refactored the common methods out of DataObjectFactoryTest into AbstractDataObjectFactoryTest, this makes it easier to add new classes that test the functionality of the DataObjectFactory Modified Paths: -------------- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/crawler/mail/DataObjectFactory.java aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/crawler/mail/DataObjectFactoryTest.java Added Paths: ----------- aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/crawler/mail/AbstractDataObjectFactoryTest.java Modified: aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/crawler/mail/DataObjectFactory.java =================================================================== --- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/crawler/mail/DataObjectFactory.java 2010-06-24 11:25:30 UTC (rev 2358) +++ aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/crawler/mail/DataObjectFactory.java 2010-06-24 13:35:09 UTC (rev 2359) @@ -522,9 +522,12 @@ // we explicitly don't need any content from this mail part result = handleEmptyContentSinglePart(uri, messageCreationDate); } - else if ("message/rfc822".equals(mimeType)) { + else if ("message/rfc822".equals(mimeType) || + "text/rfc822-headers".equals(mimeType)) { // the part is a message in itself, we need to crawl into it - return handleRfc822SinglePart(mailPart, uri); + result = handleRfc822SinglePart(mailPart, uri); + result.put(NIE.mimeType, mimeType); + return result; } else { // it is a normal single part, it may be a forwarded message or an attachment, or the message itself @@ -626,6 +629,17 @@ result.put(RDF.type,NMO.Email); return result; } + else if (content instanceof InputStream) { + /* + * this part contains a nested 
message but javamail returns InputStream + * this happens on mail parts with the text/rfc822-headers which occur in "Delivery Status Notification" + * messages. + */ + Message nestedMessage = new MimeMessage(null, (InputStream)content); + HashMap result = handleMailPart(nestedMessage, uri, MailUtil.getStereotypicalContentCreatedDate(nestedMessage), true); + result.put(RDF.type,NMO.Email); + return result; + } else { logger.warn("message/rfc822 part with unknown content class: " + (content == null ? null : content.getClass())); @@ -994,7 +1008,12 @@ String uriPrefix = getBodyPartURIPrefix(uri); // interpret every nested part - int nrParts = part.getCount(); + int nrParts = 0; + if ((nrParts = getNestedPartCount(part)) == -1) { + return parent; + } + + ArrayList children = new ArrayList(nrParts); boolean first = true; for (int i = 0; i < nrParts; i++) { @@ -1038,10 +1057,15 @@ return parent; } - private HashMap handleAlternativePart(Multipart part, ContentType contentType, URI uri, Date date) + private HashMap handleAlternativePart(Multipart part, ContentType contentType, URI uri, Date date) throws MessagingException, IOException { // nothing to return when there are no parts - int count = part.getCount(); + int count = 0; + + if ((count = getNestedPartCount(part)) == -1) { + return null; + } + if (count == 0) { return null; } @@ -1113,7 +1137,10 @@ // interpret every body part in the digest multipart ArrayList children = new ArrayList(); - int nrParts = part.getCount(); + int nrParts = 0; + if ((nrParts = getNestedPartCount(part)) == -1) { + return parent; + } for (int i = 0; i < nrParts; i++) { // fetch the body part Part bodyPart = part.getBodyPart(i); @@ -1155,7 +1182,10 @@ // find the index of the root part, if specified (defaults to 0) int rootPartIndex = 0; - int nrBodyParts = part.getCount(); + int nrBodyParts = 0; + if ((nrBodyParts = getNestedPartCount(part)) == -1) { + return parent; + } String rootPartString = contentType.getParameter("start"); if 
(rootPartString != null) { @@ -1246,7 +1276,8 @@ Date date) throws MessagingException, IOException { // interpret the first body part, which contains the actual content HashMap child = null; - if (part.getCount() >= 2) { + int nrParts = getNestedPartCount(part); + if (nrParts >= 2) { child = handleMailPart(part.getBodyPart(partIndex), uri, date, false); } else { @@ -1287,7 +1318,10 @@ } // the first part contains a human-readable error message and will be treated as the mail body - int count = part.getCount(); + int count = 0; + if ((count = getNestedPartCount(part)) == -1) { + return parent; + } if (count > 0) { HashMap errorPart = handleMailPart(part.getBodyPart(0), uri, date, true); if (errorPart != null) { @@ -1579,6 +1613,20 @@ return prefix + partUriDelimiter; } } + + private int getNestedPartCount(Multipart part) throws MessagingException { + try { + int nrParts = part.getCount(); + return nrParts; + } catch (MessagingException e) { + String msg = e.getMessage(); + if (msg != null && msg.contains("Missing") && msg.contains("boundary")) { + return -1; + } else { + throw e; + } + } + } /** * Transfer all properties from one interpreted mail part to another, taking care to merge information Added: aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/crawler/mail/AbstractDataObjectFactoryTest.java =================================================================== --- aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/crawler/mail/AbstractDataObjectFactoryTest.java (rev 0) +++ aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/crawler/mail/AbstractDataObjectFactoryTest.java 2010-06-24 13:35:09 UTC (rev 2359) @@ -0,0 +1,176 @@ +/** + * Copyright (c) 2010 Aduna and Deutsches Forschungszentrum fuer Kuenstliche Intelligenz DFKI GmbH. + * All rights reserved. + * + * Licensed under the Aperture BSD-style license. 
+ */ +package org.semanticdesktop.aperture.crawler.mail; + +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.nio.charset.Charset; +import java.util.Arrays; +import java.util.Collection; +import java.util.Set; +import java.util.TreeSet; + +import javax.mail.MessagingException; +import javax.mail.internet.MimeMessage; + +import org.ontoware.rdf2go.model.Model; +import org.ontoware.rdf2go.model.node.Node; +import org.ontoware.rdf2go.model.node.Resource; +import org.ontoware.rdf2go.model.node.URI; +import org.ontoware.rdf2go.model.node.impl.URIImpl; +import org.ontoware.rdf2go.vocabulary.RDF; +import org.ontoware.rdf2go.vocabulary.XSD; +import org.semanticdesktop.aperture.accessor.DataObject; +import org.semanticdesktop.aperture.accessor.FileDataObject; +import org.semanticdesktop.aperture.test.ApertureTestBase; +import org.semanticdesktop.aperture.test.ApertureTestBase.TestRDFContainerFactory; +import org.semanticdesktop.aperture.util.DateUtil; +import org.semanticdesktop.aperture.util.IOUtil; +import org.semanticdesktop.aperture.util.ResourceUtil; +import org.semanticdesktop.aperture.vocabulary.NCO; +import org.semanticdesktop.aperture.vocabulary.NIE; +import org.semanticdesktop.aperture.vocabulary.NMO; + +/** + * @author Antoni + * + */ +public abstract class AbstractDataObjectFactoryTest extends ApertureTestBase { + + protected TestRDFContainerFactory containerFactory; + + protected void testSenderAndReceiver(Model model, URI emailUri, String senderName, String senderEmail, String receiverName, String receiverEmail) { + Resource sender = findSingleObjectResource(model, emailUri, NMO.from); + if (senderName != null) { + assertSingleValueProperty(model, sender, NCO.fullname, senderName); + } + Resource senderAddress = findSingleObjectResource(model, sender, NCO.hasEmailAddress); + assertSingleValueProperty(model, senderAddress, NCO.emailAddress, senderEmail); + + Resource receiver = 
findSingleObjectResource(model, emailUri, NMO.to); + if (receiverName != null) { + assertSingleValueProperty(model, receiver, NCO.fullname, receiverName); + } + Resource receiverAddress = findSingleObjectResource(model, receiver, NCO.hasEmailAddress); + assertSingleValueProperty(model, receiverAddress, NCO.emailAddress, receiverEmail); + } + + protected void testStandardMessageMetadata(Model model, URI emailUri, String charset, String mimeType, String contentMimeType, + String subject, String byteSize, String contentCreated, String messageId) throws Exception { + if (charset != null) { + assertSingleValueProperty(model, emailUri, NIE.characterSet, charset); + } + if (mimeType != null) { + assertSingleValueProperty(model, emailUri, NIE.mimeType, mimeType); + } + if (contentMimeType != null) { + assertSingleValueProperty(model, emailUri, NMO.contentMimeType, contentMimeType); + } + if (subject != null) { + assertSingleValueProperty(model, emailUri, NMO.messageSubject, subject); + } + if (byteSize != null) { + assertSingleValueProperty(model, emailUri, NIE.byteSize, model.createDatatypeLiteral(byteSize, XSD._integer)); + } + + if (contentCreated != null) { + // this exhibits the problem with ambiguous dates + String contentCreatedDateString = findSingleObjectNode(model, emailUri, NIE.contentCreated).asLiteral().getValue(); + assertTrue(DateUtil.dateTimeStringEqualToUTCString(contentCreatedDateString, contentCreated)); + //assertSingleValueProperty(model, emailUri, NIE.contentCreated, model.createDatatypeLiteral(contentCreated, XSD._dateTime)); + } + + + if (messageId != null) { + // this exhibits the problem with brackets + assertSingleValueProperty(model, emailUri, NMO.messageId, messageId); + } + + Set<Resource> emailTypes = findObjectResourceSet(model, emailUri, RDF.type); + assertEquals(4,emailTypes.size()); + assertTrue(emailTypes.contains(NMO.Email)); + assertTrue(emailTypes.contains(NMO.MimeEntity)); + assertTrue(emailTypes.contains(NMO.MailboxDataObject)); + 
assertTrue(emailTypes.contains(NIE.DataObject)); + } + + /** + * + */ + public AbstractDataObjectFactoryTest() { + super(); + } + + @Override + public void setUp() { + this.containerFactory = new TestRDFContainerFactory(); + } + + @Override + public void tearDown() { + containerFactory = null; + } + + protected DataObjectFactory wrapEmail(String path, String resourceName) throws MessagingException, IOException { + InputStream stream = ResourceUtil.getInputStream(path + resourceName, this.getClass()); + MimeMessage msg = new MimeMessage(null, stream); + DataObjectFactory fac = new DataObjectFactory(msg,containerFactory,null,null, + new URIImpl("uri:dummymailuri:" + resourceName), null); + return fac; + } + + protected void assertMessageId(String id, DataObject obj) { + assertEquals(id, findSingleObjectNode(obj.getMetadata().getModel(), obj.getID(), NMO.messageId) + .asLiteral().getValue()); + } + + protected void assertMessageContentContains(String string, DataObject obj) { + assertTrue(obj.getMetadata().getString(NMO.plainTextMessageContent).contains(string)); + } + + protected void assertAsciiFileContentContains(DataObject obj, String string) throws IOException { + assertTrue(IOUtil.readString( + new InputStreamReader(((FileDataObject) obj).getContent(), Charset.forName("US-ASCII"))) + .contains(string)); + } + + @SuppressWarnings("unchecked") + protected void assertReferencedEmails(DataObject object, URI prop, String ... 
ids) { + Set<String> referencedIdsSet = new TreeSet<String>(); + referencedIdsSet.addAll(Arrays.asList(ids)); + Collection<Node> nodes = object.getMetadata().getAll(prop); + assertEquals(referencedIdsSet.size(), nodes.size()); + Model model = object.getMetadata().getModel(); + for (Node node : nodes) { + Resource res = node.asResource(); + assertSingleValueProperty(model, res, RDF.type, NMO.Email); + String value = findSingleObjectNode(model, res, NMO.messageId).asLiteral().getValue(); + assertTrue(referencedIdsSet.remove(value)); + } + assertTrue(referencedIdsSet.isEmpty()); + } + + /** + * No content can be lost, everything either is a message (and the content is there) or a File + * (and the content is in the input stream). + * @param objs + * @throws IOException + * @throws MessagingException + */ + protected void assertNoContentLost(DataObjectFactory fac, DataObject ... objs) throws IOException, MessagingException { + assertNull(fac.getObject()); + for (DataObject obj : objs) { + assertNotNull(obj); + assertTrue( (obj instanceof MessageDataObject && ((MessageDataObject)obj).getMimeMessage() != null && + obj.getMetadata().getString(NMO.plainTextMessageContent) != null) || + (obj instanceof FileDataObject && + ((FileDataObject)obj).getContent() != null || obj.getMetadata().getString(NIE.plainTextContent) != null)); + } + } + +} Property changes on: aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/crawler/mail/AbstractDataObjectFactoryTest.java ___________________________________________________________________ Added: svn:mime-type + text/plain Modified: aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/crawler/mail/DataObjectFactoryTest.java =================================================================== --- aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/crawler/mail/DataObjectFactoryTest.java 2010-06-24 11:25:30 UTC (rev 2358) +++ 
aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/crawler/mail/DataObjectFactoryTest.java 2010-06-24 13:35:09 UTC (rev 2359) @@ -8,33 +8,25 @@ import java.io.IOException; import java.io.InputStream; -import java.io.InputStreamReader; -import java.nio.charset.Charset; -import java.util.Arrays; -import java.util.Collection; import java.util.Iterator; import java.util.Map; import java.util.Set; -import java.util.TreeSet; import javax.mail.MessagingException; import javax.mail.internet.MimeMessage; import org.ontoware.rdf2go.exception.ModelException; import org.ontoware.rdf2go.model.Model; -import org.ontoware.rdf2go.model.node.Node; import org.ontoware.rdf2go.model.node.Resource; import org.ontoware.rdf2go.model.node.URI; import org.ontoware.rdf2go.model.node.impl.URIImpl; import org.ontoware.rdf2go.util.RDFTool; import org.ontoware.rdf2go.vocabulary.RDF; -import org.ontoware.rdf2go.vocabulary.XSD; import org.semanticdesktop.aperture.accessor.DataObject; import org.semanticdesktop.aperture.accessor.FileDataObject; import org.semanticdesktop.aperture.extractor.Extractor; import org.semanticdesktop.aperture.extractor.pdf.PdfExtractorFactory; import org.semanticdesktop.aperture.rdf.RDFContainer; -import org.semanticdesktop.aperture.test.ApertureTestBase; import org.semanticdesktop.aperture.util.DateUtil; import org.semanticdesktop.aperture.util.IOUtil; import org.semanticdesktop.aperture.util.ResourceUtil; @@ -48,7 +40,7 @@ * A test case for the data object factory. It checks if the MimeMessage -> RDF mapping actually is * correct (i.e. 
the same as we would imagine it :) */ -public class DataObjectFactoryTest extends ApertureTestBase { +public class DataObjectFactoryTest extends AbstractDataObjectFactoryTest { /** * This method runs the data object factory over a simple email, with plain-text content written in @@ -57,7 +49,7 @@ * @throws Exception */ public void testOrdinarySinglePartPlainTextEmail() throws Exception { - DataObjectFactory fac = wrapEmail("mail-thunderbird-1.5.eml"); + DataObjectFactory fac = wrapEmail(DOCS_PATH, "mail-thunderbird-1.5.eml"); DataObject obj = fac.getObject(); assertTrue(obj instanceof MessageDataObject); // there should only be one data object @@ -113,7 +105,7 @@ * @throws Exception */ public void testMultipartAlternative() throws Exception { - DataObjectFactory fac = wrapEmail("mail-multipart-plain-html.eml"); + DataObjectFactory fac = wrapEmail(DOCS_PATH, "mail-multipart-plain-html.eml"); DataObject obj = fac.getObject(); assertTrue(obj instanceof MessageDataObject); // there should only be one data object @@ -154,7 +146,7 @@ * @throws Exception */ public void testMultipartMixed() throws Exception { - DataObjectFactory fac = wrapEmail("mail-multipart-test.eml"); + DataObjectFactory fac = wrapEmail(DOCS_PATH, "mail-multipart-test.eml"); DataObject obj1 = fac.getObject(); assertTrue(obj1 instanceof MessageDataObject); DataObject obj2 = fac.getObject(); @@ -255,7 +247,7 @@ * @throws Exception */ public void testMessageInAThread() throws Exception { - DataObjectFactory fac = wrapEmail("mail-threaded.eml"); + DataObjectFactory fac = wrapEmail(DOCS_PATH, "mail-threaded.eml"); DataObject obj = fac.getObject(); assertTrue(obj instanceof MessageDataObject); RDFContainer metadata = obj.getMetadata(); @@ -296,7 +288,7 @@ * @throws Exception */ public void testForwardedMessageWithReferecesAndInReplyToHeaders() throws Exception { - DataObjectFactory fac = wrapEmail("mail-forwarded-references.eml"); + DataObjectFactory fac = wrapEmail(DOCS_PATH, 
"mail-forwarded-references.eml"); DataObject myGreeting = fac.getObject(); assertTrue(myGreeting instanceof MessageDataObject); DataObject forwardedMsg = fac.getObject(); @@ -341,7 +333,7 @@ * @throws Exception */ public void testXmlAttachment() throws Exception { - DataObjectFactory fac = wrapEmail("mail-xml-attachment.eml"); + DataObjectFactory fac = wrapEmail(DOCS_PATH, "mail-xml-attachment.eml"); DataObject mailContent = fac.getObject(); assertTrue(mailContent instanceof MessageDataObject); DataObject xmlAttachment = fac.getObject(); @@ -378,7 +370,7 @@ * @throws Exception */ public void testPlainTextAttachment() throws Exception { - DataObjectFactory fac = wrapEmail("mail-plaintext-attachment.eml"); + DataObjectFactory fac = wrapEmail(DOCS_PATH, "mail-plaintext-attachment.eml"); DataObject mail = fac.getObject(); assertTrue(mail.getMetadata().getString(NMO.plainTextMessageContent).contains("Example body text.")); DataObject attachment = fac.getObject(); @@ -426,7 +418,7 @@ * @throws Exception */ public void testUnsupportedOperationException() throws Exception { - DataObjectFactory fac = wrapEmail("mail-UnsupportedOperationException.eml"); + DataObjectFactory fac = wrapEmail(DOCS_PATH, "mail-UnsupportedOperationException.eml"); DataObject mail = fac.getObject(); assertTrue(mail.getMetadata().getString(NMO.plainTextMessageContent).contains( "I've attached my .java file")); @@ -464,7 +456,7 @@ * @throws Exception */ public void testSuperfluousCharsets() throws Exception { - DataObjectFactory fac = wrapEmail("mail-multipart-test.eml"); + DataObjectFactory fac = wrapEmail(DOCS_PATH, "mail-multipart-test.eml"); DataObject email = fac.getObject(); DataObject pdf = fac.getObject(); DataObject forwardedEmail = fac.getObject(); @@ -482,7 +474,7 @@ * @throws Exception */ public void testCorrectlyInferredRFC2045Charset() throws Exception { - DataObjectFactory fac = wrapEmail("mail-thunderbird-1.5-unspecifiedcharset.eml"); + DataObjectFactory fac = wrapEmail(DOCS_PATH, 
"mail-thunderbird-1.5-unspecifiedcharset.eml"); DataObject email = fac.getObject(); assertEquals("iso-8859-1",email.getMetadata().getString(NIE.characterSet)); assertNoContentLost(fac, email); @@ -496,7 +488,7 @@ * @throws Exception */ public void testCorrectlyInferredRFC2045CharsetPlaintextAttachment() throws Exception { - DataObjectFactory fac = wrapEmail("mail-plaintext-attachment.eml"); + DataObjectFactory fac = wrapEmail(DOCS_PATH, "mail-plaintext-attachment.eml"); DataObject email = fac.getObject(); DataObject attachment = fac.getObject(); assertNoContentLost(fac, email, attachment); @@ -513,7 +505,7 @@ * @throws Exception */ public void testGetObjectAndDisposeAllOther() throws Exception { - DataObjectFactory fac = wrapEmail("mail-forwarded-references.eml"); + DataObjectFactory fac = wrapEmail(DOCS_PATH, "mail-forwarded-references.eml"); DataObject object = fac.getObjectAndDisposeAllOtherObjects("uri:dummymailuri:mail-forwarded-references.eml#1-1"); assertFalse(containerFactory.returnedContainers.get("uri:dummymailuri:mail-forwarded-references.eml").getModel().isOpen()); assertFalse(containerFactory.returnedContainers.get("uri:dummymailuri:mail-forwarded-references.eml#1").getModel().isOpen()); @@ -539,7 +531,7 @@ * @throws Exception */ public void testGetObjectString() throws Exception { - DataObjectFactory fac = wrapEmail("mail-forwarded-references.eml"); + DataObjectFactory fac = wrapEmail(DOCS_PATH, "mail-forwarded-references.eml"); DataObject object = fac.getObject("uri:dummymailuri:mail-forwarded-references.eml#1-1"); assertTrue(containerFactory.returnedContainers.get("uri:dummymailuri:mail-forwarded-references.eml").getModel().isOpen()); assertTrue(containerFactory.returnedContainers.get("uri:dummymailuri:mail-forwarded-references.eml#1").getModel().isOpen()); @@ -571,7 +563,7 @@ * @throws Exception */ public void testGetAllDataObjects() throws Exception { - DataObjectFactory fac = wrapEmail("mail-forwarded-references.eml"); + DataObjectFactory fac = 
wrapEmail(DOCS_PATH, "mail-forwarded-references.eml"); Map<URI, DataObject> objects = fac.getAllDataObjects(); assertTrue(containerFactory.returnedContainers.get("uri:dummymailuri:mail-forwarded-references.eml").getModel().isOpen()); assertTrue(containerFactory.returnedContainers.get("uri:dummymailuri:mail-forwarded-references.eml#1").getModel().isOpen()); @@ -599,7 +591,7 @@ * @throws MessagingException */ public void testReceivedDate() throws MessagingException, IOException { - DataObjectFactory fac = wrapEmail("mail-threaded.eml"); + DataObjectFactory fac = wrapEmail(DOCS_PATH, "mail-threaded.eml"); DataObject o1 = fac.getObject(); assertTrue(DateUtil.dateTimeEqualToUTCString(o1.getMetadata().getDate(NMO.sentDate), "2006-10-09T15:09:58Z")); @@ -615,7 +607,7 @@ * the nie:plainTextContent property. */ public void testMultipartRelatedBug() throws Exception { - DataObjectFactory fac = wrapEmail("mail-multipart-related-bug.eml"); + DataObjectFactory fac = wrapEmail(DOCS_PATH, "mail-multipart-related-bug.eml"); DataObject o1 = fac.getObject(); DataObject o2 = fac.getObject(); assertNoContentLost(fac, o1,o2); @@ -631,7 +623,7 @@ * it used a GnuPG certificate generated with the Gnome Seahorse application */ public void testPgpEncryptedEmail() throws MessagingException, IOException, ModelException { - DataObjectFactory fac = wrapEmail("encrypted/mail-pgp-encrypted.eml"); + DataObjectFactory fac = wrapEmail(DOCS_PATH, "encrypted/mail-pgp-encrypted.eml"); DataObject o1 = fac.getObject(); assertNotNull(o1); assertNull(fac.getObject()); @@ -645,7 +637,7 @@ * it used a GnuPG certificate generated with the Gnome Seahorse application */ public void testPgpMimeEncryptedEmail() throws MessagingException, IOException, ModelException { - DataObjectFactory fac = wrapEmail("encrypted/mail-pgpmime-encrypted.eml"); + DataObjectFactory fac = wrapEmail(DOCS_PATH, "encrypted/mail-pgpmime-encrypted.eml"); DataObject o1 = fac.getObject(); assertNotNull(o1); assertNull(fac.getObject()); @@ 
-661,7 +653,7 @@ * header in that email has been uppercased. */ public void testPgpMimeUcaseEncryptedEmail() throws MessagingException, IOException, ModelException { - DataObjectFactory fac = wrapEmail("encrypted/mail-pgpmime-encrypted-ucase.eml"); + DataObjectFactory fac = wrapEmail(DOCS_PATH, "encrypted/mail-pgpmime-encrypted-ucase.eml"); DataObject o1 = fac.getObject(); assertNotNull(o1); assertNull(fac.getObject()); @@ -676,7 +668,7 @@ * generated with OpenSSL. */ public void testSMimeEncryptedEmail() throws MessagingException, IOException, ModelException { - DataObjectFactory fac = wrapEmail("encrypted/mail-smime-encrypted.eml"); + DataObjectFactory fac = wrapEmail(DOCS_PATH, "encrypted/mail-smime-encrypted.eml"); DataObject o1 = fac.getObject(); assertNotNull(o1); assertNull(fac.getObject()); @@ -691,7 +683,7 @@ * generated with OpenSSL. The Content-Type headers have been uppercased. */ public void testSMimeUcaseEncryptedEmail() throws MessagingException, IOException, ModelException { - DataObjectFactory fac = wrapEmail("encrypted/mail-smime-encrypted-ucase.eml"); + DataObjectFactory fac = wrapEmail(DOCS_PATH, "encrypted/mail-smime-encrypted-ucase.eml"); DataObject o1 = fac.getObject(); assertNotNull(o1); assertNull(fac.getObject()); @@ -705,7 +697,7 @@ * An encrypted and signed email, is an encrypted email */ public void testPgpEncryptedSignedEmail() throws MessagingException, IOException, ModelException { - DataObjectFactory fac = wrapEmail("encrypted/mail-pgp-encrypted-signed.eml"); + DataObjectFactory fac = wrapEmail(DOCS_PATH, "encrypted/mail-pgp-encrypted-signed.eml"); DataObject o1 = fac.getObject(); assertNotNull(o1); assertNull(fac.getObject()); @@ -718,7 +710,7 @@ * An encrypted and signed email, is an encrypted email */ public void testPgpMimeEncryptedSignedEmail() throws MessagingException, IOException, ModelException { - DataObjectFactory fac = wrapEmail("encrypted/mail-pgpmime-encrypted-signed.eml"); + DataObjectFactory fac = 
wrapEmail(DOCS_PATH, "encrypted/mail-pgpmime-encrypted-signed.eml"); DataObject o1 = fac.getObject(); assertNotNull(o1); assertNull(fac.getObject()); @@ -732,7 +724,7 @@ * An encrypted and signed email, is an encrypted email */ public void testSMimeEncryptedSignedEmail() throws MessagingException, IOException, ModelException { - DataObjectFactory fac = wrapEmail("encrypted/mail-smime-encrypted-signed.eml"); + DataObjectFactory fac = wrapEmail(DOCS_PATH, "encrypted/mail-smime-encrypted-signed.eml"); DataObject o1 = fac.getObject(); assertNotNull(o1); assertNull(fac.getObject()); @@ -747,7 +739,7 @@ * it used a GnuPG certificate generated with the Gnome Seahorse application */ public void testPgpSignedEmail() throws MessagingException, IOException, ModelException { - DataObjectFactory fac = wrapEmail("encrypted/mail-pgp-signed.eml"); + DataObjectFactory fac = wrapEmail(DOCS_PATH, "encrypted/mail-pgp-signed.eml"); DataObject o1 = fac.getObject(); assertNotNull(o1); assertNull(fac.getObject()); @@ -762,7 +754,7 @@ * it used a GnuPG certificate generated with the Gnome Seahorse application */ public void testPgpMimeSignedEmail() throws MessagingException, IOException, ModelException { - DataObjectFactory fac = wrapEmail("encrypted/mail-pgpmime-signed.eml"); + DataObjectFactory fac = wrapEmail(DOCS_PATH, "encrypted/mail-pgpmime-signed.eml"); DataObject o1 = fac.getObject(); assertNotNull(o1); DataObject o2 = fac.getObject(); @@ -779,7 +771,7 @@ * generated with OpenSSL. */ public void testSMimeSignedEmail() throws MessagingException, IOException, ModelException { - DataObjectFactory fac = wrapEmail("encrypted/mail-smime-signed.eml"); + DataObjectFactory fac = wrapEmail(DOCS_PATH, "encrypted/mail-smime-signed.eml"); DataObject o1 = fac.getObject(); assertNotNull(o1); DataObject o2 = fac.getObject(); @@ -797,7 +789,7 @@ * empty content. 
*/ public void testAutomatedDebianInstallsEmail() throws MessagingException, IOException, ModelException { - DataObjectFactory fac = wrapEmail("encrypted/mail-automatingdebianinstalls-signed.eml"); + DataObjectFactory fac = wrapEmail(DOCS_PATH, "encrypted/mail-automatingdebianinstalls-signed.eml"); DataObject o1 = fac.getObject(); assertNotNull(o1); DataObject o2 = fac.getObject(); @@ -817,7 +809,7 @@ } public void testMapiHeadersEmail() throws Exception { - DataObjectFactory fac = wrapEmail("mail-mapi125messageid.eml"); + DataObjectFactory fac = wrapEmail(DOCS_PATH, "mail-mapi125messageid.eml"); DataObject o1 = fac.getObject(); assertNull(fac.getObject()); @@ -826,118 +818,5 @@ o1.dispose(); } - - ////////////////////////////////////////////////////////////////////////////////////////////////////// - ///////////////////////////////////////// BASIC PLUMBING ///////////////////////////////////////////// - ////////////////////////////////////////////////////////////////////////////////////////////////////// - - private void testSenderAndReceiver(Model model, URI emailUri, String senderName, String senderEmail, - String receiverName, String receiverEmail) { - Resource sender = findSingleObjectResource(model, emailUri, NMO.from); - if (senderName != null) { - assertSingleValueProperty(model, sender, NCO.fullname, senderName); - } - Resource senderAddress = findSingleObjectResource(model, sender, NCO.hasEmailAddress); - assertSingleValueProperty(model, senderAddress, NCO.emailAddress, senderEmail); - - Resource receiver = findSingleObjectResource(model, emailUri, NMO.to); - if (receiverName != null) { - assertSingleValueProperty(model, receiver, NCO.fullname, receiverName); - } - Resource receiverAddress = findSingleObjectResource(model, receiver, NCO.hasEmailAddress); - assertSingleValueProperty(model, receiverAddress, NCO.emailAddress, receiverEmail); - } - - private void testStandardMessageMetadata(Model model, URI emailUri, String charset, String mimeType, - String 
contentMimeType, String subject, String byteSize, String contentCreated, String messageId) throws Exception { - assertSingleValueProperty(model, emailUri, NIE.characterSet, charset); - assertSingleValueProperty(model, emailUri, NIE.mimeType, mimeType); - assertSingleValueProperty(model, emailUri, NMO.contentMimeType, contentMimeType); - assertSingleValueProperty(model, emailUri, NMO.messageSubject, subject); - assertSingleValueProperty(model, emailUri, NIE.byteSize, model.createDatatypeLiteral(byteSize, XSD._integer)); - - // this exhibits the problem with ambiguous dates - String contentCreatedDateString = findSingleObjectNode(model, emailUri, NIE.contentCreated).asLiteral().getValue(); - assertTrue(DateUtil.dateTimeStringEqualToUTCString(contentCreatedDateString, contentCreated)); - //assertSingleValueProperty(model, emailUri, NIE.contentCreated, model.createDatatypeLiteral(contentCreated, XSD._dateTime)); - - - // this exhibits the problem with brackets - assertSingleValueProperty(model, emailUri, NMO.messageId, messageId); - - Set<Resource> emailTypes = findObjectResourceSet(model, emailUri, RDF.type); - assertEquals(4,emailTypes.size()); - assertTrue(emailTypes.contains(NMO.Email)); - assertTrue(emailTypes.contains(NMO.MimeEntity)); - assertTrue(emailTypes.contains(NMO.MailboxDataObject)); - assertTrue(emailTypes.contains(NIE.DataObject)); - } - - private TestRDFContainerFactory containerFactory; - - @Override public void setUp() { - this.containerFactory = new TestRDFContainerFactory(); - } - - @Override public void tearDown() { - containerFactory = null; - } - - private DataObjectFactory wrapEmail(String resourceName) throws MessagingException, IOException { - InputStream stream = ResourceUtil.getInputStream(DOCS_PATH + resourceName, this.getClass()); - MimeMessage msg = new MimeMessage(null, stream); - DataObjectFactory fac = new DataObjectFactory(msg,containerFactory,null,null, - new URIImpl("uri:dummymailuri:" + resourceName), null); - return fac; - } - - 
private void assertMessageId(String id, DataObject obj) { - assertEquals(id, findSingleObjectNode(obj.getMetadata().getModel(), obj.getID(), NMO.messageId) - .asLiteral().getValue()); - } - - private void assertMessageContentContains(String string, DataObject obj) { - assertTrue(obj.getMetadata().getString(NMO.plainTextMessageContent).contains(string)); - } - - private void assertAsciiFileContentContains(DataObject obj, String string) throws IOException { - assertTrue(IOUtil.readString( - new InputStreamReader(((FileDataObject) obj).getContent(), Charset.forName("US-ASCII"))) - .contains(string)); - } - - @SuppressWarnings("unchecked") - private void assertReferencedEmails(DataObject object, URI prop, String ... ids) { - Set<String> referencedIdsSet = new TreeSet<String>(); - referencedIdsSet.addAll(Arrays.asList(ids)); - Collection<Node> nodes = object.getMetadata().getAll(prop); - assertEquals(referencedIdsSet.size(), nodes.size()); - Model model = object.getMetadata().getModel(); - for (Node node : nodes) { - Resource res = node.asResource(); - assertSingleValueProperty(model, res, RDF.type, NMO.Email); - String value = findSingleObjectNode(model, res, NMO.messageId).asLiteral().getValue(); - assertTrue(referencedIdsSet.remove(value)); - } - assertTrue(referencedIdsSet.isEmpty()); - } - - /** - * No content can be lost, everything either is a message (and the content is there) or a File - * (and the content is in the input stream). - * @param objs - * @throws IOException - * @throws MessagingException - */ - private void assertNoContentLost(DataObjectFactory fac, DataObject ... 
objs) throws IOException, MessagingException { - assertNull(fac.getObject()); - for (DataObject obj : objs) { - assertNotNull(obj); - assertTrue( (obj instanceof MessageDataObject && ((MessageDataObject)obj).getMimeMessage() != null && - obj.getMetadata().getString(NMO.plainTextMessageContent) != null) || - (obj instanceof FileDataObject && - ((FileDataObject)obj).getContent() != null || obj.getMetadata().getString(NIE.plainTextContent) != null)); - } - } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <my...@us...> - 2010-07-14 13:54:36
|
Revision: 2395 http://aperture.svn.sourceforge.net/aperture/?rev=2395&view=rev Author: mylka Date: 2010-07-14 13:54:29 +0000 (Wed, 14 Jul 2010) Log Message: ----------- [3029535] added another hack that removes single quotes from the name Modified Paths: -------------- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/crawler/mail/MailUtil.java aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/crawler/mail/MailUtilTest.java Modified: aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/crawler/mail/MailUtil.java =================================================================== --- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/crawler/mail/MailUtil.java 2010-07-14 11:50:16 UTC (rev 2394) +++ aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/crawler/mail/MailUtil.java 2010-07-14 13:54:29 UTC (rev 2395) @@ -405,7 +405,7 @@ currentChunk.append(c2); } } else if (QUOTATION_MARK.equals(c)) { - chunks.add(currentChunk.toString().trim()); + chunks.add(trimAndRemoveSingleQuotes(currentChunk.toString())); separators.add(c); currentChunk = new StringBuilder(); if (insideQuotes) { @@ -415,7 +415,7 @@ } } else if (!insideQuotes && isSeparator(c,commaIsASeparator)) { // inside quotes we disregard all separators - chunks.add(currentChunk.toString().trim()); + chunks.add(trimAndRemoveSingleQuotes(currentChunk.toString())); separators.add(c); currentChunk = new StringBuilder(); } else if (isLineBreak(c)) { @@ -434,8 +434,24 @@ } i++; } - chunks.add(currentChunk.toString().trim()); + + chunks.add(trimAndRemoveSingleQuotes(currentChunk.toString())); } + + private static String trimAndRemoveSingleQuotes(String currentChunk) { + String trimmedChunk = currentChunk.toString().trim(); + + // now a little hack, sometimes we see mail headers like this: + // "'ch...@ho...'" <ch...@ho...> + // enclosed both within double quotes and single quotes, + // we should remove those single quotes + if (trimmedChunk.length() >= 2 && + 
trimmedChunk.charAt(0) == '\'' && + trimmedChunk.charAt(trimmedChunk.length() - 1) == '\'') { + trimmedChunk = trimmedChunk.substring(1,trimmedChunk.length() - 1); + } + return trimmedChunk; + } /** * @param chunk Modified: aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/crawler/mail/MailUtilTest.java =================================================================== --- aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/crawler/mail/MailUtilTest.java 2010-07-14 11:50:16 UTC (rev 2394) +++ aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/crawler/mail/MailUtilTest.java 2010-07-14 13:54:29 UTC (rev 2395) @@ -11,13 +11,11 @@ import java.text.ParseException; import java.util.Date; import java.util.List; -import java.util.Set; import javax.mail.MessagingException; import javax.mail.internet.MimeMessage; import org.ontoware.rdf2go.model.Model; -import org.ontoware.rdf2go.model.node.Resource; import org.semanticdesktop.aperture.crawler.mail.MailUtil.LiberalInternetAddress; import org.semanticdesktop.aperture.rdf.RDFContainer; import org.semanticdesktop.aperture.test.ApertureTestBase; @@ -199,6 +197,16 @@ a("Christiaan Fluit","Chr...@ad...")); } + public void testTripleQuotesNames() { + /* + * Some headers use names which are enclosed in "' ... '", that is, a string is enclosed in single + * quotes, and they are enclosed in double quotes. Such abominations defile the output and ought to be + * cleansed before the innocent user is confronted with them. + */ + assertHeaders("\"'ch...@ho...'\" <ch...@ho...>", + a("ch...@ho...","ch...@ho...")); + } + private void check(String st, String utcString) throws ParseException { Date date = MailUtil.parseReceivedHeader(st); assertTrue(DateUtil.dateTimeEqualToUTCString(date, utcString)); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <my...@us...> - 2010-08-19 16:34:18
|
Revision: 2411 http://aperture.svn.sourceforge.net/aperture/?rev=2411&view=rev Author: mylka Date: 2010-08-19 16:34:11 +0000 (Thu, 19 Aug 2010) Log Message: ----------- [3043080] a test for the magic mime type identifier Modified Paths: -------------- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/bzip2/BZip2SubCrawlerFactory.java aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/gzip/GZipSubCrawlerFactory.java Added Paths: ----------- aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/mime/identifier/AbstractIdentificationTest.java aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/mime/identifier/magic/ApertureDocumentsIdentificationTest.java Modified: aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/bzip2/BZip2SubCrawlerFactory.java =================================================================== --- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/bzip2/BZip2SubCrawlerFactory.java 2010-08-18 12:29:35 UTC (rev 2410) +++ aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/bzip2/BZip2SubCrawlerFactory.java 2010-08-19 16:34:11 UTC (rev 2411) @@ -27,6 +27,7 @@ static { HashSet set = new HashSet(); set.add("application/bzip2"); + set.add("application/x-bzip2"); MIME_TYPES = Collections.unmodifiableSet(set); } Modified: aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/gzip/GZipSubCrawlerFactory.java =================================================================== --- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/gzip/GZipSubCrawlerFactory.java 2010-08-18 12:29:35 UTC (rev 2410) +++ aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/gzip/GZipSubCrawlerFactory.java 2010-08-19 16:34:11 UTC (rev 2411) @@ -27,6 +27,7 @@ static { HashSet set = new HashSet(); set.add("application/gzip"); + set.add("application/x-gzip"); MIME_TYPES = 
Collections.unmodifiableSet(set); } Added: aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/mime/identifier/AbstractIdentificationTest.java =================================================================== --- aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/mime/identifier/AbstractIdentificationTest.java (rev 0) +++ aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/mime/identifier/AbstractIdentificationTest.java 2010-08-19 16:34:11 UTC (rev 2411) @@ -0,0 +1,28 @@ +package org.semanticdesktop.aperture.mime.identifier; + +import static org.junit.Assert.assertEquals; +import info.aduna.io.ResourceUtil; + +import java.io.BufferedInputStream; +import java.io.InputStream; + +import org.semanticdesktop.aperture.mime.identifier.MimeTypeIdentifier; +import org.semanticdesktop.aperture.util.IOUtil; + +public abstract class AbstractIdentificationTest { + + protected void test(MimeTypeIdentifier mimeTypeIdentifier, String desiredMimeType, String path, boolean withPath) throws Exception { + InputStream stream = new BufferedInputStream(ResourceUtil.getInputStream(path)); + int minimumArrayLength = mimeTypeIdentifier.getMinArrayLength(); + stream.mark(minimumArrayLength + 10); // add some for safety + byte[] bytes = IOUtil.readBytes(stream, minimumArrayLength); + String mimeType = null; + if (withPath) { + mimeType = mimeTypeIdentifier.identify(bytes, path, null); + } else { + mimeType = mimeTypeIdentifier.identify(bytes, null, null); + } + assertEquals(desiredMimeType, mimeType); + stream.reset(); + } +} Property changes on: aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/mime/identifier/AbstractIdentificationTest.java ___________________________________________________________________ Added: svn:mime-type + text/plain Added: aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/mime/identifier/magic/ApertureDocumentsIdentificationTest.java =================================================================== --- 
aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/mime/identifier/magic/ApertureDocumentsIdentificationTest.java (rev 0) +++ aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/mime/identifier/magic/ApertureDocumentsIdentificationTest.java 2010-08-19 16:34:11 UTC (rev 2411) @@ -0,0 +1,271 @@ +/* + * Copyright (c) 2010 Aduna. + * All rights reserved. + * + * Licensed under the Aperture BSD-style license. + */ +package org.semanticdesktop.aperture.mime.identifier.magic; + +import org.junit.Before; +import org.junit.Test; +import org.semanticdesktop.aperture.mime.identifier.AbstractIdentificationTest; +import org.semanticdesktop.aperture.mime.identifier.MimeTypeIdentifier; + +public class ApertureDocumentsIdentificationTest extends AbstractIdentificationTest { + + private MimeTypeIdentifier identifier; + + @Before + public void setUp() { + this.identifier = new MagicMimeTypeIdentifier(); + } + + @Test + public void testIdentification() throws Exception { + + t("bzip2-txt-bziptest.txt.bz2", "application/bzip2", "application/bzip2"); + t("compress-txt-compresstest.txt.Z", "application/x-compress", "application/x-compress"); + + + + t("corel-presentations-3.0.shw", "application/vnd.wordperfect","application/presentations"); + t("corel-presentations-x3.shw", "application/vnd.ms-office", "application/presentations"); + t("corel-quattro-pro-6.wb2", "application/wb2", "application/wb2"); + t("corel-quattro-pro-7.wb3", "application/vnd.ms-office", "application/x-quattropro"); + t("corel-quattro-pro-x3.qpw", "application/vnd.ms-office", "application/x-quattropro"); + t("corel-wordperfect-4.2.wp", null, "application/vnd.wordperfect"); + t("corel-wordperfect-5.0.wp", "application/vnd.wordperfect","application/vnd.wordperfect"); + t("corel-wordperfect-5.1-far-east.wp", "application/vnd.wordperfect","application/vnd.wordperfect"); + t("corel-wordperfect-5.1.wp", "application/vnd.wordperfect","application/vnd.wordperfect"); + t("corel-wordperfect-x3.wpd", 
"application/vnd.wordperfect","application/vnd.wordperfect"); + t("counting-input-stream-test-file.dat", "application/zip", "application/zip"); + t("faulty-fileaccessdata-is-ignored.xml","application/gzip", "application/gzip"); + t("html-condenast.html", "text/html", "text/html"); + t("html-handwritten-with-wrong-file-extension.txt","text/html", "text/html"); + t("html-handwritten.html", "text/html", "text/html"); + t("html-mixed-case-header-and-wrong-extension.txt","text/html", "text/html"); + t("html-quelle.de.html", "text/html", "text/html"); + t("html-utf16-leading-whitespace-wrong-extension.doc","text/html", "text/html"); + t("html-youtube-contenttypeinhttpheaders.html","text/html", "text/html"); + t("jingle1.mp3", null, "audio/mpeg"); + t("jingle2.mp3", null, "audio/mpeg"); + t("jingle3.mp3", "audio/mpeg", "audio/mpeg"); + t("jpg-exif-img_9367.JPG", "image/jpeg", "image/jpeg"); + t("jpg-exif-zerolength.jpg", "text/plain", "image/jpeg"); // empty file + t("jpg-geotagged-ipanema.jpg", "image/jpeg", "image/jpeg"); + t("jpg-geotagged.jpg", "image/jpeg", "image/jpeg"); + t("mail-attachment.eml", "message/rfc822", "message/rfc822"); + t("mail-conflict-desktop1.eml", "text/plain", "message/rfc822"); // wrong + t("mail-conflict-desktop2.eml", "text/plain", "message/rfc822"); // wrong + t("mail-forwarded-references.eml", "text/plain", "message/rfc822"); // wrong + t("mail-mapi125messageid.eml", "message/rfc822", "message/rfc822"); + t("mail-mbox-aperture-inc1-mail1.eml", "text/plain", "message/rfc822"); // wrong + t("mail-mbox-aperture-inc1-mail2.eml", "text/plain", "message/rfc822"); // wrong + t("mail-mbox-aperture-inc1-mail3.eml", "text/plain", "message/rfc822"); // wrong + t("mail-mbox-aperture-inc1-mail4.eml", "text/plain", "message/rfc822"); // wrong + t("mail-multipart-plain-html.eml", "text/plain", "message/rfc822"); // wrong + t("mail-multipart-related-bug.eml", "message/rfc822", "message/rfc822"); + t("mail-multipart-test.eml", "text/plain", 
"message/rfc822"); // wrong + t("mail-multipart-test.eml.tar.gz", "application/gzip", "application/gzip"); + t("mail-plaintext-attachment.eml", "message/rfc822", "message/rfc822"); + t("mail-threaded.eml", "application/mbox", "application/mbox"); + t("mail-thunderbird-1.5-unspecifiedcharset.eml","message/rfc822", "message/rfc822"); + t("mail-thunderbird-1.5.eml", "message/rfc822", "message/rfc822"); + t("mail-UnsupportedOperationException.eml","message/rfc822", "message/rfc822"); + t("mail-xml-attachment.eml", "message/rfc822", "message/rfc822"); + t("mail.msg", "application/vnd.ms-office", "application/x-msg"); + t("mbox-aperture-dev", "application/mbox", "application/mbox"); + t("mbox-aperture-inc1", "application/mbox", "application/mbox"); + t("mbox-aperture-inc2", "application/mbox", "application/mbox"); + t("mbox-aperture-inc3", "application/mbox", "application/mbox"); + t("mbox-aperture-inc4", "application/mbox", "application/mbox"); + t("mbox-noblanklinebetweenmails.mbox", "application/mbox", "application/mbox"); + t("mbox-testfolder", "application/mbox", "application/mbox"); + t("mhtml-firefox.mht", "message/rfc822", "message/rfc822"); + t("mhtml-internet-explorer.mht", "message/rfc822", "message/rfc822"); + + t("microsoft-excel-2000.xls", "application/vnd.ms-office", // wrong + "application/vnd.ms-excel"); + t("microsoft-excel-2007beta2.xlam", "application/zip", // wrong + "application/vnd.openxmlformats-officedocument.spreadsheetml"); + t("microsoft-excel-2007beta2.xlsb", "application/zip", // wrong + "application/vnd.openxmlformats-officedocument.spreadsheetml"); + t("microsoft-excel-2007beta2.xlsm", "application/zip", // wrong + "application/vnd.openxmlformats-officedocument.spreadsheetml"); + t("microsoft-excel-2007beta2.xlsx", "application/zip", // wrong + "application/vnd.openxmlformats-officedocument.spreadsheetml"); + t("microsoft-excel-2007beta2.xltm", "application/zip", // wrong + "application/vnd.openxmlformats-officedocument.spreadsheetml"); + 
t("microsoft-excel-2007beta2.xltx", "application/zip", // wrong + "application/vnd.openxmlformats-officedocument.spreadsheetml"); + t("microsoft-excel-2010beta.xlsx", "application/zip", // wrong + "application/vnd.openxmlformats-officedocument.spreadsheetml"); + + t("microsoft-powerpoint-2000.ppt", "application/vnd.ms-office", // wrong + "application/vnd.ms-powerpoint"); + t("microsoft-powerpoint-2007beta2.potm", "application/zip", // wrong + "application/vnd.openxmlformats-officedocument.presentationml"); + t("microsoft-powerpoint-2007beta2.potx", "application/zip", // wrong + "application/vnd.openxmlformats-officedocument.presentationml"); + t("microsoft-powerpoint-2007beta2.ppsm", "application/zip", // wrong + "application/vnd.openxmlformats-officedocument.presentationml"); + t("microsoft-powerpoint-2007beta2.ppsx", "application/zip", // wrong + "application/vnd.openxmlformats-officedocument.presentationml"); + t("microsoft-powerpoint-2007beta2.pptm", "application/zip", // wrong + "application/vnd.openxmlformats-officedocument.presentationml"); + t("microsoft-powerpoint-2007beta2.pptx", "application/zip", // wrong + "application/vnd.openxmlformats-officedocument.presentationml"); + t("microsoft-powerpoint-2010beta.pptx", "application/zip", // wrong + "application/vnd.openxmlformats-officedocument.presentationml"); + t("microsoft-powerpoint-invalidunicode.ppt","application/vnd.ms-office", // wrong + "application/vnd.ms-powerpoint"); + + t("microsoft-publisher-2003.pub","application/vnd.ms-office","application/x-mspublisher"); // wrong + t("microsoft-visio.vsd","application/vnd.ms-office","application/vnd.visio"); // wrong + + t("microsoft-word-2000-with-wrong-file-extension.pdf","application/vnd.ms-office", // wrong + "application/vnd.ms-office"); // wrong + t("microsoft-word-2000.doc", "application/vnd.ms-office", // wrong + "application/vnd.ms-word"); + t("microsoft-word-2007beta2.docm", "application/zip", // wrong + 
"application/vnd.openxmlformats-officedocument.wordprocessingml"); + t("microsoft-word-2007beta2.docx", "application/zip", // wrong + "application/vnd.openxmlformats-officedocument.wordprocessingml"); + t("microsoft-word-2007beta2.dotm", "application/zip", // wrong + "application/vnd.openxmlformats-officedocument.wordprocessingml"); + t("microsoft-word-2007beta2.dotx", "application/zip", // wrong + "application/vnd.openxmlformats-officedocument.wordprocessingml"); // to clarify + t("microsoft-word-2010beta.docx", "application/zip", // wrong + "application/vnd.openxmlformats-officedocument.wordprocessingml"); + t("microsoft-word-history-blair.doc", "application/vnd.ms-office", // wrong + "application/vnd.ms-word"); + t("microsoft-word-illegal-unicode-characters.doc", "application/vnd.ms-office", // wrong + "application/vnd.ms-word"); + t("microsoft-word-testdoc-comments.doc", "application/vnd.ms-office", // wrong + "application/vnd.ms-word"); + t("microsoft-word-testdoc-nocomments.doc","application/vnd.ms-office", // wrong + "application/vnd.ms-word"); + + t("microsoft-works-spreadsheet-3.0.wks", "application/wb2","application/wb2"); + t("microsoft-works-spreadsheet-4.0-2000.wks", "application/vnd.ms-works","application/vnd.ms-works"); + t("microsoft-works-spreadsheet-7.0.xlr", "application/vnd.ms-office","application/vnd.ms-works"); + + t("microsoft-works-word-processor-2000.wps", "application/vnd.ms-office","application/vnd.ms-works"); // wrong + t("microsoft-works-word-processor-3.0.wps", "application/vnd.ms-office","application/vnd.ms-works"); + t("microsoft-works-word-processor-4.0.wps", "application/vnd.ms-office","application/vnd.ms-works"); + t("microsoft-works-word-processor-7.0.wps", "application/vnd.ms-office","application/vnd.ms-works"); + + t("openoffice-1.1.5-calc-template.stc", "application/zip", "application/vnd.sun.xml.calc.template"); + t("openoffice-1.1.5-calc.sxc", "application/zip", "application/vnd.sun.xml.calc"); + 
t("openoffice-1.1.5-draw-template.std", "application/zip", "application/vnd.sun.xml.draw.template"); + t("openoffice-1.1.5-draw.sxd", "application/zip", "application/vnd.sun.xml.draw"); + t("openoffice-1.1.5-impress-template.sti", "application/zip", "application/vnd.sun.xml.impress.template"); + t("openoffice-1.1.5-impress.sxi", "application/zip", "application/vnd.sun.xml.impress"); + t("openoffice-1.1.5-writer-template.stw", "application/zip", "application/vnd.sun.xml.writer.template"); + t("openoffice-1.1.5-writer.sxw", "application/zip", "application/vnd.sun.xml.writer"); + + t("openoffice-2.0-calc-template.ots", "application/zip", + "application/vnd.oasis.opendocument.spreadsheet-template"); + t("openoffice-2.0-calc.ods", "application/zip", + "application/vnd.oasis.opendocument.spreadsheet"); + t("openoffice-2.0-draw-template.otg", "application/zip", + "application/vnd.oasis.opendocument.graphics-template"); + t("openoffice-2.0-draw.odg", "application/zip", + "application/vnd.oasis.opendocument.graphics"); + t("openoffice-2.0-formula.odf", "application/zip", + "application/vnd.oasis.opendocument.formula"); + t("openoffice-2.0-impress-template.otp","application/zip", + "application/vnd.oasis.opendocument.presentation-template"); + t("openoffice-2.0-impress.odp", "application/zip", + "application/vnd.oasis.opendocument.presentation"); + t("openoffice-2.0-writer-template.ott", "application/zip", + "application/vnd.oasis.opendocument.text-template"); + t("openoffice-2.0-writer.odt", "application/zip", + "application/vnd.oasis.opendocument.text"); + + t("pdf-distiller-6-weirdchars.pdf", "application/pdf", "application/pdf"); + t("pdf-manyauthors.pdf", "application/pdf", "application/pdf"); + t("pdf-no-author.pdf", "application/pdf", "application/pdf"); + t("pdf-openoffice-1.1.5-writer.pdf", "application/pdf", "application/pdf"); + t("pdf-openoffice-2.0-writer.pdf", "application/pdf", "application/pdf"); + t("pdf-openoffice-2.0-writer.pdf.tar", "application/x-tar", 
"application/x-tar"); + t("pdf-word-2000-pdfcreator-0.8.0.pdf", "application/pdf", "application/pdf"); + t("pdf-word-2000-pdfmaker-7.0.pdf", "application/pdf", "application/pdf"); + t("pdf-word-2000-pdfwriter-7.0.pdf", "application/pdf", "application/pdf"); + + t("plain-text-ansi.txt", "text/plain", "text/plain"); + t("plain-text-china-wikipedia-utf16be.txt", null, "text/plain"); + t("plain-text-china-wikipedia-utf8.txt", null, "text/plain"); + t("plain-text-chinese-garbled-name-gb18030.txt", null, "text/plain"); + t("plain-text-chinese-gb18030.txt", null, "text/plain"); + t("plain-text-chinese-utf16.txt", "text/plain", "text/plain"); + t("plain-text-empty.txt", "text/plain", "text/plain"); // empty file + t("plain-text-japan-wikipedia-eucjp.txt", null, "text/plain"); + t("plain-text-japanese-juniversalchardettest-bomremoved-utf16le.txt", null, "text/plain"); + t("plain-text-japanese-juniversalchardettest-eucjp.txt", null, "text/plain"); + t("plain-text-japanese-juniversalchardettest-iso2022jp.txt", null, "text/plain"); + t("plain-text-japanese-juniversalchardettest-shiftjis.txt", null, "text/plain"); + t("plain-text-japanese-juniversalchardettest-utf8nobom.txt", null, "text/plain"); + t("plain-text-pt-ksiega1-latin2.txt", null, "text/plain"); + t("plain-text-pt-ksiega1-utf16be.txt", null, "text/plain"); + t("plain-text-pt-ksiega1-utf16le.txt", null, "text/plain"); + t("plain-text-pt-ksiega1-utf8.txt", null, "text/plain"); + t("plain-text-utf16be.txt", "text/plain", "text/plain"); + t("plain-text-utf16le.txt", "text/plain", "text/plain"); + t("plain-text-utf8.txt", "text/plain", "text/plain"); + t("plain-text-with-null-character.txt", null, "text/plain"); + t("plain-text-without-extension", "text/plain", "text/plain"); + t("plain-text.txt", "text/plain", "text/plain"); + + t("rtf-openoffice-1.1.5.rtf", "text/rtf", "text/rtf"); + t("rtf-openoffice-2.0.rtf", "text/rtf", "text/rtf"); + t("rtf-staroffice-5.2.rtf", "text/rtf", "text/rtf"); + t("rtf-word-2000.rtf", 
"text/rtf", "text/rtf"); + + t("staroffice-5.2-calc-template.vor", "application/vnd.ms-office", "application/vnd.ms-office"); + t("staroffice-5.2-calc.sdc", "application/vnd.ms-office", "application/vnd.stardivision.calc"); + t("staroffice-5.2-draw-template.vor", "application/vnd.ms-office", "application/vnd.ms-office"); + t("staroffice-5.2-draw.sda", "application/vnd.ms-office", "application/vnd.stardivision.draw"); + t("staroffice-5.2-impress-template.vor", "application/vnd.ms-office", "application/vnd.ms-office"); + t("staroffice-5.2-impress.sdd", "application/vnd.ms-office", "application/vnd.stardivision.impress"); + t("staroffice-5.2-writer-template.vor", "application/vnd.ms-office", "application/vnd.ms-office"); + t("staroffice-5.2-writer.sdw", "application/vnd.ms-office", "application/vnd.stardivision.writer"); + + t("tar-test.tar","application/x-tar","application/x-tar"); + + t("thunderbird-addressbook.mab","text/plain","application/x-mozilla-addressbook"); + + t("vcard-antoni-cardpicture.vcf","text/x-vcard","text/x-vcard"); + t("vcard-antoni-kontact.vcf","text/x-vcard","text/x-vcard"); + t("vcard-antoni-outlook2003-urlphoto.vcf","text/x-vcard","text/x-vcard"); + t("vcard-antoni-outlook2003.vcf","text/x-vcard","text/x-vcard"); + t("vcard-dirk-corrupted.vcf","text/plain","text/x-vcard"); // wrong, but this one is corrupted + t("vcard-dirk.vcf","text/x-vcard","text/x-vcard"); + t("vcard-illegalurl.vcf","text/x-vcard","text/x-vcard"); + t("vcard-incompletenproperty.vcf","text/x-vcard","text/x-vcard"); + t("vcard-rfc2426.vcf","text/x-vcard","text/x-vcard"); + t("vcard-vCards-SAP-onemodified.vcf","text/x-vcard","text/x-vcard"); + t("vcard-vCards-SAP.vcf","text/x-vcard","text/x-vcard"); + + t("xml-handwritten.xml","text/xml","text/xml"); + t("xml-nonexistent-dtd.xml","text/xml","text/xml"); + t("xml-nonexistent-remote-dtd.xml","text/xml","text/xml"); + t("xml-nonexistent-remote-xsd.xml","text/xml","text/xml"); + t("xml-nonexistent-xsd.xml","text/xml","text/xml"); 
+ t("xml-utf8-bom","text/xml","text/xml"); + + t("zip_7zr_on_linux_password_hello.zip","application/x-7z-compressed","application/x-7z-compressed"); + t("zip-infiniteloop.zip","application/zip","application/zip"); + t("zip-mail-attachment.zip","application/zip","application/zip"); + t("zip-mail-forwarded-message.zip","application/zip","application/zip"); + t("zip-multivolume-firstvolume.zip","application/zip","application/zip"); + t("zip-problem.zip","application/zip","application/zip"); + t("zip-somedocs.zip","application/zip","application/zip"); + t("zip-test.zip","application/zip","application/zip"); + } + + private void t(String name, String mimeTypeWithoutName, String mimeTypeWithName) throws Exception { + test(identifier, mimeTypeWithoutName, "/org/semanticdesktop/aperture/docs/" + name, false); + test(identifier, mimeTypeWithName, "/org/semanticdesktop/aperture/docs/" + name, true); + } +} Property changes on: aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/mime/identifier/magic/ApertureDocumentsIdentificationTest.java ___________________________________________________________________ Added: svn:mime-type + text/plain This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <my...@us...> - 2010-08-26 14:56:17
|
Revision: 2416 http://aperture.svn.sourceforge.net/aperture/?rev=2416&view=rev Author: mylka Date: 2010-08-26 14:56:10 +0000 (Thu, 26 Aug 2010) Log Message: ----------- [3041877] something even better than AttachmentsOnlyMode, i've come up with MultiLevel mode for the VcardSubcrawler Modified Paths: -------------- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/vcard/VcardSubCrawler.java aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/vcard/VcardSubCrawlerFactory.java aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/vcard/VcardSubCrawlerTest.java Added Paths: ----------- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/util/ByteArrayCharSequence.java aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/util/ByteArrayCharSequenceTest.java aperture/trunk/core/src/test/resources/org/semanticdesktop/aperture/docs/vcard-vCards-SAP-markussprung.vcf Modified: aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/vcard/VcardSubCrawler.java =================================================================== --- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/vcard/VcardSubCrawler.java 2010-08-26 11:37:05 UTC (rev 2415) +++ aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/vcard/VcardSubCrawler.java 2010-08-26 14:56:10 UTC (rev 2416) @@ -16,9 +16,13 @@ import java.net.URISyntaxException; import java.nio.charset.Charset; import java.text.ParseException; +import java.util.ArrayList; import java.util.Date; import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import net.fortuna.ical4j.data.ParserException; import net.fortuna.ical4j.model.ValidationException; import net.fortuna.ical4j.util.CompatibilityHints; import net.fortuna.ical4j.vcard.Group; @@ -72,6 +76,7 @@ import org.semanticdesktop.aperture.subcrawler.SubCrawlerHandler; import 
org.semanticdesktop.aperture.subcrawler.SubCrawlerUtil; import org.semanticdesktop.aperture.subcrawler.base.AbstractSubCrawler; +import org.semanticdesktop.aperture.util.ByteArrayCharSequence; import org.semanticdesktop.aperture.util.DateUtil; import org.semanticdesktop.aperture.util.IOUtil; import org.semanticdesktop.aperture.util.StringUtil; @@ -133,6 +138,18 @@ private Logger logger = LoggerFactory.getLogger(this.getClass()); + private static final Pattern VCARD_START_PATTERN = Pattern.compile("BEGIN:VCARD"); + + private boolean multiLevelMode; + + public VcardSubCrawler() { + this(false); + } + + public VcardSubCrawler(boolean multiLevelMode) { + this.multiLevelMode = multiLevelMode; + } + /** * @see SubCrawler#subCrawl(URI, InputStream, SubCrawlerHandler, DataSource, AccessData, Charset, String, RDFContainer) */ @@ -148,49 +165,58 @@ try { // first read the whole stream into a byte array - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - IOUtil.writeStream(stream, baos); - byte [] bytes = baos.toByteArray(); + byte [] bytes = IOUtil.readBytes(stream); - // add the fulltext, before the parser is invoked, so that // the fulltext is added regardless of the whether the file // is parseable or not try { String fulltext = new String(bytes, "UTF-8"); - parentMetadata.add(NIE.plainTextContent,fulltext); + parentMetadata.put(NIE.plainTextContent,fulltext); } catch (Exception e) { logger.warn("Couldn't add the fulltext of the vcard file",e); } - - Reader reader = new InputStreamReader(new ByteArrayInputStream(bytes)); - GroupRegistry groupRegistry = new GroupRegistry(); - PropertyFactoryRegistry propReg = new PropertyFactoryRegistry(); - ParameterFactoryRegistry parReg = new ParameterFactoryRegistry(); - - addTypeParamsToRegistry(parReg); - addCardpicturePropertyToRegistry(propReg); - CompatibilityHints.setHintEnabled(CompatibilityHints.KEY_RELAXED_PARSING, true); - CompatibilityHints.setHintEnabled(CompatibilityHints.KEY_RELAXED_UNFOLDING, true); - 
CompatibilityHints.setHintEnabled(CompatibilityHints.KEY_RELAXED_VALIDATION, true); - CompatibilityHints.setHintEnabled(CompatibilityHints.KEY_OUTLOOK_COMPATIBILITY, true); - CompatibilityHints.setHintEnabled(CompatibilityHints.KEY_NOTES_COMPATIBILITY, true); - VCardBuilder builder = new VCardBuilder(reader,groupRegistry,propReg,parReg); - List<VCard> cards = builder.buildAll(); - VCardOutputter outputter = new VCardOutputter(false); - if (cards.size() == 1) { - processContact(cards.get(0), parentMetadata, parentMetadata.getDescribedUri(), handler, accessData, dataSource, outputter); - } - else { - processAddressBook(cards, parentMetadata, handler, outputter, accessData, dataSource); - } + ArrayList<Integer> indexes = new ArrayList<Integer>(); + Matcher matcher = VCARD_START_PATTERN.matcher(new ByteArrayCharSequence(bytes)); + while (matcher.find()) { + indexes.add(matcher.start()); + } + VCardOutputter outputter = new VCardOutputter(false); + + if (indexes.size() == 0) { + throw new SubCrawlerException("Couldn't find a BEGIN:VCARD block in the file"); + } else if (indexes.size() == 1){ + Reader reader = new InputStreamReader(new ByteArrayInputStream(bytes,indexes.get(0),bytes.length - indexes.get(0))); + VCardBuilder builder = createVcardBuilder(reader); + VCard vcard = builder.build(); + processContact(vcard, parentMetadata, parentMetadata.getDescribedUri(), handler, accessData, dataSource, outputter); + } else { + processAddressBook(bytes, indexes, parentMetadata, handler, outputter, accessData, dataSource); + } } catch (Exception e) { logger.warn("Error while parsing vcard: " + parentMetadata.getDescribedUri(),e); } } + private VCardBuilder createVcardBuilder(Reader reader) { + GroupRegistry groupRegistry = new GroupRegistry(); + PropertyFactoryRegistry propReg = new PropertyFactoryRegistry(); + ParameterFactoryRegistry parReg = new ParameterFactoryRegistry(); + + addTypeParamsToRegistry(parReg); + addCardpicturePropertyToRegistry(propReg); + 
CompatibilityHints.setHintEnabled(CompatibilityHints.KEY_RELAXED_PARSING, true); + CompatibilityHints.setHintEnabled(CompatibilityHints.KEY_RELAXED_UNFOLDING, true); + CompatibilityHints.setHintEnabled(CompatibilityHints.KEY_RELAXED_VALIDATION, true); + CompatibilityHints.setHintEnabled(CompatibilityHints.KEY_OUTLOOK_COMPATIBILITY, true); + CompatibilityHints.setHintEnabled(CompatibilityHints.KEY_NOTES_COMPATIBILITY, true); + + VCardBuilder builder = new VCardBuilder(reader,groupRegistry,propReg,parReg); + return builder; + } + private void addTypeParamsToRegistry(ParameterFactoryRegistry parReg) { for (final String name : new String[] {"HOME","WORK","MSG","PREF","VOICE","FAX","CELL", "VIDEO","PAGER","BBS","MODEM","CAR","ISDN","PCS","INTERNET","X400","DOM", @@ -247,20 +273,36 @@ return VcardSubCrawlerFactory.VCARD_URI_PREFIX; } - private void processAddressBook(List<VCard> contacts, RDFContainer parentMetadata, - SubCrawlerHandler handler, VCardOutputter out, AccessData accessData, DataSource source) { + private void processAddressBook(byte [] bytes, List<Integer> indexes, RDFContainer parentMetadata, + SubCrawlerHandler handler, VCardOutputter out, AccessData accessData, DataSource source) throws IOException, ParserException { parentMetadata.add(RDF.type, NCO.ContactList); - for (VCard contact : contacts) { + for (int i = 0; i < indexes.size(); i++) { + int start = indexes.get(i); + int length = bytes.length - start; + if (i < indexes.size() - 1) { + length = indexes.get(i+1) - indexes.get(i); + } + Reader reader = new InputStreamReader(new ByteArrayInputStream(bytes,start,length)); + VCardBuilder builder = createVcardBuilder(reader); + VCard contact = builder.build(); + try { String contactHash = getContactHash(contact, out); URI contactUri = generateURIForContact(contact, parentMetadata, contactHash); RDFContainerFactory factory = handler.getRDFContainerFactory(contactUri.toString()); RDFContainer container = factory.getRDFContainer(contactUri); - 
processContact(contact, container, contactUri, handler, accessData, source, out); parentMetadata.add(NCO.containsContact, contactUri); container.add(RDF.type, NCO.ContactListDataObject); container.add(NIE.isPartOf, parentMetadata.getDescribedUri()); - passMetadataToHandler(container, handler, contactHash, accessData, source); + + if (multiLevelMode) { + passFileDataObjectToHandler(container, bytes, start, length, handler, + contactHash, accessData, source); + } else { + processContact(contact, container, contactUri, handler, accessData, source, out); + passMetadataToHandler(container, handler, contactHash, accessData, source); + } + } catch (Exception e) { logger.warn("Failed to process vcard",e); } @@ -287,6 +329,26 @@ } } + private void passFileDataObjectToHandler(RDFContainer container, byte [] buf, int start, int length, SubCrawlerHandler handler, String objectHash, + AccessData accessData, DataSource source) { + URI uri = container.getDescribedUri(); + DataObject object = new FileDataObjectBase(uri, source, container, new ByteArrayInputStream(buf,start,length)); + if (accessData == null) { + handler.objectNew(object); + } else if (!accessData.isKnownId(uri.toString())) { + accessData.put(uri.toString(), OBJECT_HASH_KEY, objectHash); + handler.objectNew(object); + } else { + String oldHash = accessData.get(uri.toString(), OBJECT_HASH_KEY); + if (oldHash == null || !oldHash.equals(objectHash)) { + accessData.put(uri.toString(), OBJECT_HASH_KEY, objectHash); + handler.objectChanged(object); + } else { + handler.objectNotModified(uri.toString()); + } + } + } + private void passAttachmentToHandler(RDFContainer container, SubCrawlerHandler handler, String attachmentHash, AccessData accessData, DataSource source, byte [] bytes) { URI uri = container.getDescribedUri(); Modified: aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/vcard/VcardSubCrawlerFactory.java =================================================================== --- 
aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/vcard/VcardSubCrawlerFactory.java 2010-08-26 11:37:05 UTC (rev 2415) +++ aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/vcard/VcardSubCrawlerFactory.java 2010-08-26 14:56:10 UTC (rev 2416) @@ -14,6 +14,16 @@ import org.semanticdesktop.aperture.subcrawler.SubCrawlerFactory; public class VcardSubCrawlerFactory implements SubCrawlerFactory { + + private boolean multiLevelMode = false; + + public VcardSubCrawlerFactory() { + this(false); + } + + public VcardSubCrawlerFactory(boolean multiLevelMode) { + this.multiLevelMode = multiLevelMode; + } private static final Set MIME_TYPES; @@ -27,7 +37,7 @@ } public SubCrawler get() { - return new VcardSubCrawler(); + return new VcardSubCrawler(multiLevelMode); } public Set getSupportedMimeTypes() { Added: aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/util/ByteArrayCharSequence.java =================================================================== --- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/util/ByteArrayCharSequence.java (rev 0) +++ aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/util/ByteArrayCharSequence.java 2010-08-26 14:56:10 UTC (rev 2416) @@ -0,0 +1,56 @@ +package org.semanticdesktop.aperture.util; + +import java.io.UnsupportedEncodingException; + +import org.apache.commons.lang.NullArgumentException; + +public class ByteArrayCharSequence implements CharSequence { + + private byte [] buf; + private int startIdx; + private int endIdx; + + public ByteArrayCharSequence(byte[] bs) { + this(bs,0,bs.length); + } + + private ByteArrayCharSequence(byte [] bs, int startIdx, int endIdx) { + if (bs == null) { + throw new NullArgumentException("bs cannot be null"); + } + if (startIdx < 0 || startIdx > endIdx || endIdx > bs.length) { + throw new IllegalArgumentException("The indices should be : 0 <= startIdx <= endIdx < " + bs.length); + } + this.buf = bs; + this.startIdx = 
startIdx; + this.endIdx = endIdx; + } + + public char charAt(int index) { + if (index < 0 || index >= endIdx - startIdx) { + throw new IndexOutOfBoundsException("Index " + index + " not between 0 and " + (endIdx - startIdx)); + } + return (char)(buf[startIdx + index] & 0xFF); + } + + public int length() { + return endIdx - startIdx; + } + + public CharSequence subSequence(int start, int end) { + + if (start < 0 || start > end || end > endIdx - startIdx) { + throw new IllegalArgumentException("The indices should be : 0 <= start <= end < " + length()); + } + + return new ByteArrayCharSequence(buf,startIdx + start,startIdx + end); + } + + public String toString() { + try { + return new String(buf,"ISO-8859-1"); + } catch (UnsupportedEncodingException e) { + throw new RuntimeException(e); // this will not happen + } + } +} Property changes on: aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/util/ByteArrayCharSequence.java ___________________________________________________________________ Added: svn:mime-type + text/plain Modified: aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/vcard/VcardSubCrawlerTest.java =================================================================== --- aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/vcard/VcardSubCrawlerTest.java 2010-08-26 11:37:05 UTC (rev 2415) +++ aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/vcard/VcardSubCrawlerTest.java 2010-08-26 14:56:10 UTC (rev 2416) @@ -8,6 +8,7 @@ import java.io.InputStream; import java.net.URISyntaxException; +import java.util.Arrays; import java.util.Iterator; import java.util.Set; @@ -29,15 +30,21 @@ import org.ontoware.rdf2go.model.node.impl.URIImpl; import org.ontoware.rdf2go.vocabulary.RDF; import org.semanticdesktop.aperture.accessor.AccessData; +import org.semanticdesktop.aperture.accessor.DataObject; +import org.semanticdesktop.aperture.accessor.FileDataObject; import 
org.semanticdesktop.aperture.accessor.base.AccessDataImpl; +import org.semanticdesktop.aperture.accessor.base.RDFContainerFactoryImpl; import org.semanticdesktop.aperture.extractor.ExtractorRegistry; import org.semanticdesktop.aperture.extractor.impl.DefaultExtractorRegistry; import org.semanticdesktop.aperture.rdf.RDFContainer; import org.semanticdesktop.aperture.rdf.impl.RDFContainerImpl; import org.semanticdesktop.aperture.subcrawler.SubCrawlerRegistry; +import org.semanticdesktop.aperture.subcrawler.impl.SubCrawlerRegistryImpl; import org.semanticdesktop.aperture.test.subcrawler.SubCrawlerTestBase; import org.semanticdesktop.aperture.test.subcrawler.TestBasicSubCrawlerHandler; import org.semanticdesktop.aperture.util.DateUtil; +import org.semanticdesktop.aperture.util.IOUtil; +import org.semanticdesktop.aperture.util.ResourceUtil; import org.semanticdesktop.aperture.vocabulary.NCO; import org.semanticdesktop.aperture.vocabulary.NIE; @@ -61,6 +68,36 @@ metadata = null; } + public void testRfc2426ExampleExtractionMultiLevelMode() throws Exception { + /* + * With normal crawl, the two vcards are reported as normal file data objects + */ + VcardSubCrawler subCrawler = new VcardSubCrawler(true); + metadata = subCrawl(DOCS_PATH + "vcard-rfc2426.vcf", subCrawler, (SubCrawlerRegistry)null); + Model model = metadata.getModel(); + // we have ZERO here + assertStatementCount(0, model, Variable.ANY, RDF.type, NCO.PersonContact); + assertNewModUnmod(handler, 2, 0, 0); + metadata.dispose(); + + /* + * but with the subcrawler registry enabled, the same file is processed on + * two levels and the content is OK + */ + + SubCrawlerRegistry reg = new SubCrawlerRegistryImpl(); + reg.add(new VcardSubCrawlerFactory(true)); + + metadata = subCrawl(DOCS_PATH + "vcard-rfc2426.vcf", subCrawler, reg); + model = metadata.getModel(); + // we have TWO here (multi-level processing) + assertStatementCount(2, model, Variable.ANY, RDF.type, NCO.PersonContact); + assertNewModUnmod(handler, 2, 0, 
0); + metadata.dispose(); + } + + + /** * The vcard-rfc2426.vcf contains more than one vcard, therefore the vcards inside will get a proper * vcard: uri. This test checks this. It uses an iterator because at the time of writing the jpim library @@ -278,11 +315,27 @@ metadata.dispose(); metadata = null; } + + public void testSapVcardsExtractionMultiLevelMode() throws Exception { + VcardSubCrawler subCrawler = new VcardSubCrawler(true); + metadata = subCrawl(DOCS_PATH + "vcard-vCards-SAP.vcf", subCrawler); + assertStatementCount(0, metadata.getModel(), Variable.ANY, RDF.type, NCO.PersonContact); + assertNewModUnmod(handler, 30, 0, 0); + metadata.dispose(); + + SubCrawlerRegistry reg = new SubCrawlerRegistryImpl(); + reg.add(new VcardSubCrawlerFactory(true)); + metadata = subCrawl(DOCS_PATH + "vcard-vCards-SAP.vcf", subCrawler, reg); + assertStatementCount(30, metadata.getModel(), Variable.ANY, RDF.type, NCO.PersonContact); + assertNewModUnmod(handler, 30, 0, 0); + metadata.dispose(); + + + } /** * The vcard-vCards-SAP.vcf contains more than one vcard, therefore the vcards inside will get a proper - * vcard: uri. This test checks this. It uses an iterator because at the time of writing the jpim library - * generated its own uids in a really crappy way that changed with each crawl. + * vcard: uri. This test checks this. 
* * @throws Exception */ @@ -292,6 +345,7 @@ Iterator<String> id = handler.getNewObjects().iterator(); for (int i = 0; i < 30; i++) { String st = id.next(); + System.out.println(st); assertTrue(st.startsWith("vcard:uri:dummyuri!/")); assertEquals(metadata.getDescribedUri(),findSingleObjectResource(metadata.getModel(), new URIImpl(st), NIE.isPartOf)); } @@ -299,6 +353,18 @@ metadata = null; } + public void testGetIndividualVcardInMultiLevelMode() throws Exception { + VcardSubCrawler subCrawler = new VcardSubCrawler(true); + DataObject ob = subCrawler.getDataObject(new URIImpl("uri:dummyuri"), "52787654178f112738459c0c29d155f244316ec4", + ResourceUtil.getInputStream(DOCS_PATH + "vcard-vCards-SAP.vcf", getClass()), null, null, null, new RDFContainerFactoryImpl()); + FileDataObject fdo = (FileDataObject)ob; + + byte [] vcardbytes = IOUtil.readBytes(fdo.getContent()); + byte [] expectedbytes = IOUtil.readBytes(ResourceUtil.getInputStream(DOCS_PATH + "vcard-vCards-SAP-markussprung.vcf", getClass())); + + assertTrue(Arrays.equals(expectedbytes, vcardbytes)); + } + /** * This case tests if the issue 2475980 is solved. Originally the vcard crawler used a library * called jpim which was crappy. Then we switched to ical4j-vcard which seems to be better. @@ -591,6 +657,14 @@ return parentMetadata; } + private RDFContainer subCrawl(String string, VcardSubCrawler subCrawler, SubCrawlerRegistry reg) throws Exception { + InputStream stream = org.semanticdesktop.aperture.util.ResourceUtil.getInputStream(string, this.getClass()); + handler = new TestBasicSubCrawlerHandler(reg); + RDFContainer parentMetadata = new RDFContainerImpl(handler.getModel(),new URIImpl("uri:dummyuri")); + subCrawler.subCrawl(null, stream, handler, null, null, null, null, parentMetadata); + return parentMetadata; + } + private void assertStatementCount(int count, Model model, ResourceOrVariable subject, UriOrVariable predicate, NodeOrVariable object) { int result = 0; ClosableIterator<? 
extends Statement> iter = null; @@ -606,7 +680,7 @@ assertEquals(count,result); } - public Resource findContact(Model model, String fullname) { + private Resource findContact(Model model, String fullname) { QueryResultTable table = model.sparqlSelect( "PREFIX nco: <" + NCO.NS_NCO + "> " + "SELECT ?contact " + Added: aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/util/ByteArrayCharSequenceTest.java =================================================================== --- aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/util/ByteArrayCharSequenceTest.java (rev 0) +++ aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/util/ByteArrayCharSequenceTest.java 2010-08-26 14:56:10 UTC (rev 2416) @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2010 Aduna and Deutsches Forschungszentrum fuer Kuenstliche Intelligenz DFKI GmbH. + * All rights reserved. + * + * Licensed under the Aperture BSD-Style license + */ +package org.semanticdesktop.aperture.util; + +import static org.junit.Assert.assertEquals; + +import org.junit.Test; + +public class ByteArrayCharSequenceTest { + + @Test + public void testSingleByteSequence() { + ByteArrayCharSequence s1 = new ByteArrayCharSequence( + new byte[] { 0x00 }); + assertEquals(1, s1.length()); + assertEquals('\u0000', s1.charAt(0)); + + ByteArrayCharSequence s2 = new ByteArrayCharSequence( + new byte[] { 0x7F }); + assertEquals(1, s2.length()); + assertEquals('\u007F', s2.charAt(0)); + + ByteArrayCharSequence s3 = new ByteArrayCharSequence( + new byte[] { (byte) 0xA0 }); + assertEquals(1, s3.length()); + assertEquals('\u00A0', s3.charAt(0)); + + ByteArrayCharSequence s4 = new ByteArrayCharSequence( + new byte[] { (byte) 0xFF }); + assertEquals(1, s4.length()); + assertEquals('\u00FF', s4.charAt(0)); + } + + @Test + public void testMultiByteSequence() { + { + ByteArrayCharSequence s1 = new ByteArrayCharSequence(new byte[] { + 0x00, 0x01, 0x03 }); + assertEquals(3, s1.length()); + assertEquals('\u0000', 
s1.charAt(0)); + assertEquals('\u0001', s1.charAt(1)); + assertEquals('\u0003', s1.charAt(2)); + assertEquals("\u0000\u0001\u0003", s1.toString()); + } + { + ByteArrayCharSequence s2 = new ByteArrayCharSequence(new byte[] { + 0x41, 0x6E, 0x74, 0x6F, 0x6E, 0x69 }); + assertEquals(6, s2.length()); + assertEquals('\u0041', s2.charAt(0)); + assertEquals('\u006E', s2.charAt(1)); + assertEquals('\u0074', s2.charAt(2)); + assertEquals('\u006F', s2.charAt(3)); + assertEquals('\u006E', s2.charAt(4)); + assertEquals('\u0069', s2.charAt(5)); + assertEquals("Antoni", s2.toString()); + } + } + + @Test + public void testSubSequence() { + ByteArrayCharSequence s2 = new ByteArrayCharSequence(new byte[] { 0x41, + 0x6E, 0x74, 0x6F, 0x6E, 0x69 }); + + { + CharSequence sub1 = s2.subSequence(0, 2); + assertEquals(2, sub1.length()); + assertEquals('\u0041', sub1.charAt(0)); + assertEquals('\u006E', sub1.charAt(1)); + } + { + CharSequence sub2 = s2.subSequence(3, 5); + assertEquals(2, sub2.length()); + assertEquals('\u006F', sub2.charAt(0)); + assertEquals('\u006E', sub2.charAt(1)); + } + { + CharSequence sub3 = s2.subSequence(1, 6); + assertEquals(5, sub3.length()); + assertEquals('\u006E', sub3.charAt(0)); + assertEquals('\u0074', sub3.charAt(1)); + assertEquals('\u006F', sub3.charAt(2)); + assertEquals('\u006E', sub3.charAt(3)); + assertEquals('\u0069', sub3.charAt(4)); + + CharSequence sub3_1 = sub3.subSequence(1, 5); + assertEquals(4, sub3_1.length()); + assertEquals('\u0074', sub3_1.charAt(0)); + assertEquals('\u006F', sub3_1.charAt(1)); + assertEquals('\u006E', sub3_1.charAt(2)); + assertEquals('\u0069', sub3_1.charAt(3)); + } + } + + @Test(expected=IndexOutOfBoundsException.class) + public void testToSmallIndex() { + ByteArrayCharSequence s2 = new ByteArrayCharSequence(new byte[] { 0x41,0x6E, 0x74, 0x6F, 0x6E, 0x69 }); + s2.charAt(-1); + } + + @Test(expected=IndexOutOfBoundsException.class) + public void testToLargeIndex() { + ByteArrayCharSequence s2 = new 
ByteArrayCharSequence(new byte[] { 0x41,0x6E, 0x74, 0x6F, 0x6E, 0x69 }); + s2.charAt(6); + } +} Property changes on: aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/util/ByteArrayCharSequenceTest.java ___________________________________________________________________ Added: svn:mime-type + text/plain Added: aperture/trunk/core/src/test/resources/org/semanticdesktop/aperture/docs/vcard-vCards-SAP-markussprung.vcf =================================================================== --- aperture/trunk/core/src/test/resources/org/semanticdesktop/aperture/docs/vcard-vCards-SAP-markussprung.vcf (rev 0) +++ aperture/trunk/core/src/test/resources/org/semanticdesktop/aperture/docs/vcard-vCards-SAP-markussprung.vcf 2010-08-26 14:56:10 UTC (rev 2416) @@ -0,0 +1,9 @@ +BEGIN:VCARD +VERSION:3.0 +N:Sprung;Markus;;; +FN:Markus Sprung +ORG:SAP; +EMAIL;type=INTERNET;type=WORK;type=pref:Mar...@sa... +CATEGORIES:Demo,Nepomuk(SAP) +X-ABUID:8365FAAF-0362-4AF9-A080-D4DAED8BBF94\:ABPerson +END:VCARD This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <my...@us...> - 2010-08-27 12:51:38
|
Revision: 2418 http://aperture.svn.sourceforge.net/aperture/?rev=2418&view=rev Author: mylka Date: 2010-08-27 12:51:31 +0000 (Fri, 27 Aug 2010) Log Message: ----------- fixed the compressor subcrawlers to use the NFO.fileName of the parent metadata if the uri doesn't contain the file name added a legacy mode that allows legacy applications to stay with the old behavior Modified Paths: -------------- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/base/AbstractCompressorSubCrawler.java aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/bzip2/BZip2SubCrawler.java aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/bzip2/BZip2SubCrawlerFactory.java aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/compress/CompressSubCrawler.java aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/compress/CompressSubCrawlerFactory.java aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/gzip/GZipSubCrawler.java aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/gzip/GZipSubCrawlerFactory.java aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/bzip2/BZip2SubCrawlerTest.java aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/compress/CompressSubCrawlerTest.java aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/gzip/GZipSubCrawlerTest.java Modified: aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/base/AbstractCompressorSubCrawler.java =================================================================== --- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/base/AbstractCompressorSubCrawler.java 2010-08-27 10:56:42 UTC (rev 2417) +++ aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/base/AbstractCompressorSubCrawler.java 2010-08-27 12:51:31 UTC (rev 2418) @@ -25,9 +25,35 @@ import 
org.semanticdesktop.aperture.vocabulary.NFO; /** - * A SubCrawler Implementation working with compressors. + * A SubCrawler Implementation working with compressors. <br/> + * + * If the "legacyMode" flag is set to true the crawler will generate the URI of the child object according + * to the old rules: + * <ol> + * <li>Take the uri</li> + * <li>Take the last segment of the uri, after the last slash - the file name</li> + * <li>If the compressor-specific extension is present in the last segment, remove it (e.g. archive.txt.gz becomes archive.txt)</li> + * </ol> + * + * This works for files in filesystems. Unfortunately it doesn't work if a compressed file is attached to an email + * because then the URI does not contain the file name, e.g. mime:file:///C:/folder/file.eml!/2. That's why an improvement + * has been implemented. If the extension is not present, the parent metadata is searched for the NFO.fileName property. + * If it is found - it is used instead. This covers the mail attachments issue, because the mail-related crawlers put + * the file name in the parent metadata even though the uri doesn't contain it.<br/><br/> + * + * The legacy mode has been preserved for legacy applications which depend on the fact that uris are constant. It is not the + * default though. Argument-free constructors of the implementation subclasses (at least those in aperture-core, at the time + * of writing) will use the new algorithm by default. Legacy mode requires an explicit flag to be passed.
+ * + * */ public abstract class AbstractCompressorSubCrawler extends AbstractSubCrawler { + + protected boolean legacyNameMode; + + protected AbstractCompressorSubCrawler(boolean legacyNameMode) { + this.legacyNameMode = legacyNameMode; + } /** * Returns a stream that uncompresses the data @@ -55,7 +81,7 @@ InputStream uncompressedStream = null; try { - URI contentUri = getContentUri(parentMetadata.getDescribedUri()); + URI contentUri = getContentUri(parentMetadata); uncompressedStream = getUncompressedStream(stream); parentMetadata.add(RDF.type, NFO.Archive); @@ -105,15 +131,26 @@ * @param archiveUri the uri of the archive * @return the uri of the compressed file content */ - protected URI getContentUri(URI archiveUri) { + protected URI getContentUri(RDFContainer parentMetadata) { // this method is supposed to be overridden, so either there is an error, or a subclass // has called super.getContentUri, therefore we invent an arbitrary extension + URI archiveUri = parentMetadata.getDescribedUri(); String string = archiveUri.toString(); - int hashIndex = string.indexOf("/"); - if (hashIndex != -1) { - return createChildUri(archiveUri, string.substring(hashIndex) + ".content"); + String parentFileName = parentMetadata.getString(NFO.fileName); + if (legacyNameMode || parentFileName == null) { + int hashIndex = string.indexOf("/"); + if (hashIndex != -1) { + return createChildUri(archiveUri, string.substring(hashIndex) + ".content"); + } else { + return createChildUri(archiveUri, string + ".content"); + } } else { - return createChildUri(archiveUri, string + ".content"); + int dotIndex = parentFileName.lastIndexOf('.'); + if (dotIndex != -1) { + return createChildUri(archiveUri, parentFileName.substring(0,dotIndex)); + } else { + return createChildUri(archiveUri, parentFileName + ".content"); + } } } Modified: aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/bzip2/BZip2SubCrawler.java 
=================================================================== --- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/bzip2/BZip2SubCrawler.java 2010-08-27 10:56:42 UTC (rev 2417) +++ aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/bzip2/BZip2SubCrawler.java 2010-08-27 12:51:31 UTC (rev 2418) @@ -11,6 +11,7 @@ import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; import org.ontoware.rdf2go.model.node.URI; +import org.semanticdesktop.aperture.rdf.RDFContainer; import org.semanticdesktop.aperture.rdf.util.ModelUtil; import org.semanticdesktop.aperture.subcrawler.base.AbstractCompressorSubCrawler; @@ -19,6 +20,18 @@ */ public class BZip2SubCrawler extends AbstractCompressorSubCrawler { + public BZip2SubCrawler() { + super(false); + } + + /** + * Constructor that sets the legacy mode. See {@link AbstractCompressorSubCrawler} for details. + * @param legacyMode + */ + public BZip2SubCrawler(boolean legacyMode) { + super(legacyMode); + } + @Override protected InputStream getUncompressedStream(InputStream stream) throws IOException { return new BZip2CompressorInputStream(stream); @@ -30,7 +43,8 @@ } @Override - protected URI getContentUri(URI archiveUri) { + protected URI getContentUri(RDFContainer parentMetadata) { + URI archiveUri = parentMetadata.getDescribedUri(); String name = ModelUtil.getFileName(archiveUri); if (name.endsWith(".bz2")) { return createChildUri(archiveUri, name.substring(0,name.length() - 4)); @@ -39,7 +53,7 @@ } if (name.endsWith(".tbz2")) { return createChildUri(archiveUri, name.substring(0,name.length() - 4) + "tar"); } else { - return super.getContentUri(archiveUri); + return super.getContentUri(parentMetadata); } } } Modified: aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/bzip2/BZip2SubCrawlerFactory.java =================================================================== --- 
aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/bzip2/BZip2SubCrawlerFactory.java 2010-08-27 10:56:42 UTC (rev 2417) +++ aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/bzip2/BZip2SubCrawlerFactory.java 2010-08-27 12:51:31 UTC (rev 2418) @@ -12,12 +12,27 @@ import org.semanticdesktop.aperture.subcrawler.SubCrawler; import org.semanticdesktop.aperture.subcrawler.SubCrawlerFactory; +import org.semanticdesktop.aperture.subcrawler.base.AbstractCompressorSubCrawler; /** * Produces BZip2SubCrawler instances */ @SuppressWarnings("unchecked") public class BZip2SubCrawlerFactory implements SubCrawlerFactory { + + private boolean legacyMode; + + public BZip2SubCrawlerFactory() { + this.legacyMode = false; + } + + /** + * Constructor that sets the legacy mode. See {@link AbstractCompressorSubCrawler} for details. + * @param legacyMode + */ + public BZip2SubCrawlerFactory(boolean legacyMode) { + this.legacyMode = legacyMode; + } private static final Set MIME_TYPES; @@ -33,7 +48,7 @@ } public SubCrawler get() { - return new BZip2SubCrawler(); + return new BZip2SubCrawler(legacyMode); } public Set getSupportedMimeTypes() { Modified: aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/compress/CompressSubCrawler.java =================================================================== --- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/compress/CompressSubCrawler.java 2010-08-27 10:56:42 UTC (rev 2417) +++ aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/compress/CompressSubCrawler.java 2010-08-27 12:51:31 UTC (rev 2418) @@ -13,6 +13,7 @@ import org.ontoware.rdf2go.model.node.URI; import org.ontoware.rdf2go.model.node.impl.URIImpl; +import org.semanticdesktop.aperture.rdf.RDFContainer; import org.semanticdesktop.aperture.rdf.util.ModelUtil; import org.semanticdesktop.aperture.subcrawler.base.AbstractCompressorSubCrawler; @@ -20,6 +21,18 @@ * A SubCrawler 
Implementation working with GZIP archives. */ public class CompressSubCrawler extends AbstractCompressorSubCrawler { + + public CompressSubCrawler() { + super(false); + } + + /** + * Constructor that sets the legacy mode. See {@link AbstractCompressorSubCrawler} for details. + * @param legacyMode + */ + public CompressSubCrawler(boolean legacyMode) { + super(legacyMode); + } @Override protected InputStream getUncompressedStream(InputStream stream) throws IOException { @@ -29,9 +42,14 @@ @Override - protected URI getContentUri(URI archiveUri) { + protected URI getContentUri(RDFContainer parentMetadata) { + URI archiveUri = parentMetadata.getDescribedUri(); String name = ModelUtil.getFileName(archiveUri); - return createChildUri(archiveUri, name.substring(0, name.length() -2)); + if (name.toLowerCase().endsWith(".z")) { + return createChildUri(archiveUri, name.substring(0, name.length() - 2)); + } else { + return super.getContentUri(parentMetadata); + } } Modified: aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/compress/CompressSubCrawlerFactory.java =================================================================== --- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/compress/CompressSubCrawlerFactory.java 2010-08-27 10:56:42 UTC (rev 2417) +++ aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/compress/CompressSubCrawlerFactory.java 2010-08-27 12:51:31 UTC (rev 2418) @@ -12,6 +12,7 @@ import org.semanticdesktop.aperture.subcrawler.SubCrawler; import org.semanticdesktop.aperture.subcrawler.SubCrawlerFactory; +import org.semanticdesktop.aperture.subcrawler.base.AbstractCompressorSubCrawler; /** * Produces CompressSubCrawlers @@ -19,6 +20,20 @@ @SuppressWarnings("unchecked") public class CompressSubCrawlerFactory implements SubCrawlerFactory { + private boolean legacyMode; + + public CompressSubCrawlerFactory() { + this.legacyMode = false; + } + + /** + * Constructor that sets the legacy mode. 
See {@link AbstractCompressorSubCrawler} for details. + * @param legacyMode + */ + public CompressSubCrawlerFactory(boolean legacyMode) { + this.legacyMode = legacyMode; + } + private static final Set MIME_TYPES; /** Prefix used for uris of entries inside gzip archives */ @@ -32,7 +47,7 @@ } public SubCrawler get() { - return new CompressSubCrawler(); + return new CompressSubCrawler(legacyMode); } public Set getSupportedMimeTypes() { Modified: aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/gzip/GZipSubCrawler.java =================================================================== --- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/gzip/GZipSubCrawler.java 2010-08-27 10:56:42 UTC (rev 2417) +++ aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/gzip/GZipSubCrawler.java 2010-08-27 12:51:31 UTC (rev 2418) @@ -11,6 +11,7 @@ import java.util.zip.GZIPInputStream; import org.ontoware.rdf2go.model.node.URI; +import org.semanticdesktop.aperture.rdf.RDFContainer; import org.semanticdesktop.aperture.rdf.util.ModelUtil; import org.semanticdesktop.aperture.subcrawler.base.AbstractCompressorSubCrawler; @@ -18,6 +19,18 @@ * A SubCrawler Implementation working with GZIP archives. */ public class GZipSubCrawler extends AbstractCompressorSubCrawler { + + public GZipSubCrawler() { + super(false); + } + + /** + * Constructor that sets the legacy mode. See {@link AbstractCompressorSubCrawler} for details. 
+ * @param legacyMode + */ + public GZipSubCrawler(boolean legacyMode) { + super(legacyMode); + } @Override protected InputStream getUncompressedStream(InputStream stream) throws IOException { @@ -25,14 +38,15 @@ } @Override - protected URI getContentUri(URI archiveUri) { + protected URI getContentUri(RDFContainer parentMetadata) { + URI archiveUri = parentMetadata.getDescribedUri(); String name = ModelUtil.getFileName(archiveUri); if (name.endsWith(".gz")) { return createChildUri(archiveUri, name.substring(0,name.length() - 3)); } else if (name.endsWith(".tgz")) { return createChildUri(archiveUri, name.substring(0,name.length() - 3) + "tar"); } else { - return super.getContentUri(archiveUri); + return super.getContentUri(parentMetadata); } } Modified: aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/gzip/GZipSubCrawlerFactory.java =================================================================== --- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/gzip/GZipSubCrawlerFactory.java 2010-08-27 10:56:42 UTC (rev 2417) +++ aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/gzip/GZipSubCrawlerFactory.java 2010-08-27 12:51:31 UTC (rev 2418) @@ -12,6 +12,7 @@ import org.semanticdesktop.aperture.subcrawler.SubCrawler; import org.semanticdesktop.aperture.subcrawler.SubCrawlerFactory; +import org.semanticdesktop.aperture.subcrawler.base.AbstractCompressorSubCrawler; /** * Produces GzipSubCrawlers @@ -19,6 +20,20 @@ @SuppressWarnings("unchecked") public class GZipSubCrawlerFactory implements SubCrawlerFactory { + private boolean legacyMode; + + public GZipSubCrawlerFactory() { + this.legacyMode = false; + } + + /** + * Constructor that sets the legacy mode. See {@link AbstractCompressorSubCrawler} for details. 
+ * @param legacyMode + */ + public GZipSubCrawlerFactory(boolean legacyMode) { + this.legacyMode = legacyMode; + } + private static final Set MIME_TYPES; /** Prefix used for uris of entries inside gzip archives */ @@ -33,7 +48,7 @@ } public SubCrawler get() { - return new GZipSubCrawler(); + return new GZipSubCrawler(legacyMode); } public Set getSupportedMimeTypes() { Modified: aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/bzip2/BZip2SubCrawlerTest.java =================================================================== --- aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/bzip2/BZip2SubCrawlerTest.java 2010-08-27 10:56:42 UTC (rev 2417) +++ aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/bzip2/BZip2SubCrawlerTest.java 2010-08-27 12:51:31 UTC (rev 2418) @@ -6,8 +6,16 @@ */ package org.semanticdesktop.aperture.subcrawler.bzip2; +import java.io.InputStream; + +import org.ontoware.rdf2go.RDF2Go; import org.semanticdesktop.aperture.rdf.RDFContainer; +import org.semanticdesktop.aperture.rdf.impl.RDFContainerImpl; import org.semanticdesktop.aperture.subcrawler.ArchiveSubCrawlerTestBase; +import org.semanticdesktop.aperture.subcrawler.impl.DefaultSubCrawlerRegistry; +import org.semanticdesktop.aperture.test.subcrawler.TestBasicSubCrawlerHandler; +import org.semanticdesktop.aperture.util.ResourceUtil; +import org.semanticdesktop.aperture.vocabulary.NFO; /** * A test case for the bzip2 subcrawler @@ -37,6 +45,30 @@ public void testBZip2TestIncremental() throws Exception { testCrawlerIncremental(new BZip2SubCrawlerFactory(), "TestBZip2SubCrawlerCombination.tmpDir", "bzip2-txt-bziptest.txt.bz2", ".bz2",1); } + + public void testLegacyGeneration() throws Exception { + InputStream is = ResourceUtil.getInputStream(DOCS_PATH + "bzip2-txt-bziptest.txt.bz2", getClass()); + TestBasicSubCrawlerHandler handler = new TestBasicSubCrawlerHandler(new DefaultSubCrawlerRegistry()); + BZip2SubCrawler sc = new 
BZip2SubCrawler(true); + RDFContainer parentMetadata = new RDFContainerImpl(RDF2Go.getModelFactory().createModel().open(), "uri:dummyuri"); + parentMetadata.add(NFO.fileName,"bzip2-txt-bziptest.txt.bz2"); + sc.subCrawl(parentMetadata.getDescribedUri(), is, handler, null, null, null, null, parentMetadata); + + String contentObject = handler.getNewObjects().iterator().next(); + assertEquals("bzip2:uri:dummyuri!/uri%3Adummyuri.content", contentObject); + } + + public void testNewGeneration() throws Exception { + InputStream is = ResourceUtil.getInputStream(DOCS_PATH + "bzip2-txt-bziptest.txt.bz2", getClass()); + TestBasicSubCrawlerHandler handler = new TestBasicSubCrawlerHandler(new DefaultSubCrawlerRegistry()); + BZip2SubCrawler sc = new BZip2SubCrawler(); + RDFContainer parentMetadata = new RDFContainerImpl(RDF2Go.getModelFactory().createModel().open(), "uri:dummyuri"); + parentMetadata.add(NFO.fileName,"bzip2-txt-bziptest.txt.bz2"); + sc.subCrawl(parentMetadata.getDescribedUri(), is, handler, null, null, null, null, parentMetadata); + + String contentObject = handler.getNewObjects().iterator().next(); + assertEquals("bzip2:uri:dummyuri!/bzip2-txt-bziptest.txt", contentObject); + } } Modified: aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/compress/CompressSubCrawlerTest.java =================================================================== --- aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/compress/CompressSubCrawlerTest.java 2010-08-27 10:56:42 UTC (rev 2417) +++ aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/compress/CompressSubCrawlerTest.java 2010-08-27 12:51:31 UTC (rev 2418) @@ -6,8 +6,17 @@ */ package org.semanticdesktop.aperture.subcrawler.compress; +import java.io.InputStream; + +import org.ontoware.rdf2go.RDF2Go; import org.semanticdesktop.aperture.rdf.RDFContainer; +import org.semanticdesktop.aperture.rdf.impl.RDFContainerImpl; import 
org.semanticdesktop.aperture.subcrawler.ArchiveSubCrawlerTestBase; +import org.semanticdesktop.aperture.subcrawler.bzip2.BZip2SubCrawler; +import org.semanticdesktop.aperture.subcrawler.impl.DefaultSubCrawlerRegistry; +import org.semanticdesktop.aperture.test.subcrawler.TestBasicSubCrawlerHandler; +import org.semanticdesktop.aperture.util.ResourceUtil; +import org.semanticdesktop.aperture.vocabulary.NFO; /** * A test case for the compress subcrawler @@ -39,6 +48,30 @@ public void testCompressTestIncremental() throws Exception { testCrawlerIncremental(new CompressSubCrawlerFactory(), "TestCompressSubCrawlerCombination.tmpDir", FILE_NAME, ".Z",1); } + + public void testLegacyGeneration() throws Exception { + InputStream is = ResourceUtil.getInputStream(DOCS_PATH + FILE_NAME, getClass()); + TestBasicSubCrawlerHandler handler = new TestBasicSubCrawlerHandler(new DefaultSubCrawlerRegistry()); + CompressSubCrawler sc = new CompressSubCrawler(true); + RDFContainer parentMetadata = new RDFContainerImpl(RDF2Go.getModelFactory().createModel().open(), "uri:dummyuri"); + parentMetadata.add(NFO.fileName,"compress-txt-compresstest.txt.Z"); + sc.subCrawl(parentMetadata.getDescribedUri(), is, handler, null, null, null, null, parentMetadata); + + String contentObject = handler.getNewObjects().iterator().next(); + assertEquals("compress:uri:dummyuri!/uri%3Adummyuri.content",contentObject); + } + + public void testNewGeneration() throws Exception { + InputStream is = ResourceUtil.getInputStream(DOCS_PATH + FILE_NAME, getClass()); + TestBasicSubCrawlerHandler handler = new TestBasicSubCrawlerHandler(new DefaultSubCrawlerRegistry()); + CompressSubCrawler sc = new CompressSubCrawler(); + RDFContainer parentMetadata = new RDFContainerImpl(RDF2Go.getModelFactory().createModel().open(), "uri:dummyuri"); + parentMetadata.add(NFO.fileName,"compress-txt-compresstest.txt.Z"); + sc.subCrawl(parentMetadata.getDescribedUri(), is, handler, null, null, null, null, parentMetadata); + + String 
contentObject = handler.getNewObjects().iterator().next(); + assertEquals("compress:uri:dummyuri!/compress-txt-compresstest.txt",contentObject); + } } Modified: aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/gzip/GZipSubCrawlerTest.java =================================================================== --- aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/gzip/GZipSubCrawlerTest.java 2010-08-27 10:56:42 UTC (rev 2417) +++ aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/gzip/GZipSubCrawlerTest.java 2010-08-27 12:51:31 UTC (rev 2418) @@ -6,8 +6,17 @@ */ package org.semanticdesktop.aperture.subcrawler.gzip; +import java.io.InputStream; + +import org.ontoware.rdf2go.RDF2Go; import org.semanticdesktop.aperture.rdf.RDFContainer; +import org.semanticdesktop.aperture.rdf.impl.RDFContainerImpl; import org.semanticdesktop.aperture.subcrawler.ArchiveSubCrawlerTestBase; +import org.semanticdesktop.aperture.subcrawler.compress.CompressSubCrawler; +import org.semanticdesktop.aperture.subcrawler.impl.DefaultSubCrawlerRegistry; +import org.semanticdesktop.aperture.test.subcrawler.TestBasicSubCrawlerHandler; +import org.semanticdesktop.aperture.util.ResourceUtil; +import org.semanticdesktop.aperture.vocabulary.NFO; /** * A test case for the gzip subcrawler @@ -37,6 +46,30 @@ public void testGZipTestIncremental() throws Exception { testCrawlerIncremental(new GZipSubCrawlerFactory(), "TestGZipSubCrawlerCombination.tmpDir", "gzip-txt-gziptest.txt.gz", ".gz",1); } + + public void testLegacyGeneration() throws Exception { + InputStream is = ResourceUtil.getInputStream(DOCS_PATH + "gzip-txt-gziptest.txt.gz", getClass()); + TestBasicSubCrawlerHandler handler = new TestBasicSubCrawlerHandler(new DefaultSubCrawlerRegistry()); + GZipSubCrawler sc = new GZipSubCrawler(true); + RDFContainer parentMetadata = new RDFContainerImpl(RDF2Go.getModelFactory().createModel().open(), "uri:dummyuri"); + 
parentMetadata.add(NFO.fileName,"gzip-txt-gziptest.txt.gz"); + sc.subCrawl(parentMetadata.getDescribedUri(), is, handler, null, null, null, null, parentMetadata); + + String contentObject = handler.getNewObjects().iterator().next(); + assertEquals("gzip:uri:dummyuri!/uri%3Adummyuri.content",contentObject); + } + + public void testNewGeneration() throws Exception { + InputStream is = ResourceUtil.getInputStream(DOCS_PATH + "gzip-txt-gziptest.txt.gz", getClass()); + TestBasicSubCrawlerHandler handler = new TestBasicSubCrawlerHandler(new DefaultSubCrawlerRegistry()); + GZipSubCrawler sc = new GZipSubCrawler(); + RDFContainer parentMetadata = new RDFContainerImpl(RDF2Go.getModelFactory().createModel().open(), "uri:dummyuri"); + parentMetadata.add(NFO.fileName,"gzip-txt-gziptest.txt.gz"); + sc.subCrawl(parentMetadata.getDescribedUri(), is, handler, null, null, null, null, parentMetadata); + + String contentObject = handler.getNewObjects().iterator().next(); + assertEquals("gzip:uri:dummyuri!/gzip-txt-gziptest.txt",contentObject); + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <my...@us...> - 2010-09-07 12:27:22
|
Revision: 2420 http://aperture.svn.sourceforge.net/aperture/?rev=2420&view=rev Author: mylka Date: 2010-09-07 12:27:11 +0000 (Tue, 07 Sep 2010) Log Message: ----------- made the compressor subcrawlers more resilient in the presence of archive files with wrong extensions Modified Paths: -------------- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/base/AbstractCompressorSubCrawler.java aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/bzip2/BZip2SubCrawlerTest.java aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/compress/CompressSubCrawlerTest.java aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/gzip/GZipSubCrawlerTest.java Modified: aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/base/AbstractCompressorSubCrawler.java =================================================================== --- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/base/AbstractCompressorSubCrawler.java 2010-09-06 18:09:46 UTC (rev 2419) +++ aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/base/AbstractCompressorSubCrawler.java 2010-09-07 12:27:11 UTC (rev 2420) @@ -19,6 +19,7 @@ import org.semanticdesktop.aperture.accessor.base.FileDataObjectBase; import org.semanticdesktop.aperture.datasource.DataSource; import org.semanticdesktop.aperture.rdf.RDFContainer; +import org.semanticdesktop.aperture.rdf.util.ModelUtil; import org.semanticdesktop.aperture.subcrawler.SubCrawler; import org.semanticdesktop.aperture.subcrawler.SubCrawlerException; import org.semanticdesktop.aperture.subcrawler.SubCrawlerHandler; @@ -137,10 +138,13 @@ URI archiveUri = parentMetadata.getDescribedUri(); String string = archiveUri.toString(); String parentFileName = parentMetadata.getString(NFO.fileName); + if (parentFileName == null) { + parentFileName = ModelUtil.getFileName(archiveUri); + } if (legacyNameMode || parentFileName == null) { - 
int hashIndex = string.indexOf("/"); + int hashIndex = string.lastIndexOf("/"); if (hashIndex != -1) { - return createChildUri(archiveUri, string.substring(hashIndex) + ".content"); + return createChildUri(archiveUri, string.substring(hashIndex + 1) + ".content"); } else { return createChildUri(archiveUri, string + ".content"); } Modified: aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/bzip2/BZip2SubCrawlerTest.java =================================================================== --- aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/bzip2/BZip2SubCrawlerTest.java 2010-09-06 18:09:46 UTC (rev 2419) +++ aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/bzip2/BZip2SubCrawlerTest.java 2010-09-07 12:27:11 UTC (rev 2420) @@ -56,6 +56,20 @@ String contentObject = handler.getNewObjects().iterator().next(); assertEquals("bzip2:uri:dummyuri!/uri%3Adummyuri.content", contentObject); + handler.close(); + } + + public void testLegacyGenerationWrongExtension() throws Exception { + InputStream is = ResourceUtil.getInputStream(DOCS_PATH + "bzip2-txt-bziptest.txt.bz2", getClass()); + TestBasicSubCrawlerHandler handler = new TestBasicSubCrawlerHandler(new DefaultSubCrawlerRegistry()); + BZip2SubCrawler sc = new BZip2SubCrawler(true); + RDFContainer parentMetadata = new RDFContainerImpl(RDF2Go.getModelFactory().createModel().open(), "file:/C:/folder/bzipped.WRONG"); + parentMetadata.add(NFO.fileName,"bzip2-txt-bziptest.txt.bz2"); + sc.subCrawl(parentMetadata.getDescribedUri(), is, handler, null, null, null, null, parentMetadata); + + String contentObject = handler.getNewObjects().iterator().next(); + assertEquals("bzip2:file:/C:/folder/bzipped.WRONG!/bzipped.WRONG.content", contentObject); + handler.close(); } public void testNewGeneration() throws Exception { @@ -68,7 +82,19 @@ String contentObject = handler.getNewObjects().iterator().next(); assertEquals("bzip2:uri:dummyuri!/bzip2-txt-bziptest.txt", 
contentObject); + handler.close(); } + + public void testNewGenerationNoFileNameMetadataWrongExtension() throws Exception { + InputStream is = ResourceUtil.getInputStream(DOCS_PATH + "bzip2-txt-bziptest.txt.bz2", getClass()); + TestBasicSubCrawlerHandler handler = new TestBasicSubCrawlerHandler(new DefaultSubCrawlerRegistry()); + BZip2SubCrawler sc = new BZip2SubCrawler(); + RDFContainer parentMetadata = new RDFContainerImpl(RDF2Go.getModelFactory().createModel().open(), "file:/C:/bzip2file.bzipped"); + sc.subCrawl(parentMetadata.getDescribedUri(), is, handler, null, null, null, null, parentMetadata); + String contentObject = handler.getNewObjects().iterator().next(); + assertEquals("bzip2:file:/C:/bzip2file.bzipped!/bzip2file", contentObject); + handler.close(); + } } Modified: aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/compress/CompressSubCrawlerTest.java =================================================================== --- aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/compress/CompressSubCrawlerTest.java 2010-09-06 18:09:46 UTC (rev 2419) +++ aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/compress/CompressSubCrawlerTest.java 2010-09-07 12:27:11 UTC (rev 2420) @@ -61,6 +61,18 @@ assertEquals("compress:uri:dummyuri!/uri%3Adummyuri.content",contentObject); } + public void testLegacyGenerationWrongExtension() throws Exception { + InputStream is = ResourceUtil.getInputStream(DOCS_PATH + FILE_NAME, getClass()); + TestBasicSubCrawlerHandler handler = new TestBasicSubCrawlerHandler(new DefaultSubCrawlerRegistry()); + CompressSubCrawler sc = new CompressSubCrawler(true); + RDFContainer parentMetadata = new RDFContainerImpl(RDF2Go.getModelFactory().createModel().open(), "file:/C:/folder/compressed.WRONG"); + parentMetadata.add(NFO.fileName,"compress-txt-compresstest.txt.Z"); + sc.subCrawl(parentMetadata.getDescribedUri(), is, handler, null, null, null, null, parentMetadata); + + String 
contentObject = handler.getNewObjects().iterator().next(); + assertEquals("compress:file:/C:/folder/compressed.WRONG!/compressed.WRONG.content",contentObject); + } + public void testNewGeneration() throws Exception { InputStream is = ResourceUtil.getInputStream(DOCS_PATH + FILE_NAME, getClass()); TestBasicSubCrawlerHandler handler = new TestBasicSubCrawlerHandler(new DefaultSubCrawlerRegistry()); @@ -72,6 +84,17 @@ String contentObject = handler.getNewObjects().iterator().next(); assertEquals("compress:uri:dummyuri!/compress-txt-compresstest.txt",contentObject); } + + public void testNewGenerationNoFilenameInMetadataWrongExtension() throws Exception { + InputStream is = ResourceUtil.getInputStream(DOCS_PATH + FILE_NAME, getClass()); + TestBasicSubCrawlerHandler handler = new TestBasicSubCrawlerHandler(new DefaultSubCrawlerRegistry()); + CompressSubCrawler sc = new CompressSubCrawler(); + RDFContainer parentMetadata = new RDFContainerImpl(RDF2Go.getModelFactory().createModel().open(), "file:/C:/compressfile.WRONG"); + sc.subCrawl(parentMetadata.getDescribedUri(), is, handler, null, null, null, null, parentMetadata); + + String contentObject = handler.getNewObjects().iterator().next(); + assertEquals("compress:file:/C:/compressfile.WRONG!/compressfile",contentObject); + } } Modified: aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/gzip/GZipSubCrawlerTest.java =================================================================== --- aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/gzip/GZipSubCrawlerTest.java 2010-09-06 18:09:46 UTC (rev 2419) +++ aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/gzip/GZipSubCrawlerTest.java 2010-09-07 12:27:11 UTC (rev 2420) @@ -59,6 +59,18 @@ assertEquals("gzip:uri:dummyuri!/uri%3Adummyuri.content",contentObject); } + public void testLegacyGenerationWrongExtension() throws Exception { + InputStream is = ResourceUtil.getInputStream(DOCS_PATH + 
"gzip-txt-gziptest.txt.gz", getClass()); + TestBasicSubCrawlerHandler handler = new TestBasicSubCrawlerHandler(new DefaultSubCrawlerRegistry()); + GZipSubCrawler sc = new GZipSubCrawler(true); + RDFContainer parentMetadata = new RDFContainerImpl(RDF2Go.getModelFactory().createModel().open(), "file:/C:/folder/gzipped.WRONG"); + parentMetadata.add(NFO.fileName,"gzip-txt-gziptest.txt.gz"); + sc.subCrawl(parentMetadata.getDescribedUri(), is, handler, null, null, null, null, parentMetadata); + + String contentObject = handler.getNewObjects().iterator().next(); + assertEquals("gzip:file:/C:/folder/gzipped.WRONG!/gzipped.WRONG.content",contentObject); + } + public void testNewGeneration() throws Exception { InputStream is = ResourceUtil.getInputStream(DOCS_PATH + "gzip-txt-gziptest.txt.gz", getClass()); TestBasicSubCrawlerHandler handler = new TestBasicSubCrawlerHandler(new DefaultSubCrawlerRegistry()); @@ -70,6 +82,17 @@ String contentObject = handler.getNewObjects().iterator().next(); assertEquals("gzip:uri:dummyuri!/gzip-txt-gziptest.txt",contentObject); } + + public void testNewGenerationNoFilenameinMetadataWrongExtension() throws Exception { + InputStream is = ResourceUtil.getInputStream(DOCS_PATH + "gzip-txt-gziptest.txt.gz", getClass()); + TestBasicSubCrawlerHandler handler = new TestBasicSubCrawlerHandler(new DefaultSubCrawlerRegistry()); + GZipSubCrawler sc = new GZipSubCrawler(); + RDFContainer parentMetadata = new RDFContainerImpl(RDF2Go.getModelFactory().createModel().open(), "file:/C:/gzippedfile.WRONG"); + sc.subCrawl(parentMetadata.getDescribedUri(), is, handler, null, null, null, null, parentMetadata); + + String contentObject = handler.getNewObjects().iterator().next(); + assertEquals("gzip:file:/C:/gzippedfile.WRONG!/gzippedfile",contentObject); + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <my...@us...> - 2010-09-07 15:44:42
|
Revision: 2422 http://aperture.svn.sourceforge.net/aperture/?rev=2422&view=rev Author: mylka Date: 2010-09-07 15:44:35 +0000 (Tue, 07 Sep 2010) Log Message: ----------- added support for cpio archive detection in mimetypes.xml Modified Paths: -------------- aperture/trunk/core/src/main/resources/org/semanticdesktop/aperture/mime/identifier/magic/mimetypes.xml aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/mime/identifier/magic/ApertureDocumentsIdentificationTest.java Modified: aperture/trunk/core/src/main/resources/org/semanticdesktop/aperture/mime/identifier/magic/mimetypes.xml =================================================================== --- aperture/trunk/core/src/main/resources/org/semanticdesktop/aperture/mime/identifier/magic/mimetypes.xml 2010-09-07 15:40:11 UTC (rev 2421) +++ aperture/trunk/core/src/main/resources/org/semanticdesktop/aperture/mime/identifier/magic/mimetypes.xml 2010-09-07 15:44:35 UTC (rev 2422) @@ -210,6 +210,12 @@ <magicNumber encoding="string">ISc(</magicNumber> </description> +<description> + <mimeType>application/x-cpio</mimeType> <!-- taken from http://www.w3schools.com/media/media_mimeref.asp --> + <magicNumber encoding="string">0707</magicNumber> <!-- http://www.mkssoftware.com/docs/man4/cpio.4.asp --> + <extensions>cpio</extensions> +</description> + <!-- ================== Web Types ================== --> <description> Modified: aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/mime/identifier/magic/ApertureDocumentsIdentificationTest.java =================================================================== --- aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/mime/identifier/magic/ApertureDocumentsIdentificationTest.java 2010-09-07 15:40:11 UTC (rev 2421) +++ aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/mime/identifier/magic/ApertureDocumentsIdentificationTest.java 2010-09-07 15:44:35 UTC (rev 2422) @@ -38,6 +38,7 @@ t("corel-wordperfect-5.1-far-east.wp", 
"application/vnd.wordperfect","application/vnd.wordperfect"); t("corel-wordperfect-5.1.wp", "application/vnd.wordperfect","application/vnd.wordperfect"); t("corel-wordperfect-x3.wpd", "application/vnd.wordperfect","application/vnd.wordperfect"); + t("cpio-testfile.txt.cpio", "application/x-cpio", "application/x-cpio"); t("counting-input-stream-test-file.dat", "application/zip", "application/zip"); t("emlx-74719.emlx", "text/plain", "message/x-emlx"); t("faulty-fileaccessdata-is-ignored.xml","application/gzip", "application/gzip"); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <my...@us...> - 2010-09-08 21:38:48
|
Revision: 2427 http://aperture.svn.sourceforge.net/aperture/?rev=2427&view=rev Author: mylka Date: 2010-09-08 21:38:39 +0000 (Wed, 08 Sep 2010) Log Message: ----------- added an additional method to the subcrawler interface, this method allows the user to pass the parent metadata to the getDataObjectMethod. This helps with compressor subcrawlers in cases when the .gz archive is an email attachment, the uri ends with #1 or !/1 and the only real source of the file name is the parent metadata. It is necessary to find resources like gzip:mime:file:/C:/file.eml!/1!/file.txt if the attachment was a gzip file named file.txt.gz Modified Paths: -------------- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/crawler/ical/IcalCrawler.java aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/SubCrawler.java aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/SubCrawlerUtil.java aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/base/AbstractArchiverSubCrawler.java aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/base/AbstractSubCrawler.java aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/TestSubCrawlerUtilIntegration.java Added Paths: ----------- aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/TestSubcrawlerIntegration.java aperture/trunk/core/src/test/resources/org/semanticdesktop/aperture/docs/mail-gzippeddiffattachment.eml Modified: aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/crawler/ical/IcalCrawler.java =================================================================== --- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/crawler/ical/IcalCrawler.java 2010-09-08 14:53:10 UTC (rev 2426) +++ aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/crawler/ical/IcalCrawler.java 2010-09-08 21:38:39 UTC (rev 2427) @@ -257,6 +257,13 @@ // TODO Auto-generated method stub return null; } 
+ + public DataObject getDataObject(URI parentUri, String path, + InputStream stream, DataSource dataSource, Charset charset, + String mimeType, RDFContainerFactory factory, RDFContainer parentMetadata) + throws SubCrawlerException, PathNotFoundException { + return getDataObject(parentUri, path, stream, dataSource, charset, mimeType, factory); + } public void stopSubCrawler() { // not implemented yet Modified: aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/SubCrawler.java =================================================================== --- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/SubCrawler.java 2010-09-08 14:53:10 UTC (rev 2426) +++ aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/SubCrawler.java 2010-09-08 21:38:39 UTC (rev 2427) @@ -101,6 +101,29 @@ String mimeType, RDFContainerFactory factory) throws SubCrawlerException, PathNotFoundException; /** + * Get a DataObject from the specified stream with the given path. + * + * @param parentUri the URI of the parent object where the path will be looked for + * @param path the path of the requested resource + * @param stream the stream that contains the resource + * @param dataSource data source that will be returned by the {@link DataObject#getDataSource()} method of + * the returned data object. Some implementations may require that this reference is not null + * and that it contains some particular information + * @param charset the charset in which the input stream is encoded (optional). + * @param mimeType the MIME type of the passed stream (optional). + * @param factory An RDFContainerFactory that delivers the RDFContainer to which the metadata of the + * DataObject should be added. The provided RDFContainer can later be retrieved as the + * DataObject's metadata container. 
+ * @param parentMetadata the metadata of the parent object, it may aid the process of traversing the stream + * in search of the object with the given path + * @return The DataObject extracted from the given stream with the given path + * @throws SubCrawlerException if any I/O error occurs + * @throws PathNotFoundException if the requested path is not found + */ + public DataObject getDataObject(URI parentUri, String path, InputStream stream, DataSource dataSource, Charset charset, + String mimeType, RDFContainerFactory factory, RDFContainer parentMetadata) throws SubCrawlerException, PathNotFoundException; + + /** * Stops a running crawl as fast as possible. This method may return before the crawling has actually * stopped. */ Modified: aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/SubCrawlerUtil.java =================================================================== --- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/SubCrawlerUtil.java 2010-09-08 14:53:10 UTC (rev 2426) +++ aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/SubCrawlerUtil.java 2010-09-08 21:38:39 UTC (rev 2427) @@ -24,6 +24,7 @@ import org.semanticdesktop.aperture.accessor.RDFContainerFactory; import org.semanticdesktop.aperture.accessor.base.DataObjectBase; import org.semanticdesktop.aperture.datasource.DataSource; +import org.semanticdesktop.aperture.datasource.config.MovableDataSource; import org.semanticdesktop.aperture.rdf.RDFContainer; import org.semanticdesktop.aperture.rdf.impl.RDFContainerImpl; import org.semanticdesktop.aperture.util.HttpClientUtil; @@ -76,6 +77,48 @@ public static DataObject getDataObject(URI uri, InputStream stream, DataSource dataSource, Charset charset, String mimeType, RDFContainerFactory containerFactory, SubCrawlerRegistry registry) throws SubCrawlerException, PathNotFoundException, IOException { + return getDataObject(uri, stream, dataSource, charset, mimeType, containerFactory, 
registry, null); + } + + /** + * <p> + * Tries to access a DataObject that is hidden in a stream. This method can get the desired object + * through multiple levels of nesting. E.g. for an uri: + * </p> + * <p> + * "zip:mime:file:/C:/Users/Chris/Desktop/docx%20problem/Useful%20documents1.eml!/#1!/Board+paper.docx" + * </p> + * <p> + * This method will assume that the given stream points at the root data object. i.e.: + * </p> + * <p> + * "file:/C:/Users/Chris/Desktop/docx%20problem/Useful%20documents1.eml" + * </p> + * <p> + * Then it will apply a MimeSubCrawler on that stream, to get the first attachment, and afterwards it + * will apply the ZipSubCrawler on that attachment to get the desired file. + * </p> + * + * + * @param uri the uri of the subcrawled object + * @param stream the stream pointing at the root data object of the uri + * @param dataSource the data source that will be returned from the {@link DataObject#getDataSource()} method + * of the returned object + * @param charset a charset (optional) + * @param mimeType the mime type of the stream (optional) + * @param containerFactory the factory of RDFContainers + * @param registry a SubCrawlerRegistry, from which all the necessary SubCrawlerFactories will be obtained + * @param parentMetadata the metadata of the parent object, the one from which the parent stream comes, in + * some cases it might aid the process + * @return a DataObject for the given URI + * @throws SubCrawlerException + * @throws PathNotFoundException + * @throws IOException + */ + @SuppressWarnings("unchecked") + public static DataObject getDataObject(URI uri, InputStream stream, DataSource dataSource, + Charset charset, String mimeType, RDFContainerFactory containerFactory, + SubCrawlerRegistry registry, RDFContainer parentMetadata) throws SubCrawlerException, PathNotFoundException, IOException { if (!isSubcrawledObjectUri(uri)) { throw new SubCrawlerException("not a proper subcrawled object uri: " + uri.toString()); } @@ -111,7 
+154,7 @@ SubCrawler subCrawler = factory.get(); bad = true; DataObject newObject = subCrawler.getDataObject(new URIImpl(current[2]), - current[1], currentStream, dataSource, charset, mimeType, containerFactory); + current[1], currentStream, dataSource, charset, mimeType, containerFactory, parentMetadata); if (!stack.isEmpty()) { if (newObject instanceof FileDataObject) { @@ -122,6 +165,7 @@ } ((DataObjectBase)newObject).setWrappedDataObject(object); object = newObject; + parentMetadata = newObject.getMetadata(); bad = false; } } @@ -349,9 +393,22 @@ public static DataObject getDataObject(URI parentUri, String path, InputStream stream, DataSource dataSource, Charset charset, String mimeType, RDFContainerFactory factory, String prefix, SubCrawler sc) throws SubCrawlerException, PathNotFoundException { - Model model = RDF2Go.getModelFactory().createModel(); - model.open(); - RDFContainer parentMetadata = new RDFContainerImpl(model,parentUri); + return getDataObject(parentUri, path, stream, dataSource, charset, mimeType, factory, prefix, sc, null); + } + + public static DataObject getDataObject(URI parentUri, String path, InputStream stream, DataSource dataSource, Charset charset, + String mimeType, RDFContainerFactory factory, String prefix, SubCrawler sc, RDFContainer parentMetadata) throws SubCrawlerException, PathNotFoundException { + + if (dataSource != null && dataSource instanceof MovableDataSource) { + parentUri = new URIImpl(((MovableDataSource)dataSource).getMovableURI(parentUri.toString())); + } + + if (parentMetadata == null) { + Model model = RDF2Go.getModelFactory().createModel(); + model.open(); + parentMetadata = new RDFContainerImpl(model,parentUri); + } + URI childUri = createChildUri(parentUri, path.startsWith("/") ? 
path.substring(1) : path, prefix); GetDataObjectSubCrawlerHandler handler = new GetDataObjectSubCrawlerHandler(factory,childUri, sc); sc.subCrawl(parentUri, stream, handler, dataSource, null, charset, mimeType, parentMetadata); Modified: aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/base/AbstractArchiverSubCrawler.java =================================================================== --- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/base/AbstractArchiverSubCrawler.java 2010-09-08 14:53:10 UTC (rev 2426) +++ aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/base/AbstractArchiverSubCrawler.java 2010-09-08 21:38:39 UTC (rev 2427) @@ -153,7 +153,7 @@ @Override public DataObject getDataObject(URI parentUri, String path, InputStream stream, DataSource dataSource, Charset charset, - String mimeType, RDFContainerFactory factory) throws SubCrawlerException, PathNotFoundException { + String mimeType, RDFContainerFactory factory, RDFContainer parentMetadata) throws SubCrawlerException, PathNotFoundException { if (stream == null) { throw new SubCrawlerException("The stream cannot be null"); } Modified: aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/base/AbstractSubCrawler.java =================================================================== --- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/base/AbstractSubCrawler.java 2010-09-08 14:53:10 UTC (rev 2426) +++ aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/base/AbstractSubCrawler.java 2010-09-08 21:38:39 UTC (rev 2427) @@ -16,6 +16,7 @@ import org.semanticdesktop.aperture.accessor.DataObject; import org.semanticdesktop.aperture.accessor.RDFContainerFactory; import org.semanticdesktop.aperture.datasource.DataSource; +import org.semanticdesktop.aperture.datasource.config.MovableDataSource; import org.semanticdesktop.aperture.rdf.RDFContainer; import 
org.semanticdesktop.aperture.rdf.impl.RDFContainerImpl; import org.semanticdesktop.aperture.subcrawler.PathNotFoundException; @@ -55,6 +56,11 @@ public DataObject getDataObject(URI parentUri, String path, InputStream stream, DataSource dataSource, Charset charset, String mimeType, RDFContainerFactory factory) throws SubCrawlerException, PathNotFoundException { - return SubCrawlerUtil.getDataObject(parentUri, path, stream, dataSource, charset, mimeType, factory, getUriPrefix(), this); + return getDataObject(parentUri, path, stream, dataSource, charset, mimeType, factory,null); + } + + public DataObject getDataObject(URI parentUri, String path, InputStream stream, DataSource dataSource, Charset charset, + String mimeType, RDFContainerFactory factory, RDFContainer parentMetadata) throws SubCrawlerException, PathNotFoundException { + return SubCrawlerUtil.getDataObject(parentUri, path, stream, dataSource, charset, mimeType, factory, getUriPrefix(), this, parentMetadata); } } Modified: aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/TestSubCrawlerUtilIntegration.java =================================================================== --- aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/TestSubCrawlerUtilIntegration.java 2010-09-08 14:53:10 UTC (rev 2426) +++ aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/TestSubCrawlerUtilIntegration.java 2010-09-08 21:38:39 UTC (rev 2427) @@ -9,12 +9,16 @@ import java.io.InputStream; import java.util.Map; +import org.apache.tools.ant.types.resources.comparators.FileSystem; +import org.ontoware.rdf2go.RDF2Go; import org.ontoware.rdf2go.model.node.URI; import org.ontoware.rdf2go.model.node.impl.URIImpl; import org.semanticdesktop.aperture.accessor.DataObject; import org.semanticdesktop.aperture.accessor.FileDataObject; import org.semanticdesktop.aperture.crawler.mail.MessageDataObject; +import 
org.semanticdesktop.aperture.datasource.filesystem.FileSystemDataSource; import org.semanticdesktop.aperture.rdf.RDFContainer; +import org.semanticdesktop.aperture.rdf.impl.RDFContainerImpl; import org.semanticdesktop.aperture.subcrawler.impl.DefaultSubCrawlerRegistry; import org.semanticdesktop.aperture.test.ApertureTestBase; import org.semanticdesktop.aperture.util.ResourceUtil; @@ -98,6 +102,60 @@ } /** + * An .eml file has a .gz attachment which has one file. + * @throws Exception + */ + public void testMailWithGzipAttachment() throws Exception { + InputStream stream = ResourceUtil.getInputStream(DOCS_PATH + "mail-gzippeddiffattachment.eml", + getClass()); + URI uri = new URIImpl( + "gzip:" + + "mime:" + + "file:///C:/somefolder/someeml.eml" + + "!/1" + + "!/baseline_symbols.txt"); + TestRDFContainerFactory fac = new TestRDFContainerFactory(); + DataObject obj = SubCrawlerUtil.getDataObject(uri, stream, null, null, null, fac, + new DefaultSubCrawlerRegistry()); + assertNotNull(obj); + assertTrue(obj instanceof FileDataObject); + assertMimeType("text/plain", uri, ((FileDataObject)obj).getContent()); + obj.dispose(); + for (Map.Entry<String, RDFContainer> entry : fac.returnedContainers.entrySet()) { + assertFalse(entry.getValue().getModel().isOpen()); + } + } + + /** + * An .eml file has a .gz attachment which has one file. We ask for an absolute uri, but since + * this is a movable data source, we get a movable uri in return. 
+ * @throws Exception + */ + public void testMailWithGzipAttachmentMovableDataSource() throws Exception { + InputStream stream = ResourceUtil.getInputStream(DOCS_PATH + "mail-gzippeddiffattachment.eml", + getClass()); + URI uri = new URIImpl( + "gzip:" + + "mime:" + + "file:/C:/somefolder/someeml.eml" + + "!/1" + + "!/baseline_symbols.txt"); + FileSystemDataSource fsds = new FileSystemDataSource(); + fsds.setConfiguration(new RDFContainerImpl(RDF2Go.getModelFactory().createModel().open(), "uri:ds")); + fsds.setRootFolder("C:/somefolder"); + fsds.setMovableIdentifier("movable"); + TestRDFContainerFactory fac = new TestRDFContainerFactory(); + DataObject obj = SubCrawlerUtil.getDataObject(uri, stream, fsds, null, null, fac, + new DefaultSubCrawlerRegistry()); + assertNotNull(obj); + assertTrue(obj instanceof FileDataObject); + assertMimeType("text/plain", uri, ((FileDataObject)obj).getContent()); + assertEquals("gzip:mime:aperture://movable/someeml.eml!/1!/baseline_symbols.txt", obj.getID().toString()); + obj.dispose(); + fsds.dispose(); + } + + /** * An attachment inside an .eml file inside a zip. The attachment is a jpeg file. * @throws Exception */ Added: aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/TestSubcrawlerIntegration.java =================================================================== --- aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/TestSubcrawlerIntegration.java (rev 0) +++ aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/TestSubcrawlerIntegration.java 2010-09-08 21:38:39 UTC (rev 2427) @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2010 Aduna and Deutsches Forschungszentrum fuer Kuenstliche Intelligenz DFKI GmbH. + * All rights reserved. + * + * Licensed under the Aperture BSD-style license. 
+ */ +package org.semanticdesktop.aperture.subcrawler; + +import java.io.InputStream; + +import org.ontoware.rdf2go.RDF2Go; +import org.semanticdesktop.aperture.extractor.impl.DefaultExtractorRegistry; +import org.semanticdesktop.aperture.rdf.RDFContainer; +import org.semanticdesktop.aperture.rdf.impl.RDFContainerImpl; +import org.semanticdesktop.aperture.subcrawler.gzip.GZipSubCrawler; +import org.semanticdesktop.aperture.subcrawler.impl.DefaultSubCrawlerRegistry; +import org.semanticdesktop.aperture.test.ApertureTestBase; +import org.semanticdesktop.aperture.test.subcrawler.TestBasicSubCrawlerHandler; +import org.semanticdesktop.aperture.util.ResourceUtil; +import org.semanticdesktop.aperture.vocabulary.NFO; + +/** + * Some tests for multi-level subcrawling + * + * @author Antoni + * + */ +public class TestSubcrawlerIntegration extends ApertureTestBase { + + public void testMultipartTestEmlTarGzWithFilename() throws Exception { + InputStream stream = ResourceUtil.getInputStream(DOCS_PATH + + "mail-multipart-test.eml.tar.gz", getClass()); + TestBasicSubCrawlerHandler hndlr = new TestBasicSubCrawlerHandler( + new DefaultExtractorRegistry(), new DefaultSubCrawlerRegistry()); + SubCrawler topLevelSubcrawler = new GZipSubCrawler(); + RDFContainer parentMetadata = new RDFContainerImpl(RDF2Go.getModelFactory().createModel().open(), "uri:1"); + parentMetadata.add(NFO.fileName,"mail-multipart-test.eml.tar.gz"); + topLevelSubcrawler.subCrawl(parentMetadata.getDescribedUri(), stream, hndlr, null, null, null, null, parentMetadata); + + assertEquals(4,hndlr.getNewObjects().size()); + assertTrue(hndlr.getNewObjects().contains("gzip:uri:1!/mail-multipart-test.eml.tar")); + assertTrue(hndlr.getNewObjects().contains("tar:gzip:uri:1!/mail-multipart-test.eml.tar!/mail-multipart-test.eml")); + assertTrue(hndlr.getNewObjects().contains("mime:tar:gzip:uri:1!/mail-multipart-test.eml.tar!/mail-multipart-test.eml!/1")); + 
assertTrue(hndlr.getNewObjects().contains("mime:tar:gzip:uri:1!/mail-multipart-test.eml.tar!/mail-multipart-test.eml!/2")); + hndlr.close(); + } + + public void testMultipartTestEmlTarGzWithoutFilename() throws Exception { + InputStream stream = ResourceUtil.getInputStream(DOCS_PATH + + "mail-multipart-test.eml.tar.gz", getClass()); + TestBasicSubCrawlerHandler hndlr = new TestBasicSubCrawlerHandler( + new DefaultExtractorRegistry(), new DefaultSubCrawlerRegistry()); + SubCrawler topLevelSubcrawler = new GZipSubCrawler(); + RDFContainer parentMetadata = new RDFContainerImpl(RDF2Go.getModelFactory().createModel().open(), "uri:1"); + topLevelSubcrawler.subCrawl(parentMetadata.getDescribedUri(), stream, hndlr, null, null, null, null, parentMetadata); + + assertEquals(4,hndlr.getNewObjects().size()); + assertTrue(hndlr.getNewObjects().contains("gzip:uri:1!/uri%3A1.content")); + assertTrue(hndlr.getNewObjects().contains("tar:gzip:uri:1!/uri%3A1.content!/mail-multipart-test.eml")); + assertTrue(hndlr.getNewObjects().contains("mime:tar:gzip:uri:1!/uri%3A1.content!/mail-multipart-test.eml!/1")); + assertTrue(hndlr.getNewObjects().contains("mime:tar:gzip:uri:1!/uri%3A1.content!/mail-multipart-test.eml!/2")); + hndlr.close(); + } +} Property changes on: aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/TestSubcrawlerIntegration.java ___________________________________________________________________ Added: svn:mime-type + text/plain Added: aperture/trunk/core/src/test/resources/org/semanticdesktop/aperture/docs/mail-gzippeddiffattachment.eml =================================================================== --- aperture/trunk/core/src/test/resources/org/semanticdesktop/aperture/docs/mail-gzippeddiffattachment.eml (rev 0) +++ aperture/trunk/core/src/test/resources/org/semanticdesktop/aperture/docs/mail-gzippeddiffattachment.eml 2010-09-08 21:38:39 UTC (rev 2427) @@ -0,0 +1,449 @@ +MIME-Version: 1.0 +Content-Type: multipart/mixed; boundary=sDCecy9vEe 
+Received: by 10.142.128.13 with SMTP id a13cs23035wfd; + Sun, 30 Mar 2008 07:10:12 -0700 (PDT) +Received: by 10.82.118.1 with SMTP id q1mr12476633buc.13.1206886210747; + Sun, 30 Mar 2008 07:10:10 -0700 (PDT) +Received: from sourceware.org (sourceware.org [209.132.176.174]) + by mx.google.com with SMTP id c25si8206519ika.9.2008.03.30.07.10.08; + Sun, 30 Mar 2008 07:10:10 -0700 (PDT) +Received: (qmail 20115 invoked by alias); 30 Mar 2008 14:10:03 -0000 +Received: (qmail 20082 invoked by uid 22791); 30 Mar 2008 14:10:01 -0000 +Received: from mail.cs.tu-berlin.de (HELO mail.cs.tu-berlin.de) (130.149.17.13) by sourceware.org (qpsmtpd/0.31) with ESMTP; Sun, 30 Mar 2008 14:09:26 +0000 +Received: from localhost (localhost [127.0.0.1]) by localhost-12225.cs.tu-berlin.de (Postfix) with ESMTP id 7107BF97B for <gcc...@gc...>; Sun, 30 Mar 2008 16:09:23 +0200 (MEST) +Received: from mailhost.cs.tu-berlin.de ([127.0.0.1]) by localhost (mail.cs.tu-berlin.de [127.0.0.1]) (amavisd-new, port 12224) with ESMTP id NXBh3Fm+MUbM 26160-03; Sun, 30 Mar 2008 16:09:11 +0200 (MEST) 13633 +Received: from bolero.cs.tu-berlin.de (bolero.cs.tu-berlin.de [130.149.19.1]) (using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits)) (No client certificate requested) by mailhost.cs.tu-berlin.de (Postfix) with ESMTP; Sun, 30 Mar 2008 16:09:11 +0200 (MEST) +Received: (from doko@localhost) by bolero.cs.tu-berlin.de (8.13.8+Sun/8.13.3/Submit) id m2UE9BmE014212; Sun, 30 Mar 2008 16:09:11 +0200 (MEST) +From: Matthias Klose <do...@cs...> +Message-ID: <184...@ga...rgle.HOWL> +Date: Sun, 30 Mar 2008 16:09:10 +0200 +To: Benjamin Kosnik <bk...@re...> +Cc: "John David Anglin" <da...@hi...>, ja...@re..., gcc...@gc..., lib...@gc... +Subject: Re: [PATCH] Updated baseline_symbols.txt +In-Reply-To: <200...@wa...> +Sender: gcc...@gc... 
+Return-Path: <gcc-patches-return-214824-triage.boks=gma...@gc...> +Received-SPF: neutral (google.com: 209.132.176.174 is neither permitted nor denied by domain of gcc-patches-return-214824-triage.boks=gma...@gc...) client-ip=209.132.176.174; +Authentication-Results: mx.google.com; spf=neutral (google.com: 209.132.176.174 is neither permitted nor denied by domain of gcc-patches-return-214824-triage.boks=gma...@gc...) smtp.mail=gcc-patches-return-214824-triage.boks=gma...@gc... +X-Spam-Check-By: sourceware.org +References: <47D...@cs...> <200...@hi...> <200...@wa...> +X-Mailer: VM 7.17 under 21.4 (patch 21) "Educational Television" XEmacs Lucid +X-IsSubscribed: yes +Mailing-List: contact gcc...@gc...; run by ezmlm +Precedence: bulk +List-Id: <gcc-patches.gcc.gnu.org> +List-Unsubscribe: <mailto:gcc-patches-unsubscribe-triage.boks=gma...@gc...> +List-Archive: <http://gcc.gnu.org/ml/gcc-patches/> +List-Post: <mailto:gcc...@gc...> +List-Help: <mailto:gcc...@gc...> +Delivered-To: mailing list gcc...@gc... +X-MIMETrack: Itemize by Notes Client on Forensics Matter/Forensics Matter(Release + 7.0.2|September 26, 2006) at 05/14/2008 11:36:31 AM +X-Notes-Item: 1; name=$SMTPNotFromNotes +X-Notes-Item: 1; name=$NoteHasNativeMIME +X-Notes-Item: 15; name=$Hops; type=300 +X-Notes-Item: Wed, 14 May 2008 12:36:31 +1100; name=DeliveredDate; type=400 +X-Notes-Item: GmailId11900061db81aef6; name=MailPop3UIDL +X-Notes-Item: CN=Forensics Matter/O=Forensics Matter; name=$UpdatedBy; type=501; flags=44 + +--sDCecy9vEe +Content-Type: text/plain; charset=us-ascii +Content-Transfer-Encoding: 7bit +Content-Description: message body text + +Benjamin Kosnik writes: +> +> > > > Has hppa-linux also changed the long double size, or why those +> > > > has been not found by configure before and now they are? 
You +> > > > could ressurrect the math exports as *l@GLIBCXX_* instead of +> > > > *l@@GLIBCXX_* by tweaking the #ifdef above _GLIBCXX_MATHL_WRAPPER +> > > > definition in compatibility.cc - change that #ifdef +> > > > _GLIBCXX_LONG_DOUBLE_COMPAT to +> > > > #if defined (_GLIBCXX_LONG_DOUBLE_COMPAT) || (defined (__hppa__) +> > > > && defined (__linux__)) or whatever Benjamin prefers (of course +> > > > the primary thing is to find out what changed). No idea why +> > > > __signbitl is popping up, it shouldn't be exported. +> > +> > The size of long double has not changed on hppa-linux. It is the same +> > size as double. I believe what has changed is glibc. Version 2.6 +> > introduced long double math routines and maybe some other new +> > functions. +> +> I can't find the hppa baseline symbol diffs. Can somebody re-send? + +Attached. + + +--sDCecy9vEe +Content-Type: application/octet-stream; name=baseline_symbols.txt.gz +Content-Transfer-Encoding: base64 +Content-Disposition: attachment; + filename="baseline_symbols.txt.gz" + +H4sICN1dykcCA2Jhc2VsaW5lX3N5bWJvbHMudHh0AMxdbZOjOJL+fj9moqBsl+s+9e40c1HRs72O +8u7Fxn0hMMguUYBYhC17f/1Jwi/YBXpJYdMRNdE1pcwnU8pUKiUS8cc/f/7+3+H//fSewjDe76MV +3nneLAxRsc3D+lCiEBdr8v0p2H379vu//vWXv76F3m/P//WHGZsHY/NN2F7CMKqq6GCrZQefB+Qz +1TPOIkoBet7yeUA+Uz3LVUQRQM9bPg/IZ6TnK+cjuKhRZatpJ6cH5jTR1ue/r7dFXGNSWKrbw+rB +WU0VphjksT2sHpzVSGEvDHc5UOMeXs+B10jnZ2mfJMpRUUeZrda93J4Tt5Hm7ZlAwhzlK+t5qMPw +BsBQ9WVTbMOCVLk3Df/EtOa/JygUkWjyQchnsFg+hd++/c+fb3/l/OHzbxNDZsqiMnjnzEsPwj/b +FlL8DsD7UqEdqigCMc/rKiroGlWy5/3Kew1GglbbDQ+Iy2jNx5qPf1STSgJ5fvi3cIPqMN/WaH+j +y2+vNkgzjhTVdRR/8LBQbDIULH4uwxMtRf/eoiJu1A9WjnIS1JbjoPXrWWljbS3RG1V7rdyFcSXd +yULXSM+XkYuyzEGl+bUJtllUwdF80UE+GaIMJ1GNBkB84YDnmd0DMmlAyqjiQ4EyDoPqmkumz3ys +b0fZezJhpJzxi8Q272vDyuMhNyp3O5LjOIySJFj8L8bderZYRO5FSCZMR2ID1/ANQWZHkHWFUJjx +MBOkIKBXacY15p5lAjDnO4h9/BEVGxRGRWI+DnNaJ5iE9FDEIZfGvWD9Fi9rz+NgfDmpIlzTtzgI +JqLRaFCMEQWFwjcAkNPtOiNsWMzZnnJ7FsEixgPDllsB+2NY3BeK0CdZr4M9b/TDN0L5lEKfCa74 
+/z/L//97iYqcr32Diy0JDZb1ZM3/fRNxJV/RWsSgOrir7DnhS780/KAj+cr3ZvHnOsLZwLg89Tzq +O6ij/u4Fi3n49vfwj7c/g0GB/TsBN7nygHgeHI/d4jHXmNeJ6BbzuiBdY14X5jnmMTww7DHmDYv7 +gJjXK/YBMa9L9iXmpUPCtmLesLgDxLwu3EFiXiewfydgl5jXiWcW83yeYXKPWRFx2oiqHBfCSXm+ +mGSo6jkpuHDPmvT07c/VUxDwrbTYr9C6Iodb2RMts9g14QLXmG9Q/oPs+WcyL46zCOfhiifLnzwx +S2EgFFU7dARJrTA8l1Hwvo7C4o/dYhc4gtwoMdPyn7Yq9UeFoiTEiX0/nK3hDWMN37tY49SfT3QI +FgY9ej1v1Y4DEqQ2TGL3Hmfoyw67xfZD83RCNCckjKNaHJz8WNav56bFYpd2Tk4tpicxt2Uc8T0o +B30KFz927z/5v940PP5dDPs2q4PBJCx2MChfQiU82AksLLUUB+LbVfiJi+Ss//I5XE7DphfcAY4M +Tt3wm8FfczFhuV1lOA5pFQeYS5NSl35ohnvzKGcQo95iTi9nvmdgOewmeF2PcMQBDKbnv5fBDgrU +oZo4AvzaicBM3e4nOEIM1/fcYKRw97OVW/89antDFTSzxgPNmh7BX7w97AkKbf/3pP9PwuUsbPSB ++H+3Qsop0DMsRtOi5wnRI0a+R/KIQ9+t0f3GXv18aJj5enV4PAuDqiJVuCZVHtUcWOZIOaI02qBA +gD+H3wVlmNONSDe6DycNUBdRFeVI/D7hEsqKd4OPIMrOC0XsDu0/naF5YhFXuBQBpxEARH8+IzJS +CV2hak7PQDzjwcXGAerldOgd7cMMFZv6Q7Gf0KGJXAiJP/U9bfhh8ERKpHK6pxYmOMI14qgIY5KX +UYWCd4XllquuzdUyEjssqVCCaSrWHHF2cT08UyiKvSK+jBKZCDtkLTDSb4OgDADDB9cPB8CBdelZ +Yqxx5TYyHTBD4MDH5gYI2CsxveWDJTE3emeUEkMEnPgDxZ+nGJGmtyHHeCL0YNkrNbt4TkGcDN+N +NBAU3PxfsWDde2m5keNI9UANhQUfqw4wUA/96Hb/b8b3jIoENLcm4Tsq5bNnvq/JUPSJBsChnACK +E5PyECxgNp0kUR3BxK6bZBtktAsznBvseg07TO8K7DX065GfGed0hTa4gLHGIt+EsaK8rA8w1srF +NVrcDuxg5zjyg1RvzkVL0KDNOnN5Q94K7iIzvm8WPgIabLEXAQcQwYyh0+nltEm4Te8t2aWfuACI +XIg5A6RuCAP0opkwED3mp/QwSPt3tTqADOe4hjnhPI7KKMbAYDUX+2hwbBZFXxg++fC+P3XBs01M +tkXdD4xnFBV1dYh3q34iYkJEbze7ip1CB20f6O3GNU6/WdAaE3PXpcbEKh2+bCctic2pNSrfUCvV +MNwsUustIbXb+NHOTZlK9W56KwbNOH5lUOnTuVUCMNhxaLrQwaFQSbkZouotD7Xa2FCr7Qs9blJU +g6PZitDTnkHR/QuJjkY96g2NSlClGUr1ikJ1OT3VZe5Ul5/Tcx6t6EaLRkukHrEjkUKWNjmm2hSY +ahNdqk9nqT5ppfrUlLYS0FhPJAZPT5WmRmCSLDWhMxUrbduPaJbhUZM8jhpka9QgJ6MmmRdV5Ve1 +97SKEvnUDCcT9hHV/S+XSGrM/Yr/oCifVslqu+6Xy4lzUqBDKTKkt/hYhFWgzfHBiy1nSSiM04u3 +VRXSQ74imS1rQsJNRbaleFplybquophniBue6lqyPicoxnmUNU86rZlJCB5kwQwe52cuNqrxTry9 +sylsmblYDGauP8iWRkXC12BVcO1knohyF7CHCG64pWfiqb6DsWeNscGjPmvsjV34HcZ+DphbHjiK 
+eOAo4sGjiAePIh48inguUcRziSKeSxTxXKKI5xJFPJco4jlFEc8piniOUcRzjCKeYxTxHKOIB4gi +DJyLMHAuwuC5CIPnIgyeizCXXIS55CLMJRdhLrkIc8lFmEsuwpxyEeaUizDHXIQ55iLMMRdhjrkI +g+UiDJyLMHAuwuC5CIPnIgyeizCXXIS55CLMJRdhLrkIc8lFmEsuwpxyEeaUizDHXIQ55iLMMRdh +jrkIs8hFiM0hyoV4VmqehdWivrTGOTqmR03VWpSHZX6aAIqjq25ueR1Hw0yDhTW7+AOQXV6cEh1o +GK1WFdphrkhiByGLoWUH4GrId7S4nesPB0XEsW+5rcXRP2db/PDr3Jj35TgMdhLnJ8Pbsb2eO2vD +xzodjdlx3zqaJfuthS3Y+xzNHKLf0Sww+h3NHOTsaEwUVNg4GrtyNHOJV45mznbtaCq+jGxw3NTn +dx1QXxH7oSiix827x2LuYBqSEikXAf8cX3kkNgixz+LAHO1jJN+sMDgzf76o1HsPiYGeNzCdr/Zb +wDRd7r6PRrsoGYD06HL1cN0WyFCdzvsmbPtkMb7qPmmANOpgBYq2NsYMxaA05gxE3IEqrrIIkmYz +enLsgJu/mqCYGdcSyVQhmMuaoMC6BXTaIwwZxFpkMGuRYaxFBrEWGcxaxNFa0wbmvPh133e2KVUl +GKYo5RAoUyTueRkAZjOMNsN0Sr646gwz26A6I7EVTuddT9Zj04lSDoECsHc3zGYYbYbplL29uxd/ +C3vjYtPjN8tIOM8zNe3YEan/hSIN0ryN1B+VzdSyAtMH6C64QTuqeN9Cp9vrJZEbYODs0IDagYfO +Ds1YOzLo2JGBx44MOnZkoLF7ritv8hHRj7d3ZQ1/EGfBctZ/6+0tFJUMviGDVvLEFKiRS82okSBG +KlrjoZmGRjCNep4BsVbiswmIwXCYDsU0FkWE4tz2SRyhk4wwcTN5rDpF7OYxp9+WpbWMhseAfs7J +GU6QvOyWu+rCiseA+FU86oj4LpgdJcQLO65YS86aXlNxoQSmQb34wRTvSN1wnW3IVAd03Tzm9Ecb +WsloeEzovVPvC1JbdF+cTsak2KGqFrfQsDyin0GtZ5xycXycGzkLc4baoC9f/NGKx8BVrvyRmfnj +FRfrJ5+JV3My5ONEnNVjRU35iXRSRDnSk6F/i7Lyfpu+xCRB8a5+i+Pre06lK20L+oHXzY3ti5jH +zPfbuGmGJdwMFbxZ/eSvH0A8pu69y8YMQjy5jDImnmMURDgvBEX6Y9GMB/eyCR+QiRiZGf9lBhgZ +oRTZ1sMBCnc7DtIFM9XjsAHNz1zNz9zNzwYxPzMyPzO2FlOan9mbnzmbP8s4p1z+ZUw/v66iWgiu +2EQxxRVb82PELF4Ma77SQqq8SVTMlPXFOndhNFZXZmsW9C/Qrr1wBS1lvdp3iN2ajRmzXZmNWfSN +fTEbU72bfs14YzZTdU9mM6V/gXatZTZTWa/mHRIfA9ygWu6GX/H5/Op8w1nnwWUgrzxDe/6XuObR +p35LA7F9aX7el/UcEyrvRns/35PO/xMB4f0f91cm+5WUyX8lZepfSZn9r6TMYUxlZi1l1hmJ6sBE +kf5XJKGKyG8omYhe7EYTvRpNcjKaZDSa5PVoktPRJGejSc5Hk1yPJnk/muTD0JLF1mm8CGojfTWm +8GRM4WhM4esxhadjCs/GFJ6PKbweU/h+TOEG8ZX1Ce8qn7j3rtNRmexXUib/lZSpfyVl9r+SMocx +lbnfrtNOkUF3nXcSvRpNcjKaZDSa5PVoktPRJGejSc5Hk1yPJnk/muTBs6Khd533k74aU3gypnA0 +pvD1mMLTMYVnYwrPxxRejyl8P6ZwTXwtt8ctLzF+OiMeLcs7AkTKLu8cjdsqLOLla/iOBxfbfHSV 
+inq2q+1lW3as2xa4y81HkrsfSe7hUXInZ7dqNl/SsZYzUWrzcg9/ml76KQW+JZ09vUdXv4hGjxL9 +LN6sv5Wz+LF7iJzVQ6QkD5GCHiIle4iU/CFS9g+RMvhyJ6+jiJIgxteLHA9PwwclefXPY2Zon6jV +owQljxKEHiUoe5Sg/FGC9o8SZDBtWZ+g7tPcr1kqu5rAzDBLtRRrkqUyk3XdTW4+ktz9SHIPj5Lb +laWK4tp7+ZNZlsriO3TVLEu9h+iulZ0ZrYEDyFk9REryECnoIVKyh0jJHyJl/xApgy93pyyVXWep +IjINH5R6FvF7zNA+UatHCUoeJQg9SlD2KEH5owTtHyWof9rOxY16cURr/WV6Z7iXdYSzbYV0F4jN +c0RptEEig/bm16/1Cs2VHyW64vafWtzriuQNv+Id2Ta3fKSHMTYWN5GXNwny98vbrnBO1SvybeZp +nBGKAmxEfDpKt+mXeAMJ3DXxPrO5gqzX4orLE4xwXXyBtX3BVRGolzAXL2E2XsJuvMS1xw7+w8z8 +h4e98x3Ehtelf+Gpqy3SvMB+xSNeHFxHGbViMr5V/ZrL9HrrKy6be72/MEIkzi2HfW455K92w80A +vsAAvsAgvsBAvsBAvsCgvsCgvsAsfYFZ+gIz9AV5aynghbwneTE0qRJU3QPea8KrvH7a5KH2QnUn +tLMWgn98LQ6Ir8bjaSFUYAh9JtFhPC3EnRL39Tz51WV5ibecOqP1tF0IzWf1SRGeb6RKbe6gydH/ +Rjf+tD0kLeNgcav/o0flmAD+Aq4i79g/jcsOR6ePFRjqo0yPgSrNR4/d89Hj9vwOMdvy/QXbRdoS +/k6LNFCLgY0N1GJcg99pkbbUArBI20q4V+R1eFtp4EXaVpO7LdKWiqgWafboUbnnIm2nivMizQZX +aT567J6PHreHXqQHq79sErPlX8MHCozjwYX1l6+ZyhukTqCZQWajOZTAO/Su/zGrRt4uyqKqig5v +aTCh+D+qE6FX8dwqyjIS6x9cne40J7TT+t7T+YtSVC1RhSI+TeXAjcjagbvGCM49EY/zXNhx5sK+ +IcRh4LS3qOv4dVf0avhn+vugNQgvVSLDNrwP8W6xg3Mrj5BbvF1T3X7qdKGYT51ObuOp08ltPHU6 +P7ViPnW62Y2nTvfnYoynDuirSTp+i6nT+RUV/dXVGgS7qdOFYD51urjVU0f5ncIrSvkpW9DjjUu2 +/ia/pG16C4WiYGAobbxxtWm9x736laWj4YVfv2U5Qu+tFDAaANsTxfvOCydtvHG1sZgXolzEDzX1 +IQ9Q5A4eYjdF7jsQw84W+E77SnKsLCobWNYdOtXagz6iX73i7tC11/PrDVex7QG9vJLsuUmGnV9c +DS47lrSZTs2hxKLhBfX5z/272Cv5Dr008Nz7d9jAic2VOH+/XL+ZOpPqv3P+Kr7vw5VcE547hKJu +Mqpj8d2AZbhY7FITPk/ybUtR2s0ZfwqgeL+PVnjneS/89yyiNDzTB4uFkTqTMMS0KTxDVVia9WEq +mdai4Ivvhvq55Ff7fvcUHxU6USgBfC2ArwT4rsb/ruJmWv2ZWn+m1Z+p9Wdq/ZlKf0V9ME9D/iJO +W8MPvumveBcWYpJ4oTOS74T0tzBD0WfIGxNgr5ailpxPzG1cB6mDJssQ5WV9CKv+ykolwrPUpDzI +anbKB+Xnaxhuiq2YtWLCFuJBXHaJbc33QHj0mocPkCZipJQ2G0SaUH46CFITvAFI4pk4t1UWxSiM +tvsgTVMGgJm2YGi0Rhyn/4s1KiQ/4v5nz/aMCpDnT8J3VMqdaYK495NDAHT9FhCmpSjvdwaq0FrY +2K1bHCSJ6ggO4jylGxjxvjdFtYxTyMFU0yMO5RTRKkNgJH921kh8yyuMiuQCeu2Bv00HgwWp+iLf 
+WY9WjUuBIobEkW8iZTxfbIBSIMyrCD48g6vFLId6eQWdsJRFZfDe+w0oFe90hTa4gEidxpmoOoBw +8pWDBwPdKtYsYneGB61aRwkpwFtmUVkKM/eWwxgyO4jm7gnxlCt2l66noJ5TijcFdNjOzCmcGzxs +LfbUQT5o2JrdrOFcuL8EBwEpzPQt7tSBHWr8K/7URQOQcSpwdJ9VSJbZpA6sAI1f5Au8PMHr+Lqj +YcpxCwFRQaaHC6D+Ip2DDLngzckOOfa9DWGtwnG/Yr54/lxOwsuG8xUwR+xlgiIBSAxoBw2SlD5C +DjCK2Qvi+TgoJQdIkj+PkJS6OF2aujkt9OigBeBq/BS+iLUwgKPI18AdaDESR/qn9I/dCDcMqrcQ +EBXybd3sDSFdKLf0I1xF8SckfxLnsT+gh5cNcwrnduL0HTghA33hhaudOgz1DsL1ZhbAguAfIV/N +wMqJNV5izFwwGogpGMJ38WXfyZf9wInTd+AE+bLv6su+gy+DniP97j/Gl/0BfNl39uXvoPkOe0IX +LWEHJ5wP6LqcEyAP7yFLfPknrHecD9g7ztknLxFLJuebNuXB58qDzqoZBcaut80fAL/fi5LvT4o2 +hV4Kz8RXZY6LXRC8L/Ht1+5a7+XcMKws6RNLemRJv7akTy3pM0v63JK+tqTfW9If9PSywHMR4z6H +ObXHSoJ3x3lgiaJRRt3cOzcmJUKfimauUaIaqgk9FHE//5Ry+E2wrCfrktA3UfyTr2S9aFgHGqb9 +ucR0Kd7JxlUvfY2ybKNQYlsox2DG91akur39rb09a5FoILBOyE4vZNcHQVFRVwexT1ji1TcNla+k +euEjkuECqWzbpul1r5dyW8vNYS/FXPgQJblS1gDrFlasW3iAdQsr1i2sWLewYt3CqnVLlJH8wY24 +DAMNyaVAcPmkJ9a8drd8VkO4DiPH6P1KYNO6UjUmqkakalyrGrGqMVU1ZqrGXNVIVY21qnGvauy5 +U3dJZNFw3zwl03W2pR+97khkWC7tYvmJyTSWExnLS4USrOL7MfGNgD6DkXaYJDoi34BIMX/PJP3T +mIgaD73WreLfxQ+ZoxJFCtMmX9kQJzbEyIY4syHObYj3NsQHLfEASw1RLDVkgKWGKJYaolhqiMpV +VT6aNUsD6V9qTiRGS82ZGL7USIgfSoVdB5ljrBRtiaINKdrWijasaEsVbZmiLVe0UUVbrWjbK9r6 +lhf6tRJbvHMjKtMNOXwjDpPKanpbP22Aa1JUSb/U+cbaauh4SbmnT8LBIJehRPQtEIUWngU9F9FP +3VGgHPcSd5Yhx32+TlXFxlRZUkw7C4dVNu8sEDZj0BUCU/NyX2pT1EutSnepZYEudSnDpeBiW/ql +pFYYQeHfX0tnBUOqJL8tkVXauVI72rHgtS+4Uk1VK9XUrlLDElIZWIbAUAy1rtaUXopCYwMSLUz/ +S5rXNHqFUoU+p4JME5JUR6NW+UKTapEUKhvWOUpzDoOiVyZVDWCLJtUSKYewTZTqsRR6a+oRqa7q +8ELQK+NS/hd/6flVtLwl7IdrqgGVEpU1f/RSlmegU5uwB860dukUW37yncI5D+qty7LHVbgfBEuR +okHg0sHAVLPDGky8ruIN19HmZzC4VG/RNDWxuzrLvK4qo4ZkqQmesgfqOjLaLvWKb4CuJuotYT+c +tvKLtuq7elWXhVi6LVRDlOqpjCioprm/Q2cCvZzUoE+7/rY3I7eWhRqTUCtJhEhJ65vQNqSeltQ3 +sZxvZDk/MKKgmmaF5XxDy/kGlvMVlvMHtZxvYTnf2HKKozyqOMqjstomVrSqTMSbe1kV5TFUFsHE +ilaVUN7cx1qL2yHEFwfFgSPfyC6P1z4fPxX8/uPqA/IKFHEpq7j3AasqTK7pPEM6X0WHz5drPPN/ 
+zCibQGpOiU1JjTENIX1jPX1zPX1TPX1TPdUWb9F5hnRKize3vwiH9J/aZ5Bh+f+9XVuT4jqS/kcn +ygLqMk+zO1Oz0duzZzqqOvrVYWwBZozF2KYE59evbcA2YEl5kc9Dd1SgzE+pVGZK1iUVVXV4yd9j +m5223JfXyNpMNGLe7m2kVRpl9adH2BO8/2jyl5zfuQvPdVi74wG66ekxiC0Soy54DQeChXEUb+SF +AAmGIhce5Bc+5RcocrtdPpIHOHIBJw+ms7PAg50FLjsLEP0U4Ows8GBngcvOsPILFDnGzgKcnQUI +O9PTxTPtIZ5pVzzTiHigcfFMe4hn2hXPCPILFDnYzjQunmlcPNPTxTPtIZ5pVzzTiHigcfFMe4hn +2hXPCPILFDnGzgKcnTnjmQLP53vK51Uh5R/SeNjghvg8/a5/m7XHpf61l/lOJRLICpLnPHOmVQFU +jr2LFHDKrWBT7iBs38C5vuErZree35WNWD0UE+/yIwD1ry+DX6/D/zsGBE4ruBILHxILOK3VZu5p +AwQt2Hb0BLajubajLbaj3zEgcFrBlVj4kFjAaeG2oxG2ox22c3vOTMj/vH98j++3Q4YL9zYeSCX6 +wqARldzwmCrJ1DqNQ1kUqrAvv9/RCjitvYuGhAGU0No5RZSvJbBJQ1oBp7U3aUhoa5IIw/MpxlWa +NcFVrBIEdfNSy8OtksDKoeoBv1mPe5gBpPBardd+HukXcdacr4IzPB/Ltaxy6xLfCM/+UOX2pcZ7 +ppfmgLparcBH1B8hXstTGZ6V+hp++1f4j2//fIfPrsxoKQPkrIrwvO5aez5cIW/lRuldlJ8w/Xse +4/bVprnr02w6ymNYwbkFhxvlXNaYIeI2NfTy1Lx0eWmVbdR7oBc4emv8eCAOMMTwZmpkMzWymRrT +TI1ppmN4FonaRWkOGwJuiQWC2N66G0pr0y4HJmHS3hALBLFd2htKq7TqUIVqFbZjnFPaG2KBILZL +e0Nplbb7jlweVo7v9hta95f7LXkpqybXvnXQGjI0B66aKmxbXrcMxtEKMVY8AO5VabjWRURtzua2 +T2BaZvAPDLXyDvvm7PlTCFTfq/qSxSpT+h3I8LZvzrm0T+NBOQ55cqkDaDNNNP3Hj69t/f+X5YrM +CNf3KEUQxxjiDYI4Smux4eQxjnyDIoeKLUg6FxidC4zOBUbnAqdzgdO5wOlcQHVuj803lODYbB3P +m8c4w+7pQmv1d6QBnNQhwHUmabpq1t3MqAlWsrDoEgLWXNw431kI28DFlO25vxrjBa85X1jJYpfm +jYT1aNxcL2ZBCtGeis7rsF6FlWpyesgijzLriArBHTa9fSGmBr32EU/il+tQ1+jAK7T5O52F6vhy +B0A4P+YhGGnt7+3Vner5vMzIQYNMvCAwgEUHGIx7HQKA89LGEvnvx8nezSSNVYWH+SS0Gh+zTEBd +kEkhAAY0VYTgQJZTIDiQiSgA528Bk585QrrGbSc/U37MCD/6sDh1hB8Fo4/wo3CMEX4UjzfCj0Ga +R3jN0qSfEX4U2c8IP/pMOnuEH0XFjfCjD6gjR/hRDPIIP/ok+3WE55lJN8KzYS4jPA/H8wg/WoX/ +Ed5YzQQj/Fhd/Qi/5cAMRngeDmGEH8UhjPBjOJgRfpSfOUJiRvhRfqb8oBH+vOTg+7vLggoOqRaM +l/SyM8hCOa8eem0cyORs/MK/SEwrAFmxjZ+pEowVe55bWFCxVjw6VKCt2BDlfDcOY8WGwOldJKYV +YKyYHItt/AD5Uwv/SOpmEdpy++JRl1OAJlOAyilAV1OAbqcAzaYA3U0BWk0BepwC9OQJ9Jwx2/0x +BMHQbJDRnNr6M7BsDPKhPYjNh2AFW3sucBjEOV84zxCACxg2CErecQww/LAgBNOevxyGYc9xDsJw 
+50FHwngQZeujQV9+GvTFEWWQlFiEy796gBJ8qD7DO89jhzisINZlk2ehDDLO89plShXMHksgnxqp +41NjGsGYE3bIB0fK/OBIJ/zguOTIF6ElNTEGaJAuee4HcJBSue3tzwUfdxJTsub5ByMsuQAJF0By +AVZcgJQLsOUCZFyAHReg5AJUXIAjF+DkAlC26X2z9ao5CI73HWAYhDcgMMDoebVyzav3zAZfX25w +TyQUePrnA0n4QoKMuBAYwYMZvpLB0/XjSxoihKxvADGX3hET74jSO2LmHXHnHfHoHfHkB5H1GaGY +nxFqqs8IxfyMUMzPCMX8jFDMzwgbf4b5jAAA4T4jIICUzwgX7ne2ziYxRss7L1D+hMkvmfwrJn/K +5N8y+TMm/47JXzL5Kyb/kclv+XQoDnmTsQJ2cfSOWmCo7TH5ljSAk9oi7fwSpDwf14HAOk86QEAA +Rx0gMNizDkBMplBiAqG4xmC1URAAVy0oe/Z0cAcCi7Zn4tEdCAzXntFTYRCAmEAorjGg7Bk9EQYB +QJrg/+MHiMwUTUwmGldroI5X3I5XzI5X0wzMysfArPwMzGqCgVlxB2Y1wcCsuAOz4g7MijswK+7A +rKYZmJWPgVn5GZjVBAOz4g7MaoKBWXEHZsUdmBV3YFb0gTmuNRR/VV1GqPh2q8aV8snJL3j8dvW5 +mAMOM0Ztmqk2zVSb5qhNc9SmkWrLsoYOnDlthENgOVzKuCcPcOSYBmt0gzW6wRrXYI1rsCOX2vx6 +5Q62zHRPLlDk9nbe0doauQj/mZZVmNem3a6dzzdK/bue8odwjstTr6Etgc490/MhbyuCC/ZSyLpV +1lnAPctrPRLkZXMF+YdDOsgT6cHT/rD0cFMUV5mP+6KgGuelrNZNarFZsyHDx9qfsbhA9mPOIJDF +ennY7S3r+zAU+F1tGN7ei1TNXW2+htw5XYA4+0MtTewBxZEaBgZ0cNwUBqE8l01HxXwYd/4dIFAu +j5UHeSpVlvFDWuQnPNLRk/mAkgKBkF6a6OkhdLz8WfH+5U+N9S9lexWCrZ7XNA+jryY7ABcIkJAI +BlR3vOdADcpyBERqxnQ/UQCUUQGIdLlQwA7coOwMIKTz5FmEbBSuHMKLHIIrh3W6D0Ng6+Lv7Fac +H1bFaHP8cuZE83BrZZPE5vGbfe08XGPn4Sas/RmLC4QcTEdXcdHzcGbOJBje3otU6Hm4EcWeeQmI +087DtQcURwInGBB6Hj5+iBo9D6dmyQICoefh4zCUeTg1cRcUyFPPU+bhU2Xoglf0p8V6yjx89GIA +ZR5OTRsGA+LNw8m5yIBI/Tyca+G0efg4UjcPZwZu2jzcsDHpYcaGm4cb9iJ9yCG4cuDm4eg9TRgC +uxWIeXiarw2z+c+oy79+2CdNeJTrvfXZEARqm/TxStc+NkgKva562ndOrNsucCzc+p4LrVVAM3D+ +iLdbH3heB1BQbX5HUVeVuBUkFxpyzccJZxokfnsmAuIivAvuslc5Rb/U0JPAiukkFpNIDBw6nDAB +BmYk+tcwmhm3zah+47axnmvcXrbHUOuR7N0DKO7D0IXWB3CNCeBmvCkCuL22SQK4sUrcp4cLDfmx +4IQjBnAzICmAG+HOAbx3hyk6iBfJzbDiTxBdTAKLC+lmGGtI38myjNayhB8cG2MRaBZ740boka2w +f8bc02t8qzW+1RrZao1steME2SI/7M4PgcP7eoRFoFnsrR6hD5D0qFZrfKs1vtUa2WqNbLWrr9vr +nWvZa6n+8S3tPtzDtJJFVKlidCfNrSEmvJgU3q55JnYwITaqR7UJe2xNBt+jSHgxKTyuR5HYwYTY +7h7dH26tRfn1UTq8mBQe0KN07GBCbFSPahM2w0fp8GJSeFyPIrGDCbGtPdp9SMGO3T/QCxy9XYn3 
+xDatPIdhN2eIo3jTTJSaDF/t36BtszGEuv1bHINAMlg1MMqA1YK1wx8ZNFttGqs2jVWbxqpNY9Xm +mG8+p/lXlKVJGBXrw07mlctRHhkEksHe4gdqW3NfwrCJIQhTH+cQWA5rG8Y50O2w9tsIh0a3XKNb +rtEt1+iWOyz2Zadyebr9kvzn8sk9CpsZBZHRrgsjV0DiwuskoOokoOokIOkkIOkkwOtEU+1EU+1E +k+xEk+xEk+xEU+1EU+1Ek+xEk+xEu+3kNQwHfJfg26oSN30w41jDsoVNkNisirWwBTQ2gmoDT6oN +aKoNaKoNaKoNaKqlWK32ZLWaZrWaZrWaZrWaZrWaZrXak9VqmtVqmtVqmtVqmtVCYu1w88uccAZ6 +CgsFhz7MgkWfCllMKreYSm672aGQAm9IWOP0cdQEhUvfscdWMxWy+HMaIKZCRtstbQMfh2Sz27f+ +CQ0vURWHhw6raPjJoMW0kovJJLeaKBIq8AeFNlIf0RUHTA+v6HomgxZ/UhPEZNB4+6XFWCQUwH6V +5yCrpg2yarogq6YNsmq6IKv8BVnlL8gqf0FWTRVk1Z8UZNV0QVb9SUFWTRdklb8gq/wFWeUjyEJP +XO331odpcTgYYcYOIlCEMeKMC7OIq9O+2RMMZnEWlY2+q2iZWZLedRzNyn6wCMPzYlXzvXGIqx/f +q+UWxAojFPQ6BLAOmzn3RAGESDiJ9HtwfsoqrdIoS/+QYfs7gG9U3VsIG4BI0LAFABugXg1Rr33z +tjmeoIqdMbvkk+W1MCPvJc/kU1jHajz7eMZJEKsh9SSId5iD0iX5uszSWL6G3/JEHmVzlGnbrD+/ +fkVZVBTR6dv2/XNuvLX7wC5w7GdrC4Jm7VvVf5Vxu/zdyJw6mEQ9eLbNv/hQVMm1Kk7vLr5F+G23 +z4Ln82tyVZRll2X3H99//wwXqyiW1Z1N//YCgjmztjAiTW7w4DIVcp/VLB1Y3YEdIgzlbYDSaeUC +hIMSwSNUKsszGEzPjr3fe9qPBhlMDSQUCBEESgQBFMEW224JhZuwtYx6WvfZnkCOw2Yv2nZD7JZv +0fGdf4bW91LzxZnKZc/58ThgwLBeG6x62lLZwZrs8iKEQb7VkIms+dSJL6BtvLolDKCE7n59Xmdq +GWVt+HOQvlzmaU7Ms/cBiJpI1Rrgu5sYIOCFypZT85YSEO//5tb12dMBRPDmClBzBbi5Atxct8UA +rA9gd+fEHMYmvlwS0I8+VQCfMNpgCEzCT92CULctOJiZAgqTADBpP92iKd2i/XSLpnSLpnSLpnSL +xnVLm77f9G28hTGCyAQVX4DwHQrtyAIYGURpmqo0DVOapipNw5SmYUrTMKXZv3Zf8sOumcjh73Bu +fUMK75BWNRIhA/+QkO5BX8jc+oYU3iFB3ePvviUV0tU9+0OFv1259Q0pvEM6u4cAGfiHhHQP+qrk +1jek8A4J6h5/NyGpkJbueV1GSRhHpfVDuScKIES26lJVtuuawSz8v3BdKB1qVSTle7p0syyahEih +TqtNPdonqXoH8Ly0R3HbhcEsa3L6lO/NF6P8kvndUt7NQssAoJDrOk7JokN4//GPrx7j4zNMzQuV +HY54qgVJ0nKvSjmQ5Qsgwvxbnla2T+Y7SgGl/DsYE9Sl8MzaHcvzsdaEigHgL5dtFghpk7bpUEjH +DcVHeoGjt3rMA3GAIYaoG2QRIGMANQTUAKvg12w25s1G88ruHTOMTnAqEbBKrLob0gVAOpAGNUeD +GqhBzdGgBmpQAzWogRq0f2m9Xi8915oW4nbn9lr0Dl9pHsLhPnfvOQ032aH8MDpBllAwJRQwOqsp +DOkCIB3IFLRfU9BkU9BmU9DvUH4YnSBLKJgSChgdzBQ00BQcUeGaA8jjAgwZU/jHtOqSihlMgAnq 
+I3+rMGRM4R8T1kf+FmLImM4+8rsUQ8YU/jHdfeR3NYaMCeojf+sxZEzhHxPWR/6WZMiYtj4anLi6 +HqQB0m5BdAKBKWCYVh0N6YDtTo/met/CMD4eg7e3V/ABwZvnkGz85kOCUAjDQUEou+mwIJTfcWDw +FqZZsmtXYGxuM6AKQFTCStXetFGl4dlqeYzlvkpV3uQOvyQqr/81+7VUzObkWymrFsMb6OJ6xb87 +oORe9nJgzqpU1p2miOzzVZpl5ic4Xdzt4tqP8acYA/OVDwfsIs5kVPjSOnyF0QVUJG2qft/NfYnV +/rTaVfb3cRwgr76N9fJV6bWlfwvITn75hPQrDj3mOEKflZWuBGCQHH9TlBckRzG5QXIUlBkkxzAv +QbIKZv11ovNFIpjxjL5O2kZOTeU2R06OUPTIyXv11AVkiZyc5qIj5+irgb4t2BQ5OS0FRk7DA3T+ +xaEHImDkxD5j52K1CdxFRqtsPVUAorLV2KY38rx4SAcVE4BaVUkGDaYAhXWUvxVEOqiYABTYUf4W +Eemg7o7yu4xIBxUTgAI6yu9KIh0U1lH+1hLpoGICUGBH+VtOpIPaOqoGOeO0qRwAdM+rQso/pPmA +VU/aTJHiFJyc4IYRIEkz5aHBgxRi7eCeCiSqtQuaS89hmq+UtcaeKgBRmWqs9fV0yOVxL+NKJkaa +WT1fDmuz2qV5PUf+8Y+vr3cT6TwMY5V/yaJmUOHXt+T968f3+ONn+HE/1f74Dtw1H0GVk6Cu/KA2 +yuqVatXWoqHNpQ43UZ5ksrASP4fXr9jm+nEt9Wgo/nj85v0Z/nwK3z+fwx/fP2d3t9D6m+GjNYzF +EGYN1aY5zXk9BGq0uZemc/an/utlfH0kHf3Q6eQxHJscxx9rLAe/b2m73m1ram2E6erUHHQ97NN8 +3ZydejhjOFiTbzOmfixrOaUMExkXsnnaoFb+8Pduqd9U7xgEFSHN2UL0EFiEgaabmJeaI9nrIY+j +w3pThd1Xm5H27YrbnFq8nLg03TUdkmdqXRtL+zALhLyI8rV0kYunTh/LLIr/Xdvuwazr+92cUZxC +tZfbMrkyKfzDtPfX4lzET9QuSnOA/FftyHxdbeD06lCFanXWkoU+uG9Xkda9TGhY8BAXQqlWlNjz +sTTGQVMthAjkqKX3DKfB9+RFbVvNdrWrk0TXSV/D54gsHLMrx90LRhaWeeeGdy+5WHj6Pi/kMsqi +vMmuoYq6rrr3jUZhtIm3QYxqhsWwHq175KUBsDYyC6YuXcN3ej+4WqAc4zQUai7raQh6WmEFw84g +LGClT8lKX5ItVtmh3PgS7YLmSbaXtayyNJfm/H9mI3mprXf57XMefi7Cn6Z1Y79VmNIGDWoxJoLz +1xAPVZgb8raJynOin2/Nx1ef9P7y/sX70rX3YsPQRIxg8GpS02U8AI0H6JKpUTkJdRqTMNCRtAek +6/V5Oi9FF7Tr0PR6cOvAtHrwy5j0enCrcOh6hld/GMwEyxjek2AwU2omHg9nVDSxVVIP6jIqmtgu +yVtwnJom7iXyJginpgn6qVuwNX3wvR1KaZ2LfHz/GQIqssEEHmC0H2k0XZr7+REbQ5Mw+lkSnZlW +s3muxAHTfsC6GROHnagX4ryJUxUy/FGrIsQ/TlXIAEio6mYOxeKn2crNTIrFT6yfOp9i1TW9tZJn +Vay6prdX+tyKV9n0PUafYfEqm6TPsvIbbOkv/hm+fy7CH98jb0ixN6SNJyRfbfPVMku7sCu27aGA +T1mlqlxl0br0ijxrkPeFjNMyVblX6Hn4IctpxF40UmufiK8NomW3lw7aHN7/9jkzrv+OIz8sYauH +ld/vkNXlGj2BCv50XkN+boNPPUHd7TN5rCu0YUM3DAjYckK55YRyryaUezWh3KQzNz7hYn9gXiWD 
+B2ACMjQAE6DBARiP7QrAeERAACaCAgIwFhlnqQ+7eMRYvs/MI0XN36I17K1j1pDG7VkoUIvx+cqF +cUtjVBq2WSAgd7NAMA5pCteMMb2bC0e+gDaegD58SfRhkQh7GOWHObQT0D68ogEHChoyaKCgQcMG +ChK2daAgIboGCjqoa6AoygnPsxTuiXr6OLkDze0K9zydAS2nk1pOJ/VqOqlX00mNPW5nD5ZYtA+v +aPBgSUCGBksCNDhY4rFdwRKPCAiWRFBAsJzqzNzPTf4a/v6ZGK8qdQSBnaCTYmV+rvjdVQsMBCsJ +6j47CsQpyby7384RBYTilOW1O3/OeemaCEeSDv+OMRHOJR3gEuEIqRH16ynM64BtdbsBjRsnBeCk +ABwFwFEAnCp4SgEqM9CD8BUSX+HwWcEMiUOQBxdHkDgIeVJP8qSe5FGe5FFcea4hm21AICCKRGQV +gYAwEilfEvnqNeWr15SvXlO+ek2xe83TNIKMSJWRMJkgI0JkfOsjny9FIiHJUvJUiYRESKn861L5 +16Xyr0vlT5dvqHkVYLabRD/MBc2O6luuzpdDx9MxJJmJPwPx59HW9DuEW29Nv7u5w/gYnfMGNGdj +u8u7f/1rTfZf//0tDH6bDSmv+RRs5ee78UYKuU7z5sn3eGMiaR/I6lNbGMkORSHzwRX7tmYTdSJ3 +Ub7OjOUyT+xSNTlo3ApqHkDvBdpXxZDwt+Ce9Pwyd2lDu5CEK4va14eoSMJoqQoXSfyfQ1pIO1Eh +M9muIY0T7Q+FDL/SojpEmYmmkK3NmYqthV8yDuO4UoWVoBYxP+ytJA6IpG5mJYWbZOYmsVI45Mil +Fo7ymaN8rDg55dGuDrwmd10fj+FeFqXKoyytTuHX0xhVma7zZVplt7HjX//9v+9/+/mXp78MGEZ+ +q+197FfR/zoKepOMxPDz0/jv49BD6YY/G+pcjP/8PP7zy/jPr+M/v11/Dp7E6zmB0qwqmuQiiayi +NKvHrHBfNKd/s3r28f6QmeWBG80pmuHx2+/BU5u0PVqmX0GTL0c2J+S7VE7vN7ZgY30JwzZJPpE3 +zqKyJPLum5VuCu9bzavSvHlFEc/d5AxZNefjryMOnr1Mye1usmd87Rj8s1b6JGpSW0QZBWGoPRXu +5G4J1mPNus4P7WMBs/bNzHCVZvKcbXvkHPZo0HFjjR6zhmK9nrHatz29CDcGiJdw5FlGK0Oznjic +jNkp+5VNF6WCUt7dZcNQayf1IPeRi3SQ98hBKtqba+GyGTelW+RbcqfMYpjByEU7zF7koh1mLnLR +dp1XG6GDdnaTzMdN3C7ZWvwFhzDmIEAEy3IWDoEiw01KIwcxZNkUCUEQGbIMiISgSHG5q9l5392V +TSS7RrO3tzPBvn/P4PT++W3qKgf1YriSZ3Co86IOFsi47uICul5KBKvogcOpo0X3hCO4jnsOdx3X +C4NdHagrb1h03B03EPr+cCu78ir7AB13i82FfpeIzUH+fJ+EzUH/0t+/75TT3uYn8QUUPk2sT0Pq +Q20/ELCoYQG3mk8BY0qmfEqmfEjW5UewkxkzIQDZNJbtmt0ASOiUn5axAAjqM64SsxAAQX1G0dd+ +9d1KNkgAAKV0dSf18jsU1aea+qf/nHS2J7HAzPjpLvnKPRjWp4OQr9GDYb32PmSB4nOQ5AZEph1k +ENf8HKzhjJPNLmQuC+7oHN/nHZ1DJx1d648A+brVvnG6+dM5/2lcz/UcFJlaOyjUwaDU+bylmMVp +biVYaIscHYlZkI7ELMnrWVZtFuWiOedA+0BosrzFhdC9zNdTXlasjP3bU7oWBHvKsxdDMBUQc5hA +B0rp0pJ7THykNGE+txsvS/MEsLkysBdP4Wcod/vqFBZyX0+3VVFLaoUsaYw/v7W5PkZW3a+lS2tp 
+bC1NrKXSWrqylm6spam1dGstzaylO2tpaS2trKVf1lJtLT1aS0+2UmvnW/ve2vXWnrd2vLXfrd1u +7XVrp1v73Nrl1h63dri1v63dbe1tS2d/gnY/elLn7kdL+hMS8ntKBaWkr3AbIeCT7XsIxZdCMaTw +sVRiB0MuSNyDKZ+SKbZkv37vMmIt2hxbVmLohuM4tXZQO6btVzLSB+UIM9y6Xq6OLroNEKOUPa17 +J7Knde9E9rTuncie1r0T+drSAq3g9QoM2bbsifs9CqPaemLA3t5bF3ghxjhOrV3UoL2kwYSyhv+8 +PquWvVvIx5LzztO8wjEFaCZNqUlDa5rXE3pZ1K6VpbvGmaL3ZqwLl6rZkkhIvM3hl7Us0Lyiycpc +fzY3T3Sf0Nyzhvs/h1RW4e/R72j2eS14uZdxGmXpH4SGL5rqE5mrYhdmqizxAFWan/La48KlXCnj ++RUzwEsjQXMgsW5BvqYoYVEH1D1a8tdac/IYxRWW8a2xFRkvFm8Uzp1KDpmicDY6wvbwkuEWS4Zb +LFluseS5xZLpFkuuWyy5brHku8WS5BZLqlssyW6xJLvFkuwWMcMtYoZbxCy3iHluETPdIua6Rcx1 +i5jvFjHJLWKqW8Rkt4jJbhGT3SJhuEXCcIuE5RYJzy0SplskXLdIuG6R8N0iIblFQnWLhOwWCdkt +ErJbSIZbSIZbSJZbSJ5bSKZbSK5bSK5bSL5bSJJbSKpbSLJbSLJbSLJbrBhusWK4xYrlFiueW6yY +brHiusWK6xYrvlusSG6xorrFiuwWK7JbrMhusWG4xYbhFhuWW2x4brFhusWG6xYbrlts+G6xIbnF +huoWG7JbbMhusSG7Rcpwi5ThFinLLVKeW6RMt0i5bpFy3SLlu0VKcouU6hYp2S1SslukZLfYMtxi +y3CLLcsttjy32DLdYst1iy3XLbZ8t9iS3GJLdYst2S22ZLfYkt0iY7hFxnCLjOUWGc8tMqZbZFy3 +yLhukfHdIiO5RUZ1i4zsFhnZLTKyW+wYbrFjuMWO5RY7nlvsmG6x47rFjusWO75b7EhusaO6xY7s +FjuyW+zIblEy3KJkuEXJcouS5xYl0y1KrluUXLco+W5RktyipLpFSXaLkuwWJdktKoZbVAy3qFhu +UfHcomK6RcV1i4rrFhXfLSqSW1RUt6jIblGR3aIiu4VmuIVmuIVmuYXmuYVmuoXmuoXmuoXmu4Um +uYWmuoUmu4Umu4Umu8WR4RZHhlscWW5x5LnFkekWR65bHLluceS7xZHkFkeqWxzJbnEku8WR7BYn +hlucGG5xYrnFiecWJ6ZbnLhuceK6xYnvFieSW5yobnEiu8WJ7BYnsFsYczEB7moY8zHReDWjXuj9 +kiYh662u2stE4FBg44eEAxM/PCSYEBBhwQSBCQ0mDFR4MIJgQoQJBBkmDDCAUGHghIULAzMwZFi4 +AWHDwu0KHU3OJHXJCz9CIp76q46ANK0mcu0iByWqO5P/giflvTAAsnX0lM5r3gNS1zXvntR9+XNA +67z8OaB1Xv7saQF3NAfE7muXPTHkamRPPUVCRQu6hyxHA3SEnYITKnb09nRGPZnPrGAjqD415sip +1NM5DDO4RgqgSsX1GjM0VIjb1DrDy7g4hgDFoLE1aEgNwMy6ZgZTwJ51ARuYmdbC4a4DmJnWwmGq +Y35OW5ZaS5W1FPRCIQxB0RB++U5pNwbrIaXdPayvyDUC6yN0vfa5wBC5QB/ZNJzt52diLYU9ZGiB ++IUICkYG7WIABwUzh6sOVya3R0IXojPv2QilCbNLFoHLjOziCyh8mlifBtTXfHCcJ/hRbd5FFJ8z +oBg16OIz6fOagKLPoLeIsjrGY8j3mwhOHudVkcHJk3SdVnDydf0JuoGT17NM0yrEGPm+SHOEMG2n 
+w8nLfRRLOPlhv0fI/nx0aPLn52i2tXPR0lwUm4sSc5E0F63MRRtzUWou2pqLMnPRzlxUmosqc9GX +uUibi47motFsarNLutjx0W7Wv3cDm4MbGQIUg8bWoCE1kEfsDgIZZZ182sU3q/kuCa3uaoRMZQA4 +GoVjzxXckTkH8BFKlyper09K+JhWj6B6mFWfUX9RXpQbZQW+KDfGi3ibbZQd9TjaBcGZDnl2/iok +6Wc+wgrVj4EX8nKciRfy4t5s3mexRqfDG2OGx6vnEanBb/3NngdrEPjHiMzsGs7+i6bxMV5ku3+x +3/ibdbkVocslFg7t4HDmcx6hNGG+PFoNJo4Y2IFvPM5eR9gRbzzO3gaZD0lvsBkR4H73gIDNtGpE +oMuQshHw86UWYDwd86VsaSmLLWWJpUxaylaWso2lLLWUbS1lmaVsZykrLWWVpezLUqYtZUdL2clc +ZvpeOBemtsLxucH8acT9UbOQebd1Sc62bIYAm/4jhGJDKH5DFLEhPxmxzIhAkIH/7pcdC5eYucNy +vCcyv+ylpdZSZS11JyQfUCooJXVseEBQdASmm/7iu+kvvo/94vvYr4meY7Sge/jY7dE9JHR3gFG8 +85efhO4OMKJkXhczRlB99G+3v9+/nXpGQ7iIGQPuI/Px70HIN8EYL+KTZJQd8Ukynw/27fAf0mZ2 +jWJnLiICcHDy4HZxH9lgtV2fMeK23o0Dk2dkSYayuDB/fnQp0tvIFiBkUOu2jkEHEjty+1pyR+bz +VcpHUA+nE+bnz+Xf685c54emc4NZWSWpsq5AULHQcvmZPFuxkPYCejWsfathFx3rWP2HZAPZH31w +A+V7Vdp4S5zMpROwauJEY6TteesGt7bKXXOOHc4lOi51qBxs/eN1wWvNlshVdMiqcB9VtepyMPPr +tUoIx3AzUaQJiiVAsmh8LRpWy90DJzh67aYfe3mjvwiA5g2K5ipG7b+nTKKZRWPc8rivlZRXBO7m +uDWVez6sO3gi8A9qx/MviihJj1iu5/YQRYllez2zYaVcMkxjyTGNJcs0lizTWDJNY8k0jSXJNJY0 +01hSTSNmmEbMMY2YZRoxyzRipmnETNOISaYR00wjpppGwjCNhGMaCcs0EpZpJEzTSJimkZBMI6GZ +RkI1DckwDckxDckyDckyDck0Dck0DUkyDUkzDUk1jRXDNFYc01ixTGPFMo0V0zRWTNNYkUxjRTON +FdU0NgzT2HBMY8MyjQ3LNDZM09gwTWNDMo0NzTQ2VNNIGaaRckwjZZlGyjKNlGkaKdM0UpJppDTT +SKmmsWWYxpZjGluWaWxZprFlmsaWaRpbkmlsaaaxpZpGxjCNjGMaGcs0MpZpZEzTyJimkZFMI6OZ +RkY1jR3DNHYc09ixTGPHMo0d0zR2TNPYkUxjRzONHdU0SoZplBzTKFmmUbJMo2SaRsk0jZJkGiXN +NEqqaVQM06g4plGxTKNimUbFNI2KaRoVyTQqmmlUVNPQDNPQHNPQLNPQLNPQTNPQTNPQJNPQNNPQ +VNM4MkzjyDGNI8s0jizTODJN48g0jSPJNI400zhSTePEMI0TxzROLNM4sUzjxDSNE9M0TiTTONFM +4wQ0DWMWS6B5mPjBJmJMgwk2EzMC1FSMWTAR5mLGgJuMKfGk22wMnBDTMeWsBJlPdygyeKqiZSad +R8l6DtdJoguh+8jRc6biKJOzKMsgdPO87goQYXPuCUJ4lhRCeU0pBKK9dAqEtktLACOuFVBFxclK +bDz17OoO47lnAOM1MROY1G0d7dFZ/M0FMCzu6gIIFn96GAyLOz/sgu0yIwZPq0xF1SqVGZijjNM6 +IqYrh4X3HEGUbA8lphLhOH7aEc6i/R5K6fDennIp10DK+FAAKWvPAlLKHKik2UYegZQqrqCUByDl 
+PJMrIOlilR4lsFGLIl1voLhVcciBan1eRskyraDEae4Ksz2xVCs4cvnvdK9LGHGbthQM/bJWCt7E +l3KjtOtseE99yNOqDjsw6tf2ponjFPyAvBGl+QNG3lyPk4hY8rZUKrPkgnugP2smzYGafGvzncVO ++QepipzheZCsyEk7yMsBpwXgEm8OwnH9DsDUzMhwXL9DMDl9KwLYr4bJKVwRwB51/PPzx/fRxCDX +wqWtMLYVJrZCaStc2Qo3tsLUVri1FWa2wp2tsLQVVrbCL1uhthUebYWjiUIWXQ4KDxfEHWC4G2gP +YMqnZJwL4gvRpXKA5Vs0MgQoBo2tQUNqwKXcdfGR6tPE+mztmz/c0Xw939EsT3lMuPQJBITf/Fw8 +X1/zGdw1C+Mo3rQrS829wObnP+pWl3gEDUPokm/T1guAoB4SHdyCerpp/Ajq4abx4noL12v6iBFU +H1q9RfWl1kdUD3p9voZwv7n7x2A9aPYe1pNqx2C96faXIV/Y8306IXQWHiMCOMHI80M2IUWH4F+1 +t2PhphMdls8g/Ajqzah/OTNqPV9SsngbfIGAiJbMx9NhaCd51/K6QgctNcWlEQFu6h0CPS2QBYMl +BykPigUI6WxdnhxcknAnn8tyuo29cbKXLs/LBDnGLOg+osIdut8XyizoHga+ZgL4P7+Q2S6MTAGa +SVNq0tCaoFkvDBwawAHZvnwkBiF73sJ8BPa0wncL7HGF7xHY0wpfBwxaVx+hhvQfaG19hBqE7Xt9 +fQTZt3V4X2MfQfZtH/7X2cegfWva/1r7GLRHXf/8dps4cRGGK1XEMgkPuU7bLe1+5fY3YWN9a1ll +us77J0vt7P2BnlUUy8om5e3jQw7CPt+UnVB0aROdpLP+1TMnLX3ia4YAz3uvEN0+p5XK8YZnR9a6 +uRvN/rzHmWx0b+dctDQXxeaixFwkzUUrc9HGXJSai7bmosxctDMXleaiylz0ZS7S5qKjuehkLPpp +yFt9KVS2QnqG6XsA+GLM/wPFE2sZASoDAA== +--sDCecy9vEe +Content-Type: application/octet-stream; name=hppa-symbols.diff.gz +Content-Transfer-Encoding: base64 +Content-Disposition: attachment; + filename="hppa-symbols.diff.gz" + +H4sICCae70cCA2hwcGEtc3ltYm9scy5kaWZmAL1cbY/bNhL+fPkV/niFdxNTsi07wAG5pmrh21xi +rHNFcV8IWaJtbiRRlei3+/U3pLxavZAy5ZcWKZKVZp4hZ4YzQ2q4szggh489n8Uruv7gLemHhGX8 +wyZJvMeQxtvD4zreflh6GYGfCM6O0ZKF2Xt+4O/+cf1/7x4fHy+R/be/p2RHM8riHrJtx5r89K7f +71+GtGfpDxqvgTk5/vTu06feo20/oGmvD3+NUO/Tp3e9X//z9fNH/N+vaIAxAOGYpREa4S804/Dv +gGCBPN7GG8Z+uLtPn377Mvv58x9/YPv90JDZgfmQNCOXcU946sXZiqTufDHAC4SrIP0CBOUgAVlu +12iKF96KYMpJ6nGWSiRk4X/jNeE42nJyqI3m/bQT1BigPM49f4MzUHBI3PnXBX6lzcifWxL7+QTc +5bWCAlIW1Bh3zxxuWgzbeLxd4fPB1m3dPuWK/HNm6nWAst+054Wh3gHPI02qdtiGXnoFnCXmCMvC +C2ngcXILSAcQs72XuM+wTHTKH+YoiZeCOkgIOIRzkJ3ZoPC6qtHAiDMDzoZMwVwMfprz+ocD2Bb8 +j0XUx14QuPPfKdVMuMTjYJwwFgoLMt/ARSxTlPEJZZUSgkOIOu7LZUhTac0VBRczQphgTA7+xovX 
+BHtx0EEXk4wHlEGQj30M8sAbVjN/wRECtBRDqKQ8m/muOxQvm4rpXwMpKNp85ALM0XYVsv2NQceH +DKwau3Of3ho32QrcpxsDOxkhP9hq5R7gpYVnLIPVRX4ENIWfbfnzt4TEEaTE28uFYsJd8OEK/p6J +KBMtMy5CEnfvK3zCoCiQ1r+tMqfJ0vN/rDwa3hp4C+Vkegd//Yzc+QTPvuFfZ1/c2yJb90L+ZXBj +JfyCFICmEXBfB9xfHQGVkFdGQBXm1RFQBVpEwD29Ne4pAt4Y+K+IgFq5f0UEVAl/i4AvN8UtRcAb +A98iAqqAbxMBlcjWvZCvioBKQMMIaEH9CY6zZBm4KEkjGgtnhWIyCIncQAD7P3+eYfTeVrGP8+p1 +9mU5cF3YeYttTcZTdqxLH57nFtsrGlNOYR/zP9IEEAcOI/th3OtPRg9O5bhBBzmWpXRG0h1sa6C+ +/uG+vHQaF7pqVqg5q/mvu/nOraP0O6LUhjE+P4zX3QnfpMQLMA0umEquTT/0aHTS5tx/uRCl3SbC +1BMkTI3QsGbrp+rudYzdNGUpXrE08ji4MNhrDvvKiMC/LWG7JKUxFxb0U5pwykTGaexqzeHtAnLP +0kDU75cijQokcC7Y/Tax+sZYzuse1jvgkMRrvmlb/+fgxPkEEY+0RwhPBqdNSBwinDuLMAEaApDv +xdhnUeKlxH1u2k94jDWQ0cGynbrLLLKJUEzWXDsVIrHzpkuypnEbFT3UMmHJTBCJB0tYX/yYEBoM +9xuPt5w5SXIKxoc/xItGaQDhvEU0UEcsJsdkG/t85p+CbkzWJ8t1ZoUiRcMqT3cHcgna9rihT0g4 +IVtTP/cS1Tyr1BYWlqR5zoIK3aEZZlAGtTMVmgG1jBOfbWOuPQoUDLZQPTn4RC5zE+3bb6PS7mtN +hlrDUVaHXXDyaatPOZReIu2VZ0d7Mn1A44bBpjk0y2ONXsDC00s5D6iYOQDC9G0guCGcenxlb7B5 +ioYbL9vMIFws9UiuH7qLMdZXX3WsTHJYphxnZQ+NkXLJmSE5EdSkldhYPyNshpOPEJlQn5VpG6GY +6KRVH2UnHPkicIP/o0HAMGewORGfivyWJK9j6sCwTRK1FFnxopFY1KMxagZhJ95GGPbQUpFTVsTK +In0qA5E7BqnAJTT8vOATCklApNl9pB/yjSUd/jJJR32AmIhs4XsZN0gUBaAjdsLblJxNeZOIZJm3 +JiKkyu89PothowUVOMNi8KKIyczYZRX7yr5KWZQDzFXeMpUpe2xPmt4y4TQiF6jWVul1/mTxyG9x +8kul6Qx5XuDOgzoz9Y6zF3eoqvTKtp8K28tPLgbGP2UmGIoyIaNBUXJkbR5xBkYUL9ewE6YsChxn +KLfMlvWAqi6xCBqnDpWXqO2lpdXugsodMznA2KDUhN2u+7ygz9+x9sN1jWPZlSHoykC6Mqy6Mrx0 +ZQi7MkRdGXhXhkNXhqOO4c1xqPw0XP+WpiLw2ykgNqBRviKKyKJcF80VMbHzQ6QprIhhdUWwcUZi +nh7bfL+gsVpoRMDfpxDpmt/3SupjcsMZZxDXZ/MnuVBYm7rL9MtO1EEnatKJOuxEHXWiPnSiPmqo +S4YRh8OXuU4VRG96cUh8uXMiazCW516WYzcCdmN/aHSU3BkAXQvQkhwKANoCoEggFm4NRN1hl3dB +De6CSu6CuroL6stdUMO7oEZ3QeV3QT3cBfWoRe11Q83T9p6eDRwmIPvrUZT1wX6BsDLmjuUeG9lD +u1ESFCJZi0iDsqETjnUlTrn82NOzsbgNqlmiWNjIBw1Bl7eHDG4PSW4PGd4eMro95OH2kEcdZEcn +15VyilXeHfm69aerAXUByJ7kRd/QsR4aNZ9RJTnOltso8dvGbYhDuDhYb9miGSPFkGX8tlLQEIez 
+LPPbPmcaQ2l6OS9CUndvdodyku1S0QqWO8bQkqc3aISQgWOoU0p3x9Ckptwx9vRqpO6Ooca5yDG6 +tLhdhKRuausO1e4Yo7xkGUHJ0uIYNF5r/C7/zHdVy9qFMnW9uhfCaVt0NR51Fi/bsH3kxUe/pc2m +G6Ku0+1CPEhP4uPBXYwD2Gdwc/dz8u6c0Xhg4H7a73x3dD+tTF2j5IVw2v7IdvfT413qfnrEy9xP +i5e731IWM1DH3cdKhn44Bv9DA3BEZzSoNz0UH1r37Wdib1TIiKrtdGuM1fcOh/LC47xxsWlqxFzc +xGp8WTfiV9+3NOPVXLc0Y26/bVmt+MfrLKQ+meCZuGdLUnCAl+faZ716q0Qrv9WRP2S+F+ZNaz6D +f2W+bDQTw6bnuCxYCVIFsj0T+xC+1iw9tiSkE+MIz6IklD2RsDviXhgCs78R+/avCzxaeT5p3CNz +FANQ4OS8EseiQQXw3HRKaClJQuAp0MCOBaQhzLQEU2jmhKTByte2k2+KHMWmqCJBmNpsLK0rvErZ +uspPlFIpEKQWsvXSx7EXkdaoXWUcFYz5Y4Ox5YwOMPohi8kb6/Mc4E4/iPi69bkh2ESAQR3K29Fg +p1PvecrN5IxEBdpHE4jA05qZihaL1hOt4iO/7K1dp2wvW2szly4NeEayF35P+QbL3vj6jeO+ismR +HaVytYWhSOCZK5YHxLq4tjyqBiwhpGQNwY6kBYTo7X4DeV5g2rL+CyDZ3BHQLBEd+G+j2ZkMYjiL +KW89rKiRWsakzZUijT2xLbkmpzZ6QKhu7VKobV1pFULLkLCl7XbBp1jejphOJ8apt9wj1g6gT7/G +GJoUbMyvS8PGAGdScXXT/NYd01o4lciQGZmldKopsh8mvb41QM4DsstOBSXgEJc7p3azlbuDUPT8 +XXQHnS53wf9iw/D8ZBoDBWwGgXcbk0NCfE4CWLo7V0s8EsQx2b/ei1FQ99+ooRo6nQ2+nniqjn2e +myeJ3/H3ARa9qJAc7VrRUSq4lCJUBwjdRfQqIvhGhOLXEL7TTtgRJkqOb8cZ6g4lqjwSLUZUD3jt +AlTz7SigVxHwNlnpqq2zBW+kq6PIVNsENjLgjy+Nbr7yklqILPu8hKESggMC2TaCFDF/qjwvlqrW +DVUYF0PQuNMw8oVqD+QFi4E9hQVbWaeQxk4qzG+f5BcDtH2zZQa25ZitcCp+gUEbAyrGmDJ5QJDS +9UanAf3vrZBIdX/ChK0ucdrnpX6V6sRc4LpNMRW9lJy3aEXctejxRJ9uY9mkedZUVmGq09GJhkVe +wkFj68EWXiI6WKxyOIfk5MHa8UUoVV0sxMERKmjQgAg3SoL14YATyIMsho0WP+LdoEL2KMk8n2Vh +VV/Fq4zGulfci63qu9IbDZNPaKh7xbKNEq4xutNzSEXK56BvlmqErCIW6F6lDcDi3eaYMK4UFgZ6 +rpCt0UDNxdbK5zC6lQYsYXslCxhoo3uhfv5nqp4KWG2jexEqQwPO6Dpe0jpc79vP/3I/f/84+Fj2 +SMXD9+hdX/XYKlGrgSt5owRSzyfKFxr4yiBrdzSVz0cawWPNc0fzfKJ5Pi2eo4E1yWtDcVtlguXv +QqKhuCWWpCIiyV+f06x0G+zGrK8zRhYwfp/JXxIGpbK3pDskSh0ibh2IAz7IiyvmVoNPGy9UA3KD +cimzH3pZpmOWl7qQLb/02JZtn5LuCVLo4LffRYEtb/aJo4S8156a9torDnE02KKP39f18avyZkfs +bncEmtj9EnZdy7AnWLHUJwHU+3saBxUdi1/i0cI7lbwE4sJbZq3z96r8tfOVFiV8n4lrmfJcWVW1 +NShzdTUp/w/5yo6/mlAAAA== +--sDCecy9vEe-- This was sent by the SourceForge.net collaborative development platform, the world's 
largest Open Source development site. |
From: <my...@us...> - 2010-10-04 16:29:44
|
Revision: 2431 http://aperture.svn.sourceforge.net/aperture/?rev=2431&view=rev Author: mylka Date: 2010-10-04 16:29:37 +0000 (Mon, 04 Oct 2010) Log Message: ----------- fixed a little bug in the default implementation of SubCrawler#getDataObject(URI parentUri, String path), it would dispose the parent metadata RDFContainer which is not needed Modified Paths: -------------- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/SubCrawlerUtil.java aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/TestSubCrawlerUtilIntegration.java Added Paths: ----------- aperture/trunk/core/src/test/resources/org/semanticdesktop/aperture/docs/mail-baselinesymbols.eml Modified: aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/SubCrawlerUtil.java =================================================================== --- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/SubCrawlerUtil.java 2010-09-21 15:04:45 UTC (rev 2430) +++ aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/subcrawler/SubCrawlerUtil.java 2010-10-04 16:29:37 UTC (rev 2431) @@ -27,6 +27,7 @@ import org.semanticdesktop.aperture.datasource.config.MovableDataSource; import org.semanticdesktop.aperture.rdf.RDFContainer; import org.semanticdesktop.aperture.rdf.impl.RDFContainerImpl; +import org.semanticdesktop.aperture.subcrawler.base.AbstractSubCrawler; import org.semanticdesktop.aperture.util.HttpClientUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -396,23 +397,53 @@ return getDataObject(parentUri, path, stream, dataSource, charset, mimeType, factory, prefix, sc, null); } + /** + * Default implementation of {@link SubCrawler#getDataObject(URI, String, InputStream, DataSource, Charset, String, RDFContainerFactory, RDFContainer)}. + * It is used by {@link AbstractSubCrawler}, if the subcrawler does not provide its own. There are several issues with it. 
The most important + * one is that under the hood a normal {@link SubCrawler#subCrawl(URI, InputStream, SubCrawlerHandler, DataSource, org.semanticdesktop.aperture.accessor.AccessData, Charset, String, RDFContainer)} + * is used. The subcrawler cannot find out that it's not a "normal" subcrawl, and therefore it performs all the normal cleanup it usually does, it + * assumes that the returned data objects are processed completely in {@link SubCrawlerHandler} methods and that afterwards they are no longer + * needed. This is the case during "normal" subcrawl but here it's not. This is most clearly visible with archiver and compressor subcrawlers. + * + * <h1> + * Always use the getDataObject method on the subcrawler, and never call this directly. + * </h1> + * + * + * @param parentUri + * @param path + * @param stream + * @param dataSource + * @param charset + * @param mimeType + * @param factory + * @param prefix + * @param sc + * @param parentMetadata + * @return + * @throws SubCrawlerException + * @throws PathNotFoundException + */ public static DataObject getDataObject(URI parentUri, String path, InputStream stream, DataSource dataSource, Charset charset, String mimeType, RDFContainerFactory factory, String prefix, SubCrawler sc, RDFContainer parentMetadata) throws SubCrawlerException, PathNotFoundException { if (dataSource != null && dataSource instanceof MovableDataSource) { parentUri = new URIImpl(((MovableDataSource)dataSource).getMovableURI(parentUri.toString())); } - + boolean disposeParentMetadata = false; if (parentMetadata == null) { Model model = RDF2Go.getModelFactory().createModel(); model.open(); parentMetadata = new RDFContainerImpl(model,parentUri); + disposeParentMetadata = true; } URI childUri = createChildUri(parentUri, path.startsWith("/") ?
path.substring(1) : path, prefix); GetDataObjectSubCrawlerHandler handler = new GetDataObjectSubCrawlerHandler(factory,childUri, sc); sc.subCrawl(parentUri, stream, handler, dataSource, null, charset, mimeType, parentMetadata); - parentMetadata.dispose(); + if (disposeParentMetadata) { + parentMetadata.dispose(); + } DataObject result = handler.getObjectToReturn(); if (result != null) { return result; Modified: aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/TestSubCrawlerUtilIntegration.java =================================================================== --- aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/TestSubCrawlerUtilIntegration.java 2010-09-21 15:04:45 UTC (rev 2430) +++ aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/subcrawler/TestSubCrawlerUtilIntegration.java 2010-10-04 16:29:37 UTC (rev 2431) @@ -15,11 +15,14 @@ import org.ontoware.rdf2go.model.node.impl.URIImpl; import org.semanticdesktop.aperture.accessor.DataObject; import org.semanticdesktop.aperture.accessor.FileDataObject; +import org.semanticdesktop.aperture.accessor.base.DataObjectBase; +import org.semanticdesktop.aperture.accessor.base.FileDataObjectBase; import org.semanticdesktop.aperture.crawler.mail.MessageDataObject; import org.semanticdesktop.aperture.datasource.filesystem.FileSystemDataSource; import org.semanticdesktop.aperture.rdf.RDFContainer; import org.semanticdesktop.aperture.rdf.impl.RDFContainerImpl; import org.semanticdesktop.aperture.subcrawler.impl.DefaultSubCrawlerRegistry; +import org.semanticdesktop.aperture.subcrawler.zip.ZipSubCrawler; import org.semanticdesktop.aperture.test.ApertureTestBase; import org.semanticdesktop.aperture.util.ResourceUtil; import org.semanticdesktop.aperture.vocabulary.NMO; @@ -231,5 +234,42 @@ assertFalse(entry.getValue().getModel().isOpen()); } } + + /** + * The parent metadata should never be disposed. 
+ * + */ + public void testGetDataObjectWithSpaceDontDisposeParentMetadata() throws Exception { + InputStream stream = ResourceUtil.getInputStream(DOCS_PATH + "mail-baselinesymbols.eml", + getClass()); + URI uri = new URIImpl( + "gzip:" + + "mime:" + + "file:///C:/somefolder/mail-baselinesymbols.eml" + + "!/2" + + "!/hppa-symbols.diff"); + TestRDFContainerFactory fac = new TestRDFContainerFactory(); + DataObject obj = SubCrawlerUtil.getDataObject(uri, stream, null, null, null, fac, + new DefaultSubCrawlerRegistry()); + assertNotNull(obj); + assertTrue(obj instanceof FileDataObject); + assertMimeType("text/plain", uri, ((FileDataObject)obj).getContent()); + + // now the trick is that the parent data object should not be disposed + DataObjectBase fdo = (DataObjectBase)obj; + DataObjectBase gzipParent = (DataObjectBase)fdo.getWrappedDataObject(); + + // the immediate object should be open + assertTrue(fdo.getMetadata().getModel().isOpen()); + // the parent (the one for gzip) should be open + assertTrue(gzipParent.getMetadata().getModel().isOpen()); + + + obj.dispose(); + // now everything should be closed + for (Map.Entry<String, RDFContainer> entry : fac.returnedContainers.entrySet()) { + assertFalse(entry.getValue().getModel().isOpen()); + } + } } Added: aperture/trunk/core/src/test/resources/org/semanticdesktop/aperture/docs/mail-baselinesymbols.eml =================================================================== --- aperture/trunk/core/src/test/resources/org/semanticdesktop/aperture/docs/mail-baselinesymbols.eml (rev 0) +++ aperture/trunk/core/src/test/resources/org/semanticdesktop/aperture/docs/mail-baselinesymbols.eml 2010-10-04 16:29:37 UTC (rev 2431) @@ -0,0 +1,541 @@ +Delivered-To: tri...@gm... 
+Received: by 10.142.128.13 with SMTP id a13cs23035wfd; + Sun, 30 Mar 2008 07:10:12 -0700 (PDT) +Received: by 10.82.118.1 with SMTP id q1mr12476633buc.13.1206886210747; + Sun, 30 Mar 2008 07:10:10 -0700 (PDT) +Return-Path: <gcc-patches-return-214824-triage.boks=gma...@gc...> +Received: from sourceware.org (sourceware.org [209.132.176.174]) + by mx.google.com with SMTP id c25si8206519ika.9.2008.03.30.07.10.08; + Sun, 30 Mar 2008 07:10:10 -0700 (PDT) +Received-SPF: neutral (google.com: 209.132.176.174 is neither permitted nor denied by domain of gcc-patches-return-214824-triage.boks=gma...@gc...) client-ip=209.132.176.174; +Authentication-Results: mx.google.com; spf=neutral (google.com: 209.132.176.174 is neither permitted nor denied by domain of gcc-patches-return-214824-triage.boks=gma...@gc...) smtp.mail=gcc-patches-return-214824-triage.boks=gma...@gc... +Received: (qmail 20115 invoked by alias); 30 Mar 2008 14:10:03 -0000 +Received: (qmail 20082 invoked by uid 22791); 30 Mar 2008 14:10:01 -0000 +X-Spam-Check-By: sourceware.org +Received: from mail.cs.tu-berlin.de (HELO mail.cs.tu-berlin.de) (130.149.17.13) by sourceware.org (qpsmtpd/0.31) with ESMTP; Sun, 30 Mar 2008 14:09:26 +0000 +Received: from localhost (localhost [127.0.0.1]) by localhost-12225.cs.tu-berlin.de (Postfix) with ESMTP id 7107BF97B for <gcc...@gc...>; Sun, 30 Mar 2008 16:09:23 +0200 (MEST) +Received: from mailhost.cs.tu-berlin.de ([127.0.0.1]) by localhost (mail.cs.tu-berlin.de [127.0.0.1]) (amavisd-new, port 12224) with ESMTP id NXBh3Fm+MUbM 26160-03; Sun, 30 Mar 2008 16:09:11 +0200 (MEST) 13633 +Received: from bolero.cs.tu-berlin.de (bolero.cs.tu-berlin.de [130.149.19.1]) (using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits)) (No client certificate requested) by mailhost.cs.tu-berlin.de (Postfix) with ESMTP; Sun, 30 Mar 2008 16:09:11 +0200 (MEST) +Received: (from doko@localhost) by bolero.cs.tu-berlin.de (8.13.8+Sun/8.13.3/Submit) id m2UE9BmE014212; Sun, 30 Mar 2008 16:09:11 +0200 (MEST) 
+From: Matthias Klose <do...@cs...> +MIME-Version: 1.0 +Content-Type: multipart/mixed; boundary="sDCecy9vEe" +Content-Transfer-Encoding: 7bit +Message-ID: <184...@ga...rgle.HOWL> +Date: Sun, 30 Mar 2008 16:09:10 +0200 +To: Benjamin Kosnik <bk...@re...> +Cc: "John David Anglin" <da...@hi...>, ja...@re..., gcc...@gc..., lib...@gc... +Subject: Re: [PATCH] Updated baseline_symbols.txt +In-Reply-To: <200...@wa...> +References: <47D...@cs...> <200...@hi...> <200...@wa...> +X-Mailer: VM 7.17 under 21.4 (patch 21) "Educational Television" XEmacs Lucid +X-IsSubscribed: yes +Mailing-List: contact gcc...@gc...; run by ezmlm +Precedence: bulk +List-Id: <gcc-patches.gcc.gnu.org> +List-Unsubscribe: <mailto:gcc-patches-unsubscribe-triage.boks=gma...@gc...> +List-Archive: <http://gcc.gnu.org/ml/gcc-patches/> +List-Post: <mailto:gcc...@gc...> +List-Help: <mailto:gcc...@gc...> +Sender: gcc...@gc... +Delivered-To: mailing list gcc...@gc... + + +--sDCecy9vEe +Content-Type: text/plain; charset=us-ascii +Content-Description: message body text +Content-Transfer-Encoding: 7bit + +Benjamin Kosnik writes: +> +> > > > Has hppa-linux also changed the long double size, or why those +> > > > has been not found by configure before and now they are? You +> > > > could ressurrect the math exports as *l@GLIBCXX_* instead of +> > > > *l@@GLIBCXX_* by tweaking the #ifdef above _GLIBCXX_MATHL_WRAPPER +> > > > definition in compatibility.cc - change that #ifdef +> > > > _GLIBCXX_LONG_DOUBLE_COMPAT to +> > > > #if defined (_GLIBCXX_LONG_DOUBLE_COMPAT) || (defined (__hppa__) +> > > > && defined (__linux__)) or whatever Benjamin prefers (of course +> > > > the primary thing is to find out what changed). No idea why +> > > > __signbitl is popping up, it shouldn't be exported. +> > +> > The size of long double has not changed on hppa-linux. It is the same +> > size as double. I believe what has changed is glibc. Version 2.6 +> > introduced long double math routines and maybe some other new +> > functions. 
+> +> I can't find the hppa baseline symbol diffs. Can somebody re-send? + +Attached. + + +--sDCecy9vEe +Content-Type: application/octet-stream +Content-Disposition: attachment; + filename="baseline_symbols.txt.gz" +Content-Transfer-Encoding: base64 + +H4sICN1dykcCA2Jhc2VsaW5lX3N5bWJvbHMudHh0AMxdbZOjOJL+fj9moqBs +l+s+9e40c1HRs72O8u7Fxn0hMMguUYBYhC17f/1Jwi/YBXpJYdMRNdE1pcwn +U8pUKiUS8cc/f/7+3+H//fSewjDe76MV3nneLAxRsc3D+lCiEBdr8v0p2H37 +9vu//vWXv76F3m/P//WHGZsHY/NN2F7CMKqq6GCrZQefB+Qz1TPOIkoBet7y +eUA+Uz3LVUQRQM9bPg/IZ6TnK+cjuKhRZatpJ6cH5jTR1ue/r7dFXGNSWKrb +w+rBWU0VphjksT2sHpzVSGEvDHc5UOMeXs+B10jnZ2mfJMpRUUeZrda93J4T +t5Hm7ZlAwhzlK+t5qMPwBsBQ9WVTbMOCVLk3Df/EtOa/JygUkWjyQchnsFg+ +hd++/c+fb3/l/OHzbxNDZsqiMnjnzEsPwj/bFlL8DsD7UqEdqigCMc/rKiro +GlWy5/3Kew1GglbbDQ+Iy2jNx5qPf1STSgJ5fvi3cIPqMN/WaH+jy2+vNkgz +jhTVdRR/8LBQbDIULH4uwxMtRf/eoiJu1A9WjnIS1JbjoPXrWWljbS3RG1V7 +rdyFcSXdyULXSM+XkYuyzEGl+bUJtllUwdF80UE+GaIMJ1GNBkB84YDnmd0D +MmlAyqjiQ4EyDoPqmkumz3ysb0fZezJhpJzxi8Q272vDyuMhNyp3O5LjOIyS +JFj8L8bderZYRO5FSCZMR2ID1/ANQWZHkHWFUJjxMBOkIKBXacY15p5lAjDn +O4h9/BEVGxRGRWI+DnNaJ5iE9FDEIZfGvWD9Fi9rz+NgfDmpIlzTtzgIJqLR +aFCMEQWFwjcAkNPtOiNsWMzZnnJ7FsEixgPDllsB+2NY3BeK0CdZr4M9b/TD +N0L5lEKfCa74/z/L//97iYqcr32Diy0JDZb1ZM3/fRNxJV/RWsSgOrir7Dnh +S780/KAj+cr3ZvHnOsLZwLg89TzqO6ij/u4Fi3n49vfwj7c/g0GB/TsBN7ny +gHgeHI/d4jHXmNeJ6BbzuiBdY14X5jnmMTww7DHmDYv7gJjXK/YBMa9L9iXm +pUPCtmLesLgDxLwu3EFiXiewfydgl5jXiWcW83yeYXKPWRFx2oiqHBfCSXm+ +mGSo6jkpuHDPmvT07c/VUxDwrbTYr9C6Iodb2RMts9g14QLXmG9Q/oPs+Wcy +L46zCOfhiifLnzwxS2EgFFU7dARJrTA8l1Hwvo7C4o/dYhc4gtwoMdPyn7Yq +9UeFoiTEiX0/nK3hDWMN37tY49SfT3QIFgY9ej1v1Y4DEqQ2TGL3Hmfoyw67 +xfZD83RCNCckjKNaHJz8WNav56bFYpd2Tk4tpicxt2Uc8T0oB30KFz927z/5 +v940PP5dDPs2q4PBJCx2MChfQiU82AksLLUUB+LbVfiJi+Ss//I5XE7Dphfc +AY4MTt3wm8FfczFhuV1lOA5pFQeYS5NSl35ohnvzKGcQo95iTi9nvmdgOewm +eF2PcMQBDKbnv5fBDgrUoZo4AvzaicBM3e4nOEIM1/fcYKRw97OVW/89antD +FTSzxgPNmh7BX7w97AkKbf/3pP9PwuUsbPSB+H+3Qsop0DMsRtOi5wnRI0a+ +R/KIQ9+t0f3GXv18aJj5enV4PAuDqiJVuCZVHtUcWOZIOaI02qBAgD+H3wVl 
+mNONSDe6DycNUBdRFeVI/D7hEsqKd4OPIMrOC0XsDu0/naF5YhFXuBQBpxEA +RH8+IzJSCV2hak7PQDzjwcXGAerldOgd7cMMFZv6Q7Gf0KGJXAiJP/U9bfhh +8ERKpHK6pxYmOMI14qgIY5KXUYWCd4XllquuzdUyEjssqVCCaSrWHHF2cT08 +UyiKvSK+jBKZCDtkLTDSb4OgDADDB9cPB8CBdelZYqxx5TYyHTBD4MDH5gYI +2CsxveWDJTE3emeUEkMEnPgDxZ+nGJGmtyHHeCL0YNkrNbt4TkGcDN+NNBAU +3PxfsWDde2m5keNI9UANhQUfqw4wUA/96Hb/b8b3jIoENLcm4Tsq5bNnvq/J +UPSJBsChnACKE5PyECxgNp0kUR3BxK6bZBtktAsznBvseg07TO8K7DX065Gf +Ged0hTa4gLHGIt+EsaK8rA8w1srFNVrcDuxg5zjyg1RvzkVL0KDNOnN5Q94K +7iIzvm8WPgIabLEXAQcQwYyh0+nltEm4Te8t2aWfuACIXIg5A6RuCAP0opkw +ED3mp/QwSPt3tTqADOe4hjnhPI7KKMbAYDUX+2hwbBZFXxg++fC+P3XBs01M +tkXdD4xnFBV1dYh3q34iYkJEbze7ip1CB20f6O3GNU6/WdAaE3PXpcbEKh2+ +bCctic2pNSrfUCvVMNwsUustIbXb+NHOTZlK9W56KwbNOH5lUOnTuVUCMNhx +aLrQwaFQSbkZouotD7Xa2FCr7Qs9blJUg6PZitDTnkHR/QuJjkY96g2NSlCl +GUr1ikJ1OT3VZe5Ul5/Tcx6t6EaLRkukHrEjkUKWNjmm2hSYahNdqk9nqT5p +pfrUlLYS0FhPJAZPT5WmRmCSLDWhMxUrbduPaJbhUZM8jhpka9QgJ6MmmRdV +5Ve197SKEvnUDCcT9hHV/S+XSGrM/Yr/oCifVslqu+6Xy4lzUqBDKTKkt/hY +hFWgzfHBiy1nSSiM04u3VRXSQ74imS1rQsJNRbaleFplybquophniBue6lqy +PicoxnmUNU86rZlJCB5kwQwe52cuNqrxTry9sylsmblYDGauP8iWRkXC12BV +cO1knohyF7CHCG64pWfiqb6DsWeNscGjPmvsjV34HcZ+DphbHjiKeOAo4sGj +iAePIh48inguUcRziSKeSxTxXKKI5xJFPJco4jlFEc8piniOUcRzjCKeYxTx +HKOIB4giDJyLMHAuwuC5CIPnIgyeizCXXIS55CLMJRdhLrkIc8lFmEsuwpxy +EeaUizDHXIQ55iLMMRdhjrkIg+UiDJyLMHAuwuC5CIPnIgyeizCXXIS55CLM +JRdhLrkIc8lFmEsuwpxyEeaUizDHXIQ55iLMMRdhjrkIs8hFiM0hyoV4Vmqe +hdWivrTGOTqmR03VWpSHZX6aAIqjq25ueR1Hw0yDhTW7+AOQXV6cEh1oGK1W +FdphrkhiByGLoWUH4GrId7S4nesPB0XEsW+5rcXRP2db/PDr3Jj35TgMdhLn +J8Pbsb2eO2vDxzodjdlx3zqaJfuthS3Y+xzNHKLf0Sww+h3NHOTsaEwUVNg4 +GrtyNHOJV45mznbtaCq+jGxw3NTndx1QXxH7oSiix827x2LuYBqSEikXAf8c +X3kkNgixz+LAHO1jJN+sMDgzf76o1HsPiYGeNzCdr/ZbwDRd7r6PRrsoGYD0 +6HL1cN0WyFCdzvsmbPtkMb7qPmmANOpgBYq2NsYMxaA05gxE3IEqrrIIkmYz +enLsgJu/mqCYGdcSyVQhmMuaoMC6BXTaIwwZxFpkMGuRYaxFBrEWGcxaxNFa +0wbmvPh133e2KVUlGKYo5RAoUyTueRkAZjOMNsN0Sr646gwz26A6I7EVTudd +T9Zj04lSDoECsHc3zGYYbYbplL29uxd/C3vjYtPjN8tIOM8zNe3YEan/hSIN 
+0ryN1B+VzdSyAtMH6C64QTuqeN9Cp9vrJZEbYODs0IDagYfODs1YOzLo2JGB +x44MOnZkoLF7ritv8hHRj7d3ZQ1/EGfBctZ/6+0tFJUMviGDVvLEFKiRS82o +kSBGKlrjoZmGRjCNep4BsVbiswmIwXCYDsU0FkWE4tz2SRyhk4wwcTN5rDpF +7OYxp9+WpbWMhseAfs7JGU6QvOyWu+rCiseA+FU86oj4LpgdJcQLO65YS86a +XlNxoQSmQb34wRTvSN1wnW3IVAd03Tzm9EcbWsloeEzovVPvC1JbdF+cTsak +2KGqFrfQsDyin0GtZ5xycXycGzkLc4baoC9f/NGKx8BVrvyRmfnjFRfrJ5+J +V3My5ONEnNVjRU35iXRSRDnSk6F/i7Lyfpu+xCRB8a5+i+Pre06lK20L+oHX +zY3ti5jHzPfbuGmGJdwMFbxZ/eSvH0A8pu69y8YMQjy5jDImnmMURDgvBEX6 +Y9GMB/eyCR+QiRiZGf9lBhgZoRTZ1sMBCnc7DtIFM9XjsAHNz1zNz9zNzwYx +PzMyPzO2FlOan9mbnzmbP8s4p1z+ZUw/v66iWgiu2EQxxRVb82PELF4Ma77S +Qqq8SVTMlPXFOndhNFZXZmsW9C/Qrr1wBS1lvdp3iN2ajRmzXZmNWfSNfTEb +U72bfs14YzZTdU9mM6V/gXatZTZTWa/mHRIfA9ygWu6GX/H5/Op8w1nnwWUg +rzxDe/6XuObRp35LA7F9aX7el/UcEyrvRns/35PO/xMB4f0f91cm+5WUyX8l +ZepfSZn9r6TMYUxlZi1l1hmJ6sBEkf5XJKGKyG8omYhe7EYTvRpNcjKaZDSa +5PVoktPRJGejSc5Hk1yPJnk/muTD0JLF1mm8CGojfTWm8GRM4WhM4esxhadj +Cs/GFJ6PKbweU/h+TOEG8ZX1Ce8qn7j3rtNRmexXUib/lZSpfyVl9r+SMocx +lbnfrtNOkUF3nXcSvRpNcjKaZDSa5PVoktPRJGejSc5Hk1yPJnk/muTBs6Kh +d533k74aU3gypnA0pvD1mMLTMYVnYwrPxxRejyl8P6ZwTXwtt8ctLzF+OiMe +Lcs7AkTKLu8cjdsqLOLla/iOBxfbfHSVinq2q+1lW3as2xa4y81HkrsfSe7h +UXInZ7dqNl/SsZYzUWrzcg9/ml76KQW+JZ09vUdXv4hGjxL9LN6sv5Wz+LF7 +iJzVQ6QkD5GCHiIle4iU/CFS9g+RMvhyJ6+jiJIgxteLHA9PwwclefXPY2Zo +n6jVowQljxKEHiUoe5Sg/FGC9o8SZDBtWZ+g7tPcr1kqu5rAzDBLtRRrkqUy +k3XdTW4+ktz9SHIPj5LblaWK4tp7+ZNZlsriO3TVLEu9h+iulZ0ZrYEDyFk9 +REryECnoIVKyh0jJHyJl/xApgy93pyyVXWepIjINH5R6FvF7zNA+UatHCUoe +JQg9SlD2KEH5owTtHyWof9rOxY16cURr/WV6Z7iXdYSzbYV0F4jNc0RptEEi +g/bm16/1Cs2VHyW64vafWtzriuQNv+Id2Ta3fKSHMTYWN5GXNwny98vbrnBO +1SvybeZpnBGKAmxEfDpKt+mXeAMJ3DXxPrO5gqzX4orLE4xwXXyBtX3BVRGo +lzAXL2E2XsJuvMS1xw7+w8z8h4e98x3Ehtelf+Gpqy3SvMB+xSNeHFxHGbVi +Mr5V/ZrL9HrrKy6be72/MEIkzi2HfW455K92w80AvsAAvsAgvsBAvsBAvsCg +vsCgvsAsfYFZ+gIz9AV5aynghbwneTE0qRJU3QPea8KrvH7a5KH2QnUntLMW +gn98LQ6Ir8bjaSFUYAh9JtFhPC3EnRL39Tz51WV5ibecOqP1tF0IzWf1SRGe +b6RKbe6gydH/Rjf+tD0kLeNgcav/o0flmAD+Aq4i79g/jcsOR6ePFRjqo0yP 
+gSrNR4/d89Hj9vwOMdvy/QXbRdoS/k6LNFCLgY0N1GJcg99pkbbUArBI20q4 +V+R1eFtp4EXaVpO7LdKWiqgWafboUbnnIm2nivMizQZXaT567J6PHreHXqQH +q79sErPlX8MHCozjwYX1l6+ZyhukTqCZQWajOZTAO/Su/zGrRt4uyqKqig5v +aTCh+D+qE6FX8dwqyjIS6x9cne40J7TT+t7T+YtSVC1RhSI+TeXAjcjagbvG +CM49EY/zXNhx5sK+IcRh4LS3qOv4dVf0avhn+vugNQgvVSLDNrwP8W6xg3Mr +j5BbvF1T3X7qdKGYT51ObuOp08ltPHU6P7ViPnW62Y2nTvfnYoynDuirSTp+ +i6nT+RUV/dXVGgS7qdOFYD51urjVU0f5ncIrSvkpW9DjjUu2/ia/pG16C4Wi +YGAobbxxtWm9x736laWj4YVfv2U5Qu+tFDAaANsTxfvOCydtvHG1sZgXolzE +DzX1IQ9Q5A4eYjdF7jsQw84W+E77SnKsLCobWNYdOtXagz6iX73i7tC11/Pr +DVex7QG9vJLsuUmGnV9cDS47lrSZTs2hxKLhBfX5z/272Cv5Dr008Nz7d9jA +ic2VOH+/XL+ZOpPqv3P+Kr7vw5VcE547hKJuMqpj8d2AZbhY7FITPk/ybUtR +2s0ZfwqgeL+PVnjneS/89yyiNDzTB4uFkTqTMMS0KTxDVVia9WEqmdai4Ivv +hvq55Ff7fvcUHxU6USgBfC2ArwT4rsb/ruJmWv2ZWn+m1Z+p9Wdq/ZlKf0V9 +ME9D/iJOW8MPvumveBcWYpJ4oTOS74T0tzBD0WfIGxNgr5ailpxPzG1cB6mD +JssQ5WV9CKv+ykolwrPUpDzIanbKB+Xnaxhuiq2YtWLCFuJBXHaJbc33QHj0 +mocPkCZipJQ2G0SaUH46CFITvAFI4pk4t1UWxSiMtvsgTVMGgJm2YGi0Rhyn +/4s1KiQ/4v5nz/aMCpDnT8J3VMqdaYK495NDAHT9FhCmpSjvdwaq0FrY2K1b +HCSJ6ggO4jylGxjxvjdFtYxTyMFU0yMO5RTRKkNgJH921kh8yyuMiuQCeu2B +v00HgwWp+iLfWY9WjUuBIobEkW8iZTxfbIBSIMyrCD48g6vFLId6eQWdsJRF +ZfDe+w0oFe90hTa4gEidxpmoOoBw8pWDBwPdKtYsYneGB61aRwkpwFtmUVkK +M/eWwxgyO4jm7gnxlCt2l66noJ5TijcFdNjOzCmcGzxsLfbUQT5o2JrdrOFc +uL8EBwEpzPQt7tSBHWr8K/7URQOQcSpwdJ9VSJbZpA6sAI1f5Au8PMHr+Lqj +YcpxCwFRQaaHC6D+Ip2DDLngzckOOfa9DWGtwnG/Yr54/lxOwsuG8xUwR+xl +giIBSAxoBw2SlD5CDjCK2Qvi+TgoJQdIkj+PkJS6OF2aujkt9OigBeBq/BS+ +iLUwgKPI18AdaDESR/qn9I/dCDcMqrcQEBXybd3sDSFdKLf0I1xF8SckfxLn +sT+gh5cNcwrnduL0HTghA33hhaudOgz1DsL1ZhbAguAfIV/NwMqJNV5izFww +GogpGMJ38WXfyZf9wInTd+AE+bLv6su+gy+DniP97j/Gl/0BfNl39uXvoPkO +e0IXLWEHJ5wP6LqcEyAP7yFLfPknrHecD9g7ztknLxFLJuebNuXB58qDzqoZ +Bcaut80fAL/fi5LvT4o2hV4Kz8RXZY6LXRC8L/Ht1+5a7+XcMKws6RNLemRJ +v7akTy3pM0v63JK+tqTfW9If9PSywHMR4z6HObXHSoJ3x3lgiaJRRt3cOzcm +JUKfimauUaIaqgk9FHE//5Ry+E2wrCfrktA3UfyTr2S9aFgHGqb9ucR0Kd7J +xlUvfY2ybKNQYlsox2DG91akur39rb09a5FoILBOyE4vZNcHQVFRVwexT1ji 
+1TcNla+keuEjkuECqWzbpul1r5dyW8vNYS/FXPgQJblS1gDrFlasW3iAdQsr +1i2sWLewYt3CqnVLlJH8wY24DAMNyaVAcPmkJ9a8drd8VkO4DiPH6P1KYNO6 +UjUmqkakalyrGrGqMVU1ZqrGXNVIVY21qnGvauy5U3dJZNFw3zwl03W2pR+9 +7khkWC7tYvmJyTSWExnLS4USrOL7MfGNgD6DkXaYJDoi34BIMX/PJP3TmIga +D73WreLfxQ+ZoxJFCtMmX9kQJzbEyIY4syHObYj3NsQHLfEASw1RLDVkgKWG +KJYaolhqiMpVVT6aNUsD6V9qTiRGS82ZGL7USIgfSoVdB5ljrBRtiaINKdrW +ijasaEsVbZmiLVe0UUVbrWjbK9r6lhf6tRJbvHMjKtMNOXwjDpPKanpbP22A +a1JUSb/U+cbaauh4SbmnT8LBIJehRPQtEIUWngU9F9FP3VGgHPcSd5Yhx32+ +TlXFxlRZUkw7C4dVNu8sEDZj0BUCU/NyX2pT1EutSnepZYEudSnDpeBiW/ql +pFYYQeHfX0tnBUOqJL8tkVXauVI72rHgtS+4Uk1VK9XUrlLDElIZWIbAUAy1 +rtaUXopCYwMSLUz/S5rXNHqFUoU+p4JME5JUR6NW+UKTapEUKhvWOUpzDoOi +VyZVDWCLJtUSKYewTZTqsRR6a+oRqa7q8ELQK+NS/hd/6flVtLwl7IdrqgGV +EpU1f/RSlmegU5uwB860dukUW37yncI5D+qty7LHVbgfBEuRokHg0sHAVLPD +Gky8ruIN19HmZzC4VG/RNDWxuzrLvK4qo4ZkqQmesgfqOjLaLvWKb4CuJuot +YT+ctvKLtuq7elWXhVi6LVRDlOqpjCioprm/Q2cCvZzUoE+7/rY3I7eWhRqT +UCtJhEhJ65vQNqSeltQ3sZxvZDk/MKKgmmaF5XxDy/kGlvMVlvMHtZxvYTnf +2HKKozyqOMqjstomVrSqTMSbe1kV5TFUFsHEilaVUN7cx1qL2yHEFwfFgSPf +yC6P1z4fPxX8/uPqA/IKFHEpq7j3AasqTK7pPEM6X0WHz5drPPN/zCibQGpO +iU1JjTENIX1jPX1zPX1TPX1TPdUWb9F5hnRKize3vwiH9J/aZ5Bh+f+9XVuT +4jqS/kcnygLqMk+zO1Oz0duzZzqqOvrVYWwBZozF2KYE59evbcA2YEl5kc9D +d1SgzE+pVGZK1iUVVXV4yd9jm5223JfXyNpMNGLe7m2kVRpl9adH2BO8/2jy +l5zfuQvPdVi74wG66ekxiC0Soy54DQeChXEUb+SFAAmGIhce5Bc+5Rcocrtd +PpIHOHIBJw+ms7PAg50FLjsLEP0U4Ows8GBngcvOsPILFDnGzgKcnQUIO9PT +xTPtIZ5pVzzTiHigcfFMe4hn2hXPCPILFDnYzjQunmlcPNPTxTPtIZ5pVzzT +iHigcfFMe4hn2hXPCPILFDnGzgKcnTnjmQLP53vK51Uh5R/SeNjghvg8/a5/ +m7XHpf61l/lOJRLICpLnPHOmVQFUjr2LFHDKrWBT7iBs38C5vuErZree35WN +WD0UE+/yIwD1ry+DX6/D/zsGBE4ruBILHxILOK3VZu5pAwQt2Hb0BLajubaj +Lbaj3zEgcFrBlVj4kFjAaeG2oxG2ox22c3vOTMj/vH98j++3Q4YL9zYeSCX6 +wqARldzwmCrJ1DqNQ1kUqrAvv9/RCjitvYuGhAGU0No5RZSvJbBJQ1oBp7U3 +aUhoa5IIw/MpxlWaNcFVrBIEdfNSy8OtksDKoeoBv1mPe5gBpPBardd+HukX +cdacr4IzPB/Ltaxy6xLfCM/+UOX2pcZ7ppfmgLparcBH1B8hXstTGZ6V+hp+ ++1f4j2//fIfPrsxoKQPkrIrwvO5aez5cIW/lRuldlJ8w/Xse4/bVprnr02w6 
+ymNYwbkFhxvlXNaYIeI2NfTy1Lx0eWmVbdR7oBc4emv8eCAOMMTwZmpkMzWy +mRrTTI1ppmN4FonaRWkOGwJuiQWC2N66G0pr0y4HJmHS3hALBLFd2htKq7Tq +UIVqFbZjnFPaG2KBILZLe0Nplbb7jlweVo7v9hta95f7LXkpqybXvnXQGjI0 +B66aKmxbXrcMxtEKMVY8AO5VabjWRURtzua2T2BaZvAPDLXyDvvm7PlTCFTf +q/qSxSpT+h3I8LZvzrm0T+NBOQ55cqkDaDNNNP3Hj69t/f+X5YrMCNf3KEUQ +xxjiDYI4Smux4eQxjnyDIoeKLUg6FxidC4zOBUbnAqdzgdO5wOlcQHVuj803 +lODYbB3Pm8c4w+7pQmv1d6QBnNQhwHUmabpq1t3MqAlWsrDoEgLWXNw431kI +28DFlO25vxrjBa85X1jJYpfmjYT1aNxcL2ZBCtGeis7rsF6FlWpyesgijzLr +iArBHTa9fSGmBr32EU/il+tQ1+jAK7T5O52F6vhyB0A4P+YhGGnt7+3Vner5 +vMzIQYNMvCAwgEUHGIx7HQKA89LGEvnvx8nezSSNVYWH+SS0Gh+zTEBdkEkh +AAY0VYTgQJZTIDiQiSgA528Bk585QrrGbSc/U37MCD/6sDh1hB8Fo4/wo3CM +EX4UjzfCj0GaR3jN0qSfEX4U2c8IP/pMOnuEH0XFjfCjD6gjR/hRDPIIP/ok ++3WE55lJN8KzYS4jPA/H8wg/WoX/Ed5YzQQj/Fhd/Qi/5cAMRngeDmGEH8Uh +jPBjOJgRfpSfOUJiRvhRfqb8oBH+vOTg+7vLggoOqRaMl/SyM8hCOa8eem0c +yORs/MK/SEwrAFmxjZ+pEowVe55bWFCxVjw6VKCt2BDlfDcOY8WGwOldJKYV +YKyYHItt/AD5Uwv/SOpmEdpy++JRl1OAJlOAyilAV1OAbqcAzaYA3U0BWk0B +epwC9OQJ9Jwx2/0xBMHQbJDRnNr6M7BsDPKhPYjNh2AFW3sucBjEOV84zxCA +Cxg2CErecQww/LAgBNOevxyGYc9xDsJw50FHwngQZeujQV9+GvTFEWWQlFiE +y796gBJ8qD7DO89jhzisINZlk2ehDDLO89plShXMHksgnxqp41NjGsGYE3bI +B0fK/OBIJ/zguOTIF6ElNTEGaJAuee4HcJBSue3tzwUfdxJTsub5ByMsuQAJ +F0ByAVZcgJQLsOUCZFyAHReg5AJUXIAjF+DkAlC26X2z9ao5CI73HWAYhDcg +MMDoebVyzav3zAZfX25wTyQUePrnA0n4QoKMuBAYwYMZvpLB0/XjSxoihKxv +ADGX3hET74jSO2LmHXHnHfHoHfHkB5H1GaGYnxFqqs8IxfyMUMzPCMX8jFDM +zwgbf4b5jAAA4T4jIICUzwgX7ne2ziYxRss7L1D+hMkvmfwrJn/K5N8y+TMm +/47JXzL5Kyb/kclv+XQoDnmTsQJ2cfSOWmCo7TH5ljSAk9oi7fwSpDwf14HA +Ok86QEAARx0gMNizDkBMplBiAqG4xmC1URAAVy0oe/Z0cAcCi7Zn4tEdCAzX +ntFTYRCAmEAorjGg7Bk9EQYBQJrg/+MHiMwUTUwmGldroI5X3I5XzI5X0wzM +ysfArPwMzGqCgVlxB2Y1wcCsuAOz4g7MijswK+7ArKYZmJWPgVn5GZjVBAOz +4g7MaoKBWXEHZsUdmBV3YFb0gTmuNRR/VV1GqPh2q8aV8snJL3j8dvW5mAMO +M0Ztmqk2zVSb5qhNc9SmkWrLsoYOnDlthENgOVzKuCcPcOSYBmt0gzW6wRrX +YI1rsCOX2vx65Q62zHRPLlDk9nbe0doauQj/mZZVmNem3a6dzzdK/bue8odw +jstTr6Etgc490/MhbyuCC/ZSyLpV1lnAPctrPRLkZXMF+YdDOsgT6cHT/rD0 
+cFMUV5mP+6KgGuelrNZNarFZsyHDx9qfsbhA9mPOIJDFennY7S3r+zAU+F1t +GN7ei1TNXW2+htw5XYA4+0MtTewBxZEaBgZ0cNwUBqE8l01HxXwYd/4dIFAu +j5UHeSpVlvFDWuQnPNLRk/mAkgKBkF6a6OkhdLz8WfH+5U+N9S9lexWCrZ7X +NA+jryY7ABcIkJAIBlR3vOdADcpyBERqxnQ/UQCUUQGIdLlQwA7coOwMIKTz +5FmEbBSuHMKLHIIrh3W6D0Ng6+Lv7FacH1bFaHP8cuZE83BrZZPE5vGbfe08 +XGPn4Sas/RmLC4QcTEdXcdHzcGbOJBje3otU6Hm4EcWeeQmI087DtQcURwIn +GBB6Hj5+iBo9D6dmyQICoefh4zCUeTg1cRcUyFPPU+bhU2Xoglf0p8V6yjx8 +9GIAZR5OTRsGA+LNw8m5yIBI/Tyca+G0efg4UjcPZwZu2jzcsDHpYcaGm4cb +9iJ9yCG4cuDm4eg9TRgCuxWIeXiarw2z+c+oy79+2CdNeJTrvfXZEARqm/Tx +Stc+NkgKva562ndOrNsucCzc+p4LrVVAM3D+iLdbH3heB1BQbX5HUVeVuBUk +FxpyzccJZxokfnsmAuIivAvuslc5Rb/U0JPAiukkFpNIDBw6nDABBmYk+tcw +mhm3zah+47axnmvcXrbHUOuR7N0DKO7D0IXWB3CNCeBmvCkCuL22SQK4sUrc +p4cLDfmx4IQjBnAzICmAG+HOAbx3hyk6iBfJzbDiTxBdTAKLC+lmGGtI38my +jNayhB8cG2MRaBZ740boka2wf8bc02t8qzW+1RrZao1steME2SI/7M4PgcP7 +eoRFoFnsrR6hD5D0qFZrfKs1vtUa2WqNbLWrr9vrnWvZa6n+8S3tPtzDtJJF +VKlidCfNrSEmvJgU3q55JnYwITaqR7UJe2xNBt+jSHgxKTyuR5HYwYTY7h7d +H26tRfn1UTq8mBQe0KN07GBCbFSPahM2w0fp8GJSeFyPIrGDCbGtPdp9SMGO +3T/QCxy9XYn3xDatPIdhN2eIo3jTTJSaDF/t36BtszGEuv1bHINAMlg1MMqA +1YK1wx8ZNFttGqs2jVWbxqpNY9XmmG8+p/lXlKVJGBXrw07mlctRHhkEksHe +4gdqW3NfwrCJIQhTH+cQWA5rG8Y50O2w9tsIh0a3XKNbrtEt1+iWOyz2Zady +ebr9kvzn8sk9CpsZBZHRrgsjV0DiwuskoOokoOokIOkkIOkkwOtEU+1EU+1E +k+xEk+xEk+xEU+1EU+1Ek+xEk+xEu+3kNQwHfJfg26oSN30w41jDsoVNkNis +irWwBTQ2gmoDT6oNaKoNaKoNaKoNaKqlWK32ZLWaZrWaZrWaZrWaZrWaZrXa +k9VqmtVqmtVqmtVqmtVCYu1w88uccAZ6CgsFhz7MgkWfCllMKreYSm672aGQ +Am9IWOP0cdQEhUvfscdWMxWy+HMaIKZCRtstbQMfh2Sz27f+CQ0vURWHhw6r +aPjJoMW0kovJJLeaKBIq8AeFNlIf0RUHTA+v6HomgxZ/UhPEZNB4+6XFWCQU +wH6V5yCrpg2yarogq6YNsmq6IKv8BVnlL8gqf0FWTRVk1Z8UZNV0QVb9SUFW +TRdklb8gq/wFWeUjyEJPXO331odpcTgYYcYOIlCEMeKMC7OIq9O+2RMMZnEW +lY2+q2iZWZLedRzNyn6wCMPzYlXzvXGIqx/fq+UWxAojFPQ6BLAOmzn3RAGE +SDiJ9HtwfsoqrdIoS/+QYfs7gG9U3VsIG4BI0LAFABugXg1Rr33ztjmeoIqd +Mbvkk+W1MCPvJc/kU1jHajz7eMZJEKsh9SSId5iD0iX5uszSWL6G3/JEHmVz +lGnbrD+/fkVZVBTR6dv2/XNuvLX7wC5w7GdrC4Jm7VvVf5Vxu/zdyJw6mEQ9 
+eLbNv/hQVMm1Kk7vLr5F+G23z4Ln82tyVZRll2X3H99//wwXqyiW1Z1N//YC +gjmztjAiTW7w4DIVcp/VLB1Y3YEdIgzlbYDSaeUChIMSwSNUKsszGEzPjr3f +e9qPBhlMDSQUCBEESgQBFMEW224JhZuwtYx6WvfZnkCOw2Yv2nZD7JZv0fGd +f4bW91LzxZnKZc/58ThgwLBeG6x62lLZwZrs8iKEQb7VkIms+dSJL6BtvLol +DKCE7n59XmdqGWVt+HOQvlzmaU7Ms/cBiJpI1Rrgu5sYIOCFypZT85YSEO// +5tb12dMBRPDmClBzBbi5Atxct8UArA9gd+fEHMYmvlwS0I8+VQCfMNpgCEzC +T92CULctOJiZAgqTADBpP92iKd2i/XSLpnSLpnSLpnSLxnVLm77f9G28hTGC +yAQVX4DwHQrtyAIYGURpmqo0DVOapipNw5SmYUrTMKXZv3Zf8sOumcjh73Bu +fUMK75BWNRIhA/+QkO5BX8jc+oYU3iFB3ePvviUV0tU9+0OFv1259Q0pvEM6 +u4cAGfiHhHQP+qrk1jek8A4J6h5/NyGpkJbueV1GSRhHpfVDuScKIES26lJV +tuuawSz8v3BdKB1qVSTle7p0syyahEihTqtNPdonqXoH8Ly0R3HbhcEsa3L6 +lO/NF6P8kvndUt7NQssAoJDrOk7JokN4//GPrx7j4zNMzQuVHY54qgVJ0nKv +SjmQ5Qsgwvxbnla2T+Y7SgGl/DsYE9Sl8MzaHcvzsdaEigHgL5dtFghpk7bp +UEjHDcVHeoGjt3rMA3GAIYaoG2QRIGMANQTUAKvg12w25s1G88ruHTOMTnAq +EbBKrLob0gVAOpAGNUeDGqhBzdGgBmpQAzWogRq0f2m9Xi8915oW4nbn9lr0 +Dl9pHsLhPnfvOQ032aH8MDpBllAwJRQwOqspDOkCIB3IFLRfU9BkU9BmU9Dv +UH4YnSBLKJgSChgdzBQ00BQcUeGaA8jjAgwZU/jHtOqSihlMgAnqI3+rMGRM +4R8T1kf+FmLImM4+8rsUQ8YU/jHdfeR3NYaMCeojf+sxZEzhHxPWR/6WZMiY +tj4anLi6HqQB0m5BdAKBKWCYVh0N6YDtTo/met/CMD4eg7e3V/ABwZvnkGz8 +5kOCUAjDQUEou+mwIJTfcWDwFqZZsmtXYGxuM6AKQFTCStXetFGl4dlqeYzl +vkpV3uQOvyQqr/81+7VUzObkWymrFsMb6OJ6xb87oORe9nJgzqpU1p2miOzz +VZpl5ic4Xdzt4tqP8acYA/OVDwfsIs5kVPjSOnyF0QVUJG2qft/NfYnV/rTa +Vfb3cRwgr76N9fJV6bWlfwvITn75hPQrDj3mOEKflZWuBGCQHH9TlBckRzG5 +QXIUlBkkxzAvQbIKZv11ovNFIpjxjL5O2kZOTeU2R06OUPTIyXv11AVkiZyc +5qIj5+irgb4t2BQ5OS0FRk7DA3T+xaEHImDkxD5j52K1CdxFRqtsPVUAorLV +2KY38rx4SAcVE4BaVUkGDaYAhXWUvxVEOqiYABTYUf4WEemg7o7yu4xIBxUT +gAI6yu9KIh0U1lH+1hLpoGICUGBH+VtOpIPaOqoGOeO0qRwAdM+rQso/pPmA +VU/aTJHiFJyc4IYRIEkz5aHBgxRi7eCeCiSqtQuaS89hmq+UtcaeKgBRmWqs +9fV0yOVxL+NKJkaaWT1fDmuz2qV5PUf+8Y+vr3cT6TwMY5V/yaJmUOHXt+T9 +68f3+ONn+HE/1f74Dtw1H0GVk6Cu/KA2yuqVatXWoqHNpQ43UZ5ksrASP4fX +r9jm+nEt9Wgo/nj85v0Z/nwK3z+fwx/fP2d3t9D6m+GjNYzFEGYN1aY5zXk9 +BGq0uZemc/an/utlfH0kHf3Q6eQxHJscxx9rLAe/b2m73m1ram2E6erUHHQ9 
+7NN83ZydejhjOFiTbzOmfixrOaUMExkXsnnaoFb+8Pduqd9U7xgEFSHN2UL0 +EFiEgaabmJeaI9nrIY+jw3pThd1Xm5H27YrbnFq8nLg03TUdkmdqXRtL+zAL +hLyI8rV0kYunTh/LLIr/Xdvuwazr+92cUZxCtZfbMrkyKfzDtPfX4lzET9Qu +SnOA/FftyHxdbeD06lCFanXWkoU+uG9Xkda9TGhY8BAXQqlWlNjzsTTGQVMt +hAjkqKX3DKfB9+RFbVvNdrWrk0TXSV/D54gsHLMrx90LRhaWeeeGdy+5WHj6 +Pi/kMsqivMmuoYq6rrr3jUZhtIm3QYxqhsWwHq175KUBsDYyC6YuXcN3ej+4 +WqAc4zQUai7raQh6WmEFw84gLGClT8lKX5ItVtmh3PgS7YLmSbaXtayyNJfm +/H9mI3mprXf57XMefi7Cn6Z1Y79VmNIGDWoxJoLz1xAPVZgb8raJynOin2/N +x1ef9P7y/sX70rX3YsPQRIxg8GpS02U8AI0H6JKpUTkJdRqTMNCRtAek6/V5 +Oi9FF7Tr0PR6cOvAtHrwy5j0enCrcOh6hld/GMwEyxjek2AwU2omHg9nVDSx +VVIP6jIqmtguyVtwnJom7iXyJginpgn6qVuwNX3wvR1KaZ2LfHz/GQIqssEE +HmC0H2k0XZr7+REbQ5Mw+lkSnZlWs3muxAHTfsC6GROHnagX4ryJUxUy/FGr +IsQ/TlXIAEio6mYOxeKn2crNTIrFT6yfOp9i1TW9tZJnVay6prdX+tyKV9n0 +PUafYfEqm6TPsvIbbOkv/hm+fy7CH98jb0ixN6SNJyRfbfPVMku7sCu27aGA +T1mlqlxl0br0ijxrkPeFjNMyVblX6Hn4IctpxF40UmufiK8NomW3lw7aHN7/ +9jkzrv+OIz8sYauHld/vkNXlGj2BCv50XkN+boNPPUHd7TN5rCu0YUM3DAjY +ckK55YRyryaUezWh3KQzNz7hYn9gXiWDB2ACMjQAE6DBARiP7QrAeERAACaC +AgIwFhlnqQ+7eMRYvs/MI0XN36I17K1j1pDG7VkoUIvx+cqFcUtjVBq2WSAg +d7NAMA5pCteMMb2bC0e+gDaegD58SfRhkQh7GOWHObQT0D68ogEHChoyaKCg +QcMGChK2daAgIboGCjqoa6AoygnPsxTuiXr6OLkDze0K9zydAS2nk1pOJ/Vq +OqlX00mNPW5nD5ZYtA+vaPBgSUCGBksCNDhY4rFdwRKPCAiWRFBAsJzqzNzP +Tf4a/v6ZGK8qdQSBnaCTYmV+rvjdVQsMBCsJ6j47CsQpyby7384RBYTilOW1 +O3/OeemaCEeSDv+OMRHOJR3gEuEIqRH16ynM64BtdbsBjRsnBeCkABwFwFEA +nCp4SgEqM9CD8BUSX+HwWcEMiUOQBxdHkDgIeVJP8qSe5FGe5FFcea4hm21A +ICCKRGQVgYAwEilfEvnqNeWr15SvXlO+ek2xe83TNIKMSJWRMJkgI0JkfOsj +ny9FIiHJUvJUiYRESKn861L516Xyr0vlT5dvqHkVYLabRD/MBc2O6luuzpdD +x9MxJJmJPwPx59HW9DuEW29Nv7u5w/gYnfMGNGdju8u7f/1rTfZf//0tDH6b +DSmv+RRs5ee78UYKuU7z5sn3eGMiaR/I6lNbGMkORSHzwRX7tmYTdSJ3Ub7O +jOUyT+xSNTlo3ApqHkDvBdpXxZDwt+Ce9Pwyd2lDu5CEK4va14eoSMJoqQoX +SfyfQ1pIO1EhM9muIY0T7Q+FDL/SojpEmYmmkK3NmYqthV8yDuO4UoWVoBYx +P+ytJA6IpG5mJYWbZOYmsVI45MilFo7ymaN8rDg55dGuDrwmd10fj+FeFqXK +oyytTuHX0xhVma7zZVplt7HjX//9v+9/+/mXp78MGEZ+q+197FfR/zoKepOM 
+xPDz0/jv49BD6YY/G+pcjP/8PP7zy/jPr+M/v11/Dp7E6zmB0qwqmuQiiayi +NKvHrHBfNKd/s3r28f6QmeWBG80pmuHx2+/BU5u0PVqmX0GTL0c2J+S7VE7v +N7ZgY30JwzZJPpE3zqKyJPLum5VuCu9bzavSvHlFEc/d5AxZNefjryMOnr1M +ye1usmd87Rj8s1b6JGpSW0QZBWGoPRXu5G4J1mPNus4P7WMBs/bNzHCVZvKc +bXvkHPZo0HFjjR6zhmK9nrHatz29CDcGiJdw5FlGK0OznjicjNkp+5VNF6WC +Ut7dZcNQayf1IPeRi3SQ98hBKtqba+GyGTelW+RbcqfMYpjByEU7zF7koh1m +LnLRdp1XG6GDdnaTzMdN3C7ZWvwFhzDmIEAEy3IWDoEiw01KIwcxZNkUCUEQ +GbIMiISgSHG5q9l5392VTSS7RrO3tzPBvn/P4PT++W3qKgf1YriSZ3Co86IO +Fsi47uICul5KBKvogcOpo0X3hCO4jnsOdx3XC4NdHagrb1h03B03EPr+cCu7 +8ir7AB13i82FfpeIzUH+fJ+EzUH/0t+/75TT3uYn8QUUPk2sT0PqQ20/ELCo +YQG3mk8BY0qmfEqmfEjW5UewkxkzIQDZNJbtmt0ASOiUn5axAAjqM64SsxAA +QX1G0dd+9d1KNkgAAKV0dSf18jsU1aea+qf/nHS2J7HAzPjpLvnKPRjWp4OQ +r9GDYb32PmSB4nOQ5AZEph1kENf8HKzhjJPNLmQuC+7oHN/nHZ1DJx1d648A ++brVvnG6+dM5/2lcz/UcFJlaOyjUwaDU+bylmMVpbiVYaIscHYlZkI7ELMnr +WVZtFuWiOedA+0BosrzFhdC9zNdTXlasjP3bU7oWBHvKsxdDMBUQc5hAB0rp +0pJ7THykNGE+txsvS/MEsLkysBdP4Wcod/vqFBZyX0+3VVFLaoUsaYw/v7W5 +PkZW3a+lS2tpbC1NrKXSWrqylm6spam1dGstzaylO2tpaS2trKVf1lJtLT1a +S0+2UmvnW/ve2vXWnrd2vLXfrd1u7XVrp1v73Nrl1h63dri1v63dbe1tS2d/ +gnY/elLn7kdL+hMS8ntKBaWkr3AbIeCT7XsIxZdCMaTwsVRiB0MuSNyDKZ+S +KbZkv37vMmIt2hxbVmLohuM4tXZQO6btVzLSB+UIM9y6Xq6OLroNEKOUPa17 +J7Knde9E9rTuncie1r0T+drSAq3g9QoM2bbsifs9CqPaemLA3t5bF3ghxjhO +rV3UoL2kwYSyhv+8PquWvVvIx5LzztO8wjEFaCZNqUlDa5rXE3pZ1K6VpbvG +maL3ZqwLl6rZkkhIvM3hl7Us0LyiycpcfzY3T3Sf0Nyzhvs/h1RW4e/R72j2 +eS14uZdxGmXpH4SGL5rqE5mrYhdmqizxAFWan/La48KlXCnj+RUzwEsjQXMg +sW5BvqYoYVEH1D1a8tdac/IYxRWW8a2xFRkvFm8Uzp1KDpmicDY6wvbwkuEW +S4ZbLFluseS5xZLpFkuuWyy5brHku8WS5BZLqlssyW6xJLvFkuwWMcMtYoZb +xCy3iHluETPdIua6Rcx1i5jvFjHJLWKqW8Rkt4jJbhGT3SJhuEXCcIuE5RYJ +zy0SplskXLdIuG6R8N0iIblFQnWLhOwWCdktErJbSIZbSIZbSJZbSJ5bSKZb +SK5bSK5bSL5bSJJbSKpbSLJbSLJbSLJbrBhusWK4xYrlFiueW6yYbrHiusWK +6xYrvlusSG6xorrFiuwWK7JbrMhusWG4xYbhFhuWW2x4brFhusWG6xYbrlts ++G6xIbnFhuoWG7JbbMhusSG7Rcpwi5ThFinLLVKeW6RMt0i5bpFy3SLlu0VK +couU6hYp2S1SslukZLfYMtxiy3CLLcsttjy32DLdYst1iy3XLbZ8t9iS3GJL 
+dYst2S22ZLfYkt0iY7hFxnCLjOUWGc8tMqZbZFy3yLhukfHdIiO5RUZ1i4zs +FhnZLTKyW+wYbrFjuMWO5RY7nlvsmG6x47rFjusWO75b7EhusaO6xY7sFjuy +W+zIblEy3KJkuEXJcouS5xYl0y1KrluUXLco+W5RktyipLpFSXaLkuwWJdkt +KoZbVAy3qFhuUfHcomK6RcV1i4rrFhXfLSqSW1RUt6jIblGR3aIiu4VmuIVm +uIVmuYXmuYVmuoXmuoXmuoXmu4UmuYWmuoUmu4Umu4Umu8WR4RZHhlscWW5x +5LnFkekWR65bHLluceS7xZHkFkeqWxzJbnEku8WR7BYnhlucGG5xYrnFiecW +J6ZbnLhuceK6xYnvFieSW5yobnEiu8WJ7BYnsFsYczEB7moY8zHReDWjXuj9 +kiYh662u2stE4FBg44eEAxM/PCSYEBBhwQSBCQ0mDFR4MIJgQoQJBBkmDDCA +UGHghIULAzMwZFi4AWHDwu0KHU3OJHXJCz9CIp76q46ANK0mcu0iByWqO5P/ +giflvTAAsnX0lM5r3gNS1zXvntR9+XNA67z8OaB1Xv7saQF3NAfE7muXPTHk +amRPPUVCRQu6hyxHA3SEnYITKnb09nRGPZnPrGAjqD415sip1NM5DDO4Rgqg +SsX1GjM0VIjb1DrDy7g4hgDFoLE1aEgNwMy6ZgZTwJ51ARuYmdbC4a4DmJnW +wmGqY35OW5ZaS5W1FPRCIQxB0RB++U5pNwbrIaXdPayvyDUC6yN0vfa5wBC5 +QB/ZNJzt52diLYU9ZGiB+IUICkYG7WIABwUzh6sOVya3R0IXojPv2QilCbNL +FoHLjOziCyh8mlifBtTXfHCcJ/hRbd5FFJ8zoBg16OIz6fOagKLPoLeIsjrG +Y8j3mwhOHudVkcHJk3SdVnDydf0JuoGT17NM0yrEGPm+SHOEMG2nw8nLfRRL +OPlhv0fI/nx0aPLn52i2tXPR0lwUm4sSc5E0F63MRRtzUWou2pqLMnPRzlxU +mosqc9GXuUibi47motFsarNLutjx0W7Wv3cDm4MbGQIUg8bWoCE1kEfsDgIZ +ZZ182sU3q/kuCa3uaoRMZQA4GoVjzxXckTkH8BFKlyper09K+JhWj6B6mFWf +UX9RXpQbZQW+KDfGi3ibbZQd9TjaBcGZDnl2/iok6Wc+wgrVj4EX8nKciRfy +4t5s3mexRqfDG2OGx6vnEanBb/3NngdrEPjHiMzsGs7+i6bxMV5ku3+x3/ib +dbkVocslFg7t4HDmcx6hNGG+PFoNJo4Y2IFvPM5eR9gRbzzO3gaZD0lvsBkR +4H73gIDNtGpEoMuQshHw86UWYDwd86VsaSmLLWWJpUxaylaWso2lLLWUbS1l +maVsZykrLWWVpezLUqYtZUdL2clcZvpeOBemtsLxucH8acT9UbOQebd1Sc62 +bIYAm/4jhGJDKH5DFLEhPxmxzIhAkIH/7pcdC5eYucNyvCcyv+ylpdZSZS11 +JyQfUCooJXVseEBQdASmm/7iu+kvvo/94vvYr4meY7Sge/jY7dE9JHR3gFG8 +85efhO4OMKJkXhczRlB99G+3v9+/nXpGQ7iIGQPuI/Px70HIN8EYL+KTZJQd +8Ukynw/27fAf0mZ2jWJnLiICcHDy4HZxH9lgtV2fMeK23o0Dk2dkSYayuDB/ +fnQp0tvIFiBkUOu2jkEHEjty+1pyR+bzVcpHUA+nE+bnz+Xf685c54emc4NZ +WSWpsq5AULHQcvmZPFuxkPYCejWsfathFx3rWP2HZAPZH31wA+V7Vdp4S5zM +pROwauJEY6TteesGt7bKXXOOHc4lOi51qBxs/eN1wWvNlshVdMiqcB9Vtepy +MPPrtUoIx3AzUaQJiiVAsmh8LRpWy90DJzh67aYfe3mjvwiA5g2K5ipG7b+n 
+TKKZRWPc8rivlZRXBO7muDWVez6sO3gi8A9qx/MviihJj1iu5/YQRYllez2z +YaVcMkxjyTGNJcs0lizTWDJNY8k0jSXJNJY001hSTSNmmEbMMY2YZRoxyzRi +pmnETNOISaYR00wjpppGwjCNhGMaCcs0EpZpJEzTSJimkZBMI6GZRkI1Dckw +DckxDckyDckyDck0Dck0DUkyDUkzDUk1jRXDNFYc01ixTGPFMo0V0zRWTNNY +kUxjRTONFdU0NgzT2HBMY8MyjQ3LNDZM09gwTWNDMo0NzTQ2VNNIGaaRckwj +ZZlGyjKNlGkaKdM0UpJppDTTSKmmsWWYxpZjGluWaWxZprFlmsaWaRpbkmls +aaaxpZpGxjCNjGMaGcs0MpZpZEzTyJimkZFMI6OZRkY1jR3DNHYc09ixTGPH +Mo0d0zR2TNPYkUxjRzONHdU0SoZplBzTKFmmUbJMo2SaRsk0jZJkGiXNNEqq +aVQM06g4plGxTKNimUbFNI2KaRoVyTQqmmlUVNPQDNPQHNPQLNPQLNPQTNPQ +TNPQJNPQNNPQVNM4MkzjyDGNI8s0jizTODJN48g0jSPJNI400zhSTePEMI0T +xzROLNM4sUzjxDSNE9M0TiTTONFM4wQ0DWMWS6B5mPjBJmJMgwk2EzMC1FSM +WTAR5mLGgJuMKfGk22wMnBDTMeWsBJlPdygyeKqiZSadR8l6DtdJoguh+8jR +c6biKJOzKMsgdPO87goQYXPuCUJ4lhRCeU0pBKK9dAqEtktLACOuFVBFxclK +bDz17OoO47lnAOM1MROY1G0d7dFZ/M0FMCzu6gIIFn96GAyLOz/sgu0yIwZP +q0xF1SqVGZijjNM6IqYrh4X3HEGUbA8lphLhOH7aEc6i/R5K6fDennIp10DK ++FAAKWvPAlLKHKik2UYegZQqrqCUByDlPJMrIOlilR4lsFGLIl1voLhVcciB +an1eRskyraDEae4Ksz2xVCs4cvnvdK9LGHGbthQM/bJWCt7El3KjtOtseE99 +yNOqDjsw6tf2ponjFPyAvBGl+QNG3lyPk4hY8rZUKrPkgnugP2smzYGafGvz +ncVO+QepipzheZCsyEk7yMsBpwXgEm8OwnH9DsDUzMhwXL9DMDl9KwLYr4bJ +KVwRwB51/PPzx/fRxCDXwqWtMLYVJrZCaStc2Qo3tsLUVri1FWa2wp2tsLQV +VrbCL1uhthUebYWjiUIWXQ4KDxfEHWC4G2gPYMqnZJwL4gvRpXKA5Vs0MgQo +Bo2tQUNqwKXcdfGR6tPE+mztmz/c0Xw939EsT3lMuPQJBITf/Fw8X1/zGdw1 +C+Mo3rQrS829wObnP+pWl3gEDUPokm/T1guAoB4SHdyCerpp/Ajq4abx4noL +12v6iBFUH1q9RfWl1kdUD3p9voZwv7n7x2A9aPYe1pNqx2C96faXIV/Y8306 +IXQWHiMCOMHI80M2IUWH4F+1t2PhphMdls8g/Ajqzah/OTNqPV9SsngbfIGA +iJbMx9NhaCd51/K6QgctNcWlEQFu6h0CPS2QBYMlBykPigUI6WxdnhxcknAn +n8tyuo29cbKXLs/LBDnGLOg+osIdut8XyizoHga+ZgL4P7+Q2S6MTAGaSVNq +0tCaoFkvDBwawAHZvnwkBiF73sJ8BPa0wncL7HGF7xHY0wpfBwxaVx+hhvQf +aG19hBqE7Xt9fQTZt3V4X2MfQfZtH/7X2cegfWva/1r7GLRHXf/8dps4cRGG +K1XEMgkPuU7bLe1+5fY3YWN9a1llus77J0vt7P2BnlUUy8om5e3jQw7CPt+U +nVB0aROdpLP+1TMnLX3ia4YAz3uvEN0+p5XK8YZnR9a6uRvN/rzHmWx0b+dc +tDQXxeaixFwkzUUrc9HGXJSai7bmosxctDMXleaiylz0ZS7S5qKjuehkLPpp 
+yFt9KVS2QnqG6XsA+GLM/wPFE2sZASoDAA== + +--sDCecy9vEe +Content-Type: application/octet-stream +Content-Disposition: attachment; + filename="hppa-symbols.diff.gz" +Content-Transfer-Encoding: base64 + +H4sICCae70cCA2hwcGEtc3ltYm9scy5kaWZmAL1cbY/bNhL+fPkV/niFdxNT +si07wAG5pmrh21xirHNFcV8IWaJtbiRRlei3+/U3pLxavZAy5ZcWKZKVZp4h +Z4YzQ2q4szggh489n8Uruv7gLemHhGX8wyZJvMeQxtvD4zreflh6GYGfCM6O +0ZKF2Xt+4O/+cf1/7x4fHy+R/be/p2RHM8riHrJtx5r89K7f71+GtGfpDxqv +gTk5/vTu06feo20/oGmvD3+NUO/Tp3e9X//z9fNH/N+vaIAxAOGYpREa4S80 +4/DvgGCBPN7GG8Z+uLtPn377Mvv58x9/YPv90JDZgfmQNCOXcU946sXZiqTu +fDHAC4SrIP0CBOUgAVlu12iKF96KYMpJ6nGWSiRk4X/jNeE42nJyqI3m/bQT +1BigPM49f4MzUHBI3PnXBX6lzcifWxL7+QTc5bWCAlIW1Bh3zxxuWgzbeLxd +4fPB1m3dPuWK/HNm6nWAst+054Wh3gHPI02qdtiGXnoFnCXmCMvCC2ngcXIL +SAcQs72XuM+wTHTKH+YoiZeCOkgIOIRzkJ3ZoPC6qtHAiDMDzoZMwVwMfprz ++ocD2Bb8j0XUx14QuPPfKdVMuMTjYJwwFgoLMt/ARSxTlPEJZZUSgkOIOu7L +ZUhTac0VBRczQphgTA7+xovXBHtx0EEXk4wHlEGQj30M8sAbVjN/wRECtBRD +qKQ8m/muOxQvm4rpXwMpKNp85ALM0XYVsv2NQceHDKwau3Of3ho32QrcpxsD +OxkhP9hq5R7gpYVnLIPVRX4ENIWfbfnzt4TEEaTE28uFYsJd8OEK/p6JKBMt +My5CEnfvK3zCoCiQ1r+tMqfJ0vN/rDwa3hp4C+Vkegd//Yzc+QTPvuFfZ1/c +2yJb90L+ZXBjJfyCFICmEXBfB9xfHQGVkFdGQBXm1RFQBVpEwD29Ne4pAt4Y ++K+IgFq5f0UEVAl/i4AvN8UtRcAbA98iAqqAbxMBlcjWvZCvioBKQMMIaEH9 +CY6zZBm4KEkjGgtnhWIyCIncQAD7P3+eYfTeVrGP8+p19mU5cF3YeYttTcZT +dqxLH57nFtsrGlNOYR/zP9IEEAcOI/th3OtPRg9O5bhBBzmWpXRG0h1sa6C+ +/uG+vHQaF7pqVqg5q/mvu/nOraP0O6LUhjE+P4zX3QnfpMQLMA0umEquTT/0 +aHTS5tx/uRCl3SbC1BMkTI3QsGbrp+rudYzdNGUpXrE08ji4MNhrDvvKiMC/ +LWG7JKUxFxb0U5pwykTGaexqzeHtAnLP0kDU75cijQokcC7Y/Tax+sZYzuse +1jvgkMRrvmlb/+fgxPkEEY+0RwhPBqdNSBwinDuLMAEaApDvxdhnUeKlxH1u +2k94jDWQ0cGynbrLLLKJUEzWXDsVIrHzpkuypnEbFT3UMmHJTBCJB0tYX/yY +EBoM9xuPt5w5SXIKxoc/xItGaQDhvEU0UEcsJsdkG/t85p+CbkzWJ8t1ZoUi +RcMqT3cHcgna9rihT0g4IVtTP/cS1Tyr1BYWlqR5zoIK3aEZZlAGtTMVmgG1 +jBOfbWOuPQoUDLZQPTn4RC5zE+3bb6PS7mtNhlrDUVaHXXDyaatPOZReIu2V +Z0d7Mn1A44bBpjk0y2ONXsDC00s5D6iYOQDC9G0guCGcenxlb7B5ioYbL9vM +IFws9UiuH7qLMdZXX3WsTHJYphxnZQ+NkXLJmSE5EdSkldhYPyNshpOPEJlQ 
+n5VpG6GY6KRVH2UnHPkicIP/o0HAMGewORGfivyWJK9j6sCwTRK1FFnxopFY +1KMxagZhJ95GGPbQUpFTVsTKIn0qA5E7BqnAJTT8vOATCklApNl9pB/yjSUd +/jJJR32AmIhs4XsZN0gUBaAjdsLblJxNeZOIZJm3JiKkyu89PothowUVOMNi +8KKIyczYZRX7yr5KWZQDzFXeMpUpe2xPmt4y4TQiF6jWVul1/mTxyG9x8kul +6Qx5XuDOgzoz9Y6zF3eoqvTKtp8K28tPLgbGP2UmGIoyIaNBUXJkbR5xBkYU +L9ewE6YsChxnKLfMlvWAqi6xCBqnDpWXqO2lpdXugsodMznA2KDUhN2u+7yg +z9+x9sN1jWPZlSHoykC6Mqy6Mrx0ZQi7MkRdGXhXhkNXhqOO4c1xqPw0XP+W +piLw2ykgNqBRviKKyKJcF80VMbHzQ6QprIhhdUWwcUZinh7bfL+gsVpoRMDf +pxDpmt/3SupjcsMZZxDXZ/MnuVBYm7rL9MtO1EEnatKJOuxEHXWiPnSiPmqo +S4YRh8OXuU4VRG96cUh8uXMiazCW516WYzcCdmN/aHSU3BkAXQvQkhwKANoC +oEggFm4NRN1hl3dBDe6CSu6CuroL6stdUMO7oEZ3QeV3QT3cBfWoRe11Q83T +9p6eDRwmIPvrUZT1wX6BsDLmjuUeG9lDu1ESFCJZi0iDsqETjnUlTrn82NOz +sbgNqlmiWNjIBw1Bl7eHDG4PSW4PGd4eMro95OH2kEcdZEcn15VyilXeHfm6 +9aerAXUByJ7kRd/QsR4aNZ9RJTnOltso8dvGbYhDuDhYb9miGSPFkGX8tlLQ +EIezLPPbPmcaQ2l6OS9CUndvdodyku1S0QqWO8bQkqc3aISQgWOoU0p3x9Ck +ptwx9vRqpO6Ooca5yDG6tLhdhKRuausO1e4Yo7xkGUHJ0uIYNF5r/C7/zHdV +y9qFMnW9uhfCaVt0NR51Fi/bsH3kxUe/pc2mG6Ku0+1CPEhP4uPBXYwD2Gdw +c/dz8u6c0Xhg4H7a73x3dD+tTF2j5IVw2v7IdvfT413qfnrEy9xPi5e731IW +M1DH3cdKhn44Bv9DA3BEZzSoNz0UH1r37Wdib1TIiKrtdGuM1fcOh/LC47xx +sWlqxFzcxGp8WTfiV9+3NOPVXLc0Y26/bVmt+MfrLKQ+meCZuGdLUnCAl+fa +Z716q0Qrv9WRP2S+F+ZNaz6Df2W+bDQTw6bnuCxYCVIFsj0T+xC+1iw9tiSk +E+MIz6IklD2RsDviXhgCs78R+/avCzxaeT5p3CNzFANQ4OS8EseiQQXw3HRK +aClJQuAp0MCOBaQhzLQEU2jmhKTByte2k2+KHMWmqCJBmNpsLK0rvErZuspP +lFIpEKQWsvXSx7EXkdaoXWUcFYz5Y4Ox5YwOMPohi8kb6/Mc4E4/iPi69bkh +2ESAQR3K29Fgp1PvecrN5IxEBdpHE4jA05qZihaL1hOt4iO/7K1dp2wvW2sz +ly4NeEayF35P+QbL3vj6jeO+ismRHaVytYWhSOCZK5YHxLq4tjyqBiwhpGQN +wY6kBYTo7X4DeV5g2rL+CyDZ3BHQLBEd+G+j2ZkMYjiLKW89rKiRWsakzZUi +jT2xLbkmpzZ6QKhu7VKobV1pFULLkLCl7XbBp1jejphOJ8apt9wj1g6gT7/G +GJoUbMyvS8PGAGdScXXT/NYd01o4lciQGZmldKopsh8mvb41QM4DsstOBSXg +EJc7p3azlbuDUPT8XXQHnS53wf9iw/D8ZBoDBWwGgXcbk0NCfE4CWLo7V0s8 +EsQx2b/ei1FQ99+ooRo6nQ2+nniqjn2emyeJ3/H3ARa9qJAc7VrRUSq4lCJU +BwjdRfQqIvhGhOLXEL7TTtgRJkqOb8cZ6g4lqjwSLUZUD3jtAlTz7SigVxHw 
+Nlnpqq2zBW+kq6PIVNsENjLgjy+Nbr7yklqILPu8hKESggMC2TaCFDF/qjwv +lqrWDVUYF0PQuNMw8oVqD+QFi4E9hQVbWaeQxk4qzG+f5BcDtH2zZQa25Zit +cCp+gUEbAyrGmDJ5QJDS9UanAf3vrZBIdX/ChK0ucdrnpX6V6sRc4LpNMRW9 +lJy3aEXctejxRJ9uY9mkedZUVmGq09GJhkVewkFj68EWXiI6WKxyOIfk5MHa +8UUoVV0sxMERKmjQgAg3SoL14YATyIMsho0WP+LdoEL2KMk8n2VhVV/Fq4zG +ulfci63qu9IbDZNPaKh7xbKNEq4xutNzSEXK56BvlmqErCIW6F6lDcDi3eaY +MK4UFgZ6rpCt0UDNxdbK5zC6lQYsYXslCxhoo3uhfv5nqp4KWG2jexEqQwPO +6Dpe0jpc79vP/3I/f/84+Fj2SMXD9+hdX/XYKlGrgSt5owRSzyfKFxr4yiBr +dzSVz0cawWPNc0fzfKJ5Pi2eo4E1yWtDcVtlguXvQqKhuCWWpCIiyV+f06x0 +G+zGrK8zRhYwfp/JXxIGpbK3pDskSh0ibh2IAz7IiyvmVoNPGy9UA3KDcimz +H3pZpmOWl7qQLb/02JZtn5LuCVLo4LffRYEtb/aJo4S8156a9torDnE02KKP +39f18avyZkfsbncEmtj9EnZdy7AnWLHUJwHU+3saBxUdi1/i0cI7lbwE4sJb +Zq3z96r8tfOVFiV8n4lrmfJcWVW1NShzdTUp/w/5yo6/mlAAAA== + +--sDCecy9vEe-- \ No newline at end of file This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <my...@us...> - 2011-01-13 13:18:43
|
Revision: 2458 http://aperture.svn.sourceforge.net/aperture/?rev=2458&view=rev Author: mylka Date: 2011-01-13 13:18:37 +0000 (Thu, 13 Jan 2011) Log Message: ----------- a couple of minor fixes in ModelAccessData, coupled with a few additional checks in AccessDataTest Modified Paths: -------------- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/accessor/AccessData.java aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/accessor/base/ModelAccessData.java aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/accessor/base/AccessDataTest.java aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/accessor/base/TestModelAccessData.java Modified: aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/accessor/AccessData.java =================================================================== --- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/accessor/AccessData.java 2010-12-15 12:35:27 UTC (rev 2457) +++ aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/accessor/AccessData.java 2011-01-13 13:18:37 UTC (rev 2458) @@ -80,7 +80,8 @@ public boolean isKnownId(String id); /** - * Stores information (a key-value pair) for the specified id. + * Stores information (a key-value pair) for the specified id. This operation does NOT imply + * {@link #touch(String)} nor {@link #touchRecursively(String)}. * * @param id The resource's ID. * @param key The info key. @@ -89,7 +90,8 @@ public void put(String id, String key, String value); /** - * Stores a reference relation between two resources, modeling e.g. a link. + * Stores a reference relation between two resources, modeling e.g. a link. This operation + * does NOT imply {@link #touch(String)} nor {@link #touchRecursively(String)}. * * @param id The referring resource's ID. * @param referredID The referred resource's ID. 
Modified: aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/accessor/base/ModelAccessData.java =================================================================== --- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/accessor/base/ModelAccessData.java 2010-12-15 12:35:27 UTC (rev 2457) +++ aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/accessor/base/ModelAccessData.java 2011-01-13 13:18:37 UTC (rev 2458) @@ -309,7 +309,7 @@ Literal object = ModelUtil.createLiteral(model, value, dataType); add(ModelUtil.createStatement(model, subject, predicate, object)); } - add(model.createStatement(subject, timestamp, ModelUtil.createLiteral(model, timestampLong))); + //add(model.createStatement(subject, timestamp, ModelUtil.createLiteral(model, timestampLong))); } catch (ModelException e) { logger.error("Could not store info for ID " + id, e); @@ -339,7 +339,7 @@ public void remove(String id) { try { - remove(ModelUtil.createURI(model, id), null); + remove(ModelUtil.createURI(model, id)); } catch (ModelException e) { logger.error("Could not remove info about ID " + id, e); @@ -402,23 +402,19 @@ } } - private void remove(URI subject, URI predicate) { + private void remove(URI subject) { commit(); ClosableIterator<? 
extends Statement> iter = null; try { // remove the subtree - - if (predicate == null) { - // this means that we want to remove everything we know - iter = model.findStatements(subject, aggregates, Variable.ANY); - while (iter.hasNext()) { - URI child = iter.next().getObject().asURI(); - remove(child,null); - } + iter = model.findStatements(subject, aggregates, Variable.ANY); + while (iter.hasNext()) { + URI child = iter.next().getObject().asURI(); + remove(child); } // remove the current node - model.removeStatement(subject, predicate, (Node)null); + model.removeStatement(subject, null, (Node)null); // remove the link from the parent model.removeStatement(null, aggregates, subject); // note that the incoming referredID links are left alone @@ -432,6 +428,16 @@ } } } + + private void remove(URI subject, URI predicate) { + commit(); + try { + model.removeStatement(subject, predicate, (Node)null); + } + catch (ModelRuntimeException e) { + logger.error("Exception while removing statement", e); + } + } public Set getAggregatedIDs(String id) { commit(); Modified: aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/accessor/base/AccessDataTest.java =================================================================== --- aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/accessor/base/AccessDataTest.java 2010-12-15 12:35:27 UTC (rev 2457) +++ aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/accessor/base/AccessDataTest.java 2011-01-13 13:18:37 UTC (rev 2458) @@ -35,8 +35,8 @@ protected static final String id3 = "file:file3"; protected static final String folderid1 = "file:folder1"; protected static final String folderid2 = "file:folder2"; - protected static final Set idset = new HashSet(Arrays.asList(id1,id2,id3,folderid1,folderid2)); - protected static final Set idset2 = new HashSet(Arrays.asList(folderid2,id3)); + protected static final Set<String> idset = new HashSet<String>(Arrays.asList(id1,id2,id3,folderid1,folderid2)); + 
protected static final Set<String> idset2 = new HashSet<String>(Arrays.asList(folderid2,id3)); protected static final String key1 = "key1"; protected static final String key2 = "key2"; protected static final String value1 = "value1"; @@ -68,6 +68,10 @@ accessData.put(id3, key1, value2); accessData.putAggregatedID(folderid2, id3); + for (String id : idset) { + accessData.touch(id); + } + assertEquals(5, accessDataToTest.getSize()); assertEquals(3, accessDataToTest.getAggregatedIDs(folderid1).size()); assertEquals(1, accessDataToTest.getAggregatedIDs(folderid2).size()); @@ -229,10 +233,8 @@ } // and check if their number is OK assertEquals(counter,idset.size()); - + // this should touch a resource - String newValue2 = accessDataToTest.get(id1,key2); - assertEquals(newValue2, value2); accessDataToTest.touch(id1); // after one resource has been touched, it should no longer appear on the @@ -256,12 +258,87 @@ accessDataToTest.clear(); } - public void testTouchRecursively() throws IOException { - + public void testNoImplicitTouch() throws IOException { + accessDataToTest.store(); + accessDataToTest.initialize(); + // at the beginning all resources are supposed to be untouched + int counter = 0; + Iterator iterator = accessDataToTest.getUntouchedIDsIterator(); + while (iterator.hasNext()) { + String id = (String)iterator.next(); + counter++; + // check if the untouched resources belong to the set + assertTrue(idset.contains(id)); + } + // and check if their number is OK + assertEquals(counter,idset.size()); + + accessDataToTest.put(id1,key2, value3); + String newValue2 = accessDataToTest.get(id1,key2); + assertEquals(newValue2, value3); + accessDataToTest.put(id1,key2, value2); + newValue2 = accessDataToTest.get(id1,key2); + assertEquals(newValue2, value2); + accessDataToTest.putAggregatedID(id1, id2); + accessDataToTest.removeAggregatedID(id1, id2); + accessDataToTest.putReferredID(id1, id2); + accessDataToTest.removeReferredID(id1, id2); + + /* + * get and put should 
not touch the resource + * putReferredId, putAggregatedId + * removeReferredId, removeAggregatedId + * getReferredIds, getAggregatedIds + * shouldn't touch the resource either, + * only explicit touch() can touch the resource + */ + counter = 0; + iterator = accessDataToTest.getUntouchedIDsIterator(); + while (iterator.hasNext()) { + String id = (String)iterator.next(); + counter++; + assertTrue(idset.contains(id)); + } + assertEquals(counter,idset.size()); // all ids are untouched + } + + public void testPutDoesntChangeAggregations() throws IOException { accessDataToTest.store(); accessDataToTest.initialize(); + int counter = 0; + Iterator iterator = accessDataToTest.getAggregatedIDs(folderid1).iterator(); + while (iterator.hasNext()) { + String id = (String)iterator.next(); + counter++; + } + assertEquals(3,counter); + + accessDataToTest.put(id1,key2, value3); + + counter = 0; + iterator = accessDataToTest.getAggregatedIDs(folderid1).iterator(); + while (iterator.hasNext()) { + String id = (String)iterator.next(); + counter++; + } + assertEquals(3,counter); + } + + public void testPutDoesntChangeOtherKeyValues() throws IOException { + accessDataToTest.store(); + accessDataToTest.initialize(); + assertEquals(value1, accessDataToTest.get(id1, key1)); + accessDataToTest.put(id1,key2, value3); + assertEquals(value1, accessDataToTest.get(id1, key1)); + } + + + public void testTouchRecursively() throws IOException { + accessDataToTest.store(); + + accessDataToTest.initialize(); // at the beginning all resources are supposed to be untouched int counter = 0; Iterator iterator = accessDataToTest.getUntouchedIDsIterator(); Modified: aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/accessor/base/TestModelAccessData.java =================================================================== --- aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/accessor/base/TestModelAccessData.java 2010-12-15 12:35:27 UTC (rev 2457) +++ 
aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/accessor/base/TestModelAccessData.java 2011-01-13 13:18:37 UTC (rev 2458) @@ -16,8 +16,6 @@ private Model model; - private ModelAccessData accessData; - public void setUp() throws ModelException, IOException { model = createModel(); super.setUp(new ModelAccessData(model)); @@ -33,4 +31,13 @@ model.open(); return model; } + + public void testPutDoesntChangeTheStatementCount() throws Exception { + accessDataToTest.store(); + accessDataToTest.initialize(); + long count1 = model.size(); + accessDataToTest.put(id1,key2, value3); + long count2 = model.size(); + assertEquals(count1,count2); + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <my...@us...> - 2011-02-25 12:41:12
|
Revision: 2460 http://aperture.svn.sourceforge.net/aperture/?rev=2460&view=rev Author: mylka Date: 2011-02-25 12:41:06 +0000 (Fri, 25 Feb 2011) Log Message: ----------- [3185790] added exif dateTime predicates to the jpg extractor Modified Paths: -------------- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/extractor/jpg/JpgHeaderExtractor.java aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/extractor/jpg/JpgExtractorTest.java Modified: aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/extractor/jpg/JpgHeaderExtractor.java =================================================================== --- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/extractor/jpg/JpgHeaderExtractor.java 2011-02-24 23:15:00 UTC (rev 2459) +++ aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/extractor/jpg/JpgHeaderExtractor.java 2011-02-25 12:41:06 UTC (rev 2460) @@ -10,6 +10,7 @@ import java.text.DecimalFormat; import java.text.DecimalFormatSymbols; import java.text.NumberFormat; +import java.util.Date; import java.util.Locale; import org.ontoware.rdf2go.model.Model; @@ -124,6 +125,10 @@ addStringMetadataIfSet(ExifDirectory.TAG_FLASH_ENERGY, NEXIF.flashEnergy, exifDirectory); addStringMetadataIfSet(ExifDirectory.TAG_FLASHPIX_VERSION, NEXIF.flashpixVersion, exifDirectory); addStringMetadataIfSet(ExifDirectory.TAG_MAKE, NEXIF.make, exifDirectory); + + addDateMetadataIfSet(ExifDirectory.TAG_DATETIME, NEXIF.dateTime, exifDirectory); + addDateMetadataIfSet(ExifDirectory.TAG_DATETIME_DIGITIZED, NEXIF.dateTimeDigitized, exifDirectory); + addDateMetadataIfSet(ExifDirectory.TAG_DATETIME_ORIGINAL, NEXIF.dateTimeOriginal, exifDirectory); } @@ -139,6 +144,17 @@ if (entry != null) result.add(property, entry); } + + private void addDateMetadataIfSet(int tag, URI property, Directory directory) { + Date date; + try { + date = directory.getDate(tag); + if (date != null) + result.add(property, date); + } catch (MetadataException e) { + 
LoggerFactory.getLogger(getClass()).warn("Wrong date metadata",e); + } + } private double getRationalArrayValue(int tag, Directory directory) { if (!directory.containsTag(tag)) { Modified: aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/extractor/jpg/JpgExtractorTest.java =================================================================== --- aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/extractor/jpg/JpgExtractorTest.java 2011-02-24 23:15:00 UTC (rev 2459) +++ aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/extractor/jpg/JpgExtractorTest.java 2011-02-25 12:41:06 UTC (rev 2460) @@ -35,6 +35,23 @@ validate(container); container.dispose(); } + + /** + * Added for ticket 3185790 + * @throws Exception + */ + public void testTimestamps() throws Exception { + ExtractorFactory factory = new JpgExtractorFactory(); + Extractor extractor = factory.get(); + RDFContainer container = extract(DOCS_PATH + "jpg-exif-img_9367.JPG", extractor); + + checkUTCDate(NEXIF.dateTime, "2007-06-24T14:17:00Z", container); + checkUTCDate(NEXIF.dateTimeOriginal, "2007-06-24T14:17:00Z", container); + checkUTCDate(NEXIF.dateTimeDigitized, "2007-06-24T14:17:00Z", container); + + container.getModel().dump(); + container.dispose(); + } public void testZeroLength() throws ExtractorException, IOException { ExtractorFactory factory = new JpgExtractorFactory(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |