From: <my...@us...> - 2009-12-09 08:44:26
|
Revision: 2170 http://aperture.svn.sourceforge.net/aperture/?rev=2170&view=rev Author: mylka Date: 2009-12-09 08:44:10 +0000 (Wed, 09 Dec 2009) Log Message: ----------- oops, on previous commit i accidentally commented out all DOF tests, I haste to repair the damage Modified Paths: -------------- aperture/trunk/core/crawler/mail/src/test/java/org/semanticdesktop/aperture/crawler/mail/DataObjectFactoryTest.java Modified: aperture/trunk/core/crawler/mail/src/test/java/org/semanticdesktop/aperture/crawler/mail/DataObjectFactoryTest.java =================================================================== --- aperture/trunk/core/crawler/mail/src/test/java/org/semanticdesktop/aperture/crawler/mail/DataObjectFactoryTest.java 2009-12-09 08:36:08 UTC (rev 2169) +++ aperture/trunk/core/crawler/mail/src/test/java/org/semanticdesktop/aperture/crawler/mail/DataObjectFactoryTest.java 2009-12-09 08:44:10 UTC (rev 2170) @@ -50,718 +50,718 @@ */ public class DataObjectFactoryTest extends ApertureTestBase { -// /** -// * This method runs the data object factory over a simple email, with plain-text content written in -// * "normal" US-ASCII encoding without any non-ASCII characters. The factory should return exactly one data -// * object with the full-text correctly extracted. -// * @throws Exception -// */ -// public void testOrdinarySinglePartPlainTextEmail() throws Exception { -// DataObjectFactory fac = wrapEmail("mail-thunderbird-1.5.eml"); -// DataObject obj = fac.getObject(); -// assertTrue(obj instanceof MessageDataObject); -// // there should only be one data object -// assertNoContentLost(fac, obj); -// URI emailUri = obj.getID(); -// assertEquals(emailUri.toString(), "uri:dummymailuri:mail-thunderbird-1.5.eml"); -// -// RDFContainer container = obj.getMetadata(); -// Model model = container.getModel(); -// -// /* -// * First we test the names of the sender and receiver -// */ -// Resource sender = findSingleObjectResource(model, emailUri, NMO.from); -// assertSingleValueProperty(model, sender, NCO.fullname, "Christiaan Fluit"); -// Set<Resource> senderEmailAddresses = findObjectResourceSet(model, sender, NCO.hasEmailAddress); -// Resource receiver = findSingleObjectResource(model, emailUri, NMO.to); -// assertSingleValueProperty(model, receiver, NCO.fullname, "Christiaan Fluit"); -// Set<Resource> receiverEmailAddresses = findObjectResourceSet(model, receiver, NCO.hasEmailAddress); -// -// /* -// * There was a problem with loosing information which address was the sender -// * and which was the receiver if both have the same name. This is to confirm that this problem -// * no longer occurs. -// */ -// assertEquals(1,senderEmailAddresses.size()); -// Iterator<Resource> it = senderEmailAddresses.iterator(); -// assertTrue(RDFTool.getSingleValueString(model, it.next(), NCO.emailAddress).equals("chr...@ad...")); -// -// assertEquals(1,receiverEmailAddresses.size()); -// it = receiverEmailAddresses.iterator(); -// assertTrue(RDFTool.getSingleValueString(model, it.next(), NCO.emailAddress).equals("Chr...@ad...")); -// assertNotSame(sender,receiver); // the sender and receiver are NOT the same resource -// -// testStandardMessageMetadata(model, emailUri, "iso-8859-1", "message/rfc822", -// "text/plain", "test subject", "15", "2006-02-20T13:47:14Z", "<43F...@ad...>"); -// -// // test the plain text content extraction -// String content = container.getString(NMO.plainTextMessageContent); -// assertEquals("test body\r\n--\r\n",content); -// -// -// validate(model); -// obj.dispose(); -// } -// -// /** -// * This method tests the behavior of the data object factory when it is confronted with a -// * multipart/alternative email message. Such a message contains the same content in both the html and the -// * plain text version. The desired behavior is to ignore the html part altogether and put in only the -// * plain text part. Thus only one data object should be returned, even though the message is composed of -// * three mime parts. -// * @throws Exception -// */ -// public void testMultipartAlternative() throws Exception { -// DataObjectFactory fac = wrapEmail("mail-multipart-plain-html.eml"); -// DataObject obj = fac.getObject(); -// assertTrue(obj instanceof MessageDataObject); -// // there should only be one data object -// assertNoContentLost(fac, obj); -// URI emailUri = obj.getID(); -// assertEquals(emailUri.toString(), "uri:dummymailuri:mail-multipart-plain-html.eml"); -// RDFContainer container = obj.getMetadata(); -// Model model = container.getModel(); -// -// testSenderAndReceiver(model, emailUri, "SourceForge.net","su...@os...",null,"my...@us..."); -// -// /* -// * the charset and the content mime type are defined only in the plaintext part, the factory should -// * merge them with the overall message metadata -// */ -// testStandardMessageMetadata(model, emailUri, "iso-8859-1", "message/rfc822", -// "text/plain", "SourceForge.net needs your input on OSS development and support issues", -// "10251", "2006-10-25T12:50:02Z", "<CON...@sm...>"); -// -// // test the plain text content extraction -// String content = container.getString(NMO.plainTextMessageContent); -// // this is a sentence from the plaintext part -// assertTrue(content.contains("SourceForge.net is looking for open-source \"experts and opinion-leaders\"")); -// // the original message contains weird equality signs at the end of each plaintext line, they should -// // be filtered out -// assertFalse(content.contains("please help us ensure that they =")); -// // the content should not contain any HTML markup -// assertFalse(content.contains("<b>SourceForge.net</b> is looking for open-source \"experts and")); -// -// validate(model); -// obj.dispose(); -// } -// -// /** -// * This method tests how does the DataObjectFactory copes with a fairly complicated multipart/mixed message. -// * It is a message with two attachments - the first attachment is a PDF, the second attachment is another -// * message (forwarded). We should get three data objects. -// * @throws Exception -// */ -// public void testMultipartMixed() throws Exception { -// DataObjectFactory fac = wrapEmail("mail-multipart-test.eml"); -// DataObject obj1 = fac.getObject(); -// assertTrue(obj1 instanceof MessageDataObject); -// DataObject obj2 = fac.getObject(); -// assertTrue(obj2 instanceof FileDataObject); -// DataObject obj3 = fac.getObject(); -// assertTrue(obj3 instanceof MessageDataObject); -// assertNoContentLost(fac,obj1,obj2,obj3); -// -// // first some test of the first message -// URI emailUri = obj1.getID(); -// assertEquals(emailUri.toString(), "uri:dummymailuri:mail-multipart-test.eml"); -// RDFContainer container1 = obj1.getMetadata(); -// Model model1 = container1.getModel(); -// testSenderAndReceiver(model1, emailUri, "Antoni My\u0142ka","ant...@gm...","aperture-devel","ape...@li..."); -// testStandardMessageMetadata(model1, emailUri, "iso-8859-2", "message/rfc822", "text/plain", -// "[Fwd: Re: [Aperture-devel] Developer's Checklists]", "36872", "2008-07-27T22:10:47Z", -// "<488...@gm...>"); -// String content = container1.getString(NMO.plainTextMessageContent); -// assertEquals("This is a test of a multipart message, that has some content and an \r\n" + -// "attached message, and a PDF attachment. Let's see how the MimeSubCrawler \r\n" + -// "will handler this.\r\n" + -// "\r\n" + -// "Antoni Mylka\r\n", -// content); -// validate(container1); -// obj1.dispose(); -// -// // then some tests of the attached message (which should reside in the third data object); -// assertEquals(obj3.getID().toString(), "uri:dummymailuri:mail-multipart-test.eml#2"); -// Model model3 = obj3.getMetadata().getModel(); -// testSenderAndReceiver(model3, obj3.getID(), "Leo Sauermann","leo...@df...","Antoni Mylka","ant...@gm..."); -// testStandardMessageMetadata(model3, obj3.getID(), "iso-8859-2", "message/rfc822", "text/plain", -// "Re: [Aperture-devel] Developer's Checklists", "1341", "2008-07-25T08:50:18Z", -// "<488...@df...>"); -// assertMessageContentContains("> http://aperture.wiki.sourceforge.net/DevelopersChecklists", obj3); -// assertMessageContentContains("> about all this, but it may nevertheless be interesting.", obj3); -// validate(model3); -// obj3.dispose(); -// -// // the second data object should be a FileDataObject containing a PDF -// assertTrue(obj2 instanceof FileDataObject); -// FileDataObject fobj2 = (FileDataObject)obj2; -// URI pdfUri = fobj2.getID(); -// RDFContainer container2 = fobj2.getMetadata(); -// Model model2 = container2.getModel(); -// InputStream contentStream = fobj2.getContent(); -// assertNotNull(contentStream); -// assertEquals(pdfUri.toString(), "uri:dummymailuri:mail-multipart-test.eml#1"); -// assertSingleValueProperty(model2, pdfUri, NIE.mimeType, "application/pdf"); -// assertSingleValueProperty(model2, pdfUri, NFO.fileName, "pdf-openoffice-2.0-writer.pdf"); -// -// assertMimeType("application/pdf", pdfUri, contentStream); -// -// // and the extractor -// Extractor extractor = new PdfExtractorFactory().get(); -// extractor.extract(pdfUri, contentStream, null, "application/pdf", container2); -// String contentString = container2.getString(NIE.plainTextContent); -// assertTrue(contentString.contains("This is an example document created with OpenOffice 2.0")); -// -// validate(container2); -// obj2.dispose(); -// } -// -// /** -// * This method tests the partUriDelimiter feature. It allows the user to customize the delimiter between the -// * uri of the message and the part identifiers -// * @throws Exception -// */ -// public void testPartUriDelimiter() throws Exception { -// InputStream stream = ResourceUtil.getInputStream(DOCS_PATH + "mail-multipart-test.eml", this.getClass()); -// MimeMessage msg = new MimeMessage(null, stream); -// DataObjectFactory fac = new DataObjectFactory(msg,containerFactory,null,null, -// new URIImpl("mime:zip:uri:dummymailuri:somefile.zip!/mail-multipart-test.eml!/"), null,""); -// DataObject obj1 = fac.getObject(); -// DataObject obj2 = fac.getObject(); -// DataObject obj3 = fac.getObject(); -// assertNoContentLost(fac,obj1,obj2,obj3); -// -// URI emailUri = obj1.getID(); -// assertEquals(emailUri.toString(), "mime:zip:uri:dummymailuri:somefile.zip!/mail-multipart-test.eml!/"); -// obj1.dispose(); -// -// -// assertEquals(obj3.getID().toString(), "mime:zip:uri:dummymailuri:somefile.zip!/mail-multipart-test.eml!/2"); -// obj3.dispose(); -// -// URI pdfUri = obj2.getID(); -// assertEquals(pdfUri.toString(), "mime:zip:uri:dummymailuri:somefile.zip!/mail-multipart-test.eml!/1"); -// obj2.dispose(); -// } -// -// /** -// * A test for a message that has been taken from within a thread. It contains References: and In-Reply-To: -// * headers. It is a multipart/mixed message, whose first part is a multipart/alternative. An example of -// * the ingenuity of the SF mailing list software, they add the ads and the mailing list signature as -// * attachments. All the 'References:' and 'In-Reply-To:' headers should appear in the extracted RDF. -// * -// * @throws Exception -// */ -// public void testMessageInAThread() throws Exception { -// DataObjectFactory fac = wrapEmail("mail-threaded.eml"); -// DataObject obj = fac.getObject(); -// assertTrue(obj instanceof MessageDataObject); -// RDFContainer metadata = obj.getMetadata(); -// Model model = metadata.getModel(); -// DataObject obj2 = fac.getObject(); -// DataObject obj3 = fac.getObject(); -// assertNoContentLost(fac,obj,obj2,obj3); -// obj2.dispose(); -// obj3.dispose(); -// -// assertEquals("<452...@df...>", findSingleObjectNode(model, metadata.getDescribedUri(), -// NMO.messageId).asLiteral().getValue()); -// assertReferencedEmails(obj, NMO.references, "<452...@df...>", -// "<452...@ad...>", "<452...@df...>", -// "<452...@ad...>"); -// assertReferencedEmails(obj, NMO.inReplyTo, "<452...@ad...>"); -// validate(obj); -// obj.dispose(); -// } -// -// /** -// * Tests whether the 'References:' and 'In-Reply-To:' are extracted correctly from a forwarded message. -// * The .eml file that is tested has a following structure -// * -// * <pre> -// * multipart/mixed -// * - plain text (my greeting) -// * - multipart/mixed (the forwarded message) -// * - multipart/alternative (the leo's reply) -// * - plain text - plain text content -// * - html text - the html text contet -// * - plain text - the sourceforge ad -// * - plain text - the sourceforge list signature -// * </pre> -// * -// * This structure should yield four data objects. (my greeting, leo's reply in plain text, the ad and the -// * signature). The second data object should have correct References: and In-Reply-To links. -// * @throws Exception -// */ -// public void testForwardedMessageWithReferecesAndInReplyToHeaders() throws Exception { -// DataObjectFactory fac = wrapEmail("mail-forwarded-references.eml"); -// DataObject myGreeting = fac.getObject(); -// assertTrue(myGreeting instanceof MessageDataObject); -// DataObject forwardedMsg = fac.getObject(); -// assertTrue(forwardedMsg instanceof MessageDataObject); -// DataObject sfAd = fac.getObject(); -// assertTrue(sfAd instanceof FileDataObject); -// DataObject sfSig = fac.getObject(); -// assertTrue(sfSig instanceof FileDataObject); -// assertNoContentLost(fac,myGreeting,forwardedMsg,sfAd,sfSig); -// -// assertMessageId("<48D...@po...>", myGreeting); -// assertMessageId("<46A...@df...>", forwardedMsg); -// assertMessageContentContains("A test message that contains a forwarded message",myGreeting); -// assertMessageContentContains("There are two concrete benefits to using XRIs identified",forwardedMsg); -// -// /* -// * The two last data objects are unnamed plain text message parts - attachments, they should be -// * interpreted as such i.e. no content in the metadata, everything is a plain text FileDataObject -// */ -// assertNull(sfAd.getMetadata().getString(NMO.plainTextMessageContent)); -// assertAsciiFileContentContains(sfAd, "This SF.net email is sponsored by: Splunk Inc."); -// -// assertNull(sfSig.getMetadata().getString(NMO.plainTextMessageContent)); -// assertAsciiFileContentContains(sfSig, "Aperture-devel mailing list"); -// -// assertReferencedEmails(forwardedMsg, NMO.references, "<46A...@df...>", -// "<216...@ma...>"); -// assertReferencedEmails(forwardedMsg, NMO.inReplyTo, -// "<216...@ma...>"); -// validate(myGreeting); -// myGreeting.dispose(); -// forwardedMsg.dispose(); -// sfAd.dispose(); -// sfSig.dispose(); -// } -// -// /** -// * Tests whether .xml files attached to the email are returned as separate FileDataObjects. Problems have -// * been reported with XML attachments being mistakenly returned as DataObjects with their entire content -// * being returned as NMO.messagePlainTextContent (with all the tags). -// * -// * @throws Exception -// */ -// public void testXmlAttachment() throws Exception { -// DataObjectFactory fac = wrapEmail("mail-xml-attachment.eml"); -// DataObject mailContent = fac.getObject(); -// assertTrue(mailContent instanceof MessageDataObject); -// DataObject xmlAttachment = fac.getObject(); -// assertTrue(xmlAttachment instanceof FileDataObject); -// assertNoContentLost(fac, mailContent, xmlAttachment); -// assertMessageContentContains("test mail.", mailContent); -// testSenderAndReceiver(mailContent.getMetadata().getModel(), mailContent.getID(), "Christiaan Fluit", -// "chr...@ad...", null, "ch...@ad..."); -// assertEquals("line.xml",xmlAttachment.getMetadata().getString(NFO.fileName)); -// assertMimeType("text/xml", new URIImpl("uri:line.xml"), ((FileDataObject)xmlAttachment).getContent()); -// validate(mailContent); -// mailContent.dispose(); -// xmlAttachment.dispose(); -// } -// -// /** -// * <p> -// * Tests whether .txt files attached to the email are returned as separate FileDataObjects instead of as -// * DataObjects with their content already extracted. -// * </p> -// * -// * The MIME structure of this .eml file is: -// * -// * <pre> -// * multipart/mixed -// * text/plain - body text -// * text/plain, filename="attachment.txt" - attachment text -// * </pre> -// * -// * <p> -// * Obviously, it should yield two DataObjects. The second one should be a FileDataObject. -// * </p> -// * -// * @throws Exception -// */ -// public void testPlainTextAttachment() throws Exception { -// DataObjectFactory fac = wrapEmail("mail-plaintext-attachment.eml"); -// DataObject mail = fac.getObject(); -// assertTrue(mail.getMetadata().getString(NMO.plainTextMessageContent).contains("Example body text.")); -// DataObject attachment = fac.getObject(); -// assertNoContentLost(fac, mail,attachment); -// assertTrue(attachment instanceof FileDataObject); -// String content = IOUtil.readString(((FileDataObject)attachment).getContent()); -// assertTrue(content.contains("test attachment")); -// assertEquals("attachment.txt",attachment.getMetadata().getString(NFO.fileName)); -// assertTrue(attachment.getMetadata().getAll(RDF.type).contains(NFO.Attachment)); -// validate(attachment); -// mail.dispose(); -// attachment.dispose(); -// } -// -// /** -// * <p> -// * The .eml file tested in this test has been submitted with a problem. -// * </p> -// * -// * <p> -// * It has a following mime structure: -// * -// * <pre> -// * multipart/mixed -// * - plain text (the text of the message) -// * - plain text (a text/plain attachment with a name: ConfigFilePanel.java) -// * - plain text (a text/plain attachment without a name) -// * - plain text (a text/plain attachment without a name) -// * </pre> -// * -// * </p> -// * -// * <p> -// * <ol> -// * <li>Four data objects should be returned, a MessageDataObject for the first part, and three FileDataObjects -// * for three subsequent parts.</li> -// * <li>The second dataobject should contain a filename.</li> -// * <li>The plaintext attachment should NOT be reported as messages.</li> -// * <li>The unnamed file attachments should NOT get a contentCreated -// * property.</li> -// * </ol> -// * -// * </p> -// * -// * @throws Exception -// */ -// public void testUnsupportedOperationException() throws Exception { -// DataObjectFactory fac = wrapEmail("mail-UnsupportedOperationException.eml"); -// DataObject mail = fac.getObject(); -// assertTrue(mail.getMetadata().getString(NMO.plainTextMessageContent).contains( -// "I've attached my .java file")); -// assertTrue(mail instanceof MessageDataObject); -// -// DataObject javaAttachment = fac.getObject(); -// assertTrue(javaAttachment instanceof FileDataObject); -// assertEquals("ConfigFilePanel.java",javaAttachment.getMetadata().getString(NFO.fileName)); -// assertFalse(javaAttachment.getMetadata().getAll(RDF.type).contains(NMO.Message)); -// -// DataObject firstUnnamedAttachment = fac.getObject(); -// assertTrue(firstUnnamedAttachment instanceof FileDataObject); -// assertNull(firstUnnamedAttachment.getMetadata().getString(NFO.fileName)); -// assertNull(firstUnnamedAttachment.getMetadata().getDate(NIE.contentCreated)); -// assertFalse(firstUnnamedAttachment.getMetadata().getAll(RDF.type).contains(NMO.Message)); -// -// DataObject secondUnnamedAttachment = fac.getObject(); -// assertTrue(secondUnnamedAttachment instanceof FileDataObject); -// assertNull(secondUnnamedAttachment.getMetadata().getString(NFO.fileName)); -// assertNull(secondUnnamedAttachment.getMetadata().getDate(NIE.contentCreated)); -// assertFalse(secondUnnamedAttachment.getMetadata().getAll(RDF.type).contains(NMO.Message)); -// -// for (RDFContainer cont : containerFactory.returnedContainers.values()) { -// validate(cont); -// } -// assertNoContentLost(fac, mail, javaAttachment, firstUnnamedAttachment, secondUnnamedAttachment); -// mail.dispose(); -// javaAttachment.dispose(); -// firstUnnamedAttachment.dispose(); -// secondUnnamedAttachment.dispose(); -// } -// -// /** -// * Tests whether non-text attachments have a charset property. They should not. (issue 2278007). -// * @throws Exception -// */ -// public void testSuperfluousCharsets() throws Exception { -// DataObjectFactory fac = wrapEmail("mail-multipart-test.eml"); -// DataObject email = fac.getObject(); -// DataObject pdf = fac.getObject(); -// DataObject forwardedEmail = fac.getObject(); -// assertNoContentLost(fac,email, pdf, forwardedEmail); -// assertNull(pdf.getMetadata().getString(NIE.characterSet)); -// email.dispose(); -// pdf.dispose(); -// forwardedEmail.dispose(); -// } -// -// /** -// * Tests whether email, that don't have any explicit charset specified, get a default -// * us-ascii setting, according to RFC2045. The us-ascii will be converted to iso-8859-1, so -// * therefore we expect iso-8859-1 -// * @throws Exception -// */ -// public void testCorrectlyInferredRFC2045Charset() throws Exception { -// DataObjectFactory fac = wrapEmail("mail-thunderbird-1.5-unspecifiedcharset.eml"); -// DataObject email = fac.getObject(); -// assertEquals("iso-8859-1",email.getMetadata().getString(NIE.characterSet)); -// assertNoContentLost(fac, email); -// email.dispose(); -// } -// -// /** -// * Tests whether plaintext attachments, that don't have any explicit charset specified, get a default -// * us-ascii setting, according to RFC2045. The us-ascii will be converted to iso-8859-1, so -// * therefore we expect iso-8859-1 -// * @throws Exception -// */ -// public void testCorrectlyInferredRFC2045CharsetPlaintextAttachment() throws Exception { -// DataObjectFactory fac = wrapEmail("mail-plaintext-attachment.eml"); -// DataObject email = fac.getObject(); -// DataObject attachment = fac.getObject(); -// assertNoContentLost(fac, email, attachment); -// assertEquals("iso-8859-1",attachment.getMetadata().getString(NIE.characterSet)); -// email.dispose(); -// attachment.dispose(); -// } -// -// /** -// * Tests whether the @link {@link DataObjectFactory#getObjectAndDisposeAllOtherObjects(String)} works -// * correctly. The method will process an email that yields four data objects, will try to obtain the third -// * one, and then will check if all others have been disposed already. -// * -// * @throws Exception -// */ -// public void testGetObjectAndDisposeAllOther() throws Exception { -// DataObjectFactory fac = wrapEmail("mail-forwarded-references.eml"); -// DataObject object = fac.getObjectAndDisposeAllOtherObjects("uri:dummymailuri:mail-forwarded-references.eml#1-1"); -// assertFalse(containerFactory.returnedContainers.get("uri:dummymailuri:mail-forwarded-references.eml").getModel().isOpen()); -// assertFalse(containerFactory.returnedContainers.get("uri:dummymailuri:mail-forwarded-references.eml#1").getModel().isOpen()); -// assertTrue(containerFactory.returnedContainers.get("uri:dummymailuri:mail-forwarded-references.eml#1-1").getModel().isOpen()); -// assertFalse(containerFactory.returnedContainers.get("uri:dummymailuri:mail-forwarded-references.eml#1-2").getModel().isOpen()); -// assertAsciiFileContentContains(object, "This SF.net email is sponsored by: Splunk Inc."); -// assertNoContentLost(fac, object); -// object.dispose(); -// } -// -// /** -// * <p> -// * Tests whether the @link {@link DataObjectFactory#getObject(String)} works correctly. The method will -// * process an email that originally yields four data objects, will try to obtain the third one, and then -// * will check that no other object has been disposed in the process. -// * </p> -// * -// * <p> -// * The second part of the text will call the {@link DataObjectFactory#disposeRemainingObjects()} method -// * and will check if all models have been properly disposed. -// * </p> -// * -// * @throws Exception -// */ -// public void testGetObjectString() throws Exception { -// DataObjectFactory fac = wrapEmail("mail-forwarded-references.eml"); -// DataObject object = fac.getObject("uri:dummymailuri:mail-forwarded-references.eml#1-1"); -// assertTrue(containerFactory.returnedContainers.get("uri:dummymailuri:mail-forwarded-references.eml").getModel().isOpen()); -// assertTrue(containerFactory.returnedContainers.get("uri:dummymailuri:mail-forwarded-references.eml#1").getModel().isOpen()); -// assertTrue(containerFactory.returnedContainers.get("uri:dummymailuri:mail-forwarded-references.eml#1-1").getModel().isOpen()); -// assertTrue(containerFactory.returnedContainers.get("uri:dummymailuri:mail-forwarded-references.eml#1-2").getModel().isOpen()); -// assertAsciiFileContentContains(object, "This SF.net email is sponsored by: Splunk Inc."); -// fac.disposeRemainingObjects(); -// assertFalse(containerFactory.returnedContainers.get("uri:dummymailuri:mail-forwarded-references.eml").getModel().isOpen()); -// assertFalse(containerFactory.returnedContainers.get("uri:dummymailuri:mail-forwarded-references.eml#1").getModel().isOpen()); -// assertTrue(containerFactory.returnedContainers.get("uri:dummymailuri:mail-forwarded-references.eml#1-1").getModel().isOpen()); -// assertFalse(containerFactory.returnedContainers.get("uri:dummymailuri:mail-forwarded-references.eml#1-2").getModel().isOpen()); -// assertNoContentLost(fac, object); -// object.dispose(); -// assertFalse(containerFactory.returnedContainers.get("uri:dummymailuri:mail-forwarded-references.eml#1-1").getModel().isOpen()); -// } -// -// /** -// * <p> -// * Tests whether the {@link DataObjectFactory#getAllDataObjects()} works correctly. The method will -// * process an email that originally yields four data objects, will try to obtain the map with all of them, and then -// * will check that no object has been disposed in the process. -// * </p> -// * -// * <p> -// * The second part of the text will call the {@link DataObjectFactory#getObject()} method -// * and will check if it returns null. -// * </p> -// * -// * @throws Exception -// */ -// public void testGetAllDataObjects() throws Exception { -// DataObjectFactory fac = wrapEmail("mail-forwarded-references.eml"); -// Map<URI, DataObject> objects = fac.getAllDataObjects(); -// assertTrue(containerFactory.returnedContainers.get("uri:dummymailuri:mail-forwarded-references.eml").getModel().isOpen()); -// assertTrue(containerFactory.returnedContainers.get("uri:dummymailuri:mail-forwarded-references.eml#1").getModel().isOpen()); -// assertTrue(containerFactory.returnedContainers.get("uri:dummymailuri:mail-forwarded-references.eml#1-1").getModel().isOpen()); -// assertTrue(containerFactory.returnedContainers.get("uri:dummymailuri:mail-forwarded-references.eml#1-2").getModel().isOpen()); -// URI u1 = new URIImpl("uri:dummymailuri:mail-forwarded-references.eml"); -// URI u2 = new URIImpl("uri:dummymailuri:mail-forwarded-references.eml#1"); -// URI u3 = new URIImpl("uri:dummymailuri:mail-forwarded-references.eml#1-1"); -// URI u4 = new URIImpl("uri:dummymailuri:mail-forwarded-references.eml#1-2"); -// assertTrue(objects.containsKey(u1)); -// assertTrue(objects.containsKey(u2)); -// assertTrue(objects.containsKey(u3)); -// assertTrue(objects.containsKey(u4)); -// assertAsciiFileContentContains(objects.get(u3),"This SF.net email is sponsored by: Splunk Inc."); -// assertNoContentLost(fac, objects.get(u1), objects.get(u2), objects.get(u3), objects.get(u4)); -// objects.get(u1).dispose(); -// objects.get(u2).dispose(); -// objects.get(u3).dispose(); -// objects.get(u4).dispose(); -// } -// -// /** -// * Tests if the receivedDate property is extracted properly. -// * @throws IOException -// * @throws MessagingException -// */ -// public void testReceivedDate() throws MessagingException, IOException { -// DataObjectFactory fac = wrapEmail("mail-threaded.eml"); -// DataObject o1 = fac.getObject(); -// -// assertTrue(DateUtil.dateTimeEqualToUTCString(o1.getMetadata().getDate(NMO.sentDate), "2006-10-09T15:09:58Z")); -// assertTrue(DateUtil.dateTimeEqualToUTCString(o1.getMetadata().getDate(NMO.receivedDate), "2006-10-09T15:10:51Z")); -// -// fac.disposeRemainingObjects(); -// assertNoContentLost(fac, o1); -// o1.dispose(); -// } -// -// /** -// * Tests an issue reported by a user. The .eml file tested with this method yielded a DataObject without -// * the nie:plainTextContent property. -// */ -// public void testMultipartRelatedBug() throws Exception { -// DataObjectFactory fac = wrapEmail("mail-multipart-related-bug.eml"); -// DataObject o1 = fac.getObject(); -// DataObject o2 = fac.getObject(); -// assertNoContentLost(fac, o1,o2); -// -// checkStatement(NMO.plainTextMessageContent, "Body text", o1.getMetadata()); -// -// o1.dispose(); -// o2.dispose(); -// } -// -// /** -// * A single-part email encrypted with Enigmail OpenPGP without the "Use PGP/MIME" checkbox. -// * it used a GnuPG certificate generated with the Gnome Seahorse application -// */ -// public void testPgpEncryptedEmail() throws MessagingException, IOException, ModelException { -// DataObjectFactory fac = wrapEmail("encrypted/mail-pgp-encrypted.eml"); -// DataObject o1 = fac.getObject(); -// assertNotNull(o1); -// assertNull(fac.getObject()); -// assertTrue(o1 instanceof MessageDataObject); -// checkStatement(NFO.encryptionStatus, NFO.encryptedStatus, o1.getMetadata()); -// o1.dispose(); -// } -// -// /** -// * A single-part email encrypted with Enigmail OpenPGP without the "Use PGP/MIME" checkbox. -// * it used a GnuPG certificate generated with the Gnome Seahorse application -// */ -// public void testPgpMimeEncryptedEmail() throws MessagingException, IOException, ModelException { -// DataObjectFactory fac = wrapEmail("encrypted/mail-pgpmime-encrypted.eml"); -// DataObject o1 = fac.getObject(); -// assertNotNull(o1); -// assertNull(fac.getObject()); -// assertTrue(o1 instanceof MessageDataObject); -// checkStatement(NFO.encryptionStatus, NFO.encryptedStatus, o1.getMetadata()); -// checkStatement(NFO.fileName, "encrypted.asc", o1.getMetadata()); -// o1.dispose(); -// } -// -// /** -// * A single-part email encrypted with plain Thunderbird S/MIME encryption, using a PEM certificate -// * generated with OpenSSL. -// */ -// public void testSMimeEncryptedEmail() throws MessagingException, IOException, ModelException { -// DataObjectFactory fac = wrapEmail("encrypted/mail-smime-encrypted.eml"); -// DataObject o1 = fac.getObject(); -// assertNotNull(o1); -// assertNull(fac.getObject()); -// assertTrue(o1 instanceof MessageDataObject); -// checkStatement(NFO.encryptionStatus, NFO.encryptedStatus, o1.getMetadata()); -// checkStatement(NFO.fileName, "smime.p7m", o1.getMetadata()); -// o1.dispose(); -// } -// -// /** -// * An encrypted and signed email, is an encrypted email -// */ -// public void testPgpEncryptedSignedEmail() throws MessagingException, IOException, ModelException { -// DataObjectFactory fac = wrapEmail("encrypted/mail-pgp-encrypted-signed.eml"); -// DataObject o1 = fac.getObject(); -// assertNotNull(o1); -// assertNull(fac.getObject()); -// assertTrue(o1 instanceof MessageDataObject); -// checkStatement(NFO.encryptionStatus, NFO.encryptedStatus, o1.getMetadata()); -// o1.dispose(); -// } -// -// /** -// * An encrypted and signed email, is an encrypted email -// */ -// public void testPgpMimeEncryptedSignedEmail() throws MessagingException, IOException, ModelException { -// DataObjectFactory fac = wrapEmail("encrypted/mail-pgpmime-encrypted-signed.eml"); -// DataObject o1 = fac.getObject(); -// assertNotNull(o1); -// assertNull(fac.getObject()); -// assertTrue(o1 instanceof MessageDataObject); -// checkStatement(NFO.encryptionStatus, NFO.encryptedStatus, o1.getMetadata()); -// checkStatement(NFO.fileName, "encrypted.asc", o1.getMetadata()); -// o1.dispose(); -// } -// -// /** -// * An encrypted and signed email, is an encrypted email -// */ -// public void testSMimeEncryptedSignedEmail() throws MessagingException, IOException, ModelException { -// DataObjectFactory fac = wrapEmail("encrypted/mail-smime-encrypted-signed.eml"); -// DataObject o1 = fac.getObject(); -// assertNotNull(o1); -// assertNull(fac.getObject()); -// assertTrue(o1 instanceof MessageDataObject); -// checkStatement(NFO.encryptionStatus, NFO.encryptedStatus, o1.getMetadata()); -// checkStatement(NFO.fileName, "smime.p7m", o1.getMetadata()); -// o1.dispose(); -// } -// -// /** -// * A single-part email signed with Enigmail OpenPGP without the "Use PGP/MIME" checkbox. -// * it used a GnuPG certificate generated with the Gnome Seahorse application -// */ -// public void testPgpSignedEmail() throws MessagingException, IOException, ModelException { -// DataObjectFactory fac = wrapEmail("encrypted/mail-pgp-signed.eml"); -// DataObject o1 = fac.getObject(); -// assertNotNull(o1); -// assertNull(fac.getObject()); -// assertTrue(o1 instanceof MessageDataObject); -// checkStatement(APERTURE_NIE_EXTENSIONS.hasSignature,Boolean.toString(true),o1.getMetadata()); -// checkStatement(APERTURE_NIE_EXTENSIONS.signatureContainedIn, o1.getID(), o1.getMetadata()); -// o1.dispose(); -// } -// -// /** -// * A single-part email signed with Enigmail OpenPGP without the "Use PGP/MIME" checkbox. -// * it used a GnuPG certificate generated with the Gnome Seahorse application -// */ -// public void testPgpMimeSignedEmail() throws MessagingException, IOException, ModelException { -// DataObjectFactory fac = wrapEmail("encrypted/mail-pgpmime-signed.eml"); -// DataObject o1 = fac.getObject(); -// assertNotNull(o1); -// DataObject o2 = fac.getObject(); -// assertNotNull(o2); -// assertNull(fac.getObject()); -// assertTrue(o1 instanceof MessageDataObject); -// checkStatement(APERTURE_NIE_EXTENSIONS.hasSignature,Boolean.toString(true),o1.getMetadata()); -// checkStatement(APERTURE_NIE_EXTENSIONS.signatureContainedIn, o2.getID(), o1.getMetadata()); -// o1.dispose(); -// } -// -// /** -// * A single-part email encrypted with plain Thunderbird S/MIME encryption, using a PEM certificate -// * generated with OpenSSL. -// */ -// public void testSMimeSignedEmail() throws MessagingException, IOException, ModelException { -// DataObjectFactory fac = wrapEmail("encrypted/mail-smime-signed.eml"); -// DataObject o1 = fac.getObject(); -// assertNotNull(o1); -// DataObject o2 = fac.getObject(); -// assertNotNull(o2); -// assertNull(fac.getObject()); -// assertTrue(o1 instanceof MessageDataObject); -// checkStatement(APERTURE_NIE_EXTENSIONS.hasSignature,Boolean.toString(true),o1.getMetadata()); -// checkStatement(APERTURE_NIE_EXTENSIONS.signatureContainedIn, o2.getID(), o1.getMetadata()); -// o1.dispose(); -// o2.dispose(); -// } + /** + * This method runs the data object factory over a simple email, with plain-text content written in + * "normal" US-ASCII encoding without any non-ASCII characters. The factory should return exactly one data + * object with the full-text correctly extracted. + * @throws Exception + */ + public void testOrdinarySinglePartPlainTextEmail() throws Exception { + DataObjectFactory fac = wrapEmail("mail-thunderbird-1.5.eml"); + DataObject obj = fac.getObject(); + assertTrue(obj instanceof MessageDataObject); + // there should only be one data object + assertNoContentLost(fac, obj); + URI emailUri = obj.getID(); + assertEquals(emailUri.toString(), "uri:dummymailuri:mail-thunderbird-1.5.eml"); + + RDFContainer container = obj.getMetadata(); + Model model = container.getModel(); + + /* + * First we test the names of the sender and receiver + */ + Resource sender = findSingleObjectResource(model, emailUri, NMO.from); + assertSingleValueProperty(model, sender, NCO.fullname, "Christiaan Fluit"); + Set<Resource> senderEmailAddresses = findObjectResourceSet(model, sender, NCO.hasEmailAddress); + Resource receiver = findSingleObjectResource(model, emailUri, NMO.to); + assertSingleValueProperty(model, receiver, NCO.fullname, "Christiaan Fluit"); + Set<Resource> receiverEmailAddresses = findObjectResourceSet(model, receiver, NCO.hasEmailAddress); + + /* + * There was a problem with loosing information which address was the sender + * and which was the receiver if both have the same name. This is to confirm that this problem + * no longer occurs. + */ + assertEquals(1,senderEmailAddresses.size()); + Iterator<Resource> it = senderEmailAddresses.iterator(); + assertTrue(RDFTool.getSingleValueString(model, it.next(), NCO.emailAddress).equals("chr...@ad...")); + + assertEquals(1,receiverEmailAddresses.size()); + it = receiverEmailAddresses.iterator(); + assertTrue(RDFTool.getSingleValueString(model, it.next(), NCO.emailAddress).equals("Chr...@ad...")); + assertNotSame(sender,receiver); // the sender and receiver are NOT the same resource + + testStandardMessageMetadata(model, emailUri, "iso-8859-1", "message/rfc822", + "text/plain", "test subject", "15", "2006-02-20T13:47:14Z", "<43F...@ad...>"); + + // test the plain text content extraction + String content = container.getString(NMO.plainTextMessageContent); + assertEquals("test body\r\n--\r\n",content); + + + validate(model); + obj.dispose(); + } /** + * This method tests the behavior of the data object factory when it is confronted with a + * multipart/alternative email message. Such a message contains the same content in both the html and the + * plain text version. The desired behavior is to ignore the html part altogether and put in only the + * plain text part. Thus only one data object should be returned, even though the message is composed of + * three mime parts. + * @throws Exception + */ + public void testMultipartAlternative() throws Exception { + DataObjectFactory fac = wrapEmail("mail-multipart-plain-html.eml"); + DataObject obj = fac.getObject(); + assertTrue(obj instanceof MessageDataObject); + // there should only be one data object + assertNoContentLost(fac, obj); + URI emailUri = obj.getID(); + assertEquals(emailUri.toString(), "uri:dummymailuri:mail-multipart-plain-html.eml"); + RDFContainer container = obj.getMetadata(); + Model model = container.getModel(); + + testSenderAndReceiver(model, emailUri, "SourceForge.net","su...@os...",null,"my...@us..."); + + /* + * the charset and the content mime type are defined only in the plaintext part, the factory should + * merge them with the overall message metadata + */ + testStandardMessageMetadata(model, emailUri, "iso-8859-1", "message/rfc822", + "text/plain", "SourceForge.net needs your input on OSS development and support issues", + "10251", "2006-10-25T12:50:02Z", "<CON...@sm...>"); + + // test the plain text content extraction + String content = container.getString(NMO.plainTextMessageContent); + // this is a sentence from the plaintext part + assertTrue(content.contains("SourceForge.net is looking for open-source \"experts and opinion-leaders\"")); + // the original message contains weird equality signs at the end of each plaintext line, they should + // be filtered out + assertFalse(content.contains("please help us ensure that they =")); + // the content should not contain any HTML markup + assertFalse(content.contains("<b>SourceForge.net</b> is looking for open-source \"experts and")); + + validate(model); + obj.dispose(); + } + + /** + * This method tests how does the DataObjectFactory copes with a fairly complicated multipart/mixed message. + * It is a message with two attachments - the first attachment is a PDF, the second attachment is another + * message (forwarded). We should get three data objects. + * @throws Exception + */ + public void testMultipartMixed() throws Exception { + DataObjectFactory fac = wrapEmail("mail-multipart-test.eml"); + DataObject obj1 = fac.getObject(); + assertTrue(obj1 instanceof MessageDataObject); + DataObject obj2 = fac.getObject(); + assertTrue(obj2 instanceof FileDataObject); + DataObject obj3 = fac.getObject(); + assertTrue(obj3 instanceof MessageDataObject); + assertNoContentLost(fac,obj1,obj2,obj3); + + // first some test of the first message + URI emailUri = obj1.getID(); + assertEquals(emailUri.toString(), "uri:dummymailuri:mail-multipart-test.eml"); + RDFContainer container1 = obj1.getMetadata(); + Model model1 = container1.getModel(); + testSenderAndReceiver(model1, emailUri, "Antoni My\u0142ka","ant...@gm...","aperture-devel","ape...@li..."); + testStandardMessageMetadata(model1, emailUri, "iso-8859-2", "message/rfc822", "text/plain", + "[Fwd: Re: [Aperture-devel] Developer's Checklists]", "36872", "2008-07-27T22:10:47Z", + "<488...@gm...>"); + String content = container1.getString(NMO.plainTextMessageContent); + assertEquals("This is a test of a multipart message, that has some content and an \r\n" + + "attached message, and a PDF attachment. Let's see how the MimeSubCrawler \r\n" + + "will handler this.\r\n" + + "\r\n" + + "Antoni Mylka\r\n", + content); + validate(container1); + obj1.dispose(); + + // then some tests of the attached message (which should reside in the third data object); + assertEquals(obj3.getID().toString(), "uri:dummymailuri:mail-multipart-test.eml#2"); + Model model3 = obj3.getMetadata().getModel(); + testSenderAndReceiver(model3, obj3.getID(), "Leo Sauermann","leo...@df...","Antoni Mylka","ant...@gm..."); + testStandardMessageMetadata(model3, obj3.getID(), "iso-8859-2", "message/rfc822", "text/plain", + "Re: [Aperture-devel] Developer's Checklists", "1341", "2008-07-25T08:50:18Z", + "<488...@df...>"); + assertMessageContentContains("> http://aperture.wiki.sourceforge.net/DevelopersChecklists", obj3); + assertMessageContentContains("> about all this, but it may nevertheless be interesting.", obj3); + validate(model3); + obj3.dispose(); + + // the second data object should be a FileDataObject containing a PDF + assertTrue(obj2 instanceof FileDataObject); + FileDataObject fobj2 = (FileDataObject)obj2; + URI pdfUri = fobj2.getID(); + RDFContainer container2 = fobj2.getMetadata(); + Model model2 = container2.getModel(); + InputStream contentStream = fobj2.getContent(); + assertNotNull(contentStream); + assertEquals(pdfUri.toString(), "uri:dummymailuri:mail-multipart-test.eml#1"); + assertSingleValueProperty(model2, pdfUri, NIE.mimeType, "application/pdf"); + assertSingleValueProperty(model2, pdfUri, NFO.fileName, "pdf-openoffice-2.0-writer.pdf"); + + assertMimeType("application/pdf", pdfUri, contentStream); + + // and the extractor + Extractor extractor = new PdfExtractorFactory().get(); + extractor.extract(pdfUri, contentStream, null, "application/pdf", container2); + String contentString = container2.getString(NIE.plainTextContent); + assertTrue(contentString.contains("This is an example document created with OpenOffice 2.0")); + + validate(container2); + obj2.dispose(); + } + + /** + * This method tests the partUriDelimiter feature. It allows the user to customize the delimiter between the + * uri of the message and the part identifiers + * @throws Exception + */ + public void testPartUriDelimiter() throws Exception { + InputStream stream = ResourceUtil.getInputStream(DOCS_PATH + "mail-multipart-test.eml", this.getClass()); + MimeMessage msg = new MimeMessage(null, stream); + DataObjectFactory fac = new DataObjectFactory(msg,containerFactory,null,null, + new URIImpl("mime:zip:uri:dummymailuri:somefile.zip!/mail-multipart-test.eml!/"), null,""); + DataObject obj1 = fac.getObject(); + DataObject obj2 = fac.getObject(); + DataObject obj3 = fac.getObject(); + assertNoContentLost(fac,obj1,obj2,obj3); + + URI emailUri = obj1.getID(); + assertEquals(emailUri.toString(), "mime:zip:uri:dummymailuri:somefile.zip!/mail-multipart-test.eml!/"); + obj1.dispose(); + + + assertEquals(obj3.getID().toString(), "mime:zip:uri:dummymailuri:somefile.zip!/mail-multipart-test.eml!/2"); + obj3.dispose(); + + URI pdfUri = obj2.getID(); + assertEquals(pdfUri.toString(), "mime:zip:uri:dummymailuri:somefile.zip!/mail-multipart-test.eml!/1"); + obj2.dispose(); + } + + /** + * A test for a message that has been taken from within a thread. It contains References: and In-Reply-To: + * headers. It is a multipart/mixed message, whose first part is a multipart/alternative. An example of + * the ingenuity of the SF mailing list software, they add the ads and the mailing list signature as + * attachments. All the 'References:' and 'In-Reply-To:' headers should appear in the extracted RDF. + * + * @throws Exception + */ + public void testMessageInAThread() throws Exception { + DataObjectFactory fac = wrapEmail("mail-threaded.eml"); + DataObject obj = fac.getObject(); + assertTrue(obj instanceof MessageDataObject); + RDFContainer metadata = obj.getMetadata(); + Model model = metadata.getModel(); + DataObject obj2 = fac.getObject(); + DataObject obj3 = fac.getObject(); + assertNoContentLost(fac,obj,obj2,obj3); + obj2.dispose(); + obj3.dispose(); + + assertEquals("<452...@df...>", findSingleObjectNode(model, metadata.getDescribedUri(), + NMO.messageId).asLiteral().getValue()); + assertReferencedEmails(obj, NMO.references, "<452...@df...>", + "<452...@ad...>", "<452...@df...>", + "<452...@ad...>"); + assertReferencedEmails(obj, NMO.inReplyTo, "<452...@ad...>"); + validate(obj); + obj.dispose(); + } + + /** + * Tests whether the 'References:' and 'In-Reply-To:' are extracted correctly from a forwarded message. + * The .eml file that is tested has a following structure + * + * <pre> + * multipart/mixed + * - plain text (my greeting) + * - multipart/mixed (the forwarded message) + * - multipart/alternative (the leo's reply) + * - plain text - plain text content + * - html text - the html text contet + * - plain text - the sourceforge ad + * - plain text - the sourceforge list signature + * </pre> + * + * This structure should yield four data objects. (my greeting, leo's reply in plain text, the ad and the + * signature). The second data object should have correct References: and In-Reply-To links. + * @throws Exception + */ + public void testForwardedMessageWithReferecesAndInReplyToHeaders() throws Exception { + DataObjectFactory fac = wrapEmail("mail-forwarded-references.eml"); + DataObject myGreeting = fac.getObject(); + assertTrue(myGreeting instanceof MessageDataObject); + DataObject forwardedMsg = fac.getObject(); + assertTrue(forwardedMsg instanceof MessageDataObject); + DataObject sfAd = fac.getObject(); + assertTrue(sfAd instanceof FileDataObject); + DataObject sfSig = fac.getObject(); + assertTrue(sfSig instanceof FileDataObject); + assertNoContentLost(fac,myGreeting,forwardedMsg,sfAd,sfSig); + + assertMessageId("<48D...@po...>", myGreeting); + assertMessageId("<46A...@df...>", forwardedMsg); + assertMessageContentContains("A test message that contains a forwarded message",myGreeting); + assertMessageContentContains("There are two concrete benefits to using XRIs identified",forwardedMsg); + + /* + * The two last data objects are unnamed plain text message parts - attachments, they should be + * interpreted as such i.e. no content in the metadata, everything is a plain text FileDataObject + */ + assertNull(sfAd.getMetadata().getString(NMO.plainTextMessageContent)); + assertAsciiFileContentContains(sfAd, "This SF.net email is sponsored by: Splunk Inc."); + + assertNull(sfSig.getMetadata().getString(NMO.plainTextMessageContent)); + assertAsciiFileContentContains(sfSig, "Aperture-devel mailing list"); + + assertReferencedEmails(forwardedMsg, NMO.references, "<46A...@df...>", + "<216...@ma...>"); + assertReferencedEmails(forwardedMsg, NMO.inReplyTo, + "<216...@ma...>"); + validate(myGreeting); + myGreeting.dispose(); + forwardedMsg.dispose(); + sfAd.dispose(); + sfSig.dispose(); + } + + /** + * Tests whether .xml files attached to the email are returned as separate FileDataObjects. Problems have + * been reported with XML attachments being mistakenly returned as DataObjects with their entire content + * being returned as NMO.messagePlainTextContent (with all the tags). + * + * @throws Exception + */ + public void testXmlAttachment() throws Exception { + DataObjectFactory fac = wrapEmail("mail-xml-attachment.eml"); + DataObject mailContent = fac.getObject(); + assertTrue(mailContent instanceof MessageDataObject); + DataObject xmlAttachment = fac.getObject(); + assertTrue(xmlAttachment instanceof FileDataObject); + assertNoContentLost(fac, mailContent, xmlAttachment); + assertMessageContentContains("test mail.", mailContent); + testSenderAndReceiver(mailContent.getMetadata().getModel(), mailContent.getID(), "Christiaan Fluit", + "chr...@ad...", null, "ch...@ad..."); + assertEquals("line.xml",xmlAttachment.getMetadata().getString(NFO.fileName)); + assertMimeType("text/xml", new URIImpl("uri:line.xml"), ((FileDataObject)xmlAttachment).getContent()); + validate(mailContent); + mailContent.dispose(); + xmlAttachment.dispose(); + } + + /** + * <p> + * Tests whether .txt files attached to the email are returned as separate FileDataObjects instead of as + * DataObjects with their content already extracted. + * </p> + * + * The MIME structure of this .eml file is: + * + * <pre> + * multipart/mixed + * text/plain - body text + * text/plain, filename="attachment.txt" - attachment text + * </pre> + * + * <p> + * Obviously, it should yield two DataObjects. The second one should be a FileDataObject. + * </p> + * + * @throws Exception + */ + public void testPlainTextAttachment() throws Exception { + DataObjectFactory fac = wrapEmail("mail-plaintext-attachment.eml"); + DataObject mail = fac.getObject(); + assertTrue(mail.getMetadata().getString(NMO.plainTextMessageContent).contains("Example body text.")); + DataObject attachment = fac.getObject(); + assertNoContentLost(fac, mail,attachment); + assertTrue(attachment instanceof FileDataObject); + String content = IOUtil.readString(((FileDataObject)attachment).getContent()); + assertTrue(content.contains("test attachment")); + assertEquals("attachment.txt",attachment.getMetadata().getString(NFO.fileName)); + assertTrue(attachment.getMetadata().getAll(RDF.type).contains(NFO.Attachment)); + validate(attachment); + mail.dispose(); + attachment.dispose(); + } + + /** + * <p> + * The .eml file tested in this test has been submitted with a problem. + * </p> + * + * <p> + * It has a following mime structure: + * + * <pre> + * multipart/mixed + * - plain text (the text of the message) + * - plain text (a text/plain attachment with a name: ConfigFilePanel.java) + * - plain text (a text/plain attachment without a name) + * - plain text (a text/plain attachment without a name) + * </pre> + * + * </p> + * + * <p> + * <ol> + * <li>Four data objects should be returned, a MessageDataObject for the first part, and three FileDataObjects + * for three subsequent parts.</li> + * <li>The second dataobject should contain a filename.</li> + * <li>The plaintext attachment should NOT be reported as messages.</li> + * <li>The unnamed file attachments should NOT get a contentCreated + * property.</li> + * </ol> + * + * </p> + * + * @throws Exception + */ + public void testUnsupportedOperationException() throws Exception { + DataObjectFactory fac = wrapEmail("mail-UnsupportedOperationException.eml"); + DataObject mail = fac.getObject(); + assertTrue(mail.getMetadata().getString(NMO.plainTextMessageContent).contains( + "I've attached my .java file")); + assertTrue(mail instanceof MessageDataObject); + + DataObject javaAttachment = fac.getObject(); + assertTrue(javaAttachment instanceof FileDataObject); + assertEquals("ConfigFilePanel.java",javaAttachment.getMetadata().getString(NFO.fileName)); + assertFalse(javaAttachment.getMetadata().getAll(RDF.type).contains(NMO.Message)); + + DataObject firstUnnamedAttachment = fac.getObject(); + assertTrue(firstUnnamedAttachment instanceof FileDataObject); + assertNull(firstUnnamedAttachment.getMetadata().getString(NFO.fileName)); + assertNull(firstUnnamedAttachment.getMetadata().getDate(NIE.contentCreated)); + assertFalse(firstUnnamedAttachment.getMetadata().getAll(RDF.type).contains(NMO.Message)); + + DataObject secondUnnamedAttachment = fac.getObject(); + assertTrue(secondUnnamedAttachment instanceof FileDataObject); + assertNull(secondUnnamedAttachment.getMetadata().getString(NFO.fileName)); + assertNull(secondUnnamedAttachment.getMetadata().getDate(NIE.contentCreated)); + assertFalse(secondUnnamedAttachment.getMetadata().getAll(RDF.type).contains(NMO.Message)); + + for (RDFContainer cont : containerFactory.returnedContainers.values()) { + validate(cont); + } + assertNoContentLost(fac, mail, javaAttachment, firstUnnamedAttachment, secondUnnamedAttachment); + mail.dispose(); + javaAttachment.dispose(); + firstUnnamedAttachment.dispose(); + secondUnnamedAttachment.dispose(); + } + + /** + * Tests whether non-text attachments have a charset property. They should not. (issue 2278007). + * @throws Exception + */ + public void testSuperfluousCharsets() throws Exception { + DataObjectFactory fac = wrapEmail("mail-multipart-test.eml"); + DataObject email = fac.getObject(); + DataObject pdf = fac.getObject(); + DataObject forwardedEmail = fac.getObject(); + assertNoContentLost(fac,email, pdf, forwardedEmail); + assertNull(pdf.getMetadata().getString(NIE.characterSet)); + email.dispose(); + pdf.dispose(); + forwardedEmail.dispose(); + } + + /** + * Tests whether email, that don't have any explicit charset specified, get a default + * us-ascii setting, according to RFC2045. The us-ascii will be converted to iso-8859-1, so + * therefore we expec... [truncated message content] |