From: <my...@us...> - 2009-03-10 21:47:45
|
Revision: 1791 http://aperture.svn.sourceforge.net/aperture/?rev=1791&view=rev Author: mylka Date: 2009-03-10 21:47:34 +0000 (Tue, 10 Mar 2009) Log Message: ----------- a fix in the DataObjectFactory for the changes accidentally introduced by Christiaan Modified Paths: -------------- aperture/trunk/core/crawler/mail/src/main/java/org/semanticdesktop/aperture/crawler/mail/DataObjectFactory.java Modified: aperture/trunk/core/crawler/mail/src/main/java/org/semanticdesktop/aperture/crawler/mail/DataObjectFactory.java =================================================================== --- aperture/trunk/core/crawler/mail/src/main/java/org/semanticdesktop/aperture/crawler/mail/DataObjectFactory.java 2009-03-10 21:45:38 UTC (rev 1790) +++ aperture/trunk/core/crawler/mail/src/main/java/org/semanticdesktop/aperture/crawler/mail/DataObjectFactory.java 2009-03-10 21:47:34 UTC (rev 1791) @@ -36,16 +36,15 @@ import org.ontoware.rdf2go.model.node.impl.URIImpl; import org.ontoware.rdf2go.vocabulary.RDF; import org.semanticdesktop.aperture.accessor.DataObject; -import org.semanticdesktop.aperture.accessor.MessageDataObject; import org.semanticdesktop.aperture.accessor.RDFContainerFactory; import org.semanticdesktop.aperture.accessor.base.FileDataObjectBase; -import org.semanticdesktop.aperture.accessor.base.MessageDataObjectBase; +import org.semanticdesktop.aperture.crawler.mail.base.MessageDataObjectBase; import org.semanticdesktop.aperture.datasource.DataSource; -import org.semanticdesktop.aperture.extractor.ExtractorException; -import org.semanticdesktop.aperture.extractor.util.HtmlParserUtil; +import org.semanticdesktop.aperture.helper.html.HtmlParserException; +import org.semanticdesktop.aperture.helper.html.HtmlParserUtil; import org.semanticdesktop.aperture.rdf.RDFContainer; +import org.semanticdesktop.aperture.rdf.util.ModelUtil; import org.semanticdesktop.aperture.subcrawler.SubCrawlerUtil; -import org.semanticdesktop.aperture.util.UriUtil; import 
org.semanticdesktop.aperture.vocabulary.NFO; import org.semanticdesktop.aperture.vocabulary.NIE; import org.semanticdesktop.aperture.vocabulary.NMO; @@ -681,7 +680,7 @@ try { HtmlParserUtil.parse(stream, null, extractor); } - catch (ExtractorException e) { + catch (HtmlParserException e) { return ""; } @@ -1308,7 +1307,7 @@ for (String header : headers) { String [] subheaders = header.split("\\s+"); for (String subheader : subheaders) { - Resource res = UriUtil.generateRandomResource(model); + Resource res = ModelUtil.generateRandomResource(model); model.addStatement(res,NMO.messageId,subheader); model.addStatement(res,RDF.type,NMO.Email); model.addStatement(metadata.getDescribedUri(),predicate,res); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <my...@us...> - 2009-05-25 09:30:58
|
Revision: 1947 http://aperture.svn.sourceforge.net/aperture/?rev=1947&view=rev Author: mylka Date: 2009-05-25 09:30:50 +0000 (Mon, 25 May 2009) Log Message: ----------- added a check of getReceivedDate to the dataobjectfactory Modified Paths: -------------- aperture/trunk/core/crawler/mail/src/main/java/org/semanticdesktop/aperture/crawler/mail/DataObjectFactory.java Modified: aperture/trunk/core/crawler/mail/src/main/java/org/semanticdesktop/aperture/crawler/mail/DataObjectFactory.java =================================================================== --- aperture/trunk/core/crawler/mail/src/main/java/org/semanticdesktop/aperture/crawler/mail/DataObjectFactory.java 2009-05-20 21:06:41 UTC (rev 1946) +++ aperture/trunk/core/crawler/mail/src/main/java/org/semanticdesktop/aperture/crawler/mail/DataObjectFactory.java 2009-05-25 09:30:50 UTC (rev 1947) @@ -1477,14 +1477,23 @@ if (part instanceof MimeMessage) { MimeMessage mm = (MimeMessage)part; addObjectIfNotNull(NMO.sentDate, mm.getSentDate(), result); - String [] s = mm.getHeader("Received"); - if (s != null && s.length > 0) { - try { - Date date = MailUtil.parseReceivedHeader(s[0]); - result.put(NMO.receivedDate, date); - } catch (ParseException e) { - logger.warn("Coudln't parse the Received date: " + s[0]); - } + + Date date = mm.getReceivedDate(); + if (date != null) { + // the sun javamail implementation seems to return null every time + // this check has been added at the request of a user who uses their own + // subclass of AbstractJavaMailCrawler that uses a different javamail implementation + result.put(NMO.receivedDate, date); + } else { + String [] s = mm.getHeader("Received"); + if (s != null && s.length > 0) { + try { + date = MailUtil.parseReceivedHeader(s[0]); + result.put(NMO.receivedDate, date); + } catch (ParseException e) { + logger.warn("Coudln't parse the Received date: " + s[0]); + } + } } } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open 
Source development site. |
From: <my...@us...> - 2009-12-08 15:39:12
|
Revision: 2167 http://aperture.svn.sourceforge.net/aperture/?rev=2167&view=rev Author: mylka Date: 2009-12-08 15:39:05 +0000 (Tue, 08 Dec 2009) Log Message: ----------- 2910404 commented out my cool workaround in DataObjectFactory Modified Paths: -------------- aperture/trunk/core/crawler/mail/src/main/java/org/semanticdesktop/aperture/crawler/mail/DataObjectFactory.java Modified: aperture/trunk/core/crawler/mail/src/main/java/org/semanticdesktop/aperture/crawler/mail/DataObjectFactory.java =================================================================== --- aperture/trunk/core/crawler/mail/src/main/java/org/semanticdesktop/aperture/crawler/mail/DataObjectFactory.java 2009-12-08 15:11:14 UTC (rev 2166) +++ aperture/trunk/core/crawler/mail/src/main/java/org/semanticdesktop/aperture/crawler/mail/DataObjectFactory.java 2009-12-08 15:39:05 UTC (rev 2167) @@ -112,15 +112,15 @@ * taken from Eclipse Orbit repository, version 1.4.1.v200808130215. It should be updated when * this dependency is updated. 
*/ - static { - MailcapCommandMap mc = (MailcapCommandMap)CommandMap.getDefaultCommandMap(); - mc.addMailcap("text/html;; x-java-content-handler=com.sun.mail.handlers.text_html"); - mc.addMailcap("text/xml;; x-java-content-handler=com.sun.mail.handlers.text_xml"); - mc.addMailcap("text/plain;; x-java-content-handler=com.sun.mail.handlers.text_plain"); - mc.addMailcap("multipart/*;; x-java-content-handler=com.sun.mail.handlers.multipart_mixed; x-java-fallback-entry=true"); - mc.addMailcap("message/rfc822;; x-java-content-handler=com.sun.mail.handlers.message_rfc822"); - CommandMap.setDefaultCommandMap(mc); - } +// static { +// MailcapCommandMap mc = (MailcapCommandMap)CommandMap.getDefaultCommandMap(); +// mc.addMailcap("text/html;; x-java-content-handler=com.sun.mail.handlers.text_html"); +// mc.addMailcap("text/xml;; x-java-content-handler=com.sun.mail.handlers.text_xml"); +// mc.addMailcap("text/plain;; x-java-content-handler=com.sun.mail.handlers.text_plain"); +// mc.addMailcap("multipart/*;; x-java-content-handler=com.sun.mail.handlers.multipart_mixed; x-java-fallback-entry=true"); +// mc.addMailcap("message/rfc822;; x-java-content-handler=com.sun.mail.handlers.message_rfc822"); +// CommandMap.setDefaultCommandMap(mc); +// } /** Obtains InputStreams from {@link Part} instances. */ public static interface PartStreamFactory { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <my...@us...> - 2009-12-27 17:03:48
|
Revision: 2176 http://aperture.svn.sourceforge.net/aperture/?rev=2176&view=rev Author: mylka Date: 2009-12-27 17:03:42 +0000 (Sun, 27 Dec 2009) Log Message: ----------- 2907606 - a little fix for PGP/MIME signed emails in the DataObjectFactory Modified Paths: -------------- aperture/trunk/core/crawler/mail/src/main/java/org/semanticdesktop/aperture/crawler/mail/DataObjectFactory.java Modified: aperture/trunk/core/crawler/mail/src/main/java/org/semanticdesktop/aperture/crawler/mail/DataObjectFactory.java =================================================================== --- aperture/trunk/core/crawler/mail/src/main/java/org/semanticdesktop/aperture/crawler/mail/DataObjectFactory.java 2009-12-09 10:44:52 UTC (rev 2175) +++ aperture/trunk/core/crawler/mail/src/main/java/org/semanticdesktop/aperture/crawler/mail/DataObjectFactory.java 2009-12-27 17:03:42 UTC (rev 2176) @@ -1176,7 +1176,8 @@ private boolean isMulitpartSignedSignature(HashMap childHashmap) { String mimeType = (String)childHashmap.get(NIE.mimeType); - if (mimeType != null && mimeType.contains("pkcs7-signature")) { + // this can be for instance pkcs7-signature for S/MIME or pgp-signature for PGP/MIME + if (mimeType != null && mimeType.contains("signature")) { return true; } return false; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ar...@us...> - 2010-02-26 22:13:09
|
Revision: 2225 http://aperture.svn.sourceforge.net/aperture/?rev=2225&view=rev Author: arjohn Date: 2010-02-26 22:13:03 +0000 (Fri, 26 Feb 2010) Log Message: ----------- bugfix: use decodeText() instead of decodeWord() to decode the filename; also, catch UnsupportedEncodingExceptions since they're not fatal Modified Paths: -------------- aperture/trunk/core/crawler/mail/src/main/java/org/semanticdesktop/aperture/crawler/mail/DataObjectFactory.java Modified: aperture/trunk/core/crawler/mail/src/main/java/org/semanticdesktop/aperture/crawler/mail/DataObjectFactory.java =================================================================== --- aperture/trunk/core/crawler/mail/src/main/java/org/semanticdesktop/aperture/crawler/mail/DataObjectFactory.java 2010-02-22 17:00:51 UTC (rev 2224) +++ aperture/trunk/core/crawler/mail/src/main/java/org/semanticdesktop/aperture/crawler/mail/DataObjectFactory.java 2010-02-26 22:13:03 UTC (rev 2225) @@ -10,6 +10,7 @@ import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; +import java.io.UnsupportedEncodingException; import java.text.ParseException; import java.util.ArrayList; import java.util.Date; @@ -17,11 +18,8 @@ import java.util.Iterator; import java.util.List; import java.util.Map; -import java.util.regex.Matcher; import java.util.regex.Pattern; -import javax.activation.CommandMap; -import javax.activation.MailcapCommandMap; import javax.mail.Address; import javax.mail.BodyPart; import javax.mail.Message; @@ -658,10 +656,10 @@ String fileName = normalSinglePart.getFileName(); if (fileName != null) { try { - fileName = MimeUtility.decodeWord(fileName); + fileName = MimeUtility.decodeText(fileName); } - catch (MessagingException e) { - // happens on unencoded file names! 
so just ignore it and leave the file name as it is + catch (UnsupportedEncodingException e) { + logger.warn("Unable to decode text", e); } result.put(NFO.fileName, fileName); // everything that has a file name is an attachment This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |