From: <my...@us...> - 2010-07-14 13:54:36
|
Revision: 2395 http://aperture.svn.sourceforge.net/aperture/?rev=2395&view=rev Author: mylka Date: 2010-07-14 13:54:29 +0000 (Wed, 14 Jul 2010) Log Message: ----------- [3029535] added another hack that removes single quotes from the name Modified Paths: -------------- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/crawler/mail/MailUtil.java aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/crawler/mail/MailUtilTest.java Modified: aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/crawler/mail/MailUtil.java =================================================================== --- aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/crawler/mail/MailUtil.java 2010-07-14 11:50:16 UTC (rev 2394) +++ aperture/trunk/core/src/main/java/org/semanticdesktop/aperture/crawler/mail/MailUtil.java 2010-07-14 13:54:29 UTC (rev 2395) @@ -405,7 +405,7 @@ currentChunk.append(c2); } } else if (QUOTATION_MARK.equals(c)) { - chunks.add(currentChunk.toString().trim()); + chunks.add(trimAndRemoveSingleQuotes(currentChunk.toString())); separators.add(c); currentChunk = new StringBuilder(); if (insideQuotes) { @@ -415,7 +415,7 @@ } } else if (!insideQuotes && isSeparator(c,commaIsASeparator)) { // inside quotes we disregard all separators - chunks.add(currentChunk.toString().trim()); + chunks.add(trimAndRemoveSingleQuotes(currentChunk.toString())); separators.add(c); currentChunk = new StringBuilder(); } else if (isLineBreak(c)) { @@ -434,8 +434,24 @@ } i++; } - chunks.add(currentChunk.toString().trim()); + + chunks.add(trimAndRemoveSingleQuotes(currentChunk.toString())); } + + private static String trimAndRemoveSingleQuotes(String currentChunk) { + String trimmedChunk = currentChunk.toString().trim(); + + // now a little hack, sometimes we see mail headers like this: + // "'ch...@ho...'" <ch...@ho...> + // enclosed both within double quotes and single quotes, + // we should remove those single quotes + if (trimmedChunk.length() >= 2 && + trimmedChunk.charAt(0) == '\'' && + trimmedChunk.charAt(trimmedChunk.length() - 1) == '\'') { + trimmedChunk = trimmedChunk.substring(1,trimmedChunk.length() - 1); + } + return trimmedChunk; + } /** * @param chunk Modified: aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/crawler/mail/MailUtilTest.java =================================================================== --- aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/crawler/mail/MailUtilTest.java 2010-07-14 11:50:16 UTC (rev 2394) +++ aperture/trunk/core/src/test/java/org/semanticdesktop/aperture/crawler/mail/MailUtilTest.java 2010-07-14 13:54:29 UTC (rev 2395) @@ -11,13 +11,11 @@ import java.text.ParseException; import java.util.Date; import java.util.List; -import java.util.Set; import javax.mail.MessagingException; import javax.mail.internet.MimeMessage; import org.ontoware.rdf2go.model.Model; -import org.ontoware.rdf2go.model.node.Resource; import org.semanticdesktop.aperture.crawler.mail.MailUtil.LiberalInternetAddress; import org.semanticdesktop.aperture.rdf.RDFContainer; import org.semanticdesktop.aperture.test.ApertureTestBase; @@ -199,6 +197,16 @@ a("Christiaan Fluit","Chr...@ad...")); } + public void testTripleQuotesNames() { + /* + * Some headers use names which are enclosed in "' ... '", that is, a string is enclosed in single + * quotes, and they are enclosed in double quotes. Such abominations defile the output and ought to be + * cleansed before the innocent user is confronted with them. + */ + assertHeaders("\"'ch...@ho...'\" <ch...@ho...>", + a("ch...@ho...","ch...@ho...")); + } + private void check(String st, String utcString) throws ParseException { Date date = MailUtil.parseReceivedHeader(st); assertTrue(DateUtil.dateTimeEqualToUTCString(date, utcString)); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |