From: <bra...@us...> - 2008-01-31 00:30:33
|
Revision: 2160 http://archive-access.svn.sourceforge.net/archive-access/?rev=2160&view=rev Author: bradtofel Date: 2008-01-30 16:30:37 -0800 (Wed, 30 Jan 2008) Log Message: ----------- FEATURE: firstly, we test that a charset is supported before returning it as a viable charset to encode/decode. Secondly, we now attempt to replace internal spaces within a charset declaration... there's at least one lame webpage out there that has "charset=i so-8859-1"... Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/HTMLPage.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/HTMLPage.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/HTMLPage.java 2008-01-30 03:26:36 UTC (rev 2159) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/HTMLPage.java 2008-01-31 00:30:37 UTC (rev 2160) @@ -28,6 +28,7 @@ import java.io.InputStreamReader; import java.io.OutputStream; import java.io.UnsupportedEncodingException; +import java.nio.charset.Charset; import java.text.ParseException; import java.util.Map; @@ -86,11 +87,23 @@ this.result = result; this.uriConverter = uriConverter; } - + private String contentTypeToCharset(final String contentType) { int offset = contentType.indexOf(CHARSET_TOKEN); if (offset != -1) { - return contentType.substring(offset + CHARSET_TOKEN.length()); + String cs = contentType.substring(offset + CHARSET_TOKEN.length()); + if(Charset.isSupported(cs)) { + return cs; + } + // test for extra spaces... there's at least one page out there that + // indicates it's charset with: + +// <meta http-equiv="Content-type" content="text/html; charset=i so-8859-1"> + + // bad web page! 
+ if(Charset.isSupported(cs.replace(" ", ""))) { + return cs.replace(" ", ""); + } } return null; } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |