From: <bra...@us...> - 2008-08-05 01:28:37
|
Revision: 2524 http://archive-access.svn.sourceforge.net/archive-access/?rev=2524&view=rev Author: bradtofel Date: 2008-08-05 01:28:47 +0000 (Tue, 05 Aug 2008) Log Message: ----------- BUGFIX (ACC-28): check that guessed charset is supported before attempting to use. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TextDocument.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TextDocument.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TextDocument.java 2008-08-01 17:17:44 UTC (rev 2523) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TextDocument.java 2008-08-05 01:28:47 UTC (rev 2524) @@ -91,6 +91,9 @@ private boolean isCharsetSupported(String charsetName) { // can you believe that this throws a runtime? Just asking if it's // supported!!?! They coulda just said "no"... + if(charsetName == null) { + return false; + } try { return Charset.isSupported(charsetName); } catch(IllegalCharsetNameException e) { @@ -192,8 +195,10 @@ // (5) detector.reset(); - - return charsetName; + if(isCharsetSupported(charsetName)) { + return charsetName; + } + return null; } /** This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2009-05-20 02:28:41
|
Revision: 2721 http://archive-access.svn.sourceforge.net/archive-access/?rev=2721&view=rev Author: bradtofel Date: 2009-05-20 02:28:33 +0000 (Wed, 20 May 2009) Log Message: ----------- FEATURE: Now compares charset case-insensitively in meta tags and HTTP headers FEATURE: exposed setResultBytes() FEATURE: new insertAtStartOfDocument() method FEATURE: now detects charset by: a) document META tag, b) byte-detection, c) HTTP header. before was c,b,a. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TextDocument.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TextDocument.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TextDocument.java 2009-05-20 02:22:20 UTC (rev 2720) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TextDocument.java 2009-05-20 02:28:33 UTC (rev 2721) @@ -31,6 +31,7 @@ import java.nio.charset.Charset; import java.nio.charset.IllegalCharsetNameException; import java.text.ParseException; +import java.util.Iterator; import java.util.Map; import javax.servlet.ServletException; @@ -102,7 +103,9 @@ } private String contentTypeToCharset(final String contentType) { - int offset = contentType.indexOf(CHARSET_TOKEN); + int offset = + contentType.toUpperCase().indexOf(CHARSET_TOKEN.toUpperCase()); + if (offset != -1) { String cs = contentType.substring(offset + CHARSET_TOKEN.length()); if(isCharsetSupported(cs)) { @@ -135,7 +138,16 @@ String charsetName = null; Map<String,String> httpHeaders = resource.getHttpHeaders(); - String ctype = httpHeaders.get(HTTP_CONTENT_TYPE_HEADER); + Iterator<String> keys = httpHeaders.keySet().iterator(); + String ctype = null; + while(keys.hasNext()) { + String headerKey = keys.next(); + String keyCmp = headerKey.toUpperCase().trim(); 
+ if(keyCmp.equals(HTTP_CONTENT_TYPE_HEADER.toUpperCase())) { + ctype = httpHeaders.get(headerKey); + break; + } + } if (ctype != null) { charsetName = contentTypeToCharset(ctype); } @@ -212,11 +224,11 @@ */ protected String guessCharset() throws IOException { - String charSet = getCharsetFromMeta(resource); + String charSet = getCharsetFromHeaders(resource); if(charSet == null) { charSet = getCharsetFromBytes(resource); if(charSet == null) { - charSet = getCharsetFromHeaders(resource); + charSet = getCharsetFromMeta(resource); if(charSet == null) { charSet = "UTF-8"; } @@ -365,6 +377,9 @@ * @throws UnsupportedEncodingException */ public byte[] getBytes() throws UnsupportedEncodingException { + if(resultBytes != null) { + return resultBytes; + } if(sb == null) { throw new IllegalStateException("No interal StringBuffer"); } @@ -374,6 +389,10 @@ return resultBytes; } + public void setResultBytes(byte[] resultBytes) { + this.resultBytes = resultBytes; + } + /** * Write the contents of the page to the client. * @@ -396,6 +415,13 @@ /** * @param toInsert */ + public void insertAtStartOfDocument(String toInsert) { + sb.insert(0,toInsert); + } + + /** + * @param toInsert + */ public void insertAtStartOfHead(String toInsert) { int insertPoint = TagMagix.getEndOfFirstTag(sb,"head"); if (-1 == insertPoint) { @@ -450,9 +476,6 @@ StringHttpServletResponseWrapper wrappedResponse = new StringHttpServletResponseWrapper(httpResponse); uiResults.forward(httpRequest, wrappedResponse, jspPath); -// uiResults.storeInRequest(httpRequest,jspPath); -// RequestDispatcher dispatcher = httpRequest.getRequestDispatcher(jspPath); -// dispatcher.forward(httpRequest, wrappedResponse); return wrappedResponse.getStringResponse(); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-07-28 00:09:57
|
Revision: 3192 http://archive-access.svn.sourceforge.net/archive-access/?rev=3192&view=rev Author: bradtofel Date: 2010-07-28 00:09:51 +0000 (Wed, 28 Jul 2010) Log Message: ----------- Added method "addBase()" which just adds a <base href="X"> tag. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TextDocument.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TextDocument.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TextDocument.java 2010-07-27 23:57:00 UTC (rev 3191) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TextDocument.java 2010-07-28 00:09:51 UTC (rev 3192) @@ -77,7 +77,20 @@ this.uriConverter = uriConverter; } + public void addBase() { + // TODO: get url from Resource instead of SearchResult? + String pageUrl = result.getOriginalUrl(); + String captureDate = result.getCaptureTimestamp(); + + String existingBaseHref = TagMagix.getBaseHref(sb); + if (existingBaseHref == null) { + insertAtStartOfHead("<base href=\"" + pageUrl + "\" />"); + } else { + pageUrl = existingBaseHref; + } + } + /** * Update URLs inside the page, so those URLs which must be correct at * page load time resolve correctly to absolute URLs. This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ikr...@us...> - 2011-12-20 23:06:38
|
Revision: 3587 http://archive-access.svn.sourceforge.net/archive-access/?rev=3587&view=rev Author: ikreymer Date: 2011-12-20 23:06:32 +0000 (Tue, 20 Dec 2011) Log Message: ----------- BUGFIX: if the first char read from the InputStreamReader is a 0xFEFF BOM marker, remove it -- this implies the InputStreamReader is not interpreting it, and thus it should be removed from the content to avoid problems. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TextDocument.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TextDocument.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TextDocument.java 2011-12-18 04:17:45 UTC (rev 3586) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TextDocument.java 2011-12-20 23:06:32 UTC (rev 3587) @@ -205,6 +205,13 @@ // slurp the whole thing into RAM: sb = new StringBuilder(recordLength); + + //Skip the UTF-8 BOM 0xFEFF + int firstChar = isr.read(); + if ((firstChar != '\uFEFF') && (firstChar != -1)) { + sb.append(firstChar); + } + for (int r = -1; (r = isr.read(cbuffer, 0, C_BUFFER_SIZE)) != -1;) { sb.append(cbuffer, 0, r); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ikr...@us...> - 2011-12-24 00:46:49
|
Revision: 3589 http://archive-access.svn.sourceforge.net/archive-access/?rev=3589&view=rev Author: ikreymer Date: 2011-12-24 00:46:42 +0000 (Sat, 24 Dec 2011) Log Message: ----------- BUGFIX: Fix adding first character as a char not int! Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TextDocument.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TextDocument.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TextDocument.java 2011-12-21 20:55:11 UTC (rev 3588) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TextDocument.java 2011-12-24 00:46:42 UTC (rev 3589) @@ -209,7 +209,7 @@ //Skip the UTF-8 BOM 0xFEFF int firstChar = isr.read(); if ((firstChar != '\uFEFF') && (firstChar != -1)) { - sb.append(firstChar); + sb.append((char)firstChar); } for (int r = -1; (r = isr.read(cbuffer, 0, C_BUFFER_SIZE)) != -1;) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |