Revision: 199
http://nekohtml.svn.sourceforge.net/nekohtml/?rev=199&view=rev
Author: mguillem
Date: 2008-10-21 12:17:18 +0000 (Tue, 21 Oct 2008)
Log Message:
-----------
Add character offsets in HTMLEventInfo augmentations (patch provided by Ian Roberts, #2128228)
Modified Paths:
--------------
trunk/data/canonical/test-augmentations-following-cdata.html
trunk/data/canonical/test-newline-in-attribute-crlf.html
trunk/data/canonical/test-newline-in-attribute-lf.html
trunk/data/canonical/test-newline-in-pi-crlf.html
trunk/data/canonical/test-newline-in-pi-lf.html
trunk/doc/changes.html
trunk/src/org/cyberneko/html/HTMLEventInfo.java
trunk/src/org/cyberneko/html/HTMLScanner.java
trunk/test/java/org/cyberneko/html/Writer.java
Modified: trunk/data/canonical/test-augmentations-following-cdata.html
===================================================================
--- trunk/data/canonical/test-augmentations-following-cdata.html 2008-10-21 11:58:57 UTC (rev 198)
+++ trunk/data/canonical/test-augmentations-following-cdata.html 2008-10-21 12:17:18 UTC (rev 199)
@@ -1,7 +1,7 @@
[synth](HTML
[synth](BODY
-[1,1;1,13]"Text before
-[1,13;1,39]#[CDATA[ text in CDATA]]
-[1,39;2,1]" text after\n
+[1,1,0;1,13,12]"Text before
+[1,13,12;1,39,38]#[CDATA[ text in CDATA]]
+[1,39,38;2,1,50]" text after\n
[synth])BODY
[synth])HTML
Modified: trunk/data/canonical/test-newline-in-attribute-crlf.html
===================================================================
--- trunk/data/canonical/test-newline-in-attribute-crlf.html 2008-10-21 11:58:57 UTC (rev 198)
+++ trunk/data/canonical/test-newline-in-attribute-crlf.html 2008-10-21 12:17:18 UTC (rev 199)
@@ -1,10 +1,10 @@
[synth](HTML
[synth](BODY
-[1,1;1,11]"some text
-[1,11;2,23](SPAN
+[1,1,0;1,11,10]"some text
+[1,11,10;2,23,52](SPAN
Aclass value\ncontaining a newline
-[2,23;2,34]"spancontent
-[2,34;2,41])SPAN
-[2,41;3,1]"\n
+[2,23,52;2,34,63]"spancontent
+[2,34,63;2,41,70])SPAN
+[2,41,70;3,1,72]"\n
[synth])BODY
[synth])HTML
Modified: trunk/data/canonical/test-newline-in-attribute-lf.html
===================================================================
--- trunk/data/canonical/test-newline-in-attribute-lf.html 2008-10-21 11:58:57 UTC (rev 198)
+++ trunk/data/canonical/test-newline-in-attribute-lf.html 2008-10-21 12:17:18 UTC (rev 199)
@@ -1,10 +1,10 @@
[synth](HTML
[synth](BODY
-[1,1;1,11]"some text
-[1,11;2,23](SPAN
+[1,1,0;1,11,10]"some text
+[1,11,10;2,23,51](SPAN
Aclass value\ncontaining a newline
-[2,23;2,34]"spancontent
-[2,34;2,41])SPAN
-[2,41;3,1]"\n
+[2,23,51;2,34,62]"spancontent
+[2,34,62;2,41,69])SPAN
+[2,41,69;3,1,70]"\n
[synth])BODY
[synth])HTML
Modified: trunk/data/canonical/test-newline-in-pi-crlf.html
===================================================================
--- trunk/data/canonical/test-newline-in-pi-crlf.html 2008-10-21 11:58:57 UTC (rev 198)
+++ trunk/data/canonical/test-newline-in-pi-crlf.html 2008-10-21 12:17:18 UTC (rev 199)
@@ -1,7 +1,7 @@
[synth](HTML
[synth](BODY
-[1,1;1,11]"some text
-[1,11;2,23]?instruct beforenl="content"\n afternl="content"
-[2,23;3,1]"more text\n
+[1,1,0;1,11,10]"some text
+[1,11,10;2,23,63]?instruct beforenl="content"\n afternl="content"
+[2,23,63;3,1,74]"more text\n
[synth])BODY
[synth])HTML
Modified: trunk/data/canonical/test-newline-in-pi-lf.html
===================================================================
--- trunk/data/canonical/test-newline-in-pi-lf.html 2008-10-21 11:58:57 UTC (rev 198)
+++ trunk/data/canonical/test-newline-in-pi-lf.html 2008-10-21 12:17:18 UTC (rev 199)
@@ -1,7 +1,7 @@
[synth](HTML
[synth](BODY
-[1,1;1,11]"some text
-[1,11;2,23]?instruct beforenl="content"\n afternl="content"
-[2,23;3,1]"more text\n
+[1,1,0;1,11,10]"some text
+[1,11,10;2,23,62]?instruct beforenl="content"\n afternl="content"
+[2,23,62;3,1,72]"more text\n
[synth])BODY
[synth])HTML
Modified: trunk/doc/changes.html
===================================================================
--- trunk/doc/changes.html 2008-10-21 11:58:57 UTC (rev 198)
+++ trunk/doc/changes.html 2008-10-21 12:17:18 UTC (rev 199)
@@ -25,7 +25,8 @@
<h2>Releases</h2>
<dl>
<dt>Future version
- <dd>select wrongly closes (and reopens) inline tags (patch provided by Ahmed Ashour, #2146829)
+ <dd>select wrongly closes (and reopens) inline tags (patch provided by Ahmed Ashour, #2146829),
+ add character offsets in HTMLEventInfo augmentations (patch provided by Ian Roberts, #2128228)
<dt>Version 1.9.9 (11 Sept 2008)
[<a href='http://downloads.sourceforge.net/nekohtml/nekohtml-1.9.9.zip'>zip</a>]
Modified: trunk/src/org/cyberneko/html/HTMLEventInfo.java
===================================================================
--- trunk/src/org/cyberneko/html/HTMLEventInfo.java 2008-10-21 11:58:57 UTC (rev 198)
+++ trunk/src/org/cyberneko/html/HTMLEventInfo.java 2008-10-21 12:17:18 UTC (rev 199)
@@ -38,12 +38,18 @@
/** Returns the column number of the beginning of this event.*/
public int getBeginColumnNumber();
+ /** Returns the character offset of the beginning of this event.*/
+ public int getBeginCharacterOffset();
+
/** Returns the line number of the end of this event.*/
public int getEndLineNumber();
/** Returns the column number of the end of this event.*/
public int getEndColumnNumber();
+ /** Returns the character offset of the end of this event.*/
+ public int getEndCharacterOffset();
+
// other information
/** Returns true if this corresponding event was synthesized. */
@@ -73,6 +79,11 @@
return -1;
} // getBeginColumnNumber():int
+ /** Returns the character offset of the beginning of this event.*/
+ public int getBeginCharacterOffset() {
+ return -1;
+ } // getBeginCharacterOffset():int
+
/** Returns the line number of the end of this event.*/
public int getEndLineNumber() {
return -1;
@@ -83,6 +94,11 @@
return -1;
} // getEndColumnNumber():int
+ /** Returns the character offset of the end of this event.*/
+ public int getEndCharacterOffset() {
+ return -1;
+ } // getEndCharacterOffset():int
+
// other information
/** Returns true if this corresponding event was synthesized. */
Modified: trunk/src/org/cyberneko/html/HTMLScanner.java
===================================================================
--- trunk/src/org/cyberneko/html/HTMLScanner.java 2008-10-21 11:58:57 UTC (rev 198)
+++ trunk/src/org/cyberneko/html/HTMLScanner.java 2008-10-21 12:17:18 UTC (rev 199)
@@ -420,12 +420,18 @@
/** Beginning column number. */
protected int fBeginColumnNumber;
+ /** Beginning character offset in the file. */
+ protected int fBeginCharacterOffset;
+
/** Ending line number. */
protected int fEndLineNumber;
/** Ending column number. */
protected int fEndColumnNumber;
+ /** Ending character offset in the file. */
+ protected int fEndCharacterOffset;
+
// state
/** The playback byte stream. */
@@ -669,7 +675,7 @@
/** Returns the character offset. */
public int getCharacterOffset() {
- return fCurrentEntity != null ? fCurrentEntity.charOffset : -1;
+ return fCurrentEntity != null ? fCurrentEntity.characterOffset : -1;
} // getCharacterOffset():int
//
@@ -823,8 +829,10 @@
fBeginLineNumber = 1;
fBeginColumnNumber = 1;
+ fBeginCharacterOffset = 0;
fEndLineNumber = fBeginLineNumber;
fEndColumnNumber = fBeginColumnNumber;
+ fEndCharacterOffset = fBeginCharacterOffset;
// reset encoding information
fIANAEncoding = fDefaultIANAEncoding;
@@ -1132,6 +1140,7 @@
}
}
char c = fCurrentEntity.buffer[fCurrentEntity.offset++];
+ fCurrentEntity.characterOffset++;
fCurrentEntity.columnNumber++;
if (DEBUG_BUFFER) {
System.out.print(")read: ");
@@ -1239,6 +1248,7 @@
while ((c = read()) != -1) {
if (c == '<') {
fCurrentEntity.offset--;
+ fCurrentEntity.characterOffset--;
fCurrentEntity.columnNumber--;
break;
}
@@ -1258,6 +1268,7 @@
}
fEndLineNumber = fCurrentEntity.lineNumber;
fEndColumnNumber = fCurrentEntity.columnNumber;
+ fEndCharacterOffset = fCurrentEntity.characterOffset;
fDocumentHandler.doctypeDecl(root, pubid, sysid, locationAugs());
}
@@ -1275,6 +1286,7 @@
}
if (c == '\r' || c == '\n') {
fCurrentEntity.offset--;
+ fCurrentEntity.characterOffset--;
fCurrentEntity.columnNumber--;
// NOTE: This collapses newlines to a single space.
// [Q] Is this the right thing to do here? -Ac
@@ -1283,6 +1295,7 @@
}
else if (c == '<') {
fCurrentEntity.offset--;
+ fCurrentEntity.characterOffset--;
fCurrentEntity.columnNumber--;
break;
}
@@ -1300,6 +1313,7 @@
}
else {
fCurrentEntity.offset--;
+ fCurrentEntity.characterOffset--;
fCurrentEntity.columnNumber--;
}
return null;
@@ -1331,6 +1345,7 @@
break;
}
fCurrentEntity.offset++;
+ fCurrentEntity.characterOffset++;
fCurrentEntity.columnNumber++;
}
if (fCurrentEntity.offset == fCurrentEntity.length) {
@@ -1376,6 +1391,7 @@
if (content && fDocumentHandler != null && fElementCount >= fElementDepth) {
fEndLineNumber = fCurrentEntity.lineNumber;
fEndColumnNumber = fCurrentEntity.columnNumber;
+ fEndCharacterOffset = fCurrentEntity.characterOffset;
fDocumentHandler.characters(str, locationAugs());
}
return -1;
@@ -1385,10 +1401,12 @@
fErrorReporter.reportWarning("HTML1004", null);
}
fCurrentEntity.offset--;
+ fCurrentEntity.characterOffset--;
fCurrentEntity.columnNumber--;
if (content && fDocumentHandler != null && fElementCount >= fElementDepth) {
fEndLineNumber = fCurrentEntity.lineNumber;
fEndColumnNumber = fCurrentEntity.columnNumber;
+ fEndCharacterOffset = fCurrentEntity.characterOffset;
fDocumentHandler.characters(str, locationAugs());
}
return -1;
@@ -1399,6 +1417,7 @@
if (content && fDocumentHandler != null && fElementCount >= fElementDepth) {
fEndLineNumber = fCurrentEntity.lineNumber;
fEndColumnNumber = fCurrentEntity.columnNumber;
+ fEndCharacterOffset = fCurrentEntity.characterOffset;
fDocumentHandler.characters(str, locationAugs());
}
return -1;
@@ -1421,6 +1440,7 @@
if (content && fDocumentHandler != null && fElementCount >= fElementDepth) {
fEndLineNumber = fCurrentEntity.lineNumber;
fEndColumnNumber = fCurrentEntity.columnNumber;
+ fEndCharacterOffset = fCurrentEntity.characterOffset;
if (fNotifyCharRefs) {
XMLResourceIdentifier id = resourceId();
String encoding = null;
@@ -1441,6 +1461,7 @@
if (content && fDocumentHandler != null && fElementCount >= fElementDepth) {
fEndLineNumber = fCurrentEntity.lineNumber;
fEndColumnNumber = fCurrentEntity.columnNumber;
+ fEndCharacterOffset = fCurrentEntity.characterOffset;
fDocumentHandler.characters(str, locationAugs());
}
}
@@ -1455,6 +1476,7 @@
if (content && fDocumentHandler != null && fElementCount >= fElementDepth) {
fEndLineNumber = fCurrentEntity.lineNumber;
fEndColumnNumber = fCurrentEntity.columnNumber;
+ fEndCharacterOffset = fCurrentEntity.characterOffset;
fDocumentHandler.characters(str, locationAugs());
}
return -1;
@@ -1462,6 +1484,7 @@
if (content && fDocumentHandler != null && fElementCount >= fElementDepth) {
fEndLineNumber = fCurrentEntity.lineNumber;
fEndColumnNumber = fCurrentEntity.columnNumber;
+ fEndCharacterOffset = fCurrentEntity.characterOffset;
boolean notify = fNotifyHtmlBuiltinRefs || (fNotifyXmlBuiltinRefs && builtinXmlRef(name));
if (notify) {
XMLResourceIdentifier id = resourceId();
@@ -1492,6 +1515,7 @@
}
char c0 = s.charAt(i);
char c1 = fCurrentEntity.buffer[fCurrentEntity.offset++];
+ fCurrentEntity.characterOffset++;
fCurrentEntity.columnNumber++;
if (!caseSensitive) {
c0 = Character.toUpperCase(c0);
@@ -1499,6 +1523,7 @@
}
if (c0 != c1) {
fCurrentEntity.offset -= i + 1;
+ fCurrentEntity.characterOffset -= i + 1;
fCurrentEntity.columnNumber -= i + 1;
return false;
}
@@ -1523,6 +1548,7 @@
}
while (fCurrentEntity.offset < fCurrentEntity.length) {
char c = fCurrentEntity.buffer[fCurrentEntity.offset++];
+ fCurrentEntity.characterOffset++;
fCurrentEntity.columnNumber++;
if (balance && c == '<') {
depth++;
@@ -1540,6 +1566,7 @@
}
}
c = fCurrentEntity.buffer[fCurrentEntity.offset++];
+ fCurrentEntity.characterOffset++;
fCurrentEntity.columnNumber++;
if (c == '>') {
slashgt = true;
@@ -1550,6 +1577,7 @@
}
else {
fCurrentEntity.offset--;
+ fCurrentEntity.characterOffset--;
fCurrentEntity.columnNumber--;
}
}
@@ -1591,6 +1619,7 @@
continue;
}
fCurrentEntity.offset++;
+ fCurrentEntity.characterOffset++;
fCurrentEntity.columnNumber++;
}
if (DEBUG_BUFFER) {
@@ -1631,6 +1660,7 @@
if (c == '\n' || c == '\r') {
do {
c = fCurrentEntity.buffer[fCurrentEntity.offset++];
+ fCurrentEntity.characterOffset++;
if (c == '\r') {
newlines++;
if (fCurrentEntity.offset == fCurrentEntity.length) {
@@ -1642,6 +1672,7 @@
}
if (fCurrentEntity.buffer[fCurrentEntity.offset] == '\n') {
fCurrentEntity.offset++;
+ fCurrentEntity.characterOffset++;
offset++;
}
}
@@ -1657,6 +1688,7 @@
}
else {
fCurrentEntity.offset--;
+ fCurrentEntity.characterOffset--;
break;
}
} while (newlines < maxlines &&
@@ -1681,7 +1713,8 @@
HTMLAugmentations augs = null;
if (fAugmentations) {
fLocationItem.setValues(fBeginLineNumber, fBeginColumnNumber,
- fEndLineNumber, fEndColumnNumber);
+ fBeginCharacterOffset, fEndLineNumber,
+ fEndColumnNumber, fEndCharacterOffset);
augs = fInfosetAugs;
augs.removeAllItems();
augs.putItem(AUGMENTATIONS, fLocationItem);
@@ -1844,8 +1877,8 @@
/** Column number. */
public int columnNumber = 1;
- /** Character offset. */
- public int charOffset = -1;
+ /** Character offset in the file. */
+ public int characterOffset = 0;
// buffer
@@ -1910,6 +1943,7 @@
case STATE_CONTENT: {
fBeginLineNumber = fCurrentEntity.lineNumber;
fBeginColumnNumber = fCurrentEntity.columnNumber;
+ fBeginCharacterOffset = fCurrentEntity.characterOffset;
int c = read();
if (c == '<') {
setScannerState(STATE_MARKUP_BRACKET);
@@ -1923,6 +1957,7 @@
}
else {
fCurrentEntity.offset--;
+ fCurrentEntity.characterOffset--;
fCurrentEntity.columnNumber--;
scanCharacters();
}
@@ -1966,6 +2001,7 @@
}
else {
fCurrentEntity.offset--;
+ fCurrentEntity.characterOffset--;
fCurrentEntity.columnNumber--;
fElementCount++;
fSingleBoolean[0] = false;
@@ -2016,6 +2052,7 @@
}
fEndLineNumber = fCurrentEntity.lineNumber;
fEndColumnNumber = fCurrentEntity.columnNumber;
+ fEndCharacterOffset = fCurrentEntity.characterOffset;
fDocumentHandler.endDocument(locationAugs());
}
return false;
@@ -2056,12 +2093,14 @@
if (next.length() >= 10 && "/noscript".equalsIgnoreCase(next.substring(0, 9))
&& ('>' == next.charAt(9) || Character.isWhitespace(next.charAt(9)))) {
fCurrentEntity.offset--;
+ fCurrentEntity.characterOffset--;
fCurrentEntity.columnNumber--;
break;
}
}
if (c == '\r' || c == '\n') {
fCurrentEntity.offset--;
+ fCurrentEntity.characterOffset--;
fCurrentEntity.columnNumber--;
int newlines = skipNewlines();
for (int i = 0; i < newlines; i++) {
@@ -2075,6 +2114,7 @@
if (buffer.length > 0 && fDocumentHandler != null) {
fEndLineNumber = fCurrentEntity.lineNumber;
fEndColumnNumber = fCurrentEntity.columnNumber;
+ fEndCharacterOffset = fCurrentEntity.characterOffset;
fDocumentHandler.characters(buffer, locationAugs());
}
}
@@ -2097,6 +2137,7 @@
if (next.length() >= 8 && "/script".equalsIgnoreCase(next.substring(0, 7))
&& ('>' == next.charAt(7) || Character.isWhitespace(next.charAt(7)))) {
fCurrentEntity.offset--;
+ fCurrentEntity.characterOffset--;
fCurrentEntity.columnNumber--;
break;
}
@@ -2107,6 +2148,7 @@
if (c == '\r' || c == '\n') {
fCurrentEntity.offset--;
+ fCurrentEntity.characterOffset--;
fCurrentEntity.columnNumber--;
int newlines = skipNewlines();
for (int i = 0; i < newlines; i++) {
@@ -2131,6 +2173,7 @@
}
fEndLineNumber = fCurrentEntity.lineNumber;
fEndColumnNumber = fCurrentEntity.columnNumber;
+ fEndCharacterOffset = fCurrentEntity.characterOffset;
fDocumentHandler.characters(buffer, locationAugs());
}
}
@@ -2145,6 +2188,7 @@
private String nextContent(int len) throws IOException {
final int originalOffset = fCurrentEntity.offset;
final int originalColumnNumber = fCurrentEntity.columnNumber;
+ final int originalCharacterOffset = fCurrentEntity.characterOffset;
char[] buff = new char[len];
int nbRead = 0;
@@ -2169,6 +2213,7 @@
}
fCurrentEntity.offset = originalOffset;
fCurrentEntity.columnNumber = originalColumnNumber;
+ fCurrentEntity.characterOffset = originalCharacterOffset;
return new String(buff, 0, nbRead);
}
@@ -2203,6 +2248,7 @@
break;
}
fCurrentEntity.offset++;
+ fCurrentEntity.characterOffset++;
fCurrentEntity.columnNumber++;
}
if (fCurrentEntity.offset > offset &&
@@ -2213,6 +2259,7 @@
}
fEndLineNumber = fCurrentEntity.lineNumber;
fEndColumnNumber = fCurrentEntity.columnNumber;
+ fEndCharacterOffset = fCurrentEntity.characterOffset;
fDocumentHandler.characters(fString, locationAugs());
}
if (DEBUG_BUFFER) {
@@ -2234,6 +2281,7 @@
if (fDocumentHandler != null && fElementCount >= fElementDepth) {
fEndLineNumber = fCurrentEntity.lineNumber;
fEndColumnNumber = fCurrentEntity.columnNumber;
+ fEndCharacterOffset = fCurrentEntity.characterOffset;
if (DEBUG_CALLBACKS) {
System.out.println("startCDATA()");
}
@@ -2250,6 +2298,7 @@
if (fDocumentHandler != null && fElementCount >= fElementDepth) {
fEndLineNumber = fCurrentEntity.lineNumber;
fEndColumnNumber = fCurrentEntity.columnNumber;
+ fEndCharacterOffset = fCurrentEntity.characterOffset;
if (fCDATASections) {
if (DEBUG_CALLBACKS) {
System.out.println("characters("+fStringBuffer+")");
@@ -2292,6 +2341,7 @@
}
fEndLineNumber = fCurrentEntity.lineNumber;
fEndColumnNumber = fCurrentEntity.columnNumber;
+ fEndCharacterOffset = fCurrentEntity.characterOffset;
fDocumentHandler.comment(fStringBuffer, locationAugs());
}
if (DEBUG_BUFFER) {
@@ -2330,6 +2380,7 @@
buffer.append(cend);
//if (c != -1) {
fCurrentEntity.offset--;
+ fCurrentEntity.characterOffset--;
fCurrentEntity.columnNumber--;
//}
continue;
@@ -2339,6 +2390,7 @@
buffer.append(cend);
}
fCurrentEntity.offset--;
+ fCurrentEntity.characterOffset--;
fCurrentEntity.columnNumber--;
continue;
}
@@ -2349,6 +2401,7 @@
}
else if (c == '\n' || c == '\r') {
fCurrentEntity.offset--;
+ fCurrentEntity.characterOffset--;
fCurrentEntity.columnNumber--;
int newlines = skipNewlines();
for (int i = 0; i < newlines; i++) {
@@ -2388,6 +2441,7 @@
c = read();
if (c != '\n') {
fCurrentEntity.offset--;
+ fCurrentEntity.characterOffset--;
}
}
fCurrentEntity.lineNumber++;
@@ -2399,6 +2453,7 @@
}
if (c != ' ' && c != '\t') {
fCurrentEntity.offset--;
+ fCurrentEntity.characterOffset--;
fCurrentEntity.columnNumber--;
break;
}
@@ -2415,6 +2470,7 @@
else {
fStringBuffer.append(c0);
fCurrentEntity.offset--;
+ fCurrentEntity.characterOffset--;
fCurrentEntity.columnNumber--;
continue;
}
@@ -2425,6 +2481,7 @@
c = read();
if (c != '\n') {
fCurrentEntity.offset--;
+ fCurrentEntity.characterOffset--;
}
}
fCurrentEntity.lineNumber++;
@@ -2442,6 +2499,7 @@
if (fDocumentHandler != null) {
fEndLineNumber = fCurrentEntity.lineNumber;
fEndColumnNumber = fCurrentEntity.columnNumber;
+ fEndCharacterOffset = fCurrentEntity.characterOffset;
fDocumentHandler.processingInstruction(target, data, locationAugs());
}
}
@@ -2450,6 +2508,7 @@
else {
int beginLineNumber = fBeginLineNumber;
int beginColumnNumber = fBeginColumnNumber;
+ int beginCharacterOffset = fBeginCharacterOffset;
fAttributes.removeAllAttributes();
int aindex = 0;
while (scanPseudoAttribute(fAttributes)) {
@@ -2465,8 +2524,10 @@
fBeginLineNumber = beginLineNumber;
fBeginColumnNumber = beginColumnNumber;
+ fBeginCharacterOffset = beginCharacterOffset;
fEndLineNumber = fCurrentEntity.lineNumber;
fEndColumnNumber = fCurrentEntity.columnNumber;
+ fEndCharacterOffset = fCurrentEntity.characterOffset;
fDocumentHandler.xmlDecl(version, encoding, standalone,
locationAugs());
}
@@ -2507,11 +2568,13 @@
fAttributes.removeAllAttributes();
int beginLineNumber = fBeginLineNumber;
int beginColumnNumber = fBeginColumnNumber;
+ int beginCharacterOffset = fBeginCharacterOffset;
while (scanAttribute(fAttributes, empty)) {
// do nothing
}
fBeginLineNumber = beginLineNumber;
fBeginColumnNumber = beginColumnNumber;
+ fBeginCharacterOffset = beginCharacterOffset;
if (fByteStream != null && fElementDepth == -1) {
if (ename.equalsIgnoreCase("META")) {
if (DEBUG_CHARSET) {
@@ -2559,6 +2622,7 @@
fCurrentEntity.offset = fCurrentEntity.length = 0;
fCurrentEntity.lineNumber = 1;
fCurrentEntity.columnNumber = 1;
+ fCurrentEntity.characterOffset = 0;
}
}
}
@@ -2596,6 +2660,7 @@
}
fEndLineNumber = fCurrentEntity.lineNumber;
fEndColumnNumber = fCurrentEntity.columnNumber;
+ fEndCharacterOffset = fCurrentEntity.characterOffset;
if (empty[0]) {
fDocumentHandler.emptyElement(fQName, fAttributes, locationAugs());
}
@@ -2646,6 +2711,7 @@
boolean skippedSpaces = skipSpaces();
fBeginLineNumber = fCurrentEntity.lineNumber;
fBeginColumnNumber = fCurrentEntity.columnNumber;
+ fBeginCharacterOffset = fCurrentEntity.characterOffset;
int c = read();
if (c == -1) {
if (fReportErrors) {
@@ -2657,6 +2723,7 @@
return false;
}
fCurrentEntity.offset--;
+ fCurrentEntity.characterOffset--;
fCurrentEntity.columnNumber--;
String aname = scanName();
if (aname == null) {
@@ -2687,6 +2754,7 @@
}
if (c == '/') {
fCurrentEntity.offset--;
+ fCurrentEntity.characterOffset--;
fCurrentEntity.columnNumber--;
empty[0] = skipMarkup(false);
}
@@ -2732,6 +2800,7 @@
fNonNormAttr.clear();
if (c != '\'' && c != '"') {
fCurrentEntity.offset--;
+ fCurrentEntity.characterOffset--;
fCurrentEntity.columnNumber--;
while (true) {
c = read();
@@ -2739,6 +2808,7 @@
if (Character.isSpace((char)c) || c == '>') {
//fCharOffset--;
fCurrentEntity.offset--;
+ fCurrentEntity.characterOffset--;
fCurrentEntity.columnNumber--;
break;
}
@@ -2809,6 +2879,7 @@
int c2 = read();
if (c2 != '\n') {
fCurrentEntity.offset--;
+ fCurrentEntity.characterOffset--;
fCurrentEntity.columnNumber--;
}
else {
@@ -2855,6 +2926,7 @@
attributes.addAttribute(fQName, "CDATA", "");
attributes.setSpecified(attributes.getLength()-1, true);
fCurrentEntity.offset--;
+ fCurrentEntity.characterOffset--;
fCurrentEntity.columnNumber--;
if (fAugmentations) {
addLocationItem(attributes, attributes.getLength() - 1);
@@ -2867,9 +2939,11 @@
protected void addLocationItem(XMLAttributes attributes, int index) {
fEndLineNumber = fCurrentEntity.lineNumber;
fEndColumnNumber = fCurrentEntity.columnNumber;
+ fEndCharacterOffset = fCurrentEntity.characterOffset;
LocationItem locationItem = new LocationItem();
locationItem.setValues(fBeginLineNumber, fBeginColumnNumber,
- fEndLineNumber, fEndColumnNumber);
+ fBeginCharacterOffset, fEndLineNumber,
+ fEndColumnNumber, fEndCharacterOffset);
Augmentations augs = attributes.getAugmentations(index);
augs.putItem(AUGMENTATIONS, locationItem);
} // addLocationItem(XMLAttributes,int)
@@ -2890,6 +2964,7 @@
}
fEndLineNumber = fCurrentEntity.lineNumber;
fEndColumnNumber = fCurrentEntity.columnNumber;
+ fEndCharacterOffset = fCurrentEntity.characterOffset;
fDocumentHandler.endElement(fQName, locationAugs());
}
}
@@ -2971,6 +3046,7 @@
case STATE_CONTENT: {
fBeginLineNumber = fCurrentEntity.lineNumber;
fBeginColumnNumber = fCurrentEntity.columnNumber;
+ fBeginCharacterOffset = fCurrentEntity.characterOffset;
int c = read();
if (c == '<') {
setScannerState(STATE_MARKUP_BRACKET);
@@ -2992,6 +3068,7 @@
}
else {
fCurrentEntity.offset--;
+ fCurrentEntity.characterOffset--;
fCurrentEntity.columnNumber--;
fStringBuffer.clear();
}
@@ -3011,6 +3088,7 @@
if (c == '\r' || c == '\n') {
fCurrentEntity.columnNumber--;
fCurrentEntity.offset--;
+ fCurrentEntity.characterOffset--;
break;
}
} while (c != -1);
@@ -3030,6 +3108,7 @@
if (c == '\r' || c == '\n') {
fCurrentEntity.columnNumber--;
fCurrentEntity.offset--;
+ fCurrentEntity.characterOffset--;
break;
}
} while (c != -1);
@@ -3054,6 +3133,7 @@
}
fEndLineNumber = fCurrentEntity.lineNumber;
fEndColumnNumber = fCurrentEntity.columnNumber;
+ fEndCharacterOffset = fCurrentEntity.characterOffset;
fDocumentHandler.endElement(fQName, locationAugs());
}
setScanner(fContentScanner);
@@ -3062,6 +3142,7 @@
}
else {
fCurrentEntity.offset--;
+ fCurrentEntity.characterOffset--;
fCurrentEntity.columnNumber--;
}
}
@@ -3121,6 +3202,7 @@
if (c == -1 || (delimiter == -1 && (c == '<' || c == '&'))) {
if (c != -1) {
fCurrentEntity.offset--;
+ fCurrentEntity.characterOffset--;
fCurrentEntity.columnNumber--;
}
break;
@@ -3128,6 +3210,7 @@
// Patch supplied by Jonathan Baxter
else if (c == '\r' || c == '\n') {
fCurrentEntity.offset--;
+ fCurrentEntity.characterOffset--;
fCurrentEntity.columnNumber--;
int newlines = skipNewlines();
for (int i = 0; i < newlines; i++) {
@@ -3150,6 +3233,7 @@
break;
}
fCurrentEntity.offset--;
+ fCurrentEntity.characterOffset--;
fCurrentEntity.columnNumber--;
}
else {
@@ -3166,6 +3250,7 @@
}
fEndLineNumber = fCurrentEntity.lineNumber;
fEndColumnNumber = fCurrentEntity.columnNumber;
+ fEndCharacterOffset = fCurrentEntity.characterOffset;
fDocumentHandler.characters(buffer, locationAugs());
}
if (DEBUG_BUFFER) {
@@ -3427,25 +3512,46 @@
/** Beginning column number. */
protected int fBeginColumnNumber;
+ /** Beginning character offset. */
+ protected int fBeginCharacterOffset;
+
/** Ending line number. */
protected int fEndLineNumber;
/** Ending column number. */
protected int fEndColumnNumber;
+ /** Ending character offset. */
+ protected int fEndCharacterOffset;
+
//
// Public methods
//
- /** Sets the values of this item. */
+ /**
+ * Sets the values of this item.
+ * @deprecated after 1.9.9. Use {@link #setValues(int, int, int, int, int, int)}.
+ **/
public void setValues(int beginLine, int beginColumn,
int endLine, int endColumn) {
+ setValues(beginLine, beginColumn, 0, endLine, endColumn, 0);
fBeginLineNumber = beginLine;
fBeginColumnNumber = beginColumn;
fEndLineNumber = endLine;
fEndColumnNumber = endColumn;
} // setValues(int,int,int,int)
+ /** Sets the values of this item. */
+ public void setValues(int beginLine, int beginColumn, int beginOffset,
+ int endLine, int endColumn, int endOffset) {
+ fBeginLineNumber = beginLine;
+ fBeginColumnNumber = beginColumn;
+ fBeginCharacterOffset = beginOffset;
+ fEndLineNumber = endLine;
+ fEndColumnNumber = endColumn;
+ fEndCharacterOffset = endOffset;
+ } // setValues(int,int,int,int,int,int)
+
//
// HTMLEventInfo methods
//
@@ -3462,6 +3568,11 @@
return fBeginColumnNumber;
} // getBeginColumnNumber():int
+ /** Returns the character offset of the beginning of this event.*/
+ public int getBeginCharacterOffset() {
+ return fBeginCharacterOffset;
+ } // getBeginCharacterOffset():int
+
/** Returns the line number of the end of this event.*/
public int getEndLineNumber() {
return fEndLineNumber;
@@ -3472,6 +3583,11 @@
return fEndColumnNumber;
} // getEndColumnNumber():int
+ /** Returns the character offset of the end of this event.*/
+ public int getEndCharacterOffset() {
+ return fEndCharacterOffset;
+ } // getEndCharacterOffset():int
+
// other information
/** Returns true if this corresponding event was synthesized. */
@@ -3490,9 +3606,13 @@
str.append(':');
str.append(fBeginColumnNumber);
str.append(':');
+ str.append(fBeginCharacterOffset);
+ str.append(':');
str.append(fEndLineNumber);
str.append(':');
str.append(fEndColumnNumber);
+ str.append(':');
+ str.append(fEndCharacterOffset);
return str.toString();
} // toString():String
@@ -3533,6 +3653,7 @@
int nbCaret = 0;
final int originalOffset = fCurrentEntity.offset;
final int originalColumnNumber = fCurrentEntity.columnNumber;
+ final int originalCharacterOffset = fCurrentEntity.characterOffset;
while (true) {
// read() should not clear the buffer
@@ -3543,6 +3664,7 @@
else { // everything was already loaded
fCurrentEntity.offset = originalOffset;
fCurrentEntity.columnNumber = originalColumnNumber;
+ fCurrentEntity.characterOffset = originalCharacterOffset;
return false;
}
}
@@ -3551,11 +3673,13 @@
if (c == -1) {
fCurrentEntity.offset = originalOffset;
fCurrentEntity.columnNumber = originalColumnNumber;
+ fCurrentEntity.characterOffset = originalCharacterOffset;
return false;
}
else if (c == '>' && nbCaret >= 2) {
fCurrentEntity.offset = originalOffset;
fCurrentEntity.columnNumber = originalColumnNumber;
+ fCurrentEntity.characterOffset = originalCharacterOffset;
return true;
}
else if (c == '-') {
Modified: trunk/test/java/org/cyberneko/html/Writer.java
===================================================================
--- trunk/test/java/org/cyberneko/html/Writer.java 2008-10-21 11:58:57 UTC (rev 198)
+++ trunk/test/java/org/cyberneko/html/Writer.java 2008-10-21 12:17:18 UTC (rev 199)
@@ -71,6 +71,13 @@
private int fCharactersBeginColumn = -1;
/**
+ * Beginning character offset of the current block of characters (which may
+ * be reported in several characters chunks). Will be -1 if the parser
+ * isn't producing HTML augmentations.
+ */
+ private int fCharactersBeginCharacterOffset = -1;
+
+ /**
* Ending line number of the current block of characters (which may be
* reported in several characters chunks). Will be -1 if the parser
* isn't producing HTML augmentations.
@@ -84,6 +91,13 @@
*/
private int fCharactersEndColumn = -1;
+ /**
+ * Ending character offset of the current block of characters (which may be
+ * reported in several characters chunks). Will be -1 if the parser isn't
+ * producing HTML augmentations.
+ */
+ private int fCharactersEndCharacterOffset = -1;
+
//
// Constructors
//
@@ -327,10 +341,14 @@
out.print(evInfo.getBeginLineNumber());
out.print(',');
out.print(evInfo.getBeginColumnNumber());
+ out.print(',');
+ out.print(evInfo.getBeginCharacterOffset());
out.print(';');
out.print(evInfo.getEndLineNumber());
out.print(',');
out.print(evInfo.getEndColumnNumber());
+ out.print(',');
+ out.print(evInfo.getEndCharacterOffset());
out.print(']');
}
}
@@ -347,6 +365,7 @@
if(evInfo != null) {
fCharactersBeginLine = evInfo.getBeginLineNumber();
fCharactersBeginColumn = evInfo.getBeginColumnNumber();
+ fCharactersBeginCharacterOffset = evInfo.getBeginCharacterOffset();
}
} // storeCharactersStart(Augmentations)
@@ -361,6 +380,7 @@
if(evInfo != null) {
fCharactersEndLine = evInfo.getEndLineNumber();
fCharactersEndColumn = evInfo.getEndColumnNumber();
+ fCharactersEndCharacterOffset = evInfo.getEndCharacterOffset();
}
} // storeCharactersEnd(Augmentations)
@@ -375,10 +395,14 @@
out.print(fCharactersBeginLine);
out.print(',');
out.print(fCharactersBeginColumn);
+ out.print(',');
+ out.print(fCharactersBeginCharacterOffset);
out.print(';');
out.print(fCharactersEndLine);
out.print(',');
out.print(fCharactersEndColumn);
+ out.print(',');
+ out.print(fCharactersEndCharacterOffset);
out.print(']');
}
} // doCharactersAugs()
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|