You can subscribe to this list here.
| 2000 | Jan | Feb | Mar | Apr | May | Jun | Jul | Aug | Sep | Oct | Nov (46) | Dec (1) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2001 | Jan (1) | Feb | Mar (1) | Apr | May | Jun (1) | Jul (15) | Aug (24) | Sep (14) | Oct (2) | Nov (1) | Dec (18) |
| 2002 | Jan (12) | Feb (5) | Mar (3) | Apr (1) | May (1) | Jun (10) | Jul (3) | Aug (4) | Sep | Oct (1) | Nov | Dec (5) |
| 2003 | Jan (1) | Feb (1) | Mar | Apr (1) | May (2) | Jun (4) | Jul (2) | Aug (4) | Sep | Oct (1) | Nov (1) | Dec (1) |
| 2004 | Jan (141) | Feb (79) | Mar (85) | Apr (38) | May (1) | Jun | Jul (78) | Aug (223) | Sep (107) | Oct (158) | Nov (136) | Dec |
| 2005 | Jan (7) | Feb (4) | Mar | Apr (13) | May | Jun | Jul | Aug | Sep | Oct (1) | Nov (2) | Dec (3) |
| 2010 | Jan | Feb | Mar | Apr | May | Jun | Jul | Aug | Sep | Oct (2) | Nov (100) | Dec (22) |
From: <ad...@us...> - 2010-11-26 00:21:30
|
Revision: 1236 http://jtidy.svn.sourceforge.net/jtidy/?rev=1236&view=rev Author: aditsu Date: 2010-11-26 00:21:24 +0000 (Fri, 26 Nov 2010) Log Message: ----------- added setParent and setNext, with integrity checks Modified Paths: -------------- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Node.java Modified: branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Node.java =================================================================== --- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Node.java 2010-11-25 04:54:42 UTC (rev 1235) +++ branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Node.java 2010-11-26 00:21:24 UTC (rev 1236) @@ -485,43 +485,35 @@ * Insert a node into markup tree. * @param node to insert */ - public void insertNodeAtStart(Node node) - { - node.parent = this; + public void insertNodeAtStart(final Node node) { + node.setParent(this); - if (this.content == null) - { - this.last = node; + if (content == null) { + last = node; } - else - { - this.content.prev = node; // AQ added 13 Apr 2000 + else { + content.prev = node; // AQ added 13 Apr 2000 } - - node.next = this.content; + node.setNext(content); node.prev = null; - this.content = node; + content = node; } /** * Insert node into markup tree. * @param node Node to insert */ - public void insertNodeAtEnd(Node node) - { - node.parent = this; - node.prev = this.last; - - if (this.last != null) - { - this.last.next = node; + public void insertNodeAtEnd(final Node node) { + node.setParent(this); + node.prev = last; + + if (last != null) { + last.setNext(node); } - else - { - this.content = node; + else { + content = node; } - - this.last = node; + last = node; } /** @@ -529,62 +521,48 @@ * @param element child node. 
Will be inserted as a child of element * @param node parent node */ - public static void insertNodeAsParent(Node element, Node node) - { + public static void insertNodeAsParent(final Node element, final Node node) { node.content = element; node.last = element; - node.parent = element.parent; - element.parent = node; + node.setParent(element.parent); + element.setParent(node); - if (node.parent.content == element) - { + if (node.parent.content == element) { node.parent.content = node; } - - if (node.parent.last == element) - { + if (node.parent.last == element) { node.parent.last = node; } - node.prev = element.prev; element.prev = null; - if (node.prev != null) - { - node.prev.next = node; + if (node.prev != null) { + node.prev.setNext(node); } - - node.next = element.next; + node.setNext(element.next); element.next = null; - if (node.next != null) - { + if (node.next != null) { node.next.prev = node; } } /** * Insert node into markup tree before element. - * @param element child node. Will be insertedbefore element + * @param element child node. Will be inserted before element * @param node following node */ - public static void insertNodeBeforeElement(Node element, Node node) - { - Node parent; - - parent = element.parent; - node.parent = parent; - node.next = element; + public static void insertNodeBeforeElement(Node element, Node node) { + Node parent = element.parent; + node.setParent(parent); + node.setNext(element); node.prev = element.prev; element.prev = node; - if (node.prev != null) - { - node.prev.next = node; + if (node.prev != null) { + node.prev.setNext(node); } - - if (parent != null && parent.content == element) - { + if (parent != null && parent.content == element) { parent.content = node; } } @@ -593,29 +571,21 @@ * Insert node into markup tree after element. 
* @param node new node to insert */ - public void insertNodeAfterElement(Node node) - { - Node parent; + public void insertNodeAfterElement(final Node node) { + node.setParent(parent); - parent = this.parent; - node.parent = parent; - // AQ - 13Jan2000 fix for parent == null - if (parent != null && parent.last == this) - { + if (parent != null && parent.last == this) { parent.last = node; } - else - { - node.next = this.next; + else { + node.setNext(next); // AQ - 13Jan2000 fix for node.next == null - if (node.next != null) - { + if (node.next != null) { node.next.prev = node; } } - - this.next = node; + setNext(node); node.prev = this; } @@ -918,30 +888,21 @@ * @param row Row node * @param node Node which should be moved before the table */ - public static void moveBeforeTable(Node row, Node node) - { - Node table; - + public static void moveBeforeTable(Node row, Node node) { /* first find the table element */ - for (table = row.parent; table != null; table = table.parent) - { - if (table.is(TagId.TABLE)) - { - if (table.parent.content == table) - { + for (Node table = row.parent; table != null; table = table.parent) { + if (table.is(TagId.TABLE)) { + if (table.parent.content == table) { table.parent.content = node; } - node.prev = table.prev; - node.next = table; + node.setNext(table); table.prev = node; - node.parent = table.parent; + node.setParent(table.parent); - if (node.prev != null) - { - node.prev.next = node; + if (node.prev != null) { + node.prev.setNext(node); } - break; } } @@ -992,34 +953,24 @@ /** * Extract this node and its children from a markup tree. 
*/ - public void removeNode() - { - if (this.prev != null) - { - this.prev.next = this.next; + public void removeNode() { + if (prev != null) { + prev.setNext(next); } - - if (this.next != null) - { - this.next.prev = this.prev; + if (next != null) { + next.prev = prev; } - - if (this.parent != null) - { - if (this.parent.content == this) - { - this.parent.content = this.next; + if (parent != null) { + if (parent.content == this) { + parent.content = next; } - - if (this.parent.last == this) - { - this.parent.last = this.prev; + if (parent.last == this) { + parent.last = prev; } } - - this.parent = null; - this.prev = null; - this.next = null; + parent = null; + prev = null; + next = null; } /** @@ -1490,4 +1441,18 @@ } return 0; } + + protected void setParent(final Node node) { + if (node == this) { + throw new IllegalArgumentException("Attempt to insert a node into itself"); + } + parent = node; + } + + protected void setNext(final Node node) { + if (node == this) { + throw new IllegalArgumentException("Attempt to insert a node as its own sibling"); + } + next = node; + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ad...@us...> - 2010-11-25 04:54:49
|
Revision: 1235 http://jtidy.svn.sourceforge.net/jtidy/?rev=1235&view=rev Author: aditsu Date: 2010-11-25 04:54:42 +0000 (Thu, 25 Nov 2010) Log Message: ----------- fixed tests 1503897, 1590220-1 and 1590220-2 - updated unexpected tag handling in ParsePre - but results are questionable Modified Paths: -------------- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/ParserImpl.java Modified: branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/ParserImpl.java =================================================================== --- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/ParserImpl.java 2010-11-25 04:00:26 UTC (rev 1234) +++ branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/ParserImpl.java 2010-11-25 04:54:42 UTC (rev 1235) @@ -1975,13 +1975,33 @@ } // strip unexpected tags - if (!lexer.preContent(node)) - { - Node newnode; - - lexer.report.warning(lexer, pre, node, ErrorCode.UNESCAPED_ELEMENT); - newnode = Node.escapeTag(lexer, node); - pre.insertNodeAtEnd(newnode); + if (!lexer.preContent(node)) { + if (node.type == NodeType.EndTag) { + if (lexer.exiled && (node.hasCM(Dict.CM_TABLE) || node.is(TagId.TABLE))) { + lexer.ungetToken(); + Node.trimSpaces(lexer, pre); + return; + } + lexer.report.warning(lexer, pre, node, ErrorCode.DISCARDING_UNEXPECTED); + continue; + } + else if (node.hasCM(Dict.CM_TABLE | Dict.CM_ROW) || node.is(TagId.TABLE)) { + if (!lexer.exiled) { + /* No missing close warning if exiled. 
*/ + lexer.report.warning(lexer, pre, node, ErrorCode.MISSING_ENDTAG_BEFORE); + } + lexer.ungetToken(); + return; + } + + pre.insertNodeAfterElement(node); + lexer.report.warning(lexer, pre, node, ErrorCode.MISSING_ENDTAG_BEFORE); + parseTag(lexer, node, Lexer.IGNORE_WHITESPACE); + + final Node newnode = lexer.inferredTag(TagId.PRE); + lexer.report.warning(lexer, pre, newnode, ErrorCode.INSERTING_TAG); + pre = newnode; + node.insertNodeAfterElement(pre); continue; } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ad...@us...> - 2010-11-25 04:00:33
|
Revision: 1234 http://jtidy.svn.sourceforge.net/jtidy/?rev=1234&view=rev Author: aditsu Date: 2010-11-25 04:00:26 +0000 (Thu, 25 Nov 2010) Log Message: ----------- corrected node description for UNESCAPED_ELEMENT Modified Paths: -------------- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Report.java Modified: branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Report.java =================================================================== --- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Report.java 2010-11-24 02:39:29 UTC (rev 1233) +++ branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Report.java 2010-11-25 04:00:26 UTC (rev 1234) @@ -703,7 +703,7 @@ break; case UNESCAPED_ELEMENT : - messageLexer(lexer, Level.WARNING, code, getTagName(element)); + messageLexer(lexer, Level.WARNING, code, nodedesc); break; case NOFRAMES_CONTENT : This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ad...@us...> - 2010-11-24 02:39:38
|
Revision: 1233 http://jtidy.svn.sourceforge.net/jtidy/?rev=1233&view=rev Author: aditsu Date: 2010-11-24 02:39:29 +0000 (Wed, 24 Nov 2010) Log Message: ----------- ported accessibility checks and messages - this fixes 5 tests Modified Paths: -------------- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/AttVal.java branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/ErrorCode.java branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Lexer.java branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Node.java branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/ParserImpl.java branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Report.java branches/CodeUpdateAndJava5/src/main/resources/org/w3c/tidy/TidyMessages.properties Added Paths: ----------- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/AccessErrorCode.java branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Accessibility.java branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/IErrorCode.java Added: branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/AccessErrorCode.java =================================================================== --- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/AccessErrorCode.java (rev 0) +++ branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/AccessErrorCode.java 2010-11-24 02:39:29 UTC (rev 1233) @@ -0,0 +1,148 @@ +package org.w3c.tidy; + +/** + * Accessibility error codes (from access.h) + * + * @author aditsu + */ +public enum AccessErrorCode implements IErrorCode { + FIRST_ACCESS_ERR, /* must be first */ + +/* [1.1.1.1] */ IMG_MISSING_ALT, +/* [1.1.1.2] */ IMG_ALT_SUSPICIOUS_FILENAME, +/* [1.1.1.3] */ IMG_ALT_SUSPICIOUS_FILE_SIZE, +/* [1.1.1.4] */ IMG_ALT_SUSPICIOUS_PLACEHOLDER, +/* [1.1.1.10] */ IMG_ALT_SUSPICIOUS_TOO_LONG, +/* [1.1.1.11] */ IMG_MISSING_ALT_BULLET, +/* [1.1.1.12] */ IMG_MISSING_ALT_H_RULE, +/* [1.1.2.1] */ IMG_MISSING_LONGDESC_DLINK, +/* [1.1.2.2] */ IMG_MISSING_DLINK, +/* [1.1.2.3] */ IMG_MISSING_LONGDESC, +/* 
[1.1.2.5] */ LONGDESC_NOT_REQUIRED, +/* [1.1.3.1] */ IMG_BUTTON_MISSING_ALT, +/* [1.1.4.1] */ APPLET_MISSING_ALT, +/* [1.1.5.1] */ OBJECT_MISSING_ALT, +/* [1.1.6.1] */ AUDIO_MISSING_TEXT_WAV, +/* [1.1.6.2] */ AUDIO_MISSING_TEXT_AU, +/* [1.1.6.3] */ AUDIO_MISSING_TEXT_AIFF, +/* [1.1.6.4] */ AUDIO_MISSING_TEXT_SND, +/* [1.1.6.5] */ AUDIO_MISSING_TEXT_RA, +/* [1.1.6.6] */ AUDIO_MISSING_TEXT_RM, +/* [1.1.8.1] */ FRAME_MISSING_LONGDESC, +/* [1.1.9.1] */ AREA_MISSING_ALT, +/* [1.1.10.1] */ SCRIPT_MISSING_NOSCRIPT, +/* [1.1.12.1] */ ASCII_REQUIRES_DESCRIPTION, +/* [1.2.1.1] */ IMG_MAP_SERVER_REQUIRES_TEXT_LINKS, +/* [1.4.1.1] */ MULTIMEDIA_REQUIRES_TEXT, +/* [1.5.1.1] */ IMG_MAP_CLIENT_MISSING_TEXT_LINKS, +/* [2.1.1.1] */ INFORMATION_NOT_CONVEYED_IMAGE, +/* [2.1.1.2] */ INFORMATION_NOT_CONVEYED_APPLET, +/* [2.1.1.3] */ INFORMATION_NOT_CONVEYED_OBJECT, +/* [2.1.1.4] */ INFORMATION_NOT_CONVEYED_SCRIPT, +/* [2.1.1.5] */ INFORMATION_NOT_CONVEYED_INPUT, +/* [2.2.1.1] */ COLOR_CONTRAST_TEXT, +/* [2.2.1.2] */ COLOR_CONTRAST_LINK, +/* [2.2.1.3] */ COLOR_CONTRAST_ACTIVE_LINK, +/* [2.2.1.4] */ COLOR_CONTRAST_VISITED_LINK, +/* [3.2.1.1] */ DOCTYPE_MISSING, +/* [3.3.1.1] */ STYLE_SHEET_CONTROL_PRESENTATION, +/* [3.5.1.1] */ HEADERS_IMPROPERLY_NESTED, +/* [3.5.2.1] */ POTENTIAL_HEADER_BOLD, +/* [3.5.2.2] */ POTENTIAL_HEADER_ITALICS, +/* [3.5.2.3] */ POTENTIAL_HEADER_UNDERLINE, +/* [3.5.3.1] */ HEADER_USED_FORMAT_TEXT, +/* [3.6.1.1] */ LIST_USAGE_INVALID_UL, +/* [3.6.1.2] */ LIST_USAGE_INVALID_OL, +/* [3.6.1.4] */ LIST_USAGE_INVALID_LI, +/* [4.1.1.1] */ INDICATE_CHANGES_IN_LANGUAGE, +/* [4.3.1.1] */ LANGUAGE_NOT_IDENTIFIED, +/* [4.3.1.1] */ LANGUAGE_INVALID, +/* [5.1.2.1] */ DATA_TABLE_MISSING_HEADERS, +/* [5.1.2.2] */ DATA_TABLE_MISSING_HEADERS_COLUMN, +/* [5.1.2.3] */ DATA_TABLE_MISSING_HEADERS_ROW, +/* [5.2.1.1] */ DATA_TABLE_REQUIRE_MARKUP_COLUMN_HEADERS, +/* [5.2.1.2] */ DATA_TABLE_REQUIRE_MARKUP_ROW_HEADERS, +/* [5.3.1.1] */ LAYOUT_TABLES_LINEARIZE_PROPERLY, +/* [5.4.1.1] */ 
LAYOUT_TABLE_INVALID_MARKUP, +/* [5.5.1.1] */ TABLE_MISSING_SUMMARY, +/* [5.5.1.2] */ TABLE_SUMMARY_INVALID_NULL, +/* [5.5.1.3] */ TABLE_SUMMARY_INVALID_SPACES, +/* [5.5.1.6] */ TABLE_SUMMARY_INVALID_PLACEHOLDER, +/* [5.5.2.1] */ TABLE_MISSING_CAPTION, +/* [5.6.1.1] */ TABLE_MAY_REQUIRE_HEADER_ABBR, +/* [5.6.1.2] */ TABLE_MAY_REQUIRE_HEADER_ABBR_NULL, +/* [5.6.1.3] */ TABLE_MAY_REQUIRE_HEADER_ABBR_SPACES, +/* [6.1.1.1] */ STYLESHEETS_REQUIRE_TESTING_LINK, +/* [6.1.1.2] */ STYLESHEETS_REQUIRE_TESTING_STYLE_ELEMENT, +/* [6.1.1.3] */ STYLESHEETS_REQUIRE_TESTING_STYLE_ATTR, +/* [6.2.1.1] */ FRAME_SRC_INVALID, +/* [6.2.2.1] */ TEXT_EQUIVALENTS_REQUIRE_UPDATING_APPLET, +/* [6.2.2.2] */ TEXT_EQUIVALENTS_REQUIRE_UPDATING_SCRIPT, +/* [6.2.2.3] */ TEXT_EQUIVALENTS_REQUIRE_UPDATING_OBJECT, +/* [6.3.1.1] */ PROGRAMMATIC_OBJECTS_REQUIRE_TESTING_SCRIPT, +/* [6.3.1.2] */ PROGRAMMATIC_OBJECTS_REQUIRE_TESTING_OBJECT, +/* [6.3.1.3] */ PROGRAMMATIC_OBJECTS_REQUIRE_TESTING_EMBED, +/* [6.3.1.4] */ PROGRAMMATIC_OBJECTS_REQUIRE_TESTING_APPLET, +/* [6.5.1.1] */ FRAME_MISSING_NOFRAMES, +/* [6.5.1.2] */ NOFRAMES_INVALID_NO_VALUE, +/* [6.5.1.3] */ NOFRAMES_INVALID_CONTENT, +/* [6.5.1.4] */ NOFRAMES_INVALID_LINK, +/* [7.1.1.1] */ REMOVE_FLICKER_SCRIPT, +/* [7.1.1.2] */ REMOVE_FLICKER_OBJECT, +/* [7.1.1.3] */ REMOVE_FLICKER_EMBED, +/* [7.1.1.4] */ REMOVE_FLICKER_APPLET, +/* [7.1.1.5] */ REMOVE_FLICKER_ANIMATED_GIF, +/* [7.2.1.1] */ REMOVE_BLINK_MARQUEE, +/* [7.4.1.1] */ REMOVE_AUTO_REFRESH, +/* [7.5.1.1] */ REMOVE_AUTO_REDIRECT, +/* [8.1.1.1] */ ENSURE_PROGRAMMATIC_OBJECTS_ACCESSIBLE_SCRIPT, +/* [8.1.1.2] */ ENSURE_PROGRAMMATIC_OBJECTS_ACCESSIBLE_OBJECT, +/* [8.1.1.3] */ ENSURE_PROGRAMMATIC_OBJECTS_ACCESSIBLE_APPLET, +/* [8.1.1.4] */ ENSURE_PROGRAMMATIC_OBJECTS_ACCESSIBLE_EMBED, +/* [9.1.1.1] */ IMAGE_MAP_SERVER_SIDE_REQUIRES_CONVERSION, +/* [9.3.1.1] */ SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_MOUSE_DOWN, +/* [9.3.1.2] */ SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_MOUSE_UP, +/* [9.3.1.3] */ 
SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_CLICK, +/* [9.3.1.4] */ SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_MOUSE_OVER, +/* [9.3.1.5] */ SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_MOUSE_OUT, +/* [9.3.1.6] */ SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_MOUSE_MOVE, +/* [10.1.1.1] */ NEW_WINDOWS_REQUIRE_WARNING_NEW, +/* [10.1.1.2] */ NEW_WINDOWS_REQUIRE_WARNING_BLANK, +/* [10.2.1.1] */ LABEL_NEEDS_REPOSITIONING_BEFORE_INPUT, +/* [10.2.1.2] */ LABEL_NEEDS_REPOSITIONING_AFTER_INPUT, +/* [10.4.1.1] */ FORM_CONTROL_REQUIRES_DEFAULT_TEXT, +/* [10.4.1.2] */ FORM_CONTROL_DEFAULT_TEXT_INVALID_NULL, +/* [10.4.1.3] */ FORM_CONTROL_DEFAULT_TEXT_INVALID_SPACES, +/* [11.2.1.1] */ REPLACE_DEPRECATED_HTML_APPLET, +/* [11.2.1.2] */ REPLACE_DEPRECATED_HTML_BASEFONT, +/* [11.2.1.3] */ REPLACE_DEPRECATED_HTML_CENTER, +/* [11.2.1.4] */ REPLACE_DEPRECATED_HTML_DIR, +/* [11.2.1.5] */ REPLACE_DEPRECATED_HTML_FONT, +/* [11.2.1.6] */ REPLACE_DEPRECATED_HTML_ISINDEX, +/* [11.2.1.7] */ REPLACE_DEPRECATED_HTML_MENU, +/* [11.2.1.8] */ REPLACE_DEPRECATED_HTML_S, +/* [11.2.1.9] */ REPLACE_DEPRECATED_HTML_STRIKE, +/* [11.2.1.10] */ REPLACE_DEPRECATED_HTML_U, +/* [12.1.1.1] */ FRAME_MISSING_TITLE, +/* [12.1.1.2] */ FRAME_TITLE_INVALID_NULL, +/* [12.1.1.3] */ FRAME_TITLE_INVALID_SPACES, +/* [12.4.1.1] */ ASSOCIATE_LABELS_EXPLICITLY, +/* [12.4.1.2] */ ASSOCIATE_LABELS_EXPLICITLY_FOR, +/* [12.4.1.3] */ ASSOCIATE_LABELS_EXPLICITLY_ID, +/* [13.1.1.1] */ LINK_TEXT_NOT_MEANINGFUL, +/* [13.1.1.2] */ LINK_TEXT_MISSING, +/* [13.1.1.3] */ LINK_TEXT_TOO_LONG, +/* [13.1.1.4] */ LINK_TEXT_NOT_MEANINGFUL_CLICK_HERE, +/* [13.1.1.5] */ LINK_TEXT_NOT_MEANINGFUL_MORE, +/* [13.1.1.6] */ LINK_TEXT_NOT_MEANINGFUL_FOLLOW_THIS, +/* [13.2.1.1] */ METADATA_MISSING, +/* [13.2.1.2] */ METADATA_MISSING_LINK, +/* [13.2.1.3] */ METADATA_MISSING_REDIRECT_AUTOREFRESH, +/* [13.10.1.1] */ SKIPOVER_ASCII_ART, + + LAST_ACCESS_ERR; /* must be last */ + + public int code() { + return ordinal() + 1000; + } +} Added: 
branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Accessibility.java =================================================================== --- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Accessibility.java (rev 0) +++ branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Accessibility.java 2010-11-24 02:39:29 UTC (rev 1233) @@ -0,0 +1,2534 @@ +package org.w3c.tidy; + +/********************************************************************* +* AccessibilityChecks +* +* Carries out processes for all accessibility checks. Traverses +* through all the content within the tree and evaluates the tags for +* accessibility. +* +* To perform the following checks, 'AccessibilityChecks' must be +* called AFTER the tree structure has been formed. +* +* If, in the command prompt, there is no specification of which +* accessibility priorities to check, no accessibility checks will be +* performed. (ie. '1' for priority 1, '2' for priorities 1 and 2, +* and '3') for priorities 1, 2 and 3.) +* +* Copyright University of Toronto +* Programmed by: Mike Lam and Chris Ridpath +* Modifications by : Terry Teague (TRT) +* +* Reference document: http://www.w3.org/TR/WAI-WEBCONTENT/ +*********************************************************************/ + + +public class Accessibility { + +// private static final int TEXTBUF_SIZE = 128; + + /* List of possible image types */ + private static final String imageExtensions[] = {".jpg", ".gif", ".tif", ".pct", ".pic", ".iff", ".dib", + ".tga", ".pcx", ".png", ".jpeg", ".tiff", ".bmp"}; + + /* List of possible sound file types */ + private static final String soundExtensions[] = {".wav", ".au", ".aiff", ".snd", ".ra", ".rm"}; + + private static final AccessErrorCode soundExtErrCodes[] = { + AccessErrorCode.AUDIO_MISSING_TEXT_WAV, + AccessErrorCode.AUDIO_MISSING_TEXT_AU, + AccessErrorCode.AUDIO_MISSING_TEXT_AIFF, + AccessErrorCode.AUDIO_MISSING_TEXT_SND, + AccessErrorCode.AUDIO_MISSING_TEXT_RA, + AccessErrorCode.AUDIO_MISSING_TEXT_RM 
+ }; + + /* List of possible media extensions */ + private static final String mediaExtensions[] = {".mpg", ".mov", ".asx", ".avi", ".ivf", ".m1v", ".mmm", ".mp2v", + ".mpa", ".mpe", ".mpeg", ".ram", ".smi", ".smil", ".swf", ".wm", ".wma", ".wmv"}; + + /* List of possible frame sources */ + private static final String frameExtensions[] = {".htm", ".html", ".shtm", ".shtml", ".cfm", ".cfml", + ".asp", ".cgi", ".pl", ".smil"}; + + /* List of possible colour values */ + private static final int colorValues[][] = { + { 0, 0, 0}, + {128, 128, 128}, + {192, 192, 192}, + {255, 255, 255}, + {192, 0, 0}, + {255, 0, 0}, + {128, 0, 128}, + {255, 0, 255}, + { 0, 128, 0}, + { 0, 255, 0}, + {128, 128, 0}, + {255, 255, 0}, + { 0, 0, 128}, + { 0, 0, 255}, + { 0, 128, 128}, + { 0, 255, 255} + }; + + /* These arrays are used to convert color names to their RGB values */ + private static final String colorNames[] = { "black", "silver", "grey", "white", "maroon", "red", "purple", + "fuchsia", "green", "lime", "olive", "yellow", "navy", "blue", "teal", "aqua"}; + + /* gets set from Tidy variable AccessibilityCheckLevel */ + private int level; + + /* list of characters in the text nodes found within a container element */ + private final StringBuilder textNode = new StringBuilder(); + + /* Number of frame elements found within a frameset */ + private int numFrames; + + /* Number of 'longdesc' attributes found within a frameset */ + private int hasCheckedLongDesc; + + private int checkedHeaders; + private int listElements; + private int otherListElements; + + /* For 'USEMAP' identifier */ + private boolean hasUseMap; + private boolean hasName; + private boolean hasMap; + + /* For tracking nodes that are deleted from the original parse tree - TRT */ + /* Node *access_tree; */ + + private boolean hasTH; + private boolean hasValidFor; + private boolean hasValidId; + private boolean hasValidRowHeaders; + private boolean hasValidColumnHeaders; + private boolean hasInvalidRowHeader; + private 
boolean hasInvalidColumnHeader; + private int forID; + + /* + GetFileExtension takes a path and returns the extension + portion of the path (if any). + */ + + private static String getFileExtension(final String path) { + int i = path.length() - 1; + + do { + if (path.charAt(i) == '/' || path.charAt(i) == '\\') { + return ""; + } + else if (path.charAt(i) == '.') { + return path.substring(i); + } + } while (--i > 0); + return ""; + } + + /************************************************************************ + * IsImage + * + * Checks if the given filename is an image file. + ************************************************************************/ + + private static boolean isImage(final String iType) { + /* Get the file extension */ + final String ext = getFileExtension(iType); + + /* Compare it to the array of known image file extensions */ + for (String s : imageExtensions) { + if (ext.equalsIgnoreCase(s)) { + return true; + } + } + return false; + } + + + /*********************************************************************** + * IsSoundFile + * + * Checks if the given filename is a sound file. 
+ ***********************************************************************/ + + private static AccessErrorCode isSoundFile(final String sType) { + final String ext = getFileExtension(sType); + + for (int i = 0; i < soundExtensions.length; i++) { + if (ext.equalsIgnoreCase(soundExtensions[i])) { + return soundExtErrCodes[i]; + } + } + return null; + } + + + /*********************************************************************** + * IsValidSrcExtension + * + * Checks if the 'SRC' value within the FRAME element is valid + * The 'SRC' extension must end in ".htm", ".html", ".shtm", ".shtml", + * ".cfm", ".cfml", ".asp", ".cgi", ".pl", or ".smil" + ***********************************************************************/ + + private static boolean isValidSrcExtension(final String sType) { + final String ext = getFileExtension(sType); + + for (String s : frameExtensions) { + if (ext.equalsIgnoreCase(s)) { + return true; + } + } + return false; + } + + + /********************************************************************* + * IsValidMediaExtension + * + * Checks to warn the user that synchronized text equivalents are + * required if multimedia is used. + *********************************************************************/ + + private static boolean isValidMediaExtension(final String sType) { + final String ext = getFileExtension(sType); + + for (String s : mediaExtensions) { + if (ext.equalsIgnoreCase(s)) { + return true; + } + } + return false; + } + + + /************************************************************************ + * IsWhitespace + * + * Checks if the given string is all whitespace. 
+ ************************************************************************/ + + private static boolean isWhitespace(final String pString) { + if (pString == null) { + return true; + } + for (int i = 0; i < pString.length(); ++i) { + final char c = pString.charAt(i); + if (!TidyUtils.isWhite(c)) { + return false; + } + } + return true; + } + + private static boolean hasValue(final AttVal av) { + return av != null && !isWhitespace(av.value); + } + + /*********************************************************************** + * IsPlaceholderAlt + * + * Checks to see if there is an image and photo place holder contained + * in the ALT text. + ***********************************************************************/ + + private static boolean isPlaceholderAlt(final String txt) { + return txt.contains("image") || txt.contains("photo"); + } + + + /*********************************************************************** + * IsPlaceHolderObject + * + * Checks to see if there is an OBJECT place holder contained + * in the 'ALT' text. + ***********************************************************************/ + + private static boolean isPlaceHolderObject(final String txt) { + return txt.contains("object"); + } + + + /********************************************************** + * EndsWithBytes + * + * Checks to see if the ALT text ends with 'bytes' + **********************************************************/ + + private static boolean endsWithBytes(final String txt) { + return txt.endsWith("bytes"); + } + + + /******************************************************* + * textFromOneNode + * + * Returns a list of characters contained within one + * text node. 
+ *******************************************************/ + + private String textFromOneNode(final Lexer lexer, final Node node) { + if (node != null) { + /* Copy contents of a text node */ + return TidyUtils.getString(lexer.lexbuf, node.start, node.end - node.start); + } + return ""; + } + + + /********************************************************* + * getTextNode + * + * Locates text nodes within a container element. + * Retrieves text that are found contained within + * text nodes, and concatenates the text. + *********************************************************/ + + private void getTextNode(final Lexer lexer, final Node node) { + if (node.isText()) { + /* Retrieves each character found within the text node */ + textNode.append(TidyUtils.getString(lexer.lexbuf, node.start, node.end - node.start)); + } + } + + + /********************************************************** + * getTextNodeClear + * + * Clears the current 'textNode' and reloads it with new + * text. The textNode must be cleared before use. + **********************************************************/ + + private String getTextNodeClear(final Lexer lexer, final Node node) { + /* Clears list */ + textNode.setLength(0); + + getTextNode(lexer, node.content); + return textNode.toString(); + } + + /********************************************************** + * LevelX_Enabled + * + * Tell whether access "X" is enabled. + **********************************************************/ + + private boolean level1Enabled() { + return level == 1 || level == 2 || level == 3; + } + + private boolean level2Enabled() { + return level == 2 || level == 3; + } + + private boolean level3Enabled() { + return level == 3; + } + + /******************************************************** + * CheckColorAvailable + * + * Verify that information conveyed with color is + * available without color. 
+ ********************************************************/ + + private void checkColorAvailable(final Lexer lexer, final Node node) { + if (level1Enabled()) { + switch (node.getId()) { + case IMG: + lexer.report.accessWarning(lexer, node, AccessErrorCode.INFORMATION_NOT_CONVEYED_IMAGE); + break; + case APPLET: + lexer.report.accessWarning(lexer, node, AccessErrorCode.INFORMATION_NOT_CONVEYED_APPLET); + break; + case OBJECT: + lexer.report.accessWarning(lexer, node, AccessErrorCode.INFORMATION_NOT_CONVEYED_OBJECT); + break; + case SCRIPT: + lexer.report.accessWarning(lexer, node, AccessErrorCode.INFORMATION_NOT_CONVEYED_SCRIPT); + break; + case INPUT: + lexer.report.accessWarning(lexer, node, AccessErrorCode.INFORMATION_NOT_CONVEYED_INPUT); + break; + } + } + } + + /********************************************************************* + * CheckColorContrast + * + * Checks elements for color contrast. Must have valid contrast for + * valid visibility. + * + * This logic is extremely fragile as it does not recognize + * the fact that color is inherited by many components and + * that BG and FG colors are often set separately. E.g. the + * background color may be set by for the body or a table + * or a cell. The foreground color may be set by any text + * element (p, h1, h2, input, textarea), either explicitly + * or by style. Ergo, this test will not handle most real + * world cases. It's a start, however. 
+ *********************************************************************/ + + private void checkColorContrast(final Lexer lexer, final Node node) { + int rgbBG[] = {255,255,255}; /* Black text on white BG */ + + if (level3Enabled()) { + boolean gotBG = true; + AttVal av; + + /* Check for 'BGCOLOR' first to compare with other color attributes */ + for (av = node.attributes; av != null; av = av.next) { + if (av.is(AttrId.BGCOLOR)) { + if (hasValue(av)) { + gotBG = getRgb(av.value, rgbBG); + } + } + } + /* + Search for COLOR attributes to compare with background color + Must have valid colour contrast + */ + for (av = node.attributes; gotBG && av != null; av = av.next) { + AccessErrorCode errcode = null; + switch (av.getId()) { + case TEXT: + errcode = AccessErrorCode.COLOR_CONTRAST_TEXT; + break; + case LINK: + errcode = AccessErrorCode.COLOR_CONTRAST_LINK; + break; + case ALINK: + errcode = AccessErrorCode.COLOR_CONTRAST_ACTIVE_LINK; + break; + case VLINK: + errcode = AccessErrorCode.COLOR_CONTRAST_VISITED_LINK; + break; + } + if (errcode != null && hasValue(av)) { + int rgbFG[] = {0, 0, 0}; /* Black text */ + + if (getRgb(av.value, rgbFG) && !compareColors(rgbBG, rgbFG)) { + lexer.report.accessWarning(lexer, node, errcode); + } + } + } + } + } + + + /************************************************************** + * CompareColors + * + * Compares two RGB colors for good contrast. 
+ **************************************************************/ + private static int minmax(final int i1, final int i2) { + return Math.max(i1, i2) - Math.min(i1, i2); + } + + private static int brightness(final int rgb[]) { + return ((rgb[0] * 299) + (rgb[1] * 587) + (rgb[2] * 114)) / 1000; + } + + private static boolean compareColors(final int rgbBG[], final int rgbFG[]) { + int brightBG = brightness(rgbBG); + int brightFG = brightness(rgbFG); + + int diffBright = minmax(brightBG, brightFG); + int diffColor = minmax(rgbBG[0], rgbFG[0]) + minmax(rgbBG[1], rgbFG[1]) + minmax(rgbBG[2], rgbFG[2]); + + return diffBright > 180 && diffColor > 500; + } + + + /********************************************************************* + * GetRgb + * + * Gets the red, green and blue values for this attribute for the + * background. + * + * Example: If attribute is BGCOLOR="#121005" then red = 18, green = 16, + * blue = 5. + *********************************************************************/ + + private static boolean getRgb(final String color, final int rgb[]) { + /* Check if we have a color name */ + for (int x = 0; x < colorNames.length; x++) { + if (colorNames[x].contains(color)) { + rgb[0] = colorValues[x][0]; + rgb[1] = colorValues[x][1]; + rgb[2] = colorValues[x][2]; + return true; + } + } + /* + No color name so must be hex values + Is this a number in hexadecimal format? + */ + /* Must be 7 characters in the RGB value (including '#') */ + if (color.length() == 7 && color.charAt(0) == '#') { + rgb[0] = (ctox(color.charAt(1)) * 16) + ctox(color.charAt(2)); + rgb[1] = (ctox(color.charAt(3)) * 16) + ctox(color.charAt(4)); + rgb[2] = (ctox(color.charAt(5)) * 16) + ctox(color.charAt(6)); + return true; + } + return false; + } + + + /******************************************************************* + * ctox + * + * Converts a character to a number. + * Example: if given character is 'A' then returns 10. + * + * Returns the number that the character represents. 
Returns -1 if not a + * valid number. + *******************************************************************/ + + private static int ctox(final char ch) { + if (ch >= '0' && ch <= '9') { + return ch - '0'; + } + else if (ch >= 'a' && ch <= 'f') { + return ch - 'a' + 10; + } + else if (ch >= 'A' && ch <= 'F') { + return ch - 'A' + 10; + } + return -1; + } + + + /*********************************************************** + * CheckImage + * + * Checks all image attributes for specific elements to + * check for validity of the values contained within + * the attributes. An appropriate warning message is displayed + * to indicate the error. + ***********************************************************/ + + private void checkImage(final Lexer lexer, Node node) { + if (level1Enabled()) { + boolean hasAlt = false; + boolean hasIsMap = false; + boolean hasLongDesc = false; + boolean hasDLINK = false; + boolean hasValidHeight = false; + boolean hasValidWidthBullet = false; + boolean hasValidWidthHR = false; + boolean hasTriggeredMissingLongDesc = false; + + /* Checks all image attributes for invalid values within attributes */ + for (AttVal av = node.attributes; av != null; av = av.next) { + /* + Checks for valid ALT attribute. + The length of the alt text must be less than 150 characters + long. 
+ */ + if (av.is(AttrId.ALT)) { + if (av.value != null) { + if (av.value.length() < 150 && !isPlaceholderAlt(av.value) && !isPlaceHolderObject(av.value) + && !endsWithBytes(av.value) && !isImage(av.value)) { + hasAlt = true; + } + else if (av.value.length() > 150) { + hasAlt = true; + lexer.report.accessWarning(lexer, node, AccessErrorCode.IMG_ALT_SUSPICIOUS_TOO_LONG); + } + else if (isImage(av.value)) { + hasAlt = true; + lexer.report.accessWarning(lexer, node, AccessErrorCode.IMG_ALT_SUSPICIOUS_FILENAME); + } + else if (isPlaceholderAlt(av.value)) { + hasAlt = true; + lexer.report.accessWarning(lexer, node, AccessErrorCode.IMG_ALT_SUSPICIOUS_PLACEHOLDER); + } + else if (endsWithBytes(av.value)) { + hasAlt = true; + lexer.report.accessWarning(lexer, node, AccessErrorCode.IMG_ALT_SUSPICIOUS_FILE_SIZE); + } + } + } + /* + Checks for width values of 'bullets' and 'horizontal + rules' for validity. + + Valid pixel width for 'bullets' must be < 30, and > 150 for + horizontal rules. + */ + else if (av.is(AttrId.WIDTH)) { + /* Longdesc attribute needed if width attribute is not present. */ + if (hasValue(av)) { + int width = Integer.parseInt(av.value); + if (width < 30) { + hasValidWidthBullet = true; + } + if (width > 150) { + hasValidWidthHR = true; + } + } + } + /* + Checks for height values of 'bullets' and horizontal + rules for validity. + + Valid pixel height for 'bullets' and horizontal rules + must be < 30. + */ + else if (av.is(AttrId.HEIGHT)) { + /* Longdesc attribute needed if height attribute not present. */ + if (hasValue(av) && Integer.parseInt(av.value) < 30) { + hasValidHeight = true; + } + } + /* + Checks for longdesc and determines validity. + The length of the 'longdesc' must be > 1 + */ + else if (av.is(AttrId.LONGDESC)) { + if (hasValue(av) && av.value.length() > 1) { + hasLongDesc = true; + } + } + /* + Checks for 'USEMAP' attribute. 
Ensures that + text links are provided for client-side image maps + */ + else if (av.is(AttrId.USEMAP)) { + if (hasValue(av)) { + hasUseMap = true; + } + } + else if (av.is(AttrId.ISMAP)) { + hasIsMap = true; + } + } + + /* + Check to see if a dLINK is present. The ANCHOR element must + be present following the IMG element. The text found between + the ANCHOR tags must be < 6 characters long, and must contain + the letter 'd'. + */ + if (node.next.is(TagId.A)) { + node = node.next; + /* + Node following the anchor must be a text node + for dLINK to exist + */ + if (node.content != null && node.content.tag == null) { + /* Number of characters found within the text node */ + String word = textFromOneNode(lexer, node.content); + if (word.equals("d") || word.equals("D")) { + hasDLINK = true; + } + } + } + /* + Special case check for dLINK. This will occur if there is + whitespace between the <img> and <a> elements. Ignores + whitespace and continues check for dLINK. + */ + if (node.next != null && node.next.tag == null) { + node = node.next; + + if (node.next.is(TagId.A)) { + node = node.next; + /* + Node following the ANCHOR must be a text node + for dLINK to exist + */ + if (node.content != null && node.content.tag == null) { + /* Number of characters found within the text node */ + String word = textFromOneNode(lexer, node.content); + + if (word.equals("d") || word.equals("D")) { + hasDLINK = true; + } + } + } + } + + if (!hasAlt) { + lexer.report.accessError(lexer, node, AccessErrorCode.IMG_MISSING_ALT); + } + if (!hasLongDesc && hasValidHeight && (hasValidWidthHR || hasValidWidthBullet)) { + hasTriggeredMissingLongDesc = true; + } + if (!hasTriggeredMissingLongDesc) { + if (hasDLINK && !hasLongDesc) { + lexer.report.accessWarning(lexer, node, AccessErrorCode.IMG_MISSING_LONGDESC); + } + if (hasLongDesc && !hasDLINK) { + lexer.report.accessWarning(lexer, node, AccessErrorCode.IMG_MISSING_DLINK); + } + if (!hasLongDesc && !hasDLINK) { + 
lexer.report.accessWarning(lexer, node, AccessErrorCode.IMG_MISSING_LONGDESC_DLINK); + } + } + if (hasIsMap) { + lexer.report.accessError(lexer, node, AccessErrorCode.IMAGE_MAP_SERVER_SIDE_REQUIRES_CONVERSION); + lexer.report.accessWarning(lexer, node, AccessErrorCode.IMG_MAP_SERVER_REQUIRES_TEXT_LINKS); + } + } + } + + + /*********************************************************** + * CheckApplet + * + * Checks APPLET element to check for validity pertaining + * the 'ALT' attribute. An appropriate warning message is + * displayed to indicate the error. An appropriate warning + * message is displayed to indicate the error. If no 'ALT' + * text is present, then there must be alternate content + * within the APPLET element. + ***********************************************************/ + + private void checkApplet(final Lexer lexer, final Node node) { + if (level1Enabled()) { + boolean hasAlt = false; + boolean hasDescription = false; + + /* Checks for attributes within the APPLET element */ + for (AttVal av = node.attributes; av != null; av = av.next) { + /* + Checks for valid ALT attribute. + The length of the alt text must be > 4 characters in length + but must be < 150 characters long. 
+ */ + if (av.is(AttrId.ALT)) { + if (av.value != null) { + hasAlt = true; + } + } + } + if (!hasAlt) { + /* Must have alternate text representation for that element */ + if (node.content != null) { + String word = null; + + if (node.content.tag == null) { + word = textFromOneNode(lexer, node.content); + } + if (node.content.content != null && node.content.content.tag == null) { + word = textFromOneNode(lexer, node.content.content); + } + if (word != null && !isWhitespace(word)) { + hasDescription = true; + } + } + } + if (!hasDescription && !hasAlt) { + lexer.report.accessError(lexer, node, AccessErrorCode.APPLET_MISSING_ALT); + } + } + } + + + /******************************************************************* + * CheckObject + * + * Checks to verify whether the OBJECT element contains + * 'ALT' text, and to see that the sound file selected is + * of a valid sound file type. OBJECT must have an alternate text + * representation. + *******************************************************************/ + + private void checkObject(final Lexer lexer, final Node node) { + if (level1Enabled()) { + boolean hasAlt = false; + boolean hasDescription = false; + + if (node.content != null) { + if (!node.content.isText()) { + Node tnode = node.content; + + for (AttVal av = tnode.attributes; av != null; av = av.next) { + if (av.is(AttrId.ALT)) { + hasAlt = true; + break; + } + } + } + + /* Must have alternate text representation for that element */ + if (!hasAlt) { + String word = null; + if (node.content.isText()) { + word = textFromOneNode(lexer, node.content); + } + if (word == null && node.content.content.isText()) { + word = textFromOneNode(lexer, node.content.content); + } + if (word != null && !isWhitespace(word)) { + hasDescription = true; + } + } + } + + if (!hasAlt && !hasDescription) { + lexer.report.accessError(lexer, node, AccessErrorCode.OBJECT_MISSING_ALT); + } + } + } + + + /*************************************************************** + * 
CheckMissingStyleSheets + * + * Ensures that stylesheets are used to control the presentation. + ***************************************************************/ + + private static boolean checkMissingStyleSheets(final Node node) { + boolean sspresent = false; + + for (Node content = node.content; !sspresent && content != null; content = content.next) { + sspresent = content.is(TagId.LINK) || content.is(TagId.STYLE) || content.is(TagId.FONT) + || content.is(TagId.BASEFONT); + + for (AttVal av = content.attributes; !sspresent && av != null; av = av.next) { + sspresent = av.is(AttrId.STYLE) || av.is(AttrId.TEXT) || av.is(AttrId.VLINK) || av.is(AttrId.ALINK) + || av.is(AttrId.LINK); + + if (!sspresent && av.is(AttrId.REL)) { + sspresent = av.valueIs("stylesheet"); + } + } + + if (!sspresent) { + sspresent = checkMissingStyleSheets(content); + } + } + return sspresent; + } + + + /******************************************************************* + * CheckFrame + * + * Checks if the URL is valid and to check if a 'LONGDESC' is needed + * within the FRAME element. If a 'LONGDESC' is needed, the value must + * be valid. The URL must end with the file extension, htm, or html. + * Also, checks to ensure that the 'SRC' and 'TITLE' values are valid. 
+ *******************************************************************/ + + private void checkFrame(final Lexer lexer, final Node node) { + boolean hasTitle = false; + numFrames++; + + if (level1Enabled()) { + /* Checks for attributes within the FRAME element */ + for (AttVal av = node.attributes; av != null; av = av.next) { + /* Checks if 'LONGDESC' value is valid only if present */ + if (av.is(AttrId.LONGDESC)) { + if (hasValue(av) && av.value.length() > 1) { + hasCheckedLongDesc++; + } + } + + /* Checks for valid 'SRC' value within the frame element */ + else if (av.is(AttrId.SRC)) { + if (hasValue(av) && !isValidSrcExtension(av.value)) { + lexer.report.accessError(lexer, node, AccessErrorCode.FRAME_SRC_INVALID); + } + } + + /* Checks for valid 'TITLE' value within frame element */ + else if (av.is(AttrId.TITLE)) { + if (hasValue(av)) { + hasTitle = true; + } + if (!hasTitle) { + if (av.value == null || av.value.length() == 0) { + hasTitle = true; + lexer.report.accessError(lexer, node, AccessErrorCode.FRAME_TITLE_INVALID_NULL); + } + else { + if (isWhitespace(av.value) && av.value.length() > 0) { + hasTitle = true; + lexer.report.accessError(lexer, node, AccessErrorCode.FRAME_TITLE_INVALID_SPACES); + } + } + } + } + } + if (!hasTitle) { + lexer.report.accessError(lexer, node, AccessErrorCode.FRAME_MISSING_TITLE); + } + if (numFrames == 3 && hasCheckedLongDesc < 3) { + numFrames = 0; + lexer.report.accessWarning(lexer, node, AccessErrorCode.FRAME_MISSING_LONGDESC); + } + } + } + + + /**************************************************************** + * CheckIFrame + * + * Checks if 'SRC' value is valid. Must end in appropriate + * file extension. 
+ ****************************************************************/ + + private void checkIFrame(final Lexer lexer, final Node node) { + if (level1Enabled()) { + /* Checks for valid 'SRC' value within the IFRAME element */ + final AttVal av = node.getAttrById(AttrId.SRC); + if (hasValue(av)) { + if (!isValidSrcExtension(av.value)) { + lexer.report.accessError(lexer, node, AccessErrorCode.FRAME_SRC_INVALID); + } + } + } + } + + + /********************************************************************** + * CheckAnchorAccess + * + * Checks that the sound file is valid, and to ensure that + * text transcript is present describing the 'HREF' within the + * ANCHOR element. Also checks to see ensure that the 'TARGET' attribute + * (if it exists) is not null and does not contain '_new' or '_blank'. + **********************************************************************/ + + private void checkAnchorAccess(final Lexer lexer, final Node node) { + boolean hasDescription = false; + boolean hasTriggeredLink = false; + + /* Checks for attributes within the ANCHOR element */ + for (AttVal av = node.attributes; av != null; av = av.next) { + if (level1Enabled()) { + /* Must be of valid sound file type */ + if (av.is(AttrId.HREF)) { + if (hasValue(av)) { + String ext = getFileExtension (av.value); + + /* Checks to see if multimedia is used */ + if (isValidMediaExtension(av.value)) { + lexer.report.accessError(lexer, node, AccessErrorCode.MULTIMEDIA_REQUIRES_TEXT); + } + /* + Checks for validity of sound file, and checks to see if + the file is described within the document, or by a link + that is present which gives the description. 
+ */ + if (ext.length() < 6 && ext.length() > 0) { + AccessErrorCode errcode = isSoundFile(av.value); + if (errcode != null) { + if (node.next != null) { + if (node.next.tag == null) { + String word = textFromOneNode(lexer, node.next); + + /* Must contain at least one letter in the text */ + if (!isWhitespace(word)) { + hasDescription = true; + } + } + } + + /* Must contain text description of sound file */ + if (!hasDescription) { + lexer.report.accessError(lexer, node, errcode); + } + } + } + } + } + } + + if (level2Enabled()) { + /* Checks 'TARGET' attribute for validity if it exists */ + if (av.is(AttrId.TARGET)) { + if (av.valueIs("_new")) { + lexer.report.accessWarning(lexer, node, AccessErrorCode.NEW_WINDOWS_REQUIRE_WARNING_NEW); + } + else if (av.valueIs("_blank")) { + lexer.report.accessWarning(lexer, node, AccessErrorCode.NEW_WINDOWS_REQUIRE_WARNING_BLANK); + } + } + } + } + + if (level2Enabled()) { + if (node.content != null && node.content.tag == null) { + String word = textFromOneNode(lexer, node.content); + if (word != null && !isWhitespace(word)) { + if (word.equals("more")) { + hasTriggeredLink = true; + } + if (word.equals("click here")) { + lexer.report.accessWarning(lexer, node, AccessErrorCode.LINK_TEXT_NOT_MEANINGFUL_CLICK_HERE); + } + if (hasTriggeredLink == false) { + if (word.length() < 6) { + lexer.report.accessWarning(lexer, node, AccessErrorCode.LINK_TEXT_NOT_MEANINGFUL); + } + } + if (word.length() > 60) { + lexer.report.accessWarning(lexer, node, AccessErrorCode.LINK_TEXT_TOO_LONG); + } + } + } + if (node.content == null) { + lexer.report.accessWarning(lexer, node, AccessErrorCode.LINK_TEXT_MISSING); + } + } + } + + + /************************************************************ + * CheckArea + * + * Checks attributes within the AREA element to + * determine if the 'ALT' text and 'HREF' values are valid. + * Also checks to see ensure that the 'TARGET' attribute + * (if it exists) is not null and does not contain '_new' + * or '_blank'. 
+ ************************************************************/ + + private void checkArea(final Lexer lexer, final Node node) { + boolean hasAlt = false; + + /* Checks all attributes within the AREA element */ + for (AttVal av = node.attributes; av != null; av = av.next) { + if (level1Enabled()) { + /* + Checks for valid ALT attribute. + The length of the alt text must be > 4 characters long + but must be less than 150 characters long. + */ + if (av.is(AttrId.ALT)) { + /* The check for validity */ + if (av.value != null) { + hasAlt = true; + } + } + } + + if (level2Enabled()) { + if (av.is(AttrId.TARGET)) { + if (av.valueIs("_new")) { + lexer.report.accessWarning(lexer, node, AccessErrorCode.NEW_WINDOWS_REQUIRE_WARNING_NEW); + } + else if (av.valueIs("_blank")) { + lexer.report.accessWarning(lexer, node, AccessErrorCode.NEW_WINDOWS_REQUIRE_WARNING_BLANK); + } + } + } + } + + if (level1Enabled()) { + /* AREA must contain alt text */ + if (!hasAlt) { + lexer.report.accessError(lexer, node, AccessErrorCode.AREA_MISSING_ALT); + } + } + } + + + /*************************************************** + * CheckScript + * + * Checks the SCRIPT element to ensure that a + * NOSCRIPT section follows the SCRIPT. + ***************************************************/ + + private void checkScriptAcc(final Lexer lexer, final Node node) { + if (level1Enabled()) { + /* NOSCRIPT element must appear immediately following SCRIPT element */ + if (node.next == null || !node.next.is(TagId.NOSCRIPT)) { + lexer.report.accessError(lexer, node, AccessErrorCode.SCRIPT_MISSING_NOSCRIPT); + } + } + } + + + /********************************************************** + * CheckRows + * + * Check to see that each table has a row of headers if + * a column of columns doesn't exist. 
+ **********************************************************/ + + private void checkRows(final Lexer lexer, Node node) { + int numTR = 0; + int numValidTH = 0; + checkedHeaders++; + + for (; node != null; node = node.next) { + numTR++; + if (node.content.is(TagId.TH)) { + hasTH = true; + if (node.content.content.isText()) { + String word = textFromOneNode(lexer, node.content.content); + if (!isWhitespace(word)) { + numValidTH++; + } + } + } + } + if (numTR == numValidTH) { + hasValidRowHeaders = true; + } + if (numTR >= 2 && numTR > numValidTH && numValidTH >= 2 && hasTH) { + hasInvalidRowHeader = true; + } + } + + + /********************************************************** + * CheckColumns + * + * Check to see that each table has a column of headers if + * a row of columns doesn't exist. + **********************************************************/ + + private void checkColumns(final Lexer lexer, final Node node) { + int numTH = 0; + boolean isMissingHeader = false; + + checkedHeaders++; + + /* Table must have row of headers if headers for columns don't exist */ + if (node.content.is(TagId.TH)) { + hasTH = true; + + for (Node tnode = node.content; tnode != null; tnode = tnode.next) { + if (tnode.is(TagId.TH)) { + if (tnode.content.isText()) { + String word = textFromOneNode(lexer, tnode.content); + if (!isWhitespace(word)) { + numTH++; + } + } + } + else { + isMissingHeader = true; + } + } + } + + if (!isMissingHeader && numTH > 0) { + hasValidColumnHeaders = true; + } + if (isMissingHeader && numTH >= 2) { + hasInvalidColumnHeader = true; + } + } + + + /***************************************************** + * CheckTH + * + * Checks to see if the header provided for a table + * requires an abbreviation. 
(only required if the + * length of the header is greater than 15 characters) + *****************************************************/ + + private void checkTH(final Lexer lexer, final Node node) { + if (level3Enabled()) { + boolean hasAbbr = false; + + /* Checks TH element for 'ABBR' attribute */ + for (AttVal av = node.attributes; av != null; av = av.next) { + if (av.is(AttrId.ABBR)) { + /* Value must not be null and must be less than 15 characters */ + if (av.value != null && !isWhitespace(av.value)) { + hasAbbr = true; + } + if (av.value == null || av.value.length() == 0) { + hasAbbr = true; + lexer.report.accessWarning(lexer, node, AccessErrorCode.TABLE_MAY_REQUIRE_HEADER_ABBR_NULL); + } + if (isWhitespace(av.value) && av.value.length() > 0) { + hasAbbr = true; + lexer.report.accessWarning(lexer, node, AccessErrorCode.TABLE_MAY_REQUIRE_HEADER_ABBR_SPACES); + } + } + } + + /* If the header is greater than 15 characters, an abbreviation is needed */ + final String word = textFromOneNode(lexer, node.content); + + if (word != null && !isWhitespace(word)) { + /* Must have 'ABBR' attribute if header is > 15 characters */ + if (word.length() > 15 && !hasAbbr) { + lexer.report.accessWarning(lexer, node, AccessErrorCode.TABLE_MAY_REQUIRE_HEADER_ABBR); + } + } + } + } + + + /***************************************************************** + * CheckMultiHeaders + * + * Layout tables should make sense when linearized. + * TABLE must contain at least one TH element. + * This technique applies only to tables used for layout purposes, + * not to data tables. Checks for column of multiple headers. 
+ *****************************************************************/ + + private void checkMultiHeaders(final Lexer lexer, final Node node) { + if (level1Enabled()) { + boolean validColSpanRows = true; + boolean validColSpanColumns = true; + + int flag = 0; + + if (node.content != null) { + Node tnode = node.content; + /* + Checks for column of multiple headers found + within a data table. + */ + while (tnode != null) { + if (tnode.is(TagId.TR)) { + if (tnode.content != null) { + Node temp = tnode.content; + + /* The number of TH elements found within TR element */ + if (flag == 0) { + while (temp != null) { + /* + Must contain at least one TH element + within in the TR element + */ + if (temp.is(TagId.TH)) { + for (AttVal av = temp.attributes; av != null; av = av.next) { + if (av.is(AttrId.COLSPAN) && (Integer.parseInt(av.value) > 1)) { + validColSpanColumns = false; + } + if (av.is(AttrId.ROWSPAN) && (Integer.parseInt(av.value) > 1)) { + validColSpanRows = false; + } + } + } + temp = temp.next; + } + flag = 1; + } + } + } + tnode = tnode.next; + } + /* Displays HTML 4 Table Algorithm when multiple column of headers used */ + if (!validColSpanRows) { + lexer.report.accessWarning(lexer, node, AccessErrorCode.DATA_TABLE_REQUIRE_MARKUP_ROW_HEADERS); + lexer.report.displayHTMLTableAlgorithm(lexer); + } + if (!validColSpanColumns) { + lexer.report.accessWarning(lexer, node, AccessErrorCode.DATA_TABLE_REQUIRE_MARKUP_COLUMN_HEADERS); + lexer.report.displayHTMLTableAlgorithm(lexer); + } + } + } + } + + + /**************************************************** + * CheckTable + * + * Checks the TABLE element to ensure that the + * table is not missing any headers. Must have either + * a row or column of headers. 
+ ****************************************************/ + + private void checkTable(final Lexer lexer, final Node node) { + int numTR = 0; + boolean hasSummary = false; + boolean hasCaption = false; + + if (level3Enabled()) { + /* Table must have a 'SUMMARY' describing the purpose of the table */ + for (AttVal av = node.attributes; av != null; av = av.next) { + if (av.is(AttrId.SUMMARY)) { + if (hasValue(av)) { + hasSummary = true; + if (av.contains("summary") && av.contains("table")) { + lexer.report.accessError(lexer, node, AccessErrorCode.TABLE_SUMMARY_INVALID_PLACEHOLDER); + } + } + if (av.value == null || av.value.length() == 0) { + hasSummary = true; + lexer.report.accessError(lexer, node, AccessErrorCode.TABLE_SUMMARY_INVALID_NULL); + } + else if (isWhitespace(av.value) && av.value.length() > 0) { + hasSummary = true; + lexer.report.accessError(lexer, node, AccessErrorCode.TABLE_SUMMARY_INVALID_SPACES); + } + } + } + + /* TABLE must have content. */ + if (node.content == null) { + lexer.report.accessError(lexer, node, AccessErrorCode.DATA_TABLE_MISSING_HEADERS); + return; + } + } + + if (level1Enabled()) { + /* Checks for multiple headers */ + checkMultiHeaders(lexer, node); + } + + if (level2Enabled()) { + /* Table must have a CAPTION describing the purpose of the table */ + if (node.content.is(TagId.CAPTION)) { + Node tnode = node.content; + String word = null; + + if (tnode.content != null && tnode.content.tag == null) { + word = getTextNodeClear(lexer, tnode); + } + if (!isWhitespace(word)) { + hasCaption = true; + } + } + if (!hasCaption) { + lexer.report.accessError(lexer, node, AccessErrorCode.TABLE_MISSING_CAPTION); + } + } + + if (node.content != null) { + if (node.content.is(TagId.CAPTION) && node.content.next != null && node.content.next.is(TagId.TR)) { + checkColumns(lexer, node.content.next); + } + else if (node.content.is(TagId.TR)) { + checkColumns(lexer, node.content); + } + } + if (!hasValidColumnHeaders) { + if (node.content != null) { + if 
(node.content.is(TagId.CAPTION) && node.content.next != null && node.content.next.is(TagId.TR)) { + checkRows(lexer, node.content.next); + } + else if (node.content.is(TagId.TR)) { + checkRows(lexer, node.content); + } + } + } + + if (level3Enabled()) { + /* Suppress warning for missing 'SUMMARY for HTML 2.0 and HTML 3.2 */ + if (!hasSummary) { + lexer.report.accessError(lexer, node, AccessErrorCode.TABLE_MISSING_SUMMARY); + } + } + + if (level2Enabled()) { + if (node.content != null) { + Node temp = node.content; + + while (temp != null) { + if (temp.is(TagId.TR)) { + numTR++; + } + temp = temp.next; + } + + if (numTR == 1) { + lexer.report.accessWarning(lexer, node, AccessErrorCode.LAYOUT_TABLES_LINEARIZE_PROPERLY); + } + } + if (hasTH) { + lexer.report.accessWarning(lexer, node, AccessErrorCode.LAYOUT_TABLE_INVALID_MARKUP); + } + } + + if (level1Enabled()) { + if (checkedHeaders == 2) { + if (!hasValidRowHeaders && !hasValidColumnHeaders && !hasInvalidRowHeader && !hasInvalidColumnHeader) { + lexer.report.accessError(lexer, node, AccessErrorCode.DATA_TABLE_MISSING_HEADERS); + } + if (!hasValidRowHeaders && hasInvalidRowHeader) { + lexer.report.accessError(lexer, node, AccessErrorCode.DATA_TABLE_MISSING_HEADERS_ROW); + } + + if (!hasValidColumnHeaders && hasInvalidColumnHeader) { + lexer.report.accessError(lexer, node, AccessErrorCode.DATA_TABLE_MISSING_HEADERS_COLUMN); + } + } + } + } + + + /*************************************************** + * CheckASCII + * + * Checks for valid text equivalents for XMP and PRE + * elements for ASCII art. Ensures that there is + * a skip over link to skip multi-lined ASCII art. 
+ ***************************************************/ + + private void checkASCII(final Lexer lexer, final Node node) { + String skipOver = null; + boolean IsAscii = false; + int hasSkipOverLink = 0; + + int newLines = -1; + char compareLetter; + int matchingCount = 0; + + if (level1Enabled() && node.content != null) { + /* + Checks the text within the PRE and XMP tags to see if ascii + art is present + */ + for (int i = node.content.start + 1; i < node.content.end; i++) { + matchingCount = 0; + + /* Counts the number of lines of text */ + if (lexer.lexbuf[i] == '\n') { + newLines++; + } + compareLetter = (char) lexer.lexbuf[i]; + + /* Counts consecutive character matches */ + for (int x = i; x < i + 5; x++) { + if (lexer.lexbuf[x] == compareLetter) { + matchingCount++; + } + else { + break; + } + } + + /* Must have at least 5 consecutive character matches */ + if (matchingCount >= 5) { + break; + } + } + /* + Must have more than 6 lines of text OR 5 or more consecutive + letters that are the same for there to be ascii art + */ + if (newLines >= 6 || matchingCount >= 5) { + IsAscii = true; + } + + /* Checks for skip over link if ASCII art is present */ + if (IsAscii) { + if (node.prev != null && node.prev.prev != null) { + final Node temp1 = node.prev.prev; + + /* Checks for 'HREF' attribute */ + for (AttVal av = temp1.attributes; av != null; av = av.next) { + if (av.is(AttrId.HREF) && hasValue(av)) { + skipOver = av.value; + hasSkipOverLink++; + } + } + } + } + } + + if (level2Enabled()) { + /* + Checks for A element following PRE to ensure proper skipover link + only if there is an A element preceding PRE. 
+ */ + if (hasSkipOverLink == 1) { + if (node.next.is(TagId.A)) { + final Node temp2 = node.next; + + /* Checks for 'NAME' attribute */ + for (AttVal av = temp2.attributes; av != null; av = av.next) { + if (av.is(AttrId.NAME) && hasValue(av)) { + /* + Value within the 'HREF' attribute must be the same + as the value within the 'NAME' attribute for valid + skipover. + */ + if (skipOver.contains(av.value)) { + hasSkipOverLink++; + } + } + } + } + } + + if (IsAscii) { + lexer.report.accessError(lexer, node, AccessErrorCode.ASCII_REQUIRES_DESCRIPTION); + if (level3Enabled() && hasSkipOverLink < 2) { + lexer.report.accessError(lexer, node, AccessErrorCode.SKIPOVER_ASCII_ART); + } + } + } + } + + + /*********************************************************** + * CheckFormControls + * + * <form> must have valid 'FOR' attribute, and <label> must + * have valid 'ID' attribute for valid form control. + ***********************************************************/ + + private void checkFormControls(final Lexer lexer, final Node node) { + if (!hasValidFor && hasValidId) { + lexer.report.accessError(lexer, node, AccessErrorCode.ASSOCIATE_LABELS_EXPLICITLY_FOR); + } + if (!hasValidId && hasValidFor) { + lexer.report.accessError(lexer, node, AccessErrorCode.ASSOCIATE_LABELS_EXPLICITLY_ID); + } + if (!hasValidId && !hasValidFor) { + lexer.report.accessError(lexer, node, AccessErrorCode.ASSOCIATE_LABELS_EXPLICITLY); + } + } + + + /************************************************************ + * CheckLabel + * + * Check for valid 'FOR' attribute within the LABEL element + ************************************************************/ + + private void checkLabel(final Lexer lexer, final Node node) { + if (level2Enabled()) { + /* Checks for valid 'FOR' attribute */ + final AttVal av = node.getAttrById(AttrId.FOR); + if (hasValue(av)) { + hasValidFor = true; + } + if (++forID == 2) { + forID = 0; + checkFormControls(lexer, node); + } + } + } + + + 
/************************************************************ + * CheckInputLabel + * + * Checks for valid 'ID' attribute within the INPUT element. + * Checks to see if there is a LABEL directly before + * or after the INPUT element determined by the 'TYPE'. + * Each INPUT element must have a LABEL describing the form. + ************************************************************/ + + private void checkInputLabel(final Lexer lexer, final Node node) { + if (level2Enabled()) { + /* Checks attributes within the INPUT element */ + for (AttVal av = node.attributes; av != null; av = av.next) { + /* Must have valid 'ID' value */ + if (av.is(AttrId.ID) && hasValue(av)) { + hasValidId = true; + } + } + if (++forID == 2) { + forID = 0; + checkFormControls(lexer, node); + } + } + } + + + /*************************************************************** + * CheckInputAttributes + * + * INPUT element must have a valid 'ALT' attribute if the + * 'VALUE' attribute is present. + ***************************************************************/ + + private void checkInputAttributes(final Lexer lexer, final Node node) { + boolean hasAlt = false; + boolean mustHaveAlt = false; + + /* Checks attributes within the INPUT element */ + for (AttVal av = node.attributes; av != null; av = av.next) { + /* 'VALUE' must be found if the 'TYPE' is 'text' or 'checkbox' */ + if (av.is(AttrId.TYPE) && hasValue(av)) { + if (level1Enabled()) { + if (av.valueIs("image")) { + mustHaveAlt = true; + } + } + } + if (av.is(AttrId.ALT) && hasValue(av)) { + hasAlt = true; + } + } + if (mustHaveAlt && !hasAlt) { + lexer.report.accessError(lexer, node, AccessErrorCode.IMG_BUTTON_MISSING_ALT); + } + } + + + /*************************************************************** + * CheckFrameSet + * + * Frameset must have valid NOFRAME section. 
Must contain some + * text but must not contain information telling user to update + * browsers, + ***************************************************************/ + + private void checkFrameSet(final Lexer lexer, final Node node) { + boolean hasNoFrames = false; + + if (level1Enabled()) { + if ((lexer.badAccess & Report.INVALID_LINK_NOFRAMES) != 0) { + lexer.report.accessError(lexer, node, AccessErrorCode.NOFRAMES_INVALID_LINK); + lexer.badAccess &= ~Report.INVALID_LINK_NOFRAMES; /* emit only once */ + } + for (Node temp = node.content; temp != null ; temp = temp.next) { + if (temp.is(TagId.NOFRAMES)) { + hasNoFrames = true; + + if (temp.content != null && temp.content.content.is(TagId.P)) { + final Node para = temp.content.content; + if (para.content.isText()) { + final String word = textFromOneNode(lexer, para.content); + if (word != null && word.contains("browser")) { + lexer.report.accessError(lexer, para, AccessErrorCode.NOFRAMES_INVALID_CONTENT); + } + } + } + else if (temp.content == null) { + lexer.report.accessError(lexer, temp, AccessErrorCode.NOFRAMES_INVALID_NO_VALUE); + } + else if (temp.content != null && isWhitespace(textFromOneNode(lexer, temp.content))) { + lexer.report.accessError(lexer, temp, AccessErrorCode.NOFRAMES_INVALID_NO_VALUE); + } + } + } + if (!hasNoFrames) { + lexer.report.accessError(lexer, node, AccessErrorCode.FRAME_MISSING_NOFRAMES); + } + } + } + + + /*********************************************************** + * CheckHeaderNesting + * + * Checks for heading increases and decreases. Headings must + * not increase by more than one header level, but may + * decrease at from any level to any level. Text within + * headers must not be more than 20 words in length. 
+ ***********************************************************/ + + private void checkHeaderNesting(final Lexer lexer, final Node node) { + if (level2Enabled()) { + int numWords = 1; + boolean isValidIncrease = false; + boolean needsDescription = false; + /* + Text within header element cannot contain more than 20 words without + a separate description + */ + if (node.content != null && node.content.tag == null) { + final String word = textFromOneNode(lexer, node.content); + + for (int i = 0; i < word.length(); i++) { + if (word.charAt(i) == ' ') { + numWords++; + } + } + if (numWords > 20) { + needsDescription = true; + } + } + + /* Header following must be same level or same plus 1 for + ** valid heading increase size. E.g. H1 . H1, H2. H3 . H3, H4 + */ + if (node.isHeader()) { + int level = node.getHeaderLevel(); + isValidIncrease = true; + + for (Node temp = node.next; temp != null; temp = temp.next) { + final int nested = temp.getHeaderLevel(); + if (nested >= level) { + isValidIncrease = nested <= level + 1; + break; + } + } + } + if (!isValidIncrease) { + lexer.report.accessWarning(lexer, node, AccessErrorCode.HEADERS_IMPROPERLY_NESTED); + } + if (needsDescription) { + lexer.report.accessWarni... [truncated message content] |
From: <ad...@us...> - 2010-11-23 12:41:31
|
Revision: 1232 http://jtidy.svn.sourceforge.net/jtidy/?rev=1232&view=rev Author: aditsu Date: 2010-11-23 12:41:24 +0000 (Tue, 23 Nov 2010) Log Message: ----------- fixed test 1359292 - corrected discarding of empty paragraphs, moved xmlns attribute handling to parseDocument, removed early returns from attrError (to update counts) and added a message about incomplete warning/error reporting Modified Paths: -------------- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Clean.java branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/ParserImpl.java branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Report.java branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/TagCheckImpl.java branches/CodeUpdateAndJava5/src/main/resources/org/w3c/tidy/TidyMessages.properties Modified: branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Clean.java =================================================================== --- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Clean.java 2010-11-23 04:24:37 UTC (rev 1231) +++ branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Clean.java 2010-11-23 12:41:24 UTC (rev 1232) @@ -2226,9 +2226,8 @@ } // discard empty paragraphs - if (node.content == null && node.is(TagId.P)) - { - node = Node.discardElement(node); + if (node.content == null && node.is(TagId.P)) { + node = Node.trimEmptyElement(lexer, node); continue; } Modified: branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/ParserImpl.java =================================================================== --- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/ParserImpl.java 2010-11-23 04:24:37 UTC (rev 1231) +++ branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/ParserImpl.java 2010-11-23 12:41:24 UTC (rev 1232) @@ -66,6 +66,10 @@ */ public final class ParserImpl { + /** + * xhtml namespace String. + */ + private static final String XHTML_NAMESPACE = "http://www.w3.org/1999/xhtml"; /** * parser for html. 
@@ -3601,7 +3605,24 @@ lexer.report.warning(lexer, document, node, ErrorCode.DISCARDING_UNEXPECTED); // TODO? continue; } + + if (node.type == NodeType.StartTag && node.is(TagId.HTML)) { + final AttVal xmlns = node.getAttrById(AttrId.XMLNS); + if (xmlns != null && xmlns.valueIs(XHTML_NAMESPACE)) { + final boolean htmlOut = lexer.configuration.isHtmlOut(); + // Unless plain HTML is specified, output will be XHTML. + lexer.isvoyager = true; + lexer.configuration.setXHTML(!htmlOut); + lexer.configuration.setXmlOut(!htmlOut); + // adjust other config options, just as in Configuration + if (!htmlOut) { + lexer.configuration.setUpperCaseTags(false); + lexer.configuration.setUpperCaseAttrs(false); + } + } + } + if (node.type != NodeType.StartTag || !node.is(TagId.HTML)) { lexer.ungetToken(); Modified: branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Report.java =================================================================== --- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Report.java 2010-11-23 04:24:37 UTC (rev 1231) +++ branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Report.java 2010-11-23 12:41:24 UTC (rev 1232) @@ -530,16 +530,6 @@ */ public void attrError(Lexer lexer, Node node, AttVal attribute, ErrorCode code) { - if (lexer.errors > lexer.configuration.getShowErrors()) // keep quiet after <showErrors> errors - { - return; - } - - if (!lexer.configuration.isShowWarnings()) // warnings - { - return; - } - final String tagdesc = getTagName(node); final String name = attribute == null || attribute.attribute == null ? "NULL" : attribute.attribute; final String value = attribute == null || attribute.value == null ? 
"NULL" : attribute.value; @@ -1052,18 +1042,13 @@ * @param errout PrintWriter * @param lexer Lexer */ - public void reportNumWarnings(PrintWriter errout, Lexer lexer) - { - if (lexer.warnings > 0 || lexer.errors > 0) - { - printMessage( - errout, - Level.SUMMARY, - "num_warnings", - lexer.warnings, lexer.errors); + public void reportNumWarnings(PrintWriter errout, Lexer lexer) { + if (lexer.warnings > 0 || lexer.errors > 0) { + final int incomplete = lexer.errors > lexer.configuration.getShowErrors() + || !lexer.configuration.isShowWarnings() ? 1 : 0; + printMessage(errout, Level.SUMMARY, "num_warnings", lexer.warnings, lexer.errors, incomplete); } - else - { + else { printMessage(errout, Level.SUMMARY, "no_warnings"); } } Modified: branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/TagCheckImpl.java =================================================================== --- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/TagCheckImpl.java 2010-11-23 04:24:37 UTC (rev 1231) +++ branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/TagCheckImpl.java 2010-11-23 12:41:24 UTC (rev 1232) @@ -148,40 +148,11 @@ */ public static class CheckHTML implements TagCheck { - /** - * xhtml namepace String. - */ - private static final String XHTML_NAMESPACE = "http://www.w3.org/1999/xhtml"; - - /** * @see org.w3c.tidy.TagCheck#check(org.w3c.tidy.Lexer, org.w3c.tidy.Node) */ - public void check(Lexer lexer, Node node) - { - - AttVal attval; - AttVal xmlns; - - xmlns = node.getAttrByName("xmlns"); - - if (xmlns != null && XHTML_NAMESPACE.equals(xmlns.value)) - { - lexer.isvoyager = true; - if (!lexer.configuration.isHtmlOut()) // Unless user has specified plain HTML output, - { - lexer.configuration.setXHTML(true); // output format will be XHTML. 
- } - // adjust other config options, just as in Configuration - lexer.configuration.setXmlOut(true); - lexer.configuration.setUpperCaseTags(false); - lexer.configuration.setUpperCaseAttrs(false); - } - - for (attval = node.attributes; attval != null; attval = attval.next) - { - attval.checkAttribute(lexer, node); - } + public void check(Lexer lexer, Node node) { + node.checkAttributes(lexer); } } Modified: branches/CodeUpdateAndJava5/src/main/resources/org/w3c/tidy/TidyMessages.properties =================================================================== --- branches/CodeUpdateAndJava5/src/main/resources/org/w3c/tidy/TidyMessages.properties 2010-11-23 04:24:37 UTC (rev 1231) +++ branches/CodeUpdateAndJava5/src/main/resources/org/w3c/tidy/TidyMessages.properties 2010-11-23 12:41:24 UTC (rev 1232) @@ -86,7 +86,7 @@ no_warnings=No warnings or errors were found.\n noframes_content={0} not inside ''noframes'' element non_matching_endtag=replacing unexpected {0} by </{1}> -num_warnings={0,choice,0#0 warnings|1#1 warning|1<{0,number,integer} warnings}, {1,choice,0#0 errors|1#1 error|2#{1,number,integer} errors} were found!\n +num_warnings={0,choice,0#0 warnings|1#1 warning|1<{0,number,integer} warnings}, {1,choice,0#0 errors|1#1 error|2#{1,number,integer} errors} were found!{2,choice,0#|1# Not all warnings/errors were shown.}\n obsolete_element=replacing obsolete element {0} by {1} previous_location=<{0}> previously mentioned proprietary_attr_value={0} proprietary attribute value "{1}" This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ad...@us...> - 2010-11-23 04:24:44
|
Revision: 1231 http://jtidy.svn.sourceforge.net/jtidy/?rev=1231&view=rev Author: aditsu Date: 2010-11-23 04:24:37 +0000 (Tue, 23 Nov 2010) Log Message: ----------- fixed tests 1067112 and 2085175 - corrected identification of word documents Modified Paths: -------------- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Clean.java Modified: branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Clean.java =================================================================== --- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Clean.java 2010-11-22 10:02:10 UTC (rev 1230) +++ branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Clean.java 2010-11-23 04:24:37 UTC (rev 1231) @@ -2145,8 +2145,7 @@ if (node.is(TagId.HTML)) { // check that it's a Word 2000 document - if ((node.getAttrByName("xmlns:o") == null)) - { + if (node.getAttrByName("xmlns:o") == null && !lexer.configuration.isMakeBare()) { return; } lexer.configuration.tt.freeAttrs(node); @@ -2357,8 +2356,7 @@ continue; } - if (!"generator".equals(attval.value)) - { + if (!attval.valueIs("generator")) { continue; } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ad...@us...> - 2010-11-22 10:02:16
|
Revision: 1230 http://jtidy.svn.sourceforge.net/jtidy/?rev=1230&view=rev Author: aditsu Date: 2010-11-22 10:02:10 +0000 (Mon, 22 Nov 2010) Log Message: ----------- fixed test 1263391 - updated ParseInline to allow P within ADDRESS in transitional html versions Modified Paths: -------------- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/ParserImpl.java Modified: branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/ParserImpl.java =================================================================== --- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/ParserImpl.java 2010-11-22 09:09:53 UTC (rev 1229) +++ branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/ParserImpl.java 2010-11-22 10:02:10 UTC (rev 1230) @@ -1246,6 +1246,14 @@ continue; } + /* <p> allowed within <address> in HTML 4.01 Transitional */ + if (node.is(TagId.P) && node.type == NodeType.StartTag && element.is(TagId.ADDRESS)) { + lexer.constrainVersion(~VERS_HTML40_STRICT); + element.insertNodeAtEnd(node); + parseTag(lexer, node, mode); + continue; + } + // ignore unknown and PARAM tags if (node.tag == null || node.is(TagId.PARAM)) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ad...@us...> - 2010-11-22 09:09:59
|
Revision: 1229 http://jtidy.svn.sourceforge.net/jtidy/?rev=1229&view=rev Author: aditsu Date: 2010-11-22 09:09:53 +0000 (Mon, 22 Nov 2010) Log Message: ----------- fixed test 1068087 - modified fontSize2Name to return null for empty strings, and added a special constant to deal with attribute values that wrongly end up empty in Tidy Modified Paths: -------------- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/AttVal.java branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Clean.java branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Lexer.java Modified: branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/AttVal.java =================================================================== --- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/AttVal.java 2010-11-21 22:14:51 UTC (rev 1228) +++ branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/AttVal.java 2010-11-22 09:09:53 UTC (rev 1229) @@ -66,6 +66,11 @@ */ public class AttVal extends Object implements Cloneable { + /** + * Special empty string object to mark strings that (wrongly) end up empty in Tidy (the C library); + * normally Tidy uses null for empty strings and checks values based on that + */ + protected static final String EMPTY = new String(); /** * next AttVal. 
@@ -443,7 +448,7 @@ } public boolean hasValue() { - return value != null && value.length() > 0; + return value != null && value.length() > 0 || value == EMPTY; } public boolean valueIs(final String val) { Modified: branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Clean.java =================================================================== --- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Clean.java 2010-11-21 22:14:51 UTC (rev 1228) +++ branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Clean.java 2010-11-22 09:09:53 UTC (rev 1229) @@ -828,13 +828,17 @@ String[] sizes = {"60%", "70%", "80%", null, "120%", "150%", "200%"}; String buf; - if (size.length() > 0 && '0' <= size.charAt(0) && size.charAt(0) <= '6') + if (size.length() == 0) { + return null; + } + + if ('0' <= size.charAt(0) && size.charAt(0) <= '6') { int n = size.charAt(0) - '0'; return sizes[n]; } - if (size.length() > 0 && size.charAt(0) == '-') + if (size.charAt(0) == '-') { if (size.length() > 1 && '0' <= size.charAt(1) && size.charAt(1) <= '6') { Modified: branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Lexer.java =================================================================== --- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Lexer.java 2010-11-21 22:14:51 UTC (rev 1228) +++ branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Lexer.java 2010-11-22 09:09:53 UTC (rev 1229) @@ -3296,6 +3296,10 @@ // workaround to emulate broken Tidy behavior in compatibility mode // invalid entity becomes character 0 that terminates the C string, see test 1062345 value = value.substring(0, x); + if (value.length() == 0) { + // use this special object to mark that the attribute is still considered to have a value + value = AttVal.EMPTY; + } } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ad...@us...> - 2010-11-21 22:14:57
|
Revision: 1228 http://jtidy.svn.sourceforge.net/jtidy/?rev=1228&view=rev Author: aditsu Date: 2010-11-21 22:14:51 +0000 (Sun, 21 Nov 2010) Log Message: ----------- fixed 1316307 and 3 others - updated parsing code to fix table processing Modified Paths: -------------- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/ParserImpl.java Modified: branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/ParserImpl.java =================================================================== --- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/ParserImpl.java 2010-11-21 21:39:44 UTC (rev 1227) +++ branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/ParserImpl.java 2010-11-21 22:14:51 UTC (rev 1228) @@ -1688,6 +1688,11 @@ lexer.report.warning(lexer, list, node, ErrorCode.MISSING_ENDTAG_BEFORE); return; } + /* In exiled mode, return so table processing can continue. */ + else if (lexer.exiled && (node.hasCM(Dict.CM_TABLE | Dict.CM_ROWGRP | Dict.CM_ROW) + || node.is(TagId.TABLE))) { + return; + } /* http://tidy.sf.net/issue/836462 If "list" is an unordered list, insert the next tag within @@ -2369,6 +2374,10 @@ } else if ((node.tag.model & Dict.CM_TABLE) != 0 || (node.tag.model & Dict.CM_ROW) != 0) { + /* In exiled mode, return so table processing can continue. */ + if (lexer.exiled) { + return; + } node = lexer.inferredTag(TagId.TABLE); } else if ((element.tag.model & Dict.CM_OBJECT) != 0) This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ad...@us...> - 2010-11-21 21:39:50
|
Revision: 1227 http://jtidy.svn.sourceforge.net/jtidy/?rev=1227&view=rev Author: aditsu Date: 2010-11-21 21:39:44 +0000 (Sun, 21 Nov 2010) Log Message: ----------- fixed tests 1079820, 1168193, 1235296 - added more conditions when checking for nested emphasis in parseInline Modified Paths: -------------- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/ParserImpl.java Modified: branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/ParserImpl.java =================================================================== --- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/ParserImpl.java 2010-11-21 21:32:29 UTC (rev 1226) +++ branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/ParserImpl.java 2010-11-21 21:39:44 UTC (rev 1227) @@ -1171,7 +1171,10 @@ && !node.is(TagId.FONT) && !node.is(TagId.BIG) && !node.is(TagId.SMALL) - && !node.is(TagId.Q)) + && !node.is(TagId.SUB) + && !node.is(TagId.SUP) + && !node.is(TagId.Q) + && !node.is(TagId.SPAN)) { if (element.content != null && node.attributes == null && element.last.isText() && !lexer.textNodeEndWithSpace(element.last)) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ad...@us...> - 2010-11-21 21:32:36
|
Revision: 1226 http://jtidy.svn.sourceforge.net/jtidy/?rev=1226&view=rev Author: aditsu Date: 2010-11-21 21:32:29 +0000 (Sun, 21 Nov 2010) Log Message: ----------- fixed test 1069549 - fixed a tree integrity problem in nestedList Modified Paths: -------------- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Clean.java Modified: branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Clean.java =================================================================== --- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Clean.java 2010-11-21 21:00:53 UTC (rev 1225) +++ branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Clean.java 2010-11-21 21:32:29 UTC (rev 1226) @@ -1296,15 +1296,11 @@ node = list; list = node.prev; - list.next = node.next; - - if (list.next != null) - { - list.next.prev = list; - } - child = list.last; /* <li> */ + list.next = node.next; + fixNodeLinks(list); + node.parent = child; node.next = null; node.prev = child.last; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ad...@us...> - 2010-11-21 21:00:59
|
Revision: 1225 http://jtidy.svn.sourceforge.net/jtidy/?rev=1225&view=rev Author: aditsu Date: 2010-11-21 21:00:53 +0000 (Sun, 21 Nov 2010) Log Message: ----------- fixed test 1333579 - updated ParseRow to parse inline/block elements (that were moved before the table) with excludeBlocks = false Modified Paths: -------------- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/ParserImpl.java Modified: branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/ParserImpl.java =================================================================== --- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/ParserImpl.java 2010-11-21 20:29:34 UTC (rev 1224) +++ branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/ParserImpl.java 2010-11-21 21:00:53 UTC (rev 1225) @@ -2945,6 +2945,8 @@ Node.moveBeforeTable(row, node); lexer.report.warning(lexer, row, node, ErrorCode.TAG_NOT_ALLOWED_IN); lexer.exiled = true; + excludeState = lexer.excludeBlocks; + lexer.excludeBlocks = false; if (node.type != NodeType.TextNode) { @@ -2952,6 +2954,7 @@ } lexer.exiled = false; + lexer.excludeBlocks = excludeState; continue; } else if ((node.tag.model & Dict.CM_HEAD) != 0) This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ad...@us...> - 2010-11-21 20:29:40
|
Revision: 1224
          http://jtidy.svn.sourceforge.net/jtidy/?rev=1224&view=rev
Author:   aditsu
Date:     2010-11-21 20:29:34 +0000 (Sun, 21 Nov 2010)

Log Message:
-----------
fixed test 1410061 - removed thousands separator for line/column numbers

Modified Paths:
--------------
    branches/CodeUpdateAndJava5/src/main/resources/org/w3c/tidy/TidyMessages.properties

Modified: branches/CodeUpdateAndJava5/src/main/resources/org/w3c/tidy/TidyMessages.properties
===================================================================
--- branches/CodeUpdateAndJava5/src/main/resources/org/w3c/tidy/TidyMessages.properties 2010-11-21 01:57:42 UTC (rev 1223)
+++ branches/CodeUpdateAndJava5/src/main/resources/org/w3c/tidy/TidyMessages.properties 2010-11-21 20:29:34 UTC (rev 1224)
@@ -63,7 +63,7 @@
 invalid_xml_id={0} cannot copy name attribute to id
 invaliduri_summary=URIs must be properly escaped, they must not contain unescaped\u000acharacters below U+0021 including the space character and not\u000aabove U+007E. Tidy escapes the URI for you as recommended by\u000aHTML 4.01 section B.2.1 and XML 1.0 section 4.2.2. Some user agents\u000ause another algorithm to escape such URIs and some server-sided\u000ascripts depend on that. If you want to depend on that, you must\u000aescape the URI by your own. For more information please refer to\u000ahttp://www.w3.org/International/O-URL-and-ident.html\n
 joining_attribute={0} joining values of repeated attribute "{1}"
-line_column=line {0,number} column {1,number} -
+line_column=line {0,number,#} column {1,number,#} -
 malformed_comment=adjacent hyphens within comment
 malformed_doctype=discarding malformed <!DOCTYPE>
 missing_attr_value={0} attribute "{1}" lacks value

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ad...@us...> - 2010-11-21 01:57:49
|
Revision: 1223 http://jtidy.svn.sourceforge.net/jtidy/?rev=1223&view=rev Author: aditsu Date: 2010-11-21 01:57:42 +0000 (Sun, 21 Nov 2010) Log Message: ----------- fixed test 1078345 - added encloseBodyText and encloseBlockText methods, and removed the old corresponding code Modified Paths: -------------- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/ParserImpl.java Modified: branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/ParserImpl.java =================================================================== --- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/ParserImpl.java 2010-11-21 01:14:22 UTC (rev 1222) +++ branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/ParserImpl.java 2010-11-21 01:57:42 UTC (rev 1223) @@ -803,18 +803,6 @@ continue; } - if (lexer.configuration.isEncloseBodyText() && !iswhitenode) - { - Node para; - - lexer.ungetToken(); - para = lexer.inferredTag(TagId.P); - body.insertNodeAtEnd(para); - parseTag(lexer, para, mode); - mode = Lexer.MIXED_CONTENT; - continue; - } - // HTML2 and HTML4 strict doesn't allow text here lexer.constrainVersion(~(VERS_HTML40_STRICT | VERS_HTML20)); @@ -2158,24 +2146,6 @@ // mixed content model permits text if (node.type == NodeType.TextNode) { - boolean iswhitenode = false; - - if (node.type == NodeType.TextNode - && node.end <= node.start + 1 - && lexer.lexbuf[node.start] == (byte) ' ') - { - iswhitenode = true; - } - - if (lexer.configuration.isEncloseBlockText() && !iswhitenode) - { - lexer.ungetToken(); - node = lexer.inferredTag(TagId.P); - element.insertNodeAtEnd(node); - parseTag(lexer, node, Lexer.MIXED_CONTENT); - continue; - } - if (checkstack) { checkstack = false; @@ -2423,17 +2393,6 @@ { if (TidyUtils.toBoolean(node.tag.model & Dict.CM_INLINE)) { - // DSR - 27Apr02 ensure we wrap anchors and other inline content - // fgiust: commented out due to [1403105]: java.lang.StackOverflowError in Tidy.parseDOM() - // if (lexer.configuration.encloseBlockText) - // { - // 
lexer.ungetToken(); - // node = lexer.inferredTag(TagId.P); - // element.insertNodeAtEnd(node); - // parseTag(lexer, node, Lexer.MIXED_CONTENT); - // continue; - // } - if (checkstack && !node.implicit) { checkstack = false; @@ -3505,6 +3464,70 @@ } } + private static boolean nodeCMIsOnlyInline(final Node node) { + return node.hasCM(Dict.CM_INLINE) && !node.hasCM(Dict.CM_BLOCK); + } + + private static void encloseBodyText(final Lexer lexer) { + Node body = lexer.root.findBody(); + if (body == null) { + return; + } + Node node = body.content; + + while (node != null) { + if ((node.isText() && !node.isBlank(lexer)) || + (node.isElement() && nodeCMIsOnlyInline(node))) { + Node p = lexer.inferredTag(TagId.P); + Node.insertNodeBeforeElement(node, p); + while (node != null && (!node.isElement() || nodeCMIsOnlyInline(node))) { + Node next = node.next; + node.removeNode(); + p.insertNodeAtEnd(node); + node = next; + } + Node.trimSpaces(lexer, p); + continue; + } + node = node.next; + } + } + + /* <form>, <blockquote> and <noscript> do not allow #PCDATA in + HTML 4.01 Strict (%block; model instead of %flow;). + When requested, text nodes in these elements are wrapped in <p>. 
*/ + private static void encloseBlockText(final Lexer lexer, Node node) { + while (node != null) { + Node next = node.next; + + if (node.content != null) { + encloseBlockText(lexer, node.content); + } + + if (!(node.is(TagId.FORM) || node.is(TagId.NOSCRIPT) || + node.is(TagId.BLOCKQUOTE)) || node.content == null) { + node = next; + continue; + } + Node block = node.content; + + if ((block.isText() && !block.isBlank(lexer)) || + (block.isElement() && nodeCMIsOnlyInline(block))) { + Node p = lexer.inferredTag(TagId.P); + Node.insertNodeBeforeElement(block, p); + while (block != null && (!block.isElement() || nodeCMIsOnlyInline(block))) { + Node tempNext = block.next; + block.removeNode(); + p.insertNodeAtEnd(block); + block = tempNext; + } + Node.trimSpaces(lexer, p); + continue; + } + node = next; + } + } + /** * HTML is the top level element. */ @@ -3592,7 +3615,13 @@ attributeChecks(lexer, lexer.root); dropEmptyElements(lexer, lexer.root); cleanSpaces(lexer, lexer.root); - + + if (lexer.configuration.isEncloseBodyText()) { + encloseBodyText(lexer); + } + if (lexer.configuration.isEncloseBlockText()) { + encloseBlockText(lexer, lexer.root); + } return document; } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ad...@us...> - 2010-11-21 01:14:32
|
Revision: 1222 http://jtidy.svn.sourceforge.net/jtidy/?rev=1222&view=rev Author: aditsu Date: 2010-11-21 01:14:22 +0000 (Sun, 21 Nov 2010) Log Message: ----------- fixed test 1266647 - updated comment parsing Modified Paths: -------------- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Lexer.java Modified: branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Lexer.java =================================================================== --- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Lexer.java 2010-11-21 00:40:56 UTC (rev 1221) +++ branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Lexer.java 2010-11-21 01:14:22 UTC (rev 1222) @@ -2114,8 +2114,7 @@ continue; } - end_comment : while (true) - { + while (true) { c = this.in.readChar(); if (c == '>') @@ -2170,17 +2169,17 @@ this.lexbuf[this.lexsize - 2] = (byte) '='; } - addCharToLexer(c); - // if '-' then look for '>' to end the comment - if (c != '-') - { - break end_comment; + if (c == '-') { + addCharToLexer(c); } - + else { + // otherwise continue to look for --> + this.lexbuf[this.lexsize - 1] = (byte) '='; + addCharToLexer(c); + break; + } } - // otherwise continue to look for --> - this.lexbuf[this.lexsize - 2] = (byte) '='; continue; case LEX_DOCTYPE : This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ad...@us...> - 2010-11-21 00:41:02
|
Revision: 1221 http://jtidy.svn.sourceforge.net/jtidy/?rev=1221&view=rev Author: aditsu Date: 2010-11-21 00:40:56 +0000 (Sun, 21 Nov 2010) Log Message: ----------- fixed test 1231279 - updated ANCHOR_NOT_UNIQUE message Modified Paths: -------------- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Report.java branches/CodeUpdateAndJava5/src/main/resources/org/w3c/tidy/TidyMessages.properties Modified: branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Report.java =================================================================== --- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Report.java 2010-11-21 00:02:57 UTC (rev 1220) +++ branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Report.java 2010-11-21 00:40:56 UTC (rev 1221) @@ -632,11 +632,7 @@ break; case ANCHOR_NOT_UNIQUE : - messageLexer( - lexer, - Level.WARNING, - code, - getTagName(node), attribute.value); + messageNode(lexer, Level.WARNING, node, code, tagdesc, value); break; case ENTITY_IN_ID : Modified: branches/CodeUpdateAndJava5/src/main/resources/org/w3c/tidy/TidyMessages.properties =================================================================== --- branches/CodeUpdateAndJava5/src/main/resources/org/w3c/tidy/TidyMessages.properties 2010-11-21 00:02:57 UTC (rev 1220) +++ branches/CodeUpdateAndJava5/src/main/resources/org/w3c/tidy/TidyMessages.properties 2010-11-21 00:40:56 UTC (rev 1221) @@ -1,4 +1,4 @@ -anchor_not_unique={0} Anchor "{1}" already defined +anchor_not_unique={0} anchor "{1}" already defined apos_undefined=named entity ' only defined in XML/XHTML attr_value_not_lcase={0} attribute value "{1}" must be lower case for XHTML # to be translated This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ad...@us...> - 2010-11-21 00:08:14
|
Revision: 1218 http://jtidy.svn.sourceforge.net/jtidy/?rev=1218&view=rev Author: aditsu Date: 2010-11-20 23:02:29 +0000 (Sat, 20 Nov 2010) Log Message: ----------- fixed test 1062345 - emulated (only in compatibility mode) broken Tidy behavior for invalid entities (and corrected it otherwise); updated repairDuplicateAttributes and JOINING_ATTRIBUTE message handling Modified Paths: -------------- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Lexer.java branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Node.java branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Report.java Modified: branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Lexer.java =================================================================== --- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Lexer.java 2010-11-20 21:37:31 UTC (rev 1217) +++ branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Lexer.java 2010-11-20 23:02:29 UTC (rev 1218) @@ -770,8 +770,11 @@ } final Entity ent = EntityTable.getDefaultEntityTable().entityInfo(str, isXml); - final boolean found = ent != null; + boolean found = ent != null; int ch = found ? ent.getCode() : 0; + if (found && ch == 0 && !configuration.isTidyCompat()) { + found = false; + } final int entver = found ? ent.getVersions() : isXml ? 
VERS_XML : VERS_PROPRIETARY; // deal with unrecognized or invalid entities @@ -3288,6 +3291,14 @@ } value = parseValue(attribute, false, isempty, delim); + if (value != null) { + final int x = value.indexOf(0); + if (x >= 0 && configuration.isTidyCompat()) { + // workaround to emulate broken Tidy behavior in compatibility mode + // invalid entity becomes character 0 that terminates the C string, see test 1062345 + value = value.substring(0, x); + } + } if (attribute != null && isValidAttrName(attribute)) { Modified: branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Node.java =================================================================== --- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Node.java 2010-11-20 21:37:31 UTC (rev 1217) +++ branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Node.java 2010-11-20 23:02:29 UTC (rev 1218) @@ -313,29 +313,25 @@ removeAttribute(first); first = temp; } - else if ("style".equalsIgnoreCase(second.attribute) && lexer.configuration.isJoinStyles()) - { + else if (first.is(AttrId.STYLE) && lexer.configuration.isJoinStyles()) { // concatenate styles // this doesn't handle CSS comments and leading/trailing white-space very well see // http://www.w3.org/TR/css-style-attr - int end = first.value.length() - 1; + final int end = first.value.length(); - if (first.value.charAt(end) == ';') - { + if (end > 0 && first.value.charAt(end - 1) == ';') { // attribute ends with declaration seperator first.value = first.value + " " + second.value; } - else if (first.value.charAt(end) == '}') - { + else if (end > 0 && first.value.charAt(end - 1) == '}') { // attribute ends with rule set first.value = first.value + " { " + second.value + " }"; } - else - { + else { // attribute ends with property value - first.value = first.value + "; " + second.value; + first.value = first.value + (end > 0 ? 
"; " : "") + second.value; } temp = second.next; Modified: branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Report.java =================================================================== --- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Report.java 2010-11-20 21:37:31 UTC (rev 1217) +++ branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Report.java 2010-11-20 23:02:29 UTC (rev 1218) @@ -551,7 +551,8 @@ break; case INSERTING_ATTRIBUTE: - case MISSING_ATTR_VALUE : + case MISSING_ATTR_VALUE: + case JOINING_ATTRIBUTE: messageNode(lexer, Level.WARNING, node, code, tagdesc, name); break; @@ -642,14 +643,6 @@ messageLexer(lexer, Level.WARNING, code); break; - case JOINING_ATTRIBUTE : - messageLexer( - lexer, - Level.WARNING, - code, - getTagName(node), attribute.attribute); - break; - case UNEXPECTED_EQUALSIGN : messageLexer(lexer, Level.WARNING, code, getTagName(node)); break; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ad...@us...> - 2010-11-21 00:03:03
|
Revision: 1220 http://jtidy.svn.sourceforge.net/jtidy/?rev=1220&view=rev Author: aditsu Date: 2010-11-21 00:02:57 +0000 (Sun, 21 Nov 2010) Log Message: ----------- fixed test 1056910 - added a condition in nestedList Modified Paths: -------------- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Clean.java Modified: branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Clean.java =================================================================== --- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Clean.java 2010-11-20 23:57:13 UTC (rev 1219) +++ branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Clean.java 2010-11-21 00:02:57 UTC (rev 1220) @@ -1267,6 +1267,11 @@ { return false; } + + /* check list has no peers */ + if (list.next != null) { + return false; + } pnode[0] = list; // Set node to resume iteration This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ad...@us...> - 2010-11-20 23:57:19
|
Revision: 1219 http://jtidy.svn.sourceforge.net/jtidy/?rev=1219&view=rev Author: aditsu Date: 2010-11-20 23:57:13 +0000 (Sat, 20 Nov 2010) Log Message: ----------- improved toString Modified Paths: -------------- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Node.java Modified: branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Node.java =================================================================== --- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Node.java 2010-11-20 23:02:29 UTC (rev 1218) +++ branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Node.java 2010-11-20 23:57:13 UTC (rev 1219) @@ -1202,59 +1202,48 @@ } } + protected String toString(final String indent) { + final StringBuilder sb = new StringBuilder(); + sb.append(type); + if (element != null) { + sb.append(':').append(element); + } + if (type == NodeType.TextNode || type == NodeType.CommentTag || type == NodeType.ProcInsTag) { + if (textarray != null && start <= end) { + sb.append(" \""); + sb.append(TidyUtils.getString(textarray, start, end - start)); + sb.append('"'); + } + else { + sb.append(" null"); + } + } + for (AttVal av = attributes; av != null; av = av.next) { + sb.append(' ').append(av.attribute).append('='); + if (av.value == null) { + sb.append("null"); + } + else { + sb.append('"').append(av.value).append('"'); + } + } + if (content != null) { + sb.append("\n ").append(indent); + sb.append(content.toString(indent + ' ')); + } + if (next != null) { + sb.append('\n').append(indent); + sb.append(next.toString(indent)); + } + return sb.toString(); + } + /** * @see java.lang.Object#toString() */ @Override - public String toString() - { - String s = ""; - Node n = this; - - while (n != null) - { - s += "[Node type="; - s += n.type; - s += ",element="; - if (n.element != null) - { - s += n.element; - } - else - { - s += "null"; - } - if (n.type == NodeType.TextNode || n.type == NodeType.CommentTag || n.type == NodeType.ProcInsTag) - { - s += ",text="; - if 
(n.textarray != null && n.start <= n.end) - { - s += "\""; - s += TidyUtils.getString(n.textarray, n.start, n.end - n.start); - s += "\""; - } - else - { - s += "null"; - } - } - s += ",content="; - if (n.content != null) - { - s += n.content.toString(); - } - else - { - s += "null"; - } - s += "]"; - if (n.next != null) - { - s += ","; - } - n = n.next; - } - return s; + public String toString() { + return toString(""); } /** This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ad...@us...> - 2010-11-20 21:37:37
|
Revision: 1217 http://jtidy.svn.sourceforge.net/jtidy/?rev=1217&view=rev Author: aditsu Date: 2010-11-20 21:37:31 +0000 (Sat, 20 Nov 2010) Log Message: ----------- fixed test 1056023 - moved a return statement in ParseRowGroup Modified Paths: -------------- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/ParserImpl.java Modified: branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/ParserImpl.java =================================================================== --- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/ParserImpl.java 2010-11-20 21:26:27 UTC (rev 1216) +++ branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/ParserImpl.java 2010-11-20 21:37:31 UTC (rev 1217) @@ -2843,9 +2843,8 @@ if (node.type != NodeType.EndTag) { lexer.ungetToken(); + return; } - - return; } if (node.type == NodeType.EndTag) This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ad...@us...> - 2010-11-20 21:26:33
|
Revision: 1216 http://jtidy.svn.sourceforge.net/jtidy/?rev=1216&view=rev Author: aditsu Date: 2010-11-20 21:26:27 +0000 (Sat, 20 Nov 2010) Log Message: ----------- fixed test 1055398 - swapped "first" and "second" when joining styles in repairDuplicateAttributes Modified Paths: -------------- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Node.java Modified: branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Node.java =================================================================== --- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Node.java 2010-11-20 21:12:16 UTC (rev 1215) +++ branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Node.java 2010-11-20 21:26:27 UTC (rev 1216) @@ -320,40 +320,28 @@ // this doesn't handle CSS comments and leading/trailing white-space very well see // http://www.w3.org/TR/css-style-attr - int end = second.value.length() - 1; + int end = first.value.length() - 1; - if (second.value.charAt(end) == ';') + if (first.value.charAt(end) == ';') { // attribute ends with declaration seperator - second.value = second.value + " " + first.value; + first.value = first.value + " " + second.value; } - else if (second.value.charAt(end) == '}') + else if (first.value.charAt(end) == '}') { // attribute ends with rule set - second.value = second.value + " { " + first.value + " }"; + first.value = first.value + " { " + second.value + " }"; } else { // attribute ends with property value - second.value = second.value + "; " + first.value; + first.value = first.value + "; " + second.value; } - temp = first.next; - - if (temp.next == null) - { - second = null; - } - else - { - second = second.next; - } - - lexer.report.attrError(lexer, this, first, ErrorCode.JOINING_ATTRIBUTE); - - removeAttribute(first); - first = temp; - + temp = second.next; + lexer.report.attrError(lexer, this, second, ErrorCode.JOINING_ATTRIBUTE); + removeAttribute(second); + second = temp; } else if (lexer.configuration.getDuplicateAttrs() == 
DupAttrModes.KeepLast) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ad...@us...> - 2010-11-20 21:12:22
|
Revision: 1215 http://jtidy.svn.sourceforge.net/jtidy/?rev=1215&view=rev Author: aditsu Date: 2010-11-20 21:12:16 +0000 (Sat, 20 Nov 2010) Log Message: ----------- fixed test 500236 - modified parseValue to replace \n with space in certain cases Modified Paths: -------------- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Lexer.java Modified: branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Lexer.java =================================================================== --- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Lexer.java 2010-11-18 03:14:32 UTC (rev 1214) +++ branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Lexer.java 2010-11-20 21:12:16 UTC (rev 1215) @@ -3043,8 +3043,10 @@ addCharToLexer(c); parseEntity((short) 0); + if (lexbuf[lexsize - 1] == '\n' && munge) { + changeChar((byte)' '); + } continue; - } // kludge for JavaScript attribute values with line continuations in string literals This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ad...@us...> - 2010-11-18 03:14:38
|
Revision: 1214 http://jtidy.svn.sourceforge.net/jtidy/?rev=1214&view=rev Author: aditsu Date: 2010-11-18 03:14:32 +0000 (Thu, 18 Nov 2010) Log Message: ----------- fixed tests 1004512, 1410061-1 and 1410061-2 - added a condition in font2Span Modified Paths: -------------- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Clean.java Modified: branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Clean.java =================================================================== --- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Clean.java 2010-11-18 02:59:21 UTC (rev 1213) +++ branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Clean.java 2010-11-18 03:14:32 UTC (rev 1214) @@ -1491,8 +1491,8 @@ } // if FONT is only child of parent element then leave alone - if (node.parent.content == node && node.next == null) - { + // Do so only if blockStyle may be succesful. + if (node.parent.content == node && node.next == null && canApplyBlockStyle(node.parent)) { return false; } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ad...@us...> - 2010-11-18 02:59:28
|
Revision: 1213 http://jtidy.svn.sourceforge.net/jtidy/?rev=1213&view=rev Author: aditsu Date: 2010-11-18 02:59:21 +0000 (Thu, 18 Nov 2010) Log Message: ----------- fixed tests 837023 and 1072528 - corrected MALFORMED_DOCTYPE message and level Modified Paths: -------------- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Lexer.java branches/CodeUpdateAndJava5/src/main/resources/org/w3c/tidy/TidyMessages.properties Modified: branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Lexer.java =================================================================== --- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Lexer.java 2010-11-18 02:41:02 UTC (rev 1212) +++ branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Lexer.java 2010-11-18 02:59:21 UTC (rev 1213) @@ -3747,7 +3747,7 @@ AttrCheckImpl.URL.check(this, node, si); } if (node.element == null || !TidyUtils.isValidXMLID(node.element)) { - report.error(this, null, null, ErrorCode.MALFORMED_DOCTYPE); + report.warning(this, null, null, ErrorCode.MALFORMED_DOCTYPE); return null; } // #ifdef TIDY_STORE_ORIGINAL_TEXT @@ -3816,7 +3816,7 @@ } /* document type declaration not finished */ - report.error(this, null, null, ErrorCode.MALFORMED_DOCTYPE); + report.warning(this, null, null, ErrorCode.MALFORMED_DOCTYPE); return null; } Modified: branches/CodeUpdateAndJava5/src/main/resources/org/w3c/tidy/TidyMessages.properties =================================================================== --- branches/CodeUpdateAndJava5/src/main/resources/org/w3c/tidy/TidyMessages.properties 2010-11-18 02:41:02 UTC (rev 1212) +++ branches/CodeUpdateAndJava5/src/main/resources/org/w3c/tidy/TidyMessages.properties 2010-11-18 02:59:21 UTC (rev 1213) @@ -65,7 +65,7 @@ joining_attribute={0} joining values of repeated attribute "{1}" line_column=line {0,number} column {1,number} - malformed_comment=adjacent hyphens within comment -malformed_doctype=expected "html PUBLIC" or "html SYSTEM" +malformed_doctype=discarding malformed 
<!DOCTYPE> missing_attr_value={0} attribute "{1}" lacks value missing_attribute={0} lacks "{1}" attribute missing_body=Can''t create slides - document is missing a body element. This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ad...@us...> - 2010-11-18 02:41:08
|
Revision: 1212 http://jtidy.svn.sourceforge.net/jtidy/?rev=1212&view=rev Author: aditsu Date: 2010-11-18 02:41:02 +0000 (Thu, 18 Nov 2010) Log Message: ----------- fixed test 660397 - added adjustCharEncoding, and removed inCharEncoding and outCharEncoding fields Modified Paths: -------------- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Configuration.java branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/ParsePropertyImpl.java Modified: branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Configuration.java =================================================================== --- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Configuration.java 2010-11-18 01:41:14 UTC (rev 1211) +++ branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Configuration.java 2010-11-18 02:41:02 UTC (rev 1212) @@ -303,16 +303,6 @@ private int definedTags; /** - * Input character encoding (defaults to "ISO8859_1"). - */ - private String inCharEncoding = "ISO8859_1"; - - /** - * Output character encoding (defaults to "ASCII"). - */ - private String outCharEncoding = "ASCII"; - - /** * Avoid mapping values > 127 to entities. */ private boolean rawOut; @@ -399,6 +389,37 @@ } } + /* ensure that char encodings are self consistent */ + protected boolean adjustCharEncoding(final String encoding) { + final String enc = EncodingNameMapper.toJava(encoding); + String outenc = null; + String inenc = null; + + if ("MacRoman".equals(enc) || "Cp1252".equals(enc) || "Cp858".equals(enc) + || "ISO8859_15".equals(enc)) { + inenc = enc; + outenc = "ASCII"; + } + else if ("ASCII".equals(enc)) { + inenc = "ISO8859_1"; + outenc = "ASCII"; + } + for (String s : ENCODING_NAMES) { + if (s.equals(enc)) { + inenc = outenc = enc; + break; + } + } + + if (inenc != null) { + set(Option.CharEncoding, enc); + set(Option.InCharEncoding, inenc); + set(Option.OutCharEncoding, outenc); + return true; + } + return false; + } + /** * Ensure that config is self consistent. 
*/ @@ -518,21 +539,18 @@ * Getter for <code>inCharEncodingName</code>. * @return Returns the inCharEncodingName. */ - protected String getInCharEncodingName() - { - return this.inCharEncoding; + protected String getInCharEncodingName() { + return getString(Option.InCharEncoding); } /** * Setter for <code>inCharEncodingName</code>. * @param encoding The inCharEncodingName to set. */ - protected void setInCharEncodingName(String encoding) - { - String javaEncoding = EncodingNameMapper.toJava(encoding); - if (javaEncoding != null) - { - this.inCharEncoding = javaEncoding; + protected void setInCharEncodingName(final String encoding) { + final String javaEncoding = EncodingNameMapper.toJava(encoding); + if (javaEncoding != null) { + set(Option.InCharEncoding, javaEncoding); } } @@ -540,21 +558,18 @@ * Getter for <code>outCharEncodingName</code>. * @return Returns the outCharEncodingName. */ - protected String getOutCharEncodingName() - { - return this.outCharEncoding; + protected String getOutCharEncodingName() { + return getString(Option.OutCharEncoding); } /** * Setter for <code>outCharEncodingName</code>. * @param encoding The outCharEncodingName to set. 
*/ - protected void setOutCharEncodingName(String encoding) - { - String javaEncoding = EncodingNameMapper.toJava(encoding); - if (javaEncoding != null) - { - this.outCharEncoding = javaEncoding; + protected void setOutCharEncodingName(final String encoding) { + final String javaEncoding = EncodingNameMapper.toJava(encoding); + if (javaEncoding != null) { + set(Option.OutCharEncoding, javaEncoding); } } Modified: branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/ParsePropertyImpl.java =================================================================== --- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/ParsePropertyImpl.java 2010-11-18 01:41:14 UTC (rev 1211) +++ branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/ParsePropertyImpl.java 2010-11-18 02:41:02 UTC (rev 1212) @@ -342,10 +342,8 @@ { configuration.setOutCharEncodingName(value); } - else if ("char-encoding".equalsIgnoreCase(name)) - { - configuration.setInCharEncodingName(value); - configuration.setOutCharEncodingName(value); + else if ("char-encoding".equalsIgnoreCase(name)) { + configuration.adjustCharEncoding(value); } return null; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |