From: <ad...@us...> - 2010-11-06 14:33:04
|
Revision: 1172 http://jtidy.svn.sourceforge.net/jtidy/?rev=1172&view=rev Author: aditsu Date: 2010-11-06 14:32:58 +0000 (Sat, 06 Nov 2010) Log Message: ----------- fixed test 1632218 - modified encodingError to use the decimal character value for INVALID_NCR, and adjusted messages Modified Paths: -------------- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Report.java branches/CodeUpdateAndJava5/src/main/resources/org/w3c/tidy/TidyMessages.properties Modified: branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Report.java =================================================================== --- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Report.java 2010-11-06 13:54:31 UTC (rev 1171) +++ branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Report.java 2010-11-06 14:32:58 UTC (rev 1172) @@ -514,11 +514,7 @@ break; case INVALID_NCR: lexer.badChars |= BC_INVALID_NCR; - messageLexer( - lexer, - Level.WARNING, - code, - replaceMode, buf); + messageLexer(lexer, Level.WARNING, code, replaceMode, c); break; } } Modified: branches/CodeUpdateAndJava5/src/main/resources/org/w3c/tidy/TidyMessages.properties =================================================================== --- branches/CodeUpdateAndJava5/src/main/resources/org/w3c/tidy/TidyMessages.properties 2010-11-06 13:54:31 UTC (rev 1171) +++ branches/CodeUpdateAndJava5/src/main/resources/org/w3c/tidy/TidyMessages.properties 2010-11-06 14:32:58 UTC (rev 1172) @@ -54,7 +54,7 @@ invalid_attribute={0} attribute name "{1}" (value="{2}") is invalid invalid_char={0,choice,0#replacing|1#discarding} invalid character code {1} invalid_ncr={0,choice,0#replacing|1#discarding} invalid numeric character reference {1} -invalid_sgml_chars=Character codes 128 to 159 (U+0080 to U+009F) are not allowed in HTML;\u000aeven if they were, they would likely be unprintable control characters.\u000aTidy assumed you wanted to refer to a character with the same byte value in the \u000a{0,choice,0#specified|1#Windows-1252|2#MacRoman} encoding and replaced that reference with the Unicode equivalent. +invalid_sgml_chars=Character codes 128 to 159 (U+0080 to U+009F) are not allowed in HTML;\u000aeven if they were, they would likely be unprintable control characters.\u000aTidy assumed you wanted to refer to a character with the same byte value in the \u000a{0,choice,0#specified|1#Windows-1252|2#MacRoman} encoding and replaced that reference with the Unicode equivalent.\n invalid_utf16={0,choice,0#replacing|1#discarding} invalid UTF-16 surrogate pair (char. code {1}) invalid_utf16_summary=Character codes for UTF-16 must be in the range: U+0000 to U+10FFFF.\u000aThe definition of UTF-16 in Annex C of ISO/IEC 10646-1:2000 does not allow the\u000amapping of unpaired surrogates. For more information please refer to\u000ahttp://www.unicode.org/unicode and http://www.cl.cam.ac.uk/~mgk25/unicode.html invalid_utf8={0,choice,0#replacing|1#discarding} invalid UTF-8 bytes (char. code {1}) @@ -74,7 +74,7 @@ missing_imagemap={0} should use client-side image map missing_quotemark={0} attribute with missing trailing quote mark missing_semicolon=entity "{0}" doesn''t end in '';'' -missing_semicolon_ncr=numeric character reference "{0}" doesn''t end in ";" +missing_semicolon_ncr=numeric character reference "{0}" doesn''t end in '';'' missing_starttag=missing <{0}> missing_title_element=inserting missing ''title'' element needs_author_intervention=This document has errors that must be fixed before\u000ausing HTML Tidy to generate a tidied up version.\n This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ad...@us...> - 2010-11-13 19:56:12
|
Revision: 1184 http://jtidy.svn.sourceforge.net/jtidy/?rev=1184&view=rev Author: aditsu Date: 2010-11-13 19:56:05 +0000 (Sat, 13 Nov 2010) Log Message: ----------- fixed test 1715153 - added BAD_ATTRIBUTE_VALUE_REPLACED Modified Paths: -------------- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Report.java branches/CodeUpdateAndJava5/src/main/resources/org/w3c/tidy/TidyMessages.properties Modified: branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Report.java =================================================================== --- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Report.java 2010-11-07 17:42:05 UTC (rev 1183) +++ branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Report.java 2010-11-13 19:56:05 UTC (rev 1184) @@ -578,7 +578,8 @@ lexer.badAccess |= MISSING_IMAGE_MAP; break; - case BAD_ATTRIBUTE_VALUE : + case BAD_ATTRIBUTE_VALUE: + case BAD_ATTRIBUTE_VALUE_REPLACED: case INVALID_ATTRIBUTE: messageNode(lexer, Level.WARNING, node, code, tagdesc, name, value); break; Modified: branches/CodeUpdateAndJava5/src/main/resources/org/w3c/tidy/TidyMessages.properties =================================================================== --- branches/CodeUpdateAndJava5/src/main/resources/org/w3c/tidy/TidyMessages.properties 2010-11-07 17:42:05 UTC (rev 1183) +++ branches/CodeUpdateAndJava5/src/main/resources/org/w3c/tidy/TidyMessages.properties 2010-11-13 19:56:05 UTC (rev 1184) @@ -5,6 +5,7 @@ backslash_in_uri={0} URI reference contains backslash. Typo? bad_argument=Warning - missing or malformed argument "{1}" for option "{0}" bad_attribute_value={0} attribute "{1}" has invalid value "{2}" +bad_attribute_value_replaced = {0} attribute "{1}" had invalid value "{2}" and has been replaced bad_cdata_content=''<'' + ''/'' + letter not allowed here bad_comment_chars=expecting -- or > bad_tree=Panic - tree has lost its integrity This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ad...@us...> - 2010-11-13 20:18:29
|
Revision: 1185 http://jtidy.svn.sourceforge.net/jtidy/?rev=1185&view=rev Author: aditsu Date: 2010-11-13 20:18:23 +0000 (Sat, 13 Nov 2010) Log Message: ----------- fixed test 655338 - added checks for xml declaration, and SPACE_PRECEDING_XMLDECL message Modified Paths: -------------- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Lexer.java branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/ParserImpl.java branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Report.java branches/CodeUpdateAndJava5/src/main/resources/org/w3c/tidy/TidyMessages.properties Modified: branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Lexer.java =================================================================== --- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Lexer.java 2010-11-13 19:56:05 UTC (rev 1184) +++ branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Lexer.java 2010-11-13 20:18:23 UTC (rev 1185) @@ -3813,4 +3813,12 @@ report.error(this, null, null, ErrorCode.MALFORMED_DOCTYPE); return null; } + + public Node findXmlDecl() { + Node node = root.content; + while (node != null && node.type != NodeType.XmlDecl) { + node = node.next; + } + return node; + } } Modified: branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/ParserImpl.java =================================================================== --- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/ParserImpl.java 2010-11-13 19:56:05 UTC (rev 1184) +++ branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/ParserImpl.java 2010-11-13 20:18:23 UTC (rev 1185) @@ -3503,6 +3503,16 @@ while ((node = lexer.getToken(Lexer.IGNORE_WHITESPACE)) != null) { + if (node.type == NodeType.XmlDecl) { + if (lexer.findXmlDecl() != null && lexer.root.content != null) { + lexer.report.warning(lexer, lexer.root, node, ErrorCode.DISCARDING_UNEXPECTED); + continue; + } + if (node.line != 1 || node.column != 1) { + lexer.report.warning(lexer, lexer.root, node, ErrorCode.SPACE_PRECEDING_XMLDECL); + } + } + // deal with comments etc. if (Node.insertMisc(document, node)) { Modified: branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Report.java =================================================================== --- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Report.java 2010-11-13 19:56:05 UTC (rev 1184) +++ branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Report.java 2010-11-13 20:18:23 UTC (rev 1185) @@ -806,7 +806,8 @@ messageLexer(lexer, Level.WARNING, code, getTagName(element)); break; - case MISSING_DOCTYPE : + case MISSING_DOCTYPE: + case SPACE_PRECEDING_XMLDECL: messageLexer(lexer, Level.WARNING, code); break; Modified: branches/CodeUpdateAndJava5/src/main/resources/org/w3c/tidy/TidyMessages.properties =================================================================== --- branches/CodeUpdateAndJava5/src/main/resources/org/w3c/tidy/TidyMessages.properties 2010-11-13 19:56:05 UTC (rev 1184) +++ branches/CodeUpdateAndJava5/src/main/resources/org/w3c/tidy/TidyMessages.properties 2010-11-13 20:18:23 UTC (rev 1185) @@ -97,6 +97,7 @@ replacing_unex_element=replacing unexpected {0} by {1} report_version=Document content looks like {0} slides_found={0,number} Slides found +space_preceding_xmldecl = removing whitespace preceding XML Declaration suspected_missing_quote=missing quotemark for attribute value tag_not_allowed_in={0} isn''t allowed in <{1}> elements too_many_elements=too many {0} elements This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ad...@us...> - 2010-11-18 02:59:28
|
Revision: 1213 http://jtidy.svn.sourceforge.net/jtidy/?rev=1213&view=rev Author: aditsu Date: 2010-11-18 02:59:21 +0000 (Thu, 18 Nov 2010) Log Message: ----------- fixed tests 837023 and 1072528 - corrected MALFORMED_DOCTYPE message and level Modified Paths: -------------- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Lexer.java branches/CodeUpdateAndJava5/src/main/resources/org/w3c/tidy/TidyMessages.properties Modified: branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Lexer.java =================================================================== --- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Lexer.java 2010-11-18 02:41:02 UTC (rev 1212) +++ branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Lexer.java 2010-11-18 02:59:21 UTC (rev 1213) @@ -3747,7 +3747,7 @@ AttrCheckImpl.URL.check(this, node, si); } if (node.element == null || !TidyUtils.isValidXMLID(node.element)) { - report.error(this, null, null, ErrorCode.MALFORMED_DOCTYPE); + report.warning(this, null, null, ErrorCode.MALFORMED_DOCTYPE); return null; } // #ifdef TIDY_STORE_ORIGINAL_TEXT @@ -3816,7 +3816,7 @@ } /* document type declaration not finished */ - report.error(this, null, null, ErrorCode.MALFORMED_DOCTYPE); + report.warning(this, null, null, ErrorCode.MALFORMED_DOCTYPE); return null; } Modified: branches/CodeUpdateAndJava5/src/main/resources/org/w3c/tidy/TidyMessages.properties =================================================================== --- branches/CodeUpdateAndJava5/src/main/resources/org/w3c/tidy/TidyMessages.properties 2010-11-18 02:41:02 UTC (rev 1212) +++ branches/CodeUpdateAndJava5/src/main/resources/org/w3c/tidy/TidyMessages.properties 2010-11-18 02:59:21 UTC (rev 1213) @@ -65,7 +65,7 @@ joining_attribute={0} joining values of repeated attribute "{1}" line_column=line {0,number} column {1,number} - malformed_comment=adjacent hyphens within comment -malformed_doctype=expected "html PUBLIC" or "html SYSTEM" +malformed_doctype=discarding malformed <!DOCTYPE> missing_attr_value={0} attribute "{1}" lacks value missing_attribute={0} lacks "{1}" attribute missing_body=Can''t create slides - document is missing a body element. This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ad...@us...> - 2010-11-21 00:41:02
|
Revision: 1221 http://jtidy.svn.sourceforge.net/jtidy/?rev=1221&view=rev Author: aditsu Date: 2010-11-21 00:40:56 +0000 (Sun, 21 Nov 2010) Log Message: ----------- fixed test 1231279 - updated ANCHOR_NOT_UNIQUE message Modified Paths: -------------- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Report.java branches/CodeUpdateAndJava5/src/main/resources/org/w3c/tidy/TidyMessages.properties Modified: branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Report.java =================================================================== --- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Report.java 2010-11-21 00:02:57 UTC (rev 1220) +++ branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Report.java 2010-11-21 00:40:56 UTC (rev 1221) @@ -632,11 +632,7 @@ break; case ANCHOR_NOT_UNIQUE : - messageLexer( - lexer, - Level.WARNING, - code, - getTagName(node), attribute.value); + messageNode(lexer, Level.WARNING, node, code, tagdesc, value); break; case ENTITY_IN_ID : Modified: branches/CodeUpdateAndJava5/src/main/resources/org/w3c/tidy/TidyMessages.properties =================================================================== --- branches/CodeUpdateAndJava5/src/main/resources/org/w3c/tidy/TidyMessages.properties 2010-11-21 00:02:57 UTC (rev 1220) +++ branches/CodeUpdateAndJava5/src/main/resources/org/w3c/tidy/TidyMessages.properties 2010-11-21 00:40:56 UTC (rev 1221) @@ -1,4 +1,4 @@ -anchor_not_unique={0} Anchor "{1}" already defined +anchor_not_unique={0} anchor "{1}" already defined apos_undefined=named entity ' only defined in XML/XHTML attr_value_not_lcase={0} attribute value "{1}" must be lower case for XHTML # to be translated This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ad...@us...> - 2010-11-23 12:41:31
|
Revision: 1232 http://jtidy.svn.sourceforge.net/jtidy/?rev=1232&view=rev Author: aditsu Date: 2010-11-23 12:41:24 +0000 (Tue, 23 Nov 2010) Log Message: ----------- fixed test 1359292 - corrected discarding of empty paragraphs, moved xmlns attribute handling to parseDocument, removed early returns from attrError (to update counts) and added a message about incomplete warning/error reporting Modified Paths: -------------- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Clean.java branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/ParserImpl.java branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Report.java branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/TagCheckImpl.java branches/CodeUpdateAndJava5/src/main/resources/org/w3c/tidy/TidyMessages.properties Modified: branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Clean.java =================================================================== --- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Clean.java 2010-11-23 04:24:37 UTC (rev 1231) +++ branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Clean.java 2010-11-23 12:41:24 UTC (rev 1232) @@ -2226,9 +2226,8 @@ } // discard empty paragraphs - if (node.content == null && node.is(TagId.P)) - { - node = Node.discardElement(node); + if (node.content == null && node.is(TagId.P)) { + node = Node.trimEmptyElement(lexer, node); continue; } Modified: branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/ParserImpl.java =================================================================== --- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/ParserImpl.java 2010-11-23 04:24:37 UTC (rev 1231) +++ branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/ParserImpl.java 2010-11-23 12:41:24 UTC (rev 1232) @@ -66,6 +66,10 @@ */ public final class ParserImpl { + /** + * xhtml namespace String. + */ + private static final String XHTML_NAMESPACE = "http://www.w3.org/1999/xhtml"; /** * parser for html. @@ -3601,7 +3605,24 @@ lexer.report.warning(lexer, document, node, ErrorCode.DISCARDING_UNEXPECTED); // TODO? continue; } + + if (node.type == NodeType.StartTag && node.is(TagId.HTML)) { + final AttVal xmlns = node.getAttrById(AttrId.XMLNS); + if (xmlns != null && xmlns.valueIs(XHTML_NAMESPACE)) { + final boolean htmlOut = lexer.configuration.isHtmlOut(); + // Unless plain HTML is specified, output will be XHTML. + lexer.isvoyager = true; + lexer.configuration.setXHTML(!htmlOut); + lexer.configuration.setXmlOut(!htmlOut); + // adjust other config options, just as in Configuration + if (!htmlOut) { + lexer.configuration.setUpperCaseTags(false); + lexer.configuration.setUpperCaseAttrs(false); + } + } + } + if (node.type != NodeType.StartTag || !node.is(TagId.HTML)) { lexer.ungetToken(); Modified: branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Report.java =================================================================== --- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Report.java 2010-11-23 04:24:37 UTC (rev 1231) +++ branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Report.java 2010-11-23 12:41:24 UTC (rev 1232) @@ -530,16 +530,6 @@ */ public void attrError(Lexer lexer, Node node, AttVal attribute, ErrorCode code) { - if (lexer.errors > lexer.configuration.getShowErrors()) // keep quiet after <showErrors> errors - { - return; - } - - if (!lexer.configuration.isShowWarnings()) // warnings - { - return; - } - final String tagdesc = getTagName(node); final String name = attribute == null || attribute.attribute == null ? "NULL" : attribute.attribute; final String value = attribute == null || attribute.value == null ? "NULL" : attribute.value; @@ -1052,18 +1042,13 @@ * @param errout PrintWriter * @param lexer Lexer */ - public void reportNumWarnings(PrintWriter errout, Lexer lexer) - { - if (lexer.warnings > 0 || lexer.errors > 0) - { - printMessage( - errout, - Level.SUMMARY, - "num_warnings", - lexer.warnings, lexer.errors); + public void reportNumWarnings(PrintWriter errout, Lexer lexer) { + if (lexer.warnings > 0 || lexer.errors > 0) { + final int incomplete = lexer.errors > lexer.configuration.getShowErrors() + || !lexer.configuration.isShowWarnings() ? 1 : 0; + printMessage(errout, Level.SUMMARY, "num_warnings", lexer.warnings, lexer.errors, incomplete); } - else - { + else { printMessage(errout, Level.SUMMARY, "no_warnings"); } } Modified: branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/TagCheckImpl.java =================================================================== --- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/TagCheckImpl.java 2010-11-23 04:24:37 UTC (rev 1231) +++ branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/TagCheckImpl.java 2010-11-23 12:41:24 UTC (rev 1232) @@ -148,40 +148,11 @@ */ public static class CheckHTML implements TagCheck { - /** - * xhtml namepace String. - */ - private static final String XHTML_NAMESPACE = "http://www.w3.org/1999/xhtml"; - - /** * @see org.w3c.tidy.TagCheck#check(org.w3c.tidy.Lexer, org.w3c.tidy.Node) */ - public void check(Lexer lexer, Node node) - { - - AttVal attval; - AttVal xmlns; - - xmlns = node.getAttrByName("xmlns"); - - if (xmlns != null && XHTML_NAMESPACE.equals(xmlns.value)) - { - lexer.isvoyager = true; - if (!lexer.configuration.isHtmlOut()) // Unless user has specified plain HTML output, - { - lexer.configuration.setXHTML(true); // output format will be XHTML. - } - // adjust other config options, just as in Configuration - lexer.configuration.setXmlOut(true); - lexer.configuration.setUpperCaseTags(false); - lexer.configuration.setUpperCaseAttrs(false); - } - - for (attval = node.attributes; attval != null; attval = attval.next) - { - attval.checkAttribute(lexer, node); - } + public void check(Lexer lexer, Node node) { + node.checkAttributes(lexer); } } Modified: branches/CodeUpdateAndJava5/src/main/resources/org/w3c/tidy/TidyMessages.properties =================================================================== --- branches/CodeUpdateAndJava5/src/main/resources/org/w3c/tidy/TidyMessages.properties 2010-11-23 04:24:37 UTC (rev 1231) +++ branches/CodeUpdateAndJava5/src/main/resources/org/w3c/tidy/TidyMessages.properties 2010-11-23 12:41:24 UTC (rev 1232) @@ -86,7 +86,7 @@ no_warnings=No warnings or errors were found.\n noframes_content={0} not inside ''noframes'' element non_matching_endtag=replacing unexpected {0} by </{1}> -num_warnings={0,choice,0#0 warnings|1#1 warning|1<{0,number,integer} warnings}, {1,choice,0#0 errors|1#1 error|2#{1,number,integer} errors} were found!\n +num_warnings={0,choice,0#0 warnings|1#1 warning|1<{0,number,integer} warnings}, {1,choice,0#0 errors|1#1 error|2#{1,number,integer} errors} were found!{2,choice,0#|1# Not all warnings/errors were shown.}\n obsolete_element=replacing obsolete element {0} by {1} previous_location=<{0}> previously mentioned proprietary_attr_value={0} proprietary attribute value "{1}" This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ad...@us...> - 2010-11-24 02:39:38
|
Revision: 1233 http://jtidy.svn.sourceforge.net/jtidy/?rev=1233&view=rev Author: aditsu Date: 2010-11-24 02:39:29 +0000 (Wed, 24 Nov 2010) Log Message: ----------- ported accessibility checks and messages - this fixes 5 tests Modified Paths: -------------- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/AttVal.java branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/ErrorCode.java branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Lexer.java branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Node.java branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/ParserImpl.java branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Report.java branches/CodeUpdateAndJava5/src/main/resources/org/w3c/tidy/TidyMessages.properties Added Paths: ----------- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/AccessErrorCode.java branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Accessibility.java branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/IErrorCode.java Added: branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/AccessErrorCode.java =================================================================== --- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/AccessErrorCode.java (rev 0) +++ branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/AccessErrorCode.java 2010-11-24 02:39:29 UTC (rev 1233) @@ -0,0 +1,148 @@ +package org.w3c.tidy; + +/** + * Accessibility error codes (from access.h) + * + * @author aditsu + */ +public enum AccessErrorCode implements IErrorCode { + FIRST_ACCESS_ERR, /* must be first */ + +/* [1.1.1.1] */ IMG_MISSING_ALT, +/* [1.1.1.2] */ IMG_ALT_SUSPICIOUS_FILENAME, +/* [1.1.1.3] */ IMG_ALT_SUSPICIOUS_FILE_SIZE, +/* [1.1.1.4] */ IMG_ALT_SUSPICIOUS_PLACEHOLDER, +/* [1.1.1.10] */ IMG_ALT_SUSPICIOUS_TOO_LONG, +/* [1.1.1.11] */ IMG_MISSING_ALT_BULLET, +/* [1.1.1.12] */ IMG_MISSING_ALT_H_RULE, +/* [1.1.2.1] */ IMG_MISSING_LONGDESC_DLINK, +/* [1.1.2.2] */ IMG_MISSING_DLINK, +/* [1.1.2.3] */ IMG_MISSING_LONGDESC, +/* [1.1.2.5] */ LONGDESC_NOT_REQUIRED, +/* [1.1.3.1] */ IMG_BUTTON_MISSING_ALT, +/* [1.1.4.1] */ APPLET_MISSING_ALT, +/* [1.1.5.1] */ OBJECT_MISSING_ALT, +/* [1.1.6.1] */ AUDIO_MISSING_TEXT_WAV, +/* [1.1.6.2] */ AUDIO_MISSING_TEXT_AU, +/* [1.1.6.3] */ AUDIO_MISSING_TEXT_AIFF, +/* [1.1.6.4] */ AUDIO_MISSING_TEXT_SND, +/* [1.1.6.5] */ AUDIO_MISSING_TEXT_RA, +/* [1.1.6.6] */ AUDIO_MISSING_TEXT_RM, +/* [1.1.8.1] */ FRAME_MISSING_LONGDESC, +/* [1.1.9.1] */ AREA_MISSING_ALT, +/* [1.1.10.1] */ SCRIPT_MISSING_NOSCRIPT, +/* [1.1.12.1] */ ASCII_REQUIRES_DESCRIPTION, +/* [1.2.1.1] */ IMG_MAP_SERVER_REQUIRES_TEXT_LINKS, +/* [1.4.1.1] */ MULTIMEDIA_REQUIRES_TEXT, +/* [1.5.1.1] */ IMG_MAP_CLIENT_MISSING_TEXT_LINKS, +/* [2.1.1.1] */ INFORMATION_NOT_CONVEYED_IMAGE, +/* [2.1.1.2] */ INFORMATION_NOT_CONVEYED_APPLET, +/* [2.1.1.3] */ INFORMATION_NOT_CONVEYED_OBJECT, +/* [2.1.1.4] */ INFORMATION_NOT_CONVEYED_SCRIPT, +/* [2.1.1.5] */ INFORMATION_NOT_CONVEYED_INPUT, +/* [2.2.1.1] */ COLOR_CONTRAST_TEXT, +/* [2.2.1.2] */ COLOR_CONTRAST_LINK, +/* [2.2.1.3] */ COLOR_CONTRAST_ACTIVE_LINK, +/* [2.2.1.4] */ COLOR_CONTRAST_VISITED_LINK, +/* [3.2.1.1] */ DOCTYPE_MISSING, +/* [3.3.1.1] */ STYLE_SHEET_CONTROL_PRESENTATION, +/* [3.5.1.1] */ HEADERS_IMPROPERLY_NESTED, +/* [3.5.2.1] */ POTENTIAL_HEADER_BOLD, +/* [3.5.2.2] */ POTENTIAL_HEADER_ITALICS, +/* [3.5.2.3] */ POTENTIAL_HEADER_UNDERLINE, +/* [3.5.3.1] */ HEADER_USED_FORMAT_TEXT, +/* [3.6.1.1] */ LIST_USAGE_INVALID_UL, +/* [3.6.1.2] */ LIST_USAGE_INVALID_OL, +/* [3.6.1.4] */ LIST_USAGE_INVALID_LI, +/* [4.1.1.1] */ INDICATE_CHANGES_IN_LANGUAGE, +/* [4.3.1.1] */ LANGUAGE_NOT_IDENTIFIED, +/* [4.3.1.1] */ LANGUAGE_INVALID, +/* [5.1.2.1] */ DATA_TABLE_MISSING_HEADERS, +/* [5.1.2.2] */ DATA_TABLE_MISSING_HEADERS_COLUMN, +/* [5.1.2.3] */ DATA_TABLE_MISSING_HEADERS_ROW, +/* [5.2.1.1] */ DATA_TABLE_REQUIRE_MARKUP_COLUMN_HEADERS, +/* [5.2.1.2] */ DATA_TABLE_REQUIRE_MARKUP_ROW_HEADERS, +/* [5.3.1.1] */ LAYOUT_TABLES_LINEARIZE_PROPERLY, +/* [5.4.1.1] */ LAYOUT_TABLE_INVALID_MARKUP, +/* [5.5.1.1] */ TABLE_MISSING_SUMMARY, +/* [5.5.1.2] */ TABLE_SUMMARY_INVALID_NULL, +/* [5.5.1.3] */ TABLE_SUMMARY_INVALID_SPACES, +/* [5.5.1.6] */ TABLE_SUMMARY_INVALID_PLACEHOLDER, +/* [5.5.2.1] */ TABLE_MISSING_CAPTION, +/* [5.6.1.1] */ TABLE_MAY_REQUIRE_HEADER_ABBR, +/* [5.6.1.2] */ TABLE_MAY_REQUIRE_HEADER_ABBR_NULL, +/* [5.6.1.3] */ TABLE_MAY_REQUIRE_HEADER_ABBR_SPACES, +/* [6.1.1.1] */ STYLESHEETS_REQUIRE_TESTING_LINK, +/* [6.1.1.2] */ STYLESHEETS_REQUIRE_TESTING_STYLE_ELEMENT, +/* [6.1.1.3] */ STYLESHEETS_REQUIRE_TESTING_STYLE_ATTR, +/* [6.2.1.1] */ FRAME_SRC_INVALID, +/* [6.2.2.1] */ TEXT_EQUIVALENTS_REQUIRE_UPDATING_APPLET, +/* [6.2.2.2] */ TEXT_EQUIVALENTS_REQUIRE_UPDATING_SCRIPT, +/* [6.2.2.3] */ TEXT_EQUIVALENTS_REQUIRE_UPDATING_OBJECT, +/* [6.3.1.1] */ PROGRAMMATIC_OBJECTS_REQUIRE_TESTING_SCRIPT, +/* [6.3.1.2] */ PROGRAMMATIC_OBJECTS_REQUIRE_TESTING_OBJECT, +/* [6.3.1.3] */ PROGRAMMATIC_OBJECTS_REQUIRE_TESTING_EMBED, +/* [6.3.1.4] */ PROGRAMMATIC_OBJECTS_REQUIRE_TESTING_APPLET, +/* [6.5.1.1] */ FRAME_MISSING_NOFRAMES, +/* [6.5.1.2] */ NOFRAMES_INVALID_NO_VALUE, +/* [6.5.1.3] */ NOFRAMES_INVALID_CONTENT, +/* [6.5.1.4] */ NOFRAMES_INVALID_LINK, +/* [7.1.1.1] */ REMOVE_FLICKER_SCRIPT, +/* [7.1.1.2] */ REMOVE_FLICKER_OBJECT, +/* [7.1.1.3] */ REMOVE_FLICKER_EMBED, +/* [7.1.1.4] */ REMOVE_FLICKER_APPLET, +/* [7.1.1.5] */ REMOVE_FLICKER_ANIMATED_GIF, +/* [7.2.1.1] */ REMOVE_BLINK_MARQUEE, +/* [7.4.1.1] */ REMOVE_AUTO_REFRESH, +/* [7.5.1.1] */ REMOVE_AUTO_REDIRECT, +/* [8.1.1.1] */ ENSURE_PROGRAMMATIC_OBJECTS_ACCESSIBLE_SCRIPT, +/* [8.1.1.2] */ ENSURE_PROGRAMMATIC_OBJECTS_ACCESSIBLE_OBJECT, +/* [8.1.1.3] */ ENSURE_PROGRAMMATIC_OBJECTS_ACCESSIBLE_APPLET, +/* [8.1.1.4] */ ENSURE_PROGRAMMATIC_OBJECTS_ACCESSIBLE_EMBED, +/* [9.1.1.1] */ IMAGE_MAP_SERVER_SIDE_REQUIRES_CONVERSION, +/* [9.3.1.1] */ SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_MOUSE_DOWN, +/* [9.3.1.2] */ SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_MOUSE_UP, +/* [9.3.1.3] */ SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_CLICK, +/* [9.3.1.4] */ SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_MOUSE_OVER, +/* [9.3.1.5] */ SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_MOUSE_OUT, +/* [9.3.1.6] */ SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_MOUSE_MOVE, +/* [10.1.1.1] */ NEW_WINDOWS_REQUIRE_WARNING_NEW, +/* [10.1.1.2] */ NEW_WINDOWS_REQUIRE_WARNING_BLANK, +/* [10.2.1.1] */ LABEL_NEEDS_REPOSITIONING_BEFORE_INPUT, +/* [10.2.1.2] */ LABEL_NEEDS_REPOSITIONING_AFTER_INPUT, +/* [10.4.1.1] */ FORM_CONTROL_REQUIRES_DEFAULT_TEXT, +/* [10.4.1.2] */ FORM_CONTROL_DEFAULT_TEXT_INVALID_NULL, +/* [10.4.1.3] */ FORM_CONTROL_DEFAULT_TEXT_INVALID_SPACES, +/* [11.2.1.1] */ REPLACE_DEPRECATED_HTML_APPLET, +/* [11.2.1.2] */ REPLACE_DEPRECATED_HTML_BASEFONT, +/* [11.2.1.3] */ REPLACE_DEPRECATED_HTML_CENTER, +/* [11.2.1.4] */ REPLACE_DEPRECATED_HTML_DIR, +/* [11.2.1.5] */ REPLACE_DEPRECATED_HTML_FONT, +/* [11.2.1.6] */ REPLACE_DEPRECATED_HTML_ISINDEX, +/* [11.2.1.7] */ REPLACE_DEPRECATED_HTML_MENU, +/* [11.2.1.8] */ REPLACE_DEPRECATED_HTML_S, +/* [11.2.1.9] */ REPLACE_DEPRECATED_HTML_STRIKE, +/* [11.2.1.10] */ REPLACE_DEPRECATED_HTML_U, +/* [12.1.1.1] */ FRAME_MISSING_TITLE, +/* [12.1.1.2] */ FRAME_TITLE_INVALID_NULL, +/* [12.1.1.3] */ FRAME_TITLE_INVALID_SPACES, +/* [12.4.1.1] */ ASSOCIATE_LABELS_EXPLICITLY, +/* [12.4.1.2] */ ASSOCIATE_LABELS_EXPLICITLY_FOR, +/* [12.4.1.3] */ ASSOCIATE_LABELS_EXPLICITLY_ID, +/* [13.1.1.1] */ LINK_TEXT_NOT_MEANINGFUL, +/* [13.1.1.2] */ LINK_TEXT_MISSING, +/* [13.1.1.3] */ LINK_TEXT_TOO_LONG, +/* [13.1.1.4] */ LINK_TEXT_NOT_MEANINGFUL_CLICK_HERE, +/* [13.1.1.5] */ LINK_TEXT_NOT_MEANINGFUL_MORE, +/* [13.1.1.6] */ LINK_TEXT_NOT_MEANINGFUL_FOLLOW_THIS, +/* [13.2.1.1] */ METADATA_MISSING, +/* [13.2.1.2] */ METADATA_MISSING_LINK, +/* [13.2.1.3] */ METADATA_MISSING_REDIRECT_AUTOREFRESH, +/* [13.10.1.1] */ SKIPOVER_ASCII_ART, + + LAST_ACCESS_ERR; /* must be last */ + + public int code() { + return ordinal() + 1000; + } +} Added: branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Accessibility.java =================================================================== --- branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Accessibility.java (rev 0) +++ branches/CodeUpdateAndJava5/src/main/java/org/w3c/tidy/Accessibility.java 2010-11-24 02:39:29 UTC (rev 1233) @@ -0,0 +1,2534 @@ +package org.w3c.tidy; + +/********************************************************************* +* AccessibilityChecks +* +* Carries out processes for all accessibility checks. Traverses +* through all the content within the tree and evaluates the tags for +* accessibility. +* +* To perform the following checks, 'AccessibilityChecks' must be +* called AFTER the tree structure has been formed. +* +* If, in the command prompt, there is no specification of which +* accessibility priorities to check, no accessibility checks will be +* performed. (ie. '1' for priority 1, '2' for priorities 1 and 2, +* and '3') for priorities 1, 2 and 3.) +* +* Copyright University of Toronto +* Programmed by: Mike Lam and Chris Ridpath +* Modifications by : Terry Teague (TRT) +* +* Reference document: http://www.w3.org/TR/WAI-WEBCONTENT/ +*********************************************************************/ + + +public class Accessibility { + +// private static final int TEXTBUF_SIZE = 128; + + /* List of possible image types */ + private static final String imageExtensions[] = {".jpg", ".gif", ".tif", ".pct", ".pic", ".iff", ".dib", + ".tga", ".pcx", ".png", ".jpeg", ".tiff", ".bmp"}; + + /* List of possible sound file types */ + private static final String soundExtensions[] = {".wav", ".au", ".aiff", ".snd", ".ra", ".rm"}; + + private static final AccessErrorCode soundExtErrCodes[] = { + AccessErrorCode.AUDIO_MISSING_TEXT_WAV, + AccessErrorCode.AUDIO_MISSING_TEXT_AU, + AccessErrorCode.AUDIO_MISSING_TEXT_AIFF, + AccessErrorCode.AUDIO_MISSING_TEXT_SND, + AccessErrorCode.AUDIO_MISSING_TEXT_RA, + AccessErrorCode.AUDIO_MISSING_TEXT_RM + }; + + /* List of possible media extensions */ + private static final String mediaExtensions[] = {".mpg", ".mov", ".asx", ".avi", ".ivf", ".m1v", ".mmm", ".mp2v", + ".mpa", ".mpe", ".mpeg", ".ram", ".smi", ".smil", ".swf", ".wm", ".wma", ".wmv"}; + + /* List of possible frame sources */ + private static final String frameExtensions[] = {".htm", ".html", ".shtm", ".shtml", ".cfm", ".cfml", + ".asp", ".cgi", ".pl", ".smil"}; + + /* List of possible colour values */ + private static final int colorValues[][] = { + { 0, 0, 0}, + {128, 128, 128}, + {192, 192, 192}, + {255, 255, 255}, + {192, 0, 0}, + {255, 0, 0}, + {128, 0, 128}, + {255, 0, 255}, + { 0, 128, 0}, + { 0, 255, 0}, + {128, 128, 0}, + {255, 255, 0}, + { 0, 0, 128}, + { 0, 0, 255}, + { 0, 128, 128}, + { 0, 255, 255} + }; + + /* These arrays are used to convert color names to their RGB values */ + private static final String colorNames[] = { "black", "silver", "grey", "white", "maroon", "red", "purple", + "fuchsia", "green", "lime", "olive", "yellow", "navy", "blue", "teal", "aqua"}; + + /* gets set from Tidy variable AccessibilityCheckLevel */ + private int level; + + /* list of characters in the text nodes found within a container element */ + private final StringBuilder textNode = new StringBuilder(); + + /* Number of frame elements found within a frameset */ + private int numFrames; + + /* Number of 'longdesc' attributes found within a frameset */ + private int hasCheckedLongDesc; + + private int checkedHeaders; + private int listElements; + private int otherListElements; + + /* For 'USEMAP' identifier */ + private boolean hasUseMap; + private boolean hasName; + private boolean hasMap; + + /* For tracking nodes that are deleted from the original parse tree - TRT */ + /* Node *access_tree; */ + + private boolean hasTH; + private boolean hasValidFor; + private boolean hasValidId; + private boolean hasValidRowHeaders; + private boolean hasValidColumnHeaders; + private boolean hasInvalidRowHeader; + private boolean hasInvalidColumnHeader; + private int forID; + + /* + GetFileExtension takes a path and returns the extension + portion of the path (if any). + */ + + private static String getFileExtension(final String path) { + int i = path.length() - 1; + + do { + if (path.charAt(i) == '/' || path.charAt(i) == '\\') { + return ""; + } + else if (path.charAt(i) == '.') { + return path.substring(i); + } + } while (--i > 0); + return ""; + } + + /************************************************************************ + * IsImage + * + * Checks if the given filename is an image file. + ************************************************************************/ + + private static boolean isImage(final String iType) { + /* Get the file extension */ + final String ext = getFileExtension(iType); + + /* Compare it to the array of known image file extensions */ + for (String s : imageExtensions) { + if (ext.equalsIgnoreCase(s)) { + return true; + } + } + return false; + } + + + /*********************************************************************** + * IsSoundFile + * + * Checks if the given filename is a sound file. + ***********************************************************************/ + + private static AccessErrorCode isSoundFile(final String sType) { + final String ext = getFileExtension(sType); + + for (int i = 0; i < soundExtensions.length; i++) { + if (ext.equalsIgnoreCase(soundExtensions[i])) { + return soundExtErrCodes[i]; + } + } + return null; + } + + + /*********************************************************************** + * IsValidSrcExtension + * + * Checks if the 'SRC' value within the FRAME element is valid + * The 'SRC' extension must end in ".htm", ".html", ".shtm", ".shtml", + * ".cfm", ".cfml", ".asp", ".cgi", ".pl", or ".smil" + ***********************************************************************/ + + private static boolean isValidSrcExtension(final String sType) { + final String ext = getFileExtension(sType); + + for (String s : frameExtensions) { + if (ext.equalsIgnoreCase(s)) { + return true; + } + } + return false; + } + + + /********************************************************************* + * IsValidMediaExtension + * + * Checks to warn the user that synchronized text equivalents are + * required if multimedia is used. + *********************************************************************/ + + private static boolean isValidMediaExtension(final String sType) { + final String ext = getFileExtension(sType); + + for (String s : mediaExtensions) { + if (ext.equalsIgnoreCase(s)) { + return true; + } + } + return false; + } + + + /************************************************************************ + * IsWhitespace + * + * Checks if the given string is all whitespace. + ************************************************************************/ + + private static boolean isWhitespace(final String pString) { + if (pString == null) { + return true; + } + for (int i = 0; i < pString.length(); ++i) { + final char c = pString.charAt(i); + if (!TidyUtils.isWhite(c)) { + return false; + } + } + return true; + } + + private static boolean hasValue(final AttVal av) { + return av != null && !isWhitespace(av.value); + } + + /*********************************************************************** + * IsPlaceholderAlt + * + * Checks to see if there is an image and photo place holder contained + * in the ALT text. + ***********************************************************************/ + + private static boolean isPlaceholderAlt(final String txt) { + return txt.contains("image") || txt.contains("photo"); + } + + + /*********************************************************************** + * IsPlaceHolderObject + * + * Checks to see if there is an OBJECT place holder contained + * in the 'ALT' text. + ***********************************************************************/ + + private static boolean isPlaceHolderObject(final String txt) { + return txt.contains("object"); + } + + + /********************************************************** + * EndsWithBytes + * + * Checks to see if the ALT text ends with 'bytes' + **********************************************************/ + + private static boolean endsWithBytes(final String txt) { + return txt.endsWith("bytes"); + } + + + /******************************************************* + * textFromOneNode + * + * Returns a list of characters contained within one + * text node. + *******************************************************/ + + private String textFromOneNode(final Lexer lexer, final Node node) { + if (node != null) { + /* Copy contents of a text node */ + return TidyUtils.getString(lexer.lexbuf, node.start, node.end - node.start); + } + return ""; + } + + + /********************************************************* + * getTextNode + * + * Locates text nodes within a container element. + * Retrieves text that are found contained within + * text nodes, and concatenates the text. + *********************************************************/ + + private void getTextNode(final Lexer lexer, final Node node) { + if (node.isText()) { + /* Retrieves each character found within the text node */ + textNode.append(TidyUtils.getString(lexer.lexbuf, node.start, node.end - node.start)); + } + } + + + /********************************************************** + * getTextNodeClear + * + * Clears the current 'textNode' and reloads it with new + * text. The textNode must be cleared before use. + **********************************************************/ + + private String getTextNodeClear(final Lexer lexer, final Node node) { + /* Clears list */ + textNode.setLength(0); + + getTextNode(lexer, node.content); + return textNode.toString(); + } + + /********************************************************** + * LevelX_Enabled + * + * Tell whether access "X" is enabled. + **********************************************************/ + + private boolean level1Enabled() { + return level == 1 || level == 2 || level == 3; + } + + private boolean level2Enabled() { + return level == 2 || level == 3; + } + + private boolean level3Enabled() { + return level == 3; + } + + /******************************************************** + * CheckColorAvailable + * + * Verify that information conveyed with color is + * available without color. + ********************************************************/ + + private void checkColorAvailable(final Lexer lexer, final Node node) { + if (level1Enabled()) { + switch (node.getId()) { + case IMG: + lexer.report.accessWarning(lexer, node, AccessErrorCode.INFORMATION_NOT_CONVEYED_IMAGE); + break; + case APPLET: + lexer.report.accessWarning(lexer, node, AccessErrorCode.INFORMATION_NOT_CONVEYED_APPLET); + break; + case OBJECT: + lexer.report.accessWarning(lexer, node, AccessErrorCode.INFORMATION_NOT_CONVEYED_OBJECT); + break; + case SCRIPT: + lexer.report.accessWarning(lexer, node, AccessErrorCode.INFORMATION_NOT_CONVEYED_SCRIPT); + break; + case INPUT: + lexer.report.accessWarning(lexer, node, AccessErrorCode.INFORMATION_NOT_CONVEYED_INPUT); + break; + } + } + } + + /********************************************************************* + * CheckColorContrast + * + * Checks elements for color contrast. Must have valid contrast for + * valid visibility. + * + * This logic is extremely fragile as it does not recognize + * the fact that color is inherited by many components and + * that BG and FG colors are often set separately. E.g. the + * background color may be set by for the body or a table + * or a cell. The foreground color may be set by any text + * element (p, h1, h2, input, textarea), either explicitly + * or by style. Ergo, this test will not handle most real + * world cases. It's a start, however. + *********************************************************************/ + + private void checkColorContrast(final Lexer lexer, final Node node) { + int rgbBG[] = {255,255,255}; /* Black text on white BG */ + + if (level3Enabled()) { + boolean gotBG = true; + AttVal av; + + /* Check for 'BGCOLOR' first to compare with other color attributes */ + for (av = node.attributes; av != null; av = av.next) { + if (av.is(AttrId.BGCOLOR)) { + if (hasValue(av)) { + gotBG = getRgb(av.value, rgbBG); + } + } + } + /* + Search for COLOR attributes to compare with background color + Must have valid colour contrast + */ + for (av = node.attributes; gotBG && av != null; av = av.next) { + AccessErrorCode errcode = null; + switch (av.getId()) { + case TEXT: + errcode = AccessErrorCode.COLOR_CONTRAST_TEXT; + break; + case LINK: + errcode = AccessErrorCode.COLOR_CONTRAST_LINK; + break; + case ALINK: + errcode = AccessErrorCode.COLOR_CONTRAST_ACTIVE_LINK; + break; + case VLINK: + errcode = AccessErrorCode.COLOR_CONTRAST_VISITED_LINK; + break; + } + if (errcode != null && hasValue(av)) { + int rgbFG[] = {0, 0, 0}; /* Black text */ + + if (getRgb(av.value, rgbFG) && !compareColors(rgbBG, rgbFG)) { + lexer.report.accessWarning(lexer, node, errcode); + } + } + } + } + } + + + /************************************************************** + * CompareColors + * + * Compares two RGB colors for good contrast. + **************************************************************/ + private static int minmax(final int i1, final int i2) { + return Math.max(i1, i2) - Math.min(i1, i2); + } + + private static int brightness(final int rgb[]) { + return ((rgb[0] * 299) + (rgb[1] * 587) + (rgb[2] * 114)) / 1000; + } + + private static boolean compareColors(final int rgbBG[], final int rgbFG[]) { + int brightBG = brightness(rgbBG); + int brightFG = brightness(rgbFG); + + int diffBright = minmax(brightBG, brightFG); + int diffColor = minmax(rgbBG[0], rgbFG[0]) + minmax(rgbBG[1], rgbFG[1]) + minmax(rgbBG[2], rgbFG[2]); + + return diffBright > 180 && diffColor > 500; + } + + + /********************************************************************* + * GetRgb + * + * Gets the red, green and blue values for this attribute for the + * background. + * + * Example: If attribute is BGCOLOR="#121005" then red = 18, green = 16, + * blue = 5. + *********************************************************************/ + + private static boolean getRgb(final String color, final int rgb[]) { + /* Check if we have a color name */ + for (int x = 0; x < colorNames.length; x++) { + if (colorNames[x].contains(color)) { + rgb[0] = colorValues[x][0]; + rgb[1] = colorValues[x][1]; + rgb[2] = colorValues[x][2]; + return true; + } + } + /* + No color name so must be hex values + Is this a number in hexadecimal format? + */ + /* Must be 7 characters in the RGB value (including '#') */ + if (color.length() == 7 && color.charAt(0) == '#') { + rgb[0] = (ctox(color.charAt(1)) * 16) + ctox(color.charAt(2)); + rgb[1] = (ctox(color.charAt(3)) * 16) + ctox(color.charAt(4)); + rgb[2] = (ctox(color.charAt(5)) * 16) + ctox(color.charAt(6)); + return true; + } + return false; + } + + + /******************************************************************* + * ctox + * + * Converts a character to a number. + * Example: if given character is 'A' then returns 10. + * + * Returns the number that the character represents. Returns -1 if not a + * valid number. + *******************************************************************/ + + private static int ctox(final char ch) { + if (ch >= '0' && ch <= '9') { + return ch - '0'; + } + else if (ch >= 'a' && ch <= 'f') { + return ch - 'a' + 10; + } + else if (ch >= 'A' && ch <= 'F') { + return ch - 'A' + 10; + } + return -1; + } + + + /*********************************************************** + * CheckImage + * + * Checks all image attributes for specific elements to + * check for validity of the values contained within + * the attributes. An appropriate warning message is displayed + * to indicate the error. + ***********************************************************/ + + private void checkImage(final Lexer lexer, Node node) { + if (level1Enabled()) { + boolean hasAlt = false; + boolean hasIsMap = false; + boolean hasLongDesc = false; + boolean hasDLINK = false; + boolean hasValidHeight = false; + boolean hasValidWidthBullet = false; + boolean hasValidWidthHR = false; + boolean hasTriggeredMissingLongDesc = false; + + /* Checks all image attributes for invalid values within attributes */ + for (AttVal av = node.attributes; av != null; av = av.next) { + /* + Checks for valid ALT attribute. + The length of the alt text must be less than 150 characters + long. + */ + if (av.is(AttrId.ALT)) { + if (av.value != null) { + if (av.value.length() < 150 && !isPlaceholderAlt(av.value) && !isPlaceHolderObject(av.value) + && !endsWithBytes(av.value) && !isImage(av.value)) { + hasAlt = true; + } + else if (av.value.length() > 150) { + hasAlt = true; + lexer.report.accessWarning(lexer, node, AccessErrorCode.IMG_ALT_SUSPICIOUS_TOO_LONG); + } + else if (isImage(av.value)) { + hasAlt = true; + lexer.report.accessWarning(lexer, node, AccessErrorCode.IMG_ALT_SUSPICIOUS_FILENAME); + } + else if (isPlaceholderAlt(av.value)) { + hasAlt = true; + lexer.report.accessWarning(lexer, node, AccessErrorCode.IMG_ALT_SUSPICIOUS_PLACEHOLDER); + } + else if (endsWithBytes(av.value)) { + hasAlt = true; + lexer.report.accessWarning(lexer, node, AccessErrorCode.IMG_ALT_SUSPICIOUS_FILE_SIZE); + } + } + } + /* + Checks for width values of 'bullets' and 'horizontal + rules' for validity. + + Valid pixel width for 'bullets' must be < 30, and > 150 for + horizontal rules. + */ + else if (av.is(AttrId.WIDTH)) { + /* Longdesc attribute needed if width attribute is not present. */ + if (hasValue(av)) { + int width = Integer.parseInt(av.value); + if (width < 30) { + hasValidWidthBullet = true; + } + if (width > 150) { + hasValidWidthHR = true; + } + } + } + /* + Checks for height values of 'bullets' and horizontal + rules for validity. + + Valid pixel height for 'bullets' and horizontal rules + must be < 30. + */ + else if (av.is(AttrId.HEIGHT)) { + /* Longdesc attribute needed if height attribute not present. */ + if (hasValue(av) && Integer.parseInt(av.value) < 30) { + hasValidHeight = true; + } + } + /* + Checks for longdesc and determines validity. + The length of the 'longdesc' must be > 1 + */ + else if (av.is(AttrId.LONGDESC)) { + if (hasValue(av) && av.value.length() > 1) { + hasLongDesc = true; + } + } + /* + Checks for 'USEMAP' attribute. Ensures that + text links are provided for client-side image maps + */ + else if (av.is(AttrId.USEMAP)) { + if (hasValue(av)) { + hasUseMap = true; + } + } + else if (av.is(AttrId.ISMAP)) { + hasIsMap = true; + } + } + + /* + Check to see if a dLINK is present. The ANCHOR element must + be present following the IMG element. The text found between + the ANCHOR tags must be < 6 characters long, and must contain + the letter 'd'. + */ + if (node.next.is(TagId.A)) { + node = node.next; + /* + Node following the anchor must be a text node + for dLINK to exist + */ + if (node.content != null && node.content.tag == null) { + /* Number of characters found within the text node */ + String word = textFromOneNode(lexer, node.content); + if (word.equals("d") || word.equals("D")) { + hasDLINK = true; + } + } + } + /* + Special case check for dLINK. This will occur if there is + whitespace between the <img> and <a> elements. Ignores + whitespace and continues check for dLINK. + */ + if (node.next != null && node.next.tag == null) { + node = node.next; + + if (node.next.is(TagId.A)) { + node = node.next; + /* + Node following the ANCHOR must be a text node + for dLINK to exist + */ + if (node.content != null && node.content.tag == null) { + /* Number of characters found within the text node */ + String word = textFromOneNode(lexer, node.content); + + if (word.equals("d") || word.equals("D")) { + hasDLINK = true; + } + } + } + } + + if (!hasAlt) { + lexer.report.accessError(lexer, node, AccessErrorCode.IMG_MISSING_ALT); + } + if (!hasLongDesc && hasValidHeight && (hasValidWidthHR || hasValidWidthBullet)) { + hasTriggeredMissingLongDesc = true; + } + if (!hasTriggeredMissingLongDesc) { + if (hasDLINK && !hasLongDesc) { + lexer.report.accessWarning(lexer, node, AccessErrorCode.IMG_MISSING_LONGDESC); + } + if (hasLongDesc && !hasDLINK) { + lexer.report.accessWarning(lexer, node, AccessErrorCode.IMG_MISSING_DLINK); + } + if (!hasLongDesc && !hasDLINK) { + lexer.report.accessWarning(lexer, node, AccessErrorCode.IMG_MISSING_LONGDESC_DLINK); + } + } + if (hasIsMap) { + lexer.report.accessError(lexer, node, AccessErrorCode.IMAGE_MAP_SERVER_SIDE_REQUIRES_CONVERSION); + lexer.report.accessWarning(lexer, node, AccessErrorCode.IMG_MAP_SERVER_REQUIRES_TEXT_LINKS); + } + } + } + + + /*********************************************************** + * CheckApplet + * + * Checks APPLET element to check for validity pertaining + * the 'ALT' attribute. An appropriate warning message is + * displayed to indicate the error. An appropriate warning + * message is displayed to indicate the error. If no 'ALT' + * text is present, then there must be alternate content + * within the APPLET element. + ***********************************************************/ + + private void checkApplet(final Lexer lexer, final Node node) { + if (level1Enabled()) { + boolean hasAlt = false; + boolean hasDescription = false; + + /* Checks for attributes within the APPLET element */ + for (AttVal av = node.attributes; av != null; av = av.next) { + /* + Checks for valid ALT attribute. + The length of the alt text must be > 4 characters in length + but must be < 150 characters long. + */ + if (av.is(AttrId.ALT)) { + if (av.value != null) { + hasAlt = true; + } + } + } + if (!hasAlt) { + /* Must have alternate text representation for that element */ + if (node.content != null) { + String word = null; + + if (node.content.tag == null) { + word = textFromOneNode(lexer, node.content); + } + if (node.content.content != null && node.content.content.tag == null) { + word = textFromOneNode(lexer, node.content.content); + } + if (word != null && !isWhitespace(word)) { + hasDescription = true; + } + } + } + if (!hasDescription && !hasAlt) { + lexer.report.accessError(lexer, node, AccessErrorCode.APPLET_MISSING_ALT); + } + } + } + + + /******************************************************************* + * CheckObject + * + * Checks to verify whether the OBJECT element contains + * 'ALT' text, and to see that the sound file selected is + * of a valid sound file type. OBJECT must have an alternate text + * representation. + *******************************************************************/ + + private void checkObject(final Lexer lexer, final Node node) { + if (level1Enabled()) { + boolean hasAlt = false; + boolean hasDescription = false; + + if (node.content != null) { + if (!node.content.isText()) { + Node tnode = node.content; + + for (AttVal av = tnode.attributes; av != null; av = av.next) { + if (av.is(AttrId.ALT)) { + hasAlt = true; + break; + } + } + } + + /* Must have alternate text representation for that element */ + if (!hasAlt) { + String word = null; + if (node.content.isText()) { + word = textFromOneNode(lexer, node.content); + } + if (word == null && node.content.content.isText()) { + word = textFromOneNode(lexer, node.content.content); + } + if (word != null && !isWhitespace(word)) { + hasDescription = true; + } + } + } + + if (!hasAlt && !hasDescription) { + lexer.report.accessError(lexer, node, AccessErrorCode.OBJECT_MISSING_ALT); + } + } + } + + + /*************************************************************** + * CheckMissingStyleSheets + * + * Ensures that stylesheets are used to control the presentation. + ***************************************************************/ + + private static boolean checkMissingStyleSheets(final Node node) { + boolean sspresent = false; + + for (Node content = node.content; !sspresent && content != null; content = content.next) { + sspresent = content.is(TagId.LINK) || content.is(TagId.STYLE) || content.is(TagId.FONT) + || content.is(TagId.BASEFONT); + + for (AttVal av = content.attributes; !sspresent && av != null; av = av.next) { + sspresent = av.is(AttrId.STYLE) || av.is(AttrId.TEXT) || av.is(AttrId.VLINK) || av.is(AttrId.ALINK) + || av.is(AttrId.LINK); + + if (!sspresent && av.is(AttrId.REL)) { + sspresent = av.valueIs("stylesheet"); + } + } + + if (!sspresent) { + sspresent = checkMissingStyleSheets(content); + } + } + return sspresent; + } + + + /******************************************************************* + * CheckFrame + * + * Checks if the URL is valid and to check if a 'LONGDESC' is needed + * within the FRAME element. If a 'LONGDESC' is needed, the value must + * be valid. The URL must end with the file extension, htm, or html. + * Also, checks to ensure that the 'SRC' and 'TITLE' values are valid. + *******************************************************************/ + + private void checkFrame(final Lexer lexer, final Node node) { + boolean hasTitle = false; + numFrames++; + + if (level1Enabled()) { + /* Checks for attributes within the FRAME element */ + for (AttVal av = node.attributes; av != null; av = av.next) { + /* Checks if 'LONGDESC' value is valid only if present */ + if (av.is(AttrId.LONGDESC)) { + if (hasValue(av) && av.value.length() > 1) { + hasCheckedLongDesc++; + } + } + + /* Checks for valid 'SRC' value within the frame element */ + else if (av.is(AttrId.SRC)) { + if (hasValue(av) && !isValidSrcExtension(av.value)) { + lexer.report.accessError(lexer, node, AccessErrorCode.FRAME_SRC_INVALID); + } + } + + /* Checks for valid 'TITLE' value within frame element */ + else if (av.is(AttrId.TITLE)) { + if (hasValue(av)) { + hasTitle = true; + } + if (!hasTitle) { + if (av.value == null || av.value.length() == 0) { + hasTitle = true; + lexer.report.accessError(lexer, node, AccessErrorCode.FRAME_TITLE_INVALID_NULL); + } + else { + if (isWhitespace(av.value) && av.value.length() > 0) { + hasTitle = true; + lexer.report.accessError(lexer, node, AccessErrorCode.FRAME_TITLE_INVALID_SPACES); + } + } + } + } + } + if (!hasTitle) { + lexer.report.accessError(lexer, node, AccessErrorCode.FRAME_MISSING_TITLE); + } + if (numFrames == 3 && hasCheckedLongDesc < 3) { + numFrames = 0; + lexer.report.accessWarning(lexer, node, AccessErrorCode.FRAME_MISSING_LONGDESC); + } + } + } + + + /**************************************************************** + * CheckIFrame + * + * Checks if 'SRC' value is valid. Must end in appropriate + * file extension. + ****************************************************************/ + + private void checkIFrame(final Lexer lexer, final Node node) { + if (level1Enabled()) { + /* Checks for valid 'SRC' value within the IFRAME element */ + final AttVal av = node.getAttrById(AttrId.SRC); + if (hasValue(av)) { + if (!isValidSrcExtension(av.value)) { + lexer.report.accessError(lexer, node, AccessErrorCode.FRAME_SRC_INVALID); + } + } + } + } + + + /********************************************************************** + * CheckAnchorAccess + * + * Checks that the sound file is valid, and to ensure that + * text transcript is present describing the 'HREF' within the + * ANCHOR element. Also checks to see ensure that the 'TARGET' attribute + * (if it exists) is not null and does not contain '_new' or '_blank'. + **********************************************************************/ + + private void checkAnchorAccess(final Lexer lexer, final Node node) { + boolean hasDescription = false; + boolean hasTriggeredLink = false; + + /* Checks for attributes within the ANCHOR element */ + for (AttVal av = node.attributes; av != null; av = av.next) { + if (level1Enabled()) { + /* Must be of valid sound file type */ + if (av.is(AttrId.HREF)) { + if (hasValue(av)) { + String ext = getFileExtension (av.value); + + /* Checks to see if multimedia is used */ + if (isValidMediaExtension(av.value)) { + lexer.report.accessError(lexer, node, AccessErrorCode.MULTIMEDIA_REQUIRES_TEXT); + } + /* + Checks for validity of sound file, and checks to see if + the file is described within the document, or by a link + that is present which gives the description. + */ + if (ext.length() < 6 && ext.length() > 0) { + AccessErrorCode errcode = isSoundFile(av.value); + if (errcode != null) { + if (node.next != null) { + if (node.next.tag == null) { + String word = textFromOneNode(lexer, node.next); + + /* Must contain at least one letter in the text */ + if (!isWhitespace(word)) { + hasDescription = true; + } + } + } + + /* Must contain text description of sound file */ + if (!hasDescription) { + lexer.report.accessError(lexer, node, errcode); + } + } + } + } + } + } + + if (level2Enabled()) { + /* Checks 'TARGET' attribute for validity if it exists */ + if (av.is(AttrId.TARGET)) { + if (av.valueIs("_new")) { + lexer.report.accessWarning(lexer, node, AccessErrorCode.NEW_WINDOWS_REQUIRE_WARNING_NEW); + } + else if (av.valueIs("_blank")) { + lexer.report.accessWarning(lexer, node, AccessErrorCode.NEW_WINDOWS_REQUIRE_WARNING_BLANK); + } + } + } + } + + if (level2Enabled()) { + if (node.content != null && node.content.tag == null) { + String word = textFromOneNode(lexer, node.content); + if (word != null && !isWhitespace(word)) { + if (word.equals("more")) { + hasTriggeredLink = true; + } + if (word.equals("click here")) { + lexer.report.accessWarning(lexer, node, AccessErrorCode.LINK_TEXT_NOT_MEANINGFUL_CLICK_HERE); + } + if (hasTriggeredLink == false) { + if (word.length() < 6) { + lexer.report.accessWarning(lexer, node, AccessErrorCode.LINK_TEXT_NOT_MEANINGFUL); + } + } + if (word.length() > 60) { + lexer.report.accessWarning(lexer, node, AccessErrorCode.LINK_TEXT_TOO_LONG); + } + } + } + if (node.content == null) { + lexer.report.accessWarning(lexer, node, AccessErrorCode.LINK_TEXT_MISSING); + } + } + } + + + /************************************************************ + * CheckArea + * + * Checks attributes within the AREA element to + * determine if the 'ALT' text and 'HREF' values are valid. + * Also checks to see ensure that the 'TARGET' attribute + * (if it exists) is not null and does not contain '_new' + * or '_blank'. + ************************************************************/ + + private void checkArea(final Lexer lexer, final Node node) { + boolean hasAlt = false; + + /* Checks all attributes within the AREA element */ + for (AttVal av = node.attributes; av != null; av = av.next) { + if (level1Enabled()) { + /* + Checks for valid ALT attribute. + The length of the alt text must be > 4 characters long + but must be less than 150 characters long. + */ + if (av.is(AttrId.ALT)) { + /* The check for validity */ + if (av.value != null) { + hasAlt = true; + } + } + } + + if (level2Enabled()) { + if (av.is(AttrId.TARGET)) { + if (av.valueIs("_new")) { + lexer.report.accessWarning(lexer, node, AccessErrorCode.NEW_WINDOWS_REQUIRE_WARNING_NEW); + } + else if (av.valueIs("_blank")) { + lexer.report.accessWarning(lexer, node, AccessErrorCode.NEW_WINDOWS_REQUIRE_WARNING_BLANK); + } + } + } + } + + if (level1Enabled()) { + /* AREA must contain alt text */ + if (!hasAlt) { + lexer.report.accessError(lexer, node, AccessErrorCode.AREA_MISSING_ALT); + } + } + } + + + /*************************************************** + * CheckScript + * + * Checks the SCRIPT element to ensure that a + * NOSCRIPT section follows the SCRIPT. + ***************************************************/ + + private void checkScriptAcc(final Lexer lexer, final Node node) { + if (level1Enabled()) { + /* NOSCRIPT element must appear immediately following SCRIPT element */ + if (node.next == null || !node.next.is(TagId.NOSCRIPT)) { + lexer.report.accessError(lexer, node, AccessErrorCode.SCRIPT_MISSING_NOSCRIPT); + } + } + } + + + /********************************************************** + * CheckRows + * + * Check to see that each table has a row of headers if + * a column of columns doesn't exist. + **********************************************************/ + + private void checkRows(final Lexer lexer, Node node) { + int numTR = 0; + int numValidTH = 0; + checkedHeaders++; + + for (; node != null; node = node.next) { + numTR++; + if (node.content.is(TagId.TH)) { + hasTH = true; + if (node.content.content.isText()) { + String word = textFromOneNode(lexer, node.content.content); + if (!isWhitespace(word)) { + numValidTH++; + } + } + } + } + if (numTR == numValidTH) { + hasValidRowHeaders = true; + } + if (numTR >= 2 && numTR > numValidTH && numValidTH >= 2 && hasTH) { + hasInvalidRowHeader = true; + } + } + + + /********************************************************** + * CheckColumns + * + * Check to see that each table has a column of headers if + * a row of columns doesn't exist. + **********************************************************/ + + private void checkColumns(final Lexer lexer, final Node node) { + int numTH = 0; + boolean isMissingHeader = false; + + checkedHeaders++; + + /* Table must have row of headers if headers for columns don't exist */ + if (node.content.is(TagId.TH)) { + hasTH = true; + + for (Node tnode = node.content; tnode != null; tnode = tnode.next) { + if (tnode.is(TagId.TH)) { + if (tnode.content.isText()) { + String word = textFromOneNode(lexer, tnode.content); + if (!isWhitespace(word)) { + numTH++; + } + } + } + else { + isMissingHeader = true; + } + } + } + + if (!isMissingHeader && numTH > 0) { + hasValidColumnHeaders = true; + } + if (isMissingHeader && numTH >= 2) { + hasInvalidColumnHeader = true; + } + } + + + /***************************************************** + * CheckTH + * + * Checks to see if the header provided for a table + * requires an abbreviation. (only required if the + * length of the header is greater than 15 characters) + *****************************************************/ + + private void checkTH(final Lexer lexer, final Node node) { + if (level3Enabled()) { + boolean hasAbbr = false; + + /* Checks TH element for 'ABBR' attribute */ + for (AttVal av = node.attributes; av != null; av = av.next) { + if (av.is(AttrId.ABBR)) { + /* Value must not be null and must be less than 15 characters */ + if (av.value != null && !isWhitespace(av.value)) { + hasAbbr = true; + } + if (av.value == null || av.value.length() == 0) { + hasAbbr = true; + lexer.report.accessWarning(lexer, node, AccessErrorCode.TABLE_MAY_REQUIRE_HEADER_ABBR_NULL); + } + if (isWhitespace(av.value) && av.value.length() > 0) { + hasAbbr = true; + lexer.report.accessWarning(lexer, node, AccessErrorCode.TABLE_MAY_REQUIRE_HEADER_ABBR_SPACES); + } + } + } + + /* If the header is greater than 15 characters, an abbreviation is needed */ + final String word = textFromOneNode(lexer, node.content); + + if (word != null && !isWhitespace(word)) { + /* Must have 'ABBR' attribute if header is > 15 characters */ + if (word.length() > 15 && !hasAbbr) { + lexer.report.accessWarning(lexer, node, AccessErrorCode.TABLE_MAY_REQUIRE_HEADER_ABBR); + } + } + } + } + + + /***************************************************************** + * CheckMultiHeaders + * + * Layout tables should make sense when linearized. + * TABLE must contain at least one TH element. + * This technique applies only to tables used for layout purposes, + * not to data tables. Checks for column of multiple headers. + *****************************************************************/ + + private void checkMultiHeaders(final Lexer lexer, final Node node) { + if (level1Enabled()) { + boolean validColSpanRows = true; + boolean validColSpanColumns = true; + + int flag = 0; + + if (node.content != null) { + Node tnode = node.content; + /* + Checks for column of multiple headers found + within a data table. + */ + while (tnode != null) { + if (tnode.is(TagId.TR)) { + if (tnode.content != null) { + Node temp = tnode.content; + + /* The number of TH elements found within TR element */ + if (flag == 0) { + while (temp != null) { + /* + Must contain at least one TH element + within in the TR element + */ + if (temp.is(TagId.TH)) { + for (AttVal av = temp.attributes; av != null; av = av.next) { + if (av.is(AttrId.COLSPAN) && (Integer.parseInt(av.value) > 1)) { + validColSpanColumns = false; + } + if (av.is(AttrId.ROWSPAN) && (Integer.parseInt(av.value) > 1)) { + validColSpanRows = false; + } + } + } + temp = temp.next; + } + flag = 1; + } + } + } + tnode = tnode.next; + } + /* Displays HTML 4 Table Algorithm when multiple column of headers used */ + if (!validColSpanRows) { + lexer.report.accessWarning(lexer, node, AccessErrorCode.DATA_TABLE_REQUIRE_MARKUP_ROW_HEADERS); + lexer.report.displayHTMLTableAlgorithm(lexer); + } + if (!validColSpanColumns) { + lexer.report.accessWarning(lexer, node, AccessErrorCode.DATA_TABLE_REQUIRE_MARKUP_COLUMN_HEADERS); + lexer.report.displayHTMLTableAlgorithm(lexer); + } + } + } + } + + + /**************************************************** + * CheckTable + * + * Checks the TABLE element to ensure that the + * table is not missing any headers. Must have either + * a row or column of headers. + ****************************************************/ + + private void checkTable(final Lexer lexer, final Node node) { + int numTR = 0; + boolean hasSummary = false; + boolean hasCaption = false; + + if (level3Enabled()) { + /* Table must have a 'SUMMARY' describing the purpose of the table */ + for (AttVal av = node.attributes; av != null; av = av.next) { + if (av.is(AttrId.SUMMARY)) { + if (hasValue(av)) { + hasSummary = true; + if (av.contains("summary") && av.contains("table")) { + lexer.report.accessError(lexer, node, AccessErrorCode.TABLE_SUMMARY_INVALID_PLACEHOLDER); + } + } + if (av.value == null || av.value.length() == 0) { + hasSummary = true; + lexer.report.accessError(lexer, node, AccessErrorCode.TABLE_SUMMARY_INVALID_NULL); + } + else if (isWhitespace(av.value) && av.value.length() > 0) { + hasSummary = true; + lexer.report.accessError(lexer, node, AccessErrorCode.TABLE_SUMMARY_INVALID_SPACES); + } + } + } + + /* TABLE must have content. */ + if (node.content == null) { + lexer.report.accessError(lexer, node, AccessErrorCode.DATA_TABLE_MISSING_HEADERS); + return; + } + } + + if (level1Enabled()) { + /* Checks for multiple headers */ + checkMultiHeaders(lexer, node); + } + + if (level2Enabled()) { + /* Table must have a CAPTION describing the purpose of the table */ + if (node.content.is(TagId.CAPTION)) { + Node tnode = node.content; + String word = null; + + if (tnode.content != null && tnode.content.tag == null) { + word = getTextNodeClear(lexer, tnode); + } + if (!isWhitespace(word)) { + hasCaption = true; + } + } + if (!hasCaption) { + lexer.report.accessError(lexer, node, AccessErrorCode.TABLE_MISSING_CAPTION); + } + } + + if (node.content != null) { + if (node.content.is(TagId.CAPTION) && node.content.next != null && node.content.next.is(TagId.TR)) { + checkColumns(lexer, node.content.next); + } + else if (node.content.is(TagId.TR)) { + checkColumns(lexer, node.content); + } + } + if (!hasValidColumnHeaders) { + if (node.content != null) { + if (node.content.is(TagId.CAPTION) && node.content.next != null && node.content.next.is(TagId.TR)) { + checkRows(lexer, node.content.next); + } + else if (node.content.is(TagId.TR)) { + checkRows(lexer, node.content); + } + } + } + + if (level3Enabled()) { + /* Suppress warning for missing 'SUMMARY for HTML 2.0 and HTML 3.2 */ + if (!hasSummary) { + lexer.report.accessError(lexer, node, AccessErrorCode.TABLE_MISSING_SUMMARY); + } + } + + if (level2Enabled()) { + if (node.content != null) { + Node temp = node.content; + + while (temp != null) { + if (temp.is(TagId.TR)) { + numTR++; + } + temp = temp.next; + } + + if (numTR == 1) { + lexer.report.accessWarning(lexer, node, AccessErrorCode.LAYOUT_TABLES_LINEARIZE_PROPERLY); + } + } + if (hasTH) { + lexer.report.accessWarning(lexer, node, AccessErrorCode.LAYOUT_TABLE_INVALID_MARKUP); + } + } + + if (level1Enabled()) { + if (checkedHeaders == 2) { + if (!hasValidRowHeaders && !hasValidColumnHeaders && !hasInvalidRowHeader && !hasInvalidColumnHeader) { + lexer.report.accessError(lexer, node, AccessErrorCode.DATA_TABLE_MISSING_HEADERS); + } + if (!hasValidRowHeaders && hasInvalidRowHeader) { + lexer.report.accessError(lexer, node, AccessErrorCode.DATA_TABLE_MISSING_HEADERS_ROW); + } + + if (!hasValidColumnHeaders && hasInvalidColumnHeader) { + lexer.report.accessError(lexer, node, AccessErrorCode.DATA_TABLE_MISSING_HEADERS_COLUMN); + } + } + } + } + + + /*************************************************** + * CheckASCII + * + * Checks for valid text equivalents for XMP and PRE + * elements for ASCII art. Ensures that there is + * a skip over link to skip multi-lined ASCII art. + ***************************************************/ + + private void checkASCII(final Lexer lexer, final Node node) { + String skipOver = null; + boolean IsAscii = false; + int hasSkipOverLink = 0; + + int newLines = -1; + char compareLetter; + int matchingCount = 0; + + if (level1Enabled() && node.content != null) { + /* + Checks the text within the PRE and XMP tags to see if ascii + art is present + */ + for (int i = node.content.start + 1; i < node.content.end; i++) { + matchingCount = 0; + + /* Counts the number of lines of text */ + if (lexer.lexbuf[i] == '\n') { + newLines++; + } + compareLetter = (char) lexer.lexbuf[i]; + + /* Counts consecutive character matches */ + for (int x = i; x < i + 5; x++) { + if (lexer.lexbuf[x] == compareLetter) { + matchingCount++; + } + else { + break; + } + } + + /* Must have at least 5 consecutive character matches */ + if (matchingCount >= 5) { + break; + } + } + /* + Must have more than 6 lines of text OR 5 or more consecutive + letters that are the same for there to be ascii art + */ + if (newLines >= 6 || matchingCount >= 5) { + IsAscii = true; + } + + /* Checks for skip over link if ASCII art is present */ + if (IsAscii) { + if (node.prev != null && node.prev.prev != null) { + final Node temp1 = node.prev.prev; + + /* Checks for 'HREF' attribute */ + for (AttVal av = temp1.attributes; av != null; av = av.next) { + if (av.is(AttrId.HREF) && hasValue(av)) { + skipOver = av.value; + hasSkipOverLink++; + } + } + } + } + } + + if (level2Enabled()) { + /* + Checks for A element following PRE to ensure proper skipover link + only if there is an A element preceding PRE. + */ + if (hasSkipOverLink == 1) { + if (node.next.is(TagId.A)) { + final Node temp2 = node.next; + + /* Checks for 'NAME' attribute */ + for (AttVal av = temp2.attributes; av != null; av = av.next) { + if (av.is(AttrId.NAME) && hasValue(av)) { + /* + Value within the 'HREF' attribute must be the same + as the value within the 'NAME' attribute for valid + skipover. + */ + if (skipOver.contains(av.value)) { + hasSkipOverLink++; + } + } + } + } + } + + if (IsAscii) { + lexer.report.accessError(lexer, node, AccessErrorCode.ASCII_REQUIRES_DESCRIPTION); + if (level3Enabled() && hasSkipOverLink < 2) { + lexer.report.accessError(lexer, node, AccessErrorCode.SKIPOVER_ASCII_ART); + } + } + } + } + + + /*********************************************************** + * CheckFormControls + * + * <form> must have valid 'FOR' attribute, and <label> must + * have valid 'ID' attribute for valid form control. + ***********************************************************/ + + private void checkFormControls(final Lexer lexer, final Node node) { + if (!hasValidFor && hasValidId) { + lexer.report.accessError(lexer, node, AccessErrorCode.ASSOCIATE_LABELS_EXPLICITLY_FOR); + } + if (!hasValidId && hasValidFor) { + lexer.report.accessError(lexer, node, AccessErrorCode.ASSOCIATE_LABELS_EXPLICITLY_ID); + } + if (!hasValidId && !hasValidFor) { + lexer.report.accessError(lexer, node, AccessErrorCode.ASSOCIATE_LABELS_EXPLICITLY); + } + } + + + /************************************************************ + * CheckLabel + * + * Check for valid 'FOR' attribute within the LABEL element + ************************************************************/ + + private void checkLabel(final Lexer lexer, final Node node) { + if (level2Enabled()) { + /* Checks for valid 'FOR' attribute */ + final AttVal av = node.getAttrById(AttrId.FOR); + if (hasValue(av)) { + hasValidFor = true; + } + if (++forID == 2) { + forID = 0; + checkFormControls(lexer, node); + } + } + } + + + /************************************************************ + * CheckInputLabel + * + * Checks for valid 'ID' attribute within the INPUT element. + * Checks to see if there is a LABEL directly before + * or after the INPUT element determined by the 'TYPE'. + * Each INPUT element must have a LABEL describing the form. + ************************************************************/ + + private void checkInputLabel(final Lexer lexer, final Node node) { + if (level2Enabled()) { + /* Checks attributes within the INPUT element */ + for (AttVal av = node.attributes; av != null; av = av.next) { + /* Must have valid 'ID' value */ + if (av.is(AttrId.ID) && hasValue(av)) { + hasValidId = true; + } + } + if (++forID == 2) { + forID = 0; + checkFormControls(lexer, node); + } + } + } + + + /*************************************************************** + * CheckInputAttributes + * + * INPUT element must have a valid 'ALT' attribute if the + * 'VALUE' attribute is present. + ***************************************************************/ + + private void checkInputAttributes(final Lexer lexer, final Node node) { + boolean hasAlt = false; + boolean mustHaveAlt = false; + + /* Checks attributes within the INPUT element */ + for (AttVal av = node.attributes; av != null; av = av.next) { + /* 'VALUE' must be found if the 'TYPE' is 'text' or 'checkbox' */ + if (av.is(AttrId.TYPE) && hasValue(av)) { + if (level1Enabled()) { + if (av.valueIs("image")) { + mustHaveAlt = true; + } + } + } + if (av.is(AttrId.ALT) && hasValue(av)) { + hasAlt = true; + } + } + if (mustHaveAlt && !hasAlt) { + lexer.report.accessError(lexer, node, AccessErrorCode.IMG_BUTTON_MISSING_ALT); + } + } + + + /*************************************************************** + * CheckFrameSet + * + * Frameset must have valid NOFRAME section. Must contain some + * text but must not contain information telling user to update + * browsers, + ***************************************************************/ + + private void checkFrameSet(final Lexer lexer, final Node node) { + boolean hasNoFrames = false; + + if (level1Enabled()) { + if ((lexer.badAccess & Report.INVALID_LINK_NOFRAMES) != 0) { + lexer.report.accessError(lexer, node, AccessErrorCode.NOFRAMES_INVALID_LINK); + lexer.badAccess &= ~Report.INVALID_LINK_NOFRAMES; /* emit only once */ + } + for (Node temp = node.content; temp != null ; temp = temp.next) { + if (temp.is(TagId.NOFRAMES)) { + hasNoFrames = true; + + if (temp.content != null && temp.content.content.is(TagId.P)) { + final Node para = temp.content.content; + if (para.content.isText()) { + final String word = textFromOneNode(lexer, para.content); + if (word != null && word.contains("browser")) { + lexer.report.accessError(lexer, para, AccessErrorCode.NOFRAMES_INVALID_CONTENT); + } + } + } + else if (temp.content == null) { + lexer.report.accessError(lexer, temp, AccessErrorCode.NOFRAMES_INVALID_NO_VALUE); + } + else if (temp.content != null && isWhitespace(textFromOneNode(lexer, temp.content))) { + lexer.report.accessError(lexer, temp, AccessErrorCode.NOFRAMES_INVALID_NO_VALUE); + } + } + } + if (!hasNoFrames) { + lexer.report.accessError(lexer, node, AccessErrorCode.FRAME_MISSING_NOFRAMES); + } + } + } + + + /*********************************************************** + * CheckHeaderNesting + * + * Checks for heading increases and decreases. Headings must + * not increase by more than one header level, but may + * decrease at from any level to any level. Text within + * headers must not be more than 20 words in length. + ***********************************************************/ + + private void checkHeaderNesting(final Lexer lexer, final Node node) { + if (level2Enabled()) { + int numWords = 1; + boolean isValidIncrease = false; + boolean needsDescription = false; + /* + Text within header element cannot contain more than 20 words without + a separate description + */ + if (node.content != null && node.content.tag == null) { + final String word = textFromOneNode(lexer, node.content); + + for (int i = 0; i < word.length(); i++) { + if (word.charAt(i) == ' ') { + numWords++; + } + } + if (numWords > 20) { + needsDescription = true; + } + } + + /* Header following must be same level or same plus 1 for + ** valid heading increase size. E.g. H1 . H1, H2. H3 . H3, H4 + */ + if (node.isHeader()) { + int level = node.getHeaderLevel(); + isValidIncrease = true; + + for (Node temp = node.next; temp != null; temp = temp.next) { + final int nested = temp.getHeaderLevel(); + if (nested >= level) { + isValidIncrease = nested <= level + 1; + break; + } + } + } + if (!isValidIncrease) { + lexer.report.accessWarning(lexer, node, AccessErrorCode.HEADERS_IMPROPERLY_NESTED); + } + if (needsDescription) { + lexer.report.accessWarni... [truncated message content] |