[Htmlparser-cvs] htmlparser/src/org/htmlparser/parserapplications WikiCapturer.java,1.1,1.2
Brought to you by:
derrickoswald
From: Derrick O. <der...@us...> - 2004-05-30 01:44:34
|
Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserapplications In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv21187/src/org/htmlparser/parserapplications Modified Files: WikiCapturer.java Log Message: Use WikiCapturer to pull Wiki pages locally. Index: WikiCapturer.java =================================================================== RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserapplications/WikiCapturer.java,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** WikiCapturer.java 10 Jan 2004 00:06:03 -0000 1.1 --- WikiCapturer.java 30 May 2004 01:43:54 -0000 1.2 *************** *** 55,58 **** --- 55,120 ---- /** + * Returns <code>true</code> if the link is one we are interested in. + * @param link The link to be checked. + * @return <code>true</code> if the link has the source URL as a prefix + * and doesn't contain '?' or '#'; the former because we won't be able to + * handle server side queries in the static target directory structure and + * the latter because presumably the full page with that reference has + * already been captured previously. This performs a case insensitive + * comparison, which is cheating really, but it's cheap. + */ + protected boolean isToBeCaptured (String link) + { + boolean ret; + + ret = super.isToBeCaptured (link); + + // eliminate PhpWiki specific pages + if (ret) + if (link.endsWith ("PhpWikiAdministration")) + ret = false; + else if (link.endsWith ("PhpWikiDocumentation")) + ret = false; + else if (link.endsWith ("TextFormattingRules")) + ret = false; + else if (link.endsWith ("NewMarkupTestPage")) + ret = false; + else if (link.endsWith ("OldMarkupTestPage")) + ret = false; + else if (link.endsWith ("OldTextFormattingRules")) + ret = false; + else if (link.endsWith ("PgsrcTranslation")) + ret = false; + else if (link.endsWith ("HowToUseWiki")) + ret = false; + else if (link.endsWith ("MoreAboutMechanics")) + ret = false; + else if (link.endsWith ("AddingPages")) + ret = false; + else if (link.endsWith ("WikiWikiWeb")) + ret = false; + else if (link.endsWith ("UserPreferences")) + ret = false; + else if (link.endsWith ("PhpWiki")) + ret = false; + else if (link.endsWith ("WabiSabi")) + ret = false; + else if (link.endsWith ("EditText")) + ret = false; + else if (link.endsWith ("FindPage")) + ret = false; + else if (link.endsWith ("RecentChanges")) + ret = false; + else if (link.endsWith ("RecentEdits")) + ret = false; + else if (link.endsWith ("RecentVisitors")) + ret = false; + else if (link.endsWith ("SteveWainstead")) + ret = false; + + return (ret); + } + + /** * Mainline to capture a web site locally. * @param args The command line arguments. |