From: <bra...@us...> - 2008-01-15 01:43:31
|
Revision: 2128 http://archive-access.svn.sourceforge.net/archive-access/?rev=2128&view=rev Author: bradtofel Date: 2008-01-14 17:43:29 -0800 (Mon, 14 Jan 2008) Log Message: ----------- REFACTOR: removed isAuthority() and resolveUrl() Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/UrlCanonicalizer.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/UrlCanonicalizer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/UrlCanonicalizer.java 2008-01-15 01:41:32 UTC (rev 2127) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/UrlCanonicalizer.java 2008-01-15 01:43:29 UTC (rev 2128) @@ -44,34 +44,8 @@ * @version $Date$, $Revision$ */ public class UrlCanonicalizer { - - private static final String CC_TLDS = "ac|ad|ae|af|ag|ai|al|am|an|ao|aq" + - "|ar|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|bj|bm|bn|bo|br|bs" + - "|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|co|cr|cu|cv|cx" + - "|cy|cz|de|dj|dk|dm|do|dz|ec|ee|eg|eh|er|es|et|eu|fi|fj|fk|fm|fo" + - "|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gp|gq|gr|gs|gt|gu|gw|gy|hk" + - "|hm|hn|hr|ht|hu|id|ie|il|im|in|io|iq|ir|is|it|je|jm|jo|jp|ke|kg" + - "|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma" + - "|mc|md|me|mg|mh|mk|ml|mm|mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|mz" + - "|na|nc|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|pa|pe|pf|pg|ph|pk|pl|pm" + - "|pn|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj" + - "|sk|sl|sm|sn|so|sr|st|su|sv|sy|sz|tc|td|tf|tg|th|tj|tk|tl|tm|tn" + - "|to|tp|tr|tt|tv|tw|tz|ua|ug|uk|um|us|uy|uz|va|vc|ve|vg|vi|vn|vu" + - "|wf|ws|ye|yt|yu|za|zm|zw"; - - private static final String GEN_TLDS = "aero|biz|cat|com|coop|edu|gov" + - "|info|int|jobs|mil|mobi|museum|name|net|org|pro|travel"; - - - private static final String ALL_TLD_PATTERN = CC_TLDS + "|" + GEN_TLDS; - private static final String IP_PATTERN = "[0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+"; - private static final Pattern AUTHORITY_REGEX = - Pattern.compile("(([0-9a-z_.-]+)\\.(" + ALL_TLD_PATTERN + "))|" + - "(" + IP_PATTERN + ")"); - - private static final String CDX_PREFIX = " CDX "; /** * Strip leading 'www.' @@ -323,38 +297,6 @@ return url; } - /** - * @param urlPart - * @return boolean indicating whether urlPart might be an Authority. - */ - public boolean isAuthority(String urlPart) { - Matcher m = AUTHORITY_REGEX.matcher(urlPart); - - return (m != null) && m.matches(); - } - - /** - * @param baseUrl - * @param url - * @return url resolved against baseUrl, unless it is absolute already - */ - public static String resolveUrl(String baseUrl, String url) { - // TODO: this only works for http:// - if(url.startsWith("http://")) { - return url; - } - UURI absBaseURI; - UURI resolvedURI = null; - try { - absBaseURI = UURIFactory.getInstance(baseUrl); - resolvedURI = UURIFactory.getInstance(absBaseURI, url); - } catch (URIException e) { - e.printStackTrace(); - return url; - } - return resolvedURI.getEscapedURI(); - } - private static void USAGE() { System.err.println("Usage: [-f FIELD] [-d DELIM]"); System.exit(3); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |