|
From: <go...@us...> - 2003-09-12 02:03:04
|
Update of /cvsroot/archive-crawler/ArchiveOpenCrawler/src/org/archive/crawler/datamodel
In directory sc8-pr-cvs1:/tmp/cvs-serv28332/src/org/archive/crawler/datamodel
Modified Files:
UURI.java
Log Message:
Strip leading (/..)+ segments that remain in the path after URI normalization.
Index: UURI.java
===================================================================
RCS file: /cvsroot/archive-crawler/ArchiveOpenCrawler/src/org/archive/crawler/datamodel/UURI.java,v
retrieving revision 1.20
retrieving revision 1.21
diff -C2 -d -r1.20 -r1.21
*** UURI.java 6 Sep 2003 02:00:12 -0000 1.20
--- UURI.java 12 Sep 2003 02:03:01 -0000 1.21
***************
*** 89,96 ****
if (u.getSchemeSpecificPart().startsWith("/")) {
// hierarchical URI
! if ("".equals(u.getPath())) {
! u = u.resolve("/"); // ensure root URLs end with '/'
}
- u = u.normalize(); // factor out path cruft
String canonizedAuthority = u.getAuthority();
if(canonizedAuthority==null) {
--- 89,99 ----
if (u.getSchemeSpecificPart().startsWith("/")) {
// hierarchical URI
! u = u.normalize(); // factor out path cruft, according to official spec
! // now, go further and eliminate extra '..' segments
! String fixedPath = u.getPath().replaceFirst("^(/\\.\\.)+","");
! if ("".equals(fixedPath)) {
! // ensure root URLs end with '/'
! fixedPath = "/";
}
String canonizedAuthority = u.getAuthority();
if(canonizedAuthority==null) {
***************
*** 137,141 ****
u = new URI(u.getScheme().toLowerCase(), // case-flatten scheme
canonizedAuthority, // case and port flatten
! u.getPath(), // leave alone
u.getQuery(), // leave alone
null); // drop fragment
--- 140,144 ----
u = new URI(u.getScheme().toLowerCase(), // case-flatten scheme
canonizedAuthority, // case and port flatten
! fixedPath, // leave alone
u.getQuery(), // leave alone
null); // drop fragment
|