You can subscribe to this list here.
2005 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
(1) |
Aug
(10) |
Sep
(36) |
Oct
(339) |
Nov
(103) |
Dec
(152) |
---|---|---|---|---|---|---|---|---|---|---|---|---|
2006 |
Jan
(141) |
Feb
(102) |
Mar
(125) |
Apr
(203) |
May
(57) |
Jun
(30) |
Jul
(139) |
Aug
(46) |
Sep
(64) |
Oct
(105) |
Nov
(34) |
Dec
(162) |
2007 |
Jan
(81) |
Feb
(57) |
Mar
(141) |
Apr
(72) |
May
(9) |
Jun
(1) |
Jul
(144) |
Aug
(88) |
Sep
(40) |
Oct
(43) |
Nov
(34) |
Dec
(20) |
2008 |
Jan
(44) |
Feb
(45) |
Mar
(16) |
Apr
(36) |
May
(8) |
Jun
(77) |
Jul
(177) |
Aug
(66) |
Sep
(8) |
Oct
(33) |
Nov
(13) |
Dec
(37) |
2009 |
Jan
(2) |
Feb
(5) |
Mar
(8) |
Apr
|
May
(36) |
Jun
(19) |
Jul
(46) |
Aug
(8) |
Sep
(1) |
Oct
(66) |
Nov
(61) |
Dec
(10) |
2010 |
Jan
(13) |
Feb
(16) |
Mar
(38) |
Apr
(76) |
May
(47) |
Jun
(32) |
Jul
(35) |
Aug
(45) |
Sep
(20) |
Oct
(61) |
Nov
(24) |
Dec
(16) |
2011 |
Jan
(22) |
Feb
(34) |
Mar
(11) |
Apr
(8) |
May
(24) |
Jun
(23) |
Jul
(11) |
Aug
(42) |
Sep
(81) |
Oct
(48) |
Nov
(21) |
Dec
(20) |
2012 |
Jan
(30) |
Feb
(25) |
Mar
(4) |
Apr
(6) |
May
(1) |
Jun
(5) |
Jul
(5) |
Aug
(8) |
Sep
(6) |
Oct
(6) |
Nov
|
Dec
|
From: Brad <bra...@us...> - 2005-11-16 03:11:29
|
Update of /cvsroot/archive-access/archive-access/projects/wayback/src/java/org/archive/wayback/cdx/indexer In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv30954/src/java/org/archive/wayback/cdx/indexer Log Message: Directory /cvsroot/archive-access/archive-access/projects/wayback/src/java/org/archive/wayback/cdx/indexer added to the repository |
From: Brad <bra...@us...> - 2005-11-16 03:11:29
|
Update of /cvsroot/archive-access/archive-access/projects/wayback/src/java/org/archive/wayback/query In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv30954/src/java/org/archive/wayback/query Log Message: Directory /cvsroot/archive-access/archive-access/projects/wayback/src/java/org/archive/wayback/query added to the repository |
From: Brad <bra...@us...> - 2005-11-16 03:11:29
|
Update of /cvsroot/archive-access/archive-access/projects/wayback/src/java/org/archive/wayback/cdx In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv30954/src/java/org/archive/wayback/cdx Log Message: Directory /cvsroot/archive-access/archive-access/projects/wayback/src/java/org/archive/wayback/cdx added to the repository |
From: Brad <bra...@us...> - 2005-11-16 03:11:29
|
Update of /cvsroot/archive-access/archive-access/projects/wayback/src/java/org/archive/wayback/replay In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv30954/src/java/org/archive/wayback/replay Log Message: Directory /cvsroot/archive-access/archive-access/projects/wayback/src/java/org/archive/wayback/replay added to the repository |
From: Michael S. <sta...@us...> - 2005-11-12 03:08:45
|
Update of /cvsroot/archive-access/archive-access/projects/nutch/src/web In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv16203/src/web Modified Files: header.jsp search.jsp Added Files: explain.jsp Log Message: * src/web/header.jsp * src/web/search.jsp * src/web/explain.jsp * src/web/en/include/header.html Make all inks relative, rather than absolute. Add here pages from nutch that have absolutes. Do this so can get content from webapp doing proxy passthrough. --- NEW FILE: explain.jsp --- <%@ page contentType="text/html; charset=UTF-8" pageEncoding="UTF-8" import="javax.servlet.*" import="javax.servlet.http.*" import="java.io.*" import="java.util.*" import="org.apache.nutch.searcher.*" %><% NutchBean bean = NutchBean.get(application); // set the character encoding to use when interpreting request values request.setCharacterEncoding("UTF-8"); bean.LOG.info("explain request from " + request.getRemoteAddr()); Hit hit = new Hit(Integer.parseInt(request.getParameter("idx")), Integer.parseInt(request.getParameter("id"))); HitDetails details = bean.getDetails(hit); Query query = Query.parse(request.getParameter("query")); String language = ResourceBundle.getBundle("org.nutch.jsp.explain", request.getLocale()) .getLocale().getLanguage(); String requestURI = HttpUtils.getRequestURL(request).toString(); %><!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> <% // To prevent the character encoding declared with 'contentType' page // directive from being overriden by JSTL (apache i18n), we freeze it // by flushing the output buffer. 
// see http://java.sun.com/developer/technicalArticles/Intl/MultilingualJSP/ out.flush(); %> <%@ taglib uri="http://jakarta.apache.org/taglibs/i18n" prefix="i18n" %> <i18n:bundle baseName="org.nutch.jsp.explain"/> <html lang="<%= language %>"> <meta http-equiv="Content-Type" content="text/html; charset=utf-8"> <head> <title>Nutch: <i18n:message key="title"/></title> <jsp:include page="/include/style.html"/> </head> <body> <jsp:include page="<%= language + "/include/header.html"%>"/> <h3><i18n:message key="page"/></h3> <%=bean.getDetails(hit).toHtml()%> <h3><i18n:message key="scoreForQuery"> <i18n:messageArg value="<%=query%>"/> </i18n:message> </h3> <%=bean.getExplanation(query, hit)%> <jsp:include page="/include/footer.html"/> </body> </html> Index: header.jsp =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/nutch/src/web/header.jsp,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** header.jsp 21 Apr 2005 06:45:37 -0000 1.1 --- header.jsp 12 Nov 2005 03:08:36 -0000 1.2 *************** *** 1 **** ! <table cellspacing="0" cellpadding="0" border="0" width="635"><tr><td rowspan="2" width="140" valign="bottom"><a href="/"><img border="0" src="<%=request.getContextPath()%>/img/logo.jpg"/></a><br class="br"/><br class="br"/></td></tr><tr><td align="left" valign="bottom" width="495"><table width="495" cellspacing="0" cellpadding="0" border="0"><tr><h4>Web Search</h4></tr></table></td></tr></table> --- 1 ---- ! 
<table cellspacing="0" cellpadding="0" border="0" width="635"><tr><td rowspan="2" width="140" valign="bottom"><a href="/"><img border="0" src="img/logo.jpg"/></a><br class="br"/><br class="br"/></td></tr><tr><td align="left" valign="bottom" width="495"><table width="495" cellspacing="0" cellpadding="0" border="0"><tr><h4>Web Search</h4></tr></table></td></tr></table> Index: search.jsp =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/nutch/src/web/search.jsp,v retrieving revision 1.24 retrieving revision 1.25 diff -C2 -d -r1.24 -r1.25 *** search.jsp 4 Nov 2005 21:31:16 -0000 1.24 --- search.jsp 12 Nov 2005 03:08:36 -0000 1.25 *************** *** 97,106 **** .getLocale().getLanguage(); String requestURI = HttpUtils.getRequestURL(request).toString(); - String base = requestURI.substring(0, requestURI.lastIndexOf('/')); // URLEncoder.encode the queryString rather than just use htmlQueryString. // The former will take care of other than just html entities in case its // needed. ! String rss = request.getContextPath() + "/opensearch?query=" + URLEncoder.encode(queryString, "UTF-8") + "&hitsPerDup=" + hitsPerDup + ((start != 0)? "&start=" + start: "") + params; --- 97,105 ---- .getLocale().getLanguage(); String requestURI = HttpUtils.getRequestURL(request).toString(); // URLEncoder.encode the queryString rather than just use htmlQueryString. // The former will take care of other than just html entities in case its // needed. ! String rss = "/opensearch?query=" + URLEncoder.encode(queryString, "UTF-8") + "&hitsPerDup=" + hitsPerDup + ((start != 0)? "&start=" + start: "") + params; *************** *** 120,126 **** <head> <title>Internet Archive: <i18n:message key="title"/></title> ! 
<link rel="shortcut icon" href="<%=request.getContextPath()%>/images/logo-16.jpg" type="image/x-icon"/> <jsp:include page="/include/style.html"/> - <base href="<%= base + "/" + language %>/"> </head> --- 119,124 ---- <head> <title>Internet Archive: <i18n:message key="title"/></title> ! <link rel="shortcut icon" href="img/logo-16.jpg" type="image/x-icon"/> <jsp:include page="/include/style.html"/> </head> *************** *** 128,132 **** <jsp:include page="/header.jsp"/> ! <form name="search" action="<%=request.getContextPath()%>/search.jsp" method="get"> <input name="query" size=44 value="<%=htmlQueryString%>"> <input type="hidden" name="hitsPerPage" value="<%=hitsPerPage%>"> --- 126,130 ---- <jsp:include page="/header.jsp"/> ! <form name="search" action="search.jsp" method="get"> <input name="query" size=44 value="<%=htmlQueryString%>"> <input type="hidden" name="hitsPerPage" value="<%=hitsPerPage%>"> *************** *** 217,225 **** +"&start="+start+"&hitsPerPage="+hitsPerPage+"&hitsPerDup="+0; %> - ! <a href="<%=request.getContextPath()%>/search.jsp?<%=more%>"><i18n:message key="moreFrom"/> <%=hit.getDedupValue()%></a> <% } %> - ! <a href="<%=request.getContextPath()%>/explain.jsp?<%=id%>&query=<%=URLEncoder.encode(queryString)%>">explain</a> </small> <br><br> --- 215,223 ---- +"&start="+start+"&hitsPerPage="+hitsPerPage+"&hitsPerDup="+0; %> - ! <a href="search.jsp?<%=more%>"><i18n:message key="moreFrom"/> <%=hit.getDedupValue()%></a> <% } %> - ! <a href="explain.jsp?<%=id%>&query=<%=URLEncoder.encode(queryString)%>">explain</a> </small> <br><br> *************** *** 231,235 **** || (!hits.totalIsExact() && (hits.getLength() > start+hitsPerPage))) { %> ! <form name="search" action="<%=request.getContextPath()%>/search.jsp" method="get"> <input type="hidden" name="query" value="<%=htmlQueryString%>"> <input type="hidden" name="start" value="<%=end%>"> --- 229,233 ---- || (!hits.totalIsExact() && (hits.getLength() > start+hitsPerPage))) { %> ! 
<form name="search" action="search.jsp" method="get"> <input type="hidden" name="query" value="<%=htmlQueryString%>"> <input type="hidden" name="start" value="<%=end%>"> *************** *** 248,252 **** if ((!hits.totalIsExact() && (hits.getLength() <= start+hitsPerPage))) { %> ! <form name="search" action="<%=request.getContextPath()%>/search.jsp" method="get"> <input type="hidden" name="query" value="<%=htmlQueryString%>"> <input type="hidden" name="hitsPerPage" value="<%=hitsPerPage%>"> --- 246,250 ---- if ((!hits.totalIsExact() && (hits.getLength() <= start+hitsPerPage))) { %> ! <form name="search" action="search.jsp" method="get"> <input type="hidden" name="query" value="<%=htmlQueryString%>"> <input type="hidden" name="hitsPerPage" value="<%=hitsPerPage%>"> *************** *** 269,273 **** <p> <a href="http://www.nutch.org/"> ! <img border="0" src="<%=request.getContextPath()%>/img/poweredbynutch_01.gif"> </a> </p> --- 267,271 ---- <p> <a href="http://www.nutch.org/"> ! <img border="0" src="img/poweredbynutch_01.gif"> </a> </p> |
From: Michael S. <sta...@us...> - 2005-11-12 03:08:45
|
Update of /cvsroot/archive-access/archive-access/projects/nutch/src/web/en/include In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv16203/src/web/en/include Added Files: header.html Log Message: * src/web/header.jsp * src/web/search.jsp * src/web/explain.jsp * src/web/en/include/header.html Make all links relative, rather than absolute. Add here pages from nutch that have absolutes. Do this so can get content from webapp doing proxy passthrough. --- NEW FILE: header.html --- <?xml version="1.0" encoding="UTF-8"?><!--This file is automatically generated. Do not edit!--><table width="635" border="0" cellpadding="0" cellspacing="0"><tr><td valign="bottom" width="140" rowspan="2"><a href="./"><img src="img/reiter/logo_nutch.gif" border="0"/></a><img src="img/reiter/spacer_666666.gif" width="140" height="1"/></td></tr><tr><td width="495" valign="bottom" align="right"><table border="0" cellpadding="0" cellspacing="0" width="495"><tr><td background="img/reiter/_bg_reiter.gif" width="400"> </td><td height="28" valign="bottom" width="10"><img src="img/reiter/reiter_inactive_le1.gif" border="0"/></td><td background="img/reiter/_bg_reiter_inactive.gif" valign="bottom" nowrap="nowrap"><a class="bodytext" href="about.html">About</a></td><td height="28" valign="bottom" width="10"><img src="img/reiter/reiter_inactive_ri.gif" border="0"/></td><td height="28" valign="bottom" width="10"><img src="img/reiter/reiter_inactive_le.gif" border="0"/></td><td background="img/reiter/_bg_reiter_inactive.gif" valign="bottom" nowrap="nowrap"><a class="bodytext" href="http://www.nutch.org/faq.html">FAQ</a></td><td height="28" valign="bottom" width="10"><img src="img/reiter/reiter_inactive_ri.gif" border="0"/></td></tr></table></td></tr></table> |
From: Michael S. <sta...@us...> - 2005-11-12 03:07:36
|
Update of /cvsroot/archive-access/archive-access/projects/nutch/src/web/en/include In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv16171/en/include Log Message: Directory /cvsroot/archive-access/archive-access/projects/nutch/src/web/en/include added to the repository |
From: Michael S. <sta...@us...> - 2005-11-12 03:07:26
|
Update of /cvsroot/archive-access/archive-access/projects/nutch/src/web/en In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv16156/en Log Message: Directory /cvsroot/archive-access/archive-access/projects/nutch/src/web/en added to the repository |
From: Michael S. <sta...@us...> - 2005-11-11 00:52:25
|
Update of /cvsroot/archive-access/archive-access/projects/nutch/xdocs In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv13084/xdocs Modified Files: srcbuild.xml Log Message: * xdocs/srcbuild.xml nutch-site.xml.nutchwax was renamed nutch-site.xml.template Index: srcbuild.xml =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/nutch/xdocs/srcbuild.xml,v retrieving revision 1.9 retrieving revision 1.10 diff -C2 -d -r1.9 -r1.10 *** srcbuild.xml 20 Oct 2005 19:54:26 -0000 1.9 --- srcbuild.xml 11 Nov 2005 00:52:17 -0000 1.10 *************** *** 40,50 **** </p> ! <p>Symlink <literal>${NUTCHWAX}/nutch/conf/nutch-site.xml.nutchwax</literal> to ${NUTCHWAX}/conf/nutch-site.xml. Doing this, there is only one nutch-site.xml shared by core Nutch and by NutchWAX extensions. <pre> % cd ${NUTCH_HOME}/nutch/conf % mv nutch-site.xml nutch-site.xml.original ! % ln -s ${NUTCHWAX}/conf/nutch-site.xml.nutchwax nutch-site.xml</pre> ! The <literal>nutch-site.xml.nutchwax</literal> that is in ${NUTCHWAX} has NutchWAX specific configuration overrides as well as hardcodings of collection names and --- 40,50 ---- </p> ! <p>Symlink <literal>${NUTCHWAX}/nutch/conf/nutch-site.xml.template</literal> to ${NUTCHWAX}/conf/nutch-site.xml. Doing this, there is only one nutch-site.xml shared by core Nutch and by NutchWAX extensions. <pre> % cd ${NUTCH_HOME}/nutch/conf % mv nutch-site.xml nutch-site.xml.original ! % ln -s ${NUTCHWAX}/conf/nutch-site.xml.template nutch-site.xml</pre> ! The <literal>nutch-site.xml.template</literal> that is in ${NUTCHWAX} has NutchWAX specific configuration overrides as well as hardcodings of collection names and |
From: Michael S. <sta...@us...> - 2005-11-10 02:03:26
|
Update of /cvsroot/archive-access/archive-access/projects/nutch In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv31464 Modified Files: build.xml Log Message: * build.xml Don't copy over web.xml. Index: build.xml =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/nutch/build.xml,v retrieving revision 1.11 retrieving revision 1.12 diff -C2 -d -r1.11 -r1.12 *** build.xml 4 Nov 2005 19:23:58 -0000 1.11 --- build.xml 10 Nov 2005 02:03:16 -0000 1.12 *************** *** 159,167 **** <war destfile="${build.dir}/${name}.war" webxml="${this.web}/web.xml"> ! <fileset dir="${nutch.web}/jsp" excludes="**/search.jsp"/> <fileset dir="${nutch.root}/docs"> <include name="img/*.gif"/> </fileset> ! <fileset dir="${this.web}"/> <classes dir="${nutch.root}/conf" > <exclude name="nutch-site.xml" /> --- 159,172 ---- <war destfile="${build.dir}/${name}.war" webxml="${this.web}/web.xml"> ! <fileset dir="${nutch.web}/jsp"> ! <exclude name="**/search.jsp"/> ! <exclude name="**/web.xml"/> ! </fileset> <fileset dir="${nutch.root}/docs"> <include name="img/*.gif"/> </fileset> ! <fileset dir="${this.web}"> ! <exclude name="**/web.xml"/> ! </fileset> <classes dir="${nutch.root}/conf" > <exclude name="nutch-site.xml" /> |
From: Michael S. <sta...@us...> - 2005-11-10 01:30:55
|
Update of /cvsroot/archive-access/archive-access/projects/nutch/xdocs In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv25922/xdocs Modified Files: gettingstarted.xml Log Message: * xdocs/gettingstarted.xml Doc fix from brad. Index: gettingstarted.xml =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/nutch/xdocs/gettingstarted.xml,v retrieving revision 1.11 retrieving revision 1.12 diff -C2 -d -r1.11 -r1.12 *** gettingstarted.xml 22 Oct 2005 01:46:21 -0000 1.11 --- gettingstarted.xml 10 Nov 2005 01:30:42 -0000 1.12 *************** *** 24,29 **** <pre>% cd ${NUTCHWAX} % chmod u+x ./bin/* ! % ./bin/indexarcs -h</pre> ! <code>indexarcs</code> is a wrapper script that will run through all the indexing step. It takes a bunch of options. To do the most basic indexing operation, point it a few ARC files and let it run: --- 24,29 ---- <pre>% cd ${NUTCHWAX} % chmod u+x ./bin/* ! % ./bin/indexarcs.sh -h</pre> ! <code>indexarcs.sh</code> is a wrapper script that will run through all the indexing step. It takes a bunch of options. To do the most basic indexing operation, point it a few ARC files and let it run: |
From: Michael S. <sta...@us...> - 2005-11-04 21:31:31
|
Update of /cvsroot/archive-access/archive-access/projects/nutch/src/web/img In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv22778/src/web/img Added Files: ia-logo.jpg Log Message: * src/web/search.jsp Add carrying forward of collection name. * src/web/search.jsp.archiveit Add Dan's paging through results. * src/web/img/ia-logo.jpg Add another version of ia logo. --- NEW FILE: ia-logo.jpg --- (This appears to be a binary file; contents omitted.) |
From: Michael S. <sta...@us...> - 2005-11-04 21:31:30
|
Update of /cvsroot/archive-access/archive-access/projects/nutch/src/web In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv22778/src/web Modified Files: search.jsp Added Files: search.jsp.archiveit Log Message: * src/web/search.jsp Add carrying forward of collection name. * src/web/search.jsp.archiveit Add Dan's paging through results. * src/web/img/ia-logo.jpg Add another version of ia logo. --- NEW FILE: search.jsp.archiveit --- <%@ page contentType="text/html; charset=UTF-8" pageEncoding="UTF-8" import="javax.servlet.*" import="javax.servlet.http.*" import="java.io.*" import="java.util.*" import="java.text.*" import="java.net.*" import="java.util.regex.Pattern" import="org.apache.nutch.html.Entities" import="org.apache.nutch.searcher.*" import="org.apache.nutch.plugin.*" import="org.apache.nutch.util.NutchConf" import="org.archive.access.nutch.NutchwaxQuery" %><%! public static final DateFormat FORMAT = new SimpleDateFormat("yyyyMMddHHmmss"); public static final DateFormat DISPLAY_FORMAT = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); private static final String COLLECTION_KEY = "collection"; private static final String COLLECTION_QUERY_PARAM_KEY = COLLECTION_KEY + ":"; %><% NutchBean bean = NutchBean.get(application); // Set the character encoding to use when interpreting request values request.setCharacterEncoding("UTF-8"); bean.LOG.info("query request from " + request.getRemoteAddr()); // get query from request String queryString = request.getParameter("query"); if (queryString == null) { queryString = ""; } String htmlQueryString = Entities.encode(queryString); int start = 0; // first hit to display String startString = request.getParameter("start"); if (startString != null) start = Integer.parseInt(startString); int hitsPerPage = 10; // number of hits to display String hitsString = request.getParameter("hitsPerPage"); if (hitsString != null) hitsPerPage = Integer.parseInt(hitsString); // Add in 'sort' parameter. 
String sort = request.getParameter("sort"); boolean reverse = sort!=null && "true".equals(request.getParameter("reverse")); // De-Duplicate handling. Look for duplicates field and for how many // duplicates per results to return. Default duplicates field is 'site' // and duplicates per results default is '1' (Used to be '2' but now // '1' so can have an index with dups not show dups when used doing // straight searches). String dedupField = request.getParameter("dedupField"); if (dedupField == null || dedupField.length() == 0) { dedupField = "site"; } int hitsPerDup = 1; String hitsPerDupString = request.getParameter("hitsPerDup"); if (hitsPerDupString != null && hitsPerDupString.length() > 0) { hitsPerDup = Integer.parseInt(hitsPerDupString); } else { // If 'hitsPerSite' present, use that value. String hitsPerSiteString = request.getParameter("hitsPerSite"); if (hitsPerSiteString != null && hitsPerSiteString.length() > 0) { hitsPerDup = Integer.parseInt(hitsPerSiteString); } } // If a 'collection' parameter present, always add to query. String collection = request.getParameter(COLLECTION_KEY); if (collection != null && queryString != null && queryString.length() > 0) { int collectionIndex = queryString.indexOf(COLLECTION_QUERY_PARAM_KEY); if (collectionIndex < 0) { queryString = queryString + " " + COLLECTION_QUERY_PARAM_KEY + collection; } } // Make up query string for use later drawing the 'rss' logo. String params = "&hitsPerPage=" + hitsPerPage + (sort == null ? "" : "&sort=" + sort + (reverse? "&reverse=true": "") + (dedupField == null ? 
"" : "&dedupField=" + dedupField)); Query query = NutchwaxQuery.parse(queryString); bean.LOG.info("query: " + query.toString()); String language = ResourceBundle.getBundle("org.nutch.jsp.search", request.getLocale()) .getLocale().getLanguage(); String requestURI = HttpUtils.getRequestURL(request).toString(); String base = requestURI.substring(0, requestURI.lastIndexOf('/')); // URLEncoder.encode the queryString rather than just use htmlQueryString. // The former will take care of other than just html entities in case its // needed. String rss = request.getContextPath() + "/opensearch?query=" + URLEncoder.encode(queryString, "UTF-8") + "&hitsPerDup=" + hitsPerDup + ((start != 0)? "&start=" + start: "") + params; %><!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> <% // To prevent the character encoding declared with 'contentType' page // directive from being overriden by JSTL (apache i18n), we freeze it // by flushing the output buffer. // see http://java.sun.com/developer/technicalArticles/Intl/MultilingualJSP/ out.flush(); %> <%@ taglib uri="http://jakarta.apache.org/taglibs/i18n" prefix="i18n" %> <i18n:bundle baseName="org.nutch.jsp.search"/> <html lang="<%= language %>"> <meta http-equiv="Content-Type" content="text/html; charset=utf-8"> <head> <title>Internet Archive: <i18n:message key="title"/></title> <link rel="shortcut icon" href="<%=request.getContextPath()%>/images/logo-16.jpg" type="image/x-icon"/> <jsp:include page="/include/style.html"/> <base href="<%= base + "/" + language %>/"> </head> <body> <jsp:include page="/header.jsp"/> <form name="search" action="<%=request.getContextPath()%>/search.jsp" method="get"> <input name="query" size=44 value="<%=htmlQueryString%>"> <input type="hidden" name="hitsPerPage" value="<%=hitsPerPage%>"> <input type="hidden" name="collection" value="<%=collection%>"> <input type="submit" value="<i18n:message key="search"/>"> <% if (sort != null) { %> <input type="hidden" name="sort" value="<%=sort%>"> <input 
type="hidden" name="reverse" value="<%=reverse%>"> <% } %> </form> <% long startTime = System.currentTimeMillis(); Hits hits = null; try { hits = bean.search(query, start + hitsPerPage, hitsPerDup, dedupField, sort, reverse); } catch (IOException e) { hits = new Hits(0, new Hit[0]); } long searchTime = System.currentTimeMillis() - startTime; int end = (int)Math.min(hits.getLength(), start + hitsPerPage); %> Search took <%= searchTime/1000.0 %> seconds. <i18n:message key="hits"> <i18n:messageArg value="<%=new Long((end==0)?0:(start+1))%>"/> <i18n:messageArg value="<%=new Long(end)%>"/> <i18n:messageArg value="<%=new Long(hits.getTotal())%>"/> </i18n:message> <% // be responsive out.flush(); int length = end-start; int realEnd = (int)Math.min(hits.getLength(), start + hitsPerPage); Hit[] show = hits.getHits(start, realEnd-start); HitDetails[] details = bean.getDetails(show); String[] summaries = bean.getSummary(details, query); bean.LOG.info("total hits: " + hits.getTotal()); String collectionsHost = NutchConf.get().get("collections.host"); %> <br><br> <% for (int i = 0; i < length; i++) { // display the hits Hit hit = show[i]; HitDetails detail = details[i]; String title = detail.getValue("title"); String summary = summaries[i]; String id = "idx=" + hit.getIndexNo() + "&id=" + hit.getIndexDocNo(); String archiveDate = FORMAT.format(new Date(bean.getFetchDate(detail))); String archiveDisplayDate = DISPLAY_FORMAT.format(new Date(bean.getFetchDate(detail))); String archiveCollection = detail.getValue("collection"); String url = detail.getValue("url"); String target = "http://" + collectionsHost + "/" + archiveCollection + "/" + archiveDate + "/" + url; String allVersions = "http://" + collectionsHost + "/" + archiveCollection + "/*/" + url; if (title == null || title.equals("")) // use url for docs w/o title title = url; %> <b><a href="<%=target%>"><%=Entities.encode(title)%></a></b> <%@ include file="./more.jsp" %> <% if (!"".equals(summary)) { %> <br><%=summary%> <% 
} %> <br> <small> <span class="url"><%=Entities.encode(url)%></span> - <%=archiveDisplayDate%> - <a href="<%=allVersions%>">other versions</a> <% if (hit.moreFromDupExcluded()) { String more = "query="+URLEncoder.encode("site:"+hit.getDedupValue()+" "+queryString) +"&start="+start+"&hitsPerPage="+hitsPerPage+"&hitsPerDup="+0; %> - <a href="<%=request.getContextPath()%>/search.jsp?<%=more%>"><i18n:message key="moreFrom"/> <%=hit.getDedupValue()%></a> <% } %> - <a href="<%=request.getContextPath()%>/explain.jsp?<%=id%>&query=<%=URLEncoder.encode(queryString)%>">explain</a> </small> <br><br> <% } %> <% if ((hits.totalIsExact() && end < hits.getTotal()) // more hits to show || (!hits.totalIsExact() && (hits.getLength() > start+hitsPerPage))) { long pagesAvailable = (long) (hits.getTotal() / hitsPerPage) + 1 ; long currentPage = (long) ((start + 1) / hitsPerPage + 1) ; int maxPagesToShow = 20; long displayMin = (long) (currentPage - (0.5 * maxPagesToShow) ); if (displayMin < 1) { displayMin = 1; } long displayMax = displayMin + maxPagesToShow - 1 ; if (displayMax > pagesAvailable) { displayMax = pagesAvailable; } %> <!-- Debugging info <table border="1"> <tr> <td>pagesAvailable:<%=pagesAvailable%></td> <td>currentPage:<%=currentPage%></td> <td>displayMin:<%=displayMin%></td> <td>displayMax:<%=displayMax%></td> </tr> </table> --> <center> <% if (currentPage > 1) { long previousPageStart = (currentPage - 1) * hitsPerPage; String previousPageUrl = request.getContextPath() + "/search.jsp?" 
+ "query=" + htmlQueryString + "&start=" + previousPageStart + "&hitsPerPage=" + hitsPerPage + "&hitsPerDup=" + hitsPerDup + "&dedupField=" + dedupField; if (sort != null) { previousPageUrl = previousPageUrl + "&sort=" + sort + "&reverse=" + reverse; } %> <a href="<%=previousPageUrl%>"><b>Previous</b></a>  <% } %> <% for (long pageIndex = displayMin; pageIndex <= displayMax; pageIndex++) { long pageStart = (pageIndex - 1) * hitsPerPage; String pageUrl = request.getContextPath() + "/search.jsp?" + "query=" + htmlQueryString + "&start=" + pageStart + "&hitsPerPage=" + hitsPerPage + "&hitsPerDup=" + hitsPerDup + "&dedupField=" + dedupField; if (sort != null) { pageUrl = pageUrl + "&sort=" + sort + "&reverse=" + reverse; } if (pageIndex != currentPage) { %> <a href="<%=pageUrl%>"><%=pageIndex%></a> <% } else { %> <b><%=pageIndex%></b> <% } } %> <% if (currentPage < pagesAvailable) { long nextPageStart = (currentPage + 1) * hitsPerPage; String nextPageUrl = request.getContextPath() + "/search.jsp?" 
+ "query=" + htmlQueryString + "&start=" + nextPageStart + "&hitsPerPage=" + hitsPerPage + "&hitsPerDup=" + hitsPerDup + "&dedupField=" + dedupField; if (sort != null) { nextPageUrl = nextPageUrl + "&sort=" + sort + "&reverse=" + reverse; } %> <a href="<%=nextPageUrl%>"><b>Next</b></a>  <% } %> </center> <% } if ((!hits.totalIsExact() && (hits.getLength() <= start+hitsPerPage))) { %> <form name="search" action="<%=request.getContextPath()%>/search.jsp" method="get"> <input type="hidden" name="query" value="<%=htmlQueryString%>"> <input type="hidden" name="hitsPerPage" value="<%=hitsPerPage%>"> <input type="hidden" name="hitsPerDup" value="0"> <input type="submit" value="<i18n:message key="showAllHits"/>"> <% if (sort != null) { %> <input type="hidden" name="sort" value="<%=sort%>"> <input type="hidden" name="reverse" value="<%=reverse%>"> <% } %> </form> <% } %> <p> <table bgcolor="3333ff" align="right"> <tr><td bgcolor="ff9900"><a href="<%=rss%>"><font color="ffffff"><b>RSS</b> </font></a></td></tr> </table> <a href="http://www.archive.org"> <img border="0" src="<%=request.getContextPath()%>/img/ia-logo.jpg"> </a> <a href="http://www.nutch.org/"> <img border="0" src="<%=request.getContextPath()%>/img/poweredbynutch_01.gif"> </a> </p> </body> </html> Index: search.jsp =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/nutch/src/web/search.jsp,v retrieving revision 1.23 retrieving revision 1.24 diff -C2 -d -r1.23 -r1.24 *** search.jsp 6 Oct 2005 17:35:02 -0000 1.23 --- search.jsp 4 Nov 2005 21:31:16 -0000 1.24 *************** *** 22,25 **** --- 22,27 ---- public static final DateFormat DISPLAY_FORMAT = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + private static final String COLLECTION_KEY = "collection"; + private static final String COLLECTION_QUERY_PARAM_KEY = COLLECTION_KEY + ":"; %><% NutchBean bean = NutchBean.get(application); *************** *** 32,36 **** // get query from request 
String queryString = request.getParameter("query"); - System.out.println("Untampered query: " + queryString); if (queryString == null) { queryString = ""; --- 34,37 ---- *************** *** 73,76 **** --- 74,87 ---- } } + + // If a 'collection' parameter present, always add to query. + String collection = request.getParameter(COLLECTION_KEY); + if (collection != null && queryString != null && queryString.length() > 0) { + int collectionIndex = queryString.indexOf(COLLECTION_QUERY_PARAM_KEY); + if (collectionIndex < 0) { + queryString = queryString + " " + COLLECTION_QUERY_PARAM_KEY + + collection; + } + } // Make up query string for use later drawing the 'rss' logo. *************** *** 120,123 **** --- 131,135 ---- <input name="query" size=44 value="<%=htmlQueryString%>"> <input type="hidden" name="hitsPerPage" value="<%=hitsPerPage%>"> + <input type="hidden" name="collection" value="<%=collection%>"> <input type="submit" value="<i18n:message key="search"/>"> <% if (sort != null) { %> |
From: Michael S. <sta...@us...> - 2005-11-04 19:30:27
|
Update of /cvsroot/archive-access/archive-access/projects/nutch In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv31995 Modified Files: project.xml Log Message: * project.xml Redoing 0.4.1 so it picks up build fix. Index: project.xml =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/nutch/project.xml,v retrieving revision 1.24 retrieving revision 1.25 diff -C2 -d -r1.24 -r1.25 *** project.xml 4 Nov 2005 18:20:02 -0000 1.24 --- project.xml 4 Nov 2005 19:30:14 -0000 1.25 *************** *** 12,16 **** <!-- The version of the project under development, e.g. 1.1, 1.2, 2.0-SNAPSHOT --> ! <currentVersion>0.5.0${version.build.suffix}</currentVersion> <!-- details about the organization that 'owns' the project --> --- 12,16 ---- <!-- The version of the project under development, e.g. 1.1, 1.2, 2.0-SNAPSHOT --> ! <currentVersion>0.4.1${version.build.suffix}</currentVersion> <!-- details about the organization that 'owns' the project --> |
From: Michael S. <sta...@us...> - 2005-11-04 19:24:10
|
Update of /cvsroot/archive-access/archive-access/projects/nutch/conf In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv30318/conf Added Files: nutch-site.xml.template Removed Files: nutch-site.xml.nutchwax Log Message: * build.xml Fix the nutch-site.xml copy. We were picking up default nutch-site.xml rather than the nutchwax specific nutch-site.template (formerly nutch-site.xml.nutchwax). * maven.xml Renamed nutch-site.xml as nutch-site.xml.template to align with how nutch does template naming. * conf/nutch-site.xml.template Added. Rename of nutch-site.xml.nutchwax. * conf/nutch-site.xml.nutchwax Removed. --- NEW FILE: nutch-site.xml.template --- <?xml version="1.0"?> <!--Internet Archive Nutch configuration. This config. is what gets built into nutchwax. Overrides a few Nutch defaults and adds nutchwax specific config (Such config. options have an 'archive' prefix). --> <nutch-conf> <!-- Enable parse-ext (parse-ext is a parser that calls the 'ext'ernal program xpdf to parse pdf files). Also enable parse-default and the ia plugins. --> <property> <name>plugin.includes</name> <value>urlfilter-regex|parse-(text|html|ext|default)|index-(basic|ia)|query-(basic|site|url|ia)</value> </property> <property> <name>db.ignore.internal.links</name> <value>false</value> <description>Keep all links, not just inter-host. db updates will be FASTER if set to true. Downside is that link text from same site won't be included (More valuable to take anchor text from other hosts). Use this if wide variety of sites to index. </description> </property> <property> <name>indexer.boost.by.link.count</name> <value>true</value> <description>Use in-degree as poor-man's link analysis.</description> </property> <property> <name>indexer.max.tokens</name> <value>100000</value> <description>Don't truncate documents as much as by default. 
</description> </property> <property> <name>http.content.limit</name> <value>10000000</value> </property> <property> <name>io.map.index.skip</name> <value>7</value> <description>Use less RAM. Index files get read into memory. This config. says read 1/7th only in at a time. Random access is slower but uses less memory. </description> </property> <property> <name>indexer.termIndexInterval</name> <value>1024</value> <description>Determines the fraction of terms which Lucene keeps in RAM when searching, to facilitate random-access. Smaller values use more memory but make searches somewhat faster. Larger values use less memory but make searches somewhat slower. For lucene indexes, normally. The default is 128. Write every 1024 entries rather than every 128, the default. </description> </property> <property> <name>indexer.maxMergeDocs</name> <value>2147483647</value> <description>This number determines the maximum number of Lucene Documents to be merged into a new Lucene segment. Larger values increase indexing speed and reduce the number of Lucene segments, which reduces the number of open file handles; however, this also increases RAM usage during indexing. Doug says: "There was a bogus value for indexer.maxMergeDocs in nutch-default.xml which made indexing really slow. The correct value is something really big (like Integer.MAX_VALUE)." </description> </property> <property> <name>searcher.summary.context</name> <value>20</value> <description> The number of context terms to display preceding and following matching terms in a hit summary. Make summaries a little longer than the default. </description> </property> <property> <name>searcher.summary.length</name> <value>80</value> <description> The total number of terms to display in a hit summary. </description> </property> <property> <name>collections.host</name> <value>collections.example.org</value> <description>The name of the server hosting collections. 
</description> </property> <!-- The name of this archive collection. DEPRECATED. Now search.jsp uses the 'collection' returned by the search result drawing up the wayback URL and at index time, use the command-line 'collection' option. <property> <name>archive.collection</name> <value>be05</value> </property> --> <!-- <property> <name>searcher.dir</name> <value>/home/stack/workspace/nutch-datadir</value> <description>Optionally, hardcode the nutch datadir location rather than rely on tomcat startup location. </description> </property> --> <property> <name>archive.index.all</name> <value>true</value> <description>If set to true, all contenttypes are indexed. Otherwise we only index text/* and application/* </description> </property> <property> <name>archive.skip.big.html</name> <value>-1</value> <description>If text/html is larger than value, just skip it completely. Use this setting to bypass problematic massive text/html (We were seeing the text/html parser hang for hours in bad, big html docs). Default value is -1 which says don't skip text/html docs.</description> </property> </nutch-conf> --- nutch-site.xml.nutchwax DELETED --- |
From: Michael S. <sta...@us...> - 2005-11-04 19:24:10
|
Update of /cvsroot/archive-access/archive-access/projects/nutch In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv30318 Modified Files: build.xml maven.xml Log Message: * build.xml Fix the nutch-site.xml copy. We were picking up default nutch-site.xml rather than the nutchwax specific nutch-site.template (formerly nutch-site.xml.nutchwax). * maven.xml Renamed nutch-site.xml as nutch-site.xml.template to align with how nutch does template naming. * conf/nutch-site.xml.template Added. Rename of nutch-site.xml.nutchwax. * conf/nutch-site.xml.nutchwax Removed. Index: maven.xml =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/nutch/maven.xml,v retrieving revision 1.12 retrieving revision 1.13 diff -C2 -d -r1.12 -r1.13 *** maven.xml 22 Oct 2005 02:43:23 -0000 1.12 --- maven.xml 4 Nov 2005 19:23:58 -0000 1.13 *************** *** 41,45 **** </postGoal> - <postGoal name="dist:prepare-bin-filesystem"> <echo>[nutchwax] dist:prepare-bin-filesystem postGoal</echo> --- 41,44 ---- *************** *** 123,127 **** in place to a viewer such as wera.--> <copy tofile="${maven.dist.bin.assembly.dir}/conf/nutch-site.xml" ! file="${basedir}/conf/nutch-site.xml.nutchwax" filtering="true" overwrite="true" /> <attainGoal name="copy_docbook" /> --- 122,126 ---- in place to a viewer such as wera.--> <copy tofile="${maven.dist.bin.assembly.dir}/conf/nutch-site.xml" ! 
file="${basedir}/conf/nutch-site.xml.template" filtering="true" overwrite="true" /> <attainGoal name="copy_docbook" /> Index: build.xml =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/nutch/build.xml,v retrieving revision 1.10 retrieving revision 1.11 diff -C2 -d -r1.10 -r1.11 *** build.xml 26 Jul 2005 20:38:53 -0000 1.10 --- build.xml 4 Nov 2005 19:23:58 -0000 1.11 *************** *** 152,155 **** --- 152,160 ---- <!-- ================================================================== --> <target name="war" depends="jar, compile-plugins"> + <!--Copy our nutchwax nutch-site.xml template into the build dir as + nutch-site.xml. Then in the below, add it into the WEB-INF/classes dir. + --> + <copy file="${root}/conf/nutch-site.xml.template" + tofile="${build.dir}/nutch-site.xml" /> <war destfile="${build.dir}/${name}.war" webxml="${this.web}/web.xml"> *************** *** 159,164 **** </fileset> <fileset dir="${this.web}"/> ! <classes dir="${nutch.root}/conf" excludes="**/*.template"/> ! <classes dir="${root}/conf"/> <classes dir="${nutch.web}/locale"/> <zipfileset dir="${nutch.root}/build/docs" includes="**/include/*.html"/> --- 164,176 ---- </fileset> <fileset dir="${this.web}"/> ! <classes dir="${nutch.root}/conf" > ! <exclude name="nutch-site.xml" /> ! <exclude name="**/*.template"/> ! </classes> ! <classes dir="${root}/conf"> ! <exclude name="**/*.template"/> ! <exclude name="nutch-site.xml" /> ! </classes> ! <classes file="${build.dir}/nutch-site.xml" /> <classes dir="${nutch.web}/locale"/> <zipfileset dir="${nutch.root}/build/docs" includes="**/include/*.html"/> |
From: Michael S. <sta...@us...> - 2005-11-04 18:20:22
|
Update of /cvsroot/archive-access/archive-access/projects/nutch In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv9197 Modified Files: project.xml Log Message: * project.xml Fix version variable. Index: project.xml =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/nutch/project.xml,v retrieving revision 1.23 retrieving revision 1.24 diff -C2 -d -r1.23 -r1.24 *** project.xml 4 Nov 2005 08:59:11 -0000 1.23 --- project.xml 4 Nov 2005 18:20:02 -0000 1.24 *************** *** 12,16 **** <!-- The version of the project under development, e.g. 1.1, 1.2, 2.0-SNAPSHOT --> ! <currentVersion>0.5.0{version.build.suffix}</currentVersion> <!-- details about the organization that 'owns' the project --> --- 12,16 ---- <!-- The version of the project under development, e.g. 1.1, 1.2, 2.0-SNAPSHOT --> ! <currentVersion>0.5.0${version.build.suffix}</currentVersion> <!-- details about the organization that 'owns' the project --> |
From: Michael S. <sta...@us...> - 2005-11-04 08:59:19
|
Update of /cvsroot/archive-access/archive-access/projects/nutch In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv30472 Modified Files: project.xml Log Message: * project.xml Move past release. Index: project.xml =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/nutch/project.xml,v retrieving revision 1.22 retrieving revision 1.23 diff -C2 -d -r1.22 -r1.23 *** project.xml 4 Nov 2005 08:51:13 -0000 1.22 --- project.xml 4 Nov 2005 08:59:11 -0000 1.23 *************** *** 12,16 **** <!-- The version of the project under development, e.g. 1.1, 1.2, 2.0-SNAPSHOT --> ! <currentVersion>0.4.1${version.build.suffix}</currentVersion> <!-- details about the organization that 'owns' the project --> --- 12,16 ---- <!-- The version of the project under development, e.g. 1.1, 1.2, 2.0-SNAPSHOT --> ! <currentVersion>0.5.0{version.build.suffix}</currentVersion> <!-- details about the organization that 'owns' the project --> |
From: Michael S. <sta...@us...> - 2005-11-04 08:51:24
|
Update of /cvsroot/archive-access/archive-access/projects/nutch In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv28694 Modified Files: project.xml Log Message: * project.xml Set version to be 0.4.1. Index: project.xml =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/nutch/project.xml,v retrieving revision 1.21 retrieving revision 1.22 diff -C2 -d -r1.21 -r1.22 *** project.xml 22 Oct 2005 02:43:23 -0000 1.21 --- project.xml 4 Nov 2005 08:51:13 -0000 1.22 *************** *** 12,16 **** <!-- The version of the project under development, e.g. 1.1, 1.2, 2.0-SNAPSHOT --> ! <currentVersion>0.5.0${version.build.suffix}</currentVersion> <!-- details about the organization that 'owns' the project --> --- 12,16 ---- <!-- The version of the project under development, e.g. 1.1, 1.2, 2.0-SNAPSHOT --> ! <currentVersion>0.4.1${version.build.suffix}</currentVersion> <!-- details about the organization that 'owns' the project --> |
From: Michael S. <sta...@us...> - 2005-11-04 08:30:10
|
Update of /cvsroot/archive-access/archive-access/projects/nutch/xdocs In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv24897/xdocs Modified Files: index.xml Log Message: * xdocs/index.xml Note on 0.4.1. release. Index: index.xml =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/nutch/xdocs/index.xml,v retrieving revision 1.13 retrieving revision 1.14 diff -C2 -d -r1.13 -r1.14 *** index.xml 22 Oct 2005 01:34:00 -0000 1.13 --- index.xml 4 Nov 2005 08:30:01 -0000 1.14 *************** *** 23,26 **** --- 23,29 ---- </section> <section name="News"> + <subsection name="Release 0.4.1 - 11/03/2005"> + <p>Bug fix for double encoding issue in NutchWAX 0.4.0.</p> + </subsection> <subsection name="Release 0.4.0 - 10/21/2005"> <p>NutchWAX 0.4.0 is built against Nutch-0.7. Lots of Bug Fixes |
From: Michael S. <sta...@us...> - 2005-11-04 08:28:47
|
Update of /cvsroot/archive-access/archive-access/projects/nutch/src/articles In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv24696/src/articles Modified Files: releasenotes.xml Log Message: * src/articles/releasenotes.xml Update release notes for release 0.4.1. Index: releasenotes.xml =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/nutch/src/articles/releasenotes.xml,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -d -r1.4 -r1.5 *** releasenotes.xml 22 Oct 2005 03:08:47 -0000 1.4 --- releasenotes.xml 4 Nov 2005 08:28:38 -0000 1.5 *************** *** 12,18 **** </authorgroup> </articleinfo> <sect1 id="0_4_0"> ! <title>Release 0.4.0 - 10/10/21</title> <abstract> <para>Bug fixes.</para> --- 12,28 ---- </authorgroup> </articleinfo> + <sect1 id="0_4_1"> + <title>Release 0.4.1 - 11/04/05</title> + <abstract> + <para>Bug fix.</para> + </abstract> + <para>Fix encoding issue in 0.4.0: + <ulink url="https://sourceforge.net/tracker/index.php?func=detail&aid=1348019&group_id=118427&atid=681137">[1348019] [nutchwax] Double encoding of disallowed xml chars + </ulink> + </para> + </sect1> <sect1 id="0_4_0"> ! <title>Release 0.4.0 - 10/10/05</title> <abstract> <para>Bug fixes.</para> |
From: Michael S. <sta...@us...> - 2005-11-04 07:45:48
|
Update of /cvsroot/archive-access/archive-access/projects/nutch/src/java/org/archive/access/nutch In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv12183/src/java/org/archive/access/nutch Modified Files: NutchwaxOpenSearchServlet.java Log Message: * src/java/org/archive/access/nutch/NutchwaxOpenSearchServlet.java Revert patch to just check for bad xml -- no encoding as was suggested on nutch list (Adding the later means we double encode). Index: NutchwaxOpenSearchServlet.java =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/nutch/src/java/org/archive/access/nutch/NutchwaxOpenSearchServlet.java,v retrieving revision 1.5 retrieving revision 1.6 diff -C2 -d -r1.5 -r1.6 *** NutchwaxOpenSearchServlet.java 15 Oct 2005 01:18:56 -0000 1.5 --- NutchwaxOpenSearchServlet.java 4 Nov 2005 07:45:39 -0000 1.6 *************** *** 268,272 **** String name, String text) { Element child = doc.createElement(name); ! child.appendChild(doc.createTextNode(toValidXmlText(text))); parent.appendChild(child); } --- 268,272 ---- String name, String text) { Element child = doc.createElement(name); ! child.appendChild(doc.createTextNode(getLegalXml(text))); parent.appendChild(child); } *************** *** 275,279 **** String ns, String name, String text) { Element child = doc.createElementNS((String)NS_MAP.get(ns), ns+":"+name); ! child.appendChild(doc.createTextNode(toValidXmlText(text))); parent.appendChild(child); } --- 275,279 ---- String ns, String name, String text) { Element child = doc.createElementNS((String)NS_MAP.get(ns), ns+":"+name); ! child.appendChild(doc.createTextNode(getLegalXml(text))); parent.appendChild(child); } *************** *** 282,427 **** String name, String value) { Attr attribute = doc.createAttribute(name); ! attribute.setValue(value); node.getAttributes().setNamedItem(attribute); } ! /** ! * Escapes a string so that it can be safely put into an XML text node. ! 
* Please note that some characters cannot be serialized into an XML text ! * (Such characters are dropped from the String returned). Refer to ! * <a href="http://www.w3.org/TR/2000/REC-xml-20001006#charsets">XML ! * specification</a> for more information. ! * ! * @param str The string to be escaped. ! * <code>IllegalArgumentException</code> is thrown when an unescapable ! * sequence of characters is encountered. Otherwise, the offending ! * characters will be omitted in the output. ! * @return A string that is safe to use in an XML element or attribute. The ! * xml 5 'special characters' are entity encoded if present and characters ! * outside of the legal range for xml documents will have been removed. ! * @author Dawid Weiss */ ! public static String toValidXmlText(final String str) ! { ! return toValidXmlText(str, false); } ! /** ! * Escapes a string so that it can be safely put into an XML text node. ! * Please note that some characters cannot be serialized into an XML text. ! * Refer to <a href="http://www.w3.org/TR/2000/REC-xml-20001006#charsets">XML ! * specification</a> for more information. ! * ! * @param str The string to be escaped. ! * @param exceptionOnUnescapable If true, ! * <code>IllegalArgumentException</code> is thrown when an unescapable ! * sequence of characters is encountered. Otherwise, the offending ! * characters will be omitted in the output. ! * @return A string that is safe to use in an XML element or attribute. The ! * xml 5 'special characters' are entity encoded if present and characters ! * outside of the legal range for xml documents will have been removed ! * (if <code>exceptionOnUnescapable</code> is true. ! * @author Dawid Weiss ! */ ! public static String toValidXmlText(final String str, ! final boolean exceptionOnUnescapable) ! { ! StringBuffer buffer = null; ! ! for (int i = 0; i < str.length(); i++) ! { ! char ch = str.charAt(i); ! String entity; ! ! switch (ch) ! { ! case '<': // '<' ! entity = "<"; ! ! break; ! ! 
case '>': // '>' ! entity = ">"; ! ! break; ! ! case '&': // '&' ! entity = "&"; ! ! break; ! ! case '\'': ! entity = "'"; ! ! break; ! ! case '"': ! entity = """; ! ! break; ! ! case 0x09: // valid xml characters ! case 0x0a: ! case 0x0d: ! entity = null; ! ! break; ! ! default: ! ! // check if valid XML characters ! if ( ! ((ch >= 0x20) && (ch <= 0xD7FF)) || ! ((ch >= 0xe000) && (ch <= 0xfffd)) || ! ((ch >= 0x10000) && (ch <= 0x10ffff)) ! ) ! { ! entity = null; ! ! break; ! } ! else ! { ! if (exceptionOnUnescapable) ! { ! throw new IllegalArgumentException( ! "Character is not within valid XML characters " + ! "(code: 0x" + Integer.toHexString(ch) + ! ", position: " + i + ")." ! ); ! } ! else ! { ! // replace the character with an empty string. ! entity = ""; ! ! break; ! } ! } ! } ! ! if (buffer == null) ! { ! if (entity != null) ! { ! buffer = new StringBuffer(str.length() + 20); ! buffer.append(str.substring(0, i)); ! buffer.append(entity); ! } ! } ! else ! { ! if (entity == null) ! { ! buffer.append(ch); ! } ! else ! { ! buffer.append(entity); ! } ! } } ! ! return (buffer != null) ? buffer.toString() : str; } } --- 282,330 ---- String name, String value) { Attr attribute = doc.createAttribute(name); ! attribute.setValue(getLegalXml(value)); node.getAttributes().setNamedItem(attribute); } ! /* ! * Ensure string is legal xml. ! * First look to see if string has illegal characters. If it doesn't, ! * just return it. Otherwise, create new string with illegal characters removed. ! * @param text String to verify. ! * @return Passed <code>text</code> or a new string with illegal ! * characters removed if any found in <code>text</code>. ! * @see http://www.w3.org/TR/2000/REC-xml-20001006#NT-Char */ ! private static String getLegalXml(final String text) { ! if (text == null) { ! return null; ! } ! boolean allLegal = true; ! for (int i = 0; i < text.length(); i++) { ! if (!isLegalXml(text.charAt(i))) { ! allLegal = false; ! break; ! } ! } ! return allLegal? 
text: createLegalXml(text); } ! private static String createLegalXml(final String text) { ! if (text == null) { ! return null; } ! StringBuffer buffer = new StringBuffer(text.length()); ! for (int i = 0; i < text.length(); i++) { ! char c = text.charAt(i); ! if (isLegalXml(c)) { ! buffer.append(c); ! } ! } ! return buffer.toString(); ! } ! ! private static boolean isLegalXml(final char c) { ! return c == 0x9 || c == 0xa || c == 0xd || (c >= 0x20 && c <= 0xd7ff) ! || (c >= 0xe000 && c <= 0xfffd) || (c >= 0x10000 && c <= 0x10ffff); } } + |
From: Sverre B. <sv...@us...> - 2005-11-03 13:54:43
|
Update of /cvsroot/archive-access/archive-access/projects/wera/src/webapps/wera In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv1094 Modified Files: index.php Log Message: Index: index.php =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/wera/src/webapps/wera/index.php,v retrieving revision 1.12 retrieving revision 1.13 diff -C2 -d -r1.12 -r1.13 *** index.php 3 Nov 2005 13:27:18 -0000 1.12 --- index.php 3 Nov 2005 13:54:22 -0000 1.13 *************** *** 93,98 **** </table> ! ! <table border='1' cellpadding='0' cellspacing='0' width=90%> <tr> <td class="norm" colspan="4" align="left"><img alt='' height='8' src='/images/1px.gif' width='1'></td> --- 93,97 ---- </table> ! <table border='0' cellpadding='0' cellspacing='0' width=90%> <tr> <td class="norm" colspan="4" align="left"><img alt='' height='8' src='/images/1px.gif' width='1'></td> |
From: Sverre B. <sv...@us...> - 2005-11-03 13:27:33
|
Update of /cvsroot/archive-access/archive-access/projects/wera/src/webapps/wera In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv26343 Modified Files: index.php Log Message: RFE1346889 Google-like result presentation Index: index.php =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/wera/src/webapps/wera/index.php,v retrieving revision 1.11 retrieving revision 1.12 diff -C2 -d -r1.11 -r1.12 *** index.php 21 Oct 2005 10:59:23 -0000 1.11 --- index.php 3 Nov 2005 13:27:18 -0000 1.12 *************** *** 34,39 **** include_once ("lib/config.inc"); include ($conf_index_file); - include ($conf_includepath."/time.inc"); - include ($conf_includepath."/url.inc"); //if register_globals is off --- 34,37 ---- *************** *** 45,75 **** $start = $_REQUEST['start']; $debug = $_REQUEST['debug']; ! include($conf_includepath . "/header.inc"); ! ?> ! ! <script language="javascript"> ! function submitForm(v) { ! ! if (v == 2){ ! document.search.action = "<?php print $conf_advanced_search ?>"; ! document.search.submit(); ! }else{ ! document.search.action = "<?php print $conf_simple_search ?>"; ! document.search.submit(); ! } ! } ! </script> ! ! </HEAD> ! ! <body><center> ! ! <?php - if ($year_from == "" and $year_to == "") { $query_time = ""; --- 43,58 ---- $start = $_REQUEST['start']; $debug = $_REQUEST['debug']; ! $showall = $_REQUEST['showall']; + if (strpos($query, 'site:') !== false) { + $showall = TRUE; } + + include($conf_includepath . "/header.inc"); ! ?> ! </HEAD><body><center> <?php if ($year_from == "" and $year_to == "") { $query_time = ""; *************** *** 96,100 **** $year_from = $first_year; } - # $query_time = "date:[".$year_from."0101000000;".$year_to."0101000000] "; $query_time = "date:".$year_from."0101000000-".$year_to."0101000000 "; } --- 79,82 ---- *************** *** 107,125 **** <tr> <td class="norm" width="10"><img alt='' height='1' src='/images/1px.gif' width="1"></td> ! 
<td class="norm" align="left"><img alt="" src="<?php print $conf_logo;?>"></td> ! <td class="norm" align="right"> ! <?php ! ! ! //print "<a href=\"$conf_advanced_search?query=$query&querytype=$querytype&year_from=$year_from&year_to=$year_to\">"; ! //print nls("Advanced search") . "</a>"; ! ?> ! </td> ! <td class="norm" width="10"><img alt='' height='1' src='/images/1px.gif' width="1"></td> </tr> </table> ! <table border='0' cellpadding='0' cellspacing='0' width=90%> <tr> <td class="norm" colspan="4" align="left"><img alt='' height='8' src='/images/1px.gif' width='1'></td> --- 89,98 ---- <tr> <td class="norm" width="10"><img alt='' height='1' src='/images/1px.gif' width="1"></td> ! <td class="norm" colspan="5" align="left"><img alt="" src="<?php print $conf_logo;?>"></td> </tr> </table> ! <table border='1' cellpadding='0' cellspacing='0' width=90%> <tr> <td class="norm" colspan="4" align="left"><img alt='' height='8' src='/images/1px.gif' width='1'></td> *************** *** 140,148 **** <tr> <td class="shade" width="10"><img alt='' height='1' src='/images/1px.gif' width="1"></td> ! <?php ! ! $query = trim(stripslashes($query)); - ?> --- 113,118 ---- <tr> <td class="shade" width="10"><img alt='' height='1' src='/images/1px.gif' width="1"></td> ! <?php $query = trim(stripslashes($query)); ?> *************** *** 150,154 **** <form name='search' action=<? echo $_SERVER['PHP_SELF']; ?> method='get'> <input type='text' name='query' value='<?php print $query; ?>' class="searchtext" size="50"/> ! <input type='submit' value='<?php print(nls("Search"));?>' class="searchbutton" onClick="submitForm(0);"/> </td> </tr> --- 120,124 ---- <form name='search' action=<? echo $_SERVER['PHP_SELF']; ?> method='get'> <input type='text' name='query' value='<?php print $query; ?>' class="searchtext" size="50"/> ! 
<input type='submit' value='<?php print(nls("Search"));?>' class="searchbutton"/> </td> </tr> *************** *** 162,171 **** <input name='year_from' size=4 maxlength="4" type="text" value='<?php print $year_from;?>'/> - <input name='year_to' maxlength="4" value='<?php print $year_to; ?>' size=4 type="text"/> ! <?php ! if (isset ($debug)) { print "<input type=\"hidden\" name=\"debug\" value=\"$debug\">"; } ?> </td> --- 132,141 ---- <input name='year_from' size=4 maxlength="4" type="text" value='<?php print $year_from;?>'/> - <input name='year_to' maxlength="4" value='<?php print $year_to; ?>' size=4 type="text"/> ! <?php if (isset ($debug)) { print "<input type=\"hidden\" name=\"debug\" value=\"$debug\">"; } + ?> </td> *************** *** 176,180 **** </table> - <!-- ************************ Results: ****************************************************** --> <table align="center" class="greyborder" border="0" cellspacing="0" cellpadding="1" width="90%"> <tr> --- 146,149 ---- *************** *** 225,230 **** $search->setSizeOfResultSet($sizeofresultset); $search->setOffset($start -1); ! $search->setFieldsInResult("title url description archiveidentifier"); ! $search->setSupressDuplicates(); if ($search->doQuery()) { --- 194,204 ---- $search->setSizeOfResultSet($sizeofresultset); $search->setOffset($start -1); ! $search->setFieldsInResult("title url description archiveidentifier site"); ! if ($showall) { ! $search->setDedup(0); ! } ! else { ! $search->setDedup(1); ! } if ($search->doQuery()) { *************** *** 237,241 **** if ($total > 0) { print (nls("Total number of versions found")." : <b>$total</b>. "); ! print (nls("Displaying URL's")); print " <b>$start-$numhits</b>"; print "</td></tr>"; --- 211,220 ---- if ($total > 0) { print (nls("Total number of versions found")." : <b>$total</b>. "); ! if ($showall) { ! print (nls("Displaying URL's")); ! } ! else { ! print (nls("Displaying sites")); ! 
} print " <b>$start-$numhits</b>"; print "</td></tr>"; *************** *** 250,254 **** if ($conf_show_num_verions) { $search2 = new $conf_index_class (); ! $search2->unsetSupressDuplicates(); $search2->setSortorder("descending"); $search2->setSizeOfResultSet(1); --- 229,233 ---- if ($conf_show_num_verions) { $search2 = new $conf_index_class (); ! $search->setDedup(0); $search2->setSortorder("descending"); $search2->setSizeOfResultSet(1); *************** *** 303,307 **** $linkstring = "<a href=\"result.php?time=".$versions[1]['date']."&url=".index_encode($value["url"])."\">".nls("Timeline")."</a>"; $overview = "<a href=\"overview.php?url=".index_encode($value["url"])."\" >".nls("Overview")."</a>"; ! print "<b>".$linkstring." | ".$overview."</b>"; print "<br> <br>"; $last_hit = $key; --- 282,291 ---- $linkstring = "<a href=\"result.php?time=".$versions[1]['date']."&url=".index_encode($value["url"])."\">".nls("Timeline")."</a>"; $overview = "<a href=\"overview.php?url=".index_encode($value["url"])."\" >".nls("Overview")."</a>"; ! $morefromsite ="<a href=\"index.php?query=site:". $value['site'] . " " . $query . "\" >".nls("More from this site")."</a>"; ! print "<b> $linkstring | $overview"; ! if(!$showall) { ! print " | $morefromsite"; ! } ! print "</b>"; print "<br> <br>"; $last_hit = $key; *************** *** 328,332 **** if ($start > 1) { $prev_start = $start - $sizeofresultset; ! print " <a href=\"".$_SERVER['PHP_SELF']."?".$url_querypart."&start=".$prev_start." \"><< ".nls("Prev")."</a> | "; } --- 312,316 ---- if ($start > 1) { $prev_start = $start - $sizeofresultset; ! print " <a href=\"".$_SERVER['PHP_SELF']."?".$url_querypart."&start=".$prev_start."&showall=".$showall." \"><< ".nls("Prev")."</a> | "; } *************** *** 339,343 **** print $low_lim."-".$last_hit." | "; } else { ! print " <a href=\"".$_SERVER['PHP_SELF']."?".$url_querypart."&start=".$low_lim." \">".$low_lim."-".$last_hit."</a> | "; } break; --- 323,327 ---- print $low_lim."-".$last_hit." 
| "; } else { ! print " <a href=\"".$_SERVER['PHP_SELF']."?".$url_querypart."&start=".$low_lim."&showall=".$showall." \">".$low_lim."-".$last_hit."</a> | "; } break; *************** *** 346,355 **** print $low_lim."-".$high_lim." | "; } else { ! print " <a href=\"".$_SERVER['PHP_SELF']."?".$url_querypart."&start=".$low_lim." \">".$low_lim."-".$high_lim."</a> | "; } } } if ($hits_in_set == $sizeofresultset and $search->morepages) { ! print " <a href=\"".$_SERVER['PHP_SELF']."?".$url_querypart."&start=".$next_start."\">".nls("Next")." >></a>"; } print "</b>"; --- 330,342 ---- print $low_lim."-".$high_lim." | "; } else { ! print " <a href=\"".$_SERVER['PHP_SELF']."?".$url_querypart."&start=".$low_lim."&showall=".$showall." \">".$low_lim."-".$high_lim."</a> | "; } } } if ($hits_in_set == $sizeofresultset and $search->morepages) { ! print " <a href=\"".$_SERVER['PHP_SELF']."?".$url_querypart."&start=".$next_start."&showall=".$showall."\">".nls("Next")." >></a>"; ! } ! if (!$showall) { ! print " | <a href=\"".$_SERVER['PHP_SELF']."?".$url_querypart."&start=".$start."&showall=true\">".nls("Show all")."</a>"; } print "</b>"; *************** *** 393,402 **** <?php include($conf_includepath . "/footer.inc"); ! ?> ! ! ! ! ! ! ! --- 380,382 ---- <?php include($conf_includepath . "/footer.inc"); ! ?> \ No newline at end of file |
From: Sverre B. <sv...@us...> - 2005-11-03 13:26:18
|
Update of /cvsroot/archive-access/archive-access/projects/wera/src/webapps/wera/lib In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv25617/lib Modified Files: url.inc Log Message: tidy Index: url.inc =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/wera/src/webapps/wera/lib/url.inc,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** url.inc 4 Oct 2005 22:59:27 -0000 1.1 --- url.inc 3 Nov 2005 13:25:46 -0000 1.2 *************** *** 1,3 **** --- 1,4 ---- <?php + /* * This file is part of WERA. *************** *** 38,96 **** // string combineUrl(string $orig_url, string $url) - combines an absolute url with a relative link - - // Strips portnumber if 80 ! function stripPort($url) ! { ! $url = str_replace(":80/","/",$url); return $url; ! } // Strips http:// ! function stripProtocol($url) ! { ! $url = eregi_replace ("^http://","",$url); return $url; ! } ! // Example url: http://www.domain.com/path/index.html // returns true if it is :] ! function isAbsolute($url) ! { ! if (eregi ("^http://", $url) ) ! { return true; ! } ! else ! { return false; - } } // Returns document name: index.html ! function getDocname($url) ! { ! eregi ("[^/]*$", $url, $matches); ! if ($matches != "") ! { #print "Matches: " . $matches[0] . "<br>\n"; ! return $matches[0]; ! } ! else ! return false; ! } // Returns domain-name: www.domain.com // !! Only if the URL starts with http:// ! function getDomain($url) ! { ! if (isAbsolute($url)) ! { ! $url = eregi_replace ("^http://", "", $url); ! eregi ("^[^/]*", $url, $matches); return $matches[0]; ! } ! else { return false;} } // Returns path: /path/ usage: getDomain("someurl"[,true|false][,true|false]) --- 39,84 ---- // string combineUrl(string $orig_url, string $url) - combines an absolute url with a relative link // Strips portnumber if 80 ! function stripPort($url) { ! $url = str_replace(":80/", "/", $url); return $url; ! } // Strips http:// ! 
function stripProtocol($url) { ! $url = eregi_replace("^http://", "", $url); return $url; ! } // Example url: http://www.domain.com/path/index.html // returns true if it is :] ! function isAbsolute($url) { ! if (eregi("^http://", $url)) { return true; ! } else { return false; } + } // Returns document name: index.html ! function getDocname($url) { ! eregi("[^/]*$", $url, $matches); ! if ($matches != "") { #print "Matches: " . $matches[0] . "<br>\n"; ! return $matches[0]; ! } else ! return false; ! } // Returns domain-name: www.domain.com // !! Only if the URL starts with http:// ! function getDomain($url) { ! if (isAbsolute($url)) { ! $url = eregi_replace("^http://", "", $url); ! eregi("^[^/]*", $url, $matches); return $matches[0]; ! } else { ! return false; } + } // Returns path: /path/ usage: getDomain("someurl"[,true|false][,true|false]) *************** *** 98,106 **** // Set noendslash to true if you don't want the end slash returned // Otherwise, don't pass the the two last parameters ! function getPath($url,$nostartslash=FALSE, $noendslash=FALSE) ! { ! //If url field is empty - return a null field. ! if ($url==""){return $url;} $domain = getDomain($url); --- 86,95 ---- // Set noendslash to true if you don't want the end slash returned // Otherwise, don't pass the the two last parameters ! function getPath($url, $nostartslash = FALSE, $noendslash = FALSE) { ! //If url field is empty - return a null field. ! if ($url == "") { ! return $url; ! } $domain = getDomain($url); *************** *** 108,120 **** #print "Domain: $domain<br>\n"; #print "Docname: $docname<br>\n"; ! $url = eregi_replace ( "^http://", "", $url); ! $url = eregi_replace ( "$domain", "", $url); if ($docname) ! $url = eregi_replace ( "$docname", "", $url); ! if ($nostartslash) { $url = eregi_replace ( "^/*", "", $url ); } ! if ($noendslash) { $url = eregi_replace ( "/*$", "", $url ); } #print "Returned path: $url<br>\n"; return $url; ! 
} // Returns: --- 97,113 ---- #print "Domain: $domain<br>\n"; #print "Docname: $docname<br>\n"; ! $url = eregi_replace("^http://", "", $url); ! $url = eregi_replace("$domain", "", $url); if ($docname) ! $url = eregi_replace("$docname", "", $url); ! if ($nostartslash) { ! $url = eregi_replace("^/*", "", $url); ! } ! if ($noendslash) { ! $url = eregi_replace("/*$", "", $url); ! } #print "Returned path: $url<br>\n"; return $url; ! } // Returns: *************** *** 122,169 **** // 0 If url is relative to domain (starts with /) // 1 If url is relative to domain/path (doesn't start with /) ! function isRelative($url) ! { ! if (isAbsolute($url)) { return -1; } ! elseif ( eregi ( "^/", $url)) { return false; } ! else { return true;} } // Takes original url and a relative url and combines them to an absolute url ! function combineUrl($l_url,$o_url) ! { $query = $l_url; if (!isAbsolute($l_url)) // If relative ! { ! // Her må det gjøres litt av hvert for å få rett streng til søket. ! if (isRelative($l_url)) // Relative to domain/path/ ! { ! $domain = getDomain($o_url); #print "Parameter for getPath: $o_url<br>\n"; ! $orig_path = getPath($o_url); #print "Orig_path: $orig_path<br>\n"; ! while ( eregi ("^\.\.\/", $l_url)) ! { #print $orig_path; ! $orig_path = eregi_replace ("[^/]*\/$","",$orig_path); ! $orig_path = $orig_path . "/"; ! $orig_path = eregi_replace ("\/{2}","/",$orig_path); ! $l_url = eregi_replace ("^\.\.\/", "", $l_url); #print " -> " . $orig_path; ! } ! $l_url = eregi_replace ("^(\.\/)(\.\/)*", "", $l_url); ! $query = "http://" . $domain . $orig_path . $l_url; #print $query; ! } ! else // It is relative to domain only ! { ! $domain = getDomain($o_url); ! $orig_path = getPath($o_url); ! $query = "http://" . $domain . $l_url; ! } ! } return $query; ! } ! ?> --- 115,163 ---- // 0 If url is relative to domain (starts with /) // 1 If url is relative to domain/path (doesn't start with /) ! function isRelative($url) { ! if (isAbsolute($url)) { ! 
return -1; } + elseif (eregi("^/", $url)) { + return false; + } else { + return true; + } + } // Takes original url and a relative url and combines them to an absolute url ! function combineUrl($l_url, $o_url) { $query = $l_url; if (!isAbsolute($l_url)) // If relative ! { ! // Her må det gjøres litt av hvert for å få rett streng til søket. ! if (isRelative($l_url)) // Relative to domain/path/ ! { ! $domain = getDomain($o_url); #print "Parameter for getPath: $o_url<br>\n"; ! $orig_path = getPath($o_url); #print "Orig_path: $orig_path<br>\n"; ! while (eregi("^\.\.\/", $l_url)) { #print $orig_path; ! $orig_path = eregi_replace("[^/]*\/$", "", $orig_path); ! $orig_path = $orig_path."/"; ! $orig_path = eregi_replace("\/{2}", "/", $orig_path); ! $l_url = eregi_replace("^\.\.\/", "", $l_url); #print " -> " . $orig_path; ! } ! $l_url = eregi_replace("^(\.\/)(\.\/)*", "", $l_url); ! $query = "http://".$domain.$orig_path.$l_url; #print $query; ! } else // It is relative to domain only ! { ! $domain = getDomain($o_url); ! $orig_path = getPath($o_url); ! $query = "http://".$domain.$l_url; ! } ! } return $query; ! } ?> + |