You can subscribe to this list here.
2005 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
(1) |
Aug
(10) |
Sep
(36) |
Oct
(339) |
Nov
(103) |
Dec
(152) |
---|---|---|---|---|---|---|---|---|---|---|---|---|
2006 |
Jan
(141) |
Feb
(102) |
Mar
(125) |
Apr
(203) |
May
(57) |
Jun
(30) |
Jul
(139) |
Aug
(46) |
Sep
(64) |
Oct
(105) |
Nov
(34) |
Dec
(162) |
2007 |
Jan
(81) |
Feb
(57) |
Mar
(141) |
Apr
(72) |
May
(9) |
Jun
(1) |
Jul
(144) |
Aug
(88) |
Sep
(40) |
Oct
(43) |
Nov
(34) |
Dec
(20) |
2008 |
Jan
(44) |
Feb
(45) |
Mar
(16) |
Apr
(36) |
May
(8) |
Jun
(77) |
Jul
(177) |
Aug
(66) |
Sep
(8) |
Oct
(33) |
Nov
(13) |
Dec
(37) |
2009 |
Jan
(2) |
Feb
(5) |
Mar
(8) |
Apr
|
May
(36) |
Jun
(19) |
Jul
(46) |
Aug
(8) |
Sep
(1) |
Oct
(66) |
Nov
(61) |
Dec
(10) |
2010 |
Jan
(13) |
Feb
(16) |
Mar
(38) |
Apr
(76) |
May
(47) |
Jun
(32) |
Jul
(35) |
Aug
(45) |
Sep
(20) |
Oct
(61) |
Nov
(24) |
Dec
(16) |
2011 |
Jan
(22) |
Feb
(34) |
Mar
(11) |
Apr
(8) |
May
(24) |
Jun
(23) |
Jul
(11) |
Aug
(42) |
Sep
(81) |
Oct
(48) |
Nov
(21) |
Dec
(20) |
2012 |
Jan
(30) |
Feb
(25) |
Mar
(4) |
Apr
(6) |
May
(1) |
Jun
(5) |
Jul
(5) |
Aug
(8) |
Sep
(6) |
Oct
(6) |
Nov
|
Dec
|
Revision: 3180 http://archive-access.svn.sourceforge.net/archive-access/?rev=3180&view=rev Author: bradtofel Date: 2010-07-20 23:52:12 +0000 (Tue, 20 Jul 2010) Log Message: ----------- TWEAK: somewhat undoing wrapper code, at least allowing it to be optional Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/exception/BaseExceptionRenderer.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/exception/BaseExceptionRenderer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/exception/BaseExceptionRenderer.java 2010-07-20 23:51:47 UTC (rev 3179) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/exception/BaseExceptionRenderer.java 2010-07-20 23:52:12 UTC (rev 3180) @@ -142,8 +142,13 @@ } } if(!handled) { - uiResults.forwardWrapped(httpRequest, httpResponse, - errorJsp, wbRequest.getAccessPoint().getWrapperJsp()); + String wrapperJsp = wbRequest.getAccessPoint().getWrapperJsp(); + if(wrapperJsp != null) { + uiResults.forwardWrapped(httpRequest, httpResponse, + errorJsp, wrapperJsp); + } else { + uiResults.forward(httpRequest, httpResponse, errorJsp); + } } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-07-20 23:51:54
|
Revision: 3179 http://archive-access.svn.sourceforge.net/archive-access/?rev=3179&view=rev Author: bradtofel Date: 2010-07-20 23:51:47 +0000 (Tue, 20 Jul 2010) Log Message: ----------- BUGFIX: was not properly setting the User-Agent on output requests Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLtoARCCacher.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLtoARCCacher.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLtoARCCacher.java 2010-07-20 23:50:57 UTC (rev 3178) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLtoARCCacher.java 2010-07-20 23:51:47 UTC (rev 3179) @@ -88,7 +88,8 @@ manager.getParams().setSoTimeout(socketTimeoutMS); http.setHttpConnectionManager(manager); HttpClientParams clientParams = new HttpClientParams(); - clientParams.setParameter("http.useragent", userAgent); +// LOGGER.warn("Setting HTTP UserAgent to " + userAgent); +// clientParams.setParameter("http.useragent", userAgent); return http; } }; @@ -135,6 +136,7 @@ HttpClient client = getHttpClient(); getMethod.getParams().setCookiePolicy(CookiePolicy.IGNORE_COOKIES); getMethod.setFollowRedirects(false); + getMethod.setRequestHeader("User-Agent", userAgent); int code = client.executeMethod(getMethod); LOGGER.info("URL(" + url + ") HTTP:" + code); ByteOp.discardStream(getMethod.getResponseBodyAsStream()); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-07-20 23:51:03
|
Revision: 3178 http://archive-access.svn.sourceforge.net/archive-access/?rev=3178&view=rev Author: bradtofel Date: 2010-07-20 23:50:57 +0000 (Tue, 20 Jul 2010) Log Message: ----------- TWEAK: somewhat undoing wrapper code, at least allowing it to be optional Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/Renderer.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/Renderer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/Renderer.java 2010-07-20 23:50:10 UTC (rev 3177) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/Renderer.java 2010-07-20 23:50:57 UTC (rev 3178) @@ -61,8 +61,13 @@ if(wbRequest.isXMLMode()) { uiResults.forward(httpRequest, httpResponse, xmlCaptureJsp); } else { - uiResults.forwardWrapped(httpRequest, httpResponse, - captureJsp, wbRequest.getAccessPoint().getWrapperJsp()); + String wrapperJsp = wbRequest.getAccessPoint().getWrapperJsp(); + if(wrapperJsp != null) { + uiResults.forwardWrapped(httpRequest, httpResponse, + captureJsp, wrapperJsp); + } else { + uiResults.forward(httpRequest, httpResponse, captureJsp); + } } } @@ -78,8 +83,14 @@ if(wbRequest.isXMLMode()) { uiResults.forward(httpRequest, httpResponse, xmlUrlJsp); } else { - uiResults.forwardWrapped(httpRequest, httpResponse, - urlJsp,wbRequest.getAccessPoint().getWrapperJsp()); + String wrapperJsp = wbRequest.getAccessPoint().getWrapperJsp(); + if(wrapperJsp != null) { + + uiResults.forwardWrapped(httpRequest, httpResponse, + urlJsp,wbRequest.getAccessPoint().getWrapperJsp()); + } else { + uiResults.forward(httpRequest, httpResponse, urlJsp); + } } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-07-20 23:50:17
|
Revision: 3177 http://archive-access.svn.sourceforge.net/archive-access/?rev=3177&view=rev Author: bradtofel Date: 2010-07-20 23:50:10 +0000 (Tue, 20 Jul 2010) Log Message: ----------- Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/exception/HTMLError.jsp Modified: trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/exception/HTMLError.jsp =================================================================== --- trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/exception/HTMLError.jsp 2010-07-20 23:49:44 UTC (rev 3176) +++ trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/exception/HTMLError.jsp 2010-07-20 23:50:10 UTC (rev 3177) @@ -36,7 +36,7 @@ <div id="positionHome"> <section> <div id="logoHome"> - <h1><span>Internet Archive's Wayback Machine</span></h1> + <a href="/index.jsp"><h1><span>Internet Archive's Wayback Machine</span></h1></a> </div> </section> <section> This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 3176 http://archive-access.svn.sourceforge.net/archive-access/?rev=3176&view=rev Author: bradtofel Date: 2010-07-20 23:49:44 +0000 (Tue, 20 Jul 2010) Log Message: ----------- FEATURE: add CaptureSearchResults to the exception if the problem is ResourceNotAvailable, allowing jsp to offer alternate versions. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/exception/ResourceNotAvailableException.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/exception/ResourceNotAvailableException.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/exception/ResourceNotAvailableException.java 2010-07-20 23:48:56 UTC (rev 3175) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/exception/ResourceNotAvailableException.java 2010-07-20 23:49:44 UTC (rev 3176) @@ -26,6 +26,9 @@ import javax.servlet.http.HttpServletResponse; +import org.archive.wayback.core.CaptureSearchResults; +import org.archive.wayback.core.SearchResults; + /** * Exception class for queries which matching resource is not presently * accessible @@ -39,6 +42,7 @@ */ private static final long serialVersionUID = 1L; protected static final String ID = "resourceNotAvailable"; + private CaptureSearchResults results; /** * Constructor @@ -65,4 +69,13 @@ public int getStatus() { return HttpServletResponse.SC_SERVICE_UNAVAILABLE; } + /** + * @param results + */ + public void setCaptureSearchResults(CaptureSearchResults results) { + this.results = results; + } + public CaptureSearchResults getCaptureSearchResults() { + return results; + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-07-20 23:49:02
|
Revision: 3175 http://archive-access.svn.sourceforge.net/archive-access/?rev=3175&view=rev Author: bradtofel Date: 2010-07-20 23:48:56 +0000 (Tue, 20 Jul 2010) Log Message: ----------- FEATURE: add CaptureSearchResults to the exception if the problem is ResourceNotAvailable, allowing jsp to offer alternate versions. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java 2010-07-20 19:17:09 UTC (rev 3174) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java 2010-07-20 23:48:56 UTC (rev 3175) @@ -329,8 +329,13 @@ CaptureSearchResult closest = captureResults.getClosest(wbRequest, isUseAnchorWindow()); closest.setClosest(true); - resource = - getCollection().getResourceStore().retrieveResource(closest); + try { + resource = + getCollection().getResourceStore().retrieveResource(closest); + } catch (ResourceNotAvailableException rnae) { + rnae.setCaptureSearchResults((CaptureSearchResults)results); + throw rnae; + } p.retrieved(); ReplayRenderer renderer = getReplay().getRenderer(wbRequest, closest, resource); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-07-20 19:17:16
|
Revision: 3174 http://archive-access.svn.sourceforge.net/archive-access/?rev=3174&view=rev Author: bradtofel Date: 2010-07-20 19:17:09 +0000 (Tue, 20 Jul 2010) Log Message: ----------- Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/query/HTMLUrlResults.jsp Modified: trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/query/HTMLUrlResults.jsp =================================================================== --- trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/query/HTMLUrlResults.jsp 2010-07-19 23:47:20 UTC (rev 3173) +++ trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/query/HTMLUrlResults.jsp 2010-07-20 19:17:09 UTC (rev 3174) @@ -32,12 +32,10 @@ long totalCaptures = uResults.getMatchingCount(); %> -<script src="http://ajax.googleapis.com/ajax/libs/jquery/1.4.2/jquery.min.js" type="text/javascript"></script> -<script src="http://ajax.googleapis.com/ajax/libs/jqueryui/1.8.1/jquery-ui.min.js" type="text/javascript"></script> +<script type="text/javascript" src="<%= staticPrefix %>js/jquery-1.4.2.min.js"></script> <script type="text/javascript" src="<%= staticPrefix %>js/jquery.dataTables.min.js" charset="utf-8"></script> <script type="text/javascript"> $().ready(function(){ - $(".dataTables_processing").show(); $('#resultsUrl th.url span').html(' ↑'); $('#resultsUrl th').mouseup(function(){ \$('#resultsUrl th span').html(''); @@ -81,14 +79,11 @@ }); } }); -$(window).load(function(){ - -}); </script> <div id="positionHome"> <section> <div id="logoHome"> - <h1><span>Internet Archive's Wayback Machine</span></h1> + <a href="/index.jsp"><h1><span>Internet Archive's Wayback Machine</span></h1></a> </div> </section> <section> This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-07-19 23:47:26
|
Revision: 3173 http://archive-access.svn.sourceforge.net/archive-access/?rev=3173&view=rev Author: bradtofel Date: 2010-07-19 23:47:20 +0000 (Mon, 19 Jul 2010) Log Message: ----------- Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/query/BubbleCalendar.jsp Modified: trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/query/BubbleCalendar.jsp =================================================================== --- trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/query/BubbleCalendar.jsp 2010-07-19 23:45:08 UTC (rev 3172) +++ trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/query/BubbleCalendar.jsp 2010-07-19 23:47:20 UTC (rev 3173) @@ -139,16 +139,16 @@ var actualsize = $(this).find(".hidden").text(); var size = actualsize * 12; var offset = size / 2; - if (actualsize == 1) {size = 20, offset = 10;} - else if (actualsize == 2) {size = 30, offset = 15;} - else if (actualsize == 3) {size = 40, offset = 20;} - else if (actualsize == 4) {size = 50, offset = 25;} - else if (actualsize == 5) {size = 60, offset = 30;} - else if (actualsize == 6) {size = 70, offset = 35;} - else if (actualsize == 7) {size = 80, offset = 40;} - else if (actualsize == 8) {size = 90, offset = 45;} - else if (actualsize == 9) {size = 100, offset = 50;} - else if (actualsize >= 10) {size = 110, offset = 55;} + if (actualsize == 1) {size = 30, offset = 15;} + else if (actualsize == 2) {size = 40, offset = 20;} + else if (actualsize == 3) {size = 50, offset = 25;} + else if (actualsize == 4) {size = 60, offset = 30;} + else if (actualsize == 5) {size = 70, offset = 35;} + else if (actualsize == 6) {size = 80, offset = 40;} + else if (actualsize == 7) {size = 90, offset = 45;} + else if (actualsize == 8) {size = 100, offset = 50;} + else if (actualsize == 9) {size = 110, offset = 55;} + else if (actualsize >= 10) {size = 120, offset = 60;} 
$(this).find("img").attr("src","<%= staticPrefix %>images/blueblob-dk.png"); $(this).find(".measure").css({'width':+size+'px','height':+size+'px','top':'-'+offset+'px','left':'-'+offset+'px'}); }); @@ -163,22 +163,22 @@ $(".tooltip").bt({ positions: ['top','right','left','bottom'], contentSelector: "$(this).find('.pop').html()", - padding: '0', - width: '145px', - spikeGirth: 12, - spikeLength: 12, - overlap: '2px', + padding: 0, + width: '115px', + spikeGirth: 8, + spikeLength: 8, + overlap: 0, cornerRadius: 5, fill: '#efefef', strokeWidth: 1, strokeStyle: '#efefef', shadow: true, shadowColor: '#333', - shadowBlur: 6, + shadowBlur: 5, shadowOffsetX: 0, shadowOffsetY: 0, noShadowOpts: {strokeStyle:'#ccc'}, - hoverIntentOpts: {interval:0,timeout:4000}, + hoverIntentOpts: {interval:60,timeout:3500}, clickAnywhereToClose: true, closeWhenOthersOpen: true, windowMargin: 30, @@ -190,9 +190,7 @@ color: '#333' } }); -}); -$().ready(function(){ var yrCount = $(".wbChartThisContainer").size(); var yrTotal = <%= yearWidth %> * yrCount; var yrPad = (930 - yrTotal) / 2; @@ -218,8 +216,8 @@ </form> <div id="wbMeta"> - <p class="wbThis"><a href="<%= data.searchUrlForHTML %>"><%= data.searchUrlForHTML %></a> has been crawled <strong><%= fmt.format("{0} times",data.numResults) %></strong> going all the way back to <a href="firstcapture"><%= fmt.format("{0,date,MMM dd yyyy}",data.firstResultDate) %></a>.</p> - <p class="wbNote">A crawl can be a duplicate of the last one. It happens about [num]% of the time across [NUM] websites. <a href="FAQ">FAQ</a></p> + <p class="wbThis"><a href="<%= data.searchUrlForHTML %>"><%= data.searchUrlForHTML %></a> has been crawled <strong><%= fmt.format("{0} times",data.numResults) %></strong> going all the way back to <a href="firstcapture"><%= fmt.format("{0,date,MMMM d, yyyy}",data.firstResultDate) %></a>.</p> + <p class="wbNote">A crawl can be a duplicate of the last one. It happens about [num]% of the time across [NUM] websites. 
<a href="https://webarchive.jira.com/wiki/display/WWMOS/FAQs">FAQ</a></p> </div> </div> This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-07-19 23:45:15
|
Revision: 3172 http://archive-access.svn.sourceforge.net/archive-access/?rev=3172&view=rev Author: bradtofel Date: 2010-07-19 23:45:08 +0000 (Mon, 19 Jul 2010) Log Message: ----------- Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/query/BubbleCalendar.jsp Modified: trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/query/BubbleCalendar.jsp =================================================================== --- trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/query/BubbleCalendar.jsp 2010-07-16 20:26:03 UTC (rev 3171) +++ trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/query/BubbleCalendar.jsp 2010-07-19 23:45:08 UTC (rev 3172) @@ -9,9 +9,11 @@ <%@ page import="org.archive.wayback.core.CaptureSearchResult" %> <%@ page import="org.archive.wayback.core.CaptureSearchResults" %> <%@ page import="org.archive.wayback.core.UIResults" %> +<%@ page import="org.archive.wayback.core.WaybackRequest" %> <%@ page import="org.archive.wayback.partition.BubbleCalendarData" %> <%@ page import="org.archive.wayback.util.partition.Partition" %> <%@ page import="org.archive.wayback.util.StringFormatter" %> +<jsp:include page="/WEB-INF/global-template/UI-header.jsp" flush="true" /> <jsp:include page="/WEB-INF/template/CookieJS.jsp" flush="true" /> <% UIResults results = UIResults.extractCaptureQuery(request); @@ -31,10 +33,10 @@ } // graph size "constants": These are currently baked-in to the JS logic... 
-int imgWidth = 735; +int imgWidth = 915; int imgHeight = 75; -int yearWidth = 49; -int monthWidth = 4; +int yearWidth = 61; +int monthWidth = 5; BubbleCalendarData data = new BubbleCalendarData(results); @@ -45,8 +47,7 @@ Calendar cal = BubbleCalendarData.getUTCCalendar(); %> -<script src="http://ajax.googleapis.com/ajax/libs/jquery/1.4.2/jquery.min.js" type="text/javascript"></script> -<script src="http://ajax.googleapis.com/ajax/libs/jqueryui/1.8.1/jquery-ui.min.js" type="text/javascript"></script> +<script type="text/javascript" src="<%= staticPrefix %>js/jquery-1.4.2.min.js"></script> <script type="text/javascript" src="<%= staticPrefix %>js/excanvas.compiled.js"></script> <script type="text/javascript" src="<%= staticPrefix %>js/jquery.bt.min.js" charset="utf-8"></script> <script type="text/javascript" src="<%= staticPrefix %>js/jquery.hoverintent.min.js" charset="utf-8"></script> @@ -126,7 +127,7 @@ zeroPad(monthOfYear+1,2) + zeroPad(day,2) + "000000"; - var url = wbPrefix + dateString + '*/' + wbCurrentUrl; + var url = "<%= queryPrefix %>" + dateString + '*/' + wbCurrentUrl; document.getElementById('wm-graph-anchor').href = url; setActiveYear(year); } @@ -135,20 +136,29 @@ <script type="text/javascript"> $().ready(function(){ $(".date").each(function(i){ - var size = $(this).find(".hidden").text(); + var actualsize = $(this).find(".hidden").text(); + var size = actualsize * 12; var offset = size / 2; - if (size >= 1 && size <= 20) {size = 20, offset = 10;} + if (actualsize == 1) {size = 20, offset = 10;} + else if (actualsize == 2) {size = 30, offset = 15;} + else if (actualsize == 3) {size = 40, offset = 20;} + else if (actualsize == 4) {size = 50, offset = 25;} + else if (actualsize == 5) {size = 60, offset = 30;} + else if (actualsize == 6) {size = 70, offset = 35;} + else if (actualsize == 7) {size = 80, offset = 40;} + else if (actualsize == 8) {size = 90, offset = 45;} + else if (actualsize == 9) {size = 100, offset = 50;} + else if (actualsize >= 10) 
{size = 110, offset = 55;} $(this).find("img").attr("src","<%= staticPrefix %>images/blueblob-dk.png"); $(this).find(".measure").css({'width':+size+'px','height':+size+'px','top':'-'+offset+'px','left':'-'+offset+'px'}); }); $(".day a").each(function(i){ var dateClass = $(this).attr("class"); var dateId = "#"+dateClass; - $(this).hover(function(){ - $(dateId).removeClass("opacity20"); - },function(){ - $(dateId).addClass("opacity20"); - }); + $(this).hover( + function(){$(dateId).removeClass("opacity20");}, + function(){$(dateId).addClass("opacity20");} + ); }); $(".tooltip").bt({ positions: ['top','right','left','bottom'], @@ -181,183 +191,7 @@ } }); }); -</script> -<style type="text/css"> -body,div,p,td,th,ul,ol,li {margin:0;padding:0;} -body {background-color:#fff;font-family:"Arial","Helvetica Neue","Helvetica",sans-serif;font-size:100%;} -img {border:none;} -a {color:#069;} -.clearfix{width:100%;clear:both;} -.clearfix:after {content:".";display:block;height:0;clear:both;visibility:hidden;} -#position {padding:0;margin:0 auto;width:990px;background-color:#fff;} -#wbCalendar {position:relative;width:990px;margin-top:25px;} -.calPosition {padding:15px 0 25px 25px;} -#calUnder {overflow:hidden;} -#calOver {position:absolute;top:0;left:0;} -.hidden{display:none;} -.opacity20 { - opacity:.2; - -ms-filter:"progid:DXImageTransform.Microsoft.Alpha(Opacity=20)"; - filter: alpha(opacity=20); -} -.month { - width: 240px; - height: 210px; - float: left; -} -.month table { - border-collapse: collapse; - font-family: "Arial", sans-serif; - border-spacing: 1px; -} -.month table th { - font-size: 0.75em; - font-weight: 700; - text-transform: uppercase; - padding: 6px; -} -.month table span.label { - display: block; - min-height: 20px; -} -.month table td { - padding: 0; - vertical-align: middle; - color: #666; -} -.month table td .day { - width: 30px; - height: 30px; - text-align: center; -} -.month table td .day a, -.month table td .day span { - display: block; - font-size: 
0.6875em; - width: 30px; - height: 22px; - padding-top: 8px; -} -.month table td .day a { - color: #000; - font-weight: 700; - text-decoration: none; -} -.month table td .day span { - padding-top: 9px; - height: 19px; -} -.month table td .day a:hover { - font-size: 0.9375em; - padding-top: 6px; - height: 22px; -} -.month table td .date { - width: 30px; - height: 30px; -} -.month table td .position { - position: relative; - top: 15px; - left: 15px; - width: 1px; - height: 1px; -} -.month table td .measure { - position: absolute; -} -.activeHighlight { - background-color: #000!important; - padding-top: 4px; - font-size: 1.375em!important; - color: #fff300!important; - font-weight: normal!important; - cursor: pointer; -} -.inactiveHighlight { - background-color: #fff!important; - padding-top: 4px; - font-size: .75em!important; - color: #000!important; - font-weight: normal!important; - cursor: pointer; -} -.bt-content { - text-align: left; -} -.pop {display:none;} -.bt-content h3 {font-size: 1em;font-weight: 700;text-transform: uppercase;margin:0 0 5px;} -.bt-content p {font-size: 0.875em;margin: 5px 0;color:#666;} -.bt-content ul {line-height:1.5em;margin:0 0 0 1em;} -.bt-content em {color:#999;} -.bt-content a:hover {color:#036;} - -#wbSearch {float:left;padding:30px 30px 0;} -#wbSearch #logo {float:left; width:223px;} -#wbSearch #form {float:left;width:707px;} -#wbSearch form {margin:0;padding:0;} -#wbSearch input {font-family:"Arial","Helvetica Neue","Helvetica",sans-serif;font-size:1.125em;} -#wbSearch input[type=text] {width:450px;font-weight:700;} -#wbSearch input[type=submit] {vertical-align: middle;} -#wbMeta {padding:15px 0;} -#wbMeta p {margin:0 0 2px;padding:0;} -#wbMeta p.wbThis {font-size:0.75em;} -#wbMeta p.wbNote {color:#666;font-size: 0.6875em;} -#wbMeta p.wbNote a {color:#666;} -#wbChart {text-align:center;padding:0 30px;} -#wbChartThis {position:relative;margin:0 auto;} -.wbChartThisContainer,.wbChartHover {width:<%= yearWidth 
%>px;height:30px;float:left;overflow:visible;} -.wbChartThisTop { - width: <%= yearWidth %>px; - height: 80px; - border: 1px solid #ccc; -} -.wbGradient { - background: #f3f3f3 -moz-linear-gradient(top,#ffffff,#f3f3f3); - background: #f3f3f3 -webkit-gradient(linear, left top, left bottom, from(#fff), to(#f3f3f3), color-stop(1.0, #f3f3f3)); - background-color: #f3f3f3; - filter: progid:DXImageTransform.Microsoft.Gradient(enabled='true',startColorstr=#FFFFFFFF, endColorstr=#FFF3F3F3); -} -.wbSelected, #wbSelected { - background: #fff300!important; - border-bottom: 1px solid #000!important; - filter: progid:DXImageTransform.Microsoft.Gradient(enabled='false')!important; - cursor: pointer; -} -#wbSelected { - cursor: default!important; -} -.wbChartThisBtm { - text-align:center; -} -.wbChartSm { - padding-top: 4px; - font-size: 0.625em; - color: #999; - font-weight: 700; -} -.wbChartBig, #wbChartBig { - background-color: #000!important; - padding-top: 4px; - font-size: 1.375em!important; - color: #fff300!important; - font-weight: normal!important; - cursor: pointer; -} -#wbChartBig { - cursor: default!important; -} -#wbChartGraph,#wbChartOver { - position: absolute; - top: 1px; - left: 1px; - cursor: pointer; -} - -</style> - -<script type="text/javascript"> $().ready(function(){ var yrCount = $(".wbChartThisContainer").size(); var yrTotal = <%= yearWidth %> * yrCount; @@ -366,14 +200,41 @@ }); </script> -<div id="wbChart"> +<div id="position"> + + + <div id="wbSearch"> + <div id="logo"> + <a href="/index.jsp"><img src="<%= staticPrefix %>images/logo_WM.png" alt="logo: Internet Archive's Wayback Machine" width="183" height="65" /></a> + </div> + + <div id="form"> + + <form name="form1" method="get" action="<%= queryPrefix %>query"> + <input type="hidden" name="<%= WaybackRequest.REQUEST_TYPE %>" value="<%= WaybackRequest.REQUEST_CAPTURE_QUERY %>"> + <input type="text" name="<%= WaybackRequest.REQUEST_URL %>" value="<%= data.searchUrlForHTML %>" size="40"> + <input 
type="submit" name="Submit" value="Go Wayback!"/> + </form> + + <div id="wbMeta"> + <p class="wbThis"><a href="<%= data.searchUrlForHTML %>"><%= data.searchUrlForHTML %></a> has been crawled <strong><%= fmt.format("{0} times",data.numResults) %></strong> going all the way back to <a href="firstcapture"><%= fmt.format("{0,date,MMM dd yyyy}",data.firstResultDate) %></a>.</p> + <p class="wbNote">A crawl can be a duplicate of the last one. It happens about [num]% of the time across [NUM] websites. <a href="FAQ">FAQ</a></p> + </div> + </div> + + </div> + + <div class="clearfix"></div> + + <div id="wbChart" onmouseout="showTrackers('none'); setActiveYear(startYear);"> + <div id="wbChartThis"> <a style="position:relative; white-space:nowrap; width:<%= imgWidth %>px;height:<%= imgHeight %>px;" href="" id="wm-graph-anchor"> <div id="wm-ipp-sparkline" style="position:relative; white-space:nowrap; width:<%= imgWidth %>px;height:<%= imgHeight %>px;background: #f3f3f3 -moz-linear-gradient(top,#ffffff,#f3f3f3);background: #f3f3f3 -webkit-gradient(linear, left top, left bottom, from(#fff), to(#f3f3f3), color-stop(1.0, #f3f3f3));background-color: #f3f3f3;filter: progid:DXImageTransform.Microsoft.Gradient(enabled='true',startColorstr=#FFFFFFFF, endColorstr=#FFF3F3F3);cursor:pointer;border: 1px solid #ccc;border-left:none;" title="<%= fmt.format("ToolBar.sparklineTitle") %>"> <img id="sparklineImgId" style="position:absolute;z-index:9012;top:0;left:0;" onmouseover="showTrackers('inline');" - onmouseout="showTrackers('none');" + onmousemove="trackMouseMove(event,this)" alt="sparklines" width="<%= imgWidth %>" @@ -385,13 +246,13 @@ width="<%= yearWidth %>" height="<%= imgHeight %>" border="0" - src="<%= staticPrefix %>images/toolbar/transp-yellow-pixel.png"></img> + src="<%= staticPrefix %>images/toolbar/yellow-pixel.png"></img> <img id="wbMouseTrackMonthImg" style="display:none; position:absolute; z-index:9011; " width="<%= monthWidth %>" height="<%= imgHeight %>" border="0" - 
src="<%= staticPrefix %>images/toolbar/transp-red-pixel.png"></img> + src="<%= staticPrefix %>images/toolbar/transp-black-pixel.png"></img> </div> </a> <% @@ -406,7 +267,6 @@ <div id="highlight-<%= i - 1996 %>" onmouseover="showTrackers('inline'); setActiveYear(<%= i - 1996 %>)" - onmouseout="showTrackers('none');" class="<%= curClass %>"><%= i %></div> </a> </div> @@ -461,12 +321,13 @@ String replayUrl = uriConverter.makeReplayURI( firstCaptureInDay.getCaptureTimestamp(), firstCaptureInDay.getOriginalUrl()); + Date firstCaptureInDayDate = firstCaptureInDay.getCaptureDate(); String safeUrl = fmt.escapeHtml(replayUrl); %><td> <div class="date"> <div class="position"> <div class="hidden"><%= count %></div> - <div class="measure opacity20" id=""><img width="100%" height="100%"/></div> + <div class="measure opacity20" id="<%= fmt.format("{0,date,MMM-d-yyyy}",firstCaptureInDayDate) %>"><img width="100%" height="100%"/></div> </div> </div> </td><% @@ -541,13 +402,12 @@ firstCaptureInDay.getOriginalUrl()); Date firstCaptureInDayDate = firstCaptureInDay.getCaptureDate(); String safeUrl = fmt.escapeHtml(replayUrl); - int dupes = 999; %><td> <div class="date tooltip"> <div class="pop"> <h3><%= fmt.format("{0,date,MMMMM d, yyyy}",firstCaptureInDayDate) %></h3> - <p><%= count %> snapshots, <em><%= dupes %> duplicates</em></p> + <p><%= count %> snapshots</p> <ul> <% Iterator<CaptureSearchResult> dayItr = @@ -566,7 +426,7 @@ </div> <div class="day"> - <a href="<%= safeUrl %>" title="<%= count %> snapshots (<%= dupes %> duplicates)" class="<%= fmt.format("{0,date,MMM-d-yyyy}",firstCaptureInDayDate) %>"><%= dom + 1 %></a> + <a href="<%= safeUrl %>" title="<%= count %> snapshots" class="<%= fmt.format("{0,date,MMM-d-yyyy}",firstCaptureInDayDate) %>"><%= dom + 1 %></a> </div> </div> </td><% @@ -601,4 +461,4 @@ } %> </div> - </div> \ No newline at end of file +<jsp:include page="/WEB-INF/global-template/UI-footer.jsp" flush="true" /> \ No newline at end of file This was sent by the 
SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bi...@us...> - 2010-07-16 20:26:09
|
Revision: 3171 http://archive-access.svn.sourceforge.net/archive-access/?rev=3171&view=rev Author: binzino Date: 2010-07-16 20:26:03 +0000 (Fri, 16 Jul 2010) Log Message: ----------- Local edits of Nutch file to over-ride chatty log messages. Added Paths: ----------- tags/nutchwax-0_13-JIRA-WAX-75/archive/src/nutch/src/java/org/apache/nutch/parse/ tags/nutchwax-0_13-JIRA-WAX-75/archive/src/nutch/src/java/org/apache/nutch/parse/ParseUtil.java Added: tags/nutchwax-0_13-JIRA-WAX-75/archive/src/nutch/src/java/org/apache/nutch/parse/ParseUtil.java =================================================================== --- tags/nutchwax-0_13-JIRA-WAX-75/archive/src/nutch/src/java/org/apache/nutch/parse/ParseUtil.java (rev 0) +++ tags/nutchwax-0_13-JIRA-WAX-75/archive/src/nutch/src/java/org/apache/nutch/parse/ParseUtil.java 2010-07-16 20:26:03 UTC (rev 3171) @@ -0,0 +1,139 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.nutch.parse; + +// Commons Logging imports +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +// Nutch Imports +import org.apache.nutch.protocol.Content; + +// Hadoop imports +import org.apache.hadoop.conf.Configuration; + + +/** + * A Utility class containing methods to simply perform parsing utilities such + * as iterating through a preferred list of {@link Parser}s to obtain + * {@link Parse} objects. + * + * @author mattmann + * @author Jérôme Charron + * @author Sébastien Le Callonnec + */ +public class ParseUtil { + + /* our log stream */ + public static final Log LOG = LogFactory.getLog(ParseUtil.class); + private ParserFactory parserFactory; + + /** + * + * @param conf + */ + public ParseUtil(Configuration conf) { + this.parserFactory = new ParserFactory(conf); + } + + /** + * Performs a parse by iterating through a List of preferred {@link Parser}s + * until a successful parse is performed and a {@link Parse} object is + * returned. If the parse is unsuccessful, a message is logged to the + * <code>WARNING</code> level, and an empty parse is returned. + * + * @param content The content to try and parse. + * @return <key, {@link Parse}> pairs. + * @throws ParseException If no suitable parser is found to perform the parse. + */ + public ParseResult parse(Content content) throws ParseException { + Parser[] parsers = null; + + try { + parsers = this.parserFactory.getParsers(content.getContentType(), + content.getUrl() != null ? 
content.getUrl():""); + } catch (ParserNotFound e) { + if (LOG.isDebugEnabled()) { + LOG.debug("No suitable parser found when trying to parse content " + content.getUrl() + + " of type " + content.getContentType()); + } + throw new ParseException(e.getMessage()); + } + + ParseResult parseResult = null; + for (int i=0; i<parsers.length; i++) { + if (LOG.isDebugEnabled()) { + LOG.debug("Parsing [" + content.getUrl() + "] with [" + parsers[i] + "]"); + } + parseResult = parsers[i].getParse(content); + if (parseResult != null && !parseResult.isEmpty()) + return parseResult; + } + + if (LOG.isDebugEnabled()) { + LOG.debug("Unable to successfully parse content " + content.getUrl() + + " of type " + content.getContentType()); + } + return null; + } + + /** + * Method parses a {@link Content} object using the {@link Parser} specified + * by the parameter <code>extId</code>, i.e., the Parser's extension ID. + * If a suitable {@link Parser} is not found, then a <code>WARNING</code> + * level message is logged, and a ParseException is thrown. If the parse is + * uncessful for any other reason, then a <code>WARNING</code> level + * message is logged, and a <code>ParseStatus.getEmptyParse()</code> is + * returned. + * + * @param extId The extension implementation ID of the {@link Parser} to use + * to parse the specified content. + * @param content The content to parse. + * + * @return <key, {@link Parse}> pairs if the parse is successful, otherwise, + * a single <key, <code>ParseStatus.getEmptyParse()</code>> pair. + * + * @throws ParseException If there is no suitable {@link Parser} found + * to perform the parse. 
+ */ + public ParseResult parseByExtensionId(String extId, Content content) + throws ParseException { + Parser p = null; + + try { + p = this.parserFactory.getParserById(extId); + } catch (ParserNotFound e) { + if (LOG.isDebugEnabled()) { + LOG.debug("No suitable parser found when trying to parse content " + content.getUrl() + + " of type " + content.getContentType()); + } + throw new ParseException(e.getMessage()); + } + + ParseResult parseResult = p.getParse(content); + if (parseResult != null && !parseResult.isEmpty()) { + return parseResult; + } else { + if (LOG.isDebugEnabled()) { + LOG.debug("Unable to successfully parse content " + content.getUrl() + + " of type " + content.getContentType()); + } + return null; + } + } + +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bi...@us...> - 2010-07-16 20:25:47
|
Revision: 3170 http://archive-access.svn.sourceforge.net/archive-access/?rev=3170&view=rev Author: binzino Date: 2010-07-16 20:25:38 +0000 (Fri, 16 Jul 2010) Log Message: ----------- Changed logging levels to be less chatty. Modified Paths: -------------- tags/nutchwax-0_13-JIRA-WAX-75/archive/src/java/org/archive/nutchwax/Importer.java tags/nutchwax-0_13-JIRA-WAX-75/archive/src/plugin/urlfilter-nutchwax/src/java/org/archive/nutchwax/urlfilter/WaybackURLFilter.java Modified: tags/nutchwax-0_13-JIRA-WAX-75/archive/src/java/org/archive/nutchwax/Importer.java =================================================================== --- tags/nutchwax-0_13-JIRA-WAX-75/archive/src/java/org/archive/nutchwax/Importer.java 2010-07-12 02:26:34 UTC (rev 3169) +++ tags/nutchwax-0_13-JIRA-WAX-75/archive/src/java/org/archive/nutchwax/Importer.java 2010-07-16 20:25:38 UTC (rev 3170) @@ -193,13 +193,14 @@ if ( LOG.isInfoEnabled() ) LOG.info( "Importing ARC: " + arcUrl ); - ArchiveReader r = ArchiveReaderFactory.get( arcUrl ); - r.setDigest( true ); - - ArcReader reader = new ArcReader( r ); - + ArchiveReader r = null; try { + r = ArchiveReaderFactory.get( arcUrl ); + r.setDigest( true ); + + ArcReader reader = new ArcReader( r ); + for ( ARCRecord record : reader ) { // When reading WARC files, records of type other than @@ -214,7 +215,7 @@ } catch ( Exception e ) { - LOG.warn( "Error processing archive file: " + arcUrl, e ); + LOG.error( "Error processing archive file: " + arcUrl, e ); if ( jobConf.getBoolean( "nutchwax.import.abortOnArchiveReadError", false ) ) { @@ -223,7 +224,7 @@ } finally { - r.close(); + if ( r != null ) r.close(); if ( LOG.isInfoEnabled() ) { @@ -246,11 +247,11 @@ { ARCRecordMetaData meta = record.getMetaData(); - if ( LOG.isInfoEnabled() ) LOG.info( "Consider URL: " + meta.getUrl() + " (" + meta.getMimetype() + ") [" + meta.getLength( ) + "]" ); + if ( LOG.isDebugEnabled() ) LOG.debug( "Consider URL: " + meta.getUrl() + " (" + meta.getMimetype() + ") [" + 
meta.getLength( ) + "]" ); if ( ! this.httpStatusCodeFilter.isAllowed( record.getStatusCode( ) ) ) { - if ( LOG.isInfoEnabled() ) LOG.info( "Skip URL: " + meta.getUrl() + " HTTP status:" + record.getStatusCode() ); + if ( LOG.isDebugEnabled() ) LOG.debug( "Skip URL: " + meta.getUrl() + " HTTP status:" + record.getStatusCode() ); return false; } @@ -291,7 +292,7 @@ if ( url == null ) { - if ( LOG.isInfoEnabled() ) LOG.info( "Skip URL: " + meta.getUrl() ); + if ( LOG.isDebugEnabled() ) LOG.debug( "Skip URL: " + meta.getUrl() ); return false; } @@ -375,11 +376,11 @@ } catch ( MalformedURLException mue ) { - if ( LOG.isInfoEnabled() ) LOG.info( "MalformedURL: " + candidateUrl ); + if ( LOG.isDebugEnabled() ) LOG.debug( "MalformedURL: " + candidateUrl ); } catch ( URLFilterException ufe ) { - if ( LOG.isInfoEnabled() ) LOG.info( "URL filtered: " + candidateUrl ); + if ( LOG.isDebugEnabled() ) LOG.debug( "URL filtered: " + candidateUrl ); } return null; @@ -439,9 +440,9 @@ { parseResult = this.parseUtil.parse( content ); } - catch ( Exception e ) + catch ( Throwable t ) { - LOG.warn( "Error parsing: " + key, e ); + if ( LOG.isDebugEnabled() ) LOG.debug( "Error parsing: " + key, t ); } // ?: This is taken from Nutch Fetcher. I believe the signatures are used in the Fetcher @@ -590,7 +591,7 @@ count += record.read( buf, 0, Math.min( buf.length, record.available( ) ) ); } - if ( LOG.isInfoEnabled() ) LOG.info( "Bytes read: expected=" + contentLength + " bytes.length=" + bytes.length + " pos=" + pos + " count=" + count ); + if ( LOG.isDebugEnabled() ) LOG.debug( "Bytes read: expected=" + contentLength + " bytes.length=" + bytes.length + " pos=" + pos + " count=" + count ); // Sanity check. 
The number of bytes read into our bytes[] // buffer, plus the count of extra stuff read after it should Modified: tags/nutchwax-0_13-JIRA-WAX-75/archive/src/plugin/urlfilter-nutchwax/src/java/org/archive/nutchwax/urlfilter/WaybackURLFilter.java =================================================================== --- tags/nutchwax-0_13-JIRA-WAX-75/archive/src/plugin/urlfilter-nutchwax/src/java/org/archive/nutchwax/urlfilter/WaybackURLFilter.java 2010-07-12 02:26:34 UTC (rev 3169) +++ tags/nutchwax-0_13-JIRA-WAX-75/archive/src/plugin/urlfilter-nutchwax/src/java/org/archive/nutchwax/urlfilter/WaybackURLFilter.java 2010-07-16 20:25:38 UTC (rev 3170) @@ -70,7 +70,7 @@ if ( s.length != 3 ) { // Don't filter. - LOG.info( "Allowing : " + urlString ); + if ( LOG.isDebugEnabled() ) LOG.debug( "Allowing : " + urlString ); return urlString; } @@ -101,12 +101,12 @@ if ( exclude ) { - LOG.info( "Excluding: " + urlString ); + if ( LOG.isDebugEnabled() ) LOG.debug( "Excluding: " + urlString ); return null; } - LOG.info( "Allowing : " + urlString ); + if ( LOG.isDebugEnabled() ) LOG.debug( "Allowing : " + urlString ); return urlString; } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bi...@us...> - 2010-07-12 02:26:43
|
Revision: 3169 http://archive-access.svn.sourceforge.net/archive-access/?rev=3169&view=rev Author: binzino Date: 2010-07-12 02:26:34 +0000 (Mon, 12 Jul 2010) Log Message: ----------- Remove setting of segment and digest fields. Modified Paths: -------------- tags/nutchwax-0_13-JIRA-WAX-75/archive/src/java/org/archive/nutchwax/IndexerMapReduce.java Modified: tags/nutchwax-0_13-JIRA-WAX-75/archive/src/java/org/archive/nutchwax/IndexerMapReduce.java =================================================================== --- tags/nutchwax-0_13-JIRA-WAX-75/archive/src/java/org/archive/nutchwax/IndexerMapReduce.java 2010-07-11 00:09:27 UTC (rev 3168) +++ tags/nutchwax-0_13-JIRA-WAX-75/archive/src/java/org/archive/nutchwax/IndexerMapReduce.java 2010-07-12 02:26:34 UTC (rev 3169) @@ -96,10 +96,10 @@ } // add segment, used to map from merged index back to segment files - doc.add("segment", metadata.get(Nutch.SEGMENT_NAME_KEY)); + //doc.add("segment", metadata.get(Nutch.SEGMENT_NAME_KEY)); // add digest, used by dedup - doc.add("digest", metadata.get(Nutch.SIGNATURE_KEY)); + //doc.add("digest", metadata.get(Nutch.SIGNATURE_KEY)); final Parse parse = new ParseImpl(parseText, parseData); try { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bi...@us...> - 2010-07-11 00:09:35
|
Revision: 3168 http://archive-access.svn.sourceforge.net/archive-access/?rev=3168&view=rev Author: binzino Date: 2010-07-11 00:09:27 +0000 (Sun, 11 Jul 2010) Log Message: ----------- A whole mess of accumulated hacks to get Importing and Indexing working with Hadoop 0.20 (Cloudera) on our Hadoop rack. Modified Paths: -------------- tags/nutchwax-0_13-JIRA-WAX-75/archive/src/java/org/archive/nutchwax/IndexMerger.java tags/nutchwax-0_13-JIRA-WAX-75/archive/src/java/org/archive/nutchwax/Indexer.java tags/nutchwax-0_13-JIRA-WAX-75/archive/src/java/org/archive/nutchwax/tools/BuildIndex.java tags/nutchwax-0_13-JIRA-WAX-75/archive/src/java/org/archive/nutchwax/tools/DateAdder.java tags/nutchwax-0_13-JIRA-WAX-75/archive/src/java/org/archive/nutchwax/tools/DumpParallelIndex.java tags/nutchwax-0_13-JIRA-WAX-75/archive/src/java/org/archive/nutchwax/tools/GetUniqFieldValues.java tags/nutchwax-0_13-JIRA-WAX-75/archive/src/java/org/archive/nutchwax/tools/LengthNormUpdater.java tags/nutchwax-0_13-JIRA-WAX-75/archive/src/nutch/conf/nutch-site.xml tags/nutchwax-0_13-JIRA-WAX-75/archive/src/nutch/src/java/org/apache/lucene/index/ArchiveParallelReader.java tags/nutchwax-0_13-JIRA-WAX-75/archive/src/plugin/query-nutchwax/src/java/org/archive/nutchwax/query/DateQueryFilter.java Added Paths: ----------- tags/nutchwax-0_13-JIRA-WAX-75/archive/src/nutch/conf/common-terms.utf8 tags/nutchwax-0_13-JIRA-WAX-75/archive/src/nutch/src/java/org/apache/nutch/indexer/ tags/nutchwax-0_13-JIRA-WAX-75/archive/src/nutch/src/java/org/apache/nutch/indexer/lucene/ tags/nutchwax-0_13-JIRA-WAX-75/archive/src/nutch/src/java/org/apache/nutch/indexer/lucene/LuceneWriter.java tags/nutchwax-0_13-JIRA-WAX-75/archive/src/nutch/src/java/org/apache/nutch/plugin/ tags/nutchwax-0_13-JIRA-WAX-75/archive/src/nutch/src/java/org/apache/nutch/plugin/PluginManifestParser.java Removed Paths: ------------- tags/nutchwax-0_13-JIRA-WAX-75/archive/src/nutch/src/java/org/apache/nutch/searcher/DistributedSearch.java 
tags/nutchwax-0_13-JIRA-WAX-75/archive/src/nutch/src/java/org/apache/nutch/searcher/DistributedSegmentBean.java tags/nutchwax-0_13-JIRA-WAX-75/archive/src/nutch/src/java/org/apache/nutch/searcher/FetchedSegments.java tags/nutchwax-0_13-JIRA-WAX-75/archive/src/nutch/src/java/org/apache/nutch/searcher/IndexSearcher.java tags/nutchwax-0_13-JIRA-WAX-75/archive/src/nutch/src/java/org/apache/nutch/searcher/LuceneSearchBean.java tags/nutchwax-0_13-JIRA-WAX-75/archive/src/nutch/src/java/org/apache/nutch/searcher/NutchBean.java Modified: tags/nutchwax-0_13-JIRA-WAX-75/archive/src/java/org/archive/nutchwax/IndexMerger.java =================================================================== --- tags/nutchwax-0_13-JIRA-WAX-75/archive/src/java/org/archive/nutchwax/IndexMerger.java 2010-07-10 23:34:25 UTC (rev 3167) +++ tags/nutchwax-0_13-JIRA-WAX-75/archive/src/java/org/archive/nutchwax/IndexMerger.java 2010-07-11 00:09:27 UTC (rev 3168) @@ -34,9 +34,10 @@ import org.apache.nutch.indexer.NutchSimilarity; import org.apache.nutch.indexer.FsDirectory; -import org.apache.lucene.store.Directory; +import org.apache.lucene.store.NIOFSDirectory; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.LogMergePolicy; import org.apache.lucene.index.ArchiveParallelReader; /************************************************************************* @@ -84,10 +85,10 @@ // // Merge indices // - IndexWriter writer = new IndexWriter(localOutput.toString(), null, true); - writer.setMergeFactor(getConf().getInt("indexer.mergeFactor", IndexWriter.DEFAULT_MERGE_FACTOR)); + IndexWriter writer = new IndexWriter( new NIOFSDirectory( new File( localOutput.toString() ) ), null, IndexWriter.MaxFieldLength.UNLIMITED ); + writer.setMergeFactor(getConf().getInt("indexer.mergeFactor", LogMergePolicy.DEFAULT_MERGE_FACTOR)); writer.setMaxBufferedDocs(getConf().getInt("indexer.minMergeDocs", IndexWriter.DEFAULT_MAX_BUFFERED_DOCS)); - 
writer.setMaxMergeDocs(getConf().getInt("indexer.maxMergeDocs", IndexWriter.DEFAULT_MAX_MERGE_DOCS)); + writer.setMaxMergeDocs(getConf().getInt("indexer.maxMergeDocs", LogMergePolicy.DEFAULT_MAX_MERGE_DOCS)); writer.setTermIndexInterval(getConf().getInt("indexer.termIndexInterval", IndexWriter.DEFAULT_TERM_INDEX_INTERVAL)); writer.setInfoStream(LogUtil.getDebugStream(LOG)); writer.setUseCompoundFile(false); Modified: tags/nutchwax-0_13-JIRA-WAX-75/archive/src/java/org/archive/nutchwax/Indexer.java =================================================================== --- tags/nutchwax-0_13-JIRA-WAX-75/archive/src/java/org/archive/nutchwax/Indexer.java 2010-07-10 23:34:25 UTC (rev 3167) +++ tags/nutchwax-0_13-JIRA-WAX-75/archive/src/java/org/archive/nutchwax/Indexer.java 2010-07-11 00:09:27 UTC (rev 3168) @@ -63,8 +63,8 @@ FileOutputFormat.setOutputPath(job, luceneDir); - LuceneWriter.addFieldOptions("segment", LuceneWriter.STORE.YES, LuceneWriter.INDEX.NO, job); - LuceneWriter.addFieldOptions("digest", LuceneWriter.STORE.YES, LuceneWriter.INDEX.NO, job); + //LuceneWriter.addFieldOptions("segment", LuceneWriter.STORE.YES, LuceneWriter.INDEX.NO, job); + //LuceneWriter.addFieldOptions("digest", LuceneWriter.STORE.YES, LuceneWriter.INDEX.NO, job); NutchIndexWriterFactory.addClassToConf(job, LuceneWriter.class); Modified: tags/nutchwax-0_13-JIRA-WAX-75/archive/src/java/org/archive/nutchwax/tools/BuildIndex.java =================================================================== --- tags/nutchwax-0_13-JIRA-WAX-75/archive/src/java/org/archive/nutchwax/tools/BuildIndex.java 2010-07-10 23:34:25 UTC (rev 3167) +++ tags/nutchwax-0_13-JIRA-WAX-75/archive/src/java/org/archive/nutchwax/tools/BuildIndex.java 2010-07-11 00:09:27 UTC (rev 3168) @@ -20,13 +20,16 @@ */ package org.archive.nutchwax.tools; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import 
org.apache.lucene.analysis.WhitespaceAnalyzer; +import java.io.*; + import org.apache.hadoop.conf.Configured; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; +import org.apache.lucene.analysis.WhitespaceAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.store.NIOFSDirectory; import org.apache.nutch.util.NutchConfiguration; @@ -50,12 +53,12 @@ String fieldValue = args[2].trim(); int count = Integer.parseInt( args[3].trim() ); - IndexWriter writer = new IndexWriter( indexDir, new WhitespaceAnalyzer( ), true ); + IndexWriter writer = new IndexWriter( new NIOFSDirectory( new File( indexDir ) ), null, IndexWriter.MaxFieldLength.UNLIMITED ); for ( int i = 0 ; i < count ; i++ ) { Document newDoc = new Document( ); - newDoc.add( new Field( fieldKey, fieldValue, Field.Store.YES, Field.Index.TOKENIZED ) ); + newDoc.add( new Field( fieldKey, fieldValue, Field.Store.YES, Field.Index.ANALYZED ) ); writer.addDocument( newDoc ); } Modified: tags/nutchwax-0_13-JIRA-WAX-75/archive/src/java/org/archive/nutchwax/tools/DateAdder.java =================================================================== --- tags/nutchwax-0_13-JIRA-WAX-75/archive/src/java/org/archive/nutchwax/tools/DateAdder.java 2010-07-10 23:34:25 UTC (rev 3167) +++ tags/nutchwax-0_13-JIRA-WAX-75/archive/src/java/org/archive/nutchwax/tools/DateAdder.java 2010-07-11 00:09:27 UTC (rev 3168) @@ -20,21 +20,15 @@ */ package org.archive.nutchwax.tools; -import java.io.BufferedReader; -import java.io.FileInputStream; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Map; -import java.util.Set; +import java.io.*; +import java.util.*; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.document.Document; 
import org.apache.lucene.document.Field; import org.apache.lucene.analysis.WhitespaceAnalyzer; +import org.apache.lucene.store.NIOFSDirectory; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.conf.Configuration; @@ -104,15 +98,15 @@ } - IndexReader reader = IndexReader.open( mainIndexDir ); + IndexReader reader = IndexReader.open( new NIOFSDirectory( new File( mainIndexDir ) ), true ); IndexReader sourceReaders[] = new IndexReader[args.length-3]; for ( int i = 0 ; i < sourceReaders.length ; i++ ) { - sourceReaders[i] = IndexReader.open( args[i+1] ); + sourceReaders[i] = IndexReader.open( new NIOFSDirectory( new File( args[i+1] ) ), true ); } - IndexWriter writer = new IndexWriter( destIndexDir, new WhitespaceAnalyzer( ), true ); + IndexWriter writer = new IndexWriter( new NIOFSDirectory( new File( destIndexDir ) ), null, IndexWriter.MaxFieldLength.UNLIMITED ); UrlCanonicalizer canonicalizer = getCanonicalizer( this.getConf( ) ); @@ -134,7 +128,7 @@ } for ( String date : uniqueDates ) { - newDoc.add( new Field( NutchWax.DATE_KEY, date, Field.Store.YES, Field.Index.UN_TOKENIZED ) ); + newDoc.add( new Field( NutchWax.DATE_KEY, date, Field.Store.YES, Field.Index.NOT_ANALYZED ) ); } // Obtain the new dates for the document. 
@@ -162,7 +156,7 @@ { for ( String date : newDates.split("\\s+") ) { - newDoc.add( new Field( NutchWax.DATE_KEY, date, Field.Store.YES, Field.Index.UN_TOKENIZED ) ); + newDoc.add( new Field( NutchWax.DATE_KEY, date, Field.Store.YES, Field.Index.NOT_ANALYZED ) ); } } Modified: tags/nutchwax-0_13-JIRA-WAX-75/archive/src/java/org/archive/nutchwax/tools/DumpParallelIndex.java =================================================================== --- tags/nutchwax-0_13-JIRA-WAX-75/archive/src/java/org/archive/nutchwax/tools/DumpParallelIndex.java 2010-07-10 23:34:25 UTC (rev 3167) +++ tags/nutchwax-0_13-JIRA-WAX-75/archive/src/java/org/archive/nutchwax/tools/DumpParallelIndex.java 2010-07-11 00:09:27 UTC (rev 3168) @@ -27,7 +27,9 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.ArchiveParallelReader; +import org.apache.lucene.store.NIOFSDirectory; + public class DumpParallelIndex { public static void main( String[] args ) throws Exception @@ -58,7 +60,7 @@ ArchiveParallelReader reader = new ArchiveParallelReader( ); for ( String dir : dirs ) { - reader.add( IndexReader.open( dir ) ); + reader.add( IndexReader.open( new NIOFSDirectory( new File( dir ) ) ) ); } if ( args[0].equals( "-l" ) ) Modified: tags/nutchwax-0_13-JIRA-WAX-75/archive/src/java/org/archive/nutchwax/tools/GetUniqFieldValues.java =================================================================== --- tags/nutchwax-0_13-JIRA-WAX-75/archive/src/java/org/archive/nutchwax/tools/GetUniqFieldValues.java 2010-07-10 23:34:25 UTC (rev 3167) +++ tags/nutchwax-0_13-JIRA-WAX-75/archive/src/java/org/archive/nutchwax/tools/GetUniqFieldValues.java 2010-07-11 00:09:27 UTC (rev 3168) @@ -20,13 +20,11 @@ */ package org.archive.nutchwax.tools; -import java.io.File; -import java.util.Iterator; -import java.util.Set; -import java.util.HashSet; -import java.util.Collection; +import java.io.*; +import java.util.*; import org.apache.lucene.index.IndexReader; +import 
org.apache.lucene.store.NIOFSDirectory; /** * A quick-n-dirty command-line utility to get the unique values for a @@ -55,7 +53,7 @@ private static void dumpUniqValues( String fieldName, String indexDir ) throws Exception { - IndexReader reader = IndexReader.open(indexDir); + IndexReader reader = IndexReader.open( new NIOFSDirectory( new File( indexDir) ) ); Collection fieldNames = reader.getFieldNames( IndexReader.FieldOption.ALL ); Modified: tags/nutchwax-0_13-JIRA-WAX-75/archive/src/java/org/archive/nutchwax/tools/LengthNormUpdater.java =================================================================== --- tags/nutchwax-0_13-JIRA-WAX-75/archive/src/java/org/archive/nutchwax/tools/LengthNormUpdater.java 2010-07-10 23:34:25 UTC (rev 3167) +++ tags/nutchwax-0_13-JIRA-WAX-75/archive/src/java/org/archive/nutchwax/tools/LengthNormUpdater.java 2010-07-11 00:09:27 UTC (rev 3168) @@ -16,15 +16,8 @@ * limitations under the License. */ -import java.io.BufferedReader; -import java.io.InputStreamReader; -import java.io.FileInputStream; -import java.io.IOException; -import java.util.HashMap; -import java.util.Map; -import java.util.Set; -import java.util.Collection; -import java.util.HashSet; +import java.io.*; +import java.util.*; import org.apache.lucene.document.Document; import org.apache.lucene.index.Term; @@ -32,12 +25,11 @@ import org.apache.lucene.index.TermDocs; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.Similarity; -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.FSDirectory; +import org.apache.lucene.store.NIOFSDirectory; - import org.apache.nutch.indexer.NutchSimilarity; + /** * This is heavily cribbed from org.apache.lucene.misc.LengthNormModifier */ @@ -132,7 +124,7 @@ String pagerankFile = args[pos++]; - IndexReader reader = IndexReader.open( args[pos++] ); + IndexReader reader = IndexReader.open( new NIOFSDirectory( new File( args[pos++] ) ) ); try { Added: 
tags/nutchwax-0_13-JIRA-WAX-75/archive/src/nutch/conf/common-terms.utf8 =================================================================== --- tags/nutchwax-0_13-JIRA-WAX-75/archive/src/nutch/conf/common-terms.utf8 (rev 0) +++ tags/nutchwax-0_13-JIRA-WAX-75/archive/src/nutch/conf/common-terms.utf8 2010-07-11 00:09:27 UTC (rev 3168) @@ -0,0 +1,28 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Common terms and phrases which will be indexed in n-grams +# in order to optimize search. +#content:a +#content:and +#content:for +#content:in +#content:of +#content:the +#content:to +#url:com +#url:http +#url:http-www +#url:www Modified: tags/nutchwax-0_13-JIRA-WAX-75/archive/src/nutch/conf/nutch-site.xml =================================================================== --- tags/nutchwax-0_13-JIRA-WAX-75/archive/src/nutch/conf/nutch-site.xml 2010-07-10 23:34:25 UTC (rev 3167) +++ tags/nutchwax-0_13-JIRA-WAX-75/archive/src/nutch/conf/nutch-site.xml 2010-07-11 00:09:27 UTC (rev 3168) @@ -10,7 +10,7 @@ <!-- Add 'index-nutchwax' and 'query-nutchwax' to plugin list. 
--> <!-- Also, add 'parse-pdf' --> <!-- Remove 'urlfilter-regex' and 'normalizer-(pass|regex|basic)' --> - <value>protocol-http|parse-(text|html|js|pdf)|index-nutchwax|query-(basic|nutchwax)|summary-basic|scoring-nutchwax|urlfilter-nutchwax</value> + <value>protocol-http|parse-(text|html|pdf|msword|mspowerpoint|oo)|index-nutchwax|query-(basic|nutchwax)|summary-basic|scoring-nutchwax|urlfilter-nutchwax</value> </property> <!-- @@ -42,6 +42,7 @@ dest-key = src-key --> <name>nutchwax.filter.index</name> +<!-- <value> title:false:true:tokenized content:false:compress:tokenized @@ -55,6 +56,16 @@ type:true:true:no_norms length:false:true:no </value> +--> + <value> + title:false:true:tokenized + content:false:compress:tokenized + site:false:false:untokenized + url:false:true:tokenized + type:true:true:no_norms + date:false:true:no + length:false:true:no + </value> </property> <property> Modified: tags/nutchwax-0_13-JIRA-WAX-75/archive/src/nutch/src/java/org/apache/lucene/index/ArchiveParallelReader.java =================================================================== --- tags/nutchwax-0_13-JIRA-WAX-75/archive/src/nutch/src/java/org/apache/lucene/index/ArchiveParallelReader.java 2010-07-10 23:34:25 UTC (rev 3167) +++ tags/nutchwax-0_13-JIRA-WAX-75/archive/src/nutch/src/java/org/apache/lucene/index/ArchiveParallelReader.java 2010-07-11 00:09:27 UTC (rev 3168) @@ -1,3 +1,5 @@ +package org.apache.lucene.index; + /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -14,24 +16,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -/** - * ARCHIVE: This must be in the lucene index package because it needs - * to call protected methods on other IndexReader objects. 
- */ -package org.apache.lucene.index; import org.apache.lucene.document.Document; import org.apache.lucene.document.FieldSelector; import org.apache.lucene.document.FieldSelectorResult; import org.apache.lucene.document.Fieldable; -import org.apache.lucene.index.CorruptIndexException; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermDocs; -import org.apache.lucene.index.TermEnum; -import org.apache.lucene.index.TermFreqVector; -import org.apache.lucene.index.TermPositions; -import org.apache.lucene.index.TermVectorMapper; import java.io.IOException; import java.util.*; @@ -55,10 +44,12 @@ * undefined behavior</em>. */ public class ArchiveParallelReader extends IndexReader { - private List readers = new ArrayList(); - private List decrefOnClose = new ArrayList(); // remember which subreaders to decRef on close + private List<IndexReader> readers = new ArrayList<IndexReader>(); + private List<Boolean> decrefOnClose = new ArrayList<Boolean>(); // remember which subreaders to decRef on close boolean incRefReaders = false; - private SortedMap fieldToReader = new TreeMap(); + private SortedMap<String,IndexReader> fieldToReader = new TreeMap<String,IndexReader>(); + private Map<IndexReader,Collection<String>> readerToFields = new HashMap<IndexReader,Collection<String>>(); + private List<IndexReader> storedFieldReaders = new ArrayList<IndexReader>(); private int maxDoc; private int numDocs; @@ -81,9 +72,25 @@ /** Add an IndexReader. * @throws IOException if there is a low-level IO error */ - public void add(IndexReader reader) throws IOException - { + public void add(IndexReader reader) throws IOException { ensureOpen(); + add(reader, false); + } + + /** Add an IndexReader whose stored fields will not be returned. This can + * accelerate search when stored fields are only needed from a subset of + * the IndexReaders. 
+ * + * @throws IllegalArgumentException if not all indexes contain the same number + * of documents + * @throws IllegalArgumentException if not all indexes have the same value + * of {@link IndexReader#maxDoc()} + * @throws IOException if there is a low-level IO error + */ + public void add(IndexReader reader, boolean ignoreStoredFields) + throws IOException { + + ensureOpen(); if (readers.size() == 0) { this.maxDoc = reader.maxDoc(); this.numDocs = reader.numDocs(); @@ -97,14 +104,15 @@ throw new IllegalArgumentException ("All readers must have same numDocs: "+numDocs+"!="+reader.numDocs()); - Collection fields = reader.getFieldNames(IndexReader.FieldOption.ALL); - Iterator i = fields.iterator(); - while (i.hasNext()) { // update fieldToReader map - String field = (String)i.next(); + Collection<String> fields = reader.getFieldNames(IndexReader.FieldOption.ALL); + readerToFields.put(reader, fields); + for (final String field : fields) { // update fieldToReader map if (fieldToReader.get(field) == null) fieldToReader.put(field, reader); } + if (!ignoreStoredFields) + storedFieldReaders.add(reader); // add to storedFieldReaders readers.add(reader); if (incRefReaders) { @@ -112,7 +120,16 @@ } decrefOnClose.add(Boolean.valueOf(incRefReaders)); } - + + @Override + public synchronized Object clone() { + try { + return doReopen(true); + } catch (Exception ex) { + throw new RuntimeException(ex); + } + } + /** * Tries to reopen the subreaders. 
* <br> @@ -132,63 +149,42 @@ * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error */ - public IndexReader reopen() throws CorruptIndexException, IOException { + @Override + public synchronized IndexReader reopen() throws CorruptIndexException, IOException { + return doReopen(false); + } + + protected IndexReader doReopen(boolean doClone) throws CorruptIndexException, IOException { ensureOpen(); boolean reopened = false; - List newReaders = new ArrayList(); - List newDecrefOnClose = new ArrayList(); + List<IndexReader> newReaders = new ArrayList<IndexReader>(); boolean success = false; try { - - for (int i = 0; i < readers.size(); i++) { - IndexReader oldReader = (IndexReader) readers.get(i); - IndexReader newReader = oldReader.reopen(); + for (final IndexReader oldReader : readers) { + IndexReader newReader = null; + if (doClone) { + newReader = (IndexReader) oldReader.clone(); + } else { + newReader = oldReader.reopen(); + } newReaders.add(newReader); // if at least one of the subreaders was updated we remember that - // and return a new MultiReader + // and return a new ArchiveParallelReader if (newReader != oldReader) { reopened = true; } } - - if (reopened) { - ArchiveParallelReader pr = new ArchiveParallelReader(); - for (int i = 0; i < readers.size(); i++) { - IndexReader oldReader = (IndexReader) readers.get(i); - IndexReader newReader = (IndexReader) newReaders.get(i); - if (newReader == oldReader) { - newDecrefOnClose.add(Boolean.TRUE); - newReader.incRef(); - } else { - // this is a new subreader instance, so on close() we don't - // decRef but close it - newDecrefOnClose.add(Boolean.FALSE); - } - pr.add(newReader); - } - pr.decrefOnClose = newDecrefOnClose; - pr.incRefReaders = incRefReaders; - success = true; - return pr; - } else { - success = true; - // No subreader was refreshed - return this; - } + success = true; } finally { if (!success && reopened) { for (int i = 0; i < newReaders.size(); 
i++) { - IndexReader r = (IndexReader) newReaders.get(i); - if (r != null) { + IndexReader r = newReaders.get(i); + if (r != readers.get(i)) { try { - if (((Boolean) newDecrefOnClose.get(i)).booleanValue()) { - r.decRef(); - } else { - r.close(); - } + r.close(); } catch (IOException ignore) { // keep going - we want to clean up as much as possible } @@ -196,46 +192,74 @@ } } } + + if (reopened) { + List<Boolean> newDecrefOnClose = new ArrayList<Boolean>(); + ArchiveParallelReader pr = new ArchiveParallelReader(); + for (int i = 0; i < readers.size(); i++) { + IndexReader oldReader = readers.get(i); + IndexReader newReader = newReaders.get(i); + if (newReader == oldReader) { + newDecrefOnClose.add(Boolean.TRUE); + newReader.incRef(); + } else { + // this is a new subreader instance, so on close() we don't + // decRef but close it + newDecrefOnClose.add(Boolean.FALSE); + } + pr.add(newReader, !storedFieldReaders.contains(oldReader)); + } + pr.decrefOnClose = newDecrefOnClose; + pr.incRefReaders = incRefReaders; + return pr; + } else { + // No subreader was refreshed + return this; + } } + @Override public int numDocs() { // Don't call ensureOpen() here (it could affect performance) return numDocs; } + @Override public int maxDoc() { // Don't call ensureOpen() here (it could affect performance) return maxDoc; } + @Override public boolean hasDeletions() { // Don't call ensureOpen() here (it could affect performance) return hasDeletions; } // check first reader + @Override public boolean isDeleted(int n) { // Don't call ensureOpen() here (it could affect performance) if (readers.size() > 0) - return ((IndexReader)readers.get(0)).isDeleted(n); + return readers.get(0).isDeleted(n); return false; } // delete in all readers + @Override protected void doDelete(int n) throws CorruptIndexException, IOException { - for (int i = 0; i < readers.size(); i++) { - ((IndexReader)readers.get(i)).deleteDocument(n); + for (final IndexReader reader : readers) { + 
reader.deleteDocument(n); } hasDeletions = true; } - /** - * @see org.apache.lucene.index.ParallelReader.doUndeleteAll - */ + // undeleteAll in all readers + @Override protected void doUndeleteAll() throws CorruptIndexException, IOException { - for (int i = 0; i < readers.size(); i++) { - ((IndexReader)readers.get(i)).undeleteAll(); + for (final IndexReader reader : readers) { + reader.undeleteAll(); } hasDeletions = false; } @@ -289,111 +313,150 @@ return result; } + /* + // append fields from storedFieldReaders + @Override + public Document document(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException { + ensureOpen(); + Document result = new Document(); + for (final IndexReader reader: storedFieldReaders) { + + boolean include = (fieldSelector==null); + if (!include) { + Collection<String> fields = readerToFields.get(reader); + for (final String field : fields) + if (fieldSelector.accept(field) != FieldSelectorResult.NO_LOAD) { + include = true; + break; + } + } + if (include) { + List<Fieldable> fields = reader.document(n, fieldSelector).getFields(); + for (Fieldable field : fields) { + result.add(field); + } + } + } + return result; + } + */ + // get all vectors + @Override public TermFreqVector[] getTermFreqVectors(int n) throws IOException { ensureOpen(); - ArrayList results = new ArrayList(); - Iterator i = fieldToReader.entrySet().iterator(); - while (i.hasNext()) { - Map.Entry e = (Map.Entry)i.next(); - String field = (String)e.getKey(); - IndexReader reader = (IndexReader)e.getValue(); + ArrayList<TermFreqVector> results = new ArrayList<TermFreqVector>(); + for (final Map.Entry<String,IndexReader> e: fieldToReader.entrySet()) { + + String field = e.getKey(); + IndexReader reader = e.getValue(); TermFreqVector vector = reader.getTermFreqVector(n, field); if (vector != null) results.add(vector); } - return (TermFreqVector[]) - results.toArray(new TermFreqVector[results.size()]); + return results.toArray(new 
TermFreqVector[results.size()]); } + @Override public TermFreqVector getTermFreqVector(int n, String field) throws IOException { ensureOpen(); - IndexReader reader = ((IndexReader)fieldToReader.get(field)); + IndexReader reader = fieldToReader.get(field); return reader==null ? null : reader.getTermFreqVector(n, field); } + @Override public void getTermFreqVector(int docNumber, String field, TermVectorMapper mapper) throws IOException { ensureOpen(); - IndexReader reader = ((IndexReader)fieldToReader.get(field)); + IndexReader reader = fieldToReader.get(field); if (reader != null) { reader.getTermFreqVector(docNumber, field, mapper); } } + @Override public void getTermFreqVector(int docNumber, TermVectorMapper mapper) throws IOException { ensureOpen(); - ensureOpen(); - Iterator i = fieldToReader.entrySet().iterator(); - while (i.hasNext()) { - Map.Entry e = (Map.Entry)i.next(); - String field = (String)e.getKey(); - IndexReader reader = (IndexReader)e.getValue(); + for (final Map.Entry<String,IndexReader> e : fieldToReader.entrySet()) { + + String field = e.getKey(); + IndexReader reader = e.getValue(); reader.getTermFreqVector(docNumber, field, mapper); } } + @Override public boolean hasNorms(String field) throws IOException { ensureOpen(); - IndexReader reader = ((IndexReader)fieldToReader.get(field)); + IndexReader reader = fieldToReader.get(field); return reader==null ? false : reader.hasNorms(field); } + @Override public byte[] norms(String field) throws IOException { ensureOpen(); - IndexReader reader = ((IndexReader)fieldToReader.get(field)); + IndexReader reader = fieldToReader.get(field); return reader==null ? 
null : reader.norms(field); } + @Override public void norms(String field, byte[] result, int offset) throws IOException { ensureOpen(); - IndexReader reader = ((IndexReader)fieldToReader.get(field)); + IndexReader reader = fieldToReader.get(field); if (reader!=null) reader.norms(field, result, offset); } + @Override protected void doSetNorm(int n, String field, byte value) throws CorruptIndexException, IOException { - IndexReader reader = ((IndexReader)fieldToReader.get(field)); + IndexReader reader = fieldToReader.get(field); if (reader!=null) reader.doSetNorm(n, field, value); } + @Override public TermEnum terms() throws IOException { ensureOpen(); return new ParallelTermEnum(); } + @Override public TermEnum terms(Term term) throws IOException { ensureOpen(); return new ParallelTermEnum(term); } + @Override public int docFreq(Term term) throws IOException { ensureOpen(); - IndexReader reader = ((IndexReader)fieldToReader.get(term.field())); + IndexReader reader = fieldToReader.get(term.field()); return reader==null ? 0 : reader.docFreq(term); } + @Override public TermDocs termDocs(Term term) throws IOException { ensureOpen(); return new ParallelTermDocs(term); } + @Override public TermDocs termDocs() throws IOException { ensureOpen(); return new ParallelTermDocs(); } + @Override public TermPositions termPositions(Term term) throws IOException { ensureOpen(); return new ParallelTermPositions(term); } + @Override public TermPositions termPositions() throws IOException { ensureOpen(); return new ParallelTermPositions(); @@ -402,9 +465,10 @@ /** * Checks recursively if all subreaders are up to date. 
*/ + @Override public boolean isCurrent() throws CorruptIndexException, IOException { - for (int i = 0; i < readers.size(); i++) { - if (!((IndexReader)readers.get(i)).isCurrent()) { + for (final IndexReader reader : readers) { + if (!reader.isCurrent()) { return false; } } @@ -416,9 +480,10 @@ /** * Checks recursively if all subindexes are optimized */ + @Override public boolean isOptimized() { - for (int i = 0; i < readers.size(); i++) { - if (!((IndexReader)readers.get(i)).isOptimized()) { + for (final IndexReader reader : readers) { + if (!reader.isOptimized()) { return false; } } @@ -431,36 +496,39 @@ /** Not implemented. * @throws UnsupportedOperationException */ + @Override public long getVersion() { throw new UnsupportedOperationException("ArchiveParallelReader does not support this method."); } // for testing IndexReader[] getSubReaders() { - return (IndexReader[]) readers.toArray(new IndexReader[readers.size()]); + return readers.toArray(new IndexReader[readers.size()]); } - protected void doCommit() throws IOException { - for (int i = 0; i < readers.size(); i++) - ((IndexReader)readers.get(i)).commit(); + @Override + protected void doCommit(Map<String,String> commitUserData) throws IOException { + for (final IndexReader reader : readers) + reader.commit(commitUserData); } + @Override protected synchronized void doClose() throws IOException { for (int i = 0; i < readers.size(); i++) { - if (((Boolean) decrefOnClose.get(i)).booleanValue()) { - ((IndexReader)readers.get(i)).decRef(); + if (decrefOnClose.get(i).booleanValue()) { + readers.get(i).decRef(); } else { - ((IndexReader)readers.get(i)).close(); + readers.get(i).close(); } } } - public Collection getFieldNames (IndexReader.FieldOption fieldNames) { + @Override + public Collection<String> getFieldNames (IndexReader.FieldOption fieldNames) { ensureOpen(); - Set fieldSet = new HashSet(); - for (int i = 0; i < readers.size(); i++) { - IndexReader reader = ((IndexReader)readers.get(i)); - Collection 
names = reader.getFieldNames(fieldNames); + Set<String> fieldSet = new HashSet<String>(); + for (final IndexReader reader : readers) { + Collection<String> names = reader.getFieldNames(fieldNames); fieldSet.addAll(names); } return fieldSet; @@ -468,24 +536,28 @@ private class ParallelTermEnum extends TermEnum { private String field; - private Iterator fieldIterator; + private Iterator<String> fieldIterator; private TermEnum termEnum; public ParallelTermEnum() throws IOException { - if ( fieldToReader.isEmpty( ) ) return ; - - field = (String)fieldToReader.firstKey(); + try { + field = fieldToReader.firstKey(); + } catch(NoSuchElementException e) { + // No fields, so keep field == null, termEnum == null + return; + } if (field != null) - termEnum = ((IndexReader)fieldToReader.get(field)).terms(); + termEnum = fieldToReader.get(field).terms(); } public ParallelTermEnum(Term term) throws IOException { field = term.field(); - IndexReader reader = ((IndexReader)fieldToReader.get(field)); + IndexReader reader = fieldToReader.get(field); if (reader!=null) termEnum = reader.terms(term); } + @Override public boolean next() throws IOException { if (termEnum==null) return false; @@ -502,8 +574,8 @@ fieldIterator.next(); // Skip field to get next one } while (fieldIterator.hasNext()) { - field = (String) fieldIterator.next(); - termEnum = ((IndexReader)fieldToReader.get(field)).terms(new Term(field, "")); + field = fieldIterator.next(); + termEnum = fieldToReader.get(field).terms(new Term(field)); Term term = termEnum.term(); if (term!=null && term.field()==field) return true; @@ -514,6 +586,7 @@ return false; // no more fields } + @Override public Term term() { if (termEnum==null) return null; @@ -521,6 +594,7 @@ return termEnum.term(); } + @Override public int docFreq() { if (termEnum==null) return 0; @@ -528,6 +602,7 @@ return termEnum.docFreq(); } + @Override public void close() throws IOException { if (termEnum!=null) termEnum.close(); @@ -540,13 +615,18 @@ protected 
TermDocs termDocs; public ParallelTermDocs() {} - public ParallelTermDocs(Term term) throws IOException { seek(term); } + public ParallelTermDocs(Term term) throws IOException { + if (term == null) + termDocs = readers.isEmpty() ? null : readers.get(0).termDocs(null); + else + seek(term); + } public int doc() { return termDocs.doc(); } public int freq() { return termDocs.freq(); } public void seek(Term term) throws IOException { - IndexReader reader = ((IndexReader)fieldToReader.get(term.field())); + IndexReader reader = fieldToReader.get(term.field()); termDocs = reader!=null ? reader.termDocs(term) : null; } @@ -588,8 +668,9 @@ public ParallelTermPositions() {} public ParallelTermPositions(Term term) throws IOException { seek(term); } + @Override public void seek(Term term) throws IOException { - IndexReader reader = ((IndexReader)fieldToReader.get(term.field())); + IndexReader reader = fieldToReader.get(term.field()); termDocs = reader!=null ? reader.termPositions(term) : null; } @@ -614,3 +695,8 @@ } } + + + + + Added: tags/nutchwax-0_13-JIRA-WAX-75/archive/src/nutch/src/java/org/apache/nutch/indexer/lucene/LuceneWriter.java =================================================================== --- tags/nutchwax-0_13-JIRA-WAX-75/archive/src/nutch/src/java/org/apache/nutch/indexer/lucene/LuceneWriter.java (rev 0) +++ tags/nutchwax-0_13-JIRA-WAX-75/archive/src/nutch/src/java/org/apache/nutch/indexer/lucene/LuceneWriter.java 2010-07-11 00:09:27 UTC (rev 3168) @@ -0,0 +1,330 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.nutch.indexer.lucene; + +import java.io.File; +import java.io.IOException; +import java.io.ByteArrayOutputStream; +import java.io.OutputStreamWriter; +import java.util.zip.GZIPOutputStream; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Random; +import java.util.Set; +import java.util.Map.Entry; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.mapred.FileOutputFormat; +import org.apache.hadoop.mapred.JobConf; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.CompressionTools; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriter.MaxFieldLength; +import org.apache.lucene.store.FSDirectory; +import org.apache.nutch.analysis.AnalyzerFactory; +import org.apache.nutch.analysis.NutchAnalyzer; +import org.apache.nutch.analysis.NutchDocumentAnalyzer; +import org.apache.nutch.indexer.Indexer; +import org.apache.nutch.indexer.NutchDocument; +import org.apache.nutch.indexer.NutchIndexWriter; +import org.apache.nutch.indexer.NutchSimilarity; +import org.apache.nutch.metadata.Metadata; +import org.apache.nutch.util.LogUtil; + +public class LuceneWriter implements NutchIndexWriter { + + public static enum STORE { YES, NO, COMPRESS } + + public static enum INDEX { NO, NO_NORMS, TOKENIZED, UNTOKENIZED } + + public static enum VECTOR { NO, 
OFFSET, POS, POS_OFFSET, YES } + + private IndexWriter writer; + + private AnalyzerFactory analyzerFactory; + + private Path perm; + + private Path temp; + + private FileSystem fs; + + private final Map<String, Field.Store> fieldStore; + private final Set<String> fieldCompress; + + private final Map<String, Field.Index> fieldIndex; + + private final Map<String, Field.TermVector> fieldVector; + + public LuceneWriter() { + fieldStore = new HashMap<String, Field.Store>(); + fieldCompress = new HashSet<String>(); + fieldIndex = new HashMap<String, Field.Index>(); + fieldVector = new HashMap<String, Field.TermVector>(); + } + + private Document createLuceneDoc(NutchDocument doc) { + final Document out = new Document(); + + out.setBoost(doc.getScore()); + + final Metadata documentMeta = doc.getDocumentMeta(); + for (final Entry<String, List<String>> entry : doc) { + final String fieldName = entry.getKey(); + + Field.Store store = fieldStore.get(fieldName); + boolean compress = fieldCompress.contains(fieldName); + Field.Index index = fieldIndex.get(fieldName); + Field.TermVector vector = fieldVector.get(fieldName); + + // default values + if (store == null) { + store = Field.Store.NO; + } + + if (index == null) { + index = Field.Index.NO; + } + + if (vector == null) { + vector = Field.TermVector.NO; + } + + // read document-level field information + final String[] fieldMetas = + documentMeta.getValues(LuceneConstants.FIELD_PREFIX + fieldName); + if (fieldMetas.length != 0) { + for (final String val : fieldMetas) { + System.out.println( fieldName + " : " + val ); + if (LuceneConstants.STORE_YES.equals(val)) { + store = Field.Store.YES; + } else if (LuceneConstants.STORE_NO.equals(val)) { + store = Field.Store.NO; + } else if (LuceneConstants.STORE_COMPRESS.equals(val)) { + compress = true; + } else if (LuceneConstants.INDEX_TOKENIZED.equals(val)) { + index = Field.Index.ANALYZED; + } else if (LuceneConstants.INDEX_NO.equals(val)) { + index = Field.Index.NO; + } else if 
(LuceneConstants.INDEX_UNTOKENIZED.equals(val)) { + index = Field.Index.NOT_ANALYZED; + } else if (LuceneConstants.INDEX_NO_NORMS.equals(val)) { + index = Field.Index.ANALYZED_NO_NORMS; + } else if (LuceneConstants.VECTOR_NO.equals(val)) { + vector = Field.TermVector.NO; + } else if (LuceneConstants.VECTOR_YES.equals(val)) { + vector = Field.TermVector.YES; + } else if (LuceneConstants.VECTOR_POS.equals(val)) { + vector = Field.TermVector.WITH_POSITIONS; + } else if (LuceneConstants.VECTOR_POS_OFFSET.equals(val)) { + vector = Field.TermVector.WITH_POSITIONS_OFFSETS; + } else if (LuceneConstants.VECTOR_OFFSET.equals(val)) { + vector = Field.TermVector.WITH_OFFSETS; + } + } + } + + for (final String fieldValue : entry.getValue()) { + if ( compress ) + { + out.add( new Field( fieldName, CompressionTools.compressString( fieldValue ), Field.Store.YES ) ); + } + out.add(new Field(fieldName, fieldValue, store, index, vector)); + } + } + + return out; + } + + @SuppressWarnings("unchecked") + private void processOptions(Configuration conf) { + final Iterator iterator = conf.iterator(); + while (iterator.hasNext()) { + final String key = (String) ((Map.Entry)iterator.next()).getKey(); + if (!key.startsWith(LuceneConstants.LUCENE_PREFIX)) { + continue; + } + if (key.startsWith(LuceneConstants.FIELD_STORE_PREFIX)) { + final String field = + key.substring(LuceneConstants.FIELD_STORE_PREFIX.length()); + final LuceneWriter.STORE store = LuceneWriter.STORE.valueOf(conf.get(key)); + switch (store) { + case YES: + fieldStore.put(field, Field.Store.YES); + break; + case NO: + fieldStore.put(field, Field.Store.NO); + break; + case COMPRESS: + fieldCompress.add(field); + break; + } + } else if (key.startsWith(LuceneConstants.FIELD_INDEX_PREFIX)) { + final String field = + key.substring(LuceneConstants.FIELD_INDEX_PREFIX.length()); + final LuceneWriter.INDEX index = LuceneWriter.INDEX.valueOf(conf.get(key)); + switch (index) { + case NO: + fieldIndex.put(field, Field.Index.NO); + break; 
+ case NO_NORMS: + fieldIndex.put(field, Field.Index.NOT_ANALYZED_NO_NORMS); + break; + case TOKENIZED: + fieldIndex.put(field, Field.Index.ANALYZED); + break; + case UNTOKENIZED: + fieldIndex.put(field, Field.Index.NOT_ANALYZED); + break; + } + } else if (key.startsWith(LuceneConstants.FIELD_VECTOR_PREFIX)) { + final String field = + key.substring(LuceneConstants.FIELD_VECTOR_PREFIX.length()); + final LuceneWriter.VECTOR vector = LuceneWriter.VECTOR.valueOf(conf.get(key)); + switch (vector) { + case NO: + fieldVector.put(field, Field.TermVector.NO); + break; + case OFFSET: + fieldVector.put(field, Field.TermVector.WITH_OFFSETS); + break; + case POS: + fieldVector.put(field, Field.TermVector.WITH_POSITIONS); + break; + case POS_OFFSET: + fieldVector.put(field, Field.TermVector.WITH_POSITIONS_OFFSETS); + break; + case YES: + fieldVector.put(field, Field.TermVector.YES); + break; + } + } + } + } + + public void open(JobConf job, String name) + throws IOException { + this.fs = FileSystem.get(job); + perm = new Path(FileOutputFormat.getOutputPath(job), name); + temp = job.getLocalPath("index/_" + + Integer.toString(new Random().nextInt())); + + fs.delete(perm, true); // delete old, if any + analyzerFactory = new AnalyzerFactory(job); + writer = new IndexWriter( + FSDirectory.open(new File(fs.startLocalOutput(perm, temp).toString())), + new NutchDocumentAnalyzer(job), true, MaxFieldLength.UNLIMITED); + + writer.setMergeFactor(job.getInt("indexer.mergeFactor", 10)); + writer.setMaxBufferedDocs(job.getInt("indexer.minMergeDocs", 100)); + writer.setMaxMergeDocs(job + .getInt("indexer.maxMergeDocs", Integer.MAX_VALUE)); + writer.setTermIndexInterval(job.getInt("indexer.termIndexInterval", 128)); + writer.setMaxFieldLength(job.getInt("indexer.max.tokens", 10000)); + writer.setInfoStream(LogUtil.getDebugStream(Indexer.LOG)); + writer.setUseCompoundFile(false); + writer.setSimilarity(new NutchSimilarity()); + + processOptions(job); + } + + public void close() throws 
IOException { + writer.optimize(); + writer.close(); + fs.completeLocalOutput(perm, temp); // copy to dfs + fs.createNewFile(new Path(perm, Indexer.DONE_NAME)); + } + + public void write(NutchDocument doc) throws IOException { + final Document luceneDoc = createLuceneDoc(doc); + final NutchAnalyzer analyzer = analyzerFactory.get(luceneDoc.get("lang")); + if (Indexer.LOG.isDebugEnabled()) { + Indexer.LOG.debug("Indexing [" + luceneDoc.get("url") + + "] with analyzer " + analyzer + " (" + luceneDoc.get("lang") + + ")"); + } + writer.addDocument(luceneDoc, analyzer); + + } + + /** Adds a lucene field. + * <p> + * This method is provided for backward-compatibility with + * older indexing filters. This should not be used by newer + * implementations since this is slower than + * {@link NutchDocument#add(String, String)} and will be removed + * in a future release. + * </p> + * @param f Lucene field to be added. + * @deprecated Use {@link NutchDocument#add(String, String)} instead and + * set index-level metadata for field information. 
+ * */ + @Deprecated + public static void add(NutchDocument doc, Field f) { + final String fieldName = f.name(); + final String key = LuceneConstants.FIELD_PREFIX + fieldName; + final Metadata documentMeta = doc.getDocumentMeta(); + if (f.isStored()) { + documentMeta.add(key, LuceneConstants.STORE_YES); + } else { + documentMeta.add(key, LuceneConstants.STORE_NO); + } + + if (f.isIndexed()) { + if (f.isTokenized()) { + documentMeta.add(key, LuceneConstants.INDEX_TOKENIZED); + } else if (f.getOmitNorms()) { + documentMeta.add(key, LuceneConstants.INDEX_NO_NORMS); + } else { + documentMeta.add(key, LuceneConstants.INDEX_UNTOKENIZED); + } + } else { + documentMeta.add(key, LuceneConstants.INDEX_NO); + } + + if (f.isStoreOffsetWithTermVector() && f.isStorePositionWithTermVector()) { + documentMeta.add(key, LuceneConstants.VECTOR_POS_OFFSET); + } else if (f.isStoreOffsetWithTermVector()) { + documentMeta.add(key, LuceneConstants.VECTOR_OFFSET); + } else if (f.isStorePositionWithTermVector()) { + documentMeta.add(key, LuceneConstants.VECTOR_POS); + } else if (f.isTermVectorStored()) { + documentMeta.add(key, LuceneConstants.VECTOR_YES); + } else { + documentMeta.add(key, LuceneConstants.VECTOR_NO); + } + } + + public static void addFieldOptions(String field, LuceneWriter.STORE store, + LuceneWriter.INDEX index, LuceneWriter.VECTOR vector, Configuration conf) { + + conf.set(LuceneConstants.FIELD_STORE_PREFIX + field, store.toString()); + conf.set(LuceneConstants.FIELD_INDEX_PREFIX + field, index.toString()); + conf.set(LuceneConstants.FIELD_VECTOR_PREFIX + field, vector.toString()); + } + + public static void addFieldOptions(String field, LuceneWriter.STORE store, + LuceneWriter.INDEX index, Configuration conf) { + LuceneWriter.addFieldOptions(field, store, index, LuceneWriter.VECTOR.NO, conf); + } +} Added: tags/nutchwax-0_13-JIRA-WAX-75/archive/src/nutch/src/java/org/apache/nutch/plugin/PluginManifestParser.java 
=================================================================== --- tags/nutchwax-0_13-JIRA-WAX-75/archive/src/nutch/src/java/org/apache/nutch/plugin/PluginManifestParser.java (rev 0) +++ tags/nutchwax-0_13-JIRA-WAX-75/archive/src/nutch/src/java/org/apache/nutch/plugin/PluginManifestParser.java 2010-07-11 00:09:27 UTC (rev 3168) @@ -0,0 +1,326 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.nutch.plugin; + +import java.io.File; +import java.io.IOException; +import java.io.UnsupportedEncodingException; +import java.net.MalformedURLException; +import java.net.URL; +import java.net.URLDecoder; +import java.util.HashMap; +import java.util.Map; + +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; + +import org.apache.commons.logging.Log; + +import org.apache.hadoop.conf.Configuration; +import org.w3c.dom.Document; +import org.w3c.dom.Element; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; +import org.xml.sax.SAXException; + +/** + * The <code>PluginManifestParser</code> parser just parse the manifest file + * in all plugin directories. 
+ * + * @author joa23 + */ +public class PluginManifestParser { + private static final String ATTR_NAME = "name"; + private static final String ATTR_CLASS = "class"; + private static final String ATTR_ID = "id"; + + public static final Log LOG = PluginRepository.LOG; + + private static final boolean WINDOWS = System.getProperty("os.name") + .startsWith("Windows"); + + private Configuration conf; + + private PluginRepository pluginRepository; + + public PluginManifestParser(Configuration conf, + PluginRepository pluginRepository) { + this.conf = conf; + this.pluginRepository = pluginRepository; + } + + /** + * Returns a list of all found plugin descriptors. + * + * @param pluginFolders + * folders to search plugins from + * @return A {@link Map} of all found {@link PluginDescriptor}s. + */ + public Map<String, PluginDescriptor> parsePluginFolder(String[] pluginFolders) { + Map<String, PluginDescriptor> map = new HashMap<String, PluginDescriptor>(); + + if (pluginFolders == null) { + throw new IllegalArgumentException("plugin.folders is not defined"); + } + + for (String name : pluginFolders) { + File directory = getPluginFolder(name); + if (directory == null) { + continue; + } + LOG.info("Plugins: looking in: " + directory.getAbsolutePath()); + for (File oneSubFolder : directory.listFiles()) { + if (oneSubFolder.isDirectory()) { + String manifestPath = oneSubFolder.getAbsolutePath() + File.separator + + "plugin.xml"; + try { + LOG.debug("parsing: " + manifestPath); + PluginDescriptor p = parseManifestFile(manifestPath); + map.put(p.getPluginId(), p); + } catch (MalformedURLException e) { + LOG.warn(e.toString()); + } catch (SAXException e) { + LOG.warn(e.toString()); + } catch (IOException e) { + LOG.warn(e.toString()); + } catch (ParserConfigurationException e) { + LOG.warn(e.toString()); + } + } + } + } + return map; + } + + /** + * Return the named plugin folder. If the name is absolute then it is + * returned. 
Otherwise, for relative names, the classpath is scanned. + */ + public File getPluginFolder(String name) { + File directory = new File(name); + if (!directory.isAbsolute()) { + URL url = PluginManifestParser.class.getClassLoader().getResource(name); + if (url == null && directory.exists() && directory.isDirectory() + && directory.listFiles().length > 0) { + return directory; // relative path that is not in the classpath + } else if (url == null) { + LOG.warn("Plugins: directory not found: " + name); + return null; + } else if ( "jar".equals(url.getProtocol()) ) { + try + { + // HACK to find directory containing .jar file and look for plugins there. + LOG.warn( "HACK to look for plugin directory next to jar file: " + url ); + java.net.JarURLConnection connection = (java.net.JarURLConnection) url.openConnection(); + URL url2 = connection.getJarFileURL(); + if ( !"file".equals(url2.getProtocol()) ) + { + LOG.warn( "Jar file is not a file: " + url2 ); + return null; + } + directory = new File( new File( url2.getFile() ).getParent( ) + "/" + name ); + LOG.warn( "Plugin directory: " + directory ); + return directory; + } + catch ( IOException ioe ) + { + LOG.warn( ioe ); + return null; + } + } else if (!"file".equals(url.getProtocol())) { + LOG.warn("Plugins: not a file: url. 
Can't load plugins from: " + url); + return null; + } + String path = url.getPath(); + if (WINDOWS && path.startsWith("/")) // patch a windows bug + path = path.substring(1); + try { + path = URLDecoder.decode(path, "UTF-8"); // decode the url path + } catch (UnsupportedEncodingException e) { + } + directory = new File(path); + } + return directory; + } + + /** + * @param manifestPath + * @throws ParserConfigurationException + * @throws IOException + * @throws SAXException + * @throws MalformedURLException + */ + private PluginDescriptor parseManifestFile(String pManifestPath) + throws MalformedURLException, SAXException, IOException, + ParserConfigurationException { + Document document = parseXML(new File(pManifestPath).toURL()); + String pPath = new File(pManifestPath).getParent(); + return parsePlugin(document, pPath); + } + + /** + * @param url + * @return Document + * @throws IOException + * @throws SAXException + * @throws ParserConfigurationException + * @throws DocumentException + */ + private Document parseXML(URL url) throws SAXException, IOException, + ParserConfigurationException { + DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); + DocumentBuilder builder = factory.newDocumentBuilder(); + return builder.parse(url.openStream()); + } + + /** + * @param pDocument + * @throws MalformedURLException + */ + private PluginDescriptor parsePlugin(Document pDocument, String pPath) + throws MalformedURLException { + Element rootElement = pDocument.getDocumentElement(); + String id = rootElement.getAttribute(ATTR_ID); + String name = rootElement.getAttribute(ATTR_NAME); + String version = rootElement.getAttribute("version"); + String providerName = rootElement.getAttribute("provider-name"); + String pluginClazz = null; + if (rootElement.getAttribute(ATTR_CLASS).trim().length() > 0) { + pluginClazz = rootElement.getAttribute(ATTR_CLASS); + } + PluginDescriptor pluginDescriptor = new PluginDescriptor(id, version, name, + providerName, 
pluginClazz, pPath, this.conf); + LOG.debug("plugin: id=" + id + " name=" + name + " version=" + version + + " provider=" + providerName + "class=" + pluginClazz); + parseExtension(rootElement, pluginDescriptor); + parseExtensionPoints(rootElement, pluginDescriptor); + parseLibraries(rootElement, pluginDescriptor); + parseRequires(rootElement, pluginDescriptor); + return pluginDescriptor; + } + + /** + * @param pRootElement + * @param pDescriptor + * @throws MalformedURLException + */ + private void parseRequires(Element pRootElement, PluginDescriptor pDescriptor) + throws MalformedURLException { + + NodeList nodelist = pRootElement.getElementsByTagName("requires"); + if (nodelist.getLength() > 0) { + + Element requires = (Element) nodelist.item(0); + + NodeList imports = requires.getElementsByTagName("import"); + for (int i = 0; i < imports.getLength(); i++) { + Element anImport = (Element) imports.item(i); + String plugin = anImport.getAttribute("plugin"); + if (plugin != null) { + pDescriptor.addDependency(plugin); + } + } + } + } + + /** + * @param pRootElement + * @param pDescriptor + * @throws MalformedURLException + */ + private void parseLibraries(Element pRootElement, PluginDescriptor pDescriptor) + throws MalformedURLException { + NodeList nodelist = pRootElement.getElementsByTagName("runtime"); + if (nodelist.getLength() > 0) { + + Element runtime = (Element) nodelist.item(0); + + NodeList libraries = runtime.getElementsByTagName("library"); + for (int i = 0; i < libraries.getLength(); i++)... [truncated message content] |
From: <bi...@us...> - 2010-07-10 23:34:31
|
Revision: 3167 http://archive-access.svn.sourceforge.net/archive-access/?rev=3167&view=rev Author: binzino Date: 2010-07-10 23:34:25 +0000 (Sat, 10 Jul 2010) Log Message: ----------- Create tag/branch for this JIRA. Added Paths: ----------- tags/nutchwax-0_13-JIRA-WAX-75/ This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-07-06 22:23:38
|
Revision: 3166 http://archive-access.svn.sourceforge.net/archive-access/?rev=3166&view=rev Author: bradtofel Date: 2010-07-06 22:23:32 +0000 (Tue, 06 Jul 2010) Log Message: ----------- Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/exception/HTMLError.jsp Modified: trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/exception/HTMLError.jsp =================================================================== --- trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/exception/HTMLError.jsp 2010-07-06 22:22:12 UTC (rev 3165) +++ trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/exception/HTMLError.jsp 2010-07-06 22:23:32 UTC (rev 3166) @@ -1,7 +1,9 @@ <%@ page language="java" pageEncoding="utf-8" contentType="text/html;charset=utf-8"%> <%@ page import="java.util.List" %> <%@ page import="java.util.Date" %> +<%@ page import="java.util.Iterator" %> <%@ page import="org.archive.wayback.exception.WaybackException" %> +<%@ page import="org.archive.wayback.ResultURIConverter" %> <%@ page import="org.archive.wayback.exception.ResourceNotInArchiveException"%> <%@ page import="org.archive.wayback.exception.ResourceNotAvailableException"%> <%@ page import="org.archive.wayback.core.CaptureSearchResult" %> @@ -29,17 +31,39 @@ StringFormatter fmt = results.getWbRequest().getFormatter(); %> +<jsp:include page="/WEB-INF/global-template/UI-header.jsp" flush="true" /> -<h2><%= fmt.format(e.getTitleKey()) %></h2> -<p><b><%= fmt.format(e.getMessageKey(),e.getMessage()) %></b></p> + <div id="positionHome"> + <section> + <div id="logoHome"> + <h1><span>Internet Archive's Wayback Machine</span></h1> + </div> + </section> + <section> + <div id="searchHome"> + <form name="form1" method="get" action="<%= queryPrefix %>query"> + <input type="hidden" name="<%= WaybackRequest.REQUEST_TYPE %>" value="<%= WaybackRequest.REQUEST_CAPTURE_QUERY %>"> + <input 
type="text" name="<%= WaybackRequest.REQUEST_URL %>" value="http://" size="40"> + <button type="submit" name="Submit">Go Wayback!</button> + </form> + <div id="searchAdvHome"> + <a href="[ADVANCED SEARCH]">Advanced Search</a> + </div> + </div> + </section> + <section> + <div id="error"> + + <h2><%= fmt.format(e.getTitleKey()) %></h2> + <p><%= fmt.format(e.getMessageKey(),e.getMessage()) %></p> <% if(e instanceof ResourceNotInArchiveException) { ResourceNotInArchiveException niae = (ResourceNotInArchiveException) e; List<String> closeMatches = niae.getCloseMatches(); if(closeMatches != null && !closeMatches.isEmpty()) { %> - <p> - Other possible close matches to try:<br></br> + <p>Other possible close matches to try:</p> + <p> <% WaybackRequest tmp = wbr.clone(); for(String closeMatch : closeMatches) { @@ -47,7 +71,7 @@ String link = queryPrefix + "query?" + tmp.getQueryArguments(); %> - <a href="<%= link %>"><%= closeMatch %></a><br> + <a href="<%= link %>"><%= closeMatch %></a><br/> <% } } @@ -61,46 +85,58 @@ String escapedLink = fmt.escapeHtml(link); String escapedParentUrl = fmt.escapeHtml(parentUrl); %> - </p> - <p> - More options:<br></br> - Try Searching all pages under <a href="<%= escapedLink %>"><%= escapedParentUrl %></a> - </p> + </p> + <p>More options:</p> + <p>Try Searching all pages under <a href="<%= escapedLink %>"><%= escapedParentUrl %></a></p> <% } } else if(e instanceof ResourceNotAvailableException) { %> <div class="wm-nav-link-div"> <% - CaptureSearchResults cResults = results.getCaptureResults(); - Date firstDate = cResults.getFirstResultDate(); - Date lastDate = cResults.getLastResultDate(); - PartitionPartitionMap yearMap = - new PartitionPartitionMap(); - PartitionSize yearSize = Partitioner.yearSize; - Partitioner<Partition<CaptureSearchResult>> yearPartitioner = - new Partitioner<Partition<CaptureSearchResult>>(yearMap); - - List<Partition<Partition<CaptureSearchResult>>> yearPartitions = - 
yearPartitioner.getRange(yearSize,firstDate,lastDate); - - String navs[] = PartitionsToGraph.getNavigators(fmt,results.getResult()); - String links[] = PartitionsToGraph.getNavigatorLinks(yearPartitions,results.getURIConverter()); - String searchUrl = wbr.getRequestUrl(); - String starLink = fmt.escapeHtml(queryPrefix + "*/" + searchUrl); - links[PartitionsToGraph.NAV_CURRENT] = starLink; - for(int i = 0; i < navs.length; i++) { - if(i > 0) { - %> <% - } - if(links[i] == null) { - %><%= navs[i] %><% - } else { - %> <a href="<%= links[i] %>"><%= navs[i] %></a> <% - } + ResourceNotAvailableException rnae = (ResourceNotAvailableException) e; + + CaptureSearchResults cResults = rnae.getCaptureSearchResults(); + Iterator<CaptureSearchResult> itr = cResults.iterator(); + CaptureSearchResult prev = null; + CaptureSearchResult next = null; + while(itr.hasNext()) { + CaptureSearchResult cur = itr.next(); + if(cur.isClosest()) { + break; } + prev = cur; + } + if(itr.hasNext()) { + next = itr.next(); + } + if((prev != null) || (next != null)) { + String dateFormat = "{0,date,MMMM dd, yyyy HH:mm:ss}"; + ResultURIConverter conv = wbr.getAccessPoint().getUriConverter(); + %> + <div>Or try another close version:</div> + <% + if(prev != null) { + String safePrevReplay = fmt.escapeHtml(conv.makeReplayURI(prev.getCaptureTimestamp(),prev.getOriginalUrl())); + %> + <div>Previous:<a href="<%= safePrevReplay %>"><%= fmt.format(dateFormat,prev.getCaptureDate())%></a></div> + <% + } + if(next != null) { + String safeNextReplay = fmt.escapeHtml(conv.makeReplayURI(next.getCaptureTimestamp(),next.getOriginalUrl())); + %> + <div>Next:<a href="<%= safeNextReplay %>"><%= fmt.format(dateFormat,next.getCaptureDate())%></a></div> + <% + } + } %> </div> <% } %> + + </div> + </section> + <div id="errorBorder"></div> + +<jsp:include page="/WEB-INF/global-template/UI-footer.jsp" flush="true" /> \ No newline at end of file This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site. |
From: <bra...@us...> - 2010-07-06 22:22:18
|
Revision: 3165 http://archive-access.svn.sourceforge.net/archive-access/?rev=3165&view=rev Author: bradtofel Date: 2010-07-06 22:22:12 +0000 (Tue, 06 Jul 2010) Log Message: ----------- Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/query/HTMLUrlResults.jsp Modified: trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/query/HTMLUrlResults.jsp =================================================================== --- trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/query/HTMLUrlResults.jsp 2010-07-06 22:17:28 UTC (rev 3164) +++ trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/query/HTMLUrlResults.jsp 2010-07-06 22:22:12 UTC (rev 3165) @@ -9,7 +9,9 @@ %><%@ page import="org.archive.wayback.core.UrlSearchResults" %><%@ page import="org.archive.wayback.core.WaybackRequest" %><%@ page import="org.archive.wayback.util.StringFormatter" -%><% +%> +<jsp:include page="/WEB-INF/global-template/UI-header.jsp" flush="true" /> +<% UIResults results = UIResults.extractUrlQuery(request); WaybackRequest wbRequest = results.getWbRequest(); UrlSearchResults uResults = results.getUrlResults(); @@ -17,9 +19,10 @@ StringFormatter fmt = wbRequest.getFormatter(); String searchString = wbRequest.getRequestUrl(); +String staticPrefix = results.getStaticPrefix(); +String queryPrefix = results.getQueryPrefix(); +String replayPrefix = results.getReplayPrefix(); - - Date searchStartDate = wbRequest.getStartDate(); Date searchEndDate = wbRequest.getEndDate(); @@ -29,11 +32,94 @@ long totalCaptures = uResults.getMatchingCount(); %> -<%= fmt.format("PathPrefixQuery.showingResults",firstResult + 1,lastResult, - totalCaptures,searchString) %> -<br/> +<script src="http://ajax.googleapis.com/ajax/libs/jquery/1.4.2/jquery.min.js" type="text/javascript"></script> +<script src="http://ajax.googleapis.com/ajax/libs/jqueryui/1.8.1/jquery-ui.min.js" 
type="text/javascript"></script> +<script type="text/javascript" src="<%= staticPrefix %>js/jquery.dataTables.min.js" charset="utf-8"></script> +<script type="text/javascript"> +$().ready(function(){ + $(".dataTables_processing").show(); + $('#resultsUrl th.url span').html(' ↑'); + $('#resultsUrl th').mouseup(function(){ + \$('#resultsUrl th span').html(''); + \$(this).find('span').html(' ↑'); + if (\$(this).hasClass('sorting_asc')) { + \$(this).find('span').html(' ↓'); + } else if (\$(this).hasClass('sorting_desc')) { + \$(this).find('span').html(' ↑'); + }; + }); + var rowCount = \$('#resultsUrl tbody tr').length; + if (rowCount < 50) { + \$('#resultsUrl').dataTable({ + "bProcessing": true, + "aoColumns": [{"sType":"html"},{"sType":"date"},{"sType":"date"},null,null,null], + "aaSorting": [ [0,'asc'] ], + "bPaginate": false, + "bInfo": false, + "bFilter": true, + "bStateSave": true, + "bAutoWidth": false, + "oLanguage": { + "sSearch": "Filter results (i.e. '.txt'):" + } + }); + } else { + \$('#resultsUrl').dataTable({ + "bProcessing": true, + "aoColumns": [{"sType":"html"},{"sType":"date"},{"sType":"date"},null,null,null], + "aaSorting": [ [0,'asc'] ], + "bPaginate": true, + "bInfo": true, + "sPaginationType": "full_numbers", + "bFilter": true, + "bStateSave": true, + "bAutoWidth": false, + "oLanguage": { + "sSearch": "Filter results (i.e. 
'.txt'):" + }, + "iDisplayLength": 50 + }); + } + }); +$(window).load(function(){ -<hr></hr> +}); +</script> + <div id="positionHome"> + <section> + <div id="logoHome"> + <h1><span>Internet Archive's Wayback Machine</span></h1> + </div> + </section> + <section> + <div id="searchHome"> + <form name="form1" method="get" action="<%= queryPrefix %>query"> + <input type="hidden" name="<%= WaybackRequest.REQUEST_TYPE %>" value="<%= WaybackRequest.REQUEST_CAPTURE_QUERY %>"> + <input type="text" name="<%= WaybackRequest.REQUEST_URL %>" value="http://" size="40"> + <button type="submit" name="Submit">Go Wayback!</button> + </form> + <div id="searchAdvHome"> + <a href="[ADVANCED SEARCH]">Advanced Search</a> + </div> + </div> + </section> + </div> + <div id="positionTable"> + + <h2 class="green"><%= fmt.format("PathPrefixQuery.showingResults",totalCaptures) %></h2> + + <table id="resultsUrl"> + <thead> + <tr> + <th class="url">URL<span></span></th> + <th>From<span></span></th> + <th>To<span></span></th> + <th>Captures<span></span></th> + <th>Duplicates<span></span></th> + <th>Uniques<span></span></th> + </tr> + </thead> + <tbody> <% Iterator<UrlSearchResult> itr = uResults.iterator(); while(itr.hasNext()) { @@ -45,6 +131,7 @@ String lastDateTSss = result.getLastCaptureTimestamp(); long numCaptures = result.getNumCaptures(); long numVersions = result.getNumVersions(); + long numDupes = result.getNumCaptures() - result.getNumVersions(); Date firstDate = result.getFirstCaptureDate(); Date lastDate = result.getLastCaptureDate(); @@ -53,9 +140,17 @@ String ts = result.getFirstCaptureTimestamp(); String anchor = uriConverter.makeReplayURI(ts,originalUrl); %> - <a onclick="SetAnchorDate('<%= ts %>');" href="<%= anchor %>"> - <%= urlKey %> - </a> + <tr> + <td class="url"> + <a onclick="SetAnchorDate('<%= ts %>');" href="<%= anchor %>"><%= urlKey %></a> + </td> + <td class="dateFrom"><%= fmt.format("PathPrefixQuery.captureDate",firstDate) %></td> + <td class="dateTo"><%= 
fmt.format("PathPrefixQuery.captureDate",lastDate) %></td> + <td class="captures"><%= numCaptures %></td> + <td class="dupes"><%= numDupes %></td> + <td class="uniques"><%= numVersions %></td> + </tr> + <!-- <span class="mainSearchText"> <%= fmt.format("PathPrefixQuery.versionCount",numVersions) %> </span> @@ -63,11 +158,23 @@ <span class="mainSearchText"> <%= fmt.format("PathPrefixQuery.singleCaptureDate",firstDate) %> </span> + --> <% } else { String anchor = results.makeCaptureQueryUrl(originalUrl); %> + <tr> + <td class="url"> + <a href="<%= anchor %>"><%= urlKey %></a> + </td> + <td class="dateFrom"><%= fmt.format("PathPrefixQuery.captureDate",firstDate) %></td> + <td class="dateTo"><%= fmt.format("PathPrefixQuery.captureDate",lastDate) %></td> + <td class="captures"><%= numCaptures %></td> + <td class="dupes"><%= numDupes %></td> + <td class="uniques"><%= numVersions %></td> + </tr> + <!-- <a href="<%= anchor %>"> <%= urlKey %> </a> @@ -78,11 +185,10 @@ <span class="mainSearchText"> <%= fmt.format("PathPrefixQuery.multiCaptureDate",numCaptures,firstDate,lastDate) %> </span> + --> <% } %> - <br/> - <br/> <% } @@ -90,12 +196,11 @@ int curPage = uResults.getCurPageNum(); if(curPage > uResults.getNumPages()) { %> - <hr></hr> <a href="<%= results.urlForPage(1) %>">First results</a> <% } else if(uResults.getNumPages() > 1) { %> - <hr></hr> + <% for(int i = 1; i <= uResults.getNumPages(); i++) { if(i == curPage) { @@ -109,4 +214,9 @@ } } } -%> \ No newline at end of file +%> + </tbody> + </table> + + +<jsp:include page="/WEB-INF/global-template/UI-footer.jsp" flush="true" /> \ No newline at end of file This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-07-06 22:17:35
|
Revision: 3164 http://archive-access.svn.sourceforge.net/archive-access/?rev=3164&view=rev Author: bradtofel Date: 2010-07-06 22:17:28 +0000 (Tue, 06 Jul 2010) Log Message: ----------- tweaks Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/replay/Toolbar.jsp trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/replay/UrlRedirectNotice.jsp Modified: trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/replay/Toolbar.jsp =================================================================== --- trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/replay/Toolbar.jsp 2010-06-24 20:27:18 UTC (rev 3163) +++ trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/replay/Toolbar.jsp 2010-07-06 22:17:28 UTC (rev 3164) @@ -174,7 +174,7 @@ <table style="border-collapse:collapse;margin:0 auto;padding:0;width:570px;"><tbody><tr> <td style="padding:3px 0;" colspan="2"> - <form target="_top" method="get" action="<%= queryPrefix %>query" name="wmtb" id="wmtb" style="margin:0!important;padding:0!important;"><input type="text" name="<%= WaybackRequest.REQUEST_URL %>" id="wmtbURL" value="<%= searchUrlSafe %>" style="width:400px;font-size:11px;font-family:'Lucida Grande','Arial',sans-serif;"/><input type="hidden" name="<%= WaybackRequest.REQUEST_TYPE %>" value="<%= WaybackRequest.REQUEST_REPLAY_QUERY %>"><input type="submit" value="Go" style="font-size:11px;font-family:'Lucida Grande','Arial',sans-serif;margin-left:5px;"/><span id="wm_tb_options" style="display:block;"></span></form> + <form target="_top" method="get" action="<%= queryPrefix %>query" name="wmtb" id="wmtb" style="margin:0!important;padding:0!important;"><input type="text" name="<%= WaybackRequest.REQUEST_URL %>" id="wmtbURL" value="<%= searchUrlSafe %>" style="width:400px;font-size:11px;font-family:'Lucida Grande','Arial',sans-serif;"/><input type="hidden" name="<%= 
WaybackRequest.REQUEST_TYPE %>" value="<%= WaybackRequest.REQUEST_REPLAY_QUERY %>"><input type="hidden" name="<%= WaybackRequest.REQUEST_DATE %>" value="<%= data.curResult.getCaptureTimestamp() %>"><input type="submit" value="Go" style="font-size:11px;font-family:'Lucida Grande','Arial',sans-serif;margin-left:5px;"/><span id="wm_tb_options" style="display:block;"></span></form> </td> <td style="vertical-align:bottom;padding:5px 0 0 0!important;" rowspan="2"> <table style="border-collapse:collapse;width:110px;color:#99a;font-family:'Helvetica','Lucida Grande','Arial',sans-serif;"><tbody> @@ -312,7 +312,7 @@ </td> <td style="text-align:right;padding:5px;width:65px;font-size:11px!important;"> <a href="javascript:;" onclick="document.getElementById('wm-ipp').style.display='none';" style="display:block;padding-right:18px;background:url(<%= staticPrefix %>images/toolbar/wm_tb_close.png) no-repeat 100% 0;color:#33f;font-family:'Lucida Grande','Arial',sans-serif;margin-bottom:23px;" title="<%= fmt.format("ToolBar.closeTitle") %>"><%= fmt.format("ToolBar.closeText") %></a> - <a href="FAQ" style="display:block;padding-right:18px;background:url(<%= staticPrefix %>images/toolbar/wm_tb_help.png) no-repeat 100% 0;color:#33f;font-family:'Lucida Grande','Arial',sans-serif;" title="<%= fmt.format("ToolBar.helpTitle") %>"><%= fmt.format("ToolBar.helpText") %></a> + <a href="https://webarchive.jira.com/wiki/display/WWMOS/FAQs" style="display:block;padding-right:18px;background:url(<%= staticPrefix %>images/toolbar/wm_tb_help.png) no-repeat 100% 0;color:#33f;font-family:'Lucida Grande','Arial',sans-serif;" title="<%= fmt.format("ToolBar.helpTitle") %>"><%= fmt.format("ToolBar.helpText") %></a> </td> </tr></tbody></table> Modified: trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/replay/UrlRedirectNotice.jsp =================================================================== --- 
trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/replay/UrlRedirectNotice.jsp 2010-06-24 20:27:18 UTC (rev 3163) +++ trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/replay/UrlRedirectNotice.jsp 2010-07-06 22:17:28 UTC (rev 3164) @@ -35,19 +35,30 @@ int secs = 5; %> + <jsp:include page="/WEB-INF/global-template/UI-header.jsp" flush="true" /> + + <div id="positionHome"> + <section> + <div id="logoHome"> + <h1><span>Internet Archive's Wayback Machine</span></h1> + </div> + </section> + <section> + <div id="error"> <script type="text/javascript"> function go() { document.location.href = "<%= safeTargetReplayUrlJS %>"; } window.setTimeout("go()",<%= secs * 1000 %>); </script> - <h2>Following redirect...</h2> - <p>The URL you requested:</p> - <p><%= safeSource %></p> - <p>redirected to the URL:</p> + <p class="code">Loading...</p> + <p class="code shift"><%= safeSource %> | <%= prettyDate %></p> + <p class="code shift red">Got an HTTP 302 response at crawl time</p> + <p class="code">Redirecting to...</p> <p><%= safeTarget %></p> - <p> - when it was crawled at <%= prettyDate %>. You will be redirected - to that target in <%= secs %> seconds. - Click <a href="<%= safeTargetReplayUrl %>">here</a> to go now. - </p> + <p class="impatient"><a href="<%= safeTargetReplayUrl %>">Impatient?</a></p> + </div> + </section> + <div id="errorBorder"></div> + + <jsp:include page="/WEB-INF/global-template/UI-footer.jsp" flush="true" /> \ No newline at end of file This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-06-24 20:27:24
|
Revision: 3163 http://archive-access.svn.sourceforge.net/archive-access/?rev=3163&view=rev Author: bradtofel Date: 2010-06-24 20:27:18 +0000 (Thu, 24 Jun 2010) Log Message: ----------- BUGFIX(unreported) need different Recorder filename for each Thread.. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLtoARCCacher.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLtoARCCacher.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLtoARCCacher.java 2010-06-24 20:26:25 UTC (rev 3162) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLtoARCCacher.java 2010-06-24 20:27:18 UTC (rev 3163) @@ -116,8 +116,8 @@ // to track if we got a response (any response) or an exception. boolean gotUrl = false; - - Recorder recorder = new Recorder(recorderCacheDir,backingFileBase, + String fName = backingFileBase + "-" + Thread.currentThread().getId(); + Recorder recorder = new Recorder(recorderCacheDir,fName, outBufferSize, inBufferSize); ExtendedGetMethod getMethod = null; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-06-24 20:26:31
|
Revision: 3162 http://archive-access.svn.sourceforge.net/archive-access/?rev=3162&view=rev Author: bradtofel Date: 2010-06-24 20:26:25 +0000 (Thu, 24 Jun 2010) Log Message: ----------- FEATURE: now adds HTTP "Authorization" header to the WaybackRequest Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/WaybackRequest.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/WaybackRequest.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/WaybackRequest.java 2010-06-24 20:25:21 UTC (rev 3161) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/WaybackRequest.java 2010-06-24 20:26:25 UTC (rev 3162) @@ -324,6 +324,10 @@ * see HttpServletRequest.getRemoteUser() */ public static final String REQUEST_REMOTE_USER = "requestremoteuser"; + /** + * Exact value from HTTP request for header "Authorization" + */ + public static final String REQUEST_AUTHORIZATION = "Authorization"; /** * User Locale name: Best Guess at users requested locale. @@ -866,6 +870,9 @@ putUnlessNull(REQUEST_WAYBACK_HOSTNAME, httpRequest.getLocalName()); putUnlessNull(REQUEST_AUTH_TYPE, httpRequest.getAuthType()); putUnlessNull(REQUEST_REMOTE_USER, httpRequest.getRemoteUser()); + + putUnlessNull(REQUEST_AUTHORIZATION, + httpRequest.getHeader(REQUEST_AUTHORIZATION)); putUnlessNull(REQUEST_WAYBACK_PORT, String.valueOf(httpRequest.getLocalPort())); putUnlessNull(REQUEST_WAYBACK_CONTEXT, httpRequest.getContextPath()); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-06-24 20:25:27
|
Revision: 3161 http://archive-access.svn.sourceforge.net/archive-access/?rev=3161&view=rev Author: bradtofel Date: 2010-06-24 20:25:21 +0000 (Thu, 24 Jun 2010) Log Message: ----------- BUGFIX:(unreported) was not correctly handling empty/null datespec Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrl.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrl.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrl.java 2010-06-24 20:24:09 UTC (rev 3160) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrl.java 2010-06-24 20:25:21 UTC (rev 3161) @@ -69,9 +69,15 @@ } public String toString(String datespec, String url) { + int dateLen = 0; + if(datespec != null) { + dateLen = datespec.length(); + } StringBuilder sb = - new StringBuilder(url.length() + datespec.length()+10); - sb.append(datespec); + new StringBuilder(url.length() + dateLen +10); + if(dateLen > 0) { + sb.append(datespec); + } if(wbRequest.isCSSContext()) { sb.append(ArchivalUrlRequestParser.CSS_CONTEXT); sb.append(ArchivalUrlRequestParser.FLAG_DELIM); @@ -88,7 +94,9 @@ sb.append(ArchivalUrlRequestParser.IDENTITY_CONTEXT); sb.append(ArchivalUrlRequestParser.FLAG_DELIM); } - sb.append("/"); + if(dateLen > 0) { + sb.append("/"); + } sb.append(UrlOperations.stripDefaultPortFromUrl(url)); return sb.toString(); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 3160 http://archive-access.svn.sourceforge.net/archive-access/?rev=3160&view=rev Author: bradtofel Date: 2010-06-24 20:24:09 +0000 (Thu, 24 Jun 2010) Log Message: ----------- BUGFIX(unreported) bad "if" logic... Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/ArchivalUrlFormRequestParser.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/ArchivalUrlFormRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/ArchivalUrlFormRequestParser.java 2010-06-24 20:23:06 UTC (rev 3159) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/ArchivalUrlFormRequestParser.java 2010-06-24 20:24:09 UTC (rev 3160) @@ -50,7 +50,7 @@ WaybackRequest wbRequest = super.parse(httpRequest, accessPoint); if(wbRequest != null) { String replayTimestamp = wbRequest.getReplayTimestamp(); - if((replayTimestamp != null) && replayTimestamp.length() == 0) { + if((replayTimestamp == null) || replayTimestamp.length() == 0) { // lets call it a star query: // TODO: should we clone? wbRequest.setStartTimestamp(null); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 3159 http://archive-access.svn.sourceforge.net/archive-access/?rev=3159&view=rev Author: bradtofel Date: 2010-06-24 20:23:06 +0000 (Thu, 24 Jun 2010) Log Message: ----------- FEATURE: Unable to use Tomcat's built-in authorization - it's either all or nothing, so doesn't allow no-password access to certain IP range blocks. Now this class parses the users "Authorization" HTTP header to extract BASIC credentials. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/authenticationcontrol/HTTPAuthBooleanOperator.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/authenticationcontrol/HTTPAuthBooleanOperator.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/authenticationcontrol/HTTPAuthBooleanOperator.java 2010-06-22 19:15:59 UTC (rev 3158) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/authenticationcontrol/HTTPAuthBooleanOperator.java 2010-06-24 20:23:06 UTC (rev 3159) @@ -25,6 +25,8 @@ package org.archive.wayback.authenticationcontrol; import java.util.List; +import java.io.UnsupportedEncodingException; +import org.apache.commons.codec.binary.Base64; import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.util.operator.BooleanOperator; @@ -41,12 +43,32 @@ if(allowedUsers == null) { return false; } - String currentUser = value.getRemoteUser(); + String currentUser = getHTTPAuth(value); if(currentUser == null) { return false; } return allowedUsers.contains(currentUser); } + private String decodeBasic(String authHeaderValue) { + if(authHeaderValue != null) { + if(authHeaderValue.startsWith("Basic ")) { + String b64 = authHeaderValue.substring(6); + byte[] decoded = Base64.decodeBase64(b64.getBytes()); + try { + return new String(decoded,"utf-8"); + } catch 
(UnsupportedEncodingException e) { + // really?... + return new String(decoded); + } + } + } + return null; + + } + private String getHTTPAuth(WaybackRequest request) { + return decodeBasic(request.get("Authorization")); + } + /** * @return the List of users that this operator matches against. */ @@ -55,6 +77,7 @@ } /** * @param allowedUsers the List of users that this operator matches against. + * format for values is "username:password" */ public void setAllowedUsers(List<String> allowedUsers) { this.allowedUsers = allowedUsers; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-06-22 19:16:06
|
Revision: 3158 http://archive-access.svn.sourceforge.net/archive-access/?rev=3158&view=rev Author: bradtofel Date: 2010-06-22 19:15:59 +0000 (Tue, 22 Jun 2010) Log Message: ----------- cleanup, some polish Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/replay/Toolbar.jsp Modified: trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/replay/Toolbar.jsp =================================================================== --- trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/replay/Toolbar.jsp 2010-06-18 00:39:52 UTC (rev 3157) +++ trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/replay/Toolbar.jsp 2010-06-22 19:15:59 UTC (rev 3158) @@ -178,67 +178,72 @@ </td> <td style="vertical-align:bottom;padding:5px 0 0 0!important;" rowspan="2"> <table style="border-collapse:collapse;width:110px;color:#99a;font-family:'Helvetica','Lucida Grande','Arial',sans-serif;"><tbody> - <tr> - <td style="padding-right:9px;text-align:right;"> + + <!-- NEXT/PREV MONTH NAV AND MONTH INDICATOR --> + <tr style="width:110px;height:16px;font-size:10px!important;"> + <td style="padding-right:9px;font-size:11px!important;font-weight: bold;text-align:right;white-space:nowrap;overflow:visible;" nowrap="nowrap"> <% - if(data.prevResult == null) { + if(data.monthPrevResult == null) { %> - <img src="<%= staticPrefix %>images/toolbar/wm_tb_prv_on.png" alt="Previous capture" width="14" height="16" border="0" /> + <%= fmt.format("ToolBar.noPrevMonthText",ToolBarData.addMonth(data.curResult.getCaptureDate(),-1)) %> <% } else { %> - <a href="<%= data.makeReplayURL(data.prevResult) %>" title="<%= fmt.format("ToolBar.prevTitle",data.prevResult.getCaptureDate()) %>"><img src="<%= staticPrefix %>images/toolbar/wm_tb_prv_on.png" alt="Previous capture" width="14" height="16" border="0" /></a> + <a href="<%= data.makeReplayURL(data.monthPrevResult) %>" 
style="text-decoration:none;color:#33f;" title="<%= fmt.format("ToolBar.prevMonthTitle",data.monthPrevResult.getCaptureDate()) %>"><%= fmt.format("ToolBar.prevMonthText",data.monthPrevResult.getCaptureDate()).toUpperCase() %></a> <% } %> </td> - <td id="displayDayEl" style="background:#000;color:#ff0;width:34px;height:24px;padding:2px 0 0 0;text-align:center;font-size:22px;" title="<%= fmt.format("ToolBar.curDayTitle",data.curResult.getCaptureDate()) %>"><%= fmt.format("ToolBar.curDayText",data.curResult.getCaptureDate()) %></td> - <td style="padding-left:9px;white-space:nowrap;overflow:visible;" nowrap="nowrap"> + <td id="displayMonthEl" style="background:#000;color:#ff0;font-size:11px!important;font-weight: bold;width:34px;height:15px;padding-top:1px;text-align:center;" title="<%= fmt.format("ToolBar.curMonthTitle",data.curResult.getCaptureDate()) %>"><%= fmt.format("ToolBar.curMonthText",data.curResult.getCaptureDate()).toUpperCase() %></td> + <td style="padding-left:9px;font-size:11px!important;font-weight: bold;white-space:nowrap;overflow:visible;" nowrap="nowrap"> <% - if(data.nextResult == null) { + if(data.monthNextResult == null) { %> - <img src="<%= staticPrefix %>images/toolbar/wm_tb_nxt_on.png" alt="Next capture" width="14" height="16" border="0"/> + <%= fmt.format("ToolBar.noNextMonthText",ToolBarData.addMonth(data.curResult.getCaptureDate(),1)) %> <% } else { %> - <a href="<%= data.makeReplayURL(data.nextResult) %>" title="<%= fmt.format("ToolBar.nextTitle",data.nextResult.getCaptureDate()) %>"><img src="<%= staticPrefix %>images/toolbar/wm_tb_nxt_on.png" alt="Next capture" width="14" height="16" border="0"/></a> + <a href="<%= data.makeReplayURL(data.monthNextResult) %>" style="text-decoration:none;color:#33f;" title="<%= fmt.format("ToolBar.nextMonthTitle",data.monthNextResult.getCaptureDate()) %>"><%= fmt.format("ToolBar.nextMonthText",data.monthNextResult.getCaptureDate()).toUpperCase() %></a> <% } %> - </td> + </td> </tr> - <tr 
style="width:110px;height:16px;font-size:10px!important;"> - <td style="padding-right:9px;text-align:right;white-space:nowrap;overflow:visible;" nowrap="nowrap"> + + <!-- NEXT/PREV CAPTURE NAV AND DAY OF MONTH INDICATOR --> + <tr> + <td style="padding-right:9px;text-align:right;"> <% - if(data.monthPrevResult == null) { + if(data.prevResult == null) { %> - <%= fmt.format("ToolBar.noPrevMonthText",ToolBarData.addMonth(data.curResult.getCaptureDate(),-1)) %> + <img src="<%= staticPrefix %>images/toolbar/wm_tb_prv_on.png" alt="Previous capture" width="14" height="16" border="0" /> <% } else { %> - <a href="<%= data.makeReplayURL(data.monthPrevResult) %>" style="text-decoration:none;color:#33f;" title="<%= fmt.format("ToolBar.prevMonthTitle",data.monthPrevResult.getCaptureDate()) %>"><%= fmt.format("ToolBar.prevMonthText",data.monthPrevResult.getCaptureDate()) %></a> + <a href="<%= data.makeReplayURL(data.prevResult) %>" title="<%= fmt.format("ToolBar.prevTitle",data.prevResult.getCaptureDate()) %>"><img src="<%= staticPrefix %>images/toolbar/wm_tb_prv_on.png" alt="Previous capture" width="14" height="16" border="0" /></a> <% } %> </td> - <td id="displayMonthEl" style="background:#000;color:#ff0;font-size:12px!important;width:34px;height:15px;padding-top:1px;text-align:center;" title="<%= fmt.format("ToolBar.curMonthTitle",data.curResult.getCaptureDate()) %>"><%= fmt.format("ToolBar.curMonthText",data.curResult.getCaptureDate()) %></td> + <td id="displayDayEl" style="background:#000;color:#ff0;width:34px;height:24px;padding:2px 0 0 0;text-align:center;font-size:24px;font-weight: bold;" title="<%= fmt.format("ToolBar.curDayTitle",data.curResult.getCaptureDate()) %>"><%= fmt.format("ToolBar.curDayText",data.curResult.getCaptureDate()) %></td> <td style="padding-left:9px;white-space:nowrap;overflow:visible;" nowrap="nowrap"> <% - if(data.monthNextResult == null) { + if(data.nextResult == null) { %> - <%= 
fmt.format("ToolBar.noNextMonthText",ToolBarData.addMonth(data.curResult.getCaptureDate(),1)) %> + <img src="<%= staticPrefix %>images/toolbar/wm_tb_nxt_on.png" alt="Next capture" width="14" height="16" border="0"/> <% } else { %> - <a href="<%= data.makeReplayURL(data.monthNextResult) %>" style="text-decoration:none;color:#33f;" title="<%= fmt.format("ToolBar.nextMonthTitle",data.monthNextResult.getCaptureDate()) %>"><%= fmt.format("ToolBar.nextMonthText",data.monthNextResult.getCaptureDate()) %></a> + <a href="<%= data.makeReplayURL(data.nextResult) %>" title="<%= fmt.format("ToolBar.nextTitle",data.nextResult.getCaptureDate()) %>"><img src="<%= staticPrefix %>images/toolbar/wm_tb_nxt_on.png" alt="Next capture" width="14" height="16" border="0"/></a> <% } %> - </td> + </td> </tr> + <!-- NEXT/PREV YEAR NAV AND YEAR INDICATOR --> <tr style="width:110px;height:13px;font-size:9px!important;"> - <td style="padding-right:9px;text-align:right;white-space:nowrap;overflow:visible;" nowrap="nowrap"> + <td style="padding-right:9px;font-size:11px!important;font-weight: bold;text-align:right;white-space:nowrap;overflow:visible;" nowrap="nowrap"> <% if(data.yearPrevResult == null) { %> @@ -251,8 +256,8 @@ } %> </td> - <td id="displayYearEl" style="background:#000;color:#ff0;font-size:10px!important;padding-top:1px;width:34px;height:13px;text-align:center;" title="<%= fmt.format("ToolBar.curYearTitle",data.curResult.getCaptureDate()) %>"><%= fmt.format("ToolBar.curYearText",data.curResult.getCaptureDate()) %></td> - <td style="padding-left:9px;white-space:nowrap;overflow:visible;" nowrap="nowrap"> + <td id="displayYearEl" style="background:#000;color:#ff0;font-size:11px!important;font-weight: bold;padding-top:1px;width:34px;height:13px;text-align:center;" title="<%= fmt.format("ToolBar.curYearTitle",data.curResult.getCaptureDate()) %>"><%= fmt.format("ToolBar.curYearText",data.curResult.getCaptureDate()) %></td> + <td 
style="padding-left:9px;font-size:11px!important;font-weight: bold;white-space:nowrap;overflow:visible;" nowrap="nowrap"> <% if(data.yearNextResult == null) { %> This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-06-18 00:39:59
|
Revision: 3157 http://archive-access.svn.sourceforge.net/archive-access/?rev=3157&view=rev Author: bradtofel Date: 2010-06-18 00:39:52 +0000 (Fri, 18 Jun 2010) Log Message: ----------- INITIAL REV: new Query Replay .jsp which shows 1 year at a time, displaying the results on a 'cal' style calendar Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/query/BubbleCalendar.jsp Added: trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/query/BubbleCalendar.jsp =================================================================== --- trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/query/BubbleCalendar.jsp (rev 0) +++ trunk/archive-access/projects/wayback/wayback-webapp/src/main/webapp/WEB-INF/query/BubbleCalendar.jsp 2010-06-18 00:39:52 UTC (rev 3157) @@ -0,0 +1,604 @@ +<%@ page language="java" pageEncoding="utf-8" contentType="text/html;charset=utf-8"%> +<%@ page import="java.util.List" %> +<%@ page import="java.util.ArrayList" %> +<%@ page import="java.util.Calendar" %> +<%@ page import="java.util.Date" %> +<%@ page import="java.util.Iterator" %> +<%@ page import="org.archive.wayback.ResultURIConverter" %> +<%@ page import="org.archive.wayback.WaybackConstants" %> +<%@ page import="org.archive.wayback.core.CaptureSearchResult" %> +<%@ page import="org.archive.wayback.core.CaptureSearchResults" %> +<%@ page import="org.archive.wayback.core.UIResults" %> +<%@ page import="org.archive.wayback.partition.BubbleCalendarData" %> +<%@ page import="org.archive.wayback.util.partition.Partition" %> +<%@ page import="org.archive.wayback.util.StringFormatter" %> +<jsp:include page="/WEB-INF/template/CookieJS.jsp" flush="true" /> +<% +UIResults results = UIResults.extractCaptureQuery(request); + +StringFormatter fmt = results.getWbRequest().getFormatter(); +ResultURIConverter uriConverter = results.getURIConverter(); + +// deployment-specific URL prefixes +String staticPrefix = 
results.getStaticPrefix(); +String queryPrefix = results.getQueryPrefix(); +String replayPrefix = results.getReplayPrefix(); + +//deployment-specific address for the graph generator: +String graphJspPrefix = results.getContextConfig("graphJspPrefix"); +if(graphJspPrefix == null) { + graphJspPrefix = queryPrefix; +} + +// graph size "constants": These are currently baked-in to the JS logic... +int imgWidth = 735; +int imgHeight = 75; +int yearWidth = 49; +int monthWidth = 4; + +BubbleCalendarData data = new BubbleCalendarData(results); + +String yearEncoded = data.getYearsGraphString(imgWidth,imgHeight); +String yearImgUrl = graphJspPrefix + "jsp/graph.jsp?graphdata=" + yearEncoded; + +// a Calendar object for doing days-in-week, day-of-week,days-in-month math: +Calendar cal = BubbleCalendarData.getUTCCalendar(); + +%> +<script src="http://ajax.googleapis.com/ajax/libs/jquery/1.4.2/jquery.min.js" type="text/javascript"></script> +<script src="http://ajax.googleapis.com/ajax/libs/jqueryui/1.8.1/jquery-ui.min.js" type="text/javascript"></script> +<script type="text/javascript" src="<%= staticPrefix %>js/excanvas.compiled.js"></script> +<script type="text/javascript" src="<%= staticPrefix %>js/jquery.bt.min.js" charset="utf-8"></script> +<script type="text/javascript" src="<%= staticPrefix %>js/jquery.hoverintent.min.js" charset="utf-8"></script> +<script type="text/javascript" src="<%= staticPrefix %>js/graph-calc.js" ></script> +<!-- More ugly JS to manage the highlight over the graph --> +<script type="text/javascript"> + + +var firstDate = <%= data.dataStartMSSE %>; +var lastDate = <%= data.dataEndMSSE %>; +var wbPrefix = "<%= replayPrefix %>"; +var wbCurrentUrl = "<%= data.searchUrlForJS %>"; + +var curYear = <%= data.yearNum - 1996 %>; +var curMonth = -1; +var yearCount = 15; +var firstYear = 1996; +var startYear = <%= data.yearNum - 1996 %>; +var imgWidth = <%= imgWidth %>; +var yearImgWidth = <%= yearWidth %>; +var monthImgWidth = <%= monthWidth %>; +var 
trackerVal = "none"; + +function showTrackers(val) { + if(val == trackerVal) { + return; + } + + document.getElementById("wbMouseTrackYearImg").style.display = val; + trackerVal = val; +} +function getElementX2(obj) { + var thing = jQuery(obj); + if((thing == undefined) + || (typeof thing == "undefined") + || (typeof thing.offset == "undefined")) { + return getElementX(obj); + } + return Math.round(thing.offset().left); +} +function setActiveYear(year) { + if(curYear != year) { + var yrOff = year * yearImgWidth; + document.getElementById("wbMouseTrackYearImg").style.left = yrOff + "px"; + if(curYear != -1) { + document.getElementById("highlight-"+curYear).setAttribute("class","inactiveHighlight"); + } + document.getElementById("highlight-"+year).setAttribute("class","activeHighlight"); + curYear = year; + } +} +function trackMouseMove(event,element) { + + var eventX = getEventX(event); + var elementX = getElementX2(element); + var xOff = eventX - elementX; + if(xOff < 0) { + xOff = 0; + } else if(xOff > imgWidth) { + xOff = imgWidth; + } + var monthOff = xOff % yearImgWidth; + + var year = Math.floor(xOff / yearImgWidth); + var yearStart = year * yearImgWidth; + var monthOfYear = Math.floor(monthOff / monthImgWidth); + if(monthOfYear > 11) { + monthOfYear = 11; + } + var month = (year * 12) + monthOfYear; + var day = 1; + if(monthOff % 2 == 1) { + day = 15; + } + var dateString = + zeroPad(year + firstYear) + + zeroPad(monthOfYear+1,2) + + zeroPad(day,2) + "000000"; + + var url = wbPrefix + dateString + '*/' + wbCurrentUrl; + document.getElementById('wm-graph-anchor').href = url; + setActiveYear(year); +} +</script> + +<script type="text/javascript"> +$().ready(function(){ + $(".date").each(function(i){ + var size = $(this).find(".hidden").text(); + var offset = size / 2; + if (size >= 1 && size <= 20) {size = 20, offset = 10;} + $(this).find("img").attr("src","<%= staticPrefix %>images/blueblob-dk.png"); + 
$(this).find(".measure").css({'width':+size+'px','height':+size+'px','top':'-'+offset+'px','left':'-'+offset+'px'}); + }); + $(".day a").each(function(i){ + var dateClass = $(this).attr("class"); + var dateId = "#"+dateClass; + $(this).hover(function(){ + $(dateId).removeClass("opacity20"); + },function(){ + $(dateId).addClass("opacity20"); + }); + }); + $(".tooltip").bt({ + positions: ['top','right','left','bottom'], + contentSelector: "$(this).find('.pop').html()", + padding: '0', + width: '145px', + spikeGirth: 12, + spikeLength: 12, + overlap: '2px', + cornerRadius: 5, + fill: '#efefef', + strokeWidth: 1, + strokeStyle: '#efefef', + shadow: true, + shadowColor: '#333', + shadowBlur: 6, + shadowOffsetX: 0, + shadowOffsetY: 0, + noShadowOpts: {strokeStyle:'#ccc'}, + hoverIntentOpts: {interval:0,timeout:4000}, + clickAnywhereToClose: true, + closeWhenOthersOpen: true, + windowMargin: 30, + cssStyles: { + fontSize: '12px', + fontFamily: '"Arial","Helvetica Neue","Helvetica",sans-serif', + lineHeight: 'normal', + padding: '10px', + color: '#333' + } + }); +}); +</script> +<style type="text/css"> +body,div,p,td,th,ul,ol,li {margin:0;padding:0;} +body {background-color:#fff;font-family:"Arial","Helvetica Neue","Helvetica",sans-serif;font-size:100%;} +img {border:none;} +a {color:#069;} +.clearfix{width:100%;clear:both;} +.clearfix:after {content:".";display:block;height:0;clear:both;visibility:hidden;} +#position {padding:0;margin:0 auto;width:990px;background-color:#fff;} +#wbCalendar {position:relative;width:990px;margin-top:25px;} +.calPosition {padding:15px 0 25px 25px;} +#calUnder {overflow:hidden;} +#calOver {position:absolute;top:0;left:0;} +.hidden{display:none;} +.opacity20 { + opacity:.2; + -ms-filter:"progid:DXImageTransform.Microsoft.Alpha(Opacity=20)"; + filter: alpha(opacity=20); +} +.month { + width: 240px; + height: 210px; + float: left; +} +.month table { + border-collapse: collapse; + font-family: "Arial", sans-serif; + border-spacing: 1px; +} 
+.month table th { + font-size: 0.75em; + font-weight: 700; + text-transform: uppercase; + padding: 6px; +} +.month table span.label { + display: block; + min-height: 20px; +} +.month table td { + padding: 0; + vertical-align: middle; + color: #666; +} +.month table td .day { + width: 30px; + height: 30px; + text-align: center; +} +.month table td .day a, +.month table td .day span { + display: block; + font-size: 0.6875em; + width: 30px; + height: 22px; + padding-top: 8px; +} +.month table td .day a { + color: #000; + font-weight: 700; + text-decoration: none; +} +.month table td .day span { + padding-top: 9px; + height: 19px; +} +.month table td .day a:hover { + font-size: 0.9375em; + padding-top: 6px; + height: 22px; +} +.month table td .date { + width: 30px; + height: 30px; +} +.month table td .position { + position: relative; + top: 15px; + left: 15px; + width: 1px; + height: 1px; +} +.month table td .measure { + position: absolute; +} +.activeHighlight { + background-color: #000!important; + padding-top: 4px; + font-size: 1.375em!important; + color: #fff300!important; + font-weight: normal!important; + cursor: pointer; +} +.inactiveHighlight { + background-color: #fff!important; + padding-top: 4px; + font-size: .75em!important; + color: #000!important; + font-weight: normal!important; + cursor: pointer; +} + +.bt-content { + text-align: left; +} +.pop {display:none;} +.bt-content h3 {font-size: 1em;font-weight: 700;text-transform: uppercase;margin:0 0 5px;} +.bt-content p {font-size: 0.875em;margin: 5px 0;color:#666;} +.bt-content ul {line-height:1.5em;margin:0 0 0 1em;} +.bt-content em {color:#999;} +.bt-content a:hover {color:#036;} + +#wbSearch {float:left;padding:30px 30px 0;} +#wbSearch #logo {float:left; width:223px;} +#wbSearch #form {float:left;width:707px;} +#wbSearch form {margin:0;padding:0;} +#wbSearch input {font-family:"Arial","Helvetica Neue","Helvetica",sans-serif;font-size:1.125em;} +#wbSearch input[type=text] {width:450px;font-weight:700;} 
+#wbSearch input[type=submit] {vertical-align: middle;} +#wbMeta {padding:15px 0;} +#wbMeta p {margin:0 0 2px;padding:0;} +#wbMeta p.wbThis {font-size:0.75em;} +#wbMeta p.wbNote {color:#666;font-size: 0.6875em;} +#wbMeta p.wbNote a {color:#666;} +#wbChart {text-align:center;padding:0 30px;} +#wbChartThis {position:relative;margin:0 auto;} +.wbChartThisContainer,.wbChartHover {width:<%= yearWidth %>px;height:30px;float:left;overflow:visible;} +.wbChartThisTop { + width: <%= yearWidth %>px; + height: 80px; + border: 1px solid #ccc; +} +.wbGradient { + background: #f3f3f3 -moz-linear-gradient(top,#ffffff,#f3f3f3); + background: #f3f3f3 -webkit-gradient(linear, left top, left bottom, from(#fff), to(#f3f3f3), color-stop(1.0, #f3f3f3)); + background-color: #f3f3f3; + filter: progid:DXImageTransform.Microsoft.Gradient(enabled='true',startColorstr=#FFFFFFFF, endColorstr=#FFF3F3F3); +} +.wbSelected, #wbSelected { + background: #fff300!important; + border-bottom: 1px solid #000!important; + filter: progid:DXImageTransform.Microsoft.Gradient(enabled='false')!important; + cursor: pointer; +} +#wbSelected { + cursor: default!important; +} +.wbChartThisBtm { + text-align:center; +} +.wbChartSm { + padding-top: 4px; + font-size: 0.625em; + color: #999; + font-weight: 700; +} +.wbChartBig, #wbChartBig { + background-color: #000!important; + padding-top: 4px; + font-size: 1.375em!important; + color: #fff300!important; + font-weight: normal!important; + cursor: pointer; +} +#wbChartBig { + cursor: default!important; +} +#wbChartGraph,#wbChartOver { + position: absolute; + top: 1px; + left: 1px; + cursor: pointer; +} + +</style> + +<script type="text/javascript"> +$().ready(function(){ + var yrCount = $(".wbChartThisContainer").size(); + var yrTotal = <%= yearWidth %> * yrCount; + var yrPad = (930 - yrTotal) / 2; + $("#wbChartThis").css("padding-left",yrPad+"px"); +}); +</script> + +<div id="wbChart"> + + <div id="wbChartThis"> + <a style="position:relative; white-space:nowrap; 
width:<%= imgWidth %>px;height:<%= imgHeight %>px;" href="" id="wm-graph-anchor"> + <div id="wm-ipp-sparkline" style="position:relative; white-space:nowrap; width:<%= imgWidth %>px;height:<%= imgHeight %>px;background: #f3f3f3 -moz-linear-gradient(top,#ffffff,#f3f3f3);background: #f3f3f3 -webkit-gradient(linear, left top, left bottom, from(#fff), to(#f3f3f3), color-stop(1.0, #f3f3f3));background-color: #f3f3f3;filter: progid:DXImageTransform.Microsoft.Gradient(enabled='true',startColorstr=#FFFFFFFF, endColorstr=#FFF3F3F3);cursor:pointer;border: 1px solid #ccc;border-left:none;" title="<%= fmt.format("ToolBar.sparklineTitle") %>"> + <img id="sparklineImgId" style="position:absolute;z-index:9012;top:0;left:0;" + onmouseover="showTrackers('inline');" + onmouseout="showTrackers('none');" + onmousemove="trackMouseMove(event,this)" + alt="sparklines" + width="<%= imgWidth %>" + height="<%= imgHeight %>" + border="0" + src="<%= yearImgUrl %>"></img> + <img id="wbMouseTrackYearImg" + style="display:none; position:absolute; z-index:9010;" + width="<%= yearWidth %>" + height="<%= imgHeight %>" + border="0" + src="<%= staticPrefix %>images/toolbar/transp-yellow-pixel.png"></img> + <img id="wbMouseTrackMonthImg" + style="display:none; position:absolute; z-index:9011; " + width="<%= monthWidth %>" + height="<%= imgHeight %>" + border="0" + src="<%= staticPrefix %>images/toolbar/transp-red-pixel.png"></img> + </div> + </a> + <% + for(int i = 1996; i < 2011; i++) { + String curClass = "inactiveHighlight"; + if(data.yearNum == i) { + curClass = "activeHighlight"; + } + %> + <div class="wbChartThisContainer"> + <a style="text-decoration: none;" href="<%= queryPrefix + i + "0101000000*/" + data.searchUrlForJS %>"> + + <div id="highlight-<%= i - 1996 %>" + onmouseover="showTrackers('inline'); setActiveYear(<%= i - 1996 %>)" + onmouseout="showTrackers('none');" + class="<%= curClass %>"><%= i %></div> + </a> + </div> + <% + } + %> + </div> +</div> +<div class="clearfix"></div> + +<div 
id="wbCalendar"> + + <div id="calUnder" class="calPosition"> + + + + +<% +// draw 12 months, 0-11 (0=Jan, 11=Dec) +for(int moy = 0; moy < 12; moy++) { + Partition<Partition<CaptureSearchResult>> curMonth = data.monthsByDay.get(moy); + List<Partition<CaptureSearchResult>> monthDays = curMonth.list(); +%> + <div class="month" id="<%= data.yearNum %>-<%= moy %>"> + <table> + + <thead> + <tr> + <th colspan="7"><span class="label"></span></th> + </tr> + </thead> + <tbody> + <tr> +<% + cal.setTime(curMonth.getStart()); + int skipDays = cal.get(Calendar.DAY_OF_WEEK) - 1; + int daysInMonth = cal.getActualMaximum(Calendar.DAY_OF_MONTH); + // skip until the 1st: + for(int i = 0; i < skipDays; i++) { + %><td><div class="date"></div></td><% + } + int dow = skipDays; + int dom; + for(dom = 0; dom < daysInMonth; dom++) { + + + int count = monthDays.get(dom).count(); + if(count > 0) { + // one or more captures in this day: + CaptureSearchResult firstCaptureInDay = + monthDays.get(dom).list().get(0); + String replayUrl = uriConverter.makeReplayURI( + firstCaptureInDay.getCaptureTimestamp(), + firstCaptureInDay.getOriginalUrl()); + String safeUrl = fmt.escapeHtml(replayUrl); + %><td> + <div class="date"> + <div class="position"> + <div class="hidden"><%= count %></div> + <div class="measure opacity20" id=""><img width="100%" height="100%"/></div> + </div> + </div> + </td><% + + } else { + // zero captures in this day: + %><td> + <div class="date"></div> + </td><% + + } + + + if(((dom+skipDays+1) % 7) == 0) { + // end of the week, start a new tr: + %></tr><tr><% + } + } + // fill in blank days until the end of the current week: + while(((dom+skipDays) % 7) != 0) { + %><td></td><% + dom++; + } +%> + </tr> + </tbody> + </table> + </div> + +<% +} +%> + </div> + <div id="calOver" class="calPosition"> +<% + +for(int moy = 0; moy < 12; moy++) { + Partition<Partition<CaptureSearchResult>> curMonth = data.monthsByDay.get(moy); + List<Partition<CaptureSearchResult>> monthDays = 
curMonth.list(); +%> + <div class="month" id="<%= data.yearNum %>-<%= moy %>"> + <table> + + <thead> + <tr> + <th colspan="7"><span class="label"><%= fmt.format("{0,date,MMM}",curMonth.getStart()) %></span></th> + </tr> + </thead> + <tbody> + <tr> +<% + cal.setTime(curMonth.getStart()); + int skipDays = cal.get(Calendar.DAY_OF_WEEK) - 1; + int daysInMonth = cal.getActualMaximum(Calendar.DAY_OF_MONTH); + // skip until the 1st: + for(int i = 0; i < skipDays; i++) { + %><td><div class="date"></div></td><% + } + int dow = skipDays; + int dom; + for(dom = 0; dom < daysInMonth; dom++) { + + + int count = monthDays.get(dom).count(); + + if(count > 0) { + // one or more captures in this day: + CaptureSearchResult firstCaptureInDay = + monthDays.get(dom).list().get(0); + String replayUrl = uriConverter.makeReplayURI( + firstCaptureInDay.getCaptureTimestamp(), + firstCaptureInDay.getOriginalUrl()); + Date firstCaptureInDayDate = firstCaptureInDay.getCaptureDate(); + String safeUrl = fmt.escapeHtml(replayUrl); + int dupes = 999; + + %><td> + <div class="date tooltip"> + <div class="pop"> + <h3><%= fmt.format("{0,date,MMMMM d, yyyy}",firstCaptureInDayDate) %></h3> + <p><%= count %> snapshots, <em><%= dupes %> duplicates</em></p> + <ul> + <% + Iterator<CaptureSearchResult> dayItr = + monthDays.get(dom).iterator(); + while(dayItr.hasNext()) { + CaptureSearchResult c = dayItr.next(); + String replayUrl2 = uriConverter.makeReplayURI( + c.getCaptureTimestamp(),c.getOriginalUrl()); + String safeUrl2 = fmt.escapeHtml(replayUrl2); + %> + <li><a href="<%= safeUrl2 %>"><%= fmt.format("{0,date,HH:mm:ss}",c.getCaptureDate()) %></a></li> + <% + } + %> + </ul> + </div> + <div class="day"> + + <a href="<%= safeUrl %>" title="<%= count %> snapshots (<%= dupes %> duplicates)" class="<%= fmt.format("{0,date,MMM-d-yyyy}",firstCaptureInDayDate) %>"><%= dom + 1 %></a> + </div> + </div> + </td><% + + } else { + // zero captures in this day: + %><td> + <div class="date"> + <div class="day"><span><%= 
dom + 1 %></span></div> + </div> + </td><% + + } + + + if(((dom+skipDays+1) % 7) == 0) { + // end of the week, start a new tr: + %></tr><tr><% + } + } + // fill in blank days until the end of the current week: + while(((dom+skipDays) % 7) != 0) { + %><td></td><% + dom++; + } +%> + </tr> + </tbody> + </table> + </div> +<% +} +%> + </div> + </div> \ No newline at end of file This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-06-18 00:39:02
|
Revision: 3156
          http://archive-access.svn.sourceforge.net/archive-access/?rev=3156&view=rev
Author:   bradtofel
Date:     2010-06-18 00:38:56 +0000 (Fri, 18 Jun 2010)

Log Message:
-----------
made wrapperJsp optional... duh.

Modified Paths:
--------------
    trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java

Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java 2010-06-14 19:17:21 UTC (rev 3155)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java 2010-06-18 00:38:56 UTC (rev 3156)
@@ -105,7 +105,8 @@
     private String queryPrefix = null;
     private String replayPrefix = null;
-    private String wrapperJsp = "/WEB-INF/template/UI-wrapper.jsp";
+//    private String wrapperJsp = "/WEB-INF/template/UI-wrapper.jsp";
+    private String wrapperJsp = null;
     private String interstitialJsp = INTERSTITIAL_JSP;
     private String refererAuth = null;
@@ -148,7 +149,9 @@
         wbRequest.fixup(httpRequest);
         UIResults uiResults = new UIResults(wbRequest,uriConverter);
         try {
-            if(translatedNoQuery.endsWith("-wrap.jsp")) {
+            if(wrapperJsp != null &&
+                    translatedNoQuery.endsWith("-wrap.jsp")) {
+
                 uiResults.forwardWrapped(httpRequest, httpResponse,
                         translatedQ, wrapperJsp);
             } else {

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |