[Archive-access-cvs] SF.net SVN: archive-access:[2504] trunk/archive-access/projects/nutchwax/imagesearch/src/java/org/archive/nutchwax/imagesearch/ImageSearcherBean.java

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 422-6466

Revision: 2504
          http://archive-access.svn.sourceforge.net/archive-access/?rev=2504&view=rev
Author:   miklosh
Date:     2008-07-26 15:47:56 +0000 (Sat, 26 Jul 2008)

Log Message:
-----------
Made modifications needed for using Hadoop 0.17.
Display image metadata in search results.

Modified Paths:
--------------
    trunk/archive-access/projects/nutchwax/imagesearch/src/java/org/archive/nutchwax/imagesearch/ImageSearcherBean.java

Modified: trunk/archive-access/projects/nutchwax/imagesearch/src/java/org/archive/nutchwax/imagesearch/ImageSearcherBean.java
===================================================================

--- trunk/archive-access/projects/nutchwax/imagesearch/src/java/org/archive/nutchwax/imagesearch/ImageSearcherBean.java	2008-07-26 15:46:55 UTC (rev 2503)
+++ trunk/archive-access/projects/nutchwax/imagesearch/src/java/org/archive/nutchwax/imagesearch/ImageSearcherBean.java	2008-07-26 15:47:56 UTC (rev 2504)
@@ -25,6 +25,7 @@
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.PathFilter;
@@ -40,13 +41,16 @@
 import org.apache.lucene.store.FSDirectory;
 import org.apache.nutch.indexer.FsDirectory;
 import org.apache.nutch.indexer.Indexer;
+import org.apache.nutch.metadata.Metadata;
+import org.apache.nutch.util.HadoopFSUtil;
 import org.apache.nutch.util.NutchConfiguration;
 
-public class ImageSearcherBean {
+public class ImageSearcherBean implements ImageLoader {
 
     public static final Log LOG = LogFactory.getLog(ImageSearcherBean.class);
     
     private IndexReader reader;
+    private ImageDataReader imageReader;
 
     private Path baseDir;
     private Configuration conf;
@@ -66,18 +70,9 @@
         Path indexesDir = new Path(baseDir, "indexes");
         if (this.fs.exists(indexesDir)) {
             Vector<Path> doneDirs = new Vector<Path>();
-            Path[] dirs = fs.listPaths(indexesDir, new PathFilter() {
-
-                public boolean accept(Path f) {
-                    try {
-                        if (fs.isDirectory(f)) {
-                            return true;
-                        }
-                    } catch (IOException ioe) {
-                    }
-                    return false;
-                }
-            });
+            FileStatus[] fstats = fs.listStatus(indexesDir, 
+                    HadoopFSUtil.getPassDirectoriesFilter(fs));
+            Path[] dirs = HadoopFSUtil.getPaths(fstats);
             for (Path dir : dirs) {
                 Path indexdone = new Path(dir, Indexer.DONE_NAME);
                 if (fs.isFile(indexdone)) {
@@ -95,6 +90,8 @@
             Path[] indexDir = {new Path(baseDir, "index")};
             init(indexDir);
         }
+        this.imageReader = new ImageDataReader(FileSystem.get(conf), 
+                new Path(baseDir, "segments").toString(), conf);
     }
 
     /** Init given a set of indexes or just one index. */
@@ -124,12 +121,19 @@
         if (reader != null) {
             reader.close();
         }
+        if (imageReader != null) {
+            imageReader.close();
+        }
     }
 
     public IndexReader getReader() {
         return reader;
     }
     
+    public ImageWritable getImage(String id) throws IOException {
+        return imageReader.getImage(id);
+    }
+    
     /**
      * Calculate the score for an image hit.
      * @param hit found hit
@@ -245,6 +249,9 @@
                     nextDist = imgIndex < numDocImages-1 ? 
                         Math.abs(imagePositions[imgIndex+1] - pos) + (end-pos) : Integer.MAX_VALUE;
                 }
+                if (imgIndex >= numDocImages) {
+                    continue;
+                }
                 // Check if this image is in the allowed proximity of the span
                 if (dist > distThreshold) {
                     if (LOG.isDebugEnabled()) {
@@ -261,6 +268,7 @@
                 ImageHit newHit = new ImageHit(imageIds[imgIndex], imageUrls[imgIndex], currentDoc);
                 newHit.docSim = docSim;
                 newHit.docScore = docBoost;
+                newHit.parentUrl = doc.get("url");
                 newHit.proximity = Math.min(1.0f, 1.0f-((float)dist/maxDist));
                 newHit.score = scoreHit(newHit, doc);
 
@@ -338,6 +346,10 @@
                 hits.getTotal() >= maxHits ? maxHits : (int)hits.getTotal());
         for (ImageHit hit : top) {
             System.out.println(hit.score + " " + hit.url + " " + hit.imageId);
+            ImageWritable imageData = isb.getImage(hit.imageId);
+            if (imageData != null) {
+                System.out.println("[ " + imageData.getMetadata() + "]");
+            }
         }
     }
 }


This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.




[Archive-access-cvs] SF.net SVN: archive-access:[2504] trunk/archive-access/projects/nutchwax/ ima

[Archive-access-cvs] SF.net SVN: archive-access:[2504] trunk/archive-access/projects/nutchwax/imagesearch/src/java/org/archive/nutchwax/imagesearch/ImageSearcherBean.java