From: Doug C. <cu...@us...> - 2005-09-01 18:45:38
|
# Update of /cvsroot/archive-access/archive-access/projects/nutch/bin
# In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv24577/bin
#
# Added Files:
#   Tag: mapred
#   indexArcs.sh
# Removed Files:
#   Tag: mapred
#   arc2seg.sh arcs2segs.sh indexarcs.sh
# Log Message:
#   Add indexArcs command.
#
# --- arcs2segs.sh DELETED ---
# --- indexarcs.sh DELETED ---
# --- NEW FILE: indexArcs.sh ---
#!/bin/sh
#
# indexArcs.sh - run org.archive.access.nutch.IndexArcs with a CLASSPATH
# assembled from the project directory and the Nutch directory.
#
# Usage: indexArcs.sh [arguments ...]
#   All arguments are passed through unchanged to the Java class.
#
# Environment:
#   JAVA_HOME  required; the JVM is started from $JAVA_HOME/bin/java.
#   JAVA_OPTS  optional; whitespace-separated JVM options.
#              Defaults to "-Xmx400m -server" when unset or empty.
#
# Exit status: 1 if JAVA_HOME is unset or the project directory cannot be
# resolved; otherwise the shell is replaced by the JVM (exec).

# Resolve links - $0 may be a softlink.
# There is no portable readlink in older sh environments, so the link target
# is taken from the "-> target" part of `ls -ld` output.
THIS=$0
while [ -h "$THIS" ]; do
  ls=$(ls -ld "$THIS")
  link=$(expr "$ls" : '.*-> \(.*\)$')
  case $link in
    # Absolute target: use as is.
    /*) THIS=$link ;;
    # Relative target (with or without slashes): relative to the link's dir.
    *) THIS=$(dirname "$THIS")/$link ;;
  esac
done

# Some directories.
THIS_DIR=$(dirname "$THIS")
PROJECT_HOME=$(cd "$THIS_DIR/.." && pwd) || {
  echo "Error: cannot resolve project directory from $THIS_DIR." >&2
  exit 1
}

# If no 'nutch' directory, assume the binaries-only layout (all scripts are
# in a single 'bin' directory and NUTCH_HOME=PROJECT_HOME).
NUTCH_HOME="${PROJECT_HOME}/nutch"
if [ ! -d "${NUTCH_HOME}" ]; then
  NUTCH_HOME="${PROJECT_HOME}"
fi

if [ -z "$JAVA_HOME" ]; then
  echo "Error: JAVA_HOME is not set." >&2
  exit 1
fi

JAVA="$JAVA_HOME/bin/java"

# Kept as a plain string (not an array) so the script stays valid for
# /bin/sh and treats a caller-supplied JAVA_OPTS the same way as the default.
JAVA_OPTS=${JAVA_OPTS:-"-Xmx400m -server"}

# CLASSPATH initially contains conf dirs.
CLASSPATH="${PROJECT_HOME}/conf:${NUTCH_HOME}/conf"

# For developers, add classes to CLASSPATH.
if [ -d "$PROJECT_HOME/build/classes" ]; then
  CLASSPATH="${CLASSPATH}:$PROJECT_HOME/build/classes"
fi

# For developers, add Nutch classes to CLASSPATH.
if [ -d "$NUTCH_HOME/build/classes" ]; then
  CLASSPATH="${CLASSPATH}:$NUTCH_HOME/build/classes"
fi
if [ -d "$NUTCH_HOME/build/plugins" ]; then
  CLASSPATH="${CLASSPATH}:$NUTCH_HOME/build"
fi
if [ -d "$NUTCH_HOME/build/test/classes" ]; then
  CLASSPATH="${CLASSPATH}:$NUTCH_HOME/build/test/classes"
fi

# The directory part of each glob below is quoted, so paths containing
# spaces are handled without having to change IFS.

# For releases, add Nutch jar to CLASSPATH.
for f in "$NUTCH_HOME"/nutch-*.jar; do
  # An unmatched glob stays as the literal pattern; skip it.
  [ -e "$f" ] || continue
  CLASSPATH="${CLASSPATH}:$f"
done

# Add plugins to classpath.
if [ -d "$NUTCH_HOME/plugins" ]; then
  CLASSPATH="${CLASSPATH}:$NUTCH_HOME"
fi

# Add our libs to CLASSPATH but take care to make heritrix jar come
# before the httpclient jar (heritrix overlays a couple of httpclient
# classes). The httpclient jar is held back and appended after the loop.
httpclient_jar=
for f in "$PROJECT_HOME"/lib/*.jar; do
  [ -e "$f" ] || continue
  case ${f##*/} in
    commons-httpclient*.jar) httpclient_jar=$f ;;
    *) CLASSPATH="${CLASSPATH}:$f" ;;
  esac
done
# Append only when found, so CLASSPATH never ends in an empty entry.
if [ -n "$httpclient_jar" ]; then
  CLASSPATH="${CLASSPATH}:${httpclient_jar}"
fi

# Add Nutch libs to CLASSPATH.
for f in "$NUTCH_HOME"/lib/*.jar; do
  [ -e "$f" ] || continue
  CLASSPATH="${CLASSPATH}:$f"
done

CLASS=org.archive.access.nutch.IndexArcs

# Cygwin path translation. A case pattern on the uname output replaces the
# non-portable `expr match` / `&>` test.
case $(uname) in
  CYGWIN*) CLASSPATH=$(cygpath -p -w "$CLASSPATH") ;;
esac

# Run it. Add in to java.net.URL the heritrix rsync handler.
# JAVA_OPTS is intentionally unquoted so that it splits into separate options.
# shellcheck disable=SC2086
exec "$JAVA" $JAVA_OPTS \
  -Djava.protocol.handler.pkgs=org.archive.net \
  -classpath "$CLASSPATH" "$CLASS" "$@"

# --- arc2seg.sh DELETED --- |