Update of /cvsroot/archive-access/archive-access/projects/nutch/bin
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv24577/bin
Added Files:
Tag: mapred
indexArcs.sh
Removed Files:
Tag: mapred
arc2seg.sh arcs2segs.sh indexarcs.sh
Log Message:
Add indexArcs command.
--- arcs2segs.sh DELETED ---
--- indexarcs.sh DELETED ---
--- NEW FILE: indexArcs.sh ---
#!/usr/bin/env bash
# NOTE: this script needs bash, not a minimal POSIX sh: further down it
# expands JAVA_OPTS with array syntax (${JAVA_OPTS[@]}).

# Resolve links - $0 may be a softlink.
#
# resolve_script_path PATH
#   Follows PATH through any chain of symbolic links and prints the path of
#   the first non-link reached. An absolute link target replaces the path
#   outright; a relative target (with or without slashes in it) is taken
#   relative to the directory that holds the link. The result is not
#   canonicalised, so it may still contain '..' components.
resolve_script_path() {
  target="$1"
  while [ -h "$target" ]; do
    # 'ls -ld' prints "... name -> destination" for a symlink; keep only
    # the destination part.
    listing=$(ls -ld "$target")
    link=$(expr "$listing" : '.*-> \(.*\)$')
    case "$link" in
      /*) target="$link" ;;
      *) target="$(dirname "$target")/$link" ;;
    esac
  done
  printf '%s\n' "$target"
}

THIS=$(resolve_script_path "$0")
# some directories
# THIS_DIR is the directory holding this script; PROJECT_HOME is its parent,
# made absolute by cd'ing there and asking for the working directory.
THIS_DIR=$(dirname "$THIS")
# Use '&&' so that a failed cd yields an empty result instead of silently
# reporting the caller's current directory as the project home.
PROJECT_HOME=$(cd "$THIS_DIR/.." && pwd)
if [ -z "$PROJECT_HOME" ]; then
  echo "Error: cannot resolve project home from $THIS_DIR/.." >&2
  exit 1
fi
# If no 'nutch' directory, assume the binaries-only layout (All scripts are
# in a single 'bin' directory and NUTCH_HOME=PROJECT_HOME).
NUTCH_HOME="${PROJECT_HOME}/nutch"
if [ ! -d "${NUTCH_HOME}" ]; then
  NUTCH_HOME="${PROJECT_HOME}"
fi
# Locate the JVM. JAVA_HOME is mandatory; the script stops here without it.
if [ -z "$JAVA_HOME" ]; then
  # Diagnostics belong on stderr so they are not mixed into captured output.
  echo "Error: JAVA_HOME is not set." >&2
  exit 1
fi
JAVA="$JAVA_HOME/bin/java"
# Default JVM options, applied only when the caller supplied no JAVA_OPTS.
# Stored as a plain space-separated string: that is the same shape a value
# inherited from the environment has, and it avoids the array literal, which
# is a syntax error in shells other than bash.
if [ -z "$JAVA_OPTS" ]; then
  JAVA_OPTS="-Xmx400m -server"
fi
# build_classpath
#   Assembles CLASSPATH from PROJECT_HOME and NUTCH_HOME (both read as
#   globals), in this order:
#     1. the two conf directories
#     2. developer build output directories that exist
#     3. $NUTCH_HOME/nutch-*.jar release jars
#     4. $NUTCH_HOME itself when it has a 'plugins' directory
#     5. $PROJECT_HOME/lib/*.jar, with any commons-httpclient jar moved last
#     6. $NUTCH_HOME/lib/*.jar
#   Directory variables are quoted rather than clearing IFS, so paths with
#   spaces survive without touching global shell state. Globs that match
#   nothing are skipped instead of adding the literal pattern to CLASSPATH.
build_classpath() {
  # CLASSPATH initially contains conf dirs
  CLASSPATH="${PROJECT_HOME}/conf:${NUTCH_HOME}/conf"

  # for developers, add classes to CLASSPATH
  if [ -d "$PROJECT_HOME/build/classes" ]; then
    CLASSPATH="${CLASSPATH}:$PROJECT_HOME/build/classes"
  fi
  # for developers, add Nutch classes to CLASSPATH
  if [ -d "$NUTCH_HOME/build/classes" ]; then
    CLASSPATH="${CLASSPATH}:$NUTCH_HOME/build/classes"
  fi
  if [ -d "$NUTCH_HOME/build/plugins" ]; then
    CLASSPATH="${CLASSPATH}:$NUTCH_HOME/build"
  fi
  if [ -d "$NUTCH_HOME/build/test/classes" ]; then
    CLASSPATH="${CLASSPATH}:$NUTCH_HOME/build/test/classes"
  fi

  # for releases, add Nutch jar to CLASSPATH
  for f in "$NUTCH_HOME"/nutch-*.jar; do
    [ -e "$f" ] || continue # glob matched nothing
    CLASSPATH="${CLASSPATH}:$f"
  done

  # add plugins to classpath
  if [ -d "$NUTCH_HOME/plugins" ]; then
    CLASSPATH="${CLASSPATH}:$NUTCH_HOME"
  fi

  # Add our libs to CLASSPATH but take care to make heritrix jar come
  # before the httpclient jar (heritrix overlays a couple of httpclient
  # classes).
  httpclient_jar=
  for f in "$PROJECT_HOME"/lib/*.jar; do
    [ -e "$f" ] || continue
    case "${f##*/}" in
      commons-httpclient*.jar) httpclient_jar=$f ;;
      *) CLASSPATH="${CLASSPATH}:$f" ;;
    esac
  done
  # Append the deferred httpclient jar only if one was found; an empty
  # entry would put the current working directory on the classpath.
  if [ -n "$httpclient_jar" ]; then
    CLASSPATH="${CLASSPATH}:${httpclient_jar}"
  fi

  # Add Nutch libs to CLASSPATH
  for f in "$NUTCH_HOME"/lib/*.jar; do
    [ -e "$f" ] || continue
    CLASSPATH="${CLASSPATH}:$f"
  done
}

build_classpath
# Main class to launch.
CLASS=org.archive.access.nutch.IndexArcs
# cygwin path translation
# A 'case' glob on the uname output replaces 'expr match ... &>': that form
# needs GNU expr and a bash-only redirect, and in a POSIX sh the '&>' puts
# the command in the background so the test always succeeds.
case "$(uname)" in
  CYGWIN*) CLASSPATH=$(cygpath -p -w "$CLASSPATH") ;;
esac
# Run it. Add in to java.net.URL the heritrix rsync handler.
# JAVA_OPTS is left unquoted on purpose: when it is a plain string (for
# example inherited from the environment) it must be split into separate
# JVM arguments; when it is an array every element is passed.
# shellcheck disable=SC2068
exec "$JAVA" ${JAVA_OPTS[@]} \
  -Djava.protocol.handler.pkgs=org.archive.net \
  -classpath "$CLASSPATH" "$CLASS" "$@"
--- arc2seg.sh DELETED ---
|