Update of /cvsroot/htmlparser/htmlparser/bin
In directory sc8-pr-cvs1:/tmp/cvs-serv23579/bin
Added Files:
linkextractor linkextractor.bat
Removed Files:
crawler.bat ripper.bat
Log Message:
Add filter support to NodeList.
Rework LinkExtractor and remove MailRipper and Robot example programs.
Clean out docs directory.
--- NEW FILE: linkextractor ---
#! /bin/sh
#
# Launcher for org.htmlparser.parserapplications.LinkExtractor.
#
# Environment:
#   HTMLPARSER_HOME  installation directory; derived from the location of
#                    this script when unset or empty.

# resolve_htmlparser_home PRG
#
# Prints the absolute path of the directory one level above the directory
# containing PRG, after following PRG through any chain of symbolic links.
# PRG is interpreted relative to the current working directory, exactly as
# the shell interpreted it when starting the script, so no directory change
# is made before the links are followed.
# The body runs in a subshell, so no variables leak into the caller.
# Prints nothing (and returns non-zero) if the directory cannot be entered.
resolve_htmlparser_home() (
  prg=$1
  while [ -h "$prg" ] ; do
    # Portable symlink read: take the text after the last "-> " of `ls -ld`.
    listing=$(ls -ld "$prg")
    link=$(expr "$listing" : '.*-> \(.*\)$')
    case "$link" in
      /*)
        # absolute target replaces the path outright
        prg=$link
        ;;
      *)
        # relative target is relative to the directory holding the link
        prg=$(dirname "$prg")/$link
        ;;
    esac
  done
  # make it fully qualified
  cd "$(dirname "$prg")/.." && pwd
)

if [ -z "$HTMLPARSER_HOME" ] ; then
  ## resolve links - $0 may be a link to the home
  HTMLPARSER_HOME=$(resolve_htmlparser_home "$0")
fi
# find_javacmd
#
# Prints the java launcher to use when JAVACMD has not been supplied:
#   - with JAVA_HOME set: $JAVA_HOME/jre/sh/java if that file is executable,
#     otherwise $JAVA_HOME/bin/java (whether or not it exists);
#   - without JAVA_HOME: the java found on PATH, or the bare name "java"
#     when none is found.
find_javacmd() {
  if [ -n "$JAVA_HOME" ] ; then
    if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
      # IBM's JDK on AIX uses strange locations for the executables
      printf '%s\n' "$JAVA_HOME/jre/sh/java"
    else
      printf '%s\n' "$JAVA_HOME/bin/java"
    fi
  else
    # `command -v` is the POSIX lookup; unlike `which` it is always
    # available and prints nothing on stdout when the lookup fails.
    command -v java 2> /dev/null || printf '%s\n' java
  fi
}

# An explicit JAVACMD from the environment always wins.
if [ -z "$JAVACMD" ] ; then
  JAVACMD=$(find_javacmd)
fi
# javacmd_runnable CMD
#
# Succeeds when CMD can be executed: a name containing a slash must be an
# executable file, while a bare command name (for example "java") must
# resolve through PATH. A plain `[ -x ]` test would only look for a bare
# name in the current directory and wrongly reject it.
javacmd_runnable() {
  case "$1" in
    */*) [ -x "$1" ] ;;
    *) command -v "$1" > /dev/null 2>&1 ;;
  esac
}

# Stop early, with the diagnostics on stderr, if the launcher is unusable.
if ! javacmd_runnable "$JAVACMD" ; then
  echo "Error: JAVA_HOME is not defined correctly." >&2
  echo " We cannot execute $JAVACMD" >&2
  exit 1
fi
# Assemble LOCALCLASSPATH, the class path handed to the JVM:
#   1. the parser jar from $HTMLPARSER_HOME/lib,
#   2. the caller's CLASSPATH, when it is non-empty,
#   3. $JAVA_HOME/lib/classes.zip, when that file exists.
HTMLPARSER_LIB="${HTMLPARSER_HOME}/lib"

# A non-empty CLASSPATH becomes the tail of the local class path.
if [ -n "$CLASSPATH" ] ; then
  LOCALCLASSPATH="$CLASSPATH"
fi

# add in the parser .jar file; the ":tail" part is emitted only when
# LOCALCLASSPATH already holds something
LOCALCLASSPATH="${HTMLPARSER_LIB}/htmlparser.jar${LOCALCLASSPATH:+:$LOCALCLASSPATH}"

# handle 1.1x JDKs
if [ -n "$JAVA_HOME" ] && [ -f "$JAVA_HOME/lib/classes.zip" ] ; then
  LOCALCLASSPATH="$LOCALCLASSPATH:$JAVA_HOME/lib/classes.zip"
fi
# Run LinkExtractor with the assembled class path, forwarding every
# command-line argument unchanged. `exec` replaces this shell with the JVM,
# so signals and the exit status belong to the JVM itself and no idle
# wrapper process is left waiting.
exec "$JAVACMD" -classpath "$LOCALCLASSPATH" org.htmlparser.parserapplications.LinkExtractor "$@"
--- NEW FILE: linkextractor.bat ---
@rem Launch the LinkExtractor application from the htmlparser jar.
@rem -classpath is required here: with -jar the JVM starts the Main-Class named
@rem in the jar's manifest and treats the class name as an ordinary argument.
@rem %~dp0 expands to this script's own directory, so the jar is located
@rem regardless of the current directory; %* forwards all arguments.
java -classpath "%~dp0..\lib\htmlparser.jar" org.htmlparser.parserapplications.LinkExtractor %*
--- crawler.bat DELETED ---
--- ripper.bat DELETED ---
|