Thread: [Htmlparser-cvs] htmlparser/bin beanybaby.cmd,NONE,1.1 filterbuilder.cmd,NONE,1.1 lexer.cmd,NONE,1.1
Brought to you by:
derrickoswald
From: Derrick O. <der...@us...> - 2005-04-10 23:21:11
|
Update of /cvsroot/htmlparser/htmlparser/bin In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv30655/htmlparser/bin Added Files: beanybaby.cmd filterbuilder.cmd lexer.cmd linkextractor.cmd parser.cmd sitecapturer sitecapturer.cmd stringextractor.cmd thumbelina.cmd translate.cmd Removed Files: beanybaby.bat filterbuilder.bat lexer.bat linkextractor.bat parser.bat stringextractor.bat thumbelina.bat translate.bat Log Message: Documentation revamp part one. Deprecated node decorators. Added doSemanticAction for Text and Comment nodes. Added missing sitecapturer scripts. Fixed DOS batch files to work when called from any location. --- parser.bat DELETED --- --- stringextractor.bat DELETED --- --- translate.bat DELETED --- --- NEW FILE: parser.cmd --- @echo off rem HTMLParser Library $Name: $ - A java-based parser for HTML rem http://sourceforge.org/projects/htmlparser rem Copyright (C) 2005 Derrick Oswald rem rem Revision Control Information rem rem $Source: /cvsroot/htmlparser/htmlparser/bin/parser.cmd,v $ rem $Author: derrickoswald $ rem $Date: 2005/04/10 23:20:41 $ rem $Revision: 1.1 $ rem rem This library is free software; you can redistribute it and/or rem modify it under the terms of the GNU Lesser General Public rem License as published by the Free Software Foundation; either rem version 2.1 of the License, or (at your option) any later version. rem rem This library is distributed in the hope that it will be useful, rem but WITHOUT ANY WARRANTY; without even the implied warranty of rem MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU rem Lesser General Public License for more details. rem rem You should have received a copy of the GNU Lesser General Public rem License along with this library; if not, write to the Free Software rem Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA rem setlocal enableextensions if errorlevel 1 goto no_extensions_error for %%i in (%0) do set cmd_path= %%~dpi for /D %%i in (%cmd_path%..\lib\) do set lib_path=%%~dpi if not exist %lib_path%htmlparser.jar goto no_jar_error for %%i in (java.exe) do set java_executable=%%~$PATH:i if "%java_executable%"=="" goto no_java_error @echo on %java_executable% -classpath %lib_path%htmlparser.jar org.htmlparser.Parser %1 %2 @echo off goto end :no_extensions_error echo Unable to use CMD extensions goto end :no_jar_error echo Unable to find htmlparser.jar goto end :no_java_error echo Unable to find java.exe goto end :end --- NEW FILE: filterbuilder.cmd --- @echo off rem HTMLParser Library $Name: $ - A java-based parser for HTML rem http://sourceforge.org/projects/htmlparser rem Copyright (C) 2005 Derrick Oswald rem rem Revision Control Information rem rem $Source: /cvsroot/htmlparser/htmlparser/bin/filterbuilder.cmd,v $ rem $Author: derrickoswald $ rem $Date: 2005/04/10 23:20:41 $ rem $Revision: 1.1 $ rem rem This library is free software; you can redistribute it and/or rem modify it under the terms of the GNU Lesser General Public rem License as published by the Free Software Foundation; either rem version 2.1 of the License, or (at your option) any later version. rem rem This library is distributed in the hope that it will be useful, rem but WITHOUT ANY WARRANTY; without even the implied warranty of rem MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU rem Lesser General Public License for more details. rem rem You should have received a copy of the GNU Lesser General Public rem License along with this library; if not, write to the Free Software rem Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA rem setlocal enableextensions if errorlevel 1 goto no_extensions_error for %%i in (%0) do set cmd_path= %%~dpi for /D %%i in (%cmd_path%..\lib\) do set lib_path=%%~dpi if not exist %lib_path%htmlparser.jar goto no_htmlparser_jar_error if not exist %lib_path%filterbuilder.jar goto no_filterbuilder_jar_error for %%i in (java.exe) do set java_executable=%%~$PATH:i if "%java_executable%"=="" goto no_java_error @echo on %java_executable% -Xmx256M -classpath %lib_path%filterbuilder.jar;%lib_path%htmlparser.jar org.htmlparser.parserapplications.filterbuilder.FilterBuilder %1 @echo off goto end :no_extensions_error echo Unable to use CMD extensions goto end :no_htmlparser_jar_error echo Unable to find htmlparser.jar goto end :no_filterbuilder_jar_error echo Unable to find filterbuilder.jar goto end :no_java_error echo Unable to find java.exe goto end :end --- NEW FILE: sitecapturer.cmd --- @echo off rem HTMLParser Library $Name: $ - A java-based parser for HTML rem http://sourceforge.org/projects/htmlparser rem Copyright (C) 2005 Derrick Oswald rem rem Revision Control Information rem rem $Source: /cvsroot/htmlparser/htmlparser/bin/sitecapturer.cmd,v $ rem $Author: derrickoswald $ rem $Date: 2005/04/10 23:20:41 $ rem $Revision: 1.1 $ rem rem This library is free software; you can redistribute it and/or rem modify it under the terms of the GNU Lesser General Public rem License as published by the Free Software Foundation; either rem version 2.1 of the License, or (at your option) any later version. rem rem This library is distributed in the hope that it will be useful, rem but WITHOUT ANY WARRANTY; without even the implied warranty of rem MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU rem Lesser General Public License for more details. rem rem You should have received a copy of the GNU Lesser General Public rem License along with this library; if not, write to the Free Software rem Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA rem setlocal enableextensions if errorlevel 1 goto no_extensions_error for %%i in (%0) do set cmd_path= %%~dpi for /D %%i in (%cmd_path%..\lib\) do set lib_path=%%~dpi if not exist %lib_path%htmlparser.jar goto no_jar_error for %%i in (java.exe) do set java_executable=%%~$PATH:i if "%java_executable%"=="" goto no_java_error @echo on %java_executable% -classpath %lib_path%htmlparser.jar org.htmlparser.parserapplications.SiteCapturer %1 %2 %3 @echo off goto end :no_extensions_error echo Unable to use CMD extensions goto end :no_jar_error echo Unable to find htmlparser.jar goto end :no_java_error echo Unable to find java.exe goto end :end --- NEW FILE: translate.cmd --- @echo off rem HTMLParser Library $Name: $ - A java-based parser for HTML rem http://sourceforge.org/projects/htmlparser rem Copyright (C) 2005 Derrick Oswald rem rem Revision Control Information rem rem $Source: /cvsroot/htmlparser/htmlparser/bin/translate.cmd,v $ rem $Author: derrickoswald $ rem $Date: 2005/04/10 23:20:41 $ rem $Revision: 1.1 $ rem rem This library is free software; you can redistribute it and/or rem modify it under the terms of the GNU Lesser General Public rem License as published by the Free Software Foundation; either rem version 2.1 of the License, or (at your option) any later version. rem rem This library is distributed in the hope that it will be useful, rem but WITHOUT ANY WARRANTY; without even the implied warranty of rem MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU rem Lesser General Public License for more details. rem rem You should have received a copy of the GNU Lesser General Public rem License along with this library; if not, write to the Free Software rem Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA rem setlocal enableextensions if errorlevel 1 goto no_extensions_error for %%i in (%0) do set cmd_path= %%~dpi for /D %%i in (%cmd_path%..\lib\) do set lib_path=%%~dpi if not exist %lib_path%htmlparser.jar goto no_jar_error for %%i in (java.exe) do set java_executable=%%~$PATH:i if "%java_executable%"=="" goto no_java_error @echo on %java_executable% -classpath %lib_path%htmlparser.jar org.htmlparser.util.Translate %1 %2 @echo off goto end :no_extensions_error echo Unable to use CMD extensions goto end :no_jar_error echo Unable to find htmlparser.jar goto end :no_java_error echo Unable to find java.exe goto end :end --- NEW FILE: lexer.cmd --- @echo off rem HTMLParser Library $Name: $ - A java-based parser for HTML rem http://sourceforge.org/projects/htmlparser rem Copyright (C) 2005 Derrick Oswald rem rem Revision Control Information rem rem $Source: /cvsroot/htmlparser/htmlparser/bin/lexer.cmd,v $ rem $Author: derrickoswald $ rem $Date: 2005/04/10 23:20:41 $ rem $Revision: 1.1 $ rem rem This library is free software; you can redistribute it and/or rem modify it under the terms of the GNU Lesser General Public rem License as published by the Free Software Foundation; either rem version 2.1 of the License, or (at your option) any later version. rem rem This library is distributed in the hope that it will be useful, rem but WITHOUT ANY WARRANTY; without even the implied warranty of rem MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU rem Lesser General Public License for more details. rem rem You should have received a copy of the GNU Lesser General Public rem License along with this library; if not, write to the Free Software rem Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA rem setlocal enableextensions if errorlevel 1 goto no_extensions_error for %%i in (%0) do set cmd_path= %%~dpi for /D %%i in (%cmd_path%..\lib\) do set lib_path=%%~dpi if not exist %lib_path%htmllexer.jar goto no_jar_error for %%i in (java.exe) do set java_executable=%%~$PATH:i if "%java_executable%"=="" goto no_java_error @echo on %java_executable% -classpath %lib_path%htmllexer.jar org.htmlparser.lexer.Lexer %1 %2 @echo off goto end :no_extensions_error echo Unable to use CMD extensions goto end :no_jar_error echo Unable to find htmllexer.jar goto end :no_java_error echo Unable to find java.exe goto end :end --- NEW FILE: linkextractor.cmd --- @echo off rem HTMLParser Library $Name: $ - A java-based parser for HTML rem http://sourceforge.org/projects/htmlparser rem Copyright (C) 2005 Derrick Oswald rem rem Revision Control Information rem rem $Source: /cvsroot/htmlparser/htmlparser/bin/linkextractor.cmd,v $ rem $Author: derrickoswald $ rem $Date: 2005/04/10 23:20:41 $ rem $Revision: 1.1 $ rem rem This library is free software; you can redistribute it and/or rem modify it under the terms of the GNU Lesser General Public rem License as published by the Free Software Foundation; either rem version 2.1 of the License, or (at your option) any later version. rem rem This library is distributed in the hope that it will be useful, rem but WITHOUT ANY WARRANTY; without even the implied warranty of rem MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU rem Lesser General Public License for more details. rem rem You should have received a copy of the GNU Lesser General Public rem License along with this library; if not, write to the Free Software rem Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA rem setlocal enableextensions if errorlevel 1 goto no_extensions_error for %%i in (%0) do set cmd_path= %%~dpi for /D %%i in (%cmd_path%..\lib\) do set lib_path=%%~dpi if not exist %lib_path%htmlparser.jar goto no_jar_error for %%i in (java.exe) do set java_executable=%%~$PATH:i if "%java_executable%"=="" goto no_java_error @echo on %java_executable% -classpath %lib_path%htmlparser.jar org.htmlparser.parserapplications.LinkExtractor %1 %2 @echo off goto end :no_extensions_error echo Unable to use CMD extensions goto end :no_jar_error echo Unable to find htmlparser.jar goto end :no_java_error echo Unable to find java.exe goto end :end --- NEW FILE: stringextractor.cmd --- @echo off rem HTMLParser Library $Name: $ - A java-based parser for HTML rem http://sourceforge.org/projects/htmlparser rem Copyright (C) 2005 Derrick Oswald rem rem Revision Control Information rem rem $Source: /cvsroot/htmlparser/htmlparser/bin/stringextractor.cmd,v $ rem $Author: derrickoswald $ rem $Date: 2005/04/10 23:20:41 $ rem $Revision: 1.1 $ rem rem This library is free software; you can redistribute it and/or rem modify it under the terms of the GNU Lesser General Public rem License as published by the Free Software Foundation; either rem version 2.1 of the License, or (at your option) any later version. rem rem This library is distributed in the hope that it will be useful, rem but WITHOUT ANY WARRANTY; without even the implied warranty of rem MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU rem Lesser General Public License for more details. rem rem You should have received a copy of the GNU Lesser General Public rem License along with this library; if not, write to the Free Software rem Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA rem setlocal enableextensions if errorlevel 1 goto no_extensions_error for %%i in (%0) do set cmd_path= %%~dpi for /D %%i in (%cmd_path%..\lib\) do set lib_path=%%~dpi if not exist %lib_path%htmlparser.jar goto no_jar_error for %%i in (java.exe) do set java_executable=%%~$PATH:i if "%java_executable%"=="" goto no_java_error @echo on %java_executable% -classpath %lib_path%htmlparser.jar org.htmlparser.parserapplications.StringExtractor %1 %2 @echo off goto end :no_extensions_error echo Unable to use CMD extensions goto end :no_jar_error echo Unable to find htmlparser.jar goto end :no_java_error echo Unable to find java.exe goto end :end --- beanybaby.bat DELETED --- --- filterbuilder.bat DELETED --- --- linkextractor.bat DELETED --- --- NEW FILE: beanybaby.cmd --- @echo off rem HTMLParser Library $Name: $ - A java-based parser for HTML rem http://sourceforge.org/projects/htmlparser rem Copyright (C) 2005 Derrick Oswald rem rem Revision Control Information rem rem $Source: /cvsroot/htmlparser/htmlparser/bin/beanybaby.cmd,v $ rem $Author: derrickoswald $ rem $Date: 2005/04/10 23:20:41 $ rem $Revision: 1.1 $ rem rem This library is free software; you can redistribute it and/or rem modify it under the terms of the GNU Lesser General Public rem License as published by the Free Software Foundation; either rem version 2.1 of the License, or (at your option) any later version. rem rem This library is distributed in the hope that it will be useful, rem but WITHOUT ANY WARRANTY; without even the implied warranty of rem MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU rem Lesser General Public License for more details. rem rem You should have received a copy of the GNU Lesser General Public rem License along with this library; if not, write to the Free Software rem Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA rem setlocal enableextensions if errorlevel 1 goto no_extensions_error for %%i in (%0) do set cmd_path= %%~dpi for /D %%i in (%cmd_path%..\lib\) do set lib_path=%%~dpi if not exist %lib_path%htmlparser.jar goto no_jar_error for %%i in (java.exe) do set java_executable=%%~$PATH:i if "%java_executable%"=="" goto no_java_error @echo on %java_executable% -classpath %lib_path%htmlparser.jar org.htmlparser.beans.BeanyBaby %1 %2 @echo off goto end :no_extensions_error echo Unable to use CMD extensions goto end :no_jar_error echo Unable to find htmlparser.jar goto end :no_java_error echo Unable to find java.exe goto end :end --- lexer.bat DELETED --- --- NEW FILE: sitecapturer --- #! /bin/sh if [ -z "$HTMLPARSER_HOME" ] ; then ## resolve links - $0 may be a link to the home PRG="$0" progname=`basename "$0"` saveddir=`pwd` # need this for relative symlinks dirname_prg=`dirname "$PRG"` cd "$dirname_prg" while [ -h "$PRG" ] ; do ls=`ls -ld "$PRG"` link=`expr "$ls" : '.*-> \(.*\)$'` if expr "$link" : '/.*' > /dev/null; then PRG="$link" else PRG=`dirname "$PRG"`"/$link" fi done HTMLPARSER_HOME=`dirname "$PRG"`/.. cd "$saveddir" # make it fully qualified HTMLPARSER_HOME=`cd "$HTMLPARSER_HOME" && pwd` fi if [ -z "$JAVACMD" ] ; then if [ -n "$JAVA_HOME" ] ; then if [ -x "$JAVA_HOME/jre/sh/java" ] ; then # IBM's JDK on AIX uses strange locations for the executables JAVACMD="$JAVA_HOME/jre/sh/java" else JAVACMD="$JAVA_HOME/bin/java" fi else JAVACMD=`which java 2> /dev/null ` if [ -z "$JAVACMD" ] ; then JAVACMD=java fi fi fi if [ ! -x "$JAVACMD" ] ; then echo "Error: JAVA_HOME is not defined correctly." echo " We cannot execute $JAVACMD" exit 1 fi if [ -n "$CLASSPATH" ] ; then LOCALCLASSPATH="$CLASSPATH" fi HTMLPARSER_LIB="${HTMLPARSER_HOME}/lib" # add in the parser .jar file if [ -z "$LOCALCLASSPATH" ] ; then LOCALCLASSPATH="${HTMLPARSER_LIB}/htmlparser.jar" else LOCALCLASSPATH="${HTMLPARSER_LIB}/htmlparser.jar":"$LOCALCLASSPATH" fi # handle 1.1x JDKs if [ -n "$JAVA_HOME" ] ; then if [ -f "$JAVA_HOME/lib/classes.zip" ] ; then LOCALCLASSPATH="$LOCALCLASSPATH:$JAVA_HOME/lib/classes.zip" fi fi "$JAVACMD" -classpath "$LOCALCLASSPATH" org.htmlparser.parserapplications.SiteCapturer "$@" --- thumbelina.bat DELETED --- --- NEW FILE: thumbelina.cmd --- @echo off rem HTMLParser Library $Name: $ - A java-based parser for HTML rem http://sourceforge.org/projects/htmlparser rem Copyright (C) 2005 Derrick Oswald rem rem Revision Control Information rem rem $Source: /cvsroot/htmlparser/htmlparser/bin/thumbelina.cmd,v $ rem $Author: derrickoswald $ rem $Date: 2005/04/10 23:20:41 $ rem $Revision: 1.1 $ rem rem This library is free software; you can redistribute it and/or rem modify it under the terms of the GNU Lesser General Public rem License as published by the Free Software Foundation; either rem version 2.1 of the License, or (at your option) any later version. rem rem This library is distributed in the hope that it will be useful, rem but WITHOUT ANY WARRANTY; without even the implied warranty of rem MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU rem Lesser General Public License for more details. rem rem You should have received a copy of the GNU Lesser General Public rem License along with this library; if not, write to the Free Software rem Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA rem setlocal enableextensions if errorlevel 1 goto no_extensions_error for %%i in (%0) do set cmd_path= %%~dpi for /D %%i in (%cmd_path%..\lib\) do set lib_path=%%~dpi if not exist %lib_path%htmllexer.jar goto no_htmllexer_jar_error if not exist %lib_path%thumbelina.jar goto no_thumbelina_jar_error for %%i in (java.exe) do set java_executable=%%~$PATH:i if "%java_executable%"=="" goto no_java_error @echo on %java_executable% -Xmx256M -classpath %lib_path%thumbelina.jar;%lib_path%htmllexer.jar org.htmlparser.lexerapplications.thumbelina.Thumbelina %1 %2 @echo off goto end :no_extensions_error echo Unable to use CMD extensions goto end :no_htmllexer_jar_error echo Unable to find htmllexer.jar goto end :no_thumbelina_jar_error echo Unable to find thumbelina.jar goto end :no_java_error echo Unable to find java.exe goto end :end |