From: Michael S. <sta...@us...> - 2005-09-02 01:08:34
|
Update of /cvsroot/archive-access/archive-access/projects/nutch/bin In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv25769/bin Modified Files: arcs2segs.sh indexarcs.sh Log Message: Make mergesegs work with our segments by providing our own version of SegmentMergeTool and our own version of nutch script that invokes our tool instead of standard nutch's. * .classpath Changed the nutch jar to refer to 0.7 release. * maven.xml Copy over the nutch bins first then ours. Overwrite. This way our version of nutch script sits on top of theirs. * project.properties * project.xml Reference lucene. * bin/arcs2segs.sh * bin/indexarcs.sh Add in setting of logging level. Index: arcs2segs.sh =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/nutch/bin/arcs2segs.sh,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -d -r1.4 -r1.5 *** arcs2segs.sh 9 Aug 2005 01:00:25 -0000 1.4 --- arcs2segs.sh 2 Sep 2005 01:08:18 -0000 1.5 *************** *** 2,17 **** # Check that we got right arguments. ! usage="$0 DIR_OF_ARCS DIR_FOR_SEGMENTS COLLECTION_NAME [#ARCS]" ! if [ $# -lt 3 ] then echo $usage exit 1 fi ! if [ $# -gt 4 ] then echo $usage exit 1 fi ! queue=$1 if [ ! -d $queue ] then --- 2,18 ---- # Check that we got right arguments. ! usage="$0 LOG_LEVEL DIR_OF_ARCS DIR_FOR_SEGMENTS COLLECTION_NAME [#ARCS]" ! if [ $# -lt 4 ] then echo $usage exit 1 fi ! if [ $# -gt 5 ] then echo $usage exit 1 fi ! level=$1 ! queue=$2 if [ ! -d $queue ] then *************** *** 20,29 **** exit 1 fi ! segments=$2 ! collection_name=$3 arc_count=100 ! if [ ! -z "$4" ] then ! arc_count="$4" fi if [ ! -d $segments ] --- 21,30 ---- exit 1 fi ! segments=$3 ! collection_name=$4 arc_count=100 ! if [ ! -z "$5" ] then ! arc_count="$5" fi if [ ! -d $segments ] *************** *** 42,46 **** fi seg=$segments/${hostname_prefix}`/bin/date +%F-%H%M%S` ! 
$arc2seg $seg $collection_name $arcs mkdir -p $seg/arcs mv $arcs $seg/arcs --- 43,47 ---- fi seg=$segments/${hostname_prefix}`/bin/date +%F-%H%M%S` ! $arc2seg -logLevel ${level} $seg $collection_name $arcs mkdir -p $seg/arcs mv $arcs $seg/arcs Index: indexarcs.sh =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/nutch/bin/indexarcs.sh,v retrieving revision 1.9 retrieving revision 1.10 diff -C2 -d -r1.9 -r1.10 *** indexarcs.sh 9 Aug 2005 01:00:25 -0000 1.9 --- indexarcs.sh 2 Sep 2005 01:08:18 -0000 1.10 *************** *** 56,59 **** --- 56,60 ---- echo " (Does not turn-off cmdline checking). Optional." echo " -a How many arcs to do per segment. Default is 100." + echo " -l Java logging level. Default: info. Options: info, warning, etc." echo "This runs through all steps nutch indexing ARCs so their content is" echo "searchable by nutch. This script is for use against small collections" *************** *** 143,147 **** return fi ! ${BASEDIR}/bin/arcs2segs.sh ${DATADIR}/queue/ \ ${DATADIR}/segments ${COLLECTION_NAME} ${arcs_per_segment} } --- 144,148 ---- return fi ! ${BASEDIR}/bin/arcs2segs.sh ${level} ${DATADIR}/queue/ \ ${DATADIR}/segments ${COLLECTION_NAME} ${arcs_per_segment} } *************** *** 198,204 **** noop= expert= arcname_filter="*.arc.gz" arcs_per_segment=100 ! while getopts "hnte:m:s:d:c:f:a:" opt do if [ "$opt" = "?" ] --- 199,206 ---- noop= expert= + level="info" arcname_filter="*.arc.gz" arcs_per_segment=100 ! while getopts "hnte:m:s:d:c:f:a:l:" opt do if [ "$opt" = "?" ] *************** *** 216,224 **** 's') ARCSDIR=${OPTARG} ! if [ ! -e ${arcsdir} ] then echo "ERROR: ${arcsdir} does not exist." usage fi ;; 'd') --- 218,230 ---- 's') ARCSDIR=${OPTARG} ! if [ ! -e ${ARCSDIR} ] then echo "ERROR: ${arcsdir} does not exist." usage fi + if [ `dirname ${ARCSDIR}` = '.' 
] + then + ARCSDIR=`pwd`/`basename ${ARCSDIR}` + fi ;; 'd') *************** *** 229,232 **** --- 235,242 ---- usage fi + if [ `dirname ${DATADIR}` = '.' ] + then + DATADIR=`pwd`/`basename ${DATADIR}` + fi ;; 'c') *************** *** 249,252 **** --- 259,266 ---- arcs_per_segment=${OPTARG} ;; + 'l') + # Java logging level. + level=${OPTARG} + ;; *) usage |