#!/bin/bash
#
# Build a set of jobs submitted to OSIRIM.
# The objective of each job is to run indexing, retrieval and evaluation.
# Input: the directory containing one or many sub directories
# Duy Dinh, IRIT - University of Toulouse, March. 2012
#
# Example:
#   sh irtoolkit.sh -d directory -t topicsFile -q qrelsFile -j jobDir -l subDirLevel

# Program arguments except $0, joined into a single string.
args=$*

inputDir="."            # input directory
topicsFile=""           # topics file
qrelsFile=""            # relevance judgement file
jobDir="$(pwd)/jobs"    # job directory containing a template job file,
                        # usually a shell script

# The template carries the same file name as this script. Only the base name
# of $0 is used so that invoking the script through a path (./x.sh, /a/b/x.sh)
# still resolves to "$jobDir/x.sh".
jobTemplate="$jobDir/${0##*/}"

level=0                 # start to process at a particular level of input directory
loopID=0                # loop ID for making job ID
loopSize=1              # number of nested loops per job (e.g., the number of
                        # voting algorithms used or the number of terminologies
                        # used, etc.)
jobID=0                 # job identifier

# Values substituted for EXPANSION_DOCUMENTS / EXPANSION_TERMS in the job
# template; set with -D and -T, empty when not given.
xdocs=""
xterms=""
# Print the command-line synopsis and one example invocation to stdout.
# Takes no arguments and does not exit; callers decide what to do next.
usage() {
  printf '%s\n' \
    'Usage:' \
    ' $sh irtoolkit.sh -d inputDir -t topicsFile -q qrelsFile [-j jobDir] [-l subDirLevel] [-L loopID] [-s loopSize]' \
    'Example:' \
    ' $sh irtoolkit.sh -d ./ -t topics.txt -q qrels.txt'
}

# function: create job
# make a new job that will be submitted to OSIRIM
# @param $1 : collection name of directory containing a hierarchical structure
#             for a test collection (i.e., /collection, /index, /results),
#             each of which must be indexed separately
#        $2 : job identifier
#
# Escape a string for use as the replacement part of a sed 's/.../.../'
# command: '\', '/' and '&' are each prefixed with a backslash.
# Arguments: $1 - raw text
# Outputs:   escaped text on stdout
_job_sed_escape() {
  printf '%s' "$1" | sed 's|[\\/&]|\\&|g'
}

#######################################
# Create a job file from the job template and submit it with qsub.
#
# The template is copied to "$jobDir/<name>-<id>.<extension>", where <name>
# and <extension> come from this script's file name, and the placeholders
# JOB_DIRECTORY, COLLECTION_PATH, INDEX_PATH, TOPICS_PATH, QRELS_PATH,
# RESULTS_PATH, EXPANSION_TERMS and EXPANSION_DOCUMENTS are replaced.
#
# Globals (read): jobDir, jobTemplate, topicsFile, qrelsFile, xterms, xdocs
# Arguments:
#   $1 - collection directory; its collection/, index/ and results/
#        sub-paths are written into the job file
#   $2 - job identifier, used in the new file name
# Returns: 1 if the template cannot be copied or rewritten, otherwise the
#          exit status of qsub.
#######################################
CREATE_JOB() {
  local collection=$1
  local id=$2
  local shortname extension name newTemplate

  # job template
  shortname=$(basename "$jobDir/$0")   # short filename
  extension=${shortname##*.}           # file extension
  name=${shortname%.*}                 # file name
  newTemplate="${jobDir}/${name}-${id}.${extension}"

  echo "cp $jobTemplate $newTemplate"
  cp -- "$jobTemplate" "$newTemplate" || return 1

  # modify patterns in template
  # The expressions run in this order on every line, which gives the same
  # result as rewriting the file once per placeholder. Values are escaped so
  # that '/', '&' and '\' in paths are inserted literally.
  if ! sed \
    -e "s/JOB_DIRECTORY/$(_job_sed_escape "$jobDir")/g" \
    -e "s/COLLECTION_PATH/$(_job_sed_escape "$collection/collection")/g" \
    -e "s/INDEX_PATH/$(_job_sed_escape "$collection/index")/g" \
    -e "s/TOPICS_PATH/$(_job_sed_escape "$topicsFile")/g" \
    -e "s/QRELS_PATH/$(_job_sed_escape "$qrelsFile")/g" \
    -e "s/RESULTS_PATH/$(_job_sed_escape "$collection/results")/g" \
    -e "s/EXPANSION_TERMS/$(_job_sed_escape "${xterms-}")/g" \
    -e "s/EXPANSION_DOCUMENTS/$(_job_sed_escape "${xdocs-}")/g" \
    "$newTemplate" > "$newTemplate.tmp"; then
    rm -f -- "$newTemplate.tmp"
    return 1
  fi
  mv -- "$newTemplate.tmp" "$newTemplate" || return 1

  # wait a little bit before submitting job
  sleep 1
  echo "Submitting job to OSIRIM: $newTemplate"
  /opt/pbs/bin/qsub "$newTemplate"
}

# process a test collection
#######################################
# Create and submit one job for a test collection, then advance the job
# counter. Entries that are not directories are silently ignored.
#
# Globals:   jobID (read; incremented after a job has been created)
# Arguments: $1 - path of the candidate collection directory
# Returns:   0
#######################################
process_collection() {
  local c=$1

  # entry is not a directory: nothing to do
  if [ ! -d "$c" ]; then
    return 0
  fi

  echo
  echo "*** Creating new job for processing collection '$c' [JobID: $jobID]"
  # Quoted so that a path containing spaces stays a single argument.
  CREATE_JOB "$c" "$jobID"

  # next collection
  jobID=$((jobID + 1))
}

# Collect sub folders containing test collections
# Build a job for each collection, then submit job to OSIRIM
#######################################
# Find the sub-directories of a directory at exactly depth $level and hand
# each one to process_collection.
#
# Globals:   level (read; search depth, 0 means the directory itself),
#            jobID (read, for the progress message)
# Arguments: $1 - directory to scan
# Returns:   1 if $1 is not a directory
# Note:      directory names containing newlines are not supported.
#######################################
process_directory() {
  local dir=$1
  local depth=${level:-0}
  local listing subDir

  if [ ! -d "$dir" ]; then
    echo "'$dir' is not a valid directory!"
    return 1
  fi

  echo "Listing subdirectories of directory $dir"

  # Scan the directory that was validated above. A non-zero find status
  # (e.g. an unreadable sub-directory) is not fatal; whatever was listed is
  # still processed.
  listing=$(find "$dir" -mindepth "$depth" -maxdepth "$depth" -type d)

  # Read one path per line so names with spaces stay intact. The list is fed
  # on descriptor 3 so commands run inside the loop keep the original stdin.
  while IFS= read -r subDir <&3; do
    if [ -d "$subDir" ]; then
      printf "\n[%s] %s\n" "$jobID" "Processing subdirectory '$subDir'"
      process_collection "$subDir"
    fi
  done 3<<EOF
$listing
EOF
}
#######################################
# Turn a file name into an absolute path.
#
# A name that already starts with '/' is kept as is; any other name is
# appended to the given default path.
#
# Globals:   absoluteFile (written; holds the result)
# Arguments: $1 - file name
#            $2 - default path prepended to relative names
#######################################
make_absolute() {
  local file=$1   # file name
  local path=$2   # default path

  # Look at the first character with a shell pattern instead of an external
  # awk call.
  case $file in
    /*) absoluteFile=$file ;;
    *)  absoluteFile="$path/$file" ;;
  esac
  # echo "RETURN $absoluteFile"
}

# main procedure
#######################################
# Parse the command line, then scan the input directory and submit jobs.
#
# Options:
#   -d dir    input directory (must exist)
#   -j dir    job directory; must contain a template named like this script
#   -l n      depth at which sub-directories are processed (negative -> 0)
#   -L n      loop ID
#   -s n      loop size
#   -t file   topics file
#   -q file   relevance judgement file
#   -D value  value stored in xdocs
#   -T value  value stored in xterms
#   -h        print usage and exit
#
# Globals (written): inputDir, jobDir, jobTemplate, level, loopID, loopSize,
#                    topicsFile, qrelsFile, xdocs, xterms, jobID
# Arguments: the script's command-line arguments
# Exits:     1 on missing/invalid arguments, 0 after -h.
#######################################
main() {
  local opt
  local OPTIND=1                    # allow main to be called more than once
  local script_name=${0##*/}        # the template shares this script's file name
  local integer_re='^-?[0-9]+$'

  if [ "$#" -eq 0 ]; then
    usage
    exit 1
  fi

  # parse options
  while getopts ":d:D:t:T:q:j:hl:L:s:" opt; do
    case "$opt" in
      d)
        inputDir=$OPTARG
        if [ ! -d "$inputDir" ]; then
          echo "ERROR: Input directory '$inputDir' does not exist. Please make sure input directory is valid!"
          usage
          exit 1
        fi
        ;;
      j)
        jobDir=$OPTARG
        # job template file
        if [ ! -d "$jobDir" ]; then
          echo "ERROR: Job template directory does not exist. Please make sure that the template file is valid!"
          usage
          exit 1
        fi
        if [ ! -e "$jobDir/$script_name" ]; then
          echo "ERROR: A job template titled '$script_name' under $jobDir directory is required to submit jobs"
          exit 1
        fi
        jobTemplate="$jobDir/$script_name"
        echo "Job template file: $jobTemplate"
        ;;
      h)
        usage
        exit 0
        ;;
      l)
        level=$OPTARG
        if [[ ! $level =~ $integer_re ]]; then
          echo "ERROR: Level must be an integer"
          exit 1
        fi
        # A negative level is clamped to 0 (the input directory itself).
        if [ "$level" -lt 0 ]; then
          level=0
        fi
        ;;
      L) loopID=$OPTARG ;;
      s) loopSize=$OPTARG ;;
      t) topicsFile=$OPTARG ;;
      q) qrelsFile=$OPTARG ;;
      D) xdocs=$OPTARG ;;
      T) xterms=$OPTARG ;;
      :)
        # option given without its required argument
        echo "ERROR: Option -$OPTARG requires an argument"
        usage
        exit 1
        ;;
      \?)
        # unknown option
        echo "Invalid option: -$OPTARG"
        exit 1
        ;;
    esac
  done

  # initialize jobID to loopSize * loopID
  jobID=$((loopSize * loopID))

  # make input absolute
  make_absolute "$inputDir" "$(pwd)"
  inputDir=$absoluteFile
  echo "*** Input directory: $inputDir"

  # browse input directory
  process_directory "$inputDir"
}

# call main procedure
# Pass the script's arguments through unchanged; "$@" keeps every argument
# intact, including ones that contain spaces.
main "$@"
#sleep 1
#/opt/pbs/bin/qstat
# Last edit: Duy Dinh 2012-03-11
# If you would like to refer to this comment somewhere else in this project, copy and paste the following link:
# Last edit: Duy Dinh 2012-03-11