Duy Dinh - 2012-03-06
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
#!/bin/bash
# Build a set of jobs and submit them to OSIRIM.
# Each job runs indexing, retrieval and evaluation for one test collection.
# Input: a directory containing one or more subdirectories (test collections).
# Duy Dinh, IRIT - University of Toulouse, March 2012

# Example: 
# sh irtoolkit.sh -d directory -t topicsFile -q qrelsFile -j jobDir -l subDirLevel

args=$* # program arguments except $0

inputDir="." # input directory
topicsFile="" # topics file
qrelsFile="" # relevance judgement file

jobDir="`pwd`/jobs" # job directory containing
# a template job file usually a shell script
jobTemplate="$jobDir/$0"
level=0 # start to process at a particular level of input directory

loopID=0 # loop ID for making job ID
loopSize=1 # number of nested loops per job (e.g., the number of voting algorithms used or the number of terminologies used, etc.)
jobID=0 # job identifier

usage(){
    echo "Usage:"
    echo "    \$sh irtoolkit.sh -d inputDir -t topicsFile -q qrelsFile [-j jobDir] [-l subDirLevel] [-L loopID] [-s loopSize]"
    echo "Example:"
    echo "    \$sh irtoolkit.sh -d ./ -t topics.txt -q qrels.txt"
}

# function: create job
# make a new job that will be submitted to OSIRIM
# @param    $1 : collection name of directory containing a hiearchical structure for a test collection (i.e., /collection, /index, /results), each of which must be indexed separately
#           $2 : job identifier
#
CREATE_JOB(){
    local collection=$1
    id=$2

    # job template
    shortname=$(basename "$jobDir/$0") # short filename
    extension=${shortname##*.} # file extension
    name=${shortname%.*}    # file name

    newTemplate="${jobDir}/${name}-${id}.${extension}"

    echo "cp $jobTemplate $newTemplate"
    cp $jobTemplate $newTemplate

    # modify patterns in template
    # JOB_DIRECTORY
    # replace '/' by '\/' 
    tmp=$(echo $jobDir|sed 's/\//\\\//g')
    sed "s/JOB_DIRECTORY/$tmp/g"  $newTemplate > $newTemplate.tmp
    mv  $newTemplate.tmp $newTemplate

    # COLLECTION_PATH
    # replace '/' by '\/' 
    tmp=$(echo "$collection/collection"|sed 's/\//\\\//g')
    sed "s/COLLECTION_PATH/$tmp/g"  $newTemplate > $newTemplate.tmp
    mv  $newTemplate.tmp $newTemplate

    # INDEX_PATH
    # replace '/' by '\/' 
    tmp=$(echo "$collection/index"|sed 's/\//\\\//g')
    sed "s/INDEX_PATH/$tmp/g"  $newTemplate > $newTemplate.tmp
    mv  $newTemplate.tmp $newTemplate

    # TOPICS_PATH  
    # replace '/' by '\/' 
    tmp=$(echo "$topicsFile"|sed 's/\//\\\//g')
    sed "s/TOPICS_PATH/$tmp/g"  $newTemplate > $newTemplate.tmp
    mv  $newTemplate.tmp $newTemplate

    # QRELS_PATH  
    # replace '/' by '\/' 
    tmp=$(echo "$qrelsFile"|sed 's/\//\\\//g')
    sed "s/QRELS_PATH/$tmp/g"  $newTemplate > $newTemplate.tmp
    mv  $newTemplate.tmp $newTemplate

    # RESULTS_PATH  
    # replace '/' by '\/' 
    tmp=$(echo "$collection/results"|sed 's/\//\\\//g')
    sed "s/RESULTS_PATH/$tmp/g"  $newTemplate > $newTemplate.tmp
    mv  $newTemplate.tmp $newTemplate

    # EXPANSION_TERMS
    sed "s/EXPANSION_TERMS/$xterms/g"  $newTemplate > $newTemplate.tmp
    mv  $newTemplate.tmp $newTemplate

    # EXPANSION_DOCUMENTS
    sed "s/EXPANSION_DOCUMENTS/$xdocs/g"  $newTemplate > $newTemplate.tmp
    mv  $newTemplate.tmp $newTemplate

    # wait a litle bit before submitting job
    sleep 1 
    echo "Submitting job to OSIRIM: $newTemplate"
    /opt/pbs/bin/qsub $newTemplate
}

# process a test collection
process_collection()
{
    local c=$1
    if test -d "$c"  # entry is a directory
    then
        echo
        echo "*** Creating new job for processing collection '$c' [JobID: $jobID]"

        CREATE_JOB $c "$jobID"
        # next collection
        (( jobID=jobID + 1 ))
    fi
}

# Collect sub folders containing test collections
# Build a job for each collection, then submit job to OSIRIM
process_directory()
{
    if !(test -d "$1")  then 
        echo "'$1' is not a valid directory!";
        return;
    fi

    echo "Listing subdirectories of directory $input"

    for subDir in `find "$inputDir"  -mindepth $level  -maxdepth $level  -type d`
    do
        if test -d $subDir
        then
            printf "\n[%s] %s\n" "$jobID" "Processing subdirectory '$subDir'"
            process_collection "$subDir"
        fi
    done 
}

make_absolute(){
    local file=$1 # file name
    local path=$2 # default path
    # get first character in file name
    local c=`echo $file | awk '{ print substr( $0, 0, 1 ) }'`

    absoluteFile=$file
    if (test "$c" != "/") 
    then
        absoluteFile="$path/$file" # file name
    fi
#   echo "RETURN $absoluteFile"
}

# main procedure
main()
{
    if (test $# -eq 0)
    then    
        usage
        exit
    fi

#   inputDir=$1
#   topicsFile=$2
#   qrelsFile=$3
#   jobDir=$4

    # parse options
    while getopts ":d:D:t:T:q:j:hl:L:s:" args; do

    case $args in
        d)
            inputDir=$OPTARG
            if !(test -d "$inputDir")
            then
                echo "ERROR: Input directory '$inputDir' does not exist. Please make sure input directory is valid!"
                usage
                exit
            fi
            ;;      
        j) 
            jobDir=$OPTARG # job template file
            if !(test -d "$jobDir")
            then
                echo "ERROR: Job template directory does not exist. Please make sure that the template file is valid!"
                usage
                exit
            else
                if !(test -e "$jobDir/$0")
                then
                    echo "ERROR: A job template titled '$0' under $jobDir directory is required to submit jobs"
                    exit
                fi
            fi

            jobTemplate="$jobDir/$0"
            echo "Job template file: $jobTemplate";;
        h)
            usage
            exit;;
        l)
            level=$OPTARG
            if test $level -lt "0"
            then
                level="0"
            fi

            if test $level -lt "0" 
            then
                echo "ERROR: Level could not be negative"
                exit            
            fi
            ;;
        L)
            loopID=$OPTARG;;
        s) 
            loopSize=$OPTARG;;
        t)
            topicsFile=$OPTARG;;
        q)
            qrelsFile=$OPTARG;;
        D)
            xdocs=$OPTARG;; 
        T)
            xterms=$OPTARG;;
        ?) # unknown option
            echo "Invalid option: -$OPTARG"
            exit;;
    esac
    done

    (( jobID=loopSize * loopID )) # initialize jobID to loopSize * loopID

    # make input absolute
    make_absolute $inputDir `pwd`
    inputDir=$absoluteFile
    echo "*** Input directory: $inputDir"
    # browse input directory    
    process_directory $inputDir 
}

# call main procedure
main $args

#sleep 1
#/opt/pbs/bin/qstat
 

Last edit: Duy Dinh 2012-03-11