From: <ku...@us...> - 2009-02-05 15:06:42
|
Revision: 325 http://mypyspace.svn.sourceforge.net/mypyspace/?rev=325&view=rev Author: kurtjx Date: 2009-02-05 15:06:38 +0000 (Thu, 05 Feb 2009) Log Message: ----------- some additional error handling for fails on importRDFFile and an argument to restart mid directory Modified Paths: -------------- graphRDF/branches/old2sparul/old2sparul.py Modified: graphRDF/branches/old2sparul/old2sparul.py =================================================================== --- graphRDF/branches/old2sparul/old2sparul.py 2009-02-04 17:20:12 UTC (rev 324) +++ graphRDF/branches/old2sparul/old2sparul.py 2009-02-05 15:06:38 UTC (rev 325) @@ -40,10 +40,24 @@ def __init__(self, msg): self.msg = msg +def tryImportRDF(filename, attempt): + if attempt < 5: + debug("importing rdf") + try: + mi = mopy.importRDFFile(filename) + except urllib2.URLError: + debug("URLError importing RDF, retrying") + sleep(1.0) + attempt+=1 + tryImportRDF(filename, attempt) + return mi + debug("import failed after tries: " + str(attempt)) + return None + def parseRDF(filename, base): '''parse the rdf and return a sparql update query''' sparqlU='' - mi = mopy.importRDFFile(base+filename) + mi = tryImportRDF(base+filename, 0) keys = mi.PersonIdx.keys() for key in keys: person = mi.PersonIdx[key] @@ -156,12 +170,13 @@ argv = sys.argv try: try: - opts, args = getopt.getopt(argv[1:], "ho:b:v", ["help", "output=","base="]) + opts, args = getopt.getopt(argv[1:], "ho:b:s:v", ["help", "output=","base=", "start="]) except getopt.error, msg: raise Usage(msg) # option processing base = None + start = None for option, value in opts: if option == "-v": verbose = True @@ -171,6 +186,8 @@ output = value if option in ("-b", "--base"): base = value + if option in ("-s", "--start"): + start = value '''if option in ("-g", '--graph'): defaultGraph = value insert = """ \ninsert into graph <"""+defaultGraph+"""> {"""''' @@ -186,7 +203,14 @@ fileList = getFileListing(folder) debug('got list of files') #fileList = ['238729309.rdf', 
'13280592.rdf', '26412401.rdf', '8557307.rdf', '176635064.rdf', '12656647.rdf'] - for f in fileList: + startIndex=0 + if start: + try: + startIndex=fileList.index(start) + except: + debug("not a valid start file, not in list") + + for f in fileList[startIndex:]: debug('parsing on file: '+str(f)) #parse each file and do a sparql update to the repository sparul = parseRDF(f, base) This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |