|
From: <ku...@us...> - 2009-02-05 15:06:42
|
Revision: 325
http://mypyspace.svn.sourceforge.net/mypyspace/?rev=325&view=rev
Author: kurtjx
Date: 2009-02-05 15:06:38 +0000 (Thu, 05 Feb 2009)
Log Message:
-----------
some additional error handling for failures in importRDFFile and an argument to restart mid-directory
Modified Paths:
--------------
graphRDF/branches/old2sparul/old2sparul.py
Modified: graphRDF/branches/old2sparul/old2sparul.py
===================================================================
--- graphRDF/branches/old2sparul/old2sparul.py 2009-02-04 17:20:12 UTC (rev 324)
+++ graphRDF/branches/old2sparul/old2sparul.py 2009-02-05 15:06:38 UTC (rev 325)
@@ -40,10 +40,24 @@
def __init__(self, msg):
self.msg = msg
+def tryImportRDF(filename, attempt):
+ if attempt < 5:
+ debug("importing rdf")
+ try:
+ mi = mopy.importRDFFile(filename)
+ except urllib2.URLError:
+ debug("URLError importing RDF, retrying")
+ sleep(1.0)
+ attempt+=1
+ tryImportRDF(filename, attempt)
+ return mi
+ debug("import failed after tries: " + str(attempt))
+ return None
+
def parseRDF(filename, base):
'''parse the rdf and return a sparql update query'''
sparqlU=''
- mi = mopy.importRDFFile(base+filename)
+ mi = tryImportRDF(base+filename, 0)
keys = mi.PersonIdx.keys()
for key in keys:
person = mi.PersonIdx[key]
@@ -156,12 +170,13 @@
argv = sys.argv
try:
try:
- opts, args = getopt.getopt(argv[1:], "ho:b:v", ["help", "output=","base="])
+ opts, args = getopt.getopt(argv[1:], "ho:b:s:v", ["help", "output=","base=", "start="])
except getopt.error, msg:
raise Usage(msg)
# option processing
base = None
+ start = None
for option, value in opts:
if option == "-v":
verbose = True
@@ -171,6 +186,8 @@
output = value
if option in ("-b", "--base"):
base = value
+ if option in ("-s", "--start"):
+ start = value
'''if option in ("-g", '--graph'):
defaultGraph = value
insert = """ \ninsert into graph <"""+defaultGraph+"""> {"""'''
@@ -186,7 +203,14 @@
fileList = getFileListing(folder)
debug('got list of files')
#fileList = ['238729309.rdf', '13280592.rdf', '26412401.rdf', '8557307.rdf', '176635064.rdf', '12656647.rdf']
- for f in fileList:
+ startIndex=0
+ if start:
+ try:
+ startIndex=fileList.index(start)
+ except:
+ debug("not a valid start file, not in list")
+
+ for f in fileList[startIndex:]:
debug('parsing on file: '+str(f))
#parse each file and do a sparql update to the repository
sparul = parseRDF(f, base)
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|