From: <ku...@us...> - 2009-02-03 20:55:05
|
Revision: 322 http://mypyspace.svn.sourceforge.net/mypyspace/?rev=322&view=rev Author: kurtjx Date: 2009-02-03 20:55:00 +0000 (Tue, 03 Feb 2009) Log Message: ----------- lil python script for importing old myrdfspace data into 3 store, need to add a function to break long queries in two Added Paths: ----------- graphRDF/branches/old2sparul/old2sparul.py Added: graphRDF/branches/old2sparul/old2sparul.py =================================================================== --- graphRDF/branches/old2sparul/old2sparul.py (rev 0) +++ graphRDF/branches/old2sparul/old2sparul.py 2009-02-03 20:55:00 UTC (rev 322) @@ -0,0 +1,216 @@ +#!/usr/bin/env python +# encoding: utf-8 +""" +old2sparul.py + +Created by Kurtis Random on 2009-02-03. +Copyright (c) 2009 __MyCompanyName__. All rights reserved. +""" + +import sys +import getopt +from logging import log, error, warning, info, debug +import logging +import ftplib +#from SPARQLWrapper import SPARQLWrapper +import SPARQLWrapper +import mopy +import urllib2 +from time import sleep + +help_message = ''' +take old myrdfspace files and add to the sparql endpoint... + -b --base <uri base from myrdfspace> +''' +failedList = [] +badQueryList = [] + +defaultGraph = "http://dbtune.org/myspace-test" +sparqlEndPoint = "http://dbtune.org/cmn/sparql" +myspaceBase = "http://dbtune.org/myspace/uid" +myspaceOnt = "http://purl.org/ontology/myspace" +prefixes = """PREFIX owl: <http://www.w3.org/2002/07/owl#> \nPREFIX foaf: <http://xmlns.com/foaf/0.1/> \nPREFIX dc: <http://purl.org/dc/elements/1.1/> \nPREFIX mo: <http://purl.org/ontology/mo/>\nPREFIX myspace: <http://purl.org/ontology/myspace#>\nPREFIX xsd: <http://www.w3.org/2001/XMLSchema#>""" + +class Usage(Exception): + def __init__(self, msg): + self.msg = msg + +def parseRDF(filename, base): + '''parse the rdf and return a sparql update query''' + sparqlU = prefixes+""" \ninsert into graph <"""+defaultGraph+"""> {""" + mi = mopy.importRDFFile(base+filename) + keys = mi.PersonIdx.keys() + for key in keys: + person = mi.PersonIdx[key] + if person.name: + # if we find the name, this is the main subject + suid = person.URI.split(base)[1] + subject = "<"+myspaceBase+"/"+suid+">" + name = person.name.pop() + sparqlU = sparqlU + """\n"""+subject+' foaf:name "' + urllib2.quote(name)+'"@en . ' + + # get all the top friends + while(1): + try: + p = person.knows.pop() + ouid = p.URI.split(base)[1] + obj = "<"+myspaceBase+"/"+ouid+">" + sparqlU=sparqlU+ "\n"+subject+" foaf:knows "+ obj+ ' . ' "\n"+subject+" myspace:topFriend "+obj+ ' . ' + except: + break + + while(1): + try: + thm = person.theme.pop() + genre = "<"+myspaceOnt + "#"+urllib2.quote(thm.URI.split(base)[1])+">" + sparqlU=sparqlU+ "\n"+subject+ " myspace:genreTag "+ genre+ ' . ' + except: + break + + try: + playcount = person.tipjar.pop().URI.split(base)[1] + sparqlU=sparqlU+ "\n"+subject+ ' myspace:totalPlays "'+ playcount+'"^^xsd:int . ' + except: + pass + + sparqlU=sparqlU+'}' + return sparqlU + +def setLogger(): + '''just set the logger''' + loggingConfig = {"format":'%(asctime)s %(levelname)-8s %(message)s', + "datefmt":'%d.%m.%y %H:%M:%S', + "level": logging.DEBUG, + #"filename":logPath + "musicGrabber.log", + "filemode":"w"} + logging.basicConfig(**loggingConfig) + +def getFileListing(rdfFolder): + '''return a list of all the rdf files found w/ given base''' + rdfFolder = rdfFolder.rstrip('/') + rdfFolder = rdfFolder+'/' + ftp = ftplib.FTP("myrdfspace.com") + ftp.login("myrdf", "my1stRDF") + ftp.cwd("myrdfspace.com/"+rdfFolder) + vList = ftp.nlst() + return vList + +def trySparql(sparql, attempt, f): + try: + debug('attempting sparql update, try #' + str(attempt)) + sparql.setReturnFormat(SPARQLWrapper.TURTLE) + ret = sparql.query() + print ret.convert() + except urllib2.HTTPError: + debug('caught an http error, retrying...') + if attempt<5: + attempt+=1 + sleep(2) + trySparql(sparql, attempt, f) + else: + error("more that 5 http errors, giving up") + failedList.append(f) + except SPARQLWrapper.sparqlexceptions.QueryBadFormed: + error("query failed for "+ str(f)) + debug('$$$$$$$$$$$$$$$$BADLY FORMED QUERY$$$$$$$$$$$$$$$$$$$') + badQueryList.append(f) + failedList.append(f) + except: + error("query failed for "+ str(f)) + debug('************UPDATE FAILED***********') + failedList.append(f) + error("Unexpected error:", sys.exc_info()[0]) + +def splitQuery(query): + '''sometime the query is too long and should be broke in two pieces''' + pass + +def main(argv=None): + if argv is None: + argv = sys.argv + try: + try: + opts, args = getopt.getopt(argv[1:], "ho:b:v", ["help", "output=","base="]) + except getopt.error, msg: + raise Usage(msg) + + # option processing + base = None + for option, value in opts: + if option == "-v": + verbose = True + if option in ("-h", "--help"): + raise Usage(help_message) + if option in ("-o", "--output"): + output = value + if option in ("-b", "--base"): + base = value + + setLogger() + if base == None: + raise Usage(help_message) + return 2 + # parse base uri + folder = base.split("http://myrdfspace.com/")[1] + debug('getting list of files') + #fileList = getFileListing(folder) + debug('got list of files') + fileList = ['238729309.rdf', '13280592.rdf', '26412401.rdf', '8557307.rdf', '176635064.rdf', '12656647.rdf'] + for f in fileList: + debug('parsing on file: '+str(f)) + #parse each file and do a sparql update to the repository + sparul = parseRDF(f, base) + sparql = SPARQLWrapper.SPARQLWrapper(sparqlEndPoint) + sparql.addDefaultGraph(defaultGraph) + sparql.setQuery(sparul) + trySparql(sparql, 0, f) + '''try: + debug('attempting sparql update') + sparql.setReturnFormat(SPARQLWrapper.TURTLE) + ret = sparql.query() + print ret.convert() + except urllib2.HTTPError: + debug('caught an http error, retrying...') + try: + ret = sparql.query() + print ret.convert() + except urllib2.HTTPError: + debug('second http error...') + try: + ret = sparql.query() + print ret.convert() + except: + print "query failed for "+ str(f) + debug('************UPDATE FAILED***********') + failedList.append(f) + print "FINAL error:", sys.exc_info()[0] + except: + print "query failed for "+ str(f) + debug('************UPDATE FAILED***********') + failedList.append(f) + print "Unexpected error:", sys.exc_info()[0] + except SPARQLWrapper.sparqlexceptions.QueryBadFormed: + debug('$$$$$$$$$$$$$$$$BADLY FORMED QUERY$$$$$$$$$$$$$$$$$$$') + badQueryList.append(f) + except: + print "query failed for "+ str(f) + debug('************UPDATE FAILED***********') + failedList.append(f) + print "Unexpected error:", sys.exc_info()[0]''' + + + debug("Complete!!!") + print "\n\nREPORT:\n\tfailures: "+str(len(failedList)) + print "\nfails: " + print failedList + print "\n\nbad queries: " + print badQueryList + + except Usage, err: + print >> sys.stderr, sys.argv[0].split("/")[-1] + ": " + str(err.msg) + print >> sys.stderr, "\t for help use --help" + return 2 + + +if __name__ == "__main__": + sys.exit(main()) This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ku...@us...> - 2009-02-04 15:18:11
|
Revision: 323 http://mypyspace.svn.sourceforge.net/mypyspace/?rev=323&view=rev Author: kurtjx Date: 2009-02-04 15:18:08 +0000 (Wed, 04 Feb 2009) Log Message: ----------- splits big queries down now Modified Paths: -------------- graphRDF/branches/old2sparul/old2sparul.py Modified: graphRDF/branches/old2sparul/old2sparul.py =================================================================== --- graphRDF/branches/old2sparul/old2sparul.py 2009-02-03 20:55:00 UTC (rev 322) +++ graphRDF/branches/old2sparul/old2sparul.py 2009-02-04 15:18:08 UTC (rev 323) @@ -22,22 +22,27 @@ take old myrdfspace files and add to the sparql endpoint... -b --base <uri base from myrdfspace> ''' + failedList = [] badQueryList = [] -defaultGraph = "http://dbtune.org/myspace-test" +defaultGraph = "http://dbtune.org/myspace-fj-2008p" sparqlEndPoint = "http://dbtune.org/cmn/sparql" myspaceBase = "http://dbtune.org/myspace/uid" myspaceOnt = "http://purl.org/ontology/myspace" prefixes = """PREFIX owl: <http://www.w3.org/2002/07/owl#> \nPREFIX foaf: <http://xmlns.com/foaf/0.1/> \nPREFIX dc: <http://purl.org/dc/elements/1.1/> \nPREFIX mo: <http://purl.org/ontology/mo/>\nPREFIX myspace: <http://purl.org/ontology/myspace#>\nPREFIX xsd: <http://www.w3.org/2001/XMLSchema#>""" +insert = """ \ninsert into graph <"""+defaultGraph+"""> {""" + +apacheLimit = 2000 + class Usage(Exception): def __init__(self, msg): self.msg = msg def parseRDF(filename, base): '''parse the rdf and return a sparql update query''' - sparqlU = prefixes+""" \ninsert into graph <"""+defaultGraph+"""> {""" + sparqlU='' mi = mopy.importRDFFile(base+filename) keys = mi.PersonIdx.keys() for key in keys: @@ -99,8 +104,7 @@ try: debug('attempting sparql update, try #' + str(attempt)) sparql.setReturnFormat(SPARQLWrapper.TURTLE) - ret = sparql.query() - print ret.convert() + ret = sparql.query().convert() except urllib2.HTTPError: debug('caught an http error, retrying...') if attempt<5: @@ -113,17 +117,36 @@ except SPARQLWrapper.sparqlexceptions.QueryBadFormed: error("query failed for "+ str(f)) debug('$$$$$$$$$$$$$$$$BADLY FORMED QUERY$$$$$$$$$$$$$$$$$$$') + print sparql.queryString badQueryList.append(f) failedList.append(f) except: error("query failed for "+ str(f)) debug('************UPDATE FAILED***********') failedList.append(f) - error("Unexpected error:", sys.exc_info()[0]) + print "Unexpected error:", sys.exc_info()[0] + print sparql.queryString + else: + print ret + return ret + return None def splitQuery(query): '''sometime the query is too long and should be broke in two pieces''' - pass + lines = query.splitlines(1) + splits = [] + split = "" + count = 0 + for line in lines: + if count < apacheLimit: + split = split+line + count+=len(line) + else: + splits.append(insert+split+'}') + split= line + count = 0 + splits.append(insert+split) + return splits def main(argv=None): if argv is None: @@ -145,6 +168,10 @@ output = value if option in ("-b", "--base"): base = value + '''if option in ("-g", '--graph'): + defaultGraph = value + insert = """ \ninsert into graph <"""+defaultGraph+"""> {"""''' + setLogger() if base == None: @@ -153,50 +180,27 @@ # parse base uri folder = base.split("http://myrdfspace.com/")[1] debug('getting list of files') - #fileList = getFileListing(folder) + fileList = getFileListing(folder) debug('got list of files') - fileList = ['238729309.rdf', '13280592.rdf', '26412401.rdf', '8557307.rdf', '176635064.rdf', '12656647.rdf'] + #fileList = ['238729309.rdf', '13280592.rdf', '26412401.rdf', '8557307.rdf', '176635064.rdf', '12656647.rdf'] for f in fileList: debug('parsing on file: '+str(f)) #parse each file and do a sparql update to the repository sparul = parseRDF(f, base) sparql = SPARQLWrapper.SPARQLWrapper(sparqlEndPoint) sparql.addDefaultGraph(defaultGraph) - sparql.setQuery(sparul) - trySparql(sparql, 0, f) - '''try: - debug('attempting sparql update') - sparql.setReturnFormat(SPARQLWrapper.TURTLE) - ret = sparql.query() - print ret.convert() - except urllib2.HTTPError: - debug('caught an http error, retrying...') - try: - ret = sparql.query() - print ret.convert() - except urllib2.HTTPError: - debug('second http error...') - try: - ret = sparql.query() - print ret.convert() - except: - print "query failed for "+ str(f) - debug('************UPDATE FAILED***********') - failedList.append(f) - print "FINAL error:", sys.exc_info()[0] - except: - print "query failed for "+ str(f) - debug('************UPDATE FAILED***********') - failedList.append(f) - print "Unexpected error:", sys.exc_info()[0] - except SPARQLWrapper.sparqlexceptions.QueryBadFormed: - debug('$$$$$$$$$$$$$$$$BADLY FORMED QUERY$$$$$$$$$$$$$$$$$$$') - badQueryList.append(f) - except: - print "query failed for "+ str(f) - debug('************UPDATE FAILED***********') - failedList.append(f) - print "Unexpected error:", sys.exc_info()[0]''' + + # we have to deal w/ queries that are too long + if len(sparul) > apacheLimit: + debug('query too long, splitting...') + splitSparul = splitQuery(sparul) + for split in splitSparul: + sparql.setQuery(prefixes+split) + trySparql(sparql, 0, f) + else: + sparql.setQuery(prefixes+insert+sparul) + trySparql(sparql, 0, f) + debug("Complete!!!") This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ku...@us...> - 2009-02-04 17:20:16
|
Revision: 324 http://mypyspace.svn.sourceforge.net/mypyspace/?rev=324&view=rev Author: kurtjx Date: 2009-02-04 17:20:12 +0000 (Wed, 04 Feb 2009) Log Message: ----------- old2sparul working properly :-) Modified Paths: -------------- graphRDF/branches/old2sparul/old2sparul.py Modified: graphRDF/branches/old2sparul/old2sparul.py =================================================================== --- graphRDF/branches/old2sparul/old2sparul.py 2009-02-04 15:18:08 UTC (rev 323) +++ graphRDF/branches/old2sparul/old2sparul.py 2009-02-04 17:20:12 UTC (rev 324) @@ -26,11 +26,11 @@ failedList = [] badQueryList = [] -defaultGraph = "http://dbtune.org/myspace-fj-2008p" +defaultGraph = "http://dbtune.org/myspace-fj-set-2008" sparqlEndPoint = "http://dbtune.org/cmn/sparql" myspaceBase = "http://dbtune.org/myspace/uid" myspaceOnt = "http://purl.org/ontology/myspace" -prefixes = """PREFIX owl: <http://www.w3.org/2002/07/owl#> \nPREFIX foaf: <http://xmlns.com/foaf/0.1/> \nPREFIX dc: <http://purl.org/dc/elements/1.1/> \nPREFIX mo: <http://purl.org/ontology/mo/>\nPREFIX myspace: <http://purl.org/ontology/myspace#>\nPREFIX xsd: <http://www.w3.org/2001/XMLSchema#>""" +prefixes = """PREFIX owl: <http://www.w3.org/2002/07/owl#> \nPREFIX foaf: <http://xmlns.com/foaf/0.1/> \nPREFIX dc: <http://purl.org/dc/elements/1.1/> \nPREFIX mo: <http://purl.org/ontology/mo/>\nPREFIX myspace: <http://purl.org/ontology/myspace#>\nPREFIX xsd: <http://www.w3.org/2001/XMLSchema#>\nPREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>""" insert = """ \ninsert into graph <"""+defaultGraph+"""> {""" @@ -52,6 +52,8 @@ suid = person.URI.split(base)[1] subject = "<"+myspaceBase+"/"+suid+">" name = person.name.pop() + sparqlU = sparqlU + '\n'+subject+' rdf:type mo:MusicArtist .' + sparqlU = sparqlU + '\n'+subject+' myspace:myspaceID "'+filename.rstrip('.rdf')+'"^^xsd:int .' sparqlU = sparqlU + """\n"""+subject+' foaf:name "' + urllib2.quote(name)+'"@en . ' # get all the top friends @@ -61,6 +63,7 @@ ouid = p.URI.split(base)[1] obj = "<"+myspaceBase+"/"+ouid+">" sparqlU=sparqlU+ "\n"+subject+" foaf:knows "+ obj+ ' . ' "\n"+subject+" myspace:topFriend "+obj+ ' . ' + sparqlU = sparqlU + '\n'+obj+' rdf:type mo:MusicArtist .' except: break This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ku...@us...> - 2009-02-05 15:06:42
|
Revision: 325 http://mypyspace.svn.sourceforge.net/mypyspace/?rev=325&view=rev Author: kurtjx Date: 2009-02-05 15:06:38 +0000 (Thu, 05 Feb 2009) Log Message: ----------- some additional error handling for fails on importRDFFile and an arguement to restart mid directory Modified Paths: -------------- graphRDF/branches/old2sparul/old2sparul.py Modified: graphRDF/branches/old2sparul/old2sparul.py =================================================================== --- graphRDF/branches/old2sparul/old2sparul.py 2009-02-04 17:20:12 UTC (rev 324) +++ graphRDF/branches/old2sparul/old2sparul.py 2009-02-05 15:06:38 UTC (rev 325) @@ -40,10 +40,24 @@ def __init__(self, msg): self.msg = msg +def tryImportRDF(filename, attempt): + if attempt < 5: + debug("importing rdf") + try: + mi = mopy.importRDFFile(filename) + except urllib2.URLError: + debug("URLError importing RDF, retrying") + sleep(1.0) + attempt+=1 + tryImportRDF(filename, attempt) + return mi + debug("import failed after tries: " + str(attempt)) + return None + def parseRDF(filename, base): '''parse the rdf and return a sparql update query''' sparqlU='' - mi = mopy.importRDFFile(base+filename) + mi = tryImportRDF(base+filename, 0) keys = mi.PersonIdx.keys() for key in keys: person = mi.PersonIdx[key] @@ -156,12 +170,13 @@ argv = sys.argv try: try: - opts, args = getopt.getopt(argv[1:], "ho:b:v", ["help", "output=","base="]) + opts, args = getopt.getopt(argv[1:], "ho:b:s:v", ["help", "output=","base=", "start="]) except getopt.error, msg: raise Usage(msg) # option processing base = None + start = None for option, value in opts: if option == "-v": verbose = True @@ -171,6 +186,8 @@ output = value if option in ("-b", "--base"): base = value + if option in ("-s", "--start"): + start = value '''if option in ("-g", '--graph'): defaultGraph = value insert = """ \ninsert into graph <"""+defaultGraph+"""> {"""''' @@ -186,7 +203,14 @@ fileList = getFileListing(folder) debug('got list of files') #fileList = ['238729309.rdf', '13280592.rdf', '26412401.rdf', '8557307.rdf', '176635064.rdf', '12656647.rdf'] - for f in fileList: + startIndex=0 + if start: + try: + startIndex=fileList.index(start) + except: + debug("not a valid start file, not in list") + + for f in fileList[startIndex:]: debug('parsing on file: '+str(f)) #parse each file and do a sparql update to the repository sparul = parseRDF(f, base) This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ku...@us...> - 2009-02-18 12:56:54
|
Revision: 327 http://mypyspace.svn.sourceforge.net/mypyspace/?rev=327&view=rev Author: kurtjx Date: 2009-02-18 12:56:50 +0000 (Wed, 18 Feb 2009) Log Message: ----------- added some regex stuff to get rid of bad genre tags, sometime 1324123.rdf was set as a theme which was a bug in the old code i guess Modified Paths: -------------- graphRDF/branches/old2sparul/old2sparul.py Modified: graphRDF/branches/old2sparul/old2sparul.py =================================================================== --- graphRDF/branches/old2sparul/old2sparul.py 2009-02-18 12:51:29 UTC (rev 326) +++ graphRDF/branches/old2sparul/old2sparul.py 2009-02-18 12:56:50 UTC (rev 327) @@ -3,8 +3,10 @@ """ old2sparul.py +This is an ad hoc script for taking data from myrdfspace.com, cleaning it, and putting in sparql endpoint + Created by Kurtis Random on 2009-02-03. -Copyright (c) 2009 __MyCompanyName__. All rights reserved. +Copyright (c) 2009 C4DM QMUL. All rights reserved. """ import sys @@ -12,21 +14,22 @@ from logging import log, error, warning, info, debug import logging import ftplib -#from SPARQLWrapper import SPARQLWrapper import SPARQLWrapper import mopy import urllib2 +import re from time import sleep help_message = ''' take old myrdfspace files and add to the sparql endpoint... -b --base <uri base from myrdfspace> + -s --start <uid to start from> useful after a crash ;-) ''' failedList = [] badQueryList = [] -defaultGraph = "http://dbtune.org/myspace-fj-set-2008" +defaultGraph = "http://dbtune.org/myspace-fj-2008" sparqlEndPoint = "http://dbtune.org/cmn/sparql" myspaceBase = "http://dbtune.org/myspace/uid" myspaceOnt = "http://purl.org/ontology/myspace" @@ -50,7 +53,8 @@ sleep(1.0) attempt+=1 tryImportRDF(filename, attempt) - return mi + else: + return mi debug("import failed after tries: " + str(attempt)) return None @@ -58,45 +62,55 @@ '''parse the rdf and return a sparql update query''' sparqlU='' mi = tryImportRDF(base+filename, 0) - keys = mi.PersonIdx.keys() - for key in keys: - person = mi.PersonIdx[key] - if person.name: - # if we find the name, this is the main subject - suid = person.URI.split(base)[1] - subject = "<"+myspaceBase+"/"+suid+">" - name = person.name.pop() - sparqlU = sparqlU + '\n'+subject+' rdf:type mo:MusicArtist .' - sparqlU = sparqlU + '\n'+subject+' myspace:myspaceID "'+filename.rstrip('.rdf')+'"^^xsd:int .' - sparqlU = sparqlU + """\n"""+subject+' foaf:name "' + urllib2.quote(name)+'"@en . ' + if mi: + keys = mi.PersonIdx.keys() + for key in keys: + person = mi.PersonIdx[key] + if person.name: + # if we find the name, this is the main subject + suid = person.URI.split(base)[1] + subject = "<"+myspaceBase+"/"+suid+">" + name = person.name.pop() + sparqlU = sparqlU + '\n'+subject+' rdf:type mo:MusicArtist .' + sparqlU = sparqlU + '\n'+subject+' myspace:myspaceID "'+filename.rstrip('.rdf')+'"^^xsd:int .' + sparqlU = sparqlU + """\n"""+subject+' foaf:name "' + urllib2.quote(name)+'"@en . ' - # get all the top friends - while(1): - try: - p = person.knows.pop() - ouid = p.URI.split(base)[1] - obj = "<"+myspaceBase+"/"+ouid+">" - sparqlU=sparqlU+ "\n"+subject+" foaf:knows "+ obj+ ' . ' "\n"+subject+" myspace:topFriend "+obj+ ' . ' - sparqlU = sparqlU + '\n'+obj+' rdf:type mo:MusicArtist .' - except: - break + # get all the top friends + while(1): + try: + p = person.knows.pop() + except: + break + else: + ouid = p.URI.split(base)[1] + obj = "<"+myspaceBase+"/"+ouid+">" + sparqlU=sparqlU+ "\n"+subject+" foaf:knows "+ obj+ ' . ' "\n"+subject+" myspace:topFriend "+obj+ ' . ' + sparqlU = sparqlU + '\n'+obj+' rdf:type mo:MusicArtist .' - while(1): + while(1): + try: + thm = person.theme.pop() + except: + debug("breaking from genre pops") + break + else: + thm = thm.URI.split(base)[1] + # do some cleaning, bad genres in there like 35123543.rdf instead of hip hop + if not re.match(".*\.rdf",thm): + debug("adding genre: "+thm) + genre = "<"+myspaceOnt + "#"+urllib2.quote(thm)+">" + sparqlU=sparqlU+ "\n"+subject+ " myspace:genreTag "+ genre+ ' . ' + try: - thm = person.theme.pop() - genre = "<"+myspaceOnt + "#"+urllib2.quote(thm.URI.split(base)[1])+">" - sparqlU=sparqlU+ "\n"+subject+ " myspace:genreTag "+ genre+ ' . ' + playcount = person.tipjar.pop().URI.split(base)[1] + sparqlU=sparqlU+ "\n"+subject+ ' myspace:totalPlays "'+ playcount+'"^^xsd:int . ' except: - break - - try: - playcount = person.tipjar.pop().URI.split(base)[1] - sparqlU=sparqlU+ "\n"+subject+ ' myspace:totalPlays "'+ playcount+'"^^xsd:int . ' - except: - pass + pass - sparqlU=sparqlU+'}' - return sparqlU + sparqlU=sparqlU+'}' + return sparqlU + else: + return None def setLogger(): '''just set the logger''' @@ -216,17 +230,20 @@ sparul = parseRDF(f, base) sparql = SPARQLWrapper.SPARQLWrapper(sparqlEndPoint) sparql.addDefaultGraph(defaultGraph) - - # we have to deal w/ queries that are too long - if len(sparul) > apacheLimit: - debug('query too long, splitting...') - splitSparul = splitQuery(sparul) - for split in splitSparul: - sparql.setQuery(prefixes+split) + if sparul: + # we have to deal w/ queries that are too long + if len(sparul) > apacheLimit: + debug('query too long, splitting...') + splitSparul = splitQuery(sparul) + for split in splitSparul: + sparql.setQuery(prefixes+split) + trySparql(sparql, 0, f) + else: + sparql.setQuery(prefixes+insert+sparul) trySparql(sparql, 0, f) else: - sparql.setQuery(prefixes+insert+sparul) - trySparql(sparql, 0, f) + debug('failure on '+str(f)) + failedList.append(f) This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |