From: <ku...@us...> - 2009-06-22 14:08:14
Revision: 354
http://mypyspace.svn.sourceforge.net/mypyspace/?rev=354&view=rev
Author: kurtjx
Date: 2009-06-22 14:08:10 +0000 (Mon, 22 Jun 2009)
Log Message:
-----------
some fixes to the old graphs - adding data to the sparql endpoint
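
Note: both scripts in this revision push triples into the endpoint as plain SPARUL updates of the form "insert into graph <...> { ... }", submitted through SPARQLWrapper. A minimal sketch of that submission path follows; the endpoint and graph URIs are the constants from old2sparul.py, while the single triple is a made-up example rather than data from this commit.

import SPARQLWrapper

defaultGraph = "http://dbtune.org/myspace-fj-2008"
sparqlEndPoint = "http://dbtune.org/cmn/sparql"

# one made-up triple, wrapped in the same insert-into-graph form the scripts build
update = ('insert into graph <' + defaultGraph + '> { '
          '<http://dbtune.org/myspace/uid/12656647> '
          '<http://xmlns.com/foaf/0.1/name> "example artist"@en . }')

sparql = SPARQLWrapper.SPARQLWrapper(sparqlEndPoint)
sparql.addDefaultGraph(defaultGraph)
sparql.setQuery(update)
sparql.setReturnFormat(SPARQLWrapper.TURTLE)
print sparql.query().convert()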
Added Paths:
-----------
graphRDF/branches/old2sparul/src/
graphRDF/branches/old2sparul/src/addTotalFriends.py
graphRDF/branches/old2sparul/src/old2sparul.py
Removed Paths:
-------------
graphRDF/branches/old2sparul/old2sparul.py
Deleted: graphRDF/branches/old2sparul/old2sparul.py
===================================================================
--- graphRDF/branches/old2sparul/old2sparul.py 2009-05-27 21:19:59 UTC (rev 353)
+++ graphRDF/branches/old2sparul/old2sparul.py 2009-06-22 14:08:10 UTC (rev 354)
@@ -1,264 +0,0 @@
-#!/usr/bin/env python
-# encoding: utf-8
-"""
-old2sparul.py
-
-This is an ad hoc script for taking data from myrdfspace.com, cleaning it, and putting it into a sparql endpoint
-
-Created by Kurtis Random on 2009-02-03.
-Copyright (c) 2009 C4DM QMUL. All rights reserved.
-"""
-
-import sys
-import getopt
-from logging import log, error, warning, info, debug
-import logging
-import ftplib
-import SPARQLWrapper
-import mopy
-import urllib2
-import re
-from time import sleep
-
-help_message = '''
-take old myrdfspace files and add them to the sparql endpoint...
- -b --base <uri base from myrdfspace>
- -s --start <uid to start from> useful after a crash ;-)
-'''
-
-failedList = []
-badQueryList = []
-
-defaultGraph = "http://dbtune.org/myspace-fj-2008"
-sparqlEndPoint = "http://dbtune.org/cmn/sparql"
-myspaceBase = "http://dbtune.org/myspace/uid"
-myspaceOnt = "http://purl.org/ontology/myspace"
-prefixes = """PREFIX owl: <http://www.w3.org/2002/07/owl#> \nPREFIX foaf: <http://xmlns.com/foaf/0.1/> \nPREFIX dc: <http://purl.org/dc/elements/1.1/> \nPREFIX mo: <http://purl.org/ontology/mo/>\nPREFIX myspace: <http://purl.org/ontology/myspace#>\nPREFIX xsd: <http://www.w3.org/2001/XMLSchema#>\nPREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>"""
-
-insert = """ \ninsert into graph <"""+defaultGraph+"""> {"""
-
-apacheLimit = 2000
-
-class Usage(Exception):
- def __init__(self, msg):
- self.msg = msg
-
-def tryImportRDF(filename, attempt):
- if attempt < 5:
- debug("importing rdf")
- try:
- mi = mopy.importRDFFile(filename)
- except urllib2.URLError:
- debug("URLError importing RDF, retrying")
- sleep(1.0)
- attempt+=1
- return tryImportRDF(filename, attempt)
- else:
- return mi
- debug("import failed after tries: " + str(attempt))
- return None
-
-def parseRDF(filename, base):
- '''parse the rdf and return a sparql update query'''
- sparqlU=''
- mi = tryImportRDF(base+filename, 0)
- if mi:
- keys = mi.PersonIdx.keys()
- for key in keys:
- person = mi.PersonIdx[key]
- if person.name:
- # if we find the name, this is the main subject
- suid = person.URI.split(base)[1]
- subject = "<"+myspaceBase+"/"+suid+">"
- name = person.name.pop()
- sparqlU = sparqlU + '\n'+subject+' rdf:type mo:MusicArtist .'
- sparqlU = sparqlU + '\n'+subject+' myspace:myspaceID "'+filename.rstrip('.rdf')+'"^^xsd:int .'
- sparqlU = sparqlU + """\n"""+subject+' foaf:name "' + urllib2.quote(name)+'"@en . '
-
- # get all the top friends
- while(1):
- try:
- p = person.knows.pop()
- except:
- break
- else:
- ouid = p.URI.split(base)[1]
- obj = "<"+myspaceBase+"/"+ouid+">"
- sparqlU=sparqlU+ "\n"+subject+" foaf:knows "+ obj+ ' . ' "\n"+subject+" myspace:topFriend "+obj+ ' . '
- sparqlU = sparqlU + '\n'+obj+' rdf:type mo:MusicArtist .'
-
- while(1):
- try:
- thm = person.theme.pop()
- except:
- debug("breaking from genre pops")
- break
- else:
- thm = thm.URI.split(base)[1]
- # do some cleaning, bad genres in there like 35123543.rdf instead of hip hop
- if not re.match(".*\.rdf",thm):
- debug("adding genre: "+thm)
- genre = "<"+myspaceOnt + "#"+urllib2.quote(thm)+">"
- sparqlU=sparqlU+ "\n"+subject+ " myspace:genreTag "+ genre+ ' . '
-
- try:
- playcount = person.tipjar.pop().URI.split(base)[1]
- sparqlU=sparqlU+ "\n"+subject+ ' myspace:totalPlays "'+ playcount+'"^^xsd:int . '
- except:
- pass
-
- sparqlU=sparqlU+'}'
- return sparqlU
- else:
- return None
-
-def setLogger():
- '''just set the logger'''
- loggingConfig = {"format":'%(asctime)s %(levelname)-8s %(message)s',
- "datefmt":'%d.%m.%y %H:%M:%S',
- "level": logging.DEBUG,
- #"filename":logPath + "musicGrabber.log",
- "filemode":"w"}
- logging.basicConfig(**loggingConfig)
-
-def getFileListing(rdfFolder):
- '''return a list of all the rdf files found w/ given base'''
- rdfFolder = rdfFolder.rstrip('/')
- rdfFolder = rdfFolder+'/'
- ftp = ftplib.FTP("myrdfspace.com")
- ftp.login("myrdf", "my1stRDF")
- ftp.cwd("myrdfspace.com/"+rdfFolder)
- vList = ftp.nlst()
- return vList
-
-def trySparql(sparql, attempt, f):
- try:
- debug('attempting sparql update, try #' + str(attempt))
- sparql.setReturnFormat(SPARQLWrapper.TURTLE)
- ret = sparql.query().convert()
- except urllib2.HTTPError:
- debug('caught an http error, retrying...')
- if attempt<5:
- attempt+=1
- sleep(2)
- return trySparql(sparql, attempt, f)
- else:
- error("more that 5 http errors, giving up")
- failedList.append(f)
- except SPARQLWrapper.sparqlexceptions.QueryBadFormed:
- error("query failed for "+ str(f))
- debug('$$$$$$$$$$$$$$$$BADLY FORMED QUERY$$$$$$$$$$$$$$$$$$$')
- print sparql.queryString
- badQueryList.append(f)
- failedList.append(f)
- except:
- error("query failed for "+ str(f))
- debug('************UPDATE FAILED***********')
- failedList.append(f)
- print "Unexpected error:", sys.exc_info()[0]
- print sparql.queryString
- else:
- print ret
- return ret
- return None
-
-def splitQuery(query):
-'''sometimes the query is too long and has to be broken into smaller pieces'''
- lines = query.splitlines(1)
- splits = []
- split = ""
- count = 0
- for line in lines:
- if count < apacheLimit:
- split = split+line
- count+=len(line)
- else:
- splits.append(insert+split+'}')
- split= line
- count = 0
- splits.append(insert+split)
- return splits
-
-def main(argv=None):
- if argv is None:
- argv = sys.argv
- try:
- try:
- opts, args = getopt.getopt(argv[1:], "ho:b:s:v", ["help", "output=","base=", "start="])
- except getopt.error, msg:
- raise Usage(msg)
-
- # option processing
- base = None
- start = None
- for option, value in opts:
- if option == "-v":
- verbose = True
- if option in ("-h", "--help"):
- raise Usage(help_message)
- if option in ("-o", "--output"):
- output = value
- if option in ("-b", "--base"):
- base = value
- if option in ("-s", "--start"):
- start = value
- '''if option in ("-g", '--graph'):
- defaultGraph = value
- insert = """ \ninsert into graph <"""+defaultGraph+"""> {"""'''
-
-
- setLogger()
- if base == None:
- raise Usage(help_message)
- return 2
- # parse base uri
- folder = base.split("http://myrdfspace.com/")[1]
- debug('getting list of files')
- fileList = getFileListing(folder)
- debug('got list of files')
- #fileList = ['238729309.rdf', '13280592.rdf', '26412401.rdf', '8557307.rdf', '176635064.rdf', '12656647.rdf']
- startIndex=0
- if start:
- try:
- startIndex=fileList.index(start)
- except:
- debug("not a valid start file, not in list")
-
- for f in fileList[startIndex:]:
- debug('parsing on file: '+str(f))
- #parse each file and do a sparql update to the repository
- sparul = parseRDF(f, base)
- sparql = SPARQLWrapper.SPARQLWrapper(sparqlEndPoint)
- sparql.addDefaultGraph(defaultGraph)
- if sparul:
- # we have to deal w/ queries that are too long
- if len(sparul) > apacheLimit:
- debug('query too long, splitting...')
- splitSparul = splitQuery(sparul)
- for split in splitSparul:
- sparql.setQuery(prefixes+split)
- trySparql(sparql, 0, f)
- else:
- sparql.setQuery(prefixes+insert+sparul)
- trySparql(sparql, 0, f)
- else:
- debug('failure on '+str(f))
- failedList.append(f)
-
-
-
- debug("Complete!!!")
- print "\n\nREPORT:\n\tfailures: "+str(len(failedList))
- print "\nfails: "
- print failedList
- print "\n\nbad queries: "
- print badQueryList
-
- except Usage, err:
- print >> sys.stderr, sys.argv[0].split("/")[-1] + ": " + str(err.msg)
- print >> sys.stderr, "\t for help use --help"
- return 2
-
-
-if __name__ == "__main__":
- sys.exit(main())
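
A note on splitQuery() above: the endpoint apparently sits behind a request-size limit (hence apacheLimit), so a long update is chunked at line boundaries into several smaller insert statements. Below is a standalone, loop-based sketch of that idea - an illustration only, not the module itself; the limit and insert prefix mirror the constants above and the triples are placeholders.

apacheLimit = 2000
insert = '\ninsert into graph <http://dbtune.org/myspace-fj-2008> {'

def split_update(triple_lines):
    '''pack triple lines into separate insert statements, each roughly under the limit'''
    chunks, current, size = [], '', 0
    for line in triple_lines:
        if size + len(line) > apacheLimit and current:
            chunks.append(insert + current + '}')
            current, size = '', 0
        current += line
        size += len(line)
    if current:
        chunks.append(insert + current + '}')
    return chunks

# placeholder triples; each chunk would get the PREFIX block prepended and be sent via trySparql()
example = ['\n<http://dbtune.org/myspace/uid/%d> a <http://purl.org/ontology/mo/MusicArtist> . ' % i
           for i in range(200)]
chunks = split_update(example)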
Added: graphRDF/branches/old2sparul/src/addTotalFriends.py
===================================================================
--- graphRDF/branches/old2sparul/src/addTotalFriends.py (rev 0)
+++ graphRDF/branches/old2sparul/src/addTotalFriends.py 2009-06-22 14:08:10 UTC (rev 354)
@@ -0,0 +1,160 @@
+#!/usr/bin/python
+'''
+Created on Jun 19, 2009
+
+@author: kurtjx
+'''
+import SPARQLWrapper
+from rdflib import ConjunctiveGraph, URIRef, Literal, Namespace
+import logging
+from logging import debug, error, info
+from time import sleep
+import sys
+import urllib2
+
+
+DEFAULT_GRAPH = "http://dbtune.org/myspace-fj-2008"
+ENDPOINT = "http://virtuoso.dbtune.org/sparql"
+
+MYSPACE = Namespace("http://purl.org/ontology/myspace#")
+
+totfri_fail_list = []
+country_fail_list = []
+local_fail_list = []
+
+def get_some_artists(sparql, limit=500, offset=0):
+ debug('querying for artists with limit %s and offset %s' % (str(limit), str(offset)) )
+ q = 'define sql:log-enable 2 SELECT DISTINCT ?artist FROM <%s> WHERE { ?artist a <http://purl.org/ontology/mo/MusicArtist> } LIMIT %s OFFSET %s' % (DEFAULT_GRAPH, str(limit), str(offset))
+ sparql.setQuery(q)
+ results = try_sparql(sparql, 0, 5)
+ if results == None:
+ error('freaking out, no results in last query: %s' % q)
+ print totfri_fail_list, local_fail_list, country_fail_list
+ sys.exit(2)
+ else:
+ debug('creating local graph and parsing results...')
+ graph = ConjunctiveGraph()
+ for result in results['results']['bindings']:
+ uri = result['artist']['value']
+ #debug('creating new cursor')
+ #cursor = CONNECT.cursor()
+ #print('inserting triples for %s' % uri)
+ debug('getting total friends for %s' % uri)
+ results = get_total_friends(uri)
+ #q = 'SPARQL define sql:log-enable 2 INSERT IN GRAPH <'+DEFAULT_GRAPH+'> { <'+uri+'> <http://purl.org/ontology/myspace#totalFriends> "'+totfri+'"^^xsd:int } '
+ #q = "DB.DBA.TTLP_MT('<%s> <http://purl.org/ontology/myspace#totalFriends> %s . ', '', '%s') " % (uri, totfri, DEFAULT_GRAPH)
+ #print q
+ #cursor.execute(q)
+ #cursor.close()
+ #debug('cursor closed')
+ if results['totalFriends'] != None:
+ graph.add((URIRef(uri), MYSPACE['totalFriends'], Literal(int(results['totalFriends']))))
+ else:
+ debug('!!!!!!!!!!!!!!!!!!!!!!! no friends for %s !!!!!!!!!!!!!!!!!!!!!!' % uri)
+ totfri_fail_list.append(uri+'\n')
+
+ if results['locality'] != None:
+ graph.add((URIRef(uri), MYSPACE['locality'], Literal(results['locality'])))
+ else:
+ debug('@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ no locality for %s @@@@@@@@@@@@@@@@ ' % uri)
+ local_fail_list.append(uri+'\n')
+
+ if results['country'] != None:
+ graph.add((URIRef(uri), MYSPACE['country'], Literal(results['country'])))
+ else:
+ debug('******************************** no country for %s ***************************' % uri)
+ country_fail_list.append(uri+'\n')
+
+
+ debug('serializing results')
+ graph.serialize('./rdf/'+str(offset)+'.rdf')
+ f = open('./log/'+str(offset)+'_totfri_errors', 'w')
+ f.writelines(totfri_fail_list)
+ f.close()
+ f = open('./log/'+str(offset)+'_locality_errors', 'w')
+ f.writelines(local_fail_list)
+ f.close()
+ f = open('./log/'+str(offset)+'_country_errors', 'w')
+ f.writelines(country_fail_list)
+ f.close()
+
+ debug('done with result set')
+ offset += limit
+ get_some_artists(sparql, limit, offset)
+
+
+def get_total_friends(uri, attempt=0, fail=5):
+ '''get the total friends, locality and country from the given uri'''
+ graph = ConjunctiveGraph()
+ results = {'totalFriends': None, 'locality': None, 'country': None}  # default each key so callers can test for missing values
+ try:
+ graph.parse(uri)
+ except urllib2.HTTPError:
+ if attempt<fail:
+ attempt+=1
+ sleep(2)
+ return get_total_friends(uri, attempt, fail)
+ else:
+ return None
+ for row in graph.query('select ?totfri where { ?x <http://purl.org/ontology/myspace#totalFriends> ?totfri . } ' ):
+ totfri = row[0]
+ try:
+ totfri = totfri.split('http://dbtune.org/myspace/uid/')[-1]  # take the trailing number; strip() would only remove a character class
+ except:
+ results['totalFriends'] = None # exception means we didn't find any friends
+ else:
+ results['totalFriends'] = totfri
+ local = None
+ for row in graph.query('select ?local where { ?x <http://purl.org/ontology/myspace#locality> ?local . } '):
+ local = row[0]
+ results['locality'] = local
+ country = None
+ for row in graph.query('select ?country where { ?x <http://purl.org/ontology/myspace#country> ?country . } '):
+ country = row[0]
+ results['country'] = country
+ return results
+
+
+def insert_total_friends(uri, cursor):
+ totfri = get_total_friends(uri)
+ q = 'SPARQL define sql:log-enable 2 INSERT IN GRAPH <%s> { <%s> <http://purl.org/ontology/myspace#totalFriends> "%s"^^xsd:int } ' % (DEFAULT_GRAPH, uri, totfri)
+ #print q
+ cursor.execute(q)
+
+def try_sparql(sparql, attempt=0, fail=5):
+ try:
+ debug('attempting sparql query, try #' + str(attempt))
+ sparql.setReturnFormat(SPARQLWrapper.JSON)
+ ret = sparql.query().convert()
+ except urllib2.HTTPError:
+ debug('caught an http error, retrying...')
+ if attempt<fail:
+ attempt+=1
+ sleep(2)
+ return try_sparql(sparql, attempt, fail)
+ else:
+ error("more that 5 http errors, giving up")
+ return None
+ return ret
+
+def set_logger(level = logging.DEBUG):
+ '''just set the logger'''
+ loggingConfig = {"format":'%(asctime)s %(levelname)-8s %(message)s',
+ "datefmt":'%d.%m.%y %H:%M:%S',
+ "level": level,
+ #"filename":logPath + "musicGrabber.log",
+ "filemode":"w"}
+ logging.basicConfig(**loggingConfig)
+
+def main():
+ set_logger()
+ sparql = SPARQLWrapper.SPARQLWrapper(ENDPOINT)
+ sparql.setReturnFormat(SPARQLWrapper.JSON)
+ #CONNECT = pyodbc.connect('DSN=SysVirt;UID=dba;PWD=dba;HOST=localhost:1112')
+ get_some_artists(sparql, 500,0)
+
+
+
+if __name__ == '__main__':
+ main()
+
\ No newline at end of file
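
For reference, get_total_friends() above works by dereferencing each artist URI into a local rdflib graph and reading myspace:totalFriends (plus locality and country) back out with small SPARQL queries. A trimmed sketch of that per-artist lookup; the uid is a placeholder and the retry/error handling is left out.

from rdflib import ConjunctiveGraph

uri = 'http://dbtune.org/myspace/uid/12656647'  # placeholder uid
graph = ConjunctiveGraph()
graph.parse(uri)  # dereference the artist document into a local graph

q = 'select ?totfri where { ?x <http://purl.org/ontology/myspace#totalFriends> ?totfri . }'
for row in graph.query(q):
    print row[0]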
Copied: graphRDF/branches/old2sparul/src/old2sparul.py (from rev 353, graphRDF/branches/old2sparul/old2sparul.py)
===================================================================
--- graphRDF/branches/old2sparul/src/old2sparul.py (rev 0)
+++ graphRDF/branches/old2sparul/src/old2sparul.py 2009-06-22 14:08:10 UTC (rev 354)
@@ -0,0 +1,264 @@
+#!/usr/bin/env python
+# encoding: utf-8
+"""
+old2sparul.py
+
+This is an ad hoc script for taking data from myrdfspace.com, cleaning it, and putting it into a sparql endpoint
+
+Created by Kurtis Random on 2009-02-03.
+Copyright (c) 2009 C4DM QMUL. All rights reserved.
+"""
+
+import sys
+import getopt
+from logging import log, error, warning, info, debug
+import logging
+import ftplib
+import SPARQLWrapper
+import mopy
+import urllib2
+import re
+from time import sleep
+
+help_message = '''
+take old myrdfspace files and add them to the sparql endpoint...
+ -b --base <uri base from myrdfspace>
+ -s --start <uid to start from> useful after a crash ;-)
+'''
+
+failedList = []
+badQueryList = []
+
+defaultGraph = "http://dbtune.org/myspace-fj-2008"
+sparqlEndPoint = "http://dbtune.org/cmn/sparql"
+myspaceBase = "http://dbtune.org/myspace/uid"
+myspaceOnt = "http://purl.org/ontology/myspace"
+prefixes = """PREFIX owl: <http://www.w3.org/2002/07/owl#> \nPREFIX foaf: <http://xmlns.com/foaf/0.1/> \nPREFIX dc: <http://purl.org/dc/elements/1.1/> \nPREFIX mo: <http://purl.org/ontology/mo/>\nPREFIX myspace: <http://purl.org/ontology/myspace#>\nPREFIX xsd: <http://www.w3.org/2001/XMLSchema#>\nPREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>"""
+
+insert = """ \ninsert into graph <"""+defaultGraph+"""> {"""
+
+apacheLimit = 2000
+
+class Usage(Exception):
+ def __init__(self, msg):
+ self.msg = msg
+
+def tryImportRDF(filename, attempt):
+ if attempt < 5:
+ debug("importing rdf")
+ try:
+ mi = mopy.importRDFFile(filename)
+ except urllib2.URLError:
+ debug("URLError importing RDF, retrying")
+ sleep(1.0)
+ attempt+=1
+ return tryImportRDF(filename, attempt)
+ else:
+ return mi
+ debug("import failed after tries: " + str(attempt))
+ return None
+
+def parseRDF(filename, base):
+ '''parse the rdf and return a sparql update query'''
+ sparqlU=''
+ mi = tryImportRDF(base+filename, 0)
+ if mi:
+ keys = mi.PersonIdx.keys()
+ for key in keys:
+ person = mi.PersonIdx[key]
+ if person.name:
+ # if we find the name, this is the main subject
+ suid = person.URI.split(base)[1]
+ subject = "<"+myspaceBase+"/"+suid+">"
+ name = person.name.pop()
+ sparqlU = sparqlU + '\n'+subject+' rdf:type mo:MusicArtist .'
+ sparqlU = sparqlU + '\n'+subject+' myspace:myspaceID "'+filename.rstrip('.rdf')+'"^^xsd:int .'
+ sparqlU = sparqlU + """\n"""+subject+' foaf:name "' + urllib2.quote(name)+'"@en . '
+
+ # get all the top friends
+ while(1):
+ try:
+ p = person.knows.pop()
+ except:
+ break
+ else:
+ ouid = p.URI.split(base)[1]
+ obj = "<"+myspaceBase+"/"+ouid+">"
+ sparqlU=sparqlU+ "\n"+subject+" foaf:knows "+ obj+ ' . ' "\n"+subject+" myspace:topFriend "+obj+ ' . '
+ sparqlU = sparqlU + '\n'+obj+' rdf:type mo:MusicArtist .'
+
+ while(1):
+ try:
+ thm = person.theme.pop()
+ except:
+ debug("breaking from genre pops")
+ break
+ else:
+ thm = thm.URI.split(base)[1]
+ # do some cleaning, bad genres in there like 35123543.rdf instead of hip hop
+ if not re.match(".*\.rdf",thm):
+ debug("adding genre: "+thm)
+ genre = "<"+myspaceOnt + "#"+urllib2.quote(thm)+">"
+ sparqlU=sparqlU+ "\n"+subject+ " myspace:genreTag "+ genre+ ' . '
+
+ try:
+ playcount = person.tipjar.pop().URI.split(base)[1]
+ sparqlU=sparqlU+ "\n"+subject+ ' myspace:totalPlays "'+ playcount+'"^^xsd:int . '
+ except:
+ pass
+
+ sparqlU=sparqlU+'}'
+ return sparqlU
+ else:
+ return None
+
+def setLogger():
+ '''just set the logger'''
+ loggingConfig = {"format":'%(asctime)s %(levelname)-8s %(message)s',
+ "datefmt":'%d.%m.%y %H:%M:%S',
+ "level": logging.DEBUG,
+ #"filename":logPath + "musicGrabber.log",
+ "filemode":"w"}
+ logging.basicConfig(**loggingConfig)
+
+def getFileListing(rdfFolder):
+ '''return a list of all the rdf files found w/ given base'''
+ rdfFolder = rdfFolder.rstrip('/')
+ rdfFolder = rdfFolder+'/'
+ ftp = ftplib.FTP("myrdfspace.com")
+ ftp.login("myrdf", "my1stRDF")
+ ftp.cwd("myrdfspace.com/"+rdfFolder)
+ vList = ftp.nlst()
+ return vList
+
+def trySparql(sparql, attempt, f):
+ try:
+ debug('attempting sparql update, try #' + str(attempt))
+ sparql.setReturnFormat(SPARQLWrapper.TURTLE)
+ ret = sparql.query().convert()
+ except urllib2.HTTPError:
+ debug('caught an http error, retrying...')
+ if attempt<5:
+ attempt+=1
+ sleep(2)
+ return trySparql(sparql, attempt, f)
+ else:
+ error("more that 5 http errors, giving up")
+ failedList.append(f)
+ except SPARQLWrapper.sparqlexceptions.QueryBadFormed:
+ error("query failed for "+ str(f))
+ debug('$$$$$$$$$$$$$$$$BADLY FORMED QUERY$$$$$$$$$$$$$$$$$$$')
+ print sparql.queryString
+ badQueryList.append(f)
+ failedList.append(f)
+ except:
+ error("query failed for "+ str(f))
+ debug('************UPDATE FAILED***********')
+ failedList.append(f)
+ print "Unexpected error:", sys.exc_info()[0]
+ print sparql.queryString
+ else:
+ print ret
+ return ret
+ return None
+
+def splitQuery(query):
+'''sometimes the query is too long and has to be broken into smaller pieces'''
+ lines = query.splitlines(1)
+ splits = []
+ split = ""
+ count = 0
+ for line in lines:
+ if count < apacheLimit:
+ split = split+line
+ count+=len(line)
+ else:
+ splits.append(insert+split+'}')
+ split= line
+ count = 0
+ splits.append(insert+split)
+ return splits
+
+def main(argv=None):
+ if argv is None:
+ argv = sys.argv
+ try:
+ try:
+ opts, args = getopt.getopt(argv[1:], "ho:b:s:v", ["help", "output=","base=", "start="])
+ except getopt.error, msg:
+ raise Usage(msg)
+
+ # option processing
+ base = None
+ start = None
+ for option, value in opts:
+ if option == "-v":
+ verbose = True
+ if option in ("-h", "--help"):
+ raise Usage(help_message)
+ if option in ("-o", "--output"):
+ output = value
+ if option in ("-b", "--base"):
+ base = value
+ if option in ("-s", "--start"):
+ start = value
+ '''if option in ("-g", '--graph'):
+ defaultGraph = value
+ insert = """ \ninsert into graph <"""+defaultGraph+"""> {"""'''
+
+
+ setLogger()
+ if base == None:
+ raise Usage(help_message)
+ return 2
+ # parse base uri
+ folder = base.split("http://myrdfspace.com/")[1]
+ debug('getting list of files')
+ fileList = getFileListing(folder)
+ debug('got list of files')
+ #fileList = ['238729309.rdf', '13280592.rdf', '26412401.rdf', '8557307.rdf', '176635064.rdf', '12656647.rdf']
+ startIndex=0
+ if start:
+ try:
+ startIndex=fileList.index(start)
+ except:
+ debug("not a valid start file, not in list")
+
+ for f in fileList[startIndex:]:
+ debug('parsing on file: '+str(f))
+ #parse each file and do a sparql update to the repository
+ sparul = parseRDF(f, base)
+ sparql = SPARQLWrapper.SPARQLWrapper(sparqlEndPoint)
+ sparql.addDefaultGraph(defaultGraph)
+ if sparul:
+ # we have to deal w/ queries that are too long
+ if len(sparul) > apacheLimit:
+ debug('query too long, splitting...')
+ splitSparul = splitQuery(sparul)
+ for split in splitSparul:
+ sparql.setQuery(prefixes+split)
+ trySparql(sparql, 0, f)
+ else:
+ sparql.setQuery(prefixes+insert+sparul)
+ trySparql(sparql, 0, f)
+ else:
+ debug('failure on '+str(f))
+ failedList.append(f)
+
+
+
+ debug("Complete!!!")
+ print "\n\nREPORT:\n\tfailures: "+str(len(failedList))
+ print "\nfails: "
+ print failedList
+ print "\n\nbad queries: "
+ print badQueryList
+
+ except Usage, err:
+ print >> sys.stderr, sys.argv[0].split("/")[-1] + ": " + str(err.msg)
+ print >> sys.stderr, "\t for help use --help"
+ return 2
+
+
+if __name__ == "__main__":
+ sys.exit(main())
Property changes on: graphRDF/branches/old2sparul/src/old2sparul.py
___________________________________________________________________
Added: svn:mergeinfo
+
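
One design note on the copy above: tryImportRDF() and trySparql() retry transient network failures a fixed number of times by calling themselves recursively, which relies on the recursive result being passed back up to the original caller. The same bounded-retry pattern written as a loop, shown purely as an illustration; do_fetch is a placeholder for the real network call.

import urllib2
from time import sleep

def with_retries(do_fetch, tries=5, delay=2):
    '''call do_fetch(), retrying transient HTTP/URL errors up to `tries` times'''
    for attempt in range(tries):
        try:
            return do_fetch()
        except (urllib2.HTTPError, urllib2.URLError):
            sleep(delay)  # back off briefly before the next attempt
    return None  # every attempt failed; callers treat None as a failure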
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.