From: <ku...@us...> - 2009-06-22 14:08:14
Revision: 354
          http://mypyspace.svn.sourceforge.net/mypyspace/?rev=354&view=rev
Author:   kurtjx
Date:     2009-06-22 14:08:10 +0000 (Mon, 22 Jun 2009)

Log Message:
-----------
some fixing of the old graphs - adding data to sparql point

Added Paths:
-----------
    graphRDF/branches/old2sparul/src/
    graphRDF/branches/old2sparul/src/addTotalFriends.py
    graphRDF/branches/old2sparul/src/old2sparul.py

Removed Paths:
-------------
    graphRDF/branches/old2sparul/old2sparul.py

Deleted: graphRDF/branches/old2sparul/old2sparul.py
===================================================================
--- graphRDF/branches/old2sparul/old2sparul.py	2009-05-27 21:19:59 UTC (rev 353)
+++ graphRDF/branches/old2sparul/old2sparul.py	2009-06-22 14:08:10 UTC (rev 354)
@@ -1,264 +0,0 @@
-#!/usr/bin/env python
-# encoding: utf-8
-"""
-old2sparul.py
-
-This is an ad hoc script for taking data from myrdfspace.com, cleaning it, and putting in sparql endpoint
-
-Created by Kurtis Random on 2009-02-03.
-Copyright (c) 2009 C4DM QMUL. All rights reserved.
-"""
-
-import sys
-import getopt
-from logging import log, error, warning, info, debug
-import logging
-import ftplib
-import SPARQLWrapper
-import mopy
-import urllib2
-import re
-from time import sleep
-
-help_message = '''
-take old myrdfspace files and add to the sparql endpoint...
- -b --base <uri base from myrdfspace>
- -s --start <uid to start from> useful after a crash ;-)
-'''
-
-failedList = []
-badQueryList = []
-
-defaultGraph = "http://dbtune.org/myspace-fj-2008"
-sparqlEndPoint = "http://dbtune.org/cmn/sparql"
-myspaceBase = "http://dbtune.org/myspace/uid"
-myspaceOnt = "http://purl.org/ontology/myspace"
-prefixes = """PREFIX owl: <http://www.w3.org/2002/07/owl#> \nPREFIX foaf: <http://xmlns.com/foaf/0.1/> \nPREFIX dc: <http://purl.org/dc/elements/1.1/> \nPREFIX mo: <http://purl.org/ontology/mo/>\nPREFIX myspace: <http://purl.org/ontology/myspace#>\nPREFIX xsd: <http://www.w3.org/2001/XMLSchema#>\nPREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>"""
-
-insert = """ \ninsert into graph <"""+defaultGraph+"""> {"""
-
-apacheLimit = 2000
-
-class Usage(Exception):
-    def __init__(self, msg):
-        self.msg = msg
-
-def tryImportRDF(filename, attempt):
-    if attempt < 5:
-        debug("importing rdf")
-        try:
-            mi = mopy.importRDFFile(filename)
-        except urllib2.URLError:
-            debug("URLError importing RDF, retrying")
-            sleep(1.0)
-            attempt+=1
-            tryImportRDF(filename, attempt)
-        else:
-            return mi
-    debug("import failed after tries: " + str(attempt))
-    return None
-
-def parseRDF(filename, base):
-    '''parse the rdf and return a sparql update query'''
-    sparqlU=''
-    mi = tryImportRDF(base+filename, 0)
-    if mi:
-        keys = mi.PersonIdx.keys()
-        for key in keys:
-            person = mi.PersonIdx[key]
-            if person.name:
-                # if we find the name, this is the main subject
-                suid = person.URI.split(base)[1]
-                subject = "<"+myspaceBase+"/"+suid+">"
-                name = person.name.pop()
-                sparqlU = sparqlU + '\n'+subject+' rdf:type mo:MusicArtist .'
-                sparqlU = sparqlU + '\n'+subject+' myspace:myspaceID "'+filename.rstrip('.rdf')+'"^^xsd:int .'
-                sparqlU = sparqlU + """\n"""+subject+' foaf:name "' + urllib2.quote(name)+'"@en . '
-
-                # get all the top friends
-                while(1):
-                    try:
-                        p = person.knows.pop()
-                    except:
-                        break
-                    else:
-                        ouid = p.URI.split(base)[1]
-                        obj = "<"+myspaceBase+"/"+ouid+">"
-                        sparqlU=sparqlU+ "\n"+subject+" foaf:knows "+ obj+ ' . ' "\n"+subject+" myspace:topFriend "+obj+ ' . '
-                        sparqlU = sparqlU + '\n'+obj+' rdf:type mo:MusicArtist .'
-
-                while(1):
-                    try:
-                        thm = person.theme.pop()
-                    except:
-                        debug("breaking from genre pops")
-                        break
-                    else:
-                        thm = thm.URI.split(base)[1]
-                        # do some cleaning, bad genres in there like 35123543.rdf instead of hip hop
-                        if not re.match(".*\.rdf",thm):
-                            debug("adding genre: "+thm)
-                            genre = "<"+myspaceOnt + "#"+urllib2.quote(thm)+">"
-                            sparqlU=sparqlU+ "\n"+subject+ " myspace:genreTag "+ genre+ ' . '
-
-                try:
-                    playcount = person.tipjar.pop().URI.split(base)[1]
-                    sparqlU=sparqlU+ "\n"+subject+ ' myspace:totalPlays "'+ playcount+'"^^xsd:int . '
-                except:
-                    pass
-
-        sparqlU=sparqlU+'}'
-        return sparqlU
-    else:
-        return None
-
-def setLogger():
-    '''just set the logger'''
-    loggingConfig = {"format":'%(asctime)s %(levelname)-8s %(message)s',
-                     "datefmt":'%d.%m.%y %H:%M:%S',
-                     "level": logging.DEBUG,
-                     #"filename":logPath + "musicGrabber.log",
-                     "filemode":"w"}
-    logging.basicConfig(**loggingConfig)
-
-def getFileListing(rdfFolder):
-    '''return a list of all the rdf files found w/ given base'''
-    rdfFolder = rdfFolder.rstrip('/')
-    rdfFolder = rdfFolder+'/'
-    ftp = ftplib.FTP("myrdfspace.com")
-    ftp.login("myrdf", "my1stRDF")
-    ftp.cwd("myrdfspace.com/"+rdfFolder)
-    vList = ftp.nlst()
-    return vList
-
-def trySparql(sparql, attempt, f):
-    try:
-        debug('attempting sparql update, try #' + str(attempt))
-        sparql.setReturnFormat(SPARQLWrapper.TURTLE)
-        ret = sparql.query().convert()
-    except urllib2.HTTPError:
-        debug('caught an http error, retrying...')
-        if attempt<5:
-            attempt+=1
-            sleep(2)
-            trySparql(sparql, attempt, f)
-        else:
-            error("more that 5 http errors, giving up")
-            failedList.append(f)
-    except SPARQLWrapper.sparqlexceptions.QueryBadFormed:
-        error("query failed for "+ str(f))
-        debug('$$$$$$$$$$$$$$$$BADLY FORMED QUERY$$$$$$$$$$$$$$$$$$$')
-        print sparql.queryString
-        badQueryList.append(f)
-        failedList.append(f)
-    except:
-        error("query failed for "+ str(f))
-        debug('************UPDATE FAILED***********')
-        failedList.append(f)
-        print "Unexpected error:", sys.exc_info()[0]
-        print sparql.queryString
-    else:
-        print ret
-        return ret
-    return None
-
-def splitQuery(query):
-    '''sometime the query is too long and should be broke in two pieces'''
-    lines = query.splitlines(1)
-    splits = []
-    split = ""
-    count = 0
-    for line in lines:
-        if count < apacheLimit:
-            split = split+line
-            count+=len(line)
-        else:
-            splits.append(insert+split+'}')
-            split= line
-            count = 0
-    splits.append(insert+split)
-    return splits
-
-def main(argv=None):
-    if argv is None:
-        argv = sys.argv
-    try:
-        try:
-            opts, args = getopt.getopt(argv[1:], "ho:b:s:v", ["help", "output=","base=", "start="])
-        except getopt.error, msg:
-            raise Usage(msg)
-
-        # option processing
-        base = None
-        start = None
-        for option, value in opts:
-            if option == "-v":
-                verbose = True
-            if option in ("-h", "--help"):
-                raise Usage(help_message)
-            if option in ("-o", "--output"):
-                output = value
-            if option in ("-b", "--base"):
-                base = value
-            if option in ("-s", "--start"):
-                start = value
-            '''if option in ("-g", '--graph'):
-                defaultGraph = value
-                insert = """ \ninsert into graph <"""+defaultGraph+"""> {"""'''
-
-
-        setLogger()
-        if base == None:
-            raise Usage(help_message)
-            return 2
-        # parse base uri
-        folder = base.split("http://myrdfspace.com/")[1]
-        debug('getting list of files')
-        fileList = getFileListing(folder)
-        debug('got list of files')
-        #fileList = ['238729309.rdf', '13280592.rdf', '26412401.rdf', '8557307.rdf', '176635064.rdf', '12656647.rdf']
-        startIndex=0
-        if start:
-            try:
-                startIndex=fileList.index(start)
-            except:
-                debug("not a valid start file, not in list")
-
-        for f in fileList[startIndex:]:
-            debug('parsing on file: '+str(f))
-            #parse each file and do a sparql update to the repository
-            sparul = parseRDF(f, base)
-            sparql = SPARQLWrapper.SPARQLWrapper(sparqlEndPoint)
-            sparql.addDefaultGraph(defaultGraph)
-            if sparul:
-                # we have to deal w/ queries that are too long
-                if len(sparul) > apacheLimit:
-                    debug('query too long, splitting...')
-                    splitSparul = splitQuery(sparul)
-                    for split in splitSparul:
-                        sparql.setQuery(prefixes+split)
-                        trySparql(sparql, 0, f)
-                else:
-                    sparql.setQuery(prefixes+insert+sparul)
-                    trySparql(sparql, 0, f)
-            else:
-                debug('failure on '+str(f))
-                failedList.append(f)
-
-
-
-        debug("Complete!!!")
-        print "\n\nREPORT:\n\tfailures: "+str(len(failedList))
-        print "\nfails: "
-        print failedList
-        print "\n\nbad queries: "
-        print badQueryList
-
-    except Usage, err:
-        print >> sys.stderr, sys.argv[0].split("/")[-1] + ": " + str(err.msg)
-        print >> sys.stderr, "\t for help use --help"
-        return 2
-
-
-if __name__ == "__main__":
-    sys.exit(main())

Added: graphRDF/branches/old2sparul/src/addTotalFriends.py
===================================================================
--- graphRDF/branches/old2sparul/src/addTotalFriends.py	(rev 0)
+++ graphRDF/branches/old2sparul/src/addTotalFriends.py	2009-06-22 14:08:10 UTC (rev 354)
@@ -0,0 +1,160 @@
+#!/usr/bin/python
+'''
+Created on Jun 19, 2009
+
+@author: kurtjx
+'''
+import SPARQLWrapper
+from rdflib import ConjunctiveGraph, URIRef, Literal, Namespace
+import logging
+from logging import debug, error, info
+from time import sleep
+import sys
+import urllib2
+
+
+DEFAULT_GRAPH = "http://dbtune.org/myspace-fj-2008"
+ENDPOINT = "http://virtuoso.dbtune.org/sparql"
+
+MYSPACE = Namespace("http://purl.org/ontology/myspace#")
+
+totfri_fail_list = []
+country_fail_list = []
+local_fail_list = []
+
+def get_some_artists(sparql, limit=500, offset=0):
+    debug('querying for artists with limit %s and offset %s' % (str(limit), str(offset)) )
+    q = 'define sql:log-enable 2 SELECT DISTINCT ?artist FROM <%s> WHERE { ?artist a <http://purl.org/ontology/mo/MusicArtist> } LIMIT %s OFFSET %s' % (DEFAULT_GRAPH, str(limit), str(offset))
+    sparql.setQuery(q)
+    results = try_sparql(sparql, 0, 5)
+    if results == None:
+        error('freaking out, no results in last query: %s' % q)
+        print fail_list
+        sys.exit(2)
+    else:
+        debug('creating local graph and parsing results...')
+        graph = ConjunctiveGraph()
+        for result in results['results']['bindings']:
+            uri = result['artist']['value']
+            #debug('creating new cursor')
+            #cursor = CONNECT.cursor()
+            #print('inserting triples for %s' % uri)
+            debug('getting total friends for %s' % uri)
+            results = get_total_friends(uri)
+            #q = 'SPARQL define sql:log-enable 2 INSERT IN GRAPH <'+DEFAULT_GRAPH+'> { <'+uri+'> <http://purl.org/ontology/myspace#totalFriends> "'+totfri+'"^^xsd:int } '
+            #q = "DB.DBA.TTLP_MT('<%s> <http://purl.org/ontology/myspace#totalFriends> %s . ', '', '%s') " % (uri, totfri, DEFAULT_GRAPH)
+            #print q
+            #cursor.execute(q)
+            #cursor.close()
+            #debug('cursor closed')
+            if results['totalFriends'] != None:
+                graph.add((URIRef(uri), MYSPACE['totalFriends'], Literal(int(results['totalFriends']))))
+            else:
+                debug('!!!!!!!!!!!!!!!!!!!!!!! no friends for %s !!!!!!!!!!!!!!!!!!!!!!' % uri)
+                totfri_fail_list.append(uri+'\n')
+
+            if results['locality'] != None:
+                graph.add((URIRef(uri), MYSPACE['locality'], Literal(results['locality'])))
+            else:
+                debug('@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ no locality for %s @@@@@@@@@@@@@@@@ ' % uri)
+                local_fail_list.append(uri+'\n')
+
+            if results['country'] != None:
+                graph.add((URIRef(uri), MYSPACE['country'], Literal(results['country'])))
+            else:
+                debug('******************************** no country for %s ***************************' % uri)
+                country_fail_list.append(uri+'\n')
+
+
+        debug('serializing results')
+        graph.serialize('./rdf/'+str(offset)+'.rdf')
+        f = open('./log/'+str(offset)+'_totfri_errors', 'w')
+        f.writelines(totfri_fail_list)
+        f.close()
+        f = open('./log/'+str(offset)+'_locality_errors', 'w')
+        f.writelines(local_fail_list)
+        f.close()
+        f = open('./log/'+str(offset)+'_country_errors', 'w')
+        f.writelines(country_fail_list)
+        f.close()
+
+        debug('done with result set')
+        offset += limit
+        get_some_artists(sparql, limit, offset)
+
+
+def get_total_friends(uri, attempt=0, fail=5):
+    '''get the total friends from the give uri'''
+    graph = ConjunctiveGraph()
+    results = {}
+    try:
+        graph.parse(uri)
+    except urllib2.HTTPError:
+        if attempt<fail:
+            attempt+=1
+            sleep(2)
+            get_total_friends(uri, attempt, fail)
+        else:
+            return None
+    for row in graph.query('select ?totfri where { ?x <http://purl.org/ontology/myspace#totalFriends> ?totfri . } ' ):
+        totfri = row[0]
+        try:
+            totfri = totfri.strip('http://dbtune.org/myspace/uid/')
+        except:
+            results['totalFriends'] = None # exception means we didn't find any friends
+        else:
+            results['totalFriends'] = totfri
+    local = None
+    for row in graph.query('select ?local where { ?x <http://purl.org/ontology/myspace#locality> ?local . } '):
+        local = row[0]
+    results['locality'] = local
+    country = None
+    for row in graph.query('select ?country where { ?x <http://purl.org/ontology/myspace#country> ?country . } '):
+        country = row[0]
+    results['country'] = country
+    return results
+
+
+def insert_total_friends(uri, cursor):
+    totfri = get_total_friends(uri)
+    q = 'SPARQL define sql:log-enable 2 INSERT IN GRAPH <%s> { <%s> <http://purl.org/ontology/myspace#totalFriends> "%s"^^xsd:int } ' % (DEFAULT_GRAPH, uri, totfri)
+    #print q
+    cursor.execute(q)
+
+def try_sparql(sparql, attempt=0, fail=5):
+    try:
+        debug('attempting sparql query, try #' + str(attempt))
+        sparql.setReturnFormat(SPARQLWrapper.JSON)
+        ret = sparql.query().convert()
+    except urllib2.HTTPError:
+        debug('caught an http error, retrying...')
+        if attempt<fail:
+            attempt+=1
+            sleep(2)
+            trySparql(sparql, attempt, fail)
+        else:
+            error("more that 5 http errors, giving up")
+            return None
+    return ret
+
+def set_logger(level = logging.DEBUG):
+    '''just set the logger'''
+    loggingConfig = {"format":'%(asctime)s %(levelname)-8s %(message)s',
+                     "datefmt":'%d.%m.%y %H:%M:%S',
+                     "level": level,
+                     #"filename":logPath + "musicGrabber.log",
+                     "filemode":"w"}
+    logging.basicConfig(**loggingConfig)
+
+def main():
+    set_logger()
+    sparql = SPARQLWrapper.SPARQLWrapper(ENDPOINT)
+    sparql.setReturnFormat(SPARQLWrapper.JSON)
+    #CONNECT = pyodbc.connect('DSN=SysVirt;UID=dba;PWD=dba;HOST=localhost:1112')
+    get_some_artists(sparql, 500,0)
+
+
+
+if __name__ == '__main__':
+    main()
+    
\ No newline at end of file

Copied: graphRDF/branches/old2sparul/src/old2sparul.py (from rev 353, graphRDF/branches/old2sparul/old2sparul.py)
===================================================================
--- graphRDF/branches/old2sparul/src/old2sparul.py	(rev 0)
+++ graphRDF/branches/old2sparul/src/old2sparul.py	2009-06-22 14:08:10 UTC (rev 354)
@@ -0,0 +1,264 @@
+#!/usr/bin/env python
+# encoding: utf-8
+"""
+old2sparul.py
+
+This is an ad hoc script for taking data from myrdfspace.com, cleaning it, and putting in sparql endpoint
+
+Created by Kurtis Random on 2009-02-03.
+Copyright (c) 2009 C4DM QMUL. All rights reserved.
+"""
+
+import sys
+import getopt
+from logging import log, error, warning, info, debug
+import logging
+import ftplib
+import SPARQLWrapper
+import mopy
+import urllib2
+import re
+from time import sleep
+
+help_message = '''
+take old myrdfspace files and add to the sparql endpoint...
+ -b --base <uri base from myrdfspace>
+ -s --start <uid to start from> useful after a crash ;-)
+'''
+
+failedList = []
+badQueryList = []
+
+defaultGraph = "http://dbtune.org/myspace-fj-2008"
+sparqlEndPoint = "http://dbtune.org/cmn/sparql"
+myspaceBase = "http://dbtune.org/myspace/uid"
+myspaceOnt = "http://purl.org/ontology/myspace"
+prefixes = """PREFIX owl: <http://www.w3.org/2002/07/owl#> \nPREFIX foaf: <http://xmlns.com/foaf/0.1/> \nPREFIX dc: <http://purl.org/dc/elements/1.1/> \nPREFIX mo: <http://purl.org/ontology/mo/>\nPREFIX myspace: <http://purl.org/ontology/myspace#>\nPREFIX xsd: <http://www.w3.org/2001/XMLSchema#>\nPREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>"""
+
+insert = """ \ninsert into graph <"""+defaultGraph+"""> {"""
+
+apacheLimit = 2000
+
+class Usage(Exception):
+    def __init__(self, msg):
+        self.msg = msg
+
+def tryImportRDF(filename, attempt):
+    if attempt < 5:
+        debug("importing rdf")
+        try:
+            mi = mopy.importRDFFile(filename)
+        except urllib2.URLError:
+            debug("URLError importing RDF, retrying")
+            sleep(1.0)
+            attempt+=1
+            tryImportRDF(filename, attempt)
+        else:
+            return mi
+    debug("import failed after tries: " + str(attempt))
+    return None
+
+def parseRDF(filename, base):
+    '''parse the rdf and return a sparql update query'''
+    sparqlU=''
+    mi = tryImportRDF(base+filename, 0)
+    if mi:
+        keys = mi.PersonIdx.keys()
+        for key in keys:
+            person = mi.PersonIdx[key]
+            if person.name:
+                # if we find the name, this is the main subject
+                suid = person.URI.split(base)[1]
+                subject = "<"+myspaceBase+"/"+suid+">"
+                name = person.name.pop()
+                sparqlU = sparqlU + '\n'+subject+' rdf:type mo:MusicArtist .'
+                sparqlU = sparqlU + '\n'+subject+' myspace:myspaceID "'+filename.rstrip('.rdf')+'"^^xsd:int .'
+                sparqlU = sparqlU + """\n"""+subject+' foaf:name "' + urllib2.quote(name)+'"@en . '
+
+                # get all the top friends
+                while(1):
+                    try:
+                        p = person.knows.pop()
+                    except:
+                        break
+                    else:
+                        ouid = p.URI.split(base)[1]
+                        obj = "<"+myspaceBase+"/"+ouid+">"
+                        sparqlU=sparqlU+ "\n"+subject+" foaf:knows "+ obj+ ' . ' "\n"+subject+" myspace:topFriend "+obj+ ' . '
+                        sparqlU = sparqlU + '\n'+obj+' rdf:type mo:MusicArtist .'
+
+                while(1):
+                    try:
+                        thm = person.theme.pop()
+                    except:
+                        debug("breaking from genre pops")
+                        break
+                    else:
+                        thm = thm.URI.split(base)[1]
+                        # do some cleaning, bad genres in there like 35123543.rdf instead of hip hop
+                        if not re.match(".*\.rdf",thm):
+                            debug("adding genre: "+thm)
+                            genre = "<"+myspaceOnt + "#"+urllib2.quote(thm)+">"
+                            sparqlU=sparqlU+ "\n"+subject+ " myspace:genreTag "+ genre+ ' . '
+
+                try:
+                    playcount = person.tipjar.pop().URI.split(base)[1]
+                    sparqlU=sparqlU+ "\n"+subject+ ' myspace:totalPlays "'+ playcount+'"^^xsd:int . '
+                except:
+                    pass
+
+        sparqlU=sparqlU+'}'
+        return sparqlU
+    else:
+        return None
+
+def setLogger():
+    '''just set the logger'''
+    loggingConfig = {"format":'%(asctime)s %(levelname)-8s %(message)s',
+                     "datefmt":'%d.%m.%y %H:%M:%S',
+                     "level": logging.DEBUG,
+                     #"filename":logPath + "musicGrabber.log",
+                     "filemode":"w"}
+    logging.basicConfig(**loggingConfig)
+
+def getFileListing(rdfFolder):
+    '''return a list of all the rdf files found w/ given base'''
+    rdfFolder = rdfFolder.rstrip('/')
+    rdfFolder = rdfFolder+'/'
+    ftp = ftplib.FTP("myrdfspace.com")
+    ftp.login("myrdf", "my1stRDF")
+    ftp.cwd("myrdfspace.com/"+rdfFolder)
+    vList = ftp.nlst()
+    return vList
+
+def trySparql(sparql, attempt, f):
+    try:
+        debug('attempting sparql update, try #' + str(attempt))
+        sparql.setReturnFormat(SPARQLWrapper.TURTLE)
+        ret = sparql.query().convert()
+    except urllib2.HTTPError:
+        debug('caught an http error, retrying...')
+        if attempt<5:
+            attempt+=1
+            sleep(2)
+            trySparql(sparql, attempt, f)
+        else:
+            error("more that 5 http errors, giving up")
+            failedList.append(f)
+    except SPARQLWrapper.sparqlexceptions.QueryBadFormed:
+        error("query failed for "+ str(f))
+        debug('$$$$$$$$$$$$$$$$BADLY FORMED QUERY$$$$$$$$$$$$$$$$$$$')
+        print sparql.queryString
+        badQueryList.append(f)
+        failedList.append(f)
+    except:
+        error("query failed for "+ str(f))
+        debug('************UPDATE FAILED***********')
+        failedList.append(f)
+        print "Unexpected error:", sys.exc_info()[0]
+        print sparql.queryString
+    else:
+        print ret
+        return ret
+    return None
+
+def splitQuery(query):
+    '''sometime the query is too long and should be broke in two pieces'''
+    lines = query.splitlines(1)
+    splits = []
+    split = ""
+    count = 0
+    for line in lines:
+        if count < apacheLimit:
+            split = split+line
+            count+=len(line)
+        else:
+            splits.append(insert+split+'}')
+            split= line
+            count = 0
+    splits.append(insert+split)
+    return splits
+
+def main(argv=None):
+    if argv is None:
+        argv = sys.argv
+    try:
+        try:
+            opts, args = getopt.getopt(argv[1:], "ho:b:s:v", ["help", "output=","base=", "start="])
+        except getopt.error, msg:
+            raise Usage(msg)
+
+        # option processing
+        base = None
+        start = None
+        for option, value in opts:
+            if option == "-v":
+                verbose = True
+            if option in ("-h", "--help"):
+                raise Usage(help_message)
+            if option in ("-o", "--output"):
+                output = value
+            if option in ("-b", "--base"):
+                base = value
+            if option in ("-s", "--start"):
+                start = value
+            '''if option in ("-g", '--graph'):
+                defaultGraph = value
+                insert = """ \ninsert into graph <"""+defaultGraph+"""> {"""'''
+
+
+        setLogger()
+        if base == None:
+            raise Usage(help_message)
+            return 2
+        # parse base uri
+        folder = base.split("http://myrdfspace.com/")[1]
+        debug('getting list of files')
+        fileList = getFileListing(folder)
+        debug('got list of files')
+        #fileList = ['238729309.rdf', '13280592.rdf', '26412401.rdf', '8557307.rdf', '176635064.rdf', '12656647.rdf']
+        startIndex=0
+        if start:
+            try:
+                startIndex=fileList.index(start)
+            except:
+                debug("not a valid start file, not in list")
+
+        for f in fileList[startIndex:]:
+            debug('parsing on file: '+str(f))
+            #parse each file and do a sparql update to the repository
+            sparul = parseRDF(f, base)
+            sparql = SPARQLWrapper.SPARQLWrapper(sparqlEndPoint)
+            sparql.addDefaultGraph(defaultGraph)
+            if sparul:
+                # we have to deal w/ queries that are too long
+                if len(sparul) > apacheLimit:
+                    debug('query too long, splitting...')
+                    splitSparul = splitQuery(sparul)
+                    for split in splitSparul:
+                        sparql.setQuery(prefixes+split)
+                        trySparql(sparql, 0, f)
+                else:
+                    sparql.setQuery(prefixes+insert+sparul)
+                    trySparql(sparql, 0, f)
+            else:
+                debug('failure on '+str(f))
+                failedList.append(f)
+
+
+
+        debug("Complete!!!")
+        print "\n\nREPORT:\n\tfailures: "+str(len(failedList))
+        print "\nfails: "
+        print failedList
+        print "\n\nbad queries: "
+        print badQueryList
+
+    except Usage, err:
+        print >> sys.stderr, sys.argv[0].split("/")[-1] + ": " + str(err.msg)
+        print >> sys.stderr, "\t for help use --help"
+        return 2
+
+
+if __name__ == "__main__":
+    sys.exit(main())


Property changes on: graphRDF/branches/old2sparul/src/old2sparul.py
___________________________________________________________________
Added: svn:mergeinfo
   + 

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.