You can subscribe to this list here.
2007 | Jan | Feb | Mar | Apr | May | Jun | Jul | Aug | Sep (3) | Oct | Nov | Dec |
---|---|---|---|---|---|---|---|---|---|---|---|---|
2009 | Jan (9) | Feb (13) | Mar (4) | Apr (4) | May (13) | Jun (1) | Jul | Aug | Sep | Oct (2) | Nov | Dec |
From: <gea...@us...> - 2009-10-06 20:56:37
|
Revision: 357 http://mypyspace.svn.sourceforge.net/mypyspace/?rev=357&view=rev Author: gearmonkey Date: 2009-10-06 20:55:19 +0000 (Tue, 06 Oct 2009) Log Message: ----------- I believe this modified setup.py should massively aid library install. Now the following should be possible: svn co https://mypyspace.svn.sourceforge.net/svnroot/mypyspace/mps/trunk mps cd mps python setup.py install and done, complete with dependency checking and such. In order to make this work, there is now an external pull to motools/mopy and mopy is installed by the setup script after genpy.py is run (which the setup.py file does for you.) Modified Paths: -------------- mps/trunk/setup.py Modified: mps/trunk/setup.py =================================================================== --- mps/trunk/setup.py 2009-10-06 18:20:33 UTC (rev 356) +++ mps/trunk/setup.py 2009-10-06 20:55:19 UTC (rev 357) @@ -15,9 +15,14 @@ from setuptools import setup, find_packages from subprocess import call +from os import chdir, getcwd + #this is a bit of a mess, should do it internally. -call(["python", "mopy/genpy.py"]) +chdir("mopy") +print getcwd() +call(["python", "genpy.py"]) +chdir("..") setup (name = 'mps', This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <gea...@us...> - 2009-10-06 18:20:51
|
Revision: 356 http://mypyspace.svn.sourceforge.net/mypyspace/?rev=356&view=rev Author: gearmonkey Date: 2009-10-06 18:20:33 +0000 (Tue, 06 Oct 2009) Log Message: ----------- added an external call to mopy for the class installer. Fancy. Modified Paths: -------------- mps/trunk/setup.py Property Changed: ---------------- mps/trunk/ Property changes on: mps/trunk ___________________________________________________________________ Added: svn:externals + mopy -r835 https://motools.svn.sourceforge.net/svnroot/motools/mopy Modified: mps/trunk/setup.py =================================================================== --- mps/trunk/setup.py 2009-10-05 10:23:05 UTC (rev 355) +++ mps/trunk/setup.py 2009-10-06 18:20:33 UTC (rev 356) @@ -5,17 +5,21 @@ distutil file for the mps module -needs mopy to work right. the mopy install is a bit ugly... +uses some setuptools fanciness Created by Benjamin Fields on 2009-09-04. -Copyright (c) 2009 Goldsmith University of London. All rights reserved. +Copyright (c) 2009 Goldsmith University of London. """ from setuptools import setup, find_packages +from subprocess import call +#this is a bit of a mess, should do it internally. +call(["python", "mopy/genpy.py"]) + setup (name = 'mps', version = '0.1a', packages = find_packages(), This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ku...@us...> - 2009-06-22 14:08:14
|
Revision: 354 http://mypyspace.svn.sourceforge.net/mypyspace/?rev=354&view=rev Author: kurtjx Date: 2009-06-22 14:08:10 +0000 (Mon, 22 Jun 2009) Log Message: ----------- some fixing of the old graphs - adding data to sparql point Added Paths: ----------- graphRDF/branches/old2sparul/src/ graphRDF/branches/old2sparul/src/addTotalFriends.py graphRDF/branches/old2sparul/src/old2sparul.py Removed Paths: ------------- graphRDF/branches/old2sparul/old2sparul.py Deleted: graphRDF/branches/old2sparul/old2sparul.py =================================================================== --- graphRDF/branches/old2sparul/old2sparul.py 2009-05-27 21:19:59 UTC (rev 353) +++ graphRDF/branches/old2sparul/old2sparul.py 2009-06-22 14:08:10 UTC (rev 354) @@ -1,264 +0,0 @@ -#!/usr/bin/env python -# encoding: utf-8 -""" -old2sparul.py - -This is an ad hoc script for taking data from myrdfspace.com, cleaning it, and putting in sparql endpoint - -Created by Kurtis Random on 2009-02-03. -Copyright (c) 2009 C4DM QMUL. All rights reserved. -""" - -import sys -import getopt -from logging import log, error, warning, info, debug -import logging -import ftplib -import SPARQLWrapper -import mopy -import urllib2 -import re -from time import sleep - -help_message = ''' -take old myrdfspace files and add to the sparql endpoint... 
- -b --base <uri base from myrdfspace> - -s --start <uid to start from> useful after a crash ;-) -''' - -failedList = [] -badQueryList = [] - -defaultGraph = "http://dbtune.org/myspace-fj-2008" -sparqlEndPoint = "http://dbtune.org/cmn/sparql" -myspaceBase = "http://dbtune.org/myspace/uid" -myspaceOnt = "http://purl.org/ontology/myspace" -prefixes = """PREFIX owl: <http://www.w3.org/2002/07/owl#> \nPREFIX foaf: <http://xmlns.com/foaf/0.1/> \nPREFIX dc: <http://purl.org/dc/elements/1.1/> \nPREFIX mo: <http://purl.org/ontology/mo/>\nPREFIX myspace: <http://purl.org/ontology/myspace#>\nPREFIX xsd: <http://www.w3.org/2001/XMLSchema#>\nPREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>""" - -insert = """ \ninsert into graph <"""+defaultGraph+"""> {""" - -apacheLimit = 2000 - -class Usage(Exception): - def __init__(self, msg): - self.msg = msg - -def tryImportRDF(filename, attempt): - if attempt < 5: - debug("importing rdf") - try: - mi = mopy.importRDFFile(filename) - except urllib2.URLError: - debug("URLError importing RDF, retrying") - sleep(1.0) - attempt+=1 - tryImportRDF(filename, attempt) - else: - return mi - debug("import failed after tries: " + str(attempt)) - return None - -def parseRDF(filename, base): - '''parse the rdf and return a sparql update query''' - sparqlU='' - mi = tryImportRDF(base+filename, 0) - if mi: - keys = mi.PersonIdx.keys() - for key in keys: - person = mi.PersonIdx[key] - if person.name: - # if we find the name, this is the main subject - suid = person.URI.split(base)[1] - subject = "<"+myspaceBase+"/"+suid+">" - name = person.name.pop() - sparqlU = sparqlU + '\n'+subject+' rdf:type mo:MusicArtist .' - sparqlU = sparqlU + '\n'+subject+' myspace:myspaceID "'+filename.rstrip('.rdf')+'"^^xsd:int .' - sparqlU = sparqlU + """\n"""+subject+' foaf:name "' + urllib2.quote(name)+'"@en . 
' - - # get all the top friends - while(1): - try: - p = person.knows.pop() - except: - break - else: - ouid = p.URI.split(base)[1] - obj = "<"+myspaceBase+"/"+ouid+">" - sparqlU=sparqlU+ "\n"+subject+" foaf:knows "+ obj+ ' . ' "\n"+subject+" myspace:topFriend "+obj+ ' . ' - sparqlU = sparqlU + '\n'+obj+' rdf:type mo:MusicArtist .' - - while(1): - try: - thm = person.theme.pop() - except: - debug("breaking from genre pops") - break - else: - thm = thm.URI.split(base)[1] - # do some cleaning, bad genres in there like 35123543.rdf instead of hip hop - if not re.match(".*\.rdf",thm): - debug("adding genre: "+thm) - genre = "<"+myspaceOnt + "#"+urllib2.quote(thm)+">" - sparqlU=sparqlU+ "\n"+subject+ " myspace:genreTag "+ genre+ ' . ' - - try: - playcount = person.tipjar.pop().URI.split(base)[1] - sparqlU=sparqlU+ "\n"+subject+ ' myspace:totalPlays "'+ playcount+'"^^xsd:int . ' - except: - pass - - sparqlU=sparqlU+'}' - return sparqlU - else: - return None - -def setLogger(): - '''just set the logger''' - loggingConfig = {"format":'%(asctime)s %(levelname)-8s %(message)s', - "datefmt":'%d.%m.%y %H:%M:%S', - "level": logging.DEBUG, - #"filename":logPath + "musicGrabber.log", - "filemode":"w"} - logging.basicConfig(**loggingConfig) - -def getFileListing(rdfFolder): - '''return a list of all the rdf files found w/ given base''' - rdfFolder = rdfFolder.rstrip('/') - rdfFolder = rdfFolder+'/' - ftp = ftplib.FTP("myrdfspace.com") - ftp.login("myrdf", "my1stRDF") - ftp.cwd("myrdfspace.com/"+rdfFolder) - vList = ftp.nlst() - return vList - -def trySparql(sparql, attempt, f): - try: - debug('attempting sparql update, try #' + str(attempt)) - sparql.setReturnFormat(SPARQLWrapper.TURTLE) - ret = sparql.query().convert() - except urllib2.HTTPError: - debug('caught an http error, retrying...') - if attempt<5: - attempt+=1 - sleep(2) - trySparql(sparql, attempt, f) - else: - error("more that 5 http errors, giving up") - failedList.append(f) - except 
SPARQLWrapper.sparqlexceptions.QueryBadFormed: - error("query failed for "+ str(f)) - debug('$$$$$$$$$$$$$$$$BADLY FORMED QUERY$$$$$$$$$$$$$$$$$$$') - print sparql.queryString - badQueryList.append(f) - failedList.append(f) - except: - error("query failed for "+ str(f)) - debug('************UPDATE FAILED***********') - failedList.append(f) - print "Unexpected error:", sys.exc_info()[0] - print sparql.queryString - else: - print ret - return ret - return None - -def splitQuery(query): - '''sometime the query is too long and should be broke in two pieces''' - lines = query.splitlines(1) - splits = [] - split = "" - count = 0 - for line in lines: - if count < apacheLimit: - split = split+line - count+=len(line) - else: - splits.append(insert+split+'}') - split= line - count = 0 - splits.append(insert+split) - return splits - -def main(argv=None): - if argv is None: - argv = sys.argv - try: - try: - opts, args = getopt.getopt(argv[1:], "ho:b:s:v", ["help", "output=","base=", "start="]) - except getopt.error, msg: - raise Usage(msg) - - # option processing - base = None - start = None - for option, value in opts: - if option == "-v": - verbose = True - if option in ("-h", "--help"): - raise Usage(help_message) - if option in ("-o", "--output"): - output = value - if option in ("-b", "--base"): - base = value - if option in ("-s", "--start"): - start = value - '''if option in ("-g", '--graph'): - defaultGraph = value - insert = """ \ninsert into graph <"""+defaultGraph+"""> {"""''' - - - setLogger() - if base == None: - raise Usage(help_message) - return 2 - # parse base uri - folder = base.split("http://myrdfspace.com/")[1] - debug('getting list of files') - fileList = getFileListing(folder) - debug('got list of files') - #fileList = ['238729309.rdf', '13280592.rdf', '26412401.rdf', '8557307.rdf', '176635064.rdf', '12656647.rdf'] - startIndex=0 - if start: - try: - startIndex=fileList.index(start) - except: - debug("not a valid start file, not in list") - - for f in 
fileList[startIndex:]: - debug('parsing on file: '+str(f)) - #parse each file and do a sparql update to the repository - sparul = parseRDF(f, base) - sparql = SPARQLWrapper.SPARQLWrapper(sparqlEndPoint) - sparql.addDefaultGraph(defaultGraph) - if sparul: - # we have to deal w/ queries that are too long - if len(sparul) > apacheLimit: - debug('query too long, splitting...') - splitSparul = splitQuery(sparul) - for split in splitSparul: - sparql.setQuery(prefixes+split) - trySparql(sparql, 0, f) - else: - sparql.setQuery(prefixes+insert+sparul) - trySparql(sparql, 0, f) - else: - debug('failure on '+str(f)) - failedList.append(f) - - - - debug("Complete!!!") - print "\n\nREPORT:\n\tfailures: "+str(len(failedList)) - print "\nfails: " - print failedList - print "\n\nbad queries: " - print badQueryList - - except Usage, err: - print >> sys.stderr, sys.argv[0].split("/")[-1] + ": " + str(err.msg) - print >> sys.stderr, "\t for help use --help" - return 2 - - -if __name__ == "__main__": - sys.exit(main()) Added: graphRDF/branches/old2sparul/src/addTotalFriends.py =================================================================== --- graphRDF/branches/old2sparul/src/addTotalFriends.py (rev 0) +++ graphRDF/branches/old2sparul/src/addTotalFriends.py 2009-06-22 14:08:10 UTC (rev 354) @@ -0,0 +1,160 @@ +#!/usr/bin/python +''' +Created on Jun 19, 2009 + +@author: kurtjx +''' +import SPARQLWrapper +from rdflib import ConjunctiveGraph, URIRef, Literal, Namespace +import logging +from logging import debug, error, info +from time import sleep +import sys +import urllib2 + + +DEFAULT_GRAPH = "http://dbtune.org/myspace-fj-2008" +ENDPOINT = "http://virtuoso.dbtune.org/sparql" + +MYSPACE = Namespace("http://purl.org/ontology/myspace#") + +totfri_fail_list = [] +country_fail_list = [] +local_fail_list = [] + +def get_some_artists(sparql, limit=500, offset=0): + debug('querying for artists with limit %s and offset %s' % (str(limit), str(offset)) ) + q = 'define sql:log-enable 2 SELECT 
DISTINCT ?artist FROM <%s> WHERE { ?artist a <http://purl.org/ontology/mo/MusicArtist> } LIMIT %s OFFSET %s' % (DEFAULT_GRAPH, str(limit), str(offset)) + sparql.setQuery(q) + results = try_sparql(sparql, 0, 5) + if results == None: + error('freaking out, no results in last query: %s' % q) + print fail_list + sys.exit(2) + else: + debug('creating local graph and parsing results...') + graph = ConjunctiveGraph() + for result in results['results']['bindings']: + uri = result['artist']['value'] + #debug('creating new cursor') + #cursor = CONNECT.cursor() + #print('inserting triples for %s' % uri) + debug('getting total friends for %s' % uri) + results = get_total_friends(uri) + #q = 'SPARQL define sql:log-enable 2 INSERT IN GRAPH <'+DEFAULT_GRAPH+'> { <'+uri+'> <http://purl.org/ontology/myspace#totalFriends> "'+totfri+'"^^xsd:int } ' + #q = "DB.DBA.TTLP_MT('<%s> <http://purl.org/ontology/myspace#totalFriends> %s . ', '', '%s') " % (uri, totfri, DEFAULT_GRAPH) + #print q + #cursor.execute(q) + #cursor.close() + #debug('cursor closed') + if results['totalFriends'] != None: + graph.add((URIRef(uri), MYSPACE['totalFriends'], Literal(int(results['totalFriends'])))) + else: + debug('!!!!!!!!!!!!!!!!!!!!!!! no friends for %s !!!!!!!!!!!!!!!!!!!!!!' 
% uri) + totfri_fail_list.append(uri+'\n') + + if results['locality'] != None: + graph.add((URIRef(uri), MYSPACE['locality'], Literal(results['locality']))) + else: + debug('@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ no locality for %s @@@@@@@@@@@@@@@@ ' % uri) + local_fail_list.append(uri+'\n') + + if results['country'] != None: + graph.add((URIRef(uri), MYSPACE['country'], Literal(results['country']))) + else: + debug('******************************** no country for %s ***************************' % uri) + country_fail_list.append(uri+'\n') + + + debug('serializing results') + graph.serialize('./rdf/'+str(offset)+'.rdf') + f = open('./log/'+str(offset)+'_totfri_errors', 'w') + f.writelines(totfri_fail_list) + f.close() + f = open('./log/'+str(offset)+'_locality_errors', 'w') + f.writelines(local_fail_list) + f.close() + f = open('./log/'+str(offset)+'_country_errors', 'w') + f.writelines(country_fail_list) + f.close() + + debug('done with result set') + offset += limit + get_some_artists(sparql, limit, offset) + + +def get_total_friends(uri, attempt=0, fail=5): + '''get the total friends from the give uri''' + graph = ConjunctiveGraph() + results = {} + try: + graph.parse(uri) + except urllib2.HTTPError: + if attempt<fail: + attempt+=1 + sleep(2) + get_total_friends(uri, attempt, fail) + else: + return None + for row in graph.query('select ?totfri where { ?x <http://purl.org/ontology/myspace#totalFriends> ?totfri . } ' ): + totfri = row[0] + try: + totfri = totfri.strip('http://dbtune.org/myspace/uid/') + except: + results['totalFriends'] = None # exception means we didn't find any friends + else: + results['totalFriends'] = totfri + local = None + for row in graph.query('select ?local where { ?x <http://purl.org/ontology/myspace#locality> ?local . } '): + local = row[0] + results['locality'] = local + country = None + for row in graph.query('select ?country where { ?x <http://purl.org/ontology/myspace#country> ?country . 
} '): + country = row[0] + results['country'] = country + return results + + +def insert_total_friends(uri, cursor): + totfri = get_total_friends(uri) + q = 'SPARQL define sql:log-enable 2 INSERT IN GRAPH <%s> { <%s> <http://purl.org/ontology/myspace#totalFriends> "%s"^^xsd:int } ' % (DEFAULT_GRAPH, uri, totfri) + #print q + cursor.execute(q) + +def try_sparql(sparql, attempt=0, fail=5): + try: + debug('attempting sparql query, try #' + str(attempt)) + sparql.setReturnFormat(SPARQLWrapper.JSON) + ret = sparql.query().convert() + except urllib2.HTTPError: + debug('caught an http error, retrying...') + if attempt<fail: + attempt+=1 + sleep(2) + trySparql(sparql, attempt, fail) + else: + error("more that 5 http errors, giving up") + return None + return ret + +def set_logger(level = logging.DEBUG): + '''just set the logger''' + loggingConfig = {"format":'%(asctime)s %(levelname)-8s %(message)s', + "datefmt":'%d.%m.%y %H:%M:%S', + "level": level, + #"filename":logPath + "musicGrabber.log", + "filemode":"w"} + logging.basicConfig(**loggingConfig) + +def main(): + set_logger() + sparql = SPARQLWrapper.SPARQLWrapper(ENDPOINT) + sparql.setReturnFormat(SPARQLWrapper.JSON) + #CONNECT = pyodbc.connect('DSN=SysVirt;UID=dba;PWD=dba;HOST=localhost:1112') + get_some_artists(sparql, 500,0) + + + +if __name__ == '__main__': + main() + \ No newline at end of file Copied: graphRDF/branches/old2sparul/src/old2sparul.py (from rev 353, graphRDF/branches/old2sparul/old2sparul.py) =================================================================== --- graphRDF/branches/old2sparul/src/old2sparul.py (rev 0) +++ graphRDF/branches/old2sparul/src/old2sparul.py 2009-06-22 14:08:10 UTC (rev 354) @@ -0,0 +1,264 @@ +#!/usr/bin/env python +# encoding: utf-8 +""" +old2sparul.py + +This is an ad hoc script for taking data from myrdfspace.com, cleaning it, and putting in sparql endpoint + +Created by Kurtis Random on 2009-02-03. +Copyright (c) 2009 C4DM QMUL. All rights reserved. 
+""" + +import sys +import getopt +from logging import log, error, warning, info, debug +import logging +import ftplib +import SPARQLWrapper +import mopy +import urllib2 +import re +from time import sleep + +help_message = ''' +take old myrdfspace files and add to the sparql endpoint... + -b --base <uri base from myrdfspace> + -s --start <uid to start from> useful after a crash ;-) +''' + +failedList = [] +badQueryList = [] + +defaultGraph = "http://dbtune.org/myspace-fj-2008" +sparqlEndPoint = "http://dbtune.org/cmn/sparql" +myspaceBase = "http://dbtune.org/myspace/uid" +myspaceOnt = "http://purl.org/ontology/myspace" +prefixes = """PREFIX owl: <http://www.w3.org/2002/07/owl#> \nPREFIX foaf: <http://xmlns.com/foaf/0.1/> \nPREFIX dc: <http://purl.org/dc/elements/1.1/> \nPREFIX mo: <http://purl.org/ontology/mo/>\nPREFIX myspace: <http://purl.org/ontology/myspace#>\nPREFIX xsd: <http://www.w3.org/2001/XMLSchema#>\nPREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>""" + +insert = """ \ninsert into graph <"""+defaultGraph+"""> {""" + +apacheLimit = 2000 + +class Usage(Exception): + def __init__(self, msg): + self.msg = msg + +def tryImportRDF(filename, attempt): + if attempt < 5: + debug("importing rdf") + try: + mi = mopy.importRDFFile(filename) + except urllib2.URLError: + debug("URLError importing RDF, retrying") + sleep(1.0) + attempt+=1 + tryImportRDF(filename, attempt) + else: + return mi + debug("import failed after tries: " + str(attempt)) + return None + +def parseRDF(filename, base): + '''parse the rdf and return a sparql update query''' + sparqlU='' + mi = tryImportRDF(base+filename, 0) + if mi: + keys = mi.PersonIdx.keys() + for key in keys: + person = mi.PersonIdx[key] + if person.name: + # if we find the name, this is the main subject + suid = person.URI.split(base)[1] + subject = "<"+myspaceBase+"/"+suid+">" + name = person.name.pop() + sparqlU = sparqlU + '\n'+subject+' rdf:type mo:MusicArtist .' 
+ sparqlU = sparqlU + '\n'+subject+' myspace:myspaceID "'+filename.rstrip('.rdf')+'"^^xsd:int .' + sparqlU = sparqlU + """\n"""+subject+' foaf:name "' + urllib2.quote(name)+'"@en . ' + + # get all the top friends + while(1): + try: + p = person.knows.pop() + except: + break + else: + ouid = p.URI.split(base)[1] + obj = "<"+myspaceBase+"/"+ouid+">" + sparqlU=sparqlU+ "\n"+subject+" foaf:knows "+ obj+ ' . ' "\n"+subject+" myspace:topFriend "+obj+ ' . ' + sparqlU = sparqlU + '\n'+obj+' rdf:type mo:MusicArtist .' + + while(1): + try: + thm = person.theme.pop() + except: + debug("breaking from genre pops") + break + else: + thm = thm.URI.split(base)[1] + # do some cleaning, bad genres in there like 35123543.rdf instead of hip hop + if not re.match(".*\.rdf",thm): + debug("adding genre: "+thm) + genre = "<"+myspaceOnt + "#"+urllib2.quote(thm)+">" + sparqlU=sparqlU+ "\n"+subject+ " myspace:genreTag "+ genre+ ' . ' + + try: + playcount = person.tipjar.pop().URI.split(base)[1] + sparqlU=sparqlU+ "\n"+subject+ ' myspace:totalPlays "'+ playcount+'"^^xsd:int . 
' + except: + pass + + sparqlU=sparqlU+'}' + return sparqlU + else: + return None + +def setLogger(): + '''just set the logger''' + loggingConfig = {"format":'%(asctime)s %(levelname)-8s %(message)s', + "datefmt":'%d.%m.%y %H:%M:%S', + "level": logging.DEBUG, + #"filename":logPath + "musicGrabber.log", + "filemode":"w"} + logging.basicConfig(**loggingConfig) + +def getFileListing(rdfFolder): + '''return a list of all the rdf files found w/ given base''' + rdfFolder = rdfFolder.rstrip('/') + rdfFolder = rdfFolder+'/' + ftp = ftplib.FTP("myrdfspace.com") + ftp.login("myrdf", "my1stRDF") + ftp.cwd("myrdfspace.com/"+rdfFolder) + vList = ftp.nlst() + return vList + +def trySparql(sparql, attempt, f): + try: + debug('attempting sparql update, try #' + str(attempt)) + sparql.setReturnFormat(SPARQLWrapper.TURTLE) + ret = sparql.query().convert() + except urllib2.HTTPError: + debug('caught an http error, retrying...') + if attempt<5: + attempt+=1 + sleep(2) + trySparql(sparql, attempt, f) + else: + error("more that 5 http errors, giving up") + failedList.append(f) + except SPARQLWrapper.sparqlexceptions.QueryBadFormed: + error("query failed for "+ str(f)) + debug('$$$$$$$$$$$$$$$$BADLY FORMED QUERY$$$$$$$$$$$$$$$$$$$') + print sparql.queryString + badQueryList.append(f) + failedList.append(f) + except: + error("query failed for "+ str(f)) + debug('************UPDATE FAILED***********') + failedList.append(f) + print "Unexpected error:", sys.exc_info()[0] + print sparql.queryString + else: + print ret + return ret + return None + +def splitQuery(query): + '''sometime the query is too long and should be broke in two pieces''' + lines = query.splitlines(1) + splits = [] + split = "" + count = 0 + for line in lines: + if count < apacheLimit: + split = split+line + count+=len(line) + else: + splits.append(insert+split+'}') + split= line + count = 0 + splits.append(insert+split) + return splits + +def main(argv=None): + if argv is None: + argv = sys.argv + try: + try: + opts, 
args = getopt.getopt(argv[1:], "ho:b:s:v", ["help", "output=","base=", "start="]) + except getopt.error, msg: + raise Usage(msg) + + # option processing + base = None + start = None + for option, value in opts: + if option == "-v": + verbose = True + if option in ("-h", "--help"): + raise Usage(help_message) + if option in ("-o", "--output"): + output = value + if option in ("-b", "--base"): + base = value + if option in ("-s", "--start"): + start = value + '''if option in ("-g", '--graph'): + defaultGraph = value + insert = """ \ninsert into graph <"""+defaultGraph+"""> {"""''' + + + setLogger() + if base == None: + raise Usage(help_message) + return 2 + # parse base uri + folder = base.split("http://myrdfspace.com/")[1] + debug('getting list of files') + fileList = getFileListing(folder) + debug('got list of files') + #fileList = ['238729309.rdf', '13280592.rdf', '26412401.rdf', '8557307.rdf', '176635064.rdf', '12656647.rdf'] + startIndex=0 + if start: + try: + startIndex=fileList.index(start) + except: + debug("not a valid start file, not in list") + + for f in fileList[startIndex:]: + debug('parsing on file: '+str(f)) + #parse each file and do a sparql update to the repository + sparul = parseRDF(f, base) + sparql = SPARQLWrapper.SPARQLWrapper(sparqlEndPoint) + sparql.addDefaultGraph(defaultGraph) + if sparul: + # we have to deal w/ queries that are too long + if len(sparul) > apacheLimit: + debug('query too long, splitting...') + splitSparul = splitQuery(sparul) + for split in splitSparul: + sparql.setQuery(prefixes+split) + trySparql(sparql, 0, f) + else: + sparql.setQuery(prefixes+insert+sparul) + trySparql(sparql, 0, f) + else: + debug('failure on '+str(f)) + failedList.append(f) + + + + debug("Complete!!!") + print "\n\nREPORT:\n\tfailures: "+str(len(failedList)) + print "\nfails: " + print failedList + print "\n\nbad queries: " + print badQueryList + + except Usage, err: + print >> sys.stderr, sys.argv[0].split("/")[-1] + ": " + str(err.msg) + print >> 
sys.stderr, "\t for help use --help" + return 2 + + +if __name__ == "__main__": + sys.exit(main()) Property changes on: graphRDF/branches/old2sparul/src/old2sparul.py ___________________________________________________________________ Added: svn:mergeinfo + This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <gea...@us...> - 2009-05-27 21:20:03
|
Revision: 353 http://mypyspace.svn.sourceforge.net/mypyspace/?rev=353&view=rev Author: gearmonkey Date: 2009-05-27 21:19:59 +0000 (Wed, 27 May 2009) Log Message: ----------- more auto-format detection added a new file with some common helper functions also a new executable that is designed to be the entry point for the backend of the radio player. At some point fairly soon the svn may need some restructuring. Modified Paths: -------------- graphRDF/branches/songsAsNodes/loadWeights.py Added Paths: ----------- graphRDF/branches/songsAsNodes/common.py graphRDF/branches/songsAsNodes/radioCore.py Added: graphRDF/branches/songsAsNodes/common.py =================================================================== --- graphRDF/branches/songsAsNodes/common.py (rev 0) +++ graphRDF/branches/songsAsNodes/common.py 2009-05-27 21:19:59 UTC (rev 353) @@ -0,0 +1,44 @@ +#!/usr/bin/env python +# encoding: utf-8 +""" +common.py + +Some common functions to speed access for song graphs + +Created by Benjamin Fields on 2009-05-18. +Copyright (c) 2009 Goldsmiths University of London. All rights reserved. 
+""" + +import sys +import os +import os.path +import igraph +import graphRDF + +def getTrackList(graph, nodeList): + trackList = [] + for vert in nodeList: + trackList.append(graph.vs[vert]['track']) + return trackList + +def avgDelta(graph, nodeList): + delta = 0 + for idx, node in enumerate(nodeList[1:]): + delta += igraph.EdgeSeq(graph, [nodeList[idx], node])[0]['audioWeight'] + return delta/(len(nodeList)-1) + +def deltaList(graph, nodeList): + deltaList = [] + for idx, node in enumerate(nodeList[1:]): + deltaList.append(igraph.EdgeSeq(graph, [nodeList[idx], node])[0]['audioWeight']) + return deltaList + + + +def main(): + pass + + +if __name__ == '__main__': + main() + Modified: graphRDF/branches/songsAsNodes/loadWeights.py =================================================================== --- graphRDF/branches/songsAsNodes/loadWeights.py 2009-05-27 21:16:28 UTC (rev 352) +++ graphRDF/branches/songsAsNodes/loadWeights.py 2009-05-27 21:19:59 UTC (rev 353) @@ -104,7 +104,7 @@ typoOutHandle.write(str(idx) + " : typo : " + line) typoOutHandle.flush() typos += 1 - print >> sys.stderr, str(err) + "\n" + str(typos) + " typos found." + print >> sys.stderr, str(Exception) + " : " + str(err) + "\n" + str(typos) + " typos found." continue foundWeight += 1 Added: graphRDF/branches/songsAsNodes/radioCore.py =================================================================== --- graphRDF/branches/songsAsNodes/radioCore.py (rev 0) +++ graphRDF/branches/songsAsNodes/radioCore.py 2009-05-27 21:19:59 UTC (rev 353) @@ -0,0 +1,130 @@ +#!/usr/bin/env python +# encoding: utf-8 +""" +radioCore.py + +This is the entry point for the backend of + +Created by Benjamin Fields on 2009-05-21. +Copyright (c) 2009 Goldsmiths University of London. All rights reserved. 
+""" + +import sys +import getopt +import igraph +import graphRDF +from common import * + + +help_message = ''' +Core utility end point to the automatic ong selection and aggregation toolkit, built around mypyspace's graphRDF project. + +modes available: + help print this message and exit + + makePlaylist create a playlist for MP3Broadcaster. Args as follows: + + >radioCore.py makePlaylist [options] graph.graphmlz srcNode destNode outfile.playlist + + graph.graphmlz -- the graph that will be used + --note this may change to a pickled igraph to speed load time.-- + srcNode -- where the playlist will start, specified as a filename that exists in graph.graphmlz + destNode -- where the playlist will end, specified as a filename that exists in graph.graphmlz + outfile.playlist -- the full path where the playlist will be written. + + destFinder given various circumstances determine all possible destination nodes for a +''' + +playlistheader = """*PLAY-LIST* +# +# Created by radioCore.py +# +""" + +class Usage(Exception): + def __init__(self, msg): + self.msg = msg + + +def main(argv=None): + verbose = False + weighted = True + if argv is None: + argv = sys.argv + try: + if len(argv) < 2: + raise Usage(help_message) + if argv[1] == 'help': + raise Usage(help_message) + elif argv[1] == 'makePlaylist': + if len(argv) < 6: + raise Usage(help_message) + mode = 'makePlaylist' + graphFile = argv[-4] + srcNode = argv[-3] + destNode = argv[-2] + outfile = argv[-1] + if len(argv) > 6: + generalOpts = argv[2:-4] + else: + generalOpts = [] + elif argv[1] == 'destFinder': + mode = 'destFinder' + + else: + raise Usage("Poor mode specification. 
Run 'radioCore.py help' for more detailed usage.") + try: + opts, args = getopt.getopt(generalOpts, "hvw", ["help", "vebose", "unweighted"]) + except getopt.error, msg: + raise Usage(msg) + + # option processing + for option, value in opts: + if option == ("-v", "--verbose"): + verbose = True + if option in ("-h", "--help"): + raise Usage(help_message) + if option in ("-w", "--unweighted"): + weighted = False + + except Usage, err: + print >> sys.stderr, sys.argv[0].split("/")[-1] + ": " + str(err.msg) + print >> sys.stderr, "\t for help use --help" + return 2 + songGraph = graphRDF.graph('media_seed_134901208') + if mode == 'makePlaylist': + try: + outHandle = open(outfile, 'w') + if os.path.splitext(graphFile)[1] in ['.pkl', '.pickle']: + songGraph.S = igraph.Graph.Load(graphFile, format='pickle') + elif os.path.splitext(graphFile)[1] in ['.mlz', '.graphmlz']: + songGraph.S = igraph.Graph.Load(graphFile, format='graphmlz') + else: + songGraph.S = igraph.Graph.Load(graphFile) + except Exception, err: + print >> sys.stderr, sys.argv[0].split("/")[-1] + ": " + str(err) + print >> sys.stderr, "\t for help use --help" + return 12 + if weighted and (len(songGraph.S.es.select(audioWeight_lt=0)) > 0): + print "radioCore.py: makePlaylist: WARNING: attempting to use weights, but some of the weights are negative. This may cause in some odd results." + try: + srcIDX = songGraph.S.vs.select(track=srcNode)[0].index + destIDX = songGraph.S.vs.select(track=destNode)[0].index + except Exception, err: + print >> sys.stderr, sys.argv[0].split("/")[-1] + ": " + str(err) + print >> sys.stderr, "\t source or destination track path poorly specified, unable to dereference." 
+ return 13 + outHandle.write(playlistheader) + if weighted: + trackList = getTrackList(songGraph.S, songGraph.shortestPath(srcIDX, destIDX, graph='S', weight='audioWeight')) + else: + trackList = getTrackList(songGraph.S, songGraph.shortestPath(srcIDX, destIDX, graph='S', weight=None)) + + for track in trackList: + outHandle.write('"'+track +'" 5\n') + outHandle.close() + + + +if __name__ == "__main__": + sys.exit(main()) Property changes on: graphRDF/branches/songsAsNodes/radioCore.py ___________________________________________________________________ Added: svn:executable + * This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <gea...@us...> - 2009-05-27 21:16:43
|
Revision: 352 http://mypyspace.svn.sourceforge.net/mypyspace/?rev=352&view=rev Author: gearmonkey Date: 2009-05-27 21:16:28 +0000 (Wed, 27 May 2009) Log Message: ----------- Cleaned up the IO to do some autoformat detection. Modified Paths: -------------- graphRDF/branches/songsAsNodes/loadWeights.py Modified: graphRDF/branches/songsAsNodes/loadWeights.py =================================================================== --- graphRDF/branches/songsAsNodes/loadWeights.py 2009-05-26 21:29:16 UTC (rev 351) +++ graphRDF/branches/songsAsNodes/loadWeights.py 2009-05-27 21:16:28 UTC (rev 352) @@ -17,8 +17,10 @@ import sys import os +import os.path import igraph import rightDict +import cPickle #import graphRDF Usage = """Usage:\nloadWeights.py weightfile.txt inGraph.mlz edgesLeft.txt outGraph.mlz\n\t\tFor more help read the source.""" @@ -51,12 +53,18 @@ return 2 print "loading graph into memory..." try: - putWeightsInHere = igraph.Graph.Load(argv[2], format='graphmlz') - print "again..." - putWeightsInHere = igraph.Graph.Load(argv[2], format='graphmlz') - except RuntimeWarning: - print "Trying a second time..." - putWeightsInHere = igraph.Graph.Load(argv[2], format='graphmlz') + if os.path.splitext(argv[2])[1] in ['.pkl', '.pickle']: + print 'unpickling...' + putWeightsInHere = cPickle.load(open(argv[2], 'r')) + elif os.path.splitext(argv[2])[1] in ['.mlz', '.graphmlz']: + print 'rendering from graphmlz...' + putWeightsInHere = igraph.Graph.Load(argv[2], format='graphmlz') + else: + print 'pushing to igraph loader without a format (attempting auto detection)...' + putWeightsInHere = igraph.Graph.Load(argv[2]) + # except RuntimeWarning: + # print "Trying a second time..." 
+ # putWeightsInHere = igraph.Graph.Load(argv[2], format='graphmlz') except Exception, err: print >> sys.stderr, sys.argv[0].split("/")[-1] + ": " + str(err) print >> sys.stderr, "\t" + Usage @@ -76,19 +84,26 @@ try: sourceV , targetV, Weight = line.strip().split('\t') except ValueError: - outfileHandle.write(str(idx) + " : Missing Weight : " + line) - outfileHandle.flush() - missed += 1 - continue + outfileHandle.write(str(idx) + " : Missing Weight : " + line) + outfileHandle.flush() + missed += 1 + print >> sys.stderr, str(missed) + " missing weights." + continue try: - srcIDX = indexdict[sourceV] - trgtIDX = indexdict[targetV] - + srcIDX = indexdict[sourceV.lstrip(prefix)] + trgtIDX = indexdict[targetV.lstrip(prefix)] + igraph.EdgeSeq(putWeightsInHere, [srcIDX, trgtIDX])[0]['audioWeight'] = float(Weight) + except ValueError: + outfileHandle.write(str(idx) + " : Missing Edge : " + line) + outfileHandle.flush() + typos += 1 + print >> sys.stderr, str(typos) + " typos found." + continue except Exception, err: - typoOutHandle.write(str(idx) + " : typo : " + line) - typoOutHandle.flush() - typos += 1 + typoOutHandle.write(str(idx) + " : typo : " + line) + typoOutHandle.flush() + typos += 1 print >> sys.stderr, str(err) + "\n" + str(typos) + " typos found." continue @@ -98,7 +113,16 @@ print "found " + str(missed) + " lines without weight." 
print "Saving updated graph to " + str(argv[4]) - putWeightsInHere.write(argv[4], format='graphmlz') + if os.path.splitext(argv[4])[1] in ['.pkl', '.pickle']: + cPickle.dump(putWeightsInHere, open(argv[4], 'w')) + elif os.path.splitext(argv[4])[1] in ['.mlz', '.graphmlz']: + putWeightsInHere.write(argv[4], format='graphmlz') + else: + try: + putWeightsInHere.write(argv[4]) + except IOError: + print "unable to determine the desired file format from the extension for the output graph.\nWriting it out as a graphmlz file and appending .mlz" + putWeightsInHere.write(argv[4]+'.mlz', format='graphmlz') infileHandle.close() outfileHandle.close() typoOutHandle.close() This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <gea...@us...> - 2009-05-26 21:29:18
|
Revision: 351 http://mypyspace.svn.sourceforge.net/mypyspace/?rev=351&view=rev Author: gearmonkey Date: 2009-05-26 21:29:16 +0000 (Tue, 26 May 2009) Log Message: ----------- bug fix on the input. Modified Paths: -------------- graphRDF/branches/songsAsNodes/loadWeights.py Modified: graphRDF/branches/songsAsNodes/loadWeights.py =================================================================== --- graphRDF/branches/songsAsNodes/loadWeights.py 2009-05-26 21:18:00 UTC (rev 350) +++ graphRDF/branches/songsAsNodes/loadWeights.py 2009-05-26 21:29:16 UTC (rev 351) @@ -51,12 +51,12 @@ return 2 print "loading graph into memory..." try: - putWeightsInHere = igraph.Graph.Load('complexSongGraph.mlz', format='graphmlz') + putWeightsInHere = igraph.Graph.Load(argv[2], format='graphmlz') print "again..." - putWeightsInHere = igraph.Graph.Load('complexSongGraph.mlz', format='graphmlz') + putWeightsInHere = igraph.Graph.Load(argv[2], format='graphmlz') except RuntimeWarning: print "Trying a second time..." - putWeightsInHere = igraph.Graph.Load('complexSongGraph.mlz', format='graphmlz') + putWeightsInHere = igraph.Graph.Load(argv[2], format='graphmlz') except Exception, err: print >> sys.stderr, sys.argv[0].split("/")[-1] + ": " + str(err) print >> sys.stderr, "\t" + Usage This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <gea...@us...> - 2009-05-26 21:18:04
|
Revision: 350 http://mypyspace.svn.sourceforge.net/mypyspace/?rev=350&view=rev Author: gearmonkey Date: 2009-05-26 21:18:00 +0000 (Tue, 26 May 2009) Log Message: ----------- forgot to add this custom dictionary I made for the weight loader... Added Paths: ----------- graphRDF/branches/songsAsNodes/rightDict.py Added: graphRDF/branches/songsAsNodes/rightDict.py =================================================================== --- graphRDF/branches/songsAsNodes/rightDict.py (rev 0) +++ graphRDF/branches/songsAsNodes/rightDict.py 2009-05-26 21:18:00 UTC (rev 350) @@ -0,0 +1,78 @@ +""" + +This is a simple extension of dict so that when you look for a string key, if it is not found as an exact match it will look for any key that contains the search string as an exact substring returnign the first it finds. +------- +I'm changing the _search to do a simple contains test and return on true. This will effectively perform a *value* match and return the first, if the exact match fails... +""" + +__revision__ = "$Rev$" + + + +class rightDict(dict): + "Provides a dictionary that performs fuzzy lookup" + def __init__(self, items = None): + """Construct a new rightDict instance + + items is an dictionary to copy items from (optional) + """ + super(rightDict, self).__init__() + + if items: + self.update(items) + + # short wrapper around some super (dict) methods + self._dict_contains = lambda key: \ + super(rightDict,self).__contains__(key) + + self._dict_getitem = lambda key: \ + super(rightDict,self).__getitem__(key) + + def _search(self, lookfor, stop_on_first = False): + """Returns the first value whose key contains lookfor + + """ + + # if the item is in the dictionary then just return it + if self._dict_contains(lookfor): + return True, lookfor, self._dict_getitem(lookfor), 1 + + + for key in self: + + # if the current key is not a string + # then we just skip it + try: + # perform the test: + if lookfor in key: + return (True, key, self._dict_getitem(key)) + except 
TypeError: + continue + + + #if we got through the loop there was no match. + return ( + False, + lookfor, + lookfor) + + + def __contains__(self, item): + "Overides Dictionary __contains__ to use fuzzy matching" + if self._search(item, True)[0]: + return True + else: + return False + + def __getitem__(self, lookfor): + "Overides Dictionary __getitem__ to use fuzzy matching" + matched, key, item = self._search(lookfor) + + if not matched: + raise KeyError( + "'%s' was not contained in any key."% + (str(lookfor))) + + return item + + This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <gea...@us...> - 2009-05-26 21:16:01
|
Revision: 349 http://mypyspace.svn.sourceforge.net/mypyspace/?rev=349&view=rev Author: gearmonkey Date: 2009-05-26 21:15:29 +0000 (Tue, 26 May 2009) Log Message: ----------- added the weight loader. Added Paths: ----------- graphRDF/branches/songsAsNodes/loadWeights.py Added: graphRDF/branches/songsAsNodes/loadWeights.py =================================================================== --- graphRDF/branches/songsAsNodes/loadWeights.py (rev 0) +++ graphRDF/branches/songsAsNodes/loadWeights.py 2009-05-26 21:15:29 UTC (rev 349) @@ -0,0 +1,109 @@ +#!/usr/bin/env python +# encoding: utf-8 +""" +loadWeights.py + +reads in the tab delimited list from arg1 and a graph from arg2. +For each line in arg1 checks that there are 3 values. +Takes the first two values as the edge specification and the third as the the audioWeight. +If the third value isn't present writes the edge to a text file named in arg3. +Saves the updated graph in graphmlz formated file named in arg4. + +all args are required. + +Created by Benjamin Fields on 2009-05-17. +Copyright (c) 2009 Goldsmiths University of London. All rights reserved. +""" + +import sys +import os +import igraph +import rightDict +#import graphRDF + +Usage = """Usage:\nloadWeights.py weightfile.txt inGraph.mlz edgesLeft.txt outGraph.mlz\n\t\tFor more help read the source.""" + +def createVerticesAttributeValueDict(workingGraph, attribute): + """creates a rightDict of the values in a graph of a given attribute and the node it came from to enable looser string matching (will return a substring match as true). May not work well if the values are non unique. 
+ returns dictionary""" + attrValueDict = rightDict.rightDict() + for vert in workingGraph.vs: + attrValueDict[vert[attribute]] = vert.index + return attrValueDict + +def main(argv=None): + #if the graph has a different prefix than + prefix = "http://myrdfspace.com/media_seed_134901208/" + + if argv is None: + argv = sys.argv + if len(argv) != 5: + print Usage + return 1 + + try: + infileHandle = open(argv[1], 'r') + outfileHandle = open(argv[3], 'w') + typoOutHandle = open('typos.txt', 'w') + except Exception, err: + print >> sys.stderr, sys.argv[0].split("/")[-1] + ": " + str(err) + print >> sys.stderr, "\t" + Usage + return 2 + print "loading graph into memory..." + try: + putWeightsInHere = igraph.Graph.Load('complexSongGraph.mlz', format='graphmlz') + print "again..." + putWeightsInHere = igraph.Graph.Load('complexSongGraph.mlz', format='graphmlz') + except RuntimeWarning: + print "Trying a second time..." + putWeightsInHere = igraph.Graph.Load('complexSongGraph.mlz', format='graphmlz') + except Exception, err: + print >> sys.stderr, sys.argv[0].split("/")[-1] + ": " + str(err) + print >> sys.stderr, "\t" + Usage + return 3 + + + missed = 0 + typos = 0 + foundWeight = 0 + print "processing..." + indexdict = createVerticesAttributeValueDict(putWeightsInHere, 'track') + for idx, line in enumerate(infileHandle): + + if idx%1000 == 0: + print str(idx) + " lines of " + argv[1] + " have been processed." + + try: + sourceV , targetV, Weight = line.strip().split('\t') + except ValueError: + outfileHandle.write(str(idx) + " : Missing Weight : " + line) + outfileHandle.flush() + missed += 1 + continue + try: + srcIDX = indexdict[sourceV] + trgtIDX = indexdict[targetV] + + igraph.EdgeSeq(putWeightsInHere, [srcIDX, trgtIDX])[0]['audioWeight'] = float(Weight) + except Exception, err: + typoOutHandle.write(str(idx) + " : typo : " + line) + typoOutHandle.flush() + typos += 1 + print >> sys.stderr, str(err) + "\n" + str(typos) + " typos found." 
+ continue + + foundWeight += 1 + + print "changed the weight of " + str(foundWeight) + " edges." + print "found " + str(missed) + " lines without weight." + print "Saving updated graph to " + str(argv[4]) + + putWeightsInHere.write(argv[4], format='graphmlz') + infileHandle.close() + outfileHandle.close() + typoOutHandle.close() + + +if __name__ == '__main__': + sys.exit(main()) + Property changes on: graphRDF/branches/songsAsNodes/loadWeights.py ___________________________________________________________________ Added: svn:executable + * This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <gea...@us...> - 2009-05-18 14:57:28
|
Revision: 348 http://mypyspace.svn.sourceforge.net/mypyspace/?rev=348&view=rev Author: gearmonkey Date: 2009-05-18 14:57:26 +0000 (Mon, 18 May 2009) Log Message: ----------- adjusted the formatting of the edge dump slightly. Added a graph with some initial weights (~27k of 1.5M edges) the rest of the edges are weighted at the median of the edges with actual weight, so as to continue neutrality. I'm currently adjusting the way the track is specified in the 'track' attribute to have full paths to location on the GDS cluster and will update when that's finished. Modified Paths: -------------- graphRDF/branches/songsAsNodes/graphRDF.py Added Paths: ----------- graphRDF/branches/songsAsNodes/partiallyWeightedAdjustedSongGraph.graphmlz Modified: graphRDF/branches/songsAsNodes/graphRDF.py =================================================================== --- graphRDF/branches/songsAsNodes/graphRDF.py 2009-05-17 15:34:06 UTC (rev 347) +++ graphRDF/branches/songsAsNodes/graphRDF.py 2009-05-18 14:57:26 UTC (rev 348) @@ -331,10 +331,10 @@ """Writes out (text) the edgelist of the selected graph (G by default) as tab delimited pairs - source0\ttarget0 - source1\ttarget1 + edgeNum\tsource0\ttarget0 + edgeNum\tsource1\ttarget1 ... - sourceN\ttargetN + edgeNum\tsourceN\ttargetN If the graph selected is G, each node is represented as it's myspace artist ID. 
If the selected graph @@ -381,7 +381,7 @@ else: sourceText = str(graphToPrint.vs[edge.source]['track'].lstrip(prefix)) targetText = str(graphToPrint.vs[edge.target]['track'].lstrip(prefix)) - fH.write(sourceText + '\t' + targetText + '\n') + fH.write(str(edge.index) + '\t' + sourceText + '\t' + targetText + '\n') fH.flush() return @@ -398,12 +398,6 @@ self.S_shortestpaths_unweighted = [[]]*len(self.S.vs) try: return self.S_shortestpaths_unweighted[src][dst] - # try: - # if not self.S_shortestpaths_unweighted[src][dst] == []: - # return self.S_shortestpaths_unweighted[src][dst] - # else: - # self.S_shortestpaths_unweighted[src] = self.S.get_shortest_paths(src) - # return self.S_shortestpaths_unweighted[src][dst] except IndexError: self.S_shortestpaths_unweighted[src] = self.S.get_shortest_paths(src) return self.S_shortestpaths_unweighted[src][dst] Added: graphRDF/branches/songsAsNodes/partiallyWeightedAdjustedSongGraph.graphmlz =================================================================== (Binary files differ) Property changes on: graphRDF/branches/songsAsNodes/partiallyWeightedAdjustedSongGraph.graphmlz ___________________________________________________________________ Added: svn:mime-type + application/octet-stream This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <gea...@us...> - 2009-05-17 15:34:13
|
Revision: 347 http://mypyspace.svn.sourceforge.net/mypyspace/?rev=347&view=rev Author: gearmonkey Date: 2009-05-17 15:34:06 +0000 (Sun, 17 May 2009) Log Message: ----------- fixed the bugs and cleaned up the shortest path function. Should work with unweighted or audio weight graphs now. Modified Paths: -------------- graphRDF/branches/songsAsNodes/graphRDF.py Modified: graphRDF/branches/songsAsNodes/graphRDF.py =================================================================== --- graphRDF/branches/songsAsNodes/graphRDF.py 2009-05-16 13:32:46 UTC (rev 346) +++ graphRDF/branches/songsAsNodes/graphRDF.py 2009-05-17 15:34:06 UTC (rev 347) @@ -389,18 +389,36 @@ def shortestPath(self, src, dst, graph='S', weight='audioWeight'): - """returns the shortest path between the nodes src and dst (specified as node numbers) within graph (specify 'S' or 'G', S used by default). The edge attribute to use as a weight can be specified in weight, use None if an unweighted shortest path is desired. Looks to see if the shortest path index has been filled in self.graph.shortestpaths_unweighted or self.graph.shortestpaths_weighted (for audioWeight, exoitic weights not cached.)""" + """returns the shortest path between the nodes src and dst (specified as node numbers) within graph (specify 'S' or 'G', S used by default). The edge attribute to use as a weight can be specified in weight, use None if an unweighted shortest path is desired. 
Looks to see if the shortest path index has been filled in self.graph.shortestpaths_unweighted or self.graph.shortestpaths_weighted (for audioWeight, exotic weights not cached.)""" #tests for existance of shortest paths for given src with the weighting and graph requested - if weight == None and graph = 'S': + if weight == None and graph == 'S': try: - self.S.shortestpaths_unweighted[src] - except NameError: - self.S.shortestpaths_unweighted[src] = []*len(S.vs) - if not self.S.shortestpaths_unweighted[src][dst] == []: - return self.S.shortestpaths_unweighted[src][dst] - else: - self.S.shortestpaths_unweighted[src] = self.S.get_shortest_paths(src) - return self.S.shortestpaths_unweighted[src][dst] + self.S_shortestpaths_unweighted + except AttributeError: + self.S_shortestpaths_unweighted = [[]]*len(self.S.vs) + try: + return self.S_shortestpaths_unweighted[src][dst] + # try: + # if not self.S_shortestpaths_unweighted[src][dst] == []: + # return self.S_shortestpaths_unweighted[src][dst] + # else: + # self.S_shortestpaths_unweighted[src] = self.S.get_shortest_paths(src) + # return self.S_shortestpaths_unweighted[src][dst] + except IndexError: + self.S_shortestpaths_unweighted[src] = self.S.get_shortest_paths(src) + return self.S_shortestpaths_unweighted[src][dst] + + + elif weight == 'audioWeight' and graph == 'S': + try: + self.S_shortestpaths_audioWeighted + except AttributeError: + self.S_shortestpaths_audioWeighted = [[]]*len(self.S.vs) + try: + return self.S_shortestpaths_audioWeighted[src][dst] + except IndexError: + self.S_shortestpaths_audioWeighted[src] = self.S.get_shortest_paths(src,weight) + return self.S_shortestpaths_audioWeighted[src][dst] else: print "the specified edge attribute weight and graph were poorly specifed or are not yet supported..." return This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <gea...@us...> - 2009-05-16 14:22:48
|
Revision: 346 http://mypyspace.svn.sourceforge.net/mypyspace/?rev=346&view=rev Author: gearmonkey Date: 2009-05-16 13:32:46 +0000 (Sat, 16 May 2009) Log Message: ----------- Added a function to access the path from src to dst, currently unweighted graphs only. Modified Paths: -------------- graphRDF/branches/songsAsNodes/graphRDF.py Modified: graphRDF/branches/songsAsNodes/graphRDF.py =================================================================== --- graphRDF/branches/songsAsNodes/graphRDF.py 2009-05-09 13:41:10 UTC (rev 345) +++ graphRDF/branches/songsAsNodes/graphRDF.py 2009-05-16 13:32:46 UTC (rev 346) @@ -388,9 +388,26 @@ + def shortestPath(self, src, dst, graph='S', weight='audioWeight'): + """returns the shortest path between the nodes src and dst (specified as node numbers) within graph (specify 'S' or 'G', S used by default). The edge attribute to use as a weight can be specified in weight, use None if an unweighted shortest path is desired. Looks to see if the shortest path index has been filled in self.graph.shortestpaths_unweighted or self.graph.shortestpaths_weighted (for audioWeight, exoitic weights not cached.)""" + #tests for existance of shortest paths for given src with the weighting and graph requested + if weight == None and graph = 'S': + try: + self.S.shortestpaths_unweighted[src] + except NameError: + self.S.shortestpaths_unweighted[src] = []*len(S.vs) + if not self.S.shortestpaths_unweighted[src][dst] == []: + return self.S.shortestpaths_unweighted[src][dst] + else: + self.S.shortestpaths_unweighted[src] = self.S.get_shortest_paths(src) + return self.S.shortestpaths_unweighted[src][dst] + else: + print "the specified edge attribute weight and graph were poorly specifed or are not yet supported..." 
+ return + def getGenreAssortativity(self): '''get the assortivaty coeff for the igraph G - based on Newman 2002 "Mixing Patterns in Networks" This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <gea...@us...> - 2009-05-09 14:22:27
|
Revision: 345 http://mypyspace.svn.sourceforge.net/mypyspace/?rev=345&view=rev Author: gearmonkey Date: 2009-05-09 13:41:10 +0000 (Sat, 09 May 2009) Log Message: ----------- The edge dump now reflects the corrected graph. Modified Paths: -------------- graphRDF/branches/songsAsNodes/complexSongGraphEdgeDump.txt.zip Modified: graphRDF/branches/songsAsNodes/complexSongGraphEdgeDump.txt.zip =================================================================== (Binary files differ) This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <gea...@us...> - 2009-05-09 10:20:16
|
Revision: 344 http://mypyspace.svn.sourceforge.net/mypyspace/?rev=344&view=rev Author: gearmonkey Date: 2009-05-09 10:20:05 +0000 (Sat, 09 May 2009) Log Message: ----------- This is the repaired complex Song graph Modified Paths: -------------- graphRDF/branches/songsAsNodes/complexSongGraph.mlz Modified: graphRDF/branches/songsAsNodes/complexSongGraph.mlz =================================================================== (Binary files differ) This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <gea...@us...> - 2009-05-09 07:10:43
|
Revision: 343 http://mypyspace.svn.sourceforge.net/mypyspace/?rev=343&view=rev Author: gearmonkey Date: 2009-05-09 07:10:32 +0000 (Sat, 09 May 2009) Log Message: ----------- added a script to help repair any graphs made (principally mine) before I sorted out the bug in graphRDF that was fixed in the previous revision. Modified Paths: -------------- graphRDF/branches/songsAsNodes/graphRDF.py graphRDF/branches/songsAsNodes/hplot.py Added Paths: ----------- graphRDF/branches/songsAsNodes/repairSongGraph.py Modified: graphRDF/branches/songsAsNodes/graphRDF.py =================================================================== --- graphRDF/branches/songsAsNodes/graphRDF.py 2009-05-05 17:14:14 UTC (rev 342) +++ graphRDF/branches/songsAsNodes/graphRDF.py 2009-05-09 07:10:32 UTC (rev 343) @@ -14,6 +14,7 @@ import getopt import ftplib import mopy +import re from logging import log, error, warning, info, debug import logging from numpy import * @@ -24,6 +25,7 @@ this package interfaces w/ myrdfspace.com to analyze and plot graphs of myspace artists by kurt Jacobson 29/10/2007 (c) +highly modified by Ben Fields on and around 15/04/2009 dependencies: - igraph (http://cneurocvs.rmki.kfki.hu/igraph/) @@ -69,10 +71,14 @@ def string2List(listAsString): """A little helper function that takes in a string that was made by printing a list and breaks it up into a list of it composite parts. returns the list. - could do with some error checking, but seems to work with the track attribute and ought to work with the genres as well.""" - items = listAsString.split("', '") - items[0] = items[0].lstrip("['") + could do with some error checking, but seems to work with the track attribute and ought to work with the genres as well. + Now handles both kinds of quote via the glory of regular expressions.""" + sep = re.compile('[\'\"], [\'\"]') + items = sep.split(listAsString) + items[0] = items[0].lstrip("['")#could do this with a regEx also, but, eh... 
+ items[0] = items[0].lstrip("[\"") items[-1] = items[-1].rstrip("']") + items[-1] = items[-1].rstrip("\"]") return items Modified: graphRDF/branches/songsAsNodes/hplot.py =================================================================== --- graphRDF/branches/songsAsNodes/hplot.py 2009-05-05 17:14:14 UTC (rev 342) +++ graphRDF/branches/songsAsNodes/hplot.py 2009-05-09 07:10:32 UTC (rev 343) @@ -1,7 +1,7 @@ #!/usr/bin/env python # encoding: utf-8 """ -untitled.py +hplot.py Created by Kurt Jacobson on 2008-11-11. Copyright (c) 2008 C4DM - Queen Mary U of London. All rights reserved. Added: graphRDF/branches/songsAsNodes/repairSongGraph.py =================================================================== --- graphRDF/branches/songsAsNodes/repairSongGraph.py (rev 0) +++ graphRDF/branches/songsAsNodes/repairSongGraph.py 2009-05-09 07:10:32 UTC (rev 343) @@ -0,0 +1,104 @@ +#!/usr/bin/env python +# encoding: utf-8 +""" +repairSongGraph.py + +A script to clean up the song graph without rebuilding the whole thing to deal with the poor string parsing in the original implementation + +""" + +import sys +import os +from graphRDF import * + +Usage = """usage: repairSongGraph [original Artist graphmlz] [orignal Song graphmlz] [destination Song graphmlz]\n""" + +def main(argv=None): + vertIdxsToDelete = [] + uidsAffected = [] + vertIdxsAdded = [] + idx = 0 + + if argv is None: + argv = sys.argv + if not len(argv) == 4: + print Usage + return 104 + + + fixMe = graph('person_seed_134901208') + print "Loading original Artist graph from " + argv[1] + try: + fixMe.G = igraph.Graph.Load(argv[1], format='graphmlz') + except Exception, err: + print Usage + "encountered problems loading the original Artist graphmlz file." + return 105 + fixMe.isPopulated = True + print "Loading orginal Song graph from " + argv[2] + try: + fixMe.S = igraph.Graph.Load(argv[2], format='graphmlz') + except Exception, err: + print Usage + "encountered problems loading the original Song graphmlz file." 
+ return 106 + print "will write out correceted graph to " + argv[3] + print "scrubbing Song Graph..." + + + idx = len(fixMe.S.vs) - 1 + for vert in fixMe.S.vs: + tracks = string2List(vert['track']) + if len(tracks) > 1: + print "Removing corrupt vertex from artist with uid " + vert['uid'] + " and breaking it into " + str(len(tracks)) + " correct vertices." + vertIdxsToDelete.append(vert.index) + uidsAffected.append(vert['uid']) + fixMe.S.add_vertices(len(tracks)) + for track in tracks: + idx +=1 + fixMe.S.vs[idx]['track'] = track + fixMe.S.vs[idx]['uid'] = vert['uid'] + vertIdxsAdded.append(idx) + print "found " + str(len(vertIdxsToDelete)) + " vertices to delete.\nAdding " + str(len(vertIdxsAdded)) + " repaired vertices." + + addedVertSeq = igraph.VertexSeq(fixMe.S, vertIdxsAdded) + toDeleteVertSeq = igraph.VertexSeq(fixMe.S, vertIdxsToDelete) + artistVertsOfInterest = fixMe.G.vs.select(uid_in=uidsAffected) + sourcesDealtWith = [] + targetsDealtWith = [] + idx = len(fixMe.S.es) - 1 + print "dealing with " + str(len(artistVertsOfInterest)) + "nodes in total" + for i,vert in enumerate(artistVertsOfInterest): + for index, edgeIdx in enumerate(fixMe.G.adjacent(vert.index, 'all')): + edge = fixMe.G.es[edgeIdx] + sources = fixMe.S.vs.select(uid=fixMe.G.vs[edge.source]['uid']) + targets = fixMe.S.vs.select(uid=fixMe.G.vs[edge.target]['uid']) + #print "Expanding edge " + str(edgeIdx) + " to " + str(len(sources) * len(targets)) + " edges." + oldidx = idx + for source in sources: + if not source.index in sourcesDealtWith: + sourcesDealtWith.append(source.index) + for target in targets: + if not target.index in targetsDealtWith: + targetsDealtWith.append(target.index) + fixMe.S.add_edges((source.index,target.index)) + idx += 1 + fixMe.S.es[idx]['audioWeight'] = -1 + + #print "added " + str(idx-oldidx) + " edges\n--------" + print str(len(artistVertsOfInterest) - i - 1) + " nodes left to deal with" + print "should have added all the edges now. 
Have a look:\n" + str(fixMe.S) + "\nRemoving old corrupt nodes and saving. Saving expanding graph in outputfile dumpedgraph.mlz in case something goes wrong." + fixMe.S.write("dumpedgraph.mlz", format="graphmlz") + + cleanGraph = fixMe.S.delete_vertices(vertIdxsToDelete) + + print "overwriting " + argv[3] + " with corrected graph." + + cleanGraph.write(argv[3],format="graphmlz") + + return 0 + + + + +if __name__ == '__main__': + main() + Property changes on: graphRDF/branches/songsAsNodes/repairSongGraph.py ___________________________________________________________________ Added: svn:executable + * This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <gea...@us...> - 2009-05-05 17:14:30
|
Revision: 342 http://mypyspace.svn.sourceforge.net/mypyspace/?rev=342&view=rev Author: gearmonkey Date: 2009-05-05 17:14:14 +0000 (Tue, 05 May 2009) Log Message: ----------- got an edge dump but in the process discovered a flaw in my string splitting algorithm that needs fixing and such. error has been labeled but not fixed (maybe we need trak for mypyspace...) Modified Paths: -------------- graphRDF/branches/songsAsNodes/graphRDF.py Added Paths: ----------- graphRDF/branches/songsAsNodes/complexSongGraphEdgeDump.txt.zip Added: graphRDF/branches/songsAsNodes/complexSongGraphEdgeDump.txt.zip =================================================================== (Binary files differ) Property changes on: graphRDF/branches/songsAsNodes/complexSongGraphEdgeDump.txt.zip ___________________________________________________________________ Added: svn:mime-type + application/octet-stream Modified: graphRDF/branches/songsAsNodes/graphRDF.py =================================================================== --- graphRDF/branches/songsAsNodes/graphRDF.py 2009-05-03 15:24:42 UTC (rev 341) +++ graphRDF/branches/songsAsNodes/graphRDF.py 2009-05-05 17:14:14 UTC (rev 342) @@ -320,11 +320,71 @@ idx += 1 debug("added " + str(idx-oldidx) + " edges\n--------") print "should have added all the edges now. Have a look:\n" + str(self.S) + + def dumpEdgeList(self, filename, graphToPrint=None, prefix=''): + """Writes out (text) the edgelist of the selected + graph (G by default) as tab delimited pairs + + source0\ttarget0 + source1\ttarget1 + ... + sourceN\ttargetN + + If the graph selected is G, each node is represented + as it's myspace artist ID. If the selected graph + is S the nodes will be printed as filenames (or more + strictly, the path beyond the common <prefix>, which is + nominally just the filename but not always if there + is a directory structure of some kind.) 
+ + WARNING: If the filename given for output already + exists it will be overwritten without any further + notice, be aware...""" + if graphToPrint==None or graphToPrint == 'G': + graphToPrint = self.G + dumpReducedGraph = True + print "Dumping the artist as node edge list." + elif graphToPrint == 'S': + graphToPrint = self.S + dumpReducedGraph = False + print "Dumping the song as node edge list.\nFile path prefix has been set to: " + prefix + else: + print "the graph specified does not exist. please specifit either 'G' for artist-centric graph or 'S' for song-centric graph." + return + try: + fH = open(filename, 'w') + except Exception, err: + print("dumpEdgeList had trouble opening the file named " + filename + " for writing. Sort it out and try again...") + print str(err) + return + + try: + print "The edge list from this graph has these properties:" + print graphToPrint + print "will be dumped to " + filename + except Exception, err: + print("dumpEdgeList was unable to access the graph you requested. Sort it out and try again...") + print str(err) + return + for edge in graphToPrint.es: + if dumpReducedGraph: + sourceText = str(graphToPrint.vs[edge.source]['uid']) + targetText = str(graphToPrint.vs[edge.target]['uid']) + else: + sourceText = str(graphToPrint.vs[edge.source]['track'].lstrip(prefix)) + targetText = str(graphToPrint.vs[edge.target]['track'].lstrip(prefix)) + fH.write(sourceText + '\t' + targetText + '\n') + fH.flush() + + return + + + def getGenreAssortativity(self): '''get the assortivaty coeff for the igraph G - based on Newman 2002 "Mixing Patterns in Networks" @@ -375,13 +435,16 @@ def main(argv=None): plot = False includeEnds = False + base = None + expanded = None folder = "person_seed_134901208" media = '' + if argv is None: argv = sys.argv try: try: - opts, args = getopt.getopt(argv[1:], "hf:m:o:vb:e:p", ["help", "folder=", "media=", "output=", "baseGraph=","expandedGraph=". 
"plot"]) + opts, args = getopt.getopt(argv[1:], "hf:m:o:vb:e:p", ["help", "folder=", "media=", "output=", "baseGraph=","expandedGraph=", "plot"]) except getopt.error, msg: raise Usage(msg) @@ -397,6 +460,8 @@ output = value if option in ("-f", "--folder"): folder = value + if option in ("-m", "--media"): + media = value if option in ("-p", "--plot"): plot = True if option in ("-b", "--baseGraph"): @@ -406,7 +471,9 @@ logging.basicConfig(**loggingConfig) - G = graph('') + baseGraph = igraph.Graph(folder, media) + if base != None: + G G.populate() This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <gea...@us...> - 2009-05-03 15:24:54
|
Revision: 341 http://mypyspace.svn.sourceforge.net/mypyspace/?rev=341&view=rev Author: gearmonkey Date: 2009-05-03 15:24:42 +0000 (Sun, 03 May 2009) Log Message: ----------- added full song as node graph. Audio weights of edges all set to -1 as place holder Modified Paths: -------------- graphRDF/branches/songsAsNodes/graphRDF.py Added Paths: ----------- graphRDF/branches/songsAsNodes/complexSongGraph.mlz Added: graphRDF/branches/songsAsNodes/complexSongGraph.mlz =================================================================== (Binary files differ) Property changes on: graphRDF/branches/songsAsNodes/complexSongGraph.mlz ___________________________________________________________________ Added: svn:mime-type + application/octet-stream Modified: graphRDF/branches/songsAsNodes/graphRDF.py =================================================================== --- graphRDF/branches/songsAsNodes/graphRDF.py 2009-04-30 17:26:17 UTC (rev 340) +++ graphRDF/branches/songsAsNodes/graphRDF.py 2009-05-03 15:24:42 UTC (rev 341) @@ -373,21 +373,19 @@ # self.Pkin = Pk def main(argv=None): - plot = 0 - includeEnds = 0 + plot = False + includeEnds = False + folder = "person_seed_134901208" + media = '' if argv is None: argv = sys.argv try: try: - opts, args = getopt.getopt(argv[1:], "ho:vl:p", ["help", "output=", "logtofile=", "plot"]) + opts, args = getopt.getopt(argv[1:], "hf:m:o:vb:e:p", ["help", "folder=", "media=", "output=", "baseGraph=","expandedGraph=". 
"plot"]) except getopt.error, msg: raise Usage(msg) - loggingConfig = {"format":'%(asctime)s %(levelname)-8s %(message)s', - "datefmt":'%d.%m.%y %H:%M:%S', - "level": logging.DEBUG, - #"filename":logPath + "musicGrabber.log", - "filemode":"w"} + # option processing for option, value in opts: @@ -397,19 +395,18 @@ raise Usage(help_message) if option in ("-o", "--output"): output = value + if option in ("-f", "--folder"): + folder = value if option in ("-p", "--plot"): - plot = 1 - if option in ("-l", "--logtofile"): - logPath = value - loggingConfig = {"format":'%(asctime)s %(levelname)-8s %(message)s', - "datefmt":'%d.%m.%y %H:%M:%S', - "level": logging.DEBUG, - "filename":logPath + "musicGrabber.log", - "filemode":"w"} + plot = True + if option in ("-b", "--baseGraph"): + base = value + if option in ("-e", "--expandedGraph"): + expanded = value logging.basicConfig(**loggingConfig) - G = graph(includeEnds) + G = graph('') G.populate() This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <gea...@us...> - 2009-04-30 17:26:18
|
Revision: 340 http://mypyspace.svn.sourceforge.net/mypyspace/?rev=340&view=rev Author: gearmonkey Date: 2009-04-30 17:26:17 +0000 (Thu, 30 Apr 2009) Log Message: ----------- I now have a highly unoptimized function that expands the graph to be songwise. My back of the napkin calculation says it will take 6 - 8 hours to expand the graph, which is annoyingly long and I expect it could be improved, but this will work for now. Modified Paths: -------------- graphRDF/branches/songsAsNodes/graphRDF.py Modified: graphRDF/branches/songsAsNodes/graphRDF.py =================================================================== --- graphRDF/branches/songsAsNodes/graphRDF.py 2009-04-30 15:01:37 UTC (rev 339) +++ graphRDF/branches/songsAsNodes/graphRDF.py 2009-04-30 17:26:17 UTC (rev 340) @@ -287,11 +287,40 @@ # self.AG.add_edge(node, friend) def populateSongwise(self): - '''Create a graph S which uses artist relationships as egdes but seperates each song into seperate nodes, such that song to song distance can be used as edge weights, rather than artist to artist distance. The artist as node graph must be built first, via either populate() or populateLocal().''' + '''Create a graph S which uses artist relationships as egdes but seperates each song into seperate nodes, such that song to song distance can be used as edge weights, rather than artist to artist distance. The artist as node graph must be built first, via either populate() or populateLocal(). On initalisation weights are set to 1.''' if not self.isPopulated: error("Base graph has not been built. Run populate() or populateLocal() first.") return self.S = igraph.Graph(directed=True) + idx = 0 + artistLookup = {} + for v in self.G.vs: + tracks = string2List(v["tracks"]) + if len(tracks) == 0: + print "no songs found for artist #" + str(v["uid"]) + " moving on." 
+ artistLookup[v["uid"]] = [] + for track in tracks: + self.S.add_vertices(1) + self.S.vs[idx]['uid'] = v['uid'] + self.S.vs[idx]['track'] = str(track) + artistLookup[v["uid"]] += [idx] + idx += 1 + print str(self.S) + print "hope that's all the songs, now it's time to add some edges.\n----------\n----------" + idx = 0 + for index, edge in enumerate(self.G.es): + sources = artistLookup[self.G.vs[edge.source]['uid']] + targets = artistLookup[self.G.vs[edge.target]['uid']] + debug("Expand edge " + str(index) + " to " + str(len(sources) * len(targets)) + " edges.") + oldidx = idx + for source in sources: + for target in targets: + self.S.add_edges((source,target)) + self.S.es[idx]['audioWeight'] = -1 + idx += 1 + debug("added " + str(idx-oldidx) + " edges\n--------") + print "should have added all the edges now. Have a look:\n" + str(self.S) + This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <gea...@us...> - 2009-04-30 15:01:39
|
Revision: 339 http://mypyspace.svn.sourceforge.net/mypyspace/?rev=339&view=rev Author: gearmonkey Date: 2009-04-30 15:01:37 +0000 (Thu, 30 Apr 2009) Log Message: ----------- added a nice helper function to break up igraph attributes that are formatted strings of Lists back into lists. Modified Paths: -------------- graphRDF/branches/songsAsNodes/graphRDF.py myspaceCrawler/trunk/myspaceCrawler.py Modified: graphRDF/branches/songsAsNodes/graphRDF.py =================================================================== --- graphRDF/branches/songsAsNodes/graphRDF.py 2009-04-17 12:53:17 UTC (rev 338) +++ graphRDF/branches/songsAsNodes/graphRDF.py 2009-04-30 15:01:37 UTC (rev 339) @@ -65,6 +65,17 @@ #"filename":logPath + "musicGrabber.log", "filemode":"w"} logging.basicConfig(**loggingConfig) + +def string2List(listAsString): + """A little helper function that takes in a string that was made by printing a list and breaks it up into a list of it composite parts. + returns the list. + could do with some error checking, but seems to work with the track attribute and ought to work with the genres as well.""" + items = listAsString.split("', '") + items[0] = items[0].lstrip("['") + items[-1] = items[-1].rstrip("']") + return items + + class graph(object): '''G = graph(rdfFolder) -> returns a graph object encapsulating an igraph G Modified: myspaceCrawler/trunk/myspaceCrawler.py =================================================================== --- myspaceCrawler/trunk/myspaceCrawler.py 2009-04-17 12:53:17 UTC (rev 338) +++ myspaceCrawler/trunk/myspaceCrawler.py 2009-04-30 15:01:37 UTC (rev 339) @@ -31,8 +31,9 @@ from myspaceuris import * - -THREAD_CAP = 10000 #maximum number of threads allowed to be firing at once +#maximum number of threads allowed to be firing at once, if you're doing feature extraction, +THREAD_CAP = 16 #shouldn't be more than about 5 x numProcessors to prevent overburdening the system +######## THREAD_STALL_TIME = 30 #length of time in seconds to wait 
until the thread count is checked again LOG_FILENAME = "musicCrawler.log" #name of logger file (path set at commandline) This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <gea...@us...> - 2009-04-17 12:53:28
|
Revision: 338 http://mypyspace.svn.sourceforge.net/mypyspace/?rev=338&view=rev Author: gearmonkey Date: 2009-04-17 12:53:17 +0000 (Fri, 17 Apr 2009) Log Message: ----------- added a graphmlz file with the dumped graph that includes media file refs. Added Paths: ----------- graphRDF/branches/songsAsNodes/graphWithMedia.mlz Added: graphRDF/branches/songsAsNodes/graphWithMedia.mlz =================================================================== (Binary files differ) Property changes on: graphRDF/branches/songsAsNodes/graphWithMedia.mlz ___________________________________________________________________ Added: svn:mime-type + application/octet-stream This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ku...@us...> - 2009-04-01 17:05:08
|
Revision: 337 http://mypyspace.svn.sourceforge.net/mypyspace/?rev=337&view=rev Author: kurtjx Date: 2009-04-01 17:05:04 +0000 (Wed, 01 Apr 2009) Log Message: ----------- fixed friend scraping stuff Modified Paths: -------------- musicGrabber/branches/webserv-branch/myspace2rdf.py musicGrabber/branches/webserv-branch/myspaceuris.py Modified: musicGrabber/branches/webserv-branch/myspace2rdf.py =================================================================== --- musicGrabber/branches/webserv-branch/myspace2rdf.py 2009-03-23 11:15:35 UTC (rev 336) +++ musicGrabber/branches/webserv-branch/myspace2rdf.py 2009-04-01 17:05:04 UTC (rev 337) @@ -81,9 +81,15 @@ friendUIDs = scrapePageWhile(self.page, friendTag[0], friendTag[1]) friendNames = scrapePageWhile(self.page, friendNameTag[0], friendNameTag[1]) friendPics = scrapePageWhile(self.page, friendPicTag[0], friendPicTag[1]) - + + #print friendUIDs + for i in range(len(friendUIDs)): - friend = mopy.foaf.Person(dbtuneMyspace+'uid/' + str(friendUIDs[i])) + currentUID = friendUIDs[i] + if currentUID.isdigit(): + friend = mopy.foaf.Person(dbtuneMyspace+'uid/' + str(friendUIDs[i])) + else: + friend = mopy.foaf.Person(dbtuneMyspace+str(friendUIDs[i])) friend.name.set(friendNames[i]) try: img = mopy.foaf.Image(friendPics[i]) @@ -91,8 +97,8 @@ except: pass - if artist==False: - self.subject.knows.add(friend) + #if artist==False: + # self.subject.knows.add(friend) # self.subject.knows.add(friend) # since when did this happen??? 
mopy wont take foaf:knows as a prop of mo:MusicArtist @@ -127,7 +133,6 @@ else: #self.subject = mopy.mo.Agent('http://dbtune.org/myspace/uid/'+str(self.uid)) self.subject = mopy.foaf.Person(dbtuneMyspace+'uid/'+str(self.uid)) - self.subject = mopy.mo.MusicArtist(dbtuneMyspace+'uid/'+str(self.uid)) # add foaf:primaryTopic ppd = mopy.foaf.PersonalProfileDocument("") ppd.primaryTopic.set(self.subject) Modified: musicGrabber/branches/webserv-branch/myspaceuris.py =================================================================== --- musicGrabber/branches/webserv-branch/myspaceuris.py 2009-03-23 11:15:35 UTC (rev 336) +++ musicGrabber/branches/webserv-branch/myspaceuris.py 2009-04-01 17:05:04 UTC (rev 337) @@ -12,7 +12,8 @@ # useful tags playerTag = """SWFObject("http://musicservices.myspace.com/Modules/MusicServices/Services/Embed.ashx/ptype=4""", ''';''' # ### this tag will be terminated by a '.' ### -friendTag = '''<td bgcolor="FFFFFF" align="center" valign="top" width="107" style="word-wrap:break-word">\r\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t <a href="http://profile.myspace.com/index.cfm?fuseaction=user.viewProfile&friendID=''', '''"''' +#<td bgcolor="FFFFFF" align="center" valign="top" width="107" style="word-wrap:break-word">\r\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t +friendTag = ''' <a href="http://www.myspace.com/''', '''"''' # new tag updated 13/1/2009 #""" <a href="http://profile.myspace.com/index.cfm?fuseaction=user.viewprofile&friendid=""", '''"''' # ### tag will be terminated by a '"' ### This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <gea...@us...> - 2009-03-23 11:15:41
|
Revision: 336 http://mypyspace.svn.sourceforge.net/mypyspace/?rev=336&view=rev Author: gearmonkey Date: 2009-03-23 11:15:35 +0000 (Mon, 23 Mar 2009) Log Message: ----------- the standard populate now has a property (is that the right word?) called tracks that contains a list of uris to songs associated with the current node. There is also a stub function that will eventually use this property to create a new graph with one node per track, but that's for later... Modified Paths: -------------- graphRDF/branches/songsAsNodes/audioWeights.py graphRDF/branches/songsAsNodes/graphRDF.py Modified: graphRDF/branches/songsAsNodes/audioWeights.py =================================================================== --- graphRDF/branches/songsAsNodes/audioWeights.py 2009-03-09 19:14:15 UTC (rev 335) +++ graphRDF/branches/songsAsNodes/audioWeights.py 2009-03-23 11:15:35 UTC (rev 336) @@ -38,7 +38,7 @@ lambda_max = 2500 def addAudioWeights(): - '''read in lambdsa and assign as audio label to each edge''' + '''read in lambdas and assign as audio label to each edge''' audioDic = {} audioZeros = 0 for line in lines: Modified: graphRDF/branches/songsAsNodes/graphRDF.py =================================================================== --- graphRDF/branches/songsAsNodes/graphRDF.py 2009-03-09 19:14:15 UTC (rev 335) +++ graphRDF/branches/songsAsNodes/graphRDF.py 2009-03-23 11:15:35 UTC (rev 336) @@ -69,15 +69,24 @@ class graph(object): '''G = graph(rdfFolder) -> returns a graph object encapsulating an igraph G includeEnds is a bool value to include nodes w/ no actual rdf file (ends) - rdfFolder is the "person" folder name on myrdfspace.com + rdfFolder is the "person" folder name on myrdfspace.com. If the graph + to be built is going to use songs as nodes based on the old split rdf files, + the mediaRDFFolder must point to the repository of rdf files containing + information about the songs. 
If it is not set and the songwise graph is + attempted, it will try to find the song references in the same rdf files as + the artist metadata (new style). *** NOTE includeEnds is disabled permanently ***''' - def __init__(self, rdfFolder): + def __init__(self, rdfFolder, mediaRdfFolder=''): self.includeEnds = 0 + self.isPopulated = False rdfFolder = rdfFolder.rstrip('/') rdfFolder = rdfFolder+'/' self.rdfFolder = rdfFolder + self.mediaRdfFolder = mediaRdfFolder setLogger() debug("creating graph object...") + if (mediaRdfFolder == ''): + debug("using old style seperate media rdf files.") self.G = igraph.Graph(directed=True) # self.AG = pg.AGraph() # set some default attribs @@ -87,7 +96,7 @@ # self.AG.node_attr['fontcolor'] = '#FFFFFF' # self.AG.node_attr['fontsize'] = '6.0' # self.AG.graph_attr['overlap'] = 'scale' - info("call populate() to build graph...") + print "call populate() or populateLocal() to build graph..." def populateLocal(self): '''use to populate from a local directory instead of myrdfspace.com''' @@ -127,7 +136,6 @@ pass print "genres for "+ str(node) + " are " +str(gkeys) self.G.vs[vDict[node]]['genres'] = gkeys - ## for the .dot file for graphviz ################ color by genre somehow??? # if gkeys: @@ -147,8 +155,8 @@ key = key[len(strip):] if not key == str(node.rstrip(".rdf")): friendList.append(key) + self.isPopulated = True - print friendList if self.includeEnds: for friend in friendList: @@ -183,7 +191,7 @@ idxE = 0 for v in vList: v = v.rstrip(".rdf") - print "*********** " + v + " *********** idx: " + str(idx) + #print "*********** " + v + " *********** idx: " + str(idx) # self.AG.add_node(v) self.G.add_vertices(1) self.G.vs[idx]['uid'] = v @@ -204,8 +212,28 @@ except: pass print "genres for "+ str(node) + " are " +str(gkeys) + self.G.vs[vDict[node]]['genres'] = gkeys + try: + trackList = [] + if (self.mediaRdfFolder != ''): + #attempt to fetch the corrisponding media RDF file, if the old split style has been declared. 
+ mediaRDF = mopy.importRDFFile(os.path.join("http://myrdfspace.com/", self.mediaRdfFolder,str(node)+"media.rdf" )) + for track in mediaRDF.TrackIdx: + trackList.append(track) + debug("added " + track + " to list of tracks for artist # " + str(node)) + else: + #if the caller didn't specify a media RDF location assume new style combined rdf, so look for + #the track listings in the previously loaded rdf file. + for track in rdf.TrackIdx: + trackList.append(track) + debug("added " + track + " to list of tracks for artist # " + str(node)) + self.G.vs[vDict[node]]['tracks'] = trackList + except Exception, err: + error("something went wrong while trying to load the track listing for artist " + str(node) + "\nErrMsg: " + str(err)) + + ## for the .dot file for graphviz ################ color by genre somehow??? # if gkeys: @@ -226,7 +254,7 @@ if not key == str(node.rstrip(".rdf")): friendList.append(key) - + self.isPopulated = True print friendList if self.includeEnds: for friend in friendList: @@ -247,6 +275,17 @@ self.G.add_edges((vDict[node], vDict[friend])) # self.AG.add_edge(node, friend) + def populateSongwise(self): + '''Create a graph S which uses artist relationships as egdes but seperates each song into seperate nodes, such that song to song distance can be used as edge weights, rather than artist to artist distance. The artist as node graph must be built first, via either populate() or populateLocal().''' + if not self.isPopulated: + error("Base graph has not been built. Run populate() or populateLocal() first.") + return + self.S = igraph.Graph(directed=True) + + + + + def getGenreAssortativity(self): '''get the assortivaty coeff for the igraph G - based on Newman 2002 "Mixing Patterns in Networks" THIS DOESNT WORK QUITE RIGHT - graphmeasures.py ''' This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ku...@us...> - 2009-03-09 19:14:38
|
Revision: 335 http://mypyspace.svn.sourceforge.net/mypyspace/?rev=335&view=rev Author: kurtjx Date: 2009-03-09 19:14:15 +0000 (Mon, 09 Mar 2009) Log Message: ----------- some bugs getting artistID and playlistID fixed - if you dont find a playlistID dont try to use it duh - wonder why foaf:knows isnt still included in all rdf Modified Paths: -------------- musicGrabber/branches/webserv-branch/myspace2rdf.py musicGrabber/branches/webserv-branch/myspaceuris.py Modified: musicGrabber/branches/webserv-branch/myspace2rdf.py =================================================================== --- musicGrabber/branches/webserv-branch/myspace2rdf.py 2009-03-08 12:04:23 UTC (rev 334) +++ musicGrabber/branches/webserv-branch/myspace2rdf.py 2009-03-09 19:14:15 UTC (rev 335) @@ -43,6 +43,7 @@ def __init__(self, uid=""): self.uid = uid self.mi = mopy.MusicInfo() + self.playlistID = False def getPage(self): '''just grab the web page''' @@ -92,6 +93,10 @@ if artist==False: self.subject.knows.add(friend) + + # self.subject.knows.add(friend) + # since when did this happen??? 
mopy wont take foaf:knows as a prop of mo:MusicArtist + self.subject.topFriend.add(friend) self.mi.add(friend) @@ -163,27 +168,27 @@ # self.subject.sameAs.set(thing2) # self.mi.add(thing2) + if self.playlistID and self.artistID and self.uid: + xmlPage = try_open(mediaBase[0] + str(self.artistID) + mediaBase[1] + str(self.playlistID) + mediaBase[2] + str(self.uid) + mediaBase[3]) + #print mediaBase[0] + str(self.artistID) + mediaBase[1] + str(self.playlistID) + mediaBase[2] + str(self.uid) + mediaBase[3] - xmlPage = try_open(mediaBase[0] + str(self.artistID) + mediaBase[1] + str(self.playlistID) + mediaBase[2] + str(self.uid) + mediaBase[3]) - #print mediaBase[0] + str(self.artistID) + mediaBase[1] + str(self.playlistID) + mediaBase[2] + str(self.uid) + mediaBase[3] - - if xmlPage: - self.xmlStruct = dom.parseString(''.join(xmlPage.readlines())) - songList = self.xmlStruct.getElementsByTagName('song') - for song in songList: - # using ben's mpsSong class - thisSong = mpsSong(self, song, 'downloadprefix') - thisSong.getUri() + if xmlPage: + self.xmlStruct = dom.parseString(''.join(xmlPage.readlines())) + songList = self.xmlStruct.getElementsByTagName('song') + for song in songList: + # using ben's mpsSong class + thisSong = mpsSong(self, song, 'downloadprefix') + thisSong.getUri() - track = mopy.mo.Track() - track.title.set(thisSong.title) - availableAs = thisSong.uri - if availableAs: - avas = mopy.mo.MusicalItem(availableAs) - track.available_as.set(avas) - self.mi.add(avas) - self.subject.made.add(track) - self.mi.add(track) + track = mopy.mo.Track() + track.title.set(thisSong.title) + availableAs = thisSong.uri + if availableAs: + avas = mopy.mo.MusicalItem(availableAs) + track.available_as.set(avas) + self.mi.add(avas) + self.subject.made.add(track) + self.mi.add(track) self.createCommonRDF() Modified: musicGrabber/branches/webserv-branch/myspaceuris.py =================================================================== --- 
musicGrabber/branches/webserv-branch/myspaceuris.py 2009-03-08 12:04:23 UTC (rev 334) +++ musicGrabber/branches/webserv-branch/myspaceuris.py 2009-03-09 19:14:15 UTC (rev 335) @@ -41,9 +41,9 @@ #these two tag scraps are provisional for grabbing the ArtistID and playlist number, which are now nessecary to grab audio #both of these should be terminated by a comma playlistIDtag = """plid=""", '''&''' -artistIDtag = """artid=""",'''&''' +#artistIDtag = """artid=""",'''&''' +artistIDtag = '''"DisplayFriendId":''',''',''' - ######################################################################################################### # myspace uri for downloads ----this has gotten a bit more complicated in the roll out of myspace's new media player This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ku...@us...> - 2009-03-08 12:04:37
|
Revision: 334 http://mypyspace.svn.sourceforge.net/mypyspace/?rev=334&view=rev Author: kurtjx Date: 2009-03-08 12:04:23 +0000 (Sun, 08 Mar 2009) Log Message: ----------- some bug fixes for getting genre and getting playlistID / artistID - verify the string is indeed an integer cuz sometimes it is junk Modified Paths: -------------- musicGrabber/branches/webserv-branch/myspace2rdf.py Modified: musicGrabber/branches/webserv-branch/myspace2rdf.py =================================================================== --- musicGrabber/branches/webserv-branch/myspace2rdf.py 2009-03-08 12:00:15 UTC (rev 333) +++ musicGrabber/branches/webserv-branch/myspace2rdf.py 2009-03-08 12:04:23 UTC (rev 334) @@ -18,6 +18,7 @@ #Am making use of regular expressions import re import urllib +from mpsSong import mpsSong help_message = ''' @@ -44,6 +45,7 @@ self.mi = mopy.MusicInfo() def getPage(self): + '''just grab the web page''' url = viewProfileURLbase + str(self.uid) resp = try_open(url) if resp==None: @@ -109,6 +111,7 @@ ppd = mopy.foaf.PersonalProfileDocument("") ppd.primaryTopic.set(self.subject) self.mi.add(ppd) + # assuming the 'name' tag must be present, if not it's a bad url self.name = scrapePage(self.page, [nameTag[0]], nameTag[1]) if self.name: self.subject.name.set(self.name) @@ -168,13 +171,7 @@ self.xmlStruct = dom.parseString(''.join(xmlPage.readlines())) songList = self.xmlStruct.getElementsByTagName('song') for song in songList: - '''try: - songTitle = unicodedata.normalize('NFKC',song.getAttribute('title')).encode('ascii','ignore') - except AttributeError, err: - songTitle = str(None) - except IndexError, err: - songTitle = str(None) - #availableAs = song.getAttribute('durl')''' + # using ben's mpsSong class thisSong = mpsSong(self, song, 'downloadprefix') thisSong.getUri() @@ -185,7 +182,6 @@ avas = mopy.mo.MusicalItem(availableAs) track.available_as.set(avas) self.mi.add(avas) - #track.available_as.set(mopy.rdfs.Resource(availableAs)) self.subject.made.add(track) 
self.mi.add(track) @@ -201,7 +197,11 @@ def scrapeArtistID(self): '''attempt to find via scrape of page the internal artist number.''' try: - self.artistID = scrapePage(self.page, [artistIDtag[0]], artistIDtag[1]) + ids = scrapePageWhile(self.page, artistIDtag[0], artistIDtag[1]) + for i in ids: + if i.isdigit(): + self.artistID = i + # self.artistID = scrapePage(self.page, [artistIDtag[0]], artistIDtag[1]) return True except Exception, err: print "Ran into trouble trying to scrape the ArtistID for page from " + self.source + "\nError::" + str(err) @@ -210,7 +210,12 @@ def scrapePlaylistNumber(self): """attempts to find via scrape of the internal identifier of an artist's playlist of songs""" try: - self.playlistID = scrapePage(self.page, [playlistIDtag[0]], playlistIDtag[1]) + # make sure we get a digit and not some crap - maybe should to regex + ids = scrapePageWhile(self.page, playlistIDtag[0], playlistIDtag[1]) + for i in ids: + if i.isdigit(): + self.playlistID = i + #self.playlistID = scrapePage(self.page, [playlistIDtag[0]], playlistIDtag[1]) return True except Exception, err: print "Ran into trouble trying to scrape the playlistID for page from " + self.source + "\nError::" + str(err) @@ -293,150 +298,23 @@ genreNums = re.findall(''':"(.|..|...)"''', localGenres) # should return only 2 or 3 char string between genres = [] for gnum in genreNums: - genre = mopy.mo.Genre(myspaceOntology+urllib.quote(genreDict[int(gnum)])) - genre.name.set(genreDict[int(gnum)]) - self.mi.add(genre) - self.subject.genreTag.add(genre) - genres.append(genre) + try: + genre = mopy.mo.Genre(myspaceOntology+urllib.quote(genreDict[int(gnum)])) + except KeyError: + pass + else: + genre.name.set(genreDict[int(gnum)]) + self.mi.add(genre) + self.subject.genreTag.add(genre) + genres.append(genre) return genres -class mpsSong: - """a class that wraps around the downloading, feature extracting and modeling of a piece of media attached to a mpsUser - mpsSong object instances have the 
following public variables: - parent -- a weakref to the mpsUser that generated the mpsSong instance - uri -- lo res cached download link - betterUri -- hi res cached download link (not always available) - downloadprefix -- local prefix to stick the file when downloaded - extractionprefix -- local prefix to stick the feature files when extracted - title -- title of song - image -- url to get image associated with song - playcount -- number of times song has been played via myspace player - trackNum -- track number based on order presented on myspace - totalTracks -- number of songs available for parent - filename -- name used for local lofi file, when downloaded - HIFIfilename -- name used for local hifi file, when downloaded - beats -- local name of beat segmentaton file, used to do variable segment length feature extraction - """ - def __init__(self, parent, xmlNode, downloadprefix = '', extractionprefix = ''): - """initializes the mpsSong class. Parent is a pointer to the calling mpsUser, xmlNode should be a DOM object with the songs info. downloadprefix is the local directory prefix where the media will be put, default is an empty string. If no extractionprefix is given, extracted features will be places in the dir pointed to by downloadprefix""" - #self.parent = weakref.ref(parent) - self.xmlNode = xmlNode - self.getUri() - #the nicer file download is currently broken... - #self.betterURI = xmlNode.getAttribute('downloadable') - self.downloadprefix = downloadprefix - if extractionprefix == '': - self.extractionprefix = downloadprefix - else: - self.extractionprefix = extractionprefix - self.title = self.exhaustiveXML.getElementsByTagName('title')[0].firstChild.nodeValue - self.image = self.exhaustiveXML.getElementsByTagName('small')[0].firstChild.nodeValue - self.playcount = xmlNode.getElementsByTagName('stats')[0].getAttribute('plays') - self.comments = "" #this is a blank string hold for the comments fields. Might be used later. 
- self.trackNum, self.totalTracks = None, None - self.filename, self.HIFIfilename = None, None - self.beats = None - def getUri(self): - self.songID = self.xmlNode.getAttribute('songId') - xmlPage = try_open(songBase[0] + str(self.songID) + songBase[1]) - self.exhaustiveXML = dom.parseString(''.join(xmlPage.readlines())) - xmlPage.close() - try: - self.uri = self.exhaustiveXML.getElementsByTagName('link')[0].firstChild.nodeValue - except AttributeError, err: - #logging.info("mpsUser::getUri ran into a problem finding the download link for a song by artist with uid: " + - # str(self.parent().uid) + " link will be left blank.\n\tError msg: " + str(err)) - pass - self.uri = '' - def setTrackNum(self, trackNumber, totalTracks): - '''set the track number for this song and the number of tracks in the album it is in.''' - self.trackNum = trackNumber - self.totalTracks = totalTracks - def download(self): - '''download the track. - Upon success set self.filename to the local location of the downloaded song and return true. - On FAIL return false.''' - logging.debug("downloading " + self.title + " by " + self.parent().artist + " to " + self.downloadprefix) - if self.trackNum != None: - filename = unicode(str(self.trackNum), 'utf8') + u'_' + self.title + u'.mp3' - else: - filename = self.title + u'.mp3' - if try_get(self.uri, os.path.join(self.downloadprefix, filename)) != None: - logging.debug("success on " + self.title + " by " + self.parent().artist + " to " + os.path.join(self.downloadprefix,filename)) - self.filename = filename - return True - else: - logging.debug("FAIL on " + self.title + " by " + self.parent().artist + " to " + os.path.join(self.downloadprefix,filename)) - return False - - def downloadHIFI(self): - '''if it exists, download the hi fidelity version of the track. - Upon success set self.HIFIfilename to the local location of the downloaded song and return true. 
- On FAIL return false.''' - if not self.betterURI: - logging.info("NO hi-fi version of " + self.title + " by " + self.parent().artist + " but we did look for it.") - return False - logging.debug("downloading hifi copy of " + self.title + "by" + self.parent().artist + " to " + self.downloadprefix) - if self.trackNum != None: - filename = unicode(str(self.trackNum), 'utf8') + u'_' + self.title + u'_hifi.mp3' - else: - filename = self.title + u'_hifi.mp3' - if (try_get(self.betteruri, os.path.join(self.downloadprefix,filename)) != None): - logging.debug("success on hi-fi version of " + self.title + " by " + self.parent().artist + " to " + os.path.join(self.downloadprefix,filename)) - self.HIFIfilename = filename - return True - else: - logging.debug("FAIL on hi-fi version of " + self.title + " by " + self.parent().artist + " to " + os.path.join(self.downloadprefix,filename)) - return False - - - def tag(self, hifi = False): - '''create or modify the id3 tag for downloaded song associated with self. 
set optional hifi arg to tag the hifi download''' - if hifi: - fileToTag = os.path.join(self.downloadprefix,self.HIFIfilename) - else: - fileToTag = os.path.join(self.downloadprefix,self.filename) - if fileToTag == None: - logging.info("asked to tag a file associated with uid: " + str(self.parent().uid) + " but the song does not exist locally") - logging.debug("adding tags to " + fileToTag) - try: id3 = mutagen.id3.ID3(fileToTag) - except mutagen.id3.ID3NoHeaderError: - logging.info("No ID3 header found for " + fileToTag + "; creating tag from scratch") - id3 = mutagen.id3.ID3() - except Exception, err: - logging.error(str(err)) - return - id3.add(mutagen.id3.TIT2(encoding=3,text=self.title)) - id3.add(mutagen.id3.TPE1(encoding=3,text=self.parent().artist)) - id3.add(mutagen.id3.COMM(encoding=3,text=self.comments, lang="eng", desc="")) - #id3.add(mutagen.id3.COMM(encoding=3,text=relationshipLink, lang="eng", desc="MusicGrabberSig")) - id3.add(mutagen.id3.TALB(encoding=3,text=self.parent().album)) - if self.trackNum != None: - id3.add(mutagen.id3.TRCK(encoding=3,text=str(self.trackNum) + '/' + str(self.totalTracks))) - id3.add(mutagen.id3.POPM(encoding=3,email=str(self.parent().uid)+"@myspace", rating = 128, count=self.playcount)) - if self.image == None: - logging.error("No image present for " + self.title + ", " + self.parent().artist) - try: - logging.debug("trying to get image from " + self.image) - localImgPath, imgHeader = try_get(self.image, os.path.join("/tmp",os.path.basename(self.image))) - imgHandle = open(localImgPath) - id3.add(mutagen.id3.APIC(encoding=3, mime=imgHeader.type, data=imgHandle.read(), type=17, desc="Song pic from myspace.com")) - except: - logging.error("Unable to retieve image for " + self.title + ", " + self.parent().artist) - try: - id3.save(fileToTag) - except Exception, err: - logging.error(str(err) + ";couldn\'t save the tag for " + self.title + " by " + self.parent().artist) - - - - def main(argv=None): if argv is None: argv = 
sys.argv This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <ku...@us...> - 2009-03-08 12:00:26
|
Revision: 333 http://mypyspace.svn.sourceforge.net/mypyspace/?rev=333&view=rev Author: kurtjx Date: 2009-03-08 12:00:15 +0000 (Sun, 08 Mar 2009) Log Message: ----------- put mpsSong in separate file Added Paths: ----------- musicGrabber/branches/webserv-branch/mpsSong.py Added: musicGrabber/branches/webserv-branch/mpsSong.py =================================================================== --- musicGrabber/branches/webserv-branch/mpsSong.py (rev 0) +++ musicGrabber/branches/webserv-branch/mpsSong.py 2009-03-08 12:00:15 UTC (rev 333) @@ -0,0 +1,143 @@ +#!/usr/bin/env python +# encoding: utf-8 +''' +mpsSong.py - for handling myspace songs + +by Ben Fields +''' + +from xml.dom import minidom as dom +import unicodedata +from tryurl import * +from myspaceuris import * + +class mpsSong: + """a class that wraps around the downloading, feature extracting and modeling of a piece of media attached to a mpsUser + mpsSong object instances have the following public variables: + parent -- a weakref to the mpsUser that generated the mpsSong instance + uri -- lo res cached download link + betterUri -- hi res cached download link (not always available) + downloadprefix -- local prefix to stick the file when downloaded + extractionprefix -- local prefix to stick the feature files when extracted + title -- title of song + image -- url to get image associated with song + playcount -- number of times song has been played via myspace player + trackNum -- track number based on order presented on myspace + totalTracks -- number of songs available for parent + filename -- name used for local lofi file, when downloaded + HIFIfilename -- name used for local hifi file, when downloaded + beats -- local name of beat segmentaton file, used to do variable segment length feature extraction + + """ + def __init__(self, parent, xmlNode, downloadprefix = '', extractionprefix = ''): + """initializes the mpsSong class. 
Parent is a pointer to the calling mpsUser, xmlNode should be a DOM object with the songs info. downloadprefix is the local directory prefix where the media will be put, default is an empty string. If no extractionprefix is given, extracted features will be places in the dir pointed to by downloadprefix""" + #self.parent = weakref.ref(parent) + self.xmlNode = xmlNode + self.getUri() + #the nicer file download is currently broken... + #self.betterURI = xmlNode.getAttribute('downloadable') + self.downloadprefix = downloadprefix + if extractionprefix == '': + self.extractionprefix = downloadprefix + else: + self.extractionprefix = extractionprefix + self.title = self.exhaustiveXML.getElementsByTagName('title')[0].firstChild.nodeValue + self.image = self.exhaustiveXML.getElementsByTagName('small')[0].firstChild.nodeValue + self.playcount = xmlNode.getElementsByTagName('stats')[0].getAttribute('plays') + self.comments = "" #this is a blank string hold for the comments fields. Might be used later. + self.trackNum, self.totalTracks = None, None + self.filename, self.HIFIfilename = None, None + self.beats = None + + def getUri(self): + self.songID = self.xmlNode.getAttribute('songId') + xmlPage = try_open(songBase[0] + str(self.songID) + songBase[1]) + self.exhaustiveXML = dom.parseString(''.join(xmlPage.readlines())) + xmlPage.close() + try: + self.uri = self.exhaustiveXML.getElementsByTagName('link')[0].firstChild.nodeValue + except AttributeError, err: + #logging.info("mpsUser::getUri ran into a problem finding the download link for a song by artist with uid: " + + # str(self.parent().uid) + " link will be left blank.\n\tError msg: " + str(err)) + self.uri = '' + + def setTrackNum(self, trackNumber, totalTracks): + '''set the track number for this song and the number of tracks in the album it is in.''' + self.trackNum = trackNumber + self.totalTracks = totalTracks + + def download(self): + '''download the track. 
+ Upon success set self.filename to the local location of the downloaded song and return true. + On FAIL return false.''' + logging.debug("downloading " + self.title + " by " + self.parent().artist + " to " + self.downloadprefix) + if self.trackNum != None: + filename = unicode(str(self.trackNum), 'utf8') + u'_' + self.title + u'.mp3' + else: + filename = self.title + u'.mp3' + if try_get(self.uri, os.path.join(self.downloadprefix, filename)) != None: + logging.debug("success on " + self.title + " by " + self.parent().artist + " to " + os.path.join(self.downloadprefix,filename)) + self.filename = filename + return True + else: + logging.debug("FAIL on " + self.title + " by " + self.parent().artist + " to " + os.path.join(self.downloadprefix,filename)) + return False + + def downloadHIFI(self): + '''if it exists, download the hi fidelity version of the track. + Upon success set self.HIFIfilename to the local location of the downloaded song and return true. + On FAIL return false.''' + if not self.betterURI: + logging.info("NO hi-fi version of " + self.title + " by " + self.parent().artist + " but we did look for it.") + return False + logging.debug("downloading hifi copy of " + self.title + "by" + self.parent().artist + " to " + self.downloadprefix) + if self.trackNum != None: + filename = unicode(str(self.trackNum), 'utf8') + u'_' + self.title + u'_hifi.mp3' + else: + filename = self.title + u'_hifi.mp3' + if (try_get(self.betteruri, os.path.join(self.downloadprefix,filename)) != None): + logging.debug("success on hi-fi version of " + self.title + " by " + self.parent().artist + " to " + os.path.join(self.downloadprefix,filename)) + self.HIFIfilename = filename + return True + else: + logging.debug("FAIL on hi-fi version of " + self.title + " by " + self.parent().artist + " to " + os.path.join(self.downloadprefix,filename)) + return False + + + def tag(self, hifi = False): + '''create or modify the id3 tag for downloaded song associated with self. 
set optional hifi arg to tag the hifi download''' + if hifi: + fileToTag = os.path.join(self.downloadprefix,self.HIFIfilename) + else: + fileToTag = os.path.join(self.downloadprefix,self.filename) + if fileToTag == None: + logging.info("asked to tag a file associated with uid: " + str(self.parent().uid) + " but the song does not exist locally") + logging.debug("adding tags to " + fileToTag) + try: id3 = mutagen.id3.ID3(fileToTag) + except mutagen.id3.ID3NoHeaderError: + logging.info("No ID3 header found for " + fileToTag + "; creating tag from scratch") + id3 = mutagen.id3.ID3() + except Exception, err: + logging.error(str(err)) + return + id3.add(mutagen.id3.TIT2(encoding=3,text=self.title)) + id3.add(mutagen.id3.TPE1(encoding=3,text=self.parent().artist)) + id3.add(mutagen.id3.COMM(encoding=3,text=self.comments, lang="eng", desc="")) + #id3.add(mutagen.id3.COMM(encoding=3,text=relationshipLink, lang="eng", desc="MusicGrabberSig")) + id3.add(mutagen.id3.TALB(encoding=3,text=self.parent().album)) + if self.trackNum != None: + id3.add(mutagen.id3.TRCK(encoding=3,text=str(self.trackNum) + '/' + str(self.totalTracks))) + id3.add(mutagen.id3.POPM(encoding=3,email=str(self.parent().uid)+"@myspace", rating = 128, count=self.playcount)) + if self.image == None: + logging.error("No image present for " + self.title + ", " + self.parent().artist) + try: + logging.debug("trying to get image from " + self.image) + localImgPath, imgHeader = try_get(self.image, os.path.join("/tmp",os.path.basename(self.image))) + imgHandle = open(localImgPath) + id3.add(mutagen.id3.APIC(encoding=3, mime=imgHeader.type, data=imgHandle.read(), type=17, desc="Song pic from myspace.com")) + except: + logging.error("Unable to retieve image for " + self.title + ", " + self.parent().artist) + try: + id3.save(fileToTag) + except Exception, err: + logging.error(str(err) + ";couldn\'t save the tag for " + self.title + " by " + self.parent().artist) This was sent by the SourceForge.net collaborative 
development platform, the world's largest Open Source development site. |
From: <gea...@us...> - 2009-02-27 12:39:19
|
Revision: 332 http://mypyspace.svn.sourceforge.net/mypyspace/?rev=332&view=rev Author: gearmonkey Date: 2009-02-27 12:39:08 +0000 (Fri, 27 Feb 2009) Log Message: ----------- starting a new branch that will generate the more complex graph with songs as nodes, each with connections of its artist, with song to song weights. Added Paths: ----------- graphRDF/branches/songsAsNodes/ This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |