From: <gea...@us...> - 2009-05-09 07:10:43
|
Revision: 343 http://mypyspace.svn.sourceforge.net/mypyspace/?rev=343&view=rev Author: gearmonkey Date: 2009-05-09 07:10:32 +0000 (Sat, 09 May 2009) Log Message: ----------- added a script to help repair any graphs made (principally mine) before I sorted out the bug in graphRDF that was fixed in the previous revision. Modified Paths: -------------- graphRDF/branches/songsAsNodes/graphRDF.py graphRDF/branches/songsAsNodes/hplot.py Added Paths: ----------- graphRDF/branches/songsAsNodes/repairSongGraph.py Modified: graphRDF/branches/songsAsNodes/graphRDF.py =================================================================== --- graphRDF/branches/songsAsNodes/graphRDF.py 2009-05-05 17:14:14 UTC (rev 342) +++ graphRDF/branches/songsAsNodes/graphRDF.py 2009-05-09 07:10:32 UTC (rev 343) @@ -14,6 +14,7 @@ import getopt import ftplib import mopy +import re from logging import log, error, warning, info, debug import logging from numpy import * @@ -24,6 +25,7 @@ this package interfaces w/ myrdfspace.com to analyze and plot graphs of myspace artists by kurt Jacobson 29/10/2007 (c) +highly modified by Ben Fields on and around 15/04/2009 dependencies: - igraph (http://cneurocvs.rmki.kfki.hu/igraph/) @@ -69,10 +71,14 @@ def string2List(listAsString): """A little helper function that takes in a string that was made by printing a list and breaks it up into a list of it composite parts. returns the list. - could do with some error checking, but seems to work with the track attribute and ought to work with the genres as well.""" - items = listAsString.split("', '") - items[0] = items[0].lstrip("['") + could do with some error checking, but seems to work with the track attribute and ought to work with the genres as well. + Now handles both kinds of quote via the glory of regular expressions.""" + sep = re.compile('[\'\"], [\'\"]') + items = sep.split(listAsString) + items[0] = items[0].lstrip("['")#could do this with a regEx also, but, eh... + items[0] = items[0].lstrip("[\"") items[-1] = items[-1].rstrip("']") + items[-1] = items[-1].rstrip("\"]") return items Modified: graphRDF/branches/songsAsNodes/hplot.py =================================================================== --- graphRDF/branches/songsAsNodes/hplot.py 2009-05-05 17:14:14 UTC (rev 342) +++ graphRDF/branches/songsAsNodes/hplot.py 2009-05-09 07:10:32 UTC (rev 343) @@ -1,7 +1,7 @@ #!/usr/bin/env python # encoding: utf-8 """ -untitled.py +hplot.py Created by Kurt Jacobson on 2008-11-11. Copyright (c) 2008 C4DM - Queen Mary U of London. All rights reserved. Added: graphRDF/branches/songsAsNodes/repairSongGraph.py =================================================================== --- graphRDF/branches/songsAsNodes/repairSongGraph.py (rev 0) +++ graphRDF/branches/songsAsNodes/repairSongGraph.py 2009-05-09 07:10:32 UTC (rev 343) @@ -0,0 +1,104 @@ +#!/usr/bin/env python +# encoding: utf-8 +""" +repairSongGraph.py + +A script to clean up the song graph without rebuilding the whole thing to deal with the poor string parsing in the original implementation + +""" + +import sys +import os +from graphRDF import * + +Usage = """usage: repairSongGraph [original Artist graphmlz] [orignal Song graphmlz] [destination Song graphmlz]\n""" + +def main(argv=None): + vertIdxsToDelete = [] + uidsAffected = [] + vertIdxsAdded = [] + idx = 0 + + if argv is None: + argv = sys.argv + if not len(argv) == 4: + print Usage + return 104 + + + fixMe = graph('person_seed_134901208') + print "Loading original Artist graph from " + argv[1] + try: + fixMe.G = igraph.Graph.Load(argv[1], format='graphmlz') + except Exception, err: + print Usage + "encountered problems loading the original Artist graphmlz file." + return 105 + fixMe.isPopulated = True + print "Loading orginal Song graph from " + argv[2] + try: + fixMe.S = igraph.Graph.Load(argv[2], format='graphmlz') + except Exception, err: + print Usage + "encountered problems loading the original Song graphmlz file." + return 106 + print "will write out correceted graph to " + argv[3] + print "scrubbing Song Graph..." + + + idx = len(fixMe.S.vs) - 1 + for vert in fixMe.S.vs: + tracks = string2List(vert['track']) + if len(tracks) > 1: + print "Removing corrupt vertex from artist with uid " + vert['uid'] + " and breaking it into " + str(len(tracks)) + " correct vertices." + vertIdxsToDelete.append(vert.index) + uidsAffected.append(vert['uid']) + fixMe.S.add_vertices(len(tracks)) + for track in tracks: + idx +=1 + fixMe.S.vs[idx]['track'] = track + fixMe.S.vs[idx]['uid'] = vert['uid'] + vertIdxsAdded.append(idx) + print "found " + str(len(vertIdxsToDelete)) + " vertices to delete.\nAdding " + str(len(vertIdxsAdded)) + " repaired vertices." + + addedVertSeq = igraph.VertexSeq(fixMe.S, vertIdxsAdded) + toDeleteVertSeq = igraph.VertexSeq(fixMe.S, vertIdxsToDelete) + artistVertsOfInterest = fixMe.G.vs.select(uid_in=uidsAffected) + sourcesDealtWith = [] + targetsDealtWith = [] + idx = len(fixMe.S.es) - 1 + print "dealing with " + str(len(artistVertsOfInterest)) + "nodes in total" + for i,vert in enumerate(artistVertsOfInterest): + for index, edgeIdx in enumerate(fixMe.G.adjacent(vert.index, 'all')): + edge = fixMe.G.es[edgeIdx] + sources = fixMe.S.vs.select(uid=fixMe.G.vs[edge.source]['uid']) + targets = fixMe.S.vs.select(uid=fixMe.G.vs[edge.target]['uid']) + #print "Expanding edge " + str(edgeIdx) + " to " + str(len(sources) * len(targets)) + " edges." + oldidx = idx + for source in sources: + if not source.index in sourcesDealtWith: + sourcesDealtWith.append(source.index) + for target in targets: + if not target.index in targetsDealtWith: + targetsDealtWith.append(target.index) + fixMe.S.add_edges((source.index,target.index)) + idx += 1 + fixMe.S.es[idx]['audioWeight'] = -1 + + #print "added " + str(idx-oldidx) + " edges\n--------" + print str(len(artistVertsOfInterest) - i - 1) + " nodes left to deal with" + print "should have added all the edges now. Have a look:\n" + str(fixMe.S) + "\nRemoving old corrupt nodes and saving. Saving expanding graph in outputfile dumpedgraph.mlz in case something goes wrong." + fixMe.S.write("dumpedgraph.mlz", format="graphmlz") + + cleanGraph = fixMe.S.delete_vertices(vertIdxsToDelete) + + print "overwriting " + argv[3] + " with corrected graph." + + cleanGraph.write(argv[3],format="graphmlz") + + return 0 + + + + +if __name__ == '__main__': + main() + Property changes on: graphRDF/branches/songsAsNodes/repairSongGraph.py ___________________________________________________________________ Added: svn:executable + * This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |