From: <gea...@us...> - 2009-04-30 15:01:39
|
Revision: 339
          http://mypyspace.svn.sourceforge.net/mypyspace/?rev=339&view=rev
Author:   gearmonkey
Date:     2009-04-30 15:01:37 +0000 (Thu, 30 Apr 2009)

Log Message:
-----------
added a nice helper function to break up igraph attributes that are formatted strings of Lists back into lists.

Modified Paths:
--------------
    graphRDF/branches/songsAsNodes/graphRDF.py
    myspaceCrawler/trunk/myspaceCrawler.py

Modified: graphRDF/branches/songsAsNodes/graphRDF.py
===================================================================
--- graphRDF/branches/songsAsNodes/graphRDF.py	2009-04-17 12:53:17 UTC (rev 338)
+++ graphRDF/branches/songsAsNodes/graphRDF.py	2009-04-30 15:01:37 UTC (rev 339)
@@ -65,6 +65,17 @@
 #"filename":logPath + "musicGrabber.log",
 "filemode":"w"}
 logging.basicConfig(**loggingConfig)
+
+def string2List(listAsString):
+    """A little helper function that takes in a string that was made by printing a list and breaks it up into a list of it composite parts.
+    returns the list.
+    could do with some error checking, but seems to work with the track attribute and ought to work with the genres as well."""
+    items = listAsString.split("', '")
+    items[0] = items[0].lstrip("['")
+    items[-1] = items[-1].rstrip("']")
+    return items
+
+
 class graph(object):
     '''G = graph(rdfFolder) -> returns a graph object encapsulating an igraph G

Modified: myspaceCrawler/trunk/myspaceCrawler.py
===================================================================
--- myspaceCrawler/trunk/myspaceCrawler.py	2009-04-17 12:53:17 UTC (rev 338)
+++ myspaceCrawler/trunk/myspaceCrawler.py	2009-04-30 15:01:37 UTC (rev 339)
@@ -31,8 +31,9 @@
 from myspaceuris import *
-
-THREAD_CAP = 10000 #maximum number of threads allowed to be firing at once
+#maximum number of threads allowed to be firing at once, if you're doing feature extraction,
+THREAD_CAP = 16 #shouldn't be more than about 5 x numProcessors to prevent overburdening the system
+########
 THREAD_STALL_TIME = 30 #length of time in seconds to wait until the thread count is checked again
 LOG_FILENAME = "musicCrawler.log" #name of logger file (path set at commandline)

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|