From: Duncan W. <du...@fr...> - 2010-01-31 09:28:18
|
Author: duncan Date: Sun Jan 31 04:28:16 2010 New Revision: 11651 Log: Clean up of unused code Modified: branches/rel-1/freevo/src/helpers/imdb.py branches/rel-1/freevo/src/util/fxdimdb.py tags/REL-1_9_1/freevo/src/helpers/imdb.py tags/REL-1_9_1/freevo/src/util/fxdimdb.py Modified: branches/rel-1/freevo/src/helpers/imdb.py ============================================================================== --- branches/rel-1/freevo/src/helpers/imdb.py (original) +++ branches/rel-1/freevo/src/helpers/imdb.py Sun Jan 31 04:28:16 2010 @@ -38,8 +38,8 @@ try: import config except ImportError: - print 'imdb.py can\'t be executed outside the Freevo environment.' - print 'Please use \'freevo imdb [args]\' instead' + print "imdb.py can't be executed outside the Freevo environment." + print "Please use 'freevo imdb [args]' instead" sys.exit(0) from util.fxdimdb import FxdImdb, makeVideo @@ -116,8 +116,6 @@ sys.exit(u'--search requires <search pattern>') elif opts.guess and len(args) < 1: sys.exit(u'--guess requires <guess pattern>') - #elif opts.tv and len(args) < 1: - # sys.exit(u'--tv requires <imdb id>') tv_marker = (opts.season or opts.episode) and '"' or '' if opts.rom_drive is not None: @@ -183,14 +181,6 @@ results = fxd.getIMDBid(opts.tv, opts.season, opts.episode) if len(results) == 0: print 'No results' - #for result in results: - # if result[3]: - # title = 'http://www.imdb.com/title/tt%s/ %s %s (%s) %s' % (result[:1] + result[:4]) - # elif result[2]: - # title = 'http://www.imdb.com/title/tt%s/ %s %s (%s)' % (result[:1] + result[:3]) - # else: - # title = 'http://www.imdb.com/title/tt%s/ %s %s' % (result[:1] + result[:2]) - # title = results title = 'http://www.imdb.com/title/tt%s/ %s' % (results, results) print '%s' % title.encode(opts.encoding) sys.exit(0) Modified: branches/rel-1/freevo/src/util/fxdimdb.py ============================================================================== --- branches/rel-1/freevo/src/util/fxdimdb.py (original) +++ branches/rel-1/freevo/src/util/fxdimdb.py Sun Jan 31 04:28:16 2010 @@ -49,8 +49,14 @@ import os import traceback from pprint import pprint, pformat -from BeautifulSoup import BeautifulSoup, NavigableString -from html5lib import HTMLParser, treebuilders +try: + from html5lib import HTMLParser, treebuilders + from html5lib.treebuilders.soup import NavigableString + using_html5lib = True +except ImportError: + import HTMLParser + from BeautifulSoup import BeautifulSoup, BeautifulStoneSoup, NavigableString + using_html5lib = False import config import util @@ -313,10 +319,11 @@ dvd = 0 try: - #soup = BeautifulSoup(results.read(), convertEntities='xml') - parser = HTMLParser(tree=treebuilders.getTreeBuilder('beautifulsoup')) - soup = parser.parse(results.read()) #, encoding='latin-1') #, convertEntities='xml') - + if using_html5lib: + parser = HTMLParser(tree=treebuilders.getTreeBuilder('beautifulsoup')) + soup = parser.parse(results.read()) + else: + soup = BeautifulSoup(results.read(), convertEntities=BeautifulStoneSoup.HTML_ENTITIES) except UnicodeDecodeError: print "Unicode error: check that /usr/lib/python2.x/site.py has the correct default encoding" traceback.print_exc() @@ -795,9 +802,11 @@ m = re.compile('/title/tt(\d+)/') y = re.compile('\((\d+)\) *(.*)') try: - #soup = BeautifulSoup(results.read(), convertEntities='xml') - parser = HTMLParser(tree=treebuilders.getTreeBuilder('beautifulsoup')) - soup = parser.parse(results.read()) #, encoding='latin-1') #, convertEntities='xml') + if using_html5lib: + parser = HTMLParser(tree=treebuilders.getTreeBuilder('beautifulsoup')) + soup = parser.parse(results.read()) + else: + soup = BeautifulSoup(results.read(), convertEntities=BeautifulStoneSoup.HTML_ENTITIES) except HTMLParser.HTMLParseError, why: traceback.print_exc() _debug_('Cannot parse %r: %s' % (url, why), DWARNING) @@ -849,9 +858,11 @@ Returns a new id for getIMDBid with TV series episode data """ try: - #soup = BeautifulSoup(results.read(), convertEntities='xml') - parser = HTMLParser(tree=treebuilders.getTreeBuilder('beautifulsoup')) - soup = parser.parse(results.read()) #, encoding='latin-1') #, convertEntities='xml') + if using_html5lib: + parser = HTMLParser(tree=treebuilders.getTreeBuilder('beautifulsoup')) + soup = parser.parse(results.read()) + else: + soup = BeautifulSoup(results.read(), convertEntities=BeautifulStoneSoup.HTML_ENTITIES) except UnicodeDecodeError: print "Unicode error; check that /usr/lib/python2.x/site.py has the correct default encoding" pass Modified: tags/REL-1_9_1/freevo/src/helpers/imdb.py ============================================================================== --- tags/REL-1_9_1/freevo/src/helpers/imdb.py (original) +++ tags/REL-1_9_1/freevo/src/helpers/imdb.py Sun Jan 31 04:28:16 2010 @@ -38,8 +38,8 @@ try: import config except ImportError: - print 'imdb.py can\'t be executed outside the Freevo environment.' - print 'Please use \'freevo imdb [args]\' instead' + print "imdb.py can't be executed outside the Freevo environment." + print "Please use 'freevo imdb [args]' instead" sys.exit(0) from util.fxdimdb import FxdImdb, makeVideo @@ -53,19 +53,19 @@ parser = OptionParser(version='%prog 1.0', conflict_handler='resolve', usage=""" Search IMDB for a movie or a TV show -freevo imdb [options] <search> [<output> <video file> [<video file>]] +freevo imdb [options] | [<result> <fxd file> <video file> [<video file>]] -Generate <output>.fxd for the movie. Files is a list of files that belongs to -this movie. Use [dvd|vcd] to add the whole disc or use [dvd|vcd][title] to add -a special DVD or VCD title to the list of files""") +Generate a fxd for the movie. Files is a list of files that belongs to this +movie. Use [dvd|vcd] to add the whole disc or use [dvd|vcd][title] to add a +special DVD or VCD title to the list of files""") parser.add_option('-v', '--verbose', action='count', default=0, help='set the level of verbosity [default:%default]') parser.add_option('-s', '--search', action='store_true', dest='search', default=False, help='search imdb for string [default:%default]') parser.add_option('-g', '--guess', action='store_true', dest='guess', default=False, help='search imdb for possible filename match [default:%default]') - parser.add_option('--tv', action='store_true', dest='tv', default=False, - help='specify the search is a tv programme [default:%default]') + parser.add_option('--tv', action='store', dest='tv', default=None, + help='specify the id of a tv programme for a eipsode search [default:%default]') parser.add_option('--season', dest='season', default=None, help='specify the season in the search [default:%default]') parser.add_option('--episode', dest='episode', default=None, @@ -116,7 +116,7 @@ sys.exit(u'--search requires <search pattern>') elif opts.guess and len(args) < 1: sys.exit(u'--guess requires <guess pattern>') - tv_marker = (opts.tv or opts.season or opts.episode) and '"' or '' + tv_marker = (opts.season or opts.episode) and '"' or '' if opts.rom_drive is not None: driveset = True @@ -176,6 +176,15 @@ print '%s' % title.encode(opts.encoding) sys.exit(0) + if opts.tv: + print "Searching IMDB for '%s' season:%s episode:%s..." % (opts.tv, opts.season, opts.episode) + results = fxd.getIMDBid(opts.tv, opts.season, opts.episode) + if len(results) == 0: + print 'No results' + title = 'http://www.imdb.com/title/tt%s/ %s' % (results, results) + print '%s' % title.encode(opts.encoding) + sys.exit(0) + # normal usage if len(args) < 3: sys.exit(u'requires <imdb id> <fxd filename> <video file>|<cd id>') Modified: tags/REL-1_9_1/freevo/src/util/fxdimdb.py ============================================================================== --- tags/REL-1_9_1/freevo/src/util/fxdimdb.py (original) +++ tags/REL-1_9_1/freevo/src/util/fxdimdb.py Sun Jan 31 04:28:16 2010 @@ -48,8 +48,15 @@ import codecs import os import traceback -from BeautifulSoup import BeautifulSoup, NavigableString -import HTMLParser +from pprint import pprint, pformat +try: + from html5lib import HTMLParser, treebuilders + from html5lib.treebuilders.soup import NavigableString + using_html5lib = True +except ImportError: + import HTMLParser + from BeautifulSoup import BeautifulSoup, BeautifulStoneSoup, NavigableString + using_html5lib = False import config import util @@ -232,6 +239,7 @@ response.close() _debug_('id_list has %s items' % (len(self.id_list))) + #print 'id_list=%s' % (pformat(self.id_list)) if len(self.id_list) > 20: # too many results, check if there are stupid results in the list words = [] @@ -311,8 +319,11 @@ dvd = 0 try: - #soup = BeautifulSoup(results.read(), convertEntities='xml') - soup = BeautifulSoup(results.read()) + if using_html5lib: + parser = HTMLParser(tree=treebuilders.getTreeBuilder('beautifulsoup')) + soup = parser.parse(results.read()) + else: + soup = BeautifulSoup(results.read(), convertEntities=BeautifulStoneSoup.HTML_ENTITIES) except UnicodeDecodeError: print "Unicode error: check that /usr/lib/python2.x/site.py has the correct default encoding" traceback.print_exc() @@ -337,68 +348,61 @@ self.info['year'] = y[1:-1] except (AttributeError, TypeError, ValueError): self.info['title'] = self.title - self.info['year'] = title.find('a').string.strip() + try: + self.info['year'] = title.find('a').contents[0].strip() + except AttributeError: + self.info['year'] = '' # Find the <div> with class info, each <h5> under this provides info + wanted_keys = ('release_date', 'genre', 'tagline', 'plot', 'plot_keywords', + 'also_known_as', 'mpaa', 'runtime', 'country', 'language', + 'color', 'aspect_ratio', 'sound_mix', 'certification', + ) + for info in main.findAll('div', {'class' : 'info'}): infoh5 = info.find('h5') if not infoh5: continue try: - infostr = infoh5.next - key = infostr.string.strip(':').lower().replace(' ', '_') - nextsibling = nextsibling = infoh5.nextSibling.strip() - sections = info.findAll('a', { 'href' : re.compile('/Sections') }) - lists = info.findAll('a', { 'href' : re.compile('/List') }) - if len(nextsibling) > 0: - self.info[key] = nextsibling + infostr = infoh5.find(text=True) + key = infostr.strip().strip(':').lower().replace(' ', '_') + if key not in wanted_keys: + continue + content = info.find('div', {'class' : 'info-content'}) + infocontent = content.find(text=True) + if infocontent: + infocontent = infocontent.strip() + sections = info.findAll('a', { 'href' : re.compile('^/Sections') }) + lists = info.findAll('a', { 'href' : re.compile('^/List') }) + keywords = info.findAll('a', { 'href' : re.compile('^/keyword') }) + #print 'key=%s content=%r keywords=%r sections=%r lists=%r' % (key, infocontent, keywords, sections, lists) + if len(infocontent) > 0: + self.info[key] = infocontent elif len(sections) > 0: items = [] for item in sections: - items.append(item.string) + items.append(item.contents[0].strip()) self.info[key] = ' / '.join(items) elif len(lists) > 0: items = [] for item in lists: - items.append(item.string) + items.append(item.contents[0].strip()) + self.info[key] = ' / '.join(items) + elif len(keywords) > 0: + items = [] + for item in keywords: + items.append(item.contents[0].strip()) self.info[key] = ' / '.join(items) except: pass - # Find Plot Outline/Summary: - # Normally the tag is named "Plot Outline:" - however sometimes - # the tag is "Plot Summary:" or just "Plot:". Search for all strings. - imdb_result = soup.find(text='Plot Outline:') - if not imdb_result: - imdb_result = soup.find(text='Plot Summary:') - if not imdb_result: - imdb_result = soup.find(text='Plot:') - if imdb_result: - self.info['plot'] = imdb_result.next.strip() - else: - self.info['plot'] = u'' - - # Find tagline - sometimes the tagline is missing. - # Use an empty string if no tagline could be found. - imdb_result = soup.find(text='Tagline:') - if imdb_result: - self.info['tagline'] = imdb_result.next.strip() - else: - self.info['tagline'] = u'' - rating = soup.find(text='User Rating:').findNext(text=re.compile('/10')) - if rating: + try: votes = rating.findNext('a') - self.info['rating'] = rating.strip() + ' (' + votes.string.strip() + ')' - else: + self.info['rating'] = rating.strip() + ' (' + votes.contents[0].strip() + ')' + except AttributeError: self.info['rating'] = '' - runtime = soup.find(text='Runtime:') - if runtime and runtime.next: - self.info['runtime'] = runtime.next.strip() - else: - self.info['runtime'] = '' - # Replace special characters in the items for (k,v) in self.info.items(): self.info[k] = self.convert_entities(v) @@ -795,11 +799,14 @@ _debug_('parsesearchdata(results=%r, url=%r, id=%r)' % (results, url, id)) self.id_list = [] - m = re.compile('/title/tt([0-9]*)/') - y = re.compile('\(([^)]+)\)') + m = re.compile('/title/tt(\d+)/') + y = re.compile('\((\d+)\) *(.*)') try: - #soup = BeautifulSoup(results.read(), convertEntities='xml') - soup = BeautifulSoup(results.read()) + if using_html5lib: + parser = HTMLParser(tree=treebuilders.getTreeBuilder('beautifulsoup')) + soup = parser.parse(results.read()) + else: + soup = BeautifulSoup(results.read(), convertEntities=BeautifulStoneSoup.HTML_ENTITIES) except HTMLParser.HTMLParseError, why: traceback.print_exc() _debug_('Cannot parse %r: %s' % (url, why), DWARNING) @@ -808,28 +815,37 @@ traceback.print_exc() _debug_('Cannot parse %r: %s' % (url, why), DWARNING) return self.id_list - items = soup.findAll('a', href=re.compile('/title/tt')) + items = soup.findAll('a', href=re.compile('^/title/tt')) ids = set([]) for item in items: - idm = m.search(item['href']) + idm = item.attrMap['href'] if not idm: continue - if isinstance(item.next.next, NavigableString): - yrm = y.findall(item.next.next) - - id = idm.group(1) - name = item.string - # skip empty names - if not name: + m_match = m.match(idm) + if not m_match: + # skip invalid titles continue - # skip duplicate ids + id = m_match.group(1) if id in ids: + # skip duplicate ids + continue + name = item.contents[0] + if not isinstance(name, NavigableString): + # skip empty names continue + if isinstance(item.next.next, NavigableString): + yrm = item.next.next.strip() ids.add(id) - year = len(yrm) > 0 and yrm[0] or '0000' - type = len(yrm) > 1 and yrm[1] or '' + y_match = y.match(yrm) + if y_match: + year = y_match.group(1) + type = y_match.group(2) + else: + year = '0000' + type = '' #print 'url', item['href'] #print item.parent.findChildren(text=re.compile('[^ ]')) + #print 'id=%s name=%s year=%s type=%s' % (id, name, year, type) self.id_list += [ ( id, name, year, type ) ] for item in self.id_list: @@ -842,8 +858,11 @@ Returns a new id for getIMDBid with TV series episode data """ try: - #soup = BeautifulSoup(results.read(), convertEntities='xml') - soup = BeautifulSoup(results.read()) + if using_html5lib: + parser = HTMLParser(tree=treebuilders.getTreeBuilder('beautifulsoup')) + soup = parser.parse(results.read()) + else: + soup = BeautifulSoup(results.read(), convertEntities=BeautifulStoneSoup.HTML_ENTITIES) except UnicodeDecodeError: print "Unicode error; check that /usr/lib/python2.x/site.py has the correct default encoding" pass @@ -971,9 +990,6 @@ self.image = vfs.basename(self.image) _debug_('Downloaded cover image from %s' % (self.image_url)) - print "Freevo knows nothing about the copyright of this image, please" - print "go to %s to check for more information about private." % self.image_url - print "use of this image" def str2XML(self, line): |