[Freevo-cvslog] r11651 - branches/rel-1/freevo/src/helpers branches/rel-1/freevo/src/util tags/REL-1_9_1/freevo/src/helpers tags/REL-1_9_1/freevo/src/util

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 454-5900

Author: duncan
Date: Sun Jan 31 04:28:16 2010
New Revision: 11651

Log:
Clean up of unused code


Modified:
   branches/rel-1/freevo/src/helpers/imdb.py
   branches/rel-1/freevo/src/util/fxdimdb.py
   tags/REL-1_9_1/freevo/src/helpers/imdb.py
   tags/REL-1_9_1/freevo/src/util/fxdimdb.py

Modified: branches/rel-1/freevo/src/helpers/imdb.py
==============================================================================

--- branches/rel-1/freevo/src/helpers/imdb.py	(original)
+++ branches/rel-1/freevo/src/helpers/imdb.py	Sun Jan 31 04:28:16 2010
@@ -38,8 +38,8 @@
 try:
     import config
 except ImportError:
-    print 'imdb.py can\'t be executed outside the Freevo environment.'
-    print 'Please use \'freevo imdb [args]\' instead'
+    print "imdb.py can't be executed outside the Freevo environment."
+    print "Please use 'freevo imdb [args]' instead"
     sys.exit(0)
 
 from util.fxdimdb import FxdImdb, makeVideo
@@ -116,8 +116,6 @@
         sys.exit(u'--search requires <search pattern>')
     elif opts.guess and len(args) < 1:
         sys.exit(u'--guess requires <guess pattern>')
-    #elif opts.tv and len(args) < 1:
-    #    sys.exit(u'--tv requires <imdb id>')
     tv_marker = (opts.season or opts.episode) and '"' or ''
 
     if opts.rom_drive is not None:
@@ -183,14 +181,6 @@
         results = fxd.getIMDBid(opts.tv, opts.season, opts.episode)
         if len(results) == 0:
             print 'No results'
-        #for result in results:
-        #    if result[3]:
-        #        title = 'http://www.imdb.com/title/tt%s/  %s  %s (%s) %s' % (result[:1] + result[:4])
-        #    elif result[2]:
-        #        title = 'http://www.imdb.com/title/tt%s/  %s  %s (%s)' % (result[:1] + result[:3])
-        #    else:
-        #        title = 'http://www.imdb.com/title/tt%s/  %s  %s' % (result[:1] + result[:2])
-        #    title = results
         title = 'http://www.imdb.com/title/tt%s/  %s' % (results, results)
         print '%s' % title.encode(opts.encoding)
         sys.exit(0)

Modified: branches/rel-1/freevo/src/util/fxdimdb.py
==============================================================================
--- branches/rel-1/freevo/src/util/fxdimdb.py	(original)
+++ branches/rel-1/freevo/src/util/fxdimdb.py	Sun Jan 31 04:28:16 2010
@@ -49,8 +49,14 @@
 import os
 import traceback
 from pprint import pprint, pformat
-from BeautifulSoup import BeautifulSoup, NavigableString
-from html5lib import HTMLParser, treebuilders
+try:
+    from html5lib import HTMLParser, treebuilders
+    from html5lib.treebuilders.soup import NavigableString
+    using_html5lib = True
+except ImportError:
+    import HTMLParser
+    from BeautifulSoup import BeautifulSoup, BeautifulStoneSoup, NavigableString
+    using_html5lib = False
 
 import config
 import util
@@ -313,10 +319,11 @@
         dvd = 0
 
         try:
-            #soup = BeautifulSoup(results.read(), convertEntities='xml')
-            parser = HTMLParser(tree=treebuilders.getTreeBuilder('beautifulsoup'))
-            soup = parser.parse(results.read()) #, encoding='latin-1') #, convertEntities='xml')
-
+            if using_html5lib:
+                parser = HTMLParser(tree=treebuilders.getTreeBuilder('beautifulsoup'))
+                soup = parser.parse(results.read())
+            else:
+                soup = BeautifulSoup(results.read(), convertEntities=BeautifulStoneSoup.HTML_ENTITIES)
         except UnicodeDecodeError:
             print "Unicode error: check that /usr/lib/python2.x/site.py has the correct default encoding"
             traceback.print_exc()
@@ -795,9 +802,11 @@
         m = re.compile('/title/tt(\d+)/')
         y = re.compile('\((\d+)\) *(.*)')
         try:
-            #soup = BeautifulSoup(results.read(), convertEntities='xml')
-            parser = HTMLParser(tree=treebuilders.getTreeBuilder('beautifulsoup'))
-            soup = parser.parse(results.read()) #, encoding='latin-1') #, convertEntities='xml')
+            if using_html5lib:
+                parser = HTMLParser(tree=treebuilders.getTreeBuilder('beautifulsoup'))
+                soup = parser.parse(results.read())
+            else:
+                soup = BeautifulSoup(results.read(), convertEntities=BeautifulStoneSoup.HTML_ENTITIES)
         except HTMLParser.HTMLParseError, why:
             traceback.print_exc()
             _debug_('Cannot parse %r: %s' % (url, why), DWARNING)
@@ -849,9 +858,11 @@
         Returns a new id for getIMDBid with TV series episode data
         """
         try:
-            #soup = BeautifulSoup(results.read(), convertEntities='xml')
-            parser = HTMLParser(tree=treebuilders.getTreeBuilder('beautifulsoup'))
-            soup = parser.parse(results.read()) #, encoding='latin-1') #, convertEntities='xml')
+            if using_html5lib:
+                parser = HTMLParser(tree=treebuilders.getTreeBuilder('beautifulsoup'))
+                soup = parser.parse(results.read())
+            else:
+                soup = BeautifulSoup(results.read(), convertEntities=BeautifulStoneSoup.HTML_ENTITIES)
         except UnicodeDecodeError:
             print "Unicode error; check that /usr/lib/python2.x/site.py has the correct default encoding"
             pass

Modified: tags/REL-1_9_1/freevo/src/helpers/imdb.py
==============================================================================
--- tags/REL-1_9_1/freevo/src/helpers/imdb.py	(original)
+++ tags/REL-1_9_1/freevo/src/helpers/imdb.py	Sun Jan 31 04:28:16 2010
@@ -38,8 +38,8 @@
 try:
     import config
 except ImportError:
-    print 'imdb.py can\'t be executed outside the Freevo environment.'
-    print 'Please use \'freevo imdb [args]\' instead'
+    print "imdb.py can't be executed outside the Freevo environment."
+    print "Please use 'freevo imdb [args]' instead"
     sys.exit(0)
 
 from util.fxdimdb import FxdImdb, makeVideo
@@ -53,19 +53,19 @@
     parser = OptionParser(version='%prog 1.0', conflict_handler='resolve', usage="""
 Search IMDB for a movie or a TV show
 
-freevo imdb [options] <search> [<output> <video file> [<video file>]]
+freevo imdb [options] | [<result> <fxd file> <video file> [<video file>]]
 
-Generate <output>.fxd for the movie.  Files is a list of files that belongs to
-this movie.  Use [dvd|vcd] to add the whole disc or use [dvd|vcd][title] to add
-a special DVD or VCD title to the list of files""")
+Generate a fxd for the movie.  Files is a list of files that belongs to this
+movie.  Use [dvd|vcd] to add the whole disc or use [dvd|vcd][title] to add a
+special DVD or VCD title to the list of files""")
     parser.add_option('-v', '--verbose', action='count', default=0,
         help='set the level of verbosity [default:%default]')
     parser.add_option('-s', '--search', action='store_true', dest='search', default=False,
         help='search imdb for string [default:%default]')
     parser.add_option('-g', '--guess', action='store_true', dest='guess', default=False,
         help='search imdb for possible filename match [default:%default]')
-    parser.add_option('--tv', action='store_true', dest='tv', default=False,
-        help='specify the search is a tv programme [default:%default]')
+    parser.add_option('--tv', action='store', dest='tv', default=None,
+        help='specify the id of a tv programme for a eipsode search [default:%default]')
     parser.add_option('--season', dest='season', default=None,
         help='specify the season in the search [default:%default]')
     parser.add_option('--episode', dest='episode', default=None,
@@ -116,7 +116,7 @@
         sys.exit(u'--search requires <search pattern>')
     elif opts.guess and len(args) < 1:
         sys.exit(u'--guess requires <guess pattern>')
-    tv_marker = (opts.tv or opts.season or opts.episode) and '"' or ''
+    tv_marker = (opts.season or opts.episode) and '"' or ''
 
     if opts.rom_drive is not None:
         driveset = True
@@ -176,6 +176,15 @@
             print '%s' % title.encode(opts.encoding)
         sys.exit(0)
 
+    if opts.tv:
+        print "Searching IMDB for '%s' season:%s episode:%s..." % (opts.tv, opts.season, opts.episode)
+        results = fxd.getIMDBid(opts.tv, opts.season, opts.episode)
+        if len(results) == 0:
+            print 'No results'
+        title = 'http://www.imdb.com/title/tt%s/  %s' % (results, results)
+        print '%s' % title.encode(opts.encoding)
+        sys.exit(0)
+
     # normal usage
     if len(args) < 3:
         sys.exit(u'requires <imdb id> <fxd filename> <video file>|<cd id>')

Modified: tags/REL-1_9_1/freevo/src/util/fxdimdb.py
==============================================================================
--- tags/REL-1_9_1/freevo/src/util/fxdimdb.py	(original)
+++ tags/REL-1_9_1/freevo/src/util/fxdimdb.py	Sun Jan 31 04:28:16 2010
@@ -48,8 +48,15 @@
 import codecs
 import os
 import traceback
-from BeautifulSoup import BeautifulSoup, NavigableString
-import HTMLParser
+from pprint import pprint, pformat
+try:
+    from html5lib import HTMLParser, treebuilders
+    from html5lib.treebuilders.soup import NavigableString
+    using_html5lib = True
+except ImportError:
+    import HTMLParser
+    from BeautifulSoup import BeautifulSoup, BeautifulStoneSoup, NavigableString
+    using_html5lib = False
 
 import config
 import util
@@ -232,6 +239,7 @@
         response.close()
 
         _debug_('id_list has %s items' % (len(self.id_list)))
+        #print 'id_list=%s' % (pformat(self.id_list))
         if len(self.id_list) > 20:
             # too many results, check if there are stupid results in the list
             words = []
@@ -311,8 +319,11 @@
         dvd = 0
 
         try:
-            #soup = BeautifulSoup(results.read(), convertEntities='xml')
-            soup = BeautifulSoup(results.read())
+            if using_html5lib:
+                parser = HTMLParser(tree=treebuilders.getTreeBuilder('beautifulsoup'))
+                soup = parser.parse(results.read())
+            else:
+                soup = BeautifulSoup(results.read(), convertEntities=BeautifulStoneSoup.HTML_ENTITIES)
         except UnicodeDecodeError:
             print "Unicode error: check that /usr/lib/python2.x/site.py has the correct default encoding"
             traceback.print_exc()
@@ -337,68 +348,61 @@
             self.info['year'] = y[1:-1]
         except (AttributeError, TypeError, ValueError):
             self.info['title'] = self.title
-            self.info['year'] = title.find('a').string.strip()
+            try:
+                self.info['year'] = title.find('a').contents[0].strip()
+            except AttributeError:
+                self.info['year'] = ''
 
         # Find the <div> with class info, each <h5> under this provides info
+        wanted_keys = ('release_date', 'genre', 'tagline', 'plot', 'plot_keywords',
+                       'also_known_as', 'mpaa', 'runtime', 'country', 'language', 
+                       'color', 'aspect_ratio', 'sound_mix', 'certification',
+                       )
+        
         for info in main.findAll('div', {'class' : 'info'}):
             infoh5 = info.find('h5')
             if not infoh5:
                 continue
             try:
-                infostr = infoh5.next
-                key = infostr.string.strip(':').lower().replace(' ', '_')
-                nextsibling = nextsibling = infoh5.nextSibling.strip()
-                sections = info.findAll('a', { 'href' : re.compile('/Sections') })
-                lists = info.findAll('a', { 'href' : re.compile('/List') })
-                if len(nextsibling) > 0:
-                    self.info[key] = nextsibling
+                infostr = infoh5.find(text=True)
+                key = infostr.strip().strip(':').lower().replace(' ', '_')
+                if key not in wanted_keys:
+                    continue
+                content = info.find('div', {'class' : 'info-content'})
+                infocontent = content.find(text=True)
+                if infocontent:
+                    infocontent = infocontent.strip()
+                sections = info.findAll('a', { 'href' : re.compile('^/Sections') })
+                lists = info.findAll('a', { 'href' : re.compile('^/List') })
+                keywords = info.findAll('a', { 'href' : re.compile('^/keyword') })
+                #print 'key=%s content=%r keywords=%r sections=%r lists=%r' % (key, infocontent, keywords, sections, lists)
+                if len(infocontent) > 0:
+                    self.info[key] = infocontent
                 elif len(sections) > 0:
                     items = []
                     for item in sections:
-                        items.append(item.string)
+                        items.append(item.contents[0].strip())
                     self.info[key] = ' / '.join(items)
                 elif len(lists) > 0:
                     items = []
                     for item in lists:
-                        items.append(item.string)
+                        items.append(item.contents[0].strip())
+                    self.info[key] = ' / '.join(items)
+                elif len(keywords) > 0:
+                    items = []
+                    for item in keywords:
+                        items.append(item.contents[0].strip())
                     self.info[key] = ' / '.join(items)
             except:
                 pass
 
-        # Find Plot Outline/Summary:
-        # Normally the tag is named "Plot Outline:" - however sometimes
-        # the tag is "Plot Summary:" or just "Plot:". Search for all strings.
-        imdb_result = soup.find(text='Plot Outline:')
-        if not imdb_result:
-            imdb_result = soup.find(text='Plot Summary:')
-        if not imdb_result:
-            imdb_result = soup.find(text='Plot:')
-        if imdb_result:
-            self.info['plot'] = imdb_result.next.strip()
-        else:
-            self.info['plot'] = u''
-
-        # Find tagline - sometimes the tagline is missing.
-        # Use an empty string if no tagline could be found.
-        imdb_result = soup.find(text='Tagline:')
-        if imdb_result:
-            self.info['tagline'] = imdb_result.next.strip()
-        else:
-            self.info['tagline'] = u''
-
         rating = soup.find(text='User Rating:').findNext(text=re.compile('/10'))
-        if rating:
+        try:
             votes = rating.findNext('a')
-            self.info['rating'] = rating.strip() + ' (' + votes.string.strip() + ')'
-        else:
+            self.info['rating'] = rating.strip() + ' (' + votes.contents[0].strip() + ')'
+        except AttributeError:
             self.info['rating'] = ''
 
-        runtime = soup.find(text='Runtime:')
-        if runtime and runtime.next:
-            self.info['runtime'] = runtime.next.strip()
-        else:
-            self.info['runtime'] = ''
-
         # Replace special characters in the items
         for (k,v) in self.info.items():
             self.info[k] = self.convert_entities(v)
@@ -795,11 +799,14 @@
         _debug_('parsesearchdata(results=%r, url=%r, id=%r)' % (results, url, id))
 
         self.id_list = []
-        m = re.compile('/title/tt([0-9]*)/')
-        y = re.compile('\(([^)]+)\)')
+        m = re.compile('/title/tt(\d+)/')
+        y = re.compile('\((\d+)\) *(.*)')
         try:
-            #soup = BeautifulSoup(results.read(), convertEntities='xml')
-            soup = BeautifulSoup(results.read())
+            if using_html5lib:
+                parser = HTMLParser(tree=treebuilders.getTreeBuilder('beautifulsoup'))
+                soup = parser.parse(results.read())
+            else:
+                soup = BeautifulSoup(results.read(), convertEntities=BeautifulStoneSoup.HTML_ENTITIES)
         except HTMLParser.HTMLParseError, why:
             traceback.print_exc()
             _debug_('Cannot parse %r: %s' % (url, why), DWARNING)
@@ -808,28 +815,37 @@
             traceback.print_exc()
             _debug_('Cannot parse %r: %s' % (url, why), DWARNING)
             return self.id_list
-        items = soup.findAll('a', href=re.compile('/title/tt'))
+        items = soup.findAll('a', href=re.compile('^/title/tt'))
         ids = set([])
         for item in items:
-            idm = m.search(item['href'])
+            idm = item.attrMap['href']
             if not idm:
                 continue
-            if isinstance(item.next.next, NavigableString):
-                yrm = y.findall(item.next.next)
-
-            id = idm.group(1)
-            name = item.string
-            # skip empty names
-            if not name:
+            m_match = m.match(idm)
+            if not m_match:
+                # skip invalid titles
                 continue
-            # skip duplicate ids
+            id = m_match.group(1)
             if id in ids:
+                # skip duplicate ids
+                continue
+            name = item.contents[0]
+            if not isinstance(name, NavigableString):
+                # skip empty names
                 continue
+            if isinstance(item.next.next, NavigableString):
+                yrm = item.next.next.strip()
             ids.add(id)
-            year = len(yrm) > 0 and yrm[0] or '0000'
-            type = len(yrm) > 1 and yrm[1] or ''
+            y_match = y.match(yrm)
+            if y_match:
+                year = y_match.group(1)
+                type = y_match.group(2)
+            else:
+                year = '0000'
+                type = ''
             #print 'url', item['href']
             #print item.parent.findChildren(text=re.compile('[^ ]'))
+            #print 'id=%s name=%s year=%s type=%s' % (id, name, year, type)
             self.id_list += [ ( id, name, year, type ) ]
 
         for item in self.id_list:
@@ -842,8 +858,11 @@
         Returns a new id for getIMDBid with TV series episode data
         """
         try:
-            #soup = BeautifulSoup(results.read(), convertEntities='xml')
-            soup = BeautifulSoup(results.read())
+            if using_html5lib:
+                parser = HTMLParser(tree=treebuilders.getTreeBuilder('beautifulsoup'))
+                soup = parser.parse(results.read())
+            else:
+                soup = BeautifulSoup(results.read(), convertEntities=BeautifulStoneSoup.HTML_ENTITIES)
         except UnicodeDecodeError:
             print "Unicode error; check that /usr/lib/python2.x/site.py has the correct default encoding"
             pass
@@ -971,9 +990,6 @@
         self.image = vfs.basename(self.image)
 
         _debug_('Downloaded cover image from %s' % (self.image_url))
-        print "Freevo knows nothing about the copyright of this image, please"
-        print "go to %s to check for more information about private." % self.image_url
-        print "use of this image"
 
 
     def str2XML(self, line):




[Freevo-cvslog] r11651 - branches/rel-1/freevo/src/helpers branches/rel-1/freevo/src/util tags/REL-1_9_1/freevo/src/helpers tags/REL-1_9_1/freevo/src/util

[Freevo-cvslog] r11651 - branches/rel-1/freevo/src/helpers branches/rel-1/freevo/src/util tags/REL-1_9_1/freevo/src/helpers tags/REL-1_9_1/freevo/src/util