From: <ar...@us...> - 2010-10-07 17:07:03
|
Revision: 30683 http://xbmc4xbox.svn.sourceforge.net/xbmc4xbox/?rev=30683&view=rev Author: arnova Date: 2010-10-07 17:06:56 +0000 (Thu, 07 Oct 2010) Log Message: ----------- fixed: imdb scraper (ticket #127). Thanks to Blued! Modified Paths: -------------- trunk/system/scrapers/video/common/imdb.xml trunk/system/scrapers/video/imdb.xml Modified: trunk/system/scrapers/video/common/imdb.xml =================================================================== --- trunk/system/scrapers/video/common/imdb.xml 2010-10-06 18:00:57 UTC (rev 30682) +++ trunk/system/scrapers/video/common/imdb.xml 2010-10-07 17:06:56 UTC (rev 30683) @@ -24,41 +24,46 @@ <GetIMDBWriters dest="5"> <RegExp input="$$2" output="<details>\1</details>" dest="5"> <RegExp conditional="!fullcredits" input="$$1" output="\1" dest="6"> - <expression noclean="1">>Writer.*?:</h5>(.*?)</div></expression> + <expression noclean="1">Writers?\:(.*?)</div></expression> </RegExp> <RegExp conditional="fullcredits" input="$$1" output="\1" dest="6"> <expression noclean="1">Writing credits(.*?)name="</expression> </RegExp> <RegExp input="$$6" output="<credits>\1</credits>" dest="2+"> - <expression repeat="yes"><a href="/name/[^>]*>([^<]*)<</expression> + <expression fixchars="1" repeat="yes">/name/[^>"]*">([^<]*)<</expression> </RegExp> <expression noclean="1"/> </RegExp> </GetIMDBWriters> <GetIMDBCast dest="5"> <RegExp input="$$2" output="<details>\1</details>" dest="5"> - <RegExp input="$$1" output="\1" dest="6"> - <expression noclean="1"><table class="cast">(.*?)</table></expression> + <RegExp conditional="!fullcredits" input="$$1" output="\1" dest="6"> + <expression noclean="1"><table class="cast_list">(.*?)</table></expression> </RegExp> - <RegExp input="$$6" output="<actor><thumb>\1_SX$INFO[imdbscale]_SY$INFO[imdbscale]_\2</thumb><name>\3</name><role>\4</role></actor>" dest="7"> - <expression clear="yes" repeat="yes" noclean="1,2" trim="3,4"><img src="(?:([^"]*\.)[^"]*(\.jpg))?[^>]*[^"]*"nm"><a href="[^"]*[^>]*>([^<]*)<[^"]*"ddd"> ... [^"]*"char">(.*?)</td></expression> + <RegExp conditional="fullcredits" input="$$1" output="\1" dest="6"> + <expression noclean="1"><table class="cast">(.*?)</table></expression> </RegExp> + <RegExp input="$$6" output="<actor><thumb>\1</thumb><name>\2</name><role>\3 \4</role></actor>" dest="2+"> + <expression fixchars="2,3,4" clear="yes" repeat="yes" noclean="1" trim="2,3,4">src="([^"]*\.(?:jpg|png))".*?<td class="name">(.*?)</td>.*?<td class="character">.*?<div>\n([^\n]*)\n(.*?)</div>.*?</td></expression> + </RegExp> +<!-- <RegExp input="$$7" output="<actor><thumb>\1</thumb>\2</actor>" dest="2+"> <expression clear="yes" repeat="yes" noclean="1,2,3"><actor><thumb>(?:(http.*?)|_SX[0-9]+_SY[0-9]+_)</thumb>(.*?)</actor></expression> </RegExp> +--> <expression noclean="1"/> </RegExp> </GetIMDBCast> <GetIMDBDirectors dest="5"> <RegExp input="$$2" output="<details>\1</details>" dest="5"> <RegExp conditional="!fullcredits" input="$$1" output="\1" dest="6"> - <expression clear="yes" noclean="1">>Director.*?</h5>(.*?)</div></expression> + <expression clear="yes" noclean="1">Directors?\:(.*?)</div></expression> </RegExp> <RegExp conditional="fullcredits" input="$$1" output="\1" dest="6"> - <expression clear="yes" noclean="1">Directed by(.*?)name="</expression> + <expression clear="yes" noclean="1">Directed by(.*?)</table></expression> </RegExp> <RegExp input="$$6" output="<director>\1</director>" dest="2+"> - <expression clear="yes" repeat="yes"><a href="/name/[^>]*>([^<]*)<</expression> + <expression clear="yes" repeat="yes" fixchars="1">/name/[^>"]*">([^<]*)<</expression> </RegExp> <expression noclean="1"/> </RegExp> Modified: trunk/system/scrapers/video/imdb.xml =================================================================== --- trunk/system/scrapers/video/imdb.xml 2010-10-06 18:00:57 UTC (rev 30682) +++ trunk/system/scrapers/video/imdb.xml 2010-10-07 17:06:56 UTC (rev 30683) @@ -1,5 +1,5 @@ <?xml version="1.0" encoding="UTF-8"?> -<scraper framework="1.1" date="2010-02-24" name="IMDb.com" content="movies" thumb="imdb.png" language="en"> +<scraper framework="1.12x" date="2010-10-05" name="IMDb.com" content="movies" thumb="imdb.png" language="en"> <include>common/imdb.xml</include> <include>common/tmdb.xml</include> <include>common/movieposterdb.xml</include> @@ -53,18 +53,18 @@ <GetSearchResults dest="8"> <RegExp input="$$5" output="<?xml version="1.0" encoding="iso-8859-1" standalone="yes"?><results>\1</results>" dest="8"> <RegExp input="$$1" output="\1" dest="7"> - <expression fixchars="1" clear="yes">/title/([t0-9]*)/faq</expression> + <expression clear="yes" noclean="1">/title/([t0-9]*)/faq</expression> </RegExp> <RegExp input="$$1" output="<entity><title>\1</title><year>\2</year><url>http://akas.imdb.com/title/$$7/</url><id>$$7</id></entity>" dest="5"> - <expression fixchars="1" clear="yes" noclean="1"><meta name="title" content="([^"]*) \(([0-9]*)\)</expression> + <expression clear="yes" noclean="1"><meta name="title" content="([^"]*) \([^\)0-9]*([0-9]+)\)</expression> </RegExp> <RegExp input="$$1" output="\1" dest="4"> - <expression fixchars="1" noclean="1">(><a href="/title.*)</expression> + <expression noclean="1">(><a href="/title.*)</expression> </RegExp> <RegExp input="$$4" output="<entity><title>\2</title><year>\3</year><url>http://akas.imdb.com/title/\1/</url><id>\1</id></entity>" dest="5+"> - <expression fixchars="1" repeat="yes" noclean="1,2">><a href="/title/([t0-9]*)/[^>]*>([^<]*)</a> *\(([0-9]*)</expression> + <expression repeat="yes" noclean="1,2">><a href="/title/([t0-9]*)/[^>]*>([^<]*)</a> *\(([0-9]*)</expression> </RegExp> - <expression fixchars="1" clear="yes" noclean="1"/> + <expression clear="yes" noclean="1"/> </RegExp> </GetSearchResults> <GetDetails dest="3"> @@ -73,38 +73,47 @@ <expression fixchars="1"/> </RegExp> <RegExp input="$$1" output="<title>\1</title>" dest="5+"> - <expression fixchars="1" trim="1" noclean="1"><h1>([^<]*)</expression> + <expression fixchars="1" trim="1" noclean="1"><h1 class="header">([^<]*)</expression> </RegExp> <RegExp input="$$1" output="<year>\1</year>" dest="5+"> <expression fixchars="1">a href="/year/([0-9]*)</expression> </RegExp> <RegExp input="$$1" output="<top250>\1</top250>" dest="5+"> - <expression fixchars="1">Top 250: #([0-9]*)</a></expression> + <expression fixchars="1">Top 250 #([0-9]*)</a></expression> </RegExp> <RegExp input="$$1" output="<mpaa>\1</mpaa>" dest="5+"> - <expression fixchars="1">MPAA</a>:</h5>\n<div class="info-content">\n(.[^<]*)</expression> + <expression fixchars="1">MPAA</a>\)</h4>\n([^<]*)</expression> </RegExp> <RegExp input="$$1" output="<certification>\1 \3</certification>" dest="5+"> <expression fixchars="1" repeat="yes"><a href="/search/title\?certificates=[^"]*">([^<]*)</a>[^<]*(<i>([^<]*)</i>)?</expression> </RegExp> <RegExp input="$$1" output="<tagline>\1</tagline>" dest="5+"> - <expression fixchars="1"><h5>Tagline:</h5>\n<div class="info-content">([^<]*)</expression> + <expression fixchars="1">>Taglines:</h4>([^<]*)</expression> </RegExp> <RegExp input="$$1" output="<runtime>\1</runtime>" dest="5+"> - <expression fixchars="1" trim="1"><h5>Runtime:</h5>[^0-9]*([^<]*)</expression> + <expression noclean="1">>Runtime:</h4>[^0-9]*([,0-9]+) min</expression> </RegExp> - <RegExp input="$$1" output="<rating>\1</rating><votes>\2</votes>" dest="5+"> - <expression fixchars="1"><b>([0-9.]+)/10</b>[^<]*<a href="ratings" class="tn15more">([0-9,]+) votes</a></expression> + <RegExp input="$$1" output="<rating>\1</rating>" dest="5+"> + <expression fixchars="1">id="star-bar-user-rate"><b>([0-9.]+)</b></expression> </RegExp> + <RegExp input="$$1" output="<votes>\1</votes>" dest="5+"> + <expression fixchars="1">>([0-9,]+) votes</a></expression> + </RegExp> <RegExp input="$$1" output="<genre>\1</genre>" dest="5+"> - <expression fixchars="1" repeat="yes">"/Sections/Genres/[^/]*/">([^<]*)</a></expression> + <expression fixchars="1" repeat="yes">"/genre/[^/]*">([^<]*)</a></expression> </RegExp> <RegExp input="$$1" output="<studio>\1</studio>" dest="5+"> <expression fixchars="1" repeat="yes">"/company/[^/]*/">([^<]*)</a></expression> </RegExp> - <RegExp input="$$1" output="<outline>\1</outline><plot>\1</plot>" dest="5+"> - <expression fixchars="1">Plot:</h5>\n<div class="info-content">\n(.*?) \| <a class="tn15more</expression> + <RegExp input="$$9" output="<outline>\1</outline><plot>\1</plot>"dest="5+"> + <RegExp input="$$1" output="\1" dest="8"> + <expression >reviews</a></span>(.*?)(?:<a href="plotsummary">|</p>)</expression> </RegExp> + <RegExp input="$$8" output="\1 " dest="9+"> + <expression repeat="yes">([^\n]+)</expression> + </RegExp> + <expression noclean="1"></expression> + </RegExp> <RegExp input="$$2" output="<url function="GetIMDBPlot">$$3plotsummary</url>" dest="5+"> <expression fixchars="1"/> </RegExp> @@ -117,7 +126,7 @@ <RegExp conditional="!fullcredits" input="$$2" output="<url cache="$$2-credits.html" function="GetIMDBWriters">$$3</url>" dest="5+"> <expression fixchars="1"/> </RegExp> - <RegExp conditional="fullcredits" input="$$2" output="<url cache="$$2-fullcredits.html" function="GetIMDBCast">$$3fullcredits</url>" dest="5+"> + <RegExp conditional="fullcredits" input="$$2" output="<url cache="$$2-fullcredits.html" function="GetIMDBCast">$$3/fullcredits</url>" dest="5+"> <expression fixchars="1"/> </RegExp> <RegExp conditional="fullcredits" input="$$2" output="<url cache="$$2-fullcredits.html" function="GetIMDBDirectors">$$3fullcredits</url>" dest="5+"> @@ -153,7 +162,7 @@ <RegExp conditional="fanart" input="$$2" output="<url function="GetTMDBFanartById">$$3</url>" dest="5+"> <expression fixchars="1"/> </RegExp> - <expression fixchars="1" noclean="1"/> + <expression noclean="1"/> </RegExp> </GetDetails> </scraper> This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |