[4acf5b]: script.image.bigpictures / resources / lib / scrapers / 5_tcp.py Maximize Restore History

Download this file

5_tcp.py    47 lines (39 with data), 1.7 kB

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
from scraper import ScraperPlugin
import re
class Scraper(ScraperPlugin):
    """Scraper plugin for the TotallyCoolPix.com photo site.

    Uses ``self.getCachedTree`` and ``self.cleanHTML``, which are not defined
    in this class (presumably inherited from ``ScraperPlugin`` -- confirm).
    The parsed tree is queried with a BeautifulSoup-style ``find``/``findAll``
    API.
    """

    NAME = 'TotallyCoolPix.com'

    def getAlbums(self):
        """Collect the albums listed on the site's front page.

        Stores the result on ``self.albums`` and returns it.

        Returns:
            A list of dicts with the keys ``'title'``, ``'pic'``,
            ``'description'`` and ``'link'``. Story nodes that cannot be
            parsed are skipped.
        """
        url = 'http://totallycoolpix.com/'
        tree = self.getCachedTree(url)
        self.albums = list()
        storyNodes = tree.find('div', {'class': 'pri'}).findAll(
            'div', {'class': 'block'})
        for node in storyNodes:
            try:
                # Look the heading anchor up once; it supplies both the
                # title text and the album link.
                heading_anchor = node.find('h1').a
                title = self.cleanHTML(heading_anchor.string)
                link = heading_anchor['href']
                desc_raw = node.find(
                    'div', attrs={'class': 'post-intro'}).p.contents
                description = self.cleanHTML(desc_raw)
                pic = node.find('img')['src']
            except Exception:
                # Best-effort scraping: a block that does not have the
                # expected markup is skipped. Catching Exception instead of
                # using a bare ``except`` lets KeyboardInterrupt and
                # SystemExit propagate.
                continue
            self.albums.append({'title': title,
                                'pic': pic,
                                'description': description,
                                'link': link})
        return self.albums

    def getPhotos(self, url):
        """Collect the photos of a single album page.

        Stores the result on ``self.photos`` and returns it.

        Args:
            url: Address of the album page to scrape.

        Returns:
            A list of dicts with the keys ``'title'``, ``'pic'`` and
            ``'description'``. Every entry carries the album title taken
            from the page's first ``<h1>`` link. Caption blocks without an
            image source or a paragraph are skipped.
        """
        tree = self.getCachedTree(url)
        title = self.cleanHTML(tree.find('h1').a.string)
        self.photos = list()
        photoNodes = tree.findAll('div', {'class':
                                          re.compile('^wp-caption')})
        for node in photoNodes:
            try:
                pic = node.img['src']
                caption = node.p.contents
            except (AttributeError, KeyError, TypeError):
                # A caption block missing <img src=...> or <p> should not
                # abort the whole gallery; skip it, as getAlbums skips
                # malformed story blocks.
                continue
            description = self.cleanHTML(caption)
            self.photos.append({'title': title,
                                'pic': pic,
                                'description': description})
        return self.photos
def register():
    """Create and return a new instance of this module's ``Scraper``."""
    plugin = Scraper()
    return plugin