Learn how easy it is to sync an existing GitHub or Google Code repo to a SourceForge project! See Demo

Close

[4acf5b]: script.image.bigpictures / resources / lib / scrapers / 1_tbp.py Maximize Restore History

Download this file

1_tbp.py    62 lines (53 with data), 2.4 kB

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
from scraper import ScraperPlugin
import re
class Scraper(ScraperPlugin):
    """Scraper plugin for the Boston.com "The Big Picture" photo blog.

    Pages are fetched through ``self.getCachedTree`` and text fragments are
    normalised with ``self.cleanHTML``; neither is defined in this class
    (presumably both come from ``ScraperPlugin`` -- confirm there). The tree
    is queried with a BeautifulSoup-style API (``find`` / ``findAll`` /
    ``replaceWith``).
    """

    NAME = 'Boston.com: The Big Picture'

    def getFilters(self, url):
        """Collect the month and category filter options found at *url*.

        Populates ``self.months`` and ``self.categories`` with
        ``[label, value]`` pairs. Nothing is returned.
        """
        tree = self.getCachedTree(url)
        self.months = []
        self.categories = []
        # Only <option> tags whose ``value`` attribute matches the pattern
        # (i.e. has at least one character) are considered.
        optionNodes = tree.findAll('option', value=re.compile('.+?'))
        for node in optionNodes:
            # The first <option> under the same parent acts as the heading
            # that tells which drop-down this node belongs to.
            heading = node.parent.option.contents[0]
            if heading == 'Select a month':
                self.months.append([node.string, node['value']])
            elif heading == 'Select a category':
                self.categories.append([node.string, node['value']])

    def getAlbums(self):
        """Return the albums listed on the Big Picture front page.

        Each album is a dict with the keys ``'title'``, ``'pic'``,
        ``'description'`` and ``'link'``. The list is also stored in
        ``self.albums``. Story nodes that cannot be parsed are skipped.
        """
        url = 'http://www.boston.com/bigpicture/'
        tree = self.getCachedTree(url)
        self.albums = []
        storyNodes = tree.findAll('div', 'headDiv2')
        for node in storyNodes:
            try:
                anchor = node.find('a')
                title = anchor.string
                link = anchor['href']
                desc_raw = node.find('div',
                                     attrs={'class': 'bpBody'}).contents
                description = self.cleanHTML(desc_raw)
                pic = node.find('img')['src']
            except Exception:
                # Best effort: a story block missing its link, body or image
                # is skipped rather than aborting the whole listing. Catching
                # ``Exception`` (not a bare ``except``) lets KeyboardInterrupt
                # and SystemExit propagate.
                continue
            self.albums.append({'title': title,
                                'pic': pic,
                                'description': description,
                                'link': link})
        return self.albums

    def getPhotos(self, url):
        """Return the photos of the album page at *url*.

        Each photo is a dict with the keys ``'title'``, ``'pic'`` and
        ``'description'``; every photo carries the album title taken from
        the link inside the first ``<h2>`` of the page. The list is also
        stored in ``self.photos``.
        """
        referer = 'http://www.boston.com/bigpicture/'
        tree = self.getCachedTree(url, referer)
        title = tree.find('h2').a.string
        self.photos = []
        photoNodes = tree.findAll('div', {'class':
                                          re.compile('bpImageTop|bpBoth')})
        for node in photoNodes:
            pic = node.img['src']
            # Drop the "photoNum" element (if present) so it is not part of
            # the caption that gets cleaned below.
            photoNum = node.find('div', 'photoNum')
            if photoNum:
                photoNum.replaceWith('')
            caption = node.find('div', 'bpCaption')
            description = self.cleanHTML(caption.contents)
            self.photos.append({'title': title,
                                'pic': pic,
                                'description': description})
        return self.photos
def register():
    """Create and return a new ``Scraper`` instance for this module."""
    plugin = Scraper()
    return plugin