In [1]:
import urllib2
import sys
import re
import timeit
import pandas as pd
from bs4 import BeautifulSoup
In [55]:
pages = range(0, 39)  # Metacritic splits the DVD list across 39 pages
# for page in pages:  # TODO: loop over every page -- see the sketch below the output
number = 5  # page number to scrape
site = "http://www.metacritic.com/browse/dvds/release-date/available/date/?page=" + str(number)
hdr = {'User-Agent': 'Mozilla/5.0'}  # a browser-like User-Agent helps avoid a 403 Forbidden error
# Fetch the page and parse the HTML with BeautifulSoup
req = urllib2.Request(site, headers=hdr)
page = urllib2.urlopen(req)
soup = BeautifulSoup(page, 'lxml')  # lxml is a fast HTML parser
# get movie titles: each movie link looks like <a href="/movie/<slug>" ...>
a_tags = soup.find_all('a')  # all anchor tags on the page
a_tags_text = [i.text for i in a_tags]  # link text as unicode
a_tags_str = [unicode(x).encode("utf-8") for x in a_tags]  # serialize each tag's HTML to a UTF-8 string
titles = []
for i in a_tags_str:
    output = re.search('<a href="/movie/(.+?)>', i)  # capture everything between /movie/ and the closing >
    if output:
        titles.append(output.group(1))
titles = [x.split('"')[0] for x in titles][:200]  # keep only the slug before the closing quote; cap at 200
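# An alternative sketch (not in the original): read the slug straight from each
# tag's href attribute instead of regexing the serialized HTML; titles_alt is a
# hypothetical name and is not used below.
titles_alt = [a['href'].split('/movie/')[1] for a in a_tags
              if a.get('href', '').startswith('/movie/')][:200]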
# get metascores: tags with class 'metascore_w'; convert Tag -> text -> int
metascores = soup(attrs={'class': 'metascore_w'})
metascores = [int(x.text) for x in metascores]
# get user scores: tags with class 'textscore'; keep as UTF-8 strings
userscores = soup(attrs={'class': 'textscore'})
userscores = [x.text.encode("utf-8") for x in userscores]  # unicode -> UTF-8 strings
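# Optional (an assumption, not in the original): user scores that are not yet
# available appear as 'tbd'; this maps them to None so the column could be made
# numeric. userscores_num is a hypothetical name and is not used below.
userscores_num = [float(x) if x.replace('.', '', 1).isdigit() else None
                  for x in userscores]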
# zip titles, metascores, and user scores into (title, metascore, userscore) tuples
scores = zip(titles, metascores, userscores)
# Convert to dataframe
df = pd.DataFrame.from_records(scores, columns=['title', 'metascore', 'userscore'])
# Pickle dataframe
df.to_pickle('metacritic_' + str(number) + '.pickle')
df
Out[55]:
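The commented-out loop above hints at the full scrape. A minimal sketch, assuming the cell above is wrapped in a function scrape_page(number) (a hypothetical name) that returns the per-page dataframe:
In [ ]:
import time

frames = []
for page in pages:
    frames.append(scrape_page(page))  # fetch, parse, and pickle one page
    time.sleep(1)                     # small delay to be polite to the server
# stitch the per-page dataframes into one and pickle the combined result
all_scores = pd.concat(frames, ignore_index=True)
all_scores.to_pickle('metacritic_all.pickle')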