In [1]:
from requests_html import HTMLSession
# One shared session object; every page fetch in this notebook goes through it.
session = HTMLSession()
In [3]:
# Listing page for the award (see the URL); later cells read the entry links from it.
INDEX_URL = 'http://enter.lumenprize.com/people-choice-award-2018'
resp = session.get(INDEX_URL)
# Shown as the cell output so a failed request is noticed before parsing.
resp.ok
Out[3]:
In [13]:
# Collect the absolute URLs linked from inside the '.view-content' container
# of the page currently held in `resp`.
listing = resp.html.find('.view-content', first=True)
if listing is None:
    # find(..., first=True) gives None when nothing matches; fail with a clear
    # message instead of an AttributeError on None.
    raise ValueError("no '.view-content' element found in the fetched page")

# absolute_links is a set, so its iteration order is arbitrary. Sorting makes
# people_urls (and therefore people_urls[0] and the final row order) stable
# across runs.
people_urls = sorted(listing.absolute_links)
len(people_urls)
Out[13]:
In [14]:
# Fetch a single entry page so its markup can be explored in the next cells.
# NOTE: this rebinds `resp`, which until now held the listing page.
sample_url = people_urls[0]
resp = session.get(sample_url)
resp.ok
Out[14]:
In [19]:
# Text of every '.percent' element on the page currently held in `resp`.
percent_elements = resp.html.find('.percent')
[element.text for element in percent_elements]
Out[19]:
In [21]:
# Text of the first element carrying the artist-name field class.
artist_element = resp.html.find('.field-name-field-artist-name', first=True)
artist_element.text
Out[21]:
In [25]:
# Scrape every entry page into one record per URL: the artist name plus the
# texts of the page's two '.percent' elements, stored as 'up' and 'down'
# (in document order, as the exploratory cells above suggest).
rows = []
for url in people_urls:
    # Loop-local name, so the `resp` inspected in earlier cells is not
    # silently replaced by the last page of the scrape.
    page = session.get(url)

    artist_el = page.html.find('.field-name-field-artist-name', first=True)
    percents = [el.text for el in page.html.find('.percent')]

    # find(..., first=True) returns None when nothing matches, and a page
    # without exactly two '.percent' elements would break the up/down
    # unpacking. Record None for the missing values so one odd page does not
    # abort the whole scrape; the gaps stay visible in the resulting table.
    artist = artist_el.text if artist_el is not None else None
    up, down = percents if len(percents) == 2 else (None, None)

    rows.append({
        'artist': artist,
        'url': url,
        'up': up,
        'down': down,
    })
In [27]:
import pandas as pd
In [28]:
# Build the table from the scraped records. Naming the columns explicitly
# keeps the same four columns, in this order, even when `rows` is empty or the
# pandas version would otherwise order dict keys differently.
df = pd.DataFrame(rows, columns=['artist', 'url', 'up', 'down'])
In [29]:
# Preview the first rows of the scraped table.
df.head()
Out[29]:
In [ ]: