In [1]:
from requests_html import HTMLSession

# One shared session object; every request in this notebook goes through it.
session = HTMLSession()

In [3]:
# Request the People's Choice Award 2018 page that the rest of the notebook parses.
index_url = 'http://enter.lumenprize.com/people-choice-award-2018'
resp = session.get(index_url)

# Display the response's `ok` flag as the cell output.
resp.ok


Out[3]:
True

In [13]:
# Collect every absolute link found inside the first '.view-content' element.
# NOTE(review): `absolute_links` appears to be a set, so the order of this list
# (and therefore `people_urls[0]`) may differ between kernel runs; confirm.
listing = resp.html.find('.view-content', first=True)
people_urls = list(listing.absolute_links)

# Display how many links were collected.
len(people_urls)


Out[13]:
60

In [14]:
# Fetch a single entry page (the first collected link); the following cells
# inspect its markup before the full scrape.
first_entry_url = people_urls[0]
resp = session.get(first_entry_url)

resp.ok


Out[14]:
True

In [19]:
# Text of every '.percent' element on the page currently held in `resp`.
percent_elements = resp.html.find('.percent')
[element.text for element in percent_elements]


Out[19]:
['38%', '62%']

In [21]:
# Text of the first '.field-name-field-artist-name' element on the page in `resp`.
artist_element = resp.html.find('.field-name-field-artist-name', first=True)
artist_element.text


Out[21]:
'Béatrice Lartigue, Lab212'

In [25]:
# Scrape every entry page into one record per URL.
# Each record has the keys 'artist', 'url', 'up' and 'down'; the two percentage
# values are kept as the raw strings shown on the page (e.g. '38%').
rows = []

for url in people_urls:
    resp = session.get(url)
    # Fail fast on HTTP errors: an error page would not contain the selectors
    # used below and would otherwise surface as an opaque AttributeError or
    # unpacking ValueError with no hint of which URL was at fault.
    resp.raise_for_status()

    artist_element = resp.html.find('.field-name-field-artist-name', first=True)
    if artist_element is None:
        raise ValueError(f'no artist name element found on {url}')
    artist = artist_element.text

    percents = [x.text for x in resp.html.find('.percent')]
    if len(percents) != 2:
        raise ValueError(
            f'expected exactly 2 .percent elements on {url}, found {len(percents)}'
        )
    # Page order is relied on: the first value is stored as 'up', the second as 'down'.
    up, down = percents

    rows.append({
        'artist': artist,
        'url': url,
        'up': up,
        'down': down,
    })

In [27]:
import pandas as pd

In [28]:
# Build the results table from the scraped records: one row per dict in `rows`.
df = pd.DataFrame(rows)

In [29]:
# Preview the first rows of the table.
df.head()


Out[29]:
artist down up url
0 Béatrice Lartigue, Lab212 62% 38% http://enter.lumenprize.com/node/756
1 Scott Billings 49% 51% http://enter.lumenprize.com/node/902
2 Clelio de Paula 30% 70% http://enter.lumenprize.com/node/874
3 Synspecies (Elías Merino & Tadej Droljc) 75% 25% http://enter.lumenprize.com/node/523
4 Ben Bogart 72% 28% http://enter.lumenprize.com/node/694

In [ ]: