In [1]:
import numpy as np
import mapache
import matplotlib.pylab as plt
%matplotlib inline
In [2]:
ciudadanos = mapache.Party('Ciudadanos',
logo_url = 'https://www.ciudadanos-cs.org/var/public/sections/page-imagen-del-partido/logo-ciudadanos.jpg',
short_name = 'C\'s',
full_name = 'Ciudadanos - Partido de la Ciudadanía')
ciudadanos.show()
In [3]:
ciudadanos = mapache.parseutils.party_from_wiki('https://en.wikipedia.org/wiki/Citizens_(Spanish_political_party)')
ciudadanos.show()
We recover the tables with all the polls for the 2016 Spanish National elections containing the parties taking part
In [4]:
wiki_url = "https://en.wikipedia.org/wiki/Opinion_polling_for_the_Spanish_general_election,_2016"
tables = mapache.parseutils.tables_from_wiki(wiki_url)
print('{0} tables found'.format(len(tables)))
The first row of the table is parsed to get the name and parties. Party logos are extracted from their wikipedia pages.
This function should be easy to adapt to any other table containing a list of parties
In [5]:
def parties_from_wikitable(table):
# A PartySet contains a group of parties
spanish_parties = mapache.PartySet()
# In the the opinion polls tables the first row corresponds to the parties,
# each cell except the first two and the last three correspond to a party
row = mapache.parseutils.wikitable_get_rows(table)[0]
cells = mapache.parseutils.wikitable_get_cells(row)
for c in cells[2:-3]:
# From each cell we recover:
# - The url of the party
# - The name of the party (in the url name)
# - The small logo of the party
url, name = mapache.parseutils.wikitable_get_url(c)
small_logo = mapache.parseutils.wikitable_get_imgurl(c)
# The party wiki page is fetched to get the full name and full logo
party = mapache.parseutils.party_from_wiki(url, name)
party.set_thumbnail(small_logo)
spanish_parties.add(party)
return spanish_parties
In [6]:
spanish_parties = parties_from_wikitable(tables[0])
display(HTML(spanish_parties.show_parties()))
In [7]:
display(HTML(spanish_parties.show_parties(small=True)))
The second table includes older opinion polls where the coalition 'Unidos Podemos' is not together, we can add the parties of the coalition so they are recognized in all polls
In [8]:
spanish_parties_old = parties_from_wikitable(tables[1])
display(HTML(spanish_parties_old.show_parties(small=True)))
spanish_parties_old.parties
Out[8]:
In [9]:
coalition_party_names = ['POD', 'IU-UPEC']
In [10]:
for p in coalition_party_names:
spanish_parties['UP'].add_to_coalition(spanish_parties_old[p])
In [11]:
spanish_parties['UP'].show()
In [12]:
from dateutil.parser import parse as dateparser
In [13]:
votes = np.random.random(len(spanish_parties.parties))
votes /= sum(votes)
votes *= 100
In [14]:
pollster = 'FakePollster'
date = dateparser('18 November 2015')
party_votes = {}
for party, vote in zip(spanish_parties.parties, votes):
party_votes[party] = vote
In [15]:
poll = mapache.Poll(parties=party_votes, date=date, pollster='Fake poll')
In [16]:
print(poll)
In [17]:
polls = mapache.parseutils.poll_from_table(tables[0], date_column=1, party_columns=(2,-3), poll_rows=(2,-1), name="Opinion Polls",
error_column=-3, pollster_column=0,)
print('{0} polls loaded'.format(len(polls.polls)))
In [18]:
print(polls.polls[4])
We add the old polls as well (from a different table):
In [19]:
old_polls = mapache.parseutils.poll_from_table(tables[1], date_column=1, party_columns=(2,-3), poll_rows=(2,-1), name="Opinion Polls",
error_column=-3, pollster_column=0,)
print('{0} polls loaded'.format(len(old_polls.polls)))
polls.add(old_polls)
In [20]:
name = 'ciutadans'
matched = spanish_parties.match(name)
print('\'{0}\' matched to \'{0}\', which included the names: {2}'.format(name, matched.name,
matched.get_all_names()))
Using name matching it is possible to get all poll results of a party
In [21]:
spanish_parties.match('unidad popular')
In [22]:
print(old_polls.polls[1])
After matching the polls it is possible to plot them. The color corresponding to each party it automatically generated from the logo
TODO: Solve clashes between parties with similar colours
In [23]:
ciudadanos.show_color()
Simple plots can be easily created with the usual matplotlib functions. Note that the match function does not require an specific name for the party
In [24]:
plt.rcParams['figure.figsize'] = (12, 6)
fig, ax = plt.subplots()
parties = ['pp', 'psoe', 'up', 'cs']
for k in parties:
party_polls = polls.get_party(spanish_parties.match(k), join_coalitions=False)
dates = [x[0] for x in party_polls]
votes = [x[1] for x in party_polls]
plt.scatter(dates, votes, color=spanish_parties.match(k).color, s=40)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
plt.grid()
plt.ylim((0,40));
If join_coallitions=False, the votes of the parties being part of a coallition are not sum together, to get them we can set it to true.
In [25]:
plt.rcParams['figure.figsize'] = (12, 6)
fig, ax = plt.subplots()
parties = ['pp', 'psoe', 'up', 'cs']
for k in parties:
party_polls = polls.get_party(spanish_parties.match(k), join_coalitions=True)
dates = [x[0] for x in party_polls]
votes = [x[1] for x in party_polls]
plt.scatter(dates, votes, color=spanish_parties.match(k).color, s=40)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
plt.grid()
plt.ylim((0,40));
We will only visualize the four main parties, to extract them from the PartyList
TODO: Automatically select the top N parties
In [26]:
main_parties = spanish_parties.extract(['PP', 'PSOE', 'Unidos Podemos', 'Cs'])
In [27]:
display(HTML(main_parties.show_parties(small=True)))
In [28]:
poll=polls.polls[4]
In [29]:
elections = polls.polls[-1]
In [30]:
mapache.vis.SingleBars(poll, main_parties);
An indication of the result in a different Poll (eg. the elections) can be easily added:
In [31]:
mapache.vis.SingleBars(poll, main_parties, elections=elections);
#Add label to the line!
In [32]:
wiki_2015 = "https://en.wikipedia.org/wiki/Opinion_polling_for_the_Spanish_general_election,_2015"
tables_2015 = mapache.parseutils.tables_from_wiki(wiki_2015)
parties_2015 = parties_from_wikitable(tables_2015[0])
polls_2015 = mapache.parseutils.poll_from_table(tables_2015[0], date_column=1, party_columns=(2,-3), poll_rows=(2,-1), name="Opinion Polls",
error_column=-3, pollster_column=0,)
polls_2015._name = 'Opinion Polls'
In [33]:
parties_2015 = parties_2015.extract(['PP', 'PSOE', 'Podemos', 'Cs'])
display(HTML(parties_2015.show_parties(small=True)))
In [34]:
# The last row corresponds to the election results!
elections = polls_2015.polls[0]
elections20D = mapache.PollsList()
elections20D.add(elections)
elections20D._name = 'Elections 20D'
del polls_2015.polls[0]
In [35]:
import imp
imp.reload(mapache.vis)
Out[35]:
In [36]:
ts = mapache.vis.TimeSeries(parties_2015)
In [37]:
# A column with all the polls (add gps to arg)
ts.add_column(polls_2015)
# A column with the election results
ts.add_column(elections20D)
In [39]:
ts.show()