In [42]:
# World cup
import pandas as pd
from IPython.core.display import HTML
def show_best_team(teamA, teamB, data):
    """Print the total medal count of two teams, leading with the better one.

    Parameters
    ----------
    teamA, teamB
        Team names, matched against the ``team`` column of ``data``.
    data : pandas.DataFrame
        Table with at least the columns ``team`` and ``tot``.

    Returns
    -------
    None
        The sentence is written to stdout only.

    Notes
    -----
    A team without a row in ``data`` counts as 0 medals. ``teamA`` leads the
    sentence only when it has strictly more medals; on a tie ``teamB`` leads.
    """
    def total_for(team):
        rows = data[data.team == team]['tot']
        # Pull the scalar out explicitly: int() on a whole Series fails as
        # soon as a team matches more than one row.
        return int(rows.iloc[0]) if len(rows) > 0 else 0

    totA = total_for(teamA)
    totB = total_for(teamB)
    if totA > totB:
        ranking = (teamA, totA, teamB, totB)
    else:
        ranking = (teamB, totB, teamA, totA)
    # A parenthesised single-argument print behaves the same under the
    # Python 2 print statement and the Python 3 print function.
    print('%s won %d medals while %s won %d medals' % ranking)
def show_best_winner(teamA, teamB, data):
    """Build a sentence comparing the championships won by two teams.

    Parameters
    ----------
    teamA, teamB
        Team names, matched against the ``team`` column of ``data``.
    data : pandas.DataFrame
        Table with at least the columns ``team`` and ``first``.

    Returns
    -------
    str
        ``'<X> won <n> championships while <Y> won <m> championships'`` where
        X is ``teamA`` only when it has strictly more titles than ``teamB``;
        otherwise (ties included) ``teamB`` comes first.

    Notes
    -----
    A team without a row in ``data`` counts as 0 championships.
    """
    def titles_for(team):
        # 'first' must be read with [] because DataFrame also has a method
        # of that name.
        rows = data[data.team == team]['first']
        # Pull the scalar out explicitly: int() on a whole Series fails as
        # soon as a team matches more than one row.
        return int(rows.iloc[0]) if len(rows) > 0 else 0

    totA = titles_for(teamA)
    totB = titles_for(teamB)
    if totA > totB:
        ranking = (teamA, totA, teamB, totB)
    else:
        ranking = (teamB, totB, teamA, totA)
    return '%s won %d championships while %s won %d championships' % ranking
# Load the medal table and give its columns fixed names; whatever header
# names the CSV carried are overwritten by this assignment.
medals = pd.read_csv("medals.csv")
medals.columns = ['team', 'first', 'second', 'third', 'fourth', 'tot']
# Section heading; as the last expression of the cell it is what gets displayed.
HTML("<b>Best teams in soccer world cups</b>")
# Example calls for the two helpers defined earlier in this cell
# (teamA / teamB are only assigned in a later cell):
# text = show_best_team(teamA, teamB, medals)
# text = show_best_winner(teamA, teamB, medals)
Out[42]:
In [43]:
# Preview the first rows of the medal table.
medals.head()
Out[43]:
In [44]:
# Bar chart of the 'first' column for the first ten rows of `medals`,
# labelled by team name.
s = pd.Series(medals['first'][:10].values, index=medals['team'][:10])
s.plot(kind='bar')
# NOTE(review): `plt` is not imported in any cell shown here; presumably it comes
# from an earlier cell or a pylab session - confirm it exists on a fresh kernel.
plt.show()
In [45]:
# world cup history
# data.head()
def show_team_history(team, history=None):
    """Return the world cup editions in which ``team`` finished in the top four.

    Parameters
    ----------
    team
        Team name, compared against the ``firstp``, ``secodp``, ``thirdp``
        and ``fourthp`` columns.
    history : pandas.DataFrame, optional
        Table to search; it must also have a ``Year`` column. When omitted,
        the notebook-level ``data`` frame is used, so that global has to
        exist by the time the function is called.

    Returns
    -------
    pandas.DataFrame or None
        The matching rows sorted by ``Year`` (a heading line is printed
        first), or None, with nothing printed, when the team never appears.
    """
    if history is None:
        history = data
    # NOTE(review): 'secodp' looks like a misspelling of 'secondp', but it has
    # to match the column header of the loaded table - confirm before renaming.
    placement_columns = ('firstp', 'secodp', 'thirdp', 'fourthp')
    team_hist = pd.concat(
        [history[history[column] == team] for column in placement_columns]
    )
    if team_hist.empty:
        return None
    print('%s History in the world cup' % team)
    # DataFrame.sort was removed from pandas in favour of sort_values; use
    # whichever one the installed pandas provides.
    if hasattr(team_hist, 'sort_values'):
        return team_hist.sort_values(['Year'])
    return team_hist.sort(['Year'])
# show_team_history(teamA)
# Load the world cup history table. show_team_history (defined earlier in this
# cell) looks up this global `data` when it is called, so this assignment
# must have run before any call to it.
data = pd.read_csv("history.csv")
# Displays the whole table as the cell output.
data
Out[45]:
In [46]:
# Years of the first and last rows of `data`.
# NOTE(review): this reads row order, not min/max - it assumes the history
# table is ordered by Year; confirm.
# The scalar is taken with .iloc before int(): calling int() on a one-row
# Series is deprecated in recent pandas.
HTML(
    'The first world cup was held in <b>%s</b><br>'
    'The last was in <b>%s</b>'
    % (int(data.Year.iloc[0]), int(data.Year.iloc[-1]))
)
Out[46]:
In [47]:
# Read the teams and attach each team's medal counts.
teams = pd.read_csv("teams.csv")
# Left join on 'team': every row of `teams` is kept, and a team with no row in
# `medals` gets NaN in the medal columns.
team_and_medals = pd.merge(teams, medals, how='left', on='team')
HTML('Teams in <b>2014</b> world cup and their position in the medals ranking.')
# (An earlier row-by-row lookup of each team's medals was replaced by the merge above.)
Out[47]:
In [48]:
# Top 15 teams by total medals; NaN is filled with 0 first, and
# ascending=(0) is just 0, i.e. descending order.
# NOTE(review): DataFrame.sort only exists in old pandas releases (removed in
# 0.20); on current pandas this call must become sort_values.
team_and_medals.fillna(0).sort(['tot'], ascending=(0)).head(15)
Out[48]:
In [49]:
# Table of past matches; the code below reads its `team_a` and `team_b` columns.
cal = pd.read_csv("calendar.csv")
def show_matches(team_a, team_b, calendar=None):
    """Return every recorded match between two teams, whichever side each was listed on.

    Parameters
    ----------
    team_a, team_b
        Team names, compared against the ``team_a`` / ``team_b`` columns.
    calendar : pandas.DataFrame, optional
        Match table to search. When omitted, the notebook-level ``cal`` frame
        is used, so that global has to exist by the time the function is called.

    Returns
    -------
    pandas.DataFrame or None
        The matching rows in table order (a heading line is printed first),
        or None after printing a "never met" message when there are none.
    """
    if calendar is None:
        calendar = cal
    # Each direction is a single combined mask rather than two chained
    # boolean selections.
    listed_order = (calendar.team_a == team_a) & (calendar.team_b == team_b)
    # The reverse fixture must test team_b against the first name; testing
    # team_a twice (as this function used to) can never match two distinct teams.
    reverse_order = (calendar.team_a == team_b) & (calendar.team_b == team_a)
    metches = calendar[listed_order | reverse_order]
    if metches.empty:
        print('%s and %s never met before' % (team_a, team_b))
        return None
    print('%s and %s direct metches.' % (team_a, team_b))
    return metches
# Section heading; as the last expression of the cell it is what gets displayed.
HTML('All matches so far ....')
Out[49]:
In [50]:
# Preview the first rows of the match table.
cal.head()
Out[50]:
In [51]:
import networkx as nx
# Undirected graph of past matches: one node per team, one edge per pair of
# teams that appear together in a row of `cal`.
G = nx.Graph()
# zip pairs the two team columns row by row. This replaces a per-row
# cal.ix[r] lookup (.ix no longer exists in current pandas) and yields the
# same edges in the same order.
G.add_edges_from(zip(cal.team_a, cal.team_b))
nx.draw_networkx(G, node_size=50, node_color='b', font_size=8, label='World cup past matches')
# NOTE(review): `plt` is not imported in any cell shown here; presumably it comes
# from an earlier cell or a pylab session - confirm it exists on a fresh kernel.
plt.show()
In [52]:
# The diameter is the maximum eccentricity among all nodes.
# The eccentricity of a node v is the maximum distance from v to all other nodes in G.
# Every print below takes a single pre-formatted string, so the output is the
# same under the Python 2 print statement and the Python 3 print function.
print('Network diameter: %s' % nx.diameter(G))
print('')
print('Betweeness Centrality')
betweenness_centrality = nx.betweenness_centrality(G)
# Ten nodes with the highest betweenness centrality, highest first.
for w in sorted(betweenness_centrality, key=betweenness_centrality.get, reverse=True)[:10]:
    print('%s %s' % (w, betweenness_centrality[w]))
In [53]:
# Player table, read as a tab-separated file.
players = pd.read_csv("players.csv", sep='\t')
# Section heading; as the last expression of the cell it is what gets displayed.
HTML('<b>Players</b> in 2014 tournament.')
Out[53]:
In [54]:
# Preview the first rows of the player table.
players.head()
Out[54]:
In [55]:
# Oldest and youngest players (the following cells sort on 'eta').
# Single-row alternative kept for reference:
#players.ix[players['eta'].argmax()]
HTML('Older Players.')
Out[55]:
In [56]:
# Five rows with the highest 'eta' (ascending=[0] means descending order).
# NOTE(review): DataFrame.sort only exists in old pandas releases (removed in
# 0.20); on current pandas this call must become sort_values.
players.sort(['eta'], ascending=[0])[:5]
Out[56]:
In [57]:
# Section heading for the table in the next cell.
HTML('Younger Players')
Out[57]:
In [58]:
# Five rows with the lowest 'eta' (ascending=[1] means ascending order).
# NOTE(review): DataFrame.sort only exists in old pandas releases (removed in
# 0.20); on current pandas this call must become sort_values.
players.sort(['eta'], ascending=[1])[:5]
Out[58]:
In [59]:
# Section heading for the table in the next cell.
HTML('Best scorer')
Out[59]:
In [60]:
# Best scorers: five rows with the highest 'Gol'.
# Single-row alternative kept for reference:
# players.ix[players['Gol'].argmax()]
# A Gol / Pres rate is computed in a later cell.
# NOTE(review): DataFrame.sort only exists in old pandas releases (removed in
# 0.20); on current pandas this call must become sort_values.
players.sort(['Gol'], ascending=[0])[:5]
Out[60]:
In [61]:
# Section heading for the rate table in the next cell.
# NOTE(review): the displayed text opens a parenthesis before "Gol / Pres" and never closes it.
HTML('<b>Best scorer</b> by <b>Rate</b> (Gol / Pres')
Out[61]:
In [62]:
# Keep only rows with Pres > 0 so the Gol / Pres rate below never divides by zero.
# .copy() makes this an independent frame: adding a column to a bare filtered
# selection of `players` raises pandas' SettingWithCopyWarning.
players_who_played = players[players['Pres'] > 0].copy()
players_who_played['Rate'] = players_who_played['Gol'] / players_who_played['Pres']
# Five rows with the highest rate.
# NOTE(review): DataFrame.sort only exists in old pandas releases (removed in
# 0.20); on current pandas this call must become sort_values.
players_who_played.sort(['Rate'], ascending=[0])[:5]
Out[62]:
In [63]:
# Worst goalkeepers: five rows with the lowest 'Gol'.
# NOTE(review): presumably goalkeepers carry a negative 'Gol' (goals conceded),
# which is what would make the lowest values goalkeepers - confirm against the data.
# Single-row alternative kept for reference:
#players.ix[players['Gol'].argmin()]
# NOTE(review): DataFrame.sort only exists in old pandas releases (removed in
# 0.20); on current pandas this call must become sort_values.
players.sort(['Gol'], ascending=[1])[:5]
Out[63]:
In [64]:
# Section heading for the per-match analysis in the following cells.
HTML('<h3>Group A</h3>')
Out[64]:
In [72]:
# Match under analysis. Reference page:
# https://it.wikipedia.org/wiki/Campionato_mondiale_di_calcio_2014
# Other pairings, kept for quick switching:
# teamA = 'Brasile'
# teamB = 'Croazia'
# teamA = 'Messico'
# teamB = 'Camerun'
teamA = 'Italia'
teamB = 'Costarica'
# Every print below takes a single pre-formatted string, so the output is the
# same under the Python 2 print statement and the Python 3 print function.
print('%s vs %s' % (teamA, teamB))

# Squad of each team: rows of `players` whose 'Nazionale' equals the team name.
teamA_players = players[players.Nazionale == teamA]
teamB_players = players[players.Nazionale == teamB]

# Mean of 'Gol' over the rows whose 'Pos' is not 'P'
# (presumably 'P' marks goalkeepers - confirm). `!=` replaces the `<>`
# operator, which Python 3 no longer accepts.
print('Mean goal scored by %s players: %f' % (teamA, teamA_players[teamA_players.Pos != 'P']['Gol'].mean()))
print('Mean goal scored by %s players: %f' % (teamB, teamB_players[teamB_players.Pos != 'P']['Gol'].mean()))
print('')
# Average age ('eta' column) over the whole squad.
print('Mean age %s: %f' % (teamA, teamA_players['eta'].mean()))
print('Mean age %s: %f' % (teamB, teamB_players['eta'].mean()))
In [74]:
# Section heading for the head-to-head table in the next cell.
HTML('<b>Direct metches</b>')
Out[74]:
In [73]:
# Head-to-head matches for the pair chosen in the teamA / teamB cell.
# NOTE(review): this cell's execution count (73) is lower than that of the
# heading cell before it (74), so the cells were not last run top to bottom.
show_matches(teamA, teamB)
In [75]:
# Section heading for the medal comparison in the next cell.
HTML('<b>Best Team</b>')
Out[75]:
In [69]:
# Prints the medal comparison for the chosen pair.
# NOTE(review): this cell's execution count (69) predates the teamA / teamB
# cell (72), so any saved output may belong to a different pairing - re-run.
show_best_team(teamA, teamB, medals)
In [76]:
# Top-four finishes of the first team, sorted by year.
show_team_history(teamA)
Out[76]:
In [78]:
# Top-four finishes of the second team, sorted by year; the call returns None
# when the team never finished in the top four.
show_team_history(teamB)
In [77]:
In [ ]: