In [42]:
# World cup

import pandas as pd
from IPython.core.display import HTML

def show_best_team(teamA, teamB, data):
    """Print the total medal counts of two teams, larger count first.

    Parameters
    ----------
    teamA, teamB : str
        Team names, compared for exact equality with the ``team`` column.
    data : pandas.DataFrame
        Medal table with at least a ``team`` and a ``tot`` column.

    Returns
    -------
    None
        The sentence is printed, not returned.

    Notes
    -----
    A team that is not in ``data`` counts as 0 medals. On a tie ``teamB``
    is named first, because only a strictly larger count puts ``teamA``
    in front.
    """
    # .sum() gives 0 for a team with no rows, the value itself for a single
    # row, and skips NaN. The previous int(<Series>) is deprecated in pandas
    # and raised on duplicate rows or a NaN count.
    totA = int(data.loc[data.team == teamA, 'tot'].sum())
    totB = int(data.loc[data.team == teamB, 'tot'].sum())

    # print(<one string>) behaves the same on Python 2 and Python 3.
    if totA > totB:
        print('%s won %d medals while %s won %d medals' % (teamA, totA, teamB, totB))
    else:
        print('%s won %d medals while %s won %d medals' % (teamB, totB, teamA, totA))
   

def show_best_winner(teamA, teamB, data):
    """Return a sentence comparing the championships won by two teams.

    Parameters
    ----------
    teamA, teamB : str
        Team names, compared for exact equality with the ``team`` column.
    data : pandas.DataFrame
        Medal table with at least a ``team`` and a ``first`` column.

    Returns
    -------
    str
        ``'<X> won <n> championships while <Y> won <m> championships'``,
        with the team holding the larger count named first. On a tie
        ``teamB`` is named first.

    Notes
    -----
    A team that is not in ``data`` counts as 0 championships.
    """
    # .sum() gives 0 for a team with no rows, the value itself for a single
    # row, and skips NaN. The previous int(<Series>) is deprecated in pandas
    # and raised on duplicate rows or a NaN count.
    totA = int(data.loc[data.team == teamA, 'first'].sum())
    totB = int(data.loc[data.team == teamB, 'first'].sum())

    if totA > totB:
        return '%s won %d championships while %s won %d championships' % (teamA, totA, teamB, totB)
    return '%s won %d championships while %s won %d championships' % (teamB, totB, teamA, totA)

# Load the medal table and overwrite its column labels with the names the
# helper functions above rely on ('team', 'first', 'tot').
medals = pd.read_csv("medals.csv")
# NOTE(review): this assignment requires the CSV to have exactly six columns
# in this order - confirm against medals.csv.
medals.columns = ['team', 'first', 'second', 'third', 'fourth', 'tot']
# Last expression of the cell, so the heading is rendered as the cell output.
HTML("<b>Best teams in soccer world cups</b>")


# text = show_best_team(teamA, teamB, medals) 
# text = show_best_winner(teamA, teamB, medals)


Out[42]:
Best teams in soccer world cups

In [43]:
# Preview the first rows of the medal table with its relabelled columns.
medals.head()


Out[43]:
team first second third fourth tot
0 Brasile 5 2 2 1 10
1 Italia 4 2 1 1 8
2 Germania 3 4 4 1 12
3 Argentina 2 2 0 0 4
4 Uruguai 2 0 0 3 5

5 rows × 6 columns


In [44]:
# Bar chart of championships ('first') for the first ten rows of the medal
# table, labelled by team name.
s = pd.Series(medals['first'][:10].values, index=medals['team'][:10])
s.plot(kind='bar')
# NOTE(review): `plt` is not imported in any visible cell - presumably it is
# injected by pylab mode; confirm, or add `import matplotlib.pyplot as plt`
# to the import cell so the notebook survives Restart & Run All.
plt.show()



In [45]:
# world cup history
# data.head()
def show_team_history(team, history=None):
    """Return the tournaments in which ``team`` finished in the top four.

    Parameters
    ----------
    team : str
        Team name, compared for exact equality with the placement columns.
    history : pandas.DataFrame, optional
        Tournament table with the columns ``Year``, ``firstp``, ``secodp``,
        ``thirdp`` and ``fourthp``. Defaults to the notebook-level ``data``
        frame, which keeps existing one-argument calls working.

    Returns
    -------
    pandas.DataFrame or None
        The matching rows sorted by ``Year``, or ``None`` (and nothing
        printed) when the team never appears in a placement column.
    """
    if history is None:
        history = data

    # One boolean mask over the four placement columns replaces four
    # separate filters glued together with pd.concat.
    placement_cols = ['firstp', 'secodp', 'thirdp', 'fourthp']
    placed = (history[placement_cols] == team).any(axis=1)
    team_hist = history[placed]

    if not team_hist.empty:
        # print(<one string>) behaves the same on Python 2 and Python 3.
        print('%s History in the world cup' % (team,))
        # DataFrame.sort was removed from pandas; sort_values is its successor.
        return team_hist.sort_values('Year')

# show_team_history(teamA)
# Tournament history table; show_team_history() reads this frame through the
# notebook-level name `data`.
data = pd.read_csv("history.csv")
# Display the whole table as the cell output.
data


Out[45]:
Year Country firstp secodp thirdp fourthp
0 1930 Uruguai Uruguai Argentina USA Jugoslavia
1 1934 Italia Italia Cecoslovacchia Germania Austria
2 1938 Francia Italia Ungheria Brasile Svezia
3 1950 Brasile Uruguai Brasile Svezia Spagna
4 1954 Svizzera Germania Ungheria Austria Uruguai
5 1958 Svezia Brasile Svezia Francia Germania
6 1962 Cile Brasile Cecoslovacchia Cile Jugoslavia
7 1966 Inghilterra Inghilterra Germania Portogallo URSS
8 1970 Messico Brasile Italia Germania Uruguai
9 1974 Germania Germania Olanda Polonia Brasile
10 1978 Argentina Argentina Olanda Brasile Italia
11 1982 Spagna Italia Germania Polonia Francia
12 1986 Messico Argentina Germania Francia Belgio
13 1990 Italia Germania Argentina Italia Inghilterra
14 1994 USA Brasile Italia Svezia Bulgaria
15 1998 Francia Francia Brasile Croazia Olanda
16 2002 Corea del Sud\n\nGiappone Brasile Germania Turchia Corea del Sud
17 2006 Germania Italia Francia Germania Portogallo
18 2010 Sudafrica Spagna Olanda Germania Uruguai

19 rows × 6 columns


In [46]:
# Earliest and latest tournament years. min()/max() do not depend on the row
# order of history.csv, and they avoid int(<one-row Series>), which pandas
# has deprecated.
HTML('The first world cup was held in <b>%s</b><br>\
     The last was in <b>%s</b>' % (int(data.Year.min()), int(data.Year.max())))


Out[46]:
The first world cup was held in 1930
The last was in 2010

In [47]:
# Teams of the 2014 tournament, each paired with its row of the medal table.
teams = pd.read_csv("teams.csv")

# Left join on the team name: every row of `teams` is kept, and teams that
# have no entry in `medals` receive NaN in the medal columns.
team_and_medals = teams.merge(medals, how='left', on='team')

HTML('Teams in <b>2014</b> world cup and their position in the medals ranking.')
#for t in teams.index:
    # print teams.ix[t].team
    # for each team get the num of medals
    #    m = medals[medals.team == teams.ix[t].team]


Out[47]:
Teams in 2014 world cup and their position in the medals ranking.

In [48]:
# Teams without a medal row came out of the left merge as NaN; show them as 0.
# sort_values replaces DataFrame.sort, which was removed from pandas.
team_and_medals.fillna(0).sort_values('tot', ascending=False).head(15)


Out[48]:
team first second third fourth tot
15 Germania 3 4 4 1 12
5 Brasile 5 2 2 1 10
22 Italia 4 2 1 1 8
14 Francia 1 1 2 1 5
1 Argentina 2 2 0 0 4
25 Olanda 0 3 0 1 4
20 Inghilterra 1 0 0 1 2
28 Spagna 1 0 0 1 2
26 Portogallo 0 0 1 1 2
9 Corea del Sud 0 0 0 1 1
7 Cile 0 0 1 0 1
12 Croazia 0 0 1 0 1
3 Belgio 0 0 0 1 1
27 Russia 0 0 0 1 1
11 Costa Rica 0 0 0 0 0

15 rows × 6 columns


In [49]:
# Table of past matches; the cells below use its team_a and team_b columns.
cal = pd.read_csv("calendar.csv")


def show_matches(team_a, team_b, calendar=None):
    """Return every recorded match between two teams, in either orientation.

    Parameters
    ----------
    team_a, team_b : str
        Team names, compared for exact equality with the ``team_a`` and
        ``team_b`` columns.
    calendar : pandas.DataFrame, optional
        Match table with ``team_a`` and ``team_b`` columns. Defaults to the
        notebook-level ``cal`` frame, which keeps existing two-argument
        calls working.

    Returns
    -------
    pandas.DataFrame or None
        The matching rows in their original table order, or ``None`` when
        the two teams never met. A one-line message is printed either way.
    """
    if calendar is None:
        calendar = cal

    # Fixtures can be listed as A-B or B-A. The previous reverse filter tested
    # team_a twice (team_a == team_b, then team_a == team_a), so B-A fixtures
    # were never found. A single combined mask also avoids indexing an
    # already-filtered frame with a full-length boolean Series.
    listed_a_b = (calendar.team_a == team_a) & (calendar.team_b == team_b)
    listed_b_a = (calendar.team_a == team_b) & (calendar.team_b == team_a)
    metches = calendar[listed_a_b | listed_b_a]

    # print(<one string>) behaves the same on Python 2 and Python 3.
    if not metches.empty:
        print('%s and %s direct metches.' % (team_a, team_b))
        return metches
    print('%s and %s never met before' % (team_a, team_b))

# Heading for the match table previewed in the next cell.
HTML('All matches so far ....')


Out[49]:
All matches so far ....

In [50]:
# Preview the first rows of the match table.
cal.head()


Out[50]:
date team_a team_b result
0 13/07/30 Francia Messico 4-1
1 13/07/30 USA Belgio 3-0
2 14/07/30 Yugoslavia Brasile 2-1
3 14/07/30 Romania Per 3-1
4 15/07/30 Argentina Francia 1-0

5 rows × 4 columns


In [51]:
import networkx as nx

# Undirected graph of past matches: one node per team and one edge per pair
# of teams that have played each other.
G = nx.Graph()

# Feed the (team_a, team_b) pairs straight from the two columns. This replaces
# the per-row cal.ix[r] lookups: .ix was removed from pandas, and each match
# cost two full row lookups.
G.add_edges_from(zip(cal.team_a, cal.team_b))

nx.draw_networkx(G, node_size=50, node_color='b', font_size=8, label='World cup past matches')

# NOTE(review): `plt` is not imported in any visible cell - presumably it is
# injected by pylab mode; confirm, or add `import matplotlib.pyplot as plt`
# to the import cell.
plt.show()



In [52]:
# The diameter is the maximum eccentricity among all nodes.
# The eccentricity of a node v is the maximum distance from v to all other nodes in G.
# Every print below takes a single pre-formatted string, which produces the
# same text on Python 2 and Python 3 (the print statement is a SyntaxError
# on Python 3).
print('Network diameter: %s' % nx.diameter(G))

print('')
print('Betweeness Centrality')
betweenness_centrality = nx.betweenness_centrality(G)
# Ten teams with the highest betweenness centrality, highest first.
for w in sorted(betweenness_centrality, key=betweenness_centrality.get, reverse=True)[:10]:
    print('%s %s' % (w, betweenness_centrality[w]))


Network diameter: 3

Betweeness Centrality
Brasile 0.115381895991
Germania 0.0955543605446
Argentina 0.0889057137045
Italia 0.079892337807
Messico 0.0569623977149
Inghilterra 0.0537285888736
Francia 0.0494665130406
Svezia 0.0490278720536
Olanda 0.0472925150217
Ungheria 0.0413860718891

In [53]:
# Player table; the file is tab-separated, hence sep='\t'.
players = pd.read_csv("players.csv", sep='\t')
# Heading for the player table previewed in the next cell.
HTML('<b>Players</b> in 2014 tournament.')


Out[53]:
Players in 2014 tournament.

In [54]:
# Preview the first rows of the player table.
players.head()


Out[54]:
Pos Giocatore eta Pres Gol Nazionale
0 P Manuel Neuer 28 45 0 Germania
1 P Roman Weidenfeller 33 1 0 Germania
2 D Philipp Lahm 30 105 5 Germania
3 D Per Mertesacker 29 96 4 Germania
4 D Marcell Jansen 28 45 3 Germania

5 rows × 6 columns


In [55]:
# Heading for the table in the next cell, which sorts players by 'eta'
# in descending order.
HTML('Older Players.')


Out[55]:
Older Players.

In [56]:
# Five rows with the highest 'eta'. sort_values replaces DataFrame.sort,
# which was removed from pandas.
players.sort_values('eta', ascending=False)[:5]


Out[56]:
Pos Giocatore eta Pres Gol Nazionale
184 P Noel Valladares 37 120 0 Honduras
174 C Giorgos Karagounis 37 132 10 Grecia
158 A Didier Drogba 36 99 63 Costa d’Avorio
259 D Daniel Van Buyten 36 77 10 Belgio
107 P Gianluigi Buffon 36 139 -114 Italia

5 rows × 6 columns


In [57]:
# Heading for the table in the next cell, which sorts players by 'eta'
# in ascending order.
HTML('Younger Players')


Out[57]:
Younger Players

In [58]:
# Five rows with the lowest 'eta'. sort_values replaces DataFrame.sort,
# which was removed from pandas.
players.sort_values('eta', ascending=True)[:5]


Out[58]:
Pos Giocatore eta Pres Gol Nazionale
648 D Frank Bagnack 18 0 0 Camerun
398 D Luke Shaw 18 1 0 Inghilterra
273 C Adnan Januzaj 19 0 0 Belgio
555 D José Giménez 19 4 0 Uruguai
287 D Cristian Ramírez 19 2 0 Ecuador

5 rows × 6 columns


In [59]:
# Heading for the table in the next cell, which sorts players by 'Gol'
# in descending order.
HTML('Best scorer')


Out[59]:
Best scorer

In [60]:
# Best scorers: five rows with the highest 'Gol'. sort_values replaces
# DataFrame.sort, which was removed from pandas.
players.sort_values('Gol', ascending=False)[:5]


Out[60]:
Pos Giocatore eta Pres Gol Nazionale
18 A Miroslav Klose 35 131 68 Germania
158 A Didier Drogba 36 99 63 Costa d’Avorio
654 A Samuel Eto'o 33 115 55 Camerun
588 A Cristiano Ronaldo 29 110 49 Portogallo
10 C Lukas Podolski 28 112 46 Germania

5 rows × 6 columns


In [61]:
# Heading for the Gol / Pres ranking computed in the next cell.
HTML('<b>Best scorer</b> by <b>Rate</b> (Gol / Pres')


Out[61]:
Best scorer by Rate (Gol / Pres

In [62]:
# Keep only players with Pres > 0 so that Gol / Pres never divides by zero.
# .copy() makes the selection an independent frame: adding the 'Rate' column
# then neither triggers SettingWithCopyWarning nor depends on whether pandas
# handed back a view of `players`.
players_who_played = players[players['Pres'] > 0].copy()

players_who_played['Rate'] = players_who_played['Gol'] / players_who_played['Pres']
# sort_values replaces DataFrame.sort, which was removed from pandas.
players_who_played.sort_values('Rate', ascending=False)[:5]


Out[62]:
Pos Giocatore eta Pres Gol Nazionale Rate
37 A Alan Pulido 23 3 4 Messico 1.333333
343 A Reza Ghoochannejhad 26 11 9 Iran 0.818182
337 C Yaghoub Karimi 22 6 4 Iran 0.666667
567 A Abel Hernández 23 11 7 Uruguai 0.636364
158 A Didier Drogba 36 99 63 Costa d’Avorio 0.636364

5 rows × 7 columns


In [63]:
# Worst goalkeepers: five rows with the lowest 'Gol'. In the output recorded
# for this cell these are all Pos == 'P' rows with negative values -
# presumably goals conceded (TODO confirm against the data source).
# sort_values replaces DataFrame.sort, which was removed from pandas.
players.sort_values('Gol', ascending=True)[:5]


Out[63]:
Pos Giocatore eta Pres Gol Nazionale
107 P Gianluigi Buffon 36 139 -114 Italia
367 P Júlio César 34 76 -54 Brasile
38 P Asmir Begović 26 28 -26 Bosnia Erzegovina
108 P Salvatore Sirigu 27 7 -8 Italia
368 P Victor 31 6 -4 Brasile

5 rows × 6 columns


In [64]:
# Section heading rendered as the cell output.
HTML('<h3>Group A</h3>')


Out[64]:

Group A


In [72]:
# https://it.wikipedia.org/wiki/Campionato_mondiale_di_calcio_2014
# Pairing under comparison; the cells below read teamA and teamB.
# Other pairings tried earlier: Brasile / Croazia, Messico / Camerun.
# NOTE(review): the medal ranking output spells this team 'Costa Rica', while
# 'Costarica' is the spelling that matches players.Nazionale - lookups in the
# medal and match tables with this value may silently find nothing; confirm.
teamA = 'Italia'
teamB = 'Costarica'

# Every print below takes a single pre-formatted string, which produces the
# same text on Python 2 and Python 3.
print('%s vs %s' % (teamA, teamB))
teamA_players = players[players.Nazionale == teamA]
teamB_players = players[players.Nazionale == teamB]

# Rows with Pos == 'P' are left out of the goal average; `!=` replaces the
# Python-2-only `<>` operator.
print('Mean goal scored by %s players: %f' % (teamA, teamA_players[teamA_players.Pos != 'P']['Gol'].mean()))
print('Mean goal scored by %s players: %f' % (teamB, teamB_players[teamB_players.Pos != 'P']['Gol'].mean()))
print('')
# Average age over the whole squad, Pos == 'P' rows included.
print('Mean age %s: %f' % (teamA, teamA_players['eta'].mean()))
print('Mean age %s: %f' % (teamB, teamB_players['eta'].mean()))


Italia vs Costarica
Mean goal scored by Italia players: 2.851852
Mean goal scored by Costarica players: 4.450000

Mean age Italia: 27.533333
Mean age Costarica: 26.318182

In [74]:
# Heading for the head-to-head lookup between teamA and teamB.
HTML('<b>Direct metches</b>')


Out[74]:
Direct metches

In [73]:
# Look up the recorded matches between the two selected teams.
show_matches(teamA, teamB)


Italia and Costarica never met before

In [75]:
# Heading for the medal comparison of teamA and teamB.
HTML('<b>Best Team</b>')


Out[75]:
Best Team

In [69]:
# Print the total medal counts of the two selected teams.
show_best_team(teamA, teamB, medals)


Italia won 8 medals while Costarica won 0 medals

In [76]:
# Top-four finishes of teamA, sorted by year.
show_team_history(teamA)


Italia History in the world cup
Out[76]:
Year Country firstp secodp thirdp fourthp
1 1934 Italia Italia Cecoslovacchia Germania Austria
2 1938 Francia Italia Ungheria Brasile Svezia
8 1970 Messico Brasile Italia Germania Uruguai
10 1978 Argentina Argentina Olanda Brasile Italia
11 1982 Spagna Italia Germania Polonia Francia
13 1990 Italia Germania Argentina Italia Inghilterra
14 1994 USA Brasile Italia Svezia Bulgaria
17 2006 Germania Italia Francia Germania Portogallo

8 rows × 6 columns


In [78]:
# Top-four finishes of teamB. The cell shows nothing when the team has none,
# because show_team_history then returns None without printing.
show_team_history(teamB)

In [77]:


In [ ]: