In [1]:
import pandas as pd
Большое количество датасетов можно найти в репозитории [Awesome Public Datasets](https://github.com/awesomedata/awesome-public-datasets).
In [2]:
import requests

# Download spain.csv from the engsoccerdata repository and store it next to
# the notebook so that later cells can read it from disk.
r = requests.get(
    'https://github.com/jalapic/engsoccerdata/raw/master/data-raw/spain.csv',
    timeout=30,  # do not hang the kernel forever if the server stalls
)
# Fail loudly on 4xx/5xx instead of silently saving an error page as "CSV".
r.raise_for_status()
# Write the raw bytes: text mode would re-encode the payload with the
# platform's default encoding and translate newlines on some systems.
with open('spain.csv', 'wb') as f:
    f.write(r.content)
In [3]:
# Load the CSV saved by the download cell into a DataFrame.
matches = pd.read_csv('spain.csv')
# Preview the first rows to check that the file parsed as expected.
matches.head()
Out[3]:
In [4]:
# Preview the last rows of the table.
matches.tail()
Out[4]:
In [5]:
# (number of rows, number of columns) of the loaded table.
matches.shape # 24000 matches!
Out[5]:
In [ ]:
In [6]:
# Purely positional selection: take the last ten rows first, then keep the
# columns at positions 1, 2 and 3.
matches.iloc[-10:].iloc[:, 1:4]
Out[6]:
In [8]:
# Label-based selection: .loc slices by index label, so the start of the slice
# is computed from the row count (the index labels here are the integers
# produced by read_csv). Columns are picked by name.
matches.loc[
    len(matches) - 10:,
    ["Date", "home", "visitor", "FT"],
]
Out[8]:
In [40]:
# Selecting a single column by name yields a Series; show its last five values.
matches["Date"].iloc[-5:]
Out[40]:
In [38]:
# Selecting with a list of column names yields a DataFrame; show its last five rows.
matches.loc[:, ["Date", "FT"]].iloc[-5:]
Out[38]:
In [ ]:
In [49]:
# Mean of the hgoal and vgoal columns, returned as a (hgoal, vgoal) pair.
tuple(matches[column].mean() for column in ("hgoal", "vgoal"))
Out[49]:
In [52]:
# Maximum of the hgoal and vgoal columns, returned as a (hgoal, vgoal) pair.
tuple(matches[column].max() for column in ("hgoal", "vgoal"))
Out[52]:
In [54]:
# Median of the hgoal and vgoal columns, returned as a (hgoal, vgoal) pair.
tuple(matches[column].median() for column in ("hgoal", "vgoal"))
Out[54]:
In [ ]:
In [9]:
# Boolean-mask filter: keep only the rows whose hgoal value exceeds 10.
matches.loc[matches["hgoal"].gt(10)]
Out[9]:
In [10]:
# Combine three element-wise conditions with `&`; every condition needs its
# own parentheses because `&` binds tighter than `==` and `>`.
matches.loc[
    (matches["home"] == "Real Madrid")
    & (matches["visitor"] == "FC Barcelona")
    & (matches["vgoal"] > 3)
]
Out[10]:
In [ ]:
In [11]:
# Render matplotlib figures inside the notebook output.
# (Keep this comment on its own line: text after a line magic is passed to the magic.)
%matplotlib inline
In [12]:
# Histogram of the hgoal column over the rows where FC Barcelona is the home
# side. One .loc lookup replaces the chained [mask]["hgoal"] indexing, and the
# figure is titled and labelled so it can be read without the code above it.
ax = matches.loc[matches["home"] == "FC Barcelona", "hgoal"].hist()
ax.set_title("FC Barcelona at home: distribution of hgoal")
ax.set_xlabel("hgoal (goals by the home side)")  # meaning presumed from the column name
ax.set_ylabel("Number of matches")
ax
Out[12]:
In [13]:
# Histogram of the hgoal column over the rows where Real Madrid is the home
# side. One .loc lookup replaces the chained [mask]["hgoal"] indexing, and the
# figure is titled and labelled so it can be read without the code above it.
ax = matches.loc[matches["home"] == "Real Madrid", "hgoal"].hist()
ax.set_title("Real Madrid at home: distribution of hgoal")
ax.set_xlabel("hgoal (goals by the home side)")  # meaning presumed from the column name
ax.set_ylabel("Number of matches")
ax
Out[13]: