In [6]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# NOTE(review): pylab is bound to the conventional pyplot alias `plt`; the
# cells visible here only use plt.rcParams and plt.subplots.
from matplotlib import pylab as plt
# Default (width, height) for every figure created after this line.
plt.rcParams['figure.figsize'] = (15.0, 10.0)
import pandas as pd
import seaborn as sns
In [4]:
# Read the team table; the path is relative to the notebook's working directory.
teams_csv = "978-3-319-12065-2/chapter-4/teams.csv"
data = pd.read_csv(teams_csv)
# Show the first rows as this cell's output.
data.head()
Out[4]:
In [19]:
# Scatter of wins against payroll via the pandas plotting accessor.
data.plot.scatter(x="payroll", y="wins")
Out[19]:
In [18]:
# Scatter of wins against payroll with seaborn; fit_reg=False suppresses the
# regression line so only the points are drawn.
# x, y and data are passed by keyword: current seaborn releases make x and y
# keyword-only, so the positional form fails there, while the keyword form is
# accepted by both old and new releases.
sns.regplot(x="payroll", y="wins", data=data, fit_reg=False)
Out[18]:
In [22]:
def label_point_orig(x, y, val, ax):
    """Annotate each (x, y) point on ``ax`` with the matching entry of ``val``.

    Parameters
    ----------
    x, y : pandas.Series
        Coordinates of the points to annotate.
    val : pandas.Series
        Label for each point; every entry is converted with ``str``.
    ax : object exposing ``text(x, y, s)``
        Target axes; ``ax.text`` is called once per row.

    Returns
    -------
    None

    Notes
    -----
    The three series are combined with ``pd.concat(..., axis=1)``, so they are
    paired up by index label rather than by position.
    """
    points = pd.concat({'x': x, 'y': y, 'val': val}, axis=1)
    # itertuples keeps each column's own dtype. iterrows builds one Series per
    # row and upcasts mixed numeric columns, so an integer label next to float
    # coordinates would be rendered as "7.0" instead of "7".
    for point in points.itertuples(index=False):
        ax.text(point.x, point.y, str(point.val))
# Draw the payroll/wins scatter, then annotate every point with the value of
# the `code` column for that row.
ax = data.plot.scatter(x="payroll", y="wins")
label_point_orig(data["payroll"], data["wins"], data["code"], ax)
In [23]:
# Same labelled scatter, drawn with seaborn (fit_reg=False: points only).
# x, y and data are passed by keyword because current seaborn releases make
# x and y keyword-only; the positional form fails there.
ax1 = sns.regplot(x="payroll", y="wins", data=data, fit_reg=False)
# Annotate every point with the value of the `code` column for that row.
label_point_orig(data.payroll, data.wins, data.code, ax1)
In [37]:
# One frame per league, kept under the names the rest of the notebook uses.
data_al = data.loc[data["league"] == "AL"]
data_nl = data.loc[data["league"] == "NL"]

fig, ax = plt.subplots()
# Marker-only series (linestyle='' disables the connecting line), one per league.
for league_name, league_marker, league_rows in (("AL", 'o', data_al), ("NL", 's', data_nl)):
    ax.plot(
        league_rows["payroll"],
        league_rows["wins"],
        marker=league_marker,
        linestyle='',
        label=league_name,
    )
ax.legend()
ax.set_xlabel("payroll")
ax.set_ylabel("wins")
Out[37]:
In [35]:
# Plot wins against payroll with one marker style per league, letting groupby
# produce the per-league frames instead of filtering by hand.
data_groups = data.groupby('league')
markers = ["o", "s"]
fig, ax = plt.subplots()
# enumerate replaces the hand-maintained counter; the modulo wraps around the
# marker list so extra groups reuse markers instead of raising IndexError.
for mindx, (name, group) in enumerate(data_groups):
    ax.plot(
        group.payroll,
        group.wins,
        marker=markers[mindx % len(markers)],
        linestyle='',  # markers only, no connecting line
        ms=12,
        label=name,
    )
ax.legend()
ax.set_xlabel("payroll")
ax.set_ylabel("wins")
Out[35]:
In [58]:
# Split teams by record. The variable names are kept for compatibility, but
# note that they hold the pct >= 0.5 and pct < 0.5 subsets, not the leagues.
data_gf_al = data[data.pct >= 0.5]
# Strict '<' here: with '<=' a team at exactly 0.5 landed in both subsets and
# was drawn twice with overlapping markers.
data_gf_nl = data[data.pct < 0.5]
fig, ax = plt.subplots()
ax.plot(data_gf_al.payroll, data_gf_al.wins, marker='o', linestyle='', label="pct >= 0.5")
ax.plot(data_gf_nl.payroll, data_gf_nl.wins, marker='s', linestyle='', label="pct < 0.5")
# Horizontal reference line across the full payroll range.
# NOTE(review): 81 is presumably the break-even win total for the season
# length in this data set -- confirm.
ax.plot([data.payroll.min(), data.payroll.max()], [81, 81], label="81 wins")
ax.legend()
ax.set_xlabel("payroll")
ax.set_ylabel("wins")
Out[58]:
In [64]:
# Per-league scatter of wins against payroll, using circle and triangle
# markers for the successive groups.
data_groups = data.groupby('league')
markers = ["o", "^"]
fig, ax = plt.subplots()
# enumerate replaces the hand-maintained counter; the modulo wraps around the
# marker list so extra groups reuse markers instead of raising IndexError.
for mindx, (name, group) in enumerate(data_groups):
    ax.plot(
        group.payroll,
        group.wins,
        marker=markers[mindx % len(markers)],
        linestyle='',  # markers only, no connecting line
        ms=12,
        label=name,
    )
ax.legend()
ax.set_xlabel("payroll")
ax.set_ylabel("wins")
Out[64]:
In [65]:
# Total payroll over the rows whose league is "NL".
data.loc[data["league"] == "NL", "payroll"].sum()
Out[65]:
In [66]:
# Total payroll per league.
data.groupby("league")["payroll"].sum()
Out[66]:
In [68]:
# Total payroll per (league, division) pair.
data.groupby(["league", "division"])["payroll"].sum()
Out[68]:
In [71]:
# Bar chart of total payroll per league.
(
    data.groupby("league")["payroll"]
    .sum()
    .plot.bar()
)
Out[71]:
In [109]:
# Bar chart of total payroll per (league, division) pair, plotted straight
# from the grouped Series with stacked=True passed through.
(
    data.groupby(["league", "division"])["payroll"]
    .sum()
    .plot.bar(stacked=True)
)
Out[109]:
In [116]:
# Pivot the division level into columns so each league becomes one bar with
# its divisions stacked on top of each other.
(
    data.groupby(["league", "division"])["payroll"]
    .sum()
    .unstack("division")
    .plot.bar(stacked=True)
)
Out[116]:
In [120]:
# Same pivot as a grouped bar chart: one cluster per league, one bar per
# division column.
(
    data.groupby(["league", "division"])["payroll"]
    .sum()
    .unstack("division")
    .plot.bar()
)
Out[120]:
In [121]:
# Pie chart of total payroll per league.
(
    data.groupby(["league"])["payroll"]
    .sum()
    .plot.pie()
)
Out[121]:
In [7]:
# Pie chart of total payroll with one wedge per (league, division) pair.
(
    data.groupby(["league", "division"])["payroll"]
    .sum()
    .plot.pie()
)
Out[7]:
In [10]:
# One panel per league, stacked vertically: AL on top, NL below.
data_al = data[data.league == "AL"]
data_nl = data[data.league == "NL"]
fig, ax = plt.subplots(2)  # ax is an array holding the two Axes
ax[0].plot(data_al.payroll, data_al.wins, marker='o', linestyle='', ms=12, label="AL")
ax[1].plot(data_nl.payroll, data_nl.wins, marker='o', linestyle='', ms=12, label="NL")
# The label= values are only shown once a legend is drawn; without it (and
# without axis labels) the two panels cannot be told apart.
for panel in ax:
    panel.legend()
    panel.set_xlabel("payroll")
    panel.set_ylabel("wins")
# Keep the top panel's x label from colliding with the bottom panel.
fig.tight_layout()
Out[10]:
In [21]:
# Interactive payroll/wins scatter with bokeh, coloured by league.
from bokeh.sampledata.iris import flowers  # not used in this cell; kept in case other cells rely on it
from bokeh.plotting import figure, show, output_file,output_notebook
output_notebook()  # render bokeh output inside the notebook
colormap = {'AL': 'red', 'NL': 'green'}
# Adds a per-row colour column to `data`; a league missing from colormap
# raises KeyError.
data['color'] = data['league'].map(lambda x: colormap[x])
p = figure(title = "Payroll vs Wins")
# The glyph below puts payroll on x and wins on y, so the axis labels must
# follow that order (they were swapped before).
p.xaxis.axis_label = 'Payroll'
p.yaxis.axis_label = 'Wins'
# NOTE(review): recent bokeh releases may prefer p.scatter over p.circle when
# a size is given -- confirm against the installed version.
p.circle(data.payroll,data.wins,color=data["color"],size=10)
show(p)