In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [2]:
# Read the innings-by-innings CSV (path is relative to the notebook's
# working directory) and preview the first five rows.
df = pd.read_csv(filepath_or_buffer="./tendulkar_ODI.csv")
df.head(n=5)


Out[2]:
Unnamed: 0 Runs Mins BF 4s 6s SR Pos Dismissal Inns Opposition Ground Start Date
0 1 0 - 2 0 0 0 5 caught 2 v Pakistan Gujranwala 18-Dec-89
1 2 0 2 2 0 0 0 5 caught 2 v New Zealand Dunedin 1-Mar-90
2 3 36 51 39 5 0 92.3 6 caught 1 v New Zealand Wellington 6-Mar-90
3 4 19 38 35 1 1 54.28 4 bowled 2 v England Leeds 18-Jul-90
4 5 31 31 26 3 0 119.23 6 bowled 2 v England Nottingham 20-Jul-90

In [3]:
# Convert the "Runs" column from text to int64.
#   1. Drop every literal "*" (presumably the not-out marker — confirm
#      against the data source).
#   2. Replace "DNB" / "TDNB" entries with "0" so they parse as integers.
# `regex` is spelled out on both calls: the default flipped from True to
# False in pandas 2.0, which would otherwise turn "T?DNB" into a literal
# string that never matches and make the int conversion fail.
# `.astype(str)` keeps the cell re-runnable: after the first execution the
# column is already int64 and the `.str` accessor would raise.
df["Runs"] = (
    df["Runs"]
    .astype(str)
    .str.replace("*", "", regex=False)
    .str.replace("T?DNB", "0", regex=True)
    .astype("int64")
)

In [4]:
# Convert the "4s" column from text to int64, mapping the "-" placeholder
# to 0.
# `regex=False` makes the literal match explicit instead of relying on the
# version-dependent default; `.astype(str)` lets the cell be re-executed
# (or run on a column pandas already parsed as integers) without the `.str`
# accessor raising.
df["4s"] = (
    df["4s"]
    .astype(str)
    .str.replace("-", "0", regex=False)
    .astype("int64")
)

In [5]:
# Distribution of runs per innings, in 10-run-wide bins.
# NOTE: sns.distplot is deprecated in recent seaborn releases; it is kept
# here so the notebook still runs on the seaborn version it was written for.
runs_bin_width = 10
plt.figure(figsize=(10, 5))
# range() excludes its stop value, and samples beyond the last bin edge are
# silently ignored by the histogram. Extending the stop past the maximum
# guarantees the highest score lands inside a bin instead of being dropped.
runs_bin_edges = range(
    df["Runs"].min(),
    df["Runs"].max() + runs_bin_width + 1,
    runs_bin_width,
)
sns.distplot(df["Runs"], bins=runs_bin_edges)
plt.xlabel("Runs Scored by Tendulkar")
plt.show()



In [6]:
# Distribution of boundaries (4s) per innings, one bin per integer count.
# NOTE: sns.distplot is deprecated in recent seaborn releases; it is kept
# here so the notebook still runs on the seaborn version it was written for.
plt.figure(figsize=(10, 5))
# N integer values need N + 1 edges. Stopping the edges at max would make
# the final bin [max - 1, max] (closed on both sides), merging the two
# largest counts into a single bar — hence the "+ 2" on the exclusive stop.
fours_bin_edges = range(df["4s"].min(), df["4s"].max() + 2)
sns.distplot(df["4s"], bins=fours_bin_edges)
plt.xlabel("4s hit by Tendulkar")
plt.show()