In [3]:
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd

In [12]:
# Load the names file (no header row, so pandas assigns a default column
# label) and give the single column a readable name.
names = pd.read_csv("../../data/names.txt", header=None)
names.columns = ["name"]

# Character count of each name, computed with the vectorised string accessor
# rather than a per-row Python lambda. A missing value yields NaN here
# instead of raising TypeError from len().
names["length"] = names["name"].str.len()

# Preview the first rows to sanity-check the load.
names.head()


Out[12]:
name length
0 barjraj 7
1 ramdin verma 12
2 sharat chandran 15
3 birender mandal 15
4 amit 4

In [13]:
# Summary statistics for the frame; the bare name on the last line is what
# the notebook renders as the cell output.
length_summary = names.describe()
length_summary


Out[13]:
length
count 30172.000000
mean 9.220900
std 4.850825
min 2.000000
25% 6.000000
50% 8.000000
75% 12.000000
max 54.000000

In [55]:
# Histogram of name lengths, one bar per integer length from 0 to 29.
# Edges sit on half-integers (-0.5, 0.5, ..., 29.5) so every length gets its
# own bin and each bar is centred on its tick. With integer edges 0..29 the
# final bin is closed on both sides and would count lengths 28 and 29 together.
# Lengths above 29 fall outside the edges and are not drawn.
length_bin_edges = [edge - 0.5 for edge in range(31)]

fig, ax = plt.subplots()
ax.hist(names["length"], bins=length_bin_edges, rwidth=0.9, facecolor='green', alpha=0.75)
ax.set_title("Name Length Distribution")
ax.set_xlabel("Length")
# The trailing ';' keeps the Text repr of the last call out of the cell output.
ax.set_ylabel("#Names");


Out[55]:
<matplotlib.text.Text at 0x7f278970ac50>

In [ ]: