notebook.community

Edit and run



In [3]:

    
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd



In [12]:

    
# Load the raw name list; the file has no header row, so the single
# column is labelled explicitly after reading.
names = pd.read_csv("../../data/names.txt", header=None)
names.columns = ["name"]
# Character count per name, via the vectorized string accessor rather than
# a per-row Python lambda.
names["length"] = names["name"].str.len()

names.head()









    Out[12]:







  
    
      
      name
      length
    
  
  
    
      0
      barjraj
      7
    
    
      1
      ramdin verma
      12
    
    
      2
      sharat chandran
      15
    
    
      3
      birender mandal
      15
    
    
      4
      amit
      4



In [13]:

    
# Summary statistics for the frame; only the numeric "length" column shows up in the output.
names.describe()









    Out[13]:







  
    
      
      length
    
  
  
    
      count
      30172.000000
    
    
      mean
      9.220900
    
    
      std
      4.850825
    
    
      min
      2.000000
    
    
      25%
      6.000000
    
    
      50%
      8.000000
    
    
      75%
      12.000000
    
    
      max
      54.000000



In [55]:

    
# Name lengths are integers, so give each value 0..29 its own bin by using
# half-integer edges (-0.5, 0.5, ..., 29.5). With integer edges the last bin
# is closed on both sides and would count lengths 28 and 29 together.
# NOTE(review): names longer than 29 characters (describe() reports a max of
# 54) fall outside the plotted range -- confirm this cutoff is intended.
length_bin_edges = [edge - 0.5 for edge in range(31)]
plt.hist(names["length"], bins=length_bin_edges, rwidth=0.9, facecolor='green', alpha=0.75)
plt.title("Name Length Distribution")
plt.xlabel("Length")
# The trailing semicolon keeps the Text object's repr out of the cell output.
plt.ylabel("#Names");









    Out[55]:





<matplotlib.text.Text at 0x7f278970ac50>



In [ ]:

	name	length
0	barjraj	7
1	ramdin verma	12
2	sharat chandran	15
3	birender mandal	15
4	amit	4

	length
count	30172.000000
mean	9.220900
std	4.850825
min	2.000000
25%	6.000000
50%	8.000000
75%	12.000000
max	54.000000