In [8]:
import matplotlib as plt
import matplotlib.pyplot  # load the pyplot submodule explicitly so `plt.pyplot` resolves on a fresh kernel
import numpy as np
import pandas as pd

# Location of the training CSV.
# NOTE(review): this path sits under the current user's home Downloads
# folder, so it must be adjusted when the notebook runs on another machine.
TRAIN_CSV = "~/Downloads/train.csv"

# Load the whole file into a DataFrame; later cells refer to it as `df`.
df = pd.read_csv(TRAIN_CSV)

# Summary statistics (count/mean/std/quartiles) for the numeric columns,
# left as the final expression so the notebook renders the table.
df.describe()


Out[8]:
PassengerId Survived Pclass Age SibSp Parch Fare
count 891.000000 891.000000 891.000000 714.000000 891.000000 891.000000 891.000000
mean 446.000000 0.383838 2.308642 29.699118 0.523008 0.381594 32.204208
std 257.353842 0.486592 0.836071 14.526497 1.102743 0.806057 49.693429
min 1.000000 0.000000 1.000000 0.420000 0.000000 0.000000 0.000000
25% 223.500000 0.000000 2.000000 20.125000 0.000000 0.000000 7.910400
50% 446.000000 0.000000 3.000000 28.000000 0.000000 0.000000 14.454200
75% 668.500000 1.000000 3.000000 38.000000 1.000000 0.000000 31.000000
max 891.000000 1.000000 3.000000 80.000000 8.000000 6.000000 512.329200

In [13]:
# Median of the Age column; Series.median() skips missing values by default.
age_median = df["Age"].median()
age_median


Out[13]:
28.0

In [14]:
# Distinct values present in the Sex column, in order of first appearance.
sex_values = df["Sex"].unique()
sex_values


Out[14]:
array(['male', 'female'], dtype=object)

In [17]:
# Histogram of passenger ages.
#
# The Age column contains missing values (describe() reports fewer non-null
# ages than rows), so they are dropped up front: NaNs are never handed to the
# binning routine, and the bin range is computed from the same cleaned data
# that is plotted.
ages = df['Age'].dropna()

# Note: `plt` is the top-level matplotlib package in this notebook, hence the
# explicit `.pyplot` attribute access.
fig, ax = plt.pyplot.subplots()

# Ten equal-width bins spanning the observed age range.
ax.hist(ages, bins=10, range=(ages.min(), ages.max()))

# Label through the Axes object so the cell does not depend on pyplot's
# implicit "current axes" state.
ax.set_title('Age Distribution')
ax.set_xlabel('Age')
ax.set_ylabel('Passenger Count')

plt.pyplot.show()



In [ ]:
# Reference URL recorded as the source for this notebook (per its slug, a
# post on exploratory analysis in Python). Adjacent string literals are
# concatenated at compile time, so the value is a single URL.
source = (
    "http://www.analyticsvidhya.com/blog/2014/08/"
    "baby-steps-python-performing-exploratory-analysis-python/"
)