In [1]:
import csv
import numpy as np
# Load train.csv into a NumPy array of strings.
# The context manager closes the file even if the CSV reader raises part-way
# through, which the previous open()/close() pair did not guarantee.
with open('train.csv', 'r') as nfile_ref:
    csv_file = csv.reader(nfile_ref)
    # Builtin next() works on Python 2.6+ and Python 3; the reader's .next()
    # method only exists on Python 2.
    header = next(csv_file)  # First row is the column header, not data.
    rows = list(csv_file)    # Remaining rows, each a list of string fields.
# Convert the list of rows to a NumPy array so columns can be sliced with
# data[:, k]. NOTE(review): this assumes every row has the same number of
# fields; confirm against the input file.
data = np.array(rows)
In [2]:
# Import matplotlib and allow it to plot in the notebook.
import matplotlib.pyplot as plt
%matplotlib inline
# Import Numpy
import numpy as np
In [5]:
# x positions handed to plt.bar for the two bars of the chart.
bottom_locs = np.asarray((1.0, 2.0), dtype=float)
In [4]:
# Bar width in x-axis data units; passed to plt.bar and also used (halved)
# to offset the tick labels from bottom_locs.
width = 0.3
In [7]:
# Boolean mask over all rows: True wherever column 4 is anything other than
# the string "female".
men_only_stats = data[0::, 4] != "female"
# Column 1 for the masked rows, parsed from strings to floats. Per the original
# author's note this column is the survival flag (0 or 1).
# Builtin float is used because the np.float alias was deprecated in
# NumPy 1.20 and removed in 1.24; the resulting dtype is the same (float64).
men_onboard = data[men_only_stats, 1].astype(float)
# Sum of the flags = number of rows flagged 1; computed once and reused.
men_survived = np.sum(men_onboard)
# Tuple ordered as (flag == 0 count, flag == 1 count), matching the
# ('Died', 'Survived') tick labels used when plotting.
men = (np.size(men_onboard) - men_survived, men_survived)
In [6]:
# Boolean mask over all rows: True wherever column 4 equals the string "female".
women_only_stats = data[0::, 4] == "female"
# Column 1 for the masked rows, parsed from strings to floats. Per the original
# author's note this column is the survival flag (0 or 1).
# Builtin float is used because the np.float alias was deprecated in
# NumPy 1.20 and removed in 1.24; the resulting dtype is the same (float64).
women_onboard = data[women_only_stats, 1].astype(float)
# Sum of the flags = number of rows flagged 1; computed once and reused.
women_survived = np.sum(women_onboard)
# Tuple ordered as (flag == 0 count, flag == 1 count), matching the
# ('Died', 'Survived') tick labels used when plotting.
women = (np.size(women_onboard) - women_survived, women_survived)
In [8]:
# Stacked bar chart of the (died, survived) counts: the male counts form the
# lower segment and the female counts are stacked on top via bottom=men.
#
# align='edge' is passed explicitly. The tick positions below are computed as
# bottom_locs + width/2, which is the bar centre only when bottom_locs is the
# bar's left edge. Matplotlib 2.0 changed the default alignment of plt.bar
# from 'edge' to 'center', which would otherwise leave the tick labels sitting
# at the right-hand edge of each bar.
plt.bar(bottom_locs, men, label='Male', width=width, align='edge')
plt.bar(bottom_locs, women, color='m', label='Female', width=width,
        bottom=men, align='edge')
# Decorate the plot.
plt.ylabel('Count')
plt.title('Who Survived the Titanic?')
plt.legend(loc='best')
# Centre one label under each edge-aligned bar.
plt.xticks(bottom_locs + width / 2., ('Died', 'Survived'))
Out[8]: