In [ ]:
from pandas import DataFrame
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
In [ ]:
# Load the raw records from the CSV file sitting next to the notebook.
data = pd.read_csv(filepath_or_buffer="full_data.csv")
In [ ]:
# Wrap the loaded data in a DataFrame; every later cell works on `frame`.
frame = pd.DataFrame(data)
# Trailing bare expression: the notebook renders the table.
frame
In [ ]:
# Number of rows recorded for each value of the 'year' column.
frame.groupby(by='year').size()
In [ ]:
# Row counts per (intent, year) pair.
intent_year = frame.groupby(by=['intent', 'year']).size()
# Pivot the years into columns so each intent gets one bar per year.
intent_year.unstack(level='year').plot.bar()
plt.show()
In [ ]:
# Row counts per (intent, year) pair.
intent_avYear = frame.groupby(by=['intent', 'year']).size()
# With intents as columns and years as rows, the column-wise mean is the
# average yearly count for each intent.
intent_avYear.unstack(level='intent').mean(axis=0).plot.bar()
plt.show()
In [ ]:
# Intent-by-sex contingency table: one row per intent, one column per value
# of the 'sex' column, cells holding row counts.
intent_MvsF = frame.groupby(by=['intent', 'sex']).size().unstack(level='sex')
intent_MvsF
In [ ]:
# Horizontal bars: one group per intent, one bar per sex.
intent_MvsF.plot.barh()
plt.show()
In [ ]:
# Divide every row by its own total so each intent's shares sum to 1.
intent_MvsF_normed = intent_MvsF.div(intent_MvsF.sum(axis=1), axis='index')
# Stacked bars then show the split between sexes within each intent.
intent_MvsF_normed.plot.barh(stacked=True)
plt.show()
In [ ]:
# Row counts per (intent, race) pair.
intent_race = frame.groupby(by=['intent', 'race']).size()
# Displayed as a table with one column per race.
intent_race.unstack(level='race')
In [ ]:
# One group of bars per intent, one bar per race.
intent_race.unstack(level='race').plot.bar()
plt.show()
In [ ]:
# Share of homicide records whose victim falls in the "young" age bracket.
homicides_idx = frame['intent'] == "Homicide"
homicides = frame[homicides_idx]
# Both bounds are strict, so ages of exactly 15 and exactly 34 are excluded.
young_idx = (15 < frame['age']) & (frame['age'] < 34)
h_y_f = frame[young_idx & homicides_idx]
# No condition on the 'sex' column is applied above, so the figure covers
# every victim in the age bracket; the label says "people" to match what is
# actually computed.
# NOTE(review): if only male victims were intended, add a 'sex' condition to
# the mask — the exact value used for men must be confirmed against the data.
# Guard the division so a dataset without any homicide row prints nan
# instead of raising ZeroDivisionError.
young_share = len(h_y_f) / len(homicides) * 100 if len(homicides) else float('nan')
print("Pourcentage of young people in homicides : ", young_share)
In [ ]:
# Same share as a ratio of per-year averages: average yearly homicide count
# in the young age bracket divided by the average yearly homicide count.
t = frame.groupby(['intent', 'age', 'year']).size()
# Years become rows, (intent, age) pairs become columns. A pair that never
# occurs in some year means zero records that year, so fill with 0; leaving
# NaN would make mean() skip that year and overstate the average.
t_mean = t.unstack(['intent', 'age'], fill_value=0).mean()
# Series indexed by age, restricted to homicides.
t_mean_homicide = t_mean.loc['Homicide']
# Select ages by value with strict bounds (15 < age < 34). A plain `[15:34]`
# slice is label-based or positional depending on the index dtype and pandas
# version, so it does not reliably pick these ages.
young_age_mask = (15 < t_mean_homicide.index) & (t_mean_homicide.index < 34)
num_y_h = t_mean_homicide[young_age_mask].sum()
num_h = t_mean_homicide.sum()
# No condition on the 'sex' column is applied, so the label says "people".
print("Pourcentage of young people in homicides (mean) : ", num_y_h/num_h*100)