In [6]:
%matplotlib inline
import pandas as pd
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
# Apply the 'ggplot' style sheet so every figure created below shares one look.
matplotlib.style.use('ggplot')
In [7]:
# Load the dataset from a CSV file resolved relative to the working directory.
# Every later cell reads from this `df` frame.
df = pd.read_csv("New_York_City_Leading_Causes_of_Death.csv")
In [62]:
# Preview the first rows only; displaying the whole frame floods the cell output.
df.head()
Out[62]:
In [7]:
# Show the column labels available in the loaded frame.
df.columns
Out[7]:
In [131]:
# Box plot of the raw 'Count' values to eyeball their spread and outliers.
df['Count'].plot(kind='box')
Out[131]:
In [72]:
# Sum of 'Count' for each year, largest total first.
(
    df.groupby('Year')['Count']
    .sum()
    .sort_values(ascending=False)
)
Out[72]:
In [86]:
# Average of the per-year 'Count' totals.
(
    df.groupby('Year')['Count']
    .sum()
    .mean()
)
Out[86]:
In [94]:
# Horizontal bars of the per-year 'Count' totals, with the across-year mean marked.
fig, ax = plt.subplots(figsize=(9, 6))
# Aggregate once and reuse for both the bars and the mean.
deaths_per_year = df.groupby('Year')['Count'].sum()
deaths_per_year.plot.barh(ax=ax)
mean = deaths_per_year.mean()
# axvline spans the full height of the axes, so the marker does not depend on
# a hard-coded number of bars and does not stretch the y-limits.
ax.axvline(mean, c='blue', linestyle="-", linewidth=0.5)
# The label text is passed positionally: the former `s=` keyword was renamed to
# `text` in Matplotlib 3.3 and later removed. The value is formatted from the
# computed mean so the label cannot go stale.
ax.annotate("Mean of death registered, {:,.1f}".format(mean), xy=(120000, 0), color='Blue')
Out[94]:
In [70]:
# Sum of 'Count' for each value of 'Sex'.
(
    df.groupby('Sex')['Count']
    .sum()
)
Out[70]:
In [119]:
# Sum of 'Count' for each (Year, Sex) pair.
(
    df.groupby(['Year', 'Sex'])['Count']
    .sum()
)
Out[119]:
In [127]:
# Bar chart of the 'Count' totals for each (Year, Sex) pair.
fig, ax = plt.subplots(figsize=(9, 7))
deaths_by_year_sex = df.groupby(['Year', 'Sex'])['Count'].sum()
# NOTE(review): the two colours are presumably meant to alternate between the
# sexes within each year — confirm against the rendered figure.
deaths_by_year_sex.plot(ax=ax, color=['darkred', 'blue'], kind='bar', title="deaths by gender over time")
ax.set_ylabel('total deaths')
# This was a second set_ylabel call that overwrote 'total deaths' and left the
# x-axis unlabelled. The bars are indexed by (Year, Sex), so label the x-axis.
ax.set_xlabel('Year, Sex')
ax.set_ylim((0, 110000))
Out[127]:
In [157]:
# Six largest 'Count' totals across (Cause of Death, Sex) pairs.
(
    df.groupby(['Cause of Death', 'Sex'])['Count']
    .sum()
    .sort_values(ascending=False)
    .head(6)
)
Out[157]:
In [160]:
# Every (Cause of Death, Sex) pair ranked by its 'Count' total, largest first.
(
    df.groupby(['Cause of Death', 'Sex'])['Count']
    .sum()
    .sort_values(ascending=False)
)
Out[160]:
In [128]:
# Single leading cause of death by total 'Count'
# (author's note: disease of the heart is number 1).
(
    df.groupby('Cause of Death')['Count']
    .sum()
    .sort_values(ascending=False)
    .head(1)
)
Out[128]:
In [111]:
# Ten causes of death with the largest 'Count' totals.
(
    df.groupby('Cause of Death')['Count']
    .sum()
    .sort_values(ascending=False)
    .head(10)
)
Out[111]:
In [104]:
# Horizontal bars of the 'Count' total per cause of death, sorted ascending
# so the largest bar ends up at the top of the chart.
fig, ax = plt.subplots(figsize=(9, 7))
(
    df.groupby('Cause of Death')['Count']
    .sum()
    .sort_values(ascending=True)
    .plot.barh(ax=ax)  # draw on the axes created just above
)
ax.set_xlim((0, 400000))
Out[104]:
In [76]:
# Sum of 'Count' per ethnicity, largest total first.
(
    df.groupby('Ethnicity')['Count']
    .sum()
    .sort_values(ascending=False)
)
Out[76]:
In [79]:
# Horizontal bars of the 'Count' total per ethnicity; the fourth bar is given
# a different colour from the first three.
(
    df.groupby('Ethnicity')['Count']
    .sum()
    .plot(kind='barh', color=['Black', 'Black', 'Black', 'darkred'])
)
Out[79]:
In [126]:
fig, ax = plt.subplots(figsize=(9, 7))
# Rows labelled 'NON-HISPANIC WHITE', then their 'Count' totals per cause of
# death (ascending) drawn as horizontal bars on the figure created above.
only_whites = df.loc[df['Ethnicity'] == 'NON-HISPANIC WHITE']
(
    only_whites.groupby('Cause of Death')['Count']
    .sum()
    .sort_values(ascending=True)
    .plot(kind='barh', color='blue')
)
Out[126]:
The distribution is the same, except for an increase in deaths caused by type 2 diabetes and HIV.
In [129]:
fig, ax = plt.subplots(figsize=(9, 7))
# Rows labelled 'NON-HISPANIC BLACK', then their 'Count' totals per cause of
# death (ascending) drawn as horizontal bars on the figure created above.
only_blacks = df.loc[df['Ethnicity'] == 'NON-HISPANIC BLACK']
(
    only_blacks.groupby('Cause of Death')['Count']
    .sum()
    .sort_values(ascending=True)
    .plot(kind='barh', color='black')
)
Out[129]:
Similar results for the Hispanic population.
In [132]:
fig, ax = plt.subplots(figsize=(9, 7))
# Rows labelled 'HISPANIC', then their 'Count' totals per cause of death
# (ascending) drawn as horizontal bars on the figure created above.
only_hispanic = df.loc[df['Ethnicity'] == 'HISPANIC']
(
    only_hispanic.groupby('Cause of Death')['Count']
    .sum()
    .sort_values(ascending=True)
    .plot(kind='barh', color='brown')
)
Out[132]:
In [140]:
# Keep only the rows whose 'Ethnicity' is 'ASIAN & PACIFIC ISLANDER'.
is_asian = df['Ethnicity'] == 'ASIAN & PACIFIC ISLANDER'
only_asians = df[is_asian]
In [138]:
# 'Count' totals per cause of death for the `only_asians` subset, sorted
# ascending and drawn as horizontal bars.
fig, ax = plt.subplots(figsize=(9, 7))
(
    only_asians.groupby('Cause of Death')['Count']
    .sum()
    .sort_values(ascending=True)
    .plot.barh(ax=ax, color='green')  # draw on the axes created just above
)
Out[138]:
In [ ]:
In [ ]:
In [ ]: