In [1]:
import pandas
In [2]:
# Read the clustered crime records from the CSV that sits next to this notebook
# (the file name suggests January data).
df = pandas.read_csv(filepath_or_buffer="./Cluster-Crime-Janeiro.csv")
In [21]:
# Keep only the CLUSTER column; the list selector yields a one-column
# DataFrame rather than a Series.
clusters = df.loc[:, ['CLUSTER']]
In [23]:
# Group the rows by their CLUSTER value. Despite the name, new_df is a
# GroupBy object, not a DataFrame.
new_df = df.groupby(by='CLUSTER')
In [46]:
# Pick the 'NATUREZA DA OCORRÊNCIA' column out of the per-cluster groups;
# the result is still grouped by CLUSTER (nothing is aggregated here).
val = new_df['NATUREZA DA OCORRÊNCIA']
In [4]:
# Rows belonging to cluster 1 only.
# TODO: use a for loop to go through every region instead of hard-coding 1.
novo = df.loc[df['CLUSTER'].eq(1)]
In [6]:
# One-column DataFrame with the 'NATUREZA DA OCORRÊNCIA' value of every row
# in the selected cluster.
crime_types = novo.loc[:, ['NATUREZA DA OCORRÊNCIA']]
In [7]:
# Number of rows per distinct 'NATUREZA DA OCORRÊNCIA' value; the result is a
# Series indexed by that value.
crime_type_total = crime_types.groupby(by='NATUREZA DA OCORRÊNCIA').size()
In [9]:
# Build the counts table directly from the per-type row counts computed above.
# to_frame keeps the 'NATUREZA DA OCORRÊNCIA' index and names the single
# column 'TOTAL'. Summing a frame whose only column is the grouping key
# aggregates nothing and merely produces an empty, index-only frame.
crime_type_counts = crime_type_total.to_frame(name='TOTAL')
In [10]:
# Store the per-type row counts in a 'TOTAL' column. This mutates
# crime_type_counts in place; pandas aligns the Series to the frame's index.
crime_type_counts['TOTAL'] = crime_type_total
In [11]:
# Rank the crime types from most to fewest rows; sort_values returns a new
# frame, so crime_type_counts itself keeps its original order.
all_crime_types = crime_type_counts.sort_values(
    'TOTAL',
    ascending=False,
)
In [12]:
# Render the ranked counts table as this cell's output.
all_crime_types
Out[12]:
In [50]:
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.ticker as ticker
In [55]:
# Horizontal bar chart of the per-type counts. Work on the Axes that pandas
# returns instead of going through pyplot's implicit "current axes".
ax = all_crime_types.plot.barh(figsize=(14, 10), color='#cc0000')
ax.set_title('Crimes by Type (Cluster 01 - Jan 2017)')
ax.set_xlabel('Number of Crime reports')
ax.set_ylabel('Crime Type')
ax.figure.tight_layout()
# Format x tick labels as whole numbers with thousands separators.
ax.xaxis.set_major_formatter(ticker.StrMethodFormatter('{x:,.0f}'))
plt.show()
In [ ]: