In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
In [2]:
# Load the raw CSV into `df`, report its (rows, columns) shape, and preview
# the first ten rows as the cell output.
df = pd.read_csv('./data/crimedataset.csv')
print(df.shape)
df.head(n=10)
Out[2]:
In [3]:
# Where 'OCCURRED ON' is missing, fall back to the value in 'OCCURRED TO'.
# The filled Series is assigned back to the column instead of calling
# fillna(inplace=True) on the selection df['OCCURRED ON']: the in-place form
# acts on an intermediate object, warns on recent pandas 2.x releases and
# leaves `df` unchanged when Copy-on-Write is enabled.
df['OCCURRED ON'] = df['OCCURRED ON'].fillna(df['OCCURRED TO'])
In [4]:
# Parse the 'OCCURRED ON' column into a DatetimeIndex; later cells read the
# calendar fields (date, time, year, ...) from `temp`.
temp = pd.DatetimeIndex(data=df['OCCURRED ON'])
In [5]:
# Derive calendar fields from the parsed timestamps in `temp` and attach them
# to `df` as new columns.
df['date'] = temp.date
# NOTE(review): 'hour' holds the full time-of-day (temp.time), not the integer
# hour (temp.hour) -- confirm this is what downstream consumers expect.
df['hour'] = temp.time
df['year'] = temp.year.astype(int)
df['month'] = temp.month.astype(int)
df['day'] = temp.day.astype(int)
# day_name() replaces the `weekday_name` attribute, which was deprecated in
# pandas 0.23 and removed in pandas 1.0 (it raises AttributeError there).
df['weekday'] = temp.day_name()
df.head()
Out[5]:
In [6]:
# Keep only the columns needed downstream and give them short lowercase names.
# `cols` lists the source columns in output order; `new_names` is the matching
# list of replacement names (paired by position below).
cols = ['OCCURRED ON','date','hour','year','month','day','weekday','ZIP','UCR CRIME CATEGORY','PREMISE TYPE']
new_names = ['datetime','date','hour','year','month','day','weekday','zip','crime','place']

# Rename through an explicit old -> new mapping rather than overwriting
# df.columns positionally, so each new name is tied to its source column.
# Note: this rebinds `df`; re-running the cell without re-running the earlier
# cells raises KeyError because 'OCCURRED ON' no longer exists after the rename.
df = df[cols].rename(columns=dict(zip(cols, new_names)))

# Preview goes last so the notebook actually renders it (an expression in the
# middle of a cell is evaluated and discarded) and it shows the final names.
df.head(10)
In [7]:
# Write the current `df` to disk as UTF-8 CSV, omitting the row index.
df.to_csv(
    './data/cleaneddataset.csv',
    index=False,
    encoding='utf-8',
)
In [8]:
# Show the distinct values present in the 'year' column.
df.loc[:, 'year'].unique()
Out[8]:
In [ ]: