In [3]:
import numpy as np
import pandas as pd
import itertools
from __future__ import division
from sklearn.tree import tree, DecisionTreeClassifier, export_graphviz
from sklearn import cluster
import geoplotlib as gpl
import time
%matplotlib inline
# Notebook display limits: show up to 500 columns and 1000 rows when a frame
# is rendered. Full option keys are used on purpose: set_option treats its
# first argument as a pattern, and a bare "max_rows" can match more than one
# registered option, which makes pandas raise OptionError.
pd.set_option("display.max_columns", 500)
pd.set_option("display.max_rows", 1000)
In [4]:
# Read the collision records and keep only the rows where both LOCATION and
# Conditions are present (a row missing either one is dropped).
filePath = 'datasets/NYPD_Motor_Vehicle_Collisions_weather4.csv'
collisions = pd.read_csv(filePath).dropna(subset=['LOCATION', 'Conditions'])
In [21]:
# def UTCtoActual(utcDate):
# from_zone = tz.gettz('UTC')
# to_zone = tz.gettz('America/New_York')
# utc = datetime.strptime(utcDate.DateUTC, '%Y-%m-%d %H:%M:%S')\
# .replace(tzinfo=from_zone)\
# .astimezone(to_zone)
# s = pd.Series([utc.year, utc.month, utc.day, utc.hour])
# s.columns = ['Year', 'Month', 'Day', 'Hour']
# return s
def location_condition_count(row):
    """Count collisions per weather condition at the location of ``row``.

    Parameters
    ----------
    row : object with a ``LOCATION`` attribute
        Typically one row handed over by ``DataFrame.apply(..., axis=1)``.

    Returns
    -------
    pandas.Series
        Indexed by every distinct value of ``collisions.Conditions`` (in
        order of first appearance). Each value is the number of rows of
        ``collisions`` at ``row.LOCATION`` with that condition, 0 when the
        condition never occurs there.

    Notes
    -----
    Reads the module-level ``collisions`` frame and prints the elapsed
    wall-clock time (in seconds) of every call.
    """
    start = time.time()
    at_location = collisions.LOCATION == row.LOCATION
    counts = collisions.loc[at_location, 'Conditions'].value_counts()
    # value_counts() only lists the conditions observed at this location.
    # Assigning to `.columns` on a Series does not change its index, so the
    # result is reindexed instead: every call then returns the same labels
    # and lines up with a fixed set of condition columns in the caller.
    s = counts.reindex(collisions.Conditions.unique(), fill_value=0)
    end = time.time()
    print(end - start)
    return s
In [23]:
# Distinct weather conditions; one count column is produced for each.
conditions = collisions.Conditions.unique()

# Locations to summarise. Only a single hard-coded location is used here;
# the trailing comment shows the expression for processing all of them.
dt = pd.DataFrame()
dt['LOCATION'] = ['(40.6810063, -73.812561)']#collisions.LOCATION.unique()

# Zero-filled placeholder with one row per location. It is built after
# LOCATION is assigned: sized from the still-empty `dt` it had no rows.
d = pd.DataFrame(0, index=np.arange(len(dt)), columns=conditions)

# Per-location condition counts. The applied function may return only the
# conditions seen at a location, so the result is aligned to the full list
# of conditions (missing ones become 0) before being attached to `dt`.
condition_counts = dt.apply(location_condition_count, axis=1)
condition_counts = condition_counts.reindex(columns=conditions).fillna(0).astype(int)
dt = dt.join(condition_counts)
dt.head()
In [ ]:
# Save the per-location condition counts as a comma-separated file.
dt.to_csv(path_or_buf='datasets/weather_count.csv', sep=',')
In [25]:
# Ad-hoc check: how often each weather condition was recorded for the
# collisions at one hard-coded location.
c = collisions.loc[collisions.LOCATION == '(40.6810063, -73.812561)', 'Conditions'].value_counts()
# A bare `collisions.Conditions` expression used to sit here; only the last
# expression of a cell is displayed, so it had no effect and was removed.
se = pd.Series(c)
se
Out[25]:
In [16]:
Out[16]:
In [84]:
def location_condition_count(row):
    """Return ``row`` with its weather-condition counts filled in.

    Parameters
    ----------
    row : pandas.Series
        Must contain a ``LOCATION`` entry. It may also carry one entry per
        weather condition (e.g. zero placeholders) that will be overwritten.

    Returns
    -------
    pandas.Series
        A copy of ``row`` in which, for every condition recorded in
        ``collisions`` at ``row.LOCATION``, the entry labelled with that
        condition holds the number of matching rows. Conditions that are not
        already labels of ``row`` are appended. ``row`` itself is not
        modified.

    Notes
    -----
    Reads the module-level ``collisions`` frame.
    """
    at_location = collisions.LOCATION == row.LOCATION
    counts = collisions.loc[at_location, 'Conditions'].value_counts()
    # Concatenating `row` and `counts` would repeat every condition label
    # that `row` already carries (placeholder first, real count second).
    # Writing the counts into a copy keeps the labels unique.
    filled = row.copy()
    for condition, n_collisions in counts.items():
        filled[condition] = n_collisions
    return filled
# Locations to process. A single hard-coded location is used here; the
# commented-out line below selects every distinct location instead.
# locations = collisions.LOCATION.unique()
locations = ['(40.6810063, -73.812561)']

# Start from a zero-filled frame with one column per weather condition and
# one row per location, attach the LOCATION column, then let
# location_condition_count process each row.
d = (
    pd.DataFrame(0, index=np.arange(len(locations)), columns=collisions.Conditions.unique())
    .assign(LOCATION=locations)
    .apply(location_condition_count, axis=1)
)
d
Out[84]:
In [43]:
Out[43]:
In [ ]: