In [3]:
import numpy as np
import pandas as pd
import itertools
from __future__ import division
from sklearn.tree import tree, DecisionTreeClassifier, export_graphviz
from sklearn import cluster
import geoplotlib as gpl
import time
%matplotlib inline
pd.set_option("display.max_columns", 500)
pd.set_option("max_rows", 1000)

In [4]:
filePath = 'datasets/NYPD_Motor_Vehicle_Collisions_weather4.csv'
collisions = pd.read_csv(filePath)
collisions = collisions[pd.notnull(collisions.LOCATION) &\
                        pd.notnull(collisions.Conditions)]


/Users/masve/anaconda/lib/python2.7/site-packages/IPython/core/interactiveshell.py:2902: DtypeWarning: Columns (30,31) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)

In [21]:
# def UTCtoActual(utcDate):
#     from_zone = tz.gettz('UTC')
#     to_zone = tz.gettz('America/New_York')
    
#     utc = datetime.strptime(utcDate.DateUTC, '%Y-%m-%d %H:%M:%S')\
#                   .replace(tzinfo=from_zone)\
#                   .astimezone(to_zone)
#     s = pd.Series([utc.year, utc.month, utc.day, utc.hour])
#     s.columns = ['Year', 'Month', 'Day', 'Hour']
#     return s

def location_condition_count(row):
    start = time.time()
    loc = row.LOCATION
    
    counts = collisions[collisions.LOCATION == loc].Conditions.value_counts()
    s = pd.Series(counts)
    
    s.columns = collisions.Conditions.unique()    
    end = time.time()
    
    print end - start
    return s

In [23]:
dt = pd.DataFrame()
d = pd.DataFrame(0, index=np.arange(len(dt)), columns=collisions.Conditions.unique())
dt['LOCATION'] = ['(40.6810063, -73.812561)']#collisions.LOCATION.unique()
dt[collisions.Conditions.unique()] = dt.apply(location_condition_count, axis=1)
dt.head()


0.0955698490143
0.0827968120575
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-23-d86f883989ea> in <module>()
      1 dt = pd.DataFrame()
      2 dt['LOCATION'] = ['(40.6810063, -73.812561)']#collisions.LOCATION.unique()
----> 3 dt[collisions.Conditions.unique()] = dt.apply(location_condition_count, axis=1)
      4 dt.head()

/Users/masve/anaconda/lib/python2.7/site-packages/pandas/core/frame.pyc in __setitem__(self, key, value)
   2292 
   2293         if isinstance(key, (Series, np.ndarray, list, Index)):
-> 2294             self._setitem_array(key, value)
   2295         elif isinstance(key, DataFrame):
   2296             self._setitem_frame(key, value)

/Users/masve/anaconda/lib/python2.7/site-packages/pandas/core/frame.pyc in _setitem_array(self, key, value)
   2316             if isinstance(value, DataFrame):
   2317                 if len(value.columns) != len(key):
-> 2318                     raise ValueError('Columns must be same length as key')
   2319                 for k1, k2 in zip(key, value.columns):
   2320                     self[k1] = value[k2]

ValueError: Columns must be same length as key

In [ ]:
dt.to_csv('datasets/weather_count.csv', sep=',')

In [25]:
c = collisions[collisions.LOCATION == '(40.6810063, -73.812561)'].Conditions.value_counts()
collisions.Conditions

se = pd.Series(c)
se


Out[25]:
Mostly Cloudy       4
Overcast            3
Scattered Clouds    2
Light Snow          1
Rain                1
Light Rain          1
Partly Cloudy       1
Name: Conditions, dtype: int64

In [16]:



Out[16]:
Mostly Cloudy                    216491
Scattered Clouds                 130504
Overcast                          91121
Partly Cloudy                     72119
Light Rain                        38607
Clear                             25901
Light Snow                        11922
Rain                               6664
Light Drizzle                      5660
Fog                                2909
Haze                               2828
Heavy Rain                         2012
Light Thunderstorms and Rain       1319
Snow                               1129
Light Freezing Rain                 768
Thunderstorm                        724
Heavy Thunderstorms and Rain        526
Light Ice Pellets                   336
Mist                                248
Thunderstorms and Rain              215
Unknown                             172
Heavy Snow                          141
Light Freezing Drizzle              121
Shallow Fog                          84
Patches of Fog                       59
Squalls                              42
Ice Pellets                          38
Light Rain Showers                   36
Thunderstorms with Small Hail        26
Blowing Snow                         24
Name: Conditions, dtype: int64

In [84]:
def location_condition_count(row):
#     start = time.time()
#     print row
    loc = row.LOCATION
    r = collisions[collisions.LOCATION == loc].Conditions.value_counts()
#     end = time.time()
#     print end-start
    return pd.concat([row,r])
#     return row.to_frame().join(r.to_frame())

# locations = collisions.LOCATION.unique()
locations = ['(40.6810063, -73.812561)']

d = pd.DataFrame(0, index=np.arange(len(locations)), columns=collisions.Conditions.unique())
d['LOCATION'] = locations
d = d.apply(location_condition_count, axis=1)
d


Out[84]:
Mostly Cloudy Overcast Light Rain Heavy Rain Scattered Clouds Light Drizzle Light Snow Rain Light Ice Pellets Fog Shallow Fog Patches of Fog Partly Cloudy Haze Clear Thunderstorm Light Thunderstorms and Rain Heavy Thunderstorms and Rain Thunderstorms and Rain Squalls Mist Snow Light Freezing Rain Light Freezing Drizzle Unknown Light Rain Showers Ice Pellets Blowing Snow Heavy Snow Thunderstorms with Small Hail LOCATION Mostly Cloudy Overcast Scattered Clouds Light Snow Rain Light Rain Partly Cloudy
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 (40.6810063, -73.812561) 4 3 2 1 1 1 1

In [43]:



Out[43]:
Index([u'Mostly Cloudy', u'Overcast', u'Light Rain', u'Heavy Rain',
       u'Scattered Clouds', u'Light Drizzle', u'Light Snow', u'Rain',
       u'Light Ice Pellets', u'Fog', u'Shallow Fog', u'Patches of Fog',
       u'Partly Cloudy', u'Haze', u'Clear', u'Thunderstorm',
       u'Light Thunderstorms and Rain', u'Heavy Thunderstorms and Rain',
       u'Thunderstorms and Rain', u'Squalls', u'Mist', u'Snow',
       u'Light Freezing Rain', u'Light Freezing Drizzle', u'Unknown',
       u'Light Rain Showers', u'Ice Pellets', u'Blowing Snow', u'Heavy Snow',
       u'Thunderstorms with Small Hail', u'LOCATION'],
      dtype='object')

In [ ]: