notebook.community

Edit and run



In [3]:

    
import numpy as np
import pandas as pd
import itertools
from __future__ import division
from sklearn.tree import tree, DecisionTreeClassifier, export_graphviz
from sklearn import cluster
import geoplotlib as gpl
import time
%matplotlib inline
pd.set_option("display.max_columns", 500)
pd.set_option("max_rows", 1000)



In [4]:

    
filePath = 'datasets/NYPD_Motor_Vehicle_Collisions_weather4.csv'
collisions = pd.read_csv(filePath)
collisions = collisions[pd.notnull(collisions.LOCATION) &\
                        pd.notnull(collisions.Conditions)]









    



/Users/masve/anaconda/lib/python2.7/site-packages/IPython/core/interactiveshell.py:2902: DtypeWarning: Columns (30,31) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)



In [21]:

    
# def UTCtoActual(utcDate):
#     from_zone = tz.gettz('UTC')
#     to_zone = tz.gettz('America/New_York')
    
#     utc = datetime.strptime(utcDate.DateUTC, '%Y-%m-%d %H:%M:%S')\
#                   .replace(tzinfo=from_zone)\
#                   .astimezone(to_zone)
#     s = pd.Series([utc.year, utc.month, utc.day, utc.hour])
#     s.columns = ['Year', 'Month', 'Day', 'Hour']
#     return s

def location_condition_count(row):
    start = time.time()
    loc = row.LOCATION
    
    counts = collisions[collisions.LOCATION == loc].Conditions.value_counts()
    s = pd.Series(counts)
    
    s.columns = collisions.Conditions.unique()    
    end = time.time()
    
    print end - start
    return s



In [23]:

    
dt = pd.DataFrame()
d = pd.DataFrame(0, index=np.arange(len(dt)), columns=collisions.Conditions.unique())
dt['LOCATION'] = ['(40.6810063, -73.812561)']#collisions.LOCATION.unique()
dt[collisions.Conditions.unique()] = dt.apply(location_condition_count, axis=1)
dt.head()









    



0.0955698490143
0.0827968120575






    



---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-23-d86f883989ea> in <module>()
      1 dt = pd.DataFrame()
      2 dt['LOCATION'] = ['(40.6810063, -73.812561)']#collisions.LOCATION.unique()
----> 3 dt[collisions.Conditions.unique()] = dt.apply(location_condition_count, axis=1)
      4 dt.head()

/Users/masve/anaconda/lib/python2.7/site-packages/pandas/core/frame.pyc in __setitem__(self, key, value)
   2292 
   2293         if isinstance(key, (Series, np.ndarray, list, Index)):
-> 2294             self._setitem_array(key, value)
   2295         elif isinstance(key, DataFrame):
   2296             self._setitem_frame(key, value)

/Users/masve/anaconda/lib/python2.7/site-packages/pandas/core/frame.pyc in _setitem_array(self, key, value)
   2316             if isinstance(value, DataFrame):
   2317                 if len(value.columns) != len(key):
-> 2318                     raise ValueError('Columns must be same length as key')
   2319                 for k1, k2 in zip(key, value.columns):
   2320                     self[k1] = value[k2]

ValueError: Columns must be same length as key



In [ ]:

    
dt.to_csv('datasets/weather_count.csv', sep=',')



In [25]:

    
c = collisions[collisions.LOCATION == '(40.6810063, -73.812561)'].Conditions.value_counts()
collisions.Conditions

se = pd.Series(c)
se









    Out[25]:





Mostly Cloudy       4
Overcast            3
Scattered Clouds    2
Light Snow          1
Rain                1
Light Rain          1
Partly Cloudy       1
Name: Conditions, dtype: int64



In [16]:









    Out[16]:





Mostly Cloudy                    216491
Scattered Clouds                 130504
Overcast                          91121
Partly Cloudy                     72119
Light Rain                        38607
Clear                             25901
Light Snow                        11922
Rain                               6664
Light Drizzle                      5660
Fog                                2909
Haze                               2828
Heavy Rain                         2012
Light Thunderstorms and Rain       1319
Snow                               1129
Light Freezing Rain                 768
Thunderstorm                        724
Heavy Thunderstorms and Rain        526
Light Ice Pellets                   336
Mist                                248
Thunderstorms and Rain              215
Unknown                             172
Heavy Snow                          141
Light Freezing Drizzle              121
Shallow Fog                          84
Patches of Fog                       59
Squalls                              42
Ice Pellets                          38
Light Rain Showers                   36
Thunderstorms with Small Hail        26
Blowing Snow                         24
Name: Conditions, dtype: int64



In [84]:

    
def location_condition_count(row):
#     start = time.time()
#     print row
    loc = row.LOCATION
    r = collisions[collisions.LOCATION == loc].Conditions.value_counts()
#     end = time.time()
#     print end-start
    return pd.concat([row,r])
#     return row.to_frame().join(r.to_frame())

# locations = collisions.LOCATION.unique()
locations = ['(40.6810063, -73.812561)']

d = pd.DataFrame(0, index=np.arange(len(locations)), columns=collisions.Conditions.unique())
d['LOCATION'] = locations
d = d.apply(location_condition_count, axis=1)
d









    Out[84]:






  
    
      
      Mostly Cloudy
      Overcast
      Light Rain
      Heavy Rain
      Scattered Clouds
      Light Drizzle
      Light Snow
      Rain
      Light Ice Pellets
      Fog
      Shallow Fog
      Patches of Fog
      Partly Cloudy
      Haze
      Clear
      Thunderstorm
      Light Thunderstorms and Rain
      Heavy Thunderstorms and Rain
      Thunderstorms and Rain
      Squalls
      Mist
      Snow
      Light Freezing Rain
      Light Freezing Drizzle
      Unknown
      Light Rain Showers
      Ice Pellets
      Blowing Snow
      Heavy Snow
      Thunderstorms with Small Hail
      LOCATION
      Mostly Cloudy
      Overcast
      Scattered Clouds
      Light Snow
      Rain
      Light Rain
      Partly Cloudy
    
  
  
    
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      (40.6810063, -73.812561)
      4
      3
      2
      1
      1
      1
      1



In [43]:









    Out[43]:





Index([u'Mostly Cloudy', u'Overcast', u'Light Rain', u'Heavy Rain',
       u'Scattered Clouds', u'Light Drizzle', u'Light Snow', u'Rain',
       u'Light Ice Pellets', u'Fog', u'Shallow Fog', u'Patches of Fog',
       u'Partly Cloudy', u'Haze', u'Clear', u'Thunderstorm',
       u'Light Thunderstorms and Rain', u'Heavy Thunderstorms and Rain',
       u'Thunderstorms and Rain', u'Squalls', u'Mist', u'Snow',
       u'Light Freezing Rain', u'Light Freezing Drizzle', u'Unknown',
       u'Light Rain Showers', u'Ice Pellets', u'Blowing Snow', u'Heavy Snow',
       u'Thunderstorms with Small Hail', u'LOCATION'],
      dtype='object')



In [ ]: