In [1]:
#Notebook for converting 311 data into format of [tuple, list of 311 reports that occured at that tuple]

#DATA_PATH = "/home/datascience/project/data/Case_Data_from_San_Francisco_311__SF311_.csv" # Make this the /path/to/the/data
DATA_PATH = "/home/casual1water/cs194-16-data_manatees/data/data_311.csv" # path of amazon machine

import pandas as pd

data_311 = pd.read_csv(DATA_PATH, na_values=['-'])
data_311 = data_311.where((pd.notnull(data_311)), None)
data_311 = data_311[['Opened', 'Category', 'Request Type', 'Request Details', 'Supervisor District', 'Point', 'Source']]

data_311.head()


/opt/anaconda/lib/python2.7/site-packages/pandas/io/parsers.py:1139: DtypeWarning: Columns (14) have mixed types. Specify dtype option on import or set low_memory=False.
  data = self._reader.read(nrows)
Out[1]:
Opened Category Request Type Request Details Supervisor District Point Source
0 11/29/2014 12:57:11 AM SFHA Requests SFHA Priority - Emergency Carpenters 6 (37.782620592996, -122.416286644263) Voice In
1 11/29/2014 12:32:49 AM Rec and Park Requests Park - Neighborhood_Services_Area Irrigation 1 (37.771787999, -122.4712056) Open311
2 11/29/2014 12:22:50 AM SFHA Requests SFHA Priority - Emergency Carpenters 5 (37.777584951111, -122.440874464132) Voice In
3 11/29/2014 12:21:33 AM Streetlights Streetlight - Light_Burnt_Out Light_Burnt_Out on TrafficSignal Pole 6 (37.7899262286022, -122.394276396696) Web Self Service
4 11/29/2014 12:16:54 AM SFHA Requests SFHA Priority - Emergency Carpenters 5 (37.777584951111, -122.440874464132) Voice In

In [2]:
import numpy as np

#drop all the blank rows at the end of the 311 dataset
data_311 = data_311.dropna(thresh=5)
data_311.tail()


Out[2]:
Opened Category Request Type Request Details Supervisor District Point Source
1098661 07/01/2008 03:03:00 AM Street and Sidewalk Cleaning Sidewalk_Cleaning Garbage 9 (37.7406417656081, -122.423090064246) Voice In
1098662 07/01/2008 02:07:00 AM Abandoned Vehicle Abandoned Vehicle - Car4door DARK BLUE - FORD - CROWN VICTORIA - 3WBG858 5 (37.781800341, -122.428537476) Voice In
1098663 07/01/2008 01:56:00 AM Tree Maintenance Trees - Damaged_Tree Hanging_limb 5 (37.7643657242198, -122.458814894064) Voice In
1098664 07/01/2008 12:26:00 AM Street and Sidewalk Cleaning Illegal_Dumping Mattress 4 (37.7617471571397, -122.475924197088) Voice In
1098665 07/01/2008 12:13:00 AM Street and Sidewalk Cleaning Sidewalk_Cleaning Sidewalk_Cleaning 3 (37.800869062548, -122.406192162738) Voice In

In [3]:
#convert to datetime
data_311['DateTime'] = pd.to_datetime(data_311['Opened'])
data_311 = data_311.drop('Opened', 1)
data_311.head()


Out[3]:
Category Request Type Request Details Supervisor District Point Source DateTime
0 SFHA Requests SFHA Priority - Emergency Carpenters 6 (37.782620592996, -122.416286644263) Voice In 2014-11-29 00:57:11
1 Rec and Park Requests Park - Neighborhood_Services_Area Irrigation 1 (37.771787999, -122.4712056) Open311 2014-11-29 00:32:49
2 SFHA Requests SFHA Priority - Emergency Carpenters 5 (37.777584951111, -122.440874464132) Voice In 2014-11-29 00:22:50
3 Streetlights Streetlight - Light_Burnt_Out Light_Burnt_Out on TrafficSignal Pole 6 (37.7899262286022, -122.394276396696) Web Self Service 2014-11-29 00:21:33
4 SFHA Requests SFHA Priority - Emergency Carpenters 5 (37.777584951111, -122.440874464132) Voice In 2014-11-29 00:16:54

In [5]:
#testing

minidata = data_311.head()

minidata['DateTime'] = pd.to_datetime(minidata['Opened'])
minidata = minidata.drop('Opened', 1)
minidata


---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-5-1761590aaa91> in <module>()
      3 minidata = data_311.head()
      4 
----> 5 minidata['DateTime'] = pd.to_datetime(minidata['Opened'])
      6 minidata = minidata.drop('Opened', 1)
      7 minidata

/opt/anaconda/lib/python2.7/site-packages/pandas/core/frame.pyc in __getitem__(self, key)
   1676             return self._getitem_multilevel(key)
   1677         else:
-> 1678             return self._getitem_column(key)
   1679 
   1680     def _getitem_column(self, key):

/opt/anaconda/lib/python2.7/site-packages/pandas/core/frame.pyc in _getitem_column(self, key)
   1683         # get column
   1684         if self.columns.is_unique:
-> 1685             return self._get_item_cache(key)
   1686 
   1687         # duplicate columns & possible reduce dimensionaility

/opt/anaconda/lib/python2.7/site-packages/pandas/core/generic.pyc in _get_item_cache(self, item)
   1050         res = cache.get(item)
   1051         if res is None:
-> 1052             values = self._data.get(item)
   1053             res = self._box_item_values(item, values)
   1054             cache[item] = res

/opt/anaconda/lib/python2.7/site-packages/pandas/core/internals.pyc in get(self, item, fastpath)
   2563 
   2564             if not isnull(item):
-> 2565                 loc = self.items.get_loc(item)
   2566             else:
   2567                 indexer = np.arange(len(self.items))[isnull(self.items)]

/opt/anaconda/lib/python2.7/site-packages/pandas/core/index.pyc in get_loc(self, key)
   1179         loc : int if unique index, possibly slice or mask if not
   1180         """
-> 1181         return self._engine.get_loc(_values_from_object(key))
   1182 
   1183     def get_value(self, series, key):

/opt/anaconda/lib/python2.7/site-packages/pandas/index.so in pandas.index.IndexEngine.get_loc (pandas/index.c:3656)()

/opt/anaconda/lib/python2.7/site-packages/pandas/index.so in pandas.index.IndexEngine.get_loc (pandas/index.c:3534)()

/opt/anaconda/lib/python2.7/site-packages/pandas/hashtable.so in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:11911)()

/opt/anaconda/lib/python2.7/site-packages/pandas/hashtable.so in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:11864)()

KeyError: 'Opened'

In [ ]:
#just some code to test something

mintime = data_311['DateTime'].min()
maxtime = data_311['DateTime'].max()

np.linspace(0, (maxtime-mintime).total_seconds(), 100)

In [4]:
data_path = "data/Case_Data_from_San_Francisco_311__SF311_.csv"

def get_loc_time_counts(csv_path, time_interval, location_interval, x_interval, y_interval, n_bins):
    """
    Gets the counts for each category of time divided by pdDistrict and time.
    
    @params csv_path string to csv_file
    @params time string granularity of time split
        "d" : split by day/month/year
        "m" : split by month/year
        "y" : split by year
    @params location_interval float size of each inteval in location grid
    @params min_x, min_y x,y coordinate of where to start grid
        Note: this argument is required because the max/min of 311 may not 
            be the same as 911
    @return pd.Dataframe [datetime, location, category, count]
    """
    data = pd.read_csv(csv_path, low_memory=False)
    
    # map each point to a location interval
    x_min = x_interval[0]
    x_max = x_interval[1]
    x_bins = create_bins(min_x, max_x, n_bins)
    
    y_min = y_interval[0]
    y_max = y_interval[1]
    y_bins = create_bins(min_y, max_y, n_bins)
    
    xy_bins_col = data.apply(lambda row: get_xy(row['Point']))
    
    
    # map each point to a datetime interval
    
    
    for district_name, district_data in data.groupby("PdDistrict"):
        
        for date_time, date_time_data in district_data.groupby("DateTime"):
            crime_counts = date_time_data.groupby("Category").size()

            
def get_xy(point):
    """
    Gets the x_point from an input string point.
    
    @params point string '(x,y)' coordinate 
    @return (x,y) float
    """ 
    x_str, y_str = point.split(',')
    return float(x_str[1:]), float(y_str[:-1])


def create_bins(min_pt, max_pt, n):
    """
    Creates n equally spaced bins between min_pt and max_pt
    
    @params min_pt float min value
    @params max_pt float max value
    @params n number of bins to create
    @return np.array bin values
    """
    return np.linspace(min_pt, max_pt, n)

def get_bin(bins, val):
    """
    Determines which bin the input val falls into. Bins are represented 
    by an increasing np.array. Val is assigned to the highest bin whose 
    value is less than val. (e.g. for bins [0.0, 0.5, 1.0], 0.25 would 
    be assigned to bin 0.0, 0.75 would be assigned to 0.5)
    
    @params bins np.array of increasing values
    @params val float to bin
    @return bin that val belongs to
    """
    for bin in bins[::-1]: # iterate through bins in reverse order
        if val >= bin:
            return bin

In [ ]:
#testing something on a mini data set

mintime = minidata['DateTime'].min()
maxtime = minidata['DateTime'].max()
bins = create_bins(0, (maxtime-mintime).total_seconds(), 1000)

minidata['TimeBin'] = minidata.apply(lambda row: get_bin(bins, (row['DateTime']-mintime).total_seconds()), axis = 1)

minidata

In [5]:
mintime = data_311['DateTime'].min()
maxtime = data_311['DateTime'].max()

print mintime
print maxtime


2008-07-01 00:13:00
2014-11-29 00:57:11

In [ ]:


In [ ]:


In [5]:
#make time bin for 311 dataset

mintime = pd.to_datetime('2008-07-01 00:13:00')
maxtime = pd.to_datetime('2014-11-29 00:57:11')

#subtracting 2 datetimes creates a tdelta object
bins = create_bins(0, (maxtime-mintime).total_seconds(), 1000)

data_311['TimeBin'] = data_311.apply(lambda row: get_bin(bins, (row['DateTime']-mintime).total_seconds()), axis = 1)

data_311.head()


Out[5]:
Category Request Type Request Details Supervisor District Point Source DateTime TimeBin
0 SFHA Requests SFHA Priority - Emergency Carpenters 6 (37.782620592996, -122.416286644263) Voice In 2014-11-29 00:57:11 2.023515e+08
1 Rec and Park Requests Park - Neighborhood_Services_Area Irrigation 1 (37.771787999, -122.4712056) Open311 2014-11-29 00:32:49 2.021489e+08
2 SFHA Requests SFHA Priority - Emergency Carpenters 5 (37.777584951111, -122.440874464132) Voice In 2014-11-29 00:22:50 2.021489e+08
3 Streetlights Streetlight - Light_Burnt_Out Light_Burnt_Out on TrafficSignal Pole 6 (37.7899262286022, -122.394276396696) Web Self Service 2014-11-29 00:21:33 2.021489e+08
4 SFHA Requests SFHA Priority - Emergency Carpenters 5 (37.777584951111, -122.440874464132) Voice In 2014-11-29 00:16:54 2.021489e+08

In [ ]:
#testing

minidata = data_311.head()
minidata

In [ ]:
#testing

minidata['X'] = minidata.apply(lambda row: float(row['Point'].strip(')(').split(',')[0]), axis=1)
minidata['Y'] = minidata.apply(lambda row: float(row['Point'].strip(')(').split(',')[1]), axis=1)

minidata

In [8]:
#separate the point tuple into x and y columns

data_311['X'] = data_311.apply(lambda row: float(row['Point'].strip(')(').split(',')[1]), axis=1)
data_311['Y'] = data_311.apply(lambda row: float(row['Point'].strip(')(').split(',')[0]), axis=1)

data_311.head()


Out[8]:
Category Request Type Request Details Supervisor District Point Source DateTime TimeBin X Y
0 SFHA Requests SFHA Priority - Emergency Carpenters 6 (37.782620592996, -122.416286644263) Voice In 2014-11-29 00:57:11 2.023515e+08 37.782621 -122.416287
1 Rec and Park Requests Park - Neighborhood_Services_Area Irrigation 1 (37.771787999, -122.4712056) Open311 2014-11-29 00:32:49 2.021489e+08 37.771788 -122.471206
2 SFHA Requests SFHA Priority - Emergency Carpenters 5 (37.777584951111, -122.440874464132) Voice In 2014-11-29 00:22:50 2.021489e+08 37.777585 -122.440874
3 Streetlights Streetlight - Light_Burnt_Out Light_Burnt_Out on TrafficSignal Pole 6 (37.7899262286022, -122.394276396696) Web Self Service 2014-11-29 00:21:33 2.021489e+08 37.789926 -122.394276
4 SFHA Requests SFHA Priority - Emergency Carpenters 5 (37.777584951111, -122.440874464132) Voice In 2014-11-29 00:16:54 2.021489e+08 37.777585 -122.440874

In [ ]:
#testing

max_x_loc = minidata['X'].max()
min_x_loc = minidata['X'].min()
print "min X location: %f" % min_x_loc
print "max X location: %f" % max_x_loc

max_y_loc = minidata['Y'].max()
min_y_loc = minidata['Y'].min()
print "min Y location: %f" % min_y_loc
print "max Y location: %f" % max_y_loc

In [ ]:
#testing

max_x_loc = minidata['X'].max()
max_x_loc

In [9]:
max_x_loc = 37.881603
min_x_loc = 37.624394
print "min X location: %f" % min_x_loc
print "max X location: %f" % max_x_loc

max_y_loc = -119.850760
min_y_loc = -122.514436
print "min Y location: %f" % min_y_loc
print "max Y location: %f" % max_y_loc


min X location: 37.624394
max X location: 37.881602
min Y location: -122.514435
max Y location: -119.850760

In [ ]:
#testing

xbins = create_bins(min_x_loc, max_x_loc, 10)
ybins = create_bins(min_y_loc, max_y_loc, 10)

minidata['XBin'] = minidata.apply(lambda row: get_bin(xbins, row['X']), axis = 1)
minidata['YBin'] = minidata.apply(lambda row: get_bin(ybins, row['Y']), axis = 1)
minidata

In [ ]:


In [10]:
#create xbins and ybins

xbins = create_bins(min_x_loc, max_x_loc, 10)
ybins = create_bins(min_y_loc, max_y_loc, 10)

data_311['XBin'] = data_311.apply(lambda row: get_bin(xbins, row['X']), axis = 1)
data_311['YBin'] = data_311.apply(lambda row: get_bin(ybins, row['Y']), axis = 1)
data_311


Out[10]:
Category Request Type Request Details Supervisor District Point Source DateTime TimeBin X Y XBin YBin
0 SFHA Requests SFHA Priority - Emergency Carpenters 6 (37.782620592996, -122.416286644263) Voice In 2014-11-29 00:57:11 2.023515e+08 37.782621 -122.416287 37.767288 -122.514435
1 Rec and Park Requests Park - Neighborhood_Services_Area Irrigation 1 (37.771787999, -122.4712056) Open311 2014-11-29 00:32:49 2.021489e+08 37.771788 -122.471206 37.767288 -122.514435
2 SFHA Requests SFHA Priority - Emergency Carpenters 5 (37.777584951111, -122.440874464132) Voice In 2014-11-29 00:22:50 2.021489e+08 37.777585 -122.440874 37.767288 -122.514435
3 Streetlights Streetlight - Light_Burnt_Out Light_Burnt_Out on TrafficSignal Pole 6 (37.7899262286022, -122.394276396696) Web Self Service 2014-11-29 00:21:33 2.021489e+08 37.789926 -122.394276 37.767288 -122.514435
4 SFHA Requests SFHA Priority - Emergency Carpenters 5 (37.777584951111, -122.440874464132) Voice In 2014-11-29 00:16:54 2.021489e+08 37.777585 -122.440874 37.767288 -122.514435
5 SFHA Requests SFHA Priority - Emergency Electricians 6 (37.784587848, -122.41724) Voice In 2014-11-28 23:57:34 2.021489e+08 37.784588 -122.417240 37.767288 -122.514435
6 Graffiti Private Property Not_Offensive Graffiti on Private Property Sidewalk_in_front_of_property - Not_Offensive 8 (37.767359573605, -122.428568985778) Web Self Service 2014-11-28 22:49:44 2.021489e+08 37.767360 -122.428569 37.767288 -122.514435
7 Graffiti Private Property Not_Offensive Graffiti on Private Property Sidewalk_in_front_of_property - Not_Offensive 8 (37.7675043889879, -122.42894869198) Web Self Service 2014-11-28 22:47:56 2.021489e+08 37.767504 -122.428949 37.767288 -122.514435
8 Graffiti Public Property Not_Offensive Graffiti on Public Property Pole - Not_Offensive 8 (37.768009801116, -122.427950926119) Web Self Service 2014-11-28 22:43:35 2.021489e+08 37.768010 -122.427951 37.767288 -122.514435
9 General Requests dph - environmental_health - request_for_service dph - environmental_health - request_for_service 8 (37.770142611874, -122.423748632203) Voice In 2014-11-28 22:30:18 2.021489e+08 37.770143 -122.423749 37.767288 -122.514435
10 Litter Receptacles Damaged_City_Can None 6 (37.785744132, -122.40583073) Open311 2014-11-28 22:06:50 2.021489e+08 37.785744 -122.405831 37.767288 -122.514435
11 Graffiti Public Property Not_Offensive Graffiti on Public Property Transit_Shelter_Platform - Not_Offensive 5 (37.7739726209569, -122.43082484861) Open311 2014-11-28 21:35:29 2.021489e+08 37.773973 -122.430825 37.767288 -122.514435
12 Graffiti Public Property Offensive Graffiti on Public Property Transit_Shelter_Platform - Offensive 8 (37.7694561645459, -122.429128105966) Open311 2014-11-28 21:33:01 2.021489e+08 37.769456 -122.429128 37.767288 -122.514435
13 Color Curb Red Color Curb Request for Red Color Curb Request for 9 (37.746296687448, -122.414904721461) Web Self Service 2014-11-28 21:28:59 2.021489e+08 37.746297 -122.414905 37.738709 -122.514435
14 Street and Sidewalk Cleaning Illegal_Dumping Garbage 1 (37.785033028574, -122.464696786939) Open311 2014-11-28 21:22:59 2.021489e+08 37.785033 -122.464697 37.767288 -122.514435
15 Street and Sidewalk Cleaning Illegal_Dumping Garbage 10 (37.736295503264, -122.388638995106) Voice In 2014-11-28 21:19:24 2.021489e+08 37.736296 -122.388639 37.710130 -122.514435
16 Graffiti Public Property Offensive Graffiti on Public Property News_rack - Offensive 5 (37.766293556101, -122.453143373354) Open311 2014-11-28 21:03:45 2.021489e+08 37.766294 -122.453143 37.738709 -122.514435
17 Illegal Postings Illegal Postings - Affixed_Improperly None 5 (37.7643187729653, -122.45988770581) Open311 2014-11-28 21:03:00 2.021489e+08 37.764319 -122.459888 37.738709 -122.514435
18 Street and Sidewalk Cleaning Sidewalk_Cleaning Paper_spill 1 (37.7783114484598, -122.481380742089) Voice In 2014-11-28 21:02:17 2.021489e+08 37.778311 -122.481381 37.767288 -122.514435
19 Graffiti Public Property Offensive Graffiti on Public Property Signal_box - Offensive 5 (37.7641309855859, -122.464172021866) Open311 2014-11-28 21:02:00 2.021489e+08 37.764131 -122.464172 37.738709 -122.514435
20 Illegal Postings Illegal Postings - No_Posting_Date None 7 (37.7515034090962, -122.461523700836) Open311 2014-11-28 21:00:59 2.021489e+08 37.751503 -122.461524 37.738709 -122.514435
21 SFHA Requests SFHA Priority - Emergency Carpenters 8 (37.762099639558, -122.424481019146) Voice In 2014-11-28 20:57:40 2.021489e+08 37.762100 -122.424481 37.738709 -122.514435
22 Streetlights Streetlight - Light_Burnt_Out Light_Burnt_Out on Unknown Pole 8 (37.7523798578424, -122.442524644393) Voice In 2014-11-28 20:40:27 2.021489e+08 37.752380 -122.442525 37.738709 -122.514435
23 Graffiti Public Property Offensive Graffiti on Public Property News_rack - Offensive 9 (37.754112043399, -122.416357917291) Open311 2014-11-28 20:37:01 2.021489e+08 37.754112 -122.416358 37.738709 -122.514435
24 Street and Sidewalk Cleaning Illegal_Dumping Garbage 9 (37.753274331208, -122.416401177846) Open311 2014-11-28 20:36:04 2.021489e+08 37.753274 -122.416401 37.738709 -122.514435
25 Street and Sidewalk Cleaning Overflowing_City_Receptacle_or_Dumpster Overflowing_City_Receptacle_or_Dumpster 6 (37.785801913058, -122.40543032401) Integrated Agency 2014-11-28 20:34:26 2.021489e+08 37.785802 -122.405430 37.767288 -122.514435
26 Street and Sidewalk Cleaning Illegal_Dumping Garbage 5 (37.7770680425848, -122.436507295507) Open311 2014-11-28 20:34:00 2.021489e+08 37.777068 -122.436507 37.767288 -122.514435
27 General Requests district_attorney - da_victim_services - custo... district_attorney - da_victim_services - custo... 9 (37.766032082878, -122.419410819452) Voice In 2014-11-28 20:29:06 2.021489e+08 37.766032 -122.419411 37.738709 -122.514435
28 Rec and Park Requests Park - Park_Patrol Park_Rules_Violation 1 (37.780122585, -122.4722852) Open311 2014-11-28 20:20:57 2.021489e+08 37.780123 -122.472285 37.767288 -122.514435
29 Street and Sidewalk Cleaning Overflowing_City_Receptacle_or_Dumpster Overflowing_City_Receptacle_or_Dumpster 6 (37.7832878821779, -122.408952214449) Integrated Agency 2014-11-28 20:19:34 2.021489e+08 37.783288 -122.408952 37.767288 -122.514435
... ... ... ... ... ... ... ... ... ... ... ... ...
1098636 Abandoned Vehicle Abandoned Vehicle - PickupTruck green - Dodge - Ram - 5Z40117 8 (37.749995693, -122.433025754) Voice In 2008-07-01 07:13:00 0.000000e+00 37.749996 -122.433026 37.738709 -122.514435
1098637 Graffiti Public Property Not_Offensive Graffiti on Public Property Mail_box - Not_Offensive 5 (37.7805885619837, -122.438926930121) Voice In 2008-07-01 07:06:00 0.000000e+00 37.780589 -122.438927 37.767288 -122.514435
1098638 Street and Sidewalk Cleaning Illegal_Dumping Garbage 10 (37.763356015798, -122.405594426077) Voice In 2008-07-01 06:58:00 0.000000e+00 37.763356 -122.405594 37.738709 -122.514435
1098639 Street and Sidewalk Cleaning Sidewalk_Cleaning Human_waste_or_urine 8 (37.7595573310853, -122.430420425099) Voice In 2008-07-01 06:56:00 0.000000e+00 37.759557 -122.430420 37.738709 -122.514435
1098640 Street and Sidewalk Cleaning Illegal_Dumping Debris_filled_carts 9 (37.766759588, -122.418402776) Voice In 2008-07-01 06:49:00 0.000000e+00 37.766760 -122.418403 37.738709 -122.514435
1098641 Blocked Street or SideWalk Blocked_Sidewalk Tree_shrub_bush_privately_maintained blocking ... 8 (37.7707552988245, -122.43354732663) Voice In 2008-07-01 06:47:00 0.000000e+00 37.770755 -122.433547 37.767288 -122.514435
1098642 Street and Sidewalk Cleaning Illegal_Dumping Debris_filled_carts 9 (37.7668051353148, -122.417645456444) Voice In 2008-07-01 06:47:00 0.000000e+00 37.766805 -122.417645 37.738709 -122.514435
1098643 Street and Sidewalk Cleaning Illegal_Dumping Mattress 11 (37.7192983904956, -122.432385117155) Voice In 2008-07-01 06:42:00 0.000000e+00 37.719298 -122.432385 37.710130 -122.514435
1098644 Street and Sidewalk Cleaning Street_Cleaning Mattress 11 (37.721626366615, -122.438641709785) Voice In 2008-07-01 06:34:00 0.000000e+00 37.721626 -122.438642 37.710130 -122.514435
1098645 Damaged Property Damaged Kiosk_Public_Toilet Kiosk_Public_Toilet 1 (37.7797467973351, -122.492232434925) Voice In 2008-07-01 06:29:00 0.000000e+00 37.779747 -122.492232 37.767288 -122.514435
1098646 Street and Sidewalk Cleaning Illegal_Dumping Garbage 11 (37.7142094852329, -122.436176460695) Voice In 2008-07-01 06:28:00 0.000000e+00 37.714209 -122.436176 37.710130 -122.514435
1098647 Street and Sidewalk Cleaning Illegal_Dumping Furniture 9 (37.762872265114, -122.418433627914) Voice In 2008-07-01 06:26:00 0.000000e+00 37.762872 -122.418434 37.738709 -122.514435
1098648 Street and Sidewalk Cleaning Sidewalk_Cleaning Sidewalk_Cleaning 10 (37.720875294, -122.39703846) Voice In 2008-07-01 06:24:00 0.000000e+00 37.720875 -122.397038 37.710130 -122.514435
1098649 Street and Sidewalk Cleaning Sidewalk_Cleaning Garbage 10 (37.720875294, -122.39703846) Voice In 2008-07-01 06:21:00 0.000000e+00 37.720875 -122.397038 37.710130 -122.514435
1098650 Street Defects Construction_Plate_Shifted Construction_Plate_Shifted 6 (37.77958431741, -122.416768262915) Voice In 2008-07-01 06:14:00 0.000000e+00 37.779584 -122.416768 37.767288 -122.514435
1098651 Street and Sidewalk Cleaning Illegal_Dumping Garbage 10 (37.733074364, -122.393180389) Voice In 2008-07-01 06:11:00 0.000000e+00 37.733074 -122.393180 37.710130 -122.514435
1098652 Street and Sidewalk Cleaning Sidewalk_Cleaning Human_waste_or_urine 6 (37.7790321754703, -122.401843247026) Voice In 2008-07-01 06:09:00 0.000000e+00 37.779032 -122.401843 37.767288 -122.514435
1098653 Street and Sidewalk Cleaning Overflowing_City_Receptacle_or_Dumpster Overflowing_City_Receptacle_or_Dumpster 6 (37.7790321754703, -122.401843247026) Voice In 2008-07-01 06:04:00 0.000000e+00 37.779032 -122.401843 37.767288 -122.514435
1098654 Graffiti Private Property Not_Offensive Graffiti on Private Property Sidewalk_in_front_of_property - Not_Offensive 8 (37.7467653227988, -122.443960871546) Voice In 2008-07-01 05:58:00 0.000000e+00 37.746765 -122.443961 37.738709 -122.514435
1098655 Graffiti Public Property Not_Offensive Graffiti on Public Property Pole - Not_Offensive 8 (37.7467653227988, -122.443960871546) Voice In 2008-07-01 05:54:00 0.000000e+00 37.746765 -122.443961 37.738709 -122.514435
1098656 311 External Request Graffiti Graffiti - 8 (37.7467653227988, -122.443960871546) Voice In 2008-07-01 05:46:00 0.000000e+00 37.746765 -122.443961 37.738709 -122.514435
1098657 Street and Sidewalk Cleaning Sidewalk_Cleaning Tires_less_than_10 8 (37.7471717655759, -122.42238350605) Voice In 2008-07-01 05:05:00 0.000000e+00 37.747172 -122.422384 37.738709 -122.514435
1098658 Street and Sidewalk Cleaning Sidewalk_Cleaning Furniture 8 (37.7471717655759, -122.42238350605) Voice In 2008-07-01 05:03:00 0.000000e+00 37.747172 -122.422384 37.738709 -122.514435
1098659 Street and Sidewalk Cleaning Illegal_Dumping Garbage 8 (37.7471717655759, -122.42238350605) Voice In 2008-07-01 05:03:00 0.000000e+00 37.747172 -122.422384 37.738709 -122.514435
1098660 Sign Repair Sign Repair - Bent No_Parking - Bent; support - Bent 9 (37.743122600676, -122.421485554258) Voice In 2008-07-01 04:40:00 0.000000e+00 37.743123 -122.421486 37.738709 -122.514435
1098661 Street and Sidewalk Cleaning Sidewalk_Cleaning Garbage 9 (37.7406417656081, -122.423090064246) Voice In 2008-07-01 03:03:00 0.000000e+00 37.740642 -122.423090 37.738709 -122.514435
1098662 Abandoned Vehicle Abandoned Vehicle - Car4door DARK BLUE - FORD - CROWN VICTORIA - 3WBG858 5 (37.781800341, -122.428537476) Voice In 2008-07-01 02:07:00 0.000000e+00 37.781800 -122.428537 37.767288 -122.514435
1098663 Tree Maintenance Trees - Damaged_Tree Hanging_limb 5 (37.7643657242198, -122.458814894064) Voice In 2008-07-01 01:56:00 0.000000e+00 37.764366 -122.458815 37.738709 -122.514435
1098664 Street and Sidewalk Cleaning Illegal_Dumping Mattress 4 (37.7617471571397, -122.475924197088) Voice In 2008-07-01 00:26:00 0.000000e+00 37.761747 -122.475924 37.738709 -122.514435
1098665 Street and Sidewalk Cleaning Sidewalk_Cleaning Sidewalk_Cleaning 3 (37.800869062548, -122.406192162738) Voice In 2008-07-01 00:13:00 0.000000e+00 37.800869 -122.406192 37.795866 -122.514435

1098666 rows × 12 columns


In [14]:
#store into csv as checkpoint

data_311.to_csv('intermediate_311.csv', index_label=False)

In [ ]:
#DATA_PATH = "/home/datascience/project/intermediate_311.csv" # Make this the /path/to/the/data

DATA_PATH = "/home/casual1water/cs194-16-data_manatees/intermediate_311.csv" # path of amazon machine

import pandas as pd

data_311 = pd.read_csv(DATA_PATH, na_values=['-'])
data_311.head()

In [12]:
#testing

agg_dict_mini = {}
for index, row in minidata.iterrows():
    tupe = (row['TimeBin'], row['XBin'], row['YBin'])
    if tupe not in agg_dict_mini:
        agg_dict_mini[tupe] = []
    agg_dict_mini[tupe].append(row.to_dict())

agg_dict_mini2 = {}
agg_dict_mini2['reports'] = []
agg_dict_mini2['tuple'] = []
for k in agg_dict_mini:
    agg_dict_mini2['reports'].append(agg_dict_mini[k])
    agg_dict_mini2['tuple'].append(k)
print agg_dict_mini2


---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-12-756ca9baf5ca> in <module>()
      3 agg_dict_mini = {}
      4 for index, row in minidata.iterrows():
----> 5     tupe = (row['TimeBin'], row['XBin'], row['YBin'])
      6     if tupe not in agg_dict_mini:
      7         agg_dict_mini[tupe] = []

/opt/anaconda/lib/python2.7/site-packages/pandas/core/series.pyc in __getitem__(self, key)
    482     def __getitem__(self, key):
    483         try:
--> 484             result = self.index.get_value(self, key)
    485 
    486             if not np.isscalar(result):

/opt/anaconda/lib/python2.7/site-packages/pandas/core/index.pyc in get_value(self, series, key)
   1208                     raise InvalidIndexError(key)
   1209                 else:
-> 1210                     raise e1
   1211             except Exception:  # pragma: no cover
   1212                 raise e1

KeyError: 'TimeBin'

In [15]:
#aggregate 311 reports by tuple

agg_dict_311 = {}
for index, row in data_311.iterrows():
    tupe = (row['TimeBin'], row['XBin'], row['YBin'])
    if tupe not in agg_dict_311:
        agg_dict_311[tupe] = []
    agg_dict_311[tupe].append(row.to_dict())

In [16]:
agg_dict_311_2 = {}
agg_dict_311_2['reports'] = []
agg_dict_311_2['tuple'] = []
for k in agg_dict_311:
    agg_dict_311_2['reports'].append(agg_dict_311[k])
    agg_dict_311_2['tuple'].append(k)
#print agg_dict_311_2

In [ ]:
df_mini = pd.DataFrame(agg_dict_mini2)
df_mini.head()
#df_mini.head(1)['reports']
#print (df_mini['reports'][0][0])

In [ ]:
df_mini.to_csv('mini_311.csv', index_label=False)

In [17]:
df_311 = pd.DataFrame(agg_dict_311_2)
df_311.head()


Out[17]:
reports tuple
0 [{u'Category': u'General Requests', u'TimeBin'... (35649504.8809, 37.7958663697, -122.514435088)
1 [{u'Category': u'SFHA Requests', u'TimeBin': 1... (127406469.148, 37.7101302543, -122.514435088)
2 [{u'Category': u'Tree Maintenance', u'TimeBin'... (87098222.1522, 37.6815515492, -122.514435088)
3 [{u'Category': u'Street and Sidewalk Cleaning'... (126798807.133, 37.7672876646, -122.514435088)
4 [{u'Category': u'Abandoned Vehicle', u'TimeBin... (137331615.393, 37.6815515492, -122.514435088)

In [18]:
df_311.to_csv('df_311_ready_to_join.csv', index_label=False)

In [ ]: