In [69]:
    
import cProfile
import functools
import geopy
import itertools
import json
from matplotlib import collections as mc
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pyproj
import requests
import scipy as sp
import rtree
import seaborn as sb
from scipy import signal
# import shapely
import shapely.geometry
%pylab inline
import data_munging
    
    
    
In [2]:
    
rides, readings = data_munging.read_raw_data()
readings = data_munging.clean_readings(readings)
readings = data_munging.add_proj_to_readings(readings, data_munging.NAD83)
    
In [3]:
    
reload(data_munging)
    
    Out[3]:
In [4]:
    
chi_readings = data_munging.filter_readings_to_chicago(readings)
    
In [5]:
    
chi_readings = data_munging.filter_to_good_readings(chi_readings)
    
In [6]:
    
print rides.shape
print readings.shape
n, p = readings.shape
    
    
In [7]:
    
chi_readings.plot(x='start_lon', y='start_lat', kind='scatter')
config = plt.gcf()
config.set_size_inches(12, 12)
plt.show()
    
    
In [9]:
    
max_lat = np.max(chi_readings['start_lat'])
max_lon = np.max(chi_readings['start_lon'])
min_lat = np.min(chi_readings['start_lat'])
min_lon = np.min(chi_readings['start_lon'])
    
In [10]:
    
max_x = np.max(chi_readings['start_x'])
max_y = np.max(chi_readings['start_y'])
min_x = np.min(chi_readings['start_x'])
min_y = np.min(chi_readings['start_y'])
    
In [11]:
    
x_range = max_x - min_x
y_range = max_y - min_y
    
In [12]:
    
data_munging.calc_dist(max_lon, max_lat, min_lon, min_lat)
    
    Out[12]:
In [13]:
    
north_south_meters = data_munging.calc_dist(max_lon, max_lat, min_lon, max_lat)
east_west_meters = data_munging.calc_dist(max_lon, max_lat, max_lon, min_lat)
print north_south_meters
print east_west_meters
    
    
In [14]:
    
patch_size = 5
    
In [15]:
    
north_south_patches = int(np.ceil(north_south_meters / patch_size))
east_west_patches = int(np.ceil(east_west_meters / patch_size))
    
In [16]:
    
x_step = x_range / east_west_patches
y_step = y_range / north_south_patches
    
In [17]:
    
rastered_pavement_quality = np.zeros((north_south_patches, east_west_patches))
    
In [18]:
    
readings_idx = data_munging.insert_readings_rtree(chi_readings)
    
In [19]:
    
chi_readings['abs_mean_over_speed'] = (chi_readings['abs_mean_total'] /
                                       chi_readings['gps_speed'])
    
In [21]:
    
total_itrs = 0
for i, j in itertools.product(range(north_south_patches), range(east_west_patches)):
    total_itrs += 1
    if total_itrs % 1000000 == 0:
        print total_itrs
    patch_bb = (min_x + x_step * j, min_y + y_step * i,
                min_x + x_step * (j + 1), min_y + y_step * (i + 1))
    segs_in_patch = list(readings_idx.intersection(patch_bb))
    if len(segs_in_patch) > 0:
        rastered_pavement_quality[i, j] = np.mean(chi_readings['abs_mean_over_speed'][segs_in_patch])
    
    
In [22]:
    
print north_south_patches
print east_west_patches
    
    
In [23]:
    
print north_south_patches * east_west_patches
    
    
In [26]:
    
for i, j in itertools.product(range(north_south_patches), range(east_west_patches)):
    patch_bb = (min_x + x_step * j, min_y + y_step * i,
                min_x + x_step * (j + 1), min_y + y_step * (i + 1))
    segs_in_patch = list(readings_idx.intersection(patch_bb))
    pavement_quality = 0
    if len(segs_in_patch) > 0:
        pavement_quality = np.mean(chi_readings['abs_mean_over_speed'][segs_in_patch])
    rastered_pavement_quality[i, j] = pavement_quality
    
In [65]:
    
def row_to_str(row):
    return functools.reduce(lambda x, y: x + ' ' + y, [str(int(i * 10)) for i in row])
    
In [67]:
    
data_munging.data_dir
    
    Out[67]:
In [66]:
    
with open(data_munging.data_dir + 'rastered_chicago.asc', 'w+') as f:
    for i in range(rastered_pavement_quality.shape[0]):
        f.write(row_to_str(rastered_pavement_quality[i, :]))
    
In [78]:
    
with open(data_munging.data_dir + 'raster_metadata.txt', 'w+') as f:
    metadata = {'lon_min': min_lon,
                'lon_max': max_lon,
                'lat_min': min_lat,
                'lat_max': max_lat,
                'nrows': rastered_pavement_quality.shape[0],
                'ncols': rastered_pavement_quality.shape[1]
               }
    json.dump(metadata, f)
    
In [90]:
    
print np.max(rastered_pavement_quality.flat)
print np.mean(rastered_pavement_quality.flat == 0)
    
    
In [89]:
    
plt.hist(rastered_pavement_quality.flat[rastered_pavement_quality.flat > 0], bins=100)
plt.title('Nonzero Raster Pavement Values')
gcf = plt.gcf()
gcf.set_size_inches((12, 8))
    
    
In [ ]:
    
chi_readings.plot(x='start_x', y='start_y', kind='scatter')
plt.plot([patch_bb[0], patch_bb[2] + 5000],
         [patch_bb[1], patch_bb[3] + 2000], color='red')
config = plt.gcf()
config.set_size_inches(12, 12)
plt.show()