In [1]:
from __future__ import absolute_import, division, print_function, unicode_literals

import numpy as np
from numpy import ma
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point

from pyproj import Geod
from metpy.io.nexrad import Level2File
from metpy.plots import ctables
import os
import re
import boto3
from botocore.handlers import disable_signing

import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
s3_client = boto3.client('s3')
resource = boto3.resource('s3')
# Disable request signing on both handles so the public bucket can be read
# anonymously, without AWS credentials
s3_client.meta.events.register('choose-signer.s3.*', disable_signing)
resource.meta.client.meta.events.register('choose-signer.s3.*', disable_signing)
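
# An equivalent way to get anonymous access (a sketch, not used below): build the
# client with an unsigned botocore config instead of registering the event hook.
from botocore import UNSIGNED
from botocore.client import Config
anon_client = boto3.client('s3', config=Config(signature_version=UNSIGNED))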

# Pulled from Stack Overflow here: http://stackoverflow.com/a/32674165
def folders(client, bucket, prefix=''):
    paginator = client.get_paginator('list_objects')
    for result in paginator.paginate(Bucket=bucket, Prefix=prefix, Delimiter='/'):
        for common_prefix in result.get('CommonPrefixes', []):
            yield common_prefix.get('Prefix')

def file_list(client, bucket, prefix=''):
    # Paginate so listings longer than 1,000 keys are not silently truncated
    paginator = client.get_paginator('list_objects')
    for result in paginator.paginate(Bucket=bucket, Prefix=prefix, Delimiter='/'):
        for obj in result.get('Contents', []):
            if obj.get('Key').endswith('.gz'):
                yield obj.get('Key')

# The bucket is laid out as year/month/day/station/, so these calls list the
# available years and then the months under 2015
gen_folders = folders(s3_client, 'noaa-nexrad-level2')
list(gen_folders)

gen_subfolders = folders(s3_client, 'noaa-nexrad-level2', prefix='2015/')
list(gen_subfolders)

# All volume scans from the Chicago-area radar (KLOT) on 2015-08-01
gen_klot_15 = list(file_list(s3_client, 'noaa-nexrad-level2', prefix='2015/08/01/KLOT/'))
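
# Quick sanity check on the listing: for this day the keys should look roughly like
# '2015/08/01/KLOT/KLOT20150801_HHMMSS_V06.gz'; the timestamp portion is what the
# download loop below parses out with a regex.
print(len(gen_klot_15))
print(gen_klot_15[:3])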

In [8]:
def extract_data(filepath):
    f = Level2File(filepath)
    # Pull data out of the file
    sweep = 0

    # First item in each ray is the header, which has the azimuth angle
    try:
        az = np.array([ray[0].az_angle for ray in f.sweeps[sweep]])
    except Exception:
        # Some files are truncated or missing the expected sweep; skip them
        return None, None, None

    # 5th item is a dict mapping a var name (byte string) to a tuple
    # of (header, data array)
    ref_hdr = f.sweeps[sweep][0][4][b'REF'][0]
    ref_range = np.arange(ref_hdr.num_gates) * ref_hdr.gate_width + ref_hdr.first_gate
    ref = np.array([ray[4][b'REF'][1] for ray in f.sweeps[sweep]])
    
    # Second item in the first ray carries the volume metadata, including the
    # radar site latitude/longitude used below
    data_hdr = f.sweeps[sweep][0][1]
    
    data = ma.array(ref)
    data[data==0] = ma.masked

    # Data from MetPy needs to be converted to latitude and longitude coordinates
    g = Geod(ellps='clrk66')
    center_lat = np.ones([len(az), len(ref_range)]) * data_hdr.lat
    center_lon = np.ones([len(az), len(ref_range)]) * data_hdr.lon

    az2D = np.ones_like(center_lat) * az[:, None]
    # ref_range is in km, hence the factor of 1000 to get meters, the unit Geod.fwd expects
    rng2D = np.ones_like(center_lat) * ref_range[None, :] * 1000
    lon, lat, back = g.fwd(center_lon, center_lat, az2D, rng2D)
    
    return lon, lat, data
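
# Optional: a quick look at one sweep before aggregating it. This helper is a
# sketch (not part of the original pipeline) that uses the ctables/matplotlib
# imports above with MetPy's NWS reflectivity colortable.
def plot_sweep(lon, lat, data):
    norm, cmap = ctables.registry.get_with_steps('NWSReflectivity', 16, 16)
    fig, ax = plt.subplots(figsize=(8, 8))
    # Drop the last ray/gate so the mesh corner arrays line up with the cell values
    ax.pcolormesh(lon, lat, data[:-1, :-1], norm=norm, cmap=cmap)
    ax.set_xlabel('Longitude')
    ax.set_ylabel('Latitude')
    return ax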

def unstack_process(lon, lat, data):
    lat_df = pd.DataFrame(lat)
    lon_df = pd.DataFrame(lon)
    data_df = pd.DataFrame(data)
    
    lon_stack = lon_df.stack().reset_index()
    lon_stack = lon_stack.rename(columns={'level_0': 'x', 'level_1': 'y', 0: 'lon'})
    lat_stack = lat_df.stack().reset_index()
    lat_stack = lat_stack.rename(columns={'level_0': 'x', 'level_1': 'y', 0: 'lat'})
    coord_merge = pd.merge(lat_stack, lon_stack, on=['x', 'y']).reset_index()
    # Reducing to bounding box through selection rather than geospatial operation
    coord_merge = coord_merge.loc[(coord_merge['lat'] <= 42.0231311) &
                                  (coord_merge['lat'] >= 41.644335) &
                                  (coord_merge['lon'] <= -87.524044) &
                                  (coord_merge['lon'] >= -87.940267)]
    data_stack = data_df.stack().reset_index()
    data_stack = data_stack.rename(columns={'level_0': 'x', 'level_1': 'y', 0: 'precip'})
    merged_data = pd.merge(coord_merge, data_stack, on=['x', 'y'], how='left')[['lat','lon','precip']]
    nexrad_df = merged_data.dropna().copy()
    # Convert precip in dBZ into mm/hr using Marshall-Palmer https://en.wikipedia.org/wiki/DBZ_(meteorology)
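    # Marshall-Palmer: Z = 200 * R^1.6 with Z = 10^(dBZ / 10), so
    # R (mm/hr) = (10^(dBZ / 10) / 200)^(1 / 1.6), and 1 / 1.6 = 0.625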
    nexrad_df['precip'] = pow(pow(10, nexrad_df['precip']/10)/200, 0.625)
    return nexrad_df.dropna()
    
def spatial_join(nexrad_df, gpd_file, group_col, file_time):
    geo_df = gpd.read_file(gpd_file)
    crs = {'init':'epsg:4326'}
    geo_df.crs = crs
    geometry = nexrad_df.apply(lambda z: Point(z['lon'], z['lat']), axis=1).dropna()
    #geometry = [Point(xy) for xy in zip(nexrad_df.lon, nexrad_df.lat)]
    nexrad_geo = gpd.GeoDataFrame(nexrad_df, crs=crs, geometry=geometry)
    nexrad_geo.crs = geo_df.crs
    merged_nexrad = gpd.tools.sjoin(nexrad_geo, geo_df, how='right', op='within') #.reset_index()
    nexrad_grouped = merged_nexrad.groupby([group_col])['precip'].mean().reset_index()
    nexrad_grouped[group_col] = nexrad_grouped[group_col].astype(int)
    nexrad_grouped.fillna(value=0, inplace=True)
    nexrad_grouped.sort_values(by=group_col, inplace=True)
    nexrad_grouped.to_csv('data/nexrad_processed/{}_{}.csv'.format(group_col, file_time), index=False)

In [ ]:
nexrad_bucket = resource.Bucket('noaa-nexrad-level2')

for klot_file in gen_klot_15[15:]:
    local_filepath = 'nexrad_data/{}'.format(klot_file.split('/')[-1])
    nexrad_bucket.download_file(klot_file, local_filepath)

    file_time = re.search(r'\d{8}_\d{6}',local_filepath).group()
    lon, lat, data = extract_data(local_filepath)
    if lon is not None:
        print(data.shape)
        nexrad_df = unstack_process(lon, lat, data)
        spatial_join(nexrad_df, 'data/chicago_wards.geojson', 'ward', file_time)

In [ ]:
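# Hypothetical follow-up (a sketch, not from the notebook above): stitch the
# per-scan CSVs written by spatial_join() back into one table, tagging each row
# with the scan timestamp parsed from the file name.
import glob

frames = []
for path in sorted(glob.glob('data/nexrad_processed/ward_*.csv')):
    scan_df = pd.read_csv(path)
    # File names look like ward_YYYYMMDD_HHMMSS.csv, so reuse the timestamp regex
    scan_df['scan_time'] = re.search(r'\d{8}_\d{6}', path).group()
    frames.append(scan_df)

if frames:
    ward_precip = pd.concat(frames, ignore_index=True)
    ward_precip['scan_time'] = pd.to_datetime(ward_precip['scan_time'], format='%Y%m%d_%H%M%S')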