In [1]:
from __future__ import absolute_import, division, print_function, unicode_literals
import numpy as np
from numpy import ma
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
from pyproj import Geod
from metpy.io.nexrad import Level2File
from metpy.plots import ctables
import os, re, boto3
from botocore.handlers import disable_signing
import matplotlib.pyplot as plt
%matplotlib inline
In [3]:
s3_client = boto3.client('s3')
resource = boto3.resource('s3')
# Disable signing for anonymous requests to public bucket
resource.meta.client.meta.events.register('choose-signer.s3.*', disable_signing)
# Pulled from Stack Overflow here: http://stackoverflow.com/a/32674165
def folders(client, bucket, prefix=''):
paginator = client.get_paginator('list_objects')
for result in paginator.paginate(Bucket=bucket, Prefix=prefix, Delimiter='/'):
for prefix in result.get('CommonPrefixes', []):
yield prefix.get('Prefix')
def file_list(client, bucket, prefix=''):
paginator = client.get_paginator('list_objects')
for result in client.list_objects(Bucket=bucket, Prefix=prefix, Delimiter='/')['Contents']:
if result.get('Key').endswith('.gz'):
yield result.get('Key')
gen_folders = folders(s3_client, 'noaa-nexrad-level2')
list(gen_folders)
gen_subfolders = folders(s3_client, 'noaa-nexrad-level2', prefix='2015/')
list(gen_subfolders)
gen_klot_15 = list(file_list(s3_client, 'noaa-nexrad-level2', prefix='2015/08/01/KLOT/'))
In [8]:
def extract_data(filepath):
f = Level2File(filepath)
# Pull data out of the file
sweep = 0
# First item in ray is header, which has azimuth angle
try:
az = np.array([ray[0].az_angle for ray in f.sweeps[sweep]])
except:
return None, None, None
# 5th item is a dict mapping a var name (byte string) to a tuple
# of (header, data array)
ref_hdr = f.sweeps[sweep][0][4][b'REF'][0]
ref_range = np.arange(ref_hdr.num_gates) * ref_hdr.gate_width + ref_hdr.first_gate
ref = np.array([ray[4][b'REF'][1] for ray in f.sweeps[sweep]])
data_hdr = f.sweeps[sweep][0][1]
data = ma.array(ref)
data[data==0] = ma.masked
# Data from MetPy needs to be converted to latitude and longitude coordinates
g = Geod(ellps='clrk66')
center_lat = np.ones([len(az),len(ref_range)])*data_hdr.lat
center_lon = np.ones([len(az),len(ref_range)])*data_hdr.lon
az2D = np.ones_like(center_lat)*az[:,None]
rng2D = np.ones_like(center_lat)*np.transpose(ref_range[:,None])*1000
lon,lat,back = g.fwd(center_lon,center_lat,az2D,rng2D)
return lon, lat, data
def unstack_process(lon, lat, data):
lat_df = pd.DataFrame(lat)
lon_df = pd.DataFrame(lon)
data_df = pd.DataFrame(data)
lon_stack = lon_df.stack().reset_index()
lon_stack = lon_stack.rename(columns={'level_0': 'x', 'level_1': 'y', 0: 'lon'})
lat_stack = lat_df.stack().reset_index()
lat_stack = lat_stack.rename(columns={'level_0': 'x', 'level_1': 'y', 0: 'lat'})
coord_merge = pd.merge(lat_stack, lon_stack, on=['x', 'y']).reset_index()
# Reducing to bounding box through selection rather than geospatial operation
coord_merge = coord_merge.loc[(coord_merge['lat'] <= 42.0231311) &
(coord_merge['lat'] >= 41.644335) &
(coord_merge['lon'] <= -87.524044) &
(coord_merge['lon'] >= -87.940267)]
data_stack = data_df.stack().reset_index()
data_stack = data_stack.rename(columns={'level_0': 'x', 'level_1': 'y', 0: 'precip'})
merged_data = pd.merge(coord_merge, data_stack, on=['x', 'y'], how='left')[['lat','lon','precip']]
nexrad_df = merged_data.dropna().copy()
# Convert precip in dBZ into mm/hr using Marshall-Palmer https://en.wikipedia.org/wiki/DBZ_(meteorology)
nexrad_df.loc['precip'] = pow(pow(10, nexrad_df['precip']/10)/200, 0.625)
return nexrad_df.dropna()
def spatial_join(nexrad_df, gpd_file, group_col, file_time):
geo_df = gpd.read_file(gpd_file)
crs = {'init':'epsg:4326'}
geo_df.crs = crs
geometry = nexrad_df.apply(lambda z: Point(z['lon'], z['lat']), axis=1).dropna()
#geometry = [Point(xy) for xy in zip(nexrad_df.lon, nexrad_df.lat)]
nexrad_geo = gpd.GeoDataFrame(nexrad_df, crs=crs, geometry=geometry)
nexrad_geo.crs = geo_df.crs
merged_nexrad = gpd.tools.sjoin(nexrad_geo, geo_df, how='right', op='within') #.reset_index()
nexrad_grouped = merged_nexrad.groupby([group_col])['precip'].mean().reset_index()
nexrad_grouped[group_col] = nexrad_grouped[group_col].astype(int)
nexrad_grouped.fillna(value=0, inplace=True)
nexrad_grouped.sort_values(by=group_col, inplace=True)
nexrad_grouped.to_csv('data/nexrad_processed/{}_{}.csv'.format(group_col, file_time), index=False)
In [ ]:
nexrad_bucket = resource.Bucket('noaa-nexrad-level2')
for klot_file in gen_klot_15[15:]:
local_filepath = 'nexrad_data/{}'.format(klot_file.split('/')[-1])
nexrad_bucket.download_file(klot_file, local_filepath)
file_time = re.search(r'\d{8}_\d{6}',local_filepath).group()
lon, lat, data = extract_data(local_filepath)
if lon is not None:
print(data.shape)
nexrad_df = unstack_process(lon, lat, data)
spatial_join(nexrad_df, 'data/chicago_wards.geojson', 'ward', file_time)
In [ ]: