In [1]:
import numpy as np
import pandas as pd
import os, re, boto3
from botocore.handlers import disable_signing
import matplotlib.pyplot as plt
%matplotlib inline
In [2]:
# Two handles onto S3: the low-level client (used for listing and downloading
# below) and the higher-level resource.
s3_client = boto3.client('s3')
resource = boto3.resource('s3')
# Disable signing for anonymous requests to public bucket.
# The handler has to be registered on every client that talks to S3: the
# standalone client has its own event system, separate from the one wrapped
# by the resource, so registering on the resource alone leaves s3_client
# signing its requests.
s3_client.meta.events.register('choose-signer.s3.*', disable_signing)
resource.meta.client.meta.events.register('choose-signer.s3.*', disable_signing)
def file_list(client, bucket, prefix=''):
    """Yield the key of every object listed under ``prefix`` in ``bucket``.

    The listing is requested with ``Delimiter='/'`` and walked page by page
    through the client's ``list_objects`` paginator, so results are not cut
    off after the first response.

    Args:
        client: S3 client exposing ``get_paginator``.
        bucket: Name of the bucket to list.
        prefix: Key prefix to restrict the listing to (default: no prefix).

    Yields:
        The ``Key`` of each entry found in the ``Contents`` of every page.
        Nothing is yielded when a page has no ``Contents`` entry.
    """
    paginator = client.get_paginator('list_objects')
    pages = paginator.paginate(Bucket=bucket, Prefix=prefix, Delimiter='/')
    for page in pages:
        # 'Contents' is missing from the response when nothing matched.
        for entry in page.get('Contents', []):
            yield entry.get('Key')
# Materialise the key listing for the 'test-aug3/' prefix of the bucket.
gen_s3_files = list(file_list(s3_client, 'nexrad-etl', prefix='test-aug3/'))
# The downloads are written into ./test-aug3; create it up front so a fresh
# checkout does not fail on the first download.
os.makedirs('test-aug3', exist_ok=True)
# Each key is saved under a sequential local name: nexrad0.csv, nexrad1.csv, ...
for i, f in enumerate(gen_s3_files):
    s3_client.download_file('nexrad-etl', f, 'test-aug3/nexrad{}.csv'.format(i))
# Read every downloaded NEXRAD CSV in ./test-aug3 into its own DataFrame.
folder_files = os.listdir(os.path.join(os.getcwd(), 'test-aug3'))
nexrad_df_list = list()
skipped_files = list()
for f in folder_files:
    # Only the files written by the download loop (nexrad<N>.csv) belong here.
    # Other CSVs kept in the same folder (e.g. aug-12.csv, which this notebook
    # reads separately) must not be merged into the NEXRAD frames.
    if not (f.startswith('nexrad') and f.endswith('.csv')):
        continue
    try:
        nexrad_df_list.append(pd.read_csv('test-aug3/{}'.format(f)))
    except Exception as exc:
        # Best effort: an unreadable file is skipped rather than aborting the
        # load, but it is recorded so the failure is not silent.
        skipped_files.append((f, repr(exc)))
if skipped_files:
    print('skipped {} unreadable file(s): {}'.format(
        len(skipped_files), [name for name, _ in skipped_files]))
print(len(nexrad_df_list))
In [36]:
# Stack the per-file frames into a single table.
merged_nexrad = pd.concat(nexrad_df_list)

# The raw 'timestamp' values divided by 1000 are interpreted as milliseconds;
# 5*3600*1000 ms (five hours) is subtracted before conversion.
# NOTE(review): presumably this shifts UTC to Chicago daylight time -- confirm.
shifted_ms = (merged_nexrad['timestamp'] / 1000) - (5*3600*1000)
merged_nexrad['timestamp'] = pd.to_datetime(shifted_ms, unit='ms')

# Index by the converted timestamps, order chronologically, zero-fill gaps.
merged_nexrad = (
    merged_nexrad
    .set_index(pd.DatetimeIndex(merged_nexrad['timestamp']))
    .sort_values('timestamp')
    .fillna(0.0)
)

# Elapsed time since the previous read; the first row has no predecessor,
# so it is dropped.
merged_nexrad['diff'] = merged_nexrad['timestamp'].diff()
merged_nexrad = merged_nexrad.iloc[1:]
print(merged_nexrad.shape)
In [22]:
# Smallest value in the frame's index, displayed as the cell output.
merged_nexrad.index.min()
Out[22]:
In [23]:
# Express the gap between consecutive reads in fractional hours
# (timedelta -> minutes -> hours).
# The conversion only runs while the column is still a timedelta (dtype kind
# 'm'), so re-running this cell does not rescale an already-converted column.
if merged_nexrad['diff'].dtype.kind == 'm':
    merged_nexrad['diff'] = (merged_nexrad['diff'] / np.timedelta64(1, 'm')).astype(float) / 60
merged_nexrad.head()
Out[23]:
In [24]:
# Rows dated 2016-08-12, restricted to the timestamp, the '60666' column and
# the gap between reads.
# Row selection by date string goes through .loc: plain [] with a date string
# is no longer accepted for rows on current pandas. .copy() gives an independent
# frame, so later writes to it never touch (or warn about) merged_nexrad.
aug_day_ohare = merged_nexrad.loc['2016-08-12', ['timestamp', '60666', 'diff']].copy()
aug_day_ohare.head()
Out[24]:
In [25]:
# Scale each '60666' value by the hours elapsed since the previous read, then
# divide by 25.4 (millimetres per inch).
# NOTE(review): presumably '60666' holds a rate in mm/hour, making the result
# inches per read -- confirm against the upstream data.
MM_PER_INCH = 25.4
# .assign builds a new frame instead of writing into one derived from a slice,
# which avoids pandas' SettingWithCopyWarning.
# Caveat: the column is replaced by a function of itself, so running this cell
# twice applies the scaling twice; re-run the previous cell first.
aug_day_ohare = aug_day_ohare.assign(
    **{'60666': (aug_day_ohare['60666'] * aug_day_ohare['diff']) / MM_PER_INCH}
)
aug_day_ohare.head()
Out[25]:
In [51]:
# Checking against Weather Underground read for O'Hare on this day:
# print the day's total of the '60666' column, then plot the series.
ohare_series = aug_day_ohare['60666']
print(ohare_series.sum())
ohare_series.plot()
Out[51]:
In [50]:
# Load the Weather Underground export kept next to the NEXRAD files.
wunderground = pd.read_csv('test-aug3/aug-12.csv')
# Missing precipitation becomes 0.0 and the 'TimeCDT' strings are parsed to
# datetimes.
wunderground = wunderground.assign(
    PrecipitationIn=wunderground['PrecipitationIn'].fillna(0.0),
    TimeCDT=pd.to_datetime(wunderground['TimeCDT']),
)
wunderground = wunderground.set_index(pd.DatetimeIndex(wunderground['TimeCDT']))
# Highest precipitation value reported within each one-hour bin.
wund_hour = wunderground['PrecipitationIn'].resample('1H').max()
# Total over the hourly maxima, followed by a plot of the hourly series.
print(wund_hour.sum())
wund_hour.plot()
Out[50]:
In [ ]: