In [94]:
# Adapted from work by @kbrose
from __future__ import absolute_import, division, print_function, unicode_literals

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib notebook

In [95]:
rain_df = pd.read_csv('data/full_ohare_noaa.csv')
rain_df.head()


d:\data_science_projects\chicagorain\virtualenvs\nyear-venv\lib\site-packages\IPython\core\interactiveshell.py:2717: DtypeWarning: Columns (8,10,11,12,13,14,15,16,17,18,20,22,23,24,25,26,27,28,29,33,34,37,38,39,40,44,45,59,63,64,67,68,69,70,84,85,86,87) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)
Out[95]:
STATION STATION_NAME ELEVATION LATITUDE LONGITUDE DATE REPORTTPYE HOURLYSKYCONDITIONS HOURLYVISIBILITY HOURLYPRSENTWEATHERTYPE ... MonthlyMaxSeaLevelPressureTime MonthlyMinSeaLevelPressureValue MonthlyMinSeaLevelPressureDate MonthlyMinSeaLevelPressureTime MonthlyTotalHeatingDegreeDays MonthlyTotalCoolingDegreeDays MonthlyDeptFromNormalHeatingDD MonthlyDeptFromNormalCoolingDD MonthlyTotalSeasonToDateHeatingDD MonthlyTotalSeasonToDateCoolingDD
0 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 201.8 41.995 -87.9336 1946-10-01 01:00 SAO CLR:00 1.99 ||FG:11 ... -9999 NaN -9999 -9999 NaN NaN NaN NaN NaN NaN
1 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 201.8 41.995 -87.9336 1946-10-01 02:00 SAO CLR:00 1.99 ||FG:11 ... -9999 NaN -9999 -9999 NaN NaN NaN NaN NaN NaN
2 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 201.8 41.995 -87.9336 1946-10-01 03:00 SAO CLR:00 1.99 ||FG:11 ... -9999 NaN -9999 -9999 NaN NaN NaN NaN NaN NaN
3 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 201.8 41.995 -87.9336 1946-10-01 04:00 SAO CLR:00 2.98 ||FG:11 ... -9999 NaN -9999 -9999 NaN NaN NaN NaN NaN NaN
4 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 201.8 41.995 -87.9336 1946-10-01 05:00 SAO CLR:00 2.98 ||FG:11 ... -9999 NaN -9999 -9999 NaN NaN NaN NaN NaN NaN

5 rows × 90 columns


In [96]:
rain_df.columns.values


Out[96]:
array(['STATION', 'STATION_NAME', 'ELEVATION', 'LATITUDE', 'LONGITUDE',
       'DATE', 'REPORTTPYE', 'HOURLYSKYCONDITIONS', 'HOURLYVISIBILITY',
       'HOURLYPRSENTWEATHERTYPE', 'HOURLYDRYBULBTEMPF',
       'HOURLYDRYBULBTEMPC', 'HOURLYWETBULBTEMPF', 'HOURLYWETBULBTEMPC',
       'HOURLYDewPointTempF', 'HOURLYDewPointTempC',
       'HOURLYRelativeHumidity', 'HOURLYWindSpeed', 'HOURLYWindDirection',
       'HOURLYWindGustSpeed', 'HOURLYStationPressure',
       'HOURLYPressureTendency', 'HOURLYPressureChange',
       'HOURLYSeaLevelPressure', 'HOURLYPrecip', 'HOURLYAltimeterSetting',
       'DAILYMaximumDryBulbTemp', 'DAILYMinimumDryBulbTemp',
       'DAILYAverageDryBulbTemp', 'DAILYDeptFromNormalAverageTemp',
       'DAILYAverageRelativeHumidity', 'DAILYAverageDewPointTemp',
       'DAILYAverageWetBulbTemp', 'DAILYHeatingDegreeDays',
       'DAILYCoolingDegreeDays', 'DAILYSunrise', 'DAILYSunset',
       'DAILYWeather', 'DAILYPrecip', 'DAILYSnowfall', 'DAILYSnowDepth',
       'DAILYAverageStationPressure', 'DAILYAverageSeaLevelPressure',
       'DAILYAverageWindSpeed', 'DAILYPeakWindSpeed', 'PeakWindDirection',
       'DAILYSustainedWindSpeed', 'DAILYSustainedWindDirection',
       'MonthlyMaximumTemp', 'MonthlyMinimumTemp', 'MonthlyMeanTemp',
       'MonthlyAverageRH', 'MonthlyDewpointTemp', 'MonthlyWetBulbTemp',
       'MonthlyAvgHeatingDegreeDays', 'MonthlyAvgCoolingDegreeDays',
       'MonthlyStationPressure', 'MonthlySeaLevelPressure',
       'MonthlyAverageWindSpeed', 'MonthlyTotalSnowfall',
       'MonthlyDeptFromNormalMaximumTemp',
       'MonthlyDeptFromNormalMinimumTemp',
       'MonthlyDeptFromNormalAverageTemp', 'MonthlyDeptFromNormalPrecip',
       'MonthlyTotalLiquidPrecip', 'MonthlyGreatestPrecip',
       'MonthlyGreatestPrecipDate', 'MonthlyGreatestSnowfall',
       'MonthlyGreatestSnowfallDate', 'MonthlyGreatestSnowDepth',
       'MonthlyGreatestSnowDepthDate', 'MonthlyDaysWithGT90Temp',
       'MonthlyDaysWithLT32Temp', 'MonthlyDaysWithGT32Temp',
       'MonthlyDaysWithLT0Temp', 'MonthlyDaysWithGT001Precip',
       'MonthlyDaysWithGT010Precip', 'MonthlyDaysWithGT1Snow',
       'MonthlyMaxSeaLevelPressureValue', 'MonthlyMaxSeaLevelPressureDate',
       'MonthlyMaxSeaLevelPressureTime', 'MonthlyMinSeaLevelPressureValue',
       'MonthlyMinSeaLevelPressureDate', 'MonthlyMinSeaLevelPressureTime',
       'MonthlyTotalHeatingDegreeDays', 'MonthlyTotalCoolingDegreeDays',
       'MonthlyDeptFromNormalHeatingDD', 'MonthlyDeptFromNormalCoolingDD',
       'MonthlyTotalSeasonToDateHeatingDD',
       'MonthlyTotalSeasonToDateCoolingDD'], dtype=object)

In [97]:
ohare_rain_df = rain_df[['STATION', 'STATION_NAME', 'DATE', 'HOURLYPrecip']]
ohare_rain_df['datetime'] = pd.to_datetime(ohare_rain_df['DATE'])
ohare_rain_df = ohare_rain_df.set_index(pd.DatetimeIndex(ohare_rain_df['datetime']))
ohare_rain_df.head()


d:\data_science_projects\chicagorain\virtualenvs\nyear-venv\lib\site-packages\ipykernel\__main__.py:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app
Out[97]:
STATION STATION_NAME DATE HOURLYPrecip datetime
1946-10-01 01:00:00 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 1946-10-01 01:00 NaN 1946-10-01 01:00:00
1946-10-01 02:00:00 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 1946-10-01 02:00 NaN 1946-10-01 02:00:00
1946-10-01 03:00:00 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 1946-10-01 03:00 NaN 1946-10-01 03:00:00
1946-10-01 04:00:00 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 1946-10-01 04:00 NaN 1946-10-01 04:00:00
1946-10-01 05:00:00 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 1946-10-01 05:00 NaN 1946-10-01 05:00:00

In [98]:
# There is no hourly precipitation data before 1970.
ohare_rain_df = ohare_rain_df['19700101':]
ohare_rain_df.head()


Out[98]:
STATION STATION_NAME DATE HOURLYPrecip datetime
1970-01-01 03:00:00 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 1970-01-01 03:00 0 1970-01-01 03:00:00
1970-01-01 06:00:00 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 1970-01-01 06:00 0 1970-01-01 06:00:00
1970-01-01 09:00:00 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 1970-01-01 09:00 0.01 1970-01-01 09:00:00
1970-01-01 12:00:00 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 1970-01-01 12:00 0 1970-01-01 12:00:00
1970-01-01 15:00:00 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 1970-01-01 15:00 0 1970-01-01 15:00:00

In [99]:
# Some of the hourly precipitation values are not actually numbers
def find_bad_values():
    dates_with_bad_precip = []
    for index, row in ohare_rain_df.iterrows():
        try:
            float(row['HOURLYPrecip'])
        except:
            dates_with_bad_precip.append(str(row.datetime))
    return dates_with_bad_precip
ohare_rain_df[ohare_rain_df.index.isin(find_bad_values())]


Out[99]:
STATION STATION_NAME DATE HOURLYPrecip datetime
1973-01-03 14:00:00 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 1973-01-03 14:00 0.09s 1973-01-03 14:00:00
1973-01-03 15:00:00 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 1973-01-03 15:00 0.26s 1973-01-03 15:00:00
1973-01-03 16:00:00 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 1973-01-03 16:00 0.16s 1973-01-03 16:00:00
1973-01-03 17:00:00 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 1973-01-03 17:00 0.06s 1973-01-03 17:00:00
1973-01-03 18:00:00 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 1973-01-03 18:00 0.01s 1973-01-03 18:00:00
1973-01-03 19:00:00 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 1973-01-03 19:00 0.01s 1973-01-03 19:00:00
1973-01-03 20:00:00 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 1973-01-03 20:00 0.01s 1973-01-03 20:00:00
1973-01-03 21:00:00 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 1973-01-03 21:00 0.01s 1973-01-03 21:00:00
1973-01-03 22:00:00 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 1973-01-03 22:00 0.01s 1973-01-03 22:00:00
1973-01-19 00:00:00 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 1973-01-19 00:00 0.01s 1973-01-19 00:00:00
1973-03-03 02:00:00 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 1973-03-03 02:00 0.01s 1973-03-03 02:00:00
1973-03-05 00:00:00 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 1973-03-05 00:00 0.01s 1973-03-05 00:00:00
1973-03-09 09:00:00 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 1973-03-09 09:00 0.01s 1973-03-09 09:00:00
1973-03-10 18:00:00 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 1973-03-10 18:00 0.09s 1973-03-10 18:00:00
1973-03-16 18:00:00 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 1973-03-16 18:00 0.01s 1973-03-16 18:00:00
1973-03-31 21:00:00 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 1973-03-31 21:00 0.16s 1973-03-31 21:00:00
1973-04-09 03:00:00 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 1973-04-09 03:00 0.04s 1973-04-09 03:00:00
1973-04-11 18:00:00 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 1973-04-11 18:00 0.01s 1973-04-11 18:00:00
1973-04-29 03:00:00 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 1973-04-29 03:00 0.01s 1973-04-29 03:00:00
1973-05-14 06:00:00 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 1973-05-14 06:00 0.13s 1973-05-14 06:00:00
1973-05-22 03:00:00 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 1973-05-22 03:00 0.01s 1973-05-22 03:00:00
1973-05-29 21:00:00 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 1973-05-29 21:00 0.01s 1973-05-29 21:00:00
1973-06-05 09:00:00 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 1973-06-05 09:00 0.01s 1973-06-05 09:00:00
1973-06-05 15:00:00 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 1973-06-05 15:00 0.17s 1973-06-05 15:00:00
1973-06-23 12:00:00 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 1973-06-23 12:00 0.05s 1973-06-23 12:00:00
1973-06-27 21:00:00 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 1973-06-27 21:00 0.32s 1973-06-27 21:00:00
1973-08-14 03:00:00 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 1973-08-14 03:00 0.01s 1973-08-14 03:00:00
1973-08-23 21:00:00 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 1973-08-23 21:00 0.01s 1973-08-23 21:00:00
1973-09-16 12:00:00 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 1973-09-16 12:00 0.01s 1973-09-16 12:00:00
1973-09-24 18:00:00 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 1973-09-24 18:00 0.26s 1973-09-24 18:00:00
... ... ... ... ... ...
2016-08-13 01:47:00 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 2016-08-13 01:47 T 2016-08-13 01:47:00
2016-08-13 01:51:00 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 2016-08-13 01:51 T 2016-08-13 01:51:00
2016-08-13 02:51:00 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 2016-08-13 02:51 T 2016-08-13 02:51:00
2016-08-15 23:51:00 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 2016-08-15 23:51 T 2016-08-15 23:51:00
2016-08-16 00:51:00 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 2016-08-16 00:51 T 2016-08-16 00:51:00
2016-08-16 01:51:00 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 2016-08-16 01:51 T 2016-08-16 01:51:00
2016-08-16 02:17:00 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 2016-08-16 02:17 T 2016-08-16 02:17:00
2016-08-16 02:40:00 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 2016-08-16 02:40 T 2016-08-16 02:40:00
2016-08-17 15:51:00 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 2016-08-17 15:51 T 2016-08-17 15:51:00
2016-08-18 05:03:00 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 2016-08-18 05:03 T 2016-08-18 05:03:00
2016-08-18 06:51:00 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 2016-08-18 06:51 T 2016-08-18 06:51:00
2016-08-18 23:07:00 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 2016-08-18 23:07 T 2016-08-18 23:07:00
2016-08-19 02:51:00 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 2016-08-19 02:51 T 2016-08-19 02:51:00
2016-08-19 08:51:00 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 2016-08-19 08:51 0.04s 2016-08-19 08:51:00
2016-08-20 08:40:00 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 2016-08-20 08:40 T 2016-08-20 08:40:00
2016-08-20 08:51:00 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 2016-08-20 08:51 T 2016-08-20 08:51:00
2016-08-20 09:18:00 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 2016-08-20 09:18 T 2016-08-20 09:18:00
2016-08-20 09:51:00 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 2016-08-20 09:51 T 2016-08-20 09:51:00
2016-08-20 11:51:00 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 2016-08-20 11:51 T 2016-08-20 11:51:00
2016-08-20 12:51:00 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 2016-08-20 12:51 T 2016-08-20 12:51:00
2016-08-20 16:29:00 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 2016-08-20 16:29 T 2016-08-20 16:29:00
2016-08-24 06:51:00 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 2016-08-24 06:51 T 2016-08-24 06:51:00
2016-08-24 08:58:00 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 2016-08-24 08:58 T 2016-08-24 08:58:00
2016-08-24 23:44:00 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 2016-08-24 23:44 T 2016-08-24 23:44:00
2016-08-24 23:51:00 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 2016-08-24 23:51 0.04s 2016-08-24 23:51:00
2016-08-25 03:51:00 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 2016-08-25 03:51 T 2016-08-25 03:51:00
2016-08-26 23:51:00 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 2016-08-26 23:51 T 2016-08-26 23:51:00
2016-08-27 00:51:00 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 2016-08-27 00:51 T 2016-08-27 00:51:00
2016-08-27 08:39:00 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 2016-08-27 08:39 T 2016-08-27 08:39:00
2016-08-27 08:51:00 WBAN:94846 CHICAGO OHARE INTERNATIONAL AIRPORT IL US 2016-08-27 08:51 T 2016-08-27 08:51:00

23574 rows × 5 columns


In [100]:
# Some of the Hourly precipition values have a value of T, which means Trace.  This should be 0.
# Some have a number followed by 's'.  We still need to figure out what this means.  (A GitHub issue opened).  For now,
# we will set these to 0 as well
ohare_rain_df.loc[ohare_rain_df.index.isin(find_bad_values()), 'HOURLYPrecip'] = 0.0
ohare_rain_df[ohare_rain_df.index.isin(find_bad_values())]


Out[100]:
STATION STATION_NAME DATE HOURLYPrecip datetime

In [101]:
# Write CSV file
ohare_rain_df.to_csv('data/ohare_hourly_20160929.csv', index=False)

In [ ]: