In [94]:
# Adapted from work by @kbrose
from __future__ import absolute_import, division, print_function, unicode_literals
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib notebook
In [95]:
rain_df = pd.read_csv('data/full_ohare_noaa.csv')
rain_df.head()
d:\data_science_projects\chicagorain\virtualenvs\nyear-venv\lib\site-packages\IPython\core\interactiveshell.py:2717: DtypeWarning: Columns (8,10,11,12,13,14,15,16,17,18,20,22,23,24,25,26,27,28,29,33,34,37,38,39,40,44,45,59,63,64,67,68,69,70,84,85,86,87) have mixed types. Specify dtype option on import or set low_memory=False.
interactivity=interactivity, compiler=compiler, result=result)
Out[95]:
STATION
STATION_NAME
ELEVATION
LATITUDE
LONGITUDE
DATE
REPORTTPYE
HOURLYSKYCONDITIONS
HOURLYVISIBILITY
HOURLYPRSENTWEATHERTYPE
...
MonthlyMaxSeaLevelPressureTime
MonthlyMinSeaLevelPressureValue
MonthlyMinSeaLevelPressureDate
MonthlyMinSeaLevelPressureTime
MonthlyTotalHeatingDegreeDays
MonthlyTotalCoolingDegreeDays
MonthlyDeptFromNormalHeatingDD
MonthlyDeptFromNormalCoolingDD
MonthlyTotalSeasonToDateHeatingDD
MonthlyTotalSeasonToDateCoolingDD
0
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
201.8
41.995
-87.9336
1946-10-01 01:00
SAO
CLR:00
1.99
||FG:11
...
-9999
NaN
-9999
-9999
NaN
NaN
NaN
NaN
NaN
NaN
1
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
201.8
41.995
-87.9336
1946-10-01 02:00
SAO
CLR:00
1.99
||FG:11
...
-9999
NaN
-9999
-9999
NaN
NaN
NaN
NaN
NaN
NaN
2
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
201.8
41.995
-87.9336
1946-10-01 03:00
SAO
CLR:00
1.99
||FG:11
...
-9999
NaN
-9999
-9999
NaN
NaN
NaN
NaN
NaN
NaN
3
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
201.8
41.995
-87.9336
1946-10-01 04:00
SAO
CLR:00
2.98
||FG:11
...
-9999
NaN
-9999
-9999
NaN
NaN
NaN
NaN
NaN
NaN
4
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
201.8
41.995
-87.9336
1946-10-01 05:00
SAO
CLR:00
2.98
||FG:11
...
-9999
NaN
-9999
-9999
NaN
NaN
NaN
NaN
NaN
NaN
5 rows × 90 columns
In [96]:
rain_df.columns.values
Out[96]:
array(['STATION', 'STATION_NAME', 'ELEVATION', 'LATITUDE', 'LONGITUDE',
'DATE', 'REPORTTPYE', 'HOURLYSKYCONDITIONS', 'HOURLYVISIBILITY',
'HOURLYPRSENTWEATHERTYPE', 'HOURLYDRYBULBTEMPF',
'HOURLYDRYBULBTEMPC', 'HOURLYWETBULBTEMPF', 'HOURLYWETBULBTEMPC',
'HOURLYDewPointTempF', 'HOURLYDewPointTempC',
'HOURLYRelativeHumidity', 'HOURLYWindSpeed', 'HOURLYWindDirection',
'HOURLYWindGustSpeed', 'HOURLYStationPressure',
'HOURLYPressureTendency', 'HOURLYPressureChange',
'HOURLYSeaLevelPressure', 'HOURLYPrecip', 'HOURLYAltimeterSetting',
'DAILYMaximumDryBulbTemp', 'DAILYMinimumDryBulbTemp',
'DAILYAverageDryBulbTemp', 'DAILYDeptFromNormalAverageTemp',
'DAILYAverageRelativeHumidity', 'DAILYAverageDewPointTemp',
'DAILYAverageWetBulbTemp', 'DAILYHeatingDegreeDays',
'DAILYCoolingDegreeDays', 'DAILYSunrise', 'DAILYSunset',
'DAILYWeather', 'DAILYPrecip', 'DAILYSnowfall', 'DAILYSnowDepth',
'DAILYAverageStationPressure', 'DAILYAverageSeaLevelPressure',
'DAILYAverageWindSpeed', 'DAILYPeakWindSpeed', 'PeakWindDirection',
'DAILYSustainedWindSpeed', 'DAILYSustainedWindDirection',
'MonthlyMaximumTemp', 'MonthlyMinimumTemp', 'MonthlyMeanTemp',
'MonthlyAverageRH', 'MonthlyDewpointTemp', 'MonthlyWetBulbTemp',
'MonthlyAvgHeatingDegreeDays', 'MonthlyAvgCoolingDegreeDays',
'MonthlyStationPressure', 'MonthlySeaLevelPressure',
'MonthlyAverageWindSpeed', 'MonthlyTotalSnowfall',
'MonthlyDeptFromNormalMaximumTemp',
'MonthlyDeptFromNormalMinimumTemp',
'MonthlyDeptFromNormalAverageTemp', 'MonthlyDeptFromNormalPrecip',
'MonthlyTotalLiquidPrecip', 'MonthlyGreatestPrecip',
'MonthlyGreatestPrecipDate', 'MonthlyGreatestSnowfall',
'MonthlyGreatestSnowfallDate', 'MonthlyGreatestSnowDepth',
'MonthlyGreatestSnowDepthDate', 'MonthlyDaysWithGT90Temp',
'MonthlyDaysWithLT32Temp', 'MonthlyDaysWithGT32Temp',
'MonthlyDaysWithLT0Temp', 'MonthlyDaysWithGT001Precip',
'MonthlyDaysWithGT010Precip', 'MonthlyDaysWithGT1Snow',
'MonthlyMaxSeaLevelPressureValue', 'MonthlyMaxSeaLevelPressureDate',
'MonthlyMaxSeaLevelPressureTime', 'MonthlyMinSeaLevelPressureValue',
'MonthlyMinSeaLevelPressureDate', 'MonthlyMinSeaLevelPressureTime',
'MonthlyTotalHeatingDegreeDays', 'MonthlyTotalCoolingDegreeDays',
'MonthlyDeptFromNormalHeatingDD', 'MonthlyDeptFromNormalCoolingDD',
'MonthlyTotalSeasonToDateHeatingDD',
'MonthlyTotalSeasonToDateCoolingDD'], dtype=object)
In [97]:
ohare_rain_df = rain_df[['STATION', 'STATION_NAME', 'DATE', 'HOURLYPrecip']]
ohare_rain_df['datetime'] = pd.to_datetime(ohare_rain_df['DATE'])
ohare_rain_df = ohare_rain_df.set_index(pd.DatetimeIndex(ohare_rain_df['datetime']))
ohare_rain_df.head()
d:\data_science_projects\chicagorain\virtualenvs\nyear-venv\lib\site-packages\ipykernel\__main__.py:2: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
from ipykernel import kernelapp as app
Out[97]:
STATION
STATION_NAME
DATE
HOURLYPrecip
datetime
1946-10-01 01:00:00
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
1946-10-01 01:00
NaN
1946-10-01 01:00:00
1946-10-01 02:00:00
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
1946-10-01 02:00
NaN
1946-10-01 02:00:00
1946-10-01 03:00:00
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
1946-10-01 03:00
NaN
1946-10-01 03:00:00
1946-10-01 04:00:00
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
1946-10-01 04:00
NaN
1946-10-01 04:00:00
1946-10-01 05:00:00
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
1946-10-01 05:00
NaN
1946-10-01 05:00:00
In [98]:
# There is no hourly precipitation data before 1970.
ohare_rain_df = ohare_rain_df['19700101':]
ohare_rain_df.head()
Out[98]:
STATION
STATION_NAME
DATE
HOURLYPrecip
datetime
1970-01-01 03:00:00
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
1970-01-01 03:00
0
1970-01-01 03:00:00
1970-01-01 06:00:00
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
1970-01-01 06:00
0
1970-01-01 06:00:00
1970-01-01 09:00:00
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
1970-01-01 09:00
0.01
1970-01-01 09:00:00
1970-01-01 12:00:00
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
1970-01-01 12:00
0
1970-01-01 12:00:00
1970-01-01 15:00:00
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
1970-01-01 15:00
0
1970-01-01 15:00:00
In [99]:
# Some of the hourly precipitation values are not actually numbers
def find_bad_values():
dates_with_bad_precip = []
for index, row in ohare_rain_df.iterrows():
try:
float(row['HOURLYPrecip'])
except:
dates_with_bad_precip.append(str(row.datetime))
return dates_with_bad_precip
ohare_rain_df[ohare_rain_df.index.isin(find_bad_values())]
Out[99]:
STATION
STATION_NAME
DATE
HOURLYPrecip
datetime
1973-01-03 14:00:00
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
1973-01-03 14:00
0.09s
1973-01-03 14:00:00
1973-01-03 15:00:00
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
1973-01-03 15:00
0.26s
1973-01-03 15:00:00
1973-01-03 16:00:00
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
1973-01-03 16:00
0.16s
1973-01-03 16:00:00
1973-01-03 17:00:00
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
1973-01-03 17:00
0.06s
1973-01-03 17:00:00
1973-01-03 18:00:00
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
1973-01-03 18:00
0.01s
1973-01-03 18:00:00
1973-01-03 19:00:00
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
1973-01-03 19:00
0.01s
1973-01-03 19:00:00
1973-01-03 20:00:00
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
1973-01-03 20:00
0.01s
1973-01-03 20:00:00
1973-01-03 21:00:00
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
1973-01-03 21:00
0.01s
1973-01-03 21:00:00
1973-01-03 22:00:00
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
1973-01-03 22:00
0.01s
1973-01-03 22:00:00
1973-01-19 00:00:00
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
1973-01-19 00:00
0.01s
1973-01-19 00:00:00
1973-03-03 02:00:00
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
1973-03-03 02:00
0.01s
1973-03-03 02:00:00
1973-03-05 00:00:00
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
1973-03-05 00:00
0.01s
1973-03-05 00:00:00
1973-03-09 09:00:00
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
1973-03-09 09:00
0.01s
1973-03-09 09:00:00
1973-03-10 18:00:00
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
1973-03-10 18:00
0.09s
1973-03-10 18:00:00
1973-03-16 18:00:00
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
1973-03-16 18:00
0.01s
1973-03-16 18:00:00
1973-03-31 21:00:00
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
1973-03-31 21:00
0.16s
1973-03-31 21:00:00
1973-04-09 03:00:00
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
1973-04-09 03:00
0.04s
1973-04-09 03:00:00
1973-04-11 18:00:00
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
1973-04-11 18:00
0.01s
1973-04-11 18:00:00
1973-04-29 03:00:00
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
1973-04-29 03:00
0.01s
1973-04-29 03:00:00
1973-05-14 06:00:00
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
1973-05-14 06:00
0.13s
1973-05-14 06:00:00
1973-05-22 03:00:00
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
1973-05-22 03:00
0.01s
1973-05-22 03:00:00
1973-05-29 21:00:00
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
1973-05-29 21:00
0.01s
1973-05-29 21:00:00
1973-06-05 09:00:00
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
1973-06-05 09:00
0.01s
1973-06-05 09:00:00
1973-06-05 15:00:00
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
1973-06-05 15:00
0.17s
1973-06-05 15:00:00
1973-06-23 12:00:00
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
1973-06-23 12:00
0.05s
1973-06-23 12:00:00
1973-06-27 21:00:00
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
1973-06-27 21:00
0.32s
1973-06-27 21:00:00
1973-08-14 03:00:00
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
1973-08-14 03:00
0.01s
1973-08-14 03:00:00
1973-08-23 21:00:00
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
1973-08-23 21:00
0.01s
1973-08-23 21:00:00
1973-09-16 12:00:00
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
1973-09-16 12:00
0.01s
1973-09-16 12:00:00
1973-09-24 18:00:00
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
1973-09-24 18:00
0.26s
1973-09-24 18:00:00
...
...
...
...
...
...
2016-08-13 01:47:00
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
2016-08-13 01:47
T
2016-08-13 01:47:00
2016-08-13 01:51:00
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
2016-08-13 01:51
T
2016-08-13 01:51:00
2016-08-13 02:51:00
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
2016-08-13 02:51
T
2016-08-13 02:51:00
2016-08-15 23:51:00
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
2016-08-15 23:51
T
2016-08-15 23:51:00
2016-08-16 00:51:00
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
2016-08-16 00:51
T
2016-08-16 00:51:00
2016-08-16 01:51:00
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
2016-08-16 01:51
T
2016-08-16 01:51:00
2016-08-16 02:17:00
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
2016-08-16 02:17
T
2016-08-16 02:17:00
2016-08-16 02:40:00
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
2016-08-16 02:40
T
2016-08-16 02:40:00
2016-08-17 15:51:00
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
2016-08-17 15:51
T
2016-08-17 15:51:00
2016-08-18 05:03:00
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
2016-08-18 05:03
T
2016-08-18 05:03:00
2016-08-18 06:51:00
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
2016-08-18 06:51
T
2016-08-18 06:51:00
2016-08-18 23:07:00
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
2016-08-18 23:07
T
2016-08-18 23:07:00
2016-08-19 02:51:00
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
2016-08-19 02:51
T
2016-08-19 02:51:00
2016-08-19 08:51:00
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
2016-08-19 08:51
0.04s
2016-08-19 08:51:00
2016-08-20 08:40:00
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
2016-08-20 08:40
T
2016-08-20 08:40:00
2016-08-20 08:51:00
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
2016-08-20 08:51
T
2016-08-20 08:51:00
2016-08-20 09:18:00
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
2016-08-20 09:18
T
2016-08-20 09:18:00
2016-08-20 09:51:00
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
2016-08-20 09:51
T
2016-08-20 09:51:00
2016-08-20 11:51:00
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
2016-08-20 11:51
T
2016-08-20 11:51:00
2016-08-20 12:51:00
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
2016-08-20 12:51
T
2016-08-20 12:51:00
2016-08-20 16:29:00
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
2016-08-20 16:29
T
2016-08-20 16:29:00
2016-08-24 06:51:00
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
2016-08-24 06:51
T
2016-08-24 06:51:00
2016-08-24 08:58:00
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
2016-08-24 08:58
T
2016-08-24 08:58:00
2016-08-24 23:44:00
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
2016-08-24 23:44
T
2016-08-24 23:44:00
2016-08-24 23:51:00
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
2016-08-24 23:51
0.04s
2016-08-24 23:51:00
2016-08-25 03:51:00
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
2016-08-25 03:51
T
2016-08-25 03:51:00
2016-08-26 23:51:00
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
2016-08-26 23:51
T
2016-08-26 23:51:00
2016-08-27 00:51:00
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
2016-08-27 00:51
T
2016-08-27 00:51:00
2016-08-27 08:39:00
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
2016-08-27 08:39
T
2016-08-27 08:39:00
2016-08-27 08:51:00
WBAN:94846
CHICAGO OHARE INTERNATIONAL AIRPORT IL US
2016-08-27 08:51
T
2016-08-27 08:51:00
23574 rows × 5 columns
In [100]:
# Some of the Hourly precipition values have a value of T, which means Trace. This should be 0.
# Some have a number followed by 's'. We still need to figure out what this means. (A GitHub issue opened). For now,
# we will set these to 0 as well
ohare_rain_df.loc[ohare_rain_df.index.isin(find_bad_values()), 'HOURLYPrecip'] = 0.0
ohare_rain_df[ohare_rain_df.index.isin(find_bad_values())]
Out[100]:
STATION
STATION_NAME
DATE
HOURLYPrecip
datetime
In [101]:
# Write CSV file
ohare_rain_df.to_csv('data/ohare_hourly_20160929.csv', index=False)
In [ ]:
Content source: NORCatUofC/rain
Similar notebooks: