The Crappiest Winter.
In [121]:
import pandas as pd
import matplotlib.pyplot as plt
% matplotlib inline
In [122]:
# One "season" per variable: November 1 of the previous year through October 31
# of the year in the variable name (url11 covers 2010-11-01 .. 2011-10-31).
def _season_url(named_year):
    """Return the Weather Underground KMDW history URL for the season ending in ``named_year``."""
    return (
        "http://www.wunderground.com/history/airport/KMDW/"
        + str(named_year - 1)
        + "/11/1/CustomHistory.html?dayend=31&monthend=10&yearend="
        + str(named_year)
        + "&req_city=&req_state=&req_statename=&reqdb.zip=&reqdb.magic=&reqdb.wmo=&format=1"
    )


url11 = _season_url(2011)
url12 = _season_url(2012)
url13 = _season_url(2013)
url14 = _season_url(2014)
url15 = _season_url(2015)
In [123]:
# Fetch one season of observations per URL, in year order; each frame is named
# after the year its season ends in.
df_2011, df_2012, df_2013, df_2014, df_2015 = (
    pd.read_csv(season_url)
    for season_url in (url11, url12, url13, url14, url15)
)
In [124]:
# Sanity check: compare each later season's header against 2011's, label by label.
# Every printed entry should be True if the files share the same column layout.
for later_season in (df_2012, df_2013, df_2014, df_2015):
    print(df_2011.columns == later_season.columns)
In [125]:
# Next check: does each column hold the same dtype in every season?
# 2011 is the baseline; the other seasons are compared against it column by column.
print("2011 data types")
print(df_2011.dtypes)

dtype_checks = (
    ("2012 data types compared to 2011", df_2012),
    ("2013 data types compared to 2011", df_2013),
    ("2014 data types compared to 2011", df_2014),
    ("2015 data types compared to 2011", df_2015),
)
for heading, season_frame in dtype_checks:
    print()  # blank line between reports
    print(heading)
    print(df_2011.dtypes == season_frame.dtypes)
In [126]:
# Before going further, look at the raw header labels: some carry leading
# spaces that need cleaning up. (Step 1 of 2.)
raw_header_2011 = df_2011.columns
print(raw_header_2011)
In [127]:
# Step 2 of 2: replace the raw header with short labels that have no leading spaces.
# The same 23 labels, in the same order, are applied to every season.
# NOTE(review): 'Min_DewpointF' is cased differently from 'Max_DewPointF' and
# 'Mean_DewPointF'; kept as-is because other code may already use this spelling.
CLEAN_COLUMNS = [
    'CDT',
    'MaxTempF', 'MeanTempF', 'MinTempF',
    'Max_DewPointF', 'Mean_DewPointF', 'Min_DewpointF',
    'Max_Humidity', 'Mean_Humidity', 'Min_Humidity',
    'Max_PressureIn', 'Mean_PressureIn', 'Min_PressureIn',
    'Max_VisibilityMiles', 'Mean_VisibilityMiles', 'Min_VisibilityMiles',
    'Max_Wind_SpeedMPH', 'Mean_Wind_SpeedMPH', 'Max_Gust_MPH',
    'PrecipitationIn', 'CloudCover', 'Events', 'WindDirDegrees',
]

for season_frame in (df_2011, df_2012, df_2013, df_2014, df_2015):
    # Hand each frame its own copy of the label list.
    season_frame.columns = list(CLEAN_COLUMNS)
In [120]:
# Next up: clean the "T" (trace) entries in the precipitation column.
# This cell only inspects the current dtypes; the conversion happens in the next cell.
# NOTE(review): presumably PrecipitationIn shows as object because of the "T" strings — confirm in the output.
df_2011.dtypes
Out[120]:
In [134]:
allyears = [df_2011, df_2012, df_2013, df_2014, df_2015]

# Amount, in inches, substituted for a "T" (trace) precipitation reading.
TRACE_PRECIP_IN = 0.01


def _precip_to_float(reading):
    """Convert one raw PrecipitationIn value to a float.

    Any value whose text contains "T" becomes TRACE_PRECIP_IN; everything else
    goes through float(), so already-numeric values and NaN pass through unchanged.
    """
    if "T" in str(reading):
        return TRACE_PRECIP_IN
    return float(reading)


# Why the earlier version did nothing: it looped over the column's values and
# reassigned the loop variable, which only rebinds a local name and never writes
# back to the DataFrame. Assigning the converted Series to the column updates
# each frame in place. Re-running this cell is safe, because floats convert to
# themselves.
for yr in allyears:
    yr["PrecipitationIn"] = yr["PrecipitationIn"].map(_precip_to_float).astype(float)
In [135]:
# Show the dtypes of every season's frame, one report per frame, to check the
# result of the precipitation cleanup.
for season_frame in allyears:
    season_dtypes = season_frame.dtypes
    print(season_dtypes)
In [136]:
# Preview the first rows of the 2012 precipitation column.
# `.head` without parentheses only displays the bound method, so call it.
df_2012["PrecipitationIn"].head()
Out[136]:
In [ ]:
In [ ]:
In [114]:
# Overlay cloud cover for all five seasons on one set of axes.
# The frames were relabelled earlier in the notebook, so the column is
# 'CloudCover' (no leading space); the old ' CloudCover' key raises KeyError
# when the notebook is run top to bottom.
fig, ax = plt.subplots()
seasons = (
    ("2011", df_2011),
    ("2012", df_2012),
    ("2013", df_2013),
    ("2014", df_2014),
    ("2015", df_2015),
)
for season_label, season_frame in seasons:
    season_frame['CloudCover'].plot(kind="line", ax=ax, label=season_label)
ax.set(title="Cloud cover by season", xlabel="Row index within season", ylabel="CloudCover")
ax.legend();
Out[114]:
In [74]:
# "Misery" score for 2011: minimum temperature multiplied by cloud cover, row by row.
# Uses the relabelled column names ('MinTempF', 'CloudCover'); the original raw
# labels ("Min TemperatureF", " CloudCover") no longer exist after the rename
# and raise KeyError on a top-to-bottom run.
df_2011["misery"] = df_2011["MinTempF"] * df_2011["CloudCover"]
In [76]:
# Draw the 2011 misery score as a red line.
misery_2011 = df_2011['misery']
misery_2011.plot(kind="line", color='red')
Out[76]:
In [77]:
# Peek at the first five rows of the 2011 frame (including the new misery column).
df_2011.head(n=5)
Out[77]:
In [100]:
# Total 2012 precipitation in inches.
# The builtin sum() raises TypeError if the column still holds strings such as
# "T" (trace), so convert each value first (trace counts as 0.01 in, matching
# the cleanup step) and then use Series.sum(), which also skips missing values.
# The conversion does nothing if the column is already float.
df_2012["PrecipitationIn"].map(
    lambda reading: 0.01 if "T" in str(reading) else float(reading)
).sum()
In [99]:
# Diagnostic: list every 2012 precipitation value that is stored as a plain str.
# An empty list means no string values remain in the column.
list(filter(lambda reading: type(reading) is str, df_2012["PrecipitationIn"]))
Out[99]:
In [89]:
# Diagnostic: list every 2012 precipitation value whose text contains "T" (trace marker).
list(filter(lambda reading: "T" in str(reading), df_2012["PrecipitationIn"]))
Out[89]:
In [88]:
# Frequency table of the distinct 2011 precipitation values, most common first.
precip_2011 = df_2011.loc[:, "PrecipitationIn"]
precip_2011.value_counts()
Out[88]:
In [ ]: