In [4]:
%matplotlib inline
import matplotlib
matplotlib.rcParams['figure.figsize'] = (20.0, 10.0) # larger figure size
import pandas as pd
import numpy as np
from pandas import DataFrame, Series
In [5]:
# Load the Syracuse, NY weather CSV straight from GitHub into a DataFrame.
weather_csv_url = "https://raw.githubusercontent.com/mafudge/datasets/master/weather/syracuse-ny.csv"
weather = pd.read_csv(weather_csv_url)
In [6]:
# Preview the first five rows to get a feel for the columns and values.
weather.head(n=5)
Out[6]:
In [7]:
# Distinct values found in the Events column (missing values included).
weather.Events.unique()
Out[7]:
Let's get just the thunderstorms!
In [10]:
# Keep only the days whose Events text mentions a thunderstorm.
# str.contains(..., regex=False) is a literal substring test, and na=False
# treats a missing Events value as "no thunderstorm", so the mask is strictly
# boolean instead of relying on NaN >= 0 evaluating to False.
is_thunder = weather['Events'].str.contains('Thunderstorm', regex=False, na=False)
thunder = weather[is_thunder]
thunder.head()
Out[10]:
The fraction of days it thunders in Syracuse, historically (multiply by 100 for a percentage)
In [9]:
# Share of recorded days with a thunderstorm: non-null EST entries in the
# thunder subset divided by those in the full frame. The result is a
# fraction, not a percentage.
thunder_days = thunder['EST'].count()
recorded_days = weather['EST'].count()
thunder_days / recorded_days
Out[9]:
In [8]:
# List every column label in the frame to see which measurements can be plotted.
weather.columns
Out[8]:
What is the relationship between Temperature and Dewpoint?
In [11]:
# Scatter of mean temperature (x) against mean dew point (y), one dot per row.
weather.plot(kind='scatter', x='Mean TemperatureF', y='MeanDew PointF')
Out[11]:
INSIGHT: Positive correlation between temperature and dew point. Every meteorologist knows this. ;-)
What is the relationship between cloud cover and visibility?
In [12]:
# Scatter of cloud cover (x) against mean visibility in miles (y).
weather.plot(kind='scatter', x='CloudCover', y='Mean VisibilityMiles')
Out[12]:
As one would expect, the less cloud cover, the greater the visibility.
How about temperature and wind speed?
In [13]:
# Scatter of mean temperature (x) against mean wind speed (y) for all days.
weather.plot(kind='scatter', x='Mean TemperatureF', y='Mean Wind SpeedMPH')
Out[13]:
not much of an insight there, but...
when you look at the relationship on days where it thunders:
In [12]:
# Same temperature-vs-wind-speed scatter, restricted to the thunderstorm days.
thunder.plot(kind='scatter', x='Mean TemperatureF', y='Mean Wind SpeedMPH')
Out[12]:
We see that it doesn't really thunder when it's cold out!
This plot is interesting. It shows that when the temperature is cold, the wind isn't coming out of the south. Makes sense for Syracuse.
In [14]:
# Scatter of mean temperature (x) against wind direction in degrees (y).
weather.plot(kind='scatter', x='Mean TemperatureF', y='WindDirDegrees')
Out[14]:
In [15]:
# Re-check the distinct Events values before filling in the missing ones.
weather.Events.unique()
Out[15]:
In [19]:
# Replace missing Events entries with the literal string 'None' so every row
# carries a text label.
events_filled = weather['Events'].fillna(value='None')
weather['Events'] = events_filled
In [20]:
# Daily temperature swing: the max reading minus the min reading, row by row.
weather['Diff TemperatureF'] = weather['Max TemperatureF'].sub(weather['Min TemperatureF'])
In [21]:
# Re-apply the larger default figure size (20 x 10 inches) for the plots below.
import matplotlib
matplotlib.rcParams.update({'figure.figsize': (20.0, 10.0)})
In [22]:
# Parse the EST text column into real datetimes so the frame can be used as
# time-series data.
weather['date'] = pd.to_datetime(weather['EST'])
In [23]:
# Plot the daily temperature swing (max minus min) for May 2015.
# Rows are selected on the parsed `date` column rather than by substring-matching
# the raw EST text, so the filter does not depend on how the month is written
# in the source file (e.g. "2015-5-1" vs "2015-05-01").
in_may_2015 = (weather['date'].dt.year == 2015) & (weather['date'].dt.month == 5)
weather[in_may_2015].plot.line(x='date', y='Diff TemperatureF')
Out[23]:
In [24]:
# Daily highs and lows from January 1, 2015 onward.
# The comparison is inclusive (>=) so the row dated exactly 2015-01-01 is kept;
# a strict > would drop the first day of the year.
w2015 = weather[weather['date'] >= '2015-01-01']
w2015.plot.line(x='date', y=['Max TemperatureF', 'Min TemperatureF'])
Out[24]:
In [ ]: