In [2]:
# Load the pylab namespace (NumPy + matplotlib names) and render figures inline.
%pylab inline
# Directory that holds the weather data files; note the trailing slash, since
# later cells append '/STAT.pickle' to this value.
data_dir = "./Data/Weather/"
In [3]:
!curl -o $data_dir/STAT.pickle http://mas-dse-open.s3.amazonaws.com/Weather/STAT.pickle
In [4]:
import pickle

# NOTE(review): unpickling can execute arbitrary code, so only load this file
# when the download source is trusted.
# Pickle data is binary: open with 'rb' (text mode can corrupt it on some
# platforms) and use a context manager so the file handle is always closed.
with open(data_dir+'/STAT.pickle','rb') as stat_file:
    STAT,STAT_description=pickle.load(stat_file)
In [5]:
# Display the keys of STAT; later cells iterate over them as measurement names
# (for example "TMAX").
STAT.keys()
Out[5]:
In [6]:
# Display the second object loaded from STAT.pickle alongside STAT
# (presumably a description of STAT's fields; verify against the output).
STAT_description
Out[6]:
In [7]:
Scalars=['mean','std','low1000','low100','high100','high1000']
for meas in STAT.keys():
!grep $meas './Data/Weather/ghcnd-readme.txt'
S=STAT[meas]
for scalar in Scalars:
print '%s:%f'%(scalar,S[scalar]),
print
In [ ]:
In [8]:
def YearlyPlots(T,ttl='',yl='',xl='',y=None,x=None,size=(10,7), c=None):
    """Plot one or more day-of-year series on a month-labelled x axis.

    Parameters
    ----------
    T : array-like
        Values to plot; the first dimension must have length 365
        (one entry per day 1..365).
    ttl, yl, xl : str
        Title, y-axis label and x-axis label.
    y, x : optional
        Passed to ``ylim`` / ``xlim`` when not None.
    size : tuple
        ``figsize`` handed to ``figure(1, ...)``.
    c : optional
        Line colour; when None the default colour cycle is used.

    Raises
    ------
    ValueError
        If ``T`` is a scalar or its first dimension is not 365. The check
        runs before any figure is created or activated, so an invalid call
        has no plotting side effects.
    """
    t_shape = np.shape(T)
    if len(t_shape) == 0 or t_shape[0] != 365:
        raise ValueError("First dimension of T should be 365. Shape(T)="+str(t_shape))

    yearday = list(range(1, 366))

    # NOTE(review): this always targets figure number 1; if that figure
    # already exists it is reused (confirm that figsize/dpi are then ignored
    # by the installed matplotlib version).
    figure(1, figsize=size, dpi=300)

    # Only pass a colour when the caller asked for one.
    line_kwargs = {} if c is None else {'color': c}
    plot_date(yearday, T, '-', **line_kwargs)

    # Label the ticks with abbreviated month names.
    plt.gca().xaxis.set_major_formatter(DateFormatter('%b'))
    ylabel(yl)
    xlabel(xl)
    if y is not None:
        ylim(y)
    if x is not None:
        xlim(x)
    grid()
    title(ttl)
In [9]:
# One row of three panels per measurement:
# value histogram | yearly mean +- std | yearly counts.
figure(figsize=(15,30))
offset=1
for meas in STAT.keys():
    S=STAT[meas]

    # Panel 1: histogram of the sorted values, using bins of width 5 between
    # the 'low100' and 'high100' entries.
    subplot(6,3,offset)
    offset+=1
    pyplot.hist(S['SortedVals'],bins=np.arange(S['low100'], S['high100'], 5))
    title(meas+' histogram')

    # Panel 2: mean (red) with mean + std and mean - std (blue).
    subplot(6,3,offset)
    offset+=1
    # Named std_y (not std) so the std() function that %pylab imports from
    # NumPy is not overwritten in the notebook namespace.
    std_y = sqrt(S["Var"])
    mean_y = S['Mean']
    std_plus_y = mean_y + std_y
    std_minus_y = mean_y - std_y
    YearlyPlots(mean_y,ttl=meas+' mean +-std',c='r')
    YearlyPlots(std_plus_y,ttl=meas+' mean +-std',c='b')
    YearlyPlots(std_minus_y,ttl=meas+' mean +-std',c='b')

    # Panel 3: the 'NE' series, plotted under the title "counts".
    subplot(6,3,offset)
    offset+=1
    YearlyPlots(S['NE'], ttl="counts")
In [15]:
# Plot the 'NE' series of TMAX across the year, in black, under the title "counts".
YearlyPlots(STAT["TMAX"]['NE'], ttl="counts", c='black')
Can you figure out what is the reason for these lower counts (especially at the beginning and end of the year and also the sudden dip at the end of each month)? Is it restricted to a subset of the stations? Suggest a way to remove this effect.
Can you explain the counts per day for "SNWD"?
Provide your explanation in new markdown cells appended after this cell. Support your explanation using code cells and graphs. If you need more data that is available only in the full dataset in the cloud but not in the data you have downloaded, contact your TA.
In [ ]:
In [ ]:
# One row of three panels per measurement:
# cumulative explained variance | yearly mean +- std | first three eigenvectors.
figure(figsize=(15,30))
offset=1
for meas in STAT.keys():
    S=STAT[meas]

    # Panel 1: cumulative share of the total eigenvalue sum covered by the
    # first 10 eigenvalues (assumes 'eigval' is ordered largest first; TODO confirm).
    subplot(6,3,offset)
    offset+=1
    tvar = S['eigval'].sum()
    explained_variance_ratio_ = S['eigval']/tvar
    pyplot.plot(np.cumsum(explained_variance_ratio_[:10]))
    title(meas+' percentage of variance explained')

    # Panel 2: mean (red) with mean + std and mean - std (blue).
    subplot(6,3,offset)
    offset+=1
    # Named std_y (not std) so the std() function that %pylab imports from
    # NumPy is not overwritten in the notebook namespace.
    std_y = sqrt(S["Var"])
    mean_y = S['Mean']
    std_plus_y = mean_y + std_y
    std_minus_y = mean_y - std_y
    YearlyPlots(mean_y,ttl=meas+' mean +-std',c='r')
    YearlyPlots(std_plus_y,ttl=meas+' mean +-std',c='b')
    YearlyPlots(std_minus_y,ttl=meas+' mean +-std',c='b')

    # Panel 3: columns 0, 1, 2 of 'eigvec' in red, green, blue. The title now
    # names the eigenvectors instead of the copy-pasted ' mean +-std'.
    subplot(6,3,offset)
    offset+=1
    eig_title = meas+' top 3 eigenvectors'
    YearlyPlots(S['eigvec'][:,0], ttl=eig_title, c='r')
    YearlyPlots(S['eigvec'][:,1], ttl=eig_title, c='g')
    YearlyPlots(S['eigvec'][:,2], ttl=eig_title, c='b')
In [ ]: