In [1]:
from datetime import datetime
import urllib2
import pandas as pd
import pandas.io.data
import numpy as np
import scipy as sp
import matplotlib.pyplot as plt
pd.set_option('max_columns', 50)
%matplotlib inline
In [2]:
# Notebook switches
run_console = True       # open a qtconsole at the end
update_from_web = True   # re-download the ONI and Niño 3.4 index files
plot_on = True
In [3]:
if update_from_web:
    # Oceanic Niño Index (ONI): running 3-month SST anomalies for the Niño 3.4 region
    url = 'http://www.cpc.ncep.noaa.gov/data/indices/oni.ascii.txt'
    req = urllib2.Request(url)
    csvio = urllib2.urlopen(req)
    df = pd.read_csv(csvio, index_col=['YR', 'SEAS'], sep='\s+', usecols=['SEAS', 'YR', 'TOTAL', 'ANOM'])
    df.to_csv('ONI.csv')
    # Monthly detrended Niño 3.4 SST index
    url = 'http://www.cpc.ncep.noaa.gov/products/analysis_monitoring/ensostuff/detrend.nino34.ascii.txt'
    req = urllib2.Request(url)
    csvio = urllib2.urlopen(req)
    df = pd.read_csv(csvio, sep='\s+', index_col=['YR', 'MON'], usecols=['MON', 'YR', 'TOTAL', 'ANOM', 'ClimAdjust'])
    df.to_csv('NINO34.csv')
In [4]:
# Read back the cached index files plus the DGA Region IV spreadsheet
oni = pd.read_csv('ONI.csv')
nino = pd.read_csv('NINO34.csv')
data = pd.read_excel('DGAregionIV.xls', 'Hoja1', index_col=0, parse_dates=True)
In [5]:
format = "%m/%Y"
date = pd.to_datetime(nino.MON.astype(str) + '/' + nino.YR.astype(str) , format=format)
nino.set_index(date, inplace=True)
# and cleanup
nino.drop(['MON','YR'], axis=1, inplace=True)
In [6]:
# ONI rows are consecutive overlapping 3-month seasons starting Jan 1950,
# so a monthly date range lines up one-to-one
rng = pd.date_range('1/1/1950', periods=len(oni), freq='MS')
oni.set_index(rng, inplace=True)
# and clean up the now-redundant columns
oni.drop(['SEAS', 'YR'], axis=1, inplace=True)
In [7]:
oni.plot(figsize=(10, 10), subplots=True, sharex=True)
Out[7]:
In [8]:
nino.plot(figsize=(10, 10), subplots=True, sharex=True)
Out[8]:
In [9]:
data.plot(figsize=(10, 10), subplots=True, sharex=True)
Out[9]:
In [10]:
oni.info()
print '\n'
nino.info()
print '\n'
data.info()
In [11]:
print oni.describe()
print nino.describe()
print data.describe()
Warm (red) and cold (blue) episodes are defined by a threshold of ±0.5 °C on the Oceanic Niño Index (ONI):
http://www.cpc.ncep.noaa.gov/products/analysis_monitoring/ensostuff/ensoyears.shtml
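As a rough illustration, here is a minimal sketch of applying that ±0.5 °C convention to the ANOM column (the official episode table additionally requires the threshold to persist for five consecutive overlapping seasons, which this sketch ignores):
In [ ]:
# Label each season warm/neutral/cold from the ONI anomaly alone
label = np.where(oni.ANOM >= 0.5, 'warm',
                 np.where(oni.ANOM <= -0.5, 'cold', 'neutral'))
pd.Series(label, index=oni.index).value_counts()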
In [12]:
# Years when Laguna snow is in the top quartile
data[data.Snow_Laguna > data.Snow_Laguna.quantile(0.75)]
Out[12]:
In [13]:
# Years when Q_Elqui (Elqui discharge) is in the top quartile
data[data.Q_Elqui > data.Q_Elqui.quantile(0.75)]
Out[13]:
In [14]:
# Years when Laguna snow is in the bottom quartile
data[data.Snow_Laguna < data.Snow_Laguna.quantile(0.25)]
Out[14]:
In [15]:
# Years when Q_Elqui (Elqui discharge) is in the bottom quartile
data[data.Q_Elqui < data.Q_Elqui.quantile(0.25)]
Out[15]:
In [16]:
# Pivot the monthly ONI anomalies into a year-by-month table
df = oni.drop('TOTAL', axis=1)
df.index.name = 'date'
df['Year'] = df.index.year
df['Month'] = df.index.month
df = df.pivot(index='Year', columns='Month', values='ANOM')
df.head()
Out[16]:
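For reference, the same year-by-month table can be produced directly from the DatetimeIndex; a sketch of an equivalent reshape using groupby and unstack:
In [ ]:
# Group the anomaly series by (year, month) and unstack months into columns
oni.ANOM.groupby([oni.index.year, oni.index.month]).first().unstack()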
In [17]:
# Jan-to-April change in ONI anomaly during the wet (top-quartile snow) years
wet_years = data[data.Snow_Laguna > data.Snow_Laguna.quantile(0.75)].index.year
(df[4] - df[1]).loc[wet_years]
Out[17]:
In [18]:
# April ONI anomaly in the wet years
df[4].loc[wet_years]
Out[18]:
In [19]:
# Jan-to-April change in ONI anomaly during the dry (bottom-quartile snow) years
dry_years = data[data.Snow_Laguna < data.Snow_Laguna.quantile(0.25)].index.year
(df[4] - df[1]).loc[dry_years]
Out[19]:
In [20]:
# April ONI anomaly in the dry years
df[4].loc[dry_years]
Out[20]:
In [21]:
# Assemble predictors (Jan-to-April ONI trend and April ONI anomaly)
# alongside the Snow_Laguna target
dataset = pd.DataFrame()
dataset['Trend'] = (df[4] - df[1])
dataset['April'] = df[4]
dataset.index = pd.to_datetime(dataset.index, format='%Y')
dataset['Snow_Laguna'] = data.Snow_Laguna
dataset.dropna(inplace=True)
dataset.tail()
Out[21]:
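Before modelling, it is worth checking how the two ONI-derived predictors co-vary with the snow target; a quick sketch:
In [ ]:
# Pairwise Pearson correlations between predictors and target
dataset.corr()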
In [22]:
dataset.plot(figsize=(10, 10), subplots=True, sharex=True)
Out[22]:
Define the feature matrix and the label vector
In [23]:
X = dataset[['Trend','April']].values
y = dataset[['Snow_Laguna']].values.ravel()
print X.shape
print y.shape
In [24]:
from sklearn import metrics
In [25]:
from sklearn.naive_bayes import GaussianNB
# Instantiate the estimator (note: GaussianNB is a classifier, so every
# distinct Snow_Laguna value is treated as its own class label)
clf = GaussianNB()
# Fit the estimator to the data, leaving out the last five samples
clf.fit(X[:-5], y[:-5])
# Use the model to predict the last five labels
y_pred = clf.predict(X[-5:])
print y_pred
print y[-5:]
print 'Accuracy: %s' % metrics.accuracy_score(y[-5:], y_pred)
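Since Snow_Laguna is continuous, exact-match accuracy on raw values is a very strict test. A minimal sketch, assuming one instead wants a coarser classification task, of binning the target into quartile classes first (y_class and clf_q are introduced here purely for illustration):
In [ ]:
# Discretize the target into four quartile classes, then classify
y_class = np.digitize(y, np.percentile(y, [25, 50, 75]))
clf_q = GaussianNB()
clf_q.fit(X[:-5], y_class[:-5])
print 'Accuracy: %s' % metrics.accuracy_score(y_class[-5:], clf_q.predict(X[-5:]))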
In [26]:
from sklearn import linear_model
# Instantiate the estimator
clf = linear_model.BayesianRidge()
# Fit the estimator to the data, leaving out the last five samples
clf.fit(X[:-5], y[:-5])
# Use the model to predict the last five labels
y_pred = clf.predict(X[-5:])
print y_pred
print y[-5:]
In [27]:
from sklearn import linear_model
# Instantiate the estimator
clf = linear_model.LinearRegression()
# Fit the estimator to the data, leaving out the last five samples
clf.fit(X[:-5], y[:-5])
# Use the model to predict the last five labels
y_pred = clf.predict(X[-5:])
print y_pred
print y[-5:]
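Both BayesianRidge and LinearRegression are regressors, so an error metric is more informative than the raw printouts; a short sketch reusing the sklearn.metrics import from above:
In [ ]:
# Mean squared error of the linear-regression predictions on the held-out years
print 'MSE: %s' % metrics.mean_squared_error(y[-5:], y_pred)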
In [28]:
from pybrain.tools.shortcuts import buildNetwork
from pybrain.structure import TanhLayer
from pybrain.datasets import SupervisedDataSet
from pybrain.supervised.trainers import BackpropTrainer
In [29]:
# 2 inputs -> 3 tanh hidden units -> 1 output, no bias
net = buildNetwork(2, 3, 1, bias=False, hiddenclass=TanhLayer)
# Sanity check: activate the untrained network on an arbitrary input
net.activate([2, 1])
Out[29]:
In [30]:
# Load all but the last five samples into a PyBrain dataset
ds = SupervisedDataSet(2, 1)
for i in np.arange(len(y[:-5])):
    ds.addSample(X[i], y[i])
In [31]:
for inpt, target in ds:
    print inpt, target
In [32]:
trainer = BackpropTrainer(net, ds)
trainer.train()
Out[32]:
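A single train() call runs only one epoch of backpropagation and returns the average error, so the network above is barely trained. A minimal sketch of training for longer (100 epochs is an arbitrary choice for illustration):
In [ ]:
# Run repeated epochs and report the final training error
for epoch in np.arange(100):
    err = trainer.train()
print 'final error: %s' % err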
In [33]:
print 'predicted vs real'
# Compare network output with the actual values for the five held-out samples
for i in np.arange(len(y) - 5, len(y)):
    print '%s vs %s' % (net.activate(X[i]), y[i])
In [34]:
if run_console:
    %qtconsole