In [9]:
%pylab inline
import pandas as pd
from pykalman import KalmanFilter

df = pd.read_csv("../data/ChungCheonDC/CompositeETCdata.csv")
df_DC = pd.read_csv("../data/ChungCheonDC/CompositeDCdata.csv")
df_DCprc = pd.read_csv("../data/ChungCheonDC/CompositeDCdata_processed.csv")
df_DCstd = pd.read_csv("../data/ChungCheonDC/CompositeDCstddata.csv")


Populating the interactive namespace from numpy and matplotlib

In [10]:
# missininds = np.arange(df_DC[electrodeID[elecind]].values.size)[np.isnan(df_DC[electrodeID[elecind]].values)]
electrodeID = df_DC.keys()[1:-1]

In [11]:
from scipy import interpolate
sys.path.append("../codes/")
from DCdata import readReservoirDC_all
directory = "../data/ChungCheonDC/"
dat_temp,height_temp, ID = readReservoirDC_all(directory+"20151231180000.apr")
locs = dat_temp[:,:4]
mida = locs[:,:2].sum(axis=1)
midb = locs[:,2:].sum(axis=1)
mid = (mida + midb)*0.5
dz = mida-midb
x = np.linspace(mid.min(), mid.max(), 100)
z = np.linspace(dz.min(), dz.max(), 100)
grid_x, grid_z = np.meshgrid(x,z)

def vizDCtimeSeries(idatum, itime, itime_ref, colors, flag, df_DC):
    fig = plt.figure(figsize = (12, 12))
    ax1 = plt.subplot(411)
    ax2 = plt.subplot(412)
    
    valsratio = df_DC[electrodeID].values[itime,:].flatten() / df_DC[electrodeID].values[itime_ref,:].flatten()
    valsDC = np.log10(df_DC[electrodeID].values[itime,:].flatten())
    valsDCstd = df_DCstd[electrodeID].values[itime,:].flatten()
    grid_rho_ratio = griddata(mid, dz, valsratio, grid_x, grid_z, interp='linear')
    grid_rho_ratio = grid_rho_ratio.reshape(grid_x.shape)
    if flag =="std":
        vmin, vmax = 0, 10
        grid_rho = griddata(mid, dz, valsDCstd, grid_x, grid_z, interp='linear')        
    elif flag =="rho":
        vmin, vmax = np.log10(20), np.log10(200)
        grid_rho = griddata(mid, dz, valsDC, grid_x, grid_z, interp='linear')
    grid_rho = grid_rho.reshape(grid_x.shape)
        
    
    ax1.contourf(grid_x, grid_z, grid_rho, 200, vmin =vmin, vmax = vmax, clim=(vmin, vmax), cmap="jet")    
    vmin, vmax = 0.9, 1.1
    ax2.contourf(grid_x, grid_z, grid_rho_ratio, 200, vmin =vmin, vmax = vmax, clim=(vmin, vmax), cmap="jet")        
    ax1.scatter(mid, dz, s=20, c = valsDC, edgecolor="None", vmin =vmin, vmax = vmax, clim=(vmin, vmax))
    ax1.plot(mid, dz, 'k.')
    ax2.scatter(mid, dz, s=20, c = valsratio, edgecolor="None", vmin =vmin, vmax = vmax, clim=(vmin, vmax))
    ax2.plot(mid, dz, 'k.')
    
    for i in range(len(colors)):
        ax1.plot(mid[idatum[i]], dz[idatum[i]], 'o', color=colors[i])    
        ax2.plot(mid[idatum[i]], dz[idatum[i]], 'o', color=colors[i])    
        

    ax3 = plt.subplot(413)
    ax3_1 = ax3.twinx()
    df.plot(x='date', y='reservoirH', ax=ax3_1, color='k', linestyle='-', lw=2)
    df.plot(x='date', y='upperH_med', ax=ax3_1, color='b', linestyle='-', lw=2)
    df.plot(x='date', y='Temp (degree)', ax=ax3, color='r', linestyle='-', lw=2)
    df.plot(x='date', y='Rainfall (mm)', ax=ax3, color='b', linestyle='-', marker="o", lw=2)   # ms=4)
    ax3.legend(loc=3, bbox_to_anchor=(1.05, 0.7))
    ax3_1.legend(loc=3, bbox_to_anchor=(1.05, 0.4))
    itime_ref0 = itime_ref
    itime_ref1 = itime
    ax3.plot(np.r_[itime_ref0, itime_ref0], np.r_[-5, 40], 'k--', lw=2)
    ax3.plot(np.r_[itime_ref1, itime_ref1], np.r_[-5, 40], 'k--', lw=2)

    ax4 = plt.subplot(414)
    ax4_1 = ax4.twinx()
    ax4.legend(loc=3, bbox_to_anchor=(1.05, 0.7))
    ax4.set_yscale('log')
    temp = df_DC[electrodeID[elecind]].values
    vmax = np.median(temp[~np.isnan(temp)]) + np.std(temp[~np.isnan(temp)])*20
    vmin = np.median(temp[~np.isnan(temp)]) - np.std(temp[~np.isnan(temp)])*20
    ax4.plot(np.r_[itime_ref1, itime_ref1], np.r_[vmin, vmax], 'k--', lw=2)
    ax4.plot(np.r_[itime_ref0, itime_ref0], np.r_[vmin, vmax], 'k--', lw=2)
    
    df_DC.plot(x='date', y=electrodeID[idatum], ax=ax4,color='r')
    df.plot(x='date', y='Rainfall (mm)', ax=ax4_1, color='b', linestyle='-', marker="o", ms=4)
  
    ax4.set_ylim(vmin, vmax)

In [12]:
ax1 = plt.subplot(111)
ax1_1 = ax1.twinx()
df.plot(figsize=(12,3), x='date', y='Temp (degree)', ax=ax1_1, color='r', linestyle='-', lw=2)

df.plot(figsize=(12,3), x='date', y='reservoirH', ax=ax1, color='k', linestyle='-', lw=3)
df.plot(figsize=(12,3), x='date', y='upperH_med', ax=ax1, color='b', linestyle='-', lw=2)
#df.plot(figsize=(12,3), x='date', y='Temp (degree)', ax=ax1_1, color='r', linestyle='-', lw=2)
ax1.legend(loc=3, bbox_to_anchor=(1.05, 0.7))
ax1_1.legend(loc=3, bbox_to_anchor=(1.05, 0.4))
itime_ref0 = 351
itime_ref1 = 257
itime_ref2 = 66

ax1_1.plot(np.r_[itime_ref0, itime_ref0], np.r_[-5, 35], 'k-')
ax1_1.plot(np.r_[itime_ref1, itime_ref1], np.r_[-5, 35], 'k-')
ax1_1.plot(np.r_[itime_ref2, itime_ref2], np.r_[-5, 35], 'k-')

ax1.set_ylabel("Water height (m)")
ax1_1.set_ylabel("Temperature (degree)")
print df['date'].values[itime_ref0]
print df['date'].values[itime_ref1]
print df['date'].values[itime_ref2]


2015-12-18
2015-09-15
2015-03-08

In [13]:
elecind = [1, 10, 30, 50, 70, 90]

In [14]:
ax1 = plt.subplot(111)
ax1_1 = ax1.twinx()
df_DC.plot(figsize=(12,3), x='date', y=electrodeID[elecind], ax=ax1, colors=['k', 'k', 'k'])
df.plot(figsize=(12,3), x='date', y='Temp (degree)', ax=ax1_1, color='r', linestyle='-', lw=2)
ax1.legend(("DC data", ), loc=3, bbox_to_anchor=(1.05, 0.7))
ax1_1.legend(loc=3, bbox_to_anchor=(1.05, 0.4))
ax1.set_yscale('linear')
ax1.grid(True)
ax1.set_ylabel("Apparent resistivity (ohm-m)")
ax1_1.set_ylabel("Temperature (degree)", color="red")
ax1.set_ylim(50, 100)


/Users/sklim/anaconda/lib/python2.7/site-packages/pandas/tools/plotting.py:968: UserWarning: 'colors' is being deprecated. Please use 'color'instead of 'colors'
  warnings.warn(("'colors' is being deprecated. Please use 'color'"
Out[14]:
(50, 100)

In [15]:
# ax1 = plt.subplot(111)
# df_DCstd.plot(figsize=(12,3), x='date', y=electrodeID[elecind], ax=ax1, colors=['k', 'b', 'r'], linestyle="-", marker='.', lw=1)
# ax1.set_yscale('log')
# ax1.legend(loc=3, bbox_to_anchor=(1.05, 0.7))

In [16]:
txrxID =  df_DC.keys()[1:-1]
xmasking = lambda x: np.ma.masked_where(np.isnan(x.values), x.values)

In [17]:
#x= electrodeID[elecind] 
x= df_DC[txrxID]
max3 = pd.rolling_max(x, 3)


/Users/sklim/anaconda/lib/python2.7/site-packages/ipykernel/__main__.py:3: FutureWarning: pd.rolling_max is deprecated for DataFrame and will be removed in a future version, replace with 
	DataFrame.rolling(window=3,center=False).max()
  app.launch_new_instance()

In [18]:
from ipywidgets import interact

In [19]:
# making matrix like max3 (but with zeros)
newdata = np.zeros_like(max3)

In [20]:
newdata.shape


Out[20]:
(365, 380)

In [21]:
ndata = newdata.shape[1]
for i in range(ndata):
    x= df_DC[txrxID[i]]
    #median10 = pd.rolling_median(x, 6)
    mean10 = pd.rolling_max(x, 3)
    # Masking array having NaN
    xm = xmasking(mean10)
    kf = KalmanFilter(transition_matrices = [1],
                      observation_matrices = [1],
                      initial_state_mean = x[0],
                      initial_state_covariance = 1,
                      observation_covariance=1,
                      transition_covariance=1)
    # Use the observed values of the price to get a rolling mean
    state_means, _ = kf.filter(xm)    
    newdata[:,i] = state_means.flatten()


/Users/sklim/anaconda/lib/python2.7/site-packages/ipykernel/__main__.py:5: FutureWarning: pd.rolling_max is deprecated for Series and will be removed in a future version, replace with 
	Series.rolling(window=3,center=False).max()

In [22]:
df_DC_new = df_DC.copy()  
for i,index in enumerate(txrxID):
    df_DC_new.loc[:,index] = newdata[:,i].flatten()
# df_DC_new.to_csv("../data/ChungCheonDC/CompositeDCdata_processed.csv")

In [23]:
from ipywidgets import interact, IntSlider, ToggleButtons
itime = 201
itime_ref = 201
print df['date'].values[itime]
elecind = [5, 150,200]
 

# vizDCtimeSeries(elecind, itime, itime_ref, ['k','b','r'])
viz = lambda idatum, itime, flag: vizDCtimeSeries([idatum], itime, itime_ref, ['r'], flag, df_DC_new)
interact(viz, idatum=IntSlider(min=0, max=379, step=1, value=144)\
         ,itime=IntSlider(min=0, max=360, step=1, value=201)\
         ,flag=ToggleButtons(options=["std", "rho"]))


/Users/sklim/anaconda/lib/python2.7/site-packages/matplotlib/axes/_axes.py:531: UserWarning: No labelled objects found. Use label='...' kwarg on individual plots.
  warnings.warn("No labelled objects found. "
Out[23]:
<function __main__.<lambda>>

In [24]:
print df['date'].values[itime_ref]


2015-07-21

In [25]:
for i in range(0,379,100):
    x= df_DC[txrxID[i]]
    x1 = df_DC_new[txrxID[i]]
    plt.plot(newdata[:,i], 'k')
    plt.plot(x1, 'ro')    
    plt.plot(x, 'k.', ms=2)



In [26]:
i = 245
x= df_DC[txrxID[i]]
#median10 = pd.rolling_median(x, 6)
mean10 = pd.rolling_max(x, 3)
#x1 = median10
#x2 = mean10
# Masking array having NaN
xm = xmasking(mean10)
# Construct a Kalman filter
kf = KalmanFilter(transition_matrices = [1],
                  observation_matrices = [1],
                  initial_state_mean = 67.6,
                  initial_state_covariance = 1,
                  observation_covariance=1,
                  transition_covariance=1)
# Use the observed values of the price to get a rolling mean
state_means, _ = kf.filter(xm)

#plt.plot(x1)
plt.plot(x)
#plt.plot(x1)
#plt.plot(x2)
plt.plot(state_means)
plt.legend([  'origin x','Kalman'])


/Users/sklim/anaconda/lib/python2.7/site-packages/ipykernel/__main__.py:4: FutureWarning: pd.rolling_max is deprecated for Series and will be removed in a future version, replace with 
	Series.rolling(window=3,center=False).max()
Out[26]:
<matplotlib.legend.Legend at 0x1136e2c90>

In [27]:
y='Rainfall (mm)'

In [28]:
print y


Rainfall (mm)

In [29]:
i = 144


x= df_DC[txrxID[i]]
#median10 = pd.rolling_median(x, 6)
mean10 = pd.rolling_max(x, 3)
#x1 = median10
#x2 = mean10
# Masking array having NaN
xm = xmasking(mean10)
# Construct a Kalman filter
kf = KalmanFilter(transition_matrices = [1],
                  observation_matrices = [1],
                  initial_state_mean = 67.6,
                  initial_state_covariance = 1,
                  observation_covariance=1,
                  transition_covariance=1)
# Use the observed values of the price to get a rolling mean
state_means, _ = kf.filter(xm)

#plt.plot(x1)
plt.plot(x)
#plt.plot(x1)
#plt.plot(x2)
plt.plot(state_means)
plt.legend([  'origin x','Kalman'])


/Users/sklim/anaconda/lib/python2.7/site-packages/ipykernel/__main__.py:6: FutureWarning: pd.rolling_max is deprecated for Series and will be removed in a future version, replace with 
	Series.rolling(window=3,center=False).max()
Out[29]:
<matplotlib.legend.Legend at 0x118b550d0>

In [140]:
print len(x)


365

In [142]:
print len(state_means)


365

In [167]:
import numpy as np
import scipy.stats as st

In [172]:
n = 365 # length of the data
x = np.random.randn(n)
#y = 3 + 7*x + np.random.randn(n)
#y = 3 + 7*x + np.random.normal(n)
y = state_means

In [173]:
print x


[-1.1551447  -0.44233704  0.10344756  1.71608357  0.2101117  -1.22296673
 -1.82655804  2.25852852 -0.35031816  1.208164   -1.4344196   0.79663711
  1.32978715  0.5448117  -0.40452852  0.11320164 -0.90166329 -1.15877357
  0.53968305 -0.09483759 -0.79248081  0.26521558  0.49397353 -0.94569757
 -0.35801037  0.69989054  0.77919052  1.13988183 -1.81607878  0.00939741
  0.15499373 -1.07203573 -0.50727445  0.35608963 -0.16347872 -0.29372557
  1.18869344  1.22193079  0.28494744 -0.23646861 -1.00711097  2.32401238
  1.09829472  0.44037173 -0.24824384  1.49565798 -0.62655019  1.59769417
 -0.2785214   0.14027341  2.87011916 -0.60958137  0.40295734  1.27251062
  0.40310813  0.49666941  0.84759447  1.5326413  -0.91853129 -1.18824321
 -0.04821557  1.62997686  0.13862149  1.22734584 -1.73880222 -1.40437923
 -0.73132701 -1.50291045 -0.81913506  0.2676008  -0.21514476 -0.6641118
  1.2068055  -0.01188688 -0.22436817  0.80067991 -0.77026737  0.91658988
  0.11385895 -0.063139   -0.72017279  1.22255445 -0.53321895 -0.96194444
 -2.89545917  0.80085921 -2.03784276 -0.31143747 -0.63126812 -0.33271722
  0.27723466  2.27622091  1.48676572 -0.45081297 -0.33572093 -0.82693058
  1.11842692 -0.4949207  -0.3602442   0.82538675  0.4008481   0.90581242
  1.54916184 -0.97119587 -0.2492831   1.31277601 -0.20530249 -0.51254797
  1.6741721   0.02679046  0.38983757  1.35822655 -0.9982977  -0.25613868
  0.76393039  0.6466455  -0.17415165 -1.16264082 -1.07451894  0.17388936
 -1.64387053 -0.36375117 -1.17591339  2.01405965 -0.03816174 -0.57114985
 -2.26101728 -0.55270391  0.14582991  1.92761039 -0.76044345 -0.13597464
  0.09095166  1.05547316  0.48721539 -0.89697358 -0.35128436  0.63058289
 -0.84729268  1.10398343  0.24682064 -0.81972291 -0.78142957  0.72779582
  0.69287872  0.01487174 -0.61238618  0.3684079   1.56381072 -0.35714233
  1.90786095  0.18997778 -1.03925069  0.09228235  2.39049932 -1.47497331
  0.05506195 -0.42333387  0.44447129 -0.09220863  1.78858105  1.03530583
 -1.39117747 -1.46411188 -0.68404678 -0.058669    1.67959674 -0.48684488
 -1.4217599  -1.0828606   0.35813117  0.81149345  0.98738691 -1.50439303
  0.58717877 -0.22490474  0.28829512  0.75907018 -0.23298776  0.30960746
  0.40598993  1.31536888  0.65904597 -0.39031987 -2.01438589 -0.58728219
  2.60492511  1.19655689 -0.64236319 -1.3575518   1.43355982  1.82825683
  0.65272289 -0.35816478 -2.48911701 -1.10982214 -0.85805571 -0.79410148
  1.02251116  0.69663686 -1.65151249  0.61832904  1.31505775  0.71442872
 -0.71837388 -0.67128488 -1.54327998 -0.30411826  1.07126121 -1.25759478
 -0.22119518 -0.21666481  0.40073547 -0.67968513  1.2182394  -1.36707552
  0.20507559  1.21633878 -1.41050155  0.66364198  1.08871383  0.64522685
 -0.23866838 -0.11101051  0.78309835 -1.51829482  0.07419622 -0.30260749
  0.0162081  -0.20267653  1.07956484 -0.93706812  0.64676893 -0.11757598
  1.82596511 -0.20667126  1.35470608 -0.2281442   0.68742275 -0.122401
  0.47638835  1.08165783  0.17032336 -0.70839697 -0.08682871 -2.20132471
  0.76785889 -0.08230324 -0.1340065   0.14166887  0.04086835  1.01240688
 -0.38841515  0.57350781 -0.96279134 -0.05274343  2.35722383 -1.25115304
  1.14224013  1.54162279 -1.74799582  1.08131444 -0.41645449 -0.51569427
  0.33785382  0.92927578  1.43049326 -0.91711958 -0.56033552 -0.83244318
  0.36051973  0.42254593 -0.68559548 -1.00731132 -0.9966182   1.5764421
 -0.47407587 -0.32020059 -0.64503261  0.88185592  0.15929361 -0.33035318
  0.39179713  0.79767352 -0.04945525  0.01637318  0.14615601 -1.11803041
  0.30840809 -1.52312843 -0.39380961 -0.07335254  0.63673294 -1.18954012
 -0.07774506 -0.37293941  0.53021962 -0.27097341 -0.1055483  -0.05173193
  0.15511819  1.25103757 -0.20164875  0.53732397 -1.30946239  0.58521044
 -0.02106986  0.20317876  0.17799678  0.40709239  0.0701095  -0.01032325
 -0.2231875  -0.56770761 -0.04398235 -0.10087058  0.80671114  0.181622
  0.1557251  -1.73029434 -0.14759803  0.02500287 -0.0236584  -0.60300496
  1.02383392  0.22154065 -0.4124931   0.79781092 -0.23708961 -2.27812304
 -0.51880989 -0.25153479 -1.08119467  0.65638207 -0.30858048  0.61528823
  0.5481487   0.23729368  1.04500238 -0.99726726  0.11794704 -0.57536886
 -0.27262523 -0.25049347 -0.81042113  1.60750214  1.27803834 -0.37446582
  1.25179841 -0.0317722  -1.32378089 -0.95258331 -1.03129152 -1.72436478
  1.17858458  1.3733254  -1.23668068 -0.52934333 -0.78633325 -1.23801741
 -0.39510476 -0.31040653  1.27671484  1.16282475 -0.54500454]

In [174]:
#print len(y)

In [175]:
plot(x,y)


Out[175]:
[<matplotlib.lines.Line2D at 0x118be7210>]

In [176]:
print yscale


<function yscale at 0x10a7fa9b0>

In [177]:
# perform linear regression

In [178]:
b, a, r, p, e = st.linregress(x, y)


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-178-f6503334ea0b> in <module>()
----> 1 b, a, r, p, e = st.linregress(x, y)

/Users/sklim/anaconda/lib/python2.7/site-packages/scipy/stats/_stats_mstats_common.pyc in linregress(x, y)
     77 
     78     # average sum of squares:
---> 79     ssxm, ssxym, ssyxm, ssym = np.cov(x, y, bias=1).flat
     80     r_num = ssxym
     81     r_den = np.sqrt(ssxm * ssym)

/Users/sklim/anaconda/lib/python2.7/site-packages/numpy/lib/function_base.pyc in cov(m, y, rowvar, bias, ddof, fweights, aweights)
   2430         if rowvar == 0 and y.shape[0] != 1:
   2431             y = y.T
-> 2432         X = np.vstack((X, y))
   2433 
   2434     if ddof is None:

/Users/sklim/anaconda/lib/python2.7/site-packages/numpy/core/shape_base.pyc in vstack(tup)
    228 
    229     """
--> 230     return _nx.concatenate([atleast_2d(_m) for _m in tup], 0)
    231 
    232 def hstack(tup):

ValueError: all the input array dimensions except for the concatenation axis must match exactly

In [179]:
print(a,b)


(2.9839829066229959, 7.0948534501941314)

In [180]:
eps = y - a - b*x  # error of fitting and measured data
x1 = np.linspace(0, 1) # x axis to plot the PI
# variace of fitting error
e_pi = np.var(eps)*(1+1.0/n + (x1-x.mean())**2/np.sum((x-x.mean())**2))
# z value using the t distribution and with dof = n-2
z = st.t.ppf(0.95, n-2)
# prediction interval
pi = np.sqrt(e_pi)*z
zl = st.t.ppf(0.10, n-2) # z at 0.1
zu = st.t.ppf(0.90, n-2) # z at 0.9
ll = a + b*x1 + np.sqrt(e_pi)*zl # 10 %
ul = a + b*x1 + np.sqrt(e_pi)*zu # 90 %

In [181]:
import matplotlib.pyplot as plt
plt.plot(x,y,'ro', label='measured')
plt.plot(x1,ll,'--', label='10%')
plt.plot(x1,ul,'--', label='90%')
plt.xlabel('x')
plt.ylabel('y')
plt.legend(loc='best')
plt.savefig('lin_regress.png')



In [ ]: