In [1]:
%pylab inline
import pandas as pd
from pykalman import KalmanFilter


Populating the interactive namespace from numpy and matplotlib

In [2]:
df = pd.read_csv("../data/ChungCheonDC/CompositeETCdata.csv")
df_DC = pd.read_csv("../data/ChungCheonDC/CompositeDCdata.csv")
df_DCprc = pd.read_csv("../data/ChungCheonDC/CompositeDCdata_processed1.csv")
df_DCstd = pd.read_csv("../data/ChungCheonDC/CompositeDCstddata.csv")

In [3]:
# missininds = np.arange(df_DC[electrodeID[elecind]].values.size)[np.isnan(df_DC[electrodeID[elecind]].values)]
electrodeID = df_DC.keys()[1:-1]

In [4]:
from scipy import interpolate
sys.path.append("../codes/")
from DCdata import readReservoirDC_all
directory = "../data/ChungCheonDC/"
dat_temp,height_temp, ID = readReservoirDC_all(directory+"20151231180000.apr")
locs = dat_temp[:,:4]
mida = locs[:,:2].sum(axis=1)
midb = locs[:,2:].sum(axis=1)
mid = (mida + midb)*0.5
dz = mida-midb
x = np.linspace(mid.min(), mid.max(), 100)
z = np.linspace(dz.min(), dz.max(), 100)
grid_x, grid_z = np.meshgrid(x,z)

def vizDCtimeSeries(idatum, itime, itime_ref, colors, flag, df_DC):
    fig = plt.figure(figsize = (12, 12))
    ax1 = plt.subplot(411)
    ax2 = plt.subplot(412)
    
    valsratio = df_DC[electrodeID].values[itime,:].flatten() / df_DC[electrodeID].values[itime_ref,:].flatten()
    valsDC = np.log10(df_DC[electrodeID].values[itime,:].flatten())
    valsDCstd = df_DCstd[electrodeID].values[itime,:].flatten()
    grid_rho_ratio = griddata(mid, dz, valsratio, grid_x, grid_z, interp='linear')
    grid_rho_ratio = grid_rho_ratio.reshape(grid_x.shape)
    if flag =="std":
        vmin, vmax = 0, 10
        grid_rho = griddata(mid, dz, valsDCstd, grid_x, grid_z, interp='linear')        
    elif flag =="rho":
        vmin, vmax = np.log10(20), np.log10(200)
        grid_rho = griddata(mid, dz, valsDC, grid_x, grid_z, interp='linear')
    grid_rho = grid_rho.reshape(grid_x.shape)
        
    
    ax1.contourf(grid_x, grid_z, grid_rho, 200, vmin =vmin, vmax = vmax, clim=(vmin, vmax), cmap="jet")    
    vmin, vmax = 0.9, 1.1
    ax2.contourf(grid_x, grid_z, grid_rho_ratio, 200, vmin =vmin, vmax = vmax, clim=(vmin, vmax), cmap="jet")        
    ax1.scatter(mid, dz, s=20, c = valsDC, edgecolor="None", vmin =vmin, vmax = vmax, clim=(vmin, vmax))
    ax1.plot(mid, dz, 'k.')
    ax2.scatter(mid, dz, s=20, c = valsratio, edgecolor="None", vmin =vmin, vmax = vmax, clim=(vmin, vmax))
    ax2.plot(mid, dz, 'k.')
    
    for i in range(len(colors)):
        ax1.plot(mid[idatum[i]], dz[idatum[i]], 'o', color=colors[i])    
        ax2.plot(mid[idatum[i]], dz[idatum[i]], 'o', color=colors[i])    
        

    ax3 = plt.subplot(413)
    ax3_1 = ax3.twinx()
    df.plot(x='date', y='reservoirH', ax=ax3_1, color='k', linestyle='-', lw=2)
    df.plot(x='date', y='upperH_med', ax=ax3_1, color='b', linestyle='-', lw=2)
    df.plot(x='date', y='Temp (degree)', ax=ax3, color='r', linestyle='-', lw=2)
    df.plot(x='date', y='Rainfall (mm)', ax=ax3, color='b', linestyle='-', marker="o", ms=4)
    ax3.legend(loc=3, bbox_to_anchor=(1.05, 0.7))
    ax3_1.legend(loc=3, bbox_to_anchor=(1.05, 0.4))
    itime_ref0 = itime_ref
    itime_ref1 = itime
    ax3.plot(np.r_[itime_ref0, itime_ref0], np.r_[-5, 40], 'k--', lw=2)
    ax3.plot(np.r_[itime_ref1, itime_ref1], np.r_[-5, 40], 'k--', lw=2)

    ax4 = plt.subplot(414)
    df_DC.plot(x='date', y=electrodeID[idatum], ax=ax4)
    ax4.legend(loc=3, bbox_to_anchor=(1.05, 0.7))
    ax4.set_yscale('log')
    temp = df_DC[electrodeID[elecind]].values
    vmax = np.median(temp[~np.isnan(temp)]) + np.std(temp[~np.isnan(temp)])*3
    vmin = np.median(temp[~np.isnan(temp)]) - np.std(temp[~np.isnan(temp)])*3
    ax4.plot(np.r_[itime_ref1, itime_ref1], np.r_[vmin, vmax], 'k--', lw=2)
    ax4.plot(np.r_[itime_ref0, itime_ref0], np.r_[vmin, vmax], 'k--', lw=2)
    
    ax4.set_ylim(vmin, vmax)


Efficiency Warning: Interpolation will be slow, use setup.py!

            python setup.py build_ext --inplace
    

In [5]:
ax1 = plt.subplot(111)
ax1_1 = ax1.twinx()
df.plot(figsize=(12,3), x='date', y='reservoirH', ax=ax1_1, color='k', linestyle='-', lw=2)
df.plot(figsize=(12,3), x='date', y='upperH_med', ax=ax1_1, color='b', linestyle='-', lw=2)
df.plot(figsize=(12,3), x='date', y='Temp (degree)', ax=ax1, color='r', linestyle='-', lw=2)
ax1.legend(loc=3, bbox_to_anchor=(1.05, 0.7))
ax1_1.legend(loc=3, bbox_to_anchor=(1.05, 0.4))
itime_ref0 = 255
itime_ref1 = 115
ax1.plot(np.r_[itime_ref0, itime_ref0], np.r_[-5, 35], 'k-')
ax1.plot(np.r_[itime_ref1, itime_ref1], np.r_[-5, 35], 'k-')
# print df['date'].values[itime_ref]


Out[5]:
[<matplotlib.lines.Line2D at 0x9d60e48>]

In [6]:
# ax1 = plt.subplot(111)
# ax1_1 = ax1.twinx()
# df_DC.plot(figsize=(12,3), x='date', y=electrodeID[elecind], ax=ax1, colors=['k', 'b', 'r'])
# df.plot(figsize=(12,3), x='date', y='reservoirH', ax=ax1_1, color='k', linestyle='-', lw=2)
# ax1.legend(loc=3, bbox_to_anchor=(1.05, 0.7))
# ax1_1.legend(loc=3, bbox_to_anchor=(1.05, 0.4))
# ax1.set_yscale('linear')

In [7]:
# ax1 = plt.subplot(111)
# df_DCstd.plot(figsize=(12,3), x='date', y=electrodeID[elecind], ax=ax1, colors=['k', 'b', 'r'], linestyle="-", marker='.', lw=1)
# ax1.set_yscale('log')
# ax1.legend(loc=3, bbox_to_anchor=(1.05, 0.7))

In [8]:
txrxID =  df_DC.keys()[1:-1]
xmasking = lambda x: np.ma.masked_where(np.isnan(x.values), x.values)

In [9]:
#x= electrodeID[elecind] 
x= df_DC[txrxID]
max3 = pd.rolling_max(x, 3)

In [10]:
#pd.rolling_max??

In [11]:
# plt.plot(x)
# plt.plot(max3)

In [12]:
from ipywidgets import interact

In [13]:
# making matrix like max3 (but with zeros)
newdata = np.zeros_like(max3)

In [14]:
newdata.shape


Out[14]:
(365L, 380L)

In [15]:
ndata = newdata.shape[1]
for i in range(ndata):
    x= df_DC[txrxID[i]]
    #median10 = pd.rolling_median(x, 6)
    mean10 = pd.rolling_max(x, 3)
    # Masking array having NaN
    xm = xmasking(mean10)
    kf = KalmanFilter(transition_matrices = [1],
                      observation_matrices = [1],
                      initial_state_mean = x[0],
                      initial_state_covariance = 1,
                      observation_covariance=1,
                      transition_covariance=1)
    # Use the observed values of the price to get a rolling mean
    state_means, _ = kf.filter(xm)    
    newdata[:,i] = state_means.flatten()

In [16]:
df_DC_new = df_DC.copy()  
for i,index in enumerate(txrxID):
    df_DC_new.loc[:,index] = newdata[:,i].flatten()
# df_DC_new.to_csv("../data/ChungCheonDC/CompositeDCdata_processed.csv")

In [33]:
from ipywidgets import interact, IntSlider, ToggleButtons
itime = 201
itime_ref = 201  # 86 = Date 2015. 3.28, 201 = Date 2015. 7. 21
print df['date'].values[itime]
elecind = [5, 150,200]
 

# vizDCtimeSeries(elecind, itime, itime_ref, ['k','b','r'])
viz = lambda idatum, itime, flag: vizDCtimeSeries([idatum], itime, itime_ref, ['r'], flag, df_DC_new)
interact(viz, idatum=IntSlider(min=0, max=379, step=1, value=201)\
         ,itime=IntSlider(min=0, max=360, step=1, value=200)\
         ,flag=ToggleButtons(options=["std", "rho"]))
print df['date'].values[itime]


2015-07-21

In [27]:
print df_DC[electrodeID].values[228,:].flatten() / df_DC[electrodeID].values[201,:].flatten()


[ 0.94378917  0.97089054  0.95192731  0.95692494  0.95193781  0.97752667
  0.95361292  0.96317193  0.98441079  0.96481807  0.95712936  0.95372688
  0.97564573  0.96808788  0.94679616  0.97473724  0.94546695  0.98260453
  0.92724353  0.9347729   0.9439181   0.93041436  0.95709497  0.88051395
  0.9357107   0.91384566  0.95211768  0.91906974  0.93010692  0.92804207
  0.9193257   0.94094539  0.94975509  0.95811552  0.98520779  0.95333371
  0.91792139  0.94869415  0.96354883  0.97299977  0.96448626  0.94534412
  0.9562483   0.91711175  0.88626932  0.99915815  0.95616144  0.95347051
  0.95052956  0.98071184  0.9025725   0.95316066  0.97061929  0.9575292
  0.95997979  0.99651801  0.92362431  0.96122696  0.9620412   0.98472748
  0.96921004  0.95366863  0.93502616  0.99198249  0.93890082  1.0089163
  0.93279778  0.98901537  0.88280935  0.95969829  0.95251671  0.96137183
  0.96884861  0.95288108  0.95466993  0.964235    0.93686784  0.9765626
  0.93301894  0.95058357  0.94673391  0.94337558  0.97441643  0.97820573
  0.94001883  0.97877023  0.94447316  0.92659509  0.96002134  0.96883086
  0.94154204  0.94938748  0.95367184  0.96132581  1.64651571  1.32890793
  0.94339262  0.9672162   0.9775623   0.96956971  0.92505056  0.9611737
  0.98952591  0.96703122  1.00000631  0.92962868  0.97255801  0.96978514
  0.96775613  1.00151364  0.9635509   0.96477785  1.0008207   0.93661077
  1.00254605  0.94577628  1.02279482  0.87004523  0.96321425  0.97696591
  0.96472829  0.98882105  0.94962317  0.97765087  0.98041236  0.97459724
  0.94381519  0.98761173  0.95181057  0.9553418   0.96423068  0.97366944
  1.00143485  0.95737185  0.97051954  0.96238267  0.95624815  0.97065065
  0.97390523  0.95555678  0.94882274  0.95075031  0.97543003  0.94981553
  0.50107348  0.94570496  0.96630745  0.98275003  0.90759209  0.91939888
  0.9715794   1.00090957  1.01681826  0.9406361   0.98818742  0.97691133
  0.97037805  0.98013406  1.0022262   0.98822111  1.00388593  0.95029671
  1.02186127  0.94546831  1.0345514   0.88777243  0.95125971  0.97711537
  0.98429653  0.98658439  0.96655409  0.97469469  0.99511271  0.98027198
  0.97276991  0.94823731  1.01081166  0.94940387  0.96854229  0.98594602
  1.00599749  0.97955084  0.9705947   0.96308694  0.96571883  0.98303641
  0.98902686  0.96256175  0.95957106  0.94999112  0.98072502  0.99913357
  0.95262076  0.94475523  0.96953746  0.96990536  0.95964516  0.87072602
  0.97207525  1.0584296   0.95093003  0.98604464  0.9834745   0.97474715
  0.97725624  0.98211331  1.00590666  1.01878343  0.9416402   1.0231517
  0.96681136  1.0263573   0.887986    0.97015883  0.96234798  0.98438188
  1.005995    0.96290742  0.99501487  0.99349408  0.99503948  0.97222308
  0.97396678  0.97093669  1.00945887  0.96185271  0.99167044  1.01204235
  0.96091359  0.97637556  0.96899708  0.96084838  0.97931975  1.00127576
  0.97368738  0.96560919  0.9614862   0.97589665  0.97494333  1.01165109
  1.01797929  0.97580239  0.94033513  0.9626323   0.93721685  1.03052339
  0.98317826  0.99573997  0.98869238  0.97816517  0.98149312  0.96804673
  0.9718349   1.05249688  0.94879575  1.0147416   0.93117903  1.03607226
  0.88674328  0.98010418  0.98123871  0.96683718  1.00747479  0.98168176
  0.98577567  1.02160367  0.99345479  0.98767065  0.97528635  0.99729825
  0.96585531  1.02115906  0.98362896  1.00923526  0.97436443  0.96971872
  0.97905191  0.96907699  0.98244388  1.00522072  0.98202135  0.98198491
  0.97303813  0.98641133  1.00906858  0.98409571  0.93270282  0.98291242
  1.01191845  0.99029709  0.91593441  0.95356728  1.0337407   0.99021413
  0.8690681   0.98792759  0.97040119  0.99930947  1.0280361   0.98849995
  1.03423284  0.95813678  1.03207387  0.92783414  0.99527963  0.98515769
  0.98636203  0.98583044  0.98723292  1.01233667  1.00924102  1.02496629
  0.98508288  0.99014701  0.99782399  0.98942924  0.9760061   1.04890369
  0.99195635  0.97154357  0.9934474   0.97571576  0.97925357  0.9908461
  0.99840582  0.98122715  1.00256832  0.97931215  0.98743226  0.98243334
  0.96120024  0.98050622  1.01491953  0.9977221   0.97915697  0.92123908
  1.0070169   1.02782226  1.45523645  0.91042093  0.96637491  0.9880533
  1.02486231  0.97486461  1.0613883   0.96778041  1.02013259  0.92858087
  0.98625885  1.00646895  0.99174047  1.00775298  0.96328318  1.02145403
  1.04423929  1.00272398  1.03146738  0.98993193  1.01783332  0.99138043
  1.00185427  0.99225521  1.09718424  0.94099241  0.99296902  0.99138296
  0.9732994   1.00209472  1.01584882  0.98846677  0.97022745  0.99695023
  0.99870499  0.97473897  0.99907367  0.96020237  1.00842975  1.05622923
  1.09322599  0.92086448]

In [28]:
for i in range(0,379,100):
    x= df_DC[txrxID[i]]
    x1 = df_DC_new[txrxID[i]]
    plt.plot(newdata[:,i], 'k')
    plt.plot(x1, 'ro')    
    plt.plot(x, 'k.', ms=2)



In [29]:
plt.plot(newdata[:,i], 'k')
x1 = df_DC_new[txrxID[i]]



In [21]:
# for index in txrxID:
#     df_DC_new.loc[:,index] = newdata[:,i].flatten()

In [22]:
i = 112
def viz(i):
    x= df_DC[txrxID[i]]
    #median10 = pd.rolling_median(x, 6)
    mean10 = pd.rolling_max(x, 3)
    #x1 = median10
    #x2 = mean10
    # Masking array having NaN
    xm = xmasking(mean10)
    # Construct a Kalman filter

#     kf = KalmanFilter(transition_matrices = [1],
#                       observation_matrices = [1],
#                       initial_state_mean = x[0],
#                       initial_state_covariance = 1,
#                       observation_covariance=1,
#                       transition_covariance=1)
#     # Use the observed values of the price to get a rolling mean
#     state_means, _ = kf.filter(xm)
    state_means= df_DC_new[txrxID[i]]
    plt.plot(x)
    plt.plot(mean10, 'k.')
    #plt.plot(x1)
    #plt.plot(x2)
    plt.plot(state_means)
    # plt.legend([ i, 'Kalman Estimate'])
    # print df_DC[txrxID[i]]
interact(viz, i=(0,389,10))


Out[22]:
<function __main__.viz>

In [23]:
i = 105


x= df_DC[txrxID[i]]
#median10 = pd.rolling_median(x, 6)
mean10 = pd.rolling_max(x, 3)
#x1 = median10
#x2 = mean10
# Masking array having NaN
xm = xmasking(mean10)
# Construct a Kalman filter
kf = KalmanFilter(transition_matrices = [1],
                  observation_matrices = [1],
                  initial_state_mean = 67.6,
                  initial_state_covariance = 1,
                  observation_covariance=1,
                  transition_covariance=1)
# Use the observed values of the price to get a rolling mean
state_means, _ = kf.filter(xm)

#plt.plot(x1)
plt.plot(x)
#plt.plot(x1)
#plt.plot(x2)
plt.plot(state_means)
plt.legend([  'origin x','Kalman Estimate'])


Out[23]:
<matplotlib.legend.Legend at 0xca899b0>

In [23]:
i = 300


x= df_DC[txrxID[i]]
#median10 = pd.rolling_median(x, 6)
mean10 = pd.rolling_max(x, 3)
#x1 = median10
#x2 = mean10
# Masking array having NaN
xm = xmasking(mean10)
# Construct a Kalman filter
kf = KalmanFilter(transition_matrices = [1],
                  observation_matrices = [1],
                  initial_state_mean = 67.6,
                  initial_state_covariance = 1,
                  observation_covariance=1,
                  transition_covariance=1)
# Use the observed values of the price to get a rolling mean
state_means, _ = kf.filter(xm)

#plt.plot(x1)
plt.plot(x)
#plt.plot(x1)
#plt.plot(x2)
plt.plot(state_means)
plt.legend([  'origin x','Kalman Estimate'])


Out[23]:
<matplotlib.legend.Legend at 0xd083da0>

In [ ]:


In [ ]: