notebook.community

Edit and run



In [7]:

    
%pylab inline
import pandas as pd









    



Populating the interactive namespace from numpy and matplotlib



In [8]:

    
df = pd.read_csv("../data/ChungCheonDC/CompositeETCdata.csv")
df_DC = pd.read_csv("../data/ChungCheonDC/CompositeDCdata.csv")
df_DCstd = pd.read_csv("../data/ChungCheonDC/CompositeDCstddata.csv")



In [9]:

    
# missininds = np.arange(df_DC[electrodeID[elecind]].values.size)[np.isnan(df_DC[electrodeID[elecind]].values)]
electrodeID = df_DC.keys()[1:-1]



In [10]:

    
ax1 = plt.subplot(111)
ax1_1 = ax1.twinx()
df.plot(figsize=(12,3), x='date', y='reservoirH', ax=ax1_1, color='k', linestyle='-', lw=2)
df.plot(figsize=(12,3), x='date', y='upperH_med', ax=ax1_1, color='b', linestyle='-', lw=2)
df.plot(figsize=(12,3), x='date', y='Temp (degree)', ax=ax1, color='r', linestyle='-', lw=2)
ax1.legend(loc=3, bbox_to_anchor=(1.05, 0.7))
ax1_1.legend(loc=3, bbox_to_anchor=(1.05, 0.4))
itime_ref0 = 255
itime_ref1 = 115
ax1.plot(np.r_[itime_ref0, itime_ref0], np.r_[-5, 35], 'k-')
ax1.plot(np.r_[itime_ref1, itime_ref1], np.r_[-5, 35], 'k-')
print df['date'].values[itime_ref]



In [8]:

    
print pd_reservoirH[2]









    



---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-8-721b31845635> in <module>()
----> 1 print pd_reservoirH[2]

NameError: name 'pd_reservoirH' is not defined



In [13]:

    
from ipywidgets import interact, IntSlider, ToggleButtons
itime = 93
itime_ref = 202
print df['date'].values[itime]
elecind = [53, 110, 300]
# vizDCtimeSeries(elecind, itime, itime_ref, ['k','b','r'])
viz = lambda idatum, itime, flag: vizDCtimeSeries([idatum], itime, itime_ref, ['r'], flag)
interact(viz, idatum=IntSlider(min=0, max=379, step=1, value=294)\
         ,itime=IntSlider(min=0, max=360, step=1, value=200)\
         ,flag=ToggleButtons(options=["std", "rho"]))









    



---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-13-7bdf015134b0> in <lambda>(idatum, itime, flag)
      5 elecind = [53, 110, 300]
      6 # vizDCtimeSeries(elecind, itime, itime_ref, ['k','b','r'])
----> 7 viz = lambda idatum, itime, flag: vizDCtimeSeries([idatum], itime, itime_ref, ['r'], flag)
      8 interact(viz, idatum=IntSlider(min=0, max=379, step=1, value=294)         ,itime=IntSlider(min=0, max=360, step=1, value=200)         ,flag=ToggleButtons(options=["std", "rho"]))

NameError: global name 'vizDCtimeSeries' is not defined





    Out[13]:





<function __main__.<lambda>>



In [7]:

    
ax1 = plt.subplot(111)
ax1_1 = ax1.twinx()
df_DC.plot(figsize=(12,3), x='date', y=electrodeID[elecind], ax=ax1, colors=['k', 'b', 'r'])
df.plot(figsize=(12,3), x='date', y='reservoirH', ax=ax1_1, color='k', linestyle='-', lw=2)
ax1.legend(loc=3, bbox_to_anchor=(1.05, 0.7))
ax1_1.legend(loc=3, bbox_to_anchor=(1.05, 0.4))
ax1.set_yscale('linear')









    



C:\Users\sungkeun\Anaconda2\lib\site-packages\pandas\tools\plotting.py:929: UserWarning: 'colors' is being deprecated. Please use 'color'instead of 'colors'
  warnings.warn(("'colors' is being deprecated. Please use 'color'"



In [8]:

    
ax1 = plt.subplot(111)
df_DCstd.plot(figsize=(12,3), x='date', y=electrodeID[elecind], ax=ax1, colors=['k', 'b', 'r'], linestyle="-", marker='.', lw=1)
ax1.set_yscale('log')
ax1.legend(loc=3, bbox_to_anchor=(1.05, 0.7))









    Out[8]:





<matplotlib.legend.Legend at 0x9ba1e10>



In [9]:

    
sys.path.append("../codes/")
from DCdata import readReservoirDC_all
directory = "../data/ChungCheonDC/"
dat_temp,height_temp, ID = readReservoirDC_all(directory+"20151231180000.apr")









    



Efficiency Warning: Interpolation will be slow, use setup.py!

            python setup.py build_ext --inplace



In [10]:

    
from scipy import interpolate
locs = dat_temp[:,:4]
mida = locs[:,:2].sum(axis=1)
midb = locs[:,2:].sum(axis=1)
mid = (mida + midb)*0.5
dz = mida-midb
x = np.linspace(mid.min(), mid.max(), 100)
z = np.linspace(dz.min(), dz.max(), 100)
grid_x, grid_z = np.meshgrid(x,z)

def vizDCtimeSeries(idatum, itime, itime_ref, colors, flag):
    fig = plt.figure(figsize = (12, 12))
    ax1 = plt.subplot(411)
    ax2 = plt.subplot(412)
    
    valsratio = df_DC[electrodeID].values[itime,:].flatten() / df_DC[electrodeID].values[itime_ref,:].flatten()
    valsDC = np.log10(df_DC[electrodeID].values[itime,:].flatten())
    valsDCstd = df_DCstd[electrodeID].values[itime,:].flatten()
    grid_rho_ratio = griddata(mid, dz, valsratio, grid_x, grid_z, interp='linear')
    grid_rho_ratio = grid_rho_ratio.reshape(grid_x.shape)
    if flag =="std":
        vmin, vmax = 0, 10
        grid_rho = griddata(mid, dz, valsDCstd, grid_x, grid_z, interp='linear')        
    elif flag =="rho":
        vmin, vmax = np.log10(20), np.log10(200)
        grid_rho = griddata(mid, dz, valsDC, grid_x, grid_z, interp='linear')
    grid_rho = grid_rho.reshape(grid_x.shape)
        
    
    ax1.contourf(grid_x, grid_z, grid_rho, 200, vmin =vmin, vmax = vmax, clim=(vmin, vmax), cmap="jet")    
    vmin, vmax = 0.9, 1.1
    ax2.contourf(grid_x, grid_z, grid_rho_ratio, 200, vmin =vmin, vmax = vmax, clim=(vmin, vmax), cmap="jet")        
    ax1.scatter(mid, dz, s=20, c = valsDC, edgecolor="None", vmin =vmin, vmax = vmax, clim=(vmin, vmax))
    ax1.plot(mid, dz, 'k.')
    ax2.scatter(mid, dz, s=20, c = valsratio, edgecolor="None", vmin =vmin, vmax = vmax, clim=(vmin, vmax))
    ax2.plot(mid, dz, 'k.')
    
    for i in range(len(colors)):
        ax1.plot(mid[idatum[i]], dz[idatum[i]], 'o', color=colors[i])    
        ax2.plot(mid[idatum[i]], dz[idatum[i]], 'o', color=colors[i])    
        

    ax3 = plt.subplot(413)
    ax3_1 = ax3.twinx()
    df.plot(x='date', y='reservoirH', ax=ax3_1, color='k', linestyle='-', lw=2)
    df.plot(x='date', y='upperH_med', ax=ax3_1, color='b', linestyle='-', lw=2)
    df.plot(x='date', y='Temp (degree)', ax=ax3, color='r', linestyle='-', lw=2)
    df.plot(x='date', y='Rainfall (mm)', ax=ax3, color='b', linestyle='-', marker="o", ms=4)
    ax3.legend(loc=3, bbox_to_anchor=(1.05, 0.7))
    ax3_1.legend(loc=3, bbox_to_anchor=(1.05, 0.4))
    itime_ref0 = itime_ref
    itime_ref1 = itime
    ax3.plot(np.r_[itime_ref0, itime_ref0], np.r_[-5, 40], 'k--', lw=2)
    ax3.plot(np.r_[itime_ref1, itime_ref1], np.r_[-5, 40], 'k--', lw=2)

    ax4 = plt.subplot(414)
    df_DC.plot(x='date', y=electrodeID[idatum], ax=ax4)
    ax4.legend(loc=3, bbox_to_anchor=(1.05, 0.7))
    ax4.set_yscale('log')
    temp = df_DC[electrodeID[elecind]].values
    vmax = np.median(temp[~np.isnan(temp)]) + np.std(temp[~np.isnan(temp)])*3
    vmin = np.median(temp[~np.isnan(temp)]) - np.std(temp[~np.isnan(temp)])*3
    ax4.plot(np.r_[itime_ref1, itime_ref1], np.r_[vmin, vmax], 'k--', lw=2)
    ax4.plot(np.r_[itime_ref0, itime_ref0], np.r_[vmin, vmax], 'k--', lw=2)
    
    ax4.set_ylim(vmin, vmax)



In [3]:

    
print df_reservoirH









    



---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-3-6bb36be4608e> in <module>()
----> 1 print df_reservoirH

NameError: name 'df_reservoirH' is not defined



In [1]:

    
import numpy as np

a = np.random.random(((5,3,3))); # example of what real input will look like

# create 2D flattened version of 3D input array
d1,d2,d3 = a.shape
b = np.zeros([d1,d2*d3])
for i in range(len(a)):
  b[i] = a[i].flatten()

print "shape of 3D array: ", a.shape
print "shape of flattened 2D array: ", b.shape, "\n"
print "flattened 2D array:\n", b, "\n"

# mean-center the flattened array
b -= np.mean(b, axis=0)

# calculate the covariance matrix of the flattened array
covar1 = np.cov(b, rowvar=0)   # this makes a 9x9 array
covar2 = np.dot(b, b.T)        # this makes a 5x5 array

print "covariance via numpy.cov:\n", covar1, "\n"
print "covariance via numpy.dot:\n", covar2, "\n"

# calculate eigenvalues and eigenvectors
eval1, evec1 = np.linalg.eig(covar1)
eval2, evec2 = np.linalg.eig(covar2)

print "eigenvalues via numpy.cov covariance matrix:\n", eval1, "\n"
print "eigenvectors via numpy.cov covariance matrix:\n", evec1, "\n"
print "eigenvalues via numpy.dot covariance matrix:\n", eval2, "\n"
print "eigenvectors via numpy.dot covariance matrix:\n", evec2, "\n"









    



shape of 3D array:  (5L, 3L, 3L)
shape of flattened 2D array:  (5L, 9L) 

flattened 2D array:
[[ 0.1610976   0.53408452  0.92564432  0.79417234  0.95817541  0.70043539
   0.80378572  0.83962233  0.04476981]
 [ 0.53964821  0.7019235   0.53174998  0.61080967  0.26941622  0.87936852
   0.2673331   0.93888042  0.15430463]
 [ 0.612724    0.40047046  0.82678424  0.95936799  0.70751275  0.54988299
   0.44828535  0.15883392  0.31543301]
 [ 0.35573653  0.59093642  0.16894013  0.79057648  0.87794153  0.61850889
   0.74833471  0.86277261  0.57514702]
 [ 0.65810902  0.42367822  0.59150756  0.78926301  0.96411273  0.50754312
   0.16847888  0.9854812   0.05101293]] 

covariance via numpy.cov:
[[  4.22449824e-02  -8.68311540e-03  -6.27972348e-03   2.54321964e-03
   -1.95152308e-02  -9.26667705e-03  -5.21132406e-02  -1.68327847e-02
   -2.25244311e-03]
 [ -8.68311540e-03   1.53424004e-02  -1.62882517e-02  -1.31533465e-02
   -2.28111198e-02   1.64585028e-02   4.58324358e-03   2.28099778e-02
    3.80694983e-03]
 [ -6.27972348e-03  -1.62882517e-02   8.69049535e-02   1.29520135e-02
    8.43608900e-03  -1.80055742e-03  -1.46220600e-04  -4.21756132e-02
   -4.57384460e-02]
 [  2.54321964e-03  -1.31533465e-02   1.29520135e-02   1.52015555e-02
    1.99340209e-02  -1.44383250e-02   8.62842937e-03  -3.34158490e-02
    6.89517240e-03]
 [ -1.95152308e-02  -2.28111198e-02   8.43608900e-03   1.99340209e-02
    8.45421790e-02  -3.25099279e-02   3.45974360e-02   4.41395687e-03
    1.84253526e-05]
 [ -9.26667705e-03   1.64585028e-02  -1.80055742e-03  -1.44383250e-02
   -3.25099279e-02   2.16140317e-02   1.65323471e-03   1.74727593e-02
   -5.15450013e-03]
 [ -5.21132406e-02   4.58324358e-03  -1.46220600e-04   8.62842937e-03
    3.45974360e-02   1.65323471e-03   7.99641991e-02  -8.93898104e-03
    2.54635772e-02]
 [ -1.68327847e-02   2.28099778e-02  -4.21756132e-02  -3.34158490e-02
    4.41395687e-03   1.74727593e-02  -8.93898104e-03   1.15275258e-01
   -2.11404405e-02]
 [ -2.25244311e-03   3.80694983e-03  -4.57384460e-02   6.89517240e-03
    1.84253526e-05  -5.15450013e-03   2.54635772e-02  -2.11404405e-02
    4.96210569e-02]] 

covariance via numpy.dot:
[[ 0.37715511 -0.17567352 -0.06781977 -0.05474768 -0.07891413]
 [-0.17567352  0.4477807  -0.1649711  -0.0948909  -0.01224518]
 [-0.06781977 -0.1649711   0.49469711 -0.16524901 -0.09665723]
 [-0.05474768 -0.0948909  -0.16524901  0.4251403  -0.11025271]
 [-0.07891413 -0.01224518 -0.09665723 -0.11025271  0.29806925]] 

eigenvalues via numpy.cov covariance matrix:
[  1.70508687e-01 +0.00000000e+00j   1.48176874e-01 +0.00000000e+00j
   8.31101411e-02 +0.00000000e+00j   1.08914915e-01 +0.00000000e+00j
   5.18128296e-18 +7.20706051e-18j   5.18128296e-18 -7.20706051e-18j
   8.71132765e-18 +0.00000000e+00j   4.29551008e-18 +0.00000000e+00j
  -6.20875818e-18 +0.00000000e+00j] 

eigenvectors via numpy.cov covariance matrix:
[[-0.03036503+0.j          0.42643595+0.j         -0.36890779+0.j
  -0.18755901+0.j          0.22394483+0.2361355j   0.22394483-0.2361355j
   0.03034394+0.j          0.07855307+0.j         -0.06674570+0.j        ]
 [-0.23145067+0.j         -0.04269796+0.j          0.26278158+0.j
  -0.04275618+0.j          0.55530396+0.j          0.55530396-0.j
   0.06266323+0.j          0.24761195+0.j         -0.23565992+0.j        ]
 [ 0.46813620+0.j          0.25742292+0.j          0.29144665+0.j
   0.54759348+0.j         -0.20042549-0.16404399j -0.20042549+0.16404399j
  -0.48568010+0.j         -0.47030478+0.j          0.39661853+0.j        ]
 [ 0.27578795+0.j         -0.02651538+0.j         -0.12919277+0.j
  -0.08251024+0.j         -0.15266921-0.2600468j  -0.15266921+0.2600468j
  -0.33884014+0.j         -0.18210788+0.j         -0.31309980+0.j        ]
 [ 0.30488162+0.j         -0.47485469+0.j         -0.57353935+0.j
   0.27003745+0.j          0.17305982+0.08508045j  0.17305982-0.08508045j
   0.08042779+0.j          0.06255966+0.j         -0.27808804+0.j        ]
 [-0.20105329+0.j          0.04998339+0.j          0.41312506+0.j
   0.03914107+0.j         -0.08996254+0.0824745j  -0.08996254-0.0824745j
  -0.04011749+0.j         -0.11417571+0.j         -0.55289465+0.j        ]
 [ 0.21344345+0.j         -0.62794424+0.j          0.40294209+0.j
  -0.05015505+0.j          0.15361565+0.21183077j  0.15361565-0.21183077j
   0.18787861+0.j          0.21317449+0.j          0.01543748+0.j        ]
 [-0.68653784+0.j         -0.26226919+0.j         -0.17598321+0.j
   0.45088786+0.j         -0.24728404-0.14943499j -0.24728404+0.14943499j
  -0.38927597+0.j         -0.35891721+0.j          0.27411833+0.j        ]
 [ 0.00552373+0.j         -0.24140263+0.j         -0.01639622+0.j
  -0.61323677+0.j         -0.38955759-0.26818678j -0.38955759+0.26818678j
  -0.67047935+0.j         -0.69782027+0.j          0.47567536+0.j        ]] 

eigenvalues via numpy.dot covariance matrix:
[  5.55111512e-17   3.32440564e-01   4.35659658e-01   6.82034746e-01
   5.92707496e-01] 

eigenvectors via numpy.dot covariance matrix:
[[-0.4472136  -0.3902898   0.63686733 -0.26626773 -0.41373346]
 [-0.4472136  -0.47158667 -0.04981179  0.5974605   0.46708216]
 [-0.4472136  -0.0465662  -0.38647176 -0.70622429  0.38693464]
 [-0.4472136   0.12981616 -0.55987944  0.22666536 -0.64676548]
 [-0.4472136   0.77862651  0.35929567  0.14836616  0.20648213]]



In [2]:

    
import numpy as np
 x=np.random.normal(size=25)
>>> y=np.random.normal(size=25)
>>> np.cov(x,y)
array([[ 0.77568388,  0.15568432],
       [ 0.15568432,  0.73839014]])









    



---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-2-bed730c7d17d> in <module>()
      3 y=np.random.normal(size=25)
      4 np.cov(x,y)
----> 5 array([[ 0.77568388,  0.15568432],
      6        [ 0.15568432,  0.73839014]])

NameError: name 'array' is not defined



In [3]:

    
x=np.random.normal(size=25)



In [4]:

    
y=np.random.normal(size=25)



In [5]:

    
np.cov(x,y)









    Out[5]:





array([[ 1.19685995,  0.06473121],
       [ 0.06473121,  1.14174912]])



In [ ]:

    
import pylab
import random
import math


random.seed = 1

x1 = [1,4,7,8]
y1 = [1,3,5,7]


print "Mean of x is", pylab.mean(x1)
print "Sample variance of x is", pylab.var(x1,ddof=1)
print "Sample SD of x is", pylab.std(x1,ddof=1)
print "Mean of y is", pylab.mean(y1)
print "Sample variance of y is", pylab.var(y1,ddof=1)
print "Sample SD of y is", pylab.std(y1,ddof=1)

print "Correlation of X and Y is", pylab.corrcoef(x1,y1)

pylab.scatter(x1,y1,c="blue",marker="s")
pylab.xlabel("Variable X",size='x-large')
pylab.ylabel("Variable Y",size='x-large')
pylab.title("Scatter plot of two variables",size='x-large')
pylab.savefig("scatterXYExample.png")
pylab.show()


pylab.scatter(x1,y1,c="blue",marker="s")
pylab.xlabel("Variable X",size='x-large')
pylab.ylabel("Variable Y",size='x-large')
pylab.title("Scatter plot of two variables",size='x-large')
pylab.axhline(y=4)
pylab.axvline(x=5)
pylab.annotate('Mean of X = 5',
               xy=(5, 6),
               xycoords='data',
               xytext=(2,6),
               size='large',
               arrowprops=dict(arrowstyle="->"),
               ha='center',
               va='center')
pylab.annotate('Mean of Y = 4',
               xy=(7, 4),
               xycoords='data',
               xytext=(7,2),
               size='large',
               arrowprops=dict(arrowstyle="->"),
               ha='center',
               va='center')

pylab.savefig("scatterXYWithMeans.png")
pylab.show()


pylab.scatter(x1,y1,c="blue",marker="s")
pylab.xlabel("Variable X",size='x-large')
pylab.ylabel("Variable Y",size='x-large')
pylab.title("Scatter plot of two variables",size='x-large')
pylab.axhline(y=4)
pylab.axvline(x=5)
pylab.annotate('Mean of X = 5',
               xy=(5, 6),
               xycoords='data',
               xytext=(2,6),
               size='large',
               arrowprops=dict(arrowstyle="->"),
               ha='center',
               va='center')
pylab.annotate('Mean of Y = 4',
               xy=(7, 4),
               xycoords='data',
               xytext=(7,2),
               size='large',
               arrowprops=dict(arrowstyle="->"),
               ha='center',
               va='center')
pylab.annotate('(-4,-3)',
               xy=(1, 1),
               xycoords='data',
               xytext=(3,1),
               arrowprops=dict(arrowstyle="->",shrinkA=8,shrinkB=8),
               ha='center',
               va='center')
pylab.annotate('(-1,-1)',
               xy=(4, 3),
               xycoords='data',
               xytext=(2,3),
               arrowprops=dict(arrowstyle="->",shrinkA=8,shrinkB=8),
               ha='center',
               va='center')
pylab.annotate('(+2,+1)',
               xy=(7, 5),
               xycoords='data',
               xytext=(6,5),
               arrowprops=dict(arrowstyle="->",shrinkA=8,shrinkB=8),
               ha='center',
               va='center')
pylab.annotate('(+3,+3)',
               xy=(8, 7),
               xycoords='data',
               xytext=(6,7),
               arrowprops=dict(arrowstyle="->",shrinkA=8,shrinkB=8),
               ha='center',
               va='center')
pylab.savefig("scatterXYWithMeansAndDevs.png")
pylab.show()


sampleSize = 500
x2 = []
y2 = []

for i in range(sampleSize):
    x2.append(random.normalvariate(100,10))

for i in range(sampleSize):
    y2.append(x2[i] + random.normalvariate(100,10))


pylab.scatter(x2,y2,c="green",marker="o")
pylab.xlabel("Variable X",size='x-large')
pylab.ylabel("Variable Y",size='x-large')
pylab.title("Scatter plot of two variables",size='x-large')
pylab.savefig("scatterXYCorrelated.png")
pylab.show()

print "Correlation of X and Y is", pylab.corrcoef(x2,y2)[0,1]


x3 = []
y3 = []

for i in range(sampleSize):
    if x2[i] > 95 and x2[i] < 105:
        x3.append(x2[i])
        y3.append(y2[i])

pylab.scatter(x3,y3,c="green",marker="o")
pylab.xlabel("Variable X",size='x-large')
pylab.ylabel("Variable Y",size='x-large')
pylab.title("Scatter plot of two variables, limited range",size='x-large')
pylab.savefig("scatterXYLimitedRange.png")
pylab.show()

print "Correlation of X and Y over limited range is", pylab.corrcoef(x3,y3)[0,1]



##Calculate repeated correlation coefficients for samples of 50
sampleSize = 50

print "Sampling experiment"

for k in range(20):
    x2 = []
    y2 = []
    for i in range(sampleSize):
        x2.append(random.normalvariate(100,10))
    for i in range(sampleSize):
        y2.append(x2[i] + random.normalvariate(100,10))
    print pylab.corrcoef(x2,y2)[0,1]



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]: