In [1]:
import pandas as pd
import sys
import matplotlib

In [2]:
water23 = pd.read_csv("../data/waterlevel/Water23.csv", index_col='date')

In [3]:
%pylab inline


Populating the interactive namespace from numpy and matplotlib

In [4]:
climate = pd.read_csv("../data/waterlevel/ClimateWater.csv", index_col='date')

In [5]:
climate


Out[5]:
Temp(��C) Rainfall(mm) Moisture(%) SurfaceTemp(��C) WaterH1
date
2015-01-01 -3.9 0.2 62.9 -1.8 39.30
2015-01-02 -4.3 2.4 74.8 -0.8 39.34
2015-01-03 -1.1 NaN 69.4 0.3 39.34
2015-01-04 3.1 NaN 87.6 2.6 39.34
2015-01-05 5.5 13.5 77.4 3.1 39.42
2015-01-06 0.0 2.5 66.3 1.9 39.47
2015-01-07 -3.0 NaN 53.9 -3.0 39.53
2015-01-08 -2.3 NaN 73.0 -1.9 39.56
2015-01-09 0.4 0.0 80.9 0.1 39.59
2015-01-10 0.9 NaN 71.4 0.1 39.59
2015-01-11 2.5 0.0 61.0 1.5 39.59
2015-01-12 -1.8 NaN 52.8 -1.1 39.68
2015-01-13 0.8 NaN 57.4 -0.3 39.70
2015-01-14 3.3 NaN 61.3 1.7 39.72
2015-01-15 3.1 NaN 62.3 2.8 39.74
2015-01-16 3.3 0.0 74.0 2.3 39.76
2015-01-17 -1.3 NaN 60.5 1.0 39.76
2015-01-18 0.6 2.0 77.3 -0.2 39.76
2015-01-19 2.7 0.3 68.0 2.4 39.82
2015-01-20 0.4 NaN 71.3 1.8 39.84
2015-01-21 3.5 5.0 77.6 2.3 39.86
2015-01-22 2.5 1.0 89.1 4.2 39.89
2015-01-23 1.7 NaN 81.3 2.2 39.92
2015-01-24 3.5 NaN 78.6 3.8 39.92
2015-01-25 7.1 2.5 73.5 4.1 39.92
2015-01-26 5.6 0.5 94.5 5.9 39.97
2015-01-27 -1.0 NaN 65.4 1.5 40.00
2015-01-28 -2.8 NaN 58.9 -1.6 40.02
2015-01-29 0.5 NaN 65.4 0.7 40.04
2015-01-30 -0.8 NaN 63.9 1.6 40.06
... ... ... ... ... ...
2015-12-02 9.5 11.6 77.0 7.5 33.88
2015-12-03 4.0 4.1 66.0 3.8 33.99
2015-12-04 3.6 5.5 85.1 3.4 34.09
2015-12-05 6.6 NaN 67.9 4.9 34.18
2015-12-06 2.1 NaN 65.0 3.7 34.24
2015-12-07 2.5 NaN 65.5 3.2 34.29
2015-12-08 3.8 NaN 70.8 3.7 34.30
2015-12-09 7.6 NaN 71.8 5.8 34.40
2015-12-10 9.6 9.7 90.0 8.1 34.43
2015-12-11 7.7 NaN 79.0 6.9 34.50
2015-12-12 7.0 NaN 70.5 5.5 34.50
2015-12-13 8.6 NaN 64.9 8.4 34.57
2015-12-14 6.9 6.9 88.9 6.8 34.60
2015-12-15 7.0 0.0 78.4 7.0 34.60
2015-12-16 0.6 19.5 90.0 1.7 34.68
2015-12-17 -1.5 0.0 69.5 2.3 34.80
2015-12-18 0.1 NaN 86.8 2.3 34.80
2015-12-19 2.9 NaN 86.0 3.6 34.86
2015-12-20 3.0 0.0 80.5 2.7 34.90
2015-12-21 5.5 0.5 84.4 6.4 35.00
2015-12-22 3.6 0.0 86.3 3.3 35.00
2015-12-23 6.0 0.0 89.1 6.9 35.00
2015-12-24 3.8 NaN 76.3 4.0 35.10
2015-12-25 1.9 0.0 54.8 2.5 35.10
2015-12-26 6.4 0.2 68.9 4.6 35.10
2015-12-27 -2.1 NaN 62.9 0.9 35.20
2015-12-28 -1.4 NaN 67.1 0.9 35.20
2015-12-29 0.0 NaN 73.5 0.4 35.20
2015-12-30 3.0 7.0 78.3 3.1 35.30
2015-12-31 2.6 NaN 83.1 2.9 35.28

365 rows × 5 columns


In [6]:
newindex = []
for ind in water23.index:
    newindex.append(ind.split()[0])

In [7]:
vals, inds = np.unique(newindex, return_inverse=True)

In [8]:
upperh_med = np.ones(vals.size)*np.nan
downh_med = np.ones(vals.size)*np.nan
upperh_std = np.ones(vals.size)*np.nan
downh_std = np.ones(vals.size)*np.nan

for i in range (vals.size):
    active = inds==i
    upperh_med[i] = np.median(water23["upperlevel"].values[active])
    downh_med[i] = np.median(water23["downlevel"].values[active])
    upperh_std[i] = np.std(water23["upperlevel"].values[active])
    downh_std[i] = np.std(water23["downlevel"].values[active])

In [9]:
date = climate.index.values

In [10]:
climate.keys()


Out[10]:
Index([u'Temp(��C)', u'Rainfall(mm)', u'Moisture(%)', u'SurfaceTemp(��C)',
       u'WaterH1'],
      dtype='object')

In [ ]:


In [11]:
actind = np.in1d(date, vals)

In [12]:
upperh = np.ones(date.size)*np.nan
downh = np.ones(date.size)*np.nan
upperh[actind] = upperh_med
downh[actind] = downh_med

In [13]:
waterdataset = list (zip(date, climate['WaterH1'].values, upperh, downh,\
                         climate['Rainfall(mm)'].values, climate['SurfaceTemp(\xa1\xc6C)'].values, climate['Moisture(%)'].values))
df = pd.DataFrame(data = waterdataset, columns=['date', 'reservoirH', 'upperH_med', 'downH_med',\
                                                'Rainfall (mm)', 'Temp (degree)', 'Moisture (percent)'])
df.set_index('date')


Out[13]:
reservoirH upperH_med downH_med Rainfall (mm) Temp (degree) Moisture (percent)
date
2015-01-01 39.30 NaN NaN 0.2 -1.8 62.9
2015-01-02 39.34 NaN NaN 2.4 -0.8 74.8
2015-01-03 39.34 NaN NaN NaN 0.3 69.4
2015-01-04 39.34 NaN NaN NaN 2.6 87.6
2015-01-05 39.42 NaN NaN 13.5 3.1 77.4
2015-01-06 39.47 NaN NaN 2.5 1.9 66.3
2015-01-07 39.53 NaN NaN NaN -3.0 53.9
2015-01-08 39.56 NaN NaN NaN -1.9 73.0
2015-01-09 39.59 NaN NaN 0.0 0.1 80.9
2015-01-10 39.59 NaN NaN NaN 0.1 71.4
2015-01-11 39.59 NaN NaN 0.0 1.5 61.0
2015-01-12 39.68 NaN NaN NaN -1.1 52.8
2015-01-13 39.70 NaN NaN NaN -0.3 57.4
2015-01-14 39.72 NaN NaN NaN 1.7 61.3
2015-01-15 39.74 NaN NaN NaN 2.8 62.3
2015-01-16 39.76 35.515 19.750 0.0 2.3 74.0
2015-01-17 39.76 35.460 20.345 NaN 1.0 60.5
2015-01-18 39.76 35.500 20.480 2.0 -0.2 77.3
2015-01-19 39.82 35.550 20.580 0.3 2.4 68.0
2015-01-20 39.84 35.525 20.610 NaN 1.8 71.3
2015-01-21 39.86 35.585 20.570 5.0 2.3 77.6
2015-01-22 39.89 35.640 20.580 1.0 4.2 89.1
2015-01-23 39.92 35.630 20.610 NaN 2.2 81.3
2015-01-24 39.92 35.640 20.620 NaN 3.8 78.6
2015-01-25 39.92 35.660 20.595 2.5 4.1 73.5
2015-01-26 39.97 35.720 20.600 0.5 5.9 94.5
2015-01-27 40.00 35.700 20.630 NaN 1.5 65.4
2015-01-28 40.02 35.685 20.630 NaN -1.6 58.9
2015-01-29 40.04 35.710 20.600 NaN 0.7 65.4
2015-01-30 40.06 35.740 20.610 NaN 1.6 63.9
... ... ... ... ... ... ...
2015-12-02 33.88 NaN NaN 11.6 7.5 77.0
2015-12-03 33.99 NaN NaN 4.1 3.8 66.0
2015-12-04 34.09 NaN NaN 5.5 3.4 85.1
2015-12-05 34.18 NaN NaN NaN 4.9 67.9
2015-12-06 34.24 NaN NaN NaN 3.7 65.0
2015-12-07 34.29 NaN NaN NaN 3.2 65.5
2015-12-08 34.30 NaN NaN NaN 3.7 70.8
2015-12-09 34.40 NaN NaN NaN 5.8 71.8
2015-12-10 34.43 NaN NaN 9.7 8.1 90.0
2015-12-11 34.50 NaN NaN NaN 6.9 79.0
2015-12-12 34.50 NaN NaN NaN 5.5 70.5
2015-12-13 34.57 NaN NaN NaN 8.4 64.9
2015-12-14 34.60 NaN NaN 6.9 6.8 88.9
2015-12-15 34.60 NaN NaN 0.0 7.0 78.4
2015-12-16 34.68 NaN NaN 19.5 1.7 90.0
2015-12-17 34.80 NaN NaN 0.0 2.3 69.5
2015-12-18 34.80 NaN NaN NaN 2.3 86.8
2015-12-19 34.86 NaN NaN NaN 3.6 86.0
2015-12-20 34.90 NaN NaN 0.0 2.7 80.5
2015-12-21 35.00 NaN NaN 0.5 6.4 84.4
2015-12-22 35.00 NaN NaN 0.0 3.3 86.3
2015-12-23 35.00 NaN NaN 0.0 6.9 89.1
2015-12-24 35.10 NaN NaN NaN 4.0 76.3
2015-12-25 35.10 NaN NaN 0.0 2.5 54.8
2015-12-26 35.10 NaN NaN 0.2 4.6 68.9
2015-12-27 35.20 NaN NaN NaN 0.9 62.9
2015-12-28 35.20 NaN NaN NaN 0.9 67.1
2015-12-29 35.20 NaN NaN NaN 0.4 73.5
2015-12-30 35.30 NaN NaN 7.0 3.1 78.3
2015-12-31 35.28 NaN NaN NaN 2.9 83.1

365 rows × 6 columns


In [14]:
df['upperH_med'].plot(figsize=(20,3),color='k')


Out[14]:
<matplotlib.axes._subplots.AxesSubplot at 0x8bb1780>

In [15]:
fig = plt.figure(figsize=(12,4))
ax1 = plt.subplot(111)
ax1_1 = ax1.twinx()
df['upperH_med'].plot(figsize=(20,3), ax=ax1, color='k')
df['reservoirH'].plot(figsize=(20,3), ax=ax1, color='b')
df['downH_med'].plot(figsize=(20,3), ax=ax1_1, color='r')
grid(True)



In [16]:
df.keys()


Out[16]:
Index([u'date', u'reservoirH', u'upperH_med', u'downH_med', u'Rainfall (mm)',
       u'Temp (degree)', u'Moisture (percent)'],
      dtype='object')

In [17]:
ax1 = plt.subplot(111)
ax1_1 = ax1.twinx()
df.plot(figsize=(12,3), x='date', y='reservoirH', ax=ax1, color='k', linestyle='-', lw=2, marker='.', ms=2)
df.plot(figsize=(12,3), x='date', y='upperH_med', ax=ax1, color='k', linestyle='--', lw=2)
df.plot(figsize=(12,3), x='date', y='downH_med', ax=ax1_1, color='r', linestyle='-')
ax1_1.legend(loc=4)
ax1.grid(True)
indst, indend = 80, 100
ax1.plot(np.r_[indst, indst], np.r_[28, 42], 'k-')
ax1.plot(np.r_[indend, indend], np.r_[28, 42], 'k--')
print df['date'].values[indst], df['date'].values[indst]
# ax1.set_ylim(39.5, 40.5)
# ax1.set_xlim(indst, indend)


2015-03-22 2015-03-22

In [18]:
ax1 = plt.subplot(111)
ax1_1 = ax1.twinx()
df.plot(figsize=(12,3), x='date', y='reservoirH', ax=ax1, color='k', linestyle='-', lw=2, marker='.', ms=2)
df.plot(figsize=(12,3), x='date', y='upperH_med', ax=ax1, color='k', linestyle='--', lw=2)
df.plot(figsize=(12,3), x='date', y='downH_med', ax=ax1_1, color='r', linestyle='-')
ax1_1.legend(loc=4)
ax1.grid(True)
indst, indend = 80, 100
ax1.plot(np.r_[indst, indst], np.r_[28, 42], 'k-')
ax1.plot(np.r_[indend, indend], np.r_[28, 42], 'k--')
print df['date'].values[indst], df['date'].values[indst]
ax1.set_ylim(39.5, 40.5)
ax1.set_xlim(indst, indend)


2015-03-22 2015-03-22
Out[18]:
(80, 100)

In [19]:
ax1 = plt.subplot(111)
df.plot(figsize=(12,3), x='date', y='Rainfall (mm)', ax=ax1, color='b', marker='o', linestyle="-", ms=3)
df.plot(figsize=(12,3), x='date', y='Temp (degree)', ax=ax1, color='k', marker='None', linestyle="-", ms=3)
plt.tight_layout()



In [20]:
import sys
sys.path.append("../codes/")

from Readfiles import getFnames
from DCdata import readReservoirDC_data, readReservoirDC_all

directory = "../data/ChungCheonDC/"
fnames = getFnames(directory, dtype="apr", minimumsize=7000.)


Efficiency Warning: Interpolation will be slow, use setup.py!

            python setup.py build_ext --inplace
    

In [21]:
import datetime
import numpy as np

In [22]:
def getdate(fstring):
    temp = fstring.split('.')[0]
    return datetime.date(int(temp[:4]), int(temp[4:6]), int(temp[6:8]))

In [23]:
date_temp = getdate(fnames[20])

In [24]:
date_temp.strftime("%Y-%m-%d")


Out[24]:
'2015-01-06'

In [25]:
# for i in range (vals.size):
#     active = inds==i
#     upperh_med[i] = np.median(water23["upperlevel"].values[active])
#     downh_med[i] = np.median(water23["downlevel"].values[active])
#     upperh_std[i] = np.std(water23["upperlevel"].values[active])
#     downh_std[i] = np.std(water23["downlevel"].values[active])

In [26]:
dat_temp, htemp, ID = readReservoirDC_all(directory+fnames[0])
ID.append('date')
ID.append('fnames')
ntimes = len(fnames)
DATA = np.zeros((dat_temp.shape[0], ntimes))*np.nan
index = np.ones(ntimes, dtype='bool')
for i, fname in enumerate(fnames):
    dat_temp = readReservoirDC_data(directory+fname)
    if dat_temp.shape[0] == 380:        
        DATA[:,i] = dat_temp[:,-1]
    else:
        print fname,dat_temp.shape[0]
        index[i] = False


20150103180000.apr 379
20150106180000.apr 379
20150109120000.apr 379
20150112120000.apr 379
20150117120000.apr 379
20150120120000.apr 379
20150123120000.apr 379
20150126120000.apr 379
20150127000000.apr 379
20150129060000.apr 379
20150201000000.apr 379
20150204000000.apr 379
20150207000000.apr 379
20150209180000.apr 379
20150212180000.apr 379
20150215180000.apr 379
20150218120000.apr 379
20150221060000.apr 379
20150224120000.apr 379
20150227060000.apr 379
20150228000000.apr 379
20150302000000.apr 379
20150305000000.apr 379
20150308000000.apr 379
20150329000000.apr 379
20150401000000.apr 379
20150403180000.apr 379
20150404120000.apr 379
20150406180000.apr 379
20150409000000.apr 379
20150412120000.apr 379
20150415060000.apr 379
20150418000000.apr 379
20150421000000.apr 379
20150424000000.apr 379

In [27]:
fnameDC = np.array(fnames)[index]
datesDC = []
for i in range(fnameDC.size):
    tempdate = getdate(fnameDC[i])
    datesDC.append(tempdate.strftime("%Y-%m-%d"))
datesDC = np.array(datesDC)

In [28]:
datesDC.size


Out[28]:
1234

In [29]:
vals, inds = np.unique(datesDC, return_inverse=True)

In [30]:
DATA_active = DATA[:,index]
DATA_DC = np.zeros((vals.size,DATA.shape[0]))*np.nan
DATA_DC_std = np.zeros((vals.size,DATA.shape[0]))*np.nan
for i in range (vals.size):
    active = inds==i
    DATA_DC[i,:] = np.median(DATA_active[:,active], axis=1)
    DATA_DC_std[i,:] = np.std(DATA_active[:,active], axis=1)

In [31]:
actind = np.in1d(date, vals)
DATA_DC_final = np.zeros((365,DATA.shape[0]))*np.nan
DATA_DC_std_final = np.zeros((365,DATA.shape[0]))*np.nan
DATA_DC_final[actind,:] = DATA_DC
DATA_DC_std_final[actind,:] = DATA_DC_std

In [32]:
DATA_DC.shape


Out[32]:
(322L, 380L)

In [33]:
df_DCstd = pd.DataFrame(data = np.hstack((DATA_DC_std_final, date.reshape([-1,1]))), columns=ID)
df_DCstd.set_index('date')

df_DC = pd.DataFrame(data = np.hstack((DATA_DC_final, date.reshape([-1,1]))), columns=ID)
df_DC.set_index('date')


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-33-98472938f8c7> in <module>()
----> 1 df_DCstd = pd.DataFrame(data = np.hstack((DATA_DC_std_final, date.reshape([-1,1]))), columns=ID)
      2 df_DCstd.set_index('date')
      3 
      4 df_DC = pd.DataFrame(data = np.hstack((DATA_DC_final, date.reshape([-1,1]))), columns=ID)
      5 df_DC.set_index('date')

C:\Users\sungkeun\Anaconda2\lib\site-packages\pandas\core\frame.pyc in __init__(self, data, index, columns, dtype, copy)
    255             else:
    256                 mgr = self._init_ndarray(data, index, columns, dtype=dtype,
--> 257                                          copy=copy)
    258         elif isinstance(data, (list, types.GeneratorType)):
    259             if isinstance(data, types.GeneratorType):

C:\Users\sungkeun\Anaconda2\lib\site-packages\pandas\core\frame.pyc in _init_ndarray(self, values, index, columns, dtype, copy)
    435             values = _possibly_infer_to_datetimelike(values)
    436 
--> 437         return create_block_manager_from_blocks([values], [columns, index])
    438 
    439     @property

C:\Users\sungkeun\Anaconda2\lib\site-packages\pandas\core\internals.pyc in create_block_manager_from_blocks(blocks, axes)
   3903         blocks = [getattr(b, 'values', b) for b in blocks]
   3904         tot_items = sum(b.shape[0] for b in blocks)
-> 3905         construction_error(tot_items, blocks[0].shape[1:], axes, e)
   3906 
   3907 

C:\Users\sungkeun\Anaconda2\lib\site-packages\pandas\core\internals.pyc in construction_error(tot_items, block_shape, axes, e)
   3880         raise e
   3881     raise ValueError("Shape of passed values is {0}, indices imply {1}".format(
-> 3882         passed,implied))
   3883 
   3884 

ValueError: Shape of passed values is (381, 365), indices imply (382, 365)

In [34]:
# df.to_csv("../data/ChungCheonDC/CompositeETCdata.csv")
# df_DC.to_csv("../data/ChungCheonDC/CompositeDCdata.csv")
# df_DCstd.to_csv("../data/ChungCheonDC/CompositeDCstddata.csv")

In [ ]: