In [89]:
import pandas as pd
import sys
import matplotlib

In [90]:
# Report the interpreter and library versions this notebook was run with.
for label, version in [
    ('Python version ', sys.version),
    ('Pandas version ', pd.__version__),
    ('Matplotlib version ', matplotlib.__version__),
]:
    print(label + version)


Python version 2.7.12 |Anaconda 4.2.0 (x86_64)| (default, Jul  2 2016, 17:43:17) 
[GCC 4.2.1 (Based on Apple Inc. build 5658) (LLVM build 2336.11.00)]
Pandas version 0.18.1
Matplotlib version 1.5.3

In [91]:
# Toy data: five first names and, position by position, their birth counts.
names, births = (
    ['Bob', 'Jessica', 'Mary', 'John', 'Mel'],
    [968, 155, 77, 578, 973],
)

In [92]:
# Preview the (name, births) pairs. Materialise the zip so the pairs are shown
# on Python 3 as well, where a bare zip() only displays an opaque iterator.
list(zip(names, births))


Out[92]:
[('Bob', 968), ('Jessica', 155), ('Mary', 77), ('John', 578), ('Mel', 973)]

In [93]:
# Pair every name with its birth count, then load the pairs into a
# two-column DataFrame.
BabyDataSet = list(zip(names, births))
df = pd.DataFrame(BabyDataSet, columns=['Names', 'Births'])

In [94]:
# Display the names/births frame built in the previous cell.
df


Out[94]:
Names Births
0 Bob 968
1 Jessica 155
2 Mary 77
3 John 578
4 Mel 973

In [95]:
# Water-level readings; the 'date' column is used as the row index and is left
# as plain strings (no date parsing is requested).
water23 = pd.read_csv(
    "../data/waterlevel/Water23.csv",
    index_col='date',
)

In [96]:
# Display the loaded readings; pandas elides the middle rows of a long frame.
# NOTE(review): water23.head() would keep the saved notebook much smaller.
water23


Out[96]:
upperlevel downlevel
date
2015-01-16 15:50:00 35.51 19.60
2015-01-16 16:00:00 35.51 19.61
2015-01-16 16:10:00 35.50 19.61
2015-01-16 17:00:00 35.55 19.67
2015-01-16 18:00:00 35.54 19.72
2015-01-16 19:00:00 35.53 19.78
2015-01-16 20:00:00 35.53 19.83
2015-01-16 21:00:00 35.52 19.87
2015-01-16 22:00:00 35.51 19.92
2015-01-16 23:00:00 35.50 19.96
2015-01-17 0:00:00 35.50 20.00
2015-01-17 1:00:00 35.50 20.04
2015-01-17 2:00:00 35.50 20.08
2015-01-17 3:00:00 35.49 20.11
2015-01-17 4:00:00 35.49 20.14
2015-01-17 5:00:00 35.48 20.17
2015-01-17 6:00:00 35.48 20.20
2015-01-17 7:00:00 35.48 20.23
2015-01-17 8:00:00 35.47 20.26
2015-01-17 9:00:00 35.46 20.29
2015-01-17 10:00:00 35.46 20.31
2015-01-17 11:00:00 35.45 20.34
2015-01-17 12:00:00 35.45 20.35
2015-01-17 13:00:00 35.45 20.35
2015-01-17 14:00:00 35.45 20.36
2015-01-17 15:00:00 35.45 20.37
2015-01-17 16:00:00 35.46 20.38
2015-01-17 17:00:00 35.46 20.40
2015-01-17 18:00:00 35.46 20.41
2015-01-17 19:00:00 35.45 20.43
... ... ...
2015-10-13 11:40:00 29.75 20.41
2015-10-13 11:50:00 29.75 20.41
2015-10-13 12:00:00 29.75 20.41
2015-10-13 12:10:00 29.74 20.41
2015-10-13 12:20:00 29.74 20.41
2015-10-13 12:30:00 29.74 20.41
2015-10-13 12:40:00 29.74 20.41
2015-10-13 12:50:00 29.74 20.41
2015-10-13 13:00:00 29.74 20.40
2015-10-13 13:10:00 29.74 20.40
2015-10-13 13:20:00 29.74 20.40
2015-10-13 13:30:00 29.75 20.40
2015-10-13 13:40:00 29.74 20.40
2015-10-13 13:50:00 29.75 20.40
2015-10-13 14:00:00 29.75 20.40
2015-10-13 14:10:00 29.75 20.40
2015-10-13 14:20:00 29.75 20.40
2015-10-13 14:30:00 29.74 20.40
2015-10-13 14:40:00 29.75 20.40
2015-10-13 14:50:00 29.75 20.40
2015-10-13 15:00:00 29.75 20.39
2015-10-13 15:10:00 29.75 20.40
2015-10-13 15:20:00 29.75 20.39
2015-10-13 15:30:00 29.75 20.39
2015-10-13 15:40:00 29.75 20.39
2015-10-13 15:50:00 29.75 20.39
2015-10-13 16:00:00 29.75 20.39
2015-10-13 16:10:00 29.75 20.39
2015-10-13 16:20:00 29.75 20.39
2015-10-13 16:30:00 29.75 20.39

34824 rows × 2 columns


In [97]:
# Climate/water-height table, indexed by its 'date' column.
ClimateWater = pd.read_csv(
    "../data/waterlevel/ClimateWater.csv",
    index_col='date',
)

In [98]:
# Display the climate table; pandas elides the middle rows of a long frame.
# NOTE(review): ClimateWater.head() would keep the saved notebook much smaller.
ClimateWater


Out[98]:
Temp(C) Rainfall(mm) Moisture(%) SurfaceTemp(surC) WaterH1
date
2015-01-01 -3.9 0.2 62.9 -1.8 39.30
2015-01-02 -4.3 2.4 74.8 -0.8 39.34
2015-01-03 -1.1 NaN 69.4 0.3 39.34
2015-01-04 3.1 NaN 87.6 2.6 39.34
2015-01-05 5.5 13.5 77.4 3.1 39.42
2015-01-06 0.0 2.5 66.3 1.9 39.47
2015-01-07 -3.0 NaN 53.9 -3.0 39.53
2015-01-08 -2.3 NaN 73.0 -1.9 39.56
2015-01-09 0.4 0.0 80.9 0.1 39.59
2015-01-10 0.9 NaN 71.4 0.1 39.59
2015-01-11 2.5 0.0 61.0 1.5 39.59
2015-01-12 -1.8 NaN 52.8 -1.1 39.68
2015-01-13 0.8 NaN 57.4 -0.3 39.70
2015-01-14 3.3 NaN 61.3 1.7 39.72
2015-01-15 3.1 NaN 62.3 2.8 39.74
2015-01-16 3.3 0.0 74.0 2.3 39.76
2015-01-17 -1.3 NaN 60.5 1.0 39.76
2015-01-18 0.6 2.0 77.3 -0.2 39.76
2015-01-19 2.7 0.3 68.0 2.4 39.82
2015-01-20 0.4 NaN 71.3 1.8 39.84
2015-01-21 3.5 5.0 77.6 2.3 39.86
2015-01-22 2.5 1.0 89.1 4.2 39.89
2015-01-23 1.7 NaN 81.3 2.2 39.92
2015-01-24 3.5 NaN 78.6 3.8 39.92
2015-01-25 7.1 2.5 73.5 4.1 39.92
2015-01-26 5.6 0.5 94.5 5.9 39.97
2015-01-27 -1.0 NaN 65.4 1.5 40.00
2015-01-28 -2.8 NaN 58.9 -1.6 40.02
2015-01-29 0.5 NaN 65.4 0.7 40.04
2015-01-30 -0.8 NaN 63.9 1.6 40.06
... ... ... ... ... ...
2015-12-02 9.5 11.6 77.0 7.5 33.88
2015-12-03 4.0 4.1 66.0 3.8 33.99
2015-12-04 3.6 5.5 85.1 3.4 34.09
2015-12-05 6.6 NaN 67.9 4.9 34.18
2015-12-06 2.1 NaN 65.0 3.7 34.24
2015-12-07 2.5 NaN 65.5 3.2 34.29
2015-12-08 3.8 NaN 70.8 3.7 34.30
2015-12-09 7.6 NaN 71.8 5.8 34.40
2015-12-10 9.6 9.7 90.0 8.1 34.43
2015-12-11 7.7 NaN 79.0 6.9 34.50
2015-12-12 7.0 NaN 70.5 5.5 34.50
2015-12-13 8.6 NaN 64.9 8.4 34.57
2015-12-14 6.9 6.9 88.9 6.8 34.60
2015-12-15 7.0 0.0 78.4 7.0 34.60
2015-12-16 0.6 19.5 90.0 1.7 34.68
2015-12-17 -1.5 0.0 69.5 2.3 34.80
2015-12-18 0.1 NaN 86.8 2.3 34.80
2015-12-19 2.9 NaN 86.0 3.6 34.86
2015-12-20 3.0 0.0 80.5 2.7 34.90
2015-12-21 5.5 0.5 84.4 6.4 35.00
2015-12-22 3.6 0.0 86.3 3.3 35.00
2015-12-23 6.0 0.0 89.1 6.9 35.00
2015-12-24 3.8 NaN 76.3 4.0 35.10
2015-12-25 1.9 0.0 54.8 2.5 35.10
2015-12-26 6.4 0.2 68.9 4.6 35.10
2015-12-27 -2.1 NaN 62.9 0.9 35.20
2015-12-28 -1.4 NaN 67.1 0.9 35.20
2015-12-29 0.0 NaN 73.5 0.4 35.20
2015-12-30 3.0 7.0 78.3 3.1 35.30
2015-12-31 2.6 NaN 83.1 2.9 35.28

365 rows × 5 columns


In [99]:
# Populates the interactive namespace from numpy and matplotlib and renders
# figures inline. Later cells use `np`, `plt` and `grid` without importing them.
# NOTE(review): presumably those names come from this magic -- explicit
# `import numpy as np` / `import matplotlib.pyplot as plt` would be clearer.
%pylab inline


Populating the interactive namespace from numpy and matplotlib

In [100]:
# Lower and upper levels in one figure, each drawn against its own y-axis.
fig, ax1 = plt.subplots(figsize=(12,4))
water23['downlevel'].plot(ax=ax1)
ax1_1 = ax1.twinx()  # second y-axis that shares ax1's x-axis
water23['upperlevel'].plot(ax=ax1_1)


Out[100]:
<matplotlib.axes._subplots.AxesSubplot at 0x11a8dd190>

In [101]:
# Working copy of the climate/water-height table, indexed by its 'date' column.
climate = pd.read_csv(
    "../data/waterlevel/ClimateWater.csv",
    index_col='date',
)

In [102]:
# Distribution of the upper water level, using 500 bins.
# (Alternatives tried here: climate["WaterH1"] with 100 bins and
# water23['downlevel'] with 500 bins.)
water23['upperlevel'].hist(bins=500)


Out[102]:
<matplotlib.axes._subplots.AxesSubplot at 0x11eeb2f90>

In [103]:
pd=climate["WaterH1"]

In [104]:
pd


Out[104]:
date
2015-01-01    39.30
2015-01-02    39.34
2015-01-03    39.34
2015-01-04    39.34
2015-01-05    39.42
2015-01-06    39.47
2015-01-07    39.53
2015-01-08    39.56
2015-01-09    39.59
2015-01-10    39.59
2015-01-11    39.59
2015-01-12    39.68
2015-01-13    39.70
2015-01-14    39.72
2015-01-15    39.74
2015-01-16    39.76
2015-01-17    39.76
2015-01-18    39.76
2015-01-19    39.82
2015-01-20    39.84
2015-01-21    39.86
2015-01-22    39.89
2015-01-23    39.92
2015-01-24    39.92
2015-01-25    39.92
2015-01-26    39.97
2015-01-27    40.00
2015-01-28    40.02
2015-01-29    40.04
2015-01-30    40.06
              ...  
2015-12-02    33.88
2015-12-03    33.99
2015-12-04    34.09
2015-12-05    34.18
2015-12-06    34.24
2015-12-07    34.29
2015-12-08    34.30
2015-12-09    34.40
2015-12-10    34.43
2015-12-11    34.50
2015-12-12    34.50
2015-12-13    34.57
2015-12-14    34.60
2015-12-15    34.60
2015-12-16    34.68
2015-12-17    34.80
2015-12-18    34.80
2015-12-19    34.86
2015-12-20    34.90
2015-12-21    35.00
2015-12-22    35.00
2015-12-23    35.00
2015-12-24    35.10
2015-12-25    35.10
2015-12-26    35.10
2015-12-27    35.20
2015-12-28    35.20
2015-12-29    35.20
2015-12-30    35.30
2015-12-31    35.28
Name: WaterH1, dtype: float64

In [105]:
pd.iloc[5]


Out[105]:
39.469999999999999

In [106]:
# Water height and rainfall drawn on the same axes for a quick visual comparison.
shared_ax = climate["WaterH1"].plot(figsize=(20,5))
climate["Rainfall(mm)"].plot(ax=shared_ax, figsize=(20,5))


Out[106]:
<matplotlib.axes._subplots.AxesSubplot at 0x11c2fa450>

In [107]:
# Water height (WaterH1 column) on its own, as a line plot.
climate["WaterH1"].plot(figsize=(12,5))


Out[107]:
<matplotlib.axes._subplots.AxesSubplot at 0x11fa3c290>

In [108]:
# Upper and lower level on one set of axes (shared y-scale).
levels_ax = water23['upperlevel'].plot(figsize=(12,5))
water23['downlevel'].plot(ax=levels_ax)


Out[108]:
<matplotlib.axes._subplots.AxesSubplot at 0x11caf3ed0>

In [109]:
# Keep only the part of each index label before the first whitespace,
# i.e. the date without the time of day.
newindex = [ind.split()[0] for ind in water23.index]

In [110]:
# vals: the distinct (sorted) date labels; inds: for every reading, the
# position in `vals` of the date it belongs to.
vals, inds = np.unique(newindex, return_inverse=True)

In [111]:
# Per-day mean of each level: sum of that day's readings over their count.
n_days = vals.size
upper_readings = water23["upperlevel"].values
down_readings = water23["downlevel"].values
upperh = np.full(n_days, np.nan)
downh = np.full(n_days, np.nan)
for day in range(n_days):
    on_day = (inds == day)        # boolean mask of the readings taken that day
    n_readings = on_day.sum()
    upperh[day] = upper_readings[on_day].sum() / n_readings
    downh[day] = down_readings[on_day].sum() / n_readings

In [ ]:


In [ ]:


In [112]:
# Wide WaterH1 line plot; the grid is switched on through the returned axes.
climate["WaterH1"].plot(figsize=(20,3)).grid(True)



In [113]:
# Wide upper-level line plot; the grid is switched on through the returned axes.
water23['upperlevel'].plot(figsize=(20,3)).grid(True)



In [114]:
# Wide lower-level line plot; the grid is switched on through the returned axes.
water23['downlevel'].plot(figsize=(20,3)).grid(True)



In [115]:
# List the column labels of the climate table.
climate.columns


Out[115]:
Index([u'Temp(C)', u'Rainfall(mm)', u'Moisture(%)', u'SurfaceTemp(surC)',
       u'WaterH1'],
      dtype='object')

In [116]:
# Moisture(%) column as a wide line plot.
climate["Moisture(%)"].plot(figsize=(20,3))


Out[116]:
<matplotlib.axes._subplots.AxesSubplot at 0x1204e8290>

In [117]:
# Surface temperature as a wide line plot with the grid on. The column label
# must match the frame's columns exactly: it is 'SurfaceTemp(surC)', as listed
# by climate.keys().
climate["SurfaceTemp(surC)"].plot(figsize=(20,3))
grid(True)

In [ ]:
# Wide rainfall line plot; the grid is switched on through the returned axes.
climate["Rainfall(mm)"].plot(figsize=(20,3)).grid(True)

In [ ]:
# Rainfall as a bar chart. `.plot(...).bar` would draw a line plot and then
# merely look up the Axes.bar method without calling it; the bar chart is
# requested through the plot accessor instead.
climate["Rainfall(mm)"].plot.bar(figsize=(20,3))

In [ ]:


In [ ]:
# Per-day mean lower level (the `downh` array), plotted against its position.
plt.plot(downh)

In [ ]:
waterdataset = list (zip(vals, upperh, downh))
#df = pd.DataFrame(data = waterdataset, columns=['vals', 'upperh', 'downh'])

In [ ]:
pd.DataFrame??

In [ ]:
df['upperH'].plot()

In [ ]: