In [5]:
import pandas as pd
import sys
import matplotlib

In [6]:
print('Python version ' + sys.version)
print('Pandas version ' + pd.__version__)
print('Matplotlib version ' + matplotlib.__version__)


Python version 2.7.11 |Anaconda 2.5.0 (64-bit)| (default, Jan 29 2016, 14:26:21) [MSC v.1500 64 bit (AMD64)]
Pandas version 0.17.1
Matplotlib version 1.5.1

In [7]:
names = ['Bob','Jessica','Mary','John','Mel']
births = [968, 155, 77, 578, 973]

In [8]:
zip(names, births)


Out[8]:
[('Bob', 968), ('Jessica', 155), ('Mary', 77), ('John', 578), ('Mel', 973)]

In [9]:
BabyDataSet = list (zip(names, births))
df = pd.DataFrame(data = BabyDataSet, columns=['Names', 'Births'])

In [10]:
df


Out[10]:
Names Births
0 Bob 968
1 Jessica 155
2 Mary 77
3 John 578
4 Mel 973

In [11]:
water23 = pd.read_csv("../data/waterlevel/Water23.csv", index_col='date')

In [12]:
water23


Out[12]:
upperlevel downlevel
date
2015-01-16 15:50:00 35.51 19.60
2015-01-16 16:00:00 35.51 19.61
2015-01-16 16:10:00 35.50 19.61
2015-01-16 17:00:00 35.55 19.67
2015-01-16 18:00:00 35.54 19.72
2015-01-16 19:00:00 35.53 19.78
2015-01-16 20:00:00 35.53 19.83
2015-01-16 21:00:00 35.52 19.87
2015-01-16 22:00:00 35.51 19.92
2015-01-16 23:00:00 35.50 19.96
2015-01-17 0:00:00 35.50 20.00
2015-01-17 1:00:00 35.50 20.04
2015-01-17 2:00:00 35.50 20.08
2015-01-17 3:00:00 35.49 20.11
2015-01-17 4:00:00 35.49 20.14
2015-01-17 5:00:00 35.48 20.17
2015-01-17 6:00:00 35.48 20.20
2015-01-17 7:00:00 35.48 20.23
2015-01-17 8:00:00 35.47 20.26
2015-01-17 9:00:00 35.46 20.29
2015-01-17 10:00:00 35.46 20.31
2015-01-17 11:00:00 35.45 20.34
2015-01-17 12:00:00 35.45 20.35
2015-01-17 13:00:00 35.45 20.35
2015-01-17 14:00:00 35.45 20.36
2015-01-17 15:00:00 35.45 20.37
2015-01-17 16:00:00 35.46 20.38
2015-01-17 17:00:00 35.46 20.40
2015-01-17 18:00:00 35.46 20.41
2015-01-17 19:00:00 35.45 20.43
... ... ...
2015-10-13 11:40:00 29.75 20.41
2015-10-13 11:50:00 29.75 20.41
2015-10-13 12:00:00 29.75 20.41
2015-10-13 12:10:00 29.74 20.41
2015-10-13 12:20:00 29.74 20.41
2015-10-13 12:30:00 29.74 20.41
2015-10-13 12:40:00 29.74 20.41
2015-10-13 12:50:00 29.74 20.41
2015-10-13 13:00:00 29.74 20.40
2015-10-13 13:10:00 29.74 20.40
2015-10-13 13:20:00 29.74 20.40
2015-10-13 13:30:00 29.75 20.40
2015-10-13 13:40:00 29.74 20.40
2015-10-13 13:50:00 29.75 20.40
2015-10-13 14:00:00 29.75 20.40
2015-10-13 14:10:00 29.75 20.40
2015-10-13 14:20:00 29.75 20.40
2015-10-13 14:30:00 29.74 20.40
2015-10-13 14:40:00 29.75 20.40
2015-10-13 14:50:00 29.75 20.40
2015-10-13 15:00:00 29.75 20.39
2015-10-13 15:10:00 29.75 20.40
2015-10-13 15:20:00 29.75 20.39
2015-10-13 15:30:00 29.75 20.39
2015-10-13 15:40:00 29.75 20.39
2015-10-13 15:50:00 29.75 20.39
2015-10-13 16:00:00 29.75 20.39
2015-10-13 16:10:00 29.75 20.39
2015-10-13 16:20:00 29.75 20.39
2015-10-13 16:30:00 29.75 20.39

34824 rows × 2 columns


In [13]:
%pylab inline


Populating the interactive namespace from numpy and matplotlib

In [14]:
#water23['upperlevel'].plot()
fig = plt.figure(figsize=(12,4))
ax1 = plt.subplot(111)
water23['downlevel'].plot(ax=ax1)
ax1_1 = ax1.twinx()
water23['upperlevel'].plot(ax=ax1_1)
#climate["Rainfall(mm)"].plot.bar(figsize=(12,5))


Out[14]:
<matplotlib.axes._subplots.AxesSubplot at 0x894d160>

In [15]:
climate = pd.read_csv("../data/waterlevel/ClimateWater.csv", index_col='date')

In [16]:
#climate["WaterH1"].hist(bins=100)
water23['upperlevel'].hist(bins=500)
#water23['downlevel'].hist(bins=500)


Out[16]:
<matplotlib.axes._subplots.AxesSubplot at 0x90c2748>

In [17]:
pd=climate["WaterH1"]

In [18]:
pd


Out[18]:
date
2015-01-01    39.30
2015-01-02    39.34
2015-01-03    39.34
2015-01-04    39.34
2015-01-05    39.42
2015-01-06    39.47
2015-01-07    39.53
2015-01-08    39.56
2015-01-09    39.59
2015-01-10    39.59
2015-01-11    39.59
2015-01-12    39.68
2015-01-13    39.70
2015-01-14    39.72
2015-01-15    39.74
2015-01-16    39.76
2015-01-17    39.76
2015-01-18    39.76
2015-01-19    39.82
2015-01-20    39.84
2015-01-21    39.86
2015-01-22    39.89
2015-01-23    39.92
2015-01-24    39.92
2015-01-25    39.92
2015-01-26    39.97
2015-01-27    40.00
2015-01-28    40.02
2015-01-29    40.04
2015-01-30    40.06
              ...  
2015-12-02    33.88
2015-12-03    33.99
2015-12-04    34.09
2015-12-05    34.18
2015-12-06    34.24
2015-12-07    34.29
2015-12-08    34.30
2015-12-09    34.40
2015-12-10    34.43
2015-12-11    34.50
2015-12-12    34.50
2015-12-13    34.57
2015-12-14    34.60
2015-12-15    34.60
2015-12-16    34.68
2015-12-17    34.80
2015-12-18    34.80
2015-12-19    34.86
2015-12-20    34.90
2015-12-21    35.00
2015-12-22    35.00
2015-12-23    35.00
2015-12-24    35.10
2015-12-25    35.10
2015-12-26    35.10
2015-12-27    35.20
2015-12-28    35.20
2015-12-29    35.20
2015-12-30    35.30
2015-12-31    35.28
Name: WaterH1, dtype: float64

In [19]:
pd.iloc[5]


Out[19]:
39.469999999999999

In [20]:
climate["WaterH1"].plot(figsize=(20,5))
climate["Rainfall(mm)"].plot(figsize=(20,5))


Out[20]:
<matplotlib.axes._subplots.AxesSubplot at 0x8d60b00>

In [18]:
climate["WaterH1"].plot(figsize=(12,5))


Out[18]:
<matplotlib.axes._subplots.AxesSubplot at 0x85e3f98>

In [19]:
water23['upperlevel'].plot(figsize=(12,5))
water23['downlevel'].plot()
#climate["Rainfall(mm)"].plot.bar(figsize=(12,5))


Out[19]:
<matplotlib.axes._subplots.AxesSubplot at 0xaeb5940>

In [20]:
newindex = []
for ind in water23.index:
    newindex.append(ind.split()[0])

In [21]:
vals, inds = np.unique(newindex, return_inverse=True)

In [22]:
upperh = np.ones(vals.size)*np.nan
downh = np.ones(vals.size)*np.nan
for i in range (vals.size):
    active = inds==i
    upperh[i] = water23["upperlevel"].values[active].sum() / active.sum()
    downh[i] = water23["downlevel"].values[active].sum() / active.sum()

In [ ]:


In [ ]:


In [23]:
climate["WaterH1"].plot(figsize=(20,3))
grid(True)



In [24]:
water23['upperlevel'].plot(figsize=(20,3))
grid(True)



In [25]:
water23['downlevel'].plot(figsize=(20,3))
grid(True)



In [26]:
climate.keys()


Out[26]:
Index([u'Temp(��C)', u'Rainfall(mm)', u'Moisture(%)', u'SurfaceTemp(��C)',
       u'WaterH1'],
      dtype='object')

In [27]:
climate["Moisture(%)"].plot(figsize=(20,3))


Out[27]:
<matplotlib.axes._subplots.AxesSubplot at 0xdaf8320>

In [28]:
climate["SurfaceTemp(\xa1\xc6C)"].plot(figsize=(20,3))
grid(True)



In [29]:
climate["Rainfall(mm)"].plot(figsize=(20,3))
grid(True)



In [30]:
climate["Rainfall(mm)"].plot(figsize=(20,3)).bar


Out[30]:
<bound method AxesSubplot.bar of <matplotlib.axes._subplots.AxesSubplot object at 0x000000000D6489E8>>

In [ ]:


In [31]:
plt.plot(downh)


Out[31]:
[<matplotlib.lines.Line2D at 0xd9b20f0>]

In [32]:
waterdataset = list (zip(vals, upperh, downh))
df = pd.DataFrame(data = waterdataset, columns=['date', 'upperH', 'downH'])


---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-32-d1f2f75d1b00> in <module>()
      1 waterdataset = list (zip(vals, upperh, downh))
----> 2 df = pd.DataFrame(data = waterdataset, columns=['date', 'upperH', 'downH'])

C:\Users\sungkeun\Anaconda2\lib\site-packages\pandas\core\generic.pyc in __getattr__(self, name)
   2358                 return self[name]
   2359             raise AttributeError("'%s' object has no attribute '%s'" %
-> 2360                                  (type(self).__name__, name))
   2361 
   2362     def __setattr__(self, name, value):

AttributeError: 'Series' object has no attribute 'DataFrame'

In [33]:
df


Out[33]:
Names Births
0 Bob 968
1 Jessica 155
2 Mary 77
3 John 578
4 Mel 973

In [33]:
df['upperH'].plot()


---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-33-833b32122dbf> in <module>()
----> 1 df['upperH'].plot()

C:\Users\sungkeun\Anaconda2\lib\site-packages\pandas\core\frame.pyc in __getitem__(self, key)
   1967             return self._getitem_multilevel(key)
   1968         else:
-> 1969             return self._getitem_column(key)
   1970 
   1971     def _getitem_column(self, key):

C:\Users\sungkeun\Anaconda2\lib\site-packages\pandas\core\frame.pyc in _getitem_column(self, key)
   1974         # get column
   1975         if self.columns.is_unique:
-> 1976             return self._get_item_cache(key)
   1977 
   1978         # duplicate columns & possible reduce dimensionality

C:\Users\sungkeun\Anaconda2\lib\site-packages\pandas\core\generic.pyc in _get_item_cache(self, item)
   1089         res = cache.get(item)
   1090         if res is None:
-> 1091             values = self._data.get(item)
   1092             res = self._box_item_values(item, values)
   1093             cache[item] = res

C:\Users\sungkeun\Anaconda2\lib\site-packages\pandas\core\internals.pyc in get(self, item, fastpath)
   3209 
   3210             if not isnull(item):
-> 3211                 loc = self.items.get_loc(item)
   3212             else:
   3213                 indexer = np.arange(len(self.items))[isnull(self.items)]

C:\Users\sungkeun\Anaconda2\lib\site-packages\pandas\core\index.pyc in get_loc(self, key, method, tolerance)
   1757                                  'backfill or nearest lookups')
   1758             key = _values_from_object(key)
-> 1759             return self._engine.get_loc(key)
   1760 
   1761         indexer = self.get_indexer([key], method=method,

pandas\index.pyx in pandas.index.IndexEngine.get_loc (pandas\index.c:3979)()

pandas\index.pyx in pandas.index.IndexEngine.get_loc (pandas\index.c:3843)()

pandas\hashtable.pyx in pandas.hashtable.PyObjectHashTable.get_item (pandas\hashtable.c:12265)()

pandas\hashtable.pyx in pandas.hashtable.PyObjectHashTable.get_item (pandas\hashtable.c:12216)()

KeyError: 'upperH'

In [ ]: