In [20]:
%matplotlib inline
import os
import sys
import platform
import matplotlib
import pandas as pd
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt
import matplotlib.dates as dates
import matplotlib.ticker as tick
from matplotlib.backends.backend_pdf import PdfPages
from datetime import datetime,timedelta
from pylab import rcParams
rcParams['figure.figsize'] = 30, 30

In [24]:
print("Operating System " + platform.system() + " " + platform.release())
print("Python Version " + str(sys.version))
print("Pandas Version " + str(pd.__version__))
print("Numpy Version " + str(np.__version__))
print("Matplotlib Version " + str(matplotlib.__version__))


Operating System Windows 7
Python Version 2.7.10 (default, May 23 2015, 09:40:32) [MSC v.1500 32 bit (Intel)]
Pandas Version 0.19.1
Numpy Version 1.11.2
Matplotlib Version 1.5.3

In [26]:
# get connection info for MySQL database
raw_archive_folder = 'M:/PROJECTS/Snake Valley Water/Transducer Data/Raw_data_archive'
sys.path.append(raw_archive_folder)
import engineGetter
engine = engineGetter.getEngine()

In [42]:
quer = "SELECT * FROM groundwater.well WHERE WellID <= 75 OR WellID = 136"
wells = pd.read_sql_query(sql=quer,con=engine, index_col='WellID')

In [62]:
quer = "SELECT * FROM groundwater.reading WHERE WellID in(4,5,57,40,41,42,39,22,23,71,28,29,30,31,72) AND DateTime >= '2009'"
data = pd.read_sql_query(sql=quer, con=engine, parse_dates='DateTime', index_col = 'DateTime')

In [63]:
match = wells['Well'].to_dict()
data['WellName'] = data['WellID'].apply(lambda x: match[x],1)
wellnames = list(data.WellName.unique())

In [64]:
data.set_index(['WellID'], append=True,inplace=True)

In [66]:
data


Out[66]:
ReadingID MeasuredLevel Temp BaroEfficiencyLevel DeltaLevel MeasuredDTW DriftCorrection DTWBelowCasing DTWBelowGroundSurface WaterElevation Tape MeasuredByID WellName
DateTime WellID
2009-01-01 09:19:38 39 612872 19.6400 NaN NaN 0.8100 81.3500 -0.330 81.6800 80.1200 5448.28 0 NaN AG15
2009-01-01 21:19:38 39 612873 19.6300 NaN NaN 0.8000 81.3600 -0.330 81.6900 80.1300 5448.27 0 NaN AG15
2009-01-02 09:19:38 39 612874 19.7000 NaN NaN 0.8700 81.2900 -0.330 81.6200 80.0600 5448.34 0 NaN AG15
2009-01-02 21:19:38 39 612875 19.6700 NaN NaN 0.8400 81.3200 -0.330 81.6500 80.0900 5448.31 0 NaN AG15
2009-01-03 09:19:38 39 612876 19.6600 NaN NaN 0.8300 81.3300 -0.340 81.6700 80.1100 5448.29 0 NaN AG15
2009-01-03 21:19:38 39 612877 19.6700 NaN NaN 0.8400 81.3200 -0.340 81.6600 80.1000 5448.30 0 NaN AG15
2009-01-04 09:19:38 39 612878 19.6800 NaN NaN 0.8500 81.3100 -0.340 81.6500 80.0900 5448.31 0 NaN AG15
2009-01-04 21:19:38 39 612879 19.6900 NaN NaN 0.8600 81.3000 -0.340 81.6400 80.0800 5448.32 0 NaN AG15
2009-01-05 09:19:38 39 612880 19.7400 NaN NaN 0.9100 81.2500 -0.350 81.6000 80.0400 5448.36 0 NaN AG15
2009-01-05 21:19:38 39 612881 19.7000 NaN NaN 0.8700 81.2900 -0.350 81.6400 80.0800 5448.32 0 NaN AG15
2009-01-06 09:19:38 39 612882 19.7100 NaN NaN 0.8800 81.2800 -0.350 81.6300 80.0700 5448.33 0 NaN AG15
2009-01-06 21:19:38 39 612883 19.6700 NaN NaN 0.8400 81.3200 -0.350 81.6700 80.1100 5448.29 0 NaN AG15
2009-01-07 09:19:38 39 612884 19.6700 NaN NaN 0.8400 81.3200 -0.360 81.6800 80.1200 5448.28 0 NaN AG15
2009-01-07 21:19:38 39 612885 19.6800 NaN NaN 0.8500 81.3100 -0.360 81.6700 80.1100 5448.29 0 NaN AG15
2009-01-08 09:19:38 39 612886 19.7300 NaN NaN 0.9000 81.2600 -0.360 81.6200 80.0600 5448.34 0 NaN AG15
2009-01-08 21:19:38 39 612887 19.7100 NaN NaN 0.8800 81.2800 -0.360 81.6400 80.0800 5448.32 0 NaN AG15
2009-01-09 09:19:38 39 612888 19.7000 NaN NaN 0.8700 81.2900 -0.370 81.6600 80.1000 5448.30 0 NaN AG15
2009-01-09 21:19:38 39 612889 19.6700 NaN NaN 0.8400 81.3200 -0.370 81.6900 80.1300 5448.27 0 NaN AG15
2009-01-10 09:19:38 39 612890 19.7300 NaN NaN 0.9000 81.2600 -0.370 81.6300 80.0700 5448.33 0 NaN AG15
2009-01-10 21:19:38 39 612891 19.7200 NaN NaN 0.8900 81.2700 -0.370 81.6400 80.0800 5448.32 0 NaN AG15
2009-01-11 09:19:38 39 612892 19.7500 NaN NaN 0.9200 81.2400 -0.380 81.6200 80.0600 5448.34 0 NaN AG15
2009-01-11 21:19:38 39 612893 19.7300 NaN NaN 0.9000 81.2600 -0.380 81.6400 80.0800 5448.32 0 NaN AG15
2009-01-12 09:19:38 39 612894 19.7600 NaN NaN 0.9300 81.2300 -0.380 81.6100 80.0500 5448.35 0 NaN AG15
2009-01-12 21:19:38 39 612895 19.7300 NaN NaN 0.9000 81.2600 -0.380 81.6400 80.0800 5448.32 0 NaN AG15
2009-01-13 09:19:38 39 612896 19.7800 NaN NaN 0.9500 81.2100 -0.390 81.6000 80.0400 5448.36 0 NaN AG15
2009-01-13 21:19:38 39 612897 19.7800 NaN NaN 0.9500 81.2100 -0.390 81.6000 80.0400 5448.36 0 NaN AG15
2009-01-14 09:19:38 39 612898 19.8100 NaN NaN 0.9800 81.1800 -0.390 81.5700 80.0100 5448.39 0 NaN AG15
2009-01-14 21:19:38 39 612899 19.7800 NaN NaN 0.9500 81.2100 -0.390 81.6000 80.0400 5448.36 0 NaN AG15
2009-01-15 09:19:38 39 612900 19.8100 NaN NaN 0.9800 81.1800 -0.400 81.5800 80.0200 5448.38 0 NaN AG15
2009-01-15 21:19:38 39 612901 19.7700 NaN NaN 0.9400 81.2200 -0.400 81.6200 80.0600 5448.34 0 NaN AG15
... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2016-07-31 10:00:00 71 5138273 11.8414 12.450 9.1210 -0.2828 71.7028 0.002 71.7008 69.5508 5512.35 0 0.0 Snake Valley South MX
2016-07-31 11:00:00 71 5138274 11.8392 12.452 9.1148 -0.2890 71.7090 0.002 71.7070 69.5570 5512.34 0 0.0 Snake Valley South MX
2016-07-31 12:00:00 71 5138275 11.8376 12.452 9.1120 -0.2918 71.7118 0.002 71.7098 69.5598 5512.34 0 0.0 Snake Valley South MX
2016-07-31 13:00:00 71 5138276 11.8345 12.451 9.1348 -0.2690 71.6890 0.002 71.6870 69.5370 5512.36 0 0.0 Snake Valley South MX
2016-07-31 14:00:00 71 5138277 11.8162 12.450 9.1430 -0.2608 71.6808 0.002 71.6788 69.5288 5512.37 0 0.0 Snake Valley South MX
2016-07-31 15:00:00 71 5138278 11.8051 12.451 9.1533 -0.2505 71.6705 0.002 71.6685 69.5185 5512.38 0 0.0 Snake Valley South MX
2016-07-31 16:00:00 71 5138279 11.7894 12.452 9.1162 -0.2876 71.7076 0.002 71.7056 69.5556 5512.34 0 0.0 Snake Valley South MX
2016-07-31 17:00:00 71 5138280 11.7782 12.450 9.0959 -0.3079 71.7279 0.002 71.7259 69.5759 5512.32 0 0.0 Snake Valley South MX
2016-07-31 18:00:00 71 5138281 11.8046 12.452 9.1477 -0.2561 71.6761 0.002 71.6741 69.5241 5512.38 0 0.0 Snake Valley South MX
2016-07-31 19:00:00 71 5138282 11.7987 12.450 9.1323 -0.2715 71.6915 0.002 71.6895 69.5395 5512.36 0 0.0 Snake Valley South MX
2016-07-31 20:00:00 71 5138283 11.8120 12.451 9.1165 -0.2873 71.7073 0.002 71.7053 69.5553 5512.34 0 0.0 Snake Valley South MX
2016-07-31 21:00:00 71 5138284 11.8229 12.451 9.1089 -0.2949 71.7149 0.002 71.7129 69.5629 5512.34 0 0.0 Snake Valley South MX
2016-07-31 22:00:00 71 5138285 11.8442 12.450 9.1197 -0.2841 71.7041 0.002 71.7021 69.5521 5512.35 0 0.0 Snake Valley South MX
2016-07-31 23:00:00 71 5138286 11.8491 12.450 9.1342 -0.2696 71.6896 0.002 71.6876 69.5376 5512.36 0 0.0 Snake Valley South MX
2016-08-01 00:00:00 71 5138287 11.8632 12.452 9.1414 -0.2624 71.6824 0.002 71.6804 69.5304 5512.37 0 0.0 Snake Valley South MX
2016-08-01 01:00:00 71 5138288 11.8582 12.452 9.1340 -0.2698 71.6898 0.002 71.6878 69.5378 5512.36 0 0.0 Snake Valley South MX
2016-08-01 02:00:00 71 5138289 11.8536 12.451 9.1328 -0.2710 71.6910 0.002 71.6890 69.5390 5512.36 0 0.0 Snake Valley South MX
2016-08-01 03:00:00 71 5138290 11.8531 12.451 9.1372 -0.2666 71.6866 0.002 71.6846 69.5346 5512.37 0 0.0 Snake Valley South MX
2016-08-01 04:00:00 71 5138291 11.8480 12.450 9.1200 -0.2838 71.7038 0.002 71.7018 69.5518 5512.35 0 0.0 Snake Valley South MX
2016-08-01 05:00:00 71 5138292 11.8565 12.452 9.1175 -0.2863 71.7063 0.002 71.7043 69.5543 5512.35 0 0.0 Snake Valley South MX
2016-08-01 06:00:00 71 5138293 11.8666 12.451 9.1127 -0.2911 71.7111 0.002 71.7091 69.5591 5512.34 0 0.0 Snake Valley South MX
2016-08-01 07:00:00 71 5138294 11.8736 12.450 9.1070 -0.2968 71.7168 0.002 71.7148 69.5648 5512.34 0 0.0 Snake Valley South MX
2016-08-01 08:00:00 71 5138295 11.8825 12.452 9.1040 -0.2998 71.7198 0.002 71.7178 69.5678 5512.33 0 0.0 Snake Valley South MX
2016-08-01 09:00:00 71 5138296 11.8963 12.450 9.1135 -0.2903 71.7103 0.002 71.7083 69.5583 5512.34 0 0.0 Snake Valley South MX
2016-08-01 10:00:00 71 5138297 11.8965 12.450 9.1106 -0.2932 71.7132 0.002 71.7112 69.5612 5512.34 0 0.0 Snake Valley South MX
2016-08-01 11:00:00 71 5138298 11.8966 12.452 9.1092 -0.2946 71.7146 0.002 71.7126 69.5626 5512.34 0 0.0 Snake Valley South MX
2016-08-01 12:00:00 71 5138299 11.8977 12.451 9.1266 -0.2772 71.6972 0.002 71.6952 69.5452 5512.35 0 0.0 Snake Valley South MX
2016-08-01 13:00:00 71 5138300 11.8901 12.452 9.1327 -0.2711 71.6911 0.002 71.6891 69.5391 5512.36 0 0.0 Snake Valley South MX
2016-08-01 14:00:00 71 5138301 11.8815 12.452 9.1460 -0.2578 71.6778 0.002 71.6758 69.5258 5512.37 0 0.0 Snake Valley South MX
2016-08-01 15:00:00 71 5138302 11.8662 12.450 9.1518 -0.2520 71.6720 0.002 71.6700 69.5200 5512.38 0 0.0 Snake Valley South MX

837802 rows × 13 columns


In [70]:
df = data.groupby(pd.Grouper(freq='1D', level=0)).mean()


---------------------------------------------------------------------------
MemoryError                               Traceback (most recent call last)
<ipython-input-70-75461be26c06> in <module>()
----> 1 df = data.groupby(pd.Grouper(freq='1D', level=0)).mean()

c:\users\paulinkenbrandt\documents\github\env\lib\site-packages\pandas\core\generic.pyc in groupby(self, by, axis, level, as_index, sort, group_keys, squeeze, **kwargs)
   3982         return groupby(self, by=by, axis=axis, level=level, as_index=as_index,
   3983                        sort=sort, group_keys=group_keys, squeeze=squeeze,
-> 3984                        **kwargs)
   3985 
   3986     def asfreq(self, freq, method=None, how=None, normalize=False):

c:\users\paulinkenbrandt\documents\github\env\lib\site-packages\pandas\core\groupby.pyc in groupby(obj, by, **kwds)
   1499         raise TypeError('invalid type: %s' % type(obj))
   1500 
-> 1501     return klass(obj, by, **kwds)
   1502 
   1503 

c:\users\paulinkenbrandt\documents\github\env\lib\site-packages\pandas\core\groupby.pyc in __init__(self, obj, keys, axis, level, grouper, exclusions, selection, as_index, sort, group_keys, squeeze, **kwargs)
    368                                                     level=level,
    369                                                     sort=sort,
--> 370                                                     mutated=self.mutated)
    371 
    372         self.obj = obj

c:\users\paulinkenbrandt\documents\github\env\lib\site-packages\pandas\core\groupby.pyc in _get_grouper(obj, key, axis, level, sort, mutated)
   2380     # a passed-in Grouper, directly convert
   2381     if isinstance(key, Grouper):
-> 2382         binner, grouper, obj = key._get_grouper(obj)
   2383         if key.key is None:
   2384             return grouper, [], obj

c:\users\paulinkenbrandt\documents\github\env\lib\site-packages\pandas\tseries\resample.pyc in _get_grouper(self, obj)
   1059     def _get_grouper(self, obj):
   1060         # create the resampler and return our binner
-> 1061         r = self._get_resampler(obj)
   1062         r._set_binner()
   1063         return r.binner, r.grouper, r.obj

c:\users\paulinkenbrandt\documents\github\env\lib\site-packages\pandas\tseries\resample.pyc in _get_resampler(self, obj, kind)
   1042                                           groupby=self,
   1043                                           kind=kind,
-> 1044                                           axis=self.axis)
   1045         elif isinstance(ax, PeriodIndex) or kind == 'period':
   1046             return PeriodIndexResampler(obj,

c:\users\paulinkenbrandt\documents\github\env\lib\site-packages\pandas\tseries\resample.pyc in __init__(self, obj, groupby, axis, kind, **kwargs)
     88 
     89         if self.groupby is not None:
---> 90             self.groupby._set_grouper(self._convert_obj(obj), sort=True)
     91 
     92     def __unicode__(self):

c:\users\paulinkenbrandt\documents\github\env\lib\site-packages\pandas\core\groupby.pyc in _set_grouper(self, obj, sort)
    296             ax = ax.take(indexer)
    297             obj = obj.take(indexer, axis=self.axis,
--> 298                            convert=False, is_copy=False)
    299 
    300         self.obj = obj

c:\users\paulinkenbrandt\documents\github\env\lib\site-packages\pandas\core\generic.pyc in take(self, indices, axis, convert, is_copy, **kwargs)
   1667         new_data = self._data.take(indices,
   1668                                    axis=self._get_block_manager_axis(axis),
-> 1669                                    convert=True, verify=True)
   1670         result = self._constructor(new_data).__finalize__(self)
   1671 

c:\users\paulinkenbrandt\documents\github\env\lib\site-packages\pandas\core\internals.pyc in take(self, indexer, axis, verify, convert)
   3960         new_labels = self.axes[axis].take(indexer)
   3961         return self.reindex_indexer(new_axis=new_labels, indexer=indexer,
-> 3962                                     axis=axis, allow_dups=True)
   3963 
   3964     def merge(self, other, lsuffix='', rsuffix=''):

c:\users\paulinkenbrandt\documents\github\env\lib\site-packages\pandas\core\internals.pyc in reindex_indexer(self, new_axis, indexer, axis, fill_value, allow_dups, copy)
   3846             new_blocks = [blk.take_nd(indexer, axis=axis, fill_tuple=(
   3847                 fill_value if fill_value is not None else blk.fill_value,))
-> 3848                 for blk in self.blocks]
   3849 
   3850         new_axes = list(self.axes)

c:\users\paulinkenbrandt\documents\github\env\lib\site-packages\pandas\core\internals.pyc in take_nd(self, indexer, axis, new_mgr_locs, fill_tuple)
   1021             fill_value = fill_tuple[0]
   1022             new_values = algos.take_nd(values, indexer, axis=axis,
-> 1023                                        allow_fill=True, fill_value=fill_value)
   1024 
   1025         if new_mgr_locs is None:

c:\users\paulinkenbrandt\documents\github\env\lib\site-packages\pandas\core\algorithms.pyc in take_nd(arr, indexer, axis, out, fill_value, mask_info, allow_fill)
   1072             out = np.empty(out_shape, dtype=dtype, order='F')
   1073         else:
-> 1074             out = np.empty(out_shape, dtype=dtype)
   1075 
   1076     func = _get_take_nd_function(arr.ndim, arr.dtype, out.dtype, axis=axis,

MemoryError: 

In [53]:
for well in wellnames:
    df = data[data['WellName']==well]
    df.to_csv('C:/Users/PAULINKENBRANDT/Documents/' + well + '.csv')
    plt.plot(pd.to_datetime(df.index),df['WaterElevation'],label=well)
plt.legend()


---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-53-01f2b78a1f93> in <module>()
      1 for well in wellnames:
----> 2     df = data[data['WellName']==well]
      3     df.to_csv('C:/Users/PAULINKENBRANDT/Documents/' + well + '.csv')
      4     plt.plot(pd.to_datetime(df.index),df['WaterElevation'],label=well)
      5 plt.legend()

c:\users\paulinkenbrandt\documents\github\env\lib\site-packages\pandas\core\frame.pyc in __getitem__(self, key)
   2057             return self._getitem_multilevel(key)
   2058         else:
-> 2059             return self._getitem_column(key)
   2060 
   2061     def _getitem_column(self, key):

c:\users\paulinkenbrandt\documents\github\env\lib\site-packages\pandas\core\frame.pyc in _getitem_column(self, key)
   2064         # get column
   2065         if self.columns.is_unique:
-> 2066             return self._get_item_cache(key)
   2067 
   2068         # duplicate columns & possible reduce dimensionality

c:\users\paulinkenbrandt\documents\github\env\lib\site-packages\pandas\core\generic.pyc in _get_item_cache(self, item)
   1384         res = cache.get(item)
   1385         if res is None:
-> 1386             values = self._data.get(item)
   1387             res = self._box_item_values(item, values)
   1388             cache[item] = res

c:\users\paulinkenbrandt\documents\github\env\lib\site-packages\pandas\core\internals.pyc in get(self, item, fastpath)
   3539 
   3540             if not isnull(item):
-> 3541                 loc = self.items.get_loc(item)
   3542             else:
   3543                 indexer = np.arange(len(self.items))[isnull(self.items)]

c:\users\paulinkenbrandt\documents\github\env\lib\site-packages\pandas\indexes\base.pyc in get_loc(self, key, method, tolerance)
   2134                 return self._engine.get_loc(key)
   2135             except KeyError:
-> 2136                 return self._engine.get_loc(self._maybe_cast_indexer(key))
   2137 
   2138         indexer = self.get_indexer([key], method=method, tolerance=tolerance)

pandas\index.pyx in pandas.index.IndexEngine.get_loc (pandas\index.c:4443)()

pandas\index.pyx in pandas.index.IndexEngine.get_loc (pandas\index.c:4289)()

pandas\src\hashtable_class_helper.pxi in pandas.hashtable.PyObjectHashTable.get_item (pandas\hashtable.c:13733)()

pandas\src\hashtable_class_helper.pxi in pandas.hashtable.PyObjectHashTable.get_item (pandas\hashtable.c:13687)()

KeyError: 'WellName'

In [ ]: