In [56]:
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt

from scipy import linspace, polyval, polyfit, sqrt, stats, randn
from sklearn import datasets, linear_model
%matplotlib inline

In [57]:
# Import sensor data indexed by Timestamp
sensor_df = pd.read_csv('HHtest.txt', names = ["Sensor", "Seconds", "Timestamp", "Temp", "Rel Humid", "Voltage"], index_col = ['Timestamp'], parse_dates = ["Timestamp"])
# Delete unneeded columns
sensor_df.drop(['Sensor','Seconds','Voltage'], axis = 1, inplace = True)
sensor_df[:3]


Out[57]:
Temp Rel Humid
Timestamp
2016-07-07 14:59:53 75.20 60.0
2016-07-07 15:15:42 78.26 57.1
2016-07-07 15:31:30 76.82 44.5

In [58]:
#sensor_df.set_index(['Timestamp'], drop = False, inplace = True)
#sensor_df[:3]

In [59]:
# Resample for every 30 minutes and name according to midpoint
sensor_df = sensor_df.resample('30T', loffset = '-15 min').mean()
sensor_df.head


Out[59]:
<bound method DataFrame.head of                       Temp  Rel Humid
Timestamp                            
2016-07-07 14:15:00  75.20      60.00
2016-07-07 14:45:00  78.26      57.10
2016-07-07 15:15:00  76.82      43.55
2016-07-07 15:45:00  76.01      45.05
2016-07-07 16:15:00  74.93      46.30
2016-07-07 16:45:00  74.12      49.05
2016-07-07 17:15:00  73.85      47.70
2016-07-07 17:45:00  73.58      48.25
2016-07-07 18:15:00  73.31      51.10
2016-07-07 18:45:00  73.22      50.80
2016-07-07 19:15:00  73.40      52.70
2016-07-07 19:45:00  73.04      51.20
2016-07-07 20:15:00  73.22      52.45
2016-07-07 20:45:00  72.95      53.85
2016-07-07 21:15:00  72.77      53.40
2016-07-07 21:45:00  72.86      54.15
2016-07-07 22:15:00  73.13      54.80
2016-07-07 22:45:00  72.59      53.70
2016-07-07 23:15:00  73.04      55.05
2016-07-07 23:45:00  72.86      55.40
2016-07-08 00:15:00  72.86      55.65
2016-07-08 00:45:00  72.86      55.70
2016-07-08 01:15:00  72.86      55.80
2016-07-08 01:45:00  72.86      55.70
2016-07-08 02:15:00  72.77      56.00
2016-07-08 02:45:00  72.77      56.20
2016-07-08 03:15:00  72.86      56.55
2016-07-08 03:45:00  72.77      56.55
2016-07-08 04:15:00  72.86      56.65
2016-07-08 04:45:00  72.86      56.45
...                    ...        ...
2016-09-14 22:15:00  79.25      55.80
2016-09-14 22:45:00  79.25      55.75
2016-09-14 23:15:00  79.25      55.35
2016-09-14 23:45:00  79.25      54.25
2016-09-15 00:15:00  79.16      53.30
2016-09-15 00:45:00  79.16      52.70
2016-09-15 01:15:00  79.16      52.60
2016-09-15 01:45:00  78.98      52.10
2016-09-15 02:15:00  78.98      51.60
2016-09-15 02:45:00  78.98      50.90
2016-09-15 03:15:00  78.98      50.45
2016-09-15 03:45:00  78.98      49.95
2016-09-15 04:15:00  78.89      49.60
2016-09-15 04:45:00  78.80      49.20
2016-09-15 05:15:00  78.80      49.15
2016-09-15 05:45:00  78.80      48.55
2016-09-15 06:15:00  78.80      48.00
2016-09-15 06:45:00  78.80      48.30
2016-09-15 07:15:00  78.35      41.60
2016-09-15 07:45:00  78.26      41.05
2016-09-15 08:15:00  78.08      41.00
2016-09-15 08:45:00  78.08      40.85
2016-09-15 09:15:00  77.99      40.65
2016-09-15 09:45:00  78.08      39.90
2016-09-15 10:15:00  77.72      39.05
2016-09-15 10:45:00  77.36      38.90
2016-09-15 11:15:00  77.54      43.00
2016-09-15 11:45:00  77.18      47.60
2016-09-15 12:15:00  77.18      54.50
2016-09-15 12:45:00  77.18      62.40

[3358 rows x 2 columns]>

In [61]:
# Import sensor data indexed by Timestamp
LGA_df = pd.read_csv('KLGA.csv', names = ["Timestamp", "Temp", "Rel Humid"], index_col = ['Timestamp'], parse_dates = ["Timestamp"])
#LGA_df = LGA_df.set_index(['Timestamp'])
LGA_df.head


Out[61]:
<bound method DataFrame.head of                       Temp  Rel Humid
Timestamp                            
2016-07-01 02:00:00  75.22      60.98
2016-07-01 02:05:00  75.22      60.98
2016-07-01 02:10:00  75.22      60.98
2016-07-01 02:15:00  75.22      60.98
2016-07-01 02:20:00  75.22      60.98
2016-07-01 02:25:00  75.22      57.20
2016-07-01 02:30:00  75.22      57.20
2016-07-01 02:35:00  75.22      53.63
2016-07-01 02:40:00  75.22      53.63
2016-07-01 02:45:00  75.22      53.63
2016-07-01 02:50:00  75.22      53.63
2016-07-01 02:51:00  75.02      53.62
2016-07-01 02:55:00  75.22      53.63
2016-07-01 03:00:00  73.42      69.01
2016-07-01 03:05:00  73.42      69.01
2016-07-01 03:10:00  73.42      69.01
2016-07-01 03:15:00  73.42      69.01
2016-07-01 03:20:00  73.42      64.76
2016-07-01 03:25:00  73.42      64.76
2016-07-01 03:30:00  73.42      64.76
2016-07-01 03:35:00  73.42      64.76
2016-07-01 03:40:00  73.42      60.75
2016-07-01 03:45:00  73.42      60.75
2016-07-01 03:50:00  73.42      60.75
2016-07-01 03:51:00  73.94      60.75
2016-07-01 03:55:00  73.42      60.75
2016-07-01 04:00:00  73.42      60.75
2016-07-01 04:05:00  73.42      60.75
2016-07-01 04:10:00  73.42      60.75
2016-07-01 04:15:00  73.42      60.75
...                    ...        ...
2016-09-30 23:50:00  57.20      93.70
2016-09-30 23:51:00  57.92      93.70
2016-09-30 23:55:00  57.20      93.70
2016-10-01 00:00:00  57.20      93.70
2016-10-01 00:05:00  57.20      93.70
2016-10-01 00:10:00  57.20      93.70
2016-10-01 00:15:00  57.20      93.70
2016-10-01 00:20:00  57.20      93.70
2016-10-01 00:25:00  57.20      93.70
2016-10-01 00:30:00  57.20      93.70
2016-10-01 00:35:00  57.20      93.70
2016-10-01 00:40:00  57.20      93.70
2016-10-01 00:45:00  57.20      93.70
2016-10-01 00:50:00  57.20      93.70
2016-10-01 00:51:00  57.92      93.70
2016-10-01 00:55:00  57.20      93.70
2016-10-01 01:00:00  57.20      93.70
2016-10-01 01:05:00  57.20      93.70
2016-10-01 01:10:00  57.20      93.70
2016-10-01 01:15:00  57.20      93.70
2016-10-01 01:20:00  59.00      87.85
2016-10-01 01:25:00  57.20      93.70
2016-10-01 01:27:00  57.92      93.70
2016-10-01 01:30:00  57.20      93.70
2016-10-01 01:35:00  57.20      93.70
2016-10-01 01:40:00  57.20      93.70
2016-10-01 01:45:00  57.20      93.70
2016-10-01 01:50:00  57.20      93.70
2016-10-01 01:51:00  57.92      93.70
2016-10-01 01:55:00  57.20      93.70

[28262 rows x 2 columns]>

In [62]:
# Resample for every 30 minutes and name according to midpoint
LGA_df = LGA_df.resample('30T', loffset = '-15 min').mean()
LGA_df[:3]


Out[62]:
Temp Rel Humid
Timestamp
2016-07-01 01:45:00 75.220000 60.350000
2016-07-01 02:15:00 75.191429 54.138571
2016-07-01 02:45:00 73.420000 67.593333

In [38]:
# Join df together for all timestamps that exist in both files
joined_df = LGA_df.join(sensor_df, how = 'inner', lsuffix = '_LGA', rsuffix = '_sensor')
joined_df = joined_df.dropna() # drop NaN values
joined_df


Out[38]:
Temp_LGA Rel Humid_LGA Temp_sensor Rel Humid_sensor
Timestamp
2016-07-07 14:15:00 85.502857 67.405714 75.20 60.00
2016-07-07 14:45:00 83.920000 70.441667 78.26 57.10
2016-07-07 15:15:00 87.585714 59.391429 76.82 43.55
2016-07-07 15:45:00 85.420000 63.580000 76.01 45.05
2016-07-07 16:15:00 87.714286 56.904286 74.93 46.30
2016-07-07 16:45:00 86.920000 61.275000 74.12 49.05
2016-07-07 17:15:00 84.320000 66.630000 73.85 47.70
2016-07-07 17:45:00 83.920000 66.006667 73.58 48.25
2016-07-07 18:15:00 80.797143 72.445714 73.31 51.10
2016-07-07 18:45:00 78.820000 77.903333 73.22 50.80
2016-07-07 19:15:00 80.282857 74.831429 73.40 52.70
2016-07-07 19:45:00 79.420000 77.183333 73.04 51.20
2016-07-07 20:15:00 79.100000 78.038571 73.22 52.45
2016-07-07 20:45:00 80.320000 74.938333 72.95 53.85
2016-07-07 21:15:00 79.614286 76.755714 72.77 53.40
2016-07-07 21:45:00 78.820000 78.680000 72.86 54.15
2016-07-07 22:15:00 78.842857 80.788571 73.13 54.80
2016-07-07 22:45:00 78.220000 80.283333 72.59 53.70
2016-07-07 23:15:00 77.531429 82.114286 73.04 55.05
2016-07-07 23:45:00 75.520000 88.688333 72.86 55.40
2016-07-08 00:15:00 75.191429 88.620000 72.86 55.65
2016-07-08 00:45:00 74.920000 89.536667 72.86 55.70
2016-07-08 01:15:00 74.420000 90.977143 72.86 55.80
2016-07-08 01:45:00 74.920000 89.536667 72.86 55.70
2016-07-08 02:15:00 75.320000 88.620000 72.77 56.00
2016-07-08 02:45:00 75.220000 88.620000 72.77 56.20
2016-07-08 03:15:00 73.751429 93.334286 72.86 56.55
2016-07-08 03:45:00 75.220000 88.681667 72.77 56.55
2016-07-08 04:15:00 75.834286 87.154286 72.86 56.65
2016-07-08 04:45:00 74.320000 91.370000 72.86 56.45
... ... ... ... ...
2016-09-14 22:15:00 75.817143 60.508571 79.25 55.80
2016-09-14 22:45:00 73.700000 58.958333 79.25 55.75
2016-09-14 23:15:00 71.908571 56.222857 79.25 55.35
2016-09-14 23:45:00 71.600000 54.900000 79.25 54.25
2016-09-15 00:15:00 69.825714 56.440000 79.16 53.30
2016-09-15 00:45:00 69.800000 57.078333 79.16 52.70
2016-09-15 01:15:00 69.028571 53.751429 79.16 52.60
2016-09-15 01:45:00 68.000000 50.860000 78.98 52.10
2016-09-15 02:15:00 68.000000 52.000000 78.98 51.60
2016-09-15 02:45:00 66.800000 58.590000 78.98 50.90
2016-09-15 03:15:00 66.174286 58.135714 78.98 50.45
2016-09-15 03:45:00 66.200000 55.930000 78.98 49.95
2016-09-15 04:15:00 64.477143 59.540000 78.89 49.60
2016-09-15 04:45:00 64.400000 58.895000 78.80 49.20
2016-09-15 05:15:00 64.348571 58.434286 78.80 49.15
2016-09-15 05:45:00 64.100000 57.565000 78.80 48.55
2016-09-15 06:15:00 62.651429 59.300000 78.80 48.00
2016-09-15 06:45:00 63.500000 57.485000 78.80 48.30
2016-09-15 07:15:00 64.348571 55.670000 78.35 41.60
2016-09-15 07:45:00 64.400000 55.670000 78.26 41.05
2016-09-15 08:15:00 65.917143 50.331429 78.08 41.00
2016-09-15 08:45:00 66.500000 48.380000 78.08 40.85
2016-09-15 09:15:00 67.742857 45.918571 77.99 40.65
2016-09-15 09:45:00 69.200000 43.153333 78.08 39.90
2016-09-15 10:15:00 69.825714 40.340000 77.72 39.05
2016-09-15 10:45:00 70.100000 40.388333 77.36 38.90
2016-09-15 11:15:00 69.825714 40.340000 77.54 43.00
2016-09-15 11:45:00 70.700000 39.145000 77.18 47.60
2016-09-15 12:15:00 71.522857 37.228571 77.18 54.50
2016-09-15 12:45:00 71.600000 37.106667 77.18 62.40

3063 rows × 4 columns


In [64]:
#Plot all data wrt time
fig_size = np.array(mpl.rcParams['figure.figsize']) # size of plot
fig_size *=2 # increase size of plot
joined_df.plot(figsize = fig_size) # plot data


Out[64]:
<matplotlib.axes._subplots.AxesSubplot at 0x11b223450>

In [44]:
# Linear regression between temp at LGA and temp from sensor (without time data) using linear_model from sklearn 
TempLGA = np.transpose(np.matrix(joined_df['Temp_LGA']))
Tempsensor = np.transpose(np.matrix(joined_df['Temp_sensor']))

regr = linear_model.LinearRegression()
regr.fit(TempLGA, Tempsensor)

#z = polyfit(TempLGA, Tempsensor, 4) # polyfit for fitting
#p = np.poly1d(z) #polynomial of fit


Out[44]:
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

In [90]:
# Attempt multivariable linear regression
LGA_data = np.transpose(np.array([joined_df['Temp_LGA'], joined_df['Rel Humid_LGA'],]))
print (LGA_data.shape)
sensor_data = np.transpose(np.array([joined_df['Temp_sensor'], joined_df['Rel Humid_sensor'],]))
print (sensor_data.shape)

mult_regr = linear_model.LinearRegression()
mult_regr.fit(LGA_data, sensor_data)
mult_regr.params()
#mult_regr.predict(LGA_data[:20]).shape


(3063, 2)
(3063, 2)
Out[90]:
(20, 2)

In [85]:
plt.plot()


Out[85]:
(2, 3)

In [86]:
plt.plot()


Out[86]:
[]

In [45]:
plt.plot(TempLGA, Tempsensor, '.', TempLGA, regr.predict(TempLGA))
#plt.plot(TempLGA, Tempsensor, '.', TempLGA, p(TempLGA), '--')


Out[45]:
[<matplotlib.lines.Line2D at 0x11c389c90>,
 <matplotlib.lines.Line2D at 0x11c389d50>]

In [51]:
TempLGA_arr = np.array(joined_df['Temp_LGA'][:30])
Tempsensor_arr = np.array(joined_df['Temp_sensor'][:30])
pd.rolling_corr(TempLGA_arr, Tempsensor_arr, 10).plot()


/Users/Zelda/anaconda/lib/python2.7/site-packages/ipykernel/__main__.py:3: FutureWarning: pd.rolling_corr is deprecated for ndarrays and will be removed in a future version
  app.launch_new_instance()
---------------------------------------------------------------------------
Exception                                 Traceback (most recent call last)
<ipython-input-51-5a6da8260e90> in <module>()
      1 TempLGA_arr = np.array(joined_df['Temp_LGA'][:30])
      2 Tempsensor_arr = np.array(joined_df['Temp_sensor'][:30])
----> 3 pd.rolling_corr(TempLGA_arr, Tempsensor_arr, 10).plot()

/Users/Zelda/anaconda/lib/python2.7/site-packages/pandas/stats/moments.pyc in rolling_corr(arg1, arg2, window, pairwise, **kwargs)
    316                          pairwise=pairwise,
    317                          func_kw=['other', 'pairwise'],
--> 318                          **kwargs)
    319 
    320 

/Users/Zelda/anaconda/lib/python2.7/site-packages/pandas/stats/moments.pyc in ensure_compat(dispatch, name, arg, func_kw, *args, **kwargs)
    238                       FutureWarning, stacklevel=3)
    239 
--> 240     result = getattr(r, name)(*args, **kwds)
    241 
    242     if is_ndarray:

/Users/Zelda/anaconda/lib/python2.7/site-packages/pandas/core/window.pyc in corr(self, other, pairwise, **kwargs)
    855     def corr(self, other=None, pairwise=None, **kwargs):
    856         return super(Rolling, self).corr(other=other, pairwise=pairwise,
--> 857                                          **kwargs)
    858 
    859 

/Users/Zelda/anaconda/lib/python2.7/site-packages/pandas/core/window.pyc in corr(self, other, pairwise, **kwargs)
    725 
    726         return _flex_binary_moment(self._selected_obj, other._selected_obj,
--> 727                                    _get_corr, pairwise=bool(pairwise))
    728 
    729 

/Users/Zelda/anaconda/lib/python2.7/site-packages/pandas/core/window.pyc in _flex_binary_moment(arg1, arg2, f, pairwise)
   1263     if (isinstance(arg1, (np.ndarray, Series)) and
   1264             isinstance(arg2, (np.ndarray, Series))):
-> 1265         X, Y = _prep_binary(arg1, arg2)
   1266         return f(X, Y)
   1267 

/Users/Zelda/anaconda/lib/python2.7/site-packages/pandas/core/window.pyc in _prep_binary(arg1, arg2)
   1398 def _prep_binary(arg1, arg2):
   1399     if not isinstance(arg2, type(arg1)):
-> 1400         raise Exception('Input arrays must be of the same type!')
   1401 
   1402     # mask out values, this also makes a common index...

Exception: Input arrays must be of the same type!

In [ ]:


In [ ]: