In [1]:
import math
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt

from scipy import linspace, polyval, polyfit, sqrt, stats, randn
from sklearn import datasets, linear_model
%matplotlib inline

In [2]:
# Import sensor data indexed by Timestamp
sensor_df = pd.read_csv('HHtest.txt', names = ["Sensor", "Seconds", "Timestamp", "Temp", "Rel Humid", "Voltage"],
                        index_col = ['Timestamp'], parse_dates = ["Timestamp"])
# Delete unneeded columns
sensor_df.drop(['Sensor','Seconds','Rel Humid', 'Voltage'], axis = 1, inplace = True)

#sensor_df[:3]

In [3]:
# Resample for every 30 minutes and name according to midpoint
sensor_df = sensor_df.resample('30T', loffset = '-15 min').mean()
#sensor_df.head

In [4]:
# Import sensor data indexed by Timestamp
LGA_df = pd.read_csv('KLGA.csv', names = ["Timestamp", "Temp", "Rel Humid"], index_col = ['Timestamp'], parse_dates = ["Timestamp"])

# Delete unneeded columns
LGA_df.drop(['Rel Humid'], axis = 1, inplace = True)

#LGA_df.head

In [5]:
# Resample for every 30 minutes and name according to midpoint
LGA_df = LGA_df.resample('30T', loffset = '-15 min').mean()
#LGA_df[:3]

In [6]:
# Join df together for all timestamps that exist in both files
joined_df = LGA_df.join(sensor_df, how = 'inner', lsuffix = '_LGA', rsuffix = '_sensor')
joined_df = joined_df.dropna() # drop NaN values **Are the values in order? Do we need to interpolate between the values?
#joined_df[:3]

In [7]:
def find_T_building(temp_3day):
    #Input first three days of the dataframe of indoor temp data. Computes one Temperature data
    #point (T_building) given data points from the three previous days (temp_3day) 
    #by assigning a weight to each data point and adding them up
    
    T_building = 0
    tau = 12.0 # decay constant in hours
    for i in range(len(temp_3day)):
        time_in_hours = 72 - (0.5*i) # each dp is a 0.5 hours
        temp_i = temp_3day[i]
        weight_i = math.exp(-time_in_hours/tau)
        T_building += temp_i * weight_i
    return T_building

In [11]:
#temp_sens_array = np.array(joined_df['Temp_sensor'])
temp_sens = joined_df['Temp_sensor']

def create_T_building_series(temp_sens):
    T_building_list = []
    T_building_index = []
    dt = pd.DateOffset(days = 3) # 3 days    

    for j in range(144,len(temp_sens)): 
        #144 points in the first 3 days (because 72 hours * 2 dp/hr)
        Timestamp_j = temp_sens.index[j]
        temp_3day = temp_sens[Timestamp_j-dt :Timestamp_j]
        T_building = find_T_building(temp_3day)
        T_building_list.append(T_building)
        T_building_index.append(Timestamp_j)
    
    return pd.Series(T_building_list, index = T_building_index)

In [12]:
%time
T_building_series = create_T_building_series(temp_sens)
T_building_series


CPU times: user 3 µs, sys: 1 µs, total: 4 µs
Wall time: 6.2 µs
Out[12]:
2016-07-11 19:45:00     293.039409
2016-07-11 20:15:00     305.637933
2016-07-11 20:45:00     318.772485
2016-07-11 21:15:00     332.465872
2016-07-11 21:45:00     346.741869
2016-07-11 22:15:00     361.625267
2016-07-11 22:45:00     377.141907
2016-07-11 23:15:00     393.318732
2016-07-11 23:45:00     410.183831
2016-07-12 00:15:00     427.726324
2016-07-12 00:45:00     446.057068
2016-07-12 01:15:00     465.167729
2016-07-12 01:45:00     485.091489
2016-07-12 02:15:00     505.862944
2016-07-12 02:45:00     527.518159
2016-07-12 03:15:00     550.094737
2016-07-12 03:45:00     573.631879
2016-07-12 04:15:00     598.142426
2016-07-12 04:45:00     623.666603
2016-07-12 05:15:00     650.276755
2016-07-12 05:45:00     678.019086
2016-07-12 06:15:00     706.941768
2016-07-12 06:45:00     737.095020
2016-07-12 07:15:00     768.531200
2016-07-12 07:45:00     801.267375
2016-07-12 08:15:00     835.357258
2016-07-12 08:45:00     870.897561
2016-07-12 09:15:00     907.949993
2016-07-12 09:45:00     946.667535
2016-07-12 10:15:00     987.032389
                          ...     
2016-09-14 22:15:00    1929.805670
2016-09-14 22:45:00    1930.104623
2016-09-14 23:15:00    1930.391376
2016-09-14 23:45:00    1930.666426
2016-09-15 00:15:00    1930.840251
2016-09-15 00:45:00    1931.006983
2016-09-15 01:15:00    1931.167338
2016-09-15 01:45:00    1931.141362
2016-09-15 02:15:00    1931.116661
2016-09-15 02:45:00    1931.092968
2016-09-15 03:15:00    1931.070242
2016-09-15 03:45:00    1931.048657
2016-09-15 04:15:00    1930.938167
2016-09-15 04:45:00    1930.742186
2016-09-15 05:15:00    1930.554418
2016-09-15 05:45:00    1930.374312
2016-09-15 06:15:00    1930.201984
2016-09-15 06:45:00    1930.036690
2016-09-15 07:15:00    1929.429424
2016-09-15 07:45:00    1928.757156
2016-09-15 08:15:00    1927.932324
2016-09-15 08:45:00    1927.141153
2016-09-15 09:15:00    1926.292698
2016-09-15 09:45:00    1925.568869
2016-09-15 10:15:00    1924.515008
2016-09-15 10:45:00    1923.144156
2016-09-15 11:15:00    1922.009677
2016-09-15 11:45:00    1920.561710
2016-09-15 12:15:00    1919.172836
2016-09-15 12:45:00    1917.840856
dtype: float64

In [ ]:
# Playing... Dont run
dt = pd.DateOffset(days = 3) # 3 days    
temp_3day = temp_sens[:temp_sens.index.min() + dt]
temp_now = temp_sens[temp_sens.index.min() + dt]

def find_T_building(temp_3day):
    
    T_building = 0
    tau = 12.0 # decay constant in hours
    for i in range(len(temp_3day)):
        time_in_hours = 72 - (0.5*i) # each dp is a 0.5 hours
        temp_i = temp_3day[i]
        weight_i = math.exp(-time_in_hours/tau)
        T_building += temp_i * weight_i
    return T_building

In [72]:
temp_sens.index[0]


Out[72]:
Timestamp('2016-07-07 14:15:00')

In [48]:
#temp_sens_array = np.array(joined_df['Temp_sensor'])
temp_sens = joined_df['Temp_sensor']

dt = pd.DateOffset(days = 3) # 3 days
temp_3day = temp_sens[:temp_sens.index.min() + dt]
temp_now = temp_sens[temp_sens.index.min() + dt]
T_building = 0
tau = 12.0 # decay constant in hours
for i in range(len(temp_3day)):
    time_in_hours = 72 - (0.5*i) # each dp is a 0.5 hours
    temp_i = temp_3day[i]
    weight_i = math.exp(-time_in_hours/tau)
    T_building += temp_i * weight_i
    


def find_building_temp(temp_sens_array):
    temp_list = []
    for i in range(0,3):
        temp_list.append(temp_sens_array[i])
        
    tau = 12 # decay constant in hours
    T0_contrib = temp_list[0] * math.exp(-12/tau) # contribution from temp val 0, 12 hours prior
    T1_contrib = temp_list[1] * math.exp(-36/tau) # contribution from temp val 1, 36 hours prior
    T2_contrib = temp_list[2] * math.exp(-60/tau) # contribution from temp val 2, 60 hours prior
    
    temp_list = temp_list[1:]
    building_temp = T0_contrib + T1_contrib + T2_contrib
    return building_temp

find_building_temp(temp_sens_array)
#temp_list


Out[48]:
32.078479035031243

In [ ]:
#Plot all data wrt time
fig_size = np.array(mpl.rcParams['figure.figsize']) # size of plot
fig_size *=2 # increase size of plot
#joined_df.plot(figsize = fig_size) # plot data

In [ ]:
# Linear regression between temp at LGA and temp from sensor (without time data) using linear_model from sklearn 
TempLGA = np.transpose(np.matrix(joined_df['Temp_LGA']))
Tempsensor = np.transpose(np.matrix(joined_df['Temp_sensor']))

regr = linear_model.LinearRegression()
regr.fit(TempLGA, Tempsensor)

#z = polyfit(TempLGA, Tempsensor, 4) # polyfit for fitting
#p = np.poly1d(z) #polynomial of fit

In [ ]:
# Attempt multivariable linear regression
LGA_data = np.transpose(np.array([joined_df['Temp_LGA'], joined_df['Rel Humid_LGA'],]))
print (LGA_data.shape)
sensor_data = np.transpose(np.array([joined_df['Temp_sensor'], joined_df['Rel Humid_sensor'],]))
print (sensor_data.shape)

mult_regr = linear_model.LinearRegression()
mult_regr.fit(LGA_data, sensor_data)
mult_regr.params()
#mult_regr.predict(LGA_data[:20]).shape

In [ ]:
plt.plot()

In [ ]:
plt.plot()

In [ ]:
plt.plot(TempLGA, Tempsensor, '.', TempLGA, regr.predict(TempLGA))
#plt.plot(TempLGA, Tempsensor, '.', TempLGA, p(TempLGA), '--')

In [ ]:
TempLGA_arr = np.array(joined_df['Temp_LGA'][:30])
Tempsensor_arr = np.array(joined_df['Temp_sensor'][:30])
pd.rolling_corr(TempLGA_arr, Tempsensor_arr, 10).plot()

In [ ]:


In [ ]: