In [1]:
import math
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
from scipy import linspace, polyval, polyfit, sqrt, stats, randn
from sklearn import datasets, linear_model
%matplotlib inline
In [2]:
# Load the sensor log with explicitly supplied column names, parse Timestamp as
# datetime and use it as the index, then keep only the Temp column.
sensor_df = pd.read_csv(
    'HHtest.txt',
    names = ["Sensor", "Seconds", "Timestamp", "Temp", "Rel Humid", "Voltage"],
    index_col = ['Timestamp'],
    parse_dates = ["Timestamp"]
).drop(['Sensor','Seconds','Rel Humid', 'Voltage'], axis = 1)
#sensor_df[:3]
In [3]:
# Average the readings into 30-minute bins, then shift every bin label by -15 minutes.
# `resample(..., loffset=...)` was deprecated in pandas 1.1 and removed in 2.0, so the
# label shift is applied to the index explicitly; the resulting labels are the same.
# NOTE(review): 30-minute bins are labelled by their left edge by default, so a
# -15 min shift puts the label 15 minutes *before* the bin start, whereas the bin
# midpoint would be +15 min -- confirm which one is intended.
# NOTE: this cell rebinds sensor_df, so re-running it without reloading the data
# resamples (and shifts) the already-resampled frame again.
sensor_df = sensor_df.resample('30min').mean()
sensor_df.index = sensor_df.index + pd.Timedelta(minutes = -15)
#sensor_df.head
In [4]:
# Load the KLGA.csv readings with explicitly supplied column names, parse Timestamp
# as datetime and use it as the index, then keep only the Temp column.
LGA_df = pd.read_csv(
    'KLGA.csv',
    names = ["Timestamp", "Temp", "Rel Humid"],
    index_col = ['Timestamp'],
    parse_dates = ["Timestamp"]
).drop(['Rel Humid'], axis = 1)
#LGA_df.head
In [5]:
# Average the readings into 30-minute bins, then shift every bin label by -15 minutes.
# `resample(..., loffset=...)` was deprecated in pandas 1.1 and removed in 2.0, so the
# label shift is applied to the index explicitly; the resulting labels are the same.
# NOTE(review): 30-minute bins are labelled by their left edge by default, so a
# -15 min shift puts the label 15 minutes *before* the bin start, whereas the bin
# midpoint would be +15 min -- confirm which one is intended.
# NOTE: this cell rebinds LGA_df, so re-running it without reloading the data
# resamples (and shifts) the already-resampled frame again.
LGA_df = LGA_df.resample('30min').mean()
LGA_df.index = LGA_df.index + pd.Timedelta(minutes = -15)
#LGA_df[:3]
In [6]:
# Inner-join on the Timestamp index so only time bins present in both frames remain.
# Overlapping column names get the '_LGA' / '_sensor' suffixes; rows containing NaN
# are then dropped.
# TODO: Are the values in order? Do we need to interpolate between the values?
joined_df = (
    LGA_df
    .join(sensor_df, how = 'inner', lsuffix = '_LGA', rsuffix = '_sensor')
    .dropna()
)
#joined_df[:3]
In [7]:
def find_T_building(temp_3day, tau=12.0, window_hours=72.0, step_hours=0.5):
    """Exponentially weighted sum of a window of indoor temperature readings.

    Reading number ``i`` (0-based, oldest first) is assumed to have been taken
    ``window_hours - step_hours * i`` hours ago and is weighted by
    ``exp(-age / tau)``; the weighted readings are then added up.

    Parameters
    ----------
    temp_3day : sequence of float (list, numpy array or pandas Series)
        Readings ordered oldest to newest. Values are read by position, so
        the labels of a Series index are ignored.
    tau : float, optional
        Decay constant in hours (default 12.0).
    window_hours : float, optional
        Age in hours assigned to the first reading (default 72, i.e. 3 days).
    step_hours : float, optional
        Assumed spacing between consecutive readings in hours (default 0.5).

    Returns
    -------
    float
        Sum of ``temp * exp(-age / tau)`` over the window; 0.0 for an empty
        window. The weights are not normalised, so this is a weighted sum,
        not a weighted average.

    Notes
    -----
    Ages are derived from position only. If readings are missing from the
    window (for example rows removed by ``dropna``), the assumed ages no
    longer match the real timestamps.
    """
    # np.asarray gives positional access regardless of the Series index; the
    # previous ``temp_3day[i]`` relied on pandas' deprecated integer-position
    # fallback for Series whose index is not integer-based.
    temps = np.asarray(temp_3day, dtype=float).ravel()
    ages = window_hours - step_hours * np.arange(temps.size)
    return float(np.dot(temps, np.exp(-ages / tau)))
In [11]:
#temp_sens_array = np.array(joined_df['Temp_sensor'])
# Sensor temperature column of the joined frame (a Series on joined_df's index).
temp_sens = joined_df['Temp_sensor']
def create_T_building_series(temp_sens):
    """Build a Series of T_building values from an indexed temperature Series.

    Starting at position 144 (72 hours * 2 data points per hour), each index
    label gets the value find_T_building returns for the label slice of
    temp_sens running from three days before that label up to the label
    itself. The returned Series is indexed by those labels.
    """
    window = pd.DateOffset(days = 3)  # look-back span
    stamps = [temp_sens.index[pos] for pos in range(144, len(temp_sens))]
    values = [find_T_building(temp_sens[stamp - window:stamp]) for stamp in stamps]
    return pd.Series(values, index = stamps)
In [12]:
%time
T_building_series = create_T_building_series(temp_sens)
T_building_series
Out[12]:
In [ ]:
# Playing... Don't run
# Scratch: take everything up to three days after the first timestamp, and look up
# the single reading labelled exactly three days after the first timestamp
# (a label lookup, so it fails if no row carries that exact timestamp).
dt = pd.DateOffset(days = 3) # 3 days
temp_3day = temp_sens[:temp_sens.index.min() + dt]
temp_now = temp_sens[temp_sens.index.min() + dt]
def find_T_building(temp_3day, tau=12.0, window_hours=72.0, step_hours=0.5):
    """Exponentially weighted sum of a window of indoor temperature readings.

    NOTE(review): this notebook also defines find_T_building in an earlier
    cell; running this cell rebinds the name.

    Reading number ``i`` (0-based, oldest first) is assumed to have been taken
    ``window_hours - step_hours * i`` hours ago and is weighted by
    ``exp(-age / tau)``; the weighted readings are then added up.

    Parameters
    ----------
    temp_3day : sequence of float (list, numpy array or pandas Series)
        Readings ordered oldest to newest. Values are read by position, so
        the labels of a Series index are ignored.
    tau : float, optional
        Decay constant in hours (default 12.0).
    window_hours : float, optional
        Age in hours assigned to the first reading (default 72, i.e. 3 days).
    step_hours : float, optional
        Assumed spacing between consecutive readings in hours (default 0.5).

    Returns
    -------
    float
        Sum of ``temp * exp(-age / tau)`` over the window; 0.0 for an empty
        window. The weights are not normalised, so this is a weighted sum,
        not a weighted average.
    """
    # np.asarray gives positional access regardless of the Series index; the
    # previous ``temp_3day[i]`` relied on pandas' deprecated integer-position
    # fallback for Series whose index is not integer-based.
    temps = np.asarray(temp_3day, dtype=float).ravel()
    ages = window_hours - step_hours * np.arange(temps.size)
    return float(np.dot(temps, np.exp(-ages / tau)))
In [72]:
# Peek at the first label of the temp_sens index.
temp_sens.index[0]
Out[72]:
In [48]:
#temp_sens_array = np.array(joined_df['Temp_sensor'])
# Scratch version of the weighted sum, computed inline for the first three days.
temp_sens = joined_df['Temp_sensor']
dt = pd.DateOffset(days = 3) # 3 days
temp_3day = temp_sens[:temp_sens.index.min() + dt]
# Label lookup: requires a row stamped exactly three days after the first one.
temp_now = temp_sens[temp_sens.index.min() + dt]
T_building = 0
tau = 12.0 # decay constant in hours
# Iterate over the values themselves rather than indexing temp_3day[i]: integer
# keys on a Series whose index is not integer-based rely on a deprecated
# positional fallback in pandas.
for i, temp_i in enumerate(temp_3day):
    time_in_hours = 72 - (0.5*i) # each dp is a 0.5 hours
    weight_i = math.exp(-time_in_hours/tau)
    T_building += temp_i * weight_i
def find_building_temp(temp_sens_array):
    """Weighted sum of the first three entries of temp_sens_array.

    Entries 0, 1 and 2 are treated as temperature values from 12, 36 and 60
    hours prior and are each scaled by exp(-hours / tau), with tau = 12
    hours, before being added together. The input must support integer
    indexing and hold at least three entries.
    """
    tau = 12 # decay constant in hours
    hours_prior = (12, 36, 60)
    readings = [temp_sens_array[k] for k in range(3)]
    contributions = [
        reading * math.exp(-hours / tau)
        for reading, hours in zip(readings, hours_prior)
    ]
    return contributions[0] + contributions[1] + contributions[2]
# temp_sens_array was previously defined only in a commented-out line, so the call
# raised NameError on a fresh kernel; build the array here before using it.
temp_sens_array = np.array(joined_df['Temp_sensor'])
find_building_temp(temp_sens_array)
#temp_list
Out[48]:
In [ ]:
# Plot all data wrt time
# Figure size: twice matplotlib's configured default, as an array copy so that
# rcParams itself is left untouched.
fig_size = np.array(mpl.rcParams['figure.figsize']) * 2
#joined_df.plot(figsize = fig_size) # plot data
In [ ]:
# Linear regression between temp at LGA and temp from sensor (without time data)
# using linear_model from sklearn.
# scikit-learn expects 2-D arrays of shape (n_samples, n_features); np.matrix inputs
# are rejected by current releases, so plain ndarray columns are built instead.
TempLGA = np.asarray(joined_df['Temp_LGA']).reshape(-1, 1)
Tempsensor = np.asarray(joined_df['Temp_sensor']).reshape(-1, 1)
regr = linear_model.LinearRegression()
regr.fit(TempLGA, Tempsensor)
#z = polyfit(TempLGA, Tempsensor, 4) # polyfit for fitting
#p = np.poly1d(z) #polynomial of fit
In [ ]:
# Attempt multivariable linear regression
# NOTE: the loading cells drop 'Rel Humid' from both frames, so joined_df only has the
# humidity columns if those drops are removed; fail early with a clear message.
feature_cols = ['Temp_LGA', 'Rel Humid_LGA']
target_cols = ['Temp_sensor', 'Rel Humid_sensor']
missing_cols = [col for col in feature_cols + target_cols if col not in joined_df.columns]
if missing_cols:
    raise KeyError("joined_df is missing %s; keep 'Rel Humid' when loading both files "
                   "to run this cell" % missing_cols)
LGA_data = np.transpose(np.array([joined_df[col] for col in feature_cols]))
print (LGA_data.shape)
sensor_data = np.transpose(np.array([joined_df[col] for col in target_cols]))
print (sensor_data.shape)
mult_regr = linear_model.LinearRegression()
mult_regr.fit(LGA_data, sensor_data)
# LinearRegression has no params() method; the fitted parameters are exposed as the
# coef_ and intercept_ attributes.
mult_regr.coef_, mult_regr.intercept_
#mult_regr.predict(LGA_data[:20]).shape
In [ ]:
# Placeholder cell: plt.plot() is called with no data.
plt.plot()
In [ ]:
# Placeholder cell: plt.plot() is called with no data.
plt.plot()
In [ ]:
# Scatter of the joined observations with the fitted regression line drawn on top.
# Labels, title and legend are set so the figure can be read on its own.
fig, ax = plt.subplots()
ax.plot(TempLGA, Tempsensor, '.', label = 'observations')
ax.plot(TempLGA, regr.predict(TempLGA), label = 'linear fit')
ax.set_xlabel('Temp_LGA')
ax.set_ylabel('Temp_sensor')
ax.set_title('Temp_sensor vs. Temp_LGA')
ax.legend();
#plt.plot(TempLGA, Tempsensor, '.', TempLGA, p(TempLGA), '--')
In [ ]:
# Rolling 10-sample correlation between the two temperature columns over the first
# 30 joined rows.
TempLGA_arr = np.array(joined_df['Temp_LGA'][:30])
Tempsensor_arr = np.array(joined_df['Temp_sensor'][:30])
# pd.rolling_corr was deprecated in pandas 0.18 and later removed; the rolling
# window API on Series replaces it and returns a Series, which can be plotted.
rolling_corr = (
    joined_df['Temp_LGA'].iloc[:30]
    .rolling(window = 10)
    .corr(joined_df['Temp_sensor'].iloc[:30])
)
rolling_corr.plot()
In [ ]:
In [ ]: