Test TS_Util()


In [4]:
import matplotlib 
from matplotlib import style
%matplotlib inline
style.use('ggplot')

# This is to import custom-made modules
# This can be removed after making these modules a real library
import os, sys
lib_path = os.path.abspath(os.path.join('..', 'building-analytics')) # relative path of the source code in Box Folder 
sys.path.append(lib_path)

from TS_Util_Clean_Data import *

# inputs
fileName = "data1.csv" # replace with other files used
folder = "../data/folder4"

## call script

# instantiate class
TSU = TS_Util()

# load data
data= TSU.load_TS(fileName, folder)

data.tail()


Out[4]:
Data 1
Timestamp
2017-05-06 14:30:00 30
2017-05-06 14:40:00 30
2017-05-06 14:50:00 30
2017-05-06 15:00:00 30
2017-05-06 15:10:00 28

In [ ]:
# clean start: rebind `data` to the result of TS_Util.remove_start_NaN
# (presumably drops leading NaN rows — verify against TS_Util).
# NOTE: `data` is overwritten, so the frame as originally loaded is no longer
# reachable after this cell runs.
data= TSU.remove_start_NaN(data)
# Preview the first rows of the result.
data.head()

In [ ]:
# clean start-end: `data` has already been passed through remove_start_NaN in
# the previous cell; applying remove_end_NaN here handles the other end
# (presumably drops trailing NaN rows — verify against TS_Util).
# NOTE: `data` is overwritten by the result.
data= TSU.remove_end_NaN(data)
# Preview the last rows of the result.
data.tail()

In [ ]:
# Preview the first rows of what the underscore-prefixed (internal by
# convention) helper _find_missing returns for `data`.
# Presumably a boolean mask marking missing values — TODO confirm against TS_Util.
TSU._find_missing(data).head()

In [ ]:
# Calls display_missing with how="all".
# Presumably selects rows in which every column is missing (mirroring the
# pandas `how` convention) — TODO confirm against TS_Util.
TSU.display_missing(data, how="all")

In [ ]:
# Calls count_missing with output="number".
# Presumably returns an absolute count rather than a ratio — TODO confirm
# which other `output` values TS_Util accepts.
TSU.count_missing(data, output="number")

In [ ]:
# Preview the first rows of remove_missing(..., how="any").
# The result is not assigned, so the name `data` keeps pointing at the same
# object (whether remove_missing mutates it in place is not visible here — verify).
TSU.remove_missing(data,how="any").head()

In [ ]:
# Preview the first rows of what the underscore-prefixed (internal by
# convention) helper _find_outOfBound returns for the arguments 10 and 300.
# Presumably 10 is the lower bound and 300 the upper bound — TODO confirm
# argument order against TS_Util.
TSU._find_outOfBound(data, 10, 300).head()

In [ ]:
# Calls display_outOfBound with the same two numeric arguments (10, 300) as the
# _find_outOfBound cell above; presumably (lower, upper) bounds — TODO confirm.
TSU.display_outOfBound(data, 10, 300)

In [ ]:
# Calls count_outOfBound with the arguments (10, 300) and output="number".
# Presumably returns an absolute count of out-of-bound values — TODO confirm.
TSU.count_outOfBound(data, 10, 300, output="number")

In [ ]:
# NOTE(review): the second bound passed here is 350, whereas the
# _find_outOfBound / display_outOfBound / count_outOfBound cells above all
# pass 300 — confirm whether the difference is intentional.
# The return value is neither assigned nor truncated with .head(); whatever
# remove_outOfBound returns becomes the cell output.
TSU.remove_outOfBound(data, 10, 350)

In [ ]:
# Outlier display, method="std" with coeff=2 (the other display_outliers cells
# below use coeff=1) and window=10.
# Presumably a standard-deviation threshold scaled by `coeff` — TODO confirm
# against TS_Util, including whether `window` is used by this method.
TSU.display_outliers(data,method="std",coeff=2, window=10)

In [ ]:
# Outlier display, method="rstd" with coeff=1 and window=10.
# Presumably a rolling standard deviation over `window` samples — TODO confirm
# against TS_Util.
TSU.display_outliers(data,method="rstd",coeff=1, window=10)

In [ ]:
# Outlier display, method="rmedian" with coeff=1 and window=10.
# Presumably a rolling-median based threshold over `window` samples — TODO
# confirm against TS_Util.
TSU.display_outliers(data,method="rmedian",coeff=1, window=10)

In [ ]:
# Outlier display, method="iqr" with coeff=1 and window=10.
# Presumably an inter-quartile-range rule scaled by `coeff` — TODO confirm
# against TS_Util, including whether `window` is used by this method.
TSU.display_outliers(data,method="iqr",coeff=1, window=10)

In [ ]:
# Outlier display, method="qtl" with coeff=1 and window=10.
# Presumably a quantile-based rule — TODO confirm against TS_Util what `coeff`
# and `window` mean for this method.
TSU.display_outliers(data,method="qtl",coeff=1, window=10)

In [ ]:


In [13]:
# Detect "flatline" events: maximal runs of two or more consecutive samples
# that hold exactly the same value in column `var`.
var = "Data 1"

# Series.diff(-1) at row i is value[i] - value[i+1]; it is NaN on the last row
# (and wherever a value is NaN), so the comparison with 0 is False there.
same_as_next = data[var].diff(-1) == 0          # value[i] == value[i+1]
# After shift(), row i holds value[i-1], so diff(-1) is value[i-1] - value[i].
same_as_prev = data[var].shift().diff(-1) == 0  # value[i-1] == value[i]

# A run starts where the value repeats on the next row but did not on the previous one.
start_event = same_as_next & ~same_as_prev
start = data[start_event].index.tolist()

# A run ends where the value repeated from the previous row but changes on the next one.
end_events = ~same_as_next & same_as_prev  # find flatline end events
end = data[end_events].index.tolist()

# Every run has exactly one start and one end, so both lists have equal length.
# pd.DataFrame.from_items was deprecated in pandas 0.23 and removed in 1.0;
# a dict plus an explicit `columns` list gives the same frame and column order.
events = pd.DataFrame({"start": start, "end": end}, columns=["start", "end"])

# Duration of each run in minutes (assumes a datetime-like index, as in the
# Timestamp index shown by the outputs in this notebook).
events["length_min"] = (events["end"] - events["start"]).dt.total_seconds() / 60
events


Out[13]:
start end length_min
0 2017-05-03 12:00:00 2017-05-04 23:00:00 2100.0
1 2017-05-05 08:00:00 2017-05-06 13:00:00 1740.0
2 2017-05-06 13:10:00 2017-05-06 15:00:00 110.0

In [ ]:
# Rows whose "Data 1" value is exactly equal to the value in the previous row
# (first-order difference of 0). The first row is never selected because its
# difference is NaN.
data[data["Data 1"].diff() == 0]

In [12]:
# First-order difference of "Data 1": value[i] - value[i-1] (NaN on the first row).
# The trailing comment is a disabled second .diff() kept from experimentation.
data["Data 1"].diff()#.diff()


Out[12]:
Timestamp
2017-05-03 11:50:00     NaN
2017-05-03 12:00:00    10.0
2017-05-03 12:10:00     0.0
2017-05-03 12:20:00     0.0
2017-05-03 12:30:00     0.0
2017-05-03 12:40:00     0.0
2017-05-03 12:50:00     0.0
2017-05-03 13:00:00     0.0
2017-05-03 13:10:00     0.0
2017-05-03 13:20:00     0.0
2017-05-03 13:30:00     0.0
2017-05-03 13:40:00     0.0
2017-05-03 13:50:00     0.0
2017-05-03 14:00:00     0.0
2017-05-03 14:10:00     0.0
2017-05-03 14:20:00     0.0
2017-05-03 14:30:00     0.0
2017-05-03 14:40:00     0.0
2017-05-03 14:50:00     0.0
2017-05-03 15:00:00     0.0
2017-05-03 15:10:00     0.0
2017-05-03 15:20:00     0.0
2017-05-03 15:30:00     0.0
2017-05-03 15:40:00     0.0
2017-05-03 15:50:00     0.0
2017-05-03 16:00:00     0.0
2017-05-03 16:10:00     0.0
2017-05-03 16:20:00     0.0
2017-05-03 16:30:00     0.0
2017-05-03 16:40:00     0.0
                       ... 
2017-05-06 10:20:00     0.0
2017-05-06 10:30:00     0.0
2017-05-06 10:40:00     0.0
2017-05-06 10:50:00     0.0
2017-05-06 11:00:00     0.0
2017-05-06 11:10:00     0.0
2017-05-06 11:20:00     0.0
2017-05-06 11:30:00     0.0
2017-05-06 11:40:00     0.0
2017-05-06 11:50:00     0.0
2017-05-06 12:00:00     0.0
2017-05-06 12:10:00     0.0
2017-05-06 12:20:00     0.0
2017-05-06 12:30:00     0.0
2017-05-06 12:40:00     0.0
2017-05-06 12:50:00     0.0
2017-05-06 13:00:00     0.0
2017-05-06 13:10:00    10.0
2017-05-06 13:20:00     0.0
2017-05-06 13:30:00     0.0
2017-05-06 13:40:00     0.0
2017-05-06 13:50:00     0.0
2017-05-06 14:00:00     0.0
2017-05-06 14:10:00     0.0
2017-05-06 14:20:00     0.0
2017-05-06 14:30:00     0.0
2017-05-06 14:40:00     0.0
2017-05-06 14:50:00     0.0
2017-05-06 15:00:00     0.0
2017-05-06 15:10:00    -2.0
Name: Data 1, Length: 453, dtype: float64

In [ ]: