In [36]:
import matplotlib
from matplotlib import style
%matplotlib inline
style.use('ggplot')
# This is to import custom-made modules
# This can be removed after making these modules a real library
import os, sys
lib_path = os.path.abspath(os.path.join('..', 'building-analytics')) # relative path of the source code in Box Folder
sys.path.append(lib_path)
from TS_Util_Clean_Data import *
# --- Inputs: location of the file to load (edit these to use another dataset) ---
folder = "../data/folder4"
fileName = "data1.csv"

# --- Run: create the utility object and load the file through it ---
TSU = TS_Util()
data = TSU.load_TS(fileName, folder)

# Cell output: data.tail() of the freshly loaded object.
data.tail()
Out[36]:
In [7]:
# Rebind `data` to whatever TSU.remove_start_NaN returns, then show data.head().
# NOTE(review): by its name the method presumably trims leading NaN rows — confirm in TS_Util.
# `data` is overwritten here, so later cells see the result of this call, not the raw load.
data= TSU.remove_start_NaN(data)
data.head()
Out[7]:
In [8]:
# Clean the end of the series: rebind `data` to the result of TSU.remove_end_NaN,
# then show data.tail(). (The start was handled by the separate remove_start_NaN call.)
# NOTE(review): presumably trims trailing NaN rows — confirm in TS_Util.
data= TSU.remove_end_NaN(data)
data.tail()
Out[8]:
In [16]:
# Preview (.head()) what the underscore-prefixed helper _find_missing returns for `data`.
# NOTE(review): the leading underscore suggests an internal method; its output format
# is defined in TS_Util, which is not visible from this notebook.
TSU._find_missing(data).head()
Out[16]:
In [17]:
# Call TS_Util's display_missing on `data` with how="all".
# NOTE(review): presumably "all"/"any" selects rows where all/any columns are missing — confirm in TS_Util.
TSU.display_missing(data, how="all")
Out[17]:
In [19]:
# Call TS_Util's count_missing on `data` with output="number".
# NOTE(review): presumably returns raw counts (as opposed to another output mode) — confirm in TS_Util.
TSU.count_missing(data, output="number")
Out[19]:
In [20]:
# Preview (.head()) the result of remove_missing with how="any".
# The result is not assigned, so this cell does not rebind `data`.
# NOTE(review): presumably drops rows where any column is missing — confirm in TS_Util.
TSU.remove_missing(data,how="any").head()
Out[20]:
In [21]:
# Preview (.head()) what the underscore-prefixed helper _find_outOfBound returns for `data`.
# NOTE(review): 10 and 300 are presumably the lower and upper bounds — confirm argument order in TS_Util.
TSU._find_outOfBound(data, 10, 300).head()
Out[21]:
In [22]:
# Call TS_Util's display_outOfBound on `data` with the arguments 10 and 300.
# NOTE(review): presumably lower/upper bounds, in that order — confirm in TS_Util.
TSU.display_outOfBound(data, 10, 300)
Out[22]:
In [25]:
# Call TS_Util's count_outOfBound on `data` with the arguments 10 and 300 and output="number".
# NOTE(review): presumably counts values outside [10, 300] — confirm in TS_Util.
TSU.count_outOfBound(data, 10, 300, output="number")
Out[25]:
In [27]:
# Call remove_outOfBound with the same arguments (10, 300) as the find/display/count
# cells in this notebook, so the removal matches what was inspected.
# The result is not assigned, so this cell does not rebind `data`.
# NOTE(review): this call previously passed 350 as the second bound; restore it if
# the wider range was intentional. Bounds are presumably (lower, upper) — confirm in TS_Util.
TSU.remove_outOfBound(data, 10, 300)
Out[27]:
In [28]:
# Call display_outliers with method="std", coeff=2, window=10.
# NOTE(review): presumably a standard-deviation-based threshold scaled by coeff — confirm in TS_Util.
TSU.display_outliers(data,method="std",coeff=2, window=10)
Out[28]:
In [29]:
# Call display_outliers with method="rstd", coeff=1, window=10.
# NOTE(review): "rstd" presumably means rolling standard deviation over `window` samples — confirm in TS_Util.
TSU.display_outliers(data,method="rstd",coeff=1, window=10)
Out[29]:
In [30]:
# Call display_outliers with method="rmedian", coeff=1, window=10.
# NOTE(review): "rmedian" presumably means rolling median over `window` samples — confirm in TS_Util.
TSU.display_outliers(data,method="rmedian",coeff=1, window=10)
Out[30]:
In [31]:
# Call display_outliers with method="iqr", coeff=1, window=10.
# NOTE(review): "iqr" presumably means an interquartile-range rule — confirm in TS_Util.
TSU.display_outliers(data,method="iqr",coeff=1, window=10)
Out[31]:
In [32]:
# Call display_outliers with method="qtl", coeff=1, window=10.
# NOTE(review): "qtl" presumably means a quantile-based rule — confirm in TS_Util.
TSU.display_outliers(data,method="qtl",coeff=1, window=10)
Out[32]:
In [ ]:
In [72]:
# Locate runs of repeated (unchanged) consecutive values in one column and
# collect the index labels where each run starts and ends.
# Note: a difference involving NaN is NaN, which never equals 0, so NaN gaps
# are NOT matched by these masks — only equal neighbouring values are.
var = "Data 1"
same_as_next = data[var].diff(-1) == 0          # row value equals the next row's value
same_as_prev = data[var].shift().diff(-1) == 0  # row value equals the previous row's value
# A run starts where the value repeats forward but was not a repeat of the previous row.
start_event = same_as_next & ~same_as_prev
start = data[start_event].index.tolist()
# A run ends where the value repeated the previous row but differs from the next row.
end_events = ~same_as_next & same_as_prev
end = data[end_events].index.tolist()
end
Out[72]:
In [48]:
# Select the rows whose "Data 1" value is identical to the previous row's value
# (difference to the preceding row equals zero).
data.loc[data["Data 1"].diff().eq(0)]
Out[48]:
In [ ]:
In [ ]: