In [17]:
import matplotlib
from matplotlib import style
%matplotlib inline
style.use('ggplot')
# This is to import custom-made modules
# This can be removed after making these modules a real library
import os, sys
lib_path = os.path.abspath(os.path.join('..', 'building-analytics')) # relative path of the source code in Box Folder
sys.path.append(lib_path)
from TS_Util_Clean_Data import *
# inputs
fileName = "test_marco.csv" # replace with other files used
folder = "../test1"
## call script
# instantiate class
TSU = TS_Util()
# load data
data= TSU.load_TS(fileName, folder)
data.head()
Out[17]:
In [2]:
# clean start: pass `data` through TS_Util.remove_start_NaN and keep the result
# under the same name, so every later cell works on the returned object.
data = TSU.remove_start_NaN(data)

# Preview the first rows after the call.
data.head()
Out[2]:
In [3]:
# clean start-end: pass `data` through TS_Util.remove_end_NaN and keep the
# result under the same name, so every later cell works on the returned object.
data = TSU.remove_end_NaN(data)

# Preview the last rows after the call.
data.tail()
Out[3]:
In [4]:
# Preview the first rows of what TS_Util._find_missing returns for `data`.
# NOTE(review): the leading underscore suggests a private helper - confirm
# whether a public method should be used from the notebook instead.
found_missing = TSU._find_missing(data)
found_missing.head()
Out[4]:
In [5]:
# Show TS_Util.display_missing for `data` with how="all".
# NOTE(review): the meaning of how="all" is defined inside TS_Util - confirm there.
missing_view = TSU.display_missing(data, how="all")
missing_view
Out[5]:
In [6]:
# Show TS_Util.count_missing for `data` with how="number".
# NOTE(review): presumably how="number" asks for absolute counts - confirm in TS_Util.
missing_count = TSU.count_missing(data, how="number")
missing_count
Out[6]:
In [7]:
# Preview the first rows of TS_Util.remove_missing(data, how="any").
# The result is not assigned back to `data`, so the following cells keep
# receiving the same `data` name as before this cell.
without_missing = TSU.remove_missing(data, how="any")
without_missing.head()
Out[7]:
In [8]:
# Preview the first rows of what TS_Util._find_outOfBound returns for the
# limits 10 and 300 (presumably lower / upper bound - confirm in TS_Util).
# NOTE(review): the leading underscore suggests a private helper - confirm
# whether a public method should be used from the notebook instead.
found_out_of_bound = TSU._find_outOfBound(data, 10, 300)
found_out_of_bound.head()
Out[8]:
In [9]:
# Show TS_Util.display_outOfBound for `data` with the limits 10 and 300
# (presumably lower / upper bound - confirm in TS_Util).
out_of_bound_view = TSU.display_outOfBound(data, 10, 300)
out_of_bound_view
Out[9]:
In [10]:
# Show TS_Util.count_outOfBound for `data` with the limits 10 and 300
# (presumably lower / upper bound - confirm in TS_Util).
out_of_bound_count = TSU.count_outOfBound(data, 10, 300)
out_of_bound_count
Out[10]:
In [11]:
# Show the result of TS_Util.remove_outOfBound for `data` with the limits 10 and 350.
# NOTE(review): the second limit here is 350, while the _find_outOfBound,
# display_outOfBound and count_outOfBound cells use 300 - confirm this is intended.
# The result is not assigned back to `data`, and the whole returned object is
# displayed rather than a preview.
within_bounds = TSU.remove_outOfBound(data, 10, 350)
within_bounds
Out[11]:
In [12]:
# Show TS_Util.display_outliers for `data` using method="std".
# NOTE(review): presumably a standard-deviation rule scaled by `coeff`;
# how `window` is used by this method is defined in TS_Util - confirm there.
TSU.display_outliers(
    data,
    method="std",
    coeff=2,
    window=10,
)
Out[12]:
In [13]:
# Show TS_Util.display_outliers for `data` using method="rstd".
# NOTE(review): presumably a rolling standard-deviation rule over `window`
# samples, scaled by `coeff` - confirm in TS_Util.
TSU.display_outliers(
    data,
    method="rstd",
    coeff=1,
    window=10,
)
Out[13]:
In [14]:
# Show TS_Util.display_outliers for `data` using method="rmedian".
# NOTE(review): presumably a rolling-median rule over `window` samples,
# scaled by `coeff` - confirm in TS_Util.
TSU.display_outliers(
    data,
    method="rmedian",
    coeff=1,
    window=10,
)
Out[14]:
In [15]:
# Show TS_Util.display_outliers for `data` using method="iqr".
# NOTE(review): presumably an inter-quartile-range rule scaled by `coeff`;
# how `window` is used by this method is defined in TS_Util - confirm there.
TSU.display_outliers(
    data,
    method="iqr",
    coeff=1,
    window=10,
)
Out[15]:
In [16]:
# Show TS_Util.display_outliers for `data` using method="qtl".
# NOTE(review): presumably a quantile-based rule; how `coeff` and `window`
# are used by this method is defined in TS_Util - confirm there.
TSU.display_outliers(
    data,
    method="qtl",
    coeff=1,
    window=10,
)
Out[16]:
In [ ]: