Test TU_Util()


In [36]:
import matplotlib 
from matplotlib import style
%matplotlib inline
style.use('ggplot')

# This is to import custom-made modules
# This can be removed after making these modules a real library
import os, sys
lib_path = os.path.abspath(os.path.join('..', 'building-analytics')) # relative path of the source code in Box Folder 
sys.path.append(lib_path)

from TS_Util_Clean_Data import *

# inputs
fileName = "data1.csv" # replace with other files used
folder = "../data/folder4"

## call script

# instantiate class
TSU = TS_Util()

# load data
data= TSU.load_TS(fileName, folder)

data.tail()


Out[36]:
Data 1
Timestamp
2017-05-06 14:30:00 30.0
2017-05-06 14:40:00 30.0
2017-05-06 14:50:00 30.0
2017-05-06 15:00:00 30.0
2017-05-06 15:10:00 30.0

In [7]:
data= TSU.remove_start_NaN(data)
data.head()


Out[7]:
Data 1
Timestamp
2017-05-03 12:00:00 10.0
2017-05-03 12:10:00 10.0
2017-05-03 12:20:00 10.0
2017-05-03 12:30:00 10.0
2017-05-03 12:40:00 10.0

In [8]:
# clean start-end
data= TSU.remove_end_NaN(data)
data.tail()


Out[8]:
Data 1
Timestamp
2017-05-06 14:30:00 10.0
2017-05-06 14:40:00 10.0
2017-05-06 14:50:00 10.0
2017-05-06 15:00:00 10.0
2017-05-06 15:10:00 10.0

In [16]:
TSU._find_missing(data).head()


Out[16]:
Data 1
Timestamp
2017-05-03 12:00:00 False
2017-05-03 12:10:00 False
2017-05-03 12:20:00 False
2017-05-03 12:30:00 False
2017-05-03 12:40:00 False

In [17]:
TSU.display_missing(data, how="all")


Out[17]:
Data 1
Timestamp

In [19]:
TSU.count_missing(data, output="number")


Out[19]:
Data 1    0
dtype: int64

In [20]:
TSU.remove_missing(data,how="any").head()


Out[20]:
Data 1
Timestamp
2017-05-03 12:00:00 10.0
2017-05-03 12:10:00 10.0
2017-05-03 12:20:00 10.0
2017-05-03 12:30:00 10.0
2017-05-03 12:40:00 10.0

In [21]:
TSU._find_outOfBound(data, 10, 300).head()


Out[21]:
Data 1
Timestamp
2017-05-03 12:00:00 False
2017-05-03 12:10:00 False
2017-05-03 12:20:00 False
2017-05-03 12:30:00 False
2017-05-03 12:40:00 False

In [22]:
TSU.display_outOfBound(data, 10, 300)


Out[22]:
Data 1
Timestamp
2017-05-04 23:10:00 2.0
2017-05-04 23:40:00 3.0
2017-05-05 00:10:00 5.0
2017-05-05 00:30:00 6.0
2017-05-05 00:40:00 7.0
2017-05-05 01:00:00 7.0
2017-05-05 01:10:00 899.0
2017-05-05 01:20:00 7.0
2017-05-05 01:50:00 7.0
2017-05-05 02:30:00 7.0
2017-05-05 02:50:00 8797.0
2017-05-05 03:00:00 0.0
2017-05-05 03:20:00 9.0
2017-05-05 03:30:00 578.0
2017-05-05 03:50:00 785.0
2017-05-05 04:30:00 6.0
2017-05-05 04:40:00 8.0
2017-05-05 04:50:00 7.0
2017-05-05 05:00:00 6.0
2017-05-05 05:20:00 5.0
2017-05-05 05:30:00 6.0
2017-05-05 05:50:00 8.0
2017-05-05 06:00:00 5.0
2017-05-05 06:10:00 678.0
2017-05-05 06:20:00 5.0
2017-05-05 06:40:00 7.0
2017-05-05 07:00:00 8.0
2017-05-05 07:10:00 5.0
2017-05-05 07:40:00 785.0
2017-05-05 07:50:00 6.0

In [25]:
TSU.count_outOfBound(data, 10, 300, output="number")


Out[25]:
Data 1    30
dtype: int64

In [27]:
TSU.remove_outOfBound(data, 10, 350)


Out[27]:
Data 1
Timestamp
2017-05-03 12:00:00 10.0
2017-05-03 12:10:00 10.0
2017-05-03 12:20:00 10.0
2017-05-03 12:30:00 10.0
2017-05-03 12:40:00 10.0
2017-05-03 12:50:00 10.0
2017-05-03 13:00:00 10.0
2017-05-03 13:10:00 10.0
2017-05-03 13:20:00 10.0
2017-05-03 13:30:00 10.0
2017-05-03 13:40:00 10.0
2017-05-03 13:50:00 10.0
2017-05-03 14:00:00 10.0
2017-05-03 14:10:00 10.0
2017-05-03 14:20:00 10.0
2017-05-03 14:30:00 10.0
2017-05-03 14:40:00 10.0
2017-05-03 14:50:00 10.0
2017-05-03 15:00:00 10.0
2017-05-03 15:10:00 10.0
2017-05-03 15:20:00 10.0
2017-05-03 15:30:00 10.0
2017-05-03 15:40:00 10.0
2017-05-03 15:50:00 10.0
2017-05-03 16:00:00 10.0
2017-05-03 16:10:00 10.0
2017-05-03 16:20:00 10.0
2017-05-03 16:30:00 10.0
2017-05-03 16:40:00 10.0
2017-05-03 16:50:00 10.0
... ...
2017-05-06 10:20:00 10.0
2017-05-06 10:30:00 10.0
2017-05-06 10:40:00 10.0
2017-05-06 10:50:00 10.0
2017-05-06 11:00:00 10.0
2017-05-06 11:10:00 10.0
2017-05-06 11:20:00 10.0
2017-05-06 11:30:00 10.0
2017-05-06 11:40:00 10.0
2017-05-06 11:50:00 10.0
2017-05-06 12:00:00 10.0
2017-05-06 12:10:00 10.0
2017-05-06 12:20:00 10.0
2017-05-06 12:30:00 10.0
2017-05-06 12:40:00 10.0
2017-05-06 12:50:00 10.0
2017-05-06 13:00:00 10.0
2017-05-06 13:10:00 10.0
2017-05-06 13:20:00 10.0
2017-05-06 13:30:00 10.0
2017-05-06 13:40:00 10.0
2017-05-06 13:50:00 10.0
2017-05-06 14:00:00 10.0
2017-05-06 14:10:00 10.0
2017-05-06 14:20:00 10.0
2017-05-06 14:30:00 10.0
2017-05-06 14:40:00 10.0
2017-05-06 14:50:00 10.0
2017-05-06 15:00:00 10.0
2017-05-06 15:10:00 10.0

422 rows × 1 columns


In [28]:
TSU.display_outliers(data,method="std",coeff=2, window=10)


Out[28]:
Data 1
Timestamp
2017-05-05 02:50:00 8797.0

In [29]:
TSU.display_outliers(data,method="rstd",coeff=1, window=10)


Out[29]:
Data 1
Timestamp
2017-05-04 23:10:00 2.0
2017-05-04 23:30:00 34.0
2017-05-04 23:50:00 54.0
2017-05-05 00:20:00 65.0
2017-05-05 00:50:00 86.0
2017-05-05 01:10:00 899.0
2017-05-05 02:50:00 8797.0
2017-05-05 05:40:00 87.0
2017-05-05 06:10:00 678.0
2017-05-05 07:40:00 785.0

In [30]:
TSU.display_outliers(data,method="rmedian",coeff=1, window=10)


Out[30]:
Data 1
Timestamp
2017-05-04 23:10:00 2.0
2017-05-04 23:30:00 34.0
2017-05-04 23:40:00 3.0
2017-05-04 23:50:00 54.0
2017-05-05 00:00:00 10.0
2017-05-05 00:10:00 5.0
2017-05-05 00:20:00 65.0
2017-05-05 00:30:00 6.0
2017-05-05 00:40:00 7.0
2017-05-05 00:50:00 86.0
2017-05-05 01:00:00 7.0
2017-05-05 01:10:00 899.0
2017-05-05 01:20:00 7.0
2017-05-05 01:40:00 98.0
2017-05-05 01:50:00 7.0
2017-05-05 02:10:00 78.0
2017-05-05 02:20:00 98.0
2017-05-05 02:30:00 7.0
2017-05-05 02:50:00 8797.0
2017-05-05 03:00:00 0.0
2017-05-05 03:30:00 578.0
2017-05-05 03:50:00 785.0
2017-05-05 04:10:00 67.0
2017-05-05 04:30:00 6.0
2017-05-05 05:00:00 6.0
2017-05-05 05:10:00 10.0
2017-05-05 05:20:00 5.0
2017-05-05 05:30:00 6.0
2017-05-05 05:40:00 87.0
2017-05-05 06:00:00 5.0
2017-05-05 06:10:00 678.0
2017-05-05 06:20:00 5.0
2017-05-05 06:30:00 85.0
2017-05-05 06:50:00 85.0
2017-05-05 07:00:00 8.0
2017-05-05 07:10:00 5.0
2017-05-05 07:20:00 85.0
2017-05-05 07:40:00 785.0
2017-05-05 07:50:00 6.0

In [31]:
TSU.display_outliers(data,method="iqr",coeff=1, window=10)


Out[31]:
Data 1
Timestamp
2017-05-04 23:10:00 2.0
2017-05-04 23:30:00 34.0
2017-05-04 23:40:00 3.0
2017-05-04 23:50:00 54.0
2017-05-05 00:10:00 5.0
2017-05-05 00:20:00 65.0
2017-05-05 00:30:00 6.0
2017-05-05 00:40:00 7.0
2017-05-05 00:50:00 86.0
2017-05-05 01:00:00 7.0
2017-05-05 01:10:00 899.0
2017-05-05 01:20:00 7.0
2017-05-05 01:40:00 98.0
2017-05-05 01:50:00 7.0
2017-05-05 02:10:00 78.0
2017-05-05 02:20:00 98.0
2017-05-05 02:30:00 7.0
2017-05-05 02:50:00 8797.0
2017-05-05 03:00:00 0.0
2017-05-05 03:20:00 9.0
2017-05-05 03:30:00 578.0
2017-05-05 03:50:00 785.0
2017-05-05 04:10:00 67.0
2017-05-05 04:30:00 6.0
2017-05-05 04:40:00 8.0
2017-05-05 04:50:00 7.0
2017-05-05 05:00:00 6.0
2017-05-05 05:20:00 5.0
2017-05-05 05:30:00 6.0
2017-05-05 05:40:00 87.0
2017-05-05 05:50:00 8.0
2017-05-05 06:00:00 5.0
2017-05-05 06:10:00 678.0
2017-05-05 06:20:00 5.0
2017-05-05 06:30:00 85.0
2017-05-05 06:40:00 7.0
2017-05-05 06:50:00 85.0
2017-05-05 07:00:00 8.0
2017-05-05 07:10:00 5.0
2017-05-05 07:20:00 85.0
2017-05-05 07:40:00 785.0
2017-05-05 07:50:00 6.0

In [32]:
TSU.display_outliers(data,method="qtl",coeff=1, window=10)


Out[32]:
Data 1
Timestamp
2017-05-04 23:10:00 2.0
2017-05-04 23:40:00 3.0
2017-05-05 01:10:00 899.0
2017-05-05 02:50:00 8797.0
2017-05-05 03:00:00 0.0

In [ ]:


In [72]:
var = "Data 1"


start_event = (data[var].diff(-1)==0) & ~(data[var].shift().diff(-1)==0) 
start = data[start_event].index.tolist()

end_events = ~(data[var].diff(-1)==0) & (data[var].shift().diff(-1)==0) # find NaN end events
end = data[end_events].index.tolist() 
end


Out[72]:
[Timestamp('2017-05-04 23:00:00'), Timestamp('2017-05-06 13:00:00')]

In [48]:
data[data["Data 1"].diff() == 0]


Out[48]:
Data 1
Timestamp
2017-05-03 12:10:00 10.0
2017-05-03 12:20:00 10.0
2017-05-03 12:30:00 10.0
2017-05-03 12:40:00 10.0
2017-05-03 12:50:00 10.0
2017-05-03 13:00:00 10.0
2017-05-03 13:10:00 10.0
2017-05-03 13:20:00 10.0
2017-05-03 13:30:00 10.0
2017-05-03 13:40:00 10.0
2017-05-03 13:50:00 10.0
2017-05-03 14:00:00 10.0
2017-05-03 14:10:00 10.0
2017-05-03 14:20:00 10.0
2017-05-03 14:30:00 10.0
2017-05-03 14:40:00 10.0
2017-05-03 14:50:00 10.0
2017-05-03 15:00:00 10.0
2017-05-03 15:10:00 10.0
2017-05-03 15:20:00 10.0
2017-05-03 15:30:00 10.0
2017-05-03 15:40:00 10.0
2017-05-03 15:50:00 10.0
2017-05-03 16:00:00 10.0
2017-05-03 16:10:00 10.0
2017-05-03 16:20:00 10.0
2017-05-03 16:30:00 10.0
2017-05-03 16:40:00 10.0
2017-05-03 16:50:00 10.0
2017-05-03 17:00:00 10.0
... ...
2017-05-06 10:10:00 20.0
2017-05-06 10:20:00 20.0
2017-05-06 10:30:00 20.0
2017-05-06 10:40:00 20.0
2017-05-06 10:50:00 20.0
2017-05-06 11:00:00 20.0
2017-05-06 11:10:00 20.0
2017-05-06 11:20:00 20.0
2017-05-06 11:30:00 20.0
2017-05-06 11:40:00 20.0
2017-05-06 11:50:00 20.0
2017-05-06 12:00:00 20.0
2017-05-06 12:10:00 20.0
2017-05-06 12:20:00 20.0
2017-05-06 12:30:00 20.0
2017-05-06 12:40:00 20.0
2017-05-06 12:50:00 20.0
2017-05-06 13:00:00 20.0
2017-05-06 13:20:00 30.0
2017-05-06 13:30:00 30.0
2017-05-06 13:40:00 30.0
2017-05-06 13:50:00 30.0
2017-05-06 14:00:00 30.0
2017-05-06 14:10:00 30.0
2017-05-06 14:20:00 30.0
2017-05-06 14:30:00 30.0
2017-05-06 14:40:00 30.0
2017-05-06 14:50:00 30.0
2017-05-06 15:00:00 30.0
2017-05-06 15:10:00 30.0

396 rows × 1 columns


In [ ]:


In [ ]: