In [1]:
# imports for this notebook
%matplotlib inline
import numpy as np
import scipy as sp
import matplotlib as mpl
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import pandas as pd
import theano as T
import theano.tensor as tte
pd.set_option('display.width', 500)
pd.set_option('display.max_columns', 100)
pd.set_option('display.notebook_repr_html', True)
import seaborn as sns
sns.set_style("whitegrid")
sns.set_context("poster")
import warnings
from scipy import VisibleDeprecationWarning
warnings.filterwarnings("ignore", category=VisibleDeprecationWarning)
import copy
import pymc3 as pm
import scipy as sp
import scipy.stats as stats
import cv2
import cw_utils
import cw_plots
import pickle
In [35]:
# Development aid: re-import cw_plots so that edits made to the module on disk
# take effect without restarting the kernel. On a fresh kernel the plain
# `import cw_plots` in the import cell already loads the current version.
import importlib
importlib.reload(cw_plots)
Out[35]:
In [2]:
# Load the test-session logs and derive per-biome water proportions.
#
# Three parallel dicts are filled, all keyed 'df_1' ... 'df_5' (one key per
# entry of `names`, in order):
#   data_csvs      - the four *_Water columns plus 'Other_Water', divided by WATER_TOTAL
#   data_csvs_bin  - per-biome totals rebuilt from the *_WaterBins / *_FloodBins
#                    strings, plus 'Other_Water'
#   data_csvs_raw  - the CSV as read (leading rows dropped, column names stripped)
data_csvs = {}
data_csvs_bin = {}
data_csvs_raw = {}

path = './../data/test_data/'
names = [
    "test1-2017-11-03T09-31.csv",
    "test2-2017-11-08T09-19.csv",
    "test3-2017-11-09T09-23.csv",
    "test4-2017-11-10T09-24.csv",
    "test5-2017-11-16T11-41.csv"
]
biomes = ['Desert', 'Jungle', 'Wetlands', 'Plains']

# Number of leading rows dropped from every log.
# NOTE(review): the reason these rows are skipped is not visible here - please document.
N_SKIPPED_ROWS = 6
# The four biome columns and 'Other_Water' are made to add up to this value
# before everything is divided by it, so each row of data_csvs[k] sums to 1.
WATER_TOTAL = 2.5
# Divisor applied to the summed bin values; 'Other_Water' is then defined as
# 1 minus the biome totals, so each row of data_csvs_bin[k] sums to 1.
BIN_DIVISOR = 40

water_cols = ['%s_Water' % biome for biome in biomes]
ordered_cols = ['Other_Water'] + water_cols


def _bin_string_total(bin_string):
    """Sum the '-'-separated numbers in ``bin_string`` and divide by BIN_DIVISOR."""
    return np.sum([float(num) for num in bin_string.split('-')]) / BIN_DIVISOR


for file_idx, file_name in enumerate(names, start=1):
    k = 'df_%d' % file_idx

    # Read each file once. The explicit copy detaches the frame from the full
    # read_csv result so later column edits cannot hit a view.
    raw = pd.read_csv(path + file_name).iloc[N_SKIPPED_ROWS:].copy()
    raw.columns = raw.columns.str.strip()
    data_csvs_raw[k] = raw

    # Work on an independent copy of the biome columns: adding 'Other_Water'
    # to a bare column selection is what triggers SettingWithCopyWarning.
    water = raw[water_cols].copy()
    water['Other_Water'] = WATER_TOTAL - water.sum(axis=1)
    data_csvs[k] = (water / WATER_TOTAL)[ordered_cols]

    # Per-biome total = water bins + flood bins, each parsed from its string column.
    binned = pd.DataFrame({
        '%s_Water' % biome: (
            raw['%s_WaterBins' % biome].apply(_bin_string_total)
            + raw['%s_FloodBins' % biome].apply(_bin_string_total)
        )
        for biome in biomes
    })
    binned['Other_Water'] = 1 - binned.sum(axis=1)
    data_csvs_bin[k] = binned[ordered_cols]

# Quick sanity check of one of the normalised frames.
data_csvs['df_2'].head(2)
Out[2]:
In [3]:
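# Optional: leave this cell commented out unless it is actually needed; it calls cw_utils.get_mini_view_video once for every file in `names`.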
# for f_name in names:
# print(f_name)
# mov_name = f_name.replace('.csv', '.mov')
# cw_utils.get_mini_view_video(path + mov_name, path + f_name)
In [4]:
# Starting parameters passed to cw_utils.learn_breakpoint as `starting_params`.
# NOTE(review): the names look like the usual linear-Gaussian state-space
# convention (initial mean/covariance, transition, noise covariances) -
# confirm against cw_utils before relying on that reading.
N = 5  # common dimension of every vector / matrix below

mu0 = np.ones(N) / N   # uniform vector whose entries sum to 1
Sig0 = np.eye(N) * 1
Phi = np.eye(N)
Q = np.eye(N) * 1e-3
R = np.eye(N) * 1e-8

params = dict(
    mu0=mu0,
    Sig0=Sig0,
    Phi=Phi,
    Q=Q,
    R=R,
    N=N,
    A=np.eye(N),
)

# Cache of fit results keyed by dataset name; filled by the fitting cells.
results = {}
In [31]:
df_name = 'df_1'
flag = 0  # set to a truthy value to force a re-fit even when a cached result exists

# Fit only when forced or when nothing is cached for this dataset yet;
# either way em_res ends up being the entry stored in `results`.
# NOTE(review): sensitivity differs per dataset - presumably hand-tuned, confirm.
if flag or df_name not in results:
    results[df_name] = cw_utils.learn_breakpoint(
        data_csvs[df_name].values,
        num_breaks=5,
        starting_params=params,
        sensitivity=8,
    )
em_res = results[df_name]
In [32]:
# Display the 'breaks' entry of the result stored for df_1.
results['df_1']['breaks']
Out[32]:
In [36]:
# Single-axes figure for the dataset currently selected by df_name; em_res is
# the fit result produced by the preceding fitting cell.
fig, ax = plt.subplots(nrows=1, ncols=1)
ax = cw_utils.plot_data_and_boundaries(
    df=data_csvs[df_name],
    em_results=em_res,
    ax=ax,
)
plt.show()
In [37]:
# Call cw_plots.plot_log_positions for the dataset currently selected by df_name.
# The data directory comes from `path` (defined next to `names`) rather than a
# repeated literal, so this cell cannot drift from the directory the CSVs were
# loaded from.
cw_plots.plot_log_positions(
    df_name=df_name,
    results=results,
    names=names,
    data_csvs=data_csvs,
    data_csvs_raw=data_csvs_raw,
    path=path,
)
Points of interest:
In [38]:
df_name = 'df_2'
flag = 0  # truthy -> ignore any cached result and fit again

# Re-use the cached fit unless a re-fit is forced or no result exists yet.
# NOTE(review): sensitivity differs per dataset - presumably hand-tuned, confirm.
needs_fit = bool(flag) or (df_name not in results)
if needs_fit:
    results[df_name] = cw_utils.learn_breakpoint(
        data_csvs[df_name].values,
        num_breaks=5,
        starting_params=params,
        sensitivity=0,
    )
em_res = results[df_name]
In [39]:
# Display the 'breaks' entry of the result stored for df_2.
results['df_2']['breaks']
Out[39]:
In [40]:
# Single-axes figure for the dataset currently selected by df_name; em_res is
# the fit result produced by the preceding fitting cell.
fig, ax = plt.subplots(nrows=1, ncols=1)
ax = cw_utils.plot_data_and_boundaries(
    df=data_csvs[df_name],
    em_results=em_res,
    ax=ax,
)
plt.show()
In [41]:
# df_name is expected to still be 'df_2' here (set by the df_2 fitting cell).
# Call cw_plots.plot_log_positions for that dataset. The data directory comes
# from `path` (defined next to `names`) rather than a repeated literal, so this
# cell cannot drift from the directory the CSVs were loaded from.
cw_plots.plot_log_positions(
    df_name=df_name,
    results=results,
    names=names,
    data_csvs=data_csvs,
    data_csvs_raw=data_csvs_raw,
    path=path,
)
Points of interest:
In [42]:
df_name = 'df_3'
flag = 0  # truthy -> ignore any cached result and fit again

# Fit when forced or when this dataset has no cached result; em_res is then
# read back from the cache in both cases.
# NOTE(review): sensitivity differs per dataset - presumably hand-tuned, confirm.
if flag or df_name not in results:
    results[df_name] = cw_utils.learn_breakpoint(
        data_csvs[df_name].values,
        num_breaks=5,
        starting_params=params,
        sensitivity=0,
    )
em_res = results[df_name]
In [43]:
# Display the 'breaks' entry of the result stored for df_3.
results['df_3']['breaks']
Out[43]:
In [44]:
# Single-axes figure for the dataset currently selected by df_name; em_res is
# the fit result produced by the preceding fitting cell.
fig, ax = plt.subplots(nrows=1, ncols=1)
ax = cw_utils.plot_data_and_boundaries(
    df=data_csvs[df_name],
    em_results=em_res,
    ax=ax,
)
plt.show()
In [45]:
# Call cw_plots.plot_log_positions for the dataset currently selected by df_name.
# The data directory comes from `path` (defined next to `names`) rather than a
# repeated literal, so this cell cannot drift from the directory the CSVs were
# loaded from.
cw_plots.plot_log_positions(
    df_name=df_name,
    results=results,
    names=names,
    data_csvs=data_csvs,
    data_csvs_raw=data_csvs_raw,
    path=path,
)
Points of interest:
In [46]:
df_name = 'df_4'

# Fit only when this dataset has no cached result yet; em_res is always the
# entry stored in `results`.
# NOTE(review): sensitivity differs per dataset - presumably hand-tuned, confirm.
if df_name not in results:
    results[df_name] = cw_utils.learn_breakpoint(
        data_csvs[df_name].values,
        num_breaks=5,
        starting_params=params,
        sensitivity=3,
    )
em_res = results[df_name]
In [47]:
# Display the 'breaks' entry of the result stored for df_4.
results['df_4']['breaks']
Out[47]:
In [48]:
# Single-axes figure for the dataset currently selected by df_name; em_res is
# the fit result produced by the preceding fitting cell.
fig, ax = plt.subplots(nrows=1, ncols=1)
ax = cw_utils.plot_data_and_boundaries(
    df=data_csvs[df_name],
    em_results=em_res,
    ax=ax,
)
plt.show()
In [49]:
# Call cw_plots.plot_log_positions for the dataset currently selected by df_name.
# The data directory comes from `path` (defined next to `names`) rather than a
# repeated literal, so this cell cannot drift from the directory the CSVs were
# loaded from.
cw_plots.plot_log_positions(
    df_name=df_name,
    results=results,
    names=names,
    data_csvs=data_csvs,
    data_csvs_raw=data_csvs_raw,
    path=path,
)
Points of interest:
In [20]:
df_name = 'df_5'

# Fit only when this dataset has no cached result yet; em_res is always the
# entry stored in `results`.
# NOTE(review): sensitivity differs per dataset - presumably hand-tuned, confirm.
if df_name not in results:
    results[df_name] = cw_utils.learn_breakpoint(
        data_csvs[df_name].values,
        num_breaks=5,
        starting_params=params,
        sensitivity=1,
    )
em_res = results[df_name]
In [27]:
# Display the 'breaks' entry of the result stored for df_5.
results['df_5']['breaks']
Out[27]:
In [21]:
# Single-axes figure for the dataset currently selected by df_name; em_res is
# the fit result produced by the preceding fitting cell.
fig, ax = plt.subplots(nrows=1, ncols=1)
ax = cw_utils.plot_data_and_boundaries(
    df=data_csvs[df_name],
    em_results=em_res,
    ax=ax,
)
plt.show()
In [22]:
# Call cw_plots.plot_log_positions for the dataset currently selected by df_name.
# The data directory comes from `path` (defined next to `names`) rather than a
# repeated literal, so this cell cannot drift from the directory the CSVs were
# loaded from.
cw_plots.plot_log_positions(
    df_name=df_name,
    results=results,
    names=names,
    data_csvs=data_csvs,
    data_csvs_raw=data_csvs_raw,
    path=path,
)
Points of interest: