In [1]:
    
# imports for this notebook
%matplotlib inline
import numpy as np
import scipy as sp
import matplotlib as mpl
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import pandas as pd
import theano as T
import theano.tensor as tte
pd.set_option('display.width', 500)
pd.set_option('display.max_columns', 100)
pd.set_option('display.notebook_repr_html', True)
import seaborn as sns
sns.set_style("whitegrid")
sns.set_context("poster")
import warnings
from scipy import VisibleDeprecationWarning
warnings.filterwarnings("ignore", category=VisibleDeprecationWarning) 
import copy
import pymc3 as pm
import scipy as sp
import scipy.stats as stats
import cv2
import cw_utils
import cw_plots
import pickle
    
In [35]:
    
# Re-import the already-loaded cw_plots module so that edits made to its
# source file are picked up without restarting the kernel.
import importlib
importlib.reload(cw_plots)
    
    Out[35]:
In [2]:
    
# Load the five test-session logs and derive per-biome water fractions.
#
# Builds three dicts keyed 'df_1' .. 'df_5' (same order as `names`):
#   data_csvs_raw - the CSV as read, first 6 rows dropped, column names stripped
#   data_csvs     - the four '<Biome>_Water' columns plus 'Other_Water'
#                   (2.5 minus their row sum), all divided by 2.5
#   data_csvs_bin - the same five columns rebuilt from the '<Biome>_WaterBins'
#                   and '<Biome>_FloodBins' strings, with 'Other_Water' = 1 - row sum
path = './../data/test_data/'
names = [
    "test1-2017-11-03T09-31.csv",
    "test2-2017-11-08T09-19.csv",
    "test3-2017-11-09T09-23.csv",
    "test4-2017-11-10T09-24.csv",
    "test5-2017-11-16T11-41.csv"
]
biomes = ['Desert', 'Jungle', 'Wetlands', 'Plains']
water_cols = ['%s_Water' % biome for biome in biomes]
ordered_cols = ['Other_Water'] + water_cols


def _bins_to_fraction(bins):
    """Sum the '-'-separated numbers in the string ``bins`` and divide by 40."""
    return np.sum([float(num) for num in bins.split('-')]) / 40


data_csvs = {}
data_csvs_bin = {}
data_csvs_raw = {}
for i, f_name in enumerate(names, start=1):
    k = 'df_%d' % i

    # Parse each file once; both the raw and the derived frames come from it.
    raw = pd.read_csv(path + f_name).iloc[6:]
    raw.columns = raw.columns.str.strip()
    data_csvs_raw[k] = raw

    # .copy() gives an independent frame, so adding 'Other_Water' below does
    # not write into a column-slice of `raw` (avoids SettingWithCopyWarning).
    water = raw[water_cols].copy()
    water['Other_Water'] = 2.5 - water.sum(axis=1)
    data_csvs[k] = (water / 2.5)[ordered_cols]

    # Per biome: water-bin fraction plus flood-bin fraction.
    binned = pd.DataFrame()
    for biome in biomes:
        binned['%s_Water' % biome] = (
            raw['%s_WaterBins' % biome].apply(_bins_to_fraction)
            + raw['%s_FloodBins' % biome].apply(_bins_to_fraction)
        )
    binned['Other_Water'] = 1 - binned.sum(axis=1)
    data_csvs_bin[k] = binned[ordered_cols]

data_csvs['df_2'].head(2)
    
    
    Out[2]:
In [3]:
    
# Optional: uncomment and run only when needed -- calls cw_utils.get_mini_view_video for every session file.
# for f_name in names:
    
#     print(f_name)
#     mov_name = f_name.replace('.csv', '.mov')
#     cw_utils.get_mini_view_video(path + mov_name, path + f_name)
    
In [4]:
    
# Starting parameters passed as `starting_params` to cw_utils.learn_breakpoint.
N = 5  # size shared by every vector / matrix below

mu0 = np.ones(N) / N
Sig0 = np.eye(N)
Phi = np.eye(N)
Q = 1e-3 * np.eye(N)
R = 1e-8 * np.eye(N)

params = dict(
    mu0=mu0,
    Sig0=Sig0,
    Phi=Phi,
    Q=Q,
    R=R,
    N=N,
    A=np.eye(N),
)

# Fitted results, keyed by dataframe name ('df_1', ...); filled by later cells.
results = dict()
    
In [31]:
    
df_name = 'df_1'
flag = 0  # truthy value => ignore any cached entry in `results` and refit
if flag or df_name not in results:
    # Nothing cached (or a refit was requested): fit and store the result.
    results[df_name] = cw_utils.learn_breakpoint(
        data_csvs[df_name].values,
        num_breaks=5,
        starting_params=params,
        sensitivity=8,
    )
em_res = results[df_name]
    
In [32]:
    
# Display the 'breaks' entry of the result stored for df_1.
results['df_1']['breaks']
    
    Out[32]:
In [36]:
    
# Draw the frame selected by `df_name` together with `em_res` on a single axes
# via cw_utils.plot_data_and_boundaries.
fig, ax = plt.subplots(nrows=1, ncols=1)
ax = cw_utils.plot_data_and_boundaries(
    df=data_csvs[df_name],
    em_results=em_res,
    ax=ax,
)
plt.show()
    
    
In [37]:
    
# Call cw_plots.plot_log_positions for the session currently selected by
# `df_name` (set in the fit cell above).
cw_plots.plot_log_positions(
    df_name=df_name,
    results=results,
    names=names,
    data_csvs=data_csvs,
    data_csvs_raw=data_csvs_raw,
    path='./../data/test_data/',
)
    
    
    
    
    
    
    
    
    
    
Points of interest:
In [38]:
    
df_name = 'df_2'
flag = 0  # truthy value => ignore any cached entry in `results` and refit
if flag or df_name not in results:
    # Nothing cached (or a refit was requested): fit and store the result.
    results[df_name] = cw_utils.learn_breakpoint(
        data_csvs[df_name].values,
        num_breaks=5,
        starting_params=params,
        sensitivity=0,
    )
em_res = results[df_name]
    
In [39]:
    
# Display the 'breaks' entry of the result stored for df_2.
results['df_2']['breaks']
    
    Out[39]:
In [40]:
    
# Draw the frame selected by `df_name` together with `em_res` on a single axes
# via cw_utils.plot_data_and_boundaries.
fig, ax = plt.subplots(nrows=1, ncols=1)
ax = cw_utils.plot_data_and_boundaries(
    df=data_csvs[df_name],
    em_results=em_res,
    ax=ax,
)
plt.show()
    
    
In [41]:
    
# `df_name` is not reassigned here; it carries over from the fit cell above.
cw_plots.plot_log_positions(
    df_name=df_name,
    results=results,
    names=names,
    data_csvs=data_csvs,
    data_csvs_raw=data_csvs_raw,
    path='./../data/test_data/',
)
    
    
    
    
    
    
    
    
Points of interest:
In [42]:
    
df_name = 'df_3'
flag = 0  # truthy value => ignore any cached entry in `results` and refit
if flag or df_name not in results:
    # Nothing cached (or a refit was requested): fit and store the result.
    results[df_name] = cw_utils.learn_breakpoint(
        data_csvs[df_name].values,
        num_breaks=5,
        starting_params=params,
        sensitivity=0,
    )
em_res = results[df_name]
    
In [43]:
    
# Display the 'breaks' entry of the result stored for df_3.
results['df_3']['breaks']
    
    Out[43]:
In [44]:
    
# Draw the frame selected by `df_name` together with `em_res` on a single axes
# via cw_utils.plot_data_and_boundaries.
fig, ax = plt.subplots(nrows=1, ncols=1)
ax = cw_utils.plot_data_and_boundaries(
    df=data_csvs[df_name],
    em_results=em_res,
    ax=ax,
)
plt.show()
    
    
In [45]:
    
# Call cw_plots.plot_log_positions for the session currently selected by
# `df_name` (set in the fit cell above).
cw_plots.plot_log_positions(
    df_name=df_name,
    results=results,
    names=names,
    data_csvs=data_csvs,
    data_csvs_raw=data_csvs_raw,
    path='./../data/test_data/',
)
    
    
    
    
    
    
    
    
Points of interest:
In [46]:
    
df_name = 'df_4'
if df_name not in results:
    # Nothing cached for this frame yet: fit and store the result.
    results[df_name] = cw_utils.learn_breakpoint(
        data_csvs[df_name].values,
        num_breaks=5,
        starting_params=params,
        sensitivity=3,
    )
em_res = results[df_name]
    
In [47]:
    
# Display the 'breaks' entry of the result stored for df_4.
results['df_4']['breaks']
    
    Out[47]:
In [48]:
    
# Draw the frame selected by `df_name` together with `em_res` on a single axes
# via cw_utils.plot_data_and_boundaries.
fig, ax = plt.subplots(nrows=1, ncols=1)
ax = cw_utils.plot_data_and_boundaries(
    df=data_csvs[df_name],
    em_results=em_res,
    ax=ax,
)
plt.show()
    
    
In [49]:
    
# Call cw_plots.plot_log_positions for the session currently selected by
# `df_name` (set in the fit cell above).
cw_plots.plot_log_positions(
    df_name=df_name,
    results=results,
    names=names,
    data_csvs=data_csvs,
    data_csvs_raw=data_csvs_raw,
    path='./../data/test_data/',
)
    
    
    
    
    
    
    
    
Points of interest:
In [20]:
    
df_name = 'df_5'
if df_name not in results:
    # Nothing cached for this frame yet: fit and store the result.
    results[df_name] = cw_utils.learn_breakpoint(
        data_csvs[df_name].values,
        num_breaks=5,
        starting_params=params,
        sensitivity=1,
    )
em_res = results[df_name]
    
    
In [27]:
    
# Display the 'breaks' entry of the result stored for df_5.
results['df_5']['breaks']
    
    Out[27]:
In [21]:
    
# Draw the frame selected by `df_name` together with `em_res` on a single axes
# via cw_utils.plot_data_and_boundaries.
fig, ax = plt.subplots(nrows=1, ncols=1)
ax = cw_utils.plot_data_and_boundaries(
    df=data_csvs[df_name],
    em_results=em_res,
    ax=ax,
)
plt.show()
    
    
In [22]:
    
# Call cw_plots.plot_log_positions for the session currently selected by
# `df_name` (set in the fit cell above).
cw_plots.plot_log_positions(
    df_name=df_name,
    results=results,
    names=names,
    data_csvs=data_csvs,
    data_csvs_raw=data_csvs_raw,
    path='./../data/test_data/',
)
    
    
    
    
    
    
    
    
    
    
Points of interest: