In [2]:
# HEADS-UP: using inline to display instead of plt.show() everytime on notebook.
# %matplotlib inline 

import sys
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.gaussian_process import GaussianProcess
from statsmodels.nonparametric.smoothers_lowess import lowess
from scipy.interpolate import interp1d

# Read proc data
raw_itsc = np.loadtxt('../data/torque_participants/csv_S01_intersection/proc_intersection.csv', delimiter=',')
raw_y0 = np.loadtxt('../data/torque_participants/csv_S01_intersection/proc_y0.csv', delimiter=',')
raw_acc = np.loadtxt('../data/torque_participants/csv_S01_intersection/proc_acc.csv', delimiter=',')
# p intersection.shape[0]

# Group up intersections
intersections = []
cnt = 0
for i in range (1, raw_itsc.shape[0]):
    if (raw_itsc[i, 1] == raw_itsc[i - 1, 1]):
        continue
    else:
        intersections.append(raw_itsc[cnt: i])
        cnt = i + 1
print len(intersections)


3644

In [ ]:
##############################################################
# Sample disturbution for different itsc_type 

tmp_itsc_type = np.unique(raw_itsc[:, 3]) # [-1.  1.  4.  5.  6.  8.]
distributions = {i: list() for i in tmp_itsc_type}

# for cnt in range(len(intersections)):
cnt = 3
# Select a specific sample right now
itsc = intersections[cnt]

# Params
start_step = itsc[0, 0]
end_step = itsc[-1, 0]
# p start_step, end_step

# Select according window in ACC & Y0 datas,
#   [start_step, end_step]
for i in range(raw_y0.shape[0]):
    ts = raw_y0[i, 0]
    if ts >= start_step:
        start_index = i - 1
        for j in range(start_index, raw_y0.shape[0]):
            tss = raw_y0[j, 0]
            if tss >= end_step:
                end_index = j
#                     p start_step - raw_y0[start_index, 0], raw_y0[end_index, 0] - end_step
                break
        break
y0 = raw_y0[start_index: end_index + 1]

for i in range(raw_acc.shape[0]):
    ts = raw_acc[i, 0]
    if ts >= start_step:
        start_index = i - 1
        for j in range(start_index, raw_acc.shape[0]):
            tss = raw_acc[j, 0]
            if tss >= end_step:
                end_index = j
#                     p start_step - raw_acc[start_index, 0], raw_acc[end_index, 0] - end_step
                break
        break
acc = raw_acc[start_index: end_index + 1]

# Gaussian Process for interpolation
# for theta in np.linspace(0.0001, 1e-2, 10):
gp_y0 = GaussianProcess(theta0=1e-3, thetaL=1e-4, thetaU=1e-1)
gp_acc = GaussianProcess(theta0=1e-3, thetaL=1e-4, thetaU=1e-1)
gp_y0.fit(y0[:, 0].reshape(-1, 1), y0[:, 1].reshape(-1, 1))
gp_acc.fit(acc[:, 0].reshape(-1, 1), acc[:, 1].reshape(-1, 1))

yy = gp_y0.predict(itsc[:, 0].reshape(-1, 1))
aa = gp_acc.predict(itsc[:, 0].reshape(-1, 1))

# lowess_y0 = lowess(y0[:, 1], y0[:, 0], frac=.1)
# lowess_acc = lowess(acc[:, 1], acc[:, 0], frac=.3)

# y0_x = list(zip(*lowess_y0))[0]; y0_y = list(zip(*lowess_y0))[1]
# acc_x = list(zip(*lowess_acc))[0]; acc_y = list(zip(*lowess_acc))[1]

# f_y0 = interp1d(y0_x, y0_y, bounds_error=False)
# f_acc = interp1d(acc_x, acc_y, bounds_error=False)

# yy = f_y0(itsc[:, 0].reshape(-1, 1))
# aa = f_acc(itsc[:, 0].reshape(-1, 1))

theta = (np.arcsin(yy / aa) * 180 / 3.1415926).reshape(-1, 1)
# Do the differencing
delta_theta = np.diff(theta, axis=0)

# Form statistical data
for i in range (itsc.shape[0] - 1):
    distributions[itsc[i + 1, -1]].append(delta_theta[i, 0])

    #     if cnt >= 600:
    #         break
    #     p cnt * 1. / len(intersections)
    #     sys.stdout.write("Progress: %d%%   \r" % (cnt * 1. / len(intersections)) )
    #     sys.stdout.flush()

    ##############################################################
    # Visual validation

fig_1 = plt.figure()
# plt.plot(itsc[:, 0], itsc[:, 2])
plt.plot(itsc[:, 0], itsc[:, 3], 'k', label="itsc_type")
# plt.plot(y0[:, 0], y0[:, 1], label="y0_discrete")
# plt.plot(acc[:, 0], acc[:, 1], label="acc_discrete")
plt.plot(itsc[:, 0], yy, 'b--', linewidth=0.8, label="y0_lowessed")
plt.plot(itsc[:, 0], aa, 'c--', linewidth=0.8, label="acc_lowessed")
plt.plot(itsc[:, 0], theta, 'g', linewidth=1, label="raw_theta")
plt.plot(itsc[1:, 0], delta_theta, 'r', label="delta_theta")
plt.plot(itsc[:, 0], np.zeros(itsc.shape), 'grey', linewidth=0.2)

plt.xlim(itsc[0, 0] - 1000, itsc[-1, 0] + 1000)
ymin, ymax = plt.ylim()
plt.ylim(ymin - 1, ymax + 1)
plt.legend(loc='lower right')
plt.title("Data Demo on Intersection NO.13 (LEFT TURN)")
plt.xlabel("Time Stamp")
plt.ylabel("Degree")

plt.show()

# print gp_y0.score(y0[:, 0].reshape(-1, 1), y0[:, 1].reshape(-1, 1)), \
# gp_acc.score(acc[:, 0].reshape(-1, 1), acc[:, 1].reshape(-1, 1))
# plt.legend(['wheel_pos', 'itsc_type', 'theta'],
#            loc='center left',
#            bbox_to_anchor=(1, 0.5))

# # plt.figure()
# for itsc_type in distributions:
#     if itsc_type != 1 and itsc_type != -1 and itsc_type != 5:
#         sns.distplot(distributions[itsc_type], hist=True, rug=False)
# plt.legend([i for i in distributions if i != 1 and i != -1 and i != 5],
#            loc = 'center left', bbox_to_anchor=(1, 0.5))

In [5]:
##############################################################
# Sample disturbution for different itsc_type 

# tmp_itsc_type = np.unique(raw_itsc[:, 3]) # [-1.  1.  4.  5.  6.  8.]
# distributions = {i: list() for i in tmp_itsc_type}

# data = np.empty((0, raw_itsc.shape[1] + 4))
data = np.empty((0, raw_itsc.shape[1] + 2))
for cnt in range(len(intersections)):
# cnt = 3
# Select a specific sample right now
    if cnt % 50 == 0:
        print cnt * 1. / 3644
    try:
        itsc = intersections[cnt]

        # Params
        start_step = itsc[0, 0]
        end_step = itsc[-1, 0]
        # p start_step, end_step

        # Select according window in ACC & Y0 datas,
        #   [start_step, end_step]
        for i in range(raw_y0.shape[0]):
            ts = raw_y0[i, 0]
            if ts >= start_step:
                start_index = i - 1
                for j in range(start_index, raw_y0.shape[0]):
                    tss = raw_y0[j, 0]
                    if tss >= end_step:
                        end_index = j
        #                     p start_step - raw_y0[start_index, 0], raw_y0[end_index, 0] - end_step
                        break
                break
        y0 = raw_y0[start_index: end_index + 1]

        for i in range(raw_acc.shape[0]):
            ts = raw_acc[i, 0]
            if ts >= start_step:
                start_index = i - 1
                for j in range(start_index, raw_acc.shape[0]):
                    tss = raw_acc[j, 0]
                    if tss >= end_step:
                        end_index = j
        #                     p start_step - raw_acc[start_index, 0], raw_acc[end_index, 0] - end_step
                        break
                break
        acc = raw_acc[start_index: end_index + 1]

        # Gaussian Process for interpolation
        gp_y0 = GaussianProcess(theta0=1e-2, thetaL=1e-4, thetaU=1e-1)
        gp_acc = GaussianProcess(theta0=1e-2, thetaL=1e-4, thetaU=1e-1)
        gp_y0.fit(y0[:, 0].reshape(-1, 1), y0[:, 1].reshape(-1, 1))
        gp_acc.fit(acc[:, 0].reshape(-1, 1), acc[:, 1].reshape(-1, 1))

        yy = gp_y0.predict(itsc[:, 0].reshape(-1, 1))
        aa = gp_acc.predict(itsc[:, 0].reshape(-1, 1))
        
#         if cnt >= 100:
#             break

    except Exception or IndexError as err:
        print "Bad Example: ", cnt
        print "*****", err
#         dele.append(cnt)
        continue

    theta = (np.arcsin(yy / aa) * 180 / 3.1415926).reshape(-1, 1)
    delta_theta = np.diff(theta, axis=0)
    tmp_data = np.hstack((itsc[1:], theta[1:], delta_theta))
#     tmp_data = np.hstack((itsc[1:], theta[1:], delta_theta, yy[1:], aa[1:]))
    
    data = np.vstack((data, tmp_data))
    
    dele = []
    for i in range(data.shape[0]):
        if np.isnan(data[i, -1]) or np.isnan(data[i, -2]):
            dele.append(i)
    data = np.delete(data, dele, axis=0)

print data.shape
# np.savetxt('../data/torque_participants/csv_S01_intersection/S01_itsc_.csv', data, delimiter=',')
np.savetxt('../data/torque_participants/csv_S01_intersection/S01_itsc.csv', data, delimiter=',')
    

    # Form statistical data
#     for i in range (itsc.shape[0]):
#         distributions[itsc[i, -1]].append(theta[i, 0])

#     if cnt >= 100:
#         break
#     p cnt * 1. / len(intersections)
#     sys.stdout.write("Progress: %d%%   \r" % (cnt * 1. / len(intersections)) )
#     sys.stdout.flush()

        ##############################################################
        # Visual validation

#         fig_1 = plt.figure()
#         # plt.plot(itsc[:, 0], itsc[:, 2])
#         plt.plot(itsc[:, 0], itsc[:, 3])
#         plt.plot(y0[:, 0], y0[:, 1])
#         plt.plot(acc[:, 0], acc[:, 1])
#         plt.plot(itsc[:, 0], yy)
#         plt.plot(itsc[:, 0], aa)
#         plt.plot(itsc[:, 0], theta)

#         plt.xlim(itsc[0, 0] - 1000, itsc[-1, 0] + 1000)
#         ymin, ymax = plt.ylim()
#         plt.ylim(ymin - 1, ymax + 1)
#         plt.legend(['itsc_type', 'y0', 'acc', 'yy', 'aa', 'theta'],
#                    loc='center left',
#                    bbox_to_anchor=(1, 0.5))
#         p gp_y0.score(y0[:, 0].reshape(-1, 1), y0[:, 1].reshape(-1, 1)), \
#         gp_acc.score(acc[:, 0].reshape(-1, 1), acc[:, 1].reshape(-1, 1))
        
# plt.legend(['wheel_pos', 'itsc_type', 'theta'],
#            loc='center left',
#            bbox_to_anchor=(1, 0.5))

plt.figure()
for itsc_type in distributions:
    if itsc_type != 1 and itsc_type != -1 and itsc_type != 5:
#         sns.distplot(distributions[itsc_type], hist=True, rug=False)
        plt.hist(distributions[itsc_type])
plt.legend([i for i in distributions if i != 1 and i != -1 and i != 5],
           loc = 'center left', bbox_to_anchor=(1, 0.5))
plt.title("Degree Distribution for Different types (LEFT, RIGHT, STRAIGHT)")
plt.xlabel("Degree")
plt.ylabel("Number of samples * 1e3")


0.0
0.0137211855104
0.0274423710209
0.0411635565313
0.0548847420417
0.0686059275521
0.0823271130626
0.096048298573
0.109769484083
0.123490669594
0.137211855104
0.150933040615
0.164654226125
0.178375411636
0.192096597146
0.205817782656
0.219538968167
0.233260153677
0.246981339188
0.260702524698
0.274423710209
0.288144895719
0.301866081229
0.31558726674
0.32930845225
0.343029637761
0.356750823271
0.370472008782
0.384193194292
0.397914379802
0.411635565313
0.425356750823
0.439077936334
0.452799121844
Bad Example:  1681
***** Multiple input features cannot have the same target value.
0.466520307355
0.480241492865
0.493962678375
0.507683863886
0.521405049396
0.535126234907
0.548847420417
Optimization failed. Try increasing the ``nugget``
Bad Example:  2023
***** array must not contain infs or NaNs
Bad Example:  2027
***** Multiple input features cannot have the same target value.
0.562568605928
0.576289791438
0.590010976948
0.603732162459
0.617453347969
0.63117453348
Optimization failed. Try increasing the ``nugget``
Bad Example:  2309
***** array must not contain infs or NaNs
0.64489571899
0.658616904501
0.672338090011
Optimization failed. Try increasing the ``nugget``
Bad Example:  2457
***** array must not contain infs or NaNs
0.686059275521
Bad Example:  2516
***** Multiple input features cannot have the same target value.
0.699780461032
0.713501646542
0.727222832053
0.740944017563
0.754665203074
0.768386388584
0.782107574094
0.795828759605
0.809549945115
Bad Example:  2953
***** index 0 is out of bounds for axis 0 with size 0
0.823271130626
0.836992316136
0.850713501647
0.864434687157
0.878155872667
0.891877058178
0.905598243688
0.919319429199
0.933040614709
0.94676180022
0.96048298573
0.97420417124
0.987925356751
(47138, 8)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-5-3a5fe9dccba2> in <module>()
    117 
    118 plt.figure()
--> 119 for itsc_type in distributions:
    120     if itsc_type != 1 and itsc_type != -1 and itsc_type != 5:
    121 #         sns.distplot(distributions[itsc_type], hist=True, rug=False)

NameError: name 'distributions' is not defined