JLab ML Lunch 2 - Data Exploration

  • Second ML challenge hosted
  • On October 30th, a test dataset will be released, and predictions must be submitted within 24 hours
  • Let's take a look at the training data!

In [1]:
%matplotlib widget

In [116]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import axes3d
import imageio

In [56]:
from sys import path
path.append('../src')
from jlab import load_test_data, test_to_time_series, Z_VALS

Training Data


In [11]:
data = load_test_data("../data/MLchallenge2_testing_inputs.csv")
X = test_to_time_series(data)

In [53]:
y_pred = pd.read_csv('../data/submission/dannowitz_jlab2_submission_20191112.csv',
                     header=None, names=[0, 1, 3, 4, 5])

In [55]:
y_true = pd.read_csv('../data/ANSWERS.csv',
                     header=None, names=[0, 1, 3, 4, 5])

In [39]:
def get_track_start(track):
    for j in range(0, 24):
        if not all(track[j][i] == 0.0 for i in range(0,len(track[j]))):
            return j
    return None

In [40]:
def unpad_track(track):
    return track[get_track_start(track):]

In [107]:
def plot_quiver_track(track, color='b',
                      ax=None, elev=None,
                      azim=None, dist=None, alpha=1.0):
    
    # Get all the values of each type of feature
    x = [track[i][0] for i in range(0, len(track))]
    y = [track[i][1] for i in range(0, len(track))]
    z = [track[i][2] for i in range(0, len(track))]
    px = [track[i][3] for i in range(0, len(track))]
    py = [track[i][4] for i in range(0, len(track))]
    pz = [track[i][5] for i in range(0, len(track))]

    # Create our 3D figure
    if ax is None:
        fig = plt.figure(figsize=(5,5))
        ax = fig.gca(projection='3d')   
        ax.xaxis.set_pane_color((1,1,1,1))
        ax.yaxis.set_pane_color((1,1,1,1))
        ax.zaxis.set_pane_color((1,1,1,1))
    else:
        fig = None
    
    # Set the three 3D plot viewing attributes
    if elev is not None:
        ax.elev = elev
    if azim is not None:
        ax.azim = azim
    if dist is not None:
        ax.dist = dist
    
    # Create our quiver plot
    ax.quiver(z, x, y, pz, px, py, length=14,
              color=color, alpha=alpha)
    
    ax.set_xlabel("z", fontweight="bold")
    ax.set_ylabel("x", fontweight="bold")
    ax.set_zlabel("y", fontweight="bold")
    plt.tight_layout()

    return fig, ax

In [108]:
track_id = 7

track = unpad_track(X[track_id])

track_pred = y_pred.loc[track_id]
track_pred.loc[2] = Z_VALS[len(track)]
track_pred = np.array([track_pred.sort_index().values])

track_true = y_true.loc[track_id]
track_true.loc[2] = Z_VALS[len(track)]
track_true = np.array([track_true.sort_index().values])

In [109]:
fig, ax = plot_quiver_track(track, alpha=0.5)
_, _ = plot_quiver_track(track_pred, ax=ax, color='g')
_, _ = plot_quiver_track(track_true, ax=ax, color='r')



In [113]:
gif_filename = "track-pred-anim"

ax.elev = 50.
ax.azim = 90.
ax.dist = 9.

img_files = []
for n in range(0, 100):
    ax.elev = ax.elev-0.4
    ax.azim = ax.azim-1.5
    filename = f'../images/{gif_filename}/img{str(n).zfill(3)}.png'
    img_files.append(filename)
    plt.savefig(filename, bbox_inches='tight')

In [114]:
images = []
for filename in img_files:
    images.append(imageio.imread(filename))
imageio.mimsave('../images/track-pred.gif', images)

In [122]:
def make_track_gif(X, true, pred, track_id):

    track = unpad_track(X[track_id])

    track_pred = pred.loc[track_id]
    track_pred.loc[2] = Z_VALS[len(track)]
    track_pred = np.array([track_pred.sort_index().values])

    track_true = true.loc[track_id]
    track_true.loc[2] = Z_VALS[len(track)]
    track_true = np.array([track_true.sort_index().values])
    
    fig, ax = plot_quiver_track(track, alpha=0.5)
    _, _ = plot_quiver_track(track_pred, ax=ax, color='g')
    _, _ = plot_quiver_track(track_true, ax=ax, color='r')
    
    gif_filename = f"track-pred-{track_id}-anim"
    try:
        os.mkdir(f"../images/{gif_filename}")
    except:
        # path exists
        pass

    ax.elev = 50.
    ax.azim = 90.
    ax.dist = 9.

    img_files = []
    for n in range(0, 100):
        ax.elev = ax.elev-0.4
        ax.azim = ax.azim-1.5
        filename = f'../images/{gif_filename}/img{str(n).zfill(3)}.png'
        img_files.append(filename)
        plt.savefig(filename, bbox_inches='tight')
        
    images = []
    for filename in img_files:
        images.append(imageio.imread(filename))
    imageio.mimsave(f'../images/{gif_filename}.gif', images)

In [123]:
make_track_gif(X, y_true, y_pred, 7)
make_track_gif(X, y_true, y_pred, 10)
make_track_gif(X, y_true, y_pred, 15)
make_track_gif(X, y_true, y_pred, 20)
make_track_gif(X, y_true, y_pred, 25)


//anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:15: RuntimeWarning: More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`).
  from ipykernel import kernelapp as app
//anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:15: RuntimeWarning: More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`).
  from ipykernel import kernelapp as app
//anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:15: RuntimeWarning: More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`).
  from ipykernel import kernelapp as app
//anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:15: RuntimeWarning: More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`).
  from ipykernel import kernelapp as app