notebook.community



In [1]:

    
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import animation
from matplotlib.patches import Rectangle
import seaborn as sns
%matplotlib inline
sns.set_style("white")
sns.set_context("paper", font_scale=1.4)



In [2]:

    
data_df = pd.read_csv('raw-data.csv', index_col='eventID')
data_df.head()









    Out[2]:






  
    
      
      AfterInhMATRIX5
      PrescaleMATRIX5
      RawMATRIX4
      RawTriggers
      label
    
    
      eventID
      
      
      
      
      
    
  
  
    
      430001
      27094
      12
      1598521
      10759045
      2
    
    
      430002
      34901
      14
      1670878
      11813291
      3
    
    
      430003
      36317
      15
      1675869
      12002554
      3
    
    
      430004
      34088
      14
      1637602
      11564482
      3
    
    
      430005
      27489
      12
      1587623
      10627391
      4



In [3]:

    
feature = "AfterInhMATRIX5"
sns.distplot(data_df.query("label == 1")[feature])









    Out[3]:





<matplotlib.axes._subplots.AxesSubplot at 0x7f90b8529850>

We'll be plotting over ranges of eventID

Get details of the eventID's we're working with.



In [4]:

    
minEventID = data_df.index.values[0]
maxEventID = data_df.index.values[-1]
nTotalEvents = maxEventID - minEventID
print ("Event Range: %d - %d (%d events)" %
       (minEventID, maxEventID, nTotalEvents))









    



Event Range: 430001 - 482555 (52554 events)



In [5]:

    
# Define animation parameters
frames = 60
step = int((nTotalEvents)/frames)
print ("#Frames: %d\nStep Size: %d events" % (frames, step))









    



#Frames: 60
Step Size: 875 events



In [6]:

    
def my_frame(nframe):
    """Creates a single frame for our animated plot."""
    
    # Clear the axes
    plt.cla()
    # Get step amount
    my_step = step
    # Calculate the eventID range from the frame number
    tmp_minEventID = minEventID + (nframe * my_step)
    # Define your max eventID.
    tmp_maxEventID = tmp_minEventID + my_step
    # Get desired slice of the data
    my_slice = data_df.query("label == 1 and "
                             "eventID >= @tmp_minEventID and "
                             "eventID < @tmp_maxEventID")[feature]
    if len(my_slice)!=0:
        # Plot the distribution
        ax = sns.distplot(my_slice)
        # Make sure you always use the same x-range
        plt.xlim([10000,100000])
        # Y-ticks will be different per normalized distplot
        plt.yticks(visible=False)
        ax.yaxis.grid(False)
        # Label with the eventID range plotted
        plt.title('eventID: [%d, %d)' %
                  (tmp_minEventID, tmp_maxEventID))



In [7]:

    
fps = 4
fig = plt.figure(figsize=(8,6));
anim = animation.FuncAnimation(fig, my_frame,
                               frames=frames);
anim.save(('figures/%dfps.gif' % fps), 
          writer='imagemagick', fps=fps);

Let's add the means for the Gaussians we fit earlier

Just for kicks, to see how they line up with this progression.



In [8]:

    
# Optimized values from scipy.optimize.curve_fit
#    (from previous post)
optim_means = [30367, 53071, 66494, 80549]
optim_amp = [7.46e-06, 1.99e-05, 5.85e-05, 4.70e-06]
# Normalize the list like so
norm_amp = [i/sum(optim_amp) for i in optim_amp]



In [9]:

    
def my_frame(nframe):
    """Creates a single frame for our animated plot."""
    
    # Clear the axes
    plt.cla()
    # Get step amount
    my_step = step
    # Calculate the eventID range from the frame number
    tmp_minEventID = minEventID + (nframe * my_step)
    # Define your max eventID.
    tmp_maxEventID = tmp_minEventID + my_step
    # Get desired slice of the data
    my_slice = data_df.query("label == 1 and "
                             "eventID >= @tmp_minEventID and "
                             "eventID < @tmp_maxEventID")[feature]
    if len(my_slice)!=0:
        # Plot the distribution
        ax = sns.distplot(my_slice)
        ###### **NEW** ######
        #Add lines for the Gaussian means we fitted
        ylim = ax.get_ylim()
        for mean, amp in zip(optim_means, norm_amp):
            plt.vlines(mean, 0.0, amp*ylim[1], color='black',
                       linestyles='--', lw=2)
        #####################
        # Make sure you always use the same x-range
        plt.xlim([10000,100000])
        # Y-ticks will be different per normalized distplot
        plt.yticks(visible=False)
        ax.yaxis.grid(False)
        # Label with the eventID range plotted
        plt.title('eventID: [%d, %d)' %
                  (tmp_minEventID, tmp_maxEventID))



In [10]:

    
fps = 4
fig = plt.figure(figsize=(8,6));
anim = animation.FuncAnimation(fig, my_frame,
                               frames=frames);
anim.save(('figures/%dfps_means.gif' % fps),
          writer='imagemagick', fps=fps);

Smoothing out the animations

You can smooth out the animation by:

Keep the step size the same
Increase the chunk of events you're looking at by some factor N

This allows that, for each step transition, only only 1/Nth of the data introduced is removed and added



In [22]:

    
def my_frame(nframe):
    """Creates a single frame for our animated plot."""
    
    # Clear the axes
    plt.cla()
    # Get step amount
    my_step = step
    # Calculate the eventID range from the frame number
    tmp_minEventID = minEventID + (nframe * my_step)
    ###### **NEW** ######
    # Define your max eventID. You can smooth things out
    #   by increasing this by some factor of step size
    N_factor = 3
    tmp_maxEventID = tmp_minEventID + (N_factor*my_step)
    #####################
    # Get desired slice of the data
    my_slice = data_df.query("label == 1 and "
                             "eventID >= @tmp_minEventID and "
                             "eventID < @tmp_maxEventID")[feature]
    if len(my_slice)!=0:
        # Plot the distribution
        ax = sns.distplot(my_slice)
        # Add lines for the Gaussian means we fitted
        ylim = ax.get_ylim()
        for mean, amp in zip(optim_means, norm_amp):
            plt.vlines(mean, 0.0, amp*ylim[1], color='black',
                       linestyles='--', lw=2)
        # Make sure you always use the same x-range
        plt.xlim([10000,100000])
        # Y-ticks will be different per normalized distplot
        plt.yticks(visible=False)
        ax.yaxis.grid(False)
        # Label with the eventID range plotted
        plt.title('eventID: [%d, %d)' %
                  (tmp_minEventID, tmp_maxEventID))



In [23]:

    
fps = 4
fig = plt.figure(figsize=(8,6));
anim = animation.FuncAnimation(fig, my_frame,
                               frames=frames);
anim.save(('figures/%dfps_means_smooth.gif' % fps),
          writer='imagemagick', fps=fps);

Add progress bar

It's difficult to watch the eventID values.

Help out the viewer by adding a progress bar. Allows one to, at the least, see when .gif loops.

Create empty Rectangle at the top
Create a filled in Rectangle to represent the data viewed



In [13]:

    
def my_frame(nframe):
    """Creates a single frame for our animated plot."""
    
    # Clear the axes
    plt.cla()
    # Get step amount
    my_step = step
    # Calculate the eventID range from the frame number
    tmp_minEventID = minEventID + (nframe * my_step)
    # Define your max eventID. You can smooth things out
    #   by increasing this by some factor of step size
    N_factor = 3
    tmp_maxEventID = tmp_minEventID + (N_factor*my_step)
    # Get some figures for where we are in the whose set
    nEvents = tmp_maxEventID - tmp_minEventID
    prcnt_events = (nEvents / float(nTotalEvents))
    prcnt_total = (tmp_maxEventID - minEventID) / float(nTotalEvents)
    # Get desired slice of the data
    my_slice = data_df.query("label == 1 and "
                             "eventID >= @tmp_minEventID and "
                             "eventID < @tmp_maxEventID")[feature]
    if len(my_slice)!=0:
        # Plot the distribution
        ax = sns.distplot(my_slice)
        # Add lines for the Gaussian means we fitted
        ylim = ax.get_ylim()
        for mean, amp in zip(optim_means, norm_amp):
            plt.vlines(mean, 0.0, amp*ylim[1], color='black',
                       linestyles='--', lw=2)
        ###### **NEW** ######
        # Add empty progress bar rectangle
        r1 = Rectangle((12000, 0.920*ylim[1]),    # Lower left (x,y)
                       86000,                     # Width
                       0.06*(ylim[1]-ylim[0]),    # Height
                       facecolor="grey", alpha=0.9, fill=False)
        ax.add_patch(r1)
        # Add rectangle to indicate data location on display
        r2 = Rectangle((12500+(prcnt_total*78000), 0.925*ylim[1]),
                       78000*prcnt_events,    
                       0.05*(ylim[1]-ylim[0]),
                       facecolor="grey", alpha=0.7, fill=True)
        ax.add_patch(r2)
        #####################
        # Make sure you always use the same x-range
        plt.xlim([10000,100000])
        # Y-ticks will be different per normalized distplot
        plt.yticks(visible=False)
        ax.yaxis.grid(False)
        # Label with the eventID range plotted
        plt.title('eventID: [%d, %d)' %
                  (tmp_minEventID, tmp_maxEventID))



In [14]:

    
fps = 4
fig = plt.figure(figsize=(8,6));
anim = animation.FuncAnimation(fig, my_frame,
                               frames=frames);
anim.save(('figures/%dfps_means_smooth_progress.gif' % fps),
          writer='imagemagick', fps=fps);

Adjust FPS

Now that you're done adding things, play around with FPS to see what looks best



In [15]:

    
# Plot for various speeds
for fps in [1, 9, 16]:
    fig = plt.figure(figsize=(8,6))
    anim = animation.FuncAnimation(fig, my_frame,
                                   frames=frames);
    anim.save(('figures/%dfps_means_smooth_progress.gif' % fps),
              writer='imagemagick', fps=fps);

I think I like somewhere around 9fps, so I'll use that.

Let's apply this to some different plots

Like, say if we wanted to get a feel for this time dependence across all categories. Does it effect them all the same?



In [18]:

    
def my_violin_frame(nframe):
    """Creates a single frame for our animated plot."""
    
    # Clear the axes
    plt.cla()
    # Get step amount
    my_step = step
    # Calculate the eventID range from the frame number
    tmp_minEventID = minEventID + (nframe * my_step)
    # Define your max eventID. You can smooth things out
    #   by increasing this by some factor of step size
    N_factor = 3
    tmp_maxEventID = tmp_minEventID + (N_factor*my_step)
    # Get some figures for where we are in the whose set
    nEvents = tmp_maxEventID - tmp_minEventID
    prcnt_events = (nEvents / float(nTotalEvents))
    prcnt_total = (tmp_maxEventID - minEventID) / float(nTotalEvents)
    
    # Get desired slice of the data
    my_slice = data_df.query("eventID >= @tmp_minEventID and "
                             "eventID < @tmp_maxEventID")
    if len(my_slice)!=0:
        # Plot the distribution
        ax = sns.violinplot(x='label', y=feature,
               data=my_slice.sort_values('label'),
               palette='muted')
    
        ylim = [10000,110000]
        plt.ylim(ylim)
    
        # Add empty progress bar rectangle
        r1 = Rectangle((0.1, 0.920*ylim[1]),    # Lower left (x,y)
                       6,                       # Width
                       0.06*(ylim[1]-ylim[0]),  # Height
                       facecolor="grey", alpha=0.9, fill=False)
        ax.add_patch(r1)
        # Add rectangle to indicate data location on display
        r2 = Rectangle((0.11+(prcnt_total*5.5), 0.925*ylim[1]),
                       5.5*prcnt_events,    
                       0.05*(ylim[1]-ylim[0]),
                       facecolor="grey", alpha=0.7, fill=True)
        ax.add_patch(r2)
    
        # Label with the eventID range plotted
        plt.title('eventID: [%d, %d)' %
                  (tmp_minEventID, tmp_maxEventID))



In [19]:

    
fps = 9
fig = plt.figure(figsize=(8,6))
anim = animation.FuncAnimation(fig, my_violin_frame,
                               frames=frames);
anim.save(('figures/%dfps_violin.gif' % fps),
          writer='imagemagick', fps=fps);



In [ ]:

	AfterInhMATRIX5	PrescaleMATRIX5	RawMATRIX4	RawTriggers	label
eventID
430001	27094	12	1598521	10759045	2
430002	34901	14	1670878	11813291	3
430003	36317	15	1675869	12002554	3
430004	34088	14	1637602	11564482	3
430005	27489	12	1587623	10627391	4