In [14]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import glob
import os
import scipy.ndimage.filters

import string

%matplotlib inline

In [2]:
def get_hour(item):
    return item.hour

def get_day_frame(directory, date, remove_zeros=True, cutoff_area=10000, dropna_thres=4,
                 parse_filename=False, find_cumtime=True,
                 col_names = ['Interval', 'x', 'y', 'Area', 'Hour', 'Minute', 'Second']):
    """
    Returns the day frame for all files in the directory
    date: (month, date, year) tuple, for adding the relevant columns
    parse_filename: set True for files without timestamp, will infer timestamps from filename
    find_cumtime: set False for files that have intervals that are cumulative instead of dt
    """
    files = glob.glob(directory + '/*.csv') 
    
    if files == []:
        return None
    
    frames = []
    for filename in files:
        obs = pd.read_csv(filename, header=None, names=col_names)
        
        # Remove nan columns
        obs.dropna(thresh=dropna_thres)
        
        # Parse filename to get time of the record
        if parse_filename:
            components = string.split(filename, '_')
            timestr = string.join(components[1:], ':')[:-4]
            endtime = pd.to_datetime(timestr)
            
            
            if find_cumtime:
                cum_time = np.cumsum(obs.Interval)
            else:
                cum_time = obs.Interval
            
            cum_time_delta = pd.to_timedelta(cum_time, unit='s')
            starttime = endtime - cum_time_delta[obs.shape[0] - 1]
            obs['Time'] = starttime + cum_time_delta
        
                 
        frames.append(obs)

    # Concatenate readings from all files
    day_frame = pd.concat(frames)
    day_frame['Day'] = date[1]
    day_frame['Month'] = date[0]
    day_frame['Year'] = date[2]
    
    #from IPython.core.debugger import Tracer; Tracer()()
    
    if not parse_filename:
        day_frame['Time'] = pd.to_datetime(day_frame[['Day','Month', 'Year','Hour','Minute','Second']])
    
    # Light hour is from 9am to 7pm
    day_frame['Light'] = (day_frame.Time.apply(get_hour) >= 7) & (day_frame.Time.apply(get_hour) <= 21)
    
    # Remove zeros if specified
    if remove_zeros:
        day_frame = day_frame[(day_frame['x'] != 0) & (day_frame['y'] != 0)]
        
    # Remove entries which exceed a cutoff area
    if cutoff_area != None:
        day_frame = day_frame[day_frame.Area < cutoff_area]
        
    
    
    return day_frame
  

def collect_all_days(parent_dir, days, remove_zeros=True, cutoff_area=10000, dropna_thres=4,
                  parse_filename=False, find_cumtime=True,
                  col_names = ['Interval', 'x', 'y', 'Area', 'Hour', 'Minute', 'Second']):
    '''
    ------------------------
    Inputs:
    parent_dir: string indicating parent directory
    days: list of recording days
    ------------------------
    Output:
    A pd frame for all observations of that animal
    ------------------------    
    '''
    frames = []
    for day in days:
        #datestr = str(day[0]) + '/' + str(day[1]) + '/' + str(day[2])
        #datetime = pd.to_datetime(datestr)
        directory = parent_dir + day.strftime('%m%d') #+ '_proc/'
        print directory
        if os.path.isdir(directory):
            day_frame = get_day_frame(directory, (day.month, day.day, day.year),
                                     remove_zeros, cutoff_area, dropna_thres,
                                     parse_filename, find_cumtime, col_names)
            frames.append(day_frame)
        else:
            print '     Directory does not exist'
    
    return pd.concat(frames)

In [3]:
def get_displacement(day_frame):
    '''
    Get displacement from x and y coords
    '''
    
    dx = np.diff(day_frame['x'])
    dy = np.diff(day_frame['y'])
    displacement = np.sqrt(dx * dx + dy * dy)
    
    # Diff returns n-1 values, so concatenate a 0 behind and make a new column
    return np.concatenate((displacement, [0])) 

def get_zeit(day_frame, start_time_str=None, use_time_field=False):
    '''
    Compute the zeitgeber time, starting from 9am, in unit of hours
    '''
    timeseries = day_frame.Time
    
    if start_time_str == None:
        starttime = timeseries[0]
    else:
        starttime = pd.to_datetime(start_time_str)

    return (timeseries - starttime) / pd.Timedelta(1, unit='h')

def get_crossing(day_frame, xline=160):
    '''
    Compute the time of crossings across the specified xline
    Will return an np array with T being the time of crossing
    '''
    leftright = day_frame.x < 160
    diff = np.diff(leftright)
    
    return np.concatenate((diff, [0])) 

def plot_all_data(frame, axis):
    '''
    Make a plot of all displacement values
    '''
    axis.plot(frame['ZeitTime'], frame['Displacement'], '.', alpha=0.3)
    axis.fill_between(frame.ZeitTime, 0, axis.get_ylim()[1], 
                    where=~frame.Light, color='gray',  alpha=0.3)
    axis.set_xlabel('Zeitbeger time (h)')
    axis.set_ylabel('Displacement (pix)')
    
def ecdf(data):
    return np.sort(data), np.arange(len(data)) / float(len(data))

def aggregate_activity(day_frame, time, unit, axis=None, plot=False,
                      name='Bird', save=False):
    """
    Unit: either 'Hour', 'Minute', or 'Second'
    If plot set to True, also plot the activity
    Returns a data frame which is the total distance travelled in time * unit time specified
    """
    if unit == 'Hour':
        scale = 3600
    elif unit == 'Minute':
        scale = 60
    elif unit == 'Second':
        scale = 1
    else:
        raise ValueError('Invalid time unit')
        
    group_duration = time * scale # Number of seconds to group
    SECS_PER_DAY = 3600 * 24
    
    # ZeitTime is measured in hours, partitioned into groups of
    # specified group_duration
    group = (day_frame['ZeitTime'] / (group_duration/3600.0)).astype('int')
    agg_displacement = day_frame[['Displacement', 'Crosses']].groupby(group)
    
    agg_sum_grouped = agg_displacement.sum().reset_index()
    
    # Create data frame with day information
    agg_sum_grouped['Day'] = (agg_sum_grouped['ZeitTime'] / (SECS_PER_DAY / group_duration)).astype('int')
    agg_sum_grouped['Period'] = agg_sum_grouped['ZeitTime'] % int(SECS_PER_DAY / group_duration)
    
    # Produce a plot if instructed
    if plot:
        for day in agg_sum_grouped.Day.unique():
            activity_day = agg_sum_grouped.Displacement[agg_sum_grouped.Day == day]
            crosses_day = agg_sum_grouped.Crosses[agg_sum_grouped.Day == day]
            time_in_day = agg_sum_grouped.Period[agg_sum_grouped.Day == day] /3
            axis[0].plot(time_in_day, activity_day)
            axis[1].plot(time_in_day, crosses_day)
        axis[0].set_xlabel('Time (hour)')
        axis[0].set_ylabel('Total distance in 20 minutes')
        axis[1].set_xlabel('Time (hour)')
        axis[1].set_ylabel('Crosses in 20 minutes')
    
    return agg_sum_grouped

In [4]:
def process_animal(name, start_day_str, start_time_str=' 7:00AM', ndays=5,  
                   time=20, unit='Minute', save=False, 
                   cutoff_area=None, parse_filename=False, find_cumtime=True,
                  col_names = ['Interval', 'x', 'y', 'Area', 'Hour', 'Minute', 'Second'],
                  xline=160):
    '''
    Perform the analysis procedure for the specified animal
    name: string for name of animal, will read from folder of the same name
    start_day_str: string that represents start day, e.g. '9/25/2016'
    periods: number of days to read
    save: indicates whether the final frame should be saved to a .csv
    '''
    days = pd.date_range(start_day_str, periods=ndays, freq='d')
    frame_all = collect_all_days(name + '/', days, 
                                cutoff_area=cutoff_area, parse_filename=parse_filename,
                                find_cumtime=find_cumtime, col_names=col_names)

    # Sort, then calculate displacement and zeitbeger time
    frame_all = frame_all.sort_values('Time')
    frame_all['Displacement'] = get_displacement(frame_all)
    frame_all['ZeitTime'] = get_zeit(frame_all, start_time_str=start_day_str + start_time_str)
    frame_all['Crosses'] = get_crossing(frame_all, xline=xline)
 
    # Make plots of (1) all displacement points (2) binned activity, 20 minutes
    fig, ax = plt.subplots(1, 3, figsize=(18,10))
    plot_all_data(frame_all, ax[0])

    activity = aggregate_activity(frame_all, time=time, unit=unit, 
                                  plot=True, axis=(ax[1], ax[2]), save=save, name=name)
    
    # Save the frame when instructed
    if save:
        activity['Animal'] = name
        activity['Bin duration - ' + unit ] = time
        activity['Start Time'] = pd.to_datetime(start_day_str + start_time_str)
        save_filename = 'Binned_results/' + name + '_binned_20min_073117.csv'
        if not os.path.isfile(save_filename):
            activity.to_csv(save_filename)
        else:
            raise IOError('File exists, cannot overwrite...')
            
    return (frame_all, activity)

Functions for trajectory and heat-map plotting


In [17]:
def plot_trajectory(frame, start_time_str, end_time_str, return_fr=False):
    '''
    Given a frame, plot the animal's trajectory during the specified period
    Returns the frame used for plotting
    '''
    start_time = pd.to_datetime(start_time_str)
    end_time = pd.to_datetime(end_time_str)
    frame_interest = frame[(frame.Time > start_time) & (frame.Time < end_time)]
    plt.plot(frame_interest.x, frame_interest.y)
    
    if return_fr:
        return frame_interest
    
def plot_heatmap(frame, start_time_str, end_time_str, return_fr=False, sigma=1, bins=100):
    '''
    Given a frame, plot the animal's trajectory during the specified period
    Returns the frame used for plotting
    '''
    start_time = pd.to_datetime(start_time_str)
    end_time = pd.to_datetime(end_time_str)
    frame_interest = frame[(frame.Time > start_time) & (frame.Time < end_time)]
    
    heatmap, x_edges, y_edges  = np.histogram2d(frame_interest.x, frame_interest.y, 
                                  weights=frame_interest.Interval, bins=bins)
    
    # Apply a Gaussian filter to the heatmap
    heatmap_filt = scipy.ndimage.filters.gaussian_filter(heatmap, sigma=sigma)
    
    
    extent = [x_edges[0], x_edges[-1], y_edges[0], y_edges[-1]]
    
    
    with sns.axes_style("dark"):
        plt.imshow(heatmap_filt.T, extent=extent, cmap='jet')
        
    if return_fr:
        return heatmap

In [7]:
fr, activity = process_animal('2CW100', '9/25/2016', ndays=5, save=False)


2CW100/0925
2CW100/0926
2CW100/0927
2CW100/0928
2CW100/0929

In [23]:
hm = plot_heatmap(fr, '9/29/2016 10:00AM', '9/29/2016 4:00PM', return_fr=False, bins=100, sigma=1)



In [ ]:
process_animal('2CW305', '7/14/2016', ndays=4, parse_filename=True, save=True)

In [ ]:
process_animal('2CW318', '6/28/2016', ndays=4, save=True, parse_filename=True, find_cumtime=False)

In [ ]:
process_animal('LB21', '7/1/2016', ndays=5, save=True, parse_filename=True)

In [ ]:
process_animal('2CW312', '7/9/2016', ndays=6, save=True, parse_filename=True)

In [ ]:
process_animal('LB15', '7/11/2016', ndays=4, save=True)

In [ ]:
process_animal('2CW306', '7/19/2016', ndays=4, save=True)

In [ ]:
process_animal('2CW308', '7/14/2016', ndays=4, save=True)

In [ ]:
process_animal('LB06', '7/18/2016', ndays=3, save=True)

In [ ]:
process_animal('Or130', '7/22/2016', ndays=7, save=True)

In [ ]:
process_animal('Or114', '7/26/2016', ndays=9, save=True)

In [ ]:
process_animal('2CW317', '7/20/2016', ndays=6, save=True)

In [ ]:
process_animal('Z1Pi229', '8/3/2016', ndays=15, save=True)

In [ ]:
process_animal('Z1Pi230', '8/3/2016', ndays=13, save=True)

In [ ]:
process_animal('LB2', '9/19/2016', ndays=6, save=True)

In [ ]:
process_animal('LB11', '8/22/2016', ndays=9, save=True)

In [ ]:
process_animal('Or128', '8/22/2016', ndays=9, save=True)

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]: