In [14]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import glob
import os
import scipy.ndimage.filters
import string
%matplotlib inline
In [2]:
def get_hour(item):
return item.hour
def get_day_frame(directory, date, remove_zeros=True, cutoff_area=10000, dropna_thres=4,
parse_filename=False, find_cumtime=True,
col_names = ['Interval', 'x', 'y', 'Area', 'Hour', 'Minute', 'Second']):
"""
Returns the day frame for all files in the directory
date: (month, date, year) tuple, for adding the relevant columns
parse_filename: set True for files without timestamp, will infer timestamps from filename
find_cumtime: set False for files that have intervals that are cumulative instead of dt
"""
files = glob.glob(directory + '/*.csv')
if files == []:
return None
frames = []
for filename in files:
obs = pd.read_csv(filename, header=None, names=col_names)
# Remove nan columns
obs.dropna(thresh=dropna_thres)
# Parse filename to get time of the record
if parse_filename:
components = string.split(filename, '_')
timestr = string.join(components[1:], ':')[:-4]
endtime = pd.to_datetime(timestr)
if find_cumtime:
cum_time = np.cumsum(obs.Interval)
else:
cum_time = obs.Interval
cum_time_delta = pd.to_timedelta(cum_time, unit='s')
starttime = endtime - cum_time_delta[obs.shape[0] - 1]
obs['Time'] = starttime + cum_time_delta
frames.append(obs)
# Concatenate readings from all files
day_frame = pd.concat(frames)
day_frame['Day'] = date[1]
day_frame['Month'] = date[0]
day_frame['Year'] = date[2]
#from IPython.core.debugger import Tracer; Tracer()()
if not parse_filename:
day_frame['Time'] = pd.to_datetime(day_frame[['Day','Month', 'Year','Hour','Minute','Second']])
# Light hour is from 9am to 7pm
day_frame['Light'] = (day_frame.Time.apply(get_hour) >= 7) & (day_frame.Time.apply(get_hour) <= 21)
# Remove zeros if specified
if remove_zeros:
day_frame = day_frame[(day_frame['x'] != 0) & (day_frame['y'] != 0)]
# Remove entries which exceed a cutoff area
if cutoff_area != None:
day_frame = day_frame[day_frame.Area < cutoff_area]
return day_frame
def collect_all_days(parent_dir, days, remove_zeros=True, cutoff_area=10000, dropna_thres=4,
parse_filename=False, find_cumtime=True,
col_names = ['Interval', 'x', 'y', 'Area', 'Hour', 'Minute', 'Second']):
'''
------------------------
Inputs:
parent_dir: string indicating parent directory
days: list of recording days
------------------------
Output:
A pd frame for all observations of that animal
------------------------
'''
frames = []
for day in days:
#datestr = str(day[0]) + '/' + str(day[1]) + '/' + str(day[2])
#datetime = pd.to_datetime(datestr)
directory = parent_dir + day.strftime('%m%d') #+ '_proc/'
print directory
if os.path.isdir(directory):
day_frame = get_day_frame(directory, (day.month, day.day, day.year),
remove_zeros, cutoff_area, dropna_thres,
parse_filename, find_cumtime, col_names)
frames.append(day_frame)
else:
print ' Directory does not exist'
return pd.concat(frames)
In [3]:
def get_displacement(day_frame):
'''
Get displacement from x and y coords
'''
dx = np.diff(day_frame['x'])
dy = np.diff(day_frame['y'])
displacement = np.sqrt(dx * dx + dy * dy)
# Diff returns n-1 values, so concatenate a 0 behind and make a new column
return np.concatenate((displacement, [0]))
def get_zeit(day_frame, start_time_str=None, use_time_field=False):
'''
Compute the zeitgeber time, starting from 9am, in unit of hours
'''
timeseries = day_frame.Time
if start_time_str == None:
starttime = timeseries[0]
else:
starttime = pd.to_datetime(start_time_str)
return (timeseries - starttime) / pd.Timedelta(1, unit='h')
def get_crossing(day_frame, xline=160):
'''
Compute the time of crossings across the specified xline
Will return an np array with T being the time of crossing
'''
leftright = day_frame.x < 160
diff = np.diff(leftright)
return np.concatenate((diff, [0]))
def plot_all_data(frame, axis):
'''
Make a plot of all displacement values
'''
axis.plot(frame['ZeitTime'], frame['Displacement'], '.', alpha=0.3)
axis.fill_between(frame.ZeitTime, 0, axis.get_ylim()[1],
where=~frame.Light, color='gray', alpha=0.3)
axis.set_xlabel('Zeitbeger time (h)')
axis.set_ylabel('Displacement (pix)')
def ecdf(data):
return np.sort(data), np.arange(len(data)) / float(len(data))
def aggregate_activity(day_frame, time, unit, axis=None, plot=False,
name='Bird', save=False):
"""
Unit: either 'Hour', 'Minute', or 'Second'
If plot set to True, also plot the activity
Returns a data frame which is the total distance travelled in time * unit time specified
"""
if unit == 'Hour':
scale = 3600
elif unit == 'Minute':
scale = 60
elif unit == 'Second':
scale = 1
else:
raise ValueError('Invalid time unit')
group_duration = time * scale # Number of seconds to group
SECS_PER_DAY = 3600 * 24
# ZeitTime is measured in hours, partitioned into groups of
# specified group_duration
group = (day_frame['ZeitTime'] / (group_duration/3600.0)).astype('int')
agg_displacement = day_frame[['Displacement', 'Crosses']].groupby(group)
agg_sum_grouped = agg_displacement.sum().reset_index()
# Create data frame with day information
agg_sum_grouped['Day'] = (agg_sum_grouped['ZeitTime'] / (SECS_PER_DAY / group_duration)).astype('int')
agg_sum_grouped['Period'] = agg_sum_grouped['ZeitTime'] % int(SECS_PER_DAY / group_duration)
# Produce a plot if instructed
if plot:
for day in agg_sum_grouped.Day.unique():
activity_day = agg_sum_grouped.Displacement[agg_sum_grouped.Day == day]
crosses_day = agg_sum_grouped.Crosses[agg_sum_grouped.Day == day]
time_in_day = agg_sum_grouped.Period[agg_sum_grouped.Day == day] /3
axis[0].plot(time_in_day, activity_day)
axis[1].plot(time_in_day, crosses_day)
axis[0].set_xlabel('Time (hour)')
axis[0].set_ylabel('Total distance in 20 minutes')
axis[1].set_xlabel('Time (hour)')
axis[1].set_ylabel('Crosses in 20 minutes')
return agg_sum_grouped
In [4]:
def process_animal(name, start_day_str, start_time_str=' 7:00AM', ndays=5,
time=20, unit='Minute', save=False,
cutoff_area=None, parse_filename=False, find_cumtime=True,
col_names = ['Interval', 'x', 'y', 'Area', 'Hour', 'Minute', 'Second'],
xline=160):
'''
Perform the analysis procedure for the specified animal
name: string for name of animal, will read from folder of the same name
start_day_str: string that represents start day, e.g. '9/25/2016'
periods: number of days to read
save: indicates whether the final frame should be saved to a .csv
'''
days = pd.date_range(start_day_str, periods=ndays, freq='d')
frame_all = collect_all_days(name + '/', days,
cutoff_area=cutoff_area, parse_filename=parse_filename,
find_cumtime=find_cumtime, col_names=col_names)
# Sort, then calculate displacement and zeitbeger time
frame_all = frame_all.sort_values('Time')
frame_all['Displacement'] = get_displacement(frame_all)
frame_all['ZeitTime'] = get_zeit(frame_all, start_time_str=start_day_str + start_time_str)
frame_all['Crosses'] = get_crossing(frame_all, xline=xline)
# Make plots of (1) all displacement points (2) binned activity, 20 minutes
fig, ax = plt.subplots(1, 3, figsize=(18,10))
plot_all_data(frame_all, ax[0])
activity = aggregate_activity(frame_all, time=time, unit=unit,
plot=True, axis=(ax[1], ax[2]), save=save, name=name)
# Save the frame when instructed
if save:
activity['Animal'] = name
activity['Bin duration - ' + unit ] = time
activity['Start Time'] = pd.to_datetime(start_day_str + start_time_str)
save_filename = 'Binned_results/' + name + '_binned_20min_073117.csv'
if not os.path.isfile(save_filename):
activity.to_csv(save_filename)
else:
raise IOError('File exists, cannot overwrite...')
return (frame_all, activity)
In [17]:
def plot_trajectory(frame, start_time_str, end_time_str, return_fr=False):
'''
Given a frame, plot the animal's trajectory during the specified period
Returns the frame used for plotting
'''
start_time = pd.to_datetime(start_time_str)
end_time = pd.to_datetime(end_time_str)
frame_interest = frame[(frame.Time > start_time) & (frame.Time < end_time)]
plt.plot(frame_interest.x, frame_interest.y)
if return_fr:
return frame_interest
def plot_heatmap(frame, start_time_str, end_time_str, return_fr=False, sigma=1, bins=100):
'''
Given a frame, plot the animal's trajectory during the specified period
Returns the frame used for plotting
'''
start_time = pd.to_datetime(start_time_str)
end_time = pd.to_datetime(end_time_str)
frame_interest = frame[(frame.Time > start_time) & (frame.Time < end_time)]
heatmap, x_edges, y_edges = np.histogram2d(frame_interest.x, frame_interest.y,
weights=frame_interest.Interval, bins=bins)
# Apply a Gaussian filter to the heatmap
heatmap_filt = scipy.ndimage.filters.gaussian_filter(heatmap, sigma=sigma)
extent = [x_edges[0], x_edges[-1], y_edges[0], y_edges[-1]]
with sns.axes_style("dark"):
plt.imshow(heatmap_filt.T, extent=extent, cmap='jet')
if return_fr:
return heatmap
In [7]:
fr, activity = process_animal('2CW100', '9/25/2016', ndays=5, save=False)
In [23]:
hm = plot_heatmap(fr, '9/29/2016 10:00AM', '9/29/2016 4:00PM', return_fr=False, bins=100, sigma=1)
In [ ]:
process_animal('2CW305', '7/14/2016', ndays=4, parse_filename=True, save=True)
In [ ]:
process_animal('2CW318', '6/28/2016', ndays=4, save=True, parse_filename=True, find_cumtime=False)
In [ ]:
process_animal('LB21', '7/1/2016', ndays=5, save=True, parse_filename=True)
In [ ]:
process_animal('2CW312', '7/9/2016', ndays=6, save=True, parse_filename=True)
In [ ]:
process_animal('LB15', '7/11/2016', ndays=4, save=True)
In [ ]:
process_animal('2CW306', '7/19/2016', ndays=4, save=True)
In [ ]:
process_animal('2CW308', '7/14/2016', ndays=4, save=True)
In [ ]:
process_animal('LB06', '7/18/2016', ndays=3, save=True)
In [ ]:
process_animal('Or130', '7/22/2016', ndays=7, save=True)
In [ ]:
process_animal('Or114', '7/26/2016', ndays=9, save=True)
In [ ]:
process_animal('2CW317', '7/20/2016', ndays=6, save=True)
In [ ]:
process_animal('Z1Pi229', '8/3/2016', ndays=15, save=True)
In [ ]:
process_animal('Z1Pi230', '8/3/2016', ndays=13, save=True)
In [ ]:
process_animal('LB2', '9/19/2016', ndays=6, save=True)
In [ ]:
process_animal('LB11', '8/22/2016', ndays=9, save=True)
In [ ]:
process_animal('Or128', '8/22/2016', ndays=9, save=True)
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]: