In [30]:
import numpy as np
import pandas as pd
import scipy.stats as stats
from matplotlib import pyplot as plt
import matplotlib.pylab as pylab
import seaborn as sns
import json
%run -i nb_analysis # Run instead of import so we can access 'df' defined in notebook
%matplotlib inline
In [86]:
# Load the recorded runs from the combined log. orient='records' expects a
# JSON array of objects, each of which becomes one row of df.
df = pd.read_json('combined.json', orient='records')
# Last expression of the cell: display the 'tabsActivated' column as output.
df['tabsActivated']
Out[86]:
In [53]:
# Column information for the loaded runs (printed summary of df's columns).
df.info()
In [105]:
# NOTE(review): disabled scratch snippets for inspecting individual records;
# nothing in this cell executes.
#df['name'][2],df['state'][2], df['tabsOpen'][2], df['name'][3], df['state'][3], df['tabsOpen'][3]
#[df['tabsOpen'][index].keys() for index in df.index]
#df['tabsActivated'][0]['index'][23:26], df['tabsActivated'][0]['numOpenTabs'][23:26], df['tabsActivated'][0]['id'][23:26]
Out[105]:
In [ ]:
# URLs visited vs URLs revisited
In [54]:
# Tab switching over time: one subplot per run showing the active tab index
# at every tab activation.
fig = plt.figure(figsize=(16, 8))  # Larger figure

# Two-column grid. The row count is rounded up so an odd number of runs still
# fits, and uses integer division because plt.subplot() needs integer sizes.
ncols = 2
nrows = (len(df.index) + ncols - 1) // ncols

# Subplots are placed by position (1-based), not by index label, so the grid
# stays valid even when df's index is not exactly 0..n-1.
for position, index in enumerate(df.index, start=1):
    x, y = nestedxy(index=index, maincol='tabsActivated', xcol='time', ycol='index')
    plt.subplot(nrows, ncols, position)
    # pyplot.hold() no longer exists in matplotlib >= 3.0 (overlaying is always
    # on there); on older versions make sure the second plot() keeps the first.
    if hasattr(plt, 'hold'):
        plt.hold(True)
    plt.plot(x, y, 'ro', markersize=3)  # one marker per activation
    plt.plot(x, y)                      # connecting line
    plt.xlabel('Time (s)')
    plt.ylabel('Active tab index')
    plt.title('Run %d (%s)' % (index, df['state'][index]))
    plt.autoscale(tight=True)  # tight limits (no padding) on both axes

# Figure-level spacing, applied once after every subplot exists.
plt.tight_layout()
In [135]:
from scipy.stats import ttest_ind, ttest_rel

# Number of tab switches per second (ON vs OFF)


def _switches_per_second(activations):
    """Return the tab-switch rate (switches per second) for one run.

    `activations` is one entry of df['tabsActivated']; its 'index' and 'time'
    values are indexable sequences, with 'time' in milliseconds.

    The rate is the number of recorded switches divided by the time between
    the first and the last switch (not the run's endTime - startTime).
    NaN is returned when that span is empty, i.e. with fewer than two switches
    or when the first and last switch share a timestamp, because no rate can
    be computed without dividing by zero.
    """
    times = activations['time']
    if len(times) < 2:
        return np.nan
    duration = (times[-1] - times[0]) / 1000.  # ms -> s
    if duration == 0:
        return np.nan
    return len(activations['index']) / duration


on, off = [], []
for index in df.index:
    rate = _switches_per_second(df['tabsActivated'][index])
    # Any state other than 'ON' is counted as OFF.
    if df['state'][index] == 'ON':
        on.append(rate)
    else:
        off.append(rate)
on = np.array(on)
off = np.array(off)
print(on)
print(off)

# Independent (Welch) test: NaN rates carry no information, so drop them
# from each group separately.
independent = ttest_ind(on[~np.isnan(on)], off[~np.isnan(off)], equal_var=False)
print("%s %s" % ("Independent: ", independent))

# Paired test: the k-th ON run is paired with the k-th OFF run, so both groups
# must have the same length, and a pair is dropped if either rate is NaN.
if len(on) == len(off):
    paired = ~(np.isnan(on) | np.isnan(off))
    print("%s %s" % ("Dependent: ", ttest_rel(on[paired], off[paired])))
else:
    print("Dependent: skipped, %d ON runs cannot be paired with %d OFF runs"
          % (len(on), len(off)))
In [122]:
# Show the per-run state and the 'when' column, one Series after the other.
print(df['state'])
print(df['when'])
In [107]:
# Tab switching over time: size of the jump between consecutively active tabs,
# with the number of open tabs drawn alongside for reference.
fig = plt.figure(figsize=(16, 8))  # Larger figure

# Two-column grid. The row count is rounded up so an odd number of runs still
# fits, and uses integer division because plt.subplot() needs integer sizes.
ncols = 2
nrows = (len(df.index) + ncols - 1) // ncols

# Subplots are placed by position (1-based), not by index label, so the grid
# stays valid even when df's index is not exactly 0..n-1.
for position, index in enumerate(df.index, start=1):
    x, y = nestedxy(index=index, maincol='tabsActivated', xcol='time', ycol='index')
    _, y_open = nestedxy(index=index, maincol='tabsActivated', xcol='time', ycol='numOpenTabs')

    # Each difference belongs to the later of the two activations it compares.
    x_diff = x[1:]
    # Difference in tab index, taken modulo the number of tabs open at the
    # later activation; for a positive tab count the result is never negative,
    # so a backwards jump wraps around.
    y_diff = [(y[i+1] - y[i]) % (y_open[i+1]) for i in range(len(y) - 1)]

    plt.subplot(nrows, ncols, position)
    # pyplot.hold() no longer exists in matplotlib >= 3.0 (overlaying is always
    # on there); on older versions make sure later plot() calls keep earlier ones.
    if hasattr(plt, 'hold'):
        plt.hold(True)
    plt.plot(x, y_open, 'g--')                    # number of open tabs
    plt.plot(x_diff, y_diff, 'ro', markersize=3)  # one marker per jump
    plt.plot(x_diff, y_diff)                      # connecting line
    plt.xlabel('Time (s)')
    plt.ylabel('Tab index difference')
    plt.title('Run %d (%s)' % (index, df['state'][index]))
    plt.autoscale(tight=True)  # tight limits (no padding) on both axes

# Figure-level spacing, applied once after every subplot exists.
plt.tight_layout()
In [124]:
# Number of open tabs at each tab activation for runs 0 and 4, overlaid on a
# single axes for direct comparison.
x0, y0 = nestedxy(index=0, maincol='tabsActivated', xcol='time', ycol='numOpenTabs')
x4, y4 = nestedxy(index=4, maincol='tabsActivated', xcol='time', ycol='numOpenTabs')

# pyplot.hold() no longer exists in matplotlib >= 3.0 (overlaying is always on
# there); on older versions make sure the second plot() keeps the first.
# Hold is deliberately not switched off afterwards: that setting is global and
# other cells overlay several series on one axes.
if hasattr(plt, 'hold'):
    plt.hold(True)
plt.plot(x0, y0, 'b', label='Run %d (%s)' % (0, df['state'][0]))
plt.plot(x4, y4, 'g', label='Run %d (%s)' % (4, df['state'][4]))
plt.xlabel('Time (s)')
plt.ylabel('Number of open tabs')
plt.legend();
In [97]:
# Tab marking over time: number of marked tabs (with the number of open tabs
# for reference) for every even-numbered run.
fig = plt.figure(figsize=(16, 8))  # Larger figure

# Two-column grid. The row count is rounded up so an odd number of runs still
# fits, and uses integer division because plt.subplot() needs integer sizes.
ncols = 2
nrows = (len(df.index) + ncols - 1) // ncols

for index in df.index:
    # NOTE(review): odd-numbered runs are skipped; presumably only the
    # even-numbered ones carry 'tabsMarked' data - confirm.
    if index % 2 == 1:
        continue
    x, y = nestedxy(index=index, maincol='tabsMarked', xcol='time', ycol='numMarkedTabs')
    _, y1 = nestedxy(index=index, maincol='tabsMarked', xcol='time', ycol='numOpenTabs')

    # Slot index + 1 assumes df's index labels are 0..n-1.
    plt.subplot(nrows, ncols, index + 1)
    # pyplot.hold() no longer exists in matplotlib >= 3.0 (overlaying is always
    # on there); on older versions make sure later plot() calls keep earlier ones.
    if hasattr(plt, 'hold'):
        plt.hold(True)
    plt.plot(x, y, 'ro', markersize=3)    # one marker per logged event
    plt.plot(x, y)                        # number of marked tabs
    plt.plot(x, y1, 'g--', markersize=3)  # number of open tabs
    plt.xlabel('Time (s)')
    plt.ylabel('Number of marked tabs')
    plt.title('Run %d (%s)' % (index, df['state'][index]))
    plt.autoscale(tight=True)  # tight limits (no padding) on both axes

# Figure-level spacing, applied once after every subplot exists.
plt.tight_layout()
In [91]:
# Open tabs over time, with and without the extension: each even-numbered run
# is drawn together with the run that follows it.
fig = plt.figure(figsize=(16, 8))  # Larger figure

# Two-column grid. The row count is rounded up so an odd number of runs still
# fits, and uses integer division because plt.subplot() needs integer sizes.
ncols = 2
nrows = (len(df.index) + ncols - 1) // ncols

for index in df.index:
    # Odd-numbered runs are drawn in the subplot of the preceding even run.
    if index % 2 == 1:
        continue
    state0 = df['state'][index]
    x0, y0 = nestedxy(index=index, maincol='tabsOpen', xcol='time', ycol='numOpenTabs')

    # Slot index + 1 assumes df's index labels are 0..n-1.
    plt.subplot(nrows, ncols, index + 1)
    # pyplot.hold() no longer exists in matplotlib >= 3.0 (overlaying is always
    # on there); on older versions make sure the second plot() keeps the first.
    if hasattr(plt, 'hold'):
        plt.hold(True)

    # ON: blue, OFF: red
    color0 = 'b' if state0 == 'ON' else 'r'
    plt.plot(x0, y0, color0, label='Run %d (%s)' % (index, state0))

    # With an odd number of runs the last one has no partner; draw it alone
    # instead of failing on the missing index.
    if (index + 1) in df.index:
        state1 = df['state'][index+1]
        x1, y1 = nestedxy(index=index+1, maincol='tabsOpen', xcol='time', ycol='numOpenTabs')
        color1 = 'b' if state1 == 'ON' else 'r'
        plt.plot(x1, y1, color1, label='Run %d (%s)' % (index + 1, state1))

    plt.xlabel('Time (s)')
    plt.ylabel('Number of open tabs')
    plt.title('Run %d (%s)' % (index, df['state'][index]))
    plt.legend(loc='best')  # makes the colour/state mapping visible in the figure
    plt.autoscale(tight=True)  # tight limits (no padding) on both axes

# Figure-level spacing, applied once after every subplot exists.
plt.tight_layout()
In [61]:
# URL revisitation; info missing in runs where older version that did not log this was used
# Both series are drawn on one axes; no subplot grid is needed here.
x0, y0 = scalarxy(ycol='urlsVisited')
x1, y1 = scalarxy(ycol='urlsRevisited')
plt.plot(x0, y0, label='URLs visited')
plt.plot(x1, y1, label='URLs revisited')
annotate(x=x0, y=y0) # Show y values
annotate(x=x1, y=y1)
plt.autoscale(tight=True)  # tight limits (no padding) on both axes
plt.tight_layout()
# Trailing semicolon keeps the Legend object's repr out of the cell output.
plt.legend();
Out[61]: