In [46]:
%matplotlib inline
In [47]:
from __future__ import print_function
from __future__ import unicode_literals
from __future__ import absolute_import
from __future__ import division
In [48]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import numpy as num
sns.set(color_codes=True)
import logging
from collections import Counter, OrderedDict
In [49]:
import cookey
from cookey import keyboard
from cookey import util
from cookey.typist import Typist
from cookey.util import get_logger
In [50]:
# Display name -> keyboard layout, in the order the layouts should appear in
# every table and chart built from `typists`.
layouts_by_name = [
    ('QWERTY', keyboard.QWERTY),
    ('Simplified Dvorak', keyboard.SDVORAK),
    ("Programmers' Dvorak", keyboard.PDVORAK),
    ('Drunken Dvorak', keyboard.DDVORAK),
    ('Colemak', keyboard.COLEMAK),
    ('Workman', keyboard.WORKMAN),
    ("Programmers' Workman", keyboard.PWORKMAN),
    ('Norman', keyboard.NORMAN),
]

# One Typist per layout; OrderedDict keeps the insertion order above.
typists = OrderedDict()
for layout_name, layout in layouts_by_name:
    typists[layout_name] = Typist(layout)
In [51]:
def type_all_files(typists, directory='.', patterns=None):
    """Feed every matching file to each typist and count the characters typed.

    Args:
        typists: Mapping whose values expose a ``type(text)`` method; every
            value receives the decoded text of every file found.
        directory: Directory passed to ``util.find_files``.
        patterns: Filename patterns passed to ``util.find_files``. Defaults
            to ``['*.c', '*.h', '*.py', '*.txt', '*.rst']``.

    Returns:
        A ``(total_chars, charcounter)`` tuple: the total number of characters
        typed and a ``Counter`` mapping each character to its occurrences.
    """
    if patterns is None:
        patterns = ['*.c', '*.h', '*.py', '*.txt', '*.rst']
    charcounter = Counter()
    for filename in util.find_files(directory, patterns):
        # Read-only binary mode: the file is never written, so requesting
        # write access would only make read-only files fail to open.
        with open(filename, 'rb') as f:
            # Bytes outside ASCII are silently dropped, so only ASCII
            # characters are typed and counted.
            text = f.read().decode('ascii', errors='ignore')
        for typist in typists.values():
            typist.type(text)
        # Count this file's characters exactly once.
        charcounter.update(text)
    total_chars = sum(charcounter.values())
    return (total_chars, charcounter)
In [52]:
%time
num_chars, charcounter = type_all_files(typists, directory='../yapf')
In [81]:
# Per-character totals plus each character's share (in percent, 3 decimals)
# of all characters typed.
char_count = pd.DataFrame(pd.Series(charcounter), columns=['count'])
char_count['percent'] = (char_count['count'] / num_chars * 100).round(3)

# Only characters making up more than 1% of the input are charted.
frequent_chars = char_count[char_count['percent'] > 1.0]
char_count_plot = frequent_chars['count'].plot(kind='bar', figsize=(15,10), fontsize=17)
char_count_plot.set_title("Character Occurrences (Total: {})".format(num_chars), fontsize=17)
char_count_plot.set_xlabel("Character")
# The bars show the raw `count` column, so the y axis is labelled as counts.
char_count_plot.set_ylabel("Number of occurrences")
Out[81]:
In [61]:
# Flatten the tuple returned by each typist's get_stats() into one record per
# layout, then index the resulting table by layout name.
typist_stats = []
# .items() (not .iteritems()) so the cell runs on both Python 2 and Python 3.
for layout_name, typist in typists.items():
    (keystrokes, finger_usage, line_usage, hand_usage,
     distance, pedalling_coef, nsf_coef) = typist.get_stats()
    # line_usage is ordered number row, top row, home row, bottom row.
    r_num, r_top, r_home, r_bot = line_usage
    left_hand, right_hand = hand_usage
    # finger_usage runs left pinky -> left thumb, then right thumb -> right pinky.
    lp, lr, lm, li, lt, rt, ri, rm, rr, rp = finger_usage
    typist_stats.append({
        'layout': layout_name,
        'keystrokes': keystrokes,
        'pedalling_coef': pedalling_coef,
        'nsf_coef': nsf_coef,
        'home_row_pct': r_home,
        'top_row_pct': r_top,
        'bot_row_pct': r_bot,
        'num_row_pct': r_num,
        'left_hand': left_hand,
        'right_hand': right_hand,
        'left_pinky': lp,
        'left_ring': lr,
        'left_middle': lm,
        'left_index': li,
        'left_thumb': lt,
        'right_thumb': rt,
        'right_index': ri,
        'right_middle': rm,
        'right_ring': rr,
        'right_pinky': rp,
        # `distance` is an iterable of numbers; only its sum is kept.
        'total_distance': sum(distance),
    })
layout_stats = pd.DataFrame(typist_stats).set_index('layout')
layout_stats
Out[61]:
In [82]:
# Keystrokes of each layout relative to the largest keystroke count, as a
# signed percentage rounded to two decimals (the largest count maps to 0).
ks = layout_stats['keystrokes']
ks_max = ks.max()
layout_stats['ks_pct'] = ks.apply(
    lambda count: round((count - ks_max) * 100.0 / ks_max, 2)
)
layout_stats[['ks_pct']]
Out[82]:
In [74]:
# Bar chart of the home-row usage column, one bar per layout.
home_row_ax = layout_stats[['home_row_pct']].plot(
    kind='bar', figsize=(15,10), fontsize=17
)
home_row_ax.set_title("Home row usage", fontsize=17)
Out[74]:
In [76]:
# Grouped bar chart of the top, bottom and number row usage columns per layout.
other_row_columns = ['top_row_pct', 'bot_row_pct', 'num_row_pct']
other_rows_ax = layout_stats[other_row_columns].plot(
    kind='bar', figsize=(15,10), fontsize=17
)
other_rows_ax.set_title("Other row usage", fontsize=17)
Out[76]:
In [78]:
# Grouped bar chart of the left-hand finger columns per layout (thumb omitted).
left_finger_columns = ['left_pinky', 'left_ring', 'left_middle', 'left_index']
left_hand_ax = layout_stats[left_finger_columns].plot(
    kind='bar', figsize=(15,10), fontsize=17
)
left_hand_ax.set_title("Left Hand Finger Usage", fontsize=17)
Out[78]:
In [80]:
# Grouped bar chart of the right-hand finger columns per layout (thumb omitted).
right_finger_columns = ['right_index', 'right_middle', 'right_ring', 'right_pinky']
right_hand_ax = layout_stats[right_finger_columns].plot(
    kind='bar', figsize=(15,10), fontsize=17
)
right_hand_ax.set_title("Right Hand Finger Usage", fontsize=17)
Out[80]:
In [84]:
# Total distance of each layout as a percentage of the largest total distance
# (the layout with the largest distance scores 100), rounded to two decimals.
td = layout_stats['total_distance']
max_d = td.max()
layout_stats['dist_gain'] = td.apply(lambda c: round(c/max_d*100,2))
# DataFrame.sort() was removed from pandas; sort_values() is its replacement.
layout_stats[['dist_gain', 'pedalling_coef', 'nsf_coef']].sort_values(by='dist_gain', ascending=False)
Out[84]:
In [ ]: