Convert strings into keypress sequences

A large fraction of typos arise purely from pressing the wrong key on the keyboard. This module converts any string into the sequence of key presses needed to type it. Currently it supports only the 'US' keyboard layout.


In [4]:
from itertools import islice

from word2keypress import distance, Keyboard

In [17]:
# Build a keyboard for the 'US' layout. The layout name is passed as a unicode
# literal because, per the original author's note, the Cython-backed
# constructor requires unicode.
kb = Keyboard(u'US')
kseq = kb.word_to_keyseq('Password')

# Each print receives a single pre-formatted string, so the cell produces the
# same text under Python 2 and also runs under Python 3 (where the old
# `print a, b` statement form is a syntax error).
print("\nRaw sequence: {!r}".format(kseq))

print("\nReadable sequence: {!r}".format(kb.print_keyseq(kseq)))

# Round trip: converting the key sequence back should give the original word.
print("\nkeyseq->word: {}".format(kb.keyseq_to_word(kseq)))


Raw sequence: u'\x03password'

Readable sequence: u'<s>password'

keyseq->word: Password

In [19]:
# Show only the first ten typos. islice stops consuming the iterable after ten
# items instead of building the full list of typos and slicing it afterwards.
# Single-argument print(...) calls behave the same on Python 2 and Python 3.
print("All typos of 'Password' (keyseq edit distance 1)")
print(list(islice(kb.word_to_typos('Password'), 10)))


All typos of 'Password' (keseq edit distance 1)
[u'`Password', u'1Password', u'2Password', u'3Password', u'4Password', u'5Password', u'6Password', u'7Password', u'8Password', u'9Password']

In [20]:
# Pairs of strings whose keypress-sequence distance is expected to be below 3.
word_pairs = [
    ('password', 'PASSWORD'),
    ('Password', 'password'),
    ('Password', 'PASSWORD'),
    ('Password1', 'Password!'),
    # NOTE(review): the original author marked the next pair "This is not
    # good!" without recording why -- confirm what was meant.
    ('pASSWORD', 'Password'),
    ('P@ssword', 'Password')]
for w1, w2 in word_pairs:
    dist = distance(w1, w2)
    # Report the offending pair and its distance so a failure is diagnosable.
    assert dist < 3, "distance(%r, %r) = %r, expected < 3" % (w1, w2, dist)


password <c>password
<s>password password
<s>password <c>password
<s>password1 <s>password<s>1
p<c>assword <s>password
<s>p<s>2ssword <s>password