In [1]:
%matplotlib inline
from stemgraphic.alpha import stem_text
from stemgraphic.stopwords import VOWELS, EN
In [2]:
# Relative path to the text file that the stem_text calls below read.
source = (
    '../datasets/A Case of Identity by Arthur Conan Doyle.txt'
)
In [3]:
# First look at the text, passing VOWELS as `column` and 250 as `display`.
# NOTE(review): presumably this limits the stems to vowels and the plot to
# 250 words -- confirm against the stemgraphic documentation.
# The trailing semicolon suppresses the cell's return-value repr.
stem_text(
    source,
    column=VOWELS,
    display=250,
);
In [4]:
# Words in their normal reading order (reverse=False), with the EN stop-word
# list passed as `stop_words` and no legend position (legend_pos=None).
# The trailing semicolon suppresses the cell's return-value repr.
stem_text(
    source,
    caps=False,
    display=750,
    reverse=False,
    stop_words=EN,
    legend_pos=None,
);
In [5]:
# Same call as the forward plot, but with reverse=True so that words are read
# from their last letter: 'word' becomes the bigram 'dr' (stem 'd', leaf 'r').
# The trailing semicolon suppresses the cell's return-value repr.
stem_text(
    source,
    caps=False,
    display=750,
    reverse=True,
    stop_words=EN,
    legend_pos=None,
);
In [6]:
# Alphabetically sorted plot (sort_by='alpha') with break_on='m'.
# rows_only=False makes stem_text return three values, which are unpacked
# here; `df` is the one inspected further down in the notebook.
# NOTE(review): random_state=120 presumably fixes the sampling so the
# output is reproducible -- confirm against the stemgraphic documentation.
rows, hm, df = stem_text(
    source,
    break_on='m',
    caps=False,
    display=1200,
    random_state=120,
    rows_only=False,
    sort_by='alpha',
    stop_words=EN,
);
Some words start with an “o” followed by an apostrophe (’). An Irish name?
In [7]:
# Show the rows of df whose word starts with the two characters 'o' + '’'.
df.loc[df['word'].str[:2] == 'o’']
Out[7]:
Ah, o’clock. That explains it.