In [1]:
# ignore twython library missing, we aren't using its functionality
# Must use nltk.download() and get the Opinion Lexicon and Vader Lexicon
from nlp import *
In [ ]:
# Sample text used for the stylometric/sentiment analyses below.
text = """I had my suspicions in early 2003. Who wouldn't? That is why when my wife, Stacy, out on her daily walk yesterday, I texted my Stacy that I was going shopping and I would be back soon."""
# Regex tokenizer: runs of alphanumerics/ampersand, with an optional
# apostrophe + single trailing letter so contractions like "wouldn't"
# stay one token. (The original also called tokenize(text) first, but
# that result was immediately overwritten here, so it was dead code.)
tok = tokenizeFindAllRegex(r"""([A-Za-z0-9&]+[']?[A-Za-z]?)""")
tokens = tok(text)
tagged = pos(tokens)  # POS-tag the tokens (tagger from the nlp module)
tokens
In [ ]:
# Lemmatize the POS-tagged tokens — presumably the tags guide lemma
# selection; semantics live in the nlp module (TODO confirm).
lemmatize(tagged)
In [ ]:
# Token-suffix frequency distribution. Compute the suffix list once and
# reuse it: the original called tokenSuffixes(tokens) twice, and the
# first call's result was silently discarded (only a cell's last
# expression is auto-displayed).
suffixes = tokenSuffixes(tokens)
dict(freq(suffixes))
In [ ]:
# Unigram (1-gram) frequency distribution over the tokens.
dict(freq(grams(tokens, 1)))
In [ ]:
# Keep only the POS tags (drop the words), then count the frequency of
# tag trigrams — a common stylometric feature.
posOnly = posTagOnly(tagged)
freq(grams(posOnly, 3))
In [ ]:
# Syllable-based 3-grams over the tokens (exact semantics defined in
# the nlp module — TODO confirm).
syllableGrams(tokens, 3)
In [ ]:
# Vowel-pattern 3-grams over the tokens (exact semantics defined in
# the nlp module — TODO confirm).
vowelGrams(tokens, 3)
In [ ]:
# Per-token case/length features; `ull` feeds capLetterFreq and
# wordCases in the cells below.
ull = upperLowerLen(tokens)
In [ ]:
# Capital-letter frequency derived from the upper/lower/length features.
capLetterFreq(ull)
In [ ]:
# Word-case pattern per token (presumably upper/lower/title classes —
# TODO confirm against the nlp module).
cases = wordCases(ull)
In [ ]:
# Frequency of word-case-pattern trigrams.
freq(grams(cases, 3))
In [ ]:
# Sentiment of the first half, the second half, and trigrams of the
# token stream. display() each result explicitly: only a cell's last
# expression is auto-displayed, so the original silently discarded the
# two half-text scores. The midpoint is computed once with floor
# division (identical to int(len/2) for non-negative lengths).
half = len(tokens) // 2
display(sentimentGrams([tokens[:half]]))
display(sentimentGrams([tokens[half:]]))
# NOTE(review): `ngrams` here vs `grams` everywhere else — confirm this
# is the intended function and not a typo (both may come in via the
# star import from nlp).
sentimentGrams(ngrams(tokens, 3))
In [ ]:
# Sentiment score for the full token sequence as a single unit.
sentimentGrams([tokens])
In [ ]:
# Chunk the POS-tagged tokens, then remove named entities;
# removeNumbers=True presumably also strips numeric tokens — TODO
# confirm against the nlp module.
chunked = chunk(tagged)
removeNamedEntities(chunked, removeNumbers=True)
In [ ]:
# Same entity removal but with removeNumbers=False, for comparison
# with the previous cell's output.
removeNamedEntities(chunked, removeNumbers=False)
In [ ]:
# Punctuation-based features extracted from the raw (untokenized) text.
punctuationFeatures(text)
In [ ]:
In [11]:
# Demo of Reddit-specific cleaning on a string containing a /u/ user
# mention and trailing punctuation (exact cleaning rules live in the
# nlp module — TODO confirm).
cleanTokensReddit("/u/dog said to me that he's the funniest guy ever.~")
Out[11]:
In [ ]: