In [1]:
import nltk

In [2]:
# Sample Text
text = """If I could be a superhero, just for the day,
I would want to be Supergirl, in every way.
She’s the young cousin of Superman with long golden locks,
But don’t let that fool you because she’s tougher than rocks.

Her powers consist of flying with speed,
To the moon, around the world or wherever the need.
She can hear a pin drop or the beat of a human’s heart,
Not to mention the faintest whisper, oh how very smart!

In addition to mind control, Supergirl’s vision is x-ray
She also has eyes that generate heat without delay.
Just like her cousin, she has her weakness too,
Kryptonite, oh Kryptonite. There’s only one and not two.

So why would I want to be this superhero for the day?
Well, that’s easy, I will tell you. So listen to what I say.
Bullying has become a major problem everywhere we turn.
Our teachers discuss the issue, but there is more that we need to learn.

Throughout the school halls and at the lunchroom tables,
Students are teased or pushed, and fighting back, well they aren’t able.
As Supergirl I would stop all this nonsense,
By using my powers to aid in every victim’s defense.

Throughout the day, I would listen for the negative chatter
And change each bully’s insults to words that matter.
Before the first punch is thrown or a foot trips another,
I would zap the tormentor’s behind with heat until he calls his mother.

It’s too bad I can’t be this superhero for longer,
It will take more than a day to help bullied victims to become stronger.
The truth is that no one deserves this cruel and hateful treatment
Everyone deserves happiness and that should be a unanimous agreement."""

In [3]:
# Tokenize Sentences
from nltk import sent_tokenize

In [4]:
sent_tokenize(text)


Out[4]:
['If I could be a superhero, just for the day,\nI would want to be Supergirl, in every way.',
 'She’s the young cousin of Superman with long golden locks,\nBut don’t let that fool you because she’s tougher than rocks.',
 'Her powers consist of flying with speed,\nTo the moon, around the world or wherever the need.',
 'She can hear a pin drop or the beat of a human’s heart,\nNot to mention the faintest whisper, oh how very smart!',
 'In addition to mind control, Supergirl’s vision is x-ray\nShe also has eyes that generate heat without delay.',
 'Just like her cousin, she has her weakness too,\nKryptonite, oh Kryptonite.',
 'There’s only one and not two.',
 'So why would I want to be this superhero for the day?',
 'Well, that’s easy, I will tell you.',
 'So listen to what I say.',
 'Bullying has become a major problem everywhere we turn.',
 'Our teachers discuss the issue, but there is more that we need to learn.',
 'Throughout the school halls and at the lunchroom tables,\nStudents are teased or pushed, and fighting back, well they aren’t able.',
 'As Supergirl I would stop all this nonsense,\nBy using my powers to aid in every victim’s defense.',
 'Throughout the day, I would listen for the negative chatter\nAnd change each bully’s insults to words that matter.',
 'Before the first punch is thrown or a foot trips another,\nI would zap the tormentor’s behind with heat until he calls his mother.',
 'It’s too bad I can’t be this superhero for longer,\nIt will take more than a day to help bullied victims to become stronger.',
 'The truth is that no one deserves this cruel and hateful treatment\nEveryone deserves happiness and that should be a unanimous agreement.']

In [5]:
# List of Words
from nltk import word_tokenize

In [6]:
word_tokenize(text)


Out[6]:
['If',
 'I',
 'could',
 'be',
 'a',
 'superhero',
 ',',
 'just',
 'for',
 'the',
 'day',
 ',',
 'I',
 'would',
 'want',
 'to',
 'be',
 'Supergirl',
 ',',
 'in',
 'every',
 'way',
 '.',
 'She',
 '’',
 's',
 'the',
 'young',
 'cousin',
 'of',
 'Superman',
 'with',
 'long',
 'golden',
 'locks',
 ',',
 'But',
 'don',
 '’',
 't',
 'let',
 'that',
 'fool',
 'you',
 'because',
 'she',
 '’',
 's',
 'tougher',
 'than',
 'rocks',
 '.',
 'Her',
 'powers',
 'consist',
 'of',
 'flying',
 'with',
 'speed',
 ',',
 'To',
 'the',
 'moon',
 ',',
 'around',
 'the',
 'world',
 'or',
 'wherever',
 'the',
 'need',
 '.',
 'She',
 'can',
 'hear',
 'a',
 'pin',
 'drop',
 'or',
 'the',
 'beat',
 'of',
 'a',
 'human',
 '’',
 's',
 'heart',
 ',',
 'Not',
 'to',
 'mention',
 'the',
 'faintest',
 'whisper',
 ',',
 'oh',
 'how',
 'very',
 'smart',
 '!',
 'In',
 'addition',
 'to',
 'mind',
 'control',
 ',',
 'Supergirl',
 '’',
 's',
 'vision',
 'is',
 'x-ray',
 'She',
 'also',
 'has',
 'eyes',
 'that',
 'generate',
 'heat',
 'without',
 'delay',
 '.',
 'Just',
 'like',
 'her',
 'cousin',
 ',',
 'she',
 'has',
 'her',
 'weakness',
 'too',
 ',',
 'Kryptonite',
 ',',
 'oh',
 'Kryptonite',
 '.',
 'There',
 '’',
 's',
 'only',
 'one',
 'and',
 'not',
 'two',
 '.',
 'So',
 'why',
 'would',
 'I',
 'want',
 'to',
 'be',
 'this',
 'superhero',
 'for',
 'the',
 'day',
 '?',
 'Well',
 ',',
 'that',
 '’',
 's',
 'easy',
 ',',
 'I',
 'will',
 'tell',
 'you',
 '.',
 'So',
 'listen',
 'to',
 'what',
 'I',
 'say',
 '.',
 'Bullying',
 'has',
 'become',
 'a',
 'major',
 'problem',
 'everywhere',
 'we',
 'turn',
 '.',
 'Our',
 'teachers',
 'discuss',
 'the',
 'issue',
 ',',
 'but',
 'there',
 'is',
 'more',
 'that',
 'we',
 'need',
 'to',
 'learn',
 '.',
 'Throughout',
 'the',
 'school',
 'halls',
 'and',
 'at',
 'the',
 'lunchroom',
 'tables',
 ',',
 'Students',
 'are',
 'teased',
 'or',
 'pushed',
 ',',
 'and',
 'fighting',
 'back',
 ',',
 'well',
 'they',
 'aren',
 '’',
 't',
 'able',
 '.',
 'As',
 'Supergirl',
 'I',
 'would',
 'stop',
 'all',
 'this',
 'nonsense',
 ',',
 'By',
 'using',
 'my',
 'powers',
 'to',
 'aid',
 'in',
 'every',
 'victim',
 '’',
 's',
 'defense',
 '.',
 'Throughout',
 'the',
 'day',
 ',',
 'I',
 'would',
 'listen',
 'for',
 'the',
 'negative',
 'chatter',
 'And',
 'change',
 'each',
 'bully',
 '’',
 's',
 'insults',
 'to',
 'words',
 'that',
 'matter',
 '.',
 'Before',
 'the',
 'first',
 'punch',
 'is',
 'thrown',
 'or',
 'a',
 'foot',
 'trips',
 'another',
 ',',
 'I',
 'would',
 'zap',
 'the',
 'tormentor',
 '’',
 's',
 'behind',
 'with',
 'heat',
 'until',
 'he',
 'calls',
 'his',
 'mother',
 '.',
 'It',
 '’',
 's',
 'too',
 'bad',
 'I',
 'can',
 '’',
 't',
 'be',
 'this',
 'superhero',
 'for',
 'longer',
 ',',
 'It',
 'will',
 'take',
 'more',
 'than',
 'a',
 'day',
 'to',
 'help',
 'bullied',
 'victims',
 'to',
 'become',
 'stronger',
 '.',
 'The',
 'truth',
 'is',
 'that',
 'no',
 'one',
 'deserves',
 'this',
 'cruel',
 'and',
 'hateful',
 'treatment',
 'Everyone',
 'deserves',
 'happiness',
 'and',
 'that',
 'should',
 'be',
 'a',
 'unanimous',
 'agreement',
 '.']

Stemming


In [7]:
from nltk.stem import PorterStemmer

In [8]:
tokenized_sentences = sent_tokenize(text)
porter_stemmer = PorterStemmer()

for i in range(len(tokenized_sentences)):
    tokenized_words = word_tokenize(tokenized_sentences[i])
    stem_words = [porter_stemmer.stem(word) for word in tokenized_words]
    tokenized_sentences[i] = ' '.join(stem_words)
print(tokenized_sentences)


['If I could be a superhero , just for the day , I would want to be supergirl , in everi way .', 'she ’ s the young cousin of superman with long golden lock , but don ’ t let that fool you becaus she ’ s tougher than rock .', 'her power consist of fli with speed , To the moon , around the world or wherev the need .', 'she can hear a pin drop or the beat of a human ’ s heart , not to mention the faintest whisper , oh how veri smart !', 'In addit to mind control , supergirl ’ s vision is x-ray she also ha eye that gener heat without delay .', 'just like her cousin , she ha her weak too , kryptonit , oh kryptonit .', 'there ’ s onli one and not two .', 'So whi would I want to be thi superhero for the day ?', 'well , that ’ s easi , I will tell you .', 'So listen to what I say .', 'bulli ha becom a major problem everywher we turn .', 'our teacher discuss the issu , but there is more that we need to learn .', 'throughout the school hall and at the lunchroom tabl , student are teas or push , and fight back , well they aren ’ t abl .', 'As supergirl I would stop all thi nonsens , By use my power to aid in everi victim ’ s defens .', 'throughout the day , I would listen for the neg chatter and chang each bulli ’ s insult to word that matter .', 'befor the first punch is thrown or a foot trip anoth , I would zap the tormentor ’ s behind with heat until he call hi mother .', 'It ’ s too bad I can ’ t be thi superhero for longer , It will take more than a day to help bulli victim to becom stronger .', 'the truth is that no one deserv thi cruel and hate treatment everyon deserv happi and that should be a unanim agreement .']

Lemmatization


In [9]:
from nltk.stem import WordNetLemmatizer

In [10]:
tokenized_sentences = sent_tokenize(text)
wordnet_lemmatizer = WordNetLemmatizer()

for i in range(len(tokenized_sentences)):
    tokenized_words = word_tokenize(tokenized_sentences[i])
    lemmatized_words = [wordnet_lemmatizer.lemmatize(word) for word in tokenized_words]
    tokenized_sentences[i] = ' '.join(lemmatized_words)
print(tokenized_sentences)


['If I could be a superhero , just for the day , I would want to be Supergirl , in every way .', 'She ’ s the young cousin of Superman with long golden lock , But don ’ t let that fool you because she ’ s tougher than rock .', 'Her power consist of flying with speed , To the moon , around the world or wherever the need .', 'She can hear a pin drop or the beat of a human ’ s heart , Not to mention the faintest whisper , oh how very smart !', 'In addition to mind control , Supergirl ’ s vision is x-ray She also ha eye that generate heat without delay .', 'Just like her cousin , she ha her weakness too , Kryptonite , oh Kryptonite .', 'There ’ s only one and not two .', 'So why would I want to be this superhero for the day ?', 'Well , that ’ s easy , I will tell you .', 'So listen to what I say .', 'Bullying ha become a major problem everywhere we turn .', 'Our teacher discus the issue , but there is more that we need to learn .', 'Throughout the school hall and at the lunchroom table , Students are teased or pushed , and fighting back , well they aren ’ t able .', 'As Supergirl I would stop all this nonsense , By using my power to aid in every victim ’ s defense .', 'Throughout the day , I would listen for the negative chatter And change each bully ’ s insult to word that matter .', 'Before the first punch is thrown or a foot trip another , I would zap the tormentor ’ s behind with heat until he call his mother .', 'It ’ s too bad I can ’ t be this superhero for longer , It will take more than a day to help bullied victim to become stronger .', 'The truth is that no one deserves this cruel and hateful treatment Everyone deserves happiness and that should be a unanimous agreement .']

Stop Words


In [11]:
from nltk.corpus import stopwords

In [12]:
tokenized_sentences = sent_tokenize(text)

for i in range(len(tokenized_sentences)):
    tokenized_words = word_tokenize(tokenized_sentences[i])
    stop_words = [word for word in tokenized_words if word not in stopwords.words('english')]
    tokenized_sentences[i] = ' '.join(stop_words)
print(tokenized_sentences)


['If I could superhero , day , I would want Supergirl , every way .', 'She ’ young cousin Superman long golden locks , But ’ let fool ’ tougher rocks .', 'Her powers consist flying speed , To moon , around world wherever need .', 'She hear pin drop beat human ’ heart , Not mention faintest whisper , oh smart !', 'In addition mind control , Supergirl ’ vision x-ray She also eyes generate heat without delay .', 'Just like cousin , weakness , Kryptonite , oh Kryptonite .', 'There ’ one two .', 'So would I want superhero day ?', 'Well , ’ easy , I tell .', 'So listen I say .', 'Bullying become major problem everywhere turn .', 'Our teachers discuss issue , need learn .', 'Throughout school halls lunchroom tables , Students teased pushed , fighting back , well ’ able .', 'As Supergirl I would stop nonsense , By using powers aid every victim ’ defense .', 'Throughout day , I would listen negative chatter And change bully ’ insults words matter .', 'Before first punch thrown foot trips another , I would zap tormentor ’ behind heat calls mother .', 'It ’ bad I ’ superhero longer , It take day help bullied victims become stronger .', 'The truth one deserves cruel hateful treatment Everyone deserves happiness unanimous agreement .']

Part of Speech Tagging


In [13]:
words = word_tokenize(text)

In [14]:
tagged_words = nltk.pos_tag(words)

In [15]:
tagged_words


Out[15]:
[('If', 'IN'),
 ('I', 'PRP'),
 ('could', 'MD'),
 ('be', 'VB'),
 ('a', 'DT'),
 ('superhero', 'NN'),
 (',', ','),
 ('just', 'RB'),
 ('for', 'IN'),
 ('the', 'DT'),
 ('day', 'NN'),
 (',', ','),
 ('I', 'PRP'),
 ('would', 'MD'),
 ('want', 'VB'),
 ('to', 'TO'),
 ('be', 'VB'),
 ('Supergirl', 'NNP'),
 (',', ','),
 ('in', 'IN'),
 ('every', 'DT'),
 ('way', 'NN'),
 ('.', '.'),
 ('She', 'PRP'),
 ('’', 'VBD'),
 ('s', 'PRP'),
 ('the', 'DT'),
 ('young', 'JJ'),
 ('cousin', 'NN'),
 ('of', 'IN'),
 ('Superman', 'NNP'),
 ('with', 'IN'),
 ('long', 'JJ'),
 ('golden', 'JJ'),
 ('locks', 'NNS'),
 (',', ','),
 ('But', 'CC'),
 ('don', 'VBZ'),
 ('’', 'JJ'),
 ('t', 'NNS'),
 ('let', 'VBP'),
 ('that', 'IN'),
 ('fool', 'NN'),
 ('you', 'PRP'),
 ('because', 'IN'),
 ('she', 'PRP'),
 ('’', 'VBZ'),
 ('s', 'RB'),
 ('tougher', 'JJR'),
 ('than', 'IN'),
 ('rocks', 'NNS'),
 ('.', '.'),
 ('Her', 'PRP$'),
 ('powers', 'NNS'),
 ('consist', 'VBP'),
 ('of', 'IN'),
 ('flying', 'VBG'),
 ('with', 'IN'),
 ('speed', 'NN'),
 (',', ','),
 ('To', 'TO'),
 ('the', 'DT'),
 ('moon', 'NN'),
 (',', ','),
 ('around', 'IN'),
 ('the', 'DT'),
 ('world', 'NN'),
 ('or', 'CC'),
 ('wherever', 'VB'),
 ('the', 'DT'),
 ('need', 'NN'),
 ('.', '.'),
 ('She', 'PRP'),
 ('can', 'MD'),
 ('hear', 'VB'),
 ('a', 'DT'),
 ('pin', 'JJ'),
 ('drop', 'NN'),
 ('or', 'CC'),
 ('the', 'DT'),
 ('beat', 'NN'),
 ('of', 'IN'),
 ('a', 'DT'),
 ('human', 'JJ'),
 ('’', 'NN'),
 ('s', 'JJ'),
 ('heart', 'NN'),
 (',', ','),
 ('Not', 'RB'),
 ('to', 'TO'),
 ('mention', 'VB'),
 ('the', 'DT'),
 ('faintest', 'JJS'),
 ('whisper', 'NN'),
 (',', ','),
 ('oh', 'UH'),
 ('how', 'WRB'),
 ('very', 'RB'),
 ('smart', 'JJ'),
 ('!', '.'),
 ('In', 'IN'),
 ('addition', 'NN'),
 ('to', 'TO'),
 ('mind', 'VB'),
 ('control', 'NN'),
 (',', ','),
 ('Supergirl', 'NNP'),
 ('’', 'NNP'),
 ('s', 'JJ'),
 ('vision', 'NN'),
 ('is', 'VBZ'),
 ('x-ray', 'JJ'),
 ('She', 'PRP'),
 ('also', 'RB'),
 ('has', 'VBZ'),
 ('eyes', 'NNS'),
 ('that', 'IN'),
 ('generate', 'VBP'),
 ('heat', 'NN'),
 ('without', 'IN'),
 ('delay', 'NN'),
 ('.', '.'),
 ('Just', 'NNP'),
 ('like', 'IN'),
 ('her', 'PRP$'),
 ('cousin', 'NN'),
 (',', ','),
 ('she', 'PRP'),
 ('has', 'VBZ'),
 ('her', 'PRP$'),
 ('weakness', 'NN'),
 ('too', 'RB'),
 (',', ','),
 ('Kryptonite', 'NNP'),
 (',', ','),
 ('oh', 'JJ'),
 ('Kryptonite', 'NNP'),
 ('.', '.'),
 ('There', 'EX'),
 ('’', 'JJ'),
 ('s', 'NN'),
 ('only', 'RB'),
 ('one', 'CD'),
 ('and', 'CC'),
 ('not', 'RB'),
 ('two', 'CD'),
 ('.', '.'),
 ('So', 'CC'),
 ('why', 'WRB'),
 ('would', 'MD'),
 ('I', 'PRP'),
 ('want', 'VB'),
 ('to', 'TO'),
 ('be', 'VB'),
 ('this', 'DT'),
 ('superhero', 'NN'),
 ('for', 'IN'),
 ('the', 'DT'),
 ('day', 'NN'),
 ('?', '.'),
 ('Well', 'UH'),
 (',', ','),
 ('that', 'DT'),
 ('’', 'VBP'),
 ('s', 'JJ'),
 ('easy', 'JJ'),
 (',', ','),
 ('I', 'PRP'),
 ('will', 'MD'),
 ('tell', 'VB'),
 ('you', 'PRP'),
 ('.', '.'),
 ('So', 'RB'),
 ('listen', 'JJ'),
 ('to', 'TO'),
 ('what', 'WP'),
 ('I', 'PRP'),
 ('say', 'VBP'),
 ('.', '.'),
 ('Bullying', 'NNP'),
 ('has', 'VBZ'),
 ('become', 'VBN'),
 ('a', 'DT'),
 ('major', 'JJ'),
 ('problem', 'NN'),
 ('everywhere', 'RB'),
 ('we', 'PRP'),
 ('turn', 'VBP'),
 ('.', '.'),
 ('Our', 'PRP$'),
 ('teachers', 'NNS'),
 ('discuss', 'VBP'),
 ('the', 'DT'),
 ('issue', 'NN'),
 (',', ','),
 ('but', 'CC'),
 ('there', 'EX'),
 ('is', 'VBZ'),
 ('more', 'JJR'),
 ('that', 'IN'),
 ('we', 'PRP'),
 ('need', 'VBP'),
 ('to', 'TO'),
 ('learn', 'VB'),
 ('.', '.'),
 ('Throughout', 'IN'),
 ('the', 'DT'),
 ('school', 'NN'),
 ('halls', 'NNS'),
 ('and', 'CC'),
 ('at', 'IN'),
 ('the', 'DT'),
 ('lunchroom', 'NN'),
 ('tables', 'NNS'),
 (',', ','),
 ('Students', 'NNS'),
 ('are', 'VBP'),
 ('teased', 'VBN'),
 ('or', 'CC'),
 ('pushed', 'VBN'),
 (',', ','),
 ('and', 'CC'),
 ('fighting', 'VBG'),
 ('back', 'RB'),
 (',', ','),
 ('well', 'UH'),
 ('they', 'PRP'),
 ('aren', 'VBP'),
 ('’', 'JJ'),
 ('t', 'NN'),
 ('able', 'JJ'),
 ('.', '.'),
 ('As', 'IN'),
 ('Supergirl', 'NNP'),
 ('I', 'PRP'),
 ('would', 'MD'),
 ('stop', 'VB'),
 ('all', 'PDT'),
 ('this', 'DT'),
 ('nonsense', 'NN'),
 (',', ','),
 ('By', 'IN'),
 ('using', 'VBG'),
 ('my', 'PRP$'),
 ('powers', 'NNS'),
 ('to', 'TO'),
 ('aid', 'VB'),
 ('in', 'IN'),
 ('every', 'DT'),
 ('victim', 'NN'),
 ('’', 'VBZ'),
 ('s', 'JJ'),
 ('defense', 'NN'),
 ('.', '.'),
 ('Throughout', 'IN'),
 ('the', 'DT'),
 ('day', 'NN'),
 (',', ','),
 ('I', 'PRP'),
 ('would', 'MD'),
 ('listen', 'VB'),
 ('for', 'IN'),
 ('the', 'DT'),
 ('negative', 'JJ'),
 ('chatter', 'NN'),
 ('And', 'CC'),
 ('change', 'NN'),
 ('each', 'DT'),
 ('bully', 'RB'),
 ('’', 'JJ'),
 ('s', 'NN'),
 ('insults', 'NNS'),
 ('to', 'TO'),
 ('words', 'NNS'),
 ('that', 'IN'),
 ('matter', 'NN'),
 ('.', '.'),
 ('Before', 'IN'),
 ('the', 'DT'),
 ('first', 'JJ'),
 ('punch', 'NN'),
 ('is', 'VBZ'),
 ('thrown', 'VBN'),
 ('or', 'CC'),
 ('a', 'DT'),
 ('foot', 'JJ'),
 ('trips', 'NN'),
 ('another', 'DT'),
 (',', ','),
 ('I', 'PRP'),
 ('would', 'MD'),
 ('zap', 'VB'),
 ('the', 'DT'),
 ('tormentor', 'NN'),
 ('’', 'NN'),
 ('s', 'NN'),
 ('behind', 'IN'),
 ('with', 'IN'),
 ('heat', 'NN'),
 ('until', 'IN'),
 ('he', 'PRP'),
 ('calls', 'VBZ'),
 ('his', 'PRP$'),
 ('mother', 'NN'),
 ('.', '.'),
 ('It', 'PRP'),
 ('’', 'VBD'),
 ('s', 'VB'),
 ('too', 'RB'),
 ('bad', 'JJ'),
 ('I', 'PRP'),
 ('can', 'MD'),
 ('’', 'VB'),
 ('t', 'RB'),
 ('be', 'VB'),
 ('this', 'DT'),
 ('superhero', 'NN'),
 ('for', 'IN'),
 ('longer', 'JJR'),
 (',', ','),
 ('It', 'PRP'),
 ('will', 'MD'),
 ('take', 'VB'),
 ('more', 'JJR'),
 ('than', 'IN'),
 ('a', 'DT'),
 ('day', 'NN'),
 ('to', 'TO'),
 ('help', 'VB'),
 ('bullied', 'VB'),
 ('victims', 'NNS'),
 ('to', 'TO'),
 ('become', 'VB'),
 ('stronger', 'JJR'),
 ('.', '.'),
 ('The', 'DT'),
 ('truth', 'NN'),
 ('is', 'VBZ'),
 ('that', 'IN'),
 ('no', 'DT'),
 ('one', 'NN'),
 ('deserves', 'VBZ'),
 ('this', 'DT'),
 ('cruel', 'NN'),
 ('and', 'CC'),
 ('hateful', 'JJ'),
 ('treatment', 'NN'),
 ('Everyone', 'NNP'),
 ('deserves', 'VBZ'),
 ('happiness', 'NN'),
 ('and', 'CC'),
 ('that', 'DT'),
 ('should', 'MD'),
 ('be', 'VB'),
 ('a', 'DT'),
 ('unanimous', 'JJ'),
 ('agreement', 'NN'),
 ('.', '.')]

In [16]:
word_tags = []

for word in tagged_words:
    word_tags.append(word[0] + "_" + word[1])
tagged_text = ' '.join(word_tags)
print(tagged_text)


If_IN I_PRP could_MD be_VB a_DT superhero_NN ,_, just_RB for_IN the_DT day_NN ,_, I_PRP would_MD want_VB to_TO be_VB Supergirl_NNP ,_, in_IN every_DT way_NN ._. She_PRP ’_VBD s_PRP the_DT young_JJ cousin_NN of_IN Superman_NNP with_IN long_JJ golden_JJ locks_NNS ,_, But_CC don_VBZ ’_JJ t_NNS let_VBP that_IN fool_NN you_PRP because_IN she_PRP ’_VBZ s_RB tougher_JJR than_IN rocks_NNS ._. Her_PRP$ powers_NNS consist_VBP of_IN flying_VBG with_IN speed_NN ,_, To_TO the_DT moon_NN ,_, around_IN the_DT world_NN or_CC wherever_VB the_DT need_NN ._. She_PRP can_MD hear_VB a_DT pin_JJ drop_NN or_CC the_DT beat_NN of_IN a_DT human_JJ ’_NN s_JJ heart_NN ,_, Not_RB to_TO mention_VB the_DT faintest_JJS whisper_NN ,_, oh_UH how_WRB very_RB smart_JJ !_. In_IN addition_NN to_TO mind_VB control_NN ,_, Supergirl_NNP ’_NNP s_JJ vision_NN is_VBZ x-ray_JJ She_PRP also_RB has_VBZ eyes_NNS that_IN generate_VBP heat_NN without_IN delay_NN ._. Just_NNP like_IN her_PRP$ cousin_NN ,_, she_PRP has_VBZ her_PRP$ weakness_NN too_RB ,_, Kryptonite_NNP ,_, oh_JJ Kryptonite_NNP ._. There_EX ’_JJ s_NN only_RB one_CD and_CC not_RB two_CD ._. So_CC why_WRB would_MD I_PRP want_VB to_TO be_VB this_DT superhero_NN for_IN the_DT day_NN ?_. Well_UH ,_, that_DT ’_VBP s_JJ easy_JJ ,_, I_PRP will_MD tell_VB you_PRP ._. So_RB listen_JJ to_TO what_WP I_PRP say_VBP ._. Bullying_NNP has_VBZ become_VBN a_DT major_JJ problem_NN everywhere_RB we_PRP turn_VBP ._. Our_PRP$ teachers_NNS discuss_VBP the_DT issue_NN ,_, but_CC there_EX is_VBZ more_JJR that_IN we_PRP need_VBP to_TO learn_VB ._. Throughout_IN the_DT school_NN halls_NNS and_CC at_IN the_DT lunchroom_NN tables_NNS ,_, Students_NNS are_VBP teased_VBN or_CC pushed_VBN ,_, and_CC fighting_VBG back_RB ,_, well_UH they_PRP aren_VBP ’_JJ t_NN able_JJ ._. As_IN Supergirl_NNP I_PRP would_MD stop_VB all_PDT this_DT nonsense_NN ,_, By_IN using_VBG my_PRP$ powers_NNS to_TO aid_VB in_IN every_DT victim_NN ’_VBZ s_JJ defense_NN ._. Throughout_IN the_DT day_NN ,_, I_PRP would_MD listen_VB for_IN the_DT negative_JJ chatter_NN And_CC change_NN each_DT bully_RB ’_JJ s_NN insults_NNS to_TO words_NNS that_IN matter_NN ._. Before_IN the_DT first_JJ punch_NN is_VBZ thrown_VBN or_CC a_DT foot_JJ trips_NN another_DT ,_, I_PRP would_MD zap_VB the_DT tormentor_NN ’_NN s_NN behind_IN with_IN heat_NN until_IN he_PRP calls_VBZ his_PRP$ mother_NN ._. It_PRP ’_VBD s_VB too_RB bad_JJ I_PRP can_MD ’_VB t_RB be_VB this_DT superhero_NN for_IN longer_JJR ,_, It_PRP will_MD take_VB more_JJR than_IN a_DT day_NN to_TO help_VB bullied_VB victims_NNS to_TO become_VB stronger_JJR ._. The_DT truth_NN is_VBZ that_IN no_DT one_NN deserves_VBZ this_DT cruel_NN and_CC hateful_JJ treatment_NN Everyone_NNP deserves_VBZ happiness_NN and_CC that_DT should_MD be_VB a_DT unanimous_JJ agreement_NN ._.

Named Entity Recognition


In [17]:
tokenized_words = word_tokenize(text)

In [18]:
from nltk import pos_tag
tagged_words = pos_tag(tokenized_words)
tagged_words


Out[18]:
[('If', 'IN'),
 ('I', 'PRP'),
 ('could', 'MD'),
 ('be', 'VB'),
 ('a', 'DT'),
 ('superhero', 'NN'),
 (',', ','),
 ('just', 'RB'),
 ('for', 'IN'),
 ('the', 'DT'),
 ('day', 'NN'),
 (',', ','),
 ('I', 'PRP'),
 ('would', 'MD'),
 ('want', 'VB'),
 ('to', 'TO'),
 ('be', 'VB'),
 ('Supergirl', 'NNP'),
 (',', ','),
 ('in', 'IN'),
 ('every', 'DT'),
 ('way', 'NN'),
 ('.', '.'),
 ('She', 'PRP'),
 ('’', 'VBD'),
 ('s', 'PRP'),
 ('the', 'DT'),
 ('young', 'JJ'),
 ('cousin', 'NN'),
 ('of', 'IN'),
 ('Superman', 'NNP'),
 ('with', 'IN'),
 ('long', 'JJ'),
 ('golden', 'JJ'),
 ('locks', 'NNS'),
 (',', ','),
 ('But', 'CC'),
 ('don', 'VBZ'),
 ('’', 'JJ'),
 ('t', 'NNS'),
 ('let', 'VBP'),
 ('that', 'IN'),
 ('fool', 'NN'),
 ('you', 'PRP'),
 ('because', 'IN'),
 ('she', 'PRP'),
 ('’', 'VBZ'),
 ('s', 'RB'),
 ('tougher', 'JJR'),
 ('than', 'IN'),
 ('rocks', 'NNS'),
 ('.', '.'),
 ('Her', 'PRP$'),
 ('powers', 'NNS'),
 ('consist', 'VBP'),
 ('of', 'IN'),
 ('flying', 'VBG'),
 ('with', 'IN'),
 ('speed', 'NN'),
 (',', ','),
 ('To', 'TO'),
 ('the', 'DT'),
 ('moon', 'NN'),
 (',', ','),
 ('around', 'IN'),
 ('the', 'DT'),
 ('world', 'NN'),
 ('or', 'CC'),
 ('wherever', 'VB'),
 ('the', 'DT'),
 ('need', 'NN'),
 ('.', '.'),
 ('She', 'PRP'),
 ('can', 'MD'),
 ('hear', 'VB'),
 ('a', 'DT'),
 ('pin', 'JJ'),
 ('drop', 'NN'),
 ('or', 'CC'),
 ('the', 'DT'),
 ('beat', 'NN'),
 ('of', 'IN'),
 ('a', 'DT'),
 ('human', 'JJ'),
 ('’', 'NN'),
 ('s', 'JJ'),
 ('heart', 'NN'),
 (',', ','),
 ('Not', 'RB'),
 ('to', 'TO'),
 ('mention', 'VB'),
 ('the', 'DT'),
 ('faintest', 'JJS'),
 ('whisper', 'NN'),
 (',', ','),
 ('oh', 'UH'),
 ('how', 'WRB'),
 ('very', 'RB'),
 ('smart', 'JJ'),
 ('!', '.'),
 ('In', 'IN'),
 ('addition', 'NN'),
 ('to', 'TO'),
 ('mind', 'VB'),
 ('control', 'NN'),
 (',', ','),
 ('Supergirl', 'NNP'),
 ('’', 'NNP'),
 ('s', 'JJ'),
 ('vision', 'NN'),
 ('is', 'VBZ'),
 ('x-ray', 'JJ'),
 ('She', 'PRP'),
 ('also', 'RB'),
 ('has', 'VBZ'),
 ('eyes', 'NNS'),
 ('that', 'IN'),
 ('generate', 'VBP'),
 ('heat', 'NN'),
 ('without', 'IN'),
 ('delay', 'NN'),
 ('.', '.'),
 ('Just', 'NNP'),
 ('like', 'IN'),
 ('her', 'PRP$'),
 ('cousin', 'NN'),
 (',', ','),
 ('she', 'PRP'),
 ('has', 'VBZ'),
 ('her', 'PRP$'),
 ('weakness', 'NN'),
 ('too', 'RB'),
 (',', ','),
 ('Kryptonite', 'NNP'),
 (',', ','),
 ('oh', 'JJ'),
 ('Kryptonite', 'NNP'),
 ('.', '.'),
 ('There', 'EX'),
 ('’', 'JJ'),
 ('s', 'NN'),
 ('only', 'RB'),
 ('one', 'CD'),
 ('and', 'CC'),
 ('not', 'RB'),
 ('two', 'CD'),
 ('.', '.'),
 ('So', 'CC'),
 ('why', 'WRB'),
 ('would', 'MD'),
 ('I', 'PRP'),
 ('want', 'VB'),
 ('to', 'TO'),
 ('be', 'VB'),
 ('this', 'DT'),
 ('superhero', 'NN'),
 ('for', 'IN'),
 ('the', 'DT'),
 ('day', 'NN'),
 ('?', '.'),
 ('Well', 'UH'),
 (',', ','),
 ('that', 'DT'),
 ('’', 'VBP'),
 ('s', 'JJ'),
 ('easy', 'JJ'),
 (',', ','),
 ('I', 'PRP'),
 ('will', 'MD'),
 ('tell', 'VB'),
 ('you', 'PRP'),
 ('.', '.'),
 ('So', 'RB'),
 ('listen', 'JJ'),
 ('to', 'TO'),
 ('what', 'WP'),
 ('I', 'PRP'),
 ('say', 'VBP'),
 ('.', '.'),
 ('Bullying', 'NNP'),
 ('has', 'VBZ'),
 ('become', 'VBN'),
 ('a', 'DT'),
 ('major', 'JJ'),
 ('problem', 'NN'),
 ('everywhere', 'RB'),
 ('we', 'PRP'),
 ('turn', 'VBP'),
 ('.', '.'),
 ('Our', 'PRP$'),
 ('teachers', 'NNS'),
 ('discuss', 'VBP'),
 ('the', 'DT'),
 ('issue', 'NN'),
 (',', ','),
 ('but', 'CC'),
 ('there', 'EX'),
 ('is', 'VBZ'),
 ('more', 'JJR'),
 ('that', 'IN'),
 ('we', 'PRP'),
 ('need', 'VBP'),
 ('to', 'TO'),
 ('learn', 'VB'),
 ('.', '.'),
 ('Throughout', 'IN'),
 ('the', 'DT'),
 ('school', 'NN'),
 ('halls', 'NNS'),
 ('and', 'CC'),
 ('at', 'IN'),
 ('the', 'DT'),
 ('lunchroom', 'NN'),
 ('tables', 'NNS'),
 (',', ','),
 ('Students', 'NNS'),
 ('are', 'VBP'),
 ('teased', 'VBN'),
 ('or', 'CC'),
 ('pushed', 'VBN'),
 (',', ','),
 ('and', 'CC'),
 ('fighting', 'VBG'),
 ('back', 'RB'),
 (',', ','),
 ('well', 'UH'),
 ('they', 'PRP'),
 ('aren', 'VBP'),
 ('’', 'JJ'),
 ('t', 'NN'),
 ('able', 'JJ'),
 ('.', '.'),
 ('As', 'IN'),
 ('Supergirl', 'NNP'),
 ('I', 'PRP'),
 ('would', 'MD'),
 ('stop', 'VB'),
 ('all', 'PDT'),
 ('this', 'DT'),
 ('nonsense', 'NN'),
 (',', ','),
 ('By', 'IN'),
 ('using', 'VBG'),
 ('my', 'PRP$'),
 ('powers', 'NNS'),
 ('to', 'TO'),
 ('aid', 'VB'),
 ('in', 'IN'),
 ('every', 'DT'),
 ('victim', 'NN'),
 ('’', 'VBZ'),
 ('s', 'JJ'),
 ('defense', 'NN'),
 ('.', '.'),
 ('Throughout', 'IN'),
 ('the', 'DT'),
 ('day', 'NN'),
 (',', ','),
 ('I', 'PRP'),
 ('would', 'MD'),
 ('listen', 'VB'),
 ('for', 'IN'),
 ('the', 'DT'),
 ('negative', 'JJ'),
 ('chatter', 'NN'),
 ('And', 'CC'),
 ('change', 'NN'),
 ('each', 'DT'),
 ('bully', 'RB'),
 ('’', 'JJ'),
 ('s', 'NN'),
 ('insults', 'NNS'),
 ('to', 'TO'),
 ('words', 'NNS'),
 ('that', 'IN'),
 ('matter', 'NN'),
 ('.', '.'),
 ('Before', 'IN'),
 ('the', 'DT'),
 ('first', 'JJ'),
 ('punch', 'NN'),
 ('is', 'VBZ'),
 ('thrown', 'VBN'),
 ('or', 'CC'),
 ('a', 'DT'),
 ('foot', 'JJ'),
 ('trips', 'NN'),
 ('another', 'DT'),
 (',', ','),
 ('I', 'PRP'),
 ('would', 'MD'),
 ('zap', 'VB'),
 ('the', 'DT'),
 ('tormentor', 'NN'),
 ('’', 'NN'),
 ('s', 'NN'),
 ('behind', 'IN'),
 ('with', 'IN'),
 ('heat', 'NN'),
 ('until', 'IN'),
 ('he', 'PRP'),
 ('calls', 'VBZ'),
 ('his', 'PRP$'),
 ('mother', 'NN'),
 ('.', '.'),
 ('It', 'PRP'),
 ('’', 'VBD'),
 ('s', 'VB'),
 ('too', 'RB'),
 ('bad', 'JJ'),
 ('I', 'PRP'),
 ('can', 'MD'),
 ('’', 'VB'),
 ('t', 'RB'),
 ('be', 'VB'),
 ('this', 'DT'),
 ('superhero', 'NN'),
 ('for', 'IN'),
 ('longer', 'JJR'),
 (',', ','),
 ('It', 'PRP'),
 ('will', 'MD'),
 ('take', 'VB'),
 ('more', 'JJR'),
 ('than', 'IN'),
 ('a', 'DT'),
 ('day', 'NN'),
 ('to', 'TO'),
 ('help', 'VB'),
 ('bullied', 'VB'),
 ('victims', 'NNS'),
 ('to', 'TO'),
 ('become', 'VB'),
 ('stronger', 'JJR'),
 ('.', '.'),
 ('The', 'DT'),
 ('truth', 'NN'),
 ('is', 'VBZ'),
 ('that', 'IN'),
 ('no', 'DT'),
 ('one', 'NN'),
 ('deserves', 'VBZ'),
 ('this', 'DT'),
 ('cruel', 'NN'),
 ('and', 'CC'),
 ('hateful', 'JJ'),
 ('treatment', 'NN'),
 ('Everyone', 'NNP'),
 ('deserves', 'VBZ'),
 ('happiness', 'NN'),
 ('and', 'CC'),
 ('that', 'DT'),
 ('should', 'MD'),
 ('be', 'VB'),
 ('a', 'DT'),
 ('unanimous', 'JJ'),
 ('agreement', 'NN'),
 ('.', '.')]

In [19]:
from nltk import ne_chunk
named_entity = ne_chunk(tagged_words)
#named_entity.draw()