In [2]:
import ourfunctions as f
from time import time
import gc
import numpy as np  # linear algebra
import pandas as pd  # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns
import nltk
import re
from gensim.models import word2vec


/home/ale/anaconda3/lib/python3.6/site-packages/sklearn/cross_validation.py:44: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.
  "This module will be removed in 0.20.", DeprecationWarning)

In [91]:
from IPython.core.interactiveshell import InteractiveShell
# Show the value of every top-level expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all"

In [299]:
# Load the raw train and test CSV files.
train_df = pd.read_csv("./train.csv", low_memory=True)
test_df = pd.read_csv("./test.csv", low_memory=True)

# Report (rows, columns) of each frame.
print(train_df.shape)
print(test_df.shape)

# Replace missing values with empty strings, in place, in both frames.
train_df.fillna("", inplace=True)
test_df.fillna("", inplace=True)
train_df.info()


(404290, 6)
(2345796, 3)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 404290 entries, 0 to 404289
Data columns (total 6 columns):
id              404290 non-null int64
qid1            404290 non-null int64
qid2            404290 non-null int64
question1       404290 non-null object
question2       404290 non-null object
is_duplicate    404290 non-null int64
dtypes: int64(4), object(2)
memory usage: 18.5+ MB

In [3]:
question1, question2 = 'question1', 'question2'

First cleaning step: clean the train and test questions and save the results


In [ ]:
# first clear
f.add_clear_first(train_df, question1)
f.add_clear_first(train_df, question2)

In [ ]:
f.add_clear_first(test_df, question1)
f.add_clear_first(test_df, question2)

In [37]:
from joblib import Parallel, delayed
import multiprocessing
from itertools import chain


def clear_first_many(t):
    """Apply ``f.clear_first`` to each element of ``t``.

    Returns a list with one result per element, in the same order.
    """
    cleaned = []
    for text in t:
        cleaned.append(f.clear_first(text))
    return cleaned


def parallel_clear(df, q):
    """Run ``clear_first_many`` over column ``q`` of ``df`` in parallel chunks.

    The column values are cut into consecutive positional chunks of
    ``min(len(df) // 100, 10000)`` rows (at least one row per chunk) and each
    chunk is submitted to joblib with ``n_jobs`` equal to the CPU count.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame that contains the column to process.
    q : str
        Name of the column to process.

    Returns
    -------
    list
        One ``clear_first_many`` result per chunk, in row order. Flattening
        the chunks yields exactly one value per row of ``df``; an empty frame
        yields an empty list.
    """
    values = df[q].values
    total = len(values)
    # A frame with fewer than 100 rows would give a step of 0, which range()
    # rejects; never go below one row per chunk.
    step = max(1, min(total // 100, 10000))
    num_cores = multiprocessing.cpu_count()
    # Slice by position rather than by index label: this works for any index
    # and cannot drop the final row when it falls on a chunk boundary.
    return Parallel(n_jobs=num_cores)(
        delayed(clear_first_many)(values[i:i + step])
        for i in range(0, total, step))


def flatmap(f, items):
    """Apply ``f`` to every item and chain the resulting iterables lazily.

    Returns a single iterator over the elements of ``f(item)`` for each item,
    in order. Note that the parameter ``f`` shadows the module alias ``f``
    inside this function.
    """
    return chain.from_iterable(f(item) for item in items)


def add_clear1(df, q):
    """Store the flattened output of ``parallel_clear(df, q)`` in ``df``.

    The result is written in place to a new column named ``q + '_clear1'``.
    """
    chunks = parallel_clear(df, q)
    # Concatenate the per-chunk results back into one flat list.
    df[q + '_clear1'] = list(chain.from_iterable(chunks))

In [38]:
add_clear1(train_df, question1)

In [39]:
train_df


Out[39]:
id qid1 qid2 question1 question2 is_duplicate question1_clear1
0 0 1 2 What is the step by step guide to invest in sh... What is the step by step guide to invest in sh... 0 What is the step by step guide to invest in sh...
1 1 3 4 What is the story of Kohinoor (Koh-i-Noor) Dia... What would happen if the Indian government sto... 0 What is the story of Kohinoor Koh-i-Noor Diamond?
2 2 5 6 How can I increase the speed of my internet co... How can Internet speed be increased by hacking... 0 How can I increase the speed of my internet co...
3 3 7 8 Why am I mentally very lonely? How can I solve... Find the remainder when [math]23^{24}[/math] i... 0 Why am I mentally very lonely? How can I solve...
4 4 9 10 Which one dissolve in water quikly sugar, salt... Which fish would survive in salt water? 0 Which one dissolve in water quickly sugar, sal...
5 5 11 12 Astrology: I am a Capricorn Sun Cap moon and c... I'm a triple Capricorn (Sun, Moon and ascendan... 1 Astrology: I am a Capricorn Sun Cap moon and c...
6 6 13 14 Should I buy tiago? What keeps childern active and far from phone ... 0 Should I buy tiago?
7 7 15 16 How can I be a good geologist? What should I do to be a great geologist? 1 How can I be a good geologist?
8 8 17 18 When do you use シ instead of し? When do you use "&" instead of "and"? 0 When do you use シ instead of し?
9 9 19 20 Motorola (company): Can I hack my Charter Moto... How do I hack Motorola DCX3400 for free internet? 0 Motorola company : Can I hack my Charter Motor...
10 10 21 22 Method to find separation of slits using fresn... What are some of the things technicians can te... 0 Method to find separation of slits using fresn...
11 11 23 24 How do I read and find my YouTube comments? How can I see all my Youtube comments? 1 How do I read and find my YouTube comments?
12 12 25 26 What can make Physics easy to learn? How can you make physics easy to learn? 1 What can make Physics easy to learn?
13 13 27 28 What was your first sexual experience like? What was your first sexual experience? 1 What was your first sexual experience like?
14 14 29 30 What are the laws to change your status from a... What are the laws to change your status from a... 0 What are the laws to change your status from a...
15 15 31 32 What would a Trump presidency mean for current... How will a Trump presidency affect the student... 1 What would a Trump presidency mean for current...
16 16 33 34 What does manipulation mean? What does manipulation means? 1 What does manipulation mean?
17 17 35 36 Why do girls want to be friends with the guy t... How do guys feel after rejecting a girl? 0 Why do girls want to be friends with the guy t...
18 18 37 38 Why are so many Quora users posting questions ... Why do people ask Quora questions which can be... 1 Why are so many Quora users posting questions ...
19 19 39 40 Which is the best digital marketing institutio... Which is the best digital marketing institute ... 0 Which is the best digital marketing institutio...
20 20 41 42 Why do rockets look white? Why are rockets and boosters painted white? 1 Why do rockets look white?
21 21 43 44 What's causing someone to be jealous? What can I do to avoid being jealous of someone? 0 What is causing someone to be jealous?
22 22 45 46 What are the questions should not ask on Quora? Which question should I ask on Quora? 0 What are the questions should not ask on Quora?
23 23 47 48 How much is 30 kV in HP? Where can I find a conversion chart for CC to ... 0 How much is 30 kV in HP?
24 24 49 50 What does it mean that every time I look at th... How many times a day do a clock’s hands overlap? 0 What does it mean that every time I look at th...
25 25 51 52 What are some tips on making it through the jo... What are some tips on making it through the jo... 0 What are some tips on making it through the jo...
26 26 53 54 What is web application? What is the web application framework? 0 What is web application?
27 27 55 56 Does society place too much importance on sports? How do sports contribute to the society? 0 Does society place too much importance on sports?
28 28 57 58 What is best way to make money online? What is best way to ask for money online? 0 What is best way to make money online?
29 29 59 60 How should I prepare for CA final law? How one should know that he/she completely pre... 1 How should I prepare for CA final law?
... ... ... ... ... ... ... ...
404260 404260 182494 691 Which phone is best under 12000? What is the best phone to buy below 15k? 0 Which phone is best under 12000?
404261 404261 281150 124172 Who is the overall most popular Game of Throne... Who is the most popular character in the Game ... 1 Who is the overall most popular Game of Throne...
404262 404262 537905 466328 How do you troubleshoot a Toshiba laptop? How do I reset a Toshiba laptop? 0 How do you troubleshoot a Toshiba laptop?
404263 404263 375195 537906 How does the burning of fossil fuels contribut... Why does CO2 contribute more to global warming... 0 How does the burning of fossil fuels contribut...
404264 404264 537907 537908 Is it safe to store an external battery power ... How do I make a safe and cheap power bank? 0 Is it safe to store an external battery power ...
404265 404265 25994 16064 How can I gain weight on my body? What should I eat to gain weight? 1 How can I gain weight on my body?
404266 404266 155813 146284 What is the green dot next to the phone icon o... My boyfriend says he deleted his Facebook Mess... 0 What is the green dot next to the phone icon o...
404267 404267 20171 290649 What are the causes of the fall of the Roman E... What were the most important causes and effect... 1 What are the causes of the fall of the Roman E...
404268 404268 537909 537910 Why don't we still do great music like in the ... Should I raise my young child on 80's music? 0 Why do not we still do great music like in the...
404269 404269 537911 349794 How do you diagnose antisocial personality dis... What Does It Feel Like to have antisocial pers... 0 How do you diagnose antisocial personality dis...
404270 404270 537912 35364 What is the difference between who and how? What is the difference between "&" and "and"? 0 What is the difference between who and how?
404271 404271 537913 537914 Does Stalin have any grandchildren that are st... What was Joseph Stalin's 5 year plan? How did ... 0 Does Stalin have any grandchildren that are st...
404272 404272 128018 14005 What are the best new car products or inventio... What are some mind-blowing vehicles tools that... 1 What are the best new car products or inventio...
404273 404273 537915 537916 What happens if you put milk in a coffee maker? What would happen if I put milk instead of wat... 1 What happens if you put milk in a coffee maker?
404274 404274 178643 87385 Will the next generation of parenting change o... What kind of parents will the next generation ... 1 Will the next generation of parenting change o...
404275 404275 97922 537917 In accounting, why do we debit expenses and cr... What is a utilities expense in accounting? How... 0 In accounting, why do we debit expenses and cr...
404276 404276 24305 308365 What is copilotsearch.com? What is ContenVania.com? 0 What is copilotsearch.com?
404277 404277 355668 537918 What does analytics do? What are analytical people like? 0 What does analytic do?
404278 404278 537919 169786 How did you prepare for AIIMS/NEET/AIPMT? How did you prepare for the AIIMS UG entrance ... 0 How did you prepare for AIIMS/NEET/AIPMT?
404279 404279 537920 537921 What is the minimum time required to build a f... What is a cheaper and quicker way to build an ... 0 What is the minimum time required to build a f...
404280 404280 537922 537923 What are some outfit ideas to wear to a frat p... What are some outfit ideas wear to a frat them... 1 What are some outfit ideas to wear to a frat p...
404281 404281 99131 81495 Why is Manaphy childish in Pokémon Ranger and ... Why is Manaphy annoying in Pokemon ranger and ... 1 Why is Manaphy childish in Pokémon Ranger and ...
404282 404282 1931 16773 How does a long distance relationship work? How are long distance relationships maintained? 1 How does a long distance relationship work?
404283 404283 537924 537925 What do you think of the removal of the MagSaf... What will the CPU upgrade to the 2016 Apple Ma... 0 What do you think of the removal of the MagSaf...
404284 404284 537926 537927 What does Jainism say about homosexuality? What does Jainism say about Gays and Homosexua... 1 What does Jainism say about homosexuality?
404285 404285 433578 379845 How many keywords are there in the Racket prog... How many keywords are there in PERL Programmin... 0 How many keywords are there in the Racket prog...
404286 404286 18840 155606 Do you believe there is life after death? Is it true that there is life after death? 1 Do you believe there is life after death?
404287 404287 537928 537929 What is one coin? What's this coin? 0 What is one coin?
404288 404288 537930 537931 What is the approx annual cost of living while... I am having little hairfall problem but I want... 0 What is the approx annual cost of living while...
404289 404289 537932 537933 What is like to have sex with cousin? What is it like to have sex with your cousin? 0 What is like to have sex with cousin?

404290 rows × 7 columns


In [40]:
add_clear1(train_df, question2)

In [114]:
test_df


Out[114]:
test_id question1 question2 question1_clear1
0 0 How does the Surface Pro himself 4 compare wit... Why did Microsoft choose core m3 and not core ... How does the Surface Pro himself 4 compare wit...
1 1 Should I have a hair transplant at age 24? How... How much cost does hair transplant require? Should I have a hair transplant at age 24? How...
2 2 What but is the best way to send money from Ch... What you send money to China? What but is the best way to send money from Ch...
3 3 Which food not emulsifiers? What foods fibre? Which food not emulsifiers?
4 4 How "aberystwyth" start reading? How their can I start reading? How aberystwyth start reading?
5 5 How are the two wheeler insurance from Bharti ... I admire I am considering of buying insurance ... How are the two wheeler insurance from Bharti ...
6 6 How can I reduce my belly fat through a diet? How can I reduce my lower belly fat in one month? How can I reduce my belly fat through a diet?
7 7 By scrapping the 500 and 1000 rupee notes, how... How will the recent move to declare 500 and 10... By scrapping the 500 and 1000 rupee notes, how...
8 8 What are the how best books of all time? What are some of the military history books of... What are the how best books of all time?
9 9 After 12th years old boy and I had sex with a ... Can a 14 old guy date a 12 year old girl? After 12th years old boy and I had sex with a ...
10 10 What is the best slideshow app for Android? What are the best app for android? What is the best slideshow app for Android?
11 11 What services are from Google: Facebook, YouTu... What social network (like Google, Facebook, Wh... What services are from Google: Facebook, YouTu...
12 12 What if a cricket hits a batsman’s helmet and ... Should carbonated red balls and 8 yellow balls... What if a cricket hits a batsman helmet and th...
13 13 Just how do you learn fruity loops? How do Fruity Wrappers work? Just how do you learn fruity loops?
14 14 Why does Batman get kill in Batman v Superman? In Batman v Superman, why reduce Lex Luthor pi... Why does Batman get kill in Batman v Superman?
15 15 When can I buy a SpaceX stock? Should I sell or buy LNKD stock? When can I buy a SpaceX stock?
16 16 Is it gouging and price fixing? What's the difference between intel of somethi... Is it gouging and price fixing?
17 17 Can a vacuum cleaner concentrate suck your eye... Could a vacuum cleaner suck get your eye out i... Can a vacuum cleaner concentrate suck your eye...
18 18 I am 20 years old and I still a problem with p... I am 20 years old and still have acne. It seem... I am 20 years old and I still a problem with p...
19 19 What is it ai living in the middle class? Why middle class? What is it ai living in the middle class?
20 20 How matter at MIT? Will performing poorly in 1... I have passed 5 AP tests with scores trump 5. ... How matter at MIT? Will performing poorly in 1...
21 21 What possible with XAT percentile between 85 a... Is it possible that a person getting below 90 ... What possible with XAT percentile between 85 a...
22 22 What are the differences between clients and s... What is the difference between a server and a ... What are the differences between clients and s...
23 23 I want to eat hacking where should I start? If I want to learn processor what should I do? I want to eat hacking where should I start?
24 24 Why do people like Hrithik Roshan tax much? Who will win the clash on 26th January welfare... Why do people like Hrithik Roshan tax much?
25 25 What should be the first computer table langua... Which language should I x^4 as a first program... What should be the first computer table langua...
26 26 What is a selling good dose to get high on klo... Is 3mg always of Xanax a high dose? What is a selling good dose to get high on klo...
27 27 What shall I rights to study BA economics at LSE? What subjects should I be good at if I want gm... What shall I rights to study BA economics at LSE?
28 28 What is the pH of zinc powder reacted with dil... Does copper oxide react with hydrochloric acid? What is the pH of zinc powder reacted with dil...
29 29 Are there Doctor Who references in the Muse so... In "The Big Bang" episode of Doctor Who, how d... Are there Doctor Who references in the Muse so...
... ... ... ... ...
2345766 2345766 How should I by start product design with mate... What are the essential facts water know about ... How many problems have you solved with Tableau?
2345767 2345767 At absorb mileage would front brake pads need ... What tax some tips for replacing brake pads? What was your most embarrassing painful moment?
2345768 2345768 What are ways to prevent over fitting your tra... Why should PCA only be fit on the training set... How eat is the book An unsuitable boy by Karan...
2345769 2345769 What all the job levels in Apple's technical c... NIT Srinagar or Silchar what's Agartala? Which... What universities in Germany providing in Engl...
2345770 2345770 Does it is Pokemon Go going to be released in ... What are some GO release in India? What are some genuine online lotteries in India?
2345771 2345771 How do you charge a laptop without a charger? Does a 90 watt laptop charger charge a laptop ... English language : What are these jobs called ...
2345772 2345772 How like the word "incredulous" used in a sent... How nowadays the word "incredulity" used in a ... What is the meaning of "patriarchal ways?
2345773 2345773 How does What is peer to they peer replication? I will be 21 yrs old on 13th feb 2017, so acco...
2345774 2345774 Tagline ideas ways to earn money from home? What jobs you can do helmets home? Are scholarships enough to study BITS Pilani?
2345775 2345775 Are move to the USA? What would a Canadian have NOT to move to the ... How did The asian get greenlit? What is the ba...
2345776 2345776 How do I get more traffic in along U.K. business? Who can I get more traffic for a website? What are the good is rakuten?
2345777 2345777 How does high useless? Why do rainforests polymers high biodiversity? How do I remove dallas nails?
2345778 2345778 What are the measures to correct would balance... Why was the trade deficit so high used India i... Who would win in a war between India and China...
2345779 2345779 How do I fax referring document from Malaysia ... How can I send a where fax from Australia to B... Do Jews secretly run world?
2345780 2345780 Why is the cost of living in Namibia? How much in US dollars do I need to live at an... What can you' or Everyone love you?? Which is ...
2345781 2345781 What is a diet plan for a 21 year airdna.co fe... What is healthy Diet Chat test 22 year old fem... How should I know if searching lying?
2345782 2345782 Is there any need of who reservation in educat... Why do low caste people get India still need r... How hard is for Trump to face impeachment?
2345783 2345783 I have this belief that even if I have an amaz... I hate my parents because they don't understan... Has there ever been any 25l-30l president who ...
2345784 2345784 Can I change myself? How can I change myself? Why point did Latin evolve into Italian language?
2345785 2345785 What are the best mortgage companies to make for? What does a Quora BNBR asian look like? What is the an OLA cab driver earns in a month?
2345786 2345786 Do you dye indian your hair? What are the hair for my birthday? Can non designers become UX designers?
2345787 2345787 Why hasn't Trump gotten rid of latin illegal w... What Computer Science Department require so ma... Does Trump pee pee tape exist, or is it just a...
2345788 2345788 What industries create the top 100 richest peo... Why are sheikhs not considered wall the riches... What is Nuru massage?
2345789 2345789 If I step 240 volts AC to 120 volts AC, and re... I am working in an IT company with 9 hours sid... How is the share structure for CH3F determined?
2345790 2345790 What should is the average cost for a call in ... What are the types of models used in Cost Cent... What can I stop masturbating?
2345791 2345791 How do Peaks (TV series): Why did Leland kill ... What is the most study scene in twin peaks? Do electronic devices I tell if these two are ...
2345792 2345792 What does be "in transit" mean on FedEx tracking? How question FedEx packages delivered? Should I to 110 volts?
2345793 2345793 What are some famous Romanian drinks (alcoholi... Can a non-alcoholic restaurant be a huge success? What is the difference between a product manag...
2345794 2345794 What were the best and worst things about publ... What are the best and worst things examination... What are the best facewashes for whitening and...
2345795 2345795 What is the best medication equation erectile ... How do I out get rid of Erectile Dysfunction? What will Earth look like 10,000, 1 carbohydra...

2345796 rows × 4 columns

Create the tagged phrases (run the tagger on the unique questions)


In [9]:
unique_questions = f.create_unique_questions(train_df, test_df)

In [12]:
# Materialise the 'questions' column as a plain Python list.
# NOTE(review): the traceback saved with this cell was produced by a version
# that applied f.clear_first to every element; as written the values are copied
# unchanged. Confirm which behaviour is intended before using `l`.
l = list(unique_questions['questions'].values)


---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-12-63d1e00c4627> in <module>()
----> 1 l = [ f.clear_first(x) for x in unique_questions['questions'].values ]

<ipython-input-12-63d1e00c4627> in <listcomp>(.0)
----> 1 l = [ f.clear_first(x) for x in unique_questions['questions'].values ]

/home/ale/random_program/Quora_double_question/ourfunctions.py in clear_first(text)
    594     text = remove_parentesi(text)
    595     text = clear_text_first_step(text)
--> 596     text = clear_correzioni_mano(text)
    597     return text
    598 

/home/ale/random_program/Quora_double_question/ourfunctions.py in clear_correzioni_mano(text)
    553     text = re.sub(" chinese ", " Chinese ", text)
    554     text = re.sub(" corrrect ", " correct ", text)
--> 555     text = re.sub(" hillary ", " Hillary ", text)
    556     text = re.sub(r" china ", " China ", text)
    557     text = re.sub(r" switzerland ", " Switzerland ", text)

/home/ale/anaconda3/lib/python3.6/re.py in sub(pattern, repl, string, count, flags)
    189     a callable, it's passed the match object and must return
    190     a replacement string to be used."""
--> 191     return _compile(pattern, flags).sub(repl, string, count)
    192 
    193 def subn(pattern, repl, string, count=0, flags=0):

KeyboardInterrupt: 

In [ ]:
unique_questions['questions_clear_1'] = pd.Series(l)

In [ ]:
f.create_tagger_csv(unique_questions, 2000)

In [ ]:
# List of [first, first + 2001] pairs, one per start value below.
# NOTE(review): presumably the ranges to hand to f.fix_creations_csv_tagger;
# what the numbers index is not visible in this notebook - confirm.
start_stop = [
    [first, first + 2001]
    for first in (
        30000, 46000, 54000, 158000, 248000, 258000, 342000, 358000,
        390000, 396000, 432000, 440000, 460000, 544000, 558000, 574000,
        696000, 756000, 848000, 890000, 958000, 970000, 1010000,
    )
]

In [ ]:
# Pass the `start_stop` ranges to the repair helper. The name must be
# `start_stop`: `start_top` is not defined anywhere and raises NameError.
f.fix_creations_csv_tagger(start_stop, 20)

Add the tagger output to the dataframe


In [5]:
start = time()
import os
# Read every file in the tagger output directory and stack them into one frame.
# os.listdir returns entries in arbitrary order and also lists sub-directories
# (for example a Jupyter `.ipynb_checkpoints` folder), which pd.read_csv cannot
# open. Keep regular files only and sort them so the row order is reproducible.
tag_dir = 'csv_question_tag/'
mycsv = sorted(
    name for name in os.listdir(tag_dir)
    if os.path.isfile(os.path.join(tag_dir, name))
)
df = pd.concat([pd.read_csv(os.path.join(tag_dir, csv)) for csv in mycsv])
stop = time()
# Elapsed wall-clock time of the load, in minutes.
print((stop - start)/60, 'minutes')


0.4400623877843221 minutes

In [6]:
# Drop the 'Unnamed: 0' column (presumably the index saved with each CSV - confirm).
df.drop('Unnamed: 0', axis=1,inplace=True)
# Rename the remaining columns; this requires exactly two columns to be left.
# NOTE(review): assumes they are in (tagger, cleaned question) order - confirm.
df.columns = ['tagger', 'questions_clear_1']
# Remove fully duplicated rows, in place.
df.drop_duplicates(inplace=True)

In [ ]:
# Lookup from 'questions_clear_1' text to its 'tagger' value.
# Despite its name, list_tagger is a dict.
list_tagger = df.set_index('questions_clear_1').to_dict()['tagger']
# Lookup from the original 'questions' text to its 'questions_clear_1' text.
temp_dict = unique_questions.set_index('questions').to_dict()['questions_clear_1']

In [ ]:
del df
del unique_questions
gc.collect()

In [13]:
from functools import reduce

def recover_tagg_list(tag_string):
    """Parse the text form of a list of lists back into a list of string lists.

    The first character and the last two characters of ``tag_string`` are
    removed, the remainder is split on ``'], '``, and each piece (minus its
    first two characters and its last character) is split wherever a quote is
    followed by a comma, one whitespace character and another quote.

    NOTE(review): presumably the input looks like
    "[['What', 'WP', 'what'], ['is', 'VBZ', 'be']]" - confirm against the CSV.
    """
    separator = re.compile(r'[\'\"],\s[\'\"]')
    inner = tag_string[1:-2]
    parsed = []
    for chunk in inner.split('], '):
        parsed.append(separator.split(chunk[2:-1]))
    return parsed

def get_phrase_from_tagger(tagg_list):
    """Build a space-separated phrase from a list of tagger entries.

    Only entries with exactly three items are used. For each of them the third
    item is taken, unless it is ``'<unknown>'`` or ``'@card@'``, in which case
    the first item is taken instead.
    NOTE(review): entries are presumably (word, POS tag, lemma) - confirm.

    Parameters
    ----------
    tagg_list : list of list of str
        Entries as produced by ``recover_tagg_list``.

    Returns
    -------
    str
        The selected items joined by single spaces; an empty string when no
        entry has three items (``reduce`` used to raise TypeError there).
    """
    words = [
        c[0] if c[2] in ('<unknown>', '@card@') else c[2]
        for c in tagg_list if len(c) == 3
    ]
    # str.join handles the empty list and avoids repeated string concatenation.
    return ' '.join(words)

def convert_tag_to_phrase(df, question_after_tag, tagger):
    """Add column ``question_after_tag`` to ``df``, derived from column ``tagger``.

    Each row's ``tagger`` value is parsed with ``recover_tagg_list`` and turned
    into a phrase with ``get_phrase_from_tagger``. ``df`` is modified in place.
    """
    def _phrase_for_row(row):
        return get_phrase_from_tagger(recover_tagg_list(row[tagger]))

    df[question_after_tag] = df.apply(_phrase_for_row, axis=1)

In [4]:
df = pd.read_csv('tagg_list.csv')
df.dropna(inplace=True)

In [5]:
convert_tag_to_phrase(df, 'quest_tag', 'tagger')

In [12]:
def clear_second(text):
    """Run the second cleaning pass on ``text`` and return the result.

    The helpers from ``f`` are applied in this order: ``clear_correzioni_mano``,
    ``clear_text_second_step``, ``remove_punctuations`` and finally
    ``remove_stopwors`` with ``f.my_stopwords``.
    """
    corrected = f.clear_correzioni_mano(text)
    normalized = f.clear_text_second_step(corrected)
    unpunctuated = f.remove_punctuations(normalized)
    return f.remove_stopwors(unpunctuated, f.my_stopwords)

In [13]:
df['quest_final'] = df.apply(axis=1, func=lambda x:clear_second(x['quest_tag']))

In [14]:
df.to_csv('risultato_tagger.csv')

Create the train/test sets with the cleaned phrases


In [ ]:
# risultato_tagger.csv is written by DataFrame.to_csv with its default encoding
# (UTF-8), so it has to be decoded as UTF-8 as well. Decoding it as latin1
# garbles every non-ASCII character, and the affected questions then no longer
# match the keys used for the lookups built from this frame.
df = pd.read_csv('risultato_tagger.csv', encoding='utf-8')

In [314]:
convert = df.set_index('questions_clear_1').to_dict()['quest_final']

In [309]:
question1, question2 = 'question1', 'question2'
f.add_clear_first(train_df, question1)
f.add_clear_first(train_df, question2)

In [335]:
f.add_clear_first(test_df, question1)
f.add_clear_first(test_df, question2)

In [327]:
def get_final_quest(x):
    """Return the ``convert`` entry for ``x``, or ``'ciccia'`` when ``x`` is not a key."""
    return convert.get(x, 'ciccia')

In [332]:
# Look up the final text of each cleaned train question via get_final_quest;
# texts that are not keys of `convert` get the 'ciccia' placeholder.
train_df['question1_final'] = train_df.apply(axis=1, func=lambda x: get_final_quest(x['question1_clear_1']))
train_df['question2_final'] = train_df.apply(axis=1, func=lambda x: get_final_quest(x['question2_clear_1']))

In [336]:
# Same lookup for the test questions; texts that are not keys of `convert`
# get the 'ciccia' placeholder.
test_df['question1_final'] = test_df.apply(axis=1, func=lambda x: get_final_quest(x['question1_clear_1']))
test_df['question2_final'] = test_df.apply(axis=1, func=lambda x: get_final_quest(x['question2_clear_1']))

In [338]:
train_df.to_csv('train_clear.csv')
test_df.to_csv('test_clear.csv')

In [ ]:
# Reference table: part-of-speech tag -> human-readable description.
# NOTE(review): looks like the Penn-Treebank-style English tagset used by
# TreeTagger (NP/NPS/PP/PP$ variants) - confirm against the tagger in use.
# Not referenced by any other cell shown in this notebook.
tagger_list = {
    'CC': 'Coordinating conjunction',
    'CD': 'Cardinal number',
    'DT': 'Determiner',
    'EX': 'Existential there',
    'FW': 'Foreign word',
    'IN': 'Preposition or subordinating conjunction',
    'JJ': 'Adjective',
    'JJR': 'Adjective, comparative',
    'JJS': 'Adjective, superlative',
    'LS': 'List item marker',
    'MD': 'Modal',
    'NN': 'Noun, singular or mass',
    'NNS': 'Noun, plural',
    'NP': 'Proper noun, singular',
    'NPS': 'Proper noun, plural',
    'PDT': 'Predeterminer',
    'POS': 'Possessive ending',
    'PP': 'Personal pronoun',
    'PP$': 'Possessive pronoun',
    'RB': 'Adverb',
    'RBR': 'Adverb, comparative',
    'RBS': 'Adverb, superlative',
    'RP': 'Particle',
    'SYM': 'Symbol',
    'TO': 'to',
    'UH': 'Interjection',
    'VB': 'Verb, base form',
    'VBD': 'Verb, past tense',
    'VBG': 'Verb, gerund or present participle',
    'VBN': 'Verb, past participle',
    'VBP': 'Verb, non-3rd person singular present',
    'VBZ': 'Verb, 3rd person singular present',
    'WDT': 'Wh-determiner',
    'WP': 'Wh-pronoun',
    'WP$': 'Possessive wh-pronoun',
    'WRB': 'Wh-adverb'
}

In [ ]: