Shannon Entropy of the phrase: as expected it didn't work ---> removed


In [ ]:
def entropy(string):
    "Calculates the Shannon entropy of a string"
    # get probability of chars in string
    prob = [
        0 if len(string)==0 else
        float(string.count(c)) / len(string)
        for c in dict.fromkeys(list(string))
    ]
    # calculate the entropy
    entropy = -sum([p * np.log(p) / np.log(2.0) for p in prob])
    return entropy


def entropy_ideal(length):
    "Calculates the ideal Shannon entropy of a string with given length"
    prob = 0.0000001 if length == 0 else 1.0 / length
    return -1.0 * length * prob * np.log(prob) / np.log(2.0)

In [ ]:
train_df['entropy1'] = train_df.apply(axis=1, func=lambda x:entropy(x['question1']))
train_df['entropy2'] = train_df.apply(axis=1, func=lambda x:entropy(x['question2']))

train_df['entropy_ideal1'] = train_df.apply(axis=1, func=lambda x:entropy_ideal(len(x['question1'])))
train_df['entropy_ideal2'] = train_df.apply(axis=1, func=lambda x:entropy_ideal(len(x['question2'])))

train_df['diff_entropy1'] = train_df['entropy_ideal1'] - train_df['entropy1']
train_df['diff_entropy2'] = train_df['entropy_ideal2'] - train_df['entropy2']

train_df['diff_entropy_diff'] = np.abs(train_df['diff_entropy1'] - train_df['diff_entropy2'])

train_df['diff_entropy'] = np.abs(train_df['entropy1'] - train_df['entropy2'])
train_df['diff_entropy_ideal'] = np.abs(train_df['entropy_ideal1'] - train_df['entropy_ideal2'])

In [ ]:
test_df['entropy1'] = test_df.apply(axis=1, func=lambda x:entropy(x['question1']))
test_df['entropy2'] = test_df.apply(axis=1, func=lambda x:entropy(x['question2']))
test_df['entropy_ideal1'] = test_df.apply(axis=1, func=lambda x:entropy_ideal(len(x['question1'])))
test_df['entropy_ideal2'] = test_df.apply(axis=1, func=lambda x:entropy_ideal(len(x['question2'])))
test_df['diff_entropy1'] = test_df['entropy_ideal1'] - test_df['entropy1']
test_df['diff_entropy2'] = test_df['entropy_ideal2'] - test_df['entropy2']
test_df['diff_entropy_diff'] = np.abs(test_df['diff_entropy1'] - test_df['diff_entropy2'])
test_df['diff_entropy'] = np.abs(test_df['entropy1'] - test_df['entropy2'])
test_df['diff_entropy_ideal'] = np.abs(test_df['entropy_ideal1'] - test_df['entropy_ideal2'])

Another variables take from kaggle (frequency of the question) ### ELIMINATED

  • https://www.kaggle.com/jturkewitz/magic-features-0-03-gain
  • calculate the frequencies of a question in the dataframe and saving them into a dictionary
  • add a features which is how many times the questions occurred using the previous dictionary for both q1 and q2
  • here we are using both test and train questions to get more stats

In [ ]:
from collections import Counter
d_freq = Counter(
    test_df.question1.append([
        test_df.question2, train_df.question1, train_df.question2
    ]).reset_index(drop=True).values)

In [ ]:
train_df['q1_freq'] = train_df.apply(axis=1, func=lambda x: d_freq[x['question1']])
train_df['q2_freq'] = train_df.apply(axis=1, func=lambda x: d_freq[x['question2']])

Using pyenchant to correct misspelled


In [3]:
import enchant

In [2]:
from itertools import chain

def flatmap(f, items):
    return chain.from_iterable(map(f, items))

def get_words(x):
    return remove_punctuations(x).split(' ')

In [ ]:
from collections import Counter
all_unique_quests = set(
    list(set(train_df['question1'])) + list(set(train_df['question2'])) +
    list(set(test_df['question1'])) + list(set(test_df['question2'])))
all_words = flatmap(get_words, all_unique_quests)
cnt_words = Counter(all_words)

In [ ]:
len(cnt_words.keys())

In [ ]:
a = cnt_words.most_common(100000)

In [4]:
en = enchant.Dict('en_US')

In [ ]:
for i in range(0-1 -100, -1):
    if not sum([c.isupper() for c in a[i][0]]):
        print(a[i], en.suggest(a[i][0]), sep='\n', end='\n\n')

In [7]:
# d = enchant.DictWithPWL("en_US", "../../Documents/myDataScience/Data_nlp/mywords.txt")
d = enchant.DictWithPWL("en_US")
chkr = SpellChecker(d)

per correggere per bene servirebbe:

  • lista nomi film
  • lista recenti parole inglesi tipo app, sms
  • buzz words del momento tipo bitcoin
  • lista siti internet
  • elenco città del mondo
  • chi più ne ha più ne metta

forse conbiene mettere tutta la lista di parole su cui ha trainato google? però gli errori li c'erano

anche usare le parole più frequenti della chat non va bene

Conclusione: NON CORREGGERE NIENTE


In [ ]:
fanboys
['fan boys', 'fan-boys', 'Sanborn', 'Barnabas', 'cannabis']
screenshot
['screen shot', 'screen-shot', 'screens hot', 'screens-hot', 'screenwriter', 'touchscreen', 'sunscreen', 'screene
bootloader
['boot loader', 'boot-loader', 'bootlegger', 'autoloader', 'boatload', 'reloader', 'bootlaces', 'bootlegged']

## cosa farci? lui vorrebbe o separale o metterci - nel mezzo, dovrei chiedere di non separare mai le parole?
## attualmente stiamo sostituendo gli - con spazi, ma la gente non scrive con - in genere
## conclusione: corregge molte parole scritte male (e ci sono) però ammazza parole che sono uniche e quindi
## dovrebbero parlare molto della frase

In [10]:
i = 0
for q2 in train_df.question1.values:
    i += 1
    if i> 1000:
        break
    chkr.set_text(q2)
    for err in chkr:
        if not sum([c.isupper() for c in err.word]):
            print(q2)
            print(err.word)
            print(chkr.suggest(err.word), '\n\n')


What is the step by step guide to invest in share market in india?
india
['India', 'indie', 'Indian', 'Indira', 'Ind', 'ind', 'Indy', 'Oneida', 'Indra', 'Inuit', 'innit', 'Indies', 'indies', 'Nadia', 'Enid', 'Anita', 'Ida', 'Ina', "India's", 'Indiana', 'indigo', 'indite', 'indium', 'and', 'end', 'int', 'Hindi', 'Linda', 'kinda', 'idea'] 


Which one dissolve in water quikly sugar, salt, methane and carbon di oxide?
quikly
['quickly', 'quill', 'quirky', 'quietly', 'quaky', 'quail', 'quick', 'quirk', 'wkly', 'luckily', 'murkily', 'gully', 'quell', 'likely', 'quirks', 'sickly', 'July', 'giggly', 'jiggly', 'squiggly', 'cockily', 'gawkily', 'jerkily', 'ugly', 'queerly', 'quicker', 'quickie', 'cagily', "quick's", "quirk's", 'Aquila', 'curly', 'gaily', 'girly', 'guile', 'quake'] 


Which one dissolve in water quikly sugar, salt, methane and carbon di oxide?
di
['DI', 'Di', 'DUI', 'Dir', 'Du', 'die', 'do', 'ID', 'id', 'D', 'd', 'DWI', 'Dis', 'did', 'dig', 'dim', 'din', 'dip', 'dis', 'div', 'Si', 'I', 'i', 'Dix', 'DA', 'DD', 'DE', 'Dy', 'Ti', 'dd', 'ti', 'SDI', 'DC', 'DH', 'DJ', 'DP', 'Dr', 'dB', 'db', 'dc', 'dz', 'AI', 'Bi', 'Ci', 'GI', 'HI', 'Li', 'MI', 'Ni', 'RI', 'VI'] 


Should I buy tiago?
tiago
['ti ago', 'ti-ago', 'Iago', 'Togo', 'Tia', 'ago', 'tag', 'taiga', 'ciao', 'GIGO', 'dago', 'sago', 'taco', 'shag', 'Riga', 'toga', 'Diego', "Tia's", 'Virgo', 'tiara', 'GAO', 'rigor', 'tog', 'Argo', 'Igor', 'rag', 'rig', 'tug', 'go', 'Ag', 'biog', 'Fargo', 'Magoo', 'Margo', 'Taegu', 'cargo', 'ciaos', 'largo', 'tiger', 'vigor', 'CIA', 'Chicago', 'KIA', 'MiG', 'Paige', 'TKO', 'age', 'bag', 'big', 'cigar', 'dag', 'dig', 'ego', 'fag', 'fig', 'gag', 'gig', 'hag', 'jag', 'jig', 'lag', 'mag', 'nag', 'pig', 'sag', 'tic', 'wag', 'wig', 'chg'] 


Method to find separation of slits using fresnel biprism?
fresnel
['Fresnel', "Fresnel's", 'Fresno'] 


Method to find separation of slits using fresnel biprism?
biprism
['bi prism', 'bi-prism', 'prism', 'baptism', 'purism'] 


Which is the best digital marketing institution in banglore?
banglore
['Bangalore', 'bang lore', 'bang-lore', 'bangle', 'bangles', 'bungler', 'bungle', 'bunghole', 'bunglers', "bangle's", "bungler's"] 


What is the best travel website in spain?
spain
['Spain', 'spa in', 'spa-in', 'Span', 'span', 'spin', 'Spahn', 'spawn', 'spine', 'spiny', 'spun', 'spavin', 'sprain', 'pain', 'spoon', 'slain', 'stain', 'swain', 'spurn', "Spain's"] 


Who is israil friend?
israil
['is rail', 'is-rail', 'Israel', 'Israeli', 'Ismail', 'Israels', 'assail', 'Ismael', "Israel's"] 


How do I download content from a kickass torrent without registration?
kickass
['kick ass', 'kick-ass', 'kicks', 'jackass', "kick's", 'kickers', 'kikes', "kicker's", "Keck's", "jackass's", "Hicks's"] 


What were the major effects of the cambodia earthquake, and how do these effects compare to the Kamchatca earthquakes in 1952?
cambodia
['Cambodia', 'Cambodian', "Cambodia's"] 


What is the best reference book for physics class 11th?
th
['Th', 'Thu', 'the', 'tho', 'thy', 'THC', 'Rh', 'HT', 'ht', 'H', 'T', 'h', 't', 'nth', 'Ch', 'OH', 'TA', 'Ta', 'Te', 'Ti', 'Tu', 'Ty', 'ah', 'ch', 'eh', 'oh', 'pH', 'sh', 'ta', 'ti', 'to', 'uh', 'DH', 'NH', 'TB', 'TD', 'TM', 'TN', 'TV', 'TX', 'Tb', 'Tc', 'Tl', 'Tm', 'tn', 'tr', 'ts', "Th's", "T's"] 


Why did harry become a horcrux?
horcrux
['Crux', 'crux', 'Gacrux', 'Acrux', 'crocus'] 


Will the recent demonetisation results in higher GDP? If so how much?
demonetisation
['demonetization', "demonetization's", 'demagnetization', 'demonstration'] 


Where can I watch gonulcelen with english subtitles?
gonulcelen
['concealing', 'counseling', 'canceling', 'gunslinger', 'consoling'] 


Where can I watch gonulcelen with english subtitles?
english
['English', 'Englisher', "English's", 'Englishes'] 


Why my question was marked as needing imrovement?
imrovement
['improvement', 'improvements', 'movement', "improvement's"] 


What are some yakshini mantras?
yakshini
['cashing', 'gashing', 'yakking', 'quashing', 'yoking', 'caching', 'coshing', 'gushing', 'joshing', 'yukking'] 


Are exocytosis and endocytosis examples of active or passive transport?
exocytosis
["Exocet's", 'exorcists', 'exorcises', 'excises', 'ecstasies', 'exoduses', 'excites', "exorcist's", 'excesses', 'exciters', "excise's", 'exists', "exciter's", 'exercises', 'exceeds', "expertise's", "ecstasy's", "exercise's"] 


Are exocytosis and endocytosis examples of active or passive transport?
endocytosis
['antisepsis', 'industries', 'understudies', 'industrious', 'indecencies', 'undecideds', 'antacids', 'undersides', "undecided's", 'interstices', "antacid's", "industry's", "indecency's", 'understates', 'intercessors', "intestacy's", "understudy's", "interstice's", "underside's", "antisepsis's", "intercessor's"] 


How do I find a startup accelerator?
startup
['start up', 'start-up', 'start', 'stirrup', 'statue', 'starts', 'status', 'starter', "start's", 'started', 'startle', 'strap', 'Stuart', 'setup', 'statute', 'strip', 'starting', 'stat', 'satrap', 'Stuarts', 'static', 'statues', 'stature', 'state', 'stoup', 'stamp', 'stats', 'strep', 'strop', "Stuart's", 'starters', 'stater', 'Staten', 'States', "stat's", 'stated', 'states', "state's", "statue's", "status's", "starter's"] 


How can I check wifi history and access it through android phones?
wifi
['Wii', 'wife', 'wiki'] 


How do you take a screenshot on a Mac laptop?
screenshot
['screen shot', 'screen-shot', 'screens hot', 'screens-hot', 'screens', 'screened', 'screeched', "screen's", 'scrunched', 'screen', 'serenest', 'screechy', 'screech', 'scrunchy', 'screenwriter', 'scrunch', "screech's", 'screeches', 'screening', "scrunch's"] 


What are the effects of demonitization of 500 and 1000 rupees notes on real estate sector?
demonitization
['demonetization', "demonetization's", 'demobilization', 'demagnetization', 'democratization', 'desensitization', 'demonstration'] 


Why are women who are on their periods are regarded as 'unclean' such that they are prevented in taking an actived part in rituals (Hinduism)?
actived
['actives', 'active', 'activate', "active's", 'acted', 'activated', 'actively', 'activity'] 


Do inkjet printers use color ink when printing black and white documents? If so, why?
inkjet
['ink jet', 'ink-jet', 'inject', 'inkiest', 'inked', 'anklet', 'injects', 'injector', 'infect', 'ingest', 'insect', 'inquest', 'oinked', 'ingot'] 


What is my puk code?
puk
['Puck', 'puck', 'puke', 'pk', 'punk', 'Pu', 'UK', 'pug', 'auk', 'pub', 'pud', 'pun', 'pup', 'pus', 'put', 'yuk', "Pu's"] 


How do you feel when someone upvotes your answer on Quora?
upvotes
['up votes', 'up-votes', 'outvotes', 'pivots', 'updates', "pivot's", 'invites', 'upsets', 'uproots', 'upshots', "Epcot's", "update's", "upshot's", 'upsides', "invite's", 'opiates', 'uploads', "upset's", "upside's", "opiate's"] 


How far would you go for love? Should I wait for the one I love ir move on?
ir
['Ir', 'IE', 'OR', 'Ur', 'or', 'RI', 'I', 'i', 'IRA', 'IRS', 'IT', 'Ira', 'It', 'ire', 'irk', 'it', 'air', 'R', 'r', 'AR', 'Ar', 'ER', 'Er', 'IA', 'Ia', 'Io', 'er', 'ii', 'Dir', 'Mir', 'Sir', 'cir', 'fir', 'sir', 'ID', 'IL', 'IN', 'IQ', 'IV', 'In', 'id', 'if', 'in', 'is', 'iv', 'ix', 'RR', 'yr', 'BR', 'Br', 'Cr', 'Dr'] 


When travelling to a new region is it better to immerse yourself in 1–2 cities or to see as many cities as you can cram in?
travelling
['travel ling', 'travel-ling', 'traveling', 'travailing', 'travelings', 'raveling', 'graveling', 'driveling', 'grovelling', 'traversing', 'trawling', "traveling's", 'reveling', 'trailing', 'trilling', 'trolling'] 


What would be the estimated cost of repairing the cracked screen of iphone 6?
iphone
['iPhone', 'phone', 'siphon', 'phony', 'earphone'] 


Should I repeat 2nd year in college, or find a new college? It's a 5 year course.
nd
['ND', 'Nd', 'Ned', 'nod', 'MD', 'Md', 'NF', 'NS', 'D', 'N', 'd', 'n', 'NT', 'Ind', 'and', 'end', 'ind', 'DD', 'NE', 'NW', 'NY', 'Na', 'Ne', 'Ni', 'No', 'dd', 'no', 'nu', 'yd', 'AD', 'CD', 'Cd', 'Ed', 'FD', 'Gd', 'ID', 'JD', 'NB', 'NC', 'NH', 'NJ', 'NM', 'NP', 'NR', 'NV', 'NZ', 'Nb', 'Np', 'OD', 'PD'] 


I wish to simulate a fake location for an app running on my iOS device. How do you fake a GPS location for iOS without jailbreaking as a non-coder?
jailbreaking
['jail breaking', 'jail-breaking', 'jailbreak', 'jailbreaks', 'lawbreaking', "jailbreak's", 'calibrating'] 


Which is the best programing language for tcs?
programing
['programming', 'programmings', "programming's", 'program', 'progressing', 'deprogramming', 'reprogramming', 'programs', 'proclaiming', 'procreating', "program's", 'programmer', 'programmed', 'procuring'] 


Which is the best programing language for tcs?
tcs
["Tc's", 'tics', 'TVs', 'Cs', 'Tc', 'cs', 'ts', "T's", 'JCS', 'PCs', 'tbs', "tic's", "TLC's", "TV's", "C's", "CT's", "DC's", "Ta's", "Te's", "Ti's", "Ty's", "ti's", "AC's", "Ac's", "BC's", "PC's", "SC's", "Sc's", "TB's", "Tb's", "Tl's", "Tm's", "Th's"] 


What is the VTU 1st sem exam time table of 2016 (Dec)?
sem
['seam', 'seem', 'semi', 'Sm', 'stem', 'Dem', 'Sen', 'sen', 'SE', 'Se', 'EM', 'em', 'SAM', 'Sam', 'sum', 'sea', 'see', 'sew', 'REM', 'SEC', 'Sec', 'Set', 'fem', 'gem', 'hem', 'rem', 'sec', 'seq', 'set', 'sex', "SE's", "Se's"] 


How do I send message from one Android phone to another Android phone through bluetooth?
bluetooth
['Bluetooth', 'blue tooth', 'blue-tooth', "Bluetooth's"] 


What are some examples of deuteromycota and how are they formed?
deuteromycota
['detract', 'detractor', 'detriment'] 


How do I find the zeros of the polynomial function [math]f(x)=\dfrac{1}{2}x^{3}-3x[/math]?
dfrac
['Dirac', 'Draco', 'defrock', 'defray', 'Dvorak', 'drag', 'frag', 'Doric', 'Duroc', 'diffract', 'Africa', 'defraud', 'defrays', 'Drake', 'drake', 'freak', 'frock', 'track', 'Derick', 'drug', 'freq', 'frig', 'frog'] 


Can you debeak cockerels at 8 months old?
debeak
['debark', 'Debra', 'beak', 'debar', 'Derek', 'debarks', 'debug', 'debunk', 'Debian', 'Debora', 'dybbuk', 'Debs', 'debars', 'debs', 'desk', 'daybreak', 'deb', 'tieback', 'Dubcek', 'deejay', "deb's", 'debase', 'debate', 'Rebekah', 'Beck', 'beck', 'berk', 'deck', 'teabag', 'teak', 'tiebreak', 'Reebok', 'debt', 'Debby', 'Decca', 'Dubai', 'decay', 'bedeck'] 


Which car has good build quality in india?
india
['India', 'indie', 'Indian', 'Indira', 'Ind', 'ind', 'Indy', 'Oneida', 'Indra', 'Inuit', 'innit', 'Indies', 'indies', 'Nadia', 'Enid', 'Anita', 'Ida', 'Ina', "India's", 'Indiana', 'indigo', 'indite', 'indium', 'and', 'end', 'int', 'Hindi', 'Linda', 'kinda', 'idea'] 


Why are my bestfriend still ignoring me?
bestfriend
['best friend', 'best-friend', 'befriend', 'boyfriend', 'bestirred', 'bestrewn', 'bestride', 'restrained', 'bestridden', 'bestirring', 'bestrewed', 'bestriding', 'Bertrand'] 


How many sponsored candidates are shortlisted for CMC vellore?
vellore
['velour', 'Valerie', 'Valarie', 'Velcro', 'Weller', 'lore', 'wellie', 'velours', 'valor', 'velar', 'Villon', 'allure', 'galore', 'velars', 'vellum', 'Valery', 'Loire', 'Lorie', 'Lorre', 'lire', 'verier', 'wore', 'vulture', 'welfare', 'Mallory', 'pillory', 'verdure', 'village', "velour's", 'viler', 'Elroy', 'valley', 'volley', 'Lora', 'Lori', 'Vela', 'Waller', 'lure', 'lyre', 'valuer', 'veal', 'veil', 'vela', 'well', 'were', "valor's", "velar's", "we're", "Weller's", "we'll"] 


I argued with my gf that she made friends with the person that hit on her. She said I'm a control freak and she wants her freedom. What should I do?
gf
['ff', 'Gd', 'HF', 'Hf', 'hf', 'F', 'G', 'f', 'g', 'CF', 'Cf', 'cf', 'GA', 'GE', 'GI', 'GU', 'Ga', 'Ge', 'go', 'AF', 'GB', 'GM', 'GP', 'Gk', 'Gr', 'NF', 'RF', 'Rf', 'SF', 'VF', 'bf', 'gm', 'gr', 'gs', 'gt', 'if', 'of', 'pf', 'sf', "G's"] 


Is it true that in order to be part of an elite or just rich and successful, you have to join a secret organisation otherwise no chance at all?
organisation
['organization', 'organizations', "organization's", 'organizational', 'reorganization'] 


Why is sandeep maheshwari not on Quora?
sandeep
['sander', 'Sanders', 'sanders', 'sanded', 'sandier', 'Sand', 'Saunders', 'sand', "sander's", 'sender', 'sunder', 'Sandra', 'Sandy', 'sandy', 'steep', 'sands', 'sundae', "sand's", 'sandal', 'sandpit', "Sandy's"] 


Why is sandeep maheshwari not on Quora?
maheshwari
['Maharashtra', 'hardware', 'dishware', 'Mahavira', 'hatchway', 'Manchuria', 'hatchways', 'hatchery', 'showery', 'matchwood', 'haywire', "hatchway's"] 


What are some cute shounen ai series?
shounen
['shone', 'showmen', 'shorten', 'Shannon', 'shine', 'shining', 'shown', 'shogun', 'shunned', 'shun', 'shiner', 'shinning', 'shined', 'shines', 'shrine', 'Shane', 'Shaun', 'sheen', 'shuns', 'shunt', 'Shriner', 'shinned', 'shouting', 'showman', 'Shauna', 'chosen', 'shaken', 'shaven', 'Shawnee', "Shaun's", 'sharpen', 'shebeen', "shine's", "Shane's", "Shauna's"] 


What are some cute shounen ai series?
ai
['AI', 'IA', 'Ia', 'Au', 'air', 'A', 'I', 'a', 'i', 'AIs', 'Ali', 'aid', 'ail', 'aim', 'Si', 'AA', 'AR', 'Ar', 'ah', 'aw', 'ii', 'oi', 'CAI', 'Mai', 'AB', 'AC', 'AD', 'AF', 'AK', 'AL', 'AM', 'AP', 'AV', 'AZ', 'Ac', 'Ag', 'Al', 'Am', 'As', 'At', 'Av', 'ab', 'ac', 'ad', 'am', 'an', 'as', 'at', 'av', 'ax'] 


What is an actinomorphic flower?
actinomorphic
['autonomic', 'agronomic', 'economic', 'actinium', "actinium's", 'ergonomic', 'academic'] 


What words rank the highest on Dictionary.com's difficulty index?
com's
["coma's", "come's", "corm's", "Cm's", "Qom's", "cam's", 'comas', 'comes', "cum's", "comma's", 'corms', "comb's", "comp's", "comer's", "con's", "CO's", "Co's", "Jim's", "Kim's", 'comers', 'commas', "om's", 'cams', 'cums', 'Combs', "Coy's", 'combs', 'comps', "coo's", "cos's", "cow's", "ROM's", "Tom's", "cob's", "cod's", "cog's", "cop's", "cot's", "mom's", "tom's", 'Camus', "gem's", "gum's", "gym's", "jam's", 'coma', 'cons', 'Com', 'com', 'cos', 'oms', 'Como', 'comb', 'come', 'comm', 'coos'] 


How many minutes of cardio a day should I do?
cardio
['cardie', 'card', 'Cardin', 'caddie', 'carder', 'Cardozo', 'Cato', 'radio', 'CAD', 'Casio', 'cad', 'Cardiff', 'Claudio', 'cardiac', 'cardies', 'carding', 'cart', 'cord', 'curd', 'Cadiz', 'Cartier', 'cards', 'Carlo', 'audio', 'cargo', 'carpi', 'curio', "card's", 'carded'] 


Who do I activate the dlc of skyrim in a laptop? Ive got it working on my computer after doing the command lines, but did the same on laptop, to no use..
dlc
['LDC', 'DC', 'LC', 'dc', 'TLC', 'DEC', 'Dec', 'doc'] 


Who do I activate the dlc of skyrim in a laptop? Ive got it working on my computer after doing the command lines, but did the same on laptop, to no use..
skyrim
['sky rim', 'sky-rim', 'scrim', 'scrum', 'skim', 'sacrum', 'scram', 'strum', 'squirm', 'Sikkim', 'scrimp', 'scrims', 'serum', 'grim', 'scream', 'Seagram', 'scrip', "scrim's"] 


Where do I find 3ds emulator for android?
ds
["D's", 'DOS', 'Dis', 'dis', 'dos', 'SD', 'DA', 'DD', 'DPs', 'DST', 'SS', 'dd', 'DDS', 'dds', 'D', 'S', 'd', 's', 'dz', 'ts', 'IDs', 'ODs', 'ads', 'eds', 'ids', 'DE', 'DI', 'Di', 'Du', 'Dy', 'do', 'As', 'BS', 'Cs', 'DC', 'DH', 'DJ', 'DP', 'Dr', 'Es', 'HS', 'KS', 'Ks', 'MS', 'Ms', 'NS', 'OS', 'Os', 'PS'] 


Is it necessary to unlock bootloader before rooting Android phones?
bootloader
['boot loader', 'boot-loader', 'boatload', 'bootlace', 'boatloads', 'bottled', 'bootlegger', 'bolder', 'Boulder', 'bloater', 'bottler', 'boulder'] 


What is bss engineer?
bss
["BS's", 'BSA', 'Bass', 'Bess', 'bass', 'boss', 'buss', 'BS', 'BSD', 'SS', 'BSDs', "B's", 'BBS', 'bis', 'bus', 'SSS', 'bps', 'bxs', 'USS', 'ass', "BA's", "Ba's", "bus's", "S's", "Sb's", "Be's", "Bi's", "bi's", "by's", "PBS's", "UBS's", "abs's", "BC's", "BM's", "Bk's", "Br's", "MS's", "OS's", "PS's", "US's", "BB's"] 


Is it possible to do CA after 12th Science?
th
['Th', 'Thu', 'the', 'tho', 'thy', 'THC', 'Rh', 'HT', 'ht', 'H', 'T', 'h', 't', 'nth', 'Ch', 'OH', 'TA', 'Ta', 'Te', 'Ti', 'Tu', 'Ty', 'ah', 'ch', 'eh', 'oh', 'pH', 'sh', 'ta', 'ti', 'to', 'uh', 'DH', 'NH', 'TB', 'TD', 'TM', 'TN', 'TV', 'TX', 'Tb', 'Tc', 'Tl', 'Tm', 'tn', 'tr', 'ts', "Th's", "T's"] 


How can I hack someones whatsapp account?
whatsapp
['whats app', 'whats-app', 'whats', 'whatsit', "what's", 'WASP', 'WATS', 'wasp', 'Watson', 'whets', 'whits', 'whitecap', "WATS's", "wheat's", 'Watts', 'waits', 'warts', 'watts', 'Winesap', 'Watusi', 'Whites', "whit's", 'whites', 'vats', 'wads', 'wets', 'wits', "wait's", "wart's", "watt's", "VAT's", "White's", "vat's", "wad's", "wet's", "white's", "wit's"] 


What is your favourite anime character and why?
favourite
['favorite', 'favorites', 'favored', "favorite's", 'fluorite'] 


What is your favourite anime character and why?
anime
['anise', 'Annie', 'Amie', 'anemia', 'name', 'Aimee', 'Anne', 'Nome', 'Angie', 'Annam', 'aim', 'animate', 'anytime', 'animal', 'animus', 'gnome', 'anode', 'acme', 'ante', 'airmen', 'Anita', 'Niamey', 'anion', 'unite', 'Nam', 'Amen', 'amen', 'enema', 'enemy', 'Amer', 'Arnhem', 'anemic', 'anew', 'anthem', 'AM', 'Ainu', 'Am', 'NM', 'am', 'an', "I'm"] 


How do I start a solar energy business in egypt?
egypt
['Egypt', "Egypt's"] 


Can my PC with specs 2 GB DDR2@ RAM, Intel Core Duo 3.5 GHz, NVidia GeForce GT 610 3gb DDR3 run Assassin's Creed Syndicate and GTA V?
gb
['GB', 'gab', 'gob', 'B', 'G', 'b', 'g', 'CB', 'Cb', 'KB', 'Kb', 'QB', 'KGB', 'BB', 'GA', 'GE', 'GI', 'GU', 'Ga', 'Ge', 'Yb', 'go', 'AB', 'GM', 'GP', 'Gd', 'Gk', 'Gr', 'MB', 'Mb', 'NB', 'Nb', 'OB', 'Ob', 'Pb', 'Rb', 'Sb', 'TB', 'Tb', 'ab', 'dB', 'db', 'gm', 'gr', 'gs', 'gt', 'lb', 'ob', 'vb', "GB's"] 


Why India does not have friendly relations with it's neighbouring countries?
neighbouring
['neighboring', 'neighbored', 'Behring', 'boring', 'newborn', 'gibbering', 'numbering', 'neutering', 'nurturing', 'laboring', 'nibbling', 'burring', 'neuron', 'debarring', 'harboring', 'nickering', 'baring'] 


How do I find the phenotypic ratio?
phenotypic
['phenotype', 'phonetic', 'nitpick', 'nutpick', 'handpick', 'fanatic'] 


You have given all statement as correct in UNCCD question in CSE prelim 2016. While many coachings have taken 2nd statement as wrong.?
coachings
['catchings', 'coaching', 'caching', "poaching's", 'couching', 'coatings', 'teachings', 'touchings', "coating's", "teaching's"] 


You have given all statement as correct in UNCCD question in CSE prelim 2016. While many coachings have taken 2nd statement as wrong.?
nd
['ND', 'Nd', 'Ned', 'nod', 'MD', 'Md', 'NF', 'NS', 'D', 'N', 'd', 'n', 'NT', 'Ind', 'and', 'end', 'ind', 'DD', 'NE', 'NW', 'NY', 'Na', 'Ne', 'Ni', 'No', 'dd', 'no', 'nu', 'yd', 'AD', 'CD', 'Cd', 'Ed', 'FD', 'Gd', 'ID', 'JD', 'NB', 'NC', 'NH', 'NJ', 'NM', 'NP', 'NR', 'NV', 'NZ', 'Nb', 'Np', 'OD', 'PD'] 


Is it possible to turn off indicator light on a dahua camera?
dahua
['Doha', 'Dachau', 'dahlia', 'DH', 'dacha', 'Dhaka', 'aha', 'Bahia', 'Dada', 'Dana', 'Oahu', 'data', 'daub', 'dhow', 'Darla', 'sadhu', 'duh', 'Tahoe', 'Day', 'day', 'dash', 'dual', 'DA', 'Du', 'Ha', 'ha', 'Douay', 'HUD', 'hut', 'DAR', 'DEA', 'DOA', 'DUI', 'Donahue', 'Hui', 'due', 'duo', 'hue', 'huh', 'tau', 'Idaho', "Doha's"] 


How do I can stop hairfall?
hairfall
['hair fall', 'hair-fall', 'hairball', 'Haifa', "Haifa's", 'Harrell', 'airfoil', 'halal', 'Hamill', 'harmfully', 'Hill', 'fall', 'hill', 'harmful', 'airflow', 'befall', 'hardly', 'jarful', 'hatefully', 'hateful', 'highball', 'fairly', 'Rafael', 'barfly', 'earful', 'herbal', 'larval'] 


What is the opposite of "homebird"?
homebird
['home bird', 'home-bird', 'homebody', 'homeward', 'homered', 'homeboys', 'homed', 'homeboy', 'homburg', 'hominid', 'hombre', 'morbid', 'Lombard', 'bombard', "homeboy's", 'howbeit', 'Humberto', 'bombed', 'combed', 'hotbed', 'rhomboid', 'tombed', 'humid', "homebody's"] 


What are the good websites to learn C programming for begineer?
begineer
['beginner', 'begone', 'beguine', 'Begin', 'begin', 'beginners', 'bargainer', 'begun', 'begging', 'beguines', 'began', 'Ginger', "beginner's", 'begins', 'ginger', "beguine's"] 


How can I get MOOC/E-learning through online web and video courses related to filmmaking & production developed by Indian professors?
filmmaking
['film making', 'film-making', 'filming', 'flaking', 'filmmaker', 'flanking', 'flaming', 'filmmakers', 'flicking', 'flummoxing', 'lawmaking', 'flunking', 'Fleming', 'flecking', 'flocking', 'lovemaking', "filmmaker's"] 


Is there any way to get rid of gynecomastia?
gynecomastia
['genomes', 'gangsta', 'incomes', 'newcomers', 'noncoms', 'consortia', "glaucoma's", "genome's", "noncom's", "newcomer's", 'juncos', 'incomers', 'enigmas', "junco's", "income's", 'minicams', 'Banjarmasin', 'ginormous', 'condoms', "enigma's", "condom's", "ginkgo's", "gingham's", "minicam's"] 


What are the best available smartphones gadgets?
smartphones
['smart phones', 'smart-phones', 'smartens', 'smartness', 'symphonies', 'semitones', 'Stephens', 'smatterings', 'Sumatrans', "semitone's", "symphony's", "Stephan's", "Sumatran's", "smartness's", "smattering's", "Stephanie's"] 


Why hasn't Gayle Laakmann McDowell/careercup created a MOOC or a coding bootcamp?
careercup
['career cup', 'career-cup', 'crackup', 'creep', 'creepy', 'recoup', 'reequip', 'carrycot', 'Creek', 'creek', 'croup', 'correct', 'crackups', 'reoccupy', 'Creeks', 'creeks', 'creeper', 'crocus', 'preoccupy', 'crepe', 'Caracas', "Creek's", 'breakup', "creek's", 'Arequipa', 'crape', 'caregiver', 'Garrick', 'croupy', 'recopy', 'cracker', 'crapper', 'creepier', 'cracks', 'crackers', "crackup's", 'Crick', 'Greek', 'creak', 'crick', 'crock', 'group', "cracker's", "crack's"] 


Why hasn't Gayle Laakmann McDowell/careercup created a MOOC or a coding bootcamp?
bootcamp
['boot camp', 'boot-camp', 'bitmap', 'decamp', 'blowlamp', 'camp', 'tamp', 'boatman', 'scamp', 'stamp', 'tramp', 'bottom', 'became', 'toecap', 'doorjamb', 'encamp', 'bottoms', 'Bertram', 'outcome', 'outcrop', 'bookmark', 'bookshop', "bottom's", "Bertram's"] 


What is latency in telecom?
telecom
['talcum', 'telex', 'welcome', 'telegram', 'LCM', 'TLC', 'telecommute', 'Tacoma', 'talc', 'Holcomb', 'locum', "TLC's", 'Telugu', "talc's", "talcum's"] 


If you screenshot someone's Instagram video, will they get notified that you screenshotted it?
screenshot
['screen shot', 'screen-shot', 'screens hot', 'screens-hot', 'screens', 'screened', 'screeched', "screen's", 'scrunched', 'screen', 'serenest', 'screechy', 'screech', 'scrunchy', 'screenwriter', 'scrunch', "screech's", 'screeches', 'screening', "scrunch's"] 


If you screenshot someone's Instagram video, will they get notified that you screenshotted it?
screenshotted
['screens hotted', 'screens-hotted', 'screeched', 'scrunched', 'screenwriter', 'screenwriters', 'crenelated', "screenwriter's", 'scrimshawed'] 


What is the effect of hypodensity of white matter in parietal lobe of brain?
hypodensity
['hypo density', 'hypo-density', 'hypotenuse', 'hypotenuses', 'hedonist', 'hypnotist', 'hypertensive', "hypotenuse's"] 


Cochin to London etihad is the change over time of 1hr sufficient in Dubai?
etihad
['Erhard', 'ETD', 'egghead', 'towhead', 'airhead', 'etude', 'redhead', 'edited', 'Utahan', 'ahead', 'bedhead', 'eddied', 'oohed', 'Edward', 'edit', 'Deadhead', 'deadhead', 'Earhart', 'Godhead', 'edified', 'godhead', 'aided', 'atilt', 'edict', 'attired', 'editor'] 


Jawed habib haircut prices?
habib
['ha bib', 'ha-bib', 'habit', 'Harbin', 'hubbub', 'Bib', 'bib', 'nabob', 'Haber', 'Bob', 'HBO', 'bob', 'bub', 'hob', 'hub', 'harbor', 'hobnob', 'barb', 'hobby', 'hobo', 'hubby', 'Hobbs', 'hobbit', 'Bobbi', 'Heb', 'habitue', 'Barbie', 'babier', 'barbie', 'baobab', 'babe', 'baby', 'Hebe', 'herb', 'hobs', 'hubs', 'Huber', "hob's", "hub's", "Haber's"] 


Does any one have ebook of answers of wren and Martin grammer and composition?
ebook
['book', 'Bork', 'bock', 'Ebro', 'obj', 'Biko', 'Bioko', 'Ebola', 'Ebony', 'ebony', 'evoke', 'aback', 'oik', 'Beck', 'Bk', 'Booker', 'OK', 'beak', 'beck', 'berk', 'bk', 'bookie', 'oboe', 'biog', 'overbook', 'Reebok', 'yearbook', 'Eco', 'Ibo', 'bog', 'ebb', 'eek', 'ego', 'oak', 'elk', 'embark', 'ABC', 'Borg', 'Buck', 'EEOC', 'Eggo', 'Eyck', 'back', 'bark', 'boga', 'buck', 'eBay', 'ergo'] 


Does any one have ebook of answers of wren and Martin grammer and composition?
grammer
['crammer', 'grammar', 'grimmer', 'Grammy', 'Kramer', 'grimier', 'groomer'] 


What is [math]x[/math] if [math]x+\left(\dfrac{1}{x}\right) =0[/math]?
dfrac
['Dirac', 'Draco', 'defrock', 'defray', 'Dvorak', 'drag', 'frag', 'Doric', 'Duroc', 'diffract', 'Africa', 'defraud', 'defrays', 'Drake', 'drake', 'freak', 'frock', 'track', 'Derick', 'drug', 'freq', 'frig', 'frog'] 


Whenever its about “her” its a very special feeling. Tried hard to forget her but she's alwys spcl. Should I stop talking to her even as a friend ?
alwys
['alleys', 'awls', 'ales', 'always', "awl's", "ale's", 'allays', 'alloys', "ally's", "Al's", 'Alas', 'alas', "alley's", 'alias', "all's", 'aloes', 'Alyssa', 'allows', 'ails', 'also', 'laws', 'lays', "alloy's", "aloe's", 'alts', 'owls', "AOL's", 'Alisa', 'Alyce', "law's", "owl's", "Alar's", 'Alps', "Eloy's", 'albs', 'ally', 'alms', 'alps', "lay's", "alb's", "alp's", "Elway's", "Orly's"] 


Whenever its about “her” its a very special feeling. Tried hard to forget her but she's alwys spcl. Should I stop talking to her even as a friend ?
spcl
['SPCA', 'Spock', 'speck', 'spec', 'spic', 'Spica', 'sepal', 'spell', 'spiel', 'spill', 'spoil', 'spool', 'suppl', 'specs', 'spics', "spec's"] 


My maths have become extremely weak and I am in class 12th. How can I improve my maths so that I can clear my JEE exams next year?
th
['Th', 'Thu', 'the', 'tho', 'thy', 'THC', 'Rh', 'HT', 'ht', 'H', 'T', 'h', 't', 'nth', 'Ch', 'OH', 'TA', 'Ta', 'Te', 'Ti', 'Tu', 'Ty', 'ah', 'ch', 'eh', 'oh', 'pH', 'sh', 'ta', 'ti', 'to', 'uh', 'DH', 'NH', 'TB', 'TD', 'TM', 'TN', 'TV', 'TX', 'Tb', 'Tc', 'Tl', 'Tm', 'tn', 'tr', 'ts', "Th's", "T's"] 


What is the name of the song in which picturized on Madhuri Dixit and Ranbir Kapoor and is from which movie?
picturized
['pictured', 'pictures', 'pasteurized', "picture's", 'factorized', 'cauterized'] 


What is the function of nucleoplasm in a plant cell?
nucleoplasm
['neoplasm', 'nucleolus', "nucleolus's"] 


Could CAT exam and MAT exam take place on same day (4th December 2016)?
th
['Th', 'Thu', 'the', 'tho', 'thy', 'THC', 'Rh', 'HT', 'ht', 'H', 'T', 'h', 't', 'nth', 'Ch', 'OH', 'TA', 'Ta', 'Te', 'Ti', 'Tu', 'Ty', 'ah', 'ch', 'eh', 'oh', 'pH', 'sh', 'ta', 'ti', 'to', 'uh', 'DH', 'NH', 'TB', 'TD', 'TM', 'TN', 'TV', 'TX', 'Tb', 'Tc', 'Tl', 'Tm', 'tn', 'tr', 'ts', "Th's", "T's"] 


Is government liasioning legal in India?
liasioning
['visioning', 'fashioning', 'rationing', 'loaning', 'cushioning', 'lining', 'cautioning', 'lashing', 'leaning', 'limning', 'lessening', 'loosening', 'motioning', 'chaining', 'learning', 'leashing', 'likening', 'livening', 'leavening', 'lightning'] 


Which is the best SSC and banking training insititute in Chandigarh?
insititute
['institute', 'instituter', 'instituted', 'institutes', 'instituters', "institute's", "instituter's"] 


Why does India sabotage and badmouth all economic projects which its neighbouring counries have in the world?
neighbouring
['neighboring', 'neighbored', 'Behring', 'boring', 'newborn', 'gibbering', 'numbering', 'neutering', 'nurturing', 'laboring', 'nibbling', 'burring', 'neuron', 'debarring', 'harboring', 'nickering', 'baring'] 


Why does India sabotage and badmouth all economic projects which its neighbouring counries have in the world?
counries
['countries', 'counties', 'cornrows', 'couriers', 'Canaries', 'canaries', 'curies', 'canneries', 'corries', 'cowries', 'curries', "cornrow's", 'coteries', "courier's", "curie's", "Connie's", "cowrie's", "coterie's"] 


When will moto G3 (Moto G 3rd gen 2015) get Android 7.0 (Nougat) update?
moto
['moot', 'mo to', 'mo-to', 'motor', 'motto', 'mot', 'Moro', 'moo', 'Mott', 'mote', 'mots', 'Moho', 'Soto', 'Toto', 'mono', 'moth', "mot's"] 


Prove that SNR of power = (SNR of voltage) sequare?
sequare
['square', 'squarer', 'secure', 'squire', 'squared', 'squares', 'Esquire', 'esquire', 'securer', 'Segre', 'Sucre', 'scare', "square's"] 


Why was cyrus mistry removed?
cyrus
['Cyrus', 'Cyprus', 'cirrus', 'Ceres', 'citrus', 'cerise', 'cruse', 'yrs', "cry's", 'syrups', 'Caruso', 'chorus', 'circus', 'Sirius', 'serous', "Cr's", 'Cruz', 'Grus', "Sr's", "Zr's", "Cyprus's", "Ru's", "Syria's", "cirrus's", "Ceres's", "syrup's"] 


Why was cyrus mistry removed?
mistry
['Misty', 'mastery', 'misty', 'mystery', 'misery', 'Mister', 'mister', 'moisture', 'musty', 'ministry', 'mist', 'mistral', 'history', 'misters', 'mistily', 'moistly', 'maestro', 'mists', "mist's", "mister's", "Misty's"] 


I am frightened of Arvind kejriwal, he may kill me, What shall I do?
kejriwal
['kraal', 'krill', 'Gujranwala', 'clerical', 'bejewel', 'crewel', 'Geritol', 'Jewel', 'crawl', 'jewel', 'scribal', 'Karol', 'Kigali', 'growl', 'rowel', 'Quirinal', 'mercurial', 'scrawl', 'Gabriela', 'Kringle', 'drywall', 'general', 'trowel', 'coral', 'gorilla', 'grill', 'Cornwall', 'firewall', 'guerrilla', 'corral', 'jackal', 'Cabral', 'karakul', 'caraway'] 


Is Bihar really developing under mahagathbandhan sarkar?
mahagathbandhan
[] 


Is Bihar really developing under mahagathbandhan sarkar?
sarkar
['Sakai', 'sarky', 'sake', 'Dakar', 'ska', 'Saar', 'sarge', 'Sara', 'starker', 'Saki', 'darker', 'sack', 'saga', 'scar', 'Sanka', 'seeker', 'Samar', 'Sarah', 'parka', 'sager', 'sugar', 'Barker', 'Parker', 'barker', 'marker', 'scalar', 'soak', 'sicker', 'sucker', 'Sakha', 'Saks', 'Stark', 'sacra', 'sear', 'snark', 'spark', 'stark', 'SK', 'Shaka', 'shark', 'sparkier', 'Asoka', 'skier', 'Ark', 'Serra', 'aka', 'ark', 'sparky', 'Osaka', 'SAC', 'sac', 'sag', 'Salk', 'Sask', 'sank', 'soar', 'Garza', "ska's"] 


Is the dynamic/flexi-pricing harsh on middle class people?
flexi
['flex', 'Felix', 'flax', 'flux', 'flecks', 'flexing', 'lxi', 'fleck', "flex's", 'flexed', 'flexes', 'Alexei', 'Alex', 'clxi', 'fleas', 'flees', "fleck's", "flax's", "flea's", "flux's"] 


How much do olympic gold medalists earn?
olympic
['Olympic', 'Olympics', 'Olympia', 'Olympiad', 'Olympian', 'Olympias', "Olympia's"] 


What is reactance in a capacitor?
reactance
['reluctance', 'reactant', 'reactants', "reactant's", 'remittance', 'radiance', 'reacting'] 


What are some math related working models for class 10th?
th
['Th', 'Thu', 'the', 'tho', 'thy', 'THC', 'Rh', 'HT', 'ht', 'H', 'T', 'h', 't', 'nth', 'Ch', 'OH', 'TA', 'Ta', 'Te', 'Ti', 'Tu', 'Ty', 'ah', 'ch', 'eh', 'oh', 'pH', 'sh', 'ta', 'ti', 'to', 'uh', 'DH', 'NH', 'TB', 'TD', 'TM', 'TN', 'TV', 'TX', 'Tb', 'Tc', 'Tl', 'Tm', 'tn', 'tr', 'ts', "Th's", "T's"] 


How do I know if my spouse is my soulmate?
soulmate
['soul mate', 'soul-mate', 'sulfate', 'simulate', 'sublimate', 'solute', 'cellmate', 'slate', 'palmate', 'schoolmate', 'summat', 'stalemate', 'slammed', 'slummed', 'Slater', 'Sumter', 'seatmate', 'sodomite', 'salute', 'slat'] 


How can I change my snapdeal account's mobile number?
snapdeal
['snap deal', 'snap-deal', 'sandal', 'snapped', 'sniped', 'Snapple', 'snidely', 'sundial', 'snappily', 'snipped', 'snooped', 'snippet', 'spatula', 'snippets', "snippet's"] 


Why is 'fahrenheit 451'  perceived as dystopia?
fahrenheit
['Fahrenheit', 'frenzied', 'freshet', 'freshest', 'forehead', 'frenetic', 'barrenest', 'frontier', 'front', 'Frenches', 'fringed', 'reheat', 'rennet', 'ferniest', 'freest', 'frenzies', 'Fronde', 'baronet', 'fainest', 'preheat', 'forewent', 'frequent', 'forehand', 'freehand'] 


Why is 'fahrenheit 451'  perceived as dystopia?
dystopia
['dustpan', 'stop', 'desktop', 'Dustin', 'doorstop', 'stoop', 'stoup', 'dust', 'distort', 'dustier', 'DST', 'Dusty', 'doorstops', 'dusty', 'despair', 'despot', 'dist', 'dost', 'step', 'despite', "doorstop's", 'stopper', 'doorstep', 'disport'] 


How do I define tasks vs milestones vs deliverables in an agile scrum methodology?
deliverables
['deliverable', "deliverance's", 'deliverance', 'deliveries', 'deliverers', "deliverer's"] 


Where can I buy meldonium in Canada?
meldonium
['melding', 'Melton', 'plutonium', 'millennium', 'melanoma', "Melton's", 'Miltonic', 'melodrama', 'Maldonado', 'melting', 'molding', 'Milton', 'molybdenum', 'moldings', 'platinum', "molding's", 'laudanum'] 


I accidentally took my blood pressure medicine twice, will I be ok?
ok
['OK', 'OJ', 'oak', 'oik', 'KO', 'OKs', 'pk', 'K', 'O', 'k', 'o', 'AK', 'UK', 'ox', 'wok', 'OE', 'OH', 'OR', 'ck', 'oh', 'oi', 'or', 'ow', 'wk', 'Bk', 'Gk', 'Mk', 'OB', 'OD', 'ON', 'OS', 'OT', 'Ob', 'Os', 'Oz', 'SK', 'bk', 'ob', 'of', 'om', 'on', 'op', 'oz', "OK's", "O's"] 


I and my girlfriends private partstouched each other.can she become pregenant?
partstouched
['parts touched', 'parts-touched', 'pastiches', 'pastiche', 'pastured', 'restitched', "pastiche's", 'psyched', 'stitched', 'starched', 'postured', 'persuaded', 'mustached', 'pastorate', 'presorted'] 


I and my girlfriends private partstouched each other.can she become pregenant?
pregenant
['pregnant', 'regnant', 'pregnancy', 'poignant', 'repugnant', 'preeminent'] 


Could we use cherenkov atmosphere radiation (with gamma rays or similar) to image the surface of a planet from here with ground based telescopes?
cherenkov
['Cerenkov', 'chronic', 'shrank', 'shrink', 'shrunk', 'shrinks', "shrink's", 'shrunken'] 


Train Logit Model


In [ ]:
import sklearn
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import precision_recall_curve, auc, roc_curve
# from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.preprocessing import MinMaxScaler

GridSearchCV = sklearn.grid_search.GridSearchCV
train_test_split = sklearn.cross_validation.train_test_split

features = list(set(train_df.columns) - set(['id', 'qid1', 'qid2', 'question1', 'question2', 'is_duplicate']))
scaler = MinMaxScaler().fit(df[features])
X = scaler.transform(df[features])
#X = df[features]
y = df['is_duplicate']

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42)
X_train.shape, X_test.shape, y_train.shape, y_test.shape

In [ ]:
clf = LogisticRegression()
grid = {
    'C': [1e-6, 1e-3, 1e0],
    'penalty': ['l1', 'l2']
}
cv = GridSearchCV(clf, grid, scoring='log_loss', n_jobs=-1, verbose=1)
cv.fit(X_train, y_train)

In [ ]:
cv.score(X_test, y_test)

In [ ]:
0.65004704695703419, 0.65

In [ ]:
colors = ['r', 'g', 'b', 'y', 'k', 'c', 'm', 'brown', 'r']
lw = 1
Cs = [1e-6, 1e-4, 1e0]

plt.figure(figsize=(12, 8))
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve for different classifiers')

plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')

labels = []
for idx, C in enumerate(Cs):
    clf = LogisticRegression(C=C)
    clf.fit(X_train, y_train)
    print("C: {}, parameters {} and intercept {}".format(
        C, clf.coef_, clf.intercept_))
    fpr, tpr, _ = roc_curve(y_test, clf.predict_proba(X_test)[:, 1])
    roc_auc = auc(fpr, tpr)
    plt.plot(fpr, tpr, lw=lw, color=colors[idx])
    labels.append("C: {}, AUC = {}".format(C, np.round(roc_auc, 4)))

plt.legend(['random AUC = 0.5'] + labels)
plt.show()

In [ ]:
pr, re, _ = precision_recall_curve(y_test, cv.best_estimator_.predict_proba(X_test)[:,1])
plt.figure(figsize=(12,8))
plt.plot(re, pr)
plt.title('PR Curve (AUC {})'.format(auc(re, pr)))
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.show()

Tentativi di Tagger


In [ ]:
from joblib import Parallel, delayed
import multiprocessing

In [ ]:
def add_tagger(train_df, question, tagger):
    num_cores = multiprocessing.cpu_count()
    train_df[tagger] = pd.Series(
        Parallel(n_jobs=num_cores)(delayed(tag_phrase)(text)
                                   for text in train_df[question])

In [ ]:
def parallel_tagger(df, start, stop):
    step = 10000
    num_cores = multiprocessing.cpu_count()
    return Parallel(n_jobs=num_cores)(
        delayed(sum_quests_and_tag)(df.loc[i:i+step])
        for i in range(start, stop, step))

Proprietà transitiva: q è duplicata di B e anche di C allora B e C sono tra loro duplicati


In [ ]:
df = train_df[train_df.is_duplicate == 1]

qid1_qid2_dup = (df[['qid1',
                     'qid2']].groupby('qid1')['qid2'].apply(list)).to_dict()
qid2_qid1_dup = (df[['qid1',
                     'qid2']].groupby('qid2')['qid1'].apply(list)).to_dict()


def get_value_dict(d, v):
    r = []
    try:
        r = d[v]
    except KeyError:
        pass
    return r


def get_qid_grouped(n_g):
    qid_g = get_value_dict(qid1_qid2_dup, n_g).copy()
    qid_g.extend(get_value_dict(qid2_qid1_dup, n_g))
    while True:
        temp = qid_g.copy()
        for l in temp:
            qid_g.extend(get_value_dict(qid1_qid2_dup, l))
            qid_g.extend(get_value_dict(qid2_qid1_dup, l))
        qid_g = list(set(qid_g))
        if len(temp) == len(qid_g): break
    return qid_g


run_on_me = set(list(df.qid1) + list(df.qid2))
group_dict = {}
while len(run_on_me) != 0:
    i = next(iter(run_on_me))
    group_dict[i] = get_qid_grouped(i)
    run_on_me = run_on_me - set(group_dict[i])

# q1 and q2 are never the same question
print(len(df[df.qid1 == df.qid2]))

present_cases1 = set(
    df[['qid1', 'qid2']].apply(axis=1, func=lambda x: (x['qid1'], x['qid2'])))
present_cases2 = set(
    df[['qid1', 'qid2']].apply(axis=1, func=lambda x: (x['qid2'], x['qid1'])))

# check if there are duplicate q1-q2 pairs (q2-q1) -----> It tooked a long time, no duplicate pairs found
# in the original dataframe
# [j for j in present_cases if (j[1], j[0]) in present_cases] # ---> RESULT: []

all_combo = []
for ng in group_dict:
    for i, l in enumerate(group_dict[ng]):
        for j in group_dict[ng][i + 1:]:
            all_combo.append((l, j))
new_combo = list((set(all_combo) - set(present_cases1)) - set(present_cases2))

print(
    len(all_combo),
    len(new_combo),
    len(present_cases1), len(present_cases2), len(new_combo) / len(all_combo))



dq = (df[['qid1', question1]].set_index('qid1',
                                          drop=True)).to_dict()[question1]
dq.update((df[['qid2', question2]].set_index(
    'qid2', drop=True)).to_dict()[question2])

new_df = pd.DataFrame(
    list(map(lambda x: (x[0], x[1], dq[x[0]], dq[x[1]], 1), new_combo)),
    columns=['qid1', 'qid2', question1, question2, 'is_duplicate'])

train_df = pd.concat(axis=0, objs=[train_df, new_df])
train_df.reset_index(drop=True, inplace=True)
train_df['id'] = train_df.index

Check number of 1's in the test on the LB using logloss value for a fixed prediction probability


In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.metrics import log_loss

In [47]:
## we use the fact that we know the logloss value of 0.37, user your value for a new try
myp = 1
mylogloss = 28
p = [myp] * 10000
l = np.array([log_loss([1] * r + [0] * (10000 - r), p) for r in range(1, 10000)])
x = np.arange(0.01, 100, 0.01)

In [48]:
plt.plot(x, l, '_')
plt.title('Log Loss vs. Pct. Positve with Constant Prediction of {}'.format(myp))
plt.xlabel('% Positve in LB')
plt.ylabel('Log Loss for Constant Prediction {}'.format(myp))
plt.grid()
plt.show()



In [53]:
df_check = pd.Series(data=np.abs(l - mylogloss), index=x)
print('percentage of 1 in prediction is: {}'.format(x[int(df_check.argmin())]))
df_check.plot();
plt.show()


percentage of 1 in prediction is: 0.19

creazione tagger_counter


In [ ]:
tagger = pd.read_csv('/home/ale/random_program/Quora_double_question/risultato_tagger.csv', encoding='latin1')


def recover_tagg_list(tag_string):
    l = [
        re.split(r'[\'\"],\s[\'\"]', t[2:-1])
        for t in tag_string[1:-2].split('], ')
    ]
    return l

def tagged_ppf_verb(x):
    app = recover_tagg_list(x)
    return ([i[1] for i in app if len(i) > 1])


tagger['new'] = tagger.apply(
    axis=1, func=lambda x: Counter(tagged_ppf_verb(x['tagger'])))
dizionario = tagger.set_index('quest_final')['new'].to_dict()

import pickle


def save_obj(obj, name):
    with open('./' + name + '.pkl', 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)


def load_obj(name):
    with open('./' + name + '.pkl', 'rb') as f:
        return pickle.load(f)


save_obj(dizionario, 'tag_counter')

In [ ]:
dizionario_max = pd.read_csv(
    '/home/ale/random_program/Quora_double_question/tagged_list_counted_solved.csv',
    sep=";",
    encoding='latin1')
dizionario_max = dizionario_max.set_index('name')['tag_recod'].to_dict()
save_obj(dizionario_max, 'tag_max_freq')

In [ ]:
alltag = from_tag_tocluster(tagger[0:100]['tagger'].values)
app = pd.DataFrame(alltag.groupby(['name', 'tag']).size()).reset_index()
app.columns =['name','tag','count']
app.to_csv('.\\taggestlist_count.csv',sep=";")

def recodtag(x):
    if x[0] == 'N':
        return('NOUN')
    elif x[0] == 'V':
        return('VERB')
    elif x[0] == 'J':
        return('JJ')
    else:
        return(x)

app['tag_recod'] = app.apply(axis=1, func = lambda x: recodtag(x.tag))
app2 = pd.DataFrame(app.groupby(['name','tag_recod'])['count'].sum()).reset_index()
app2.columns =['name','tag_recod','count']
app3 = pd.DataFrame(app2.groupby(['name'])['count'].max()).reset_index()
app3 = pd.merge(app3, app2, on =['name','count'])
app3.to_csv('.\\tagged_list_counted_solved.csv',sep=";")
app3

Tuning w2v

** The hyper-parameter choice is crucial for performance (both speed and accuracy):

  • architecture: skip-gram (slower, better for infrequent words) vs CBOW (fast)the training
  • algorithm: hierarchical softmax (better for infrequent words) vs negative sampling (better for frequent words, better with low dimensional vectors)
  • sub-sampling of frequent words: can improve both accuracy and speed for large data sets (useful values are in range 1e-3 to 1e-5)
  • context (window) size: for skip-gram usually around 10, for CBOW around 5
  • alpha is the initial learning rate (will linearly drop to min_alpha as training progresses).
  • min_count = ignore all words with total frequency lower than this.
  • negative = if > 0, negative sampling will be used, the int for negative specifies how many “noise words” should be drawn (usually between 5-20). Default is 5. If set to 0, no negative samping is used.
  • cbow_mean = if 0, use the sum of the context word vectors. If 1 (default), use the mean. Only applies when cbow is used.
  • iter = number of iterations (epochs) over the corpus. Default is 5.
  • batch_words = target size (in words) for batches of examples passed to worker threads (and thus cython routines). Default is 10000.

The model can be stored/loaded via its save() and load() methods, or stored/loaded in a format compatible with the original word2vec implementation via wv.save_word2vec_format() and KeyedVectors.load_word2vec_format().

If you’re finished training a model (=no more updates, only querying), then switch to the gensim.models.KeyedVectors instance in wv

word_vectors = model.wv del model to trim unneeded model memory


In [ ]:
def get_train(m, win, s, sort, m_c, h):
    print('New model:', m, win, s, sort, m_c, h)
    model = word2vec.Word2Vec(
        corpus,
        size=s,
        window=win,
        min_count=m_c,
        workers=4,
        sorted_vocab=sort,
        hs=h,
        iter=10,
        sg=m,
        negative=10)

    df = train_df[[q1, q2, 'is_duplicate']].copy()
    tempvar = df.apply(
        axis=1, func=lambda x: getback_function(pca_vars(x[q1], x[q2], model)))
    tempvar.columns = ['diff_eigenv_pca', 'cos_pca', 'diff_ratio_ecc_pca']
    #  + ['first_pca_'] + str(i) for i in range(0, model.vector_size + 1)
    df = pd.concat([df, tempvar], axis=1)
    df.fillna(0, inplace=True)

    tempvar = df.apply(
        axis=1,
        func=
        lambda x: getback_function(get_W2V_variables(x[q1], x[q2], model)))
    tempvar.columns = [
        'norm_mean_wv', 'norm_sum_wv', 'cos_mean_wv', 'cos_sum_wv'
    ]
    #  + ['vect_diff_'] + str(i) for i in range(0, model.vector_size + 1)
    df = pd.concat([df, tempvar], axis=1)
    df.replace(np.inf, np.nan, inplace=True)
    df.fillna(0, inplace=True)

    eventuali = [
        'question1_clear_1', 'question2_clear_1', 'question1_clear_1_clear_2',
        'question1_clear_1_clear_2', 'question2_clear_1_clear_2',
        'question1_final', 'question2_final'
    ]
    features = list(
        set(df.columns) -
        set(['id', 'qid1', 'qid2', 'question1', 'question2', 'is_duplicate'] +
            eventuali))

    x_train = df[features + ['is_duplicate']]

    model_xgb, roc_auc_test, roc_auc_train, var_imp = train_xgboost(
        x_train, features, 'is_duplicate', params, other_par)

In [ ]:
#150 the best
for s in [120, 135, 150, 165, 180]:
    get_train(0, 5, s, 1, 5, 0)
    
for h in [0, 1]:
    get_train(0, 5, 150, 1, 5, h)

for sort in [-1, 1]:
    get_train(0, 5, 150, sort, 5, 0)

for m in [0, 1]:
    win = 5 if m == 0 else 10
    get_train(m, win, 150, 1, 5, 0)

To speed up most_similar we can use a restriction in the vocab, restriction in the top most similarities don't help

Find the top-N most similar words. Positive words contribute positively towards the similarity, negative words negatively.

This method computes cosine similarity between a simple mean of the projection weight vectors of the given words and the vectors for each word in the model. The method corresponds to the word-analogy and distance scripts in the original word2vec implementation.

If topn is False, most_similar returns the vector of similarity scores.

restrict_vocab is an optional integer which limits the range of vectors which are searched for most-similar values. For example, restrict_vocab=10000 would only check the first 10000 word vectors in the vocabulary order. (This may be meaningful if you've sorted the vocabulary by descending frequency.)


In [ ]:
model.most_similar('house', restrict_vocab=100)

Create data for cluster


In [ ]:
def from_tag_tocluster(tagg_string):
    non_for_cluster = ['SENT', '(', 'CD', ')', ',', ':', '``']
    df_for_cluster = pd.DataFrame([(i[2], i[1])
                                   for t in tagg_string
                                   for i in recover_tagg_list(t) 
                                   if (len(i) == 3) and (i[1] not in non_for_cluster)])
    df_for_cluster.columns = ['name', 'tag']
    df_for_cluster.drop_duplicates(inplace=True)
    df_for_cluster.reset_index(drop=True, inplace=True)
    df_for_cluster['name'] = df_for_cluster.apply(axis=1, func=lambda x:x['name'].lower())
    df_for_cluster.drop_duplicates(inplace=True)
    return df_for_cluster

In [ ]:
df_for_cluster = from_tag_tocluster( df['tagger'].values )
df_for_cluster.to_csv('clusterizzami_sto_vocabolario.csv')

In [ ]:
df_for_cluster = pd.read_csv('clusterizzami_sto_vocabolario.csv', encoding='latin1', usecols=range(1,3))

In [ ]:
tag_verbs = ['VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ']
tag_adverbs = ['RB','RBR','RBS']
tag_adjective = ['JJ','JJR','JJS']
tag_nouns = ['NN','NNS','NP','NPS']

In [ ]:
verbs = df_for_cluster[df_for_cluster.apply(axis=1, func=lambda x:x['tag'] in tag_verbs)]['name']
adverbs = df_for_cluster[df_for_cluster.apply(axis=1, func=lambda x:x['tag'] in tag_adverbs)]['name']
adjective = df_for_cluster[df_for_cluster.apply(axis=1, func=lambda x:x['tag'] in tag_adjective)]['name']
nouns = df_for_cluster[df_for_cluster.apply(axis=1, func=lambda x:x['tag'] in tag_nouns)]['name']

verbs = verbs.drop_duplicates().reset_index(drop=True)
adverbs = adverbs.drop_duplicates().reset_index(drop=True)
adjective = adjective.drop_duplicates().reset_index(drop=True)
nouns = nouns.drop_duplicates().reset_index(drop=True)

In [ ]:
len(verbs), len(adverbs), len(adjective), len(nouns)

In [ ]:
'FW': 'Foreign word', --> variable has_foreign_word (and-or q1-q2)
## potenzialmetne utile, da controllare meglio:
'DT': 'Determiner',
'MD': 'Modal',
'SYM': 'Symbol',
'UH': 'Interjection',

In [ ]:
from nltk.corpus import wordnet as wn
def simple_distance(x_hx, y_hy, ix, iy):
    hx, hy = x_hx[1][ix], y_hy[1][iy]
    try:
        xy_c = next(i for j in hx for i in hy if i == j)
    except StopIteration:
        return 1
    n_x, n_y = len(hx), len(hy)
    try:
        return (((hx.index(xy_c) + 1) / n_x) + (
            (hy.index(xy_c) + 1) / n_y)) / 2
    except ValueError:
        return 1


def noun_similarity(x, y, name_sin):
    nhx = len(name_sin[x][1])
    nhy = len(name_sin[y][1])
    return min([
        1 - simple_distance(name_sin[x], name_sin[y], ix, iy)
        for ix in range(0, nhx) for iy in range(0, nhy)
    ])


def gethypernyms(x):
    out = wn.synsets(x, lang='eng')
    if len(out) != 0:
        out = out[0]
    else:
        out = wn.synsets('entity', lang='eng')[0]
    return (out, [l[::-1] for l in out.hypernym_paths()])

In [ ]:
myverbs = sorted( set(verbs.values) )
name_sin = {k:gethypernyms(k) for k in myverbs}
idx_name = {i:myverbs[i] for i in range(0,len(myverbs))} 
name_idx = {k:v for v,k in idx_name.items()}

In [ ]:
from scipy import sparse
import time
def symmetrize(a):
    return a + a.T - np.eye(a.shape[0])

def create_distance_matrix(name_sin, idx_name, mysimilarity):
    start_time = time.time()
    tot = len(idx_name)
    l = [(d, row, col)
         for row in range(0, tot)
         for col in range(row, tot)
         for d in [ mysimilarity(idx_name[row], idx_name[col], name_sin) ]
         if d != 0]
    df = pd.DataFrame(l, columns=['distance', 'row', 'col'])
    data = list(df['distance'].values)
    row = list(df['row'].values)
    col = list(df['col'].values)
    matrixd = symmetrize(sparse.csr_matrix((data, (row, col))).todense())
    print('ho impiegato ' + str((time.time() - start_time) / 60) + ' minuti')
    return 1 - matrixd

In [ ]:
matrixd = create_distance_matrix(name_sin, idx_name, noun_similarity)

In [ ]:
from Clusters_helper import *

In [ ]:
for m in range(1,10):
    print("Using {} min_samples".format(m))
    plot_eps_ncluster(matrixd, 0.1, 1, min_samples=m)

In [ ]:
for eps in np.linspace(0.0001, 1, 10):
    print("Using {} eps".format(eps))
    plot_minsamples_ncluster(matrixd, 0, 25, eps)

In [ ]:
for eps in np.linspace( 0.263231578947, 0.4, 20):
    print("eps used: ", eps)
    db = run_cluster(eps=eps, min_samples=3, X=matrixd)
    if len(set(db.labels_)) == 1:
        print('Only 1 cluster')
        continue
    print_silhouette(matrixd, db.labels_)

In [ ]:
group = pd.DataFrame(verbs, columns=['name'])

X = matrixd
db = run_cluster(eps=0.328016620498, min_samples=3, X=matrixd)
update_group(db, group, name_idx)
cluster = create_cluster_db(group, matrixd, name_idx)
update_group2(group, name_idx, matrixd, cluster)

Ensamble stuff


In [ ]:
train = pd.read_csv('train_basic_vars.csv')
test = pd.read_csv('test_basic_vars.csv')

In [ ]:
test_2 = pd.read_csv('test_isomap_isomappca.csv')

In [ ]:
test_final = pd.concat([test.set_index('test_id'), test_2.set_index('test_id')], axis=1)

In [ ]:
eventuali = [
    'question1_clear_1', 'question2_clear_1', 'question1_clear_1_clear_2',
    'question1_clear_1_clear_2', 'question2_clear_1_clear_2',
    'question1_final', 'question2_final'
]
features = list(
    set(train.columns) -
    set(['id', 'qid1', 'qid2', 'question1', 'question2', 'is_duplicate'] +
        eventuali + ['dist_'+str(i) for i in range(0,10)]))

In [ ]:
xgb_model = ['model_tuned' + str(i) for i in range(0, 5)]
target = 'is_duplicate'
xg_train = xgb.DMatrix(train[features])
xg_test = xgb.DMatrix(test_final[features])

In [ ]:
xgb_preds_test = np.ones(len(test))
xgb_preds_train = np.ones(len(train))

In [ ]:
for m in xgb_model:
    model = xgb.Booster({'nthread':4})
    model.load_model(m)
    xgb_preds_test += model.predict(xg_test)
    print(m)
xgb_preds_test /= len(xgb_model)

In [ ]:
for m in xgb_model:
    model = xgb.Booster({'nthread':4})
    model.load_model(m)
    xgb_preds_train += model.predict(xg_train)
    print(m)
xgb_preds_train /= len(xgb_model)

In [ ]:
#### submission xgboost
submission = pd.DataFrame({'test_id': test['test_id'], 'is_duplicate': xgb_preds_test})
submission.to_csv("submission_xgbfinal.csv", index=False)

NN-xgb ensemble


In [ ]:
trainNN = pd.read_csv('train_predsNN.csv')
testNN = pd.read_csv('submissionNN.csv')
testNN.set_index('test_id',inplace=True)
trainNN.columns = ['preds_NN', 'is_duplicate']

In [ ]:
xgb_train_df = pd.DataFrame({'id': train['id'], 'preds_xgb': xgb_preds_train})
xgb_train_df.set_index('id',inplace=True)

In [ ]:
trainNN['preds_xgb'] = xgb_train_df['preds_xgb']

In [ ]:
for alpha in np.linspace(0, 1, 10):
    n = 'ens_'+str(alpha)
    trainNN[n] = (1.-alpha)*trainNN['preds_NN'] + alpha*trainNN['preds_xgb']
    trainNN[n+'_right'] = (trainNN[n] > 0.5) == trainNN['is_duplicate']
    print(n, sum(trainNN[n+'_right'])/len(trainNN))

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]: