In [10]:
from textblob import TextBlob
import pickle

In [11]:
text = "Linda Christian, the first Bond girl, dies at 87 . Widow of Tyrone Power was in TV version of Casino Royale"
blob = TextBlob(text)
blob.noun_phrases


Out[11]:
WordList(['linda', 'bond', 'widow', u'tyrone power', u'tv version', 'royale'])

In [13]:
with open("NEEL_tweets(with_grams).pickle", "rb") as f:
    tweet_corpus = pickle.load(f)

In [76]:
tweet_corpus.keys()


Out[76]:
['93314579924393984',
 '94884467910512640',
 '93141776474456064',
 '91707256726499328',
 '92955019615272961',
 '102380900473507840',
 '93404134618841088',
 '102312601190539265',
 '99288973943390209',
 '102125361248215040',
 '101574290444521472',
 '100658108661112832',
 '72058654651711488',
 '100977986563604480',
 '95953148530790400',
 '91927932758540291',
 '96222040377393152',
 '102000318501490688',
 '92681654858035201',
 '92401321323409409',
 '95969501367111680',
 '92579651679039488',
 '102065579078332417',
 '92481778706690048',
 '92237433697091584',
 '97408964903448576',
 '98431701365178369',
 '93344390839410689',
 '101419346307461120',
 '100152860288880640',
 '93455340603064320',
 '92625159864856577',
 '93548445998129152',
 '92643760508387328',
 '93500846666551296',
 '100571949746495488',
 '96985217134366720',
 '93899167105167360',
 '100712904470429696',
 '94404324922363904',
 '98888035881586691',
 '96004045017321473',
 '95103154651275264',
 '91853498135752704',
 '100689319639007232',
 '93484337873420288',
 '100992810420936704',
 '93745589786902528',
 '99611423281393664',
 '96548633469652992',
 '97665015968047104',
 '101588676915625984',
 '100901337633521664',
 '93708758311632896',
 '91952178360631296',
 '95533101521977344',
 '92110996134576128',
 '91840950862741504',
 '92744177418379265',
 '94500411716808705',
 '100671608204701696',
 '98181224346034176',
 '93923166950395904',
 '93002028334317568',
 '93672807057203201',
 '99670130161745920',
 '94462178547077123',
 '99147499725529088',
 '101478195630587904',
 '98291102519537664',
 '96665996416397313',
 '95906498089189376',
 '94441788412133377',
 '95434223678263297',
 '93154993573593088',
 '92043992925077505',
 '92266760765583360',
 '97458359346806784',
 '95289511801663488',
 '92691023037349889',
 '103209376944357376',
 '92665161021788160',
 '98037368111640576',
 '101029769130414081',
 '100998100411363329',
 '100653917553164288',
 '93429476393893888',
 '101573195743764482',
 '94409462944768000',
 '99659141890576384',
 '93348468701151232',
 '92817093799587841',
 '94575922270838784',
 '91655593139511296',
 '92275090007404544',
 '94709694458900480',
 '99029732041031680',
 '99287434281484288',
 '92689105703550976',
 '97376969657823233',
 '92006871375945728',
 '92884204265811970',
 '101371988911726592',
 '102865437901070336',
 '93273993544597504',
 '92968056053186562',
 '93443989793030144',
 '92446791315890176',
 '94899543874867200',
 '93490469958467584',
 '94518623552552961',
 '102411827916455936',
 '92713454162554880',
 '92284144960286721',
 '95269953741144066',
 '96956789559537665',
 '99556024939331584',
 '93324903985577984',
 '92294980080644096',
 '100529934270873600',
 '92712259708665856',
 '91998635692261376',
 '99653392141533184',
 '96211977642315776',
 '100998532747640832',
 '93080394370519040',
 '96427426464284673',
 '96836366918352896',
 '97819281215074304',
 '95396113888194560',
 '96651234517323776',
 '96564181729423360',
 '94029148615483393',
 '92297618285273088',
 '91708290513383425',
 '92435485066145792',
 '99641382926434304',
 '95438229242773504',
 '92624675829596160',
 '93538364686483456',
 '93905391481651200',
 '94813064473481216',
 '93731915043049473',
 '93031292421144576',
 '93211671123861504',
 '92540726604349440',
 '102087976670076928',
 '92928036990746625',
 '93783806972149761',
 '100925943694110720',
 '91904310308384770',
 '96228351936692224',
 '92990179672985600',
 '94810322493390848',
 '93318722109964288',
 '99087290470629376',
 '100732225204654080',
 '94811433027633152',
 '93305039787012096',
 '93020527530221568',
 '94563016393232384',
 '93497872150036481',
 '98054026175848448',
 '93041278475972608',
 '92504145797451777',
 '93084582966079488',
 '91835092061794304',
 '92099152187633664',
 '91676265702100992',
 '94809680999759872',
 '93639890465595392',
 '93388656202620928',
 '92680294544588800',
 '92945537803886592',
 '93326271056060418',
 '98029817156468736',
 '97311289960968192',
 '97826342103564289',
 '99252118250201089',
 '100978397903192064',
 '94137640999325696',
 '94013224856453120',
 '101808467257470976',
 '92952817727324160',
 '92574271288778752',
 '100951827431948288',
 '93681637950365696',
 '98174256785010688',
 '100709634683305985',
 '94135082436132864',
 '93188706533515264',
 '97339570164731904',
 '101739359849562112',
 '94070517031636997',
 '97096475271634944',
 '91796115715919873',
 '94712231622746112',
 '91803825563910144',
 '97722436182749184',
 '92931106382413824',
 '100903217851928576',
 '97292216472580096',
 '93772486549848064',
 '94859886923481088',
 '92981004804960256',
 '92668398948978688',
 '92755669811339264',
 '101571060847230976',
 '101032158956761088',
 '92768228354424832',
 '93322827813494785',
 '93398463697526784',
 '94558606829166595',
 '94453092074008577',
 '101056007421173760',
 '91769916096643072',
 '92206704678285312',
 '96190924983508992',
 '96953957791956992',
 '92828887821717504',
 '101874095125307393',
 '92975510937800704',
 '94183808546516992',
 '98176727284592640',
 '92647293202857984',
 '93018627565027329',
 '91779272523517953',
 '92594537654009856',
 '101668632941178880',
 '100659567297445888',
 '95182527685337089',
 '92683945992392704',
 '92705594049441793',
 '92019191024066560',
 '101080446888181760',
 '92239380411981825',
 '94398791322578944',
 '93999585655140352',
 '98694351315664896',
 '91676190217220096',
 '91845119975759872',
 '101037747019587584',
 '92468980765306881',
 '94526163174690817',
 '91946592923430912',
 '95177954954448896',
 '93435019728650240',
 '92577654489886720',
 '93374198965219330',
 '102732362944749568',
 '92876105060581376',
 '99480036737957888',
 '93170883702685696',
 '91628739892490240',
 '93942339093004288',
 '101736175064190976',
 '98501375574544385',
 '94429050306707456',
 '98052883123146752',
 '100947346967367680',
 '94808210258669568',
 '92829473745014784',
 '94934958866579456',
 '92331525051400195',
 '91928845980143616',
 '92691491260084224',
 '94807440419332097',
 '93071805631303680',
 '99594637676789760',
 '92628263700148224',
 '91989088462307329',
 '103226937052381184',
 '92670268052484096',
 '97969923694931968',
 '93246093017620480',
 '97462622781321216',
 '93246986748301312',
 '99121683209785344',
 '92246588633780226',
 '93349268135481344',
 '102041024507031555',
 '92240386990407680',
 '91970961699647488',
 '92380102771474432',
 '92773735211335680',
 '98932393993449472',
 '91839109567488000',
 '93093466476773377',
 '92803031367819264',
 '93368936996933632',
 '98371205341515776',
 '92487204324126720',
 '97853032653529089',
 '92720622232276992',
 '99661771735969792',
 '95815783678087169',
 '91908013434216448',
 '92361086627622912',
 '100338505523281920',
 '92675273430024192',
 '99129277999611906',
 '96024488075997184',
 '102744094140411904',
 '93202345378332673',
 '93485185114443777',
 '97668537476317184',
 '93725261312438272',
 '93036974818074624',
 '93032242238062592',
 '96113851828940800',
 '93152330396082176',
 '92323388076470274',
 '92348579057909760',
 '94376906727763968',
 '92693524981616640',
 '101734277972107264',
 '92891661000130561',
 '91845690656960512',
 '102668987531395072',
 '101721820142637057',
 '93334427844878336',
 '93363101503729664',
 '99117434937679872',
 '92588165663162369',
 '91914130545573888',
 '97028520554938368',
 '97939393377075200',
 '99144320430518274',
 '103170022179999745',
 '93038068117942272',
 '91999267694194688',
 '92282588458258432',
 '101643392131403776',
 '96185595470159873',
 '102580796497461248',
 '103212458935726080',
 '96945755121659904',
 '100581944642904064',
 '91991519430582272',
 '93239357858791425',
 '92203555385131008',
 '94365640856580097',
 '100994783358631936',
 '93691914536239104',
 '96473651876077568',
 '96979327224266752',
 '97733882346078208',
 '92054549212299265',
 '93015347564777472',
 '93745034825961472',
 '97476522566361089',
 '93411920736034816',
 '93352717229109248',
 '92638811993030656',
 '101757466324385792',
 '93151121543135232',
 '93031684374667264',
 '93617326653587456',
 '102124541182091264',
 '100553819494158337',
 '99510126662254594',
 '100686707728453632',
 '92630700901138432',
 '93389798248681472',
 '102068034964946945',
 '100687447985364992',
 '96258902395584512',
 '95724083647488000',
 '100980416290373632',
 '91962882371235840',
 '92296187314581504',
 '95489083136155649',
 '96402182710104065',
 '93274772011626496',
 '93146504931442688',
 '100912986448863232',
 '93321380757639169',
 '93598767844032512',
 '96651090807894017',
 '100672810757783552',
 '102086288940863488',
 '92033712904093696',
 '93670144651427841',
 '97004151946481664',
 '93568254626369536',
 '100714522351906817',
 '100717045175418881',
 '93265655243096064',
 '94619773723357186',
 '91915890144186368',
 '92706091976237056',
 '92224552599429120',
 '96677269535334400',
 '102558727151960064',
 '94523757728776192',
 '100733236031913984',
 '94138194685210624',
 '92705848836628480',
 '99760962835460096',
 '98173600703582208',
 '93710611619393537',
 '103139143063842816',
 '91871635388375040',
 '91937809920823296',
 '93103314736132096',
 '102314342346788865',
 '101432724321091584',
 '93050424524935168',
 '93143598341685248',
 '94386507481219072',
 '93356496913567744',
 '101940709053042688',
 '94153790718099456',
 '91858594844246016',
 '101392563843502082',
 '102784970023641089',
 '92247182312349696',
 '92075215315681281',
 '100660375749529600',
 '93742631238111232',
 '96178288992587776',
 '93826957975883776',
 '98025480908849153',
 '93967144450138112',
 '93081565348892672',
 '100644788671488000',
 '93695066069151744',
 '92333694446084096',
 '94867896928448513',
 '92245339104481281',
 '101058689523400704',
 '93334286316478464',
 '92168102359474176',
 '100654462988849152',
 '100704090522271744',
 '98919027602231296',
 '99171192048599040',
 '100342613860818944',
 '98107329765056512',
 '96542543348637697',
 '93023640379076609',
 '102620482892869633',
 '93330419516313600',
 '93511911219535872',
 '92969106881515520',
 '95962079915290624',
 '94930009613017089',
 '93445783814291456',
 '94480845791956993',
 '92643959591022592',
 '96585350411071489',
 '97732251214823424',
 '92989573579288576',
 '96191043862663169',
 '91928522565754880',
 '96782542702780416',
 '91733741646516224',
 '100960424735940608',
 '92166818927620096',
 '98743218979618817',
 '94198829171216384',
 '93660551019905025',
 '94156730929381376',
 '92627625608097795',
 '96276255372091392',
 '101975001678364672',
 '100860202164830208',
 '93459204706275329',
 '92209240676110336',
 '96569948897427456',
 '93052450197606400',
 '92732661361152000',
 '100933019145207808',
 '93634518149365760',
 '100623454981722112',
 '102862579310936064',
 '93166585153454081',
 '100736523179274240',
 '94556024995975169',
 '100620752272900097',
 '95895148315164673',
 '92663918589259776',
 '92488433934680064',
 '94818150943686657',
 '102875807608881152',
 '96309990792507392',
 '95660302888214529',
 '100375503868923904',
 '94812166439452672',
 '101152953116803072',
 '91887708938579968',
 '96565555221368832',
 '100644104689557505',
 '96250830499479552',
 '102759881064460288',
 '94767948295700480',
 '101933443201511424',
 '98101823776374784',
 '96203693912891394',
 '100130942361939968',
 '100202414153539585',
 '97500470293168128',
 '92974406980210688',
 '92229656115286016',
 '91966296882810880',
 '95637005131722752',
 '93017474072715264',
 '101815114281385984',
 '91693727273328641',
 '97462635209031680',
 '92860837802414081',
 '93477094557876224',
 '91814689134219264',
 '100913412317523969',
 '94421647397888000',
 '103225732452454400',
 '92406762476535808',
 '98298979716055040',
 '95430079714299904',
 '101044028170174466',
 '93750712361689088',
 '93440864361263105',
 '92105762414927872',
 '97379561582497792',
 '92406036845166594',
 '102532528845492224',
 '100325515235303424',
 '100227045275082752',
 '101038340438114304',
 '101491826229395456',
 '92313714224668672',
 '101020620464197632',
 '93461072069140480',
 '98039628522725378',
 '93321511091445760',
 '92068856931155968',
 '102371087672807425',
 '98069781692940288',
 '100864013872803841',
 '93377441845874688',
 '96657103237816320',
 '93405257639526400',
 '93317692710338560',
 '93477874656489472',
 '92749453886373888',
 '100719104977154048',
 '101020302405931008',
 '101330147394850816',
 '102568154164768768',
 '102844692646993921',
 '93005124485652481',
 '99852692247166976',
 '101756407149375489',
 '93755926498127872',
 '92463363644334080',
 '94319469920403456',
 '96988012839317504',
 '93060072762118144',
 '93006349599907840',
 '94103364815699969',
 '94441232654278656',
 '93286216094580736',
 '100793214600085504',
 '3543845320',
 '93346617897390082',
 '92789379453566976',
 '93063371515105280',
 '91970435507421184',
 '103073204700053505',
 '93316633199775744',
 '96966781851271169',
 '99273520994992130',
 '97017783145070592',
 '91961808604237824',
 '97476128305979392',
 '93476427013423104',
 '92907150279589888',
 '92916991156957184',
 '95317687516934144',
 '93168725246746624',
 '100900499162808321',
 '93412095177146369',
 '93448491019411456',
 '93372288518459393',
 '94209038987952128',
 '93709249020039168',
 '94452083499081728',
 '99618926908018689',
 '99885981905321985',
 '94247453636829184',
 '95906544255897600',
 '92699088658767872',
 '93378151698272256',
 '99598183302299648',
 '92542317491273729',
 '93755599896064001',
 '92276158808334336',
 '91812471555362816',
 '98017799158497280',
 '93734422158913536',
 '94440137878020096',
 '92433257878134784',
 '92812416949297152',
 '97296020681142272',
 '92712188497768449',
 '93165074721677312',
 '100625708447043585',
 '102568383198937088',
 '94823264559431683',
 '96227712355667969',
 '93046116983123968',
 '92508560365334528',
 '91946438266851329',
 '98094167196049409',
 '100955195776827392',
 '92881941317165056',
 '94811243692568577',
 '98048170008903681',
 '88416243299786752',
 '97569036308709376',
 '99672140021903360',
 '93335385572257792',
 '99200989688643584',
 '97867813376622592',
 '92666511990341633',
 '102266212200890368',
 '93165092149006336',
 '94811470172393472',
 '100678378755067904',
 '93402528036827137',
 '102041478183927808',
 '101007518347694080',
 '92283176067661824',
 '96587380278050816',
 '95132090957447168',
 '94137483159281664',
 '93024530745921536',
 '98182828684087296',
 '95056222239211520',
 '96145938346811393',
 '93464745524465664',
 '97345214657732608',
 '95922572083990528',
 '92201997121499136',
 '92699366166503424',
 '98530236588761088',
 '100690934215344129',
 '92378631279611904',
 '93009463564447745',
 '99810287129067521',
 '92742671491284992',
 '91683689796354048',
 '102073483936268288',
 '93737472516829184',
 '93330084408201216',
 '92289190707990528',
 '93654618210443264',
 '91986216400076800',
 '94179121093017600',
 '91714818343583744',
 '98838837832327168',
 '93499783599235072',
 '102660518929637377',
 '102350153272401920',
 '101414384043175937',
 '95802517128089601',
 '92787340803715072',
 '99204286419968000',
 '94259157615648770',
 '93648865886089216',
 '101495953428717569',
 '94457834649034754',
 '101748047888920577',
 '92688744313929728',
 '102856587256926208',
 '93392986490155009',
 '95222918291783680',
 '101186888085413889',
 '92683638923210752',
 '103069454442823681',
 '99195943936724992',
 '102581530127376384',
 '102784871444918273',
 '92683762680344576',
 '92861039120625664',
 '101983612601237505',
 '96931007181225986',
 '96064326309384192',
 '93048284138057729',
 '95645142622539776',
 '91666751665868800',
 '93214187051958273',
 '96984948635992065',
 '92647901381136384',
 '91872307479449600',
 '100832401193844736',
 '94764774340046849',
 '93011237734055936',
 '101235330262372352',
 '92020245019099136',
 '92489713902039040',
 '93055040520069120',
 '91944850680844288',
 '93086151035994113',
 '93364673793110020',
 '91876443038027776',
 '92284091642294272',
 '96404114954661888',
 '92353415123968000',
 '92088579400011776',
 '102747877671047168',
 '92624967635705856',
 '93419359736832000',
 '95990282411180033',
 '93592366388224000',
 '101867891749683200',
 '92703274603528192',
 '93350745100922880',
 '92267949112561664',
 '92710465171505153',
 '91811406156017664',
 '93158117478645761',
 '93717377883189248',
 '92979130907377664',
 '94524421292834816',
 '92074318242127872',
 '93203339000561664',
 '100966801382440960',
 '93512635420647425',
 '95300837726887936',
 '94303538561290240',
 '98518238442434560',
 '94079773847986177',
 '103196131361697792',
 '92077173778825216',
 '99538840863252480',
 '92701930975010816',
 '97963233780056064',
 '93505979316060160',
 '92497498207297536',
 '94810268923736064',
 '94586978460385281',
 '94815768675487745',
 '91974853590126592',
 '100939109257842689',
 '92934213975805952',
 '94759766982799360',
 '92866765834567680',
 '91812021514932225',
 '97352404718194688',
 '93147699544723457',
 '97759293780144128',
 '98497382597148672',
 '91804404180725760',
 '97822384853233665',
 '92205398123216896',
 '93483825652449280',
 '93289285981175808',
 '97074481746550785',
 '93786420036108288',
 '93629100102660096',
 '103127655129427968',
 '96879263122337792',
 '100766029856260097',
 '96924850878287872',
 '96688125719486464',
 '92519151947624448',
 '100871916511969280',
 '92673205621370880',
 '91681462000164864',
 '101371723798151169',
 '96255867585966080',
 '94537519206637569',
 '92291532354355200',
 '101300660988936193',
 '93324115456434176',
 '98094627499941888',
 '91578861036380160',
 '97995265683886081',
 '101013010050592768',
 '96455331181375488',
 '93535374181269504',
 '94194936869683200',
 '95127529593122816',
 '94633563730874368',
 '103041969965645824',
 '93105665089867776',
 '92607649736167424',
 '93550314287931392',
 '99226394587971585',
 '100644516125622272',
 '92969134756859906',
 '93452805095960576',
 '103165156539904000',
 '100866205610553344',
 '92350554352783361',
 '92236628923392000',
 '97829869311901696',
 '93322219631026176',
 '93372491627642880',
 '93030932201734144',
 '95489982260723712',
 '91652237725663232',
 '101024829385347073',
 '100731015894540288',
 '97325208838471680',
 '96252784025931776',
 '99523095437651968',
 '98182476941373440',
 '101557840388440064',
 '91987850437988352',
 '92740682158047232',
 '101351686358048768',
 '92671586628407296',
 '96039621137399809',
 '98034371264655360',
 '102923598414622720',
 '93009815311364096',
 '93699434617118721',
 '92274776369926144',
 '97815160516919296',
 '100914329276256257',
 '100685777289224192',
 '93661388681125888',
 '91810529621966848',
 '98027157300854784',
 '99686731250532352',
 '92145076297404416',
 '101654556852748288',
 '100724230852845568',
 '94481646916603904',
 '94063258499153920',
 '92706915892731904',
 '92381667926351872',
 '98924387897585664',
 '101428194460180480',
 '93409536827854848',
 '92643124052107264',
 '101992331275796480',
 '92911563723386880',
 '93960668197289984',
 '101315943799590915',
 '97248713143099392',
 '96956379297882112',
 '96410979562291200',
 '91772743091105793',
 '95506152313470976',
 '100902012501229568',
 '91900145075105793',
 '95951788217352193',
 '98446576988585984',
 '92700486129553408',
 '97373690815197184',
 '99227803861516288',
 '92786465272119296',
 '101065852761284608',
 '92717757963059200',
 '97079031924666368',
 '94539178125176833',
 '100652144067223552',
 '100653353050193920',
 '93546678673616896',
 '93691733627502593',
 '101671002735521793',
 '102067318368108544',
 '91883894542049281',
 '94509025118519296',
 '100000025580548097',
 '96324653890539521',
 '97827245397254145',
 '101413281792671745',
 '92066368203141120',
 '96092892203978752',
 '94829178528075776',
 '95578608407552000',
 '92494167556636672',
 '93734032189304832',
 '95114160530141185',
 '93808293348257792',
 '89391663738400769',
 '93322370688880641',
 '93270399021887488',
 '93014813629890561',
 '101830130401423360',
 '97129896891006976',
 '98037598789971969',
 '100668166455312385',
 '93697436974985216',
 '93362605669892097',
 '91950032097525760',
 '101758768534138880',
 '92457484513574913',
 '98537413051293697',
 '93784012509818880',
 '97040545645473792',
 '93772952134369280',
 '101016893695082496',
 '101427796009689088',
 '92960965297045504',
 '94828031247515648',
 '93400837157695488',
 '96369935135158272',
 '102352338030837761',
 '96992650850344961',
 '95502045565562880',
 '101389292311556096',
 '91792484321067008',
 '100536116343607296',
 '92280866566455296',
 '93334546715648000',
 '93359167267549184',
 '102182296815276032',
 '92684412923289600',
 '91763608274468864',
 '92453059170537474',
 '101087898002145280',
 '95188303338422272',
 '93983936493010944',
 '93769818116861953',
 '95898503749963777',
 '94502159269363712',
 '98807639470899200',
 '93282854406078464',
 '96575772726263809',
 '99357694883930112',
 '102139053503283200',
 '93736461454688256',
 '91850107737227265',
 '93978341606043648',
 '103207879510724608',
 '96984991245942784',
 '103120046653583360',
 '93794603127410688',
 '93319713035272192',
 '98483906319368192',
 '91794662699974656',
 '96276025083822080',
 '100978916554047488',
 '91871796252512256',
 '92668693426880512',
 '91718858846633984',
 '94142638093115392',
 '100868492621914112',
 '97457198254407681',
 '93729457709383680',
 '101108656782839809',
 '91911670112329728',
 '101329150966640641',
 '97025714552971264',
 '93590697944432640',
 '101020544421478400',
 '91925590256525312',
 '102401405851140096',
 '101841857687977984',
 '101671774474870785',
 '93411155669827584',
 '93502515991166976',
 '93779352625483776',
 '101022789389131776',
 '96116890295992320',
 '91786366156935168',
 '96354439341936640',
 '98068716322951169',
 '102715817497604096',
 '94939136431095808',
 '95128487727349760',
 '92926655382818819',
 '93059420296183808',
 '92370537531183104',
 '94201131856707584',
 '92640767331418113',
 '93291567711916032',
 '96726597469618176',
 '94510033202724865',
 '97027268685213696',
 '97002463147720705',
 '93900949902462977',
 '93726115671183360',
 '96229136179281920',
 '93800424825565184',
 '93750898479730688',
 '92642134204760065',
 '92848660353794049',
 '93378613063327744',
 '91904202460241920',
 '99954453586780161',
 '93526095730716673',
 '92648687725051905',
 '103137083945783296',
 '102441571353509888',
 '93709909803274240',
 '93314053316939776',
 '94456468476137472',
 '100720839560933376',
 '91990712266133504',
 '93671907987177473',
 '94807286882635776',
 '101197985895034880',
 '92339434510495744',
 '95999339016626176',
 ...]

In [17]:
tweet_corpus['93745589786902528']


Out[17]:
{'cashtag_mapping': {},
 'goldens': [{'end_idx': '3',
   'freebase_title': '/m/0d05l6',
   'mention': 'God',
   'mention_orig': 'God',
   'start_idx': '0',
   'tid': '93745589786902528',
   'wiki_title': 'God'}],
 'gram_set': {'#TLS',
  ',',
  ', Ask',
  ', Ask,',
  ', Ask, Yield',
  ', Ask, Yield.',
  ', Repent',
  ', Repent,',
  ', Repent, Ask',
  ', Repent, Ask,',
  ', Repent, Ask, Yield',
  ', Repent, Ask, Yield.',
  ', Yield',
  ', Yield.',
  '.',
  '=',
  '= Praise',
  '= Praise,',
  '= Praise, Repent',
  '= Praise, Repent,',
  '= Praise, Repent, Ask',
  '= Praise, Repent, Ask,',
  '= Praise, Repent, Ask, Yield',
  '= Praise, Repent, Ask, Yield.',
  'Ask',
  'Ask,',
  'Ask, Yield',
  'Ask, Yield.',
  'God',
  'God will',
  'God will never',
  'God will never drop',
  'God will never drop you',
  'God will never drop you any',
  'God will never drop you any farther',
  'God will never drop you any farther than',
  'God will never drop you any farther than your',
  'God will never drop you any farther than your knees',
  'God will never drop you any farther than your knees and',
  'God will never drop you any farther than your knees and that',
  'God will never drop you any farther than your knees and that is',
  'God will never drop you any farther than your knees and that is the',
  'God will never drop you any farther than your knees and that is the perfect',
  'God will never drop you any farther than your knees and that is the perfect position',
  'God will never drop you any farther than your knees and that is the perfect position to',
  'God will never drop you any farther than your knees and that is the perfect position to be',
  'God will never drop you any farther than your knees and that is the perfect position to be in',
  'God will never drop you any farther than your knees and that is the perfect position to be in.',
  'P.R.A.Y.',
  'P.R.A.Y. =',
  'P.R.A.Y. = Praise',
  'P.R.A.Y. = Praise,',
  'P.R.A.Y. = Praise, Repent',
  'P.R.A.Y. = Praise, Repent,',
  'P.R.A.Y. = Praise, Repent, Ask',
  'P.R.A.Y. = Praise, Repent, Ask,',
  'P.R.A.Y. = Praise, Repent, Ask, Yield',
  'P.R.A.Y. = Praise, Repent, Ask, Yield.',
  'Praise',
  'Praise,',
  'Praise, Repent',
  'Praise, Repent,',
  'Praise, Repent, Ask',
  'Praise, Repent, Ask,',
  'Praise, Repent, Ask, Yield',
  'Praise, Repent, Ask, Yield.',
  'Repent',
  'Repent,',
  'Repent, Ask',
  'Repent, Ask,',
  'Repent, Ask, Yield',
  'Repent, Ask, Yield.',
  'Yield',
  'Yield.',
  'and',
  'and that',
  'and that is',
  'and that is the',
  'and that is the perfect',
  'and that is the perfect position',
  'and that is the perfect position to',
  'and that is the perfect position to be',
  'and that is the perfect position to be in',
  'and that is the perfect position to be in.',
  'any',
  'any farther',
  'any farther than',
  'any farther than your',
  'any farther than your knees',
  'any farther than your knees and',
  'any farther than your knees and that',
  'any farther than your knees and that is',
  'any farther than your knees and that is the',
  'any farther than your knees and that is the perfect',
  'any farther than your knees and that is the perfect position',
  'any farther than your knees and that is the perfect position to',
  'any farther than your knees and that is the perfect position to be',
  'any farther than your knees and that is the perfect position to be in',
  'any farther than your knees and that is the perfect position to be in.',
  'be',
  'be in',
  'be in.',
  'drop',
  'drop you',
  'drop you any',
  'drop you any farther',
  'drop you any farther than',
  'drop you any farther than your',
  'drop you any farther than your knees',
  'drop you any farther than your knees and',
  'drop you any farther than your knees and that',
  'drop you any farther than your knees and that is',
  'drop you any farther than your knees and that is the',
  'drop you any farther than your knees and that is the perfect',
  'drop you any farther than your knees and that is the perfect position',
  'drop you any farther than your knees and that is the perfect position to',
  'drop you any farther than your knees and that is the perfect position to be',
  'drop you any farther than your knees and that is the perfect position to be in',
  'drop you any farther than your knees and that is the perfect position to be in.',
  'farther',
  'farther than',
  'farther than your',
  'farther than your knees',
  'farther than your knees and',
  'farther than your knees and that',
  'farther than your knees and that is',
  'farther than your knees and that is the',
  'farther than your knees and that is the perfect',
  'farther than your knees and that is the perfect position',
  'farther than your knees and that is the perfect position to',
  'farther than your knees and that is the perfect position to be',
  'farther than your knees and that is the perfect position to be in',
  'farther than your knees and that is the perfect position to be in.',
  'in',
  'in.',
  'is',
  'is the',
  'is the perfect',
  'is the perfect position',
  'is the perfect position to',
  'is the perfect position to be',
  'is the perfect position to be in',
  'is the perfect position to be in.',
  'knees',
  'knees and',
  'knees and that',
  'knees and that is',
  'knees and that is the',
  'knees and that is the perfect',
  'knees and that is the perfect position',
  'knees and that is the perfect position to',
  'knees and that is the perfect position to be',
  'knees and that is the perfect position to be in',
  'knees and that is the perfect position to be in.',
  'never',
  'never drop',
  'never drop you',
  'never drop you any',
  'never drop you any farther',
  'never drop you any farther than',
  'never drop you any farther than your',
  'never drop you any farther than your knees',
  'never drop you any farther than your knees and',
  'never drop you any farther than your knees and that',
  'never drop you any farther than your knees and that is',
  'never drop you any farther than your knees and that is the',
  'never drop you any farther than your knees and that is the perfect',
  'never drop you any farther than your knees and that is the perfect position',
  'never drop you any farther than your knees and that is the perfect position to',
  'never drop you any farther than your knees and that is the perfect position to be',
  'never drop you any farther than your knees and that is the perfect position to be in',
  'never drop you any farther than your knees and that is the perfect position to be in.',
  'perfect',
  'perfect position',
  'perfect position to',
  'perfect position to be',
  'perfect position to be in',
  'perfect position to be in.',
  'position',
  'position to',
  'position to be',
  'position to be in',
  'position to be in.',
  'than',
  'than your',
  'than your knees',
  'than your knees and',
  'than your knees and that',
  'than your knees and that is',
  'than your knees and that is the',
  'than your knees and that is the perfect',
  'than your knees and that is the perfect position',
  'than your knees and that is the perfect position to',
  'than your knees and that is the perfect position to be',
  'than your knees and that is the perfect position to be in',
  'than your knees and that is the perfect position to be in.',
  'that',
  'that is',
  'that is the',
  'that is the perfect',
  'that is the perfect position',
  'that is the perfect position to',
  'that is the perfect position to be',
  'that is the perfect position to be in',
  'that is the perfect position to be in.',
  'the',
  'the perfect',
  'the perfect position',
  'the perfect position to',
  'the perfect position to be',
  'the perfect position to be in',
  'the perfect position to be in.',
  'to',
  'to be',
  'to be in',
  'to be in.',
  'will',
  'will never',
  'will never drop',
  'will never drop you',
  'will never drop you any',
  'will never drop you any farther',
  'will never drop you any farther than',
  'will never drop you any farther than your',
  'will never drop you any farther than your knees',
  'will never drop you any farther than your knees and',
  'will never drop you any farther than your knees and that',
  'will never drop you any farther than your knees and that is',
  'will never drop you any farther than your knees and that is the',
  'will never drop you any farther than your knees and that is the perfect',
  'will never drop you any farther than your knees and that is the perfect position',
  'will never drop you any farther than your knees and that is the perfect position to',
  'will never drop you any farther than your knees and that is the perfect position to be',
  'will never drop you any farther than your knees and that is the perfect position to be in',
  'will never drop you any farther than your knees and that is the perfect position to be in.',
  'you',
  'you any',
  'you any farther',
  'you any farther than',
  'you any farther than your',
  'you any farther than your knees',
  'you any farther than your knees and',
  'you any farther than your knees and that',
  'you any farther than your knees and that is',
  'you any farther than your knees and that is the',
  'you any farther than your knees and that is the perfect',
  'you any farther than your knees and that is the perfect position',
  'you any farther than your knees and that is the perfect position to',
  'you any farther than your knees and that is the perfect position to be',
  'you any farther than your knees and that is the perfect position to be in',
  'you any farther than your knees and that is the perfect position to be in.',
  'your',
  'your knees',
  'your knees and',
  'your knees and that',
  'your knees and that is',
  'your knees and that is the',
  'your knees and that is the perfect',
  'your knees and that is the perfect position',
  'your knees and that is the perfect position to',
  'your knees and that is the perfect position to be',
  'your knees and that is the perfect position to be in',
  'your knees and that is the perfect position to be in.'},
 'hashtag_mapping': {u'TLS': {u'indices': [134, 138], u'text': u'TLS'}},
 'mention_set': {'God'},
 'ngrams': {1: ['God',
   'will',
   'never',
   'drop',
   'you',
   'any',
   'farther',
   'than',
   'your',
   'knees',
   'and',
   'that',
   'is',
   'the',
   'perfect',
   'position',
   'to',
   'be',
   'in',
   '.',
   'P.R.A.Y.',
   '=',
   'Praise',
   ',',
   'Repent',
   ',',
   'Ask',
   ',',
   'Yield',
   '.',
   '#TLS'],
  2: ['God will',
   'will never',
   'never drop',
   'drop you',
   'you any',
   'any farther',
   'farther than',
   'than your',
   'your knees',
   'knees and',
   'and that',
   'that is',
   'is the',
   'the perfect',
   'perfect position',
   'position to',
   'to be',
   'be in',
   'in.',
   'P.R.A.Y. =',
   '= Praise',
   'Praise,',
   ', Repent',
   'Repent,',
   ', Ask',
   'Ask,',
   ', Yield',
   'Yield.',
   'in',
   'Yield'],
  3: ['God will never',
   'will never drop',
   'never drop you',
   'drop you any',
   'you any farther',
   'any farther than',
   'farther than your',
   'than your knees',
   'your knees and',
   'knees and that',
   'and that is',
   'that is the',
   'is the perfect',
   'the perfect position',
   'perfect position to',
   'position to be',
   'to be in',
   'be in.',
   'P.R.A.Y. = Praise',
   '= Praise,',
   'Praise, Repent',
   ', Repent,',
   'Repent, Ask',
   ', Ask,',
   'Ask, Yield',
   ', Yield.',
   'be in',
   ', Yield'],
  4: ['God will never drop',
   'will never drop you',
   'never drop you any',
   'drop you any farther',
   'you any farther than',
   'any farther than your',
   'farther than your knees',
   'than your knees and',
   'your knees and that',
   'knees and that is',
   'and that is the',
   'that is the perfect',
   'is the perfect position',
   'the perfect position to',
   'perfect position to be',
   'position to be in',
   'to be in.',
   'P.R.A.Y. = Praise,',
   '= Praise, Repent',
   'Praise, Repent,',
   ', Repent, Ask',
   'Repent, Ask,',
   ', Ask, Yield',
   'Ask, Yield.',
   'to be in',
   'Ask, Yield'],
  5: ['God will never drop you',
   'will never drop you any',
   'never drop you any farther',
   'drop you any farther than',
   'you any farther than your',
   'any farther than your knees',
   'farther than your knees and',
   'than your knees and that',
   'your knees and that is',
   'knees and that is the',
   'and that is the perfect',
   'that is the perfect position',
   'is the perfect position to',
   'the perfect position to be',
   'perfect position to be in',
   'position to be in.',
   'P.R.A.Y. = Praise, Repent',
   '= Praise, Repent,',
   'Praise, Repent, Ask',
   ', Repent, Ask,',
   'Repent, Ask, Yield',
   ', Ask, Yield.',
   'position to be in',
   ', Ask, Yield'],
  6: ['God will never drop you any',
   'will never drop you any farther',
   'never drop you any farther than',
   'drop you any farther than your',
   'you any farther than your knees',
   'any farther than your knees and',
   'farther than your knees and that',
   'than your knees and that is',
   'your knees and that is the',
   'knees and that is the perfect',
   'and that is the perfect position',
   'that is the perfect position to',
   'is the perfect position to be',
   'the perfect position to be in',
   'perfect position to be in.',
   'P.R.A.Y. = Praise, Repent,',
   '= Praise, Repent, Ask',
   'Praise, Repent, Ask,',
   ', Repent, Ask, Yield',
   'Repent, Ask, Yield.',
   'perfect position to be in',
   'Repent, Ask, Yield'],
  7: ['God will never drop you any farther',
   'will never drop you any farther than',
   'never drop you any farther than your',
   'drop you any farther than your knees',
   'you any farther than your knees and',
   'any farther than your knees and that',
   'farther than your knees and that is',
   'than your knees and that is the',
   'your knees and that is the perfect',
   'knees and that is the perfect position',
   'and that is the perfect position to',
   'that is the perfect position to be',
   'is the perfect position to be in',
   'the perfect position to be in.',
   'P.R.A.Y. = Praise, Repent, Ask',
   '= Praise, Repent, Ask,',
   'Praise, Repent, Ask, Yield',
   ', Repent, Ask, Yield.',
   'the perfect position to be in',
   ', Repent, Ask, Yield'],
  8: ['God will never drop you any farther than',
   'will never drop you any farther than your',
   'never drop you any farther than your knees',
   'drop you any farther than your knees and',
   'you any farther than your knees and that',
   'any farther than your knees and that is',
   'farther than your knees and that is the',
   'than your knees and that is the perfect',
   'your knees and that is the perfect position',
   'knees and that is the perfect position to',
   'and that is the perfect position to be',
   'that is the perfect position to be in',
   'is the perfect position to be in.',
   'P.R.A.Y. = Praise, Repent, Ask,',
   '= Praise, Repent, Ask, Yield',
   'Praise, Repent, Ask, Yield.',
   'is the perfect position to be in',
   'Praise, Repent, Ask, Yield'],
  9: ['God will never drop you any farther than your',
   'will never drop you any farther than your knees',
   'never drop you any farther than your knees and',
   'drop you any farther than your knees and that',
   'you any farther than your knees and that is',
   'any farther than your knees and that is the',
   'farther than your knees and that is the perfect',
   'than your knees and that is the perfect position',
   'your knees and that is the perfect position to',
   'knees and that is the perfect position to be',
   'and that is the perfect position to be in',
   'that is the perfect position to be in.',
   'P.R.A.Y. = Praise, Repent, Ask, Yield',
   '= Praise, Repent, Ask, Yield.',
   'that is the perfect position to be in',
   '= Praise, Repent, Ask, Yield'],
  10: ['God will never drop you any farther than your knees',
   'will never drop you any farther than your knees and',
   'never drop you any farther than your knees and that',
   'drop you any farther than your knees and that is',
   'you any farther than your knees and that is the',
   'any farther than your knees and that is the perfect',
   'farther than your knees and that is the perfect position',
   'than your knees and that is the perfect position to',
   'your knees and that is the perfect position to be',
   'knees and that is the perfect position to be in',
   'and that is the perfect position to be in.',
   'P.R.A.Y. = Praise, Repent, Ask, Yield.',
   'and that is the perfect position to be in'],
  11: ['God will never drop you any farther than your knees and',
   'will never drop you any farther than your knees and that',
   'never drop you any farther than your knees and that is',
   'drop you any farther than your knees and that is the',
   'you any farther than your knees and that is the perfect',
   'any farther than your knees and that is the perfect position',
   'farther than your knees and that is the perfect position to',
   'than your knees and that is the perfect position to be',
   'your knees and that is the perfect position to be in',
   'knees and that is the perfect position to be in.',
   'knees and that is the perfect position to be in'],
  12: ['God will never drop you any farther than your knees and that',
   'will never drop you any farther than your knees and that is',
   'never drop you any farther than your knees and that is the',
   'drop you any farther than your knees and that is the perfect',
   'you any farther than your knees and that is the perfect position',
   'any farther than your knees and that is the perfect position to',
   'farther than your knees and that is the perfect position to be',
   'than your knees and that is the perfect position to be in',
   'your knees and that is the perfect position to be in.',
   'your knees and that is the perfect position to be in'],
  13: ['God will never drop you any farther than your knees and that is',
   'will never drop you any farther than your knees and that is the',
   'never drop you any farther than your knees and that is the perfect',
   'drop you any farther than your knees and that is the perfect position',
   'you any farther than your knees and that is the perfect position to',
   'any farther than your knees and that is the perfect position to be',
   'farther than your knees and that is the perfect position to be in',
   'than your knees and that is the perfect position to be in.',
   'than your knees and that is the perfect position to be in'],
  14: ['God will never drop you any farther than your knees and that is the',
   'will never drop you any farther than your knees and that is the perfect',
   'never drop you any farther than your knees and that is the perfect position',
   'drop you any farther than your knees and that is the perfect position to',
   'you any farther than your knees and that is the perfect position to be',
   'any farther than your knees and that is the perfect position to be in',
   'farther than your knees and that is the perfect position to be in.',
   'farther than your knees and that is the perfect position to be in'],
  15: ['God will never drop you any farther than your knees and that is the perfect',
   'will never drop you any farther than your knees and that is the perfect position',
   'never drop you any farther than your knees and that is the perfect position to',
   'drop you any farther than your knees and that is the perfect position to be',
   'you any farther than your knees and that is the perfect position to be in',
   'any farther than your knees and that is the perfect position to be in.',
   'any farther than your knees and that is the perfect position to be in'],
  16: ['God will never drop you any farther than your knees and that is the perfect position',
   'will never drop you any farther than your knees and that is the perfect position to',
   'never drop you any farther than your knees and that is the perfect position to be',
   'drop you any farther than your knees and that is the perfect position to be in',
   'you any farther than your knees and that is the perfect position to be in.',
   'you any farther than your knees and that is the perfect position to be in'],
  17: ['God will never drop you any farther than your knees and that is the perfect position to',
   'will never drop you any farther than your knees and that is the perfect position to be',
   'never drop you any farther than your knees and that is the perfect position to be in',
   'drop you any farther than your knees and that is the perfect position to be in.',
   'drop you any farther than your knees and that is the perfect position to be in'],
  18: ['God will never drop you any farther than your knees and that is the perfect position to be',
   'will never drop you any farther than your knees and that is the perfect position to be in',
   'never drop you any farther than your knees and that is the perfect position to be in.',
   'never drop you any farther than your knees and that is the perfect position to be in'],
  19: ['God will never drop you any farther than your knees and that is the perfect position to be in',
   'will never drop you any farther than your knees and that is the perfect position to be in.',
   'will never drop you any farther than your knees and that is the perfect position to be in'],
  20: ['God will never drop you any farther than your knees and that is the perfect position to be in.',
   'God will never drop you any farther than your knees and that is the perfect position to be in'],
  21: [],
  22: [],
  23: [],
  24: [],
  25: [],
  26: [],
  27: [],
  28: [],
  29: [],
  30: [],
  31: [],
  32: [],
  33: [],
  34: [],
  35: [],
  36: [],
  37: [],
  38: [],
  39: [],
  40: [],
  41: [],
  42: [],
  43: [],
  44: [],
  45: [],
  46: [],
  47: [],
  48: [],
  49: [],
  50: [],
  51: [],
  52: [],
  53: [],
  54: [],
  55: [],
  56: [],
  57: [],
  58: [],
  59: [],
  60: [],
  61: [],
  62: [],
  63: [],
  64: [],
  65: [],
  66: [],
  67: [],
  68: [],
  69: [],
  70: [],
  71: [],
  72: [],
  73: [],
  74: [],
  75: [],
  76: [],
  77: [],
  78: [],
  79: [],
  80: [],
  81: [],
  82: [],
  83: [],
  84: [],
  85: [],
  86: [],
  87: [],
  88: [],
  89: [],
  90: [],
  91: [],
  92: [],
  93: [],
  94: [],
  95: [],
  96: [],
  97: [],
  98: [],
  99: []},
 'stanford_parsed': {u'coref': [[[[u'that', 0, 1, 11, 12],
     [u'the perfect position to be in', 0, 5, 13, 19]]],
   [[[u'P.R.A.Y. = Praise', 1, 2, 0, 3],
     [u'Repent , Ask , Yield', 1, 8, 4, 9]]]],
  u'sentences': [{u'dependencies': [[u'root', u'ROOT', u'drop'],
     [u'nsubj', u'drop', u'God'],
     [u'aux', u'drop', u'will'],
     [u'neg', u'drop', u'never'],
     [u'dobj', u'drop', u'you'],
     [u'dep', u'farther', u'any'],
     [u'advmod', u'drop', u'farther'],
     [u'poss', u'knees', u'your'],
     [u'prep_than', u'drop', u'knees'],
     [u'nsubj', u'position', u'that'],
     [u'cop', u'position', u'is'],
     [u'det', u'position', u'the'],
     [u'amod', u'position', u'perfect'],
     [u'conj_and', u'drop', u'position'],
     [u'aux', u'be', u'to'],
     [u'vmod', u'position', u'be'],
     [u'prep', u'be', u'in']],
    u'parsetree': u'(ROOT (S (S (NP (NNP God)) (VP (MD will) (ADVP (RB never)) (VP (VB drop) (NP (PRP you)) (ADVP (DT any) (RBR farther)) (PP (IN than) (NP (PRP$ your) (NNS knees)))))) (CC and) (S (NP (DT that)) (VP (VBZ is) (NP (DT the) (JJ perfect) (NN position) (S (VP (TO to) (VP (VB be) (PP (IN in)))))))) (. .)))',
    u'text': u'God will never drop you any farther than your knees and that is the perfect position to be in.',
    u'words': [[u'God',
      {u'CharacterOffsetBegin': u'0',
       u'CharacterOffsetEnd': u'3',
       u'Lemma': u'God',
       u'NamedEntityTag': u'O',
       u'PartOfSpeech': u'NNP'}],
     [u'will',
      {u'CharacterOffsetBegin': u'4',
       u'CharacterOffsetEnd': u'8',
       u'Lemma': u'will',
       u'NamedEntityTag': u'O',
       u'PartOfSpeech': u'MD'}],
     [u'never',
      {u'CharacterOffsetBegin': u'9',
       u'CharacterOffsetEnd': u'14',
       u'Lemma': u'never',
       u'NamedEntityTag': u'O',
       u'PartOfSpeech': u'RB'}],
     [u'drop',
      {u'CharacterOffsetBegin': u'15',
       u'CharacterOffsetEnd': u'19',
       u'Lemma': u'drop',
       u'NamedEntityTag': u'O',
       u'PartOfSpeech': u'VB'}],
     [u'you',
      {u'CharacterOffsetBegin': u'20',
       u'CharacterOffsetEnd': u'23',
       u'Lemma': u'you',
       u'NamedEntityTag': u'O',
       u'PartOfSpeech': u'PRP'}],
     [u'any',
      {u'CharacterOffsetBegin': u'24',
       u'CharacterOffsetEnd': u'27',
       u'Lemma': u'any',
       u'NamedEntityTag': u'O',
       u'PartOfSpeech': u'DT'}],
     [u'farther',
      {u'CharacterOffsetBegin': u'28',
       u'CharacterOffsetEnd': u'35',
       u'Lemma': u'farther',
       u'NamedEntityTag': u'O',
       u'PartOfSpeech': u'RBR'}],
     [u'than',
      {u'CharacterOffsetBegin': u'36',
       u'CharacterOffsetEnd': u'40',
       u'Lemma': u'than',
       u'NamedEntityTag': u'O',
       u'PartOfSpeech': u'IN'}],
     [u'your',
      {u'CharacterOffsetBegin': u'41',
       u'CharacterOffsetEnd': u'45',
       u'Lemma': u'you',
       u'NamedEntityTag': u'O',
       u'PartOfSpeech': u'PRP$'}],
     [u'knees',
      {u'CharacterOffsetBegin': u'46',
       u'CharacterOffsetEnd': u'51',
       u'Lemma': u'knee',
       u'NamedEntityTag': u'O',
       u'PartOfSpeech': u'NNS'}],
     [u'and',
      {u'CharacterOffsetBegin': u'52',
       u'CharacterOffsetEnd': u'55',
       u'Lemma': u'and',
       u'NamedEntityTag': u'O',
       u'PartOfSpeech': u'CC'}],
     [u'that',
      {u'CharacterOffsetBegin': u'56',
       u'CharacterOffsetEnd': u'60',
       u'Lemma': u'that',
       u'NamedEntityTag': u'O',
       u'PartOfSpeech': u'DT'}],
     [u'is',
      {u'CharacterOffsetBegin': u'61',
       u'CharacterOffsetEnd': u'63',
       u'Lemma': u'be',
       u'NamedEntityTag': u'O',
       u'PartOfSpeech': u'VBZ'}],
     [u'the',
      {u'CharacterOffsetBegin': u'64',
       u'CharacterOffsetEnd': u'67',
       u'Lemma': u'the',
       u'NamedEntityTag': u'O',
       u'PartOfSpeech': u'DT'}],
     [u'perfect',
      {u'CharacterOffsetBegin': u'68',
       u'CharacterOffsetEnd': u'75',
       u'Lemma': u'perfect',
       u'NamedEntityTag': u'O',
       u'PartOfSpeech': u'JJ'}],
     [u'position',
      {u'CharacterOffsetBegin': u'76',
       u'CharacterOffsetEnd': u'84',
       u'Lemma': u'position',
       u'NamedEntityTag': u'O',
       u'PartOfSpeech': u'NN'}],
     [u'to',
      {u'CharacterOffsetBegin': u'85',
       u'CharacterOffsetEnd': u'87',
       u'Lemma': u'to',
       u'NamedEntityTag': u'O',
       u'PartOfSpeech': u'TO'}],
     [u'be',
      {u'CharacterOffsetBegin': u'88',
       u'CharacterOffsetEnd': u'90',
       u'Lemma': u'be',
       u'NamedEntityTag': u'O',
       u'PartOfSpeech': u'VB'}],
     [u'in',
      {u'CharacterOffsetBegin': u'91',
       u'CharacterOffsetEnd': u'93',
       u'Lemma': u'in',
       u'NamedEntityTag': u'O',
       u'PartOfSpeech': u'IN'}],
     [u'.',
      {u'CharacterOffsetBegin': u'93',
       u'CharacterOffsetEnd': u'94',
       u'Lemma': u'.',
       u'NamedEntityTag': u'O',
       u'PartOfSpeech': u'.'}]]},
   {u'dependencies': [[u'root', u'ROOT', u'Praise'],
     [u'nn', u'Praise', u'P.R.A.Y.'],
     [u'amod', u'Praise', u'='],
     [u'nn', u'Yield', u'Repent'],
     [u'appos', u'Yield', u'Ask'],
     [u'appos', u'Praise', u'Yield']],
    u'parsetree': u'(ROOT (NP (NP (NN P.R.A.Y.) (JJ =) (NN Praise)) (, ,) (NP (NNP Repent) (, ,) (NNP Ask) (, ,) (NNP Yield)) (. .)))',
    u'text': u'P.R.A.Y. = Praise, Repent, Ask, Yield.',
    u'words': [[u'P.R.A.Y.',
      {u'CharacterOffsetBegin': u'95',
       u'CharacterOffsetEnd': u'103',
       u'Lemma': u'p.r.a.y.',
       u'NamedEntityTag': u'O',
       u'PartOfSpeech': u'NN'}],
     [u'',
      {u'': u'',
       u'CharacterOffsetBegin': u'104',
       u'CharacterOffsetEnd': u'105',
       u'Lemma': u'',
       u'NamedEntityTag': u'O',
       u'PartOfSpeech': u'JJ'}],
     [u'Praise',
      {u'CharacterOffsetBegin': u'106',
       u'CharacterOffsetEnd': u'112',
       u'Lemma': u'praise',
       u'NamedEntityTag': u'O',
       u'PartOfSpeech': u'NN'}],
     [u',',
      {u'CharacterOffsetBegin': u'112',
       u'CharacterOffsetEnd': u'113',
       u'Lemma': u',',
       u'NamedEntityTag': u'O',
       u'PartOfSpeech': u','}],
     [u'Repent',
      {u'CharacterOffsetBegin': u'114',
       u'CharacterOffsetEnd': u'120',
       u'Lemma': u'Repent',
       u'NamedEntityTag': u'O',
       u'PartOfSpeech': u'NNP'}],
     [u',',
      {u'CharacterOffsetBegin': u'120',
       u'CharacterOffsetEnd': u'121',
       u'Lemma': u',',
       u'NamedEntityTag': u'O',
       u'PartOfSpeech': u','}],
     [u'Ask',
      {u'CharacterOffsetBegin': u'122',
       u'CharacterOffsetEnd': u'125',
       u'Lemma': u'Ask',
       u'NamedEntityTag': u'O',
       u'PartOfSpeech': u'NNP'}],
     [u',',
      {u'CharacterOffsetBegin': u'125',
       u'CharacterOffsetEnd': u'126',
       u'Lemma': u',',
       u'NamedEntityTag': u'O',
       u'PartOfSpeech': u','}],
     [u'Yield',
      {u'CharacterOffsetBegin': u'127',
       u'CharacterOffsetEnd': u'132',
       u'Lemma': u'Yield',
       u'NamedEntityTag': u'O',
       u'PartOfSpeech': u'NNP'}],
     [u'.',
      {u'CharacterOffsetBegin': u'132',
       u'CharacterOffsetEnd': u'133',
       u'Lemma': u'.',
       u'NamedEntityTag': u'O',
       u'PartOfSpeech': u'.'}]]},
   {u'dependencies': [[u'root', u'ROOT', u'#TLS']],
    u'parsetree': u'(ROOT (NP (NN #TLS)))',
    u'text': u'#TLS',
    u'words': [[u'#TLS',
      {u'CharacterOffsetBegin': u'134',
       u'CharacterOffsetEnd': u'138',
       u'Lemma': u'#tls',
       u'NamedEntityTag': u'O',
       u'PartOfSpeech': u'NN'}]]}]},
 'tweet_info': {u'contributors': None,
  u'coordinates': None,
  u'created_at': u'Wed Jul 20 18:14:24 +0000 2011',
  u'entities': {u'hashtags': [{u'indices': [134, 138], u'text': u'TLS'}],
   u'symbols': [],
   u'urls': [],
   u'user_mentions': []},
  u'favorite_count': 194,
  u'favorited': False,
  u'geo': None,
  u'id': 93745589786902528,
  u'id_str': u'93745589786902528',
  u'in_reply_to_screen_name': None,
  u'in_reply_to_status_id': None,
  u'in_reply_to_status_id_str': None,
  u'in_reply_to_user_id': None,
  u'in_reply_to_user_id_str': None,
  u'is_quote_status': False,
  u'lang': u'en',
  u'place': None,
  u'retweet_count': 449,
  u'retweeted': False,
  u'source': u'<a href="http://twitter.com" rel="nofollow">Twitter Web Client</a>',
  u'text': u'God will never drop you any farther than your knees and that is the perfect position to be in. P.R.A.Y. = Praise, Repent, Ask, Yield. #TLS',
  u'truncated': False,
  u'user': {u'contributors_enabled': False,
   u'created_at': u'Thu Oct 15 15:52:52 +0000 2009',
   u'default_profile': False,
   u'default_profile_image': False,
   u'description': u'True Love Forever!',
   u'entities': {u'description': {u'urls': []}},
   u'favourites_count': 24,
   u'follow_request_sent': False,
   u'followers_count': 1250455,
   u'following': False,
   u'friends_count': 27,
   u'geo_enabled': False,
   u'has_extended_profile': False,
   u'id': 82647069,
   u'id_str': u'82647069',
   u'is_translation_enabled': False,
   u'is_translator': False,
   u'lang': u'en',
   u'listed_count': 7879,
   u'location': u'',
   u'name': u'The Love Stories',
   u'notifications': False,
   u'profile_background_color': u'EBEBEB',
   u'profile_background_image_url': u'http://abs.twimg.com/images/themes/theme7/bg.gif',
   u'profile_background_image_url_https': u'https://abs.twimg.com/images/themes/theme7/bg.gif',
   u'profile_background_tile': False,
   u'profile_banner_url': u'https://pbs.twimg.com/profile_banners/82647069/1412285293',
   u'profile_image_url': u'http://pbs.twimg.com/profile_images/517788093605097472/Zj4O8z4B_normal.jpeg',
   u'profile_image_url_https': u'https://pbs.twimg.com/profile_images/517788093605097472/Zj4O8z4B_normal.jpeg',
   u'profile_link_color': u'990000',
   u'profile_sidebar_border_color': u'DFDFDF',
   u'profile_sidebar_fill_color': u'F3F3F3',
   u'profile_text_color': u'333333',
   u'profile_use_background_image': True,
   u'protected': False,
   u'screen_name': u'thelovestories',
   u'statuses_count': 31257,
   u'time_zone': u'Central Time (US & Canada)',
   u'url': None,
   u'utc_offset': -18000,
   u'verified': False}},
 'url_mapping': {},
 'usermention_mapping': {}}

In [56]:
with open("tweet_text_ids.txt", "wb") as g:
    with open("tweet_text.txt", "wb") as f:
        for tw in tweet_corpus.values():
            g.write(str(tw['tweet_info']['id'])+"\n")
            text = tw['tweet_info']['text']
            if "\n" in text or "\t" in text:
                print "11"
            text = text.replace("\n", "  ")
            text = text.replace("\t", "  ")
            f.write(text.encode("utf8")+"\n")


11
11
11
11
11
11
11
11
11
11
11
11
11
11
11
11
11
11
11
11
11

In [29]:
p = "plpl \nko"

In [30]:
print p


plpl 
ko

In [36]:
p[6:]


Out[36]:
'ko'

In [59]:
with open("tweet_text.txt", "rb") as f:
    for line in f:
        line.encode("utf8")


---------------------------------------------------------------------------
UnicodeDecodeError                        Traceback (most recent call last)
<ipython-input-59-67f6011cc69b> in <module>()
      1 with open("tweet_text.txt", "rb") as f:
      2     for line in f:
----> 3         line.encode("utf8")

UnicodeDecodeError: 'ascii' codec can't decode byte 0xe2 in position 37: ordinal not in range(128)

In [68]:
with open("output.txt", "rb") as f:
    col = []
    for line in f:
        col.append(line.strip())

In [69]:
with open("tweet_text_ids.txt", "rb") as f:
    ids = []
    for line in f:
        ids.append(line.strip())

In [71]:
output = {}
for i in range(len(col)):
    output[ids[i]] = col[i]

In [75]:
output


Out[75]:
{'93314579924393984': [],
 '94884467910512640': ['Christian', 'Tyrone Power'],
 '94259157615648770': [],
 '93141776474456064': ['Diego'],
 '91707256726499328': [],
 '97794908630425600': [],
 '92955019615272961': [],
 '93759987540705280': [],
 '93404134618841088': [],
 '102312601190539265': [],
 '99288973943390209': ['Century Fox Animation',
  'Blue Sky Studios Announce Ice-Age Casting'],
 '95555757415743488': [],
 '102125361248215040': [],
 '101574290444521472': [],
 '100658108661112832': [],
 '100977986563604480': [],
 '94499293989322752': [],
 '95953148530790400': ['of America'],
 '91927932758540291': [],
 '96222040377393152': ['McNabb'],
 '92681654858035201': [],
 '92401321323409409': [],
 '95969501367111680': ['SportsXchange', 'the Raiders'],
 '92579651679039488': [],
 '102065579078332417': [],
 '92481778706690048': [],
 '92237433697091584': [],
 '97408964903448576': ['Calipari', 'UK Legends', 'Joe B'],
 '98431701365178369': [],
 '101419346307461120': [],
 '100152860288880640': [],
 '93455340603064320': [],
 '98025480908849153': [],
 '93548445998129152': [],
 '92643760508387328': [],
 '94063258499153920': [],
 '93500846666551296': [],
 '100571949746495488': [],
 '96985217134366720': [],
 '93899167105167360': ['Blames Crisis'],
 '100712904470429696': [],
 '94404324922363904': [],
 '96004045017321473': [],
 '95103154651275264': [],
 '91853498135752704': [],
 '100689319639007232': [],
 '93484337873420288': [],
 '95072342140653568': [],
 '100992810420936704': ['America'],
 '100644788671488000': [],
 '99611423281393664': [],
 '97665015968047104': ['Heat', 'Erik Spoelstra'],
 '100901337633521664': ['Guardian'],
 '93708758311632896': [],
 '102580796497461248': ['State Fair'],
 '95533101521977344': [],
 '92110996134576128': ['Geng'],
 '91840950862741504': [],
 '92744177418379265': [],
 '100671608204701696': [],
 '98181224346034176': [],
 '91872307479449600': ['Dog', 'Chelsea'],
 '93923166950395904': [],
 '93002028334317568': [],
 '103005657980084225': [],
 '93672807057203201': ['Smith'],
 '100982604374880256': [],
 '99670130161745920': [],
 '94462178547077123': [],
 '99147499725529088': [],
 '101478195630587904': ['POTTER', 'David Yates', 'Screenwriter Steve'],
 '98291102519537664': [],
 '92911563723386880': [],
 '95906498089189376': [],
 '94441788412133377': [],
 '95434223678263297': ['National Assembly'],
 '93154993573593088': [],
 '92043992925077505': [],
 '102306127076786176': [],
 '95289511801663488': [],
 '103209376944357376': [],
 '92665161021788160': [],
 '98037368111640576': ['Pope', 'UK', 'Mediterranean', 'Kirk'],
 '101315943799590915': [],
 '100998100411363329': [],
 '100653917553164288': [],
 '93429476393893888': [],
 '101573195743764482': ['Chelsea'],
 '94409462944768000': ['News', 'Norway'],
 '99659141890576384': ['Beach', 'Photographers', 'Videographers'],
 '93348468701151232': [],
 '92817093799587841': [],
 '94575922270838784': ['Field'],
 '91655593139511296': ['Johnson'],
 '92275090007404544': [],
 '94709694458900480': [],
 '102401405851140096': [],
 '99029732041031680': ['Furo', 'Rivers United U19', 'Helsinki', 'Nigeria'],
 '99287434281484288': [],
 '92689105703550976': [],
 '97376969657823233': [],
 '92006871375945728': [],
 '92884204265811970': [],
 '101371988911726592': [],
 '102865437901070336': [],
 '93273993544597504': [],
 '92968056053186562': ['Brothers'],
 '93443989793030144': [],
 '92446791315890176': [],
 '94518623552552961': [],
 '102411827916455936': [],
 '92713454162554880': [],
 '92284144960286721': [],
 '92201997121499136': [],
 '96956789559537665': [],
 '99556024939331584': [],
 '93324903985577984': [],
 '92294980080644096': [],
 '100529934270873600': [],
 '99653392141533184': ['States', 'AAA'],
 '96211977642315776': [],
 '95951788217352193': [],
 '93080394370519040': [],
 '96427426464284673': [],
 '96836366918352896': [],
 '97819281215074304': [],
 '95396113888194560': ['Jazeera Misr', 'Adli'],
 '96651234517323776': [],
 '94029148615483393': [],
 '92297618285273088': ['Award', 'Human Rights'],
 '91708290513383425': [],
 '92435485066145792': [],
 '99641382926434304': [],
 '95438229242773504': [],
 '92624675829596160': [],
 '93538364686483456': [],
 '102620482892869633': [],
 '94813064473481216': ['Winehouse'],
 '93731915043049473': [],
 '93031292421144576': ['Villa'],
 '93211671123861504': [],
 '93330419516313600': [],
 '102087976670076928': ['Herrera', 'Joel Campbell'],
 '92928036990746625': [],
 '100925943694110720': [],
 '96228351936692224': [],
 '92990179672985600': [],
 '94810322493390848': [],
 '93318722109964288': [],
 '99087290470629376': ['Monetary Fund'],
 '100732225204654080': [],
 '94811433027633152': ['Winehouse'],
 '93020527530221568': [],
 '94563016393232384': [],
 '95962079915290624': ['Harris', 'Blue Bombers'],
 '101260598460940289': ['MB', 'Umno Youth'],
 '93041278475972608': [],
 '92504145797451777': [],
 '93084582966079488': [],
 '91835092061794304': [],
 '92099152187633664': [],
 '91676265702100992': [],
 '93639890465595392': [],
 '93388656202620928': [],
 '92680294544588800': [],
 '92945537803886592': [],
 '93326271056060418': [],
 '98029817156468736': ['Conference'],
 '92688744313929728': [],
 '97311289960968192': [],
 '97826342103564289': ['Obama'],
 '94480845791956993': [],
 '100978397903192064': [],
 '94137640999325696': [],
 '101065852761284608': [],
 '102381137401348098': ['Su\xc3\xa1rez', 'Anfield', 'Gerrard'],
 '101808467257470976': ['Stevan Ridley'],
 '92952817727324160': [],
 '92574271288778752': ['May'],
 '93681637950365696': ['Era'],
 '98174256785010688': ['Giffords'],
 '100709634683305985': [],
 '94135082436132864': [],
 '93188706533515264': [],
 '97339570164731904': ['Directorate'],
 '102073483936268288': [],
 '94070517031636997': [],
 '97096475271634944': ['Newton'],
 '91796115715919873': [],
 '93737472516829184': [],
 '91803825563910144': [],
 '97722436182749184': [],
 '92931106382413824': [],
 '93772486549848064': [],
 '94859886923481088': [],
 '92981004804960256': [],
 '92668398948978688': [],
 '92755669811339264': [],
 '95555792912134144': [],
 '101571060847230976': [],
 '101032158956761088': [],
 '91883894542049281': [],
 '93322827813494785': [],
 '93398463697526784': [],
 '94558606829166595': [],
 '101056007421173760': [],
 '91769916096643072': [],
 '92206704678285312': [],
 '96190924983508992': [],
 '96953957791956992': [],
 '92828887821717504': [],
 '101874095125307393': [],
 '93374198965219330': ['Corp', 'Fox News'],
 '101020544421478400': [],
 '92700486129553408': [],
 '92647293202857984': [],
 '93018627565027329': [],
 '91779272523517953': ['Potter', 'Deathly Hallows'],
 '92594537654009856': [],
 '100659567297445888': [],
 '95182527685337089': [],
 '92683945992392704': [],
 '92705594049441793': [],
 '92019191024066560': [],
 '101080446888181760': [],
 '92239380411981825': [],
 '94398791322578944': [],
 '93999585655140352': [],
 '98694351315664896': [],
 '91676190217220096': [],
 '91845119975759872': ['FP2', 'Simoncelli'],
 '93499783599235072': [],
 '92468980765306881': ['Fever'],
 '94526163174690817': [],
 '91946592923430912': [],
 '95177954954448896': [],
 '93435019728650240': [],
 '92577654489886720': [],
 '92975510937800704': ['Geiger'],
 '94456468476137472': ['SWAT'],
 '102796460789211136': [],
 '92591462549700608': ['Agree Campbell'],
 '92876105060581376': [],
 '99480036737957888': [],
 '93170883702685696': [],
 '100947346967367680': [],
 '101736175064190976': [],
 '98501375574544385': ['star', 'Roberta Vinci'],
 '92077173778825216': [],
 '94429050306707456': ['Criss'],
 '100010138022330368': ['Heat'],
 '98052883123146752': ['Anthony'],
 '91628739892490240': [],
 '94808210258669568': [],
 '101414384043175937': [],
 '94934958866579456': ['Says Congress'],
 '93255899140997120': ['Cup'],
 '92701225673428992': ['World Cup'],
 '91928845980143616': [],
 '94807440419332097': [],
 '93071805631303680': [],
 '99594637676789760': [],
 '94828031247515648': ['WineHouse'],
 '91850107737227265': [],
 '103226937052381184': ['Song'],
 '92670268052484096': [],
 '97969923694931968': ['Jazeera English'],
 '94845125288665088': ['Winehouse'],
 '93246093017620480': [],
 '97462622781321216': [],
 '93246986748301312': [],
 '99121683209785344': [],
 '94156730929381376': [],
 '92369414032343040': ['Edge of Glory'],
 '92915112100380672': [],
 '102041024507031555': ['van Persie'],
 '92240386990407680': [],
 '101635276731981824': [],
 '91970961699647488': [],
 '92380102771474432': [],
 '92773735211335680': [],
 '98932393993449472': ['Sea'],
 '91839109567488000': [],
 '93093466476773377': [],
 '92803031367819264': [],
 '93368936996933632': [],
 '92627625608097795': [],
 '92487204324126720': [],
 '97853032653529089': ['Coal Rejects',
  'Macarthur Coal',
  'Australia',
  'Peabody Energy'],
 '92720622232276992': [],
 '99661771735969792': [],
 '95815783678087169': [],
 '91908013434216448': [],
 '93322370688880641': [],
 '100338505523281920': [],
 '96276255372091392': [],
 '99129277999611906': [],
 '96024488075997184': ['FRANCISCO'],
 '102744094140411904': [],
 '93202345378332673': [],
 '93485185114443777': [],
 '97668537476317184': ['Burress'],
 '93725261312438272': [],
 '93036974818074624': [],
 '92434814619226113': [],
 '96113851828940800': [],
 '93152330396082176': [],
 '92323388076470274': [],
 '92348579057909760': [],
 '98086877143379968': [],
 '94376906727763968': [],
 '101734277972107264': [],
 '92891661000130561': ['Cech'],
 '91845690656960512': [],
 '102668987531395072': [],
 '101721820142637057': ['Obama', 'Holland'],
 '93334427844878336': [],
 '93363101503729664': ['Brooks'],
 '99117434937679872': [],
 '92588165663162369': [],
 '91914130545573888': [],
 '97028520554938368': ['Ecclestone', 'Sky', 'TV'],
 '97939393377075200': [],
 '93697436974985216': [],
 '92663918589259776': [],
 '92701930975010816': [],
 '103170022179999745': [],
 '94593937909616640': [],
 '93064738422009857': [],
 '93411155669827584': ['King', 'America'],
 '92282588458258432': [],
 '101643392131403776': [],
 '91952178360631296': [],
 '103212458935726080': [],
 '99113051927752704': [],
 '96945755121659904': ['S', 'Dawan Landry'],
 '100581944642904064': [],
 '91991519430582272': [],
 '93239357858791425': [],
 '92203555385131008': [],
 '93052450197606400': [],
 '100994783358631936': [],
 '96473651876077568': [],
 '97733882346078208': ['City cruised', 'Inter Milan'],
 '92054549212299265': [],
 '93015347564777472': [],
 '93745034825961472': [],
 '101758768534138880': [],
 '93411920736034816': [],
 '93352717229109248': [],
 '92638811993030656': [],
 '101757466324385792': ['Raiders'],
 '93617326653587456': ['Terry', 'Roberto Di Matteo'],
 '102124541182091264': [],
 '100553819494158337': [],
 '99510126662254594': [],
 '100686707728453632': ['News'],
 '93389798248681472': ['Bay Islands'],
 '102068034964946945': [],
 '100687447985364992': [],
 '96258902395584512': ['City', 'Sergio Aguero'],
 '100980416290373632': [],
 '91962882371235840': [],
 '92296187314581504': [],
 '95489083136155649': [],
 '96402182710104065': ['Canada', 'Toronto', 'Sydney'],
 '93274772011626496': [],
 '93146504931442688': [],
 '100912986448863232': ['Haziq'],
 '93321380757639169': [],
 '101016893695082496': [],
 '98037598789971969': [],
 '96651090807894017': ['Kolb'],
 '100672810757783552': [],
 '101427796009689088': ['Smith'],
 '92033712904093696': [],
 '103207560827514880': ['Seal Team'],
 '97004151946481664': [],
 '93568254626369536': [],
 '100714522351906817': [],
 '100717045175418881': ['Lane'],
 '97070990114308097': [],
 '94829178528075776': [],
 '97995265683886081': [],
 '93265655243096064': [],
 '97345214657732608': [],
 '93490469958467584': [],
 '91915890144186368': [],
 '92706091976237056': [],
 '92224552599429120': [],
 '96677269535334400': [],
 '94523757728776192': [],
 '100733236031913984': [],
 '94138194685210624': [],
 '92705848836628480': [],
 '102676754094751744': [],
 '99760962835460096': [],
 '98173600703582208': [],
 '93710611619393537': [],
 '103139143063842816': [],
 '102019118319943680': [],
 '91871635388375040': [],
 '91937809920823296': ['McCartney'],
 '93103314736132096': [],
 '102314342346788865': [],
 '99861223440527360': ['Rivera'],
 '93050424524935168': ['Norris'],
 '93143598341685248': [],
 '94386507481219072': ['capital'],
 '93356496913567744': [],
 '101940709053042688': [],
 '94153790718099456': [],
 '91858594844246016': ['Kevin', 'Jeju Island'],
 '101392563843502082': [],
 '102784970023641089': [],
 '89391663738400769': [],
 '92075215315681281': [],
 '100660375749529600': [],
 '102008853369077761': ['360'],
 '96178288992587776': [],
 '93826957975883776': [],
 '92625159864856577': [],
 '93967144450138112': [],
 '103137083945783296': [],
 '93081565348892672': [],
 '93745589786902528': [],
 '93695066069151744': ['Chi Minh City'],
 '92333694446084096': [],
 '94867896928448513': ['Clayton'],
 '92245339104481281': [],
 '101058689523400704': [],
 '93334286316478464': [],
 '92794355353530370': [],
 '100654462988849152': [],
 '100704090522271744': [],
 '98919027602231296': [],
 '99171192048599040': [],
 '100342613860818944': [],
 '98107329765056512': [],
 '96542543348637697': [],
 '93023640379076609': [],
 '93905391481651200': [],
 '92540726604349440': [],
 '93511911219535872': [],
 '93359167267549184': ['Rebel EOS'],
 '93497872150036481': [],
 '94930009613017089': [],
 '93445783814291456': [],
 '91989088462307329': [],
 '92643959591022592': [],
 '96585350411071489': [],
 '96566158613938178': ['Reggie Bush'],
 '97732251214823424': ['Rowling', 'Ministry of Magic'],
 '92989573579288576': [],
 '96191043862663169': ['Bueno'],
 '91928522565754880': [],
 '96782542702780416': [],
 '91733741646516224': [],
 '100960424735940608': ['Winehouse', 'Norway', 'Rupert Murdoch'],
 '92166818927620096': ['Fire'],
 '98743218979618817': ['Hercules'],
 '94198829171216384': ['Khan'],
 '93660551019905025': [],
 '92246588633780226': [],
 '98371205341515776': [],
 '92675273430024192': [],
 '101975001678364672': ['Championship'],
 '100860202164830208': [],
 '93459204706275329': [],
 '93086151035994113': ['Newsnight', 'Nick Boles'],
 '94365640856580097': ['Page', 'Oracle', 'Google CEO'],
 '92732661361152000': [],
 '100933019145207808': ['Bernabeu'],
 '93634518149365760': [],
 '100623454981722112': [],
 '102862579310936064': ['Madrid'],
 '93166585153454081': [],
 '100736523179274240': ['Abbey', 'Sony'],
 '94556024995975169': [],
 '100620752272900097': ['League'],
 '95895148315164673': ['Gaga'],
 '93305039787012096': [],
 '92488433934680064': [],
 '94818150943686657': [],
 '102875807608881152': [],
 '96309990792507392': ['Vinatieri'],
 '100375503868923904': ['WR',
  'DeSean Jackson',
  'Philadelphia International Airport'],
 '94812166439452672': [],
 '91887708938579968': [],
 '96565555221368832': ['Bowl', 'Jason Babin'],
 '101087898002145280': ['Cross'],
 '96250830499479552': [],
 '91876443038027776': [],
 '94586978460385281': [],
 '98101823776374784': ['Moss'],
 '96203693912891394': [],
 '100130942361939968': [],
 '100202414153539585': [],
 '92974406980210688': [],
 '92229656115286016': [],
 '91966296882810880': [],
 '95637005131722752': [],
 '92353415123968000': [],
 '101815114281385984': [],
 '91693727273328641': [],
 '97462635209031680': [],
 '92860837802414081': [],
 '92718575407742976': [],
 '93477094557876224': [],
 '91814689134219264': [],
 '100913412317523969': [],
 '94421647397888000': [],
 '103225732452454400': ['Bendtner'],
 '92406762476535808': [],
 '98298979716055040': [],
 '95430079714299904': ['Villa', 'Wigan', "Charles N'zogbia....More"],
 '101044028170174466': [],
 '93750712361689088': ['Woods'],
 '93440864361263105': ['Potter'],
 '92105762414927872': [],
 '92406036845166594': [],
 '102532528845492224': [],
 '100325515235303424': [],
 '96772240854630400': [],
 '101038340438114304': [],
 '101491826229395456': [],
 '92313714224668672': ['Lama', 'President Barack Obama'],
 '101020620464197632': [],
 '93461072069140480': [],
 '98039628522725378': [],
 '93321511091445760': [],
 '91917092294627328': [],
 '102371087672807425': ['Fabregas'],
 '98069781692940288': [],
 '100864013872803841': [],
 '93377441845874688': [],
 '96657103237816320': [],
 '93317692710338560': [],
 '93477874656489472': [],
 '92749453886373888': [],
 '100719104977154048': ['city'],
 '101020302405931008': [],
 '101330147394850816': [],
 '102568154164768768': ['State'],
 '102844692646993921': ['Bowes', 'Ealing'],
 '93005124485652481': ['Hoare brave'],
 '97439276295397376': ['Marquis', 'Nats', 'Minor League'],
 '101756407149375489': [],
 '92703274603528192': [],
 '92463363644334080': [],
 '94319469920403456': [],
 '93060072762118144': ['Storm Bret', 'Florida'],
 '100900499162808321': [],
 '93006349599907840': [],
 '94103364815699969': [],
 '93286216094580736': [],
 '100793214600085504': [],
 '3543845320': [],
 '93346617897390082': [],
 '92789379453566976': [],
 '100373381538529280': [],
 '93063371515105280': [],
 '91970435507421184': [],
 '103073204700053505': [],
 '93316633199775744': [],
 '96966781851271169': [],
 '99273520994992130': [],
 '103207879510724608': [],
 '91961808604237824': [],
 '97476128305979392': [],
 '93476427013423104': [],
 '92907150279589888': [],
 '92916991156957184': [],
 '95317687516934144': [],
 '93168725246746624': [],
 '93691733627502593': [],
 '93405257639526400': [],
 '93412095177146369': [],
 '93448491019411456': [],
 '93372288518459393': [],
 '94209038987952128': [],
 '93709249020039168': [],
 '94452083499081728': [],
 '93319713035272192': [],
 '99885981905321985': [],
 '95906544255897600': [],
 '92699088658767872': [],
 '93378151698272256': [],
 '99598183302299648': [],
 '102486506131824641': [],
 '92542317491273729': [],
 '92683638923210752': ['Story'],
 '92276158808334336': [],
 '91812471555362816': [],
 '98017799158497280': [],
 '93734422158913536': [],
 '94810610474295297': [],
 '92433257878134784': [],
 '92812416949297152': [],
 '97296020681142272': [],
 '92712188497768449': [],
 '93165074721677312': [],
 '100625708447043585': ['Obama', 'U.S.'],
 '102568383198937088': [],
 '94823264559431683': [],
 '92712259708665856': [],
 '93046116983123968': ['Cross'],
 '93203339000561664': [],
 '102924579508461568': [],
 '91946438266851329': [],
 '98483906319368192': [],
 '98094167196049409': [],
 '100955195776827392': [],
 '92881941317165056': ['Football Club'],
 '94811243692568577': [],
 '101629780654436352': ['Club', 'Jose Enrique'],
 '98048170008903681': [],
 '88416243299786752': [],
 '97569036308709376': [],
 '99672140021903360': [],
 '93335385572257792': [],
 '99200989688643584': [],
 '97867813376622592': [],
 '102266212200890368': ['Retail Stores'],
 '93165092149006336': [],
 '91718858846633984': ['Spelling'],
 '102806874763694080': [],
 '94811470172393472': ['winehouse', 'janis joplin'],
 '100678378755067904': [],
 '93402528036827137': [],
 '102041478183927808': ['Clinton', 'Syria'],
 '101007518347694080': [],
 '92283176067661824': ['Potter', 'Deathly Hallows'],
 '96587380278050816': [],
 '94137483159281664': [],
 '93024530745921536': [],
 '98182828684087296': [],
 '94213202635788288': [],
 '95056222239211520': [],
 '92268098018754560': [],
 '93464745524465664': [],
 '100951827431948288': [],
 '95269953741144066': [],
 '98530236588761088': [],
 '100690934215344129': [],
 '92378631279611904': [],
 '93009463564447745': [],
 '99810287129067521': [],
 '92742671491284992': [],
 '91683689796354048': [],
 '101739359849562112': [],
 '94712231622746112': ['SWAT', 'Tomter'],
 '92658550811267072': [],
 '92289190707990528': ['Stephenson', 'NI'],
 '93654618210443264': [],
 '91986216400076800': [],
 '94179121093017600': [],
 '91714818343583744': [],
 '98838837832327168': ['Network', 'Kyle Orton'],
 '101037747019587584': [],
 '102660518929637377': [],
 '100966801382440960': ['Anderson'],
 '102350153272401920': ['Byrne'],
 '92829473745014784': [],
 '91911670112329728': [],
 '99204286419968000': [],
 '93755599896064001': [],
 '93648865886089216': [],
 '101495953428717569': ['House', 'WASHINGTON'],
 '101748047888920577': ['Perry', 'President', 'Charleston'],
 '101432724321091584': ['Carolina'],
 '102856587256926208': [],
 '93392986490155009': [],
 '95222918291783680': ['Guptill', 'Derbyshire', 'Kent'],
 '101186888085413889': ['johnson', 'BBC'],
 '92630700901138432': ['Breaking'],
 '103069454442823681': [],
 '99195943936724992': ['City'],
 '102581530127376384': [],
 '102784871444918273': ['York Gov'],
 '92683762680344576': [],
 '92861039120625664': [],
 '101983612601237505': ['Madrid'],
 '96931007181225986': [],
 '96064326309384192': ['Hutchinson', 'Ryan Longwell', 'Jared Allen'],
 '93048284138057729': [],
 '95645142622539776': ['Azure Center of Excellence', 'Mumbai', 'Capgemini'],
 '91666751665868800': [],
 '93214187051958273': [],
 '96984948635992065': ['Klinsmann', 'US national team'],
 '92647901381136384': [],
 '96988012839317504': [],
 '95647738900922368': ['Hills'],
 '92691023037349889': [],
 '94764774340046849': ['Arsenal'],
 '101235330262372352': ['Cameron'],
 '92020245019099136': [],
 '92489713902039040': [],
 '93055040520069120': [],
 '91944850680844288': ['Bond', 'Dana Ave'],
 '96569948897427456': [],
 '93364673793110020': [],
 '102759881064460288': [],
 '92284091642294272': [],
 '96404114954661888': [],
 '93017474072715264': [],
 '92088579400011776': [],
 '102747877671047168': [],
 '92624967635705856': [],
 '93419359736832000': [],
 '95990282411180033': [],
 '100939109257842689': [],
 '101867891749683200': ['West'],
 '93755926498127872': [],
 '93350745100922880': ['Murdoch', 'London'],
 '92267949112561664': [],
 '91811406156017664': ['Maps Updates Interface'],
 '93158117478645761': [],
 '93717377883189248': [],
 '92979130907377664': [],
 '94524421292834816': ['Obama'],
 '92074318242127872': [],
 '92508560365334528': [],
 '102413446699692032': [],
 '100227045275082752': ['Champions'],
 '93512635420647425': [],
 '95300837726887936': [],
 '94303538561290240': ['indus', 'Sun'],
 '98518238442434560': ['Barack Obama'],
 '94079773847986177': [],
 '103196131361697792': ['Annual Geek Awards'],
 '95724083647488000': [],
 '99538840863252480': [],
 '93456222954594304': [],
 '94183808546516992': [],
 '97963233780056064': [],
 '93505979316060160': [],
 '92497498207297536': [],
 '94810268923736064': [],
 '94767948295700480': [],
 '93779352625483776': [],
 '91974853590126592': [],
 '93592366388224000': [],
 '92586488394563584': [],
 '92934213975805952': [],
 '101022789389131776': [],
 '92866765834567680': [],
 '91812021514932225': [],
 '97352404718194688': [],
 '93147699544723457': [],
 '97759293780144128': ['Bedard'],
 '91804404180725760': [],
 '92205398123216896': [],
 '93483825652449280': [],
 '93289285981175808': [],
 '97074481746550785': [],
 '92453059170537474': [],
 '93786420036108288': [],
 '93629100102660096': [],
 '103127655129427968': [],
 '96879263122337792': ['Cameron', 'George Osborne'],
 '100766029856260097': [],
 '96924850878287872': ['Pereira'],
 '96688125719486464': ['Reporter', 'Kristin Cavallari'],
 '92519151947624448': [],
 '100871916511969280': [],
 '92673205621370880': [],
 '91681462000164864': [],
 '101371723798151169': [],
 '94537519206637569': [],
 '92291532354355200': [],
 '101300660988936193': ['Woods', 'PGA Championship'],
 '93324115456434176': [],
 '98094627499941888': ['Snider'],
 '91578861036380160': [],
 '92691491260084224': ['Boyz'],
 '101013010050592768': ['Express', 'West Midlands', 'Coventry'],
 '101708486022397952': [],
 '94900440759681024': [],
 '96455331181375488': [],
 '93535374181269504': ['Wright', 'Zealand', 'The Australian'],
 '94194936869683200': ['Smith', 'NFL'],
 '95127529593122816': ['Hari', 'Orwell Prize'],
 '94633563730874368': [],
 '103041969965645824': [],
 '93314053316939776': [],
 '92607649736167424': ['star', 'Steven Davis'],
 '94499737323057152': [],
 '99226394587971585': ['Releases Xcode'],
 '100644516125622272': ['Devon Smith'],
 '92969134756859906': ['Morris'],
 '93452805095960576': [],
 '103165156539904000': ['U'],
 '96548633469652992': [],
 '92350554352783361': [],
 '92454111949230080': ['Horn'],
 '97829869311901696': [],
 '93322219631026176': [],
 '97751607852273664': [],
 '93372491627642880': [', Mankins'],
 '96976835820269568': ['Willis McGahee'],
 '95489982260723712': [],
 '91652237725663232': [],
 '101024829385347073': [],
 '100731015894540288': [],
 '91999267694194688': ['Dinklage'],
 '96252784025931776': [],
 '99523095437651968': [],
 '98182476941373440': [],
 '101557840388440064': [],
 '91987850437988352': [],
 '92740682158047232': [],
 '101351686358048768': ['UK', 'Samsung Galaxy Tab'],
 '92671586628407296': [],
 '96039621137399809': [],
 '98034371264655360': [],
 '92640767331418113': ['. Clair'],
 '93009815311364096': [],
 '93699434617118721': [],
 '91998635692261376': [],
 '97815160516919296': [],
 '100914329276256257': [],
 '100685777289224192': [],
 '93661388681125888': [],
 '91810529621966848': [],
 '95629074516549632': ['A&M', 'the Longhorn Network'],
 '98027157300854784': [],
 '100868492621914112': [],
 '94500411716808705': [],
 '92145076297404416': [],
 '101654556852748288': [],
 '100724230852845568': [],
 '94481646916603904': [],
 '92209240676110336': [],
 '92706915892731904': [],
 '98924387897585664': [],
 '101428194460180480': [],
 '93409536827854848': [],
 '101992331275796480': [],
 '96665996416397313': [],
 '93960668197289984': [],
 '101029769130414081': ['Street'],
 '96956379297882112': [],
 '94509025118519296': [],
 '96410979562291200': ['Pros', 'Eric Bledsoe', 'DeMarcus Cousins'],
 '91772743091105793': [],
 '100902012501229568': [],
 '96622402867441664': [],
 '91900145075105793': [],
 '100998532747640832': [],
 '98446576988585984': [],
 '94013224856453120': [],
 '93800424825565184': [],
 '99227803861516288': [],
 '92786465272119296': ['Disney'],
 '91904310308384770': [],
 '92717757963059200': [],
 '97079031924666368': ['Lee Higgins', 'Eagles'],
 '94539178125176833': [],
 '100652144067223552': [],
 '100653353050193920': ['Ham', 'West Croydon'],
 '93546678673616896': [],
 '94441232654278656': ['Fabregas'],
 '92651400680587264': ['Brooks'],
 '101671002735521793': [],
 '102067318368108544': ['Bills'],
 '92768228354424832': [],
 '94453092074008577': [],
 '96324653890539521': ['Storm Don', 'Gulf'],
 '97827245397254145': ['Senate'],
 '101413281792671745': [],
 '92066368203141120': [],
 '96092892203978752': [],
 '94899543874867200': [],
 '95578608407552000': [],
 '92494167556636672': [],
 '93734032189304832': [],
 '102000318501490688': ['Fabregas', 'Barcelona'],
 '92361086627622912': [],
 '94457834649034754': [],
 '93014813629890561': [],
 '101830130401423360': [],
 '97129896891006976': [],
 '99852692247166976': [],
 '100668166455312385': ['York'],
 '99144320430518274': ['Camargo'],
 '91950032097525760': [],
 '91867884598476800': ['Icahn'],
 '97476522566361089': [],
 '92457484513574913': [],
 '98537413051293697': ['York'],
 '93784012509818880': [],
 '97040545645473792': ['The World Fashion Show', 'New Orleans'],
 '93598767844032512': [],
 '102086288940863488': [],
 '96229136179281920': ['CEO McNerney'],
 '93344390839410689': [],
 '93400837157695488': [],
 '96369935135158272': ['Schumann'],
 '102352338030837761': ['Bay', 'EPL'],
 '96992650850344961': [],
 '95502045565562880': [],
 '101389292311556096': [],
 '91792484321067008': [],
 '100536116343607296': ['St'],
 '92280866566455296': [],
 '101152953116803072': ['Casilla'],
 '92969106881515520': [],
 '102182296815276032': ['Kaepernick'],
 '92684412923289600': [],
 '93808293348257792': ['Revival'],
 '96185595470159873': ['Cofield'],
 '100644104689557505': ['Minister'],
 '95188303338422272': [],
 '93983936493010944': ['Kennedy Space Center'],
 '91961905131950080': [],
 '93769818116861953': [],
 '95898503749963777': [],
 '93099856930947072': [],
 '94502159269363712': [],
 '98807639470899200': ['Minister', 'David Ford', 'Roisin Lynch'],
 '93282854406078464': ['lifts', 'Gor Mahia'],
 '96575772726263809': [],
 '98017591959896064': ['target Mata'],
 '99357694883930112': ['Boat Team'],
 '102139053503283200': [],
 '93736461454688256': [],
 '101668632941178880': [],
 '102319781855772672': [],
 '93978341606043648': [],
 '97017783145070592': [],
 '96984991245942784': ['Henne'],
 '103120046653583360': [],
 '93794603127410688': [],
 '99618926908018689': [],
 '93398195266265088': [],
 '95861807780077568': [],
 '97379561582497792': [],
 '91794662699974656': [],
 '96405737714098176': ['Canada', 'airport', 'Sydney'],
 '96276025083822080': ['Gallery'],
 '92668693426880512': ['Krispy'],
 '97248713143099392': ['Bank', 'Fayasel'],
 '94142638093115392': [],
 '95132090957447168': [],
 '100662459853053952': [],
 '97457198254407681': ['Fabregas', 'Arsenal'],
 '93729457709383680': [],
 '101108656782839809': [],
 '92787340803715072': [],
 '101329150966640641': [],
 '97025714552971264': ['Finn', 'Middlesex'],
 '93590697944432640': [],
 '95660302888214529': ['Sox', 'Kenny Williams', 'Twins'],
 '91925590256525312': [],
 '93031684374667264': [],
 '101841857687977984': ['London Square'],
 '101671774474870785': [],
 '92247182312349696': [],
 '92508293959917568': [],
 '94815768675487745': [],
 '94759766982799360': ['Citizen'],
 '96116890295992320': [],
 '91786366156935168': [],
 '96354439341936640': ['Longwell'],
 '98068716322951169': [],
 '95128487727349760': ['Grand Prix'],
 '92926655382818819': [],
 '93059420296183808': [],
 '92370537531183104': [],
 '94201131856707584': [],
 '102923598414622720': [],
 '93291567711916032': [],
 '96726597469618176': [],
 '94510033202724865': ['Risk', 'Heat Stroke'],
 '97027268685213696': [],
 '97002463147720705': [],
 '93900949902462977': [],
 '93942339093004288': [],
 '96145938346811393': ['Madrid', 'Sergio Aguero', 'Manchester', 'City'],
 '97373690815197184': ['Knox'],
 '93750898479730688': ['Croft', 'Tom Smith'],
 '92848660353794049': [],
 '93378613063327744': [],
 '91904202460241920': [],
 '99954453586780161': ['Wenger', 'Barack Obama'],
 '92804182280634371': [],
 '93068195799371776': ['and Chandler'],
 '92648687725051905': [],
 '96979327224266752': ['Klinsmann'],
 '102441571353509888': [],
 '93709909803274240': [],
 '93105665089867776': [],
 '91871796252512256': [],
 '100720839560933376': [],
 '93671907987177473': [],
 '94807286882635776': [],
 '101197985895034880': [],
 '93761496332517376': [],
 '95999339016626176': [],
 ...}

In [74]:
for o in output.keys():
    output[o] = get_entities(output[o])

In [66]:
def get_entities(line_text):
    segments = line_text.split(" ")
    col = []
    for seg in segments:
        res = normalize(seg)
        if res is not None:
            col.append(res)
    fin = []
    temp = []
    for c in col:
        if c[1] == "B":
            if len(temp) == 0:
                pass
            else:
                fin.append(" ".join(temp))
                temp = [c[0]]
        elif c[1] == "I":
            temp.append(c[0])
    return fin

In [67]:
get_entities("CONFIRMED/O :/O Twentieth/B-ENTITY Century/I-ENTITY Fox/I-ENTITY Animation/I-ENTITY &/O Blue/B-ENTITY Sky/I-ENTITY Studios/I-ENTITY Announce/I-ENTITY Ice-Age/I-ENTITY Casting/I-ENTITY &/O Nicki/B-ENTITY Minaj/I-ENTITY is/O on/O the/O LINEUP/O http://t.co/IlQgO34/O")


Out[67]:
['Century Fox Animation', 'Blue Sky Studios Announce Ice-Age Casting']

In [65]:
def normalize(word):
    index = word.rfind('/')
    pure = word[:index]
    if "I-" in word[index+1:]:
        return (pure, "I")
    if "B-" in word[index+1:]:
        return (pure, "B")
    else:
        return None

In [78]:
temp1 = [1,2,3,4]
temp2 = [3,4,5,6]
len(set(temp1) - set(temp2))


Out[78]:
2

In [80]:
for tweet in tweet_corpus.values():
    tweet["mention_set"] = set([item['mention'] for item in tweet['goldens']])
    
stats = {"tp":0., "fp":0., "tn":0., "fn":0.}

for tweet in tweet_corpus.values():
    tid = str(tweet['tweet_info']['id'])
    ners = set(output[tid])
    golds = tweet["mention_set"]
    
    stats["fp"] += len(set(ners) - set(golds))
    stats["fn"] += len(set(golds) - set(ners))
    stats["tp"] += len(set(golds) & set(ners))

In [81]:
stats


Out[81]:
{'fn': 2262.0, 'fp': 345.0, 'tn': 0.0, 'tp': 114.0}

In [82]:
precision = stats['tp']/(stats['tp']+stats['fp'])
recall = stats['tp']/(stats['tp']+stats['fn'])

F = 2*precision*recall/(precision+recall)

In [83]:
print precision, recall, F


0.248366013072 0.0479797979798 0.0804232804233

In [ ]: