In [1]:
%run helper_functions.py
%run tweepy_wrapper.py
%run s3.py
%run mongo.py
%run df_functions.py
import pandas as pd
import string
from nltk.corpus import stopwords
# Extra tokens dropped alongside NLTK's English stopwords: retweet/attribution
# markers and arrow-like glyph sequences.
extra_stopword_tokens = [
    "rt", "via",
    "-»", "--»", "--", "---", "-->", "<--", "->", "<-",
    "«--", "«", "«-", "»", "«»",
]
nltk_stopwords = stopwords.words("english") + extra_stopword_tokens
In [2]:
# Fetch tweets for the account "gabr_ibrahim" through the project helper.
# presumably 2000 is the number of tweets to request — TODO confirm against the helper's definition
gabr_tweets = extract_users_tweets("gabr_ibrahim", 2000)
In [3]:
# Collect the fields of every fetched tweet under a single per-user record:
# {"gabr_ibrahim": {"content": [...], "hashtags": [...],
#                   "retweet_count": [...], "favorite_count": [...]}}
gabr_dict = dict()
gabr_dict['gabr_ibrahim'] = {"content" : [], "hashtags" : [], "retweet_count": [], "favorite_count": []}

# Bind the record once so the loop body does not repeat the nested lookup.
user_record = gabr_dict['gabr_ibrahim']

for tweet in gabr_tweets:
    # Read all four fields before storing any of them, so a failure while
    # reading one tweet leaves the four lists untouched for that tweet.
    text = extract_text(tweet)
    hashtags = extract_hashtags(tweet)
    rts = tweet.retweet_count
    fav = tweet.favorite_count

    user_record['content'].append(text)
    # extend, not append: hashtags from all tweets are pooled into one flat
    # list, so this list is not aligned one-to-one with the other three.
    user_record['hashtags'].extend(hashtags)
    user_record["retweet_count"].append(rts)
    user_record["favorite_count"].append(fav)
In [4]:
# Build a frame with one row per top-level key of gabr_dict (orient="index");
# each inner key becomes a column whose single cell holds that key's whole list.
gabr_tweets_df = pd.DataFrame.from_dict(
    data=gabr_dict,
    orient="index",
)
In [5]:
# Preview the frame; 5 is the default row count of DataFrame.head, spelled out here.
gabr_tweets_df.head(n=5)
Out[5]:
favorite_count
content
retweet_count
hashtags
gabr_ibrahim
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, ...
[RT @UChicagoCAPP: Great turnout today! Hope y...
[5, 1, 1065, 1, 0, 11, 27, 1407, 728, 1107, 0,...
[opendata, NLP, spaCy, Metis, TopicModelling, ...
In [6]:
# Run the project's filtration helper on the raw tweet frame, passing "content"
# (one of the frame's columns, per the preview above).
# Judging by the output shown further down, this presumably lowercases the text and
# strips stopwords, mentions and links — TODO confirm against the helper's source.
clean_gabr_tweets = filtration(gabr_tweets_df, "content")
In [7]:
# Convert the filtered result from the previous cell with the project helper
# dataframe_to_dict.
# NOTE(review): the same name is rebound from the helper's input to its output, so
# re-running this cell on its own feeds the already-converted value back into the
# helper. Consider a distinct name for the converted result.
clean_gabr_tweets = dataframe_to_dict(clean_gabr_tweets)
In [8]:
# Display the converted result as this cell's output. The rendering below starts
# with [{'gabr_ibrahim': {'content': [...]: a one-element list whose single dict
# is keyed by the account handle.
# NOTE(review): this renders every cleaned tweet; consider showing a slice instead
# before sharing the notebook.
clean_gabr_tweets # a list containing exactly one dictionary
Out[8]:
[{'gabr_ibrahim': {'content': ['great turnout today hope able join us slides available link video webinar coming soon',
'ms capp student talks challenges value time',
'good news steve bannon gone bad news replaced sentient swastika right arm permanently',
'byyyeeeee',
'late night could possibly better',
'millennials killed confederate monument',
'find friends speakers around add yourself directory',
'in gop led homeland security committee declare charlottesville attack act domestic terrorism',
"republicans extremely opposed erasing history unless it's named obama insures million americans",
'mean ok',
'this',
'idea nazis people oppose nazis somehow equatable batshit fucking crazy shit i ever',
'god grant serenity accept get grant, courage write anyway, wisdom know',
'seriously though work castle',
'sure, cancer aggressive chemotherapy also aggressive aggression sides',
"problem another's solution; solution problem unknown",
'big oshit',
'j hus see keeps vibin late night coding session',
'note expensive network traffic explains shuffles bad data management important',
'humanized latency metrics useful remember you, uh, ever use network',
'academic datasets usually perfectly balanced real world datasets messy, unbalanced incomplete',
'clip made day',
'team hiring folks search, nlp, machine learning backgrounds; get touch know anyone',
'must read get extra dose imposter syndrome reading math heavy ml papers',
'bahahahahaa',
'he insult cabinet, party congress he insult presidents war heroes nazis putin?',
'violence, chaos, apparent loss life charlottesville fault sides racists',
'wow president cannot condemn white supremacy wow nobody surprised white supremacists',
'trump afraid say it, radical islamic terror white supremacy? nope 🤔',
'join us hear jens ludwig discuss trends public policy',
'customize start up blog post',
"understand people do want read research papers, it's non issue it's blatant",
'every time hit paywall, blows mind taxpayers are constantly fury pay research',
'you constant source inspiration thanks great role model i getting book asap',
'',
'aws s3 walkthrough, setup use harness power cloud computing blog post',
'ibm system claimed accuracy w75m images hrs previous record microsoft, days',
'',
'',
'flask app gross wait bootstrapping yaaaaaasss fully decked professional looking app mins',
'interviewing phd program, professor interviewing said focus, "technology affecting politics"',
'cores ram happy birthday',
'exceptional work metis sr data scientist',
'libraries are books almost public space left do like wallets',
'enough dongles',
'learned integrate ec2 s3 bless blog post tutorial coming soon',
'avoid overfitting',
'af researchers forced ghost bae ai creates language',
'replace multiplication gradient descent calls regulation ai see',
'bitcoin fork crypto market madness next hours',
"know that's meant,",
'scaramucci, barely knewcci',
'',
"nah it's pretty funny",
'soon therell enough former wh staffers entire season dancing stars',
'',
'bahahahahahahaha',
'shortest serving comms director history? along cos',
'best programming skills knowing walk away while oscar godson',
"it's amazing it's recommended use version pickle",
'standard pickle library great do use bs4 objects, leads',
'thanks renee',
"you tidy data me i'm going nest dictionaries levels deep",
'aws n_jobs= 1',
"discovered portillo's today cake shake changed lifechicago",
"simpson's paradox explained gif",
'really love power awsmwhahahahaall cores',
'reporter live real data scientist tell us sexy job me maniacally worked locally,',
'',
'frustrated white house would walk see jefferson read this hope',
'even motherfucking orrin hatch thinks bullshit orrin hatch',
'making aliases bash light speed workflow going want read this',
'prime factorization complexity blog post',
'project complete blog here',
"also learned r it's amazing resource promise do work them",
'',
'walkthrough set jupyter notebookshow use aliasing aws',
"deborah sql jockey join limit mikey statistician result, what's n?",
"clear i'm asking whether true",
'better always use stratifiedkfold kfold? shuffle=true cases stratified would better bias variance cv?',
'',
'',
'scientific process cool hard shit shit actually going work',
'yyyaaaaassss',
'🤔',
'tired worrying algorithms wired removing algorithms curriculum inspired algorithms',
'"you face classic prisoners dilemma do?" "i pardon guy myself also youre fired"',
'tired subjective nature tech hiring terrify us wired use algorithms tech hiring terrify us',
'do know, etl short part data science one wants do',
'gridsearchcv takes worries away',
'mastercard cisco join',
'oh wow could really end political career',
'"clean csv pre prepared meal kit" amazing analogy ✨',
'welcome meetup event',
'tired arguing whether data wired arguing whether we inspired arguing',
"week scraping cleaningit's finally dataframeanalysis tomorrow firstsleep",
'stop telling lies?',
'major progress project time sleep',
'breaking mcconnell concedes drive erase, replace obama health law failed; plans repeal vote, delay substitute',
'without try except clauses, do think web scraping would ever possible least way allows storage',
'bless any method',
"twice much twitter good me i'm running",
"got back tonight pro tip last season, get hbo's free month trial online live streaming that's episodes",
'dad, borrow computer? forgot magic word sudo ok then',
'britainwonderful',
'incredible inspiring read',
'',
"reason academia cutthroat there's little stake",
'wow incredible quote regarding government data',
'meanshift clustering described people wandering around foggy football field',
'event hosted event link',
'time flies you fun got home',
'one left office',
'multiprocessing library changed life forever',
'aliasing git commands greatest thing ever totally gonna blog',
'works pandas dataframes',
"i'm struggling finding interesting question",
'good ideaslinks cool datasets would perfect linear regression analysis python? looking fun project',
'project completed check details blog',
'blistering accurate see us',
"me, taking another toothpick i'm concerned sample size small farmer's market vendor please stop",
'shameless self promotion personal website running check out',
'got around making personal website check first blog post ever',
'largest product series',
'binary search implementation',
'spell?',
'pair programming',
'introduction',
'',
'mi5 sponsor data science challenge assist spotting terrorism',
'command line tip day use filename` clear content file without deleting it',
"legal technology, lawyers love say programmers need them think it's way around",
'really work i learned great deal made fantastic',
'beast woman',
'differentially private gans generate fake clinical trial data train ml algorithm without sacrificing privacy',
"breaking illinois house overrides governor's budget package veto enact first spending plan since",
'',
'',
'',
"best non technical pydata talk seen highlights many issues struggle with it's must watch",
'',
'happy birthday america 🤗',
'confirmationwhat way kick',
'ah shit',
'breaking new jersey gop gov chris christie signs budget deal following impasse led government shutdown',
'ny benefited lot great teachers, smart diverse group students',
'new senate quietly pushing subtle change health care bill gut medicaid, allow states',
'focus group could avoided focus group women',
'companies hired coding bootcamp graduate, said would',
'argue research do like',
'',
'',
'great high level overview neural networks easy read digestible',
"blockchain potential spur new era financial inclusion here's how",
'poll finds britons want keep eu citizenship',
'wrote python script check stashmetrics check feedback appreciated first attempt',
'sent prison software programs secret algorithms',
'rightly viewed, possesses truth supreme beauty russell',
'distill concept basics like explaining yr old helps solidify',
'dont fuck postdocs',
'still think nyc better?',
'🤔🤔 get bagel?',
'beats nyc smoked meat day',
'drake ferrell got rolling bro',
'alma mater',
'remember called every datacs curriculum include ethics security curriculum this?',
'def selfcore true',
"mock week really makes miss home bless vpn's bbc iplayer",
'key developer milestone you written enough code able reference code researching',
'st viateur bagels',
'',
'mount royal view top incredible la banquise poutine st denisst laurent',
"atleast it's stata",
'love',
'preach go grad school major problem apparent',
'really interesting analysis python imports files files github cc',
'intro blockchain goldman sachs absolutely incredible presentation ux mind blowing',
'interactive regex crossword help teach regex bless internet',
'',
'forgotten front finally here thanks everyone interest project cambridge discount',
'i never seen one sentence headline contradict',
'succinct overview bayes theorem come across',
'h 1b visas big issue go',
'',
'would love join slack channel',
'jaro winkler good compare us addresses bc penalizes edits closer end str city, st blocking key',
'passive aggressive logging',
'eid mubarak students, staff alumni celebrating today',
'uk parliament hit cyber attack, says liberal democrat peer lord rennard',
'keeping distributions relationships straight mind thank you',
'were hiring project coordinator join ethics governance project',
'',
"incredibly important today's information age",
'great read',
'fantastic read highly relevant interested finance',
'friends join us data science social',
"periodic reminder are following many women data science, it's bc are twitter",
'thank enriching coding life deserve award always dread looking plot',
'ugh fish worst',
'',
'daughter playing student',
'',
'responding reviewer comments',
'foucault education',
'one idols liked tweet 🤗 thank incredible role model us',
'strip mall starbucks, theory contends, ever gone war another',
'powerful paragraphs always remember intricacies data',
"yyyaaaaasss i'm budding data scientist polisci background",
'bash makes happy',
'i using markdown years never took time learn properly discovered create block quotes ',
'well written',
'chicago made public ranking arrestees risk, predicted algorithm',
'"ethereum internet be"',
'past self do worry general solution, ship something now current self fuck',
"it's minseth",
"i'm going sleep like baby ",
"i'm officially losing mind ",
"i'm screaming",
'',
'wow good labour stronger good brutal brexit rejected good next generation realized stakes spoke up',
'interesting watch republican senators challenge james comey seven minutes donald trump',
'breaking british news media say prime minister theresa may resign election setback',
'breaking british media report party hold majority surprising conservative fall, hung parliament',
"another reminder campaign manager great progressive hero barack obama ran theresa may's campaign",
"in it's officially hung parliament uk election party get overall majority",
"breaking uk pm may's conservatives longer able win outright majority parliament seats declared",
'done thanks looking forward seeing odsc team again',
'pols open sure get vote time complacency',
'hey odsc team idea expect free tickets upcoming odsc conference eagerly checking mail daily',
'gets mention apple keynote',
"wait whatit's like seconds",
'focus discrete math, stop looking ticker',
'in seven members public killed london attack, metropolitan police commissioner says',
'london terror attack police shoot dead three suspects rampage kills six',
'things have decades software experience partner child hobbies coding things do have',
'jeremy corbyn says labour suspend national campaigning evening following',
'final scene leaves many questions feels ☺️',
'hell hath fury like jupyter notebook many print statements',
'pyspark cluster keeps crashing databases assignment impossible use built functionality spark',
'lie pillow week go tons done forges warriors pits hell grad school 🤕',
'story four tweets hope understand muslim feels burden us crimes others',
'zbigniew brzezinski, former national security adviser long time wise man world affairs, dies rip',
'scientifically literate empower know someone else full shit',
'',
"challenging jared negotiate israel palestine peace plan he's indicted",
'brilliant primer ethereum cryptocurrency markets changing',
'even exposure',
'',
'brit expats us sending postal vote, add stamp free post valid uk',
'',
'graduate student unionisation demonstration',
"pm'd",
'ever invest yourself?',
'',
'truly appreciate modern machine learning age put away childish things age calculus, c++ age nips poster age',
'coinbase kraken excellent platforms wo answering second question, penny',
'absolutely incredible however, apr close cost prohibitive',
'theresa may suspends election campaigning following manchester explosion',
'theresa may says thoughts victims families affected treated police',
'updated police report multiple deaths following apparent blast ariana grande concert manchester',
'manchester arena police confirm fatalities explosion ariana grande concert – live',
'police say fatalities incident ariana grande concert',
'matter time',
'especially look tweet eth now',
'companies pic exact',
'new eea members',
'words must breaking news',
'breached',
'incredible read',
'breaking senate intelligence committee says former fbi director comey agrees testify open session statement',
'it return one month ',
'tears everywhere',
'okthis insane do know myself returns',
'aaaaaaahhhhhhhhhhhhhhhhhhhhhhhhhhh happening ',
"it's hits i'm gonna straight cry",
'serious crowd opening',
'breathing',
'fred wilson eth market cap surpass bitcoin market cap end year ',
'eth adoption',
'look list companies interested eth',
'too time alive',
'never understand art',
'price rise making dance dont dance',
'wwwwhhhhaaaaaaaaaaaaa focusing work tonight',
'happening yaaaaasssss im screaammiinngg',
'its you, its omission oxford comma',
'do make teaching mistakes create democratizing spaces discovery learning critical pedagogy',
'',
'stuck contraposition dayand boom youtube vidim roll today donating khan academy websites now',
'bad funny',
'series videos better mental health grades all',
'yet khan academy knocking outta park explanation proof induction syllabi grad school',
'love machine learning much next data nerd also',
'proof induction textbook youtube videos need brilliant educators online',
'',
'',
'no open positions deep learning experts, according gartner no deep learning experts, according',
'javier valdez, award winning journalist killed mexico, targeted reporting, writes',
'landmark european court case could curtail freedoms british dual nationals',
'polls undercount centrists, populists',
'ethereum gets avoid bad press',
'ransomware attacks devastating however, question mind isbitcoin? pfftat least pull right get ethereum',
'hour midterm finally understand big notation bless khan academy',
"reading shirt calm and conda install daddy, what's conda? know",
'breaking acting fbi director andrew mccabe tells senate panel update white house russia investigation',
'breaking acting fbi director calls trump russia investigation significant, contradicting white house claim',
'',
'amazing even lavrov shocked',
'printsortedlistsetlst 2 find highest val list sometimes, marvel elegant be',
'political scientists do time shit',
"george w bush's ethics lawyer",
"wh made calculation however bad looks, it's better letting comey continue supervise",
'data news',
'co authoring',
'listening morning like idea hackathons focus cleaning prepping',
'lunch order',
'trump delete webpage case court, ca delete bigotry behind',
'traced saga syrian family order resettled us vetting process steps total',
'heritage gets shoutout discrete math textbook',
'got email today bots becoming sophisticated knew could find casual encounters',
'best breakdown svd i ever seen incredible profs',
'matrix diagonalization beautiful',
'pyotr tchaikovsky—composer "the nutcracker"—was born',
'coding, tsa precheck best gift me',
'why?',
'highly recommend article people entering data science employers need help navigating',
"colbert calls trump cock gobbler fcc investigation, news agency's blast py it's cool?",
'fantastic read',
'breaking russia backed deal set zones syria designed reduce violence comes effect',
'breaking texas officer charged murder shooting black car leaving party, arrest warrant issued',
'does know anything cancer smart application antibodies',
'mind blowing applications healthcare talk google x labs',
'tutorial day great start',
'',
'how??',
'need contain emotions public then',
"am fact, i'm modelling ml algo's right now",
'think you enjoy presentation today',
'calls americans focus global refugee crisis amen',
'talk uhcr using data',
'',
'udemy flesh phenomenal explanations usual',
'conference fantastic mandatory blast',
'',
'thank free ticket future event knowledge',
'great talk loved every second yes, absolutely hilarious',
'learning news things every talk',
'yay thank you',
'best slide today far',
'watching loyal datasci subjects one datasci idols',
'interesting talk quantum computing amazing could impact cryptography',
'im excitteeedd',
'announcement juliadb',
'native linear algebra data type julia',
"julia unicode support that's right latex syntax assigned variable",
'ethereum cracked yaaaaasssssss',
'pumped presentation i heard much power time see action',
'free datasci goodies absolutely amazing community',
'congrats scikit learn winning award outstanding open source project much deserved cc',
"say say science'",
'yuge crowd keynote tom davenport following',
'skills',
'people data science days',
'wondering use help make broad impacts? stop table career',
'emergency meetings held buckingham palace',
'never sure you relationship qualitative researcher everything significant',
'information leakage',
'best me slide i ever seen almost reminds osi modelpacket encapsulation',
'learning code greatest gift ever given',
'rstudio',
'book signing coming',
"images' data sharing",
'plugging gaps self cleaning training sklearn șodsc realizing much still',
'analysis assumes clean data world rarely delivers it',
'preach',
'made exciting eat',
'excited full day scikit learn boston material',
'east',
'here',
'older man leans guy, really man sister?',
'period silence guests seated near couple burger better lit',
'never tell mom me? ashamed me tell her time w',
"trying keep options open fuck mean that's sister, respectful?",
'pours water food bounces craziest burger experience i ever had',
'yay see there',
'too presenting?',
'period silence guests seated near couple burger better lit',
'never tell mom me? ashamed me tell her time w guy beach?',
"trying keep options open fuck mean that's sister, respectful?",
'boston burger co couple next arguing whether frnds morewhat mean looking elsewhere?',
'got pass thank much excited tomorrow',
'do get bored reminding people labour changed britain',
'play three acts',
'hands best fried chicken i life bless chitown honneeeyy bbuutteerr',
'serious employer field ask github account need encourage',
"what's text? i'd interested giving read i'm ml course right now need knowledge get",
'writing book?',
"put logo syllabus first time it's clear, psyched job",
'',
"me lin alg hw stop looking price ethereum' mins later7450coin around room show money",
"brain food playlist spotify tends get zone i'm also looking suggestions",
'quote day american journalist edward r murrow',
"won won won won that? national front, macron's team? no, pollsters",
'time download',
'former president barack obama live noon et c span',
'doors open former president speech logan center',
'trump fired surgeon general called gun violence public health issue',
'whole days metric stupid you counting',
'worlds beautiful mathematical equation',
'sine game',
'pier review',
'research funding explained',
"forced guess i'd say le pen beats polls, that's bc terror attack otherwise might good",
'breaking champs elysees paris closed authorities telling people avoid area',
"team released ipython it's python only completion looks awesome",
"details ipython team's rationale behind dropping python support blog post last year",
'would incredible let know find resource',
'ordinary people, extraordinary things',
'',
'needed scale ml algo intricacies next customer walks door leaving money table',
'',
'next phase ml careful construction models fit nuances customer shelf lasso, rf, cnn',
'tongue in cheek math slide concerning traditional ml pipeline optimisation business',
'human impression advertising completed ms need able make decisions faster',
'prof sanjog misra taking stage ml making',
'',
'aparna pandey ca ml summit without talking deep learning increase literature around it',
'aparna pandey fear missing comes big companies machine learning',
'entire landscape business',
"ai hottest technolgies right it's also important remember it's new technology change",
'aparna',
'assisting human element',
'apama pandey uptake use ml better facilitate use resources large machine repairs replacing human element',
'szabolcs paldy empathise technology b2c applications b2c concerned solving needs indiv level',
'promus ventures working sentiment analysis unique data points minimum train algo',
'data trace back physical address deploy direct mail advertising based items dropped online shopping cart',
'job morris rise interactive technology monitor someone abandons online shopping cart, take ipmeta',
'panel session kicking now impressive lineup',
'sign available now free storage access spark cluster',
'first venn diagram',
'time spent data pre processingwhile may boring, crucial step',
'watson committed apache spark',
'lead product manager ibm watson stage',
'i live tweeting chicago booth machine learning summit',
'breaking fox news preparing cut ties bill oilly wake sexual harassment allegations',
'early voting numbers do matter early voting numbers do matter early voting numbers do matter early voting',
'trump says wants best people migrate america first, visas workers middling skills',
'thought could life quarterdatabases, ml proposal lin alg assignments due mondaycapp',
"kendrick's dna stepping away ml hw simply contemplate bars dropped",
'everything makes cry best ai algos used solve painfully world problems join',
'read this great article work matters esp communicating modern',
'nightmare',
"i'm imagining apsa attendees calling dr looking meaningfully grad students,",
'night long',
'student constantly interacts students diff disciplines, comes much',
"need playlist suggestions late nightproductive coding far, i found chainsmokers spotify's brainfood",
"let's get nitty gritty feature selection methods model evaluation also",
'look airspace around north korea',
"it's true get waves incredibly cathartic emotions clean, sort, split model ml algo's msc me",
'pepsi did work?',
'people, faced problem, think, know, i use statistics ± problems',
'people studying deterministic progress mocks person studying probabilistic process making progress',
'soon we war iraq, afghanistan, syria, north korea?',
'',
'solemnly swear never underappreciate lambda functions',
'trump wont post wh visitor logs, citing "security risks," says move save taxpayers one',
'need design ml pipeline tells eat designing ml pipelines',
'l shaped scatter plotsapparently people income debt ratioshmmmmore data cleaning',
'reminder attorney general united states committed perjury confirmation congress',
'oh wow professor slymany numbers do make sensesimply imputing isnt going workim data janitor',
'imputationimputation everywherebins everywhereso many numbers',
"ml hw data cleaning, eda, feature cleaning blown away people's debt ratio's wondering it's data entry error",
'might time head back canada',
"today's blog discusses pay close attention identity construct",
"spent two years collecting data ran anova p=0052 it's nice knowing everyone",
"i'm love feature selection method's scikit learn",
'amazing definition',
'ml algos tons algos algo cmplx sys lose algo ppl do theyre called academics',
'data science conference bingo',
'back envelope proof ∛2 irrational',
'make sure kids see graph',
'long read origin deep learning really gorgeous mathematicalintuitive explanations',
'data scientist like owning car taking numerical linear algebra like learning rebuild engine',
'schumpeter university chicago worries lack competition',
'favorite data viz week',
'trump policy reversals today fed hiring freeze, nato exim bank, labeling china currency',
'',
'lol, alright fine laughed',
'"should learn deep learning" new "should get phd"',
'plein air favourite place start day breakfast linear algebra',
'ctu guy legacy say tell tweak parameters s q l speed computer?',
"here's quote",
'hope post online would valuable resource learn from',
"i never get past powerful for loops are looping classification algo's cross validating breeze",
'us intelligence community consensus russia foreknowledge syrian chemical attack us official',
'alabama governor resigns scandal leads criminal charges',
'alabama governor resigns sex scandal top aide',
'congrats new director institute',
"young thug's lifestyle serving motivation get sql assignment basement",
'wins twitter today hands down',
'spicer recognizes icc?',
'visualise space right?',
'yikes getting better reading personal diary getting better linalg',
"insightful moments strang's book wave emotions getting better linalg reading equivalent personal diary?",
'suspect ui designer might lied cv',
'overheard really want shut airport syria, let delta gate agents run it',
'passion fruit machine learning',
'let drinking machine learning begin also someone remind turn aws gpu instance sleep',
'breaking russian military says help syria strengthen air defenses us strike',
'breaking swedish prime minister stefan lofven says everything indicates truck crashing department store terror attack',
"trump's decision strike syria carries considerable risks look could mean",
'',
'please remember republicans applauding strikes caring poor syrians fine supporting',
'beautiful pictures? fuck msnbc',
'serious question trump administration authority, credibility, clarity thought answer',
'boy, did take long discover clinton, bush, obama learned air strikes easy way look tough wo',
'middle east exploding trump response reduce state dept budget cut foreign aid close borders expand military',
"i'm sorry, makes fucking sense whatsoever",
'breaking homs governor tells us missile strikes syrian base result deaths',
'look',
'could kill emotional undergrad hutchinson feel bro hurting too',
'wo relational algebra homework conform flawless logic?',
'friday, joined universities filing amicus brief opposing revised executive order',
"singing mario's love you machine leaning homework",
'wife left note day suppose failed explaining living',
"data pre processing ensuring underlying assumptions met order ml algo's work properly",
'watching api request go like',
"you new deep learning, encouraged part need learn learnable nobody knows all it's",
'finally cracked census bureau api',
'',
'listening tswift ml polsky',
'know mike pence punches penis every night bad bad thoughts go back hell',
'breaking us judge approves settlement trump pay million trump university lawsuits, ending years litigation',
'cool crushing it',
"drake's got zone late night ml",
"current administration compared really it's like animal farm freddy got fingered",
'us applications mcgill mcmaster u toronto holy',
'honor bill congress passed started making thing cycle random websites sleep',
"realized none intro statsmetrics classes i taken grad school covered test sets training sets that's kinda crazy",
'simple linear regression absolute joy sklearn, perhaps even better r terms intuition behind code',
'god bless sklearn python need start teaching stats classes using sklearn library jupyter notebooks asap',
'id argue social science closer data science cs biased',
'canada comes online',
'anti islamist leaders do resolve this; likely produce violence see algerian military',
'rex tillerson lift human rights conditions arms sale bahrain',
"one best explanations ml it's relationship regressionclassifiers i ever come across",
'cards humanity matching donations chicago public schools make today',
'advice grad students days go computer science department take class machine learning',
'absolutely fantastic paper ml',
'"how built fully automated system restocks kitchens coffee amazon"',
"worries i'm excited get started day goes price gets higher estimate backlog?",
"it's well hours still account verification feedback backlog? i'd really like get on",
'companies do this descibe intern',
'oh yes, think stem majors college take statistics college students',
'breaking wife french presidential candidate francois fillon facing preliminary charges allegedly fake jobs',
"pm signs letter formally begin uk's departure european union",
'overheard reg cafe difference variable value anyway?',
'new nba',
'donald trump signs executive order energy policies rolls back obama era climate change rules says moves include end',
'me ticket say mr albon says dr albon wife person save lives',
'quarter day course cover semesters worth discrete math lin alg combinedin weeks let games begin',
'saying wrong dmv fun live diesnot uchicago',
"ru stand for? dunno, maybe russia? what's context? says r u'",
"bill does pass, call obamacare trumpcare say it's great worked employment numbers",
'breaking new budget office analysis revised gop health bill reduces deficit less earlier version, improve coverage',
'representing',
"thanks follow huge fan work would love visit next time i'm dc",
"mean repo? means think it's interesting, also you never look again",
"hiring interns? interested i'm getting error trying access career center webpage",
"initial thoughts yesterday's",
"i professionally years, honestly could tell ibm actually does i'm",
'course skipped minds tell sooner cheers see later today',
'got contacts there? think would little odd stopped by?',
"i'm pretty sure can free credit signed up",
'sidebar know capp related orgs dc visit today? gonna swing wanted ideas too',
'advantage everything gets sped use cc made requests single charge',
'credit card give something like free credit months free google cloud platform use',
'using google maps cs project recently remember correctly pretty high limit use a',
'"mathematicians, pure applied, think something weirdly different statistics right" james',
'like shootings attacks guns gun deaths year part living us republicannra',
'confirm house commons house lords sit tomorrow normal times',
'remember, people mow unarmed civilians bridge are they murderous criminals',
"us capp's dc would love catch urban institute you free",
'students making way dc annual career trek hear various orgs employment opportunities',
'',
'check amazing project',
'cobra govt emergency meeting chaired pm next couple hours',
'confirmed parliament sit normal tomorrow',
'',
'harris building',
'westminster attack know far',
'learned software developers get together stuff envelopes, get lot this',
'visited today disappointed',
'passed microecon',
"sinn fein's martin mcguinness, northern ireland's former deputy first minister, died aged",
'excited host upcoming hackathon join us',
'paper absolute game changer incentivising corrupt officials india take smaller bribes genius',
'giggs featuring drakes track',
"realised drake's new album dropped",
'long research rewarded cited peers vs used public, many academic fields remain',
"police man' paris orly airport",
'original poster returns meant adobe illustrator',
"links discussion follow breadth ai, mooc's, tutorials etc",
'oh slack would like learn use ai asap ideas someone could teach me, good course',
'pro tip arent "code challenged" havent coding long enough give time, keep pushing,',
'google open sources jpeg encoder reduces file sizes',
'committee women statistics established',
'pet peeve people reply does equal causation tweet study uses randomized',
'we hosting data democracy hackathon join us',
'thank god serena williams',
'breaking netherlands main exit poll suggests anti islam firebrand geert wilders unexpectedly poor showing election',
'going leave here',
"quarter out last weeks hardest academic career fellow capp's y'all warriors amazing break",
'excited board run joint cs public policy program',
'happy',
'barely inch snow winter night finals week inches tomorrow, inches tuesday inch wednesday',
'free podcast idea episode recording mock technical interview',
'middle eastern man sure many days worth airport detention clothes pack',
'polsci too',
'fuck happening?',
"craftsmanship important data science dont desk, instead put macbook pro potter's wheel",
'post uses fairml build important machine bias work',
"tomorrow's cs presentation let finishbut group greatest treemap time",
"dance moves i'm pulling ridiculous",
'omfg working demo',
'',
'like women like like coffee unrestricted gendered conceptions intellectual scientific pursuits',
'differentiated e^x hard got polynomial',
'rest wicked',
'statistics conduit dont trust, verify',
'lies, damned lies manipulated data statistics lie, people do',
'yaaaaaaaaaaaaaasssss',
'heh rumman would interested',
"shoutout google's api making cs122 project possible",
'knowing code weeks ago marvelling beautysophisticationpower code incredibly rewarding',
'code works',
'ca spell general jefferson beauregard sessions third without l y i n g u n d e r o a t h',
'taught seeing theory visual introduction probability statistics daniel kunin',
"political commentary codingit's kinda working me",
'duty keep good fight',
'look back night admonish ever letting happen',
'',
"fuck lyyyyyiiiiinnnn'",
'damn thanks reminding ai president world twat',
'strongly support nato, trump says good hear guess policy embrace nato tuesdays thursdays',
'refused intel briefings went advice joint chiefs fuckup',
"that's fucking problem",
'pr moment botched military action dare',
'country built backs, blood, sweat tears immigrants',
'military rhetoric extremely dangerous',
'shooting two innocent indians white terrorist??',
'taskforce immigration crime enforcement? sounds like gestapo',
'ca unify law enforcement overlook institutional racism',
'chiraq getting shoutout want great schools high paying jobswith fucking money?',
"alternative speech story man loves daughteryeah, i'd bang daughter",
'no rare disease audacity mocked disabled reporter',
'thats aca does',
"i'm literally screaming incomprehensible sentences substance",
's3 strikes uchicago',
'',
'somewhere deep inside amazon data center, engineer desperately willing segway go faster',
'hash tables econometricsharris mine yet',
'time bedmodellingdreamsahead',
'got harris blast spotify let data thing',
'clinton campaigns big data simulator went wrong early—and one noticed',
'google sha 1 collision really gonna fuck internet anything else today cloudflare hold beer',
'manuscripts └in preparation └final final └final edits └near final',
'women awarded literature',
'breaking trump administration considers mobilizing many national guard troops round unauthorized immigrants',
'hero',
'minimize sum squared residuals you best fit',
'overfitted models make red poor accuracy makes blue i checked hidden variables you missing few',
"valentine's day, happy show love equation share ocw someone love",
'card',
"data points red data points blue clusters linearly separable it's complicated",
'you average, mean world me, baby love wo ever deviate',
'get far much satisfaction checking things listi need get',
'damn alessia cara hits close home',
'sup',
'',
'',
'bloody brilliant fave',
'shoutout mfx package r making life easier calculating partial effects probit regressions',
'verdict in devdas soundtrack also conducive productive statistical thinking',
'',
"one hand, love girlfriend earth other, hate she's imaginary",
'n r p',
'computer render shimmering fantasy worlds dreams times second me search emails',
'learning high school math could make kids richer later, professor says',
"academia's crisis relevance role engaged scholar",
'regression sad',
"it's anecdote it's data",
'thesis motivation explained',
'reading blockchain could help reduce global inequality',
'since faked moon landing conspiracy theory white house aligned comfortably',
'publish',
'video like screencast',
'devdas soundtrack surprisingly conducive productive coding would known?',
'google uber api joy use',
'recommends use grad school up',
'discovered r markdown spellcheck feature game changer',
'obama rejects comparison trumps immigration policy own, encourages protests',
"washington state attorney general announces lawsuit president trump's immigration order",
'steve mnuchin captain titanic, hed deny ever hitting iceberg',
"ko ni, leading muslim lawyer adviser myanmar's ruling party, shot dead yangon airport",
'lt general mark hertling said cnn isis, recruiting pr equivalent abu',
'five reportedly shot dead attack quebec mosque',
'days achieving majority disapproval reagan bush i clinton bush ii obama',
'muslim ban protests erupt nationwide outside white house, trump holds private screening dory',
'shooting quebec city mosque, reports multiple wounded',
'remember cato institute calculated chance killed refugee america billion',
'president united states continues defy order federal court, impeached',
'youll definitely want follow',
'fortune companies founded immigrants children',
"i'm immigrant served american people dept defense white house served president's bush obama",
'american deaths us soil foreign terrorists muslim countries immigration ban',
'',
"that's judicial orders work all",
'students would love help',
'gotten disturbing reports refusing comply court order',
'round one victory emergency stay issued federal court trump order yale law',
"signed judge's order refugees going immediately deported",
'judge ruled today refugees put back planes sent back danger',
'stay granted green card holders nationally brooklyn judge ruled',
'airbnb providing free housing refugees anyone allowed us stayed tuned more, contact urgent',
'news victoryemergency stay reaches',
'judge donnelly stay granted',
'canadian colleagues',
'important information',
'detained ca airport executive order call local hotline sfo lax san',
"according attorneys, detainees o'hare include month old newborn, us citizens",
"o'hare now",
"people chicago's o'hare airport standing solidarity people detained trump's",
"protestors march o'hare's international terminal chanting hate fear refugees welcome here",
"everyone picture attorney o'hare's intl terminal",
'us tech companies founded generation immigrants apple google facebook amazon oracle ibm uber yahoo emc',
'too',
'aau administration order barring return wvisas fsome countries stranding students please end soon',
"people affected trump's order",
'lawyers stationed airports across us know someone entering country, tell sign anything',
'student one countries affected recent executive order please message us',
'translator helped us military iraq detained jfk airport family',
'much money data analysts make? booleans dollars',
'data people show you, suspicious intellectually vigilant be added ineq chapter',
'republicans take control washington, immediately charge taxpayers least billion concrete wall good',
'breaking president trump says intelligence officials told torture works',
'finally, sensible thoughts',
'',
'damn straight',
'indeed turns out, trump swamp',
'trying learn best',
'resistance live streamed twitch',
'literally words need students show up dive in stay it',
'learned capp last week',
'federal climate data backed hope able get all',
'per website may go offline early tomorrow read can',
'one greats',
'respect office president united states damn, moronic bully making challenge',
'long live you never see photograph women signing legislation men',
'advice grader start fighting ignorance knowledge',
"alternative fact i'm fine",
"spicer says there's expansion federal workforce recent years unequivocally false",
"me, i'm like smart person, said, like, smart person ever history",
'hundreds thousands people take part womens marches across america world',
'across world, terrorists dropping weapons giving up president finally said islamic',
'thanks come visit training work public policydatascience',
'hey chris, big fan ideas budding new data scientist like find summer internship?',
'president donald trump signs executive order declaring prompt repeal obamacare official policy',
'breaking white house says trump chief staff priebus issue government wide order immediately freezing regulations',
'feel you, michelle',
'little late perhaps find job entails opening mouth',
'final repatriation flights gambia tomorrow call thomas cook arrange uk flight',
'last week dc republican foreign policy type said trump transition good looks',
'mnuchin makes mistakes complicated paperwork, asks forgiveness customers made mistakes, took',
'office faces parade route wanted remind people coming celebrate mandate',
'results in footnotes go punctuation¹ ¹ course do',
'last energy secretary nuclear physicist one nobel prize rick perry',
'former president george hw bush, hospitalized houston, according chief staff',
'none might want check insinuate something like that',
"i'm still waiting trump say something global affairs has literally said first kremlin",
'breaking nigerian state official military jet mistakenly bombs refugee camp, kills',
'standards stopped using copious amount profanity wanted use this',
'breaking president barack obama commutes sentence chelsea manning, leaked army documents serving years',
'',
'£ fallen report theresa may signal plans quit single market',
'shoutout',
'britain starts reveal brexit plans one priority restoring british control immigration borders',
"obama resign day early make biden president ruin trump's merchandise",
'oh, honey, bless heart',
'president elect potentially compromised hostile foreign govt gonna it? away',
'would please identify cities presently fire?',
'want foreign trip w country matters us, go china closest us, canada you',
'trump makes things slightly better',
"ap fact check story president obama's mother in law receive lifetime federal pension false",
'yes, president elect, mlk weekend, says freedom rider, sncc coordinator civil rights icon john lewis',
"civil rights hero i'm pee play civil rights hero i'm pee play",
'discovered river dolphins whhaaaatttt bless planet earth ii',
"i'm mins planet earth ii i'm already blown away",
'holy shitrevoke phd',
'seen c span2 senate passes resolution, moves toward repeal affordable care act',
'press conference, trump filled room paid staffers clapped cheered blasted media',
'hear russians footage obama playing basketball fixing economy',
"it's tough competition, think there's bigger liar trumpworld kellyanne conway incredible",
'press conference impossible watch',
'obama without doubt, one greatest orators time period',
'go knock park obama',
'street almost completely shutdown secret service, police barricades everywhere obama valois',
'fucking kidding me???',
'trump, understand paying fucken wall clear us tax payers pay',
'',
"i'm board flight abu dhabi bet, land, trump tweetback plan congress fund",
'trump team broke w precedent issued blanket edict denying obama political ambassadors extensions post 120',
'time move on say someone crush, issue national security',
'doctoral student advisor walk bar advisor orders rough draft sit awkward silence',
'filled simple graphics synthesize otherwise complex topic concise clear highly recommended',
'one concise well written explanations come across excellent work i sharing around network',
'',
'',
'',
'',
'apologize in flight announcement require assistance doctorate holder assist row',
'gender bias academe annotated bibliography',
'friendly holiday function reminder',
'ca say anything nice¹ ¹say footnote',
'end history aleppo version quite fukuyama intended',
"leave voters willing see economy suffer, unemployment money hospitals down, reduce eu immigration that's",
'',
'fantastic read',
"rex tillerson's wikipedia page edited overnight removed? award russian order friendship",
'breaking state department source tells trump considering disgraced ex fox news boss roger ailes secretary',
'nobel lecture, juan manuel santos cited advice hks faculty motivation trying times',
'cappharris grades got feeling like',
"you looking tillerson's thoughts stuff might matter secretary state, try first",
'update turkey least killed, hurt twin blasts outside istanbul soccer stadium',
'i put good money gop try convert anger russian interference election',
'given supposedly us political institution hacked russia, rnc share',
'dutch court finds populist anti islam lawmaker geert wilders guilty hate speech party leads polls advance',
'south korea parliament votes impeach president park geun hye corruption scandal',
'south korea parliament votes impeach park geun hye votes',
'work life balance delicate sunday night art feeling like wasted entire weekend',
'winston churchill born read writing advice sent officials prime',
'breaking senior official trump offered retired lt gen michael flynn job national security adviser',
"it's plagiarism it's ironic re reading commentary linguistic ownership",
'evidence life life burning well, poetry ash',
'daily existence',
"it's comment question",
'transport london utilizing data optimize service provision',
"brexit means brexit andthat's far",
'correlation education white support trump disappears control racial resentment',
'thank talk open floor passive aggressive requests show slide,',
'breaking israeli ministerial committee approves bill legalize illegal outposts west bank built private',
'bannonbreitbart setting wh comms strategy, long look back fondly time fox news',
'education much stronger predictor county level change income',
'trump support partly economic insecurity, even moreso race gender',
'breaking magnitude quake strikes south island new zealand usgs',
"j hey here's idea b joe j let's say get faculty job b j could team teach a b j could share",
'quick question trump first president elect family members transition team?',
'bewilderingly, expresses bewilderment minorities; muslims; immigrants fear',
"news release trump's transition team said reince priebus steve bannon would work partners",
"steve bannon breitbart news named president elect donald trump's chief strategist senior counselor",
'stephen bannon senior counselor got shitting me?',
'trump anti establishment appoints reince priebus chief staff ca get establishment that',
"david chappelle's snl monologue full",
'"we waste two years tours world doesnt know" eu commission president',
'feeling advisor takes research presentation',
'south koreans gather en masse protest president',
'last year leonard cohen recorded flanders fields written canadian soldier john mccrae tribute',
'wins florida, forcing firewall michigan, wisconsin pennsylvania',
'indiana projected abc news full results',
'vermont projected abc news full results',
'kentucky projected abc news full results',
'm56 strikes km ne min ago effects reported witnesses',
"it's beautiful day neighbourhood",
'response retraction letter',
'staff armed tranquiliser guns currently scouring london zoo escaped gorilla',
"mean, would never guessed she'd say that respect children brilliant",
'trump, understand you coming many judges supreme court?',
'clinton calls universal background checks firearms purchases yes years, americans die',
'picking judges is selective dimwit',
'bear mind clinton law degree yale trump does understand definition sexual assault conception law',
'tremendous hatred, says world renowned expert hatred hate speech',
'saying irredeemable bad saying sexually assault women?',
'shoutout',
'foreign policy spokesperson hillary deleted',
'said osmosis? big word',
'calls deploarablw call them african americans rapist mexicans grab py extreme vetting',
'i bet good money donald does know mosul',
"agree american ground troops syria let's learn mistakes iraq",
'syria russia',
'oop pence trump are speaking',
'trump look, do even know running mate is, do agree all',
'trump would cut taxes rich poor, raising taxes middle clinton would raise taxes rich',
'ice did endorse you isis endorsed you',
'foreign policy requires something trump knows nothing about nuance in progress word salad proof',
'moderators able mute mics,',
"fact check yes, trump's iraq war claim debunked",
'man running president united states says knows nothing russia',
'wait paid taxes last ten years? you?',
'trump promised make wealthy pay fair share making pay less',
"reminder donald trump's tax plan would absolutely worthless middle class",
'cutting bigly? uneducated',
"last fact checked trump's repeated claim iraq war",
'clinton says trump did apologize insulting comments thats true',
'islamophobia question, trump says muslims collectively responsible terrorism',
'someone point direction office us government? also dont wait audit release taxes',
'dear gop congrats nominated misogynist, racist, vulgar, lying, ignorant, mad man nominee oh,',
"trump wants muslim immigrants sign values' groping women? relentless lying? tax evasion? muslim",
'trump lies refugees years long vetting process refugees go resettled',
"record, ban', originally announced, condemned trump's vp candidate, still",
'trump says syrian refugees coming thousands huh? far canada admitted',
'answer question job pay attention former debate moderators, past future',
'says captain khan would alive today president, does apologize family,',
'republicans love term islamic terrorism do ask define',
"you right islamophobia, that's shame but blah radical islamic terrorism blah blah",
'islamaphobia shame? solution label muslims, track muslims, make wear badges??',
'trump says us giving billion iran thats false',
'plans good solves it',
'anderson playing games today',
"structure donald's responses?? needs fire debate prep interns",
'sniffling',
'im gentleman??',
'boooooooom clinton rap battle game strong',
"donald's trump sniffing lot today",
'again, prepared, did, shows',
'gotta shut studio audience up presidential debate jerry springer show content',
'trump says charge, hillary clinton would jail donald, america, defeat election opponents,',
'hillary clinton deserves nobel peace prize grabbing trump pussy right',
'bill clinton abusive abusiveperfect logic trump',
'minutes hillary wiped floor trump',
'trump fact chk us trade deficit trade deficit goods; surplus',
'trump created jobs politifact claiming women respect him',
"bragging sexual assault acceptable?' grope women?' going beat isis'",
'anderson cooper games tonight',
"go clinton's response trump's comments",
'record live medieval times, fact, live safest time history',
'room talk????????',
'bring law order back',
'handshake',
'please shimmy',
'"well start shortly"',
'get ready play bingo us',
'gonna spicy debate',
'future harris arrived',
'shimon peres, former israeli prime minister nobel peace prize winner, dead age rip',
'breaking israeli media former israeli president shimon peres died two weeks suffering major stroke',
'former israeli pm president shimon peres dies aged following stroke two weeks ago, reports say',
'moment fully comprehend much work end year',
'so, recap, cambridge, stanford ubc recently selected mcgill grads leaders',
'first baby born using new person fertility technique carried us scientists, new scientist reveals',
'greatest debate time hillary fun, slayed',
'clinton calls trump perv racist',
'trump keeps dropping names, acronyms, allusions low information undecided voters idea about, without',
'please, please ask trump policy first use is, hillary',
'',
'trump became b 52 aviation expert',
'clinton might first person human history fun presidential debate',
'man gets provoked tweet hand nuclear codes',
"breaking blowing another country's ship act war according orangutang",
'someone needs call sean hannity right fucking second',
"trump's witness iraq sean hannity?? might well call youngest child witness sheesh",
'trump war iraq holt record says otherwise',
'please like tweet fan',
'this',
'thanks',
'record preventing lone wolf attacks close impossible incredibly stupid question',
'cyber one things',
'resolved use phrase cyber qualified president century',
'breaking hacker collectives recruiting morbidly obese cyber security experts',
'terrible disrespect?? mean, respect?? politicians talk good',
"even trump's claims hillary staff obama true didnt spend five years publicly questioning obama's",
'wtf birther rumours got moving onto isis?? answer question',
'politicians talk good bragadocious',
'stop frisk unconstitutional statistically ineffective orangutan',
'former dean stephen toope named vice chancellor cambridge',
"clinton's response implicit bias strong",
'clinton calls background checks buy guns polls show even nra members overwhelmingly support universal',
'white man telling black man stop and frisk did involve profiling front million ppl',
'live fact check clinton right guns biggest cause death young black men',
'trump endorsed stop frisk statistically proven ineffective racially biased also unconstitutional',
'trump wants new season law order',
"hillary, fuck's sake, said country's fucked iraq war republican endeavor bush's war,",
'talk race hold seats',
'omg brought architect in reeeekkkkkkkttttt',
'trump us companies do pay taxes bad do pay taxes smart',
'say bragadocious? didnt know word? life lie',
'wonder donald trump hiding tax returns',
'trump literally contradicted fed interest rates',
'hilary fighting isis? fuck happened?',
'trump going cut taxes big league, going raise taxes big league',
'hey guys big league end story',
'fact check take pick',
'hillary tell name is bill do defend bils mistakes nafta',
"plan trump do wrote book it it's called stronger together pick tomorrow",
"trump denied said climate change hoax created chinese say here's tweet",
'breaking trump believes forms energy',
'rooting housing market collapse? called business, interjects yikes',
'million people lost homes thats called business',
'wait mexican cookies gonna trend twitter',
"want happy it's important me heat",
'ms secretary ok? condescending prick',
...],
'favorite_count': [0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1,
0,
9,
0,
1,
0,
2,
0,
4,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1,
0,
0,
0,
1,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1,
1,
0,
0,
13,
0,
0,
2,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
3,
0,
1,
0,
0,
0,
0,
0,
0,
0,
1,
0,
0,
0,
0,
0,
3,
1,
0,
0,
0,
0,
2,
2,
0,
0,
1,
1,
0,
0,
0,
0,
0,
2,
0,
0,
0,
1,
0,
0,
0,
0,
0,
0,
11,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1,
0,
0,
1,
0,
0,
1,
1,
0,
0,
0,
0,
5,
1,
0,
0,
0,
22,
0,
0,
1,
0,
0,
0,
0,
49,
0,
0,
2,
0,
1,
0,
0,
1,
0,
0,
0,
0,
0,
0,
8,
0,
5,
1,
0,
0,
0,
0,
0,
0,
2,
0,
2,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1,
0,
0,
1,
0,
0,
0,
0,
0,
0,
2,
0,
0,
0,
0,
0,
0,
0,
1,
0,
0,
0,
1,
0,
0,
2,
0,
1,
0,
0,
0,
0,
0,
0,
1,
0,
0,
26,
3,
2,
0,
0,
4,
1,
1,
1,
9,
0,
2,
1,
1,
0,
1,
0,
6,
2,
40,
0,
0,
1,
0,
0,
0,
0,
0,
0,
0,
1,
0,
0,
0,
0,
1,
1,
0,
0,
0,
0,
0,
0,
1,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1,
0,
0,
0,
9,
1,
2,
0,
1,
6,
8,
2,
10,
2,
0,
3,
5,
0,
0,
1,
0,
4,
0,
0,
0,
0,
0,
0,
2,
0,
0,
0,
2,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1,
1,
0,
0,
0,
0,
7,
0,
0,
1,
0,
0,
0,
0,
0,
2,
2,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1,
0,
0,
0,
0,
3,
0,
0,
0,
2,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
2,
7,
2,
2,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
2,
0,
0,
0,
0,
2,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
6,
0,
0,
0,
0,
0,
0,
0,
0,
1,
0,
0,
0,
0,
0,
1,
0,
0,
0,
0,
0,
0,
0,
0,
1,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1,
1,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1,
0,
2,
0,
0,
0,
2,
1,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1,
0,
1,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1,
0,
0,
5,
0,
0,
0,
0,
0,
0,
1,
0,
0,
0,
1,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1,
1,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1,
0,
0,
0,
0,
0,
0,
1,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1,
0,
0,
0,
0,
1,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
2,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
3,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
...],
'hashtags': ['opendata',
'NLP',
'spaCy',
'Metis',
'TopicModelling',
'PCA',
'SVD',
'SentimentAnalysis',
'SoDS17',
'Metis',
'SoDS17',
'MachineLearning',
'Python',
'SoDS17',
'Python',
'AWS',
'S3',
'SoDS17',
'adoption',
'ETH',
'SoDS17',
'EC2',
'SoDS17',
'AWS',
'AWS',
'boto3',
'SoDS17',
'machinelearningflashcards',
'SquadGoals',
'Hodl',
'Ethereum',
'liftoff',
'MoochMadness',
'MoochMadness',
'Chicago',
'SoDS17',
'SoDS17',
'SoDS17',
'SoDS17',
'SoDS17',
'SoDS17',
'sklearn',
'SoDS17',
'EEA',
'Ethereum',
'Adoption',
'datascience',
'SoDS17',
'DataJanitor',
'algorithms',
'AI',
'MachineLearning',
'Metis',
'SoDS17',
'SoDS17',
'Python',
'SoDS17',
'SoDS17',
'Python',
'Metis',
'cantstopwontstop',
'Python',
'SoDS17',
'Python',
'SoDS17',
'SoDS17',
'SoDS17',
'SoDS17',
'SoDS17',
'CAPP',
'SoDS17',
'CAPP',
'July4',
'SoDS17',
'ETH',
'Mathematics',
'McGillPride',
'McGillPride',
'CAPP30271',
'adoption',
'SoDS17',
'CAPP',
'SoDS17',
'MIT',
'OCW',
'SoDS17',
'partofLSE',
'ThinkStats',
'SoDS17',
'AI',
'BKC',
'SoDS17',
'ThinkStats',
'CAPP',
'SodS17',
'SoDS',
'CAPP',
'ETH',
'ETH',
'GE2017',
'GeneralElection',
'iVoted',
'WWDC',
'LondonAttack',
'MasterofNone',
'CAPP',
'CAPP',
'CAPPemotions',
'polsky',
'UChicago',
'ETH',
'Ethereum',
'adoption',
'ETH',
'GE2017',
'GeneralElection',
'UChicago',
'ETH',
'Ethereum',
'ETH',
'Ethereum',
'adoption',
'ETH',
'Ethereum',
'ETH',
'Ethereum',
'pycon2017',
'ETH',
'ETHEREUM',
'ETH',
'ETHEREUM',
'tothemoon',
'hodl',
'ETH',
'ETHEREUM',
'ETHEREUM',
'PREACH',
'CAPP',
'CAPP30254',
'CAPP',
'Ivecomesofar',
'DataScience',
'MuslimBan',
'Gabr',
'CAPP',
'OnThisDay',
'odsc',
'curecancer',
'AI',
'pydataldn',
'ODSC',
'ODSC',
'UNHCR',
'jesuswasarefugee',
'UNHCR',
'lgbtrefugees',
'ODSCEast',
'prismoji',
'ODSC',
'ODSC',
'ODSC',
'ODSC',
'ODSC',
'ODSC',
'ODSC',
'ODSC',
'ibm',
'odsc',
'ODSC',
'ODSC',
'ODSC',
'ODSC',
'Julia',
'ODSC',
'ODSC',
'ODSC',
'odsc',
'ODSCEast',
'ODSC',
'datascience',
'ODSCEast',
'odsc',
'odsceast',
'bigdata',
'policy',
'ODSC',
'ODSC',
'MachineLearning',
'CAPP',
'odsc',
'odsc',
'cxo',
'odsc',
'datascience',
'ODSC',
'ODSC',
'ODSC',
'datascience',
'analytics',
'bigdata',
'MachineLearning',
'DL',
'ODSC',
'odsc',
'Boston',
'Boston',
'Gold',
'ODSC',
'CAPP',
'Harris',
'ETH',
'marchforscience',
'CMLVCSummit',
'UChicago',
'CAPP',
'CMLVCSummit',
'UChicago',
'CAPP',
'MachineLearningSummit',
'UChicago',
'CAPP',
'MachineLearningSummit',
'UChicago',
'CAPP',
'MachineLearningSummit',
'UChicago',
'CAPP',
'FOMO',
'MachineLearningSummit',
'UChicago',
'CAPP',
'MachineLearningSummit',
'UChicago',
'CAPP',
'MachineLearningSummit',
'UChicago',
'CAPP',
'MachineLearningSummit',
'UChicago',
'CAPP',
'MachineLearningSummit',
'UChicago',
'CAPP',
'MachineLearningSummit',
'UChicago',
'MachineLearningSummit',
'UChicago',
'MachineLearningSummit',
'UChicago',
'MachineLearningSummit',
'UChicago',
'CAPP',
'CAPP',
'F8',
'science',
'MachineLearning',
'MachineLearning',
'CAPP',
'Matplotlib',
'DataViz',
'Pandas',
'MachineLearning',
'CAPP',
'ifinallyseethelight',
'CAPP',
'ML',
'datamodels',
'MachineLearning',
'UChicago',
'Python',
'CAPP',
'DataScience',
'BigData',
'DataScientists',
'Harris',
'CAPP',
'United',
'CAPP30271',
'mathematicalrevelations',
'Drake',
'bigmistake',
'Trump',
'CAPP',
'MLPubPol',
'Harris',
'greys',
'spottedUChicago',
'Databases',
'Harris',
'Harris',
'MachineLearning',
'DanceParty',
'Harris',
'Harris',
'datadriven',
'sklearn',
'Python',
'BestSchoolDay',
'Brexit',
'Harris',
'londonattack',
'Squad',
'HarrisCareerTrek',
'NLP',
'machilearning',
'IDPs',
'HarrisCareerTrek',
'HarrisCareerTrek',
'ByeKonstantin',
'NoMoreEconForLife',
'Chicago',
'Peckham',
'TBT',
'Chicago',
'PiDay',
'CAPP',
'Harris',
'WEHAVEADEMO',
'WHATISLOVE',
'BABYDONTHURTME',
'NOMORE',
'CS122',
'CAPP',
'DOYOUTHINKPEOPLEPITYUS',
'IMGOINGTOCRY',
'IDLIKETOTHANKTHEACADEMY',
'FORMYCOMPSCIPTSD',
'CS122',
'CAPP',
'Statistics',
'BigData',
'DataScience',
'CAPPemotions',
'stats',
'Iwantmyownpodcast',
'JointSession',
'Jointsession',
'Jointsession',
'JointSession',
'JointSession',
'JointSession',
'JointSession',
'Jointsession',
'SouthSide',
'JointAddress',
'JointAddress',
'CAPP',
'modellingdreamsahead',
'Modelling',
'all',
'night',
'long',
'NobelPrize',
'DataScienceValentines',
'science',
'valentinesday',
'DataScienceValentines',
'learntolivealittle',
'academicvalentine',
'academicvalentine',
'AcademicValentine',
'Harris',
'AcademicValentine',
'MuslimBan',
'BREAKING',
'MuslimBan',
'SFO',
'MuslimBanprotest',
'ACLU',
'MuslimBan',
'WelcometoScotland',
'Preach',
'Resistance',
'draintheswamp',
'DataSci',
'Datagetsitdone',
'Harris',
'DataRefuge',
'opengov',
'climatechange',
'opendata',
'justsaying',
'womensmarch',
'inauguration',
'McGillPride',
'DanielLevitin',
'budget',
'ACA',
'ObamaFarewell',
'ObamaLegacy',
'Russia',
'impostersyndrome',
'harris',
'gradschoolproblems',
'onthisday',
'RIPLeonardCohen',
'DonaldTrump',
'HillaryClinton',
'ElectionNight',
'election2016',
'election2016',
'election2016',
'earthquake',
'terremoto',
'Roma',
'Italy',
'fall',
'mcgill',
'montreal',
'Debate',
'debate',
'debate',
'debate',
'Chicago',
'debate',
'debate',
'debate',
'debate',
'debate',
'debate',
'debate',
'debate',
'Debate',
'debate',
'debate',
'Debate',
'debate',
'debate',
'debate',
'CNNRealityCheck',
'DonaldTrump',
'debate',
'CNNRealityCheck',
'debate',
'debate',
'debate',
'debate',
'GRABHERBYTHEP',
'DontDebateonCoke',
'debate',
'debate',
'slay',
'debate',
'debate',
'debate',
'debate',
'debate',
'SVU',
'debate',
'debate',
'debate',
'debate',
'FutureHarris',
'DataGetsItDone',
'TheCyber',
'facts',
'BESTTVSHOW',
'debatenight',
'Debates2016',
'debatenight',
'debatenight',
'KINETIC',
'POTENTIAL',
'Debates2016',
'debatenight',
'DebateNight',
'sosweet',
'debatenight',
'debatenight',
'debatenight',
'ImWithHer',
'FutureHarris',
'RussellSquare',
'RussellSquare',
'Erdogan',
'AtaturkAirport',
'Turkey',
'Turkey',
'Ankara',
'Harbiye',
'BreakingNews',
'Turkey',
'Turkey',
'Ankara',
'Turkey',
'turkey',
'Turkey',
'Turkey',
'Turkey',
'Turkey',
'Turkey',
'Ankara',
'Turkey',
'Photo',
'Turkey',
'Turkey',
'Ankara',
'Turkey',
'Turkey',
'Ankara',
'Turkey',
'MilitaryCoup',
'Turkey',
'Turkey',
'Istanbul',
'Turkey',
'coup',
'Erdogan',
'Turkey',
'Syria',
'ISIL',
'Russia',
'Egypt',
'Turkey',
'Turkey',
'Turkey',
'Ankara',
'Turkey',
'Turkey',
'Updated',
'Breaking',
'Turkey',
'Baghdad',
'Istanbul',
'Atatürk',
'Brexit',
'EUref',
'indyref2',
'Brexit',
'EUref',
'Leave',
'Brexit',
'EUref',
'Brexit',
'EURefResults',
'Brexit',
'EUref',
'EUref',
'EURef',
'EURef',
'EURef',
'EURef',
'EURef',
'EURef',
'EURef',
'Leave',
'EUref',
'EURef',
'Leave',
'EUref',
'EUref',
'EURef',
'EURef',
'EUref',
'EURef',
'EURef',
'EURef',
'EUref',
'EUref',
'EURef',
'ivoted',
'StrongerIn',
'EUref',
'phdchat',
'PhD',
'research',
'HappyFathersDay',
'EUref',
'DDay',
'BreakingNews',
'EgyptAir',
'FlightMS804',
'EgyptAir',
'EgyptAir',
'EgyptAir',
'London',
'LondonElects',
'PanamaPapers',
'science',
'science',
'AcademicValentines',
'Conversation',
'rstats',
'GOPDebate',
'RandPaul',
'ChrisChritie',
'Christie',
'realitycheck',
'rstats',
'rstats',
'BreakingNews',
'Turkey',
'Ankara',
'Turkey',
'Mars',
'Saudi',
'Egypt',
'militants',
'Army',
'distractinglysexy',
'BREAKING',
'Houthi',
'QueensSpeech',
'StateOpening',
'ISIS',
'rstats',
'statistics',
'analytics',
'bigdata',
'MohamedFahmy',
'AlJazeera',
'Egypt',
'GE2015',
'live',
'GE2015',
'GE2015',
'Conservative',
'Labour',
'SNP',
'LibDems',
'Plaid15',
'UKIP',
'GE2015',
'GE2015',
'GE2015',
'GE2015',
'GE2015',
'GE2015',
'GE2015',
'GE2015',
'GE2015',
'GE2015',
'GE2015',
'Labour',
'GE2015',
'GE2015',
'GE2015',
'GE2015',
'GE2015',
'GE2015',
'GE2015',
'GE2015',
'Conservative',
'Labour',
'SNP',
'LibDems',
'Plaid15',
'UKIP',
'Greens',
'BigData',
'BigData',
'LeonardNimoy',
'LegionOfGeek',
'CORe',
'AmericanSniper',
'NUDivest',
'BreakingNews',
'Yemenis',
'LSEdata',
'Hezbollah',
'Iraq',
'ISIS',
'Derna',
'Ukraine',
'ceasefire',
'MinskSummit',
'ChapelHillShooting',
'DataScience',
'FGM',
'EndFGM',
'TogethertoEndFGM',
'RegisterToVote',
'TransAsia',
'Taiwan',
'Jordan',
'ISIS',
'kickass',
'TransAsia',
'TransAsia',
'DataScientists',
'DataScience',
'Toronto',
'Rotman',
'Charlie_Hebdo',
'Egypt',
'religion',
'CharlieHebdo',
'CharlieHebdo',
'CharlieHebdo',
'JeSuisCharlie',
'streetart',
'CharlieHebdo',
'parisattack',
'Montreal',
'JeSuisCharlie',
'polmtl',
'cdnpoli',
'Syria',
'SydneySiege',
'sydneysiege',
'sydneysiege',
'sydneysiege',
'SydneySiege',
'SydneySiege',
'SydneySiege',
'sydneysiege',
'sydneysiege',
'sydneysiege',
'sydneysiege',
'MartinPlaceSiege',
'Sydney',
'Mubarak',
'FergusonDecision',
'Ferguson',
'MichaelBrown',
'BREAKING',
'Sisi',
'Egypt',
'CharingCross',
'Eurozone',
'ttrends14',
'France',
'Saudi',
'Lebanese',
'Lebanon',
'Syria',
'Lebanon',
'London',
'indyref',
'Indyref',
'indyref',
'indyref',
'indyref',
'indyref',
'indyref',
'indyref',
'indyref',
'indyref',
'indyref',
'indyref',
'indyref',
'indyref',
'indyref',
'Scotland',
'indyref',
'indyref',
'indyref',
'indyref',
'Scotland',
'indyref',
'indyref',
'indyref',
'indyref',
'indyref',
'indyref',
'indyref',
'indyref',
'indyref',
'indyref',
'indyref',
'indyref',
'indyref',
'indyref',
'indyref',
'indyref',
'indyref',
'indyref',
'Montreal',
'Alouettes',
'Tunisia',
'ISIS',
'socialmedia',
'Iraq',
'aid',
'Syria',
'R2P',
'Ferguson',
'BlackHat',
'DigitalLiteracy',
'Iraq',
'Mosul',
'Erbil',
'SouthSudan',
'Ukraine',
'Monstermind',
'syria',
'msf',
'Master',
'Postgraduate',
'Diplomacy',
'IS',
'Iraq',
'US',
'ISIS',
'Iraq',
'IS',
'ISIS',
'Israel',
'Hamas',
'Gaza',
'Kerry',
'BREAKING',
'Israeli',
'Gaza',
'BreakingNews',
'MH17',
'Nato',
'Gaza',
'Palestine',
'NetNeutrality',
'NetFreedom',
'MandelaDay',
'time2serve',
'Gaza',
'twitterversary',
'bornontheNHS',
'sorrynotsorry',
'geek',
'nerd',
'DDay70',
'Nigeria',
'BokoHaram',
'BokoHaram',
'breaking',
'Iraq',
'Syria',
'BokoHaram',
'r2p',
'Afghanistan',
'Privacy',
'Israel',
'Palestine',
'Russia',
'Syria',
'R2P',
'polisci',
'bigdata',
'DACnews',
'BreakingNews',
'ICC',
'Qaddafi',
'Libya',
'France',
'Mali',
'R2P',
'wefhyperc',
'wef',
'peacekeeping',
'Myanmar',
'Yale2014',
'CARcrisis',
'simlandia',
'Solidarité',
'simlandia',
'ICC',
'Syria',
'simlandia',
'BREAKING',
'BringBackOurGirls',
'MSF',
'Syria',
'HumTech2014',
'Syria',
'war',
'Egypt',
'Sisi',
'BREAKING',
'Yemen',
'refugees',
'IDPs',
'returnees',
'bringbackourgirls',
'Saudi',
'Iran',
'Afghanistan',
'CSISLive',
'CARcrisis',
'Ukraine',
'Syria',
'humanitarian',
'Kramatorsk',
'Syria',
'Aleppo',
'r2p',
'MERS',
'Saudi',
'R2P',
'Myanmar',
'genocide',
'cdnpoli',
'Norway',
'ArabSpring',
'WinstonChurchill',
'Harvard',
'BreakingNews',
'Yemen',
'Nigeria',
'Myanmar',
'genocide',
'JohnnieCarson',
'Dallaire',
'RIP'],
'retweet_count': [5,
1,
1065,
1,
0,
11,
27,
1407,
728,
1107,
0,
83014,
757,
1,
111312,
98,
2,
0,
5,
1,
1,
0,
14,
0,
0,
12,
46762,
19455,
1260,
3,
0,
4,
146,
0,
0,
0,
20,
0,
0,
0,
4,
0,
2,
12522,
3,
0,
28,
12,
1183,
0,
3847,
21354,
1,
168,
352,
0,
0,
0,
984,
0,
0,
0,
18,
0,
0,
990,
0,
14,
0,
16,
1767,
0,
0,
1,
0,
0,
6,
5,
0,
1,
0,
0,
1151,
0,
1,
2,
18,
8,
169,
0,
0,
127,
7,
2,
4,
0,
59,
0,
3532,
0,
0,
1,
0,
2091,
4,
0,
0,
1,
1,
7,
1,
0,
4,
0,
0,
0,
0,
2,
2,
44244,
39,
0,
0,
0,
0,
0,
0,
0,
0,
8,
44,
20,
1,
0,
267,
720,
0,
281,
0,
2,
0,
0,
0,
0,
125,
2,
20075,
32683,
93,
571,
0,
0,
0,
27,
877,
0,
0,
3,
0,
6,
0,
0,
0,
58829,
0,
5,
16,
0,
10,
0,
0,
0,
0,
0,
0,
19,
3,
0,
0,
40,
72562,
7,
99,
0,
0,
4,
3,
22,
1645,
13,
14,
0,
0,
0,
0,
6,
62,
0,
148,
0,
13,
811,
201,
588,
0,
8,
4,
0,
0,
0,
0,
7,
2,
264,
0,
0,
0,
0,
0,
1539,
20403,
313,
716,
614,
120,
692,
0,
0,
0,
17,
0,
0,
203,
73,
574,
20,
0,
3,
0,
0,
19112,
97,
85403,
0,
67,
2,
1,
1,
0,
0,
0,
0,
0,
0,
72,
0,
0,
194,
37,
381,
302,
7170,
0,
0,
0,
23,
1,
0,
0,
3555,
0,
0,
0,
1,
1,
4,
0,
0,
0,
0,
0,
3,
1,
0,
7,
266,
615,
0,
0,
30,
0,
0,
27,
0,
0,
0,
365,
78,
35,
67,
0,
0,
0,
8,
6484,
8477,
0,
0,
0,
1,
18718,
18899,
9,
293,
29,
2235,
4374,
21,
0,
0,
0,
0,
362,
0,
0,
6,
0,
0,
172,
3776,
7,
3,
10,
0,
0,
0,
0,
0,
4,
2,
2,
1,
1,
0,
1,
2,
2,
0,
4,
1,
2,
2,
2,
0,
0,
0,
0,
1,
31,
1,
4,
10,
2,
7,
0,
1130,
0,
13,
1,
1,
2,
1,
6,
14,
0,
0,
20,
4,
2,
0,
1,
1,
1,
0,
0,
0,
1,
1,
1,
0,
1,
853,
107,
0,
1,
0,
0,
1,
0,
1,
0,
1598,
232,
10,
323,
7,
129,
141,
0,
16665,
411,
2610,
28,
232,
209,
35,
0,
117,
0,
0,
0,
0,
0,
0,
0,
0,
0,
5,
0,
0,
0,
0,
0,
0,
0,
0,
0,
3,
0,
0,
0,
1,
0,
0,
3559,
58,
136,
0,
0,
63,
4,
0,
2,
0,
1,
0,
0,
526,
0,
3,
64,
3,
384,
15,
0,
15057,
0,
0,
24897,
0,
0,
0,
0,
3,
167,
1,
0,
0,
21,
376,
4296,
0,
58,
0,
2,
348,
115,
1,
8,
0,
5,
2,
0,
0,
48,
125,
36,
5,
0,
265,
1,
0,
0,
0,
1077,
1540,
0,
3,
9935,
4137,
109,
3179,
642,
0,
193,
1034,
375,
95,
759,
0,
0,
0,
4,
0,
1458,
1,
0,
84,
0,
0,
0,
2049,
1201,
1288,
0,
115,
313,
419,
0,
0,
0,
4,
55,
7,
585,
0,
327,
0,
0,
0,
0,
0,
14,
4,
205,
2424,
0,
1953,
44,
5,
0,
0,
1,
124,
5202,
0,
0,
4,
0,
14,
246,
0,
0,
0,
0,
0,
0,
0,
93,
1167,
5646,
3003,
0,
1,
0,
7,
85,
112,
0,
0,
1385,
169,
517,
0,
1279,
6,
0,
0,
0,
694,
200,
3,
3,
3,
8,
2,
10,
87,
4,
76646,
4753,
827,
0,
6,
10,
0,
10,
1033,
1,
0,
1,
14,
0,
0,
0,
0,
1752,
3,
0,
3,
13,
0,
0,
0,
0,
0,
8918,
6,
0,
0,
0,
0,
0,
0,
233,
0,
0,
0,
0,
0,
2,
0,
0,
0,
0,
0,
0,
0,
0,
0,
8,
0,
0,
0,
63,
1667,
2189,
517,
23511,
0,
49,
28,
125,
112,
44,
16,
0,
0,
94,
70,
60,
10,
0,
0,
0,
149,
226,
9593,
21,
304,
3168,
504,
372,
14,
246,
241,
0,
0,
0,
5,
0,
1863,
823,
3205,
235,
184,
74,
61162,
7084,
1198,
981,
24680,
244,
1161,
49,
1113,
4962,
829,
0,
7302,
1206,
20184,
10328,
793,
112493,
6635,
6871,
0,
0,
201,
9515,
3295,
348,
523,
2519,
47861,
29748,
129,
579,
50237,
11,
1332,
18,
399,
23926,
5396,
0,
0,
0,
2,
0,
3,
0,
0,
48,
229,
2,
142,
263442,
146,
2554,
13240,
119,
15532,
2104,
0,
0,
1353,
2630,
57362,
0,
28,
197,
22009,
10,
553,
515,
394,
601,
11582,
3350,
95,
5445,
0,
65,
0,
24,
40775,
29,
4083,
90,
873,
3189,
383,
2689,
334,
0,
0,
0,
505,
8366,
11239,
4189,
0,
0,
0,
1,
0,
193909,
0,
5,
2072,
14,
1660,
0,
0,
0,
0,
0,
0,
974,
235,
514,
990,
867,
95,
0,
0,
10934,
1492,
24,
0,
237,
57,
276,
292,
42,
7055,
1393,
311,
1776,
2532,
264,
446,
0,
542,
17,
130,
145,
726,
1575,
3,
237,
869,
108,
421,
0,
1014,
191,
1201,
0,
0,
14,
641,
500,
370,
1314,
27,
176,
165,
143,
856,
29,
2607,
190,
1170,
35,
991,
0,
0,
458,
0,
0,
62,
0,
0,
0,
0,
1,
267,
26,
155,
20,
2,
1486,
1190,
258,
66,
698,
1010,
0,
10,
195,
30,
0,
40201,
1465,
8023,
180,
1603,
427,
21,
20,
31,
0,
604,
349,
0,
0,
2,
0,
0,
0,
81,
20,
536,
1,
0,
0,
16,
208,
456,
103,
39,
0,
1,
0,
416,
0,
50,
206,
0,
0,
135,
464,
1300,
189,
21,
3236,
0,
0,
2,
9,
171,
0,
89,
65,
0,
0,
78,
93,
22,
157,
1,
0,
12,
53,
0,
0,
37,
0,
0,
0,
10,
23,
724,
1803,
1988,
0,
0,
4542,
0,
0,
577,
0,
8364,
35,
0,
22,
0,
419,
1079,
3,
86,
0,
1115,
0,
0,
1,
0,
...]}}]
In [9]:
import spacy
import nltk
from gensim.models import Phrases
from gensim.models.word2vec import LineSentence
from gensim.corpora import Dictionary, MmCorpus
from gensim.models.ldamulticore import LdaMulticore
import pyLDAvis
import pyLDAvis.gensim
from collections import Counter
from gensim.corpora.dictionary import Dictionary
# Load the spaCy 'en' model once; the resulting `nlp` callable is reused by the
# tokenization and important-word filtering cells further down.
nlp = spacy.load('en')
In [10]:
# NOTE: this rebinds `gabr_tweets`. Earlier in the notebook it held the result of
# extract_users_tweets(); from here on it is the list of cleaned tweet strings.
gabr_tweets = clean_gabr_tweets[0]['gabr_ibrahim']['content']
In [11]:
gabr_tweets[:5]
Out[11]:
['great turnout today hope able join us slides available link video webinar coming soon',
'ms capp student talks challenges value time',
'good news steve bannon gone bad news replaced sentient swastika right arm permanently',
'byyyeeeee',
'late night could possibly better']
Let's now proceed to tokenize these tweets in addition to lemmatizing them! This will help improve the performance of our LDA model!
I will utilise spacy for this process as it is a production grade NLP library that is exceptionally fast!
In [12]:
# Lemmatize every tweet with spaCy and keep only the informative tokens.
tokenized_tweets = []
for raw_tweet in gabr_tweets:
    kept_lemmas = [
        str(tok.lemma_)
        for tok in nlp(raw_tweet)
        # skip whitespace, punctuation, stop words, pure digits and 1-2 character tokens
        if not (
            tok.is_space
            or tok.is_punct
            or tok.is_stop
            or tok.is_digit
            or len(tok) in (1, 2)
        )
    ]
    # one string per tweet, so that each tweet stays separate from the others
    tokenized_tweets.append(" ".join(kept_lemmas))

# strip surrounding whitespace, then discard tweets that ended up empty
tokenized_tweets = [
    stripped
    for stripped in (text.strip() for text in tokenized_tweets)
    if stripped != ""
]
In [13]:
tokenized_tweets[:5] # you can see how this is different to the raw tweets!
Out[13]:
['great turnout today hope able join slide available link video webinar come soon',
'capp student talk challenge value time',
'good news steve bannon go bad news replace sentient swastika right arm permanently',
'byyyeeeee',
'late night possibly better']
Let's now add these tokenized tweets to our dictionary!
In [14]:
# Store the lemmatized tweets next to the other fields for this user.
# Tweets that became empty after tokenization were dropped, so this list can be
# shorter than 'content' and is NOT index-aligned with it (or with the count lists).
clean_gabr_tweets[0]['gabr_ibrahim']['tokenized_tweets'] = tokenized_tweets
I will now turn the dictionary back into a dataframe and run it through the filtration function, before re-casting the dataframe into a dictionary.
This time, we are running the filtration process on the tokenized tweets column and not the content column.
NLP models are very sensitive - ensuring consistent cleaning is important!
In [15]:
clean_gabr_tweets_df = pd.DataFrame.from_dict(clean_gabr_tweets[0], orient='index')
In [16]:
clean_gabr_tweets_df.head()
Out[16]:
favorite_count
content
tokenized_tweets
retweet_count
hashtags
gabr_ibrahim
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, ...
[great turnout today hope able join us slides ...
[great turnout today hope able join slide avai...
[5, 1, 1065, 1, 0, 11, 27, 1407, 728, 1107, 0,...
[opendata, NLP, spaCy, Metis, TopicModelling, ...
In [17]:
clean_gabr_tweets_df = filtration(clean_gabr_tweets_df, "tokenized_tweets")
In [18]:
clean_gabr_tweets = dataframe_to_dict(clean_gabr_tweets_df)
In [19]:
clean_gabr_tweets[0]['gabr_ibrahim']['tokenized_tweets'][:5]
Out[19]:
['great turnout today hope able join slide available link video webinar come soon',
'capp student talk challenge value time',
'good news steve bannon go bad news replace sentient swastika right arm permanently',
'byyyeeeee',
'late night possibly better']
Fantastic - at this point, we have everything we need to proceed with LDA from the Gensim Library.
LDA via the Gensim library requires that our data be in a very specific format.
Broadly, LDA requires a Dictionary object that is later used to create a matrix called a corpus.
The Gensim LDA Dictionary will require that we pass in a list of lists. Every sublist will be a tweet that has been split.
Let's look at my first tweet as an example.
Before:
['great turnout today hope able join slide available link video webinar come soon', tweet 2, tweet 3, ...]
Correct Gensim Format:
[['great', 'turnout', 'today', 'hope', 'able', 'join', 'slide', 'available','link', 'video', 'webinar', 'come', 'soon'], [tweet 2 in split form], [...],...]
In [24]:
list_of_tweets_gabr = clean_gabr_tweets[0]['gabr_ibrahim']['tokenized_tweets']
In [25]:
# Gensim wants every document as a list of tokens, so split each tweet on whitespace.
gensim_format_tweets = [tweet_text.split() for tweet_text in list_of_tweets_gabr]
In [26]:
gensim_format_tweets[:5]
Out[26]:
[['great',
'turnout',
'today',
'hope',
'able',
'join',
'slide',
'available',
'link',
'video',
'webinar',
'come',
'soon'],
['capp', 'student', 'talk', 'challenge', 'value', 'time'],
['good',
'news',
'steve',
'bannon',
'go',
'bad',
'news',
'replace',
'sentient',
'swastika',
'right',
'arm',
'permanently'],
['byyyeeeee'],
['late', 'night', 'possibly', 'better']]
In [30]:
gensim_dictionary = Dictionary(gensim_format_tweets)
I will now filter out extreme words - that is, words that appear far too often and words that are rare.
In [31]:
# Trim the vocabulary in place before the corpus is built: drop words that are too
# rare or too common.
# NOTE(review): presumably no_below=10 means "appears in fewer than 10 tweets" and
# no_above=0.4 means "appears in more than 40% of tweets" - confirm against the gensim docs.
gensim_dictionary.filter_extremes(no_below=10, no_above=0.4)
gensim_dictionary.compactify() # remove gaps after words that were removed
We now need to vectorize all the tweets so that they can be fed to the LDA algorithm! To do this, we will create a bag of words model from our tweets.
After putting all our tweets through this bag of words model, we will end up with a 'corpus' that represents all the tweets for a particular user. In this case, that user is myself.
We will save this corpus to disk as we go along! We will use the MmCorpus object from Gensim to achieve this.
In [32]:
!pwd
/home/igabr/new-project-4
In [35]:
# Directory prefix for every artefact this notebook writes to disk (corpus, LDA model).
# The trailing slash is required: file names are built by plain string concatenation
# (file_path_corpus + "<name>"), so without it the files would be written to the
# parent directory with "new-project-4" glued onto the front of their names.
file_path_corpus = "/home/igabr/new-project-4/"
In [34]:
def bag_of_words_generator(lst, dictionary):
    """Lazily convert tokenized documents into bag-of-words vectors.

    Parameters
    ----------
    lst : iterable
        The documents to convert; each item is passed unchanged to
        ``dictionary.doc2bow``.
    dictionary : Dictionary
        The Gensim ``Dictionary`` (or a subclass) used for the conversion.

    Returns
    -------
    generator
        Yields ``dictionary.doc2bow(document)`` for every document in ``lst``,
        in order. Nothing is converted until the generator is consumed.

    Raises
    ------
    AssertionError
        If ``dictionary`` is not a Gensim ``Dictionary``.
    """
    # Validate at call time. With a `yield` in this function body the check would be
    # deferred until the first item is requested, far away from the faulty call.
    assert isinstance(dictionary, Dictionary), "Please enter a Gensim Dictionary"
    return (dictionary.doc2bow(document) for document in lst)
In [36]:
MmCorpus.serialize(file_path_corpus+"{}.mm".format("gabr_ibrahim"), bag_of_words_generator(gensim_format_tweets, gensim_dictionary))
In [37]:
corpus = MmCorpus(file_path_corpus+"{}.mm".format("gabr_ibrahim"))
In [39]:
corpus.num_terms # the number of terms in our corpus!
Out[39]:
224
In [44]:
corpus.num_docs # the number of documents, i.e. the number of tweets!
Out[44]:
1708
I will be using the LDAMulticore class from gensim!
I set the passes parameter to 100 and the chunksize to 2000.
The chunksize (2000, which exceeds the 1,708 tweets in the corpus) ensures that all the documents are used at once, and the passes parameter ensures that the model looks at all the documents 100 times before converging.
As I am using my ENTIRE tweet history, I will create 30 topics!
I will adjust this to 10 when running lda on 2nd degree connections, as I will only have 200 of their tweets!
In [122]:
# Fit the topic model on the serialized corpus.
# - workers=None lets gensim size the worker pool from the machine's CPU count
#   instead of spawning a fixed 100 processes.
# - random_state pins the initialisation so that re-running the notebook gives
#   comparable topics (and comparable downstream word counts).
lda = LdaMulticore(
    corpus,
    num_topics=30,
    id2word=gensim_dictionary,
    chunksize=2000,
    workers=None,
    passes=100,
    random_state=42,
)
I can then save this lda model!
In [123]:
lda.save(file_path_corpus+"lda_model_{}".format("gabr_ibrahim"))
In [124]:
lda = LdaMulticore.load(file_path_corpus+"lda_model_{}".format("gabr_ibrahim"))
I now wish to extract all of the words that appear in each of the 30 topics that the LDA model was able to create.
For each word in a topic, I will ensure that it has a frequency not equal to 0.
I will place all these words into a list and then wrap a Counter object around it!
I am doing this as I want to see the distribution of words that appear across all topics for a particular user. The LDA process will highlight key words that a particular user often uses in their Twitter feed, across all topics that the user discusses. As such, the words they use will be indicative of the topics a Twitter user talks about!
The counter object will simply keep a count of how many times, out of a maximum of 30 (topics) a word appears, given it has a frequency greater than 0. That is, the word appears in a topic.
In [125]:
from collections import Counter
In [182]:
# Gather every term that a topic reports with a non-zero weight, then count in how
# many topics each term shows up.
word_list = []
for topic_id in range(lda.num_topics):  # follow the fitted model instead of hard-coding 30
    for term, frequency in lda.show_topic(topic_id, topn=100):  # top 100 words for a topic
        if frequency != 0:
            word_list.append(term)
temp = Counter(word_list)
In [183]:
len(temp)
Out[183]:
224
In [184]:
# This can be done later to help filter the important words.
# A word is kept when it appears in at least 10 topics, is not an NLTK stop word, and
# spaCy finds at least one token in it that is neither a stop word nor 2 characters long.
important_words = []
for word, topic_count in temp.items():
    if topic_count < 10 or word in nltk_stopwords:
        continue
    # Append each word at most once: appending inside a per-token loop would add
    # duplicates whenever spaCy splits the word into several tokens.
    if any(not token.is_stop and len(token) != 2 for token in nlp(word)):
        important_words.append(word)
In [185]:
important_words
Out[185]:
['foreign',
'refugee',
'expert',
'age',
'russian',
'woman',
'clinton',
'syria',
'official',
'talk',
'share',
'court',
'london',
'security',
'turkey',
'military',
'ask',
'record',
'airport',
'year',
'find',
'coup',
'end',
'video',
'office',
'today',
'president',
'republican',
'know',
'blast',
'man',
'government',
'russia',
'british',
'political',
'free',
'week',
'important',
'work',
'plan',
'hillary',
'parliament',
'control',
'news',
'public',
'syrian',
'remember',
'late',
'second',
'student',
'tell',
'read',
'fuck',
'scotland',
'police',
'love',
'islamic',
'support',
'update',
'think',
'datum',
'policy',
'time',
'create',
'far',
'machine',
'hit',
'post',
'follow',
'cnn',
'order',
'come',
'new',
'open',
'country',
'change',
'good',
'right',
'night',
'die',
'national',
'confirm',
'turnout',
'isis',
'medium',
'live',
'long',
'tax',
'issue',
'hostage',
'stop',
'ankara',
'like',
'future',
'leave',
'force',
'staff',
'house',
'thing',
'people',
'white',
'law',
'air',
'turkish',
'muslim',
'help',
'break',
'result',
'school',
'hold',
'strike',
'sign',
'fire',
'shoot',
'state',
'number',
'vote',
'way',
'trump',
'try',
'minister',
'kill',
'israeli',
'party',
'referendum',
'start',
'iraq',
'accord',
'blog',
'dead',
'death',
'drop',
'watch',
'attack',
'job',
'attempt',
'leader',
'david',
'debate',
'day',
'thank',
'poll',
'run',
'incredible',
'grad',
'cameron',
'claim',
'fall',
'let',
'presidential',
'join',
'great',
'hear',
'life',
'big',
'bomb',
'lead',
'want',
'look',
'election',
'donald',
'capital',
'obama',
'use',
'question',
'near',
'source',
'world',
'war',
'report',
'science',
'mean',
'team',
'close',
'american',
'yes',
'check',
'city',
'east',
'remain',
'learn',
'win']
In [186]:
len(important_words)
Out[186]:
182
I will then place this LDA Counter Object back into our dictionary!
We will then pickle this object - we will use it again for our TF-IDF analysis!
Be sure to look at the file called lda.py to see how I structured the code to run through the 2nd degree connections!
In [215]:
clean_gabr_tweets[0]['gabr_ibrahim'].keys()
Out[215]:
dict_keys(['favorite_count', 'content', 'hashtags', 'retweet_count', 'tokenized_tweets'])
In [216]:
clean_gabr_tweets[0]['gabr_ibrahim']['LDA'] = temp
In [217]:
pickle_object(clean_gabr_tweets, "gabr_ibrahim_tweets_LDA_Complete")
In [ ]:
Content source: igabr/Metis_Projects_Chicago_2017
Similar notebooks: