In [16]:
import pandas as pd;
import numpy as np;
import matplotlib.pyplot as plt
%matplotlib inline
In [17]:
# Load the full labeled tweet corpus and the precomputed top-1000 unigram features.
master = pd.read_csv('all_tweets_df.csv')
unigram_features = pd.read_csv('top_1000_unigram_features.csv')
Working subset: the first 8,000 tweets of each class (16,000 total), kept small due to RAM constraints.
In [18]:
# Balanced working subset: first 8,000 sarcastic + first 8,000 genuine tweets.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0 — use pd.concat.
subset = pd.concat([
    master[master['type'] == 'sarcastic'][:8000],
    master[master['type'] == 'genuine'][:8000],
])
# test_subset = pd.concat([master[master['type']=='sarcastic'][6000:8000], master[master['type']=='genuine'][6000:8000]])
In [19]:
from sklearn.feature_extraction.text import CountVectorizer
# from sklearn.feature_extraction import DictVectorizer
In [114]:
# Bag-of-words counts limited to the 5,000 most frequent tokens.
count_vec = CountVectorizer(max_features=5000)
# Column '0' holds the raw tweet text.
vector = count_vec.fit_transform(subset['0'])
# vector = DictVectorizer().fit_transform(master['0'])
varr = vector.toarray()  # dense count matrix, one row per tweet
In [117]:
# Unigrams: vocabulary learned by the CountVectorizer.
# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2;
# get_feature_names_out() is the replacement.
count_vec.get_feature_names_out()
Out[117]:
In [123]:
# count_vec.transform()
In [99]:
# Preview the master frame instead of dumping every row into the notebook output.
master.head()
Out[99]:
In [21]:
# Dense unigram counts plus three hand-crafted features from the subset.
# Wrapping in list(...) drops the subset's original index so the values align
# positionally with the fresh RangeIndex of `unigrams`.
unigrams = pd.DataFrame(varr)
for extra_col in ('ToUser', 'Hashtags', 'AllCapsCount'):
    unigrams[extra_col] = list(subset[extra_col])
Tweets vectorized by Top-5000 Unigram vocabulary
In [22]:
unigrams
Out[22]:
In [23]:
# 60/40 train/test split with a fixed seed for reproducibility.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(unigrams, subset['type'], test_size=0.4, random_state=0)
In [78]:
# RBF-kernel support vector classifier with default hyperparameters.
from sklearn.svm import SVC
clf = SVC()
clf.fit(X_train, y_train)
SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',
max_iter=-1, probability=False, random_state=None, shrinking=True,
tol=0.001, verbose=False)
# print(clf.predict([[-0.8, -1]]))
Out[78]:
In [79]:
# Mean accuracy of the SVC on the held-out split.
clf.score(X_test,y_test)
Out[79]:
In [80]:
# Hard class labels ('sarcastic'/'genuine') predicted by the fitted SVC.
svc_predictions = clf.predict(X_test)
In [83]:
# Import locally so this cell works on a fresh top-to-bottom run —
# roc_curve is otherwise only imported in a cell further down the notebook.
from sklearn.metrics import roc_curve

# Binarize string labels: sarcastic -> 1, genuine -> 0.
svc_preds_numeric = [1 if x=='sarcastic' else 0 for x in svc_predictions]
y_test_numeric = [1 if x=='sarcastic' else 0 for x in y_test]
fpr_svc, tpr_svc, thresh_svc = roc_curve(y_test_numeric, svc_preds_numeric)
In [98]:
# Accuracy of the SVC predictions on the held-out split.
from sklearn.metrics import accuracy_score
accuracy_score(y_test, svc_predictions)
Out[98]:
In [84]:
# ROC curve from the SVC's hard predictions (a single threshold, so the
# curve is one elbow point). Added axis labels and legend — a figure
# should stand alone when the notebook is skimmed.
plt.figure()
lw = 2
plt.plot(fpr_svc, tpr_svc, color='blue', lw=lw, label='ROC curve')
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.legend(loc='lower right')
plt.title("SVM ROC")
Out[84]:
In [86]:
# Inspect the SVC true-positive rates computed above.
tpr_svc
Out[86]:
In [87]:
# Inspect the SVC false-positive rates computed above.
fpr_svc
Out[87]:
In [26]:
# Logistic regression with every hyperparameter spelled out explicitly,
# one per line for readability.
from sklearn.linear_model import LogisticRegression

lr_clf = LogisticRegression(
    penalty='l2',
    dual=False,
    tol=0.0001,
    C=1.0,
    fit_intercept=True,
    intercept_scaling=1,
    class_weight=None,
    random_state=None,
    solver='liblinear',
    max_iter=100,
    multi_class='ovr',
    verbose=0,
    warm_start=False,
    n_jobs=1,
)
In [27]:
# Fit the logistic model on the training split.
lr_clf.fit(X_train,y_train)
Out[27]:
In [29]:
# Mean accuracy of the logistic model on the held-out split.
lr_clf.score(X_test,y_test)
Out[29]:
In [30]:
# 3-fold shuffled cross-validation of the logistic model on the whole subset.
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import ShuffleSplit
cv = ShuffleSplit(n_splits=3, test_size=0.3, random_state=0)
scores = cross_val_score(lr_clf, unigrams, subset['type'], cv=cv)
scores
Out[30]:
In [31]:
# Per-class probability estimates (one column per class) on the held-out split.
lr_clf.predict_proba(X_test)
Out[31]:
In [75]:
# Hard predictions from the fitted logistic model.
lr_predictions = lr_clf.predict(X_test)
# NOTE(review): cells with lower line positions also use roc_curve — these
# imports belong in the import cell at the top so a fresh run works in order.
from sklearn.metrics import roc_auc_score, roc_curve
from sklearn import metrics
# roc_auc_score(y_test,lr_clf.predict_proba(X_test))
# roc_curve(y_test, lr_predictions, pos_label="sarcastic")
In [49]:
# Encode string labels as integers: 1 = sarcastic, 0 = genuine.
lr_preds_numeric = [int(label == 'sarcastic') for label in lr_predictions]
y_test_numeric = [int(label == 'sarcastic') for label in y_test]
In [69]:
# ROC points from the logistic model's hard (0/1) predictions.
fpr_lr, tpr_lr, thresh_lr = roc_curve(y_test_numeric, lr_preds_numeric)
In [51]:
# Distribution of predicted labels — sanity check for class balance.
pd.Series(lr_predictions).value_counts()
Out[51]:
In [72]:
# ROC curve of the logistic model (hard predictions -> single elbow point).
# Added axis labels and legend so the figure stands alone.
plt.figure()
lw = 2
plt.plot(fpr_lr, tpr_lr, color='red', lw=lw, label='ROC curve')
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.legend(loc='lower right')
plt.title('LogReg ROC')
Out[72]:
In [52]:
# Decision tree with default settings, fitted on the training split.
from sklearn import tree

# fit() returns the estimator itself, so construct-and-fit chains cleanly.
dt_clf = tree.DecisionTreeClassifier().fit(X_train, y_train)
In [53]:
# Gaussian Naive Bayes on the same split.
# NOTE(review): this rebinds `clf`, shadowing the SVC fitted earlier —
# every later use of `clf` (score, predict_proba, sigma_) refers to this
# GaussianNB model, not the SVC.
from sklearn.naive_bayes import GaussianNB
clf = GaussianNB()
clf.fit(X_train, y_train)
Out[53]:
In [54]:
# Held-out accuracy of the Naive Bayes model (clf is GaussianNB here).
NB_results = clf.score(X_test, y_test)
In [97]:
# Display the Naive Bayes accuracy computed above.
NB_results
Out[97]:
In [55]:
# Probability of the positive ('sarcastic') class: column 1 of predict_proba.
nb_predictions_positive = clf.predict_proba(X_test)[:, 1]
In [56]:
# ROC from the NB class-probabilities; 'sarcastic' is the positive class,
# so string labels can be passed directly via pos_label.
from sklearn.metrics import roc_curve
fpr, tpr, thresholds = roc_curve(y_test, nb_predictions_positive, pos_label="sarcastic")
In [57]:
# Inspect the positive-class probabilities.
nb_predictions_positive
Out[57]:
In [58]:
# 1/0 encoding of the test labels (1 = sarcastic).
y_test_1 = [int(label == 'sarcastic') for label in y_test]
In [59]:
# Convert the 0/1 label list to a numpy array.
y_test_1 = np.array(y_test_1)
In [61]:
# NOTE(review): exact duplicate of the roc_curve call two cells up — harmless
# but redundant; consider deleting this cell.
fpr, tpr, thresholds = roc_curve(y_test, nb_predictions_positive, pos_label="sarcastic")
In [73]:
# ROC curve of the Naive Bayes model — probability scores give a proper
# multi-threshold curve here. Added axis labels and legend.
plt.figure()
lw = 2
plt.plot(fpr, tpr, color='darkorange', lw=lw, label='ROC curve')
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.legend(loc='lower right')
plt.title("NaiveBayes ROC")
Out[73]:
In [ ]:
# Per-feature variance learned by GaussianNB for class index 1.
# `sigma_` was renamed to `var_` in scikit-learn 1.0 and removed in 1.2;
# fall back to the old attribute name for older versions.
class_variances = getattr(clf, 'var_', None)
if class_variances is None:
    class_variances = clf.sigma_
# Index of the highest-variance feature for class 1 (the last, appended
# hand-crafted column in this feature matrix).
class_variances[1].argmax()
class_variances[1]
The last feature, "AllCapsCount", has the largest class-conditional variance in the Naive Bayes model (largest `sigma_` entry) — suggesting, though not proving, that it is the most informative of the hand-crafted features.
In [151]:
# Load the held-out evaluation tweets and their labels.
test_tweets = pd.read_csv('test_tweets_df.csv')
test_labels = test_tweets['label']
In [155]:
# Vectorize the held-out tweets with the vocabulary fitted on the training
# subset, then append the same three hand-crafted features.
test_unigrams = pd.DataFrame(count_vec.transform(test_tweets['0']).toarray())
for feature in ('ToUser', 'Hashtags', 'AllCapsCount'):
    test_unigrams[feature] = test_tweets[feature]
In [156]:
# 1/0 encoding of the held-out labels (1 = sarcastic).
test_labels_numeric = [1 if x=='sarcastic' else 0 for x in test_labels]
In [163]:
# NOTE(review): this splits the held-out file again and only scores the 40%
# "test" side, discarding the rest; also rebinds X_train/X_test/y_train/y_test
# from the earlier split. Confirm the re-split is intended.
# X_train, X_test, y_train, y_test = train_test_split(test_unigrams, test_labels_numeric, test_size=0.4, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(test_unigrams, test_labels, test_size=0.4, random_state=0)
In [171]:
# Evaluate the logistic model on the held-out tweets and keep its predictions.
# The original discarded the score (it was followed by an assignment, so it
# was neither bound nor displayed) — surface it as the cell's output.
lr_test_score = lr_clf.score(X_test, y_test)
test_preds = lr_clf.predict(X_test)
lr_test_score
In [168]:
# Held-out accuracy of `clf` — GaussianNB at this point (see shadowing note).
clf.score(X_test, y_test)
Out[168]:
In [170]:
# Held-out accuracy of the decision tree.
dt_clf.score(X_test, y_test)
Out[170]:
In [175]:
# Binarize predictions and ground truth (1 = sarcastic), then compute
# the ROC points for the logistic model on the held-out tweets.
test_preds_numeric = [int(pred == 'sarcastic') for pred in test_preds]
y_test_numeric = [int(label == 'sarcastic') for label in y_test]
fpr_test, tpr_test, thresh_test = roc_curve(y_test_numeric, test_preds_numeric)
In [179]:
# ROC of the logistic model on the held-out test tweets; labeled axes so
# the figure stands alone.
plt.plot(fpr_test, tpr_test, color='green')
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.title("LogReg on test data")
Out[179]: