In [1]:
from __future__ import division
from __future__ import print_function
import csv
import datetime as dt
import os
import re
import pandas
from sklearn.feature_extraction.text import CountVectorizer
In [2]:
def csv_to_df(csv_file):
    """Open csv, return Pandas DataFrame."""
    dataframe = pandas.read_csv(csv_file,
                                delimiter='|',
                                error_bad_lines=False,
                                warn_bad_lines=False,
                                )
    return dataframe
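A quick usage sketch, assuming the pipe-delimited tweet files loaded later in this notebook exist:
dataframe_sample = csv_to_df('tweets/tweets_popular.csv')
dataframe_sample.head()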
In [3]:
def make_lowercase(input_str):
    """Lowercase input string, return."""
    return input_str.lower()
In [4]:
def clean_whitespaces(input_str):
    """Use re library to replace all
    whitespace (newlines, etc.) with a simple ' ' space.
    """
    return re.sub(r'\s+', ' ', input_str)
In [5]:
def remove_punctuation(input_str):
    """Remove certain punctuation."""
    # Mirrors the punctuation list used by tokenize_words() below;
    # '. ' and ': ' include a trailing space so URLs stay intact.
    for char in [',', '!', '"', '. ', ': ']:
        input_str = input_str.replace(char, ' ')
    return input_str
In [6]:
stopwords = ['himself', 'very', 'those', 'most', 'this', 'it', 'did', 'be', 'each', 'you', 'was', 'should', 'down', 'if', 'that', 'no', 'itself', 'does', 'under', 'a', 'over', 'about', 'both', 'their', 'who', 'her', 'now', 'which', 'as', 'other', 'too', 'yourselves', 'and', 'why', 'how', 'your', 'into', 'i', 'before', 'by', 'again', 'having', 'during', 'of', 'after', 'against', 'is', 'here', 't', 'above', 'so', 'doing', 'me', 'between', 'are', 'whom', 'ours', 'ourselves', 'he', 'him', 'where', 'because', 'up', 'yours', 'out', 'more', 's', 'nor', 'just', 'then', 'don', 'myself', 'my', 'while', 'these', 'some', 'yourself', 'such', 'on', 'few', 'them', 'until', 'from', 'when', 'our', 'have', 'or', 'theirs', 'off', 'through', 'the', 'same', 'any', 'its', 'not', 'below', 'has', 'had', 'am', 'been', 'will', 'at', 'being', 'there', 'than', 'to', 'she', 'but', 'what', 'for', 'can', 'own', 'an', 'they', 'his', 'with', 'we', 'only', 'in', 'were', 'hers', 'once', 'all', 'further', 'do', 'themselves', 'herself']
def remove_stopwords(input_tokens):
    """Remove common words."""
    return [token for token in input_tokens if token not in stopwords]
In [7]:
# A basic tokenizer
def tokenize_words(input_string):
    """Take a string, return a list of
    strings broken on whitespace, but do
    not break @mentions and URLs.
    Alternative: try something like `[word for word in re.sub('\W', ' ', s).split()]`,
    then strip punctuation that isn't `@` or `#`.
    """
    punctuation = [',', '!', '"', '. ', ': ']
    for char in punctuation:
        input_string = input_string.replace(char, ' ')
    return [w for w in input_string.split(' ') if w]  # rm empty strings
In [8]:
# Check that @mentions and http(s) URLs are not split apart
a_tweet = """@CuteEmergency: "I'm okay!" https://t.co/TWMwjG03Fd"""
tokenize_words(a_tweet)
Out[8]:
['@CuteEmergency', "I'm", 'okay', 'https://t.co/TWMwjG03Fd']
In [9]:
def get_urls(input_tokens):
    """Take a list of token strings, return the tokens
    that start with `http(s)://`.
    Could be done with a list comprehension, too:
    `[w for w in input_tokens if w.startswith('http')]`
    """
    urls = []
    for word in input_tokens:
        if word.startswith('http'):
            urls.append(word)
    return urls
In [10]:
def get_hashtags(input_tokens):
    """Take a list of token strings, return the tokens
    that start with `#`.
    Could be done with a list comprehension, too:
    `[w for w in input_tokens if w.startswith('#')]`
    """
    hashtags = []
    for word in input_tokens:
        if word.startswith('#'):
            hashtags.append(word)
    return hashtags
In [11]:
def get_mentions(input_tokens):
    """Take a list of token strings, return the tokens
    that start with `@`.
    Could be done with a list comprehension, too:
    `[w for w in input_tokens if w.startswith('@')]`
    """
    mentions = []
    for word in input_tokens:
        if word.startswith('@'):
            mentions.append(word)
    return mentions
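A quick check of the three extractors together, on a made-up tweet (expected output shown as comments):
tokens = tokenize_words('RT @user: check #ML https://t.co/abc123')
get_mentions(tokens)  # ['@user']
get_hashtags(tokens)  # ['#ML']
get_urls(tokens)      # ['https://t.co/abc123']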
In [12]:
def add_features_to_df(dataframe):
    """Take DataFrame of tweets, extract some specific
    features and add to returned DataFrame.
    """
    char_count = []
    word_count = []
    urls = []
    url_counts = []
    hashtags = []
    hashtag_counts = []
    mentions = []
    mentions_counts = []
    for i, row in dataframe.iterrows():
        # Text and tokens
        tokens = tokenize_words(row['_text'])
        char_count.append(len(row['_text']))
        word_count.append(len(tokens))
        # URLs
        url_list = get_urls(tokens)
        urls.append(url_list)
        url_counts.append(len(url_list))
        # Hashtags
        hashtag_list = get_hashtags(tokens)
        hashtags.append(hashtag_list)
        hashtag_counts.append(len(hashtag_list))
        # Mentions
        mentions_list = get_mentions(tokens)
        mentions.append(mentions_list)
        mentions_counts.append(len(mentions_list))
    dataframe['_char_count'] = char_count
    dataframe['_word_count'] = word_count
    dataframe['_urls'] = urls
    dataframe['_url_count'] = url_counts
    dataframe['_hashtags'] = hashtags
    dataframe['_hashtag_count'] = hashtag_counts
    dataframe['_mentions'] = mentions
    dataframe['_mentions_count'] = mentions_counts
    return dataframe
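Note that iterrows() is slow on large frames; a sketch of the same extraction with Series.apply (same column names as above) might look like:
tokens = dataframe['_text'].apply(tokenize_words)
dataframe['_char_count'] = dataframe['_text'].str.len()
dataframe['_word_count'] = tokens.apply(len)
dataframe['_urls'] = tokens.apply(get_urls)
dataframe['_url_count'] = dataframe['_urls'].apply(len)
# ... and likewise for hashtags and mentions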
Helpful links:
Brief example: https://github.com/kylepjohnson/lecture_nyc_ascent/blob/master/code_snippets/Example%20-%20Bag%20of%20words%20and%20Pandas%20df%20concat().ipynb.
In [14]:
def make_merge_bow_write(dataframe, save_path):
    """Take a dataframe, extract '_text' and make a Bag of Words.
    Write the BoW features to their own file, then merge with the input
    and return the new dataframe.
    TODO: Revisit options for CountVectorizer() (lowercase, tokenizer, min freq)
    """
    # Get a list of strings, for input into the vectorizer
    text_list = dataframe['_text'].tolist()
    # Set up the vectorizer
    # Note that min_df is confusing; see http://stackoverflow.com/a/27697863
    # min_df with an integer n: if a word is found in fewer than n docs, ignore it
    vectorizer = CountVectorizer(min_df=2)
    term_document_matrix = vectorizer.fit_transform(text_list)  # input is a list of strings, 1 per document
    # Put the BoW vectors into a new df
    dataframe_bow = pandas.DataFrame(term_document_matrix.toarray(), columns=vectorizer.get_feature_names())
    # Write the BoW to disk
    # Just the Bag of Words, in case we want to use it by itself later
    # TODO! Add '_popular' column to this, or ditch this csv altogether
    dataframe_bow.to_csv(save_path, sep='|', encoding='utf-8')
    # Merge the BoW df with the original feature-table df
    # Important: make sure concat() uses the original id index of the first, text dataframe
    dataframe = pandas.concat([dataframe, dataframe_bow], axis=1, join_axes=[dataframe.index])
    return dataframe
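Note: the join_axes argument was removed in pandas 1.0. On newer pandas, the same alignment can be sketched as (assuming a unique index, e.g. after dataframe.reset_index(drop=True)):
dataframe = pandas.concat([dataframe, dataframe_bow], axis=1).reindex(dataframe.index)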
In [15]:
def make_merge_bow(dataframe):
    """Take a dataframe, extract '_text' and make a Bag of Words.
    Return the BoW features as their own dataframe; writing and
    merging are left to the caller (cf. make_merge_bow_write above).
    TODO: Revisit options for CountVectorizer() (lowercase, tokenizer, min freq)
    """
    # Get a list of strings, for input into the vectorizer
    text_list = dataframe['_text'].tolist()
    # Set up the vectorizer
    # Note that min_df is confusing; see http://stackoverflow.com/a/27697863
    # min_df with an integer n: if a word is found in fewer than n docs, ignore it
    vectorizer = CountVectorizer(min_df=2)
    term_document_matrix = vectorizer.fit_transform(text_list)  # input is a list of strings, 1 per document
    # Put the BoW vectors into a new df
    dataframe_bow = pandas.DataFrame(term_document_matrix.toarray(), columns=vectorizer.get_feature_names())
    return dataframe_bow
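A toy illustration of the min_df=2 behavior noted in the comments above, on made-up documents: terms appearing in fewer than two documents are dropped from the vocabulary (expected output shown as a comment):
docs = ['good tweet', 'good dog', 'rare word']
CountVectorizer(min_df=2).fit(docs).get_feature_names()  # ['good']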
In [18]:
def make_all_features_for_tweets():
    """Do all the steps to create one feature
    table of popular and unpopular tweets.
    """
    print('Starting feature extraction ...')
    t0 = dt.datetime.utcnow()
    # Make sure 'feature_tables' is present
    features_dir = 'feature_tables'
    if not os.path.isdir(features_dir):
        os.mkdir(features_dir)
    # Load csvs into dfs
    dataframe_popular = csv_to_df('tweets/tweets_popular.csv')
    dataframe_not_popular = csv_to_df('tweets/tweets_not_popular.csv')
    # Remove dupes
    dataframe_popular = dataframe_popular.drop_duplicates()
    dataframe_not_popular = dataframe_not_popular.drop_duplicates()
    # Add a boolean '_popular' column to each df
    dataframe_popular['_popular'] = True
    dataframe_not_popular['_popular'] = False
    # Append the unpopular df to the popular df
    dataframe = pandas.concat([dataframe_popular, dataframe_not_popular])
    # Extract features from the df, add them back to the df
    dataframe = add_features_to_df(dataframe)
    # Write the df, now with basic extracted features, to .csv
    dataframe.to_csv('feature_tables/basics.csv', sep='|', encoding='utf-8')
    # Make the BoW df, then write it to .csv
    #dataframe_bow = make_merge_bow(dataframe)
    # Just the Bag of Words, in case we want to use it by itself later
    #dataframe_bow.to_csv('feature_tables/bow.csv', sep='|', encoding='utf-8')
    # Merge the BoW df with the original feature-table df
    # Important: make sure concat() uses the original id index of the first, text df
    #dataframe = pandas.concat([dataframe, dataframe_bow], axis=1, join_axes=[dataframe.index])
    #dataframe.to_csv('feature_tables/all.csv', sep='|', encoding='utf-8')
    print('... completed in {}.'.format(dt.datetime.utcnow() - t0))
    print('Total (rows, columns):', dataframe.shape)
    return dataframe
In [19]:
df = make_all_features_for_tweets()
In [26]:
list(df.columns.values)
Out[26]:
In [30]:
df
Out[30]:
In [ ]: