Replication for results in Davidson et al. 2017. "Automated Hate Speech Detection and the Problem of Offensive Language"



In [1]:

    
import pandas as pd
import numpy as np
import pickle
import sys
from sklearn.feature_extraction.text import TfidfVectorizer
import nltk
from nltk.stem.porter import *
import string
import re
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer as VS
from textstat.textstat import *
from sklearn.linear_model import LogisticRegression
from sklearn.feature_selection import SelectFromModel
from sklearn.metrics import classification_report
from sklearn.svm import LinearSVC
import matplotlib.pyplot as plt
import seaborn
%matplotlib inline

Loading the data



In [2]:

    
df = pd.read_csv("../data/labeled_data.csv")



In [3]:

    
df









    Out[3]:







  
    
      
      Unnamed: 0
      count
      hate_speech
      offensive_language
      neither
      class
      tweet
    
  
  
    
      0
      0
      3
      0
      0
      3
      2
      !!! RT @mayasolovely: As a woman you shouldn't...
    
    
      1
      1
      3
      0
      3
      0
      1
      !!!!! RT @mleew17: boy dats cold...tyga dwn ba...
    
    
      2
      2
      3
      0
      3
      0
      1
      !!!!!!! RT @UrKindOfBrand Dawg!!!! RT @80sbaby...
    
    
      3
      3
      3
      0
      2
      1
      1
      !!!!!!!!! RT @C_G_Anderson: @viva_based she lo...
    
    
      4
      4
      6
      0
      6
      0
      1
      !!!!!!!!!!!!! RT @ShenikaRoberts: The shit you...
    
    
      5
      5
      3
      1
      2
      0
      1
      !!!!!!!!!!!!!!!!!!"@T_Madison_x: The shit just...
    
    
      6
      6
      3
      0
      3
      0
      1
      !!!!!!"@__BrighterDays: I can not just sit up ...
    
    
      7
      7
      3
      0
      3
      0
      1
      !!!!&#8220;@selfiequeenbri: cause I'm tired of...
    
    
      8
      8
      3
      0
      3
      0
      1
      " &amp; you might not get ya bitch back &amp; ...
    
    
      9
      9
      3
      1
      2
      0
      1
      " @rhythmixx_ :hobbies include: fighting Maria...
    
    
      10
      10
      3
      0
      3
      0
      1
      " Keeks is a bitch she curves everyone " lol I...
    
    
      11
      11
      3
      0
      3
      0
      1
      " Murda Gang bitch its Gang Land "
    
    
      12
      12
      3
      0
      2
      1
      1
      " So hoes that smoke are losers ? " yea ... go...
    
    
      13
      13
      3
      0
      3
      0
      1
      " bad bitches is the only thing that i like "
    
    
      14
      14
      3
      1
      2
      0
      1
      " bitch get up off me "
    
    
      15
      15
      3
      0
      3
      0
      1
      " bitch nigga miss me with it "
    
    
      16
      16
      3
      0
      3
      0
      1
      " bitch plz whatever "
    
    
      17
      17
      3
      1
      2
      0
      1
      " bitch who do you love "
    
    
      18
      18
      3
      0
      3
      0
      1
      " bitches get cut off everyday B "
    
    
      19
      19
      3
      0
      3
      0
      1
      " black bottle &amp; a bad bitch "
    
    
      20
      20
      3
      0
      3
      0
      1
      " broke bitch cant tell me nothing "
    
    
      21
      21
      3
      0
      3
      0
      1
      " cancel that bitch like Nino "
    
    
      22
      22
      3
      0
      3
      0
      1
      " cant you see these hoes wont change "
    
    
      23
      23
      3
      0
      3
      0
      1
      " fuck no that bitch dont even suck dick " &#1...
    
    
      24
      24
      3
      0
      3
      0
      1
      " got ya bitch tip toeing on my hardwood floor...
    
    
      25
      25
      3
      0
      2
      1
      1
      " her pussy lips like Heaven doors " &#128524;
    
    
      26
      26
      3
      0
      3
      0
      1
      " hoe what its hitting for "
    
    
      27
      27
      3
      0
      3
      0
      1
      " i met that pussy on Ocean Dr . i gave that p...
    
    
      28
      28
      3
      0
      3
      0
      1
      " i need a trippy bitch who fuck on Hennessy "
    
    
      29
      29
      3
      0
      3
      0
      1
      " i spend my money how i want bitch its my bus...
    
    
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      24753
      25266
      3
      1
      2
      0
      1
      you ain't gotta be a dyke to like hoes
    
    
      24754
      25267
      3
      0
      3
      0
      1
      you are a hoe, hoe, &amp; a hoe.
    
    
      24755
      25268
      3
      0
      3
      0
      1
      you bitches love yall some corny nigga
    
    
      24756
      25269
      3
      0
      3
      0
      1
      you can masturbate anytime bitch lol &#8220;@g...
    
    
      24757
      25270
      3
      0
      3
      0
      1
      you can never get a group of hoes together wit...
    
    
      24758
      25271
      3
      0
      3
      0
      1
      you can tell when dick recently been in a puss...
    
    
      24759
      25272
      3
      0
      3
      0
      1
      you can't cuff a hoe lmao
    
    
      24760
      25273
      3
      0
      2
      1
      1
      you drove me redneck crazy
    
    
      24761
      25274
      3
      0
      3
      0
      1
      you fake niggah lolol
    
    
      24762
      25275
      3
      1
      2
      0
      1
      you got niggas, and i got bitches.
    
    
      24763
      25276
      3
      0
      2
      1
      1
      you gotta be a new breed of retarded if you do...
    
    
      24764
      25277
      3
      0
      3
      0
      1
      you gotta understand that these bitches are ch...
    
    
      24765
      25278
      3
      0
      3
      0
      1
      you hoe spice
    
    
      24766
      25279
      3
      0
      3
      0
      1
      you just want some attention hoe
    
    
      24767
      25280
      3
      0
      1
      2
      2
      you know what they say, the early bird gets th...
    
    
      24768
      25281
      3
      0
      3
      0
      1
      you know what your doing when you favorite a t...
    
    
      24769
      25282
      3
      0
      3
      0
      1
      you lil dumb ass bitch, i ain't fuckin wit chu...
    
    
      24770
      25283
      3
      0
      3
      0
      1
      you look like AC Green...bitch don't call here...
    
    
      24771
      25284
      3
      0
      3
      0
      1
      you look like your 12 stop talking about fucki...
    
    
      24772
      25285
      3
      0
      3
      0
      1
      you might as well gone pussy pop on a stage
    
    
      24773
      25286
      3
      1
      2
      0
      1
      you niggers cheat on ya gf's? smh....
    
    
      24774
      25287
      3
      0
      3
      0
      1
      you really care bout dis bitch. my dick all in...
    
    
      24775
      25288
      3
      0
      3
      0
      1
      you worried bout other bitches, you need me for?
    
    
      24776
      25289
      3
      3
      0
      0
      0
      you're all niggers
    
    
      24777
      25290
      3
      2
      1
      0
      0
      you're such a retard i hope you get type 2 dia...
    
    
      24778
      25291
      3
      0
      2
      1
      1
      you's a muthaf***in lie &#8220;@LifeAsKing: @2...
    
    
      24779
      25292
      3
      0
      1
      2
      2
      you've gone and broke the wrong heart baby, an...
    
    
      24780
      25294
      3
      0
      3
      0
      1
      young buck wanna eat!!.. dat nigguh like I ain...
    
    
      24781
      25295
      6
      0
      6
      0
      1
      youu got wild bitches tellin you lies
    
    
      24782
      25296
      3
      0
      0
      3
      2
      ~~Ruffled | Ntac Eileen Dahlia - Beautiful col...
    
  

24783 rows × 7 columns



In [4]:

    
df.describe()









    Out[4]:







  
    
      
      Unnamed: 0
      count
      hate_speech
      offensive_language
      neither
      class
    
  
  
    
      count
      24783.000000
      24783.000000
      24783.000000
      24783.000000
      24783.000000
      24783.000000
    
    
      mean
      12681.192027
      3.243473
      0.280515
      2.413711
      0.549247
      1.110277
    
    
      std
      7299.553863
      0.883060
      0.631851
      1.399459
      1.113299
      0.462089
    
    
      min
      0.000000
      3.000000
      0.000000
      0.000000
      0.000000
      0.000000
    
    
      25%
      6372.500000
      3.000000
      0.000000
      2.000000
      0.000000
      1.000000
    
    
      50%
      12703.000000
      3.000000
      0.000000
      3.000000
      0.000000
      1.000000
    
    
      75%
      18995.500000
      3.000000
      0.000000
      3.000000
      0.000000
      1.000000
    
    
      max
      25296.000000
      9.000000
      7.000000
      9.000000
      9.000000
      2.000000



In [5]:

    
df.columns









    Out[5]:





Index(['Unnamed: 0', 'count', 'hate_speech', 'offensive_language', 'neither',
       'class', 'tweet'],
      dtype='object')

Columns key:

count = number of CrowdFlower users who coded each tweet (min is 3, sometimes more users coded a tweet when judgments were determined to be unreliable by CF).

hate_speech = number of CF users who judged the tweet to be hate speech.

offensive_language = number of CF users who judged the tweet to be offensive.

neither = number of CF users who judged the tweet to be neither offensive nor non-offensive.

class = class label for majority of CF users.

0 - hate speech
1 - offensive  language
2 - neither

tweet = raw tweet text



In [6]:

    
df['class'].hist()









    Out[6]:





<matplotlib.axes._subplots.AxesSubplot at 0x1108a1208>

This histogram shows the imbalanced nature of the task - most tweets containing "hate" words as defined by Hatebase were only considered to be offensive by the CF coders. More tweets were considered to be neither hate speech nor offensive language than were considered hate speech.



In [7]:

    
tweets=df.tweet

Feature generation



In [8]:

    
stopwords=stopwords = nltk.corpus.stopwords.words("english")

other_exclusions = ["#ff", "ff", "rt"]
stopwords.extend(other_exclusions)

stemmer = PorterStemmer()


def preprocess(text_string):
    """
    Accepts a text string and replaces:
    1) urls with URLHERE
    2) lots of whitespace with one instance
    3) mentions with MENTIONHERE

    This allows us to get standardized counts of urls and mentions
    Without caring about specific people mentioned
    """
    space_pattern = '\s+'
    giant_url_regex = ('http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|'
        '[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+')
    mention_regex = '@[\w\-]+'
    parsed_text = re.sub(space_pattern, ' ', text_string)
    parsed_text = re.sub(giant_url_regex, '', parsed_text)
    parsed_text = re.sub(mention_regex, '', parsed_text)
    return parsed_text

def tokenize(tweet):
    """Removes punctuation & excess whitespace, sets to lowercase,
    and stems tweets. Returns a list of stemmed tokens."""
    tweet = " ".join(re.split("[^a-zA-Z]*", tweet.lower())).strip()
    tokens = [stemmer.stem(t) for t in tweet.split()]
    return tokens

def basic_tokenize(tweet):
    """Same as tokenize but without the stemming"""
    tweet = " ".join(re.split("[^a-zA-Z.,!?]*", tweet.lower())).strip()
    return tweet.split()

vectorizer = TfidfVectorizer(
    tokenizer=tokenize,
    preprocessor=preprocess,
    ngram_range=(1, 3),
    stop_words=stopwords,
    use_idf=True,
    smooth_idf=False,
    norm=None,
    decode_error='replace',
    max_features=10000,
    min_df=5,
    max_df=0.75
    )



In [9]:

    
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)



In [10]:

    
#Construct tfidf matrix and get relevant scores
tfidf = vectorizer.fit_transform(tweets).toarray()
vocab = {v:i for i, v in enumerate(vectorizer.get_feature_names())}
idf_vals = vectorizer.idf_
idf_dict = {i:idf_vals[i] for i in vocab.values()} #keys are indices; values are IDF scores



In [11]:

    
#Get POS tags for tweets and save as a string
tweet_tags = []
for t in tweets:
    tokens = basic_tokenize(preprocess(t))
    tags = nltk.pos_tag(tokens)
    tag_list = [x[1] for x in tags]
    tag_str = " ".join(tag_list)
    tweet_tags.append(tag_str)



In [12]:

    
#We can use the TFIDF vectorizer to get a token matrix for the POS tags
pos_vectorizer = TfidfVectorizer(
    tokenizer=None,
    lowercase=False,
    preprocessor=None,
    ngram_range=(1, 3),
    stop_words=None,
    use_idf=False,
    smooth_idf=False,
    norm=None,
    decode_error='replace',
    max_features=5000,
    min_df=5,
    max_df=0.75,
    )



In [13]:

    
#Construct POS TF matrix and get vocab dict
pos = pos_vectorizer.fit_transform(pd.Series(tweet_tags)).toarray()
pos_vocab = {v:i for i, v in enumerate(pos_vectorizer.get_feature_names())}



In [14]:

    
#Now get other features
sentiment_analyzer = VS()

def count_twitter_objs(text_string):
    """
    Accepts a text string and replaces:
    1) urls with URLHERE
    2) lots of whitespace with one instance
    3) mentions with MENTIONHERE
    4) hashtags with HASHTAGHERE

    This allows us to get standardized counts of urls and mentions
    Without caring about specific people mentioned.
    
    Returns counts of urls, mentions, and hashtags.
    """
    space_pattern = '\s+'
    giant_url_regex = ('http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|'
        '[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+')
    mention_regex = '@[\w\-]+'
    hashtag_regex = '#[\w\-]+'
    parsed_text = re.sub(space_pattern, ' ', text_string)
    parsed_text = re.sub(giant_url_regex, 'URLHERE', parsed_text)
    parsed_text = re.sub(mention_regex, 'MENTIONHERE', parsed_text)
    parsed_text = re.sub(hashtag_regex, 'HASHTAGHERE', parsed_text)
    return(parsed_text.count('URLHERE'),parsed_text.count('MENTIONHERE'),parsed_text.count('HASHTAGHERE'))

def other_features(tweet):
    """This function takes a string and returns a list of features.
    These include Sentiment scores, Text and Readability scores,
    as well as Twitter specific features"""
    sentiment = sentiment_analyzer.polarity_scores(tweet)
    
    words = preprocess(tweet) #Get text only
    
    syllables = textstat.syllable_count(words)
    num_chars = sum(len(w) for w in words)
    num_chars_total = len(tweet)
    num_terms = len(tweet.split())
    num_words = len(words.split())
    avg_syl = round(float((syllables+0.001))/float(num_words+0.001),4)
    num_unique_terms = len(set(words.split()))
    
    ###Modified FK grade, where avg words per sentence is just num words/1
    FKRA = round(float(0.39 * float(num_words)/1.0) + float(11.8 * avg_syl) - 15.59,1)
    ##Modified FRE score, where sentence fixed to 1
    FRE = round(206.835 - 1.015*(float(num_words)/1.0) - (84.6*float(avg_syl)),2)
    
    twitter_objs = count_twitter_objs(tweet)
    retweet = 0
    if "rt" in words:
        retweet = 1
    features = [FKRA, FRE,syllables, avg_syl, num_chars, num_chars_total, num_terms, num_words,
                num_unique_terms, sentiment['neg'], sentiment['pos'], sentiment['neu'], sentiment['compound'],
                twitter_objs[2], twitter_objs[1],
                twitter_objs[0], retweet]
    #features = pandas.DataFrame(features)
    return features

def get_feature_array(tweets):
    feats=[]
    for t in tweets:
        feats.append(other_features(t))
    return np.array(feats)



In [15]:

    
other_features_names = ["FKRA", "FRE","num_syllables", "avg_syl_per_word", "num_chars", "num_chars_total", \
                        "num_terms", "num_words", "num_unique_words", "vader neg","vader pos","vader neu", \
                        "vader compound", "num_hashtags", "num_mentions", "num_urls", "is_retweet"]



In [16]:

    
feats = get_feature_array(tweets)



In [17]:

    
#Now join them all up
M = np.concatenate([tfidf,pos,feats],axis=1)



In [18]:

    
M.shape









    Out[18]:





(24783, 11172)



In [19]:

    
#Finally get a list of variable names
variables = ['']*len(vocab)
for k,v in vocab.items():
    variables[v] = k

pos_variables = ['']*len(pos_vocab)
for k,v in pos_vocab.items():
    pos_variables[v] = k

feature_names = variables+pos_variables+other_features_names

Running the model

The best model was selected using a GridSearch with 5-fold CV.



In [20]:

    
X = pd.DataFrame(M)
y = df['class'].astype(int)



In [21]:

    
from sklearn.model_selection import train_test_split



In [51]:

    
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.1)



In [52]:

    
from sklearn.model_selection import StratifiedKFold, GridSearchCV
from sklearn.pipeline import Pipeline



In [53]:

    
pipe = Pipeline(
        [('select', SelectFromModel(LogisticRegression(class_weight='balanced',
                                                  penalty="l1", C=0.01))),
        ('model', LogisticRegression(class_weight='balanced',penalty='l2'))])



In [54]:

    
param_grid = [{}] # Optionally add parameters here



In [55]:

    
grid_search = GridSearchCV(pipe, 
                           param_grid,
                           cv=StratifiedKFold(n_splits=5, 
                                              random_state=42).split(X_train, y_train), 
                           verbose=2)



In [56]:

    
model = grid_search.fit(X_train, y_train)









    



Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV]  ................................................................
[CV] ................................................. , total=   8.0s
[CV]  ................................................................






    



[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    8.4s remaining:    0.0s






    



[CV] ................................................. , total=   4.7s
[CV]  ................................................................
[CV] ................................................. , total=   4.8s
[CV]  ................................................................
[CV] ................................................. , total=   5.6s
[CV]  ................................................................
[CV] ................................................. , total=   4.1s






    



[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:   29.2s finished



In [57]:

    
y_preds = model.predict(X_test)

Evaluating the results



In [58]:

    
report = classification_report( y_test, y_preds )

Note: Results in paper are from best model retrained on the entire dataset (see the other notebook). Here the results are reported after using cross-validation and only for the held-out set.



In [61]:

    
print(report)









    



             precision    recall  f1-score   support

          0       0.44      0.59      0.51       164
          1       0.96      0.91      0.93      1905
          2       0.83      0.94      0.88       410

avg / total       0.91      0.89      0.90      2479



In [62]:

    
from sklearn.metrics import confusion_matrix
confusion_matrix = confusion_matrix(y_test,y_preds)
matrix_proportions = np.zeros((3,3))
for i in range(0,3):
    matrix_proportions[i,:] = confusion_matrix[i,:]/float(confusion_matrix[i,:].sum())
names=['Hate','Offensive','Neither']
confusion_df = pd.DataFrame(matrix_proportions, index=names,columns=names)
plt.figure(figsize=(5,5))
seaborn.heatmap(confusion_df,annot=True,annot_kws={"size": 12},cmap='gist_gray_r',cbar=False, square=True,fmt='.2f')
plt.ylabel(r'True categories',fontsize=14)
plt.xlabel(r'Predicted categories',fontsize=14)
plt.tick_params(labelsize=12)

#Uncomment line below if you want to save the output
#plt.savefig('confusion.pdf')



In [63]:

    
#True distribution
y.hist()









    Out[63]:





<matplotlib.axes._subplots.AxesSubplot at 0x1116a7c50>



In [64]:

    
pd.Series(y_preds).hist()









    Out[64]:





<matplotlib.axes._subplots.AxesSubplot at 0x114cd3dd8>



In [ ]:



In [ ]:



In [ ]:

	Unnamed: 0	count	hate_speech	offensive_language	neither	class	tweet
0	0	3	0	0	3	2	!!! RT @mayasolovely: As a woman you shouldn't...
1	1	3	0	3	0	1	!!!!! RT @mleew17: boy dats cold...tyga dwn ba...
2	2	3	0	3	0	1	!!!!!!! RT @UrKindOfBrand Dawg!!!! RT @80sbaby...
3	3	3	0	2	1	1	!!!!!!!!! RT @C_G_Anderson: @viva_based she lo...
4	4	6	0	6	0	1	!!!!!!!!!!!!! RT @ShenikaRoberts: The shit you...
5	5	3	1	2	0	1	!!!!!!!!!!!!!!!!!!"@T_Madison_x: The shit just...
6	6	3	0	3	0	1	!!!!!!"@__BrighterDays: I can not just sit up ...
7	7	3	0	3	0	1	!!!!“@selfiequeenbri: cause I'm tired of...
8	8	3	0	3	0	1	" & you might not get ya bitch back & ...
9	9	3	1	2	0	1	" @rhythmixx_ :hobbies include: fighting Maria...
10	10	3	0	3	0	1	" Keeks is a bitch she curves everyone " lol I...
11	11	3	0	3	0	1	" Murda Gang bitch its Gang Land "
12	12	3	0	2	1	1	" So hoes that smoke are losers ? " yea ... go...
13	13	3	0	3	0	1	" bad bitches is the only thing that i like "
14	14	3	1	2	0	1	" bitch get up off me "
15	15	3	0	3	0	1	" bitch nigga miss me with it "
16	16	3	0	3	0	1	" bitch plz whatever "
17	17	3	1	2	0	1	" bitch who do you love "
18	18	3	0	3	0	1	" bitches get cut off everyday B "
19	19	3	0	3	0	1	" black bottle & a bad bitch "
20	20	3	0	3	0	1	" broke bitch cant tell me nothing "
21	21	3	0	3	0	1	" cancel that bitch like Nino "
22	22	3	0	3	0	1	" cant you see these hoes wont change "
23	23	3	0	3	0	1	" fuck no that bitch dont even suck dick " &#1...
24	24	3	0	3	0	1	" got ya bitch tip toeing on my hardwood floor...
25	25	3	0	2	1	1	" her pussy lips like Heaven doors " 😌
26	26	3	0	3	0	1	" hoe what its hitting for "
27	27	3	0	3	0	1	" i met that pussy on Ocean Dr . i gave that p...
28	28	3	0	3	0	1	" i need a trippy bitch who fuck on Hennessy "
29	29	3	0	3	0	1	" i spend my money how i want bitch its my bus...
...	...	...	...	...	...	...	...
24753	25266	3	1	2	0	1	you ain't gotta be a dyke to like hoes
24754	25267	3	0	3	0	1	you are a hoe, hoe, & a hoe.
24755	25268	3	0	3	0	1	you bitches love yall some corny nigga
24756	25269	3	0	3	0	1	you can masturbate anytime bitch lol “@g...
24757	25270	3	0	3	0	1	you can never get a group of hoes together wit...
24758	25271	3	0	3	0	1	you can tell when dick recently been in a puss...
24759	25272	3	0	3	0	1	you can't cuff a hoe lmao
24760	25273	3	0	2	1	1	you drove me redneck crazy
24761	25274	3	0	3	0	1	you fake niggah lolol
24762	25275	3	1	2	0	1	you got niggas, and i got bitches.
24763	25276	3	0	2	1	1	you gotta be a new breed of retarded if you do...
24764	25277	3	0	3	0	1	you gotta understand that these bitches are ch...
24765	25278	3	0	3	0	1	you hoe spice
24766	25279	3	0	3	0	1	you just want some attention hoe
24767	25280	3	0	1	2	2	you know what they say, the early bird gets th...
24768	25281	3	0	3	0	1	you know what your doing when you favorite a t...
24769	25282	3	0	3	0	1	you lil dumb ass bitch, i ain't fuckin wit chu...
24770	25283	3	0	3	0	1	you look like AC Green...bitch don't call here...
24771	25284	3	0	3	0	1	you look like your 12 stop talking about fucki...
24772	25285	3	0	3	0	1	you might as well gone pussy pop on a stage
24773	25286	3	1	2	0	1	you niggers cheat on ya gf's? smh....
24774	25287	3	0	3	0	1	you really care bout dis bitch. my dick all in...
24775	25288	3	0	3	0	1	you worried bout other bitches, you need me for?
24776	25289	3	3	0	0	0	you're all niggers
24777	25290	3	2	1	0	0	you're such a retard i hope you get type 2 dia...
24778	25291	3	0	2	1	1	you's a muthaf***in lie “@LifeAsKing: @2...
24779	25292	3	0	1	2	2	you've gone and broke the wrong heart baby, an...
24780	25294	3	0	3	0	1	young buck wanna eat!!.. dat nigguh like I ain...
24781	25295	6	0	6	0	1	youu got wild bitches tellin you lies
24782	25296	3	0	0	3	2	~~Ruffled \| Ntac Eileen Dahlia - Beautiful col...

	Unnamed: 0	count	hate_speech	offensive_language	neither	class
count	24783.000000	24783.000000	24783.000000	24783.000000	24783.000000	24783.000000
mean	12681.192027	3.243473	0.280515	2.413711	0.549247	1.110277
std	7299.553863	0.883060	0.631851	1.399459	1.113299	0.462089
min	0.000000	3.000000	0.000000	0.000000	0.000000	0.000000
25%	6372.500000	3.000000	0.000000	2.000000	0.000000	1.000000
50%	12703.000000	3.000000	0.000000	3.000000	0.000000	1.000000
75%	18995.500000	3.000000	0.000000	3.000000	0.000000	1.000000
max	25296.000000	9.000000	7.000000	9.000000	9.000000	2.000000