In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
%matplotlib inline
# Plot appearance: ggplot theme, wide-and-short default figure, sans-serif text.
plt.style.use('ggplot')
plt.rcParams.update({
    'figure.figsize': (15, 3),
    'font.family': 'sans-serif',
})
# pandas display: very wide console width and up to 60 columns shown.
for option_name, option_value in (('display.width', 5000),
                                  ('display.max_columns', 60)):
    pd.set_option(option_name, option_value)
In [2]:
# Load the four ratings tables (HC / DT, baseline / image-box condition),
# reading the files in the same order as the names on the left.
HC_baseline, DT_baseline, HC_imagebox, DT_imagebox = map(
    pd.read_csv,
    (
        './BASELINE/HC_baseline_full_ratings.csv',
        './BASELINE/DT_baseline_full_ratings.csv',
        './IMAGE_BOX/HC_imagebox_full_ratings.csv',
        './IMAGE_BOX/DT_imagebox_full_ratings.csv',
    ),
)
In [3]:
# Sample skewness of the HC facebookbias_rating values, restricted to ratings below 3.
# NOTE(review): presumably the < 3 filter is there to drop the 999 "missing" sentinel — confirm.
print("Baseline skew: ", stats.skew(HC_baseline['facebookbias_rating'].loc[lambda rating: rating < 3]))
print("Image Box skew: ", stats.skew(HC_imagebox['facebookbias_rating'].loc[lambda rating: rating < 3]))
In [4]:
# Skewness significance test (scipy.stats.skewtest) on the HC ratings below 3.
# The printed value is the test result object, not the skewness itself.
print("Baseline skew: ", stats.skewtest(HC_baseline['facebookbias_rating'].loc[lambda rating: rating < 3]))
print("Image Box skew: ", stats.skewtest(HC_imagebox['facebookbias_rating'].loc[lambda rating: rating < 3]))
In [5]:
# Two-sample Kolmogorov-Smirnov test: HC baseline vs. HC image-box ratings,
# each restricted to ratings below 3.
hc_baseline_kept = HC_baseline.loc[HC_baseline['facebookbias_rating'] < 3, 'facebookbias_rating']
hc_imagebox_kept = HC_imagebox.loc[HC_imagebox['facebookbias_rating'] < 3, 'facebookbias_rating']
stats.ks_2samp(hc_baseline_kept, hc_imagebox_kept)
Out[5]:
In [6]:
# Histogram of HC image-box ratings: 20 bins over [-1, 1]; values outside that range are not binned.
HC_imagebox['facebookbias_rating'].plot(kind='hist', alpha=0.5, bins=20, range=(-1, 1), color='blue')
Out[6]:
In [7]:
# Histogram of HC baseline ratings: 20 bins over [-1, 1]; values outside that range are not binned.
HC_baseline['facebookbias_rating'].plot(kind='hist', alpha=0.5, bins=20, range=(-1, 1), color='green')
Out[7]:
In [8]:
# Sample skewness of the DT facebookbias_rating values, restricted to ratings below 3.
# NOTE(review): presumably the < 3 filter is there to drop the 999 "missing" sentinel — confirm.
print("Baseline skew: ", stats.skew(DT_baseline['facebookbias_rating'].loc[lambda rating: rating < 3]))
print("Image Box skew: ", stats.skew(DT_imagebox['facebookbias_rating'].loc[lambda rating: rating < 3]))
In [9]:
# Two-sample Kolmogorov-Smirnov test: DT baseline vs. DT image-box ratings,
# each restricted to ratings below 3.
dt_baseline_kept = DT_baseline.loc[DT_baseline['facebookbias_rating'] < 3, 'facebookbias_rating']
dt_imagebox_kept = DT_imagebox.loc[DT_imagebox['facebookbias_rating'] < 3, 'facebookbias_rating']
stats.ks_2samp(dt_baseline_kept, dt_imagebox_kept)
Out[9]:
In [10]:
# Histogram of DT image-box ratings: 20 bins over [-1, 1]; values outside that range are not binned.
DT_imagebox['facebookbias_rating'].plot(kind='hist', alpha=0.5, bins=20, range=(-1, 1), color='red')
Out[10]:
In [11]:
# Histogram of DT baseline ratings: 20 bins over [-1, 1]; values outside that range are not binned.
DT_baseline['facebookbias_rating'].plot(kind='hist', alpha=0.5, bins=20, range=(-1, 1), color='green')
Out[11]:
In [12]:
# Count, per dataset, the rows whose facebookbias_rating equals 999 (reported here as "missing").
# Each count is computed on the same frame that its label names. The Donald Trump
# Image Box line used to index DT_baseline with a mask built from DT_imagebox,
# which reports the wrong frame's rows (or fails when the two indexes differ).
print("Number of missing ratings for Hillary Clinton Baseline data: ", int((HC_baseline.facebookbias_rating == 999).sum()))
print("Number of missing ratings for Hillary Clinton Image Box data: ", int((HC_imagebox.facebookbias_rating == 999).sum()))
print("Number of missing ratings for Donald Trump Baseline data: ", int((DT_baseline.facebookbias_rating == 999).sum()))
print("Number of missing ratings for Donald Trump Image Box data: ", int((DT_imagebox.facebookbias_rating == 999).sum()))
In [13]:
def convert_to_ints(col):
    """Translate a rating label into a numeric code.

    'Left' maps to -1, 'Center' to 0 and 'Right' to 1.
    Any other value yields np.nan.
    """
    # Checked in the same order as the labels are listed; first match wins.
    for label, code in (('Left', -1), ('Center', 0), ('Right', 1)):
        if col == label:
            return code
    return np.nan
In [14]:
# Add a numeric 'final_rating_ints' column to every ratings table by passing
# each 'final_rating' value through convert_to_ints.
for ratings_frame in (HC_imagebox, DT_imagebox, HC_baseline, DT_baseline):
    ratings_frame['final_rating_ints'] = ratings_frame['final_rating'].apply(convert_to_ints)
In [15]:
# Frequency of each numeric rating code in the HC image-box table.
HC_imagebox['final_rating_ints'].value_counts()
Out[15]:
In [16]:
# Frequency of each numeric rating code in the DT image-box table.
DT_imagebox['final_rating_ints'].value_counts()
Out[16]:
In [17]:
# Frequency of each final_rating label, computed separately for every table.
HC_baseline_counts = HC_baseline['final_rating'].value_counts()
HC_imagebox_counts = HC_imagebox['final_rating'].value_counts()
DT_baseline_counts = DT_baseline['final_rating'].value_counts()
DT_imagebox_counts = DT_imagebox['final_rating'].value_counts()
In [18]:
# Peek at the first five entries of the HC baseline label counts.
HC_baseline_counts.head(5)
Out[18]:
In [19]:
# Combine the four label-count Series into one table, one column per
# candidate/condition; pandas aligns the rows on the rating labels.
normalised_bias_ratings = pd.DataFrame(
    dict(
        HC_ImageBox=HC_imagebox_counts,
        HC_Baseline=HC_baseline_counts,
        DT_ImageBox=DT_imagebox_counts,
        DT_Baseline=DT_baseline_counts,
    )
)
In [20]:
# Display the combined counts table (one column per candidate/condition).
normalised_bias_ratings
Out[20]:
In [21]:
# Keep only the first three rows (by position) of the counts table.
# NOTE(review): presumably these are the Left / Center / Right rows — confirm
# against the displayed table, since the selection is positional, not by label.
# .copy() makes the result an independent frame, so columns can be added to it
# afterwards without pandas raising SettingWithCopyWarning.
normalised_bias_ratings = normalised_bias_ratings.iloc[:3].copy()
In [22]:
# Add one '<column>_pcnt' column per count column: each count as a percentage
# of that column's total. Columns are added in the order listed below.
# assign() builds a new frame rather than writing into the existing one via
# .loc, which avoids SettingWithCopyWarning when the frame is a slice of another.
normalised_bias_ratings = normalised_bias_ratings.assign(**{
    f'{count_col}_pcnt': (
        normalised_bias_ratings[count_col] / normalised_bias_ratings[count_col].sum() * 100
    )
    for count_col in ('HC_Baseline', 'HC_ImageBox', 'DT_Baseline', 'DT_ImageBox')
})
In [23]:
# Display the table again, now including the *_pcnt percentage columns.
normalised_bias_ratings
Out[23]:
In [24]:
# List the column labels of the table (count columns plus *_pcnt columns).
normalised_bias_ratings.columns
Out[24]:
In [25]:
# Split out the percentage columns per candidate (baseline next to image box).
HC_percentages = normalised_bias_ratings.loc[:, ['HC_Baseline_pcnt', 'HC_ImageBox_pcnt']]
DT_percentages = normalised_bias_ratings.loc[:, ['DT_Baseline_pcnt', 'DT_ImageBox_pcnt']]
In [26]:
# Chi-square goodness-of-fit: do the HC image-box rating counts follow the
# distribution seen in the HC baseline counts?
# scipy.stats.chisquare requires f_obs and f_exp to sum to the same total, so
# the baseline counts are rescaled to the image-box total before testing
# (this is a no-op when the two totals already match).
# NOTE(review): this treats the baseline proportions as a fixed reference;
# scipy.stats.chi2_contingency on the two count columns is the alternative if
# both should be treated as samples.
hc_observed = normalised_bias_ratings.HC_ImageBox
hc_expected = (
    normalised_bias_ratings.HC_Baseline
    / normalised_bias_ratings.HC_Baseline.sum()
    * hc_observed.sum()
)
stats.chisquare(f_obs=hc_observed, f_exp=hc_expected)
Out[26]:
In [27]:
# Grouped bar chart of HC baseline vs. image-box percentages, one group per row of the table.
HC_percentages.plot(kind='bar')
Out[27]:
In [28]:
# Chi-square goodness-of-fit: do the DT image-box rating counts follow the
# distribution seen in the DT baseline counts?
# scipy.stats.chisquare requires f_obs and f_exp to sum to the same total, so
# the baseline counts are rescaled to the image-box total before testing
# (this is a no-op when the two totals already match).
# NOTE(review): this treats the baseline proportions as a fixed reference;
# scipy.stats.chi2_contingency on the two count columns is the alternative if
# both should be treated as samples.
dt_observed = normalised_bias_ratings.DT_ImageBox
dt_expected = (
    normalised_bias_ratings.DT_Baseline
    / normalised_bias_ratings.DT_Baseline.sum()
    * dt_observed.sum()
)
stats.chisquare(f_obs=dt_observed, f_exp=dt_expected)
Out[28]:
In [29]:
# Grouped bar chart of DT baseline vs. image-box percentages, one group per row of the table.
DT_percentages.plot(kind='bar')
Out[29]:
In [ ]: