See this page for context.
In [1]:
import pandas as pd
In [2]:
def percentify(x, decimals=1):
    """Format fractions as percentage strings.

    Multiplies ``x`` by 100, rounds to ``decimals`` places, casts the result
    to ``str`` and appends "%".

    Parameters
    ----------
    x : pandas Series or DataFrame
        Fractions to format (anything supporting ``* 100``, ``.round()`` and
        ``.astype(str)``).
    decimals : int, default 1
        Number of decimal places kept before formatting.

    Returns
    -------
    Object of the same shape as ``x`` holding strings such as "12.3%".
    """
    return (x * 100).round(decimals).astype(str) + "%"
In [3]:
# Load the fact-check dataset; the path is relative to the working directory.
posts = pd.read_csv(filepath_or_buffer="../data/facebook-fact-check.csv")
In [4]:
# Number of rows loaded into `posts`.
posts.shape[0]
Out[4]:
In [5]:
# Columns of `posts` summarised by the engagement tables further down.
ENGAGEMENT_COLS = ["share_count", "reaction_count", "comment_count"]
In [6]:
# Rating labels used to select and order the columns of the rating tables.
RATINGS = [
    "mostly false",
    "mixture of true and false",
    "mostly true",
    "no factual content",
]
# The same labels, in the same order, without "no factual content".
FACTUAL_RATINGS = [label for label in RATINGS if label != "no factual content"]
In [7]:
# Reusable groupings of `posts` at three levels of detail:
# category, category + page, and category + page + post type.
category_grp = posts.groupby(by="Category")
page_grp = posts.groupby(by=["Category", "Page"])
type_grp = posts.groupby(by=["Category", "Page", "Post Type"])
Counts:
In [8]:
# One row per category, one column per rating (in RATINGS order); combinations
# that never occur are shown as 0. "total" is the row sum of the rating columns.
rating_by_category = (
    category_grp["Rating"]
    .value_counts()
    .unstack()
    .loc[:, RATINGS]
    .fillna(0)
    .assign(total=lambda counts: counts.sum(axis=1))
)
rating_by_category
Out[8]:
Percentages, of all posts:
In [9]:
# Each rating count divided by its row's sum over all RATINGS columns,
# then formatted as a percentage string.
(
    rating_by_category[RATINGS]
    .pipe(lambda counts: counts.div(counts.sum(axis=1), axis=0))
    .pipe(percentify)
)
Out[9]:
Percentages, of posts not rated "no factual content":
In [10]:
# Same row-wise shares, but restricted to the FACTUAL_RATINGS columns, so the
# denominator excludes posts rated "no factual content".
(
    rating_by_category[FACTUAL_RATINGS]
    .pipe(lambda counts: counts.div(counts.sum(axis=1), axis=0))
    .pipe(percentify)
)
Out[10]:
Counts:
In [11]:
# One row per (category, page), one column per rating (in RATINGS order);
# missing combinations become 0. "total" is the row sum of the rating columns.
rating_by_page = (
    page_grp["Rating"]
    .value_counts()
    .unstack()
    .loc[:, RATINGS]
    .fillna(0)
    .assign(total=lambda counts: counts.sum(axis=1))
)
rating_by_page
Out[11]:
Percentages, of all posts:
In [12]:
# Per-page share of each rating among all RATINGS columns, as percentage strings.
(
    rating_by_page[RATINGS]
    .pipe(lambda counts: counts.div(counts.sum(axis=1), axis=0))
    .pipe(percentify)
)
Out[12]:
Percentages, of posts not rated "no factual content":
In [13]:
# Per-page share of each rating among the FACTUAL_RATINGS columns only,
# as percentage strings.
(
    rating_by_page[FACTUAL_RATINGS]
    .pipe(lambda counts: counts.div(counts.sum(axis=1), axis=0))
    .pipe(percentify)
)
Out[13]:
Counts:
In [14]:
# Number of posts per "Date Published" value for each category.
# fill_value=0 records a date on which a category has no posts as 0 instead of
# NaN. DataFrame.mean skips NaN by default, so without it such dates would be
# left out of the row mean and "Avg. Per Day" would be overstated.
# NOTE(review): assumes "Date Published" holds one value per calendar day — confirm.
posts_by_date_by_category = (
    category_grp["Date Published"]
    .value_counts()
    .unstack(fill_value=0)
)
# The right-hand side is evaluated before the new column is attached, so the
# mean covers the date columns only.
posts_by_date_by_category["Avg. Per Day"] = posts_by_date_by_category.mean(axis=1).round(0)
posts_by_date_by_category
Out[14]:
In [15]:
# Number of posts per "Date Published" value for each (category, page).
# fill_value=0 records a date on which a page has no posts as 0 instead of NaN.
# DataFrame.mean skips NaN by default, so without it such dates would be left
# out of the row mean and "Avg. Per Day" would be overstated.
# NOTE(review): assumes "Date Published" holds one value per calendar day — confirm.
posts_by_date_by_page = (
    page_grp["Date Published"]
    .value_counts()
    .unstack(fill_value=0)
)
# The right-hand side is evaluated before the new column is attached, so the
# mean covers the date columns only.
posts_by_date_by_page["Avg. Per Day"] = posts_by_date_by_page.mean(axis=1).round(0)
posts_by_date_by_page
Out[15]:
In [16]:
# One row per (category, page, post type), one column per rating (in RATINGS
# order); missing combinations become 0. "total" is the row sum of the ratings.
rating_by_post_type = (
    type_grp["Rating"]
    .value_counts()
    .unstack()
    .loc[:, RATINGS]
    .fillna(0)
    .assign(total=lambda counts: counts.sum(axis=1))
)
rating_by_post_type
Out[16]:
Count of missing engagement figures:
In [17]:
# Per-column count of missing values among the engagement columns.
posts[ENGAGEMENT_COLS].isna().sum()
Out[17]:
In [18]:
# Median of each engagement column per (category, page), rounded to whole numbers.
page_grp[ENGAGEMENT_COLS].agg("median").round(0)
Out[18]:
In [19]:
# Mean of each engagement column per (category, page), rounded to whole numbers.
page_grp[ENGAGEMENT_COLS].agg("mean").round(0)
Out[19]:
In [20]:
# Grouping by category, page and rating, shared by the next three tables.
grp = posts.groupby(by=["Category", "Page", "Rating"])
Counts:
In [21]:
# Row count of every (category, page, rating) group, pivoted so each rating is a
# column; combinations with no rows are shown as 0.
grp[ENGAGEMENT_COLS].size().unstack(level=-1).fillna(value=0)
Out[21]:
Medians:
In [22]:
# Median of each engagement column per group of `grp`, rounded to whole numbers.
grp[ENGAGEMENT_COLS].agg("median").round(0)
Out[22]:
Averages:
In [23]:
# Mean of each engagement column per group of `grp`, rounded to whole numbers.
grp[ENGAGEMENT_COLS].agg("mean").round(0)
Out[23]:
Medians:
In [24]:
# Median of each engagement column per (category, page, post type), rounded to
# whole numbers.
type_grp[ENGAGEMENT_COLS].agg("median").round(0)
Out[24]:
Averages:
In [25]:
# Mean of each engagement column per (category, page, post type), rounded to
# whole numbers.
type_grp[ENGAGEMENT_COLS].agg("mean").round(0)
Out[25]:
In [26]:
# Split every (category, page) into posts rated "no factual content" (True)
# versus all other posts (False).
grp = posts.groupby(["Category", "Page", posts["Rating"].eq("no factual content")])
# Median and mean share_count per group, rounded; the boolean level is pivoted
# into columns and the statistic name moved into the row index, then the
# True/False columns get readable labels.
(
    grp["share_count"]
    .agg(["median", "mean"])
    .rename(columns={"mean": "average"})
    .round()
    .unstack()
    .stack(level=0)
    .rename(columns={True: "no factual content", False: "factual content"})
)
Out[26]:
In [27]:
# Split every (category, page) into posts rated "mostly true" (True) versus all
# other posts (False).
grp = posts.groupby(["Category", "Page", posts["Rating"].eq("mostly true")])
# Median and mean share_count per group, rounded; the boolean level is pivoted
# into columns and the statistic name moved into the row index. Columns are then
# relabelled and ordered, and only the "left" and "right" categories are kept.
(
    grp["share_count"]
    .agg(["median", "mean"])
    .rename(columns={"mean": "average"})
    .round()
    .unstack()
    .stack(level=0)
    .rename(columns={True: "mostly true", False: "everything else"})
    .loc[:, ["mostly true", "everything else"]]
    .loc[["left", "right"]]
)
Out[27]: