by Talha Oz & Manqi Li
This notebook reports descriptive statistics on our U.S. Governors dataset. The governors' Twitter account handles were retrieved from https://twitter.com/gov/lists/us-governors/members using DD-CSS (a website that allows downloading this information as CSV; disclaimer: I built the website).
In [2]:
import pandas as pd
import math
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
plt.style.use('ggplot')
In [3]:
# Load the governors' Twitter profile table and order it by follower count (ascending).
# Two potentially ambiguous field names:
#   friends:  number of people followed
#   statuses: total number of tweets + retweets
df = pd.read_table('../data/tw_gov.csv', parse_dates=['created_at'])
# DataFrame.sort(columns=...) no longer exists in pandas; sort_values(by=...) is its
# replacement and yields the same ascending order.
df = df.sort_values(by='followers')
# Bare last expression so the notebook renders the whole sorted table.
df
Out[3]:
In [4]:
# Write the current frame out as a comma-separated copy, omitting the row index.
df.to_csv(path_or_buf='../data/tw_gov2.csv', index=False)
In [3]:
# Attach each incumbent governor's follower count to the governors/challengers table
# and derive a per-capita follower rate.
df = pd.read_csv('../data/governors-challengers.csv')
gov = (
    pd.read_table('../data/tw_gov.csv', usecols=['screen_name', 'followers'])
    .rename(columns={'screen_name': 'twgov', 'followers': 'folgov'})
)
# merge() without `on=` joins on the column names the two frames have in common --
# presumably just `twgov`; TODO confirm against the header of governors-challengers.csv.
df = df.merge(gov)
# Followers divided by state population.
df['folnorm'] = df['folgov'].div(df['population'])
In [4]:
# Log-log scatter of raw follower counts against population-normalized follower counts,
# one labelled point per state, plus a straight-line fit computed in log space.
ax = df.plot(x='folnorm', y='folgov', kind='scatter', figsize=(15,15), logy=True, logx=True,
             xlim=(df.folnorm.min(), df.folnorm.max()),
             ylim=(df.folgov.min(), df.folgov.max()*1.2))  # 20% headroom above the largest count

# Label every point with its state name, offset up and to the left of the marker.
# An explicit loop is used because the annotations are pure side effects.
for state, fx, fy in zip(df['state'], df['folnorm'], df['folgov']):
    ax.annotate(state, (fx, fy), xytext=(-40, 7), textcoords='offset points')

ax.set_xlabel("Follower numbers normalized by his/her state population",fontsize=14);
ax.set_ylabel("Actual number of Twitter followers",fontsize=14);
ax.set_title('Twitter Followers of Incumbent U.S. State Governors',fontsize=18);

# Degree-1 least-squares fit of log(y) on log(x); np.polyfit returns (slope, intercept).
x = df.folnorm
y = df.folgov
logx = np.log(x)
logy = np.log(y)
coeffs = (slope, intercept) = np.polyfit(logx,logy,deg=1)
poly = np.poly1d(coeffs)
# Map the fitted values back from log space so the line can be drawn on the log axes.
yfit = np.exp(poly(logx))
plt.plot(x,yfit,'-.')
formula = 'log(y) = {:2.2f}*log(x) + {:2.2f}'.format(slope,intercept)
plt.legend([formula],loc=2)

# Pearson correlation between follower count and state population.
# `.ix` has been removed from pandas; the label-based .loc lookup selects the same
# off-diagonal cell of the 2x2 correlation matrix that .ix[0,1] did.
corr_fol_pop = df[['folgov','population']].corr().loc['folgov','population']
ax.text(0.01,0.95,'Correlation between followers and state population (p) : '+
        '{:2.2f}'.format(corr_fol_pop),
        horizontalalignment='left',verticalalignment='top',
        transform=ax.transAxes,fontsize=12)
# Repeat the fitted formula on the plot, rotated by 33 degrees.
ax.text(0.05, 0.30,formula,transform=ax.transAxes,rotation=33);
In [5]:
# Bar chart of follower counts per governor, largest first.
# Beware, the y-axis is in log-scale (see the table above for exact numbers of followers where min. is ~300 and max is ~1M)
# DataFrame.sort(columns=...) no longer exists in pandas; sort_values(by=...) is the replacement.
df = df.sort_values(by='folgov', ascending=False)
df.plot(x='state',y='folgov', title='Twitter Follower Counts of U.S. State Governors',
        kind='bar', legend=False, figsize=(16,4),logy=True,ylim=(1,1500000));
plt.xlabel('Incumbent Governors')
plt.ylabel('Number of Twitter Followers')
Out[5]:
In [6]:
# Bar chart of population-normalized follower rates next to the raw follower counts,
# ordered by normalized rate (largest first).
# Beware, the y-axis is in log-scale (see the table above for exact numbers of followers where min. is ~300 and max is ~1M)
# DataFrame.sort(columns=...) no longer exists in pandas; sort_values(by=...) is the replacement.
df = df.sort_values(by='folnorm', ascending=False)
# Scale the rate by 10**7 on a derived frame rather than overwriting df['folnorm']:
# the in-place multiplication rescaled the column again each time this cell was re-run.
df_scaled = df.assign(folnorm=df.folnorm * 10**7)
df_scaled.plot(x='state',y=['folnorm','folgov'], title='Normalized Twitter Followers of U.S. State Governors',
               kind='bar', legend=False, figsize=(16,4),logy=True);
plt.xlabel('Incumbent Governors')
plt.ylabel('Twitter Follower Rate Normalized by State Population')
Out[6]:
In [7]:
# Rebuild the comparison table from the raw files: the governors/challengers list joined
# with the follower counts of the governors and then of their challengers.
df = pd.read_csv('../data/governors-challengers.csv')
gov = (
    pd.read_table('../data/tw_gov.csv', usecols=['screen_name', 'followers'])
    .rename(columns={'screen_name': 'twgov', 'followers': 'folgov'})
)
df = df.merge(gov)
# The challengers file is UTF-16 encoded and names its follower column `followers_count`.
ch = (
    pd.read_table('../data/tw_ch.csv', usecols=['screen_name', 'followers_count'], encoding='utf-16')
    .rename(columns={'screen_name': 'twch', 'followers_count': 'folch'})
)
df = df.merge(ch)
# States whose incumbent governor has fewer followers than the challenger.
df[df['folgov'] < df['folch']]
Out[7]:
In [9]:
# Ratio of the governor's follower count to the challenger's (> 1: the governor has more).
df['odd'] = df['folgov']/df['folch']
# Blue where the ratio exceeds 1, red otherwise.
ax = df.plot(x='state',y=['odd'],
             title='Ratio of Sitting Governors\' Twitter Follower Counts to that of their Primary Challengers',
             kind='bar', legend=False, figsize=(16,8),logy=True,
             color=df['odd'].apply(lambda x: 'b' if x>1 else 'r'));
# plt.rc('xtick', ...) only changes defaults for figures created afterwards, so calling it
# after the plot left this figure's labels unchanged on a fresh run. Size them on this axes.
ax.tick_params(axis='x', labelsize=18)
# Legend-style note; x=30, y=5000 are passed as plt.text's position arguments.
plt.text(30,5000,'Red bars denote that the primary challenger\nhas more followers than the sitting governor',fontsize=13);