In [2]:
from bubble_popper_model import twitter_profile,twitter_links,twitter_articles
from bubble_popper_model import clean_articles,article_topics,publication_scores
from bubble_popper_model import define_bubble,burst_bubble
In [3]:
import tweepy
from sqlalchemy import create_engine
from sqlalchemy_utils import database_exists, create_database
import psycopg2
import pandas as pd
In [4]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
In [5]:
from time import sleep
import pickle
In [6]:
# Load Twitter API credentials (consumer key on line 1, secret on line 2)
# and build an app-auth tweepy client that blocks on rate limits.
with open("bubble_popper_twitter.txt", "r") as cred_file:
    creds = [entry.replace("\n", "") for entry in cred_file.readlines()]
consumer_key = creds[0]
consumer_secret = creds[1]
auth = tweepy.AppAuthHandler(consumer_key, consumer_secret)
# NOTE(review): wait_on_rate_limit_notify exists in tweepy 3.x only — it was
# removed in tweepy 4; confirm the pinned tweepy version before upgrading.
api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)
In [7]:
# Load Postgres credentials (user on line 1, password on line 2) and open
# both a SQLAlchemy engine and a raw psycopg2 connection to the local DB.
with open("bubble_popper_postgres.txt", "r") as cred_file:
    creds = [entry.replace("\n", "") for entry in cred_file.readlines()]
db = 'bubble_popper'
us = creds[0]
pw = creds[1]
engine = create_engine('postgresql://%s:%s@localhost:5432/%s'%(us,pw,db))
connstr = "dbname='%s' user='%s' host='localhost' password='%s'"%(db,us,pw)
conn = psycopg2.connect(connstr)
Modified recommendation functions to perform leave-one-out validation
In [8]:
# Use full article set for recommendations
def run_popper(articles, tweets, links, badlinks, friends, comfort_level, conn):
    """Run the full recommendation pipeline on the complete article set.

    Pipeline: clean articles -> extract topics -> score publications ->
    locate the user's bubble -> generate bubble-bursting recommendations.

    Returns a tuple (recs, user_score, user_bubble, alt_bubble).
    """
    cleaned_docs = clean_articles(articles, tweets)
    topic_data = article_topics(cleaned_docs)
    pub_scores = publication_scores(links, badlinks, friends, conn)
    user_score, user_bubble = define_bubble(pub_scores.squeeze(), topic_data)
    recs, alt_bubble = burst_bubble(user_score, user_bubble, comfort_level, conn)
    return recs, user_score, user_bubble, alt_bubble
In [9]:
# Use article set minus one (for each article)
def leave_one_out(articles,tweets,links,badlinks,friends,comfort_level,conn):
    """Leave-one-out validation of the recommendation pipeline.

    For each article index, temporarily removes that article (and its
    parallel link, friend, and tweet row), re-runs the full pipeline, and
    records the result. The input lists are restored after each iteration,
    so the caller's `articles`/`links`/`friends` are unchanged on return.

    Returns four parallel lists (recs, scores, bubbles, alt bubbles),
    one entry per held-out article.
    """
    recsList,scoreList,bubbleList,altList = [],[],[],[]
    # Filter out bad links up front so positional indices line up with
    # `articles` — presumably twitter_articles() skipped the badlinks when
    # building `articles`; TODO confirm against bubble_popper_model.
    links = [link for link in links if link not in badlinks]
    for leave_out in range(len(articles)):
        # pop() mutates the lists in place; each item is re-inserted at the
        # same index at the bottom of the loop.
        article_out = articles.pop(leave_out)
        link_out = links.pop(leave_out)
        # tweets is a DataFrame; drop() returns a copy, so no restore needed.
        tweets_keep = tweets.drop(tweets.index[leave_out])
        friend_out = friends.pop(leave_out)
        doc_set = clean_articles(articles,tweets_keep)
        doc_data = article_topics(doc_set)
        # num_articles tells publication_scores the reduced set size
        # (run_popper omits it and uses the full set).
        pub_data = publication_scores(links,badlinks,friends,conn,num_articles=len(articles))
        user_score,user_bubble = define_bubble(pub_data.squeeze(),doc_data)
        recs,alt_bubble = burst_bubble(user_score,user_bubble,comfort_level,conn)
        recsList.append(recs)
        scoreList.append(user_score)
        bubbleList.append(user_bubble)
        altList.append(alt_bubble)
        # Restore the held-out items so the next iteration (and the caller)
        # sees the original lists.
        articles.insert(leave_out,article_out)
        links.insert(leave_out,link_out)
        friends.insert(leave_out,friend_out)
    return recsList,scoreList,bubbleList,altList
Ran the recommendation algorithm for the first 10 followers (each with 1,000+ tweets and shared articles) of a conservative publication and of a liberal publication, neither of which was on the list of publications used for model training
In [ ]:
# Delay between users to be gentle on the Twitter API (seconds).
sleep_time = 5
comfort_level = 2
# Results keyed by Twitter username: full-dataset run vs. leave-one-out runs.
recsFull,scoreFull,bubbleFull,altFull = {},{},{},{}
recsMinus,scoreMinus,bubbleMinus,altMinus = {},{},{},{}
with open ("bubble_popper_users.txt","r") as myfile:
    users = [line.replace("\n","") for line in myfile.readlines()]
for user in users:
    # Fetch the user's tweets/friends, resolve shared links, and download
    # the linked articles (badlinks = links that could not be processed).
    tweets,friends = twitter_profile(user,api)
    links,tweets = twitter_links(tweets,conn)
    articles,badlinks = twitter_articles(links)
    recsFull[user],scoreFull[user],bubbleFull[user],altFull[user] = run_popper(articles,tweets,links,badlinks,friends,comfort_level,conn)
    recsMinus[user],scoreMinus[user],bubbleMinus[user],altMinus[user] = leave_one_out(articles,tweets,links,badlinks,friends,comfort_level,conn)
    sleep(sleep_time)
# Persist all results so the analysis cells below can run without re-fetching.
pickle.dump([recsFull,scoreFull,bubbleFull,altFull,recsMinus,scoreMinus,bubbleMinus,altMinus],open('bubble_popper_results.pkl','wb'))
Computed the average cosine similarity between user scores from the full dataset and from each leave-one-out (partial) dataset as a measure of recommendation stability
In [28]:
# Recommendation stability: for each user, the mean cosine similarity
# between the full-data score vector and every leave-one-out score vector;
# avgStability is the grand mean across users.
recStability = []
for user in scoreFull.keys():
    sims = [
        cosine_similarity(scoreFull[user].reshape(1, -1),
                          scoreMinus[user][held_out].reshape(1, -1))
        for held_out in range(len(scoreMinus[user]))
    ]
    recStability.append(np.mean(sims))
avgStability = np.mean(recStability)
avgStability
Out[28]:
Compared hand-labeled ideology against algorithm-predicted ideology for the first 10 followers (each with 1,000+ tweets and shared articles) of a conservative publication and of a liberal publication, neither of which was on the list of publications used for model training
In [34]:
# 0 = mostly liberal, 1 = mostly conservative, 2 = mixed liberal, 3 = mixed conservative
# Compare the hand-labeled ideology to the algorithm's predicted bubble for
# each user; each tuple is (hand label, predicted label, labels match?).
# NOTE(review): `handLabel` is not defined anywhere in this notebook —
# presumably it was created in a deleted or external cell, so this cell
# will fail under Restart & Run All. TODO: add a cell defining
# handLabel (mapping username -> ideology code) before this one.
ideoMatch = []
for user in bubbleFull.keys():
    ideoMatch.append((handLabel[user],bubbleFull[user].tolist()[0],np.equal(handLabel[user],bubbleFull[user].tolist()[0])))
ideoMatch
Out[34]: