RECOMMENDATIONS

Recommendation Stability


In [2]:
from bubble_popper_model import twitter_profile,twitter_links,twitter_articles
from bubble_popper_model import clean_articles,article_topics,publication_scores
from bubble_popper_model import define_bubble,burst_bubble

In [3]:
import tweepy
from sqlalchemy import create_engine
from sqlalchemy_utils import database_exists, create_database
import psycopg2
import pandas as pd

In [4]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

In [5]:
from time import sleep
import pickle

In [6]:
# Read Twitter API credentials and authenticate with tweepy
with open("bubble_popper_twitter.txt","r") as myfile:
    lines = [line.replace("\n","") for line in myfile.readlines()]
consumer_key, consumer_secret = lines[0], lines[1]
auth = tweepy.AppAuthHandler(consumer_key, consumer_secret)
api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)

In [7]:
# Read Postgres credentials and open connections via SQLAlchemy and psycopg2
with open("bubble_popper_postgres.txt","r") as myfile:
    lines = [line.replace("\n","") for line in myfile.readlines()]
db, us, pw = 'bubble_popper', lines[0], lines[1]
engine = create_engine('postgresql://%s:%s@localhost:5432/%s'%(us,pw,db))
connstr = "dbname='%s' user='%s' host='localhost' password='%s'"%(db,us,pw)
conn = psycopg2.connect(connstr)

Modified recommendation functions to perform leave-one-out validation


In [8]:
# Use the full article set for recommendations
def run_popper(articles,tweets,links,badlinks,friends,comfort_level,conn):

    # Clean the shared articles, extract their topics, and score the linked publications
    doc_set = clean_articles(articles,tweets)
    doc_data = article_topics(doc_set)
    pub_data = publication_scores(links,badlinks,friends,conn)
    # Assign the user's ideology bubble, then recommend articles from an alternative bubble
    user_score,user_bubble = define_bubble(pub_data.squeeze(),doc_data)
    recs, alt_bubble = burst_bubble(user_score,user_bubble,comfort_level,conn)

    return recs,user_score,user_bubble,alt_bubble

In [9]:
# Use the article set minus one, holding out each article in turn
def leave_one_out(articles,tweets,links,badlinks,friends,comfort_level,conn):

    recsList,scoreList,bubbleList,altList = [],[],[],[]
    links = [link for link in links if link not in badlinks]

    for leave_out in range(len(articles)):

        # Temporarily remove the held-out article and the corresponding
        # entries from links, tweets, and friends
        article_out = articles.pop(leave_out)
        link_out = links.pop(leave_out)
        tweets_keep = tweets.drop(tweets.index[leave_out])
        friend_out = friends.pop(leave_out)

        # Rerun the full pipeline on the reduced data
        doc_set = clean_articles(articles,tweets_keep)
        doc_data = article_topics(doc_set)
        pub_data = publication_scores(links,badlinks,friends,conn,num_articles=len(articles))
        user_score,user_bubble = define_bubble(pub_data.squeeze(),doc_data)
        recs,alt_bubble = burst_bubble(user_score,user_bubble,comfort_level,conn)

        recsList.append(recs)
        scoreList.append(user_score)
        bubbleList.append(user_bubble)
        altList.append(alt_bubble)

        # Restore the held-out items before the next iteration
        articles.insert(leave_out,article_out)
        links.insert(leave_out,link_out)
        friends.insert(leave_out,friend_out)

    return recsList,scoreList,bubbleList,altList

Ran the recommendation algorithm for the first 10 followers (with 1,000+ tweets and shared articles) of each of two publications, one conservative and one liberal, neither of which was among the publications used for model training


In [ ]:
sleep_time = 5
comfort_level = 2
recsFull,scoreFull,bubbleFull,altFull = {},{},{},{}
recsMinus,scoreMinus,bubbleMinus,altMinus = {},{},{},{}

# Read the list of test users (Twitter handles)
with open("bubble_popper_users.txt","r") as myfile:
    users = [line.replace("\n","") for line in myfile.readlines()]

for user in users:
    # Pull the user's tweets and friends, extract shared links, and fetch article text
    tweets,friends = twitter_profile(user,api)
    links,tweets = twitter_links(tweets,conn)
    articles,badlinks = twitter_articles(links)
    # Recommendations from the full article set and from each leave-one-out subset
    recsFull[user],scoreFull[user],bubbleFull[user],altFull[user] = run_popper(articles,tweets,links,badlinks,friends,comfort_level,conn)
    recsMinus[user],scoreMinus[user],bubbleMinus[user],altMinus[user] = leave_one_out(articles,tweets,links,badlinks,friends,comfort_level,conn)
    sleep(sleep_time)

pickle.dump([recsFull,scoreFull,bubbleFull,altFull,recsMinus,scoreMinus,bubbleMinus,altMinus],open('bubble_popper_results.pkl','wb'))
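
Since the cell above queries the Twitter API for every user and can take a while to run, the saved results could be reloaded from the pickle file for the analysis below instead of recomputing them; a minimal sketch, assuming bubble_popper_results.pkl was written by the cell above


In [ ]:
# Reload previously saved results so the analysis cells below can run
# without re-querying the Twitter API
with open('bubble_popper_results.pkl','rb') as f:
    recsFull,scoreFull,bubbleFull,altFull,recsMinus,scoreMinus,bubbleMinus,altMinus = pickle.load(f)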

Computed the average cosine similarity between each user's score from the full dataset and their scores from each leave-one-out dataset, as a measure of recommendation stability


In [28]:
# Average cosine similarity between each user's full-data score vector
# and each of their leave-one-out score vectors
recStability = []
for user in scoreFull.keys():
    cossim = []
    for leave_out in range(len(scoreMinus[user])):
        cossim.append(cosine_similarity(scoreFull[user].reshape(1,-1),scoreMinus[user][leave_out].reshape(1,-1)))
    recStability.append(np.mean(cossim))

avgStability = np.mean(recStability)
avgStability


Out[28]:
0.99881292370686248
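
The average alone could mask a few unstable users, so a quick follow-up check (a sketch using the recStability values computed above) would be to look at the per-user spread as well


In [ ]:
# Per-user stability alongside the overall spread, as a rough check that the
# high average is not hiding a few users whose recommendations shift when a
# single article is held out
for user, stability in zip(scoreFull.keys(), recStability):
    print(user, round(stability, 4))

np.min(recStability), np.std(recStability)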

Ideology Cluster Match

Compared the hand-labeled ideology with the algorithm-predicted ideology for the first 10 followers (with 1,000+ tweets and shared articles) of each of the same two publications, one conservative and one liberal, neither of which was among the publications used for model training


In [34]:
# 0 = mostly liberal, 1 = mostly conservative, 2 = mixed liberal, 3 = mixed conservative
# handLabel maps each user to a hand-assigned ideology label (defined elsewhere)
ideoMatch = []
for user in bubbleFull.keys():
    ideoMatch.append((handLabel[user],
                      bubbleFull[user].tolist()[0],
                      np.equal(handLabel[user],bubbleFull[user].tolist()[0])))

ideoMatch


Out[34]:
[(1, 2, False),
 (1, 1, True),
 (0, 0, True),
 (1, 2, False),
 (0, 0, True),
 (0, 0, True),
 (1, 1, True),
 (0, 2, False),
 (1, 1, True),
 (1, 1, True),
 (1, 2, False),
 (1, 1, True),
 (0, 0, True),
 (0, 0, True),
 (0, 2, False),
 (0, 0, True),
 (1, 1, True),
 (0, 0, True),
 (0, 0, True),
 (1, 1, True)]
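
As a summary of the table above, both the exact agreement and a coarser left/right agreement could be computed from ideoMatch; a minimal sketch, grouping clusters 0 and 2 as liberal and 1 and 3 as conservative per the label definitions above


In [ ]:
# Exact agreement: hand label equals the predicted cluster
exact = np.mean([bool(match) for _, _, match in ideoMatch])

# Coarse left/right agreement: clusters 0 and 2 are liberal, 1 and 3 are
# conservative, so the parity of the label gives the side
coarse = np.mean([hand % 2 == pred % 2 for hand, pred, _ in ideoMatch])

exact, coarse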