Export Tweets to Gephi CSVs



In [ ]:

    
import pickle
import csv
from collections import Counter

# Load the pickled tweet collection from tweets.pkl.
# Later cells read `t.user.screen_name` and `t.entities[...]` from each item
# (presumably tweepy Status objects -- TODO confirm).
# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# only load a tweets.pkl that you created yourself.
with open("tweets.pkl", "rb") as fp:  # `with` closes the handle after loading
    data = pickle.load(fp)

print(len(data))

Make User IDs



In [ ]:

    
# Every screen name that appears in the data set: first the author of each
# tweet, then every user mentioned in any tweet (duplicates kept for counting).
sn_list = [t.user.screen_name for t in data]
sn_list.extend(
    mention['screen_name']
    for t in data
    for mention in t.entities['user_mentions']
)

# Number of appearances (as author or as mention target) per screen name.
user_count = dict(Counter(sn_list))

# De-duplicate. The result is sorted because iteration order of a set of
# strings changes between interpreter runs (hash randomization), which would
# hand out different node IDs on every kernel restart.
sn_list = sorted(set(sn_list))



In [ ]:

    
# Map each screen name to a node record: a 1-based numeric ID plus two lists
# that a later cell fills with mentioned-user IDs and hashtag IDs.
sn_dict = {
    screen_name: {'id': node_id, 'mentions': [], 'hash': []}
    for node_id, screen_name in enumerate(sn_list, start=1)
}

# Next unused ID; the hashtag cell continues numbering from here.
idn = len(sn_list) + 1

Make Hashtag IDs



In [ ]:

    
# Text of every hashtag used in any tweet (duplicates kept for counting).
hash_list = [h["text"] for t in data for h in t.entities["hashtags"]]

# Number of times each hashtag text occurs across all tweets.
hash_count = dict(Counter(hash_list))

# De-duplicate. The result is sorted because iteration order of a set of
# strings changes between interpreter runs (hash randomization), which would
# hand out different hashtag IDs on every kernel restart.
hash_list = sorted(set(hash_list))



In [ ]:

    
# Hashtag IDs continue directly after the user IDs (1..len(sn_dict)) so that
# users and hashtags share one ID space. The starting ID is derived from
# sn_dict instead of incrementing the shared counter `idn` in place, so
# re-running this cell yields the same IDs rather than shifting them.
first_hash_id = len(sn_dict) + 1
hash_dict = {
    tag: first_hash_id + offset
    for offset, tag in enumerate(hash_list)
}

# Keep `idn` pointing at the next unused ID, as the incrementing loop did.
idn = first_hash_id + len(hash_list)

Add Mentions and Hashtags to User



In [ ]:

    
# Start from empty lists so that re-running this cell does not append every
# mention and hashtag a second time (which would inflate the edge counts).
for entry in sn_dict.values():
    entry['mentions'] = []
    entry['hash'] = []

# For each tweet, record on the author's node the ID of every mentioned user
# and the ID of every hashtag used. Repeats are kept: the edge cells count them.
for t in data:
    author = sn_dict[t.user.screen_name]
    for sn in t.entities["user_mentions"]:
        author['mentions'].append(sn_dict[sn["screen_name"]]['id'])
    for h in t.entities["hashtags"]:
        author['hash'].append(hash_dict[h["text"]])

Write CSVs



In [ ]:

    
# User node table: one row per screen name with its numeric ID and the
# number of times it appears in the data (user_count).
id_rows = [['Nodes', 'Id', 'Count']]
id_rows.extend(
    [screen_name, info['id'], user_count[screen_name]]
    for screen_name, info in sn_dict.items()
)

# newline="" is what the csv module requires of its file object; without it
# Windows writes an empty line after every row. UTF-8 is set explicitly so
# the output does not depend on the platform's default encoding.
with open("nodes-user-ids.csv", "w", newline="", encoding="utf-8") as fp:
    wr = csv.writer(fp, dialect='excel')
    wr.writerows(id_rows)



In [ ]:

    
# Hashtag node table: one row per hashtag text with its numeric ID and the
# number of times it occurs in the data (hash_count).
id_rows = [['Nodes', 'Id', 'Count']]
id_rows.extend(
    [tag, tag_id, hash_count[tag]]
    for tag, tag_id in hash_dict.items()
)

# newline="" is what the csv module requires of its file object; without it
# Windows writes an empty line after every row. UTF-8 is set explicitly
# because hashtag text may be non-ASCII, which a platform default encoding
# can fail to encode.
with open("nodes-hashtags-ids.csv", "w", newline="", encoding="utf-8") as fp:
    wr = csv.writer(fp, dialect='excel')
    wr.writerows(id_rows)



In [ ]:

    
# User -> mentioned-user edge table: one row per (source, target) pair with
# the number of times the source mentioned the target.
mention_rows = [['Source', 'Target', 'Count']]

for user in sn_dict.values():
    # Count every target the same way. A special case for "exactly one
    # distinct target" that hard-codes the count to 1 under-reports users who
    # mentioned that one target several times. Users with no mentions
    # contribute no rows (Counter of an empty list is empty).
    for target_id, times in Counter(user['mentions']).items():
        mention_rows.append([user['id'], target_id, times])

# newline="" is what the csv module requires of its file object; without it
# Windows writes an empty line after every row.
with open("edges-user-mentions.csv", "w", newline="", encoding="utf-8") as fp:
    wr = csv.writer(fp, dialect='excel')
    wr.writerows(mention_rows)



In [ ]:

    
# User -> hashtag edge table: one row per (user, hashtag) pair with the
# number of times the user used the hashtag.
hash_rows = [['Source', 'Target', 'Count']]

for user in sn_dict.values():
    # Count every hashtag the same way. A special case for "exactly one
    # distinct hashtag" that hard-codes the count to 1 under-reports users who
    # used that one hashtag several times. Users with no hashtags contribute
    # no rows (Counter of an empty list is empty).
    for tag_id, times in Counter(user['hash']).items():
        hash_rows.append([user['id'], tag_id, times])

# newline="" is what the csv module requires of its file object; without it
# Windows writes an empty line after every row.
with open("edges-user-hashtags.csv", "w", newline="", encoding="utf-8") as fp:
    wr = csv.writer(fp, dialect='excel')
    wr.writerows(hash_rows)



In [ ]: