In [ ]:
import pickle
import csv
from collections import Counter
# Load the pickled tweet collection. Later cells read `.user.screen_name` and
# `.entities[...]` on each element (presumably tweepy Status objects --
# TODO confirm).
# NOTE(review): unpickling can execute arbitrary code; only load "tweets.pkl"
# when it comes from a trusted source.
with open("tweets.pkl", "rb") as pkl_file:  # context manager closes the handle
    data = pickle.load(pkl_file)
print(len(data))
In [ ]:
# Gather every screen name that appears in the data: first the author of each
# tweet, then every account listed in a tweet's 'user_mentions' entities.
sn_list = [t.user.screen_name for t in data]
sn_list.extend(
    mention['screen_name']
    for t in data
    for mention in t.entities['user_mentions']
)

# Number of appearances per screen name (as author or as mention), counted
# before duplicates are removed.
user_count = dict(Counter(sn_list))

# Unique screen names. Sorted because the iteration order of a set of strings
# changes between interpreter runs (hash randomization), which would make any
# ids derived from this list's order differ from run to run.
sn_list = sorted(set(sn_list))
In [ ]:
# Map each screen name to a record holding its numeric node id (starting at 1)
# and two initially empty lists that are filled in later: ids of mentioned
# users ('mentions') and ids of hashtags used ('hash').
sn_dict = {
    screen_name: {'id': node_id, 'mentions': [], 'hash': []}
    for node_id, screen_name in enumerate(sn_list, start=1)
}

# Next unused id; the hashtag nodes continue numbering from here.
idn = len(sn_list) + 1
In [ ]:
# Collect the text of every hashtag entity across all tweets (duplicates kept
# so they can be counted).
hash_list = [
    tag["text"]
    for t in data
    for tag in t.entities["hashtags"]
]

# Number of occurrences per hashtag text.
hash_count = dict(Counter(hash_list))

# Unique hashtags. Sorted because the iteration order of a set of strings
# changes between interpreter runs (hash randomization), which would make any
# ids derived from this list's order differ from run to run.
hash_list = sorted(set(hash_list))
In [ ]:
# Give every hashtag a numeric node id, continuing from the current value of
# `idn` so hashtag ids do not overlap the ids already handed out.
hash_dict = {
    tag: tag_id
    for tag_id, tag in enumerate(hash_list, start=idn)
}

# Advance the running id counter past the ids just assigned.
idn += len(hash_list)
In [ ]:
# For each tweet, record on the author's entry in sn_dict:
#   * the node id of every user mentioned in the tweet, and
#   * the node id of every hashtag used in the tweet.
# Repeats are kept on purpose; they are counted when the edge rows are built.
for tweet in data:
    author = tweet.user.screen_name

    for mention in tweet.entities["user_mentions"]:
        mentioned_id = sn_dict[mention["screen_name"]]['id']
        sn_dict[author]['mentions'].append(mentioned_id)

    for tag in tweet.entities["hashtags"]:
        sn_dict[author]['hash'].append(hash_dict[tag["text"]])
In [ ]:
# Node table for users: one row per screen name with its node id and the
# number of times the name appears in user_count.
id_rows = [['Nodes', 'Id', 'Count']]
id_rows.extend(
    [screen_name, info['id'], user_count[screen_name]]
    for screen_name, info in sn_dict.items()
)

# newline="" lets the csv module control line endings itself; without it the
# 'excel' dialect's "\r\n" gets translated again on Windows and every other
# row comes out blank. The encoding is pinned so output does not depend on the
# platform default.
with open("nodes-user-ids.csv", "w", newline="", encoding="utf-8") as fp:
    wr = csv.writer(fp, dialect='excel')
    wr.writerows(id_rows)
In [ ]:
# Node table for hashtags: one row per hashtag text with its node id and the
# number of times it appears in hash_count.
id_rows = [['Nodes', 'Id', 'Count']]
id_rows.extend(
    [tag, tag_id, hash_count[tag]]
    for tag, tag_id in hash_dict.items()
)

# newline="" lets the csv module control line endings itself (avoids blank
# rows on Windows). Hashtag text may contain non-ASCII characters, so the
# encoding is set explicitly rather than relying on the platform default,
# which may be unable to encode them.
with open("nodes-hashtags-ids.csv", "w", newline="", encoding="utf-8") as fp:
    wr = csv.writer(fp, dialect='excel')
    wr.writerows(id_rows)
In [ ]:
# Edge table user -> mentioned user: one row per (source, target) pair with
# the number of times the source mentioned the target.
mention_rows = [['Source', 'Target', 'Count']]
for k in sn_dict:
    source_id = sn_dict[k]['id']
    # Counter covers every case uniformly: an empty list yields no rows, and a
    # user who mentioned one single account several times gets the real count
    # (the previous special case always wrote 1 for a lone target).
    for target_id, times in Counter(sn_dict[k]['mentions']).items():
        mention_rows.append([source_id, target_id, times])

# newline="" lets the csv module control line endings itself; without it the
# 'excel' dialect produces blank rows between records on Windows.
with open("edges-user-mentions.csv", "w", newline="") as fp:
    wr = csv.writer(fp, dialect='excel')
    wr.writerows(mention_rows)
In [ ]:
# Edge table user -> hashtag: one row per (user, hashtag) pair with the number
# of times the user used that hashtag.
hash_rows = [['Source', 'Target', 'Count']]
for k in sn_dict:
    source_id = sn_dict[k]['id']
    # Counter covers every case uniformly: an empty list yields no rows, and a
    # user who used one single hashtag several times gets the real count
    # (the previous special case always wrote 1 for a lone hashtag).
    for tag_id, times in Counter(sn_dict[k]['hash']).items():
        hash_rows.append([source_id, tag_id, times])

# newline="" lets the csv module control line endings itself; without it the
# 'excel' dialect produces blank rows between records on Windows.
with open("edges-user-hashtags.csv", "w", newline="") as fp:
    wr = csv.writer(fp, dialect='excel')
    wr.writerows(hash_rows)
In [ ]: