In [1]:
from bigbang.archive import Archive
from bigbang.archive import load as load_archive
import bigbang.graph as graph
import networkx as nx
import os
import pandas as pd
In [2]:
icann_path = "../archives/http:/mm.icann.org/pipermail"
ncuc_path = "../archives/http:/lists.ncuc.org/pipermail"
paths = [os.path.join(icann_path, "ipc-gnso.csv"),
         os.path.join(icann_path, "wp4.csv"),
         os.path.join(icann_path, "alac.csv"),
         os.path.join(icann_path, "gnso-rds-pdp-wg.csv"),
         os.path.join(icann_path, "accountability-cross-community.csv"),
         os.path.join(icann_path, "cc-humanrights.csv"),
         os.path.join(ncuc_path, "ncuc-discuss.csv")]
datas = [load_archive(path).data for path in paths]
arx = Archive(pd.concat(datas))
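In [ ]:
## Optional sanity check (not in the original notebook): confirm the
## concatenation produced a single table of messages from all seven lists.
arx.data.shape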
In [3]:
words = ["human","rights","internet"]
data = arx.data
for word in words:
data[word] = data['Body'].apply(lambda x: x.count(word) if x else 0)
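In [ ]:
## A possible refinement (an assumption, not the original method): the
## count above is case-sensitive and matches raw substrings, so "rights"
## also counts "copyrights". A case-insensitive, word-boundary count
## could be stored in hypothetical "_word" columns like this:
import re
for word in words:
    data[word + "_word"] = data['Body'].str.count(r"\b%s\b" % word, flags=re.IGNORECASE).fillna(0).astype(int)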
In [4]:
## Total the per-message counts for each sender address.
whosaidwhat = data.groupby('From').sum()
whosaidwhat
Out[4]:
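In [ ]:
## Note that groupby(...).sum() totals every numeric column. To restrict
## the table to just the keyword counts (a narrowing not taken above):
data.groupby('From')[words].sum()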
In [5]:
## Before running this, you need to create the entity_matches.csv
## file from the SummerSchoolConsolidateUserNames notebook.
## pd.Series.from_csv has been removed from pandas; read_csv plus
## squeeze is the documented replacement.
matches = pd.read_csv("entity_matches.csv", index_col=0, header=None).squeeze("columns")
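In [ ]:
## Quick inspection (not in the original run): matches is assumed to be
## a Series indexed by From address with the consolidated entity name as
## its value, so the head should show address-to-name pairs.
matches.head()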
In [6]:
## Attach the consolidated entity name to each From address by index.
whosaidwhat['Name'] = matches
In [7]:
## Re-aggregate the counts under the consolidated entity names.
whosaidwhat_named = whosaidwhat.groupby('Name').sum()
In [8]:
whosaidwhat_named.to_csv("whosaidwhat_named.csv")
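In [ ]:
## Optional follow-up (a sketch, not part of the original analysis):
## rank entities by how often they used one of the terms.
whosaidwhat_named.sort_values('rights', ascending=False).head(10)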