In [1]:
from bigbang.archive import Archive
from bigbang.archive import load as load_archive
import bigbang.graph as graph
import networkx as nx
import os
import pandas as pd

In [2]:
# Directories that hold the per-list CSV archives.
icann_path = "../archives/http:/mm.icann.org/pipermail"
ncuc_path = "../archives/http:/lists.ncuc.org/pipermail"

# (archive directory, CSV file) for every mailing list, in load order.
list_sources = [
    (icann_path, "ipc-gnso.csv"),
    (icann_path, "wp4.csv"),
    (icann_path, "alac.csv"),
    (icann_path, "gnso-rds-pdp-wg.csv"),
    (icann_path, "accountability-cross-community.csv"),
    (icann_path, "cc-humanrights.csv"),
    (ncuc_path, "ncuc-discuss.csv"),
]

paths = [os.path.join(root, filename) for root, filename in list_sources]

# Load every list, then concatenate their `.data` frames into one Archive.
datas = [load_archive(path).data for path in paths]

arx = Archive(pd.concat(datas))


/home/sb/projects/nllz-bigbang/bigbang/bigbang/archive.py:73: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
  self.data.sort(columns='Date', inplace=True)

In [3]:
# Terms to count in every message body. Counting uses str.count, i.e. it is a
# case-sensitive substring match: "Human" is not counted, and "rights" also
# matches inside "copyrights".
words = ["human","rights","internet"]

# NOTE: no copy is made here, so the count columns added below end up on the
# frame held by `arx` as well.
data = arx.data

# Missing bodies are normalised to "" before counting. A plain truthiness
# guard is not enough: a missing value stored as NaN is truthy, so calling
# .count on it raises AttributeError (float has no .count).
bodies = data['Body'].fillna("")

# One integer column per term, named after the term.
for word in words:
    data[word] = bodies.apply(lambda body: body.count(word))

In [4]:
# Total occurrences of each term per distinct 'From' value.
# The word columns are selected explicitly so that the text/date columns of
# the archive never reach .sum(): older pandas silently dropped them, while
# pandas >= 2.0 (numeric_only=False by default) would try to add them up.
whosaidwhat = data.groupby('From')[words].sum()

whosaidwhat


Out[4]:
human rights internet
From
AAikman at lrrc.com (Aikman-Scalese, Anne) 0 0 0
AAikman at lrrlaw.com (Aikman-Scalese, Anne) 2 13 39
Alex_Deacon at mpaa.org (Deacon, Alex) 1 4 1
Amadeu at nominalia.com (Amadeu Abril i Abril) 0 3 0
Avri at ACM.ORG (Avri Doria) 0 0 0
BWinterfeldt at mayerbrown.com (Winterfeldt, Brian J.) 0 0 1
Becky.Burr at neustar.biz (Burr, Becky) 66 187 177
Beth.Allegretti at fox.com (Beth Allegretti) 0 0 1
Bradley.Silver at timewarner.com (Silver, Bradley) 1 50 10
Brenbe at CONSUMER.ORG (Brendler, Beau) 0 8 1
Brett.Schaefer at heritage.org (Schaefer, Brett) 39 70 129
Bruce at barelyadequate.info (Bruce Young) 0 0 0
Bruce.Tonkin at melbourneit.com.au (Bruce Tonkin) 62 95 15
CCHIU at ACLU.ORG (Chris Chiu) 4 4 46
CCHIU at aclu.org (Chris Chiu) 0 7 69
CHagstrom at lb.com (Hagstrom, Christopher) 0 0 0
CLackert at reedsmith.com (Lackert, Clark W.) 0 1 0
Camino.MANJON at ec.europa.eu (Camino.MANJON at ec.europa.eu) 130 141 4
Carlos.Souza at FGV.BR (Carlos Affonso Pereira de Souza) 0 0 2
DFares at 21cf.com (Fares, David) 0 0 4
DNSO.Secretariat at dnso.org (DNSO Secretariat) 0 1 0
DSimon at SONOSKY.COM (Donald Simon) 0 1 0
DannyYounger at cs.com (DannyYounger at cs.com) 1 4 1
David.Taylor at hoganlovells.com (Taylor, David) 0 0 4
Dixie at GLOBAL-PARTNERS.CO.UK (Dixie Hawtin) 0 0 1
Donna.Austin at neustar.biz (Austin, Donna) 0 0 0
Elisabeth.Porteneuve at cetp.ipsl.fr (Elisabeth Porteneuve) 0 5 0
Ellen.M.Blackler at disney.com (Blackler, Ellen M.) 8 8 0
FVayra at perkinscoie.com (Vayra, Fabricio (Perkins Coie)) 0 3 0
FinPet at erst.dk (Finn Petersen) 0 4 0
... ... ... ...
wendy at seltzer.com (Wendy Seltzer) 0 9 0
wgondwe at CC.AC.MW (Walu G. Gondwe) 0 0 1
william.drake at GRADUATEINSTITUTE.CH (Drake William) 0 0 0
william.drake at GRADUATEINSTITUTE.CH (William Drake) 11 53 18
william.drake at UZH.CH (William Drake) 9 49 59
william.drake at uzh.ch (William Drake) 49 82 185
willie.currie at gmail.com (william currie) 108 145 10
wilson at isoc.ug (Wilson Abigaba) 0 0 0
wisdom.dk at gmail.com (Wisdom Donkor) 2 4 7
wisdom.stoic at gmail.com (Rahul Sharma) 54 57 21
wjdrake at gmail.com (William Drake) 126 146 89
wolf-ulrich.knoben at t-online.de (WUKnoben) 0 1 3
wolf.ludwig at comunica-ch.net (Wolf Ludwig) 0 1 0
wolfgang.kleinwaechter at MEDIENKOMM.UNI-HALLE.DE (=?iso-8859-1?Q?=22Kleinw=E4chter=2C_Wolfgang=22?=) 0 0 0
wolfgang.kleinwaechter at medienkomm.uni-halle.de (=?iso-8859-1?Q?=22Kleinw=E4chter=2C_Wolfgang=22?=) 20 30 41
wpilimon at dfp-interactive.com (=?iso-8859-1?Q?Walter_Pilim=F3n?=) 0 0 0
wsaqaf at gmail.com (Walid AL-SAQAF) 8 7 3
wsis at modirian.net (Modirian) 1 3 0
xavier.calvez at icann.org (Xavier J. Calvez) 0 1 0
yakmutd at yahoo.com (Daniel Yakmut) 0 0 0
yameogoben at yahoo.fr (Bernard YAMEOGO) 1 0 2
yjpark at MYEPARK.COM (Y J Park) 0 0 0
yjpark at MYEPARK.COM (YJ Park) 0 2 0
yjpark at myepark.com (YJ Park) 0 3 0
yjpark21 at GMAIL.COM (YJ Park) 0 0 1
yjpark21 at GMAIL.COM (Youn Jung Park) 0 0 2
zainsyed45 at gmail.com (Zain Khan, CMC) 0 0 0
zakirbinrehman at yahoo.com (Zakir Syed) 0 0 2
zalnieriute at gmail.com (zalnieriute .) 146 166 6
zbone72 at yahoo.com (Brett Spears) 0 0 2

1166 rows × 3 columns


In [5]:
## Before running this, you need to create the entity_matches.csv
## file from the SummerSchoolConsolidateUserNames notebook

# pd.Series.from_csv was deprecated in pandas 0.21 and removed in 1.0.
# The read_csv call below mirrors what it did: no header row, first column
# becomes the index, and the first remaining column becomes the Series values.
# (from_csv also attempted to parse the index as dates; that is not
# replicated here.)
matches = pd.read_csv("entity_matches.csv", index_col=0, header=None).iloc[:, 0]

# from_csv returned an unnamed Series with an unnamed index; keep that shape.
matches.name = None
matches.index.name = None

In [6]:
# Add a 'Name' column taken from `matches`. pandas aligns the Series on index
# labels, so `matches` has to be indexed by the same 'From' strings that index
# `whosaidwhat`; rows with no matching label receive NaN.
# Presumably `matches` maps each raw sender string to a consolidated name --
# confirm against the SummerSchoolConsolidateUserNames notebook.
whosaidwhat['Name'] = matches

In [7]:
# Collapse the per-sender rows into one row per 'Name', adding up the term
# counts. Rows whose 'Name' is NaN are left out by groupby.
whosaidwhat_named = (
    whosaidwhat
    .groupby(by='Name')
    .sum()
)

In [8]:
# Save the per-name totals as whosaidwhat_named.csv (path is relative to the
# notebook's working directory); the 'Name' index is written as the first column.
whosaidwhat_named.to_csv("whosaidwhat_named.csv")

In [ ]: