In [3]:
from bigbang.archive import Archive
from bigbang.archive import load as load_archive
import bigbang.graph as graph
import networkx as nx
import os
import pandas as pd

In [4]:
# Locations of the locally mirrored mailing-list archives.
ietf_path = "../archives/"
ncuc_path = "../archives/http:/lists.ncuc.org/pipermail"

# CSV files to analyse. Only the 6lo list under `ietf_path` is included;
# `ncuc_path` is defined but not used in this cell.
paths = [os.path.join(ietf_path,"6lo.csv")]

# Load every listed archive and keep just its `.data` attribute
# (presumably a pandas DataFrame of messages, since it is fed to pd.concat).
datas = list(map(lambda archive_path: load_archive(archive_path).data, paths))

# Stack the per-list tables and wrap the result in a single Archive.
combined = pd.concat(datas)
arx = Archive(combined)

In [5]:
# Terms whose occurrences are counted in every message body.
# Matching is case-sensitive substring counting (str.count), so "rfc" does
# not match "RFC" and "should" also matches inside longer words.
words = ["Internet","should","rfc"]

# NOTE: this is the archive's own table, not a copy; the columns added below
# therefore become visible through `arx.data` as well.
data = arx.data

for word in words:
    # A missing body may be None *or* NaN. NaN is truthy, so a plain
    # truthiness test would let it through and fail on `.count`; check for
    # nulls explicitly and count them (and empty bodies) as zero occurrences.
    data[word] = data['Body'].apply(
        lambda body: 0 if pd.isnull(body) or not body else body.count(word)
    )

In [6]:
# Total the per-message word counts for each distinct 'From' value.
# The counter columns are selected explicitly before summing: relying on
# pandas to silently drop non-numeric columns (Body, Subject, ...) only works
# on old pandas releases; newer ones try to "sum" the text columns instead.
whosaidwhat = data.groupby('From')[words].sum()

whosaidwhat


Out[6]:
Internet should rfc
From
"6lo issue tracker" <trac+6lo@tools.ietf.org> 46 18 44
"Adrian Farrel" <adrian@olddog.co.uk> 17 12 3
"Alia Atlas" <akatlas@gmail.com> 0 0 0
"Alissa Cooper" <alissa@cooperw.in> 0 9 0
"Alvaro Retana (aretana)" <aretana@cisco.com> 0 0 0
"Amanda Baber via RT" <iana-issues-comment@iana.org> 0 1 0
"Barry Leiba" <barryleiba@computer.org> 0 1 0
"Ben Campbell" <ben@nostrum.com> 0 0 0
"Benoit Claise" <bclaise@cisco.com> 4 5 0
"Brett, Patricia (PA62)" <patricia.brett@honeywell.com> 0 1 10
"Brian Haberman" <brian@innovationslab.net> 0 0 0
"Cao,Zhen" <zehn.cao@gmail.com> 0 0 2
"Carles Gomez Montenegro" <carlesgo@entel.upc.edu> 41 22 7
"Cullen Jennings (fluffy)" <fluffy@cisco.com> 0 0 0
"Dijk, Esko" <esko.dijk@philips.com> 0 6 0
"Eric Vyncke (evyncke)" <evyncke@cisco.com> 0 0 0
"Ersue, Mehmet (NSN - DE/Munich)" <mehmet.ersue@nsn.com> 0 0 0
"Ersue, Mehmet (Nokia - DE/Munich)" <mehmet.ersue@nokia.com> 0 0 0
"Gianluca Rizzo" <Gianluca.Rizzo@hevs.ch> 11 4 0
"Hedanping (Ana)" <ana.hedanping@huawei.com> 14 8 0
"Henderickx, Wim (Nokia - BE)" <wim.henderickx@nokia.com> 0 1 0
"Houjianqiang (Derek)" <houjianqiang@huawei.com> 0 0 0
"Isomaki Markus (Nokia-TECH/Espoo)" <markus.isomaki@nokia.com> 0 0 0
"JP Vasseur (jvasseur)" <jvasseur@cisco.com> 0 0 0
"Jari Arkko" <jari.arkko@piuha.net> 0 2 0
"Joel Jaeggli" <joelja@bogus.com> 0 2 0
"Jonathan Hui (johui)" <johui@cisco.com> 1 3 2
"Kathleen Moriarty" <Kathleen.Moriarty.ietf@gmail.com> 0 1 0
"Lijo Thomas" <lijo@cdac.in> 90 14 37
"Liubing (Remy)" <remy.liubing@huawei.com> 0 0 0
... ... ... ...
Thomas Watteyne <watteyne@eecs.berkeley.edu> 0 0 0
Tianran Zhou <zhoutianran@huawei.com> 0 0 0
Tim Chown <Tim.Chown@jisc.ac.uk> 1 6 1
Tim Chown <tim.chown@jisc.ac.uk> 0 4 1
Tim Chown <tjc@ecs.soton.ac.uk> 1 6 0
Tom Taylor <tom.taylor.stds@gmail.com> 0 0 0
Ulrich Herberg <ulrich.herberg@us.fujitsu.com> 0 0 0
Ulrich Herberg <ulrich@herberg.name> 52 42 5
Wang Qin <wangqinster@gmail.com> 0 0 0
Warren Kumari <warren@kumari.net> 0 0 0
Xavier Vilajosana <xvilajosana@eecs.berkeley.edu> 0 16 14
Yasuyuki Tanaka <yasuyuki.tanaka@inria.fr> 0 1 0
Yasuyuki Tanaka <yasuyuki9.tanaka@toshiba.co.jp> 0 2 0
Yong-Geun Hong <yonggeun.hong@gmail.com> 28 7 8
YongGeun Hong <yonggeun.hong@gmail.com> 9 1 3
Zhen Cao <zhencao.ietf@gmail.com> 0 0 0
georgehanes@hushmail.com 0 1 0
gksrivas@andrew.cmu.edu 0 0 0
houjianqiang <houjianqiang@huawei.com> 2 0 0
internet-drafts@ietf.org 427 0 195
james woodyatt <jhw@google.com> 0 6 0
james woodyatt <jhw@nestlabs.com> 0 11 2
joel jaeggli <joelja@bogus.com> 0 5 0
mat nizam <matnizam9315@gmail.com> 0 0 0
mcr@sandelman.ca 0 0 0
peter van der Stok <stokcons@xs4all.nl> 5 17 6
rfc-editor@rfc-editor.org 30 10 104
sajjad akbar <sajjad.akr1@gmail.com> 29 2 26
samita.chakrabarti@verizon.com 2 0 0
worley@ariadne.com (Dale R. Worley) 2 16 0

248 rows × 3 columns


In [5]:
## Before running this, you need to create the entity_matches.csv
## file from the SummerSchoolConsolidateUserNames notebook

# `pd.Series.from_csv` was deprecated in pandas 0.21 and removed in 1.0.
# Equivalent read: no header row, first column becomes the index, and the
# first remaining column is returned as a Series. Date parsing of the index
# (a from_csv default) is intentionally not requested, because the index is
# later aligned against the sender strings that index `whosaidwhat`.
matches = pd.read_csv("entity_matches.csv", index_col=0, header=None).iloc[:, 0]

# from_csv left both the Series and its index unnamed; keep that behaviour
# instead of exposing the positional labels (1 and 0) chosen by read_csv.
matches.name = None
matches.index.name = None

In [6]:
# Attach a 'Name' column taken from `matches` (presumably the consolidated
# entity name for each sender -- confirm against the notebook that writes
# entity_matches.csv). Assigning a Series to a DataFrame column aligns on
# index labels, so `matches` must be indexed by the same 'From' strings as
# `whosaidwhat`; senders with no entry in `matches` receive NaN.
whosaidwhat['Name'] = matches

In [7]:
# Merge rows that share the same 'Name' into one row by adding up their word
# counts. Rows whose 'Name' is NaN are left out of the result, as groupby
# excludes NaN keys by default.
whosaidwhat_named = (
    whosaidwhat
    .groupby(by='Name')
    .sum()
)

In [8]:
# Write the per-name totals to a CSV file in the current working directory
# (the 'Name' index is written as the first column).
whosaidwhat_named.to_csv(path_or_buf="whosaidwhat_named.csv")

In [ ]: