notebook.community

Edit and run



In [1]:

    
from bigbang.archive import Archive
from bigbang.archive import load as load_archive
import bigbang.graph as graph
import networkx as nx
import os
import pandas as pd



In [2]:

    
# Relative directories that hold the per-list archive CSV files.
icann_path = "../archives/http:/mm.icann.org/pipermail"
ncuc_path = "../archives/http:/lists.ncuc.org/pipermail"

# File names of the ICANN-hosted lists, in the order they are loaded.
icann_lists = ["ipc-gnso.csv",
               "wp4.csv",
               "alac.csv",
               "gnso-rds-pdp-wg.csv",
               "accountability-cross-community.csv",
               "cc-humanrights.csv"]

# All ICANN lists first, then the single NCUC list.
paths = [os.path.join(icann_path, list_file) for list_file in icann_lists]
paths.append(os.path.join(ncuc_path, "ncuc-discuss.csv"))

# Load every archive and keep only its `.data` attribute
# (presumably a DataFrame, since the pieces are handed to pd.concat).
datas = [archive.data for archive in map(load_archive, paths)]

# One combined Archive built from the concatenation of all lists.
arx = Archive(pd.concat(datas))









    



/home/sb/projects/nllz-bigbang/bigbang/bigbang/archive.py:73: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
  self.data.sort(columns='Date', inplace=True)



In [3]:

    
# Terms whose occurrences are counted in every message body.
words = ["human","rights","internet"]

# NOTE: this is the archive's own frame, not a copy, so the count columns
# added below are also visible through `arx.data`.
data = arx.data


def count_word(body, word):
    """Count non-overlapping, case-sensitive occurrences of `word` in `body`.

    Missing bodies (None / NaN) and empty bodies count as 0. The explicit
    null check matters because NaN is truthy, so a plain `if body` guard
    would let it through and fail on `.count`.
    """
    if pd.isnull(body) or not body:
        return 0
    return body.count(word)


# One integer column per term, named after the term itself.
for word in words:
    data[word] = data['Body'].apply(count_word, args=(word,))



In [4]:

    
# Total keyword mentions per sender ('From' value).
# Only the count columns are summed: summing the whole frame depends on
# pandas silently dropping the non-numeric columns, which newer pandas
# versions no longer do.
whosaidwhat = data.groupby('From')[words].sum()

whosaidwhat









    Out[4]:






  
    
      
      human
      rights
      internet
    
    
      From
      
      
      
    
  
  
    
      AAikman at lrrc.com (Aikman-Scalese, Anne)
      0
      0
      0
    
    
      AAikman at lrrlaw.com (Aikman-Scalese, Anne)
      2
      13
      39
    
    
      Alex_Deacon at mpaa.org (Deacon, Alex)
      1
      4
      1
    
    
      Amadeu at nominalia.com (Amadeu Abril i Abril)
      0
      3
      0
    
    
      Avri at ACM.ORG (Avri Doria)
      0
      0
      0
    
    
      BWinterfeldt at mayerbrown.com (Winterfeldt, Brian J.)
      0
      0
      1
    
    
      Becky.Burr at neustar.biz (Burr, Becky)
      66
      187
      177
    
    
      Beth.Allegretti at fox.com (Beth Allegretti)
      0
      0
      1
    
    
      Bradley.Silver at timewarner.com (Silver, Bradley)
      1
      50
      10
    
    
      Brenbe at CONSUMER.ORG (Brendler, Beau)
      0
      8
      1
    
    
      Brett.Schaefer at heritage.org (Schaefer, Brett)
      39
      70
      129
    
    
      Bruce at barelyadequate.info (Bruce Young)
      0
      0
      0
    
    
      Bruce.Tonkin at melbourneit.com.au (Bruce Tonkin)
      62
      95
      15
    
    
      CCHIU at ACLU.ORG (Chris Chiu)
      4
      4
      46
    
    
      CCHIU at aclu.org (Chris Chiu)
      0
      7
      69
    
    
      CHagstrom at lb.com (Hagstrom, Christopher)
      0
      0
      0
    
    
      CLackert at reedsmith.com (Lackert, Clark W.)
      0
      1
      0
    
    
      Camino.MANJON at ec.europa.eu (Camino.MANJON at ec.europa.eu)
      130
      141
      4
    
    
      Carlos.Souza at FGV.BR (Carlos Affonso Pereira de Souza)
      0
      0
      2
    
    
      DFares at 21cf.com (Fares, David)
      0
      0
      4
    
    
      DNSO.Secretariat at dnso.org (DNSO Secretariat)
      0
      1
      0
    
    
      DSimon at SONOSKY.COM (Donald Simon)
      0
      1
      0
    
    
      DannyYounger at cs.com (DannyYounger at cs.com)
      1
      4
      1
    
    
      David.Taylor at hoganlovells.com (Taylor, David)
      0
      0
      4
    
    
      Dixie at GLOBAL-PARTNERS.CO.UK (Dixie Hawtin)
      0
      0
      1
    
    
      Donna.Austin at neustar.biz (Austin, Donna)
      0
      0
      0
    
    
      Elisabeth.Porteneuve at cetp.ipsl.fr (Elisabeth Porteneuve)
      0
      5
      0
    
    
      Ellen.M.Blackler at disney.com (Blackler, Ellen M.)
      8
      8
      0
    
    
      FVayra at perkinscoie.com (Vayra, Fabricio  (Perkins Coie))
      0
      3
      0
    
    
      FinPet at erst.dk (Finn Petersen)
      0
      4
      0
    
    
      ...
      ...
      ...
      ...
    
    
      wendy at seltzer.com (Wendy Seltzer)
      0
      9
      0
    
    
      wgondwe at CC.AC.MW (Walu G. Gondwe)
      0
      0
      1
    
    
      william.drake at GRADUATEINSTITUTE.CH (Drake William)
      0
      0
      0
    
    
      william.drake at GRADUATEINSTITUTE.CH (William Drake)
      11
      53
      18
    
    
      william.drake at UZH.CH (William Drake)
      9
      49
      59
    
    
      william.drake at uzh.ch (William Drake)
      49
      82
      185
    
    
      willie.currie at gmail.com (william currie)
      108
      145
      10
    
    
      wilson at isoc.ug (Wilson Abigaba)
      0
      0
      0
    
    
      wisdom.dk at gmail.com (Wisdom Donkor)
      2
      4
      7
    
    
      wisdom.stoic at gmail.com (Rahul Sharma)
      54
      57
      21
    
    
      wjdrake at gmail.com (William Drake)
      126
      146
      89
    
    
      wolf-ulrich.knoben at t-online.de (WUKnoben)
      0
      1
      3
    
    
      wolf.ludwig at comunica-ch.net (Wolf Ludwig)
      0
      1
      0
    
    
      wolfgang.kleinwaechter at MEDIENKOMM.UNI-HALLE.DE (=?iso-8859-1?Q?=22Kleinw=E4chter=2C_Wolfgang=22?=)
      0
      0
      0
    
    
      wolfgang.kleinwaechter at medienkomm.uni-halle.de (=?iso-8859-1?Q?=22Kleinw=E4chter=2C_Wolfgang=22?=)
      20
      30
      41
    
    
      wpilimon at dfp-interactive.com (=?iso-8859-1?Q?Walter_Pilim=F3n?=)
      0
      0
      0
    
    
      wsaqaf at gmail.com (Walid AL-SAQAF)
      8
      7
      3
    
    
      wsis at modirian.net (Modirian)
      1
      3
      0
    
    
      xavier.calvez at icann.org (Xavier J. Calvez)
      0
      1
      0
    
    
      yakmutd at yahoo.com (Daniel Yakmut)
      0
      0
      0
    
    
      yameogoben at yahoo.fr (Bernard YAMEOGO)
      1
      0
      2
    
    
      yjpark at MYEPARK.COM (Y J Park)
      0
      0
      0
    
    
      yjpark at MYEPARK.COM (YJ Park)
      0
      2
      0
    
    
      yjpark at myepark.com (YJ Park)
      0
      3
      0
    
    
      yjpark21 at GMAIL.COM (YJ Park)
      0
      0
      1
    
    
      yjpark21 at GMAIL.COM (Youn Jung Park)
      0
      0
      2
    
    
      zainsyed45 at gmail.com (Zain Khan, CMC)
      0
      0
      0
    
    
      zakirbinrehman at yahoo.com (Zakir Syed)
      0
      0
      2
    
    
      zalnieriute at gmail.com (zalnieriute .)
      146
      166
      6
    
    
      zbone72 at yahoo.com (Brett Spears)
      0
      0
      2
    
  

1166 rows × 3 columns



In [5]:

    
## Before running this, you need to create the entity_matches.csv
## file from the SummerSchoolConsolidateUserNames notebook

# `pd.Series.from_csv` was deprecated in pandas 0.21 and removed in 1.0.
# This is the equivalent read: no header row, first column as the index,
# second column as the values. The index is not date-parsed because it is
# matched against the 'From' labels further down.
# Presumably the values are the consolidated entity names — verify against
# the notebook that writes the file.
matches = pd.read_csv("entity_matches.csv", header=None, index_col=0).iloc[:, 0]

# read_csv labels the unnamed columns 0 and 1; clear both names, as
# from_csv did for header-less files.
matches.name = matches.index.name = None



In [6]:

    
# Attach the entity name for each sender; values are aligned on the index,
# so senders that do not appear in `matches` end up with NaN here.
whosaidwhat = whosaidwhat.assign(Name=matches)



In [7]:

    
# Collapse all sender rows that share a 'Name' into one row of summed counts.
rows_by_name = whosaidwhat.groupby('Name')
whosaidwhat_named = rows_by_name.sum()



In [8]:

    
# Write the per-name totals to a CSV file in the working directory.
named_counts_csv = "whosaidwhat_named.csv"
whosaidwhat_named.to_csv(named_counts_csv)



In [ ]:

	human	rights	internet
From
AAikman at lrrc.com (Aikman-Scalese, Anne)	0	0	0
AAikman at lrrlaw.com (Aikman-Scalese, Anne)	2	13	39
Alex_Deacon at mpaa.org (Deacon, Alex)	1	4	1
Amadeu at nominalia.com (Amadeu Abril i Abril)	0	3	0
Avri at ACM.ORG (Avri Doria)	0	0	0
BWinterfeldt at mayerbrown.com (Winterfeldt, Brian J.)	0	0	1
Becky.Burr at neustar.biz (Burr, Becky)	66	187	177
Beth.Allegretti at fox.com (Beth Allegretti)	0	0	1
Bradley.Silver at timewarner.com (Silver, Bradley)	1	50	10
Brenbe at CONSUMER.ORG (Brendler, Beau)	0	8	1
Brett.Schaefer at heritage.org (Schaefer, Brett)	39	70	129
Bruce at barelyadequate.info (Bruce Young)	0	0	0
Bruce.Tonkin at melbourneit.com.au (Bruce Tonkin)	62	95	15
CCHIU at ACLU.ORG (Chris Chiu)	4	4	46
CCHIU at aclu.org (Chris Chiu)	0	7	69
CHagstrom at lb.com (Hagstrom, Christopher)	0	0	0
CLackert at reedsmith.com (Lackert, Clark W.)	0	1	0
Camino.MANJON at ec.europa.eu (Camino.MANJON at ec.europa.eu)	130	141	4
Carlos.Souza at FGV.BR (Carlos Affonso Pereira de Souza)	0	0	2
DFares at 21cf.com (Fares, David)	0	0	4
DNSO.Secretariat at dnso.org (DNSO Secretariat)	0	1	0
DSimon at SONOSKY.COM (Donald Simon)	0	1	0
DannyYounger at cs.com (DannyYounger at cs.com)	1	4	1
David.Taylor at hoganlovells.com (Taylor, David)	0	0	4
Dixie at GLOBAL-PARTNERS.CO.UK (Dixie Hawtin)	0	0	1
Donna.Austin at neustar.biz (Austin, Donna)	0	0	0
Elisabeth.Porteneuve at cetp.ipsl.fr (Elisabeth Porteneuve)	0	5	0
Ellen.M.Blackler at disney.com (Blackler, Ellen M.)	8	8	0
FVayra at perkinscoie.com (Vayra, Fabricio (Perkins Coie))	0	3	0
FinPet at erst.dk (Finn Petersen)	0	4	0
...	...	...	...
wendy at seltzer.com (Wendy Seltzer)	0	9	0
wgondwe at CC.AC.MW (Walu G. Gondwe)	0	0	1
william.drake at GRADUATEINSTITUTE.CH (Drake William)	0	0	0
william.drake at GRADUATEINSTITUTE.CH (William Drake)	11	53	18
william.drake at UZH.CH (William Drake)	9	49	59
william.drake at uzh.ch (William Drake)	49	82	185
willie.currie at gmail.com (william currie)	108	145	10
wilson at isoc.ug (Wilson Abigaba)	0	0	0
wisdom.dk at gmail.com (Wisdom Donkor)	2	4	7
wisdom.stoic at gmail.com (Rahul Sharma)	54	57	21
wjdrake at gmail.com (William Drake)	126	146	89
wolf-ulrich.knoben at t-online.de (WUKnoben)	0	1	3
wolf.ludwig at comunica-ch.net (Wolf Ludwig)	0	1	0
wolfgang.kleinwaechter at MEDIENKOMM.UNI-HALLE.DE (=?iso-8859-1?Q?=22Kleinw=E4chter=2C_Wolfgang=22?=)	0	0	0
wolfgang.kleinwaechter at medienkomm.uni-halle.de (=?iso-8859-1?Q?=22Kleinw=E4chter=2C_Wolfgang=22?=)	20	30	41
wpilimon at dfp-interactive.com (=?iso-8859-1?Q?Walter_Pilim=F3n?=)	0	0	0
wsaqaf at gmail.com (Walid AL-SAQAF)	8	7	3
wsis at modirian.net (Modirian)	1	3	0
xavier.calvez at icann.org (Xavier J. Calvez)	0	1	0
yakmutd at yahoo.com (Daniel Yakmut)	0	0	0
yameogoben at yahoo.fr (Bernard YAMEOGO)	1	0	2
yjpark at MYEPARK.COM (Y J Park)	0	0	0
yjpark at MYEPARK.COM (YJ Park)	0	2	0
yjpark at myepark.com (YJ Park)	0	3	0
yjpark21 at GMAIL.COM (YJ Park)	0	0	1
yjpark21 at GMAIL.COM (Youn Jung Park)	0	0	2
zainsyed45 at gmail.com (Zain Khan, CMC)	0	0	0
zakirbinrehman at yahoo.com (Zakir Syed)	0	0	2
zalnieriute at gmail.com (zalnieriute .)	146	166	6
zbone72 at yahoo.com (Brett Spears)	0	0	2