In [ ]:<b>This notebook computes and plots the senders who have talked the most about something</b> What it does: -given a word / sentence, it scans the emails in one or more mailing lists and computes the list of users who include that word or sentence in their emails -it plots the top users of that word / sentence Parameters to set: -insert one or more urls of mailing lists (multiple mailing lists are aggregated) -insert the word or sentence that you want to search for
In [ ]:%matplotlib inline
In :import bigbang.mailman as mailman from bigbang.archive import load as load_archive from bigbang.parse import get_date from bigbang.archive import Archive import pandas as pd import datetime import matplotlib.pyplot as plt from collections import defaultdict import numpy as np import math import pytz import pickle import os pd.options.display.mpl_style = 'default'
# Insert one or more mailing lists to include in the analysis, either as a
# full archive URL or as a bare list name.
# (If several mailing lists are given, their data are aggregated and treated
# as a single object of analysis.)
urls = ["6lo", "5gang", "http://mm.icann.org/pipermail/ge/"]


def _archive_paths(urls, old, new):
    """Return the CSV path under ../archives/ for each URL or list name.

    `old` is replaced by `new` inside each entry to turn the URL into a
    file-system path.
    """
    # rstrip('/') removes only a trailing slash. Slicing off the last
    # character instead would truncate bare names ("6lo" -> "6l").
    return ['../archives/' + url.rstrip('/').replace(old, new) + '.csv'
            for url in urls]


# Two on-disk naming schemes are tried in turn:
#   "http://host/list" -> "http_/host/list.csv"   (first attempt)
#   "http://host/list" -> "http:/host/list.csv"   (fallback)
try:
    arch_paths = _archive_paths(urls, '://', '_/')
    archives = [load_archive(arch_path).data for arch_path in arch_paths]
except Exception:
    # The exact error load_archive raises for a missing file is not visible
    # from here, so the fallback stays broad; unlike a bare `except:` it no
    # longer swallows KeyboardInterrupt / SystemExit.
    arch_paths = _archive_paths(urls, '//', '/')
    archives = [load_archive(arch_path).data for arch_path in arch_paths]

# Aggregate the messages of all the selected lists into a single table.
mails = pd.concat(archives)
# Insert the word or sentence that you want to look up.
sub_text = 'dio bono'

# Count, for every sender, how many of their emails contain the search text.
people_count = defaultdict(int)
# Walk the 'From' and 'Body' columns in lockstep. DataFrame.iterrows() yields
# (index, row) tuples, so indexing its items directly by column name fails.
for sender, body in zip(mails['From'], mails['Body']):
    # A body that is not a string (e.g. NaN for a missing value) cannot be
    # searched with `in`; such messages are skipped.
    if isinstance(body, str) and sub_text in body:
        people_count[sender] += 1
# Insert how many of the top users of that sentence you want to see.
top_people = 5

print(str(len(people_count)) + ' people are talking about "' + sub_text + '"')

# Rank senders by number of matching emails, highest first. Ties are broken
# by sender name (descending); str() keeps the comparison well defined even
# if a sender value is not a string.
ranked = sorted(people_count.items(),
                key=lambda k_v: (k_v[1], str(k_v[0])),
                reverse=True)

# Slicing caps the listing at `top_people` entries (none when it is 0 or
# negative) without a manual counter.
for people, count in ranked[:max(top_people, 0)]:
    print(str(people) + ' ' + str(count))
0 people are talking about "dio bono"
In [ ]: