In [5]:
# Show the version string of the Python interpreter running this notebook.
import sys
print(sys.version)
In [53]:
from googlegroupexporter.cli import arguments, verbosity, export_with_progress
from googlegroupexporter.exporters import CsvExporter, MailExporter
from googlegroupexporter.session import session_factory
from types import SimpleNamespace
In [61]:
# Directory prefix (with trailing slash) for the input files read by this cell.
data_dir = 'data/'

# You only need the cookie for private groups.
# Only the first line of the credentials file is used as the cookie value.
with open(data_dir + 'ggcreds.txt') as f:
    cookie = f.read().splitlines()[0]
# print(cookie)
# # 'SID=6QQE1nPtLxNmQwlIoZVO...WIYhFIRcQMWt0CqVl'

# One group name per line. Blank lines are dropped and surrounding whitespace
# is stripped, so a trailing empty line or stray spaces in lists.txt do not
# turn into a bogus group name (such as '') that would be exported later.
with open(data_dir + 'lists.txt') as f:
    lists = [line.strip() for line in f.read().splitlines() if line.strip()]
# print(lists)
# # ['publiclaboratory', 'plots-spectrometry',...'grassrootsmapping', 'plots-infrared']

# Not all of these are probably necessary, since many of them are likely
# defaults, but confirming that is an experiment for another day.
kwargs = {
    'verbose': 1,
    'cookies': cookie,
    'workers': 10,
    'cache_dir': 'webcache',
    'cache_days': 7,
    'cache_forever': False,
    'mode': 'mbox-or-csv',
    'group': 'your-group-here',
}
# Since I am using a list of groups, I fill in options.group myself. If you
# want to follow the original code exactly, put your group where it says
# 'your-group-here'. Likewise, 'mode' is a placeholder: the export mode is
# filled in directly where the exporter is chosen.
options = SimpleNamespace(**kwargs)

# Build the session from the options above; positional argument order is
# cookies, workers, cache_dir, cache_days, cache_forever.
session = session_factory(
    options.cookies, options.workers,
    options.cache_dir, options.cache_days, options.cache_forever)
In [57]:
# # MBOX
# Run the exporter registered under 'mbox' once for every group in `lists`.
Exporter = dict(csv=CsvExporter, mbox=MailExporter)['mbox'] # <-- or options.mode
for group in lists:
    try:
        # Keep options.group in sync with the group currently being exported.
        options.group = group
        export_with_progress(Exporter(session), options.group)
    except Exception as err:
        # Best effort: one failing group must not abort the remaining ones.
        # The group name is part of the message because nothing else in this
        # cell says which group was being exported when the error occurred.
        print('SAD EMOTICON FACE ({0}): {1}'.format(group, err))
# # For as many groups as you have...
# # [04:25] 8389 downloads from 8389 requests
# # 0it [00:00, ?it/s]
# # 22 index pages listing 2152 topics with 6079 messages.
In [63]:
# # CSV
# # As an aside, this takes much less time because it is only scraping the index page
# Choose the exporter class by mode name, then run it once per group.
exporters_by_mode = {'csv': CsvExporter, 'mbox': MailExporter}
Exporter = exporters_by_mode['csv'] # <-- or options.mode

progress_template = 'Retreiving entries for {0}'
failure_template = 'SAD EMOTICON FACE: {0}'

for group in lists: # <-- or options.group
    try:
        # Announce the group first so the progress output that follows can be
        # attributed to it.
        print(progress_template.format(group))
        exporter = Exporter(session)
        export_with_progress(exporter, group)
        # Empty line to separate this group's output from the next one.
        print('')
    except Exception as exc:
        # Best effort: report the problem and carry on with the next group.
        print(failure_template.format(exc))
# # For as many groups as you have...
# # Retreiving entries for publiclaboratory
# # [00:10] 2175 downloads from 2175 requests
# # [00:00] 2189 downloads from 2277 requests
# # 2150 topics listed.
In [ ]: