In [171]:
import re
from csv import writer
from urllib.parse import unquote
from furl import furl
In [189]:
# Matches a double-quoted https://osf.io/share URL (optional trailing slash)
# that carries a query string. Group 1 is the URL from "https" up to, but not
# including, the next double quote; the opening quote itself is not captured.
URL_QUERY_PARAM = re.compile(r'\"(https\:\/\/osf\.io\/share\/?\?[^\"]*)')
In [190]:
# Scan the log line by line; for every line that contains at least one share
# URL, parse the first URL found on that line into a furl object.
with open('log') as f:
    matches = [
        furl(urls_on_line[0])
        for urls_on_line in (URL_QUERY_PARAM.findall(line) for line in f)
        if urls_on_line
    ]
In [191]:
# Collapse duplicate searches: each parsed URL's query args become a hashable
# tuple of (key, value) pairs, the set drops repeats, and every survivor is
# turned back into a plain dict. URLs with fewer than two query args are skipped.
matches = [
    dict(arg_pairs)
    for arg_pairs in {
        tuple(parsed.args.items())
        for parsed in matches
        if len(parsed.args) > 1
    }
]
In [192]:
# Query-string parameters to export, in CSV column order.
keys = [
    'q',
    'optional',
    'required',
    'sort',
]
In [198]:
# Build the CSV table: a header row followed by one row per unique search.
# Rows are concrete lists (not one-shot map iterators), so `lines` can be
# inspected or written more than once. The lookup is written inline so this
# cell does not rely on a helper defined in a later cell; a missing key or a
# None value becomes an empty string, and other values are whitespace-stripped.
lines = [keys] + [
    [(match.get(key) or '').strip() for key in keys]
    for match in matches
]
In [199]:
# Write the header and data rows to logs.csv.
# newline='' lets the csv module control line endings itself; without it,
# platforms that translate '\n' on write emit an extra '\r' per row.
with open('logs.csv', 'w', newline='') as f:
    csv_writer = writer(f)
    csv_writer.writerows(lines)
In [197]:
def get(match):
    """Build a lookup function over one query-argument mapping.

    Args:
        match: Mapping of query-parameter name to value; must provide ``.get``.

    Returns:
        A one-argument callable that takes a key and returns the matching
        value with surrounding whitespace stripped, or ``''`` when the key is
        absent or its stored value is ``None``.
    """
    def lookup(key):
        value = match.get(key)
        # A key can be present with a None value; treat that like a missing
        # key instead of failing on None.strip().
        return '' if value is None else value.strip()

    return lookup
In [ ]: