In [ ]:
%matplotlib inline
In [ ]:
from __future__ import (print_function, unicode_literals, division)
In [ ]:
# Import urlparse from its Python 3 location, falling back to the
# Python 2 module name when urllib.parse is unavailable.
try:
    from urllib.parse import urlparse
except ImportError:
    from urlparse import urlparse
In [ ]:
from itertools import islice
import sys

# TODO: move this logic into an installable package, e.g.
# from hypothesisapi import API

# Credentials: define your hypothes.is USERNAME and TOKEN in a
# hypothesis_settings.py file that is importable from sys.path.
# Get a TOKEN at https://hypothes.is/profile/developer
from hypothesis_settings import (USERNAME, TOKEN)
from Hypothes_is import Hypothesis, HypothesisAnnotation

# sys.maxint exists only on Python 2; fall back to sys.maxsize so the same
# "largest available integer" limit also works on Python 3.
h_api = Hypothesis(USERNAME, TOKEN, max_results=getattr(sys, "maxint", sys.maxsize))
In [ ]:
import numpy as np
import pandas as pd
from pandas import (DataFrame, Series)
import matplotlib.pyplot as plt
In [ ]:
# Fetch all annotations matching a single-user query, sorted by update time
# in descending order, and materialize them into a list.
user_query = {'sort':'updated', 'order':'desc', 'user':'rdhyee@hypothes.is'}
annotations = list(h_api.search_all(user_query))
In [ ]:
# Display how many annotations were fetched.
len(annotations)
In [ ]:
# Display the first annotation to inspect its structure.
annotations[0]
In [ ]:
# Collect a capped sample of annotations for the statistics below.
# Note: this rebinds `annotations`, replacing any previously fetched list.
from itertools import islice

annotations = []
# The query asks for results sorted by update time, newest first; islice stops
# the crawl after 5000 items.
for (i, annot) in enumerate(islice(h_api.search_all({'sort':'updated', 'order':'desc'}), 5000)):
    # "\r" returns to the start of the line so the counter overwrites itself.
    print("\r%d" % i , end="")
    annotations.append(annot)
In [ ]:
# Write the collected annotations to annotations.json.
import json

with open("annotations.json", "wb") as f:
    # The file is opened in binary mode, so the JSON text must be encoded:
    # on Python 3 json.dumps returns str, and writing str to a binary handle
    # raises TypeError. json.dumps escapes non-ASCII by default, so the
    # encode step is also safe on Python 2.
    f.write(json.dumps(annotations).encode("utf-8"))
In [ ]:
# read in annotations (uncomment the line below to reload the saved JSON file instead of re-fetching)
import json
#annotations = json.loads(open("annotations.json").read())
In [ ]:
# Build a DataFrame from the collected annotations.
# NOTE(review): later cells read df.created, df.user and df.uri, so each
# annotation is presumably a dict-like record with those keys — confirm.
df = DataFrame(annotations)
In [ ]:
# Distribution of annotations by (year, month) of their `created` timestamp.
import dateutil.parser

created_at = df.created.apply(dateutil.parser.parse)
year_month = created_at.apply(lambda stamp: (stamp.year, stamp.month))
s = year_month.value_counts()
s
In [ ]:
# Bar chart of the per-month counts, ordered by ascending (year, month).
# Intended axis labels: x='year/month', y='# of annotations'
chronological = s.sort_index(ascending=True)
chronological.plot(kind='bar', color='green')
In [ ]:
# Number of distinct users appearing in the sample.
len(df["user"].value_counts())
In [ ]:
# The 20 users with the most annotations in the sample.
df.user.value_counts().head(20)
In [ ]:
# Annotation counts per network location (host) of the annotated URI.
hosts = df.uri.apply(lambda address: urlparse(address).netloc)
hosts.value_counts()
In [ ]:
# Annotation counts per URI; value_counts lists the most annotated pages first.
df["uri"].value_counts()
In [ ]:
# Usernames of climatefeedback.org participants, used to find the pages they
# annotated. Member list source:
# http://climatefeedback.org/members/early-participants.html
climatefeedback_members = [
    'karmour',
    'Alexis_b',
    'drchavas',
    'jgdwyer',
    'emanuel',
    'ed_hawkins',
    'Dkambo',
    'aklocker',
    'james_kossin',
    'jmlauderdale',
    'mashayek',
    's_perkins',
    'andypitman',
    'hramsay',
    'kevenroy',
    'martysingh',
    'alexis.tantet',
    'emvincent',
    'bmv',
    'DonWuebbles',
    'aalpert',
]

# Expand each username into the "acct:<name>@hypothes.is" form.
clf_accts = [
    "acct:{user}@hypothes.is".format(user=member)
    for member in climatefeedback_members
]
In [ ]:
# Keep only the rows whose user is one of the climatefeedback accounts,
# then count those annotations per URI.
from_clf_member = df.user.isin(clf_accts)
clf_annots = df[from_clf_member]
clf_annots.uri.value_counts()
In [ ]: