In [30]:
from __future__ import division
from collections import defaultdict

import pickle
def _load_pickle(path):
    """Return the object stored in the pickle file at `path`."""
    # pickle.load can execute arbitrary code embedded in the file, so this
    # must only be pointed at pickles from a trusted source.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


release = _load_pickle('pypi-release.pickle')
releaseurls = _load_pickle('pypi-releaseurls.pickle')

In [4]:
def human_bytes(n):
    """
    Return the number of bytes n in more human readable form.

    Units are binary (1 KB = 1024 B).  The result is one of:
    '<int> B', '<int> KB', '<x.x> MB' or '<x.xx> GB'.

    The unit is chosen from the value *as it will be displayed*, so a
    quantity that rounds up to 1024 of a unit is promoted to the next unit
    (e.g. 1048575 bytes gives '1.0 MB' rather than '1024 KB').
    """
    if n < 1024:
        return '%d B' % n
    # Divide by a float so the result does not depend on
    # `from __future__ import division` being active in this scope.
    k = n / 1024.0
    kb_shown = int(round(k))
    if kb_shown < 1024:
        return '%d KB' % kb_shown
    m = k / 1024.0
    # Format first, then test the formatted number, so the comparison uses
    # exactly the rounding that the reader will see.
    mb_shown = '%.1f' % m
    if float(mb_shown) < 1024:
        return mb_shown + ' MB'
    g = m / 1024.0
    return '%.2f GB' % g

In [31]:
# Walk every download entry of every version of every key in `releaseurls`,
# building:
#   sizes -- maps each download's 'size' value to the list of top-level keys
#            (one entry per download) that have a file of that size
#   tot   -- running sum of all 'size' values
sizes = defaultdict(list)
tot = 0
for r in releaseurls:
    versions = releaseurls[r]
    for version in versions:
        for download in versions[version]:
            size = download['size']
            sizes[size].append(r)
            tot += size

In [32]:
# Sum of the 'size' field over every download entry in releaseurls.
tot


Out[32]:
81576275482

In [33]:
# The same total, formatted with a 1024-based unit suffix.
human_bytes(tot)


Out[33]:
'75.97 GB'

In [22]:
%matplotlib inline
from matplotlib.pylab import hist

In [25]:
# Histogram, with log-scaled counts, of the distinct size values that are the
# keys of `sizes`.  The keys are passed as an explicit list because hist()
# takes an array or sequence; handing it the defaultdict itself relies on how
# the plotting stack happens to coerce a mapping.
# NOTE(review): each distinct size is counted once, however many files share
# it -- confirm that this is the intended distribution.
hist(list(sizes), log=True)


Out[25]:
(array([  2.39611000e+05,   1.78000000e+03,   8.43000000e+02,
          3.05000000e+02,   8.40000000e+01,   5.50000000e+01,
          2.80000000e+01,   2.20000000e+01,   1.00000000e+00,
          4.00000000e+00]),
 array([        0. ,   5370933.2,  10741866.4,  16112799.6,  21483732.8,
         26854666. ,  32225599.2,  37596532.4,  42967465.6,  48338398.8,
         53709332. ]),
 <a list of 10 Patch objects>)

In [37]:
# The 50 largest distinct size values, each paired with the entries of
# `sizes` recorded for that exact size.
largest_sizes = sorted(sizes, reverse=True)[:50]
[(human_bytes(i), sizes[i]) for i in largest_sizes]


Out[37]:
[('51.2 MB', ['tai5_uan5_gian5_gi2_kang1_ku7']),
 ('49.1 MB', ['tai5_uan5_gian5_gi2_kang1_ku7']),
 ('49.1 MB', ['tai5_uan5_gian5_gi2_kang1_ku7']),
 ('48.8 MB', ['tai5_uan5_gian5_gi2_kang1_ku7']),
 ('43.1 MB', ['Shinken']),
 ('40.7 MB', ['matplotlib']),
 ('40.3 MB', ['KDVS']),
 ('37.7 MB', ['snownlp']),
 ('37.7 MB', ['snownlp']),
 ('37.7 MB', ['snownlp']),
 ('37.7 MB', ['snownlp']),
 ('37.7 MB', ['snownlp']),
 ('37.7 MB', ['snownlp']),
 ('37.2 MB', ['Scidoc']),
 ('37.2 MB', ['QSTK']),
 ('37.1 MB', ['pycortex']),
 ('36.5 MB', ['b2gpopulate']),
 ('36.5 MB', ['b2gpopulate']),
 ('36.5 MB', ['b2gpopulate']),
 ('36.5 MB', ['b2gpopulate']),
 ('36.5 MB', ['b2gpopulate']),
 ('36.5 MB', ['b2gpopulate']),
 ('36.5 MB', ['b2gpopulate']),
 ('36.5 MB', ['b2gpopulate']),
 ('36.5 MB', ['b2gpopulate']),
 ('36.5 MB', ['b2gpopulate']),
 ('36.5 MB', ['b2gpopulate']),
 ('35.7 MB', ['ajenti']),
 ('35.7 MB', ['ajenti']),
 ('35.6 MB', ['ajenti']),
 ('35.6 MB', ['ajenti']),
 ('35.6 MB', ['ajenti']),
 ('35.6 MB', ['ajenti']),
 ('35.6 MB', ['ajenti']),
 ('35.6 MB', ['ajenti']),
 ('35.5 MB', ['ajenti']),
 ('34.7 MB', ['de423']),
 ('34.5 MB', ['BigJob2']),
 ('34.5 MB', ['BigJob2']),
 ('34.5 MB', ['BigJob2']),
 ('34.5 MB', ['BigJob2']),
 ('33.0 MB', ['qiime']),
 ('32.0 MB', ['pyopenms']),
 ('31.9 MB', ['music21']),
 ('31.6 MB', ['pyopenms']),
 ('31.3 MB', ['remix']),
 ('31.3 MB', ['remix']),
 ('31.3 MB', ['remix']),
 ('31.3 MB', ['remix']),
 ('31.3 MB', ['remix'])]

In [ ]: