In [337]:
import pandas as pd
from collections import Counter
import matplotlib.pyplot as plt
%matplotlib inline

In [301]:
# NOTE: `pandas` and `json` here are names provided by the idigbio package;
# in this notebook `json` therefore does NOT refer to the standard-library
# json module, and the pandas library itself is available as `pd`.
from idigbio import pandas, json
# `j` is the object whose .stats(...) method the following cells call.
j = json()

In [213]:
def sumseries(series):
    """Add up an iterable of count mappings into a single Counter.

    Each item of ``series`` is folded in with ``Counter.update``, so values
    stored under the same key are summed.  The extra ``'count'`` key is
    incremented once per item (on top of any ``'count'`` values the items
    themselves carry) and is present, as 0, even when ``series`` is empty.
    """
    totals = Counter(count=0)
    for item in series:
        totals.update(item)
        totals['count'] = totals['count'] + 1
    return totals

# Quick check: both 'a' values are summed and two items were seen.
sumseries([{'a':1}, {'a':13}])


Out[213]:
Counter({'a': 14, 'count': 2})

In [297]:
# Request the 'search' stats with a monthly date interval for
# 2015-07-01 .. 2016-04-30 and keep only the 'dates' part of the response.
# NOTE(review): assumes 'dates' is a dict keyed by the start date of each
# interval (that is what the sorted keys below look like) - confirm against
# the API.
ssmonthly = j.stats(
    'search', date_interval='month',
    min_date='2015-07-01', max_date='2016-04-30')['dates']
# List the keys in order to check which months came back.
sorted(ssmonthly.keys())


Out[297]:
['2015-07-01',
 '2015-08-01',
 '2015-09-01',
 '2015-10-01',
 '2015-11-01',
 '2015-12-01',
 '2016-01-01',
 '2016-02-01',
 '2016-03-01',
 '2016-04-01']

In [306]:
# Same request as the monthly one but with a weekly date interval, over
# 2015-06-29 .. 2016-05-08; again only the 'dates' part is kept.
# NOTE(review): assumes 'dates' is a dict keyed by the start date of each
# interval - confirm against the API.
ssweekly = j.stats(
    'search', date_interval='week',
    min_date='2015-06-29', max_date='2016-05-08')['dates']
# Sanity check: number of weekly keys plus the earliest and latest key.
len(ssweekly.keys()), min(ssweekly.keys()), max(ssweekly.keys())


Out[306]:
(45, '2015-06-29', '2016-05-02')

In [330]:
# Collapse every month's entries into a single Counter of totals.
# NOTE(review): assumes each value of ssmonthly is a mapping whose values are
# themselves count mappings (that is what sumseries is fed) - confirm.
statsbymonth = {mon: sumseries(data.values())
                for mon, data in ssmonthly.items()}
# Totals over the whole period.  sumseries adds up the incoming 'count'
# values AND increments 'count' once per item, so totals['count'] is the sum
# of the monthly counts plus the number of months.
totals = sumseries(statsbymonth.values())
# One row per month.  The row order would otherwise follow whatever order the
# response / dict happens to have, so sort the 'YYYY-MM-DD' index explicitly
# to keep the table (and anything plotted from it) chronological.
df = pd.DataFrame.from_dict(statsbymonth, orient='index').sort_index()
# Keep the five activity columns; this drops the helper 'count' column.
dfmonth = df[['search','seen','download', 'viewed_records', 'viewed_media']]

Stats by month


In [331]:
dfmonth


Out[331]:
search seen download viewed_records viewed_media
2015-07-01 510681153547 56620791 36216970 844316 35380
2015-08-01 150191776985 1554424 164335546 1844569 159294
2015-09-01 656873703499 62352551 48072809 3056466 179929
2015-10-01 471265570875 33473866 22530025 2465100 331703
2015-11-01 140262874790 2550362 2663794 5785702 775581
2015-12-01 819596616564 64832910 895372 1144401 167203
2016-01-01 163402526827 1505558 17664302 2004067 257794
2016-02-01 636392641209 43531078 368652 2097269 297396
2016-03-01 209468898311 18925149 368498 4864818 606178
2016-04-01 231915095898 1629081 1499892 941139 77341

In [315]:
# Column totals over all monthly rows.
# NOTE(review): the saved output of this cell contains a `count` row, which
# the five-column `dfmonth` built in In [330] does not have; the output dates
# from an earlier run (In [315]) against a different `dfmonth`.  Re-run to
# refresh it.
dfmonth.sum()


Out[315]:
count                      6843
search            3990050858505
seen                  286975770
viewed_media            2887799
download              294615860
viewed_records         25047847
dtype: int64

In [332]:
# Collapse every week's entries into a single Counter of totals.
# NOTE(review): assumes each value of ssweekly is a mapping whose values are
# themselves count mappings (that is what sumseries is fed) - confirm.
statsbyweek = {week: sumseries(data.values())
               for week, data in ssweekly.items()}
# One row per week.  The row order would otherwise follow whatever order the
# response / dict happens to have, so sort the 'YYYY-MM-DD' index explicitly;
# the weekly plots read left-to-right as a time axis and need it chronological.
df = pd.DataFrame.from_dict(statsbyweek, orient='index').sort_index()
# Keep the five activity columns; this drops the helper 'count' column.
dfweek = df[['search','seen','download', 'viewed_records', 'viewed_media']]

In [333]:
# Rebuild the weekly frame from statsbyweek and keep just the five activity
# columns, leaving the result in the scratch name `df`.
df = pd.DataFrame.from_dict(
    statsbyweek, orient='index'
)[['search','seen','download', 'viewed_records', 'viewed_media']]

Stats by week


In [334]:
dfweek


Out[334]:
search seen download viewed_records viewed_media
2015-06-29 32550933360 293547 440179 611608 24287
2015-07-06 24084303784 159848 32280649 92625 7191
2015-07-13 10725967254 71377 3496053 59071 2463
2015-07-20 418603045546 55830447 89 206219 5654
2015-07-27 36781604306 369060 34160 260989 12237
2015-08-03 35438259306 232478 30875162 469984 16257
2015-08-10 24695612197 243450 86924346 263234 16381
2015-08-17 42251987774 716324 319700 470232 52914
2015-08-24 41109556390 310939 46182178 524608 67948
2015-08-31 20518074344 198997 1744165 404066 28635
2015-09-07 83994537913 5915760 44824344 667896 39917
2015-09-14 512938145340 55418000 263062 830191 37334
2015-09-21 26318681183 705248 891312 512524 24241
2015-09-28 34124644588 306991 531932 1221789 127097
2015-10-05 30099476949 254305 21370728 736353 79905
2015-10-12 30038836796 332975 441983 602922 82587
2015-10-19 362521995776 32392865 390439 367659 68705
2015-10-26 31482956554 335069 144869 336073 29295
2015-11-02 36196351462 326897 2316002 723324 97067
2015-11-09 34243213266 629658 318332 990043 105060
2015-11-16 34119732561 946349 25636 2201408 215934
2015-11-23 29707730805 598290 3824 1709259 347179
2015-11-30 289117787105 26087565 201813 574335 90405
2015-12-07 108922210380 511714 0 154197 24163
2015-12-14 31203819874 298952 499661 167909 23249
2015-12-21 20741422148 241099 32555 210228 23675
2015-12-28 378654850186 37759916 163425 126457 18685
2016-01-04 34040722961 525942 2638 176569 21972
2016-01-11 39569873411 349046 5041802 388447 48672
2016-01-18 41724589742 318882 115726 844841 116908
2016-01-25 41862459627 266541 12502054 545138 62527
2016-02-01 468231503554 40517932 49411 427334 41435
2016-02-08 40899065103 645600 48902 627723 78871
2016-02-15 38211892137 489821 178331 335293 43473
2016-02-22 86348103942 1608153 92008 519530 82329
2016-02-29 41931746022 884462 187248 1054653 232213
2016-03-07 43732350716 462942 147040 2171279 264550
2016-03-14 40957697590 8050345 1833 1510896 141250
2016-03-21 28453417522 9060653 76 190378 13384
2016-03-28 75258272098 835609 110957 419627 19786
2016-04-04 57743519653 413877 416692 424767 47001
2016-04-11 54332393809 361891 278434 98389 6383
2016-04-18 42067694348 315044 580158 61269 5062
2016-04-25 61863823903 455565 145973 70197 5691
2016-05-02 56733194984 430652 256530 170535 14954

In [319]:
# Column totals over all weekly rows.  The weekly query window
# (2015-06-29 .. 2016-05-08) is wider than the monthly one
# (2015-07-01 .. 2016-04-30), so these totals are not expected to match the
# monthly totals exactly.
dfweek.sum()


Out[319]:
search            4055148058269
seen                  287481077
download              294872411
viewed_records         25532068
viewed_media            2914926
dtype: int64

plots


In [338]:
# All five columns drawn on one set of axes (no `kind` given, so pandas'
# default plot kind is used).
dfweek.plot(figsize=(16,4))


Out[338]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fc37e4f59e8>

Plotting all of the series on one chart wasn't very useful because the `search` values are so much larger than everything else.


In [339]:
# One bar chart per column (subplots=True), each keeping its own x tick
# labels (sharex=False), so the smaller series are not flattened by `search`.
# NOTE(review): per the pandas docs, `.plot(..., subplots=True)` returns an
# array of Axes, so despite its name `fig` is presumably not a Figure -
# confirm before using it as one.
fig = dfweek.plot(
    kind='bar',
    subplots=True,
    sharex=False,
    figsize=(20,15),
    fontsize=8,
    rot=70,
)



In [ ]:


In [ ]:


In [ ]:


In [327]:
import pandas as pd

pd.set_option('display.notebook_repr_html', True)

def _repr_latex_(self):
    return "\centering{%s}" % self.to_latex()

pd.DataFrame._repr_latex_ = _repr_latex_  # monkey patch pandas DataFrame

In [340]:
# Ask the inline matplotlib backend to render figures as SVG.  This is the
# Python-kernel counterpart of R/IRkernel's
#   options(jupyter.plot_mimetypes = "image/svg+xml")
# which is not valid Python (it raises SyntaxError).
# Equivalent to the line magic:  %config InlineBackend.figure_format = 'svg'
get_ipython().run_line_magic('config', "InlineBackend.figure_format = 'svg'")

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]: