English Wikipedia page views, 2008 - 2017


In [11]:
import requests
import json
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import matplotlib.dates as mdates
import datetime

Step 1: Data Acquisition

The provided sample API request is used to load data from pageviews and pagecount API. The data is collected from both API and save the raw result into 5 json files.

Load data from desktop-site pagecounts API

The first API being collected is desktop monthly pagecount from October 2008 to August 2016.


In [2]:
endpoint1 = 'https://wikimedia.org/api/rest_v1/metrics/legacy/pagecounts/aggregate/{project}/{access}/{granularity}/{start}/{end}'

headers={'User-Agent' : 'https://github.com/jingyany', 'From' : 'jingyany@uw.edu'}

params1 = {'project' : 'en.wikipedia.org',
            'access' : 'desktop-site',
            'granularity' : 'monthly',
            'start' : '2008010100',
            'end' : '2016080100'
            }

api_call1 = requests.get(endpoint1.format(**params1))
response1 = api_call1.json()
print(response1)


{'items': [{'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2008010100', 'count': 4930902570}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2008020100', 'count': 4818393763}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2008030100', 'count': 4955405809}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2008040100', 'count': 5159162183}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2008050100', 'count': 5584691092}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2008060100', 'count': 5712104279}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2008070100', 'count': 5306302874}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2008080100', 'count': 5140155519}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2008090100', 'count': 5479533823}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2008100100', 'count': 5679440782}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2008110100', 'count': 5415832071}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2008120100', 'count': 5211708451}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2009010100', 'count': 5802681551}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2009020100', 'count': 5547320860}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2009030100', 'count': 6295159057}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2009040100', 'count': 5988817321}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2009050100', 'count': 6267516733}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2009060100', 'count': 5818924182}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2009070100', 'count': 5801646978}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2009080100', 'count': 5790850384}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2009090100', 'count': 4057515768}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2009100100', 'count': 6016107147}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2009110100', 'count': 5768486910}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2009120100', 'count': 5426505977}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2010010100', 'count': 5703465285}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2010020100', 'count': 5762451418}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2010030100', 'count': 6661347946}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2010040100', 'count': 6618552152}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2010050100', 'count': 6410578775}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2010060100', 'count': 4898035014}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2010070100', 'count': 5296177638}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2010080100', 'count': 7381346660}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2010090100', 'count': 7546488744}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2010100100', 'count': 10172844562}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2010110100', 'count': 6948678354}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2010120100', 'count': 7001952100}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2011010100', 'count': 7568511227}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2011020100', 'count': 6906248849}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2011030100', 'count': 7326545928}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2011040100', 'count': 6835492088}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2011050100', 'count': 7009799378}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2011060100', 'count': 6536675534}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2011070100', 'count': 6571730164}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2011080100', 'count': 6514500361}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2011090100', 'count': 5837738935}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2011100100', 'count': 6974424665}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2011110100', 'count': 7507641641}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2011120100', 'count': 6396357939}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2012010100', 'count': 7808477339}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2012020100', 'count': 7530127141}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2012030100', 'count': 7358543625}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2012040100', 'count': 7220029422}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2012050100', 'count': 7784479302}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2012060100', 'count': 7524265988}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2012070100', 'count': 7819376527}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2012080100', 'count': 7885641059}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2012090100', 'count': 7913689733}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2012100100', 'count': 8379960050}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2012110100', 'count': 8219987195}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2012120100', 'count': 8033826900}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2013010100', 'count': 9126210673}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2013020100', 'count': 8100597857}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2013030100', 'count': 8701419671}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2013040100', 'count': 8049296729}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2013050100', 'count': 8089099883}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2013060100', 'count': 7759959725}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2013070100', 'count': 8237301982}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2013080100', 'count': 9306122925}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2013090100', 'count': 9985326806}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2013100100', 'count': 10775158269}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2013110100', 'count': 10414877805}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2013120100', 'count': 7102459747}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2014010100', 'count': 7026531269}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2014020100', 'count': 6718243109}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2014030100', 'count': 7464128273}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2014040100', 'count': 7216245221}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2014050100', 'count': 7543277646}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2014060100', 'count': 6577352648}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2014070100', 'count': 6761059566}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2014080100', 'count': 6687313714}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2014090100', 'count': 7564600999}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2014100100', 'count': 6577533128}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2014110100', 'count': 6153537606}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2014120100', 'count': 5830332248}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2015010100', 'count': 6103767055}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2015020100', 'count': 5602710439}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2015030100', 'count': 6346602713}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2015040100', 'count': 6198945657}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2015050100', 'count': 6323801814}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2015060100', 'count': 5165413640}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2015070100', 'count': 5229226022}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2015080100', 'count': 5035534449}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2015090100', 'count': 5409631355}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2015100100', 'count': 5535704471}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2015110100', 'count': 5296956116}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2015120100', 'count': 5264446173}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2016010100', 'count': 5569632502}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2016020100', 'count': 5347709361}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2016030100', 'count': 5407676056}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2016040100', 'count': 5572235399}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2016050100', 'count': 5330532334}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2016060100', 'count': 4975092447}, {'project': 'en.wikipedia', 'access-site': 'desktop-site', 'granularity': 'monthly', 'timestamp': '2016070100', 'count': 5363966439}]}

In [3]:
json1 = json.dumps(response1)
f = open("pagecounts_desktop-site_200801-201607.json","w")
f.write(json1)
f.close()

Load data from mobile-site pagecounts API

The second API being collected is mobile monthly pagecount from October 2008 to August 2016.


In [4]:
endpoint2 = 'https://wikimedia.org/api/rest_v1/metrics/legacy/pagecounts/aggregate/{project}/{access}/{granularity}/{start}/{end}'

headers ={'User-Agent' : 'https://github.com/jingyany', 'From' : 'jingyany@uw.edu'}

params2 = {'project' : 'en.wikipedia.org',
            'access' : 'mobile-site',
            'granularity' : 'monthly',
            'start' : '2008010100',
            'end' : '2016080100'#use the first day of the following month to ensure a full month of data is collected
            }

api_call2 = requests.get(endpoint2.format(**params2))
response2 = api_call2.json()
print(response2)


{'items': [{'project': 'en.wikipedia', 'access-site': 'mobile-site', 'granularity': 'monthly', 'timestamp': '2014100100', 'count': 3091546685}, {'project': 'en.wikipedia', 'access-site': 'mobile-site', 'granularity': 'monthly', 'timestamp': '2014110100', 'count': 3027489668}, {'project': 'en.wikipedia', 'access-site': 'mobile-site', 'granularity': 'monthly', 'timestamp': '2014120100', 'count': 3278950021}, {'project': 'en.wikipedia', 'access-site': 'mobile-site', 'granularity': 'monthly', 'timestamp': '2015010100', 'count': 3485302091}, {'project': 'en.wikipedia', 'access-site': 'mobile-site', 'granularity': 'monthly', 'timestamp': '2015020100', 'count': 3091534479}, {'project': 'en.wikipedia', 'access-site': 'mobile-site', 'granularity': 'monthly', 'timestamp': '2015030100', 'count': 3330832588}, {'project': 'en.wikipedia', 'access-site': 'mobile-site', 'granularity': 'monthly', 'timestamp': '2015040100', 'count': 3222089917}, {'project': 'en.wikipedia', 'access-site': 'mobile-site', 'granularity': 'monthly', 'timestamp': '2015050100', 'count': 3334069483}, {'project': 'en.wikipedia', 'access-site': 'mobile-site', 'granularity': 'monthly', 'timestamp': '2015060100', 'count': 3038162463}, {'project': 'en.wikipedia', 'access-site': 'mobile-site', 'granularity': 'monthly', 'timestamp': '2015070100', 'count': 3254472695}, {'project': 'en.wikipedia', 'access-site': 'mobile-site', 'granularity': 'monthly', 'timestamp': '2015080100', 'count': 3268487582}, {'project': 'en.wikipedia', 'access-site': 'mobile-site', 'granularity': 'monthly', 'timestamp': '2015090100', 'count': 3172429827}, {'project': 'en.wikipedia', 'access-site': 'mobile-site', 'granularity': 'monthly', 'timestamp': '2015100100', 'count': 3246082505}, {'project': 'en.wikipedia', 'access-site': 'mobile-site', 'granularity': 'monthly', 'timestamp': '2015110100', 'count': 3218234512}, {'project': 'en.wikipedia', 'access-site': 'mobile-site', 'granularity': 'monthly', 'timestamp': '2015120100', 'count': 3387411863}, {'project': 'en.wikipedia', 'access-site': 'mobile-site', 'granularity': 'monthly', 'timestamp': '2016010100', 'count': 3739628742}, {'project': 'en.wikipedia', 'access-site': 'mobile-site', 'granularity': 'monthly', 'timestamp': '2016020100', 'count': 3333231392}, {'project': 'en.wikipedia', 'access-site': 'mobile-site', 'granularity': 'monthly', 'timestamp': '2016030100', 'count': 3419853636}, {'project': 'en.wikipedia', 'access-site': 'mobile-site', 'granularity': 'monthly', 'timestamp': '2016040100', 'count': 3301385124}, {'project': 'en.wikipedia', 'access-site': 'mobile-site', 'granularity': 'monthly', 'timestamp': '2016050100', 'count': 3418435805}, {'project': 'en.wikipedia', 'access-site': 'mobile-site', 'granularity': 'monthly', 'timestamp': '2016060100', 'count': 3372618063}, {'project': 'en.wikipedia', 'access-site': 'mobile-site', 'granularity': 'monthly', 'timestamp': '2016070100', 'count': 3500661121}]}

In [6]:
json2 = json.dumps(response2)
f = open("pagecounts_mobile-site_200801-201607.json","w")
f.write(json2)
f.close()

Load data from desktop pageviews API

The third API being collected is desktop monthly pageviews from July 2015 to October 2017.


In [13]:
endpoint3 = 'https://wikimedia.org/api/rest_v1/metrics/pageviews/aggregate/{project}/{access}/{agent}/{granularity}/{start}/{end}'

headers ={'User-Agent' : 'https://github.com/jingyany', 'From' : 'jingyany@uw.edu'}

params3 = {'project' : 'en.wikipedia.org',
            'access' : 'desktop',
            'agent' : 'user',
            'granularity' : 'monthly',
            'start' : '2015070100',
            'end' : '2017100100'#use the first day of the following month to ensure a full month of data is collected
            }

api_call3 = requests.get(endpoint3.format(**params3))
response3 = api_call3.json()
print(response3)


{'items': [{'project': 'en.wikipedia', 'access': 'desktop', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2015070100', 'views': 4376666686}, {'project': 'en.wikipedia', 'access': 'desktop', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2015080100', 'views': 4332482183}, {'project': 'en.wikipedia', 'access': 'desktop', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2015090100', 'views': 4485491704}, {'project': 'en.wikipedia', 'access': 'desktop', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2015100100', 'views': 4477532755}, {'project': 'en.wikipedia', 'access': 'desktop', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2015110100', 'views': 4287720220}, {'project': 'en.wikipedia', 'access': 'desktop', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2015120100', 'views': 4100012037}, {'project': 'en.wikipedia', 'access': 'desktop', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2016010100', 'views': 4436179457}, {'project': 'en.wikipedia', 'access': 'desktop', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2016020100', 'views': 4250997185}, {'project': 'en.wikipedia', 'access': 'desktop', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2016030100', 'views': 4286590426}, {'project': 'en.wikipedia', 'access': 'desktop', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2016040100', 'views': 4149383857}, {'project': 'en.wikipedia', 'access': 'desktop', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2016050100', 'views': 4191778094}, {'project': 'en.wikipedia', 'access': 'desktop', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2016060100', 'views': 3888839711}, {'project': 'en.wikipedia', 'access': 'desktop', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2016070100', 'views': 4337865827}, {'project': 'en.wikipedia', 'access': 'desktop', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2016080100', 'views': 4695046216}, {'project': 'en.wikipedia', 'access': 'desktop', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2016090100', 'views': 4135006498}, {'project': 'en.wikipedia', 'access': 'desktop', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2016100100', 'views': 4361737690}, {'project': 'en.wikipedia', 'access': 'desktop', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2016110100', 'views': 4392068236}, {'project': 'en.wikipedia', 'access': 'desktop', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2016120100', 'views': 4209608578}, {'project': 'en.wikipedia', 'access': 'desktop', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2017010100', 'views': 4521980398}, {'project': 'en.wikipedia', 'access': 'desktop', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2017020100', 'views': 4026702163}, {'project': 'en.wikipedia', 'access': 'desktop', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2017030100', 'views': 4319971902}, {'project': 'en.wikipedia', 'access': 'desktop', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2017040100', 'views': 3951456992}, {'project': 'en.wikipedia', 'access': 'desktop', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2017050100', 'views': 4187870579}, {'project': 'en.wikipedia', 'access': 'desktop', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2017060100', 'views': 3604550997}, {'project': 'en.wikipedia', 'access': 'desktop', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2017070100', 'views': 3565444544}, {'project': 'en.wikipedia', 'access': 'desktop', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2017080100', 'views': 3575572313}, {'project': 'en.wikipedia', 'access': 'desktop', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2017090100', 'views': 3547447892}]}

In [14]:
json3 = json.dumps(response3)
f = open("pageviews_desktop_201507-201709.json","w")
f.write(json3)
f.close()

Load data from mobile-web pageviews API

The fourth API being collected is mobile-web monthly pageviews from July 2015 to October 2017.


In [15]:
endpoint4 = 'https://wikimedia.org/api/rest_v1/metrics/pageviews/aggregate/{project}/{access}/{agent}/{granularity}/{start}/{end}'

headers ={'User-Agent' : 'https://github.com/jingyany', 'From' : 'jingyany@uw.edu'}

params4 = {'project' : 'en.wikipedia.org',
            'access' : 'mobile-web',
            'agent' : 'user',
            'granularity' : 'monthly',
            'start' : '2015070100',
            'end' : '2017100100'#use the first day of the following month to ensure a full month of data is collected
            }

api_call4 = requests.get(endpoint4.format(**params4))
response4 = api_call4.json()
print(response4)


{'items': [{'project': 'en.wikipedia', 'access': 'mobile-web', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2015070100', 'views': 3179131148}, {'project': 'en.wikipedia', 'access': 'mobile-web', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2015080100', 'views': 3192663889}, {'project': 'en.wikipedia', 'access': 'mobile-web', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2015090100', 'views': 3073981649}, {'project': 'en.wikipedia', 'access': 'mobile-web', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2015100100', 'views': 3173975355}, {'project': 'en.wikipedia', 'access': 'mobile-web', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2015110100', 'views': 3142247145}, {'project': 'en.wikipedia', 'access': 'mobile-web', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2015120100', 'views': 3276836351}, {'project': 'en.wikipedia', 'access': 'mobile-web', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2016010100', 'views': 3611404079}, {'project': 'en.wikipedia', 'access': 'mobile-web', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2016020100', 'views': 3242448142}, {'project': 'en.wikipedia', 'access': 'mobile-web', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2016030100', 'views': 3288785117}, {'project': 'en.wikipedia', 'access': 'mobile-web', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2016040100', 'views': 3177044999}, {'project': 'en.wikipedia', 'access': 'mobile-web', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2016050100', 'views': 3296294723}, {'project': 'en.wikipedia', 'access': 'mobile-web', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2016060100', 'views': 3257882479}, {'project': 'en.wikipedia', 'access': 'mobile-web', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2016070100', 'views': 3395175122}, {'project': 'en.wikipedia', 'access': 'mobile-web', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2016080100', 'views': 3418646794}, {'project': 'en.wikipedia', 'access': 'mobile-web', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2016090100', 'views': 3310247842}, {'project': 'en.wikipedia', 'access': 'mobile-web', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2016100100', 'views': 3442109005}, {'project': 'en.wikipedia', 'access': 'mobile-web', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2016110100', 'views': 3507421156}, {'project': 'en.wikipedia', 'access': 'mobile-web', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2016120100', 'views': 3647567822}, {'project': 'en.wikipedia', 'access': 'mobile-web', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2017010100', 'views': 4020148351}, {'project': 'en.wikipedia', 'access': 'mobile-web', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2017020100', 'views': 3522702265}, {'project': 'en.wikipedia', 'access': 'mobile-web', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2017030100', 'views': 3719395296}, {'project': 'en.wikipedia', 'access': 'mobile-web', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2017040100', 'views': 3524571150}, {'project': 'en.wikipedia', 'access': 'mobile-web', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2017050100', 'views': 3567882051}, {'project': 'en.wikipedia', 'access': 'mobile-web', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2017060100', 'views': 3404097346}, {'project': 'en.wikipedia', 'access': 'mobile-web', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2017070100', 'views': 3600941034}, {'project': 'en.wikipedia', 'access': 'mobile-web', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2017080100', 'views': 3502234506}, {'project': 'en.wikipedia', 'access': 'mobile-web', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2017090100', 'views': 3416989181}]}

In [16]:
json4 = json.dumps(response4)
f = open("pageviews_mobile-web_201507-201709.json","w")
f.write(json4)
f.close()

Load data from mobile-app pageviews API

The fifth API being collected is mobile-app monthly pageviews from July 2015 to October 2017.


In [17]:
endpoint5 = 'https://wikimedia.org/api/rest_v1/metrics/pageviews/aggregate/{project}/{access}/{agent}/{granularity}/{start}/{end}'

headers ={'User-Agent' : 'https://github.com/jingyany', 'From' : 'jingyany@uw.edu'}

params5 = {'project' : 'en.wikipedia.org',
            'access' : 'mobile-app',
            'agent' : 'user',
            'granularity' : 'monthly',
            'start' : '2015070100',
            'end' : '2017100100'#use the first day of the following month to ensure a full month of data is collected
            }

api_call5 = requests.get(endpoint5.format(**params5))
response5 = api_call5.json()
print(response5)


{'items': [{'project': 'en.wikipedia', 'access': 'mobile-app', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2015070100', 'views': 109624146}, {'project': 'en.wikipedia', 'access': 'mobile-app', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2015080100', 'views': 109669149}, {'project': 'en.wikipedia', 'access': 'mobile-app', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2015090100', 'views': 96221684}, {'project': 'en.wikipedia', 'access': 'mobile-app', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2015100100', 'views': 94523777}, {'project': 'en.wikipedia', 'access': 'mobile-app', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2015110100', 'views': 94353925}, {'project': 'en.wikipedia', 'access': 'mobile-app', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2015120100', 'views': 99438956}, {'project': 'en.wikipedia', 'access': 'mobile-app', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2016010100', 'views': 106432767}, {'project': 'en.wikipedia', 'access': 'mobile-app', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2016020100', 'views': 92414130}, {'project': 'en.wikipedia', 'access': 'mobile-app', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2016030100', 'views': 97899074}, {'project': 'en.wikipedia', 'access': 'mobile-app', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2016040100', 'views': 81719003}, {'project': 'en.wikipedia', 'access': 'mobile-app', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2016050100', 'views': 98738513}, {'project': 'en.wikipedia', 'access': 'mobile-app', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2016060100', 'views': 96908466}, {'project': 'en.wikipedia', 'access': 'mobile-app', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2016070100', 'views': 101398640}, {'project': 'en.wikipedia', 'access': 'mobile-app', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2016080100', 'views': 97172509}, {'project': 'en.wikipedia', 'access': 'mobile-app', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2016090100', 'views': 83037939}, {'project': 'en.wikipedia', 'access': 'mobile-app', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2016100100', 'views': 67174886}, {'project': 'en.wikipedia', 'access': 'mobile-app', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2016110100', 'views': 83623769}, {'project': 'en.wikipedia', 'access': 'mobile-app', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2016120100', 'views': 128976033}, {'project': 'en.wikipedia', 'access': 'mobile-app', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2017010100', 'views': 211813191}, {'project': 'en.wikipedia', 'access': 'mobile-app', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2017020100', 'views': 189059134}, {'project': 'en.wikipedia', 'access': 'mobile-app', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2017030100', 'views': 184098693}, {'project': 'en.wikipedia', 'access': 'mobile-app', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2017040100', 'views': 115051969}, {'project': 'en.wikipedia', 'access': 'mobile-app', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2017050100', 'views': 118805669}, {'project': 'en.wikipedia', 'access': 'mobile-app', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2017060100', 'views': 115285847}, {'project': 'en.wikipedia', 'access': 'mobile-app', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2017070100', 'views': 124118219}, {'project': 'en.wikipedia', 'access': 'mobile-app', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2017080100', 'views': 119171796}, {'project': 'en.wikipedia', 'access': 'mobile-app', 'agent': 'user', 'granularity': 'monthly', 'timestamp': '2017090100', 'views': 114615188}]}

In [18]:
json5 = json.dumps(response5)
f = open("pageviews_mobile-app_201507-201709.json","w")
f.write(json5)
f.close()

Step 2: Data Processing

Merge views of mobile-web and mobile-app

First, load the data from two seperate json files, mobile-web pageviews and mobile-app pageviews. Flatten each json file into pandas dataframe. The total mobile views can be generated by adding views of mobile-web and mobile-app.


In [13]:
pv_mw = pd.read_json('pageviews_mobile-web_201507-201709.json',orient='columns')
pv_mw.head()


Out[13]:
items
0 {'project': 'en.wikipedia', 'access': 'mobile-...
1 {'project': 'en.wikipedia', 'access': 'mobile-...
2 {'project': 'en.wikipedia', 'access': 'mobile-...
3 {'project': 'en.wikipedia', 'access': 'mobile-...
4 {'project': 'en.wikipedia', 'access': 'mobile-...

In [14]:
pv_mw = pd.read_json(pv_mw['items'].to_json()).T
pv_mw.head()


Out[14]:
access agent granularity project timestamp views
0 mobile-web user monthly en.wikipedia 2015070100 3179131148
1 mobile-web user monthly en.wikipedia 2015080100 3192663889
10 mobile-web user monthly en.wikipedia 2016050100 3296294723
11 mobile-web user monthly en.wikipedia 2016060100 3257882479
12 mobile-web user monthly en.wikipedia 2016070100 3395175122

In [15]:
pv_ma = pd.read_json('pageviews_mobile-app_201507-201709.json',orient='columns')
pv_ma.head()


Out[15]:
items
0 {'project': 'en.wikipedia', 'access': 'mobile-...
1 {'project': 'en.wikipedia', 'access': 'mobile-...
2 {'project': 'en.wikipedia', 'access': 'mobile-...
3 {'project': 'en.wikipedia', 'access': 'mobile-...
4 {'project': 'en.wikipedia', 'access': 'mobile-...

In [16]:
pv_ma = pd.read_json(pv_ma['items'].to_json()).T
pv_ma.head()


Out[16]:
access agent granularity project timestamp views
0 mobile-app user monthly en.wikipedia 2015070100 109624146
1 mobile-app user monthly en.wikipedia 2015080100 109669149
10 mobile-app user monthly en.wikipedia 2016050100 98738513
11 mobile-app user monthly en.wikipedia 2016060100 96908466
12 mobile-app user monthly en.wikipedia 2016070100 101398640

In [17]:
pv_mw['views'] = pv_mw['views'] + pv_ma['views']
pv_mw.head()


Out[17]:
access agent granularity project timestamp views
0 mobile-web user monthly en.wikipedia 2015070100 3288755294
1 mobile-web user monthly en.wikipedia 2015080100 3302333038
10 mobile-web user monthly en.wikipedia 2016050100 3395033236
11 mobile-web user monthly en.wikipedia 2016060100 3354790945
12 mobile-web user monthly en.wikipedia 2016070100 3496573762

Combine desktop pageviews and mobile pageviews

Second, load the data from desktop pageviews. Flatten each json file into pandas dataframe. A new dataframe for pageview, "pv", is created with timestamp, desktop pageviews, mobile pageviews and all pageviews.


In [18]:
pv_d = pd.read_json('pageviews_desktop_201507-201709.json',orient='columns')
pv_d = pd.read_json(pv_d['items'].to_json()).T
pv_d.head()


Out[18]:
access agent granularity project timestamp views
0 desktop user monthly en.wikipedia 2015070100 4376666686
1 desktop user monthly en.wikipedia 2015080100 4332482183
10 desktop user monthly en.wikipedia 2016050100 4191778094
11 desktop user monthly en.wikipedia 2016060100 3888839711
12 desktop user monthly en.wikipedia 2016070100 4337865827

In [19]:
pv = pd.DataFrame()
pv['timestamp'] = pv_ma['timestamp']
pv['pageview_all_views'] = pv_mw['views'] + pv_d['views']
pv['pageview_desktop_views'] = pv_d['views']
pv['pageview_mobile_views'] = pv_mw['views']
pv.head()


Out[19]:
timestamp pageview_all_views pageview_desktop_views pageview_mobile_views
0 2015070100 7665421980 4376666686 3288755294
1 2015080100 7634815221 4332482183 3302333038
10 2016050100 7586811330 4191778094 3395033236
11 2016060100 7243630656 3888839711 3354790945
12 2016070100 7834439589 4337865827 3496573762

Combine desktop pagecount and mobile pagecount

Third, the desktop pagecount and mobile pagecount are loaded from json file. Since the time range of desktop pagecount and mobile pagecount are different, I use pandas' merge function to outer join these two dataframes. Before merge pagecount dataframe and pageviews dataframe, all cells with 'Nan' values in pagecount should be filled with 0.


In [20]:
pc_d = pd.read_json('pagecounts_desktop-site_200801-201607.json',orient='columns')
pc_d = pd.read_json(pc_d['items'].to_json()).T
pc_d.head()


Out[20]:
access-site count granularity project timestamp
0 desktop-site 4930902570 monthly en.wikipedia 2008010100
1 desktop-site 4818393763 monthly en.wikipedia 2008020100
10 desktop-site 5415832071 monthly en.wikipedia 2008110100
100 desktop-site 5330532334 monthly en.wikipedia 2016050100
101 desktop-site 4975092447 monthly en.wikipedia 2016060100

In [21]:
pc_m = pd.read_json('pagecounts_mobile-site_200801-201607.json',orient='columns')
pc_m = pd.read_json(pc_m['items'].to_json()).T
pc_m.head()


Out[21]:
access-site count granularity project timestamp
0 mobile-site 3091546685 monthly en.wikipedia 2014100100
1 mobile-site 3027489668 monthly en.wikipedia 2014110100
10 mobile-site 3268487582 monthly en.wikipedia 2015080100
11 mobile-site 3172429827 monthly en.wikipedia 2015090100
12 mobile-site 3246082505 monthly en.wikipedia 2015100100

In [22]:
pc_temp = pd.merge(pc_m, pc_d, how='outer', on='timestamp')
pc_temp.head()


Out[22]:
access-site_x count_x granularity_x project_x timestamp access-site_y count_y granularity_y project_y
0 mobile-site 3091546685 monthly en.wikipedia 2014100100 desktop-site 6577533128 monthly en.wikipedia
1 mobile-site 3027489668 monthly en.wikipedia 2014110100 desktop-site 6153537606 monthly en.wikipedia
2 mobile-site 3268487582 monthly en.wikipedia 2015080100 desktop-site 5035534449 monthly en.wikipedia
3 mobile-site 3172429827 monthly en.wikipedia 2015090100 desktop-site 5409631355 monthly en.wikipedia
4 mobile-site 3246082505 monthly en.wikipedia 2015100100 desktop-site 5535704471 monthly en.wikipedia

In [23]:
pc_temp.fillna(0, inplace=True)
pc_temp.head()


Out[23]:
access-site_x count_x granularity_x project_x timestamp access-site_y count_y granularity_y project_y
0 mobile-site 3091546685 monthly en.wikipedia 2014100100 desktop-site 6577533128 monthly en.wikipedia
1 mobile-site 3027489668 monthly en.wikipedia 2014110100 desktop-site 6153537606 monthly en.wikipedia
2 mobile-site 3268487582 monthly en.wikipedia 2015080100 desktop-site 5035534449 monthly en.wikipedia
3 mobile-site 3172429827 monthly en.wikipedia 2015090100 desktop-site 5409631355 monthly en.wikipedia
4 mobile-site 3246082505 monthly en.wikipedia 2015100100 desktop-site 5535704471 monthly en.wikipedia

In [24]:
pc = pd.DataFrame()
pc['timestamp'] = pc_temp['timestamp']
pc['pagecount_all_views'] = pc_temp['count_y'] + pc_temp['count_x']
pc['pagecount_desktop_views'] = pc_temp['count_y']
pc['pagecount_mobile_views'] = pc_temp['count_x']
pc.head()


Out[24]:
timestamp pagecount_all_views pagecount_desktop_views pagecount_mobile_views
0 2014100100 9669079813 6577533128 3091546685
1 2014110100 9181027274 6153537606 3027489668
2 2015080100 8304022031 5035534449 3268487582
3 2015090100 8582061182 5409631355 3172429827
4 2015100100 8781786976 5535704471 3246082505

In [25]:
pc.head()


Out[25]:
timestamp pagecount_all_views pagecount_desktop_views pagecount_mobile_views
0 2014100100 9669079813 6577533128 3091546685
1 2014110100 9181027274 6153537606 3027489668
2 2015080100 8304022031 5035534449 3268487582
3 2015090100 8582061182 5409631355 3172429827
4 2015100100 8781786976 5535704471 3246082505

Combine pagecount and pageviews

Last, use merge fundtion to combine pagecount and pageview dataframe to generate the final csv file.


In [26]:
all_temp = pd.merge(pc, pv, how='outer', on='timestamp')
all_temp.fillna(0, inplace=True)
all_temp.head()


Out[26]:
timestamp pagecount_all_views pagecount_desktop_views pagecount_mobile_views pageview_all_views pageview_desktop_views pageview_mobile_views
0 2014100100 9.669080e+09 6.577533e+09 3.091547e+09 0 0 0
1 2014110100 9.181027e+09 6.153538e+09 3.027490e+09 0 0 0
2 2015080100 8.304022e+09 5.035534e+09 3.268488e+09 7634815221 4332482183 3302333038
3 2015090100 8.582061e+09 5.409631e+09 3.172430e+09 7655695037 4485491704 3170203333
4 2015100100 8.781787e+09 5.535704e+09 3.246083e+09 7746031887 4477532755 3268499132

In [27]:
all_temp['timestamp'] =  pd.to_datetime(all_temp['timestamp'], format = '%Y%m%d%S')
all_temp['year'] = pd.DatetimeIndex(all_temp['timestamp']).year
all_temp['month'] = pd.DatetimeIndex(all_temp['timestamp']).month
all_temp.head()


Out[27]:
timestamp pagecount_all_views pagecount_desktop_views pagecount_mobile_views pageview_all_views pageview_desktop_views pageview_mobile_views year month
0 2014-10-01 9.669080e+09 6.577533e+09 3.091547e+09 0 0 0 2014 10
1 2014-11-01 9.181027e+09 6.153538e+09 3.027490e+09 0 0 0 2014 11
2 2015-08-01 8.304022e+09 5.035534e+09 3.268488e+09 7634815221 4332482183 3302333038 2015 8
3 2015-09-01 8.582061e+09 5.409631e+09 3.172430e+09 7655695037 4485491704 3170203333 2015 9
4 2015-10-01 8.781787e+09 5.535704e+09 3.246083e+09 7746031887 4477532755 3268499132 2015 10

In [28]:
final = pd.DataFrame()
final['year'] = all_temp['year']
final['month'] =  all_temp['month'].map("{:02}".format)
final['pagecount_all_views'] = all_temp['pagecount_all_views']
final['pagecount_desktop_views'] = all_temp['pagecount_desktop_views']
final['pagecount_mobile_views'] = all_temp['pagecount_mobile_views']
final['pageview_all_views'] = all_temp['pageview_all_views']
final['pageview_desktop_views'] = all_temp['pageview_desktop_views']
final['pageview_mobile_views'] = all_temp['pageview_mobile_views']

In [ ]:
final = final.sort(['year', 'month'], ascending=[True, True])
final.head()

In [30]:
final.to_csv('en-wikipedia_traffic_200801-201709.csv')

Step 3: Analysis

Load data from the csv file generated in step2. Plot six variables, mobile pagecount, desktop pagecount, all pagecount, mobile pageview, desktop pageview and all pageview with different color legends.


In [45]:
df = pd.read_csv('en-wikipedia_traffic_200801-201709.csv')
df = df.replace(0, np.nan)
df['year'] =  df['year'].astype(str)
df['month'] =  df['month'].map("{:02}".format)
df.head()


Out[45]:
Unnamed: 0 year month pagecount_all_views pagecount_desktop_views pagecount_mobile_views pageview_all_views pageview_desktop_views pageview_mobile_views
0 22.0 2008 01 4.930903e+09 4.930903e+09 NaN NaN NaN NaN
1 23.0 2008 02 4.818394e+09 4.818394e+09 NaN NaN NaN NaN
2 34.0 2008 03 4.955406e+09 4.955406e+09 NaN NaN NaN NaN
3 45.0 2008 04 5.159162e+09 5.159162e+09 NaN NaN NaN NaN
4 56.0 2008 05 5.584691e+09 5.584691e+09 NaN NaN NaN NaN

In [111]:
date = pd.to_datetime(df.year + df.month, format = '%Y%m')
years = mdates.YearLocator()   
months = mdates.MonthLocator()  
fmt = mdates.DateFormatter('%Y')

fig, ax = plt.subplots()
ax.plot(date, df['pagecount_mobile_views'], color = 'blue', alpha=0.7)
ax.plot(date, df['pagecount_desktop_views'], color = 'green', alpha=0.7)
ax.plot(date, df['pagecount_all_views'], color = 'purple', alpha=0.7)
ax.plot(date, df['pageview_all_views'], color = 'red', linestyle = '--', alpha=0.7)
ax.plot(date, df['pageview_mobile_views'], color = 'blue', linestyle = '--', alpha=0.7)
ax.plot(date, df['pageview_desktop_views'], color = 'purple', linestyle = '--', alpha=0.7)

# Set the ticks' format
ax.xaxis.set_major_locator(years)
ax.xaxis.set_major_formatter(fmt)
ax.xaxis.set_minor_locator(months)
datemin = datetime.date(date.min().year, 1, 1)
datemax = datetime.date(date.max().year + 1, 1, 1)
ax.set_xlim(datemin, datemax)
fig.autofmt_xdate()

# Set the plot's format
fig.set_size_inches(16, 11)
plt.legend(loc=4, bbox_to_anchor=(0.25, 0.75), prop={'size': 12})
fig.suptitle('Page Views on English Wikipedia', fontsize=25, x = 0.5, y = 0.92)
ax.xaxis.label.set_size(15)
ax.yaxis.label.set_size(15)
plt.show()



In [112]:
fig.savefig('page-views-english-wikipedia.png')