In [23]:
# Render matplotlib figures inline, below the cell that creates them.
%matplotlib inline
# Data file read by the cells below; the path is relative to the working directory.
path = 'data/usagov_bitly_data2012-03-16-1331923249.txt'
In [5]:
# Peek at the first raw line of the file. The with-block closes the file
# handle as soon as the line has been read instead of leaving it open.
with open(path) as f:
    first_line = f.readline()
first_line
Out[5]:
'{ "a": "Mozilla\\/5.0 (Windows NT 6.1; WOW64) AppleWebKit\\/535.11 (KHTML, like Gecko) Chrome\\/17.0.963.78 Safari\\/535.11", "c": "US", "nk": 1, "tz": "America\\/New_York", "gr": "MA", "g": "A6qOVH", "h": "wfLQtf", "l": "orofrog", "al": "en-US,en;q=0.8", "hh": "1.usa.gov", "r": "http:\\/\\/www.facebook.com\\/l\\/7AQEFzjSi\\/1.usa.gov\\/wfLQtf", "u": "http:\\/\\/www.ncbi.nlm.nih.gov\\/pubmed\\/22415991", "t": 1331923247, "hc": 1331822918, "cy": "Danvers", "ll": [ 42.576698, -70.954903 ] }\n'
In [6]:
import json
# Parse every line of the file as one JSON object. The with-block closes the
# file once all lines have been consumed.
with open(path) as f:
    records = [json.loads(line) for line in f]
In [7]:
records[0]
Out[7]:
{u'a': u'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.78 Safari/535.11',
u'al': u'en-US,en;q=0.8',
u'c': u'US',
u'cy': u'Danvers',
u'g': u'A6qOVH',
u'gr': u'MA',
u'h': u'wfLQtf',
u'hc': 1331822918,
u'hh': u'1.usa.gov',
u'l': u'orofrog',
u'll': [42.576698, -70.954903],
u'nk': 1,
u'r': u'http://www.facebook.com/l/7AQEFzjSi/1.usa.gov/wfLQtf',
u't': 1331923247,
u'tz': u'America/New_York',
u'u': u'http://www.ncbi.nlm.nih.gov/pubmed/22415991'}
In [8]:
records[0]['tz']
Out[8]:
u'America/New_York'
In [9]:
# Gather the 'tz' value of each record; records without a 'tz' key are skipped.
time_zones = [record['tz'] for record in records if 'tz' in record]
In [10]:
time_zones[:10]
Out[10]:
[u'America/New_York',
u'America/Denver',
u'America/New_York',
u'America/Sao_Paulo',
u'America/New_York',
u'America/New_York',
u'Europe/Warsaw',
u'',
u'',
u'']
In [11]:
def get_counts(sequence):
    """Count how many times each distinct item occurs in ``sequence``.

    Parameters
    ----------
    sequence : iterable of hashable items
        The values to tally (for example a list of time zone strings).

    Returns
    -------
    dict
        Maps each distinct item to its number of occurrences. An empty
        input yields an empty dict.
    """
    counts = {}
    for item in sequence:
        # dict.get supplies 0 the first time an item is seen.
        counts[item] = counts.get(item, 0) + 1
    return counts
In [13]:
# Tally the occurrences of each time zone string, then look up a single key.
counts = get_counts(time_zones)
counts['America/New_York']
Out[13]:
1251
In [14]:
len(time_zones)
Out[14]:
3440
In [15]:
def top_counts(count_dict, n=10):
    """Return the ``n`` largest entries of ``count_dict`` as (count, key) pairs.

    Parameters
    ----------
    count_dict : dict
        Maps keys to their counts.
    n : int, optional
        Maximum number of pairs to return (default 10).

    Returns
    -------
    list of tuple
        ``(count, key)`` pairs sorted in ascending order, so the largest
        count comes last. Pairs with equal counts are ordered by key.
        Empty when ``n`` is zero or negative.
    """
    if n <= 0:
        # A plain [-n:] slice with n == 0 is [0:], i.e. the whole list.
        return []
    value_key_pairs = sorted((count, key) for key, count in count_dict.items())
    return value_key_pairs[-n:]
In [16]:
top_counts(counts)
Out[16]:
[(33, u'America/Sao_Paulo'),
(35, u'Europe/Madrid'),
(36, u'Pacific/Honolulu'),
(37, u'Asia/Tokyo'),
(74, u'Europe/London'),
(191, u'America/Denver'),
(382, u'America/Los_Angeles'),
(400, u'America/Chicago'),
(521, u''),
(1251, u'America/New_York')]
In [24]:
from pandas import DataFrame, Series
import pandas as pd; import numpy as np
In [26]:
# Build a table from the parsed records: one row per record, one column per key.
frame = pd.DataFrame(records)
frame
Out[26]:
_heartbeat_
a
al
c
cy
g
gr
h
hc
hh
kw
l
ll
nk
r
t
tz
u
0
NaN
Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKi...
en-US,en;q=0.8
US
Danvers
A6qOVH
MA
wfLQtf
1331822918
1.usa.gov
NaN
orofrog
[42.576698, -70.954903]
1
http://www.facebook.com/l/7AQEFzjSi/1.usa.gov/...
1331923247
America/New_York
http://www.ncbi.nlm.nih.gov/pubmed/22415991
1
NaN
GoogleMaps/RochesterNY
NaN
US
Provo
mwszkS
UT
mwszkS
1308262393
j.mp
NaN
bitly
[40.218102, -111.613297]
0
http://www.AwareMap.com/
1331923249
America/Denver
http://www.monroecounty.gov/etc/911/rss.php
2
NaN
Mozilla/4.0 (compatible; MSIE 8.0; Windows NT ...
en-US
US
Washington
xxr3Qb
DC
xxr3Qb
1331919941
1.usa.gov
NaN
bitly
[38.9007, -77.043098]
1
http://t.co/03elZC4Q
1331923250
America/New_York
http://boxer.senate.gov/en/press/releases/0316...
3
NaN
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8)...
pt-br
BR
Braz
zCaLwp
27
zUtuOu
1331923068
1.usa.gov
NaN
alelex88
[-23.549999, -46.616699]
0
direct
1331923249
America/Sao_Paulo
http://apod.nasa.gov/apod/ap120312.html
4
NaN
Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKi...
en-US,en;q=0.8
US
Shrewsbury
9b6kNl
MA
9b6kNl
1273672411
bit.ly
NaN
bitly
[42.286499, -71.714699]
0
http://www.shrewsbury-ma.gov/selco/
1331923251
America/New_York
http://www.shrewsbury-ma.gov/egov/gallery/1341...
5
NaN
Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKi...
en-US,en;q=0.8
US
Shrewsbury
axNK8c
MA
axNK8c
1273672506
bit.ly
NaN
bitly
[42.286499, -71.714699]
0
http://www.shrewsbury-ma.gov/selco/
1331923252
America/New_York
http://www.shrewsbury-ma.gov/egov/gallery/1341...
6
NaN
Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1...
pl-PL,pl;q=0.8,en-US;q=0.6,en;q=0.4
PL
Luban
wcndER
77
zkpJBR
1331922854
1.usa.gov
NaN
bnjacobs
[51.116699, 15.2833]
0
http://plus.url.google.com/url?sa=z&n=13319232...
1331923255
Europe/Warsaw
http://www.nasa.gov/mission_pages/nustar/main/...
7
NaN
Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/2...
bg,en-us;q=0.7,en;q=0.3
None
NaN
wcndER
NaN
zkpJBR
1331922854
1.usa.gov
NaN
bnjacobs
NaN
0
http://www.facebook.com/
1331923255
http://www.nasa.gov/mission_pages/nustar/main/...
8
NaN
Opera/9.80 (X11; Linux zbov; U; en) Presto/2.1...
en-US, en
None
NaN
wcndER
NaN
zkpJBR
1331922854
1.usa.gov
NaN
bnjacobs
NaN
0
http://www.facebook.com/l.php?u=http%3A%2F%2F1...
1331923254
http://www.nasa.gov/mission_pages/nustar/main/...
9
NaN
Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKi...
pt-BR,pt;q=0.8,en-US;q=0.6,en;q=0.4
None
NaN
zCaLwp
NaN
zUtuOu
1331923068
1.usa.gov
NaN
alelex88
NaN
0
http://t.co/o1Pd0WeV
1331923255
http://apod.nasa.gov/apod/ap120312.html
10
NaN
Mozilla/5.0 (Windows NT 6.1; WOW64; rv:10.0.2)...
en-us,en;q=0.5
US
Seattle
vNJS4H
WA
u0uD9q
1319563556
1.usa.gov
NaN
o_4us71ccioa
[47.5951, -122.332603]
1
direct
1331923258
America/Los_Angeles
https://www.nysdot.gov/rexdesign/design/commun...
11
NaN
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.4...
en-us,en;q=0.5
US
Washington
wG7OIH
DC
A0nRz4
1331815838
1.usa.gov
NaN
darrellissa
[38.937599, -77.092796]
0
http://t.co/ND7SoPyo
1331923259
America/New_York
http://oversight.house.gov/wp-content/uploads/...
12
NaN
Mozilla/5.0 (Windows NT 6.1; WOW64; rv:10.0.2)...
en-us,en;q=0.5
US
Alexandria
vNJS4H
VA
u0uD9q
1319563556
1.usa.gov
NaN
o_4us71ccioa
[38.790901, -77.094704]
1
direct
1331923259
America/New_York
https://www.nysdot.gov/rexdesign/design/commun...
13
1331923261
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
14
NaN
Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US...
en-us,en;q=0.5
US
Marietta
2rOUYc
GA
2rOUYc
1255769846
1.usa.gov
NaN
bitly
[33.953201, -84.5177]
1
direct
1331923262
America/New_York
http://toxtown.nlm.nih.gov/index.php
15
NaN
Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.1...
zh-TW,zh;q=0.8,en-US;q=0.6,en;q=0.4
HK
Central District
nQvgJp
00
rtrrth
1317318030
j.mp
NaN
walkeryuen
[22.2833, 114.150002]
1
http://forum2.hkgolden.com/view.aspx?type=BW&m...
1331923263
Asia/Hong_Kong
http://www.ssd.noaa.gov/PS/TROP/TCFP/data/curr...
16
NaN
Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.1...
zh-TW,zh;q=0.8,en-US;q=0.6,en;q=0.4
HK
Central District
XdUNr
00
qWkgbq
1317318039
j.mp
NaN
walkeryuen
[22.2833, 114.150002]
1
http://forum2.hkgolden.com/view.aspx?type=BW&m...
1331923263
Asia/Hong_Kong
http://www.usno.navy.mil/NOOC/nmfc-ph/RSS/jtwc...
17
NaN
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.5; r...
en-us,en;q=0.5
US
Buckfield
zH1BFf
ME
x3jOIv
1331839576
1.usa.gov
NaN
andyzieminski
[44.299702, -70.369797]
0
http://t.co/6Cx4ROLs
1331923264
America/New_York
http://www.usda.gov/wps/portal/usda/usdahome?c...
18
NaN
GoogleMaps/RochesterNY
NaN
US
Provo
mwszkS
UT
mwszkS
1308262393
1.usa.gov
NaN
bitly
[40.218102, -111.613297]
0
http://www.AwareMap.com/
1331923262
America/Denver
http://www.monroecounty.gov/etc/911/rss.php
19
NaN
Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKi...
it-IT,it;q=0.8,en-US;q=0.6,en;q=0.4
IT
Venice
wcndER
20
zkpJBR
1331922854
1.usa.gov
NaN
bnjacobs
[45.438599, 12.3267]
0
http://www.facebook.com/
1331923264
Europe/Rome
http://www.nasa.gov/mission_pages/nustar/main/...
20
NaN
Mozilla/5.0 (compatible; MSIE 9.0; Windows NT ...
es-ES
ES
Alcal
zQ95Hi
51
ytZYWR
1331670549
bitly.com
NaN
jplnews
[37.516701, -5.9833]
0
http://www.facebook.com/
1331923265
Africa/Ceuta
http://voyager.jpl.nasa.gov/imagesvideo/uranus...
21
NaN
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6...
en-us,en;q=0.5
US
Davidsonville
wcndER
MD
zkpJBR
1331922854
1.usa.gov
NaN
bnjacobs
[38.939201, -76.635002]
0
http://www.facebook.com/
1331923267
America/New_York
http://www.nasa.gov/mission_pages/nustar/main/...
22
NaN
Mozilla/4.0 (compatible; MSIE 8.0; Windows NT ...
en-us
US
Hockessin
y3ZImz
DE
y3ZImz
1331064158
1.usa.gov
NaN
bitly
[39.785, -75.682297]
0
direct
1331923267
America/New_York
http://portal.hud.gov/hudportal/documents/hudd...
23
NaN
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3)...
en-us
US
Lititz
wWiOiD
PA
wWiOiD
1330217829
1.usa.gov
NaN
bitly
[40.174999, -76.3078]
0
http://www.facebook.com/l.php?u=http%3A%2F%2F1...
1331923267
America/New_York
http://www.tricare.mil/mybenefit/ProfileFilter...
24
NaN
Mozilla/5.0 (Windows; U; Windows NT 5.1; es-ES...
es-es,es;q=0.8,en-us;q=0.5,en;q=0.3
ES
Bilbao
wcndER
59
zkpJBR
1331922854
1.usa.gov
NaN
bnjacobs
[43.25, -2.9667]
0
http://www.facebook.com/
1331923268
Europe/Madrid
http://www.nasa.gov/mission_pages/nustar/main/...
25
NaN
Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.1...
en-GB,en;q=0.8,en-US;q=0.6,en-AU;q=0.4
MY
Kuala Lumpur
wcndER
14
zkpJBR
1331922854
1.usa.gov
NaN
bnjacobs
[3.1667, 101.699997]
0
http://www.facebook.com/
1331923269
Asia/Kuala_Lumpur
http://www.nasa.gov/mission_pages/nustar/main/...
26
NaN
Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.1...
ro-RO,ro;q=0.8,en-US;q=0.6,en;q=0.4
CY
Nicosia
wcndER
04
zkpJBR
1331922854
1.usa.gov
NaN
bnjacobs
[35.166698, 33.366699]
0
http://www.facebook.com/?ref=tn_tnmn
1331923268
Asia/Nicosia
http://www.nasa.gov/mission_pages/nustar/main/...
27
NaN
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8)...
en-US,en;q=0.8
BR
SPaulo
zCaLwp
27
zUtuOu
1331923068
1.usa.gov
NaN
alelex88
[-23.5333, -46.616699]
0
direct
1331923269
America/Sao_Paulo
http://apod.nasa.gov/apod/ap120312.html
28
NaN
Mozilla/5.0 (iPad; CPU OS 5_0_1 like Mac OS X)...
en-us
None
NaN
vNJS4H
NaN
u0uD9q
1319563556
1.usa.gov
NaN
o_4us71ccioa
NaN
0
direct
1331923270
https://www.nysdot.gov/rexdesign/design/commun...
29
NaN
Mozilla/5.0 (iPad; U; CPU OS 3_2 like Mac OS X...
en-us
None
NaN
FPX0IM
NaN
FPX0IL
1331922978
1.usa.gov
NaN
twittershare
NaN
1
http://t.co/5xlp0B34
1331923270
http://www.ed.gov/news/media-advisories/us-dep...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
3530
NaN
Mozilla/5.0 (Windows NT 6.0) AppleWebKit/535.1...
en-US,en;q=0.8
US
San Francisco
xVZg4P
CA
wqUkTo
1331908247
go.nasa.gov
NaN
nasatwitter
[37.7645, -122.429398]
0
http://www.facebook.com/l.php?u=http%3A%2F%2Fg...
1331926815
America/Los_Angeles
http://www.nasa.gov/multimedia/imagegallery/im...
3531
NaN
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6...
en-US
None
NaN
wcndER
NaN
zkpJBR
1331922854
1.usa.gov
NaN
bnjacobs
NaN
0
direct
1331926816
http://www.nasa.gov/mission_pages/nustar/main/...
3532
NaN
Mozilla/5.0 (Windows NT 6.1; WOW64; rv:10.0.2)...
en-us,en;q=0.5
US
Washington
Au3aUS
DC
A9ct6C
1331926420
1.usa.gov
NaN
ncsha
[38.904202, -77.031998]
1
http://www.ncsha.org/
1331926817
America/New_York
http://portal.hud.gov/hudportal/HUD?src=/press...
3533
NaN
Mozilla/5.0 (iPad; CPU OS 5_1 like Mac OS X) A...
en-us
US
Jacksonville
b2UtUJ
FL
ieCdgH
1301393171
go.nasa.gov
NaN
nasatwitter
[30.279301, -81.585098]
1
direct
1331926818
America/New_York
http://apod.nasa.gov/apod/
3534
NaN
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8)...
en-us
US
Frisco
vNJS4H
TX
u0uD9q
1319563556
1.usa.gov
NaN
o_4us71ccioa
[33.149899, -96.855499]
1
direct
1331926820
America/Chicago
https://www.nysdot.gov/rexdesign/design/commun...
3535
NaN
Mozilla/5.0 (Windows NT 5.1; rv:10.0.2) Gecko/...
en-us
US
Houston
zIgLx8
TX
yrPaLt
1331903484
aash.to
NaN
aashto
[29.775499, -95.415199]
1
direct
1331926823
America/Chicago
http://ntl.bts.gov/lib/44000/44300/44374/FHWA-...
3536
NaN
Mozilla/5.0 (BlackBerry; U; BlackBerry 9800; e...
en-US,en;q=0.5
None
NaN
xIcyim
NaN
yG1TTf
1331728309
go.nasa.gov
NaN
nasatwitter
NaN
0
http://t.co/g1VKE8zS
1331926824
http://www.nasa.gov/mission_pages/hurricanes/a...
3537
NaN
Mozilla/5.0 (Windows NT 6.1; WOW64; rv:10.0.2)...
es-es,es;q=0.8,en-us;q=0.5,en;q=0.3
HN
Tegucigalpa
zCaLwp
08
w63FZW
1331546756
1.usa.gov
NaN
bufferapp
[14.1, -87.216698]
0
http://t.co/A8TJyibE
1331926825
America/Tegucigalpa
http://apod.nasa.gov/apod/ap120312.html
3538
NaN
Mozilla/5.0 (iPhone; CPU iPhone OS 5_1 like Ma...
en-us
US
Los Angeles
qMac9k
CA
qds1Ge
1310473559
1.usa.gov
NaN
healthypeople
[34.041599, -118.298798]
0
direct
1331926825
America/Los_Angeles
http://healthypeople.gov/2020/connect/webinars...
3539
NaN
Mozilla/5.0 (compatible; Fedora Core 3) FC3 KDE
NaN
US
Bellevue
zu2M5o
WA
zDhdro
1331586192
bit.ly
NaN
glimtwin
[47.615398, -122.210297]
0
direct
1331926827
America/Los_Angeles
http://www.federalreserve.gov/newsevents/press...
3540
NaN
Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKi...
en-US,en;q=0.8
US
Payson
wcndER
UT
zkpJBR
1331922854
1.usa.gov
NaN
bnjacobs
[40.014198, -111.738899]
0
http://www.facebook.com/l.php?u=http%3A%2F%2F1...
1331926828
America/Denver
http://www.nasa.gov/mission_pages/nustar/main/...
3541
NaN
Mozilla/5.0 (X11; U; OpenVMS AlphaServer_ES40;...
NaN
US
Bellevue
zu2M5o
WA
zDhdro
1331586192
1.usa.gov
NaN
glimtwin
[47.615398, -122.210297]
0
direct
1331926828
America/Los_Angeles
http://www.federalreserve.gov/newsevents/press...
3542
NaN
Mozilla/5.0 (compatible; MSIE 9.0; Windows NT ...
en-us
US
Pittsburg
y3reI1
CA
y3reI1
1331926120
1.usa.gov
NaN
bitly
[38.0051, -121.838699]
0
http://www.facebook.com/l.php?u=http%3A%2F%2F1...
1331926829
America/Los_Angeles
http://www.sba.gov/community/blogs/community-b...
3543
1331926831
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
3544
NaN
Mozilla/5.0 (Windows NT 6.1; WOW64; rv:5.0.1) ...
en-us,en;q=0.5
US
Wentzville
vNJS4H
MO
u0uD9q
1319563556
1.usa.gov
NaN
o_4us71ccioa
[38.790001, -90.854897]
1
direct
1331926831
America/Chicago
https://www.nysdot.gov/rexdesign/design/commun...
3545
NaN
Mozilla/5.0 (Windows NT 6.1; WOW64; rv:10.0.2)...
en-us,en;q=0.5
US
Saint Charles
vNJS4H
IL
u0uD9q
1319563556
1.usa.gov
NaN
o_4us71ccioa
[41.9352, -88.290901]
1
direct
1331926832
America/Chicago
https://www.nysdot.gov/rexdesign/design/commun...
3546
NaN
Mozilla/5.0 (iPhone; CPU iPhone OS 5_1 like Ma...
en-us
US
Los Angeles
qMac9k
CA
qds1Ge
1310473559
1.usa.gov
NaN
healthypeople
[34.041599, -118.298798]
1
direct
1331926833
America/Los_Angeles
http://healthypeople.gov/2020/connect/webinars...
3547
NaN
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8)...
en-us
US
Silver Spring
y0jYkg
MD
y0jYkg
1331851811
1.usa.gov
NaN
bitly
[39.052101, -77.014999]
1
direct
1331926836
America/New_York
http://www.epa.gov/otaq/regs/fuels/additive/e1...
3548
NaN
Mozilla/5.0 (iPhone; CPU iPhone OS 5_1 like Ma...
en-us
US
Mcgehee
y5rMac
AR
xANY6O
1331916302
1.usa.gov
NaN
twitterfeed
[33.628399, -91.356903]
1
https://twitter.com/fdarecalls/status/18069759...
1331926836
America/Chicago
http://www.fda.gov/Safety/Recalls/ucm296326.htm
3549
NaN
Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKi...
sv-SE,sv;q=0.8,en-US;q=0.6,en;q=0.4
SE
Sollefte
eH8wu
24
7dtjei
1260316355
1.usa.gov
NaN
tweetdeckapi
[63.166698, 17.266701]
1
direct
1331926834
Europe/Stockholm
http://www.nasa.gov/mission_pages/WISE/main/in...
3550
NaN
Mozilla/4.0 (compatible; MSIE 8.0; Windows NT ...
en-us
US
Conshohocken
A00b72
PA
yGSwzn
1331917632
1.usa.gov
NaN
addthis
[40.0798, -75.2855]
0
http://www.linkedin.com/home?trk=hb_tab_home_top
1331926837
America/New_York
http://www.nlm.nih.gov/medlineplus/news/fullst...
3551
NaN
Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKi...
en-US,en;q=0.8
None
NaN
wcndER
NaN
zkpJBR
1331922854
1.usa.gov
NaN
bnjacobs
NaN
0
http://plus.url.google.com/url?sa=z&n=13319268...
1331926837
http://www.nasa.gov/mission_pages/nustar/main/...
3552
NaN
Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US...
NaN
US
Decatur
rqgJuE
AL
xcz8vt
1331227417
1.usa.gov
NaN
bootsnall
[34.572701, -86.940598]
0
direct
1331926839
America/Chicago
http://travel.state.gov/passport/passport_5535...
3553
NaN
Mozilla/4.0 (compatible; MSIE 7.0; Windows NT ...
en-us
US
Shrewsbury
9b6kNl
MA
9b6kNl
1273672411
bit.ly
NaN
bitly
[42.286499, -71.714699]
0
http://www.shrewsbury-ma.gov/selco/
1331926840
America/New_York
http://www.shrewsbury-ma.gov/egov/gallery/1341...
3554
NaN
Mozilla/4.0 (compatible; MSIE 7.0; Windows NT ...
en-us
US
Shrewsbury
axNK8c
MA
axNK8c
1273672506
bit.ly
NaN
bitly
[42.286499, -71.714699]
0
http://www.shrewsbury-ma.gov/selco/
1331926840
America/New_York
http://www.shrewsbury-ma.gov/egov/gallery/1341...
3555
NaN
Mozilla/4.0 (compatible; MSIE 9.0; Windows NT ...
en
US
Paramus
e5SvKE
NJ
fqPSr9
1301298479
1.usa.gov
NaN
tweetdeckapi
[40.9445, -74.07]
1
direct
1331926841
America/New_York
http://www.fda.gov/AdvisoryCommittees/Committe...
3556
NaN
Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1...
en-US,en;q=0.8
US
Oklahoma City
jQLtP4
OK
jQLtP4
1307530247
1.usa.gov
NaN
bitly
[35.4715, -97.518997]
0
http://www.facebook.com/l.php?u=http%3A%2F%2F1...
1331926844
America/Chicago
http://www.okc.gov/PublicNotificationSystem/Fo...
3557
NaN
GoogleMaps/RochesterNY
NaN
US
Provo
mwszkS
UT
mwszkS
1308262393
j.mp
NaN
bitly
[40.218102, -111.613297]
0
http://www.AwareMap.com/
1331926846
America/Denver
http://www.monroecounty.gov/etc/911/rss.php
3558
NaN
GoogleProducer
NaN
US
Mountain View
zjtI4X
CA
zjtI4X
1327528527
1.usa.gov
NaN
bitly
[37.419201, -122.057404]
0
direct
1331926847
America/Los_Angeles
http://www.ahrq.gov/qual/qitoolkit/
3559
NaN
Mozilla/4.0 (compatible; MSIE 8.0; Windows NT ...
en-US
US
Mc Lean
qxKrTK
VA
qxKrTK
1312897670
1.usa.gov
NaN
bitly
[38.935799, -77.162102]
0
http://t.co/OEEEvwjU
1331926849
America/New_York
http://herndon-va.gov/Content/public_safety/Pu...
3560 rows × 18 columns
In [27]:
frame['tz'][:10]
Out[27]:
0 America/New_York
1 America/Denver
2 America/New_York
3 America/Sao_Paulo
4 America/New_York
5 America/New_York
6 Europe/Warsaw
7
8
9
Name: tz, dtype: object
In [28]:
# Number of occurrences of each distinct value in the 'tz' column.
tz_counts = frame['tz'].value_counts()
In [29]:
tz_counts[:10]
Out[29]:
America/New_York 1251
521
America/Chicago 400
America/Los_Angeles 382
America/Denver 191
Europe/London 74
Asia/Tokyo 37
Pacific/Honolulu 36
Europe/Madrid 35
America/Sao_Paulo 33
dtype: int64
In [30]:
# Copy of the time zone column with absent values labelled 'Missing'.
tz_column = frame.tz
clean_tz = tz_column.fillna('Missing')
In [31]:
# Give empty-string time zones an explicit label (spelling corrected from
# 'Unkown'); re-run the cells below so their outputs pick up the new label.
clean_tz[clean_tz == ''] = 'Unknown'
In [32]:
# Recount using the relabelled series; this replaces the earlier tz_counts.
tz_counts = clean_tz.value_counts()
In [33]:
tz_counts[:10]
Out[33]:
America/New_York 1251
Unkown 521
America/Chicago 400
America/Los_Angeles 382
America/Denver 191
Missing 120
Europe/London 74
Asia/Tokyo 37
Pacific/Honolulu 36
Europe/Madrid 35
dtype: int64
In [34]:
# Horizontal bar chart of the first ten entries of tz_counts.
tz_counts.iloc[:10].plot(rot=0, kind='barh')
Out[34]:
<matplotlib.axes._subplots.AxesSubplot at 0x11911d350>
In [35]:
frame['a'][1]
Out[35]:
u'GoogleMaps/RochesterNY'
In [36]:
frame['a'][50]
Out[36]:
u'Mozilla/5.0 (Windows NT 5.1; rv:10.0.2) Gecko/20100101 Firefox/10.0.2'
In [37]:
frame['a'][51]
Out[37]:
u'Mozilla/5.0 (Linux; U; Android 2.2.2; en-us; LG-P925/V10e Build/FRG83G) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1'
In [38]:
# First whitespace-delimited token of every non-null agent string.
agent_tokens = [agent.split()[0] for agent in frame.a.dropna()]
results = Series(agent_tokens)
In [39]:
results[:5]
Out[39]:
0 Mozilla/5.0
1 GoogleMaps/RochesterNY
2 Mozilla/4.0
3 Mozilla/5.0
4 Mozilla/5.0
dtype: object
In [40]:
results.value_counts()[:8]
Out[40]:
Mozilla/5.0 2594
Mozilla/4.0 601
GoogleMaps/RochesterNY 121
Opera/9.80 34
TEST_INTERNET_AGENT 24
GoogleProducer 21
Mozilla/6.0 5
BlackBerry8520/5.0.0.681 4
dtype: int64
In [41]:
# Keep only the rows that have an agent string.
has_agent = frame['a'].notnull()
cframe = frame[has_agent]
In [42]:
# Label each row by whether its agent string mentions 'Windows'.
is_windows = cframe['a'].str.contains('Windows')
operating_system = np.where(is_windows, 'Windows', 'Not Windows')
In [43]:
operating_system[:5]
Out[43]:
array(['Windows', 'Not Windows', 'Windows', 'Not Windows', 'Windows'],
dtype='|S11')
In [44]:
# Group by two keys: the 'tz' column (given by name) and the operating_system
# array, which was computed from cframe and so has one label per row.
by_tz_os = cframe.groupby(['tz', operating_system])
In [45]:
# Size of every (tz, label) group, pivoted so each label becomes a column;
# combinations that never occur are filled with 0.
group_sizes = by_tz_os.size()
agg_counts = group_sizes.unstack().fillna(0)
agg_counts[:10]
Out[45]:
Not Windows
Windows
tz
245
276
Africa/Cairo
0
3
Africa/Casablanca
0
1
Africa/Ceuta
0
2
Africa/Johannesburg
0
1
Africa/Lusaka
0
1
America/Anchorage
4
1
America/Argentina/Buenos_Aires
1
0
America/Argentina/Cordoba
0
1
America/Argentina/Mendoza
0
1
In [46]:
# Positions that would sort the time zones by their total count, ascending.
tz_totals = agg_counts.sum(axis=1)
indexer = tz_totals.argsort()
In [47]:
indexer[:10]
Out[47]:
tz
24
Africa/Cairo 20
Africa/Casablanca 21
Africa/Ceuta 92
Africa/Johannesburg 87
Africa/Lusaka 53
America/Anchorage 54
America/Argentina/Buenos_Aires 57
America/Argentina/Cordoba 26
America/Argentina/Mendoza 55
dtype: int64
In [48]:
# Reorder rows by ascending total, then keep the last ten (the largest totals).
counts_by_total = agg_counts.take(indexer)
count_subset = counts_by_total[-10:]
In [49]:
count_subset
Out[49]:
Not Windows
Windows
tz
America/Sao_Paulo
13
20
Europe/Madrid
16
19
Pacific/Honolulu
0
36
Asia/Tokyo
2
35
Europe/London
43
31
America/Denver
132
59
America/Los_Angeles
130
252
America/Chicago
115
285
245
276
America/New_York
339
912
In [50]:
# Stacked horizontal bars: one bar per time zone, split into the
# 'Not Windows' and 'Windows' counts.
count_subset.plot(kind='barh',stacked=True)
Out[50]:
<matplotlib.axes._subplots.AxesSubplot at 0x119196910>
In [51]:
# Divide each row by its own total so the two columns sum to 1 per time zone,
# then plot the resulting proportions as stacked horizontal bars.
subset_totals = count_subset.sum(axis=1)
normed_subset = count_subset.div(subset_totals, axis=0)
normed_subset.plot(stacked=True, kind='barh')
Out[51]:
<matplotlib.axes._subplots.AxesSubplot at 0x1195af6d0>
In [ ]:
Content source: kkai/perception-aware
Similar notebooks: