In [36]:
%matplotlib inline

In [3]:
import json
path = '../data/book/usagov_bitly_data2012-03-16-1331923249.txt'
# The file holds one JSON document per line; parse every line into a dict.
# The context manager closes the handle as soon as parsing finishes (the bare
# open() call previously left that to the garbage collector), and the explicit
# encoding keeps the read independent of the platform's default codec.
with open(path, encoding='utf-8') as data_file:
    records = [json.loads(line) for line in data_file]

In [4]:
records[0]


Out[4]:
{'a': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.78 Safari/535.11',
 'al': 'en-US,en;q=0.8',
 'c': 'US',
 'cy': 'Danvers',
 'g': 'A6qOVH',
 'gr': 'MA',
 'h': 'wfLQtf',
 'hc': 1331822918,
 'hh': '1.usa.gov',
 'l': 'orofrog',
 'll': [42.576698, -70.954903],
 'nk': 1,
 'r': 'http://www.facebook.com/l/7AQEFzjSi/1.usa.gov/wfLQtf',
 't': 1331923247,
 'tz': 'America/New_York',
 'u': 'http://www.ncbi.nlm.nih.gov/pubmed/22415991'}

In [5]:
records[0]['tz']


Out[5]:
'America/New_York'
Most Often-Occurring Time Zones

In [6]:
# Not every record has a 'tz' key, so skip those that lack it instead of
# raising KeyError.
time_zones = [entry['tz'] for entry in records if 'tz' in entry]

In [8]:
time_zones[:10]


Out[8]:
['America/New_York',
 'America/Denver',
 'America/New_York',
 'America/Sao_Paulo',
 'America/New_York',
 'America/New_York',
 'Europe/Warsaw',
 '',
 '',
 '']

In [9]:
def get_counts(sequence):
    """Count how many times each distinct item occurs in ``sequence``.

    Parameters
    ----------
    sequence : iterable of hashable items

    Returns
    -------
    dict
        Maps each item to the number of times it was seen.
    """
    tally = {}
    for item in sequence:
        # .get() yields 0 for an item that has not been seen yet.
        tally[item] = tally.get(item, 0) + 1
    return tally

In [10]:
get_counts(time_zones)


Out[10]:
{'': 521,
 'Africa/Cairo': 3,
 'Africa/Casablanca': 1,
 'Africa/Ceuta': 2,
 'Africa/Johannesburg': 1,
 'Africa/Lusaka': 1,
 'America/Anchorage': 5,
 'America/Argentina/Buenos_Aires': 1,
 'America/Argentina/Cordoba': 1,
 'America/Argentina/Mendoza': 1,
 'America/Bogota': 3,
 'America/Caracas': 1,
 'America/Chicago': 400,
 'America/Chihuahua': 2,
 'America/Costa_Rica': 1,
 'America/Denver': 191,
 'America/Edmonton': 6,
 'America/Guayaquil': 2,
 'America/Halifax': 4,
 'America/Indianapolis': 20,
 'America/La_Paz': 1,
 'America/Lima': 1,
 'America/Los_Angeles': 382,
 'America/Managua': 3,
 'America/Mazatlan': 1,
 'America/Mexico_City': 15,
 'America/Monterrey': 1,
 'America/Montevideo': 1,
 'America/Montreal': 9,
 'America/New_York': 1251,
 'America/Phoenix': 20,
 'America/Puerto_Rico': 10,
 'America/Rainy_River': 25,
 'America/Recife': 2,
 'America/Santo_Domingo': 1,
 'America/Sao_Paulo': 33,
 'America/St_Kitts': 1,
 'America/Tegucigalpa': 1,
 'America/Vancouver': 12,
 'America/Winnipeg': 4,
 'Asia/Amman': 2,
 'Asia/Bangkok': 6,
 'Asia/Beirut': 4,
 'Asia/Calcutta': 9,
 'Asia/Dubai': 4,
 'Asia/Harbin': 3,
 'Asia/Hong_Kong': 10,
 'Asia/Istanbul': 9,
 'Asia/Jakarta': 3,
 'Asia/Jerusalem': 3,
 'Asia/Karachi': 3,
 'Asia/Kuala_Lumpur': 3,
 'Asia/Kuching': 1,
 'Asia/Manila': 1,
 'Asia/Nicosia': 1,
 'Asia/Novosibirsk': 1,
 'Asia/Pontianak': 1,
 'Asia/Riyadh': 1,
 'Asia/Seoul': 5,
 'Asia/Tokyo': 37,
 'Asia/Yekaterinburg': 1,
 'Australia/NSW': 6,
 'Australia/Queensland': 1,
 'Chile/Continental': 6,
 'Europe/Amsterdam': 22,
 'Europe/Athens': 6,
 'Europe/Belgrade': 2,
 'Europe/Berlin': 28,
 'Europe/Bratislava': 3,
 'Europe/Brussels': 4,
 'Europe/Bucharest': 4,
 'Europe/Budapest': 5,
 'Europe/Copenhagen': 5,
 'Europe/Dublin': 3,
 'Europe/Helsinki': 10,
 'Europe/Lisbon': 8,
 'Europe/Ljubljana': 1,
 'Europe/London': 74,
 'Europe/Madrid': 35,
 'Europe/Malta': 2,
 'Europe/Moscow': 10,
 'Europe/Oslo': 10,
 'Europe/Paris': 14,
 'Europe/Prague': 10,
 'Europe/Riga': 2,
 'Europe/Rome': 27,
 'Europe/Skopje': 1,
 'Europe/Sofia': 1,
 'Europe/Stockholm': 14,
 'Europe/Uzhgorod': 1,
 'Europe/Vienna': 6,
 'Europe/Vilnius': 2,
 'Europe/Volgograd': 1,
 'Europe/Warsaw': 16,
 'Europe/Zurich': 4,
 'Pacific/Auckland': 11,
 'Pacific/Honolulu': 36}

In [12]:
from collections import defaultdict
def get_counts2(sequence):
    """Count occurrences of each item in ``sequence`` using a ``defaultdict``.

    Parameters
    ----------
    sequence : iterable of hashable items

    Returns
    -------
    collections.defaultdict
        ``defaultdict(int)`` mapping each item to its occurrence count;
        looking up an unseen key yields 0.
    """
    tally = defaultdict(int)
    for item in sequence:
        # A missing key is created with int() == 0 on first access.
        tally[item] = tally[item] + 1
    return tally

In [14]:
counts = get_counts2(time_zones)

In [15]:
len(time_zones)


Out[15]:
3440

In [16]:
counts


Out[16]:
defaultdict(int,
            {'': 521,
             'Africa/Cairo': 3,
             'Africa/Casablanca': 1,
             'Africa/Ceuta': 2,
             'Africa/Johannesburg': 1,
             'Africa/Lusaka': 1,
             'America/Anchorage': 5,
             'America/Argentina/Buenos_Aires': 1,
             'America/Argentina/Cordoba': 1,
             'America/Argentina/Mendoza': 1,
             'America/Bogota': 3,
             'America/Caracas': 1,
             'America/Chicago': 400,
             'America/Chihuahua': 2,
             'America/Costa_Rica': 1,
             'America/Denver': 191,
             'America/Edmonton': 6,
             'America/Guayaquil': 2,
             'America/Halifax': 4,
             'America/Indianapolis': 20,
             'America/La_Paz': 1,
             'America/Lima': 1,
             'America/Los_Angeles': 382,
             'America/Managua': 3,
             'America/Mazatlan': 1,
             'America/Mexico_City': 15,
             'America/Monterrey': 1,
             'America/Montevideo': 1,
             'America/Montreal': 9,
             'America/New_York': 1251,
             'America/Phoenix': 20,
             'America/Puerto_Rico': 10,
             'America/Rainy_River': 25,
             'America/Recife': 2,
             'America/Santo_Domingo': 1,
             'America/Sao_Paulo': 33,
             'America/St_Kitts': 1,
             'America/Tegucigalpa': 1,
             'America/Vancouver': 12,
             'America/Winnipeg': 4,
             'Asia/Amman': 2,
             'Asia/Bangkok': 6,
             'Asia/Beirut': 4,
             'Asia/Calcutta': 9,
             'Asia/Dubai': 4,
             'Asia/Harbin': 3,
             'Asia/Hong_Kong': 10,
             'Asia/Istanbul': 9,
             'Asia/Jakarta': 3,
             'Asia/Jerusalem': 3,
             'Asia/Karachi': 3,
             'Asia/Kuala_Lumpur': 3,
             'Asia/Kuching': 1,
             'Asia/Manila': 1,
             'Asia/Nicosia': 1,
             'Asia/Novosibirsk': 1,
             'Asia/Pontianak': 1,
             'Asia/Riyadh': 1,
             'Asia/Seoul': 5,
             'Asia/Tokyo': 37,
             'Asia/Yekaterinburg': 1,
             'Australia/NSW': 6,
             'Australia/Queensland': 1,
             'Chile/Continental': 6,
             'Europe/Amsterdam': 22,
             'Europe/Athens': 6,
             'Europe/Belgrade': 2,
             'Europe/Berlin': 28,
             'Europe/Bratislava': 3,
             'Europe/Brussels': 4,
             'Europe/Bucharest': 4,
             'Europe/Budapest': 5,
             'Europe/Copenhagen': 5,
             'Europe/Dublin': 3,
             'Europe/Helsinki': 10,
             'Europe/Lisbon': 8,
             'Europe/Ljubljana': 1,
             'Europe/London': 74,
             'Europe/Madrid': 35,
             'Europe/Malta': 2,
             'Europe/Moscow': 10,
             'Europe/Oslo': 10,
             'Europe/Paris': 14,
             'Europe/Prague': 10,
             'Europe/Riga': 2,
             'Europe/Rome': 27,
             'Europe/Skopje': 1,
             'Europe/Sofia': 1,
             'Europe/Stockholm': 14,
             'Europe/Uzhgorod': 1,
             'Europe/Vienna': 6,
             'Europe/Vilnius': 2,
             'Europe/Volgograd': 1,
             'Europe/Warsaw': 16,
             'Europe/Zurich': 4,
             'Pacific/Auckland': 11,
             'Pacific/Honolulu': 36})

In [17]:
counts['America/Chicago']


Out[17]:
400

Most Often-Occurring Time Zones with Python standard library collections


In [18]:
from collections import Counter
counts = Counter(time_zones)

In [19]:
counts


Out[19]:
Counter({'': 521,
         'Africa/Cairo': 3,
         'Africa/Casablanca': 1,
         'Africa/Ceuta': 2,
         'Africa/Johannesburg': 1,
         'Africa/Lusaka': 1,
         'America/Anchorage': 5,
         'America/Argentina/Buenos_Aires': 1,
         'America/Argentina/Cordoba': 1,
         'America/Argentina/Mendoza': 1,
         'America/Bogota': 3,
         'America/Caracas': 1,
         'America/Chicago': 400,
         'America/Chihuahua': 2,
         'America/Costa_Rica': 1,
         'America/Denver': 191,
         'America/Edmonton': 6,
         'America/Guayaquil': 2,
         'America/Halifax': 4,
         'America/Indianapolis': 20,
         'America/La_Paz': 1,
         'America/Lima': 1,
         'America/Los_Angeles': 382,
         'America/Managua': 3,
         'America/Mazatlan': 1,
         'America/Mexico_City': 15,
         'America/Monterrey': 1,
         'America/Montevideo': 1,
         'America/Montreal': 9,
         'America/New_York': 1251,
         'America/Phoenix': 20,
         'America/Puerto_Rico': 10,
         'America/Rainy_River': 25,
         'America/Recife': 2,
         'America/Santo_Domingo': 1,
         'America/Sao_Paulo': 33,
         'America/St_Kitts': 1,
         'America/Tegucigalpa': 1,
         'America/Vancouver': 12,
         'America/Winnipeg': 4,
         'Asia/Amman': 2,
         'Asia/Bangkok': 6,
         'Asia/Beirut': 4,
         'Asia/Calcutta': 9,
         'Asia/Dubai': 4,
         'Asia/Harbin': 3,
         'Asia/Hong_Kong': 10,
         'Asia/Istanbul': 9,
         'Asia/Jakarta': 3,
         'Asia/Jerusalem': 3,
         'Asia/Karachi': 3,
         'Asia/Kuala_Lumpur': 3,
         'Asia/Kuching': 1,
         'Asia/Manila': 1,
         'Asia/Nicosia': 1,
         'Asia/Novosibirsk': 1,
         'Asia/Pontianak': 1,
         'Asia/Riyadh': 1,
         'Asia/Seoul': 5,
         'Asia/Tokyo': 37,
         'Asia/Yekaterinburg': 1,
         'Australia/NSW': 6,
         'Australia/Queensland': 1,
         'Chile/Continental': 6,
         'Europe/Amsterdam': 22,
         'Europe/Athens': 6,
         'Europe/Belgrade': 2,
         'Europe/Berlin': 28,
         'Europe/Bratislava': 3,
         'Europe/Brussels': 4,
         'Europe/Bucharest': 4,
         'Europe/Budapest': 5,
         'Europe/Copenhagen': 5,
         'Europe/Dublin': 3,
         'Europe/Helsinki': 10,
         'Europe/Lisbon': 8,
         'Europe/Ljubljana': 1,
         'Europe/London': 74,
         'Europe/Madrid': 35,
         'Europe/Malta': 2,
         'Europe/Moscow': 10,
         'Europe/Oslo': 10,
         'Europe/Paris': 14,
         'Europe/Prague': 10,
         'Europe/Riga': 2,
         'Europe/Rome': 27,
         'Europe/Skopje': 1,
         'Europe/Sofia': 1,
         'Europe/Stockholm': 14,
         'Europe/Uzhgorod': 1,
         'Europe/Vienna': 6,
         'Europe/Vilnius': 2,
         'Europe/Volgograd': 1,
         'Europe/Warsaw': 16,
         'Europe/Zurich': 4,
         'Pacific/Auckland': 11,
         'Pacific/Honolulu': 36})

In [20]:
counts.most_common(10)


Out[20]:
[('America/New_York', 1251),
 ('', 521),
 ('America/Chicago', 400),
 ('America/Los_Angeles', 382),
 ('America/Denver', 191),
 ('Europe/London', 74),
 ('Asia/Tokyo', 37),
 ('Pacific/Honolulu', 36),
 ('Europe/Madrid', 35),
 ('America/Sao_Paulo', 33)]

Most Often-Occurring Time Zones with pandas library


In [24]:
from pandas import DataFrame, Series
import pandas as pd

In [25]:
frame = DataFrame(records)

In [26]:
frame


Out[26]:
_heartbeat_ a al c cy g gr h hc hh kw l ll nk r t tz u
0 NaN Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKi... en-US,en;q=0.8 US Danvers A6qOVH MA wfLQtf 1331822918 1.usa.gov NaN orofrog [42.576698, -70.954903] 1 http://www.facebook.com/l/7AQEFzjSi/1.usa.gov/... 1331923247 America/New_York http://www.ncbi.nlm.nih.gov/pubmed/22415991
1 NaN GoogleMaps/RochesterNY NaN US Provo mwszkS UT mwszkS 1308262393 j.mp NaN bitly [40.218102, -111.613297] 0 http://www.AwareMap.com/ 1331923249 America/Denver http://www.monroecounty.gov/etc/911/rss.php
2 NaN Mozilla/4.0 (compatible; MSIE 8.0; Windows NT ... en-US US Washington xxr3Qb DC xxr3Qb 1331919941 1.usa.gov NaN bitly [38.9007, -77.043098] 1 http://t.co/03elZC4Q 1331923250 America/New_York http://boxer.senate.gov/en/press/releases/0316...
3 NaN Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8)... pt-br BR Braz zCaLwp 27 zUtuOu 1331923068 1.usa.gov NaN alelex88 [-23.549999, -46.616699] 0 direct 1331923249 America/Sao_Paulo http://apod.nasa.gov/apod/ap120312.html
4 NaN Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKi... en-US,en;q=0.8 US Shrewsbury 9b6kNl MA 9b6kNl 1273672411 bit.ly NaN bitly [42.286499, -71.714699] 0 http://www.shrewsbury-ma.gov/selco/ 1331923251 America/New_York http://www.shrewsbury-ma.gov/egov/gallery/1341...
5 NaN Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKi... en-US,en;q=0.8 US Shrewsbury axNK8c MA axNK8c 1273672506 bit.ly NaN bitly [42.286499, -71.714699] 0 http://www.shrewsbury-ma.gov/selco/ 1331923252 America/New_York http://www.shrewsbury-ma.gov/egov/gallery/1341...
6 NaN Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1... pl-PL,pl;q=0.8,en-US;q=0.6,en;q=0.4 PL Luban wcndER 77 zkpJBR 1331922854 1.usa.gov NaN bnjacobs [51.116699, 15.2833] 0 http://plus.url.google.com/url?sa=z&n=13319232... 1331923255 Europe/Warsaw http://www.nasa.gov/mission_pages/nustar/main/...
7 NaN Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/2... bg,en-us;q=0.7,en;q=0.3 None NaN wcndER NaN zkpJBR 1331922854 1.usa.gov NaN bnjacobs NaN 0 http://www.facebook.com/ 1331923255 http://www.nasa.gov/mission_pages/nustar/main/...
8 NaN Opera/9.80 (X11; Linux zbov; U; en) Presto/2.1... en-US, en None NaN wcndER NaN zkpJBR 1331922854 1.usa.gov NaN bnjacobs NaN 0 http://www.facebook.com/l.php?u=http%3A%2F%2F1... 1331923254 http://www.nasa.gov/mission_pages/nustar/main/...
9 NaN Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKi... pt-BR,pt;q=0.8,en-US;q=0.6,en;q=0.4 None NaN zCaLwp NaN zUtuOu 1331923068 1.usa.gov NaN alelex88 NaN 0 http://t.co/o1Pd0WeV 1331923255 http://apod.nasa.gov/apod/ap120312.html
10 NaN Mozilla/5.0 (Windows NT 6.1; WOW64; rv:10.0.2)... en-us,en;q=0.5 US Seattle vNJS4H WA u0uD9q 1319563556 1.usa.gov NaN o_4us71ccioa [47.5951, -122.332603] 1 direct 1331923258 America/Los_Angeles https://www.nysdot.gov/rexdesign/design/commun...
11 NaN Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.4... en-us,en;q=0.5 US Washington wG7OIH DC A0nRz4 1331815838 1.usa.gov NaN darrellissa [38.937599, -77.092796] 0 http://t.co/ND7SoPyo 1331923259 America/New_York http://oversight.house.gov/wp-content/uploads/...
12 NaN Mozilla/5.0 (Windows NT 6.1; WOW64; rv:10.0.2)... en-us,en;q=0.5 US Alexandria vNJS4H VA u0uD9q 1319563556 1.usa.gov NaN o_4us71ccioa [38.790901, -77.094704] 1 direct 1331923259 America/New_York https://www.nysdot.gov/rexdesign/design/commun...
13 1331923261 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
14 NaN Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US... en-us,en;q=0.5 US Marietta 2rOUYc GA 2rOUYc 1255769846 1.usa.gov NaN bitly [33.953201, -84.5177] 1 direct 1331923262 America/New_York http://toxtown.nlm.nih.gov/index.php
15 NaN Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.1... zh-TW,zh;q=0.8,en-US;q=0.6,en;q=0.4 HK Central District nQvgJp 00 rtrrth 1317318030 j.mp NaN walkeryuen [22.2833, 114.150002] 1 http://forum2.hkgolden.com/view.aspx?type=BW&m... 1331923263 Asia/Hong_Kong http://www.ssd.noaa.gov/PS/TROP/TCFP/data/curr...
16 NaN Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.1... zh-TW,zh;q=0.8,en-US;q=0.6,en;q=0.4 HK Central District XdUNr 00 qWkgbq 1317318039 j.mp NaN walkeryuen [22.2833, 114.150002] 1 http://forum2.hkgolden.com/view.aspx?type=BW&m... 1331923263 Asia/Hong_Kong http://www.usno.navy.mil/NOOC/nmfc-ph/RSS/jtwc...
17 NaN Mozilla/5.0 (Macintosh; Intel Mac OS X 10.5; r... en-us,en;q=0.5 US Buckfield zH1BFf ME x3jOIv 1331839576 1.usa.gov NaN andyzieminski [44.299702, -70.369797] 0 http://t.co/6Cx4ROLs 1331923264 America/New_York http://www.usda.gov/wps/portal/usda/usdahome?c...
18 NaN GoogleMaps/RochesterNY NaN US Provo mwszkS UT mwszkS 1308262393 1.usa.gov NaN bitly [40.218102, -111.613297] 0 http://www.AwareMap.com/ 1331923262 America/Denver http://www.monroecounty.gov/etc/911/rss.php
19 NaN Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKi... it-IT,it;q=0.8,en-US;q=0.6,en;q=0.4 IT Venice wcndER 20 zkpJBR 1331922854 1.usa.gov NaN bnjacobs [45.438599, 12.3267] 0 http://www.facebook.com/ 1331923264 Europe/Rome http://www.nasa.gov/mission_pages/nustar/main/...
20 NaN Mozilla/5.0 (compatible; MSIE 9.0; Windows NT ... es-ES ES Alcal zQ95Hi 51 ytZYWR 1331670549 bitly.com NaN jplnews [37.516701, -5.9833] 0 http://www.facebook.com/ 1331923265 Africa/Ceuta http://voyager.jpl.nasa.gov/imagesvideo/uranus...
21 NaN Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6... en-us,en;q=0.5 US Davidsonville wcndER MD zkpJBR 1331922854 1.usa.gov NaN bnjacobs [38.939201, -76.635002] 0 http://www.facebook.com/ 1331923267 America/New_York http://www.nasa.gov/mission_pages/nustar/main/...
22 NaN Mozilla/4.0 (compatible; MSIE 8.0; Windows NT ... en-us US Hockessin y3ZImz DE y3ZImz 1331064158 1.usa.gov NaN bitly [39.785, -75.682297] 0 direct 1331923267 America/New_York http://portal.hud.gov/hudportal/documents/hudd...
23 NaN Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3)... en-us US Lititz wWiOiD PA wWiOiD 1330217829 1.usa.gov NaN bitly [40.174999, -76.3078] 0 http://www.facebook.com/l.php?u=http%3A%2F%2F1... 1331923267 America/New_York http://www.tricare.mil/mybenefit/ProfileFilter...
24 NaN Mozilla/5.0 (Windows; U; Windows NT 5.1; es-ES... es-es,es;q=0.8,en-us;q=0.5,en;q=0.3 ES Bilbao wcndER 59 zkpJBR 1331922854 1.usa.gov NaN bnjacobs [43.25, -2.9667] 0 http://www.facebook.com/ 1331923268 Europe/Madrid http://www.nasa.gov/mission_pages/nustar/main/...
25 NaN Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.1... en-GB,en;q=0.8,en-US;q=0.6,en-AU;q=0.4 MY Kuala Lumpur wcndER 14 zkpJBR 1331922854 1.usa.gov NaN bnjacobs [3.1667, 101.699997] 0 http://www.facebook.com/ 1331923269 Asia/Kuala_Lumpur http://www.nasa.gov/mission_pages/nustar/main/...
26 NaN Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.1... ro-RO,ro;q=0.8,en-US;q=0.6,en;q=0.4 CY Nicosia wcndER 04 zkpJBR 1331922854 1.usa.gov NaN bnjacobs [35.166698, 33.366699] 0 http://www.facebook.com/?ref=tn_tnmn 1331923268 Asia/Nicosia http://www.nasa.gov/mission_pages/nustar/main/...
27 NaN Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8)... en-US,en;q=0.8 BR SPaulo zCaLwp 27 zUtuOu 1331923068 1.usa.gov NaN alelex88 [-23.5333, -46.616699] 0 direct 1331923269 America/Sao_Paulo http://apod.nasa.gov/apod/ap120312.html
28 NaN Mozilla/5.0 (iPad; CPU OS 5_0_1 like Mac OS X)... en-us None NaN vNJS4H NaN u0uD9q 1319563556 1.usa.gov NaN o_4us71ccioa NaN 0 direct 1331923270 https://www.nysdot.gov/rexdesign/design/commun...
29 NaN Mozilla/5.0 (iPad; U; CPU OS 3_2 like Mac OS X... en-us None NaN FPX0IM NaN FPX0IL 1331922978 1.usa.gov NaN twittershare NaN 1 http://t.co/5xlp0B34 1331923270 http://www.ed.gov/news/media-advisories/us-dep...
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
3530 NaN Mozilla/5.0 (Windows NT 6.0) AppleWebKit/535.1... en-US,en;q=0.8 US San Francisco xVZg4P CA wqUkTo 1331908247 go.nasa.gov NaN nasatwitter [37.7645, -122.429398] 0 http://www.facebook.com/l.php?u=http%3A%2F%2Fg... 1331926815 America/Los_Angeles http://www.nasa.gov/multimedia/imagegallery/im...
3531 NaN Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6... en-US None NaN wcndER NaN zkpJBR 1331922854 1.usa.gov NaN bnjacobs NaN 0 direct 1331926816 http://www.nasa.gov/mission_pages/nustar/main/...
3532 NaN Mozilla/5.0 (Windows NT 6.1; WOW64; rv:10.0.2)... en-us,en;q=0.5 US Washington Au3aUS DC A9ct6C 1331926420 1.usa.gov NaN ncsha [38.904202, -77.031998] 1 http://www.ncsha.org/ 1331926817 America/New_York http://portal.hud.gov/hudportal/HUD?src=/press...
3533 NaN Mozilla/5.0 (iPad; CPU OS 5_1 like Mac OS X) A... en-us US Jacksonville b2UtUJ FL ieCdgH 1301393171 go.nasa.gov NaN nasatwitter [30.279301, -81.585098] 1 direct 1331926818 America/New_York http://apod.nasa.gov/apod/
3534 NaN Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8)... en-us US Frisco vNJS4H TX u0uD9q 1319563556 1.usa.gov NaN o_4us71ccioa [33.149899, -96.855499] 1 direct 1331926820 America/Chicago https://www.nysdot.gov/rexdesign/design/commun...
3535 NaN Mozilla/5.0 (Windows NT 5.1; rv:10.0.2) Gecko/... en-us US Houston zIgLx8 TX yrPaLt 1331903484 aash.to NaN aashto [29.775499, -95.415199] 1 direct 1331926823 America/Chicago http://ntl.bts.gov/lib/44000/44300/44374/FHWA-...
3536 NaN Mozilla/5.0 (BlackBerry; U; BlackBerry 9800; e... en-US,en;q=0.5 None NaN xIcyim NaN yG1TTf 1331728309 go.nasa.gov NaN nasatwitter NaN 0 http://t.co/g1VKE8zS 1331926824 http://www.nasa.gov/mission_pages/hurricanes/a...
3537 NaN Mozilla/5.0 (Windows NT 6.1; WOW64; rv:10.0.2)... es-es,es;q=0.8,en-us;q=0.5,en;q=0.3 HN Tegucigalpa zCaLwp 08 w63FZW 1331546756 1.usa.gov NaN bufferapp [14.1, -87.216698] 0 http://t.co/A8TJyibE 1331926825 America/Tegucigalpa http://apod.nasa.gov/apod/ap120312.html
3538 NaN Mozilla/5.0 (iPhone; CPU iPhone OS 5_1 like Ma... en-us US Los Angeles qMac9k CA qds1Ge 1310473559 1.usa.gov NaN healthypeople [34.041599, -118.298798] 0 direct 1331926825 America/Los_Angeles http://healthypeople.gov/2020/connect/webinars...
3539 NaN Mozilla/5.0 (compatible; Fedora Core 3) FC3 KDE NaN US Bellevue zu2M5o WA zDhdro 1331586192 bit.ly NaN glimtwin [47.615398, -122.210297] 0 direct 1331926827 America/Los_Angeles http://www.federalreserve.gov/newsevents/press...
3540 NaN Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKi... en-US,en;q=0.8 US Payson wcndER UT zkpJBR 1331922854 1.usa.gov NaN bnjacobs [40.014198, -111.738899] 0 http://www.facebook.com/l.php?u=http%3A%2F%2F1... 1331926828 America/Denver http://www.nasa.gov/mission_pages/nustar/main/...
3541 NaN Mozilla/5.0 (X11; U; OpenVMS AlphaServer_ES40;... NaN US Bellevue zu2M5o WA zDhdro 1331586192 1.usa.gov NaN glimtwin [47.615398, -122.210297] 0 direct 1331926828 America/Los_Angeles http://www.federalreserve.gov/newsevents/press...
3542 NaN Mozilla/5.0 (compatible; MSIE 9.0; Windows NT ... en-us US Pittsburg y3reI1 CA y3reI1 1331926120 1.usa.gov NaN bitly [38.0051, -121.838699] 0 http://www.facebook.com/l.php?u=http%3A%2F%2F1... 1331926829 America/Los_Angeles http://www.sba.gov/community/blogs/community-b...
3543 1331926831 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
3544 NaN Mozilla/5.0 (Windows NT 6.1; WOW64; rv:5.0.1) ... en-us,en;q=0.5 US Wentzville vNJS4H MO u0uD9q 1319563556 1.usa.gov NaN o_4us71ccioa [38.790001, -90.854897] 1 direct 1331926831 America/Chicago https://www.nysdot.gov/rexdesign/design/commun...
3545 NaN Mozilla/5.0 (Windows NT 6.1; WOW64; rv:10.0.2)... en-us,en;q=0.5 US Saint Charles vNJS4H IL u0uD9q 1319563556 1.usa.gov NaN o_4us71ccioa [41.9352, -88.290901] 1 direct 1331926832 America/Chicago https://www.nysdot.gov/rexdesign/design/commun...
3546 NaN Mozilla/5.0 (iPhone; CPU iPhone OS 5_1 like Ma... en-us US Los Angeles qMac9k CA qds1Ge 1310473559 1.usa.gov NaN healthypeople [34.041599, -118.298798] 1 direct 1331926833 America/Los_Angeles http://healthypeople.gov/2020/connect/webinars...
3547 NaN Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8)... en-us US Silver Spring y0jYkg MD y0jYkg 1331851811 1.usa.gov NaN bitly [39.052101, -77.014999] 1 direct 1331926836 America/New_York http://www.epa.gov/otaq/regs/fuels/additive/e1...
3548 NaN Mozilla/5.0 (iPhone; CPU iPhone OS 5_1 like Ma... en-us US Mcgehee y5rMac AR xANY6O 1331916302 1.usa.gov NaN twitterfeed [33.628399, -91.356903] 1 https://twitter.com/fdarecalls/status/18069759... 1331926836 America/Chicago http://www.fda.gov/Safety/Recalls/ucm296326.htm
3549 NaN Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKi... sv-SE,sv;q=0.8,en-US;q=0.6,en;q=0.4 SE Sollefte eH8wu 24 7dtjei 1260316355 1.usa.gov NaN tweetdeckapi [63.166698, 17.266701] 1 direct 1331926834 Europe/Stockholm http://www.nasa.gov/mission_pages/WISE/main/in...
3550 NaN Mozilla/4.0 (compatible; MSIE 8.0; Windows NT ... en-us US Conshohocken A00b72 PA yGSwzn 1331917632 1.usa.gov NaN addthis [40.0798, -75.2855] 0 http://www.linkedin.com/home?trk=hb_tab_home_top 1331926837 America/New_York http://www.nlm.nih.gov/medlineplus/news/fullst...
3551 NaN Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKi... en-US,en;q=0.8 None NaN wcndER NaN zkpJBR 1331922854 1.usa.gov NaN bnjacobs NaN 0 http://plus.url.google.com/url?sa=z&n=13319268... 1331926837 http://www.nasa.gov/mission_pages/nustar/main/...
3552 NaN Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US... NaN US Decatur rqgJuE AL xcz8vt 1331227417 1.usa.gov NaN bootsnall [34.572701, -86.940598] 0 direct 1331926839 America/Chicago http://travel.state.gov/passport/passport_5535...
3553 NaN Mozilla/4.0 (compatible; MSIE 7.0; Windows NT ... en-us US Shrewsbury 9b6kNl MA 9b6kNl 1273672411 bit.ly NaN bitly [42.286499, -71.714699] 0 http://www.shrewsbury-ma.gov/selco/ 1331926840 America/New_York http://www.shrewsbury-ma.gov/egov/gallery/1341...
3554 NaN Mozilla/4.0 (compatible; MSIE 7.0; Windows NT ... en-us US Shrewsbury axNK8c MA axNK8c 1273672506 bit.ly NaN bitly [42.286499, -71.714699] 0 http://www.shrewsbury-ma.gov/selco/ 1331926840 America/New_York http://www.shrewsbury-ma.gov/egov/gallery/1341...
3555 NaN Mozilla/4.0 (compatible; MSIE 9.0; Windows NT ... en US Paramus e5SvKE NJ fqPSr9 1301298479 1.usa.gov NaN tweetdeckapi [40.9445, -74.07] 1 direct 1331926841 America/New_York http://www.fda.gov/AdvisoryCommittees/Committe...
3556 NaN Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1... en-US,en;q=0.8 US Oklahoma City jQLtP4 OK jQLtP4 1307530247 1.usa.gov NaN bitly [35.4715, -97.518997] 0 http://www.facebook.com/l.php?u=http%3A%2F%2F1... 1331926844 America/Chicago http://www.okc.gov/PublicNotificationSystem/Fo...
3557 NaN GoogleMaps/RochesterNY NaN US Provo mwszkS UT mwszkS 1308262393 j.mp NaN bitly [40.218102, -111.613297] 0 http://www.AwareMap.com/ 1331926846 America/Denver http://www.monroecounty.gov/etc/911/rss.php
3558 NaN GoogleProducer NaN US Mountain View zjtI4X CA zjtI4X 1327528527 1.usa.gov NaN bitly [37.419201, -122.057404] 0 direct 1331926847 America/Los_Angeles http://www.ahrq.gov/qual/qitoolkit/
3559 NaN Mozilla/4.0 (compatible; MSIE 8.0; Windows NT ... en-US US Mc Lean qxKrTK VA qxKrTK 1312897670 1.usa.gov NaN bitly [38.935799, -77.162102] 0 http://t.co/OEEEvwjU 1331926849 America/New_York http://herndon-va.gov/Content/public_safety/Pu...

3560 rows × 18 columns


In [27]:
frame['tz']


Out[27]:
0          America/New_York
1            America/Denver
2          America/New_York
3         America/Sao_Paulo
4          America/New_York
5          America/New_York
6             Europe/Warsaw
7                          
8                          
9                          
10      America/Los_Angeles
11         America/New_York
12         America/New_York
13                      NaN
14         America/New_York
15           Asia/Hong_Kong
16           Asia/Hong_Kong
17         America/New_York
18           America/Denver
19              Europe/Rome
20             Africa/Ceuta
21         America/New_York
22         America/New_York
23         America/New_York
24            Europe/Madrid
25        Asia/Kuala_Lumpur
26             Asia/Nicosia
27        America/Sao_Paulo
28                         
29                         
               ...         
3530    America/Los_Angeles
3531                       
3532       America/New_York
3533       America/New_York
3534        America/Chicago
3535        America/Chicago
3536                       
3537    America/Tegucigalpa
3538    America/Los_Angeles
3539    America/Los_Angeles
3540         America/Denver
3541    America/Los_Angeles
3542    America/Los_Angeles
3543                    NaN
3544        America/Chicago
3545        America/Chicago
3546    America/Los_Angeles
3547       America/New_York
3548        America/Chicago
3549       Europe/Stockholm
3550       America/New_York
3551                       
3552        America/Chicago
3553       America/New_York
3554       America/New_York
3555       America/New_York
3556        America/Chicago
3557         America/Denver
3558    America/Los_Angeles
3559       America/New_York
Name: tz, dtype: object

In [28]:
frame['tz'][:10]


Out[28]:
0     America/New_York
1       America/Denver
2     America/New_York
3    America/Sao_Paulo
4     America/New_York
5     America/New_York
6        Europe/Warsaw
7                     
8                     
9                     
Name: tz, dtype: object

In [29]:
tz_counts = frame['tz'].value_counts()

In [30]:
tz_counts


Out[30]:
America/New_York                  1251
                                   521
America/Chicago                    400
America/Los_Angeles                382
America/Denver                     191
Europe/London                       74
Asia/Tokyo                          37
Pacific/Honolulu                    36
Europe/Madrid                       35
America/Sao_Paulo                   33
Europe/Berlin                       28
Europe/Rome                         27
America/Rainy_River                 25
Europe/Amsterdam                    22
America/Phoenix                     20
America/Indianapolis                20
Europe/Warsaw                       16
America/Mexico_City                 15
Europe/Stockholm                    14
Europe/Paris                        14
America/Vancouver                   12
Pacific/Auckland                    11
Asia/Hong_Kong                      10
Europe/Oslo                         10
Europe/Moscow                       10
Europe/Helsinki                     10
Europe/Prague                       10
America/Puerto_Rico                 10
Asia/Calcutta                        9
Asia/Istanbul                        9
                                  ... 
Europe/Belgrade                      2
America/Lima                         1
Europe/Volgograd                     1
America/St_Kitts                     1
America/Argentina/Mendoza            1
Africa/Lusaka                        1
Asia/Manila                          1
Africa/Casablanca                    1
Australia/Queensland                 1
America/Argentina/Cordoba            1
Asia/Nicosia                         1
Europe/Skopje                        1
America/Tegucigalpa                  1
Africa/Johannesburg                  1
America/Monterrey                    1
Asia/Pontianak                       1
America/Costa_Rica                   1
Asia/Riyadh                          1
America/Mazatlan                     1
Asia/Novosibirsk                     1
America/Montevideo                   1
America/Santo_Domingo                1
America/Caracas                      1
America/La_Paz                       1
Europe/Ljubljana                     1
Europe/Uzhgorod                      1
Europe/Sofia                         1
Asia/Kuching                         1
America/Argentina/Buenos_Aires       1
Asia/Yekaterinburg                   1
Name: tz, dtype: int64

Plotting with matplotlib


In [40]:
# Relabel NaN entries of the 'tz' column as 'Missing' and empty-string entries
# as 'Unknown', then count how often each label occurs.
tz_clean = frame.tz.fillna('Missing').replace('', 'Unknown')
tz_clean_counts = tz_clean.value_counts()

In [41]:
tz_clean_counts


Out[41]:
America/New_York                  1251
Unknown                            521
America/Chicago                    400
America/Los_Angeles                382
America/Denver                     191
Missing                            120
Europe/London                       74
Asia/Tokyo                          37
Pacific/Honolulu                    36
Europe/Madrid                       35
America/Sao_Paulo                   33
Europe/Berlin                       28
Europe/Rome                         27
America/Rainy_River                 25
Europe/Amsterdam                    22
America/Phoenix                     20
America/Indianapolis                20
Europe/Warsaw                       16
America/Mexico_City                 15
Europe/Paris                        14
Europe/Stockholm                    14
America/Vancouver                   12
Pacific/Auckland                    11
Europe/Prague                       10
Europe/Oslo                         10
Europe/Moscow                       10
America/Puerto_Rico                 10
Asia/Hong_Kong                      10
Europe/Helsinki                     10
Asia/Calcutta                        9
                                  ... 
Europe/Riga                          2
America/Lima                         1
Asia/Manila                          1
Africa/Lusaka                        1
Africa/Casablanca                    1
America/Argentina/Mendoza            1
America/Argentina/Cordoba            1
Asia/Pontianak                       1
America/Argentina/Buenos_Aires       1
America/St_Kitts                     1
Europe/Skopje                        1
Asia/Riyadh                          1
Australia/Queensland                 1
America/Mazatlan                     1
America/Tegucigalpa                  1
Africa/Johannesburg                  1
America/Monterrey                    1
Asia/Nicosia                         1
America/Costa_Rica                   1
Europe/Sofia                         1
Asia/Kuching                         1
Asia/Novosibirsk                     1
America/Montevideo                   1
America/Santo_Domingo                1
Europe/Volgograd                     1
America/Caracas                      1
Europe/Ljubljana                     1
Europe/Uzhgorod                      1
Asia/Yekaterinburg                   1
America/La_Paz                       1
Name: tz, dtype: int64

In [42]:
# Horizontal bar chart of the ten most frequent time-zone labels
# (value_counts() already ordered them from most to least common).
tz_clean_counts.head(10).plot(
    title='top time zones in the 1.usa.gov sample data',
    kind='barh',
    rot=0,
)


Out[42]:
<matplotlib.axes._subplots.AxesSubplot at 0x107686a20>

In [43]:
# Keep only the first whitespace-delimited token of each non-null agent
# string (e.g. 'Mozilla/5.0') as a rough browser identifier.
results = Series([agent.split()[0] for agent in frame['a'].dropna()])

In [44]:
# Preview the first ten extracted agent tokens.
results.head(10)


Out[44]:
0               Mozilla/5.0
1    GoogleMaps/RochesterNY
2               Mozilla/4.0
3               Mozilla/5.0
4               Mozilla/5.0
5               Mozilla/5.0
6               Mozilla/5.0
7               Mozilla/5.0
8                Opera/9.80
9               Mozilla/5.0
dtype: object

In [45]:
# Tally how many records share each leading agent token.
browser_counts = results.value_counts()

In [46]:
# Display the per-token tallies.
browser_counts


Out[46]:
Mozilla/5.0                                          2594
Mozilla/4.0                                           601
GoogleMaps/RochesterNY                                121
Opera/9.80                                             34
TEST_INTERNET_AGENT                                    24
GoogleProducer                                         21
Mozilla/6.0                                             5
BlackBerry8520/5.0.0.681                                4
Dalvik/1.4.0                                            3
BlackBerry8520/5.0.0.592                                3
Goldfire                                                2
Socialite/7766                                          2
Acoon                                                   2
BlackBerry9630/5.0.0.975                                2
Opera/9.00                                              1
Vancouver                                               1
Opera/9.30                                              1
Mozilla/0.6                                             1
Opera/9.64(Windows                                      1
Opera/9.50                                              1
BlackBerry9700/5.0.0.423                                1
sometrik.com                                            1
BlackBerry8520/5.0.0.1067                               1
NokiaC3-00/5.0                                          1
HTTP_Request2/2.0.0                                     1
BlackBerry9300/5.0.0.997                                1
BlackBerry9700/5.0.0.862                                1
BlackBerry9530/5.0.0.328                                1
LG-GW382f/V10d                                          1
MOT-MB525/Blur_Version.34.4.709.MB525.Latam.en.01       1
ICE                                                     1
Vodafone/1.0/LG-KU990i/V10c                             1
SAMSUNG-SGH-A887/A887UCIJ1                              1
LG-LG220C[TF268435458416597116000000013524223841]       1
BlackBerry8530/5.0.0.654                                1
Nokia6790s-1b/ATT.03.22                                 1
dtype: int64

In [48]:
# Horizontal bars for the ten most common leading agent tokens.
browser_counts.head(10).plot(
    title='top browser type in 1.usa.gov sample data',
    kind='barh',
    rot=0,
)


Out[48]:
<matplotlib.axes._subplots.AxesSubplot at 0x106e7c898>

In [49]:
# Same ten agent tokens as a vertical bar chart, for comparison.
browser_counts.head(10).plot(
    title='top browser type in 1.usa.gov sample data',
    kind='bar',
    rot=0,
)


Out[49]:
<matplotlib.axes._subplots.AxesSubplot at 0x10806dcc0>

Windows User Counts


In [50]:
# Restrict to records that actually carry an agent string; the OS
# classification below needs a non-null 'a' value.
cframe = frame.dropna(subset=['a'])

In [52]:
# (rows, columns) of the full frame, before filtering on the agent column.
frame.shape


Out[52]:
(3560, 18)

In [53]:
# (rows, columns) after dropping records whose agent string is null.
cframe.shape


Out[53]:
(3440, 18)

In [54]:
import numpy as np

In [55]:
# Label each record by whether its agent string contains 'Window'
# (a substring of 'Windows'). The result is a plain NumPy array with one
# entry per row of cframe, in row order.
operating_system = np.where(
    cframe['a'].str.contains('Window'),
    'Windows',
    'Not Windows',
)

In [56]:
# Peek at the first ten OS labels.
operating_system[:10]


Out[56]:
array(['Windows', 'Not Windows', 'Windows', 'Not Windows', 'Windows',
       'Windows', 'Windows', 'Windows', 'Not Windows', 'Windows'], 
      dtype='<U11')

In [57]:
# Group on two keys at once: the 'tz' column and the operating_system
# array computed from the same cframe rows.
by_tz_os = cframe.groupby(by=['tz', operating_system])

In [58]:
# Build a time-zone x OS table of record counts.
aggregate_counts = (
    by_tz_os
    .size()      # number of records per (tz, OS) pair
    .unstack()   # pivot the OS labels into columns
    .fillna(0)   # pairs that never occur count as zero
)

In [59]:
# First ten rows of the time-zone x OS count table.
aggregate_counts.head(10)


Out[59]:
Not Windows Windows
tz
245 276
Africa/Cairo 0 3
Africa/Casablanca 0 1
Africa/Ceuta 0 2
Africa/Johannesburg 0 1
Africa/Lusaka 0 1
America/Anchorage 4 1
America/Argentina/Buenos_Aires 1 0
America/Argentina/Cordoba 0 1
America/Argentina/Mendoza 0 1

In [60]:
# Row positions that would order the time zones by their combined
# (Windows + Not Windows) count, ascending.
indexer = aggregate_counts.sum(axis=1).argsort()

In [61]:
# Note: the result keeps the tz labels as its index, but the integer values
# are row positions into aggregate_counts (consumed by take() below), not counts.
indexer[:10]


Out[61]:
tz
                                  24
Africa/Cairo                      20
Africa/Casablanca                 21
Africa/Ceuta                      92
Africa/Johannesburg               87
Africa/Lusaka                     53
America/Anchorage                 54
America/Argentina/Buenos_Aires    57
America/Argentina/Cordoba         26
America/Argentina/Mendoza         55
dtype: int64

In [62]:
# Reorder rows by ascending total, then keep the last ten, i.e. the ten
# time zones with the largest combined counts.
count_subset = aggregate_counts.take(indexer).tail(10)

In [63]:
# The ten largest time zones by combined count, smallest of them first.
count_subset


Out[63]:
Not Windows Windows
tz
America/Sao_Paulo 13 20
Europe/Madrid 16 19
Pacific/Honolulu 0 36
Asia/Tokyo 2 35
Europe/London 43 31
America/Denver 132 59
America/Los_Angeles 130 252
America/Chicago 115 285
245 276
America/New_York 339 912

In [67]:
# Stacked horizontal bars: each bar is one time zone, split by OS label.
count_subset.plot(
    title='Top time zones by Windows and non-Windows users',
    kind='barh',
    stacked=True,
)


Out[67]:
<matplotlib.axes._subplots.AxesSubplot at 0x10c0d3a20>

In [65]:
# Divide every row by its own total so each time zone's OS shares sum to 1.
normalized_count_subset = count_subset.div(
    count_subset.sum(axis=1),
    axis='index',
)

In [69]:
# Stacked bars of the row-normalized table; values plotted are fractions
# of each time zone's total (0 to 1), so every bar has the same length.
normalized_count_subset.plot(
    title='Percentage Windows and non-Windows users in top-occuring time zones',
    kind='barh',
    stacked=True,
)


Out[69]:
<matplotlib.axes._subplots.AxesSubplot at 0x10c3a0fd0>

In [ ]: