In [1]:
import os
# Show the entries of the current working directory ('.').
os.listdir('.')
Out[1]:
In [2]:
import json

# Input file; it is read line by line and every non-blank line is parsed
# as one standalone JSON document.
fname = 'alexa/cn.json'

alexa_items = []
with open(fname) as f:
    for line in f:
        # A blank line (e.g. a trailing newline at the end of the file) is
        # not valid JSON and would make json.loads raise, so skip it.
        if not line.strip():
            continue
        alexa_items.append(json.loads(line))
In [3]:
# Number of items held in alexa_items.
len(alexa_items)
Out[3]:
In [4]:
# Inspect the first element of alexa_items.
print(alexa_items[0])
In [5]:
# Bucket the items by their 'category' value:
# category value -> list of items carrying that value.
category_items = {}
for entry in alexa_items:
    category_items.setdefault(entry['category'], []).append(entry)

# Number of distinct categories found.
print(len(category_items))
In [6]:
import operator

# Map each category to the number of items grouped under it.
counts = {}
for category, items in category_items.items():
    counts[category] = len(items)

# (category, count) pairs ordered by count, largest first.
sorted_counts = sorted(counts.items(), key=operator.itemgetter(1), reverse=True)

# Print the ten largest categories. The loop variable is named `count` so it
# no longer overwrites the `counts` dict, and the formatted print() call
# produces the same "category count" line on both Python 2 and Python 3.
for category, count in sorted_counts[:10]:
    print('%s %s' % (category, count))
In [10]:
# Alternative listing of the ten largest categories: sort the category keys
# directly by the length of their item lists. The formatted print() call
# emits "category count" identically on Python 2 and Python 3.
for k in sorted(category_items, key=lambda name: len(category_items[name]), reverse=True)[:10]:
    print('%s %s' % (k, len(category_items[k])))
In [ ]: