In [1]:
import pandas as pd
import json
from collections import Counter
import matplotlib.pyplot as plt


# Notebook-wide pandas display settings: render frames as plain text instead
# of HTML, and cap how many columns/rows are printed before truncation.
pd.options.display.notebook_repr_html = False
pd.options.display.max_columns = 20
pd.options.display.max_rows = 25

In [2]:
# Load the raw tweet dump as a list of lines; later cells parse each line as
# a separate JSON document. The `with` block guarantees the file handle is
# closed as soon as the lines are read, instead of leaking it for the life of
# the kernel.
with open('./data/test.20140209-140126.json', 'r') as tweets_file:
    data = tweets_file.readlines()

In [3]:
# Tally hashtag frequencies over every line in `data` (each line is parsed
# as one JSON document). Tags are lower-cased before counting so that case
# variants collapse together, and the 'nowlistening' tag is left out.
# Tags are counted in the order they are encountered, which keeps the
# Counter's insertion order (used to break ties in `most_common`) the same
# as a plain nested loop would produce.
cnt = Counter(
    tag
    for raw_tweet in data
    for tag in (
        entity['text'].lower()
        for entity in json.loads(raw_tweet)['entities']['hashtags']
    )
    if tag != 'nowlistening'
)

In [4]:
# Keep the five most frequent hashtags as (hashtag, count) pairs and lump
# everything else into a single trailing 'others' entry, so the pairs still
# add up to the total number of counted hashtags.
common_hashtags = cnt.most_common(5)

# Labels follow the same order as the pairs above.
labels = [tag for tag, _ in common_hashtags]

# Occurrences already accounted for by the top entries.
top_count = sum(occurrences for _, occurrences in common_hashtags)

remaining = sum(cnt.values()) - top_count
common_hashtags.append(('others', remaining))
labels.append('others')

In [5]:
# Turn the (hashtag, count) pairs into a one-column frame indexed by hashtag.
hashtag_counts = pd.DataFrame(common_hashtags, columns=['hashtag', 'count'])
df = hashtag_counts.set_index('hashtag')

# Preview the first rows (head() shows at most five, so the final 'others'
# row of the six-row frame is not displayed here).
df.head()


Out[5]:
            count
hashtag          
music        1009
nowplaying    404
なうぷれ          125
再生中            93
浜さん只今再生中       93

[5 rows x 1 columns]

In [6]:
# Draw the hashtag share as a pie chart on a large square canvas.
# `figure` is only available as a bare name when the pylab namespace has been
# injected into the kernel; going through `plt` makes the cell work on a
# fresh kernel with just the imports at the top of the notebook.
plt.figure(1, figsize=(12, 12))

# Pass the 'count' column as a 1-D sequence of wedge sizes rather than the
# whole (6 x 1) DataFrame. `labels` was built in the same order as the rows
# of `df`, so each wedge gets its matching hashtag.
plt.pie(df['count'], labels=labels, autopct='%1.1f%%', shadow=True, startangle=90)


Out[6]:
([<matplotlib.patches.Wedge at 0x10e36d510>,
  <matplotlib.patches.Wedge at 0x10e37af10>,
  <matplotlib.patches.Wedge at 0x10e386a10>,
  <matplotlib.patches.Wedge at 0x10e391550>,
  <matplotlib.patches.Wedge at 0x10e39c090>,
  <matplotlib.patches.Wedge at 0x10e3a2b90>],
 [<matplotlib.text.Text at 0x10e37a250>,
  <matplotlib.text.Text at 0x10e381d90>,
  <matplotlib.text.Text at 0x10e38c8d0>,
  <matplotlib.text.Text at 0x10e396410>,
  <matplotlib.text.Text at 0x10e39cf10>,
  <matplotlib.text.Text at 0x10e3a7a50>],
 [<matplotlib.text.Text at 0x10e37a910>,
  <matplotlib.text.Text at 0x10e386410>,
  <matplotlib.text.Text at 0x10e38cf10>,
  <matplotlib.text.Text at 0x10e396a50>,
  <matplotlib.text.Text at 0x10e3a2590>,
  <matplotlib.text.Text at 0x10e3ac0d0>])