In [1]:
import json
import pandas as pd
import numpy as np

In [16]:
# Load the top-hashtags export and preview it.
# Column "n" holds a JSON-encoded string per row (with "tagid" and "hashtag"
# keys, as the preview shows) and column "deg" holds an integer; a better
# upstream query would have returned the hashtag as its own column.
hashtags_csv = "data/top_hashtags.csv"
hts = pd.read_csv(hashtags_csv)
hts.head(n=5)


Out[16]:
n deg
0 {"tagid":"h3","hashtag":"paris"} 719571
1 {"tagid":"h2","hashtag":"prayforparis"} 368931
2 {"tagid":"h5","hashtag":"parisattacks"} 206159
3 {"tagid":"h19","hashtag":"prayers4paris"} 52805
4 {"tagid":"h8","hashtag":"bataclan"} 51524

In [17]:
# Smallest value in the "deg" column; used as the lower end of the input
# range when the sizes are interpolated further down.
min_deg = hts.loc[:, "deg"].min()
min_deg


Out[17]:
1798

In [18]:
# Largest value in the "deg" column; used as the upper end of the input
# range when the sizes are interpolated further down.
max_deg = hts.loc[:, "deg"].max()
max_deg


Out[18]:
719571

In [19]:
# Build one {"text", "size"} record per hashtag.
#   text: the hashtag parsed out of the JSON string in column "n", prefixed
#         with "#".
#   size: "deg" mapped linearly from [min_deg, max_deg] onto SIZE_RANGE.
SIZE_RANGE = [5, 100]  # output sizes for the smallest / largest "deg"

# Interpolate the whole column in one call instead of once per row, and
# convert to plain Python floats so the records are ordinary JSON-ready values.
sizes = np.interp(hts["deg"].values, [min_deg, max_deg], SIZE_RANGE).tolist()

converted = [
    {"text": "#" + json.loads(raw)["hashtag"], "size": size}
    for raw, size in zip(hts["n"], sizes)
]

In [20]:
# Serialize the records to disk. The target has a .js extension, but what is
# written is plain JSON. The `with` block closes the file even if
# json.dump raises partway through.
with open('data/hashtag_sizes.js', 'w') as f:
    json.dump(converted, f)

In [ ]: