In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [2]:
# Read the top-words JSON export into a DataFrame; the cells below use its
# `word`, `year`, `value` and `pagegroup` columns.
TOPWORDS_JSON = 'Topwords_abstract_bypage_v4.json'
df = pd.read_json(TOPWORDS_JSON)

In [3]:
# Preview the first five rows to check the column layout.
df.head(n=5)


Out[3]:
page pagegroup value word year
0 2 1 20.000000 bark 1845
1 2 1 15.833333 received 1845
2 2 1 15.000000 Steamer 1845
3 2 1 14.166667 Dutch 1845
4 2 1 14.166667 China 1845

In [4]:
# Keep only the rows whose `pagegroup` equals 1.
in_group_one = df['pagegroup'] == 1
page1 = df[in_group_one]

In [27]:
# For every word in `page1`, count its rows (non-null `year` entries); after
# reset_index the `year` column therefore holds that count, not a year.
# Then rank by count, highest first, and show positions 26-75 of the ranking.
#
# `DataFrame.sort(columns=...)` was removed from pandas (0.20);
# `sort_values(by=...)` is its replacement. `.iloc` makes the slice
# explicitly positional.
word_counts = page1.groupby('word')['year'].count().reset_index()
word_counts.sort_values(by='year', ascending=False).iloc[25:75]


Out[27]:
word year
336 instant 13
405 received 13
244 Tin 13
280 arrival 13
240 Tengku 12
207 Sarawak 12
11 Bali 12
325 general 12
34 Captain 12
235 Tan 11
267 York 11
350 lie 11
363 military 11
0 Abdul 11
335 ill 11
19 Black 11
205 Sago 11
407 recently 11
158 Malaysia 10
6 American 10
54 Company 10
53 Communist 10
31 Calcutta 10
89 Goh 10
357 mail 10
48 Coffee 10
111 India 10
449 visit 9
328 help 9
274 announce 9
275 annual 9
87 German 9
397 present 9
124 Japanese 9
412 set 9
402 quality 8
181 Pearl 8
361 men 8
79 Flour 8
81 Foreign 8
448 via 8
25 Britain 8
347 left 8
345 lbs 8
114 Indonesian 7
330 home 7
386 passengers 7
113 Indonesia 7
252 Urn 7
221 South 7

In [16]:
# Plot `value` against `year` for the words "British" and "Chinese" and save
# the figure as a PDF.
#
# The seaborn style is applied *before* the axes are created: set_style only
# changes the defaults used for axes made afterwards, so calling it after
# plotting leaves this figure unstyled on a fresh kernel.
sns.set_style('white')

fig, ax = plt.subplots()
for word, color_name in [('British', 'denim blue'), ('Chinese', 'pale red')]:
    # Filter once and reuse the result for both the x and y series.
    # NOTE(review): if df holds several rows per (word, year), e.g. one per
    # page, the line will double back on itself; confirm against the data.
    series = df[df['word'] == word]
    ax.plot(series['year'], series['value'],
            color=sns.xkcd_rgb[color_name], label=word)
ax.set_xlabel('Year')
ax.set_ylabel('Count')
ax.set_title('The Fall of "British" and the Rise of "Chinese"')
ax.legend(loc='upper right')
sns.despine(ax=ax)
fig.savefig('BritishChinese.pdf')



In [32]:
# Overlay `value` against `year` for five words on a single axes.
# Every line gets a label and the axes a legend and axis labels; without
# them the five series cannot be told apart in the rendered figure.
fig, ax = plt.subplots()
word_styles = [
    ('steamer', {}),                  # no explicit colour: default cycle
    ('Wharf', {'color': 'red'}),
    ('tons', {'color': 'green'}),
    ('bark', {'color': 'purple'}),
    ('ship', {'color': 'pink'}),
]
for word, style in word_styles:
    # Filter once and reuse the result for both the x and y series.
    series = df[df['word'] == word]
    ax.plot(series['year'], series['value'], label=word, **style)
ax.set_xlabel('Year')
ax.set_ylabel('Count')
ax.legend();  # trailing ';' hides the Legend repr in the cell output


Out[32]:
[<matplotlib.lines.Line2D at 0x10ddddd10>]

In [26]:
# Plot `value` against `year` for the word "minister".
minister = df[df['word'] == 'minister']
plt.plot(minister['year'], minister['value'])


Out[26]:
[<matplotlib.lines.Line2D at 0x106617550>]

In [ ]:
# Plot `value` against `year` for the words "Rubber" and "Pepper" and save
# the figure as a PDF.
#
# The seaborn style is applied *before* the axes are created: set_style only
# changes the defaults used for axes made afterwards.
sns.set_style('white')

fig, ax = plt.subplots()
for word, color_name in [('Rubber', 'greyish'), ('Pepper', 'faded green')]:
    # Filter once and reuse the result for both the x and y series.
    series = df[df['word'] == word]
    ax.plot(series['year'], series['value'],
            color=sns.xkcd_rgb[color_name], label=word)
ax.set_xlabel('Year')
ax.set_ylabel('Count')
ax.set_title("Singapore's Transition from Agriculture")
ax.legend(loc='upper right')
sns.despine(ax=ax)
# Save under its own name: this cell previously wrote to 'BritishChinese.pdf',
# overwriting the British/Chinese figure saved by an earlier cell.
fig.savefig('RubberPepper.pdf')