In [1]:
import pandas as pd
In [2]:
# Load the top-words table from JSON (path is relative to the working
# directory). The preview below shows the columns category, page,
# pagegrpnew, stfreq, word and year.
df = pd.read_json('Topwords_abstract_bycategory_v1.json')
In [3]:
# Preview the first rows (head() defaults to five) to sanity-check the load.
df.head()
Out[3]:
category
page
pagegrpnew
stfreq
word
year
0
business
2
front
20.000000
bark
1845
1
personplace
2
front
19.166667
Singapore
1845
2
events
2
front
15.833333
received
1845
3
business
2
front
15.000000
Steamer
1845
4
personplace
2
front
14.166667
China
1845
In [5]:
# Skeleton of a nested name/children tree rooted at 'news'. The 'children'
# list is created empty and is NOT filled in by this cell.
# NOTE(review): presumably one child node per category is intended -- confirm.
data = {
    'name': 'news',
    'children': [],
}

# Iterating a GroupBy yields (group_label, sub_frame) tuples, so each `key`
# printed here is the whole pair, not just the category label.
# NOTE: the recorded output shows the label u'events ' with a trailing space,
# i.e. the raw 'category' values are not whitespace-normalised.
for key in df.groupby('category'):
    # The parenthesised form prints the same tuple under Python 2 and also
    # parses under Python 3 (the bare `print key` statement does not).
    print(key)
(u'business', category page pagegrpnew stfreq word year
0 business 2 front 20.000000 bark 1845
3 business 2 front 15.000000 Steamer 1845
7 business 2 front 11.666667 Ship 1845
16 business 3 front 13.084112 Intelligence 1845
17 business 3 front 12.149533 brig 1845
21 business 3 front 10.280374 bark 1845
22 business 3 front 9.345794 schooner 1845
28 business 3 front 7.476636 Ship 1845
38 business 2 front 21.148036 Steamer 1846
42 business 2 front 11.480363 bark 1846
45 business 2 front 10.574018 brig 1846
46 business 2 front 10.271903 Ship 1846
50 business 3 front 12.068966 bark 1846
51 business 3 front 10.632184 Intelligence 1846
54 business 3 front 8.908046 brig 1846
55 business 3 front 8.045977 Ship 1846
60 business 4 middle 46.478873 bark 1846
64 business 4 middle 28.732394 brig 1846
66 business 4 middle 25.633803 cwt 1846
73 business 2 front 28.662420 Passenger 1848
82 business 3 front 19.915254 Intelligence 1848
92 business 3 front 5.508475 Ditto 1848
96 business 4 middle 30.514706 cent 1848
98 business 4 middle 27.573529 brig 1848
99 business 4 middle 27.205882 bark 1848
100 business 4 middle 26.470588 Ditto 1848
102 business 4 middle 23.529412 cwt 1848
103 business 4 middle 21.323529 dollar 1848
110 business 3 front 15.454545 Passenger 1849
116 business 3 front 7.272727 accounts 1849
... ... ... ... ... ... ...
10156 business 30 back 4.671858 billion 2005
10159 business 30 back 4.115684 company 2005
10160 business 30 back 4.115684 Market 2005
10163 business 30 back 3.893215 dollar 2005
10199 business 15 middle 6.482282 cent 2006
10204 business 15 middle 6.136560 Stock 2006
10206 business 15 middle 4.840104 Index 2006
10209 business 20 middle 7.560137 cent 2006
10212 business 20 middle 4.123711 dollar 2006
10216 business 20 middle 3.780069 billion 2006
10224 business 25 back 4.742268 cent 2006
10226 business 25 back 4.432990 Top 2006
10233 business 30 back 7.364341 cent 2006
10235 business 30 back 6.201550 billion 2006
10317 business 22 back 3.697479 cent 2007
10332 business 1 front 6.620429 Price 2008
10334 business 1 front 5.674653 cent 2008
10380 business 22 middle 6.511628 financial 2008
10401 business 25 back 4.275862 Price 2008
10402 business 25 back 4.137931 month 2008
10411 business 27 back 4.750869 Price 2008
10415 business 27 back 3.939745 month 2008
10420 business 1 front 7.062392 cent 2009
10426 business 1 front 4.766031 billion 2009
10428 business 1 front 4.722704 month 2009
10441 business 5 front 5.365854 financial 2009
10457 business 10 middle 4.817518 chief 2009
10481 business 20 middle 4.452467 financial 2009
10492 business 25 back 4.639805 economic 2009
10508 business 30 back 4.236453 Top 2009
[1688 rows x 6 columns])
(u'events ', category page pagegrpnew stfreq word year
2 events 2 front 15.833333 received 1845
8 events 2 front 10.000000 arrival 1845
9 events 2 front 10.000000 arrived 1845
18 events 3 front 12.149533 received 1845
32 events 3 front 6.542056 time 1845
33 events 3 front 6.542056 understand 1845
35 events 3 front 5.607477 left 1845
36 events 3 front 5.607477 sent 1845
40 events 2 front 16.012085 arrived 1846
44 events 2 front 10.876133 received 1846
57 events 3 front 6.896552 received 1846
58 events 3 front 6.896552 time 1846
59 events 3 front 6.609195 arrived 1846
61 events 4 middle 45.070423 sight 1846
62 events 4 middle 43.380282 days 1846
70 events 2 front 36.942675 arrival 1848
72 events 2 front 34.713376 announce 1848
75 events 2 front 17.197452 received 1848
79 events 2 front 10.828025 arrived 1848
84 events 3 front 9.745763 arrived 1848
87 events 3 front 7.203390 place 1848
90 events 3 front 6.355932 left 1848
91 events 3 front 6.355932 received 1848
104 events 4 middle 21.323529 sight 1848
107 events 3 front 20.909091 arrival 1849
108 events 3 front 20.000000 announce 1849
112 events 3 front 12.727273 received 1849
118 events 4 front 22.580645 announce 1849
119 events 4 front 21.774194 arrival 1849
121 events 4 front 14.516129 received 1849
... ... ... ... ... ... ...
10417 events 27 back 3.823870 home 2008
10421 events 1 front 6.629116 million 2009
10425 events 1 front 5.025997 global 2009
10434 events 5 front 7.317073 police 2009
10435 events 5 front 6.829268 work 2009
10437 events 5 front 5.853659 family 2009
10438 events 5 front 5.853659 help 2009
10439 events 5 front 5.853659 students 2009
10443 events 5 front 4.878049 show 2009
10451 events 10 middle 5.401460 help 2009
10452 events 10 middle 5.109489 police 2009
10458 events 10 middle 4.817518 party 2009
10470 events 15 middle 5.882353 police 2009
10473 events 20 middle 8.062575 letter 2009
10477 events 20 middle 5.776173 report 2009
10482 events 20 middle 4.452467 many 2009
10483 events 20 middle 4.332130 help 2009
10484 events 20 middle 4.332130 time 2009
10485 events 20 middle 4.211793 public 2009
10487 events 25 back 6.837607 time 2009
10491 events 25 back 4.884005 help 2009
10493 events 25 back 4.517705 letter 2009
10495 events 25 back 4.029304 home 2009
10496 events 25 back 4.029304 workers 2009
10497 events 25 back 3.907204 world 2009
10500 events 30 back 6.403941 world 2009
10502 events 30 back 6.108374 win 2009
10503 events 30 back 6.009852 million 2009
10506 events 30 back 5.123153 time 2009
10507 events 30 back 4.630542 Cup 2009
[3216 rows x 6 columns])
(u'government', category page pagegrpnew stfreq word year
11 government 2 front 9.166667 British 1845
15 government 3 front 14.953271 British 1845
19 government 3 front 10.280374 Brit 1845
20 government 3 front 10.280374 European 1845
29 government 3 front 6.542056 Court 1845
48 government 3 front 15.229885 European 1846
49 government 3 front 14.080460 British 1846
63 government 4 middle 42.253521 British 1846
81 government 3 front 22.033898 European 1848
89 government 3 front 6.779661 Chinese 1848
94 government 3 front 5.508475 Government 1848
95 government 3 front 5.508475 Governor 1848
97 government 4 middle 28.676471 British 1848
105 government 4 middle 20.955882 Brit 1848
134 government 6 middle 15.748031 British 1849
150 government 6 middle 5.511811 European 1849
155 government 6 middle 4.724409 Brit 1849
174 government 6 middle 3.937008 meeting 1849
179 government 7 middle 24.637681 British 1849
180 government 7 middle 23.913043 Brit 1849
192 government 3 front 8.196721 Court 1850
213 government 3 front 4.098361 Governor 1850
245 government 5 middle 4.347826 Governor 1850
266 government 6 middle 4.854369 European 1850
273 government 7 middle 22.848665 Brit 1850
285 government 8 back 22.848665 Brit 1850
331 government 5 middle 5.263158 Court 1851
332 government 5 middle 5.263158 Government 1851
353 government 5 middle 3.759398 meeting 1851
357 government 6 middle 7.826087 British 1851
... ... ... ... ... ... ...
10364 government 19 middle 9.523810 Minister 2008
10369 government 19 middle 5.782313 Prime 2008
10370 government 19 middle 5.782313 Government 2008
10371 government 19 middle 5.555556 President 2008
10374 government 19 middle 5.328798 State 2008
10378 government 22 middle 6.821705 Minister 2008
10386 government 22 middle 4.496124 Chinese 2008
10388 government 22 middle 4.341085 President 2008
10394 government 25 back 5.793103 Minister 2008
10398 government 25 back 4.689655 Chinese 2008
10400 government 25 back 4.275862 President 2008
10407 government 27 back 7.647740 Minister 2008
10419 government 1 front 8.362218 Minister 2009
10431 government 5 front 10.243902 Minister 2009
10436 government 5 front 6.341463 Prime 2009
10447 government 10 middle 10.364964 Chinese 2009
10448 government 10 middle 9.927007 Minister 2009
10449 government 10 middle 7.007299 Government 2009
10450 government 10 middle 6.569343 President 2009
10456 government 10 middle 4.817518 Prime 2009
10461 government 15 middle 10.943912 Minister 2009
10462 government 15 middle 9.575923 President 2009
10463 government 15 middle 9.165527 Chinese 2009
10465 government 15 middle 8.344733 Government 2009
10469 government 15 middle 6.292750 Prime 2009
10471 government 15 middle 5.745554 State 2009
10474 government 20 middle 6.738869 Minister 2009
10480 government 20 middle 5.174489 President 2009
10490 government 25 back 5.128205 Minister 2009
10504 government 30 back 5.221675 World 2009
[2471 rows x 6 columns])
(u'personplace', category page pagegrpnew stfreq word year
1 personplace 2 front 19.166667 Singapore 1845
4 personplace 2 front 14.166667 China 1845
5 personplace 2 front 14.166667 Dutch 1845
6 personplace 2 front 13.333333 Manila 1845
10 personplace 2 front 9.166667 Bombay 1845
12 personplace 2 front 9.166667 Calcutta 1845
13 personplace 2 front 8.333333 Malacca 1845
14 personplace 3 front 15.887850 China 1845
23 personplace 3 front 8.411215 Sir 1845
24 personplace 3 front 7.476636 Dutch 1845
25 personplace 3 front 7.476636 Manila 1845
26 personplace 3 front 7.476636 Rhio 1845
27 personplace 3 front 7.476636 Singapore 1845
30 personplace 3 front 6.542056 General 1845
31 personplace 3 front 6.542056 Hongkong 1845
34 personplace 3 front 5.607477 Calcutta 1845
37 personplace 2 front 41.691843 Singapore 1846
39 personplace 2 front 16.918429 China 1846
41 personplace 2 front 13.897281 Calcutta 1846
43 personplace 2 front 11.178248 Captain 1846
47 personplace 3 front 17.528736 China 1846
52 personplace 3 front 9.770115 Singapore 1846
53 personplace 3 front 8.908046 Calcutta 1846
56 personplace 3 front 6.896552 Dutch 1846
65 personplace 4 middle 27.605634 Singapore 1846
67 personplace 4 middle 24.507042 Goan 1846
68 personplace 4 middle 24.225352 China 1846
69 personplace 4 middle 23.380282 Bombay 1846
71 personplace 2 front 36.305732 Singapore 1848
74 personplace 2 front 17.197452 China 1848
... ... ... ... ... ... ...
10430 personplace 5 front 10.243902 China 2009
10432 personplace 5 front 8.780488 Tan 2009
10433 personplace 5 front 8.292683 Lim 2009
10440 personplace 5 front 5.365854 Lee 2009
10442 personplace 5 front 4.878049 United 2009
10444 personplace 10 middle 11.678832 China 2009
10445 personplace 10 middle 10.802920 Malaysia 2009
10446 personplace 10 middle 10.802920 Singapore 2009
10453 personplace 10 middle 4.817518 Chinas 2009
10454 personplace 10 middle 4.817518 Hong 2009
10455 personplace 10 middle 4.817518 Indonesia 2009
10459 personplace 10 middle 4.525547 Lee 2009
10460 personplace 15 middle 14.227086 China 2009
10464 personplace 15 middle 8.344733 United 2009
10466 personplace 15 middle 6.976744 Obama 2009
10467 personplace 15 middle 6.566347 Singapore 2009
10468 personplace 15 middle 6.566347 States 2009
10472 personplace 20 middle 14.681107 Singapore 2009
10475 personplace 20 middle 6.738869 United 2009
10476 personplace 20 middle 5.776173 States 2009
10478 personplace 20 middle 5.294826 Tan 2009
10479 personplace 20 middle 5.174489 China 2009
10486 personplace 25 back 14.041514 Singapore 2009
10488 personplace 25 back 6.349206 Tan 2009
10489 personplace 25 back 5.494505 United 2009
10494 personplace 25 back 4.029304 China 2009
10498 personplace 30 back 13.103448 Singapore 2009
10499 personplace 30 back 7.192118 United 2009
10501 personplace 30 back 6.206897 China 2009
10505 personplace 30 back 5.123153 Tan 2009
[3134 rows x 6 columns])
In [ ]:
Content source: SingaporeNews/singaporenews.github.io
Similar notebooks: