In [87]:
import matplotlib.pyplot as plt
import json
import pandas as pd
import numpy as np
%matplotlib inline

In [88]:
# Build a 1000-step random walk over consecutive calendar days starting
# 2000-01-01 (cumulative sum of standard-normal draws) and plot it.
daily_index = pd.date_range('1/1/2000', periods=1000)
ts = pd.Series(np.random.randn(1000), index=daily_index).cumsum()
ts.plot()


Out[88]:
<matplotlib.axes.AxesSubplot at 0x7f9f35c47c90>

In [6]:
# Read the course data CSV into a DataFrame.
# NOTE(review): the path is absolute and specific to one machine.
edx_csv_path = '/home/ch/edx_note/edx_data.csv'
f = pd.read_csv(edx_csv_path)

In [7]:
# Show the column labels of the loaded frame as the cell output.
f.columns


Out[7]:
Index([u'course_id', u'userid_DI', u'registered', u'viewed', u'explored',
       u'certified', u'final_cc_cname_DI', u'LoE_DI', u'YoB', u'gender',
       u'grade', u'start_time_DI', u'last_event_DI', u'nevents', u'ndays_act',
       u'nplay_video', u'nchapters', u'nforum_posts', u'roles',
       u'incomplete_flag'],
      dtype='object')

In [8]:
# Pull out the single column used for the per-country tally below.
# presumably holds a country/region name per row (judging by the label) - confirm against the data dictionary
country_column = 'final_cc_cname_DI'
nation = f[country_column]

In [11]:
# Count how many rows carry each distinct value of `nation`;
# value_counts() orders the result from most to least frequent by default.
count_nation = nation.value_counts()
# Call form of print: identical output on Python 2 for a single argument,
# and valid syntax on Python 3 (the bare `print x` statement is not).
print(count_nation)

In [76]:
# Draw the counts as a horizontal bar chart, then write the current figure
# to disk as a 600-dpi PNG with the surrounding whitespace trimmed.
bar_chart_options = dict(
    kind='barh',
    title='nation count',
    figsize=(8, 6),
    legend=True,
    grid=True,
)
count_nation.plot(**bar_chart_options)
plt.savefig(
    '/home/ch/pycharm_code/edx_analytics_ustc/images/nation_count.png',
    dpi=600,
    bbox_inches="tight",
)



In [90]:
# Turn the counts Series into a plain dict (index label -> count) and
# persist it as JSON in the current working directory.
dict_nation = count_nation.to_dict()
# The context manager closes (and therefore flushes) the file even if
# json.dump raises; the original left the handle open for the garbage collector.
with open('count_nation.json', 'w') as json_file:
    json.dump(dict_nation, json_file)