In [1]:
%matplotlib inline

from pandas import Series, DataFrame
import pandas as pd
import numpy as np
import matplotlib
from matplotlib import pyplot as plt
import seaborn as sns


---------------------------------------------------------------------------
ImportError                               Traceback (most recent call last)
<ipython-input-1-a41dc1103790> in <module>()
      6 import matplotlib
      7 from matplotlib import pyplot as plt
----> 8 import seaborn as sns

ImportError: No module named seaborn

In [44]:
# Scrape the population table from Wikipedia.
#   match='China' keeps only tables whose text contains "China";
#   header=0 uses the first table row as the column labels;
#   index_col=1 makes the country-name column the index.
url = 'https://en.wikipedia.org/wiki/List_of_countries_and_dependencies_by_population'
tables = pd.read_html(url, header=0, index_col=1, match='China')
# read_html returns a list of DataFrames; report how many tables matched.
# The parenthesised form prints the same text on Python 2 and Python 3.
print(len(tables))
countries = tables[0]
countries.head()


1
Out[44]:
Rank Population Date % of world population Source
Country (or dependent territory)
China[Note 2] 1 1372780000 October 23, 2015 18.9% Official population clock
India 2 1278980000 October 23, 2015 17.6% Official population clock
United States[Note 3] 3 322082000 October 23, 2015 4.43% Official population clock
Indonesia 4 257080000 October 11, 2015 3.53% Official projection
Brazil 5 205084000 October 23, 2015 2.82% Official population clock

In [45]:
# Inspect the column labels of the parsed population table.
countries.columns


Out[45]:
Index([u'Rank', u'Population', u'Date', u'% of world population', u'Source'], dtype='object')

In [46]:
# Sanity check: confirm that `countries` is a single DataFrame.
type(countries)


Out[46]:
pandas.core.frame.DataFrame

In [54]:
# Keep only the Population column for the first ten rows of the table.
df_toptten = DataFrame(countries.iloc[:10], columns=['Population'])
df_toptten


Out[54]:
Population
Country (or dependent territory)
China[Note 2] 1372780000
India 1278980000
United States[Note 3] 322082000
Indonesia 257080000
Brazil 205084000
Pakistan 191165000
Nigeria 183541000
Bangladesh 159241000
Russia[Note 4] 146412580
Japan 126832000

In [55]:
# Bar chart of the first ten populations; the row index supplies the x tick labels.
df_toptten.head(10).plot(kind='bar')


Out[55]:
<matplotlib.axes._subplots.AxesSubplot at 0xda31198>

In [86]:
# Build a numeric percentage column for the first twenty rows:
# remove the '%' sign from the text column, then parse what is left as float.
percent_col = '% of world population'
df = pd.DataFrame(countries.iloc[:20], columns=[percent_col])
df[percent_col] = df[percent_col].str.replace('%', '')
df['PercentNum'] = df[percent_col].astype(float)
df['PercentNum'].head(10)


Out[86]:
Country (or dependent territory)
China[Note 2]            18.90
India                    17.60
United States[Note 3]     4.43
Indonesia                 3.53
Brazil                    2.82
Pakistan                  2.63
Nigeria                   2.50
Bangladesh                2.19
Russia[Note 4]            2.01
Japan                     1.74
Name: PercentNum, dtype: float64

In [85]:
# Pie chart of the first ten percentage values, one labelled wedge per row.
# pie() sizes each wedge as x / sum(x), so the wedges show shares relative to
# these ten rows only, not to the whole world.
top_ten_share = df['PercentNum'].iloc[:10]
pie_artists = plt.pie(top_ten_share, labels=list(top_ten_share.index))
plt.axis('equal')  # equal aspect ratio so the pie is drawn as a circle
plt.title('Ten most populous countries: relative share of world population')
# Leave the (wedges, texts) tuple as the cell's value, as the bare pie() call did.
pie_artists


Out[85]:
([<matplotlib.patches.Wedge at 0x22abf1d0>,
  <matplotlib.patches.Wedge at 0x22abfa90>,
  <matplotlib.patches.Wedge at 0x22b11320>,
  <matplotlib.patches.Wedge at 0x22b11b70>,
  <matplotlib.patches.Wedge at 0x22b1f400>,
  <matplotlib.patches.Wedge at 0x22b1fc50>,
  <matplotlib.patches.Wedge at 0x22b2c4e0>,
  <matplotlib.patches.Wedge at 0x22b2cd30>,
  <matplotlib.patches.Wedge at 0x22b3b5c0>,
  <matplotlib.patches.Wedge at 0x22b3be10>],
 [<matplotlib.text.Text at 0x22abf6d8>,
  <matplotlib.text.Text at 0x22abff60>,
  <matplotlib.text.Text at 0x22b117f0>,
  <matplotlib.text.Text at 0x22b11f98>,
  <matplotlib.text.Text at 0x22b1f8d0>,
  <matplotlib.text.Text at 0x22b2c160>,
  <matplotlib.text.Text at 0x22b2c9b0>,
  <matplotlib.text.Text at 0x22b3b240>,
  <matplotlib.text.Text at 0x22b3ba90>,
  <matplotlib.text.Text at 0x22c3f320>])

In [ ]: