notebook.community

Edit and run



In [1]:

    
#Preparing the data



In [2]:

    
import pandas as pd



In [3]:

    
# Load the two input tables: the per-country population table and the
# country -> region mapping (same files, same variable names as before).
pop, continent = (
    pd.read_csv(csv_path)
    for csv_path in ("data/population.csv", "data/continent-mapping.csv")
)



In [4]:

    
# Preview the first two rows of the raw population table to see which columns it has.
pop.head(2)









    Out[4]:






  
    
      
      Country Name
      Country Code
      Indicator Name
      Indicator Code
      1960
      1961
      1962
      1963
      1964
      1965
      ...
      2007
      2008
      2009
      2010
      2011
      2012
      2013
      2014
      2015
      Unnamed: 60
    
  
  
    
      0
      Aruba
      ABW
      Population, total
      SP.POP.TOTL
      54208.0
      55435.0
      56226.0
      56697.0
      57029.0
      57360.0
      ...
      101218.0
      101342.0
      101416.0
      101597.0
      101936.0
      102393.0
      102921.0
      103441.0
      NaN
      NaN
    
    
      1
      Andorra
      AND
      Population, total
      SP.POP.TOTL
      13414.0
      14376.0
      15376.0
      16410.0
      17470.0
      18551.0
      ...
      84878.0
      85616.0
      85474.0
      84419.0
      82326.0
      79316.0
      75902.0
      72786.0
      NaN
      NaN
    
  

2 rows × 61 columns



In [5]:

    
# Keep only the country name and its 2014 population, under shorter column names.
pop = pop[["Country Name", "2014"]].rename(
    columns={"Country Name": "country", "2014": "population"}
)
pop.head(2)









    Out[5]:






  
    
      
      country
      population
    
  
  
    
      0
      Aruba
      103441.0
    
    
      1
      Andorra
      72786.0



In [6]:

    
# Preview the first two rows of the raw country -> region mapping table.
continent.head(2)









    Out[6]:






  
    
      
      Country Name
      Country Code
      Region
      IncomeGroup
      SpecialNotes
      Unnamed: 5
    
  
  
    
      0
      Aruba
      ABW
      Latin America & Caribbean
      High income: nonOECD
      SNA data for 2000-2011 are updated from offici...
      NaN
    
    
      1
      Afghanistan
      AFG
      South Asia
      Low income
      Fiscal year end: March 20; reporting period fo...
      NaN



In [8]:

    
# Reduce the mapping table to country name and region, under shorter column names.
continent = continent[["Country Name", "Region"]].rename(
    columns={"Country Name": "country", "Region": "region"}
)
continent.head(2)









    Out[8]:






  
    
      
      country
      region
    
  
  
    
      0
      Aruba
      Latin America & Caribbean
    
    
      1
      Afghanistan
      South Asia



In [9]:

    
# Attach each country's region to its population row.
# The join key and join type are spelled out instead of relying on merge()'s
# defaults (join on every shared column name): here "country" is the only
# shared column, so the result is the same, but the intent is now explicit and
# an accidental extra shared column can no longer change the join silently.
# Note: this is an inner join, so countries missing from either table are dropped.
pop = pop.merge(continent, on="country", how="inner")
pop.head(2)









    Out[9]:






  
    
      
      country
      population
      region
    
  
  
    
      0
      Aruba
      103441.0
      Latin America & Caribbean
    
    
      1
      Andorra
      72786.0
      Europe & Central Asia



In [10]:

    
# Save the merged table (index included, as before). Passing the path straight
# to to_csv() lets pandas open, write and close the file itself, instead of
# leaving an unclosed handle behind from open(...).write(...). It also avoids
# pushing pandas' own line terminators through a text-mode file a second time.
pop.to_csv("data/regionpop.csv")



In [11]:

    
# Convert the flat table into a nested tree: World -> regions -> countries
def extractCountries(df):
    """Turn a frame of countries into a list of leaf nodes for the tree.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have a "country" column and a numeric "population" column.

    Returns
    -------
    list of dict
        One ``{"name": <country>, "size": <population as int>}`` per row, in
        row order. Rows whose population is missing (NaN) are skipped, because
        ``int(NaN)`` raises ValueError. An empty frame yields an empty list.
    """
    # Drop rows without a population figure before the int() conversion.
    known = df.dropna(subset=["population"])
    return [
        {"name": name, "size": int(size)}
        for name, size in zip(known["country"], known["population"])
    ]

# Map each region name to the list of country nodes that belong to it.
lists = pop.groupby("region").apply(extractCountries).to_dict()

# Root node "World" with one child per region; each region node carries its
# countries as children.
result = {
    "name": "World",
    "children": [
        {"name": region_name, "children": country_nodes}
        for region_name, country_nodes in lists.items()
    ],
}



In [12]:

    
import jupyterviz



In [14]:

    
# Hand the nested `result` dict to jupyterviz with the "circlepack" visualization type.
# NOTE(review): presumably this renders a circle-packing chart sized by each
# node's "size" value; confirm against the jupyterviz documentation.
jupyterviz.viz(type="circlepack",data=result)









    Out[14]:



In [ ]:

	Country Name	Country Code	Indicator Name	Indicator Code	1960	1961	1962	1963	1964	1965	...	2007	2008	2009	2010	2011	2012	2013	2014	2015	Unnamed: 60
0	Aruba	ABW	Population, total	SP.POP.TOTL	54208.0	55435.0	56226.0	56697.0	57029.0	57360.0	...	101218.0	101342.0	101416.0	101597.0	101936.0	102393.0	102921.0	103441.0	NaN	NaN
1	Andorra	AND	Population, total	SP.POP.TOTL	13414.0	14376.0	15376.0	16410.0	17470.0	18551.0	...	84878.0	85616.0	85474.0	84419.0	82326.0	79316.0	75902.0	72786.0	NaN	NaN

	Country Name	Country Code	Region	IncomeGroup	SpecialNotes	Unnamed: 5
0	Aruba	ABW	Latin America & Caribbean	High income: nonOECD	SNA data for 2000-2011 are updated from offici...	NaN
1	Afghanistan	AFG	South Asia	Low income	Fiscal year end: March 20; reporting period fo...	NaN