In [1]:
# Prepare the data: load the population table and the country-to-region mapping

In [2]:
import pandas as pd

In [3]:
# Read both input tables in one pass: the per-country population figures and
# the table that assigns each country to a region.
pop, continent = (
    pd.read_csv(csv_path)
    for csv_path in ("data/population.csv", "data/continent-mapping.csv")
)

In [4]:
# Preview the raw table: one row per country, one column per year (61 columns in total).
pop.head(2)


Out[4]:
Country Name Country Code Indicator Name Indicator Code 1960 1961 1962 1963 1964 1965 ... 2007 2008 2009 2010 2011 2012 2013 2014 2015 Unnamed: 60
0 Aruba ABW Population, total SP.POP.TOTL 54208.0 55435.0 56226.0 56697.0 57029.0 57360.0 ... 101218.0 101342.0 101416.0 101597.0 101936.0 102393.0 102921.0 103441.0 NaN NaN
1 Andorra AND Population, total SP.POP.TOTL 13414.0 14376.0 15376.0 16410.0 17470.0 18551.0 ... 84878.0 85616.0 85474.0 84419.0 82326.0 79316.0 75902.0 72786.0 NaN NaN

2 rows × 61 columns


In [5]:
# Keep only the country name and its 2014 population, under short column names.
# Renaming first and selecting afterwards (instead of overwriting `pop.columns`)
# makes the cell safe to re-run: `rename` silently skips labels that no longer
# exist, whereas indexing by the original headers raises a KeyError the second
# time the cell is executed.
pop = pop.rename(columns={"Country Name": "country", "2014": "population"})[
    ["country", "population"]
]
pop.head(2)


Out[5]:
country population
0 Aruba 103441.0
1 Andorra 72786.0

In [6]:
# Preview the mapping table; only the country name and its region are used further down.
continent.head(2)


Out[6]:
Country Name Country Code Region IncomeGroup SpecialNotes Unnamed: 5
0 Aruba ABW Latin America & Caribbean High income: nonOECD SNA data for 2000-2011 are updated from offici... NaN
1 Afghanistan AFG South Asia Low income Fiscal year end: March 20; reporting period fo... NaN

In [8]:
# Reduce the mapping to country -> region with short column names.
# Rename-then-select keeps the cell re-runnable: `rename` ignores labels that
# are already gone, while indexing by the original headers would raise a
# KeyError on a second execution.
continent = continent.rename(columns={"Country Name": "country", "Region": "region"})[
    ["country", "region"]
]
continent.head(2)


Out[8]:
country region
0 Aruba Latin America & Caribbean
1 Afghanistan South Asia

In [9]:
# Attach each country's region. With no `on=` argument pandas joins on the
# column names the two frames have in common, and the inner join keeps only
# countries that appear in both tables.
pop = pd.merge(pop, continent, how="inner")
pop.head(2)


Out[9]:
country population region
0 Aruba 103441.0 Latin America & Caribbean
1 Andorra 72786.0 Europe & Central Asia

In [10]:
# Persist the merged table. The `with` block closes the file handle as soon as
# the write finishes instead of leaving it open for the garbage collector, so
# the CSV is completely flushed to disk when this cell returns.
with open("data/regionpop.csv", "w") as regionpop_file:
    regionpop_file.write(pop.to_csv())

In [11]:
# Convert the flat table into a nested tree: World -> region -> country
def extractCountries(df):
    """Turn a frame of countries into a list of tree leaf nodes.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a "country" column and a numeric "population" column.

    Returns
    -------
    list of dict
        One ``{"name": <country>, "size": <population as int>}`` entry per row,
        in row order. Rows whose population is missing are left out, and an
        empty frame yields an empty list.
    """
    # A missing population cannot be converted with int() (it raises ValueError
    # on NaN) and has no meaningful size, so drop those rows first.
    known = df.dropna(subset=["population"])
    # Iterating the two columns directly also works for an empty frame, where a
    # row-wise `apply` would hand back a DataFrame that has no `.tolist()`.
    return [
        {"name": country, "size": int(population)}
        for country, population in zip(known["country"], known["population"])
    ]

# Map every region to the list of leaf nodes for its countries.
lists = pop.groupby("region").apply(extractCountries).to_dict()

# Assemble the hierarchy: a "World" root whose children are the regions, each
# of which carries its countries as children.
result = {
    "name": "World",
    "children": [
        {"name": region_name, "children": country_nodes}
        for region_name, country_nodes in lists.items()
    ],
}

In [12]:
import jupyterviz

In [14]:
# Hand the World -> region -> country tree to jupyterviz.
# NOTE(review): presumably draws a circle-packing chart sized by each leaf's
# "size" value, given type="circlepack" — confirm against the jupyterviz docs.
jupyterviz.viz(type="circlepack",data=result)


Out[14]:

In [ ]: