In [1]:
# -*- coding: UTF-8 -*-
# Render our plots inline
%matplotlib inline
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import re
In [2]:
# Download and clean from OECD:
# Demographic Statistics_TL3: Regional typology (urban=1/intermediate=2/rural=3,4,5)
In [3]:
# Read the OECD export into a DataFrame; the file is decoded as UTF-8.
csv_path = "5ce591ed-8cc2-4176-8eb7-69d10501b672.csv"
data = pd.read_csv(csv_path, encoding="utf-8")
In [4]:
# Preview the first two rows (positional slice) to check the layout.
data.iloc[:2]
Out[4]:
In [5]:
# Show the value in the first row / first column.
# Uses explicit positional indexing: the former `data.loc[0][0]` relied on an
# integer key being treated as a position on a label-indexed row Series,
# a fallback that newer pandas versions deprecate.
data.iloc[0, 0]
Out[5]:
In [6]:
# Empty lookups, filled by the row loop in the next cell:
#   countries: country name -> country code
#   regions:   region code  -> {"name", "code", "country"}
countries, regions = {}, {}
In [7]:
# Walk the table row by row and fill the two lookups:
#   countries: country name -> country code
#   regions:   region code  -> {"name", "code", "country"}
# NOTE(review): the indentation of this cell was lost in the export. The
# region handling is placed outside the `if` on the assumption that the
# country cell is only filled on the first row of each country group, so the
# most recent country carries over to the rows below it -- confirm against
# the CSV.
for _, row in data.iterrows():
    # Explicit positional access: plain `row[0]` on a label-indexed Series
    # depends on an integer-as-position fallback that newer pandas deprecates.
    country_cell = row.iloc[0]
    if not pd.isnull(country_cell):
        # Get countries: names and codes (split on the first space only,
        # so the name part may itself contain spaces)
        current_country = country_cell
        current_country_split = current_country.split(" ", 1)
        current_country_name = current_country_split[1]
        countries[current_country_name] = current_country_split[0]

    # Get regions (split on the first ": " only)
    current_region = row.iloc[1]
    current_region_split = current_region.split(": ", 1)
    # FI194 (example noted by the original author; presumably a region whose
    # name needs non-ASCII handling -- verify)
    # str.split already returns text, so the Python 2-only `unicode()` call
    # (a NameError on Python 3) is not needed.
    current_region_name = current_region_split[1]
    # Remove spaces embedded in the region code.
    current_region_code = re.sub(r' ', "", current_region_split[0])
    regions[current_region_code] = {
        "name": current_region_name,
        "code": current_region_code,
        "country": countries[current_country_name],
    }
In [8]:
import json

# Persist the region lookup (region code -> name/code/country) as JSON.
with open('regions.json', 'w') as out_file:
    json.dump(regions, out_file)
In [ ]: