notebook.community

Edit and run



In [1]:

    
# -*- coding: UTF-8 -*-

# Render our plots inline
%matplotlib inline 

import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import re



In [2]:

    
# Download and clean from OECD: 
# Demographic Statistics_TL3: Regional typology (urban=1/intermediate=2/rural=3,4,5)



In [3]:

    
# Read the source CSV into the `data` frame, decoding the file as UTF-8.
data = pd.read_csv(
    filepath_or_buffer="5ce591ed-8cc2-4176-8eb7-69d10501b672.csv",
    encoding="utf-8",
)



In [4]:

    
# Preview the first two rows of the loaded table.
data.head(2)









    Out[4]:






  
    
      
      country
      region
    
  
  
    
      0
      AUS: Australia
      AU105: Sydney, NSW
    
    
      1
      NaN
      AU110: Hunter, NSW



In [5]:

    
# Peek at the very first cell (first row, first column) to see the raw
# "CODE: Name" format of a country entry.  A single positional lookup is used
# instead of chaining `[0]` on the row Series, because integer keys on a
# label-indexed Series are deprecated as positional accessors in newer pandas.
data.iloc[0, 0]









    Out[5]:





u'AUS: Australia'



In [6]:

    
# Empty lookup tables that the parsing loop fills in:
#   countries: country name -> country code
#   regions:   region code  -> {"name", "code", "country"} record
countries, regions = {}, {}



In [7]:

    
# Walk the table row by row and fill the `countries` and `regions` lookups.
#
# The first column holds "CODE: Name" for a country, but only on the first row
# of each country's group (the following rows are NaN), so the most recently
# seen country is carried forward.  The second column holds "CODE: Name" for a
# region.
#
# Fixes relative to the previous version of this cell:
#   * the country cell is split on the colon rather than on the first space,
#     so the stored country code is "AUS" instead of "AUS:";
#   * the Python 2-only `unicode()` call is gone -- the cells are already text
#     because the CSV is read with an explicit encoding;
#   * columns are read positionally through `iloc` instead of integer keys on
#     the row Series, which newer pandas deprecates;
#   * rows without a region are skipped, and a region that appears before any
#     country raises a descriptive error instead of a NameError.
current_country_name = None
for country_cell, region_cell in zip(data.iloc[:, 0], data.iloc[:, 1]):
    if pd.notnull(country_cell):
        # "AUS: Australia" -> code "AUS", name "Australia"
        country_code, country_name = country_cell.split(":", 1)
        current_country_name = country_name.strip()
        countries[current_country_name] = country_code.strip()

    if pd.isnull(region_cell):
        # Nothing to record for a row that carries no region.
        continue
    if current_country_name is None:
        raise ValueError(
            "region %r appears before any country row" % (region_cell,)
        )

    # "AU105: Sydney, NSW" -> code "AU105", name "Sydney, NSW"
    region_code, region_name = region_cell.split(": ", 1)
    # Strip spaces embedded in the code.  The original note here singled out
    # FI194 -- presumably that code contains a stray space in the source data;
    # TODO confirm.
    region_code = region_code.replace(" ", "")
    regions[region_code] = {
        "name": region_name,
        "code": region_code,
        "country": countries[current_country_name],
    }



In [8]:

    
import json

# Serialise the `regions` lookup to regions.json; the `with` block closes the
# file once the dump has finished.
with open('regions.json', 'w') as json_file:
    json.dump(obj=regions, fp=json_file)



In [ ]: