In [153]:
from fig_utils import *
import matplotlib.pyplot as plt
import time
%matplotlib inline
In these experiments, we compare the performance of the transfer learning model based on satellite imagery with the performance of a model that uses nightlights.
The parameters needed to produce the plots for Panels A and B are as follows:
With many trials, producing the plots takes several minutes or more. With 100 trials, expect roughly 40-60 minutes for LSMS and longer for DHS.
Each data directory should contain the following 4 files:
Each data directory should also contain one of the following:
Exact results may differ slightly with each run due to randomly splitting data into training and test sets.
In [148]:
# Parameters for the LSMS (pooled) run of the nightlights comparison.
# All of these are passed positionally to compare_models in the next cell.
survey = 'lsms'
country_path = '../data/output/LSMS/pooled/'
# 0.05 through 1.00 in steps of 0.05 (20 values). Dividing exact integers
# by 100.0 yields the same doubles as writing the decimal literals out.
percentiles = list(pct / 100.0 for pct in range(5, 101, 5))
dimension = 10
# NOTE(review): k / k_inner look like outer / inner fold counts -- confirm
# against compare_models in fig_utils.
k, k_inner = 10, 5
trials = 5
poverty_line = 1.90
multiples = list(range(1, 4))
In [149]:
# Time the model comparison (transfer-learning model vs. nightlights) for
# the parameters set in the preceding cell and report wall-clock seconds.
t0 = time.time()
compare_models(country_path, survey, percentiles, dimension, k, k_inner,
               trials, poverty_line, multiples)
t1 = time.time()
# Parenthesised form prints the same text on Python 2 and Python 3; the
# bare `print '...'` statement is a SyntaxError on Python 3.
print('Finished in {} seconds'.format(t1 - t0))
In [151]:
# Parameters for the DHS (pooled) run of the nightlights comparison.
# All of these are passed positionally to compare_models in the next cell.
country_path = '../data/output/DHS/pooled/'
survey = 'dhs'
# 0.05 through 1.00 in steps of 0.05 (20 values). Dividing exact integers
# by 100.0 yields the same doubles as writing the decimal literals out.
percentiles = list(pct / 100.0 for pct in range(5, 101, 5))
dimension = 10
k = 5
k_inner = 3
trials = 3
# The next cell also passes poverty_line and multiples to compare_models.
# They were previously only defined in the LSMS parameter cell, so running
# the DHS experiment on a fresh kernel raised NameError. Restated here with
# the same values so this cell is self-contained.
# NOTE(review): whether compare_models actually uses them when
# survey == 'dhs' is not visible from this notebook -- confirm in fig_utils.
poverty_line = 1.90
multiples = [1, 2, 3]
In [152]:
# Time the model comparison (transfer-learning model vs. nightlights) for
# the parameters set in the preceding cell and report wall-clock seconds.
t0 = time.time()
compare_models(country_path, survey, percentiles, dimension, k, k_inner,
               trials, poverty_line, multiples)
t1 = time.time()
# Parenthesised form prints the same text on Python 2 and Python 3; the
# bare `print '...'` statement is a SyntaxError on Python 3.
print('Finished in {} seconds'.format(t1 - t0))
In these experiments, we randomly reassign daytime imagery to survey locations and retrain the model on incorrect images (see SM 1.7).
The parameters needed to produce the plots for Panels C and D are as follows:
If `trials` is large (>100), producing the plots will take more than a couple of minutes.
Each data directory should contain the following 3 files:
Each data directory should also contain one of the following:
Exact results may differ slightly with each run due to randomly splitting data into training and test sets.
In [50]:
# Parameters for the LSMS randomization test; every name below is passed
# positionally to run_randomization_test in the next cell.
survey = 'lsms'
country_names = ['nigeria', 'tanzania', 'uganda', 'malawi', 'pooled']
# One data directory per entry in country_names, in the same order.
country_paths = list('../data/output/LSMS/' + name + '/'
                     for name in country_names)
dimension = 100
k = k_inner = 3
points = 10
alpha_low, alpha_high = 0, 3
trials = 100
In [51]:
# Run the randomization test (daytime imagery reassigned at random to
# survey locations) with the parameters set in the preceding cell.
run_randomization_test(
    country_names, country_paths, survey, dimension,
    k, k_inner, points, alpha_low, alpha_high, trials,
)
In [52]:
# Parameters for the DHS randomization test; every name below is passed
# positionally to run_randomization_test in the next cell.
survey = 'dhs'
country_names = ['nigeria', 'tanzania', 'uganda', 'malawi', 'rwanda', 'pooled']
# One data directory per entry in country_names, in the same order.
country_paths = list('../data/output/DHS/' + name + '/'
                     for name in country_names)
dimension = 100
k = k_inner = 3
points = 10
alpha_low, alpha_high = 0, 3
trials = 100
In [53]:
# Run the randomization test (daytime imagery reassigned at random to
# survey locations) with the parameters set in the preceding cell.
run_randomization_test(
    country_names, country_paths, survey, dimension,
    k, k_inner, points, alpha_low, alpha_high, trials,
)
In [ ]: