In [193]:
# Render our plots inline
%matplotlib inline
%pylab inline  
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.mlab as mlab

# General Plotting Parameters
mpl.rcParams['figure.figsize'] = (8,5)
mpl.rcParams['lines.linewidth'] = 2.5
mpl.rcParams['font.weight'] = 'bold'
mpl.rcParams['axes.linewidth'] = 1.5
mpl.rcParams['font.size'] = 14.
mpl.rcParams['legend.fontsize'] = 12.
mpl.rcParams['axes.labelsize'] = 12.
mpl.rcParams['xtick.labelsize'] = 10.
mpl.rcParams['ytick.labelsize'] = 10.
mpl.rcParams['xtick.minor.pad'] = 4
mpl.rcParams['xtick.direction'] = 'out'
mpl.rcParams['ytick.direction'] = 'out'

#Git says this is patched, but it doesn't work from Pip --upgrade 26-mar-2015
#mpl.rcParams['xtick.minor.visible'] = True  

# These are the "Tableau 20" colors as RGB.  
tableau20 = [(31, 119, 180), (174, 199, 232), (255, 127, 14),
             (255, 187, 120), (44, 160, 44), (152, 223, 138),
              (148, 103, 189),
             (197, 176, 213), (140, 86, 75), (196, 156, 148),  
             (227, 119, 194), (247, 182, 210), (127, 127, 127),
             (199, 199, 199), (188, 189, 34), (219, 219, 141),
             (23, 190, 207), (158, 218, 229),(214, 39, 40), (255, 152, 150)]  
    
# Scale the RGB values to the [0, 1] range,
# which is the format matplotlib accepts.  
for i in range(len(tableau20)): 
    r, g, b = tableau20[i]  
    tableau20[i] = (r / 255., g / 255., b / 255.)  

ps = 1


Populating the interactive namespace from numpy and matplotlib

In [194]:
import hist_bench
reload(hist_bench)
import gen_fns
reload(gen_fns)
from gen_fns import get_data
from gen_fns import ls_fit
from hist_bench import calc_pursuit
from hist_bench import get_pes
from hist_bench import calc_weights

bin_edges = [0,1,2,3,4,5,6,7,8,9,10]

In [195]:
path = "/Users/mbmcgarry/git/historical_prolif/"
file = "historical_factors.csv"

factor_weights = calc_weights(path+file)

countries, col_names, all_vals = get_data(path+file, n_header=1, col_list=range(2,11))
status = all_vals[:,0]
raw_data = np.delete(all_vals, 0, 1)

for i in range(len(factor_weights)):
    print col_names[i+3], "   ", factor_weights[i]


Auth     0.120515075204
Mil_Iso     0.0751684084996
Reactor     0.176589325711
En_Repr     0.102212269973
Sci_Net     0.0494756908663
Mil_Sp     0.211042298267
Conflict     0.26499693148
U_Res     0.0

In [196]:
all_pe_vals = calc_pursuit(raw_data, factor_weights)
pursue_states, pursue_pes = get_pes(countries, all_pe_vals, "Pursue")
prolif_states, prolif_pes = get_pes(countries, all_pe_vals, "Prolif")

pursue_hist, edges_pur = np.histogram(pursue_pes,bins=bin_edges)
prolif_hist, edges_pro = np.histogram(prolif_pes,bins=bin_edges)
all_hist, edges_a = np.histogram(all_pe_vals, bins=bin_edges)

In [197]:
frac_pursue=np.nan_to_num(pursue_hist.astype(float)/all_hist.astype(float))
#frac_pursue=np.nan_to_num(pursue_hist.astype(float)/190)
frac_prolif=np.nan_to_num(prolif_hist.astype(float)/all_hist.astype(float))
prolif_pursue=np.nan_to_num(prolif_hist.astype(float)/pursue_hist.astype(float))

denom_pursue = np.full(len(pursue_hist),pursue_hist.sum()) 
frac_pursue_weight = pursue_hist/denom_pursue

denom_succ = np.full(len(prolif_pursue),prolif_pursue.sum()) 
frac_succ_weight = prolif_pursue/denom_succ

In [ ]:


In [198]:
barwidth=0.8
index = np.delete(bin_edges,-1) + (1-barwidth)
xvals = np.arange(1,11)

int_pursue, m_pursue = ls_fit(xvals, frac_pursue, frac_pursue_weight)
line_vals = m_pursue*xvals + int_pursue

pow_yoff = 0.1
pow_yexp = 1.48
pow_norm = 0.044
pow_vals = pow_norm*pow(xvals, pow_yexp)-pow_yoff
bounds = []

fit_pursue = np.zeros(10)
for i in range(len(xvals)):
    fit_pursue[i] = frac_pursue[i]
    if (i == 9):
        fit_pursue[i] = 1
    if (pow_vals[i] >= 0): # and (pow_vals[i] <=1)):
        bounds.append(i)
resid_pow = np.sum(pow((pow_vals[bounds] - fit_pursue[bounds]),2))
resid_lin = np.sum(pow((line_vals[bounds] - fit_pursue[bounds]),2))
fit_pursue


('weights ', array([ 0.  ,  0.  ,  0.  ,  0.03,  0.1 ,  0.17,  0.38,  0.17,  0.14,  0.  ]))
Out[198]:
array([ 0.  ,  0.  ,  0.  ,  0.2 ,  0.33,  0.83,  0.79,  1.  ,  1.  ,  1.  ])

In [199]:
str_resid_lin = str(round(resid_lin,2))
str_resid_pow = str(round(resid_pow,2))

line_spec_str = "y = "+str(round(m_pursue,2))+ "x - " + str(round(abs(int_pursue),2)) +", rms="+str_resid_lin
pow_spec_str = "y = " + str(pow_norm) + "x^["+str(pow_yexp) + "] - "+str(pow_yoff)+", rms="+str_resid_pow
spec_str = line_spec_str + "\n" + pow_spec_str

In [200]:
plt.bar(index, frac_pursue, barwidth) #, label=spec_str)
plt.scatter(xvals[bounds], line_vals[bounds], color = 'red',label=line_spec_str, zorder=3)
plt.plot(xvals[bounds], line_vals[bounds], color = 'red', zorder=2, linestyle='--')
plt.scatter(xvals[bounds], pow_vals[bounds], color='black',label=pow_spec_str, zorder=3)
plt.plot(xvals[bounds], pow_vals[bounds], color='black', zorder=2, linestyle='--')
plt.title("Likelihood of Pursuit (1942-2017)")
plt.xlabel("Pursuit Score")
plt.ylabel("# Pursuits/# States")
plt.legend(loc='best')

if (ps == 1):
    savefig(path + 'pe_likely.png')

plt.show()


#plt.plot(xvals, pow(xvals/10,4), color='green')
print "Line Residuals: ", resid_lin, " PowerLaw Residuals: ", resid_pow


Line Residuals:  0.200222292543  PowerLaw Residuals:  0.200306193951

In [ ]:


In [201]:
time_frac = 1./75
for i in range(len(frac_pursue)):
    cur_val = pow_vals[i]
    if (cur_val >= 1) or (xvals[i] == 10):
        cur_val = 0.999999999
    elif (cur_val < 0):
        cur_val = 0
    likelihood = (1.0 - math.pow((1.0-cur_val), time_frac))
    print "score: ",xvals[i], " frac: ", frac_pursue[i], "fit_val", pow_vals[i],"likely: ", likelihood


score:  1  frac:  0.0 fit_val -0.056 likely:  0.0
score:  2  frac:  0.0 fit_val 0.0227374426388 likely:  0.000306618645558
score:  3  frac:  0.0 fit_val 0.123661963642 likely:  0.00175849704543
score:  4  frac:  0.2 fit_val 0.242374541489 likely:  0.00369404195763
score:  5  frac:  0.333333333333 fit_val 0.376352318005 likely:  0.00627582002216
score:  6  frac:  0.833333333333 fit_val 0.523902214387 likely:  0.00984629815815
score:  7  frac:  0.785714285714 fit_val 0.683786494266 likely:  0.01523394001
score:  8  frac:  1.0 fit_val 0.855049446523 likely:  0.0254227594026
score:  9  frac:  1.0 fit_val 1.03692440864 likely:  0.241422425257
score:  10  frac:  0.0 fit_val 1.22877875698 likely:  0.241422425257

In [202]:
barwidth=0.8
plt.bar(index, frac_prolif, barwidth)

plt.title("Abs. Likelihood of Proliferate")
plt.xlabel("Pursuit Score")
plt.ylabel("Prolif/All States")


Out[202]:
<matplotlib.text.Text at 0x1161c7b90>

In [203]:
barwidth=0.8

int_succ, m_succ = ls_fit(xvals, prolif_pursue, frac_succ_weight)

plt.bar(index+(barwidth/2), prolif_pursue, barwidth)
#plt.plot(xvals, m_succ*xvals + int_succ, color = 'red')

plt.title("Likelihood of Success given Pursuit")
plt.xlabel("Pursuit Score")
plt.ylabel("Prolif/Pursue")


('weights ', array([ 0.  ,  0.  ,  0.  ,  0.  ,  0.1 ,  0.31,  0.25,  0.18,  0.15,  0.  ]))
Out[203]:
<matplotlib.text.Text at 0x115952550>

In [204]:
# Time to acquire. States that never succeeded have negative years in the database, set them all to 30yrs
# (longest time to successfully acquire was 26yrs, all failed attempts were greater than 30yrs from 2015
# except Syria (15yrs) 

from hist_bench import time_to_acquire

# returns time to acquire for all states that have attempted pursuit
acq_times = time_to_acquire()

pursuit_scores = []
corr_acq_times = []


for idx, cur_state in enumerate(countries):
    if (cur_state in acq_times) and (status[idx] == 2):
        cur_time = acq_times[cur_state]
        if (cur_time >= 0):
            corr_acq_times.append(cur_time)
            pursuit_scores.append(all_pe_vals[idx])
            print "State ",cur_state, " Score ", all_pe_vals[idx], " Time ", cur_time
#        To look at the distribution of countries that pursued but never acquired
#        else:
#            corr_acq_times.append(-1*cur_time)
#            pursuit_scores.append(all_pe_vals[idx])
#            print "State ",cur_state, " Score ", all_pe_vals[idx], " Time ", cur_time


int_time, m_time = ls_fit(np.asarray(pursuit_scores), np.asarray(corr_acq_times))


State  US  Score  6.4872  Time  3
State  USSR  Score  8.859  Time  4
State  UK  Score  5.8464  Time  5
State  France  Score  6.7933  Time  6
State  China  Score  6.3277  Time  9
State  Israel  Score  6.1962  Time  9
State  India  Score  6.4368  Time  10
State  Pakist  Score  5.2904  Time  15
State  S-Afric  Score  5.8925  Time  5
State  N-Korea  Score  7.7035  Time  26
('weights ', array([ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.]))

In [205]:
line_spec_str = " m="+str(round(m_time,3))+ "\n b=" + str(round(int_time,3))
median_str = "median: " + str(np.median(corr_acq_times))

plt.scatter(pursuit_scores, corr_acq_times, label=line_spec_str+'\n'+median_str)
plt.xlabel('pursuit score')
plt.ylabel('time to acquire')
#plt.plot(pursuit_scores, m_time*np.asarray(pursuit_scores) + int_time, color = 'red')
plt.legend(loc='best')


Out[205]:
<matplotlib.legend.Legend at 0x11620cb50>

In [206]:
# NOT WORKING

# Linear Fit
import scipy as sp
from gen_fns import fit_exp_linear

C0 = 20
# Linear Fit (Note that we have to provide the y-offset ("C") value!!
A, K = fit_exp_linear(np.asarray(pursuit_scores), np.asarray(corr_acq_times), C0)
fit_y= A * np.exp(K * np.asarray(pursuit_scores)) + C0

In [ ]: