In [1]:
import correlation_matrix as co
import numpy as np

In [2]:
# --- Configuration -----------------------------------------------------------
mn_status = 0 # do not use status = -1 (2015 data for states that gave up pursuit)
mx_status = 2 # do not use status = 3 (acquire date results, not correlated to pursuit decision)
correl_min = 1e-6 # correlation values below this (or negative) will be reset to zero, so they have no effect

filename = 'historical_factors.csv'
#filename = 'postreactor_test.csv'
#filename = 'noreactor_test.csv'

# --- Load data ---------------------------------------------------------------
# Read columns 2..10 (status followed by the factor columns), skipping the
# header row.  The `with` block guarantees the file handle is closed even if
# parsing fails.
with open(filename, 'r') as data_file:
    full_matrix = np.loadtxt(data_file, skiprows=1,usecols=(2,3,4,5,6,7,8,9,10))
    #full_matrix = np.loadtxt(data_file, skiprows=1,usecols=(2,3,4,5,6,7,8,9))

# Column 0 of the loaded matrix is the status; keep only the rows whose status
# lies inside [mn_status, mx_status].
relevant_mask = ((full_matrix[:,0] >= mn_status) & (full_matrix[:,0] <= mx_status))
matrix = full_matrix[relevant_mask]

# --- Correlation of each factor with the status column -----------------------
# NOTE(review): co.Cor_matrix is assumed to return a 2-D correlation matrix
# (np.matrix-like) over the columns of `matrix`; row 0 without its first entry
# is then the status-vs-factor correlations -- confirm against
# correlation_matrix.py.
cor = co.Cor_matrix(matrix)
factor = np.array(cor[0,1:])[0]
f_list = ("Auth   ", "Mil_Iso", "Reactor", "En_Repr",  "Sci_Net", "Mil_Sp ", "Conflict","U_Res  ")
#f_list = ("Auth   ", "Mil_Iso", "En_Repr",  "Sci_Net", "Mil_Sp ", "Conflict","U_Res  ")

# The labels are matched to the factors purely by position, so a length
# mismatch (e.g. switching to the no-reactor file without switching f_list)
# would silently mislabel every following factor.
assert len(f_list) == len(factor), \
    "f_list has %d labels but %d factors were loaded" % (len(f_list), len(factor))

for i in range(len(factor)):
    print(f_list[i], "\t", factor[i])


('Auth   ', '\t', 0.37335663257377466)
('Mil_Iso', '\t', 0.23287230934325245)
('Reactor', '\t', 0.54707509317303271)
('En_Repr', '\t', 0.31665440078934504)
('Sci_Net', '\t', 0.15327607193445852)
('Mil_Sp ', '\t', 0.65381066790713205)
('Conflict', '\t', 0.82096253777748895)
('U_Res  ', '\t', -0.044341812950695371)

In [3]:
# Determine the smallest (mn) and largest (mx) raw score over all rows whose
# status lies in [mn_status, mx_status]; these bounds are used afterwards to
# rescale the scores.
with open(filename, 'r') as data_file:
    lines = data_file.readlines()

scores = []
for line in lines[1:]:
    if not line.strip():
        # tolerate blank lines (e.g. a trailing newline at the end of the file)
        continue
    words = line.split('\t')
    # skip countries acquisition scores (status=3) because they aren't correlated to anything relevant here
    # also skip -1's because the negative status may mess up the correlation and it's a marginally relevant dataset
    if mn_status <= float(words[2]) <= mx_status:
        weight = [float(w) for w in words[3:]]
        scores.append(co.Compute_Score(weight, factor))

if not scores:
    raise ValueError("no rows in %r with status in [%s, %s]" % (filename, mn_status, mx_status))

# Take min and max independently.  The previous `if ... elif ...` update never
# considered a score for the maximum when it had just become the new minimum
# (so the first row could never be the maximum), and the fixed starting values
# 100 / 0 gave wrong bounds for scores outside that range.
mn = min(scores)
mx = max(scores)

In [4]:
# Print, for every row whose status lies in [mn_status, mx_status], the score
# linearly rescaled from [mn, mx] onto 0..30 next to the status value read
# from the file.
with open(filename, 'r') as data_file:
    lines = data_file.readlines()

# The scaling range is loop-invariant; compute it once and refuse to divide by
# zero when every score is identical.
score_span = mx - mn
if score_span == 0:
    raise ValueError("mn == mx (%s): scores cannot be rescaled" % (mn,))

# NOTE(review): the header announces a 'Dif' column, but the expression that
# would fill it is commented out in the row print below.
print( 'Country \t Cal \t Bib \t Dif')
for line in lines[1:]:
    if not line.strip():
        # tolerate blank lines (e.g. a trailing newline at the end of the file)
        continue
    words = line.split('\t')
    name = words[0] + '_' + words[1]
    if mn_status <= float(words[2]) <= mx_status:
        weight = [float(w) for w in words[3:]]
        score = co.Compute_Score(weight,factor)
        cal_sc = round((score - mn)/score_span*30)
        print(name, "\t", cal_sc, '\t', words[2],'\t') #, cal_sc-(float(words[2])*10)


Country 	 Cal 	 Bib 	 Dif
('Belarus_2015', '\t', 13.0, '\t', '0.0', '\t')
('Germany_2015', '\t', 5.0, '\t', '0.0', '\t')
('Japan_2015', '\t', 7.0, '\t', '0.0', '\t')
('Armenia_2015', '\t', 16.0, '\t', '0.0', '\t')
('Belgium_2015', '\t', 0.0, '\t', '0.0', '\t')
('Bulgar_2015', '\t', 5.0, '\t', '0.0', '\t')
('Canada_2015', '\t', 0.0, '\t', '0.0', '\t')
('Czech-R_2015', '\t', 1.0, '\t', '0.0', '\t')
('Finland_2015', '\t', 5.0, '\t', '0.0', '\t')
('Hungary_2015', '\t', 2.0, '\t', '0.0', '\t')
('Kazak_2015', '\t', 8.0, '\t', '0.0', '\t')
('Lithua_2015', '\t', 8.0, '\t', '0.0', '\t')
('Mexico_2015', '\t', 4.0, '\t', '0.0', '\t')
('Nether_2015', '\t', 7.0, '\t', '0.0', '\t')
('Saudi-A_2015', '\t', 22.0, '\t', '0.0', '\t')
('Slovak_2015', '\t', 3.0, '\t', '0.0', '\t')
('Spain_2015', '\t', 2.0, '\t', '0.0', '\t')
('Ukraine_2015', '\t', 8.0, '\t', '0.0', '\t')
('UAE_2015', '\t', 21.0, '\t', '0.0', '\t')
('Sweden_2015', '\t', 3.0, '\t', '0.0', '\t')
('Switz_2015', '\t', 2.0, '\t', '0.0', '\t')
('Indones_2015', '\t', 6.0, '\t', '0.0', '\t')
('Algeria_2015', '\t', 15.0, '\t', '0.0', '\t')
('Romania_2015', '\t', 4.0, '\t', '0.0', '\t')
('Sweden_1946', '\t', 15.0, '\t', '1.0', '\t')
('Switz_1946', '\t', 13.0, '\t', '1.0', '\t')
('Indones_1965', '\t', 13.0, '\t', '1.0', '\t')
('Algeria_1983', '\t', 21.0, '\t', '1.0', '\t')
('Romania_1985', '\t', 13.0, '\t', '1.0', '\t')
('Austral_1961', '\t', 11.0, '\t', '2.0', '\t')
('Egypt_1965', '\t', 24.0, '\t', '2.0', '\t')
('Libya_1970', '\t', 25.0, '\t', '2.0', '\t')
('S-Korea_1970', '\t', 13.0, '\t', '2.0', '\t')
('Argent_1978', '\t', 19.0, '\t', '2.0', '\t')
('Brazil_1978', '\t', 12.0, '\t', '2.0', '\t')
('Iraq_1983', '\t', 28.0, '\t', '2.0', '\t')
('Iran_1985', '\t', 28.0, '\t', '2.0', '\t')
('Syria_2000', '\t', 23.0, '\t', '2.0', '\t')
('US_1942', '\t', 21.0, '\t', '2.0', '\t')
('USSR_1945', '\t', 30.0, '\t', '2.0', '\t')
('UK_1947', '\t', 19.0, '\t', '2.0', '\t')
('France_1954', '\t', 22.0, '\t', '2.0', '\t')
('China_1955', '\t', 20.0, '\t', '2.0', '\t')
('Israel_1960', '\t', 20.0, '\t', '2.0', '\t')
('India_1964', '\t', 21.0, '\t', '2.0', '\t')
('Pakist_1972', '\t', 17.0, '\t', '2.0', '\t')
('S-Afric_1974', '\t', 19.0, '\t', '2.0', '\t')
('N-Korea_1980', '\t', 26.0, '\t', '2.0', '\t')
# CORRECT FULL ASSESSMENT:

# Looking at 0,1,2 (includes countries that haven't pursued)
Auth 0.373356632574
Mil_Iso 0.232872309343
Reactor -0.547075093173
En_Repr 0.316654400789
Sci_Net 0.153276071934
Mil_Sp 0.653810667907
Conflict 0.0612444298104
U_Res -0.0443418129507

# Excluding US 1942 (pursuit) from the analysis
Auth 0.400954127305
Mil_Iso 0.227239545169
Reactor -0.530719785766
En_Repr 0.338186538485
Sci_Net 0.123302270056
Mil_Sp 0.642358825048
Conflict 0.0490435140176
U_Res -0.065984525834

# Excluding reactor data (noreactor_test)
Auth 0.373356632574
Mil_Iso 0.232872309343
En_Repr 0.316654400789
Sci_Net 0.153276071934
Mil_Sp 0.653810667907
Conflict 0.0612444298104
U_Res -0.0443418129507

# Looking only at states that pursued after 1965 (postreactor_test)
Auth 0.548561422466
Mil_Iso 0.261492965407
Reactor -0.422998285844
En_Repr 0.410064930872
Sci_Net -0.0317517512246
Mil_Sp 0.569401466399
Conflict -0.225465503146
U_Res -0.0924126508345

# Only looking at 1 and 2 (explore and pursue)
# -- This doesn't make sense because the PCA code must compare
#    between states that did and did not pursue
#
Auth -0.0146586038571
Mil_Iso -0.175188768929
Reactor 0.12387733004
En_Repr 0.0173421993905
Sci_Net 0.301624673409
Mil_Sp 0.611843446272
Conflict 0.410526315789
U_Res -0.0173421993905

In [5]:
# Turn the correlation factors into non-negative weights that sum to one.
print(factor)

# Work on a copy: `weights = factor` would only alias the array, so the masked
# assignment below would also zero entries of `factor` itself and change the
# result of any cell that is (re-)run with `factor` afterwards.
weights = factor.copy()
weights[weights < correl_min] = 0  # drop negative / negligible correlations
f_tot = weights.sum()
if f_tot == 0:
    # every correlation fell below correl_min; normalising would divide by zero
    raise ValueError("no correlation is >= correl_min (%s); cannot normalize weights" % (correl_min,))
weights = weights/f_tot  # normalize weights to sum to one

for i in range(len(weights)):
    print(f_list[i], "\t", weights[i])


[ 0.37  0.23  0.55  0.32  0.15  0.65  0.82 -0.04]
('Auth   ', '\t', 0.12051507520365551)
('Mil_Iso', '\t', 0.075168408499627956)
('Reactor', '\t', 0.17658932571063155)
('En_Repr', '\t', 0.10221226997261315)
('Sci_Net', '\t', 0.049475690866298265)
('Mil_Sp ', '\t', 0.21104229826749007)
('Conflict', '\t', 0.26499693147968356)
('U_Res  ', '\t', 0.0)

In [6]:
from gen_fns import get_data
from hist_bench import calc_pursuit

#factor_weights = np.array([0.15, 0.1, 0.16, 0.09, 0.1,0.1, 0.15, 0.15])

# Read the row labels, the column names and the numeric columns 2..10 of the
# data file (one header row).  No status filter is applied in this cell, so
# every row of the file is scored.
countries, col_names, all_vals = get_data(
    filename,
    n_header=1,
    col_list=range(2,11),
)

# The first numeric column is kept as the status; the remaining columns are
# passed on as the raw factor values.
status = all_vals[:,0]
raw_data = np.delete(all_vals, 0, axis=1)

# NOTE(review): calc_pursuit presumably returns one score per row of raw_data
# computed with the normalized weights -- confirm in hist_bench.
all_pe_vals = calc_pursuit(raw_data, weights)

for idx, country in enumerate(countries):
    print(country, "\t", all_pe_vals[idx])


('Belarus', '\t', 4.4081)
('Germany', '\t', 2.469)
('Japan', '\t', 3.0346)
('Armenia', '\t', 5.0012)
('Belgium', '\t', 0.9708)
('Bulgar', '\t', 2.3579)
('Canada', '\t', 1.1818)
('Czech-R', '\t', 1.2984)
('Finland', '\t', 2.5047)
('Hungary', '\t', 1.6171)
('Kazak', '\t', 3.0794)
('Lithua', '\t', 3.0643)
('Mexico', '\t', 2.0287)
('Nether', '\t', 2.7337)
('Saudi-A', '\t', 6.5442)
('Slovak', '\t', 1.8282)
('Spain', '\t', 1.5632)
('Ukraine', '\t', 3.1507)
('UAE', '\t', 6.3051)
('Sweden', '\t', 1.9749)
('Switz', '\t', 1.4989)
('Indones', '\t', 2.675)
('Algeria', '\t', 4.8847)
('Romania', '\t', 2.2097)
('Sweden', '\t', 4.796)
('Switz', '\t', 4.4279)
('Indones', '\t', 4.4407)
('Algeria', '\t', 6.6426)
('Romania', '\t', 4.4948)
('Austral', '\t', 3.8757)
('Egypt', '\t', 7.273)
('Libya', '\t', 7.4493)
('S-Korea', '\t', 4.3643)
('Argent', '\t', 6.0935)
('Brazil', '\t', 4.2875)
('Iraq', '\t', 8.1545)
('Iran', '\t', 8.323)
('Syria', '\t', 6.8914)
('US', '\t', 6.4872)
('USSR', '\t', 8.859)
('UK', '\t', 5.8464)
('France', '\t', 6.7933)
('China', '\t', 6.3277)
('Israel', '\t', 6.1962)
('India', '\t', 6.4368)
('Pakist', '\t', 5.2904)
('S-Afric', '\t', 5.8925)
('N-Korea', '\t', 7.7035)
('US', '\t', 6.538)
('USSR', '\t', 8.594)
('UK', '\t', 6.3385)
('France', '\t', 5.4125)
('China', '\t', 7.0116)
('Israel', '\t', 6.4233)
('India', '\t', 4.4657)
('Pakist', '\t', 6.2646)
('S-Afric', '\t', 5.2083)
('N-Korea', '\t', 7.1042)
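| Factor   | Chris | Historical Results |
|----------|-------|--------------------|
| Auth     | 0.15  | 0.208437699671     |
| Mil_Iso  | 0.10  | 0.130008051932     |
| Reactor  | 0.10  | 0.0                |
| En_Repr  | 0.16  | 0.176781953588     |
| Sci_Net  | 0.10  | 0.0855710306482    |
| Mil_Sp   | 0.15  | 0.365009697831     |
| Conflict | 0.10  | 0.0341915663299    |
| U_Res    | 0.09  | 0.0                |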

In [18]:
# Build a LaTeX table body (country & year & score \\) ranked by descending
# score.  Rows whose status is above mn_status are highlighted in red.
my_countries = []
my_years = []
my_rawdata = []
my_color_tracker = []  # 0 -> status == mn_status, 1 -> any other accepted status

with open(filename, 'r') as data_file:
    lines = data_file.readlines()

for line in lines[1:]:
    if not line.strip():
        # tolerate blank lines (e.g. a trailing newline at the end of the file)
        continue
    words = line.split('\t')
    row_status = float(words[2])
    if mn_status <= row_status <= mx_status:
        my_color_tracker.append(0 if row_status == mn_status else 1)
        my_countries.append(words[0])
        my_years.append(words[1])
        my_rawdata.append([float(w) for w in words[3:]])
my_rawdata = np.asarray(my_rawdata)

my_score = calc_pursuit(my_rawdata, weights)

# Derive ONE ordering from the scores and apply it to every column.  Sorting
# zip(my_score, <column>) separately per column breaks ties on that column's
# own values, so two rows with equal scores could end up with their country,
# year and colour flag taken from different rows.  With a key-based sort, rows
# with equal scores keep their file order.
order = sorted(range(len(my_score)), key=lambda k: my_score[k], reverse=True)
my_years_sorted = [my_years[k] for k in order]
my_countries_sorted = [my_countries[k] for k in order]
my_color_tracker = [my_color_tracker[k] for k in order]
my_score_sorted = [my_score[k] for k in order]

for i in range(len(my_countries_sorted)):
    score_str = str(round(my_score_sorted[i],1))
    if (my_color_tracker[i] == 1):
        # "\\color" spells out the backslash instead of relying on the
        # invalid escape sequence "\c"; the emitted text is unchanged.
        score_str = "\\color{red}{" + score_str + "}"
    # A single pre-joined string prints identically under the Python 2 print
    # statement and the Python 3 print function.
    print(" ".join([my_countries_sorted[i], "&", my_years_sorted[i], "&", score_str, "\\\\"]))


USSR & 1945 & \color{red}{8.9} \\
Iran & 1985 & \color{red}{8.3} \\
Iraq & 1983 & \color{red}{8.2} \\
N-Korea & 1980 & \color{red}{7.7} \\
Libya & 1970 & \color{red}{7.4} \\
Egypt & 1965 & \color{red}{7.3} \\
Syria & 2000 & \color{red}{6.9} \\
France & 1954 & \color{red}{6.8} \\
Algeria & 1983 & \color{red}{6.6} \\
Saudi-A & 2015 & 6.5 \\
US & 1942 & \color{red}{6.5} \\
India & 1964 & \color{red}{6.4} \\
China & 1955 & \color{red}{6.3} \\
UAE & 2015 & 6.3 \\
Israel & 1960 & \color{red}{6.2} \\
Argent & 1978 & \color{red}{6.1} \\
S-Afric & 1974 & \color{red}{5.9} \\
UK & 1947 & \color{red}{5.8} \\
Pakist & 1972 & \color{red}{5.3} \\
Armenia & 2015 & 5.0 \\
Algeria & 2015 & 4.9 \\
Sweden & 1946 & \color{red}{4.8} \\
Romania & 1985 & \color{red}{4.5} \\
Indones & 1965 & \color{red}{4.4} \\
Switz & 1946 & \color{red}{4.4} \\
Belarus & 2015 & 4.4 \\
S-Korea & 1970 & \color{red}{4.4} \\
Brazil & 1978 & \color{red}{4.3} \\
Austral & 1961 & \color{red}{3.9} \\
Ukraine & 2015 & 3.2 \\
Kazak & 2015 & 3.1 \\
Lithua & 2015 & 3.1 \\
Japan & 2015 & 3.0 \\
Nether & 2015 & 2.7 \\
Indones & 2015 & 2.7 \\
Finland & 2015 & 2.5 \\
Germany & 2015 & 2.5 \\
Bulgar & 2015 & 2.4 \\
Romania & 2015 & 2.2 \\
Mexico & 2015 & 2.0 \\
Sweden & 2015 & 2.0 \\
Slovak & 2015 & 1.8 \\
Hungary & 2015 & 1.6 \\
Spain & 2015 & 1.6 \\
Switz & 2015 & 1.5 \\
Czech-R & 2015 & 1.3 \\
Canada & 2015 & 1.2 \\
Belgium & 2015 & 1.0 \\

In [ ]:


In [ ]:


In [ ]: