In [1]:
import correlation_matrix as co
import numpy as np
In [2]:
mn_status = 0 # do not use status = -1 (2015 data for states that gave up pursuit)
mx_status = 2 # do not use status = 3 (acquire date results, not correlated to pursuit decision)
correl_min = 1e-6 # correlation values below this (or negative) will be reset to zero, so they have no effect
filename = 'historical_factors.csv'
#filename = 'postreactor_test.csv'
#filename = 'noreactor_test.csv'
data_file = open(filename, 'r')
full_matrix = np.loadtxt(data_file, skiprows=1, usecols=(2,3,4,5,6,7,8,9,10))
#full_matrix = np.loadtxt(data_file, skiprows=1, usecols=(2,3,4,5,6,7,8,9))
relevant_mask = ((full_matrix[:,0] >= mn_status) & (full_matrix[:,0] <= mx_status))
matrix = full_matrix[relevant_mask]
cor = co.Cor_matrix(matrix)          # correlation matrix over status plus the eight factors
factor = np.array(cor[0,1:])[0]      # first row, minus the diagonal entry: correlation of status with each factor
f_list = ("Auth ", "Mil_Iso", "Reactor", "En_Repr", "Sci_Net", "Mil_Sp ", "Conflict","U_Res ")
#f_list = ("Auth ", "Mil_Iso", "En_Repr", "Sci_Net", "Mil_Sp ", "Conflict","U_Res ")
for i in range(len(factor)):
    print(f_list[i], "\t", factor[i])
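The correlation_matrix module is not included in this notebook. A minimal sketch of what its two helpers are assumed to do here (Cor_matrix wrapping np.corrcoef over the columns, Compute_Score forming a correlation-weighted sum) might look like the following; the actual module may differ:

import numpy as np

def Cor_matrix(matrix):
    # rows = country/year observations, columns = (status, factor_1, ..., factor_8);
    # rowvar=False treats each column as a variable; wrapped in np.matrix so that
    # the cor[0,1:] indexing used above returns a 2-D row
    return np.matrix(np.corrcoef(matrix, rowvar=False))

def Compute_Score(weight, factor):
    # dot product of one row's factor values with the status correlations
    return float(np.dot(weight, factor))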
In [3]:
data_file = open(filename, 'r')
lines = data_file.readlines()
mn = 100
mx = 0
for line in lines[1:]:
    words = line.split('\t')
    name = words[0] + '_' + words[1]
    # skip countries' acquisition scores (status=3) because they aren't correlated to anything relevant here
    # also skip -1's because the negative status may distort the correlation and it's a marginally relevant dataset
    if (float(words[2]) >= mn_status) and (float(words[2]) <= mx_status):
        weight = words[3:]
        weight = [float(i) for i in weight]
        score = co.Compute_Score(weight, factor)
        # two independent checks so a single retained row can set both extremes
        if score < mn:
            mn = score
        if score > mx:
            mx = score
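If Compute_Score does reduce to that dot product, the min/max scan above can also be written as a single vectorized pass over the filtered matrix from In [2]; this is a hypothetical shortcut, not part of the original modules:

all_scores = matrix[:, 1:].dot(factor)    # one score per retained row (status column dropped)
mn, mx = all_scores.min(), all_scores.max()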
In [4]:
data_file = open(filename, 'r')
lines = data_file.readlines()
print('Country \t Cal \t Bib \t Dif')
for line in lines[1:]:
    words = line.split('\t')
    name = words[0] + '_' + words[1]
    if (float(words[2]) >= mn_status) and (float(words[2]) <= mx_status):
        weight = words[3:]
        weight = [float(i) for i in weight]
        score = co.Compute_Score(weight, factor)
        cal_sc = round((score - mn)/(mx - mn)*30)   # rescale the raw score onto a 0-30 calibrated scale
        print(name, "\t", cal_sc, '\t', words[2], '\t')   #, cal_sc-(float(words[2])*10)
In [5]:
print(factor)
weights = factor.copy()              # copy so the thresholding below does not overwrite factor
weights[weights < correl_min] = 0    # drop negligible or negative correlations
f_tot = weights.sum()
weights = weights/f_tot # normalize weights to sum to one
for i in range(len(weights)):
    print(f_list[i], "\t", weights[i])
In [6]:
from gen_fns import get_data
from hist_bench import calc_pursuit
#factor_weights = np.array([0.15, 0.1, 0.16, 0.09, 0.1,0.1, 0.15, 0.15])
countries, col_names, all_vals = get_data(filename, n_header=1, col_list=range(2,11))
status = all_vals[:,0]
raw_data = np.delete(all_vals, 0, 1)
all_pe_vals = calc_pursuit(raw_data, weights)
for i in range(len(countries)):
    print(countries[i], "\t", all_pe_vals[i])
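gen_fns.get_data and hist_bench.calc_pursuit are likewise external to this notebook. Assuming calc_pursuit simply forms the weighted pursuit score for each row (a dot product of the factor values with the normalized weights from In [5]), a stand-in would be:

import numpy as np

def calc_pursuit(raw_data, factor_weights):
    # one pursuit score per country/year row: weighted sum of its factor values
    return np.asarray(raw_data, dtype=float).dot(np.asarray(factor_weights))

Since the weights from In [5] are non-negative and sum to one, each score is then a convex combination of that row's factor values and stays on the same scale as the individual factors.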
In [18]:
data_file = open(filename, 'r')
lines = data_file.readlines()
my_countries = []
my_years = []
my_rawdata = []
my_color_tracker = []
for line in lines[1:]:
    words = line.split('\t')
    if (float(words[2]) >= mn_status) and (float(words[2]) <= mx_status):
        if float(words[2]) == mn_status:
            my_color_tracker.append(0)   # status 0: plain table entry
        else:
            my_color_tracker.append(1)   # status 1 or 2: highlighted in red below
        my_countries.append(words[0])
        my_years.append(words[1])
        weight = words[3:]
        weight = [float(i) for i in weight]
        my_rawdata.append(weight)
my_rawdata = np.asarray(my_rawdata)
my_score = calc_pursuit(my_rawdata, weights)
my_years_sorted = [x for y, x in sorted(zip(my_score, my_years), reverse=True)]
my_countries_sorted = [x for y, x in sorted(zip(my_score, my_countries), reverse=True)]
my_color_tracker = [x for y, x in sorted(zip(my_score, my_color_tracker), reverse=True)]
my_score_sorted = sorted(my_score, reverse=True)
for i in range(len(my_countries_sorted)):
    score_str = str(round(my_score_sorted[i], 1))
    if my_color_tracker[i] == 1:
        score_str = r"\color{red}{" + score_str + "}"
    print(my_countries_sorted[i], "&", my_years_sorted[i], "&", score_str, r"\\")
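One caveat with sorting the three lists independently on (score, value) tuples: when two scores tie, the tiebreaker becomes the country name, the year, or the color flag respectively, so tied rows can fall out of alignment. An index-based alternative (a sketch that would replace the zip-and-sort block, working from the unsorted my_score, my_countries, my_years, and my_color_tracker) is:

order = np.argsort(my_score)[::-1]               # row indices in descending score order
for i in order:
    score_str = str(round(my_score[i], 1))
    if my_color_tracker[i] == 1:
        score_str = r"\color{red}{" + score_str + "}"
    print(my_countries[i], "&", my_years[i], "&", score_str, r"\\")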