In [1]:
using Optim
using StatsFuns
using DataFrames
# inverse beta distribution function を行列に対応するように拡張
import StatsFuns.betainvcdf
betainvcdf(alpha::Number, beta::Number, x::Array) = reshape([betainvcdf(alpha, beta, i) for i in reshape(x, 1, size(x, 1)*size(x, 2)) ], size(x, 1), size(x, 2))
# maxをArray{String, 1}に対応するように拡張
# しかしArray型で入っているのでAnyに対応させる
import Base.max
max(number::Real, comparison::Any) = [max(number, parse(Int, i)) for i in comparison]
# normpdfを配列に拡張
import StatsFuns.normpdf
normpdf(array::Array{Float64, 2}) = reshape([normpdf(i) for i in reshape(array, 1, size(array, 1)*size(array, 2))], size(array, 1), size(array, 2))
# parameterの初期値を作成
# inivalueは260こ
# learning_paramsは13こ
iii = 1 #kokokaeruiii;
ini = DataFrame(randn(260, 100))
learn = DataFrame(randn(13, 100))
writetable(*("inivalue_", "$iii", ".txt"), ini)
writetable(*("learning_params", "$iii", ".txt"), learn)
In [2]:
# define T as num of period
T = 14
bandwidth = 0.05
N_sim = 100
n_eval = 0
I = 4
N_cand = 4
# load iii.txt -ASCII;
# load jjj.txt -ASCII;
# dataのロード
data = readtable("data.csv", header = false)
data = convert(Array, data)
# initial valueのロード
iii = 1
jjj = 1
kkk = 1
inivalue = readtable(*("inivalue_", "$iii", ".txt"))
inivalue = inivalue[:, jjj]
learning_params = readtable(*("learning_params", "$iii", ".txt"))
learning_params = learning_params[:, jjj]
parameter = vcat(1000000000, 0, inivalue, learning_params)
writetable(*("parameter_", "$iii", "_", "$jjj", "_", "$kkk", ".txt"), DataFrame(X = parameter))
# ここから本題
D = data[:,45] .> 100 # Remove municipality with small populaion <100
# 行削除はどうするのか、そもそもいるのか
# data[D, :] = [];
data = data[D, :]
# Construct X : Regressor for individual characteristics
X = [1 1 1 1 1];
dFX = ones(size(data, 1))
Race = [1 0 0; 0 1 0; 0 0 1];
Educ = 0.1*[16;14;12;9];
Incm = log([20000;35000;72500;120000]);
# (white,black,otherasian+indian+other)
dFXRace = [data[:,121] data[:,124] data[:,122] + data[:,123] + data[:,125]];
dFXEduc = [data[:,126:128] (1- sum(data[:,126:128], 2))]; # (overba,underba,hs,other)
dFXIncm = [sum(data[:,129:132], 2) sum(data[:,133:136], 2) sum(data[:,137:140], 2) sum(data[:,141:144], 2)];
# dFXIncm=data(:,129:144); % (income 16 category:
# -10K,15K,20K,25K,30K,35K,40,45,50,60,75,100,125,150,200,200+)
# ここは精査する必要がありそう。
for r in 1:3
for e in 1:4
for i in 1:4
X = [X; reshape([Race[r,1:3] ; Educ[e] ; Incm[i]], 1, 5)];
dFX = [dFX dFXRace[:,r].*dFXEduc[:,e].*dFXIncm[:,i]];
end
end
end
del = trues(size(X, 1))
del[1] = false
X = X[del, :]
del2 = trues(size(dFX, 2))
del2[1] = false
dFX = dFX[:, del2]
N_dFX = size(dFX,2);
N_muni = size(dFX,1); # (number of municipalities)
# data3,4,5,6はまさかのstring型
# (votes: clark, dean, edwards, kerry)municipalityごとに、いない奴は0になるようにしてる
Votes = [max(0,data[:,3]) max(0,data[:,4]) max(0,data[:,5]) max(0,data[:,6])];
VTotal = data[:,117]; # (total number of votes)
RDemHat = data[:,148]; # (registred number of democrats predicted)
PopTot = data[:,45]; # (total population)
Open = data[:,118]; # open
MOpen = data[:,119]; # modified open
VOther = VTotal - sum(Votes,2); #Votes of Penna & Sharpton etc.
RegDem = RDemHat./PopTot; # (fraction of registered democrats in population)
# make Cand
# 何で18列目から21列めまでを開けてる?
Cand = zeros(35,27);
# Candidates whose name is on ballot (column 1;4)
# Column 5 corresponds to ichiban maeno parameter
# noichi (for T_ij), Col6 to ichiban saigono parameter.
# Same for Columns 7 and 8 (for E[qi|omega]).
# Column 9: Who's in the race.
# Column 10: 1 if Before (and including) super tuesday
# Column 11: ?
# Column 12: ?
# Column 13: Date t (1,2,...14). 6 correspond to
# super Tues.
# Column 14: Municipality Number Start
# Column 15: Municipality Number End;
# 下のE[qi|omega_piv]-E[qi|omega_piv]って0では?
# Column 16: Atamadashi for
# E[qi|omega_piv]-E[qi|omega_piv]
# Column 17: Owaridashi for
# E[qi|omega_piv]-E[qi|omega_piv]
# the remains is explained below
j = 0
N_T_ij = [0;1;3;6]; # The number of T_ij that we need when the number of candidates are 1,2,3,4.
for i in 1:size(data, 1)
if j == data[i,149] - 1
Cand[j+1,1:4] = squeeze(ones(1,4), 1)+min(data[i,3:6], squeeze(zeros(1,4), 1));
if j == 0
Cand[j+1,5] = 0;
Cand[j+1,6] = 0;
Cand[j+1,11] = 0;
else
Cand[j+1,5] = Cand[j,6] + (sum(Cand[j+1,1:4])>1);
Cand[j+1,6] = Cand[j+1,5] + N_T_ij[convert(Int64, sum(Cand[j+1,1:4])),1] - 1+(sum(Cand[j+1,1:4])==1);
end
# 1 if before or on super Tues.
Cand[j+1,10] = (data[i,120] - 92<=0);
Cand[j+1,7] = sum(sum(Cand[1:j+1,1:4]))-sum(Cand[j+1,1:4])+1;
Cand[j+1,8] = sum(sum(Cand[1:j+1,1:4]));
Cand[j+1,17] = sum(sum(Cand[1:j+1,1:4].*(Cand[1:j+1,10]*ones(1,4))));
Cand[j+1,16] = Cand[j+1,17]-sum(Cand[j+1,1:4]*Cand[j+1,10])+1;
Cand[j+1,16:17] = Cand[j+1,16:17]*Cand[j+1,10];
# Baai-wake: [Clark,Dean,Edawards]
# Type 0: [000] Type 1:[111] Type2:[101] Type3: [011] Type4: [010]
if Cand[j+1,1:3] == [0,0,0]
Cand[j+1,9] = 0;
elseif Cand[j+1,1:3] == [1, 1, 1]
Cand[j+1,9] = 1;
elseif Cand[j+1,1:3] == [1, 0, 1]
Cand[j+1,9] = 2;
elseif Cand[j+1,1:3] == [0, 1, 1]
Cand[j+1,9] = 3;
else
Cand[j+1,9] = 4;
end
#Atamadashi for params
if j > 0
a = sum((Cand[1:j,6] - Cand[1:j,5]+1).*Cand[1:j,10],1)+Cand[j+1,10]*(sum(Cand[j+1,1:4]) > 1)
Cand[j+1,11] = a[1];
b = sum((Cand[1:j+1,6] - Cand[1:j+1,5]+1).*Cand[1:j+1,10],1)
Cand[j+1,12] = b[1];
end
Cand[j+1,13] = data[i,120];
Cand[j+1,14] = i;
j = j+1;
end
Cand[:,11] = Cand[:,10].*Cand[:,11]; # Atamadashi ignoring post-super
Cand[:,12] = Cand[:,10].*Cand[:,12]; # Tues states
Cand[1:end-1,15] = Cand[2:end,14]-1;
Cand[end,15] = i;
end
iij = unique(Cand[:,13]);
for i in 1:size(unique(Cand[:,13]),1)
for j in 1:size(Cand[:,13],1)
if Cand[j,13] == iij[i]
Cand[j,13] = i;
end
end
end
w = convert(Int64, maximum(Cand[:,13]))
NNCan = ones(w, 1)
A_Exi = ones(w, 2)
PatternCandall = ones(w, 4)
NNCan[1,1] = 4;
A_Exi[1,:] = [1 4];
PatternCandall[1,:] = [1 1 1 1];
for S in 2:w
# of candidate on date S
Temp = Cand;
# 行削除はどうする
# Temp[(Temp[:,13] .!= S), :]=[];
Temp = Temp[Temp[:,13] .== S, :]
NNCan[S,1] = sum(sum(Temp[:,1:4],1) .> 0);
PatternCandall[S,1:4] = (sum(Temp[:,1:4]) > 0);
# Atamadashi for ExiOmega
A_Exi[S,:] = [A_Exi[S-1,2]+1 (A_Exi[S-1,2] + NNCan[S,1])];
end
for S = 1:size(Cand,1)
SS = convert(Int64, Cand[S,13]);
Cand[S,22:23] = A_Exi[SS,:];
Cand[S,24:27] = PatternCandall[SS,:];
end
# parameter description
# Xi|Omg (118x1) : For each state, we have at most 4 values (total 118)
# Xi|OmgPiv (56x1) : For each state, we have at most 3 values (total 56)
# Tij (105x1) : For each state, we have at most 6 values (total 105)
# Set to zero if after Super Tuesday.
# Cx (sizeXk x1) : Cost function parameter. Number of Xk
# Cz (3 x 1) : Cost function parameter related to other elections
# vk (sizeXk x4) : Preference paramether. Number of Xk x Number of Cand
# FAlph (2x1) : Distribution of alpha (beta dist), 2x1
# Sig_xsi (1x1) : variance of Xsi
# DeltaO (1x1) : Increase of eligible voters for open election
# DeltaMO (1X1) : Increase of eligible voters for modified opene elec.
# load inivalue.txt inivalue -ASCII
# load indicator.txt indicator -ASCII;
# load inival.txt inival -ASCII
DATA = data;
srand(10);
# add XiOmg
# load the simulated XiOmg data
# reshape the data to [T, N_cand, N_sim]
SimXsi = randn(N_muni, N_cand, N_sim);
SimAlp = rand(N_muni, N_sim);
Cand = convert(Array{Int64, 2}, Cand)
Cand = sortrows(Cand, by = x->(x[13]))
Out[2]:
candに少数がないので、まとめてInt64に変更
さらに、sortrowsの挙動を確保するために関数の外に出し、事前に処理しておく。
loglikの関数にCandを入れる
reshapeとかはfor文の方がいいかな
In [19]:
function new_loglike(param::Array{Float64,1}, DATA::Array{Real,2}, Cand::Array{Int64, 2})
# これなんだかわからないのでコメントアウト
#if i_bayes == 1
#param = param
#end
FAlph = Array(Float64, 2, 1)
# setting parameters
param[2] = -1
C0 = 0
# Raceの変更に伴い、Cxも4つの要素にする必要がある。
q = 4
Cx = param[1:q]
Cz = -abs(param[q+1:q+3])
FAlph[1,1] = abs(param[q+4])
FAlph[2,1] = abs(param[q+5])
param[q+4:q+5] = FAlph[1:2,1]
Sig_xsi = max(0.5, abs(param[q+6]))
param[q+6] = Sig_xsi
DeltaO = 0.6891
DeltaMO = 0.5366
# Raceをいじった関係でこのvkを4*4 = 16この要素にしなくちゃいけない
vk = param[12:27]
composite = param[75:149]
Tij = abs(param[150:260])
# new parameters
rho_eta = abs(param[261])
rho_chi = param[262:265]
mu_chi = param[266:269]
chi = param[270:273]
# making log likelihood
# simulated values
N_T_ij = [0;1;3;6]
# 上で拡張した関数を使用
Alpha = betainvcdf(FAlph[1,1],FAlph[2,1], SimAlp)
Xsi = Sig_xsi*SimXsi
# calculating utilities
# Base Utility for Sincere, [# of categories X # of candidates]
# VSinは48×4
VSin = X[:,2:end]*[vk[1:4] vk[5:8] vk[9:12] vk[13:16]]-C0-X[:, 2:end]*Cx*ones(1,4)
# Base Utility for Strategic, [# of categories X # of candidates]
# VStrも48×4
VStr = X[:, 2:end]*[vk[1:4] vk[5:8] vk[9:12] vk[13:16]]
#Eligible voters accounting for Open and Modified Open
DELTA = DeltaO*Open+DeltaMO*MOpen
RTOT = RDemHat.*(1+DELTA)-VOther
# store signals in advance
signals = randn(4,T) * sqrt(1/rho_eta)
for i in 1:4
signals[i, :] = signals[i, :] + chi[i, 1]
end
loglik_s = zeros(size(Cand,1),1)
for S in 1:size(Cand,1)
if Cand[S, 15] - Cand[S, 14] < 21 || S == 30 || S == 34 #Excluding Utah, Wisconsin, and small
else
N_candS = sum(Cand[S,1:4] .!= 0)
M = Cand[S, 15] - Cand[S, 14] + 1 # Number of municipalities in State S
if Cand[S, 11] == 0
T_s = zeros(N_T_ij[N_candS,1],1)
COMPOSITE = zeros(N_candS,1)
else
T_s = Tij[Cand[S, 11]:Cand[S, 12],1]
COMPOSITE = composite[Cand[S, 16]:Cand[S, 17],1]
end
# VSTR_s = []
Dropped_s = find(Cand[S,1:4] .== 0) # index of candidates withdrawn.
Rem = Cand[S,1:4] .!= 0
Candidate_s = find(Cand[S,1:4] .== 1) # index of cadidate
Senate_s = DATA[Cand[S, 14], 27]*Cz[1,1] # Cost from Senate elections
Governer_s = DATA[Cand[S, 14], 29]*Cz[3,1] # Cost from GOvernor elections
dFX_s = dFX[Cand[S, 14]:Cand[S, 15], :]
date = Cand[S, 13] # election date (1 ~ 14)
# というか列の削除をする必要があるのか
temp = Cand[S,1:4]
PatternCand_d = Cand[S, 24:27]
# temp[:, find(Cand[S,24:27] .== 0)] = []
# PatternCand_d[:, find(Cand[S,24:27] .== 0)] = []
temp = temp[Cand[S,24:27] .!= 0.0]
PatternCand_d = PatternCand_d[Cand[S,24:27] .!= 0.0]
Alpha_s = reshape(Alpha[Cand[S, 14]:Cand[S, 15], :, :], length(Cand[S, 14]:Cand[S, 15]), 100)
Xsi_s = Xsi[Cand[S, 14]:Cand[S, 15], :, :]
# そもそも削除する必要があるか
VSin_s = VSin
# VSin_s[:, Dropped_s] = []
VSin_s = VSin_s[:, Rem]
VStr_s = VStr
# println(size(VStr_s, 2))
# VStr_s[:, Dropped_s] = []
VStr_s = VStr_s[:, Rem]
# make XiOmg_s whose size is [1, num of candidates in the state]
# take necessary parameters
rho_chi_s = rho_chi[Candidate_s, 1]
mu_chi_s = mu_chi[Candidate_s, 1]
chi_s = chi[Candidate_s, 1]
# calculating XiOmg_s
# signals_s = signals[:, date]
# cumsum は1次元配列にしか使えないので注意ダメだったらsqueeze
cum_signals = cumsum(signals, 2)
upper = rho_chi_s + mu_chi_s + rho_eta * cum_signals[Candidate_s, date]
under = rho_chi_s + date * rho_eta
XiOmg_s = upper ./ under
VSin_s = VSin_s + ones(size(VSin_s,1), 1)*XiOmg_s' -Senate_s-Governer_s
# col9は0~4、4は?
if Cand[S, 9] == 1 && Cand[S, 10] == 1
VSTR_s = ones(48, 4)
Composite = ones(size(X,1),1)*COMPOSITE'
VSTR_s[:,1] = T_s[1,1]*(VStr_s[:,1]-VStr_s[:,2])+T_s[2,1]*(VStr_s[:,1]-VStr_s[:,3])
+T_s[3,1]*(VStr_s[:,1]-VStr_s[:,4])+Composite[:,1]
VSTR_s[:,2] = T_s[1,1]*(VStr_s[:,2]-VStr_s[:,1])+T_s[4,1]*(VStr_s[:,2]-VStr_s[:,3])
+T_s[5,1]*(VStr_s[:,2]-VStr_s[:,4])+Composite[:,2]
VSTR_s[:,3] = T_s[2,1]*(VStr_s[:,3]-VStr_s[:,1])+T_s[4,1]*(VStr_s[:,3]-VStr_s[:,2])
+T_s[6,1]*(VStr_s[:,3]-VStr_s[:,4])+Composite[:,3]
VSTR_s[:,4] = T_s[3,1]*(VStr_s[:,4]-VStr_s[:,1])+T_s[5,1]*(VStr_s[:,4]-VStr_s[:,2])
+T_s[6,1]*(VStr_s[:,4]-VStr_s[:,3])+Composite[:,4]
elseif (Cand[S, 9] == 2 || Cand[S, 9] == 3 || Cand[S, 9] == 4) && Cand[S, 10] == 1
VSTR_s = ones(48, 3)
Composite = ones(size(X,1),1)*COMPOSITE'
VSTR_s[:,1] = T_s[1,1]*(VStr_s[:,1]-VStr_s[:,2])+T_s[2,1]*(VStr_s[:,1]-VStr_s[:,3])
+Composite[:,1]
VSTR_s[:,2] = T_s[1,1]*(VStr_s[:,2]-VStr_s[:,1])+T_s[3,1]*(VStr_s[:,2]-VStr_s[:,3])
+Composite[:,2]
VSTR_s[:,3] = T_s[2,1]*(VStr_s[:,3]-VStr_s[:,1])+T_s[3,1]*(VStr_s[:,3]-VStr_s[:,2])
+Composite[:,3]
elseif Cand[S, 10] == 0 # after super tuesday
# VSTR_s=VSin_s+C0+X(:,2:end)*Cx(1:3)*ones(1,N_candS) ?
VSTR_s = VSin_s + 2*(Senate_s+Governer_s)
end
# Utiltiy of Strategic with no house elections
VSTR_s = VSTR_s - C0 - X[:,2:end]*Cx*ones(1,N_candS) - Senate_s - Governer_s
# eligible voters
RTot_s = RTOT[Cand[S, 14]:Cand[S, 15], :]
RTot_s = max(RTot_s, sum(Votes[Cand[S, 14]:Cand[S, 15], :], 2))
Votes_s = Votes[Cand[S, 14]:Cand[S, 15], :]./(RTot_s*ones(1,4)) #vote share data
# 行削除
# Votes_s[:, Dropped_s] = []
Votes_s = Votes_s[:, Rem]
loglik_m = zeros(M,1)
VSTR_ss = Array(Float64, N_sim,N_candS)
VSIN_ss = Array(Float64, N_sim,N_candS)
# ここ以降が遅い
# どっちも遅いが、beforeの方が平均的に遅い
if Cand[S, 10] == 1
A1 = Array(Float64, N_dFX, N_candS)
A2 = Array(Float64, N_dFX, N_candS)
B1 = Array(Float64, 1, N_candS)
B2 = Array(Float64, 1, N_candS)
cont1 = Array(Float64, N_dFX)
cont2 = Array(Float64, N_dFX)
#eVSIN_ss = Array(Float64, N_dFX, N_candS)
#eVSTR_ss = Array(Float64, N_dFX, N_candS)
for m in 1:M
VSin_s = VSin_s - Cz[2,1]*DATA[Cand[S, 14] + m - 1, 28]
VSTR_s = VSTR_s - Cz[2,1]*DATA[Cand[S, 14] + m - 1, 28]
# for文に書き換え
for sim in 1:N_sim
# nakami = max(min(VSin_s + ones(N_dFX,1)*reshape(Xsi_s[m,1:N_candS, sim], 1, N_candS), 200), -200)
# nakami2 = max(min(VSTR_s + ones(N_dFX,1)*reshape(Xsi_s[m,1:N_candS,sim], 1, N_candS), 200), -200)
for i in 1:N_candS
for j in 1:N_dFX
A1[j, i] = VSin_s[j, i] + Xsi_s[m, i, sim]
A2[j, i] = VSTR_s[j, i] + Xsi_s[m, i, sim]
end
end
nakami = max(min(A1, 200.0), -200.0)
nakami2 = max(min(A2, 200.0), -200.0)
eVSIN_ss = exp(nakami)./ (1+sum(exp(nakami),2)*ones(1,N_candS))
eVSTR_ss = exp(nakami2)./ (1+sum(exp(nakami2),2)*ones(1,N_candS))
#naka = sum(exp(nakami), 2)
#naka2 = sum(exp(nakami2), 2)
#for i in 1:N_candS
# for j in 1:N_dFX
# eVSIN_ss[j, i] = exp(nakami[j,i])/(1+ naka[j])
# eVSTR_ss[j, i] = exp(nakami2[j,i])/(1+ naka2[j])
#end
#end
# VSTR_ss[sim, :] = reshape(dFX_s[m, :],1, 48)*eVSTR_ss
# VSIN_ss[sim, :] = reshape(dFX_s[m, :], 1, 48)*eVSIN_ss
for i in 1:N_candS
for j in 1:N_dFX
cont1[j] = dFX_s[m, j]*eVSTR_ss[j, i]
cont2[j] = dFX_s[m, j]*eVSIN_ss[j, i]
end
VSTR_ss[sim, i] = sum(cont1)
VSIN_ss[sim, i] = sum(cont2)
end
end
Alp_ss = Alpha_s[m, :]
VSHARE = VSTR_ss.*(Alp_ss*ones(1,N_candS)) + VSIN_ss.*(1-Alp_ss*ones(1,N_candS))
# pdfはStatsFunsのnorrmpdfを使用
loglik_m[m,1] = log(sum(prod(normpdf((ones(N_sim,1)*reshape(Votes_s[m,:], 1, N_candS) - VSHARE)/bandwidth),2),1)/N_sim)[1]
end
loglik_s[S,1] = sum(loglik_m)
elseif Cand[S, 10] == 0
A1 = Array(Float64, N_dFX, N_candS)
B2 = Array(Float64, 1, N_candS)
cont2 = Array(Float64, N_dFX)
#eVSIN_ss = Array(Float64, N_dFX, N_candS)
for m in 1:M
VSin_s = VSin_s-Cz[2,1]*DATA[Cand[S, 14]+m-1, 28]
AST = 1./(1+exp(C0+X[:,2:end]*Cx*ones(1,N_candS) + Cz[2,1]*DATA[Cand[S, 14]+m-1, 28]+Senate_s+Governer_s))
# AST: Turnout of strategic voters after super tuesday
for sim in 1:N_sim
# nakami = max(min(VSin_s + ones(N_dFX,1)*reshape(Xsi_s[m,1:N_candS, sim], 1, N_candS), 200), -200)
for i in 1:N_candS
for j in 1:N_dFX
A1[j, i] = VSin_s[j, i] + Xsi_s[m, i, sim]
end
end
nakami = max(min(A1, 200.0), -200.0)
eVSIN_ss = exp(nakami)./ (1 + sum(exp(nakami),2)*ones(1,N_candS))
# naka = sum(exp(nakami), 2)
# for i in 1:N_candS
# for j in 1:N_dFX
# eVSIN_ss[j, i] = exp(nakami[j,i])/(1+ naka[j])
#end
#end
# VSIN_ss[sim, :] = reshape(dFX_s[m, :], 1, 48) * eVSIN_ss
for i in 1:N_candS
for j in 1:N_dFX
cont2[j] = dFX_s[m, j]*eVSIN_ss[j, i]
end
VSIN_ss[sim, i] = sum(cont2)
end
end
VSHARE = VSIN_ss
# pdfはStatsFunsのnorrmpdfを使用
loglik_m[m,1] = log(sum(prod(normpdf((ones(N_sim,1)*reshape(Votes_s[m,:], 1, N_candS) - VSHARE)/bandwidth),2),1)/N_sim)[1]
end
loglik_s[S,1] = sum(loglik_m)
end
end
# これなんだろ
if S == 10
S = S
end
end
loglik = sum(loglik_s)
return loglik = -loglik
end
Out[19]:
In [16]:
# 全部をfor文に
@time new_loglike(parameter, DATA, Cand)
Out[16]:
In [20]:
# 真ん中だけ行列演算
@time new_loglike(parameter, DATA, Cand)
Out[20]:
In [12]:
[a[i]+b[j, i] for j in 1:2 for i in 1:2]
Out[12]:
In [ ]: