In [65]:
import numpy as np
import random
from math import floor
from keras.models import Sequential
from keras.layers.core import Dense, Activation
import csv
import copy
from math import sqrt
In [66]:
# Load the training data; the first row of the CSV is the header.
with open('train.csv') as source:
    trainset = list(csv.reader(source))

# Pop the header off so trainset holds only data rows.
# (Replaces deepcopy + del: copying a row just to delete the original,
# and the never-read `lst2` alias, served no purpose.)
tags = trainset.pop(0)
In [81]:
def ortho_poly_fit(x, degree=1):
    """Build an orthonormal polynomial basis for x (mirrors R's poly()).

    Parameters
    ----------
    x : array_like
        Sample points; flattened to 1-D.
    degree : int
        Polynomial degree; must be less than the number of unique points.

    Returns
    -------
    Z : ndarray, shape (len(x), degree + 1)
        Orthonormal basis columns, constant term first.
    norm2 : ndarray
        Squared column norms of the raw (unnormalized) basis.
    alpha : ndarray, shape (degree,)
        Centering coefficients of the recurrence; together with norm2
        these let new data be projected onto the same basis.

    Raises
    ------
    ValueError
        If degree >= number of unique points in x.
    """
    n = degree + 1
    x = np.asarray(x).flatten()
    if degree >= len(np.unique(x)):
        # BUG FIX: the original called stop(), an R idiom that is
        # undefined in Python and would have raised NameError.
        raise ValueError("'degree' must be less than number of unique points")
    xbar = np.mean(x)
    x = x - xbar
    # Vandermonde matrix with columns in increasing-power order.
    X = np.fliplr(np.vander(x, n))
    q, r = np.linalg.qr(X)
    z = np.diag(np.diag(r))
    raw = np.dot(q, z)
    norm2 = np.sum(raw ** 2, axis=0)
    alpha = (np.sum((raw ** 2) * np.reshape(x, (-1, 1)), axis=0) / norm2 + xbar)[:degree]
    Z = raw / np.sqrt(norm2)
    return Z, norm2, alpha
In [80]:
# Dead cell: leftover R model-formula notation (poly(...) terms) pasted
# from an R session. It is not valid Python and raised a SyntaxError when
# run; preserved here as comments so the feature notes are not lost.
# [train['connections'],train['foreign_conns'],train['foreign_conns2'],train['ccf'],train['conns_new']
# poly(conns_new,4)+poly(time,2)+
# poly(ccf_old,2)+account_age+poly(agegroup,4)+poly(browserID,3)+poly(days,4)+poly(usage1,4)+
# usage2+poly(usage3,4)+poly(usage4,4)+poly(usage5,4)+poly(usage5,4)
In [67]:
def getData(splitper, dataset=None):
    """Shuffle the dataset and split it into train/test feature/target arrays.

    Parameters
    ----------
    splitper : float
        Fraction in (0, 1); the TEST split gets the first
        floor(splitper * len(dataset)) + 1 rows after shuffling.
    dataset : list of rows, optional
        Rows of 17 columns (16 features, target at index 16). Defaults to
        the module-level `trainset` for backward compatibility.
        NOTE: the list is shuffled in place, as the original did.

    Returns
    -------
    tuple of numpy arrays
        (train_X, train_Y, test_X, test_Y).
    """
    if dataset is None:
        dataset = trainset  # preserve the original reliance on the global
    random.shuffle(dataset)  # in-place shuffle, matching original behavior
    split = int(floor(splitper * len(dataset)) + 1)
    # First `split` rows are the test set; the remainder is the train set.
    testinglst = dataset[:split]
    traininglst = dataset[split:]
    # Columns 0..15 are features, column 16 is the target.
    train_X = [row[0:16] for row in traininglst]
    train_Y = [row[16] for row in traininglst]
    test_X = [row[0:16] for row in testinglst]
    test_Y = [row[16] for row in testinglst]
    return (np.array(train_X), np.array(train_Y), np.array(test_X), np.array(test_Y))
In [68]:
# Hold out 20% of the shuffled rows as the test split (see getData).
(train_X,train_Y,test_X,test_Y)= getData(0.2)
In [69]:
# Sanity-check the split sizes (Python 2 print statement).
print 'trainx:{},trainy:{},testx:{},testy{}'.format(train_X.shape,train_Y.shape,test_X.shape,test_Y.shape)
# Batch size of 1 makes training very slow; kept as-is for reproducibility.
batchsize=1
In [70]:
print 'Building model'
model = Sequential()
model.add(Dense(10, input_dim=16))
model.add(Activation('tanh'))
model.add(Dense(1, input_shape=(16,)))
model.compile(loss='mse', optimizer='rmsprop')
print 'start fitting'
model.fit(train_X, train_Y, nb_epoch=30, batch_size=batchsize,validation_split=0.05, show_accuracy=True)
score = model.evaluate(test_X, test_Y, batch_size=batchsize,show_accuracy=True)
In [50]:
# RMSE on the held-out split; assumes score[0] is the MSE loss returned
# by model.evaluate -- TODO confirm against the installed Keras version.
print sqrt(score[0])
Out[50]:
In [52]:
# Load the test data and split off its header row.
with open('test.csv') as source:
    spam = csv.reader(source)
    testset = list(spam)
# BUG FIX: the original copied trainset[0] -- but the training header was
# already deleted, so that grabbed a training DATA row. Take the test
# file's own header instead (the unused `lst2` alias is dropped).
testtags = copy.deepcopy(testset[0])
del testset[0]
In [55]:
# Drop the first column of each test row (presumably a row ID, given the
# 1-based IDs written in the submission -- TODO confirm) and keep the
# remaining columns as the feature values fed to the model.
topredict_X = [row[1:] for row in testset]
In [57]:
# Convert the list of rows to a numpy array for model.predict.
topredict_X= np.array(topredict_X)
In [61]:
# Predict targets for every test row; the prediction batch size need not
# match the training batchsize.
prediction_Y= model.predict(topredict_X,batch_size=16,verbose=0)
In [64]:
# Write the submission file: a header line followed by one "ID,target"
# row per prediction, with IDs numbered from 1.
with open('submit 004.csv','w') as output:
    output.write('ID,target\n')
    for row_id, row in enumerate(prediction_Y, 1):
        output.write('{},{}\n'.format(row_id, row[0]))
In [ ]: