In [5]:
import numpy as np
import pandas
import scipy, scipy.spatial
import sklearn
import sys
import os, re
from matplotlib import pyplot as plt
%matplotlib inline
In [17]:
# Collect the prediction files (named like `res.<digits>...pred`) found in the
# results directory.
# - The pattern is a raw string so that `\.` is a regex escape and not an
#   invalid Python string escape (which triggers a warning on modern Python).
# - os.listdir() returns entries in arbitrary order; sorting makes every later
#   pass over `flst` visit the files in the same order on every run.
flst = sorted(
    f for f in os.listdir('../results/lower/')
    if re.match(r'res\.[0-9]+.*\.pred$', f)
)
len(flst)
Out[17]:
In [36]:
# Load the first column of two comma-separated, header-less files:
#   dsel - the "lower" subset of the test data
#   df   - the second test-data file it is matched against below
# NOTE(review): the second path is an absolute, machine-specific location;
# consider moving it under a configurable data directory.
dsel = pandas.read_csv('../data/data_test.lower.txt', header=None).iloc[:, 0]
df = pandas.read_csv('/home/vahid/Downloads/data/ml/data_test.txt', header=None).iloc[:, 0]
print(dsel.shape, df.shape)
In [46]:
# Map every entry of the subset `dsel` to the position of the equal entry in
# `df`. The search for entry i resumes just after the previous match (`js`),
# so matches are expected to appear in `df` in the same relative order as in
# `dsel`.
#
# Entries start at -1 rather than 0 so that a row with no match can be told
# apart from a genuine match at position 0; any such row is reported below
# instead of being silently mapped to row 0.
sel_rows = dsel.values    # plain arrays: avoids per-element Series lookups
full_rows = df.values
idx = np.full(sel_rows.shape[0], -1, dtype=int)
js = 0
for i in range(sel_rows.shape[0]):
    for j in range(js, full_rows.shape[0]):
        if sel_rows[i] == full_rows[j]:
            if i % 500 == 0:
                # Sparse progress trace: subset position and matched position.
                sys.stdout.write('%d %d\t' % (i, j))
            idx[i] = j
            js = j + 1
            break

unmatched = np.flatnonzero(idx < 0)
if unmatched.size:
    raise ValueError(
        '%d subset rows have no match in the full data (first few: %s)'
        % (unmatched.size, unmatched[:10].tolist())
    )
In [50]:
# Persist the subset-to-full-data mapping as 1-based integers, one per line.
np.savetxt(fname='../data/index_lower.txt', X=idx + 1, fmt='%d')
In [77]:
def combKNN(ic, thresh=20000):
    """Combine the prediction files in `flst` into a ranked vote table for class `ic`.

    Each file in the module-level list `flst` is read from '../results/lower/'
    as a space-separated, header-less table. For every row position, the number
    of cells equal to `ic` is counted and accumulated across files. Files are
    processed in `flst` order until the accumulated number of votes exceeds
    `thresh`; after every file that does not hit the threshold, the number of
    distinct rows seen so far is written to stderr.

    The (at most) 100 row positions with the most votes are written to
    '../results/res.c<ic>.dat' as two integer columns: the row position mapped
    through the module-level array `idx` and made 1-based, and the vote count.

    Parameters
    ----------
    ic : int
        Class label to look for in the prediction tables.
    thresh : int, optional
        Stop reading further files once the total vote count exceeds this.

    Returns
    -------
    pandas.DataFrame
        The table that was written (column 0: mapped 1-based index, column 1:
        votes). Empty when no cell in any file equals `ic`.
    """
    votes = {}      # row position within a prediction file -> accumulated votes
    total = 0       # running sum of all votes, compared against `thresh`
    for fname in flst:
        preds = pandas.read_table('../results/lower/' + fname, sep=' ', header=None)
        # Count matching cells for every row at once instead of one row at a time.
        per_row = (preds == ic).sum(axis=1).values
        for row in np.flatnonzero(per_row):
            row = int(row)
            votes[row] = votes.get(row, 0) + int(per_row[row])
        total += int(per_row.sum())
        if total > thresh:
            break
        sys.stderr.write('%d\t' % len(votes))

    # Rank by descending votes. Sorting the keys first and using a stable sort
    # makes ties resolve deterministically (lower row position first).
    rows = np.array(sorted(votes), dtype=int)
    counts = np.array([votes[r] for r in rows], dtype=int)
    top = np.argsort(-counts, kind='mergesort')[:100]

    # Build the table directly; with no votes this yields an empty two-column
    # frame and an empty output file rather than an indexing error.
    res = pandas.DataFrame({0: idx[rows[top]] + 1, 1: counts[top]})
    np.savetxt('../results/res.c%d.dat' % ic, res.values, fmt='%d')
    return res
In [ ]:
# Run the combination step once per class label, 1 through 100, bracketing
# each call with a start/finish marker on stderr.
for k in range(1, 100 + 1):
    sys.stderr.write('Class %d ==> ' % (k,))
    combKNN(k)
    sys.stderr.write('\tFinished!!\n')
In [71]:
# Show the `idx` entries selected by the first ten values of column 0 of `res`.
# NOTE(review): in the visible cells `res` is only assigned inside combKNN, so
# this cell appears to rely on a notebook-level `res` left over from an earlier
# kernel state (its execution count, 71, precedes combKNN's 77) - confirm it
# still runs after Restart & Run All.
idx[res.iloc[:10,0]]
Out[71]:
In [ ]: