In [1]:
import pandas as pd
In [2]:
def knn0(datacsv, testcsv):
    """Load the reference and query tables and reset the shared k-NN state.

    Parameters
    ----------
    datacsv : str, path or file-like
        Anything ``pandas.read_csv`` accepts; loaded into the global ``data``.
    testcsv : str, path or file-like
        Anything ``pandas.read_csv`` accepts; loaded into the global ``test``.

    Side effects
    ------------
    Rebinds the module globals ``data``, ``test``, ``i``, ``d`` and ``ac``:

    * ``test`` gains a ``'pre'`` column filled with the placeholder string
      ``'0'``.
    * ``i`` and ``d`` are reset to empty DataFrames.
    * ``ac`` (the hit counter that ``knn2`` increments) is reset to ``0`` so
      the counter is always defined once setup has run.

    Returns
    -------
    None
    """
    global data, test, i, d, ac
    data = pd.read_csv(datacsv)
    test = pd.read_csv(testcsv)
    test['pre'] = '0'  # placeholder, overwritten per row by knn2
    i = pd.DataFrame()
    d = pd.DataFrame()
    # `ac` was declared global here but never assigned; give it a defined
    # starting value so later `ac += 1` cannot hit a NameError.
    ac = 0
    return
In [ ]:
In [ ]:
In [3]:
def knn1():
    """Add one Euclidean-distance column per test row to the global ``data``.

    For the test row at position ``a`` (0-based), a column named ``a`` is
    written into ``data`` holding
    ``sqrt((data.rWC - test.rWC[a])**2 + (data.rCh - test.rCh[a])**2)``
    for every reference row.

    The query coordinates are read by position rather than by index label, so
    a ``test`` frame whose index is not the default 0..n-1 (for example after
    filtering) is handled instead of raising ``KeyError`` or picking the wrong
    row.

    Side effects
    ------------
    Mutates the global ``data`` in place and rebinds the global ``d`` to it.

    Returns
    -------
    pandas.DataFrame
        The same object as the global ``data``, including the new columns.
    """
    global d
    # Plain arrays give positional access that does not depend on test's index.
    query_rwc = test.rWC.to_numpy()
    query_rch = test.rCh.to_numpy()
    for a in range(len(test.index)):
        delta_rwc = data.rWC - query_rwc[a]
        delta_rch = data.rCh - query_rch[a]
        data[a] = (delta_rwc ** 2 + delta_rch ** 2) ** 0.5
    # Bind `d` even when test is empty so the return value is always `data`.
    d = data
    return d
In [ ]:
In [4]:
def knn2(k):
    """Classify every test row by majority vote among its ``k`` nearest rows.

    For the test row at position ``a`` the global ``data`` is sorted ascending
    by the column named ``a``, the first ``k`` rows are kept, and the most
    frequent ``Type`` among them is written to ``test['pre']`` for that row.

    Parameters
    ----------
    k : int
        Number of nearest reference rows that vote.

    Side effects
    ------------
    * Writes the predictions into the ``'pre'`` column of the global ``test``
      (the column is created if it is missing).
    * Adds the number of correct predictions to the global ``ac``; a missing
      ``ac`` is treated as ``0``.
    * Sets the global ``acc`` to ``ac / len(test)``, or ``nan`` when ``test``
      has no rows (instead of dividing by zero).

    Returns
    -------
    None
    """
    global ac, acc
    n_test = len(test.index)
    if 'pre' not in test.columns:
        test['pre'] = '0'
    # Write by position: label-based `.loc[a, ...]` with a positional counter
    # would append new rows whenever test's index is not 0..n-1.
    pre_pos = test.columns.get_loc('pre')
    type_pos = test.columns.get_loc('Type')

    hits = 0
    for a in range(n_test):
        i = data.sort_values([a], ascending=True).head(n=k)
        # Series.value_counts replaces the deprecated top-level pd.value_counts.
        predicted = i['Type'].value_counts().idxmax()
        test.iloc[a, pre_pos] = predicted
        if predicted == test.iloc[a, type_pos]:
            hits += 1

    # Keep the accumulate-into-global contract callers rely on, but do not
    # crash when nobody has seeded the counter yet.
    ac = globals().get('ac', 0) + hits
    acc = ac / n_test if n_test else float('nan')
    return
In [ ]:
In [5]:
def knn(datacsv, testcsv, k):
    """Run the k-NN pipeline for every neighbour count from 1 to ``k``.

    Loads both CSVs with ``knn0``, builds the distance columns with ``knn1``,
    then calls ``knn2`` once per neighbour count and records the global
    ``acc`` it leaves behind.

    Parameters
    ----------
    datacsv, testcsv
        Passed straight through to ``knn0``.
    k : int
        Largest neighbour count to evaluate; counts ``1..k`` are all run.

    Returns
    -------
    tuple
        ``(test, scores)`` where ``test`` is the global test frame as left by
        the last ``knn2`` call and ``scores`` maps each neighbour count to the
        ``acc`` value recorded for it.
    """
    global test, ac
    knn0(datacsv, testcsv)
    knn1()

    scores = {}
    for neighbours in range(1, k + 1):
        # knn2 adds its hits to the shared counter, so zero it before each run.
        ac = 0
        knn2(neighbours)
        scores[neighbours] = acc
    return test, scores
In [6]:
# Evaluate neighbour counts 1..7 on the two CSV files; the cell output is the
# (test, dic) tuple that knn returns.
knn('atomsradii.csv','testing.csv',7)
Out[6]:
In [ ]: