In [1]:
from collections import Counter
import math, random
In [2]:
def split_data(data, prob):
    """Randomly partition `data` into two lists.

    Each row is drawn independently: it lands in the first list when
    ``random.random() < prob`` and in the second list otherwise, so the
    expected proportions are [prob, 1 - prob] (the actual sizes vary from
    run to run).

    Returns a 2-tuple ``(first, second)`` of lists; input order is preserved
    inside each list.
    """
    selected, remainder = [], []
    for row in data:
        # Exactly one RNG draw per row, in iteration order.
        if random.random() < prob:
            selected.append(row)
        else:
            remainder.append(row)
    return selected, remainder
def train_test_split(x, y, test_pct):
    """Randomly split paired observations into training and test sets.

    `x` and `y` are paired element-wise with ``zip`` (so the longer one is
    silently truncated to the shorter), and each pair is sent to the test set
    with probability `test_pct` via ``split_data``.

    Returns ``(x_train, x_test, y_train, y_test)``, each a tuple. A partition
    that happens to be empty (e.g. ``test_pct == 0`` or very small inputs)
    yields empty tuples instead of raising ``ValueError``.
    """
    def unzip(pairs):
        # zip(*[]) produces nothing, so unpacking it into two names would
        # raise "not enough values to unpack"; handle the empty case explicitly.
        if not pairs:
            return (), ()
        firsts, seconds = zip(*pairs)
        return firsts, seconds

    data = list(zip(x, y))                        # pair corresponding values
    train, test = split_data(data, 1 - test_pct)  # split the dataset of pairs
    x_train, y_train = unzip(train)
    x_test, y_test = unzip(test)
    return x_train, x_test, y_train, y_test
In [3]:
# Sepal measurements keyed by column name; both columns hold 50 values and are
# index-aligned (entry i of each list belongs to the same observation).
iris_data = {
    "Sepal.Length": [
        5.1, 4.9, 4.7, 4.6, 5, 5.4, 4.6, 5, 4.4, 4.9,
        5.4, 4.8, 4.8, 4.3, 5.8, 5.7, 5.4, 5.1, 5.7, 5.1,
        5.4, 5.1, 4.6, 5.1, 4.8, 5, 5, 5.2, 5.2, 4.7,
        4.8, 5.4, 5.2, 5.5, 4.9, 5, 5.5, 4.9, 4.4, 5.1,
        5, 4.5, 4.4, 5, 5.1, 4.8, 5.1, 4.6, 5.3, 5,
    ],
    "Sepal.Width": [
        3.5, 3, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, 2.9, 3.1,
        3.7, 3.4, 3, 3, 4, 4.4, 3.9, 3.5, 3.8, 3.8,
        3.4, 3.7, 3.6, 3.3, 3.4, 3, 3.4, 3.5, 3.4, 3.2,
        3.1, 3.4, 4.1, 4.2, 3.1, 3.2, 3.5, 3.6, 3, 3.4,
        3.5, 2.3, 3.2, 3.5, 3.8, 3, 3.8, 3.2, 3.7, 3.3,
    ],
}
# Seed the global RNG so this random split -- and the counts displayed in the
# following cells -- come out the same on every Restart & Run All.
random.seed(42)
# Each Sepal.Length value goes to the training set with probability 0.66,
# otherwise to the test set.
training_data, test_data = split_data(iris_data["Sepal.Length"], 0.66)
In [4]:
# Number of Sepal.Length observations before splitting.
len(iris_data["Sepal.Length"])
Out[4]:
In [5]:
len(test_data) # held-out data used for evaluation after training; roughly 1/3 (the split is random, so the exact count varies)
Out[5]:
In [6]:
len(training_data) # data used for training; roughly 2/3 (the split is random, so the exact count varies)
Out[6]:
In [7]:
# Pair each Sepal.Length with its Sepal.Width and send each pair to the test
# set with probability 0.33; the remaining pairs form the training set.
x_train, x_test, y_train, y_test = train_test_split(
    iris_data["Sepal.Length"], iris_data["Sepal.Width"], 0.33
)
# The original cell bound this value to the misspelled name `y_yest`, leaving
# `y_test` undefined. Keep the old name as an alias so any later cell that
# refers to it still works.
y_yest = y_test
In [8]:
def accuracy(tp, fp, fn, tn):
    """Fraction of all predictions that were correct.

    Takes the four confusion-matrix counts (true positives, false positives,
    false negatives, true negatives) and returns ``(tp + tn) / (all four)``.
    Raises ZeroDivisionError when all four counts are zero.
    """
    return (tp + tn) / (tp + fp + fn + tn)
def precision(tp, fp, fn, tn):
    """Fraction of positive predictions that were actually positive.

    Only `tp` and `fp` are used; `fn` and `tn` are accepted so that all the
    metric functions share one signature. Raises ZeroDivisionError when
    ``tp + fp`` is zero (no positive predictions).
    """
    predicted_positive = tp + fp
    return tp / predicted_positive
def recall(tp, fp, fn, tn):
    """Fraction of actual positives that were predicted positive.

    Only `tp` and `fn` are used; `fp` and `tn` are accepted so that all the
    metric functions share one signature. Raises ZeroDivisionError when
    ``tp + fn`` is zero (no actual positives).
    """
    actual_positive = tp + fn
    return tp / actual_positive
def f1_score(tp, fp, fn, tn):
    """Harmonic mean of precision and recall for the given confusion counts.

    Computes ``2 * p * r / (p + r)`` where ``p = precision(...)`` and
    ``r = recall(...)``. When both are zero (no true positives, but some
    false positives and false negatives) the score is returned as 0.0
    instead of raising ZeroDivisionError on ``0 / 0``.

    Any ZeroDivisionError raised by ``precision`` or ``recall`` themselves
    (an empty denominator there) still propagates unchanged.
    """
    p = precision(tp, fp, fn, tn)
    r = recall(tp, fp, fn, tn)
    if p + r == 0:
        # Both terms are zero, so the harmonic mean would be 0/0; F1 is 0 here.
        return 0.0
    return 2 * p * r / (p + r)
# Evaluate every metric on the same confusion-matrix counts (tp, fp, fn, tn)
# and print each result next to the call it came from.
for label, metric in (
    ("accuracy(70, 4930, 13930, 981070)", accuracy),
    ("precision(70, 4930, 13930, 981070)", precision),
    ("recall(70, 4930, 13930, 981070)", recall),
    ("f1_score(70, 4930, 13930, 981070)", f1_score),
):
    print(label, metric(70, 4930, 13930, 981070))