notebook.community

Edit and run



In [1]:

    
from collections import Counter
import math, random



In [2]:

    
def split_data(data, prob):
    """Randomly partition ``data`` into two lists.

    Each row is sent, independently, to the first list with probability
    ``prob`` and to the second list otherwise, so the resulting sizes are
    only approximately ``prob`` and ``1 - prob`` of the input.

    Returns:
        A 2-tuple ``(first, second)`` of lists; relative order of rows is
        preserved inside each list.
    """
    first, second = [], []
    for item in data:
        # exactly one random draw per row decides its bucket
        bucket = first if random.random() < prob else second
        bucket.append(item)
    return first, second

def train_test_split(x, y, test_pct):
    """Randomly split paired samples into training and test sets.

    Args:
        x: sequence of input values.
        y: sequence of target values, paired with ``x`` by position.
        test_pct: probability with which each (x, y) pair is assigned to
            the test set; the remaining pairs go to the training set.

    Returns:
        ``(x_train, x_test, y_train, y_test)``, each a tuple. A partition
        that received no pairs is returned as a pair of empty tuples
        instead of raising ``ValueError``.
    """
    data = list(zip(x, y))                        # pair corresponding values
    train, test = split_data(data, 1 - test_pct)  # split the dataset of pairs

    def _unzip(pairs):
        # zip(*[]) yields nothing, so unpacking it into two names would fail;
        # an empty partition is legitimate for small inputs or extreme test_pct
        if not pairs:
            return (), ()
        xs, ys = zip(*pairs)
        return xs, ys

    x_train, y_train = _unzip(train)
    x_test, y_test = _unzip(test)
    return x_train, x_test, y_train, y_test



In [3]:

    
# Sepal measurements keyed by column name; each list holds 50 values and the
# two lists are paired by position (presumably rows of the iris dataset --
# confirm the source and units).
iris_data ={
"Sepal.Length": [5.1,4.9,4.7,4.6,5,5.4,4.6,5,4.4,4.9,5.4,4.8,
                 4.8,4.3,5.8,5.7,5.4,5.1,5.7,5.1,5.4,5.1,4.6,
                 5.1,4.8,5,5,5.2,5.2,4.7,4.8,5.4,5.2,5.5,4.9,
                 5,5.5,4.9,4.4,5.1,5,4.5,4.4,5,5.1,4.8,5.1,4.6,5.3,5], 
"Sepal.Width": [3.5,3,3.2,3.1,3.6,3.9,3.4,3.4,2.9,3.1,3.7,3.4,3,3,4,
                4.4,3.9,3.5,3.8,3.8,3.4,3.7,3.6,3.3,3.4,3,3.4,3.5,
                3.4,3.2,3.1,3.4,4.1,4.2,3.1,3.2,3.5,3.6,3,3.4,
                3.5,2.3,3.2,3.5,3.8,3,3.8,3.2,3.7,3.3]
}

# Randomly partition the sepal lengths: each value lands in training_data with
# probability 0.66, otherwise in test_data. No random seed is set in the
# visible cells, so the sizes and membership can differ between runs.
training_data,test_data =  split_data(iris_data["Sepal.Length"], 0.66)



In [4]:

    
len(iris_data["Sepal.Length"])  # total number of samples before splitting









    Out[4]:





50



In [5]:

    
len(test_data) # data held out for evaluation after training (roughly 1/3; the split is random)









    Out[5]:





17



In [6]:

    
len(training_data) # data used for training (roughly 2/3; the split is random)









    Out[6]:





33



In [7]:

    
# Split the position-paired (length, width) samples; each pair is sent to the
# test set with probability 0.33.
x_train, x_test, y_train, y_test = train_test_split(iris_data["Sepal.Length"], iris_data["Sepal.Width"], 0.33)
y_yest = y_test  # alias for the original misspelled name, kept in case other cells still reference it



In [8]:

    
def accuracy(tp, fp, fn, tn):
    """Return the fraction of all predictions that were correct.

    Correct predictions are the true positives plus the true negatives;
    the denominator is the sum of all four confusion-matrix counts.
    Raises ZeroDivisionError when all four counts are zero.
    """
    return (tp + tn) / (tp + fp + fn + tn)

def precision(tp, fp, fn, tn):
    """Return the fraction of positive predictions that were truly positive.

    ``fn`` and ``tn`` are accepted for a uniform metric signature but are
    not used. Raises ZeroDivisionError when ``tp + fp`` is zero.
    """
    predicted_positive = tp + fp
    return tp / predicted_positive

def recall(tp, fp, fn, tn):
    """Return the fraction of actual positives that were predicted positive.

    ``fp`` and ``tn`` are accepted for a uniform metric signature but are
    not used. Raises ZeroDivisionError when ``tp + fn`` is zero.
    """
    actual_positive = tp + fn
    return tp / actual_positive

def f1_score(tp, fp, fn, tn):
    """Return the F1 score: the harmonic mean of precision and recall.

    Args:
        tp, fp, fn, tn: confusion-matrix counts (true/false positives and
            negatives), forwarded unchanged to ``precision`` and ``recall``.

    Returns:
        ``2 * p * r / (p + r)``, or ``0.0`` when precision and recall are
        both zero (no true positives).

    Raises:
        ZeroDivisionError: propagated from ``precision`` when ``tp + fp``
            is zero, or from ``recall`` when ``tp + fn`` is zero.
    """
    p = precision(tp, fp, fn, tn)
    r = recall(tp, fp, fn, tn)

    # With no true positives both p and r are 0, so the harmonic-mean formula
    # would divide by zero; the F1 score in that case is 0.
    if p + r == 0:
        return 0.0
    return 2 * p * r / (p + r)

# Evaluate every metric on the same example confusion matrix and print each
# result next to the call it corresponds to.
for _label, _metric in (
    ("accuracy(70, 4930, 13930, 981070)", accuracy),
    ("precision(70, 4930, 13930, 981070)", precision),
    ("recall(70, 4930, 13930, 981070)", recall),
    ("f1_score(70, 4930, 13930, 981070)", f1_score),
):
    print(_label, _metric(70, 4930, 13930, 981070))









    



accuracy(70, 4930, 13930, 981070) 0.98114
precision(70, 4930, 13930, 981070) 0.014
recall(70, 4930, 13930, 981070) 0.005
f1_score(70, 4930, 13930, 981070) 0.00736842105263158