In [ ]:
# pandas is referenced as `pd` by the cells below.
import pandas as pd
In [ ]:
# Load the cleaned PHMRC table; the path is relative to the notebook's directory.
df = pd.read_csv('../3-data/phmrc_cleaned.csv')
In [ ]:
# numpy is referenced as `np` by the cells below.
import numpy as np
In [ ]:
# Feature matrix: keep the columns whose names start with "s<digits>", "age"
# or "sex", replace missing entries with 0, and convert to a plain array.
X = np.array(
    df.filter(regex='^(s[0-9]+|age|sex)')
      .fillna(0)
)

# Label vector: the gs_text34 column as a plain array.
y = np.array(df['gs_text34'])
In [ ]:
import sklearn.naive_bayes
In [ ]:
# Bernoulli naive Bayes classifier, constructed with no arguments
# (i.e. the library's default parameters).
clf = sklearn.naive_bayes.BernoulliNB()
In [ ]:
# Provides the cross-validation splitters used as `sklearn.model_selection.*` below.
import sklearn.model_selection
In [ ]:
# 10-fold cross-validation splitter. Stratified so that every fold keeps
# roughly the same label mix as `y`; shuffled with a fixed seed so the
# folds are reproducible from run to run.
cv = sklearn.model_selection.StratifiedKFold(
    n_splits=10, shuffle=True, random_state=0
)
In [ ]:
# One out-of-sample accuracy value per cross-validation fold.
acc_list = []
for train, test in cv.split(X, y):
    # `train` and `test` are integer index arrays into the rows of X / y.
    clf.fit(X[train], y[train])
    y_pred = clf.predict(X[test])
    # Accuracy = fraction of held-out rows whose predicted label is correct.
    acc_list.append((y_pred == y[test]).mean())
In [ ]:
# refactor this into a function
def measure_acc(rep):
    """Return the per-fold accuracies of one 10-fold cross-validation run.

    Reads the module-level feature matrix ``X``, label vector ``y`` and
    classifier ``clf``; ``clf`` is refit on every fold.

    Parameters
    ----------
    rep : int
        Repetition number. It is used as the random seed for shuffling, so
        different repetitions get different folds while any single
        repetition is reproducible.

    Returns
    -------
    list of float
        One accuracy value (fraction of correct predictions) per fold.
    """
    splitter = sklearn.model_selection.StratifiedKFold(
        n_splits=10, shuffle=True, random_state=rep
    )

    acc_list = []
    for train, test in splitter.split(X, y):
        clf.fit(X[train], y[train])
        y_pred = clf.predict(X[test])
        acc_list.append((y_pred == y[test]).mean())
    return acc_list


measure_acc(rep=0)
In [ ]:
# repeat it 10 times
# Pool the per-fold accuracies from every repetition into one flat list.
acc_list = []
for rep in range(10):
    acc_list.extend(measure_acc(rep))
In [ ]:
# Summary statistics of the collected accuracies; the requested percentiles
# (2.5% and 97.5%) bracket the central 95% of the values.
pd.Series(acc_list).describe(percentiles=[.025, .975])