notebook.community

Edit and run



In [3]:

    
import numpy as np



In [4]:

    
def _load_shuttle_split(path):
    """Read one whitespace-delimited data file into features and labels.

    Every non-blank line is parsed as a row of floats. All values except the
    last form the feature row; the last value is converted to an ``int`` label.

    Parameters
    ----------
    path : str
        Location of the file to read.

    Returns
    -------
    tuple of (numpy.ndarray, numpy.ndarray)
        The feature rows and the integer labels, both in file order.
    """
    features, labels = [], []
    # The context manager closes the handle as soon as reading finishes;
    # iterating a bare open(...) leaves that to the garbage collector.
    with open(path) as handle:
        for raw_line in handle:
            values = [float(token) for token in raw_line.split()]
            if not values:
                # Blank line (e.g. a trailing newline): nothing to parse.
                continue
            features.append(values[:-1])
            labels.append(int(values[-1]))
    return np.array(features), np.array(labels)


# Same parsing for both splits, so the logic lives in one place.
X_train, Y_train = _load_shuttle_split('data/shuttle.trn')
X_test, Y_test = _load_shuttle_split('data/shuttle.tst')



In [5]:

    
# Report the size of the training matrix: axis 0 holds the rows appended while
# reading the file, axis 1 holds the values kept from each row.
sample_count = X_train.shape[0]
print("Number of samples", sample_count)
feature_count = X_train.shape[1]
print("Number of dimensions", feature_count)









    



Number of samples 43500
Number of dimensions 9



In [6]:

    
import matplotlib.pyplot as plt

# Box plot of the column at index 8 of the training matrix.
# The explicit fig/ax interface is used so the figure can carry a title and
# axis label and still make sense when the notebook is only skimmed.
fig, ax = plt.subplots()
ax.boxplot(X_train[:, 8])
ax.set(title="Training set: distribution of feature index 8", ylabel="Feature value")
plt.show()



In [7]:

    
# Frequency of each class label in the training set.
# The default of 10 equal-width bins does not line up with integer class ids,
# so bars end up at uneven positions with spurious gaps. Unit-width bins
# centred on every integer between the smallest and largest label fix that.
label_min, label_max = Y_train.min(), Y_train.max()
label_bin_edges = np.arange(label_min, label_max + 2) - 0.5
fig, ax = plt.subplots()
ax.hist(Y_train, bins=label_bin_edges, rwidth=0.8)
ax.set_xticks(np.arange(label_min, label_max + 1))
ax.set(title="Training label distribution", xlabel="Class label", ylabel="Number of samples")
plt.show()



In [8]:

    
# Share of training labels that are equal to 1.
class_one_count = int((Y_train == 1).sum())
print(float(class_one_count) / len(Y_train))









    



0.7840919540229885



In [9]:

    
from sklearn.naive_bayes import GaussianNB

# Fit a Gaussian naive Bayes classifier on the training split (fit returns the
# estimator itself, so construction and fitting are chained), then predict a
# label for every row of the test split.
clf = GaussianNB().fit(X_train, Y_train)
preds = clf.predict(X_test)



In [10]:

    
# Quick sanity check: show the first ten predicted labels.
leading_preds = preds[:10]
print(leading_preds)









    



[2 1 1 2 4 1 7 1 1 1]



In [11]:

    
from sklearn.metrics import classification_report

# Per-class precision, recall, F1 and support for the current `preds`
# (set by the GaussianNB cell above) measured against the test labels.
nb_report = classification_report(Y_test, preds)
print(nb_report)









    



             precision    recall  f1-score   support

          1       0.95      0.88      0.92     11478
          2       0.01      0.92      0.02        13
          3       0.11      0.59      0.19        39
          4       0.89      0.54      0.67      2155
          5       0.99      0.82      0.90       809
          6       0.40      1.00      0.57         4
          7       0.00      1.00      0.01         2

avg / total       0.94      0.83      0.88     14500



In [12]:

    
from sklearn.linear_model import LogisticRegression

# L2-penalised logistic regression fitted with the Newton-CG solver.
# Note that this rebinds `clf` and `preds`, replacing the values left by the
# GaussianNB cell.
clf = LogisticRegression(solver="newton-cg", penalty='l2').fit(X_train, Y_train)
preds = clf.predict(X_test)



In [13]:

    
# Per-class precision, recall, F1 and support for the current `preds`
# (set by the LogisticRegression cell above) measured against the test labels.
lr_report = classification_report(Y_test, preds)
print(lr_report)









    



             precision    recall  f1-score   support

          1       0.94      0.99      0.96     11478
          2       0.00      0.00      0.00        13
          3       0.00      0.00      0.00        39
          4       0.90      0.65      0.75      2155
          5       1.00      1.00      1.00       809
          6       0.00      0.00      0.00         4
          7       0.00      0.00      0.00         2

avg / total       0.93      0.94      0.93     14500







    



/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/sklearn/metrics/classification.py:1135: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples.
  'precision', 'predicted', average, warn_for)



In [ ]: