notebook.community

Edit and run



In [54]:

    
import pandas as pd
import numpy as np



In [55]:

    
data = pd.read_csv("student-alcohol-consumption/student-mat.csv")



In [60]:

    
data.head()









    Out[60]:







  
    
      
      school
      sex
      age
      address
      famsize
      Pstatus
      Medu
      Fedu
      Mjob
      Fjob
      ...
      internet
      romantic
      famrel
      freetime
      goout
      health
      absences
      G1
      G2
      G3
    
  
  
    
      0
      GP
      F
      18
      U
      GT3
      A
      4
      4
      at_home
      teacher
      ...
      no
      no
      4
      3
      4
      3
      6
      5
      6
      6
    
    
      1
      GP
      F
      17
      U
      GT3
      T
      1
      1
      at_home
      other
      ...
      yes
      no
      5
      3
      3
      3
      4
      5
      5
      6
    
    
      2
      GP
      F
      15
      U
      LE3
      T
      1
      1
      at_home
      other
      ...
      yes
      no
      4
      3
      2
      3
      10
      7
      8
      10
    
    
      3
      GP
      F
      15
      U
      GT3
      T
      4
      2
      health
      services
      ...
      yes
      yes
      3
      2
      2
      5
      2
      15
      14
      15
    
    
      4
      GP
      F
      16
      U
      GT3
      T
      3
      3
      other
      other
      ...
      no
      no
      4
      3
      2
      5
      4
      6
      10
      10
    
  

5 rows × 31 columns



In [57]:

    
y = np.array(data[["Dalc","Walc"]])



In [58]:

    
labels = 2*y[:,0] + y[:,1]



In [59]:

    
data.drop(["Dalc","Walc"], inplace=True, axis = 1)



In [62]:

    
data.drop(["school", "age", "reason","guardian", "schoolsup", "famsup", "nursery", "higher","internet", "romantic", "freetime","health", "absences"], inplace = True, axis = 1)



In [63]:

    
data









    Out[63]:







  
    
      
      sex
      address
      famsize
      Pstatus
      Medu
      Fedu
      Mjob
      Fjob
      traveltime
      studytime
      failures
      paid
      activities
      famrel
      goout
      G1
      G2
      G3
    
  
  
    
      0
      F
      U
      GT3
      A
      4
      4
      at_home
      teacher
      2
      2
      0
      no
      no
      4
      4
      5
      6
      6
    
    
      1
      F
      U
      GT3
      T
      1
      1
      at_home
      other
      1
      2
      0
      no
      no
      5
      3
      5
      5
      6
    
    
      2
      F
      U
      LE3
      T
      1
      1
      at_home
      other
      1
      2
      3
      yes
      no
      4
      2
      7
      8
      10
    
    
      3
      F
      U
      GT3
      T
      4
      2
      health
      services
      1
      3
      0
      yes
      yes
      3
      2
      15
      14
      15
    
    
      4
      F
      U
      GT3
      T
      3
      3
      other
      other
      1
      2
      0
      yes
      no
      4
      2
      6
      10
      10
    
    
      5
      M
      U
      LE3
      T
      4
      3
      services
      other
      1
      2
      0
      yes
      yes
      5
      2
      15
      15
      15
    
    
      6
      M
      U
      LE3
      T
      2
      2
      other
      other
      1
      2
      0
      no
      no
      4
      4
      12
      12
      11
    
    
      7
      F
      U
      GT3
      A
      4
      4
      other
      teacher
      2
      2
      0
      no
      no
      4
      4
      6
      5
      6
    
    
      8
      M
      U
      LE3
      A
      3
      2
      services
      other
      1
      2
      0
      yes
      no
      4
      2
      16
      18
      19
    
    
      9
      M
      U
      GT3
      T
      3
      4
      other
      other
      1
      2
      0
      yes
      yes
      5
      1
      14
      15
      15
    
    
      10
      F
      U
      GT3
      T
      4
      4
      teacher
      health
      1
      2
      0
      yes
      no
      3
      3
      10
      8
      9
    
    
      11
      F
      U
      GT3
      T
      2
      1
      services
      other
      3
      3
      0
      no
      yes
      5
      2
      10
      12
      12
    
    
      12
      M
      U
      LE3
      T
      4
      4
      health
      services
      1
      1
      0
      yes
      yes
      4
      3
      14
      14
      14
    
    
      13
      M
      U
      GT3
      T
      4
      3
      teacher
      other
      2
      2
      0
      yes
      no
      5
      3
      10
      10
      11
    
    
      14
      M
      U
      GT3
      A
      2
      2
      other
      other
      1
      3
      0
      no
      no
      4
      2
      14
      16
      16
    
    
      15
      F
      U
      GT3
      T
      4
      4
      health
      other
      1
      1
      0
      no
      no
      4
      4
      14
      14
      14
    
    
      16
      F
      U
      GT3
      T
      4
      4
      services
      services
      1
      3
      0
      yes
      yes
      3
      3
      13
      14
      14
    
    
      17
      F
      U
      GT3
      T
      3
      3
      other
      other
      3
      2
      0
      no
      yes
      5
      2
      8
      10
      10
    
    
      18
      M
      U
      GT3
      T
      3
      2
      services
      services
      1
      1
      3
      no
      yes
      5
      5
      6
      5
      5
    
    
      19
      M
      U
      LE3
      T
      4
      3
      health
      other
      1
      1
      0
      yes
      yes
      3
      3
      8
      10
      10
    
    
      20
      M
      U
      GT3
      T
      4
      3
      teacher
      other
      1
      2
      0
      no
      no
      4
      1
      13
      14
      15
    
    
      21
      M
      U
      GT3
      T
      4
      4
      health
      health
      1
      1
      0
      yes
      no
      5
      2
      12
      15
      15
    
    
      22
      M
      U
      LE3
      T
      4
      2
      teacher
      other
      1
      2
      0
      no
      yes
      4
      1
      15
      15
      16
    
    
      23
      M
      U
      LE3
      T
      2
      2
      other
      other
      2
      2
      0
      no
      yes
      5
      4
      13
      13
      12
    
    
      24
      F
      R
      GT3
      T
      2
      4
      services
      health
      1
      3
      0
      yes
      yes
      4
      2
      10
      9
      8
    
    
      25
      F
      U
      GT3
      T
      2
      2
      services
      services
      1
      1
      2
      yes
      no
      1
      2
      6
      9
      8
    
    
      26
      M
      U
      GT3
      T
      2
      2
      other
      other
      1
      1
      0
      yes
      no
      4
      2
      12
      12
      11
    
    
      27
      M
      U
      GT3
      T
      4
      2
      health
      services
      1
      1
      0
      yes
      no
      2
      4
      15
      16
      15
    
    
      28
      M
      U
      LE3
      A
      3
      4
      services
      other
      1
      2
      0
      no
      yes
      5
      3
      11
      11
      11
    
    
      29
      M
      U
      GT3
      T
      4
      4
      teacher
      teacher
      1
      2
      0
      yes
      yes
      4
      5
      10
      12
      11
    
    
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      365
      M
      R
      GT3
      T
      1
      3
      at_home
      other
      2
      2
      0
      yes
      no
      3
      4
      10
      10
      10
    
    
      366
      M
      U
      LE3
      T
      4
      4
      teacher
      services
      2
      3
      0
      yes
      no
      4
      2
      13
      13
      13
    
    
      367
      F
      R
      GT3
      T
      1
      1
      other
      services
      3
      1
      1
      yes
      no
      5
      1
      7
      6
      0
    
    
      368
      F
      U
      GT3
      T
      2
      3
      at_home
      services
      2
      1
      0
      yes
      no
      5
      3
      11
      10
      10
    
    
      369
      F
      R
      GT3
      T
      4
      4
      other
      teacher
      3
      2
      0
      yes
      no
      3
      2
      14
      12
      11
    
    
      370
      F
      U
      LE3
      T
      3
      2
      services
      services
      2
      2
      2
      no
      yes
      3
      2
      7
      7
      9
    
    
      371
      M
      R
      LE3
      T
      1
      2
      at_home
      services
      3
      1
      0
      yes
      yes
      4
      3
      14
      12
      12
    
    
      372
      F
      U
      GT3
      T
      2
      2
      other
      at_home
      1
      3
      0
      no
      yes
      3
      3
      13
      11
      11
    
    
      373
      F
      R
      GT3
      T
      1
      2
      other
      other
      1
      1
      0
      no
      yes
      3
      5
      6
      5
      5
    
    
      374
      F
      R
      LE3
      T
      4
      4
      other
      other
      2
      3
      0
      no
      no
      5
      4
      19
      18
      19
    
    
      375
      F
      R
      GT3
      T
      1
      1
      other
      other
      4
      3
      0
      no
      no
      4
      2
      8
      8
      10
    
    
      376
      F
      U
      GT3
      T
      4
      2
      health
      other
      2
      3
      2
      yes
      no
      5
      3
      15
      14
      15
    
    
      377
      F
      R
      LE3
      T
      4
      4
      teacher
      services
      1
      2
      0
      yes
      yes
      5
      3
      8
      9
      10
    
    
      378
      F
      U
      GT3
      T
      3
      3
      other
      other
      1
      2
      0
      yes
      no
      4
      3
      15
      15
      15
    
    
      379
      F
      R
      GT3
      T
      3
      1
      at_home
      other
      1
      2
      0
      yes
      yes
      4
      4
      10
      10
      10
    
    
      380
      M
      U
      GT3
      T
      4
      4
      teacher
      teacher
      1
      2
      0
      yes
      yes
      3
      4
      15
      14
      14
    
    
      381
      M
      R
      GT3
      T
      2
      1
      other
      other
      2
      1
      0
      no
      yes
      4
      3
      7
      6
      7
    
    
      382
      M
      U
      GT3
      T
      2
      3
      other
      services
      2
      2
      0
      no
      yes
      4
      3
      11
      11
      10
    
    
      383
      M
      R
      GT3
      T
      1
      1
      other
      services
      2
      1
      1
      no
      no
      4
      2
      6
      5
      0
    
    
      384
      M
      R
      GT3
      T
      4
      2
      other
      other
      2
      1
      1
      yes
      no
      5
      3
      6
      5
      5
    
    
      385
      F
      R
      GT3
      T
      2
      2
      at_home
      other
      2
      3
      0
      yes
      no
      5
      3
      10
      9
      10
    
    
      386
      F
      R
      GT3
      T
      4
      4
      teacher
      at_home
      3
      1
      0
      yes
      yes
      4
      3
      6
      5
      6
    
    
      387
      F
      R
      GT3
      T
      2
      3
      services
      other
      1
      3
      1
      no
      yes
      5
      2
      7
      5
      0
    
    
      388
      F
      U
      LE3
      T
      3
      1
      teacher
      services
      1
      2
      0
      yes
      no
      4
      4
      7
      9
      8
    
    
      389
      F
      U
      GT3
      T
      1
      1
      other
      other
      2
      2
      1
      no
      yes
      1
      1
      6
      5
      0
    
    
      390
      M
      U
      LE3
      A
      2
      2
      services
      services
      1
      2
      2
      yes
      no
      5
      4
      9
      9
      9
    
    
      391
      M
      U
      LE3
      T
      3
      1
      services
      services
      2
      1
      0
      no
      no
      2
      5
      14
      16
      16
    
    
      392
      M
      R
      GT3
      T
      1
      1
      other
      other
      1
      1
      3
      no
      no
      5
      3
      10
      8
      7
    
    
      393
      M
      R
      LE3
      T
      3
      2
      services
      other
      3
      1
      0
      no
      no
      4
      1
      11
      12
      10
    
    
      394
      M
      U
      LE3
      T
      1
      1
      other
      at_home
      1
      1
      0
      no
      no
      3
      3
      8
      9
      9
    
  

395 rows × 18 columns



In [64]:

    
grades = np.array(data[["G1","G2","G3"]])



In [65]:

    
per = grades[:,0] + grades[:,1] + grades[:, 2]



In [67]:

    
per = per*5/3



In [118]:

    
per.shape









    Out[118]:





(395,)



In [69]:

    
data.drop(["G1", "G2", "G3"], inplace = True, axis = 1)



In [79]:

    
data.head(10)



In [77]:

    
data['address'].value_counts()









    Out[77]:





U    307
R     88
Name: address, dtype: int64



In [ ]:



In [78]:

    
di = { 'U' : 0, 'R' : 1}
data.replace({'address':di},inplace=True)



In [ ]:



In [80]:

    
data['famsize'].value_counts()









    Out[80]:





GT3    281
LE3    114
Name: famsize, dtype: int64



In [81]:

    
di = {'LE3' : 0,'GT3' : 1}
data.replace({'famsize':di},inplace = True)



In [82]:

    
data.head(6)



In [83]:

    
di = { 'A' : 0, 'T' : 1}
data.replace({'Pstatus':di},inplace=True)



In [84]:

    
data.head(6)



In [87]:

    
data['Mjob'].value_counts()









    Out[87]:





other       141
services    103
at_home      59
teacher      58
health       34
Name: Mjob, dtype: int64



In [93]:

    
di = { 'teacher' : 0, 'health' : 1, 'services' : 2, 'at_home' : 3, 'other' : 4}
data.replace({'Mjob':di},inplace=True)



In [94]:

    
data.head(6)



In [90]:

    
data['Fjob'].value_counts()









    Out[90]:





other       217
services    111
teacher      29
at_home      20
health       18
Name: Fjob, dtype: int64



In [95]:

    
di = { 'teacher' : 0, 'health' : 1, 'services' : 2, 'at_home' : 3, 'other' : 4}
data.replace({'Fjob':di},inplace=True)



In [96]:

    
data.head(6)



In [98]:

    
data['paid'].value_counts()









    Out[98]:





no     214
yes    181
Name: paid, dtype: int64



In [102]:

    
di = { 'no' : 0, 'yes' : 1}
data.replace({'paid':di},inplace=True)



In [103]:

    
di = { 'no' : 0, 'yes' : 1}
data.replace({'activities':di},inplace=True)



In [106]:

    
data.shape









    Out[106]:





(395, 15)



In [105]:

    
test = np.array(data)



In [109]:

    
test.shape









    Out[109]:





(395, 15)



In [121]:

    
train = np.zeros((395,16))



In [122]:

    
train[:,:15] = test[:,:]



In [123]:

    
train[:,15] = per



In [130]:

    
train.shape









    Out[130]:





(395, 16)



In [132]:

    
labels = labels // 10



In [134]:

    
labels.shape









    Out[134]:





(395,)



In [156]:

    
from sklearn.ensemble import RandomForestClassifier



In [157]:

    
clf = RandomForestClassifier(n_estimators=60)



In [158]:

    
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(train, labels, test_size=0.2)



In [159]:

    
X_test.shape









    Out[159]:





(79, 16)



In [160]:

    
y_test.shape









    Out[160]:





(79,)



In [161]:

    
clf.fit(X_train, y_train)









    Out[161]:





RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=60, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)



In [162]:

    
y_A = clf.predict(X_test)



In [171]:

    
y_A









    Out[171]:





array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0])



In [172]:

    
y_test









    Out[172]:





array([0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0])



In [177]:

    
100*float((y_A==y_test).sum())/y_test.shape[0]









    Out[177]:





92.40506329113924

	school	sex	age	address	famsize	Pstatus	Medu	Fedu	Mjob	Fjob	...	internet	romantic	famrel	freetime	goout	health	absences	G1	G2	G3
0	GP	F	18	U	GT3	A	4	4	at_home	teacher	...	no	no	4	3	4	3	6	5	6	6
1	GP	F	17	U	GT3	T	1	1	at_home	other	...	yes	no	5	3	3	3	4	5	5	6
2	GP	F	15	U	LE3	T	1	1	at_home	other	...	yes	no	4	3	2	3	10	7	8	10
3	GP	F	15	U	GT3	T	4	2	health	services	...	yes	yes	3	2	2	5	2	15	14	15
4	GP	F	16	U	GT3	T	3	3	other	other	...	no	no	4	3	2	5	4	6	10	10

	sex	famsize	Pstatus	Medu	Fedu	Mjob	Fjob	traveltime	studytime	failures	paid	activities	famrel	goout
0	0	1	0	4	4	3	0	2	2	0	no	no	4	4
1	0	1	1	1	1	3	4	1	2	0	no	no	5	3
2	0	0	1	1	1	3	4	1	2	3	yes	no	4	2
3	0	1	1	4	2	1	2	1	3	0	yes	yes	3	2
4	0	1	1	3	3	4	4	1	2	0	yes	no	4	2
5	1	0	1	4	3	2	4	1	2	0	yes	yes	5	2

	sex	famsize	Pstatus	Medu	Fedu	Mjob	Fjob	traveltime	studytime	failures	paid	activities	famrel	goout
0	0	1	0	4	4	3	0	2	2	0	no	no	4	4
1	0	1	1	1	1	3	4	1	2	0	no	no	5	3
2	0	0	1	1	1	3	4	1	2	3	yes	no	4	2
3	0	1	1	4	2	1	2	1	3	0	yes	yes	3	2
4	0	1	1	3	3	4	4	1	2	0	yes	no	4	2
5	1	0	1	4	3	2	4	1	2	0	yes	yes	5	2

	sex	famsize	Pstatus	Medu	Fedu	Mjob	Fjob	traveltime	studytime	failures	paid	activities	famrel	goout
0	0	1	0	4	4	3	0	2	2	0	no	no	4	4
1	0	1	1	1	1	3	4	1	2	0	no	no	5	3
2	0	0	1	1	1	3	4	1	2	3	yes	no	4	2
3	0	1	1	4	2	1	2	1	3	0	yes	yes	3	2
4	0	1	1	3	3	4	4	1	2	0	yes	no	4	2
5	1	0	1	4	3	2	4	1	2	0	yes	yes	5	2