notebook.community

Edit and run



In [1]:

    
%matplotlib inline



In [2]:

    
# Import libraries
# NOTE(review): sklearn.cross_validation and sklearn.grid_search are the legacy
# module names (superseded by sklearn.model_selection in newer scikit-learn
# releases) - confirm the installed version before running.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.cross_validation import train_test_split, cross_val_score, KFold
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier, export_graphviz
from sklearn.grid_search import GridSearchCV
from IPython.display import Image
# Turn off pandas' chained-assignment warning for the whole notebook.
pd.set_option('chained_assignment', None)
# Use the 'ggplot' matplotlib style sheet for all figures.
plt.style.use('ggplot')
# Set the major tick size to 0 on both axes.
plt.rc('xtick.major', size=0)
plt.rc('ytick.major', size=0)



In [3]:

    
# Load the training data from a CSV file in the working directory
user_tags = pd.read_csv("user_tags_merge.csv")



In [4]:

    
# Display the loaded data
# NOTE(review): this renders the whole frame, including the user_id and
# user_name columns - consider user_tags.head() before sharing the notebook.
user_tags









    Out[4]:






  
    
      
      user_id
      user_name
      gender_male
      airport
      animal
      apple
      auto
      autograph
      autumn
      baby
      ...
      tree
      vatican
      vegetable
      violin
      volleyball
      water
      waterfall
      wedding
      wine
      winter
    
  
  
    
      0
      963865524
      e34_1023
      1
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      ...
      0.000000
      0.000000
      0.0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
    
    
      1
      231764687
      kojita_na
      1
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      ...
      0.000000
      0.000000
      0.0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
    
    
      2
      35262468
      kanacom02
      0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.689974
      0.000000
      ...
      0.622459
      0.000000
      0.0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
    
    
      3
      12889622
      sato_charlotte
      0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      ...
      0.890903
      0.000000
      0.0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
    
    
      4
      1338792479
      nkmrerk
      0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.975755
      0.000000
      ...
      0.475021
      0.000000
      0.0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
    
    
      5
      632296001
      yukamoumoon
      0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.549834
      ...
      0.000000
      0.000000
      0.0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
    
    
      6
      204010752
      keeeei_t
      1
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      ...
      0.000000
      0.000000
      0.0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.890903
    
    
      7
      1441642315
      yukako0924
      0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      ...
      0.000000
      0.000000
      0.0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
    
    
      8
      472108366
      keikinoshi
      1
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      ...
      0.000000
      0.000000
      0.0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
    
    
      9
      1457971013
      mymt_yk
      0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      ...
      0.425557
      0.000000
      0.0
      0.000000
      0.000000
      0.000000
      0.000000
      0.908877
      0.000000
      0.000000
    
    
      10
      341181859
      hino6x9
      0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      ...
      0.000000
      0.000000
      0.0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
    
    
      11
      1433525330
      nm.kyoki
      1
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      ...
      0.000000
      0.000000
      0.0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.425557
    
    
      12
      1614214864
      14aryib_5
      0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      ...
      0.000000
      0.000000
      0.0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
    
    
      13
      443212411
      710rumi
      0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      ...
      0.000000
      0.000000
      0.0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
    
    
      14
      20674860
      accorone
      0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      ...
      0.000000
      0.000000
      0.5
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
    
    
      15
      14068459
      anarchistraw
      0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      ...
      0.000000
      0.000000
      0.0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.802184
      0.000000
    
    
      16
      3710300
      ant_62
      1
      0.000000
      0.000000
      0.000000
      0.000000
      0.689974
      0.000000
      0.000000
      ...
      0.000000
      0.000000
      0.0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
    
    
      17
      1391844497
      ayaka.88
      1
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      ...
      0.000000
      0.000000
      0.0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
    
    
      18
      1618281134
      ayakaaaaa1002
      0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      ...
      0.000000
      0.000000
      0.0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
    
    
      19
      1631841910
      bubupig16
      0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      ...
      0.000000
      0.000000
      0.0
      0.000000
      0.000000
      0.000000
      0.000000
      0.524979
      0.000000
      0.000000
    
    
      20
      1591441193
      emikokatsumata
      0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      ...
      0.000000
      0.000000
      0.0
      0.000000
      0.000000
      0.377541
      0.000000
      0.000000
      0.000000
      0.000000
    
    
      21
      1345412468
      er1i18
      0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      ...
      0.000000
      0.000000
      0.0
      0.000000
      0.000000
      0.000000
      0.000000
      1.798109
      0.000000
      0.000000
    
    
      22
      249005215
      hodaka_t
      1
      0.689974
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      ...
      0.000000
      0.000000
      0.0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
    
    
      23
      1542515338
      k___s___m
      0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      ...
      0.000000
      0.000000
      0.0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
    
    
      24
      307131495
      kana_co_kana
      0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      ...
      0.000000
      0.000000
      0.0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
    
    
      25
      1356904794
      kanet1027
      1
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      ...
      0.000000
      0.000000
      0.0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
    
    
      26
      242340652
      kishiasa
      0
      0.000000
      0.869892
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      ...
      0.000000
      0.000000
      0.0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
    
    
      27
      331584585
      kuruton4423
      0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      ...
      0.000000
      0.000000
      0.0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
    
    
      28
      1449505380
      maiko_totoro
      0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      ...
      0.000000
      0.000000
      0.0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
    
    
      29
      28551727
      mizuking1
      0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      ...
      0.000000
      0.000000
      0.0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
    
    
      30
      1717694344
      mm_knk823
      0
      0.000000
      0.000000
      0.000000
      0.817574
      0.000000
      0.000000
      0.000000
      ...
      0.000000
      0.000000
      0.0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
    
    
      31
      1628779627
      murakamigram
      1
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      ...
      0.000000
      0.000000
      0.0
      0.731059
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
    
    
      32
      1552060948
      nene_uehara
      0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      ...
      0.000000
      0.000000
      0.0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
    
    
      33
      1685189231
      nyu3uki
      0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      ...
      0.000000
      0.000000
      0.0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
    
    
      34
      992448809
      ochan1227
      1
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      ...
      0.000000
      0.000000
      0.0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
    
    
      35
      1650824134
      okuchan551204
      1
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      ...
      0.785835
      0.000000
      0.0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
    
    
      36
      1491616121
      polocco19
      0
      0.000000
      0.000000
      0.598688
      0.000000
      0.000000
      0.000000
      0.000000
      ...
      0.000000
      0.000000
      0.0
      0.000000
      0.645656
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
    
    
      37
      21034569
      ricktocaster
      1
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      ...
      0.000000
      0.000000
      0.0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
    
    
      38
      1588713355
      saaya24v_
      0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      ...
      0.000000
      0.000000
      0.0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
    
    
      39
      181680157
      sao_tason
      0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      ...
      0.000000
      0.000000
      0.0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
    
    
      40
      31106041
      sbytmk
      1
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      ...
      0.000000
      0.000000
      0.0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
    
    
      41
      1575747845
      segawa_ayaka
      0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      ...
      0.000000
      0.000000
      0.0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
    
    
      42
      197588060
      sejusonia_new
      1
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      ...
      0.000000
      0.000000
      0.0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
    
    
      43
      1528196058
      shimpeterrr
      1
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      ...
      0.000000
      0.000000
      0.0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
    
    
      44
      802303590
      shioringo_k
      0
      0.000000
      0.000000
      0.000000
      0.817574
      0.000000
      0.000000
      0.000000
      ...
      0.000000
      0.000000
      0.0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
    
    
      45
      1108233901
      shokosmo
      0
      0.000000
      0.987872
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      ...
      0.000000
      0.000000
      0.0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
    
    
      46
      572019031
      slrymn
      0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      ...
      0.000000
      0.000000
      0.0
      0.000000
      0.000000
      0.000000
      0.000000
      0.998499
      0.000000
      0.000000
    
    
      47
      1507811323
      syo_7tri
      1
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      ...
      0.000000
      0.000000
      0.0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
    
    
      48
      12444261
      taikin1015
      1
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      ...
      0.000000
      0.000000
      0.0
      0.000000
      1.475873
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
    
    
      49
      240997870
      takagishingo
      1
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.645656
      ...
      0.000000
      0.000000
      0.0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
    
    
      50
      610808878
      takara0626
      0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      ...
      0.000000
      0.000000
      0.0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
    
    
      51
      1500745758
      takurafu
      1
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      ...
      0.000000
      0.000000
      0.0
      0.000000
      0.000000
      0.000000
      0.268941
      0.000000
      0.000000
      0.000000
    
    
      52
      1476912852
      tomshir13
      1
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      ...
      0.000000
      0.000000
      0.0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
    
    
      53
      51128075
      uyeda612
      1
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      ...
      0.000000
      0.000000
      0.0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
    
    
      54
      709439195
      whistle9
      1
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      ...
      0.000000
      0.000000
      0.0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
    
    
      55
      1302120936
      ymuta
      1
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      ...
      0.000000
      0.000000
      0.0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
    
    
      56
      568710865
      yohhatu
      1
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      ...
      0.000000
      0.000000
      0.0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
    
    
      57
      1315136665
      yugaharada
      1
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      ...
      0.000000
      0.000000
      0.0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
    
    
      58
      1743972349
      yuikotaniguchi
      0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      ...
      0.000000
      0.000000
      0.0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
    
    
      59
      1548402109
      zashikane
      0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      ...
      0.000000
      0.924142
      0.0
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
    
  

60 rows × 144 columns



In [5]:

    
X = user_tags[['person', 'food']] # Extract only the 'person' and 'food' tag columns as X
X.tail()



In [6]:

    
y = user_tags['gender_male'] # Extract gender (the gender_male column) as y



In [7]:

    
# Scatter plot of the 'food' tag (x axis) against the 'person' tag (y axis),
# with one colour per gender.
# A dedicated, seeded generator makes the vertical jitter reproducible without
# touching NumPy's global random state. (Writing `np.random.seed = 0` would not
# seed anything: it rebinds the seed function to the integer 0.)
rng = np.random.RandomState(0)

xmin, xmax = -2, 12
ymin, ymax = -2, 17

index_male = y[y==1].index # rows where gender_male == 1 (male)
index_female = y[y==0].index # rows where gender_male == 0 (female)

fig, ax = plt.subplots()
cm = plt.cm.RdBu
cm_bright = ListedColormap(['#FF0000', '#0000FF'])
# Male points are drawn first (blue), then female points (red).
for group_index, group_color, group_label in ((index_male, 'b', 'male'),
                                              (index_female, 'r', 'female')):
    # Uniform jitter in [-0.05, 0.05) on 'person' so identical points do not
    # sit exactly on top of each other.
    jitter = (rng.rand(len(group_index)) - 0.5) * 0.1
    sc = ax.scatter(X.loc[group_index, 'food'],
                    X.loc[group_index, 'person'] + jitter,
                    color=group_color, label=group_label, alpha=0.3)
ax.set_xlabel('food') # x-axis label
ax.set_ylabel('person') # y-axis label
ax.set_xlim(xmin, xmax)
ax.set_ylim(ymin, ymax)
ax.legend(bbox_to_anchor=(1.4, 1.03)) # anchor the legend at (1.4, 1.03) in axes coordinates
plt.show()



In [8]:

    
# Update the explanatory variables X with a larger set of tag columns;
# the target y is still the gender_male column.
X = user_tags[[
    'nail', 'person', 'sport', 'food',
    'coffee', 'cake', 'beer', 'sky',
]]
y = user_tags["gender_male"]



In [10]:

    
def cross_val(clf, X, y, K, random_state=0):
    """Run shuffled K-fold cross-validation and return the per-fold scores.

    Parameters
    ----------
    clf : classifier model to evaluate
    X : explanatory variables
    y : target variable
    K : number of folds the data is split into
    random_state : seed passed to KFold for the shuffle (default 0)

    Returns
    -------
    The scores returned by cross_val_score, one per fold.
    """
    n_samples = len(y)
    # KFold is called with the (n, n_folds) signature of the
    # sklearn.cross_validation module imported by this notebook.
    folds = KFold(n_samples, K, shuffle=True, random_state=random_state)
    return cross_val_score(clf, X, y, cv=folds)



In [12]:

    
# Use logistic regression as the classifier model.
clf = LogisticRegression()
# Repeat the cross-validation for K = 2 through 11 (range's upper bound is exclusive).
for n_folds in range(2, 12):
    scores = cross_val(clf, X, y, n_folds)
    print(n_folds)
    print('Scores:', scores)
    mean_score = scores.mean()
    spread = scores.std() * 2  # two standard deviations of the fold scores
    print('Mean Score: {0:.3f} (+/-{1:.3f})'.format(mean_score, spread))









    



2
Scores: [ 0.56666667  0.46666667]
Mean Score: 0.517 (+/-0.100)
3
Scores: [ 0.45  0.55  0.65]
Mean Score: 0.550 (+/-0.163)
4
Scores: [ 0.46666667  0.53333333  0.26666667  0.8       ]
Mean Score: 0.517 (+/-0.382)
5
Scores: [ 0.33333333  0.58333333  0.5         0.33333333  0.83333333]
Mean Score: 0.517 (+/-0.371)
6
Scores: [ 0.3  0.6  0.6  0.2  0.7  0.9]
Mean Score: 0.550 (+/-0.473)
7
Scores: [ 0.33333333  0.66666667  0.44444444  0.66666667  0.125       0.75        0.875     ]
Mean Score: 0.552 (+/-0.485)
8
Scores: [ 0.375       0.5         0.5         0.5         0.28571429  0.42857143
  0.71428571  0.85714286]
Mean Score: 0.520 (+/-0.344)
9
Scores: [ 0.28571429  0.57142857  0.42857143  0.71428571  0.57142857  0.
  0.66666667  1.          0.83333333]
Mean Score: 0.563 (+/-0.562)
10
Scores: [ 0.16666667  0.5         0.83333333  0.33333333  0.5         0.66666667
  0.          0.66666667  1.          0.83333333]
Mean Score: 0.550 (+/-0.597)
11
Scores: [ 0.16666667  0.5         0.83333333  0.33333333  0.5         0.4         0.4
  0.6         0.6         0.8         1.        ]
Mean Score: 0.558 (+/-0.465)



In [13]:

    
# Use a decision tree as the classifier model: entropy split criterion,
# depth limited to 2, at least 2 samples per leaf.
clf = DecisionTreeClassifier(
    criterion='entropy',
    max_depth=2,
    min_samples_leaf=2,
)
# Repeat the cross-validation for K = 2 through 11.
for n_folds in range(2, 12):
    scores = cross_val(clf, X, y, n_folds)
    print(n_folds)
    print('Scores:', scores)
    mean_score = scores.mean()
    spread = scores.std() * 2  # two standard deviations of the fold scores
    print('Mean Score: {0:.3f} (+/-{1:.3f})'.format(mean_score, spread))









    



2
Scores: [ 0.4  0.5]
Mean Score: 0.450 (+/-0.100)
3
Scores: [ 0.5   0.45  0.6 ]
Mean Score: 0.517 (+/-0.125)
4
Scores: [ 0.46666667  0.53333333  0.2         0.66666667]
Mean Score: 0.467 (+/-0.340)
5
Scores: [ 0.33333333  0.5         0.58333333  0.25        0.66666667]
Mean Score: 0.467 (+/-0.309)
6
Scores: [ 0.3  0.7  0.6  0.3  0.6  0.6]
Mean Score: 0.517 (+/-0.314)
7
Scores: [ 0.33333333  0.44444444  0.44444444  0.33333333  0.25        0.75        0.625     ]
Mean Score: 0.454 (+/-0.328)
8
Scores: [ 0.375       0.5         0.5         0.5         0.28571429  0.28571429
  0.71428571  0.57142857]
Mean Score: 0.467 (+/-0.273)
9
Scores: [ 0.28571429  0.57142857  0.42857143  0.57142857  0.57142857  0.42857143
  0.5         0.83333333  0.5       ]
Mean Score: 0.521 (+/-0.282)
10
Scores: [ 0.16666667  0.5         0.5         0.33333333  0.5         0.5         0.5
  0.5         0.83333333  0.5       ]
Mean Score: 0.483 (+/-0.314)
11
Scores: [ 0.16666667  0.5         0.5         0.33333333  0.5         0.2         0.6
  0.2         0.8         0.6         0.6       ]
Mean Score: 0.455 (+/-0.389)



In [14]:

    
# Update the explanatory variables X (more tags added);
# the target y is still the gender_male column.
X = user_tags[[
    'nail', 'hair', 'person', 'sport', 'food',
    'night', 'coffee', 'wedding', 'cake', 'beer',
    'dog', 'animal', 'tree', 'blossom', 'cat',
    'flower', 'sky', 'nature', 'cherry',
]]
y = user_tags["gender_male"]



In [15]:

    
# Merge and tidy up related tags into broader groups.
# DataFrame.assign builds a new frame, so no column is written in place into
# the selection that X was taken from. 'animal' and 'nature' keep their
# position; the new 'cosme' column is appended at the end.
# NOTE: this cell consumes the fine-grained columns, so re-running it without
# first re-running the cell that defines X raises a KeyError.
X = X.assign(
    animal=X['animal'] + X['dog'] + X['cat'],
    cosme=X['hair'] + X['nail'],
    nature=X['nature'] + X['sky'] + X['flower'] + X['tree'] + X['blossom'] + X['cherry'],
)
# Drop the individual tags that were folded into the groups above.
X = X.drop(['nail','hair', 'dog', 'cat', 'sky','flower','tree','blossom','cherry'],axis=1)



In [16]:

    
# Use logistic regression as the classifier model.
clf = LogisticRegression()
# Repeat the cross-validation for K = 2 through 11.
for n_folds in range(2, 12):
    scores = cross_val(clf, X, y, n_folds)
    print(n_folds)
    print('Scores:', scores)
    mean_score = scores.mean()
    spread = scores.std() * 2  # two standard deviations of the fold scores
    print('Mean Score: {0:.3f} (+/-{1:.3f})'.format(mean_score, spread))









    



2
Scores: [ 0.6  0.5]
Mean Score: 0.550 (+/-0.100)
3
Scores: [ 0.6   0.65  0.75]
Mean Score: 0.667 (+/-0.125)
4
Scores: [ 0.66666667  0.6         0.33333333  0.8       ]
Mean Score: 0.600 (+/-0.340)
5
Scores: [ 0.58333333  0.58333333  0.58333333  0.41666667  0.83333333]
Mean Score: 0.600 (+/-0.267)
6
Scores: [ 0.5  0.5  0.6  0.2  0.8  0.8]
Mean Score: 0.567 (+/-0.411)
7
Scores: [ 0.55555556  0.66666667  0.33333333  0.66666667  0.875       0.75        0.75      ]
Mean Score: 0.657 (+/-0.321)
8
Scores: [ 0.625       0.625       0.5         0.625       0.57142857  0.85714286
  0.71428571  0.71428571]
Mean Score: 0.654 (+/-0.202)
9
Scores: [ 0.57142857  0.71428571  0.42857143  0.57142857  0.57142857  0.71428571
  0.66666667  1.          0.66666667]
Mean Score: 0.656 (+/-0.297)
10
Scores: [ 0.5         0.66666667  0.83333333  0.33333333  0.66666667  0.33333333
  0.83333333  0.66666667  1.          0.66666667]
Mean Score: 0.650 (+/-0.407)
11
Scores: [ 0.5         0.66666667  0.83333333  0.33333333  0.66666667  0.4         0.6
  1.          0.6         0.8         0.8       ]
Mean Score: 0.655 (+/-0.377)



In [17]:

    
# Write the classifier out to a file.
from sklearn.externals import joblib  # import the library (joblib as exposed by scikit-learn)
# Build the classifier by fitting on X and y; fit() returns the estimator itself.
clf = LogisticRegression().fit(X, y)
# Save as clf.pkl; the call's return value is displayed as the cell output.
joblib.dump(clf, 'clf.pkl')









    Out[17]:





['clf.pkl', 'clf.pkl_01.npy', 'clf.pkl_02.npy', 'clf.pkl_03.npy']



In [ ]:

	user_id	user_name	gender_male	airport	animal	apple	auto	autograph	autumn	baby	...	tree	vatican	vegetable	violin	volleyball	water	waterfall	wedding	wine	winter
0	963865524	e34_1023	1	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
1	231764687	kojita_na	1	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
2	35262468	kanacom02	0	0.000000	0.000000	0.000000	0.000000	0.000000	0.689974	0.000000	...	0.622459	0.000000	0.0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
3	12889622	sato_charlotte	0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.890903	0.000000	0.0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
4	1338792479	nkmrerk	0	0.000000	0.000000	0.000000	0.000000	0.000000	0.975755	0.000000	...	0.475021	0.000000	0.0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
5	632296001	yukamoumoon	0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.549834	...	0.000000	0.000000	0.0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
6	204010752	keeeei_t	1	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.890903
7	1441642315	yukako0924	0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
8	472108366	keikinoshi	1	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
9	1457971013	mymt_yk	0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.425557	0.000000	0.0	0.000000	0.000000	0.000000	0.000000	0.908877	0.000000	0.000000
10	341181859	hino6x9	0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
11	1433525330	nm.kyoki	1	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.425557
12	1614214864	14aryib_5	0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
13	443212411	710rumi	0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
14	20674860	accorone	0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.5	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
15	14068459	anarchistraw	0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.0	0.000000	0.000000	0.000000	0.000000	0.000000	0.802184	0.000000
16	3710300	ant_62	1	0.000000	0.000000	0.000000	0.000000	0.689974	0.000000	0.000000	...	0.000000	0.000000	0.0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
17	1391844497	ayaka.88	1	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
18	1618281134	ayakaaaaa1002	0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
19	1631841910	bubupig16	0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.0	0.000000	0.000000	0.000000	0.000000	0.524979	0.000000	0.000000
20	1591441193	emikokatsumata	0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.0	0.000000	0.000000	0.377541	0.000000	0.000000	0.000000	0.000000
21	1345412468	er1i18	0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.0	0.000000	0.000000	0.000000	0.000000	1.798109	0.000000	0.000000
22	249005215	hodaka_t	1	0.689974	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
23	1542515338	k___s___m	0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
24	307131495	kana_co_kana	0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
25	1356904794	kanet1027	1	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
26	242340652	kishiasa	0	0.000000	0.869892	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
27	331584585	kuruton4423	0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
28	1449505380	maiko_totoro	0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
29	28551727	mizuking1	0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
30	1717694344	mm_knk823	0	0.000000	0.000000	0.000000	0.817574	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
31	1628779627	murakamigram	1	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.0	0.731059	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
32	1552060948	nene_uehara	0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
33	1685189231	nyu3uki	0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
34	992448809	ochan1227	1	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
35	1650824134	okuchan551204	1	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.785835	0.000000	0.0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
36	1491616121	polocco19	0	0.000000	0.000000	0.598688	0.000000	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.0	0.000000	0.645656	0.000000	0.000000	0.000000	0.000000	0.000000
37	21034569	ricktocaster	1	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
38	1588713355	saaya24v_	0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
39	181680157	sao_tason	0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
40	31106041	sbytmk	1	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
41	1575747845	segawa_ayaka	0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
42	197588060	sejusonia_new	1	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
43	1528196058	shimpeterrr	1	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
44	802303590	shioringo_k	0	0.000000	0.000000	0.000000	0.817574	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
45	1108233901	shokosmo	0	0.000000	0.987872	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
46	572019031	slrymn	0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.0	0.000000	0.000000	0.000000	0.000000	0.998499	0.000000	0.000000
47	1507811323	syo_7tri	1	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
48	12444261	taikin1015	1	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.0	0.000000	1.475873	0.000000	0.000000	0.000000	0.000000	0.000000
49	240997870	takagishingo	1	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.645656	...	0.000000	0.000000	0.0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
50	610808878	takara0626	0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
51	1500745758	takurafu	1	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.0	0.000000	0.000000	0.000000	0.268941	0.000000	0.000000	0.000000
52	1476912852	tomshir13	1	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
53	51128075	uyeda612	1	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
54	709439195	whistle9	1	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
55	1302120936	ymuta	1	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
56	568710865	yohhatu	1	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
57	1315136665	yugaharada	1	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
58	1743972349	yuikotaniguchi	0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
59	1548402109	zashikane	0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.000000	0.924142	0.0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000

	person	food
55	5.406591	5.783906
56	0.999590	8.565152
57	2.848325	5.342605
58	8.415150	1.440234
59	0.000000	0.000000