Deep learning on the image files in a directory


In [1]:
import os
import numpy as np
import cv2

TRAIN_DIR = './data/flickr_logos_27_dataset_cropped_augmented_images/'

NUM_CLASSES = 27 # number of classes to classify
IMG_SIZE = 32 # side length of each (square) image

# Directories containing the training images, one per class
train_img_dirs = ['Adidas','Apple','BMW','Citroen','Cocacola','DHL','Fedex','Ferrari','Ford','Google','Heineken','HP','Intel','McDonalds','Mini','Nbc','Nike','Pepsi','Porsche','Puma','RedBull','Sprite','Starbucks','Texaco','Unicef','Vodafone','Yahoo']
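
If the dataset keeps its one-directory-per-class layout, this list can also be derived from the filesystem instead of being maintained by hand. A minimal sketch, assuming every subdirectory of TRAIN_DIR is a class and the directory names double as class names:

In [ ]:
# Hypothetical alternative: derive the class list from the directory layout
train_img_dirs = sorted(
    d for d in os.listdir(TRAIN_DIR)
    if os.path.isdir(os.path.join(TRAIN_DIR, d)))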

In [2]:
def load_data():

    # Training image data
    images = []
    # Labels for the training data
    labels = []

    for i, d in enumerate(train_img_dirs):
        # List the files in each class directory under ./data/
        files = os.listdir(TRAIN_DIR + d)
        for f in files:
            # Read the image; skip files OpenCV cannot decode (e.g. hidden files)
            img = cv2.imread(TRAIN_DIR + d + '/' + f)
            if img is None:
                continue
            # Resize to an IMG_SIZE x IMG_SIZE square
            img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
            # Flatten to one row and scale pixel values to [0, 1]
            img = img.flatten().astype(np.float32)/255.0
            images.append(img)

            # Build a one-hot vector and append it as the label
            tmp = np.zeros(NUM_CLASSES)
            tmp[i] = 1
            labels.append(tmp)

    # Convert to numpy arrays
    images = np.asarray(images)
    labels = np.asarray(labels)

    return images, labels
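
Note that cv2.imread returns pixels in BGR channel order. This is harmless for training as long as prediction uses the same preprocessing, but when displaying a loaded image with matplotlib the channels must be swapped. A minimal sketch, assuming the first entry in the first class directory is a readable image:

In [ ]:
import matplotlib.pyplot as plt

sample_dir = TRAIN_DIR + train_img_dirs[0]
sample = cv2.imread(sample_dir + '/' + os.listdir(sample_dir)[0])
plt.imshow(cv2.cvtColor(sample, cv2.COLOR_BGR2RGB))  # convert BGR -> RGB for display
plt.show()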

Building the model


In [3]:
def build_model():
    # Build the model
    model = Sequential()
    # Input is a flattened 32 x 32 x 3 image (3072 values)
    model.add(Dense(512, input_shape=(IMG_SIZE * IMG_SIZE * 3,)))
    model.add(Activation('relu'))
    model.add(Dropout(0.2))

    model.add(Dense(512))
    model.add(Activation('relu'))
    model.add(Dropout(0.2))

    model.add(Dense(NUM_CLASSES))
    model.add(Activation('softmax'))

    # Define the loss function and optimizer
    model.compile(
        loss='categorical_crossentropy',
        optimizer=Adam(),
        metrics=['accuracy'])

    return model

Plotting helper function


In [4]:
def plot_history(history):
    # Plot the accuracy history
    plt.plot(history.history['acc'], "o-", label="accuracy")
    plt.plot(history.history['val_acc'], "o-", label="val_acc")
    plt.title('model accuracy')
    plt.xlabel('epoch')
    plt.ylabel('accuracy')
    plt.legend(loc="lower right")
    plt.show()

    # Plot the loss history
    plt.plot(history.history['loss'], "o-", label="loss")
    plt.plot(history.history['val_loss'], "o-", label="val_loss")
    plt.title('model loss')
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.legend(loc='lower right')
    plt.show()

Run the training


In [5]:
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.optimizers import Adam 
from keras.utils import np_utils
 
from keras.utils.vis_utils import plot_model
import matplotlib.pyplot as plt


Using TensorFlow backend.
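
With the imports in place, `model.summary()` gives a quick sanity check of the architecture before training. A minimal sketch; the first Dense layer alone has 3072 × 512 + 512 = 1,573,376 parameters:

In [ ]:
model = build_model()
model.summary()  # prints layer output shapes and parameter counts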

In [6]:
X, y = load_data()
print('images',X)
print('labels',y)


images [[ 1.          0.79607844  0.16862746 ...,  1.          0.73333335
   0.17254902]
 [ 1.          0.76862746  0.20784314 ...,  0.95686275  0.74901962
   0.14901961]
 [ 1.          0.73333335  0.27058825 ...,  1.          0.71372551
   0.21568628]
 ..., 
 [ 0.01568628  0.03137255  0.01176471 ...,  0.03529412  0.00784314
   0.04705882]
 [ 0.45882353  0.40000001  0.98039216 ...,  1.          0.9254902   1.        ]
 [ 0.96862745  1.          0.92156863 ...,  1.          0.97647059  1.        ]]
labels [[ 1.  0.  0. ...,  0.  0.  0.]
 [ 1.  0.  0. ...,  0.  0.  0.]
 [ 1.  0.  0. ...,  0.  0.  0.]
 ..., 
 [ 0.  0.  0. ...,  0.  0.  1.]
 [ 0.  0.  0. ...,  0.  0.  1.]
 [ 0.  0.  0. ...,  0.  0.  1.]]

Split into training and test sets


In [7]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, random_state=1)
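
Because the number of images differs per brand, a stratified split keeps the class proportions the same in both sets. A sketch, assuming the one-hot labels are converted back to class indices for sklearn's stratify argument:

In [ ]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.8, random_state=1,
    stratify=y.argmax(axis=1))  # stratify needs 1-D class labels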

Define the training parameters


In [ ]:
nb_epoch = 50 # number of training epochs, i.e. full passes over the training data
batch_size = 128 # mini-batch size; 128 is a common default with no particular theoretical justification here

In [ ]:
# Train the model on the data
model = build_model()
history = model.fit(X_train, y_train,
    epochs=nb_epoch,
    batch_size=batch_size,
    validation_data=(X_test, y_test)
)


Train on 173990 samples, validate on 43498 samples
Epoch 1/50
173990/173990 [==============================] - 97s - loss: 1.3203 - acc: 0.6294 - val_loss: 0.6865 - val_acc: 0.8172
Epoch 2/50
173990/173990 [==============================] - 92s - loss: 0.7233 - acc: 0.7938 - val_loss: 0.4730 - val_acc: 0.8720
Epoch 3/50
173990/173990 [==============================] - 94s - loss: 0.5756 - acc: 0.8343 - val_loss: 0.3504 - val_acc: 0.9051
Epoch 4/50
173990/173990 [==============================] - 87s - loss: 0.4963 - acc: 0.8553 - val_loss: 0.2831 - val_acc: 0.9250
Epoch 5/50
173990/173990 [==============================] - 82s - loss: 0.4357 - acc: 0.8726 - val_loss: 0.2478 - val_acc: 0.9358
Epoch 6/50
173990/173990 [==============================] - 77s - loss: 0.4077 - acc: 0.8807 - val_loss: 0.2176 - val_acc: 0.9410
Epoch 7/50
173990/173990 [==============================] - 83s - loss: 0.3773 - acc: 0.8895 - val_loss: 0.2170 - val_acc: 0.9395
Epoch 8/50
173990/173990 [==============================] - 91s - loss: 0.3511 - acc: 0.8973 - val_loss: 0.1922 - val_acc: 0.9486
Epoch 9/50
173990/173990 [==============================] - 93s - loss: 0.3362 - acc: 0.9013 - val_loss: 0.1805 - val_acc: 0.9513
Epoch 10/50
173990/173990 [==============================] - 82s - loss: 0.3177 - acc: 0.9074 - val_loss: 0.1480 - val_acc: 0.9603
Epoch 11/50
173990/173990 [==============================] - 79s - loss: 0.3084 - acc: 0.9101 - val_loss: 0.1582 - val_acc: 0.9590
Epoch 12/50
173990/173990 [==============================] - 79s - loss: 0.2986 - acc: 0.9126 - val_loss: 0.1640 - val_acc: 0.9548
Epoch 13/50
173990/173990 [==============================] - 77s - loss: 0.2877 - acc: 0.9155 - val_loss: 0.1416 - val_acc: 0.9629
Epoch 14/50
173990/173990 [==============================] - 79s - loss: 0.2784 - acc: 0.9192 - val_loss: 0.1427 - val_acc: 0.9617
Epoch 15/50
173990/173990 [==============================] - 88s - loss: 0.2730 - acc: 0.9202 - val_loss: 0.1514 - val_acc: 0.9591
Epoch 16/50
173990/173990 [==============================] - 78s - loss: 0.2692 - acc: 0.9220 - val_loss: 0.1213 - val_acc: 0.9668
Epoch 17/50
173990/173990 [==============================] - 70s - loss: 0.2700 - acc: 0.9219 - val_loss: 0.1352 - val_acc: 0.9638
Epoch 18/50
173990/173990 [==============================] - 70s - loss: 0.2606 - acc: 0.9243 - val_loss: 0.1231 - val_acc: 0.9682
Epoch 19/50
173990/173990 [==============================] - 70s - loss: 0.2584 - acc: 0.9251 - val_loss: 0.1156 - val_acc: 0.9687
Epoch 20/50
173990/173990 [==============================] - 70s - loss: 0.2450 - acc: 0.9289 - val_loss: 0.1175 - val_acc: 0.9670
Epoch 21/50
173990/173990 [==============================] - 99s - loss: 0.2448 - acc: 0.9293 - val_loss: 0.1256 - val_acc: 0.9644
Epoch 22/50
173990/173990 [==============================] - 80s - loss: 0.2436 - acc: 0.9300 - val_loss: 0.1353 - val_acc: 0.9643
Epoch 23/50
173990/173990 [==============================] - 80s - loss: 0.2360 - acc: 0.9321 - val_loss: 0.1110 - val_acc: 0.9715
Epoch 24/50
173990/173990 [==============================] - 78s - loss: 0.2364 - acc: 0.9320 - val_loss: 0.1155 - val_acc: 0.9683
Epoch 25/50
173990/173990 [==============================] - 82s - loss: 0.2418 - acc: 0.9304 - val_loss: 0.0965 - val_acc: 0.9735
Epoch 26/50
173990/173990 [==============================] - 82s - loss: 0.2288 - acc: 0.9329 - val_loss: 0.1129 - val_acc: 0.9718
Epoch 27/50
173990/173990 [==============================] - 80s - loss: 0.2206 - acc: 0.9368 - val_loss: 0.0980 - val_acc: 0.9734
Epoch 28/50
173990/173990 [==============================] - 76s - loss: 0.2214 - acc: 0.9364 - val_loss: 0.0988 - val_acc: 0.9755
Epoch 29/50
173990/173990 [==============================] - 78s - loss: 0.2247 - acc: 0.9357 - val_loss: 0.1052 - val_acc: 0.9716
Epoch 30/50
173990/173990 [==============================] - 86s - loss: 0.2153 - acc: 0.9375 - val_loss: 0.0956 - val_acc: 0.9742
Epoch 31/50
173990/173990 [==============================] - 89s - loss: 0.2160 - acc: 0.9379 - val_loss: 0.0961 - val_acc: 0.9745
Epoch 32/50
173990/173990 [==============================] - 77s - loss: 0.2165 - acc: 0.9379 - val_loss: 0.0900 - val_acc: 0.9764
Epoch 33/50
173990/173990 [==============================] - 79s - loss: 0.2126 - acc: 0.9398 - val_loss: 0.0909 - val_acc: 0.9750
Epoch 34/50
173990/173990 [==============================] - 97s - loss: 0.2158 - acc: 0.9391 - val_loss: 0.0937 - val_acc: 0.9759
Epoch 35/50
173990/173990 [==============================] - 89s - loss: 0.2109 - acc: 0.9403 - val_loss: 0.1062 - val_acc: 0.9720
Epoch 36/50
173990/173990 [==============================] - 108s - loss: 0.2065 - acc: 0.9411 - val_loss: 0.0903 - val_acc: 0.9757
Epoch 37/50
173990/173990 [==============================] - 84s - loss: 0.2043 - acc: 0.9419 - val_loss: 0.0995 - val_acc: 0.9724
Epoch 38/50
173990/173990 [==============================] - 88s - loss: 0.2012 - acc: 0.9429 - val_loss: 0.0916 - val_acc: 0.9766
Epoch 39/50
173990/173990 [==============================] - 83s - loss: 0.2015 - acc: 0.9424 - val_loss: 0.0922 - val_acc: 0.9753
Epoch 40/50
173990/173990 [==============================] - 98s - loss: 0.2019 - acc: 0.9431 - val_loss: 0.0938 - val_acc: 0.9730
Epoch 41/50
173990/173990 [==============================] - 107s - loss: 0.1977 - acc: 0.9444 - val_loss: 0.0884 - val_acc: 0.9772
Epoch 42/50
173990/173990 [==============================] - 85s - loss: 0.2026 - acc: 0.9428 - val_loss: 0.0866 - val_acc: 0.9766
Epoch 43/50
173990/173990 [==============================] - 83s - loss: 0.1906 - acc: 0.9462 - val_loss: 0.0900 - val_acc: 0.9764
Epoch 44/50
173990/173990 [==============================] - 85s - loss: 0.1987 - acc: 0.9445 - val_loss: 0.0996 - val_acc: 0.9736
Epoch 45/50
173990/173990 [==============================] - 80s - loss: 0.2006 - acc: 0.9440 - val_loss: 0.0776 - val_acc: 0.9796
Epoch 46/50
173990/173990 [==============================] - 88s - loss: 0.1929 - acc: 0.9458 - val_loss: 0.0913 - val_acc: 0.9746
Epoch 47/50
173990/173990 [==============================] - 82s - loss: 0.1947 - acc: 0.9452 - val_loss: 0.0786 - val_acc: 0.9784
Epoch 48/50
173990/173990 [==============================] - 89s - loss: 0.1867 - acc: 0.9479 - val_loss: 0.0844 - val_acc: 0.9785
Epoch 49/50
173990/173990 [==============================] - 94s - loss: 0.1944 - acc: 0.9457 - val_loss: 0.0881 - val_acc: 0.9766
Epoch 50/50
 45312/173990 [======>.......................] - ETA: 70s - loss: 0.1788 - acc: 0.9490

In [ ]:
# Save the trained model
json_string = model.to_json()

# Model architecture file (.json extension)
open('logo_model.json', 'w').write(json_string)
# Save the weights (.hdf5 extension)
model.save_weights('out/logo_model.hdf5')

# Evaluate the model on the held-out test set
score = model.evaluate(X_test, y_test, verbose=1)

print('loss=', score[0])
print('accuracy=', score[1])

# Plot how accuracy and loss evolved during training
plot_history(history)
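
To reuse the saved model later, it can be rebuilt from the JSON architecture file and the weight file. A minimal sketch, assuming the two files written above exist; 'some_logo.png' is a hypothetical test image path:

In [ ]:
from keras.models import model_from_json

# Rebuild the architecture from JSON and restore the trained weights
model = model_from_json(open('logo_model.json').read())
model.load_weights('out/logo_model.hdf5')

# Classify one image, preprocessed exactly like the training data
img = cv2.imread('some_logo.png')  # hypothetical test image path
img = cv2.resize(img, (IMG_SIZE, IMG_SIZE)).flatten().astype(np.float32) / 255.0
pred = model.predict(img.reshape(1, -1))
print('predicted class:', train_img_dirs[int(pred.argmax())])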

In [ ]: