In [97]:
import os
import sys
sys.path.append('..')

import numpy as np
import pandas as pd


from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense, Input
from keras.models import Model

from keras.preprocessing import image
from keras.applications.vgg16 import preprocess_input
from keras.utils import to_categorical

from SR_module import *


%load_ext autoreload
%autoreload 2


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload

In [102]:
ROOT = os.environ.get('DATA', os.path.join(
    os.path.dirname(os.path.abspath('..')), 'data', 'mst'))
print(ROOT)


/home/luo.sz/beLLE/data/mst

In [3]:
def load_img(img_list):
    """Load images, resize to 224x224, and apply VGG16 preprocessing."""
    imgs = []
    for img_path in img_list:
        img = image.load_img(img_path, target_size=(224, 224))
        x = image.img_to_array(img)
        x = np.expand_dims(x, axis=0)
        imgs.append(x)

    imgs = np.concatenate(imgs, axis=0)
    imgs = preprocess_input(imgs)
    return imgs

Load images and labels


In [139]:
season = "春"  # "春" = spring; any other value selects the summer property file

prop_name = '%dspring_m.csv' if season == "春" else '%dsummer_m.csv'
# All available property columns (product series (old), main upper material, style, heel type,
# last type, heel height, upper color, toe shape, opening depth, accessories, wearing style,
# unique upper material)
PROP_COLS = [u'产品系列(旧)', u'帮面材质-主要', u'款型', u'跟型', u'楦型',
             u'跟高', u'帮面颜色', u'鞋头', u'开口深浅', u'有无配饰', u'穿法', u'帮面材质唯一']
# Columns actually used as prediction targets
# (product series (old), style, heel type, heel height, upper color, accessories)
COLS = [u'产品系列(旧)', u'款型', u'跟型', u'跟高', u'帮面颜色', u'有无配饰']
feature_num = len(COLS)
PROP = []
for yr in range(2016, 2019):
    prop = data_prop(os.path.join(ROOT, prop_name % (yr % 100)))
    PROP.append(prop)

p_total = pd.concat(PROP)[COLS]
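
Before building labels, it helps to check how many classes each target attribute has, since this fixes the size of each output head below (a quick sketch using the table just built):

In [ ]:
# Per-attribute class counts; these become the per-head output sizes of BelleMultiNet.
print(p_total.shape)
for col in p_total.columns:
    print(col, p_total[col].nunique())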

In [119]:
def common_list(img_root, prop_index):
    """Return product codes that have both an image file under img_root and a row in prop_index."""
    ext = 'jpg'
    base_list = os.listdir(img_root)
    img_code = [f.split('.')[0] for f in base_list if f.endswith(ext)]
    final_code = prop_index.intersection(img_code).tolist()
    return final_code

In [120]:
def gen_X_y(img_root, prop_index, multiout=False):
    """Build the image tensor and one-hot label(s) for all products under img_root.

    With multiout=True, returns one one-hot matrix per attribute (for the multi-output
    network); otherwise returns a single concatenated one-hot matrix.
    """
    f_code = common_list(img_root, prop_index)

    file_list = [os.path.join(img_root, '%s.jpg' % f) for f in f_code]
    imgs = load_img(file_list)
    labels = p_total.loc[f_code]

    if multiout:
        label_one = []
        for col in labels.columns:
            label_one.append(pd.get_dummies(labels[col]).values)
    else:
        label_one = pd.get_dummies(labels).values

    return imgs, label_one
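
A minimal sketch of what multiout=True produces, on a toy DataFrame (the toy column names and values are made up for illustration): one one-hot matrix per attribute, in column order.

In [ ]:
toy = pd.DataFrame({'heel': ['flat', 'high', 'flat'], 'color': ['red', 'black', 'red']})
toy_labels = [pd.get_dummies(toy[col]).values for col in toy.columns]
for col, arr in zip(toy.columns, toy_labels):
    print(col, arr.shape)  # each attribute yields an (n_samples, n_classes) one-hot block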

In [140]:
# multi output
img_roots = ['/home/luo.sz/beLLE/data/imgs/', '/home/luo.sz/beLLE/data/ygimgs/']
X_li = []
y_li = []
for root in img_roots:
    imgs, label_one = gen_X_y(root, p_total.index, multiout=True)
    X_li.append(imgs)
    y_li.append(label_one)


X = np.concatenate(X_li, axis=0)
y = [np.concatenate([y_[i] for y_ in y_li], axis=0) for i in range(feature_num)]

In [48]:
from sklearn.model_selection import train_test_split
# Note: this split only works when y is a single array (the single-output case);
# for the multi-output list of arrays, use the manual index split in the next cell.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=23)

In [136]:
# X, y = np.concatenate([img16, img17], axis=0), np.concatenate([label_one16.toarray(), label_one17.toarray()], axis=0)

# Manual 90/10 split that also handles the multi-output label list.
total_len = X.shape[0]
train_len = int(total_len * 0.9)


np.random.seed(23)
random_index = np.random.permutation(total_len)
train_index, test_index = random_index[:train_len], random_index[train_len:]
X_train, y_train = X[train_index, :], [y_[train_index, :] for y_ in y]
X_test, y_test = X[test_index, :], [y_[test_index, :] for y_ in y]

In [138]:
def BelleNet(input_shape=(224, 224, 3), labels=29):
    model = Sequential()
    
    model.add(Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))
    model.add(Conv2D(32, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    model.add(Flatten())
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(labels, activation='sigmoid'))  ## !IMPORTANT: use sigmoid in multi-label task
    
    return model

# sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)

def BelleMultiNet(input_shape=(224, 224, 3), labels=[3,6,7,4,4,2]):
    label_len = len(labels)
    input_ = Input(shape=input_shape, name='input')
    x = Conv2D(32, (3, 3), activation='relu')(input_)
    x = Conv2D(32, (3, 3), activation='relu')(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = Dropout(0.25)(x)
    
    x = Conv2D(64, (3, 3), activation='relu')(x)
    x = Conv2D(64, (3, 3), activation='relu')(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = Dropout(0.25)(x)
    
    x = Flatten()(x)
    x = Dense(256, activation='relu', name='extract')(x)
    x = Dropout(0.25)(x)
    
    mid = [Dense(128, activation='relu')(x) for _ in range(label_len)]
    mid = [Dense(64, activation='relu')(m) for m in mid]
    mid = [Dense(64, activation='relu')(m) for m in mid]
    
    outs = []
    for i, m, l in zip(range(label_len), mid, labels):
        outs.append(Dense(l, activation='softmax', name='out'+str(i+1))(m))
    
    model = Model(inputs=input_, outputs=outs)
    
    return model
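
A quick shape check on the multi-output net (a sketch; the class counts used here are the function's placeholder defaults, not the real per-attribute counts):

In [ ]:
m = BelleMultiNet(labels=[3, 6, 7, 4, 4, 2])
m.summary()
print(m.output_shape)  # one (None, n_classes) tuple per attribute head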

In [ ]:
epochs = 70  # ~70 epochs is enough in practice

model = BelleMultiNet(labels=[y_.shape[1] for y_ in y])
model.compile(optimizer='rmsprop',
              loss='categorical_crossentropy',
              metrics={'out%d' % (i+1): 'categorical_accuracy' for i in range(feature_num)})
# model.fit(X_train, y_train, batch_size=32, epochs=10)

train_datagen = image.ImageDataGenerator(
    rescale=1. / 255,
    shear_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True)

test_datagen = image.ImageDataGenerator(rescale=1. / 255)

# Note: multiout is not a stock Keras ImageDataGenerator.flow argument; this relies on a
# patched/extended generator (e.g. from SR_module) that yields the label list per batch.
model.fit_generator(train_datagen.flow(X_train, y_train, batch_size=32, multiout=True),
                    steps_per_epoch=len(X_train) / 32, epochs=epochs)
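
If no patched generator is available, a plain wrapper around the stock flow can reproduce the multi-output behaviour. This is a minimal sketch (the multiout_flow name and the stack/split approach are assumptions, not part of the original pipeline); it could be passed to fit_generator in place of train_datagen.flow(..., multiout=True).

In [ ]:
def multiout_flow(datagen, X, y_list, batch_size=32):
    """Yield (x_batch, [y1_batch, ..., yk_batch]) using the stock ImageDataGenerator.flow.

    The per-attribute one-hot matrices are stacked into one wide matrix so that flow()
    can shuffle/augment them together, then each batch is split back into the original
    column blocks.
    """
    widths = [y_.shape[1] for y_ in y_list]
    offsets = np.cumsum([0] + widths)
    y_stacked = np.concatenate(y_list, axis=1)
    for x_batch, y_batch in datagen.flow(X, y_stacked, batch_size=batch_size):
        yield x_batch, [y_batch[:, offsets[i]:offsets[i + 1]] for i in range(len(widths))]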

In [144]:
model.evaluate_generator(test_datagen.flow(X_test, y_test, multiout=True), 10)


Out[144]:
[4.750553393363953,
 0.3793038740754128,
 0.9147469818592071,
 0.5966550067067147,
 0.512187322974205,
 1.6708511054515838,
 0.676808986067772,
 0.890625,
 0.81875,
 0.84375,
 0.875,
 0.675,
 0.79375]
The 13 values are the total loss, the six per-output losses, and the six per-output categorical accuracies (in out1..out6 order).

  • V1 was trained on a single image repo, while
  • V2 was trained on both image repos, /home/luo.sz/beLLE/data/imgs/ and /home/luo.sz/beLLE/data/ygimgs/

In [145]:
model.save_weights(os.path.join(ROOT, 'BelleMultiNet_Weights_V2.h5')) 
json_string = model.to_json()
with open(os.path.join(ROOT, 'BelleMultiNet_V2.json'), 'w') as f:
    f.write(json_string)
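
To restore this model later (a sketch using the standard Keras model_from_json API and the files written above; weights must be loaded after rebuilding the architecture):

In [ ]:
from keras.models import model_from_json

with open(os.path.join(ROOT, 'BelleMultiNet_V2.json')) as f:
    reloaded = model_from_json(f.read())
reloaded.load_weights(os.path.join(ROOT, 'BelleMultiNet_Weights_V2.h5'))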

In [ ]:
score = model.evaluate(X_test, y_test, batch_size=32)

In [ ]:
epochs = 100  # ~70 is usually enough

# Single-output multi-label variant: y_train/y_test here must be the single concatenated
# one-hot matrix from gen_X_y(..., multiout=False), not the list used above.
model = BelleNet(labels=y_test.shape[1])
model.compile(optimizer='rmsprop',
              loss='binary_crossentropy',
              metrics=['accuracy'])  ## !IMPORTANT: use binary_crossentropy in the multi-label task


train_datagen = image.ImageDataGenerator(
    rescale=1. / 255,
    shear_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True)

test_datagen = image.ImageDataGenerator( rescale=1./255)

model.fit_generator(train_datagen.flow(X_train, y_train, batch_size=32),
                    steps_per_epoch=len(X_train) / 32, epochs=epochs)

# model.fit(x_train, y_train, batch_size=32, epochs=10)
# score = model.evaluate(x_test, y_test, batch_size=32)
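
Since the single-output BelleNet uses independent sigmoids, hard predictions come from per-label thresholding rather than an argmax; a minimal sketch (the 0.5 threshold is a conventional choice, not from the original notebook):

In [ ]:
probs = model.predict_generator(test_datagen.flow(X_test, batch_size=32, shuffle=False),
                                steps=int(np.ceil(len(X_test) / 32.)))
pred_labels = (probs > 0.5).astype(int)       # one independent 0/1 decision per label
print(pred_labels.shape, pred_labels.sum(axis=1)[:10])  # predicted label count per sample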

In [ ]:
model.evaluate_generator(test_datagen.flow(X_test, y_test), 10)

In [52]:
model.save_weights(os.path.join(ROOT, 'BelleNet_Weights.h5')) 
json_string = model.to_json()
with open(os.path.join(ROOT, 'BelleNet.json'), 'w') as f:
    f.write(json_string)