In [97]:
import os
import sys
sys.path.append('..')
import numpy as np
import pandas as pd
from keras.models import Sequential, Model
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense, Input
from keras.preprocessing import image
from keras.applications.vgg16 import preprocess_input
from keras.utils import to_categorical
from SR_module import *
%load_ext autoreload
%autoreload 2
In [102]:
ROOT = os.environ.get('DATA', os.path.join(
    os.path.dirname(os.path.abspath('..')), 'data', 'mst'))
print(ROOT)
In [3]:
def load_img(img_list):
    """Load each image at 224x224, stack into one batch, and apply VGG16 preprocessing."""
    imgs = []
    for img_path in img_list:
        img = image.load_img(img_path, target_size=(224, 224))
        x = image.img_to_array(img)
        x = np.expand_dims(x, axis=0)
        imgs.append(x)
    imgs = np.concatenate(imgs, 0)
    imgs = preprocess_input(imgs)
    return imgs
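A quick sanity check of the helper; the file names under ROOT are hypothetical:
In [ ]:
# Hypothetical example: load two images and confirm the batch shape is (2, 224, 224, 3).
sample_files = [os.path.join(ROOT, 'sample_imgs', f) for f in ('0001.jpg', '0002.jpg')]
sample_batch = load_img(sample_files)
print(sample_batch.shape)  # expected: (2, 224, 224, 3)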
In [139]:
season = "春"  # "spring"; switches which property file is read
prop_name = '%dspring_m.csv' if season == "春" else '%dsummer_m.csv'
PROP_COLS = [u'产品系列(旧)', u'帮面材质-主要', u'款型', u'跟型', u'楦型',
             u'跟高', u'帮面颜色', u'鞋头', u'开口深浅', u'有无配饰', u'穿法', u'帮面材质唯一']
# Target attributes: product series (old), style, heel type, heel height, upper colour, embellishment.
COLS = [u'产品系列(旧)', u'款型', u'跟型', u'跟高', u'帮面颜色', u'有无配饰']
feature_num = len(COLS)
PROP = []
for yr in range(2016, 2019):
    prop = data_prop(os.path.join(ROOT, prop_name % (yr % 100)))
    PROP.append(prop)
p_total = pd.concat(PROP)[COLS]
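A small sanity check on the combined property table; this assumes data_prop returns a DataFrame indexed by product code, as the later .loc lookups require:
In [ ]:
# Row count and number of distinct classes per selected attribute column.
print(p_total.shape)
for col in COLS:
    print(col, p_total[col].nunique())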
In [119]:
def common_list(img_root, prop_index):
    """Return product codes that have both an image file under img_root and a row in prop_index."""
    ext = 'jpg'
    base_list = os.listdir(img_root)
    img_code = [f.split('.')[0] for f in base_list if f.endswith(ext)]
    final_code = prop_index.intersection(img_code).tolist()  # use the passed index rather than the global p_total
    return final_code
In [120]:
def gen_X_y(img_root, prop_index, multiout=False):
    """Load the images under img_root and build labels; with multiout=True return one one-hot array per attribute."""
    f_code = common_list(img_root, prop_index)
    file_list = [os.path.join(img_root, '%s.jpg' % f) for f in f_code]
    imgs = load_img(file_list)
    labels = p_total.loc[f_code]
    if multiout:
        label_one = []
        for col in labels.columns:
            label_one.append(pd.get_dummies(labels[col]).values)
    else:
        label_one = pd.get_dummies(labels).values
    return imgs, label_one
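A usage sketch of gen_X_y on the first image root; note that pd.get_dummies builds its columns per call, so both roots are assumed to cover the same category sets in the same order for the later concatenation to line up:
In [ ]:
# Sketch: one one-hot array per attribute; each width should not exceed the global category count.
imgs_chk, labels_chk = gen_X_y('/home/luo.sz/beLLE/data/imgs/', p_total.index, multiout=True)
print(imgs_chk.shape)
for col, arr in zip(COLS, labels_chk):
    print(col, arr.shape, p_total[col].nunique())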
In [140]:
# multi output
img_roots = ['/home/luo.sz/beLLE/data/imgs/', '/home/luo.sz/beLLE/data/ygimgs/']
X_li = []
y_li = []
for root in img_roots:
    imgs, label_one = gen_X_y(root, p_total.index, multiout=True)
    X_li.append(imgs)
    y_li.append(label_one)
X = np.concatenate(X_li, axis=0)
y = [np.concatenate([y_[i] for y_ in y_li], axis=0) for i in range(feature_num)]
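Shape check of the assembled arrays (sketch):
In [ ]:
# X should be (N, 224, 224, 3); y is a list of feature_num arrays, each (N, n_classes).
print(X.shape)
print([y_.shape for y_ in y])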
In [48]:
from sklearn.model_selection import train_test_split
# Note: this split only applies to the single-output case where y is one array;
# the multi-output list y is split by index in the next cell.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=23)
In [136]:
# X, y = np.concatenate([img16, img17], axis=0), np.concatenate([label_one16.toarray(), label_one17.toarray()], axis=0)
total_len = X.shape[0]
train_len = int(total_len * 0.9)
np.random.seed(23)
random_index = np.random.permutation(total_len)
train_index, test_index = random_index[:train_len], random_index[train_len:]
X_train, y_train = X[train_index, :], [y_[train_index, :] for y_ in y]
X_test, y_test = X[test_index, :], [y_[test_index, :] for y_ in y]
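A quick check that the manual split kept the pieces consistent (sketch):
In [ ]:
# Train and test sizes should sum to total_len, and every label array should match its images.
print(X_train.shape, X_test.shape)
for tr, te in zip(y_train, y_test):
    assert len(tr) == len(X_train) and len(te) == len(X_test)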
In [138]:
def BelleNet(input_shape=(224, 224, 3), labels=29):
    model = Sequential()
    model.add(Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))
    model.add(Conv2D(32, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))
    model.add(Flatten())
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(labels, activation='sigmoid'))  # !IMPORTANT: use sigmoid in the multi-label task
    return model

# sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)

def BelleMultiNet(input_shape=(224, 224, 3), labels=[3, 6, 7, 4, 4, 2]):
    label_len = len(labels)
    input_ = Input(shape=input_shape, name='input')
    x = Conv2D(32, (3, 3), activation='relu')(input_)
    x = Conv2D(32, (3, 3), activation='relu')(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = Dropout(0.25)(x)
    x = Conv2D(64, (3, 3), activation='relu')(x)
    x = Conv2D(64, (3, 3), activation='relu')(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = Dropout(0.25)(x)
    x = Flatten()(x)
    x = Dense(256, activation='relu', name='extract')(x)
    x = Dropout(0.25)(x)
    mid = [Dense(128, activation='relu')(x) for _ in range(label_len)]
    mid = [Dense(64, activation='relu')(m) for m in mid]
    mid = [Dense(64, activation='relu')(m) for m in mid]
    outs = []
    for i, m, l in zip(range(label_len), mid, labels):
        outs.append(Dense(l, activation='softmax', name='out' + str(i + 1))(m))
    model = Model(inputs=input_, outputs=outs)
    return model
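To inspect the resulting topology of the multi-head model (a shared conv trunk with one softmax head per attribute), it can be instantiated with the per-attribute class counts derived from y (sketch):
In [ ]:
# Sketch: build the multi-output model and print its layer summary / parameter counts.
tmp_model = BelleMultiNet(labels=[y_.shape[1] for y_ in y])
tmp_model.summary()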
In [ ]:
epochs = 70  # 70 is enough
model = BelleMultiNet(labels=[y_.shape[1] for y_ in y])
model.compile(optimizer='rmsprop',
              loss='categorical_crossentropy',
              metrics={'out%d' % (i + 1): 'categorical_accuracy' for i in range(feature_num)})
# model.fit(X_train, y_train, batch_size=32, epochs=10)
train_datagen = image.ImageDataGenerator(
    rescale=1. / 255,
    shear_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True)
test_datagen = image.ImageDataGenerator(rescale=1. / 255)
# Note: multiout=True is not an argument of the stock Keras ImageDataGenerator.flow;
# it presumably comes from a patched generator (e.g. provided via SR_module).
model.fit_generator(train_datagen.flow(X_train, y_train, batch_size=32, multiout=True),
                    steps_per_epoch=len(X_train) // 32, epochs=epochs)
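To keep the best epoch and stop on a loss plateau, standard Keras callbacks can be added to the same fit_generator call (a sketch; the checkpoint file name is illustrative):
In [ ]:
# Sketch: checkpoint the best weights (by training loss) and stop early on a plateau.
from keras.callbacks import ModelCheckpoint, EarlyStopping
callbacks = [
    ModelCheckpoint(os.path.join(ROOT, 'BelleMultiNet_best.h5'),
                    monitor='loss', save_best_only=True, save_weights_only=True),
    EarlyStopping(monitor='loss', patience=10),
]
model.fit_generator(train_datagen.flow(X_train, y_train, batch_size=32, multiout=True),
                    steps_per_epoch=len(X_train) // 32, epochs=epochs,
                    callbacks=callbacks)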
In [144]:
model.evaluate_generator(test_datagen.flow(X_test, y_test, multiout=True), 10)
(Evaluation output elided; these weights were trained on images from /home/luo.sz/beLLE/data/imgs/ and /home/luo.sz/beLLE/data/ygimgs/.)
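evaluate_generator returns a flat list of values; pairing them with model.metrics_names makes the per-head losses and accuracies readable (sketch):
In [ ]:
# Sketch: label the evaluation scores with their metric names.
scores = model.evaluate_generator(test_datagen.flow(X_test, y_test, multiout=True), 10)
print(dict(zip(model.metrics_names, scores)))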
In [145]:
model.save_weights(os.path.join(ROOT, 'BelleMultiNet_Weights_V2.h5'))
json_string = model.to_json()
with open(os.path.join(ROOT, 'BelleMultiNet_V2.json'), 'w') as f:
    f.write(json_string)
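The saved architecture and weights can be restored later with model_from_json (sketch):
In [ ]:
# Sketch: rebuild the model from the saved JSON and load the trained weights.
from keras.models import model_from_json
with open(os.path.join(ROOT, 'BelleMultiNet_V2.json')) as f:
    restored = model_from_json(f.read())
restored.load_weights(os.path.join(ROOT, 'BelleMultiNet_Weights_V2.h5'))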
In [ ]:
# Direct evaluation on the raw arrays; the generators above rescale by 1/255,
# so X_test would need the same scaling for a comparable score.
score = model.evaluate(X_test, y_test, batch_size=32)
In [ ]:
# Single-output, multi-label variant: assumes X/y were built with multiout=False
# (one flattened indicator matrix), so y_test here is a single array.
epochs = 100  # 70 is usually enough
model = BelleNet(labels=y_test.shape[1])
model.compile(optimizer='rmsprop',
              loss='binary_crossentropy',
              metrics=['accuracy'])  # !IMPORTANT: use binary_crossentropy in the multi-label task
train_datagen = image.ImageDataGenerator(
    rescale=1. / 255,
    shear_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True)
test_datagen = image.ImageDataGenerator(rescale=1. / 255)
model.fit_generator(train_datagen.flow(X_train, y_train, batch_size=32),
                    steps_per_epoch=len(X_train) // 32, epochs=epochs)
# model.fit(x_train, y_train, batch_size=32, epochs=10)
# score = model.evaluate(x_test, y_test, batch_size=32)
In [ ]:
model.evaluate_generator(test_datagen.flow(X_test, y_test), 10)
In [52]:
model.save_weights(os.path.join(ROOT, 'BelleNet_Weights.h5'))
json_string = model.to_json()
with open(os.path.join(ROOT, 'BelleNet.json'), 'w') as f:
    f.write(json_string)
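For inference with the multi-label BelleNet, the sigmoid outputs can be thresholded per label (sketch; the image file name and the 0.5 threshold are illustrative):
In [ ]:
# Sketch: predict multi-label attributes for a new image, matching the generators' 1/255 rescaling.
new_imgs = load_img([os.path.join(ROOT, 'sample_imgs', '0003.jpg')])
probs = model.predict(new_imgs / 255.)
pred = (probs > 0.5).astype(int)
print(pred)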