In [ ]:
import numpy as np
import keras
from keras import models
from keras import layers
import glob
import PIL.Image as Image
import os
import matplotlib.pyplot as plt
# from keras.preprocessing import image
# from keras.applications.mobilenetv2 import preprocess_input, decode_predictions
# from keras.applications import nasnet
%matplotlib inline
In [ ]:
def norm(a):
    """Scale vector *a* to unit Euclidean (L2) length."""
    length = np.linalg.norm(a)
    return a / length
def encode2(img_file, model):
    """Encode the image at *img_file* into a feature vector using *model*.

    Loads the image, resizes to 224x224, applies NASNet preprocessing,
    predicts, and returns the squeezed feature vector.

    NOTE(review): relies on module-level `image` and `nasnet` — the
    corresponding keras imports at the top of the file are commented
    out and must be restored before this runs.
    """
    # Bug fix: the original referenced the undefined global `file`
    # instead of the `img_file` parameter (a copy of the commented-out
    # draft above it, which used `img_file` correctly).
    img = image.load_img(img_file, target_size=(224, 224))
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)  # add batch dimension
    x = nasnet.preprocess_input(x)
    pred = model.predict(x)
    return pred.squeeze()
def compare(target, features):
    """Rank the rows of *features* by Euclidean distance to *target*.

    Prints the distance vector (notebook debugging aid) and returns the
    indices of the 30 closest rows together with all distances.
    """
    distances = np.linalg.norm(features - target, axis=1)
    print(distances)
    closest = np.argsort(distances)[:30]
    return closest, distances
def init_mobile():
    """Build a frozen MobileNetV2 feature extractor topped with 7x7 max-pooling.

    Returns a Sequential model: ImageNet-pretrained MobileNetV2 (no
    classifier head) followed by a MaxPool2D((7, 7)) that collapses the
    7x7 spatial map into a single feature vector per image.
    """
    base = keras.applications.MobileNetV2(
        weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    extractor = models.Sequential()
    extractor.add(base)
    extractor.add(layers.MaxPool2D((7, 7)))
    base.trainable = False  # feature extraction only; no fine-tuning
    return extractor
In [ ]:
def init(model):
    """Encode every jpg under e:/pics/test into a feature matrix.

    Returns (files, features): a numpy array of file paths and the
    squeezed feature matrix, row-aligned with *files*.
    """
    files = []
    features = []
    # Single directory scan: the original globbed the directory twice,
    # which would misalign files and features if the folder changed
    # between the two scans (and encoded nothing it hadn't listed).
    for f in glob.glob('e:/pics/test/*.jpg'):
        files.append(f)
        features.append(encode2(f, model))
    files = np.array(files)
    features = np.array(features).squeeze()
    return files, features
In [ ]:
# Sanity check: L2 norm of [1,2,3,4] is sqrt(30).
a = np.array([1,2,3,4])
np.linalg.norm(a)
In [ ]:
# Same value computed by hand.
np.sqrt(np.sum(a**2))
In [ ]:
# Unit-length version of a.
a/np.linalg.norm(a)
In [ ]:
# Sum of the normalized components (not 1 — this is L2, not L1, normalization).
(a/np.linalg.norm(a)).sum()
In [ ]:
# Rebuild a and L2-normalize it via the helper.
a = np.array([1,2,3,4])
a = norm(a)
In [ ]:
# Two more normalized vectors to compare against a.
b = np.array([1,2,2.5,4])
b = norm(b)
c = np.array([2,5,3,6])
c = norm(c)
f = [b,c]
In [ ]:
# Element-wise difference between two normalized vectors.
b-a
In [ ]:
# Broadcasting: subtract a from each row of f.
f-a
In [ ]:
# Distance of each row of f to a, then indices ordered by closeness.
d = np.linalg.norm(f-a, axis=1)
np.argsort(d)[:30]
In [ ]:
# with top
# NOTE(review): `model` must already exist from another cell —
# this notebook's cells were executed out of order.
model.summary()
In [ ]:
# no top
model.summary()
In [ ]:
# Reference image (relies on the commented-out keras `image` import).
img1 = image.load_img('e:/pics/test/a1.jpg', target_size=(224,224))
img1
In [ ]:
# change a1, toddle
# Slightly modified copy of a1, used to probe feature similarity.
img2 = image.load_img('e:/pics/test/a1_c.jpg', target_size=(224,224))
img2
In [ ]:
def encode(img):
    """Turn an already-loaded PIL image into a (1, N) feature row.

    Uses the module-level `image`, `preprocess_input`, and `model`
    names; prints the flattened shape as a notebook debugging aid.
    """
    batch = np.expand_dims(image.img_to_array(img), axis=0)
    batch = preprocess_input(batch)
    flat = model.predict(batch).reshape(1, -1)
    print(flat.shape)
    return flat
In [ ]:
# Feature rows for the original and the modified image.
r1 =encode(img1)
r2 = encode(img2)
In [ ]:
# NOTE(review): `r` is not defined yet at this point in the file —
# this cell depends on a later cell having been executed first.
r = r.reshape(1,-1)
r.shape
In [ ]:
# Broadcast difference between every stored feature row and r1.
r = features - r1
r.shape
In [ ]:
# Distance of each stored feature row to r1.
np.linalg.norm(features-r1, axis=1)
In [ ]:
compare(r1, features)
In [ ]:
# Query with a third variant of the a1 image.
img = image.load_img('e:/pics/test/a1_c3.jpg', target_size=(224,224))
r =encode(img)
rank, d = compare(r, features)
In [ ]:
rank
In [ ]:
rank
In [ ]:
# Map the ranked indices back to file paths.
files[rank]
In [ ]:
model.summary()
In [ ]:
# NOTE(review): `preds` only exists if encode() ran in this session.
preds.shape
In [ ]:
# Rebuild the feature matrix for every test image.
# NOTE(review): encode2() as defined above takes (img_file, model) —
# this call passes only the file, so it raises TypeError unless a
# one-argument encode2 was live in this session. Confirm which
# definition was current when this cell ran.
features = []
for f in glob.glob('e:/pics/test/*.jpg'):
feature = encode2(f)
features.append(feature)
features = np.array(features)
features = features.squeeze()
print(features.shape)
In [ ]:
r = encode2('e:/pics/test/a1.jpg')
r = r.reshape(1,-1)
print(r.shape)
In [ ]:
# with max pool, similarity is better than no pool
r = encode2('e:/pics/test/a1.jpg')
r = r.reshape(1,-1)
print(r.shape)
r, d= compare(r, features)
# Rescale distances to [0, 1] and flip so 1.0 means "most similar".
d = d.reshape(-1,1)
d*=1.0/d.max()
d = 1-d
print(d)
In [ ]:
from sklearn import preprocessing
In [ ]:
# Alternative normalization of the distances via sklearn.
# NOTE(review): this rebinding shadows the norm() helper defined above.
norm = preprocessing.MinMaxScaler()
d = d.reshape(-1,1)
norm.fit_transform(d)
In [ ]:
# Frozen NASNetMobile backbone with a 7x7 max-pool head,
# mirroring init_mobile() above.
nas = nasnet.NASNetMobile(weights='imagenet', include_top=False, input_shape = (224, 224, 3) )
model = models.Sequential()
model.add(nas)
model.add(layers.MaxPool2D((7,7)))
In [ ]:
nas.trainable = False
In [ ]:
files, featurs = init(nas)
In [ ]:
# Encode one image with the pooled NASNet pipeline.
img = encode2('e:/pics/test/a1.jpg', nas)
img.shape
In [ ]:
compare(img, features)
In [ ]:
# Manual, step-by-step version of encode2() for shape inspection,
# run against the bare backbone `nas` (no pooling head).
img = image.load_img('e:/pics/test/a1.jpg', target_size=(224,224))
x = image.img_to_array(img)
x = np.expand_dims(x, axis=0)
x = nasnet.preprocess_input(x)
pred = nas.predict(x)
pred = pred.squeeze()
print(pred.shape)
In [ ]:
pred.shape
In [ ]:
import torchvision as tv
import torch as t
from torchvision import transforms
In [ ]:
# Pretrained ResNet-34 from torchvision.
resnet34 = tv.models.resnet34(pretrained=True)
In [ ]:
# Strip the final FC layer, leaving a pooled feature extractor.
model = list(resnet34.children())[:-1]
model = t.nn.Sequential(*model)
# Freeze the backbone: no gradients needed for feature extraction.
# Bug fix: the attribute is `requires_grad`; the original wrote
# `required_grad`, which silently created an unused attribute on each
# parameter and froze nothing.
for param in model.parameters():
    param.requires_grad = False
In [ ]:
# Standard ImageNet preprocessing: resize, center-crop to 224,
# tensorize, normalize with the ImageNet channel statistics.
transform = transforms.Compose([
transforms.Resize(256),
transforms.CenterCrop(224),
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
In [ ]:
import time
In [ ]:
# Encode up to ~500 magazine images, timing each call.
# NOTE(review): this uses the torch encode(file, model) defined in a
# later cell — the notebook's cells were executed out of order.
features = []
files = []
for i, file in enumerate(glob.glob('e:/pics/mag/*.jpg')):
tic = time.time()
code = encode(file, model)
toc = time.time()
files.append(file)
features.append(code)
# Progress report every 20 files.
if i%20==0:
print('file', file, (toc-tic))
if i>500:
break
features = np.array(features)
files = np.array(files)
In [ ]:
print(len(features), len(files))
In [ ]:
def encode(file, model):
    """Encode the image at *file* into a numpy feature vector with a torch *model*.

    Applies the module-level torchvision `transform` pipeline, adds a
    batch dimension for inference, and squeezes it away afterwards.
    """
    tensor = transform(Image.open(file))
    batch = tensor[None, :]  # prepend batch dimension
    out = model(batch)
    return out.data.squeeze_().numpy()
In [ ]:
files[:10]
In [ ]:
# Query with a novel image and rank the stored features.
file = 'e:/pics/test/img10_2.jpg'
f = encode(file, model)
f = f.reshape(1,-1)
r, d = compare(f, features)
# d*=1.0/d.max()
# d = 1-d
print(r)
print(d)
In [ ]:
# Persist the index so it can be reloaded without re-encoding.
np.save('d:/tmp/features.np', features)
np.save('d:/tmp/files.np', files)
In [ ]:
# Size of the feature matrix in MB.
features.nbytes/(1024*1024)
# cost 0.98 MB
In [ ]:
# Show the top-ranked matches, each titled with its distance.
same_file = files[r]
for i, f in enumerate(same_file):
_ = plt.figure()
img = plt.imread(f)
plt.imshow(img)
plt.title(d[i])
plt.axis('off')
# change file, similarity not as you think
In [84]:
features.shape
Out[84]:
In [ ]:
def compare(target, features):
    """Return (indices, distances) of the 10 rows of *features* closest to *target*.

    Distances are Euclidean; the returned distance array is restricted
    to the ranked indices, in ascending order.
    """
    dist = np.linalg.norm(features - target, axis=1)
    nearest = np.argsort(dist)[:10]
    return nearest, dist[nearest]
In [ ]:
In [ ]:
In [ ]:
path = 'e:\\pics\\mag'
# Rename every file in the folder to img_<index>, keeping its extension.
# NOTE(review): os.listdir() returns a snapshot list, so renaming while
# iterating is safe; re-running can collide with existing img_* names
# (os.rename raises on Windows if the target exists).
for i, f in enumerate(os.listdir(path)):
name, ext = os.path.splitext(f)
new_name = 'img_'+str(i) + ext
print(new_name)
os.rename(os.path.join(path, f), os.path.join(path, new_name))
# break
In [ ]: