In [46]:
import tensorflow as tf
import keras
import numpy as np
from tqdm import tqdm
from keras.layers import LSTM, Conv1D, Flatten, Dropout, Dense, InputLayer, ZeroPadding2D, Convolution2D, MaxPooling2D, Conv2D
from keras.layers.embeddings import Embedding
from keras.models import Sequential
from keras.regularizers import l2 as L2
import pandas as pd
import os
import cv2
import matplotlib.pyplot as plt
import pickle

l2 = L2(0.01)

Image analysis


In [121]:
# Batch generator
import numpy as np

class DataGenerator(object):
    
    def __init__(self, batch_size = 32, dim_x = 224, dim_y = 224, dim_z = 3, shuffle = True):
        self.dim_x = dim_x
        self.dim_y = dim_y
        self.dim_z = dim_z
        self.batch_size = batch_size
        self.shuffle = shuffle

    def generate(self, labels, pic_names):
        # Infinite loop over the dataset
        while True:
            # Generate the order of exploration of the dataset
            indexes = self.__get_exploration_order(pic_names)

            # Generate batches
            imax = int(len(indexes)/self.batch_size)
            for i in range(imax):
                # Pick the image names belonging to this batch
                list_IDs_temp = [pic_names[k] for k in indexes[i*self.batch_size:(i+1)*self.batch_size]]

                # Generate data
                X, y = self.__data_generation(labels, list_IDs_temp)

                yield X, y

    def __get_exploration_order(self, pic_names):
        
        # Find exploration order
        indexes = np.arange(len(pic_names))
        if self.shuffle == True:
            np.random.shuffle(indexes)

        return indexes

    def __data_generation(self, labels, list_IDs_temp):
        # X : (n_samples, dim_x, dim_y, dim_z)
        # Initialization
        X = np.empty((self.batch_size, self.dim_x, self.dim_y, self.dim_z))
        y = np.empty((self.batch_size), dtype = np.float32)

        # Generate data
        for i, ID in enumerate(list_IDs_temp):
            # Load the image, resize it to the network input size and scale to [0, 1]
            img = cv2.imread('Dataset/images/'+ID)
            img = cv2.resize(img, (224, 224))
            img = img/255
            X[i, :, :, :] = img

            # Store the normalized number of likes as the regression target
            y[i] = labels[labels.id == ID].likes.values[0]
        return X, y
    def debug(self, labels, pic_names):
        indexes = self.__get_exploration_order(pic_names)
        imax = int(len(indexes)/self.batch_size)
        
        for i in range(1):
            # Find list of IDs
            list_IDs_temp = [pic_names[k] for k in indexes[i*self.batch_size:(i+1)*self.batch_size]]

            # Generate data
            X, y = self.__data_generation(labels, list_IDs_temp)
            print(X.shape)
DataGenerator().debug(dataset_train, train_names)


(32, 224, 224, 3)
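
For reference, a single batch can also be drawn straight from the infinite generator with next(). This is only a minimal sketch and assumes dataset_train and train_names are already loaded as in the data-loading cell below:

# Minimal sketch: draw one batch from the generator and check its shapes
batch_gen = DataGenerator(batch_size=32).generate(dataset_train, train_names)
X_batch, y_batch = next(batch_gen)
print(X_batch.shape)  # expected: (32, 224, 224, 3)
print(y_batch.shape)  # expected: (32,)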

In [120]:
dataset_train[dataset_train.id == '3453.jpg'].likes.values


Out[120]:
array([ 0.05645522])

In [110]:
#Loading data
data = pd.read_csv('Dataset/dataset_raw.csv')
data.likes = data.likes/data.likes.max()

with open ('Dataset/dataset_train.pkl', 'rb') as f:
    dataset_train = pickle.load(f)
with open ('Dataset/dataset_val.pkl', 'rb') as f:
    dataset_val = pickle.load(f)

train_names = dataset_train.id.values
val_names = dataset_val.id.values

In [24]:
# pic_names = os.listdir('Dataset/images/')
# msk = np.random.rand(len(pic_names)) < 0.8
# train_names = np.array(pic_names)[msk]
# val_names = np.array(pic_names)[~msk]

# msk = np.random.rand(len(val_names)) < 0.5
# test_names = val_names[msk]
# val_names = val_names[~msk]

# print(train_names.shape)
# print(val_names.shape)
# print(test_names.shape)


(12539,)
(1547,)
(1628,)

In [5]:
# train_y = np.array([data.iloc[int(i[:-4])].likes for i in train_names])
# val_y = np.array([data.iloc[int(i[:-4])].likes for i in val_names])
# print(train_y.shape)
# print(val_y.shape)


(12450,)
(3264,)

In [61]:
# # load images
# train_X =[]
# val_X=[]
# for pic in train_names:
#     img = cv2.imread('Dataset/images/'+pic)
#     img = cv2.resize(img, (224,224))
#     img = img/255
#     train_X.append(img)
# for pic in val_names:
#     img = cv2.imread('Dataset/images/'+pic)
#     img = cv2.resize(img, (224,224))
#     img = img/255
#     val_X.append(img)
# train_X = np.array(train_X)
# val_X = np.array(val_X)


(224, 224, 3)
(224, 224, 3)
(224, 224, 3)
... [the same shape line was printed once per loaded image; output truncated] ...
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-61-ae1a7376173b> in <module>()
      3 val_X=[]
      4 for pic in train_names:
----> 5     img = cv2.imread('Dataset/images/'+pic)
      6     img = cv2.resize(img, (224,224))
      7     print(img.shape)

KeyboardInterrupt: 

In [ ]:


In [122]:
img_input = keras.layers.Input(shape=(224,224,3))
x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv1')(img_input)
x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv2')(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x)

# Block 2
x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv1')(x)
x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv2')(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x)

# Block 3
x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv1')(x)
x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv2')(x)
x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv3')(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x)

# Block 4
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv1')(x)
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv2')(x)
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv3')(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x)

# Block 5
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv1')(x)
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv2')(x)
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv3')(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool')(x)

x = Flatten(name='flatten')(x)
x = Dense(4096, activation='relu', name='fc1')(x)
x = Dense(2048, activation='relu', name='fc2')(x)
output = Dense(1, activation=None, name='predictions')(x)
model = keras.models.Model(img_input, output, name='model_1')
print(model.summary())


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
input_8 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
_________________________________________________________________
block3_conv1 (Conv2D)        (None, 56, 56, 256)       295168    
_________________________________________________________________
block3_conv2 (Conv2D)        (None, 56, 56, 256)       590080    
_________________________________________________________________
block3_conv3 (Conv2D)        (None, 56, 56, 256)       590080    
_________________________________________________________________
block3_pool (MaxPooling2D)   (None, 28, 28, 256)       0         
_________________________________________________________________
block4_conv1 (Conv2D)        (None, 28, 28, 512)       1180160   
_________________________________________________________________
block4_conv2 (Conv2D)        (None, 28, 28, 512)       2359808   
_________________________________________________________________
block4_conv3 (Conv2D)        (None, 28, 28, 512)       2359808   
_________________________________________________________________
block4_pool (MaxPooling2D)   (None, 14, 14, 512)       0         
_________________________________________________________________
block5_conv1 (Conv2D)        (None, 14, 14, 512)       2359808   
_________________________________________________________________
block5_conv2 (Conv2D)        (None, 14, 14, 512)       2359808   
_________________________________________________________________
block5_conv3 (Conv2D)        (None, 14, 14, 512)       2359808   
_________________________________________________________________
block5_pool (MaxPooling2D)   (None, 7, 7, 512)         0         
_________________________________________________________________
flatten (Flatten)            (None, 25088)             0         
_________________________________________________________________
fc1 (Dense)                  (None, 4096)              102764544 
_________________________________________________________________
fc2 (Dense)                  (None, 2048)              8390656   
_________________________________________________________________
predictions (Dense)          (None, 1)                 2049      
=================================================================
Total params: 125,871,937
Trainable params: 125,871,937
Non-trainable params: 0
_________________________________________________________________
None
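
The convolutional stack above follows the VGG16 layout, with fc2 narrowed to 2048 units and a single linear output for the likes regression. As an alternative starting point, the same backbone could be initialized with pretrained ImageNet weights via keras.applications; this is only a sketch and was not used in the runs below:

# Sketch: VGG16 backbone with ImageNet weights plus the same regression head
from keras.applications import VGG16
base = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
h = Flatten()(base.output)
h = Dense(4096, activation='relu')(h)
h = Dense(2048, activation='relu')(h)
pretrained_model = keras.models.Model(base.input, Dense(1, activation=None)(h), name='model_1_pretrained')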

In [127]:
optimizer = keras.optimizers.SGD(lr=0.00001)
model.compile(loss='mse', optimizer=optimizer, metrics=['mse'])
batch_size = 16
training_generator = DataGenerator(batch_size = batch_size).generate(dataset_train, train_names)
validation_generator = DataGenerator(batch_size = batch_size).generate(dataset_val, val_names)

In [128]:
model.fit_generator(generator = training_generator,
                    steps_per_epoch = len(train_names)//batch_size,
                    validation_data = validation_generator,
                    validation_steps = len(val_names)//batch_size, verbose = True,epochs= 10)
#model.fit(X_train_emb, y_train,validation_data=(X_val_emb, y_val),verbose=True, epochs=20)


Epoch 1/10
502/779 [==================>...........] - ETA: 251s - loss: 0.0041 - mean_squared_error: 0.0041
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-128-aa68ebe1718d> in <module>()
      2                     steps_per_epoch = len(train_names)//batch_size,
      3                     validation_data = validation_generator,
----> 4                     validation_steps = len(val_names)//batch_size, verbose = True,epochs= 10)
      5 #model.fit(X_train_emb, y_train,validation_data=(X_val_emb, y_val),verbose=True, epochs=20)

~/anaconda3/lib/python3.6/site-packages/keras/legacy/interfaces.py in wrapper(*args, **kwargs)
     85                 warnings.warn('Update your `' + object_name +
     86                               '` call to the Keras 2 API: ' + signature, stacklevel=2)
---> 87             return func(*args, **kwargs)
     88         wrapper._original_function = func
     89         return wrapper

~/anaconda3/lib/python3.6/site-packages/keras/engine/training.py in fit_generator(self, generator, steps_per_epoch, epochs, verbose, callbacks, validation_data, validation_steps, class_weight, max_queue_size, workers, use_multiprocessing, shuffle, initial_epoch)
   2040                     outs = self.train_on_batch(x, y,
   2041                                                sample_weight=sample_weight,
-> 2042                                                class_weight=class_weight)
   2043 
   2044                     if not isinstance(outs, list):

~/anaconda3/lib/python3.6/site-packages/keras/engine/training.py in train_on_batch(self, x, y, sample_weight, class_weight)
   1760             ins = x + y + sample_weights
   1761         self._make_train_function()
-> 1762         outputs = self.train_function(ins)
   1763         if len(outputs) == 1:
   1764             return outputs[0]

~/anaconda3/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py in __call__(self, inputs)
   2271         updated = session.run(self.outputs + [self.updates_op],
   2272                               feed_dict=feed_dict,
-> 2273                               **self.session_kwargs)
   2274         return updated[:len(self.outputs)]
   2275 

~/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py in run(self, fetches, feed_dict, options, run_metadata)
    893     try:
    894       result = self._run(None, fetches, feed_dict, options_ptr,
--> 895                          run_metadata_ptr)
    896       if run_metadata:
    897         proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)

~/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py in _run(self, handle, fetches, feed_dict, options, run_metadata)
   1122     if final_fetches or final_targets or (handle and feed_dict_tensor):
   1123       results = self._do_run(handle, final_targets, final_fetches,
-> 1124                              feed_dict_tensor, options, run_metadata)
   1125     else:
   1126       results = []

~/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
   1319     if handle is None:
   1320       return self._do_call(_run_fn, self._session, feeds, fetches, targets,
-> 1321                            options, run_metadata)
   1322     else:
   1323       return self._do_call(_prun_fn, self._session, handle, feeds, fetches)

~/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py in _do_call(self, fn, *args)
   1325   def _do_call(self, fn, *args):
   1326     try:
-> 1327       return fn(*args)
   1328     except errors.OpError as e:
   1329       message = compat.as_text(e.message)

~/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py in _run_fn(session, feed_dict, fetch_list, target_list, options, run_metadata)
   1304           return tf_session.TF_Run(session, options,
   1305                                    feed_dict, fetch_list, target_list,
-> 1306                                    status, run_metadata)
   1307 
   1308     def _prun_fn(session, handle, feed_dict, fetch_list):

KeyboardInterrupt: 

In [129]:
model.save('saved_models/image_model_v1.h5')
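
The saved file can later be restored with Keras's standard loader, for example (sketch):

# Sketch: reload the saved image model for inference
from keras.models import load_model
image_model = load_model('saved_models/image_model_v1.h5')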

In [59]:
img = cv2.imread('Dataset/images/2858.jpg')
img = cv2.resize(img, (224,224))
img = img/255

img =np.expand_dims(img,axis = 0)
print(model.predict(img))
print(data.iloc[2858].likes)


[[ 0.05503913]]
0.117081010701

Text analysis


In [130]:
with open ('Dataset/dataset_train.pkl', 'rb') as f:
    dataset_train = pickle.load(f)
with open ('Dataset/dataset_val.pkl', 'rb') as f:
    dataset_val = pickle.load(f)

In [131]:
X_train = dataset_train.texts.values
y_train = dataset_train.likes.values

X_val = dataset_val.texts.values
y_val = dataset_val.likes.values
y_val


Out[131]:
array([ 0.28431053,  0.04317044,  0.01828946, ...,  0.18059568,
        0.15221549,  0.02284656])

In [132]:
def char_to_vocab(sentence):
    # Character-level encoding: map each known character to its 1-based index
    # in the vocabulary, drop unknown characters, then pad/truncate to 300.
    vocabulary = ['а','б','в','г','д','е','ё','ж','з','и','й','к','л','м','н','о','п','р','с','т','у','ф','х','ц','ч','ш','щ','ъ','ы','ь','э','ю','я',
              'А','Б','В','Г','Д','Е','Ё','Ж','З','И','Й','К','Л','М','Н','О','П','Р','С','Т','У','Ф','Х','Ц','Ч','Ш','Щ','Ъ','Ы','Ь','Э','Ю','Я',
             '!','@','#','$','%','^','&','*','(',')',':',';','/',',','.','%','№','?','~','-','+','=',' ',
              'a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z',
             'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z']
    result = []
    for char in sentence:
        if char in vocabulary:
            result.append(vocabulary.index(char)+1)

    result = np.array(result)
    num_of_lett = 300
    if len(result) < num_of_lett:
        result = np.concatenate((result, np.zeros(num_of_lett-result.shape[0])))
    if len(result) > num_of_lett:
        result = result[:num_of_lett]
    return list(result)

X_train_encoded = []
for sentence in tqdm(X_train):
    X_train_encoded.append(char_to_vocab(sentence))
X_train = np.array(X_train_encoded)

X_val_encoded = []
for sentence in tqdm(X_val):
    X_val_encoded.append(char_to_vocab(sentence))
X_val = np.array(X_val_encoded)


100%|██████████| 12473/12473 [00:03<00:00, 3605.22it/s]
100%|██████████| 1497/1497 [00:00<00:00, 3576.87it/s]
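
A quick sanity check of the encoder on a short string (a hypothetical example, not part of the original run):

# Sketch: encode one sample caption and inspect the first few indices
sample = char_to_vocab('Привет, мир!')
print(len(sample))   # 300 entries after zero-padding
print(sample[:12])   # 1-based vocabulary indices of the known characters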

In [133]:
X_train.shape


Out[133]:
(12473, 300)

In [135]:
text_input = keras.layers.Input(shape=(X_train.shape[1],))
l_init = keras.layers.embeddings.Embedding(300, 80, input_length=300)(text_input)
l_0_0 = keras.layers.Dense(128)(l_init) 
l_0_1 = keras.layers.Conv1D(filters=128, kernel_size=2, activation='relu', kernel_regularizer=l2, padding='same')(l_init) 
l_0_2 = keras.layers.Conv1D(filters=256, kernel_size=4, activation='relu', kernel_regularizer=l2,padding='same')(l_init) 
l_0_3 = keras.layers.Conv1D(filters=256, kernel_size=8, activation='relu', kernel_regularizer=l2,padding='same')(l_init)
l_0_4 = keras.layers.Conv1D(filters=512, kernel_size=16, activation='relu', kernel_regularizer=l2,padding='same')(l_init) 
l_0_5 = keras.layers.Conv1D(filters=512, kernel_size=32, activation='relu', kernel_regularizer=l2,padding='same')(l_init) 

merge = keras.layers.Merge(mode='concat', concat_axis=2) ([l_0_0, l_0_1, l_0_2, l_0_3,l_0_4,l_0_5]) 
l_5 = keras.layers.GlobalMaxPooling1D()(merge) 
l_6 = keras.layers.Dropout(0.5)(l_5) 
output = keras.layers.Dense(1, activation=None, kernel_regularizer=l2)(l_6) 
model = keras.models.Model(text_input, output, name='model_5')
print(model.summary())


____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
====================================================================================================
input_10 (InputLayer)            (None, 300)           0                                            
____________________________________________________________________________________________________
embedding_5 (Embedding)          (None, 300, 80)       24000       input_10[0][0]                   
____________________________________________________________________________________________________
dense_9 (Dense)                  (None, 300, 128)      10368       embedding_5[0][0]                
____________________________________________________________________________________________________
conv1d_21 (Conv1D)               (None, 300, 128)      20608       embedding_5[0][0]                
____________________________________________________________________________________________________
conv1d_22 (Conv1D)               (None, 300, 256)      82176       embedding_5[0][0]                
____________________________________________________________________________________________________
conv1d_23 (Conv1D)               (None, 300, 256)      164096      embedding_5[0][0]                
____________________________________________________________________________________________________
conv1d_24 (Conv1D)               (None, 300, 512)      655872      embedding_5[0][0]                
____________________________________________________________________________________________________
conv1d_25 (Conv1D)               (None, 300, 512)      1311232     embedding_5[0][0]                
____________________________________________________________________________________________________
merge_5 (Merge)                  (None, 300, 1792)     0           dense_9[0][0]                    
                                                                   conv1d_21[0][0]                  
                                                                   conv1d_22[0][0]                  
                                                                   conv1d_23[0][0]                  
                                                                   conv1d_24[0][0]                  
                                                                   conv1d_25[0][0]                  
____________________________________________________________________________________________________
global_max_pooling1d_5 (GlobalMa (None, 1792)          0           merge_5[0][0]                    
____________________________________________________________________________________________________
dropout_5 (Dropout)              (None, 1792)          0           global_max_pooling1d_5[0][0]     
____________________________________________________________________________________________________
dense_10 (Dense)                 (None, 1)             1793        dropout_5[0][0]                  
====================================================================================================
Total params: 2,270,145
Trainable params: 2,270,145
Non-trainable params: 0
____________________________________________________________________________________________________
None
/home/ludapak/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:10: UserWarning: The `Merge` layer is deprecated and will be removed after 08/2017. Use instead layers from `keras.layers.merge`, e.g. `add`, `concatenate`, etc.
  # Remove the CWD from sys.path while we load stuff.

In [138]:
optimizer = keras.optimizers.SGD(lr=0.0001)
model.compile(loss='mse', optimizer=optimizer, metrics=['mse'])
model.fit(X_train, y_train,validation_data=(X_val, y_val),verbose=True, epochs=20)


Train on 12473 samples, validate on 1497 samples
Epoch 1/20
12473/12473 [==============================] - 29s - loss: 5.7203 - mean_squared_error: 0.0071 - val_loss: 5.7128 - val_mean_squared_error: 0.0040
Epoch 2/20
12473/12473 [==============================] - 28s - loss: 5.7116 - mean_squared_error: 0.0072 - val_loss: 5.7039 - val_mean_squared_error: 0.0040
Epoch 3/20
12473/12473 [==============================] - 28s - loss: 5.7024 - mean_squared_error: 0.0070 - val_loss: 5.6950 - val_mean_squared_error: 0.0040
Epoch 4/20
12473/12473 [==============================] - 28s - loss: 5.6936 - mean_squared_error: 0.0070 - val_loss: 5.6861 - val_mean_squared_error: 0.0040
Epoch 5/20
12473/12473 [==============================] - 28s - loss: 5.6848 - mean_squared_error: 0.0071 - val_loss: 5.6772 - val_mean_squared_error: 0.0040
Epoch 6/20
 3712/12473 [=======>......................] - ETA: 19s - loss: 5.6789 - mean_squared_error: 0.0070
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-138-b14fafd32dc4> in <module>()
      1 optimizer = keras.optimizers.SGD(lr=0.0001)
      2 model.compile(loss='mse', optimizer=optimizer, metrics=['mse'])
----> 3 model.fit(X_train, y_train,validation_data=(X_val, y_val),verbose=True, epochs=20)

~/anaconda3/lib/python3.6/site-packages/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, **kwargs)
   1596                               initial_epoch=initial_epoch,
   1597                               steps_per_epoch=steps_per_epoch,
-> 1598                               validation_steps=validation_steps)
   1599 
   1600     def evaluate(self, x, y,

~/anaconda3/lib/python3.6/site-packages/keras/engine/training.py in _fit_loop(self, f, ins, out_labels, batch_size, epochs, verbose, callbacks, val_f, val_ins, shuffle, callback_metrics, initial_epoch, steps_per_epoch, validation_steps)
   1181                     batch_logs['size'] = len(batch_ids)
   1182                     callbacks.on_batch_begin(batch_index, batch_logs)
-> 1183                     outs = f(ins_batch)
   1184                     if not isinstance(outs, list):
   1185                         outs = [outs]

~/anaconda3/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py in __call__(self, inputs)
   2271         updated = session.run(self.outputs + [self.updates_op],
   2272                               feed_dict=feed_dict,
-> 2273                               **self.session_kwargs)
   2274         return updated[:len(self.outputs)]
   2275 

~/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py in run(self, fetches, feed_dict, options, run_metadata)
    893     try:
    894       result = self._run(None, fetches, feed_dict, options_ptr,
--> 895                          run_metadata_ptr)
    896       if run_metadata:
    897         proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)

~/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py in _run(self, handle, fetches, feed_dict, options, run_metadata)
   1122     if final_fetches or final_targets or (handle and feed_dict_tensor):
   1123       results = self._do_run(handle, final_targets, final_fetches,
-> 1124                              feed_dict_tensor, options, run_metadata)
   1125     else:
   1126       results = []

~/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
   1319     if handle is None:
   1320       return self._do_call(_run_fn, self._session, feeds, fetches, targets,
-> 1321                            options, run_metadata)
   1322     else:
   1323       return self._do_call(_prun_fn, self._session, handle, feeds, fetches)

~/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py in _do_call(self, fn, *args)
   1325   def _do_call(self, fn, *args):
   1326     try:
-> 1327       return fn(*args)
   1328     except errors.OpError as e:
   1329       message = compat.as_text(e.message)

~/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py in _run_fn(session, feed_dict, fetch_list, target_list, options, run_metadata)
   1304           return tf_session.TF_Run(session, options,
   1305                                    feed_dict, fetch_list, target_list,
-> 1306                                    status, run_metadata)
   1307 
   1308     def _prun_fn(session, handle, feed_dict, fetch_list):

KeyboardInterrupt: 

In [139]:
model.save('saved_models/text_model_v1.h5')
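
As with the image model, the saved text model can be reloaded and, if needed, scored on the held-out validation split (sketch):

# Sketch: reload the saved text model and evaluate it on the validation set
from keras.models import load_model
text_model = load_model('saved_models/text_model_v1.h5')
print(text_model.evaluate(X_val, y_val, verbose=0))  # [loss, mse]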

In [ ]:
with open ('Dataset/dataset_test.pkl', 'rb') as f:
    dataset_test = pickle.load(f)
    
X_test = dataset_test.texts.values
y_test = dataset_test.likes.values

X_test_encoded = []
for sentence in tqdm(X_test):
    X_test_encoded.append(char_to_vocab(sentence))
X_test = np.array(X_test_encoded)



print(model.predict(X_test[100:105]))
print(y_test[100:105])

In [ ]: