In [1]:
from __future__ import division

import numpy as np
from numpy import *

import os

import tensorflow as tf

import PIL
from PIL import Image
import matplotlib.pyplot as plt

from skimage import data, io, filters

from matplotlib.path import Path
import matplotlib.patches as patches

import pandas as pd

In [2]:
path_to_strokes =  "tiny/airplane.npy"
X = np.load(path_to_strokes)[()]

In [3]:
print('Example sketch has ', str(shape(X['airplane'][0][0])[0]), ' strokes')
print('Corresponds to photo: ', X['airplane'][1][0])

path_to_source_photos = "../tiny/photo/airplane/"
photo = os.path.join(path_to_source_photos,'n02691156_10151.jpg')


('Example sketch has ', '169', ' strokes')
('Corresponds to photo: ', 'n02691156_10151')

In [4]:
class TinyDataset():
    """tiny airplane dataset of photos and sketches for pix2svg."""

    def __init__(self, npy_file, root_dir, transform=None):
        """
        Args:
            npy_file (string): Path to the numpy file with stroke-5 representation and corresponding photos.
                    # to get stroke-5 representation of svg
                    x['airplane'][0][5]
                    # to get corresponding photos
                    x['airplane'][1][5]
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.root_dir = root_dir
        self.stroke_dir = npy_file
        self.photo_dir = os.path.join(root_dir,'photo')
        self.strokes = np.load(npy_file)[()]
        self.transform = transform
    
    def __len__(self):
        return len(self.strokes['airplane'][0])

    def __getitem__(self, idx):
        img_name = os.path.join(self.photo_dir,'airplane',X['airplane'][1][idx]+ '.jpg')
        photo = io.imread(img_name)
        photo = photo.astype(float)
        strokes = self.strokes['airplane'][0][idx]
        sample = {'photo': photo, 'strokes': strokes,'name': X['airplane'][1][idx]+ '.jpg'}
        
        if self.transform:
            sample = self.transform(sample)

        return sample    

class ToTensor(object):
    """Convert ndarrays in sample to Tensors."""

    def __call__(self, sample):
        image, strokes, name = sample['photo'], sample['strokes'], sample['name']

        # swap color axis because
        # numpy image: H x W x C
        # torch image: C X H X W
        image = image.transpose((2, 0, 1))        
        return {'tensor': tf.divide(tf.stack(sample['photo']),255),
                'strokes': strokes,
                'name': name,
                'photo': image}    

def to_normal_strokes(big_stroke):
    """Convert from stroke-5 format (from sketch-rnn paper) back to stroke-3."""
    l = 0
    for i in range(len(big_stroke)):
        if big_stroke[i, 4] > 0:
            l = i
            break
    if l == 0:
        l = len(big_stroke)
    result = np.zeros((l, 3))
    result[:, 0:2] = big_stroke[0:l, 0:2]
    result[:, 2] = big_stroke[0:l, 3]
    return result    
    
def strokes_to_lines(strokes):
    """
    Convert stroke-3 format to polyline format.
    List contains sublist of continuous line segments (strokes).    
    """
    x = 0
    y = 0
    lines = []
    line = []
    for i in range(len(strokes)):
        if strokes[i, 2] == 1:
            x += float(strokes[i, 0])
            y += float(strokes[i, 1])
            line.append([x, y])
            lines.append(line)
            line = []
        else:
            x += float(strokes[i, 0])
            y += float(strokes[i, 1])
            line.append([x, y])
    return lines

def polyline_pathmaker(lines):
    x = []
    y = []

    codes = [Path.MOVETO] # start with moveto command always
    for i,l in enumerate(lines):
        for _i,_l in enumerate(l):
            x.append(_l[0])
            y.append(_l[1])
            if _i<len(l)-1:
                codes.append(Path.LINETO) # keep pen on page
            else:
                if i != len(lines)-1: # not the last line, so the next vertex starts a new subpath
                    codes.append(Path.MOVETO)
    verts = zip(x,y)            
    return verts, codes

def path_renderer(verts, codes):
    path = Path(verts, codes)
    patch = patches.PathPatch(path, facecolor='none', lw=2)
    ax.add_patch(patch)
    ax.set_xlim(0,max(max(verts)))
    ax.set_ylim(0,max(max(verts)))
    ax.axis('off')
    plt.gca().invert_yaxis() # y values increase as you go down in image
    plt.show()
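
For reference, a minimal toy walkthrough of the helpers above (made-up values, not from the dataset): stroke-5 rows are (dx, dy, p_down, p_lift, p_end), stroke-3 rows are (dx, dy, pen_lift), and strokes_to_lines accumulates the offsets back into absolute polyline coordinates.

# toy stroke-5 array: two 2-point strokes followed by an end-of-sketch row (made-up values)
toy_big = np.array([[10.,  0., 1., 0., 0.],
                    [ 0., 10., 0., 1., 0.],   # pen lifted after this point
                    [ 5.,  5., 1., 0., 0.],
                    [-5.,  5., 0., 1., 0.],
                    [ 0.,  0., 0., 0., 1.]])  # end of sketch
toy_s3 = to_normal_strokes(toy_big)      # (4, 3) array of (dx, dy, pen_lift)
toy_lines = strokes_to_lines(toy_s3)     # [[[10,0],[10,10]], [[15,15],[10,20]]]
toy_verts, toy_codes = polyline_pathmaker(toy_lines)  # vertices + Path codes for rendering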

In [5]:
%ls


batch_render_tiny_intermediates.ipynb   MNIST_data/
checkpoints/                            pix2svg/
generative/                             README.md
helpers.py                              sketchy/
helpers.pyc                             test.png
ipynb/                                  tf_sketchy.ipynb
LICENSE                                 tiny/
mdn.py                                  ___tiny_intermediates/
mdn.pyc                                 tiny_intermediates/
mixture_density_network_tutorial.ipynb  tiny_intermediates_resized/

In [6]:
## load in airplanes dataset
airplanes = TinyDataset(npy_file='/home/jefan/ptsketchy/tiny/airplane.npy',root_dir='/home/jefan/ptsketchy/tiny',transform=None)

In [7]:
## display given photo and corresponding sketch from stroke-5 representation
i = 100
sample = airplanes[i]
print(i, sample['photo'].shape, sample['strokes'].shape)

plt.figure()
ax = plt.subplot(121)
ax.set_title(sample['name'])
ax.axis('off')
img = np.reshape(sample['photo'],(256,256,3))
plt.imshow(img,interpolation='nearest')

ax = plt.subplot(122)
lines = strokes_to_lines(to_normal_strokes(sample['strokes']))
verts,codes = polyline_pathmaker(lines)
path_renderer(verts,codes)

plt.show()


(100, (256, 256, 3), (99, 5))

In [8]:
# load in airplanes dataset
airplanes = TinyDataset(npy_file='/home/jefan/ptsketchy/tiny/airplane.npy',
                        root_dir='/home/jefan/ptsketchy/tiny',
                        transform=ToTensor())

In [9]:
# load in features for photos
path_to_features = 'sketchy/triplet_features'
photo_features = np.load(os.path.join(path_to_features,'photo_features.npy'))
F = photo_features

In [10]:
# read in filenames and generate pandas dataframe with object labels
_filenames = pd.read_csv(os.path.join(path_to_features,'photo_filenames.txt'),header=None,names=['filename'])

filenames = []
for i in range(len(_filenames)):
    filenames.append(_filenames[_filenames.index==i].values[0][0])
filenames = ['sketchy' + f[1:] for f in filenames]
path = filenames
obj = [f.split('/')[3] for f in filenames]
img = [f.split('/')[4] for f in filenames]

data = {'path': path,
         'object': obj,
         'filename': img}
X = pd.DataFrame.from_dict(data)

In [11]:
# subset airplane features only
matches = X['object']=='airplane'
inds = np.where(matches==True)
X0 = X[matches]
F0 = F[inds]

In [ ]:


In [12]:
# construct (11094,1024) version of photo feature matrix, called PF, that matches indexing of the sketch feature matrix
sketch_features = np.load('sketchy/airplane_features/airplane_sketch_features.npy')
_sketch_filenames = pd.read_csv('sketchy/airplane_features/airplane_sketch_filenames.txt',header=None,names=['filename'])
sketch_filenames = []
for i in range(len(_sketch_filenames)):
    sketch_filenames.append(_sketch_filenames[_sketch_filenames.index==i].values[0][0])
PF = []
inds = [] 
for sf in sketch_filenames:
    q = sf.split('/')[2]+'.jpg'
    PF.append(F0[X0['filename']==q])
    inds.append(np.where(X0['filename']==q)[0][0])
PF = np.squeeze(np.array(PF))
SF = sketch_features
inds = np.array(inds)

## zip together/concatenate the photo and sketch features
_F = np.hstack((PF,SF))

In [13]:
### rather than getting a (11094,5) representation of the 'next stroke',
### bump up the dimensionality of these feature matrices to match that of
### the (delta_x,delta_y) stroke representation.
### The strokes dataframe ("S") is of dimensionality (55855,5),
### so resize and re-index the feature matrix to match S.

S = pd.read_csv('tiny/stroke_dataframe.csv')
S1 = S
photo_dir = np.array([sf.split('/')[2] for sf in sketch_filenames]) # photo dir
sketch_dir = np.array(map(int,[sf.split('/')[3] for sf in sketch_filenames])) # sketch dir
stroke_png = np.array(map(int,[sf.split('/')[4].split('.')[0] for sf in sketch_filenames])) # stroke png

F = []
for index, row in S.iterrows():
    # get ind of the original small (11094,5) matrix that corresponds to this row of S (55855,5)
    ind = np.intersect1d(np.intersect1d(np.where(photo_dir==row['photoID']),
              np.where(sketch_dir==row['sketchID'])),
              np.where(stroke_png==row['strokeID']))[0]
    F.append(_F[ind])
    
F = np.array(F)
F1 = F # protected F1 matrix

convert strokes matrix from absolute to relative coordinates
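
As a quick illustration of what the next cell does with pandas, here is a minimal numpy sketch on made-up coordinates: within a single sketch, the first point keeps its absolute position and every later point becomes an offset from the previous point.

# toy absolute (x, y) coordinates for one sketch (made-up values)
abs_xy = np.array([[100., 200.],
                   [110., 205.],
                   [130., 190.]])
rel_xy = abs_xy.copy()
rel_xy[1:] = abs_xy[1:] - abs_xy[:-1]   # each later row becomes an offset from the previous point
# rel_xy is now [[100, 200], [10, 5], [20, -15]]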


In [14]:
from copy import deepcopy
unique_photos = np.unique(S1.photoID)
r = pd.DataFrame(columns=list(S1.keys()))

run_this = False
if run_this:
    for i, p in enumerate(unique_photos):
        print 'Processing ' + p + ': ' + str(i+1) + ' of ' + str(len(unique_photos)) + ' photos.'
        s1 = S1[S1.photoID==p]
        unique_sketches_of_photo = np.unique(s1.sketchID)
        for sketch in unique_sketches_of_photo:
            this_sketch = s1[s1.sketchID==sketch]
            for index, row in this_sketch.iterrows():
                _row = deepcopy(row)
                if index==min(this_sketch.index): # first stroke
                    r = r.append(row)
                    this_x = row.x
                    this_y = row.y
                else:
                    x_offset = _row.x-this_x
                    y_offset = _row.y-this_y
                    row.x = x_offset
                    row.y = y_offset
                    r = r.append(row)
                this_x = _row.x # hold onto current row so you can compute difference with next one
                this_y = _row.y
    # save out relative strokes matrix as S2
    S2 = r   
    print 'Saving out stroke_dataframe_relative.csv'
    S2.to_csv('tiny/stroke_dataframe_relative.csv')

In [ ]:


In [15]:
# check if S2 exists, if not, load it in
try:
    S2
except:
    S2 = pd.read_csv('tiny/stroke_dataframe_relative.csv')
    
# define S to either be ABSOLUTE strokes matrix (S1) or RELATIVE strokes matrix (S2)
S = S2

# generate 55855-long vector of photo indices
print 'generating list of photo indices based on X0'
inds = [] 
for index, row in S.iterrows():
    q = row['photoID']+'.jpg'
    inds.append(np.where(X0['filename']==q)[0][0])
inds = np.array(inds)

# generate random index to do train/val/test split
print 'generating random index to do train/val/test split'
_idx = np.arange(len(X0))
np.random.seed(seed=0)
np.random.shuffle(_idx)
train_len = int(len(_idx)*0.85)
val_len = int(len(_idx)*0.05)
test_len = int(len(_idx)*0.10)

# indices of 100 photos that will go into train/val/test split
train_inds = _idx[:train_len] 
val_inds = _idx[train_len:train_len+val_len]
test_inds = _idx[train_len+val_len:len(_idx)]

print 'constructing 55855 vectors that correspond to membership in train/val/test splits'
# construct 55855 vectors that correspond to membership in train/val/test splits
train_vec = np.zeros(len(F)).astype(bool)
val_vec = np.zeros(len(F)).astype(bool)
test_vec = np.zeros(len(F)).astype(bool)

for i in train_inds:
    train_vec[inds==i] = True
for i in val_inds:
    val_vec[inds==i] = True    
for i in test_inds:
    test_vec[inds==i] = True 
    
assert sum(train_vec)+ sum(val_vec) + sum(test_vec) == len(train_vec)    
print ' '
print str(sum(train_vec)/len(train_vec) * len(F)) + ' sketch intermediates to train on.'
print str(sum(val_vec)/len(val_vec) * len(F)) + ' sketch intermediates to validate on.'
print str(sum(test_vec)/len(test_vec) * len(F)) + ' sketch intermediates to test on.'    
print ' '
print 'Now actually splitting data.'
# now actually split data
F_train = F[train_vec]
F_val = F[val_vec]
F_test = F[test_vec]

S_train = S[train_vec]
S_val = S[val_vec]
S_test = S[test_vec]

S_train = S_train[['x', 'y', 'pen']].copy()
S_val = S_val[['x', 'y', 'pen']].copy()
S_test = S_test[['x', 'y', 'pen']].copy()


generating list of photo indices based on X0
generating random index to do train/val/test split
constructing 55855 vectors that correspond to membership in train/val/test splits
 
45041.0 sketch intermediates to train on.
4185.0 sketch intermediates to validate on.
6629.0 sketch intermediates to test on.
 
Now actually splitting data.

In [16]:
## training helpers
def minibatch(data, minibatch_idx):
    return data[minibatch_idx] if type(data) is np.ndarray else [data[i] for i in minibatch_idx] 

def minibatches(data, batch_size, shuffle=True):
    batches = [np.array(col) for col in zip(*data)]
    return get_minibatches(batches, batch_size, shuffle)

def get_minibatches(data, minibatch_size, shuffle=True):
    """
    Iterates through the provided data one minibatch at a time. You can use this function to
    iterate through data in minibatches as follows:
        for inputs_minibatch in get_minibatches(inputs, minibatch_size):
            ...
    Or with multiple data sources:
        for inputs_minibatch, labels_minibatch in get_minibatches([inputs, labels], minibatch_size):
            ...
    Args:
        data: there are two possible values:
            - a list or numpy array
            - a list where each element is either a list or numpy array
        minibatch_size: the maximum number of items in a minibatch
        shuffle: whether to randomize the order of returned data
    Returns:
        minibatches: the return value depends on data:
            - If data is a list/array it yields the next minibatch of data.
            - If data is a list of lists/arrays it returns the next minibatch of each element in the
              list. This can be used to iterate through multiple data sources
              (e.g., features and labels) at the same time.
    """
    list_data = type(data) is list and (type(data[0]) is list or type(data[0]) is np.ndarray)
    data_size = len(data[0]) if list_data else len(data)
    indices = np.arange(data_size)
    if shuffle:
        np.random.shuffle(indices)
    for minibatch_start in np.arange(0, data_size, minibatch_size):
        minibatch_indices = indices[minibatch_start:minibatch_start + minibatch_size]
        yield [minibatch(d, minibatch_indices) for d in data] if list_data \
            else minibatch(data, minibatch_indices), minibatch_indices

# usage notes:
# for m in get_minibatches([F_train,S_train.as_matrix()],batch_size,shuffle=True):
#     print len(m),m[0].shape,m[1].shape
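
A self-contained toy check of the iterator on made-up arrays, using the same unpacking pattern as the training loops further down:

# toy data: 10 'feature' rows and 10 'labels' (made-up)
Xs = np.arange(20).reshape(10, 2)
ys = np.arange(10)
for m, idx in get_minibatches([Xs, ys], 4, shuffle=False):
    print m[0].shape, m[1].shape, idx
# (4, 2) (4,) [0 1 2 3]
# (4, 2) (4,) [4 5 6 7]
# (2, 2) (2,) [8 9]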

In [ ]:


In [ ]:

quality assurance: make sure that image classification is working as advertised based on the triplet features


In [261]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn import svm
from sklearn import linear_model

# reminders:
# S1 is stroke matrix
# F1 is feature matrix
# PF is photo-feature matrix
# SF is sketch-feature matrix


# get np.array of source photo labels
photos = np.array([sf.split('/')[2] for sf in sketch_filenames])

In [ ]:


In [267]:
## get image classification within airplane class
run_this = 1
FEAT = SF_complete
LABELS = photos_complete
if run_this:
    # split sketch feature data for linear classification
    X_train, X_test, y_train, y_test = train_test_split(
        FEAT, LABELS, test_size=0.2, random_state=0)

    # check dimensionality of split data
    print 'dimensionality of train/test split'
    print X_train.shape, y_train.shape
    print X_test.shape, y_test.shape
    print ' '
    cval = True
    if cval==False:
        # compute linear classification accuracy (takes a minute or so to run)
        clf = svm.SVC(kernel='linear', C=1).fit(X_train, y_train)
        clf.score(X_test, y_test)   
    else:
        # compute linear classification accuracy (takes several minutes to run)       
#         clf = svm.SVC(kernel='linear', C=1)
        clf = linear_model.LogisticRegression(penalty='l2')
        scores = cross_val_score(clf, FEAT, LABELS, cv=2)
        print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
        ## SVM Accuracy: 0.41 (+/- 0.08) with cv=5 achieved on 6/26/17 on intermediate sketches
        ## softmax Accuracy: 0.43 (+/- 0.01) with cv=2 achieved on 9/11/17


dimensionality of train/test split
(536, 1024) (536,)
(135, 1024) (135,)
 
Accuracy: 0.43 (+/- 0.01)

In [ ]:


In [ ]:

compute pairwise euclidean distances between sketches of same class vs. different class


In [ ]:
### this was done on 9/11/17 in order to debug the vgg embedding

In [300]:
print 'Why are there fewer sketch intermediate files than total sketch files?'
print str(len(os.listdir('./tiny/sketch/airplane'))) + ' total sketch files.'

all_sketches = os.listdir('./tiny/sketch/airplane')
all_sketches_with_intermediates = [s.split('/')[-2]+'-'+s.split('/')[-1]+'.png' for s in sketch_folders]
print str(len(all_sketches_with_intermediates)) + ' sketch files after extracting intermediates.'
missing_ones = [i for i in all_sketches if i not in all_sketches_with_intermediates]


Why are there fewer sketch intermediate files than total sketch files?
709 total sketch files.
671 sketch files after extracting intermediates.

In [ ]:


In [ ]:


In [226]:
## get just complete sketches from each sketch folder
sketch_folders = np.unique([os.path.dirname(s) for s in sketch_filenames])
complete_paths = []
SF_complete = []
photos_complete = []
for (j,s) in enumerate(sketch_folders):
    complete_sketch = str(max([int(i.split('.')[0]) for i \
                               in os.listdir(s)])) + '.png'
    complete_paths.append(os.path.join(os.path.dirname(s),complete_sketch))
    SF_complete.append(SF[j])
    photos_complete.append(os.path.dirname(s).split('/')[-1])
SF_complete = np.array(SF_complete) 
photos_complete = np.array(photos_complete)

In [227]:
from sklearn.metrics.pairwise import pairwise_distances
def rmse(x):
    return np.sqrt(np.sum(x**2))

In [228]:
euc = pairwise_distances(SF_complete,metric='euclidean')
print euc.shape


(671, 671)

In [245]:
p_ind = 4
fp = 20
fig = plt.figure(figsize=(9,9))
for (_i,p_ind) in enumerate(np.arange(fp,fp+9)):
    unique_photos = np.unique(photos_complete)
    inds = np.where(photos_complete==unique_photos[p_ind])[0]
    start = inds[0]
    stop = inds[-1]

    # get within-photo sketch distances
    within_block = euc[start:stop+1,start:stop+1]
    assert len(within_block[np.triu_indices(len(within_block),k=1)])==(len(within_block)**2-len(within_block))/2
    within_distances = within_block[np.triu_indices(len(within_block),k=1)]

    # get between-photo sketch distances
    all_inds = np.arange(len(photos_complete))
    non_matches = [i for i in all_inds if i not in inds]
    _non_matches_shuff = np.random.RandomState(seed=0).permutation(non_matches)
    non_matches_shuff = _non_matches_shuff[:len(inds)]
    btw_distances = euc[start:stop+1,non_matches_shuff].flatten()

    # plot    
    plt.subplot(3,3,_i+1)
    h = plt.hist(within_distances,bins=20,alpha=0.3)
    h = plt.hist(btw_distances,bins=20,alpha=0.3)
    plt.title(str(p_ind))
plt.show()



In [ ]:
## get image classification within airplane class
run_this = 1
FEAT = SF_complete
LABELS = photos_complete
if run_this:
    # split sketch feature data for linear classification
    X_train, X_test, y_train, y_test = train_test_split(
        FEAT, LABELS, test_size=0.2, random_state=0)

    # check dimensionality of split data
    print 'dimensionality of train/test split'
    print X_train.shape, y_train.shape
    print X_test.shape, y_test.shape
    print ' '
    cval = True
    if cval==False:
        # compute linear classification accuracy (takes a minute or so to run)
        clf = svm.SVC(kernel='linear', C=1).fit(X_train, y_train)
        clf.score(X_test, y_test)   
    else:
        # compute linear classification accuracy (takes several minutes to run)       
        clf = svm.SVC(kernel='linear', C=1)
        scores = cross_val_score(clf, SF, photos, cv=5)
        print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
        ## Accuracy: 0.41 (+/- 0.08) achieved on 6/26/17

In [ ]:


In [279]:
from glob import glob
def list_files(path, ext='jpg'):
    result = [y for x in os.walk(path)
              for y in glob(os.path.join(x[0], '*.%s' % ext))]
    return result


airplane_dir = '/home/jefan/full_sketchy_dataset/sketches/airplane'

In [280]:
airplane_paths = list_files(airplane_dir,ext='png')

In [281]:
len(airplane_paths)


Out[281]:
709

draft of model for single minibatch
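
The offset loss used in the drafts below is the sum over the batch of Euclidean distances between predicted and actual (delta_x, delta_y). A minimal numpy version of the same reduction, on made-up values:

# made-up predicted vs. actual offsets for a batch of 3
pred_toy = np.array([[1., 0.], [0., 2.], [3., 4.]])
actual_toy = np.zeros((3, 2))
offset_loss_toy = np.sum(np.sqrt(np.sum((pred_toy - actual_toy)**2, axis=1)))
# = 1 + 2 + 5 = 8.0, i.e. the sum of per-row L2 norms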


In [19]:
RANDOM_SEED = 42
tf.set_random_seed(RANDOM_SEED)

# reset entire graph
tf.reset_default_graph()
sess = tf.InteractiveSession()

# weight on offset loss
offset_weight = 100.

# learning rate
learning_rate = 0.01

# get minibatch
batch_size = 10
F_batch = F_train[:batch_size,:]
S_batch = S_train.head(n=batch_size)

# reserve numpy version 
F_batch_array = F_batch
S_batch_array = S_batch.as_matrix().astype('float32')

# convert to tensorflow tensor
F_batch = tf.cast(tf.stack(F_batch,name='F_batch'),tf.float32)
S_batch = tf.cast(tf.stack(S_batch.as_matrix().astype('float32'),name='S_batch'),tf.float32)

# Layer's sizes
x_size = F_batch.shape[1]   # Number of input nodes: 2048 concatenated photo+sketch features
h_size = 256                # Number of hidden nodes
y_size = S_batch.shape[1]   # Number of outcomes (x,y,pen)

# Symbols
X = tf.placeholder("float", shape=[None, x_size])
y = tf.placeholder("float", shape=[None, y_size])
output = tf.placeholder("float", shape=[None,y_size])

# Weight initializations
W1 = tf.get_variable('W1', [x_size, h_size],initializer=tf.contrib.layers.xavier_initializer(), dtype=tf.float32)
b1 = tf.get_variable('b1', [h_size],initializer=tf.zeros_initializer(), dtype=tf.float32)

W2 = tf.get_variable('W2', [h_size, h_size],initializer=tf.contrib.layers.xavier_initializer(), dtype=tf.float32)
b2 = tf.get_variable('b2', [h_size],initializer=tf.zeros_initializer(), dtype=tf.float32)

W3 = tf.get_variable('W3', [h_size, y_size],initializer=tf.contrib.layers.xavier_initializer(), dtype=tf.float32)
b3 = tf.get_variable('b3', [y_size],initializer=tf.zeros_initializer(), dtype=tf.float32)

# forward propagation
fc1 = tf.nn.relu(tf.nn.xw_plus_b(F_batch, W1, b1,name='fc1'))
fc2 = tf.nn.relu(tf.nn.xw_plus_b(fc1, W2, b2,name='fc2'))
output = tf.nn.xw_plus_b(fc1, W3, b3,name='output') # note: reads from fc1, so fc2 above is defined but unused

actual_offset = tf.slice(S_batch,[0,0],[batch_size,2])
actual_pen = tf.slice(S_batch,[0,2],[batch_size,-1]) 
pred_offset = tf.multiply(tf.slice(output,[0,0],[batch_size,2]),offset_weight)
pred_pen = tf.nn.softmax(tf.slice(output,[0,2],[batch_size,-1]))
# currently doesn't properly handle the pen state loss
offset_loss = tf.reduce_sum(tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(pred_offset,actual_offset)),axis=1)))
pen_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
            labels = actual_pen,
            logits = pred_pen))

loss = tf.add(offset_loss,pen_loss)

# run backprop
optimizer = tf.train.AdamOptimizer(learning_rate)
train_op = optimizer.minimize(loss)

# # get predicted stroke vector
strokes = tf.concat([pred_offset, pred_pen],axis=1)

tf.global_variables_initializer().run()


# sess.close()
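
The cell above notes that the pen state loss isn't handled properly: pred_pen has already been through a softmax before being passed as logits, and actual_pen holds raw Path codes (1/2) rather than class labels. One possible fix, sketched here against the tensors defined above (not what was actually run): keep the pen column as a raw logit and score it with sigmoid cross-entropy against a 0/1 label.

# sketch only: reuses output / S_batch / batch_size from the cell above
pen_logit = tf.slice(output, [0, 2], [batch_size, -1])                    # raw logit, no softmax
pen_label = tf.subtract(tf.slice(S_batch, [0, 2], [batch_size, -1]), 1.)  # pen codes 1/2 -> 0/1
alt_pen_loss = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(labels=pen_label, logits=pen_logit))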

In [20]:
updates = sess.run([fc1,output,pred_offset,
                    pred_pen,actual_offset, actual_pen,
                    offset_loss,pen_loss,loss,
                    train_op,strokes], feed_dict={X:F_batch_array,y:S_batch_array})

In [ ]:


In [140]:
fc1 = updates[0]
output = updates[1]
pred_offset = updates[2]
pred_pen = updates[3]
actual_offset = updates[4]
actual_pen = updates[5]
offset_loss = updates[6]
pen_loss = updates[7]
loss = updates[8]
train_op = updates[9]
strokes = updates[10]

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [149]:



Out[149]:
TensorShape([Dimension(None), Dimension(2048)])

run multiple batches of MLP version


In [108]:
RANDOM_SEED = 42
tf.set_random_seed(RANDOM_SEED)

# reset entire graph
tf.reset_default_graph()

# weight on offset loss
offset_weight = 0.1

# amount to multiply predicted offset values by
offset_multiplier = 100.

# learning rate
learning_rate = 0.001

# set batch size
batch_size = 10

# epoch counter
epoch_num = 0

# feed in only current features, or also features on the next time step as well?
now_plus_next = True

# initialize variables
if now_plus_next:
    F = tf.placeholder("float", shape=[None, 4096])  # features (input)
else:
    F = tf.placeholder("float", shape=[None, 2048])  # features (input)
S = tf.placeholder("float", shape=[None, 3]) # strokes (output)

# Layer's sizes
x_size = F.shape[1]   # Number of input nodes: 2048 features (4096 when now_plus_next)
h_size = 256        # Number of hidden nodes
y_size = S.shape[1]   # Number of outcomes (x,y,pen)

output = tf.placeholder("float", shape=[None,y_size])

# convert to tensorflow tensor
F = tf.cast(tf.stack(F,name='F'),tf.float32)
S = tf.cast(tf.stack(S,name='S'),tf.float32)

# Weight initializations
W1 = tf.get_variable('W1', [x_size, h_size],initializer=tf.contrib.layers.xavier_initializer(), dtype=tf.float32)
b1 = tf.get_variable('b1', [h_size],initializer=tf.zeros_initializer(), dtype=tf.float32)

W2 = tf.get_variable('W2', [h_size, h_size],initializer=tf.contrib.layers.xavier_initializer(), dtype=tf.float32)
b2 = tf.get_variable('b2', [h_size],initializer=tf.zeros_initializer(), dtype=tf.float32)

W3 = tf.get_variable('W3', [h_size, y_size],initializer=tf.contrib.layers.xavier_initializer(), dtype=tf.float32)
b3 = tf.get_variable('b3', [y_size],initializer=tf.zeros_initializer(), dtype=tf.float32)

# forward propagation
fc1 = tf.nn.relu(tf.nn.xw_plus_b(F, W1, b1,name='fc1'))
fc2 = tf.nn.relu(tf.nn.xw_plus_b(fc1, W2, b2,name='fc2'))
output = tf.nn.xw_plus_b(fc1, W3, b3,name='output')

actual_offset = tf.slice(S,[0,0],[batch_size,2])
actual_pen = tf.squeeze(tf.slice(S,[0,2],[batch_size,-1]))
pred_offset = tf.multiply(tf.slice(output,[0,0],[batch_size,2]),offset_multiplier)
pred_pen = tf.squeeze(tf.slice(output,[0,2],[batch_size,-1]))

offset_loss = tf.reduce_sum(tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(pred_offset,actual_offset)),axis=1)))
pen_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
            labels = actual_pen,
            logits = pred_pen))

loss = tf.add(tf.multiply(offset_weight,offset_loss),pen_loss)

# run backprop
optimizer = tf.train.AdamOptimizer(learning_rate)
train_op = optimizer.minimize(loss)

# saver
save = False
saver = tf.train.Saver()

# get predicted stroke vector
strokes = tf.concat([pred_offset, tf.expand_dims(pred_pen,1)],axis=1)

# run batches
with tf.Session() as sess:
    tf.global_variables_initializer().run()
    for m,idx in get_minibatches([F_train,S_train.as_matrix().astype('float32')],batch_size,shuffle=True):        
        
        if m[0].shape[0]==batch_size:
            F_batch = m[0]
            S_batch = m[1] 
            
            # use idx to retrieve the features of the subsequent row in the feature matrix, so you
            # effectively feed in sketch_so_far and sketch_so_far_plus_next_xy, as well as pen (absolute?) location and state
            if (max(idx)<45040):
                F_batch_next = F_train[idx+1].shape 
                F_now_plus_next = np.hstack((F_train[idx],F_train[idx+1]))
            
            if (now_plus_next) & (max(idx)<45040):
                updates = sess.run([offset_loss, pen_loss, loss, pred_offset], feed_dict={F:F_now_plus_next,S:S_batch})
            else:
                try:
                    updates = sess.run([offset_loss, pen_loss, loss, pred_offset], feed_dict={F:F_batch,S:S_batch}) 
                except:
                    pass

            offset_loss_ = updates[0]
            pen_loss_ = updates[1]
            loss_ = updates[2]
            pred_offset_ = updates[3]

            if epoch_num%200==0:
                print "Epoch: " + str(epoch_num) + " | Loss: " + str(loss_) + \
                " | Offset loss: " + str(offset_loss_) + " | Pen loss: " + str(pen_loss_)

            # save
            if save:
                saver.save(sess, 'checkpoints/pix2svg_train_0')
                
            # increment epoch number
            epoch_num += 1

In [ ]:
## meeting notes
# june 26: validate triplet network to make sure it does the task -- QA
# does it take in pen location? put in pen location, pen state 
# put in sketch so far + (sketch so far +1)
# delta x, delta y -- make sure the thing it spits out, after getting squashed by tanh, or whatever, is well centered

simpler version that goes from last pen offset to next pen offset


In [110]:
## now try simpler version that just tries to predict the next offset based on previous offset 

RANDOM_SEED = 42
tf.set_random_seed(RANDOM_SEED)

# reset entire graph
tf.reset_default_graph()

# weight on offset loss
offset_weight = 0.8

# amount to multiply predicted offset values by
offset_multiplier = 100.

# learning rate
learning_rate = 0.001

# set batch size
batch_size = 10

# epoch counter
epoch_num = 0

# initialize variables
Si = tf.placeholder("float", shape=[None, 3]) # strokes (input)
So = tf.placeholder("float", shape=[None, 3]) # strokes (output)

# Layer's sizes
x_size = Si.shape[1]   # Number of input nodes: x, y, state
h_size = 256        # Number of hidden nodes
y_size = So.shape[1]   # Number of outcomes (x,y,pen)

output = tf.placeholder("float", shape=[None,y_size])

# convert to tensorflow tensor
Si = tf.cast(tf.stack(Si,name='Si'),tf.float32)
So = tf.cast(tf.stack(So,name='So'),tf.float32)

# Weight initializations
W1 = tf.get_variable('W1', [x_size, h_size],initializer=tf.contrib.layers.xavier_initializer(), dtype=tf.float32)
b1 = tf.get_variable('b1', [h_size],initializer=tf.zeros_initializer(), dtype=tf.float32)

W2 = tf.get_variable('W2', [h_size, h_size],initializer=tf.contrib.layers.xavier_initializer(), dtype=tf.float32)
b2 = tf.get_variable('b2', [h_size],initializer=tf.zeros_initializer(), dtype=tf.float32)

W3 = tf.get_variable('W3', [h_size, y_size],initializer=tf.contrib.layers.xavier_initializer(), dtype=tf.float32)
b3 = tf.get_variable('b3', [y_size],initializer=tf.zeros_initializer(), dtype=tf.float32)

# forward propagation
fc1 = tf.nn.relu(tf.nn.xw_plus_b(Si, W1, b1,name='fc1'))
fc2 = tf.nn.relu(tf.nn.xw_plus_b(fc1, W2, b2,name='fc2'))
output = tf.nn.xw_plus_b(fc1, W3, b3,name='output')

actual_offset = tf.slice(So,[0,0],[batch_size,2])
actual_pen = tf.squeeze(tf.slice(So,[0,2],[batch_size,-1]))
pred_offset = tf.multiply(tf.slice(output,[0,0],[batch_size,2]),offset_multiplier)
pred_pen = tf.squeeze(tf.slice(output,[0,2],[batch_size,-1]))

offset_loss = tf.reduce_sum(tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(pred_offset,actual_offset)),axis=1)))
pen_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
            labels = actual_pen,
            logits = pred_pen))

loss = tf.add(tf.multiply(offset_weight,offset_loss),tf.multiply(1-offset_weight,pen_loss))

# run backprop
optimizer = tf.train.AdamOptimizer(learning_rate)
train_op = optimizer.minimize(loss)

# saver
save = False
saver = tf.train.Saver()

# get predicted stroke vector
strokes = tf.concat([pred_offset, tf.expand_dims(tf.round(tf.sigmoid(pred_pen)+1),1)],axis=1)
Strokes = []

# run batches
with tf.Session() as sess:
    tf.global_variables_initializer().run()
    for m,idx in get_minibatches(S_train.as_matrix().astype('float32')[:len(S_train)-1],batch_size,shuffle=True):        

            Si_batch = m # batch of current strokes 
            
            if (max(idx)<45040):
                So_batch = S_train.iloc[idx+1].as_matrix().astype('float32')
                
            updates = sess.run([offset_loss, pen_loss, loss, pred_offset, actual_pen, pred_pen, strokes], feed_dict={Si:Si_batch,So:So_batch}) 

            offset_loss_ = updates[0]
            pen_loss_ = updates[1]
            loss_ = updates[2]
            pred_offset_ = updates[3]
            actual_pen_ = updates[4]
            pred_pen_ = updates[5]
            strokes_ = updates[6]
            
            if epoch_num%200==0:
                print "Epoch: " + str(epoch_num) + " | Loss: " + str(loss_) + \
                " | Offset loss: " + str(offset_loss_) + " | Pen loss: " + str(pen_loss_) 

            # save
            if save:
                saver.save(sess, 'checkpoints/pix2svg_train_svg2svg_0')
                
            # increment epoch number
            epoch_num += 1


Epoch: 0 | Loss: 8250.34 | Offset loss: 10273.1 | Pen loss: 159.433
Epoch: 200 | Loss: 5645.42 | Offset loss: 7020.43 | Pen loss: 145.378
Epoch: 400 | Loss: 8678.95 | Offset loss: 10783.6 | Pen loss: 260.344
Epoch: 600 | Loss: 4909.4 | Offset loss: 6119.17 | Pen loss: 70.3102
Epoch: 800 | Loss: 7867.83 | Offset loss: 9817.23 | Pen loss: 70.2685
Epoch: 1000 | Loss: 2980.32 | Offset loss: 3712.18 | Pen loss: 52.8845
Epoch: 1200 | Loss: 6821.04 | Offset loss: 8498.8 | Pen loss: 110.025
Epoch: 1400 | Loss: 8527.17 | Offset loss: 10632.4 | Pen loss: 106.13
Epoch: 1600 | Loss: 6072.67 | Offset loss: 7574.29 | Pen loss: 66.2271
Epoch: 1800 | Loss: 6337.97 | Offset loss: 7834.5 | Pen loss: 351.836
Epoch: 2000 | Loss: 10150.8 | Offset loss: 12651.0 | Pen loss: 149.963
Epoch: 2200 | Loss: 3221.87 | Offset loss: 4009.85 | Pen loss: 69.9651
Epoch: 2400 | Loss: 7817.52 | Offset loss: 9739.13 | Pen loss: 131.052
Epoch: 2600 | Loss: 2849.59 | Offset loss: 3548.3 | Pen loss: 54.7421
Epoch: 2800 | Loss: 7948.94 | Offset loss: 9887.3 | Pen loss: 195.522
Epoch: 3000 | Loss: 12629.5 | Offset loss: 15757.5 | Pen loss: 117.646
Epoch: 3200 | Loss: 3830.88 | Offset loss: 4769.15 | Pen loss: 77.7746
Epoch: 3400 | Loss: 4502.43 | Offset loss: 5598.59 | Pen loss: 117.814
Epoch: 3600 | Loss: 13247.9 | Offset loss: 16517.6 | Pen loss: 168.919
Epoch: 3800 | Loss: 6957.13 | Offset loss: 8674.43 | Pen loss: 87.9368
Epoch: 4000 | Loss: 5322.95 | Offset loss: 6609.43 | Pen loss: 177.03
Epoch: 4200 | Loss: 3501.16 | Offset loss: 4359.94 | Pen loss: 66.0247
Epoch: 4400 | Loss: 3366.67 | Offset loss: 4193.48 | Pen loss: 59.4112

In [ ]:


In [111]:
plt.scatter(strokes_[:,0],strokes_[:,1])
plt.show()



In [112]:
### demo of the difference between the absolute pen position and the relative pen position
plt.figure()
inds = list(S[(S.photoID=='n02691156_10151') & (S.sketchID==0)].index)
verts = zip(S1.loc[inds].x.values,S1.loc[inds].y.values)
codes = S1.loc[inds].pen.values
path = Path(verts, codes)
patch = patches.PathPatch(path, facecolor='none', lw=2)
ax = plt.subplot(121)
ax.add_patch(patch)
ax.set_xlim(0,600)
ax.set_ylim(0,600) 
ax.axis('off')
plt.gca().invert_yaxis() # y values increase as you go down in image
plt.show()

inds = list(S[(S.photoID=='n02691156_10151') & (S.sketchID==0)].index)
verts = zip(S2.loc[inds].x.values,S2.loc[inds].y.values)
codes = S2.loc[inds].pen.values
path = Path(verts, codes)
patch = patches.PathPatch(path, facecolor='none', lw=2)
ax = plt.subplot(122)
ax.add_patch(patch)
ax.set_xlim(-200,200)
ax.set_ylim(-200,200) 
ax.axis('off')
plt.gca().invert_yaxis() # y values increase as you go down in image
plt.show()


predict next offset on basis of previous 4 offsets


In [ ]:


In [113]:
RANDOM_SEED = 42
tf.set_random_seed(RANDOM_SEED)

# reset entire graph
tf.reset_default_graph()

# weight on offset loss
offset_weight = 1.

# amount to multiply predicted offset values by
offset_multiplier = 100.

# learning rate
learning_rate = 0.001

# set batch size
batch_size = 10

# epoch counter
epoch_num = 0

# initialize variables
Si4 = tf.placeholder("float", shape=[None, 3]) # strokes (input) -- 4th to last
Si3 = tf.placeholder("float", shape=[None, 3]) # strokes (input) -- 3rd to last
Si2 = tf.placeholder("float", shape=[None, 3]) # strokes (input) -- 2nd to last
Si1 = tf.placeholder("float", shape=[None, 3]) # strokes (input) -- previous one
So = tf.placeholder("float", shape=[None, 3]) # strokes (output)

# Layer's sizes
x_size = Si1.shape[1]*4   # Number of input nodes: (x, y, pen) for each of the previous 4 strokes
h_size = 512        # Number of hidden nodes
y_size = So.shape[1]   # Number of outcomes (x,y,pen)

output = tf.placeholder("float", shape=[None,y_size])

# convert to tensorflow tensor
Si4 = tf.cast(tf.stack(Si4,name='Si4'),tf.float32)
Si3 = tf.cast(tf.stack(Si3,name='Si3'),tf.float32)
Si2 = tf.cast(tf.stack(Si2,name='Si2'),tf.float32)
Si1 = tf.cast(tf.stack(Si1,name='Si1'),tf.float32)
Si = tf.concat([Si4,Si3,Si2,Si1],axis=1)
So = tf.cast(tf.stack(So,name='So'),tf.float32)

# Weight initializations
W1 = tf.get_variable('W1', [x_size, h_size],initializer=tf.contrib.layers.xavier_initializer(), dtype=tf.float32)
b1 = tf.get_variable('b1', [h_size],initializer=tf.zeros_initializer(), dtype=tf.float32)

W2 = tf.get_variable('W2', [h_size, h_size],initializer=tf.contrib.layers.xavier_initializer(), dtype=tf.float32)
b2 = tf.get_variable('b2', [h_size],initializer=tf.zeros_initializer(), dtype=tf.float32)

W3 = tf.get_variable('W3', [h_size, y_size],initializer=tf.contrib.layers.xavier_initializer(), dtype=tf.float32)
b3 = tf.get_variable('b3', [y_size],initializer=tf.zeros_initializer(), dtype=tf.float32)

# forward propagation
fc1 = tf.nn.relu(tf.nn.xw_plus_b(Si, W1, b1,name='fc1'))
fc2 = tf.nn.relu(tf.nn.xw_plus_b(fc1, W2, b2,name='fc2'))
output = tf.nn.xw_plus_b(fc1, W3, b3,name='output')

actual_offset = tf.slice(So,[0,0],[batch_size,2])
actual_pen = tf.squeeze(tf.slice(So,[0,2],[batch_size,-1]))
pred_offset = tf.multiply(tf.slice(output,[0,0],[batch_size,2]),offset_multiplier)
pred_pen = tf.squeeze(tf.slice(output,[0,2],[batch_size,-1]))

offset_loss = tf.reduce_sum(tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(pred_offset,actual_offset)),axis=1)))
pen_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
            labels = actual_pen,
            logits = pred_pen))

loss = tf.add(tf.multiply(offset_weight,offset_loss),tf.multiply(1-offset_weight,pen_loss))

# run backprop
optimizer = tf.train.AdamOptimizer(learning_rate)
train_op = optimizer.minimize(loss)

# saver
save = False
saver = tf.train.Saver()

# get predicted stroke vector
strokes = tf.concat([pred_offset, tf.expand_dims(tf.round(tf.sigmoid(pred_pen)+1),1)],axis=1)
Strokes = []

# run batches
with tf.Session() as sess:
    tf.global_variables_initializer().run()
    for m,idx in get_minibatches(S_train.as_matrix().astype('float32')[:len(S_train)-1],batch_size,shuffle=True):        

            Si1_batch = m # batch of current strokes 
            Si2_batch = S_train.iloc[idx-1].as_matrix().astype('float32')
            Si3_batch = S_train.iloc[idx-2].as_matrix().astype('float32')
            Si4_batch = S_train.iloc[idx-3].as_matrix().astype('float32')
            
            if (max(idx)<45040):
                So_batch = S_train.iloc[idx+1].as_matrix().astype('float32')
                
            updates = sess.run([offset_loss, pen_loss, loss, 
                                pred_offset, actual_pen, pred_pen, strokes], 
                               feed_dict={Si1:Si1_batch,Si2:Si2_batch,Si3:Si3_batch,Si4:Si4_batch,
                                          So:So_batch}) 

            offset_loss_ = updates[0]
            pen_loss_ = updates[1]
            loss_ = updates[2]
            pred_offset_ = updates[3]
            actual_pen_ = updates[4]
            pred_pen_ = updates[5]
            strokes_ = updates[6]
            
            if epoch_num%200==0:
                print "Epoch: " + str(epoch_num) + " | Loss: " + str(loss_) + \
                " | Offset loss: " + str(offset_loss_) + " | Pen loss: " + str(pen_loss_) 

            # save
            if save:
                saver.save(sess, 'checkpoints/pix2svg_train_svg2svg_0')
                
            # increment epoch number
            epoch_num += 1

now trying to predict a mixture of gaussians rather than a single naive nonlinear function, because the next pen offset can't be modeled well by one deterministic function (the same input can map to several plausible offsets)

reverting to trying to predict next pen offset based on single most recent pen offset
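
For reference, the density being mixed here is a standard bivariate normal with correlation rho (this is what mdn.tf_2d_normal presumably computes, following sketch-rnn). A minimal numpy version with made-up inputs:

# numpy sketch of the bivariate normal density used inside the MDN offset loss
def np_2d_normal(x1, x2, mu1, mu2, s1, s2, rho):
    z = (((x1 - mu1) / s1)**2 + ((x2 - mu2) / s2)**2
         - 2. * rho * (x1 - mu1) * (x2 - mu2) / (s1 * s2))
    denom = 2. * np.pi * s1 * s2 * np.sqrt(1. - rho**2)
    return np.exp(-z / (2. * (1. - rho**2))) / denom

# at the mean with unit sigmas and rho=0 this is 1/(2*pi) ~= 0.159,
# consistent with the tf_2d_normal check further down
print np_2d_normal(0., 1., 0., 1., 1., 1., 0.)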


In [ ]:


In [197]:
## now trying to predict mixture of gaussians rather than naive nonlinear function... because pen offsets can't be modeled by any function

import mdn as mdn ## import mixture density network helpers
reload(mdn)

RANDOM_SEED = 42
tf.set_random_seed(RANDOM_SEED)

# reset entire graph
tf.reset_default_graph()

# weight on offset loss
offset_weight = 0.8

# amount to multiply predicted offset values by
offset_multiplier = 100.

# learning rate
learning_rate = 0.001

# set batch size
batch_size = 10

# epoch counter
epoch_num = 0

# initialize variables
Si = tf.placeholder("float", shape=[None, 3]) # strokes (input)
So = tf.placeholder("float", shape=[None, 3]) # strokes (output) 
r_cost = tf.placeholder("float", shape=[None])
x1_data = tf.placeholder("float",shape=[None])
x2_data = tf.placeholder("float",shape=[None])
pen_data = tf.placeholder("float",shape=[None])
offset_loss = tf.placeholder("float",shape=[None])
state_loss = tf.placeholder("float",shape=[None])
recon_loss = tf.placeholder("float",shape=[None])

# Layer's sizes
x_size = Si.shape[1]   # Number of input nodes: x, y, state
h_size = 384        # Number of hidden nodes 6*64
# y_size = So.shape[1]   # Number of outcomes (x,y,pen)
y_size = 8 ## split this into MDN parameters: first two elements are pen state logits (states 1 or 2), remaining six parameterize a single bivariate Gaussian component (pi, mu1, mu2, sigma1, sigma2, corr)

output = tf.placeholder("float", shape=[None,y_size])

# # convert to tensorflow tensor
Si = tf.cast(tf.stack(Si,name='Si'),tf.float32)
So = tf.cast(tf.stack(So,name='So'),tf.float32)

# Weight initializations
W1 = tf.get_variable('W1', [x_size, h_size],initializer=tf.contrib.layers.xavier_initializer(), dtype=tf.float32)
b1 = tf.get_variable('b1', [h_size],initializer=tf.zeros_initializer(), dtype=tf.float32)

W2 = tf.get_variable('W2', [h_size, h_size],initializer=tf.contrib.layers.xavier_initializer(), dtype=tf.float32)
b2 = tf.get_variable('b2', [h_size],initializer=tf.zeros_initializer(), dtype=tf.float32)

W3 = tf.get_variable('W3', [h_size, y_size],initializer=tf.contrib.layers.xavier_initializer(), dtype=tf.float32)
b3 = tf.get_variable('b3', [y_size],initializer=tf.zeros_initializer(), dtype=tf.float32)

# forward propagation
fc1 = tf.nn.relu(tf.nn.xw_plus_b(Si, W1, b1,name='fc1'))
fc2 = tf.nn.relu(tf.nn.xw_plus_b(fc1, W2, b2,name='fc2'))
output = tf.nn.xw_plus_b(fc2, W3, b3,name='output')

# get mixture distribution parameters
out = mdn.get_mixture_coef(output)
[o_pi, o_mu1, o_mu2, o_sigma1, o_sigma2, o_corr, o_pen, o_pen_logits] = out ## each of these are the size of the batch

# get target for prediction
target = So # shape: (batch_size, 3) 

[x1_data, x2_data, pen_data] = tf.split(target, 3, 1)
x1_data = tf.squeeze(x1_data) # shape (batch_size,)       
x2_data = tf.squeeze(x2_data) # shape (batch_size,)       
pen_data = tf.squeeze(pen_data) # shape (batch_size,) 
pen_data = tf.subtract(pen_data,1) # classes need to be in the range [0, num_classes-1]

# compute reconstruction loss
offset_loss, state_loss = mdn.get_lossfunc(o_pi, o_mu1, o_mu2, o_sigma1, o_sigma2, o_corr,
                        o_pen_logits, x1_data, x2_data, pen_data)
offset_loss = tf.squeeze(offset_loss)
recon_loss = tf.add(offset_loss,state_loss)
loss = tf.reduce_sum(recon_loss,axis=0)

# # run backprop
optimizer = tf.train.AdamOptimizer(learning_rate)
train_op = optimizer.minimize(loss)

init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    for m,idx in get_minibatches(S_train.as_matrix().astype('float32')[:len(S_train)-1],batch_size,shuffle=True):        

        Si_batch = m # batch of current strokes 
        if (max(idx)<45040):
            So_batch = S_train.iloc[idx+1].as_matrix().astype('float32')  
       
        results = sess.run([o_pi, o_mu1, o_mu2, o_sigma1, o_sigma2, o_corr, o_pen, o_pen_logits, offset_loss, state_loss, recon_loss, loss], feed_dict={Si:Si_batch,So:So_batch}) 
        
        _o_pi = results[0]
        _o_mu1 = results[1]
        _o_mu2 = results[2]
        _o_sigma1 = results[3]
        _o_sigma2 = results[4]
        _o_corr = results[5]
        _o_pen = results[6]
        _o_pen_logits = results[7]   
        _offset_loss = results[8]
        _state_loss = results[9]
        _recon_loss = results[10]
        _loss = results[11]
        
        if epoch_num%100==0:
            print('Epoch Num: ', epoch_num, 'Reconstruction Loss:', _loss)
   
        epoch_num += 1


('Epoch Num: ', 0, 'Reconstruction Loss:', 142.40862)
('Epoch Num: ', 100, 'Reconstruction Loss:', 146.73276)
('Epoch Num: ', 200, 'Reconstruction Loss:', 143.53572)
('Epoch Num: ', 300, 'Reconstruction Loss:', 146.30927)
('Epoch Num: ', 400, 'Reconstruction Loss:', 143.62589)
('Epoch Num: ', 500, 'Reconstruction Loss:', 142.88931)
('Epoch Num: ', 600, 'Reconstruction Loss:', 143.24689)
('Epoch Num: ', 700, 'Reconstruction Loss:', 144.42648)
('Epoch Num: ', 800, 'Reconstruction Loss:', 144.39893)
('Epoch Num: ', 900, 'Reconstruction Loss:', 91.308319)
('Epoch Num: ', 1000, 'Reconstruction Loss:', nan)
('Epoch Num: ', 1100, 'Reconstruction Loss:', 143.09306)
('Epoch Num: ', 1200, 'Reconstruction Loss:', nan)
('Epoch Num: ', 1300, 'Reconstruction Loss:', 145.02594)
('Epoch Num: ', 1400, 'Reconstruction Loss:', 147.22566)
('Epoch Num: ', 1500, 'Reconstruction Loss:', 147.19031)
('Epoch Num: ', 1600, 'Reconstruction Loss:', 126.70847)
('Epoch Num: ', 1700, 'Reconstruction Loss:', nan)
('Epoch Num: ', 1800, 'Reconstruction Loss:', 126.05428)
('Epoch Num: ', 1900, 'Reconstruction Loss:', 136.02991)
('Epoch Num: ', 2000, 'Reconstruction Loss:', 144.82986)
('Epoch Num: ', 2100, 'Reconstruction Loss:', nan)
('Epoch Num: ', 2200, 'Reconstruction Loss:', 137.37674)
('Epoch Num: ', 2300, 'Reconstruction Loss:', 134.56625)
('Epoch Num: ', 2400, 'Reconstruction Loss:', nan)
('Epoch Num: ', 2500, 'Reconstruction Loss:', 146.08743)
('Epoch Num: ', 2600, 'Reconstruction Loss:', nan)
('Epoch Num: ', 2700, 'Reconstruction Loss:', 141.49911)
('Epoch Num: ', 2800, 'Reconstruction Loss:', 154.62381)
('Epoch Num: ', 2900, 'Reconstruction Loss:', 122.30074)
('Epoch Num: ', 3000, 'Reconstruction Loss:', nan)
('Epoch Num: ', 3100, 'Reconstruction Loss:', 146.26424)
('Epoch Num: ', 3200, 'Reconstruction Loss:', nan)
('Epoch Num: ', 3300, 'Reconstruction Loss:', nan)
('Epoch Num: ', 3400, 'Reconstruction Loss:', 144.92764)
('Epoch Num: ', 3500, 'Reconstruction Loss:', 139.46045)
('Epoch Num: ', 3600, 'Reconstruction Loss:', 142.2424)
('Epoch Num: ', 3700, 'Reconstruction Loss:', 146.02829)
('Epoch Num: ', 3800, 'Reconstruction Loss:', 136.65732)
('Epoch Num: ', 3900, 'Reconstruction Loss:', nan)
('Epoch Num: ', 4000, 'Reconstruction Loss:', 143.90469)
('Epoch Num: ', 4100, 'Reconstruction Loss:', 144.3707)
('Epoch Num: ', 4200, 'Reconstruction Loss:', 144.22266)
('Epoch Num: ', 4300, 'Reconstruction Loss:', 116.4599)
('Epoch Num: ', 4400, 'Reconstruction Loss:', 144.01218)
('Epoch Num: ', 4500, 'Reconstruction Loss:', nan)

In [ ]:


In [ ]:


In [ ]:


In [195]:
a = tf.constant([2.,2.,1.,2.,2.,1.,1.,2.,1.,2.])
b = tf.constant([1.,1.,1.,1.,1.,1.,1.,1.,1.,1.])
c = tf.reshape(a,[-1,10]) 
### reshape with a -1 nests a tensor, so one that is originally of shape (10,) becomes (1,10)
d = tf.split(c,10,1)
e = tf.constant([-0.4])

result =  tf.nn.softmax_cross_entropy_with_logits(labels=a,logits=b)
sess = tf.InteractiveSession()
sess.run(tf.initialize_all_variables())
print result.eval()

print a.eval(), a.get_shape()
print c.eval(), c.get_shape()
print tf.nn.softmax(c).eval()
print tf.nn.softmax(e).eval()

sess.close()


WARNING:tensorflow:From <ipython-input-195-12c34c9f189d>:10: initialize_all_variables (from tensorflow.python.ops.variables) is deprecated and will be removed after 2017-03-02.
Instructions for updating:
Use `tf.global_variables_initializer` instead.
36.8414
[ 2.  2.  1.  2.  2.  1.  1.  2.  1.  2.] (10,)
[[ 2.  2.  1.  2.  2.  1.  1.  2.  1.  2.]] (1, 10)
[[ 0.13384162  0.13384162  0.04923758  0.13384162  0.13384162  0.04923758
   0.04923758  0.13384162  0.04923758  0.13384162]]
[ 1.]

In [ ]:


In [ ]:


In [ ]:


In [193]:
# NSAMPLE = 1000
x_data = np.float32(np.random.uniform(-10.5, 10.5, (1, NSAMPLE))).T
r_data = np.float32(np.random.normal(size=(NSAMPLE,1)))
y_data = np.float32(np.sin(0.75*x_data)*7.0+x_data*0.5+r_data*1.0)

# plt.figure(figsize=(8, 8))
# plot_out = plt.plot(x_data,y_data,'ro',alpha=0.3)
# plt.show()

# temp_data = x_data
# x_data = y_data
# y_data = temp_data

# plt.figure(figsize=(8, 8))
# plot_out = plt.plot(x_data,y_data,'ro',alpha=0.3)
# plt.show()

In [259]:
import mdn as mdn

In [ ]:
mdn

In [203]:


In [263]:
x1 = tf.random_normal([1], mean=0, stddev=0.1)
x2 = tf.random_normal([1], mean=1, stddev=0.1)
mu1 = tf.constant(0., dtype=tf.float32)
mu2 = tf.constant(1., dtype=tf.float32)
s1 = tf.constant(1., dtype=tf.float32)
s2 = tf.constant(1., dtype=tf.float32)
rho = tf.constant(0., dtype=tf.float32)
result = mdn.tf_2d_normal(x1, x2, mu1, mu2, s1, s2, rho)
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
print x1.eval()
print x2.eval()
print result.eval()
sess.close()


[ 0.02453992]
[ 0.95884156]
[ 0.1577232]

In [ ]:


In [222]:
## self contained example of sinusoidal function fitting
NSAMPLE = 1000
x_data = np.float32(np.random.uniform(-10.5, 10.5, (1, NSAMPLE))).T
r_data = np.float32(np.random.normal(size=(NSAMPLE,1)))
y_data = np.float32(np.sin(0.75*x_data)*7.0+x_data*0.5+r_data*1.0)

x = tf.placeholder(dtype=tf.float32, shape=[None,1])
y = tf.placeholder(dtype=tf.float32, shape=[None,1])

NHIDDEN = 20
W = tf.Variable(tf.random_normal([1,NHIDDEN], stddev=1.0, dtype=tf.float32))
b = tf.Variable(tf.random_normal([1,NHIDDEN], stddev=1.0, dtype=tf.float32))

W_out = tf.Variable(tf.random_normal([NHIDDEN,1], stddev=1.0, dtype=tf.float32))
b_out = tf.Variable(tf.random_normal([1,1], stddev=1.0, dtype=tf.float32))

hidden_layer = tf.nn.tanh(tf.matmul(x, W) + b)
y_out = tf.matmul(hidden_layer,W_out) + b_out
lossfunc = tf.nn.l2_loss(y_out-y)
train_op = tf.train.RMSPropOptimizer(learning_rate=0.1, decay=0.8).minimize(lossfunc)

sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
NEPOCH = 1000
for i in range(NEPOCH):
    sess.run(train_op,feed_dict={x: x_data, y: y_data})
    
x_test = np.float32(np.arange(-10.5,10.5,0.1))
x_test = x_test.reshape(x_test.size,1)
y_test = sess.run(y_out,feed_dict={x: x_test})

plt.figure(figsize=(8, 8))
plt.plot(x_data,y_data,'ro', x_test,y_test,'bo',alpha=0.3)
plt.show()

sess.close()



In [234]:
NHIDDEN = 24
STDEV = 0.5
KMIX = 24 # number of mixtures
NOUT = KMIX * 3 # pi, mu, stdev

x = tf.placeholder(dtype=tf.float32, shape=[None,1], name="x")
y = tf.placeholder(dtype=tf.float32, shape=[None,1], name="y")

Wh = tf.Variable(tf.random_normal([1,NHIDDEN], stddev=STDEV, dtype=tf.float32))
bh = tf.Variable(tf.random_normal([1,NHIDDEN], stddev=STDEV, dtype=tf.float32))

Wo = tf.Variable(tf.random_normal([NHIDDEN,NOUT], stddev=STDEV, dtype=tf.float32))
bo = tf.Variable(tf.random_normal([1,NOUT], stddev=STDEV, dtype=tf.float32))

hidden_layer = tf.nn.tanh(tf.matmul(x, Wh) + bh)
output = tf.matmul(hidden_layer,Wo) + bo

def get_mixture_coef(output):
    out_pi = tf.placeholder(dtype=tf.float32, shape=[None,KMIX], name="mixparam")
    out_sigma = tf.placeholder(dtype=tf.float32, shape=[None,KMIX], name="mixparam")
    out_mu = tf.placeholder(dtype=tf.float32, shape=[None,KMIX], name="mixparam")

    out_pi, out_sigma, out_mu = tf.split(output,3,1)

    max_pi = tf.reduce_max(out_pi, 1, keep_dims=True)
    out_pi = tf.subtract(out_pi, max_pi)

    out_pi = tf.exp(out_pi)

    normalize_pi = tf.reciprocal(tf.reduce_sum(out_pi, 1, keep_dims=True))
    out_pi = tf.multiply(normalize_pi, out_pi)

    out_sigma = tf.exp(out_sigma)

    return out_pi, out_sigma, out_mu

out_pi, out_sigma, out_mu = get_mixture_coef(output)


Tensor("Mul_35:0", shape=(?, 24), dtype=float32) Tensor("Exp_11:0", shape=(?, 24), dtype=float32) Tensor("split_9:2", shape=(?, 24), dtype=float32)

In [ ]:


In [240]:
NSAMPLE = 2500

y_data = np.float32(np.random.uniform(-10.5, 10.5, (1, NSAMPLE))).T
r_data = np.float32(np.random.normal(size=(NSAMPLE,1))) # random noise
x_data = np.float32(np.sin(0.75*y_data)*7.0+y_data*0.5+r_data*1.0)

import math # needed for math.sqrt / math.pi; not imported at the top of the notebook
oneDivSqrtTwoPI = 1 / math.sqrt(2*math.pi) # normalisation factor for gaussian, not needed.
def tf_normal(y, mu, sigma):
    result = tf.subtract(y, mu)
    result = tf.multiply(result,tf.reciprocal(sigma))
    result = -tf.square(result)/2
    return tf.multiply(tf.exp(result),tf.reciprocal(sigma))*oneDivSqrtTwoPI

def get_lossfunc(out_pi, out_sigma, out_mu, y):
    result = tf_normal(y, out_mu, out_sigma)
    result = tf.multiply(result, out_pi)
    result = tf.reduce_sum(result, 1, keep_dims=True)
    result = -tf.log(result)
    return tf.reduce_mean(result)

lossfunc = get_lossfunc(out_pi, out_sigma, out_mu, y)
train_op = tf.train.AdamOptimizer().minimize(lossfunc)

sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())

NEPOCH = 10000
loss = np.zeros(NEPOCH) # store the training progress here.
for i in range(NEPOCH):
    sess.run(train_op,feed_dict={x: x_data, y: y_data})
    loss[i] = sess.run(lossfunc, feed_dict={x: x_data, y: y_data})

In [241]:
plt.figure(figsize=(8, 8))
plt.plot(np.arange(100, NEPOCH,1), loss[100:], 'r-')
plt.show()



In [265]:
x_test = np.float32(np.arange(-15,15,0.1))
NTEST = x_test.size
x_test = x_test.reshape(NTEST,1) # needs to be a matrix, not a vector

def get_pi_idx(x, pdf):
    N = pdf.size
    accumulate = 0
    for i in range(0, N):
        accumulate += pdf[i]
        if (accumulate > x):
            return i
    print 'error with sampling ensemble'
    return -1

def generate_ensemble(out_pi, out_mu, out_sigma, M = 10):
    NTEST = x_test.size
    result = np.random.rand(NTEST, M) # initially random [0, 1]
    rn = np.random.randn(NTEST, M) # normal random matrix (0.0, 1.0)
    mu = 0
    std = 0
    idx = 0

    # transforms result into random ensembles
    for j in range(0, M): # mixtures
        for i in range(0, NTEST): # datapoints
            idx = get_pi_idx(result[i, j], out_pi[i])
            mu = out_mu[i, idx]
            std = out_sigma[i, idx]
            result[i, j] = mu + rn[i, j]*std
    return result
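
get_pi_idx does inverse-CDF sampling from the mixture weights; a quick toy check with made-up weights:

# made-up mixture weights; u=0.1 falls in the first bin, 0.25 in the second, 0.9 in the third
toy_pdf = np.array([0.2, 0.5, 0.3])
print [get_pi_idx(u, toy_pdf) for u in (0.1, 0.25, 0.9)]   # [0, 1, 2]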

In [ ]:


In [249]:
out_pi_test, out_sigma_test, out_mu_test = sess.run(get_mixture_coef(output), feed_dict={x: x_test})

y_test = generate_ensemble(out_pi_test, out_mu_test, out_sigma_test)

plt.figure(figsize=(8, 8))
plt.plot(x_data,y_data,'ro', x_test,y_test,'bo',alpha=0.3)
plt.show()



In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:
#####========================================================================

# actual_offset = tf.slice(So,[0,0],[batch_size,2])
# actual_pen = tf.squeeze(tf.slice(So,[0,2],[batch_size,-1]))

# pred_offset = tf.multiply(tf.slice(output,[0,0],[batch_size,2]),offset_multiplier)
# pred_pen = tf.squeeze(tf.slice(output,[0,2],[batch_size,-1]))

# offset_loss = tf.reduce_sum(tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(pred_offset,actual_offset)),axis=1)))
# pen_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
#             labels = actual_pen,
#             logits = pred_pen))

# loss = tf.add(tf.multiply(offset_weight,offset_loss),tf.multiply(1-offset_weight,pen_loss))

# # run backprop
# optimizer = tf.train.AdamOptimizer(learning_rate)
# train_op = optimizer.minimize(loss)

# # saver
# save = False
# saver = tf.train.Saver()

# # get predicted stroke vector
# strokes = tf.concat([pred_offset, tf.expand_dims(tf.round(tf.sigmoid(pred_pen_)+1),1)],axis=1)
# Strokes = []

# # run batches
# with tf.Session() as sess:
#     tf.global_variables_initializer().run()
#     for m,idx in get_minibatches(S_train.as_matrix().astype('float32')[:len(S_train)-1],batch_size,shuffle=True):        
#             Si_batch = m # batch of current strokes             
#             if (max(idx)<45040):
#                 So_batch = S_train.iloc[idx+1].as_matrix().astype('float32')                
#             updates = sess.run([offset_loss, pen_loss, loss, pred_offset, actual_pen, pred_pen, strokes], feed_dict={Si:Si_batch,So:So_batch}) 
#             offset_loss_ = updates[0]
#             pen_loss_ = updates[1]
#             loss_ = updates[2]
#             pred_offset_ = updates[3]
#             actual_pen_ = updates[4]
#             pred_pen_ = updates[5]
#             strokes_ = updates[6]            
#             if epoch_num%200==0:
#                 print "Epoch: " + str(epoch_num) + " | Loss: " + str(loss_) + \
#                 " | Offset loss: " + str(offset_loss_) + " | Pen loss: " + str(pen_loss_) 
#             # save
#             if save:
#                 saver.save(sess, 'checkpoints/pix2svg_train_svg2svg_0')                
#             # increment epoch number
#             epoch_num += 1

run rnn version


In [150]:
# RANDOM_SEED = 42
# tf.set_random_seed(RANDOM_SEED)

# # reset entire graph
# tf.reset_default_graph()

# # weight on offset loss
# offset_weight = 1000.

# # learning rate
# learning_rate = 0.01

# # set batch size
# batch_size = 10

# # epoch counter
# epoch_num = 0

# # max strokes
# max_strokes = 200

# # initialize variables
# F = tf.placeholder("float", shape=[None, 2048])  # features (input)
# S = tf.placeholder("float", shape=[None, 3]) # strokes (output)

# # layer sizes
# x_size = F.shape[1]   # Number of input nodes: 2048 features and 1 bias
# h_size = 512        # Number of hidden nodes
# y_size = S.shape[1]   # Number of outcomes (x,y,pen)

# # rnn hyperparameters
# rnn_hidden_size = 512 # number of rnn hidden units

# output = tf.placeholder("float", shape=[None,y_size])

# # convert to tensorflow tensor
# F = tf.cast(tf.stack(F,name='F'),tf.float32)
# S = tf.cast(tf.stack(S,name='S'),tf.float32)

# # Weight initializations
# W1 = tf.get_variable('W1', [x_size, h_size],initializer=tf.contrib.layers.xavier_initializer(), dtype=tf.float32)
# b1 = tf.get_variable('b1', [h_size],initializer=tf.zeros_initializer(), dtype=tf.float32)
# W2 = tf.get_variable('W2', [h_size, y_size],initializer=tf.contrib.layers.xavier_initializer(), dtype=tf.float32)
# b2 = tf.get_variable('b2', [y_size],initializer=tf.zeros_initializer(), dtype=tf.float32)

# # forward propagation
# # Run RNN and run linear layer to fit to correct size.

# rnn_input = tf.nn.xw_plus_b(F, W1, b1,name='rnn_input')
# cell = tf.contrib.rnn.BasicLSTMCell(rnn_hidden_size)
# starting_state = cell.zero_state(batch_size=batch_size, dtype=tf.float32)
# outputs, final_rnn_state = tf.contrib.rnn.static_rnn(cell, 
#                             [rnn_input]*max_strokes,
#                             initial_state=starting_state,
#                             dtype=tf.float32)
# W_hy = tf.get_variable('W_hy', [rnn_hidden_size, y_size],initializer=tf.contrib.layers.xavier_initializer(), dtype=tf.float32)

# preds = []
# for output in outputs:
#     preds.append(tf.matmul(outputs, W_hy))


# # output = tf.nn.xw_plus_b(fc1, W2, b2,name='output')

# # actual_offset = tf.slice(S,[0,0],[batch_size,2])
# # actual_pen = tf.slice(S,[0,2],[batch_size,-1]) 
# # pred_offset = tf.multiply(tf.slice(output,[0,0],[batch_size,2]),offset_weight)
# # pred_pen = tf.nn.softmax(tf.slice(output,[0,2],[batch_size,-1]))

# # offset_loss = tf.reduce_sum(tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(pred_offset,actual_offset)),axis=1)))
# # pen_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
# #             labels = actual_pen,
# #             logits = pred_pen))

# # loss = tf.add(offset_loss,pen_loss)

# # # run backprop
# # optimizer = tf.train.AdamOptimizer(learning_rate)
# # train_op = optimizer.minimize(loss)

# # saver
# save = False
# saver = tf.train.Saver()

# # # get predicted stroke vector
# # strokes = tf.concat([pred_offset, pred_pen],axis=1)

# # run batches
# with tf.Session() as sess:
#     tf.global_variables_initializer().run()
#     for m in get_minibatches([F_train,S_train.as_matrix()],batch_size,shuffle=True):

#         if m[0].shape[0]==batch_size:
#             F_batch = m[0]
#             S_batch = m[1]    

#             updates = sess.run([preds], feed_dict={F:F_batch,S:S_batch})    

#             preds_ = updates[0]

#             if epoch_num%200==0:
#                 print "Epoch: " + str(epoch_num) 

#             # save
#             if save:
#                 saver.save(sess, 'checkpoints/pix2svg_train_rnn_0')
                
#             # increment epoch number
#             epoch_num += 1

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [337]:
# for epoch in range(50):
#     # Train with each example
#     for i in range(len(F_batch)):
#         sess.run(updates, feed_dict={X: F_batch[i: i + 1], y: S_batch[i: i + 1]})
        
#         loss = sess.run(output, feed_dict={X: F_batch, y: S_batch})

In [290]:
fig = plt.figure()
im = plt.matshow(np.corrcoef(F_batch_array),vmin=0.5)
plt.show()


<matplotlib.figure.Figure at 0x7f474cfcf050>

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [20]:
sess.close()

In [174]:
# get minibatch
batch_size = 1500
F_batch = F_train[:batch_size,:]
S_batch = S_train.head(n=batch_size)

# reserve numpy version 
F_batch_array = F_batch
S_batch_array = S_batch.as_matrix()

plt.matshow(np.corrcoef(F_batch_array))
plt.show()



In [198]:
SF_ = SF[100:110,:]
plt.matshow(np.corrcoef(SF_))
plt.show()



In [200]:
PF_ = PF[:batch_size,:]
plt.matshow(np.corrcoef(PF_))
plt.show()



In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]: