Project/embed real images into the StyleGAN2 latent space.


In [ ]:
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
import sys
import os
from datetime import datetime
from tqdm import tqdm
from PIL import Image  # used by encode_images below

# ffmpeg installation location, for creating videos
plt.rcParams['animation.ffmpeg_path'] = str(Path.home() / "Documents/dev_tools/ffmpeg-20190623-ffa64a4-win64-static/bin/ffmpeg.exe")

%load_ext autoreload
%autoreload 2

# StyleGAN Utils
from stylegan_utils import load_network, gen_image_fun, synth_image_fun, create_video

import dnnlib
import dataset_tool
import run_projector
import projector
import training.dataset
import training.misc

# Specific to the encoder repo; uncomment for the Encode section below
#from encoder.perceptual_model import PerceptualModel
#from encoder.generator_model import Generator

# Data Science Utils
sys.path.append(os.path.join(os.pardir, 'data-science-learning'))

from ds_utils import generative_utils

In [ ]:
res_dir = Path.home() / 'Documents/generated_data/stylegan'

Load Network


In [ ]:
MODELS_DIR = Path("C:/Users/User/Documents/models/stylegan2")
MODEL_NAME = 'original_ffhq'
SNAPSHOT_NAME = 'stylegan2-ffhq-config-f'

Gs, Gs_kwargs, noise_vars = load_network(str(MODELS_DIR / MODEL_NAME / SNAPSHOT_NAME) + '.pkl')

Z_SIZE = Gs.input_shape[1]      # latent (z) dimensionality
IMG_SIZE = Gs.output_shape[2:]  # [height, width]
IMG_SIZE
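
As a quick sanity check that the network loaded correctly, one can generate a face from a random z (same gen_image_fun call as in the Generate Images section below; assumes it accepts a batch of one latent):

In [ ]:
# sanity check: generate one face from a random z latent
z = np.random.randn(1, Z_SIZE)
plt.imshow(gen_image_fun(Gs, z, Gs_kwargs, noise_vars, truncation_psi=0.7))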

Project


In [ ]:
def project_images(images_dir, tfrecord_dir, data_dir, num_steps, num_snapshots, pure_projector=False):
    # setup projector
    print('Setting up projector')
    proj = projector.Projector(num_steps=num_steps, pure_projector=pure_projector)
    proj.set_network(Gs)
    
    # generate tfrecords
    nb_images = dataset_tool.create_from_images(str(tfrecord_dir), str(images_dir), True)

    # loading images from tfrecords
    dataset_obj = training.dataset.load_dataset(data_dir=str(data_dir), tfrecord_dir=str(tfrecord_dir),
                                                max_label_size=0, verbose=True, repeat=False, shuffle_mb=0)
    assert dataset_obj.shape == Gs.output_shape[1:]
    
    # project all loaded images
    print('=======================')
    for image_idx in tqdm(range(nb_images)):
        print(f'Projecting image {image_idx}/{nb_images}')
        
        images, _labels = dataset_obj.get_minibatch_np(1)
        images = training.misc.adjust_dynamic_range(images, [0, 255], [-1, 1])
        
        run_path = data_dir / f'out_{image_idx}'
        run_path.mkdir(parents=True, exist_ok=True)
        run_projector.project_image(proj, targets=images, 
                                    png_prefix=dnnlib.make_run_dir_path(str(run_path / 'image_')), 
                                    num_snapshots=num_snapshots)

In [ ]:
data_dir = res_dir / 'projection' / MODEL_NAME / SNAPSHOT_NAME / datetime.now().strftime("%Y%m%d_%H%M%S") # where the projection results will be saved
images_dir = Path.home() / 'Documents/generated_data/face_extract' / 'tmp_portraits'

tfrecord_dir = data_dir / 'tfrecords'
project_images(images_dir=images_dir, tfrecord_dir=tfrecord_dir, data_dir=data_dir, 
               num_steps=1000, num_snapshots=100, pure_projector=True)
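
The projector writes intermediate snapshots for each target; to eyeball the final result for the first image (the snapshot filename pattern is an assumption based on the png_prefix above):

In [ ]:
# show the last saved snapshot for the first projected image
snapshots = sorted((data_dir / 'out_0').glob('image_*.png'))
plt.imshow(Image.open(snapshots[-1]))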

In [ ]:
create_video(data_dir / 'out_2', 
             res_dir / 'projection' / 'out_{}.mp4'.format(''))

Encode

This does not use the official StyleGAN2 projector; instead it relies on the direct encoder setup the community built for v1, which optimizes the extended dlatents against a perceptual loss.


In [ ]:
BATCH_SIZE = 1
PERCEPTUAL_MODEL_IMG_SIZE = 256

# setup utils generator and perceptual model (requires the encoder imports from the first cell)
generator = Generator(Gs, BATCH_SIZE, randomize_noise=False)
perceptual_model = PerceptualModel(PERCEPTUAL_MODEL_IMG_SIZE, layer=9, batch_size=BATCH_SIZE)
perceptual_model.build_perceptual_model(generator.generated_image)

In [ ]:
def split_to_batches(l, n):
    # yield successive n-sized batches from l (last batch may be shorter)
    for i in range(0, len(l), n):
        yield l[i:i + n]
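
For example, a batch size of 2 over five items yields the trailing shorter batch as well:

In [ ]:
list(split_to_batches(list(range(5)), 2))  # -> [[0, 1], [2, 3], [4]]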

In [ ]:
def encode_images(images_dir, data_dir, iterations, learning_rate=1.):
    # collect images
    images_paths = [str(img) for img in images_dir.glob('*')]

    GEN_IMAGES_DIR = data_dir / '{}'.format(iterations) / 'gen_images'
    GEN_DLATENT_DIR = data_dir / '{}'.format(iterations) / 'latents'
    GEN_IMAGES_DIR.mkdir(parents=True, exist_ok=True)
    GEN_DLATENT_DIR.mkdir(parents=True, exist_ok=True)

    # encode all collected images, one batch at a time
    nb_batches = int(np.ceil(len(images_paths) / BATCH_SIZE))
    for images_batch in tqdm(split_to_batches(images_paths, BATCH_SIZE), total=nb_batches):
        images_names = [os.path.splitext(os.path.basename(img_path))[0] for img_path in images_batch]

        # optimize the dlatents to minimize the perceptual loss against the reference images
        perceptual_model.set_reference_images(images_batch)
        optimizer = perceptual_model.optimize(generator.dlatent_variable,
                                              iterations=iterations,
                                              learning_rate=learning_rate)
        pbar = tqdm(optimizer, leave=False, mininterval=9, total=iterations)
        for loss in pbar:
            pass
            #pbar.set_description(' '.join(images_names) + ' Loss: %.2f' % loss)
        print(' '.join(images_names), ' loss:', loss)

        # generate images from the found dlatents and save both images and latents
        generated_images = generator.generate_images()
        generated_dlatents = generator.get_dlatents()
        for img_array, dlatent, img_name in zip(generated_images, generated_dlatents, images_names):
            img = Image.fromarray(img_array, 'RGB')
            img.save(str(GEN_IMAGES_DIR / f'{img_name}.png'), 'PNG')
            np.save(str(GEN_DLATENT_DIR / f'{img_name}.npy'), dlatent)

        generator.reset_dlatents()
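
encode_images is defined but never invoked above; a minimal call mirroring the projection run would look like this (the output location and iteration count here are assumptions):

In [ ]:
encode_data_dir = res_dir / 'encoding' / MODEL_NAME / SNAPSHOT_NAME / datetime.now().strftime("%Y%m%d_%H%M%S")
encode_images(images_dir=images_dir, data_dir=encode_data_dir, iterations=1000, learning_rate=1.)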

Generate Images

Quick check of the two generation paths: gen_image_fun generates from a z latent (run through the mapping network, with truncation), while synth_image_fun synthesizes directly from an intermediate dlatent.

In [ ]:
# shape (18, Z_SIZE): a batch of 18 z latents for gen_image_fun, or one extended dlatent (18 style layers) for synth_image_fun
target_latents = np.random.rand(18, Z_SIZE)

In [ ]:
img = gen_image_fun(Gs, target_latents, Gs_kwargs, noise_vars, truncation_psi=0.5)
plt.imshow(img)

In [ ]:
img = synth_image_fun(Gs, target_latents[np.newaxis,:,:], Gs_kwargs, randomize_noise=True)
plt.imshow(img)

Projected Latent Initialization

Test a network trained to predict an initial mapping from an image to the intermediate StyleGAN latent, which can then be used to warm-start the projection/encoding (see the sketch after the last cell).


In [ ]:
from PIL import Image
from keras.models import load_model

In [ ]:
resnet = load_model(str(MODELS_DIR / MODEL_NAME / 'resnet' / 'finetuned_resnet.h5'))

In [ ]:
resnet_img_size = (256, 256)

In [ ]:
resnet.summary()

In [ ]:
target_img = Image.open("")  # path to the target image
target_img = target_img.resize(resnet_img_size)
plt.imshow(target_img)

In [ ]:
predicted_latent = resnet.predict(np.array(target_img)[np.newaxis,:])

In [ ]:
img = synth_image_fun(Gs, predicted_latent, Gs_kwargs, randomize_noise=True)
plt.imshow(img)
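
To actually use the prediction as a warm start, the encoder's dlatents can be initialized from it before optimizing. A minimal sketch, assuming the community encoder's Generator exposes set_dlatents (as in the stylegan-encoder repos) and with an arbitrary iteration count:

In [ ]:
# initialize the encoder optimization from the resnet prediction instead of the default dlatents
generator.set_dlatents(predicted_latent)
perceptual_model.set_reference_images([""])  # same (elided) target image path as above
for loss in perceptual_model.optimize(generator.dlatent_variable, iterations=200, learning_rate=1.):
    pass
plt.imshow(generator.generate_images()[0])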