In [0]:
#@title Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
This notebook illustrates how to run the Neural Voxel Renderer (CVPR 2020). The input scene consists of a voxelized object, the ground, and a point light source, and the network produces an image that corresponds to this scene configuration. In this version of the code, all operations are differentiable.
In [0]:
!pip install tensorflow_graphics
In [0]:
import pickle
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow_graphics.projects.neural_voxel_renderer import helpers
from tensorflow_graphics.projects.neural_voxel_renderer import models
from tensorflow_graphics.rendering.voxels import visual_hull
In [0]:
!rm -f /tmp/example_data.p*
!wget -P /tmp/ https://storage.googleapis.com/tensorflow-graphics/notebooks/neural_voxel_renderer/example_data.p
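# The pickle holds a batch of example inputs: object voxels plus camera,
# light, and object-pose metadata.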
with open('/tmp/example_data.p', 'rb') as f:
  example_data = pickle.load(f)
object_voxels = example_data['object_voxels']
camera_rotation_matrix = example_data['camera_rotation_matrix']
camera_translation_vector = example_data['camera_translation_vector']
focal = example_data['focal']
principal_point = example_data['principal_point']
light_position = example_data['light_position']
object_rotation = example_data['object_rotation']
object_translation = example_data['object_translation']
object_elevation = example_data['object_elevation']
In [0]:
VOXEL_SIZE = 128
IMAGE_SIZE = 256
BLENDER_SCALE = 2
DIAMETER = 4.2 # Extent (diameter) of the voxel volume in world coordinates
GROUND_COLOR = np.array((136., 162, 199))/255.
object_rotation_v = object_rotation
object_translation_v = object_translation[:, 0, [1, 0, 2]]*BLENDER_SCALE
object_elevation_v = object_elevation
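# Mark a one-voxel-thick slab of the grid as ground and give it a constant color.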
ground_occupancy = np.zeros((VOXEL_SIZE, VOXEL_SIZE, VOXEL_SIZE, 1),
dtype=np.float32)
ground_occupancy[-2, 1:-2, 1:-2, 0] = 1
ground_voxel_color = np.ones((VOXEL_SIZE, VOXEL_SIZE, VOXEL_SIZE, 3),
dtype=np.float32)*\
np.array(GROUND_COLOR, dtype=np.float32)
ground_voxel_color = np.concatenate([ground_voxel_color, ground_occupancy],
axis=-1)
scene_voxels = object_voxels*(1-ground_occupancy) + \
ground_voxel_color*ground_occupancy
euler_angles_x = np.deg2rad(180-object_rotation_v)*np.array([1, 0, 0],
dtype=np.float32)
euler_angles_y = np.deg2rad(90-object_elevation_v)*np.array([0, 1, 0],
dtype=np.float32)
translation_vector = (object_translation_v/(DIAMETER*0.5))
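# Rotate and translate the scene voxels from object to world space; the
# translation above is normalized by the half-diameter of the voxel volume.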
interpolated_voxels = helpers.object_to_world(scene_voxels,
euler_angles_x,
euler_angles_y,
translation_vector)
color_input, alpha_input = tf.split(interpolated_voxels, [3, 1], axis=-1)
voxel_img = visual_hull.render(color_input*alpha_input)
_, ax = plt.subplots(1, 1, figsize=(5, 5))
ax.imshow(voxel_img[0])
ax.axis('off')
ax.set_title('Voxel Visualization')
plt.show()
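For intuition, this visual-hull projection amounts to summing occupancy-weighted color along the viewing axis and saturating the result with 1 - exp(-x). Below is a minimal NumPy sketch of that idea; the absorption constant and axis convention are illustrative assumptions, not necessarily the defaults of visual_hull.render.
In [0]:
import numpy as np

def visual_hull_projection(voxels, absorption=0.1, axis=2):
  """voxels: [batch, X, Y, Z, C] occupancy-weighted colors."""
  # Integrate density along one spatial axis (+1 skips the batch dimension).
  signal = np.sum(voxels, axis=axis + 1)
  # Map through 1 - exp(-x) so the image saturates as density accumulates.
  return 1.0 - np.exp(-absorption*signal)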
The second input of the NVR+ network is an image rendered directly from the voxels. In the original paper this was done by splatting the voxel centers onto the image plane; in this notebook the image is synthesized using a differentiable volumetric renderer. A minimal sketch of the splatting idea is shown below.
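This sketch assumes a pinhole camera with two-element focal lengths and principal point, ignores depth ordering, and uses a hypothetical splat_voxel_centers helper that is not part of the helpers module.
In [0]:
import numpy as np

def splat_voxel_centers(centers_world, colors, rotation, translation,
                        focal, principal_point, height, width):
  """Projects voxel centers into the image and paints one pixel each."""
  image = np.zeros((height, width, 3), dtype=np.float32)
  # World -> camera coordinates (p_cam = R p_world + t, for row vectors).
  points_cam = centers_world @ rotation.T + translation
  # Perspective projection onto the image plane.
  x = focal[0]*points_cam[:, 0]/points_cam[:, 2] + principal_point[0]
  y = focal[1]*points_cam[:, 1]/points_cam[:, 2] + principal_point[1]
  cols = np.clip(np.round(x).astype(int), 0, width - 1)
  rows = np.clip(np.round(y).astype(int), 0, height - 1)
  image[rows, cols] = colors  # last write wins; no z-buffer or blending
  return image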
In [0]:
ground_image, ground_alpha = \
helpers.generate_ground_image(IMAGE_SIZE, IMAGE_SIZE, focal, principal_point,
camera_rotation_matrix,
camera_translation_vector[:, :, 0],
GROUND_COLOR)
_, ax = plt.subplots(1, 1, figsize=(6, 6))
ax.imshow((ground_image*ground_alpha)[0])
ax.axis('off')
ax.set_title('Ground image')
plt.show()
Given the colored voxels and the camera parameters, we can render an image of the input voxels using volumetric rendering techniques (see Henzler et al., ICCV 2019). Briefly, for each pixel we cast a ray into the volumetric scene and estimate its final color from the occupancy and color of the voxels it intersects.
Note that this image retains the original colors of the voxels (with projective texturing artifacts due to the different orientation of the object) and does not include shadows or other illumination effects.
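To make the ray marching concrete, here is a minimal sketch of front-to-back alpha compositing along a single ray. The composite_ray helper and its (r, g, b, alpha) sample layout are illustrative assumptions; the actual renderer used in the next cell is helpers.render_voxels_from_blender_camera.
In [0]:
import numpy as np

def composite_ray(samples):
  """samples: (r, g, b, alpha) tuples ordered front to back along one ray."""
  color = np.zeros(3, dtype=np.float32)
  transmittance = 1.0  # fraction of light not yet absorbed
  for r, g, b, alpha in samples:
    color += transmittance*alpha*np.array([r, g, b], dtype=np.float32)
    transmittance *= 1.0 - alpha
  return color, 1.0 - transmittance  # composited color and accumulated alpha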
In [0]:
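# Convert the object pose to the conventions of the volumetric renderer:
# radians, swapped y/z translation axes, and an offset of
# helpers.OBJECT_BOTTOM along the last axis.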
object_rotation_dvr = np.array(np.deg2rad(object_rotation),
dtype=np.float32)
object_translation_dvr = np.array(object_translation[..., [0, 2, 1]],
dtype=np.float32)
object_translation_dvr -= np.array([0, 0, helpers.OBJECT_BOTTOM],
dtype=np.float32)
rerendering = \
helpers.render_voxels_from_blender_camera(object_voxels,
object_rotation_dvr,
object_translation_dvr,
256,
256,
focal,
principal_point,
camera_rotation_matrix,
camera_translation_vector,
absorption_factor=1.0,
cell_size=1.1,
depth_min=3.0,
depth_max=5.0,
frustum_size=(128, 128, 128))
rerendering_image, rerendering_alpha = tf.split(rerendering, [3, 1], axis=-1)
rerendering_image = tf.image.resize(rerendering_image, (256, 256))
rerendering_alpha = tf.image.resize(rerendering_alpha, (256, 256))
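# Composite back to front: constant background, then the ground plane, then
# the re-rendered object, each weighted by its alpha.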
BACKGROUND_COLOR = 0.784
final_composite = BACKGROUND_COLOR*(1-rerendering_alpha)*(1-ground_alpha) + \
ground_image*(1-rerendering_alpha)*ground_alpha + \
rerendering_image*rerendering_alpha
_, ax = plt.subplots(1, 2, figsize=(10, 10))
ax[0].imshow(rerendering_image[0])
ax[0].axis('off')
ax[0].set_title('Object rerendering')
ax[1].imshow(final_composite[0])
ax[1].axis('off')
ax[1].set_title('Final composition')
plt.show()
In [0]:
# Download the pretrained NVR+ checkpoint
!rm -rf /tmp/checkpoint
!mkdir /tmp/checkpoint
!wget -P /tmp/checkpoint https://storage.googleapis.com/tensorflow-graphics/notebooks/neural_voxel_renderer/model.ckpt-126650.data-00000-of-00001
!wget -P /tmp/checkpoint https://storage.googleapis.com/tensorflow-graphics/notebooks/neural_voxel_renderer/model.ckpt-126650.index
!wget -P /tmp/checkpoint https://storage.googleapis.com/tensorflow-graphics/notebooks/neural_voxel_renderer/model.ckpt-126650.meta
In [0]:
latest_checkpoint = '/tmp/checkpoint/model.ckpt-126650'
tf.compat.v1.reset_default_graph()
g = tf.compat.v1.Graph()
with g.as_default():
  vol_placeholder = tf.compat.v1.placeholder(tf.float32,
                                             shape=[None, VOXEL_SIZE, VOXEL_SIZE, VOXEL_SIZE, 4],
                                             name='input_voxels')
  rerender_placeholder = tf.compat.v1.placeholder(tf.float32,
                                                  shape=[None, IMAGE_SIZE, IMAGE_SIZE, 3],
                                                  name='rerender')
  light_placeholder = tf.compat.v1.placeholder(tf.float32,
                                               shape=[None, 3],
                                               name='input_light')
  model = models.neural_voxel_renderer_plus(vol_placeholder,
                                            rerender_placeholder,
                                            light_placeholder)
  predicted_image_logits, = model.outputs
  saver = tf.compat.v1.train.Saver()
a = interpolated_voxels.numpy()   # colored scene voxels in world space
b = final_composite.numpy()*2.-1  # rerendered image, rescaled to [-1, 1]
c = light_position                # point-light position
with tf.compat.v1.Session(graph=g) as sess:
  saver.restore(sess, latest_checkpoint)
  feed_dict = {vol_placeholder: a,
               rerender_placeholder: b,
               light_placeholder: c}
  predictions = sess.run(predicted_image_logits, feed_dict)
In [0]:
#@title NVR+ Output { vertical-output: true, run: "auto" }
view = 8 #@param {type:"slider", min:0, max:9, step:1}
_, ax = plt.subplots(1, 1, figsize=(5, 5))
ax.imshow(predictions[view]*0.5+0.5)  # map the prediction from [-1, 1] to [0, 1]
ax.axis('off')
ax.set_title('NVR+ prediction')
plt.show()