Micah P. Dombrowski / Jul 11 2018

TensorFlow 1.8.0-rc1

1. Building TensorFlow

Building TensorFlow from source allows the use of CPU enhancements like SSE. We need an older version of gcc, various CUDA build tools, and some library symlinking to make everything visible to the build.

echo 'deb http://us.archive.ubuntu.com/ubuntu/ trusty main' >> \
    /etc/apt/sources.list

apt-get update &>/dev/null
apt-get install -y --allow-downgrades \
  pkg-config zlib1g-dev git zip xutils-dev \
  gcc=4:4.8.2-1ubuntu6 \
  g++=4:4.8.2-1ubuntu6

dpkg -i --no-debsig \
  libcudnn7_7.1.4.18-1+cuda8.0_amd64.deb \
  libcudnn7-dev_7.1.4.18-1+cuda8.0_amd64.deb \
  nccl-repo-ubuntu1604-2.2.13-ga-cuda8.0_1-1_amd64.deb

# Mirror the CUDA tree into /opt/cuda with symlinks, so it can be adjusted
# without touching the original installation.
mkdir /opt/cuda
lndir -silent -ignorelinks /usr/local/cuda /opt/cuda
# CUDA 8 ships per-architecture libdevice files, but the build looks for a
# unified libdevice.10.bc, so give it one.
cd /opt/cuda/nvvm/libdevice/
ln -sf libdevice.compute_50.10.bc libdevice.10.bc

ldconfig
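
To confirm that the linker now sees the cuDNN and NCCL libraries, a quick check (not part of the original build):

ldconfig -p | grep -E 'cudnn|nccl'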

Install Bazel. TensorFlow is very persnickety about Bazel and frequently breaks compatibility with newer releases, so we pin a known-good version.

export BAZEL_VERSION=0.11.1
export BAZEL_FILE=bazel-${BAZEL_VERSION}-installer-linux-x86_64.sh
wget --progress=dot:giga \
  https://github.com/bazelbuild/bazel/releases/download/$BAZEL_VERSION/$BAZEL_FILE
chmod +x $BAZEL_FILE
./$BAZEL_FILE
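
A newer Bazel earlier on the PATH can shadow this one, so it's worth confirming which version took effect (a quick check, not in the original):

bazel version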

Clone the source and check out a pinned revision, just before a change that broke compilation under nvcc.

git clone https://github.com/tensorflow/tensorflow
cd tensorflow
git checkout f506183dd148d97e3378eb994b2ac9c948ef0ada
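
To verify that the checkout landed on the pinned revision:

git log -1 --oneline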

The configure script reads environment variables, allowing a completely non-interactive configuration.

cd /tensorflow

export TF_ROOT="/opt/tensorflow"

export PYTHON_BIN_PATH="/opt/conda/bin/python"
export PYTHON_LIB_PATH="$($PYTHON_BIN_PATH -c 'import site; print(site.getsitepackages()[0])')"
export PYTHONPATH=${TF_ROOT}/lib
export PYTHON_ARG=${TF_ROOT}/lib
export CUDA_TOOLKIT_PATH=/opt/cuda
export CUDNN_INSTALL_PATH=/usr

export TF_NEED_GCP=1
export TF_NEED_S3=0
export TF_NEED_KAFKA=0
export TF_NEED_GDR=0
export TF_NEED_CUDA=1
export TF_CUDA_VERSION="$($CUDA_TOOLKIT_PATH/bin/nvcc --version | sed -n 's/^.*release \(.*\),.*/\1/p')"
export TF_CUDA_COMPUTE_CAPABILITIES=6.1,5.2,3.5
export TF_NEED_HDFS=0
export TF_NEED_OPENCL=0
export TF_NEED_OPENCL_SYCL=0
export TF_NEED_JEMALLOC=1
export TF_ENABLE_XLA=0
export TF_NEED_VERBS=0
export TF_CUDA_CLANG=0
export TF_CUDNN_VERSION="$(sed -n 's/^#define CUDNN_MAJOR\s*\(.*\).*/\1/p' $CUDNN_INSTALL_PATH/include/cudnn.h)"
export TF_NEED_MKL=0
export TF_DOWNLOAD_MKL=0
export TF_NEED_MPI=0
export TF_NEED_TENSORRT=0
export TF_NCCL_VERSION=1.3.5
export TF_SET_ANDROID_WORKSPACE=0

export GCC_HOST_COMPILER_PATH=$(which gcc)
export CC_OPT_FLAGS="-march=corei7"

./configure

Finally, the build itself. This takes over two hours.

export LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/nvidia/lib64"
export CUDNN_INCLUDE_DIR="/usr/include"
export CUDNN_LIBRARY="/usr/lib/x86_64-linux-gnu/libcudnn.so"

cd /tensorflow
bazel build --config=opt --config=cuda --verbose_failures --jobs 5 \
  --action_env="LD_LIBRARY_PATH=${LD_LIBRARY_PATH}" \
  --action_env="CUDNN_INCLUDE_DIR=${CUDNN_INCLUDE_DIR}" \
  --action_env="CUDNN_LIBRARY=${CUDNN_LIBRARY}" \
  //tensorflow/tools/pip_package:build_pip_package 

We'll export this environment in case anyone wants to play with the compiled result, but the important product here is the .whl wheel file, which can be installed via pip.

cd /tensorflow
bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/tensorflow_pkg
cp /tmp/tensorflow_pkg/tensorflow*.whl /results/
tensorflow-1.8.0rc1-cp36-cp36m-linux_x86_64.whl

Finally, we'll install the package we created in a clean environment.

dpkg -i --no-debsig \
  libcudnn7_7.1.4.18-1+cuda8.0_amd64.deb \
  libcudnn7-dev_7.1.4.18-1+cuda8.0_amd64.deb \
  nccl-repo-ubuntu1604-2.2.13-ga-cuda8.0_1-1_amd64.deb

# pip needs the canonical wheel filename: name-version-pythontag-abitag-platform.whl
cp tensorflow-1.8.0rc1-cp36-cp36m-linux_x86_64.whl \
  /tmp/tensorflow-1.8.0rc1-cp36-cp36m-linux_x86_64.whl
pip install /tmp/tensorflow*.whl

ldconfig 2>/dev/null
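
As a quick sanity check, confirm that the installed wheel imports cleanly and that the GPU build can see a device (tf.VERSION and tf.test.gpu_device_name() are standard TensorFlow 1.x APIs):

python -c "import tensorflow as tf; \
  print(tf.VERSION); print(tf.test.gpu_device_name())"
# Expect the 1.8.0-rc1 version string and a device such as /device:GPU:0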

2. Use Case

We'll follow the deep convolutional generative adversarial networks (DCGAN) example by Aymeric Damien, from the TensorFlow Examples project, to generate digit images from a noise distribution.

Reference paper: A. Radford, L. Metz, and S. Chintala. "Unsupervised Representation Learning with Deep Convolutional Generative Adversarial Networks." arXiv:1511.06434.

First, parameters.

# Training Params
num_steps = 5000
batch_size = 32

# Network Params
image_dim = 784 # 28*28 pixels * 1 channel
gen_hidden_dim = 256
disc_hidden_dim = 256
noise_dim = 200 # Noise data points

Define networks.

# Generator Network
# Input: Noise, Output: Image
def generator(x, reuse=False):
    with tf.variable_scope('Generator', reuse=reuse):
        # TensorFlow Layers automatically create variables and calculate their
        # shape, based on the input.
        x = tf.layers.dense(x, units=6 * 6 * 128)
        x = tf.nn.tanh(x)
        # Reshape to a 4-D array of images: (batch, height, width, channels)
        # New shape: (batch, 6, 6, 128)
        x = tf.reshape(x, shape=[-1, 6, 6, 128])
        # Deconvolution, image shape: (batch, 14, 14, 64)
        x = tf.layers.conv2d_transpose(x, 64, 4, strides=2)
        # Deconvolution, image shape: (batch, 28, 28, 1)
        x = tf.layers.conv2d_transpose(x, 1, 2, strides=2)
        # Apply sigmoid to clip values between 0 and 1
        x = tf.nn.sigmoid(x)
        return x

# Discriminator Network
# Input: Image, Output: Prediction Real/Fake Image
def discriminator(x, reuse=False):
    with tf.variable_scope('Discriminator', reuse=reuse):
        # Typical convolutional neural network to classify images.
        x = tf.layers.conv2d(x, 64, 5)
        x = tf.nn.tanh(x)
        x = tf.layers.average_pooling2d(x, 2, 2)
        x = tf.layers.conv2d(x, 128, 5)
        x = tf.nn.tanh(x)
        x = tf.layers.average_pooling2d(x, 2, 2)
        x = tf.contrib.layers.flatten(x)
        x = tf.layers.dense(x, 1024)
        x = tf.nn.tanh(x)
        # Output 2 classes: Real and Fake images
        x = tf.layers.dense(x, 2)
    return x
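
The shape comments in both networks follow from the VALID-padding size formulas: a transposed convolution produces out = (in - 1) * stride + kernel, and a convolution or pooling layer produces out = (in - kernel) // stride + 1. A small sketch checking the arithmetic (the helper functions here are purely illustrative):

def deconv_out(size, kernel, stride):
    # VALID-padding transposed convolution: out = (in - 1) * stride + kernel
    return (size - 1) * stride + kernel

def conv_out(size, kernel, stride=1):
    # VALID-padding convolution or pooling: out = (in - kernel) // stride + 1
    return (size - kernel) // stride + 1

# Generator: 6 -> 14 -> 28
assert deconv_out(6, 4, stride=2) == 14
assert deconv_out(14, 2, stride=2) == 28

# Discriminator: 28 -> 24 -> 12 -> 8 -> 4, flattened to 4 * 4 * 128 = 2048
assert conv_out(28, 5) == 24
assert conv_out(24, 2, stride=2) == 12
assert conv_out(12, 5) == 8
assert conv_out(8, 2, stride=2) == 4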

Network setup.

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

# Import MNIST data (http://yann.lecun.com/exdb/mnist/)
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)

# Build Networks
# Network Inputs
noise_input = tf.placeholder(tf.float32, shape=[None, noise_dim])
real_image_input = tf.placeholder(tf.float32, shape=[None, 28, 28, 1])

# Build Generator Network
gen_sample = generator(noise_input)

# Build 2 Discriminator Networks (one from real image input, one from generated samples)
disc_real = discriminator(real_image_input)
disc_fake = discriminator(gen_sample, reuse=True)
disc_concat = tf.concat([disc_real, disc_fake], axis=0)

# Build the stacked generator/discriminator
stacked_gan = discriminator(gen_sample, reuse=True)

# Build Targets (real or fake images)
disc_target = tf.placeholder(tf.int32, shape=[None])
gen_target = tf.placeholder(tf.int32, shape=[None])

# Build Loss
disc_loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
    logits=disc_concat, labels=disc_target))
gen_loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
    logits=stacked_gan, labels=gen_target))

# Build Optimizers
optimizer_gen = tf.train.AdamOptimizer(learning_rate=0.001)
optimizer_disc = tf.train.AdamOptimizer(learning_rate=0.001)

# Training Variables for each optimizer
# By default in TensorFlow, all variables are updated by each optimizer, so we
# need to specify, for each optimizer, the exact set of variables to update.
# Generator Network Variables
gen_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='Generator')
# Discriminator Network Variables
disc_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='Discriminator')

# Create training operations
train_gen = optimizer_gen.minimize(gen_loss, var_list=gen_vars)
train_disc = optimizer_disc.minimize(disc_loss, var_list=disc_vars)

# Initialize the variables (i.e. assign their default value)
init = tf.global_variables_initializer()

Finally, training.

# Start training
sess = tf.Session()

# Run the initializer
sess.run(init)

for step in range(1, num_steps+1):

	# Prepare Input Data
	# Get the next batch of MNIST data (only images are needed, not labels)
	batch_x, _ = mnist.train.next_batch(batch_size)
	batch_x = np.reshape(batch_x, newshape=[-1, 28, 28, 1])
	# Generate noise to feed to the generator
	z = np.random.uniform(-1., 1., size=[batch_size, noise_dim])

	# Prepare Targets (Real image: 1, Fake image: 0)
	# The first half of the data fed to the discriminator are real images,
	# the other half are fake images (coming from the generator).
	batch_disc_y = np.concatenate(
		[np.ones([batch_size]), np.zeros([batch_size])], axis=0)
	# Generator tries to fool the discriminator, thus targets are 1.
	batch_gen_y = np.ones([batch_size])

	# Training
	feed_dict = {real_image_input: batch_x, noise_input: z,
				 disc_target: batch_disc_y, gen_target: batch_gen_y}
	_, _, gl, dl = sess.run([train_gen, train_disc, gen_loss, disc_loss],
							feed_dict=feed_dict)
	if step % 1000 == 0 or step == 1:
		print('Step %i: Generator Loss: %f, Discriminator Loss: %f' % (step, gl, dl))
		
		# Generate images from noise, using the generator network.
		f, a = plt.subplots(4, 10, figsize=(10, 4))
		for i in range(10):
			# Noise input.
			z = np.random.uniform(-1., 1., size=[4, noise_dim])
			g = sess.run(gen_sample, feed_dict={noise_input: z})
			for j in range(4):
				# Generate image from noise. Extend to 3 channels for matplot figure.
				img = np.reshape(np.repeat(g[j][:, :, np.newaxis], 3, axis=2),
					newshape=(28, 28, 3))
				a[j][i].imshow(img)
				
		#f.show()
		plt.suptitle("Step {}".format(step))
		plt.savefig("/results/step-{}.svg".format(step))
		plt.close()