---
layout: post
title: Face Generation
tags: deep-learning
---

Face Generation

In this project, we will use a generative adversarial network (GAN) to generate new images of faces. The project comes from udacity-deeplearning.

Get the Data

The datasets used in this project are:

  • MNIST
  • CelebA

Because the CelebA dataset is fairly complex, we first test the GAN on the simpler MNIST dataset; if the network performs well there, we then train and test it on CelebA.

data_dir = './data'

# FloydHub - Use with data ID "R5KrjnANiKVhLWAkpXhNBe"
#data_dir = '/input'


"""
DON'T MODIFY ANYTHING IN THIS CELL
"""
import helper

helper.download_extract('mnist', data_dir)
helper.download_extract('celeba', data_dir)
Downloading mnist: 9.92MB [00:11, 870KB/s]                             
Extracting mnist: 100%|██████████| 60.0K/60.0K [00:10<00:00, 5.59KFile/s]
Downloading celeba: 1.44GB [01:58, 12.2MB/s]                               


Extracting celeba...

Explore the Data

MNIST

show_n_images = 25

"""
DON'T MODIFY ANYTHING IN THIS CELL
"""
%matplotlib inline
import os
from glob import glob
from matplotlib import pyplot

mnist_images = helper.get_batch(glob(os.path.join(data_dir, 'mnist/*.jpg'))[:show_n_images], 28, 28, 'L')
pyplot.imshow(helper.images_square_grid(mnist_images, 'L'), cmap='gray')
<matplotlib.image.AxesImage at 0x7f95676c1550>

png

CelebA

The CelebFaces Attributes Dataset (CelebA) contains 200,000 labeled images, but we will not need the labels.

show_n_images = 25

"""
DON'T MODIFY ANYTHING IN THIS CELL
"""
mnist_images = helper.get_batch(glob(os.path.join(data_dir, 'img_align_celeba/*.jpg'))[:show_n_images], 28, 28, 'RGB')
pyplot.imshow(helper.images_square_grid(mnist_images, 'RGB'))
<matplotlib.image.AxesImage at 0x7f9571a9e518>

png

Preprocess the Data

The `MNIST` and `CelebA` datasets have values scaled to the range -0.5 to 0.5 and images of size 28x28. The `MNIST` images are 2-dimensional (grayscale), while the `CelebA` images are 3-dimensional (RGB).
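As a rough sketch of what this scaling amounts to (the actual logic lives in `helper.py`; `scale_to_half_range` below is a hypothetical illustration, not the helper's real code):

import numpy as np

def scale_to_half_range(image):
    # hypothetical: map raw pixel values in [0, 255] to [-0.5, 0.5]
    return image / 255.0 - 0.5

batch = scale_to_half_range(np.array([0.0, 128.0, 255.0], dtype=np.float32))
assert batch.min() >= -0.5 and batch.max() <= 0.5
# the train() function later multiplies batches by 2, stretching [-0.5, 0.5] to [-1, 1] for tanh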

Build the Neural Network

The GAN consists of the following components:

  • model_inputs
  • discriminator
  • generator
  • model_loss
  • model_opt
  • train

Check the TensorFlow Version

"""
DON'T MODIFY ANYTHING IN THIS CELL
"""
from distutils.version import LooseVersion
import warnings
import tensorflow as tf

# Check TensorFlow Version
assert LooseVersion(tf.__version__) >= LooseVersion('1.0'), 'Please use TensorFlow version 1.0 or newer.  You are using {}'.format(tf.__version__)
print('TensorFlow Version: {}'.format(tf.__version__))

# Check for a GPU
if not tf.test.gpu_device_name():
    warnings.warn('No GPU found. Please use a GPU to train your neural network.')
else:
    print('Default GPU Device: {}'.format(tf.test.gpu_device_name()))
TensorFlow Version: 1.0.0
Default GPU Device: /gpu:0

Input

Implement the `model_inputs` function to create the input tensors for the network:

import problem_unittests as tests

def model_inputs(image_width, image_height, image_channels, z_dim):
    """
    Create the model inputs
    :param image_width: The input image width
    :param image_height: The input image height
    :param image_channels: The number of image channels
    :param z_dim: The dimension of Z
    :return: Tuple of (tensor of real input images, tensor of z data, learning rate)
    """
    # TODO: Implement Function
    input_real = tf.placeholder(tf.float32, (None, image_width, image_height, image_channels ), name='input_real')
    input_z = tf.placeholder(tf.float32, (None, z_dim), name='input_z')
    lr = tf.placeholder(tf.float32, name='learning_rate')
    return input_real, input_z, lr


"""
DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE
"""
tests.test_model_inputs(model_inputs)
Tests Passed

Discriminator

Implement the `discriminator` function to create the discriminator network.

def discriminator(images, reuse=False):
    """
    Create the discriminator network
    :param images: Tensor of input image(s)
    :param reuse: Boolean if the weights should be reused
    :return: Tuple of (tensor output of the discriminator, tensor logits of the discriminator)
    """
    # TODO: Implement Function

    with tf.variable_scope('discriminator', reuse=reuse):
        # Input layer is 28 x 28 x 3 to output layer is 14 x 14 x 64
        alpha = 0.1
        x1 = tf.layers.conv2d(images, 64, 3, strides=2, padding='same')
        relu1 = tf.maximum(alpha * x1, x1)
        
        # Input layer is 14 x 14 x 64 to output layer is 7 x 7 x 128
        x2 = tf.layers.conv2d(relu1, 128, 3, strides=2, padding='same')
        bn2 = tf.layers.batch_normalization(x2, training=True)
        relu2 = tf.maximum(alpha * bn2, bn2)
        
        # Input layer is 7 x 7 x 128 to output layer is 7 x 7 x 256
        x3 = tf.layers.conv2d(relu2, 256, 3, strides=1, padding='same')
        bn3 = tf.layers.batch_normalization(x3, training=True)
        relu3 = tf.maximum(alpha * bn3, bn3)
        
        # Input layer is 7 x 7 x 256 to output layer is 7 x 7 x 512
        x4 = tf.layers.conv2d(relu3, 512, 3, strides=1, padding='same')
        bn4 = tf.layers.batch_normalization(x4, training=True)
        relu4 = tf.maximum(alpha * bn4, bn4)
        
        flat = tf.reshape(relu4, (-1, 7 * 7 * 512))
        logits = tf.layers.dense(flat,1)
        out = tf.sigmoid(logits)
        
        return out, logits


"""
DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE
"""
tests.test_discriminator(discriminator, tf)
Tests Passed

Generator

Implement the `generator` function to build the generator network.

def generator(z, out_channel_dim, is_train=True):
    """
    Create the generator network
    :param z: Input z
    :param out_channel_dim: The number of channels in the output image
    :param is_train: Boolean if generator is being used for training
    :return: The tensor output of the generator
    """
    # TODO: Implement Function
    
    with tf.variable_scope('generator', reuse=not is_train):
        alpha = 0.1
        # First fully connected layer
        x1 = tf.layers.dense(z, 7 * 7 * 512)
        x1 = tf.reshape(x1, (-1, 7, 7, 512))
        x1 = tf.layers.batch_normalization(x1, training=is_train)
        x1 = tf.maximum(alpha * x1, x1)
        
        
        x2 = tf.layers.conv2d_transpose(x1, 256, 3, strides=1, padding='same')
        x2 = tf.layers.batch_normalization(x2, training=is_train)
        x2 = tf.maximum(alpha * x2, x2)

        x3 = tf.layers.conv2d_transpose(x2, 128, 3, strides=1, padding='same')
        x3 = tf.layers.batch_normalization(x3, training=is_train)
        x3 = tf.maximum(alpha * x3, x3)

        x4 = tf.layers.conv2d_transpose(x3, 64, 3, strides=2, padding='same')
        x4 = tf.layers.batch_normalization(x4, training=is_train)
        x4 = tf.maximum(alpha * x4, x4)

        # Output layer, 28 x 28 x out_channel_dim
        logits = tf.layers.conv2d_transpose(x4, out_channel_dim, 3, strides=2, padding='same')
        
        out = tf.tanh(logits)
        
        return out


"""
DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE
"""
tests.test_generator(generator, tf)
Tests Passed

Loss

Implement the `model_loss` function to compute the losses of the network.

def model_loss(input_real, input_z, out_channel_dim):
    """
    Get the loss for the discriminator and generator
    :param input_real: Images from the real dataset
    :param input_z: Z input
    :param out_channel_dim: The number of channels in the output image
    :return: A tuple of (discriminator loss, generator loss)
    """
    # TODO: Implement Function
    
    g_out = generator(input_z, out_channel_dim, is_train=True)
    d_out_real, d_logits_real = discriminator(input_real)
    d_out_fake, d_logits_fake = discriminator(g_out, reuse=True)
    
    d_loss_real = tf.reduce_mean(
                    tf.nn.sigmoid_cross_entropy_with_logits(logits=d_logits_real, labels=tf.ones_like(d_out_real) * 0.9))
    d_loss_fake = tf.reduce_mean(
                    tf.nn.sigmoid_cross_entropy_with_logits(logits=d_logits_fake, labels=tf.zeros_like(d_out_fake)))
    
    g_loss = tf.reduce_mean(
                    tf.nn.sigmoid_cross_entropy_with_logits(logits=d_logits_fake, labels=tf.ones_like(d_out_fake)))
    
    d_loss = d_loss_real + d_loss_fake
    return d_loss, g_loss


"""
DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE
"""
tests.test_model_loss(model_loss)
Tests Passed

Optimization

Implement the `model_opt` function to create the optimization operations for the network.

def model_opt(d_loss, g_loss, learning_rate, beta1):
    """
    Get optimization operations
    :param d_loss: Discriminator loss Tensor
    :param g_loss: Generator loss Tensor
    :param learning_rate: Learning Rate Placeholder
    :param beta1: The exponential decay rate for the 1st moment in the optimizer
    :return: A tuple of (discriminator training operation, generator training operation)
    """
    # TODO: Implement Function
    t_vars = tf.trainable_variables()
    d_vars = [var for var in t_vars if var.name.startswith('discriminator')]
    g_vars = [var for var in t_vars if var.name.startswith('generator')]
    
    with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
        d_train_opt = tf.train.AdamOptimizer(learning_rate, beta1=beta1).minimize(d_loss, var_list=d_vars)
        g_train_opt = tf.train.AdamOptimizer(learning_rate, beta1=beta1).minimize(g_loss, var_list=g_vars)

    return d_train_opt, g_train_opt


"""
DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE
"""
tests.test_model_opt(model_opt, tf)
Tests Passed

Train the Neural Network

"""
DON'T MODIFY ANYTHING IN THIS CELL
"""
import numpy as np

def show_generator_output(sess, n_images, input_z, out_channel_dim, image_mode):
    """
    Show example output for the generator
    :param sess: TensorFlow session
    :param n_images: Number of Images to display
    :param input_z: Input Z Tensor
    :param out_channel_dim: The number of channels in the output image
    :param image_mode: The mode to use for images ("RGB" or "L")
    """
    cmap = None if image_mode == 'RGB' else 'gray'
    z_dim = input_z.get_shape().as_list()[-1]
    example_z = np.random.uniform(-1, 1, size=[n_images, z_dim])

    samples = sess.run(
        generator(input_z, out_channel_dim, False),
        feed_dict={input_z: example_z})

    images_grid = helper.images_square_grid(samples, image_mode)
    pyplot.imshow(images_grid, cmap=cmap)
    pyplot.show()

Train

Implement the `train` function to train the network, using the functions already implemented above:

  • model_inputs(image_width, image_height, image_channels, z_dim)
  • model_loss(input_real, input_z, out_channel_dim)
  • model_opt(d_loss, g_loss, learning_rate, beta1)

def train(epoch_count, batch_size, z_dim, learning_rate, beta1, get_batches, data_shape, data_image_mode):
    """
    Train the GAN
    :param epoch_count: Number of epochs
    :param batch_size: Batch Size
    :param z_dim: Z dimension
    :param learning_rate: Learning Rate
    :param beta1: The exponential decay rate for the 1st moment in the optimizer
    :param get_batches: Function to get batches
    :param data_shape: Shape of the data
    :param data_image_mode: The image mode to use for images ("RGB" or "L")
    """
    # TODO: Build Model
    input_real, input_z, lr = model_inputs(data_shape[1], data_shape[2], data_shape[3], z_dim)
    d_loss, g_loss = model_loss(input_real, input_z, data_shape[3])
    d_train_opt, g_train_opt = model_opt(d_loss, g_loss, lr, beta1)  # use the lr placeholder so the learning rate fed at run time takes effect
    
    steps = 0
    
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch_i in range(epoch_count):
            for batch_images in get_batches(batch_size):
                # TODO: Train Model
                steps += 1
                
                batch_images = batch_images * 2  # scale from [-0.5, 0.5] to [-1, 1] to match the generator's tanh output
                
                batch_z = np.random.uniform(-1, 1, size=(batch_size, z_dim))
                
                _ = sess.run(d_train_opt, feed_dict={input_real: batch_images, input_z: batch_z, lr:learning_rate})
                _ = sess.run(g_train_opt, feed_dict={input_z: batch_z, input_real: batch_images, lr:learning_rate})
                
                if steps % 10 == 0:
                    train_loss_d = d_loss.eval({input_z: batch_z, input_real: batch_images})
                    train_loss_g = g_loss.eval({input_z: batch_z})
                    
                    print("Epoch {}/{}...".format(epoch_i+1, epoch_count),
                          "Discriminator Loss: {:.4f}...".format(train_loss_d),
                          "Generator Loss: {:.4f}".format(train_loss_g))
                    
                if steps % 100 == 0:
                    show_generator_output(sess, 4, input_z, data_shape[3], data_image_mode)
                    
    print("Finish training")

MNIST

Use the MNIST dataset to validate the network architecture.

batch_size = 128
z_dim = 100
learning_rate = 0.0005
beta1 = 0.5


"""
DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE
"""
epochs = 2

mnist_dataset = helper.Dataset('mnist', glob(os.path.join(data_dir, 'mnist/*.jpg')))
with tf.Graph().as_default():
    train(epochs, batch_size, z_dim, learning_rate, beta1, mnist_dataset.get_batches,
          mnist_dataset.shape, mnist_dataset.image_mode)
Epoch 1/2... Discriminator Loss: 2.5670... Generator Loss: 0.4125
Epoch 1/2... Discriminator Loss: 0.5662... Generator Loss: 5.8088
Epoch 1/2... Discriminator Loss: 2.6611... Generator Loss: 0.3710
Epoch 1/2... Discriminator Loss: 1.7154... Generator Loss: 0.5251
Epoch 1/2... Discriminator Loss: 1.4534... Generator Loss: 5.4623
Epoch 1/2... Discriminator Loss: 0.8610... Generator Loss: 1.7478
Epoch 1/2... Discriminator Loss: 0.8701... Generator Loss: 2.8445
Epoch 1/2... Discriminator Loss: 0.9579... Generator Loss: 3.6692
Epoch 1/2... Discriminator Loss: 0.7607... Generator Loss: 1.4849
Epoch 1/2... Discriminator Loss: 1.1003... Generator Loss: 0.9405

png

Epoch 1/2... Discriminator Loss: 0.8156... Generator Loss: 2.5537
Epoch 1/2... Discriminator Loss: 1.1982... Generator Loss: 0.7492
Epoch 1/2... Discriminator Loss: 0.7526... Generator Loss: 2.4862
Epoch 1/2... Discriminator Loss: 0.8648... Generator Loss: 1.1423
Epoch 1/2... Discriminator Loss: 1.6047... Generator Loss: 0.5022
Epoch 1/2... Discriminator Loss: 1.3362... Generator Loss: 0.7429
Epoch 1/2... Discriminator Loss: 0.7280... Generator Loss: 2.1738
Epoch 1/2... Discriminator Loss: 0.9456... Generator Loss: 1.0319
Epoch 1/2... Discriminator Loss: 0.6420... Generator Loss: 2.0461
Epoch 1/2... Discriminator Loss: 1.2976... Generator Loss: 0.6159

png

Epoch 1/2... Discriminator Loss: 1.1248... Generator Loss: 0.7653
Epoch 1/2... Discriminator Loss: 0.7744... Generator Loss: 1.6158
Epoch 1/2... Discriminator Loss: 0.6849... Generator Loss: 1.9795
Epoch 1/2... Discriminator Loss: 1.0444... Generator Loss: 0.9345
Epoch 1/2... Discriminator Loss: 2.5948... Generator Loss: 0.2506
Epoch 1/2... Discriminator Loss: 0.9160... Generator Loss: 2.5226
Epoch 1/2... Discriminator Loss: 0.8180... Generator Loss: 1.2042
Epoch 1/2... Discriminator Loss: 3.0264... Generator Loss: 0.1181
Epoch 1/2... Discriminator Loss: 1.5977... Generator Loss: 0.5586
Epoch 1/2... Discriminator Loss: 1.4799... Generator Loss: 0.5423

png

Epoch 1/2... Discriminator Loss: 1.7227... Generator Loss: 0.3871
Epoch 1/2... Discriminator Loss: 0.8242... Generator Loss: 1.3010
Epoch 1/2... Discriminator Loss: 0.8275... Generator Loss: 1.7792
Epoch 1/2... Discriminator Loss: 1.4575... Generator Loss: 0.5364
Epoch 1/2... Discriminator Loss: 1.6397... Generator Loss: 0.4820
Epoch 1/2... Discriminator Loss: 1.1027... Generator Loss: 2.5782
Epoch 1/2... Discriminator Loss: 1.1037... Generator Loss: 0.7890
Epoch 1/2... Discriminator Loss: 0.9587... Generator Loss: 1.2423
Epoch 1/2... Discriminator Loss: 1.5567... Generator Loss: 2.4239
Epoch 1/2... Discriminator Loss: 1.5680... Generator Loss: 0.6865

png

Epoch 1/2... Discriminator Loss: 1.3410... Generator Loss: 2.2070
Epoch 1/2... Discriminator Loss: 1.7490... Generator Loss: 3.2341
Epoch 1/2... Discriminator Loss: 0.9146... Generator Loss: 1.9964
Epoch 1/2... Discriminator Loss: 2.8071... Generator Loss: 4.3768
Epoch 1/2... Discriminator Loss: 1.2631... Generator Loss: 0.7122
Epoch 1/2... Discriminator Loss: 1.0083... Generator Loss: 1.5106
Epoch 2/2... Discriminator Loss: 0.8099... Generator Loss: 1.7368
Epoch 2/2... Discriminator Loss: 0.9017... Generator Loss: 1.1692
Epoch 2/2... Discriminator Loss: 0.9267... Generator Loss: 2.0585
Epoch 2/2... Discriminator Loss: 4.4688... Generator Loss: 4.9588

png

Epoch 2/2... Discriminator Loss: 2.0713... Generator Loss: 0.2946
Epoch 2/2... Discriminator Loss: 1.5370... Generator Loss: 0.5216
Epoch 2/2... Discriminator Loss: 1.3747... Generator Loss: 0.6113
Epoch 2/2... Discriminator Loss: 1.5562... Generator Loss: 0.4826
Epoch 2/2... Discriminator Loss: 1.0379... Generator Loss: 1.0576
Epoch 2/2... Discriminator Loss: 1.7146... Generator Loss: 0.4087
Epoch 2/2... Discriminator Loss: 1.1321... Generator Loss: 2.0963
Epoch 2/2... Discriminator Loss: 1.6731... Generator Loss: 0.4216
Epoch 2/2... Discriminator Loss: 1.6752... Generator Loss: 0.4001
Epoch 2/2... Discriminator Loss: 1.1817... Generator Loss: 0.7585

png

Epoch 2/2... Discriminator Loss: 1.2264... Generator Loss: 3.2529
Epoch 2/2... Discriminator Loss: 0.9499... Generator Loss: 1.1761
Epoch 2/2... Discriminator Loss: 1.1673... Generator Loss: 0.7926
Epoch 2/2... Discriminator Loss: 0.9397... Generator Loss: 2.5452
Epoch 2/2... Discriminator Loss: 1.7447... Generator Loss: 0.4614
Epoch 2/2... Discriminator Loss: 0.9644... Generator Loss: 1.7437
Epoch 2/2... Discriminator Loss: 1.3251... Generator Loss: 2.7100
Epoch 2/2... Discriminator Loss: 2.1874... Generator Loss: 0.3228
Epoch 2/2... Discriminator Loss: 1.5370... Generator Loss: 0.4851
Epoch 2/2... Discriminator Loss: 0.9423... Generator Loss: 2.0052

png

Epoch 2/2... Discriminator Loss: 0.8966... Generator Loss: 1.3532
Epoch 2/2... Discriminator Loss: 0.9183... Generator Loss: 2.4473
Epoch 2/2... Discriminator Loss: 0.9276... Generator Loss: 1.0986
Epoch 2/2... Discriminator Loss: 1.0378... Generator Loss: 2.4902
Epoch 2/2... Discriminator Loss: 1.0954... Generator Loss: 0.8773
Epoch 2/2... Discriminator Loss: 1.4540... Generator Loss: 3.4679
Epoch 2/2... Discriminator Loss: 2.2756... Generator Loss: 0.2780
Epoch 2/2... Discriminator Loss: 0.9586... Generator Loss: 1.2197
Epoch 2/2... Discriminator Loss: 1.0734... Generator Loss: 0.8667
Epoch 2/2... Discriminator Loss: 0.8090... Generator Loss: 1.7418

png

Epoch 2/2... Discriminator Loss: 1.2716... Generator Loss: 0.7460
Epoch 2/2... Discriminator Loss: 0.8679... Generator Loss: 2.0178
Epoch 2/2... Discriminator Loss: 1.0569... Generator Loss: 0.9753
Epoch 2/2... Discriminator Loss: 1.3461... Generator Loss: 0.6070
Epoch 2/2... Discriminator Loss: 1.0119... Generator Loss: 2.3068
Epoch 2/2... Discriminator Loss: 0.8770... Generator Loss: 1.3781
Epoch 2/2... Discriminator Loss: 0.9784... Generator Loss: 1.2783
Epoch 2/2... Discriminator Loss: 1.0566... Generator Loss: 0.9749
Epoch 2/2... Discriminator Loss: 1.1071... Generator Loss: 0.8777
Epoch 2/2... Discriminator Loss: 1.1916... Generator Loss: 0.8338

png

Epoch 2/2... Discriminator Loss: 0.8422... Generator Loss: 1.3306
Epoch 2/2... Discriminator Loss: 1.2982... Generator Loss: 0.6625
Epoch 2/2... Discriminator Loss: 1.6153... Generator Loss: 0.6769
Finish training

CelebA

Use the CelebA dataset to validate the network architecture.

batch_size = 128
z_dim = 100
learning_rate = 0.0005
beta1 = 0.5


"""
DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE
"""
epochs = 1

celeba_dataset = helper.Dataset('celeba', glob(os.path.join(data_dir, 'img_align_celeba/*.jpg')))
with tf.Graph().as_default():
    train(epochs, batch_size, z_dim, learning_rate, beta1, celeba_dataset.get_batches,
          celeba_dataset.shape, celeba_dataset.image_mode)
Epoch 1/1... Discriminator Loss: 0.7530... Generator Loss: 8.6871
Epoch 1/1... Discriminator Loss: 0.9298... Generator Loss: 15.7398
Epoch 1/1... Discriminator Loss: 2.8263... Generator Loss: 0.2851
Epoch 1/1... Discriminator Loss: 1.6981... Generator Loss: 0.6993
Epoch 1/1... Discriminator Loss: 1.5391... Generator Loss: 1.0450
Epoch 1/1... Discriminator Loss: 1.4355... Generator Loss: 0.7837
Epoch 1/1... Discriminator Loss: 1.3560... Generator Loss: 0.9238
Epoch 1/1... Discriminator Loss: 1.9604... Generator Loss: 4.4131
Epoch 1/1... Discriminator Loss: 2.1643... Generator Loss: 0.3025
Epoch 1/1... Discriminator Loss: 2.4849... Generator Loss: 0.2103

png

Epoch 1/1... Discriminator Loss: 0.9599... Generator Loss: 1.2785
Epoch 1/1... Discriminator Loss: 0.8042... Generator Loss: 1.8656
Epoch 1/1... Discriminator Loss: 2.9833... Generator Loss: 0.1235
Epoch 1/1... Discriminator Loss: 1.0662... Generator Loss: 1.1685
Epoch 1/1... Discriminator Loss: 1.0549... Generator Loss: 1.1269
Epoch 1/1... Discriminator Loss: 1.0664... Generator Loss: 1.4223
Epoch 1/1... Discriminator Loss: 1.4356... Generator Loss: 0.6057
Epoch 1/1... Discriminator Loss: 1.3182... Generator Loss: 0.7285
Epoch 1/1... Discriminator Loss: 2.1460... Generator Loss: 0.3177
Epoch 1/1... Discriminator Loss: 0.8770... Generator Loss: 1.4075

png

Epoch 1/1... Discriminator Loss: 1.2831... Generator Loss: 0.7655
Epoch 1/1... Discriminator Loss: 1.5447... Generator Loss: 2.6052
Epoch 1/1... Discriminator Loss: 2.0801... Generator Loss: 3.9197
Epoch 1/1... Discriminator Loss: 1.4028... Generator Loss: 2.4999
Epoch 1/1... Discriminator Loss: 1.2157... Generator Loss: 2.1566
Epoch 1/1... Discriminator Loss: 0.7763... Generator Loss: 1.6683
Epoch 1/1... Discriminator Loss: 1.2153... Generator Loss: 1.7246
Epoch 1/1... Discriminator Loss: 0.9812... Generator Loss: 1.2962
Epoch 1/1... Discriminator Loss: 1.0366... Generator Loss: 0.9386
Epoch 1/1... Discriminator Loss: 1.0379... Generator Loss: 1.5070

png

Epoch 1/1... Discriminator Loss: 1.0926... Generator Loss: 0.9736
Epoch 1/1... Discriminator Loss: 1.0136... Generator Loss: 1.6398
Epoch 1/1... Discriminator Loss: 1.1990... Generator Loss: 1.2142
Epoch 1/1... Discriminator Loss: 1.1516... Generator Loss: 0.9133
Epoch 1/1... Discriminator Loss: 1.4690... Generator Loss: 0.5589
Epoch 1/1... Discriminator Loss: 1.2786... Generator Loss: 0.7575
Epoch 1/1... Discriminator Loss: 1.1647... Generator Loss: 1.2803
Epoch 1/1... Discriminator Loss: 1.6237... Generator Loss: 0.4568
Epoch 1/1... Discriminator Loss: 1.1671... Generator Loss: 2.3657
Epoch 1/1... Discriminator Loss: 1.3119... Generator Loss: 0.6340

png

Epoch 1/1... Discriminator Loss: 1.0171... Generator Loss: 1.0055
Epoch 1/1... Discriminator Loss: 1.0119... Generator Loss: 1.4181
Epoch 1/1... Discriminator Loss: 0.9128... Generator Loss: 1.1611
Epoch 1/1... Discriminator Loss: 1.1578... Generator Loss: 0.8007
Epoch 1/1... Discriminator Loss: 1.2020... Generator Loss: 1.6199
Epoch 1/1... Discriminator Loss: 1.3808... Generator Loss: 0.5759
Epoch 1/1... Discriminator Loss: 1.0826... Generator Loss: 1.0683
Epoch 1/1... Discriminator Loss: 1.2612... Generator Loss: 0.7148
Epoch 1/1... Discriminator Loss: 1.0948... Generator Loss: 1.7166
Epoch 1/1... Discriminator Loss: 1.2473... Generator Loss: 1.0475

png

Epoch 1/1... Discriminator Loss: 2.3647... Generator Loss: 3.9816
Epoch 1/1... Discriminator Loss: 1.7586... Generator Loss: 0.4085
Epoch 1/1... Discriminator Loss: 1.3968... Generator Loss: 0.6017
Epoch 1/1... Discriminator Loss: 1.2627... Generator Loss: 0.8135
Epoch 1/1... Discriminator Loss: 1.1537... Generator Loss: 1.4317
Epoch 1/1... Discriminator Loss: 1.2106... Generator Loss: 0.9707
Epoch 1/1... Discriminator Loss: 1.2024... Generator Loss: 1.0595
Epoch 1/1... Discriminator Loss: 1.1008... Generator Loss: 1.7186
Epoch 1/1... Discriminator Loss: 1.1918... Generator Loss: 0.9864
Epoch 1/1... Discriminator Loss: 1.6864... Generator Loss: 2.5932

png

Epoch 1/1... Discriminator Loss: 1.9789... Generator Loss: 2.8634
Epoch 1/1... Discriminator Loss: 1.0457... Generator Loss: 1.1890
Epoch 1/1... Discriminator Loss: 1.2952... Generator Loss: 0.5719
Epoch 1/1... Discriminator Loss: 0.8850... Generator Loss: 1.5313
Epoch 1/1... Discriminator Loss: 1.6824... Generator Loss: 0.5078
Epoch 1/1... Discriminator Loss: 1.1641... Generator Loss: 0.7723
Epoch 1/1... Discriminator Loss: 1.1622... Generator Loss: 1.4156
Epoch 1/1... Discriminator Loss: 1.4422... Generator Loss: 0.5242
Epoch 1/1... Discriminator Loss: 1.4936... Generator Loss: 0.5004
Epoch 1/1... Discriminator Loss: 1.0462... Generator Loss: 1.5340

png

Epoch 1/1... Discriminator Loss: 1.3382... Generator Loss: 0.6465
Epoch 1/1... Discriminator Loss: 1.6028... Generator Loss: 0.6008
Epoch 1/1... Discriminator Loss: 1.1647... Generator Loss: 1.0593
Epoch 1/1... Discriminator Loss: 1.0436... Generator Loss: 0.9482
Epoch 1/1... Discriminator Loss: 1.6042... Generator Loss: 0.4477
Epoch 1/1... Discriminator Loss: 1.9857... Generator Loss: 2.4600
Epoch 1/1... Discriminator Loss: 1.0933... Generator Loss: 1.0793
Epoch 1/1... Discriminator Loss: 1.3125... Generator Loss: 0.6747
Epoch 1/1... Discriminator Loss: 1.2430... Generator Loss: 0.8488
Epoch 1/1... Discriminator Loss: 1.6676... Generator Loss: 2.6519

png

Epoch 1/1... Discriminator Loss: 1.1548... Generator Loss: 1.0204
Epoch 1/1... Discriminator Loss: 0.9753... Generator Loss: 1.3047
Epoch 1/1... Discriminator Loss: 1.2424... Generator Loss: 1.6186
Epoch 1/1... Discriminator Loss: 1.1086... Generator Loss: 0.7826
Epoch 1/1... Discriminator Loss: 1.1165... Generator Loss: 1.5525
Epoch 1/1... Discriminator Loss: 1.2074... Generator Loss: 1.2530
Epoch 1/1... Discriminator Loss: 1.1495... Generator Loss: 0.9164
Epoch 1/1... Discriminator Loss: 1.1734... Generator Loss: 0.8843
Epoch 1/1... Discriminator Loss: 1.2493... Generator Loss: 1.7003
Epoch 1/1... Discriminator Loss: 1.3382... Generator Loss: 2.3015

png

Epoch 1/1... Discriminator Loss: 1.3273... Generator Loss: 0.6338
Epoch 1/1... Discriminator Loss: 1.0015... Generator Loss: 0.9939
Epoch 1/1... Discriminator Loss: 0.8871... Generator Loss: 1.0526
Epoch 1/1... Discriminator Loss: 1.8449... Generator Loss: 0.3400
Epoch 1/1... Discriminator Loss: 1.1675... Generator Loss: 1.3872
Epoch 1/1... Discriminator Loss: 1.4931... Generator Loss: 0.7872
Epoch 1/1... Discriminator Loss: 0.9699... Generator Loss: 0.9360
Epoch 1/1... Discriminator Loss: 1.0660... Generator Loss: 1.2564
Epoch 1/1... Discriminator Loss: 1.4150... Generator Loss: 0.5328
Epoch 1/1... Discriminator Loss: 1.1689... Generator Loss: 1.0345

png

Epoch 1/1... Discriminator Loss: 1.4803... Generator Loss: 1.9765
Epoch 1/1... Discriminator Loss: 1.1737... Generator Loss: 1.4223
Epoch 1/1... Discriminator Loss: 1.0635... Generator Loss: 0.9262
Epoch 1/1... Discriminator Loss: 1.5213... Generator Loss: 2.0839
Epoch 1/1... Discriminator Loss: 1.1173... Generator Loss: 1.3409
Epoch 1/1... Discriminator Loss: 0.9329... Generator Loss: 1.2308
Epoch 1/1... Discriminator Loss: 1.4536... Generator Loss: 0.6013
Epoch 1/1... Discriminator Loss: 1.4308... Generator Loss: 0.5963
Epoch 1/1... Discriminator Loss: 1.0420... Generator Loss: 1.1049
Epoch 1/1... Discriminator Loss: 1.5334... Generator Loss: 0.5061

png

Epoch 1/1... Discriminator Loss: 2.4393... Generator Loss: 3.5009
Epoch 1/1... Discriminator Loss: 1.3208... Generator Loss: 0.7757
Epoch 1/1... Discriminator Loss: 1.3835... Generator Loss: 0.6689
Epoch 1/1... Discriminator Loss: 1.1176... Generator Loss: 0.9044
Epoch 1/1... Discriminator Loss: 1.1363... Generator Loss: 1.6484
Epoch 1/1... Discriminator Loss: 1.0346... Generator Loss: 1.6511
Epoch 1/1... Discriminator Loss: 1.0575... Generator Loss: 1.4003
Epoch 1/1... Discriminator Loss: 1.2227... Generator Loss: 1.5416
Epoch 1/1... Discriminator Loss: 1.3300... Generator Loss: 0.7293
Epoch 1/1... Discriminator Loss: 1.7233... Generator Loss: 2.1256

png

Epoch 1/1... Discriminator Loss: 1.1216... Generator Loss: 1.2158
Epoch 1/1... Discriminator Loss: 1.2921... Generator Loss: 1.4218
Epoch 1/1... Discriminator Loss: 1.2042... Generator Loss: 1.2719
Epoch 1/1... Discriminator Loss: 1.1837... Generator Loss: 0.8664
Epoch 1/1... Discriminator Loss: 2.2012... Generator Loss: 0.2531
Epoch 1/1... Discriminator Loss: 1.0234... Generator Loss: 0.9543
Epoch 1/1... Discriminator Loss: 1.1222... Generator Loss: 1.3613
Epoch 1/1... Discriminator Loss: 1.1693... Generator Loss: 1.2244
Epoch 1/1... Discriminator Loss: 1.1226... Generator Loss: 0.9946
Epoch 1/1... Discriminator Loss: 1.1872... Generator Loss: 0.9885

png

Epoch 1/1... Discriminator Loss: 1.8105... Generator Loss: 0.4197
Epoch 1/1... Discriminator Loss: 1.1215... Generator Loss: 1.0932
Epoch 1/1... Discriminator Loss: 0.9746... Generator Loss: 1.2067
Epoch 1/1... Discriminator Loss: 1.1488... Generator Loss: 0.7006
Epoch 1/1... Discriminator Loss: 1.2598... Generator Loss: 0.6282
Epoch 1/1... Discriminator Loss: 1.1882... Generator Loss: 0.7306
Epoch 1/1... Discriminator Loss: 1.5521... Generator Loss: 2.4704
Epoch 1/1... Discriminator Loss: 1.4303... Generator Loss: 0.6865
Epoch 1/1... Discriminator Loss: 1.1430... Generator Loss: 1.3274
Epoch 1/1... Discriminator Loss: 1.0744... Generator Loss: 1.7194

png

Epoch 1/1... Discriminator Loss: 1.1720... Generator Loss: 0.9640
Epoch 1/1... Discriminator Loss: 1.1455... Generator Loss: 1.1421
Epoch 1/1... Discriminator Loss: 1.1719... Generator Loss: 0.8071
Epoch 1/1... Discriminator Loss: 1.1591... Generator Loss: 1.6390
Epoch 1/1... Discriminator Loss: 2.7530... Generator Loss: 3.1758
Epoch 1/1... Discriminator Loss: 1.1756... Generator Loss: 1.0707
Epoch 1/1... Discriminator Loss: 1.1652... Generator Loss: 0.7919
Epoch 1/1... Discriminator Loss: 1.0631... Generator Loss: 1.0180
Epoch 1/1... Discriminator Loss: 1.3033... Generator Loss: 0.8269
Epoch 1/1... Discriminator Loss: 0.9151... Generator Loss: 1.2596

png

Epoch 1/1... Discriminator Loss: 1.0702... Generator Loss: 1.2079
Epoch 1/1... Discriminator Loss: 1.1871... Generator Loss: 0.6943
Epoch 1/1... Discriminator Loss: 1.1647... Generator Loss: 0.9901
Epoch 1/1... Discriminator Loss: 1.1179... Generator Loss: 1.0453
Epoch 1/1... Discriminator Loss: 1.1129... Generator Loss: 1.2422
Epoch 1/1... Discriminator Loss: 0.9526... Generator Loss: 1.2542
Epoch 1/1... Discriminator Loss: 1.4053... Generator Loss: 0.5359
Epoch 1/1... Discriminator Loss: 1.5096... Generator Loss: 0.5478

Generative Adversarial Networks

In this post, we build a generative adversarial network (GAN) trained on the MNIST dataset and use it to generate new handwritten digit images. The post comes from Udacity. GANs were introduced by Ian Goodfellow in 2014; you can read the original paper. GANs have become extremely popular since then, and many other results have followed.

The idea behind GANs is that you have two networks, a generator G and a discriminator D, competing with each other. The generator produces fake data and passes it to the discriminator; the discriminator receives both real and fake data and predicts which is which. The generator is trained to fool the discriminator: it tries to produce data that the discriminator cannot tell is fake. The discriminator, in turn, keeps training to separate the real data from the fake. Eventually, the discriminator can no longer distinguish the generator's fake data from the real data.

GAN diagram

The figure above shows the general architecture of a GAN, here applied to the MNIST dataset. The generator builds images from random noise and keeps training to fool the discriminator. The discriminator outputs values between 0 and 1, where 0 means a fake image and 1 means a real image.

%matplotlib inline

import pickle as pkl
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets('MNIST_data')
Successfully downloaded train-images-idx3-ubyte.gz 9912422 bytes.
Extracting MNIST_data\train-images-idx3-ubyte.gz
Successfully downloaded train-labels-idx1-ubyte.gz 28881 bytes.
Extracting MNIST_data\train-labels-idx1-ubyte.gz
Successfully downloaded t10k-images-idx3-ubyte.gz 1648877 bytes.
Extracting MNIST_data\t10k-images-idx3-ubyte.gz
Successfully downloaded t10k-labels-idx1-ubyte.gz 4542 bytes.
Extracting MNIST_data\t10k-labels-idx1-ubyte.gz

Model Inputs

First we need to create two inputs: `inputs_z` for the generator and `inputs_real` for the discriminator.

def model_inputs(real_dim, z_dim):
    inputs_real = tf.placeholder(tf.float32, (None, real_dim), name='input_real')
    inputs_z = tf.placeholder(tf.float32, (None, z_dim), name='input_z')

    return inputs_real, inputs_z

Generator Network

GAN Network

For generality, we use just one hidden layer, with a leaky ReLU activation so that gradients can flow backward through the layer.

Variable Scope

We use `tf.variable_scope` to keep all the generator's variables separate from all the discriminator's variables, which makes it easy to train the two networks separately later.

The scope also sets a name prefix on the variables it contains, and the `reuse` keyword tells the TensorFlow graph to reuse those variables instead of creating new ones.

with tf.variable_scope('scope_name', reuse=False):
    # build the layers for this network here

See the TensorFlow documentation on `variable_scope` for details.

Leaky ReLU

TensorFlow does not provide a leaky ReLU activation directly, so we have to build our own: inputs greater than 0 pass through unchanged, while inputs below 0 become `alpha * x`.
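A minimal helper that implements this with the same `tf.maximum` trick used in the network code below (the name `leaky_relu` is our own; it is not a TensorFlow 1.0 API):

import tensorflow as tf

def leaky_relu(x, alpha=0.01):
    # for x > 0, x > alpha * x, so x is returned;
    # for x < 0, alpha * x > x, so alpha * x is returned
    return tf.maximum(alpha * x, x)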

Tanh Output

The generator performs best with a tanh activation on the output layer, so we need to rescale the image data to the range -1 to 1 instead of 0 to 1.

def generator(z, out_dim, n_units=128, reuse=False,  alpha=0.01):
    ''' Build the generator network.
    
        Arguments
        ---------
        z : Input tensor for the generator
        out_dim : Shape of the generator output
        n_units : Number of units in hidden layer
        reuse : Reuse the variables with tf.variable_scope
        alpha : leak parameter for leaky ReLU
        
        Returns
        -------
        out: 
    '''
    with tf.variable_scope('generator', reuse=reuse): # finish this
        # Hidden layer
        h1 = tf.layers.dense(z, n_units, activation=None)
        # Leaky ReLU
        h1 = tf.maximum(h1, h1 * alpha)
        
        # Logits and tanh output
        logits = tf.layers.dense(h1, out_dim, activation=None)
        out = tf.tanh(logits)
        
        return out

Discriminator

The discriminator's architecture is very similar to the generator's; only the output layer differs. Since the discriminator outputs values between 0 and 1, it uses a sigmoid activation.

def discriminator(x, n_units=128, reuse=False, alpha=0.01):
    ''' Build the discriminator network.
    
        Arguments
        ---------
        x : Input tensor for the discriminator
        n_units: Number of units in hidden layer
        reuse : Reuse the variables with tf.variable_scope
        alpha : leak parameter for leaky ReLU
        
        Returns
        -------
        out, logits: 
    '''
    with tf.variable_scope('discriminator',reuse=reuse): # finish this
        # Hidden layer
        h1 = tf.layers.dense(x, n_units, activation=None)
        # Leaky ReLU
        h1 = tf.maximum(h1, h1 * alpha)
        
        logits = tf.layers.dense(h1, 1, activation=None)
        out = tf.sigmoid(logits)
        
        return out, logits

Hyperparameters

# Size of input image to discriminator
input_size = 784 # 28x28 MNIST images flattened
# Size of latent vector to generator
z_size = 100
# Sizes of hidden layers in generator and discriminator
g_hidden_size = 128
d_hidden_size = 128
# Leak factor for leaky ReLU
alpha = 0.01
# Label smoothing 
smooth = 0.1

Build the Network

Now we can build the network with the functions defined above: get the inputs `input_real, input_z` from `model_inputs`, build the generator with `generator(input_z, input_size)`, and then build the discriminator. Note that we create two discriminators, one receiving real data and one receiving fake data, but they share the same weights, which is why we pass `reuse=True` for the second one.

tf.reset_default_graph()
# Create our input placeholders
input_real, input_z = model_inputs(input_size, z_size)

# Generator network here
g_model = generator(input_z, input_size, n_units=g_hidden_size, alpha=alpha)
# g_model is the generator output

# Disriminator network here
d_model_real, d_logits_real = discriminator(input_real, n_units=d_hidden_size, alpha=alpha)
d_model_fake, d_logits_fake = discriminator(g_model, reuse=True, n_units=d_hidden_size, alpha=alpha)

Generator and Discriminator Losses

We now need to compute the losses. The discriminator's loss has two sources, one from the real data and one from the fake data, so `d_loss = d_loss_real + d_loss_fake`. Both use the sigmoid cross-entropy:

tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=labels))

Here `logits` are the discriminator's logit outputs, and `labels` are what we want them to be: 1 for real data and 0 for fake data. To help the network generalize better, we smooth the real labels: `labels = tf.ones_like(tensor) * (1 - smooth)`.

For the generator's loss, the logits are the discriminator's outputs on the fake data, but the labels are all 1: even though the generator's output is fake, we want the discriminator to classify it as real, since the generator's goal is to fool the discriminator.

# Calculate losses

d_loss_real = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits = d_logits_real,
                                                                     labels = tf.ones_like(d_logits_real) * (1 - smooth)))

d_loss_fake = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits = d_logits_fake,
                                                                     labels = tf.zeros_like(d_logits_fake)))

d_loss = d_loss_real + d_loss_fake

g_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits = d_logits_fake,
                                                                     labels = tf.ones_like(d_logits_fake)))

Optimization

The generator's parameters and the discriminator's parameters must be trained separately, each step minimizing its own network's loss.

# Optimizers
learning_rate = 0.002

# Get the trainable_variables, split into G and D parts
t_vars = tf.trainable_variables()
g_vars = [var for var in t_vars if var.name.startswith('generator')]
d_vars = [var for var in t_vars if var.name.startswith('discriminator')]

d_train_opt = tf.train.AdamOptimizer(learning_rate).minimize(d_loss, var_list=d_vars)
g_train_opt = tf.train.AdamOptimizer(learning_rate).minimize(g_loss, var_list=g_vars)

Training

batch_size = 100
epochs = 100
samples = []
losses = []
# Only save generator variables
saver = tf.train.Saver(var_list=g_vars)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for e in range(epochs):
        for ii in range(mnist.train.num_examples//batch_size):
            batch = mnist.train.next_batch(batch_size)
            
            # Get images, reshape and rescale to pass to D
            batch_images = batch[0].reshape((batch_size, 784))
            batch_images = batch_images*2 - 1 #scale to -1,1
            
            # Sample random noise for G
            batch_z = np.random.uniform(-1, 1, size=(batch_size, z_size))
            
            # Run optimizers
            _ = sess.run(d_train_opt, feed_dict={input_real: batch_images, input_z: batch_z})
            _ = sess.run(g_train_opt, feed_dict={input_z: batch_z})
        
        # At the end of each epoch, get the losses and print them out
        train_loss_d = sess.run(d_loss, {input_z: batch_z, input_real: batch_images})
        train_loss_g = g_loss.eval({input_z: batch_z})
            
        print("Epoch {}/{}...".format(e+1, epochs),
              "Discriminator Loss: {:.4f}...".format(train_loss_d),
              "Generator Loss: {:.4f}".format(train_loss_g))    
        # Save losses to view after training
        losses.append((train_loss_d, train_loss_g))
        
        # Sample from generator as we're training for viewing afterwards
        sample_z = np.random.uniform(-1, 1, size=(16, z_size))
        gen_samples = sess.run(
                       generator(input_z, input_size, n_units=g_hidden_size, reuse=True, alpha=alpha),
                       feed_dict={input_z: sample_z})
        samples.append(gen_samples)
        saver.save(sess, './checkpoints/generator.ckpt')

# Save training generator samples
with open('train_samples.pkl', 'wb') as f:
    pkl.dump(samples, f)
Epoch 1/100... Discriminator Loss: 0.3665... Generator Loss: 3.5270
Epoch 2/100... Discriminator Loss: 0.3890... Generator Loss: 3.8497
Epoch 3/100... Discriminator Loss: 0.3980... Generator Loss: 4.3148
Epoch 4/100... Discriminator Loss: 2.5614... Generator Loss: 3.9421
Epoch 5/100... Discriminator Loss: 0.6711... Generator Loss: 4.3231
Epoch 6/100... Discriminator Loss: 1.6751... Generator Loss: 1.2985
Epoch 7/100... Discriminator Loss: 0.9973... Generator Loss: 1.7754
Epoch 8/100... Discriminator Loss: 2.1265... Generator Loss: 1.9723
Epoch 9/100... Discriminator Loss: 1.1451... Generator Loss: 1.9348
Epoch 10/100... Discriminator Loss: 1.7930... Generator Loss: 3.1384
Epoch 11/100... Discriminator Loss: 1.7409... Generator Loss: 1.8317
Epoch 12/100... Discriminator Loss: 2.7053... Generator Loss: 1.4636
Epoch 13/100... Discriminator Loss: 1.0681... Generator Loss: 3.5390
Epoch 14/100... Discriminator Loss: 1.6029... Generator Loss: 1.2599
Epoch 15/100... Discriminator Loss: 1.2468... Generator Loss: 1.6186
Epoch 16/100... Discriminator Loss: 1.0815... Generator Loss: 1.8498
Epoch 17/100... Discriminator Loss: 0.9844... Generator Loss: 1.2992
Epoch 18/100... Discriminator Loss: 0.9230... Generator Loss: 2.1901
Epoch 19/100... Discriminator Loss: 0.8258... Generator Loss: 3.2655
Epoch 20/100... Discriminator Loss: 0.8584... Generator Loss: 2.1915
Epoch 21/100... Discriminator Loss: 0.6526... Generator Loss: 2.5069
Epoch 22/100... Discriminator Loss: 0.8377... Generator Loss: 2.4397
Epoch 23/100... Discriminator Loss: 0.7968... Generator Loss: 2.3466
Epoch 24/100... Discriminator Loss: 0.9963... Generator Loss: 2.2792
Epoch 25/100... Discriminator Loss: 1.1942... Generator Loss: 1.7112
Epoch 26/100... Discriminator Loss: 0.8386... Generator Loss: 1.6728
Epoch 27/100... Discriminator Loss: 0.9020... Generator Loss: 2.4640
Epoch 28/100... Discriminator Loss: 0.9796... Generator Loss: 2.1753
Epoch 29/100... Discriminator Loss: 0.9199... Generator Loss: 2.5527
Epoch 30/100... Discriminator Loss: 1.0509... Generator Loss: 1.8442
Epoch 31/100... Discriminator Loss: 0.7627... Generator Loss: 3.0345
Epoch 32/100... Discriminator Loss: 1.1164... Generator Loss: 1.5489
Epoch 33/100... Discriminator Loss: 0.8950... Generator Loss: 2.3809
Epoch 34/100... Discriminator Loss: 1.1739... Generator Loss: 2.0350
Epoch 35/100... Discriminator Loss: 0.8261... Generator Loss: 2.5511
Epoch 36/100... Discriminator Loss: 1.1356... Generator Loss: 1.9123
Epoch 37/100... Discriminator Loss: 0.7850... Generator Loss: 2.0336
Epoch 38/100... Discriminator Loss: 1.3770... Generator Loss: 1.8081
Epoch 39/100... Discriminator Loss: 1.3912... Generator Loss: 1.3591
Epoch 40/100... Discriminator Loss: 1.1284... Generator Loss: 1.4507
Epoch 41/100... Discriminator Loss: 0.8900... Generator Loss: 1.8905
Epoch 42/100... Discriminator Loss: 0.9838... Generator Loss: 1.8828
Epoch 43/100... Discriminator Loss: 0.9963... Generator Loss: 1.9200
Epoch 44/100... Discriminator Loss: 1.4808... Generator Loss: 1.5501
Epoch 45/100... Discriminator Loss: 0.9672... Generator Loss: 1.8741
Epoch 46/100... Discriminator Loss: 1.0026... Generator Loss: 1.8940
Epoch 47/100... Discriminator Loss: 1.0286... Generator Loss: 1.5216
Epoch 48/100... Discriminator Loss: 1.2535... Generator Loss: 1.4766
Epoch 49/100... Discriminator Loss: 1.0187... Generator Loss: 1.3388
Epoch 50/100... Discriminator Loss: 1.0499... Generator Loss: 1.6826
Epoch 51/100... Discriminator Loss: 1.0148... Generator Loss: 1.9221
Epoch 52/100... Discriminator Loss: 1.1113... Generator Loss: 1.9696
Epoch 53/100... Discriminator Loss: 0.9164... Generator Loss: 1.9217
Epoch 54/100... Discriminator Loss: 1.0550... Generator Loss: 1.7317
Epoch 55/100... Discriminator Loss: 1.3500... Generator Loss: 1.1638
Epoch 56/100... Discriminator Loss: 0.9911... Generator Loss: 1.5880
Epoch 57/100... Discriminator Loss: 0.8094... Generator Loss: 2.0780
Epoch 58/100... Discriminator Loss: 0.9349... Generator Loss: 1.9257
Epoch 59/100... Discriminator Loss: 0.8955... Generator Loss: 2.5756
Epoch 60/100... Discriminator Loss: 0.8442... Generator Loss: 2.3068
Epoch 61/100... Discriminator Loss: 0.8275... Generator Loss: 2.0744
Epoch 62/100... Discriminator Loss: 0.9792... Generator Loss: 1.4697
Epoch 63/100... Discriminator Loss: 1.1776... Generator Loss: 1.3126
Epoch 64/100... Discriminator Loss: 0.8305... Generator Loss: 2.4137
Epoch 65/100... Discriminator Loss: 0.9313... Generator Loss: 2.0773
Epoch 66/100... Discriminator Loss: 0.8766... Generator Loss: 3.3415
Epoch 67/100... Discriminator Loss: 0.8594... Generator Loss: 2.1336
Epoch 68/100... Discriminator Loss: 1.1959... Generator Loss: 1.6913
Epoch 69/100... Discriminator Loss: 1.0146... Generator Loss: 2.2956
Epoch 70/100... Discriminator Loss: 0.9191... Generator Loss: 2.0312
Epoch 71/100... Discriminator Loss: 0.9018... Generator Loss: 2.0220
Epoch 72/100... Discriminator Loss: 1.1151... Generator Loss: 1.4409
Epoch 73/100... Discriminator Loss: 1.0630... Generator Loss: 1.5294
Epoch 74/100... Discriminator Loss: 0.9483... Generator Loss: 1.8939
Epoch 75/100... Discriminator Loss: 1.0796... Generator Loss: 1.4881
Epoch 76/100... Discriminator Loss: 1.0368... Generator Loss: 1.5860
Epoch 77/100... Discriminator Loss: 0.9743... Generator Loss: 1.7110
Epoch 78/100... Discriminator Loss: 0.9401... Generator Loss: 1.9215
Epoch 79/100... Discriminator Loss: 1.1534... Generator Loss: 1.6998
Epoch 80/100... Discriminator Loss: 0.9709... Generator Loss: 1.7046
Epoch 81/100... Discriminator Loss: 0.8844... Generator Loss: 1.7949
Epoch 82/100... Discriminator Loss: 1.0132... Generator Loss: 2.1153
Epoch 83/100... Discriminator Loss: 0.9493... Generator Loss: 1.6879
Epoch 84/100... Discriminator Loss: 0.9492... Generator Loss: 1.5327
Epoch 85/100... Discriminator Loss: 1.1477... Generator Loss: 1.4398
Epoch 86/100... Discriminator Loss: 0.9071... Generator Loss: 1.9815
Epoch 87/100... Discriminator Loss: 1.1913... Generator Loss: 1.4262
Epoch 88/100... Discriminator Loss: 1.0941... Generator Loss: 1.8383
Epoch 89/100... Discriminator Loss: 0.8030... Generator Loss: 1.9573
Epoch 90/100... Discriminator Loss: 1.0562... Generator Loss: 2.1593
Epoch 91/100... Discriminator Loss: 1.0917... Generator Loss: 1.8404
Epoch 92/100... Discriminator Loss: 1.0762... Generator Loss: 1.3095
Epoch 93/100... Discriminator Loss: 0.9516... Generator Loss: 1.8261
Epoch 94/100... Discriminator Loss: 0.9504... Generator Loss: 2.1033
Epoch 95/100... Discriminator Loss: 1.0374... Generator Loss: 1.8829
Epoch 96/100... Discriminator Loss: 0.8951... Generator Loss: 1.9297
Epoch 97/100... Discriminator Loss: 1.1448... Generator Loss: 2.1090
Epoch 98/100... Discriminator Loss: 0.9450... Generator Loss: 1.6973
Epoch 99/100... Discriminator Loss: 1.1092... Generator Loss: 1.5661
Epoch 100/100... Discriminator Loss: 0.9818... Generator Loss: 1.9050

Training Losses

Here we look at the generator and discriminator losses recorded during training.

%matplotlib inline

import matplotlib.pyplot as plt
fig, ax = plt.subplots()
losses = np.array(losses)
plt.plot(losses.T[0], label='Discriminator')
plt.plot(losses.T[1], label='Generator')
plt.title("Training Losses")
plt.legend()
<matplotlib.legend.Legend at 0x11c72f28>

png

Viewing Samples from Training

def view_samples(epoch, samples):
    fig, axes = plt.subplots(figsize=(7,7), nrows=4, ncols=4, sharey=True, sharex=True)
    for ax, img in zip(axes.flatten(), samples[epoch]):
        ax.xaxis.set_visible(False)
        ax.yaxis.set_visible(False)
        im = ax.imshow(img.reshape((28,28)), cmap='Greys_r')
    
    return fig, axes
# Load samples from generator taken while training
with open('train_samples.pkl', 'rb') as f:
    samples = pkl.load(f)
_ = view_samples(-1, samples)

png

Samples produced by the generator over the course of training:

rows, cols = 10, 6
fig, axes = plt.subplots(figsize=(7,12), nrows=rows, ncols=cols, sharex=True, sharey=True)

for sample, ax_row in zip(samples[::int(len(samples)/rows)], axes):
    for img, ax in zip(sample[::int(len(sample)/cols)], ax_row):
        ax.imshow(img.reshape((28,28)), cmap='Greys_r')
        ax.xaxis.set_visible(False)
        ax.yaxis.set_visible(False)

png

At first the generator produces pure noise, but it gradually improves and begins to produce recognizable digit images.

Sampling from the Generator

Using the generator we just trained, we generate random digits and see how the results look.

saver = tf.train.Saver(var_list=g_vars)
with tf.Session() as sess:
    saver.restore(sess, tf.train.latest_checkpoint('checkpoints'))
    sample_z = np.random.uniform(-1, 1, size=(16, z_size))
    gen_samples = sess.run(
                   generator(input_z, input_size, n_units=g_hidden_size, reuse=True, alpha=alpha),
                   feed_dict={input_z: sample_z})
view_samples(0, [gen_samples])
(<matplotlib.figure.Figure at 0x11ae25c0>,
 array([[<matplotlib.axes._subplots.AxesSubplot object at 0x0000000014904BE0>,
         <matplotlib.axes._subplots.AxesSubplot object at 0x00000000148216A0>,
         <matplotlib.axes._subplots.AxesSubplot object at 0x0000000014338630>,
         <matplotlib.axes._subplots.AxesSubplot object at 0x00000000143148D0>],
        [<matplotlib.axes._subplots.AxesSubplot object at 0x00000000143220F0>,
         <matplotlib.axes._subplots.AxesSubplot object at 0x0000000019F5E940>,
         <matplotlib.axes._subplots.AxesSubplot object at 0x0000000019F716D8>,
         <matplotlib.axes._subplots.AxesSubplot object at 0x0000000014B6D470>],
        [<matplotlib.axes._subplots.AxesSubplot object at 0x0000000014BE01D0>,
         <matplotlib.axes._subplots.AxesSubplot object at 0x0000000014BFC630>,
         <matplotlib.axes._subplots.AxesSubplot object at 0x0000000015062390>,
         <matplotlib.axes._subplots.AxesSubplot object at 0x00000000150870F0>],
        [<matplotlib.axes._subplots.AxesSubplot object at 0x0000000015093DD8>,
         <matplotlib.axes._subplots.AxesSubplot object at 0x0000000014C05C18>,
         <matplotlib.axes._subplots.AxesSubplot object at 0x0000000019E53EF0>,
         <matplotlib.axes._subplots.AxesSubplot object at 0x0000000019E79DA0>]], dtype=object))

png

Word Embeddings

Skip-gram word2vec

This post implements word embeddings with TensorFlow using the skip-gram architecture, which is very effective in natural language processing and machine translation. The post comes from Udacity.

Word Embeddings

When you work with words in text, you end up with tens of thousands of classes to predict, one for each word. One-hot encoding the words is hugely inefficient: a single element is 1 while all the others are 0, and the matrix multiplications involved make it even more wasteful.

one-hot encodings

To solve this problem and make the network more efficient, we use word embeddings. An embedding is really just a fully connected layer; we call this layer the embedding layer and its weights the embedding weights.

lookup

Rather than performing the matrix multiplication, we use the weight matrix as a lookup table. Each input word is encoded as an integer, for example heart as 958 and mind as 18094. To get the hidden-layer values for heart, we simply take row 958 of the embedding matrix; that row holds the information the word carries. This process is called the embedding lookup, and the number of hidden units is the embedding dimension. These weights are trained by the network just like any other weights.

png
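A toy numpy illustration of why the lookup is equivalent to the one-hot matrix multiply (the 5-word vocabulary and 3 embedding dimensions here are made up for the example):

import numpy as np

# hypothetical embedding matrix: 5 words, 3 embedding dimensions
embedding = np.random.uniform(-1, 1, size=(5, 3))
word_idx = 2                       # integer code of some word
hidden = embedding[word_idx]       # the "lookup": select one row

# identical to multiplying the one-hot vector by the matrix
one_hot = np.eye(5)[word_idx]
assert np.allclose(one_hot @ embedding, hidden)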

Word2Vec

Representing a word with a vector in this way is called word2vec, and it is very effective: the vector carries semantic information about the word. Words that appear in similar contexts, such as black, white, and red, end up with vectors that are very close together. There are two architectures for implementing word2vec: CBOW (many-to-one) and skip-gram (one-to-many). In this experiment we use skip-gram, which is more efficient than CBOW: we feed in one word and want the output to be the words that appear around it. Next, we build and train the network.
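As a small illustration of the skip-gram setup, here are the (input, target) training pairs produced for a made-up four-word sentence with a window of one word on each side (the real batching code appears later in this post):

# skip-gram pairs for a toy sentence, window size 1
sentence = ['the', 'quick', 'brown', 'fox']
pairs = []
for i in range(len(sentence)):
    context = sentence[max(0, i - 1):i] + sentence[i + 1:i + 2]
    for target in context:
        pairs.append((sentence[i], target))
# pairs -> [('the', 'quick'), ('quick', 'the'), ('quick', 'brown'),
#           ('brown', 'quick'), ('brown', 'fox'), ('fox', 'brown')]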

import time

import numpy as np
import tensorflow as tf

import utils

This time the dataset we use is the text8 dataset.

from urllib.request import urlretrieve
from os.path import isfile, isdir
from tqdm import tqdm
import zipfile

dataset_folder_path = 'data'
dataset_filename = 'text8.zip'
dataset_name = 'Text8 Dataset'

class DLProgress(tqdm):
    last_block = 0

    def hook(self, block_num=1, block_size=1, total_size=None):
        self.total = total_size
        self.update((block_num - self.last_block) * block_size)
        self.last_block = block_num

if not isfile(dataset_filename):
    with DLProgress(unit='B', unit_scale=True, miniters=1, desc=dataset_name) as pbar:
        urlretrieve(
            'http://mattmahoney.net/dc/text8.zip',
            dataset_filename,
            pbar.hook)

if not isdir(dataset_folder_path):
    with zipfile.ZipFile(dataset_filename) as zip_ref:
        zip_ref.extractall(dataset_folder_path)
        
with open('data/text8') as f:
    text = f.read()

Preprocessing

Here we transform the text to make training easier. The preprocessing function converts all punctuation into tokens, so for example `.` becomes `<PERIOD>`. It also removes words that appear five times or fewer in the dataset. This greatly reduces the noise in the data and improves the quality of the vector representations. You could also implement all of this yourself.
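A minimal sketch of what such a preprocessing step might look like (`preprocess_sketch` is our own illustrative approximation; the notebook actually uses `utils.preprocess` below):

from collections import Counter

def preprocess_sketch(text):
    # replace punctuation with tokens, e.g. '.' -> ' <PERIOD> '
    text = text.lower()
    text = text.replace('.', ' <PERIOD> ')
    text = text.replace(',', ' <COMMA> ')
    text = text.replace('"', ' <QUOTATION_MARK> ')
    words = text.split()
    # drop words that appear five times or fewer
    counts = Counter(words)
    return [word for word in words if counts[word] > 5]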

words = utils.preprocess(text)
print(words[:30])
['anarchism', 'originated', 'as', 'a', 'term', 'of', 'abuse', 'first', 'used', 'against', 'early', 'working', 'class', 'radicals', 'including', 'the', 'diggers', 'of', 'the', 'english', 'revolution', 'and', 'the', 'sans', 'culottes', 'of', 'the', 'french', 'revolution', 'whilst']
print("Total words: {}".format(len(words)))
print("Unique words: {}".format(len(set(words))))
Total words: 16680599
Unique words: 63641

Next, we create two dictionaries, one mapping words to integers and one mapping integers back to words. The dictionary is ordered by descending frequency, so the most frequent word, the, gets integer 0, the next most frequent gets 1, and so on. `int_words` is the full list of words converted to their integer values.

vocab_to_int, int_to_vocab = utils.create_lookup_tables(words)
int_words = [vocab_to_int[word] for word in words]
print(vocab_to_int['the'])
print(int_to_vocab[0])
print(int_words[0])
print(words[0])
print(vocab_to_int['anarchism'])
print(len(int_words))
0
the
5242
anarchism
5242
16680599

Subsampling

Some words, such as the, of, and for, appear so often that they provide little useful context. Discarding some of them removes noise from the data and gives us faster training and better vector representations. The probability that a word $w_i$ is discarded is:

$$P(w_i) = 1 - \sqrt{\frac{t}{f(w_i)}}$$

where $t$ is a chosen threshold and $f(w_i)$ is the frequency of word $w_i$.
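For instance, with $t = 10^{-5}$ and a very common word whose frequency is around $f(w_i) = 0.05$ (a made-up value for illustration), the drop probability is $1 - \sqrt{10^{-5} / 0.05} \approx 0.986$, so almost all of its occurrences are discarded.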

from collections import Counter
import random

threshold = 1e-5
word_counts = Counter(int_words)
total_count = len(int_words)
freqs = {word: count/total_count for word, count in word_counts.items()}
p_drop = {word: 1 - np.sqrt(threshold/freqs[word]) for word in word_counts}
train_words = [word for word in int_words if random.random() < (1 - p_drop[word])]
print(train_words[:10])
print(len(train_words))
[5242, 3080, 127, 10586, 27770, 15175, 58343, 854, 3581, 10768]
4629165

Making Batches

Now we need to put the data into the right form to feed into the network. With the skip-gram architecture, for each word in the text we want to grab the words in a window around it, of size $C$.

From Mikolov et al.:

“Since the more distant words are usually less related to the current word than those close to it, we give less weight to the distant words by sampling less from those words in our training examples… If we choose $C = 5$, for each training word we will select randomly a number $R$ in range $< 1; C >$, and then use $R$ words from history and $R$ words from the future of the current word as correct labels.”

def get_target(words, idx, window_size=5):
    ''' Get a list of words in a window around an index. '''
    
    R = np.random.randint(1, window_size+1)
    start = idx - R if (idx - R) > 0 else 0
    stop = idx + R
    target_words = set(words[start:idx] + words[idx+1:stop+1])
    
    return list(target_words)

Create batches to feed into the network:

def get_batches(words, batch_size, window_size=5):
    ''' Create a generator of word batches as a tuple (inputs, targets) '''
    
    n_batches = len(words)//batch_size
    
    # only full batches
    words = words[:n_batches*batch_size]
    
    for idx in range(0, len(words), batch_size):
        x, y = [], []
        batch = words[idx:idx+batch_size]
        for ii in range(len(batch)):
            batch_x = batch[ii]
            batch_y = get_target(batch, ii, window_size)
            y.extend(batch_y)
            x.extend([batch_x]*len(batch_y))
        yield x, y
    
test_word = [1, 2, 3, 4, 5, 6, 7, 8, 9]
test_1 = get_batches(test_word, 4)
x, y = next(test_1)
print(x)
print(np.array(y)[:, None])
[1, 1, 1, 2, 2, 3, 3, 3, 4]
[[2]
 [3]
 [4]
 [1]
 [3]
 [1]
 [2]
 [4]
 [3]]

Building the Graph

The network architecture, from Chris McCormick's blog: embedding_network

Input words are fed in as one-hot encoded vectors, pass through the hidden layer, and then through a softmax layer for prediction. But we only want to train the hidden-layer weight matrix to find efficient representations of words, so we can ignore the output layer: we are not going to make predictions with it, we just want the weight matrix.

train_graph = tf.Graph()
with train_graph.as_default():
    inputs = tf.placeholder(tf.int32, [None], name='inputs')
    labels = tf.placeholder(tf.int32, [None, None], name='labels')

Embedding

The embedding matrix has one row per word in the vocabulary. For example, if you have 10,000 words and 300 hidden units (you choose the number of hidden units yourself), the embedding matrix is 10,000 x 300.

print(len(int_to_vocab))
63641
n_vocab = len(int_to_vocab)
n_embedding = 200 # Number of embedding features 
with train_graph.as_default():
    embedding = tf.Variable(tf.random_uniform((n_vocab, n_embedding), -1, 1))  # create the weight matrix, randomly initialized
    embed = tf.nn.embedding_lookup(embedding, inputs)  # look up the embedding vectors for the input words

Negative Sampling

During an update we adjust the weights for all the correct labels, but only for a small sample of the incorrect ones. The way those few incorrect labels are chosen is called negative sampling, and TensorFlow provides it as `tf.nn.sampled_softmax_loss`.

# Number of negative labels to sample
n_sampled = 100
with train_graph.as_default():
    softmax_w = tf.Variable(tf.truncated_normal((n_vocab, n_embedding), stddev=0.1))
    softmax_b = tf.Variable(tf.zeros(n_vocab))
    
    # Calculate the loss using negative sampling
    loss = tf.nn.sampled_softmax_loss(softmax_w, softmax_b, 
                                      labels, embed,
                                      n_sampled, n_vocab)
    
    cost = tf.reduce_mean(loss)
    optimizer = tf.train.AdamOptimizer().minimize(cost)

Validation

We pick a mix of common and uncommon words; words that carry similar information should end up close to each other in the embedding space.

with train_graph.as_default():
    ## From Thushan Ganegedara's implementation
    valid_size = 16 # Random set of words to evaluate similarity on.
    valid_window = 100
    # pick 8 samples from (0,100) and (1000,1100) each ranges. lower id implies more frequent 
    valid_examples = np.array(random.sample(range(valid_window), valid_size//2))
    valid_examples = np.append(valid_examples, 
                               random.sample(range(1000,1000+valid_window), valid_size//2))

    valid_dataset = tf.constant(valid_examples, dtype=tf.int32)
    
    # We use the cosine distance:
    norm = tf.sqrt(tf.reduce_sum(tf.square(embedding), 1, keep_dims=True))
    normalized_embedding = embedding / norm
    valid_embedding = tf.nn.embedding_lookup(normalized_embedding, valid_dataset)
    similarity = tf.matmul(valid_embedding, tf.transpose(normalized_embedding))
# If the checkpoints directory doesn't exist:
!mkdir checkpoints
epochs = 10
batch_size = 1000
window_size = 10

with train_graph.as_default():
    saver = tf.train.Saver()

with tf.Session(graph=train_graph) as sess:
    iteration = 1
    loss = 0
    sess.run(tf.global_variables_initializer())

    for e in range(1, epochs+1):
        batches = get_batches(train_words, batch_size, window_size)
        start = time.time()
        for x, y in batches: 
            
            feed = {inputs: x,
                    labels: np.array(y)[:, None]}
            train_loss, _ = sess.run([cost, optimizer], feed_dict=feed)
            
            loss += train_loss
            
            if iteration % 100 == 0: 
                end = time.time()
                print("Epoch {}/{}".format(e, epochs),
                      "Iteration: {}".format(iteration),
                      "Avg. Training loss: {:.4f}".format(loss/100),
                      "{:.4f} sec/batch".format((end-start)/100))
                loss = 0
                start = time.time()
            
            if iteration % 1000 == 0:
                # note that this is expensive (~20% slowdown if computed every 500 steps)
                sim = similarity.eval()
                for i in range(valid_size):
                    valid_word = int_to_vocab[valid_examples[i]]
                    top_k = 8 # number of nearest neighbors
                    nearest = (-sim[i, :]).argsort()[1:top_k+1]
                    log = 'Nearest to %s:' % valid_word
                    for k in range(top_k):
                        close_word = int_to_vocab[nearest[k]]
                        log = '%s %s,' % (log, close_word)
                    print(log)
            
            iteration += 1
    save_path = saver.save(sess, "checkpoints/text8.ckpt")
    embed_mat = sess.run(normalized_embedding)
with train_graph.as_default():
    saver = tf.train.Saver()

with tf.Session(graph=train_graph) as sess:
    saver.restore(sess, tf.train.latest_checkpoint('checkpoints'))
    embed_mat = sess.run(embedding)

Visualizing the Word Vectors

We use t-SNE to visualize our high-dimensional data: t-SNE reduces it to two dimensions while preserving structure. For background, see this post from Christopher Olah.

%matplotlib inline
%config InlineBackend.figure_format = 'retina'

import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
viz_words = 500
tsne = TSNE()
embed_tsne = tsne.fit_transform(embed_mat[:viz_words, :])
fig, ax = plt.subplots(figsize=(14, 14))
for idx in range(viz_words):
    plt.scatter(*embed_tsne[idx, :], color='steelblue')
    plt.annotate(int_to_vocab[idx], (embed_tsne[idx, 0], embed_tsne[idx, 1]), alpha=0.7)

png