DCGAN

5 minute read

DCGAN

  • refer to github, YBIGTA
    DCGAN

GAN practice

Generator

generator

Discriminator

Discriminator

Understanding DCGAN: a code review of DCGAN

Convolution

# Interactive session so that tensors can be evaluated directly with .eval().
sess = tf.InteractiveSession()

# 3x3 single-channel image holding the values 1..9, shaped (1, 3, 3, 1).
image = np.arange(1, 10, dtype=np.float32).reshape(1, 3, 3, 1)
print(image.shape)
plt.imshow(image.reshape(3, 3), cmap='Greys')

# print("imag:\n", image)
print("image.shape", image.shape)

# 2x2 filter of ones with one input and one output channel, shaped (2, 2, 1, 1).
weight = tf.constant(np.ones((2, 2, 1, 1), dtype=np.float32))
print("weight.shape", weight.shape)

# Stride 1 with VALID padding: a 3x3 input and a 2x2 filter give a 2x2 output.
conv2d = tf.nn.conv2d(image, weight, strides=[1, 1, 1, 1], padding='VALID')
conv2d_img = conv2d.eval()
print("conv2d_img.shape", conv2d_img.shape)

# Exchange the first and last axes so iterating yields one feature map per
# output channel.
conv2d_img = np.transpose(conv2d_img, (3, 1, 2, 0))
for plot_idx, feature_map in enumerate(conv2d_img, start=1):
    patch = feature_map.reshape(2, 2)
    print(patch)
    plt.subplot(1, 2, plot_idx)
    plt.imshow(patch, cmap='gray')

mnist

def mnist():
    """Load MNIST, configure a DCGAN for it and run training.

    The images returned by ``util.get_mnist()`` are reshaped to
    (N, 28, 28, 1); the labels are not used.
    """
    images, _ = util.get_mnist()
    n_samples = len(images)
    images = images.reshape(n_samples, 28, 28, 1)

    # Side length and channel count are read back from the reshaped array.
    _, side, _, channels = images.shape

    # Discriminator layout.
    # conv entries: (feature maps out, filter size, stride, apply batch norm)
    # dense entries: (units out, apply batch norm)
    discriminator_spec = {
        'conv_layers': [
            (2, 5, 2, False),
            (64, 5, 2, True),
        ],
        'dense_layers': [
            (1024, True),
        ],
    }

    # Generator layout; the last conv layer emits `channels` feature maps
    # and its output goes through a sigmoid.
    generator_spec = {
        'z': 100,
        'projection': 128,
        'bn_after_project': False,
        'conv_layers': [
            (128, 5, 2, True),
            (channels, 5, 2, False),
        ],
        'dense_layers': [
            (1024, True),
        ],
        'output_activation': tf.sigmoid,
    }

    model = DCGAN(side, channels, discriminator_spec, generator_spec)
    model.fit(images)

DCGAN

# Keep the image geometry and the latent-vector size for later use.
self.img_length = img_length      # image side length (28 in the MNIST setup)
self.num_colors = num_colors      # number of channels (1 in the MNIST setup)
self.latent_dims = g_sizes['z']   # latent vector size (100 in the MNIST setup)

# Graph inputs: a batch of images and a batch of latent vectors.
image_shape = (None, img_length, img_length, num_colors)
self.X = tf.placeholder(tf.float32, shape=image_shape, name='X')

latent_shape = (None, self.latent_dims)
self.Z = tf.placeholder(tf.float32, shape=latent_shape, name='Z')

DCGAN

  • Refer to the comments in the code below.
class DCGAN:
    """Deep convolutional GAN assembled as a TensorFlow 1.x graph.

    Constructing an instance builds the discriminator and generator
    sub-graphs, their cross-entropy costs, a discriminator accuracy metric
    and one Adam training op per network, then opens an interactive session
    and runs the global variable initializer.

    The shape annotations in the comments (28, 1, 100, ...) are the values
    obtained with the MNIST configuration used elsewhere in this article.
    """

    def __init__(self, img_length, num_colors, d_sizes, g_sizes):
        """Build the whole graph and start a session.

        Args:
            img_length: side length of the square input images.
            num_colors: number of image channels.
            d_sizes: discriminator layout with keys ``'conv_layers'``
                (tuples ``(mo, filtersz, stride, apply_batch_norm)``) and
                ``'dense_layers'`` (tuples ``(mo, apply_batch_norm)``).
            g_sizes: generator layout with keys ``'z'``, ``'projection'``,
                ``'bn_after_project'``, ``'conv_layers'``,
                ``'dense_layers'`` and ``'output_activation'``.
        """
        # save for later
        self.img_length = img_length  # 28
        self.num_colors = num_colors  # 1
        self.latent_dims = g_sizes['z']  # 100

        # define the input data
        # X input placeholder (N,28,28,1)
        self.X = tf.placeholder(
            tf.float32,
            shape=(None, img_length, img_length, num_colors),
            name='X')
        # Z input placeholder (N,100)
        self.Z = tf.placeholder(
            tf.float32, shape=(None, self.latent_dims), name='Z')

        # note: by making batch_sz a placeholder, we can specify a variable
        # number of samples in the FS-conv operation where we are required
        # to pass in output_shape
        # we need only pass in the batch size via feed_dict
        self.batch_sz = tf.placeholder(tf.int32, shape=(), name='batch_sz')

        # build the discriminator: real images X -> logits
        logits = self.build_discriminator(self.X, d_sizes)

        # build the generator: latent vectors Z -> generated images
        self.sample_images = self.build_generator(self.Z, g_sizes)

        # run the generated images through the same discriminator
        # (variables are reused, not re-created) to get sample_logits
        with tf.variable_scope("discriminator") as scope:
            scope.reuse_variables()
            sample_logits = self.d_forward(self.sample_images, True)

        # get sample images for test time (batch norm is different)
        with tf.variable_scope("generator") as scope:
            scope.reuse_variables()
            self.sample_images_test = self.g_forward(
                self.Z, reuse=True, is_training=False)

        # build costs
        # real images should be classified as 1
        self.d_cost_real = tf.nn.sigmoid_cross_entropy_with_logits(
            logits=logits, labels=tf.ones_like(logits))
        # generated images should be classified as 0
        self.d_cost_fake = tf.nn.sigmoid_cross_entropy_with_logits(
            logits=sample_logits, labels=tf.zeros_like(sample_logits))

        # total discriminator cost is mean(d_cost_real) + mean(d_cost_fake)
        self.d_cost = tf.reduce_mean(self.d_cost_real) + tf.reduce_mean(
            self.d_cost_fake)

        # generator cost: cross entropy of D(G(z)) against all-ones labels,
        # i.e. the generator is rewarded when its samples are scored as real
        self.g_cost = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=sample_logits, labels=tf.ones_like(sample_logits)))

        # discriminator accuracy: a positive logit means "real", a negative
        # logit means "fake"
        real_predictions = tf.cast(logits > 0, tf.float32)
        fake_predictions = tf.cast(sample_logits < 0, tf.float32)
        # Count the predictions from the tensors themselves rather than from
        # a module-level batch-size constant, so the metric stays correct
        # for whatever batch size is actually fed.
        num_predictions = tf.cast(
            tf.size(logits) + tf.size(sample_logits), tf.float32)
        num_correct = tf.reduce_sum(real_predictions) + tf.reduce_sum(
            fake_predictions)
        self.d_accuracy = num_correct / num_predictions

        # optimizers
        # Select each network's trainable variables by its variable-scope
        # prefix; matching on a single letter would also pick up unrelated
        # variables whose names merely start with 'd' or 'g'.
        trainable = tf.trainable_variables()
        self.d_params = [
            t for t in trainable if t.name.startswith('discriminator/')
        ]
        self.g_params = [
            t for t in trainable if t.name.startswith('generator/')
        ]

        # discriminator update: minimise d_cost over d_params only
        self.d_train_op = tf.train.AdamOptimizer(
            LEARNING_RATE, beta1=BETA1).minimize(
                self.d_cost, var_list=self.d_params)

        # generator update: minimise g_cost over g_params only
        self.g_train_op = tf.train.AdamOptimizer(
            LEARNING_RATE, beta1=BETA1).minimize(
                self.g_cost, var_list=self.g_params)

        # set up session and variables for later
        self.init_op = tf.global_variables_initializer()
        self.sess = tf.InteractiveSession()
        self.sess.run(self.init_op)

    def build_discriminator(self, X, d_sizes):
        """Create the discriminator layers and return the logits for ``X``.

        The layers are stored on ``self.d_convlayers``,
        ``self.d_denselayers`` and ``self.d_finallayer``.

        Args:
            X: input image tensor.
            d_sizes: discriminator layout (see ``__init__``).

        Returns:
            The result of ``self.d_forward(X)``.
        """
        with tf.variable_scope("discriminator"):

            # build conv layers
            self.d_convlayers = []
            mi = self.num_colors  # 1
            dim = self.img_length  # 28
            count = 0

            # MNIST config:
            # conv(mi=1, mo=2, bn=False, filtersz=5, stride=2, lrelu)
            # -> conv(mi=2, mo=64, bn=True, filtersz=5, stride=2, lrelu)
            for mo, filtersz, stride, apply_batch_norm in d_sizes[
                    'conv_layers']:
                # make up a name - used for get_variable
                name = "convlayer_%s" % count
                count += 1
                layer = ConvLayer(name, mi, mo, apply_batch_norm, filtersz,
                                  stride, lrelu)
                self.d_convlayers.append(layer)
                mi = mo
                print("dim:", dim)
                # each strided conv shrinks the side length: 28 -> 14 -> 7
                dim = int(np.ceil(float(dim) / stride))

            # flattened conv output size: 64 * 7 * 7 for the MNIST config
            mi = mi * dim * dim

            # build dense layers
            # MNIST config: dense(mi=64*7*7, mo=1024, bn=True, lrelu)
            # note: `count` keeps running on from the conv layers, so dense
            # layer names continue the same numbering
            self.d_denselayers = []
            for mo, apply_batch_norm in d_sizes['dense_layers']:
                name = "denselayer_%s" % count
                count += 1

                layer = DenseLayer(name, mi, mo, apply_batch_norm, lrelu)
                mi = mo
                self.d_denselayers.append(layer)

            # final logistic layer: one output unit, no batch norm,
            # identity activation (raw logit)
            name = "denselayer_%s" % count
            self.d_finallayer = DenseLayer(name, mi, 1, False, lambda x: x)

            # get the logits; the cost is built later by the caller
            return self.d_forward(X)

    def d_forward(self, X, reuse=None, is_training=True):
        # body elided in this excerpt
        ...

    def build_generator(self, Z, g_sizes):
        """Create the generator layers and return the generated images.

        The layers are stored on ``self.g_denselayers`` and
        ``self.g_convlayers``; the per-layer side lengths on ``self.g_dims``
        and the layout itself on ``self.g_sizes``.

        Args:
            Z: latent input tensor.
            g_sizes: generator layout (see ``__init__``).

        Returns:
            The result of ``self.g_forward(Z)``.
        """
        with tf.variable_scope("generator"):

            # determine the size of the data at each step by walking the
            # conv layers from the output image back to the projection
            dims = [self.img_length]  # [28]
            dim = self.img_length  # 28
            for _, _, stride, _ in reversed(g_sizes['conv_layers']):
                dim = int(np.ceil(float(dim) / stride))
                dims.append(dim)
            # dims is now [28, 14, 7] for the MNIST config

            # note: dims is actually backwards
            # the first layer of the generator is actually last
            # so let's reverse it -> [7, 14, 28]
            dims = list(reversed(dims))
            print("dims:", dims)
            self.g_dims = dims

            # dense layers
            mi = self.latent_dims  # 100
            self.g_denselayers = []
            count = 0
            # MNIST config: dense(mi=100, mo=1024, bn=True)
            for mo, apply_batch_norm in g_sizes['dense_layers']:
                name = "g_denselayer_%s" % count
                count += 1

                layer = DenseLayer(name, mi, mo, apply_batch_norm)
                self.g_denselayers.append(layer)
                mi = mo

            # final dense layer projects to projection * dims[0] * dims[0]
            # units (128 * 7 * 7 for the MNIST config); the layer's own
            # batch-norm flag is the negation of 'bn_after_project'
            mo = g_sizes['projection'] * dims[0] * dims[0]
            name = "g_denselayer_%s" % count
            layer = DenseLayer(name, mi, mo, not g_sizes['bn_after_project'])
            self.g_denselayers.append(layer)

            # fs-conv layers
            mi = g_sizes['projection']
            self.g_convlayers = []

            # every conv layer uses relu except the last, which uses the
            # configured output activation (may be tanh or sigmoid)
            num_relus = len(g_sizes['conv_layers']) - 1
            activation_functions = [tf.nn.relu] * num_relus + [
                g_sizes['output_activation']
            ]

            # MNIST config:
            # fs-conv(mi=128, mo=128, out=(N,14,14), bn=True, filtersz=5, stride=2)
            # -> fs-conv(mi=128, mo=1, out=(N,28,28), bn=False, filtersz=5, stride=2)
            layer_specs = zip(g_sizes['conv_layers'], activation_functions)
            for i, (spec, f) in enumerate(layer_specs):
                name = "fs_convlayer_%s" % i
                mo, filtersz, stride, apply_batch_norm = spec
                # the batch dimension comes from the batch_sz placeholder
                output_shape = [self.batch_sz, dims[i + 1], dims[i + 1], mo]
                print("mi:", mi, "mo:", mo, "outp shape:", output_shape)
                layer = FractionallyStridedConvLayer(
                    name, mi, mo, output_shape, apply_batch_norm, filtersz,
                    stride, f)
                self.g_convlayers.append(layer)
                mi = mo

            # get the output
            self.g_sizes = g_sizes
            return self.g_forward(Z)

    def g_forward(self, Z, reuse=None, is_training=True):
        # body elided in this excerpt
        ...

    def fit(self, X):
        # body elided in this excerpt
        ...