Generating Handwritten Digit Images with Controllable Factors Using an InfoGAN Model

nanyue 2024-08-05 20:12:49


To implement an InfoGAN model in Keras that generates handwritten digit images with controllable factors, follow the steps below.

First, import the required libraries and modules:

from keras.layers import Input, Dense, Reshape, Flatten, Dropout, concatenate
from keras.layers import BatchNormalization, Activation, ZeroPadding2D
from keras.layers.advanced_activations import LeakyReLU
from keras.layers.convolutional import UpSampling2D, Conv2D
from keras.models import Sequential, Model
from keras.optimizers import Adam
from keras.datasets import mnist
import numpy as np
import matplotlib.pyplot as plt

Next, define the generator and discriminator models:

def build_generator(latent_dim, num_classes):

    model = Sequential()

    # Project and reshape to a 7x7x128 feature map, then upsample twice to 28x28x1
    model.add(Dense(128 * 7 * 7, activation="relu", input_dim=latent_dim + num_classes))
    model.add(Reshape((7, 7, 128)))
    model.add(UpSampling2D())
    model.add(Conv2D(128, kernel_size=3, padding="same"))
    model.add(BatchNormalization(momentum=0.8))
    model.add(Activation("relu"))
    model.add(UpSampling2D())
    model.add(Conv2D(64, kernel_size=3, padding="same"))
    model.add(BatchNormalization(momentum=0.8))
    model.add(Activation("relu"))
    model.add(Conv2D(1, kernel_size=3, padding="same"))
    model.add(Activation("tanh"))

    model.summary()

    # Concatenate the noise vector and the one-hot categorical code so the
    # label actually conditions the generated image
    noise = Input(shape=(latent_dim,))
    label = Input(shape=(num_classes,))
    gen_input = concatenate([noise, label])
    img = model(gen_input)

    return Model([noise, label], img)


def build_discriminator(img_shape, num_classes):

    model = Sequential()

    model.add(Conv2D(32, kernel_size=3, strides=2, input_shape=img_shape, padding="same"))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dropout(0.25))
    model.add(Conv2D(64, kernel_size=3, strides=2, padding="same"))
    model.add(ZeroPadding2D(padding=((0, 1), (0, 1))))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dropout(0.25))
    model.add(BatchNormalization(momentum=0.8))
    model.add(Conv2D(128, kernel_size=3, strides=2, padding="same"))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dropout(0.25))
    model.add(BatchNormalization(momentum=0.8))
    model.add(Conv2D(256, kernel_size=3, strides=1, padding="same"))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dropout(0.25))
    model.add(Flatten())
    
    model.summary()

    img = Input(shape=img_shape)
    features = model(img)

    # Real/fake head plus an auxiliary head that recovers the categorical code (the Q network)
    validity = Dense(1, activation='sigmoid')(features)
    label = Dense(num_classes, activation='softmax')(features)

    return Model(img, [validity, label])

Then, define the combined InfoGAN model:

def build_infogan(generator, discriminator, latent_dim, num_classes):

    generator_input = Input(shape=(latent_dim,))
    generator_label = Input(shape=(num_classes,))
    generator_output = generator([generator_input, generator_label])

    # The discriminator is frozen before this combined model is compiled,
    # so training it only updates the generator's weights
    discriminator_output, discriminator_label = discriminator(generator_output)

    return Model([generator_input, generator_label], [discriminator_output, discriminator_label])

Next, load the MNIST dataset and preprocess it:

(X_train, y_train), (_, _) = mnist.load_data()

X_train = X_train / 127.5 - 1.
X_train = np.expand_dims(X_train, axis=3)

num_classes = 10
# One-hot encode the integer labels: shape (num_samples, num_classes)
y_train = np.eye(num_classes)[y_train]

Define the hyperparameters:

img_shape = (28, 28, 1)
latent_dim = 62
epochs = 20000
batch_size = 128
sample_interval = 1000

Then, build and compile the generator, discriminator, and combined InfoGAN model:

generator = build_generator(latent_dim, num_classes)
discriminator = build_discriminator(img_shape, num_classes)

# Compile the discriminator first, then freeze it so that only the
# generator's weights are updated when the combined model is trained
discriminator.compile(loss=['binary_crossentropy', 'categorical_crossentropy'],
                      loss_weights=[1, 1],
                      optimizer=Adam(0.0002, 0.5))

discriminator.trainable = False
infogan = build_infogan(generator, discriminator, latent_dim, num_classes)
infogan.compile(loss=['binary_crossentropy', 'categorical_crossentropy'],
                loss_weights=[1, 1],
                optimizer=Adam(0.0002, 0.5))
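
Here the categorical crossentropy on the auxiliary (label) output plays the role of InfoGAN's mutual-information term. If you prefer a loss that mirrors the paper's variational lower bound more explicitly, one commonly used sketch is the following (it assumes from keras import backend as K; the name mutual_info_loss is just an illustrative choice):

from keras import backend as K

def mutual_info_loss(c, c_given_x):
    # Variational lower bound on the mutual information between the
    # categorical code c and the auxiliary head's prediction Q(c|x)
    eps = 1e-8
    conditional_entropy = K.mean(-K.sum(K.log(c_given_x + eps) * c, axis=1))
    # Entropy of the code itself; effectively constant for one-hot codes
    entropy = K.mean(-K.sum(K.log(c + eps) * c, axis=1))
    return conditional_entropy + entropy

Passing mutual_info_loss in place of 'categorical_crossentropy' in infogan.compile gives essentially the same gradients for one-hot categorical codes, since the two differ only by the near-zero entropy term.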

Next, define the training loop:

for epoch in range(epochs):

    # ---------------------
    #  Train the discriminator
    # ---------------------

    # Randomly select a batch of real images and their labels
    idx = np.random.randint(0, X_train.shape[0], batch_size)
    real_imgs = X_train[idx]
    labels = y_train[idx]

    # Sample a batch of noise vectors and random one-hot categorical codes
    noise = np.random.normal(0, 1, (batch_size, latent_dim))
    sampled_labels = np.random.randint(0, num_classes, batch_size)
    sampled_labels = np.eye(num_classes)[sampled_labels]

    # Generate a batch of fake images with the generator
    gen_imgs = generator.predict([noise, sampled_labels])

    # Train the discriminator on real and fake batches
    d_loss_real = discriminator.train_on_batch(real_imgs, [np.ones((batch_size, 1)), labels])
    d_loss_fake = discriminator.train_on_batch(gen_imgs, [np.zeros((batch_size, 1)), sampled_labels])
    d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

    # ---------------------
    #  Train the generator via the combined InfoGAN model
    # ---------------------

    # Sample a fresh batch of noise vectors and random one-hot categorical codes
    noise = np.random.normal(0, 1, (batch_size, latent_dim))
    sampled_labels = np.random.randint(0, num_classes, batch_size)
    sampled_labels = np.eye(num_classes)[sampled_labels]

    # The generator is trained to be classified as real and to make the
    # auxiliary head recover the sampled categorical code
    g_loss = infogan.train_on_batch([noise, sampled_labels], [np.ones((batch_size, 1)), sampled_labels])

    # Print the loss values
    print("%d [D loss: %f] [G loss: %f]" % (epoch, d_loss[0], g_loss[0]))

    # Periodically display a grid of generated sample images
    if epoch % sample_interval == 0:
        r, c = 10, 10
        noise = np.random.normal(0, 1, (r * c, latent_dim))
        sampled_labels = np.array([num for _ in range(r) for num in range(c)])
        sampled_labels = np.eye(num_classes)[sampled_labels]

        gen_imgs = generator.predict([noise, sampled_labels])
        gen_imgs = 0.5 * gen_imgs + 0.5

        fig, axs = plt.subplots(r, c)
        cnt = 0
        for i in range(r):
            for j in range(c):
                axs[i, j].imshow(gen_imgs[cnt, :, :, 0], cmap='gray')
                axs[i, j].axis('off')
                cnt += 1
        plt.show()
        plt.close()

Finally, run the training by executing the script: the loop above alternates discriminator and generator updates each epoch and displays a grid of generated digits every sample_interval epochs, so it takes the place of a separate call to fit.

This is a simple example of using Keras to implement an InfoGAN model that generates handwritten digit images with controllable factors. Note that it is only a basic implementation, and you can modify and improve it as needed.
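
To actually see the controllable factor, fix the one-hot categorical code after training and vary only the noise. Below is a minimal sketch that reuses the generator, latent_dim, and num_classes defined above; digit is just an illustrative choice of class:

# Generate 25 samples of one chosen digit by fixing the categorical code
digit = 7
n_samples = 25
noise = np.random.normal(0, 1, (n_samples, latent_dim))
fixed_labels = np.eye(num_classes)[np.full(n_samples, digit)]

gen_imgs = generator.predict([noise, fixed_labels])
gen_imgs = 0.5 * gen_imgs + 0.5  # rescale from [-1, 1] to [0, 1]

fig, axs = plt.subplots(5, 5)
for i, ax in enumerate(axs.flat):
    ax.imshow(gen_imgs[i, :, :, 0], cmap='gray')
    ax.axis('off')
plt.show()

If training has converged, every image in the grid should show the chosen class, while the noise vector accounts for the remaining variation in stroke style.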
