
python - Error in implementation of gradient penalty in WGAN

I am trying to add the gradient penalty to the WGAN part of the DeBlurGAN implementation. I looked at an existing implementation of the gradient penalty and adapted it to the DeBlurGAN code, but I am getting an error in the 'partial gradient penalty loss' part.

The error I get:

InvalidArgumentError: in user code:

    <ipython-input-7-ef8baa4c530e>:25 mix  *
        vgg = VGG16(include_top=False, weights='imagenet', input_shape=image_shape)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/applications/vgg16.py:222 VGG16  **
        model.load_weights(weights_path)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training_v1.py:236 load_weights
        return super(Model, self).load_weights(filepath, by_name, skip_mismatch)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:2234 load_weights
        hdf5_format.load_weights_from_hdf5_group(f, self.layers)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/saving/hdf5_format.py:710 load_weights_from_hdf5_group
        K.batch_set_value(weight_value_tuples)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/util/dispatch.py:201 wrapper
        return target(*args, **kwargs)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/backend.py:3731 batch_set_value
        get_session().run(assign_ops, feed_dict=feed_dict)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/backend.py:644 get_session
        _initialize_variables(session)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/backend.py:1089 _initialize_variables
        [variables_module.is_variable_initialized(v) for v in candidate_vars])
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/client/session.py:968 run
        run_metadata_ptr)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/client/session.py:1191 _run
        feed_dict_tensor, options, run_metadata)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/client/session.py:1369 _do_run
        run_metadata)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/client/session.py:1394 _do_call
        raise type(e)(node_def, op, message)

    InvalidArgumentError: Node 'training/Adam/gradients/gradients/loss/Discriminator_2_loss/gradients/Discriminator_12/batch_normalization_12/cond_grad/StatelessIf_grad/StatelessIf': Connecting to invalid output 5 of source node loss/Discriminator_2_loss/gradients/Discriminator_12/batch_normalization_12/cond_grad/StatelessIf which has 5 outputs. Try using tf.compat.v1.experimental.output_all_intermediates(True).
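
For reference, the workaround named in the error message itself can be applied before any model is built. This is only the message's own suggestion, not a confirmed fix for the underlying issue:

import tensorflow as tf
# suggested by the InvalidArgumentError above; affects v1-style control-flow gradients
tf.compat.v1.experimental.output_all_intermediates(True)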

Implementation of the gradient penalty part:

# Imports assumed from the rest of the notebook (the traceback shows tf.keras):
import numpy as np
from tensorflow import keras
from tensorflow.keras import backend as K

def gradient_penalty_loss(y_true, y_pred, averaged_samples):
    # gradient of the critic output w.r.t. the interpolated samples
    gradients = K.gradients(y_pred, averaged_samples)[0]
    # per-sample L2 norm of the gradient over all non-batch axes
    gradients_sqr = K.square(gradients)
    gradients_sqr_sum = K.sum(gradients_sqr, axis=np.arange(1, len(gradients_sqr.shape)))
    gradient_l2_norm = K.sqrt(gradients_sqr_sum)
    gradient_penalty = K.square(1 - gradient_l2_norm)  # two-sided penalty
    return K.mean(gradient_penalty)
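
For context, this is the two-sided penalty from the WGAN-GP paper (Gulrajani et al., 2017), E[(||∇_x̂ D(x̂)||₂ − 1)²] computed over interpolated samples x̂; K.square(1 - gradient_l2_norm) gives the same value, since (1 − a)² = (a − 1)².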


class RandomWeightedAverage(keras.layers.Concatenate):
    """Returns a random point on the line between each real/generated pair."""
    def _merge_function(self, inputs):
        # a single epsilon is drawn per batch here, not one per sample
        weights = K.random_uniform((1, 1, 1, 1))
        return (weights * inputs[0]) + ((1 - weights) * inputs[1])
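
As a side note, unrelated to the error: this draws one interpolation weight for the whole batch, while the keras-contrib WGAN-GP example draws one per sample. A minimal sketch of the per-sample variant, assuming a fixed batch size known at build time (RandomWeightedAveragePerSample is a hypothetical name):

class RandomWeightedAveragePerSample(keras.layers.Concatenate):
    def __init__(self, batch_size, **kwargs):
        super().__init__(**kwargs)
        self.batch_size = batch_size

    def _merge_function(self, inputs):
        # one epsilon per sample instead of one shared epsilon per batch
        weights = K.random_uniform((self.batch_size, 1, 1, 1))
        return (weights * inputs[0]) + ((1 - weights) * inputs[1])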

Implementation of the part that throws the error:

def train_multiple_outputs(batch_size, epoch_num, critic_updates=5, train=False, shard_count=0, start=0):
    TRAINING_RATIO = critic_updates
    g = generator_model()
    d = discriminator_model()
    d_on_g = generator_containing_discriminator_multiple_outputs(g, d)

    d_opt = Adam(lr=1E-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
    d_on_g_opt = Adam(lr=1E-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08)

    # To resume training from a particular epoch, load weights here
    g_name, d_name = "", ""
    if start:
      pass
    elif shard_count==0:
      g_name = "/content/drive/My Drive/SavedModel/generator_4.h5"
      d_name = "/content/drive/My Drive/SavedModel/discriminator_4.h5" 
    else:  
      g_name = "/content/drive/My Drive/SavedModel/generator_"+ str(shard_count-1) + ".h5"
      d_name = "/content/drive/My Drive/SavedModel/discriminator_"+ str(shard_count-1) + ".h5"

    print("Model Name: " + g_name + " " + d_name)

    if(g_name==""):
      pass
    else:
      g.load_weights(g_name)
      d.load_weights(d_name)
      print("Model loaded")

    #START - WGAN
    d.trainable = True
    real_samples = Input(shape=image_shape)
    generator_input_for_discriminator = Input(shape=image_shape)
    generated_samples_for_discriminator = g(generator_input_for_discriminator)  # generated (deblurred) images
    discriminator_output_from_generator = d(generated_samples_for_discriminator)  # discriminator output for generated samples
    discriminator_output_from_real_samples = d(real_samples)  # discriminator output for real samples

    averaged_samples = RandomWeightedAverage(1)([real_samples,
                                                 generated_samples_for_discriminator])
    averaged_samples_out = d(averaged_samples)
    # Works fine up to this point; checked by replacing partial_gp_loss with wasserstein_loss in the discriminator compile
   
    # Keras only passes (y_true, y_pred) to a loss function, so the interpolated
    # samples are bound in with functools.partial; Keras needs the __name__
    partial_gp_loss = partial(gradient_penalty_loss,
                              averaged_samples=averaged_samples)
    partial_gp_loss.__name__ = 'gradient_penalty'

    discriminator_model1 = Model(inputs=[real_samples, generator_input_for_discriminator],
                            outputs=[discriminator_output_from_real_samples,
                                     discriminator_output_from_generator,
                                     averaged_samples_out])
    discriminator_model1.compile(optimizer=Adam(0.0001, beta_1=0.5, beta_2=0.9),
                            loss=[wasserstein_loss,
                                  wasserstein_loss,
                                  partial_gp_loss])
    # discriminator_model1.summary()
    #END    

    #START - Generator with Discriminator
    d.trainable = False
    g.trainable = True
    loss = [mix, wasserstein_loss]
    loss_weights = [100, 1]
    d_on_g.compile(optimizer=d_on_g_opt, loss=loss, loss_weights=loss_weights)
    d.trainable = True
    #END

    # This is not used
    output_true_batch, output_false_batch = np.ones((batch_size, 1)), -np.ones((batch_size, 1))

    # DATASET part START
    x_name = "/content/drive/MyDrive/shard/A"+str(shard_count)+".npy"
    y_name = "/content/drive/MyDrive/shard/B"+str(shard_count)+".npy"

    print("Name : " + x_name + " " + y_name)

    x_train = np.load(x_name)
    y_train = np.load(y_name)

    print("Data loaded")
    print(x_train.shape)
    # DATASET part END

    positive_y = np.ones((batch_size, 1), dtype=np.float32)
    negative_y = -positive_y
    dummy_y = np.zeros((batch_size, 1), dtype=np.float32)

    for epoch in tqdm.tqdm(range(epoch_num)):
        permutated_indexes = np.random.permutation(x_train.shape[0])

        d_losses = []
        d_on_g_losses = []
        minibatches_size = batch_size * TRAINING_RATIO
        for i in range(int(x_train.shape[0] // minibatches_size)):
            # use the shuffled indices; batch_indexes was previously computed but never applied
            batch_indexes = permutated_indexes[i * minibatches_size:(i + 1) * minibatches_size]
            image_blur_batch = x_train[batch_indexes]
            image_full_batch = y_train[batch_indexes]
            generated_images = g.predict(x=image_blur_batch, batch_size=batch_size)  # check the batch size
            for j in range(critic_updates):
                sharp = image_full_batch[j * batch_size:(j + 1) * batch_size]
                blur = generated_images[j * batch_size:(j + 1) * batch_size]  # generated (fake) samples, despite the name
                # dummy_y is the target for the gradient-penalty output; gradient_penalty_loss ignores y_true
                d_losses.append(discriminator_model1.train_on_batch([sharp, blur], [positive_y, negative_y, dummy_y]))

            d.trainable = False
            output_true_batch1 = np.ones((critic_updates, 1))
            d_on_g_loss = d_on_g.train_on_batch(image_blur_batch, [image_full_batch, output_true_batch1])
            d_on_g_losses.append(d_on_g_loss)

            d.trainable = True

        print(np.mean(d_losses), np.mean(d_on_g_losses))

    save_all_weights2(d, g, shard_count)
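
One more note on the discriminator compile above: gradient_penalty_loss carries no λ factor, so the penalty enters the critic loss with weight 1; the WGAN-GP paper uses λ = 10, which could be supplied via the loss_weights argument of discriminator_model1.compile if desired.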

One observation: if I replace 'partial_gp_loss' with 'wasserstein_loss' in the training part, i.e. do not use the gradient penalty at all, the implementation works fine.
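
That observation is consistent with the traceback: the failing node is a second-order gradient through batch_normalization_12's training/inference conditional (the StatelessIf), and the gradient penalty is the only term that differentiates through the discriminator a second time. Separately, the WGAN-GP paper recommends omitting batch normalization in the critic because the penalty is defined per sample, and suggests layer normalization instead; swapping it in would also remove the conditional from the gradient graph. A hedged sketch (critic_block is a hypothetical helper, not from the notebook):

from tensorflow.keras import layers

def critic_block(x, filters):
    # a typical critic block with LayerNormalization in place of BatchNormalization
    x = layers.Conv2D(filters, kernel_size=4, strides=2, padding='same')(x)
    x = layers.LayerNormalization()(x)
    x = layers.LeakyReLU(0.2)(x)
    return x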

Link to colab notebook.

question from: https://stackoverflow.com/questions/65847461/error-in-implementation-of-gradient-penalty-in-wgan


1 Answer

Waiting for answers.
