I am trying to add the gradient penalty term to the WGAN part of a DeblurGAN implementation. I checked an existing gradient penalty implementation and tried adding it to the DeblurGAN code, but I get an error in the 'partial gradient penalty loss' part.
The error I get:
InvalidArgumentError: in user code:
<ipython-input-7-ef8baa4c530e>:25 mix *
vgg = VGG16(include_top=False, weights='imagenet', input_shape=image_shape)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/applications/vgg16.py:222 VGG16 **
model.load_weights(weights_path)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training_v1.py:236 load_weights
return super(Model, self).load_weights(filepath, by_name, skip_mismatch)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:2234 load_weights
hdf5_format.load_weights_from_hdf5_group(f, self.layers)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/saving/hdf5_format.py:710 load_weights_from_hdf5_group
K.batch_set_value(weight_value_tuples)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/util/dispatch.py:201 wrapper
return target(*args, **kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/backend.py:3731 batch_set_value
get_session().run(assign_ops, feed_dict=feed_dict)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/backend.py:644 get_session
_initialize_variables(session)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/backend.py:1089 _initialize_variables
[variables_module.is_variable_initialized(v) for v in candidate_vars])
/usr/local/lib/python3.6/dist-packages/tensorflow/python/client/session.py:968 run
run_metadata_ptr)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/client/session.py:1191 _run
feed_dict_tensor, options, run_metadata)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/client/session.py:1369 _do_run
run_metadata)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/client/session.py:1394 _do_call
raise type(e)(node_def, op, message)
InvalidArgumentError: Node 'training/Adam/gradients/gradients/loss/Discriminator_2_loss/gradients/Discriminator_12/batch_normalization_12/cond_grad/StatelessIf_grad/StatelessIf': Connecting to invalid output 5 of source node
loss/Discriminator_2_loss/gradients/Discriminator_12/batch_normalization_12/cond_grad/StatelessIf which has 5 outputs. Try using tf.compat.v1.experimental.output_all_intermediates(True).
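For reference, the workaround the message itself suggests would presumably be applied once, before any model is built. A minimal, untested sketch of that (the flag is real TF API, but I have not confirmed it fixes this case):

import tensorflow as tf

# Suggested by the error message itself: make v1-style graphs expose all
# intermediate outputs of functional control-flow ops (such as the
# batch-norm StatelessIf node named in the traceback), so that gradient
# code can connect to them. Must run before the models are constructed.
tf.compat.v1.experimental.output_all_intermediates(True)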
Implementation of the gradient penalty part
import numpy as np
from tensorflow.keras import backend as K

def gradient_penalty_loss(y_true, y_pred, averaged_samples):
    # Gradient of the critic output w.r.t. the interpolated samples
    gradients = K.gradients(y_pred, averaged_samples)[0]
    # Per-sample L2 norm of the gradients, summed over all non-batch axes
    gradients_sqr = K.square(gradients)
    gradients_sqr_sum = K.sum(gradients_sqr, axis=np.arange(1, len(gradients_sqr.shape)))
    gradient_l2_norm = K.sqrt(gradients_sqr_sum)
    # Two-sided WGAN-GP penalty: deviation of the norm from 1
    gradient_penalty = K.square(1 - gradient_l2_norm)
    return K.mean(gradient_penalty)
class RandomWeightedAverage(keras.layers.Concatenate):
    # Random convex combination of the two inputs; a single epsilon is
    # sampled per call and broadcast across the whole batch.
    def _merge_function(self, inputs):
        weights = K.random_uniform((1, 1, 1, 1))
        return (weights * inputs[0]) + ((1 - weights) * inputs[1])
Implementation of the part that throws the error
def train_multiple_outputs(batch_size, epoch_num, critic_updates=5, train=False, shard_count=0, start=0):
    TRAINING_RATIO = critic_updates
    g = generator_model()
    d = discriminator_model()
    d_on_g = generator_containing_discriminator_multiple_outputs(g, d)
    d_opt = Adam(lr=1E-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
    d_on_g_opt = Adam(lr=1E-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08)

    # To resume training from a particular epoch, load weights here
    g_name, d_name = "", ""
    if start:
        pass
    elif shard_count == 0:
        g_name = "/content/drive/My Drive/SavedModel/generator_4.h5"
        d_name = "/content/drive/My Drive/SavedModel/discriminator_4.h5"
    else:
        g_name = "/content/drive/My Drive/SavedModel/generator_" + str(shard_count-1) + ".h5"
        d_name = "/content/drive/My Drive/SavedModel/discriminator_" + str(shard_count-1) + ".h5"
    print("Model Name: " + g_name + " " + d_name)
    if g_name == "":
        pass
    else:
        g.load_weights(g_name)
        d.load_weights(d_name)
        print("Model loaded")

    # START - WGAN
    d.trainable = True
    real_samples = Input(shape=image_shape)
    generator_input_for_discriminator = Input(shape=image_shape)
    generated_samples_for_discriminator = g(generator_input_for_discriminator)  # generated image
    discriminator_output_from_generator = d(generated_samples_for_discriminator)  # discriminator output for generated samples
    discriminator_output_from_real_samples = d(real_samples)  # discriminator output for real samples
    averaged_samples = RandomWeightedAverage(1)([real_samples,
                                                 generated_samples_for_discriminator])
    averaged_samples_out = d(averaged_samples)
    # Works fine up to this point (verified by replacing partial_gp_loss with
    # wasserstein_loss in the discriminator compile below)
    partial_gp_loss = partial(gradient_penalty_loss,
                              averaged_samples=averaged_samples)
    partial_gp_loss.__name__ = 'gradient_penalty'  # Keras requires loss functions to have a name
    discriminator_model1 = Model(inputs=[real_samples, generator_input_for_discriminator],
                                 outputs=[discriminator_output_from_real_samples,
                                          discriminator_output_from_generator,
                                          averaged_samples_out])
    discriminator_model1.compile(optimizer=Adam(0.0001, beta_1=0.5, beta_2=0.9),
                                 loss=[wasserstein_loss,
                                       wasserstein_loss,
                                       partial_gp_loss])
    # discriminator_model1.summary()
    # END

    # START - Generator with Discriminator
    d.trainable = False
    g.trainable = True
    loss = [mix, wasserstein_loss]
    loss_weights = [100, 1]
    d_on_g.compile(optimizer=d_on_g_opt, loss=loss, loss_weights=loss_weights)
    d.trainable = True
    # END

    # This is not used
    output_true_batch, output_false_batch = np.ones((batch_size, 1)), -np.ones((batch_size, 1))

    # DATASET part START
    x_name = "/content/drive/MyDrive/shard/A" + str(shard_count) + ".npy"
    y_name = "/content/drive/MyDrive/shard/B" + str(shard_count) + ".npy"
    print("Name : " + x_name + " " + y_name)
    x_train = np.load(x_name)
    y_train = np.load(y_name)
    print("Data loaded")
    print(x_train.shape)
    # DATASET part END

    positive_y = np.ones((batch_size, 1), dtype=np.float32)
    negative_y = -positive_y
    dummy_y = np.zeros((batch_size, 1), dtype=np.float32)  # dummy targets for the gradient penalty output

    for epoch in tqdm.tqdm(range(epoch_num)):
        permutated_indexes = np.random.permutation(x_train.shape[0])
        d_losses = []
        d_on_g_losses = []
        minibatches_size = batch_size * TRAINING_RATIO
        for i in range(int(x_train.shape[0] // (batch_size * TRAINING_RATIO))):
            batch_indexes = permutated_indexes[i*batch_size:(i+1)*batch_size]  # note: computed but not used below
            image_blur_batch = x_train[i * minibatches_size:(i + 1) * minibatches_size]
            image_full_batch = y_train[i * minibatches_size:(i + 1) * minibatches_size]
            generated_images = g.predict(x=image_blur_batch, batch_size=batch_size)  # check the batch size
            # Train the critic TRAINING_RATIO times per generator update
            for j in range(critic_updates):
                sharp = image_full_batch[j * batch_size:(j + 1) * batch_size]
                blur = generated_images[j * batch_size:(j + 1) * batch_size]
                d_losses.append(discriminator_model1.train_on_batch([sharp, blur], [positive_y, negative_y, dummy_y]))
            d.trainable = False
            output_true_batch1 = np.ones((critic_updates, 1))
            d_on_g_loss = d_on_g.train_on_batch(image_blur_batch, [image_full_batch, output_true_batch1])
            d_on_g_losses.append(d_on_g_loss)
            d.trainable = True
        print(np.mean(d_losses), np.mean(d_on_g_losses))
        save_all_weights2(d, g, shard_count)
An observation: if I replace 'partial_gp_loss' with 'wasserstein_loss' in the training part, i.e. do not use the gradient penalty at all, the implementation works fine.
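Concretely, this is the compile call that trains without the error (gradient penalty dropped, plain Wasserstein loss applied to the interpolated-samples output as well):

# Works: same three outputs, but no gradient penalty term
discriminator_model1.compile(optimizer=Adam(0.0001, beta_1=0.5, beta_2=0.9),
                             loss=[wasserstein_loss,
                                   wasserstein_loss,
                                   wasserstein_loss])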
Link to colab notebook.
Question from: https://stackoverflow.com/questions/65847461/error-in-implementation-of-gradient-penalty-in-wgan