This article collects typical usage examples of the Python function tensorflow.python.ops.clip_ops.global_norm. If you have been wondering what exactly global_norm does, how to use it, or simply want to see it in real code, the hand-picked examples below should help.
In total, 19 code examples of the global_norm function are shown, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code samples.
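Before the examples, here is a minimal sketch (assuming TensorFlow 1.x graph mode) of what global_norm computes: it treats a list of tensors as one flattened vector and returns its L2 norm, sqrt(sum_i ||t_i||^2), ignoring None entries.

import numpy as np
import tensorflow as tf
from tensorflow.python.ops import clip_ops

# Two tensors plus a None entry (None entries are ignored, as in gradient lists).
tensors = [tf.constant([3.0, 4.0]), tf.constant([[1.0, 2.0], [2.0, 4.0]]), None]
gnorm = clip_ops.global_norm(tensors)   # the same op backs the public tf.global_norm

with tf.Session() as sess:
    # 3^2 + 4^2 = 25 and 1 + 4 + 4 + 16 = 25, so the global norm is sqrt(50).
    np.testing.assert_allclose(sess.run(gnorm), np.sqrt(50.0), rtol=1e-6)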
Example 1: add_gradients_summaries

def add_gradients_summaries(grads_and_vars):
  """Add summaries to gradients.

  Args:
    grads_and_vars: A list of gradient-to-variable pairs (tuples).

  Returns:
    The list of created summaries.
  """
  summaries = []
  for grad, var in grads_and_vars:
    if grad is not None:
      if isinstance(grad, ops.IndexedSlices):
        grad_values = grad.values
      else:
        grad_values = grad
      summaries.append(
          summary.histogram(var.op.name + '_gradient', grad_values))
      summaries.append(
          summary.scalar(var.op.name + '_gradient_norm',
                         clip_ops.global_norm([grad_values])))
    else:
      logging.info('Var %s has no gradient', var.op.name)
  return summaries

Developer: Albert-Z-Guo | Project: tensorflow | Lines of code: 25 | Source file: training.py
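A hypothetical usage sketch for add_gradients_summaries (the variable and loss below are invented for illustration and assume the same TF 1.x imports as the file above):

import tensorflow as tf

w = tf.get_variable('w', shape=[10], initializer=tf.zeros_initializer())
loss = tf.reduce_sum(tf.square(w - 1.0))

optimizer = tf.train.AdamOptimizer(1e-3)
grads_and_vars = optimizer.compute_gradients(loss)
# One histogram summary plus one global_norm scalar summary per variable.
grad_summaries = add_gradients_summaries(grads_and_vars)
train_op = optimizer.apply_gradients(grads_and_vars)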
Example 2: clip_gradients_by_global_norm

def clip_gradients_by_global_norm(gradients_variables, clip_norm=20.):
  """Clips gradients of a multitask loss by their global norm.

  Ignores all-zero tensors when computing the global norm.

  Args:
    gradients_variables: a list of pairs (gradient, variable).
    clip_norm: a float Tensor, the global norm to clip on. Default is 20.0.

  Returns:
    list: A list of pairs of the same type as gradients_variables.
    fixed_global_norm: A 0-D (scalar) Tensor representing the global norm.
  """
  gradients, variables = six.moves.zip(*gradients_variables)

  def _replace_nonexisting_grad(grad):
    if grad is None:
      return grad
    all_zeros = _is_all_zeros(grad)
    return control_flow_ops.cond(all_zeros,
                                 lambda: array_ops.zeros(
                                     [], dtype=dtypes.as_dtype(grad.dtype)),
                                 lambda: grad)

  nonzero_gradients = [_replace_nonexisting_grad(g) for g in gradients]
  fixed_global_norm = clip_ops.global_norm(nonzero_gradients)
  gradients, _ = clip_ops.clip_by_global_norm(gradients, clip_norm,
                                              use_norm=fixed_global_norm)
  return list(six.moves.zip(gradients, variables)), fixed_global_norm

Developer: SylChan | Project: tensorflow | Lines of code: 26 | Source file: multitask_optimizer_wrapper.py
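A hedged usage sketch for clip_gradients_by_global_norm; the two task losses and the shared variable are placeholders invented for illustration:

import tensorflow as tf

shared = tf.get_variable('shared', shape=[4], initializer=tf.ones_initializer())
loss_a = tf.reduce_sum(tf.square(shared))    # task A loss
loss_b = tf.reduce_mean(tf.abs(shared))      # task B loss

optimizer = tf.train.AdamOptimizer(1e-3)
grads_and_vars = optimizer.compute_gradients(loss_a + loss_b)
clipped, joint_norm = clip_gradients_by_global_norm(grads_and_vars, clip_norm=5.0)
train_op = optimizer.apply_gradients(clipped)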
Example 3: gradient_clipping

def gradient_clipping(grads_and_vars):
  """Internal function for adaptive clipping."""
  grads, variables = zip(*grads_and_vars)

  norm = clip_ops.global_norm(grads)

  max_norm, log_mean = _adaptive_max_norm(norm, std_factor, decay,
                                          global_step, epsilon, name)

  # reports the max gradient norm for debugging
  if report_summary:
    summary.scalar("global_norm/adaptive_max_gradient_norm", max_norm)

  # factor will be 1. if norm is smaller than max_norm
  factor = array_ops.where(norm < max_norm,
                           array_ops.ones_like(norm),
                           math_ops.exp(log_mean) / norm)

  if static_max_norm is not None:
    factor = math_ops.minimum(static_max_norm / norm, factor)

  # apply factor
  clipped_grads = []
  for grad in grads:
    if grad is None:
      clipped_grads.append(None)
    elif isinstance(grad, ops.IndexedSlices):
      clipped_grads.append(
          ops.IndexedSlices(grad.values * factor, grad.indices,
                            grad.dense_shape))
    else:
      clipped_grads.append(grad * factor)

  return list(zip(clipped_grads, variables))

Developer: AlbertXiebnu | Project: tensorflow | Lines of code: 34 | Source file: optimizers.py
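Put differently: whenever the current global norm exceeds the adaptive threshold max_norm, every gradient is multiplied by factor = exp(log_mean) / norm, which pulls the global norm back to roughly the running (log-space) mean of recent norms; when the norm is already below the threshold, factor is 1 and the gradients pass through unchanged, and static_max_norm, if provided, acts as an additional hard cap on the rescaled norm.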
Example 4: test_stable_global_norm_avoids_overflow

def test_stable_global_norm_avoids_overflow(self):
  tensors = [array_ops.ones([4]), array_ops.ones([4, 4]) * 1e19, None]
  gnorm_is_inf = math_ops.is_inf(clip_ops.global_norm(tensors))
  stable_gnorm_is_inf = math_ops.is_inf(
      tfgan_losses._numerically_stable_global_norm(tensors))

  with self.test_session(use_gpu=True):
    self.assertTrue(gnorm_is_inf.eval())
    self.assertFalse(stable_gnorm_is_inf.eval())

Developer: 1000sprites | Project: tensorflow | Lines of code: 9 | Source file: losses_impl_test.py
Example 5: test_stable_global_norm_unchanged

def test_stable_global_norm_unchanged(self):
  """Test that preconditioning doesn't change global norm value."""
  random_seed.set_random_seed(1234)
  tensors = [random_ops.random_uniform([3]*i, -10.0, 10.0) for i in range(6)]
  gnorm = clip_ops.global_norm(tensors)
  precond_gnorm = tfgan_losses._numerically_stable_global_norm(tensors)

  with self.test_session(use_gpu=True) as sess:
    for _ in range(10):  # spot check closeness on more than one sample.
      gnorm_np, precond_gnorm_np = sess.run([gnorm, precond_gnorm])
      self.assertNear(gnorm_np, precond_gnorm_np, 1e-5)

Developer: 1000sprites | Project: tensorflow | Lines of code: 11 | Source file: losses_impl_test.py
Example 6: _numerically_stable_global_norm

def _numerically_stable_global_norm(tensor_list):
  """Compute the global norm of a list of Tensors, with improved stability.

  The global norm computation sometimes overflows due to the intermediate L2
  step. To avoid this, we divide by a cheap-to-compute max over the
  matrix elements.

  Args:
    tensor_list: A list of tensors, or `None`.

  Returns:
    A scalar tensor with the global norm.
  """
  if np.all([x is None for x in tensor_list]):
    return 0.0

  list_max = math_ops.reduce_max([math_ops.reduce_max(math_ops.abs(x)) for x in
                                  tensor_list if x is not None])
  return list_max * clip_ops.global_norm([x / list_max for x in tensor_list
                                          if x is not None])

Developer: JonathanRaiman | Project: tensorflow | Lines of code: 20 | Source file: losses_impl.py
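A rough numeric sketch (NumPy, not part of the original code) of why the rescaling helps, using the same magnitudes as the overflow test in Example 4: with entries around 1e19, the intermediate sum of squares is about 1.6e39, which exceeds the float32 maximum (~3.4e38) and becomes inf, whereas dividing by the per-list maximum first keeps every intermediate value close to 1.

import numpy as np

x = np.full((4, 4), 1e19, dtype=np.float32)
naive = np.sqrt(np.sum(x * x))              # inf: 16 * 1e38 overflows float32
m = np.max(np.abs(x))
stable = m * np.sqrt(np.sum((x / m) ** 2))  # ~4e19, finite
print(naive, stable)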
Example 7: model_fn

def model_fn(features, labels, mode, params):
  """Model function defining an inpainting estimator."""
  batch_size = params['batch_size']
  z_shape = [batch_size] + params['z_shape']
  add_summaries = params['add_summaries']
  input_clip = params['input_clip']

  z = variable_scope.get_variable(
      name=INPUT_NAME, initializer=random_ops.truncated_normal(z_shape),
      constraint=lambda x: clip_ops.clip_by_value(x, -input_clip, input_clip))

  generator = functools.partial(generator_fn, mode=mode)
  discriminator = functools.partial(discriminator_fn, mode=mode)
  gan_model = tfgan_train.gan_model(generator_fn=generator,
                                    discriminator_fn=discriminator,
                                    real_data=labels,
                                    generator_inputs=z,
                                    check_shapes=False)
  loss = loss_fn(gan_model, features, labels, add_summaries)

  # Use a variable scope to make sure that estimator variables don't cause
  # save/load problems when restoring from ckpts.
  with variable_scope.variable_scope(OPTIMIZER_NAME):
    opt = optimizer(learning_rate=params['learning_rate'],
                    **params['opt_kwargs'])
    train_op = opt.minimize(
        loss=loss, global_step=training_util.get_or_create_global_step(),
        var_list=[z])

  if add_summaries:
    z_grads = gradients_impl.gradients(loss, z)
    summary.scalar('z_loss/z_grads', clip_ops.global_norm(z_grads))
    summary.scalar('z_loss/loss', loss)

  return model_fn_lib.EstimatorSpec(mode=mode,
                                    predictions=gan_model.generated_data,
                                    loss=loss,
                                    train_op=train_op)

Developer: Albert-Z-Guo | Project: tensorflow | Lines of code: 39 | Source file: latent_gan_estimator_impl.py
Example 8: basic_CNN

#......... part of the code omitted here .........
        h_conv2_flat = tf.reshape(h_conv2, [-1, D*filt_2[0]])
        h_fc1 = tf.nn.relu(tf.matmul(h_conv2_flat, W_fc1) + b_fc1)

    with tf.name_scope("Fully_Connected2") as scope:
        W_fc2 = weight_variable([num_fc_1, num_fc_2], 'Fully_Connected_layer_2')
        b_fc2 = bias_variable([num_fc_2], 'bias_for_Fully_Connected_Layer_2')
        h_fc2 = tf.nn.relu(tf.matmul(h_fc1, W_fc2) + b_fc2)

    with tf.name_scope("Output") as scope:
        # postfix _o represents variables for the output layer
        h_o_drop = tf.nn.dropout(h_fc2, keep_prob)
        W_o = tf.Variable(tf.truncated_normal([num_fc_2, 1], stddev=0.1), name='W_o')
        b_o = tf.Variable(tf.constant(0.1, shape=[1]), name='b_o')
        h_o = tf.matmul(h_o_drop, W_o) + b_o
        sm_o = tf.sigmoid(h_o)

    with tf.name_scope("Sigmoid") as scope:
        loss = tf.square(sm_o - tf.to_float(y_))
        cost = tf.reduce_mean(loss)
        loss_summ = tf.scalar_summary("cross entropy_loss", cost)

    with tf.name_scope("train") as scope:
        tvars = tf.trainable_variables()
        # We clip the gradients to prevent explosion
        grads = tf.gradients(cost, tvars)
        optimizer = tf.train.AdamOptimizer(learning_rate)
        gradients = zip(grads, tvars)
        train_step = optimizer.apply_gradients(gradients)
        # The following block plots for every trainable variable
        #  - Histogram of the entries of the Tensor
        #  - Histogram of the gradient over the Tensor
        #  - Histogram of the gradient-norm over the Tensor
        numel = tf.constant([[0]])
        for gradient, variable in gradients:
            if isinstance(gradient, ops.IndexedSlices):
                grad_values = gradient.values
            else:
                grad_values = gradient
            numel += tf.reduce_sum(tf.size(variable))
            h1 = tf.histogram_summary(variable.name, variable)
            h2 = tf.histogram_summary(variable.name + "/gradients", grad_values)
            h3 = tf.histogram_summary(variable.name + "/gradient_norm",
                                      clip_ops.global_norm([grad_values]))
        # tf.gradients returns a list. We cannot fetch a list, therefore we fetch
        # the tensor that is the 0-th element of the list
        vis = tf.gradients(loss, x_feed)[0]

    with tf.name_scope("Evaluating_accuracy") as scope:
        correct_prediction = tf.equal(tf.argmax(h_o, 1), y_)
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
        accuracy_summary = tf.scalar_summary("accuracy", accuracy)

    # Define one op to call all summaries
    merged = tf.merge_all_summaries()

    # For now, we collect performances in a Numpy array.
    # In future releases, I hope TensorBoard allows for more
    # flexibility in plotting
    perf_collect = np.zeros((4, int(np.floor(max_iterations / 100))))

    with tf.Session() as sess:
        writer = tf.train.SummaryWriter('/home/siddhu/FBIRN/cnn/log/', sess.graph)
        sess.run(tf.initialize_all_variables())

        step = 0  # Step is a counter for filling the numpy array perf_collect
        for i in range(max_iterations):
            batch_ind = np.random.choice(N, batch_size, replace=False)
            check = sess.run([size1], feed_dict={x: X_val, y_: y_val, keep_prob: 1.0, bn_train: False})
            # print check[0]
            if i == 0:
                # Use this line to check before-and-after test accuracy
                result = sess.run(accuracy, feed_dict={x: X_val, y_: y_val, keep_prob: 1.0, bn_train: False})
                acc_test_before = result
            if i % 100 == 0:
                # Check training performance
                result = sess.run([accuracy, cost], feed_dict={x: X_train, y_: y_train, keep_prob: 1.0, bn_train: False})
                perf_collect[0, step] = result[0]
                perf_collect[1, step] = result[1]

                # Check validation performance
                result = sess.run([accuracy, cost, merged], feed_dict={x: X_val, y_: y_val, keep_prob: 1.0, bn_train: False})
                acc = result[0]
                perf_collect[2, step] = acc
                perf_collect[3, step] = result[1]

                # Write information to TensorBoard
                summary_str = result[2]
                writer.add_summary(summary_str, i)
                writer.flush()  # Don't forget this command! It makes sure Python writes the summaries to the log-file
                # print(" Validation accuracy at %s out of %s is %s" % (i, max_iterations, acc))
                step += 1
            sess.run(train_step, feed_dict={x: X_train[batch_ind], y_: y_train[batch_ind], keep_prob: dropout, bn_train: True})
        # In the next line we also fetch the softmax outputs
        result = sess.run([accuracy, numel, sm_o, x_pad], feed_dict={x: X_val, y_: y_val, keep_prob: 1.0, bn_train: False})
        acc_test = result[0]

    tf.reset_default_graph()
    return acc_test

Developer: siddhu95 | Project: cnn_timeseries | Lines of code: 101 | Source file: cnn.py
Example 9: optimize_loss

#......... part of the code omitted here .........
  # Make sure update ops are ran before computing loss.
  if update_ops:
    loss = control_flow_ops.with_dependencies(update_ops, loss)

  # Moving average of the loss with decay.
  # TODO(b/30439864): moving_average_decay should be removed.
  if moving_average_decay is not None:
    logging.warn("'moving_average_decay' is deprecated. Please use "
                 "tensorboard's builtin averaging instead.")
    # Generate moving averages of the loss.
    loss_averages = train.ExponentialMovingAverage(moving_average_decay,
                                                   name="avg")
    loss_averages_op = loss_averages.apply([loss])
    logging_ops.scalar_summary("loss/mean", loss_averages.average(loss))
    loss = control_flow_ops.with_dependencies([loss_averages_op], loss)

  # Learning rate variable, with possible decay.
  if (isinstance(learning_rate, ops.Tensor)
      and learning_rate.get_shape().ndims == 0):
    lr = learning_rate
  elif isinstance(learning_rate, float):
    lr = vs.get_variable(
        "learning_rate", [], trainable=False,
        initializer=init_ops.constant_initializer(learning_rate))
  else:
    raise ValueError("Learning rate should be 0d Tensor or float. "
                     "Got %s of type %s" % (
                         str(learning_rate), str(type(learning_rate))))
  if summaries is None:
    summaries = ["loss", "learning_rate"]
  if learning_rate_decay_fn is not None:
    lr = learning_rate_decay_fn(lr, global_step)
    if "learning_rate" in summaries:
      logging_ops.scalar_summary("learning_rate", lr)

  # Create optimizer, given specified parameters.
  if isinstance(optimizer, six.string_types):
    if optimizer not in OPTIMIZER_CLS_NAMES:
      raise ValueError(
          "Optimizer name should be one of [%s], you provided %s."
          % (", ".join(OPTIMIZER_CLS_NAMES), optimizer))
    opt = OPTIMIZER_CLS_NAMES[optimizer](learning_rate=lr)
  elif isinstance(optimizer, type) and issubclass(optimizer,
                                                  optimizer_.Optimizer):
    opt = optimizer(learning_rate=lr)
  elif isinstance(optimizer, optimizer_.Optimizer):
    opt = optimizer
  else:
    raise ValueError("Unrecognized optimizer: should be string, "
                     "subclass of Optimizer or instance of "
                     "subclass of Optimizer. Got %s." % str(optimizer))

  # All trainable variables, if specific variables are not specified.
  if variables is None:
    variables = vars_.trainable_variables()

  # Compute gradients.
  gradients = opt.compute_gradients(loss, variables)

  # Optionally add gradient noise.
  if gradient_noise_scale is not None:
    gradients = _add_scaled_noise_to_gradients(
        gradients, gradient_noise_scale)

  # Multiply some gradients.
  if gradient_multipliers is not None:
    gradients = _multiply_gradients(gradients, gradient_multipliers)

  # Optionally clip gradients by global norm.
  if clip_gradients is not None:
    gradients = _clip_gradients_by_norm(gradients, clip_gradients)

  # Add scalar summary for loss.
  if "loss" in summaries:
    logging_ops.scalar_summary("loss", loss)

  # Add histograms for variables, gradients and gradient norms.
  for gradient, variable in gradients:
    if isinstance(gradient, ops.IndexedSlices):
      grad_values = gradient.values
    else:
      grad_values = gradient

    if grad_values is not None:
      if "gradients" in summaries:
        logging_ops.histogram_summary(variable.name + "/gradients",
                                      grad_values)
      if "gradient_norm" in summaries:
        logging_ops.histogram_summary(variable.name + "/gradient_norm",
                                      clip_ops.global_norm([grad_values]))

  # Create gradient updates.
  grad_updates = opt.apply_gradients(gradients,
                                     global_step=global_step,
                                     name="train")

  # Ensure the train_tensor computes grad_updates.
  train_tensor = control_flow_ops.with_dependencies([grad_updates], loss)

  return train_tensor

Developer: perhapszzy | Project: tensorflow | Lines of code: 101 | Source file: optimizers.py
Example 10: isinstance

# The following block plots for every trainable variable
#  - Histogram of the entries of the Tensor
#  - Histogram of the gradient over the Tensor
#  - Histogram of the gradient-norm over the Tensor
numel = tf.constant([[0]])
for gradient, variable in gradients:
    if isinstance(gradient, ops.IndexedSlices):
        grad_values = gradient.values
    else:
        grad_values = gradient
    numel += tf.reduce_sum(tf.size(variable))
    h1 = tf.histogram_summary(variable.name, variable)
    h2 = tf.histogram_summary(variable.name + "/gradients", grad_values)
    h3 = tf.histogram_summary(variable.name + "/gradient_norm",
                              clip_ops.global_norm([grad_values]))

with tf.name_scope("Evaluating_accuracy") as scope:
    correct_prediction = tf.equal(tf.argmax(h_fc2, 1), y_)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    accuracy_summary = tf.scalar_summary("accuracy", accuracy)

# Define one op to call all summaries
merged = tf.merge_all_summaries()

def print_tvars():
    tvars = tf.trainable_variables()
    for variable in tvars:
        print(variable.name)
    return
print_tvars()

Developer: ericsolo | Project: python | Lines of code: 31 | Source file: CNN_tsc_main.py
Example 11: optimize_loss

#......... part of the code omitted here .........
  if isinstance(optimizer, six.string_types):
    if lr is None:
      raise ValueError("Learning rate is None, but should be specified if "
                       "optimizer is string (%s)." % optimizer)
    if optimizer not in OPTIMIZER_CLS_NAMES:
      raise ValueError(
          "Optimizer name should be one of [%s], you provided %s." %
          (", ".join(OPTIMIZER_CLS_NAMES), optimizer))
    opt = OPTIMIZER_CLS_NAMES[optimizer](learning_rate=lr)
  elif (isinstance(optimizer, type) and
        issubclass(optimizer, optimizer_.Optimizer)):
    if lr is None:
      raise ValueError("Learning rate is None, but should be specified if "
                       "optimizer is class (%s)." % optimizer)
    opt = optimizer(learning_rate=lr)
  elif isinstance(optimizer, optimizer_.Optimizer):
    opt = optimizer
  elif callable(optimizer):
    if learning_rate is not None:
      opt = optimizer(lr)
    else:
      opt = optimizer()
    if not isinstance(opt, optimizer_.Optimizer):
      raise ValueError("Unrecognized optimizer: function should return "
                       "subclass of Optimizer. Got %s." % str(opt))
  else:
    raise ValueError("Unrecognized optimizer: should be string, "
                     "subclass of Optimizer, instance of "
                     "subclass of Optimizer or function with one argument. "
                     "Got %s." % str(optimizer))

  # All trainable variables, if specific variables are not specified.
  if variables is None:
    variables = vars_.trainable_variables()

  # Compute gradients.
  gradients = opt.compute_gradients(
      loss,
      variables,
      colocate_gradients_with_ops=colocate_gradients_with_ops)

  # Optionally add gradient noise.
  if gradient_noise_scale is not None:
    gradients = _add_scaled_noise_to_gradients(gradients,
                                               gradient_noise_scale)

  # Multiply some gradients.
  if gradient_multipliers is not None:
    gradients = _multiply_gradients(gradients, gradient_multipliers)
    if not gradients:
      raise ValueError(
          "Empty list of (gradient, var) pairs encountered. This is most "
          "likely to be caused by an improper value of gradient_multipliers.")

  if "gradient_norm" in summaries:
    summary.scalar("global_norm/gradient_norm",
                   clip_ops.global_norm(list(zip(*gradients))[0]))

  # Optionally clip gradients by global norm.
  if isinstance(clip_gradients, float):
    gradients = _clip_gradients_by_norm(gradients, clip_gradients)
  elif callable(clip_gradients):
    gradients = clip_gradients(gradients)
  elif clip_gradients is not None:
    raise ValueError(
        "Unknown type %s for clip_gradients" % type(clip_gradients))

  # Add scalar summary for loss.
  if "loss" in summaries:
    summary.scalar("loss", loss)

  # Add histograms for variables, gradients and gradient norms.
  for gradient, variable in gradients:
    if isinstance(gradient, ops.IndexedSlices):
      grad_values = gradient.values
    else:
      grad_values = gradient

    if grad_values is not None:
      var_name = variable.name.replace(":", "_")
      if "gradients" in summaries:
        summary.histogram("gradients/%s" % var_name, grad_values)
      if "gradient_norm" in summaries:
        summary.scalar("gradient_norm/%s" % var_name,
                       clip_ops.global_norm([grad_values]))

  if clip_gradients is not None and "gradient_norm" in summaries:
    summary.scalar("global_norm/clipped_gradient_norm",
                   clip_ops.global_norm(list(zip(*gradients))[0]))

  # Create gradient updates.
  grad_updates = opt.apply_gradients(
      gradients,
      global_step=global_step if increment_global_step else None,
      name="train")

  # Ensure the train_tensor computes grad_updates.
  train_tensor = control_flow_ops.with_dependencies([grad_updates], loss)

  return train_tensor

Developer: AlbertXiebnu | Project: tensorflow | Lines of code: 101 | Source file: optimizers.py
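In TF 1.x this function was exposed publicly as tf.contrib.layers.optimize_loss; a hedged usage sketch (the loss below is a placeholder invented for illustration):

import tensorflow as tf

my_loss = tf.reduce_sum(tf.square(tf.get_variable('w', shape=[3])))
global_step = tf.train.get_or_create_global_step()

train_op = tf.contrib.layers.optimize_loss(
    loss=my_loss,
    global_step=global_step,
    learning_rate=0.01,
    optimizer='Adam',
    clip_gradients=5.0,   # a float triggers _clip_gradients_by_norm (global-norm clipping)
    summaries=['loss', 'learning_rate', 'gradient_norm'])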
Example 12: optimize_loss

def optimize_loss(loss,
                  global_step,
                  learning_rate,
                  optimizer,
                  clip_gradients=None,
                  moving_average_decay=0.9,
                  learning_rate_decay_fn=None,
                  variables=None):
  """Given loss and parameters for optimizer, returns a training op.

  Args:
    loss: Tensor, 0 dimensional.
    global_step: Tensor, step counter for each update.
    learning_rate: float or Tensor, magnitude of update per each training step.
    optimizer: string or function, used as optimizer for training.
    clip_gradients: float or None, clips gradients by this value.
    moving_average_decay: float or None, takes into account previous loss
                          to make learning smoother due to outliers.
    learning_rate_decay_fn: function, takes learning_rate and global_step
                            Tensors, returns Tensor. Can be used to implement
                            any learning rate decay functions.
                            For example: tf.train.exponential_decay.
    variables: list of variables to optimizer or none.

  Returns:
    Training op.

  Raises:
    ValueError: if optimizer is wrong type.
  """
  # Moving average of the loss with decay.
  if moving_average_decay is not None:
    # Generate moving averages of the loss.
    loss_averages = train.ExponentialMovingAverage(moving_average_decay,
                                                   name="avg")
    loss_averages_op = loss_averages.apply([loss])
    logging_ops.scalar_summary("loss/mean", loss_averages.average(loss))
    loss = control_flow_ops.with_dependencies([loss_averages_op], loss)

  # Convert optimizer into the optimizer class.
  if isinstance(optimizer, str):
    opt_cls = OPTIMIZER_CLS_NAMES[optimizer]
  elif callable(optimizer):
    opt_cls = optimizer
  else:
    raise ValueError("Unrecognized optimizer: should be string or function.")

  # Learning rate variable, with possible decay.
  lr = vs.get_variable("learning_rate",
                       [],
                       trainable=False,
                       initializer=init_ops.constant_initializer(learning_rate))
  if learning_rate_decay_fn is not None:
    lr = learning_rate_decay_fn(lr, global_step)

  # Create optimizer.
  opt = opt_cls(learning_rate=lr)

  # All trainable variables, if specific variables are not specified.
  if variables is None:
    variables = vars_.trainable_variables()

  # Compute gradients and clip them if provided.
  gradients = opt.compute_gradients(loss, variables)
  if clip_gradients is not None:
    clipped_gradients, _ = clip_ops.clip_by_global_norm(gradients,
                                                        clip_gradients)
    gradients = zip(clipped_gradients, variables)

  # Add scalar summary for loss.
  logging_ops.scalar_summary("loss", loss)

  # Add histograms for variables, gradients and gradient norms.
  for gradient, variable in gradients:
    if isinstance(gradient, ops.IndexedSlices):
      grad_values = gradient.values
    else:
      grad_values = gradient
    logging_ops.histogram_summary(variable.name, variable)
    logging_ops.histogram_summary(variable.name + "/gradients", grad_values)
    logging_ops.histogram_summary(variable.name + "/gradient_norm",
                                  clip_ops.global_norm([grad_values]))

  # Create gradient updates.
  grad_updates = opt.apply_gradients(gradients,
                                     global_step=global_step,
                                     name="train")

  # Make sure total_loss is valid.
  final_loss = array_ops.check_numerics(loss, "Loss is inf or nan")

  # Ensure the train_tensor computes grad_updates.
  train_tensor = control_flow_ops.with_dependencies([grad_updates], final_loss)

  return train_tensor

Developer: 4chin | Project: tensorflow | Lines of code: 94 | Source file: optimizers.py
Example 13: _get_train_ops

def _get_train_ops(self,
                   loss,
                   tf_variables,
                   global_step,
                   grad_bound=1.25,
                   lr_init=1e-3,
                   lr_dec=0.9,
                   start_decay_step=10000,
                   decay_steps=100,
                   optimizer_type="adam"):
  """Loss optimizer.

  Args:
    loss: scalar tf tensor
    tf_variables: list of training variables, typically
      tf.trainable_variables()
    global_step: global_step
    grad_bound: max gradient norm
    lr_init: initial learning rate
    lr_dec: learning rate decay coefficient
    start_decay_step: start decaying learning rate after this many steps
    decay_steps: apply decay rate factor at this step intervals
    optimizer_type: optimizer type should be either adam or sgd

  Returns:
    train_op: training op
    learning_rate: scalar learning rate tensor
    grad_norm: l2 norm of the gradient vector
    all_grad_norms: l2 norm of each component
  """
  lr_gstep = global_step - start_decay_step

  def f1():
    return constant_op.constant(lr_init)

  def f2():
    return learning_rate_decay.exponential_decay(lr_init, lr_gstep,
                                                 decay_steps, lr_dec, True)

  learning_rate = control_flow_ops.cond(
      math_ops.less(global_step, start_decay_step),
      f1,
      f2,
      name="learning_rate")

  if optimizer_type == "adam":
    opt = adam.AdamOptimizer(learning_rate)
  elif optimizer_type == "sgd":
    opt = gradient_descent.GradientDescentOptimizer(learning_rate)
  grads_and_vars = opt.compute_gradients(loss, tf_variables)
  grad_norm = clip_ops.global_norm([g for g, v in grads_and_vars])
  all_grad_norms = {}
  clipped_grads = []
  clipped_rate = math_ops.maximum(grad_norm / grad_bound, 1.0)
  for g, v in grads_and_vars:
    if g is not None:
      if isinstance(g, tf_ops.IndexedSlices):
        clipped = g.values / clipped_rate
        norm_square = math_ops.reduce_sum(clipped * clipped)
        clipped = tf_ops.IndexedSlices(clipped, g.indices)
      else:
        clipped = g / clipped_rate
        norm_square = math_ops.reduce_sum(clipped * clipped)
      all_grad_norms[v.name] = math_ops.sqrt(norm_square)
      clipped_grads.append((clipped, v))

  train_op = opt.apply_gradients(clipped_grads, global_step)
  return train_op, learning_rate, grad_norm, all_grad_norms

Developer: neuroradiology | Project: tensorflow | Lines of code: 68 | Source file: hierarchical_controller.py
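Side note (not from the original code): dividing every gradient by max(grad_norm / grad_bound, 1.0), as done above, applies the same rescaling as clip_by_global_norm with a precomputed norm, i.e. grad * grad_bound / max(grad_norm, grad_bound); the explicit loop is kept in the example mainly because it also records the per-variable norms. A sketch of the dense-gradient equivalent:

clipped_list, _ = clip_ops.clip_by_global_norm(
    [g for g, _ in grads_and_vars], grad_bound, use_norm=grad_norm)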
Example 14: optimize_loss

#......... part of the code omitted here .........
    lr = None
    if learning_rate is not None:
        if isinstance(learning_rate, ops.Tensor) and learning_rate.get_shape().ndims == 0:
            lr = learning_rate
        elif isinstance(learning_rate, float):
            lr = vs.get_variable(
                "learning_rate", [], trainable=False, initializer=init_ops.constant_initializer(learning_rate)
            )
        else:
            raise ValueError(
                "Learning rate should be 0d Tensor or float. "
                "Got %s of type %s" % (str(learning_rate), str(type(learning_rate)))
            )

    if summaries is None:
        summaries = ["loss", "learning_rate"]

    if learning_rate is not None and learning_rate_decay_fn is not None:
        lr = learning_rate_decay_fn(lr, global_step)
        if "learning_rate" in summaries:
            logging_ops.scalar_summary("learning_rate", lr)

    # Create optimizer, given specified parameters.
    if isinstance(optimizer, six.string_types):
        if lr is None:
            raise ValueError(
                "Learning rate is None, but should be specified if " "optimizer is string (%s)." % optimizer
            )
        if optimizer not in OPTIMIZER_CLS_NAMES:
            raise ValueError(
                "Optimizer name should be one of [%s], you provided %s."
                % (", ".join(OPTIMIZER_CLS_NAMES), optimizer)
            )
        opt = OPTIMIZER_CLS_NAMES[optimizer](learning_rate=lr)
    elif isinstance(optimizer, type) and issubclass(optimizer, optimizer_.Optimizer):
        if lr is None:
            raise ValueError(
                "Learning rate is None, but should be specified if " "optimizer is class (%s)." % optimizer
            )
        opt = optimizer(learning_rate=lr)
    elif isinstance(optimizer, optimizer_.Optimizer):
        opt = optimizer
    elif callable(optimizer):
        if learning_rate is not None:
            opt = optimizer(lr)
        else:
            opt = optimizer()
        if not isinstance(opt, optimizer_.Optimizer):
            raise ValueError(
                "Unrecognized optimizer: function should return " "subclass of Optimizer. Got %s." % str(opt)
            )
    else:
        raise ValueError(
            "Unrecognized optimizer: should be string, "
            "subclass of Optimizer, instance of "
            "subclass of Optimizer or function with one argument. "
            "Got %s." % str(optimizer)
        )

    # All trainable variables, if specific variables are not specified.
    if variables is None:
        variables = vars_.trainable_variables()

    # Compute gradients.
    gradients = opt.compute_gradients(loss, variables)

    # Optionally add gradient noise.
    if gradient_noise_scale is not None:
        gradients = _add_scaled_noise_to_gradients(gradients, gradient_noise_scale)

    # Multiply some gradients.
    if gradient_multipliers is not None:
        gradients = _multiply_gradients(gradients, gradient_multipliers)

    # Optionally clip gradients by global norm.
    if clip_gradients is not None:
        gradients = _clip_gradients_by_norm(gradients, clip_gradients)

    # Add scalar summary for loss.
    if "loss" in summaries:
        logging_ops.scalar_summary("loss", loss)

    # Add histograms for variables, gradients and gradient norms.
    for gradient, variable in gradients:
        if isinstance(gradient, ops.IndexedSlices):
            grad_values = gradient.values
        else:
            grad_values = gradient

        if grad_values is not None:
            if "gradients" in summaries:
                logging_ops.histogram_summary(variable.name + "/gradients", grad_values)
            if "gradient_norm" in summaries:
                logging_ops.histogram_summary(variable.name + "/gradient_norm", clip_ops.global_norm([grad_values]))

    # Create gradient updates.
    grad_updates = opt.apply_gradients(gradients, global_step=global_step, name="train")

    # Ensure the train_tensor computes grad_updates.
    train_tensor = control_flow_ops.with_dependencies([grad_updates], loss)

    return train_tensor

Developer: jendap | Project: tensorflow | Lines of code: 101 | Source file: optimizers.py
Example 15: create_train_op

def create_train_op(
    total_loss,
    optimizer,
    global_step=None,
    update_ops=None,
    variables_to_train=None,
    clip_gradient_norm=0,
    summarize_gradients=False,
    gate_gradients=tf_optimizer.Optimizer.GATE_OP,
    aggregation_method=None,
    colocate_gradients_with_ops=False):
  """Creates an `Operation` that evaluates the gradients and returns the loss.

  Args:
    total_loss: A `Tensor` representing the total loss.
    optimizer: A tf.Optimizer to use for computing the gradients.
    global_step: A `Tensor` representing the global step variable. If left as
      `None`, then slim.variables.global_step() is used.
    update_ops: an optional list of updates to execute. Note that the update_ops
      that are used are the union of those update_ops passed to the function and
      the value of slim.ops.GetUpdateOps(). Therefore, if `update_ops` is None,
      then the value of slim.ops.GetUpdateOps() is still used.
    variables_to_train: an optional list of variables to train. If None, it will
      default to all tf.trainable_variables().
    clip_gradient_norm: If greater than 0 then the gradients would be clipped
      by it.
    summarize_gradients: Whether or not add summaries for each gradient.
    gate_gradients: How to gate the computation of gradients. See tf.Optimizer.
    aggregation_method: Specifies the method used to combine gradient terms.
      Valid values are defined in the class `AggregationMethod`.
    colocate_gradients_with_ops: Whether or not to try colocating the gradients
      with the ops that generated them.

  Returns:
    A `Tensor` that when evaluated, computes the gradients and returns the total
    loss value.
  """
  if global_step is None:
    global_step = variables.get_or_create_global_step()

  update_ops = set(update_ops or [])

  # Make sure update_ops are computed before total_loss.
  if update_ops:
    with control_flow_ops.control_dependencies(update_ops):
      barrier = control_flow_ops.no_op(name='update_barrier')
    total_loss = control_flow_ops.with_dependencies([barrier], total_loss)

  if variables_to_train is None:
    # Default to tf.trainable_variables()
    variables_to_train = tf_variables.trainable_variables()
  else:
    # Make sure that variables_to_train are in tf.trainable_variables()
    for v in variables_to_train:
      assert v in tf_variables.trainable_variables()

  assert variables_to_train

  # Create the gradients. Note that apply_gradients adds the gradient
  # computation to the current graph.
  grads = optimizer.compute_gradients(
      total_loss, variables_to_train, gate_gradients=gate_gradients,
      aggregation_method=aggregation_method,
      colocate_gradients_with_ops=colocate_gradients_with_ops)

  # Clip gradients.
  if clip_gradient_norm > 0:
    grads = clip_gradient_norms(grads, clip_gradient_norm)

  # Summarize gradients.
  if summarize_gradients:
    for grad, var in grads:
      if grad is not None:
        if isinstance(grad, ops.IndexedSlices):
          grad_values = grad.values
        else:
          grad_values = grad
        logging_ops.histogram_summary(var.op.name + ':gradient', grad_values)
        logging_ops.histogram_summary(var.op.name + ':gradient_norm',
                                      clip_ops.global_norm([grad_values]))
      else:
        logging.info('Var %s has no gradient', var.op.name)

  # Create gradient updates.
  grad_updates = optimizer.apply_gradients(grads, global_step=global_step)

  # Make sure total_loss is valid.
  total_loss = array_ops.check_numerics(total_loss, 'LossTensor is inf or nan')

  # Ensure the train_tensor computes grad_updates.
  return control_flow_ops.with_dependencies([grad_updates], total_loss)

Developer: AngleFork | Project: tensorflow | Lines of code: 91 | Source file: learning.py
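A hypothetical call to the function above (the loss and optimizer below are placeholders invented for illustration; the call assumes the module's own imports):

import tensorflow as tf

w = tf.get_variable('w', shape=[5], initializer=tf.ones_initializer())
total_loss = tf.reduce_sum(tf.square(w))

optimizer = tf.train.MomentumOptimizer(learning_rate=0.01, momentum=0.9)
train_op = create_train_op(
    total_loss, optimizer,
    clip_gradient_norm=4.0,      # clip each gradient's norm individually
    summarize_gradients=True)    # adds the per-gradient histogram summaries above

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    loss_value = sess.run(train_op)   # one update step; returns the loss value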
Example 16: optimize_loss

#......... part of the code omitted here .........
      raise ValueError("Unknown automatic loss scaling algorithm: %s."
                       % automatic_loss_scaling)
    if dtype != "mixed":
      raise ValueError("Automatic loss scaling can be used only with "
                       "dtype=mixed.")
    loss_scale = AutomaticLossScaler(algorithm=automatic_loss_scaling)

  if dtype == 'mixed':
    opt = MixedPrecisionOptimizerWrapper(opt, loss_scale=loss_scale)
  if on_horovod:
    opt = DistributedOptimizer(opt)

  # Compute gradients.
  gradients = opt.compute_gradients(
      loss, variables,
      colocate_gradients_with_ops=colocate_gradients_with_ops,
  )

  # Optionally add gradient noise.
  if gradient_noise_scale is not None:
    gradients = _add_scaled_noise_to_gradients(gradients,
                                               gradient_noise_scale)

  # Multiply some gradients.
  if gradient_multipliers is not None:
    gradients = _multiply_gradients(gradients, gradient_multipliers)
    if not gradients:
      raise ValueError(
          "Empty list of (gradient, var) pairs encountered. This is most "
          "likely to be caused by an improper value of gradient_multipliers.")

  if "global_gradient_norm" in summaries or "gradient_norm" in summaries:
    summary.scalar(
        "global_norm/gradient_norm",
        clip_ops.global_norm(list(map(
            lambda x: tf.cast(x, tf.float32),
            list(zip(*gradients))[0])
        )),
    )

  # Optionally clip gradients by global norm.
  if clip_gradients is not None and larc_params is not None:
    raise AttributeError(
        "LARC and gradient norm clipping should not be used together"
    )
  if isinstance(clip_gradients, float):
    gradients = _clip_gradients_by_norm(gradients, clip_gradients)
  elif callable(clip_gradients):
    gradients = clip_gradients(gradients)
  elif clip_gradients is not None:
    raise ValueError(
        "Unknown type %s for clip_gradients" % type(clip_gradients))

  # Add histograms for variables, gradients and gradient norms.
  for gradient, variable in gradients:
    if isinstance(gradient, ops.IndexedSlices):
      grad_values = gradient.values
    else:
      grad_values = gradient

    if isinstance(variable, ops.IndexedSlices):
      var_values = variable.values
    else:
      var_values = variable

    if grad_values is not None:

Developer: fotwo | Project: OpenSeq2Seq | Lines of code: 67 | Source file: optimizers.py
Example 17: optimize_loss

def optimize_loss(loss,
                  global_step,
                  learning_rate,
                  optimizer,
                  clip_gradients=None,
                  moving_average_decay=0.9,
                  learning_rate_decay_fn=None,
                  variables=None):
  """Given loss and parameters for optimizer, returns a training op.

  Args:
    loss: Tensor, 0 dimensional.
    global_step: Tensor, step counter for each update.
    learning_rate: float or Tensor, magnitude of update per each training step.
    optimizer: string, class or optimizer instance, used as trainer.
      string should be name of optimizer, like 'SGD',
      'Adam', 'Adagrad'. Full list in OPTIMIZER_CLS_NAMES constant.
      class should be sub-class of tf.Optimizer that implements
      `compute_gradients` and `apply_gradients` functions.
      optimizer instance shoul