This page collects typical usage examples of the Python function tensorflow.clip_by_norm. If you are wondering what tf.clip_by_norm does, what its arguments mean, or how to call it in practice, the curated examples below should help.
Twenty code examples of clip_by_norm are listed, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python samples.
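Before the examples, here is a minimal sketch (ours, not from any of the projects below) of what tf.clip_by_norm actually computes: if the L2 norm of the input exceeds clip_norm, the tensor is rescaled by clip_norm / norm; otherwise it is returned unchanged. TF 1.x style, matching the examples on this page:

import tensorflow as tf  # assumes TensorFlow 1.x

t = tf.constant([3.0, 4.0])        # L2 norm = sqrt(9 + 16) = 5
clipped = tf.clip_by_norm(t, 4.0)  # 5 > 4, so every element is scaled by 4/5

with tf.Session() as sess:
    print(sess.run(clipped))       # [2.4  3.2]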
Example 1: clip_by_norm

def clip_by_norm(gvs, grad_norm_thresh, scope="grad_clip"):
    """Clip gradients by norm, under an optional name scope.

    Args:
      gvs: list of (gradient, variable) tuples
      grad_norm_thresh: norm threshold to clip to
      scope: name scope for the clip operations
    """
    def _clip(gvs):
        new_gvs = []
        for grad, var in gvs:
            # `if grad:` would be wrong here -- truth-testing a Tensor raises
            # a TypeError, and a missing gradient is None, not falsy.
            if grad is not None:
                new_gvs.append((tf.clip_by_norm(grad, grad_norm_thresh), var))
            else:
                print("no gradient for %s" % var.op.name)
                new_gvs.append((grad, var))
        return new_gvs

    if scope:
        with tf.name_scope(scope):
            return _clip(gvs)
    return _clip(gvs)

Author: ulysseses | Project: sr_exp2 | Lines: 35 | Source: tools.py
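A hedged sketch of how the helper above might be wired into a training step; the loss and optimizer here are hypothetical stand-ins, not part of the original project:

# Hypothetical usage of the clip_by_norm helper from Example 1 (TF 1.x):
opt = tf.train.AdamOptimizer(1e-3)
gvs = opt.compute_gradients(loss)  # `loss` stands in for your model's loss tensor
clipped_gvs = clip_by_norm(gvs, grad_norm_thresh=5.0, scope="grad_clip")
train_op = opt.apply_gradients(clipped_gvs)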
Example 2: two_linear

def two_linear(self, xin, linear_size, residual, dropout_keep_prob, max_norm, batch_norm, dtype, idx):
    """Make a bi-linear block with an optional residual connection.

    Args:
      xin: the batch that enters the block
      linear_size: integer. The size of the linear units
      residual: boolean. Whether to add a residual connection
      dropout_keep_prob: float in [0, 1]. Probability of keeping a unit
      max_norm: boolean. Whether to clip the weights so their L2 norm is at most 1
      batch_norm: boolean. Whether to apply batch normalization
      dtype: type of the weights. Usually tf.float32
      idx: integer. Index of the layer (for naming/scoping)
    Returns:
      y: the batch after it leaves the block
    """
    with vs.variable_scope("two_linear_" + str(idx)) as scope:
        input_size = int(xin.get_shape()[1])

        # Linear 1
        w2 = tf.get_variable(name="w2_" + str(idx), initializer=kaiming, shape=[input_size, linear_size], dtype=dtype)
        b2 = tf.get_variable(name="b2_" + str(idx), initializer=kaiming, shape=[linear_size], dtype=dtype)
        w2 = tf.clip_by_norm(w2, 1) if max_norm else w2
        y = tf.matmul(xin, w2) + b2
        if batch_norm:
            y = tf.layers.batch_normalization(y, training=self.isTraining, name="batch_normalization1" + str(idx))
        y = tf.nn.relu(y)
        y = tf.nn.dropout(y, dropout_keep_prob)

        # Linear 2
        w3 = tf.get_variable(name="w3_" + str(idx), initializer=kaiming, shape=[linear_size, linear_size], dtype=dtype)
        b3 = tf.get_variable(name="b3_" + str(idx), initializer=kaiming, shape=[linear_size], dtype=dtype)
        w3 = tf.clip_by_norm(w3, 1) if max_norm else w3
        y = tf.matmul(y, w3) + b3
        if batch_norm:
            y = tf.layers.batch_normalization(y, training=self.isTraining, name="batch_normalization2" + str(idx))
        y = tf.nn.relu(y)
        y = tf.nn.dropout(y, dropout_keep_prob)

        # Residual every 2 blocks
        y = (xin + y) if residual else y

    return y

Author: neherh | Project: 3d-pose-baseline | Lines: 48 | Source: linear_model.py
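Unlike most examples on this page, Example 2 clips weights rather than gradients: each time the graph is built, w2 and w3 are replaced by norm-capped copies, i.e. a max-norm constraint enforced in the forward pass. A small illustrative sketch of the effect (our own values, assuming TF 1.x):

w = tf.constant([[0.6, 0.8],
                 [1.2, 1.6]])       # total L2 norm = sqrt(5) ~ 2.24
w_capped = tf.clip_by_norm(w, 1.0)  # rescaled so the whole matrix's L2 norm is 1

with tf.Session() as sess:
    print(sess.run(tf.norm(w_capped)))  # ~1.0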
Example 3: __init__
def __init__(self, sess, pred_network, env, stat, conf, target_network=None):
    super(DeepQ, self).__init__(sess, pred_network, target_network, env, stat, conf)

    # Optimizer
    with tf.variable_scope('optimizer'):
        self.targets = tf.placeholder('float32', [None], name='target_q_t')
        self.actions = tf.placeholder('int64', [None], name='action')

        actions_one_hot = tf.one_hot(self.actions, self.env.action_size, 1.0, 0.0, name='action_one_hot')
        pred_q = tf.reduce_sum(self.pred_network.outputs * actions_one_hot, reduction_indices=1, name='q_acted')

        self.delta = self.targets - pred_q
        if self.max_delta and self.min_delta:
            self.delta = tf.clip_by_value(self.delta, self.min_delta, self.max_delta, name='clipped_delta')

        self.loss = tf.reduce_mean(tf.square(self.delta), name='loss')

        self.learning_rate_op = tf.maximum(
            self.learning_rate_minimum,
            tf.train.exponential_decay(
                self.learning_rate,
                self.stat.t_op,
                self.learning_rate_decay_step,
                self.learning_rate_decay,
                staircase=True))

        optimizer = tf.train.RMSPropOptimizer(
            self.learning_rate_op, momentum=0.95, epsilon=0.01)

        grads_and_vars = optimizer.compute_gradients(self.loss)
        for idx, (grad, var) in enumerate(grads_and_vars):
            if grad is not None:
                grads_and_vars[idx] = (tf.clip_by_norm(grad, self.max_grad_norm), var)
        self.optim = optimizer.apply_gradients(grads_and_vars)
Author: ashiqrh | Project: deep-rl-tensorflow | Lines: 33 | Source: deep_q.py
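Example 3 clips each gradient tensor independently, which changes the ratio between per-layer gradients. A common alternative (a hedged sketch, not what this project does) is tf.clip_by_global_norm, which rescales all gradients jointly so their combined norm stays under the threshold:

# `grads_and_vars`, `optimizer`, and `max_grad_norm` are hypothetical stand-ins (TF 1.x).
grads, variables = zip(*grads_and_vars)
clipped, global_norm = tf.clip_by_global_norm(grads, max_grad_norm)
train_op = optimizer.apply_gradients(zip(clipped, variables))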
Example 4: dpg

def dpg(q_max, a_max, dqda_clipping=None, clip_norm=False, name="DpgLearning"):
    """Implements the Deterministic Policy Gradient (DPG) loss as a TensorFlow Op.

    This op implements the loss for the `actor`; the `critic` can instead be
    updated by minimizing the `value_ops.td_learning` loss.

    See "Deterministic Policy Gradient Algorithms" by Silver, Lever, Heess,
    Degris, Wierstra, Riedmiller (http://proceedings.mlr.press/v32/silver14.pdf).

    Args:
      q_max: Tensor holding Q-values generated by the Q network with the input
        (state, a_max) pair, shape `[B]`.
      a_max: Tensor holding the optimal action, shape `[B, action_dimension]`.
      dqda_clipping: `int` or `float`, clips the gradient dqda element-wise
        between `[-dqda_clipping, dqda_clipping]`.
      clip_norm: Whether to perform dqda clipping on the vector norm of the last
        dimension, or component-wise (default).
      name: name to prefix ops created within this op.

    Returns:
      A namedtuple with fields:
      * `loss`: a tensor containing the batch of losses, shape `[B]`.
      * `extra`: a namedtuple with fields:
        * `q_max`: Tensor holding the optimal Q values, `[B]`.
        * `a_max`: Tensor holding the optimal action, `[B, action_dimension]`.
        * `dqda`: Tensor holding the derivative dq/da, `[B, action_dimension]`.

    Raises:
      ValueError: If `q_max` doesn't depend on `a_max` or if `dqda_clipping <= 0`.
    """
    # DPG op.
    with tf.name_scope(name, values=[q_max, a_max]):
        # Calculate the gradient dq/da.
        dqda = tf.gradients([q_max], [a_max])[0]

        # Check that `q_max` depends on `a_max`.
        if dqda is None:
            raise ValueError("q_max needs to be a function of a_max")

        # Clipping the gradient dq/da.
        if dqda_clipping is not None:
            if dqda_clipping <= 0:
                raise ValueError("dqda_clipping should be bigger than 0, {} found"
                                 .format(dqda_clipping))
            if clip_norm:
                dqda = tf.clip_by_norm(dqda, dqda_clipping, axes=-1)
            else:
                dqda = tf.clip_by_value(dqda, -1. * dqda_clipping, dqda_clipping)

        # target_a ensures the correct gradient is calculated during backprop.
        target_a = dqda + a_max
        # Stop the gradient from flowing through the Q network during backprop.
        target_a = tf.stop_gradient(target_a)
        # The gradient only flows through the actor network.
        loss = 0.5 * tf.reduce_sum(tf.square(target_a - a_max), axis=-1)
        return base_ops.LossOutput(
            loss, DPGExtra(q_max=q_max, a_max=a_max, dqda=dqda))

Author: wmiao1769 | Project: trfl | Lines: 60 | Source: dpg_ops.py
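A hedged sketch of how the dpg op above might be wired into an actor update; actor_network, critic_network, and the "actor" variable scope are hypothetical placeholders, not part of trfl:

# Hypothetical actor-critic wiring for the dpg op above (TF 1.x):
a_max = actor_network(state)          # shape [B, action_dimension]
q_max = critic_network(state, a_max)  # shape [B]

dpg_loss, dpg_extra = dpg(q_max, a_max, dqda_clipping=1.0, clip_norm=False)
actor_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="actor")
train_actor = tf.train.AdamOptimizer(1e-4).minimize(
    tf.reduce_mean(dpg_loss), var_list=actor_vars)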
Example 5: _init_train

def _init_train(self):
    readout = tf.stop_gradient(self.target_network.readout)
    # 0 if terminal, max(prediction) if not
    future_rewards = tf.reduce_max(readout, reduction_indices=[1,]) * (1 - self.terminals)
    tf.histogram_summary("rewards_future", future_rewards)
    wanted = self.rewards + self.settings['discount'] * future_rewards
    tf.histogram_summary("rewards_wanted", wanted)

    current = tf.reduce_sum(
        self.act_network.readout * self.action_mask,
        reduction_indices=[1,],
        name="rewards_current"
    )
    tf.histogram_summary("rewards_current", current)

    loss = tf.square(current - wanted)
    self.error = tf.reduce_sum(loss, name="prediction_error")
    tf.scalar_summary('error', self.error)

    grad_vars = self.settings['optimizer'].compute_gradients(self.error)
    # `grad is not None`, not `if grad:` -- truth-testing a Tensor raises a TypeError.
    clipped_grad_vars = [(tf.clip_by_norm(grad, 10) if grad is not None else None, var)
                         for (grad, var) in grad_vars]

    for grad, var in clipped_grad_vars:
        tf.histogram_summary(var.name, var)
        if grad is not None:
            tf.histogram_summary(var.name + "_clipgrad", grad)

    self.train_op = self.settings['optimizer'].apply_gradients(clipped_grad_vars, global_step=self.global_step)

Author: amharc | Project: jnp3 | Lines: 33 | Source: model.py
Example 6: make_tf_Linv
def make_tf_Linv(layer, V_shape, c_shape, lr, act=tf.nn.tanh):
    """ builds graph for layer-local training of V and c """
    with tf.name_scope('layer' + str(layer) + '_inv') as scope:
        V = tf.get_variable(scope + 'V', shape=V_shape, dtype=tf.float32, initializer=tf.orthogonal_initializer(0.95))
        #V = tf.get_variable(scope+'V', shape=V_shape, dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer(uniform=True, seed=None, dtype=tf.float32))
        c = tf.get_variable(scope + 'c', shape=c_shape, dtype=tf.float32, initializer=tf.constant_initializer(0.))
        W = tf.placeholder(tf.float32, shape=[V_shape[1], V_shape[0]], name='W')
        b = tf.placeholder(tf.float32, shape=[1, V_shape[0]], name='b')
        x_0 = tf.placeholder(tf.float32, shape=[None, V_shape[1]], name='input')
        fx = act(tf.matmul(x_0, W) + b)
        loss = 0.5 * tf.reduce_mean((act(tf.matmul(fx, V) + c) - x_0)**2, name='loss')
        s1 = tf.summary.scalar('log_loss' + str(layer), tf.log(loss))
        s2 = tf.summary.histogram('V' + str(layer), V)
        s3 = tf.summary.histogram('c' + str(layer), c)
        opt = tf.train.RMSPropOptimizer(lr)
        gvs = opt.compute_gradients(loss, var_list=[V, c])
        # var.name is 'namescope/V:0' and we want just 'V'
        sg = [tf.summary.scalar('norm_grad' + var.name[-3], tf.nn.l2_loss(grad)) for grad, var in gvs]
        clipped_gvs = [(tf.clip_by_norm(grad, 100.), var) for grad, var in gvs]
        return opt.apply_gradients(clipped_gvs), tf.summary.merge([s1] + sg)
Author: jsseely | Project: tensorflow-target-prop | Lines: 25 | Source: tprop_train_stable.py
Example 7: create_update_op_backup
def create_update_op_backup(self):
    optimizer = tf.train.MomentumOptimizer(self.config.learning_rate, self.config.momentum)
    #self.update_op = optimizer.minimize(self.loss)
    g_list = optimizer.compute_gradients(self.loss)
    # 000
    g_list_new = [(tf.clip_by_norm(g, 5), v) for g, v in g_list]
    # g_list_new = []
    # for g, v in g_list:
    #     g_not_finite = tf.logical_or(tf.is_nan(g), tf.is_inf(g))
    #     # 001
    #     g = tf.select(g_not_finite, tf.zeros_like(g), g)
    #     g = tf.clip_by_norm(g, 5)
    #     g = tf.select(g_not_finite, 0.1*v, g)
    #     # 002
    #     g = tf.convert_to_tensor(g)
    #     g_norm = tf.sqrt(tf.reduce_sum(tf.square(g)))
    #     g = tf.select(g_not_finite, 0.1*v, g*5/g_norm)
    #     g_list_new.append((g, v))
    self.update_op = optimizer.apply_gradients(g_list_new)
    return
Author: jjery2243542 | Project: tf_rnn | Lines: 26 | Source: tf_rnn.py
Example 8: train
def train(lr, total_loss, global_step):
    # Variables that affect learning rate.

    # Compute gradients.
    #with tf.control_dependencies([loss_averages_op]):
    opt = tf.train.GradientDescentOptimizer(lr)
    grads = opt.compute_gradients(total_loss)

    # Add histograms for gradients.
    for i, (grad, var) in enumerate(grads):
        if grad is not None:
            tf.histogram_summary(var.op.name + '/gradients', grad)
            grads[i] = (tf.clip_by_norm(grad, 5), var)

    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

    # Add histograms for trainable variables.
    for var in tf.trainable_variables():
        tf.histogram_summary(var.op.name, var)

    # Track the moving averages of all trainable variables.
    variable_averages = tf.train.ExponentialMovingAverage(
        MOVING_AVERAGE_DECAY, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())

    with tf.control_dependencies([apply_gradient_op, variables_averages_op]):
        train_op = tf.no_op(name='train')

    return train_op
Author: danfeiX | Project: drl | Lines: 29 | Source: dqn.py
Example 9: create_variables_for_optimization
def create_variables_for_optimization(self):
    with tf.name_scope("optimization"):
        with tf.name_scope("masker"):
            self.mask = tf.sequence_mask(self.seq_len, self.num_step)
            self.mask = tf.reshape(tf.cast(self.mask, tf.float32), (-1,))

        if self.loss_function == "cross_entropy":
            self.pl_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=self.logit,
                labels=self.actions_flatten)
        elif self.loss_function == "l2":
            self.one_hot_actions = tf.one_hot(self.actions_flatten, self.num_actions)
            self.pl_loss = tf.reduce_mean((self.probs - self.one_hot_actions) ** 2,
                                          axis=1)
        else:
            raise ValueError("loss function type is not defined")

        self.pl_loss = tf.multiply(self.pl_loss, self.mask)
        self.pl_loss = tf.reduce_mean(tf.multiply(self.pl_loss, self.returns_flatten))

        self.entropy = tf.multiply(self.entropy, self.mask)
        self.entropy = tf.reduce_mean(self.entropy)

        self.loss = self.pl_loss - self.entropy_bonus * self.entropy

        self.trainable_variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="policy_network")
        self.gradients = self.optimizer.compute_gradients(self.loss, var_list=self.trainable_variables)
        self.clipped_gradients = [(tf.clip_by_norm(grad, self.max_gradient), var)
                                  for grad, var in self.gradients]
        self.train_op = self.optimizer.apply_gradients(self.clipped_gradients,
                                                       self.global_step)
        self.grad_norm = tf.global_norm([grad for grad, var in self.gradients])
        self.var_norm = tf.global_norm(self.trainable_variables)
Author: csawtelle | Project: pg_rnn | Lines: 32 | Source: pg_rnn.py
Example 10: make_accumulated_gradients

def make_accumulated_gradients(self):
    reset_accum_grads = []
    new_grads_and_vars = []

    # 1. Prepare accum_grads
    self.accum_grads = {}
    self.add_accum_grads = {}

    for step, network in enumerate(self.networks):
        grads_and_vars = self.global_optim.compute_gradients(network.total_loss, network.w.values())
        _add_accum_grads = []

        for grad, var in tuple(grads_and_vars):
            if grad is not None:
                shape = grad.get_shape().as_list()
                name = 'accum/%s' % "/".join(var.name.split(':')[0].split('/')[-3:])

                if step == 0:
                    self.accum_grads[name] = tf.Variable(
                        tf.zeros(shape), trainable=False, name=name)
                    # `global_var` (defined elsewhere) maps local A3C-thread
                    # variable names to the shared global network's variables.
                    global_v = global_var[re.sub(r'.*\/A3C_\d+\/', '', var.name)]
                    new_grads_and_vars.append((tf.clip_by_norm(self.accum_grads[name].ref(), self.max_grad_norm), global_v))
                    reset_accum_grads.append(self.accum_grads[name].assign(tf.zeros(shape)))

                _add_accum_grads.append(tf.assign_add(self.accum_grads[name], grad))

        # 2. Add gradient to accum_grads
        self.add_accum_grads[step] = tf.group(*_add_accum_grads)

Author: BinbinBian | Project: deep-rl-tensorflow | Lines: 30 | Source: n_step_q.py
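The pattern in Example 10, reduced to its core as a hedged sketch (a single hypothetical variable; `grad`, `optimizer`, `global_var`, and `max_grad_norm` are stand-ins): accumulate per-step gradients into a non-trainable buffer, clip the buffer once, and apply it to the shared weights:

accum = tf.Variable(tf.zeros_like(grad), trainable=False, name="accum_grad")
add_grad = tf.assign_add(accum, grad)            # run once per environment step
reset_grad = accum.assign(tf.zeros_like(accum))  # run after each apply
apply_op = optimizer.apply_gradients(
    [(tf.clip_by_norm(accum, max_grad_norm), global_var)])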
Example 11: _clip_gradients

def _clip_gradients(self, grads_and_vars):
    """Clip gradients.

    Args:
        grads_and_vars (list): list of tuples of `(grads, vars)`
    Returns:
        clipped_grads_and_vars (list): list of tuples of
            `(clipped grads, vars)`
    """
    # TODO: Optionally add gradient noise

    clipped_grads_and_vars = []

    # Clip gradient norm
    for grad, var in grads_and_vars:
        if grad is not None:
            clipped_grads_and_vars.append(
                (tf.clip_by_norm(grad, clip_norm=self.clip_grad_norm),
                 var))

    # Clip gradient
    # for grad, var in grads_and_vars:
    #     if grad is not None:
    #         clipped_grads_and_vars.append(
    #             (tf.clip_by_value(grad,
    #                               clip_value_min=-self.clip_grad_norm,
    #                               clip_value_max=self.clip_grad_norm),
    #              var))

    # TODO: Add histograms for variables, gradients (norms)
    # self._tensorboard(trainable_vars)

    return clipped_grads_and_vars

Author: seasky100 | Project: tensorflow_end2end_speech_recognition | Lines: 32 | Source: model_base.py
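The commented-out branch above swaps norm clipping for value clipping; the two are not equivalent, as this small sketch (ours) shows: norm clipping preserves the gradient's direction, while value clipping clamps each element independently:

g = tf.constant([-6.0, 8.0])               # L2 norm = 10
by_norm = tf.clip_by_norm(g, 5.0)          # [-3.0, 4.0]: scaled by 5/10, direction kept
by_value = tf.clip_by_value(g, -5.0, 5.0)  # [-5.0, 5.0]: per-element clamp, direction changed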
Example 12: __init__
def __init__(self, optimizer, devices, input_placeholders,
             per_device_batch_size, build_loss, logdir,
             grad_norm_clipping=None):
    self.optimizer = optimizer
    self.devices = devices
    self.batch_size = per_device_batch_size * len(devices)
    self.per_device_batch_size = per_device_batch_size
    self.input_placeholders = input_placeholders
    self.build_loss = build_loss
    self.logdir = logdir

    # First initialize the shared loss network
    with tf.variable_scope(TOWER_SCOPE_NAME):
        self._shared_loss = build_loss(*input_placeholders)

    # Then setup the per-device loss graphs that use the shared weights
    self._batch_index = tf.placeholder(tf.int32)

    # Split on the CPU in case the data doesn't fit in GPU memory.
    with tf.device("/cpu:0"):
        data_splits = zip(
            *[tf.split(ph, len(devices)) for ph in input_placeholders])

    self._towers = []
    for device, device_placeholders in zip(self.devices, data_splits):
        self._towers.append(self._setup_device(device, device_placeholders))

    avg = average_gradients([t.grads for t in self._towers])
    if grad_norm_clipping:
        for i, (grad, var) in enumerate(avg):
            if grad is not None:
                avg[i] = (tf.clip_by_norm(grad, grad_norm_clipping), var)
    self._train_op = self.optimizer.apply_gradients(avg)
Author: adgirish | Project: ray | Lines: 34 | Source: multi_gpu_impl.py
Example 13: build_model

def build_model(self, mode, embedding_method):
    self.build_memory()
    # self.skip_model = skip.load_model()
    self.skip_model = None

    self.reg_loss = tf.mul(tf.nn.l2_loss(self.T), self.gamma, name='regularization_loss')
    self.data_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(self.probs, self.target, name='data_loss')
    self.loss = tf.add(self.reg_loss, self.data_loss, name='total_loss')
    self.average_loss = tf.reduce_mean(self.loss)
    self.opt = tf.train.GradientDescentOptimizer(self.lr)

    self.correct_prediction = tf.equal(self.target, tf.argmax(self.probs, 1))
    self.accuracy = tf.reduce_mean(tf.cast(self.correct_prediction, tf.float32))

    grads_and_vars = self.opt.compute_gradients(self.loss, self.params)
    clipped_grads_and_vars = [(tf.clip_by_norm(gv[0], 40), gv[1]) for gv in grads_and_vars]

    inc_op = self.global_step.assign_add(1)
    with tf.control_dependencies([inc_op]):
        self.apply_grad_op = self.opt.apply_gradients(clipped_grads_and_vars)

    self.saver = tf.train.Saver()

    # At inference mode
    if mode == 'inference':
        if embedding_method == 'word2vec':
            self.saver.restore(self.sess, './demo/MN_shortcut/model.ckpt')
        elif embedding_method == 'skip':
            print('Restoring model from ./demo/MN_shortcut/skip_plot_40.ckpt')
            self.saver.restore(self.sess, './demo/MN_shortcut/skip_plot_40.ckpt')
    else:
        tf.initialize_all_variables().run()

Author: fodrh1201 | Project: demo_movieQA | Lines: 29 | Source: model.py
Example 14: adv_target_net2
def adv_target_net2(input_images, clip_norm=1.5):
    with tf.variable_scope('adv_encoder') as scope:
        width = 32
        height = 32
        batch_size = 128
        # code_length = 6000
        input_images = input_images / 255
        # clip bound box
        mean, var = tf.nn.moments(input_images, axes=tuple(range(1, len(input_images.shape))), keep_dims=True)
        normed_input_images = (input_images - mean) / var

        # Convolutional layer 1
        conv1 = tf.layers.conv2d(inputs=normed_input_images,
                                 filters=32,
                                 kernel_size=(5, 5),
                                 # kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                 activation=tf.nn.leaky_relu,
                                 padding='SAME',
                                 name='adv_conv1')
        # maxpool layer 1
        maxpool1 = tf.layers.max_pooling2d(conv1, (3, 3), (2, 2), 'SAME')

        # Convolutional layer 2
        conv2 = tf.layers.conv2d(inputs=maxpool1,
                                 filters=64,
                                 kernel_size=(5, 5),
                                 # kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                 activation=tf.nn.leaky_relu,
                                 padding='SAME',
                                 name='adv_conv2')
        # maxpool layer 2
        maxpool2 = tf.layers.max_pooling2d(conv2, (3, 3), (2, 2), 'SAME')

        deconv1 = tf.layers.conv2d_transpose(maxpool2, 32, (5, 5), (2, 2), 'SAME',
                                             activation=tf.nn.leaky_relu,
                                             name='adv_deconv1')
        adv_mask = tf.layers.conv2d_transpose(deconv1, 3, (5, 5), (2, 2), 'SAME',
                                              activation=tf.nn.tanh,
                                              name='adv_deconv2')

        scaled_adv_mask = tf.clip_by_norm(adv_mask, clip_norm, axes=list(range(1, len(adv_mask.shape))))
        adv_images = tf.clip_by_value(scaled_adv_mask + input_images, 0, 1)
        output_images = tf.reshape(adv_images, (batch_size, height, width, 3)) * 255.0
        dif = adv_images - input_images
        tf.summary.image('adv_images', output_images)

        # Reconstruction L2 loss
        mean_square_error = tf.reduce_mean(tf.square(dif), axis=list(range(1, len(dif.shape))))
        loss = tf.reduce_mean(mean_square_error, name='dis_loss')

    return loss, output_images
Author: Jack-lx-jiang | Project: Adversarial-Example-Generative-Net | Lines: 59 | Source: adv_net.py
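Example 14 passes `axes` so each image in the batch is clipped on its own norm rather than on the batch's joint norm. A minimal sketch of that distinction (ours, TF 1.x):

batch = tf.random_normal([128, 32, 32, 3])
# Without axes: a single norm computed over the entire batch tensor.
whole = tf.clip_by_norm(batch, 1.5)
# With axes=[1, 2, 3]: one norm per example, over its H, W, C dimensions.
per_example = tf.clip_by_norm(batch, 1.5, axes=[1, 2, 3])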
Example 15: build_model

def build_model(self, reuse, dev, ntype):
    # Note: the original used `with tf.variable_scope(self.name) and tf.device(dev):`,
    # which enters only the second context manager; both are intended here.
    with tf.variable_scope(self.name), tf.device(dev):
        if reuse:
            tf.get_variable_scope().reuse_variables()
            assert tf.get_variable_scope().reuse

        # Set inputs of networks
        self.minimap = tf.placeholder(tf.float32, [None, U.minimap_channel(), self.msize, self.msize], name='minimap')
        self.screen = tf.placeholder(tf.float32, [None, U.screen_channel(), self.ssize, self.ssize], name='screen')
        self.info = tf.placeholder(tf.float32, [None, self.isize], name='info')

        # Build networks
        net = build_net(self.minimap, self.screen, self.info, self.msize, self.ssize, len(actions.FUNCTIONS), ntype)
        self.spatial_action, self.non_spatial_action, self.value = net

        # Set targets and masks
        self.valid_spatial_action = tf.placeholder(tf.float32, [None], name='valid_spatial_action')
        self.spatial_action_selected = tf.placeholder(tf.float32, [None, self.ssize**2], name='spatial_action_selected')
        self.valid_non_spatial_action = tf.placeholder(tf.float32, [None, len(actions.FUNCTIONS)], name='valid_non_spatial_action')
        self.non_spatial_action_selected = tf.placeholder(tf.float32, [None, len(actions.FUNCTIONS)], name='non_spatial_action_selected')
        self.value_target = tf.placeholder(tf.float32, [None], name='value_target')

        # Compute log probability
        spatial_action_prob = tf.reduce_sum(self.spatial_action * self.spatial_action_selected, axis=1)
        spatial_action_log_prob = tf.log(tf.clip_by_value(spatial_action_prob, 1e-10, 1.))
        non_spatial_action_prob = tf.reduce_sum(self.non_spatial_action * self.non_spatial_action_selected, axis=1)
        valid_non_spatial_action_prob = tf.reduce_sum(self.non_spatial_action * self.valid_non_spatial_action, axis=1)
        valid_non_spatial_action_prob = tf.clip_by_value(valid_non_spatial_action_prob, 1e-10, 1.)
        non_spatial_action_prob = non_spatial_action_prob / valid_non_spatial_action_prob
        non_spatial_action_log_prob = tf.log(tf.clip_by_value(non_spatial_action_prob, 1e-10, 1.))
        self.summary.append(tf.summary.histogram('spatial_action_prob', spatial_action_prob))
        self.summary.append(tf.summary.histogram('non_spatial_action_prob', non_spatial_action_prob))

        # Compute losses, more details in https://arxiv.org/abs/1602.01783
        # Policy loss and value loss
        action_log_prob = self.valid_spatial_action * spatial_action_log_prob + non_spatial_action_log_prob
        advantage = tf.stop_gradient(self.value_target - self.value)
        policy_loss = -tf.reduce_mean(action_log_prob * advantage)
        value_loss = -tf.reduce_mean(self.value * advantage)
        self.summary.append(tf.summary.scalar('policy_loss', policy_loss))
        self.summary.append(tf.summary.scalar('value_loss', value_loss))

        # TODO: policy penalty
        loss = policy_loss + value_loss

        # Build the optimizer
        self.learning_rate = tf.placeholder(tf.float32, None, name='learning_rate')
        opt = tf.train.RMSPropOptimizer(self.learning_rate, decay=0.99, epsilon=1e-10)
        grads = opt.compute_gradients(loss)
        clipped_grads = []
        for grad, var in grads:
            self.summary.append(tf.summary.histogram(var.op.name, var))
            self.summary.append(tf.summary.histogram(var.op.name + '/grad', grad))
            grad = tf.clip_by_norm(grad, 10.0)
            clipped_grads.append([grad, var])
        self.train_op = opt.apply_gradients(clipped_grads)
        self.summary_op = tf.summary.merge(self.summary)

    self.saver = tf.train.Saver(max_to_keep=100)

Author: fanyp17 | Project: pysc2-agents | Lines: 59 | Source: a3c_agent.py
Example 16: create_variables

def create_variables(self):
    self.target_q_network = self.q_network.copy(scope="target_network")

    # FOR REGULAR ACTION SCORE COMPUTATION
    with tf.name_scope("taking_action"):
        self.observation = self.q_network.input_placeholder("observation")
        self.action_scores = tf.identity(self.q_network(self.observation), name="action_scores")
        tf.histogram_summary("action_scores", self.action_scores)
        self.predicted_actions = tf.argmax(self.action_scores, dimension=1, name="predicted_actions")

    with tf.name_scope("estimating_future_rewards"):
        # FOR PREDICTING TARGET FUTURE REWARDS
        self.next_observation = self.q_network.input_placeholder("next_observation")
        self.next_observation_mask = tf.placeholder(tf.float32,
                                                    (None,),
                                                    name="next_observation_mask")
        self.next_action_scores = self.target_q_network(self.next_observation)
        tf.histogram_summary("target_action_scores", self.next_action_scores)
        self.rewards = tf.placeholder(tf.float32, (None,), name="rewards")
        target_values = \
            tf.reduce_max(self.next_action_scores, reduction_indices=[1,]) * self.next_observation_mask
        self.future_rewards = self.rewards + self.discount_rate * target_values

    with tf.name_scope("q_value_prediction"):  # scope name typo fixed ("precition")
        # FOR PREDICTION ERROR
        self.action_mask = tf.placeholder(tf.float32,
                                          self.q_network.output_shape(),
                                          name="action_mask")
        self.masked_action_scores = tf.reduce_sum(self.action_scores * self.action_mask, reduction_indices=[1,])
        temp_diff = self.masked_action_scores - self.future_rewards
        self.prediction_error = tf.reduce_mean(tf.square(temp_diff))

        gradients = self.optimizer.compute_gradients(
            self.prediction_error,
            var_list=self.q_network.variables())
        for i, (grad, var) in enumerate(gradients):
            if grad is not None:
                gradients[i] = (tf.clip_by_norm(grad, 5), var)
        # Add histograms for gradients.
        for grad, var in gradients:
            tf.histogram_summary(var.name, var)
            if grad is not None:
                tf.histogram_summary(var.name + '/gradients', grad)
        self.train_op = self.optimizer.apply_gradients(gradients)

    # UPDATE TARGET NETWORK
    with tf.name_scope("target_network_update"):
        self.target_network_update = []
        for v_source, v_target in zip(self.q_network.variables(), self.target_q_network.variables()):
            # this is equivalent to target = (1-alpha) * target + alpha * source
            update_op = v_target.assign_sub(self.target_network_update_rate * (v_target - v_source))
            self.target_network_update.append(update_op)
        self.target_network_update = tf.group(*self.target_network_update)

    # summaries
    tf.scalar_summary("prediction_error", self.prediction_error)
    self.summarize = tf.merge_all_summaries()
    self.no_op1 = tf.no_op()

Author: imclab | Project: deeprl | Lines: 59 | Source: discrete_deepq.py
Example 17: clip_by_norm
def clip_by_norm(tensor, clip_norm, axes=None, name=None):
    """Implement clip_by_norm in Tensorflow backend.

    See :func:`luchador.nn.ops.clip_by_norm` for the detail.
    """
    _tensor = tf.clip_by_norm(
        tensor.unwrap(), clip_norm=clip_norm, axes=axes, name=name)
    return Tensor(tensor=_tensor, name=name)
Author: mthrok | Project: luchador | Lines: 8 | Source: clip.py
Example 18: flatgrad

def flatgrad(loss, var_list, clip_norm=None):
    grads = tf.gradients(loss, var_list)
    if clip_norm is not None:
        # Note: this assumes no entry in `grads` is None; a None gradient
        # would make tf.clip_by_norm raise before the None-check below.
        grads = [tf.clip_by_norm(grad, clip_norm=clip_norm) for grad in grads]
    return tf.concat(axis=0, values=[
        tf.reshape(grad if grad is not None else tf.zeros_like(v), [numel(v)])
        for (v, grad) in zip(var_list, grads)
    ])

Author: Divyankpandey | Project: baselines | Lines: 8 | Source: tf_util.py
Example 19: create_grads

def create_grads(self, loss, exclude, network, global_network):
    vs = list(set(network.var.keys()) - exclude)
    gs = tf.gradients(loss, [network.var[v] for v in vs])
    for i in range(len(gs)):  # was xrange (Python 2)
        if self.max_grad_norm > 0.:
            gs[i] = tf.clip_by_norm(gs[i], self.max_grad_norm)
        gs[i] /= self.n_threads
    return zip(gs, map(global_network.var.get, vs))

Author: rhaps0dy | Project: deep-rl-tensorflow | Lines: 8 | Source: async_agent.py
Example 20: testClipByNormClipped

def testClipByNormClipped(self):
    # Norm clipping when clip_norm < 5
    with self.test_session():
        x = tf.constant([-3.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3])
        # Norm of x = sqrt(3^2 + 4^2) = 5
        np_ans = [[-2.4, 0.0, 0.0],
                  [3.2, 0.0, 0.0]]
        clip_norm = 4.0

        # Clip with a Python float...
        ans = tf.clip_by_norm(x, clip_norm)
        tf_ans = ans.eval()

        # ...and with a scalar Tensor. (The original snippet repeated the
        # float call here, leaving clip_tensor unused.)
        clip_tensor = tf.constant(4.0)
        ans = tf.clip_by_norm(x, clip_tensor)
        tf_ans_tensor = ans.eval()

        self.assertAllClose(np_ans, tf_ans)
        self.assertAllClose(np_ans, tf_ans_tensor)

Author: JamesFysh | Project: tensorflow | Lines: 17 | Source: clip_ops_test.py
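The expected values in this test follow directly from the rescaling rule; a plain-NumPy check (ours), independent of TensorFlow:

import numpy as np

x = np.array([[-3.0, 0.0, 0.0],
              [4.0, 0.0, 0.0]])
norm = np.sqrt((x ** 2).sum())  # sqrt(9 + 16) = 5
print(x * 4.0 / norm)           # [[-2.4  0.  0.]  [ 3.2  0.  0.]]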
Note: the tensorflow.clip_by_norm examples on this page were compiled from source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by various developers; copyright remains with the original authors, and any further use or distribution is subject to each project's license. Do not repost without permission.