This article collects typical usage examples of the Python function tensorflow.stop_gradient. If you have been wondering what stop_gradient actually does, how to call it, or what it looks like in real code, the curated examples below should help.
The 20 code examples of stop_gradient shown below are sorted by popularity by default; upvoting the examples you find useful helps the system surface better Python code examples.
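Before the examples, a minimal sketch of what tf.stop_gradient does (assuming TensorFlow 1.x graph mode, which all of the examples below use): it acts as an identity in the forward pass but blocks backpropagation, so everything downstream treats its output as a constant.

import tensorflow as tf

x = tf.constant(3.0)
y = x * x                            # dy/dx = 2x = 6
y_blocked = tf.stop_gradient(x) * x  # first factor treated as a constant, so d/dx = 3

with tf.Session() as sess:
    print(sess.run(tf.gradients(y, x)[0]))          # 6.0
    print(sess.run(tf.gradients(y_blocked, x)[0]))  # 3.0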
Example 1: get_next_input
def get_next_input(output):
    # the next location is computed by the location network
    baseline = tf.sigmoid(tf.matmul(output, Wb_h_b) + Bb_h_b)
    baselines.append(baseline)
    # compute the next location, then impose noise
    if eyeCentered:
        # add the last sampled glimpse location
        # TODO max(-1, min(1, u + N(output, sigma) + prevLoc))
        mean_loc = tf.maximum(-1.0, tf.minimum(1.0, tf.matmul(output, Wl_h_l) + sampled_locs[-1]))
    else:
        mean_loc = tf.matmul(output, Wl_h_l)
    # mean_loc = tf.stop_gradient(mean_loc)
    mean_locs.append(mean_loc)
    mean_locs_stopGrad.append(tf.stop_gradient(mean_loc))
    # add noise
    # sample_loc = tf.tanh(mean_loc + tf.random_normal(mean_loc.get_shape(), 0, loc_sd))
    sample_loc = tf.maximum(-1.0, tf.minimum(1.0, mean_loc + tf.random_normal(mean_loc.get_shape(), 0, loc_sd)))
    # don't propagate through the locations
    # sample_loc = tf.stop_gradient(sample_loc)
    sampled_locs.append(sample_loc)
    sampled_locs_stopGrad.append(tf.stop_gradient(sample_loc))
    return get_glimpse(sample_loc)
Developer: QihongL, Project: RAM, Lines of code: 26, Source file: ram.py
Example 2: context_infer
def context_infer(pooled_features):
    with tf.variable_scope("fc", reuse=True):
        weights = tf.stop_gradient(tf.get_variable("weights"))
        # b = tf.stop_gradient(tf.get_variable("biases"))
    z = tf.stop_gradient(pooled_features)  # Nx64
    z = tf.expand_dims(z, -1)  # Nx64x1
    w = weights  # 64x10
    w = tf.expand_dims(w, 0)  # 1x64x10
    mean, variance = tf.nn.moments(w, [1], keep_dims=True)  # 1x1x10
    response = tf.reduce_sum(tf.mul(z, w), 1, keep_dims=True)  # Nx1x10
    response_vec = tf.mul(response, w)  # Nx64x10
    response_vec = tf.div(response_vec, variance)  # Nx64x10
    h = tf.sub(z, response_vec)  # Nx64x10
    weights_initializer = tf.truncated_normal_initializer(
        stddev=FC_WEIGHT_STDDEV)
    with tf.variable_scope("context", reuse=True):
        context_weights = tf.stop_gradient(tf.get_variable("weights"))
        biases = tf.stop_gradient(tf.get_variable("biases"))
    context_weights = tf.expand_dims(context_weights, 0)
    biases = tf.expand_dims(biases, 0)
    scores = tf.reduce_sum(tf.mul(h, context_weights), 1) + biases
    # TODO how to deal with b?
    return scores
Developer: yihui-he, Project: context-cifar, Lines of code: 27, Source file: cifar10.py
Example 3: get_dynamic_rebar_gradient
def get_dynamic_rebar_gradient(self):
    """Get the dynamic rebar gradient (t, eta optimized)."""
    tiled_pre_temperature = tf.tile([self.pre_temperature_variable],
                                    [self.batch_size])
    temperature = tf.exp(tiled_pre_temperature)
    hardELBO, nvil_gradient, logQHard = self._create_hard_elbo()
    if self.hparams.quadratic:
        gumbel_cv, extra = self._create_gumbel_control_variate_quadratic(logQHard, temperature=temperature)
    else:
        gumbel_cv, extra = self._create_gumbel_control_variate(logQHard, temperature=temperature)
    f_grads = self.optimizer_class.compute_gradients(tf.reduce_mean(-nvil_gradient))
    eta = {}
    h_grads, eta_statistics = self.multiply_by_eta_per_layer(
        self.optimizer_class.compute_gradients(tf.reduce_mean(gumbel_cv)),
        eta)
    model_grads = U.add_grads_and_vars(f_grads, h_grads)
    total_grads = model_grads
    # Construct the variance objective
    g = U.vectorize(model_grads, set_none_to_zero=True)
    self.maintain_ema_ops.append(self.ema.apply([g]))
    gbar = 0  # tf.stop_gradient(self.ema.average(g))
    variance_objective = tf.reduce_mean(tf.square(g - gbar))
    reinf_g_t = 0
    if self.hparams.quadratic:
        for layer in xrange(self.hparams.n_layer):
            gumbel_learning_signal, _ = extra[layer]
            df_dt = tf.gradients(gumbel_learning_signal, tiled_pre_temperature)[0]
            reinf_g_t_i, _ = self.multiply_by_eta_per_layer(
                self.optimizer_class.compute_gradients(tf.reduce_mean(tf.stop_gradient(df_dt) * logQHard[layer])),
                eta)
            reinf_g_t += U.vectorize(reinf_g_t_i, set_none_to_zero=True)
        reparam = tf.add_n([reparam_i for _, reparam_i in extra])
    else:
        gumbel_learning_signal, reparam = extra
        df_dt = tf.gradients(gumbel_learning_signal, tiled_pre_temperature)[0]
        reinf_g_t, _ = self.multiply_by_eta_per_layer(
            self.optimizer_class.compute_gradients(tf.reduce_mean(tf.stop_gradient(df_dt) * tf.add_n(logQHard))),
            eta)
        reinf_g_t = U.vectorize(reinf_g_t, set_none_to_zero=True)
    reparam_g, _ = self.multiply_by_eta_per_layer(
        self.optimizer_class.compute_gradients(tf.reduce_mean(reparam)),
        eta)
    reparam_g = U.vectorize(reparam_g, set_none_to_zero=True)
    reparam_g_t = tf.gradients(tf.reduce_mean(2 * tf.stop_gradient(g - gbar) * reparam_g), self.pre_temperature_variable)[0]
    variance_objective_grad = tf.reduce_mean(2 * (g - gbar) * reinf_g_t) + reparam_g_t
    debug = {'ELBO': hardELBO,
             'etas': eta_statistics,
             'variance_objective': variance_objective,
             }
    return total_grads, debug, variance_objective, variance_objective_grad
Developer: ALISCIFP, Project: models, Lines of code: 60, Source file: rebar.py
Example 4: build_loss
def build_loss(self):
    """
    Loss function to minimize, whose gradient is a stochastic
    gradient inspired by adaptive importance sampling.
    loss = E_{p(z | x)} [ log p(z | x) - log q(z; lambda) ]
    is equivalent to minimizing
    E_{p(z | x)} [ log p(x, z) - log q(z; lambda) ]
    \approx 1/B sum_{b=1}^B
        w_norm(z^b; lambda) (log p(x, z^b) - log q(z^b; lambda))
    with gradient
    \approx - 1/B sum_{b=1}^B
        w_norm(z^b; lambda) grad_{lambda} log q(z^b; lambda)
    where
        + z^b ~ q(z^b; lambda)
        + w_norm(z^b; lambda) = w(z^b; lambda) / sum_{b=1}^B w(z^b; lambda)
        + w(z^b; lambda) = p(x, z^b) / q(z^b; lambda)
    """
    x = self.data.sample(self.n_data)
    z, self.samples = self.variational.sample(self.n_minibatch)
    q_log_prob = tf.zeros([self.n_minibatch], dtype=tf.float32)
    for i in range(self.variational.num_factors):
        q_log_prob += self.variational.log_prob_i(i, tf.stop_gradient(z))
    # normalized importance weights
    log_w = self.model.log_prob(x, z) - q_log_prob
    log_w_norm = log_w - log_sum_exp(log_w)
    w_norm = tf.exp(log_w_norm)
    self.loss = tf.reduce_mean(w_norm * log_w)
    return -tf.reduce_mean(q_log_prob * tf.stop_gradient(w_norm))
Developer: crack521, Project: edward, Lines of code: 35, Source file: inferences.py
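The two stop_gradient calls above are what make this a score-function (REINFORCE-style) estimator: the samples and the normalized weights are held fixed, so differentiating the returned surrogate with respect to the variational parameters reproduces the weighted grad-log-q expression in the docstring. A standalone toy sketch of the same pattern with an assumed one-parameter Gaussian q (the names mu, z, w_norm are illustrative, not from the Edward source):

import tensorflow as tf

mu = tf.Variable(0.0)                                     # variational parameter lambda
z = tf.stop_gradient(mu + tf.random_normal([64]))         # samples z^b, treated as constants
log_q = -0.5 * tf.square(z - mu)                          # log q(z; lambda) up to an additive constant
w_norm = tf.stop_gradient(tf.nn.softmax(-tf.square(z)))   # stand-in normalized importance weights

surrogate = -tf.reduce_mean(log_q * w_norm)
# d(surrogate)/d(mu) is -mean(w_norm * d log_q / d mu), i.e. the estimator in the docstring.
grad_mu = tf.gradients(surrogate, mu)[0]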
Example 5: energy
def energy(self, visible_state, hidden_state, scope='energy'):
    with tf.variable_scope(scope):
        visible_state = tf.stop_gradient(visible_state, name="visible_state")
        hidden_state = tf.stop_gradient(hidden_state, name="hidden_state")
        energy = -tf.reduce_mean(
            tf.reduce_sum(
                tf.multiply(tf.matmul(visible_state, self.W, name='visible_weights'),
                            hidden_state, name='weights_hidden'),
                axis=1, name='energy_sum'),
            name="batch_energy_mean")
        if self.visible.use_bias:
            if self.visible.binary:
                energy = tf.add(energy, -tf.reduce_mean(
                    tf.reduce_sum(tf.multiply(self.visible.bias, visible_state, name='visible_bias_energy'), axis=1)))
            else:
                v = visible_state - self.visible.bias
                energy = tf.add(energy, tf.reduce_mean(tf.reduce_sum(tf.multiply(v, v) / 2, axis=1)))
        if self.hidden.use_bias:
            if self.hidden.binary:
                energy = tf.add(energy, -tf.reduce_mean(
                    tf.reduce_sum(tf.multiply(self.hidden.bias, hidden_state, name='hidden_bias_energy'), axis=1)))
            else:
                h = hidden_state - self.hidden.bias
                energy = tf.add(energy, tf.reduce_mean(tf.reduce_sum(tf.multiply(h, h) / 2, axis=1)))
    return energy
Developer: aby2s, Project: harmonium, Lines of code: 26, Source file: rbm.py
Example 6: target_critic_net
def target_critic_net(self, states, actions, for_critic_loss=False):
    """Returns the output of the target critic network.
    The target network is used to compute stable targets for training.
    Args:
      states: A [batch_size, num_state_dims] tensor representing a batch
        of states.
      actions: A [batch_size, num_action_dims] tensor representing a batch
        of actions.
    Returns:
      q values: A [batch_size] tensor of q values.
    Raises:
      ValueError: If `states` or `actions` do not have the expected dimensions.
    """
    self._validate_states(states)
    self._validate_actions(actions)
    values1 = tf.stop_gradient(
        self._target_critic_net(states, actions,
                                for_critic_loss=for_critic_loss))
    values2 = tf.stop_gradient(
        self._target_critic_net2(states, actions,
                                 for_critic_loss=for_critic_loss))
    if for_critic_loss:
        return values1, values2
    return values1
Developer: Exscotticus, Project: models, Lines of code: 26, Source file: ddpg_agent.py
Example 7: virtual_adversarial_loss_bidir
def virtual_adversarial_loss_bidir(logits, embedded, inputs,
                                   logits_from_embedding_fn):
    """Virtual adversarial loss for bidirectional models."""
    logits = tf.stop_gradient(logits)
    f_inputs, _ = inputs
    weights = f_inputs.eos_weights
    if FLAGS.single_label:
        indices = tf.stack([tf.range(FLAGS.batch_size), f_inputs.length - 1], 1)
        weights = tf.expand_dims(tf.gather_nd(f_inputs.eos_weights, indices), 1)
    assert weights is not None
    perturbs = [
        _mask_by_length(tf.random_normal(shape=tf.shape(emb)), f_inputs.length)
        for emb in embedded
    ]
    for _ in xrange(FLAGS.num_power_iteration):
        perturbs = [
            _scale_l2(d, FLAGS.small_constant_for_finite_diff) for d in perturbs
        ]
        d_logits = logits_from_embedding_fn(
            [emb + d for (emb, d) in zip(embedded, perturbs)])
        kl = _kl_divergence_with_logits(logits, d_logits, weights)
        perturbs = tf.gradients(
            kl,
            perturbs,
            aggregation_method=tf.AggregationMethod.EXPERIMENTAL_ACCUMULATE_N)
        perturbs = [tf.stop_gradient(d) for d in perturbs]
    perturbs = [_scale_l2(d, FLAGS.perturb_norm_length) for d in perturbs]
    vadv_logits = logits_from_embedding_fn(
        [emb + d for (emb, d) in zip(embedded, perturbs)])
    return _kl_divergence_with_logits(logits, vadv_logits, weights)
Developer: 812864539, Project: models, Lines of code: 32, Source file: adversarial_losses.py
Example 8: latent_prediction_model
def latent_prediction_model(inputs,
                            ed_attention_bias,
                            latents_discrete,
                            latents_dense,
                            hparams,
                            name="latent_prediction"):
    """Transformer-based latent prediction model.
    It is an autoregressive decoder over latents_discrete given inputs.
    Args:
      inputs: Tensor of shape [batch, length_kv, hparams.hidden_size]. Inputs to
        attend to for the decoder on latents.
      ed_attention_bias: Tensor which broadcasts with shape [batch,
        hparams.num_heads, length_q, length_kv]. Encoder-decoder attention bias.
      latents_discrete: Tensor of shape [batch, length_q, vocab_size].
        One-hot latents to compute log-probability of given inputs.
      latents_dense: Tensor of shape [batch, length_q, hparams.hidden_size].
      hparams: tf.contrib.training.HParams.
      name: string, variable scope.
    Returns:
      latents_pred: Tensor of shape [batch, length_q, hparams.hidden_size].
      latents_pred_loss: Tensor of shape [batch, length_q].
    """
    with tf.variable_scope(name):
        if hparams.mode != tf.estimator.ModeKeys.PREDICT:
            latents_pred = transformer_latent_decoder(
                tf.stop_gradient(latents_dense), inputs, ed_attention_bias,
                hparams, name)
            _, latent_pred_loss = ae_latent_softmax(
                latents_pred, tf.stop_gradient(latents_discrete), hparams)
        return latents_pred, latent_pred_loss
Developer: kltony, Project: tensor2tensor, Lines of code: 33, Source file: latent_layers.py
Example 9: build_graph
def build_graph(self, state, action, futurereward, action_prob):
    logits, value = self._get_NN_prediction(state)
    value = tf.squeeze(value, [1], name='pred_value')  # (B,)
    policy = tf.nn.softmax(logits, name='policy')
    is_training = get_current_tower_context().is_training
    if not is_training:
        return
    log_probs = tf.log(policy + 1e-6)
    log_pi_a_given_s = tf.reduce_sum(
        log_probs * tf.one_hot(action, NUM_ACTIONS), 1)
    advantage = tf.subtract(tf.stop_gradient(value), futurereward, name='advantage')
    pi_a_given_s = tf.reduce_sum(policy * tf.one_hot(action, NUM_ACTIONS), 1)  # (B,)
    importance = tf.stop_gradient(tf.clip_by_value(pi_a_given_s / (action_prob + 1e-8), 0, 10))
    policy_loss = tf.reduce_sum(log_pi_a_given_s * advantage * importance, name='policy_loss')
    xentropy_loss = tf.reduce_sum(policy * log_probs, name='xentropy_loss')
    value_loss = tf.nn.l2_loss(value - futurereward, name='value_loss')
    pred_reward = tf.reduce_mean(value, name='predict_reward')
    advantage = tf.sqrt(tf.reduce_mean(tf.square(advantage)), name='rms_advantage')
    entropy_beta = tf.get_variable('entropy_beta', shape=[],
                                   initializer=tf.constant_initializer(0.01), trainable=False)
    cost = tf.add_n([policy_loss, xentropy_loss * entropy_beta, value_loss])
    cost = tf.truediv(cost, tf.cast(tf.shape(futurereward)[0], tf.float32), name='cost')
    summary.add_moving_summary(policy_loss, xentropy_loss,
                               value_loss, pred_reward, advantage,
                               cost, tf.reduce_mean(importance, name='importance'))
    return cost
Developer: tobyma, Project: tensorpack, Lines of code: 30, Source file: train-atari.py
Example 10: _create_gumbel_control_variate_quadratic
def _create_gumbel_control_variate_quadratic(self, logQHard, temperature=None):
    '''Calculate gumbel control variate.
    '''
    if temperature is None:
        temperature = self.hparams.temperature
    h = 0
    extra = []
    for layer in xrange(self.hparams.n_layer):
        logQ, softSamples = self._recognition_network(sampler=functools.partial(
            self._random_sample_switch, switch_layer=layer, temperature=temperature))
        softELBO, _ = self._generator_network(softSamples, logQ)
        # Generate the softELBO_v (should be the same value but different grads)
        logQ_v, softSamples_v = self._recognition_network(sampler=functools.partial(
            self._random_sample_switch_v, switch_layer=layer, temperature=temperature))
        softELBO_v, _ = self._generator_network(softSamples_v, logQ_v)
        # Compute losses
        learning_signal = tf.stop_gradient(softELBO_v)
        # Control variate
        h += (tf.stop_gradient(learning_signal) * logQHard[layer]
              - softELBO + softELBO_v)
        extra.append((softELBO_v, -softELBO + softELBO_v))
    return h, extra
Developer: ALISCIFP, Project: models, Lines of code: 28, Source file: rebar.py
Example 11: _step
def _step(self, J, voltage, refractory, dt):
    delta_t = tf.clip_by_value(dt - refractory, self.zero, dt)
    dV = (voltage - J) * tf.expm1(-delta_t / self.tau_rc)
    voltage += dV
    spiked = voltage > self.one
    spikes = tf.cast(spiked, J.dtype) * self.alpha
    partial_ref = -self.tau_rc * tf.log1p((self.one - voltage) /
                                          (J - self.one))
    # FastLIF version (linearly approximate spike time when calculating
    # remaining refractory period)
    # partial_ref = signals.dt * (voltage - self.one) / dV
    refractory = tf.where(spiked, self.tau_ref - partial_ref,
                          refractory - dt)
    voltage = tf.where(spiked, self.zeros,
                       tf.maximum(voltage, self.min_voltage))
    # we use stop_gradient to avoid propagating any nans (those get
    # propagated through the cond even if the spiking version isn't
    # being used at all)
    return (tf.stop_gradient(spikes), tf.stop_gradient(voltage),
            tf.stop_gradient(refractory))
Developer: nengo, Project: nengo_deeplearning, Lines of code: 26, Source file: neuron_builders.py
Example 12: virtual_adversarial_loss_bidir
def virtual_adversarial_loss_bidir(logits, embedded, inputs,
                                   logits_from_embedding_fn):
    """Virtual adversarial loss for bidirectional models."""
    logits = tf.stop_gradient(logits)
    f_inputs, _ = inputs
    weights = _end_of_seq_mask(f_inputs.labels)
    perturbs = [
        _mask_by_length(tf.random_normal(shape=tf.shape(emb)), f_inputs.length)
        for emb in embedded
    ]
    for _ in xrange(FLAGS.num_power_iteration):
        perturbs = [
            _scale_l2(d, FLAGS.small_constant_for_finite_diff) for d in perturbs
        ]
        d_logits = logits_from_embedding_fn(
            [emb + d for (emb, d) in zip(embedded, perturbs)])
        kl = _kl_divergence_with_logits(logits, d_logits, weights)
        perturbs = tf.gradients(
            kl,
            perturbs,
            aggregation_method=tf.AggregationMethod.EXPERIMENTAL_ACCUMULATE_N)
        perturbs = [tf.stop_gradient(d) for d in perturbs]
    perturbs = [
        _scale_l2(_mask_by_length(d, f_inputs.length), FLAGS.perturb_norm_length)
        for d in perturbs
    ]
    vadv_logits = logits_from_embedding_fn(
        [emb + d for (emb, d) in zip(embedded, perturbs)])
    return _kl_divergence_with_logits(logits, vadv_logits, weights)
Developer: Jmq14, Project: models, Lines of code: 31, Source file: adversarial_losses.py
Example 13: self_kl
def self_kl(self, logits,
            sampling_dim, act_dim, act_type):
    """Calculate KL of distribution with itself.
    Used layer only for the gradients.
    """
    if self.env_spec.is_discrete(act_type):
        probs = tf.nn.softmax(logits)
        log_probs = tf.nn.log_softmax(logits)
        self_kl = tf.reduce_sum(
            tf.stop_gradient(probs) *
            (tf.stop_gradient(log_probs) - log_probs), -1)
    elif self.env_spec.is_box(act_type):
        means = logits[:, :sampling_dim / 2]
        std = logits[:, sampling_dim / 2:]
        my_means = tf.stop_gradient(means)
        my_std = tf.stop_gradient(std)
        self_kl = tf.reduce_sum(
            tf.log(std / my_std) +
            (tf.square(my_std) + tf.square(my_means - means)) /
            (2.0 * tf.square(std)) - 0.5,
            -1)
    else:
        assert False
    return self_kl
Developer: ALISCIFP, Project: models, Lines of code: 27, Source file: policy.py
Example 14: _logits_cumulative
def _logits_cumulative(self, inputs, stop_gradient):
    """Evaluate logits of the cumulative densities.
    Arguments:
      inputs: The values at which to evaluate the cumulative densities, expected
        to be a `Tensor` of shape `(channels, 1, batch)`.
      stop_gradient: Boolean. Whether to add `tf.stop_gradient` calls so
        that the gradient of the output with respect to the density model
        parameters is disconnected (the gradient with respect to `inputs` is
        left untouched).
    Returns:
      A `Tensor` of the same shape as `inputs`, containing the logits of the
      cumulative densities evaluated at the given inputs.
    """
    logits = inputs
    for i in range(len(self.filters) + 1):
        matrix = self._matrices[i]
        if stop_gradient:
            matrix = tf.stop_gradient(matrix)
        logits = tf.linalg.matmul(matrix, logits)
        bias = self._biases[i]
        if stop_gradient:
            bias = tf.stop_gradient(bias)
        logits += bias
        if i < len(self._factors):
            factor = self._factors[i]
            if stop_gradient:
                factor = tf.stop_gradient(factor)
            logits += factor * tf.math.tanh(logits)
    return logits
Developer: michaelshiyu, Project: compression, Lines of code: 35, Source file: entropy_models.py
Example 15: _create_gumbel_control_variate
def _create_gumbel_control_variate(self, logQHard, temperature=None):
    '''Calculate gumbel control variate.
    '''
    if temperature is None:
        temperature = self.hparams.temperature
    logQ, softSamples = self._recognition_network(sampler=functools.partial(
        self._random_sample_soft, temperature=temperature))
    softELBO, _ = self._generator_network(softSamples, logQ)
    logQ = tf.add_n(logQ)
    # Generate the softELBO_v (should be the same value but different grads)
    logQ_v, softSamples_v = self._recognition_network(sampler=functools.partial(
        self._random_sample_soft_v, temperature=temperature))
    softELBO_v, _ = self._generator_network(softSamples_v, logQ_v)
    logQ_v = tf.add_n(logQ_v)
    # Compute losses
    learning_signal = tf.stop_gradient(softELBO_v)
    # Control variate
    h = (tf.stop_gradient(learning_signal) * tf.add_n(logQHard)
         - softELBO + softELBO_v)
    extra = (softELBO_v, -softELBO + softELBO_v)
    return h, extra
Developer: ALISCIFP, Project: models, Lines of code: 27, Source file: rebar.py
Example 16: loop_function
def loop_function(prev, i, log_beam_probs, beam_path, beam_symbols):
    if output_projection is not None:
        prev = nn_ops.xw_plus_b(
            prev, output_projection[0], output_projection[1])
    # prev = prev.get_shape().with_rank(2)[1]
    probs = tf.log(tf.nn.softmax(prev))
    if i > 1:
        probs = tf.reshape(probs + log_beam_probs[-1],
                           [-1, beam_size * num_symbols])
    best_probs, indices = tf.nn.top_k(probs, beam_size)
    indices = tf.stop_gradient(tf.squeeze(tf.reshape(indices, [-1, 1])))
    best_probs = tf.stop_gradient(tf.reshape(best_probs, [-1, 1]))
    symbols = indices % num_symbols       # Which word in vocabulary.
    beam_parent = indices // num_symbols  # Which hypothesis it came from.
    beam_symbols.append(symbols)
    beam_path.append(beam_parent)
    log_beam_probs.append(best_probs)
    # Note that gradients will not propagate through the second parameter of
    # embedding_lookup.
    emb_prev = embedding_ops.embedding_lookup(embedding, symbols)
    emb_prev = tf.reshape(emb_prev, [beam_size, embedding_size])
    # emb_prev = embedding_ops.embedding_lookup(embedding, symbols)
    if not update_embedding:
        emb_prev = array_ops.stop_gradient(emb_prev)
    return emb_prev
Developer: Vunb, Project: Neural_Conversation_Models, Lines of code: 34, Source file: my_seq2seq.py
Example 17: rnn_decoder
def rnn_decoder(cell, inputs, initial_state, embedding_size, embedding_length, sequence_length,
                name='RNNDecoder', reuse=False, use_inputs_prob=0.0, static_input=None):
    with tf.variable_scope(name, reuse=reuse):
        # print(tf.get_variable_scope().reuse, tf.get_variable_scope().name)
        with tf.name_scope("embedding"):
            batch_size = tf.shape(initial_state)[0]
            embedding_table = tf.get_variable(
                name='embedding_table',
                shape=[embedding_length, embedding_size],
                initializer=tf.truncated_normal_initializer(stddev=glorot_mul(embedding_length, embedding_size)),
            )
            # 0 is the index for _SOS_ (start of sentence symbol)
            initial_embedding = tf.gather(embedding_table, tf.zeros(tf.pack([batch_size]), tf.int32))
        states = [initial_state]
        outputs = []
        outputs_softmax = []
        decoder_outputs_argmax_embedding = []
        for j in range(sequence_length):
            with tf.variable_scope(tf.get_variable_scope(), reuse=True if j > 0 else None):
                # get the input:
                # either feed back the previous decoder argmax output
                # or use the provided input (note that you have to use the previous input, the index is therefore -1)
                input = initial_embedding
                if j > 0:
                    true_input = tf.gather(embedding_table, inputs[j - 1])
                    decoded_input = decoder_outputs_argmax_embedding[-1]
                    choice = tf.floor(tf.random_uniform([1], use_inputs_prob, 1 + use_inputs_prob, tf.float32))
                    input = choice * true_input + (1.0 - choice) * decoded_input
                if static_input:
                    input = tf.concat(1, [input, static_input])
                # print(tf.get_variable_scope().reuse, tf.get_variable_scope().name)
                output, state = cell(input, states[-1])
                projection = linear(
                    input=output,
                    input_size=cell.output_size,
                    output_size=embedding_length,
                    name='output_linear_projection'
                )
                outputs.append(projection)
                states.append(state)
                softmax = tf.nn.softmax(projection, name="output_softmax")
                # we do not compute the gradient through argmax
                output_argmax = tf.stop_gradient(tf.argmax(softmax, 1))
                # we do not compute the gradient for embeddings when used with noisy argmax outputs
                output_argmax_embedding = tf.stop_gradient(tf.gather(embedding_table, output_argmax))
                decoder_outputs_argmax_embedding.append(output_argmax_embedding)
                outputs_softmax.append(tf.expand_dims(softmax, 1))
    # remove the initial state
    states = states[1:]
    return states, outputs, outputs_softmax
Developer: jurcicek, Project: ndm, Lines of code: 60, Source file: bricks.py
Example 18: __init__
def __init__(self,
             q_t,
             q_tp1,
             q_tp0,
             importance_weights,
             rewards,
             done_mask,
             twin_q_t,
             twin_q_tp1,
             actor_loss_coeff=0.1,
             critic_loss_coeff=1.0,
             gamma=0.99,
             n_step=1,
             use_huber=False,
             huber_threshold=1.0,
             twin_q=False,
             policy_delay=1):
    q_t_selected = tf.squeeze(q_t, axis=len(q_t.shape) - 1)
    if twin_q:
        twin_q_t_selected = tf.squeeze(twin_q_t, axis=len(q_t.shape) - 1)
        q_tp1 = tf.minimum(q_tp1, twin_q_tp1)
    q_tp1_best = tf.squeeze(input=q_tp1, axis=len(q_tp1.shape) - 1)
    q_tp1_best_masked = (1.0 - done_mask) * q_tp1_best
    # compute RHS of bellman equation
    q_t_selected_target = rewards + gamma**n_step * q_tp1_best_masked
    # compute the error (potentially clipped)
    if twin_q:
        td_error = q_t_selected - tf.stop_gradient(q_t_selected_target)
        twin_td_error = twin_q_t_selected - tf.stop_gradient(
            q_t_selected_target)
        self.td_error = td_error + twin_td_error
        if use_huber:
            errors = _huber_loss(td_error, huber_threshold) + _huber_loss(
                twin_td_error, huber_threshold)
        else:
            errors = 0.5 * tf.square(td_error) + 0.5 * tf.square(
                twin_td_error)
    else:
        self.td_error = (
            q_t_selected - tf.stop_gradient(q_t_selected_target))
        if use_huber:
            errors = _huber_loss(self.td_error, huber_threshold)
        else:
            errors = 0.5 * tf.square(self.td_error)
    self.critic_loss = critic_loss_coeff * tf.reduce_mean(
        importance_weights * errors)
    # for policy gradient, update policy net one time v.s.
    # update critic net `policy_delay` time(s)
    global_step = tf.train.get_or_create_global_step()
    policy_delay_mask = tf.to_float(
        tf.equal(tf.mod(global_step, policy_delay), 0))
    self.actor_loss = (-1.0 * actor_loss_coeff * policy_delay_mask *
                       tf.reduce_mean(q_tp0))
Developer: robertnishihara, Project: ray, Lines of code: 59, Source file: ddpg_policy_graph.py
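The recurring pattern in this example (and in Example 6) is wrapping the bootstrapped Bellman target in tf.stop_gradient, so the critic regresses toward a fixed target instead of also pushing that target around. A minimal toy sketch of the idea (hypothetical tensors, TF 1.x, not RLlib code):

import tensorflow as tf

q_t = tf.Variable([1.0, 2.0])      # Q(s_t, a_t) from the critic
q_tp1 = tf.Variable([1.5, 0.5])    # Q(s_{t+1}, a_{t+1}) from the same (or target) critic
rewards = tf.constant([0.0, 1.0])
gamma = 0.99

target = tf.stop_gradient(rewards + gamma * q_tp1)  # target treated as a constant label
td_error = q_t - target
critic_loss = tf.reduce_mean(0.5 * tf.square(td_error))

# Gradients flow only into q_t; without stop_gradient they would also flow into q_tp1.
grads = tf.gradients(critic_loss, [q_t, q_tp1])     # second entry is None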
Example 19: build_score_loss_and_gradients
def build_score_loss_and_gradients(inference, var_list):
    """Build loss function and gradients based on the score function
    estimator (Paisley et al., 2012).
    Computed by sampling from $q(z;\lambda)$ and evaluating the
    expectation using Monte Carlo sampling.
    """
    p_log_prob = [0.0] * inference.n_samples
    q_log_prob = [0.0] * inference.n_samples
    for s in range(inference.n_samples):
        # Form dictionary in order to replace conditioning on prior or
        # observed variable with conditioning on a specific value.
        scope = 'inference_' + str(id(inference)) + '/' + str(s)
        dict_swap = {}
        for x, qx in six.iteritems(inference.data):
            if isinstance(x, RandomVariable):
                if isinstance(qx, RandomVariable):
                    qx_copy = copy(qx, scope=scope)
                    dict_swap[x] = qx_copy.value()
                else:
                    dict_swap[x] = qx
        for z, qz in six.iteritems(inference.latent_vars):
            # Copy q(z) to obtain new set of posterior samples.
            qz_copy = copy(qz, scope=scope)
            dict_swap[z] = qz_copy.value()
            q_log_prob[s] += tf.reduce_sum(
                inference.scale.get(z, 1.0) *
                qz_copy.log_prob(tf.stop_gradient(dict_swap[z])))
        for z in six.iterkeys(inference.latent_vars):
            z_copy = copy(z, dict_swap, scope=scope)
            p_log_prob[s] += tf.reduce_sum(
                inference.scale.get(z, 1.0) * z_copy.log_prob(dict_swap[z]))
        for x in six.iterkeys(inference.data):
            if isinstance(x, RandomVariable):
                x_copy = copy(x, dict_swap, scope=scope)
                p_log_prob[s] += tf.reduce_sum(
                    inference.scale.get(x, 1.0) * x_copy.log_prob(dict_swap[x]))
    p_log_prob = tf.stack(p_log_prob)
    q_log_prob = tf.stack(q_log_prob)
    if inference.logging:
        summary_key = 'summaries_' + str(id(inference))
        tf.summary.scalar("loss/p_log_prob", tf.reduce_mean(p_log_prob),
                          collections=[summary_key])
        tf.summary.scalar("loss/q_log_prob", tf.reduce_mean(q_log_prob),
                          collections=[summary_key])
    losses = p_log_prob - q_log_prob
    loss = -tf.reduce_mean(losses)
    grads = tf.gradients(
        -tf.reduce_mean(q_log_prob * tf.stop_gradient(losses)),
        var_list)
    grads_and_vars = list(zip(grads, var_list))
    return loss, grads_and_vars
Developer: ekostem, Project: edward, Lines of code: 59, Source file: klqp.py
Example 20: get_muprop_gradient
def get_muprop_gradient(self):
    """
    random sample function that actually returns mean
    new forward pass that returns logQ as a list
    can get x_i from samples
    """
    # Hard loss
    logQHard, hardSamples = self._recognition_network()
    hardELBO, reinforce_model_grad = self._generator_network(hardSamples, logQHard)
    # Soft loss
    logQ, muSamples = self._recognition_network(sampler=self._mean_sample)
    muELBO, _ = self._generator_network(muSamples, logQ)
    # Compute gradients
    muELBOGrads = tf.gradients(tf.reduce_sum(muELBO),
                               [muSamples[i]['activation'] for
                                i in xrange(self.hparams.n_layer)])
    # Compute MuProp gradient estimates
    learning_signal = hardELBO
    optimizerLoss = 0.0
    learning_signals = []
    for i in xrange(self.hparams.n_layer):
        dfDiff = tf.reduce_sum(
            muELBOGrads[i] * (hardSamples[i]['activation'] -
                              muSamples[i]['activation']),
            axis=1)
        dfMu = tf.reduce_sum(
            tf.stop_gradient(muELBOGrads[i]) *
            tf.nn.sigmoid(hardSamples[i]['log_param']),
            axis=1)
        scaling_baseline_0 = self._create_eta(collection='BASELINE')
        scaling_baseline_1 = self._create_eta(collection='BASELINE')
        learning_signals.append(learning_signal - scaling_baseline_0 * muELBO - scaling_baseline_1 * dfDiff - self._create_baseline())
        self.baseline_loss.append(tf.square(learning_signals[i]))
        optimizerLoss += (
            logQHard[i] * tf.stop_gradient(learning_signals[i]) +
            tf.stop_gradient(scaling_baseline_1) * dfMu)
    optimizerLoss += reinforce_model_grad
    optimizerLoss *= -1
    optimizerLoss = tf.reduce_mean(optimizerLoss)
    muprop_gradient = self.optimizer_class.compute_gradients(optimizerLoss)
    debug = {
        'ELBO': hardELBO,
        'muELBO': muELBO,
    }
    debug.update(dict([
        ('RMS learning signal layer %d' % i, U.rms(learning_signal))
        for (i, learning_signal) in enumerate(learning_signals)]))
    return muprop_gradient, debug
Developer: ALISCIFP, Project: models, Lines of code: 59, Source file: rebar.py
Note: The tensorflow.stop_gradient examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other source-code and documentation platforms. The snippets were selected from open-source projects contributed by many developers; copyright remains with the original authors, and any distribution or use should follow the corresponding project's license. Do not reproduce without permission.