
Python tensorflow.stop_gradient Function Code Examples


This article collects typical usage examples of the tensorflow.stop_gradient function in Python. If you are wondering what stop_gradient does, how to call it, or what working code that uses it looks like, the curated examples below should help.



The following presents 20 code examples of the stop_gradient function, ordered by popularity by default. A minimal warm-up sketch of the function's semantics comes first; the collected examples follow it.
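As that warm-up, here is a minimal, self-contained sketch of our own (not taken from any of the projects below) showing the core semantics of tf.stop_gradient: it acts as the identity in the forward pass but blocks gradient flow in the backward pass. It is written against the TF 1.x graph API that all of the examples use.

import tensorflow as tf

x = tf.constant(3.0)
y = tf.square(x)                  # y = x**2, so dy/dx = 2x
y_const = tf.stop_gradient(y)     # same forward value as y, but no gradient path

z = y + y_const                   # forward value: 2 * x**2 = 18.0
grad = tf.gradients(z, x)[0]      # only the y branch contributes: 2x = 6.0

with tf.Session() as sess:
    print(sess.run([z, grad]))    # [18.0, 6.0]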

Example 1: get_next_input

def get_next_input(output):
    # the next location is computed by the location network
    baseline = tf.sigmoid(tf.matmul(output,Wb_h_b) + Bb_h_b)
    baselines.append(baseline)
    # compute the next location, then impose noise
    if eyeCentered:
        # add the last sampled glimpse location
        # TODO max(-1, min(1, u + N(output, sigma) + prevLoc))
        mean_loc = tf.maximum(-1.0, tf.minimum(1.0, tf.matmul(output, Wl_h_l) + sampled_locs[-1] ))
    else:
        mean_loc = tf.matmul(output, Wl_h_l)

    # mean_loc = tf.stop_gradient(mean_loc)
    mean_locs.append(mean_loc)
    mean_locs_stopGrad.append(tf.stop_gradient(mean_loc))

    # add noise
    # sample_loc = tf.tanh(mean_loc + tf.random_normal(mean_loc.get_shape(), 0, loc_sd))
    sample_loc = tf.maximum(-1.0, tf.minimum(1.0, mean_loc + tf.random_normal(mean_loc.get_shape(), 0, loc_sd)))

    # don't propagate through the locations
    # sample_loc = tf.stop_gradient(sample_loc)
    sampled_locs.append(sample_loc)
    sampled_locs_stopGrad.append(tf.stop_gradient(sample_loc))

    return get_glimpse(sample_loc)
Developer: QihongL | Project: RAM | Lines: 26 | Source: ram.py


Example 2: context_infer

def context_infer(pooled_features):
    with tf.variable_scope("fc", reuse=True):
        weights = tf.stop_gradient(tf.get_variable("weights"))
        # b = tf.stop_gradient(tf.get_variable("biases"))

    z = tf.stop_gradient(pooled_features) #Nx64
    z = tf.expand_dims(z, -1) # Nx64x1
    
    w = weights # 64x10
    w = tf.expand_dims(w, 0) # 1x64x10
    mean, variance = tf.nn.moments(w, [1], keep_dims=True) #1x1x10
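    # note: tf.mul and tf.sub below are pre-TF-1.0 names for tf.multiply and
    # tf.subtract, so this snippet targets TensorFlow 0.x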
    response = tf.reduce_sum(tf.mul(z, w), 1, keep_dims=True) # Nx1x10
    response_vec = tf.mul(response, w) # Nx64x10
    response_vec = tf.div(response_vec, variance) # Nx64x10
    h = tf.sub(z, response_vec) # Nx64x10

    weights_initializer = tf.truncated_normal_initializer(
        stddev=FC_WEIGHT_STDDEV)
    with tf.variable_scope("context", reuse=True):
        context_weights = tf.stop_gradient(tf.get_variable("weights"))
        biases = tf.stop_gradient(tf.get_variable("biases"))
    context_weights = tf.expand_dims(context_weights, 0)
    biases = tf.expand_dims(biases, 0)
    scores = tf.reduce_sum(tf.mul(h, context_weights), 1) + biases    
    
    # TODO how to deal with b?
    return scores
Developer: yihui-he | Project: context-cifar | Lines: 27 | Source: cifar10.py


Example 3: get_dynamic_rebar_gradient

  def get_dynamic_rebar_gradient(self):
    """Get the dynamic rebar gradient (t, eta optimized)."""
    tiled_pre_temperature = tf.tile([self.pre_temperature_variable],
                                [self.batch_size])
    temperature = tf.exp(tiled_pre_temperature)

    hardELBO, nvil_gradient, logQHard = self._create_hard_elbo()
    if self.hparams.quadratic:
      gumbel_cv, extra  = self._create_gumbel_control_variate_quadratic(logQHard, temperature=temperature)
    else:
      gumbel_cv, extra  = self._create_gumbel_control_variate(logQHard, temperature=temperature)

    f_grads = self.optimizer_class.compute_gradients(tf.reduce_mean(-nvil_gradient))

    eta = {}
    h_grads, eta_statistics = self.multiply_by_eta_per_layer(
        self.optimizer_class.compute_gradients(tf.reduce_mean(gumbel_cv)),
        eta)

    model_grads = U.add_grads_and_vars(f_grads, h_grads)
    total_grads = model_grads

    # Construct the variance objective
    g = U.vectorize(model_grads, set_none_to_zero=True)
    self.maintain_ema_ops.append(self.ema.apply([g]))
    gbar = 0  #tf.stop_gradient(self.ema.average(g))
    variance_objective = tf.reduce_mean(tf.square(g - gbar))

    reinf_g_t = 0
    if self.hparams.quadratic:
      for layer in xrange(self.hparams.n_layer):
        gumbel_learning_signal, _ = extra[layer]
        df_dt = tf.gradients(gumbel_learning_signal, tiled_pre_temperature)[0]
        reinf_g_t_i, _ = self.multiply_by_eta_per_layer(
            self.optimizer_class.compute_gradients(tf.reduce_mean(tf.stop_gradient(df_dt) * logQHard[layer])),
            eta)
        reinf_g_t += U.vectorize(reinf_g_t_i, set_none_to_zero=True)

      reparam = tf.add_n([reparam_i for _, reparam_i in extra])
    else:
      gumbel_learning_signal, reparam = extra
      df_dt = tf.gradients(gumbel_learning_signal, tiled_pre_temperature)[0]
      reinf_g_t, _ = self.multiply_by_eta_per_layer(
          self.optimizer_class.compute_gradients(tf.reduce_mean(tf.stop_gradient(df_dt) * tf.add_n(logQHard))),
          eta)
      reinf_g_t = U.vectorize(reinf_g_t, set_none_to_zero=True)

    reparam_g, _ = self.multiply_by_eta_per_layer(
        self.optimizer_class.compute_gradients(tf.reduce_mean(reparam)),
        eta)
    reparam_g = U.vectorize(reparam_g, set_none_to_zero=True)
    reparam_g_t = tf.gradients(tf.reduce_mean(2*tf.stop_gradient(g - gbar)*reparam_g), self.pre_temperature_variable)[0]

    variance_objective_grad = tf.reduce_mean(2*(g - gbar)*reinf_g_t) + reparam_g_t

    debug = { 'ELBO': hardELBO,
             'etas': eta_statistics,
             'variance_objective': variance_objective,
             }
    return total_grads, debug, variance_objective, variance_objective_grad
Developer: ALISCIFP | Project: models | Lines: 60 | Source: rebar.py


Example 4: build_loss

    def build_loss(self):
        """
        Loss function to minimize, whose gradient is a stochastic
        gradient inspired by adaptive importance sampling.

        loss = E_{p(z | x)} [ log p(z | x) - log q(z; lambda) ]

        is equivalent to minimizing

        E_{p(z | x)} [ log p(x, z) - log q(z; lambda) ]
        \approx 1/B sum_{b=1}^B
            w_norm(z^b; lambda) (log p(x, z^b) - log q(z^b; lambda))

        with gradient
        \approx - 1/B sum_{b=1}^B
            w_norm(z^b; lambda) grad_{lambda} log q(z^b; lambda)

        where + z^b ~ q(z^b; lambda)
              + w_norm(z^b; lambda) = w(z^b; lambda) / sum_{b=1}^B w(z^b; lambda)
              + w(z^b; lambda) = p(x, z^b) / q(z^b; lambda)
        """
        x = self.data.sample(self.n_data)
        z, self.samples = self.variational.sample(self.n_minibatch)

        q_log_prob = tf.zeros([self.n_minibatch], dtype=tf.float32)
        for i in range(self.variational.num_factors):
            q_log_prob += self.variational.log_prob_i(i, tf.stop_gradient(z))

        # normalized importance weights
        log_w = self.model.log_prob(x, z) - q_log_prob
        log_w_norm = log_w - log_sum_exp(log_w)
        w_norm = tf.exp(log_w_norm)

        self.loss = tf.reduce_mean(w_norm * log_w)
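        # surrogate objective: gradients flow only through q_log_prob, since
        # stop_gradient treats the normalized importance weights as constants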
        return -tf.reduce_mean(q_log_prob * tf.stop_gradient(w_norm))
Developer: crack521 | Project: edward | Lines: 35 | Source: inferences.py


Example 5: energy

    def energy(self, visible_state, hidden_state, scope='energy'):
        with tf.variable_scope(scope):
            visible_state = tf.stop_gradient(visible_state, name="visible_state")
            hidden_state = tf.stop_gradient(hidden_state, name="hidden_state")
            energy = -tf.reduce_mean(tf.reduce_sum(tf.multiply(tf.matmul(visible_state, self.W, name='visible_weights'),
                                                               hidden_state, name='weights_hidden')
                                                   , axis=1, name='energy_sum'), name="batch_energy_mean")

            if self.visible.use_bias:
                if self.visible.binary:
                    energy = tf.add(energy, -tf.reduce_mean(
                        tf.reduce_sum(tf.multiply(self.visible.bias, visible_state, name='visible_bias_energy'), axis=1)))
                else:
                    v = visible_state - self.visible.bias
                    energy = tf.add(energy,  tf.reduce_mean(tf.reduce_sum(tf.multiply(v, v) / 2, axis=1)))


            if self.hidden.use_bias:
                if self.hidden.binary:
                    energy = tf.add(energy, -tf.reduce_mean(
                        tf.reduce_sum(tf.multiply(self.hidden.bias, hidden_state, name='hidden_bias_energy'), axis=1)))
                else:
                    h = hidden_state - self.hidden.bias
                    energy = tf.add(energy, tf.reduce_mean(tf.reduce_sum(tf.multiply(h, h) / 2, axis=1)))

        return energy
Developer: aby2s | Project: harmonium | Lines: 26 | Source: rbm.py


Example 6: target_critic_net

  def target_critic_net(self, states, actions, for_critic_loss=False):
    """Returns the output of the target critic network.

    The target network is used to compute stable targets for training.

    Args:
      states: A [batch_size, num_state_dims] tensor representing a batch
        of states.
      actions: A [batch_size, num_action_dims] tensor representing a batch
        of actions.
    Returns:
      q values: A [batch_size] tensor of q values.
    Raises:
      ValueError: If `states` or `actions` do not have the expected dimensions.
    """
    self._validate_states(states)
    self._validate_actions(actions)
    values1 = tf.stop_gradient(
        self._target_critic_net(states, actions,
                                for_critic_loss=for_critic_loss))
    values2 = tf.stop_gradient(
        self._target_critic_net2(states, actions,
                                 for_critic_loss=for_critic_loss))
    if for_critic_loss:
      return values1, values2
    return values1
Developer: Exscotticus | Project: models | Lines: 26 | Source: ddpg_agent.py


Example 7: virtual_adversarial_loss_bidir

def virtual_adversarial_loss_bidir(logits, embedded, inputs,
                                   logits_from_embedding_fn):
  """Virtual adversarial loss for bidirectional models."""
  logits = tf.stop_gradient(logits)
  f_inputs, _ = inputs
  weights = f_inputs.eos_weights
  if FLAGS.single_label:
    indices = tf.stack([tf.range(FLAGS.batch_size), f_inputs.length - 1], 1)
    weights = tf.expand_dims(tf.gather_nd(f_inputs.eos_weights, indices), 1)
  assert weights is not None

  perturbs = [
      _mask_by_length(tf.random_normal(shape=tf.shape(emb)), f_inputs.length)
      for emb in embedded
  ]
  for _ in xrange(FLAGS.num_power_iteration):
    perturbs = [
        _scale_l2(d, FLAGS.small_constant_for_finite_diff) for d in perturbs
    ]
    d_logits = logits_from_embedding_fn(
        [emb + d for (emb, d) in zip(embedded, perturbs)])
    kl = _kl_divergence_with_logits(logits, d_logits, weights)
    perturbs = tf.gradients(
        kl,
        perturbs,
        aggregation_method=tf.AggregationMethod.EXPERIMENTAL_ACCUMULATE_N)
    perturbs = [tf.stop_gradient(d) for d in perturbs]

  perturbs = [_scale_l2(d, FLAGS.perturb_norm_length) for d in perturbs]
  vadv_logits = logits_from_embedding_fn(
      [emb + d for (emb, d) in zip(embedded, perturbs)])
  return _kl_divergence_with_logits(logits, vadv_logits, weights)
Developer: 812864539 | Project: models | Lines: 32 | Source: adversarial_losses.py


Example 8: latent_prediction_model

def latent_prediction_model(inputs,
                            ed_attention_bias,
                            latents_discrete,
                            latents_dense,
                            hparams,
                            name="latent_prediction"):
  """Transformer-based latent prediction model.

  It is an autoregressive decoder over latents_discrete given inputs.

  Args:
    inputs: Tensor of shape [batch, length_kv, hparams.hidden_size]. Inputs to
      attend to for the decoder on latents.
    ed_attention_bias: Tensor which broadcasts with shape [batch,
      hparams.num_heads, length_q, length_kv]. Encoder-decoder attention bias.
    latents_discrete: Tensor of shape [batch, length_q, vocab_size].
      One-hot latents to compute log-probability of given inputs.
    latents_dense: Tensor of shape [batch, length_q, hparams.hidden_size].
    hparams: tf.contrib.training.HParams.
    name: string, variable scope.

  Returns:
    latents_pred: Tensor of shape [batch, length_q, hparams.hidden_size].
    latent_pred_loss: Tensor of shape [batch, length_q].
  """
  with tf.variable_scope(name):
    if hparams.mode != tf.estimator.ModeKeys.PREDICT:
      latents_pred = transformer_latent_decoder(
          tf.stop_gradient(latents_dense), inputs, ed_attention_bias,
          hparams, name)
      _, latent_pred_loss = ae_latent_softmax(
          latents_pred, tf.stop_gradient(latents_discrete), hparams)
  return latents_pred, latent_pred_loss
Developer: kltony | Project: tensor2tensor | Lines: 33 | Source: latent_layers.py


Example 9: build_graph

    def build_graph(self, state, action, futurereward, action_prob):
        logits, value = self._get_NN_prediction(state)
        value = tf.squeeze(value, [1], name='pred_value')  # (B,)
        policy = tf.nn.softmax(logits, name='policy')
        is_training = get_current_tower_context().is_training
        if not is_training:
            return
        log_probs = tf.log(policy + 1e-6)

        log_pi_a_given_s = tf.reduce_sum(
            log_probs * tf.one_hot(action, NUM_ACTIONS), 1)
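        # stop_gradient keeps the policy loss from back-propagating into the
        # value head; the value head is trained only through value_loss below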
        advantage = tf.subtract(tf.stop_gradient(value), futurereward, name='advantage')

        pi_a_given_s = tf.reduce_sum(policy * tf.one_hot(action, NUM_ACTIONS), 1)  # (B,)
        importance = tf.stop_gradient(tf.clip_by_value(pi_a_given_s / (action_prob + 1e-8), 0, 10))

        policy_loss = tf.reduce_sum(log_pi_a_given_s * advantage * importance, name='policy_loss')
        xentropy_loss = tf.reduce_sum(policy * log_probs, name='xentropy_loss')
        value_loss = tf.nn.l2_loss(value - futurereward, name='value_loss')

        pred_reward = tf.reduce_mean(value, name='predict_reward')
        advantage = tf.sqrt(tf.reduce_mean(tf.square(advantage)), name='rms_advantage')
        entropy_beta = tf.get_variable('entropy_beta', shape=[],
                                       initializer=tf.constant_initializer(0.01), trainable=False)
        cost = tf.add_n([policy_loss, xentropy_loss * entropy_beta, value_loss])
        cost = tf.truediv(cost, tf.cast(tf.shape(futurereward)[0], tf.float32), name='cost')
        summary.add_moving_summary(policy_loss, xentropy_loss,
                                   value_loss, pred_reward, advantage,
                                   cost, tf.reduce_mean(importance, name='importance'))
        return cost
Developer: tobyma | Project: tensorpack | Lines: 30 | Source: train-atari.py


Example 10: _create_gumbel_control_variate_quadratic

  def _create_gumbel_control_variate_quadratic(self, logQHard, temperature=None):
    '''Calculate the Gumbel control variate.
    '''
    if temperature is None:
      temperature = self.hparams.temperature

    h = 0
    extra = []
    for layer in xrange(self.hparams.n_layer):
      logQ, softSamples = self._recognition_network(sampler=functools.partial(
          self._random_sample_switch, switch_layer=layer, temperature=temperature))
      softELBO, _ = self._generator_network(softSamples, logQ)

      # Generate the softELBO_v (should be the same value but different grads)
      logQ_v, softSamples_v = self._recognition_network(sampler=functools.partial(
          self._random_sample_switch_v, switch_layer=layer, temperature=temperature))
      softELBO_v, _ = self._generator_network(softSamples_v, logQ_v)

      # Compute losses
      learning_signal = tf.stop_gradient(softELBO_v)

      # Control variate
      h += (tf.stop_gradient(learning_signal) * logQHard[layer]
            - softELBO + softELBO_v)

      extra.append((softELBO_v, -softELBO + softELBO_v))

    return h, extra
Developer: ALISCIFP | Project: models | Lines: 28 | Source: rebar.py


Example 11: _step

    def _step(self, J, voltage, refractory, dt):
        delta_t = tf.clip_by_value(dt - refractory, self.zero, dt)

        dV = (voltage - J) * tf.expm1(-delta_t / self.tau_rc)
        voltage += dV

        spiked = voltage > self.one
        spikes = tf.cast(spiked, J.dtype) * self.alpha

        partial_ref = -self.tau_rc * tf.log1p((self.one - voltage) /
                                              (J - self.one))
        # FastLIF version (linearly approximate spike time when calculating
        # remaining refractory period)
        # partial_ref = signals.dt * (voltage - self.one) / dV

        refractory = tf.where(spiked, self.tau_ref - partial_ref,
                              refractory - dt)

        voltage = tf.where(spiked, self.zeros,
                           tf.maximum(voltage, self.min_voltage))

        # we use stop_gradient to avoid propagating any nans (those get
        # propagated through the cond even if the spiking version isn't
        # being used at all)
        return (tf.stop_gradient(spikes), tf.stop_gradient(voltage),
                tf.stop_gradient(refractory))
Developer: nengo | Project: nengo_deeplearning | Lines: 26 | Source: neuron_builders.py


Example 12: virtual_adversarial_loss_bidir

def virtual_adversarial_loss_bidir(logits, embedded, inputs,
                                   logits_from_embedding_fn):
  """Virtual adversarial loss for bidirectional models."""
  logits = tf.stop_gradient(logits)
  f_inputs, _ = inputs
  weights = _end_of_seq_mask(f_inputs.labels)

  perturbs = [
      _mask_by_length(tf.random_normal(shape=tf.shape(emb)), f_inputs.length)
      for emb in embedded
  ]
  for _ in xrange(FLAGS.num_power_iteration):
    perturbs = [
        _scale_l2(d, FLAGS.small_constant_for_finite_diff) for d in perturbs
    ]
    d_logits = logits_from_embedding_fn(
        [emb + d for (emb, d) in zip(embedded, perturbs)])
    kl = _kl_divergence_with_logits(logits, d_logits, weights)
    perturbs = tf.gradients(
        kl,
        perturbs,
        aggregation_method=tf.AggregationMethod.EXPERIMENTAL_ACCUMULATE_N)
    perturbs = [tf.stop_gradient(d) for d in perturbs]

  perturbs = [
      _scale_l2(_mask_by_length(d, f_inputs.length), FLAGS.perturb_norm_length)
      for d in perturbs
  ]
  vadv_logits = logits_from_embedding_fn(
      [emb + d for (emb, d) in zip(embedded, perturbs)])
  return _kl_divergence_with_logits(logits, vadv_logits, weights)
Developer: Jmq14 | Project: models | Lines: 31 | Source: adversarial_losses.py


Example 13: self_kl

  def self_kl(self, logits,
              sampling_dim, act_dim, act_type):
    """Calculate KL of distribution with itself.

    Used only for the gradients.
    """

    if self.env_spec.is_discrete(act_type):
      probs = tf.nn.softmax(logits)
      log_probs = tf.nn.log_softmax(logits)
      self_kl = tf.reduce_sum(
          tf.stop_gradient(probs) *
          (tf.stop_gradient(log_probs) - log_probs), -1)
    elif self.env_spec.is_box(act_type):
      means = logits[:, :sampling_dim / 2]
      std = logits[:, sampling_dim / 2:]
      my_means = tf.stop_gradient(means)
      my_std = tf.stop_gradient(std)
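      # KL( N(my_means, my_std) || N(means, std) ): the stop_gradient copies
      # freeze the first argument, so gradients flow only through means and std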
      self_kl = tf.reduce_sum(
          tf.log(std / my_std) +
          (tf.square(my_std) + tf.square(my_means - means)) /
          (2.0 * tf.square(std)) - 0.5,
          -1)
    else:
      assert False

    return self_kl
Developer: ALISCIFP | Project: models | Lines: 27 | Source: policy.py


Example 14: _logits_cumulative

  def _logits_cumulative(self, inputs, stop_gradient):
    """Evaluate logits of the cumulative densities.

    Arguments:
      inputs: The values at which to evaluate the cumulative densities, expected
        to be a `Tensor` of shape `(channels, 1, batch)`.
      stop_gradient: Boolean. Whether to add `tf.stop_gradient` calls so
        that the gradient of the output with respect to the density model
        parameters is disconnected (the gradient with respect to `inputs` is
        left untouched).

    Returns:
      A `Tensor` of the same shape as `inputs`, containing the logits of the
      cumulative densities evaluated at the given inputs.
    """
    logits = inputs

    for i in range(len(self.filters) + 1):
      matrix = self._matrices[i]
      if stop_gradient:
        matrix = tf.stop_gradient(matrix)
      logits = tf.linalg.matmul(matrix, logits)

      bias = self._biases[i]
      if stop_gradient:
        bias = tf.stop_gradient(bias)
      logits += bias

      if i < len(self._factors):
        factor = self._factors[i]
        if stop_gradient:
          factor = tf.stop_gradient(factor)
        logits += factor * tf.math.tanh(logits)

    return logits
Developer: michaelshiyu | Project: compression | Lines: 35 | Source: entropy_models.py


Example 15: _create_gumbel_control_variate

  def _create_gumbel_control_variate(self, logQHard, temperature=None):
    '''Calculate the Gumbel control variate.
    '''
    if temperature is None:
      temperature = self.hparams.temperature

    logQ, softSamples = self._recognition_network(sampler=functools.partial(
        self._random_sample_soft, temperature=temperature))
    softELBO, _ = self._generator_network(softSamples, logQ)
    logQ = tf.add_n(logQ)

    # Generate the softELBO_v (should be the same value but different grads)
    logQ_v, softSamples_v = self._recognition_network(sampler=functools.partial(
        self._random_sample_soft_v, temperature=temperature))
    softELBO_v, _ = self._generator_network(softSamples_v, logQ_v)
    logQ_v = tf.add_n(logQ_v)

    # Compute losses
    learning_signal = tf.stop_gradient(softELBO_v)

    # Control variate
    h = (tf.stop_gradient(learning_signal) * tf.add_n(logQHard)
          - softELBO + softELBO_v)

    extra = (softELBO_v, -softELBO + softELBO_v)

    return h, extra
Developer: ALISCIFP | Project: models | Lines: 27 | Source: rebar.py


Example 16: loop_function

  def loop_function(prev, i, log_beam_probs, beam_path, beam_symbols):
    if output_projection is not None:
      prev = nn_ops.xw_plus_b(
          prev, output_projection[0], output_projection[1])
    # prev= prev.get_shape().with_rank(2)[1]

    probs  = tf.log(tf.nn.softmax(prev))

    if i > 1:

        probs = tf.reshape(probs + log_beam_probs[-1],
                               [-1, beam_size * num_symbols])

    best_probs, indices = tf.nn.top_k(probs, beam_size)
    indices = tf.stop_gradient(tf.squeeze(tf.reshape(indices, [-1, 1])))
    best_probs = tf.stop_gradient(tf.reshape(best_probs, [-1, 1]))

    symbols = indices % num_symbols # Which word in vocabulary.
    beam_parent = indices // num_symbols # Which hypothesis it came from.


    beam_symbols.append(symbols)
    beam_path.append(beam_parent)
    log_beam_probs.append(best_probs)

    # Note that gradients will not propagate through the second parameter of
    # embedding_lookup.

    emb_prev = embedding_ops.embedding_lookup(embedding, symbols)
    emb_prev  = tf.reshape(emb_prev,[beam_size,embedding_size])
    # emb_prev = embedding_ops.embedding_lookup(embedding, symbols)
    if not update_embedding:
      emb_prev = array_ops.stop_gradient(emb_prev)
    return emb_prev
Developer: Vunb | Project: Neural_Conversation_Models | Lines: 34 | Source: my_seq2seq.py


Example 17: rnn_decoder

def rnn_decoder(cell, inputs, initial_state, embedding_size, embedding_length, sequence_length,
                name='RNNDecoder', reuse=False, use_inputs_prob=0.0, static_input=None):
    with tf.variable_scope(name, reuse=reuse):
        # print(tf.get_variable_scope().reuse, tf.get_variable_scope().name)
        with tf.name_scope("embedding"):
            batch_size = tf.shape(initial_state)[0]
            embedding_table = tf.get_variable(
                name='embedding_table',
                shape=[embedding_length, embedding_size],
                initializer=tf.truncated_normal_initializer(stddev=glorot_mul(embedding_length, embedding_size)),
            )
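            # note: tf.pack was renamed tf.stack in TF 1.0, and tf.concat(dim, values)
            # below uses the pre-1.0 argument order, so this snippet targets TF 0.x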
            # 0 is index for _SOS_ (start of sentence symbol)
            initial_embedding = tf.gather(embedding_table, tf.zeros(tf.pack([batch_size]), tf.int32))

        states = [initial_state]
        outputs = []
        outputs_softmax = []
        decoder_outputs_argmax_embedding = []

        for j in range(sequence_length):
            with tf.variable_scope(tf.get_variable_scope(), reuse=True if j > 0 else None):
                # get input:
                #   either feed back the previous decoder argmax output,
                #   or use the provided input (note that you have to use the
                #   previous input, so the index is j - 1)
                input = initial_embedding
                if j > 0:
                    true_input = tf.gather(embedding_table, inputs[j - 1])
                    decoded_input = decoder_outputs_argmax_embedding[-1]
                    choice = tf.floor(tf.random_uniform([1], use_inputs_prob, 1 + use_inputs_prob, tf.float32))
                    input = choice * true_input + (1.0 - choice) * decoded_input

                if static_input:
                    input = tf.concat(1, [input, static_input])

                # print(tf.get_variable_scope().reuse, tf.get_variable_scope().name)
                output, state = cell(input, states[-1])

                projection = linear(
                    input=output,
                    input_size=cell.output_size,
                    output_size=embedding_length,
                    name='output_linear_projection'
                )

                outputs.append(projection)
                states.append(state)

                softmax = tf.nn.softmax(projection, name="output_softmax")
                # we do not compute the gradient through argmax
                output_argmax = tf.stop_gradient(tf.argmax(softmax, 1))
                # we do not compute the gradient for embeddings when used with noisy argmax outputs
                output_argmax_embedding = tf.stop_gradient(tf.gather(embedding_table, output_argmax))
                decoder_outputs_argmax_embedding.append(output_argmax_embedding)

                outputs_softmax.append(tf.expand_dims(softmax, 1))

    # remove the initial state
    states = states[1:]

    return states, outputs, outputs_softmax
Developer: jurcicek | Project: ndm | Lines: 60 | Source: bricks.py


Example 18: __init__

    def __init__(self,
                 q_t,
                 q_tp1,
                 q_tp0,
                 importance_weights,
                 rewards,
                 done_mask,
                 twin_q_t,
                 twin_q_tp1,
                 actor_loss_coeff=0.1,
                 critic_loss_coeff=1.0,
                 gamma=0.99,
                 n_step=1,
                 use_huber=False,
                 huber_threshold=1.0,
                 twin_q=False,
                 policy_delay=1):

        q_t_selected = tf.squeeze(q_t, axis=len(q_t.shape) - 1)
        if twin_q:
            twin_q_t_selected = tf.squeeze(twin_q_t, axis=len(q_t.shape) - 1)
            q_tp1 = tf.minimum(q_tp1, twin_q_tp1)

        q_tp1_best = tf.squeeze(input=q_tp1, axis=len(q_tp1.shape) - 1)
        q_tp1_best_masked = (1.0 - done_mask) * q_tp1_best

        # compute RHS of bellman equation
        q_t_selected_target = rewards + gamma**n_step * q_tp1_best_masked

        # compute the error (potentially clipped)
        if twin_q:
            td_error = q_t_selected - tf.stop_gradient(q_t_selected_target)
            twin_td_error = twin_q_t_selected - tf.stop_gradient(
                q_t_selected_target)
            self.td_error = td_error + twin_td_error
            if use_huber:
                errors = _huber_loss(td_error, huber_threshold) + _huber_loss(
                    twin_td_error, huber_threshold)
            else:
                errors = 0.5 * tf.square(td_error) + 0.5 * tf.square(
                    twin_td_error)
        else:
            self.td_error = (
                q_t_selected - tf.stop_gradient(q_t_selected_target))
            if use_huber:
                errors = _huber_loss(self.td_error, huber_threshold)
            else:
                errors = 0.5 * tf.square(self.td_error)

        self.critic_loss = critic_loss_coeff * tf.reduce_mean(
            importance_weights * errors)

        # for policy gradient, update the policy net once for every
        # `policy_delay` update(s) of the critic net
        global_step = tf.train.get_or_create_global_step()
        policy_delay_mask = tf.to_float(
            tf.equal(tf.mod(global_step, policy_delay), 0))
        self.actor_loss = (-1.0 * actor_loss_coeff * policy_delay_mask *
                           tf.reduce_mean(q_tp0))
Developer: robertnishihara | Project: ray | Lines: 59 | Source: ddpg_policy_graph.py


Example 19: build_score_loss_and_gradients

def build_score_loss_and_gradients(inference, var_list):
  """Build loss function and gradients based on the score function
  estimator (Paisley et al., 2012).

  Computed by sampling from $q(z;\lambda)$ and evaluating the
  expectation using Monte Carlo sampling.
  """
  p_log_prob = [0.0] * inference.n_samples
  q_log_prob = [0.0] * inference.n_samples
  for s in range(inference.n_samples):
    # Form dictionary in order to replace conditioning on prior or
    # observed variable with conditioning on a specific value.
    scope = 'inference_' + str(id(inference)) + '/' + str(s)
    dict_swap = {}
    for x, qx in six.iteritems(inference.data):
      if isinstance(x, RandomVariable):
        if isinstance(qx, RandomVariable):
          qx_copy = copy(qx, scope=scope)
          dict_swap[x] = qx_copy.value()
        else:
          dict_swap[x] = qx

    for z, qz in six.iteritems(inference.latent_vars):
      # Copy q(z) to obtain new set of posterior samples.
      qz_copy = copy(qz, scope=scope)
      dict_swap[z] = qz_copy.value()
      q_log_prob[s] += tf.reduce_sum(
          inference.scale.get(z, 1.0) *
          qz_copy.log_prob(tf.stop_gradient(dict_swap[z])))

    for z in six.iterkeys(inference.latent_vars):
      z_copy = copy(z, dict_swap, scope=scope)
      p_log_prob[s] += tf.reduce_sum(
          inference.scale.get(z, 1.0) * z_copy.log_prob(dict_swap[z]))

    for x in six.iterkeys(inference.data):
      if isinstance(x, RandomVariable):
        x_copy = copy(x, dict_swap, scope=scope)
        p_log_prob[s] += tf.reduce_sum(
            inference.scale.get(x, 1.0) * x_copy.log_prob(dict_swap[x]))

  p_log_prob = tf.stack(p_log_prob)
  q_log_prob = tf.stack(q_log_prob)

  if inference.logging:
    summary_key = 'summaries_' + str(id(inference))
    tf.summary.scalar("loss/p_log_prob", tf.reduce_mean(p_log_prob),
                      collections=[summary_key])
    tf.summary.scalar("loss/q_log_prob", tf.reduce_mean(q_log_prob),
                      collections=[summary_key])

  losses = p_log_prob - q_log_prob
  loss = -tf.reduce_mean(losses)

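  # score-function (REINFORCE) estimator: the gradient of E_q[losses] is
  # estimated as E_q[losses * grad log q], with losses held constant by
  # stop_gradient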
  grads = tf.gradients(
      -tf.reduce_mean(q_log_prob * tf.stop_gradient(losses)),
      var_list)
  grads_and_vars = list(zip(grads, var_list))
  return loss, grads_and_vars
Developer: ekostem | Project: edward | Lines: 59 | Source: klqp.py


Example 20: get_muprop_gradient

  def get_muprop_gradient(self):
    """
    random sample function that actually returns mean
    new forward pass that returns logQ as a list

    can get x_i from samples
    """

    # Hard loss
    logQHard, hardSamples = self._recognition_network()
    hardELBO, reinforce_model_grad = self._generator_network(hardSamples, logQHard)

    # Soft loss
    logQ, muSamples = self._recognition_network(sampler=self._mean_sample)
    muELBO, _ = self._generator_network(muSamples, logQ)

    # Compute gradients
    muELBOGrads = tf.gradients(tf.reduce_sum(muELBO),
                               [ muSamples[i]['activation'] for
                                i in xrange(self.hparams.n_layer) ])

    # Compute MuProp gradient estimates
    learning_signal = hardELBO
    optimizerLoss = 0.0
    learning_signals = []
    for i in xrange(self.hparams.n_layer):
      dfDiff = tf.reduce_sum(
          muELBOGrads[i] * (hardSamples[i]['activation'] -
                            muSamples[i]['activation']),
          axis=1)
      dfMu = tf.reduce_sum(
          tf.stop_gradient(muELBOGrads[i]) *
          tf.nn.sigmoid(hardSamples[i]['log_param']),
          axis=1)

      scaling_baseline_0 = self._create_eta(collection='BASELINE')
      scaling_baseline_1 = self._create_eta(collection='BASELINE')
      learning_signals.append(learning_signal - scaling_baseline_0 * muELBO - scaling_baseline_1 * dfDiff - self._create_baseline())
      self.baseline_loss.append(tf.square(learning_signals[i]))

      optimizerLoss += (
          logQHard[i] * tf.stop_gradient(learning_signals[i]) +
          tf.stop_gradient(scaling_baseline_1) * dfMu)
    optimizerLoss += reinforce_model_grad
    optimizerLoss *= -1

    optimizerLoss = tf.reduce_mean(optimizerLoss)

    muprop_gradient = self.optimizer_class.compute_gradients(optimizerLoss)
    debug = {
        'ELBO': hardELBO,
        'muELBO': muELBO,
    }

    debug.update(dict([
        ('RMS learning signal layer %d' % i, U.rms(learning_signal))
        for (i, learning_signal) in enumerate(learning_signals)]))

    return muprop_gradient, debug
Developer: ALISCIFP | Project: models | Lines: 59 | Source: rebar.py



Note: the tensorflow.stop_gradient examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other source-code and documentation platforms. The snippets were selected from open-source projects contributed by their respective authors; copyright in the code remains with the original authors, and any distribution or use should follow the corresponding project's license. Please do not repost without permission.

