• 设为首页
  • 点击收藏
  • 手机版
  • 关注官方公众号

Python math_ops.pow函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了 Python 中 tensorflow.python.ops.math_ops.pow 函数的典型用法代码示例。如果您正苦于以下问题：Python pow 函数的具体用法？Python pow 怎么用？Python pow 使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。


示例1: _compute_power_svd

  def _compute_power_svd(self, var, mat_g, mat_g_size, alpha, mat_h_slot_name):
    """Computes mat_h = mat_g^alpha using svd. mat_g is a symmetric PSD matrix.

    Args:
      var: the variable we are updating.
      mat_g: the symmetric PSD matrix whose power is to be computed.
      mat_g_size: size of mat_g.
      alpha: a real number.
      mat_h_slot_name: name of slot to store the power, if needed.

    Returns:
      mat_h = mat_g^alpha, or, when `mat_h_slot_name` is not None, the result
      of assigning mat_h to that slot of `var`.

    Stores mat_h in the appropriate slot, if it exists.
    Note that mat_g is PSD. So we could use linalg_ops.self_adjoint_eig.
    """
    if mat_g_size == 1:
      # Scalar case: an elementwise power suffices. self._epsilon is added
      # first (presumably to keep the base away from zero -- confirm).
      mat_h = math_ops.pow(mat_g + self._epsilon, alpha)
    else:
      # Damp the diagonal before decomposing.
      damping = self._epsilon * linalg_ops.eye(math_ops.to_int32(mat_g_size))
      diag_d, mat_u, mat_v = linalg_ops.svd(mat_g + damping, full_matrices=True)
      # Scale the columns of V by the clamped singular values raised to alpha,
      # then multiply by U^T: V * diag(d^alpha) * U^T.
      mat_h = math_ops.matmul(
          mat_v * math_ops.pow(math_ops.maximum(diag_d, self._epsilon), alpha),
          mat_u,
          transpose_b=True)
    if mat_h_slot_name is not None:
      return state_ops.assign(self.get_slot(var, mat_h_slot_name), mat_h)
    return mat_h

示例2: testPowNegativeExponent

  def testPowNegativeExponent(self):
    """Checks that integer `pow` with a negative exponent raises an error.

    For int32 and int64 bases the exponent is tried as an array whose first
    entry is negative, an array whose last entry is negative, and a negative
    Python scalar. Each case must fail with the "not allowed" message.
    """
    # Function-scope import so the block carries its own dependency.
    from tensorflow.python.framework import errors_impl

    for dtype in [np.int32, np.int64]:
      x = np.array([5, 2]).astype(dtype)
      exponents = [
          np.array([-2, 3]).astype(dtype),  # negative entry first
          np.array([2, -3]).astype(dtype),  # negative entry last
          -3,  # negative Python scalar
      ]
      for y in exponents:
        with test_util.force_cpu():
          # assertRaisesRegexp needs the expected exception class as its first
          # argument; passing only the pattern raises TypeError.
          with self.assertRaisesRegexp(
              errors_impl.InvalidArgumentError,
              "Integers to negative integer powers are not allowed"):
            self.evaluate(math_ops.pow(x, y))

示例3: testPowNegativeExponent

  def testPowNegativeExponent(self):
    """Checks that integer `pow` with a negative exponent raises an error.

    Session-based variant: for int32 and int64 bases the exponent is tried as
    an array whose first entry is negative, an array whose last entry is
    negative, and a negative Python scalar. Each op is run in a CPU-only test
    session and must fail with the "not allowed" message.
    """
    # Function-scope import so the block carries its own dependency.
    from tensorflow.python.framework import errors_impl

    for dtype in [np.int32, np.int64]:
      x = np.array([5, 2]).astype(dtype)
      exponents = [
          np.array([-2, 3]).astype(dtype),  # negative entry first
          np.array([2, -3]).astype(dtype),  # negative entry last
          -3,  # negative Python scalar
      ]
      for y in exponents:
        with self.test_session(use_gpu=False) as sess:
          # assertRaisesRegexp needs the expected exception class as its first
          # argument; passing only the pattern raises TypeError.
          with self.assertRaisesRegexp(
              errors_impl.InvalidArgumentError,
              "Integers to negative integer powers are not allowed"):
            sess.run(math_ops.pow(x, y))

示例4: get_beta_accumulators

def get_beta_accumulators(opt, dtype):
  """Returns `(beta_1 ** t, beta_2 ** t)` with `t = opt.iterations + 1`.

  Args:
    opt: optimizer exposing `iterations` and `_get_hyper`.
    dtype: dtype that the step and both hyperparameters are cast to.

  Returns:
    A 2-tuple `(beta_1_power, beta_2_power)`.
  """
  step = math_ops.cast(opt.iterations + 1, dtype)
  powers = []
  for hyper_name in ("beta_1", "beta_2"):
    beta = math_ops.cast(opt._get_hyper(hyper_name), dtype)
    powers.append(math_ops.pow(beta, step))
  return tuple(powers)

示例5: _resource_apply_sparse

  def _resource_apply_sparse(self, grad, var, indices):
    """Builds the sparse update of `var` and its 'm' / 'v' slots.

    Both slots are first decayed as a whole (multiplied by beta_1 / beta_2)
    and then receive the scaled gradient contribution at `indices` only.

    Args:
      grad: gradient values for the rows selected by `indices`.
      var: the variable being updated.
      indices: row indices of `var` that `grad` applies to.

    Returns:
      A group op over the variable update, `m_bar` and the updated 'v' slot.
    """
    dtype = var.dtype.base_dtype
    base_lr = self._decayed_lr(dtype)
    beta_1 = self._get_hyper('beta_1', dtype)
    beta_2 = self._get_hyper('beta_2', dtype)
    step = math_ops.cast(self.iterations + 1, dtype)
    beta_1_pow = math_ops.pow(beta_1, step)
    beta_2_pow = math_ops.pow(beta_2, step)
    eps = self._get_hyper('epsilon', dtype)
    # Learning rate scaled by sqrt(1 - beta_2^t) / (1 - beta_1^t).
    step_size = (base_lr * math_ops.sqrt(1 - beta_2_pow) / (1 - beta_1_pow))

    # First moment: m <- beta_1 * m, then add (1 - beta_1) * g at `indices`.
    m_slot = self.get_slot(var, 'm')
    m_grad_term = grad * (1 - beta_1)
    m_decayed = state_ops.assign(
        m_slot, m_slot * beta_1, use_locking=self._use_locking)
    with ops.control_dependencies([m_decayed]):
      m_new = self._resource_scatter_add(m_slot, indices, m_grad_term)
      # m_bar = (1 - beta_1) * g + beta_1 * m_new, restricted to `indices`.
      m_bar = m_grad_term + beta_1 * array_ops.gather(m_new, indices)

    # Second moment: v <- beta_2 * v, then add (1 - beta_2) * g^2 at `indices`.
    v_slot = self.get_slot(var, 'v')
    v_grad_term = (grad * grad) * (1 - beta_2)
    v_decayed = state_ops.assign(
        v_slot, v_slot * beta_2, use_locking=self._use_locking)
    with ops.control_dependencies([v_decayed]):
      v_new = self._resource_scatter_add(v_slot, indices, v_grad_term)

    v_rows = array_ops.gather(v_new, indices)
    v_rows_sqrt = math_ops.sqrt(v_rows)
    var_update = self._resource_scatter_add(
        var, indices, -step_size * m_bar / (v_rows_sqrt + eps))
    return control_flow_ops.group(var_update, m_bar, v_new)

示例6: _phi

def _phi(r, order):
  """Coordinate-wise nonlinearity used to define the order of the interpolation.

  See https://en.wikipedia.org/wiki/Polyharmonic_spline for the definition.

  Args:
    r: input op. NOTE(review): order == 1 returns sqrt(r), so `r` is presumably
      a squared distance -- confirm against the caller.
    order: interpolation order

  Returns:
    phi_k evaluated coordinate-wise on r, for k = order
  """

  # using EPSILON prevents log(0), sqrt(0), etc.
  # sqrt(0) is well-defined, but its gradient is not
  with ops.name_scope('phi'):
    if order == 1:
      r = math_ops.maximum(r, EPSILON)
      r = math_ops.sqrt(r)
      return r
    elif order == 2:
      return 0.5 * r * math_ops.log(math_ops.maximum(r, EPSILON))
    elif order == 4:
      return 0.5 * math_ops.square(r) * math_ops.log(
          math_ops.maximum(r, EPSILON))
    elif order % 2 == 0:
      # Remaining even orders: 0.5 * r^(order/2) * log(r).
      r = math_ops.maximum(r, EPSILON)
      return 0.5 * math_ops.pow(r, 0.5 * order) * math_ops.log(r)
    else:
      # Remaining odd orders: r^(order/2). Without this branch they would fall
      # through and return None.
      r = math_ops.maximum(r, EPSILON)
      return math_ops.pow(r, 0.5 * order)

示例7: _SparseUpdate

def _SparseUpdate(variable, gradients, accum, linear, base_lr,
                  lr_power, l1, l2):
  """Sparse Update "variable", "accum", "linear" based on sparse "gradients".

  See the description in _Update.

  Args:
    variable: A Variable.
    gradients: A Sparse Tensor
    accum: A Variable containing the sum of the squares of gradients.
    linear: A Variable containing approximation info.
    base_lr: A constant represents base learning rate.
    lr_power: A constant is used to adjust learning rate.
    l1: A constant represents l1 regularization strength.
    l2: A constant represents l2 regularization strength.

  Returns:
    A group op including three ScatterUpdate ops:
      1. ScatterUpdate for "accum"
      2. ScatterUpdate for "linear"
      3. ScatterUpdate for "variable"
  """
  assert isinstance(gradients, ops.IndexedSlices)
  with ops.name_scope("sparse_update_" + variable.op.name) as scope:
    dtype = variable.dtype.base_dtype
    base_lr = ops.convert_to_tensor(base_lr, dtype=dtype)
    lr_power = ops.convert_to_tensor(lr_power, dtype=dtype)
    l1 = ops.convert_to_tensor(l1, dtype=dtype)
    l2 = ops.convert_to_tensor(l2, dtype=dtype)

    # Compute the new value for the accumulator
    previous_accum = array_ops.gather(accum, gradients.indices)
    sqr_grad = gradients.values * gradients.values
    accum_updated = sqr_grad + previous_accum

    # Compute the new linear
    neg_lr_power = math_ops.neg(lr_power)
    sigma = math_ops.pow(accum_updated, neg_lr_power) - math_ops.pow(
        previous_accum, neg_lr_power)
    sigma /= base_lr
    variable_slice = array_ops.gather(variable, gradients.indices)
    proximal_adjust = sigma * variable_slice
    linear_slice = array_ops.gather(linear, gradients.indices)
    linear_updated = linear_slice + gradients.values - proximal_adjust

    # Compute the new "variable"
    variable_updated = _Compute(accum_updated, linear_updated, base_lr,
                                lr_power, l1, l2)

    # sigma reads the previous accumulator rows, so the accumulator may only be
    # overwritten after sigma has been computed.
    with ops.control_dependencies([sigma]):
      accum_update_op = state_ops.scatter_update(accum, gradients.indices,
                                                 accum_updated)
    linear_update_op = state_ops.scatter_update(linear, gradients.indices,
                                                linear_updated)
    variable_update_op = state_ops.scatter_update(variable, gradients.indices,
                                                  variable_updated)
    group_op = control_flow_ops.group(linear_update_op, accum_update_op,
                                      variable_update_op, name=scope)
    return group_op

示例8: Moment

def Moment(k, tensor, standardize=False, reduction_indices=None, mask=None):
  """Compute the k-th central moment of a tensor, possibly standardized.

  Args:
    k: Which moment to compute. 1 = mean, 2 = variance, etc.
    tensor: Input tensor.
    standardize: If True, returns the standardized moment, i.e. the central
      moment divided by the k-th power of the standard deviation.
    reduction_indices: Axes to reduce across. If None, reduce to a scalar.
    mask: Mask to apply to tensor.

  Returns:
    The mean and the requested moment.
  """
  warnings.warn("Moment is deprecated. "
                "Will be removed in DeepChem 1.4.", DeprecationWarning)
  if reduction_indices is not None:
    reduction_indices = np.atleast_1d(reduction_indices).tolist()

  # get the divisor
  if mask is not None:
    # Masked case: the divisor is the sum of the masked ones tensor.
    tensor = Mask(tensor, mask)
    ones = tf.constant(1, dtype=tf.float32, shape=tensor.get_shape())
    divisor = tf.reduce_sum(
        Mask(ones, mask), axis=reduction_indices, keep_dims=True)
  elif reduction_indices is None:
    # Full reduction: divide by the total number of elements.
    divisor = tf.constant(np.prod(tensor.get_shape().as_list()), tensor.dtype)
  else:
    # Partial reduction: divide by the product of the reduced dimensions.
    divisor = 1.0
    for i in range(len(tensor.get_shape())):
      if i in reduction_indices:
        divisor *= tensor.get_shape()[i].value
    divisor = tf.constant(divisor, tensor.dtype)

  # compute the requested central moment
  # note that mean is a raw moment, not a central moment
  mean = tf.math.divide(
      tf.reduce_sum(tensor, axis=reduction_indices, keep_dims=True), divisor)
  delta = tensor - mean
  if mask is not None:
    delta = Mask(delta, mask)
  moment = tf.math.divide(
      tf.reduce_sum(
          math_ops.pow(delta, k), axis=reduction_indices, keep_dims=True),
      divisor)
  moment = tf.squeeze(moment, reduction_indices)
  if standardize:
    # Divide by std^k, i.e. multiply by rsqrt(variance)^k.
    # NOTE(review): the recursive call does not forward `mask`, so the variance
    # is taken over the unmasked tensor -- confirm this is intended.
    moment = tf.multiply(
        moment,
        math_ops.pow(
            tf.rsqrt(Moment(2, tensor, reduction_indices=reduction_indices)[1]),
            k))

  return tf.squeeze(mean, reduction_indices), moment

示例9: _Update

def _Update(variable, gradients, accum, linear, base_lr, lr_power, l1, l2):
  """Update "variable", "accum", "linear" based on "gradients".

  Some notations here: "variable" as W, "accum" as N, "linear" as Z,
                       "gradients" as G, N(t) means "accum" at t-step.
  Assuming lr_power = -0.5 which means using adagrad learning rate.
  "accum" updates as: N = N + G^2
  "linear" updates as: Z = Z + G - W * (sqrt(N(t)) - sqrt(N(t-1)))/base_lr
  REQUIRES: Dimensionality of variable, gradients, accum and linear
            must be same.

  Args:
    variable: A Variable.
    gradients: A Tensor of same shape as 'variable'.
    accum: A Variable containing the sum of the squares of gradients.
    linear: A Variable containing approximation info.
    base_lr: A constant represents base learning rate.
    lr_power: A constant is used to adjust learning rate.
    l1: A constant represents l1 regularization strength.
    l2: A constant represents l2 regularization strength.

  Returns:
    A group op including three Assign ops:
      1. Assign for "accum"
      2. Assign for "linear"
      3. Assign for "variable"
  """
  dtype = variable.dtype.base_dtype
  base_lr = ops.convert_to_tensor(base_lr, dtype=dtype)
  lr_power = ops.convert_to_tensor(lr_power, dtype=dtype)
  l1 = ops.convert_to_tensor(l1, dtype=dtype)
  l2 = ops.convert_to_tensor(l2, dtype=dtype)
  # Compute the new accumulator
  sqr_grad = math_ops.square(gradients)
  accum_updated = sqr_grad + accum
  # Compute the new linear
  neg_lr_power = math_ops.neg(lr_power)
  sigma = math_ops.pow(accum_updated, neg_lr_power) - math_ops.pow(
      accum, neg_lr_power)
  sigma /= base_lr
  proximal_adjust = sigma * variable
  linear_updated = linear + gradients - proximal_adjust
  # Compute the "variable"
  variable_updated = _Compute(accum_updated, linear_updated, base_lr,
                              lr_power, l1, l2)

  # sigma reads the previous accumulator value, so "accum" may only be
  # overwritten after sigma has been computed.
  with ops.control_dependencies([sigma]):
    accum_update_op = state_ops.assign(accum, accum_updated)
  linear_update_op = state_ops.assign(linear, linear_updated)
  variable_update_op = state_ops.assign(variable, variable_updated)
  group_op = control_flow_ops.group(linear_update_op, accum_update_op,
                                    variable_update_op)
  return group_op

示例10: _prepare

 def _prepare(self, var_list):
   """Computes and stores the momentum-schedule tensors for this step.

   Sets `m_cache_t` and `m_cache_t_1` (the scheduled momentum for step t and
   t + 1), `m_schedule_new` (the op assigning `_m_cache * m_cache_t` back to
   `_m_cache`) and `m_schedule_next`.

   Args:
     var_list: variables being optimized; only the dtype of the first entry
       is read.
   """
   dtype = var_list[0].dtype.base_dtype
   beta_1 = self._get_hyper('beta_1', dtype)
   step = math_ops.cast(self.iterations + 1, dtype)
   base = math_ops.cast(0.96, dtype)

   def scheduled_momentum(at_step):
     # beta_1 * (1 - 0.5 * 0.96 ** (initial_decay * step))
     decay = math_ops.pow(base, self._initial_decay * at_step)
     return beta_1 * (1. - 0.5 * decay)

   self.m_cache_t = scheduled_momentum(step)
   self.m_cache_t_1 = scheduled_momentum(step + 1)

   running_product = self._m_cache * self.m_cache_t
   self.m_schedule_new = state_ops.assign(
       self._m_cache, running_product, use_locking=self._use_locking)
   self.m_schedule_next = self.m_schedule_new * self.m_cache_t_1

示例11: decayed_lr

  def decayed_lr(learning_rate, global_step, decay_steps, initial_variance,
                 variance_decay, num_periods, alpha, beta, name):
    """Recomputes the noisy linear-cosine decayed learning rate.

    Useful in eager mode, where the rate has to be re-evaluated per step. The
    result is

      learning_rate * ((alpha + linear + noise) * cosine + beta)

    where `linear` falls from 1 to 0 over `decay_steps`, `noise` is normal
    with variance `initial_variance / (1 + step) ** variance_decay`, and
    `cosine` is `0.5 * (1 + cos(pi * 2 * num_periods * step / decay_steps))`.
    """
    with ops.name_scope(name, "NoisyLinearCosineDecay",
                        [learning_rate, global_step]) as name:
      base_lr = ops.convert_to_tensor(learning_rate, name="learning_rate")
      dtype = base_lr.dtype

      def to_lr_dtype(value):
        return math_ops.cast(value, dtype)

      total_steps = to_lr_dtype(decay_steps)
      start_variance = to_lr_dtype(initial_variance)
      variance_power = to_lr_dtype(variance_decay)
      periods = to_lr_dtype(num_periods)
      alpha_t = to_lr_dtype(alpha)
      beta_t = to_lr_dtype(beta)

      # Clamp the step so the schedule stays flat after `decay_steps`.
      step = to_lr_dtype(global_step)
      step = math_ops.minimum(step, total_steps)

      linear_part = (total_steps - step) / total_steps
      noise_variance = start_variance / (
          math_ops.pow(1.0 + step, variance_power))
      noise_std = math_ops.sqrt(noise_variance)
      noisy_linear = (
          linear_part + random_ops.random_normal(
              linear_part.shape, stddev=noise_std))

      progress = step / total_steps
      phase = 2.0 * periods * progress
      cosine_part = 0.5 * (
          1.0 + math_ops.cos(constant_op.constant(math.pi) * phase))
      scale = (alpha_t + noisy_linear) * cosine_part + beta_t

      return math_ops.multiply(base_lr, scale, name=name)

示例12: _resource_apply_sparse

  def _resource_apply_sparse(self, grad, var, indices):
    """Builds the sparse update of `var` and its 'm' / 'v' slots.

    Only the rows selected by `indices` are read and written: 'm' gets the
    exponential average of the gradient, 'v' gets
    `max(beta_2 * v, abs(grad))`, and `var` is moved by
    `-lr / (1 - beta_1^t) * m / (v + epsilon)`.

    Args:
      grad: gradient values for the rows selected by `indices`.
      var: the variable being updated.
      indices: row indices of `var` that `grad` applies to.

    Returns:
      A group op over the variable, 'm' and 'v' scatter ops.
    """
    dtype = var.dtype.base_dtype
    lr = self._decayed_lr(dtype)

    beta_1 = self._get_hyper('beta_1', dtype)
    beta_2 = self._get_hyper('beta_2', dtype)
    step = math_ops.cast(self.iterations + 1, dtype)
    beta_1_pow = math_ops.pow(beta_1, step)
    eps = self._get_hyper('epsilon', dtype)

    # First moment rows: m <- beta_1 * m + (1 - beta_1) * g.
    m_slot = self.get_slot(var, 'm')
    m_rows = array_ops.gather(m_slot, indices)
    m_rows_new = m_rows * beta_1 + grad * (1 - beta_1)
    with ops.control_dependencies([m_rows_new]):
      m_update = self._resource_scatter_update(m_slot, indices, m_rows_new)

    # Second slot rows: u <- max(beta_2 * u, |g|).
    v_slot = self.get_slot(var, 'v')
    v_rows = array_ops.gather(v_slot, indices)
    v_rows_new = math_ops.maximum(v_rows * beta_2, math_ops.abs(grad))
    with ops.control_dependencies([v_rows_new]):
      v_update = self._resource_scatter_update(v_slot, indices, v_rows_new)

    # Parameter step: theta <- theta - lr / (1 - beta_1^t) * m / (u + eps).
    delta_rows = -lr / (1 - beta_1_pow) * (
        m_rows_new / (v_rows_new + eps))
    with ops.control_dependencies([delta_rows]):
      var_update = self._resource_scatter_add(var, indices, delta_rows)
    return control_flow_ops.group(var_update, m_update, v_update)

示例13: exponential_decay

def exponential_decay(learning_rate, global_step, decay_steps, decay_rate,
                      staircase=False, name=None):
  """Applies exponential decay to the learning rate.

  When training a model, it is often recommended to lower the learning rate as
  the training progresses.  This function applies an exponential decay function
  to a provided initial learning rate.  It requires a `global_step` value to
  compute the decayed learning rate.  You can just pass a TensorFlow variable
  that you increment at each training step.

  The function returns the decayed learning rate.  It is computed as:

  ```python
  decayed_learning_rate = learning_rate *
                          decay_rate ^ (global_step / decay_steps)
  ```

  If the argument `staircase` is `True`, then `global_step / decay_steps` is an
  integer division and the decayed learning rate follows a staircase function.

  Example: decay every 100000 steps with a base of 0.96:

  ```python
  ...
  global_step = tf.Variable(0, trainable=False)
  starter_learning_rate = 0.1
  learning_rate = tf.exponential_decay(starter_learning_rate, global_step,
                                       100000, 0.96, staircase=True)
  optimizer = tf.GradientDescent(learning_rate)
  # Passing global_step to minimize() will increment it at each step.
  optimizer.minimize(...my loss..., global_step=global_step)
  ```

  Args:
    learning_rate: A scalar `float32` or `float64` `Tensor` or a
      Python number.  The initial learning rate.
    global_step: A scalar `int32` or `int64` `Tensor` or a Python number.
      Global step to use for the decay computation.  Must not be negative.
    decay_steps: A scalar `int32` or `int64` `Tensor` or a Python number.
      Must be positive.  See the decay computation above.
    decay_rate: A scalar `float32` or `float64` `Tensor` or a
      Python number.  The decay rate.
    staircase: Boolean.  If `True` decay the learning rate at discrete intervals.
    name: string.  Optional name of the operation.  Defaults to 'ExponentialDecay'

  Returns:
    A scalar `Tensor` of the same type as `learning_rate`.  The decayed
    learning rate.
  """
  # NOTE(review): `ops.op_scope` and `math_ops.mul` look like legacy
  # TensorFlow names -- confirm they exist in the TF version this file targets.
  with ops.op_scope([learning_rate, global_step, decay_steps, decay_rate],
                   name, "ExponentialDecay") as name:
    learning_rate = ops.convert_to_tensor(learning_rate, name="learning_rate")
    dtype = learning_rate.dtype
    # Cast everything to the learning rate's dtype so the division below is a
    # floating-point division.
    global_step = math_ops.cast(global_step, dtype)
    decay_steps = math_ops.cast(decay_steps, dtype)
    decay_rate = math_ops.cast(decay_rate, dtype)
    p = global_step / decay_steps
    if staircase:
      # Flooring the exponent yields the piecewise-constant schedule.
      p = math_ops.floor(p)
    return math_ops.mul(learning_rate, math_ops.pow(decay_rate, p), name=name)

示例14: dropout_selu_impl

    def dropout_selu_impl(x, rate, alpha, noise_shape, seed, name):
        """Drops entries of `x` to `alpha`, then applies an affine correction.

        Each entry is kept with probability `1 - rate` and otherwise replaced
        by `alpha`. The result is mapped through `a * ret + b`, where `a` and
        `b` are computed from `fixedPointMean` / `fixedPointVar` taken from
        the enclosing scope.

        Raises:
            ValueError: if `1 - rate` is a Python real number outside (0, 1].
        """
        keep_prob = 1.0 - rate
        x = ops.convert_to_tensor(x, name="x")
        if isinstance(keep_prob, numbers.Real):
            if not (0 < keep_prob <= 1):
                raise ValueError("keep_prob must be a scalar tensor or a float in the "
                                 "range (0, 1], got %g" % keep_prob)
        keep_prob = ops.convert_to_tensor(keep_prob, dtype=x.dtype, name="keep_prob")

        alpha = ops.convert_to_tensor(alpha, dtype=x.dtype, name="alpha")

        # Nothing is dropped when keep_prob is statically known to be 1.
        if tensor_util.constant_value(keep_prob) == 1:
            return x

        if noise_shape is None:
            noise_shape = array_ops.shape(x)
        # floor(keep_prob + U[0, 1)) is 1 with probability keep_prob, else 0.
        keep_mask = math_ops.floor(
            keep_prob + random_ops.random_uniform(noise_shape, seed=seed, dtype=x.dtype))
        dropped = x * keep_mask + alpha * (1 - keep_mask)

        # Affine correction coefficients.
        variance_after_drop = keep_prob * (
            (1 - keep_prob) * math_ops.pow(alpha - fixedPointMean, 2) + fixedPointVar)
        a = math_ops.sqrt(fixedPointVar / variance_after_drop)
        b = fixedPointMean - a * (keep_prob * fixedPointMean + (1 - keep_prob) * alpha)
        return a * dropped + b

示例15: __call__

  def __call__(self, step):
    """Returns the noisy linear-cosine decayed learning rate at `step`.

    The result is

      initial_learning_rate * ((alpha + linear + noise) * cosine + beta)

    where `linear` falls from 1 to 0 over `decay_steps`, `noise` is normal
    with variance `initial_variance / (1 + step) ** variance_decay`, and
    `cosine` is `0.5 * (1 + cos(pi * 2 * num_periods * step / decay_steps))`.
    """
    with ops.name_scope(self.name, "NoisyLinearCosineDecay",
                        [self.initial_learning_rate, step]) as name:
      base_lr = ops.convert_to_tensor(
          self.initial_learning_rate, name="initial_learning_rate")
      dtype = base_lr.dtype

      # Bring every hyperparameter to the learning rate's dtype.
      total_steps = math_ops.cast(self.decay_steps, dtype)
      start_variance = math_ops.cast(self.initial_variance, dtype)
      variance_power = math_ops.cast(self.variance_decay, dtype)
      periods = math_ops.cast(self.num_periods, dtype)
      alpha_t = math_ops.cast(self.alpha, dtype)
      beta_t = math_ops.cast(self.beta, dtype)

      # Clamp the step so the schedule stays flat after `decay_steps`.
      clamped_step = math_ops.minimum(math_ops.cast(step, dtype), total_steps)

      linear_part = (total_steps - clamped_step) / total_steps
      noise_std = math_ops.sqrt(
          start_variance / (math_ops.pow(1.0 + clamped_step, variance_power)))
      noise = random_ops.random_normal(linear_part.shape, stddev=noise_std)
      noisy_linear = linear_part + noise

      phase = 2.0 * periods * (clamped_step / total_steps)
      cosine_part = 0.5 * (
          1.0 + math_ops.cos(constant_op.constant(math.pi) * phase))
      scale = (alpha_t + noisy_linear) * cosine_part + beta_t

      return math_ops.multiply(base_lr, scale, name=name)

示例16: test_zero_grad_tf_gradients

  def test_zero_grad_tf_gradients(self):
    """Checks `gradients.gradients` of pow(x, 2) at x = [-1, 0, 1].

    Skipped when executing eagerly; the expected gradient is [-2, 0, 2].
    """
    if context.executing_eagerly():
      self.skipTest("tf.gradients not supported in eager.")

    inputs = constant_op.constant([-1., 0., 1.])
    squared = math_ops.pow(inputs, 2)
    grad_ops = gradients.gradients(squared, inputs)
    actual = self.evaluate(grad_ops[0])
    self.assertAllClose([-2., 0., 2.], actual)

示例17: _prob

 def _prob(self, x):
   """Evaluates the density at `x` from `self.df`, `self.mu`, `self.sigma`.

   Computes

     exp(lgamma((df + 1) / 2) - lgamma(df / 2))
       / (sqrt(df) * sqrt(pi) * sigma)
       * (1 + ((x - mu) / sigma)^2 / df) ^ (-(df + 1) / 2)
   """
   standardized = (x - self.mu) / self.sigma
   half_df = 0.5 * self.df
   # Ratio of gamma functions, taken in log space before exponentiating.
   gamma_ratio = math_ops.exp(
       math_ops.lgamma(0.5 + half_df) - math_ops.lgamma(half_df))
   scale = math_ops.sqrt(self.df) * math.sqrt(math.pi) * self.sigma
   normalizer = gamma_ratio / scale
   kernel = math_ops.pow(
       1. + math_ops.square(standardized) / self.df, -(0.5 + half_df))
   return normalizer * kernel

示例18: _setup_sparsity

  def _setup_sparsity(self):
    """Builds the op that yields the sparsity target for the current step.

    With `p = clip((global_step - begin_step) / (end_step - begin_step), 0, 1)`
    the target is

      (initial_sparsity - target_sparsity) * (1 - p) ** exponent
        + target_sparsity

    so it moves from `initial_sparsity` at `begin_step` to `target_sparsity`
    at `end_step`. All settings are read from `self._spec`.

    Returns:
      The sparsity tensor.

    Raises:
      ValueError: if `begin_step >= end_step`.
    """
    begin_step = self._spec.sparsity_function_begin_step
    end_step = self._spec.sparsity_function_end_step
    initial_sparsity = self._spec.initial_sparsity
    target_sparsity = self._spec.target_sparsity
    exponent = self._spec.sparsity_function_exponent

    if begin_step >= end_step:
      raise ValueError(
          'Pruning must begin before it can end. begin_step=%d, end_step=%d' %
          (begin_step, end_step))

    with ops.name_scope(self._spec.name):
      # Fraction of the pruning window that has elapsed, clamped to [0, 1].
      # The dtype is given by name so no extra module is needed here.
      p = math_ops.minimum(
          1.0,
          math_ops.maximum(
              0.0,
              math_ops.div(
                  math_ops.cast(self._global_step - begin_step, 'float32'),
                  end_step - begin_step)))
      sparsity = math_ops.add(
          math_ops.multiply(initial_sparsity - target_sparsity,
                            math_ops.pow(1 - p, exponent)),
          target_sparsity,
          name='sparsity')

    return sparsity

示例19: test_zero_grad_tape

 def test_zero_grad_tape(self):
   """Checks the GradientTape gradient of pow(x, 2) at x = [-1, 0, 1].

   Runs under `execution_callbacks.errstate(inf_or_nan=RAISE)`; the expected
   gradient is [-2, 0, 2].
   """
   with execution_callbacks.errstate(inf_or_nan=RAISE):
     x = constant_op.constant([-1, 0., 1.])
     with backprop.GradientTape() as tape:
       # `x` is a constant, not a variable, so the tape only records
       # operations on it after an explicit watch; without this the gradient
       # comes back as None.
       tape.watch(x)
       y = math_ops.pow(x, 2)
     # Take the gradient after recording has stopped.
     g = self.evaluate(tape.gradient(y, x))
     self.assertAllClose([-2., 0., 2.], g)

示例20: get_updates

  def get_updates(self, loss, params):
    """Builds and returns the list of update ops for one optimization step.

    Resets `self.updates`, creates zero-initialized `m` / `v` accumulators for
    every parameter, stores them (with `self.iterations` and
    `self.m_schedule`) in `self.weights`, and appends the updates for the
    momentum schedule, both accumulators and each parameter.

    Args:
      loss: loss passed through to `self.get_gradients`.
      params: parameters to update; `K.int_shape` is taken of each one.

    Returns:
      `self.updates`, the list built by this call.
    """
    grads = self.get_gradients(loss, params)
    self.updates = []

    # `t` is read under a control dependency on the increment of
    # `self.iterations`.
    with ops.control_dependencies([state_ops.assign_add(self.iterations, 1)]):
      t = math_ops.cast(self.iterations, K.floatx())

    # Due to the recommendations in [2], i.e. warming momentum schedule
    # NOTE(review): [1] and [2] presumably refer to a reference list in the
    # enclosing class docstring, which is not visible here.
    momentum_cache_t = self.beta_1 * (
        1. - 0.5 *
        (math_ops.pow(K.cast_to_floatx(0.96), t * self.schedule_decay)))
    momentum_cache_t_1 = self.beta_1 * (
        1. - 0.5 *
        (math_ops.pow(K.cast_to_floatx(0.96), (t + 1) * self.schedule_decay)))
    # Running product of the scheduled momenta up to step t, and up to t + 1.
    m_schedule_new = self.m_schedule * momentum_cache_t
    m_schedule_next = self.m_schedule * momentum_cache_t * momentum_cache_t_1
    # This entry is a (variable, new_value) tuple, unlike the assign ops
    # appended below.
    self.updates.append((self.m_schedule, m_schedule_new))

    # One zero-initialized `m` and `v` accumulator per parameter.
    shapes = [K.int_shape(p) for p in params]
    ms = [K.zeros(shape) for shape in shapes]
    vs = [K.zeros(shape) for shape in shapes]

    self.weights = [self.iterations, self.m_schedule] + ms + vs

    for p, g, m, v in zip(params, grads, ms, vs):
      # the following equations given in [1]
      g_prime = g / (1. - m_schedule_new)
      m_t = self.beta_1 * m + (1. - self.beta_1) * g
      m_t_prime = m_t / (1. - m_schedule_next)
      v_t = self.beta_2 * v + (1. - self.beta_2) * math_ops.square(g)
      v_t_prime = v_t / (1. - math_ops.pow(self.beta_2, t))
      # Blend the rescaled gradient with the rescaled first moment.
      m_t_bar = (1. -
                 momentum_cache_t) * g_prime + momentum_cache_t_1 * m_t_prime

      self.updates.append(state_ops.assign(m, m_t))
      self.updates.append(state_ops.assign(v, v_t))

      p_t = p - self.lr * m_t_bar / (K.sqrt(v_t_prime) + self.epsilon)
      new_p = p_t

      # Apply constraints.
      if getattr(p, 'constraint', None) is not None:
        new_p = p.constraint(new_p)

      self.updates.append(state_ops.assign(p, new_p))
    return self.updates









Python math_ops.range函数代码示例发布时间:2022-05-27
Python math_ops.polygamma函数代码示例发布时间:2022-05-27





在线客服(服务时间 9:00~18:00)


Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap