This article collects typical usage examples of the Python function tensorflow.python.ops.math_ops.reciprocal. If you are unsure what the reciprocal function does or how to use it, the curated code samples below should help.
A total of 18 code examples of the reciprocal function are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python examples.
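For orientation, here is a minimal usage sketch of my own (not taken from the examples below). It uses the public tf.math.reciprocal alias, which as far as I know dispatches to the same element-wise 1/x op as the internal math_ops.reciprocal:

import tensorflow as tf

x = tf.constant([1.0, 2.0, 4.0, 0.5])
y = tf.math.reciprocal(x)  # element-wise 1/x
print(y.numpy())           # [1.   0.5  0.25 2.  ]

Note that the snippets below use TensorFlow's internal modules (for example, from tensorflow.python.ops import math_ops), which are not part of the stable public API.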
Example 1: _grad_and_hess_for_logloss

def _grad_and_hess_for_logloss(logits, labels):
  # TODO(youngheek): add weights handling.
  predictions = math_ops.reciprocal(math_ops.exp(-logits) + 1.0)
  normalizer = math_ops.reciprocal(
      math_ops.cast(array_ops.size(predictions), dtypes.float32))
  gradients = (predictions - labels) * normalizer
  hessians = predictions * (1.0 - predictions) * normalizer
  return gradients, hessians

Contributor: Jackiefan | Project: tensorflow | Lines: 8 | Source: boosted_trees.py
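For reference, the closed forms used above are the standard logistic-loss derivatives (a sketch added here, not part of the original snippet). With the prediction $p = \sigma(z) = 1/(1 + e^{-z})$ and label $y$:

$$
L(z, y) = -\bigl[\, y \log p + (1 - y) \log(1 - p) \,\bigr], \qquad
\frac{\partial L}{\partial z} = p - y, \qquad
\frac{\partial^2 L}{\partial z^2} = p\,(1 - p).
$$

The snippet then multiplies both quantities by the reciprocal of the number of predictions, so gradients and hessians are averaged over the batch.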
Example 2: _grad_and_hess_for_logloss

def _grad_and_hess_for_logloss(logits, labels):
  """A closed form gradient and hessian for logistic loss."""
  # TODO(youngheek): add weights handling.
  predictions = math_ops.reciprocal(math_ops.exp(-logits) + 1.0)
  normalizer = math_ops.reciprocal(
      math_ops.cast(array_ops.size(predictions), dtypes.float32))
  labels = math_ops.cast(labels, dtypes.float32)
  labels = head_lib._check_dense_labels_match_logits_and_reshape(  # pylint: disable=protected-access
      labels, logits, head.logits_dimension)
  gradients = (predictions - labels) * normalizer
  hessians = predictions * (1.0 - predictions) * normalizer
  return gradients, hessians

Contributor: Huoxubeiyin | Project: tensorflow | Lines: 12 | Source: boosted_trees.py
Example 3: normalize_moments

def normalize_moments(counts, mean_ss, variance_ss, shift, name=None):
  """Calculate the mean and variance based on the sufficient statistics.

  Args:
    counts: A `Tensor` containing the total count of the data (one value).
    mean_ss: A `Tensor` containing the mean sufficient statistics: the (possibly
      shifted) sum of the elements to average over.
    variance_ss: A `Tensor` containing the variance sufficient statistics: the
      (possibly shifted) squared sum of the data to compute the variance over.
    shift: A `Tensor` containing the value by which the data is shifted for
      numerical stability, or `None` if no shift was performed.
    name: Name used to scope the operations that compute the moments.

  Returns:
    Two `Tensor` objects: `mean` and `variance`.
  """
  with ops.name_scope(name, "normalize", [counts, mean_ss, variance_ss, shift]):
    divisor = math_ops.reciprocal(counts, name="divisor")
    if shift is not None:
      shifted_mean = math_ops.mul(mean_ss, divisor, name="shifted_mean")
      mean = math_ops.add(shifted_mean, shift, name="mean")
    else:  # no shift.
      shifted_mean = math_ops.mul(mean_ss, divisor, name="mean")
      mean = shifted_mean
    variance = math_ops.sub(math_ops.mul(variance_ss, divisor),
                            math_ops.square(shifted_mean),
                            name="variance")
    return (mean, variance)

Contributor: BloodD | Project: tensorflow | Lines: 28 | Source: nn_impl.py
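As a quick sanity check of those formulas, here is a NumPy stand-in of my own (not the TensorFlow code): the shifted sufficient statistics recover the ordinary mean and variance.

import numpy as np

data = np.array([2.0, 4.0, 6.0, 8.0])
shift = 5.0                                # any constant works
counts = data.size
mean_ss = np.sum(data - shift)             # (shifted) sum
variance_ss = np.sum((data - shift) ** 2)  # (shifted) squared sum

divisor = 1.0 / counts                     # reciprocal(counts)
shifted_mean = mean_ss * divisor
mean = shifted_mean + shift
variance = variance_ss * divisor - shifted_mean ** 2

assert np.isclose(mean, data.mean())
assert np.isclose(variance, data.var())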
Example 4: _TanGrad

def _TanGrad(op, grad):
  """Returns grad * 1/cos^2(x), i.e. grad * sec^2(x)."""
  x = op.inputs[0]
  with ops.control_dependencies([grad]):
    x = math_ops.conj(x)
    secx = math_ops.reciprocal(math_ops.cos(x))
    secx2 = math_ops.square(secx)
    return grad * secx2

Contributor: neuroradiology | Project: tensorflow | Lines: 8 | Source: math_grad.py
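A small sanity check of my own (assuming a standard TensorFlow 2.x install and using the public API rather than the internal modules): the gradient autodiff reports for tf.tan should equal 1/cos^2(x), which is what the registered gradient above computes.

import tensorflow as tf

x = tf.constant([0.3, 1.1])
with tf.GradientTape() as tape:
  tape.watch(x)
  y = tf.tan(x)
dy_dx = tape.gradient(y, x)                           # produced by the registered Tan gradient
expected = tf.math.reciprocal(tf.square(tf.cos(x)))   # 1 / cos^2(x)
tf.debugging.assert_near(dy_dx, expected)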
Example 5: _AtanGrad

def _AtanGrad(op, grad):
  """Returns grad * 1/(1 + x^2)."""
  x = op.inputs[0]
  with ops.control_dependencies([grad]):
    x = math_ops.conj(x)
    x2 = math_ops.square(x)
    one = constant_op.constant(1, dtype=grad.dtype)
    inv = math_ops.reciprocal(math_ops.add(one, x2))
    return grad * inv

Contributor: neuroradiology | Project: tensorflow | Lines: 9 | Source: math_grad.py
Example 6: _AngleGrad

def _AngleGrad(op, grad):
  """Returns -grad / (Im(x) + iRe(x))."""
  x = op.inputs[0]
  with ops.control_dependencies([grad]):
    re = math_ops.real(x)
    im = math_ops.imag(x)
    z = math_ops.reciprocal(math_ops.complex(im, re))
    zero = constant_op.constant(0, dtype=grad.dtype)
    complex_grad = math_ops.complex(grad, zero)
    return -complex_grad * z

Contributor: neuroradiology | Project: tensorflow | Lines: 10 | Source: math_grad.py
Example 7: _AcosGrad

def _AcosGrad(op, grad):
  """Returns grad * -1/sqrt(1-x^2)."""
  x = op.inputs[0]
  with ops.control_dependencies([grad]):
    x = math_ops.conj(x)
    x2 = math_ops.square(x)
    one = constant_op.constant(1, dtype=grad.dtype)
    den = math_ops.sqrt(math_ops.subtract(one, x2))
    inv = math_ops.reciprocal(den)
    return -grad * inv

Contributor: neuroradiology | Project: tensorflow | Lines: 10 | Source: math_grad.py
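For reference, the elementary derivatives behind Examples 4, 5 and 7 (standard calculus identities, added here for convenience):

$$
\frac{d}{dx}\tan x = \frac{1}{\cos^2 x}, \qquad
\frac{d}{dx}\arctan x = \frac{1}{1 + x^2}, \qquad
\frac{d}{dx}\arccos x = -\frac{1}{\sqrt{1 - x^2}}.
$$

In each gradient function the incoming grad is multiplied by the corresponding factor, with math_ops.reciprocal supplying the division.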
Example 8: _BesselI1eGrad

def _BesselI1eGrad(op, grad):
  """Compute gradient of bessel_i1e(x) with respect to its argument."""
  x = op.inputs[0]
  y = op.outputs[0]
  with ops.control_dependencies([grad]):
    # For x = 0, the correct gradient is 0.5.
    # However, the main branch gives NaN because of the division by x, so
    # we impute the gradient manually.
    # An alternative solution is to express the gradient via bessel_i0e and
    # bessel_i2e, but the latter is not yet implemented in Eigen.
    eps = np.finfo(x.dtype.as_numpy_dtype).eps
    zeros = array_ops.zeros_like(x)
    x_is_not_tiny = math_ops.abs(x) > eps
    safe_x = array_ops.where(x_is_not_tiny, x, eps + zeros)
    dy_dx = math_ops.bessel_i0e(safe_x) - y * (
        math_ops.sign(safe_x) + math_ops.reciprocal(safe_x))
    return grad * array_ops.where(x_is_not_tiny, dy_dx, 0.5 + zeros)

Contributor: AnishShah | Project: tensorflow | Lines: 17 | Source: math_grad.py
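The dy_dx expression follows from a standard Bessel identity (sketched here for reference; not part of the original code). Writing $\mathrm{i1e}(x) = e^{-|x|} I_1(x)$ and using $I_1'(x) = I_0(x) - I_1(x)/x$:

$$
\frac{d}{dx}\,\mathrm{i1e}(x)
  = e^{-|x|} I_1'(x) - \operatorname{sign}(x)\, e^{-|x|} I_1(x)
  = \mathrm{i0e}(x) - \mathrm{i1e}(x)\left(\operatorname{sign}(x) + \frac{1}{x}\right),
$$

which is exactly bessel_i0e(x) - y * (sign(x) + reciprocal(x)). The limit as x approaches 0 is 1/2, hence the imputed value 0.5 at tiny x.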
Example 9: _BatchNormGrad

def _BatchNormGrad(grad_y, x, scale, epsilon, data_format):
  """Returns the gradients for the 3 inputs of BatchNorm.

  Args:
    grad_y: A `Tensor` of 4 dimensions for the gradient for y.
    x: A `Tensor` of 4 dimensions for x.
    scale: A `Tensor` of 1 dimension for scaling.
    epsilon: A small float number added to the variance of x.
    data_format: The data format for input. Either b"NHWC" or b"NCHW".

  Returns:
    A tuple (grad_x, grad_scale, grad_offset), where grad_x is the gradient
    for x, grad_scale the gradient for scale, and grad_offset the gradient
    for offset.
  """
  if data_format == b"NHWC":
    keep_dims = False
    reduce_axis = [0, 1, 2]
  else:
    keep_dims = True
    reduce_axis = [0, 2, 3]
    shape = [1, array_ops.size(scale), 1, 1]
    scale = array_ops.reshape(scale, shape)
  mean_grad_y = math_ops.reduce_mean(grad_y, reduce_axis, keep_dims=keep_dims)
  mean_x = math_ops.reduce_mean(x, reduce_axis, keep_dims=keep_dims)
  var_x = math_ops.reduce_mean(
      math_ops.squared_difference(x, array_ops.stop_gradient(mean_x)),
      reduce_axis,
      keep_dims=keep_dims)
  grad_y_offset = grad_y - mean_grad_y
  x_offset = x - mean_x
  mean = math_ops.reduce_mean(
      grad_y * x_offset, axis=reduce_axis, keep_dims=keep_dims)
  grad_x = scale * math_ops.rsqrt(var_x + epsilon) * (
      grad_y_offset - math_ops.reciprocal(var_x + epsilon) * mean * x_offset)
  grad_scale = math_ops.rsqrt(var_x + epsilon) * math_ops.reduce_sum(
      grad_y * x_offset, axis=reduce_axis, keep_dims=keep_dims)
  if data_format == b"NCHW":
    grad_scale = array_ops.squeeze(grad_scale)
  grad_offset = math_ops.reduce_sum(grad_y, axis=reduce_axis)
  return grad_x, grad_scale, grad_offset

Contributor: chdinh | Project: tensorflow | Lines: 41 | Source: nn_grad.py
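These are the usual training-mode batch-norm gradients. Written per element with $\hat{x}_i = (x_i - \mu_B)/\sqrt{\sigma_B^2 + \varepsilon}$ and $g$ = grad_y, they read (a reference restatement of a standard result, not text from the original file):

$$
\frac{\partial L}{\partial x_i}
  = \frac{\gamma}{\sqrt{\sigma_B^2 + \varepsilon}}
    \left( g_i - \overline{g} - \hat{x}_i\, \overline{g \hat{x}} \right),
\qquad
\frac{\partial L}{\partial \gamma} = \sum_i g_i \hat{x}_i,
\qquad
\frac{\partial L}{\partial \beta} = \sum_i g_i,
$$

where the overbars denote means over the reduction axes. The reciprocal(var_x + epsilon) * mean * x_offset term in the code is the $\hat{x}_i\,\overline{g\hat{x}}$ piece with one factor of $1/\sqrt{\sigma_B^2 + \varepsilon}$ pulled out front.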
Example 10: _Grad

def _Grad(op, grad):
  """A gradient function for IRFFT with the provided `rank` and `rfft_fn`."""
  # Generate a simple mask like [1.0, 2.0, ..., 2.0, 1.0] for even-length FFTs
  # and [1.0, 2.0, ..., 2.0] for odd-length FFTs. To reduce extra ops in the
  # graph we special-case the situation where the FFT length and last
  # dimension of the input are known at graph construction time.
  fft_length = op.inputs[1]
  is_odd = math_ops.mod(fft_length[-1], 2)
  input_last_dimension = array_ops.shape(op.inputs[0])[-1]
  mask = array_ops.concat(
      [[1.0], 2.0 * array_ops.ones([input_last_dimension - 2 + is_odd]),
       array_ops.ones([1 - is_odd])], 0)
  rsize = math_ops.reciprocal(math_ops.to_float(_FFTSizeForGrad(grad, rank)))
  # The gradient of IRFFT is the RFFT of the incoming gradient times a scaling
  # factor and a mask. The mask scales the gradient for the Hermitian
  # symmetric components of the RFFT by a factor of two, since these
  # components are de-duplicated in the RFFT.
  rfft = rfft_fn(grad, fft_length)
  return rfft * math_ops.cast(rsize * mask, dtypes.complex64), None

Contributor: AnishShah | Project: tensorflow | Lines: 21 | Source: spectral_grad.py
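A small NumPy illustration of my own (not the TensorFlow code) of the mask described in the comments: for an IRFFT of length fft_length there are fft_length // 2 + 1 complex bins; the interior bins are Hermitian duplicates and get weight 2.0, while the DC bin (and the Nyquist bin when the length is even) get weight 1.0.

import numpy as np

def irfft_grad_mask(fft_length):
  # Mirrors the concat expression above for a known, scalar fft_length.
  n_bins = fft_length // 2 + 1
  is_odd = fft_length % 2
  return np.concatenate([[1.0],
                         2.0 * np.ones(n_bins - 2 + is_odd),
                         np.ones(1 - is_odd)])

print(irfft_grad_mask(8))  # [1. 2. 2. 2. 1.]  (even length, 5 bins)
print(irfft_grad_mask(7))  # [1. 2. 2. 2.]     (odd length, 4 bins)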
Example 11: _SelfAdjointEigV2Grad

def _SelfAdjointEigV2Grad(op, grad_e, grad_v):
  """Gradient for SelfAdjointEigV2."""
  e = op.outputs[0]
  compute_v = op.get_attr("compute_v")
  # a = op.inputs[0], which satisfies
  # a[...,:,:] * v[...,:,i] = e[...,i] * v[...,i]
  with ops.control_dependencies([grad_e, grad_v]):
    if compute_v:
      v = op.outputs[1]
      # Construct the matrix f(i,j) = (i != j ? 1 / (e_i - e_j) : 0).
      # Notice that because of the term involving f, the gradient becomes
      # infinite (or NaN in practice) when eigenvalues are not unique.
      # Mathematically this should not be surprising, since for (k-fold)
      # degenerate eigenvalues, the corresponding eigenvectors are only defined
      # up to arbitrary rotation in a (k-dimensional) subspace.
      f = array_ops.matrix_set_diag(
          math_ops.reciprocal(
              array_ops.expand_dims(e, -2) - array_ops.expand_dims(e, -1)),
          array_ops.zeros_like(e))
      grad_a = math_ops.matmul(
          v,
          math_ops.matmul(
              array_ops.matrix_diag(grad_e) +
              f * math_ops.matmul(v, grad_v, adjoint_a=True),
              v,
              adjoint_b=True))
    else:
      _, v = linalg_ops.self_adjoint_eig(op.inputs[0])
      grad_a = math_ops.matmul(v,
                               math_ops.matmul(
                                   array_ops.matrix_diag(grad_e),
                                   v,
                                   adjoint_b=True))
    # The forward op only depends on the lower triangular part of a, so here we
    # symmetrize and take the lower triangle
    grad_a = array_ops.matrix_band_part(
        grad_a + math_ops.conj(array_ops.matrix_transpose(grad_a)), -1, 0)
    grad_a = array_ops.matrix_set_diag(grad_a,
                                       0.5 * array_ops.matrix_diag_part(grad_a))
    return grad_a

Contributor: 1000sprites | Project: tensorflow | Lines: 40 | Source: linalg_grad.py
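Restated in matrix form (my own summary of what the compute_v branch computes; a hedged paraphrase, not text from the source): with the eigendecomposition $A = V \operatorname{diag}(e) V^{H}$, the backpropagated gradient is

$$
\bar{A} = V \left( \operatorname{diag}(\bar{e}) + F \circ (V^{H} \bar{V}) \right) V^{H},
\qquad
F_{ij} = \begin{cases} 1/(e_i - e_j), & i \neq j,\\ 0, & i = j, \end{cases}
$$

followed by symmetrization onto the lower triangle, since the forward op only reads the lower-triangular part of A.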
Example 12: _mini_batch_training_op

def _mini_batch_training_op(self, inputs, cluster_idx_list, cluster_centers,
                            cluster_centers_var, total_counts):
  """Creates an op for training for mini batch case.

  Args:
    inputs: list of input Tensors.
    cluster_idx_list: A vector (or list of vectors). Each element in the
      vector corresponds to an input row in 'inp' and specifies the cluster id
      corresponding to the input.
    cluster_centers: Tensor of cluster centers, possibly normalized.
    cluster_centers_var: Tensor Ref of cluster centers.
    total_counts: Tensor Ref of cluster counts.

  Returns:
    An op for doing an update of mini-batch k-means.
  """
  update_ops = []
  for inp, cluster_idx in zip(inputs, cluster_idx_list):
    with ops.colocate_with(inp):
      assert total_counts is not None
      cluster_idx = array_ops.reshape(cluster_idx, [-1])
      # Dedupe the unique ids of cluster_centers being updated so that updates
      # can be locally aggregated.
      unique_ids, unique_idx = array_ops.unique(cluster_idx)
      num_unique_cluster_idx = array_ops.size(unique_ids)
      # Fetch the old values of counts and cluster_centers.
      with ops.colocate_with(total_counts):
        old_counts = array_ops.gather(total_counts, unique_ids)
      with ops.colocate_with(cluster_centers):
        old_cluster_centers = array_ops.gather(cluster_centers, unique_ids)
      # Locally aggregate the increment to counts.
      count_updates = math_ops.unsorted_segment_sum(
          array_ops.ones_like(
              unique_idx, dtype=total_counts.dtype),
          unique_idx,
          num_unique_cluster_idx)
      # Locally compute the sum of inputs mapped to each id.
      # For a cluster with old cluster value x, old count n, and with data
      # d_1,...d_k newly assigned to it, we recompute the new value as
      # x += (sum_i(d_i) - k * x) / (n + k).
      # Compute sum_i(d_i), see comment above.
      cluster_center_updates = math_ops.unsorted_segment_sum(
          inp, unique_idx, num_unique_cluster_idx)
      # Shape to enable broadcasting count_updates and learning_rate to inp.
      # It extends the shape with 1's to match the rank of inp.
      broadcast_shape = array_ops.concat(
          [
              array_ops.reshape(num_unique_cluster_idx, [1]), array_ops.ones(
                  array_ops.reshape(array_ops.rank(inp) - 1, [1]),
                  dtype=dtypes.int32)
          ],
          0)
      # Subtract k * x, see comment above.
      cluster_center_updates -= math_ops.cast(
          array_ops.reshape(count_updates, broadcast_shape),
          inp.dtype) * old_cluster_centers
      learning_rate = math_ops.reciprocal(
          math_ops.cast(old_counts + count_updates, inp.dtype))
      learning_rate = array_ops.reshape(learning_rate, broadcast_shape)
      # scale by 1 / (n + k), see comment above.
      cluster_center_updates *= learning_rate
      # Apply the updates.
      update_counts = state_ops.scatter_add(total_counts, unique_ids,
                                            count_updates)
      update_cluster_centers = state_ops.scatter_add(cluster_centers_var,
                                                     unique_ids,
                                                     cluster_center_updates)
      update_ops.extend([update_counts, update_cluster_centers])
  return control_flow_ops.group(*update_ops)

Contributor: AliMiraftab | Project: tensorflow | Lines: 69 | Source: clustering_ops.py
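A tiny NumPy illustration of my own (not the TensorFlow code) of the update rule quoted in the comments: x += (sum_i(d_i) - k * x) / (n + k) is algebraically the running mean of all points assigned to the cluster so far.

import numpy as np

old_center = np.array([1.0, 2.0])    # x: current cluster center
old_count = 3                        # n: points already assigned
new_points = np.array([[2.0, 2.0],
                       [4.0, 0.0]])  # d_1 .. d_k newly assigned
k = len(new_points)

incremental = old_center + (new_points.sum(axis=0) - k * old_center) / (old_count + k)
running_mean = (old_count * old_center + new_points.sum(axis=0)) / (old_count + k)
assert np.allclose(incremental, running_mean)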
Example 13: _SvdGrad

def _SvdGrad(op, grad_s, grad_u, grad_v):
  """Gradient for the singular value decomposition."""
  # The derivation for the compute_uv=False case, and most of
  # the derivation for the full_matrices=True case, are in
  # Giles' paper (see reference at top of file). A derivation for
  # the full_matrices=False case is available at
  # https://j-towns.github.io/papers/svd-derivative.pdf
  a = op.inputs[0]
  a_shape = a.get_shape().with_rank_at_least(2)
  grad_s_mat = array_ops.matrix_diag(grad_s)
  if not op.get_attr("compute_uv"):
    s, u, v = linalg_ops.svd(a, compute_uv=True)
    grad_a = math_ops.matmul(u, math_ops.matmul(grad_s_mat, v, adjoint_b=True))
    grad_a.set_shape(a_shape)
    return grad_a
  full_matrices = op.get_attr("full_matrices")
  # TODO(rmlarsen): Make this work with complex types.
  if a.dtype.is_complex:
    raise NotImplementedError(
        "SVD gradient is not implemented for complex types and "
        "compute_uv=True.")
  grad_u_shape = grad_u.get_shape().with_rank_at_least(2)
  grad_v_shape = grad_v.get_shape().with_rank_at_least(2)
  m = a_shape.dims[-2].merge_with(grad_u_shape[-2])
  n = a_shape.dims[-1].merge_with(grad_v_shape[-2])
  batch_shape = a_shape[:-2].merge_with(grad_u_shape[:-2]).merge_with(
      grad_v_shape[:-2])
  a_shape = batch_shape.concatenate([m, n])
  m = a_shape.dims[-2].value
  n = a_shape.dims[-1].value
  # TODO(rmlarsen): Make this work with placeholders.
  if m is None or n is None:
    raise NotImplementedError(
        "SVD gradient has not been implemented for input with unknown "
        "inner matrix shape.")
  s = op.outputs[0]
  u = op.outputs[1]
  v = op.outputs[2]
  use_adjoint = False
  if m > n:
    # Compute the gradient for A^H = V * S^T * U^H, and (implicitly) take the
    # Hermitian transpose of the gradient at the end.
    use_adjoint = True
    m, n = n, m
    u, v = v, u
    grad_u, grad_v = grad_v, grad_u
  with ops.control_dependencies([grad_s, grad_u, grad_v]):
    if full_matrices and abs(m - n) > 1:
      raise NotImplementedError(
          "svd gradient is not implemented for abs(m - n) > 1 "
          "when full_matrices is True")
    s_mat = array_ops.matrix_diag(s)
    s2 = math_ops.square(s)
    # NOTICE: Because of the term involving f, the gradient becomes
    # infinite (or NaN in practice) when singular values are not unique.
    # Mathematically this should not be surprising, since for (k-fold)
    # degenerate singular values, the corresponding singular vectors are
    # only defined up a (k-dimensional) subspace. In practice, this can
    # lead to numerical instability when singular values are close but not
    # exactly equal.
    f = array_ops.matrix_set_diag(
        math_ops.reciprocal(
            array_ops.expand_dims(s2, -2) - array_ops.expand_dims(s2, -1)),
        array_ops.zeros_like(s))
    s_inv_mat = array_ops.matrix_diag(math_ops.reciprocal(s))
    v1 = v[..., :, :m]
    grad_v1 = grad_v[..., :, :m]
    u_gu = math_ops.matmul(u, grad_u, adjoint_a=True)
    v_gv = math_ops.matmul(v1, grad_v1, adjoint_a=True)
    f_u = f * u_gu
    f_v = f * v_gv
    term1_nouv = (
        grad_s_mat + math_ops.matmul(f_u + _linalg.adjoint(f_u), s_mat) +
        math_ops.matmul(s_mat, f_v + _linalg.adjoint(f_v)))
    term1 = math_ops.matmul(u, math_ops.matmul(term1_nouv, v1, adjoint_b=True))
    if m == n:
      grad_a_before_transpose = term1
    else:
      gv1t = array_ops.matrix_transpose(grad_v1)
      gv1t_v1 = math_ops.matmul(gv1t, v1)
      term2_nous = gv1t - math_ops.matmul(gv1t_v1, v1, adjoint_b=True)
      if full_matrices:
        v2 = v[..., :, m:n]
        grad_v2 = grad_v[..., :, m:n]
        # ......... (remainder of this function omitted) .........

Contributor: JonathanRaiman | Project: tensorflow | Lines: 101 | Source: linalg_grad.py
Example 14: _Log1pGrad

def _Log1pGrad(op, grad):
  """Returns grad * (1/(1 + x))."""
  x = op.inputs[0]
  with ops.control_dependencies([grad]):
    x = math_ops.conj(x)
    return grad * math_ops.reciprocal(1 + x)

Contributor: neuroradiology | Project: tensorflow | Lines: 6 | Source: math_grad.py
Example 15: _BatchNormGrad

def _BatchNormGrad(grad_y,
                   x,
                   scale,
                   pop_mean,
                   pop_var,
                   epsilon,
                   data_format,
                   is_training=True):
  """Returns the gradients for the 3 inputs of BatchNorm.

  Args:
    grad_y: A `Tensor` of 4 dimensions for the gradient for y.
    x: A `Tensor` of 4 dimensions for x.
    scale: A `Tensor` of 1 dimension for scaling.
    pop_mean: A `Tensor` of 1 dimension for the population mean. Only used when
      is_training=False.
    pop_var: A `Tensor` of 1 dimension for the population variance. Only used
      when is_training=False.
    epsilon: A small float number added to the variance of x.
    data_format: The data format for input. Either b"NHWC" or b"NCHW".
    is_training: A bool value to indicate the operation is for training
      (default) or inference.

  Returns:
    A tuple (grad_x, grad_scale, grad_offset), where grad_x is the gradient
    for x, grad_scale the gradient for scale, and grad_offset the gradient
    for offset.
  """
  x_dtype = x.dtype.base_dtype
  if x_dtype == dtypes.float16:
    # float16 math is too imprecise, so we do the batch norm gradient
    # computations in float32.
    x = math_ops.cast(x, dtypes.float32)
    grad_y = math_ops.cast(grad_y, dtypes.float32)
  if is_training:
    if data_format == b"NHWC":
      keepdims = False
      reduce_axis = [0, 1, 2]
    else:
      keepdims = True
      reduce_axis = [0, 2, 3]
      shape = [1, array_ops.size(scale), 1, 1]
      scale = array_ops.reshape(scale, shape)
    mean_grad_y = math_ops.reduce_mean(grad_y, reduce_axis, keepdims=keepdims)
    mean_x = math_ops.reduce_mean(x, reduce_axis, keepdims=keepdims)
    var_x = math_ops.reduce_mean(
        math_ops.squared_difference(x, array_ops.stop_gradient(mean_x)),
        reduce_axis,
        keepdims=keepdims)
    grad_y_offset = grad_y - mean_grad_y
    x_offset = x - mean_x
    mean = math_ops.reduce_mean(
        grad_y * x_offset, axis=reduce_axis, keepdims=keepdims)
    grad_x = scale * math_ops.rsqrt(var_x + epsilon) * (
        grad_y_offset - math_ops.reciprocal(var_x + epsilon) * mean * x_offset)
    grad_scale = math_ops.rsqrt(var_x + epsilon) * math_ops.reduce_sum(
        grad_y * x_offset, axis=reduce_axis, keepdims=keepdims)
    if data_format == b"NCHW":
      grad_scale = array_ops.squeeze(grad_scale)
    grad_offset = math_ops.reduce_sum(grad_y, axis=reduce_axis)
    return math_ops.cast(grad_x, x_dtype), grad_scale, grad_offset
  else:
    if data_format == b"NHWC":
      reduce_axis = [0, 1, 2]
    else:
      reduce_axis = [0, 2, 3]
      shape = [1, array_ops.size(pop_mean), 1, 1]
      pop_mean = array_ops.reshape(pop_mean, shape)
      pop_var = array_ops.reshape(pop_var, shape)
      scale = array_ops.reshape(scale, shape)
    grad_offset = math_ops.reduce_sum(grad_y, axis=reduce_axis)
    var_rsqrt = math_ops.rsqrt(pop_var + epsilon)
    grad_scale = math_ops.reduce_sum(
        grad_y * (x - pop_mean) * var_rsqrt, axis=reduce_axis)
    grad_x = grad_y * scale * var_rsqrt
    return math_ops.cast(grad_x, x_dtype), grad_scale, grad_offset

Contributor: adit-chandra | Project: tensorflow | Lines: 77 | Source: nn_grad.py
Example 16: _ComputeBatchNormCorrections

def _ComputeBatchNormCorrections(context, match, freeze_batch_norm_delay,
                                 fused_batch_norm):
  """Computes batch norm correction params.

  Before batch normalization is frozen:
  We use batch statistics for batch norm.
    correction_scale = sigma_b/sigma_mv
    correction_recip = 1/correction_scale
    correction_offset = 0
  After batch normalization is frozen:
    correction_scale = sigma_b/sigma_mv
    correction_recip = 1
    correction_offset = gamma*(mu_b/sigma_b-mu_mv/sigma_mv).

  Batch norm is frozen if global_step > bn_freeze_delay.
  The corrections ensure that:
  a) The weights are quantized after scaling by gamma/sigma_mv. This enables
     smoother training as the scaling on the weights changes slowly, rather than
     jumping across mini-batches.
  b) Changing the values of the corrections allows one to switch between
     using batch statistics and using the moving mean and average, without
     requiring changes to batch_norm.

  Args:
    context: The scope under which we look for batch norm params.
    match: Object containing required batch norm tensors for correction
      computation.
    freeze_batch_norm_delay: Delay in steps at which computation switches
      from regular batch norm to frozen mean and variance.
    fused_batch_norm: Bool, true if fused batch norm is used.

  Returns:
    A tuple of correction_scale, correction_recip, correction_offset.
  """
  g = ops.get_default_graph()
  prefix = '' if not context else context + '/'
  with g.name_scope(prefix + 'batch_norm_correction'):
    recip_sigma_mv = math_ops.rsqrt(
        match.moving_variance_tensor + match.batch_epsilon)
    recip_sigma = math_ops.rsqrt(match.variance_tensor + match.batch_epsilon)
    correction_scale = math_ops.divide(
        recip_sigma_mv, recip_sigma, name='scale_compute')
    correction_scale = array_ops.identity(
        correction_scale, name='correction_scale')
    correction_recip = math_ops.reciprocal(
        correction_scale, name='reciprocal_compute')
    correction_offset = math_ops.multiply(
        match.gamma_tensor,
        match.mean_tensor * recip_sigma -
        match.moving_mean_tensor * recip_sigma_mv,
        name='offset_compute')
    if freeze_batch_norm_delay is not None:
      use_mv_avg = math_ops.greater_equal(
          common.CreateOrGetQuantizationStep(),
          freeze_batch_norm_delay,
          name='use_moving_average')
    else:
      use_mv_avg = False
    bn_decay_zero = 0.0
    bn_decay_mean_consumers = list(match.bn_decay_mean_tensor.consumers())
    bn_decay_var_consumers = list(match.bn_decay_mean_tensor.consumers())
    bn_decay_mean_out = utils.smart_cond(
        use_mv_avg,
        lambda: bn_decay_zero,
        lambda: match.bn_decay_mean_tensor,
        name='freeze_moving_mean')
    graph_editor.reroute_ts(
        [bn_decay_mean_out], [match.bn_decay_mean_tensor],
        can_modify=bn_decay_mean_consumers)
    if fused_batch_norm is False:
      bn_decay_var_consumers = list(match.bn_decay_var_tensor.consumers())
      bn_decay_var_out = utils.smart_cond(
          use_mv_avg,
          lambda: bn_decay_zero,
          lambda: match.bn_decay_var_tensor,
          name='freeze_moving_var')
      graph_editor.reroute_ts(
          [bn_decay_var_out], [match.bn_decay_var_tensor],
          can_modify=bn_decay_var_consumers)
    correction_recip = utils.smart_cond(
        use_mv_avg,
        lambda: array_ops.ones(correction_scale.shape),
        lambda: correction_recip,
        name='correction_recip')
    correction_offset = utils.smart_cond(
        use_mv_avg,
        lambda: correction_offset,
        lambda: array_ops.zeros(correction_offset.shape),
        name='correction_offset')
  return correction_scale, correction_recip, correction_offset

Contributor: BhaskarNallani | Project: tensorflow | Lines: 99 | Source: fold_batch_norms.py
Example 17: weighted_moments

def weighted_moments(x, axes, frequency_weights, name=None, keep_dims=False):
  """Returns the frequency-weighted mean and variance of `x`.

  Args:
    x: A tensor.
    axes: 1-d tensor of int32 values; these are the axes along which
      to compute mean and variance.
    frequency_weights: A tensor of positive weights which can be
      broadcast with x.
    name: Name used to scope the operation.
    keep_dims: Produce moments with the same dimensionality as the input.

  Returns:
    Two tensors: `weighted_mean` and `weighted_variance`.
  """
  with ops.name_scope(name, "weighted_moments", [x, frequency_weights, axes]):
    x = ops.convert_to_tensor(x, name="x")
    frequency_weights = ops.convert_to_tensor(
        frequency_weights, name="frequency_weights")
    # Unlike moments(), this just uses a simpler two-pass method.
    # See comment in moments() WRT precision; it applies here too.
    needs_cast = x.dtype == dtypes.float16
    if needs_cast:
      x = math_ops.cast(x, dtypes.float32)
    if frequency_weights.dtype != x.dtype:
      frequency_weights = math_ops.cast(frequency_weights, x.dtype)
    # Note that we use keep_dims=True for our reductions regardless of the arg;
    # this is so that the results remain broadcast-compatible with the inputs.
    weighted_input_sum = math_ops.reduce_sum(
        frequency_weights * x, axes, name="weighted_input_sum", keep_dims=True)
    # The shape of the weights isn't necessarily the same as x's
    # shape, just broadcast-compatible with it -- so this expression
    # performs broadcasting to give a per-item weight, with the same
    # shape as (frequency_weights * x). This avoids having to reason
    # through all the broadcast logic to compute a correct
    # sum_of_weights.
    broadcasted_weights = frequency_weights + array_ops.zeros_like(x)
    sum_of_weights = math_ops.reduce_sum(
        broadcasted_weights, axes, name="sum_of_weights", keep_dims=True)
    divisor = math_ops.reciprocal(sum_of_weights, name="inv_weight_sum")
    weighted_mean = math_ops.mul(weighted_input_sum, divisor)
    # Have the weighted mean; now on to variance:
    weighted_distsq = math_ops.reduce_sum(
        frequency_weights * math_ops.squared_difference(x, weighted_mean),
        axes,
        name="weighted_distsq",
        keep_dims=True)
    weighted_variance = math_ops.mul(weighted_distsq, divisor)
    if not keep_dims:
      weighted_mean = array_ops.squeeze(weighted_mean, squeeze_dims=axes)
      weighted_variance = array_ops.squeeze(
          weighted_variance, squeeze_dims=axes)
    if needs_cast:
      weighted_mean = math_ops.cast(weighted_mean, dtypes.float16)
      weighted_variance = math_ops.cast(weighted_variance, dtypes.float16)
    return weighted_mean, weighted_variance

Contributor: BloodD | Project: tensorflow | Lines: 69 | Source: nn_impl.py
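A small NumPy cross-check of my own (not the TensorFlow code) for the two-pass formulas above:

import numpy as np

x = np.array([1.0, 2.0, 3.0, 4.0])
w = np.array([1.0, 2.0, 3.0, 4.0])   # frequency weights

divisor = 1.0 / w.sum()              # reciprocal of sum_of_weights
weighted_mean = (w * x).sum() * divisor
weighted_variance = (w * (x - weighted_mean) ** 2).sum() * divisor

assert np.isclose(weighted_mean, np.average(x, weights=w))
assert np.isclose(weighted_variance,
                  np.average((x - weighted_mean) ** 2, weights=w))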
Example 18: _SvdGrad

def _SvdGrad(op, grad_s, grad_u, grad_v):
  """Gradient for Svd based on Giles' algorithm. Reference at top of file."""
  if op.get_attr("compute_uv") and not op.get_attr("full_matrices"):
    raise NotImplementedError(
        "SVD gradient is not implemented for compute_uv=True and "
        "full_matrices=False.")
  a = op.inputs[0]
  a_shape = a.get_shape().with_rank_at_least(2)
  if op.get_attr("compute_uv"):
    # TODO(rmlarsen): Make this work with complex types.
    if a.dtype.is_complex:
      raise NotImplementedError(
          "SVD gradient is not implemented for complex types and "
          "compute_uv=True.")
    grad_u_shape = grad_u.get_shape().with_rank_at_least(2)
    grad_v_shape = grad_v.get_shape().with_rank_at_least(2)
    m = a_shape[-2].merge_with(grad_u_shape[-2])
    n = a_shape[-1].merge_with(grad_v_shape[-2])
    batch_shape = a_shape[:-2].merge_with(grad_u_shape[:-2]).merge_with(
        grad_v_shape[:-2])
    a_shape = batch_shape.concatenate([m, n])
  m = a_shape[-2].value
  n = a_shape[-1].value
  # TODO(rmlarsen): Make this work with placeholders.
  if m is None or n is None:
    raise NotImplementedError(
        "SVD gradient has not been implemented for input with unknown "
        "inner matrix shape.")
  if not op.get_attr("full_matrices") or not op.get_attr("compute_uv"):
    s, u, v = linalg_ops.svd(a, compute_uv=True, full_matrices=True)
  else:
    s = op.outputs[0]
    u = op.outputs[1]
    v = op.outputs[2]
  use_adjoint = False
  if m > n:
    # Compute the gradient for A^H = V * S^T * U^H, and (implicitly) take the
    # Hermitian transpose of the gradient at the end.
    use_adjoint = True
    m, n = n, m
    u, v = v, u
    grad_u, grad_v = grad_v, grad_u
  with ops.control_dependencies([grad_s, grad_u, grad_v]):
    grad_s_mat = array_ops.matrix_diag(grad_s)
    if not op.get_attr("compute_uv"):
      if use_adjoint:
        grad_a = math_ops.matmul(
            v[..., :, :m], math_ops.matmul(u, grad_s_mat), adjoint_b=True)
      else:
        grad_a = math_ops.matmul(u,
                                 math_ops.matmul(
                                     grad_s_mat, v[..., :, :m], adjoint_b=True))
      grad_a.set_shape(a_shape)
      return grad_a
    # TODO(rmlarsen): Define a gradient that is numerically stable for
    # abs(m-n) > 1. Currently this does not work because there are effectively
    # multiple singular values with value zero. I am not sure if this is a true
    # instability or if it simply throws off the finite difference gradient
    # checker.
    if abs(m - n) > 1:
      raise NotImplementedError(
          "svd gradient is not implemented for abs(m - n) > 1")
    s_mat = array_ops.matrix_diag(s)
    s2 = math_ops.square(s)
    # NOTICE: Because of the term involving f, the gradient becomes
    # infinite (or NaN in practice) when singular values are not unique.
    # Mathematically this should not be surprising, since for (k-fold)
    # degenerate singular values, the corresponding singular vectors are
    # only defined up a (k-dimensional) subspace. In practice, this can
    # lead to numerical instability when singular values are close but not
    # exactly equal.
    f = array_ops.matrix_set_diag(
        math_ops.reciprocal(
            array_ops.expand_dims(s2, -2) - array_ops.expand_dims(s2, -1)),
        array_ops.zeros_like(s))
    s_inv_mat = array_ops.matrix_diag(math_ops.reciprocal(s))
    u_gu = math_ops.matmul(u, grad_u, adjoint_a=True)
    v_gv = math_ops.matmul(v, grad_v, adjoint_a=True)
    if m == n:
      f_u = f * u_gu
      f_v = f * v_gv
    else:
      dv2 = array_ops.matrix_transpose(v_gv[..., m:n, :m]) - v_gv[..., :m, m:n]
      f_u = f * u_gu
      f_v = f * v_gv[..., :m, :m]
    grad_a_nouv = (
        grad_s_mat + math_ops.matmul(f_u + _linalg.adjoint(f_u), s_mat) +
        math_ops.matmul(s_mat, f_v + _linalg.adjoint(f_v)))
    # ......... (remainder of this function omitted) .........

Contributor: AbhinavJain13 | Project: tensorflow | Lines: 101 | Source: linalg_grad.py
Note: The tensorflow.python.ops.math_ops.reciprocal examples in this article were collected from source code and documentation platforms such as GitHub/MSDocs. The code snippets come from open-source projects contributed by their authors; copyright remains with the original authors, and distribution and use are subject to each project's license. Do not republish without permission.