This article collects typical usage examples of the Python function tensorflow.python.ops.math_ops.is_nan. If you are wondering what exactly is_nan does and how to use it, the curated code examples below may help.
17 code examples of is_nan are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help our system recommend better Python code examples.
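Before the excerpts, here is a minimal self-contained sketch of what is_nan does (the tensor values are illustrative; math_ops.is_nan is the internal alias behind the public tf.math.is_nan, called tf.is_nan in older 1.x releases):

```python
import numpy as np
import tensorflow as tf

# is_nan is elementwise: it returns a boolean tensor that is True exactly
# where the input is NaN (infinities and ordinary values map to False).
x = tf.constant([1.0, np.nan, np.inf, -2.5])
mask = tf.math.is_nan(x)  # evaluates to [False, True, False, False]

# A common follow-up pattern: replace NaNs before further processing.
cleaned = tf.where(mask, tf.zeros_like(x), x)  # [1.0, 0.0, inf, -2.5]
```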
Example 1: testUniformNans
def testUniformNans(self):
  a = 10.0
  b = [11.0, 100.0]
  uniform = uniform_lib.Uniform(low=a, high=b)

  no_nans = constant_op.constant(1.0)
  nans = constant_op.constant(0.0) / constant_op.constant(0.0)
  self.assertTrue(self.evaluate(math_ops.is_nan(nans)))
  with_nans = array_ops.stack([no_nans, nans])

  pdf = uniform.prob(with_nans)

  is_nan = self.evaluate(math_ops.is_nan(pdf))
  self.assertFalse(is_nan[0])
  self.assertTrue(is_nan[1])
Author: AnishShah | Project: tensorflow | Lines: 15 | Source: uniform_test.py
Example 2: _apply_transform
def _apply_transform(self, input_tensors, **kwargs):
  """Applies the transformation to the `transform_input`.

  Args:
    input_tensors: a list of Tensors representing the input to
      the Transform.
    **kwargs: Additional keyword arguments, unused here.

  Returns:
    A namedtuple of Tensors representing the transformed output.
  """
  d = input_tensors[0]
  if self.strip_value is np.nan:
    strip_hot = math_ops.is_nan(d)
  else:
    strip_hot = math_ops.equal(d,
                               array_ops.constant([self.strip_value],
                                                  dtype=d.dtype))
  keep_hot = math_ops.logical_not(strip_hot)
  length = array_ops.reshape(array_ops.shape(d), [])
  indices = array_ops.boolean_mask(math_ops.range(length), keep_hot)
  values = array_ops.boolean_mask(d, keep_hot)
  sparse_indices = array_ops.reshape(
      math_ops.cast(indices, dtypes.int64), [-1, 1])
  shape = math_ops.cast(array_ops.shape(d), dtypes.int64)
  # pylint: disable=not-callable
  return self.return_type(ops.SparseTensor(sparse_indices, values, shape))
Author: 821760408-sp | Project: tensorflow | Lines: 31 | Source: sparsify.py
Example 3: testUniformNans
def testUniformNans(self):
  with self.test_session():
    a = 10.0
    b = [11.0, 100.0]
    uniform = uniform_lib.Uniform(a=a, b=b)

    no_nans = constant_op.constant(1.0)
    nans = constant_op.constant(0.0) / constant_op.constant(0.0)
    self.assertTrue(math_ops.is_nan(nans).eval())
    with_nans = array_ops.stack([no_nans, nans])

    pdf = uniform.pdf(with_nans)

    is_nan = math_ops.is_nan(pdf).eval()
    self.assertFalse(is_nan[0])
    self.assertTrue(is_nan[1])
Author: ivankreso | Project: tensorflow | Lines: 16 | Source: uniform_test.py
Example 4: pdf
def pdf(self, x, name="pdf"):
  """The PDF of observations in `x` under these Uniform distribution(s).

  Args:
    x: tensor of dtype `dtype`, must be broadcastable with `a` and `b`.
    name: The name to give this op.

  Returns:
    pdf: tensor of dtype `dtype`, the pdf values of `x`. If `x` is `nan`,
      will return `nan`.
  """
  with ops.name_scope(self.name):
    with ops.op_scope([self.a, self.b, x], name):
      x = ops.convert_to_tensor(x, name="x")
      if x.dtype != self.dtype:
        raise TypeError("Input x dtype does not match dtype: %s vs. %s" %
                        (x.dtype, self.dtype))

      broadcasted_x = x * self._ones()
      return math_ops.select(
          math_ops.is_nan(broadcasted_x), broadcasted_x, math_ops.select(
              math_ops.logical_or(broadcasted_x < self.a,
                                  broadcasted_x > self.b),
              array_ops.zeros_like(broadcasted_x),
              (1.0 / self.range()) * array_ops.ones_like(broadcasted_x)))
Author: 0ruben | Project: tensorflow | Lines: 25 | Source: uniform.py
Example 5: kl_divergence
def kl_divergence(distribution_a, distribution_b,
                  allow_nan_stats=True, name=None):
  """Get the KL-divergence KL(distribution_a || distribution_b).

  If there is no KL method registered specifically for `type(distribution_a)`
  and `type(distribution_b)`, then the class hierarchies of these types are
  searched.

  If one KL method is registered between any pairs of classes in these two
  parent hierarchies, it is used.

  If more than one such registered method exists, the method whose registered
  classes have the shortest sum MRO paths to the input types is used.

  If more than one such shortest path exists, the first method
  identified in the search is used (favoring a shorter MRO distance to
  `type(distribution_a)`).

  Args:
    distribution_a: The first distribution.
    distribution_b: The second distribution.
    allow_nan_stats: Python `bool`, default `True`. When `True`,
      statistics (e.g., mean, mode, variance) use the value "`NaN`" to
      indicate the result is undefined. When `False`, an exception is raised
      if one or more of the statistic's batch members are undefined.
    name: Python `str` name prefixed to Ops created by this class.

  Returns:
    A Tensor with the batchwise KL-divergence between `distribution_a`
    and `distribution_b`.

  Raises:
    NotImplementedError: If no KL method is defined for distribution types
      of `distribution_a` and `distribution_b`.
  """
  kl_fn = _registered_kl(type(distribution_a), type(distribution_b))
  if kl_fn is None:
    raise NotImplementedError(
        "No KL(distribution_a || distribution_b) registered for distribution_a "
        "type %s and distribution_b type %s"
        % (type(distribution_a).__name__, type(distribution_b).__name__))

  with ops.name_scope("KullbackLeibler"):
    kl_t = kl_fn(distribution_a, distribution_b, name=name)
    if allow_nan_stats:
      return kl_t

    # Check KL for NaNs
    kl_t = array_ops.identity(kl_t, name="kl")
    with ops.control_dependencies([
        control_flow_ops.Assert(
            math_ops.logical_not(
                math_ops.reduce_any(math_ops.is_nan(kl_t))),
            ["KL calculation between %s and %s returned NaN values "
             "(and was called with allow_nan_stats=False). Values:"
             % (distribution_a.name, distribution_b.name), kl_t])]):
      return array_ops.identity(kl_t, name="checked_kl")
Author: AlbertXiebnu | Project: tensorflow | Lines: 58 | Source: kullback_leibler.py
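The hierarchy search described in the docstring above can be seen in a short, hedged sketch (TF 1.x module paths assumed; MyNormal is a hypothetical subclass introduced only for illustration): a KL registered for a parent class is found for an unregistered subclass via the MRO walk.

```python
import tensorflow as tf
from tensorflow.python.ops.distributions import kullback_leibler, normal

class MyNormal(normal.Normal):
  """Hypothetical subclass; no KL is registered for it directly."""
  pass

a = MyNormal(loc=0.0, scale=1.0)
b = MyNormal(loc=1.0, scale=2.0)

# Nothing is registered for (MyNormal, MyNormal), so the lookup walks both
# class hierarchies and falls back to the (Normal, Normal) registration.
kl = kullback_leibler.kl_divergence(a, b)

with tf.Session() as sess:
  print(sess.run(kl))  # closed-form KL(N(0,1) || N(1,2))
```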
Example 6: testBasic
def testBasic(self):
  for dtype in [dtypes.float32, dtypes.float64]:
    one = constant_op.constant([1], dtype=dtype)
    two = constant_op.constant([2], dtype=dtype)
    zero = constant_op.constant([0], dtype=dtype)
    nan = constant_op.constant([np.nan], dtype=dtype)

    eps = constant_op.constant([np.finfo(dtype.as_numpy_dtype).eps],
                               dtype=dtype)

    self.assertAllEqual(math_ops.nextafter(one, two) - one, eps)
    self.assertAllLess(math_ops.nextafter(one, zero) - one, 0)
    self.assertAllEqual(
        math_ops.is_nan(math_ops.nextafter(nan, one)), [True])
    self.assertAllEqual(
        math_ops.is_nan(math_ops.nextafter(one, nan)), [True])
    self.assertAllEqual(math_ops.nextafter(one, one), one)
Author: Wajih-O | Project: tensorflow | Lines: 18 | Source: math_ops_test.py
Example 7: _prob
def _prob(self, x):
  broadcasted_x = x * array_ops.ones(self.batch_shape_tensor())
  return array_ops.where(
      math_ops.is_nan(broadcasted_x),
      broadcasted_x,
      array_ops.where(
          math_ops.logical_or(broadcasted_x < self.low,
                              broadcasted_x >= self.high),
          array_ops.zeros_like(broadcasted_x),
          array_ops.ones_like(broadcasted_x) / self.range()))
Author: LUTAN | Project: tensorflow | Lines: 10 | Source: uniform.py
Example 8: _prob
def _prob(self, x):
  broadcasted_x = x * array_ops.ones(self.batch_shape())
  return array_ops.where(
      math_ops.is_nan(broadcasted_x),
      broadcasted_x,
      array_ops.where(
          math_ops.logical_or(broadcasted_x < self.a,
                              broadcasted_x > self.b),
          array_ops.zeros_like(broadcasted_x),
          (1. / self.range()) * array_ops.ones_like(broadcasted_x)))
Author: AliMiraftab | Project: tensorflow | Lines: 10 | Source: uniform.py
Example 9: kl
def kl(dist_a, dist_b, allow_nan=False, name=None):
  """Get the KL-divergence KL(dist_a || dist_b).

  If there is no KL method registered specifically for `type(dist_a)` and
  `type(dist_b)`, then the class hierarchies of these types are searched.

  If one KL method is registered between any pairs of classes in these two
  parent hierarchies, it is used.

  If more than one such registered method exists, the method whose registered
  classes have the shortest sum MRO paths to the input types is used.

  If more than one such shortest path exists, the first method
  identified in the search is used (favoring a shorter MRO distance to
  `type(dist_a)`).

  Args:
    dist_a: The first distribution.
    dist_b: The second distribution.
    allow_nan: If `False` (default), a runtime error is raised
      if the KL returns NaN values for any batch entry of the given
      distributions. If `True`, the KL may return a NaN for the given entry.
    name: (optional) Name scope to use for created operations.

  Returns:
    A Tensor with the batchwise KL-divergence between dist_a and dist_b.

  Raises:
    NotImplementedError: If no KL method is defined for distribution types
      of dist_a and dist_b.
  """
  kl_fn = _registered_kl(type(dist_a), type(dist_b))
  if kl_fn is None:
    raise NotImplementedError(
        "No KL(dist_a || dist_b) registered for dist_a type %s and dist_b "
        "type %s" % (type(dist_a).__name__, type(dist_b).__name__))

  with ops.name_scope("KullbackLeibler"):
    kl_t = kl_fn(dist_a, dist_b, name=name)
    if allow_nan:
      return kl_t

    # Check KL for NaNs
    kl_t = array_ops.identity(kl_t, name="kl")
    with ops.control_dependencies([
        control_flow_ops.Assert(
            math_ops.logical_not(
                math_ops.reduce_any(math_ops.is_nan(kl_t))),
            ["KL calculation between %s and %s returned NaN values "
             "(and was called with allow_nan=False). Values:"
             % (dist_a.name, dist_b.name), kl_t])]):
      return array_ops.identity(kl_t, name="checked_kl")
Author: AliMiraftab | Project: tensorflow | Lines: 53 | Source: kullback_leibler.py
Example 10: _calculate_acceptance_probabilities
def _calculate_acceptance_probabilities(init_probs, target_probs):
  """Calculate the per-class acceptance rates.

  Args:
    init_probs: The class probabilities of the data.
    target_probs: The desired class proportion in minibatches.
  Returns:
    A list of the per-class acceptance probabilities.

  This method is based on solving the following analysis:

  Let F be the probability of a rejection (on any example).
  Let p_i be the proportion of examples in the data in class i (init_probs).
  Let a_i be the rate the rejection sampler should *accept* class i.
  Let t_i be the target proportion in the minibatches for class i
  (target_probs).

  ```
  F = sum_i(p_i * (1-a_i))
    = 1 - sum_i(p_i * a_i)    using sum_i(p_i) = 1
  ```

  An example with class `i` will be accepted if `k` rejections occur, then an
  example with class `i` is seen by the rejector, and it is accepted. This can
  be written as follows:

  ```
  t_i = sum_k=0^inf(F^k * p_i * a_i)
      = p_i * a_i / (1 - F)    using geometric series identity, since 0 <= F < 1
      = p_i * a_i / sum_j(p_j * a_j)    using F from above
  ```

  Note that the following constraints hold:
  ```
  0 <= p_i <= 1, sum_i(p_i) = 1
  0 <= a_i <= 1
  0 <= t_i <= 1, sum_i(t_i) = 1
  ```

  A solution for a_i in terms of the other variables is the following:
    ```a_i = (t_i / p_i) / max_i[t_i / p_i]```
  """
  # Make list of t_i / p_i.
  ratio_l = target_probs / init_probs

  # Replace NaNs with 0s.
  ratio_l = math_ops.select(math_ops.is_nan(ratio_l),
                            array_ops.zeros_like(ratio_l),
                            ratio_l)

  # Calculate list of acceptance probabilities.
  max_ratio = math_ops.reduce_max(ratio_l)
  return ratio_l / max_ratio
Author: MrCrumpets | Project: tensorflow | Lines: 53 | Source: sampling_ops.py
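A quick numeric check of the closed form a_i = (t_i / p_i) / max_i[t_i / p_i] derived above, in plain NumPy (the probabilities are illustrative):

```python
import numpy as np

init_probs = np.array([0.7, 0.2, 0.1])    # p_i: class proportions in the data
target_probs = np.array([1/3, 1/3, 1/3])  # t_i: desired minibatch proportions

ratio = target_probs / init_probs              # t_i / p_i
ratio = np.where(np.isnan(ratio), 0.0, ratio)  # 0/0 case: class absent everywhere
accept = ratio / ratio.max()                   # a_i, scaled so the rarest class is always kept
print(accept)                                  # approx. [0.143 0.5 1.0]

# Sanity check: the accepted mass p_i * a_i is proportional to the targets t_i.
print(init_probs * accept)                     # [0.1 0.1 0.1]
```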
Example 11: _compare
def _compare(self, x, use_gpu):
  np_finite, np_inf, np_nan = np.isfinite(x), np.isinf(x), np.isnan(x)
  with test_util.device(use_gpu=use_gpu):
    inx = ops.convert_to_tensor(x)
    ofinite, oinf, onan = math_ops.is_finite(inx), math_ops.is_inf(
        inx), math_ops.is_nan(inx)
    tf_finite, tf_inf, tf_nan = self.evaluate([ofinite, oinf, onan])
  self.assertAllEqual(np_inf, tf_inf)
  self.assertAllEqual(np_nan, tf_nan)
  self.assertAllEqual(np_finite, tf_finite)
  self.assertShapeEqual(np_inf, oinf)
  self.assertShapeEqual(np_nan, onan)
  self.assertShapeEqual(np_finite, ofinite)
Author: Wajih-O | Project: tensorflow | Lines: 13 | Source: cwise_ops_test.py
Example 12: kl
def kl(dist_a, dist_b, allow_nan=False, name=None):
  """Get the KL-divergence KL(dist_a || dist_b).

  Args:
    dist_a: instance of distributions.Distribution.
    dist_b: instance of distributions.Distribution.
    allow_nan: If False (default), a runtime error is raised
      if the KL returns NaN values for any batch entry of the given
      distributions. If True, the KL may return a NaN for the given entry.
    name: (optional) Name scope to use for created operations.

  Returns:
    A Tensor with the batchwise KL-divergence between dist_a and dist_b.

  Raises:
    TypeError: If dist_a or dist_b is not an instance of Distribution.
    NotImplementedError: If no KL method is defined for distribution types
      of dist_a and dist_b.
  """
  if not isinstance(dist_a, distribution.Distribution):
    raise TypeError(
        "dist_a is not an instance of Distribution, received type: %s"
        % type(dist_a))
  if not isinstance(dist_b, distribution.Distribution):
    raise TypeError(
        "dist_b is not an instance of Distribution, received type: %s"
        % type(dist_b))
  kl_fn = _DIVERGENCES.get((type(dist_a), type(dist_b)), None)
  if kl_fn is None:
    raise NotImplementedError(
        "No KL(dist_a || dist_b) registered for dist_a type %s and dist_b "
        "type %s" % (type(dist_a).__name__, type(dist_b).__name__))

  with ops.name_scope("KullbackLeibler"):
    kl_t = kl_fn(dist_a, dist_b, name=name)
    if allow_nan:
      return kl_t

    # Check KL for NaNs
    kl_t = array_ops.identity(kl_t, name="kl")
    with ops.control_dependencies([
        logging_ops.Assert(
            math_ops.logical_not(math_ops.reduce_any(math_ops.is_nan(kl_t))),
            ["KL calculation between %s and %s returned NaN values "
             "(and was called with allow_nan=False). Values:"
             % (dist_a.name, dist_b.name),
             kl_t])]):
      return array_ops.identity(kl_t, name="checked_kl")
Author: ChaitanyaCixLive | Project: tensorflow | Lines: 50 | Source: kullback_leibler.py
Example 13: sparsemax_loss
def sparsemax_loss(logits, sparsemax, labels, name=None):
  """Computes sparsemax loss function [1].

  [1]: https://arxiv.org/abs/1602.02068

  Args:
    logits: A `Tensor`. Must be one of the following types: `half`, `float32`,
      `float64`.
    sparsemax: A `Tensor`. Must have the same type as `logits`.
    labels: A `Tensor`. Must have the same type as `logits`.
    name: A name for the operation (optional).

  Returns:
    A `Tensor`. Has the same type as `logits`.
  """

  with ops.name_scope(name, "sparsemax_loss",
                      [logits, sparsemax, labels]) as name:
    logits = ops.convert_to_tensor(logits, name="logits")
    sparsemax = ops.convert_to_tensor(sparsemax, name="sparsemax")
    labels = ops.convert_to_tensor(labels, name="labels")

    # In the paper, they call the logits z.
    # A constant can be subtracted from the logits to make the algorithm
    # more numerically stable in theory. However, there is really no major
    # source of numerical instability in this algorithm.
    z = logits

    # sum over support
    # Use a conditional where instead of a multiplication to support z = -inf.
    # If z = -inf, and there is no support (sparsemax = 0), a multiplication
    # would cause 0 * -inf = nan, which is not correct in this case.
    sum_s = array_ops.where(
        math_ops.logical_or(sparsemax > 0, math_ops.is_nan(sparsemax)),
        sparsemax * (z - 0.5 * sparsemax), array_ops.zeros_like(sparsemax))

    # - z_k + ||q||^2
    q_part = labels * (0.5 * labels - z)
    # Fix the case where labels = 0 and z = -inf, where q_part would
    # otherwise be 0 * -inf = nan. But since labels = 0, no cost for
    # z = -inf should be considered.
    # The code below also covers the case where z = inf. However, in this
    # case the sparsemax will be nan, which means the sum_s will also be nan,
    # therefore this case doesn't need additional special treatment.
    q_part_safe = array_ops.where(
        math_ops.logical_and(math_ops.equal(labels, 0), math_ops.is_inf(z)),
        array_ops.zeros_like(z), q_part)

    return math_ops.reduce_sum(sum_s + q_part_safe, axis=1)
Author: Ajaycs99 | Project: tensorflow | Lines: 49 | Source: sparsemax_loss.py
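The `0 * -inf` remark in the comments above is easy to reproduce; a tiny NumPy sketch (values illustrative) of why a conditional where is used instead of a plain multiplication:

```python
import numpy as np

z = np.array([-np.inf, 1.0])  # one logit is -inf
p = np.array([0.0, 1.0])      # sparsemax puts no mass on the -inf entry

naive = p * (z - 0.5 * p)                       # 0 * -inf = nan poisons entry 0
safe = np.where(p > 0, p * (z - 0.5 * p), 0.0)  # the nan branch is masked out
print(naive)  # [nan 0.5]
print(safe)   # [0.  0.5]
```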
Example 14: _compare
def _compare(self, x, use_gpu):
  np_finite, np_inf, np_nan = np.isfinite(x), np.isinf(x), np.isnan(x)
  with self.test_session(
      use_gpu=use_gpu,
      force_gpu=use_gpu and test_util.is_gpu_available()) as sess:
    inx = ops.convert_to_tensor(x)
    ofinite, oinf, onan = math_ops.is_finite(inx), math_ops.is_inf(
        inx), math_ops.is_nan(inx)
    tf_finite, tf_inf, tf_nan = sess.run([ofinite, oinf, onan])
  self.assertAllEqual(np_inf, tf_inf)
  self.assertAllEqual(np_nan, tf_nan)
  self.assertAllEqual(np_finite, tf_finite)
  self.assertShapeEqual(np_inf, oinf)
  self.assertShapeEqual(np_nan, onan)
  self.assertShapeEqual(np_finite, ofinite)
Author: bunbutter | Project: tensorflow | Lines: 15 | Source: cwise_ops_test.py
Example 15: testSqrt
def testSqrt(self):
  for dtype in [np.float16, np.float32, np.float64]:
    fi = np.finfo(dtype)
    for size in [1, 3, 4, 7, 8, 63, 64, 65]:
      # For float32 Eigen uses Carmack's fast vectorized sqrt algorithm.
      # It is not accurate for very large arguments, so we test for
      # fi.max/100 instead of fi.max here.
      for value in [fi.min, -2, -1, 0, fi.tiny, 1, 2, 1000, fi.max / 100]:
        x = np.full((size,), value, dtype=dtype)
        np_y = np.sqrt(x)
        np_nan = np.isnan(np_y)
        with test_util.use_gpu():
          tf_y = math_ops.sqrt(x)
          tf_nan = math_ops.is_nan(tf_y)
          if value < 0:
            self.assertAllEqual(np_nan, self.evaluate(tf_nan))
          else:
            self.assertAllCloseAccordingToType(np_y, self.evaluate(tf_y))
Author: Wajih-O | Project: tensorflow | Lines: 18 | Source: cwise_ops_test.py
Example 16: kernel
#......... part of the code omitted here .........

    samples[i] = x_val
  ```

  ```python
  # Empirical-Bayes estimation of a hyperparameter by MCMC-EM:

  # Problem setup
  N = 150
  D = 10
  x = np.random.randn(N, D).astype(np.float32)
  true_sigma = 0.5
  true_beta = true_sigma * np.random.randn(D).astype(np.float32)
  y = x.dot(true_beta) + np.random.randn(N).astype(np.float32)

  def log_prior(beta, log_sigma):
    return tf.reduce_sum(-0.5 / tf.exp(2 * log_sigma) * tf.square(beta) -
                         log_sigma)

  def regression_log_joint(beta, log_sigma, x, y):
    # This function returns log p(beta | log_sigma) + log p(y | x, beta).
    means = tf.matmul(tf.expand_dims(beta, 0), x, transpose_b=True)
    means = tf.squeeze(means)
    log_likelihood = tf.reduce_sum(-0.5 * tf.square(y - means))
    return log_prior(beta, log_sigma) + log_likelihood

  def log_joint_partial(beta):
    return regression_log_joint(beta, log_sigma, x, y)

  # Our estimate of log(sigma)
  log_sigma = tf.Variable(0., name='log_sigma')
  # The state of the Markov chain
  beta = tf.Variable(tf.random_normal([x.shape[1]]), name='beta')
  new_beta, _, _, _ = hmc.kernel(0.1, 5, beta, log_joint_partial,
                                 event_dims=[0])
  beta_update = tf.assign(beta, new_beta)
  optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)
  with tf.control_dependencies([beta_update]):
    log_sigma_update = optimizer.minimize(-log_prior(beta, log_sigma),
                                          var_list=[log_sigma])

  sess = tf.Session()
  sess.run(tf.global_variables_initializer())
  log_sigma_history = np.zeros(1000)
  for i in xrange(1000):
    log_sigma_val, _ = sess.run([log_sigma, log_sigma_update])
    log_sigma_history[i] = log_sigma_val

  # Should converge to something close to true_sigma
  plt.plot(np.exp(log_sigma_history))
  ```
  """
  with ops.name_scope(name, 'hmc_kernel', [step_size, n_leapfrog_steps, x]):
    potential_and_grad = _make_potential_and_grad(target_log_prob_fn)
    x = ops.convert_to_tensor(x, name='x')

    x_shape = array_ops.shape(x)
    m = random_ops.random_normal(x_shape, dtype=x.dtype)

    kinetic_0 = 0.5 * math_ops.reduce_sum(math_ops.square(m), event_dims)

    if (x_log_prob is not None) and (x_grad is not None):
      log_potential_0, grad_0 = -x_log_prob, -x_grad  # pylint: disable=invalid-unary-operand-type
    else:
      if x_log_prob is not None:
        logging.warn('x_log_prob was provided, but x_grad was not,'
                     ' so x_log_prob was not used.')
      if x_grad is not None:
        logging.warn('x_grad was provided, but x_log_prob was not,'
                     ' so x_grad was not used.')
      log_potential_0, grad_0 = potential_and_grad(x)

    new_x, new_m, log_potential_1, grad_1 = leapfrog_integrator(
        step_size, n_leapfrog_steps, x, m, potential_and_grad, grad_0)

    kinetic_1 = 0.5 * math_ops.reduce_sum(math_ops.square(new_m), event_dims)

    energy_change = log_potential_1 - log_potential_0 + kinetic_1 - kinetic_0
    # Treat NaN as infinite energy (and therefore guaranteed rejection).
    energy_change = array_ops.where(
        math_ops.is_nan(energy_change),
        array_ops.fill(array_ops.shape(energy_change),
                       energy_change.dtype.as_numpy_dtype(np.inf)),
        energy_change)
    acceptance_probs = math_ops.exp(math_ops.minimum(-energy_change, 0.))
    accepted = (
        random_ops.random_uniform(
            array_ops.shape(acceptance_probs), dtype=x.dtype)
        < acceptance_probs)
    new_log_prob = -array_ops.where(accepted, log_potential_1, log_potential_0)

    # TODO(b/65738010): This should work, but it doesn't for now.
    # reduced_shape = math_ops.reduced_shape(x_shape, event_dims)
    reduced_shape = array_ops.shape(math_ops.reduce_sum(x, event_dims,
                                                        keep_dims=True))
    accepted = array_ops.reshape(accepted, reduced_shape)
    accepted = math_ops.logical_or(
        accepted, math_ops.cast(array_ops.zeros_like(x), dtypes.bool))

    new_x = array_ops.where(accepted, new_x, x)
    new_grad = -array_ops.where(accepted, grad_1, grad_0)

    # TODO(langmore) Gradients of acceptance_probs and new_log_prob with
    # respect to initial_x will propagate NaNs (see
    # testNanFromGradsDontPropagate). This should be fixed.
    return new_x, acceptance_probs, new_log_prob, new_grad
Author: AbhinavJain13 | Project: tensorflow | Lines: 101 | Source: hmc_impl.py
Example 17: sparsemax
def sparsemax(logits, name=None):
  """Computes sparsemax activations [1].

  For each batch `i` and class `j` we have
    $$sparsemax[i, j] = max(logits[i, j] - tau(logits[i, :]), 0)$$

  [1]: https://arxiv.org/abs/1602.02068

  Args:
    logits: A `Tensor`. Must be one of the following types: `half`, `float32`,
      `float64`.
    name: A name for the operation (optional).

  Returns:
    A `Tensor`. Has the same type as `logits`.
  """

  with ops.name_scope(name, "sparsemax", [logits]) as name:
    logits = ops.convert_to_tensor(logits, name="logits")
    obs = array_ops.shape(logits)[0]
    dims = array_ops.shape(logits)[1]

    # In the paper, they call the logits z.
    # The mean(logits) can be subtracted from the logits to make the algorithm
    # more numerically stable. The instability in this algorithm comes mostly
    # from the z_cumsum. Subtracting the mean will cause z_cumsum to be close
    # to zero. However, in practice the numerical instability issues are very
    # minor and subtracting the mean causes extra issues with inf and nan
    # input.
    z = logits

    # sort z
    z_sorted, _ = nn.top_k(z, k=dims)

    # calculate k(z)
    z_cumsum = math_ops.cumsum(z_sorted, axis=1)
    k = math_ops.range(
        1, math_ops.cast(dims, logits.dtype) + 1, dtype=logits.dtype)
    z_check = 1 + k * z_sorted > z_cumsum
    # Because the z_check vector is always [1,1,...,1,0,0,...,0], finding the
    # (index + 1) of the last `1` is the same as just summing the number of 1s.
    k_z = math_ops.reduce_sum(math_ops.cast(z_check, dtypes.int32), axis=1)

    # calculate tau(z)
    # If there are inf values or all values are -inf, the k_z will be zero,
    # which is mathematically invalid and will also cause the gather_nd to
    # fail. Prevent this issue for now by setting k_z = 1 if k_z = 0; this is
    # then fixed later (see p_safe) by returning p = nan. This results in the
    # same behavior as softmax.
    k_z_safe = math_ops.maximum(k_z, 1)
    indices = array_ops.stack([math_ops.range(0, obs), k_z_safe - 1], axis=1)
    tau_sum = array_ops.gather_nd(z_cumsum, indices)
    tau_z = (tau_sum - 1) / math_ops.cast(k_z, logits.dtype)

    # calculate p
    p = math_ops.maximum(
        math_ops.cast(0, logits.dtype), z - tau_z[:, array_ops.newaxis])
    # If k_z = 0 or if z = nan, then the input is invalid
    p_safe = array_ops.where(
        math_ops.logical_or(
            math_ops.equal(k_z, 0), math_ops.is_nan(z_cumsum[:, -1])),
        array_ops.fill([obs, dims], math_ops.cast(float("nan"), logits.dtype)),
        p)

    return p_safe
Author: Ajaycs99 | Project: tensorflow | Lines: 65 | Source: sparsemax.py
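As a sanity check on the algorithm above, here is a hedged plain-NumPy reference for a single row, following the same k(z)/tau(z) steps (no inf/nan guards, unlike the TF version):

```python
import numpy as np

def sparsemax_1d(z):
  """Reference sparsemax for one row, mirroring the steps above."""
  z_sorted = np.sort(z)[::-1]            # sort z (descending)
  z_cumsum = np.cumsum(z_sorted)
  k = np.arange(1, len(z) + 1)
  z_check = 1 + k * z_sorted > z_cumsum  # pattern [1,1,...,1,0,...,0]
  k_z = z_check.sum()                    # k(z): size of the support
  tau_z = (z_cumsum[k_z - 1] - 1) / k_z  # tau(z): the threshold
  return np.maximum(z - tau_z, 0.0)

print(sparsemax_1d(np.array([0.5, 1.0, 1.2])))  # [0.  0.4 0.6], sparse, sums to 1
```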
Note: The tensorflow.python.ops.math_ops.is_nan examples in this article were compiled by 纯净天空 from source-code and documentation platforms such as GitHub/MSDocs. The code snippets are selected from open-source projects contributed by various developers; copyright remains with the original authors, and distribution and use are subject to each project's License. Do not reproduce without permission.