def _log_survival_function(self, y):
low = self._low
high = self._high
# Recall the promise:
# survival_function(y) := P[Y > y]
# = 0, if y >= high,
# = 1, if y < low,
# = P[X > y], otherwise.
# P[Y > j] = P[ceiling(Y) > j] since mass is only at integers, not in
# between.
j = math_ops.ceil(y)
# P[X > j], used when low < X < high.
result_so_far = self.distribution.log_survival_function(j)
# Broadcast, because it's possible that this is a single distribution being
# evaluated on a batch of samples.
j += array_ops.zeros_like(result_so_far)
# Re-define values at the cutoffs.
if low is not None:
result_so_far = array_ops.where(j < low,
array_ops.zeros_like(result_so_far),
result_so_far)
if high is not None:
neg_inf = -np.inf * array_ops.ones_like(result_so_far)
result_so_far = array_ops.where(j >= high, neg_inf, result_so_far)
return result_so_far
def __call__(self, step):
with ops.name_scope(
self.name, "PolynomialDecay",
[self.initial_learning_rate, step, self.decay_steps,
self.end_learning_rate, self.power]
) as name:
initial_learning_rate = ops.convert_to_tensor(
self.initial_learning_rate, name="initial_learning_rate")
dtype = initial_learning_rate.dtype
end_learning_rate = math_ops.cast(self.end_learning_rate, dtype)
power = math_ops.cast(self.power, dtype)
global_step_recomp = math_ops.cast(step, dtype)
decay_steps_recomp = math_ops.cast(self.decay_steps, dtype)
if self.cycle:
# Find the first multiple of decay_steps that is bigger than
# global_step. If global_step is zero, set the multiplier to 1.
multiplier = control_flow_ops.cond(
math_ops.equal(global_step_recomp, 0), lambda: 1.0,
lambda: math_ops.ceil(global_step_recomp / self.decay_steps))
decay_steps_recomp = math_ops.multiply(decay_steps_recomp, multiplier)
else:
# Make sure that the global_step used is not bigger than decay_steps.
global_step_recomp = math_ops.minimum(global_step_recomp,
self.decay_steps)
p = math_ops.div(global_step_recomp, decay_steps_recomp)
return math_ops.add(
math_ops.multiply(initial_learning_rate - end_learning_rate,
math_ops.pow(1 - p, power)),
end_learning_rate,
name=name)
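To make the schedule above concrete, the same arithmetic can be written in a few lines of plain NumPy. This is only a sketch of the formula (the helper name `polynomial_decay_value` is made up here), not the TensorFlow op graph that `__call__` builds:
```python
import numpy as np

def polynomial_decay_value(initial_lr, step, decay_steps,
                           end_lr=0.0001, power=1.0, cycle=False):
    """NumPy sketch of the polynomial decay formula used above."""
    step, decay_steps = float(step), float(decay_steps)
    if cycle:
        # Use the first multiple of decay_steps that is >= step; a multiplier
        # of 1 when step == 0 avoids a zero decay_steps (ceil(0) == 0).
        multiplier = 1.0 if step == 0 else np.ceil(step / decay_steps)
        decay_steps *= multiplier
    else:
        # Clamp step so the rate stays at end_lr once decay_steps is reached.
        step = min(step, decay_steps)
    p = step / decay_steps
    return (initial_lr - end_lr) * (1.0 - p) ** power + end_lr

# Linear decay from 0.1 to 0.01 over 100 steps: halfway gives 0.055.
assert abs(polynomial_decay_value(0.1, 50, 100, end_lr=0.01) - 0.055) < 1e-12
```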
def _survival_function(self, y):
lower_cutoff = self._lower_cutoff
upper_cutoff = self._upper_cutoff
# Recall the promise:
# survival_function(y) := P[Y > y]
# = 0, if y >= upper_cutoff,
# = 1, if y < lower_cutoff,
# = P[X > y], otherwise.
# P[Y > j] = P[ceiling(Y) > j] since mass is only at integers, not in
# between.
j = math_ops.ceil(y)
# P[X > j], used when lower_cutoff < X < upper_cutoff.
result_so_far = self.distribution.survival_function(j)
# Broadcast, because it's possible that this is a single distribution being
# evaluated on a batch of samples.
j += array_ops.zeros_like(result_so_far)
# Re-define values at the cutoffs.
if lower_cutoff is not None:
result_so_far = math_ops.select(j < lower_cutoff,
array_ops.ones_like(result_so_far),
result_so_far)
if upper_cutoff is not None:
result_so_far = math_ops.select(j >= upper_cutoff,
array_ops.zeros_like(result_so_far),
result_so_far)
return result_so_far
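The convention recalled in the comments of the `_survival_function` / `_log_survival_function` methods above (evaluate the base distribution at `ceil(y)`, then override the values outside the cutoffs) can be illustrated with a plain `math`/NumPy sketch for a standard normal base distribution. The helper name is hypothetical and the Monte-Carlo check is only a sanity test, not part of the library:
```python
import math
import numpy as np

def quantized_normal_sf(y, low=None, high=None):
    """Survival function of Y = clip(ceil(X), low, high), X ~ Normal(0, 1),
    following the convention described in the comments above."""
    j = math.ceil(y)
    # P[X > j] for the underlying continuous distribution.
    result = 0.5 * math.erfc(j / math.sqrt(2.0))
    if low is not None and j < low:
        result = 1.0   # below the lower cutoff, all mass lies above y
    if high is not None and j >= high:
        result = 0.0   # at or above the upper cutoff, no mass lies above y
    return result

# Check at an integer point, where the convention coincides with the exact
# survival function of the quantized variable.
rng = np.random.default_rng(0)
y_samples = np.clip(np.ceil(rng.normal(size=200_000)), -2, 2)
assert abs((y_samples > 1).mean() - quantized_normal_sf(1, low=-2, high=2)) < 5e-3
```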
def decayed_lr(learning_rate, global_step, decay_steps, end_learning_rate,
power, cycle, name):
"""Helper to recompute learning rate; most helpful in eager-mode."""
with ops.name_scope(
name, "PolynomialDecay",
[learning_rate, global_step, decay_steps, end_learning_rate, power]
) as name:
learning_rate = ops.convert_to_tensor(learning_rate, name="learning_rate")
dtype = learning_rate.dtype
end_learning_rate = math_ops.cast(end_learning_rate, dtype)
power = math_ops.cast(power, dtype)
global_step_recomp = math_ops.cast(global_step, dtype)
decay_steps_recomp = math_ops.cast(decay_steps, dtype)
if cycle:
# Find the first multiple of decay_steps that is bigger than
# global_step. If global_step is zero, set the multiplier to 1.
multiplier = control_flow_ops.cond(
math_ops.equal(global_step_recomp, 0), lambda: 1.0,
lambda: math_ops.ceil(global_step_recomp / decay_steps))
decay_steps_recomp = math_ops.multiply(decay_steps_recomp, multiplier)
else:
# Make sure that the global_step used is not bigger than decay_steps.
global_step_recomp = math_ops.minimum(global_step_recomp, decay_steps)
p = math_ops.div(global_step_recomp, decay_steps_recomp)
return math_ops.add(
math_ops.multiply(learning_rate - end_learning_rate,
math_ops.pow(1 - p, power)),
end_learning_rate,
name=name)
def frames(signal, frame_length, frame_step, name=None):
"""Frame a signal into overlapping frames.
May be used in front of spectral functions.
For example:
```python
pcm = tf.placeholder(tf.float32, [None, 9152])
frames = tf.contrib.signal.frames(pcm, 512, 180)
magspec = tf.abs(tf.spectral.rfft(frames, [512]))
image = tf.expand_dims(magspec, 3)
```
Args:
signal: A `Tensor` of shape `[batch_size, signal_length]`.
frame_length: An `int32` or `int64` `Tensor`. The length of each frame.
frame_step: An `int32` or `int64` `Tensor`. The step between frames.
name: A name for the operation (optional).
Returns:
A `Tensor` of frames with shape `[batch_size, num_frames, frame_length]`.
Raises:
ValueError: if signal does not have rank 2.
"""
with ops.name_scope(name, "frames", [signal, frame_length, frame_step]):
signal = ops.convert_to_tensor(signal, name="signal")
frame_length = ops.convert_to_tensor(frame_length, name="frame_length")
frame_step = ops.convert_to_tensor(frame_step, name="frame_step")
signal_rank = signal.shape.ndims
if signal_rank != 2:
raise ValueError("expected signal to have rank 2 but was " + signal_rank)
signal_length = array_ops.shape(signal)[1]
num_frames = math_ops.ceil((signal_length - frame_length) / frame_step)
num_frames = 1 + math_ops.cast(num_frames, dtypes.int32)
pad_length = (num_frames - 1) * frame_step + frame_length
pad_signal = array_ops.pad(signal, [[0, 0], [0,
pad_length - signal_length]])
indices_frame = array_ops.expand_dims(math_ops.range(frame_length), 0)
indices_frames = array_ops.tile(indices_frame, [num_frames, 1])
indices_step = array_ops.expand_dims(
math_ops.range(num_frames) * frame_step, 1)
indices_steps = array_ops.tile(indices_step, [1, frame_length])
indices = indices_frames + indices_steps
# TODO(androbin): remove `transpose` when `gather` gets `axis` support
pad_signal = array_ops.transpose(pad_signal)
signal_frames = array_ops.gather(pad_signal, indices)
signal_frames = array_ops.transpose(signal_frames, perm=[2, 0, 1])
return signal_frames
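The gather-based framing above can be mirrored with NumPy fancy indexing, which makes the index arithmetic easier to see. This is an illustrative sketch (the helper name is made up), not the `tf.contrib.signal` implementation:
```python
import numpy as np

def frame_signal_np(signal, frame_length, frame_step):
    """NumPy sketch of the framing logic: pad the signal, then gather
    overlapping windows of shape [num_frames, frame_length]."""
    signal_length = signal.shape[1]
    num_frames = 1 + int(np.ceil((signal_length - frame_length) / frame_step))
    pad_length = (num_frames - 1) * frame_step + frame_length
    padded = np.pad(signal, [(0, 0), (0, pad_length - signal_length)])
    # indices[i, j] = i * frame_step + j, i.e. sample j of frame i.
    indices = (np.arange(num_frames)[:, None] * frame_step +
               np.arange(frame_length)[None, :])
    return padded[:, indices]   # shape [batch_size, num_frames, frame_length]

pcm = np.arange(20, dtype=np.float32).reshape(1, 20)
out = frame_signal_np(pcm, frame_length=8, frame_step=4)
assert out.shape == (1, 4, 8)
assert np.array_equal(out[0, 1], np.arange(4, 12))
```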
def _enclosing_power_of_two(value):
"""Return 2**N for integer N such that 2**N >= value."""
value_static = tensor_util.constant_value(value)
if value_static is not None:
return constant_op.constant(
int(2**np.ceil(np.log(value_static) / np.log(2.0))), value.dtype)
return math_ops.cast(
math_ops.pow(2.0, math_ops.ceil(
math_ops.log(math_ops.to_float(value)) / math_ops.log(2.0))),
value.dtype)
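A quick pure-NumPy check of the formula above, `2**ceil(log2(value))`. The test values deliberately avoid exact powers of two greater than 1, where the log-based computation can be off by one ULP:
```python
import numpy as np

def enclosing_power_of_two(value):
    """Smallest power of two that is >= value, using the same formula."""
    return int(2 ** np.ceil(np.log(value) / np.log(2.0)))

assert [enclosing_power_of_two(v) for v in (1, 3, 9, 100, 1000)] == [1, 4, 16, 128, 1024]
```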
def _sample_n(self, n, seed=None):
low = self._low
high = self._high
with ops.name_scope("transform"):
n = ops.convert_to_tensor(n, name="n")
x_samps = self.distribution.sample(n, seed=seed)
ones = array_ops.ones_like(x_samps)
# Snap values to the intervals (j - 1, j].
result_so_far = math_ops.ceil(x_samps)
if low is not None:
result_so_far = array_ops.where(result_so_far < low,
low * ones, result_so_far)
if high is not None:
result_so_far = array_ops.where(result_so_far > high,
high * ones, result_so_far)
return result_so_far
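The sampling transform above is simply ceil-then-clamp; a NumPy sketch (with a hypothetical helper name) makes that explicit:
```python
import numpy as np

def sample_quantized(base_samples, low=None, high=None):
    """Snap continuous base samples to the integer intervals (j - 1, j]
    via ceil, then clamp to the [low, high] cutoffs, as above."""
    result = np.ceil(base_samples)
    if low is not None:
        result = np.where(result < low, low, result)
    if high is not None:
        result = np.where(result > high, high, result)
    return result

samples = sample_quantized(np.array([-3.2, -0.4, 0.0, 1.7]), low=-2, high=2)
assert list(samples) == [-2.0, 0.0, 0.0, 2.0]
```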
def decayed_lr():
"""Helper to recompute learning rate; most helpful in eager-mode."""
global_step_recomp = math_ops.cast(global_step, dtype)
decay_steps_recomp = math_ops.cast(decay_steps, dtype)
if cycle:
# Find the first multiple of decay_steps that is bigger than
# global_step. If global_step is zero, set the multiplier to 1.
multiplier = control_flow_ops.cond(
math_ops.equal(global_step_recomp, 0), lambda: 1.0,
lambda: math_ops.ceil(global_step_recomp / decay_steps))
decay_steps_recomp = math_ops.multiply(decay_steps_recomp, multiplier)
else:
# Make sure that the global_step used is not bigger than decay_steps.
global_step_recomp = math_ops.minimum(global_step_recomp, decay_steps)
p = math_ops.div(global_step_recomp, decay_steps_recomp)
return math_ops.add(
math_ops.multiply(learning_rate - end_learning_rate,
math_ops.pow(1 - p, power)),
end_learning_rate,
name=name)
def sequence_length_from_sparse_tensor(sp_tensor, num_elements=1):
"""Returns a [batch_size] Tensor with per-example sequence length."""
with ops.name_scope(None, 'sequence_length') as name_scope:
row_ids = sp_tensor.indices[:, 0]
column_ids = sp_tensor.indices[:, 1]
# Add one to convert column indices to element length
column_ids += array_ops.ones_like(column_ids)
# Get the number of elements we will have per example/row
seq_length = math_ops.segment_max(column_ids, segment_ids=row_ids)
# The raw values are grouped according to num_elements;
# how many entities will we have after grouping?
# Example: orig tensor [[1, 2], [3]], col_ids = (0, 1, 0),
# row_ids = (0, 0, 1), seq_length = [2, 1]. If num_elements = 2,
# these will get grouped, and the final seq_length is [1, 1].
seq_length = math_ops.cast(
math_ops.ceil(seq_length / num_elements), dtypes.int64)
# If the last n rows do not have ids, seq_length will have shape
# [batch_size - n]. Pad the remaining values with zeros.
n_pad = array_ops.shape(sp_tensor)[:1] - array_ops.shape(seq_length)[:1]
padding = array_ops.zeros(n_pad, dtype=seq_length.dtype)
return array_ops.concat([seq_length, padding], axis=0, name=name_scope)
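The worked example in the comments can be reproduced with a small NumPy sketch of the same steps (per-row max column index + 1, then ceil-division by `num_elements`). The helper is hypothetical and works directly from the sparse indices:
```python
import numpy as np

def sequence_length_np(indices, batch_size, num_elements=1):
    """NumPy sketch of the sequence-length computation above."""
    row_ids, column_ids = indices[:, 0], indices[:, 1]
    # Per-row max column index + 1 = number of raw values in that row;
    # rows without any ids keep length 0 (the zero padding above).
    seq_length = np.zeros(batch_size, dtype=np.int64)
    np.maximum.at(seq_length, row_ids, column_ids + 1)
    # Group the raw values into chunks of num_elements.
    return np.ceil(seq_length / num_elements).astype(np.int64)

# Example from the comments: dense [[1, 2], [3]] has sparse indices
# (0, 0), (0, 1), (1, 0), so seq_length = [2, 1]; grouped by 2 -> [1, 1].
indices = np.array([[0, 0], [0, 1], [1, 0]])
assert list(sequence_length_np(indices, batch_size=2)) == [2, 1]
assert list(sequence_length_np(indices, batch_size=2, num_elements=2)) == [1, 1]
```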
def polynomial_decay(exploration_rate, timestep, decay_steps,
end_exploration_rate=0.0001, power=1.0,
cycle=False, name=None):
"""Applies a polynomial decay to the exploration rate.
It is commonly observed that a monotonically decreasing exploration rate, whose
degree of change is carefully chosen, results in a better performing model.
This function applies a polynomial decay function to a provided initial
`exploration_rate` to reach an `end_exploration_rate` in the given `decay_steps`.
It requires a `timestep` value to compute the decayed exploration rate. You
can just pass a TensorFlow variable that you increment at each training step.
The function returns the decayed exploration rate. It is computed as:
```python
timestep = min(timestep, decay_steps)
decayed_exploration_rate = (exploration_rate - end_exploration_rate) *
                           (1 - timestep / decay_steps) ^ (power) +
                           end_exploration_rate
```
If `cycle` is True then a multiple of `decay_steps` is used, the first one
that is bigger than `timestep`.
```python
decay_steps = decay_steps * ceil(timestep / decay_steps)
decayed_exploration_rate = (exploration_rate - end_exploration_rate) *
                           (1 - timestep / decay_steps) ^ (power) +
                           end_exploration_rate
```
Example: decay from 0.1 to 0.01 in 10000 steps using sqrt (i.e. power=0.5):
```python
timestep = tf.Variable(0, trainable=False)
starter_exploration_rate = 0.1
end_exploration_rate = 0.01
decay_steps = 10000
exploration_rate = tf.train.polynomial_decay(starter_exploration_rate, timestep,
                                             decay_steps, end_exploration_rate,
                                             power=0.5)
# Passing timestep to minimize() will increment it at each step.
learning_step = (
    tf.train.GradientDescentOptimizer(exploration_rate)
    .minimize(...my loss..., timestep=timestep)
)
```
Args:
exploration_rate: A scalar `float32` or `float64` `Tensor` or a
Python number. The initial exploration rate.
timestep: A scalar `int32` or `int64` `Tensor` or a Python number.
Global step to use for the decay computation. Must not be negative.
decay_steps: A scalar `int32` or `int64` `Tensor` or a Python number.
Must be positive. See the decay computation above.
end_exploration_rate: A scalar `float32` or `float64` `Tensor` or a
Python number. The minimal end exploration rate.
power: A scalar `float32` or `float64` `Tensor` or a
Python number. The power of the polynomial. Defaults to linear, 1.0.
cycle: A boolean, whether or not it should cycle beyond decay_steps.
name: String. Optional name of the operation. Defaults to
'PolynomialDecay'.
Returns:
A scalar `Tensor` of the same type as `exploration_rate`. The decayed exploration rate.
Raises:
ValueError: if `timestep` is not supplied.
"""
if timestep is None:
raise ValueError("timestep is required for polynomial_decay.")
with get_name_scope(name=name, scope="PolynomialDecay",
values=[exploration_rate, timestep,
decay_steps, end_exploration_rate, power]) as name:
exploration_rate = ops.convert_to_tensor(exploration_rate, name="exploration_rate")
dtype = exploration_rate.dtype
timestep = math_ops.cast(timestep, dtype)
decay_steps = math_ops.cast(decay_steps, dtype)
end_exploration_rate = math_ops.cast(end_exploration_rate, dtype)
power = math_ops.cast(power, dtype)
if cycle:
# Find the first multiple of decay_steps that is bigger than timestep.
decay_steps = math_ops.multiply(decay_steps,
math_ops.ceil(timestep / decay_steps))
else:
# Make sure that the timestep used is not bigger than decay_steps.
timestep = math_ops.minimum(timestep, decay_steps)
p = math_ops.div(timestep, decay_steps)
return math_ops.add(math_ops.multiply(exploration_rate - end_exploration_rate,
math_ops.pow(1 - p, power)),
end_exploration_rate, name=name)
def odeint_fixed(func, y0, t, dt=None, method='rk4', name=None):
"""ODE integration on a fixed grid (with no step size control).
Useful in certain scenarios to avoid the overhead of adaptive step size
control, e.g. when differentiation of the integration result is desired and/or
the time grid is known a priori to be sufficient.
Args:
func: Function that maps a Tensor holding the state `y` and a scalar Tensor
`t` into a Tensor of state derivatives with respect to time.
y0: N-D Tensor giving starting value of `y` at time point `t[0]`.
t: 1-D Tensor holding a sequence of time points for which to solve for
`y`. The initial time point should be the first element of this sequence,
and each time must be larger than the previous time. May have any floating
point dtype.
dt: 0-D or 1-D Tensor providing a time step suggestion to be used on the
time integration intervals in `t`. A 1-D Tensor should provide values
for all intervals and must have one element fewer than `t`.
If given a 0-D Tensor, the value is interpreted as the same time step
suggestion for all intervals. If passed None, the time step is set to
t[1:] - t[:-1]. Defaults to None. The actual step size is obtained by
ensuring an integer number of steps per interval, potentially reducing the
time step.
method: One of 'midpoint' or 'rk4'.
name: Optional name for the resulting operation.
Returns:
y: (N+1)-D tensor, where the first dimension corresponds to different
time points. Contains the solved value of y for each desired time point in
`t`, with the initial value `y0` being the first element along the first
dimension.
Raises:
ValueError: Upon caller errors.
"""
with ops.name_scope(name, 'odeint_fixed', [y0, t, dt]):
t = ops.convert_to_tensor(t, preferred_dtype=dtypes.float64, name='t')
y0 = ops.convert_to_tensor(y0, name='y0')
intervals = t[1:] - t[:-1]
if dt is None:
dt = intervals
dt = ops.convert_to_tensor(dt, preferred_dtype=dtypes.float64, name='dt')
steps_on_intervals = math_ops.ceil(intervals / dt)
dt = intervals / steps_on_intervals
steps_on_intervals = math_ops.cast(steps_on_intervals, dtype=dtypes.int32)
_check_input_types(y0, t, dt)
_check_input_sizes(t, dt)
with _assert_increasing(t):
with ops.name_scope(method):
if method == 'midpoint':
return _MidpointFixedGridIntegrator().integrate(func, y0, t, dt,
steps_on_intervals)
elif method == 'rk4':
return _RK4FixedGridIntegrator().integrate(func, y0, t, dt,
steps_on_intervals)
else:
raise ValueError('method not supported: {!s}'.format(method))
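The `dt` handling above (an integer number of steps per interval, never exceeding the suggested step) is easy to see numerically; the values below are arbitrary:
```python
import numpy as np

t = np.array([0.0, 1.0, 2.5])                              # time grid
dt_suggestion = 0.4                                        # suggested step size
intervals = t[1:] - t[:-1]                                 # [1.0, 1.5]
steps_on_intervals = np.ceil(intervals / dt_suggestion)    # [3., 4.]
dt = intervals / steps_on_intervals                        # [0.333..., 0.375]
# Each interval is covered by a whole number of (possibly smaller) steps.
assert np.all(dt <= dt_suggestion)
assert np.allclose(steps_on_intervals * dt, intervals)
```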
# ......... (part of the code is omitted here) .........
# One can also check that R_m / (N / 2 - m) is an unbiased estimate of RXX[m].
# Since F[x] is the DFT of x, this leads us to a zero-padding and FFT/IFFT
# based version of estimating RXX.
# Note that this is a special case of the Wiener-Khinchin Theorem.
with ops.name_scope(name, values=[x]):
x = ops.convert_to_tensor(x, name="x")
# Rotate dimensions of x in order to put axis at the rightmost dim.
# FFT op requires this.
rank = util.prefer_static_rank(x)
if axis < 0:
axis = rank + axis
shift = rank - 1 - axis
# Suppose x.shape[axis] = T, so there are T "time" steps.
# ==> x_rotated.shape = B + [T],
# where B is x_rotated's batch shape.
x_rotated = util.rotate_transpose(x, shift)
if center:
x_rotated -= math_ops.reduce_mean(x_rotated, axis=-1, keepdims=True)
# x_len = N / 2 from above explanation. The length of x along axis.
# Get a value for x_len that works in all cases.
x_len = util.prefer_static_shape(x_rotated)[-1]
# TODO(langmore) Investigate whether this zero padding helps or hurts. At
# the moment it is necessary so that all FFT implementations work.
# Zero pad to the next power of 2 greater than 2 * x_len, which equals
# 2**(ceil(Log_2(2 * x_len))). Note: Log_2(X) = Log_e(X) / Log_e(2).
x_len_float64 = math_ops.cast(x_len, np.float64)
target_length = math_ops.pow(
np.float64(2.),
math_ops.ceil(math_ops.log(x_len_float64 * 2) / np.log(2.)))
pad_length = math_ops.cast(target_length - x_len_float64, np.int32)
# We should have:
# x_rotated_pad.shape = x_rotated.shape[:-1] + [T + pad_length]
# = B + [T + pad_length]
x_rotated_pad = util.pad(x_rotated, axis=-1, back=True, count=pad_length)
dtype = x.dtype
if not dtype.is_complex:
if not dtype.is_floating:
raise TypeError("Argument x must have either float or complex dtype"
" found: {}".format(dtype))
x_rotated_pad = math_ops.complex(x_rotated_pad,
dtype.real_dtype.as_numpy_dtype(0.))
# Autocorrelation is IFFT of power-spectral density (up to some scaling).
fft_x_rotated_pad = spectral_ops.fft(x_rotated_pad)
spectral_density = fft_x_rotated_pad * math_ops.conj(fft_x_rotated_pad)
# shifted_product is R[m] from above detailed explanation.
# It is the inner product sum_n X[n] * Conj(X[n - m]).
shifted_product = spectral_ops.ifft(spectral_density)
# Cast back to real-valued if x was real to begin with.
shifted_product = math_ops.cast(shifted_product, dtype)
# Figure out if we can deduce the final static shape, and set max_lags.
# Use x_rotated as a reference, because it has the time dimension in the far
# right, and was created before we performed all sorts of crazy shape
# manipulations.
know_static_shape = True
if not x_rotated.shape.is_fully_defined():
know_static_shape = False
# ......... (part of the code is omitted here) .........
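The zero-pad + FFT/IFFT estimate described in the comments above can be checked against a direct O(N^2) computation with NumPy. This is an illustrative sketch of the Wiener-Khinchin trick only; it returns the un-normalized inner products R[m] and ignores the centering, dtype, and max_lags handling of the surrounding code:
```python
import numpy as np

def auto_correlation_fft(x):
    """R[m] = sum_n x[n] * conj(x[n - m]) via zero padding and FFT/IFFT."""
    x = np.asarray(x, dtype=np.complex128)
    x_len = x.shape[-1]
    # Pad to the next power of two >= 2 * x_len so the circular correlation
    # implied by the FFT cannot wrap around into the data.
    target_length = int(2 ** np.ceil(np.log(2.0 * x_len) / np.log(2.0)))
    x_pad = np.concatenate([x, np.zeros(target_length - x_len, np.complex128)])
    fft_x = np.fft.fft(x_pad)
    spectral_density = fft_x * np.conj(fft_x)
    shifted_product = np.fft.ifft(spectral_density)
    return shifted_product[:x_len]

rng = np.random.default_rng(0)
x = rng.normal(size=16)
direct = np.array([np.sum(x[m:] * x[:x.size - m]) for m in range(x.size)])
assert np.allclose(auto_correlation_fft(x).real, direct)
```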
percentile. If `None` (the default), treat every dimension as a sample
dimension, returning a scalar.
interpolation : {"lower", "higher", "nearest"}. Default: "nearest"
This optional parameter specifies the interpolation method to
use when the desired quantile lies between two data points `i < j`:
* lower: `i`.
* higher: `j`.
* nearest: `i` or `j`, whichever is nearest.
keep_dims: Python `bool`. If `True`, the last dimension is kept with size 1.
If `False`, the last dimension is removed from the output shape.
validate_args: Whether to add runtime checks of argument validity.
If False, and arguments are incorrect, correct behavior is not guaranteed.
name: A Python string name to give this `Op`. Default is "percentile"
Returns:
A `(N - len(axis))` dimensional `Tensor` of same dtype as `x`, or, if
`axis` is `None`, a scalar.
Raises:
ValueError: If argument 'interpolation' is not an allowed type.
"""
name = name or "percentile"
allowed_interpolations = {"lower", "higher", "nearest"}
if interpolation is None:
interpolation = "nearest"
else:
if interpolation not in allowed_interpolations:
raise ValueError("Argument 'interpolation' must be in %s. Found %s" %
(allowed_interpolations, interpolation))
with ops.name_scope(name, [x, q]):
x = ops.convert_to_tensor(x, name="x")
# Double is needed here and below, else we get the wrong index if the array
# is huge along axis.
q = math_ops.to_double(q, name="q")
_get_static_ndims(q, expect_ndims=0)
if validate_args:
q = control_flow_ops.with_dependencies([
check_ops.assert_rank(q, 0),
check_ops.assert_greater_equal(q, math_ops.to_double(0.)),
check_ops.assert_less_equal(q, math_ops.to_double(100.))
], q)
if axis is None:
y = array_ops.reshape(x, [-1])
else:
axis = ops.convert_to_tensor(axis, name="axis")
check_ops.assert_integer(axis)
axis_ndims = _get_static_ndims(
axis, expect_static=True, expect_ndims_no_more_than=1)
axis_const = tensor_util.constant_value(axis)
if axis_const is None:
raise ValueError(
"Expected argument 'axis' to be statically available. Found: %s" %
axis)
axis = axis_const
if axis_ndims == 0:
axis = [axis]
axis = [int(a) for a in axis]
x_ndims = _get_static_ndims(
x, expect_static=True, expect_ndims_at_least=1)
axis = _make_static_axis_non_negative(axis, x_ndims)
y = _move_dims_to_flat_end(x, axis, x_ndims)
frac_at_q_or_above = 1. - q / 100.
d = math_ops.to_double(array_ops.shape(y)[-1])
if interpolation == "lower":
index = math_ops.ceil((d - 1) * frac_at_q_or_above)
elif interpolation == "higher":
index = math_ops.floor((d - 1) * frac_at_q_or_above)
elif interpolation == "nearest":
index = math_ops.round((d - 1) * frac_at_q_or_above)
# If d is gigantic, then we would have d == d - 1, even in double... So
# let's use max/min to avoid out of bounds errors.
d = array_ops.shape(y)[-1]
# d - 1 will be distinct from d in int32.
index = clip_ops.clip_by_value(math_ops.to_int32(index), 0, d - 1)
# Sort everything, not just the top 'k' entries, which allows multiple calls
# to sort only once (under the hood) and use CSE.
sorted_y = _sort_tensor(y)
# result.shape = B
result = sorted_y[..., index]
result.set_shape(y.get_shape()[:-1])
if keep_dims:
if axis is None:
# ones_vec = [1, 1,..., 1], total length = len(S) + len(B).
ones_vec = array_ops.ones(
shape=[_get_best_effort_ndims(x)], dtype=dtypes.int32)
result *= array_ops.ones(ones_vec, dtype=x.dtype)
else:
result = _insert_back_keep_dims(result, axis)
return result
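The index selection above can be sanity-checked with a small NumPy sketch written against an ascending sort; assuming `_sort_tensor` (not shown here) sorts in descending order, indexing with `(d - 1) * (1 - q / 100)` and `ceil` for "lower" / `floor` for "higher" corresponds to `floor` / `ceil` on an ascending sort, which is what the hypothetical helper below uses:
```python
import numpy as np

def percentile_1d(x, q, interpolation="nearest"):
    """Sketch of the selection rule, written for an ascending sort."""
    sorted_x = np.sort(x)
    d = sorted_x.size
    exact = (d - 1) * (q / 100.0)
    if interpolation == "lower":
        index = int(np.floor(exact))
    elif interpolation == "higher":
        index = int(np.ceil(exact))
    else:  # "nearest"
        index = int(np.round(exact))
    # Clip, mirroring the clip_by_value above, to stay in bounds.
    return sorted_x[min(max(index, 0), d - 1)]

x = np.array([10.0, 20.0, 30.0, 40.0, 50.0])
# q = 30 falls between sorted entries 20.0 (lower) and 30.0 (higher).
assert percentile_1d(x, 30.0, "lower") == 20.0
assert percentile_1d(x, 30.0, "higher") == 30.0
assert percentile_1d(x, 30.0, "nearest") == 20.0
```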
def polynomial_decay(learning_rate, global_step, decay_steps,
end_learning_rate=0.0001, power=1.0,
cycle=False, name=None):
"""Applies a polynomial decay to the learning rate.
It is commonly observed that a monotonically decreasing learning rate, whose
degree of change is carefully chosen, results in a better performing model.
This function applies a polynomial decay function to a provided initial
`learning_rate` to reach an `end_learning_rate` in the given `decay_steps`.
It requires a `global_step` value to compute the decayed learning rate. You
can just pass a TensorFlow variable that you increment at each training step.
The function returns the decayed learning rate. It is computed as:
```python
global_step = min(global_step, decay_steps)
decayed_learning_rate = (learning_rate - end_learning_rate) *
(1 - global_step / decay_steps) ^ (power) +
end_learning_rate
```
If `cycle` is True then a multiple of `decay_steps` is used, the first one
that is bigger than `global_step`.
```python
decay_steps = decay_steps * ceil(global_step / decay_steps)
decayed_learning_rate = (learning_rate - end_learning_rate) *
(1 - global_step / decay_steps) ^ (power) +
end_learning_rate
```
Example: decay from 0.1 to 0.01 in 10000 steps using sqrt (i.e. power=0.5):
```python
...
global_step = tf.Variable(0, trainable=False)
starter_learning_rate = 0.1
end_learning_rate = 0.01
decay_steps = 10000
learning_rate = tf.train.polynomial_decay(starter_learning_rate, global_step,
decay_steps, end_learning_rate,
power=0.5)
# Passing global_step to minimize() will increment it at each step.
learning_step = (
tf.train.GradientDescentOptimizer(learning_rate)
.minimize(...my loss..., global_step=global_step)
)
```
Args:
learning_rate: A scalar `float32` or `float64` `Tensor` or a
Python number. The initial learning rate.
global_step: A scalar `int32` or `int64` `Tensor` or a Python number.
Global step to use for the decay computation. Must not be negative.
decay_steps: A scalar `int32` or `int64` `Tensor` or a Python number.
Must be positive. See the decay computation above.
end_learning_rate: A scalar `float32` or `float64` `Tensor` or a
Python number. The minimal end learning rate.
power: A scalar `float32` or `float64` `Tensor` or a
Python number. The power of the polynomial. Defaults to linear, 1.0.
cycle: A boolean, whether or not it should cycle beyond decay_steps.
name: String. Optional name of the operation. Defaults to 'PolynomialDecay'
Returns:
A scalar `Tensor` of the same type as `learning_rate`. The decayed
learning rate.
"""
with ops.name_scope(name, "PolynomialDecay",
[learning_rate, global_step,
decay_steps, end_learning_rate, power]) as name:
learning_rate = ops.convert_to_tensor(learning_rate, name="learning_rate")
dtype = learning_rate.dtype
global_step = math_ops.cast(global_step, dtype)
decay_steps = math_ops.cast(decay_steps, dtype)
end_learning_rate = math_ops.cast(end_learning_rate, dtype)
power = math_ops.cast(power, dtype)
if cycle:
# Find the first multiple of decay_steps that is bigger than global_step.
decay_steps = math_ops.mul(decay_steps,
math_ops.ceil(global_step / decay_steps))
else:
# Make sure that the global_step used is not bigger than decay_steps.
global_step = math_ops.minimum(global_step, decay_steps)
p = math_ops.div(global_step, decay_steps)
return math_ops.add(math_ops.mul(learning_rate - end_learning_rate,
math_ops.pow(1 - p, power)),
end_learning_rate, name=name)
def kernel_classifier_distance_and_std_from_activations(real_activations,
generated_activations,
max_block_size=1024,
dtype=None):
"""Kernel "classifier" distance for evaluating a generative model.
This methods computes the kernel classifier distance from activations of
real images and generated images. This can be used independently of the
kernel_classifier_distance() method, especially in the case of using large
batches during evaluation where we would like to precompute all of the
activations before computing the classifier distance, or if we want to
compute multiple metrics based on the same images. It also returns a rough
estimate of the standard error of the estimator.
This technique is described in detail in https://arxiv.org/abs/1801.01401.
Given two distributions P and Q of activations, this function calculates
E_{X, X' ~ P}[k(X, X')] + E_{Y, Y' ~ Q}[k(Y, Y')]
- 2 E_{X ~ P, Y ~ Q}[k(X, Y)]
where k is the polynomial kernel
k(x, y) = ( x^T y / dimension + 1 )^3.
This captures how different the distributions of real and generated images'
visual features are. Like the Frechet distance (and unlike the Inception
score), this is a true distance and incorporates information about the
target images. Unlike the Frechet score, this function computes an
*unbiased* and asymptotically normal estimator, which makes comparing
estimates across models much more intuitive.
The estimator used takes time quadratic in max_block_size. Larger values of
max_block_size will decrease the variance of the estimator but increase the
computational cost. This differs slightly from the estimator used by the
original paper; it is the block estimator of https://arxiv.org/abs/1307.1954.
The estimate of the standard error will also be more reliable when there are
more blocks, i.e. when max_block_size is smaller.
NOTE: the blocking code assumes that real_activations and
generated_activations are both in random order. If either is sorted in a
meaningful order, the estimator will behave poorly.
Args:
real_activations: 2D Tensor containing activations of real data. Shape is
[batch_size, activation_size].
generated_activations: 2D Tensor containing activations of generated data.
Shape is [batch_size, activation_size].
max_block_size: integer, default 1024. The distance estimator splits samples
into blocks for computational efficiency. Larger values are more
computationally expensive but decrease the variance of the distance
estimate. Having a smaller block size also gives a better estimate of the
standard error.
dtype: If not None, coerce activations to this dtype before computations.
Returns:
The Kernel Inception Distance. A floating-point scalar of the same type
as the output of the activations.
An estimate of the standard error of the distance estimator (a scalar of
the same type).
"""
real_activations.shape.assert_has_rank(2)
generated_activations.shape.assert_has_rank(2)
real_activations.shape[1].assert_is_compatible_with(
generated_activations.shape[1])
if dtype is None:
dtype = real_activations.dtype
assert generated_activations.dtype == dtype
else:
real_activations = math_ops.cast(real_activations, dtype)
generated_activations = math_ops.cast(generated_activations, dtype)
# Figure out how to split the activations into blocks of approximately
# equal size, with none larger than max_block_size.
n_r = array_ops.shape(real_activations)[0]
n_g = array_ops.shape(generated_activations)[0]
n_bigger = math_ops.maximum(n_r, n_g)
n_blocks = math_ops.to_int32(math_ops.ceil(n_bigger / max_block_size))
v_r = n_r // n_blocks
v_g = n_g // n_blocks
n_plusone_r = n_r - v_r * n_blocks
n_plusone_g = n_g - v_g * n_blocks
sizes_r = array_ops.concat([
array_ops.fill([n_blocks - n_plusone_r], v_r),
array_ops.fill([n_plusone_r], v_r + 1),
], 0)
sizes_g = array_ops.concat([
array_ops.fill([n_blocks - n_plusone_g], v_g),
array_ops.fill([n_plusone_g], v_g + 1),
], 0)
zero = array_ops.zeros([1], dtype=dtypes.int32)
inds_r = array_ops.concat([zero, math_ops.cumsum(sizes_r)], 0)
inds_g = array_ops.concat([zero, math_ops.cumsum(sizes_g)], 0)
# ......... (part of the code is omitted here) .........
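The block-splitting arithmetic above (at most `max_block_size` samples per block, block sizes differing by at most one) can be sketched in NumPy; the helper name is made up for illustration:
```python
import numpy as np

def split_into_blocks(n, max_block_size):
    """Split n samples into blocks of nearly equal size, none larger
    than max_block_size, mirroring the computation above."""
    n_blocks = int(np.ceil(n / max_block_size))
    v = n // n_blocks                    # base block size
    n_plusone = n - v * n_blocks         # blocks that get one extra sample
    sizes = np.concatenate([
        np.full(n_blocks - n_plusone, v, dtype=np.int64),
        np.full(n_plusone, v + 1, dtype=np.int64),
    ])
    inds = np.concatenate([[0], np.cumsum(sizes)])   # block boundaries
    return sizes, inds

sizes, inds = split_into_blocks(n=2500, max_block_size=1024)
assert sizes.sum() == 2500 and sizes.max() <= 1024 and len(sizes) == 3
assert list(inds) == [0, 833, 1666, 2500]
```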