This article collects and summarizes typical usage examples of the tensorflow.assign_sub function in Python. If you have been wondering exactly how to use assign_sub, how to call it, or what working examples look like, the hand-picked code samples below should help.
A total of 20 code examples of the assign_sub function are shown below, sorted by popularity by default.
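Before the collected examples, here is a minimal sketch of the basic semantics of tf.assign_sub, written against the TensorFlow 1.x graph API used throughout this page (the variable name and the constants are illustrative only, not taken from the examples below): the op subtracts a value from a variable in place and returns a tensor holding the updated value.

import tensorflow as tf

v = tf.Variable(10.0, name="v")
update = tf.assign_sub(v, 3.0)   # graph op: v <- v - 3.0

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(update))      # 7.0 -- the returned tensor is the new value
    print(sess.run(v))           # 7.0 -- the variable itself was modified in place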
Example 1: batch_norm
def batch_norm(input_,
               dim,
               name,
               scale=True,
               train=True,
               epsilon=1e-8,
               decay=.1,
               axes=[0],
               bn_lag=DEFAULT_BN_LAG):
  """Batch normalization."""
  # create variables
  with tf.variable_scope(name):
    var = variable_on_cpu(
        "var", [dim], tf.constant_initializer(1.), trainable=False)
    mean = variable_on_cpu(
        "mean", [dim], tf.constant_initializer(0.), trainable=False)
    step = variable_on_cpu("step", [], tf.constant_initializer(0.), trainable=False)
    if scale:
      gamma = variable_on_cpu("gamma", [dim], tf.constant_initializer(1.))
      beta = variable_on_cpu("beta", [dim], tf.constant_initializer(0.))
  # choose the appropriate moments
  if train:
    used_mean, used_var = tf.nn.moments(input_, axes, name="batch_norm")
    cur_mean, cur_var = used_mean, used_var
    if bn_lag > 0.:
      used_mean -= (1. - bn_lag) * (used_mean - tf.stop_gradient(mean))
      used_var -= (1 - bn_lag) * (used_var - tf.stop_gradient(var))
      used_mean /= (1. - bn_lag**(step + 1))
      used_var /= (1. - bn_lag**(step + 1))
  else:
    used_mean, used_var = mean, var
    cur_mean, cur_var = used_mean, used_var

  # normalize
  res = (input_ - used_mean) / tf.sqrt(used_var + epsilon)
  # de-normalize
  if scale:
    res *= gamma
    res += beta

  # update variables
  if train:
    with tf.name_scope(name, "AssignMovingAvg", [mean, cur_mean, decay]):
      with ops.colocate_with(mean):
        new_mean = tf.assign_sub(
            mean,
            tf.check_numerics(decay * (mean - cur_mean), "NaN in moving mean."))
    with tf.name_scope(name, "AssignMovingAvg", [var, cur_var, decay]):
      with ops.colocate_with(var):
        new_var = tf.assign_sub(
            var,
            tf.check_numerics(decay * (var - cur_var),
                              "NaN in moving variance."))
    with tf.name_scope(name, "IncrementTime", [step]):
      with ops.colocate_with(step):
        new_step = tf.assign_add(step, 1.)
    res += 0. * new_mean * new_var * new_step

  return res
Developer: Peratham, Project: models, Lines: 59, Source: real_nvp_utils.py
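A note on the update pattern in Example 1 (this only restates what the code computes): expressing the moving-average update as an assign_sub of decay * (mean - cur_mean) is the usual convex combination in disguise,

\[
\text{mean} \;\leftarrow\; \text{mean} - \text{decay}\,(\text{mean} - \text{cur\_mean})
\;=\; (1 - \text{decay})\,\text{mean} + \text{decay}\cdot\text{cur\_mean}.
\]

With decay = 0.1, the stored statistics move 10% of the way toward the current batch statistics on each update. The same identity underlies the assign_sub-based exponential moving averages in Examples 2, 6 and 13, where the conventional decay (close to 1) multiplies the old average instead.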
Example 2: batch_norm_log_diff
def batch_norm_log_diff(input_,
                        dim,
                        name,
                        train=True,
                        epsilon=1e-8,
                        decay=.1,
                        axes=[0],
                        reuse=None,
                        bn_lag=DEFAULT_BN_LAG):
  """Batch normalization with corresponding log determinant Jacobian."""
  if reuse is None:
    reuse = not train
  # create variables
  with tf.variable_scope(name) as scope:
    if reuse:
      scope.reuse_variables()
    var = variable_on_cpu(
        "var", [dim], tf.constant_initializer(1.), trainable=False)
    mean = variable_on_cpu(
        "mean", [dim], tf.constant_initializer(0.), trainable=False)
    step = variable_on_cpu("step", [], tf.constant_initializer(0.), trainable=False)
  # choose the appropriate moments
  if train:
    used_mean, used_var = tf.nn.moments(input_, axes, name="batch_norm")
    cur_mean, cur_var = used_mean, used_var
    if bn_lag > 0.:
      used_var = stable_var(input_=input_, mean=used_mean, axes=axes)
      cur_var = used_var
      used_mean -= (1 - bn_lag) * (used_mean - tf.stop_gradient(mean))
      used_mean /= (1. - bn_lag**(step + 1))
      used_var -= (1 - bn_lag) * (used_var - tf.stop_gradient(var))
      used_var /= (1. - bn_lag**(step + 1))
  else:
    used_mean, used_var = mean, var
    cur_mean, cur_var = used_mean, used_var

  # update variables
  if train:
    with tf.name_scope(name, "AssignMovingAvg", [mean, cur_mean, decay]):
      with ops.colocate_with(mean):
        new_mean = tf.assign_sub(
            mean,
            tf.check_numerics(
                decay * (mean - cur_mean), "NaN in moving mean."))
    with tf.name_scope(name, "AssignMovingAvg", [var, cur_var, decay]):
      with ops.colocate_with(var):
        new_var = tf.assign_sub(
            var,
            tf.check_numerics(decay * (var - cur_var),
                              "NaN in moving variance."))
    with tf.name_scope(name, "IncrementTime", [step]):
      with ops.colocate_with(step):
        new_step = tf.assign_add(step, 1.)
    used_var += 0. * new_mean * new_var * new_step
  used_var += epsilon

  return used_mean, used_var
Developer: Peratham, Project: models, Lines: 57, Source: real_nvp_utils.py
Example 3: testInitRequiredAssignSub
def testInitRequiredAssignSub(self):
  with self.test_session():
    p = tf.Variable(tf.fill([1024, 1024], 1),
                    tf.int32)
    a = tf.assign_sub(p, tf.fill([1024, 1024], 0))
    with self.assertRaisesOpError("use uninitialized"):
      a.op.run()
Developer: CdricGmd, Project: tensorflow, Lines: 7, Source: dense_update_ops_test.py
Example 4: central_step
def central_step():
    # restore v1, slots
    op5 = tf.group(*[tf.assign(w, v) for w, v in zip(restored_vars, tmp_vars)])
    with tf.get_default_graph().control_dependencies([op5]):
        back = tf.group(*[tf.assign_sub(v, -self._lr_t * grad) for grad, v in grads_and_vars])
        with tf.get_default_graph().control_dependencies([back]):
            return tf.gradients(self.gan.trainer.d_loss, d_vars) + tf.gradients(self.gan.trainer.g_loss, g_vars)
Developer: 255BITS, Project: hyperchamber-gan, Lines: 7, Source: curl_optimizer.py
Example 5: _initAssignSubFetch
def _initAssignSubFetch(self, x, y, use_gpu=False):
  """Initialize a param to init, and compute param -= y."""
  with self.test_session(use_gpu=use_gpu):
    p = tf.Variable(x)
    sub = tf.assign_sub(p, y)
    p.initializer.run()
    new_value = sub.eval()
    return p.eval(), new_value
Developer: CdricGmd, Project: tensorflow, Lines: 8, Source: dense_update_ops_test.py
Example 6: exponential_moving_average
def exponential_moving_average(self,
                               var,
                               avg_var=None,
                               decay=0.999,
                               ignore_nan=False):
  """Calculates the exponential moving average.

  TODO(): check if this implementation of moving average can now
  be replaced by tensorflow's implementation.

  Adds a variable to keep track of the exponential moving average and adds an
  update operation to the bookkeeper. The name of the variable is
  '%s_average' % name prefixed with the current variable scope.

  Args:
    var: The variable for which a moving average should be computed.
    avg_var: The variable to set the average into, if None create a zero
      initialized one.
    decay: How much history to use in the moving average.
      Higher, means more history values [0, 1) accepted.
    ignore_nan: If the value is NaN or Inf, skip it.

  Returns:
    The averaged variable.

  Raises:
    ValueError: if decay is not in [0, 1).
  """
  with self._g.as_default():
    if decay < 0 or decay >= 1.0:
      raise ValueError('Decay is %5.2f, but has to be in [0, 1).' % decay)
    if avg_var is None:
      avg_name = '%s_average' % _bare_var_name(var)
      with tf.control_dependencies(None):
        with tf.name_scope(avg_name + '/Initializer/'):
          if isinstance(var, tf.Variable):
            init_val = var.initialized_value()
          elif var.get_shape().is_fully_defined():
            init_val = tf.constant(0,
                                   shape=var.get_shape(),
                                   dtype=var.dtype.base_dtype)
          else:
            init_val = tf.constant(0, dtype=var.dtype.base_dtype)
        avg_var = tf.Variable(init_val, name=avg_name, trainable=False)

    num_updates = tf.cast(self.global_step, tf.float32)
    decay = tf.minimum(decay, tf.maximum(0.9, (1.0 + num_updates) /
                                         (10.0 + num_updates)))
    with tf.device(avg_var.device):
      if ignore_nan:
        var = tf.where(tf.is_finite(var), var, avg_var)
      if var.get_shape().is_fully_defined():
        avg_update = tf.assign_sub(avg_var, (1 - decay) * (avg_var - var))
      else:
        avg_update = tf.assign(avg_var,
                               avg_var - (1 - decay) * (avg_var - var),
                               validate_shape=False)
    self._g.add_to_collection(GraphKeys.UPDATE_OPS, avg_update)
    return avg_update
Developer: google, Project: prettytensor, Lines: 57, Source: bookkeeper.py
Example 7: curl
def curl():
    grads = tf.gradients(self.gan.trainer.d_loss, d_vars) + tf.gradients(self.gan.trainer.g_loss, g_vars)
    op3 = tf.group(*[tf.assign_sub(v, self._lr_t * grad) for grad, v in zip(grads, all_vars)])
    with tf.get_default_graph().control_dependencies([op3]):
        def curlcombine(g1, g2):
            stepsize = self._lr_t
            return g1 - (g2 - g1) / stepsize
        new_grads = tf.gradients(self.gan.trainer.d_loss, d_vars) + tf.gradients(self.gan.trainer.g_loss, g_vars)
        g3s = [curlcombine(g1, g2) for g1, g2 in zip(grads, new_grads)]
        return g3s
Developer: 255BITS, Project: hyperchamber-gan, Lines: 10, Source: jr_optimizer.py
Example 8: _assign_sub
def _assign_sub(self, ref, updates, indices=None):
  if indices is not None:
    if isinstance(ref, tf.Variable):
      return tf.scatter_sub(ref, indices, updates, use_locking=self._use_locking)
    elif isinstance(ref, resource_variable_ops.ResourceVariable):
      with tf.control_dependencies([resource_variable_ops.resource_scatter_add(ref.handle, indices, -updates)]):
        return ref.value()
    else:
      raise TypeError("did not expect type %r" % type(ref))
  else:
    return tf.assign_sub(ref, updates, use_locking=self._use_locking)
Developer: rwth-i6, Project: returnn, Lines: 11, Source: TFUpdater.py
Example 9: _apply_dense
def _apply_dense(self, grad, var):
  lr_t = tf.cast(self._lr_t, var.dtype.base_dtype)
  beta2_t = tf.cast(self._beta2_t, var.dtype.base_dtype)
  if var.dtype.base_dtype == tf.float16:
    eps = 1e-7
  else:
    eps = 1e-8
  m = self.get_slot(var, "m")
  m_t = m.assign(tf.maximum(beta2_t * m + eps, tf.abs(grad)))
  g_t = grad / m_t
  var_update = tf.assign_sub(var, lr_t * g_t)
  return tf.group(*[var_update, m_t])
Developer: luohuayong, Project: tensorflow, Lines: 12, Source: t15.py
Example 10: sgd
def sgd(cost, parameters=None, learning_rate=0.01):
    if parameters is None:
        parameters = tf.trainable_variables()
    grads = tf.gradients(cost, parameters)
    all_updates = []
    for grad, param in zip(grads, parameters):
        assigned = tf.assign_sub(param, learning_rate * grad)
        all_updates.append(assigned)
    update_op = tf.group(*all_updates)
    return update_op
Developer: diogo149, Project: tensorflow_utils, Lines: 13, Source: sample_optimizer.py
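A hypothetical usage sketch for the sgd() helper above (the model, data, and variable names here are assumptions for illustration and do not come from the source project): build the update op once, then run it per batch in a session.

import numpy as np
import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 10])
y = tf.placeholder(tf.float32, [None, 1])
w = tf.get_variable("w", [10, 1], initializer=tf.zeros_initializer())
loss = tf.reduce_mean(tf.square(tf.matmul(x, w) - y))
train_op = sgd(loss, learning_rate=0.1)   # sgd() as defined in Example 10

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    batch_x = np.random.randn(32, 10).astype(np.float32)
    batch_y = np.random.randn(32, 1).astype(np.float32)
    sess.run(train_op, feed_dict={x: batch_x, y: batch_y})   # one assign_sub step per parameter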
Example 11: _resource_apply_dense
def _resource_apply_dense(self, grad, var):
  grad_squared = tf.square(grad) + 1e-30
  grad_squared_mean = tf.reduce_mean(grad_squared)
  decay_rate = self._decay_rate
  update_scale = self._learning_rate
  if self._multiply_by_parameter_scale:
    update_scale *= self._parameter_scale(var)
  # HACK: Make things dependent on grad.
  # This confounds the XLA rewriter and keeps it from fusing computations
  # across different variables.  This fusion is bad for HBM usage, since
  # it causes the gradients to persist in memory.
  decay_rate += grad_squared_mean * 1e-30
  update_scale += grad_squared_mean * 1e-30
  # END HACK
  mixing_rate = 1.0 - decay_rate
  shape = var.get_shape().as_list()
  updates = []
  if self._should_use_factored_second_moment_estimate(shape):
    grad_squared_row_mean = tf.reduce_mean(grad_squared, 1)
    grad_squared_col_mean = tf.reduce_mean(grad_squared, 0)
    vr = self.get_slot(var, "vr")
    new_vr = (decay_rate * vr + mixing_rate * grad_squared_row_mean)
    vc = self.get_slot(var, "vc")
    new_vc = (decay_rate * vc + mixing_rate * grad_squared_col_mean)
    vr_update = tf.assign(vr, new_vr, use_locking=self._use_locking)
    vc_update = tf.assign(vc, new_vc, use_locking=self._use_locking)
    updates = [vr_update, vc_update]
    long_term_mean = tf.reduce_mean(new_vr)
    r_factor = tf.rsqrt(new_vr / long_term_mean)
    c_factor = tf.rsqrt(new_vc)
    x = grad * tf.expand_dims(r_factor, 1) * tf.expand_dims(c_factor, 0)
  else:
    v = self.get_slot(var, "v")
    new_v = decay_rate * v + mixing_rate * grad_squared
    v_update = tf.assign(v, new_v, use_locking=self._use_locking)
    updates = [v_update]
    x = grad * tf.rsqrt(new_v)
  if self._clipping_threshold is not None:
    clipping_denom = tf.maximum(1.0, reduce_rms(x) / self._clipping_threshold)
    x /= clipping_denom
  subtrahend = update_scale * x
  if self._beta1:
    m = self.get_slot(var, "m")
    new_m = self._beta1 * m + (1.0 - self._beta1) * subtrahend
    updates.append(tf.assign(m, new_m, use_locking=self._use_locking))
    subtrahend = new_m
  var_update = tf.assign_sub(var, subtrahend, use_locking=self._use_locking)
  updates = [var_update] + updates
  return tf.group(*updates)
Developer: chqiwang, Project: tensor2tensor, Lines: 49, Source: adafactor.py
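A note on the factored branch in Example 11 (a restatement of what the code computes, following the Adafactor idea of factored second moments): instead of a full second-moment accumulator, the optimizer keeps running row means vr and column means vc of the squared gradient and reconstructs the per-element estimate as

\[
\hat V_{ij} \;\approx\; \frac{vr_i \, vc_j}{\operatorname{mean}(vr)},
\qquad
x_{ij} \;=\; \frac{g_{ij}}{\sqrt{\hat V_{ij}}}
\;=\; g_{ij}\,\sqrt{\frac{\operatorname{mean}(vr)}{vr_i}}\cdot\frac{1}{\sqrt{vc_j}},
\]

which is exactly the r_factor / c_factor scaling in the code; tf.assign_sub then applies the (optionally clipped and momentum-smoothed) result as the parameter update.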
Example 12: adam
def adam(cost,
         parameters=None,
         learning_rate=1e-3,
         beta1=0.9,
         beta2=0.999,
         epsilon=1e-8):
    if parameters is None:
        parameters = tf.trainable_variables()
    grads = tf.gradients(cost, parameters)
    all_updates = []
    zero_init = tf.constant_initializer(0.)
    with tf.variable_scope("adam"):
        t_prev = tf.get_variable("t",
                                 shape=(),
                                 initializer=zero_init)
        t = tf.assign_add(t_prev, 1)
        all_updates.append(t)
        for grad, param in zip(grads, parameters):
            with tf.variable_scope(param.name.replace(":", "_")):
                param_shape = tfu.get_shape_values(param)
                m_prev = tf.get_variable("m",
                                         shape=param_shape,
                                         initializer=zero_init)
                v_prev = tf.get_variable("v",
                                         shape=param_shape,
                                         initializer=zero_init)
                m = tf.assign(m_prev,
                              m_prev * beta1 + grad * (1 - beta1))
                v = tf.assign(v_prev,
                              v_prev * beta2 + tf.square(grad) * (1 - beta2))
                numerator = learning_rate * m / (1 - tf.pow(beta1, t))
                denominator = tf.sqrt(v / (1 - tf.pow(beta2, t))) + epsilon
                assigned = tf.assign_sub(param, numerator / denominator)
                all_updates += [m, v, assigned]
    update_op = tf.group(*all_updates)
    return update_op
Developer: diogo149, Project: tensorflow_utils, Lines: 40, Source: sample_optimizer.py
Example 13: exponential_moving_average
def exponential_moving_average(
    self, var, avg_var=None, decay=0.999, ignore_nan=False):
  """Calculates the exponential moving average.

  Adds a variable to keep track of the exponential moving average and adds an
  update operation to the bookkeeper. The name of the variable is
  '%s_average' % name prefixed with the current variable scope.

  Args:
    var: The variable for which a moving average should be computed.
    avg_var: The variable to set the average into, if None create a zero
      initialized one.
    decay: How much history to use in the moving average.
      Higher, means more history values [0, 1) accepted.
    ignore_nan: If the value is NaN or Inf, skip it.

  Returns:
    The averaged variable.

  Raises:
    ValueError: if decay is not in [0, 1).
  """
  with self.g.as_default():
    if decay < 0 or decay >= 1.0:
      raise ValueError('Decay is %5.2f, but has to be in [0, 1).' % decay)
    if not avg_var:
      shape = var.get_shape()
      avg_name = '%s_average' % _bare_var_name(var)
      avg_var = tf.Variable(
          tf.zeros_initializer(shape=shape, dtype=var.dtype),
          name=avg_name,
          trainable=False)
    num_updates = tf.cast(self.global_step, tf.float32)
    decay = tf.maximum(
        0.9, tf.minimum(decay, (1.0 + num_updates) / (10.0 + num_updates)))
    with tf.device(avg_var.device):
      if ignore_nan:
        var = tf.select(tf.is_finite(var), var, avg_var)
      avg_update = tf.assign_sub(avg_var, (1 - decay) * (avg_var - var))
    self._g.add_to_collection(GraphKeys.UPDATE_OPS, avg_update)
    return avg_var
Developer: pombredanne, Project: prettytensor, Lines: 39, Source: bookkeeper.py
Example 14: finite_differences
def finite_differences(self, grads_and_vars, global_step, name, d_vars, g_vars, d_grads, g_grads):
    all_vars = [v for _, v in grads_and_vars]
    all_grads = [g for g, _ in grads_and_vars]
    d_grads = all_grads[:len(d_vars)]
    g_grads = all_grads[len(d_vars):]
    d_vars = []
    g_vars = []
    for grad, var in grads_and_vars:
        if var in self.gan.d_vars():
            d_vars += [var]
        elif var in self.gan.g_vars():
            g_vars += [var]
        else:
            raise ValueError("Couldn't find var in g_vars or d_vars")

    with ops.init_scope():
        [self._zeros_slot(v, "orig", self._name) for _, v in grads_and_vars]
        slots_list = []
        if self.config.include_slots:
            for name in self.optimizer.get_slot_names():
                for var in self.optimizer.variables():
                    slots_list.append(self.optimizer._zeros_slot(var, "orig", "orig"))
    v1 = [self.get_slot(v, "orig") for _, v in grads_and_vars]
    slots_list = []
    slots_vars = []
    restored_vars = all_vars + slots_vars
    tmp_vars = v1 + slots_list
    e1 = 0.0001
    e2 = 0.0001

    # gamma12
    save = tf.group(*[tf.assign(w, v) for w, v in zip(tmp_vars, restored_vars)])      # store variables
    restore = tf.group(*[tf.assign(w, v) for w, v in zip(restored_vars, tmp_vars)])   # restore variables

    def curl():
        grads = tf.gradients(self.gan.trainer.d_loss, d_vars) + tf.gradients(self.gan.trainer.g_loss, g_vars)
        op3 = tf.group(*[tf.assign_sub(v, self._lr_t * grad) for grad, v in zip(grads, all_vars)])
        with tf.get_default_graph().control_dependencies([op3]):
            def curlcombine(g1, g2):
                stepsize = self._lr_t
                return g1 - (g2 - g1) / stepsize
            new_grads = tf.gradients(self.gan.trainer.d_loss, d_vars) + tf.gradients(self.gan.trainer.g_loss, g_vars)
            g3s = [curlcombine(g1, g2) for g1, g2 in zip(grads, new_grads)]
            return g3s

    # gamma12
    with tf.get_default_graph().control_dependencies([save]):
        #opboth = self.optimizer.apply_gradients(grads_and_vars, global_step=global_step, name=name)
        #opdp = self.optimizer.apply_gradients(grads_and_vars[:len(d_vars)], global_step=global_step, name=name)
        #opgp = self.optimizer.apply_gradients(grads_and_vars[len(d_vars):], global_step=global_step, name=name)
        opboth = tf.group(*[tf.assign_sub(w, self._lr_t * v) for w, v in zip(all_vars, all_grads)])   # step all variables
        opd = tf.group(*[tf.assign_sub(w, self._lr_t * v) for w, v in zip(d_vars, d_grads)])          # step discriminator variables
        opg = tf.group(*[tf.assign_sub(w, self._lr_t * v) for w, v in zip(g_vars, g_grads)])          # step generator variables
        with tf.get_default_graph().control_dependencies([opboth]):
            gboth = curl()  # tf.gradients(self.gan.trainer.d_loss, d_vars) + tf.gradients(self.gan.trainer.g_loss, g_vars)
            with tf.get_default_graph().control_dependencies([restore]):
                with tf.get_default_graph().control_dependencies([opd]):
                    #new_d_grads = [tf.zeros_like(_d) for _d in d_vars]+tf.gradients(self.gan.trainer.g_loss, g_vars)
                    new_d_grads = curl()
                    with tf.get_default_graph().control_dependencies([restore]):
                        with tf.get_default_graph().control_dependencies([opg]):
                            #new_g_grads = tf.gradients(self.gan.trainer.d_loss, d_vars) + [tf.zeros_like(_g) for _g in g_vars]
                            new_g_grads = curl()
                            with tf.get_default_graph().control_dependencies([restore]):
                                new_grads = []
                                for _gboth, _gd, _gg, _g in zip(gboth, new_d_grads, new_g_grads, d_grads):
                                    det = tf.square(_gboth) - (_gg * _gd) + 1e-8
                                    h_1 = 1.0 / det * (2 * _gboth - _gd - _gg)
                                    if self.config.hessian:
                                        #v = (g(x + hjej)-g(x)))/(2hj) + \
                                        #    (g(x + hiei)-g(x))/(2hi)
                                        a = (_gboth - _g) / self._lr_t  # d2f/dx2i
                                        c = (_gboth - _g) / self._lr_t  # d2f/dx2j
                                        b = (_gg - _g) / (2 * self._lr_t) + (_gd - _g) / (2 * self._lr_t)  # d2f/dx1dx2
                                        d = b  # d2f/dx2dx1
                                        det = a * d - b * c + 1e-8
                                        #h_1 = 1.0/det * (b+d-a-c)
                                        h_1_a = d / det
                                        h_1_b = -b / det
                                        h_1_c = -c / det
                                        h_1_d = a / det
                                        h_1 = h_1_a * h_1_d - h_1_b * h_1_c
                                    new_grads.append(_g * h_1)
                                for _gboth, _gd, _gg, _g in zip(gboth[len(d_vars):], new_d_grads[len(d_vars):], new_g_grads[len(d_vars):], g_grads):
                                    det = tf.square(_gboth) - (_gg * _gd) + 1e-8
                                    h_1 = 1.0 / det * (2 * _gboth - _gd - _gg)
                                    if self.config.hessian:
                                        #v = (g(x + hjej)-g(x)))/(2hj) + \
                                        #    (g(x + hiei)-g(x))/(2hi)
                                        a = (_gboth - _g) / self._lr_t  # d2f/dx2i
                                        c = (_gboth - _g) / self._lr_t  # d2f/dx2j
                                        b = (_gg - _g) / (2 * self._lr_t) + (_gd - _g) / (2 * self._lr_t)  # d2f/dx1dx2
                                        d = b  # d2f/dx2dx1
                                        det = a * d - b * c + 1e-8
                                        #h_1 = 1.0/det * (b+d-a-c)
#......... part of the code omitted here .........
Developer: 255BITS, Project: hyperchamber-gan, Lines: 101, Source: jr_optimizer.py
Example 15: testAssignUpdateNoShape
def testAssignUpdateNoShape(self):
  var = state_ops.variable_op([1, 2], tf.float32, set_shape=False)
  added = tf.assign_add(var, self._NewShapelessTensor())
  self.assertEqual(tensor_shape.unknown_shape(), added.get_shape())
  subbed = tf.assign_sub(var, self._NewShapelessTensor())
  self.assertEqual(tensor_shape.unknown_shape(), subbed.get_shape())
Developer: debaratidas1994, Project: tensorflow, Lines: 6, Source: variable_ops_test.py
Example 16: testAssignUpdateNoValueShape
def testAssignUpdateNoValueShape(self):
  var = state_ops.variable_op([1, 2], tf.float32)
  added = tf.assign_add(var, self._NewShapelessTensor())
  self.assertEqual([1, 2], added.get_shape())
  subbed = tf.assign_sub(var, self._NewShapelessTensor())
  self.assertEqual([1, 2], subbed.get_shape())
Developer: debaratidas1994, Project: tensorflow, Lines: 6, Source: variable_ops_test.py
Example 17: testAssignUpdateNoVarShape
def testAssignUpdateNoVarShape(self):
  var = state_ops.variable_op([1, 2], tf.float32, set_shape=False)
  added = tf.assign_add(var, [[2.0, 3.0]])
  self.assertEqual([1, 2], added.get_shape())
  subbed = tf.assign_sub(var, [[12.0, 13.0]])
  self.assertEqual([1, 2], subbed.get_shape())
Developer: debaratidas1994, Project: tensorflow, Lines: 6, Source: variable_ops_test.py
Example 18: apply_updates
def apply_updates(self):
    assert not self._updates_applied
    self._updates_applied = True
    devices = list(self._dev_grads.keys())
    total_grads = sum(len(grads) for grads in self._dev_grads.values())
    assert len(devices) >= 1 and total_grads >= 1
    ops = []
    with absolute_name_scope(self.scope):
        # Cast gradients to FP32 and calculate partial sum within each device.
        dev_grads = OrderedDict()  # device => [(grad, var), ...]
        for dev_idx, dev in enumerate(devices):
            with tf.name_scope('ProcessGrads%d' % dev_idx), tf.device(dev):
                sums = []
                for gv in zip(*self._dev_grads[dev]):
                    assert all(v is gv[0][1] for g, v in gv)
                    g = [tf.cast(g, tf.float32) for g, v in gv]
                    g = g[0] if len(g) == 1 else tf.add_n(g)
                    sums.append((g, gv[0][1]))
                dev_grads[dev] = sums

        # Sum gradients across devices.
        if len(devices) > 1:
            with tf.name_scope('SumAcrossGPUs'), tf.device(None):
                for var_idx, grad_shape in enumerate(self._grad_shapes):
                    g = [dev_grads[dev][var_idx][0] for dev in devices]
                    if np.prod(grad_shape):  # nccl does not support zero-sized tensors
                        g = tf.contrib.nccl.all_sum(g)
                    for dev, gg in zip(devices, g):
                        dev_grads[dev][var_idx] = (gg, dev_grads[dev][var_idx][1])

        # Apply updates separately on each device.
        for dev_idx, (dev, grads) in enumerate(dev_grads.items()):
            with tf.name_scope('ApplyGrads%d' % dev_idx), tf.device(dev):
                # Scale gradients as needed.
                if self.use_loss_scaling or total_grads > 1:
                    with tf.name_scope('Scale'):
                        coef = tf.constant(np.float32(1.0 / total_grads), name='coef')
                        coef = self.undo_loss_scaling(coef)
                        grads = [(g * coef, v) for g, v in grads]

                # Check for overflows.
                with tf.name_scope('CheckOverflow'):
                    grad_ok = tf.reduce_all(tf.stack([tf.reduce_all(tf.is_finite(g)) for g, v in grads]))

                # Update weights and adjust loss scaling.
                with tf.name_scope('UpdateWeights'):
                    opt = self._dev_opt[dev]
                    ls_var = self.get_loss_scaling_var(dev)
                    if not self.use_loss_scaling:
                        ops.append(tf.cond(grad_ok, lambda: opt.apply_gradients(grads), tf.no_op))
                    else:
                        ops.append(tf.cond(grad_ok,
                            lambda: tf.group(tf.assign_add(ls_var, self.loss_scaling_inc), opt.apply_gradients(grads)),
                            lambda: tf.group(tf.assign_sub(ls_var, self.loss_scaling_dec))))

                # Report statistics on the last device.
                if dev == devices[-1]:
                    with tf.name_scope('Statistics'):
                        ops.append(autosummary(self.id + '/learning_rate', self.learning_rate))
                        ops.append(autosummary(self.id + '/overflow_frequency', tf.where(grad_ok, 0, 1)))
                        if self.use_loss_scaling:
                            ops.append(autosummary(self.id + '/loss_scaling_log2', ls_var))

        # Initialize variables and group everything into a single op.
        self.reset_optimizer_state()
        init_uninited_vars(list(self._dev_ls_var.values()))
        return tf.group(*ops, name='TrainingOp')
Developer: Gavin666Github, Project: progressive_growing_of_gans, Lines: 69, Source: tfutil.py
Example 19: __init__
#......... part of the code omitted here .........
m_lstm_outputs, m_lstm_state = self.fast_dlstm(m_rnn_in, m_state_in, m_lstm_cell, FLAGS.manager_horizon,
                                               FLAGS.hidden_dim * FLAGS.manager_horizon)
m_lstm_c, m_lstm_h = m_lstm_state
self.m_state_out = (m_lstm_c[-1, :1, :], m_lstm_h[-1, :1, :])
self.goals = tf.reshape(m_lstm_outputs, [-1, FLAGS.hidden_dim])
self.normalized_goals = tf.contrib.layers.fully_connected(self.goals, FLAGS.hidden_dim, activation_fn=tf.tanh, name="Gt")
summary_goals = tf.contrib.layers.summarize_activation(self.normalized_goals)

def randomize_goals(t):
    t = tf.cast(t, tf.int32)
    packed_tensors = tf.stack([tf.random_normal([FLAGS.hidden_dim, ]), self.normalized_goals[t, :]])
    to_update = tf.cond(
        tf.less(self.prob_of_random_goal, tf.constant(FLAGS.final_random_goal_prob, dtype=tf.float32)),
        lambda: tf.cast(
            tf.multinomial(
                tf.log([[self.prob_of_random_goal,
                         tf.subtract(tf.constant(1.0),
                                     self.prob_of_random_goal)]]), 1)[0][0], tf.int32),
        lambda: tf.constant(1, tf.int32))
    resulted_tensor = tf.gather(packed_tensors, to_update)
    return resulted_tensor

self.randomized_goals = tf.map_fn(lambda t: randomize_goals(t), tf.to_float(tf.range(0, step_size[0])),
                                  name="random_gt")
summary_random_goals = tf.contrib.layers.summarize_activation(self.randomized_goals)

self.decrease_prob_of_random_goal = tf.assign_sub(self.prob_of_random_goal, tf.constant(
    (FLAGS.initial_random_goal_prob - FLAGS.final_random_goal_prob) / FLAGS.explore_steps))

m_fc_value_w = tf.get_variable("M_Value_W", shape=[FLAGS.hidden_dim, 1],
                               initializer=normalized_columns_initializer(1.0))
self.m_value = tf.matmul(m_rnn_out, m_fc_value_w, name="M_Value")
summary_m_value_act = tf.contrib.layers.summarize_activation(self.m_value)

############################################################################################################
# Worker network
self.sum_prev_goals = tf.placeholder(shape=[None, FLAGS.hidden_dim], dtype=tf.float32, name="Prev_c_Goals_sum")

w_rnn_in = tf.expand_dims(self.f_percept, [0], name="Wrnn_in")
step_size = tf.shape(self.inputs)[:1]
w_lstm_cell = tf.contrib.rnn.LayerNormBasicLSTMCell(FLAGS.goal_embedding_size * FLAGS.nb_actions)
w_c_init = np.zeros((1, w_lstm_cell.state_size.c), np.float32)
w_h_init = np.zeros((1, w_lstm_cell.state_size.h), np.float32)
self.w_state_init = [w_c_init, w_h_init]
w_c_in = tf.placeholder(tf.float32, [1, w_lstm_cell.state_size.c], name="Wrnn_c_in")
w_h_in = tf.placeholder(tf.float32, [1, w_lstm_cell.state_size.h], name="Wrnn_h_in")
self.w_state_in = (w_c_in, w_h_in)
w_state_in = tf.contrib.rnn.LSTMStateTuple(w_c_in, w_h_in)
w_lstm_outputs, w_lstm_state = tf.nn.dynamic_rnn(
    w_lstm_cell, w_rnn_in, initial_state=w_state_in, sequence_length=step_size,
    time_major=False)
w_lstm_c, w_lstm_h = w_lstm_state
self.w_state_out = (w_lstm_c[:1, :], w_lstm_h[:1, :])
Ut = tf.reshape(w_lstm_outputs, [step_size[0], FLAGS.nb_actions, FLAGS.goal_embedding_size],
#......... (listing truncated here in the source page) .........
Developer: ioanachelu, Project: turi, Lines: 67, Source: network.py
Example 20: get_zero_state
# (excerpt -- the listing starts in the middle of a sess.run feed_dict inside the epoch loop)
        y: x_values_sm_b,
        modulation: np.zeros((batch_size, num_steps, 1)),
        state: get_zero_state()
    })

    duration = time.time() - start_time
    error = np.sum(np.square(out_v_test[-1][0]/c.lambda_max - x_values_sm_b))

    dw_grads.append(state_v[0][5])
    db_grads.append(state_v[0][6])
    r.append(rhythm)

    print "Epoch {} ({:.2f}s), train error {:.3f}".format(
        i,
        duration,
        error
    )

r = np.asarray(r)
dw_grads = np.asarray(dw_grads)
db_grads = np.asarray(db_grads)

dw_grads_m = np.mean(dw_grads, 0)
# dw_grads_m = 2.0*dw_bl
# db_grads = 2.0*dbias_bl

sess.run(tf.assign_sub(net.cells[-1].params[0], 10.0*dw_grads_m.reshape(input_size, output_size)))
sess.run(tf.assign_sub(net.cells[-1].params[1], 10.0*np.mean(db_grads).reshape(1)))
Developer: alexeyche, Project: alexeyche-junk, Lines: 29, Source: main_ts_output_rule_tweak.py
Note: the tensorflow.assign_sub examples in this article were compiled by 纯净天空 from GitHub, MSDocs and other source-code and documentation platforms. The snippets are selected from open-source projects contributed by their original authors; copyright of the source code remains with those authors, and redistribution or use should follow the corresponding project's license. Do not republish without permission.