This article collects typical usage examples of the theano.tensor.sqr function in Python. If you are wondering what the sqr function does, how to call it, or what it looks like in real code, the curated examples below should help.
A total of 20 code examples of the sqr function are shown below, sorted by popularity by default.
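As a quick orientation before the examples: T.sqr squares its input element-wise and returns a symbolic tensor of the same shape. A minimal sketch (not taken from any of the projects below) that compiles and evaluates it:

import numpy as np
import theano
import theano.tensor as T

x = T.matrix('x')                      # symbolic 2-D input
y = T.sqr(x)                           # element-wise square, same shape as x
square = theano.function([x], y)

print(square(np.array([[1., 2.], [3., 4.]], dtype=theano.config.floatX)))
# [[ 1.  4.]
#  [ 9. 16.]]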
Example 1: get_mean_square_norm_gradients_variance_method_00
def get_mean_square_norm_gradients_variance_method_00(D_by_layer, cost, accum=0):
    # This returns a theano variable that will be of shape (minibatch_size, ).
    # It will contain, for each training example, the associated mean of the
    # variance wrt the gradient of that minibatch.

    for (layer_name, D) in D_by_layer.items():

        input = D['input']
        input_square_norms = tensor.sqr(D['input']).sum(axis=1)
        backprop_output = tensor.grad(cost, D['output'])
        # I don't think that theano recomputes this.
        # It should be just redundant nodes in the computational graph
        # that end up being computed only once anyways.
        grad_weight = tensor.grad(cost, D['weight'])
        grad_bias = tensor.grad(cost, D['bias'])
        backprop_output_square_norms = tensor.sqr(backprop_output).sum(axis=1)

        if D.has_key('weight'):
            A = input_square_norms * backprop_output_square_norms
            C = tensor.sqr(grad_weight).sum()  # all the terms get this "middle" expression added to them
            B = (backprop_output.dot(grad_weight.T) * input).sum(axis=1)

            accum += (A - 2 * B + C)

        if D.has_key('bias'):
            # this last `sum` could be a component-wise `max` if we wanted
            # to carry the maximum of the variances instead of the sum of squares
            accum = accum + tensor.sqr(backprop_output - grad_bias.reshape((1, -1))).sum(axis=1)

    return accum
Author: chinnadhurai | Project: ImportanceSamplingSGD | Lines: 32 | Source: verifying_grad_square_norm_formula.py
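For reference, the A, B, and C terms above combine into a single squared Frobenius distance between the per-example weight gradient and the minibatch weight gradient. A sketch of the identity, under the assumption that the layer's weight has shape (n_in, n_out) so that the per-example weight gradient is the outer product of the input x_i and the backpropagated output gradient δ_i, with G = grad_weight:

\|x_i \delta_i^{\top} - G\|_F^2
  = \underbrace{\|x_i\|^2\,\|\delta_i\|^2}_{A}
  \;-\; 2\,\underbrace{\langle x_i \delta_i^{\top},\, G\rangle_F}_{B}
  \;+\; \underbrace{\|G\|_F^2}_{C}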
Example 2: batchnorm
def batchnorm(X, rescale=None, reshift=None, u=None, s=None, e=1e-8):
    """
    batchnorm with support for not using scale and shift parameters
    as well as inference values (u and s) and partial batchnorm (via a)
    will detect and use convolutional or fully connected version
    """
    g = rescale
    b = reshift
    if X.ndim == 4:
        if u is not None and s is not None:
            # use normalization params given a priori
            b_u = u.dimshuffle('x', 0, 'x', 'x')
            b_s = s.dimshuffle('x', 0, 'x', 'x')
        else:
            # compute normalization params from input
            b_u = T.mean(X, axis=[0, 2, 3]).dimshuffle('x', 0, 'x', 'x')
            b_s = T.mean(T.sqr(X - b_u), axis=[0, 2, 3]).dimshuffle('x', 0, 'x', 'x')
        # batch normalize
        X = (X - b_u) / T.sqrt(b_s + e)
        if g is not None and b is not None:
            # apply rescale and reshift
            X = X*T.exp(0.2*g.dimshuffle('x', 0, 'x', 'x')) + b.dimshuffle('x', 0, 'x', 'x')
    elif X.ndim == 2:
        if u is None and s is None:
            # compute normalization params from input
            u = T.mean(X, axis=0)
            s = T.mean(T.sqr(X - u), axis=0)
        # batch normalize
        X = (X - u) / T.sqrt(s + e)
        if g is not None and b is not None:
            # apply rescale and reshift
            X = X*T.exp(0.2*g) + b
    else:
        raise NotImplementedError
    return X
Author: Philip-Bachman | Project: Sequential-Generation | Lines: 35 | Source: NetLayers.py
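A minimal usage sketch of the batchnorm above in inference mode. The tensor shapes, the 64-channel count, and the variable names are illustrative assumptions, not part of the original project:

import numpy as np
import theano
import theano.tensor as T

X = T.tensor4('X')  # (batch, channels, rows, cols)
# precomputed per-channel statistics, e.g. running averages gathered during training
u = theano.shared(np.zeros(64, dtype=theano.config.floatX), name='running_mean')
s = theano.shared(np.ones(64, dtype=theano.config.floatX), name='running_var')

X_norm = batchnorm(X, u=u, s=s)          # uses the given statistics instead of batch statistics
normalize = theano.function([X], X_norm)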
Example 3: sgd_updates_adadelta
def sgd_updates_adadelta(params, cost, rho=0.95, epsilon=1e-6, norm_lim=9, word_vec_name='Words'):
    """
    adadelta update rule, mostly from
    https://groups.google.com/forum/#!topic/pylearn-dev/3QbKtCumAW4 (for Adadelta)
    """
    updates = OrderedDict({})
    exp_sqr_grads = OrderedDict({})
    exp_sqr_ups = OrderedDict({})
    gparams = []
    for param in params:
        empty = numpy.zeros_like(param.get_value())
        exp_sqr_grads[param] = theano.shared(value=as_floatX(empty), name="exp_grad_%s" % param.name)
        gp = T.grad(cost, param)
        exp_sqr_ups[param] = theano.shared(value=as_floatX(empty), name="exp_grad_%s" % param.name)
        gparams.append(gp)
    for param, gp in zip(params, gparams):
        exp_sg = exp_sqr_grads[param]
        exp_su = exp_sqr_ups[param]
        up_exp_sg = rho * exp_sg + (1 - rho) * T.sqr(gp)
        updates[exp_sg] = up_exp_sg
        step = -(T.sqrt(exp_su + epsilon) / T.sqrt(up_exp_sg + epsilon)) * gp
        updates[exp_su] = rho * exp_su + (1 - rho) * T.sqr(step)
        stepped_param = param + step
        if (param.get_value(borrow=True).ndim == 2) and (param.name != 'Words'):
            col_norms = T.sqrt(T.sum(T.sqr(stepped_param), axis=0))
            desired_norms = T.clip(col_norms, 0, T.sqrt(norm_lim))
            scale = desired_norms / (1e-7 + col_norms)
            tmp = stepped_param * scale
            tmp = T.cast(tmp, 'float32')
            #print param.type,tmp.type
            updates[param] = tmp
        else:
            updates[param] = stepped_param
            #print param.type,stepped_param.type
    return updates
Author: zjh-nudger | Project: BioNLP-ST2016 | Lines: 35 | Source: conv_test.py
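In equations, the update rule implemented above (and in several of the later examples) is standard Adadelta; with decay ρ and stabilizer ε it maintains running averages of squared gradients and squared steps:

E[g^2]_t = \rho\, E[g^2]_{t-1} + (1-\rho)\, g_t^2
\Delta\theta_t = -\,\frac{\sqrt{E[\Delta\theta^2]_{t-1} + \varepsilon}}{\sqrt{E[g^2]_t + \varepsilon}}\; g_t
E[\Delta\theta^2]_t = \rho\, E[\Delta\theta^2]_{t-1} + (1-\rho)\, \Delta\theta_t^2
\theta_{t+1} = \theta_t + \Delta\theta_t

The only addition in this variant is the column-norm clipping applied, after the step, to 2-D parameters other than the word-embedding matrix.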
Example 4: applyConstraint
def applyConstraint(self, param):
    if param.ndim != 4 and param.ndim != 2:
        warnings.warn(("Norm constraints are normally applied to matrices"
                       " or 4-dimensional tensors, but currently got "
                       "%d dimensions, please make sure this is the desired"
                       " parameter to apply norm constraints") % param.ndim)

    needFlip = False
    if param.ndim == 4:  # a hack for conv layer filters
        prevShape = param.shape
        # conv layer filter shape is (nChannelOut, nChannelIn, r, c)
        param = param.flatten(2)
        # now it is (nout, nin), which is different from (nin, nout)
        # from fully connected networks, so need to flip here
        needFlip = True

    if needFlip:
        col_norm = T.sqrt(T.sum(T.sqr(param), axis=1, keepdims=True))
    else:
        col_norm = T.sqrt(T.sum(T.sqr(param), axis=0, keepdims=True))

    param /= (col_norm + 1e-7)
    param *= self.norm

    if needFlip:
        param = param.reshape(prevShape)

    return param
Author: ybzhou | Project: Gemini | Lines: 28 | Source: constraints.py
Example 5: sgd_updates_adadelta
def sgd_updates_adadelta(params, cost, rho=0.95, epsilon=1e-6,
                         norm_lim=9, word_vec_name='embedding'):
    updates = OrderedDict({})
    exp_sqr_grads = OrderedDict({})
    exp_sqr_ups = OrderedDict({})
    gparams = []
    for param in params:
        empty = np.zeros_like(param.get_value())
        exp_sqr_grads[param] = theano.shared(value=as_floatX(empty), name="exp_grad_%s" % param.name)
        gp = T.grad(cost, param)
        exp_sqr_ups[param] = theano.shared(value=as_floatX(empty), name="exp_grad_%s" % param.name)
        gparams.append(gp)
    for param, gp in zip(params, gparams):
        exp_sg = exp_sqr_grads[param]
        exp_su = exp_sqr_ups[param]
        up_exp_sg = rho * exp_sg + (1 - rho) * T.sqr(gp)
        updates[exp_sg] = up_exp_sg
        step = -(T.sqrt(exp_su + epsilon) / T.sqrt(up_exp_sg + epsilon)) * gp
        updates[exp_su] = rho * exp_su + (1 - rho) * T.sqr(step)
        stepped_param = param + step
        if (param.get_value(borrow=True).ndim == 2) and (param.name != 'embedding'):
            col_norms = T.sqrt(T.sum(T.sqr(stepped_param), axis=0))
            desired_norms = T.clip(col_norms, 0, T.sqrt(norm_lim))
            scale = desired_norms / (1e-7 + col_norms)
            updates[param] = stepped_param * scale
        else:
            updates[param] = stepped_param
    return updates
Author: Tskatom | Project: Protest_Event_Encoder | Lines: 30 | Source: MLT_CNN_no_validation.py
Example 6: build_cost_functional_L2norm_w_reg
def build_cost_functional_L2norm_w_reg(lambda_val, h, y_sym, Thetas):
    """
    build_cost_functional_L2norm (with regularization) J=J_y(Theta,b) # J\equiv J_y(\Theta,b),
    for the L2 norm, or Euclidean space norm, but now with
    regularization

    INPUT/PARAMETERS
    ================
    @type y_sym : theano symbolic matrix, such as T.matrix() or theano shared variable
    @param y_sym : output data as a symbolic theano variable or theano shared variable
    NOTE: y_sym = T.matrix(); # this could be a vector, but I can keep y to be "general" in size dimensions

    @type h : theano shared variable of size dims. (K,m) (size dim. might be (m,K) due to right action)
    @param h : hypothesis

    @type Thetas : tuple, list, or (ordered) iterable of Theta's as theano shared variables, of length L
    @params Thetas : weights or parameters thetas for all the layers l=1,2,...L-1
    NOTE: remember, we want a list of theano MATRICES, themselves, not the class

    RETURN/OUTPUTS
    ==============
    @type J_theta : theano symbolic expression (computational graph)
    """
    J_theta = np.cast[theano.config.floatX](0.5) * T.mean(T.sqr(h - y_sym))
    # T.sqr is an element-wise operation (take the square of each element), and so it's an automorphism

    reg_term = T.mean([T.sum(T.sqr(Theta), acc_dtype=theano.config.floatX) for Theta in Thetas],
                      acc_dtype=theano.config.floatX)
    reg_term = np.cast[theano.config.floatX](lambda_val / 2.) * reg_term

    J_theta = J_theta + reg_term
    return J_theta
Author: ernestyalumni | Project: MLgrabbag | Lines: 32 | Source: CNN.py
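Written out, the cost built above is (with L the number of weight matrices Θ^{(l)} and λ = lambda_val):

J(\Theta) = \frac{1}{2}\,\operatorname{mean}\big((h - y)^2\big)
          \;+\; \frac{\lambda}{2}\cdot\frac{1}{L}\sum_{l=1}^{L}\sum_{i,j}\big(\Theta^{(l)}_{ij}\big)^2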
Example 7: AdadeltaUpdate
def AdadeltaUpdate(params, cost, stepSize=1.0, rho=0.95, epsilon=1e-6, norm_lim=9):
    updates = OrderedDict({})
    exp_sqr_grads = OrderedDict({})
    exp_sqr_update = OrderedDict({})
    g_params = []
    for param in params:
        empty = np.zeros_like(param.get_value())
        exp_sqr_grads[param] = theano.shared(value=as_floatX(empty), name='exp_grad_%s' % param.name)
        exp_sqr_update[param] = theano.shared(value=as_floatX(empty), name='exp_grad_%s' % param.name)
        gp = T.grad(cost, param)
        g_params.append(gp)
    for param, gp in zip(params, g_params):
        exp_sg = exp_sqr_grads[param]
        exp_su = exp_sqr_update[param]
        update_exp_sg = rho*exp_sg + (1 - rho)*T.sqr(gp)
        updates[exp_sg] = update_exp_sg
        step = -(T.sqrt(exp_su + epsilon) / T.sqrt(update_exp_sg + epsilon)) * gp
        stepped_param = param + step*stepSize
        update_exp_su = rho*exp_su + (1 - rho)*T.sqr(step)
        updates[exp_su] = update_exp_su
        if param.get_value(borrow=True).ndim == 2 and param.name != 'wordVec':
            col_norms = T.sqrt(T.sum(T.sqr(stepped_param), axis=0))
            desired_norms = T.clip(col_norms, 0, T.sqrt(norm_lim))
            scale = desired_norms / (1e-7 + col_norms)
            updates[param] = stepped_param*scale
        else:
            updates[param] = stepped_param
    return updates
Author: wolfhu | Project: RCNNSentence | Lines: 31 | Source: dcnnModel.py
Example 8: get_layer_monitoring_channels
def get_layer_monitoring_channels(self, state_below=None, state=None, target=None):
    rval = OrderedDict()

    W, = self.transformer.get_params()
    rval['norm'] = T.sqrt(T.sqr(W).sum())

    if (target is not None) and ((state_below is not None) or (state is not None)):
        if state is None:
            state = self.fprop(state_below)
        target = 1. - target  # 0/1 dissim/sim to 1/0 distances
        rmse = T.sqrt(T.mean(T.sqr(state - target)))
        rval['rmse'] = rmse.mean()

        if self.costfn == 'margin':
            thresh = self.costparam
        elif self.costfn == 'cauchy':
            thresh = 2. / (1. + T.exp(self.costparam))
        else:
            thresh = 0.5
        yhat = state < thresh
        y = target < 0.5
        wrong_bit = T.cast(T.neq(y, yhat), state.dtype)
        rval['01_loss'] = wrong_bit.mean()

        y = T.cast(y, state.dtype)
        yhat = T.cast(yhat, state.dtype)
        tp = (y*yhat).sum()
        fp = ((1 - y)*yhat).sum()
        prec = compute_precision(tp, fp)
        rec = compute_recall(y, tp)
        f1 = compute_f1(prec, rec)
        rval['neg_precision'] = -prec
        rval['neg_recall'] = -rec
        rval['neg_f1'] = -f1

    return rval
Author: matudor | Project: siamese | Lines: 32 | Source: siamesenet.py
Example 9: cosine_similarity
def cosine_similarity(y_true, y_pred):
    norm_y_true = T.sqrt(T.sum(T.sqr(y_true), 1, keepdims=True))
    norm_y_pred = T.sqrt(T.sum(T.sqr(y_pred), 1, keepdims=True))
    dot = T.tensordot(y_true, y_pred, axes=[1, 1])
    cossim = dot / (norm_y_true * norm_y_pred)
    objective = 1 - cossim
    return objective.mean(axis=-1)
Author: axeltidemann | Project: propeller | Lines: 7 | Source: utils.py
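A minimal sketch of compiling the objective above into a callable loss (variable names and shapes are illustrative assumptions):

import numpy as np
import theano
import theano.tensor as T

y_true = T.matrix('y_true')   # (n_samples, n_features)
y_pred = T.matrix('y_pred')
loss = cosine_similarity(y_true, y_pred)
loss_fn = theano.function([y_true, y_pred], loss)

a = np.random.rand(4, 3).astype(theano.config.floatX)
b = np.random.rand(4, 3).astype(theano.config.floatX)
print(loss_fn(a, b))          # one value per row of y_true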
Example 10: get_updates
def get_updates(self, grads):
    grads = OrderedDict(grads)
    updates = OrderedDict()

    for param in grads.keys():
        # mean_squared_grad := E[g^2]_{t-1}
        mean_square_grad = theano.shared(
            theano._asarray(param.get_value() * 0., dtype=theano.config.floatX),
            name='mean_square_grad_' + param.name, borrow=False)
        self.parameters.append(mean_square_grad)

        # mean_square_dx := E[(\Delta x)^2]_{t-1}
        mean_square_dx = theano.shared(
            theano._asarray(param.get_value() * 0., dtype=theano.config.floatX),
            name='mean_square_dx_' + param.name, borrow=False)
        self.parameters.append(mean_square_dx)

        # Accumulate gradient
        new_mean_squared_grad = self.decay * mean_square_grad + \
            (1 - self.decay) * T.sqr(grads[param])

        # Compute update
        rms_dx_tm1 = T.sqrt(mean_square_dx + self.epsilon)
        rms_grad_t = T.sqrt(new_mean_squared_grad + self.epsilon)
        delta_x_t = - rms_dx_tm1 / rms_grad_t * grads[param]

        # Accumulate updates
        new_mean_square_dx = self.decay * mean_square_dx + (1 - self.decay) * T.sqr(delta_x_t)

        # Apply update
        updates[mean_square_grad] = new_mean_squared_grad
        updates[mean_square_dx] = new_mean_square_dx
        updates[param] = param + delta_x_t

    return updates
Author: arranger1044 | Project: MADE | Lines: 32 | Source: update_rules.py
Example 11: entropy_exp
def entropy_exp(X, g=None, b=None, u=None, s=None, a=1., e=1e-8):
    if X.ndim == 4:
        if u is not None and s is not None:
            b_u = u.dimshuffle('x', 0, 'x', 'x')
            b_s = s.dimshuffle('x', 0, 'x', 'x')
        else:
            b_u = T.mean(X, axis=[0, 2, 3]).dimshuffle('x', 0, 'x', 'x')
            b_s = T.mean(T.sqr(X - b_u), axis=[0, 2, 3]).dimshuffle('x', 0, 'x', 'x')
        if a != 1:
            b_u = (1. - a)*0. + a*b_u
            b_s = (1. - a)*1. + a*b_s
        X = (X - b_u) / T.sqrt(b_s + e)
        if g is not None and b is not None:
            X = X*T.exp(g.dimshuffle('x', 0, 'x', 'x')) + b.dimshuffle('x', 0, 'x', 'x')
    elif X.ndim == 2:
        if u is None and s is None:
            u = T.mean(X, axis=0)
            s = T.mean(T.sqr(X - u), axis=0)
        if a != 1:
            u = (1. - a)*0. + a*u
            s = (1. - a)*1. + a*s
        X = (X - u) / T.sqrt(s + e)
        if g is not None and b is not None:
            X = X*T.exp(g) + b
    else:
        raise NotImplementedError
    return X
Author: taesupkim | Project: dcgan_code | Lines: 27 | Source: energy_rbm_cifar10_0.py
Example 12: create_adam_updates
def create_adam_updates(updates, params, gparams, gsums, xsums, lr, eps, beta1, beta2):
    i = theano.shared(np.float64(0.0).astype(theano.config.floatX))
    i_t = i + 1.0
    omb1_t = 1.0 - beta1**i_t
    omb2_t = 1.0 - beta2**i_t
    lr_t = lr * (T.sqrt(omb2_t) / omb1_t)
    for p, g, m, v in zip(params, gparams, gsums, xsums):
        if is_subtensor_op(p):
            origin, indexes = get_subtensor_op_inputs(p)
            m_sub = m[indexes]
            v_sub = v[indexes]
            m_t = beta1*m_sub + (1.0-beta1)*g
            v_t = beta2*v_sub + (1.0-beta2)*T.sqr(g)
            g_t = m_t / (T.sqrt(v_t) + eps)
            updates[m] = T.set_subtensor(m_sub, m_t)
            updates[v] = T.set_subtensor(v_sub, v_t)
            updates[origin] = T.inc_subtensor(p, -lr_t*g_t)
        else:
            m_t = beta1*m + (1.0-beta1)*g
            v_t = beta2*v + (1.0-beta2)*T.sqr(g)
            g_t = m_t / (T.sqrt(v_t) + eps)
            updates[m] = m_t
            updates[v] = v_t
            updates[p] = p - lr_t*g_t
    updates[i] = i_t
Author: hiroki13 | Project: neural-sentence-matching-system | Lines: 25 | Source: optimization.py
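For comparison with the Adadelta variants above, the update implemented here is Adam with the bias correction folded into the learning rate; at step t:

lr_t = lr \cdot \frac{\sqrt{1-\beta_2^{\,t}}}{1-\beta_1^{\,t}}, \qquad
m_t = \beta_1 m_{t-1} + (1-\beta_1)\, g_t, \qquad
v_t = \beta_2 v_{t-1} + (1-\beta_2)\, g_t^2, \qquad
\theta_t = \theta_{t-1} - lr_t\, \frac{m_t}{\sqrt{v_t} + \varepsilon}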
Example 13: batchnorm
def batchnorm(X, g=None, b=None, u=None, s=None, a=1., e=1e-8):
    """
    batchnorm with support for not using scale and shift parameters
    as well as inference values (u and s) and partial batchnorm (via a)
    will detect and use convolutional or fully connected version
    """
    if X.ndim == 4:
        if u is not None and s is not None:
            b_u = u.dimshuffle('x', 0, 'x', 'x')
            b_s = s.dimshuffle('x', 0, 'x', 'x')
        else:
            b_u = tensor.mean(X, axis=[0, 2, 3]).dimshuffle('x', 0, 'x', 'x')
            b_s = tensor.mean(tensor.sqr(X - b_u), axis=[0, 2, 3]).dimshuffle('x', 0, 'x', 'x')
        if a != 1:
            b_u = (1. - a)*0. + a*b_u
            b_s = (1. - a)*1. + a*b_s
        X = (X - b_u) / tensor.sqrt(b_s + e)
        if g is not None and b is not None:
            X = X*g.dimshuffle('x', 0, 'x', 'x') + b.dimshuffle('x', 0, 'x', 'x')
    elif X.ndim == 2:
        if u is None and s is None:
            u = tensor.mean(X, axis=0)
            s = tensor.mean(tensor.sqr(X - u), axis=0)
        if a != 1:
            u = (1. - a)*0. + a*u
            s = (1. - a)*1. + a*s
        X = (X - u) / tensor.sqrt(s + e)
        if g is not None and b is not None:
            X = X*g + b
    else:
        raise NotImplementedError
    return X
Author: markstoehr | Project: lstm_acoustic_embedding | Lines: 32 | Source: ops.py
Example 14: mse
def mse(output, target, mean_over_second=True):
    """
    This is the Mean Square Error (MSE) across all dimensions, or per multibatch row (depending on mean_over_second).

    Parameters
    ----------
    output : tensor
        The symbolic tensor (or compatible) output from the network. (Comes from model).
    target : tensor
        The symbolic tensor (or compatible) target truth to compare the output against. (Comes from data).
    mean_over_second : bool
        Boolean whether or not to take the mean across all dimensions (True) or just the
        feature dimensions (False).

    Returns
    -------
    number
        The appropriate mean square error.
    """
    # The following definition came from the Conditional_nade project
    if mean_over_second:
        cost = T.mean(T.sqr(target - output))
    else:
        cost = T.mean(T.sqr(target - output).sum(axis=1))
    return cost
Author: 52nlp | Project: OpenDeep | Lines: 25 | Source: cost.py
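A minimal sketch showing the two behaviours of mean_over_second (the variable names and data are illustrative):

import numpy as np
import theano
import theano.tensor as T

output = T.matrix('output')
target = T.matrix('target')

cost_all = mse(output, target, mean_over_second=True)     # scalar mean over every element
cost_rows = mse(output, target, mean_over_second=False)   # mean over the batch of per-row summed errors
f = theano.function([output, target], [cost_all, cost_rows])

o = np.zeros((2, 3), dtype=theano.config.floatX)
t = np.ones((2, 3), dtype=theano.config.floatX)
print(f(o, t))  # -> 1.0 and 3.0 for this data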
Example 15: exe
def exe(self, mainloop):
    """
    .. todo::

        WRITEME
    """
    for k, p in mainloop.updates.items():
        for key in self.keys:
            if key in str(k):
                token = 1
                for waiver in self.waivers:
                    if waiver in str(k):
                        token = 0
                if token:
                    updated_param = mainloop.updates[k]
                    if self.is_vector:
                        col_norms = T.sqrt(T.sqr(updated_param).sum(axis=0))
                        desired_norms = T.clip(col_norms, 0, self.weight_norm)
                        ratio = (desired_norms / (1e-7 + col_norms))
                        mainloop.updates[k] = updated_param * ratio
                    else:
                        norm = T.sqrt(T.sqr(updated_param).sum())
                        desired_norm = T.clip(norm, 0, self.weight_norm)
                        ratio = (desired_norm / (1e-7 + norm))
                        mainloop.updates[k] = updated_param * ratio
Author: Beronx86 | Project: cle | Lines: 28 | Source: ext.py
Example 16: __call__
def __call__(self, model, X, Y):
    batch_size = 32
    image_size = 96
    Y_hat = model.fprop(X)
    print "Warning: the size of the axe is set manually"
    Yx_hat = Y_hat[:, :image_size]
    Yy_hat = Y_hat[:, image_size:]
    Yx = Y[:, :image_size]
    Yy = Y[:, image_size:]
    epsylon = 1e-10
    costMatrix = T.matrix()
    max_x = T.argmax(Yx, axis=1)
    max_y = T.argmax(Yy, axis=1)
    costMatrix = T.sqr(
        T.log((Yx + epsylon) / (Yx[range(batch_size), max_x] + epsylon)[:, None])
        - T.log((Yx_hat + epsylon) / (Yx_hat[range(batch_size), max_x] + epsylon)[:, None])
    )
    costMatrix += T.sqr(
        T.log((Yy + epsylon) / (Yy[range(batch_size), max_y] + epsylon)[:, None])
        - T.log((Yy_hat + epsylon) / (Yy_hat[range(batch_size), max_y] + epsylon)[:, None])
    )
    costMatrix *= T.neq(T.sum(Y, axis=1), 0)[:, None]
    cost = costMatrix.sum(axis=1).mean()
    return cost
Author: Kerkil | Project: ift6266kaggle | Lines: 33 | Source: PositionalConv.py
Example 17: get_regs
def get_regs(self, states_0_, states, M):
    """
    Additional regularization terms.
    """
    regs = 0

    if self.L1_Wrec > 0:
        W = self.params['Wrec']
        regs += self.L1_Wrec * tensor.mean(abs(W))

    if self.L2_Wrec > 0:
        W = self.params['Wrec']
        regs += self.L2_Wrec * tensor.mean(tensor.sqr(W))

    #---------------------------------------------------------------------------------
    # Firing rates
    #---------------------------------------------------------------------------------

    if self.L2_r > 0:
        baseline = 0.

        M_ = (tensor.tile(M.T, (states.shape[-1], 1, 1))).T
        states_all = tensor.concatenate(
            [states_0_.reshape((1, states_0_.shape[0], states_0_.shape[1])), states],
            axis=0
            )
        r = self.f_hidden(states_all)
        regs += self.L2_r * tensor.sum(tensor.sqr(r - baseline)*M_)/tensor.sum(M_)

    #---------------------------------------------------------------------------------

    return regs
Author: frsong | Project: pyrl | Lines: 33 | Source: simple.py
Example 18: initialise
def initialise(self):
    if self.X.ndim == 4:
        if self.u is not None and self.s is not None:
            b_u = self.u.dimshuffle('x', 0, 'x', 'x')
            b_s = self.s.dimshuffle('x', 0, 'x', 'x')
        else:
            b_u = T.mean(self.X, axis=[0, 2, 3]).dimshuffle('x', 0, 'x', 'x')
            b_s = T.mean(T.sqr(self.X - b_u), axis=[0, 2, 3]).dimshuffle('x', 0, 'x', 'x')
        if self.a != 1:
            b_u = (1. - self.a)*0. + self.a*b_u
            b_s = (1. - self.a)*1. + self.a*b_s
        output = (self.X - b_u) / T.sqrt(b_s + self.e)
        if self.g is not None and self.b is not None:
            self.X = self.X*self.g.dimshuffle('x', 0, 'x', 'x') + self.b.dimshuffle('x', 0, 'x', 'x')
            self.params.append(g); self.params.append(b)
    elif self.X.ndim == 2:
        if self.u is None and self.s is None:
            self.u = T.mean(self.X, axis=0)
            self.s = T.mean(T.sqr(self.X - self.u), axis=0)
        if self.a != 1:
            self.u = (1. - self.a)*0. + self.a*self.u
            self.s = (1. - self.a)*1. + self.a*self.s
        self.X = (self.X - self.u) / T.sqrt(self.s + self.e)
        if self.g is not None and self.b is not None:
            self.X = self.X*self.g + self.b
            self.params.append(g); self.params.append(b)
    else:
        raise NotImplementedError
Author: KyriacosShiarli | Project: SingNet | Lines: 28 | Source: layers.py
Example 19: get_updates_adadelta
def get_updates_adadelta(grads, params, decay=0.95):
    decay = constantX(decay)
    print 'build updates with adadelta'
    for param, grad in zip(params, grads):
        # mean_squared_grad := E[g^2]_{t-1}
        mean_square_grad = sharedX(numpy.zeros(param.get_value().shape, dtype=floatX))
        # mean_square_dx := E[(\Delta x)^2]_{t-1}
        mean_square_dx = sharedX(numpy.zeros(param.get_value().shape, dtype=floatX))
        if param.name is not None:
            mean_square_grad.name = 'mean_square_grad_' + param.name
            mean_square_dx.name = 'mean_square_dx_' + param.name

        # Accumulate gradient
        new_mean_squared_grad = (
            decay * mean_square_grad +
            (1. - decay) * T.sqr(grad)
        )

        # Compute update
        epsilon = constantX(1e-7)
        rms_dx_tm1 = T.sqrt(mean_square_dx + epsilon)
        rms_grad_t = T.sqrt(new_mean_squared_grad + epsilon)
        delta_x_t = - rms_dx_tm1 / rms_grad_t * grad

        # Accumulate updates
        new_mean_square_dx = (
            decay * mean_square_dx +
            (1. - decay) * T.sqr(delta_x_t)
        )

        # Apply update
        updates[mean_square_grad] = new_mean_squared_grad
        updates[mean_square_dx] = new_mean_square_dx
        updates[param] = param + delta_x_t
Author: nehz | Project: NeuralNet | Lines: 31 | Source: rnn-draw.py
Example 20: learning_updates
def learning_updates(self):
    # This code computes updates only for the given R, so it drops the last dimension,
    # plus some theano magic to circumvent its graph computation.
    grads = self.grads
    for i, param in enumerate(self.params):
        mean_square_grad = theano.shared(
            np.zeros_like(param.get_value(), dtype=theano.config.floatX),
            name=param.name + str(self.network.R) + '_msg')
        mean_square_dx = theano.shared(
            np.zeros_like(param.get_value(), dtype=theano.config.floatX),
            name=param.name + str(self.network.R) + '_dx')

        # Accumulate gradient
        new_mean_squared_grad = (
            self.decay * mean_square_grad +
            (1 - self.decay) * T.sqr(grads[i])
        )

        # Compute update
        epsilon = self.lr
        rms_dx_tm1 = T.sqrt(mean_square_dx + epsilon)
        rms_grad_t = T.sqrt(new_mean_squared_grad + epsilon)
        delta_x_t = - (rms_dx_tm1 / rms_grad_t) * grads[i]

        # Accumulate updates
        new_mean_square_dx = (
            self.decay * mean_square_dx +
            (1 - self.decay) * T.sqr(delta_x_t)
        )

        # Apply update
        yield mean_square_grad, T.cast(new_mean_squared_grad, dtype=theano.config.floatX)
        yield mean_square_dx, T.cast(new_mean_square_dx, dtype=theano.config.floatX)
        yield param, param + 2*T.cast(delta_x_t, dtype=theano.config.floatX)
Author: mapleyustat | Project: learning-wordnet | Lines: 34 | Source: model.py
Note: The theano.tensor.sqr examples in this article were compiled by 纯净天空 from source-code and documentation platforms such as GitHub/MSDocs. The snippets are excerpted from open-source projects contributed by various developers, and copyright remains with the original authors; please consult each project's License before redistributing or reusing the code. Do not reproduce this article without permission.