This article collects typical usage examples of the theano.tensor.grad function in Python. If you have been asking yourself how the grad function is used in practice, how to call it, or what real examples look like, the curated code samples below may help.
The following shows 20 code examples of the grad function, sorted by popularity by default.
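As a quick orientation before the collected examples, here is a minimal sketch of the basic call, with illustrative variable names only: theano.tensor.grad takes a scalar cost expression and returns the symbolic gradient with respect to the requested variable, which can then be compiled into a function or used to build update rules.

import numpy as np
import theano
import theano.tensor as T

# a scalar cost built from a symbolic input and a shared parameter
x = T.dvector('x')
w = theano.shared(np.ones(3), name='w')
cost = T.sum((w * x) ** 2)
# symbolic gradient of the scalar cost with respect to w
g_w = T.grad(cost, w)
# compile a function that evaluates both the cost and its gradient
f = theano.function([x], [cost, g_w])
print(f(np.array([1.0, 2.0, 3.0])))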
Example 1: fit
def fit(self, X, learning_rate=0.5, mu=0.99, epochs=1, batch_sz=100, show_fig=False):
N, D = X.shape
n_batches = N / batch_sz
W0 = init_weights((D, self.M))
self.W = theano.shared(W0, 'W_%s' % self.id)
self.bh = theano.shared(np.zeros(self.M), 'bh_%s' % self.id)
self.bo = theano.shared(np.zeros(D), 'bo_%s' % self.id)
self.params = [self.W, self.bh, self.bo]
self.forward_params = [self.W, self.bh]
# TODO: technically these should be reset before doing backprop
self.dW = theano.shared(np.zeros(W0.shape), 'dW_%s' % self.id)
self.dbh = theano.shared(np.zeros(self.M), 'dbh_%s' % self.id)
self.dbo = theano.shared(np.zeros(D), 'dbo_%s' % self.id)
self.dparams = [self.dW, self.dbh, self.dbo]
self.forward_dparams = [self.dW, self.dbh]
X_in = T.matrix('X_%s' % self.id)
X_hat = self.forward_output(X_in)
# attach it to the object so it can be used later
# must be sigmoidal because the output is also a sigmoid
H = T.nnet.sigmoid(X_in.dot(self.W) + self.bh)
self.hidden_op = theano.function(
inputs=[X_in],
outputs=H,
)
# cost = ((X_in - X_hat) * (X_in - X_hat)).sum() / N
cost = -(X_in * T.log(X_hat) + (1 - X_in) * T.log(1 - X_hat)).sum() / (batch_sz * D)
cost_op = theano.function(
inputs=[X_in],
outputs=cost,
)
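# momentum SGD: dp stores the previous velocity; each parameter moves by
# mu*dp - learning_rate*grad, and dp is refreshed with the same quantity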
updates = [
(p, p + mu*dp - learning_rate*T.grad(cost, p)) for p, dp in zip(self.params, self.dparams)
] + [
(dp, mu*dp - learning_rate*T.grad(cost, p)) for p, dp in zip(self.params, self.dparams)
]
train_op = theano.function(
inputs=[X_in],
updates=updates,
)
costs = []
print "training autoencoder: %s" % self.id
for i in xrange(epochs):
print "epoch:", i
X = shuffle(X)
for j in xrange(n_batches):
batch = X[j*batch_sz:(j*batch_sz + batch_sz)]
train_op(batch)
the_cost = cost_op(X) # technically we could also get the cost for Xtest here
print "j / n_batches:", j, "/", n_batches, "cost:", the_cost
costs.append(the_cost)
if show_fig:
plt.plot(costs)
plt.show()
Author: renjinghai, Project: machine_learning_examples, Lines: 60, Source: autoencoder.py
Example 2: sgd_optimization
def sgd_optimization(learning_rate=0.13, n_epochs=1000, batch_size=100):
dataset = generate_data()
train_x, train_y = dataset[0]
print train_x.type, train_y.type
validate_x, validate_y = dataset[1]
test_x, test_y = dataset[2]
print 'train set size %d' %(train_x.get_value().shape[0])
print 'validate set size %d' %(validate_x.get_value().shape[0])
print 'test set size %d' %(test_x.get_value().shape[0])
n_batches = train_x.get_value(borrow=True).shape[0] / batch_size
index = T.lscalar()
x = T.matrix('x')
y = T.ivector('y')
lr = LogisticRegression(x, train_x.get_value().shape[1])
cost = lr.negative_log_likelihood(y)
print 'compile function test_model...'
test_model = theano.function(inputs=[index],
outputs=lr.errors(y),
givens={
x : train_x[index*batch_size : (index+1)*batch_size],
y : train_y[index*batch_size : (index+1)*batch_size]
})
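# symbolic gradients of the negative log-likelihood cost w.r.t. the weights and the bias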
g_w = T.grad(cost=cost, wrt=lr.w)
g_b = T.grad(cost=cost, wrt=lr.b)
updates = [(lr.w, lr.w-learning_rate*g_w),
(lr.b, lr.b-learning_rate*g_b)]
print 'compile function train_model...'
train_model = theano.function(inputs=[index],
outputs=cost,
updates=updates,
givens={
x : train_x[index*batch_size : (index+1)*batch_size],
y : train_y[index*batch_size : (index+1)*batch_size]
})
best_train_error = numpy.Inf
start_time = time.clock()
for epoch in xrange(n_epochs):
for minibatch_index in xrange(n_batches):
batch_cost = train_model(minibatch_index)
train_errors = [test_model(i) for i in xrange(n_batches)]
train_error = numpy.mean(train_errors)
if best_train_error > train_error:
best_train_error = train_error
print 'epoch %d, best_train_error %lf, train_error %lf' \
%(epoch, best_train_error, train_error)
#print 'iterator %d %lf' %(epoch*n_batches + minibatch_index+1, batch_cost)
end_time = time.clock()
print 'cost %d' %(end_time-start_time)
Author: brighthush, Project: Notes, Lines: 60, Source: LogisticRegression.py
Example 3: __init__
def __init__(self,
input=tensor.dvector('input'),
target=tensor.dvector('target'),
n_input=1, n_hidden=1, n_output=1, lr=1e-3, **kw):
super(NNet, self).__init__(**kw)
self.input = input
self.target = target
self.lr = shared(lr, 'learning_rate')
self.w1 = shared(numpy.zeros((n_hidden, n_input)), 'w1')
self.w2 = shared(numpy.zeros((n_output, n_hidden)), 'w2')
# print self.lr.type
self.hidden = sigmoid(tensor.dot(self.w1, self.input))
self.output = tensor.dot(self.w2, self.hidden)
self.cost = tensor.sum((self.output - self.target)**2)
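# plain gradient descent: each weight matrix steps against the gradient of the squared-error cost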
self.sgd_updates = {
self.w1: self.w1 - self.lr * tensor.grad(self.cost, self.w1),
self.w2: self.w2 - self.lr * tensor.grad(self.cost, self.w2)}
self.sgd_step = pfunc(
params=[self.input, self.target],
outputs=[self.output, self.cost],
updates=self.sgd_updates)
self.compute_output = pfunc([self.input], self.output)
self.output_from_hidden = pfunc([self.hidden], self.output)
Author: 12190143, Project: Theano, Lines: 29, Source: test_misc.py
Example 4: get_gradients
def get_gradients(self, X, Y, weights=1.0):
W_mean, W_ls, b_mean, b_ls = self.parameters
mean, log_sigma = self.sample_expected(Y)
sigma = tensor.exp(log_sigma)
cost = -log_sigma - 0.5 * (X - mean) ** 2 / tensor.exp(2 * log_sigma)
if weights != 1.0:
cost = -weights.dimshuffle(0, "x") * cost
cost_scaled = sigma ** 2 * cost
cost_gscale = (sigma ** 2).sum(axis=1).dimshuffle([0, "x"])
cost_gscale = cost_gscale * cost
gradients = OrderedDict()
params = Selector(self.mlp).get_parameters()
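# gradients are taken with consider_constant=[X, Y] so the data tensors are treated as constants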
for pname, param in params.iteritems():
gradients[param] = tensor.grad(cost_gscale.sum(), param, consider_constant=[X, Y])
gradients[W_mean] = tensor.grad(cost_scaled.sum(), W_mean, consider_constant=[X, Y])
gradients[b_mean] = tensor.grad(cost_scaled.sum(), b_mean, consider_constant=[X, Y])
gradients[W_ls] = tensor.grad(cost_scaled.sum(), W_ls, consider_constant=[X, Y])
gradients[b_ls] = tensor.grad(cost_scaled.sum(), b_ls, consider_constant=[X, Y])
return gradients
Author: jbornschein, Project: bihm, Lines: 27, Source: prob_layers.py
Example 5: build
def build(self):
self.debug = []
lM = []
lpullerror = []
lpusherror = []
lupdate = []
for i in xrange(self.M):
if not self.localM:
lM.append(theano.shared(value=np.eye(self.dim, dtype='float32'), name='M', borrow=True))
lpullerror.append(0.0)
lpusherror.append(0.0)
continue
M = theano.shared(value=np.eye(self.dim, dtype='float32'), name='M', borrow=True)
pullerror, pusherror = self._local_error(M, i)
pullerror *= (1-self.mu)
pusherror *= self.mu
error = pullerror + pusherror
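# gradient-descent update of the local metric M with a per-metric learning rate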
update = (M, M - self._lr[i] * T.grad(error, M))
lM.append(M)
lpullerror.append((1-self.mu)*pullerror)
lpusherror.append(self.mu*pusherror)
lupdate.append(update)
self.lM = lM
self.lpusherror = lpusherror
self.lpullerror = lpullerror
self.lupdate = lupdate
#gError = 0.0
gM = []
gpullerror = []
gpusherror = []
gupdate = []
for i in xrange(self.M):
if not self.globalM:
gM.append(theano.shared(value=np.eye(self.dim, dtype='float32'), name='M', borrow=True))
gpullerror.append(0.0)
gpusherror.append(0.0)
continue
M = theano.shared(value=np.eye(self.dim, dtype='float32'), name='M', borrow=True)
if i == 0:
pullerror, pusherror = self._global_error(M, i, None)
else:
pullerror, pusherror = self._global_error(M, i, gM[-1])
error = (1-self.mu) * pullerror + self.mu * pusherror
# gError += error#*(float(i+1)/self.M)
update = (M, M - self._lr[i+self.M] * T.grad(error, M))
gM.append(M)
gpullerror.append((1-self.mu)*pullerror)
gpusherror.append(self.mu*pusherror)
gupdate.append(update)
# if self.globalM:
# gupdate = [(gM[i], gM[i] - self._lr[i+self.M]*T.grad(gError, M)) for i in xrange(self.M)]
self.gM = gM
self.gpusherror = gpusherror
self.gpullerror = gpullerror
self.gupdate = gupdate
Author: PiscesDream, Project: Lab_MMAPM, Lines: 60, Source: MLMNN.1.64.py
Example 6: train
def train(self, epochs = 1000, learning_rate = 0.1):
regression = self.regression
X = self.X
Y = self.Y
x = T.matrix('x') # data, presented as rasterized images
y = T.vector('y') # labels, presented as 1D vector of [int] labels
error = regression.error(x, y)
g_W = T.grad(cost=error, wrt=regression.W)
g_b = T.grad(cost=error, wrt=regression.b)
# start-snippet-3
# specify how to update the parameters of the model as a list of
# (variable, update expression) pairs.
updates = [(regression.W, regression.W - learning_rate * g_W),
(regression.b, regression.b - learning_rate * g_b)]
# compiling a Theano function `train_model` that returns the cost, but in
# the same time updates the parameter of the model based on the rules
# defined in `updates`
train_model = tn.function(
inputs=[],
outputs=error,
updates=updates,
givens={
x: X,
y: Y
}
)
print('training start:')
start_time = timeit.default_timer()
epoch = 0
while(epoch < epochs):
avg_error = train_model()
print('epoch {0}, error {1}'.format(epoch, avg_error), end='\r')
epoch += 1
print('training finish (start: {0}) took {1} seconds.'.format(regression.error(X, Y).eval(), timeit.default_timer() - start_time))
# z = regression.compute(data_x).ravel()
# e = regression.error(data_y, z)
# l = regression.loss(e)
# epoch = 0
# while(epoch < epochs):
# g = regression.grad(data_y, z)
# d = regression.delta(g, data_x)
# regression.W -= learning_rate * d[0]
# regression.b -= learning_rate * d[1]
#
# z = regression.compute(data_x).ravel()
# e = regression.error(data_y, z)
# l = regression.loss(e)
# # print(l.eval())
#
# epoch += 1
# print('epoch:', epoch, end='\r')
pass
Author: KeViNOne, Project: MLNotebook, Lines: 60, Source: linear_regression_theano.py
Example 7: test_gradient_batch_normalization_op
def test_gradient_batch_normalization_op():
epsilon = 1e-8
op = gn.GradientBatchNormalizationOp(subtract_mean=True,
keep_mean=False,
epsilon=epsilon)
X = np.random.randn(3, 4).astype(fX)
W = np.random.randn(2, 3).astype(fX)
x = T.matrix("x")
w = T.matrix("w")
orig_grad = T.grad(w.dot(x).sum(), x).eval({x: X, w: W})
new_grad = T.grad(w.dot(op(x)).sum(), x).eval({x: X, w: W})
mu = orig_grad.mean(axis=0, keepdims=True)
sigma = orig_grad.std(axis=0, keepdims=True) + epsilon
ans = (orig_grad - mu) / sigma
np.testing.assert_allclose(ans,
new_grad,
rtol=1e-5)
np.testing.assert_allclose(np.zeros(4),
new_grad.mean(axis=0),
atol=1e-5)
np.testing.assert_allclose(np.ones(4),
new_grad.std(axis=0),
rtol=1e-5)
Author: diogo149, Project: treeano, Lines: 26, Source: gradient_normalization_test.py
Example 8: __build_theano__
def __build_theano__(self):
x = ivector(name="x")
y = ivector(name="y")
U, V, W = self.U, self.V, self.W
def forward_prop_step(x_t, s_t_prev, U, V, W):
s_t = T.tanh(U[:,x_t] + V.dot(s_t_prev))
o_t = T.nnet.softmax(W.dot(s_t))
return [o_t[0], s_t]
[o,s], updates = theano.scan(forward_prop_step, sequences=x,
outputs_info=[None, dict(initial=T.zeros(self.hidden_dim))],
non_sequences=[U,V,W], truncate_gradient=4, strict=True)
prediction = T.argmax(o, axis=1)
o_error = T.sum(T.nnet.categorical_crossentropy(o, y))
dU = T.grad(o_error, U)
dV = T.grad(o_error, V)
dW = T.grad(o_error, W)
self.forward = theano.function([x], o)
self.predict = theano.function([x], prediction)
self.c_error = theano.function([x, y], o_error)
self.bptt = theano.function([x, y], [dU, dV, dW])
learning_rate = scalar(name="learning_rate")
self.sgd_step = theano.function([x, y, learning_rate], [],
updates=[(self.U, self.U-learning_rate*dU),
(self.V, self.V-learning_rate*dV),
(self.W, self.W-learning_rate*dW)])
Author: zerkh, Project: theano-fun, Lines: 31, Source: rnn.py
Example 9: calculate_Rl
def calculate_Rl(v_input):
# Sample a h_sample according to one v_input
_, hl_mean, hl_sample = self.sample_h_given_v(v_input)
# Calculate the probability of visible output according to h_sample
_, vn_mean = self.propdown(hl_sample)
# - Part1.
# Desc: Multiply each element in grad with T.log(vn_mean).sum()
# Hint: [array(...), array(...), array(...)] = T.grad(..., self.params)
# The number of elements in gradient is the number of params which are partial derivation.
# part1 = map(lambda x: x * T.log(vn_mean).sum(),
# T.grad(T.log(hl_mean).sum(),
# self.params,
# disconnected_inputs='warn'))
part1 = [x * T.log(vn_mean).sum() for x in T.grad(
T.log(hl_mean).sum(),
self.params,
disconnected_inputs='warn')]
# - Part2.
part2 = T.grad((T.log(self.propdown(hl_sample)[1]).sum()),
self.params,
consider_constant=[hl_sample],
disconnected_inputs='warn')
# Rl is the result that add corresponding elements in two gradient.
# Rl = log(p(v^n|h^l;\theta)) * grad(log(p(h^l|v^n;\theta))) + grad(log(p(v^n|h^l;\theta)))
# Rl = map(lambda p1, p2: p1 + p2, part1, part2)
Rl = [x + y for x, y in zip(part1, part2)]
mi_cost_xi = T.log(vn_mean).sum()
Rl.append(mi_cost_xi)
return Rl
Author: meowoodie, Project: Mutual-Information-for-RBM, Lines: 33, Source: mi-rbm.py
Example 10: test_downsample
def test_downsample():
shps = [
(1, 1, 1, 12),
(1, 1, 2, 2),
(1, 1, 1, 1),
(1, 1, 4, 4),
(1, 1, 10, 11),
(1, 2, 2, 2),
(3, 5, 4, 4),
(25, 1, 7, 7),
(1, 1, 12, 12),
(1, 1, 2, 14),
(1, 1, 12, 14),
(1, 1, 14, 14),
(1, 1, 16, 16),
(1, 1, 18, 18),
(1, 1, 24, 24),
(1, 6, 24, 24),
(10, 1, 24, 24),
(10, 6, 24, 24),
(30, 6, 12, 12),
(30, 2, 24, 24),
(30, 6, 24, 24),
(10, 10, 10, 11),
(1, 1, 10, 1025),
(1, 1, 10, 1023),
(1, 1, 1025, 10),
(1, 1, 1023, 10),
]
numpy.random.RandomState(unittest_tools.fetch_seed()).shuffle(shps)
for shp in shps:
for ds in (2, 2), (3, 2), (1, 1):
if ds[0] > shp[2]:
continue
if ds[1] > shp[3]:
continue
# GpuDownsampleFactorMax doesn't like having more than 512 columns
# in the output tensor.
if float(shp[3]) / ds[1] > 512:
continue
for ignore_border in (True, False):
print "test_downsample", shp, ds, ignore_border
ds_op = DownsampleFactorMax(ds, ignore_border=ignore_border)
a = tcn.shared_constructor(my_rand(*shp), "a")
f = pfunc([], ds_op(tensor.as_tensor_variable(a)), mode=mode_with_gpu)
f2 = pfunc([], ds_op(tensor.as_tensor_variable(a)), mode=mode_without_gpu)
assert any([isinstance(node.op, tcn.blas.GpuDownsampleFactorMax) for node in f.maker.env.toposort()])
assert any([isinstance(node.op, DownsampleFactorMax) for node in f2.maker.env.toposort()])
assert numpy.allclose(f(), f2())
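# also compare the gradient of the pooled output's sum between the GPU and CPU implementations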
g = pfunc([], tensor.grad(ds_op(tensor.as_tensor_variable(a)).sum(), a), mode=mode_with_gpu)
g2 = pfunc([], tensor.grad(ds_op(tensor.as_tensor_variable(a)).sum(), a), mode=mode_without_gpu)
assert any(
[isinstance(node.op, tcn.blas.GpuDownsampleFactorMaxGrad) for node in g.maker.env.toposort()]
)
assert any([isinstance(node.op, DownsampleFactorMaxGrad) for node in g2.maker.env.toposort()])
assert numpy.allclose(g(), g2())
Author: pascanur, Project: Theano, Lines: 60, Source: test_blas.py
Example 11: fit
def fit(self,data_x,data_y):
print "Training"
start = time.clock()
n_batches = data_x.get_value(borrow=True).shape[0]/self.batch_size
tensor_x = T.matrix('x')
tensor_y = T.ivector('y')
index = T.lscalar('index')
self.single_layer = Layer(self.n_in,self.n_out,T.nnet.softmax)
cost = self.single_layer.negative_log_likelihood(tensor_x, tensor_y)
g_W = T.grad(cost,self.single_layer.W)
g_b = T.grad(cost,self.single_layer.b)
updates = [(self.single_layer.W,self.single_layer.W - g_W*self.learning_rate),
(self.single_layer.b,self.single_layer.b - g_b*self.learning_rate)]
train_batch = theano.function([index],[cost],
updates=updates,
givens={tensor_x : data_x[index*self.batch_size : (index + 1)*self.batch_size],
tensor_y : data_y[index*self.batch_size : (index + 1)*self.batch_size]})
train_batch_costs = [0 for i in xrange(n_batches)]
for iter in xrange(self.iters):
for minibatch_index in xrange(n_batches):
train_batch_costs[minibatch_index] = train_batch(minibatch_index)
if self.verbose==1: print "Iter %d --> %f" % (iter,np.mean(train_batch_costs))
end = time.clock()
print "Finished Training Logistic Regression Model\n" \
"Iterations %d\n" \
"Time Taken : %d secs" % (self.iters,end - start)
Author: saatvikshah1994, Project: kaggle_complete, Lines: 26, Source: classifier.py
Example 12: get_params_and_grads
def get_params_and_grads(graph, cost, verbose=False):
params = []
for k, p in graph.items():
if k == DATASETS_ID:
# skip datasets
continue
if k == RANDOM_ID:
# skip random
continue
params.append(p)
if verbose:
grads = []
for k, p in graph.items():
if k == DATASETS_ID:
# skip datasets
continue
if k == RANDOM_ID:
# skip random
continue
print("Computing grad w.r.t %s" % k)
grad = tensor.grad(cost, p)
grads.append(grad)
else:
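# compute gradients for all parameters in a single T.grad call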
grads = tensor.grad(cost, params)
return params, grads
Author: samim23, Project: dagbldr, Lines: 26, Source: utils.py
Example 13: get_mean_square_norm_gradients_variance_method_00
def get_mean_square_norm_gradients_variance_method_00(D_by_layer, cost, accum = 0):
# This returns a theano variable that will be of shape (minibatch_size, ).
# It will contain, for each training example, the associated mean of the
# variance wrt the gradient of that minibatch.
for (layer_name, D) in D_by_layer.items():
input = D['input']
input_square_norms = tensor.sqr(D['input']).sum(axis=1)
backprop_output = tensor.grad(cost, D['output'])
# I don't think that theano recomputes this.
# It should be just redundant nodes in the computational graph
# that end up being computed only once anyways.
grad_weight = tensor.grad(cost, D['weight'])
grad_bias = tensor.grad(cost, D['bias'])
backprop_output_square_norms = tensor.sqr(backprop_output).sum(axis=1)
if D.has_key('weight'):
A = input_square_norms * backprop_output_square_norms
C = tensor.sqr(grad_weight).sum() # all the terms get this "middle" expression added to them
B = (backprop_output.dot(grad_weight.T) * input).sum(axis=1)
accum += (A - 2*B + C)
if D.has_key('bias'):
# this last `sum` could be a component-wise `max` if we wanted
# to carry the maximum of the variances instead of the sum of squares
accum = accum + tensor.sqr(backprop_output - grad_bias.reshape((1,-1))).sum(axis=1)
return accum
Author: chinnadhurai, Project: ImportanceSamplingSGD, Lines: 32, Source: verifying_grad_square_norm_formula.py
Example 14: test_reduce_custom_dtype
def test_reduce_custom_dtype(self):
"""
Test the ability to provide your own output dtype for a reduce.
"""
# We try multiple axis combinations even though axis should not matter.
idx = 0
for method in self.methods:
for input_dtype in self.dtypes:
x = tensor.matrix(dtype=input_dtype)
for output_dtype in self.dtypes:
# If the output is a complex, the gradient of the reduce will
# cast the complex to the input dtype. We can't call the normal
# cast on a complex to a not complex as this is ambiguous.
if (not input_dtype.startswith('complex') and
output_dtype.startswith('complex')):
continue
axis = self.axes[idx % len(self.axes)]
var = getattr(x, method)(dtype=output_dtype, axis=axis)
assert var.dtype == output_dtype
f = theano.function([x], var, mode=self.mode)
topo = f.maker.fgraph.toposort()
assert [n for n in topo if isinstance(n.op, self.op)], (topo, output_dtype)
data = numpy.random.rand(3, 4) * 10
data = data.astype(input_dtype)
f(data)
if "complex" in input_dtype:
continue
# Check that we can take the gradient
tensor.grad(var.sum(), x,
disconnected_inputs='ignore')
idx += 1
Author: AI-Cdrone, Project: Theano, Lines: 34, Source: test_elemwise.py
Example 15: check_mat_rop_lop
def check_mat_rop_lop(self, y, out_shape):
vx = numpy.asarray(self.rng.uniform(size=self.mat_in_shape), theano.config.floatX)
vv = numpy.asarray(self.rng.uniform(size=self.mat_in_shape), theano.config.floatX)
yv = tensor.Rop(y, self.mx, self.mv)
rop_f = function([self.mx, self.mv], yv)
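# reference value: accumulate (grad(y[i], x) * v) row by row with scan and compare against Rop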
sy, _ = theano.scan( lambda i,y,x,v: (tensor.grad(y[i],x)*v).sum(),
sequences = tensor.arange(y.shape[0]),
non_sequences = [y,self.mx,self.mv])
scan_f = function([self.mx,self.mv], sy)
v1 = rop_f(vx,vv)
v2 = scan_f(vx,vv)
assert numpy.allclose(v1,v2), ('ROP mismatch: %s %s' % (v1, v2))
self.check_nondiff_rop( theano.clone(y,
replace={self.mx:break_op(self.mx)}))
vv = numpy.asarray(self.rng.uniform(size=out_shape), theano.config.floatX)
yv = tensor.Lop(y, self.mx, self.v)
lop_f = function([self.mx, self.v], yv)
sy = tensor.grad((self.v*y).sum(), self.mx)
scan_f = function([self.mx, self.v], sy)
v1 = lop_f(vx,vv)
v2 = scan_f(vx,vv)
assert numpy.allclose(v1,v2), ('LOP mismatch: %s %s' % (v1, v2))
Author: onze, Project: Theano, Lines: 30, Source: test_rop.py
Example 16: __init__
def __init__(self, sizes, input_dim, output_dim):
self.layers = len(sizes) + 1
in_dim = [input_dim] + sizes
out_dim = sizes + [output_dim]
x = T.dvector('x')
y = T.dvector('y')
self.hyp_params = []
for i, (r,c) in enumerate(zip(in_dim,out_dim)):
if i == 0:
obj = HiddenLayer(x, r, c)
else:
obj = HiddenLayer(obj.output,r,c)
self.hyp_params.append(obj.params)
yhat = obj.output
prediction = T.argmax(yhat)
self.predict = theano.function([x],[yhat])
o_error = T.sum(T.sqr(yhat - y))
# o_error = T.sum(T.nnet.categorical_crossentropy(yhat, y))
updates = []
learning_rate = T.scalar('learning_rate')
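# one (W, b) update pair per layer, each stepping against the gradient of the squared error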
for param in self.hyp_params:
updates.append((param['W'], param['W'] - learning_rate * T.grad(o_error,param['W'])))
updates.append((param['b'], param['b'] - learning_rate * T.grad(o_error,param['b'])))
self.train_step = theano.function([x,y,learning_rate],[o_error],
updates = updates)
Author: ranarag, Project: theano_works, Lines: 31, Source: mlp_theano.py
Example 17: __theano_build__
def __theano_build__(self):
U, V, W = self.U, self.V, self.W
x = T.ivector('x')
y = T.ivector('y')
def forward_prop_step(x_t, s_t_prev, U, V, W):
s_t = T.tanh(U[:,x_t] + W.dot(s_t_prev))
o_t = T.nnet.softmax(V.dot(s_t))
return [o_t[0], s_t]
[o,s], updates = theano.scan(
forward_prop_step,
sequences=x,
outputs_info=[None, dict(initial=T.zeros(self.hidden_dim))],
non_sequences=[U, V, W],
truncate_gradient=self.bptt_truncate,
strict=True)
prediction = T.argmax(o, axis=1)
o_error = T.sum(T.nnet.categorical_crossentropy(o, y))
# Gradients
dU = T.grad(o_error, U)
dV = T.grad(o_error, V)
dW = T.grad(o_error, W)
# Assign functions
self.forward_propagation = theano.function([x], o)
self.predict = theano.function([x], prediction)
self.ce_error = theano.function([x, y], o_error)
self.bptt = theano.function([x, y], [dU, dV, dW])
# SGD
learning_rate = T.scalar('learning_rate')
self.sgd_step = theano.function([x,y,learning_rate], [],
updates=[(self.U, self.U - learning_rate * dU),
(self.V, self.V - learning_rate * dV),
(self.W, self.W - learning_rate * dW)])
Author: binxuankong, Project: poetry-generator, Lines: 32, Source: RNNTheano.py
Example 18: theano_setup
def theano_setup(self):
# The matrices Wb and Wc were originally tied.
# Because of that, I decided to keep Wb and Wc with
# the same shape (instead of being transposed) to
# avoid disturbing the code as much as possible.
Wb = T.dmatrix('Wb')
Wc = T.dmatrix('Wc')
b = T.dvector('b')
c = T.dvector('c')
s = T.dscalar('s')
x = T.dmatrix('x')
h_act = T.dot(x, Wc) + c
if self.act_func[0] == 'tanh':
h = T.tanh(h_act)
elif self.act_func[0] == 'sigmoid':
h = T.nnet.sigmoid(h_act)
elif self.act_func[0] == 'id':
# bad idea
h = h_act
else:
raise("Invalid act_func[0]")
r_act = T.dot(h, Wb.T) + b
if self.act_func[1] == 'tanh':
r = s * T.tanh(r_act)
elif self.act_func[1] == 'sigmoid':
r = s * T.nnet.sigmoid(r_act)
elif self.act_func[1] == 'id':
r = s * r_act
else:
raise("Invalid act_func[1]")
# Another variable to be able to call a function
# with a noisy x and compare it to a reference x.
y = T.dmatrix('y')
loss = ((r - y)**2)
sum_loss = T.sum(loss)
# theano_encode_decode : vectorial function in argument X.
# theano_loss : vectorial function in argument X.
# theano_gradients : returns triplet of gradients, each of
# which involves the all data X summed
# so it's not a "vectorial" function.
self.theano_encode_decode = function([Wb,Wc,b,c,s,x], r)
self.theano_loss = function([Wb,Wc,b,c,s,x,y], loss)
self.theano_gradients = function([Wb,Wc,b,c,s,x,y],
[T.grad(sum_loss, Wb), T.grad(sum_loss, Wc),
T.grad(sum_loss, b), T.grad(sum_loss, c),
T.grad(sum_loss, s)])
# other useful theano functions for the experiments that involve
# adding noise to the hidden states
self.theano_encode = function([Wc,c,x], h)
self.theano_decode = function([Wb,b,s,h], r)
Author: gyom, Project: denoising_autoencoder, Lines: 60, Source: dae_untied_weights.py
Example 19: mcmc
def mcmc(ll, *frvs):
full_observations = dict(observations)
full_observations.update(dict([(rv, s) for rv, s in zip(free_RVs, frvs)]))
loglik = -full_log_likelihood(full_observations)
proposals = free_RVs_prop
H = tensor.add(*[tensor.sum(tensor.sqr(p)) for p in proposals])/2. + loglik
# -- this should be an inner loop
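# leapfrog step: half-step on the momenta, full step on the positions,
# second half-step on the momenta, then Metropolis accept/reject on the energy change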
g = []
g.append(tensor.grad(loglik, frvs))
proposals = [(p - epsilon*gg[0]/2.) for p, gg in zip(proposals, g)]
rvsp = [(rvs + epsilon*rvp) for rvs,rvp in zip(frvs, proposals)]
full_observations = dict(observations)
full_observations.update(dict([(rv, s) for rv, s in zip(free_RVs, rvsp)]))
new_loglik = -full_log_likelihood(full_observations)
gnew = []
gnew.append(tensor.grad(new_loglik, rvsp))
proposals = [(p - epsilon*gn[0]/2.) for p, gn in zip(proposals, gnew)]
# --
Hnew = tensor.add(*[tensor.sum(tensor.sqr(p)) for p in proposals])/2. + new_loglik
dH = Hnew - H
accept = tensor.or_(dH < 0., U < tensor.exp(-dH))
return [tensor.switch(accept, -new_loglik, ll)] + \
[tensor.switch(accept, p, f) for p, f in zip(rvsp, frvs)], \
{}, theano.scan_module.until(accept)
Author: helson73, Project: MonteTheano, Lines: 34, Source: sample.py
Example 20: create_TrainFunc_tranPES
def create_TrainFunc_tranPES(simfn, embeddings, marge=0.5, alpha=1., beta=1.):
# parse the embedding data
embedding = embeddings[0] # D x N matrix
lembedding = embeddings[1]
# declare the symbolic variables for training triples
hp = S.csr_matrix('head positive') # N x batchsize matrix
rp = S.csr_matrix('relation')
tp = S.csr_matrix('tail positive')
hn = S.csr_matrix('head negative')
tn = S.csr_matrix('tail negative')
lemb = T.scalar('embedding learning rate')
lremb = T.scalar('relation learning rate')
subtensorE = T.ivector('batch entities set')
subtensorR = T.ivector('batch link set')
# Generate the training positive and negative triples
hpmat = S.dot(embedding.E, hp).T # batchsize x D dense matrix
rpmat = S.dot(lembedding.E, rp).T
tpmat = S.dot(embedding.E, tp).T
hnmat = S.dot(embedding.E, hn).T
tnmat = S.dot(embedding.E, tn).T
# calculate the score
pos = tranPES3(simfn, T.concatenate([hpmat, tpmat], axis=1).reshape((hpmat.shape[0], 2, hpmat.shape[1])).dimshuffle(0, 2, 1), hpmat, rpmat, tpmat)
negh = tranPES3(simfn, T.concatenate([hnmat, tpmat], axis=1).reshape((hnmat.shape[0], 2, hnmat.shape[1])).dimshuffle(0, 2, 1), hnmat, rpmat, tpmat)
negt = tranPES3(simfn, T.concatenate([hpmat, tnmat], axis=1).reshape((hpmat.shape[0], 2, hpmat.shape[1])).dimshuffle(0, 2, 1), hpmat, rpmat, tnmat)
costh, outh = margeCost(pos, negh, marge)
costt, outt = margeCost(pos, negt, marge)
embreg = regEmb(embedding, subtensorE, alpha)
lembreg = regLink(lembedding, subtensorR, beta)
cost = costh + costt + embreg[0] + lembreg
out = T.concatenate([outh, outt])
outc = embreg[1]
# list of inputs to the function
list_in = [lemb, lremb, hp, rp, tp, hn, tn, subtensorE, subtensorR]
# updating the embeddings using gradient descent
emb_grad = T.grad(cost, embedding.E)
New_embedding = embedding.E - lemb*emb_grad
remb_grad = T.grad(cost, lembedding.E)
New_rembedding = lembedding.E - lremb * remb_grad
updates = OrderedDict({embedding.E: New_embedding, lembedding.E: New_rembedding})
return theano.function(list_in, [cost, T.mean(out), T.mean(outc), embreg[0], lembreg],
updates=updates, on_unused_input='ignore')
Author: while519, Project: tranpes, Lines: 60, Source: model.py
Note: The theano.tensor.grad examples in this article were compiled by 纯净天空 from source-code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by many developers; copyright of the source code belongs to the original authors, and redistribution or use should follow the corresponding project's license. Do not reproduce without permission.