本文整理汇总了Python中theano_toolkit.parameters.Parameters类的典型用法代码示例。如果您正苦于以下问题：Python Parameters类的具体用法？Python Parameters怎么用？Python Parameters使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Parameters类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: create_model
def create_model(ids,vocab2id,size):
    """Build an LSTM language model over the id sequence `ids`.

    Both the word vectors and the LSTM state use `size` dimensions.

    Returns a tuple (scores, cost, obv_cost, P): the softmax scores for
    every step, the two values produced by `word_cost` (the first is used
    as the training cost, the second as the observed cost), and the
    `Parameters` container that owns all weights.
    """
    embedding_dim = hidden_dim = size

    P = Parameters()
    P.V = create_vocab_vectors(P, vocab2id, embedding_dim)

    # The output projection is shaped from the embedding matrix: its
    # transpose for the weights, one bias entry per row of V.
    vocab_shape = P.V.get_value().shape
    P.W_predict = np.zeros(vocab_shape).T
    P.b_predict = np.zeros((vocab_shape[0],))

    embedded = P.V[ids]
    lstm_step = build_lstm_step(P, embedding_dim, hidden_dim)
    (hidden_states, _cells), _scan_updates = theano.scan(
        lstm_step,
        sequences=[embedded],
        outputs_info=[P.init_h, P.init_c],
    )

    logits = T.dot(hidden_states, P.W_predict) + P.b_predict
    scores = T.nnet.softmax(logits)

    # Score step t against the id at step t + 1.
    log_likelihood, cross_ent = word_cost(scores[:-1], ids[1:])
    # NOTE: an L1 penalty (1e-4 * sum(T.sum(abs(w)) for w in P.values()))
    # was disabled in the original code.
    return scores, log_likelihood, cross_ent, P
开发者ID:andersonhaynes,项目名称:theano-nlp-1,代码行数:25,代码来源:lstm_lang_model.py
示例2: make_train
def make_train(input_size,output_size,mem_size,mem_width,hidden_sizes=[100]):
    """Compile a training function for the controller + memory model.

    A fresh `Parameters` container is filled by `controller.build` and
    `model.build`. The compiled function takes an input sequence and a
    target sequence (both matrices), returns the L2-regularised
    cross-entropy cost and applies one RMSprop update with clipped
    gradients.

    Returns (P, train).
    """
    eps = 5e-6        # squashes predictions into [eps, 1 - eps] before the log
    l2_weight = 1e-4
    grad_bound = 10

    P = Parameters()
    ctrl = controller.build(
        P, input_size, output_size, mem_size, mem_width, hidden_sizes)
    predict = model.build(P, mem_size, mem_width, hidden_sizes[-1], ctrl)

    input_seq = T.matrix('input_sequence')
    output_seq = T.matrix('output_sequence')
    # The last element returned by the model is the predicted sequence.
    output_seq_pred = predict(input_seq)[-1]

    squashed = eps + (1 - 2 * eps) * output_seq_pred
    cross_entropy = T.sum(
        T.nnet.binary_crossentropy(squashed, output_seq), axis=1)

    params = P.values()
    # Sum of squared parameters, accumulated from a symbolic zero.
    l2 = sum(((p ** 2).sum() for p in params), T.sum(0))
    cost = T.sum(cross_entropy) + l2_weight * l2

    clipped = [
        T.clip(g, -grad_bound, grad_bound)
        for g in T.grad(cost, wrt=params)
    ]
    # NOTE: updates.adadelta(params, grads) was the previous optimiser.
    train = theano.function(
        inputs=[input_seq, output_seq],
        outputs=cost,
        updates=updates.rmsprop(params, clipped, learning_rate=1e-5),
    )
    return P, train
开发者ID:chanhou,项目名称:neural-turing-machines,代码行数:25,代码来源:train_copy.py
示例3: make_train
def make_train(input_size,output_size,mem_size,mem_width,hidden_size=100):
    """Compile an Adadelta training function for the memory model.

    `controller.build` creates the controller network (it takes an
    external input and a read input and returns the network output and
    its hidden layer); `model.build` wraps it into a function predicting
    an output sequence from an input sequence.

    Returns (P, train) where `train(input_seq, output_seq)` returns the
    L2-regularised cost and updates the parameters in `P`.
    """
    eps = 5e-6        # squashes predictions into [eps, 1 - eps] before the log
    l2_weight = 1e-3
    grad_bound = 100

    P = Parameters()
    ctrl = controller.build(
        P, input_size, output_size, mem_size, mem_width, hidden_size)
    predict = model.build(P, mem_size, mem_width, hidden_size, ctrl)

    input_seq = T.matrix('input_sequence')
    output_seq = T.matrix('output_sequence')
    _memory, _weights, output_seq_pred = predict(input_seq)

    squashed = eps + (1 - 2 * eps) * output_seq_pred
    cross_entropy = T.sum(
        T.nnet.binary_crossentropy(squashed, output_seq), axis=1)

    params = P.values()
    # Sum of squared parameters, accumulated from a symbolic zero.
    l2 = sum(((p ** 2).sum() for p in params), T.sum(0))
    cost = T.sum(cross_entropy) + l2_weight * l2

    # Element-wise gradient clipping.
    clipped = [
        T.clip(g, -grad_bound, grad_bound)
        for g in T.grad(cost, wrt=params)
    ]
    train = theano.function(
        inputs=[input_seq, output_seq],
        outputs=cost,
        updates=updates.adadelta(params, clipped),
    )
    return P, train
开发者ID:alee101,项目名称:598c-project,代码行数:31,代码来源:train_copy.py
示例4: make_functions
def make_functions(
input_size, output_size, mem_size, mem_width, hidden_sizes=[100]):
start_time = time.time()
input_seqs = T.btensor3('input_sequences')
output_seqs = T.btensor3('output_sequences')
P = Parameters()
process = model.build(P,
input_size, output_size, mem_size, mem_width, hidden_sizes[0])
outputs = process(T.cast(input_seqs,'float32'))
output_length = (input_seqs.shape[1] - 2) // 2
Y = output_seqs[:,-output_length:,:-2]
Y_hat = T.nnet.sigmoid(outputs[:,-output_length:,:-2])
cross_entropy = T.mean(T.nnet.binary_crossentropy(Y_hat,Y))
bits_loss = cross_entropy * (Y.shape[1] * Y.shape[2]) / T.log(2)
params = P.values()
cost = cross_entropy # + 1e-5 * sum(T.sum(T.sqr(w)) for w in params)
print "Computing gradients",
grads = T.grad(cost, wrt=params)
grads = updates.clip_deltas(grads, np.float32(clip_length))
print "Done. (%0.3f s)"%(time.time() - start_time)
start_time = time.time()
print "Compiling function",
P_learn = Parameters()
update_pairs = updates.rmsprop(
params, grads,
learning_rate=1e-4,
P=P_learn
)
train = theano.function(
inputs=[input_seqs, output_seqs],
outputs=cross_entropy,
updates=update_pairs,
)
test = theano.function(
inputs=[input_seqs, output_seqs],
outputs=bits_loss
)
print "Done. (%0.3f s)"%(time.time() - start_time)
print P.parameter_count()
return P, P_learn, train, test
开发者ID:shawntan,项目名称:neural-turing-machines,代码行数:53,代码来源:train_copy.py
示例5: make_train_functions
def make_train_functions():
    """Compile the accumulate / update / test functions for the model.

    Training is split in two steps so gradients can be averaged over
    several sequences:

    * `acc(X, Y)` adds the gradient of one sequence to the accumulators,
      increments the sequence counter and returns the mean error;
    * `update()` applies one Adadelta step using the averaged, clipped
      gradients, then resets the accumulators and the counter;
    * `test(X)` returns the arg-max prediction for the second half of X.

    Returns (acc, update, test).
    """
    P = Parameters()
    X = T.bvector('X')
    Y = T.ivector('Y')
    aux = {}
    predict = model.build(
        P,
        input_size=128,
        embedding_size=64,
        controller_size=256,
        stack_size=256,
        output_size=128,
    )
    output = predict(X, aux=aux)

    # Labels are wrapped modulo 129, so a label of -1 selects index 128.
    error = -T.log(output[T.arange(Y.shape[0]), ((128 + 1 + Y) % (128 + 1))])
    # Only the second half of the sequence is scored. `//` keeps the slice
    # bound an integer regardless of Theano's int_division setting.
    error = error[-(Y.shape[0] // 2):]

    parameters = P.values()
    gradients = T.grad(T.sum(error), wrt=parameters)

    count = theano.shared(np.float32(0))
    acc_grads = [
        theano.shared(np.zeros(p.get_value().shape, dtype=np.float32))
        for p in parameters
    ]

    acc_update = [(a, a + g) for a, g in zip(acc_grads, gradients)]
    acc_update.append((count, count + np.float32(1)))

    acc_clear = [(a, np.float32(0) * a) for a in acc_grads]
    # Reset with a float32 zero to match the dtype of `count`.
    acc_clear.append((count, np.float32(0)))

    # Average first, then clip the averages. The original clipped the raw
    # accumulators and so silently discarded the averaging step.
    avg_grads = [g / count for g in acc_grads]
    avg_grads = [clip(g, 1) for g in avg_grads]

    acc = theano.function(
        inputs=[X, Y],
        outputs=T.mean(error),
        updates=acc_update,
    )
    update = theano.function(
        inputs=[],
        updates=updates.adadelta(parameters, avg_grads, learning_rate=1e-8) + acc_clear,
    )
    test = theano.function(
        inputs=[X],
        outputs=T.argmax(output, axis=1)[-(X.shape[0] // 2):],
    )
    return acc, update, test
开发者ID:ml-lab,项目名称:neural-transducers,代码行数:50,代码来源:train.py
示例6: build_network
def build_network(input_size,hidden_size,constraint_adj=False):
    """Build a tied-weight, one-hidden-layer network.

    The input is projected to a sigmoid hidden layer with
    `P.W_input_hidden`, and projected back with the transpose of the same
    matrix followed by a softmax.

    `constraint_adj` is accepted for compatibility but currently has no
    effect: the adjacency constraint it would enable
    (`cost + adjacency_constraint(hidden_lin)`) is disabled.

    Returns (X, output, cost, P): the symbolic int8 input matrix, the
    softmax output, the cost from `build_error`, and the `Parameters`
    container.
    """
    P = Parameters()
    X = T.bmatrix('X')

    P.W_input_hidden = U.initial_weights(input_size, hidden_size)
    P.b_hidden = U.initial_weights(hidden_size)
    P.b_output = U.initial_weights(input_size)

    hidden_lin = T.dot(X, P.W_input_hidden) + P.b_hidden
    hidden = T.nnet.sigmoid(hidden_lin)
    # The decoder reuses the encoder weights, transposed.
    output = T.nnet.softmax(T.dot(hidden, P.W_input_hidden.T) + P.b_output)

    cost = build_error(X, output, P)
    return X, output, cost, P
开发者ID:shawntan,项目名称:viz-speech,代码行数:16,代码来源:order_constraint.py
示例7: make_train
def make_train(input_size,output_size,mem_size,mem_width,hidden_sizes=[100]):
    """Compile an Adadelta training function for the memory model.

    The cost being minimised is the summed binary cross-entropy over the
    whole sequence. The value returned by `train` is the mean
    cross-entropy over the second half of the sequence only.

    Returns (P, train).
    """
    P = Parameters()
    ctrl = controller.build(P, input_size, output_size, mem_size, mem_width, hidden_sizes)
    predict = model.build(P, mem_size, mem_width, hidden_sizes[-1], ctrl)

    input_seq = T.matrix('input_sequence')
    output_seq = T.matrix('output_sequence')
    # The last element returned by the model is the predicted sequence.
    output_seq_pred = predict(input_seq)[-1]

    # Squash predictions into [5e-6, 1 - 5e-6] so the log stays finite.
    cross_entropy = T.sum(
        T.nnet.binary_crossentropy(5e-6 + (1 - 2 * 5e-6) * output_seq_pred, output_seq),
        axis=1,
    )
    cost = T.sum(cross_entropy)  # no L2 term in this variant

    params = P.values()
    grads = [T.clip(g, -100, 100) for g in T.grad(cost, wrt=params)]

    # Integer division: the result is used as a slice bound, so it must
    # stay an integer regardless of Theano's int_division setting.
    response_length = input_seq.shape[0] // 2
    train = theano.function(
        inputs=[input_seq, output_seq],
        outputs=T.mean(cross_entropy[-response_length:]),
        updates=updates.adadelta(params, grads),
    )
    return P, train
开发者ID:FrictionlessCoin,项目名称:neural-turing-machines,代码行数:22,代码来源:train_copy.py
示例8: __init__
def __init__(self, hidden_size, input_size, vocab_size, entropy_reg = 0.001, key_entropy_reg = 0.001, stack_size=1, celltype=LSTM):
# Build the language model: a stack of recurrent cells with an embedding in
# front and a softmax classifier behind, plus a separate turing model whose
# parameters live in self.turing_params. All symbolic graphs and compiled
# functions are created here; the create_* calls below run in a fixed order
# (NOTE(review): presumably each relies on attributes set earlier - confirm).
# core layer in RNN/LSTM: stack_size layers of hidden_size units each
self.model = StackedCells(input_size, celltype=celltype, layers =[hidden_size] * stack_size)
# add an embedding as the first layer
self.model.layers.insert(0, Embedding(vocab_size, input_size))
# add a softmax classifier as the last layer
self.model.layers.append(Layer(hidden_size, vocab_size, activation = softmax))
self.entropy_reg = entropy_reg
self.key_entropy_reg = key_entropy_reg
self.turing_params = Parameters()
# initialise the turing machine model; its weights are stored in self.turing_params
self.turing_updates , self.turing_predict = turing_model.build(self.turing_params , hidden_size , vocab_size)
self.hidden_size = hidden_size
# inputs are matrices of indices,
# each row is a sentence, each column a timestep
self._stop_word = theano.shared(np.int32(999999999), name="stop word")
self.for_how_long = T.ivector()
self.mask_matrix = T.imatrix()
self.input_mat = T.imatrix()
self.priming_word = T.iscalar()
# random stream seeded with a random integer in [0, 1024)
self.srng = T.shared_randomstreams.RandomStreams(np.random.randint(0, 1024))
# create symbolic variables for prediction:
# (open question left by a previous author: what "greedy" means here)
self.lstm_predictions = self.create_lstm_prediction()
self.final_predictions,self.entropy,self.key_entropy = self.create_final_prediction()
# create symbolic variable for greedy search:
self.greedy_predictions = self.create_lstm_prediction(greedy=True)
# create gradient training functions:
self.create_cost_fun()# creates 2 cost functions (lstm, final)
# learning rates for the three training functions created below
self.lstm_lr = 0.01
self.turing_lr = 0.01
self.all_lr = 0.01
self.create_training_function()# creates 3 functions (lstm, turing, all)
self.create_predict_function()# creates 2 prediction functions (lstm, final)
# create perplexity (ppl) expressions and the function that evaluates them
self.lstm_ppl = self.create_lstm_ppl()
self.final_ppl = self.create_final_ppl()
self.create_ppl_function()
开发者ID:darongliu,项目名称:Lstm_Turing_LM,代码行数:48,代码来源:lm_v4.py
示例9: Parameters
in_gate = T.nnet.sigmoid(in_lin)
forget_gate = T.nnet.sigmoid(forget_lin)
cell_updates = T.tanh(cell_lin)
cell = forget_gate * prev_cell + in_gate * cell_updates
out_lin = x_o + h_o + b_o + T.dot(cell,V_o)
out_gate = T.nnet.sigmoid(out_lin)
hid = out_gate * T.tanh(cell)
return cell,hid
return step
if __name__ == "__main__":
P = Parameters()
X = T.ivector('X')
P.V = np.zeros((8,8),dtype=np.int32)
X_rep = P.V[X]
P.W_output = np.zeros((15,8),dtype=np.int32)
lstm_layer = build(P,
name = "test",
input_size = 8,
hidden_size =15
)
_,hidden = lstm_layer(X_rep)
output = T.nnet.softmax(T.dot(hidden,P.W_output))
delay = 5
label = X[:-delay]
开发者ID:wavelets,项目名称:neural-qa,代码行数:31,代码来源:lstm.py
示例10: Parameters
import theano
import theano.tensor as T
import numpy as np
import vocab
import model
from theano_toolkit.parameters import Parameters
if __name__ == "__main__":
    # Build the sampler, load trained weights and compile a one-step
    # sampling function.
    model_file = args.model_file
    temp_input = args.temperature
    # NOTE(review): pickle.load can execute arbitrary code from the file;
    # only load vocabulary files from a trusted source.
    id2char = pickle.load(args.vocab_file)
    char2id = vocab.load(args.vocab_file.name)
    prime_str = args.prime

    P = Parameters()
    sampler = model.build_sampler(
        P,
        character_count=len(char2id) + 1,
        embedding_size=20,
        hidden_size=100,
    )
    P.load(model_file)

    temp = T.scalar('temp')
    char = T.iscalar('char')
    # Previous cell / hidden state of both layers. Each variable is named
    # after itself; the original labelled p_hidden_1 as "p_hidden_2".
    p_cell_1 = T.vector("p_cell_1")
    p_hidden_1 = T.vector("p_hidden_1")
    p_cell_2 = T.vector("p_cell_2")
    p_hidden_2 = T.vector("p_hidden_2")

    output, cell_1, hidden_1, cell_2, hidden_2 = sampler(
        temp, char, p_cell_1, p_hidden_1, p_cell_2, p_hidden_2)
    sample = theano.function(
        inputs=[temp, char, p_cell_1, p_hidden_1, p_cell_2, p_hidden_2],
        outputs=[output, cell_1, hidden_1, cell_2, hidden_2],
    )
开发者ID:OlafLee,项目名称:theano-nlp,代码行数:31,代码来源:sample.py
示例11: label_seq
predict = T.nnet.softmax(T.dot(hidden, W_hidden_output) + b_output)
return X, predict
def label_seq(string):
idxs = font.indexify(string)
result = np.ones((len(idxs) * 2 + 1,), dtype=np.int32) * -1
result[np.arange(len(idxs)) * 2 + 1] = idxs
print result
return result
if __name__ == "__main__":
P = Parameters()
X = T.matrix('X')
Y = T.ivector('Y')
X, predict = build_model(P, X, 10, 10, 10)
cost = ctc.cost(predict, Y)
params = P.values()
grad = T.grad(cost, wrt=params)
train = theano.function(
inputs=[X, Y],
outputs=cost,
updates=updates.adadelta(params, grad)
)
for _ in xrange(10):
print train(np.eye(10, dtype=np.float32)[::-1], np.arange(10, dtype=np.int32))
开发者ID:Duum,项目名称:theano-ctc,代码行数:30,代码来源:toy.py
示例12: Parameters
import theano
import theano.tensor as T
import numpy as np
import sys
import data
import model
from theano_toolkit.parameters import Parameters
from theano_toolkit import updates
if __name__ == '__main__':
model_filename = sys.argv[1]
test_filename = sys.argv[2]
train_filename = sys.argv[3]
P = Parameters()
data_X, df = data.load_test(test_filename, train_filename)
f = model.build(P,
input_size=data_X.shape[1],
hidden_sizes=[256, 128, 64, 32]
)
X = T.matrix('X')
predict = theano.function(
inputs=[X],
outputs=f(X, test=True) > 0.5,
)
P.load(model_filename)
output = predict(data_X)
print data_X.shape
print output.shape
print df.values.shape
开发者ID:shawntan,项目名称:higgs-boson,代码行数:31,代码来源:predict.py
示例13: model
P.W_output = np.zeros((hidden_size,output_size))
P.b_output = np.zeros((output_size,))
def model(X):
hidden = lstm_layer(X)[1]
return T.nnet.softmax(T.dot(hidden,P.W_output) + P.b_output)
return model
def label_seq(string):
    """Return the character indices of `string` as given by `font.indexify`."""
    return font.indexify(string)
if __name__ == "__main__":
P = Parameters()
X = T.matrix('X')
Y = T.ivector('Y')
predict = build_model(P,8,512,len(font.chars)+1)
probs = predict(X)
alpha = 0.5
params = P.values()
cost = ctc.cost(probs, Y) #+ 1e-8 * sum(T.sum(T.sqr(w)) for w in params)
gradients = T.grad(cost, wrt=params)
gradient_acc = [ theano.shared(0 * p.get_value()) for p in params ]
counter = theano.shared(np.float32(0.))
acc = theano.function(
开发者ID:Duum,项目名称:theano-ctc,代码行数:31,代码来源:ocr.py
示例14: Parameters
forget_gate = T.nnet.sigmoid(forget_lin)
cell_updates = T.tanh(cell_lin)
cell = forget_gate * prev_cell + in_gate * cell_updates
out_lin = x_o + h_o + b_o + T.dot(cell, V_o)
out_gate = T.nnet.sigmoid(out_lin)
hid = out_gate * T.tanh(cell)
return cell, hid
return step
if __name__ == "__main__":
P = Parameters()
X = T.ivector("X")
P.V = np.zeros((8, 8), dtype=np.int32)
X_rep = P.V[X]
P.W_output = np.zeros((15, 8), dtype=np.int32)
lstm_layer = build(P, name="test", input_size=8, hidden_size=15)
_, hidden = lstm_layer(X_rep)
output = T.nnet.softmax(T.dot(hidden, P.W_output))
delay = 5
label = X[:-delay]
predicted = output[delay:]
cost = -T.sum(T.log(predicted[T.arange(predicted.shape[0]), label]))
params = P.values()
开发者ID:ml-lab,项目名称:neural-transducers,代码行数:31,代码来源:lstm.py
示例15: numbers
# TODO: fix these magic numbers (especially the 800)
def f(X):
layer0 = X.reshape((X.shape[0], 1, 28, 28))
layer1 = _build_conv_pool(P, 1, layer0, 20, 1, 5, 2)
layer2_= _build_conv_pool(P, 2, layer1, 50, 20, 5, 2)
layer2 = layer2_.flatten(2)
output = T.nnet.softmax(T.dot(layer2, P.W_hidden_output) + P.b_output)
return output
return f
def cost(P, Y_hat, Y, l2 = 0):
    """Return the mean categorical cross-entropy plus a weighted penalty.

    The penalty is `l2` times the sum, over every parameter in `P`, of the
    mean of its squared entries.
    """
    data_term = T.mean(T.nnet.categorical_crossentropy(Y_hat, Y))
    mean_squares = [T.mean(p**2) for p in P.values()]
    return data_term + l2 * sum(mean_squares)
if __name__ == "__main__":
    import datasets

    # Keep only the first 1000 examples.
    x, y = datasets.mnist()
    x = x[0:1000]
    y = y[0:1000]

    P = Parameters()
    X = T.matrix('X')
    Y = T.ivector('Y')

    net = build(P, 784, 800, 10)
    Y_hat = net(X)
    f = theano.function(inputs=[X], outputs=Y_hat)

    J = cost(P, Y_hat, Y)
    grad = T.grad(J, wrt=P.values())
开发者ID:jeffiar,项目名称:theano-learn,代码行数:29,代码来源:lenet_model.py
示例16: crossentropy
def crossentropy(output,Y):
    """Return the per-row categorical cross-entropy of `output` against `Y`.

    When `output` is the direct result of a softmax op, the loss is
    computed from the softmax input with the max-subtraction form of
    log-sum-exp, which avoids taking the log of a tiny probability.
    Otherwise it falls back to `T.nnet.categorical_crossentropy`.

    Both branches return one value per row of `output`.
    """
    owner = output.owner  # None for graph inputs, which no op produced
    if owner is not None and owner.op == T.nnet.softmax_op:
        x = owner.inputs[0]
        k = T.max(x, axis=1, keepdims=True)
        # k is a column because of keepdims; take it as a vector before
        # adding it to the 1-D sum so the result does not broadcast to a
        # square matrix.
        log_norm = T.log(T.sum(T.exp(x - k), axis=1)) + k[:, 0]
        return -x[T.arange(x.shape[0]), Y] + log_norm
    # Use the `output` argument; the original referenced `outputs`, which
    # is not a name defined in this function.
    return T.nnet.categorical_crossentropy(output, Y)
if __name__ == "__main__":
config.parse_args()
total_frames = sum(x.shape[0] for x,_ in frame_label_data.training_stream())
logging.info("Total frames: %d"%total_frames)
P = Parameters()
predict = model.build(P)
X = T.matrix('X')
Y = T.ivector('Y')
_,outputs = predict(X)
cross_entropy = T.mean(crossentropy(outputs,Y))
parameters = P.values()
loss = cross_entropy + \
(0.5/total_frames) * sum(T.sum(T.sqr(w)) for w in parameters)
gradients = T.grad(loss,wrt=parameters)
logging.info("Parameters to tune:" + ', '.join(sorted(w.name for w in parameters)))
update_vars = Parameters()
logging.debug("Compiling functions...")
开发者ID:wbgxx333,项目名称:theano-kaldi,代码行数:31,代码来源:train.py
示例17: __init__
def __init__(self,
        input_size, output_size, mem_size, mem_width, hidden_sizes, num_heads,
        max_epochs, momentum, learning_rate ,grad_clip, l2_norm):
    """Store the hyper-parameters and compile the Theano functions.

    Three functions are compiled and stored on the instance:

    * `self.train(input_seq, output_seq)` returns the regularised cost and
      applies one RMSprop update;
    * `self.predict_cost(input_seq, output_seq)` returns the same cost
      without updating anything;
    * `self.predict(input_seq)` returns `[weights, output]`.

    `grad_clip` is indexed as a (lower, upper) pair for element-wise
    gradient clipping; `l2_norm` weights the sum of squared parameters.
    """
    # Plain hyper-parameter storage.
    self.input_size = input_size
    self.output_size = output_size
    self.mem_size = mem_size
    self.mem_width = mem_width
    self.hidden_sizes = hidden_sizes
    self.num_heads = num_heads
    self.max_epochs = max_epochs
    self.momentum = momentum
    self.learning_rate = learning_rate
    self.grad_clip = grad_clip
    self.l2_norm = l2_norm

    # Training bookkeeping.
    self.best_train_cost = np.inf
    self.best_valid_cost = np.inf
    self.train_his = []

    eps = 5e-6  # squashes predictions into [eps, 1 - eps] before the log

    P = Parameters()
    ctrl = controller.build(
        P, self.input_size, self.output_size,
        self.mem_size, self.mem_width, self.hidden_sizes)
    predict = model.build(
        P, self.mem_size, self.mem_width,
        self.hidden_sizes[-1], ctrl, self.num_heads)

    input_seq = T.matrix('input_sequence')
    output_seq = T.matrix('output_sequence')
    _memory, weights, output = predict(input_seq)

    squashed = eps + (1 - 2 * eps) * output
    cross_entropy = T.sum(
        T.nnet.binary_crossentropy(squashed, output_seq), axis=1)

    self.params = P.values()
    # Sum of squared parameters, accumulated from a symbolic zero.
    l2 = sum(((p ** 2).sum() for p in self.params), T.sum(0))
    cost = T.sum(cross_entropy) + self.l2_norm * l2

    lower, upper = grad_clip[0], grad_clip[1]
    grads = [
        T.clip(g, lower, upper)
        for g in T.grad(cost, wrt=self.params)
    ]

    seq_pair = [input_seq, output_seq]
    self.train = theano.function(
        inputs=seq_pair,
        outputs=cost,
        updates=updates.rmsprop(
            self.params, grads,
            momentum=self.momentum,
            learning_rate=self.learning_rate),
    )
    self.predict_cost = theano.function(
        inputs=seq_pair,
        outputs=cost,
    )
    self.predict = theano.function(
        inputs=[input_seq],
        outputs=[weights, output],
    )
开发者ID:c3h3,项目名称:pyntm,代码行数:64,代码来源:ntm.py
示例18: Parameters
import theano.tensor as T
import numpy as np
from theano_toolkit import utils as U
from theano_toolkit import hinton
from theano_toolkit import updates
from theano_toolkit.parameters import Parameters
import ctc
import font
import lstm
from ocr import *
if __name__ == "__main__":
import sys
test_word = sys.argv[1]
P = Parameters()
X = T.matrix('X')
predict = build_model(P,8,512,len(font.chars)+1)
probs = predict(X)
test = theano.function(inputs=[X],outputs=probs)
P.load('model.pkl')
image = font.imagify(test_word)
hinton.plot(image.astype(np.float32).T[::-1])
y_seq = label_seq(test_word)
probs = test(image)
print " ", ' '.join(font.chars[i] if i < len(font.chars) else "_" for i in np.argmax(probs,axis=1))
hinton.plot(probs[:,y_seq].T,max_arr=1.)
开发者ID:Duum,项目名称:theano-ctc,代码行数:29,代码来源:ocr_test.py
示例19: int
if __name__ == "__main__":
batch_size = 256
validation = 0.1
all_X, all_W, all_Y = data.load('data/training.csv')
validation_count = int(math.ceil(all_X.shape[0] * validation))
train_X, train_W, train_Y = (all_X[:-validation_count],
all_W[:-validation_count],
all_Y[:-validation_count])
valid_X, valid_W, valid_Y = (all_X[-validation_count:],
all_W[-validation_count:],
all_Y[-validation_count:])
P = Parameters()
data_X = theano.shared(train_X)
data_W = theano.shared(train_W)
data_Y = theano.shared(train_Y)
train, test = get_train_test_fn(P, data_X, data_W, data_Y)
batches = int(math.ceil(train_X.shape[0] / float(batch_size)))
best_score = -np.inf
for epoch in xrange(20):
for i in xrange(batches):
train(i, batch_size)
scores = test(valid_X, valid_W, valid_Y)
print scores,
if scores[0] > best_score :
P.save('model.pkl')
best_score = scores[0]
开发者ID:shawntan,项目名称:higgs-boson,代码行数:30,代码来源:train.py
示例20: make_train
def make_train(image_size , word_size , first_hidden_size , proj_size , reg_lambda) :
#initialize model
P = Parameters()
image_projecting = image_project.build(P, image_size, proj_size)
batched_triplet_encoding , vector_triplet_encoding = triplet_encoding.build(P , word_size , first_hidden_size , proj_size)
image_vector = T.vector()
#training
correct_triplet = [T.vector(dtype='float32') , T.vector(dtype='float32') , T.vector(dtype='float32')] #[E,R,E]
negative_triplet = [T.matrix(dtype='float32') , T.matrix(dtype='float32') , T.matrix(dtype='float32')]
image_projection_vector = image_projecting(image_vector)
image_projection_matrix = repeat(image_projection_vector.dimshuffle(('x',0)) , negative_triplet[0].shape[0] , axis=0)
correct_triplet_encoding_vector = vector_triplet_encoding(correct_triplet[0] , correct_triplet[1] , correct_triplet[2])
negative_triplet_encoding_matrix = batched_triplet_encoding(negative_triplet[0] , negative_triplet[1] , negative_triplet[2])
correct_cross_dot_scalar = T.dot(image_projection_vector , correct_triplet_encoding_vector)
negative_cross_dot_vector = T.batched_dot(image_projection_matrix , negative_triplet_encoding_matrix)
#margin cost
zero_cost = T.zeros_like(negative_cross_dot_vector)
margin_cost = 1 - correct_cross_dot_scalar + negative_cross_dot_vector
cost_vector = T.switch(T.gt(zero_cost , margin_cost) , zero_cost , margin_cost)
#regulizar cost
params = P.values()
l2 = T.sum(0)
for p in params:
l2 = l2 + (p ** 2).sum()
cost = T.sum(cost_vector)/T.shape(negative_triplet[0])[0] + reg_lambda * l2 #assume word vector has been put into P #unsolved
grads = [T.clip(g, -100, 100) for g in T.grad(cost, wrt=params)]
lr = T.scalar(name='learning rate',dtype='float32')
train = theano.function(
inputs=[image_vector, correct_triplet[0], correct_triplet[1], correct_triplet[2], negative_triplet[0], negative_triplet[1], negative_triplet[2], lr],
outputs=cost,
updates=updates.rmsprop(params, grads, learning_rate=lr),
allow_input_downcast=True
)
#valid
valid = theano.function(
inputs=[image_vector, correct_triplet[0], correct_triplet[1], correct_triplet[2], negative_triplet[0], negative_triplet[1], negative_triplet[2]],
outputs=cost,
allow_input_downcast=True
)
#testing
all_triplet = [T.matrix(dtype='float32') , T.matrix(dtype='float32') , T.matrix(dtype='float32')]
image_projection_matrix_test = repeat(image_projection_vector.dimshuffle(('x',0)) , all_triplet[0].shape[0] , axis=0)
all_triplet_encoding_matrix = batched_triplet_encoding(all_triplet[0] , all_triplet[1] , all_triplet[2])
all_cross_dot_vector = T.batched_dot(image_projection_matrix_test , all_triplet_encoding_matrix)
test = theano.function(
inputs=[image_vector, all_triplet[0], all_triplet[1], all_triplet[2]],
outputs=all_cross_dot_vector,
allow_input_downcast=True
)
#default
P_default = Parameters()
P_default['left'] = 2 * (np.random.rand(word_size) - 0.5)
P_default['right'] = 2 * (np.random.rand(word_size) - 0.5)
P_default['relation'] = 2 * (np.random.rand(word_size) - 0.5)
correct_triplet_d = [T.vector(dtype='float32') , T.vector(dtype='float32') , T.vector(dtype='float32')] #[E,R,E]
negative_triplet_d = [T.matrix(dtype='float32') , T.matrix(dtype='float32') , T.matrix(dtype='float32')]
correct_triplet_d_train = [correct_triplet_d,correct_triplet_d,correct_triplet_d]
negative_triplet_d_train = [negative_triplet_d,negative_triplet_d,negative_triplet_d]
cost = 0
for i in range(3) :
if i == 0 :
correct_triplet_d_train[0] = [correct_triplet_d[0],P_default['relation'],P_default['right']]
negative_triplet_d_train[0] = [negative_triplet_d[0],repeat(P_default['relation'].dimshuffle(('x',0)),negative_triplet_d[0].shape[0] , axis=0),repeat(P_default['right'].dimshuffle(('x',0)),negative_triplet_d[0].shape[0] , axis=0)]
elif i == 1 :
correct_triplet_d_train[1] = [P_default['left'],correct_triplet_d[1],P_default['right']]
negative_triplet_d_train[1] = [repeat(P_default['left'].dimshuffle(('x',0)),negative_triplet_d[1].shape[0] , axis=0),negative_triplet_d[1],repeat(P_default['right'].dimshuffle(('x',0)),negative_triplet_d[1].shape[0] , axis=0)]
elif i == 2 :
correct_triplet_d_train[2] = [P_default['left'],P_default['relation'],correct_triplet_d[2]]
negative_triplet_d_train[2] = [repeat(P_default['left'].dimshuffle(('x',0)),negative_triplet_d[2].shape[0] , axis=0),repeat(P_default['relation'].dimshuffle(('x',0)),negative_triplet_d[2].shape[0] , axis=0),negative_triplet_d[2]]
image_projection_matrix_d = repeat(image_projection_vector.dimshuffle(('x',0)) , negative_triplet_d[i].shape[0] , axis=0)
correct_triplet_encoding_vector_d = vector_triplet_encoding(correct_triplet_d_train[i][0] , correct_triplet_d_train[i][1] , correct_triplet_d_train[i][2])
negative_triplet_encoding_matrix_d = batched_triplet_encoding(negative_triplet_d_train[i][0] , negative_triplet_d_train[i][1] , negative_triplet_d_train[i][2])
correct_cross_dot_scalar_d = T.dot(image_projection_vector , correct_triplet_encoding_vector_d)
negative_cross_dot_vector_d = T.batched_dot(image_projection_matrix_d , negative_triplet_encoding_matrix_d)
#margin cost
zero_cost_d = T.zeros_like(negative_cross_dot_vector_d)
margin_cost_d = 1 - correct_cross_dot_scalar_d + negative_cross_dot_vector_d
cost_vector_d = T.switch(T.gt(zero_cost_d , margin_cost_d) , zero_cost_d , margin_cost_d)
cost = cost + T.sum(cost_vector_d)/T.shape(negative_triplet[i])[0]
params_d = P_default.values()
#.........这里部分代码省略.........
开发者ID:darongliu,项目名称:Cross_Modal_Projection,代码行数:101,代码来源:train.py
注：本文中的theano_toolkit.parameters.Parameters类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。
请发表评论