This article collects typical usage examples of the Python function theano.tensor.col. If you have been wondering exactly what col does, how to call it, or what real-world usage looks like, the curated code samples below should help.
The text below presents 20 code examples of the col function, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code samples.
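Before the examples, here is a minimal sketch (assuming Theano is installed and config.floatX is left at its float64 default) of what tensor.col actually returns: a symbolic 2-D matrix whose second dimension is broadcastable, i.e. a column of shape (n, 1).

import theano
import theano.tensor as T

c = T.col('c')                    # symbolic column matrix
print(c.ndim)                     # 2
print(c.broadcastable)            # (False, True): any number of rows, exactly one column
f = theano.function([c], c * 2)
print(f([[1.0], [2.0], [3.0]]))   # [[2.] [4.] [6.]]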
Example 1: test_broadcast_arguments

def test_broadcast_arguments(self):
    m = Module()
    m.random = RandomStreams(utt.fetch_seed())
    low = tensor.vector()
    high = tensor.col()
    out = m.random.uniform(low=low, high=high)
    assert out.ndim == 2
    m.f = Method([low, high], out)
    made = m.make()
    made.random.initialize()
    rng_seed = numpy.random.RandomState(utt.fetch_seed()).randint(2**30)
    numpy_rng = numpy.random.RandomState(int(rng_seed))
    low_vals = [
        numpy.asarray([-5, .5, 0, 1], dtype=config.floatX),
        numpy.asarray([.9], dtype=config.floatX),
        numpy.asarray([-5, .5, 0, 1], dtype=config.floatX)]
    high_vals = [
        numpy.asarray([[1.]], dtype=config.floatX),
        numpy.asarray([[1.], [1.1], [1.5]], dtype=config.floatX),
        numpy.asarray([[1.], [1.1], [1.5]], dtype=config.floatX)]
    val0 = made.f(low_vals[0], high_vals[0])
    val1 = made.f(low_vals[1], high_vals[1])
    val2 = made.f(low_vals[2], high_vals[2])
    numpy_val0 = numpy_rng.uniform(low=low_vals[0], high=high_vals[0])
    numpy_val1 = numpy_rng.uniform(low=low_vals[1], high=high_vals[1])
    numpy_val2 = numpy_rng.uniform(low=low_vals[2], high=high_vals[2])
    assert numpy.allclose(val0, numpy_val0)
    assert numpy.allclose(val1, numpy_val1)
    assert numpy.allclose(val2, numpy_val2)
Author: HaniAlmousli | Project: Theano | Lines: 33 | Source: test_randomstreams.py
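A hedged NumPy-only sketch of the broadcasting this test relies on: a 1-D low of shape (4,) and a column high of shape (3, 1) broadcast together to a (3, 4) sample, which is why out.ndim == 2 above.

import numpy as np

low = np.asarray([-5, .5, 0, 1])         # shape (4,)
high = np.asarray([[1.], [1.1], [1.5]])  # shape (3, 1)
rng = np.random.RandomState(0)
sample = rng.uniform(low=low, high=high)
print(sample.shape)                      # (3, 4): rows from `high`, columns from `low`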
Example 2: __init__

def __init__(self, args):
    reward = T.col('r')
    action = T.icol('a')
    terminal = T.icol('t')
    discount = T.scalar('gamma')
    learningRate = T.scalar('lr')
    rho = T.scalar('rho')
    epsilon = T.scalar('eps')
    rng = np.random.RandomState(42)
    self.batchNb = args.batchSize
    #convLayers = [[(8,8),(4,4),64],
    #              [(4,4),(2,2),128],
    #              [(3,3),(1,1),256],
    #              [(3,3),(1,1),512]]
    #fcl = [1024, 6]
    convLayers = [[(8,8),(4,4),64],
                  [(4,4),(2,2),128],
                  [(3,3),(1,1),256],
                  [(3,3),(1,1),256]]
    fcl = [1024, args.actionNb]
    self.q1 = NetStruct(convLayers, fcl, (4,100,100), rng, args)
    self.q2 = NetStruct(convLayers, fcl, (4,100,100), rng, args)
    self.q2.setParams(self.q1)
    self.states = theano.shared(np.zeros((args.batchSize,4,100,100), dtype='float32'))
    self.states2 = theano.shared(np.zeros((args.batchSize,4,100,100), dtype='float32'))
    self.actions = theano.shared(np.zeros((args.batchSize,1), dtype='int32'), broadcastable=(False,True))
    self.rewards = theano.shared(np.zeros((args.batchSize,1), dtype='float32'), broadcastable=(False,True))
    self.terminals = theano.shared(np.zeros((args.batchSize,1), dtype='int32'), broadcastable=(False,True))
    self.learningRate = theano.shared(np.array(args.learningRate, dtype='float32'))
    self.rho = theano.shared(np.array(args.rmsPropRho, dtype='float32'))
    self.epsilon = theano.shared(np.array(args.rmsPropEpsilon, dtype='float32'))
    self.discount = theano.shared(np.array(args.discountFactor, dtype='float32'))
    loss = self.QLoss(self.q1.output, self.q2.output, action, reward, terminal, discount)
    params = self.q1.getParams()
    updates = self.rmsProp(loss, params, rho, epsilon, learningRate)
    self.train_model = theano.function(
        [],
        loss,
        updates=updates,
        givens={
            self.q1.input: self.states,
            self.q2.input: self.states2,
            action: self.actions,
            reward: self.rewards,
            terminal: self.terminals,
            discount: self.discount,
            learningRate: self.learningRate,
            rho: self.rho,
            epsilon: self.epsilon
        }
    )
Author: Levoila | Project: CrappyAI | Lines: 59 | Source: net.py
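A short, hedged sketch of why the shared variables above are created with broadcastable=(False, True): a plain shared (batch, 1) array is not broadcastable in its second dimension, so it could not be substituted via givens for a T.col variable such as reward.

import numpy as np
import theano
import theano.tensor as T

plain = theano.shared(np.zeros((32, 1), dtype='float32'))
print(plain.broadcastable)      # (False, False): does not match T.col

col_like = theano.shared(np.zeros((32, 1), dtype='float32'),
                         broadcastable=(False, True))
print(col_like.broadcastable)   # (False, True): same pattern as T.col('r')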
Example 3: createGradientFunctions

def createGradientFunctions(self):
    # Create the Theano variables
    W1,W2,W3,W4,W5,W6,x,eps = T.dmatrices("W1","W2","W3","W4","W5","W6","x","eps")
    # Create biases as cols so they can be broadcasted for minibatches
    b1,b2,b3,b4,b5,b6 = T.dcols("b1","b2","b3","b4","b5","b6")
    z1 = T.col("z1")
    if self.continuous:
        # convolve x
        # no_filters = 100, stride = 4, filter_size = 50
        h_encoder = T.tanh(T.dot(W1,x) + b1)
        #h_encoder = T.dot(W1,x) + b1
    else:
        h_encoder = T.tanh(T.dot(W1,x) + b1)
    mu_encoder = T.dot(W2,h_encoder) + b2
    log_sigma_encoder = 0.5*(T.dot(W3,h_encoder) + b3)
    mu_encoder = T.dot(W2,h_encoder) + b2
    log_sigma_encoder = 0.5*(T.dot(W3,h_encoder) + b3)
    # Find the hidden variable z
    z = mu_encoder + T.exp(log_sigma_encoder)*eps
    prior = 0.5*T.sum(1 + 2*log_sigma_encoder - mu_encoder**2 - T.exp(2*log_sigma_encoder))
    # Set up decoding layer
    if self.continuous:
        h_decoder = T.nnet.softplus(T.dot(W4,z) + b4)
        h_dec = T.nnet.softplus(T.dot(W4,z1) + b4)
        #h_decoder = T.dot(W4,z) + b4
        #h_dec = T.dot(W4,z1) + b4
        mu_decoder = T.tanh(T.dot(W5,h_decoder) + b5)
        mu_dec = T.tanh(T.dot(W5,h_dec) + b5)
        log_sigma_decoder = 0.5*(T.dot(W6,h_decoder) + b6)
        logpxz = T.sum(-(0.5 * np.log(2 * np.pi) + log_sigma_decoder) - 0.5 * ((x - mu_decoder) / T.exp(log_sigma_decoder))**2)
        gradvariables = [W1,W2,W3,W4,W5,W6,b1,b2,b3,b4,b5,b6]
    else:
        h_decoder = T.tanh(T.dot(W4,z) + b4)
        y = T.nnet.sigmoid(T.dot(W5,h_decoder) + b5)
        logpxz = -T.nnet.binary_crossentropy(y,x).sum()
        gradvariables = [W1,W2,W3,W4,W5,b1,b2,b3,b4,b5]
    logp = logpxz + prior
    # Compute all the gradients
    derivatives = T.grad(logp,gradvariables)
    # Add the lowerbound so we can keep track of results
    derivatives.append(logp)
    self.get_z = th.function(gradvariables+[x,eps], z, on_unused_input='ignore')
    self.generate = th.function(gradvariables+[z1,x,eps], mu_dec, on_unused_input='ignore')
    self.predict = th.function(gradvariables+[x,eps], mu_decoder, on_unused_input='ignore')
    self.gradientfunction = th.function(gradvariables + [x,eps], derivatives, on_unused_input='ignore')
    self.lowerboundfunction = th.function(gradvariables + [x,eps], logp, on_unused_input='ignore')
Author: KyriacosShiarli | Project: Variational-Autoencoder | Lines: 58 | Source: VariationalAutoencoder.py
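A hedged NumPy sketch of the reparameterization step used above (z = mu_encoder + exp(log_sigma_encoder) * eps): the randomness lives entirely in eps, so gradients can flow through mu and log_sigma while eps stays an external input.

import numpy as np

rng = np.random.RandomState(0)
mu = np.array([[0.5], [-1.0]])
log_sigma = np.array([[0.1], [-0.3]])
eps = rng.standard_normal(mu.shape)
z = mu + np.exp(log_sigma) * eps
print(z.shape)   # (2, 1): one latent value per row, sampled around mu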
Example 4: test_ndim_mismatch

def test_ndim_mismatch(self):
    rng = numpy.random.RandomState(utt.fetch_seed())
    data = rng.rand(5).astype(self.dtype)
    x = self.shared(data)
    y = tensor.col('y', self.dtype)
    cond = theano.tensor.iscalar('cond')
    self.assertRaises(TypeError, ifelse, cond, x, y)
    self.assertRaises(TypeError, ifelse, cond, y, x)
Author: aboSamoor | Project: Theano | Lines: 9 | Source: test_ifelse.py
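For reference, the mismatch being tested comes from the ndim of the two variable types; a one-line sketch (assuming a standard Theano install):

import theano.tensor as T

print(T.vector().ndim, T.col().ndim)   # 1 2 -- ifelse requires both branches to share a type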
Example 5: testDataSet

def testDataSet(self, dataSet_, dataLabels_):
    dataSet = T.matrix("dataSet")
    labels = T.col("labels")
    svLabels = T.col("svLabels")
    gamma = T.dscalar("gamma")
    svs = T.matrix("supportVectors")
    svAlphas = T.matrix("svAlphas")
    b = T.dscalar("b")
    # we need to transpose the result because the results of the per-row actions are usually columns
    errorVec = theano.scan(lambda row, realLabel: self.testDataSet_inner_(svs, row, gamma, svLabels, svAlphas, b, realLabel), sequences=[dataSet, labels])[0]
    errors = T.sum(errorVec)
    inputs = [dataSet, labels, svs, svLabels, gamma, svAlphas, b]
    compErrors = theano.function(inputs=inputs, outputs=errors, on_unused_input='ignore')
    gamma_ = 1/(-1*self.Training.UsedKernel[1]**2)
    numErrors = compErrors(dataSet_, dataLabels_, self.Training.SupportVectors, self.Training.SVLabels, gamma_, self.Training.Alphas[self.Training.SVIndices], self.Training.B.item(0))
    return float(numErrors) / float(dataSet_.shape[0])
Author: martinmeinke | Project: ipml | Lines: 19 | Source: HybridSVMClassifier.py
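A minimal, hedged sketch of the theano.scan pattern used above: the callable is applied once per row of the sequences, and scan returns an (outputs, updates) pair, which is why the example indexes [0].

import numpy as np
import theano
import theano.tensor as T

m = T.matrix('m')                                          # assumes default floatX (float64)
row_sums, _ = theano.scan(lambda row: row.sum(), sequences=[m])
f = theano.function([m], row_sums)
print(f(np.arange(6).reshape(3, 2).astype('float64')))     # [1. 5. 9.]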
Example 6: __init__

def __init__(self, numpy_rng, theano_rng=None, first_layer_type='bernoulli', mean_doc_size=1, n_ins=784, mid_layer_sizes=[200], inner_code_length=10):
    """This class is made to support a variable number of layers.

    :type numpy_rng: numpy.random.RandomState
    :param numpy_rng: numpy random number generator used to draw initial weights

    :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
    :param theano_rng: Theano random generator; if None is given one is generated based on a seed drawn from `rng`

    :type n_ins: int
    :param n_ins: dimension of the input (and autoencoder output, y) of the SMH

    :type n_code_length: int
    :param n_code_length: how many codes to squash down to in the middle layer
    """
    self.first_layer_type = first_layer_type
    self.mean_doc_size = mean_doc_size
    self.sigmoid_layers = []
    self.rbm_layers = []
    self.params = []
    self.n_ins = n_ins
    self.inner_code_length = inner_code_length
    self.mid_layer_sizes = list(mid_layer_sizes)
    self.numpy_rng = numpy_rng
    self.theano_rng = RandomStreams(numpy_rng.randint(2**30))
    # allocate symbolic variables for the data
    if (theano.config.floatX == "float32"):
        self.x = T.matrix('x')
        self.x_sums = T.col('x_sums')
        self.y = T.matrix('y')  # the output (after finetuning) should look the same as the input
    else:
        if (theano.config.floatX == "float64"):
            self.x = T.dmatrix('x')
            self.x_sums = T.dcol('x_sums')
            self.y = T.dmatrix('y')  # the output (after finetuning) should look the same as the input
        else:
            raise Exception  # not sure what's up here..
    # The SMH is an MLP, for which all weights of intermediate layers are shared with a
    # different RBM. We will first construct the SMH as a deep multilayer perceptron, and
    # when constructing each sigmoidal layer we also construct an RBM that shares weights
    # with that layer. During pretraining we will train these RBMs (which will lead
    # to changing the weights of the MLP as well). During finetuning we will finish
    # training the SMH by doing stochastic gradient descent on the MLP.
    self.init_layers()
Author: utunga | Project: hashmapd | Lines: 54 | Source: SMH.py
Example 7: __init__

def __init__(self, input_width, input_height, output_dim, num_frames, batch_size):
    self.input_width = input_width
    self.input_height = input_height
    self.output_dim = output_dim
    self.num_frames = num_frames
    self.batch_size = batch_size
    self.gamma = 0.99  # discount factor
    self.rho = 0.99
    self.lr = 0.00025  # learning rate
    self.momentum = 0.95
    self.freeze_targets = True
    self.l_out = self.build_network(input_width, input_height, output_dim, num_frames, batch_size)
    if self.freeze_targets:
        self.next_l_out = self.build_network(input_width, input_height, output_dim, num_frames, batch_size)
        self.reset_q_hat()
    states = T.tensor4('states')
    next_states = T.tensor4('next_states')
    rewards = T.col('rewards')
    actions = T.icol('actions')
    # terminals = T.icol('terminals')
    self.states_shared = theano.shared(np.zeros((batch_size, num_frames, input_height, input_width), dtype=theano.config.floatX))
    self.next_states_shared = theano.shared(np.zeros((batch_size, num_frames, input_height, input_width), dtype=theano.config.floatX))
    self.rewards_shared = theano.shared(np.zeros((batch_size, 1), dtype=theano.config.floatX), broadcastable=(False, True))
    self.actions_shared = theano.shared(np.zeros((batch_size, 1), dtype='int32'), broadcastable=(False, True))
    # self.terminals_shared = theano.shared(np.zeros((batch_size, 1), dtype='int32'), broadcastable=(False, True))
    q_vals = self.l_out.get_output(states / 255.0)
    if self.freeze_targets:
        next_q_vals = self.next_l_out.get_output(next_states / 255.0)
    else:
        next_q_vals = self.l_out.get_output(next_states / 255.0)
        next_q_vals = theano.gradient.disconnected_grad(next_q_vals)
    target = rewards + self.gamma * T.max(next_q_vals, axis=1, keepdims=True)
    diff = target - q_vals[T.arange(batch_size), actions.reshape((-1,))].reshape((-1, 1))
    loss = T.mean(diff ** 2)
    params = lasagne.layers.helper.get_all_params(self.l_out)
    givens = {
        states: self.states_shared,
        next_states: self.next_states_shared,
        rewards: self.rewards_shared,
        actions: self.actions_shared,
        # terminals: self.terminals_shared
    }
    if self.momentum > 0:
        updates = rmsprop_nesterov(loss, params, self.lr, self.rho, self.momentum, 1e-2)
    else:
        updates = lasagne.updates.rmsprop(loss, params, self.lr, self.rho, 1e-6)
    self._train = theano.function([], [loss, q_vals], updates=updates, givens=givens)
    self._q_vals = theano.function([], q_vals, givens={states: self.states_shared})
Author: npow | Project: deep_q_rl | Lines: 54 | Source: network.py
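A hedged NumPy sketch of the Q-learning target built above: target = reward + gamma * max_a' Q(next_state, a'), and the error is taken only at the action that was actually played in each batch row.

import numpy as np

gamma = 0.99
rewards = np.array([[1.0], [0.0]])            # (batch, 1), like T.col('rewards')
next_q = np.array([[0.2, 0.7], [0.5, 0.1]])   # (batch, num_actions)
q = np.array([[0.3, 0.6], [0.4, 0.2]])
actions = np.array([1, 0])
target = rewards + gamma * next_q.max(axis=1, keepdims=True)
diff = target - q[np.arange(2), actions].reshape(-1, 1)
print(diff)   # [[1.093] [0.095]]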
Example 8: build_finetune_functions

def build_finetune_functions(self, batch_size, learning_rate):
    '''Generates a function `train` that implements one step of finetuning, a function
    `validate` that computes the error on a batch from the validation set, and a function
    `test` that computes the error on a batch from the testing set.

    :type batch_size: int
    :param batch_size: size of a minibatch

    :type learning_rate: float
    :param learning_rate: learning rate used during finetune stage
    '''
    train_set_x = T.matrix('train_set_x')
    train_set_x_sums = T.col('train_set_x_sums')
    valid_set_x = T.matrix('valid_set_x')
    valid_set_x_sums = T.col('valid_set_x_sums')
    test_set_x = T.matrix('test_set_x')
    test_set_x_sums = T.col('test_set_x_sums')
    # compute the gradients with respect to the model parameters
    gparams = T.grad(self.finetune_cost, self.params)
    # compute list of fine-tuning updates
    updates = {}
    for param, gparam in zip(self.params, gparams):
        updates[param] = param - gparam * learning_rate
    train_fn = theano.function(inputs=[train_set_x, train_set_x_sums],
                               outputs=self.finetune_cost,
                               updates=updates,
                               givens={self.x: train_set_x,
                                       self.x_sums: train_set_x_sums})
    valid_score_i = theano.function([valid_set_x, valid_set_x_sums], self.finetune_cost,
                                    givens={self.x: valid_set_x,
                                            self.x_sums: valid_set_x_sums})
    test_score_i = theano.function([test_set_x, test_set_x_sums], self.finetune_cost,
                                   givens={self.x: test_set_x,
                                           self.x_sums: test_set_x_sums})
    return train_fn, valid_score_i, test_score_i
Author: utunga | Project: hashmapd | Lines: 41 | Source: SMH.py
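A hedged, minimal sketch of the manual SGD update dict built above: T.grad gives one gradient per parameter, and each shared parameter is mapped to param - learning_rate * grad in the updates passed to theano.function.

import theano
import theano.tensor as T

w = theano.shared(0.0, name='w')
x = T.scalar('x')
cost = (w * x - 1.0) ** 2
gw = T.grad(cost, w)
train = theano.function([x], cost, updates={w: w - 0.1 * gw})
print(train(1.0), w.get_value())   # cost before the step, then the updated weight (0.2)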
Example 9: neural_tensor_network

def neural_tensor_network():
    # tensor params
    subj = T.col('e_1')
    targets = T.matrix('e_2')
    W = T.tensor3('W')
    # neural net params
    u = T.col('u')
    V = T.matrix('V')
    b = T.col('b')
    # tensor
    h = subj.T.dot(W).dot(targets)
    # neural net
    d = subj.shape[0]
    V_subj = V[:, :d].dot(subj)
    V_targ = V[:, d:].dot(targets)
    activations = T.tanh(h + V_subj + V_targ + b)
    score = u.T.dot(activations).reshape((-1, 1))
    margins = score[0] - score[1:]
    cost = T.min(T.concatenate((T.ones_like(margins), margins), axis=1), axis=1).mean()
    gsubj, gtargets, gW, gu, gV, gb = T.grad(cost, [subj, targets, W, u, V, b])
    print('Compiling NTN score')
    score = theano.function([subj, W, targets, u, V, b], score, name='NTN Score',
                            mode='FAST_RUN')
    print('Compiling NTN fprop')
    fprop = theano.function([subj, W, targets, u, V, b], cost, name='NTN fprop',
                            mode='FAST_RUN')
    print('Compiling NTN bprop')
    bprop = theano.function([subj, W, targets, u, V, b],
                            outputs=[gsubj, gW, gtargets, gu, gV, gb],
                            name='NTN bprop', mode='FAST_RUN')
    return {'score': score, 'fprop': fprop, 'bprop': bprop}
Author: eshijia | Project: traversing_knowledge_graphs | Lines: 41 | Source: models.py
Example 10: setup_theano

def setup_theano(self):
    # for numpy optimization
    oneCol = T.col("oneCol")
    pi_t = T.col("pi_t")
    z_t = T.col("z_t")
    z_t1 = z_t.reshape((self.numKeypoints, 2))
    pts = T.concatenate((z_t1, oneCol), axis=1)
    A_t_ = T.matrix("A_t_")
    r_t_ = T.dot(A_t_, pts.transpose()).transpose()
    r_t1_ = r_t_[:, 0:2].reshape((2 * self.numKeypoints, 1))
    diff_ = pi_t * (r_t1_ - self.mu)
    difft_ = diff_.reshape((1, 2 * self.numKeypoints))
    cost_1 = T.dot(difft_, diff_)
    # cost_1 = theano.printing.Print('cost is:')(cost_1)
    cost_ = T.max(cost_1)
    A_t_grad_ = T.grad(cost=cost_, wrt=A_t_)
    A_t_grad_ = T.basic.set_subtensor(A_t_grad_[2, :], 0)
    self.cost = theano.function(inputs=[A_t_, pi_t, z_t, oneCol], outputs=[cost_, A_t_grad_])
Author: YangXS | Project: lisa_emotiw | Lines: 21 | Source: faceAlign.py
Example 11: pretraining_functions

def pretraining_functions(self, batch_size, method, pretrain_lr, k):
    '''Generates a list of functions, for performing one step of gradient descent at a
    given layer. The function will require as input a minibatch of data, and to train an
    RBM you just need to iterate, calling the corresponding function on all minibatches.

    :type batch_size: int
    :param batch_size: size of a [mini]batch

    :type method: string
    :param method: type of Gibbs sampling to perform: 'cd' (default) or 'pcd'

    :type k: int
    :param k: number of Gibbs steps to do in CD-k / PCD-k

    :type finetune_lr: float
    :param finetune_lr: the 'learning rate' to use during finetuning phase
    '''
    learning_rate = T.scalar('lr')  # learning rate to use
    #learning_rate.value = pretrain_lr
    # i *think* the following is equivalent to above.. doing this because i can't see where lr gets a value at all
    #learning_rate = theano.shared(pretrain_lr, 'learning_rate')
    train_set_x = T.matrix('train_set_x')
    train_set_x_sums = T.col('train_set_x_sums')
    pretrain_fns = []
    for rbm in self.rbm_layers:
        if method == 'pcd':
            # initialize storage for the persistent chain (state = hidden layer of chain)
            persistent_chain = theano.shared(numpy.zeros((batch_size, rbm.n_hidden), dtype=theano.config.floatX))
            # get the cost and the gradient corresponding to one step of PCD-k
            cost, updates = rbm.get_cost_updates(lr=learning_rate, persistent=persistent_chain, k=k)
        else:
            # default = use CD instead
            cost, updates = rbm.get_cost_updates(lr=learning_rate)
        # compile the theano function
        fn = theano.function(inputs=[train_set_x, train_set_x_sums,
                                     theano.Param(learning_rate, default=0.1)],
                             outputs=cost,
                             updates=updates,
                             givens={self.x: train_set_x,
                                     self.x_sums: train_set_x_sums}
                             # uncomment the following line to perform debugging:
                             # ,mode=theano.compile.debugmode.DebugMode(stability_patience=5)
                             )
        # append `fn` to the list of functions
        pretrain_fns.append(fn)
    return pretrain_fns
Author: utunga | Project: hashmapd | Lines: 49 | Source: SMH.py
Example 12: transE_model

def transE_model():
    '''
    Note X_S is a column and X_T is a matrix so that broadcasting occurs
    across the columns of X_T (this allows batching X_T with negatives,
    for example).
    '''
    # construct theano expression graph
    X_s = T.col('X_s')
    W = T.matrix('W')
    X_t = T.matrix('X_t')
    rels = W[:, :, None].transpose(1, 0, 2)
    # Computes x_{r_1} + x_{r_2} + ... + x_{r_n} - X_t
    results, updates = theano.scan(fn=lambda rel, v: rel + v,
                                   outputs_info=-X_t, sequences=[rels])
    # score is always a column vector
    score = T.sum((X_s + results[-1]) ** 2, axis=0).reshape((-1, 1))
    margins = 1. + score[0] - score[1:]
    # zero out negative entries
    pos_parts = margins * (margins > 0)
    # we are using an online Maximizer, so the objective is negated
    cost = -pos_parts.mean()
    gX_s, gW, gX_t = T.grad(cost, [X_s, W, X_t])
    print('Compiling TransE score')
    # return negative score since this is a ranking
    score = theano.function([X_s, W, X_t], -score, name='transE Score',
                            mode='FAST_RUN')
    score.trust_input = True
    print('Compiling TransE fprop')
    fprop = theano.function([X_s, W, X_t], cost, name='transE fprop',
                            mode='FAST_RUN')
    fprop.trust_input = True
    print('Compiling TransE bprop')
    bprop = theano.function([X_s, W, X_t],
                            outputs=[gX_s, gW, gX_t],
                            name='transE bprop', mode='FAST_RUN')
    bprop.trust_input = True
    return {'score': score, 'fprop': fprop, 'bprop': bprop}
Author: eshijia | Project: traversing_knowledge_graphs | Lines: 48 | Source: models.py
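A hedged NumPy sketch of the margin-ranking cost above: row 0 of score is the positive triple, the remaining rows are negatives, and only margins greater than zero contribute (the mean is negated because the caller maximizes).

import numpy as np

score = np.array([[0.4], [1.2], [0.3]])   # positive first, then two negatives
margins = 1.0 + score[0] - score[1:]
pos_parts = margins * (margins > 0)
cost = -pos_parts.mean()
print(margins.ravel(), cost)              # [0.2 1.1] -0.65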
Example 13: test_wrong_broadcast

def test_wrong_broadcast(self):
    a = tt.col()
    increment = tt.vector()

    # These symbolic graphs are legitimate, as long as increment has exactly
    # one element. So it should fail at runtime, not at compile time.
    rng = numpy.random.RandomState(utt.fetch_seed())

    def rng_randX(*shape):
        return rng.rand(*shape).astype(theano.config.floatX)

    for op in (tt.set_subtensor, tt.inc_subtensor):
        for base in (a[:], a[0]):
            out = op(base, increment)
            f = theano.function([a, increment], out)
            # This one should work
            f(rng_randX(3, 1), rng_randX(1))
            # These ones should not
            self.assertRaises(ValueError, f, rng_randX(3, 1), rng_randX(2))
            self.assertRaises(ValueError, f, rng_randX(3, 1), rng_randX(3))
            self.assertRaises(ValueError, f, rng_randX(3, 1), rng_randX(0))
Author: igul222 | Project: Theano | Lines: 21 | Source: test_inc_subtensor.py
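A hedged NumPy analogue of the runtime constraint this test checks: assigning into a (3, 1) column slice only works when the increment broadcasts to that shape, i.e. when it has exactly one element.

import numpy as np

a = np.zeros((3, 1))
a[:] = np.array([7.0])           # fine: a single element broadcasts over the column
try:
    a[:] = np.array([1.0, 2.0])  # shape (2,) cannot broadcast to (3, 1)
except ValueError as exc:
    print("fails as expected:", exc)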
Example 14: __init__

def __init__(self, env, args, rng, name="DQNLasagne"):
    """ Initializes a network based on the Lasagne Theano framework.

    Args:
        env (AtariEnv): The environment in which the agent actuates.
        args (argparse.Namespace): All settings either with a default value or set via command line arguments.
        rng (mtrand.RandomState): Initialized Mersenne Twister pseudo-random number generator.
        name (str): The name of the network object.

    Note:
        This function should always call the base class first to initialize
        the common values for the networks.
    """
    _logger.info("Initialize object of type " + str(type(self).__name__))
    super(DQNLasagne, self).__init__(env, args, rng, name)
    self.input_shape = (self.batch_size, self.sequence_length, args.frame_width, args.frame_height)
    self.dummy_batch = np.zeros(self.input_shape, dtype=np.uint8)
    lasagne.random.set_rng(self.rng)
    self.network = self._create_layer()
    # TODO: Load weights from pretrained network?!
    if not self.args.load_weights == None:
        self.load_weights(self.args.load_weights)
    if self.target_update_frequency > 0:
        self.target_network = self._create_layer()
        self._copy_theta()
    states = T.tensor4('states')
    followup_states = T.tensor4('followup_states')
    rewards = T.col('rewards')
    actions = T.icol('actions')
    terminals = T.icol('terminals')
    self.states_shared = theano.shared(
        np.zeros(self.input_shape, dtype=theano.config.floatX)
    )
    self.followup_states_shared = theano.shared(
        np.zeros(self.input_shape, dtype=theano.config.floatX)
    )
    self.rewards_shared = theano.shared(
        np.zeros((self.batch_size, 1), dtype=theano.config.floatX),
        broadcastable=(False, True)
    )
    self.actions_shared = theano.shared(
        np.zeros((self.batch_size, 1), dtype='int32'),
        broadcastable=(False, True)
    )
    self.terminals_shared = theano.shared(
        np.zeros((self.batch_size, 1), dtype='int32'),
        broadcastable=(False, True)
    )
    qvalues = lasagne.layers.get_output(
        self.network,
        self._prepare_network_input(states)
    )
    if self.target_update_frequency > 0:
        qvalues_followup_states = lasagne.layers.get_output(
            self.target_network,
            self._prepare_network_input(followup_states)
        )
    else:
        qvalues_followup_states = lasagne.layers.get_output(
            self.network,
            self._prepare_network_input(followup_states)
        )
        qvalues_followup_states = theano.gradient.disconnected_grad(qvalues_followup_states)
    targets = (rewards +
               (T.ones_like(terminals) - terminals) *
               self.discount_rate *
               T.max(qvalues_followup_states, axis=1, keepdims=True)
               )
    errors = targets - qvalues[
        T.arange(self.batch_size),
        actions.reshape((-1,))].reshape((-1, 1))
    if self.clip_error > 0:
        quadratic_part = T.minimum(abs(errors), self.clip_error)
        linear_part = abs(errors) - quadratic_part
        cost_function = T.sum(0.5 * quadratic_part ** 2 + self.clip_error * linear_part)
    else:
        cost_function = T.sum(0.5 * errors ** 2)
    self.params = lasagne.layers.helper.get_all_params(self.network)
    self.observations = {
        states: self.states_shared,
        followup_states: self.followup_states_shared,
        rewards: self.rewards_shared,
        actions: self.actions_shared,
        terminals: self.terminals_shared
    }
    self._set_optimizer(cost_function)
    if self.momentum > 0:
        self.optimizer = lasagne.updates.apply_momentum(
            # ... (remainder of the code omitted) ...
Author: maurolopes | Project: deepatari | Lines: 101 | Source: dqnlasagne.py
Example 15: __init__

def __init__(self, input_width, input_height, avail_actions, num_actions,
             num_frames, discount, learning_rate, rho,
             rms_epsilon, momentum, clip_delta, freeze_interval,
             batch_size, network_type, update_rule,
             batch_accumulator, rng, train_all, input_scale=255.0):
    self.input_width = input_width
    self.input_height = input_height
    self.avail_actions = avail_actions
    self.num_actions = num_actions
    self.num_frames = num_frames
    self.batch_size = batch_size
    self.discount = discount
    self.rho = rho
    self.lr = learning_rate
    self.rms_epsilon = rms_epsilon
    self.momentum = momentum
    self.clip_delta = clip_delta
    self.freeze_interval = freeze_interval
    self.rng = rng
    self.train_all = train_all
    lasagne.random.set_rng(self.rng)
    self.update_counter = 0
    print("num_actions: " + str(num_actions))
    self.l_out = self.build_network(network_type, input_width, input_height,
                                    num_actions, num_frames, batch_size)
    if self.freeze_interval > 0:
        self.next_l_out = self.build_network(network_type, input_width,
                                             input_height, num_actions,
                                             num_frames, batch_size)
        self.reset_q_hat()
    states = T.tensor4('states')
    next_states = T.tensor4('next_states')
    rewards = T.col('rewards')
    actions = T.icol('actions')
    terminals = T.icol('terminals')
    self.states_shared = theano.shared(
        np.zeros((batch_size, num_frames, input_height, input_width),
                 dtype=theano.config.floatX))
    self.next_states_shared = theano.shared(
        np.zeros((batch_size, num_frames, input_height, input_width),
                 dtype=theano.config.floatX))
    self.rewards_shared = theano.shared(
        np.zeros((batch_size, 1), dtype=theano.config.floatX),
        broadcastable=(False, True))
    self.actions_shared = theano.shared(
        np.zeros((batch_size, 1), dtype='int32'),
        broadcastable=(False, True))
    self.terminals_shared = theano.shared(
        np.zeros((batch_size, 1), dtype='int32'),
        broadcastable=(False, True))
    q_vals = lasagne.layers.get_output(self.l_out, states / input_scale)
    if self.freeze_interval > 0:
        next_q_vals = lasagne.layers.get_output(self.next_l_out,
                                                next_states / input_scale)
    else:
        next_q_vals = lasagne.layers.get_output(self.l_out,
                                                next_states / input_scale)
        next_q_vals = theano.gradient.disconnected_grad(next_q_vals)
    target = (rewards +
              (T.ones_like(terminals) - terminals) *
              self.discount * T.max(next_q_vals, axis=1, keepdims=True))
    diff = target - q_vals[T.arange(batch_size),
                           actions.reshape((-1,))].reshape((-1, 1))
    if self.clip_delta > 0:
        # If we simply take the squared clipped diff as our loss,
        # then the gradient will be zero whenever the diff exceeds
        # the clip bounds. To avoid this, we extend the loss
        # linearly past the clip point to keep the gradient constant
        # in that regime.
        #
        # This is equivalent to declaring d loss/d q_vals to be
        # equal to the clipped diff, then backpropagating from
        # there, which is what the DeepMind implementation does.
        quadratic_part = T.minimum(abs(diff), self.clip_delta)
        linear_part = abs(diff) - quadratic_part
        loss = 0.5 * quadratic_part ** 2 + self.clip_delta * linear_part
    else:
        loss = 0.5 * diff ** 2
    if batch_accumulator == 'sum':
        loss = T.sum(loss)
    elif batch_accumulator == 'mean':
        loss = T.mean(loss)
    else:
        raise ValueError("Bad accumulator: {}".format(batch_accumulator))
    # ... (remainder of the code omitted) ...
Author: cowhi | Project: deep_q_rl | Lines: 101 | Source: q_network.py
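A hedged NumPy sketch of the clipped loss explained in the comment above: it is quadratic for |diff| <= clip_delta and continues linearly beyond that, so the gradient saturates at +/- clip_delta instead of dropping to zero.

import numpy as np

clip_delta = 1.0
diff = np.array([-3.0, -0.5, 0.2, 2.0])
quadratic_part = np.minimum(np.abs(diff), clip_delta)
linear_part = np.abs(diff) - quadratic_part
loss = 0.5 * quadratic_part ** 2 + clip_delta * linear_part
print(loss)   # [2.5   0.125 0.02  1.5 ]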
Example 16: __init__

def __init__(self, batchSize, numFrames, inputHeight, inputWidth, numActions,
             discountRate, learningRate, rho, rms_epsilon, momentum, networkUpdateDelay, useSARSAUpdate, kReturnLength,
             networkType="conv", updateRule="deepmind_rmsprop", batchAccumulator="sum", clipDelta=1.0, inputScale=255.0):
    self.batchSize = batchSize
    self.numFrames = numFrames
    self.inputWidth = inputWidth
    self.inputHeight = inputHeight
    self.inputScale = inputScale
    self.numActions = numActions
    self.discountRate = discountRate
    self.learningRate = learningRate
    self.rho = rho
    self.rms_epsilon = rms_epsilon
    self.momentum = momentum
    self.networkUpdateDelay = networkUpdateDelay
    self.useSARSAUpdate = useSARSAUpdate
    self.kReturnLength = kReturnLength
    self.networkType = networkType
    self.updateRule = updateRule
    self.batchAccumulator = batchAccumulator
    self.clipDelta = clipDelta
    self.updateCounter = 0
    states = T.tensor4("states")
    nextStates = T.tensor4("nextStates")
    rewards = T.col("rewards")
    actions = T.icol("actions")
    nextActions = T.icol("nextActions")
    terminals = T.icol("terminals")
    self.statesShared = theano.shared(np.zeros((self.batchSize, self.numFrames, self.inputHeight, self.inputWidth), dtype=theano.config.floatX))
    self.nextStatesShared = theano.shared(np.zeros((self.batchSize, self.numFrames, self.inputHeight, self.inputWidth), dtype=theano.config.floatX))
    self.rewardsShared = theano.shared(np.zeros((self.batchSize, 1), dtype=theano.config.floatX), broadcastable=(False, True))
    self.actionsShared = theano.shared(np.zeros((self.batchSize, 1), dtype='int32'), broadcastable=(False, True))
    self.nextActionsShared = theano.shared(np.zeros((self.batchSize, 1), dtype='int32'), broadcastable=(False, True))
    self.terminalsShared = theano.shared(np.zeros((self.batchSize, 1), dtype='int32'), broadcastable=(False, True))
    self.qValueNetwork = DeepNetworks.buildDeepQNetwork(
        self.batchSize, self.numFrames, self.inputHeight, self.inputWidth, self.numActions, self.networkType)
    qValues = lasagne.layers.get_output(self.qValueNetwork, states / self.inputScale)
    if self.networkUpdateDelay > 0:
        self.nextQValueNetwork = DeepNetworks.buildDeepQNetwork(
            self.batchSize, self.numFrames, self.inputHeight, self.inputWidth, self.numActions, self.networkType)
        self.resetNextQValueNetwork()
        nextQValues = lasagne.layers.get_output(self.nextQValueNetwork, nextStates / self.inputScale)
    else:
        nextQValues = lasagne.layers.get_output(self.qValueNetwork, nextStates / self.inputScale)
        nextQValues = theano.gradient.disconnected_grad(nextQValues)
    if self.useSARSAUpdate:
        target = rewards + terminals * (self.discountRate ** self.kReturnLength) * nextQValues[T.arange(self.batchSize), nextActions.reshape((-1,))].reshape((-1, 1))
    else:
        target = rewards + terminals * (self.discountRate ** self.kReturnLength) * T.max(nextQValues, axis=1, keepdims=True)
    targetDifference = target - qValues[T.arange(self.batchSize), actions.reshape((-1,))].reshape((-1, 1))
    quadraticPart = T.minimum(abs(targetDifference), self.clipDelta)
    linearPart = abs(targetDifference) - quadraticPart
    # if self.clipDelta > 0:
    #     targetDifference = targetDifference.clip(-1.0 * self.clipDelta, self.clipDelta)
    if self.batchAccumulator == "sum":
        # loss = T.sum(targetDifference ** 2)
        loss = T.sum(0.5 * quadraticPart ** 2 + self.clipDelta * linearPart)
    elif self.batchAccumulator == "mean":
        # loss = T.mean(targetDifference ** 2)
        loss = T.mean(0.5 * quadraticPart ** 2 + self.clipDelta * linearPart)
    else:
        raise ValueError("Bad Network Accumulator. {sum, mean} expected")
    networkParameters = lasagne.layers.helper.get_all_params(self.qValueNetwork)
    if self.updateRule == "deepmind_rmsprop":
        updates = DeepNetworks.deepmind_rmsprop(loss, networkParameters, self.learningRate, self.rho, self.rms_epsilon)
    elif self.updateRule == "rmsprop":
        updates = lasagne.updates.rmsprop(loss, networkParameters, self.learningRate, self.rho, self.rms_epsilon)
    elif self.updateRule == "sgd":
        updates = lasagne.updates.sgd(loss, networkParameters, self.learningRate)
    else:
        raise ValueError("Bad update rule. {deepmind_rmsprop, rmsprop, sgd} expected")
    if self.momentum > 0:
        updates = lasagne.updates.apply_momentum(updates, None, self.momentum)
    lossGivens = {
        states: self.statesShared,
        nextStates: self.nextStatesShared,
        rewards: self.rewardsShared,
        actions: self.actionsShared,
        nextActions: self.nextActionsShared,
        terminals: self.terminalsShared
    }
    # ... (remainder of the code omitted) ...
Author: Mog333 | Project: DeepRL | Lines: 101 | Source: DeepQNetwork.py
Example 17: __init__

def __init__(self, num_actions):
    # remember parameters
    self.num_actions = num_actions
    self.batch_size = BATCH_SIZE
    self.discount_rate = DISCOUNT_RATE
    self.history_length = HISTORY_LENGTH
    self.screen_dim = DIMS
    self.img_height = SCREEN_HEIGHT
    self.img_width = SCREEN_WIDTH
    self.clip_error = CLIP_ERROR
    self.input_color_scale = COLOR_SCALE
    self.target_steps = TARGET_STEPS
    self.train_iterations = TRAIN_STEPS
    self.train_counter = 0
    self.momentum = MOMENTUM
    self.update_rule = UPDATE_RULE
    self.learning_rate = LEARNING_RATE
    self.rms_decay = RMS_DECAY
    self.rms_epsilon = RMS_EPSILON
    self.rng = np.random.RandomState(RANDOM_SEED)
    # set seed
    lasagne.random.set_rng(self.rng)
    # prepare tensors once and reuse them
    states = T.tensor4('states')
    next_states = T.tensor4('next_states')
    rewards = T.col('rewards')
    actions = T.icol('actions')
    # terminals are bool for our case
    terminals = T.bcol('terminals')
    # create shared theano variables
    self.states_shared = theano.shared(
        np.zeros((self.batch_size, self.history_length, self.img_height, self.img_width),
                 dtype=theano.config.floatX))
    self.next_states_shared = theano.shared(
        np.zeros((self.batch_size, self.history_length, self.img_height, self.img_width),
                 dtype=theano.config.floatX))
    # !broadcast ?
    self.rewards_shared = theano.shared(
        np.zeros((self.batch_size, 1), dtype=theano.config.floatX),
        broadcastable=(False, True))
    self.actions_shared = theano.shared(
        np.zeros((self.batch_size, 1), dtype='int32'),
        broadcastable=(False, True))
    self.terminals_shared = theano.shared(
        #np.zeros((self.batch_size, 1), dtype='int32'),
        np.zeros((self.batch_size, 1), dtype='int8'),
        broadcastable=(False, True))
    # can add multiple nets here
    self.l_primary = self.build_network()
    if self.target_steps > 0:
        self.l_secondary = self.build_network()
        self.copy_to_secondary()
    """
    # input scale i.e. division can be applied to input directly also to normalize
    """
    # define output symbols
    q_vals = lasagne.layers.get_output(self.l_primary, states / self.input_color_scale)
    if self.target_steps > 0:
        q_vals_secondary = lasagne.layers.get_output(self.l_secondary, next_states / self.input_color_scale)
    else:
        # why this ?
        q_vals_secondary = lasagne.layers.get_output(self.l_primary, next_states / self.input_color_scale)
        q_vals_secondary = theano.gradient.disconnected_grad(q_vals_secondary)
    # target = r + max
    target = (rewards + (T.ones_like(terminals) - terminals) * self.discount_rate * T.max(q_vals_secondary, axis=1, keepdims=True))
    """
    # check what this does
    """
    diff = target - q_vals[T.arange(self.batch_size),
                           actions.reshape((-1,))].reshape((-1, 1))
    # print shape ?
    if self.clip_error > 0:
        # If we simply take the squared clipped diff as our loss,
        # then the gradient will be zero whenever the diff exceeds
        # the clip bounds. To avoid this, we extend the loss
        # linearly past the clip point to keep the gradient constant
        # in that regime.
        #
        # This is equivalent to declaring d loss/d q_vals to be
        # equal to the clipped diff, then backpropagating from
        # ... (remainder of the code omitted) ...
Author: hercky | Project: a3c | Lines: 101 | Source: network.py