Python tensor.col Function Code Examples


This article collects typical usage examples of the Python theano.tensor.col function. If you are wondering what col does, how to call it, or want to see it used in real code, the curated examples below should help.



The following presents 20 code examples of the col function, sorted by popularity by default.
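Before diving into the collected examples, here is a minimal orientation sketch (not taken from any of the repositories below; it only assumes Theano and NumPy are installed). theano.tensor.col creates a symbolic N x 1 matrix whose second dimension is marked broadcastable, which is why the examples below can broadcast a column of rewards, labels, or row sums against an entire batch:

import numpy as np
import theano
import theano.tensor as T

# T.col builds a 2-D symbolic variable with broadcastable pattern (False, True),
# i.e. an N x 1 column whose length-1 axis broadcasts automatically.
c = T.col('c')
v = T.vector('v')

# The column broadcasts against the (row) vector, giving a full matrix.
f = theano.function([c, v], c + v)

col_val = np.arange(3, dtype=theano.config.floatX).reshape(3, 1)
vec_val = np.asarray([10, 20], dtype=theano.config.floatX)

print(c.broadcastable)            # (False, True)
print(f(col_val, vec_val).shape)  # (3, 2)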

Example 1: test_broadcast_arguments

    def test_broadcast_arguments(self):
        m = Module()
        m.random = RandomStreams(utt.fetch_seed())
        low = tensor.vector()
        high = tensor.col()
        out = m.random.uniform(low=low, high=high)
        assert out.ndim == 2
        m.f = Method([low, high], out)
        made = m.make()
        made.random.initialize()

        rng_seed = numpy.random.RandomState(utt.fetch_seed()).randint(2**30)
        numpy_rng = numpy.random.RandomState(int(rng_seed))
        low_vals = [
                numpy.asarray([-5, .5, 0, 1], dtype=config.floatX),
                numpy.asarray([.9], dtype=config.floatX),
                numpy.asarray([-5, .5, 0, 1], dtype=config.floatX) ]
        high_vals = [
                numpy.asarray([[1.]], dtype=config.floatX),
                numpy.asarray([[1.], [1.1], [1.5]], dtype=config.floatX),
                numpy.asarray([[1.], [1.1], [1.5]], dtype=config.floatX) ]

        val0 = made.f(low_vals[0], high_vals[0])
        val1 = made.f(low_vals[1], high_vals[1])
        val2 = made.f(low_vals[2], high_vals[2])

        numpy_val0 = numpy_rng.uniform(low=low_vals[0], high=high_vals[0])
        numpy_val1 = numpy_rng.uniform(low=low_vals[1], high=high_vals[1])
        numpy_val2 = numpy_rng.uniform(low=low_vals[2], high=high_vals[2])

        assert numpy.allclose(val0, numpy_val0)
        assert numpy.allclose(val1, numpy_val1)
        assert numpy.allclose(val2, numpy_val2)
Author: HaniAlmousli, Project: Theano, Lines: 33, Source: test_randomstreams.py


Example 2: __init__

	def __init__(self, args):
		reward = T.col('r')
		action = T.icol('a')
		terminal = T.icol('t')
		discount = T.scalar('gamma')
		learningRate = T.scalar('lr')
		rho = T.scalar('rho')
		epsilon = T.scalar('eps')
		rng = np.random.RandomState(42)
		
		self.batchNb = args.batchSize
		
		#convLayers = [[(8,8),(4,4),64],
		#			  [(4,4),(2,2),128],
		#			  [(3,3),(1,1),256],
		#			  [(3,3),(1,1),512]]
		#fcl = [1024, 6]
		
		convLayers = [[(8,8),(4,4),64],
					  [(4,4),(2,2),128],
					  [(3,3),(1,1),256],
					  [(3,3),(1,1),256]]
		fcl = [1024, args.actionNb]
		self.q1 = NetStruct(convLayers, fcl, (4,100,100), rng, args)
		self.q2 = NetStruct(convLayers, fcl, (4,100,100), rng, args)
		self.q2.setParams(self.q1)
		
		self.states = theano.shared(np.zeros((args.batchSize,4,100,100), dtype='float32'))
		self.states2 = theano.shared(np.zeros((args.batchSize,4,100,100), dtype='float32'))
		self.actions = theano.shared(np.zeros((args.batchSize,1), dtype='int32'), broadcastable=(False,True))
		self.rewards = theano.shared(np.zeros((args.batchSize,1), dtype='float32'), broadcastable=(False,True))
		self.terminals = theano.shared(np.zeros((args.batchSize,1), dtype='int32'), broadcastable=(False,True))
		
		self.learningRate = theano.shared(np.array(args.learningRate, dtype='float32'))
		self.rho = theano.shared(np.array(args.rmsPropRho, dtype='float32'))
		self.epsilon = theano.shared(np.array(args.rmsPropEpsilon, dtype='float32'))
		self.discount = theano.shared(np.array(args.discountFactor, dtype='float32'))
		
		loss = self.QLoss(self.q1.output, self.q2.output, action, reward, terminal, discount)
		
		params = self.q1.getParams()
		
		updates = self.rmsProp(loss, params, rho, epsilon, learningRate)
		self.train_model = theano.function(
			[],
			loss,
			updates=updates,
			givens = { 
					   self.q1.input: self.states,
					   self.q2.input: self.states2,
					   action: self.actions,
					   reward: self.rewards,
					   terminal: self.terminals,
					   discount: self.discount,
					   learningRate: self.learningRate,
					   rho: self.rho,
					   epsilon: self.epsilon
					 }
		)
Author: Levoila, Project: CrappyAI, Lines: 59, Source: net.py


Example 3: createGradientFunctions

    def createGradientFunctions(self):
        #Create the Theano variables
        W1,W2,W3,W4,W5,W6,x,eps = T.dmatrices("W1","W2","W3","W4","W5","W6","x","eps")
        #Create biases as cols so they can be broadcasted for minibatches
        b1,b2,b3,b4,b5,b6 = T.dcols("b1","b2","b3","b4","b5","b6")
        z1 = T.col("z1")
        if self.continuous:
            #convolve x
            # no_filters = 100, stride = 4, filter_size = 50

            h_encoder = T.tanh(T.dot(W1,x) + b1)
            #h_encoder = T.dot(W1,x) + b1
        else:   
            h_encoder = T.tanh(T.dot(W1,x) + b1)

        mu_encoder = T.dot(W2,h_encoder) + b2
        log_sigma_encoder = 0.5*(T.dot(W3,h_encoder) + b3)

        #Find the hidden variable z
        z = mu_encoder + T.exp(log_sigma_encoder)*eps

        prior = 0.5* T.sum(1 + 2*log_sigma_encoder - mu_encoder**2 - T.exp(2*log_sigma_encoder))


        #Set up decoding layer
        if self.continuous:
            h_decoder = T.nnet.softplus(T.dot(W4,z) + b4)
            h_dec = T.nnet.softplus(T.dot(W4,z1) + b4)

            #h_decoder = T.dot(W4,z) + b4
            #h_dec = T.dot(W4,z1) + b4

            mu_decoder = T.tanh(T.dot(W5,h_decoder) + b5)
            mu_dec = T.tanh(T.dot(W5,h_dec) + b5)
            log_sigma_decoder = 0.5*(T.dot(W6,h_decoder) + b6)
            logpxz = T.sum(-(0.5 * np.log(2 * np.pi) + log_sigma_decoder) - 0.5 * ((x - mu_decoder) / T.exp(log_sigma_decoder))**2)
            gradvariables = [W1,W2,W3,W4,W5,W6,b1,b2,b3,b4,b5,b6]
        else:
            h_decoder = T.tanh(T.dot(W4,z) + b4)
            y = T.nnet.sigmoid(T.dot(W5,h_decoder) + b5)
            logpxz = -T.nnet.binary_crossentropy(y,x).sum()
            gradvariables = [W1,W2,W3,W4,W5,b1,b2,b3,b4,b5]
        logp = logpxz + prior

        #Compute all the gradients
        derivatives = T.grad(logp,gradvariables)

        #Add the lowerbound so we can keep track of results
        derivatives.append(logp)
        
        self.get_z = th.function(gradvariables+[x,eps],z,on_unused_input='ignore')
        self.generate = th.function(gradvariables+[z1,x,eps],mu_dec,on_unused_input='ignore')
        self.predict = th.function(gradvariables+[x,eps],mu_decoder,on_unused_input='ignore')
        self.gradientfunction = th.function(gradvariables + [x,eps], derivatives, on_unused_input='ignore')
        self.lowerboundfunction = th.function(gradvariables + [x,eps], logp, on_unused_input='ignore')
Author: KyriacosShiarli, Project: Variational-Autoencoder, Lines: 58, Source: VariationalAutoencoder.py


Example 4: test_ndim_mismatch

    def test_ndim_mismatch(self):
        rng = numpy.random.RandomState(utt.fetch_seed())
        data = rng.rand(5).astype(self.dtype)
        x = self.shared(data)
        y = tensor.col('y', self.dtype)
        cond = theano.tensor.iscalar('cond')

        self.assertRaises(TypeError, ifelse, cond, x, y)
        self.assertRaises(TypeError, ifelse, cond, y, x)
Author: aboSamoor, Project: Theano, Lines: 9, Source: test_ifelse.py


Example 5: testDataSet

 def testDataSet(self, dataSet_, dataLabels_):
     dataSet = T.matrix("dataSet")
     labels = T.col("labels")
     svLabels = T.col("svLabels")
     gamma = T.dscalar("gamma")
     svs = T.matrix("supportVectors")
     svAlphas = T.matrix("svAlphas")
     b = T.dscalar("b")
           
     # we need to transpose the result because the results of the per-row actions are usually columns
     errorVec = theano.scan(lambda row, realLabel : self.testDataSet_inner_(svs, row, gamma, svLabels, svAlphas, b, realLabel), sequences=[dataSet, labels])[0]
     errors = T.sum(errorVec)
     
     inputs = [dataSet, labels, svs, svLabels, gamma, svAlphas, b]
     compErrors = theano.function(inputs=inputs, outputs=errors, on_unused_input='ignore')
     
     gamma_ = 1/(-1*self.Training.UsedKernel[1]**2)
     numErrors = compErrors(dataSet_, dataLabels_, self.Training.SupportVectors, self.Training.SVLabels, gamma_, self.Training.Alphas[self.Training.SVIndices], self.Training.B.item(0))
     return float(numErrors) / float(dataSet_.shape[0])
Author: martinmeinke, Project: ipml, Lines: 19, Source: HybridSVMClassifier.py


Example 6: __init__

    def __init__(self, numpy_rng, theano_rng = None, first_layer_type = 'bernoulli', mean_doc_size = 1, n_ins = 784, mid_layer_sizes=[200], inner_code_length = 10):
        """This class is made to support a variable number of layers. 

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial 
                    weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given one is 
                           generated based on a seed drawn from `rng`

        :type n_ins: int
        :param n_ins: dimension of the input (and autoencoder output, y) of the SMH 

        :type inner_code_length: int
        :param inner_code_length: how many codes to squash down to in the middle layer
        """
        
        self.first_layer_type = first_layer_type;
        self.mean_doc_size = mean_doc_size;
        
        self.sigmoid_layers = []
        self.rbm_layers     = []
        self.params         = []
        
        self.n_ins = n_ins
        self.inner_code_length = inner_code_length
        self.mid_layer_sizes = list(mid_layer_sizes)
        
        self.numpy_rng = numpy_rng
        self.theano_rng = RandomStreams(numpy_rng.randint(2**30))
     
        # allocate symbolic variables for the data
        
        if (theano.config.floatX == "float32"):
            self.x  = T.matrix('x')  #
            self.x_sums = T.col('x_sums')
            self.y  = T.matrix('y') # the output (after finetuning) should look the same as the input
        else:
            if (theano.config.floatX == "float64"):
                self.x  = T.dmatrix('x')  #
                self.x_sums = T.dcol('x_sums')
                self.y  = T.dmatrix('y') # the output (after finetuning) should look the same as the input
            else:        
                raise Exception #not sure whats up here..

        # The SMH is an MLP, for which all weights of intermediate layers are shared with a
        # different RBM.  We will first construct the SMH as a deep multilayer perceptron, and
        # when constructing each sigmoidal layer we also construct an RBM that shares weights
        # with that layer. During pretraining we will train these RBMs (which will lead
        # to changing the weights of the MLP as well). During finetuning we will finish
        # training the SMH by doing stochastic gradient descent on the MLP.

        self.init_layers()
Author: utunga, Project: hashmapd, Lines: 54, Source: SMH.py


Example 7: __init__

    def __init__(self, input_width, input_height, output_dim, num_frames, batch_size):
        self.input_width = input_width
        self.input_height = input_height
        self.output_dim = output_dim
        self.num_frames = num_frames
        self.batch_size = batch_size
        self.gamma = 0.99 # discount factor
        self.rho = 0.99
        self.lr = 0.00025 # learning rate
        self.momentum = 0.95
        self.freeze_targets = True

        self.l_out = self.build_network(input_width, input_height, output_dim, num_frames, batch_size)
        if self.freeze_targets:
            self.next_l_out = self.build_network(input_width, input_height, output_dim, num_frames, batch_size)
            self.reset_q_hat()

        states = T.tensor4('states')
        next_states = T.tensor4('next_states')
        rewards = T.col('rewards')
        actions = T.icol('actions')
#        terminals = T.icol('terminals')

        self.states_shared = theano.shared(np.zeros((batch_size, num_frames, input_height, input_width), dtype=theano.config.floatX))
        self.next_states_shared = theano.shared(np.zeros((batch_size, num_frames, input_height, input_width), dtype=theano.config.floatX))
        self.rewards_shared = theano.shared(np.zeros((batch_size, 1), dtype=theano.config.floatX), broadcastable=(False,True))
        self.actions_shared = theano.shared(np.zeros((batch_size, 1), dtype='int32'), broadcastable=(False,True))
#        self.terminals_shared = theano.shared(np.zeros((batch_size, 1), dtype='int32'), broadcastable=(False,True))

        q_vals = self.l_out.get_output(states / 255.0)
        if self.freeze_targets:
            next_q_vals = self.next_l_out.get_output(next_states / 255.0)
        else:
            next_q_vals = self.l_out.get_output(next_states / 255.0)
            next_q_vals = theano.gradient.disconnected_grad(next_q_vals)

        target = rewards + self.gamma * T.max(next_q_vals, axis=1, keepdims=True)
        diff = target - q_vals[T.arange(batch_size), actions.reshape((-1,))].reshape((-1,1))
        loss = T.mean(diff ** 2)

        params = lasagne.layers.helper.get_all_params(self.l_out)
        givens = {
            states: self.states_shared,
            next_states: self.next_states_shared,
            rewards: self.rewards_shared,
            actions: self.actions_shared,
#            terminals: self.terminals_shared
        }
        if self.momentum > 0:
            updates = rmsprop_nesterov(loss, params, self.lr, self.rho, self.momentum, 1e-2)
        else:
            updates = lasagne.updates.rmsprop(loss, params, self.lr, self.rho, 1e-6)
        self._train = theano.function([], [loss, q_vals], updates=updates, givens=givens)
        self._q_vals = theano.function([], q_vals, givens={ states: self.states_shared })
Author: npow, Project: deep_q_rl, Lines: 54, Source: network.py


Example 8: build_finetune_functions

 def build_finetune_functions(self, batch_size, learning_rate):
     '''Generates a function `train` that implements one step of finetuning, a function
     `validate` that computes the error on a batch from the validation set, and a function
     `test` that computes the error on a batch from the testing set
     
     :type batch_size: int
     :param batch_size: size of a minibatch
     :type learning_rate: float
     :param learning_rate: learning rate used during finetune stage
     '''
     
     train_set_x = T.matrix('train_set_x')
     train_set_x_sums = T.col('train_set_x_sums')
     valid_set_x = T.matrix('valid_set_x')
     valid_set_x_sums = T.col('valid_set_x_sums')
     test_set_x = T.matrix('test_set_x')
     test_set_x_sums = T.col('test_set_x_sums')
     
     # compute the gradients with respect to the model parameters
     gparams = T.grad(self.finetune_cost, self.params)
     
     # compute list of fine-tuning updates
     updates = {}
     for param, gparam in zip(self.params, gparams):
         updates[param] = param - gparam*learning_rate
     
     train_fn = theano.function(inputs = [train_set_x, train_set_x_sums], 
           outputs =  self.finetune_cost, 
           updates = updates,
           givens  = { self.x : train_set_x,
                       self.x_sums : train_set_x_sums })
     
     valid_score_i = theano.function([valid_set_x, valid_set_x_sums], self.finetune_cost,
           givens  = { self.x : valid_set_x,
                       self.x_sums : valid_set_x_sums })
     
     test_score_i = theano.function([test_set_x, test_set_x_sums], self.finetune_cost,
           givens  = { self.x : test_set_x,
                       self.x_sums : test_set_x_sums })
     
     return train_fn, valid_score_i, test_score_i
Author: utunga, Project: hashmapd, Lines: 41, Source: SMH.py


Example 9: neural_tensor_network

def neural_tensor_network():
    # tensor params
    subj = T.col('e_1')
    targets = T.matrix('e_2')
    W = T.tensor3('W')

    # neural net params
    u = T.col('u')
    V = T.matrix('V')
    b = T.col('b')

    # tensor
    h = subj.T.dot(W).dot(targets)

    # neural net
    d = subj.shape[0]
    V_subj = V[:, :d].dot(subj)
    V_targ = V[:, d:].dot(targets)

    activations = T.tanh(h + V_subj + V_targ + b)
    score = u.T.dot(activations).reshape((-1, 1))

    margins = score[0] - score[1:]
    cost = T.min(T.concatenate((T.ones_like(margins), margins), axis=1), axis=1).mean()

    gsubj, gtargets, gW, gu, gV, gb = T.grad(cost, [subj, targets, W, u, V, b])

    print 'Compiling NTN score'
    score = theano.function([subj, W, targets, u, V, b], score, name='NTN Score',
                            mode='FAST_RUN')

    print 'Compiling NTN fprop'
    fprop = theano.function([subj, W, targets, u, V, b], cost, name='NTN fprop',
                            mode='FAST_RUN')

    print 'Compiling NTN bprop'
    bprop = theano.function([subj, W, targets, u, V, b],
                            outputs=[gsubj, gW, gtargets, gu, gV, gb],
                            name='NTN bprop', mode='FAST_RUN')

    return {'score': score, 'fprop': fprop, 'bprop': bprop}
Author: eshijia, Project: traversing_knowledge_graphs, Lines: 41, Source: models.py


Example 10: setup_theano

    def setup_theano(self):
        # for numpy optimization
        oneCol = T.col("oneCol")
        pi_t = T.col("pi_t")
        z_t = T.col("z_t")
        z_t1 = z_t.reshape((self.numKeypoints, 2))
        pts = T.concatenate((z_t1, oneCol), axis=1)
        A_t_ = T.matrix("A_t_")
        r_t_ = T.dot(A_t_, pts.transpose()).transpose()
        r_t1_ = r_t_[:, 0:2].reshape((2 * self.numKeypoints, 1))

        diff_ = pi_t * (r_t1_ - self.mu)
        difft_ = diff_.reshape((1, 2 * self.numKeypoints))

        cost_1 = T.dot(difft_, diff_)
        # cost_1 = theano.printing.Print('cost is:')(cost_1)
        cost_ = T.max(cost_1)

        A_t_grad_ = T.grad(cost=cost_, wrt=A_t_)
        A_t_grad_ = T.basic.set_subtensor(A_t_grad_[2, :], 0)
        self.cost = theano.function(inputs=[A_t_, pi_t, z_t, oneCol], outputs=[cost_, A_t_grad_])
Author: YangXS, Project: lisa_emotiw, Lines: 21, Source: faceAlign.py


Example 11: pretraining_functions

    def pretraining_functions(self, batch_size, method, pretrain_lr, k):
        ''' Generates a list of functions, for performing one step of gradient descent at a
        given layer. The function will require as input a minibatch of data, and to train an
        RBM you just need to iterate, calling the corresponding function on all minibatches.
        
        :type batch_size: int
        :param batch_size: size of a [mini]batch
        :type method: string
        :param method: type of Gibbs sampling to perform: 'cd' (default) or 'pcd'
        :type k: int
        :param k: number of Gibbs steps to do in CD-k / PCD-k
        :type pretrain_lr: float
        :param pretrain_lr: the learning rate to use during the pretraining phase
        '''

        learning_rate = T.scalar('lr')    # learning rate to use
        #learning_rate.value = pretrain_lr

        # i *think* the following is equivalent to above.. doing this because i can't see where lr gets a value at all
        #learning_rate = theano.shared(pretrain_lr, 'learning_rate')
        train_set_x = T.matrix('train_set_x')
        train_set_x_sums = T.col('train_set_x_sums')

        pretrain_fns = []
        for rbm in self.rbm_layers:
            if method == 'pcd':
                # initialize storage for the persistent chain (state = hidden layer of chain)
                persistent_chain = theano.shared(numpy.zeros((batch_size,rbm.n_hidden),dtype=theano.config.floatX))
                # get the cost and the gradient corresponding to one step of PCD-k
                cost,updates = rbm.get_cost_updates(lr=learning_rate, persistent=persistent_chain, k=k)
            else:
                # default = use CD instead
                cost,updates = rbm.get_cost_updates(lr=learning_rate)
            
            # compile the theano function    
            fn = theano.function(inputs = [train_set_x,train_set_x_sums,
                        theano.Param(learning_rate, default = 0.1)],
                    outputs = cost,
                    updates = updates,
                    givens  = {self.x:train_set_x,
                               self.x_sums:train_set_x_sums}
                    # uncomment the following line to perform debugging:
                    #   ,mode=theano.compile.debugmode.DebugMode(stability_patience=5)
                    )
            
            # append `fn` to the list of functions
            pretrain_fns.append(fn)

        return pretrain_fns
Author: utunga, Project: hashmapd, Lines: 49, Source: SMH.py


Example 12: transE_model

def transE_model():
    '''
        Note X_s is a column and X_t is a matrix, so that broadcasting occurs
        across the columns of X_t (this allows batching X_t with negatives,
        for example).
    '''
    # construct theano expression graph
    X_s = T.col('X_s')
    W = T.matrix('W')
    X_t = T.matrix('X_t')

    rels = W[:, :, None].transpose(1, 0, 2)

    # Computes x_{r_1} + x_{r_{2}} + ... + x_{r_n} - X_{t}
    results, updates = theano.scan(fn=lambda rel, v: rel + v,
                                   outputs_info=-X_t, sequences=[rels])

    # score is always a column vector
    score = T.sum((X_s + results[-1]) ** 2, axis=0).reshape((-1, 1))

    margins = 1. + score[0] - score[1:]

    # zero out negative entries
    pos_parts = margins * (margins > 0)

    # we are using online Maximizer, so the objective is negated
    cost = -pos_parts.mean()

    gX_s, gW, gX_t = T.grad(cost, [X_s, W, X_t])

    print 'Compiling TransE score'
    # return negative score since this is a ranking
    score = theano.function([X_s, W, X_t], -score, name='transE Score',
                            mode='FAST_RUN')
    score.trust_input = True

    print 'Compiling TransE fprop'
    fprop = theano.function([X_s, W, X_t], cost, name='transE fprop',
                            mode='FAST_RUN')
    fprop.trust_input = True

    print 'Compiling TransE bprop'
    bprop = theano.function([X_s, W, X_t],
                            outputs=[gX_s, gW, gX_t],
                            name='transE bprop', mode='FAST_RUN')
    bprop.trust_input = True

    return {'score': score, 'fprop': fprop, 'bprop': bprop}
Author: eshijia, Project: traversing_knowledge_graphs, Lines: 48, Source: models.py


Example 13: test_wrong_broadcast

    def test_wrong_broadcast(self):
        a = tt.col()
        increment = tt.vector()

        # These symbolic graphs are legitimate, as long as increment has exactly
        # one element. So it should fail at runtime, not at compile time.
        rng = numpy.random.RandomState(utt.fetch_seed())

        def rng_randX(*shape):
            return rng.rand(*shape).astype(theano.config.floatX)

        for op in (tt.set_subtensor, tt.inc_subtensor):
            for base in (a[:], a[0]):
                out = op(base, increment)
                f = theano.function([a, increment], out)
                # This one should work
                f(rng_randX(3, 1), rng_randX(1))
                # These ones should not
                self.assertRaises(ValueError, f, rng_randX(3, 1), rng_randX(2))
                self.assertRaises(ValueError, f, rng_randX(3, 1), rng_randX(3))
                self.assertRaises(ValueError, f, rng_randX(3, 1), rng_randX(0))
Author: igul222, Project: Theano, Lines: 21, Source: test_inc_subtensor.py


Example 14: __init__

    def __init__(self, env, args, rng, name = "DQNLasagne"):
        """ Initializes a network based on the Lasagne Theano framework.

        Args:
            env (AtariEnv): The envirnoment in which the agent actuates.
            args (argparse.Namespace): All settings either with a default value or set via command line arguments.
            rng (mtrand.RandomState): Initialized Mersenne Twister pseudo-random number generator.
            name (str): The name of the network object.

        Note:
            This function should always call the base class first to initialize
            the common values for the networks.
        """
        _logger.info("Initialize object of type " + str(type(self).__name__))
        super(DQNLasagne, self).__init__(env, args, rng, name)
        self.input_shape = (self.batch_size, self.sequence_length, args.frame_width, args.frame_height)
        self.dummy_batch = np.zeros(self.input_shape, dtype=np.uint8)
        lasagne.random.set_rng(self.rng)

        self.network = self._create_layer()

        # TODO: Load weights from pretrained network?!
        if not self.args.load_weights == None:
            self.load_weights(self.args.load_weights)

        if self.target_update_frequency > 0:
            self.target_network = self._create_layer()
            self._copy_theta()

        states = T.tensor4('states')
        followup_states = T.tensor4('followup_states')
        rewards = T.col('rewards')
        actions = T.icol('actions')
        terminals = T.icol('terminals')

        self.states_shared = theano.shared(
                np.zeros(self.input_shape, dtype=theano.config.floatX)
        )
        self.followup_states_shared = theano.shared(
                np.zeros(self.input_shape, dtype=theano.config.floatX)
        )
        self.rewards_shared = theano.shared(
                np.zeros((self.batch_size, 1), dtype=theano.config.floatX),
                broadcastable=(False, True)
        )
        self.actions_shared = theano.shared(
                np.zeros((self.batch_size, 1), dtype='int32'),
                broadcastable=(False, True)
        )
        self.terminals_shared = theano.shared(
                np.zeros((self.batch_size, 1), dtype='int32'),
                broadcastable=(False, True)
        )

        qvalues = lasagne.layers.get_output(
                self.network,
                self._prepare_network_input(states)
        )

        if self.target_update_frequency > 0:
            qvalues_followup_states = lasagne.layers.get_output(
                    self.target_network,
                    self._prepare_network_input(followup_states)
            )
        else:
            qvalues_followup_states = lasagne.layers.get_output(
                    self.network,
                    self._prepare_network_input(followup_states)
            )
            qvalues_followup_states = theano.gradient.disconnected_grad(qvalues_followup_states)

        targets = (rewards +
                (T.ones_like(terminals) - terminals) *
                self.discount_rate *
                T.max(qvalues_followup_states, axis=1, keepdims=True)
        )
        errors = targets - qvalues[
                T.arange(self.batch_size),
                actions.reshape((-1,))].reshape((-1, 1))

        if self.clip_error > 0:
            quadratic_part = T.minimum(abs(errors), self.clip_error)
            linear_part = abs(errors) - quadratic_part
            cost_function = T.sum(0.5 * quadratic_part ** 2 + self.clip_error * linear_part)
        else:
            cost_function = T.sum(0.5 * errors ** 2)

        self.params = lasagne.layers.helper.get_all_params(self.network)
        self.observations = {
            states: self.states_shared,
            followup_states: self.followup_states_shared,
            rewards: self.rewards_shared,
            actions: self.actions_shared,
            terminals: self.terminals_shared
        }

        self._set_optimizer(cost_function)

        if self.momentum > 0:
            self.optimizer = lasagne.updates.apply_momentum(
#......... part of this example's code omitted .........
Author: maurolopes, Project: deepatari, Lines: 101, Source: dqnlasagne.py


Example 15: __init__

    def __init__(self, input_width, input_height, avail_actions, num_actions,
                 num_frames, discount, learning_rate, rho,
                 rms_epsilon, momentum, clip_delta, freeze_interval,
                 batch_size, network_type, update_rule,
                 batch_accumulator, rng, train_all, input_scale=255.0):

        self.input_width = input_width
        self.input_height = input_height
        self.avail_actions = avail_actions
        self.num_actions = num_actions
        self.num_frames = num_frames
        self.batch_size = batch_size
        self.discount = discount
        self.rho = rho
        self.lr = learning_rate
        self.rms_epsilon = rms_epsilon
        self.momentum = momentum
        self.clip_delta = clip_delta
        self.freeze_interval = freeze_interval
        self.rng = rng
        self.train_all = train_all

        lasagne.random.set_rng(self.rng)

        self.update_counter = 0

        print "num_actions: " + str(num_actions)
        self.l_out = self.build_network(network_type, input_width, input_height,
                                        num_actions, num_frames, batch_size)
        if self.freeze_interval > 0:
            self.next_l_out = self.build_network(network_type, input_width,
                                                 input_height, num_actions,
                                                 num_frames, batch_size)
            self.reset_q_hat()

        states = T.tensor4('states')
        next_states = T.tensor4('next_states')
        rewards = T.col('rewards')
        actions = T.icol('actions')
        terminals = T.icol('terminals')

        self.states_shared = theano.shared(
            np.zeros((batch_size, num_frames, input_height, input_width),
                     dtype=theano.config.floatX))

        self.next_states_shared = theano.shared(
            np.zeros((batch_size, num_frames, input_height, input_width),
                     dtype=theano.config.floatX))

        self.rewards_shared = theano.shared(
            np.zeros((batch_size, 1), dtype=theano.config.floatX),
            broadcastable=(False, True))

        self.actions_shared = theano.shared(
            np.zeros((batch_size, 1), dtype='int32'),
            broadcastable=(False, True))

        self.terminals_shared = theano.shared(
            np.zeros((batch_size, 1), dtype='int32'),
            broadcastable=(False, True))

        q_vals = lasagne.layers.get_output(self.l_out, states / input_scale)

        if self.freeze_interval > 0:
            next_q_vals = lasagne.layers.get_output(self.next_l_out,
                                                    next_states / input_scale)
        else:
            next_q_vals = lasagne.layers.get_output(self.l_out,
                                                    next_states / input_scale)
            next_q_vals = theano.gradient.disconnected_grad(next_q_vals)

        target = (rewards +
                  (T.ones_like(terminals) - terminals) *
                  self.discount * T.max(next_q_vals, axis=1, keepdims=True))
        diff = target - q_vals[T.arange(batch_size),
                               actions.reshape((-1,))].reshape((-1, 1))

        if self.clip_delta > 0:
            # If we simply take the squared clipped diff as our loss,
            # then the gradient will be zero whenever the diff exceeds
            # the clip bounds. To avoid this, we extend the loss
            # linearly past the clip point to keep the gradient constant
            # in that regime.
            #
            # This is equivalent to declaring d loss/d q_vals to be
            # equal to the clipped diff, then backpropagating from
            # there, which is what the DeepMind implementation does.
            quadratic_part = T.minimum(abs(diff), self.clip_delta)
            linear_part = abs(diff) - quadratic_part
            loss = 0.5 * quadratic_part ** 2 + self.clip_delta * linear_part
        else:
            loss = 0.5 * diff ** 2

        if batch_accumulator == 'sum':
            loss = T.sum(loss)
        elif batch_accumulator == 'mean':
            loss = T.mean(loss)
        else:
            raise ValueError("Bad accumulator: {}".format(batch_accumulator))

#......... part of this example's code omitted .........
Author: cowhi, Project: deep_q_rl, Lines: 101, Source: q_network.py


Example 16: __init__

    def __init__(self, batchSize, numFrames, inputHeight, inputWidth, numActions, 
        discountRate, learningRate, rho, rms_epsilon, momentum, networkUpdateDelay, useSARSAUpdate, kReturnLength,
        networkType = "conv", updateRule = "deepmind_rmsprop", batchAccumulator = "sum", clipDelta = 1.0, inputScale = 255.0):
        
        self.batchSize          = batchSize
        self.numFrames          = numFrames
        self.inputWidth         = inputWidth
        self.inputHeight        = inputHeight
        self.inputScale         = inputScale
        self.numActions         = numActions
        self.discountRate       = discountRate
        self.learningRate       = learningRate
        self.rho                = rho
        self.rms_epsilon        = rms_epsilon
        self.momentum           = momentum
        self.networkUpdateDelay = networkUpdateDelay
        self.useSARSAUpdate     = useSARSAUpdate
        self.kReturnLength      = kReturnLength
        self.networkType        = networkType
        self.updateRule         = updateRule
        self.batchAccumulator   = batchAccumulator
        self.clipDelta          = clipDelta
        self.updateCounter      = 0

        states     = T.tensor4("states")
        nextStates = T.tensor4("nextStates")
        rewards    = T.col("rewards")
        actions    = T.icol("actions")
        nextActions= T.icol("nextActions")
        terminals  = T.icol("terminals")

        self.statesShared      = theano.shared(np.zeros((self.batchSize, self.numFrames, self.inputHeight, self.inputWidth), dtype=theano.config.floatX))
        self.nextStatesShared  = theano.shared(np.zeros((self.batchSize, self.numFrames, self.inputHeight, self.inputWidth), dtype=theano.config.floatX))
        self.rewardsShared     = theano.shared(np.zeros((self.batchSize, 1), dtype=theano.config.floatX), broadcastable=(False, True))
        self.actionsShared     = theano.shared(np.zeros((self.batchSize, 1), dtype='int32'), broadcastable=(False, True))
        self.nextActionsShared = theano.shared(np.zeros((self.batchSize, 1), dtype='int32'), broadcastable=(False, True))
        self.terminalsShared   = theano.shared(np.zeros((self.batchSize, 1), dtype='int32'), broadcastable=(False, True))

        self.qValueNetwork  = DeepNetworks.buildDeepQNetwork(
            self.batchSize, self.numFrames, self.inputHeight, self.inputWidth, self.numActions, self.networkType)

        qValues = lasagne.layers.get_output(self.qValueNetwork, states / self.inputScale)

        if self.networkUpdateDelay > 0:
            self.nextQValueNetwork = DeepNetworks.buildDeepQNetwork(
                self.batchSize, self.numFrames, self.inputHeight, self.inputWidth, self.numActions, self.networkType)
            self.resetNextQValueNetwork()
            nextQValues = lasagne.layers.get_output(self.nextQValueNetwork, nextStates / self.inputScale)

        else:
            nextQValues = lasagne.layers.get_output(self.qValueNetwork, nextStates / self.inputScale)
            nextQValues = theano.gradient.disconnected_grad(nextQValues)


        if self.useSARSAUpdate:
            target = rewards + terminals * (self.discountRate ** self.kReturnLength) * nextQValues[T.arange(self.batchSize), nextActions.reshape((-1,))].reshape((-1, 1))
        else:
            target = rewards + terminals * (self.discountRate ** self.kReturnLength) * T.max(nextQValues, axis = 1, keepdims = True)

        targetDifference = target - qValues[T.arange(self.batchSize), actions.reshape((-1,))].reshape((-1, 1))


        quadraticPart = T.minimum(abs(targetDifference), self.clipDelta)
        linearPart = abs(targetDifference) - quadraticPart

        # if self.clipDelta > 0:
        #     targetDifference = targetDifference.clip(-1.0 * self.clipDelta, self.clipDelta)

        if self.batchAccumulator == "sum":
            # loss = T.sum(targetDifference ** 2)
            loss = T.sum(0.5 * quadraticPart ** 2 + self.clipDelta * linearPart)
        elif self.batchAccumulator == "mean":
            # loss = T.mean(targetDifference ** 2)
            loss = T.mean(0.5 * quadraticPart ** 2 + self.clipDelta * linearPart)
        else:
            raise ValueError("Bad Network Accumulator. {sum, mean} expected")


        networkParameters = lasagne.layers.helper.get_all_params(self.qValueNetwork)

        if self.updateRule == "deepmind_rmsprop":
            updates = DeepNetworks.deepmind_rmsprop(loss, networkParameters, self.learningRate, self.rho, self.rms_epsilon)
        elif self.updateRule == "rmsprop":
            updates = lasagne.updates.rmsprop(loss, networkParameters, self.learningRate, self.rho, self.rms_epsilon)
        elif self.updateRule == "sgd":
            updates = lasagne.updates.sgd(loss, networkParameters, self.learningRate)
        else:
            raise ValueError("Bad update rule. {deepmind_rmsprop, rmsprop, sgd} expected")

        if self.momentum > 0:
            updates = lasagne.updates.apply_momentum(updates, None, self.momentum)

        lossGivens = {
            states: self.statesShared,
            nextStates: self.nextStatesShared,
            rewards:self.rewardsShared,
            actions: self.actionsShared,
            nextActions: self.nextActionsShared,
            terminals: self.terminalsShared
        }
#......... part of this example's code omitted .........
Author: Mog333, Project: DeepRL, Lines: 101, Source: DeepQNetwork.py


Example 17: __init__

    def __init__(self, num_actions):
        
        # remember parameters
        self.num_actions = num_actions
        self.batch_size = BATCH_SIZE
        self.discount_rate = DISCOUNT_RATE
        self.history_length = HISTORY_LENGTH
        self.screen_dim = DIMS
        self.img_height = SCREEN_HEIGHT
        self.img_width = SCREEN_WIDTH
        self.clip_error = CLIP_ERROR
        self.input_color_scale = COLOR_SCALE

        self.target_steps = TARGET_STEPS
        self.train_iterations = TRAIN_STEPS
        self.train_counter = 0
        self.momentum = MOMENTUM
        self.update_rule = UPDATE_RULE
        self.learning_rate = LEARNING_RATE
        self.rms_decay = RMS_DECAY
        self.rms_epsilon = RMS_EPSILON        
        
        self.rng = np.random.RandomState(RANDOM_SEED)

        # set seed
        lasagne.random.set_rng(self.rng)

        # prepare tensors once and reuse them
        states = T.tensor4('states')
        next_states = T.tensor4('next_states')
        rewards = T.col('rewards')
        actions = T.icol('actions')
        # terminals are bool for our case
        terminals = T.bcol('terminals')

        # create shared theano variables
        self.states_shared = theano.shared(
            np.zeros((self.batch_size, self.history_length, self.img_height, self.img_width),
                     dtype=theano.config.floatX))

        self.next_states_shared = theano.shared(
            np.zeros((self.batch_size, self.history_length, self.img_height, self.img_width),
                     dtype=theano.config.floatX))

        # !broadcast ?
        self.rewards_shared = theano.shared(
            np.zeros((self.batch_size, 1), dtype=theano.config.floatX),
            broadcastable=(False, True))

        self.actions_shared = theano.shared(
            np.zeros((self.batch_size, 1), dtype='int32'),
            broadcastable=(False, True))

        self.terminals_shared = theano.shared(
            #np.zeros((self.batch_size, 1), dtype='int32'),
            np.zeros((self.batch_size, 1), dtype='int8'),
            broadcastable=(False, True))

        # can add multiple nets here
        self.l_primary = self.build_network()

        if self.target_steps > 0:
            self.l_secondary = self.build_network()
            self.copy_to_secondary()

        
        """
        # input scale i.e. division can be applied to input directly also to normalize
        """

        # define output symbols
        q_vals = lasagne.layers.get_output(self.l_primary, states / self.input_color_scale)
        
        if self.target_steps > 0:
            q_vals_secondary = lasagne.layers.get_output(self.l_secondary, next_states / self.input_color_scale)
        else:
            # why this ?
            q_vals_secondary = lasagne.layers.get_output(self.l_primary, next_states / self.input_color_scale)
            q_vals_secondary = theano.gradient.disconnected_grad(q_vals_secondary)

        # target = r + max
        target = (rewards + (T.ones_like(terminals) - terminals) * self.discount_rate * T.max(q_vals_secondary, axis=1, keepdims=True))
        
        """
        # check what this does
        """
        diff = target - q_vals[T.arange(self.batch_size),
                               actions.reshape((-1,))].reshape((-1, 1))

        # print shape ? 

        if self.clip_error > 0:
            # If we simply take the squared clipped diff as our loss,
            # then the gradient will be zero whenever the diff exceeds
            # the clip bounds. To avoid this, we extend the loss
            # linearly past the clip point to keep the gradient constant
            # in that regime.
            # 
            # This is equivalent to declaring d loss/d q_vals to be
            # equal to the clipped diff, then backpropagating from
#......... part of this example's code omitted .........
Author: hercky, Project: a3c, Lines: 101, Source: network.py

