
Python torch.tensor Function Code Examples


This article collects and organizes typical usage examples of the torch.tensor function in Python. If you have been wondering exactly what torch.tensor does, or how to use it in practice, the curated code examples below should help.



Twenty code examples of the tensor function are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code samples.
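
Before diving into the examples, here is a minimal sketch of the most common torch.tensor call patterns (the variable names are purely illustrative):

    import numpy as np
    import torch

    # build a tensor from a Python list; dtype is inferred (int64 here)
    a = torch.tensor([[1, 2], [3, 4]])

    # specify dtype and device explicitly
    b = torch.tensor([1.0, 2.0], dtype=torch.float32, device="cpu")

    # torch.tensor always copies its input; torch.from_numpy shares memory instead
    arr = np.ones(3)
    c = torch.tensor(arr)      # independent copy
    d = torch.from_numpy(arr)  # view over the same buffer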

Example 1: test_gather_extended_gold_tokens

    def test_gather_extended_gold_tokens(self):
        vocab_size = self.model._target_vocab_size
        end_index = self.model._end_index
        pad_index = self.model._pad_index
        oov_index = self.model._oov_index
        tok_index = 6  # some other arbitrary token
        assert tok_index not in [end_index, pad_index, oov_index]

        # first sentence tokens:
        #  1: oov but not copied
        #  2: not oov and not copied
        #  3: not copied
        #  4: not copied
        # second sentence tokens:
        #  1: not oov and copied
        #  2: oov and copied
        #  3: not copied
        #  4: not copied

        # shape: (batch_size, target_sequence_length)
        target_tokens = torch.tensor([[oov_index, tok_index, end_index, pad_index],
                                      [tok_index, oov_index, tok_index, end_index]])
        # shape: (batch_size, trimmed_source_length)
        source_token_ids = torch.tensor([[0, 1, 2, 3],
                                         [0, 1, 0, 2]])
        # shape: (batch_size, target_sequence_length)
        target_token_ids = torch.tensor([[4, 5, 6, 7],
                                         [1, 0, 3, 4]])
        # shape: (batch_size, target_sequence_length)
        result = self.model._gather_extended_gold_tokens(target_tokens, source_token_ids, target_token_ids)
        # shape: (batch_size, target_sequence_length)
        check = np.array([[oov_index, tok_index, end_index, pad_index],
                          [tok_index, vocab_size, tok_index, end_index]])
        np.testing.assert_array_equal(result.numpy(), check)
Developer: apmoore1 | Project: allennlp | Lines: 34 | Source: copynet_seq2seq_test.py


Example 2: calc_loss

def calc_loss(batch, net, tgt_net, gamma, device="cpu", save_prefix=None):
    states, actions, rewards, dones, next_states = common.unpack_batch(batch)
    batch_size = len(batch)

    states_v = torch.tensor(states).to(device)
    actions_v = torch.tensor(actions).to(device)
    next_states_v = torch.tensor(next_states).to(device)

    # next state distribution
    next_distr_v, next_qvals_v = tgt_net.both(next_states_v)
    next_actions = next_qvals_v.max(1)[1].data.cpu().numpy()
    next_distr = tgt_net.apply_softmax(next_distr_v).data.cpu().numpy()

    next_best_distr = next_distr[range(batch_size), next_actions]
    dones = dones.astype(bool)  # np.bool is deprecated (removed in NumPy 1.24); use the builtin bool

    # project our distribution using Bellman update
    proj_distr = common.distr_projection(next_best_distr, rewards, dones, Vmin, Vmax, N_ATOMS, gamma)

    # calculate net output
    distr_v = net(states_v)
    state_action_values = distr_v[range(batch_size), actions_v.data]
    state_log_sm_v = F.log_softmax(state_action_values, dim=1)
    proj_distr_v = torch.tensor(proj_distr).to(device)

    if save_prefix is not None:
        pred = F.softmax(state_action_values, dim=1).data.cpu().numpy()
        save_transition_images(batch_size, pred, proj_distr, next_best_distr, dones, rewards, save_prefix)

    loss_v = -state_log_sm_v * proj_distr_v
    return loss_v.sum(dim=1).mean()
Developer: dhaopku | Project: Deep-Reinforcement-Learning-Hands-On | Lines: 31 | Source: 07_dqn_distrib.py
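
A note on the pattern above: torch.tensor(states) copies the NumPy array on every call. When that copy matters, torch.as_tensor (or torch.from_numpy) avoids it by reusing the underlying buffer where possible. A minimal sketch, assuming states is a NumPy array:

    states_v = torch.as_tensor(states).to(device)  # no copy on CPU; .to() moves it to the GPU if needed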


Example 3: test_factory

 def test_factory(self):
     default_size = torch.Size([1, 3])
     size = torch.Size([3, 3])
     for include_size in [True, False]:
         for use_tensor_idx in [True, False]:
             for use_tensor_val in [True, False]:
                 for use_cuda in ([False] if not torch.cuda.is_available() else [True, False]):
                     # have to include size with cuda sparse tensors
                     include_size = include_size or use_cuda
                     dtype = torch.float64
                     long_dtype = torch.int64
                     device = torch.device('cpu') if not use_cuda else torch.device(torch.cuda.device_count() - 1)
                     indices = torch.tensor(([0], [2]), dtype=long_dtype) if use_tensor_idx else ([0], [2])
                     values = torch.tensor([1.], dtype=dtype) if use_tensor_val else 1.
                     if include_size:
                         sparse_tensor = torch.sparse_coo_tensor(indices, values, size, dtype=dtype,
                                                                 device=device, requires_grad=True)
                     else:
                         sparse_tensor = torch.sparse_coo_tensor(indices, values, dtype=dtype,
                                                                 device=device, requires_grad=True)
                     self.assertEqual(indices, sparse_tensor._indices())
                     self.assertEqual(values, sparse_tensor._values())
                     self.assertEqual(size if include_size else default_size, sparse_tensor.size())
                     self.assertEqual(dtype, sparse_tensor.dtype)
                     if use_cuda:
                         self.assertEqual(device, sparse_tensor._values().device)
                     self.assertEqual(True, sparse_tensor.requires_grad)
Developer: gtgalone | Project: pytorch | Lines: 27 | Source: test_sparse.py


Example 4: model

 def model():
     p2 = torch.tensor(torch.ones(2) / 2)
     p3 = torch.tensor(torch.ones(3) / 3)
     x2 = pyro.sample("x2", dist.OneHotCategorical(p2))
     x3 = pyro.sample("x3", dist.OneHotCategorical(p3))
     assert x2.shape == torch.Size([2]) + iarange_shape + p2.shape
     assert x3.shape == torch.Size([3, 1]) + iarange_shape + p3.shape
Developer: lewisKit | Project: pyro | Lines: 7 | Source: test_valid_models.py


Example 5: generate_translation

def generate_translation(encoder, decoder, sentence, max_length, target_lang, search="greedy", k = None):
    """ 
    @param max_length: the max # of words that the decoder can return
    @returns decoded_words: a list of words in target language
    """    
    with torch.no_grad():
        input_tensor = sentence
        input_length = sentence.size()[1]
        
        # encode the source sentence
        encoder_hidden = encoder.init_hidden(1)
        # input_tensor has shape 1 x 12
        encoder_outputs, encoder_hidden = encoder(input_tensor.view(1, -1),torch.tensor([input_length]))
        # start decoding
        decoder_input = torch.tensor([[SOS_token]], device=device)  # SOS
        decoder_hidden = encoder_hidden
        decoded_words = []
        
        if search == 'greedy':
            decoded_words = greedy_search_batch(decoder, decoder_input, encoder_outputs, decoder_hidden, max_length)
        elif search == 'beam':
            if k is None:
                k = 2  # note that k = 2 performs poorly
            decoded_words = beam_search(decoder, decoder_input, encoder_outputs, decoder_hidden, max_length, k, target_lang) 

        return decoded_words
Developer: vwrj | Project: neural_machine_translation | Lines: 27 | Source: V2-Attention-Vish.py


Example 6: test_index_setitem_bools_slices

    def test_index_setitem_bools_slices(self):
        true = torch.tensor(1, dtype=torch.uint8)
        false = torch.tensor(0, dtype=torch.uint8)

        tensors = [Variable(torch.randn(2, 3)), torch.tensor(3)]

        for a in tensors:
            # prefix with a 1,1, to ensure we are compatible with numpy which cuts off prefix 1s
            # (some of these ops already prefix a 1 to the size)
            neg_ones = torch.ones_like(a) * -1
            neg_ones_expanded = neg_ones.unsqueeze(0).unsqueeze(0)
            a[True] = neg_ones_expanded
            self.assertEqual(a, neg_ones)
            a[False] = 5
            self.assertEqual(a, neg_ones)
            a[true] = neg_ones_expanded * 2
            self.assertEqual(a, neg_ones * 2)
            a[false] = 5
            self.assertEqual(a, neg_ones * 2)
            a[None] = neg_ones_expanded * 3
            self.assertEqual(a, neg_ones * 3)
            a[...] = neg_ones_expanded * 4
            self.assertEqual(a, neg_ones * 4)
            if a.dim() == 0:
                with self.assertRaises(RuntimeError):
                    a[:] = neg_ones_expanded * 5
Developer: MaheshBhosale | Project: pytorch | Lines: 26 | Source: test_indexing.py


Example 7: diamond_guide

def diamond_guide(dim):
    p0 = torch.tensor(math.exp(-0.70), requires_grad=True)
    p1 = torch.tensor(math.exp(-0.43), requires_grad=True)
    pyro.sample("a1", dist.Bernoulli(p0))
    for i in pyro.irange("irange", dim):
        pyro.sample("b{}".format(i), dist.Bernoulli(p1))
    pyro.sample("c1", dist.Bernoulli(p0))
Developer: lewisKit | Project: pyro | Lines: 7 | Source: test_compute_downstream_costs.py


Example 8: perform_val

def perform_val(multi_gpu, device, embedding_size, batch_size, backbone, carray, issame, nrof_folds = 10, tta = True):
    if multi_gpu:
        backbone = backbone.module # unpackage model from DataParallel
        backbone = backbone.to(device)
    else:
        backbone = backbone.to(device)
    backbone.eval() # switch to evaluation mode

    idx = 0
    embeddings = np.zeros([len(carray), embedding_size])
    with torch.no_grad():
        while idx + batch_size <= len(carray):
            batch = torch.tensor(carray[idx:idx + batch_size][:, [2, 1, 0], :, :])
            if tta:
                flipped = hflip_batch(batch)
                emb_batch = backbone(batch.to(device)).cpu() + backbone(flipped.to(device)).cpu()
                embeddings[idx:idx + batch_size] = l2_norm(emb_batch)
            else:
                embeddings[idx:idx + batch_size] = backbone(batch.to(device)).cpu()
            idx += batch_size
        if idx < len(carray):
            batch = torch.tensor(carray[idx:])
            if tta:
                flipped = hflip_batch(batch)
                emb_batch = backbone(batch.to(device)).cpu() + backbone(flipped.to(device)).cpu()
                embeddings[idx:] = l2_norm(emb_batch)
            else:
                embeddings[idx:] = backbone(batch.to(device)).cpu()

    tpr, fpr, accuracy, best_thresholds = evaluate(embeddings, issame, nrof_folds)
    buf = gen_plot(fpr, tpr)
    roc_curve = Image.open(buf)
    roc_curve_tensor = transforms.ToTensor()(roc_curve)

    return accuracy.mean(), best_thresholds.mean(), roc_curve_tensor
Developer: stjordanis | Project: face.evoLVe.PyTorch | Lines: 35 | Source: utils.py


Example 9: testDutyCycleUpdate

  def testDutyCycleUpdate(self):
    """
    Start with equal duty cycle, boost factor=0, k=4, batch size=2
    """
    x = self.x2

    expected = torch.zeros_like(x)
    expected[0, 0, 1, 0] = 1.1
    expected[0, 0, 1, 1] = 1.2
    expected[0, 1, 0, 1] = 1.2
    expected[0, 2, 1, 0] = 1.3
    expected[1, 0, 0, 0] = 1.4
    expected[1, 1, 0, 0] = 1.5
    expected[1, 1, 0, 1] = 1.6
    expected[1, 2, 1, 1] = 1.7

    dutyCycle = torch.zeros((1, 3, 1, 1))
    dutyCycle[:] = 1.0 / 3.0
    updateDutyCycleCNN(expected, dutyCycle, 2, 2)
    newDuty = torch.tensor([1.5000, 1.5000, 1.0000]) / 4.0
    diff = (dutyCycle.reshape(-1) - newDuty).abs().sum()
    self.assertLessEqual(diff, 0.001)

    dutyCycle[:] = 1.0 / 3.0
    updateDutyCycleCNN(expected, dutyCycle, 4, 4)
    newDuty = torch.tensor([0.3541667, 0.3541667, 0.2916667])
    diff = (dutyCycle.reshape(-1) - newDuty).abs().sum()
    self.assertLessEqual(diff, 0.001)
Developer: rhyolight | Project: nupic.research | Lines: 28 | Source: k_winners_cnn_test.py


Example 10: test_advance_with_all_repeats_gets_blocked

 def test_advance_with_all_repeats_gets_blocked(self):
     # all beams repeat (beam >= 1 repeat dummy scores)
     beam_sz = 5
     n_words = 100
     repeat_idx = 47
     ngram_repeat = 3
     for batch_sz in [1, 3]:
         beam = BeamSearch(
             beam_sz, batch_sz, 0, 1, 2, 2,
             torch.device("cpu"), GlobalScorerStub(), 0, 30,
             False, ngram_repeat, set(),
             torch.randint(0, 30, (batch_sz,)), False, 0.)
         for i in range(ngram_repeat + 4):
             # predict repeat_idx over and over again
             word_probs = torch.full(
                 (batch_sz * beam_sz, n_words), -float('inf'))
             word_probs[0::beam_sz, repeat_idx] = 0
             attns = torch.randn(1, batch_sz * beam_sz, 53)
             beam.advance(word_probs, attns)
             if i <= ngram_repeat:
                 expected_scores = torch.tensor(
                             [0] + [-float('inf')] * (beam_sz - 1))\
                         .repeat(batch_sz, 1)
                 self.assertTrue(beam.topk_log_probs.equal(expected_scores))
             else:
                 self.assertTrue(
                     beam.topk_log_probs.equal(
                         torch.tensor(self.BLOCKED_SCORE)
                         .repeat(batch_sz, beam_sz)))
Developer: Unbabel | Project: OpenNMT-py | Lines: 29 | Source: test_beam_search.py


Example 11: test_optimizers

def test_optimizers(factory):
    optim = factory()

    def model(loc, cov):
        x = pyro.param("x", torch.randn(2))
        y = pyro.param("y", torch.randn(3, 2))
        z = pyro.param("z", torch.randn(4, 2).abs(), constraint=constraints.greater_than(-1))
        pyro.sample("obs_x", dist.MultivariateNormal(loc, cov), obs=x)
        with pyro.iarange("y_iarange", 3):
            pyro.sample("obs_y", dist.MultivariateNormal(loc, cov), obs=y)
        with pyro.iarange("z_iarange", 4):
            pyro.sample("obs_z", dist.MultivariateNormal(loc, cov), obs=z)

    loc = torch.tensor([-0.5, 0.5])
    cov = torch.tensor([[1.0, 0.09], [0.09, 0.1]])
    for step in range(100):
        tr = poutine.trace(model).get_trace(loc, cov)
        loss = -tr.log_prob_sum()
        params = {name: pyro.param(name).unconstrained() for name in ["x", "y", "z"]}
        optim.step(loss, params)

    for name in ["x", "y", "z"]:
        actual = pyro.param(name)
        expected = loc.expand(actual.shape)
        assert_equal(actual, expected, prec=1e-2,
                     msg='{} incorrect: {} vs {}'.format(name, actual, expected))
Developer: lewisKit | Project: pyro | Lines: 26 | Source: test_multi.py


Example 12: run_episode

    def run_episode(self, episode, steps_accumulated=0):
        start_time = time.time()
        observation = self.env.reset()
        state = torch.from_numpy(observation).to(self.config.device, dtype=torch.float32).unsqueeze(0)

        for step in range(MAX_STEPS):
            action = self.agent.get_action(state, step + steps_accumulated)

            observation_next, _, done, _ = self.env.step(action.item())

            if done:
                state_next = None
                self.total_step = np.hstack((self.total_step[1:], step + 1))
                if self.is_success_episode(step):
                    reward = torch.tensor([1.0], dtype=torch.float32, device=self.config.device)
                else:
                    reward = torch.tensor([-1.0], dtype=torch.float32, device=self.config.device)

            else:
                reward = torch.tensor([0.0], dtype=torch.float32, device=self.config.device)
                state_next = torch.from_numpy(observation_next).to(self.config.device, dtype=torch.float32).unsqueeze(0)

            self.agent.observe(state, action, state_next, reward)
            if step % self.config.replay_interval == 0:
                self.agent.learn(episode)

            state = state_next

            if done:
                elapsed_time = round(time.time() - start_time, 3)
                print('episode: {0}, steps: {1}, mean steps {2}, time: {3}'.format(episode, step, self.total_step.mean(), elapsed_time))
                return step + 1

        return MAX_STEPS
Developer: y-kamiya | Project: machine-learning-samples | Lines: 34 | Source: cartpole_rainbow.py


Example 13: load

    def load(self, fdata, use_char=False, n_context=1, max_len=10):
        sentences = self.preprocess(fdata)
        x, y, char_x, lens = [], [], [], []

        for wordseq, tagseq in sentences:
            wiseq = [self.wdict.get(w, self.unk_wi) for w in wordseq]
            tiseq = [self.tdict[t] for t in tagseq]
            # build the context window for each word
            if n_context > 1:
                x.append(self.get_context(wiseq, n_context))
            else:
                x.append(torch.tensor(wiseq, dtype=torch.long))
            y.append(torch.tensor(tiseq, dtype=torch.long))
            # pad words shorter than max_len with zeros
            char_x.append(torch.tensor([
                [self.cdict.get(c, self.unk_ci)
                 for c in w[:max_len]] + [0] * (max_len - len(w))
                for w in wordseq
            ]))
            lens.append(len(tiseq))

        x = pad_sequence(x, True)
        y = pad_sequence(y, True)
        char_x = pad_sequence(char_x, True)
        lens = torch.tensor(lens)

        if use_char:
            dataset = TensorDataset(x, y, char_x, lens)
        else:
            dataset = TensorDataset(x, y, lens)

        return dataset
Developer: zysite | Project: post | Lines: 32 | Source: corpus.py


Example 14: guide

 def guide():
     q1 = pyro.param("q1", torch.tensor(pi1, requires_grad=True))
     q2 = pyro.param("q2", torch.tensor(pi2, requires_grad=True))
     with pyro.iarange("particles", num_particles):
         y = pyro.sample("y", dist.Bernoulli(q1).expand_by([num_particles]), infer={"enumerate": enumerate1})
         if include_z:
             pyro.sample("z", dist.Normal(q2 * y + 0.10, 1.0))
Developer: lewisKit | Project: pyro | Lines: 7 | Source: test_enum.py


Example 15: __init__

 def __init__(self, mean, std):
     super(Normalization, self).__init__()
     # .view the mean and std to make them [C x 1 x 1] so that they can
     # directly work with image Tensor of shape [B x C x H x W].
     # B is batch size. C is number of channels. H is height and W is width.
     self.mean = torch.tensor(mean).view(-1, 1, 1)
     self.std = torch.tensor(std).view(-1, 1, 1)
Developer: maranemil | Project: howto | Lines: 7 | Source: torch_style_stransfer_cpu.py
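
The .view(-1, 1, 1) calls above are what make broadcasting work: a length-C vector becomes shape [C x 1 x 1], which broadcasts against an image batch of shape [B x C x H x W]. A minimal sketch using the standard ImageNet normalization constants:

    import torch

    mean = torch.tensor([0.485, 0.456, 0.406]).view(-1, 1, 1)  # [3, 1, 1]
    std = torch.tensor([0.229, 0.224, 0.225]).view(-1, 1, 1)   # [3, 1, 1]
    img = torch.rand(8, 3, 224, 224)                           # [B, C, H, W]
    normalized = (img - mean) / std                            # broadcasts over B, H, W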


Example 16: model

 def model(num_particles):
     with pyro.iarange("particles", num_particles):
         q3 = pyro.param("q3", torch.tensor(pi3, requires_grad=True))
         q4 = pyro.param("q4", torch.tensor(0.5 * (pi1 + pi2), requires_grad=True))
         z = pyro.sample("z", dist.Normal(q3, 1.0).expand_by([num_particles]))
         zz = torch.exp(z) / (1.0 + torch.exp(z))
         pyro.sample("y", dist.Bernoulli(q4 * zz))
Developer: lewisKit | Project: pyro | Lines: 7 | Source: test_enum.py


Example 17: forward

 def forward(self, sents, sent_lengths):
     '''
         sents is (batch_size by padded_length)
         when we evaluate sentence by sentence, you evaluate it with batch_size = 1, padded_length.
         [[1, 2, 3, 4]] etc. 
     '''
     batch_size = sents.size()[0]
     sent_lengths = list(sent_lengths)
     # We sort and then do pad packed sequence here. 
     descending_lengths = [x for x, _ in sorted(zip(sent_lengths, range(len(sent_lengths))), reverse=True)]
     descending_indices = [x for _, x in sorted(zip(sent_lengths, range(len(sent_lengths))), reverse=True)]
     descending_lengths = torch.tensor(descending_lengths)
     descending_indices = torch.tensor(descending_indices).to(device)
      descending_sents = torch.index_select(sents, 0, descending_indices)  # dim is a plain int, not a tensor
     
     # get embedding
     embed = self.embedding(descending_sents)
     # pack padded sequence
     embed = torch.nn.utils.rnn.pack_padded_sequence(embed, descending_lengths, batch_first=True)
     
     # fprop though RNN
     self.hidden = self.init_hidden(batch_size)
     rnn_out, self.hidden = self.gru(embed, self.hidden)
     rnn_out, _ = torch.nn.utils.rnn.pad_packed_sequence(rnn_out, batch_first=True)
     # rnn_out is 32 by 72 by 256
     
     # change the order back
     change_it_back = [x for _, x in sorted(zip(descending_indices, range(len(descending_indices))))]
     self.hidden = torch.index_select(self.hidden, 1, torch.LongTensor(change_it_back).to(device))  
     rnn_out = torch.index_select(rnn_out, 0, torch.LongTensor(change_it_back).to(device)) 
     
     return rnn_out, self.hidden
Developer: vwrj | Project: neural_machine_translation | Lines: 33 | Source: V2-Attention-Vish.py


Example 18: test_cases_cos

def test_cases_cos():
    a = torch.tensor([[1, 2, 3.0], [0, 0, 0.0]])
    b = torch.tensor([[1, 2, 3.1], [-1, -2, -3.0]])
    a2 = torch.tensor([[1, 2, 3.0], [0, 0, 0]])
    b2 = torch.tensor([[1, 2, 3.1], [-1, -2, -3.0], [5, 5, 5.0], [6, 6, 6.0]])
    a3 = torch.tensor([1, 2, 3.0])
    b3 = torch.tensor([1, 2, 3.1])
    a31 = torch.tensor([[1, 2, 3.0]])
    b31 = torch.tensor([[1, 2, 3.1]])

    ar = np.random.rand(5, 10)
    br = np.random.rand(15, 10)
    art = torch.tensor(ar)
    brt = torch.tensor(br)

    cos(a, b)
    cos(a2, b2)
    abrt = cos(art, brt)
    print("sklearn cos:", sklearn.metrics.pairwise.cosine_similarity(ar, br))
    cos(a3, b3)
    try:
        cos(a3, b3)
    except:
        print("cos(a3,b3) failed")
    try:
        cos([a3], [b3])
    except:
        print("cos([a3],[b3]) failed")
    cos(a31, b31)
Developer: tianzhiliang | Project: test | Lines: 29 | Source: cosine_similarity.py
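
The cos function exercised above is defined elsewhere in that file. For reference, PyTorch ships an equivalent in torch.nn.functional.cosine_similarity; a minimal sketch of the same row-wise comparison:

    import torch
    import torch.nn.functional as F

    a = torch.tensor([[1.0, 2.0, 3.0], [0.0, 0.0, 0.0]])
    b = torch.tensor([[1.0, 2.0, 3.1], [-1.0, -2.0, -3.0]])
    print(F.cosine_similarity(a, b, dim=1))  # row-wise; the all-zero row yields 0 thanks to the eps clamp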


Example 19: testBinaryEntropy

  def testBinaryEntropy(self):

    p = torch.tensor([0.1, 0.02, 0.99, 0.5, 0.75, 0.8, 0.3, 0.4, 0.0, 1.0])
    entropy, entropySum = binaryEntropy(p)
    self.assertAlmostEqual(entropySum, 5.076676985, places=4)
    self.assertAlmostEqual(entropySum, entropy.sum(), places=4)
    self.assertAlmostEqual(entropy[0], 0.468995594, places=4)
    self.assertAlmostEqual(entropy[1], 0.141440543, places=4)
    self.assertAlmostEqual(entropy[2], 0.080793136, places=4)
    self.assertEqual(entropy[8], 0.0)
    self.assertEqual(entropy[9], 0.0)

    p = torch.tensor([0.25, 0.25, 0.25, 0.25])
    entropy, entropySum = binaryEntropy(p)
    self.assertAlmostEqual(entropySum, 3.245112498, places=4)
    self.assertAlmostEqual(entropySum, entropy.sum(), places=4)

    p = torch.tensor([0.5, 0.5, 0.5, 0.5])
    entropy, entropySum = binaryEntropy(p)
    self.assertAlmostEqual(entropySum, 4.0, places=4)
    self.assertAlmostEqual(entropySum, entropy.sum(), places=4)
    self.assertAlmostEqual(entropy[0], 1.0, places=4)
    self.assertAlmostEqual(entropy[1], 1.0, places=4)
    self.assertAlmostEqual(entropy[2], 1.0, places=4)
    self.assertAlmostEqual(entropy[3], 1.0, places=4)
Developer: rhyolight | Project: nupic.research | Lines: 25 | Source: duty_cycle_metrics_test.py


Example 20: train_batch

def train_batch(param):
    if len(memory) < param['batch_size']:
        return 0
    batch = memory.sample(param['batch_size'])
    batch_states = default_states_preprocessor([m.state for m in batch])
    batch_next_states = default_states_preprocessor([m.next_state for m in batch])
    batch_ended = torch.tensor([m.ended for m in batch])
    batch_rewards = torch.tensor([m.reward for m in batch]).to(device)
    batch_actions = torch.tensor([m.action for m in batch]).to(device)

    ## Calculate expected reward:
    with torch.set_grad_enabled(False):
        not_ended_batch = 1 - torch.ByteTensor(batch_ended).to(device)
        next_states_non_final = batch_next_states[not_ended_batch]
        next_state_values = torch.zeros(param['batch_size']).to(device)
        reward_hat = target_dqn(next_states_non_final)
        next_state_values[not_ended_batch] = reward_hat.max(1)[0]
        expected_state_action_values = next_state_values*param['GAMMA'] + batch_rewards

    # Predict value function:
    yhat = dqn(batch_states)
    state_action_values = yhat.gather(1, batch_actions.unsqueeze(1)).squeeze()

    loss = F.smooth_l1_loss(state_action_values, expected_state_action_values)
    optimizer.zero_grad()
    loss.backward()
    for p in dqn.parameters():  # use a new name to avoid shadowing the `param` argument
        p.data.clamp_(-1, 1)
    optimizer.step()
    return float(loss.data.cpu().numpy())
Developer: abri-simond | Project: RL | Lines: 30 | Source: train_atari.py



Note: The torch.tensor function examples in this article were compiled by 纯净天空 from source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by many developers; copyright for the source code belongs to the original authors. For distribution and use, please consult the corresponding project's License. Do not repost without permission.

