
Python torch.bmm Function Code Examples


This article collects typical usage examples of Python's torch.bmm function, which computes a batched matrix-matrix product. If you have been wondering what bmm does, how to call it, or what it looks like in real code, the curated examples below should help.

Twenty code examples of the bmm function are shown below, ordered by popularity.
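
As a primer, here is a minimal sketch (not drawn from any of the projects below) of the shape contract: torch.bmm takes a batch of matrices of shape (b, n, m) and a batch of shape (b, m, p) and returns their batchwise matrix product of shape (b, n, p).

import torch

a = torch.randn(10, 3, 4)  # batch of 10 matrices, each 3x4
b = torch.randn(10, 4, 5)  # batch of 10 matrices, each 4x5
out = torch.bmm(a, b)      # one matrix product per batch element
print(out.shape)           # torch.Size([10, 3, 5])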

Example 1: forward

    def forward(self, inputs):
        x, u = inputs
        x = self.bn0(x)
        x = F.tanh(self.linear1(x))
        x = F.tanh(self.linear2(x))

        V = self.V(x)
        mu = F.tanh(self.mu(x))

        Q = None
        if u is not None:
            num_outputs = mu.size(1)
            L = self.L(x).view(-1, num_outputs, num_outputs)
            L = L * \
                self.tril_mask.expand_as(
                    L) + torch.exp(L) * self.diag_mask.expand_as(L)
            P = torch.bmm(L, L.transpose(2, 1))

            u_mu = (u - mu).unsqueeze(2)
            A = -0.5 * \
                torch.bmm(torch.bmm(u_mu.transpose(2, 1), P), u_mu)[:, :, 0]

            Q = A + V

        return mu, Q, V
Developer: lenvdv | Project: pytorch-ddpg-naf | Lines: 25 | Source: naf.py
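
Example 1 implements the NAF advantage term: masking keeps only the strictly lower triangle of the raw network output and exponentiates the diagonal, so L is lower-triangular with a positive diagonal and P = L·Lᵀ is symmetric positive definite. A standalone sketch of that masking trick (the mask construction here is an assumption; the quoted model stores tril_mask and diag_mask as buffers):

import torch

n, bs = 3, 5
raw = torch.randn(bs, n, n)                       # unconstrained network output
tril_mask = torch.tril(torch.ones(n, n), -1)      # ones strictly below the diagonal
diag_mask = torch.eye(n)                          # ones on the diagonal
L = raw * tril_mask + torch.exp(raw) * diag_mask  # lower-triangular, positive diagonal
P = torch.bmm(L, L.transpose(2, 1))               # symmetric positive definite
print(torch.linalg.eigvalsh(P).min() > 0)         # tensor(True): all eigenvalues positive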


Example 2: predict

 def predict(self, x_de, x_en):
     bs = x_de.size(0)
     emb_de = self.embedding_de(x_de) # bs,n_de,word_dim
     emb_en = self.embedding_en(x_en) # bs,n_en,word_dim
     h_enc = Variable(torch.zeros(self.n_layers*self.directions, bs, self.hidden_dim).cuda())
     c_enc = Variable(torch.zeros(self.n_layers*self.directions, bs, self.hidden_dim).cuda())
     h_dec = Variable(torch.zeros(self.n_layers, bs, self.hidden_dim).cuda())
     c_dec = Variable(torch.zeros(self.n_layers, bs, self.hidden_dim).cuda())
     enc_h, _ = self.encoder(emb_de, (h_enc, c_enc)) # (bs,n_de,hiddensz*2)
     dec_h, _ = self.decoder(emb_en, (h_dec, c_dec)) # (bs,n_en,hiddensz)
     # all the same. enc_h is bs,n_de,hiddensz*n_directions. h and c are both n_layers*n_directions,bs,hiddensz
     if self.directions == 2:
         scores = torch.bmm(self.dim_reduce(enc_h), dec_h.transpose(1,2))
     else:
         scores = torch.bmm(enc_h, dec_h.transpose(1,2))
     # (bs,n_de,hiddensz) * (bs,hiddensz,n_en) = (bs,n_de,n_en)
     scores[(x_de == pad_token).unsqueeze(2).expand(scores.size())] = -math.inf # binary mask
     attn_dist = F.softmax(scores,dim=1) # bs,n_de,n_en
     context = torch.bmm(attn_dist.transpose(2,1),enc_h)
     # (bs,n_en,n_de) * (bs,n_de,hiddensz*ndirections) = (bs,n_en,hiddensz*ndirections)
     pred = self.vocab_layer(torch.cat([dec_h,context],2)) # bs,n_en,len(EN.vocab)
     pred = pred[:,:-1,:] # alignment
     _, tokens = pred.max(2) # bs,n_en-1
     sauce = Variable(torch.cuda.LongTensor([[sos_token]]*bs)) # bs
     return torch.cat([sauce,tokens],1), attn_dist
Developer: anihamde | Project: cs287-s18 | Lines: 25 | Source: visualization_junk_copy_to_aws.py


Example 3: forward

    def forward(self, feat, right, wrong, batch_wrong, fake=None, fake_diff_mask=None):

        num_wrong = wrong.size(1)
        batch_size = feat.size(0)

        feat = feat.view(-1, self.ninp, 1)
        right_dis = torch.bmm(right.view(-1, 1, self.ninp), feat)
        wrong_dis = torch.bmm(wrong, feat)
        batch_wrong_dis = torch.bmm(batch_wrong, feat)

        wrong_score = torch.sum(torch.exp(wrong_dis - right_dis.expand_as(wrong_dis)),1) \
                + torch.sum(torch.exp(batch_wrong_dis - right_dis.expand_as(batch_wrong_dis)),1)

        loss_dis = torch.sum(torch.log(wrong_score + 1))
        loss_norm = right.norm() + feat.norm() + wrong.norm() + batch_wrong.norm()

        if fake is not None:  # truth value of a multi-element tensor is ambiguous
            fake_dis = torch.bmm(fake.view(-1, 1, self.ninp), feat)
            fake_score = torch.masked_select(torch.exp(fake_dis - right_dis), fake_diff_mask)

            margin_score = F.relu(torch.log(fake_score + 1) - self.margin)
            loss_fake = torch.sum(margin_score)
            loss_dis += loss_fake
            loss_norm += fake.norm()

        loss = (loss_dis + 0.1 * loss_norm) / batch_size
        if fake is not None:
            return loss, loss_fake.data[0] / batch_size
        else:
            return loss
Developer: AashishV | Project: visDial.pytorch | Lines: 30 | Source: model.py


Example 4: forward

    def forward(self, vocab):
        with torch.no_grad():
            batch_shape = vocab['sentence'].shape
            s_embedding = self.embedding(vocab['sentence'].cuda())
            a_embedding = self.embedding(vocab['aspect'].cuda())

            packed_s = pack_padded_sequence(s_embedding, vocab['sent_len'], batch_first=True)

        out_s, (h_s, c1) = self.lstm_s(packed_s) # packed output
        out_a, (h_a, c2) = self.lstm_a(a_embedding)

        with torch.no_grad():
            unpacked_out_s, _ = pad_packed_sequence(out_s, batch_first=True)

        # Pair-wise interaction matrix
        I_matrix = torch.bmm(unpacked_out_s, out_a.permute(0,2,1))

        # Column-wise softmax
        a2s_attn = F.softmax(I_matrix, dim=1)

        # Row-wise softmax => Column-wise average => aspect attention
        s2a_attn = F.softmax(I_matrix, dim=2)
        a_attn = torch.mean(s2a_attn, dim=1)

        # Final sentence attn => weighted sum of each individual a2s_attn
        s_attn = torch.bmm(a2s_attn, a_attn.unsqueeze(-1))

        final_rep = torch.bmm(unpacked_out_s.permute(0,2,1), s_attn).squeeze(-1)
        pred = self.fc(final_rep)
        return pred
Developer: bearcave9 | Project: Weekend-Projects | Lines: 30 | Source: AOA_LSTM.py


Example 5: forward

    def forward(self, ht, hs, mask, weighted_ctx=True):
        '''
        ht: batch x ht_dim
        hs: (seq_len x batch x hs_dim, seq_len x batch x ht_dim)
        mask: seq_len x batch
        '''
        hs, hs_ = hs
        # seq_len, batch, _ = hs.size()
        hs = hs.transpose(0, 1)
        hs_ = hs_.transpose(0, 1)
        # hs: batch x seq_len x hs_dim
        # hs_: batch x seq_len x ht_dim
        # hs_ = self.hs2ht(hs)
        # Alignment/Attention Function
        # batch x ht_dim x 1
        ht = ht.unsqueeze(2)
        # batch x seq_len
        score = torch.bmm(hs_, ht).squeeze(2)
        # attn = F.softmax(score, dim=-1)
        attn = F.softmax(score, dim=-1) * mask.transpose(0, 1) + EPSILON
        attn = attn / attn.sum(-1, keepdim=True)

        # Compute weighted sum of hs by attention.
        # batch x 1 x seq_len
        attn = attn.unsqueeze(1)
        if weighted_ctx:
            # batch x hs_dim
            weight_hs = torch.bmm(attn, hs).squeeze(1)
        else:
            weight_hs = None

        return weight_hs, attn
Developer: UriSha | Project: sigmorphon | Lines: 32 | Source: model.py


Example 6: forward_dot

    def forward_dot(self, hid, ctx, ctx_mask):
        r"""Computes Luong-style dot attention probabilities between
        decoder's hidden state and source annotations.

        Arguments:
            hid(Variable): A set of decoder hidden states of shape `T*B*H`
                where `T` == 1, `B` is batch dim and `H` is hidden state dim.
            ctx(Variable): A set of annotations of shape `S*B*C` where `S`
                is the source timestep dim, `B` is batch dim and `C`
                is annotation dim.
            ctx_mask(FloatTensor): A binary mask of shape `S*B` with zeroes
                in the padded timesteps.

        Returns:
            scores(Variable): A variable of shape `S*B` containing normalized
                attention scores for each position and sample.
            z_t(Variable): A variable of shape `B*H` containing the final
                attended context vector for this target decoding timestep.
        """
        # Apply transformations first to make last dims both C and then
        # shuffle dims to prepare for batch mat-mult
        ctx_ = self.ctx2ctx(ctx).permute(1, 2, 0)   # S*B*C -> S*B*C -> B*C*S
        hid_ = self.hid2ctx(hid).permute(1, 0, 2)   # T*B*H -> T*B*C -> B*T*C

        # 'dot' scores of B*T*S
        scores = F.softmax(torch.bmm(hid_, ctx_), dim=-1)

        # Transform back to hidden_dim for further decoders
        # B*T*S x B*S*C -> B*T*C -> B*T*H
        z_t = self.ctx2hid(torch.bmm(scores, ctx.transpose(0, 1)))

        return scores.transpose(0, 1), z_t.transpose(0, 1)
Developer: bardetadrien | Project: nmtpytorch | Lines: 32 | Source: attention.py


Example 7: forward

    def forward(self, output, context):
        batch_size = output.size(0)
        hidden_size = output.size(2)
        input_size = context.size(1)

        # (batch, out_len, dim) * (batch, in_len, dim) -> (batch, out_len, in_len)
        attn = torch.bmm(output, context.transpose(1, 2))
        mask = torch.eq(attn, 0).data.byte()
        attn.data.masked_fill_(mask, -float('inf'))
        attn = F.softmax(attn.view(-1, input_size), dim=1).view(batch_size, -1, input_size)

        # (batch, out_len, in_len) * (batch, in_len, dim) -> (batch, out_len, dim)
        mix = torch.bmm(attn, context)

        # concat -> (batch, out_len, 2*dim)
        combined = torch.cat((mix, output), dim=2)

        # output -> (batch, out_len, dim)
        output = F.tanh(self.linear_out(combined.view(-1, 2 * hidden_size))).view(batch_size, -1, hidden_size)


        if not output.is_contiguous():
            output = output.contiguous()

        return output, attn
Developer: shruthi0898 | Project: Writing-editing-Network | Lines: 25 | Source: attention.py


Example 8: backward

    def backward(ctx, grad_output):
        batch1, batch2 = ctx.saved_variables
        grad_add_matrix = grad_batch1 = grad_batch2 = None

        if ctx.needs_input_grad[0]:
            grad_add_matrix = maybe_unexpand(grad_output, ctx.add_matrix_size)
            if ctx.alpha != 1:
                grad_add_matrix = grad_add_matrix.mul(ctx.alpha)

        if any(ctx.needs_input_grad[1:]):
            batch_grad_output = (grad_output
                                 .unsqueeze(0)
                                 .expand(batch1.size(0), batch1.size(1), batch2.size(2)))

        if ctx.needs_input_grad[1]:
            grad_batch1 = torch.bmm(batch_grad_output, batch2.transpose(1, 2))
            if ctx.beta != 1:
                grad_batch1 *= ctx.beta

        if ctx.needs_input_grad[2]:
            grad_batch2 = torch.bmm(batch1.transpose(1, 2), batch_grad_output)
            if ctx.beta != 1:
                grad_batch2 *= ctx.beta

        return grad_add_matrix, grad_batch1, grad_batch2, None, None, None
Developer: Northrend | Project: pytorch | Lines: 25 | Source: blas.py
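
Example 8 hand-codes the backward rule for a batched matrix product: the gradient w.r.t. batch1 is grad_output·batch2ᵀ and the gradient w.r.t. batch2 is batch1ᵀ·grad_output. A quick autograd check of those two formulas (a sketch, independent of the quoted file):

import torch

a = torch.randn(2, 3, 4, requires_grad=True)
b = torch.randn(2, 4, 5, requires_grad=True)
g = torch.randn(2, 3, 5)  # upstream grad_output
torch.bmm(a, b).backward(g)
print(torch.allclose(a.grad, torch.bmm(g, b.transpose(1, 2))))  # True
print(torch.allclose(b.grad, torch.bmm(a.transpose(1, 2), g)))  # True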


Example 9: forward

    def forward(self, q, k, v):
        b_q, t_q, dim_q = list(q.size())
        b_k, t_k, dim_k = list(k.size())
        b_v, t_v, dim_v = list(v.size())
        assert(b_q == b_k and b_k == b_v)  # batch size should be equal
        assert(dim_q == dim_k)  # dims should be equal
        assert(t_k == t_v)  # times should be equal
        b = b_q
        qk = torch.bmm(q, k.transpose(1, 2))  # b x t_q x t_k
        qk.div_(dim_k ** 0.5)
        mask = None
        if self.causal and t_q > 1:
            causal_mask = q.data.new(t_q, t_k).byte().fill_(1).triu_(1)
            mask = Variable(causal_mask.unsqueeze(0).expand(b, t_q, t_k),
                            requires_grad=False)
        if self.mask_k is not None:
            mask_k = self.mask_k.unsqueeze(1).expand(b, t_q, t_k)
            mask = mask_k if mask is None else mask | mask_k
        if self.mask_q is not None:
            mask_q = self.mask_q.unsqueeze(2).expand(b, t_q, t_k)
            mask = mask_q if mask is None else mask | mask_q
        if mask is not None:
            qk.masked_fill_(mask, -1e9)

        sm_qk = F.softmax(qk, dim=2)
        sm_qk = self.dropout(sm_qk)
        return torch.bmm(sm_qk, v), sm_qk  # b x t_q x dim_v
Developer: yangkexin | Project: seq2seq.pytorch | Lines: 27 | Source: attention.py
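
Example 9 hand-rolls scaled dot-product attention with two bmm calls: scores q·kᵀ/√d_k, optional causal and padding masks, softmax, dropout, then a weighted sum over v. For comparison, a minimal sketch of the unmasked core checked against the fused built-in (this assumes PyTorch 2.0+, where F.scaled_dot_product_attention defaults to the same 1/√dim scaling; it is not part of the quoted project):

import torch
import torch.nn.functional as F

q = torch.randn(2, 5, 16)  # b x t_q x dim
k = torch.randn(2, 7, 16)  # b x t_k x dim
v = torch.randn(2, 7, 32)  # b x t_v x dim_v

# Manual version, mirroring Example 9 without masks or dropout
qk = torch.bmm(q, k.transpose(1, 2)) / 16 ** 0.5
manual = torch.bmm(F.softmax(qk, dim=2), v)

fused = F.scaled_dot_product_attention(q, k, v)  # PyTorch >= 2.0
print(torch.allclose(manual, fused, atol=1e-5))  # True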


Example 10: lstsq

def lstsq(b, y, alpha=0.01):
    """
    Batched linear least-squares for pytorch with optional L2 (ridge) regularization.

    Parameters
    ----------

    b : shape(L, M, N)
    y : shape(L, M)
    alpha : float
        Strength of the ridge term added to the diagonal of the normal equations.

    Returns
    -------
    tuple of (coefficients, model, residuals)

    """
    bT = b.transpose(-1, -2)
    AA = torch.bmm(bT, b)
    if alpha != 0:
        diag = torch.diagonal(AA, dim1=1, dim2=2)
        diag += alpha
    RHS = torch.bmm(bT, y[:, :, None])
    X, LU = torch.gesv(RHS, AA)
    fit = torch.bmm(b, X)[..., 0]
    res = y - fit
    return X[..., 0], fit, res
Developer: Tillsten | Project: skultrafast | Lines: 25 | Source: pytorch_fitter.py
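
Example 10 solves the ridge-regularized normal equations (bᵀb + αI)X = bᵀy with torch.gesv, which has since been removed from PyTorch. A hedged modern rewrite (assuming PyTorch 1.9+; torch.linalg.solve stands in for gesv, and lstsq_modern is a hypothetical name, not from the quoted project):

import torch

def lstsq_modern(b, y, alpha=0.01):
    # Batched ridge least-squares: solve (b^T b + alpha*I) X = b^T y per batch element.
    bT = b.transpose(-1, -2)
    AA = torch.bmm(bT, b)
    AA.diagonal(dim1=-2, dim2=-1).add_(alpha)  # add the ridge term in place
    RHS = torch.bmm(bT, y.unsqueeze(-1))
    X = torch.linalg.solve(AA, RHS)            # replaces the removed torch.gesv
    fit = torch.bmm(b, X)[..., 0]
    return X[..., 0], fit, y - fit

coef, fit, res = lstsq_modern(torch.randn(4, 10, 3), torch.randn(4, 10))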


Example 11: bnorm

def bnorm(x, U):
    mx = torch.bmm(U,x)
    subs = x-mx
    subs2 = subs*subs
    vx = torch.bmm(U,subs2)
    out = subs / (vx.clamp(min=1e-10).sqrt() + 1e-5)
    return out
Developer: ParsonsZeng | Project: DiCoNet | Lines: 7 | Source: model.py


Example 12: forward

    def forward(self, q, k, v, attn_mask=None):

        d_k, d_v = self.d_k, self.d_v
        n_head = self.n_head

        residual = q
        #print('q,k,v:',q.size(),k.size(),v.size())
        mb_size, len_q, q_hidden_size = q.size()
        mb_size, len_k, k_hidden_size = k.size()
        mb_size, len_v, v_hidden_size = v.size()

        # treat as a (n_head) size batch
        q_s = q.repeat(n_head, 1, 1).view(n_head, -1, q_hidden_size) # n_head x (mb_size*len_q) x d_model
        k_s = k.repeat(n_head, 1, 1).view(n_head, -1, k_hidden_size) # n_head x (mb_size*len_k) x d_model
        v_s = v.repeat(n_head, 1, 1).view(n_head, -1, v_hidden_size) # n_head x (mb_size*len_v) x d_model
        #print('q_s,k_s,v_s:',q_s.size(),k_s.size(),v_s.size())
        #print('w_qs',self.w_qs.size())
        # treat the result as a (n_head * mb_size) size batch
        q_s = torch.bmm(q_s, self.w_qs).view(-1, len_q, d_k)   # (n_head*mb_size) x len_q x d_k
        k_s = torch.bmm(k_s, self.w_ks).view(-1, len_k, d_k)   # (n_head*mb_size) x len_k x d_k
        v_s = torch.bmm(v_s, self.w_vs).view(-1, len_v, d_v)   # (n_head*mb_size) x len_v x d_v

        # perform attention, result size = (n_head * mb_size) x len_q x d_v
        #print('attn_mask:',attn_mask.size())
        #print(attn_mask)
        outputs, attns = self.attention.forward(q_s, k_s, v_s, attn_mask=attn_mask.repeat(n_head,1,1))

        # back to original mb_size batch, result size = mb_size x len_q x (n_head*d_v)
        outputs = torch.cat(torch.split(outputs, mb_size, dim=0), dim=-1) 

        # project back to residual size
        outputs = self.proj.forward(outputs)
        outputs = self.dropout(outputs)

        return self.layer_norm(outputs + residual), attns
Developer: chickenbestlover | Project: DrQA-RN | Lines: 35 | Source: SubLayers.py


Example 13: predict2

 def predict2(self, x_de, beamsz, gen_len):
     emb_de = self.embedding_de(x_de) # "batch size",n_de,word_dim, but "batch size" is 1 in this case!
     h0 = Variable(torch.zeros(self.n_layers*self.directions, 1, self.hidden_dim).cuda())
     c0 = Variable(torch.zeros(self.n_layers*self.directions, 1, self.hidden_dim).cuda())
     enc_h, _ = self.encoder(emb_de, (h0, c0))
     # since enc batch size=1, enc_h is 1,n_de,hiddensz*n_directions
     if self.directions == 2:
         enc_h = self.dim_reduce(enc_h) # 1,n_de,hiddensz
     masterheap = CandList(self.n_layers,self.hidden_dim,enc_h.size(1),beamsz)
     # in the following loop, beamsz is length 1 for first iteration, length true beamsz (100) afterward
     for i in range(gen_len):
         prev = masterheap.get_prev() # beamsz
         emb_t = self.embedding_en(prev) # embed the last thing we generated. beamsz,word_dim
         enc_h_expand = enc_h.expand(prev.size(0),-1,-1) # beamsz,n_de,hiddensz
         
         h, c = masterheap.get_hiddens() # (n_layers,beamsz,hiddensz),(n_layers,beamsz,hiddensz)
         dec_h, (h, c) = self.decoder(emb_t.unsqueeze(1), (h, c)) # dec_h is beamsz,1,hiddensz (batch_first=True)
         scores = torch.bmm(enc_h_expand, dec_h.transpose(1,2)).squeeze(2)
         # (beamsz,n_de,hiddensz) * (beamsz,hiddensz,1) = (beamsz,n_de,1). squeeze to beamsz,n_de
         attn_dist = F.softmax(scores,dim=1)
         if self.attn_type == "hard":
             _, argmax = attn_dist.max(1) # beamsz for each batch, select most likely german word to pay attention to
             one_hot = Variable(torch.zeros_like(attn_dist.data).scatter_(-1, argmax.data.unsqueeze(1), 1).cuda())
             context = torch.bmm(one_hot.unsqueeze(1), enc_h_expand).squeeze(1)
         else:
             context = torch.bmm(attn_dist.unsqueeze(1), enc_h_expand).squeeze(1)
         # the difference btwn hard and soft is just whether we use a one_hot or a distribution
         # context is beamsz,hiddensz*n_directions
         pred = self.vocab_layer(torch.cat([dec_h.squeeze(1), context], 1)) # beamsz,len(EN.vocab)
         # TODO: set the columns corresponding to <pad>,<unk>,</s>,etc to 0
         masterheap.update_beam(pred)
         masterheap.update_hiddens(h,c)
         masterheap.update_attentions(attn_dist)
         masterheap.firstloop = False
     return masterheap.probs,masterheap.wordlist,masterheap.attentions
Developer: anihamde | Project: cs287-s18 | Lines: 35 | Source: models_original.py


Example 14: predict

 def predict(self, x_de, x_en):
     bs = x_de.size(0)
     emb_de = self.embedding_de(x_de) # bs,n_de,word_dim
     emb_en = self.embedding_en(x_en) # bs,n_en,word_dim
     h = Variable(torch.zeros(self.n_layers*self.directions, bs, self.hidden_dim).cuda())
     c = Variable(torch.zeros(self.n_layers*self.directions, bs, self.hidden_dim).cuda())
     enc_h, _ = self.encoder(emb_de, (h, c))
     dec_h, _ = self.decoder(emb_en, (h, c))
     # all the same. enc_h is bs,n_de,hiddensz*n_directions. h and c are both n_layers*n_directions,bs,hiddensz
     if self.directions == 2:
         enc_h = self.dim_reduce(enc_h) # bs,n_de,hiddensz
     scores = torch.bmm(enc_h, dec_h.transpose(1,2))
     # (bs,n_de,hiddensz) * (bs,hiddensz,n_en) = (bs,n_de,n_en)
     y = [Variable(torch.cuda.LongTensor([sos_token]*bs))] # bs
     self.attn = []
     for t in range(x_en.size(1)-1): # iterate over english words, with teacher forcing
         attn_dist = F.softmax(scores[:,:,t],dim=1) # bs,n_de
         self.attn.append(attn_dist.data)
         if self.attn_type == "hard":
             _, argmax = attn_dist.max(1) # bs. for each batch, select most likely german word to pay attention to
             one_hot = Variable(torch.zeros_like(attn_dist.data).scatter_(-1, argmax.data.unsqueeze(1), 1).cuda())
             context = torch.bmm(one_hot.unsqueeze(1), enc_h).squeeze(1)
         else:
             context = torch.bmm(attn_dist.unsqueeze(1), enc_h).squeeze(1)
         # the difference btwn hard and soft is just whether we use a one_hot or a distribution
         # context is bs,hiddensz
         pred = self.vocab_layer(torch.cat([dec_h[:,t,:], context], 1)) # bs,len(EN.vocab)
         _, next_token = pred.max(1) # bs
         y.append(next_token)
     self.attn = torch.stack(self.attn, 0).transpose(0, 1) # bs,n_en,n_de (for visualization!)
     y = torch.stack(y,0).transpose(0,1) # bs,n_en
     return y,self.attn
Developer: anihamde | Project: cs287-s18 | Lines: 32 | Source: models_original.py


Example 15: predict

 def predict(self, x, attn_type = "hard"):
     #predict with greedy decoding
     emb = self.embedding(x)
     h = Variable(torch.zeros(1, x.size(0), self.hidden_dim))
     c = Variable(torch.zeros(1, x.size(0), self.hidden_dim))
     enc_h, _ = self.encoder(emb, (h, c))
     y = [Variable(torch.zeros(x.size(0)).long())]
     self.attn = []        
     for t in range(x.size(1)):
         emb_t = self.embedding(y[-1])
         dec_h, (h, c) = self.decoder(emb_t.unsqueeze(1), (h, c))
         scores = torch.bmm(enc_h, dec_h.transpose(1,2)).squeeze(2)
         attn_dist = F.softmax(scores, dim = 1)
         self.attn.append(attn_dist.data)
         if attn_type == "hard":
             _, argmax = attn_dist.max(1)
             one_hot = Variable(torch.zeros_like(attn_dist.data).scatter_(-1, argmax.data.unsqueeze(1), 1))
             context = torch.bmm(one_hot.unsqueeze(1), enc_h).squeeze(1)                    
         else:                
             context = torch.bmm(attn_dist.unsqueeze(1), enc_h).squeeze(1)
         pred = self.vocab_layer(torch.cat([dec_h.squeeze(1), context], 1))
         _, next_token = pred.max(1)
         y.append(next_token)
     self.attn = torch.stack(self.attn, 0).transpose(0, 1)
     return torch.stack(y, 0).transpose(0, 1)
Developer: anihamde | Project: cs287-s18 | Lines: 25 | Source: section4-Copy1.py


Example 16: forward

    def forward(self, agent_qs, states):
        """Forward pass for the mixer.

        Arguments:
            agent_qs: Tensor of shape [B, T, n_agents]
            states: Tensor of shape [B, T, state_dim]
        """
        bs = agent_qs.size(0)
        states = states.reshape(-1, self.state_dim)
        agent_qs = agent_qs.view(-1, 1, self.n_agents)
        # First layer
        w1 = th.abs(self.hyper_w_1(states))
        b1 = self.hyper_b_1(states)
        w1 = w1.view(-1, self.n_agents, self.embed_dim)
        b1 = b1.view(-1, 1, self.embed_dim)
        hidden = F.elu(th.bmm(agent_qs, w1) + b1)
        # Second layer
        w_final = th.abs(self.hyper_w_final(states))
        w_final = w_final.view(-1, self.embed_dim, 1)
        # State-dependent bias
        v = self.V(states).view(-1, 1, 1)
        # Compute final output
        y = th.bmm(hidden, w_final) + v
        # Reshape and return
        q_tot = y.view(bs, -1, 1)
        return q_tot
Developer: jamescasbon | Project: ray | Lines: 26 | Source: mixers.py


Example 17: forward

 def forward(self, x_de, x_en, update_baseline=True):
     bs = x_de.size(0)
     # x_de is bs,n_de. x_en is bs,n_en
     emb_de = self.embedding_de(x_de) # bs,n_de,word_dim
     emb_en = self.embedding_en(x_en) # bs,n_en,word_dim
     h0_enc = torch.zeros(self.n_layers*self.directions, bs, self.hidden_dim).cuda()
     c0_enc = torch.zeros(self.n_layers*self.directions, bs, self.hidden_dim).cuda()
     h0_dec = torch.zeros(self.n_layers, bs, self.hidden_dim).cuda()
     c0_dec = torch.zeros(self.n_layers, bs, self.hidden_dim).cuda()
     # hidden vars have dimension n_layers*n_directions,bs,hiddensz
     enc_h, _ = self.encoder(emb_de, (Variable(h0_enc), Variable(c0_enc)))
     # enc_h is bs,n_de,hiddensz*n_directions. ordering is different from last week because batch_first=True
     dec_h, _ = self.decoder(emb_en, (Variable(h0_dec), Variable(c0_dec)))
     # dec_h is bs,n_en,hidden_size*n_directions
     # we've gotten our encoder/decoder hidden states so we are ready to do attention
     # first let's get all our scores, which we can do easily since we are using dot-prod attention
     if self.directions == 2:
         scores = torch.bmm(self.dim_reduce(enc_h), dec_h.transpose(1,2))
         # TODO: any easier ways to reduce dimension?
     else:
         scores = torch.bmm(enc_h, dec_h.transpose(1,2))
     # (bs,n_de,hiddensz*n_directions) * (bs,hiddensz*n_directions,n_en) = (bs,n_de,n_en)
     reinforce_loss = 0 # we only use this variable for hard attention
     loss = 0
     avg_reward = 0
     # we just iterate to dec_h.size(1)-1, since there's </s> at the end of each sentence
     for t in range(dec_h.size(1)-1): # iterate over english words, with teacher forcing
         attn_dist = F.softmax(scores[:, :, t],dim=1) # bs,n_de. these are the alphas (attention scores for each german word)
         if self.attn_type == "hard":
             cat = torch.distributions.Categorical(attn_dist) 
             attn_samples = cat.sample() # bs. each element is a sample from categorical distribution
             one_hot = Variable(torch.zeros_like(attn_dist.data).scatter_(-1, attn_samples.data.unsqueeze(1), 1).cuda()) # bs,n_de
             # made a bunch of one-hot vectors
             context = torch.bmm(one_hot.unsqueeze(1), enc_h).squeeze(1)
             # now we use the one-hot vectors to select correct hidden vectors from enc_h
             # (bs,1,n_de) * (bs,n_de,hiddensz*n_directions) = (bs,1,hiddensz*n_directions). squeeze to bs,hiddensz*n_directions
         else:
             context = torch.bmm(attn_dist.unsqueeze(1), enc_h).squeeze(1) # same dimensions
             # (bs,1,n_de) * (bs,n_de,hiddensz*n_directions) = (bs,1,hiddensz*n_directions)
         # context is bs,hidden_size*n_directions
         # the rnn output and the context together make the decoder "hidden state", which is bs,2*hidden_size*n_directions
         pred = self.vocab_layer(torch.cat([dec_h[:,t,:], context], 1)) # bs,len(EN.vocab)
         y = x_en[:, t+1] # bs. these are our labels
         no_pad = (y != pad_token) # exclude english padding tokens
         reward = torch.gather(pred, 1, y.unsqueeze(1)) # bs,1
         # reward[i,1] = pred[i,y[i]]. this gets log prob of correct word for each batch. similar to -crossentropy
         reward = reward.squeeze(1)[no_pad] # less than bs
         if self.attn_type == "hard":
             reinforce_loss -= (cat.log_prob(attn_samples[no_pad]) * (reward-self.baseline).detach()).sum() 
             # reinforce rule (just read the formula), with special baseline
         loss -= reward.sum() # minimizing loss is maximizing reward
     no_pad_total = (x_en[:,1:] != pad_token).data.sum() # TODO: i think this is right, right?
     loss /= no_pad_total
     reinforce_loss /= no_pad_total
     avg_reward = -loss.data[0]
     if update_baseline: # update baseline as a moving average
         self.baseline = Variable(0.95*self.baseline.data + 0.05*avg_reward)
     return loss, reinforce_loss,avg_reward
Developer: anihamde | Project: cs287-s18 | Lines: 58 | Source: models_original.py


Example 18: forward

    def forward(self, match_encoders):
        
        '''
        match_encoders (pn_steps, batch, hidden_size*2)
        '''
        vh_matrix = self.vh_net(match_encoders) # pn_steps, batch, hidden_size
        
        # prediction start
        h0 = Variable(torch.zeros(match_encoders.size(1), self.hidden_size)).cuda()
        c0 = Variable(torch.zeros(match_encoders.size(1), self.hidden_size)).cuda()
        
        wha1 = self.wa_net(h0) # batch, hidden_size
        wha1 = wha1.expand(match_encoders.size(0), wha1.size(0), wha1.size(1)) # pn_steps, batch, hidden_size
        #print ('_sum.size() ', _sum.size())
        #print ('vh_matrix.size() ', vh_matrix.size())
        f1 = self.tanh(vh_matrix + wha1) # pn_steps, batch, hidden_size
        #print ('f1.size() ', f1.size())
        vf1 = self.v_net(f1.transpose(0, 1)).squeeze(-1) #batch, pn_steps
        
        beta1 = self.softmax(vf1) #batch, pn_steps
        softmax_beta1 = self.softmax(beta1).view(beta1.size(0), 1, beta1.size(1)) #batch, 1, pn_steps
        
        inp = torch.bmm(softmax_beta1, match_encoders.transpose(0, 1)) # batch, 1, hidden_size
        inp = inp.squeeze(1) # batch, hidden_size
        
        h1, c1 = self.pointer_lstm(inp, (h0, c0))
        
        
        wha2 = self.wa_net(h1) # batch, hidden_size
        wha2 = wha2.expand(match_encoders.size(0), wha2.size(0), wha2.size(1)) # pn_steps, batch, hidden_size
        f2 = self.tanh(vh_matrix + wha2) # pn_steps, batch, hidden_size
        vf2 = self.v_net(f2.transpose(0, 1)).squeeze(-1) #batch, pn_steps
        
        beta2 = self.softmax(vf2)#batch, pn_steps
        softmax_beta2 = self.softmax(beta2).view(beta2.size(0), 1, beta2.size(1)) #batch, 1, pn_steps
        
        inp = torch.bmm(softmax_beta2, match_encoders.transpose(0, 1)) # batch, 1, hidden_size
        inp = inp.squeeze(1) # batch, hidden_size
        
        h2, c2 = self.pointer_lstm(inp, (h1, c1))
            
        _, start = torch.max(beta1, 1)
        _, end = torch.max(beta2, 1)
        
        beta1 = beta1.view(1, beta1.size(0), beta1.size(1))
        beta2 = beta2.view(1, beta2.size(0), beta2.size(1))
        
        logits = torch.cat([beta1, beta2])
        
        start = start.view(1, start.size(0))
        end = end.view(1, end.size(0))
        
        prediction = torch.cat([start, end]).transpose(0, 1).cpu().data.numpy()
        

        return logits, prediction
Developer: xuwenshen | Project: Reading_Comprehension | Lines: 56 | Source: linear_match_lstm.py


Example 19: forward

 def forward(self, query_embeddings, in_memory_embeddings, out_memory_embeddings, attention_mask=None):
     attention = torch.bmm(in_memory_embeddings, query_embeddings.unsqueeze(2)).squeeze(2)
     if attention_mask is not None:
         # exclude masked elements from the softmax
         attention = attention_mask.float() * attention + (1 - attention_mask.float()) * -1e20
     probs = softmax(attention).unsqueeze(1)
     memory_output = torch.bmm(probs, out_memory_embeddings).squeeze(1)
     query_embeddings = self.linear(query_embeddings)
     output = memory_output + query_embeddings
     return output
Developer: jojonki | Project: ParlAI | Lines: 10 | Source: modules.py


Example 20: updateGradInput

    def updateGradInput(self, input, gradOutput):
        M, v = input
        self.gradInput[0].resize_as_(M)
        self.gradInput[1].resize_as_(v)
        gradOutput = gradOutput.contiguous()

        assert gradOutput.ndimension() == 1 or gradOutput.ndimension() == 2

        if gradOutput.ndimension() == 2:
            assert M.ndimension() == 3
            assert v.ndimension() == 2
            bdim = M.size(0)
            odim = M.size(1)
            idim = M.size(2)

            if self.trans:
                torch.bmm(v.view(bdim, odim, 1), gradOutput.view(bdim, 1, idim), out=self.gradInput[0])
                torch.bmm(M, gradOutput.view(bdim, idim, 1), out=self.gradInput[1].view(bdim, odim, 1))
            else:
                torch.bmm(gradOutput.view(bdim, odim, 1), v.view(bdim, 1, idim), out=self.gradInput[0])
                torch.bmm(M.transpose(1, 2), gradOutput.view(bdim, odim, 1), out=self.gradInput[1].view(bdim, idim, 1))
        else:
            assert M.ndimension() == 2
            assert v.ndimension() == 1

            if self.trans:
                torch.ger(v, gradOutput, out=self.gradInput[0])
                self.gradInput[1] = M * gradOutput
            else:
                torch.ger(gradOutput, v, out=self.gradInput[0])
                self.gradInput[1] = M.t() * gradOutput

        return self.gradInput
Developer: Jsmilemsj | Project: pytorch | Lines: 33 | Source: MV.py



Note: The torch.bmm examples above were compiled by 纯净天空 from GitHub, MSDocs, and other source-code and documentation platforms. The snippets are excerpted from open-source projects contributed by their authors; copyright remains with the original authors, and any use or redistribution should follow the corresponding project's license. Do not reproduce without permission.

