This article collects typical usage examples of Python's torch.matmul function. If you have been wondering how torch.matmul is used in practice and what real code that calls it looks like, the curated examples below may help.
Twenty code examples of the matmul function are shown below, sorted by popularity by default. You can upvote the examples you find useful; your votes help the system recommend better Python code examples.
Example 1: forward
def forward(self, x):
    max_sample = x.size()[1]
    x = x.view(-1, self.feature_size)
    assignment = th.matmul(x, self.clusters)
    if self.add_batch_norm:
        assignment = self.batch_norm(assignment)
    assignment = F.softmax(assignment, dim=1)
    assignment = assignment.view(-1, max_sample, self.cluster_size)
    assignment = assignment.transpose(1, 2)
    x = x.view(-1, max_sample, self.feature_size)
    rvlad = th.matmul(assignment, x)
    rvlad = rvlad.transpose(-1, 1)
    # L2 intra norm
    rvlad = F.normalize(rvlad)
    # flattening + L2 norm
    rvlad = rvlad.view(-1, self.cluster_size * self.feature_size)
    rvlad = F.normalize(rvlad)
    return rvlad
Author: lvaleriu, Project: Mixture-of-Embedding-Experts, Lines of code: 25, Source: loupe.py
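The key operation here is the batched matmul between the transposed soft-assignment matrix and the reshaped features. A minimal, self-contained sketch of that shape pattern, with the dimension names B, N, K, D assumed rather than taken from the original module:

import torch
import torch.nn.functional as F

B, N, K, D = 2, 10, 4, 8           # batch, samples per clip, clusters, feature size (assumed)
x = torch.randn(B * N, D)          # flattened features
clusters = torch.randn(D, K)       # stand-in for the module's learned cluster matrix

assignment = F.softmax(torch.matmul(x, clusters), dim=1)   # (B*N, K) soft assignments
assignment = assignment.view(B, N, K).transpose(1, 2)      # (B, K, N)
x = x.view(B, N, D)
rvlad = torch.matmul(assignment, x)                        # (B, K, D) batched matmul
rvlad = F.normalize(rvlad.transpose(-1, 1))                # intra-cluster L2 norm
rvlad = F.normalize(rvlad.reshape(B, -1))                  # flatten + final L2 norm
print(rvlad.shape)                                         # torch.Size([2, 32])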
Example 2: forward
def forward(self, hidden_states, attention_mask):
    mixed_query_layer = self.query(hidden_states)
    mixed_key_layer = self.key(hidden_states)
    mixed_value_layer = self.value(hidden_states)
    query_layer = self.transpose_for_scores(mixed_query_layer)
    key_layer = self.transpose_for_scores(mixed_key_layer)
    value_layer = self.transpose_for_scores(mixed_value_layer)
    # Take the dot product between "query" and "key" to get the raw attention scores.
    attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2))
    attention_scores = attention_scores / math.sqrt(self.attention_head_size)
    # Apply the attention mask (precomputed for all layers in the BertModel forward() function).
    attention_scores = attention_scores + attention_mask
    # Normalize the attention scores to probabilities.
    attention_probs = nn.Softmax(dim=-1)(attention_scores)
    # This is actually dropping out entire tokens to attend to, which might
    # seem a bit unusual, but is taken from the original Transformer paper.
    attention_probs = self.dropout(attention_probs)
    context_layer = torch.matmul(attention_probs, value_layer)
    context_layer = context_layer.permute(0, 2, 1, 3).contiguous()
    new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,)
    context_layer = context_layer.view(*new_context_layer_shape)
    return context_layer
Author: zhouleidcc, Project: bert-Chinese-classification-task, Lines of code: 27, Source: modeling.py
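Both matmul calls above operate on 4-D tensors of shape (batch, heads, tokens, head_size), so the matrix product is applied per batch element and per head. A minimal sketch of that pattern with assumed sizes (B, H, T, Dh are illustrative, not taken from the original model):

import math
import torch

B, H, T, Dh = 2, 12, 5, 64                    # batch, heads, tokens, head size (assumed)
q = torch.randn(B, H, T, Dh)
k = torch.randn(B, H, T, Dh)
v = torch.randn(B, H, T, Dh)
mask = torch.zeros(B, 1, 1, T)                # additive mask: 0 keeps a token, -10000 hides it

scores = torch.matmul(q, k.transpose(-1, -2)) / math.sqrt(Dh)   # (B, H, T, T)
probs = torch.softmax(scores + mask, dim=-1)
context = torch.matmul(probs, v)                                # (B, H, T, Dh)
context = context.permute(0, 2, 1, 3).reshape(B, T, H * Dh)     # (B, T, 768), heads concatenated
print(context.shape)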
Example 3: forward
def forward(self, sequence, graph):
    """
    Apply self-attention to the sequence, ignoring
    the graph
    """
    sequence = sequence.squeeze(1)
    # get the dimensions
    n, d = sequence.size()
    # project the sequence into key, value, and query sequences
    keySeq = f.relu(self.keyProj(sequence))
    valueSeq = f.relu(self.valueProj(sequence))
    querySeq = f.relu(self.queryProj(sequence))
    # combine the query with each key
    # a_ijh = softmax( (q_ih^T k_jh) / sqrt(d) )
    # the result: row i is the importance of the sequence for key i
    importance = f.softmax(t.matmul(querySeq, keySeq.permute(1, 0)) * math.sqrt(d), 0).permute(1, 0)
    # apply the importance weights to the value sequence
    attention = t.matmul(valueSeq.permute(1, 0), importance).permute(1, 0)
    # sum the sequence for a complete representation
    final = t.sum(attention, 0)
    return attention.unsqueeze(1), final
Author: jworr, Project: ml_tools, Lines of code: 27, Source: attention.py
Example 4: train
def train(ep):
    model.train()
    total_loss = 0
    count = 0
    train_idx_list = np.arange(len(X_train), dtype="int32")
    np.random.shuffle(train_idx_list)
    for idx in train_idx_list:
        data_line = X_train[idx]
        x, y = Variable(data_line[:-1]), Variable(data_line[1:])
        if args.cuda:
            x, y = x.cuda(), y.cuda()
        optimizer.zero_grad()
        output = model(x.unsqueeze(0)).squeeze(0)
        # binary cross-entropy written as a trace of matrix products
        loss = -torch.trace(torch.matmul(y, torch.log(output).float().t()) +
                            torch.matmul((1 - y), torch.log(1 - output).float().t()))
        total_loss += loss.data[0]
        count += output.size(0)
        loss.backward()  # gradients must exist before they can be clipped
        if args.clip > 0:
            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
        optimizer.step()
        if idx > 0 and idx % args.log_interval == 0:
            cur_loss = total_loss / count
            print("Epoch {:2d} | lr {:.5f} | loss {:.5f}".format(ep, lr, cur_loss))
            total_loss = 0.0
            count = 0
Author: wasaCheney, Project: TCN, Lines of code: 28, Source: music_test.py
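The loss above packs a summed binary cross-entropy into matrix form: trace(Y log(P)ᵀ) adds up y_ij·log(p_ij) over every entry. A small sketch with made-up shapes, checking that the trace/matmul form matches the built-in BCE sum:

import torch

T_steps, K = 6, 10                                  # time steps and note classes (assumed)
y = (torch.rand(T_steps, K) > 0.7).float()          # multi-hot targets
p = torch.rand(T_steps, K).clamp(1e-6, 1 - 1e-6)    # model probabilities

loss_trace = -torch.trace(torch.matmul(y, torch.log(p).t()) +
                          torch.matmul(1 - y, torch.log(1 - p).t()))
loss_bce = torch.nn.functional.binary_cross_entropy(p, y, reduction="sum")
print(torch.allclose(loss_trace, loss_bce, atol=1e-3))   # True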
Example 5: forward
def forward(self, context, state, input_):
    output = (torch.matmul(context, self._v_c.unsqueeze(1))
              + torch.matmul(state, self._v_s.unsqueeze(1))
              + torch.matmul(input_, self._v_i.unsqueeze(1)))
    if self._b is not None:
        output = output + self._b.unsqueeze(0)
    return output
Author: ShawnXiha, Project: fast_abs_rl, Lines of code: 7, Source: copy_summ.py
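Each matmul here reduces a batch of feature vectors to one score per row by multiplying with a learned vector reshaped into a column. A minimal sketch with assumed sizes:

import torch

B, D = 4, 32                          # batch size and feature size (assumed)
context = torch.randn(B, D)
v_c = torch.randn(D)                  # stand-in for the module's learned vector

# unsqueeze(1) turns the vector into a (D, 1) matrix, so matmul collapses each
# row of `context` into one scalar, producing a (B, 1) column of logits.
score = torch.matmul(context, v_c.unsqueeze(1))
print(score.shape)                    # torch.Size([4, 1])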
Example 6: score
def score(self, hidden, encoder_output):
    if self.method == 'dot':
        # hidden is 1 x 256
        # encoder_output is 22 x 256
        encoder_output = torch.transpose(encoder_output, 0, 1)
        # encoder_output is 256 x 22
        energy = torch.matmul(hidden, encoder_output)
        return energy
    elif self.method == 'general':
        # hidden is 1 x 256
        # encoder_output is 256 x 22
        # encoder_output = torch.transpose(encoder_output, 0, 1)
        hidden = hidden.view(1, -1)
        a = self.attn(encoder_output)
        a = torch.transpose(a, 0, 1)
        energy = torch.matmul(hidden, a)
        return energy
    elif self.method == 'concat':
        len_encoder_output = encoder_output.size()[1]
        # hidden is 1 x 256
        # encoder_output is 256 x 22
        hidden = torch.transpose(hidden, 0, 1)
        # hidden is 256 x 1
        hidden = hidden.repeat(1, len_encoder_output)
        # hidden is 256 x 22
        concat = torch.cat((hidden, encoder_output), dim=0)
        # concat is 512 x 22
        # self.attn(concat) --> 256 x 22
        energy = torch.matmul(self.v, F.tanh(self.attn(concat)))
        return energy
Author: vwrj, Project: neural_machine_translation, Lines of code: 33, Source: V2-Attention-Vish.py
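The 'dot' branch is the simplest: one matmul between the decoder hidden state and the transposed encoder outputs yields one energy per source position. A minimal sketch using the shapes from the comments above:

import torch

hidden = torch.randn(1, 256)             # decoder hidden state
encoder_output = torch.randn(22, 256)    # 22 encoder time steps

energy = torch.matmul(hidden, encoder_output.transpose(0, 1))   # (1, 22)
weights = torch.softmax(energy, dim=1)                          # attention over source positions
print(weights.shape)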
Example 7: write
def write(self, z, time, debug=False):
    # update the usage indicator
    self.u = self.u + T.matmul(Variable(T.from_numpy(np.ones((1, Kr), dtype=np.float32))), self.W_predictor)
    # update the writing weights
    prev_v_wr = self.v_wr
    v_wr = np.zeros((N_mem, 1), dtype=np.float32)
    if time < N_mem:
        v_wr[time][0] = 1
    else:
        waste_index = int(T.argmin(self.u).data)
        v_wr[waste_index][0] = 1
    self.v_wr = Variable(T.from_numpy(v_wr))
    # writing
    # z: (1, Z_DIM)
    if debug:
        print(self.M)
    if USE_RETROACTIVE:
        # update the retroactive weights
        self.v_ret = GAMMA * self.v_ret + (1 - GAMMA) * prev_v_wr
        z_wr = T.cat([z, Variable(T.from_numpy(np.zeros((1, Z_DIM), dtype=np.float32)))], 1)
        z_ret = T.cat([Variable(T.from_numpy(np.zeros((1, Z_DIM), dtype=np.float32))), z], 1)
        self.M = self.M + T.matmul(self.v_wr, z_wr) + T.matmul(self.v_ret, z_ret)
    else:
        self.M = self.M + T.matmul(self.v_wr, z)
    if debug:
        return self.M
Author: andreofner, Project: MERLIN, Lines of code: 28, Source: memory.py
Example 8: grad2
def grad2():
    W = Variable(torch.rand(2, 2), requires_grad=True)
    W2 = Variable(torch.rand(2, 1), requires_grad=True)
    x1 = Variable(torch.rand(1, 2), requires_grad=True)
    x2 = Variable(torch.rand(1, 2), requires_grad=True)
    print("W: ")
    print(W)
    print("x1: ")
    print(x1)
    print("x2: ")
    print(x2)
    print("--------------------")
    y1 = torch.matmul(torch.matmul(x1, W), W2)
    print(torch.matmul(W, W2))
    # y = Variable(y, requires_grad=True)
    # print("y1:")
    # print(y1)
    y1.backward()
    # print(W.grad)
    print(x1.grad)
    # W.grad.data.zero_()
    # x1.grad.data.zero_()
    y2 = torch.matmul(torch.matmul(x2, W), W2)
    y2.backward()
    # print("y2: ")
    # print(y2)
    # print(W.grad)
    print(x2.grad)
Author: gonglixue, Project: PRML_Python, Lines of code: 32, Source: gradient.py
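The point of this example is that for y = x·W·W2 (a scalar), the gradient with respect to x is (W·W2)ᵀ, which is why the code prints torch.matmul(W, W2) next to x1.grad. A modern sketch of the same check without the deprecated Variable wrapper:

import torch

W = torch.rand(2, 2, requires_grad=True)
W2 = torch.rand(2, 1, requires_grad=True)
x = torch.rand(1, 2, requires_grad=True)

y = torch.matmul(torch.matmul(x, W), W2)   # (1, 1) scalar-valued output
y.backward()

# dy/dx equals (W @ W2)^T for this chain of matmuls.
print(torch.allclose(x.grad, torch.matmul(W, W2).t()))   # True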
Example 9: _attn
def _attn(self, q, k, v):
    w = torch.matmul(q, k)
    if self.scale:
        w = w / math.sqrt(v.size(-1))
    w = w * self.b + -1e9 * (1 - self.b)  # TF implementation trick: mask_attn_weights
    w = nn.Softmax(dim=-1)(w)
    w = self.attn_dropout(w)
    return torch.matmul(w, v)
Author: chenghuige, Project: pytorch-openai-transformer-lm, Lines of code: 8, Source: model_pytorch.py
Example 10: test
def test():
    x = torch.ones(1, 2)
    Sigma = torch.FloatTensor([[1, 0.8], [0.8, 1]])
    z = torch.ones(x.size())
    y = torch.matmul(x, Sigma)
    y = torch.matmul(y, x.t())
    print(y)
Author: gonglixue, Project: PRML_Python, Lines of code: 8, Source: test.py
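The two chained matmul calls compute the quadratic form x·Σ·xᵀ; with x = [1, 1] this simply sums every entry of Σ, giving 3.6. A quick check:

import torch

x = torch.ones(1, 2)
Sigma = torch.tensor([[1.0, 0.8], [0.8, 1.0]])

q = torch.matmul(torch.matmul(x, Sigma), x.t())   # quadratic form x Σ x^T
print(q)             # tensor([[3.6000]])
print(Sigma.sum())   # tensor(3.6000), the same value, since x is all ones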
Example 11: attention_score
def attention_score(attention, query, v, w):
    """Unnormalized attention score."""
    sum_ = attention.unsqueeze(1) + torch.matmul(
        query, w.unsqueeze(0)
    ).unsqueeze(2)  # [B, Nq, Ns, D]
    score = torch.matmul(
        F.tanh(sum_), v.unsqueeze(0).unsqueeze(1).unsqueeze(3)
    ).squeeze(3)  # [B, Nq, Ns]
    return score
Author: ShawnXiha, Project: fast_abs_rl, Lines of code: 9, Source: extract.py
Example 12: attention
def attention(cls, query, key, value, mask=None, dropout=None):
    d_k = query.size(-1)
    scores = torch.matmul(query, key.transpose(-2, -1)) / math.sqrt(d_k)
    if mask is not None:
        scores = scores.masked_fill(mask == 0, -1e9)
    p_attn = F.softmax(scores, dim=-1)
    if dropout is not None:
        p_attn = dropout(p_attn)
    return torch.matmul(p_attn, value), p_attn
Author: daixiangau, Project: naacl2019-select-pretraining-data-for-ner, Lines of code: 9, Source: attention.py
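A function like this only needs three tensors whose last two dimensions are (tokens, features), plus an optional 0/1 mask. A minimal sketch with assumed sizes and a causal mask:

import math
import torch

B, T, D = 2, 4, 8                                   # batch, tokens, model size (assumed)
q, k, v = (torch.randn(B, T, D) for _ in range(3))
mask = torch.tril(torch.ones(T, T)).unsqueeze(0)    # causal mask: 1 = may attend

scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(D)
scores = scores.masked_fill(mask == 0, -1e9)        # hide future positions
p_attn = torch.softmax(scores, dim=-1)
out = torch.matmul(p_attn, v)                       # (B, T, D)
print(out.shape, p_attn[0, 0])                      # first token attends only to itself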
Example 13: _prepare
def _prepare(self, attn_mem):
    attn_feat = torch.matmul(attn_mem, self._attn_wm.unsqueeze(0))
    hop_feat = torch.matmul(attn_mem, self._hop_wm.unsqueeze(0))
    bs = attn_mem.size(0)
    n_l, d = self._init_h.size()
    size = (n_l, bs, d)
    lstm_states = (self._init_h.unsqueeze(1).expand(*size).contiguous(),
                   self._init_c.unsqueeze(1).expand(*size).contiguous())
    d = self._init_i.size(0)
    init_i = self._init_i.unsqueeze(0).unsqueeze(1).expand(bs, 1, d)
    return attn_feat, hop_feat, lstm_states, init_i
Author: ShawnXiha, Project: fast_abs_rl, Lines of code: 11, Source: extract.py
Example 14: forward
def forward(self, matrix_1: torch.Tensor, matrix_2: torch.Tensor) -> torch.Tensor:
    if self._use_input_biases:
        bias1 = matrix_1.new_ones(matrix_1.size()[:-1] + (1,))
        bias2 = matrix_2.new_ones(matrix_2.size()[:-1] + (1,))
        matrix_1 = torch.cat([matrix_1, bias1], -1)
        matrix_2 = torch.cat([matrix_2, bias2], -1)
    intermediate = torch.matmul(matrix_1.unsqueeze(1), self._weight_matrix.unsqueeze(0))
    final = torch.matmul(intermediate, matrix_2.unsqueeze(1).transpose(2, 3))
    return self._activation(final.squeeze(1) + self._bias)
Author: pyknife, Project: allennlp, Lines of code: 11, Source: bilinear_matrix_attention.py
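The two matmul calls implement a bilinear score x_iᵀ·W·y_j for every pair of rows, relying on broadcasting over the inserted singleton dimensions. A shape-only sketch with assumed sizes (bias and activation omitted):

import torch

B, N, M, D = 2, 5, 7, 16            # batch, rows of matrix_1, rows of matrix_2, dim (assumed)
matrix_1 = torch.randn(B, N, D)
matrix_2 = torch.randn(B, M, D)
weight = torch.randn(D, D)          # stand-in for the module's weight matrix

intermediate = torch.matmul(matrix_1.unsqueeze(1), weight.unsqueeze(0))     # (B, 1, N, D)
final = torch.matmul(intermediate, matrix_2.unsqueeze(1).transpose(2, 3))   # (B, 1, N, M)
attention = final.squeeze(1)                                                # (B, N, M)
print(attention.shape)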
Example 15: high_dimension_gaussain_energy
def high_dimension_gaussain_energy(x):
    u = High_mu
    Sigma = High_Sigma
    Sigma = torch.inverse(Sigma)
    if isinstance(x, Variable):
        u = Variable(u, requires_grad=True)
        Sigma = Variable(Sigma, requires_grad=True)
    diff = x - u
    temp = 0.5 * torch.matmul(torch.matmul(diff, Sigma), diff.t())
    return temp
Author: gonglixue, Project: PRML_Python, Lines of code: 13, Source: HamiltonianDynamics.py
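The nested matmul computes the Gaussian energy 0.5·(x − μ)·Σ⁻¹·(x − μ)ᵀ, i.e. the negative log-density up to a constant. A small sketch with made-up μ and Σ, compared against torch.distributions:

import torch

mu = torch.zeros(1, 3)                               # hypothetical mean
Sigma = torch.tensor([[2.0, 0.3, 0.0],
                      [0.3, 1.0, 0.1],
                      [0.0, 0.1, 1.5]])              # hypothetical covariance
x = torch.tensor([[0.5, -1.0, 2.0]])

diff = x - mu
energy = 0.5 * torch.matmul(torch.matmul(diff, torch.inverse(Sigma)), diff.t())

dist = torch.distributions.MultivariateNormal(mu.squeeze(0), covariance_matrix=Sigma)
print(energy.item())
print(-dist.log_prob(x.squeeze(0)).item())   # differs from the energy only by the log-normalizer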
Example 16: distance_calcu
def distance_calcu(self, query, gallery):
    """
    :param query:
    :param gallery:
    :return:
    """
    query = query.expand_as(gallery).contiguous()
    x = torch.cat([query, gallery, query - gallery], 1)
    W1, W2 = self.adpW(x)
    num = query.size(0)
    dist = torch.norm((torch.matmul(W2, gallery.view(num, -1, 1))
                       + torch.matmul(W1, query.view(num, -1, 1)))  # projected gallery (combined with query)
                      - query.view(num, -1, 1), 2, 1)               # minus the original query
    return dist
Author: hh23333, Project: FVAE_adversarial, Lines of code: 13, Source: adaptive_triplet.py
Example 17: adpW
def adpW(self, x):
    # x = F.normalize(x)
    x = self.adp_metric_embedding1(x)
    # x = self.adp_metric_embedding1_bn(x)
    x = F.prelu(x)
    x = self.adp_metric_embedding2(x)
    # x = self.adp_metric_embedding2_bn(x)
    diag_matrix = []
    for i in range(x.size(0)):
        diag_matrix.append(torch.diag(x[i, :]))
    x = torch.stack(diag_matrix)
    W = torch.matmul(self.transform_matrix, torch.matmul(x, self.transform_matrix))
    return W
Author: hh23333, Project: FVAE_adversarial, Lines of code: 13, Source: adaptive_triplet_onlyadp.py
Example 18: attention
def attention(query: torch.Tensor,
              key: torch.Tensor,
              value: torch.Tensor,
              mask: torch.Tensor = None,
              dropout: Callable = None) -> Tuple[torch.Tensor, torch.Tensor]:
    """Compute 'Scaled Dot Product Attention'."""
    d_k = query.size(-1)
    scores = torch.matmul(query, key.transpose(-2, -1)) / math.sqrt(d_k)
    if mask is not None:
        scores = scores.masked_fill(mask == 0, -1e9)
    p_attn = F.softmax(scores, dim=-1)
    if dropout is not None:
        p_attn = dropout(p_attn)
    return torch.matmul(p_attn, value), p_attn
Author: apmoore1, Project: allennlp, Lines of code: 14, Source: bidirectional_language_model_transformer.py
Example 19: max_singular_value
def max_singular_value(W, u=None, Ip=1):
    """
    Power iteration for a weight parameter.
    """
    # xp = W.data
    if u is None:
        u = torch.FloatTensor(1, W.size(0)).normal_(0, 1).cuda()
    _u = u
    for _ in range(Ip):
        # print(_u.size(), W.size())
        _v = _l2normalize(torch.matmul(_u, W.data), eps=1e-12)
        _u = _l2normalize(torch.matmul(_v, torch.transpose(W.data, 0, 1)), eps=1e-12)
    sigma = torch.matmul(torch.matmul(_v, torch.transpose(W.data, 0, 1)), torch.transpose(_u, 0, 1))
    return sigma, _v
Author: LuChengTHU, Project: SN-GAN, Lines of code: 14, Source: train-conditional.py
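The two matmul calls inside the loop alternate left and right multiplications by W, which is one step of power iteration for the largest singular value (used for spectral normalization). A CPU-only sketch that defines a stand-in for the _l2normalize helper, which is not shown above, and checks the estimate against the exact SVD:

import torch

def l2normalize(v, eps=1e-12):
    # assumed behaviour of the _l2normalize helper used in the snippet
    return v / (v.norm() + eps)

W = torch.randn(8, 5)
u = torch.randn(1, W.size(0))          # random left vector, on CPU instead of .cuda()

for _ in range(50):                    # more iterations than Ip=1 for a tight estimate
    v = l2normalize(torch.matmul(u, W))        # (1, 5)
    u = l2normalize(torch.matmul(v, W.t()))    # (1, 8)

sigma = torch.matmul(torch.matmul(v, W.t()), u.t())        # estimated top singular value
print(sigma.item(), torch.linalg.svdvals(W)[0].item())     # the two values agree closely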
Example 20: seqAttention
def seqAttention(sequence, weights):
    """
    Applies attention to the given sequence
    """
    # compute the importance of each step in the sequence
    importance = t.tanh(t.matmul(sequence, weights))
    # compute the attention weights
    attention = f.softmax(importance, 0)
    tSeq = sequence.permute(1, 0)
    # compute and return the representation
    return t.matmul(tSeq, attention)
Author: jworr, Project: ml_tools, Lines of code: 14, Source: attention.py
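The first matmul scores every time step against a learned vector and the second performs the attention-weighted sum, collapsing the sequence into a single vector. A minimal sketch with assumed sizes:

import torch

T_steps, D = 6, 16                      # sequence length and feature size (assumed)
sequence = torch.randn(T_steps, D)
weights = torch.randn(D)                # stand-in for the learned scoring vector

importance = torch.tanh(torch.matmul(sequence, weights))    # one score per step, shape (T_steps,)
attention = torch.softmax(importance, dim=0)                # weights sum to 1
pooled = torch.matmul(sequence.permute(1, 0), attention)    # (D,) weighted sum over steps
print(pooled.shape, attention.sum())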
Note: The torch.matmul examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other source-code and documentation platforms. The snippets were selected from open-source projects contributed by their respective authors; copyright remains with the original authors, and redistribution or use should follow the corresponding project's license. Do not reproduce without permission.