This article collects typical usage examples of the torch.gather function in Python. If you have been wondering what torch.gather does, how to call it, or what it looks like in real code, the curated examples below may help.
Twenty code examples of the gather function are shown, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples. A minimal illustration of the basic call comes first, before the project examples.
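The following is a small, self-contained illustration of what torch.gather computes (the tensor values are made up for demonstration): for dim=1, every output element is out[i][j] = input[i][index[i][j]].

import torch

x = torch.tensor([[10, 20, 30],
                  [40, 50, 60]])
idx = torch.tensor([[2, 0],
                    [1, 1]])
# For dim=1: out[i][j] = x[i][idx[i][j]]
out = torch.gather(x, 1, idx)
print(out)  # tensor([[30, 10],
            #         [50, 50]])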
Example 1: eliminate_rows
def eliminate_rows(self, prob_sc, ind, phis):
""" eliminate rows of phis and prob_matrix scale """
length = prob_sc.size()[1]
mask = (prob_sc[:, :, 0] > 0.85).type(dtype)
rang = (Variable(torch.range(0, length - 1).unsqueeze(0)
.expand_as(mask)).
type(dtype))
ind_sc = torch.sort(rang * (1-mask) + length * mask, 1)[1]
# permute prob_sc
m = mask.unsqueeze(2).expand_as(prob_sc)
mm = m.clone()
mm[:, :, 1:] = 0
prob_sc = (torch.gather(prob_sc * (1 - m) + mm, 1,
ind_sc.unsqueeze(2).expand_as(prob_sc)))
# compose permutations
ind = torch.gather(ind, 1, ind_sc)
active = torch.gather(1-mask, 1, ind_sc)
# permute phis
active1 = active.unsqueeze(2).expand_as(phis)
ind1 = ind.unsqueeze(2).expand_as(phis)
active2 = active.unsqueeze(1).expand_as(phis)
ind2 = ind.unsqueeze(1).expand_as(phis)
phis_out = torch.gather(phis, 1, ind1) * active1
phis_out = torch.gather(phis_out, 2, ind2) * active2
return prob_sc, ind, phis_out, active
Author: ParsonsZeng, Project: DiCoNet, Lines of code: 25, Source file: DCN.py
Example 2: sample_relax_given_class
def sample_relax_given_class(logits, samp):
cat = Categorical(logits=logits)
u = torch.rand(B,C).clamp(1e-8, 1.-1e-8)
gumbels = -torch.log(-torch.log(u))
z = logits + gumbels
b = samp #torch.argmax(z, dim=1)
logprob = cat.log_prob(b).view(B,1)
u_b = torch.gather(input=u, dim=1, index=b.view(B,1))
z_tilde_b = -torch.log(-torch.log(u_b))
z_tilde = -torch.log((- torch.log(u) / torch.softmax(logits, dim=1)) - torch.log(u_b))
z_tilde.scatter_(dim=1, index=b.view(B,1), src=z_tilde_b)
z = z_tilde
u_b = torch.gather(input=u, dim=1, index=b.view(B,1))
z_tilde_b = -torch.log(-torch.log(u_b))
u = torch.rand(B,C).clamp(1e-8, 1.-1e-8)
z_tilde = -torch.log((- torch.log(u) / torch.softmax(logits, dim=1)) - torch.log(u_b))
z_tilde.scatter_(dim=1, index=b.view(B,1), src=z_tilde_b)
return z, z_tilde, logprob
Author: chriscremer, Project: Other_Code, Lines of code: 29, Source file: plotting_cat_grads_dist.py
Example 3: hard_example_mining
def hard_example_mining(dist_mat, labels, return_inds=False):
"""For each anchor, find the hardest positive and negative sample.
Args:
dist_mat: pytorch Variable, pair wise distance between samples, shape [N, N]
labels: pytorch LongTensor, with shape [N]
return_inds: whether to return the indices. Save time if `False`(?)
Returns:
dist_ap: pytorch Variable, distance(anchor, positive); shape [N]
dist_an: pytorch Variable, distance(anchor, negative); shape [N]
p_inds: pytorch LongTensor, with shape [N];
indices of selected hard positive samples; 0 <= p_inds[i] <= N - 1
n_inds: pytorch LongTensor, with shape [N];
indices of selected hard negative samples; 0 <= n_inds[i] <= N - 1
NOTE: Only consider the case in which all labels have same num of samples,
thus we can cope with all anchors in parallel.
"""
assert len(dist_mat.size()) == 2
assert dist_mat.size(0) == dist_mat.size(1)
N = dist_mat.size(0)
# shape [N, N]
is_pos = labels.expand(N, N).eq(labels.expand(N, N).t())
is_neg = labels.expand(N, N).ne(labels.expand(N, N).t())
# `dist_ap` means distance(anchor, positive)
# both `dist_ap` and `relative_p_inds` with shape [N, 1]
dist_ap, relative_p_inds = torch.max(
dist_mat[is_pos].contiguous().view(N, -1), 1, keepdim=True)
# `dist_an` means distance(anchor, negative)
# both `dist_an` and `relative_n_inds` with shape [N, 1]
dist_an, relative_n_inds = torch.min(
dist_mat[is_neg].contiguous().view(N, -1), 1, keepdim=True)
# shape [N]
dist_ap = dist_ap.squeeze(1)
dist_an = dist_an.squeeze(1)
if return_inds:
# shape [N, N]
ind = (labels.new().resize_as_(labels)
.copy_(torch.arange(0, N).long())
.unsqueeze( 0).expand(N, N))
# shape [N, 1]
p_inds = torch.gather(
ind[is_pos].contiguous().view(N, -1), 1, relative_p_inds.data)
n_inds = torch.gather(
ind[is_neg].contiguous().view(N, -1), 1, relative_n_inds.data)
# shape [N]
p_inds = p_inds.squeeze(1)
n_inds = n_inds.squeeze(1)
return dist_ap, dist_an, p_inds, n_inds
return dist_ap, dist_an
Author: ChunfeiMa, Project: AlignedReID-Re-Production-Pytorch, Lines of code: 53, Source file: loss.py
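A minimal, self-contained sketch of the indexing idiom in Example 3 (shapes and values are illustrative, not taken from the project): boolean masking flattens the distance matrix, so the index returned by torch.max is relative to each row slice, and torch.gather over an arange matrix maps it back to an absolute sample index.

import torch

N = 4
labels = torch.tensor([0, 0, 1, 1])
dist_mat = torch.rand(N, N)
is_pos = labels.unsqueeze(0).eq(labels.unsqueeze(1))        # (N, N), True where labels match
# distances from each anchor to its positives, one row per anchor
dist_pos = dist_mat[is_pos].contiguous().view(N, -1)        # (N, n_pos)
dist_ap, rel_p = torch.max(dist_pos, dim=1, keepdim=True)   # rel_p: relative column index
# map the relative index back to an absolute sample index with gather
ind = torch.arange(N).unsqueeze(0).expand(N, N)
p_inds = torch.gather(ind[is_pos].contiguous().view(N, -1), 1, rel_p).squeeze(1)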
Example 4: sort_by_embeddings
def sort_by_embeddings(self, Phis, Inputs_N, e):
ind = torch.sort(e, 1)[1].squeeze()
for i, phis in enumerate(Phis):
# rearrange phis
phis_out = (torch.gather(Phis[i], 1, ind.unsqueeze(2)
.expand_as(phis)))
Phis[i] = (torch.gather(phis_out, 2, ind.unsqueeze(1)
.expand_as(phis)))
# rearrange inputs
Inputs_N[i] = torch.gather(Inputs_N[i], 1,
ind.unsqueeze(2).expand_as(Inputs_N[i]))
return Phis, Inputs_N
Author: ParsonsZeng, Project: DiCoNet, Lines of code: 12, Source file: DCN.py
Example 5: proposal_layer
def proposal_layer(self, rpn_class, rpn_bbox):
# handling proposals
scores = rpn_class[:, :, 1]
# Box deltas [batch, num_rois, 4]
deltas_mul = Variable(torch.from_numpy(np.reshape(
self.config.RPN_BBOX_STD_DEV, [1, 1, 4]).astype(np.float32))).cuda()
deltas = rpn_bbox * deltas_mul
pre_nms_limit = min(6000, self.anchors.shape[0])
scores, ix = torch.topk(scores, pre_nms_limit, dim=-1,
largest=True, sorted=True)
ix = torch.unsqueeze(ix, 2)
ix = torch.cat([ix, ix, ix, ix], dim=2)
deltas = torch.gather(deltas, 1, ix)
_anchors = []
for i in range(self.config.IMAGES_PER_GPU):
anchors = Variable(torch.from_numpy(
self.anchors.astype(np.float32))).cuda()
_anchors.append(anchors)
anchors = torch.stack(_anchors, 0)
pre_nms_anchors = torch.gather(anchors, 1, ix)
refined_anchors = apply_box_deltas_graph(pre_nms_anchors, deltas)
# Clip to image boundaries. [batch, N, (y1, x1, y2, x2)]
height, width = self.config.IMAGE_SHAPE[:2]
window = np.array([0, 0, height, width]).astype(np.float32)
window = Variable(torch.from_numpy(window)).cuda()
refined_anchors_clipped = clip_boxes_graph(refined_anchors, window)
refined_proposals = []
for i in range(self.config.IMAGES_PER_GPU):
indices = nms(
torch.cat([refined_anchors_clipped.data[i], scores.data[i]], 1), 0.7)
indices = indices[:self.proposal_count]
indices = torch.stack([indices, indices, indices, indices], dim=1)
indices = Variable(indices).cuda()
proposals = torch.gather(refined_anchors_clipped[i], 0, indices)
padding = self.proposal_count - proposals.size()[0]
proposals = torch.cat(
[proposals, Variable(torch.zeros([padding, 4])).cuda()], 0)
refined_proposals.append(proposals)
rpn_rois = torch.stack(refined_proposals, 0)
return rpn_rois
Author: huanglizhi, Project: Pytorch_Mask_RCNN, Lines of code: 51, Source file: mask_rcnn.py
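A minimal, CPU-only sketch of the top-k gather pattern in Example 5 (tensor sizes are placeholders): torch.topk returns per-batch indices over the box dimension, and the index is broadcast across the 4 box coordinates so whole rows can be gathered. The original snippet builds the broadcast index with torch.cat([ix, ix, ix, ix], dim=2); expanding produces the same shape.

import torch

batch, num_boxes, k = 2, 6, 3
scores = torch.rand(batch, num_boxes)
deltas = torch.rand(batch, num_boxes, 4)
top_scores, ix = torch.topk(scores, k, dim=-1, largest=True, sorted=True)  # ix: (batch, k)
ix4 = ix.unsqueeze(2).expand(-1, -1, 4)       # (batch, k, 4): same index for every coordinate
top_deltas = torch.gather(deltas, 1, ix4)     # selected box deltas, (batch, k, 4)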
Example 6: _score_sentence
def _score_sentence(self, scores, mask, tags):
"""
input:
scores: variable (seq_len, batch, tag_size, tag_size)
mask: (batch, seq_len)
tags: tensor (batch, seq_len)
output:
score: sum of score for gold sequences within whole batch
"""
# Gives the score of a provided tag sequence
batch_size = scores.size(1)
seq_len = scores.size(0)
tag_size = scores.size(2)
## convert tag value into a new format, recorded label bigram information to index
new_tags = autograd.Variable(torch.LongTensor(batch_size, seq_len))
if self.gpu:
new_tags = new_tags.cuda()
for idx in range(seq_len):
if idx == 0:
## start -> first score
new_tags[:,0] = (tag_size - 2)*tag_size + tags[:,0]
else:
new_tags[:,idx] = tags[:,idx-1]*tag_size + tags[:,idx]
## transition for label to STOP_TAG
end_transition = self.transitions[:,STOP_TAG].contiguous().view(1, tag_size).expand(batch_size, tag_size)
## length for batch, last word position = length - 1
length_mask = torch.sum(mask.long(), dim = 1).view(batch_size,1).long()
## index the label id of last word
end_ids = torch.gather(tags, 1, length_mask - 1)
## index the transition score for end_id to STOP_TAG
end_energy = torch.gather(end_transition, 1, end_ids)
## convert tag as (seq_len, batch_size, 1)
new_tags = new_tags.transpose(1,0).contiguous().view(seq_len, batch_size, 1)
### need convert tags id to search from 400 positions of scores
tg_energy = torch.gather(scores.view(seq_len, batch_size, -1), 2, new_tags).view(seq_len, batch_size) # seq_len * bat_size
## mask transpose to (seq_len, batch_size)
tg_energy = tg_energy.masked_select(mask.transpose(1,0))
# ## calculate the score from START_TAG to first label
# start_transition = self.transitions[START_TAG,:].view(1, tag_size).expand(batch_size, tag_size)
# start_energy = torch.gather(start_transition, 1, tags[0,:])
## add all score together
# gold_score = start_energy.sum() + tg_energy.sum() + end_energy.sum()
gold_score = tg_energy.sum() + end_energy.sum()
return gold_score
Author: chongp, Project: Name-Entity-Recognition, Lines of code: 50, Source file: crf.py
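A minimal sketch of the gather in Example 6, assuming scores[t, b, i, j] is the score of moving from tag i to tag j at step t and that the start tag is tag_size - 2, as in the snippet above (all sizes here are illustrative): the (previous_tag, current_tag) pair is flattened into a single index prev * tag_size + cur, and torch.gather picks the gold transition score at every step.

import torch

seq_len, batch, tag_size = 5, 2, 4
scores = torch.randn(seq_len, batch, tag_size, tag_size)
tags = torch.randint(0, tag_size, (seq_len, batch))
# previous tag at every step; step 0 uses the designated start tag (tag_size - 2)
prev = torch.cat([torch.full((1, batch), tag_size - 2, dtype=torch.long), tags[:-1]], dim=0)
new_tags = (prev * tag_size + tags).unsqueeze(2)                    # (seq_len, batch, 1)
gold = torch.gather(scores.view(seq_len, batch, -1), 2, new_tags)   # gold transition scores
gold = gold.view(seq_len, batch)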
Example 7: word_pre_train_forward
def word_pre_train_forward(self, sentence, position):
"""
output of forward language model
args:
sentence (char_seq_len, batch_size): char-level representation of sentence
position (word_seq_len, batch_size): position of blank space in char-level representation of sentence
"""
embeds = self.char_embeds(sentence)
d_embeds = self.dropout(embeds)
lstm_out, hidden = self.forw_char_lstm(d_embeds)
tmpsize = position.size()
position = position.unsqueeze(2).expand(tmpsize[0], tmpsize[1], self.char_hidden_dim)
select_lstm_out = torch.gather(lstm_out, 0, position)
d_lstm_out = self.dropout(select_lstm_out).view(-1, self.char_hidden_dim)
if self.if_highway:
char_out = self.forw2word(d_lstm_out)
d_char_out = self.dropout(char_out)
else:
d_char_out = d_lstm_out
pre_score = self.word_pre_train_out(d_char_out)
return pre_score, hidden
Author: qlwang25, Project: HSCRF-pytorch, Lines of code: 27, Source file: word_rep_layer.py
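A minimal sketch of the dim=0 gather used in Example 7 (all shapes and positions are made up): the position tensor is expanded to the full hidden dimension so that, for every word and batch element, the char-LSTM output at the recorded word-boundary position is selected.

import torch

char_seq_len, batch, hidden = 9, 2, 6
lstm_out = torch.randn(char_seq_len, batch, hidden)
# position[w, b]: index into the char sequence marking the end of word w in batch element b
position = torch.tensor([[2, 3], [5, 6], [8, 8]])         # (word_seq_len, batch)
index = position.unsqueeze(2).expand(-1, -1, hidden)      # (word_seq_len, batch, hidden)
word_states = torch.gather(lstm_out, 0, index)             # (word_seq_len, batch, hidden)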
Example 8: decode_with_crf
def decode_with_crf(crf, word_reps, mask_v, l_map):
"""
decode with viterbi algorithm and return score
"""
seq_len = word_reps.size(0)
bat_size = word_reps.size(1)
decoded_crf = crf.decode(word_reps, mask_v)
scores = crf.cal_score(word_reps).data
mask_v = mask_v.data
decoded_crf = decoded_crf.data
decoded_crf_withpad = torch.cat((torch.cuda.LongTensor(1,bat_size).fill_(l_map['<start>']), decoded_crf), 0)
decoded_crf_withpad = decoded_crf_withpad.transpose(0,1).cpu().numpy()
label_size = len(l_map)
bi_crf = []
cur_len = decoded_crf_withpad.shape[1]-1
for i_l in decoded_crf_withpad:
bi_crf.append([i_l[ind] * label_size + i_l[ind + 1] for ind in range(0, cur_len)] + [
i_l[cur_len] * label_size + l_map['<pad>']])
bi_crf = torch.cuda.LongTensor(bi_crf).transpose(0,1).unsqueeze(2)
tg_energy = torch.gather(scores.view(seq_len, bat_size, -1), 2, bi_crf).view(seq_len, bat_size) # seq_len * bat_size
tg_energy = tg_energy.transpose(0,1).masked_select(mask_v.transpose(0,1))
tg_energy = tg_energy.cpu().numpy()
masks = mask_v.sum(0)
crf_result_scored_by_crf = []
start = 0
for i, mask in enumerate(masks):
end = start + mask
crf_result_scored_by_crf.append(tg_energy[start:end].sum())
start = end
crf_result_scored_by_crf = np.array(crf_result_scored_by_crf)
return decoded_crf.cpu().transpose(0,1).numpy(), crf_result_scored_by_crf
Author: qlwang25, Project: HSCRF-pytorch, Lines of code: 35, Source file: utils.py
Example 9: _compute_loss
def _compute_loss(self, batch, output, target):
scores = self.generator(self._bottle(output))
gtruth = target.view(-1)
if self.confidence < 1:
tdata = gtruth.data
mask = torch.nonzero(tdata.eq(self.padding_idx)).squeeze()
log_likelihood = torch.gather(scores.data, 1, tdata.unsqueeze(1))
tmp_ = self.one_hot.repeat(gtruth.size(0), 1)
tmp_.scatter_(1, tdata.unsqueeze(1), self.confidence)
if mask.dim() > 0:
log_likelihood.index_fill_(0, mask, 0)
tmp_.index_fill_(0, mask, 0)
gtruth = Variable(tmp_, requires_grad=False)
loss = self.criterion(scores, gtruth)
if self.confidence < 1:
# Default: report smoothed ppl.
# loss_data = -log_likelihood.sum(0)
loss_data = loss.data.clone()
else:
loss_data = loss.data.clone()
stats = self._stats(loss_data, scores.data, target.view(-1).data)
return loss, stats
Author: xiamengzhou, Project: OpenNMT-py, Lines of code: 25, Source file: Loss.py
Example 10: forward
def forward(self, log_prob, y_true, mask):
mask = mask.float()
log_P = torch.gather(log_prob.view(-1, log_prob.size(2)), 1, y_true.contiguous().view(-1, 1)) # batch*time x 1
log_P = log_P.view(y_true.size(0), y_true.size(1)) # batch x time
log_P = log_P * mask # batch x time
sum_log_P = torch.sum(log_P, dim=1) / torch.sum(mask, dim=1) # batch
return -sum_log_P
Author: zhhengcs, Project: seq2seq-keyphrase-pytorch, Lines of code: 7, Source file: eric_layers.py
Example 11: get_max_q_values_with_target
def get_max_q_values_with_target(
self, q_values, q_values_target, possible_actions_mask
):
"""
Used in Q-learning update.
:param states: Numpy array with shape (batch_size, state_dim). Each row
contains a representation of a state.
:param possible_actions_mask: Numpy array with shape (batch_size, action_dim).
possible_actions[i][j] = 1 iff the agent can take action j from
state i.
:param double_q_learning: bool to use double q-learning
"""
# The parametric DQN can create flattened q values so we reshape here.
q_values = q_values.reshape(possible_actions_mask.shape)
q_values_target = q_values_target.reshape(possible_actions_mask.shape)
if self.double_q_learning:
# Set q-values of impossible actions to a very large negative number.
inverse_pna = 1 - possible_actions_mask
impossible_action_penalty = self.ACTION_NOT_POSSIBLE_VAL * inverse_pna
q_values = q_values + impossible_action_penalty
# Select max_q action after scoring with online network
max_q_values, max_indicies = torch.max(q_values, dim=1, keepdim=True)
# Use q_values from target network for max_q action from online q_network
# to decouple selection & scoring, preventing overestimation of q-values
q_values = torch.gather(q_values_target, 1, max_indicies)
return q_values, max_indicies
else:
return self.get_max_q_values(q_values, possible_actions_mask)
Author: sra4077, Project: Horizon, Lines of code: 30, Source file: dqn_trainer_base.py
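A minimal sketch of the double Q-learning step in Example 11 (the tensors are random placeholders): the action is chosen with the online network's argmax, but its value is read from the target network with torch.gather, decoupling action selection from value estimation.

import torch

batch, n_actions = 3, 4
q_online = torch.randn(batch, n_actions)
q_target = torch.randn(batch, n_actions)
best_actions = torch.argmax(q_online, dim=1, keepdim=True)   # selected with the online network
max_q = torch.gather(q_target, 1, best_actions)              # evaluated with the target network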
Example 12: forward
def forward(self, ctx_dict, y):
"""Computes the softmax outputs given source annotations `ctxs` and
ground-truth target token indices `y`.
Arguments:
ctxs(Variable): A variable of `S*B*ctx_dim` representing the source
annotations in an order compatible with ground-truth targets.
y(Variable): A variable of `T*B` containing ground-truth target
token indices for the given batch.
"""
loss = 0.0
# Convert token indices to embeddings -> T*B*E
y_emb = self.emb(y)
# Get initial hidden state
h = self.f_init(*ctx_dict['txt'])
# -1: So that we skip the timestep where input is <eos>
for t in range(y_emb.shape[0] - 1):
log_p, h = self.f_next(ctx_dict, y_emb[t], h)
loss += torch.gather(
log_p, dim=1, index=y[t + 1].unsqueeze(1)).sum()
return loss
Author: codealphago, Project: nmtpytorch, Lines of code: 25, Source file: condmm_decoder.py
Example 13: masked_cross_entropy
def masked_cross_entropy(logits, target, length):
length = Variable(torch.LongTensor(length)).cuda()
"""
Args:
logits: A Variable containing a FloatTensor of size
(batch, max_len, num_classes) which contains the
unnormalized probability for each class.
target: A Variable containing a LongTensor of size
(batch, max_len) which contains the index of the true
class for each corresponding step.
length: A Variable containing a LongTensor of size (batch,)
which contains the length of each data in a batch.
Returns:
loss: An average loss value masked by the length.
"""
# logits_flat: (batch * max_len, num_classes)
logits_flat = logits.view(-1, logits.size(-1))
# log_probs_flat: (batch * max_len, num_classes)
log_probs_flat = functional.log_softmax(logits_flat)
# target_flat: (batch * max_len, 1)
target_flat = target.view(-1, 1)
# losses_flat: (batch * max_len, 1)
losses_flat = -torch.gather(log_probs_flat, dim=1, index=target_flat)
# losses: (batch, max_len)
losses = losses_flat.view(*target.size())
# mask: (batch, max_len)
mask = sequence_mask(sequence_length=length, max_len=target.size(1))
losses = losses * mask.float()
loss = losses.sum() / length.float().sum()
return loss
Author: Cadene, Project: practical-pytorch, Lines of code: 33, Source file: masked_cross_entropy.py
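A minimal, runnable sketch of the masked negative log-likelihood pattern in Example 13 (sizes are illustrative): gather picks the log-probability of the gold class at every step, and a length mask zeroes out padded positions before averaging.

import torch
import torch.nn.functional as F

batch, max_len, num_classes = 2, 4, 5
logits = torch.randn(batch, max_len, num_classes)
target = torch.randint(0, num_classes, (batch, max_len))
lengths = torch.tensor([4, 2])
log_probs = F.log_softmax(logits.view(-1, num_classes), dim=-1)
# gold log-probabilities, one per (batch, step) pair
nll = -torch.gather(log_probs, 1, target.view(-1, 1)).view(batch, max_len)
mask = torch.arange(max_len).unsqueeze(0) < lengths.unsqueeze(1)   # (batch, max_len)
loss = (nll * mask.float()).sum() / lengths.float().sum()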
Example 14: forward
def forward(self, batch):
X_data, X_padding_mask, X_lens, X_batch_extend_vocab, X_extra_zeros, context, coverage = self.get_input_from_batch(batch)
y_data, y_padding_mask, y_max_len, y_lens_var, target_data = self.get_output_from_batch(batch)
encoder_outputs, encoder_hidden, max_encoder_output = self.encoder(X_data, X_lens)
s_t_1 = self.reduce_state(encoder_hidden)
if config.use_maxpool_init_ctx:
context = max_encoder_output
step_losses = []
for di in range(min(y_max_len, self.args.max_decoder_steps)):
y_t_1 = y_data[:, di] # Teacher forcing
final_dist, s_t_1, context, attn_dist, p_gen, coverage = self.decoder(y_t_1, s_t_1,
encoder_outputs, X_padding_mask, context,
X_extra_zeros, X_batch_extend_vocab,
coverage)
target = target_data[:, di]
gold_probs = torch.gather(final_dist, 1, target.unsqueeze(1)).squeeze()
step_loss = -torch.log(gold_probs + self.args.eps)
if self.args.is_coverage:
step_coverage_loss = torch.sum(torch.min(attn_dist, coverage), 1)
step_loss = step_loss + config.cov_loss_wt * step_coverage_loss
step_mask = y_padding_mask[:, di]
step_loss = step_loss * step_mask
step_losses.append(step_loss)
sum_losses = torch.sum(torch.stack(step_losses, 1), 1)
batch_avg_loss = sum_losses / y_lens_var
loss = torch.mean(batch_avg_loss)
return loss
Author: coder352, Project: shellscript, Lines of code: 31, Source file: seq2seq_original.py
Example 15: forward
def forward(self, batch):
"""Forward method receives target-length ordered batches."""
# Encode image and get initial variables
img_ctx, c_t, h_t = self.f_init(batch)
# Fetch embeddings -> (seq_len, batch_size, emb_dim)
caption = batch[self.tl]
# n_tokens token processed in this batch
self.n_tokens = caption.numel()
# Get embeddings
embs = self.emb(caption)
# Accumulators
loss = 0.0
self.alphas = []
# -1: So that we skip the timestep where input is <eos>
for t in range(caption.shape[0] - 1):
# NOTE: This is where scheduled sampling will happen
# Either fetch from self.emb or from log_p
# Current textual input to decoder: y_t = embs[t]
log_p, c_t, h_t, _ = self.f_next(img_ctx, embs[t], c_t, h_t)
# t + 1: We're predicting next token
# Cumulate losses
loss += torch.gather(
log_p, dim=1, index=caption[t + 1].unsqueeze(1)).sum()
# Return normalized loss
return loss / self.n_tokens
Author: codealphago, Project: nmtpytorch, Lines of code: 33, Source file: sat.py
Example 16: eval_one_batch
def eval_one_batch(self, batch):
enc_batch, enc_padding_mask, enc_lens, enc_batch_extend_vocab, extra_zeros, c_t_1, coverage = \
get_input_from_batch(batch, use_cuda)
dec_batch, dec_padding_mask, max_dec_len, dec_lens_var, target_batch = \
get_output_from_batch(batch, use_cuda)
encoder_outputs, encoder_hidden, max_encoder_output = self.model.encoder(enc_batch, enc_lens)
s_t_1 = self.model.reduce_state(encoder_hidden)
if config.use_maxpool_init_ctx:
c_t_1 = max_encoder_output
step_losses = []
for di in range(min(max_dec_len, config.max_dec_steps)):
y_t_1 = dec_batch[:, di] # Teacher forcing
final_dist, s_t_1, c_t_1,attn_dist, p_gen, coverage = self.model.decoder(y_t_1, s_t_1,
encoder_outputs, enc_padding_mask, c_t_1,
extra_zeros, enc_batch_extend_vocab, coverage)
target = target_batch[:, di]
gold_probs = torch.gather(final_dist, 1, target.unsqueeze(1)).squeeze()
step_loss = -torch.log(gold_probs + config.eps)
if config.is_coverage:
step_coverage_loss = torch.sum(torch.min(attn_dist, coverage), 1)
step_loss = step_loss + config.cov_loss_wt * step_coverage_loss
step_mask = dec_padding_mask[:, di]
step_loss = step_loss * step_mask
step_losses.append(step_loss)
sum_step_losses = torch.sum(torch.stack(step_losses, 1), 1)
batch_avg_loss = sum_step_losses / dec_lens_var
loss = torch.mean(batch_avg_loss)
return loss.data[0]
Author: sa7i, Project: pointer_summarizer, Lines of code: 34, Source file: eval.py
Example 17: lm_lstm
def lm_lstm(self, forw_sentence, forw_position, back_sentence, back_position, word_seq):
'''
return word representations with character-language-model
args:
forw_sentence (char_seq_len, batch_size) : char-level representation of sentence
forw_position (word_seq_len, batch_size) : position of blank space in char-level representation of sentence
back_sentence (char_seq_len, batch_size) : char-level representation of sentence (inverse order)
back_position (word_seq_len, batch_size) : position of blank space in inversed char-level representation of sentence
word_seq (word_seq_len, batch_size) : word-level representation of sentence
'''
self.set_batch_seq_size(forw_position)
forw_emb = self.char_embeds(forw_sentence)
back_emb = self.char_embeds(back_sentence)
d_f_emb = self.dropout(forw_emb)
d_b_emb = self.dropout(back_emb)
forw_lstm_out, _ = self.forw_char_lstm(d_f_emb)
back_lstm_out, _ = self.back_char_lstm(d_b_emb)
forw_position = forw_position.unsqueeze(2).expand(self.word_seq_length, self.batch_size, self.char_hidden_dim)
select_forw_lstm_out = torch.gather(forw_lstm_out, 0, forw_position)
back_position = back_position.unsqueeze(2).expand(self.word_seq_length, self.batch_size, self.char_hidden_dim)
select_back_lstm_out = torch.gather(back_lstm_out, 0, back_position)
fb_lstm_out = self.dropout(torch.cat((select_forw_lstm_out, select_back_lstm_out), dim=2))
if self.if_highway:
char_out = self.fb2char(fb_lstm_out)
d_char_out = self.dropout(char_out)
else:
d_char_out = fb_lstm_out
word_emb = self.word_embeds(word_seq)
d_word_emb = self.dropout(word_emb)
word_input = torch.cat((d_word_emb, d_char_out), dim=2)
lstm_out, _ = self.word_lstm_lm(word_input)
d_lstm_out = self.dropout(lstm_out)
return d_lstm_out
Author: qlwang25, Project: HSCRF-pytorch, Lines of code: 47, Source file: word_rep_layer.py
Example 18: NN
def NN(epoch, net, lemniscate, trainloader, testloader, recompute_memory=0):
net.eval()
net_time = AverageMeter()
cls_time = AverageMeter()
losses = AverageMeter()
correct = 0.
total = 0
testsize = testloader.dataset.__len__()
trainFeatures = lemniscate.memory.t()
if hasattr(trainloader.dataset, 'imgs'):
trainLabels = torch.LongTensor([y for (p, y) in trainloader.dataset.imgs]).cuda()
else:
trainLabels = torch.LongTensor(trainloader.dataset.train_labels).cuda()
if recompute_memory:
transform_bak = trainloader.dataset.transform
trainloader.dataset.transform = testloader.dataset.transform
temploader = torch.utils.data.DataLoader(trainloader.dataset, batch_size=100, shuffle=False, num_workers=1)
for batch_idx, (inputs, targets, indexes) in enumerate(temploader):
inputs, targets = inputs.cuda(), targets.cuda()
inputs, targets = Variable(inputs, volatile=True), Variable(targets)
batchSize = inputs.size(0)
features = net(inputs)
trainFeatures[:, batch_idx*batchSize:batch_idx*batchSize+batchSize] = features.data.t()
trainLabels = torch.LongTensor(temploader.dataset.train_labels).cuda()
trainloader.dataset.transform = transform_bak
end = time.time()
for batch_idx, (inputs, targets, indexes) in enumerate(testloader):
inputs, targets = inputs.cuda(), targets.cuda()
inputs, targets = Variable(inputs, volatile=True), Variable(targets)
batchSize = inputs.size(0)
features = net(inputs)
net_time.update(time.time() - end)
end = time.time()
dist = torch.mm(features.data, trainFeatures)
yd, yi = dist.topk(1, dim=1, largest=True, sorted=True)
candidates = trainLabels.view(1,-1).expand(batchSize, -1)
retrieval = torch.gather(candidates, 1, yi)
retrieval = retrieval.narrow(1, 0, 1).clone().view(-1)
yd = yd.narrow(1, 0, 1)
total += targets.size(0)
correct += retrieval.eq(targets.data).cpu().sum()
cls_time.update(time.time() - end)
end = time.time()
print('Test [{}/{}]\t'
'Net Time {net_time.val:.3f} ({net_time.avg:.3f})\t'
'Cls Time {cls_time.val:.3f} ({cls_time.avg:.3f})\t'
'Top1: {:.2f}'.format(
total, testsize, correct*100./total, net_time=net_time, cls_time=cls_time))
return correct/total
Author: eglxiang, Project: lemniscate.pytorch, Lines of code: 59, Source file: test.py
Example 19: forward
def forward(self, input, target, mask):
logprob_select = torch.gather(input, 1, target)
out = torch.masked_select(logprob_select, mask)
loss = -torch.sum(out) / mask.float().sum()
return loss
Author: wooridle, Project: EmbodiedQA, Lines of code: 8, Source file: models.py
Example 20: forward
def forward(self, x_de, x_en, update_baseline=True):
bs = x_de.size(0)
# x_de is bs,n_de. x_en is bs,n_en
emb_de = self.embedding_de(x_de) # bs,n_de,word_dim
emb_en = self.embedding_en(x_en) # bs,n_en,word_dim
h0_enc = torch.zeros(self.n_layers*self.directions, bs, self.hidden_dim).cuda()
c0_enc = torch.zeros(self.n_layers*self.directions, bs, self.hidden_dim).cuda()
h0_dec = torch.zeros(self.n_layers, bs, self.hidden_dim).cuda()
c0_dec = torch.zeros(self.n_layers, bs, self.hidden_dim).cuda()
# hidden vars have dimension n_layers*n_directions,bs,hiddensz
enc_h, _ = self.encoder(emb_de, (Variable(h0_enc), Variable(c0_enc)))
# enc_h is bs,n_de,hiddensz*n_directions. ordering is different from last week because batch_first=True
dec_h, _ = self.decoder(emb_en, (Variable(h0_dec), Variable(c0_dec)))
# dec_h is bs,n_en,hidden_size*n_directions
# we've gotten our encoder/decoder hidden states so we are ready to do attention
# first let's get all our scores, which we can do easily since we are using dot-prod attention
if self.directions == 2:
scores = torch.bmm(self.dim_reduce(enc_h), dec_h.transpose(1,2))
# TODO: any easier ways to reduce dimension?
else:
scores = torch.bmm(enc_h, dec_h.transpose(1,2))
# (bs,n_de,hiddensz*n_directions) * (bs,hiddensz*n_directions,n_en) = (bs,n_de,n_en)
reinforce_loss = 0 # we only use this variable for hard attention
loss = 0
avg_reward = 0
# we just iterate to dec_h.size(1)-1, since there's </s> at the end of each sentence
for t in range(dec_h.size(1)-1): # iterate over english words, with teacher forcing
attn_dist = F.softmax(scores[:, :, t],dim=1) # bs,n_de. these are the alphas (attention scores for each german word)
if self.attn_type == "hard":
cat = torch.distributions.Categorical(attn_dist)
attn_samples = cat.sample() # bs. each element is a sample from categorical distribution
one_hot = Variable(torch.zeros_like(attn_dist.data).scatter_(-1, attn_samples.data.unsqueeze(1), 1).cuda()) # bs,n_de
# made a bunch of one-hot vectors
context = torch.bmm(one_hot.unsqueeze(1), enc_h).squeeze(1)
# now we use the one-hot vectors to select correct hidden vectors from enc_h
# (bs,1,n_de) * (bs,n_de,hiddensz*n_directions) = (bs,1,hiddensz*n_directions). squeeze to bs,hiddensz*n_directions
else:
context = torch.bmm(attn_dist.unsqueeze(1), enc_h).squeeze(1) # same dimensions
# (bs,1,n_de) * (bs,n_de,hiddensz*n_directions) = (bs,1,hiddensz*n_directions)
# context is bs,hidden_size*n_directions
# the rnn output and the context together make the decoder "hidden state", which is bs,2*hidden_size*n_directions
pred = self.vocab_layer(torch.cat([dec_h[:,t,:], context], 1)) # bs,len(EN.vocab)
y = x_en[:, t+1] # bs. these are our labels
no_pad = (y != pad_token) # exclude english padding tokens
reward = torch.gather(pred, 1, y.unsqueeze(1)) # bs,1
# reward[i,1] = pred[i,y[i]]. this gets log prob of correct word for each batch. similar to -crossentropy
reward = reward.squeeze(1)[no_pad] # less than bs
if self.attn_type == "hard":
reinforce_loss -= (cat.log_prob(attn_samples[no_pad]) * (reward-self.baseline).detach()).sum()
# reinforce rule (just read the formula), with special baseline
loss -= reward.sum() # minimizing loss is maximizing reward
no_pad_total = (x_en[:,1:] != pad_token).data.sum() # TODO: i think this is right, right?
loss /= no_pad_total
reinforce_loss /= no_pad_total
avg_reward = -loss.data[0]
if update_baseline: # update baseline as a moving average
self.baseline = Variable(0.95*self.baseline.data + 0.05*avg_reward)
return loss, reinforce_loss,avg_reward
Author: anihamde, Project: cs287-s18, Lines of code: 58, Source file: models_original.py
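A minimal, self-contained sketch of the score-function (REINFORCE) term used for hard attention in Example 20; all tensors here are random placeholders, and a simple mean-reward baseline stands in for the moving-average baseline of the original. The gathered log-probability of the gold word acts as the reward, and only the log-probability of the sampled attention position receives the REINFORCE gradient.

import torch

bs, n_de, vocab = 4, 7, 10
attn_dist = torch.softmax(torch.randn(bs, n_de), dim=1)
cat = torch.distributions.Categorical(attn_dist)
samples = cat.sample()                                    # one attended source position per example
pred = torch.log_softmax(torch.randn(bs, vocab), dim=1)   # stand-in for the decoder's log-probs
y = torch.randint(0, vocab, (bs,))
reward = torch.gather(pred, 1, y.unsqueeze(1)).squeeze(1)          # log-prob of the gold word
baseline = reward.mean().detach()                                  # simplified baseline
reinforce_loss = -(cat.log_prob(samples) * (reward - baseline).detach()).sum()
loss = -reward.sum()                                               # maximizing reward = minimizing loss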
Note: The torch.gather examples in this article were compiled by 纯净天空 from source-code and documentation platforms such as GitHub and MSDocs. The code snippets are drawn from open-source projects contributed by various developers, and copyright remains with the original authors; please consult each project's license before redistributing or reusing the code. Do not reproduce without permission.