This article collects typical usage examples of the Python function tensorflow.python.ops.rnn_cell.linear. If you have been struggling with questions like: What exactly does the Python linear function do? How do I use linear? What does linear look like in real code? Then the hand-picked code examples below may help.
A total of 20 code examples of the linear function are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help our system recommend better Python code examples.
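For context before the examples: in these pre-1.0 TensorFlow releases, rnn_cell.linear(args, output_size, bias, bias_start=0.0, scope=None) concatenates its input tensors along the feature axis and applies a single affine map. The sketch below is an illustration of those semantics, not the library source; linear_sketch is a made-up name, and only the "Matrix"/"Bias" variable names follow the historical implementation.

import tensorflow as tf

def linear_sketch(args, output_size, bias, bias_start=0.0, scope=None):
    # Approximate behavior of the old rnn_cell.linear:
    # y = concat(args, 1) @ Matrix (+ Bias), with variables created in the
    # enclosing variable scope, so repeated calls share or collide as usual.
    if not isinstance(args, (list, tuple)):
        args = [args]
    total_arg_size = sum(a.get_shape()[1].value for a in args)
    with tf.variable_scope(scope or "Linear"):
        matrix = tf.get_variable("Matrix", [total_arg_size, output_size])
        # tf.concat(dim, values) is the TF 0.x argument order.
        inputs = args[0] if len(args) == 1 else tf.concat(1, args)
        res = tf.matmul(inputs, matrix)
        if bias:
            bias_term = tf.get_variable(
                "Bias", [output_size],
                initializer=tf.constant_initializer(bias_start))
            res = res + bias_term
    return res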
Example 1: __call__
def __call__(self, inputs, state, scope=None):
    gru_out, gru_state = super(GRUCellAttn, self).__call__(inputs, state, scope)
    with vs.variable_scope(scope or type(self).__name__):
        with vs.variable_scope("Attn2"):
            gamma_h = tanh(rnn_cell.linear(gru_out, self._num_units, True, 1.0))
            weights = tf.reduce_sum(self.phi_hs * gamma_h, reduction_indices=2, keep_dims=True)
            weights = tf.exp(weights - tf.reduce_max(weights, reduction_indices=0, keep_dims=True))
            weights = weights / (1e-6 + tf.reduce_sum(weights, reduction_indices=0, keep_dims=True))
            context = tf.reduce_sum(self.hs * weights, reduction_indices=0)
        with vs.variable_scope("AttnConcat"):
            out = tf.nn.relu(rnn_cell.linear([context, gru_out], self._num_units, True, 1.0))
        self.attn_map = tf.squeeze(tf.slice(weights, [0, 0, 0], [-1, -1, 1]))
        return (out, out)
Author: hrishikeshvganu, Project: nlc, Lines: 13, Source: nlc_model.py
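Worth noting: the Attn2 block above is a hand-rolled, numerically stabilized softmax over the time axis, weights = exp(score - max(score)) / (1e-6 + sum(exp(score - max(score)))), where the 1e-6 term guards against division by zero; context is then the attention-weighted sum of the encoder states self.hs.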
Example 2: __call__
def __call__(self, inputs, state, episodic_gate, scope=None):
    """Gated recurrent unit (GRU) with nunits cells."""
    with vs.variable_scope("MGRUCell"):  # "GRUCell"
        with vs.variable_scope("Gates"):  # Reset gate and update gate.
            # We start with bias of 1.0 to not reset and not update.
            r = rnn_cell.linear([inputs, state], self._num_units, True, 1.0, scope=scope)
            r = sigmoid(r)
        with vs.variable_scope("Candidate"):
            c = tanh(rnn_cell.linear([inputs, r * state], self._num_units, True))
        new_h = tf.mul(episodic_gate, c) + tf.mul((1 - episodic_gate), state)
    return new_h, new_h
Author: sufengniu, Project: DMN-tensorflow, Lines: 13, Source: cell.py
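Sketching the update rule for clarity: with an externally supplied episodic gate g (presumably computed by the DMN's episodic memory module, not by this cell), the state update above is new_h = g * c + (1 - g) * state, where c is the usual GRU candidate; a standard GRU's internal update gate has been replaced by g.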
Example 3: downscale
def downscale(self, inp):
    with vs.variable_scope("Downscale"):
        inp2d = tf.reshape(tf.transpose(inp, perm=[1, 0, 2]), [-1, 2 * self.size])
        out2d = rnn_cell.linear(inp2d, self.size, True, 1.0)
        out3d = tf.reshape(out2d, [self.batch_size, -1, self.size])
        out3d = tf.transpose(out3d, perm=[1, 0, 2])
        out = tanh(out3d)
    return out
Author: nipengmath, Project: nlc, Lines: 8, Source: nlc_model.py
Example 4: __init__
def __init__(self, num_units, encoder_output, scope=None):
    self.hs = encoder_output
    with vs.variable_scope(scope or type(self).__name__):
        with vs.variable_scope("Attn1"):
            hs2d = tf.reshape(self.hs, [-1, num_units])
            phi_hs2d = tanh(rnn_cell.linear(hs2d, num_units, True, 1.0))
            self.phi_hs = tf.reshape(phi_hs2d, tf.shape(self.hs))
    super(GRUCellAttn, self).__init__(num_units)
Author: hrishikeshvganu, Project: nlc, Lines: 8, Source: nlc_model.py
Example 5: attention
def attention(query):
    """Point on hidden using hidden_features and query."""
    with vs.variable_scope("Attention"):
        y = rnn_cell.linear(query, attention_vec_size, True)
        y = array_ops.reshape(y, [-1, 1, 1, attention_vec_size])
        # Attention mask is a softmax of v^T * tanh(...).
        s = math_ops.reduce_sum(
            v * math_ops.tanh(hidden_features + y), [2, 3])
        return s
Author: heshizhu, Project: TensorFlow-Pointer-Networks, Lines: 9, Source: pointer.py
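For reference, this scoring rule is the additive (Bahdanau-style) form s_i = v^T * tanh(W1 * h_i + W2 * q): the W1 * h_i term is precomputed for all encoder positions as hidden_features (via a 1-by-1 convolution in the enclosing pointer_decoder, shown in Example 16), while the W2 * q term is the per-step rnn_cell.linear projection y.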
Example 6: testLinear
def testLinear(self):
    with self.test_session() as sess:
        with tf.variable_scope("root", initializer=tf.constant_initializer(1.0)):
            x = tf.zeros([1, 2])
            l = linear([x], 2, False)
            sess.run([tf.initialize_all_variables()])
            res = sess.run([l], {x.name: np.array([[1., 2.]])})
            self.assertAllClose(res[0], [[3.0, 3.0]])

            # Checks prevent you from accidentally creating a shared function.
            with self.assertRaises(ValueError):
                l1 = linear([x], 2, False)

            # But you can create a new one in a new scope and share the variables.
            with tf.variable_scope("l1") as new_scope:
                l1 = linear([x], 2, False)
            with tf.variable_scope(new_scope, reuse=True):
                linear([l1], 2, False)
            self.assertEqual(len(tf.trainable_variables()), 2)
Author: 0-T-0, Project: tensorflow, Lines: 19, Source: rnn_cell_test.py
Example 7: build_encoder
def build_encoder(self):
    """Inference Network. q(h|X)"""
    with tf.variable_scope("encoder"):
        self.l1_lin = linear(tf.expand_dims(self.x, 0), self.embed_dim, bias=True, scope="l1")
        self.l1 = tf.nn.relu(self.l1_lin)

        self.l2_lin = linear(self.l1, self.embed_dim, bias=True, scope="l2")
        self.l2 = tf.nn.relu(self.l2_lin)

        self.mu = linear(self.l2, self.h_dim, bias=True, scope="mu")
        self.log_sigma_sq = linear(self.l2, self.h_dim, bias=True, scope="log_sigma_sq")

        self.eps = tf.random_normal((1, self.h_dim), 0, 1, dtype=tf.float32)
        self.sigma = tf.sqrt(tf.exp(self.log_sigma_sq))

        self.h = tf.add(self.mu, tf.mul(self.sigma, self.eps))

        _ = tf.histogram_summary("mu", self.mu)
        _ = tf.histogram_summary("sigma", self.sigma)
        _ = tf.histogram_summary("h", self.h)
        _ = tf.histogram_summary("mu + sigma", self.mu + self.sigma)
Author: tonydeep, Project: variational-text-tensorflow, Lines: 21, Source: nvdm.py
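The sampling lines implement the reparameterization trick for a diagonal Gaussian posterior q(h|X): h = mu + sigma * eps with eps ~ N(0, I), which keeps the stochastic node differentiable with respect to mu and log_sigma_sq.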
Example 8: attention
def attention(query):
    """Put attention masks on hidden using hidden_features and query."""
    with vs.variable_scope("Attention"):
        # Attention mask is a softmax of h_in^T * decoder_hidden.
        dec_hid = array_ops.tile(query, [1, attn_length])  # replicate query for element-wise multiplication
        dec_hid = array_ops.reshape(dec_hid, [-1, attn_length, attention_vec_size])
        attn_weight = nn_ops.softmax(math_ops.reduce_sum(attention_states * dec_hid, [2]))  # attn weights for every hidden state in the encoder
        # Now calculate the attention-weighted vector (context vector) cc.
        cc = math_ops.reduce_sum(array_ops.reshape(attn_weight, [-1, attn_length, 1, 1]) * hidden, [1, 2])
        # attended hidden state
        with vs.variable_scope("AttnW1"):
            term1 = rnn_cell.linear(query, attn_size, False)
        with vs.variable_scope("AttnW2"):
            term2 = rnn_cell.linear(cc, attn_size, False)
        # environment representation
        if env:  # 2D Tensor of shape [batch_size, env_size]
            with vs.variable_scope("Environment"):
                term3 = rnn_cell.linear(math_ops.to_float(env), attn_size, False)
            h_attn = math_ops.tanh(term1 + term2 + term3)
        else:
            h_attn = math_ops.tanh(term1 + term2)
    return h_attn, attn_weight
Author: LittleYUYU, Project: RobotNavigateNLP, Lines: 22, Source: seq2seq_ops.py
Example 9: attention
def attention(query):
    """Put attention masks on hidden using hidden_features and query."""
    ds = []  # Results of attention reads will be stored here.
    for a in xrange(num_heads):
        with variable_scope.variable_scope("Attention_%d" % a):
            y = rnn_cell.linear(query, attention_vec_size, True)
            y = array_ops.reshape(y, [-1, 1, 1, attention_vec_size])
            # Attention mask is a softmax of v^T * tanh(...).
            s = math_ops.reduce_sum(v[a] * math_ops.tanh(hidden_features[a] + y), [2, 3])
            a = nn_ops.softmax(s)
            # Now calculate the attention-weighted vector d.
            d = math_ops.reduce_sum(array_ops.reshape(a, [-1, attn_length, 1, 1]) * hidden, [1, 2])
            ds.append(array_ops.reshape(d, [-1, attn_size]))
    return ds
Author: sherrym, Project: tensorflow, Lines: 14, Source: seq2seq.py
Example 10: setup_label_loss
def setup_label_loss(self):
    with vs.variable_scope("LabelLogistic"):
        doshape = tf.shape(self.decoder_output)
        T, batch_size = doshape[0], doshape[1]
        # [batch_size, cell.state_size]
        # decoder_output: [batch_size, time_step, cell.state_size]
        last_state = self.decoder_output[:, -1, :]
        # projecting to label space
        # [batch_size, label_size]
        logits = rnn_cell.linear(last_state, self.label_size, True, 1.0)
        self.losses = tf.nn.softmax_cross_entropy_with_logits(logits, self.label_placeholder)
        self.predictions = logits
Author: windweller, Project: Trident, Lines: 14, Source: story_model.py
Example 11: setup_loss
def setup_loss(self):
    with vs.variable_scope("Logistic"):
        do2d = tf.reshape(self.decoder_output, [-1, self.size])
        logits2d = rnn_cell.linear(do2d, self.vocab_size, True, 1.0)
        outputs2d = tf.nn.softmax(logits2d)
        self.outputs = tf.reshape(outputs2d, [-1, self.batch_size, self.vocab_size])

        targets_no_GO = tf.slice(self.target_tokens, [1, 0], [-1, -1])
        masks_no_GO = tf.slice(self.target_mask, [1, 0], [-1, -1])
        # Easier to pad target/mask than to split decoder input, since TensorFlow does not support negative indexing.
        labels1d = tf.reshape(tf.pad(targets_no_GO, [[0, 1], [0, 0]]), [-1])
        mask1d = tf.reshape(tf.pad(masks_no_GO, [[0, 1], [0, 0]]), [-1])
        losses1d = tf.nn.sparse_softmax_cross_entropy_with_logits(logits2d, labels1d) * tf.to_float(mask1d)
        losses2d = tf.reshape(losses1d, [-1, self.batch_size])
        self.losses = tf.reduce_sum(losses2d) / self.batch_size
Author: wanjinchang, Project: nlc, Lines: 15, Source: nlc_model.py
Example 12: downscale
def downscale(self, inp, mask):
    with vs.variable_scope("Downscale"):
        inp2d = tf.reshape(tf.transpose(inp, perm=[1, 0, 2]), [-1, 2 * self.size])
        out2d = rnn_cell.linear(inp2d, self.size, True, 1.0)
        out3d = tf.reshape(out2d, [self.batch_size, -1, self.size])
        out3d = tf.transpose(out3d, perm=[1, 0, 2])
        out = tanh(out3d)

        mask = tf.transpose(mask)
        mask = tf.reshape(mask, [-1, 2])
        mask = tf.cast(mask, tf.bool)
        mask = tf.reduce_any(mask, reduction_indices=1)
        mask = tf.to_int32(mask)
        mask = tf.reshape(mask, [self.batch_size, -1])
        mask = tf.transpose(mask)
    return out, mask
Author: wanjinchang, Project: nlc, Lines: 16, Source: nlc_model.py
Example 13: basic_rnn_cell
def basic_rnn_cell(inputs, state, num_units, scope=None):
    if state is None:
        if inputs is not None:
            batch_size = inputs.get_shape()[0]
            dtype = inputs.dtype
        else:
            batch_size = 0
            dtype = tf.float32
        init_output = tf.zeros(tf.pack([batch_size, num_units]), dtype=dtype)
        init_state = tf.zeros(tf.pack([batch_size, num_units]), dtype=dtype)
        init_output.set_shape([batch_size, num_units])
        init_state.set_shape([batch_size, num_units])
        return init_output, init_state
    else:
        with tf.variable_op_scope([inputs, state], scope, "BasicRNNCell"):
            output = tf.tanh(linear([inputs, state], num_units, True))
            return output, output
Author: RuhiSharma, Project: tensorflow, Lines: 17, Source: rnn_cell_test.py
Example 14: downscale
def downscale(self, inp, mask):
    with vs.variable_scope("Downscale"):
        inshape = tf.shape(inp)
        T, batch_size, dim = inshape[0], inshape[1], inshape[2]
        inp2d = tf.reshape(tf.transpose(inp, perm=[1, 0, 2]), [-1, 2 * self.size])
        out2d = rnn_cell.linear(inp2d, self.size, True, 1.0)
        out3d = tf.reshape(out2d, tf.pack((batch_size, tf.to_int32(T / 2), dim)))
        out3d = tf.transpose(out3d, perm=[1, 0, 2])
        out3d.set_shape([None, None, self.size])
        out = tanh(out3d)

        mask = tf.transpose(mask)
        mask = tf.reshape(mask, [-1, 2])
        mask = tf.cast(mask, tf.bool)
        mask = tf.reduce_any(mask, reduction_indices=1)
        mask = tf.to_int32(mask)
        mask = tf.reshape(mask, tf.pack([batch_size, -1]))
        mask = tf.transpose(mask)
    return out, mask
Author: hrishikeshvganu, Project: nlc, Lines: 19, Source: nlc_model.py
Example 15: dnn
def dnn(tensor_in, hidden_units, activation=nn.relu, dropout=None):
    """Creates fully connected deep neural network subgraph.

    Args:
        tensor_in: tensor or placeholder for input features.
        hidden_units: list of counts of hidden units in each layer.
        activation: activation function between layers. Can be None.
        dropout: if not None, will add a dropout layer with given probability.

    Returns:
        A tensor which would be a deep neural network.
    """
    with vs.variable_scope('dnn'):
        for i, n_units in enumerate(hidden_units):
            with vs.variable_scope('layer%d' % i):
                tensor_in = rnn_cell.linear(tensor_in, n_units, True)
                if activation is not None:
                    tensor_in = activation(tensor_in)
                if dropout is not None:
                    tensor_in = dropout_ops.dropout(tensor_in, prob=(1.0 - dropout))
        return tensor_in
Author: 01bui, Project: tensorflow, Lines: 21, Source: dnn_ops.py
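A hypothetical call, for illustration only (the placeholder shape, layer sizes, and dropout rate below are invented, not from the original project):

x = tf.placeholder(tf.float32, [None, 64])  # [batch, features]
net = dnn(x, hidden_units=[128, 64], activation=nn.relu, dropout=0.1)
# net: [batch, 64], the activations of the last fully connected layer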
Example 16: pointer_decoder
def pointer_decoder(decoder_inputs, initial_state, attention_states, cell,
                    feed_prev=True, dtype=dtypes.float32, scope=None):
    """RNN decoder with pointer net for the sequence-to-sequence model.

    Args:
        decoder_inputs: a list of 2D Tensors [batch_size x cell.input_size].
        initial_state: 2D Tensor [batch_size x cell.state_size].
        attention_states: 3D Tensor [batch_size x attn_length x attn_size].
        cell: rnn_cell.RNNCell defining the cell function and size.
        dtype: The dtype to use for the RNN initial state (default: tf.float32).
        scope: VariableScope for the created subgraph; default: "pointer_decoder".

    Returns:
        outputs: A list of the same length as decoder_inputs of 2D Tensors of shape
            [batch_size x output_size]. These represent the generated outputs.
            Output i is computed from input i (the i-th element of decoder_inputs)
            as follows. First, we run the cell on a combination of the input and
            previous attention masks:
                cell_output, new_state = cell(linear(input, prev_attn), prev_state).
            Then, we calculate new attention masks:
                new_attn = softmax(V^T * tanh(W * attention_states + U * new_state))
            and then we calculate the output:
                output = linear(cell_output, new_attn).
        states: The state of each decoder cell in each time-step. This is a list
            with length len(decoder_inputs) -- one item for each time-step.
            Each item is a 2D Tensor of shape [batch_size x cell.state_size].
    """
    if not decoder_inputs:
        raise ValueError("Must provide at least 1 input to attention decoder.")
    if not attention_states.get_shape()[1:2].is_fully_defined():
        raise ValueError("Shape[1] and [2] of attention_states must be known: %s"
                         % attention_states.get_shape())

    with vs.variable_scope(scope or "point_decoder"):
        batch_size = array_ops.shape(decoder_inputs[0])[0]  # Needed for reshaping.
        input_size = decoder_inputs[0].get_shape()[1].value
        attn_length = attention_states.get_shape()[1].value
        attn_size = attention_states.get_shape()[2].value

        # To calculate W1 * h_t we use a 1-by-1 convolution, need to reshape before.
        hidden = array_ops.reshape(
            attention_states, [-1, attn_length, 1, attn_size])

        attention_vec_size = attn_size  # Size of query vectors for attention.
        k = vs.get_variable("AttnW", [1, 1, attn_size, attention_vec_size])
        hidden_features = nn_ops.conv2d(hidden, k, [1, 1, 1, 1], "SAME")
        v = vs.get_variable("AttnV", [attention_vec_size])

        states = [initial_state]

        def attention(query):
            """Point on hidden using hidden_features and query."""
            with vs.variable_scope("Attention"):
                y = rnn_cell.linear(query, attention_vec_size, True)
                y = array_ops.reshape(y, [-1, 1, 1, attention_vec_size])
                # Attention mask is a softmax of v^T * tanh(...).
                s = math_ops.reduce_sum(
                    v * math_ops.tanh(hidden_features + y), [2, 3])
                return s

        outputs = []
        prev = None
        batch_attn_size = array_ops.pack([batch_size, attn_size])
        attns = array_ops.zeros(batch_attn_size, dtype=dtype)
        attns.set_shape([None, attn_size])
        inps = []
        for i in xrange(len(decoder_inputs)):
            if i > 0:
                vs.get_variable_scope().reuse_variables()
            inp = decoder_inputs[i]

            if feed_prev and i > 0:
                inp = tf.pack(decoder_inputs)
                inp = tf.transpose(inp, perm=[1, 0, 2])
                inp = tf.reshape(inp, [-1, attn_length, input_size])
                inp = tf.reduce_sum(inp * tf.reshape(tf.nn.softmax(output), [-1, attn_length, 1]), 1)
                inp = tf.stop_gradient(inp)
                inps.append(inp)

            # Use the same inputs in inference, order internally.
            # Merge input and previous attentions into one vector of the right size.
            x = rnn_cell.linear([inp, attns], cell.input_size, True)

            # Run the RNN.
            cell_output, new_state = cell(x, states[-1])
            states.append(new_state)

            # Run the attention mechanism.
            output = attention(new_state)

            outputs.append(output)

    return outputs, states, inps
Author: heshizhu, Project: TensorFlow-Pointer-Networks, Lines: 91, Source: pointer.py
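Note how the pointer network differs from a vocabulary decoder: the per-step output is the raw attention score vector itself, one logit per encoder position, so the model "points" at an input element instead of emitting a symbol from a fixed vocabulary. When feed_prev is set, the next input is the attention-weighted mixture of all decoder inputs under softmax(output), with gradients stopped through that mixture.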
Example 17: attention_decoder
def attention_decoder(decoder_inputs, initial_state, attention_states, cell,
                      output_size=None, num_heads=1, loop_function=None,
                      dtype=dtypes.float32, scope=None):
    """RNN decoder with attention for the sequence-to-sequence model.

    Args:
        decoder_inputs: a list of 2D Tensors [batch_size x cell.input_size].
        initial_state: 2D Tensor [batch_size x cell.state_size].
        attention_states: 3D Tensor [batch_size x attn_length x attn_size].
        cell: rnn_cell.RNNCell defining the cell function and size.
        output_size: size of the output vectors; if None, we use cell.output_size.
        num_heads: number of attention heads that read from attention_states.
        loop_function: if not None, this function will be applied to the i-th output
            in order to generate the (i+1)-th input, and decoder_inputs will be ignored,
            except for the first element ("GO" symbol). This can be used for decoding,
            but also for training to emulate http://arxiv.org/pdf/1506.03099v2.pdf.
            Signature -- loop_function(prev, i) = next
                * prev is a 2D Tensor of shape [batch_size x cell.output_size],
                * i is an integer, the step number (when advanced control is needed),
                * next is a 2D Tensor of shape [batch_size x cell.input_size].
        dtype: The dtype to use for the RNN initial state (default: tf.float32).
        scope: VariableScope for the created subgraph; default: "attention_decoder".

    Returns:
        outputs: A list of the same length as decoder_inputs of 2D Tensors of shape
            [batch_size x output_size]. These represent the generated outputs.
            Output i is computed from input i (which is either the i-th element of
            decoder_inputs or loop_function(output {i-1}, i)) as follows.
            First, we run the cell on a combination of the input and previous
            attention masks:
                cell_output, new_state = cell(linear(input, prev_attn), prev_state).
            Then, we calculate new attention masks:
                new_attn = softmax(V^T * tanh(W * attention_states + U * new_state))
            and then we calculate the output:
                output = linear(cell_output, new_attn).
        states: The state of each decoder cell in each time-step. This is a list
            with length len(decoder_inputs) -- one item for each time-step.
            Each item is a 2D Tensor of shape [batch_size x cell.state_size].

    Raises:
        ValueError: when num_heads is not positive, there are no inputs, or shapes
            of attention_states are not set.
    """
    if not decoder_inputs:
        raise ValueError("Must provide at least 1 input to attention decoder.")
    if num_heads < 1:
        raise ValueError("With less than 1 heads, use a non-attention decoder.")
    if not attention_states.get_shape()[1:2].is_fully_defined():
        raise ValueError("Shape[1] and [2] of attention_states must be known: %s"
                         % attention_states.get_shape())
    if output_size is None:
        output_size = cell.output_size

    with vs.variable_scope(scope or "attention_decoder"):
        batch_size = array_ops.shape(decoder_inputs[0])[0]  # Needed for reshaping.
        attn_length = attention_states.get_shape()[1].value
        attn_size = attention_states.get_shape()[2].value

        # To calculate W1 * h_t we use a 1-by-1 convolution, need to reshape before.
        hidden = array_ops.reshape(
            attention_states, [-1, attn_length, 1, attn_size])
        hidden_features = []
        v = []
        attention_vec_size = attn_size  # Size of query vectors for attention.
        for a in xrange(num_heads):
            k = vs.get_variable("AttnW_%d" % a, [1, 1, attn_size, attention_vec_size])
            hidden_features.append(nn_ops.conv2d(hidden, k, [1, 1, 1, 1], "SAME"))
            v.append(vs.get_variable("AttnV_%d" % a, [attention_vec_size]))

        states = [initial_state]

        def attention(query):
            """Put attention masks on hidden using hidden_features and query."""
            ds = []  # Results of attention reads will be stored here.
            for a in xrange(num_heads):
                with vs.variable_scope("Attention_%d" % a):
                    y = rnn_cell.linear(query, attention_vec_size, True)
                    y = array_ops.reshape(y, [-1, 1, 1, attention_vec_size])
                    # Attention mask is a softmax of v^T * tanh(...).
                    s = math_ops.reduce_sum(
                        v[a] * math_ops.tanh(hidden_features[a] + y), [2, 3])
                    a = nn_ops.softmax(s)
                    # Now calculate the attention-weighted vector d.
                    d = math_ops.reduce_sum(
                        array_ops.reshape(a, [-1, attn_length, 1, 1]) * hidden,
                        [1, 2])
                    ds.append(array_ops.reshape(d, [-1, attn_size]))
            return ds

        outputs = []
        prev = None
        batch_attn_size = array_ops.pack([batch_size, attn_size])
        attns = [array_ops.zeros(batch_attn_size, dtype=dtype)
                 for _ in xrange(num_heads)]
        for a in attns:  # Ensure the second shape of attention vectors is set.
            a.set_shape([None, attn_size])
        for i in xrange(len(decoder_inputs)):
            if i > 0:
                vs.get_variable_scope().reuse_variables()
            inp = decoder_inputs[i]
            # If loop_function is set, we use it instead of decoder_inputs.
            #......... rest of the code omitted .........
Author: ttamada, Project: MachineTranslationWithVisualContexts, Lines: 101, Source: seq2seq.py
Example 18: attention_encoder
def attention_encoder(decoder_inputs, initial_state, attention_states,
                      cell, num_heads=1,
                      output_size=None, dtype=dtypes.float32, scope=None,
                      initial_state_attention=False):
    """
    Encoder that receives attention from another encoder.

    Parameters
    ----------
    decoder_inputs:
        the second encoder's input (we call it a decoder's input);
        it should already be wrapped by add_embedding();
        a list of num_steps 2D Tensors [batch_size, input_size = embed_size].
    initial_state:
        2D Tensor (batch_size x cell.state_size).
    attention_states:
        3D Tensor (batch_size x attn_length (seq_length) x attn_size).
    cell
    num_heads
    output_size
    dtype
    scope
    initial_state_attention

    Returns
    -------
    A tuple of the form (outputs, state), where:
        outputs: A list of the same length as decoder_inputs of 2D Tensors with
            shape [batch_size x output_size] containing the generated outputs.
        state: The state of each decoder cell at the final time-step.
            It is a 2D Tensor of shape (batch_size x cell.state_size).
    """
    decoder_inputs = [decoder_inputs]  # in the original model this is a bucket list of inputs

    with vs.variable_scope(scope or "attention_encoder"):
        batch_size = array_ops.shape(decoder_inputs[0])[0]
        attn_length = attention_states.get_shape()[1].value
        attn_size = attention_states.get_shape()[2].value
        v = []
        attention_vec_size = attn_size  # Size of query vectors for attention.
        hidden = array_ops.reshape(
            attention_states, [-1, attn_length, 1, attn_size])
        hidden_features = []
        for a in xrange(num_heads):
            k = vs.get_variable("AttnW_%d" % a,
                                [1, 1, attn_size, attention_vec_size])
            hidden_features.append(tf.nn.conv2d(hidden, k, [1, 1, 1, 1], "SAME"))
            v.append(vs.get_variable("AttnV_%d" % a, [attention_vec_size]))

        def attention(query):
            """Put attention masks on hidden using hidden_features and query."""
            ds = []  # Results of attention reads will be stored here.
            for a in xrange(num_heads):
                with vs.variable_scope("Attention_%d" % a):
                    y = rnn_cell.linear(query, attention_vec_size, True)
                    y = array_ops.reshape(y, [-1, 1, 1, attention_vec_size])
                    # Attention mask is a softmax of v^T * tanh(...).
                    s = math_ops.reduce_sum(
                        v[a] * math_ops.tanh(hidden_features[a] + y), [2, 3])
                    a = tf.nn.softmax(s)
                    # Now calculate the attention-weighted vector d.
                    d = math_ops.reduce_sum(
                        array_ops.reshape(a, [-1, attn_length, 1, 1]) * hidden,
                        [1, 2])
                    ds.append(array_ops.reshape(d, [-1, attn_size]))
            return ds

        outputs = []
        batch_attn_size = array_ops.pack([batch_size, attn_size])
        attns = [array_ops.zeros(batch_attn_size, dtype=dtype) for _ in xrange(num_heads)]
        for a in attns:  # Ensure the second shape of attention vectors is set.
            a.set_shape([None, attn_size])
        if initial_state_attention:
            attns = attention(initial_state)

        state = initial_state
        # this is now iterating over time steps
        for i, inp in enumerate(decoder_inputs):
            if i > 0:
                vs.get_variable_scope().reuse_variables()
            # Merge input and previous attentions into one vector of the right size.
            x = rnn_cell.linear([inp] + attns, cell.input_size, True)
            # Run the RNN.
            cell_output, state = cell(x, state)
            # Run the attention mechanism.
            if i == 0 and initial_state_attention:
                with vs.variable_scope(vs.get_variable_scope(), reuse=True):
                    attns = attention(state)
            else:
                attns = attention(state)
            with vs.variable_scope("AttnOutputProjection"):
                output = rnn_cell.linear([cell_output] + attns, output_size, True)
            outputs.append(output)
        #......... rest of the code omitted .........
Author: windweller, Project: Trident, Lines: 101, Source: seq2seq.py
Example 19: attention_decoder
def attention_decoder(decoder_inputs, initial_state, attention_states, cell,
                      output_size=None, num_heads=1, loop_function=None,
                      dtype=dtypes.float32, scope=None,
                      initial_state_attention=False):
    """RNN decoder with attention for the sequence-to-sequence model.

    In this context "attention" means that, during decoding, the RNN can look up
    information in the additional tensor attention_states, and it does this by
    focusing on a few entries from the tensor. This model has proven to yield
    especially good results in a number of sequence-to-sequence tasks. This
    implementation is based on http://arxiv.org/abs/1412.7449 (see below for
    details). It is recommended for complex sequence-to-sequence tasks.

    Args:
        decoder_inputs: A list of 2D Tensors [batch_size x cell.input_size].
        initial_state: 2D Tensor [batch_size x cell.state_size].
        attention_states: 3D Tensor [batch_size x attn_length x attn_size].
        cell: rnn_cell.RNNCell defining the cell function and size.
        output_size: Size of the output vectors; if None, we use cell.output_size.
        num_heads: Number of attention heads that read from attention_states.
        loop_function: If not None, this function will be applied to the i-th output
            in order to generate the (i+1)-th input, and decoder_inputs will be ignored,
            except for the first element ("GO" symbol). This can be used for decoding,
            but also for training to emulate http://arxiv.org/pdf/1506.03099v2.pdf.
            Signature -- loop_function(prev, i) = next
                * prev is a 2D Tensor of shape [batch_size x cell.output_size],
                * i is an integer, the step number (when advanced control is needed),
                * next is a 2D Tensor of shape [batch_size x cell.input_size].
        dtype: The dtype to use for the RNN initial state (default: tf.float32).
        scope: VariableScope for the created subgraph; default: "attention_decoder".
        initial_state_attention: If False (default), initial attentions are zero.
            If True, initialize the attentions from the initial state and attention
            states -- useful when we wish to resume decoding from a previously
            stored decoder state and attention states.

    Returns:
        A tuple of the form (outputs, state), where:
            outputs: A list of the same length as decoder_inputs of 2D Tensors of
                shape [batch_size x output_size]. These represent the generated outputs.
                Output i is computed from input i (which is either the i-th element
                of decoder_inputs or loop_function(output {i-1}, i)) as follows.
                First, we run the cell on a combination of the input and previous
                attention masks:
                    cell_output, new_state = cell(linear(input, prev_attn), prev_state).
                Then, we calculate new attention masks:
                    new_attn = softmax(V^T * tanh(W * attention_states + U * new_state))
                and then we calculate the output:
                    output = linear(cell_output, new_attn).
            state: The state of each decoder cell at the final time-step.
                It is a 2D Tensor of shape [batch_size x cell.state_size].

    Raises:
        ValueError: when num_heads is not positive, there are no inputs, or shapes
            of attention_states are not set.
    """
    if not decoder_inputs:
        raise ValueError("Must provide at least 1 input to attention decoder.")
    if num_heads < 1:
        raise ValueError("With less than 1 heads, use a non-attention decoder.")
    if not attention_states.get_shape()[1:2].is_fully_defined():
        raise ValueError("Shape[1] and [2] of attention_states must be known: %s"
                         % attention_states.get_shape())
    if output_size is None:
        output_size = cell.output_size

    with variable_scope.variable_scope(scope or "attention_decoder"):
        batch_size = array_ops.shape(decoder_inputs[0])[0]  # Needed for reshaping.
        attn_length = attention_states.get_shape()[1].value
        attn_size = attention_states.get_shape()[2].value

        # To calculate W1 * h_t we use a 1-by-1 convolution, need to reshape before.
        hidden = array_ops.reshape(
            attention_states, [-1, attn_length, 1, attn_size])
        hidden_features = []
        v = []
        attention_vec_size = attn_size  # Size of query vectors for attention.
        for a in xrange(num_heads):
            k = variable_scope.get_variable("AttnW_%d" % a,
                                            [1, 1, attn_size, attention_vec_size])
            hidden_features.append(nn_ops.conv2d(hidden, k, [1, 1, 1, 1], "SAME"))
            v.append(variable_scope.get_variable("AttnV_%d" % a,
                                                 [attention_vec_size]))

        state = initial_state

        def attention(query):
            """Put attention masks on hidden using hidden_features and query."""
            ds = []  # Results of attention reads will be stored here.
            for a in xrange(num_heads):
                with variable_scope.variable_scope("Attention_%d" % a):
                    y = rnn_cell.linear(query, attention_vec_size, True)
                    y = array_ops.reshape(y, [-1, 1, 1, attention_vec_size])
                    # Attention mask is a softmax of v^T * tanh(...).
                    s = math_ops.reduce_sum(
                        v[a] * math_ops.tanh(hidden_features[a] + y), [2, 3])
                    a = nn_ops.softmax(s)
                    # Now calculate the attention-weighted vector d.
                    d = math_ops.reduce_sum(
                        array_ops.reshape(a, [-1, attn_length, 1, 1]) * hidden,
                        [1, 2])
                    #......... rest of the code omitted .........
Author: maxkarlovitz, Project: tensorflow, Lines: 101, Source: seq2seq.py
Example 20: attention_decoder
def attention_decoder(decoder_inputs, initial_state, attention_states, cell, batch_size, state_size,
                      decoder_inputs_positions=None, decoder_inputs_maps=None, output_size=None, loop_function=None,
                      dtype=dtypes.float32, scope=None):
    """RNN decoder with attention for the sequence-to-sequence model.

    Args:
        decoder_inputs: a list of 2D Tensors [batch_size x cell.input_size]. Embedded inputs.
        initial_state: 2D Tensor [batch_size x cell.state_size].
        attention_states: 3D Tensor [batch_size x attn_length x attn_size].
        cell: rnn_cell.RNNCell defining the cell function and size.
        batch_size: the batch size must be given explicitly, since env_state is
            updated one sample at a time.
        state_size: size of the environment state.
        decoder_inputs_positions: a list of 2D Tensors of shape [batch_size, 3],
            indicating initial positions of each example in a map. Default None.
        decoder_inputs_maps: a 1D Tensor of length batch_size indicating the map. Default None.
        output_size: size of the output vectors; if None, we use cell.output_size.
        loop_function: if not None, this function will be applied to the i-th output
            in order to generate the (i+1)-th input, and decoder_inputs will be ignored,
            except for the first element ("GO" symbol). This can be used for decoding,
            but also for training to emulate http://arxiv.org/pdf/1506.03099v2.pdf.
            Signature -- loop_function(prev, i) = next
                * prev is a 2D Tensor of shape [batch_size x cell.output_size],
                * i is an integer, the step number (when advanced control is needed),
                * next is a 2D Tensor of shape [batch_size x cell.input_size].
        dtype: The dtype to use for the RNN initial state (default: tf.float32).
        scope: VariableScope for the created subgraph; default: "attention_decoder".

    Returns:
        outputs: A list of the same length as decoder_inputs of 2D Tensors of shape
            [batch_size x output_size]. These represent the generated outputs.
            Output i is computed from input i (which is either the i-th element of
            decoder_inputs or loop_function(output {i-1}, i)) as follows.
            First, we run the cell on the current decoder input (or the feed from
            the previous output):
                cur_output, new_state = cell(input, prev_state).
            Then, we calculate the new attention weights:
                new_attn = softmax(h_t^T * attention_states),
            the context vector:
                cont_vec = weighted_sum_of(attention_states), weighted by new_attn,
            and then the attended output:
                attn_output = tanh(W1*cur_output + W2*cont_vec + W3*env_state).
            The final output for prediction:
                output = softmax(W*attn_output).
            This "output" is a 1D Tensor of shape [num_symbols]; each entry is the
            probability of predicting that symbol at the next step.
        states: The state of each decoder cell in each time-step. This is a list
            with length len(decoder_inputs) -- one item for each time-step.
            Each item is a 2D Tensor of shape [batch_size x cell.state_size].

    Raises:
        ValueError: when num_heads is not positive, there are no inputs, or shapes
            of attention_states are not set.
    """
    if not decoder_inputs:
        raise ValueError("Must provide at least 1 input to attention decoder.")
    if not attention_states.get_shape()[1:2].is_fully_defined():
        raise ValueError("Shape[1] and [2] of attention_states must be known: %s"
                         % attention_states.get_shape())
    if output_size is None:
        output_size = cell.output_size

    with vs.variable_scope(scope or "attention_decoder"):
        attn_length = attention_states.get_shape()[1].value
        attn_size = attention_states.get_shape()[2].value
        mapIdx = array_ops.pack([map3.map_grid, map3.map_jelly, map3.map_one])  # map
        attention_vec_size = attn_size  # size of query
        states = [initial_state]

        # current position and environment
        position, env = None, None

        hidden = array_ops.reshape(attention_states, [-1, attn_length, 1, attn_size])  # reshape for later computation

        def attention(query):
            """Put attention masks on hidden using hidden_features and query."""
            with vs.variable_scope("Attention"):
                # Attention mask is a softmax of h_in^T * decoder_hidden.
                dec_hid = array_ops.tile(query, [1, attn_length])  # replicate query for element-wise multiplication
                dec_hid = array_ops.reshape(dec_hid, [-1, attn_length, attention_vec_size])
                attn_weight = nn_ops.softmax(math_ops.reduce_sum(attention_states * dec_hid, [2]))  # attn weights for every hidden state in the encoder
                # Now calculate the attention-weighted vector (context vector) cc.
                cc = math_ops.reduce_sum(array_ops.reshape(attn_weight, [-1, attn_length, 1, 1]) * hidden, [1, 2])
                # attended hidden state
                with vs.variable_scope("AttnW1"):
                    term1 = rnn_cell.linear(query, attn_size, False)
                with vs.variable_scope("AttnW2"):
                    term2 = rnn_cell.linear(cc, attn_size, False)
                # environment representation
                #......... rest of the code omitted .........