This article collects typical usage examples of the tensorflow.one_hot function in Python. If you have been wondering what exactly tf.one_hot does, how to call it, or what real-world uses look like, the curated code examples below should help.
Twenty code examples of the one_hot function are shown, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
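Before the examples, here is a minimal sketch of the function itself (a hypothetical snippet assuming the TF 1.x graph-mode API that the examples below use): tf.one_hot(indices, depth, on_value, off_value) expands a tensor of integer class indices into one-hot vectors along a new axis of size depth.

import tensorflow as tf

indices = tf.constant([0, 2, 3])                               # integer class indices
onehot_float = tf.one_hot(indices, depth=4)                    # float32 by default (on=1.0, off=0.0)
onehot_int = tf.one_hot(indices, 4, on_value=1, off_value=0)   # integer on/off values

with tf.Session() as sess:
    print(sess.run(onehot_float))
    # [[1. 0. 0. 0.]
    #  [0. 0. 1. 0.]
    #  [0. 0. 0. 1.]]
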
Example 1: _build_graph

def _build_graph(self, inputs, is_training):
    state, action, reward, next_state, isOver = inputs
    self.predict_value = self._get_DQN_prediction(state, is_training)
    action_onehot = tf.one_hot(action, NUM_ACTIONS)
    pred_action_value = tf.reduce_sum(self.predict_value * action_onehot, 1)  # N,
    max_pred_reward = tf.reduce_mean(tf.reduce_max(
        self.predict_value, 1), name='predict_reward')
    add_moving_summary(max_pred_reward)

    self.greedy_choice = tf.argmax(self.predict_value, 1)  # N,

    with tf.variable_scope('target'):
        targetQ_predict_value = self._get_DQN_prediction(next_state, False)  # NxA
        # DQN
        # best_v = tf.reduce_max(targetQ_predict_value, 1)  # N,
        # Double-DQN: evaluate the online network's greedy action with the target network
        predict_onehot = tf.one_hot(self.greedy_choice, NUM_ACTIONS, 1.0, 0.0)
        best_v = tf.reduce_sum(targetQ_predict_value * predict_onehot, 1)

    target = reward + (1.0 - tf.cast(isOver, tf.float32)) * GAMMA * tf.stop_gradient(best_v)

    sqrcost = tf.square(target - pred_action_value)
    abscost = tf.abs(target - pred_action_value)  # robust error func
    cost = tf.select(abscost < 1, sqrcost, abscost)  # tf.select is the pre-1.0 name of tf.where
    summary.add_param_summary([('conv.*/W', ['histogram', 'rms']),
                               ('fc.*/W', ['histogram', 'rms'])])  # monitor all W
    self.cost = tf.reduce_mean(cost, name='cost')

Author: xhrwang | Project: tensorpack | Lines: 28 | Source: DQN.py

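The pattern in Example 1 — multiplying per-action Q-values by a one-hot mask of the chosen actions and summing over the action axis — recurs in several of the examples below (6, 7, and others). A standalone sketch of just that pattern, with hypothetical placeholder names and TF 1.x ops:

import tensorflow as tf

NUM_ACTIONS = 4
q_values = tf.placeholder(tf.float32, [None, NUM_ACTIONS])  # per-action Q-values, shape (B, A)
actions = tf.placeholder(tf.int32, [None])                  # indices of the actions actually taken, shape (B,)

# tf.one_hot builds a (B, A) mask; the elementwise product and sum select Q(s, a)
# for each row without any gather/indexing ops.
action_mask = tf.one_hot(actions, NUM_ACTIONS)
q_taken = tf.reduce_sum(q_values * action_mask, axis=1)     # shape (B,)
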
Example 2: build

def build(self, sampling):
    if sampling == True:
        batch_size, num_steps = 1, 1
    else:
        batch_size = self.__batch_size
        num_steps = self.__num_steps

    tf_x = tf.placeholder(tf.int32, shape=[batch_size, num_steps], name='tf_x')
    tf_y = tf.placeholder(tf.int32, shape=[batch_size, num_steps], name='tf_y')
    tf_keepprob = tf.placeholder(tf.float32, name='tf_keepprob')

    # one-hot encoding:
    x_onehot = tf.one_hot(tf_x, depth=self.__num_classes)
    y_onehot = tf.one_hot(tf_y, depth=self.__num_classes)

    # build the multi-layer RNN cells
    cells = tf.contrib.rnn.MultiRNNCell(
        [tf.contrib.rnn.DropoutWrapper(
            tf.contrib.rnn.BasicLSTMCell(self.__lstm_size),
            output_keep_prob=tf_keepprob)
         for _ in range(self.__num_layers)])

    # Define the initial state
    self.__initial_state = cells.zero_state(batch_size, tf.float32)

    # Run each sequence step through the RNN
    lstm_outputs, self.__final_state = tf.nn.dynamic_rnn(
        cells, x_onehot, initial_state=self.__initial_state)
    print(' << lstm_outputs >>', lstm_outputs)

    seq_output_reshaped = tf.reshape(lstm_outputs, shape=[-1, self.__lstm_size],
                                     name='seq_output_reshaped')
    logits = tf.layers.dense(inputs=seq_output_reshaped, units=self.__num_classes,
                             activation=None, name='logits')
    proba = tf.nn.softmax(logits, name='probabilities')

    y_reshaped = tf.reshape(y_onehot, shape=[-1, self.__num_classes], name='y_reshaped')
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
        logits=logits, labels=y_reshaped), name='cost')

    # Gradient clipping to avoid 'exploding gradients'
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars), self.__grad_clip)
    optimizer = tf.train.AdamOptimizer(self.__learning_rate)
    train_op = optimizer.apply_gradients(zip(grads, tvars), name='train_op')

Author: dazhouze | Project: helloWorld | Lines: 29 | Source: ML_16_2_charRNN.py

Example 3: prob_is_largest

def prob_is_largest(self, Y, mu, var, gh_x, gh_w):
    # work out what the mean and variance are of the indicated latent function.
    oh_on = tf.cast(tf.one_hot(tf.reshape(Y, (-1,)), self.num_classes, 1.0, 0.0), float_type)
    mu_selected = tf.reduce_sum(oh_on * mu, 1)
    var_selected = tf.reduce_sum(oh_on * var, 1)

    # generate Gauss-Hermite grid
    X = tf.reshape(mu_selected, (-1, 1)) + gh_x * tf.reshape(
        tf.sqrt(tf.clip_by_value(2.0 * var_selected, 1e-10, np.inf)), (-1, 1)
    )

    # compute the CDF of the Gaussian between the latent functions and the grid (including the selected function)
    dist = (tf.expand_dims(X, 1) - tf.expand_dims(mu, 2)) / tf.expand_dims(
        tf.sqrt(tf.clip_by_value(var, 1e-10, np.inf)), 2
    )
    cdfs = 0.5 * (1.0 + tf.erf(dist / np.sqrt(2.0)))

    cdfs = cdfs * (1 - 2e-4) + 1e-4

    # blank out all the distances on the selected latent function
    oh_off = tf.cast(tf.one_hot(tf.reshape(Y, (-1,)), self.num_classes, 0.0, 1.0), float_type)
    cdfs = cdfs * tf.expand_dims(oh_off, 2) + tf.expand_dims(oh_on, 2)

    # take the product over the latent functions, and the sum over the GH grid.
    return tf.matmul(tf.reduce_prod(cdfs, reduction_indices=[1]),
                     tf.reshape(gh_w / np.sqrt(np.pi), (-1, 1)))

Author: GPflow | Project: GPflow | Lines: 25 | Source: likelihoods.py

Example 4: char_rnn_model

def char_rnn_model(features, labels, mode):
    """Character level recurrent neural network model to predict classes."""
    byte_vectors = tf.one_hot(features[CHARS_FEATURE], 256, 1., 0.)
    byte_list = tf.unstack(byte_vectors, axis=1)

    cell = tf.contrib.rnn.GRUCell(HIDDEN_SIZE)
    _, encoding = tf.contrib.rnn.static_rnn(cell, byte_list, dtype=tf.float32)

    logits = tf.layers.dense(encoding, MAX_LABEL, activation=None)

    predicted_classes = tf.argmax(logits, 1)
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions={
                'class': predicted_classes,
                'prob': tf.nn.softmax(logits)
            })

    onehot_labels = tf.one_hot(labels, MAX_LABEL, 1, 0)
    loss = tf.losses.softmax_cross_entropy(
        onehot_labels=onehot_labels, logits=logits)
    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
        train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)

    eval_metric_ops = {
        'accuracy': tf.metrics.accuracy(
            labels=labels, predictions=predicted_classes)
    }
    return tf.estimator.EstimatorSpec(
        mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)

Author: 1000sprites | Project: tensorflow | Lines: 33 | Source: text_classification_character_rnn.py

Example 5: get_online_sequences

def get_online_sequences(sequence_length, batch_size):
    """Gets tensors that constantly produce new random examples.

    Args:
        sequence_length: total length of the sequences.
        batch_size: how many at a time.

    Returns:
        (data, targets): data is `[sequence_length, batch_size, 2]` and targets
            are `[batch_size]`.
    """
    # getting the random channel is easy
    random_data = tf.random_uniform([sequence_length, batch_size, 1],
                                    minval=0.0, maxval=1.0)
    # now we need a random marker in each half of the data
    random_index_1 = tf.random_uniform([1, batch_size], minval=0,
                                       maxval=sequence_length // 2,
                                       dtype=tf.int32)
    random_index_2 = tf.random_uniform([1, batch_size], minval=0,
                                       maxval=sequence_length // 2,
                                       dtype=tf.int32)
    markers = tf.concat(axis=2, values=[tf.one_hot(random_index_1, sequence_length // 2),
                                        tf.one_hot(random_index_2, sequence_length // 2)])
    markers = tf.transpose(markers)
    targets = tf.reduce_sum(random_data * markers,
                            axis=0)
    return tf.concat(axis=2, values=[random_data, markers]), tf.squeeze(targets)

Author: PFCM | Project: datasets | Lines: 27 | Source: addition.py

Example 6: build_graph

def build_graph(self, state, action, futurereward, action_prob):
    logits, value = self._get_NN_prediction(state)
    value = tf.squeeze(value, [1], name='pred_value')  # (B,)
    policy = tf.nn.softmax(logits, name='policy')
    is_training = get_current_tower_context().is_training
    if not is_training:
        return
    log_probs = tf.log(policy + 1e-6)

    log_pi_a_given_s = tf.reduce_sum(
        log_probs * tf.one_hot(action, NUM_ACTIONS), 1)
    advantage = tf.subtract(tf.stop_gradient(value), futurereward, name='advantage')

    pi_a_given_s = tf.reduce_sum(policy * tf.one_hot(action, NUM_ACTIONS), 1)  # (B,)
    importance = tf.stop_gradient(tf.clip_by_value(pi_a_given_s / (action_prob + 1e-8), 0, 10))

    policy_loss = tf.reduce_sum(log_pi_a_given_s * advantage * importance, name='policy_loss')
    xentropy_loss = tf.reduce_sum(policy * log_probs, name='xentropy_loss')
    value_loss = tf.nn.l2_loss(value - futurereward, name='value_loss')

    pred_reward = tf.reduce_mean(value, name='predict_reward')
    advantage = tf.sqrt(tf.reduce_mean(tf.square(advantage)), name='rms_advantage')
    entropy_beta = tf.get_variable('entropy_beta', shape=[],
                                   initializer=tf.constant_initializer(0.01), trainable=False)
    cost = tf.add_n([policy_loss, xentropy_loss * entropy_beta, value_loss])
    cost = tf.truediv(cost, tf.cast(tf.shape(futurereward)[0], tf.float32), name='cost')

    summary.add_moving_summary(policy_loss, xentropy_loss,
                               value_loss, pred_reward, advantage,
                               cost, tf.reduce_mean(importance, name='importance'))
    return cost

Author: tobyma | Project: tensorpack | Lines: 30 | Source: train-atari.py

Example 7: create_tf_operations

def create_tf_operations(self, config):
    super(DQNModel, self).create_tf_operations(config)

    num_actions = {name: action.num_actions for name, action in config.actions}

    # Training network
    with tf.variable_scope('training'):
        self.training_network = NeuralNetwork(config.network, inputs=self.state)
        self.internal_inputs.extend(self.training_network.internal_inputs)
        self.internal_outputs.extend(self.training_network.internal_outputs)
        self.internal_inits.extend(self.training_network.internal_inits)
        training_output = dict()

        for action in self.action:
            training_output[action] = layers['linear'](x=self.training_network.output, size=num_actions[action])
            self.action_taken[action] = tf.argmax(training_output[action], axis=1)

    # Target network
    with tf.variable_scope('target'):
        self.target_network = NeuralNetwork(config.network, inputs=self.state)
        self.internal_inputs.extend(self.target_network.internal_inputs)
        self.internal_outputs.extend(self.target_network.internal_outputs)
        self.internal_inits.extend(self.target_network.internal_inits)
        target_value = dict()

        for action in self.action:
            target_output = layers['linear'](x=self.target_network.output, size=num_actions[action])
            if config.double_dqn:
                selector = tf.one_hot(self.action_taken[action], num_actions[action])
                target_value[action] = tf.reduce_sum(tf.multiply(target_output, selector), axis=1)
            else:
                target_value[action] = tf.reduce_max(target_output, axis=1)

    with tf.name_scope('update'):
        for action in self.action:
            # One-hot tensor of the actions that have been taken
            action_one_hot = tf.one_hot(self.action[action][:-1], num_actions[action])

            # Training output, so we get the expected rewards given the actual states and actions
            q_value = tf.reduce_sum(training_output[action][:-1] * action_one_hot, axis=1)

            # Surrogate loss as the mean squared error between actual observed rewards and expected rewards
            q_target = self.reward[:-1] + (1.0 - tf.cast(self.terminal[:-1], tf.float32)) * self.discount * target_value[action][1:]
            delta = q_target - q_value
            self.loss_per_instance = tf.square(delta)

            # If gradient clipping is used, calculate the Huber loss
            if config.clip_gradients > 0.0:
                huber_loss = tf.where(tf.abs(delta) < config.clip_gradients, 0.5 * self.loss_per_instance, tf.abs(delta) - 0.5)
                loss = tf.reduce_mean(huber_loss)
            else:
                loss = tf.reduce_mean(self.loss_per_instance)
            tf.losses.add_loss(loss)

    # Update target network
    with tf.name_scope("update_target"):
        self.target_network_update = list()
        for v_source, v_target in zip(self.training_network.variables, self.target_network.variables):
            update = v_target.assign_sub(config.update_target_weight * (v_target - v_source))
            self.target_network_update.append(update)

Author: et0803 | Project: tensorforce | Lines: 60 | Source: dqn_model.py

Example 8: make_update_op

def make_update_op(self, upd_idxs, upd_keys, upd_vals,
                   batch_size, use_recent_idx, intended_output):
    """Function that creates all the update ops."""
    base_update_op = super(LSHMemory, self).make_update_op(
        upd_idxs, upd_keys, upd_vals,
        batch_size, use_recent_idx, intended_output)

    # compute hash slots to be updated
    hash_slot_idxs = self.get_hash_slots(upd_keys)

    # make updates
    update_ops = []
    with tf.control_dependencies([base_update_op]):
        for i, slot_idxs in enumerate(hash_slot_idxs):
            # for each slot, choose which entry to replace
            entry_idx = tf.random_uniform([batch_size],
                                          maxval=self.num_per_hash_slot,
                                          dtype=tf.int32)
            entry_mul = 1 - tf.one_hot(entry_idx, self.num_per_hash_slot,
                                       dtype=tf.int32)
            entry_add = (tf.expand_dims(upd_idxs, 1) *
                         tf.one_hot(entry_idx, self.num_per_hash_slot,
                                    dtype=tf.int32))

            mul_op = tf.scatter_mul(self.hash_slots[i], slot_idxs, entry_mul)
            with tf.control_dependencies([mul_op]):
                add_op = tf.scatter_add(self.hash_slots[i], slot_idxs, entry_add)
                update_ops.append(add_op)

    return tf.group(*update_ops)

Author: tsingcoo | Project: models | Lines: 30 | Source: memory.py

Example 9: load_mnist

def load_mnist(path, is_training):
    fd = open(os.path.join(cfg.dataset, 'train-images-idx3-ubyte'))
    loaded = np.fromfile(file=fd, dtype=np.uint8)
    trX = loaded[16:].reshape((60000, 28, 28, 1)).astype(np.float)

    fd = open(os.path.join(cfg.dataset, 'train-labels-idx1-ubyte'))
    loaded = np.fromfile(file=fd, dtype=np.uint8)
    trY = loaded[8:].reshape((60000)).astype(np.int32)  # tf.one_hot expects integer indices

    fd = open(os.path.join(cfg.dataset, 't10k-images-idx3-ubyte'))
    loaded = np.fromfile(file=fd, dtype=np.uint8)
    teX = loaded[16:].reshape((10000, 28, 28, 1)).astype(np.float)

    fd = open(os.path.join(cfg.dataset, 't10k-labels-idx1-ubyte'))
    loaded = np.fromfile(file=fd, dtype=np.uint8)
    teY = loaded[8:].reshape((10000)).astype(np.int32)  # tf.one_hot expects integer indices

    # normalization and conversion to a tensor [60000, 28, 28, 1]
    trX = tf.convert_to_tensor(trX / 255., tf.float32)

    # => [num_samples, 10]
    trY = tf.one_hot(trY, depth=10, axis=1, dtype=tf.float32)
    teY = tf.one_hot(teY, depth=10, axis=1, dtype=tf.float32)

    if is_training:
        return trX, trY
    else:
        return teX / 255., teY

Author: SrGrace | Project: Artificial-Intelligence-Deep-Learning-Machine-Learning-Tutorials | Lines: 28 | Source: utils.py

Example 10: predict

def predict(self, test_features, test_labels, result_path):
    train_labels = tf.one_hot(self.train_labels, depth=2, on_value=1, off_value=0)
    test_labels = tf.one_hot(test_labels, depth=2, on_value=1, off_value=0)

    init = tf.global_variables_initializer()

    # Start testing
    with tf.Session() as sess:
        # Run the initializer
        sess.run(init)

        y_, y = sess.run([test_labels, train_labels])

        # loop over test data
        for index in range(len(test_features)):
            feed_dict = {self.xtr: self.train_features, self.xte: test_features[index, :]}
            nn_index = sess.run(self.prediction, feed_dict=feed_dict)
            print('Test [{}] Actual Class: {}, Predicted Class : {}'.format(
                index, np.argmax(y_[index]), np.argmax(y[nn_index])))
            self.save_labels(predictions=np.argmax(y[nn_index]), actual=np.argmax(y_[index]),
                             result_path=result_path, step=index, phase='testing')

            if np.argmax(y[nn_index]) == np.argmax(y_[index]):
                self.accuracy += 1. / len(test_features)

        print('Accuracy : {}'.format(self.accuracy))

Author: TaihuLight | Project: wisconsin-breast-cancer | Lines: 32 | Source: nearest_neighbor.py

Example 11: char_cnn_model

def char_cnn_model(features, target):
    """Character level convolutional neural network model to predict classes."""
    target = tf.one_hot(target, 15, 1, 0)
    byte_list = tf.reshape(tf.one_hot(features, 256, 1, 0), [-1, MAX_DOCUMENT_LENGTH, 256, 1])

    with tf.variable_scope("CNN_Layer1"):
        # Apply convolution filtering on input sequence.
        conv1 = tf.contrib.layers.convolution2d(byte_list, N_FILTERS, FILTER_SHAPE1, padding="VALID")
        # Add a ReLU for non-linearity.
        conv1 = tf.nn.relu(conv1)
        # Max pooling across output of Convolution+ReLU.
        pool1 = tf.nn.max_pool(
            conv1, ksize=[1, POOLING_WINDOW, 1, 1], strides=[1, POOLING_STRIDE, 1, 1], padding="SAME"
        )
        # Transpose matrix so that n_filters from convolution becomes width.
        pool1 = tf.transpose(pool1, [0, 1, 3, 2])

    with tf.variable_scope("CNN_Layer2"):
        # Second level of convolution filtering.
        conv2 = tf.contrib.layers.convolution2d(pool1, N_FILTERS, FILTER_SHAPE2, padding="VALID")
        # Max across each filter to get useful features for classification.
        pool2 = tf.squeeze(tf.reduce_max(conv2, 1), squeeze_dims=[1])

    # Apply regular WX + B and classification.
    logits = tf.contrib.layers.fully_connected(pool2, 15, activation_fn=None)
    loss = tf.contrib.losses.softmax_cross_entropy(logits, target)

    train_op = tf.contrib.layers.optimize_loss(
        loss, tf.contrib.framework.get_global_step(), optimizer="Adam", learning_rate=0.01
    )

    return ({"class": tf.argmax(logits, 1), "prob": tf.nn.softmax(logits)}, loss, train_op)

Author: brchiu | Project: tensorflow | Lines: 30 | Source: text_classification_character_cnn.py

Example 12: char_cnn_model

def char_cnn_model(features, labels, mode):
    """Character level convolutional neural network model to predict classes."""
    features_onehot = tf.one_hot(features[CHARS_FEATURE], 256)
    input_layer = tf.reshape(
        features_onehot, [-1, MAX_DOCUMENT_LENGTH, 256, 1])

    with tf.variable_scope('CNN_Layer1'):
        # Apply convolution filtering on input sequence.
        conv1 = tf.layers.conv2d(
            input_layer,
            filters=N_FILTERS,
            kernel_size=FILTER_SHAPE1,
            padding='VALID',
            # Add a ReLU for non-linearity.
            activation=tf.nn.relu)
        # Max pooling across output of Convolution+ReLU.
        pool1 = tf.layers.max_pooling2d(
            conv1,
            pool_size=POOLING_WINDOW,
            strides=POOLING_STRIDE,
            padding='SAME')
        # Transpose matrix so that n_filters from convolution becomes width.
        pool1 = tf.transpose(pool1, [0, 1, 3, 2])

    with tf.variable_scope('CNN_Layer2'):
        # Second level of convolution filtering.
        conv2 = tf.layers.conv2d(
            pool1,
            filters=N_FILTERS,
            kernel_size=FILTER_SHAPE2,
            padding='VALID')
        # Max across each filter to get useful features for classification.
        pool2 = tf.squeeze(tf.reduce_max(conv2, 1), squeeze_dims=[1])

    # Apply regular WX + B and classification.
    logits = tf.layers.dense(pool2, MAX_LABEL, activation=None)

    predicted_classes = tf.argmax(logits, 1)
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions={
                'class': predicted_classes,
                'prob': tf.nn.softmax(logits)
            })

    onehot_labels = tf.one_hot(labels, MAX_LABEL, 1, 0)
    loss = tf.losses.softmax_cross_entropy(
        onehot_labels=onehot_labels, logits=logits)
    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
        train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)

    eval_metric_ops = {
        'accuracy': tf.metrics.accuracy(
            labels=labels, predictions=predicted_classes)
    }
    return tf.estimator.EstimatorSpec(
        mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)

Author: DjangoPeng | Project: tensorflow | Lines: 58 | Source: text_classification_character_cnn.py

Example 13: one_hot_categorical_model

def one_hot_categorical_model(features, target):
    target = tf.one_hot(target, 2, 1.0, 0.0)
    features = tf.one_hot(features, n_classes, 1.0, 0.0)
    prediction, loss = learn.models.logistic_regression(
        tf.squeeze(features, [1]), target)
    train_op = layers.optimize_loss(loss,
                                    tf.contrib.framework.get_global_step(), optimizer='SGD',
                                    learning_rate=0.01)
    return tf.argmax(prediction, dimension=1), loss, train_op

Author: ilblackdragon | Project: tf_examples | Lines: 9 | Source: titanic_categorical_variables.py

Example 14: posterior_mean_and_sample

def posterior_mean_and_sample(self, candidates):
    """Draw samples for test predictions.

    Given a Tensor of 'candidates' inputs, returns samples from the posterior
    and the posterior mean prediction for those inputs.

    Args:
        candidates: A (num-examples x num-dims) Tensor containing the inputs for
            which to return predictions.

    Returns:
        y_mean: The posterior mean prediction given these inputs.
        y_sample: A sample from the posterior of the outputs given these inputs.
    """
    # Cross-covariance for test predictions
    w = tf.identity(self.weights_train)
    inds = tf.squeeze(
        tf.reshape(
            tf.tile(
                tf.reshape(tf.range(self.n_out), (self.n_out, 1)),
                (1, tf.shape(candidates)[0])), (-1, 1)))

    cross_cov = self.cov(tf.tile(candidates, [self.n_out, 1]), self.x_train)
    cross_task_cov = self.task_cov(tf.one_hot(inds, self.n_out), w)
    cross_cov *= cross_task_cov

    # Test mean prediction
    y_mean = tf.matmul(cross_cov, tf.matmul(self.input_inv, self.y_train))

    # Test sample predictions
    # Note this can be done much more efficiently using Kronecker products
    # if all tasks are fully observed (which we won't assume)
    test_cov = (
        self.cov(tf.tile(candidates, [self.n_out, 1]),
                 tf.tile(candidates, [self.n_out, 1])) *
        self.task_cov(tf.one_hot(inds, self.n_out),
                      tf.one_hot(inds, self.n_out)) -
        tf.matmul(cross_cov,
                  tf.matmul(self.input_inv,
                            tf.transpose(cross_cov))))

    # Get the matrix square root through an SVD for drawing samples
    # This seems more numerically stable than the Cholesky
    s, _, v = tf.svd(test_cov, full_matrices=True)
    test_sqrt = tf.matmul(v, tf.matmul(tf.diag(s), tf.transpose(v)))

    y_sample = (
        tf.matmul(
            test_sqrt,
            tf.random_normal([tf.shape(test_sqrt)[0], 1], dtype=tf.float64)) +
        y_mean)
    y_sample = (
        tf.transpose(tf.reshape(y_sample,
                                (self.n_out, -1))) * self.input_std +
        self.input_mean)

    return y_mean, y_sample

Author: 812864539 | Project: models | Lines: 57 | Source: multitask_gp.py

Example 15: __init__

def __init__(self, env, env_name, _optimizer='adam'):
    """
    :param env:
    The output of this Discriminator is the reward for the learning agent, not the cost,
    because the discriminator predicts P(expert|s,a) = 1 - P(agent|s,a).
    """
    self._optimizer = _optimizer
    env_header = env_name.split('-')[0]
    # CartPole-v1, Acrobot-v1, Pendulum-v0, HalfCheetah-v2, Hopper-v2, Walker2d-v2, Humanoid-v2
    if env_header == 'CartPole' or env_header == 'Acrobot' or env_header == 'Pendulum' or env_header == 'MountainCar':  # classic-control Gym envs
        action_space_count = env.action_space.n
    else:  # MuJoCo
        action_space_count = env.action_space.shape[0]

    with tf.variable_scope('discriminator'):
        self.scope = tf.get_variable_scope().name
        self.expert_s = tf.placeholder(dtype=tf.float32, shape=[None] + list(env.observation_space.shape))
        self.expert_a = tf.placeholder(dtype=tf.int32, shape=[None])
        expert_a_one_hot = tf.one_hot(self.expert_a, depth=action_space_count)
        # add noise to stabilise training
        expert_a_one_hot += tf.random_normal(tf.shape(expert_a_one_hot), mean=0.2, stddev=0.1, dtype=tf.float32) / 1.2
        expert_s_a = tf.concat([self.expert_s, expert_a_one_hot], axis=1)

        self.agent_s = tf.placeholder(dtype=tf.float32, shape=[None] + list(env.observation_space.shape))
        self.agent_a = tf.placeholder(dtype=tf.int32, shape=[None])
        agent_a_one_hot = tf.one_hot(self.agent_a, depth=action_space_count)
        # add noise to stabilise training
        agent_a_one_hot += tf.random_normal(tf.shape(agent_a_one_hot), mean=0.2, stddev=0.1, dtype=tf.float32) / 1.2
        agent_s_a = tf.concat([self.agent_s, agent_a_one_hot], axis=1)

        with tf.variable_scope('network') as network_scope:
            prob_1 = self.construct_network(input=expert_s_a)
            network_scope.reuse_variables()  # share parameters
            prob_2 = self.construct_network(input=agent_s_a)

        with tf.variable_scope('loss'):
            loss_expert = tf.reduce_mean(tf.log(tf.clip_by_value(prob_1, 0.01, 1)))
            loss_agent = tf.reduce_mean(tf.log(tf.clip_by_value(1 - prob_2, 0.01, 1)))
            loss = loss_expert + loss_agent
            loss = -loss
            tf.summary.scalar('discriminator', loss)

        # optimizer: adagrad, rmsprop, adadelta, adam, cocob
        if self._optimizer == 'adagrad':
            optimizer = tf.train.AdagradOptimizer(learning_rate=0.01)  # initial_accumulator_value=0.1
        elif self._optimizer == 'rmsprop':
            optimizer = tf.train.RMSPropOptimizer(learning_rate=0.00025)  # decay=0.9, momentum=0.0, epsilon=1e-10, use_locking=False, centered=False
        elif self._optimizer == 'adadelta':
            optimizer = tf.train.AdadeltaOptimizer(learning_rate=0.5)  # learning_rate=0.001, rho=0.95, epsilon=1e-08, use_locking=False
        elif self._optimizer == 'cocob':
            optimizer = cocob.COCOB()
        else:  # adam
            optimizer = tf.train.AdamOptimizer()  # lr=0.001, beta1=0.9, beta2=0.999, epsilon=1e-08, use_locking=False
        self.train_op = optimizer.minimize(loss)

        self.rewards = tf.log(tf.clip_by_value(prob_2, 1e-10, 1))  # log(P(expert|s,a)); larger is better for the agent

Author: 6-Billionaires | Project: gail_ppo_optimizer | Lines: 56 | Source: discriminator.py

Example 16: crappy_plot

def crappy_plot(val, levels):
    x_len = val.get_shape().as_list()[1]
    left_val = tf.concat(1, (val[:, 0:1], val[:, 0:x_len - 1]))
    right_val = tf.concat(1, (val[:, 1:], val[:, x_len - 1:]))
    left_mean = (val + left_val) // 2
    right_mean = (val + right_val) // 2
    low_val = tf.minimum(tf.minimum(left_mean, right_mean), val)
    high_val = tf.maximum(tf.maximum(left_mean, right_mean), val + 1)
    return tf.cumsum(tf.one_hot(low_val, levels, axis=1) - tf.one_hot(high_val, levels, axis=1), axis=1)

Author: NoahDStein | Project: NeuralNetSandbox | Lines: 10 | Source: tfutil.py

Example 17: nearest_neighbor

def nearest_neighbor(x,
                     means,
                     block_v_size,
                     random_top_k=1,
                     soft_em=False,
                     num_samples=1):
    """Find the nearest element in means to elements in x.

    Args:
        x: Batch of encoder continuous latent states sliced/projected into shape
            [-1, num_blocks, block_dim].
        means: Embedding table of shape [num_blocks, block_v_size, block_dim].
        block_v_size: Number of table entries per block.
        random_top_k: Noisy top-k if this is bigger than 1 (Default: 1).
        soft_em: If True then use soft EM rather than hard EM (Default: False).
        num_samples: Number of samples to take in soft EM (Default: 1).

    Returns:
        Tensor with nearest element in means encoded in one-hot notation
        and distances.
    """
    x_norm_sq = tf.reduce_sum(tf.square(x), axis=-1, keep_dims=True)
    means_norm_sq = tf.reduce_sum(tf.square(means), axis=-1, keep_dims=True)
    scalar_prod = tf.matmul(
        tf.transpose(x, perm=[1, 0, 2]), tf.transpose(means, perm=[0, 2, 1]))
    scalar_prod = tf.transpose(scalar_prod, perm=[1, 0, 2])
    dist = x_norm_sq + tf.transpose(
        means_norm_sq, perm=[2, 0, 1]) - 2 * scalar_prod

    # computing cluster probabilities
    if soft_em:
        num_blocks = common_layers.shape_list(dist)[1]
        nearest_idx = tf.stack(
            [
                tf.multinomial(-dist[:, i, :], num_samples=num_samples)
                for i in range(num_blocks)
            ],
            axis=1)
        nearest_hot = tf.one_hot(nearest_idx, depth=block_v_size)
        nearest_hot = tf.reduce_mean(nearest_hot, axis=-2)
    else:
        if random_top_k > 1:
            _, top_k_idx = tf.nn.top_k(-dist, k=random_top_k)
            nearest_idx = tf.gather(
                top_k_idx,
                tf.random_uniform(
                    [1], minval=0, maxval=random_top_k - 1, dtype=tf.int32),
                axis=-1)
        else:
            nearest_idx = tf.argmax(-dist, axis=-1)
        nearest_hot = tf.one_hot(nearest_idx, block_v_size)
    return nearest_hot

Author: kltony | Project: tensor2tensor | Lines: 52 | Source: discretization.py

Example 18: _testOneHot

def _testOneHot(self, truth, use_gpu=False, expected_err_re=None,
                raises=None, **inputs):
    with self.test_session(use_gpu=use_gpu):
        if raises is not None:
            with self.assertRaises(raises):
                tf.one_hot(**inputs)
        else:
            ans = tf.one_hot(**inputs)
            if expected_err_re is None:
                tf_ans = ans.eval()
                self.assertAllClose(tf_ans, truth, atol=1e-10)
                self.assertEqual(tf_ans.shape, ans.get_shape())
            else:
                with self.assertRaisesOpError(expected_err_re):
                    ans.eval()

Author: 0-T-0 | Project: tensorflow | Lines: 15 | Source: one_hot_op_test.py

Example 19: one_hot_matrix

def one_hot_matrix(labels, C):
    """
    Creates a matrix where the i-th row corresponds to the i-th class number and the j-th column
    corresponds to the j-th training example. So if example j has label i, then entry (i, j)
    will be 1.

    Arguments:
    labels -- vector containing the labels
    C -- number of classes, the depth of the one-hot dimension

    Returns:
    one_hot -- one-hot matrix
    """
    # Create a tf.constant equal to C (depth), name it 'C'. (approx. 1 line)
    C = tf.constant(C, name="C")

    # Use tf.one_hot, be careful with the axis (approx. 1 line)
    one_hot_matrix = tf.one_hot(labels, C)

    # Create the session (approx. 1 line)
    sess = tf.Session()

    # Run the session (approx. 1 line)
    one_hot = sess.run(one_hot_matrix).T

    # Close the session (approx. 1 line). See method 1 above.
    sess.close()

    ### END CODE HERE ###

    return one_hot

Author: shriavi | Project: datasciencecoursera | Lines: 33 | Source: Tensorflow+Tutorial.py

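A quick usage sketch of the one_hot_matrix helper above (hypothetical labels, assuming numpy is imported as np; the transpose means rows index classes and columns index examples):

labels = np.array([1, 2, 3, 0])
one_hot = one_hot_matrix(labels, C=4)
# one_hot has shape (4, 4); column j is the one-hot encoding of labels[j]:
# [[0. 0. 0. 1.]
#  [1. 0. 0. 0.]
#  [0. 1. 0. 0.]
#  [0. 0. 1. 0.]]
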
Example 20: rnn_model

def rnn_model(features, target):
    """RNN model to predict from a sequence of words to a class."""
    # Convert indexes of words into embeddings.
    # This creates an embeddings matrix of [n_words, EMBEDDING_SIZE] and then
    # maps word indexes of the sequence into [batch_size, sequence_length,
    # EMBEDDING_SIZE].
    word_vectors = tf.contrib.layers.embed_sequence(
        features, vocab_size=n_words, embed_dim=EMBEDDING_SIZE, scope='words')

    # Split into a list of embeddings per word, while removing the doc length dim.
    # word_list results in a list of tensors [batch_size, EMBEDDING_SIZE].
    word_list = tf.unstack(word_vectors, axis=1)

    # Create a Gated Recurrent Unit cell with hidden size of EMBEDDING_SIZE.
    cell = tf.nn.rnn_cell.GRUCell(EMBEDDING_SIZE)

    # Create an unrolled Recurrent Neural Network of length
    # MAX_DOCUMENT_LENGTH and pass word_list as inputs for each unit.
    _, encoding = tf.nn.rnn(cell, word_list, dtype=tf.float32)

    # Given the encoding of the RNN, take the encoding of the last step (i.e. the hidden
    # state of the last step) and pass it as features for logistic
    # regression over the output classes.
    target = tf.one_hot(target, 15, 1, 0)
    logits = tf.contrib.layers.fully_connected(encoding, 15, activation_fn=None)
    loss = tf.contrib.losses.softmax_cross_entropy(logits, target)

    # Create a training op.
    train_op = tf.contrib.layers.optimize_loss(
        loss, tf.contrib.framework.get_global_step(),
        optimizer='Adam', learning_rate=0.01)

    return (
        {'class': tf.argmax(logits, 1), 'prob': tf.nn.softmax(logits)},
        loss, train_op)

Author: ComeOnGetMe | Project: tensorflow | Lines: 35 | Source: text_classification.py

Note: The tensorflow.one_hot examples in this article were compiled by 纯净天空 from source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by their respective authors, and copyright of the source code remains with those authors. Please consult each project's license before redistributing or reusing the code, and do not republish without permission.