This article collects typical usage examples of the Python function tensorflow.string_split. If you have been wondering how string_split is used in practice, what its arguments mean, or simply want to see it in real code, the curated examples below should help.
Twenty code examples of the string_split function are presented, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code samples.
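Before the examples, here is a minimal orientation sketch (assuming TensorFlow 1.x in graph mode; the sample strings are made up): tf.string_split returns a tf.SparseTensor whose values hold the tokens and whose indices record (row, token position) pairs.
import tensorflow as tf  # TensorFlow 1.x
# Minimal tf.string_split sketch (graph mode; sample strings are hypothetical)
strings = tf.constant(["hello world", "tensorflow string_split"])
tokens = tf.string_split(strings)                            # -> tf.SparseTensor
dense = tf.sparse_tensor_to_dense(tokens, default_value="")  # pad ragged rows with ""
with tf.Session() as sess:
    print(sess.run(tokens.values))  # [b'hello' b'world' b'tensorflow' b'string_split']
    print(sess.run(dense))          # [[b'hello' b'world'] [b'tensorflow' b'string_split']]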
Example 1: init
def init(self):
    # init
    self.global_step = global_step = tf.Variable(0, trainable=False, name='global_step')
    self.learning_rate = learning_rate = tf.train.exponential_decay(1e-2, global_step, 500, 0.95, staircase=True)
    # Load classes
    src_table = tf.contrib.lookup.index_table_from_file('./iwslt15/vocab.en', default_value=0)
    tgt_table = tf.contrib.lookup.index_table_from_file('./iwslt15/vocab.vi', default_value=0)
    #src_table_size = src_table.size()
    #tgt_table_size = tgt_table.size()
    src_table_size = 17191
    tgt_table_size = 7709
    src_eos_id = tf.cast(src_table.lookup(tf.constant('</s>')), tf.int64)
    self.tgt_eos_id = tgt_eos_id = tf.cast(tgt_table.lookup(tf.constant('</s>')), tf.int64)
    self.tgt_sos_id = tgt_sos_id = tf.cast(tgt_table.lookup(tf.constant('<s>')), tf.int64)
    # File placeholders
    src_files = tf.placeholder(tf.string, shape=[None])
    tgt_files = tf.placeholder(tf.string, shape=[None])
    # Read data
    src_dataset = tf.contrib.data.TextLineDataset(src_files)
    tgt_dataset = tf.contrib.data.TextLineDataset(tgt_files)
    # Convert data to word indices
    src_dataset = src_dataset.map(lambda string: tf.concat([['<s>'], tf.string_split([string]).values, ['</s>']], 0))
    src_dataset = src_dataset.map(lambda words: (words, tf.size(words)))
    src_dataset = src_dataset.map(lambda words, size: (src_table.lookup(words), size))
    tgt_dataset = tgt_dataset.map(lambda string: tf.concat([['<s>'], tf.string_split([string]).values, ['</s>']], 0))
    tgt_dataset = tgt_dataset.map(lambda words: (words, tf.size(words)))
    tgt_dataset = tgt_dataset.map(lambda words, size: (tgt_table.lookup(words), size))
    # Zip source and target datasets
    dataset = tf.contrib.data.Dataset.zip((src_dataset, tgt_dataset))
    # Batch with padding
    batched_dataset = dataset.padded_batch(self.batch_size,
        padded_shapes=((tf.TensorShape([None]), tf.TensorShape([])), (tf.TensorShape([None]), tf.TensorShape([]))),
        padding_values=((src_eos_id, 0), (tgt_eos_id, 0)))
    batched_iterator = batched_dataset.make_initializable_iterator()
    ((source, source_lengths), (target, target_lengths)) = batched_iterator.get_next()
    self.target = target
    self.target_lengths = target_lengths
    self.source_lengths = source_lengths
    # Load embeddings (dictionary limited to 100000 entries)
    src_embed = tf.Variable(tf.random_normal([100000, self.embed_vector_size], stddev=0.1))
    self.tgt_embed = tgt_embed = tf.Variable(tf.random_normal([100000, self.embed_vector_size], stddev=0.1))
    self.src_lookup = src_lookup = tf.nn.embedding_lookup(src_embed, source)
    self.tgt_lookup = tgt_lookup = tf.nn.embedding_lookup(tgt_embed, target)
    # Projection layer
    self.projection_layer = projection_layer = layers_core.Dense(tgt_table_size)
    return batched_iterator, src_files, tgt_files
Author: flrngel | Project: understanding-ai | Lines: 59 | Source: model.py
Example 2: create_char_vectors_from_post
def create_char_vectors_from_post(self, raw_post, mxlen):
    char2index = self.index
    if self.do_lowercase:
        raw_post = self.lowercase(raw_post)
    raw_post = tf.string_split(tf.reshape(raw_post, [-1]))
    culled_word_token_vals = tf.substr(raw_post.values, 0, self.mxwlen)
    char_tokens = tf.string_split(culled_word_token_vals, delimiter='')
    char_indices = char2index.lookup(char_tokens)
    return self.reshape_indices(char_indices, [mxlen, self.mxwlen])
Author: dpressel | Project: baseline | Lines: 9 | Source: preprocessors.py
Example 3: decode_libsvm
def decode_libsvm(line):
    columns = tf.string_split([line], ' ')
    labels = tf.string_to_number(columns.values[0], out_type=tf.float32)
    splits = tf.string_split(columns.values[1:], ':')
    id_vals = tf.reshape(splits.values, splits.dense_shape)
    feat_ids, feat_vals = tf.split(id_vals, num_or_size_splits=2, axis=1)
    feat_ids = tf.string_to_number(feat_ids, out_type=tf.int32)
    feat_vals = tf.string_to_number(feat_vals, out_type=tf.float32)
    return {"feat_ids": feat_ids, "feat_vals": feat_vals}, labels
Author: chenxingqiang | Project: ML_CIA | Lines: 9 | Source: NFM.py
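A hedged usage sketch for Example 3 (the file path, parallelism, and iterator style below are assumptions, not taken from the original project): decode_libsvm is typically mapped over a TextLineDataset so that each libsvm-formatted line becomes a (features, label) pair.
import tensorflow as tf  # TensorFlow 1.x
dataset = tf.data.TextLineDataset("train.libsvm")           # hypothetical input file
dataset = dataset.map(decode_libsvm, num_parallel_calls=4)  # parse each line
dataset = dataset.prefetch(1)
iterator = dataset.make_one_shot_iterator()
features, labels = iterator.get_next()                      # features["feat_ids"], features["feat_vals"]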
Example 4: __init__
def __init__(self, args, txt_file, num_classes, mode, batch_size, num_preprocess_threads=1, shuffle=True,
             min_queue_examples=1):
    self.args = args
    self.txt_file = txt_file
    self.num_preprocess_threads = num_preprocess_threads
    self.min_queue_examples = min_queue_examples
    self.batch_size = batch_size
    self.mode = mode
    self.imgShape = [self.args.imageHeight, self.args.imageWidth, self.args.imageChannels]
    self.maskShape = tf.stack([self.args.imageHeight, self.args.imageWidth])
    self.num_classes = int(num_classes)
    input_queue = tf.train.string_input_producer([txt_file], shuffle=False)
    line_reader = tf.TextLineReader()
    _, line = line_reader.read(input_queue)
    split_line = tf.string_split([line]).values
    if (mode == 'training' or mode == 'validation'):
        split_line = tf.string_split([line]).values
        rgb_image_path = split_line[0]
        label_image_path = split_line[1]
        self.image_o = self.read_image(rgb_image_path, 0)
        self.label_image_o = self.read_image(label_image_path, 1)
        do_flip = tf.random_uniform([], 0, 1)
        self.image = tf.cond(do_flip > 0.5, lambda: tf.image.flip_left_right(self.image_o), lambda: self.image_o)
        self.label_image = tf.cond(do_flip > 0.5, lambda: tf.image.flip_left_right(self.label_image_o),
                                   lambda: self.label_image_o)
        self.image.set_shape((self.args.imageHeight, self.args.imageWidth, 3))
        self.label_image.set_shape((self.args.imageHeight, self.args.imageWidth, 1))
        self.img_batch, self.label_batch = tf.train.shuffle_batch([self.image, self.label_image],
                                                                  batch_size=batch_size,
                                                                  num_threads=num_preprocess_threads,
                                                                  capacity=min_queue_examples + 3 * batch_size,
                                                                  min_after_dequeue=min_queue_examples)
    elif (mode == 'test'):
        print('Generating test Image Batch')
        split_line = tf.string_split([line]).values
        rgb_image_path = split_line[0]
        self.image = self.read_image(rgb_image_path, 0)
        self.image.set_shape((self.args.imageHeight, self.args.imageWidth, 3))
        self.img_batch = tf.train.batch([self.image],
                                        batch_size=batch_size,
                                        num_threads=num_preprocess_threads,
                                        capacity=min_queue_examples + 1 * batch_size)
Author: ruyi345 | Project: Fully-convolutional-networks-TF | Lines: 55 | Source: dataGenerator.py
Example 5: _parse_line
def _parse_line(line):
"""
_parse_line
"""
line_arr = tf.string_split([line], '\t').values
#print(line_arr[2]) Tensor("strided_slice:0", shape=(), dtype=string)
user = line_arr[0]
label = tf.string_to_number(line_arr[1], out_type=tf.int32)
#print(tf.string_split([line_arr[2]]).values) Tensor("StringSplit_1:1", shape=(?,), dtype=string)
features = {}
features["words"] = tf.string_to_number(tf.string_split([line_arr[2]], ",").values, tf.int32)
features["id"] = user
return features, label
Author: UranusZS | Project: machine_learning_study | Lines: 13 | Source: data.py
Example 6: _get_labels_builder
def _get_labels_builder(self, labels_file):
    labels_vocabulary = tf.contrib.lookup.index_table_from_file(
        self.labels_vocabulary_file,
        vocab_size=self.num_labels)
    dataset = tf.data.TextLineDataset(labels_file)
    process_fn = lambda x: {
        "tags": tf.string_split([x]).values,
        "tags_id": labels_vocabulary.lookup(tf.string_split([x]).values)
    }
    padded_shapes_fn = lambda: {
        "tags": [None],
        "tags_id": [None]
    }
    return dataset, process_fn, padded_shapes_fn
Author: yhgon | Project: OpenNMT-tf | Lines: 15 | Source: sequence_tagger.py
Example 7: lowercase
def lowercase(self, raw_post):
    split_chars = tf.string_split(tf.reshape(raw_post, [-1]), delimiter="").values
    upchar_inds = self.upchars_lut.lookup(split_chars)
    return tf.reduce_join(tf.map_fn(lambda x: tf.cond(x[0] > 25,
                                                      lambda: x[1],
                                                      lambda: self.lchars[x[0]]),
                                    (upchar_inds, split_chars), dtype=tf.string))
Author: dpressel | Project: baseline | Lines: 7 | Source: preprocessors.py
Example 8: get_predict_iterator
def get_predict_iterator(src_vocab_table, vocab_size, batch_size, max_len=max_sequence):
    pred_dataset = tf.contrib.data.TextLineDataset(pred_file)
    pred_dataset = pred_dataset.map(
        lambda src: tf.string_split([src]).values)
    if max_len:
        pred_dataset = pred_dataset.map(lambda src: src[:max_len])
    pred_dataset = pred_dataset.map(
        lambda src: tf.cast(src_vocab_table.lookup(src), tf.int32))
    pred_dataset = pred_dataset.map(lambda src: (src, tf.size(src)))

    def batching_func(x):
        return x.padded_batch(
            batch_size,
            padded_shapes=(tf.TensorShape([None]),  # src
                           tf.TensorShape([])),     # src_len
            padding_values=(vocab_size + 1,         # src
                            0))                     # src_len -- unused

    batched_dataset = batching_func(pred_dataset)
    batched_iter = batched_dataset.make_initializable_iterator()
    (src_ids, src_seq_len) = batched_iter.get_next()
    # target_input is not needed at prediction time, but returning None would raise an error,
    # so a placeholder is used instead; it is never actually fed.
    WHATEVER = 10
    fake_tag = tf.placeholder(tf.int32, [None, WHATEVER])
    return BatchedInput(
        initializer=batched_iter.initializer,
        source=src_ids,
        target_input=fake_tag,
        source_sequence_length=src_seq_len,
        target_sequence_length=src_seq_len)
Author: luluyouyue | Project: NER | Lines: 33 | Source: utils.py
Example 9: _decode_and_resize
def _decode_and_resize(image_tensor):
"""Decodes jpeg string, resizes it and returns a uint8 tensor."""
# These constants are set by Inception v3's expectations.
height = 299
width = 299
channels = 3
image_tensor = tf.where(tf.equal(image_tensor, ''), IMAGE_DEFAULT_STRING, image_tensor)
# Fork by whether image_tensor value is a file path, or a base64 encoded string.
slash_positions = tf.equal(tf.string_split([image_tensor], delimiter="").values, '/')
is_file_path = tf.cast(tf.count_nonzero(slash_positions), tf.bool)
# The following two functions are required for tf.cond. Note that we can not replace them
# with lambda. According to TF docs, if using inline lambda, both branches of condition
# will be executed. The workaround is to use a function call.
def _read_file():
return tf.read_file(image_tensor)
def _decode_base64():
return tf.decode_base64(image_tensor)
image = tf.cond(is_file_path, lambda: _read_file(), lambda: _decode_base64())
image = tf.image.decode_jpeg(image, channels=channels)
image = tf.expand_dims(image, 0)
image = tf.image.resize_bilinear(image, [height, width], align_corners=False)
image = tf.squeeze(image, squeeze_dims=[0])
image = tf.cast(image, dtype=tf.uint8)
return image
Author: googledatalab | Project: pydatalab | Lines: 30 | Source: feature_transforms.py
Example 10: get_test_iterator
def get_test_iterator(src_dataset, src_vocab_table, batch_size, config):
    src_eos_id = tf.cast(src_vocab_table.lookup(tf.constant(config.eos)), tf.int32)
    src_dataset = src_dataset.map(lambda src: tf.string_split([src]).values)
    src_dataset = src_dataset.map(lambda src: src[:config.src_max_len])
    src_dataset = src_dataset.map(
        lambda src: tf.cast(src_vocab_table.lookup(src), tf.int32))
    if config.reverse_src:
        src_dataset = src_dataset.map(lambda src: tf.reverse(src, axis=[0]))
    src_dataset = src_dataset.map(lambda src: (src, tf.size(src)))

    def batching_func(x):
        return x.padded_batch(
            config.batch_size,
            padded_shapes=(tf.TensorShape([None]),
                           tf.TensorShape([])),
            padding_values=(src_eos_id,
                            0))

    batched_dataset = batching_func(src_dataset)
    batched_iter = batched_dataset.make_initializable_iterator()
    src_ids, src_seq_len = batched_iter.get_next()
    return BatchedInput(
        initializer=batched_iter.initializer,
        source=src_ids,
        target_input=None,
        target_output=None,
        source_sequence_length=src_seq_len,
        target_sequence_length=None)
Author: rpryzant | Project: code-doodles | Lines: 32 | Source: input_pipeline.py
Example 11: custom_fast_text
def custom_fast_text(features, labels, mode, params):
    vocab_table = lookup.index_table_from_file(vocabulary_file='data/vocab.csv', num_oov_buckets=1, default_value=-1)
    text = features[commons.FEATURE_COL]
    words = tf.string_split(text)
    dense_words = tf.sparse_tensor_to_dense(words, default_value=commons.PAD_WORD)
    word_ids = vocab_table.lookup(dense_words)
    padding = tf.constant([[0, 0], [0, commons.CNN_MAX_DOCUMENT_LENGTH]])
    # Pad all the word_ids entries to the maximum document length
    word_ids_padded = tf.pad(word_ids, padding)
    word_id_vector = tf.slice(word_ids_padded, [0, 0], [-1, commons.CNN_MAX_DOCUMENT_LENGTH])
    if mode == tf.estimator.ModeKeys.TRAIN:
        tf.keras.backend.set_learning_phase(True)
    else:
        tf.keras.backend.set_learning_phase(False)
    embedded_sequences = tf.keras.layers.Embedding(params.N_WORDS, 20, input_length=commons.CNN_MAX_DOCUMENT_LENGTH)(
        word_id_vector)
    f1 = tf.keras.layers.GlobalMaxPooling1D()(embedded_sequences)
    logits = tf.keras.layers.Dense(commons.TARGET_SIZE, activation=None)(f1)
    predictions = tf.nn.sigmoid(logits)
    if mode == tf.estimator.ModeKeys.PREDICT:
        prediction_dict = {
            'class': tf.cast(tf.map_fn(lambda x: tf.cond(x > 0.30, lambda: 1.0, lambda: 0.0),
                                       tf.squeeze(predictions)), dtype=tf.int32),
        }
        export_outputs = {
            'predictions': tf.estimator.export.PredictOutput(prediction_dict)
        }
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions, export_outputs=export_outputs)
    loss = tf.losses.sigmoid_cross_entropy(multi_class_labels=labels, logits=logits)
    tf.summary.scalar('loss', loss)
    acc = tf.equal(tf.cast(predictions, dtype=tf.int32), labels)
    acc = tf.reduce_mean(tf.cast(acc, tf.float32))
    tf.summary.scalar('acc', acc)
    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.AdamOptimizer()
        train_op = optimizer.minimize(loss=loss, global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode=mode, train_op=train_op, loss=loss)
    if mode == tf.estimator.ModeKeys.EVAL:
        eval_metrics_ops = {
            'accuracy': tf.metrics.accuracy(labels=labels, predictions=predictions)
        }
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, eval_metric_ops=eval_metrics_ops)
Author: Aurora11111 | Project: CloudML-Serving | Lines: 59 | Source: custom_model.py
Example 12: testStringSplit
def testStringSplit(self):
strings = ["pigs on the wing", "animals"]
with self.test_session() as sess:
tokens = tf.string_split(strings)
indices, values, shape = sess.run(tokens)
self.assertAllEqual(indices, [[0, 0], [0, 1], [0, 2], [0, 3], [1, 0]])
self.assertAllEqual(values, [b"pigs", b"on", b"the", b"wing", b"animals"])
self.assertAllEqual(shape, [2, 4])
Author: 821760408-sp | Project: tensorflow | Lines: 9 | Source: string_split_op_test.py
Example 13: sparse_from_csv
def sparse_from_csv(csv):
    ids, post_tags_str = tf.decode_csv(csv, [[-1], [""]])
    table = tf.contrib.lookup.index_table_from_tensor(
        mapping=TAG_SET, default_value=-1)  # build a lookup table over the tag vocabulary
    split_tags = tf.string_split(post_tags_str, "|")
    return tf.SparseTensor(
        indices=split_tags.indices,
        values=table.lookup(split_tags.values),  # map each tag string to its index via the table
        dense_shape=split_tags.dense_shape)
Author: huyuxiang | Project: tensorflow_practice | Lines: 9 | Source: multi-value-feature.py
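A hedged follow-up sketch for Example 13 (TAG_SET, the embedding size, and the sample CSV rows are assumptions, not from the original project): the SparseTensor returned by sparse_from_csv is commonly fed to tf.nn.embedding_lookup_sparse, which here averages the embeddings of the tags present in each row.
import tensorflow as tf  # TensorFlow 1.x
TAG_SET = ["harden", "james", "curry", "durant", "paul"]          # assumed tag vocabulary
csv_rows = tf.constant(["1,harden|james|curry", "2,durant|paul"])  # assumed sample rows
tags = sparse_from_csv(csv_rows)
embedding_params = tf.get_variable("tag_embedding", shape=[len(TAG_SET), 3])
# Unweighted mean of the tag embeddings found in each row
tag_embedded = tf.nn.embedding_lookup_sparse(embedding_params, sp_ids=tags, sp_weights=None, combiner="mean")
with tf.Session() as sess:
    sess.run([tf.global_variables_initializer(), tf.tables_initializer()])
    print(sess.run(tag_embedded))  # shape (2, 3)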
Example 14: resize_sen
def resize_sen(self, raw, mxlen):
"""
Splits and rejoins a string to ensure that tokens meet
the required max len.
"""
raw_tokens = tf.string_split(tf.reshape(raw, [-1])).values
# sentence length > mxlen
raw_post = tf.reduce_join(raw_tokens[:mxlen], separator=" ")
return raw_post
Author: dpressel | Project: baseline | Lines: 9 | Source: preprocessors.py
Example 15: testStringSplitEmptyToken
def testStringSplitEmptyToken(self):
strings = [" hello ", "", "world "]
with self.test_session() as sess:
tokens = tf.string_split(strings)
indices, values, shape = sess.run(tokens)
self.assertAllEqual(indices, [[0, 0], [2, 0]])
self.assertAllEqual(values, [b"hello", b"world"])
self.assertAllEqual(shape, [3, 1])
Author: 821760408-sp | Project: tensorflow | Lines: 9 | Source: string_split_op_test.py
Example 16: create_word_vectors_from_post
def create_word_vectors_from_post(self, raw_post, mxlen):
    # vocab has only lowercase words
    word2index = self.index
    if self.do_lowercase:
        raw_post = self.lowercase(raw_post)
    word_tokens = tf.string_split(tf.reshape(raw_post, [-1]))
    word_indices = word2index.lookup(word_tokens)
    # Reshape them out to the proper length
    reshaped_words = tf.sparse_reshape(word_indices, shape=[-1])
    return self.reshape_indices(reshaped_words, [mxlen])
Author: dpressel | Project: baseline | Lines: 10 | Source: preprocessors.py
Example 17: has_no_question_marks
def has_no_question_marks(line):
"""Returns True if the line of text has no question marks."""
# split the line into an array of characters
chars = tf.string_split(line[tf.newaxis], "").values
# for each character check if it is a question mark
is_question = tf.equal(chars, "?")
any_question = tf.reduce_any(is_question)
no_question = ~any_question
return no_question
Author: dananjayamahesh | Project: tensorflow | Lines: 10 | Source: imports85.py
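A hedged usage sketch for Example 17 (the sample lines are made up): the predicate is designed to be passed to Dataset.filter, which keeps only the lines containing no question marks.
import tensorflow as tf  # TensorFlow 1.x
lines = tf.data.Dataset.from_tensor_slices(tf.constant(["50,2,3", "48,?,1"]))  # hypothetical CSV-like lines
clean_lines = lines.filter(has_no_question_marks)  # drops the line containing "?"
next_line = clean_lines.make_one_shot_iterator().get_next()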
Example 18: testStringSplitEmptyDelimiter
def testStringSplitEmptyDelimiter(self):
strings = ["hello", "hola"]
with self.test_session() as sess:
tokens = tf.string_split(strings, delimiter="")
indices, values, shape = sess.run(tokens)
self.assertAllEqual(indices, [[0, 0], [0, 1], [0, 2], [0, 3], [0, 4],
[1, 0], [1, 1], [1, 2], [1, 3]])
self.assertAllEqual(values, [b"h", b"e", b"l", b"l", b"o", b"h", b"o",
b"l", b"a"])
self.assertAllEqual(shape, [2, 5])
Author: 821760408-sp | Project: tensorflow | Lines: 11 | Source: string_split_op_test.py
Example 19: _create_word_vectors_from_post_mixed_case
def _create_word_vectors_from_post_mixed_case(self, nraw_post, mxlen):
    # vocab has only lowercase words
    word_tokens = tf.string_split(tf.reshape(nraw_post, [-1]))
    word_indices = self.word2index.lookup(word_tokens)
    # Reshape them out to the proper length
    reshaped_words = tf.sparse_reshape(word_indices, shape=[-1])
    x = self._reshape_indices(reshaped_words, [mxlen])
    return x
Author: dpressel | Project: baseline | Lines: 11 | Source: exporter_elmo.py
Example 20: testStringSplitWithDelimiter
def testStringSplitWithDelimiter(self):
strings = ["hello|world", "hello world"]
with self.test_session() as sess:
self.assertRaises(
ValueError, tf.string_split, strings, delimiter=["|", ""])
self.assertRaises(ValueError, tf.string_split, strings, delimiter=["a"])
tokens = tf.string_split(strings, delimiter="|")
indices, values, shape = sess.run(tokens)
self.assertAllEqual(indices, [[0, 0], [0, 1], [1, 0]])
self.assertAllEqual(values, [b"hello", b"world", b"hello world"])
self.assertAllEqual(shape, [2, 2])
tokens = tf.string_split(strings, delimiter="| ")
indices, values, shape = sess.run(tokens)
self.assertAllEqual(indices, [[0, 0], [0, 1], [1, 0], [1, 1]])
self.assertAllEqual(values, [b"hello", b"world", b"hello", b"world"])
self.assertAllEqual(shape, [2, 2])
Author: BloodD | Project: tensorflow | Lines: 20 | Source: string_split_op_test.py
Note: the tensorflow.string_split examples in this article were compiled by 纯净天空 from source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by various programmers; copyright of the source code remains with the original authors. Please consult each project's license before redistributing or using the code, and do not repost without permission.