This article collects typical usage examples of the Python function tensorflow.contrib.data.python.ops.batching.map_and_batch. If you are wondering what map_and_batch does, how to call it, or where to find working examples, the curated code samples below should help.
The article presents 18 code examples of map_and_batch, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code samples.
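Before the project examples, here is a minimal, self-contained sketch of the basic call pattern. It is illustrative only and not taken from any of the projects below; it assumes a TF 1.x environment where tf.contrib is available. map_and_batch fuses the map and batch steps into a single transformation attached with Dataset.apply:

import tensorflow as tf
from tensorflow.contrib.data.python.ops import batching

dataset = tf.data.Dataset.range(100)
dataset = dataset.apply(
    batching.map_and_batch(
        map_func=lambda x: x * x,  # per-element transformation
        batch_size=10,             # elements per output batch
        num_parallel_calls=4))     # run the map step in parallel

next_batch = dataset.make_one_shot_iterator().get_next()
with tf.Session() as sess:
  print(sess.run(next_batch))  # first batch: squares of 0..9

Compared with a chained dataset.map(...).batch(...), the fused transformation avoids an intermediate per-element dataset stage; Example 14 below benchmarks exactly this difference.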
Example 1: testModelMapAndBatch
def testModelMapAndBatch(self):
  batch_size = 16
  k = 1024 * 1024
  dataset = dataset_ops.Dataset.from_tensors((np.random.rand(1, 4 * k),
                                              np.random.rand(4 * k,
                                                             1))).repeat()
  dataset = dataset.apply(
      batching.map_and_batch(
          math_ops.matmul,
          num_parallel_calls=optimization.AUTOTUNE,
          batch_size=batch_size))
  iterator = dataset.apply(optimization.model()).make_one_shot_iterator()
  get_next = iterator.get_next()

  deltas = []
  with self.cached_session() as sess:
    for _ in range(5):
      sess.run(get_next.op)
    for _ in range(10):
      start = time.time()
      sess.run(get_next.op)
      end = time.time()
      deltas.append(end - start)

  print("%f (median), %f (mean), %f (stddev), %f (min), %f (max)\n" %
        (np.median(deltas), np.mean(deltas), np.std(deltas), np.min(deltas),
         np.max(deltas)))
Developer: HughKu, Project: tensorflow, Lines: 27, Source: model_dataset_op_test.py
Example 2: testMapAndBatchTypes
def testMapAndBatchTypes(self, element, dtype):
  def gen():
    yield element

  dataset = dataset_ops.Dataset.from_generator(gen, dtype).repeat(100).apply(
      batching.map_and_batch(lambda x: x, batch_size=10))

  get_next = dataset.make_one_shot_iterator().get_next()

  with self.cached_session() as sess:
    for _ in range(10):
      self.assertAllEqual([element for _ in range(10)], sess.run(get_next))
Developer: Jordan1237, Project: tensorflow, Lines: 12, Source: batch_dataset_op_test.py
Example 3: testBatchAndMapDatasetFails
def testBatchAndMapDatasetFails(self):
  """Test a dataset that maps a TF function across its input elements."""
  dataset = dataset_ops.Dataset.from_tensors(
      array_ops.check_numerics(
          constant_op.constant(1.0) / constant_op.constant(0.0), "oops"))
  batch_size = array_ops.placeholder(dtypes.int64, shape=[])
  iterator = (dataset.apply(batching.map_and_batch(lambda x: x, batch_size))
              .make_initializable_iterator())
  init_op = iterator.initializer
  with self.test_session() as sess:
    with self.assertRaisesRegexp(errors.InvalidArgumentError, "oops"):
      sess.run(init_op, feed_dict={batch_size: 14})
Developer: dyoung418, Project: tensorflow, Lines: 12, Source: batch_dataset_op_test.py
Example 4: build_ds
def build_ds(range_start, drop_remainder=False):

  def _map_fn(x):
    return math_ops.square(x)

  return dataset_ops.Dataset.range(
      range_start, range_start + range_size).repeat(num_repeats).apply(
          batching.map_and_batch(
              map_func=_map_fn,
              batch_size=batch_size,
              num_parallel_calls=num_parallel_calls,
              drop_remainder=drop_remainder))
Developer: jinxin0924, Project: tensorflow, Lines: 12, Source: batch_dataset_op_test.py
Example 5: testMapAndBatchYieldsPartialBatch
def testMapAndBatchYieldsPartialBatch(self):
  iterator = (dataset_ops.Dataset.range(10)
              .apply(batching.map_and_batch(
                  lambda x: array_ops.reshape(x * x, [1]), 4))
              .make_one_shot_iterator())
  self.assertEqual([None, 1], iterator.output_shapes.as_list())
  next_element = iterator.get_next()

  with self.cached_session() as sess:
    self.assertAllEqual([[0], [1], [4], [9]], sess.run(next_element))
    self.assertAllEqual([[16], [25], [36], [49]], sess.run(next_element))
    self.assertAllEqual([[64], [81]], sess.run(next_element))
    with self.assertRaises(errors.OutOfRangeError):
      sess.run(next_element)
Developer: Jordan1237, Project: tensorflow, Lines: 13, Source: batch_dataset_op_test.py
Example 6: testMapAndBatchParallelGetNext
def testMapAndBatchParallelGetNext(self):
  iterator = (dataset_ops.Dataset.range(50000)
              .apply(batching.map_and_batch(lambda x: x, batch_size=100))
              .make_one_shot_iterator())
  elements = []
  for _ in range(100):
    elements.append(iterator.get_next())

  with self.cached_session() as sess:
    for i in range(5):
      got = sess.run(elements)
      got.sort(key=lambda x: x[0])
      expected = []
      for j in range(100):
        expected.append(range(i*10000+j*100, i*10000+(j+1)*100))
      self.assertAllEqual(got, expected)
    with self.assertRaises(errors.OutOfRangeError):
      sess.run(elements)
Developer: Jordan1237, Project: tensorflow, Lines: 17, Source: batch_dataset_op_test.py
Example 7: _testMapAndBatchPartialBatchHelper
def _testMapAndBatchPartialBatchHelper(self, drop_remainder=False):
  iterator = (
      dataset_ops.Dataset.range(10).apply(
          batching.map_and_batch(
              lambda x: array_ops.reshape(x * x, [1]),
              batch_size=4,
              drop_remainder=drop_remainder)).make_one_shot_iterator())

  if drop_remainder:
    self.assertEqual([4, 1], iterator.output_shapes.as_list())
  else:
    self.assertEqual([None, 1], iterator.output_shapes.as_list())

  next_element = iterator.get_next()

  with self.test_session() as sess:
    self.assertAllEqual([[0], [1], [4], [9]], sess.run(next_element))
    self.assertAllEqual([[16], [25], [36], [49]], sess.run(next_element))
    if not drop_remainder:
      self.assertAllEqual([[64], [81]], sess.run(next_element))
    with self.assertRaises(errors.OutOfRangeError):
      sess.run(next_element)
Developer: jinxin0924, Project: tensorflow, Lines: 19, Source: batch_dataset_op_test.py
Example 8: testMapAndBatchImplicitDispose
def testMapAndBatchImplicitDispose(self):
  # Tests whether a map and batch dataset will be cleaned up correctly when
  # the pipeline does not run it until exhaustion.
  # The pipeline is TensorSliceDataset -> RepeatDataset(1000) ->
  # MapAndBatchDataset(f=square_3, batch_size=100).
  components = (np.arange(1000),
                np.array([[1, 2, 3]]) * np.arange(1000)[:, np.newaxis],
                np.array(37.0) * np.arange(1000))

  def _map_fn(x, y, z):
    return math_ops.square(x), math_ops.square(y), math_ops.square(z)

  dataset = dataset_ops.Dataset.from_tensor_slices(components).repeat(
      1000).apply(batching.map_and_batch(_map_fn, batch_size=100))
  dataset = dataset.prefetch(5)
  iterator = dataset.make_one_shot_iterator()
  get_next = iterator.get_next()

  with self.cached_session() as sess:
    for _ in range(3):
      sess.run(get_next)
Developer: Jordan1237, Project: tensorflow, Lines: 21, Source: batch_dataset_op_test.py
Example 9: testBatchAndMapDatasetShapeMismatch
def testBatchAndMapDatasetShapeMismatch(self):
  """Test a dataset that maps a TF function across its input elements."""
  def generator():
    yield [1]
    yield [2]
    yield [3]
    yield [[4, 5, 6]]

  dataset = dataset_ops.Dataset.from_generator(
      generator, output_types=dtypes.int32)
  batch_size = 4
  iterator = (
      dataset.apply(batching.map_and_batch(lambda x: x, batch_size))
      .make_initializable_iterator())
  init_op = iterator.initializer
  get_next = iterator.get_next()

  with self.test_session() as sess:
    sess.run(init_op)
    with self.assertRaisesRegexp(errors.InvalidArgumentError,
                                 "number of elements does not match"):
      sess.run(get_next)
Developer: dyoung418, Project: tensorflow, Lines: 21, Source: batch_dataset_op_test.py
Example 10: testComplexPipeline
def testComplexPipeline(self):
  # Setup a complex input pipeline.
  batch_size = 2
  num_epochs = 5
  dataset = dataset_ops.Dataset.from_tensor_slices(
      self._createTFRecordFiles())
  dataset = dataset.shuffle(buffer_size=self._num_files)
  dataset = dataset.flat_map(readers.TFRecordDataset)
  dataset = dataset.prefetch(buffer_size=batch_size)
  dataset = dataset.shuffle(2 * self._num_files * self._num_records)
  dataset = dataset.repeat(num_epochs)
  dataset = dataset.apply(batching.map_and_batch(
      lambda x: x, batch_size=batch_size))
  dataset = dataset.prefetch(buffer_size=None)

  # Auto shard.
  dataset = input_ops.auto_shard_dataset(
      dataset, self._num_shards, self._shard_index)

  # Verify output.
  iterator = dataset.make_one_shot_iterator()
  next_element = iterator.get_next()
  with self.cached_session() as sess:
    actual = []
    num_iterations = (self._num_files * self._num_records * num_epochs) // (
        self._num_shards * batch_size)
    for _ in range(num_iterations):
      actual.extend(sess.run(next_element))
    with self.assertRaises(errors.OutOfRangeError):
      sess.run(next_element)

    expected = []
    for f in range(0, self._num_files, self._num_shards):
      for r in range(self._num_records):
        expected.append(self._record(r, f))
    expected *= num_epochs
    self.assertAllEqual(sorted(expected), sorted(actual))
Developer: AnishShah, Project: tensorflow, Lines: 38, Source: input_ops_test.py
Example 11: testMapAndBatchSparse
def testMapAndBatchSparse(self):

  def _sparse(i):
    return sparse_tensor.SparseTensorValue(
        indices=[[0]], values=(i * [1]), dense_shape=[1])

  iterator = dataset_ops.Dataset.range(10).apply(
      batching.map_and_batch(_sparse, 5)).make_initializable_iterator()
  init_op = iterator.initializer
  get_next = iterator.get_next()

  with self.cached_session() as sess:
    sess.run(init_op)
    for i in range(2):
      actual = sess.run(get_next)
      expected = sparse_tensor.SparseTensorValue(
          indices=[[0, 0], [1, 0], [2, 0], [3, 0], [4, 0]],
          values=[i * 5, i * 5 + 1, i * 5 + 2, i * 5 + 3, i * 5 + 4],
          dense_shape=[5, 1])
      self.assertTrue(sparse_tensor.is_sparse(actual))
      self.assertSparseValuesEqual(actual, expected)
    with self.assertRaises(errors.OutOfRangeError):
      sess.run(get_next)
Developer: Jordan1237, Project: tensorflow, Lines: 23, Source: batch_dataset_op_test.py
Example 12: testMapAndBatchOutOfRangeError
def testMapAndBatchOutOfRangeError(self, threshold):

  def raising_py_fn(i):
    if i >= threshold:
      raise StopIteration()
    else:
      return i

  iterator = (
      dataset_ops.Dataset.range(100).apply(
          batching.map_and_batch(
              lambda x: script_ops.py_func(raising_py_fn, [x], dtypes.int64),
              batch_size=10)).make_one_shot_iterator())
  get_next = iterator.get_next()

  with self.cached_session() as sess:
    for i in range(threshold // 10):
      self.assertAllEqual([i * 10 + j for j in range(10)], sess.run(get_next))
    if threshold % 10 != 0:
      self.assertAllEqual(
          [threshold // 10 * 10 + j for j in range(threshold % 10)],
          sess.run(get_next))
    with self.assertRaises(errors.OutOfRangeError):
      sess.run(get_next)
Developer: Jordan1237, Project: tensorflow, Lines: 24, Source: batch_dataset_op_test.py
Example 13: dataset_fn
def dataset_fn():
  dataset = dataset_ops.Dataset.from_tensors([[1.]]).repeat()
  # TODO(isaprykin): map_and_batch with drop_remainder causes shapes to be
  # fully defined for TPU. Remove this when XLA supports dynamic shapes.
  return dataset.apply(
      batching.map_and_batch(lambda x: x, batch_size=1, drop_remainder=True))
Developer: AnishShah, Project: tensorflow, Lines: 6, Source: single_loss_example.py
Example 14: benchmark
def benchmark(label, series):
  print("%s:" % label)
  for num_calls, inter_op, element_size, batch_size in series:
    num_iters = 1024 // (
        (element_size * batch_size) // min(num_calls, inter_op))
    k = 1024 * 1024
    dataset = dataset_ops.Dataset.from_tensors((np.random.rand(
        element_size, 4 * k), np.random.rand(4 * k, 1))).repeat()
    chained_dataset = dataset.map(
        math_ops.matmul,
        num_parallel_calls=num_calls).batch(batch_size=batch_size)
    chained_iterator = chained_dataset.make_one_shot_iterator()
    chained_get_next = chained_iterator.get_next()

    chained_deltas = []
    with session.Session(
        config=config_pb2.ConfigProto(
            inter_op_parallelism_threads=inter_op,
            use_per_session_threads=True)) as sess:
      for _ in range(5):
        sess.run(chained_get_next.op)
      for _ in range(num_iters):
        start = time.time()
        sess.run(chained_get_next.op)
        end = time.time()
        chained_deltas.append(end - start)

    fused_dataset = dataset = dataset.apply(
        batching.map_and_batch(
            math_ops.matmul,
            num_parallel_calls=num_calls,
            batch_size=batch_size))
    fused_iterator = fused_dataset.make_one_shot_iterator()
    fused_get_next = fused_iterator.get_next()

    fused_deltas = []
    with session.Session(
        config=config_pb2.ConfigProto(
            inter_op_parallelism_threads=inter_op,
            use_per_session_threads=True)) as sess:
      for _ in range(5):
        sess.run(fused_get_next.op)
      for _ in range(num_iters):
        start = time.time()
        sess.run(fused_get_next.op)
        end = time.time()
        fused_deltas.append(end - start)

    print(
        "batch size: %d, num parallel calls: %d, inter-op parallelism: %d, "
        "element size: %d, num iters: %d\nchained wall time: %f (median), "
        "%f (mean), %f (stddev), %f (min), %f (max)\n fused wall time: "
        "%f (median), %f (mean), %f (stddev), %f (min), %f (max)\n "
        "chained/fused: %.2fx (median), %.2fx (mean)" %
        (batch_size, num_calls, inter_op, element_size, num_iters,
         np.median(chained_deltas), np.mean(chained_deltas),
         np.std(chained_deltas), np.min(chained_deltas),
         np.max(chained_deltas), np.median(fused_deltas),
         np.mean(fused_deltas), np.std(fused_deltas), np.min(fused_deltas),
         np.max(fused_deltas),
         np.median(chained_deltas) / np.median(fused_deltas),
         np.mean(chained_deltas) / np.mean(fused_deltas)))

    self.report_benchmark(
        iters=num_iters,
        wall_time=np.median(chained_deltas),
        name=name("chained", label, num_calls, inter_op, element_size,
                  batch_size))

    self.report_benchmark(
        iters=num_iters,
        wall_time=np.median(fused_deltas),
        name=name("fused", label, num_calls, inter_op, element_size,
                  batch_size))

    print("")
Developer: clsung, Project: tensorflow, Lines: 80, Source: map_dataset_op_test.py
Example 15: make_csv_dataset
# ......... part of the code omitted here .........
    specified by `label_name`.

  Raises:
    ValueError: If any of the arguments is malformed.
  """
  # Create dataset of all matching filenames
  filenames = _get_file_names(file_pattern, False)
  dataset = dataset_ops.Dataset.from_tensor_slices(filenames)
  if shuffle:
    dataset = dataset.shuffle(len(filenames), shuffle_seed)

  # Clean arguments; figure out column names and defaults
  if comment is not None and len(comment) != 1:
    raise ValueError("`comment` arg must be a single-character string or None")
  if column_names is None:
    if not header:
      raise ValueError("Cannot infer column names without a header line.")
    # If column names are not provided, infer from the header lines
    column_names = _infer_column_names(filenames, field_delim, use_quote_delim)
  if len(column_names) != len(set(column_names)):
    raise ValueError("Cannot have duplicate column names.")

  if column_defaults is not None:
    column_defaults = [
        constant_op.constant([], dtype=x) if x in _ACCEPTABLE_CSV_TYPES else x
        for x in column_defaults
    ]
  else:
    # If column defaults are not provided, infer from records at graph
    # construction time
    column_defaults = _infer_column_defaults(
        filenames, len(column_names), field_delim, use_quote_delim, na_value,
        header, comment, default_float_type, num_rows_for_inference)

  if label_name is not None and label_name not in column_names:
    raise ValueError("`label_name` provided must be one of the columns.")

  # Define map and filter functions
  def filter_fn(line):
    return math_ops.not_equal(string_ops.substr(line, 0, 1), comment)

  def filename_to_dataset(filename):
    ds = core_readers.TextLineDataset(filename)
    if header:
      ds = ds.skip(1)
    if comment is not None:
      ds = ds.filter(filter_fn)
    return ds

  def decode_csv(line):
    """Decodes CSV line into features.

    Args:
      line: String tensor corresponding to one csv record.
    Returns:
      A dictionary of feature names to values for that particular record. If
      label_name is provided, extracts the label feature to be returned as the
      second element of the tuple.
    """
    columns = parsing_ops.decode_csv(
        line,
        column_defaults,
        field_delim=field_delim,
        use_quote_delim=use_quote_delim,
        na_value=na_value,
    )
    features = dict(zip(column_names, columns))
    if label_name is not None:
      label = features.pop(label_name)
      return features, label
    return features

  # Read files sequentially or in parallel
  dataset = dataset.apply(
      interleave_ops.parallel_interleave(
          filename_to_dataset, cycle_length=num_parallel_reads, sloppy=sloppy))

  if num_epochs != 1 and shuffle:
    # Use shuffle_and_repeat for perf
    dataset = dataset.apply(
        shuffle_ops.shuffle_and_repeat(shuffle_buffer_size, num_epochs,
                                       shuffle_seed))
  elif shuffle:
    dataset = dataset.shuffle(shuffle_buffer_size, shuffle_seed)
  elif num_epochs != 1:
    dataset = dataset.repeat(num_epochs)

  # Use map_and_batch for perf
  # TODO(b/76425672): use num_parallel_calls for better performance tuning when
  # that is added
  dataset = dataset.apply(
      batching.map_and_batch(
          map_func=decode_csv,
          batch_size=batch_size,
          num_parallel_batches=int(
              ceil(num_parallel_parser_calls / batch_size))))

  dataset = dataset.prefetch(prefetch_buffer_size)
  return dataset
Developer: syed-ahmed, Project: tensorflow, Lines: 101, Source: readers.py
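For orientation, here is a hedged usage sketch of the function shown above via its public alias tf.contrib.data.make_csv_dataset; the file pattern and column name below are hypothetical, and a TF 1.x environment with tf.contrib is assumed. Internally the decode_csv map function and the batching are fused through map_and_batch, as Example 15 shows.

import tensorflow as tf

# Hypothetical CSV files with a header line "age,income,label"; the path is made up.
dataset = tf.contrib.data.make_csv_dataset(
    file_pattern="data/train-*.csv",
    batch_size=32,
    label_name="label",   # this column is split off and returned separately
    num_epochs=1,
    shuffle=True)

features, label = dataset.make_one_shot_iterator().get_next()
# `features` is a dict mapping column names to batched Tensors; `label` is a Tensor.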
Example 16: testMapAndBatch
def testMapAndBatch(self, num_parallel_calls, num_parallel_batches):
  """Test a dataset that maps a TF function across its input elements."""
  # The pipeline is TensorSliceDataset ->
  # RepeatDataset(count) -> MapAndBatchDataset(square_3, batch_size).
  components = (np.arange(7),
                np.array([[1, 2, 3]]) * np.arange(7)[:, np.newaxis],
                np.array(37.0) * np.arange(7))

  count = array_ops.placeholder(dtypes.int64, shape=[])
  batch_size = array_ops.placeholder(dtypes.int64, shape=[])

  def _map_fn(x, y, z):
    return math_ops.square(x), math_ops.square(y), math_ops.square(z)

  iterator = (
      dataset_ops.Dataset.from_tensor_slices(components).repeat(count).apply(
          batching.map_and_batch(
              map_func=_map_fn,
              batch_size=batch_size,
              num_parallel_calls=num_parallel_calls,
              num_parallel_batches=num_parallel_batches))
      .make_initializable_iterator())
  init_op = iterator.initializer
  get_next = iterator.get_next()

  self.assertEqual([[None] + list(c.shape[1:]) for c in components],
                   [t.shape.as_list() for t in get_next])

  with self.cached_session() as sess:
    # Batch of a finite input, where the batch_size divides the
    # total number of elements.
    sess.run(init_op, feed_dict={count: 28, batch_size: 14})
    num_batches = (28 * 7) // 14
    for i in range(num_batches):
      result = sess.run(get_next)
      for component, result_component in zip(components, result):
        for j in range(14):
          self.assertAllEqual(component[(i * 14 + j) % 7]**2,
                              result_component[j])
    with self.assertRaises(errors.OutOfRangeError):
      sess.run(get_next)

    # Batch of a finite input, where the batch_size does not
    # divide the total number of elements.
    sess.run(init_op, feed_dict={count: 14, batch_size: 8})

    # We expect (num_batches - 1) full-sized batches.
    num_batches = int(math.ceil((14 * 7) / 8))
    for i in range(num_batches - 1):
      result = sess.run(get_next)
      for component, result_component in zip(components, result):
        for j in range(8):
          self.assertAllEqual(component[(i * 8 + j) % 7]**2,
                              result_component[j])
    result = sess.run(get_next)
    for component, result_component in zip(components, result):
      for j in range((14 * 7) % 8):
        self.assertAllEqual(component[((num_batches - 1) * 8 + j) % 7]**2,
                            result_component[j])
    with self.assertRaises(errors.OutOfRangeError):
      sess.run(get_next)

    # Batch of an empty input should fail straight away.
    sess.run(init_op, feed_dict={count: 0, batch_size: 8})
    with self.assertRaises(errors.OutOfRangeError):
      sess.run(get_next)

    # Empty batch should be an initialization time error.
    with self.assertRaises(errors.InvalidArgumentError):
      sess.run(init_op, feed_dict={count: 14, batch_size: 0})
Developer: Jordan1237, Project: tensorflow, Lines: 70, Source: batch_dataset_op_test.py
Example 17: make_tf_record_dataset
def make_tf_record_dataset(
    file_pattern,
    batch_size,
    parser_fn=None,
    num_epochs=None,
    shuffle=True,
    shuffle_buffer_size=None,
    shuffle_seed=None,
    prefetch_buffer_size=None,
    num_parallel_reads=None,
    num_parallel_parser_calls=None,
    drop_final_batch=False):
  """Reads and optionally parses TFRecord files into a dataset.

  Provides common functionality such as batching, optional parsing, shuffling,
  and performant defaults.

  Args:
    file_pattern: List of files or patterns of TFRecord file paths.
      See @{tf.gfile.Glob} for pattern rules.
    batch_size: An int representing the number of records to combine
      in a single batch.
    parser_fn: (Optional.) A function accepting string input to parse
      and process the record contents. This function must map records
      to components of a fixed shape, so they may be batched. By
      default, uses the record contents unmodified.
    num_epochs: (Optional.) An int specifying the number of times this
      dataset is repeated. If None (the default), cycles through the
      dataset forever.
    shuffle: (Optional.) A bool that indicates whether the input
      should be shuffled. Defaults to `True`.
    shuffle_buffer_size: (Optional.) Buffer size to use for
      shuffling. A large buffer size ensures better shuffling, but
      increases memory usage and startup time.
    shuffle_seed: (Optional.) Randomization seed to use for shuffling.
    prefetch_buffer_size: (Optional.) An int specifying the number of
      feature batches to prefetch for performance improvement.
      Defaults to auto-tune. Set to 0 to disable prefetching.
    num_parallel_reads: (Optional.) Number of threads used to read
      records from files. By default or if set to a value >1, the
      results will be interleaved.
    num_parallel_parser_calls: (Optional.) Number of parallel
      records to parse in parallel. Defaults to an automatic selection.
    drop_final_batch: (Optional.) Whether the last batch should be
      dropped in case its size is smaller than `batch_size`; the
      default behavior is not to drop the smaller batch.

  Returns:
    A dataset, where each element matches the output of `parser_fn`
    except it will have an additional leading `batch-size` dimension,
    or a `batch_size`-length 1-D tensor of strings if `parser_fn` is
    unspecified.
  """
  files = dataset_ops.Dataset.list_files(
      file_pattern, shuffle=shuffle, seed=shuffle_seed)

  if num_parallel_reads is None:
    # Note: We considered auto-tuning this value, but there is a concern
    # that this affects the mixing of records from different files, which
    # could affect training convergence/accuracy, so we are defaulting to
    # a constant for now.
    num_parallel_reads = 24
  dataset = core_readers.TFRecordDataset(
      files, num_parallel_reads=num_parallel_reads)

  if shuffle_buffer_size is None:
    # TODO(josh11b): Auto-tune this value when not specified
    shuffle_buffer_size = 10000
  dataset = _maybe_shuffle_and_repeat(
      dataset, num_epochs, shuffle, shuffle_buffer_size, shuffle_seed)

  if parser_fn is None:
    if drop_final_batch:
      dataset = dataset.apply(batching.batch_and_drop_remainder(batch_size))
    else:
      dataset = dataset.batch(batch_size)
  else:
    # TODO(josh11b): if num_parallel_parser_calls is None, use some function
    # of num cores instead of map_and_batch's default behavior of one batch.
    dataset = dataset.apply(batching.map_and_batch(
        parser_fn, batch_size, num_parallel_calls=num_parallel_parser_calls,
        drop_remainder=drop_final_batch))

  if prefetch_buffer_size is None:
    prefetch_buffer_size = -1  # tf.config.data.AUTOTUNE
  if prefetch_buffer_size == 0:
    return dataset
  else:
    return dataset.prefetch(buffer_size=prefetch_buffer_size)
Developer: jfreedman0, Project: tensorflow, Lines: 89, Source: readers.py
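As a usage note, the function above is exposed as tf.contrib.data.make_tf_record_dataset. The sketch below uses a hypothetical file pattern and feature schema and assumes a TF 1.x environment; passing a parser_fn routes the pipeline through the map_and_batch branch shown above.

import tensorflow as tf

def parse_fn(record):
  # Hypothetical schema: each serialized tf.Example holds one float feature "x".
  parsed = tf.parse_single_example(
      record, {"x": tf.FixedLenFeature([], tf.float32)})
  return parsed["x"]

# The file pattern is made up; with parser_fn set, parsing and batching are fused.
dataset = tf.contrib.data.make_tf_record_dataset(
    file_pattern="data/train-*.tfrecord",
    batch_size=64,
    parser_fn=parse_fn,
    num_epochs=1)

batch = dataset.make_one_shot_iterator().get_next()  # a [batch_size] float vector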
Example 18: minibatch
def minibatch(self, dataset, subset, use_datasets, cache_data,
              shift_ratio=-1):
  if shift_ratio < 0:
    shift_ratio = self.shift_ratio
  with tf.name_scope('batch_processing'):
    # Build final results per split.
    images = [[] for _ in range(self.num_splits)]
    labels = [[] for _ in range(self.num_splits)]
    if use_datasets:
      glob_pattern = dataset.tf_record_pattern(subset)
      file_names = gfile.Glob(glob_pattern)
      if not file_names:
        raise ValueError('Found no files in --data_dir matching: {}'
                         .format(glob_pattern))
      ds = tf.data.TFRecordDataset.list_files(file_names)
      ds = ds.apply(
          interleave_ops.parallel_interleave(
              tf.data.TFRecordDataset, cycle_length=10))
      if cache_data:
        ds = ds.take(1).cache().repeat()
      counter = tf.data.Dataset.range(self.batch_size)
      counter = counter.repeat()
      ds = tf.data.Dataset.zip((ds, counter))
      ds = ds.prefetch(buffer_size=self.batch_size)
      ds = ds.shuffle(buffer_size=10000)
      ds = ds.repeat()
      ds = ds.apply(
          batching.map_and_batch(
              map_func=self.parse_and_preprocess,
              batch_size=self.batch_size_per_split,
              num_parallel_batches=self.num_splits))
      ds = ds.prefetch(buffer_size=self.num_splits)
      ds_iterator = ds.make_one_shot_iterator()
      for d in xrange(self.num_splits):
        labels[d], images[d] = ds_iterator.get_next()
    else:
      record_input = data_flow_ops.RecordInput(
          file_pattern=dataset.tf_record_pattern(subset),
          seed=301,
          parallelism=64,
          buffer_size=10000,
          batch_size=self.batch_size,
          shift_ratio=shift_ratio,
          name='record_input')
      records = record_input.get_yield_op()
      records = tf.split(records, self.batch_size, 0)
      records = [tf.reshape(record, []) for record in records]
      for idx in xrange(self.batch_size):
        value = records[idx]
        (label, image) = self.parse_and_preprocess(value, idx)
        split_index = idx % self.num_splits
        labels[split_index].append(label)
        images[split_index].append(image)

    for split_index in xrange(self.num_splits):
      if not use_datasets:
        images[split_index] = tf.parallel_stack(images[split_index])
        labels[split_index] = tf.concat(labels[split_index], 0)
      images[split_index] = tf.cast(images[split_index], self.dtype)
      depth = 3
      images[split_index] = tf.reshape(
          images[split_index],
          shape=[self.batch_size_per_split, self.height, self.width, depth])
      labels[split_index] = tf.reshape(labels[split_index],
                                       [self.batch_size_per_split])
    return images, labels
Developer: jmazanec15, Project: deep-learning-benchmark-mirror, Lines: 67, Source: preprocessing.py
Note: The tensorflow.contrib.data.python.ops.batching.map_and_batch examples in this article were compiled by 纯净天空 from GitHub/MSDocs and other source-code and documentation platforms. The snippets were selected from open-source projects contributed by many developers; copyright of the source code remains with the original authors, and redistribution or use should follow the corresponding project's license. Do not reproduce without permission.