This article collects typical usage examples of the Python function tensorflow.python.framework.test_util.create_local_cluster. If you have been wondering what create_local_cluster does, or how to use it in practice, the curated code examples below should help.
Twenty code examples of create_local_cluster are shown below, sorted by popularity by default. You can upvote the examples you find useful; your feedback helps the system recommend better Python code samples.
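For orientation, here is a minimal sketch (assuming a TF 1.x graph-mode environment; the two-worker/one-ps cluster size is chosen purely for illustration) of what create_local_cluster returns: a pair of lists of in-process tf.train.Server objects, whose .target fields are gRPC addresses a Session can connect to.

# Minimal sketch: create_local_cluster(num_workers, num_ps) returns
# (worker_servers, ps_servers), two lists of in-process tf.train.Server
# objects. The cluster sizes here are illustrative, not prescriptive.
from tensorflow.python.client import session
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import test_util

workers, ps_servers = test_util.create_local_cluster(2, 1)

# Each server exposes a gRPC target that a Session can connect to.
with session.Session(workers[0].target) as sess:
  print(sess.run(constant_op.constant(1.0)))  # -> 1.0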
Example 1: testDistributedOOM

def testDistributedOOM(self):
  if not test.is_gpu_available():
    return
  ops.reset_default_graph()
  workers, _ = test_util.create_local_cluster(2, 0)

  with ops.device('/job:worker/replica:0/task:0/gpu:0'):
    a = random_ops.random_normal([1, 10000, 20000], name='test_random1')
  with ops.device('/job:worker/replica:0/task:1/gpu:0'):
    b = random_ops.random_normal([30000, 10000, 1], name='test_random2')
    c = a * b

  try:
    with session.Session(workers[1].target) as sess:
      sess.run(c, options=config_pb2.RunOptions(
          report_tensor_allocations_upon_oom=True))
  except Exception as e:  # pylint: disable=broad-except
    exception_str = '%s' % e
    # test_random2 is reported because it is allocated on worker 1.
    self.assertTrue('Current usage from device: '
                    '/job:worker/replica:0/task:1/device:GPU:0, '
                    'allocator: GPU_0_bfc' in exception_str)
    mat = re.search('(.*)GiB from test_random2/RandomStandardNormal',
                    exception_str)
    self.assertGreater(float(mat.group(1)), 0.0)
    # test_random1 is not reported because it is allocated on worker 0.
    mat = re.search('(.*)MiB from test_random1/RandomStandardNormal',
                    exception_str)
    self.assertTrue(mat is None)

Author: andrewharp | Project: tensorflow | Lines: 30 | Source: model_analyzer_test.py
Example 2: testImplicitDisposeParallelMapDataset

def testImplicitDisposeParallelMapDataset(self):
  # Tests whether a parallel map dataset will be cleaned up correctly when
  # the pipeline does not run it until exhaustion.
  # The pipeline is TensorSliceDataset -> MapDataset(square_3) ->
  # RepeatDataset(None) -> PrefetchDataset(10000).
  worker, _ = test_util.create_local_cluster(1, 1)

  components = (np.arange(1000),
                np.array([[1, 2, 3]]) * np.arange(1000)[:, np.newaxis],
                np.array(37.0) * np.arange(1000))

  def _map_fn(x, y, z):
    return math_ops.square(x), math_ops.square(y), math_ops.square(z)

  dataset = (
      dataset_ops.Dataset.from_tensor_slices(components).map(_map_fn)
      .repeat(None).prefetch(10000))

  iterator = dataset.make_initializable_iterator()
  init_op = iterator.initializer
  get_next = iterator.get_next()

  with session.Session(worker[0].target) as sess:
    self.evaluate(init_op)
    for _ in range(3):
      sess.run(get_next)

Author: JonathanRaiman | Project: tensorflow | Lines: 26 | Source: iterator_ops_cluster_test.py
Example 3: testRemoteDeviceInPartitionedCallOp

def testRemoteDeviceInPartitionedCallOp(self):
  workers, _ = test_util.create_local_cluster(2, 0)

  worker0_device = "/job:worker/replica:0/task:0/cpu:0"
  worker1_device = "/job:worker/replica:0/task:1/cpu:0"

  @eager_def_function.function
  def f(a, b):
    return a + b

  with session.Session(workers[0].target) as sess:
    with ops.device(worker0_device):
      a = variable_scope.get_variable(
          "a", initializer=constant_op.constant(1.), use_resource=True)
    with ops.device(worker1_device):
      b = variable_scope.get_variable(
          "b", initializer=constant_op.constant(1.), use_resource=True)
    sess.run(variables.global_variables_initializer())

  config = config_pb2.ConfigProto()
  config.experimental.share_cluster_devices_in_session = True

  with session.Session(workers[0].target, config=config) as sess:
    res = sess.run(f(a, b))
    self.assertEqual(res, 2)

Author: aritratony | Project: tensorflow | Lines: 27 | Source: functional_ops_test.py
Example 4: testCaptureHashTableInSharedIterator

def testCaptureHashTableInSharedIterator(self):
  worker, _ = test_util.create_local_cluster(1, 1)

  # NOTE(mrry): We must use the V2 variants of `HashTable`
  # etc. because these produce a `tf.resource`-typed output that is
  # compatible with the in-graph function implementation.
  default_val = -1
  keys = constant_op.constant(["brain", "salad", "surgery"])
  values = constant_op.constant([0, 1, 2], dtypes.int64)
  table = lookup_ops.HashTable(
      lookup_ops.KeyValueTensorInitializer(keys, values),
      default_val,
      shared_name="shared_table")

  input_sentences = dataset_ops.Dataset.from_tensor_slices(
      ["brain brain tank salad surgery", "surgery brain"])

  iterator = (
      input_sentences.map(lambda x: string_ops.string_split([x]).values).map(
          table.lookup)
      .make_initializable_iterator(shared_name="shared_iterator"))
  init_op = iterator.initializer
  get_next = iterator.get_next()

  with session.Session(worker[0].target) as sess:
    self.evaluate(table.initializer)
    self.evaluate(init_op)
    self.assertAllEqual([0, 0, -1, 1, 2], self.evaluate(get_next))

  with session.Session(worker[0].target) as sess:
    self.assertAllEqual([2, 0], self.evaluate(get_next))
    with self.assertRaises(errors.OutOfRangeError):
      sess.run(get_next)

Author: JonathanRaiman | Project: tensorflow | Lines: 33 | Source: iterator_ops_cluster_test.py
Example 5: testRemoteFunction

def testRemoteFunction(self):
  worker_config = config_pb2.ConfigProto()
  worker_config.device_count["CPU"] = 2
  worker, _ = test_util.create_local_cluster(
      1, 1, worker_config=worker_config)

  @function.Defun(dtypes.int32, dtypes.int32)
  def _remote_fn(a, b):
    return math_ops.multiply(a, b)

  with ops.device("/job:ps/task:0"):
    a = variables.Variable(2, dtype=dtypes.int32)
    b = variables.Variable(3, dtype=dtypes.int32)

  with ops.device("/job:worker/replica:0/task:0/cpu:0"):
    remote_op = functional_ops.remote_call(
        args=[a, b],
        Tout=[dtypes.int32],
        f=_remote_fn,
        target="/job:worker/replica:0/task:0/cpu:1")

  with session.Session(worker[0].target) as sess:
    self.evaluate(variables.global_variables_initializer())
    mul = self.evaluate(remote_op)
    self.assertEqual(mul, [6])

Author: adit-chandra | Project: tensorflow | Lines: 25 | Source: functional_ops_test.py
Example 6: testRemoteIteratorUsingRemoteCallOp

def testRemoteIteratorUsingRemoteCallOp(self):
  worker_config = config_pb2.ConfigProto()
  worker_config.device_count["CPU"] = 2
  worker, _ = test_util.create_local_cluster(
      1, 1, worker_config=worker_config)

  self._testRemoteIteratorHelper("/job:worker/replica:0/task:0/cpu:0",
                                 "/job:worker/replica:0/task:0/cpu:1",
                                 worker[0].target)

Author: JonathanRaiman | Project: tensorflow | Lines: 9 | Source: iterator_ops_cluster_test.py
Example 7: setUpClass

@classmethod
def setUpClass(cls):
  # We have to create a global in-process cluster because once an in-process
  # tensorflow server is created, there is no way to terminate it. Please see
  # multi_worker_test_base.py for more details.
  cls._workers, cls._ps = test_util.create_local_cluster(
      NUM_WORKERS, num_ps=NUM_PS)
  cls._cluster_spec = {
      WORKER: [_bytes_to_str(w.target) for w in cls._workers],
      PS: [_bytes_to_str(ps.target) for ps in cls._ps]
  }

Author: dan-lennox | Project: tensorflow | Lines: 10 | Source: distribute_coordinator_test.py
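The _bytes_to_str helper referenced above is defined elsewhere in the same test file. A minimal, hypothetical reconstruction consistent with how it is used here (server .target values may be bytes under Python 3) could look like:

def _bytes_to_str(maybe_bytes):
  # Hypothetical sketch of the helper referenced above: decode a bytes
  # gRPC target such as b"grpc://localhost:12345" to str, and pass str
  # values through unchanged. The real definition lives in
  # distribute_coordinator_test.py.
  if isinstance(maybe_bytes, bytes):
    return maybe_bytes.decode("utf-8")
  return maybe_bytes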
Example 8: _test_device_and_input_device_are_colocated

def _test_device_and_input_device_are_colocated(self, strategy):
  if context.executing_eagerly():
    self.skipTest(
        "cross-device tests are not supported with eager execution.")
  workers, _ = test_util.create_local_cluster(2, 0)
  inputs = strategy.make_input_fn_iterator(
      lambda _: dataset_ops.Dataset.range(5))
  comm_fn = lambda x: x + 1
  run_op = strategy.experimental_run(comm_fn, inputs)
  with session_lib.Session(target=workers[1].target) as sess:
    sess.run(inputs.initialize())
    sess.run(run_op)

Author: adit-chandra | Project: tensorflow | Lines: 12 | Source: strategy_test_lib.py
Example 9: testSerialize

def testSerialize(self):
  worker = test_util.create_local_cluster(num_workers=1, num_ps=1)[0][0]
  with ops.Graph().as_default(), session.Session(target=worker.target):
    with ops.device("/job:worker"):
      t = constant_op.constant([[1.0], [2.0]])
      l = list_ops.tensor_list_from_tensor(t, element_shape=[1])
    with ops.device("/job:ps"):
      l_ps = array_ops.identity(l)
      l_ps, e = list_ops.tensor_list_pop_back(
          l_ps, element_dtype=dtypes.float32)
    with ops.device("/job:worker"):
      worker_e = array_ops.identity(e)
    self.assertAllEqual(self.evaluate(worker_e), [2.0])

Author: aeverall | Project: tensorflow | Lines: 13 | Source: list_ops_test.py
Example 10: testSerializeListWithUnknownRank

def testSerializeListWithUnknownRank(self):
  worker = test_util.create_local_cluster(num_workers=1, num_ps=1)[0][0]
  with ops.Graph().as_default(), session.Session(target=worker.target):
    with ops.device("/job:worker"):
      t = constant_op.constant([[1.0], [2.0]])
      l = list_ops.tensor_list_from_tensor(t, element_shape=None)
    with ops.device("/job:ps"):
      l_ps = array_ops.identity(l)
      element_shape = list_ops.tensor_list_element_shape(
          l_ps, shape_type=dtypes.int32)
    with ops.device("/job:worker"):
      element_shape = array_ops.identity(element_shape)
    self.assertEqual(self.evaluate(element_shape), -1)

Author: aeverall | Project: tensorflow | Lines: 13 | Source: list_ops_test.py
Example 11: testRemoteIteratorUsingRemoteCallOp

def testRemoteIteratorUsingRemoteCallOp(self):
  worker_config = config_pb2.ConfigProto()
  worker_config.device_count["CPU"] = 2
  worker, _ = test_util.create_local_cluster(
      1, 1, worker_config=worker_config)

  with ops.device("/job:worker/replica:0/task:0/cpu:1"):
    dataset_3 = dataset_ops.Dataset.from_tensor_slices([1, 2, 3])
    iterator_3 = dataset_3.make_one_shot_iterator()
    iterator_3_handle = iterator_3.string_handle()

  @function.Defun(dtypes.string)
  def _remote_fn(h):
    remote_iterator = dataset_ops.Iterator.from_string_handle(
        h, dataset_3.output_types, dataset_3.output_shapes)
    return remote_iterator.get_next()

  with ops.device("/job:worker/replica:0/task:0/cpu:0"):
    target_placeholder = array_ops.placeholder(dtypes.string, shape=[])
    remote_op = functional_ops.remote_call(
        args=[iterator_3_handle],
        Tout=[dtypes.int32],
        f=_remote_fn,
        target=target_placeholder)

  with session.Session(worker[0].target) as sess:
    elem = sess.run(
        remote_op,
        feed_dict={target_placeholder: "/job:worker/replica:0/task:0/cpu:1"})
    self.assertEqual(elem, [1])

    # Fails when target is cpu:0 where the resource is not located.
    with self.assertRaises(errors.InvalidArgumentError):
      sess.run(
          remote_op,
          feed_dict={
              target_placeholder: "/job:worker/replica:0/task:0/cpu:0"
          })

    elem = sess.run(
        remote_op,
        feed_dict={target_placeholder: "/job:worker/replica:0/task:0/cpu:1"})
    self.assertEqual(elem, [2])
    elem = sess.run(
        remote_op,
        feed_dict={target_placeholder: "/job:worker/replica:0/task:0/cpu:1"})
    self.assertEqual(elem, [3])

    with self.assertRaises(errors.OutOfRangeError):
      sess.run(
          remote_op,
          feed_dict={
              target_placeholder: "/job:worker/replica:0/task:0/cpu:1"
          })

Author: 1000sprites | Project: tensorflow | Lines: 51 | Source: iterator_ops_cluster_test.py
Example 12: setUpClass

@classmethod
def setUpClass(cls):
  """Create a local cluster with 2 workers."""
  num_workers = 2
  # Leave some memory for the CUDA runtime.
  gpu_mem_frac = 0.7 / num_workers
  default_config = config_pb2.ConfigProto()
  default_config.gpu_options.per_process_gpu_memory_fraction = gpu_mem_frac

  # The local cluster takes some portion of the local GPUs, and there is no
  # way for the cluster to terminate unless using multiple processes.
  # Therefore, we create only one cluster throughout a test process.
  workers, _ = test_util.create_local_cluster(
      num_workers, num_ps=0, worker_config=default_config)
  cls._master_target = workers[0].target

Author: BhaskarNallani | Project: tensorflow | Lines: 14 | Source: multi_worker_test_base.py
Example 13: testSerializeListWithInvalidTensors

def testSerializeListWithInvalidTensors(self):
  worker = test_util.create_local_cluster(num_workers=1, num_ps=1)[0][0]
  with ops.Graph().as_default(), session.Session(target=worker.target):
    with ops.device("/job:worker"):
      l = list_ops.tensor_list_reserve(
          element_dtype=dtypes.float32, element_shape=[], num_elements=2)
      l = list_ops.tensor_list_set_item(l, 0, 1.)
    with ops.device("/job:ps"):
      l_ps = array_ops.identity(l)
      l_ps = list_ops.tensor_list_set_item(l_ps, 1, 2.)
      t = list_ops.tensor_list_stack(l_ps, element_dtype=dtypes.float32)
    with ops.device("/job:worker"):
      worker_t = array_ops.identity(t)
    self.assertAllEqual(self.evaluate(worker_t), [1.0, 2.0])

Author: aeverall | Project: tensorflow | Lines: 14 | Source: list_ops_test.py
Example 14: _test_device_and_input_device_are_colocated_with_function

def _test_device_and_input_device_are_colocated_with_function(self, strategy):
  if context.executing_eagerly():
    self.skipTest(
        "cross-device tests are not supported with eager execution.")
  workers, _ = test_util.create_local_cluster(2, 0)
  inputs = strategy.make_input_fn_iterator(
      lambda _: dataset_ops.Dataset.range(5))
  comm_fn = lambda x: x + 1
  experimental_run = def_function.function()(strategy.experimental_run)
  with ops.device("/job:worker/replica:0/task:1/device:CPU:0"):
    # The tf.function must be defined on the right device as well.
    run_op = experimental_run(comm_fn, inputs)
  with session_lib.Session(target=workers[1].target) as sess:
    sess.run(inputs.initialize())
    sess.run(run_op)

Author: adit-chandra | Project: tensorflow | Lines: 15 | Source: strategy_test_lib.py
Example 15: testSerializeListWithMaxNumElements

def testSerializeListWithMaxNumElements(self):
  if context.num_gpus():
    # TODO(b/119151861): Enable on GPU.
    return
  worker = test_util.create_local_cluster(num_workers=1, num_ps=1)[0][0]
  with ops.Graph().as_default(), session.Session(target=worker.target):
    with ops.device("/job:worker"):
      l = list_ops.empty_tensor_list(
          element_shape=-1, element_dtype=dtypes.float32, max_num_elements=2)
      l = list_ops.tensor_list_push_back(l, 1.)
    with ops.device("/job:ps"):
      l_ps = array_ops.identity(l)
      l_ps = list_ops.tensor_list_push_back(l_ps, 2.)
    with self.assertRaisesRegexp(errors.InvalidArgumentError,
                                 "Tried to push item into a full list"):
      with ops.device("/job:worker"):
        l_worker = array_ops.identity(l_ps)
        l_worker = list_ops.tensor_list_push_back(l_worker, 3.0)
        self.evaluate(l_worker)

Author: abhinav-upadhyay | Project: tensorflow | Lines: 19 | Source: list_ops_test.py
Example 16: testRemoteIteratorWithoutRemoteCallFail

def testRemoteIteratorWithoutRemoteCallFail(self):
  worker_config = config_pb2.ConfigProto()
  worker_config.device_count["CPU"] = 2
  worker, _ = test_util.create_local_cluster(
      1, 1, worker_config=worker_config)

  with ops.device("/job:worker/replica:0/task:0/cpu:1"):
    dataset_3 = dataset_ops.Dataset.from_tensor_slices([1, 2, 3])
    iterator_3 = dataset_3.make_one_shot_iterator()
    iterator_3_handle = iterator_3.string_handle()

  with ops.device("/job:worker/replica:0/task:0/cpu:0"):
    remote_it = dataset_ops.Iterator.from_string_handle(
        iterator_3_handle, dataset_3.output_types, dataset_3.output_shapes)
    get_next_op = remote_it.get_next()

  with session.Session(worker[0].target) as sess:
    with self.assertRaises(errors.InvalidArgumentError):
      sess.run(get_next_op)

Author: 1000sprites | Project: tensorflow | Lines: 19 | Source: iterator_ops_cluster_test.py
Example 17: testRemoteFunctionCrossProcess

def testRemoteFunctionCrossProcess(self):
  workers, _ = test_util.create_local_cluster(2, 1)

  @function.Defun(dtypes.float32, dtypes.float32)
  def _remote_fn(a, b):
    return math_ops.multiply(a, b)

  with ops.device("/job:ps/task:0"):
    a = variables.Variable(2, dtype=dtypes.float32)
    b = variables.Variable(3, dtype=dtypes.float32)

  with ops.device("/job:worker/replica:0/task:0/cpu:0"):
    remote_op = functional_ops.remote_call(
        args=[a, b],
        Tout=[dtypes.float32],
        f=_remote_fn,
        target="/job:worker/replica:0/task:1/cpu:0")[0] + 3.0

  with session.Session(workers[0].target) as sess:
    self.evaluate(variables.global_variables_initializer())
    mul = self.evaluate(remote_op)
    self.assertEqual(mul, 9)

Author: adit-chandra | Project: tensorflow | Lines: 22 | Source: functional_ops_test.py
Example 18: create_in_process_cluster

def create_in_process_cluster(num_workers, num_ps):
  """Create an in-process cluster that consists only of standard servers."""
  # Leave some memory for the CUDA runtime.
  gpu_mem_frac = 0.7 / num_workers
  worker_config = config_pb2.ConfigProto()
  worker_config.gpu_options.per_process_gpu_memory_fraction = gpu_mem_frac

  ps_config = config_pb2.ConfigProto()
  ps_config.device_count['GPU'] = 0

  # Create in-process servers. Once an in-process tensorflow server is
  # created, there is no way to terminate it, so we create one cluster per
  # test process. We could have started the server in another process and
  # then killed that process to terminate the server. The reasons we avoid
  # multiple processes are:
  # 1) it is more difficult to manage these processes;
  # 2) there is something global in CUDA such that if we initialize CUDA in
  #    the parent process, the child process cannot initialize it again and
  #    thus cannot use GPUs (https://stackoverflow.com/questions/22950047).
  return test_util.create_local_cluster(
      num_workers,
      num_ps=num_ps,
      worker_config=worker_config,
      ps_config=ps_config)

Author: dan-lennox | Project: tensorflow | Lines: 24 | Source: multi_worker_test_base.py
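A brief usage sketch of the helper above (hypothetical call site; the 2-worker/1-ps sizes are chosen for illustration):

# Build the in-process cluster once per test process and reuse it.
workers, ps_servers = create_in_process_cluster(num_workers=2, num_ps=1)
master_target = workers[0].target  # gRPC address of the first worker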
Example 19: testRemoteIteratorUsingRemoteCallOpCrossProcess

def testRemoteIteratorUsingRemoteCallOpCrossProcess(self):
  workers, _ = test_util.create_local_cluster(2, 1)
  self._testRemoteIteratorHelper("/job:worker/replica:0/task:0/cpu:0",
                                 "/job:worker/replica:0/task:1/cpu:0",
                                 workers[0].target)

Author: JonathanRaiman | Project: tensorflow | Lines: 6 | Source: iterator_ops_cluster_test.py
Example 20: test2Workers

def test2Workers(self):
  num_workers = 2
  replicas_to_aggregate = 2
  num_ps = 2
  workers, _ = create_local_cluster(num_workers=num_workers, num_ps=num_ps)

  # Creates and returns all the workers.
  sessions, graphs, train_ops = get_workers(num_workers,
                                            replicas_to_aggregate, workers)

  # Chief should have already initialized all the variables.
  var_0_g_0 = graphs[0].get_tensor_by_name("v0:0")
  var_1_g_0 = graphs[0].get_tensor_by_name("v1:0")
  local_step_0 = graphs[0].get_tensor_by_name("sync_rep_local_step:0")
  self.assertAllEqual(0.0, sessions[0].run(var_0_g_0))
  self.assertAllEqual(1.0, sessions[0].run(var_1_g_0))
  self.assertAllEqual(0, sessions[0].run(local_step_0))

  # Will just use session 1 to verify all the variables later.
  var_0_g_1 = graphs[1].get_tensor_by_name("v0:0")
  var_1_g_1 = graphs[1].get_tensor_by_name("v1:0")
  var_sparse_g_1 = graphs[1].get_tensor_by_name("v_sparse:0")
  local_step_1 = graphs[1].get_tensor_by_name("sync_rep_local_step:0")
  global_step = graphs[1].get_tensor_by_name("global_step:0")

  # The steps should also be initialized.
  self.assertAllEqual(0, sessions[1].run(global_step))
  self.assertAllEqual(0, sessions[1].run(local_step_1))
  self.assertAllClose([[3.0], [4.0]], sessions[1].run(var_sparse_g_1))

  # We have initial tokens in the queue so we can call this one by one. After
  # the first step, this will no longer work as there will be no more extra
  # tokens in the queue.
  sessions[0].run(train_ops[0])
  sessions[1].run(train_ops[1])

  # The global step should have been updated, and the variables should now
  # have the new values after the average of the gradients is applied.
  while sessions[1].run(global_step) != 1:
    time.sleep(0.01)
  self.assertAllClose(0 - (0.1 + 0.3) / 2 * 2.0, sessions[1].run(var_0_g_1))
  self.assertAllClose(1 - (0.9 + 1.1) / 2 * 2.0, sessions[1].run(var_1_g_1))
  self.assertAllClose([[3.0], [4.0 - (0.1 + 0.3) / 2 * 2.0]],
                      sessions[1].run(var_sparse_g_1))

  # The local step for both workers should still be 0 because the initial
  # tokens in the token queue are 0s. This means that the following
  # computation of the gradients will be wasted, as local_step is smaller
  # than the current global step. However, this only happens once when the
  # system just starts, and it is necessary to make the system robust for
  # the case when the chief gets restarted by errors/preemption/...
  self.assertAllEqual(0, sessions[0].run(local_step_0))
  self.assertAllEqual(0, sessions[1].run(local_step_1))

  sessions[0].run(train_ops[0])
  sessions[1].run(train_ops[1])

  # Although the global step should still be 1 as explained above, the local
  # step should now be updated to 1. The variables are still the same.
  self.assertAllEqual(1, sessions[1].run(global_step))
  self.assertAllEqual(1, sessions[0].run(local_step_0))
  self.assertAllEqual(1, sessions[1].run(local_step_1))
  self.assertAllClose(0 - (0.1 + 0.3) / 2 * 2.0, sessions[1].run(var_0_g_1))
  self.assertAllClose(1 - (0.9 + 1.1) / 2 * 2.0, sessions[1].run(var_1_g_1))

  # At this step, the token queue is empty. So the 2 workers need to work
  # together to proceed.
  threads = []
  threads.append(
      self.checkedThread(
          target=self._run, args=(train_ops[0], sessions[0])))
  threads.append(
      self.checkedThread(
          target=self._run, args=(train_ops[1], sessions[1])))

  # The two workers start to execute the train op.
  for thread in threads:
    thread.start()
  for thread in threads:
    thread.join()

  # The global step should now be 2, and the gradients should have been
  # applied twice.
  self.assertAllEqual(2, sessions[1].run(global_step))
  self.assertAllClose(0 - 2 * (0.1 + 0.3) / 2 * 2.0,
                      sessions[1].run(var_0_g_1))
  self.assertAllClose(1 - 2 * (0.9 + 1.1) / 2 * 2.0,
                      sessions[1].run(var_1_g_1))

Author: 1000sprites | Project: tensorflow | Lines: 88 | Source: sync_replicas_optimizer_test.py
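A note on the expected values in this test: the assertions imply that get_workers configures a learning rate of 2.0 and per-replica gradients of 0.1 and 0.3 for v0 (0.9 and 1.1 for v1). One synchronous step applies the averaged gradient, so v0 = 0.0 - (0.1 + 0.3)/2 * 2.0 = -0.4 and v1 = 1.0 - (0.9 + 1.1)/2 * 2.0 = -1.0; after the second applied step the values become -0.8 and -3.0, matching the final assertions.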
Note: The tensorflow.python.framework.test_util.create_local_cluster examples in this article were compiled by 纯净天空 from GitHub/MSDocs and other source-code and documentation hosting platforms. The snippets were selected from open-source projects contributed by many developers; copyright in the source code remains with the original authors, and distribution and use are governed by each project's License. Do not reproduce without permission.