本文整理汇总了Python中tensorflow.python.distribute.cluster_resolver.TPUClusterResolver类的典型用法代码示例。如果您正苦于以下问题:Python TPUClusterResolver类的具体用法?Python TPUClusterResolver怎么用?Python TPUClusterResolver使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了TPUClusterResolver类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: testSimpleSuccessfulRetrieval
def testSimpleSuccessfulRetrieval(self):
  """Resolves one healthy Cloud TPU node plus an explicit coordinator.

  The mocked Cloud TPU API returns a single node; the resulting cluster
  spec must contain a 'coordinator' job with the given address and a
  'worker' job with the node's ip:port, and master() must be the grpc
  address of the worker.
  """
  tpu_map = {
      'projects/test-project/locations/us-central1-c/nodes/test-tpu-1': {
          'ipAddress': '10.1.2.3',
          'port': '8470',
          'health': 'HEALTHY'
      }
  }

  resolver = TPUClusterResolver(
      project='test-project',
      zone='us-central1-c',
      tpu=['test-tpu-1'],
      coordinator_name='coordinator',
      coordinator_address='10.128.1.5:10203',
      credentials=None,
      service=self.mock_service_client(tpu_map=tpu_map))

  actual_cluster_spec = resolver.cluster_spec()
  expected_proto = """
  job { name: 'coordinator' tasks { key: 0 value: '10.128.1.5:10203' } }
  job { name: 'worker' tasks { key: 0 value: '10.1.2.3:8470' } }
  """
  self._verifyClusterSpecEquality(actual_cluster_spec, expected_proto)
  self.assertEqual(resolver.master(), 'grpc://10.1.2.3:8470')
开发者ID:terrytangyuan,项目名称:tensorflow,代码行数:25,代码来源:tpu_cluster_resolver_test.py
示例2: testGkeEnvironmentForPod
def testGkeEnvironmentForPod(self):
  """GKE endpoint env var with four endpoints resolves to a 4-worker pod.

  Uses try/finally so the environment variable is removed even if an
  assertion fails, keeping later tests isolated.
  """
  os.environ['KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS'] = ('grpc://10.120.27.5:8470,'
                                                   'grpc://10.120.27.6:8470,'
                                                   'grpc://10.120.27.7:8470,'
                                                   'grpc://10.120.27.8:8470')
  try:
    self.assertIn('KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS', os.environ)
    self.assertTrue(TPUClusterResolver._inGke())
    self.assertEqual(
        compat.as_bytes('grpc://10.120.27.5:8470,'
                        'grpc://10.120.27.6:8470,'
                        'grpc://10.120.27.7:8470,'
                        'grpc://10.120.27.8:8470'),
        compat.as_bytes(TPUClusterResolver._gkeEndpoints()))

    resolver = TPUClusterResolver()
    # The first endpoint in the list becomes the master.
    self.assertEqual(
        compat.as_bytes('grpc://10.120.27.5:8470'),
        compat.as_bytes(resolver.master()))
    actual_cluster_spec = resolver.cluster_spec()
    expected_proto = """
    job {
      name: 'worker'
      tasks { key: 0 value: '10.120.27.5:8470' }
      tasks { key: 1 value: '10.120.27.6:8470' }
      tasks { key: 2 value: '10.120.27.7:8470' }
      tasks { key: 3 value: '10.120.27.8:8470' }
    }
    """
    self._verifyClusterSpecEquality(actual_cluster_spec, expected_proto)
  finally:
    del os.environ['KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS']
开发者ID:terrytangyuan,项目名称:tensorflow,代码行数:32,代码来源:tpu_cluster_resolver_test.py
示例3: testNewNetworkEndpointFormat
def testNewNetworkEndpointFormat(self):
  """Resolves a node described with the newer 'networkEndpoints' field.

  Unlike the legacy top-level ipAddress/port fields, the new API format
  lists endpoints under 'networkEndpoints'; resolution must still yield
  the same worker/coordinator cluster spec.
  """
  tpu_map = {
      'projects/test-project/locations/us-central1-c/nodes/test-tpu-1': {
          'health': 'HEALTHY',
          'networkEndpoints': [{
              'ipAddress': '10.2.3.4',
              'port': 8470,
          }]
      }
  }

  resolver = TPUClusterResolver(
      project='test-project',
      zone='us-central1-c',
      tpu='test-tpu-1',
      coordinator_name='coordinator',
      coordinator_address='10.128.1.5:10203',
      credentials=None,
      service=self.mock_service_client(tpu_map=tpu_map))

  actual_cluster_spec = resolver.cluster_spec()
  expected_proto = """
  job { name: 'coordinator' tasks { key: 0 value: '10.128.1.5:10203' } }
  job { name: 'worker' tasks { key: 0 value: '10.2.3.4:8470' } }
  """
  self._verifyClusterSpecEquality(actual_cluster_spec, expected_proto)
  self.assertEqual('grpc://10.2.3.4:8470', resolver.master())
开发者ID:terrytangyuan,项目名称:tensorflow,代码行数:27,代码来源:tpu_cluster_resolver_test.py
示例4: testRetrieveProjectAndZoneFromMetadata
def testRetrieveProjectAndZoneFromMetadata(self):
  """Project/zone of None are auto-filled from the GCE metadata server.

  The mocked metadata server supplies project and zone; the coordinator
  address is auto-assigned, so the expected proto interpolates the
  resolver's chosen coordinator port.
  """
  tpu_map = {
      'projects/test-project/locations/us-central1-c/nodes/test-tpu-1': {
          'ipAddress': '10.1.2.3',
          'port': '8470',
          'health': 'HEALTHY'
      }
  }

  resolver = TPUClusterResolver(
      project=None,
      zone=None,
      tpu=['test-tpu-1'],
      credentials=None,
      service=self.mock_service_client(tpu_map=tpu_map),
      coordinator_name='coordinator')

  actual_cluster_spec = resolver.cluster_spec()
  expected_proto = """
  job {
    name: 'coordinator'
    tasks { key: 0 value: '10.128.1.2:%s' }
  }
  job {
    name: 'worker'
    tasks { key: 0 value: '10.1.2.3:8470' }
  }
  """ % resolver._coordinator_port
  # expected_proto is already a str; no extra str() conversion needed.
  self._verifyClusterSpecEquality(actual_cluster_spec, expected_proto)
  self.assertEqual(resolver.master(), 'grpc://10.1.2.3:8470')
开发者ID:terrytangyuan,项目名称:tensorflow,代码行数:30,代码来源:tpu_cluster_resolver_test.py
示例5: testNumAcceleratorsRetryFailure
def testNumAcceleratorsRetryFailure(self, mock_list_devices,
                                    mock_eager_list_devices):
  """num_accelerators() raises RuntimeError when device listing times out.

  Both the graph-mode and eager-mode device-listing paths are mocked to
  raise DeadlineExceededError, so every retry fails.
  (mock_list_devices / mock_eager_list_devices are injected by mock.patch
  decorators defined outside this snippet.)
  """
  resolver = TPUClusterResolver(tpu='')
  mock_list_devices.side_effect = errors.DeadlineExceededError(
      None, None, 'timeout')
  mock_eager_list_devices.side_effect = errors.DeadlineExceededError(
      None, None, 'timeout')
  with self.assertRaises(RuntimeError):
    resolver.num_accelerators()
开发者ID:perfmjs,项目名称:tensorflow,代码行数:9,代码来源:tpu_cluster_resolver_test.py
示例6: verifyShouldResolve
def verifyShouldResolve(self, tpu, should_resolve):
  """Helper: asserts _shouldResolve() equals `should_resolve` for `tpu`.

  Args:
    tpu: The TPU name/address string (or list) under test.
    should_resolve: Expected boolean result of resolver._shouldResolve().
  """
  resolver = TPUClusterResolver(
      project='test-project',
      zone='us-central1-c',
      tpu=tpu,
      coordinator_name=None,
      credentials=None,
      service=self.mock_service_client(tpu_map={}))
  self.assertEqual(should_resolve, resolver._shouldResolve(),
                   "TPU: '%s'" % tpu)
开发者ID:terrytangyuan,项目名称:tensorflow,代码行数:10,代码来源:tpu_cluster_resolver_test.py
示例7: testVerifySameCoreCount
def testVerifySameCoreCount(self):
  """_verify_and_return_same_core_count accepts uniform per-host core maps.

  A single host with 8 cores returns 8; two hosts with 2 cores each
  return 2; hosts with differing core counts raise RuntimeError.
  """
  self.assertEqual(
      TPUClusterResolver._verify_and_return_same_core_count(
          {0: [0, 1, 2, 3, 4, 5, 6, 7]}), 8)
  self.assertEqual(
      TPUClusterResolver._verify_and_return_same_core_count(
          {0: [0, 1], 1: [2, 3]}), 2)
  with self.assertRaises(RuntimeError):
    TPUClusterResolver._verify_and_return_same_core_count(
        {0: [0], 1: [1, 2]})
开发者ID:terrytangyuan,项目名称:tensorflow,代码行数:10,代码来源:tpu_cluster_resolver_test.py
示例8: testPodResolution
def testPodResolution(self):
  """A TPU pod with four network endpoints resolves to four worker tasks.

  master() must point at the first endpoint, and the auto-assigned
  coordinator port is interpolated into the expected proto.
  """
  tpu_map = {
      'projects/test-project/locations/us-central1-c/nodes/test-tpu-1': {
          'health':
              'HEALTHY',
          'networkEndpoints': [
              {
                  'ipAddress': '10.2.3.4',
                  'port': 8470,
              },
              {
                  'ipAddress': '10.2.3.5',
                  'port': 8470,
              },
              {
                  'ipAddress': '10.2.3.6',
                  'port': 8470,
              },
              {
                  'ipAddress': '10.2.3.7',
                  'port': 8470,
              },
          ]
      }
  }

  resolver = TPUClusterResolver(
      tpu='test-tpu-1',
      credentials=None,
      service=self.mock_service_client(tpu_map=tpu_map),
      coordinator_name='coordinator')

  actual_cluster_spec = resolver.cluster_spec()
  expected_proto = """
  job {
    name: 'coordinator',
    tasks { key: 0 value: '10.128.1.2:%s'}
  }
  job {
    name: 'worker'
    tasks { key: 0 value: '10.2.3.4:8470' }
    tasks { key: 1 value: '10.2.3.5:8470' }
    tasks { key: 2 value: '10.2.3.6:8470' }
    tasks { key: 3 value: '10.2.3.7:8470' }
  }
  """ % resolver._coordinator_port
  # expected_proto is already a str; no extra str() conversion needed.
  self._verifyClusterSpecEquality(actual_cluster_spec, expected_proto)
  self.assertEqual(resolver.master(), 'grpc://10.2.3.4:8470')
开发者ID:terrytangyuan,项目名称:tensorflow,代码行数:48,代码来源:tpu_cluster_resolver_test.py
示例9: initialize_tpu_system
def initialize_tpu_system(cluster_resolver=None):
  """Initialize the TPU devices in a separate session and graph.

  Args:
    cluster_resolver: A tf.contrib.cluster_resolver.TPUClusterResolver,
      which provides information about the TPU cluster.

  Returns:
    The tf.contrib.tpu.Topology object for the topology of the TPU cluster.
  """
  if cluster_resolver is None:
    cluster_resolver = TPUClusterResolver("")
  master = cluster_resolver.master()

  logging.info("Initializing the TPU system.")

  if context.executing_eagerly():
    # This function looks as it is for the following non-intuitive reasons.
    # tpu.initialize_system creates a dummy op whose sole purpose is to trigger
    # DistributedTPURewritePass. This pass actually adds real ops that
    # initialize the TPU system. Thus, we can't simply run tpu.initialize_system
    # eagerly. We need to wrap it in defun and trigger the rewrite passes on it.
    # The easiest way to trigger a rewrite is to run the function with
    # TPUPartitionedCallOp.
    @function.defun
    def _tpu_init_fn():
      return tpu.initialize_system()

    # We can't call _tpu_init_fn normally (because it contains just a dummy op,
    # see above) but need to define it to get it added to eager context
    # and get its assigned name.
    # pylint: disable=protected-access
    graph_func = _tpu_init_fn._get_concrete_function_internal()
    func_name = compat.as_str(graph_func._inference_function.name)
    # pylint: enable=protected-access

    output = tpu_functional_ops.TPUPartitionedCall(
        args=[], device_ordinal=0, Tout=[dtypes.string], f=func_name)
    serialized_topology = output[0].numpy()
  else:
    # Graph mode: run tpu.initialize_system in a throwaway session/graph so
    # the caller's default graph is not polluted.
    session_config = config_pb2.ConfigProto(allow_soft_placement=True)
    with ops.Graph().as_default():
      with session_lib.Session(config=session_config, target=master) as sess:
        serialized_topology = sess.run(tpu.initialize_system())

  logging.info("Finished initializing TPU system.")
  return topology.Topology(serialized=serialized_topology)
开发者ID:jackd,项目名称:tensorflow,代码行数:46,代码来源:tpu_strategy.py
示例10: testNotReadyCloudTpu
def testNotReadyCloudTpu(self):
  """cluster_spec() raises RuntimeError while the TPU node is still CREATING."""
  tpu_map = {
      'projects/test-project/locations/us-central1-c/nodes/test-tpu-1': {
          'ipAddress': '10.1.2.3',
          'port': '8470',
          'state': 'CREATING'
      }
  }

  resolver = TPUClusterResolver(
      project=None,
      zone=None,
      tpu='test-tpu-1',
      coordinator_name=None,
      credentials=None,
      service=self.mock_service_client(tpu_map=tpu_map))

  with self.assertRaises(RuntimeError):
    resolver.cluster_spec()
开发者ID:terrytangyuan,项目名称:tensorflow,代码行数:19,代码来源:tpu_cluster_resolver_test.py
示例11: testNumAcceleratorsSuccess
def testNumAcceleratorsSuccess(self, mock_list_devices):
  """num_accelerators() reports cores per worker, not the pod total.

  Four tasks with two TPU cores each -> 2.
  (mock_list_devices is injected by a mock.patch decorator defined outside
  this snippet.)
  """
  device_names = [
      '/job:tpu_worker/task:0/device:TPU:0',
      '/job:tpu_worker/task:1/device:TPU:1',
      '/job:tpu_worker/task:2/device:TPU:0',
      '/job:tpu_worker/task:3/device:TPU:1',
      '/job:tpu_worker/task:0/device:TPU:4',
      '/job:tpu_worker/task:1/device:TPU:5',
      '/job:tpu_worker/task:2/device:TPU:4',
      '/job:tpu_worker/task:3/device:TPU:5',
  ]
  device_list = [
      session._DeviceAttributes(name, 'TPU', 1024, 0)
      for name in device_names
  ]
  mock_list_devices.return_value = device_list

  resolver = TPUClusterResolver(tpu='')
  self.assertEqual(resolver.num_accelerators(), 2)
开发者ID:terrytangyuan,项目名称:tensorflow,代码行数:19,代码来源:tpu_cluster_resolver_test.py
示例12: initialize_tpu_system
def initialize_tpu_system(cluster_resolver=None):
  """Initialize the TPU devices in a separate session and graph.

  Args:
    cluster_resolver: A tf.contrib.cluster_resolver.TPUClusterResolver,
      which provides information about the TPU cluster.

  Returns:
    The tf.contrib.tpu.Topology object for the topology of the TPU cluster.
  """
  if cluster_resolver is None:
    cluster_resolver = TPUClusterResolver("")
  master = cluster_resolver.master()

  logging.info("Initializing the TPU system.")

  # Run tpu.initialize_system in a throwaway graph/session so the caller's
  # default graph is not polluted.
  session_config = config_pb2.ConfigProto(allow_soft_placement=True)
  with ops.Graph().as_default():
    with session_lib.Session(config=session_config, target=master) as sess:
      serialized_topology = sess.run(tpu.initialize_system())

  logging.info("Finished initializing TPU system.")
  return topology.Topology(serialized=serialized_topology)
开发者ID:ziky90,项目名称:tensorflow,代码行数:21,代码来源:tpu_strategy.py
示例13: testGetDeviceDictAndCoresWithCPUsAndGPUs
def testGetDeviceDictAndCoresWithCPUsAndGPUs(self):
  """_get_device_dict_and_cores ignores non-TPU (CPU/GPU) devices."""
  device_names = [
      '/job:tpu_worker/task:0/device:CPU:0',
      '/job:tpu_worker/task:1/device:CPU:0',
      '/job:tpu_worker/task:2/device:CPU:0',
      '/job:tpu_worker/task:3/device:CPU:0',
      '/job:tpu_worker/task:0/device:GPU:1',
      '/job:tpu_worker/task:1/device:GPU:1',
      '/job:tpu_worker/task:2/device:GPU:1',
      '/job:tpu_worker/task:3/device:GPU:1',
  ]
  device_list = [
      session._DeviceAttributes(name, 'XLA', 1024, 0)
      for name in device_names
  ]

  # No TPU devices present -> empty map and zero cores.
  device_dict, num_cores = TPUClusterResolver._get_device_dict_and_cores(
      device_list)
  self.assertEqual(num_cores, 0)
  self.assertEqual(device_dict, {})
开发者ID:terrytangyuan,项目名称:tensorflow,代码行数:20,代码来源:tpu_cluster_resolver_test.py
示例14: testOverrideTaskTypeAndIndexAndGetMaster
def testOverrideTaskTypeAndIndexAndGetMaster(self):
  """master() honors task_type/task_id overrides and per-call arguments.

  Default master is the first endpoint; setting resolver.task_type/task_id
  re-targets it, and explicit task_type/task_id/rpc_layer arguments to
  master() take precedence over the attributes.
  """
  tpu_map = {
      'projects/test-project/locations/us-central1-c/nodes/test-tpu-1': {
          'health':
              'HEALTHY',
          'networkEndpoints': [
              {
                  'ipAddress': '10.2.3.4',
                  'port': 8470,
              },
              {
                  'ipAddress': '10.2.3.5',
                  'port': 8470,
              },
              {
                  'ipAddress': '10.2.3.6',
                  'port': 8470,
              },
              {
                  'ipAddress': '10.2.3.7',
                  'port': 8470,
              },
          ]
      }
  }

  resolver = TPUClusterResolver(
      project='test-project',
      zone='us-central1-c',
      tpu='test-tpu-1',
      coordinator_name=None,
      credentials=None,
      service=self.mock_service_client(tpu_map=tpu_map))

  self.assertEqual(resolver.master(), 'grpc://10.2.3.4:8470')

  resolver.task_type = 'worker'
  resolver.task_id = 3
  self.assertEqual(resolver.master(), 'grpc://10.2.3.7:8470')

  self.assertEqual(
      resolver.master(
          task_type='worker', task_id=2, rpc_layer='test'),
      'test://10.2.3.6:8470')
开发者ID:terrytangyuan,项目名称:tensorflow,代码行数:44,代码来源:tpu_cluster_resolver_test.py
示例15: testGetDeviceDictAndCoresWithTPUs
def testGetDeviceDictAndCoresWithTPUs(self):
  """_get_device_dict_and_cores maps task index -> TPU core ids.

  Eight TPU devices spread over four tasks -> total_cores == 8 and a
  device_map keyed by task with that task's core ids.
  """
  device_names = [
      '/job:tpu_worker/task:0/device:TPU:0',
      '/job:tpu_worker/task:1/device:TPU:1',
      '/job:tpu_worker/task:2/device:TPU:0',
      '/job:tpu_worker/task:3/device:TPU:1',
      '/job:tpu_worker/task:0/device:TPU:4',
      '/job:tpu_worker/task:1/device:TPU:5',
      '/job:tpu_worker/task:2/device:TPU:4',
      '/job:tpu_worker/task:3/device:TPU:5',
  ]
  device_list = [
      session._DeviceAttributes(name, 'TPU', 1024, 0)
      for name in device_names
  ]

  device_details = TPUClusterResolver._get_device_dict_and_cores(
      device_list)
  self.assertEqual(device_details.total_cores, 8)
  self.assertEqual(device_details.device_map,
                   {'0': ['0', '4'],
                    '1': ['1', '5'],
                    '2': ['0', '4'],
                    '3': ['1', '5']})
开发者ID:terrytangyuan,项目名称:tensorflow,代码行数:24,代码来源:tpu_cluster_resolver_test.py
示例16: initialize_tpu_system
def initialize_tpu_system(cluster_resolver=None):
  """Initialize the TPU devices.

  Args:
    cluster_resolver: A tf.distribute.cluster_resolver.TPUClusterResolver,
      which provides information about the TPU cluster.

  Returns:
    The tf.tpu.Topology object for the topology of the TPU cluster.

  Raises:
    RuntimeError: If no TPU devices found for eager execution.
  """
  if cluster_resolver is None:
    cluster_resolver = TPUClusterResolver("")
  assert isinstance(cluster_resolver, TPUClusterResolver)

  tpu_name = compat.as_text(cluster_resolver._tpu)  # pylint: disable=protected-access
  if tpu_name in _INITIALIZED_TPU_SYSTEMS:
    # Bug fix: the original call had a %s placeholder but never passed
    # tpu_name, so the message rendered with a literal '%s'.
    logging.warning("TPU system %s has already been initialized. "
                    "Reinitializing the TPU can cause previously created "
                    "variables on TPU to be lost.", tpu_name)

  logging.info("Initializing the TPU system.")

  if context.executing_eagerly():
    # This function looks as it is for the following non-intuitive reasons.
    # tpu.initialize_system creates a dummy op whose sole purpose is to trigger
    # DistributedTPURewritePass. This pass actually adds real ops that
    # initialize the TPU system. Thus, we can't simply run tpu.initialize_system
    # eagerly. We need to wrap it in defun and trigger the rewrite passes on it.
    # The easiest way to trigger a rewrite is to run the function with
    # TPUPartitionedCallOp.
    @function.defun
    def _tpu_init_fn():
      return tpu.initialize_system()

    # We can't call _tpu_init_fn normally (because it contains just a dummy op,
    # see above) but need to define it to get it added to eager context
    # and get its assigned name.
    # pylint: disable=protected-access
    graph_func = _tpu_init_fn._get_concrete_function_internal()
    func_name = compat.as_str(graph_func._inference_function.name)
    # pylint: enable=protected-access

    tpu_devices = sorted(
        [x for x in context.list_devices() if "device:TPU:" in x])

    if not tpu_devices:
      raise RuntimeError("Could not find any TPU devices")

    # Pin the call to the host of the first TPU device so the partitioned
    # call is dispatched to the right worker.
    with ops.device(device_util.get_host_for_device(tpu_devices[0])):
      output = tpu_functional_ops.TPUPartitionedCall(
          args=[], device_ordinal=0, Tout=[dtypes.string], f=func_name)
    serialized_topology = output[0].numpy()
  else:
    master = cluster_resolver.master()
    session_config = config_pb2.ConfigProto(allow_soft_placement=True)
    with ops.Graph().as_default():
      with session_lib.Session(config=session_config, target=master) as sess:
        serialized_topology = sess.run(tpu.initialize_system())

  logging.info("Finished initializing TPU system.")
  tpu_topology = topology.Topology(serialized=serialized_topology)
  _INITIALIZED_TPU_SYSTEMS[tpu_name] = tpu_topology
  return tpu_topology
开发者ID:adit-chandra,项目名称:tensorflow,代码行数:66,代码来源:tpu_strategy_util.py
示例17: initialize_tpu_system
def initialize_tpu_system(cluster_resolver=None):
  """Initialize the TPU devices.

  Args:
    cluster_resolver: A tf.distribute.cluster_resolver.TPUClusterResolver,
      which provides information about the TPU cluster.

  Returns:
    The tf.tpu.Topology object for the topology of the TPU cluster.

  Raises:
    RuntimeError: If no TPU devices found for eager execution.
  """
  if cluster_resolver is None:
    cluster_resolver = TPUClusterResolver("")
  assert isinstance(cluster_resolver, TPUClusterResolver)

  tpu_name = compat.as_text(cluster_resolver._tpu)  # pylint: disable=protected-access
  if tpu_name in _INITIALIZED_TPU_SYSTEMS:
    # Bug fix: the original call had a %s placeholder but never passed
    # tpu_name, so the message rendered with a literal '%s'.
    logging.warning("TPU system %s has already been initialized. "
                    "Reinitializing the TPU can cause previously created "
                    "variables on TPU to be lost.", tpu_name)

  logging.info("Initializing the TPU system.")

  if context.executing_eagerly():
    # This function looks as it is for the following non-intuitive reasons.
    # tpu.initialize_system creates a dummy op whose sole purpose is to trigger
    # DistributedTPURewritePass. This pass actually adds real ops that
    # initialize the TPU system. Thus, we can't simply run tpu.initialize_system
    # eagerly. We need to wrap it in defun and trigger the rewrite passes on it.
    @function.defun
    def _tpu_init_fn():
      return tpu.initialize_system()

    tpu_devices = sorted(
        [x for x in context.list_devices() if "device:TPU:" in x])

    if not tpu_devices:
      raise RuntimeError("Could not find any TPU devices")

    # Replace the remote TPU device with the remote TPU_SYSTEM system device. As
    # in the remote TPU device case, we will try to compile it instead of
    # running through optimization passes and TF Executor, but TPU_SYSTEM should
    # work.
    tpu_system_device = tpu_devices[0].replace("TPU", "TPU_SYSTEM")

    with ops.device(tpu_system_device):
      output = _tpu_init_fn()
    serialized_topology = output.numpy()
  else:
    master = cluster_resolver.master()
    session_config = config_pb2.ConfigProto(allow_soft_placement=True)
    with ops.Graph().as_default():
      with session_lib.Session(config=session_config, target=master) as sess:
        serialized_topology = sess.run(tpu.initialize_system())

  logging.info("Finished initializing TPU system.")
  tpu_topology = topology.Topology(serialized=serialized_topology)
  _INITIALIZED_TPU_SYSTEMS[tpu_name] = tpu_topology
  return tpu_topology
开发者ID:aritratony,项目名称:tensorflow,代码行数:61,代码来源:tpu_strategy_util.py
示例18: testEnvironmentDiscoveryUrl
def testEnvironmentDiscoveryUrl(self):
  """_environmentDiscoveryUrl reads TPU_API_DISCOVERY_URL from the env.

  Wrapped in try/finally so the env var does not leak into other tests
  (the original left it set on failure and success alike).
  """
  os.environ['TPU_API_DISCOVERY_URL'] = 'https://{api}.internal/{apiVersion}'
  try:
    self.assertEqual('https://{api}.internal/{apiVersion}',
                     TPUClusterResolver._environmentDiscoveryUrl())
  finally:
    del os.environ['TPU_API_DISCOVERY_URL']
开发者ID:aeverall,项目名称:tensorflow,代码行数:4,代码来源:tpu_cluster_resolver_test.py
示例19: testEnvironmentAndRpcDetectionForGrpcString
def testEnvironmentAndRpcDetectionForGrpcString(self):
  """A grpc:// TPU string yields empty environment and 'grpc' rpc_layer."""
  resolver = TPUClusterResolver(tpu='grpc://10.1.2.3:8470')
  self.assertEqual(resolver.environment, '')
  self.assertEqual(resolver.rpc_layer, 'grpc')
  self.assertEqual(resolver.master(), 'grpc://10.1.2.3:8470')
开发者ID:terrytangyuan,项目名称:tensorflow,代码行数:6,代码来源:tpu_cluster_resolver_test.py
示例20: testNoCallComputeMetadata
def testNoCallComputeMetadata(self):
  """A /bns path skips Cloud resolution: master() echoes it, no cluster spec."""
  resolver = TPUClusterResolver(tpu='/bns/foo/bar')
  self.assertEqual('/bns/foo/bar', resolver.master())
  # Idiomatic None check (assertIsNone) instead of assertEqual(None, ...).
  self.assertIsNone(resolver.cluster_spec())
开发者ID:terrytangyuan,项目名称:tensorflow,代码行数:5,代码来源:tpu_cluster_resolver_test.py
注:本文中的tensorflow.python.distribute.cluster_resolver.TPUClusterResolver类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论