This article collects typical usage examples of the tvm.build function in Python. If you have been wondering what tvm.build does, or how to use it in practice, the curated code examples below should help.
20 code examples of the build function are presented, sorted by popularity by default.
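Before diving into the examples, here is a minimal, self-contained sketch of the typical compile-and-run flow around tvm.build, written against the same pre-0.7 TVM API the examples below use (tvm.placeholder, tvm.create_schedule, etc.); the tensor names and the vector size are illustrative assumptions, not taken from any example:

    import numpy as np
    import tvm

    # Describe the computation: elementwise add of two length-1024 vectors.
    n = 1024
    A = tvm.placeholder((n,), name='A')
    B = tvm.placeholder((n,), name='B')
    C = tvm.compute((n,), lambda i: A[i] + B[i], name='C')

    # Create a default schedule and compile it for the CPU.
    s = tvm.create_schedule(C.op)
    fadd = tvm.build(s, [A, B, C], 'llvm', name='vector_add')

    # Run the compiled function and check the result.
    ctx = tvm.cpu(0)
    a = tvm.nd.array(np.random.uniform(size=n).astype(A.dtype), ctx)
    b = tvm.nd.array(np.random.uniform(size=n).astype(B.dtype), ctx)
    c = tvm.nd.array(np.zeros(n, dtype=C.dtype), ctx)
    fadd(a, b, c)
    np.testing.assert_allclose(c.asnumpy(), a.asnumpy() + b.asnumpy())

The examples that follow show the same pattern in real test and deployment code: declare tensors, schedule, build for a target, and execute.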
Example 1: check_device
def check_device(device):
    ctx = tvm.context(device, 0)
    if not ctx.exist:
        print("Skip because %s is not enabled" % device)
        return
    if device == "cuda" and not tvm.contrib.nvcc.have_int8(ctx.compute_version):
        print("Skip because int8 intrinsics are not available")
        return
    print("Running on target: %s" % device)
    with tvm.target.create(device):
        C = topi.nn.group_conv2d_nchw(A, W, stride, padding, dilation, groups, out_dtype=dtype)
        if add_bias:
            C = topi.add(C, bias)
        if add_relu:
            C = topi.nn.relu(C)
        s = topi.generic.schedule_group_conv2d_nchw([C])

    a = tvm.nd.array(a_np, ctx)
    w = tvm.nd.array(w_np, ctx)
    b = tvm.nd.array(b_np, ctx)
    c = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=C.dtype), ctx)
    if add_bias:
        func = tvm.build(s, [A, W, bias, C], device,
                         name="relu_%d_%d_%d_%d_%d_%d_%d_%d_%d" %
                         (batch, in_channel, in_size, num_filter, kernel, stride, padding, dilation, groups))
        func(a, w, b, c)
    else:
        func = tvm.build(s, [A, W, C], device,
                         name="relu_%d_%d_%d_%d_%d_%d_%d_%d_%d" %
                         (batch, in_channel, in_size, num_filter, kernel, stride, padding, dilation, groups))
        func(a, w, c)
    tvm.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-5)
Author: LANHUIYING | Project: tvm | Lines: 31 | Source: test_topi_group_conv2d.py
Example 2: check_device
def check_device(device):
    ctx = tvm.context(device, 0)
    if not ctx.exist:
        print("Skip because %s is not enabled" % device)
        return
    print("Running on target: %s" % device)
    with tvm.target.create(device):
        C = topi.nn.conv2d_NCHWc(A, W, (stride, stride), (padding, padding),
                                 (dilation, dilation),
                                 layout='NCHW%dc' % ic_block,
                                 out_layout="NCHW%dc" % oc_block,
                                 out_dtype=dtype)
        if add_bias:
            C = topi.add(C, bias)
        if add_relu:
            C = topi.nn.relu(C)
        s = topi.generic.schedule_conv2d_NCHWc([C])

    a = tvm.nd.array(a_np, ctx)
    w = tvm.nd.array(w_np, ctx)
    b = tvm.nd.array(b_np, ctx)
    c = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=C.dtype), ctx)
    if add_bias:
        func = tvm.build(s, [A, W, bias, C], device,
                         name="relu_%d_%d_%d_%d_%d_%d_%d_%d" %
                         (batch, in_channel, in_size, num_filter, kernel, stride, padding, dilation))
        func(a, w, b, c)
    else:
        func = tvm.build(s, [A, W, C], device,
                         name="relu_%d_%d_%d_%d_%d_%d_%d_%d" %
                         (batch, in_channel, in_size, num_filter, kernel, stride, padding, dilation))
        func(a, w, c)
    tvm.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-3)
Author: bddppq | Project: tvm | Lines: 33 | Source: test_topi_conv2d_NCHWc.py
Example 3: check_device
def check_device(device):
    ctx = tvm.context(device, 0)
    if not ctx.exist:
        print("Skip because %s is not enabled" % device)
        return
    temp = util.tempdir()
    name = "myadd_%s" % device
    if sys.platform == "darwin" or sys.platform.startswith('linux'):
        f = tvm.build(s, [A, B], device, "llvm -system-lib", name=name)
    elif sys.platform == "win32":
        f = tvm.build(s, [A, B], device, "llvm", name=name)
    else:
        raise ValueError("Unsupported platform")
    path_dso = temp.relpath("dev_lib.so")
    f.export_library(path_dso)

    f1 = tvm.module.load(path_dso)
    a = tvm.nd.array(np.random.uniform(size=1024).astype(A.dtype), ctx)
    b = tvm.nd.array(np.zeros(1024, dtype=A.dtype), ctx)
    f1(a, b)
    np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1)
    if sys.platform != "win32":
        f2 = tvm.module.system_lib()
        f2[name](a, b)
        np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1)
Author: gwli | Project: tvm | Lines: 26 | Source: test_module_load.py
Example 4: _build
def _build(funcs, target, target_host):
    tvm_t = tvm.target.create(target)
    if tvm_t.device_name == "vta":
        return tvm.build(funcs, target="ext_dev", target_host=target_host)
    elif tvm_t.device_name == "rasp" or tvm_t.device_name == "vtacpu":
        return tvm.build(funcs, target=target_host)
    return tvm.build(funcs, target=target)
Author: LANHUIYING | Project: tvm | Lines: 7 | Source: vta_conv2d.py
Example 5: test_local_memory
def test_local_memory():
    N = 1024
    M = 128
    A = tvm.placeholder((N,), name='A', dtype='float32')
    B = tvm.compute((N, ), lambda i: A[i], name='B')
    s = tvm.create_schedule([B.op])
    AA = s.cache_read(A, "local", [B])
    o, i = s[B].split(s[B].op.axis[0], M)
    s[AA].compute_at(s[B], o)
    s[B].bind(o, tvm.thread_axis("blockIdx.x"))

    # local memory usage: M * 4B
    # thread usage: M
    for target in ['opencl', 'cuda']:
        if not tvm.context(target).exist:
            continue
        valid = [None]
        with tvm.build_config(**{"add_lower_pass": [
                (2, get_verify_pass(valid,
                                    max_local_memory_per_block=4 * M - 1,
                                    max_threads_per_block=1))]}):
            tvm.build(s, [A, B], target)
        assert not valid[0]

        with tvm.build_config(**{"add_lower_pass": [
                (2, get_verify_pass(valid,
                                    max_local_memory_per_block=4 * M,
                                    max_threads_per_block=1))]}):
            tvm.build(s, [A, B], target)
        assert valid[0]
Author: bddppq | Project: tvm | Lines: 34 | Source: test_pass_verify_gpu_code.py
Example 6: test_multiple_kernels
def test_multiple_kernels():
    N = 1024
    A = tvm.placeholder((N, N), name='A')
    B = tvm.compute((N, N), lambda i, j: A[i, j])
    C = tvm.compute((N, N), lambda i, j: B[i, j])
    s = tvm.create_schedule([C.op])
    s[C].bind(s[C].op.axis[1], tvm.thread_axis("threadIdx.x"))
    s[B].bind(s[B].op.axis[1], tvm.thread_axis("threadIdx.x"))

    # shared memory usage: 0
    # thread usage: N
    for target in ['opencl', 'cuda']:
        if not tvm.context(target).exist:
            continue
        valid = [None]
        with tvm.build_config(**{"add_lower_pass": [
                (2, get_verify_pass(valid,
                                    max_shared_memory_per_block=0,
                                    max_threads_per_block=N - 1))]}):
            tvm.build(s, [A, C], target)
        assert not valid[0]

        with tvm.build_config(**{"add_lower_pass": [
                (2, get_verify_pass(valid,
                                    max_shared_memory_per_block=0,
                                    max_threads_per_block=N))]}):
            tvm.build(s, [A, C], target)
        assert valid[0]
Author: bddppq | Project: tvm | Lines: 33 | Source: test_pass_verify_gpu_code.py
Example 7: check_device
def check_device(device):
    ctx = tvm.context(device, 0)
    if not ctx.exist:
        print("Skip because %s is not enabled" % device)
        return
    print("Running on target: %s" % device)
    with tvm.target.create(device):
        if device == 'llvm':
            out = non_max_suppression(data, valid_count, -1, nms_threshold, force_suppress, nms_topk, return_indices=False)
            indices_out = non_max_suppression(data, valid_count, -1, nms_threshold, force_suppress, nms_topk)
        else:
            out = topi.cuda.non_max_suppression(data, valid_count, -1, nms_threshold, force_suppress, nms_topk, return_indices=False)
            indices_out = topi.cuda.non_max_suppression(data, valid_count, -1, nms_threshold, force_suppress, nms_topk)
        s = topi.generic.schedule_nms(out)
        indices_s = topi.generic.schedule_nms(indices_out)

    tvm_data = tvm.nd.array(np_data, ctx)
    tvm_valid_count = tvm.nd.array(np_valid_count, ctx)

    tvm_out = tvm.nd.array(np.zeros(dshape, dtype=data.dtype), ctx)
    f = tvm.build(s, [data, valid_count, out], device)
    f(tvm_data, tvm_valid_count, tvm_out)
    tvm.testing.assert_allclose(tvm_out.asnumpy(), np_result, rtol=1e-4)

    tvm_indices_out = tvm.nd.array(np.zeros(indices_dshape, dtype="int32"), ctx)
    f = tvm.build(indices_s, [data, valid_count, indices_out], device)
    f(tvm_data, tvm_valid_count, tvm_indices_out)
    tvm.testing.assert_allclose(tvm_indices_out.asnumpy(), np_indices_result, rtol=1e-4)
Author: bddppq | Project: tvm | Lines: 28 | Source: test_topi_vision.py
Example 8: main
def main():
    n = tvm.var('n')
    A = tvm.placeholder((n,), name='A')
    B = tvm.placeholder((n,), name='B')
    C = tvm.compute(A.shape, lambda *i: A(*i) + B(*i), name='C')
    s = tvm.create_schedule(C.op)
    s[C].parallel(s[C].op.axis[0])
    print(tvm.lower(s, [A, B, C], simple_mode=True))
    tvm.build(s, [A, B, C], 'llvm --system-lib').save(osp.join(sys.argv[1], 'test.o'))
Author: LANHUIYING | Project: tvm | Lines: 9 | Source: build_test_lib.py
Example 9: test_rpc_module
def test_rpc_module():
    # graph
    n = tvm.convert(1024)
    A = tvm.placeholder((n,), name='A')
    B = tvm.compute(A.shape, lambda *i: A(*i) + 1.0, name='B')
    temp = util.tempdir()
    s = tvm.create_schedule(B.op)
    xo, xi = s[B].split(B.op.axis[0], factor=64)
    s[B].bind(xi, tvm.thread_axis("threadIdx.x"))
    s[B].bind(xo, tvm.thread_axis("blockIdx.x"))

    # Build the dynamic lib for Metal. If we only want the CPU and not Metal,
    # pass `target` as the build target instead of "metal".
    f = tvm.build(s, [A, B], "metal", target_host=target, name="myadd")
    path_dso1 = temp.relpath("dev_lib.dylib")
    f.export_library(path_dso1, xcode.create_dylib,
                     arch=arch, sdk=sdk)
    xcode.codesign(path_dso1)

    s = tvm.create_schedule(B.op)
    xo, xi = s[B].split(B.op.axis[0], factor=64)
    s[B].parallel(xi)
    s[B].pragma(xo, "parallel_launch_point")
    s[B].pragma(xi, "parallel_barrier_when_finish")
    f = tvm.build(s, [A, B], target, name="myadd_cpu")
    path_dso2 = temp.relpath("cpu_lib.dylib")
    f.export_library(path_dso2, xcode.create_dylib,
                     arch=arch, sdk=sdk)
    xcode.codesign(path_dso2)

    # Start RPC test server that contains the compiled library.
    server = xcode.popen_test_rpc(proxy_host, proxy_port, key,
                                  destination=destination,
                                  libs=[path_dso1, path_dso2])

    # connect to the proxy
    remote = rpc.connect(proxy_host, proxy_port, key=key)
    ctx = remote.metal(0)
    f1 = remote.load_module("dev_lib.dylib")
    a_np = np.random.uniform(size=1024).astype(A.dtype)
    a = tvm.nd.array(a_np, ctx)
    b = tvm.nd.array(np.zeros(1024, dtype=A.dtype), ctx)
    time_f = f1.time_evaluator(f1.entry_name, ctx, number=10)
    cost = time_f(a, b).mean
    print('%g secs/op' % cost)
    np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1)

    # CPU
    ctx = remote.cpu(0)
    f2 = remote.load_module("cpu_lib.dylib")
    a_np = np.random.uniform(size=1024).astype(A.dtype)
    a = tvm.nd.array(a_np, ctx)
    b = tvm.nd.array(np.zeros(1024, dtype=A.dtype), ctx)
    time_f = f2.time_evaluator(f2.entry_name, ctx, number=10)
    cost = time_f(a, b).mean
    print('%g secs/op' % cost)
    np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1)
Author: bddppq | Project: tvm | Lines: 55 | Source: ios_rpc_test.py
Example 10: check_device
def check_device(device):
    ctx = tvm.context(device, 0)
    if not ctx.exist:
        print("Skip because %s is not enabled" % device)
        return
    print("Running on target: %s" % device)
    with tvm.target.create(device):
        # declare
        DepthwiseConv2d = topi.nn.depthwise_conv2d_NCHWc(Input, Filter,
                                                         (stride_h, stride_w),
                                                         padding_args,
                                                         (dilation, dilation),
                                                         in_layout,
                                                         out_layout, dtype)
        # TODO: add scale_shift implement for NCHWc and add test here
        Relu = topi.nn.relu(DepthwiseConv2d)
        # schedule
        s1 = topi.generic.schedule_depthwise_conv2d_nchw(DepthwiseConv2d)
        s2 = topi.generic.schedule_depthwise_conv2d_nchw(Relu)

    # build the kernels
    f1 = tvm.build(s1, [Input, Filter, DepthwiseConv2d], device)
    f2 = tvm.build(s2, [Input, Filter, Relu], device)

    # Prepare pod type for test data closure
    input_shape = (batch, in_channel, in_height, in_width)
    filter_shape = (filter_channel, channel_multiplier, filter_height, filter_width)

    # Use memoize, pickle the test data for next time use.
    @memoize("topi.tests.test_topi_depthwise_conv2d.NCHWc")
    def get_ref_data():
        input_np = np.random.uniform(size=input_shape).astype(dtype)
        filter_np = np.random.uniform(size=filter_shape).astype(dtype)
        # correctness with scipy
        depthwise_conv2d_scipy = topi.testing.depthwise_conv2d_python_nchw(
            input_np, filter_np, stride, padding)
        relu_scipy = np.maximum(depthwise_conv2d_scipy, 0)
        return (_transform_data(input_np, ic_block),
                _transform_kernel(filter_np, oc_block),
                _transform_data(depthwise_conv2d_scipy, oc_block),
                _transform_data(relu_scipy, oc_block))

    # Get the test data
    (input_np, filter_np, depthwise_conv2d_scipy, relu_scipy) = get_ref_data()

    input_tvm = tvm.nd.array(input_np, ctx)
    filter_tvm = tvm.nd.array(filter_np, ctx)
    depthwise_conv2d_tvm = tvm.nd.array(np.zeros(shape=get_const_tuple(DepthwiseConv2d.shape),
                                                 dtype=DepthwiseConv2d.dtype), ctx)
    relu_tvm = tvm.nd.array(np.zeros(shape=get_const_tuple(Relu.shape), dtype=Relu.dtype), ctx)
    # launch kernel 1 (depthwise_conv2d)
    f1(input_tvm, filter_tvm, depthwise_conv2d_tvm)
    # launch kernel 2 (depthwise_conv2d + relu)
    f2(input_tvm, filter_tvm, relu_tvm)
    tvm.testing.assert_allclose(depthwise_conv2d_tvm.asnumpy(), depthwise_conv2d_scipy, rtol=1e-5)
    tvm.testing.assert_allclose(relu_tvm.asnumpy(), relu_scipy, rtol=1e-5)
Author: LANHUIYING | Project: tvm | Lines: 55 | Source: test_topi_depthwise_conv2d.py
Example 11: run_inference
def run_inference(data_dtype, kernel_dtype, out_dtype, im_height, im_width, in_filter,
                  out_filter, k_h, k_w, hpad, wpad, hstride, wstride):
    """
    Runs the inference and checks the functional correctness between
    compute and schedule outputs
    """
    (data_shape, kernel_shape, o_shape) = get_shape(im_height, im_width, in_filter,
                                                    out_filter, k_h, k_w, hpad, wpad,
                                                    hstride, wstride, out_dtype)

    # Create TVM placeholders
    data = tvm.placeholder(data_shape, name='data', dtype=data_dtype)
    kernel = tvm.placeholder(kernel_shape, name='kernel', dtype=kernel_dtype)

    # Create the numpy arrays to be used for executing conv models
    if data_dtype == 'float32':
        data_array = tvm.nd.array(np.random.rand(*data_shape).astype(dtype=data_dtype), CTX)
        kernel_array = tvm.nd.array(np.random.rand(*kernel_shape).astype(dtype=kernel_dtype), CTX)
    else:
        data_array = tvm.nd.array(np.random.randint(100, size=data_shape).astype(data_dtype))
        kernel_array = tvm.nd.array(np.random.randint(100, size=kernel_shape).astype(kernel_dtype))

    # c_orig will be used for the declaration output
    # c_sch will be used for the scheduled computation output
    c_orig = tvm.nd.array(np.zeros(o_shape, dtype=out_dtype), CTX)
    c_sch = tvm.nd.array(np.zeros(o_shape, dtype=out_dtype), CTX)

    with tvm.target.create(TARGET_NAME):
        conv = topi.nn.conv2d_NCHWc(data, kernel, stride=hstride,
                                    padding=hpad, layout='NCHWc',
                                    out_layout='NCHWc', out_dtype=out_dtype)
        out = topi.nn.relu(conv)
        sch = tvm.create_schedule(out.op)
        func = tvm.build(sch, [data, kernel, out], target=TARGET_NAME, name='out')
        func(data_array, kernel_array, c_orig)
        LOGGER.debug(tvm.lower(sch, [data, kernel], simple_mode=True))

        # Generate and run the optimized schedule
        sconv = topi.generic.nn.schedule_conv2d_NCHWc(outs=[out])
        func = tvm.build(sconv, [data, kernel, out], target=TARGET_NAME, name='conv')
        func(data_array, kernel_array, c_sch)

        # Functional check
        if data_dtype == 'uint8':
            np.testing.assert_equal(c_orig.asnumpy(), c_sch.asnumpy())
        else:
            assert np.allclose(c_orig.asnumpy(), c_sch.asnumpy())

        evaluator = func.time_evaluator(func.entry_name, CTX, number=1000)
        LOGGER.debug(tvm.lower(sconv, [data, kernel], simple_mode=True))
        return evaluator(data_array, kernel_array, c_sch).mean
Author: bddppq | Project: tvm | Lines: 52 | Source: test_conv_int8_intel.py
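As a usage sketch of the function above (the shapes and dtypes here are illustrative assumptions, and the module-level CTX, TARGET_NAME, and LOGGER must already be defined as in the source file), a quantized int8 case could be exercised like this:

    # Hypothetical invocation: 224x224 uint8 input, 64 -> 64 channels, 3x3 kernel,
    # padding 1, stride 1; returns the mean runtime of the scheduled kernel.
    mean_time = run_inference('uint8', 'int8', 'int32',
                              im_height=224, im_width=224,
                              in_filter=64, out_filter=64,
                              k_h=3, k_w=3, hpad=1, wpad=1,
                              hstride=1, wstride=1)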
Example 12: verify_bitserial_conv2d_nhwc
def verify_bitserial_conv2d_nhwc(batch, in_size, in_channel, num_filter, kernel, stride, padding,
                                 activation_bits, weight_bits, unipolar):
    in_height = in_width = in_size
    input_type = 'uint32'
    out_dtype = 'int16'
    device = 'llvm -device=arm_cpu -model=bcm2837 -target=armv7l-linux-gnueabihf -mattr=+neon'
    with tvm.target.create(device):
        A = tvm.placeholder((batch, in_height, in_width, in_channel), dtype=input_type, name='A')
        W = tvm.placeholder((kernel, kernel, in_channel, num_filter), dtype=input_type, name='W')
        B = topi.nn.bitserial_conv2d_nhwc(A, W, stride, padding, activation_bits, weight_bits,
                                          pack_dtype='uint8', out_dtype='int16', unipolar=unipolar)
        s = topi.generic.schedule_bitserial_conv2d_nhwc([B])

    func = tvm.build(s, [A, W, B], device)
    assembly = func.get_source('asm')
    matches = re.findall("vpadal", assembly)
    assert (len(matches) > 0)
    matches = re.findall("vcnt", assembly)
    assert (len(matches) > 0)
    matches = re.findall("vpadd", assembly)
    assert (len(matches) > 0)

    ctx = tvm.context(device, 0)
    if 'arm' not in os.uname()[4]:
        print("Skipped running code, not an arm device")
        return
    print("Running on target: %s" % device)

    def get_ref_data():
        a_np = generate_quantized_np(get_const_tuple(A.shape), activation_bits, input_type)
        w_np = generate_quantized_np(get_const_tuple(W.shape), weight_bits, input_type)
        if unipolar:
            w_ = np.copy(w_np).astype(out_dtype)
            for x in np.nditer(w_, op_flags=['readwrite']):
                x[...] = 1 if x == 1 else -1
            b_np = topi.testing.conv2d_nhwc_python(a_np, w_, stride, padding).astype(out_dtype)
        else:
            b_np = topi.testing.conv2d_nhwc_python(a_np, w_np, stride, padding).astype(out_dtype)
        return a_np, w_np, b_np
    a_np, w_np, b_np = get_ref_data()

    a = tvm.nd.array(a_np, ctx)
    w = tvm.nd.array(w_np, ctx)
    b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
    func = tvm.build(s, [A, W, B], device)
    func(a, w, b)
    np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
Author: bddppq | Project: tvm | Lines: 50 | Source: test_topi_bitserial_conv2d_rasp.py
Example 13: check_device
def check_device(device):
    if not tvm.module.enabled(device):
        print("Skip because %s is not enabled" % device)
        return
    target = topi.cpp.TEST_create_target(device)
    ctx = tvm.context(device, 0)
    out = tvm.nd.array(np.zeros(shape, dtype=dtype), ctx)
    f = tvm.build(s1, [A, B], device, name="full_like")
    f(tvm.nd.array(np.zeros(shape, dtype), ctx), out)
    tvm.testing.assert_allclose(out.asnumpy(), np_nd, rtol=1e-5)

    f = tvm.build(s2, [C], device, name="full")
    f(out)
    tvm.testing.assert_allclose(out.asnumpy(), np_nd, rtol=1e-5)
Author: LANHUIYING | Project: tvm | Lines: 14 | Source: test_topi_tensor.py
Example 14: prepare_test_libs
def prepare_test_libs(base_path):
    n = tvm.var("n")
    A = tvm.placeholder((n,), name='A')
    B = tvm.compute(A.shape, lambda *i: A(*i) + 1.0, name='B')
    s = tvm.create_schedule(B.op)

    # Compile library as dynamic library
    fadd_dylib = tvm.build(s, [A, B], "llvm", name="addone")
    dylib_path = os.path.join(base_path, "test_addone_dll.so")
    fadd_dylib.export_library(dylib_path)

    # Compile library in system library mode
    fadd_syslib = tvm.build(s, [A, B], "llvm --system-lib", name="addonesys")
    syslib_path = os.path.join(base_path, "test_addone_sys.o")
    fadd_syslib.save(syslib_path)
Author: bddppq | Project: tvm | Lines: 14 | Source: prepare_test_libs.py
Example 15: build
def build(*args, **kwargs):
    """Thin wrapper of tvm.build

    This wrapper automatically applies VTA's build_config
    if there is no user specified build_config in context.

    See Also
    --------
    tvm.build : The original TVM's build function
    """
    cfg = tvm.build_module.current_build_config()
    if not cfg.add_lower_pass:
        with build_config():
            return tvm.build(*args, **kwargs)
    return tvm.build(*args, **kwargs)
Author: bddppq | Project: tvm | Lines: 15 | Source: build_module.py
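A minimal usage sketch, assuming this wrapper is exposed as vta.build and that a schedule s over tensors A and B already exists (both assumptions, not shown in the excerpt): the call looks exactly like tvm.build, with VTA's lowering passes applied implicitly.

    import vta

    # Same signature as tvm.build; VTA's build_config is injected automatically
    # when the caller has not set one up in the current context.
    f = vta.build(s, [A, B], target="ext_dev", target_host="llvm")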
Example 16: check_device
def check_device(device):
    ctx = tvm.context(device, 0)
    if not ctx.exist:
        print("Skip because %s is not enabled" % device)
        return
    print("Running on target: %s" % device)
    a = tvm.nd.array(a_np, ctx)
    w = tvm.nd.array(w_np, ctx)
    b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
    c = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=C.dtype), ctx)
    func1 = tvm.build(s1, [A, W, B], device)
    func2 = tvm.build(s2, [A, W, C], device)
    func1(a, w, b)
    func2(a, w, c)
    tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
    tvm.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-5)
Author: bddppq | Project: tvm | Lines: 16 | Source: test_topi_conv2d_hwcn.py
Example 17: test_min_repeat_ms
def test_min_repeat_ms():
    tmp = tempdir()
    filename = tmp.relpath("log")

    @tvm.register_func
    def my_debug(filename):
        """one call lasts for 100 ms and writes one character to a file"""
        time.sleep(0.1)
        with open(filename, "a") as fout:
            fout.write("c")

    X = tvm.compute((), lambda: tvm.call_packed("my_debug", filename))
    s = tvm.create_schedule(X.op)
    func = tvm.build(s, [X])

    x = tvm.nd.empty((), dtype="int32")
    ftimer = func.time_evaluator(func.entry_name, tvm.cpu(),
                                 number=1, repeat=1)
    ftimer(x)

    # one warm-up call plus one timed call writes two characters
    with open(filename, "r") as fin:
        ct = len(fin.readline())
    assert ct == 2

    ftimer = func.time_evaluator(func.entry_name, tvm.cpu(),
                                 number=1, repeat=1, min_repeat_ms=1000)
    ftimer(x)

    # make sure we get more than 10 calls
    with open(filename, "r") as fin:
        ct = len(fin.readline())
    assert ct > 10 + 2
Author: bddppq | Project: tvm | Lines: 35 | Source: test_runtime_measure.py
Example 18: check_device
def check_device(device):
    if not tvm.module.enabled(device):
        print("Skip because %s is not enabled" % device)
        return
    print("Running on target: %s" % device)
    target = topi.cpp.TEST_create_target(device)
    s = topi.cpp.cuda.schedule_injective(target, [C])
    ctx = tvm.context(device, 0)
    foo = tvm.build(s, [A, B, C], device, name="broadcast_binary" + "_" + typ)

    lhs_npy = np.random.uniform(size=lhs_shape).astype(A.dtype)
    rhs_npy = np.random.uniform(size=rhs_shape).astype(A.dtype)
    if typ == "add":
        out_npy = lhs_npy + rhs_npy
    elif typ == "sub":
        out_npy = lhs_npy - rhs_npy
    elif typ == "div":
        rhs_npy = np.abs(rhs_npy) + 0.001
        out_npy = lhs_npy / rhs_npy
    elif typ == "mul":
        out_npy = lhs_npy * rhs_npy
    elif typ == "maximum":
        out_npy = np.maximum(lhs_npy, rhs_npy)
    elif typ == "minimum":
        out_npy = np.minimum(lhs_npy, rhs_npy)
    elif typ == "pow":
        out_npy = lhs_npy ** rhs_npy
    else:
        raise NotImplementedError

    lhs_nd = tvm.nd.array(lhs_npy, ctx)
    rhs_nd = tvm.nd.array(rhs_npy, ctx)
    out_nd = tvm.nd.array(np.empty(out_npy.shape).astype(B.dtype), ctx)
    for _ in range(1):
        foo(lhs_nd, rhs_nd, out_nd)
    np.testing.assert_allclose(out_nd.asnumpy(), out_npy, rtol=1E-4, atol=1E-4)
Author: gwli | Project: tvm | Lines: 34 | Source: test_topi_broadcast.py
Example 19: test_upstream
def test_upstream():
    @tvm.hybrid.script
    def upstream(a):
        b = output_tensor((20, ), 'float32')
        for i in range(20):
            b[i] = a[i] * i
        return b

    a = tvm.placeholder((20, ), 'float32')
    b = tvm.placeholder((20, ), 'float32')
    c = tvm.compute((20, ), lambda x: a[x] + b[x])
    d = upstream(c)
    sch = tvm.create_schedule([c.op, d.op])
    ir = tvm.lower(sch, [a, b, d], simple_mode=True)
    func = tvm.build(sch, [a, b, d])
    assert(func)

    a = numpy.random.randn(20).astype('float32')
    b = numpy.random.randn(20).astype('float32')
    ref = numpy.zeros((20, ), 'float32')
    for i in range(20):
        ref[i] = (a[i] + b[i]) * i

    tvm_a = tvm.nd.array(a)
    tvm_b = tvm.nd.array(b)
    tvm_d = tvm.nd.array(numpy.zeros((20, )).astype('float32'))
    func(tvm_a, tvm_b, tvm_d)
    tvm.testing.assert_allclose(tvm_d.asnumpy(), ref, 1e-5, 1e-5)
Author: bddppq | Project: tvm | Lines: 29 | Source: test_hybrid_script.py
Example 20: test_const_param
def test_const_param():
    @tvm.hybrid.script
    def add_something(a, b):
        c = output_tensor((11, ), 'int32')
        for i in range(11):
            c[i] = a[i] + b
        return c

    a = tvm.placeholder((11, ), dtype='int32', name='a')
    b = tvm.const(11, 'int32')
    c = add_something(a, b)
    sch = tvm.create_schedule(c.op)
    module = tvm.build(sch, [a, c], 'llvm')
    assert(module)

    np_a = numpy.arange(11).astype('int32')
    np_b = 11
    np_c = numpy.zeros((11, )).astype('int32')

    nd_a = tvm.ndarray.array(np_a)
    nd_c = tvm.ndarray.array(numpy.zeros((11, )).astype('int32'))
    module(nd_a, nd_c)
    ref = add_something(np_a, 11)

    tvm.testing.assert_allclose(nd_c.asnumpy(), ref, 1e-5, 1e-5)
Author: bddppq | Project: tvm | Lines: 25 | Source: test_hybrid_script.py
Note: The tvm.build function examples in this article were compiled by 纯净天空 from source code and documentation hosted on platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by many developers; copyright of the source code belongs to the original authors. For distribution and use, please refer to the corresponding project's license. Do not reproduce without permission.