This article collects typical usage examples of the Python function tvm.lower. If you have been wondering what exactly tvm.lower does, how to call it, or what working code that uses it looks like, the curated examples below should help.
The following shows 20 code examples of the lower function, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
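Before the individual examples, here is a minimal sketch of the typical call pattern. It is only an illustration, assuming the same pre-0.7 TVM API (tvm.placeholder, tvm.create_schedule) used throughout the examples below; the workload and tensor names are made up.

import tvm

# A toy elementwise-add workload, purely to demonstrate the call pattern.
n = tvm.convert(1024)
A = tvm.placeholder((n,), name="A")
B = tvm.compute((n,), lambda i: A[i] + 1.0, name="B")
s = tvm.create_schedule(B.op)

# With simple_mode=True, tvm.lower returns a human-readable statement,
# which is handy for inspecting the lowered IR; without it, it returns a
# LoweredFunc that can be passed on to tvm.build.
print(tvm.lower(s, [A, B], simple_mode=True))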
Example 1: verify_softmax

def verify_softmax(m, n, dtype="float32"):
    A = tvm.placeholder((m, n), dtype=dtype, name='A')
    B = topi.nn.softmax(A)
    # confirm lower works
    s = tvm.create_schedule([B.op])
    tvm.lower(s, [A, B], simple_mode=True)

    a_np = np.random.uniform(size=get_const_tuple(A.shape)).astype(A.dtype)
    b_np = topi.testing.softmax_python(a_np)

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s = topi.generic.schedule_softmax(B)

        a = tvm.nd.array(a_np, ctx)
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
        foo = tvm.build(s, [A, B], device, name="softmax")
        foo(a, b)
        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)

    for device in ['cuda', 'opencl', 'metal', 'rocm', 'vulkan', 'nvptx']:
        check_device(device)

Contributor: LANHUIYING | Project: tvm | Lines: 27 | Source: test_topi_softmax.py

Example 2: verify_softmax

def verify_softmax(m, n):
    A = tvm.placeholder((m, n), name='A')
    B = topi.cpp.nn.softmax(A, 1)
    # confirm lower works
    s = tvm.create_schedule([B.op])
    tvm.lower(s, [A, B], simple_mode=True)
    a_np = np.random.uniform(size=get_const_tuple(A.shape)).astype(A.dtype)
    b_np = topi.testing.softmax_python(a_np)

    def check_device(device):
        if not tvm.module.enabled(device):
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        target = topi.cpp.TEST_create_target(device)
        if device == "llvm":
            s = topi.cpp.generic.default_schedule(target, [B], False)
        else:
            s = topi.cpp.cuda.schedule_softmax(target, [B])
        ctx = tvm.context(device, 0)
        a = tvm.nd.array(a_np, ctx)
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
        foo = tvm.build(s, [A, B], device, name="softmax")
        foo(a, b)
        np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)

    for device in ['cuda', 'opencl', 'metal', 'rocm']:
        check_device(device)

Contributor: gwli | Project: tvm | Lines: 29 | Source: test_topi_softmax.py

Example 3: verify_log_softmax

def verify_log_softmax(m, n):
    A = tvm.placeholder((m, n), name='A')
    B = topi.nn.log_softmax(A)
    # confirm lower works
    s = tvm.create_schedule([B.op])
    tvm.lower(s, [A, B], simple_mode=True)
    a_np = np.random.uniform(size=get_const_tuple(A.shape)).astype(A.dtype)
    b_np = topi.testing.log_softmax_python(a_np)

    def check_device(device):
        if not tvm.module.enabled(device):
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s = topi.generic.schedule_softmax(B)
        ctx = tvm.context(device, 0)
        a = tvm.nd.array(a_np, ctx)
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
        foo = tvm.build(s, [A, B], device, name="log_softmax")
        foo(a, b)
        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)

    for device in ["opengl"]:
        check_device(device)

Contributor: bddppq | Project: tvm | Lines: 25 | Source: test_local_topi_softmax.py

Example 4: test_lstm_cell_inline

def test_lstm_cell_inline():
    num_step = 128
    num_input = 256
    num_hidden = 1152
    batch_size = 4
    # Global transition matrix
    X = tvm.placeholder((num_step - 1, batch_size, num_input), name="X")
    Wi2h = tvm.placeholder((4, num_hidden, num_input), name="Wi2h")
    Wh2h = tvm.placeholder((4, num_hidden, num_hidden), name="Wh2h")
    # h: output hidden state, c: cell state.
    s_state_h = tvm.placeholder((num_step, batch_size, num_hidden))
    s_state_c = tvm.placeholder((num_step, batch_size, num_hidden))
    s_init_c = tvm.compute((1, batch_size, num_hidden),
                           lambda *i: 0.0, name="init_c")
    s_init_h = tvm.compute((1, batch_size, num_hidden),
                           lambda *i: 0.0, name="init_h")
    # LSTM transition
    k = tvm.reduce_axis((0, num_input), name="ki2h")
    s_i2h = tvm.compute(
        (num_step, 4, batch_size, num_hidden),
        lambda t, x, i, j: tvm.sum(X[t - 1, i, k] * Wi2h[x, j, k], axis=k),
        name="s_i2h")
    k = tvm.reduce_axis((0, num_hidden), name="ki2h")
    s_h2h = tvm.compute(
        (num_step, 4, batch_size, num_hidden),
        lambda t, x, i, j: tvm.sum(s_state_h[t - 1, i, k] * Wh2h[x, j, k], axis=k),
        name="s_h2h")
    # Gate rules
    gates = tvm.compute(s_i2h.shape, lambda *i:
                        s_i2h(*i) + s_h2h(*i), name="gates")
    gshape = (num_step, batch_size, num_hidden)
    in_gate = tvm.compute(gshape, lambda t, i, j: tvm.sigmoid(gates[t, 0, i, j]), name="in_gate")
    in_transform = tvm.compute(gshape, lambda t, i, j: tvm.tanh(gates[t, 1, i, j]), name="in_transform")
    forget_gate = tvm.compute(gshape, lambda t, i, j: tvm.sigmoid(gates[t, 2, i, j]), name="forget_gate")
    out_gate = tvm.compute(gshape, lambda t, i, j: tvm.sigmoid(gates[t, 3, i, j]), name="out_gate")
    next_c = tvm.compute(gshape,
                         lambda t, i, j:
                         forget_gate[t, i, j] * s_state_c[t - 1, i, j] +
                         in_gate[t, i, j] * in_transform[t, i, j], name="next_c")
    next_h = tvm.compute(gshape,
                         lambda t, i, j: out_gate[t, i, j] * tvm.tanh(next_c[t, i, j]), name="next_h")
    update_c = tvm.compute(gshape, lambda *i: next_c(*i), name="update_c")
    update_h = tvm.compute(gshape, lambda *i: next_h(*i), name="update_h")
    # scan over the time steps
    scan_h, scan_c = tvm.scan(
        [s_init_h, s_init_c],
        [update_h, update_c],
        [s_state_h, s_state_c],
        inputs=[X],
        name="lstm_scan")
    # schedule
    s = tvm.create_schedule(scan_h.op)
    # Inline gate computations
    s[gates].compute_inline()
    s[in_gate].compute_inline()
    s[in_transform].compute_inline()
    s[forget_gate].compute_inline()
    s[out_gate].compute_inline()
    # verify we can lower correctly
    tvm.lower(s, [X, Wi2h, Wh2h, scan_h, scan_c])

Contributor: LANHUIYING | Project: tvm | Lines: 60 | Source: test_schedule_lstm.py

Example 5: test_loop_dependent_allocate

def test_loop_dependent_allocate():
    N = tvm.var("N")
    A = tvm.placeholder((2*N,), "float32", "A")
    C = tvm.compute((N, ), lambda i: A[2*i] + A[i+1], name='C')
    s = tvm.create_schedule(C.op)
    AA = s.cache_read(A, "local", [C])
    s[AA].compute_at(s[C], s[C].op.axis[0])
    # this line should fail because IRUseDefAnalysis sees an allocate statement
    # referencing an undefined variable
    tvm.lower(s, [A, C])

Contributor: LANHUIYING | Project: tvm | Lines: 10 | Source: test_pass_split_host_device.py

Example 6: verify_log_softmax

def verify_log_softmax(m, n, dtype="float32"):
    A = tvm.placeholder((m, n), dtype=dtype, name='A')
    B = topi.nn.log_softmax(A)
    # confirm lower works
    s = tvm.create_schedule([B.op])
    tvm.lower(s, [A, B], simple_mode=True)
    a_np = np.random.uniform(size=get_const_tuple(A.shape)).astype(A.dtype)
    b_np = topi.testing.log_softmax_python(a_np)

    for device in get_all_backend():
        check_device(A, B, a_np, b_np, device, "log_softmax")

Contributor: bddppq | Project: tvm | Lines: 11 | Source: test_topi_softmax.py

Example 7: verify_softmax

def verify_softmax(m, n, dtype="float32"):
    A = tvm.placeholder((m, n), dtype=dtype, name='A')
    B = topi.nn.softmax(A)
    # confirm lower works
    s = tvm.create_schedule([B.op])
    tvm.lower(s, [A, B], simple_mode=True)
    a_np = np.random.uniform(size=get_const_tuple(A.shape)).astype(A.dtype)
    b_np = topi.testing.softmax_python(a_np)

    for device in ['cuda', 'opencl', 'metal', 'rocm', 'vulkan', 'nvptx']:
        check_device(A, B, a_np, b_np, device, "softmax")

Contributor: bddppq | Project: tvm | Lines: 12 | Source: test_topi_softmax.py

Example 8: run_inference

def run_inference(data_dtype, kernel_dtype, out_dtype, im_height, im_width, in_filter,
                  out_filter, k_h, k_w, hpad, wpad, hstride, wstride):
    """
    Runs the inference and checks the functional correctness between
    compute and schedule outputs
    """
    (data_shape, kernel_shape, o_shape) = get_shape(im_height, im_width, in_filter,
                                                    out_filter, k_h, k_w, hpad, wpad,
                                                    hstride, wstride, out_dtype)

    # Create TVM placeholders
    data = tvm.placeholder(data_shape, name='data', dtype=data_dtype)
    kernel = tvm.placeholder(kernel_shape, name='kernel', dtype=kernel_dtype)

    # Create the numpy arrays to be used for executing conv models
    if data_dtype == 'float32':
        data_array = tvm.nd.array(np.random.rand(*data_shape).astype(dtype=data_dtype), CTX)
        kernel_array = tvm.nd.array(np.random.rand(*kernel_shape).astype(dtype=kernel_dtype), CTX)
    else:
        data_array = tvm.nd.array(np.random.randint(100, size=data_shape).astype(data_dtype))
        kernel_array = tvm.nd.array(np.random.randint(100, size=kernel_shape).astype(kernel_dtype))

    # c_orig will be used for declaration output
    # c_sch will be used for scheduled computation output
    c_orig = tvm.nd.array(np.zeros(o_shape, dtype=out_dtype), CTX)
    c_sch = tvm.nd.array(np.zeros(o_shape, dtype=out_dtype), CTX)

    with tvm.target.create(TARGET_NAME):
        conv = topi.nn.conv2d_NCHWc(data, kernel, stride=hstride,
                                    padding=hpad, layout='NCHWc',
                                    out_layout='NCHWc', out_dtype=out_dtype)
        out = topi.nn.relu(conv)
        sch = tvm.create_schedule(out.op)
        func = tvm.build(sch, [data, kernel, out], target=TARGET_NAME, name='out')
        func(data_array, kernel_array, c_orig)
        LOGGER.debug(tvm.lower(sch, [data, kernel], simple_mode=True))

        # Generate and run the optimized schedule
        sconv = topi.generic.nn.schedule_conv2d_NCHWc(outs=[out])
        func = tvm.build(sconv, [data, kernel, out], target=TARGET_NAME, name='conv')
        func(data_array, kernel_array, c_sch)

        # Functional check
        if data_dtype == 'uint8':
            np.testing.assert_equal(c_orig.asnumpy(), c_sch.asnumpy())
        else:
            assert np.allclose(c_orig.asnumpy(), c_sch.asnumpy())

        evaluator = func.time_evaluator(func.entry_name, CTX, number=1000)
        LOGGER.debug(tvm.lower(sconv, [data, kernel], simple_mode=True))
        return evaluator(data_array, kernel_array, c_sch).mean

Contributor: bddppq | Project: tvm | Lines: 52 | Source: test_conv_int8_intel.py

Example 9: test_add_pipeline

def test_add_pipeline():
    nn = 64
    max_threads = 4
    n = tvm.convert(nn)
    A = tvm.placeholder((n,), name='A')

    def extern_generator(ins, outs):
        """Manually write the IR for the extern function, add pipeline"""
        ib = tvm.ir_builder.create()
        with ib.for_range(0, (n+1) // 2) as i:
            ib.emit(outs[0].vstore(i*2, ins[0].vload(i*2, "float32x2") + tvm.const(1, "float32x2")))
        return ib.get()

    def extern_generator_gpu(ins, outs):
        """Manually write the IR for the extern function, add pipeline"""
        ib = tvm.ir_builder.create()
        bx = tvm.thread_axis("blockIdx.x")
        tx = tvm.thread_axis("threadIdx.x")
        ib.scope_attr(bx, "thread_extent", (nn+max_threads-1) // max_threads)
        ib.scope_attr(tx, "thread_extent", max_threads)
        idx = bx.var * max_threads + tx.var
        with ib.if_scope(ib.likely(idx < n)):
            ib.emit(outs[0].vstore(idx*2, ins[0].vload(idx*2, "float32x2") + tvm.const(1, "float32x2")))
        return ib.get()

    C_cpu = tvm.extern(A.shape, [A], extern_generator, name='C')
    C_gpu = tvm.extern(A.shape, [A], extern_generator_gpu, name='C')
    s_cpu = tvm.create_schedule(C_cpu.op)
    s_gpu = tvm.create_schedule(C_gpu.op)
    print(tvm.lower(s_cpu, [A, C_cpu], simple_mode=True))
    print(tvm.lower(s_gpu, [A, C_gpu], simple_mode=True))

    def check_target(target):
        if not tvm.module.enabled(target):
            return
        s = s_gpu if target in ['opencl', 'cuda'] else s_cpu
        C = C_gpu if target in ['opencl', 'cuda'] else C_cpu
        # build and invoke the kernel.
        f = tvm.build(s, [A, C], target)
        ctx = tvm.context(target, 0)
        # launch the kernel.
        n = nn
        a = tvm.nd.array(np.random.uniform(size=n).astype(A.dtype), ctx)
        c = tvm.nd.array(np.zeros(n, dtype=C.dtype), ctx)
        f(a, c)
        tvm.testing.assert_allclose(c.asnumpy(), a.asnumpy() + 1)

    check_target("llvm")
    check_target("opencl")
    check_target("cuda")

Contributor: LANHUIYING | Project: tvm | Lines: 50 | Source: test_codegen_extern.py

Example 10: check_device

def check_device(device, target_device):
    if not tvm.module.enabled(target_device):
        print("Skip test because {} is not enabled.".format(target_device))
        return

    device_ctx = tvm.context(device)
    graph = get_simplex_graph(host_ctx.device_type, device_ctx.device_type)
    shape = (4,)

    # Create module for add whose target is the device.
    tensor_a = tvm.placeholder(shape, name="A")
    tensor_b = tvm.placeholder(shape, name="B")
    elemwise_add = tvm.compute(shape, lambda *i: tensor_a(*i)
                               + tensor_b(*i), name="elemwise_add")
    target = topi.cpp.TEST_create_target(device)
    schedule_add = topi.cpp.cuda.schedule_injective(target, [elemwise_add])
    lower_add = tvm.lower(schedule_add, [tensor_a, tensor_b, elemwise_add],
                          name="elemwise_add")

    # Insert copy. Neither compute nor schedule is required for the copy
    # node. The compute will be performed at runtime which is just data
    # copy from the input to the output.
    tensor_copy = tvm.placeholder(shape, name="__copy")

    # Create module for sub whose target is the host.
    tensor_c = tvm.placeholder(shape, name="C")
    elemwise_sub = tvm.compute(shape, lambda *i: tensor_copy(*i)
                               - tensor_c(*i), name="elemwise_sub")
    schedule_sub = tvm.create_schedule(elemwise_sub.op)
    lower_sub = tvm.lower(schedule_sub, [tensor_copy, tensor_c, elemwise_sub],
                          name="elemwise_sub")

    target_flist = {target_device: [lower_add], target_host: [lower_sub]}
    mhost = tvm.build(target_flist, target_host=target_host)
    ctx = [host_ctx, device_ctx]
    mod = graph_runtime.create(graph, mhost, ctx)
    params = {}
    params["A"] = tensor_a = np.random.uniform(
        size=shape).astype(tensor_a.dtype)
    params["B"] = tensor_b = np.random.uniform(
        size=shape).astype(tensor_b.dtype)
    params["C"] = tensor_c = np.random.uniform(
        size=shape).astype(tensor_c.dtype)
    mod.set_input(**params)
    mod.run()
    out = mod.get_output(0, tvm.nd.empty(shape))
    np.testing.assert_equal(
        out.asnumpy(), (tensor_a + tensor_b) - tensor_c)

Contributor: LANHUIYING | Project: tvm | Lines: 49 | Source: test_runtime_heterogeneous.py

Example 11: lower

def lower(*args, **kwargs):
    """Thin wrapper of tvm.lower

    This wrapper automatically applies VTA's build_config
    if there is no user specified build_config in context.

    See Also
    --------
    tvm.lower : The original TVM's lower function
    """
    cfg = tvm.build_module.current_build_config()
    if not cfg.add_lower_pass:
        with build_config():
            return tvm.lower(*args, **kwargs)
    return tvm.lower(*args, **kwargs)

Contributor: bddppq | Project: tvm | Lines: 15 | Source: build_module.py

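A rough usage sketch of this wrapper, assuming it is exposed as vta.lower (as in VTA's build_module) and using the same pre-0.7 TVM API as the other examples; the toy workload and names below are purely illustrative.

import tvm
import vta  # assumed to re-export the lower() wrapper shown above

# Hypothetical toy workload, only to give the wrapper something to lower.
A = tvm.placeholder((16,), name="A")
B = tvm.compute((16,), lambda i: A[i] + 1.0, name="B")
s = tvm.create_schedule(B.op)

# With no user-specified build_config in scope, VTA's build_config()
# is applied automatically before delegating to tvm.lower.
print(vta.lower(s, [A, B], simple_mode=True))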
Example 12: _lower

def _lower(sch, inputs, func_name, graph):
    import traceback
    # pylint: disable=broad-except
    try:
        f = tvm.lower(sch, inputs, name=func_name)
        logging.debug("lower function %s", func_name)
        logging.debug("%s", tvm.lower(sch, inputs, simple_mode=True))
    except Exception:
        msg = traceback.format_exc()
        msg += "Error during compile graph\n"
        msg += "--------------------------\n"
        msg += graph.ir(join_entry_attrs=["shape"])
        raise RuntimeError(msg)
    return f if isinstance(
        f, (tvm.container.Array, tuple, list)) else [f]

Contributor: masa-ito-fj | Project: nnvm | Lines: 15 | Source: build_module.py

Example 13: test_upstream

def test_upstream():
    @tvm.hybrid.script
    def upstream(a):
        b = output_tensor((20, ), 'float32')
        for i in range(20):
            b[i] = a[i] * i
        return b

    a = tvm.placeholder((20, ), 'float32')
    b = tvm.placeholder((20, ), 'float32')
    c = tvm.compute((20, ), lambda x: a[x] + b[x])
    d = upstream(c)
    sch = tvm.create_schedule([c.op, d.op])
    ir = tvm.lower(sch, [a, b, d], simple_mode=True)
    func = tvm.build(sch, [a, b, d])
    assert(func)

    a = numpy.random.randn(20).astype('float32')
    b = numpy.random.randn(20).astype('float32')
    ref = numpy.zeros((20, ), 'float32')
    for i in range(20):
        ref[i] = (a[i] + b[i]) * i

    tvm_a = tvm.nd.array(a)
    tvm_b = tvm.nd.array(b)
    tvm_d = tvm.nd.array(numpy.zeros((20, )).astype('float32'))
    func(tvm_a, tvm_b, tvm_d)
    tvm.testing.assert_allclose(tvm_d.asnumpy(), ref, 1e-5, 1e-5)

Contributor: bddppq | Project: tvm | Lines: 29 | Source: test_hybrid_script.py

Example 14: test_add_pipeline

def test_add_pipeline():
    nn = 1024
    n = tvm.convert(nn)
    A = tvm.placeholder((n,), name='A')

    def extern_generator(ins, outs):
        """Manually write the IR for the extern function, add pipeline"""
        ib = tvm.ir_builder.create()
        with ib.for_range(0, n/2) as i:
            ib.emit(outs[0].vstore(i*2, ins[0].vload(i*2, "float32x2") + tvm.const(1, "float32x2")))
        return ib.get()

    C = tvm.extern(A.shape, [A], extern_generator, name='C')
    s = tvm.create_schedule(C.op)
    print(tvm.lower(s, [A, C], simple_mode=True))

    def check_llvm():
        if not tvm.module.enabled("llvm"):
            return
        # build and invoke the kernel.
        f = tvm.build(s, [A, C], "llvm")
        ctx = tvm.cpu(0)
        # launch the kernel.
        n = nn
        a = tvm.nd.array(np.random.uniform(size=n).astype(A.dtype), ctx)
        c = tvm.nd.array(np.zeros(n, dtype=C.dtype), ctx)
        f(a, c)
        np.testing.assert_allclose(
            c.asnumpy(), a.asnumpy() + 1)

    check_llvm()

Contributor: gwli | Project: tvm | Lines: 30 | Source: test_codegen_extern.py

Example 15: test_in_bounds_vectorize_llvm

def test_in_bounds_vectorize_llvm():
    n = 512
    lanes = 2
    A = tvm.placeholder((n,), name='A', dtype="float32x%d" % lanes)
    B = tvm.compute((n,), lambda i: A[i], name='B')
    C = tvm.compute((n,), lambda i: B[i] + tvm.const(1, A.dtype), name='C')
    s = tvm.create_schedule(C.op)
    xo, xi = s[C].split(C.op.axis[0], nparts=2)
    _, xi = s[C].split(xi, factor=2)
    s[C].parallel(xo)
    s[C].vectorize(xi)
    s[B].compute_at(s[C], xo)
    xo, xi = s[B].split(B.op.axis[0], factor=2)
    s[B].vectorize(xi)
    # build and invoke the kernel.
    lowered_func = tvm.lower(s, [A, C], "llvm", simple_mode=False)
    print(lowered_func.body)
    f = tvm.build(s, [A, C], "llvm")
    ctx = tvm.cpu(0)
    # launch the kernel.
    a = tvm.nd.empty((n,), A.dtype).copyfrom(
        np.random.uniform(size=(n, lanes)))
    c = tvm.nd.empty((n,), C.dtype, ctx)
    f(a, c)
    tvm.testing.assert_allclose(c.asnumpy(), a.asnumpy() + 1)

Contributor: bddppq | Project: tvm | Lines: 25 | Source: test_pass_bound_checkers.py

Example 16: test_in_bounds_conv_llvm

def test_in_bounds_conv_llvm(loop_tiling=False):
    HSTR = WSTR = 1
    in_channel = 128
    kernel_height = kernel_width = 3
    out_channel = 64
    batch_size = 1
    in_height = in_width = 64
    out_height = out_width = in_height - kernel_height + 1
    data = tvm.placeholder((batch_size, in_channel, in_height, in_width), name='data')
    kernel = tvm.placeholder((kernel_height, kernel_width, in_channel,
                              out_channel), name='kernel')
    ic = tvm.reduce_axis((0, in_channel), name='ic')
    kh = tvm.reduce_axis((0, kernel_height), name='kh')
    kw = tvm.reduce_axis((0, kernel_width), name='kw')
    conv = tvm.compute((batch_size, out_channel, out_height, out_width),
                       lambda n, oc, oh, ow: tvm.sum(data[n, ic, oh*HSTR + kh, ow*WSTR + kw] *
                                                     kernel[kh, kw, ic, oc],
                                                     axis=[ic, kh, kw]),
                       name="conv2d")
    s = tvm.create_schedule(conv.op)

    n, oc, oh, ow = conv.op.axis
    if loop_tiling:
        oho, owo, ohi, owi = s[conv].tile(oh, ow, 16, 16)
    lowered_func = tvm.lower(s, [data, kernel, conv], simple_mode=True)
    print(lowered_func.body)

    ctx = tvm.cpu(0)
    f = tvm.build(s, [data, kernel, conv], "llvm")
    data_input = tvm.nd.array(np.random.uniform(
        size=(batch_size, in_channel, in_height, in_width)).astype(tvm.float32), ctx)
    kernel_input = tvm.nd.array(np.random.uniform(
        size=(kernel_height, kernel_width, in_channel, out_channel)).astype(tvm.float32), ctx)
    conv_out = tvm.nd.empty((batch_size, out_channel, out_height, out_width), tvm.float32, ctx)
    f(data_input, kernel_input, conv_out)

Contributor: bddppq | Project: tvm | Lines: 35 | Source: test_pass_bound_checkers.py

Example 17: check_c

def check_c():
    if not tvm.module.enabled("llvm"):
        return
    # Specifically allow offset to test codepath when offset is available
    Ab = tvm.decl_buffer(
        A.shape, A.dtype,
        elem_offset=tvm.var('Aoffset'),
        offset_factor=8,
        name='A')
    binds = {A: Ab}
    # BUILD and invoke the kernel.
    f1 = tvm.lower(s, [A, B, C], name="fadd_pipeline")
    fsplits = [x for x in tvm.ir_pass.SplitHostDevice(f1)]
    fsplits[0] = tvm.ir_pass.LowerTVMBuiltin(fsplits[0])
    mhost = tvm.codegen.build_module(fsplits[0], "c")
    temp = util.tempdir()
    path_dso = temp.relpath("temp.so")
    mhost.export_library(path_dso)
    m = tvm.module.load(path_dso)
    fadd = m["fadd_pipeline"]
    ctx = tvm.cpu(0)
    # launch the kernel.
    n = nn
    a = tvm.nd.array(np.random.uniform(size=n).astype(A.dtype), ctx)
    b = tvm.nd.array(np.random.uniform(size=n).astype(B.dtype), ctx)
    c = tvm.nd.array(np.zeros(n, dtype=C.dtype), ctx)
    fadd(a, b, c)
    tvm.testing.assert_allclose(
        c.asnumpy(), a.asnumpy() + b.asnumpy())

Contributor: bddppq | Project: tvm | Lines: 29 | Source: test_codegen_c_host.py

Example 18: test_local_gemm

def test_local_gemm():
    if not tvm.module.enabled("opengl"):
        return
    if not tvm.module.enabled("llvm"):
        return

    nn = 1024
    n = tvm.var('n')
    n = tvm.convert(nn)
    m = n
    l = n

    A = tvm.placeholder((n, l), name='A', dtype='int32')
    B = tvm.placeholder((m, l), name='B', dtype='int32')
    k = tvm.reduce_axis((0, l), name='k')
    C = tvm.compute((n, m), lambda ii, jj: tvm.sum(A[ii, k] * B[jj, k], axis=k),
                    name='CC')

    s = tvm.create_schedule(C.op)
    s[C].opengl()
    print(tvm.lower(s, [A, B, C], simple_mode=True))

    f = tvm.build(s, [A, B, C], "opengl", name="gemm")
    print("------opengl code------")
    print(f.imported_modules[0].get_source(fmt="gl"))

    ctx = tvm.opengl()
    n, m, l = nn, nn, nn
    a_np = np.random.uniform(low=0, high=10, size=(n, l)).astype(A.dtype)
    b_np = np.random.uniform(low=0, high=10, size=(m, l)).astype(B.dtype)
    a = tvm.nd.array(a_np, ctx)
    b = tvm.nd.array(b_np, ctx)
    c = tvm.nd.array(np.zeros((n, m), dtype=C.dtype), ctx)
    f(a, b, c)
    tvm.testing.assert_allclose(c.asnumpy(), np.dot(a_np, b_np.T))

Contributor: bddppq | Project: tvm | Lines: 35 | Source: test_local_gemm.py

Example 19: test_double_splitting_with_indivisible_factors

def test_double_splitting_with_indivisible_factors():
    m = 48
    dtype = "float32"
    A = tvm.placeholder((m,), name='A', dtype=dtype)
    C = tvm.compute((m,), lambda i: A[i], name='C')
    D = tvm.compute((m,), lambda i: C[i], name='D')

    s = tvm.create_schedule(D.op)
    co, ci = s[C].split(C.op.axis[0], factor=10)
    do, di = s[D].split(D.op.axis[0], 32)
    s[C].compute_at(s[D], do)

    target = 'llvm'
    with tvm.build_config(partition_const_loop=True):
        f = tvm.lower(s, [A, C, D], name="fadd1", simple_mode=False)
        func = tvm.build(f, target=target)

    # Find the beginning of the Halide IR corresponding to the kernel code
    # and make sure it doesn't have any if statements left
    top_produce = find_top_produce(f.body)
    assert(not any(collect_visit(top_produce, lambda x: isinstance(x, tvm.stmt.IfThenElse))))

    # check functional correctness of generated code
    ctx = tvm.context(target, 0)
    a = tvm.nd.array(numpy.ones(m,).astype(dtype), ctx)
    c = tvm.nd.array(numpy.zeros(m,).astype(dtype), ctx)
    d = tvm.nd.array(numpy.zeros(m,).astype(dtype), ctx)
    func(a, c, d)
    tvm.testing.assert_allclose(c.asnumpy(), a.asnumpy(), rtol=1e-5)
    tvm.testing.assert_allclose(d.asnumpy(), a.asnumpy(), rtol=1e-5)

Contributor: bddppq | Project: tvm | Lines: 30 | Source: test_pass_loop_partition.py

Example 20: conv_normal

def conv_normal(print_ir):
    print("----- CONV2D CPU End-to-End Test-------")
    s = topi.generic.schedule_conv2d_nchw([res])
    if print_ir:
        print(tvm.lower(s, [data, kernel, res], simple_mode=True))
    cost = verify(s, True)
    gops = (num_ops / cost.mean) / float(10 ** 9)
    print("\tTime cost = %g sec/op, %g GOPS" % (cost.mean, gops))

Contributor: LANHUIYING | Project: tvm | Lines: 8 | Source: test_benchmark_topi_conv2d.py

Note: the tvm.lower examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other source-code and documentation platforms. The snippets are taken from open-source projects contributed by their respective developers, and copyright remains with the original authors. Please consult the corresponding project's license before using or redistributing them; do not reproduce without permission.