本文整理汇总了Python中tvm.compute函数的典型用法代码示例。如果您正苦于以下问题:Python compute函数的具体用法?Python compute怎么用?Python compute使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了compute函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: test_inplace_rule2
def test_inplace_rule2(scope_tb = "local_TB2", max_bits = 1024 * 1024 * 1024):
#Test Buffer
register_mem(scope_tb, max_bits)
m = 10
A = tvm.placeholder((m,), name='A')
C = tvm.placeholder((m,), name='C')
D = tvm.placeholder((m,), name='D')
A0 = tvm.compute((m,), lambda i: A[i] + C[i], name='A0')
A1 = tvm.compute((m,), lambda i: D[i] * D[i], name='A1')
A2 = tvm.compute((m,), lambda i: A0[i] + A1[i], name='A2')
B = tvm.compute((m,), lambda i: A2[i], name='B')
s = tvm.create_schedule(B.op)
A0L = s.cache_read(A0, scope_tb, [A2])
A1L = s.cache_read(A1, scope_tb, [A2])
A2L = s.cache_read(A2, scope_tb, [B])
bounds = tvm.schedule.InferBound(s)
assert isinstance(bounds, tvm.container.Map)
stmt = tvm.schedule.ScheduleOps(s, bounds)
Ab = tvm.decl_buffer(A.shape, A.dtype, name='A')
Bb = tvm.decl_buffer(B.shape, B.dtype, name='B')
Cc = tvm.decl_buffer(C.shape, B.dtype, name='C')
Dd = tvm.decl_buffer(D.shape, B.dtype, name='D')
stmt = tvm.ir_pass.StorageFlatten(stmt, {A: Ab, B: Bb, C: Cc, D:Dd}, 64)
stmt = tvm.ir_pass.CanonicalSimplify(stmt)
stmt = tvm.ir_pass.Simplify(stmt)
stmt = tvm.ir_pass.StorageRewrite(stmt)
# verify only have one allocations.
# verify inplace folding works
num_alloc = [0]
def verify(n):
if isinstance(n, tvm.stmt.Allocate):
num_alloc[0] += 1
tvm.ir_pass.PostOrderVisit(stmt, verify)
assert num_alloc[0] == 2
开发者ID:bddppq,项目名称:tvm,代码行数:34,代码来源:test_pass_storage_rewrite.py
示例2: test_bound_tensor_compute_op
def test_bound_tensor_compute_op():
def intrin_test():
m1 = tvm.var("m1")
n1 = tvm.var("n1")
a = tvm.placeholder((m1, n1), name='a')
c = tvm.compute((1, n1), lambda i, j : a[0, j] + a[1, j] + a[2, j], name='c')
Ab = tvm.decl_buffer(a.shape, name="Abuf", offset_factor=1)
Cb = tvm.decl_buffer(c.shape, name="Cbuf", offset_factor=1)
def intrin_func(ins, outs):
aa = ins[0]
cc = outs[0]
def _body():
ib = tvm.ir_builder.create()
ib.emit(tvm.call_extern("int32", "test", cc.access_ptr("w"), aa.access_ptr("r")))
return ib.get()
return _body()
with tvm.build_config(offset_factor=1):
return tvm.decl_tensor_intrin(c.op, intrin_func, binds={a : Ab, c : Cb})
test_func = intrin_test()
A = tvm.placeholder((20,20), name='A')
B = tvm.compute(A.shape, lambda i,j : A[i,j], name='B')
C = tvm.compute((10, 20), lambda i : test_func(B[i:10, 0:20]), name='C')
s = tvm.create_schedule(C.op)
bounds = tvm.schedule.InferBound(s)
assert isinstance(bounds, tvm.container.Map)
assert(bounds[B.op.axis[0]].extent.value == 10)
开发者ID:bddppq,项目名称:tvm,代码行数:29,代码来源:test_schedule_bound_inference.py
示例3: test_bound_nest_thread
def test_bound_nest_thread():
m = tvm.var('m')
A = tvm.placeholder((m), name='A')
A1 = tvm.compute((m,), lambda i: A[i], name='A1')
A2 = tvm.compute((m,), lambda i: A1[i] + 2, name='A2')
A3 = tvm.compute((m,), lambda i: A2[i] + 3, name='A3')
s = tvm.create_schedule(A3.op)
s[A2].set_scope("shared")
s[A1].set_scope("local")
block_x = tvm.thread_axis("blockIdx.x")
thread_x = tvm.thread_axis("threadIdx.x")
bx, tx = s[A3].split(A3.op.axis[0], factor=32)
s[A3].bind(bx, block_x)
s[A3].bind(tx, thread_x)
s[A2].compute_at(s[A3], tx)
_, xi = s[A2].split(A2.op.axis[0], nparts=1)
s[A2].bind(xi, thread_x)
s[A1].compute_at(s[A3], tx)
s = s.normalize()
bounds = tvm.schedule.InferBound(s)
assert(bounds[A1.op.axis[0]].extent.value==1)
assert(bounds[A2.op.axis[0]].extent.value==32)
assert(bounds[A3.op.axis[0]].extent == m)
开发者ID:bddppq,项目名称:tvm,代码行数:25,代码来源:test_schedule_bound_inference.py
示例4: test_multiple_kernels
def test_multiple_kernels():
N = 1024
A = tvm.placeholder((N, N), name='A')
B = tvm.compute((N, N), lambda i, j: A[i, j])
C = tvm.compute((N, N), lambda i, j: B[i, j])
s = tvm.create_schedule([C.op])
s[C].bind(s[C].op.axis[1], tvm.thread_axis("threadIdx.x"))
s[B].bind(s[B].op.axis[1], tvm.thread_axis("threadIdx.x"))
# shared memory usage: 0
# thread usage: N
for target in ['opencl', 'cuda']:
if not tvm.context(target).exist:
continue
valid = [None]
with tvm.build_config(**{"add_lower_pass": [
(2, get_verify_pass(valid,
max_shared_memory_per_block=0,
max_threads_per_block=N - 1))]}):
tvm.build(s, [A, C], target)
assert not valid[0]
with tvm.build_config(**{"add_lower_pass": [
(2, get_verify_pass(valid,
max_shared_memory_per_block=0,
max_threads_per_block=N))]}):
tvm.build(s, [A, C], target)
assert valid[0]
开发者ID:bddppq,项目名称:tvm,代码行数:33,代码来源:test_pass_verify_gpu_code.py
示例5: binary_dense
def binary_dense(data, weight):
"""Binary matrix multiplication using xor and bit-count.
Parameters
----------
data : tvm.Tensor
2-D with shape [batch, in_dim], dtype is uint32.
weight : tvm.Tensor
2-D with shape [out_dim, in_dim], dtype is uint32.
Returns
-------
output : tvm.Tensor
2-D with shape [batch, out_dim], dtype is float32.
"""
assert data.dtype == 'uint32' and weight.dtype == 'uint32', \
"dtype of data and weight should be uint32"
assert len(data.shape) == 2 and len(weight.shape) == 2, \
"only support 2-dim binary dense"
batch, in_dim = data.shape
out_dim, _ = weight.shape
k = tvm.reduce_axis((0, in_dim), name='k')
matmul = tvm.compute((batch, out_dim), lambda i, j: \
tvm.sum(tvm.popcount(data[i, k] ^ weight[j, k]), axis=k), \
tag='binary_dense')
return tvm.compute((batch, out_dim), lambda i, j: \
32 * in_dim - 2. * matmul(i, j), \
tag=tag.ELEMWISE)
开发者ID:bddppq,项目名称:tvm,代码行数:30,代码来源:bnn.py
示例6: test_in_bounds_vectorize_llvm
def test_in_bounds_vectorize_llvm():
n = 512
lanes = 2
A = tvm.placeholder((n,), name='A', dtype="float32x%d" % lanes)
B = tvm.compute((n,), lambda i: A[i], name='B')
C = tvm.compute((n,), lambda i: B[i] + tvm.const(1, A.dtype), name='C')
s = tvm.create_schedule(C.op)
xo, xi = s[C].split(C.op.axis[0], nparts=2)
_, xi = s[C].split(xi, factor=2)
s[C].parallel(xo)
s[C].vectorize(xi)
s[B].compute_at(s[C], xo)
xo, xi = s[B].split(B.op.axis[0], factor=2)
s[B].vectorize(xi)
# build and invoke the kernel.
lowered_func = tvm.lower (s, [A, C], "llvm", simple_mode=False)
print (lowered_func.body)
f = tvm.build(s, [A, C], "llvm")
ctx = tvm.cpu(0)
# launch the kernel.
a = tvm.nd.empty((n,), A.dtype).copyfrom(
np.random.uniform(size=(n, lanes)))
c = tvm.nd.empty((n,), C.dtype, ctx)
f(a, c)
tvm.testing.assert_allclose(c.asnumpy(), a.asnumpy() + 1)
开发者ID:bddppq,项目名称:tvm,代码行数:25,代码来源:test_pass_bound_checkers.py
示例7: test_inplace_rule
def test_inplace_rule():
m = 10
A = tvm.placeholder((m,), name='A')
A0 = tvm.compute((m,), lambda i: A[i], name='A0')
A1 = tvm.compute((m,), lambda i: A[i] + 1, name='A1')
AA = tvm.compute((m,), lambda i: A0[i] + A1[i] + A1[0], name='AA')
B = tvm.compute((m,), lambda i: AA[i] + 1, name='B')
s = tvm.create_schedule(B.op)
bounds = tvm.schedule.InferBound(s)
assert isinstance(bounds, tvm.container.Map)
stmt = tvm.schedule.ScheduleOps(s, bounds)
Ab = tvm.decl_buffer(A.shape, A.dtype, name='A')
Bb = tvm.decl_buffer(B.shape, B.dtype, name='B')
stmt = tvm.ir_pass.StorageFlatten(stmt, {A: Ab, B: Bb}, 64)
stmt = tvm.ir_pass.CanonicalSimplify(stmt)
stmt = tvm.ir_pass.Simplify(stmt)
stmt = tvm.ir_pass.StorageRewrite(stmt)
# verify only have one allocations.
# verify inplace folding works
num_alloc = [0]
def verify(n):
if isinstance(n, tvm.stmt.Allocate):
num_alloc[0] += 1
tvm.ir_pass.PostOrderVisit(stmt, verify)
assert num_alloc[0] == 2
开发者ID:bddppq,项目名称:tvm,代码行数:25,代码来源:test_pass_storage_rewrite.py
示例8: _declaration_dense_pack
def _declaration_dense_pack(cfg, data, weight, bias=None, out_dtype=None):
if out_dtype is None:
out_dtype = data.dtype
batch, in_dim = get_const_tuple(data.shape)
out_dim, _ = get_const_tuple(weight.shape)
# create tuning space
cfg.define_split("tile_y", batch, num_outputs=3)
cfg.define_split("tile_x", out_dim, num_outputs=3)
cfg.define_split("tile_k", in_dim, num_outputs=2)
if cfg.is_fallback:
_default_dense_pack_config(cfg, batch, out_dim, in_dim)
packw_bn = cfg["tile_x"].size[-1]
packw_shape = (out_dim // packw_bn, in_dim, packw_bn)
packw = tvm.compute(packw_shape,
lambda z, y, x: weight[z * packw_bn + x, y], name="packed_weight")
k = tvm.reduce_axis((0, in_dim), name="k")
C = tvm.compute((batch, out_dim),
lambda y, x: tvm.sum(
data[y, k].astype(out_dtype) *
packw[x // packw_bn, k, x % packw_bn].astype(out_dtype),
axis=k),
tag="dense_pack")
if bias is not None:
C = tvm.compute((batch, out_dim), lambda i, j: C[i, j] + bias[j].astype(out_dtype),
tag=tag.BROADCAST)
return C
开发者ID:bddppq,项目名称:tvm,代码行数:28,代码来源:dense.py
示例9: test_scan
def test_scan():
m = tvm.var("m")
n = tvm.var("n")
x = tvm.compute((m, n), lambda i, j: tvm.const(1, "float32"), name="x")
s_state = tvm.placeholder((m, n))
s_init = tvm.compute((1, n), lambda _, i: x[0, i], name="s_init")
x_trans = tvm.compute((m, n), lambda i, j: x[i, j] + 1, name="x_trans")
s_up1 = tvm.compute((m, n), lambda t, i: s_state[t - 1, i] + 1, name="up1")
s_update = tvm.compute((m, n), lambda t, i: s_up1[t, i] + x_trans[t, i], name="update")
s_scan = tvm.scan(s_init, s_update, s_state)
def test_getbody():
body = tvm.schedule.ScanGetBody(s_scan.op)
assert set(body) == set([s_scan.op, s_update.op, s_up1.op])
def test_attach_path():
s = tvm.create_schedule(s_scan.op)
s[x_trans].compute_at(s[s_update], s_update.op.axis[0])
apath = tvm.schedule.CreateAttachPath(s)
assert(tuple(apath[s_update.op]) == tuple([s_scan.op.scan_axis]))
assert(tuple(apath[x_trans.op]) == tuple([s_update.op.axis[0], s_scan.op.scan_axis]))
def test_fix_pt():
body = tvm.schedule.ScanGetBody(s_scan.op)
fxpt = tvm.schedule.ScanFixPointAnalysis(s_scan.op, body)
assert(fxpt[s_scan.spatial_axis_[0]].value != 0)
开发者ID:LANHUIYING,项目名称:tvm,代码行数:26,代码来源:test_schedule_graph.py
示例10: dense_default
def dense_default(data, weight, bias=None):
"""The default implementation of dense in topi.
Parameters
----------
data : tvm.Tensor
2-D with shape [batch, in_dim]
weight : tvm.Tensor
2-D with shape [out_dim, in_dim]
bias : tvm.Tensor, optional
1-D with shape [out_dim]
Returns
-------
output : tvm.Tensor
2-D with shape [batch, out_dim]
"""
assert len(data.shape) == 2 and len(weight.shape) == 2, \
"only support 2-dim dense"
if bias is not None:
assert len(bias.shape) == 1
batch, in_dim = data.shape
out_dim, _ = weight.shape
k = tvm.reduce_axis((0, in_dim), name='k')
matmul = tvm.compute((batch, out_dim), \
lambda i, j: tvm.sum(data[i, k] * weight[j, k], axis=k), \
tag='dense')
if bias is not None:
matmul = tvm.compute((batch, out_dim), \
lambda i, j: matmul[i, j] + bias[j], \
tag=tag.BROADCAST)
return matmul
开发者ID:LANHUIYING,项目名称:tvm,代码行数:34,代码来源:dense.py
示例11: test_double_splitting_with_indivisible_factors
def test_double_splitting_with_indivisible_factors():
m = 48
dtype="float32"
A = tvm.placeholder((m,), name='A', dtype=dtype)
C = tvm.compute((m,), lambda i: A[i], name='C')
D = tvm.compute((m,), lambda i: C[i], name='D')
s = tvm.create_schedule(D.op)
co, ci = s[C].split(C.op.axis[0], factor=10)
do, di = s[D].split(D.op.axis[0], 32)
s[C].compute_at(s[D], do)
target = 'llvm'
with tvm.build_config(partition_const_loop=True):
f = tvm.lower(s, [A, C, D], name="fadd1", simple_mode=False)
func = tvm.build(f, target=target)
# Find the beginning of the Halide IR corresponding to kernel code
# and make sure it doesn't have an if statements left
top_produce = find_top_produce(f.body)
assert(not any(collect_visit(top_produce, lambda x: isinstance(x, tvm.stmt.IfThenElse))))
# check functional correctness of generated code
ctx = tvm.context(target, 0)
a = tvm.nd.array(numpy.ones(m,).astype(dtype), ctx)
c = tvm.nd.array(numpy.zeros(m,).astype(dtype), ctx)
d = tvm.nd.array(numpy.zeros(m,).astype(dtype), ctx)
func(a, c, d)
tvm.testing.assert_allclose(c.asnumpy(), a.asnumpy(), rtol=1e-5)
tvm.testing.assert_allclose(d.asnumpy(), a.asnumpy(), rtol=1e-5)
开发者ID:bddppq,项目名称:tvm,代码行数:30,代码来源:test_pass_loop_partition.py
示例12: test_schedule_create
def test_schedule_create():
m = tvm.var('m')
n = tvm.var('n')
l = tvm.var('l')
A = tvm.placeholder((m, l), name='A')
B = tvm.placeholder((n, l), name='B')
AA = tvm.compute((m, l), lambda i, j: A[i, j])
T = tvm.compute((m, n, l), lambda i, j, k: AA(i, k) * B(j, k))
s = tvm.create_schedule(T.op)
s[AA].set_scope("shared")
xo, xi = s[T].split(T.op.axis[0], factor=10)
xi1, xi2 = s[T].split(xi, factor=2)
s[AA].compute_at(s[T], xi1)
xo, xi = s[AA].split(AA.op.axis[0], factor=10)
s[T].reorder(xi2, xi1)
assert T.op.axis[1] in s[T].leaf_iter_vars
# save load json
json_str = tvm.save_json(s)
s_loaded = tvm.load_json(json_str)
assert isinstance(s_loaded, tvm.schedule.Schedule)
assert(str(s_loaded.outputs[0].body) == str(s.outputs[0].body))
# pickle unpickle
dump = pkl.dumps(s)
s_loaded = pkl.loads(dump)
assert isinstance(s_loaded, tvm.schedule.Schedule)
assert(str(s_loaded.outputs[0].body) == str(s.outputs[0].body))
开发者ID:bddppq,项目名称:tvm,代码行数:28,代码来源:test_lang_schedule.py
示例13: test_llvm_persist_parallel
def test_llvm_persist_parallel():
n = 128
A = tvm.placeholder((n,), name='A')
B = tvm.compute(A.shape, lambda *i: A(*i) + 1, name='B')
C = tvm.compute(A.shape, lambda *i: tvm.sqrt(B(*i)) * 2 + 2, name='C')
s = tvm.create_schedule(C.op)
xo, xi = s[C].split(C.op.axis[0], factor=8)
xo1, xo2 = s[C].split(xo, nparts=1)
s[B].compute_at(s[C], xo1)
s[B].parallel(s[B].op.axis[0])
s[B].pragma(s[B].op.axis[0], "parallel_barrier_when_finish")
s[C].parallel(xi)
s[C].pragma(xo1, "parallel_launch_point")
s[C].pragma(xi, "parallel_stride_pattern")
def check_llvm():
if not tvm.module.enabled("llvm"):
return
# BUILD and invoke the kernel.
f = tvm.build(s, [A, C], "llvm")
ctx = tvm.cpu(0)
# launch the kernel.
a = tvm.nd.array(np.random.uniform(size=n).astype(A.dtype), ctx)
c = tvm.nd.array(np.zeros(n, dtype=C.dtype), ctx)
f(a, c)
tvm.testing.assert_allclose(c.asnumpy(),
np.sqrt(a.asnumpy() + 1) * 2 + 2,
rtol=1e-5)
check_llvm()
开发者ID:bddppq,项目名称:tvm,代码行数:30,代码来源:test_codegen_llvm.py
示例14: my_clip
def my_clip(x, a_min, a_max):
"""Unlike topi's current clip, put min and max into two stages."""
const_min = tvm.const(a_min, x.dtype)
const_max = tvm.const(a_max, x.dtype)
x = tvm.compute(x.shape, lambda *i: tvm.min(x(*i), const_max), name="clipA")
x = tvm.compute(x.shape, lambda *i: tvm.max(x(*i), const_min), name="clipB")
return x
开发者ID:LANHUIYING,项目名称:tvm,代码行数:7,代码来源:test_benchmark_topi_conv2d.py
示例15: test_scan_group
def test_scan_group():
m = tvm.var("m")
n = tvm.var("n")
x = tvm.compute((m, n), lambda i, j: tvm.const(1, "float32"), name="x")
s_state = tvm.placeholder((m, n))
s_init = tvm.compute((1, n), lambda _, i: x[0, i])
s_update1 = tvm.compute((m, n), lambda t, i: s_state[t-1, i] + x[t, i])
s_update2 = tvm.compute((m, n), lambda t, i: s_update1[t, i] + 1)
s_update3 = tvm.compute((m, n), lambda t, i: s_update2[t, i] + 1)
res = tvm.scan(s_init, s_update3, s_state, inputs=x)
s = tvm.create_schedule(res.op)
assert s[s_update1].group is not None
assert s[s_update2].group == s[s_update1].group
# Assign within group, is valid
s[s_update1].compute_at(s[s_update2], s_update2.op.axis[1])
# create a new group, for [s_update2 and s_update1]
g2 = s.create_group(outputs=s_update2, inputs=[s_state, x])
assert g2.group is not None
assert g2.group == s[s_update3].group
assert s[s_update2].group == g2
assert s[s_update1].group == g2
g2.compute_at(s[s_update3], s_update3.op.axis[1])
assert g2.attach_stage == s[s_update3]
try:
# compute outside group error.
s[s_update2].compute_at(s[s_init], s_init.op.axis[0])
assert False
except tvm.TVMError:
pass
开发者ID:LANHUIYING,项目名称:tvm,代码行数:31,代码来源:test_lang_group.py
示例16: _declaration_dense_nopack
def _declaration_dense_nopack(cfg, data, weight, bias=None, out_dtype=None):
if out_dtype is None:
out_dtype = data.dtype
batch, in_dim = get_const_tuple(data.shape)
out_dim, _ = get_const_tuple(weight.shape)
# create tuning space
cfg.define_split("tile_x", out_dim, num_outputs=2)
cfg.define_split("tile_y", batch, num_outputs=2)
cfg.define_split("tile_k", in_dim, num_outputs=2)
if cfg.is_fallback:
_default_dense_nopack_config(cfg, batch, out_dim, in_dim)
vec = cfg["tile_k"].size[-1]
k = tvm.reduce_axis((0, in_dim // vec), "k")
CC = tvm.compute((batch, out_dim, vec),
lambda z, y, x: tvm.sum(
data[z, k * vec + x].astype(out_dtype) *
weight[y, k * vec + x].astype(out_dtype), axis=k))
kk = tvm.reduce_axis((0, vec), "kk")
C = tvm.compute((batch, out_dim),
lambda y, x: tvm.sum(CC[y, x, kk], axis=kk),
tag="dense_nopack")
if bias is not None:
C = tvm.compute((batch, out_dim), lambda i, j: C[i, j] + bias[j].astype(out_dtype),
tag=tag.BROADCAST)
return C
开发者ID:bddppq,项目名称:tvm,代码行数:28,代码来源:dense.py
示例17: test_storage_share_gpu
def test_storage_share_gpu():
m = tvm.var('m')
A = [tvm.placeholder((m), name='A')]
num_stage = 5
for t in range(num_stage):
A.append(tvm.compute((m,), lambda i: A[-1][i] + (t+1), name='A%d_s' % t))
A.append(tvm.compute((m,), lambda i: A[-1][i], name='A%d' % t))
s = tvm.create_schedule(A[-1].op)
for t in range(num_stage):
x = A[2*t+2].op.axis[0]
bx, tx = s[A[2*t+2]].split(x, factor=32)
s[A[2*t+2]].bind(bx, tvm.thread_axis("blockIdx.x"))
s[A[2*t+2]].bind(tx, tvm.thread_axis("threadIdx.x"))
s[A[2*t+1]].compute_at(s[A[2*t+2]], tx)
s[A[2*t+1]].set_scope("shared")
bounds = tvm.schedule.InferBound(s)
assert isinstance(bounds, tvm.container.Map)
stmt = tvm.schedule.ScheduleOps(s, bounds)
Ab = tvm.decl_buffer(A[0].shape, A[0].dtype, name='A')
Bb = tvm.decl_buffer(A[0].shape, A[0].dtype, name='B')
stmt = tvm.ir_pass.StorageFlatten(stmt, {A[0]: Ab, A[-1]: Bb}, 64)
stmt = tvm.ir_pass.CanonicalSimplify(stmt)
stmt = tvm.ir_pass.Simplify(stmt)
stmt = tvm.ir_pass.StorageRewrite(stmt)
alloc_stats = {"global": 0, "shared": 0}
def verify(n):
if isinstance(n, tvm.stmt.AttrStmt):
if n.attr_key == "storage_scope":
alloc_stats[n.value.value] += 1
tvm.ir_pass.PostOrderVisit(stmt, verify)
assert alloc_stats["global"] == 2
assert alloc_stats["shared"] == num_stage
开发者ID:bddppq,项目名称:tvm,代码行数:34,代码来源:test_pass_storage_rewrite.py
示例18: test_storage_sync
def test_storage_sync():
m = tvm.var('m')
l = tvm.var('l')
A = tvm.placeholder((m, l), name='A')
A1 = tvm.compute((m, l), lambda i, j: A[i, j], name='A1')
A2 = tvm.compute((m, l), lambda i, j: A1[i, j] + 3, name='A2')
s = tvm.create_schedule(A2.op)
xo, xi = s[A2].split(A2.op.axis[0], factor=8)
s[A2].bind(xo, tvm.thread_axis("blockIdx.x"))
s[A1].compute_at(s[A2], xo)
s[A1].set_scope("shared")
bounds = tvm.schedule.InferBound(s)
assert isinstance(bounds, tvm.container.Map)
stmt = tvm.schedule.ScheduleOps(s, bounds)
Ab = tvm.decl_buffer(A.shape, A.dtype, name='A')
A2b = tvm.decl_buffer(A2.shape, A2.dtype, name='A2')
stmt = tvm.ir_pass.StorageFlatten(stmt, {A: Ab, A2: A2b}, 64)
f = tvm.ir_pass.MakeAPI(stmt, "test", [Ab, A2b], 0, True)
flist = tvm.ir_pass.SplitHostDevice(f)
f = flist[1]
f = tvm.ir_pass.ThreadSync(f, "shared")
body_list = tvm.make.stmt_list(f.body.body.body.body)
assert(body_list[1].value.name == "tvm_storage_sync")
开发者ID:gwli,项目名称:tvm,代码行数:26,代码来源:test_pass_storage_sync.py
示例19: test_copy_pad_split
def test_copy_pad_split():
m = 4 * 3
A = tvm.placeholder((m, ), name="A")
Apad = tvm.compute((m + 2,), lambda i:
tvm.select(tvm.all(i >= 1, i <= m),
A[i - 1], 0.0), "Apad")
B = tvm.compute((m,), lambda i: Apad[i] + Apad[i + 1] + Apad[i + 2])
s = tvm.create_schedule(B.op)
xo, xi = s[B].split(B.op.axis[0], factor=4)
s[Apad].compute_at(s[B], xo)
s[Apad].pragma(s[Apad].op.axis[0], "memcpy")
bounds = tvm.schedule.InferBound(s)
stmt = tvm.schedule.ScheduleOps(s, bounds)
Ab = tvm.decl_buffer(A.shape, A.dtype, name='A')
Bb = tvm.decl_buffer(B.shape, B.dtype, name='B')
stmt = tvm.ir_pass.StorageFlatten(stmt, {A: Ab, B: Bb}, 64)
stmt = tvm.ir_pass.Simplify(stmt)
stmt = tvm.ir_pass.CanonicalSimplify(stmt)
def cb(src, dst, pad_before, pad_after, pad_value):
assert(dst.elem_offset.value == 0)
assert_expr_equal(src.elem_offset, tvm.max(xo * 4, 1) - 1)
rpad_before = tvm.max(1 - xo * 4, 0)
rpad_after = tvm.max(xo * 4 - 7, 0)
assert_expr_equal(pad_before[0], rpad_before)
assert_expr_equal(pad_after[0], rpad_after)
assert_expr_equal(src.shape[0], 6 - rpad_before - rpad_after)
return tvm.make.Evaluate(0)
stmt = tvm.ir_pass.InjectCopyIntrin(stmt, "memcpy", cb)
开发者ID:LANHUIYING,项目名称:tvm,代码行数:29,代码来源:test_pass_inject_copy_intrin.py
示例20: global_pool
def global_pool(data, pool_type):
"""Perform global pooling on the data
Parameters
----------
data : tvm.Tensor
4-D with shape [batch, channel, in_height, in_width]
pool_type : str
Pool type, 'max' or 'avg'
Returns
-------
output : tvm.Tensor
4-D with shape [batch, channel, 1, 1]
"""
assert len(data.shape) == 4, "only support 4-dim pooling"
batch, channel, height, width = data.shape
dheight = tvm.reduce_axis((0, height))
dwidth = tvm.reduce_axis((0, width))
if pool_type == 'max':
return tvm.compute((batch, channel, 1, 1), lambda n, c, h, w: \
tvm.max(data[n, c, dheight, dwidth], axis=[dheight, dwidth]), \
tag="global_pool_max")
elif pool_type == 'avg':
tsum = tvm.compute((batch, channel, 1, 1), lambda n, c, h, w: \
tvm.sum(data[n, c, dheight, dwidth], axis=[dheight, dwidth]), \
tag="global_pool_sum")
return tvm.compute((batch, channel, 1, 1), lambda n, c, h, w: \
tsum[n, c, h, w] / (height*width).astype(tsum.dtype), \
tag=tag.ELEMWISE)
else:
raise ValueError("Pool type should be 'avg' or 'max'.")
开发者ID:gwli,项目名称:tvm,代码行数:35,代码来源:pooling.py
注:本文中的tvm.compute函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。 |
请发表评论