This article collects typical usage examples of the Python function tvm.sum. If you have been wondering what tvm.sum does, how to call it, and what real code that uses it looks like, the hand-picked examples below should help.
Twenty code examples of the sum function are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code samples.
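Before the examples, here is a minimal sketch of the pattern they all share: tvm.sum is used inside tvm.compute together with one or more tvm.reduce_axis domains. This uses the same pre-1.0 TVM API as the examples below; the names n, A, k, and B are illustrative only, not taken from any particular example.

import tvm

n = tvm.var("n")
A = tvm.placeholder((n,), name="A")
# k is the reduction domain [0, n); tvm.sum(expr, axis=k) folds it away.
k = tvm.reduce_axis((0, n), name="k")
# Output shape (1,): the whole vector A is reduced into a single element.
B = tvm.compute((1,), lambda _: tvm.sum(A[k], axis=k), name="B")
s = tvm.create_schedule(B.op)
print(tvm.lower(s, [A, B], simple_mode=True))  # inspect the generated loop nest

The same pattern scales to matrix multiplication and convolution by indexing multi-dimensional tensors with the reduction variables, as the dense, GEMM, and conv2d examples below show.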
Example 1: test_reduce_combiner_simplify
def test_reduce_combiner_simplify():
    ck = CanonicalChecker()
    dummy = tvm.var('dummy')
    comm_reducer = tvm.comm_reducer
    prod = comm_reducer(lambda x, y: x*y, lambda t0: tvm.const(1, t0))

    sum_or_prod = comm_reducer(
        lambda x, y: tvm.expr.Select(dummy < 0,
                                     x + y, x*y),
        lambda t0: tvm.expr.Select(dummy < 0,
                                   tvm.const(0, t0), tvm.const(1, t0)))
    sum_and_prod = comm_reducer(
        lambda x, y: (x[0] + y[0],
                      x[1]*y[1]),
        lambda t0, t1: (tvm.const(0, t0),
                        tvm.const(5, t0) - tvm.const(4, t0)))
    some_reducer1 = comm_reducer(
        lambda x, y: (x[0] + y[0],
                      x[0] + y[0] + x[1] + y[1],
                      x[0]*y[2] + y[0]*x[2],
                      x[1] + y[2],
                      4.0),
        lambda t0, t1, t2, t3, t4: (tvm.const(0, t0),
                                    tvm.const(1, t1),
                                    tvm.const(2, t2),
                                    tvm.const(3, t3),
                                    tvm.const(4, t4)))

    k = tvm.reduce_axis((0, 10), name="k")
    A = tvm.placeholder((10,), name='A')

    # Test that SimplifyCombiner makes use of vranges
    ck.analyzer.update(dummy, tvm.arith.ConstIntBound(-10, -4))
    ck.verify(sum_or_prod(A[k], k), tvm.sum(A[k], k))
    ck.analyzer.update(dummy, tvm.arith.ConstIntBound(5, 9), True)
    ck.verify(sum_or_prod(A[k], k), prod(A[k], k))
    ck.analyzer.update(dummy, tvm.arith.ConstIntBound(-10, 100), True)

    ck.verify(sum_and_prod((A[k], A[10-k]), k)[0], tvm.sum(A[k], k))
    ck.verify(sum_and_prod((A[k], A[10-k]), k)[1], prod(A[10-k], k))

    reference_simplified_sources = [[A[0]],
                                    [A[0], A[1]],
                                    [A[0], A[2]],
                                    [A[0], A[1], A[2], A[3]],
                                    [A[4]]]
    for j in range(5):
        # Here we use the j-th component of the result, so only it and the components it
        # depends on are left.
        simplified = ck.analyzer.canonical_simplify(
            some_reducer1((A[0], A[1], A[2], A[3], A[4]), k)[j])

        # Check that the remaining components are the expected ones.
        for lhs, rhs in zip(simplified.source, reference_simplified_sources[j]):
            assert tvm.ir_pass.Equal(lhs, rhs)

    # Test that components with side effects are not removed
    side_effect = lambda *xs: tvm.make.Call("int32", "dummy", xs, tvm.expr.Call.Intrinsic, None, 0)
    ck.verify(sum_and_prod((A[k], side_effect(A[10-k])), k)[0],
              sum_and_prod((A[k], side_effect(A[10-k])), k)[0])
    ck.verify(sum_and_prod((side_effect(A[k]), A[10-k]), k)[0],
              tvm.sum(side_effect(A[k]), k))
Developer: bddppq, Project: tvm, Lines: 60, Source: test_arith_canonical_simplify.py
Example 2: _declaration_dense_nopack
def _declaration_dense_nopack(cfg, data, weight, bias=None, out_dtype=None):
    if out_dtype is None:
        out_dtype = data.dtype
    batch, in_dim = get_const_tuple(data.shape)
    out_dim, _ = get_const_tuple(weight.shape)
    # create tuning space
    cfg.define_split("tile_x", out_dim, num_outputs=2)
    cfg.define_split("tile_y", batch, num_outputs=2)
    cfg.define_split("tile_k", in_dim, num_outputs=2)
    if cfg.is_fallback:
        _default_dense_nopack_config(cfg, batch, out_dim, in_dim)

    vec = cfg["tile_k"].size[-1]
    k = tvm.reduce_axis((0, in_dim // vec), "k")
    CC = tvm.compute((batch, out_dim, vec),
                     lambda z, y, x: tvm.sum(
                         data[z, k * vec + x].astype(out_dtype) *
                         weight[y, k * vec + x].astype(out_dtype), axis=k))

    kk = tvm.reduce_axis((0, vec), "kk")
    C = tvm.compute((batch, out_dim),
                    lambda y, x: tvm.sum(CC[y, x, kk], axis=kk),
                    tag="dense_nopack")
    if bias is not None:
        C = tvm.compute((batch, out_dim), lambda i, j: C[i, j] + bias[j].astype(out_dtype),
                        tag=tag.BROADCAST)
    return C
Developer: bddppq, Project: tvm, Lines: 28, Source: dense.py
Example 3: test_lstm_cell_inline
def test_lstm_cell_inline():
    num_step = 128
    num_input = 256
    num_hidden = 1152
    batch_size = 4
    # Global transition matrix
    X = tvm.placeholder((num_step - 1, batch_size, num_input), name="X")
    Wi2h = tvm.placeholder((4, num_hidden, num_input), name="Wi2h")
    Wh2h = tvm.placeholder((4, num_hidden, num_hidden), name="Wh2h")
    # h: output hidden state, c: cell state.
    s_state_h = tvm.placeholder((num_step, batch_size, num_hidden))
    s_state_c = tvm.placeholder((num_step, batch_size, num_hidden))
    s_init_c = tvm.compute((1, batch_size, num_hidden),
                           lambda *i: 0.0, name="init_c")
    s_init_h = tvm.compute((1, batch_size, num_hidden),
                           lambda *i: 0.0, name="init_h")
    # LSTM transition
    k = tvm.reduce_axis((0, num_input), name="ki2h")
    s_i2h = tvm.compute(
        (num_step, 4, batch_size, num_hidden),
        lambda t, x, i, j: tvm.sum(X[t - 1, i, k] * Wi2h[x, j, k], axis=k),
        name="s_i2h")
    k = tvm.reduce_axis((0, num_hidden), name="ki2h")
    s_h2h = tvm.compute(
        (num_step, 4, batch_size, num_hidden),
        lambda t, x, i, j: tvm.sum(s_state_h[t - 1, i, k] * Wh2h[x, j, k], axis=k),
        name="s_h2h")
    # Gate rules
    gates = tvm.compute(s_i2h.shape, lambda *i:
                        s_i2h(*i) + s_h2h(*i), name="gates")
    gshape = (num_step, batch_size, num_hidden)
    in_gate = tvm.compute(gshape, lambda t, i, j: tvm.sigmoid(gates[t, 0, i, j]), name="in_gate")
    in_transform = tvm.compute(gshape, lambda t, i, j: tvm.tanh(gates[t, 1, i, j]), name="in_transform")
    forget_gate = tvm.compute(gshape, lambda t, i, j: tvm.sigmoid(gates[t, 2, i, j]), name="forget_gate")
    out_gate = tvm.compute(gshape, lambda t, i, j: tvm.sigmoid(gates[t, 3, i, j]), name="out_gate")
    next_c = tvm.compute(gshape,
                         lambda t, i, j:
                         forget_gate[t, i, j] * s_state_c[t - 1, i, j] +
                         in_gate[t, i, j] * in_transform[t, i, j], name="next_c")
    next_h = tvm.compute(gshape,
                         lambda t, i, j: out_gate[t, i, j] * tvm.tanh(next_c[t, i, j]), name="next_h")
    update_c = tvm.compute(gshape, lambda *i: next_c(*i), name="update_c")
    update_h = tvm.compute(gshape, lambda *i: next_h(*i), name="update_h")
    # schedule
    scan_h, scan_c = tvm.scan(
        [s_init_h, s_init_c],
        [update_h, update_c],
        [s_state_h, s_state_c],
        inputs=[X],
        name="lstm_scan")
    # schedule
    s = tvm.create_schedule(scan_h.op)
    # Inline gate computations
    s[gates].compute_inline()
    s[in_gate].compute_inline()
    s[in_transform].compute_inline()
    s[forget_gate].compute_inline()
    s[out_gate].compute_inline()
    # verify we can lower correctly
    tvm.lower(s, [X, Wi2h, Wh2h, scan_h, scan_c])
Developer: LANHUIYING, Project: tvm, Lines: 60, Source: test_schedule_lstm.py
Example 4: test_tensor_reduce_multi_axis
def test_tensor_reduce_multi_axis():
    m = tvm.var('m')
    n = tvm.var('n')
    A = tvm.placeholder((m, n), name='A')
    k1 = tvm.reduce_axis((0, n), "k")
    k2 = tvm.reduce_axis((0, m), "k")
    C = tvm.compute((1,), lambda _: tvm.sum(A[k1, k2], axis=(k1, k2)))
    C = tvm.compute((1,), lambda _: tvm.sum(A[k1, k2], axis=[k1, k2]))
Developer: gwli, Project: tvm, Lines: 8, Source: test_lang_tensor.py
Example 5: test_reduce_simplify
def test_reduce_simplify():
    ck = CanonicalChecker()
    k = tvm.reduce_axis((0, 10), name="k")
    j = tvm.reduce_axis((-5, 3), name="j")
    A = tvm.placeholder((10,), name='A')
    ck.verify(tvm.sum(tvm.expr.Select(k + j < 12, k + j, 0), [k, j]),
              tvm.sum(k + j, [k, j]))
    ck.verify(tvm.sum(A[3], []), A[3])
    # The rule below is not typical, removed for now
    ck.verify(tvm.sum(k / 10, k), tvm.sum(tvm.const(0, "int32"), k))
Developer: bddppq, Project: tvm, Lines: 10, Source: test_arith_canonical_simplify.py
Example 6: _conv
def _conv(n, h, w, co, vh, vw, vc):
    b1b2 = (b1+b2).astype(out_dtype)
    if dorefa:
        return tvm.sum(
            (tvm.popcount(data_vec[n, h, w, vh*HSTR+dh, vw*WSTR+dw, ci, b1].astype(out_dtype) &
                          kernel_vec[co, dh, dw, ci, vc, b2].astype(out_dtype)) -
             tvm.popcount(data_vec[n, h, w, vh*HSTR+dh, vw*WSTR+dw, ci, b1].astype(out_dtype) &
                          ~kernel_vec[co, dh, dw, ci, vc, b2]).astype(out_dtype)) << b1b2,
            axis=[dh, dw, ci, b1, b2])

    return tvm.sum(tvm.popcount(
        data_vec[n, h, w, vh*HSTR+dh, vw*WSTR+dw, ci, b1] &
        kernel_vec[co, dh, dw, ci, vc, b2]).astype(out_dtype) << b1b2,
                   axis=[dh, dw, ci, b1, b2])
Developer: LANHUIYING, Project: tvm, Lines: 14, Source: bitserial_conv2d.py
Example 7: _conv
def _conv(n, co, h, w, vh, vw, vc):
    b1b2 = (b1+b2).astype(out_dtype)
    if unipolar:
        return tvm.sum((tvm.popcount(
            data_vec[n, h, w, ci, vh*HSTR+dh, vw*WSTR+dw, b1].astype(out_dtype) &
            kernel_vec[co, ci, dh, dw, b2, vc].astype(out_dtype)) -
                        tvm.popcount(
                            data_vec[n, h, w, ci, vh*HSTR+dh, vw*WSTR+dw, b1].astype(out_dtype)
                            & ~kernel_vec[co, ci, dh, dw, b2, vc]).astype(out_dtype)) << b1b2,
                       axis=[ci, dh, dw, b1, b2])

    return tvm.sum((tvm.popcount(
        data_vec[n, h, w, ci, vh*HSTR+dh, vw*WSTR+dw, b1] &
        kernel_vec[co, ci, dh, dw, b2, vc])).astype(out_dtype) << b1b2,
                   axis=[ci, dh, dw, b1, b2])
Developer: bddppq, Project: tvm, Lines: 15, Source: bitserial_conv2d.py
Example 8: matmul
def matmul(N, L, M, dtype):
    A = tvm.placeholder((N, L), name='A', dtype=dtype)
    B = tvm.placeholder((L, M), name='B', dtype=dtype)

    k = tvm.reduce_axis((0, L), name='k')
    C = tvm.compute((N, M), lambda i, j: tvm.sum(A[i, k] * B[k, j], axis=k), name='C')
    s = tvm.create_schedule(C.op)

    # schedule
    y, x = s[C].op.axis
    k = s[C].op.reduce_axis[0]

    ##### define space begin #####
    cfg = autotvm.get_config()
    cfg.define_split("tile_y", y, num_outputs=2)
    cfg.define_split("tile_x", x, num_outputs=2)
    ##### define space end #####

    # schedule according to config
    yo, yi = cfg["tile_y"].apply(s, C, y)
    xo, xi = cfg["tile_x"].apply(s, C, x)
    s[C].reorder(yo, xo, k, yi, xi)

    return s, [A, B, C]
Developer: LANHUIYING, Project: tvm, Lines: 25, Source: test_autotvm_common.py
Example 9: binary_dense
def binary_dense(data, weight):
    """Binary matrix multiplication using xor and bit-count.

    Parameters
    ----------
    data : tvm.Tensor
        2-D with shape [batch, in_dim], dtype is uint32.

    weight : tvm.Tensor
        2-D with shape [out_dim, in_dim], dtype is uint32.

    Returns
    -------
    output : tvm.Tensor
        2-D with shape [batch, out_dim], dtype is float32.
    """
    assert data.dtype == 'uint32' and weight.dtype == 'uint32', \
        "dtype of data and weight should be uint32"
    assert len(data.shape) == 2 and len(weight.shape) == 2, \
        "only support 2-dim binary dense"
    batch, in_dim = data.shape
    out_dim, _ = weight.shape
    k = tvm.reduce_axis((0, in_dim), name='k')
    matmul = tvm.compute((batch, out_dim), lambda i, j: \
                         tvm.sum(tvm.popcount(data[i, k] ^ weight[j, k]), axis=k), \
                         tag='binary_dense')

    return tvm.compute((batch, out_dim), lambda i, j: \
                       32 * in_dim - 2. * matmul(i, j), \
                       tag=tag.ELEMWISE)
Developer: bddppq, Project: tvm, Lines: 30, Source: bnn.py
Example 10: _sample
def _sample(i, c, ph, pw):
    roi = rois[i]
    batch_index = roi[0].astype('int32')
    roi_start_w, roi_start_h, roi_end_w, roi_end_h = roi[1], roi[2], roi[3], roi[4]
    roi_start_h *= spatial_scale
    roi_end_h *= spatial_scale
    roi_start_w *= spatial_scale
    roi_end_w *= spatial_scale

    # force malformed ROIs to be 1x1
    roi_h = tvm.max(roi_end_h - roi_start_h, tvm.const(1.0, dtype))
    roi_w = tvm.max(roi_end_w - roi_start_w, tvm.const(1.0, dtype))

    bin_h = roi_h / pooled_size_h
    bin_w = roi_w / pooled_size_w

    if sample_ratio > 0:
        roi_bin_grid_h = roi_bin_grid_w = tvm.const(sample_ratio, 'int32')
    else:
        roi_bin_grid_h = tvm.ceil(roi_h / pooled_size_h).astype('int32')
        roi_bin_grid_w = tvm.ceil(roi_w / pooled_size_w).astype('int32')

    count = roi_bin_grid_h * roi_bin_grid_w
    rh = tvm.reduce_axis((0, roi_bin_grid_h))
    rw = tvm.reduce_axis((0, roi_bin_grid_w))
    roi_start_h += ph * bin_h
    roi_start_w += pw * bin_w
    return tvm.sum(_bilinear(batch_index, c,
                             roi_start_h + (rh + 0.5) * bin_h / roi_bin_grid_h,
                             roi_start_w + (rw + 0.5) * bin_w / roi_bin_grid_w) / count,
                   axis=[rh, rw])
Developer: bddppq, Project: tvm, Lines: 31, Source: roi_align.py
Example 11: test_local_gemm
def test_local_gemm():
    if not tvm.module.enabled("opengl"):
        return
    if not tvm.module.enabled("llvm"):
        return

    nn = 1024
    n = tvm.var('n')
    n = tvm.convert(nn)
    m = n
    l = n
    A = tvm.placeholder((n, l), name='A', dtype='int32')
    B = tvm.placeholder((m, l), name='B', dtype='int32')
    k = tvm.reduce_axis((0, l), name='k')
    C = tvm.compute((n, m), lambda ii, jj: tvm.sum(A[ii, k] * B[jj, k], axis=k),
                    name='CC')
    s = tvm.create_schedule(C.op)
    s[C].opengl()
    print(tvm.lower(s, [A, B, C], simple_mode=True))

    f = tvm.build(s, [A, B, C], "opengl", name="gemm")
    print("------opengl code------")
    print(f.imported_modules[0].get_source(fmt="gl"))

    ctx = tvm.opengl()
    n, m, l = nn, nn, nn
    a_np = np.random.uniform(low=0, high=10, size=(n, l)).astype(A.dtype)
    b_np = np.random.uniform(low=0, high=10, size=(m, l)).astype(B.dtype)
    a = tvm.nd.array(a_np, ctx)
    b = tvm.nd.array(b_np, ctx)
    c = tvm.nd.array(np.zeros((n, m), dtype=C.dtype), ctx)
    f(a, b, c)

    tvm.testing.assert_allclose(c.asnumpy(), np.dot(a_np, b_np.T))
Developer: bddppq, Project: tvm, Lines: 35, Source: test_local_gemm.py
Example 12: test_in_bounds_conv_llvm
def test_in_bounds_conv_llvm(loop_tiling=False):
    HSTR = WSTR = 1
    in_channel = 128
    kernel_height = kernel_width = 3
    out_channel = 64
    batch_size = 1
    in_height = in_width = 64
    out_height = out_width = in_height - kernel_height + 1
    data = tvm.placeholder((batch_size, in_channel, in_height, in_width), name='data')
    kernel = tvm.placeholder((kernel_height, kernel_width, in_channel,
                              out_channel), name='kernel')
    ic = tvm.reduce_axis((0, in_channel), name='ic')
    kh = tvm.reduce_axis((0, kernel_height), name='kh')
    kw = tvm.reduce_axis((0, kernel_width), name='kw')
    conv = tvm.compute((batch_size, out_channel, out_height, out_width),
                       lambda n, oc, oh, ow: tvm.sum(data[n, ic, oh*HSTR + kh, ow*WSTR + kw] *
                                                     kernel[kh, kw, ic, oc],
                                                     axis=[ic, kh, kw]),
                       name="conv2d")
    s = tvm.create_schedule(conv.op)

    n, oc, oh, ow = conv.op.axis
    if loop_tiling:
        oho, owo, ohi, owi = s[conv].tile(oh, ow, 16, 16)
    lowered_func = tvm.lower(s, [data, kernel, conv], simple_mode=True)
    print(lowered_func.body)
    ctx = tvm.cpu(0)

    f = tvm.build(s, [data, kernel, conv], "llvm")
    data_input = tvm.nd.array(np.random.uniform(
        size=(batch_size, in_channel, in_height, in_width)).astype(tvm.float32), ctx)
    kernel_input = tvm.nd.array(np.random.uniform(
        size=(kernel_height, kernel_width, in_channel, out_channel)).astype(tvm.float32), ctx)
    conv_out = tvm.nd.empty((batch_size, out_channel, out_height, out_width), tvm.float32, ctx)
    f(data_input, kernel_input, conv_out)
Developer: bddppq, Project: tvm, Lines: 35, Source: test_pass_bound_checkers.py
Example 13: matmul_v1
def matmul_v1(N, L, M, dtype):
    A = tvm.placeholder((N, L), name='A', dtype=dtype)
    B = tvm.placeholder((L, M), name='B', dtype=dtype)

    k = tvm.reduce_axis((0, L), name='k')
    C = tvm.compute((N, M), lambda i, j: tvm.sum(A[i, k] * B[k, j], axis=k), name='C')
    s = tvm.create_schedule(C.op)

    # schedule
    y, x = s[C].op.axis
    k = s[C].op.reduce_axis[0]

    # 2. get the config object
    cfg = autotvm.get_config()

    # 3. define search space
    cfg.define_knob("tile_y", [1, 2, 4, 8, 16])
    cfg.define_knob("tile_x", [1, 2, 4, 8, 16])

    # 4. schedule according to config
    yo, yi = s[C].split(y, cfg['tile_y'].val)
    xo, xi = s[C].split(x, cfg['tile_x'].val)
    s[C].reorder(yo, xo, k, yi, xi)

    return s, [A, B, C]
Developer: bddppq, Project: tvm, Lines: 26, Source: tune_simple_template.py
Example 14: global_pool
def global_pool(data, pool_type):
    """Perform global pooling on the data

    Parameters
    ----------
    data : tvm.Tensor
        4-D with shape [batch, channel, in_height, in_width]

    pool_type : str
        Pool type, 'max' or 'avg'

    Returns
    -------
    output : tvm.Tensor
        4-D with shape [batch, channel, 1, 1]
    """
    assert len(data.shape) == 4, "only support 4-dim pooling"
    batch, channel, height, width = data.shape
    dheight = tvm.reduce_axis((0, height))
    dwidth = tvm.reduce_axis((0, width))

    if pool_type == 'max':
        return tvm.compute((batch, channel, 1, 1), lambda n, c, h, w: \
                           tvm.max(data[n, c, dheight, dwidth], axis=[dheight, dwidth]), \
                           tag="global_pool_max")
    elif pool_type == 'avg':
        tsum = tvm.compute((batch, channel, 1, 1), lambda n, c, h, w: \
                           tvm.sum(data[n, c, dheight, dwidth], axis=[dheight, dwidth]), \
                           tag="global_pool_sum")
        return tvm.compute((batch, channel, 1, 1), lambda n, c, h, w: \
                           tsum[n, c, h, w] / (height*width).astype(tsum.dtype), \
                           tag=tag.ELEMWISE)
    else:
        raise ValueError("Pool type should be 'avg' or 'max'.")
Developer: gwli, Project: tvm, Lines: 35, Source: pooling.py
Example 15: test_dot
def test_dot():
    nn = 12
    n = tvm.convert(nn)
    A = tvm.placeholder((n,), name='A')
    B = tvm.placeholder((n,), name='B')
    k = tvm.reduce_axis((0, n), 'k')
    C = tvm.compute((1,), lambda _: tvm.sum(A[k] * B[k], axis=k), name='C')
    s = tvm.create_schedule(C.op)
    fapi = lower(s, [A, B, C])

    def verify(target):
        if not tvm.module.enabled(target):
            print("Target %s is not enabled" % target)
            return
        f = tvm.codegen.build_module(fapi, target)
        # verify
        ctx = tvm.cpu(0)
        a = tvm.nd.array(np.random.uniform(size=(nn,)).astype(A.dtype), ctx)
        b = tvm.nd.array(np.random.uniform(size=(nn,)).astype(B.dtype), ctx)
        c = tvm.nd.array(np.zeros((1,), dtype=C.dtype), ctx)
        f(a, b, c)
        tvm.testing.assert_allclose(
            c.asnumpy(), np.dot(a.asnumpy(), b.asnumpy()), rtol=1e-4)

    verify("llvm")
Developer: bddppq, Project: tvm, Lines: 25, Source: test_dot.py
Example 16: packed_conv2d
def packed_conv2d(data,
                  kernel,
                  padding,
                  strides,
                  out_dtype="int32"):
    """ Packed conv2d function.
    """
    if padding[0]:
        pad_data = topi.nn.pad(data, [0, 0, padding[0], padding[1], 0, 0], name="pad_data")
    else:
        pad_data = data
    assert len(data.shape) == 6
    assert len(kernel.shape) == 6
    oheight = topi.util.simplify((pad_data.shape[2] - kernel.shape[2]) // strides[0] + 1)
    owidth = topi.util.simplify((pad_data.shape[3] - kernel.shape[3]) // strides[1] + 1)
    oshape = (data.shape[0], kernel.shape[0], oheight, owidth, data.shape[4], kernel.shape[4])

    ishape = topi.util.get_const_tuple(data.shape)
    kshape = topi.util.get_const_tuple(kernel.shape)
    assert data.dtype == "int8", data.dtype
    assert kernel.dtype == "int8", kernel.dtype

    d_i = tvm.reduce_axis((0, kshape[2]), name='d_i')
    d_j = tvm.reduce_axis((0, kshape[3]), name='d_j')
    k_o = tvm.reduce_axis((0, ishape[1]), name='k_o')
    k_i = tvm.reduce_axis((0, ishape[-1]), name='k_i')
    hstride, wstride = strides
    res = tvm.compute(
        oshape,
        lambda b_o, c_o, i, j, b_i, c_i: tvm.sum(
            pad_data[b_o, k_o, i*hstride+d_i, j*wstride+d_j, b_i, k_i].astype(out_dtype) *
            kernel[c_o, k_o, d_i, d_j, c_i, k_i].astype(out_dtype),
            axis=[k_o, d_i, d_j, k_i]),
        name="res", tag="packed_conv2d")
    return res
Developer: LANHUIYING, Project: tvm, Lines: 34, Source: vta_conv2d.py
Example 17: dense_default
def dense_default(data, weight, bias=None):
    """The default implementation of dense in topi.

    Parameters
    ----------
    data : tvm.Tensor
        2-D with shape [batch, in_dim]

    weight : tvm.Tensor
        2-D with shape [out_dim, in_dim]

    bias : tvm.Tensor, optional
        1-D with shape [out_dim]

    Returns
    -------
    output : tvm.Tensor
        2-D with shape [batch, out_dim]
    """
    assert len(data.shape) == 2 and len(weight.shape) == 2, \
        "only support 2-dim dense"
    if bias is not None:
        assert len(bias.shape) == 1
    batch, in_dim = data.shape
    out_dim, _ = weight.shape
    k = tvm.reduce_axis((0, in_dim), name='k')
    matmul = tvm.compute((batch, out_dim), \
                         lambda i, j: tvm.sum(data[i, k] * weight[j, k], axis=k), \
                         tag='dense')
    if bias is not None:
        matmul = tvm.compute((batch, out_dim), \
                             lambda i, j: matmul[i, j] + bias[j], \
                             tag=tag.BROADCAST)
    return matmul
Developer: LANHUIYING, Project: tvm, Lines: 34, Source: dense.py
Example 18: intrin_gemv
def intrin_gemv(m, l):
    a = tvm.placeholder((l,), name='a')
    b = tvm.placeholder((m, l), name='b')
    k = tvm.reduce_axis((0, l), name='k')
    c = tvm.compute((m,), lambda i: tvm.sum(a[k] * b[i, k], axis=k), name='c')
    Ab = tvm.decl_buffer(a.shape, a.dtype,
                         name="A",
                         offset_factor=1,
                         strides=[1])
    Bb = tvm.decl_buffer(b.shape, b.dtype,
                         name="B",
                         offset_factor=1,
                         strides=[tvm.var("s1"), 1])
    Cb = tvm.decl_buffer(c.shape, c.dtype,
                         name="C",
                         offset_factor=1,
                         strides=[1])

    def intrin_func(ins, outs):
        ib = tvm.ir_builder.create()
        aa, bb = ins
        cc = outs[0]
        ib.emit(tvm.call_extern("int32", "gemv_update",
                                cc.access_ptr("w"),
                                aa.access_ptr("r"),
                                bb.access_ptr("r"),
                                m, l, bb.strides[0]))
        return ib.get()

    with tvm.build_config(offset_factor=1):
        return tvm.decl_tensor_intrin(c.op, intrin_func, binds={a: Ab, b: Bb, c: Cb})
Developer: LANHUIYING, Project: tvm, Lines: 29, Source: tensorize.py
Example 19: _declaration_dense_pack
def _declaration_dense_pack(cfg, data, weight, bias=None, out_dtype=None):
    if out_dtype is None:
        out_dtype = data.dtype
    batch, in_dim = get_const_tuple(data.shape)
    out_dim, _ = get_const_tuple(weight.shape)
    # create tuning space
    cfg.define_split("tile_y", batch, num_outputs=3)
    cfg.define_split("tile_x", out_dim, num_outputs=3)
    cfg.define_split("tile_k", in_dim, num_outputs=2)
    if cfg.is_fallback:
        _default_dense_pack_config(cfg, batch, out_dim, in_dim)

    packw_bn = cfg["tile_x"].size[-1]
    packw_shape = (out_dim // packw_bn, in_dim, packw_bn)
    packw = tvm.compute(packw_shape,
                        lambda z, y, x: weight[z * packw_bn + x, y], name="packed_weight")

    k = tvm.reduce_axis((0, in_dim), name="k")
    C = tvm.compute((batch, out_dim),
                    lambda y, x: tvm.sum(
                        data[y, k].astype(out_dtype) *
                        packw[x // packw_bn, k, x % packw_bn].astype(out_dtype),
                        axis=k),
                    tag="dense_pack")
    if bias is not None:
        C = tvm.compute((batch, out_dim), lambda i, j: C[i, j] + bias[j].astype(out_dtype),
                        tag=tag.BROADCAST)
    return C
Developer: bddppq, Project: tvm, Lines: 28, Source: dense.py
Example 20: intrin_gemv
def intrin_gemv(m, n):
    w = tvm.placeholder((m, n), name='w')
    x = tvm.placeholder((n,), name='x')
    k = tvm.reduce_axis((0, n), name='k')
    z = tvm.compute((m,), lambda i:
                    tvm.sum(w[i, k] * x[k], axis=k), name='z')
    Wb = tvm.decl_buffer(w.shape, w.dtype,
                         name="W",
                         offset_factor=16,
                         strides=[tvm.var('ldw'), 1])

    def intrin_func(ins, outs):
        ww, xx = ins
        zz = outs[0]
        ww_ptr = ww.access_ptr("r")
        xx_ptr = xx.access_ptr("r")
        zz_ptr = zz.access_ptr("w")
        body = tvm.call_packed(
            "gemm", ww_ptr, xx_ptr, zz_ptr, n, ww.strides[0])
        reset = tvm.call_packed(
            "fill_zero", zz_ptr, n)
        update = tvm.call_packed(
            "gemv_add", ww_ptr, xx_ptr, zz_ptr, n, ww.strides[0])
        return body, reset, update

    with tvm.build_config(data_alignment=16,
                          offset_factor=16):
        return tvm.decl_tensor_intrin(z.op, intrin_func,
                                      binds={w: Wb})
Developer: LANHUIYING, Project: tvm, Lines: 28, Source: test_schedule_schedule_ops.py
Note: the tvm.sum examples in this article were compiled by 纯净天空 from source-code and documentation hosting platforms such as GitHub and MSDocs. The snippets are drawn from open-source projects contributed by their respective authors; copyright of the source code remains with the original authors, and any distribution or use should follow the corresponding project's License. Do not reproduce without permission.