本文整理汇总了Golang中github.com/klauspost/intrinsics/x86.M512i函数的典型用法代码示例。如果您正苦于以下问题：Golang M512i函数的具体用法？Golang M512i怎么用？Golang M512i使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了M512i函数的19个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Golang代码示例。
示例1: M512MaskPermutexvarEpi8
// M512MaskPermutexvarEpi8 shuffles the 8-bit integers of 'a' across lanes,
// selecting each source byte with the matching index in 'idx'. The result is
// written to 'dst' under writemask 'k': wherever a mask bit is clear, the byte
// is copied from 'src' instead.
//
//	FOR j := 0 to 63
//	    i := j*8
//	    id := idx[i+5:i]*8
//	    IF k[j]
//	        dst[i+7:i] := a[id+7:id]
//	    ELSE
//	        dst[i+7:i] := src[i+7:i]
//	    FI
//	ENDFOR
//	dst[MAX:512] := 0
//
// Instruction: 'VPERMB'. Intrinsic: '_mm512_mask_permutexvar_epi8'.
// Requires AVX512VBMI.
//
// The wrapper only converts its arguments to plain arrays/integers and hands
// them to the unexported m512MaskPermutexvarEpi8.
func M512MaskPermutexvarEpi8(src x86.M512i, k x86.Mmask64, idx x86.M512i, a x86.M512i) (dst x86.M512i) {
	out := m512MaskPermutexvarEpi8([64]byte(src), uint64(k), [64]byte(idx), [64]byte(a))
	return x86.M512i(out)
}
开发者ID:klauspost,项目名称:intrinsics,代码行数:21,代码来源:avx512vbmi.go
示例2: M512MaskzPermutexvarEpi8
// M512MaskzPermutexvarEpi8 shuffles the 8-bit integers of 'a' across lanes,
// selecting each source byte with the matching index in 'idx'. The result is
// written to 'dst' under zeromask 'k': wherever a mask bit is clear, the byte
// is zeroed.
//
//	FOR j := 0 to 63
//	    i := j*8
//	    id := idx[i+5:i]*8
//	    IF k[j]
//	        dst[i+7:i] := a[id+7:id]
//	    ELSE
//	        dst[i+7:i] := 0
//	    FI
//	ENDFOR
//	dst[MAX:512] := 0
//
// Instruction: 'VPERMB'. Intrinsic: '_mm512_maskz_permutexvar_epi8'.
// Requires AVX512VBMI.
//
// The wrapper only converts its arguments to plain arrays/integers and hands
// them to the unexported m512MaskzPermutexvarEpi8.
func M512MaskzPermutexvarEpi8(k x86.Mmask64, idx x86.M512i, a x86.M512i) (dst x86.M512i) {
	out := m512MaskzPermutexvarEpi8(uint64(k), [64]byte(idx), [64]byte(a))
	return x86.M512i(out)
}
开发者ID:klauspost,项目名称:intrinsics,代码行数:21,代码来源:avx512vbmi.go
示例3: M512Mask2Permutex2varEpi8
// M512Mask2Permutex2varEpi8 shuffles the 8-bit integers of 'a' and 'b' across
// lanes; each byte of 'idx' supplies both the selector (which input) and the
// index (which byte). The result is written to 'dst' under writemask 'k':
// wherever a mask bit is clear, the byte is copied from 'a'.
//
//	FOR j := 0 to 63
//	    i := j*8
//	    IF k[j]
//	        off := 8*idx[i+5:i]
//	        dst[i+7:i] := idx[i+6] ? b[off+7:off] : a[off+7:off]
//	    ELSE
//	        dst[i+7:i] := a[i+7:i]
//	    FI
//	ENDFOR
//	dst[MAX:512] := 0
//
// Instruction: 'VPERMI2B'. Intrinsic: '_mm512_mask2_permutex2var_epi8'.
// Requires AVX512VBMI.
//
// The wrapper only converts its arguments to plain arrays/integers and hands
// them to the unexported m512Mask2Permutex2varEpi8.
func M512Mask2Permutex2varEpi8(a x86.M512i, idx x86.M512i, k x86.Mmask64, b x86.M512i) (dst x86.M512i) {
	out := m512Mask2Permutex2varEpi8([64]byte(a), [64]byte(idx), uint64(k), [64]byte(b))
	return x86.M512i(out)
}
开发者ID:klauspost,项目名称:intrinsics,代码行数:21,代码来源:avx512vbmi.go
示例4: M512Permutex2varEpi8
// M512Permutex2varEpi8 shuffles the 8-bit integers of 'a' and 'b' across
// lanes; each byte of 'idx' supplies both the selector (which input) and the
// index (which byte). The result is written to 'dst'.
//
//	FOR j := 0 to 63
//	    i := j*8
//	    off := 8*idx[i+5:i]
//	    dst[i+7:i] := idx[i+6] ? b[off+7:off] : a[off+7:off]
//	ENDFOR
//	dst[MAX:512] := 0
//
// Instruction: 'VPERMI2B'. Intrinsic: '_mm512_permutex2var_epi8'.
// Requires AVX512VBMI.
//
// The wrapper only converts its arguments to plain arrays and hands them to
// the unexported m512Permutex2varEpi8.
func M512Permutex2varEpi8(a x86.M512i, idx x86.M512i, b x86.M512i) (dst x86.M512i) {
	out := m512Permutex2varEpi8([64]byte(a), [64]byte(idx), [64]byte(b))
	return x86.M512i(out)
}
开发者ID:klauspost,项目名称:intrinsics,代码行数:16,代码来源:avx512vbmi.go
示例5: M512ConflictEpi64
// M512ConflictEpi64 compares every 64-bit element of 'a' for equality against
// all elements of 'a' that sit closer to the least significant bit. The
// comparison results of each element form a zero-extended bit vector stored
// in the corresponding element of 'dst'.
//
//	FOR j := 0 to 7
//	    i := j*64
//	    FOR k := 0 to j-1
//	        m := k*64
//	        dst[i+k] := (a[i+63:i] == a[m+63:m]) ? 1 : 0
//	    ENDFOR
//	    dst[i+63:i+j] := 0
//	ENDFOR
//	dst[MAX:512] := 0
//
// Instruction: 'VPCONFLICTQ'. Intrinsic: '_mm512_conflict_epi64'.
// Requires AVX512CD.
//
// The wrapper only converts its argument to a plain array and hands it to the
// unexported m512ConflictEpi64.
func M512ConflictEpi64(a x86.M512i) (dst x86.M512i) {
	out := m512ConflictEpi64([64]byte(a))
	return x86.M512i(out)
}
开发者ID:klauspost,项目名称:intrinsics,代码行数:19,代码来源:avx512cd.go
示例6: M512Madd52loEpu64
// M512Madd52loEpu64 multiplies the packed unsigned 52-bit integers held in
// each 64-bit element of 'b' and 'c', producing a 104-bit intermediate result.
// The low 52 bits of that result are added, as an unsigned integer, to the
// corresponding unsigned 64-bit integer in 'a', and the sums are stored in
// 'dst'.
//
//	FOR j := 0 to 7
//	    i := j*64
//	    tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i])
//	    dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[51:0])
//	ENDFOR
//	dst[MAX:512] := 0
//
// Instruction: 'VPMADD52LUQ'. Intrinsic: '_mm512_madd52lo_epu64'.
// Requires AVX512IFMA52.
//
// The wrapper only converts its arguments to plain arrays and hands them to
// the unexported m512Madd52loEpu64.
func M512Madd52loEpu64(a x86.M512i, b x86.M512i, c x86.M512i) (dst x86.M512i) {
	out := m512Madd52loEpu64([64]byte(a), [64]byte(b), [64]byte(c))
	return x86.M512i(out)
}
开发者ID:klauspost,项目名称:intrinsics,代码行数:17,代码来源:avx512ifma52.go
示例7: M512MaskLzcntEpi64
// M512MaskLzcntEpi64 counts the leading zero bits of each packed 64-bit
// integer in 'a'. The counts are written to 'dst' under writemask 'k':
// wherever a mask bit is clear, the element is copied from 'src'.
//
//	FOR j := 0 to 7
//	    i := j*64
//	    IF k[j]
//	        tmp := 63
//	        dst[i+63:i] := 0
//	        DO WHILE (tmp >= 0 AND a[i+tmp] == 0)
//	            tmp := tmp - 1
//	            dst[i+63:i] := dst[i+63:i] + 1
//	        OD
//	    ELSE
//	        dst[i+63:i] := src[i+63:i]
//	    FI
//	ENDFOR
//	dst[MAX:512] := 0
//
// Instruction: 'VPLZCNTQ'. Intrinsic: '_mm512_mask_lzcnt_epi64'.
// Requires AVX512CD.
//
// The wrapper only converts its arguments to plain arrays/integers and hands
// them to the unexported m512MaskLzcntEpi64.
func M512MaskLzcntEpi64(src x86.M512i, k x86.Mmask8, a x86.M512i) (dst x86.M512i) {
	out := m512MaskLzcntEpi64([64]byte(src), uint8(k), [64]byte(a))
	return x86.M512i(out)
}
开发者ID:klauspost,项目名称:intrinsics,代码行数:24,代码来源:avx512cd.go
示例8: M512MaskzMultishiftEpi64Epi8
// M512MaskzMultishiftEpi64Epi8 assembles, for each 64-bit element of 'b', 8
// unaligned bytes chosen by the byte-granular shift controls in the
// corresponding 64-bit element of 'a'. The 8 assembled bytes are written to
// the corresponding 64-bit element of 'dst' under zeromask 'k': wherever a
// mask bit is clear, the byte is zeroed.
//
//	FOR i := 0 to 7
//	    q := i * 64
//	    FOR j := 0 to 7
//	        tmp8 := 0
//	        ctrl := a[q+j*8+7:q+j*8] & 63
//	        FOR l := 0 to 7
//	            tmp8[l] := b[q+((ctrl+l) & 63)]
//	        ENDFOR
//	        IF k[i*8+j]
//	            dst[q+j*8+7:q+j*8] := tmp8[7:0]
//	        ELSE
//	            dst[q+j*8+7:q+j*8] := 0
//	        FI
//	    ENDFOR
//	ENDFOR
//	dst[MAX:512] := 0
//
// Instruction: 'VPMULTISHIFTQB'. Intrinsic: '_mm512_maskz_multishift_epi64_epi8'.
// Requires AVX512VBMI.
//
// The wrapper only converts its arguments to plain arrays/integers and hands
// them to the unexported m512MaskzMultishiftEpi64Epi8.
func M512MaskzMultishiftEpi64Epi8(k x86.Mmask64, a x86.M512i, b x86.M512i) (dst x86.M512i) {
	out := m512MaskzMultishiftEpi64Epi8(uint64(k), [64]byte(a), [64]byte(b))
	return x86.M512i(out)
}
开发者ID:klauspost,项目名称:intrinsics,代码行数:28,代码来源:avx512vbmi.go
示例9: M512MaskzLzcntEpi32
// M512MaskzLzcntEpi32 counts the leading zero bits of each packed 32-bit
// integer in 'a'. The counts are written to 'dst' under zeromask 'k':
// wherever a mask bit is clear, the element is zeroed.
//
//	FOR j := 0 to 15
//	    i := j*32
//	    IF k[j]
//	        tmp := 31
//	        dst[i+31:i] := 0
//	        DO WHILE (tmp >= 0 AND a[i+tmp] == 0)
//	            tmp := tmp - 1
//	            dst[i+31:i] := dst[i+31:i] + 1
//	        OD
//	    ELSE
//	        dst[i+31:i] := 0
//	    FI
//	ENDFOR
//	dst[MAX:512] := 0
//
// Instruction: 'VPLZCNTD'. Intrinsic: '_mm512_maskz_lzcnt_epi32'.
// Requires AVX512CD.
//
// The wrapper only converts its arguments to plain arrays/integers and hands
// them to the unexported m512MaskzLzcntEpi32.
func M512MaskzLzcntEpi32(k x86.Mmask16, a x86.M512i) (dst x86.M512i) {
	out := m512MaskzLzcntEpi32(uint16(k), [64]byte(a))
	return x86.M512i(out)
}
开发者ID:klauspost,项目名称:intrinsics,代码行数:24,代码来源:avx512cd.go
示例10: M512LzcntEpi64
// M512LzcntEpi64 counts the leading zero bits of each packed 64-bit integer
// in 'a' and stores the counts in 'dst'.
//
//	FOR j := 0 to 7
//	    i := j*64
//	    tmp := 63
//	    dst[i+63:i] := 0
//	    DO WHILE (tmp >= 0 AND a[i+tmp] == 0)
//	        tmp := tmp - 1
//	        dst[i+63:i] := dst[i+63:i] + 1
//	    OD
//	ENDFOR
//	dst[MAX:512] := 0
//
// Instruction: 'VPLZCNTQ'. Intrinsic: '_mm512_lzcnt_epi64'.
// Requires AVX512CD.
//
// The wrapper only converts its argument to a plain array and hands it to the
// unexported m512LzcntEpi64.
func M512LzcntEpi64(a x86.M512i) (dst x86.M512i) {
	out := m512LzcntEpi64([64]byte(a))
	return x86.M512i(out)
}
开发者ID:klauspost,项目名称:intrinsics,代码行数:19,代码来源:avx512cd.go
示例11: M512MaskLzcntEpi32
// M512MaskLzcntEpi32 counts the leading zero bits of each packed 32-bit
// integer in 'a'. The counts are written to 'dst' under writemask 'k':
// wherever a mask bit is clear, the element is copied from 'src'.
//
//	FOR j := 0 to 15
//	    i := j*32
//	    IF k[j]
//	        tmp := 31
//	        dst[i+31:i] := 0
//	        DO WHILE (tmp >= 0 AND a[i+tmp] == 0)
//	            tmp := tmp - 1
//	            dst[i+31:i] := dst[i+31:i] + 1
//	        OD
//	    ELSE
//	        dst[i+31:i] := src[i+31:i]
//	    FI
//	ENDFOR
//	dst[MAX:512] := 0
//
// Instruction: 'VPLZCNTD'. Intrinsic: '_mm512_mask_lzcnt_epi32'.
// Requires AVX512CD.
//
// The wrapper only converts its arguments to plain arrays/integers and hands
// them to the unexported m512MaskLzcntEpi32.
func M512MaskLzcntEpi32(src x86.M512i, k x86.Mmask16, a x86.M512i) (dst x86.M512i) {
	out := m512MaskLzcntEpi32([64]byte(src), uint16(k), [64]byte(a))
	return x86.M512i(out)
}
开发者ID:klauspost,项目名称:intrinsics,代码行数:24,代码来源:avx512cd.go
示例12: M512MaskzConflictEpi64
// M512MaskzConflictEpi64 compares every 64-bit element of 'a' for equality
// against all elements of 'a' that sit closer to the least significant bit,
// under zeromask 'k': wherever a mask bit is clear, the element is zeroed.
// The comparison results of each element form a zero-extended bit vector
// stored in the corresponding element of 'dst'.
//
//	FOR j := 0 to 7
//	    i := j*64
//	    IF k[j]
//	        FOR l := 0 to j-1
//	            m := l*64
//	            dst[i+l] := (a[i+63:i] == a[m+63:m]) ? 1 : 0
//	        ENDFOR
//	        dst[i+63:i+j] := 0
//	    ELSE
//	        dst[i+63:i] := 0
//	    FI
//	ENDFOR
//	dst[MAX:512] := 0
//
// Instruction: 'VPCONFLICTQ'. Intrinsic: '_mm512_maskz_conflict_epi64'.
// Requires AVX512CD.
//
// The wrapper only converts its arguments to plain arrays/integers and hands
// them to the unexported m512MaskzConflictEpi64.
func M512MaskzConflictEpi64(k x86.Mmask8, a x86.M512i) (dst x86.M512i) {
	out := m512MaskzConflictEpi64(uint8(k), [64]byte(a))
	return x86.M512i(out)
}
开发者ID:klauspost,项目名称:intrinsics,代码行数:24,代码来源:avx512cd.go
示例13: M512BroadcastmbEpi64
// M512BroadcastmbEpi64 broadcasts the low 8 bits of the input mask 'k' into
// every 64-bit element of 'dst'.
//
//	FOR j := 0 to 7
//	    i := j*64
//	    dst[i+63:i] := ZeroExtend(k[7:0])
//	ENDFOR
//	dst[MAX:512] := 0
//
// Instruction: 'VPBROADCASTMB2Q'. Intrinsic: '_mm512_broadcastmb_epi64'.
// Requires AVX512CD.
//
// The wrapper only converts the mask to a uint8 and hands it to the
// unexported m512BroadcastmbEpi64.
func M512BroadcastmbEpi64(k x86.Mmask8) (dst x86.M512i) {
	out := m512BroadcastmbEpi64(uint8(k))
	return x86.M512i(out)
}
开发者ID:klauspost,项目名称:intrinsics,代码行数:14,代码来源:avx512cd.go
示例14: M512PermutexvarEpi8
// M512PermutexvarEpi8 shuffles the 8-bit integers of 'a' across lanes,
// selecting each source byte with the matching index in 'idx', and stores the
// result in 'dst'.
//
//	FOR j := 0 to 63
//	    i := j*8
//	    id := idx[i+5:i]*8
//	    dst[i+7:i] := a[id+7:id]
//	ENDFOR
//	dst[MAX:512] := 0
//
// Instruction: 'VPERMB'. Intrinsic: '_mm512_permutexvar_epi8'.
// Requires AVX512VBMI.
//
// The wrapper only converts its arguments to plain arrays and hands them to
// the unexported m512PermutexvarEpi8.
func M512PermutexvarEpi8(idx x86.M512i, a x86.M512i) (dst x86.M512i) {
	out := m512PermutexvarEpi8([64]byte(idx), [64]byte(a))
	return x86.M512i(out)
}
开发者ID:klauspost,项目名称:intrinsics,代码行数:15,代码来源:avx512vbmi.go
示例15: M512MaskMadd52loEpu64
// M512MaskMadd52loEpu64 multiplies the packed unsigned 52-bit integers held
// in each 64-bit element of 'b' and 'c', producing a 104-bit intermediate
// result. The low 52 bits of that result are added, as an unsigned integer,
// to the corresponding unsigned 64-bit integer in 'a'. The sums are written
// to 'dst' under writemask 'k': wherever a mask bit is clear, the element is
// copied from 'a'.
//
//	FOR j := 0 to 7
//	    i := j*64
//	    IF k[j]
//	        tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i])
//	        dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[51:0])
//	    ELSE
//	        dst[i+63:i] := a[i+63:i]
//	    FI
//	ENDFOR
//	dst[MAX:512] := 0
//
// Instruction: 'VPMADD52LUQ'. Intrinsic: '_mm512_mask_madd52lo_epu64'.
// Requires AVX512IFMA52.
//
// The wrapper only converts its arguments to plain arrays/integers and hands
// them to the unexported m512MaskMadd52loEpu64.
func M512MaskMadd52loEpu64(a x86.M512i, k x86.Mmask8, b x86.M512i, c x86.M512i) (dst x86.M512i) {
	out := m512MaskMadd52loEpu64([64]byte(a), uint8(k), [64]byte(b), [64]byte(c))
	return x86.M512i(out)
}
开发者ID:klauspost,项目名称:intrinsics,代码行数:23,代码来源:avx512ifma52.go
示例16: M512MaskMultishiftEpi64Epi8
// M512MaskMultishiftEpi64Epi8 assembles, for each 64-bit element of 'b', 8
// unaligned bytes chosen by the byte-granular shift controls in the
// corresponding 64-bit element of 'a'. The 8 assembled bytes are written to
// the corresponding 64-bit element of 'dst' under writemask 'k': wherever a
// mask bit is clear, the byte is copied from 'src'.
//
//	FOR i := 0 to 7
//	    q := i * 64
//	    FOR j := 0 to 7
//	        tmp8 := 0
//	        ctrl := a[q+j*8+7:q+j*8] & 63
//	        FOR l := 0 to 7
//	            tmp8[l] := b[q+((ctrl+l) & 63)]
//	        ENDFOR
//	        IF k[i*8+j]
//	            dst[q+j*8+7:q+j*8] := tmp8[7:0]
//	        ELSE
//	            dst[q+j*8+7:q+j*8] := src[q+j*8+7:q+j*8]
//	        FI
//	    ENDFOR
//	ENDFOR
//	dst[MAX:512] := 0
//
// Instruction: 'VPMULTISHIFTQB'. Intrinsic: '_mm512_mask_multishift_epi64_epi8'.
// Requires AVX512VBMI.
//
// The wrapper only converts its arguments to plain arrays/integers and hands
// them to the unexported m512MaskMultishiftEpi64Epi8.
func M512MaskMultishiftEpi64Epi8(src x86.M512i, k x86.Mmask64, a x86.M512i, b x86.M512i) (dst x86.M512i) {
	out := m512MaskMultishiftEpi64Epi8([64]byte(src), uint64(k), [64]byte(a), [64]byte(b))
	return x86.M512i(out)
}
开发者ID:klauspost,项目名称:intrinsics,代码行数:28,代码来源:avx512vbmi.go
示例17: M512MaskzMadd52hiEpu64
// M512MaskzMadd52hiEpu64 multiplies the packed unsigned 52-bit integers held
// in each 64-bit element of 'b' and 'c', producing a 104-bit intermediate
// result. The high 52 bits of that result are added, as an unsigned integer,
// to the corresponding unsigned 64-bit integer in 'a'. The sums are written
// to 'dst' under zeromask 'k': wherever a mask bit is clear, the element is
// zeroed.
//
//	FOR j := 0 to 7
//	    i := j*64
//	    IF k[j]
//	        tmp[127:0] := ZeroExtend64(b[i+51:i]) * ZeroExtend64(c[i+51:i])
//	        dst[i+63:i] := a[i+63:i] + ZeroExtend64(tmp[103:52])
//	    ELSE
//	        dst[i+63:i] := 0
//	    FI
//	ENDFOR
//	dst[MAX:512] := 0
//
// Instruction: 'VPMADD52HUQ'. Intrinsic: '_mm512_maskz_madd52hi_epu64'.
// Requires AVX512IFMA52.
//
// The wrapper only converts its arguments to plain arrays/integers and hands
// them to the unexported m512MaskzMadd52hiEpu64.
func M512MaskzMadd52hiEpu64(k x86.Mmask8, a x86.M512i, b x86.M512i, c x86.M512i) (dst x86.M512i) {
	out := m512MaskzMadd52hiEpu64(uint8(k), [64]byte(a), [64]byte(b), [64]byte(c))
	return x86.M512i(out)
}
开发者ID:klauspost,项目名称:intrinsics,代码行数:23,代码来源:avx512ifma52.go
示例18: M512MultishiftEpi64Epi8
// M512MultishiftEpi64Epi8 assembles, for each 64-bit element of 'b', 8
// unaligned bytes chosen by the byte-granular shift controls in the
// corresponding 64-bit element of 'a', and stores the 8 assembled bytes in
// the corresponding 64-bit element of 'dst'.
//
//	FOR i := 0 to 7
//	    q := i * 64
//	    FOR j := 0 to 7
//	        tmp8 := 0
//	        ctrl := a[q+j*8+7:q+j*8] & 63
//	        FOR l := 0 to 7
//	            tmp8[l] := b[q+((ctrl+l) & 63)]
//	        ENDFOR
//	        dst[q+j*8+7:q+j*8] := tmp8[7:0]
//	    ENDFOR
//	ENDFOR
//	dst[MAX:512] := 0
//
// Instruction: 'VPMULTISHIFTQB'. Intrinsic: '_mm512_multishift_epi64_epi8'.
// Requires AVX512VBMI.
//
// The wrapper only converts its arguments to plain arrays and hands them to
// the unexported m512MultishiftEpi64Epi8.
func M512MultishiftEpi64Epi8(a x86.M512i, b x86.M512i) (dst x86.M512i) {
	out := m512MultishiftEpi64Epi8([64]byte(a), [64]byte(b))
	return x86.M512i(out)
}
开发者ID:klauspost,项目名称:intrinsics,代码行数:23,代码来源:avx512vbmi.go
示例19: M512BroadcastmwEpi32
// M512BroadcastmwEpi32 broadcasts the low 16 bits of the input mask 'k' into
// every 32-bit element of 'dst'.
//
//	FOR j := 0 to 15
//	    i := j*32
//	    dst[i+31:i] := ZeroExtend(k[15:0])
//	ENDFOR
//	dst[MAX:512] := 0
//
// Instruction: 'VPBROADCASTMW2D'. Intrinsic: '_mm512_broadcastmw_epi32'.
// Requires AVX512CD.
//
// The wrapper only converts the mask to a uint16 and hands it to the
// unexported m512BroadcastmwEpi32.
func M512BroadcastmwEpi32(k x86.Mmask16) (dst x86.M512i) {
	out := m512BroadcastmwEpi32(uint16(k))
	return x86.M512i(out)
}
开发者ID:klauspost,项目名称:intrinsics,代码行数:14,代码来源:avx512cd.go
注：本文中的github.com/klauspost/intrinsics/x86.M512i函数示例整理自Github/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。
请发表评论