This page collects typical usage examples of the C++ divup function. If you have been wondering what divup does, how to call it, or where it shows up in real code, the hand-picked examples below should help.
The following shows 20 code examples of the divup function, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better C++ code examples.
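None of the snippets below include the definition of divup itself. Judging from how it is used throughout (sizing thread grids and block-aligned buffers), it is integer division rounded up: the smallest n with n * b >= a. A minimal sketch, assuming the simple two-argument template form found in projects such as ArrayFire:

template<typename T>
static inline T divup(T a, T b) {
    // Ceiling division: smallest n such that n * b >= a.
    // Assumes a >= 0 and b > 0; for unsigned types note that
    // a + b - 1 can wrap if a is near the type's maximum.
    return (a + b - 1) / b;
}

For example, divup(10, 3) == 4: ten elements are covered by four blocks of three threads, with two lanes idle in the last block.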
Example 1: reduce_first
void reduce_first(Param<To> out, CParam<Ti> in, bool change_nan,
                  double nanval) {
    uint threads_x = nextpow2(std::max(32u, (uint)in.dims[0]));
    threads_x = std::min(threads_x, THREADS_PER_BLOCK);
    uint threads_y = THREADS_PER_BLOCK / threads_x;

    uint blocks_x = divup(in.dims[0], threads_x * REPEAT);
    uint blocks_y = divup(in.dims[1], threads_y);

    Param<To> tmp = out;
    uptr<To> tmp_alloc;
    if (blocks_x > 1) {
        tmp_alloc =
            memAlloc<To>(blocks_x * in.dims[1] * in.dims[2] * in.dims[3]);
        tmp.ptr = tmp_alloc.get();

        tmp.dims[0] = blocks_x;
        for (int k = 1; k < 4; k++) tmp.strides[k] *= blocks_x;
    }

    reduce_first_launcher<Ti, To, op>(tmp, in, blocks_x, blocks_y, threads_x,
                                      change_nan, nanval);

    if (blocks_x > 1) {
        // FIXME: Is there an alternative to the if condition?
        if (op == af_notzero_t) {
            reduce_first_launcher<To, To, af_add_t>(
                out, tmp, 1, blocks_y, threads_x, change_nan, nanval);
        } else {
            reduce_first_launcher<To, To, op>(out, tmp, 1, blocks_y, threads_x,
                                              change_nan, nanval);
        }
    }
}
Developer ID: 9prady9, Project: arrayfire, Lines: 34, Source: reduce.hpp
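To make Example 1's grid sizing concrete, a hedged walk-through (THREADS_PER_BLOCK and REPEAT are compile-time constants defined elsewhere in ArrayFire; 256 and 32 here are assumed values for illustration): with in.dims[0] = 1000000, threads_x saturates at 256, threads_y = 256 / 256 = 1, and blocks_x = divup(1000000, 256 * 32) = divup(1000000, 8192) = 123. The first launch writes 123 partial results along dimension 0; because blocks_x > 1, a second launch with blocks_x = 1 reduces those 123 partials down to the final value.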
Example 2: morph
void morph(Param<T> out, CParam<T> in, int windLen)
{
    dim3 threads(kernel::THREADS_X, kernel::THREADS_Y);

    int blk_x = divup(in.dims[0], THREADS_X);
    int blk_y = divup(in.dims[1], THREADS_Y);
    // launch batch * blk_x blocks along x dimension
    dim3 blocks(blk_x * in.dims[2], blk_y * in.dims[3]);

    // calculate shared memory size
    int halo     = windLen/2;
    int padding  = 2*halo;
    int shrdLen  = kernel::THREADS_X + padding + 1; // +1 to avoid bank conflicts
    int shrdSize = shrdLen * (kernel::THREADS_Y + padding) * sizeof(T);

    switch(windLen) {
        case  3: CUDA_LAUNCH_SMEM((morphKernel<T, isDilation,  3>), blocks, threads, shrdSize, out, in, blk_x, blk_y); break;
        case  5: CUDA_LAUNCH_SMEM((morphKernel<T, isDilation,  5>), blocks, threads, shrdSize, out, in, blk_x, blk_y); break;
        case  7: CUDA_LAUNCH_SMEM((morphKernel<T, isDilation,  7>), blocks, threads, shrdSize, out, in, blk_x, blk_y); break;
        case  9: CUDA_LAUNCH_SMEM((morphKernel<T, isDilation,  9>), blocks, threads, shrdSize, out, in, blk_x, blk_y); break;
        case 11: CUDA_LAUNCH_SMEM((morphKernel<T, isDilation, 11>), blocks, threads, shrdSize, out, in, blk_x, blk_y); break;
        case 13: CUDA_LAUNCH_SMEM((morphKernel<T, isDilation, 13>), blocks, threads, shrdSize, out, in, blk_x, blk_y); break;
        case 15: CUDA_LAUNCH_SMEM((morphKernel<T, isDilation, 15>), blocks, threads, shrdSize, out, in, blk_x, blk_y); break;
        case 17: CUDA_LAUNCH_SMEM((morphKernel<T, isDilation, 17>), blocks, threads, shrdSize, out, in, blk_x, blk_y); break;
        case 19: CUDA_LAUNCH_SMEM((morphKernel<T, isDilation, 19>), blocks, threads, shrdSize, out, in, blk_x, blk_y); break;
        default: CUDA_LAUNCH_SMEM((morphKernel<T, isDilation,  3>), blocks, threads, shrdSize, out, in, blk_x, blk_y); break;
    }

    POST_LAUNCH_CHECK();
}
Developer ID: munnybearz, Project: arrayfire, Lines: 30, Source: morph.hpp
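A quick check of the shared-memory arithmetic in Example 2, using assumed block dimensions THREADS_X = 16 and THREADS_Y = 16 with windLen = 5: halo = 2, padding = 4, shrdLen = 16 + 4 + 1 = 21, and shrdSize = 21 * (16 + 4) * sizeof(T) = 420 * sizeof(T) bytes. Each block stages its tile plus a windLen/2 halo on every side; the extra +1 column of padding staggers consecutive rows across shared-memory banks.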
Example 3: meanshift
void meanshift(Param<T> out, CParam<T> in, float s_sigma, float c_sigma, uint iter)
{
    static dim3 threads(kernel::THREADS_X, kernel::THREADS_Y);

    int blk_x = divup(in.dims[0], THREADS_X);
    int blk_y = divup(in.dims[1], THREADS_Y);

    const int bCount   = (is_color ? 1 : in.dims[2]);
    const int channels = (is_color ? in.dims[2] : 1); // this has to be 3 for color images

    dim3 blocks(blk_x * bCount, blk_y * in.dims[3]);

    // clamp spatial and chromatic sigmas
    float space_ = std::min(11.5f, s_sigma);
    int radius   = std::max((int)(space_ * 1.5f), 1);
    int padding  = 2*radius+1;
    const float cvar = c_sigma*c_sigma;
    size_t shrd_size = channels*(threads.x + padding)*(threads.y+padding)*sizeof(T);

    if (is_color)
        CUDA_LAUNCH_SMEM((meanshiftKernel<T, 3>), blocks, threads, shrd_size,
                         out, in, space_, radius, cvar, iter, blk_x, blk_y);
    else
        CUDA_LAUNCH_SMEM((meanshiftKernel<T, 1>), blocks, threads, shrd_size,
                         out, in, space_, radius, cvar, iter, blk_x, blk_y);

    POST_LAUNCH_CHECK();
}
Developer ID: victorv, Project: arrayfire, Lines: 28, Source: meanshift.hpp
Example 4: mxr_mplane_fill
static void mxr_mplane_fill(struct v4l2_plane_pix_format *planes,
                            const struct mxr_format *fmt, u32 width, u32 height)
{
    int i;
    int y_size, cb_size;

    memset(planes, 0, sizeof(*planes) * fmt->num_subframes);
    for (i = 0; i < fmt->num_planes; ++i) {
        struct v4l2_plane_pix_format *plane = planes
            + fmt->plane2subframe[i];
        const struct mxr_block *blk = &fmt->plane[i];
        u32 bl_width = divup(width, blk->width);
        u32 bl_height = divup(height, blk->height);
        u32 sizeimage = bl_width * bl_height * blk->size;
        u16 bytesperline = bl_width * blk->size / blk->height;

        if (fmt->fourcc == V4L2_PIX_FMT_NV12MT) {
            y_size = ALIGN(width, 128) * ALIGN(height, 64);
            cb_size = ALIGN(width, 128) * ALIGN(height / 2, 64);
            plane->sizeimage += i ? cb_size : y_size;
        } else {
            plane->sizeimage += sizeimage;
        }
        plane->bytesperline = max(plane->bytesperline, bytesperline);
    }
}
Developer ID: cm-3470, Project: android_kernel_samsung_degaslte, Lines: 26, Source: mixer_video.c
Example 5: mean_dim
void mean_dim(Param out, Param in, Param inWeight, int dim)
{
    uint threads_y = std::min(THREADS_Y, nextpow2(in.info.dims[dim]));
    uint threads_x = THREADS_X;

    uint groups_all[] = {(uint)divup(in.info.dims[0], threads_x),
                         (uint)in.info.dims[1],
                         (uint)in.info.dims[2],
                         (uint)in.info.dims[3]};

    groups_all[dim] = divup(in.info.dims[dim], threads_y * REPEAT);

    if (groups_all[dim] > 1) {
        dim4 d(4, out.info.dims);
        d[dim] = groups_all[dim];
        Array<To> tmpOut = createEmptyArray<To>(d);
        Array<Tw> tmpWeight = createEmptyArray<Tw>(d);
        mean_dim_launcher<Ti, Tw, To>(tmpOut, tmpWeight, in, inWeight, dim, threads_y, groups_all);

        Param owt;
        groups_all[dim] = 1;
        mean_dim_launcher<Ti, Tw, To>(out, owt, tmpOut, tmpWeight, dim, threads_y, groups_all);
    } else {
        Param tmpWeight;
        mean_dim_launcher<Ti, Tw, To>(out, tmpWeight, in, inWeight, dim, threads_y, groups_all);
    }
}
Developer ID: FilipeMaia, Project: arrayfire, Lines: 28, Source: mean.hpp
Example 6: shift
void shift(Param<T> out, CParam<T> in, const int *sdims)
{
    dim3 threads(TX, TY, 1);

    int blocksPerMatX = divup(out.dims[0], TILEX);
    int blocksPerMatY = divup(out.dims[1], TILEY);
    dim3 blocks(blocksPerMatX * out.dims[2],
                blocksPerMatY * out.dims[3],
                1);

    const int maxBlocksY = cuda::getDeviceProp(cuda::getActiveDeviceId()).maxGridSize[1];
    blocks.z = divup(blocks.y, maxBlocksY);
    blocks.y = divup(blocks.y, blocks.z);

    int sdims_[4];
    // Need to do this because we are mapping output to input in the kernel
    for(int i = 0; i < 4; i++) {
        // sdims_[i] is always non-negative and in [0, out.dims[i]].
        // Negative shifts are converted to positions by going the other way round.
        sdims_[i] = -(sdims[i] % (int)out.dims[i]) + out.dims[i] * (sdims[i] > 0);
        assert(sdims_[i] >= 0 && sdims_[i] <= out.dims[i]);
    }

    CUDA_LAUNCH((shift_kernel<T>), blocks, threads,
                out, in, sdims_[0], sdims_[1], sdims_[2], sdims_[3],
                blocksPerMatX, blocksPerMatY);
    POST_LAUNCH_CHECK();
}
Developer ID: munnybearz, Project: arrayfire, Lines: 28, Source: shift.hpp
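The two divup lines involving maxGridSize[1] in Example 6 fold an oversized y grid into the z dimension. With assumed numbers: if blocks.y = 100000 and the device reports maxGridSize[1] = 65535, then blocks.z = divup(100000, 65535) = 2 and blocks.y becomes divup(100000, 2) = 50000, so the 50000 x 2 grid still covers all 100000 row-blocks while each grid dimension respects the device limit.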
Example 7: select
void select(Param<T> out, CParam<char> cond, CParam<T> a, CParam<T> b, int ndims)
{
    bool is_same = true;
    for (int i = 0; i < 4; i++) {
        is_same &= (a.dims[i] == b.dims[i]);
    }

    dim3 threads(DIMX, DIMY);

    if (ndims == 1) {
        threads.x *= threads.y;
        threads.y = 1;
    }

    int blk_x = divup(out.dims[0], threads.x);
    int blk_y = divup(out.dims[1], threads.y);

    dim3 blocks(blk_x * out.dims[2],
                blk_y * out.dims[3]);

    if (is_same) {
        CUDA_LAUNCH((select_kernel<T, true>), blocks, threads,
                    out, cond, a, b, blk_x, blk_y);
    } else {
        CUDA_LAUNCH((select_kernel<T, false>), blocks, threads,
                    out, cond, a, b, blk_x, blk_y);
    }
}
Developer ID: Brainiarc7, Project: arrayfire, Lines: 30, Source: select.hpp
Example 8: transpose
void transpose(Param<T> out, CParam<T> in, const bool conjugate,
               const bool is32multiple) {
    static const std::string source(transpose_cuh, transpose_cuh_len);

    // clang-format off
    auto transpose = getKernel("cuda::transpose", source,
            {
                TemplateTypename<T>(),
                TemplateArg(conjugate),
                TemplateArg(is32multiple)
            },
            {
                DefineValue(TILE_DIM),
                DefineValue(THREADS_Y)
            }
            );
    // clang-format on

    dim3 threads(kernel::THREADS_X, kernel::THREADS_Y);

    int blk_x = divup(in.dims[0], TILE_DIM);
    int blk_y = divup(in.dims[1], TILE_DIM);

    dim3 blocks(blk_x * in.dims[2], blk_y * in.dims[3]);

    const int maxBlocksY =
        cuda::getDeviceProp(getActiveDeviceId()).maxGridSize[1];
    blocks.z = divup(blocks.y, maxBlocksY);
    blocks.y = divup(blocks.y, blocks.z);

    EnqueueArgs qArgs(blocks, threads, getActiveStream());

    transpose(qArgs, out, in, blk_x, blk_y);

    POST_LAUNCH_CHECK();
}
Developer ID: 9prady9, Project: arrayfire, Lines: 34, Source: transpose.hpp
Example 9: scan_dim_by_key
void scan_dim_by_key(Param<To> out, CParam<Ti> in, CParam<Tk> key, int dim, bool inclusive_scan)
{
    uint threads_y = std::min(THREADS_Y, nextpow2(out.dims[dim]));
    uint threads_x = THREADS_X;

    uint blocks_all[] = {divup(out.dims[0], threads_x),
                         out.dims[1], out.dims[2], out.dims[3]};

    blocks_all[dim] = divup(out.dims[dim], threads_y * REPEAT);

    if (blocks_all[dim] == 1) {
        scan_dim_final_launcher<Ti, Tk, To, op>(out, in, key,
                                                dim,
                                                threads_y,
                                                blocks_all,
                                                true, inclusive_scan);
    } else {
        Param<To> tmp = out;
        Param<char> tmpflg;
        Param<int> tmpid;

        tmp.dims[dim] = blocks_all[dim];
        tmp.strides[0] = 1;
        for (int k = 1; k < 4; k++) tmp.strides[k] = tmp.strides[k - 1] * tmp.dims[k - 1];
        for (int k = 0; k < 4; k++) {
            tmpflg.strides[k] = tmp.strides[k];
            tmpid.strides[k] = tmp.strides[k];
            tmpflg.dims[k] = tmp.dims[k];
            tmpid.dims[k] = tmp.dims[k];
        }

        int tmp_elements = tmp.strides[3] * tmp.dims[3];
        tmp.ptr = memAlloc<To>(tmp_elements);
        tmpflg.ptr = memAlloc<char>(tmp_elements);
        tmpid.ptr = memAlloc<int>(tmp_elements);

        scan_dim_nonfinal_launcher<Ti, Tk, To, op>(out, tmp, tmpflg,
                                                   tmpid, in, key,
                                                   dim,
                                                   threads_y,
                                                   blocks_all,
                                                   inclusive_scan);

        int bdim = blocks_all[dim];
        blocks_all[dim] = 1;
        scan_dim_final_launcher<To, char, To, op>(tmp, tmp, tmpflg,
                                                  dim,
                                                  threads_y,
                                                  blocks_all, false, true);
        blocks_all[dim] = bdim;

        bcast_dim_launcher<To, op>(out, tmp, tmpid, dim, threads_y, blocks_all);

        memFree(tmp.ptr);
        memFree(tmpflg.ptr);
        memFree(tmpid.ptr);
    }
}
Developer ID: shehzan10, Project: arrayfire, Lines: 60, Source: scan_dim_by_key_impl.hpp
Example 10: lookup
void lookup(Param<in_t> out, CParam<in_t> in, CParam<idx_t> indices, int nDims)
{
    if (nDims==1) {
        const dim3 threads(THREADS, 1);

        /* find which dimension has non-zero # of elements */
        int vDim = 0;
        for (int i=0; i<4; i++) {
            if (in.dims[i]==1)
                vDim++;
            else
                break;
        }

        int blks = divup(out.dims[vDim], THREADS*THRD_LOAD);

        dim3 blocks(blks, 1);

        CUDA_LAUNCH((lookup1D<in_t, idx_t>), blocks, threads, out, in, indices, vDim);
    } else {
        const dim3 threads(THREADS_X, THREADS_Y);

        int blks_x = divup(out.dims[0], threads.x);
        int blks_y = divup(out.dims[1], threads.y);

        dim3 blocks(blks_x*out.dims[2], blks_y*out.dims[3]);

        CUDA_LAUNCH((lookupND<in_t, idx_t, dim>), blocks, threads, out, in, indices, blks_x, blks_y);
    }

    POST_LAUNCH_CHECK();
}
Developer ID: shehzan10, Project: arrayfire, Lines: 31, Source: lookup.hpp
Example 11: transform
void transform(Param<T> out, CParam<T> in, CParam<float> tf,
               const bool inverse)
{
    dim_type nimages = in.dims[2];
    // Multiplied in src/backend/transform.cpp
    const dim_type ntransforms = out.dims[2] / in.dims[2];

    // Copy transform to constant memory.
    CUDA_CHECK(cudaMemcpyToSymbol(c_tmat, tf.ptr, ntransforms * 6 * sizeof(float), 0,
                                  cudaMemcpyDeviceToDevice));

    dim3 threads(TX, TY, 1);
    dim3 blocks(divup(out.dims[0], threads.x), divup(out.dims[1], threads.y));

    const dim_type blocksXPerImage = blocks.x;
    if(nimages > TI) {
        dim_type tile_images = divup(nimages, TI);
        nimages = TI;
        blocks.x = blocks.x * tile_images;
    }

    if (ntransforms > 1) { blocks.y *= ntransforms; }

    if(inverse) {
        transform_kernel<T, true, method><<<blocks, threads>>>
                        (out, in, nimages, ntransforms, blocksXPerImage);
    } else {
        transform_kernel<T, false, method><<<blocks, threads>>>
                        (out, in, nimages, ntransforms, blocksXPerImage);
    }
    POST_LAUNCH_CHECK();
}
Developer ID: EmergentOrder, Project: arrayfire, Lines: 27, Source: transform.hpp
Example 12: transform
void transform(Param<T> out, CParam<T> in, CParam<float> tf,
               const bool inverse)
{
    int nimages = in.dims[2];
    // Multiplied in src/backend/transform.cpp
    const int ntransforms = out.dims[2] / in.dims[2];

    // Copy transform to constant memory.
    CUDA_CHECK(cudaMemcpyToSymbolAsync(c_tmat, tf.ptr, ntransforms * 6 * sizeof(float), 0,
                                       cudaMemcpyDeviceToDevice,
                                       cuda::getStream(cuda::getActiveDeviceId())));

    dim3 threads(TX, TY, 1);
    dim3 blocks(divup(out.dims[0], threads.x), divup(out.dims[1], threads.y));

    const int blocksXPerImage = blocks.x;
    if(nimages > TI) {
        int tile_images = divup(nimages, TI);
        nimages = TI;
        blocks.x = blocks.x * tile_images;
    }

    if (ntransforms > 1) { blocks.y *= ntransforms; }

    if(inverse) {
        CUDA_LAUNCH((transform_kernel<T, true, method>), blocks, threads,
                    out, in, nimages, ntransforms, blocksXPerImage);
    } else {
        CUDA_LAUNCH((transform_kernel<T, false, method>), blocks, threads,
                    out, in, nimages, ntransforms, blocksXPerImage);
    }
    POST_LAUNCH_CHECK();
}
Developer ID: hxiaox, Project: arrayfire, Lines: 33, Source: transform.hpp
Example 13: scan_dim
static void scan_dim(Param &out, const Param &in, int dim)
{
    uint threads_y = std::min(THREADS_Y, nextpow2(out.info.dims[dim]));
    uint threads_x = THREADS_X;

    uint groups_all[] = {divup((uint)out.info.dims[0], threads_x),
                         (uint)out.info.dims[1],
                         (uint)out.info.dims[2],
                         (uint)out.info.dims[3]};

    groups_all[dim] = divup(out.info.dims[dim], threads_y * REPEAT);

    if (groups_all[dim] == 1) {
        scan_dim_launcher<Ti, To, op, inclusive_scan>(out, out, in,
                                                      dim, true,
                                                      threads_y,
                                                      groups_all);
    } else {
        Param tmp = out;

        tmp.info.dims[dim] = groups_all[dim];
        tmp.info.strides[0] = 1;
        for (int k = 1; k < 4; k++) {
            tmp.info.strides[k] = tmp.info.strides[k - 1] * tmp.info.dims[k - 1];
        }

        int tmp_elements = tmp.info.strides[3] * tmp.info.dims[3];

        // FIXME: Do I need to free this ?
        tmp.data = bufferAlloc(tmp_elements * sizeof(To));

        scan_dim_launcher<Ti, To, op, inclusive_scan>(out, tmp, in,
                                                      dim, false,
                                                      threads_y,
                                                      groups_all);

        int gdim = groups_all[dim];
        groups_all[dim] = 1;

        if (op == af_notzero_t) {
            scan_dim_launcher<To, To, af_add_t, true>(tmp, tmp, tmp,
                                                      dim, true,
                                                      threads_y,
                                                      groups_all);
        } else {
            scan_dim_launcher<To, To, op, true>(tmp, tmp, tmp,
                                                dim, true,
                                                threads_y,
                                                groups_all);
        }

        groups_all[dim] = gdim;
        bcast_dim_launcher<To, To, op, inclusive_scan>(out, tmp,
                                                       dim, true,
                                                       threads_y,
                                                       groups_all);
        bufferFree(tmp.data);
    }
}
Developer ID: FilipeMaia, Project: arrayfire, Lines: 60, Source: scan_dim.hpp
Example 14: mxr_get_plane_size
unsigned long mxr_get_plane_size(const struct mxr_block *blk,
                                 unsigned int width, unsigned int height)
{
    unsigned int bl_width = divup(width, blk->width);
    unsigned int bl_height = divup(height, blk->height);
    return bl_width * bl_height * blk->size;
}
Developer ID: cm-3470, Project: android_kernel_samsung_degaslte, Lines: 8, Source: mixer_video.c
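A worked instance of Example 14 (the block geometry is assumed for illustration; real values come from the driver's mxr_format tables): for a 1920x1080 frame in a format whose block is 16x16 pixels and 256 bytes, bl_width = divup(1920, 16) = 120 and bl_height = divup(1080, 16) = 68, giving 120 * 68 * 256 = 2088960 bytes. divup rounds 1080 / 16 = 67.5 up to 68 block rows, so the partially covered bottom row of blocks is still allocated.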
Example 15: select_launcher
void select_launcher(Param out, Param cond, Param a, Param b, int ndims)
{
    static std::once_flag compileFlags[DeviceManager::MAX_DEVICES];
    static std::map<int, Program*> selProgs;
    static std::map<int, Kernel*> selKernels;

    int device = getActiveDeviceId();

    std::call_once(compileFlags[device], [device] () {
        std::ostringstream options;
        options << " -D is_same=" << is_same
                << " -D T=" << dtype_traits<T>::getName();

        if (std::is_same<T, double>::value ||
            std::is_same<T, cdouble>::value) {
            options << " -D USE_DOUBLE";
        }

        cl::Program prog;
        buildProgram(prog, select_cl, select_cl_len, options.str());
        selProgs[device] = new Program(prog);

        selKernels[device] = new Kernel(*selProgs[device], "select_kernel");
    });

    int threads[] = {DIMX, DIMY};

    if (ndims == 1) {
        threads[0] *= threads[1];
        threads[1] = 1;
    }

    NDRange local(threads[0],
                  threads[1]);

    int groups_0 = divup(out.info.dims[0], local[0]);
    int groups_1 = divup(out.info.dims[1], local[1]);

    NDRange global(groups_0 * out.info.dims[2] * local[0],
                   groups_1 * out.info.dims[3] * local[1]);

    auto selectOp = make_kernel<Buffer, KParam,
                                Buffer, KParam,
                                Buffer, KParam,
                                Buffer, KParam,
                                int, int>(*selKernels[device]);

    selectOp(EnqueueArgs(getQueue(), global, local),
             *out.data, out.info,
             *cond.data, cond.info,
             *a.data, a.info,
             *b.data, b.info,
             groups_0, groups_1);
}
Developer ID: Brainiarc7, Project: arrayfire, Lines: 58, Source: select.hpp
Example 16: convolve2
void convolve2(Param out, const Param signal, const Param filter)
{
    try {
        static std::once_flag compileFlags[DeviceManager::MAX_DEVICES];
        static std::map<int, Program*> convProgs;
        static std::map<int, Kernel*> convKernels;

        int device = getActiveDeviceId();

        std::call_once( compileFlags[device], [device] () {
            const size_t C0_SIZE = (THREADS_X+2*(fLen-1))* THREADS_Y;
            const size_t C1_SIZE = (THREADS_Y+2*(fLen-1))* THREADS_X;
            size_t locSize = (conv_dim==0 ? C0_SIZE : C1_SIZE);

            std::ostringstream options;
            options << " -D T=" << dtype_traits<T>::getName()
                    << " -D accType="<< dtype_traits<accType>::getName()
                    << " -D CONV_DIM="<< conv_dim
                    << " -D EXPAND="<< expand
                    << " -D FLEN="<< fLen
                    << " -D LOCAL_MEM_SIZE="<<locSize;
            if (std::is_same<T, double>::value ||
                std::is_same<T, cdouble>::value) {
                options << " -D USE_DOUBLE";
            }

            Program prog;
            buildProgram(prog, convolve_separable_cl, convolve_separable_cl_len, options.str());
            convProgs[device] = new Program(prog);
            convKernels[device] = new Kernel(*convProgs[device], "convolve");
        });

        auto convOp = make_kernel<Buffer, KParam, Buffer, KParam, Buffer,
                                  int, int>(*convKernels[device]);

        NDRange local(THREADS_X, THREADS_Y);

        int blk_x = divup(out.info.dims[0], THREADS_X);
        int blk_y = divup(out.info.dims[1], THREADS_Y);

        NDRange global(blk_x*signal.info.dims[2]*THREADS_X,
                       blk_y*signal.info.dims[3]*THREADS_Y);

        cl::Buffer *mBuff = bufferAlloc(fLen*sizeof(accType));
        // FIXME: if the filter array is strided, a direct copy might cause issues
        getQueue().enqueueCopyBuffer(*filter.data, *mBuff, 0, 0, fLen*sizeof(accType));

        convOp(EnqueueArgs(getQueue(), global, local),
               *out.data, out.info, *signal.data, signal.info, *mBuff, blk_x, blk_y);

        bufferFree(mBuff);
    } catch (cl::Error err) {
        CL_TO_AF_ERROR(err);
        throw;
    }
}
Developer ID: PierreBizouard, Project: arrayfire, Lines: 56, Source: convolve_separable.hpp
Example 17: identity
static void identity(Param<T> out)
{
    dim3 threads(32, 8);
    int blocks_x = divup(out.dims[0], threads.x);
    int blocks_y = divup(out.dims[1], threads.y);
    dim3 blocks(blocks_x * out.dims[2], blocks_y * out.dims[3]);

    CUDA_LAUNCH((identity_kernel<T>), blocks, threads, out, blocks_x, blocks_y);
    POST_LAUNCH_CHECK();
}
Developer ID: rotorliu, Project: arrayfire, Lines: 10, Source: identity.hpp
Example 18: scan_dim
static void scan_dim(Param &out, const Param &in)
{
    uint threads_y = std::min(THREADS_Y, nextpow2(out.info.dims[dim]));
    uint threads_x = THREADS_X;

    uint groups_all[] = {divup((uint)out.info.dims[0], threads_x),
                         (uint)out.info.dims[1],
                         (uint)out.info.dims[2],
                         (uint)out.info.dims[3]};

    groups_all[dim] = divup(out.info.dims[dim], threads_y * REPEAT);

    if (groups_all[dim] == 1) {
        scan_dim_fn<Ti, To, op, dim, true>(out, out, in,
                                           threads_y,
                                           groups_all);
    } else {
        Param tmp = out;

        tmp.info.dims[dim] = groups_all[dim];
        tmp.info.strides[0] = 1;
        for (int k = 1; k < 4; k++) {
            tmp.info.strides[k] = tmp.info.strides[k - 1] * tmp.info.dims[k - 1];
        }

        dim_type tmp_elements = tmp.info.strides[3] * tmp.info.dims[3];

        // FIXME: Do I need to free this ?
        tmp.data = cl::Buffer(getContext(), CL_MEM_READ_WRITE, tmp_elements * sizeof(To));

        scan_dim_fn<Ti, To, op, dim, false>(out, tmp, in,
                                            threads_y,
                                            groups_all);

        int gdim = groups_all[dim];
        groups_all[dim] = 1;

        if (op == af_notzero_t) {
            scan_dim_fn<To, To, af_add_t, dim, true>(tmp, tmp, tmp,
                                                     threads_y,
                                                     groups_all);
        } else {
            scan_dim_fn<To, To, op, dim, true>(tmp, tmp, tmp,
                                               threads_y,
                                               groups_all);
        }

        groups_all[dim] = gdim;
        bcast_dim_fn<To, To, op, dim, true>(out, tmp,
                                            threads_y,
                                            groups_all);
    }
}
Developer ID: EasonYi, Project: arrayfire, Lines: 55, Source: scan_dim.hpp
Example 19: matchTemplate
void matchTemplate(Param out, const Param srch, const Param tmplt)
{
    try {
        static std::once_flag compileFlags[DeviceManager::MAX_DEVICES];
        static std::map<int, Program*> mtProgs;
        static std::map<int, Kernel*> mtKernels;

        int device = getActiveDeviceId();

        std::call_once( compileFlags[device], [device] () {
            std::ostringstream options;
            options << " -D inType=" << dtype_traits<inType>::getName()
                    << " -D outType=" << dtype_traits<outType>::getName()
                    << " -D MATCH_T=" << mType
                    << " -D NEEDMEAN="<< needMean
                    << " -D AF_SAD=" << AF_SAD
                    << " -D AF_ZSAD=" << AF_ZSAD
                    << " -D AF_LSAD=" << AF_LSAD
                    << " -D AF_SSD=" << AF_SSD
                    << " -D AF_ZSSD=" << AF_ZSSD
                    << " -D AF_LSSD=" << AF_LSSD
                    << " -D AF_NCC=" << AF_NCC
                    << " -D AF_ZNCC=" << AF_ZNCC
                    << " -D AF_SHD=" << AF_SHD;
            if (std::is_same<outType, double>::value) {
                options << " -D USE_DOUBLE";
            }

            Program prog;
            buildProgram(prog, matchTemplate_cl, matchTemplate_cl_len, options.str());
            mtProgs[device] = new Program(prog);
            mtKernels[device] = new Kernel(*mtProgs[device], "matchTemplate");
        });

        NDRange local(THREADS_X, THREADS_Y);

        int blk_x = divup(srch.info.dims[0], THREADS_X);
        int blk_y = divup(srch.info.dims[1], THREADS_Y);

        NDRange global(blk_x * srch.info.dims[2] * THREADS_X, blk_y * srch.info.dims[3] * THREADS_Y);

        auto matchImgOp = make_kernel<Buffer, KParam,
                                      Buffer, KParam,
                                      Buffer, KParam,
                                      int, int> (*mtKernels[device]);

        matchImgOp(EnqueueArgs(getQueue(), global, local),
                   *out.data, out.info, *srch.data, srch.info, *tmplt.data, tmplt.info, blk_x, blk_y);

        CL_DEBUG_FINISH(getQueue());
    } catch (cl::Error err) {
        CL_TO_AF_ERROR(err);
        throw;
    }
}
Developer ID: Brainiarc7, Project: arrayfire, Lines: 55, Source: match_template.hpp
Example 20: memcopy
void memcopy(cl::Buffer out, const dim_t *ostrides,
             const cl::Buffer in, const dim_t *idims,
             const dim_t *istrides, int offset, uint ndims)
{
    try {
        static std::once_flag compileFlags[DeviceManager::MAX_DEVICES];
        static std::map<int, Program*> cpyProgs;
        static std::map<int, Kernel*> cpyKernels;

        int device = getActiveDeviceId();

        std::call_once(compileFlags[device], [&]() {
            std::ostringstream options;
            options << " -D T=" << dtype_traits<T>::getName();

            if (std::is_same<T, double>::value ||
                std::is_same<T, cdouble>::value) {
                options << " -D USE_DOUBLE";
            }

            Program prog;
            buildProgram(prog, memcopy_cl, memcopy_cl_len, options.str());
            cpyProgs[device] = new Program(prog);
            cpyKernels[device] = new Kernel(*cpyProgs[device], "memcopy_kernel");
        });

        dims_t _ostrides = {{ostrides[0], ostrides[1], ostrides[2], ostrides[3]}};
        dims_t _istrides = {{istrides[0], istrides[1], istrides[2], istrides[3]}};
        dims_t _idims = {{idims[0], idims[1], idims[2], idims[3]}};

        size_t local_size[2] = {DIM0, DIM1};
        if (ndims == 1) {
            local_size[0] *= local_size[1];
            local_size[1] = 1;
        }

        int groups_0 = divup(idims[0], local_size[0]);
        int groups_1 = divup(idims[1], local_size[1]);

        NDRange local(local_size[0], local_size[1]);
        NDRange global(groups_0 * idims[2] * local_size[0],
                       groups_1 * idims[3] * local_size[1]);

        auto memcopy_kernel = KernelFunctor< Buffer, dims_t,
                                             Buffer, dims_t,
                                             dims_t, int,
                                             int, int >(*cpyKernels[device]);

        memcopy_kernel(EnqueueArgs(getQueue(), global, local),
                       out, _ostrides, in, _idims, _istrides, offset, groups_0, groups_1);
        CL_DEBUG_FINISH(getQueue());
    }
    catch (cl::Error err) {
        CL_TO_AF_ERROR(err);
        throw;
    }
}
Developer ID: shehzan10, Project: arrayfire, Lines: 55, Source: memcopy.hpp
Note: The divup function examples on this page were compiled by 纯净天空 from GitHub, MSDocs, and other source-code and documentation platforms. The snippets are selected from open-source projects contributed by their respective authors, who retain copyright; consult each project's license before redistributing or using the code. Do not reproduce without permission.