C++ clFinish Function Code Examples


This article collects typical usage examples of the clFinish function in C++. If you are struggling with questions such as: What exactly does C++ clFinish do? How is clFinish used? What does a working clFinish example look like? then the hand-picked code samples below may help.



A total of 20 clFinish code examples are presented below, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better C++ code examples.
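
As background for the examples that follow: clFinish(command_queue) blocks the calling host thread until every command previously enqueued on that queue has completed, returning CL_SUCCESS on success. It is the simplest way to make sure kernel results are ready before the host reads them, and it is also commonly used as a synchronization point before timing measurements or resource cleanup. The sketch below illustrates the typical enqueue → clFinish → read-back pattern; it is not taken from any of the projects listed here, the helper name run_and_wait and its parameters are made up for illustration, and only the standard OpenCL calls (clEnqueueNDRangeKernel, clFinish, clEnqueueReadBuffer) are real API.

#include <CL/cl.h>

/* Hypothetical helper showing the usual clFinish pattern: enqueue a 1-D kernel,
 * block until the queue has drained, then copy the result back to the host.
 * The queue, kernel and output buffer are assumed to be created by the caller. */
static cl_int run_and_wait(cl_command_queue queue, cl_kernel kernel,
                           size_t global, size_t local,
                           cl_mem output, size_t bytes, void *host_ptr)
{
	/* clEnqueueNDRangeKernel only queues the work and returns immediately. */
	cl_int err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL,
	                                    &global, &local, 0, NULL, NULL);
	if (err != CL_SUCCESS)
		return err;

	/* clFinish blocks until all commands in the queue have finished, so the
	 * kernel's output is guaranteed to be complete after this call. */
	err = clFinish(queue);
	if (err != CL_SUCCESS)
		return err;

	/* Safe to copy the result back now; a blocking read keeps the sketch simple. */
	return clEnqueueReadBuffer(queue, output, CL_TRUE, 0, bytes, host_ptr,
	                           0, NULL, NULL);
}

Several of the examples below, such as Example 2 (runProgram) and Example 13, follow exactly this sequence; others call clFinish mainly to synchronize before timing (Examples 1 and 8) or around shared OpenGL resources (Example 10).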

Example 1: main


//......... some code omitted here .........
				break;
			dev -= devs[platform];
		}

		workSize[i] = (i < sizeMod) ? sizePerGPU+1 : sizePerGPU;

		check2(d_A[i] = clCreateBuffer(ctx[platform], CL_MEM_READ_ONLY  | CL_MEM_USE_HOST_PTR, workSize[i] * WA * sizeof(TYPE), &A_data[workOffset[i] * WA], &err));
		check2(d_C[i] = clCreateBuffer(ctx[platform], CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, workSize[i] * WC * sizeof(TYPE), &C_data[workOffset[i] * WC], &err));

		check(clSetKernelArg(multiplicationKernel[platform], 0, sizeof(cl_int), &workSize[i]));
		check(clSetKernelArg(multiplicationKernel[platform], 1, sizeof(cl_int), &workSize[i]));
		check(clSetKernelArg(multiplicationKernel[platform], 2, sizeof(cl_int), &workSize[i]));
		check(clSetKernelArg(multiplicationKernel[platform], 3, sizeof(cl_mem), (void *) &d_A[i]));
		check(clSetKernelArg(multiplicationKernel[platform], 4, sizeof(cl_mem), (void *) &d_B[d]));
		check(clSetKernelArg(multiplicationKernel[platform], 5, sizeof(cl_mem), (void *) &d_C[i]));

		size_t globalWorkSize[] = {roundUp(BLOCK_SIZE,WC), roundUp(BLOCK_SIZE,workSize[i])};

		check(clEnqueueNDRangeKernel(commandQueue[platform][dev], multiplicationKernel[platform], 2, NULL, globalWorkSize, localWorkSize, 0, NULL, &GPUExecution[i]));

		// Non-blocking copy of result from device to host
		cqs[i] = commandQueue[platform][dev];
		check2(ptrs[i] = clEnqueueMapBuffer(cqs[i], d_C[i], CL_FALSE, CL_MAP_READ, 0, WC * sizeof(TYPE) * workSize[i], 1, &GPUExecution[i], &GPUDone[i], &err));

		if(i+1 < BLOCKS)
			workOffset[i + 1] = workOffset[i] + workSize[i];
	}


	// CPU sync with GPU
	for (p=0; p<platform_count;p++) {
		cl_uint dev;
		for (dev=0; dev<devs[p]; dev++) {
			clFinish(commandQueue[p][dev]);
		}
	}

	gettimeofday(&end, NULL);
	double timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec));

	double dSeconds = timing/1000/1000;
	double dNumOps = 2.0 * (double)WA * (double)HA * (double)WB;
	double gflops = 1.0e-9 * dNumOps/dSeconds;

	printf("Throughput = %.4f GFlops/s, Time = %.5f s, Size = %.0f, NumDevsUsed = %d, Blocks = %ld, Workgroup = %zu\n",
			gflops, dSeconds, dNumOps, device_count, BLOCKS, localWorkSize[0] * localWorkSize[1]);

	// compute reference solution
	if (check) {
		printf("Comparing results with CPU computation... ");
		TYPE* reference = (TYPE*)malloc(C_mem_size);
		computeReference(reference, A_data, B_data, HA, WA, WB);

		// check result
		int res = shrCompareL2fe(reference, C_data, C_size, 1.0e-6f);
		if (res == 0) {
			printf("\n\n");
			printDiff(reference, C_data, WC, HC, 100, 1.0e-5f);
		}
		else printf("PASSED\n\n");
		free(reference);
	}

	for(i = 0; i < BLOCKS; i++)
	{
		clEnqueueUnmapMemObject(cqs[i], d_C[i], ptrs[i], 0, NULL, NULL);
Author: joao-lima, Project: starpu-1.2.0rc2, Lines: 67, Source: matmul.c


Example 2: runProgram


//......... some code omitted here .........


	kernel[0] = clCreateKernel(program, "kernel_a", &err);
	OCL_CHECK(err);

	// memory on device
	cl_mem A_d    	= clCreateBuffer(context, CL_MEM_READ_WRITE,  sizeof(float)*N*N,  NULL, NULL);
	cl_mem Aout_d   = clCreateBuffer(context, CL_MEM_READ_WRITE,  sizeof(float)*N*N,  NULL, NULL);


	// copy data to device
	err = clEnqueueWriteBuffer(queue, A_d, 	CL_TRUE, 0, sizeof(float)*N*N, 	A, 0, NULL , &event[0]); 
	OCL_CHECK(err);

	size_t localsize[2];
	size_t globalsize[2];

	localsize[0] = 16; 
	localsize[1] = 16;

	globalsize[0] = N;
	globalsize[1] = N;

	err  = clSetKernelArg(kernel[0], 0, sizeof(cl_mem), &A_d);
	if(err != 0) { printf("%d\n",err); OCL_CHECK(err); exit(1);}

	err  = clSetKernelArg(kernel[0], 1, sizeof(cl_mem), &Aout_d);
	if(err != 0) { printf("%d\n",err); OCL_CHECK(err); exit(1);}


	err = clEnqueueNDRangeKernel(queue, kernel[0], 2, NULL, globalsize, localsize, 0, NULL, NULL);
	OCL_CHECK(err);

	clFinish(queue);

	// read device data back to host
	clEnqueueReadBuffer(queue, Aout_d, CL_TRUE, 0, sizeof(float)*N*N, Aout, 0, NULL , &event[1]);

	err = clWaitForEvents(1,&event[1]);
	OCL_CHECK(err);

	err = clGetEventProfilingInfo (event[0], CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &gstart, NULL);
	OCL_CHECK(err);

	err = clGetEventProfilingInfo (event[1], CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &gend, NULL);
	OCL_CHECK(err);

	gpuTime = (double)(gend -gstart)/1000000000.0;



	//check_1d_f(sum, blks+1);

#ifdef DEBUG
	puts("Output");
	check_2d_f(Aout,N,N);
#endif

	printf("oclTime = %lf (s)\n", gpuTime );

	// free
	clReleaseMemObject(A_d);	
	clReleaseMemObject(Aout_d);	


	// // check
Author: Anmol-007, Project: oclKernels, Lines: 67, Source: template.c


Example 3: runSummarization

static cl_int runSummarization(CLInfo* ci,
                               SeparationCLMem* cm,
                               const IntegralArea* ia,
                               cl_uint which,
                               Kahan* resultOut)
{
    cl_int err = CL_SUCCESS;
    cl_mem buf;
    cl_uint offset;
    size_t global[1];
    size_t local[1];
    real result[2] = { -1.0, -1.0 };
    cl_uint nElements = ia->r_steps * ia->mu_steps;
    cl_mem sumBufs[2] = { cm->summarizationBufs[0], cm->summarizationBufs[1] };

    if (which == 0)
    {
        buf = cm->outBg;
        offset = 0;
    }
    else
    {
        buf = cm->outStreams;
        offset = (which - 1) * nElements;
    }


    /* First call reads from an offset into one of the output buffers */
    err |= clSetKernelArg(_summarizationKernel, 0, sizeof(cl_mem), &sumBufs[0]);
    err |= clSetKernelArg(_summarizationKernel, 1, sizeof(cl_mem), &buf);
    err |= clSetKernelArg(_summarizationKernel, 2, sizeof(cl_uint), &nElements);
    err |= clSetKernelArg(_summarizationKernel, 3, sizeof(cl_uint), &offset);
    if (err != CL_SUCCESS)
    {
        mwPerrorCL(err, "Error setting summarization kernel arguments");
        return err;
    }

    local[0] = _summarizationWorkgroupSize;
    global[0] = mwNextMultiple(local[0], nElements);

    err = clEnqueueNDRangeKernel(ci->queue, _summarizationKernel, 1,
                                 NULL, global, local,
                                 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        mwPerrorCL(err, "Error enqueuing summarization kernel");
        return err;
    }

    /* Why is this necessary? It seems to frequently break on the 7970 and nowhere else without it */
    err = clFinish(ci->queue);
    //err = clFlush(ci->queue);
    if (err != CL_SUCCESS)
    {
        mwPerrorCL(err, "Error finishing summarization kernel");
        return err;
    }

    /* Later calls swap between summarization buffers without an offset */
    nElements = (cl_uint) mwDivRoundup(global[0], local[0]);
    offset = 0;
    err |= clSetKernelArg(_summarizationKernel, 3, sizeof(cl_uint), &offset);
    if (err != CL_SUCCESS)
    {
        mwPerrorCL(err, "Error setting summarization kernel offset argument");
        return err;
    }

    while (nElements > 1)
    {
        /* Swap old summarization buffer to the input and shrink the range */
        swapBuffers(sumBufs);

        global[0] = mwNextMultiple(local[0], nElements);

        err |= clSetKernelArg(_summarizationKernel, 0, sizeof(cl_mem), &sumBufs[0]);
        err |= clSetKernelArg(_summarizationKernel, 1, sizeof(cl_mem), &sumBufs[1]);
        err |= clSetKernelArg(_summarizationKernel, 2, sizeof(cl_uint), &nElements);
        if (err != CL_SUCCESS)
        {
            mwPerrorCL(err, "Error setting summarization kernel arguments");
            return err;
        }

        /*
        err = clEnqueueBarrier(ci->queue);
        if (err != CL_SUCCESS)
        {
            mwPerrorCL(err, "Error enqueuing summarization barrier");
            return err;
        }
        */

        err = clEnqueueNDRangeKernel(ci->queue, _summarizationKernel, 1,
                                     NULL, global, local,
                                     0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            mwPerrorCL(err, "Error enqueuing summarization kernel");
//......... some code omitted here .........
Author: LocutusOfBorg, Project: milkywayathome_client, Lines: 101, Source: run_cl.c


Example 4: main


//......... some code omitted here .........
	delete[] kernelSource;

	// Create the compute kernel in the program we wish to run
	kernel_compute_flux = clCreateKernel(program, "compute_flux", &err);
	CHKERR(err, "Failed to create a compute kernel!");

	// Create the reduce kernel in the program we wish to run
	kernel_compute_flux_contributions = clCreateKernel(program, "compute_flux_contributions", &err);
	CHKERR(err, "Failed to create a compute_flux_contributions kernel!");
	// Create the reduce kernel in the program we wish to run
	kernel_compute_step_factor = clCreateKernel(program, "compute_step_factor", &err);
	CHKERR(err, "Failed to create a compute_step_factor kernel!");
	// Create the reduce kernel in the program we wish to run
	kernel_time_step = clCreateKernel(program, "time_step", &err);
	CHKERR(err, "Failed to create a time_step kernel!");
	// Create the reduce kernel in the program we wish to run
	kernel_initialize_variables = clCreateKernel(program, "initialize_variables", &err);
	CHKERR(err, "Failed to create a initialize_variables kernel!");

	// Create arrays and set initial conditions
	cl_mem variables = alloc<cl_float>(context, nelr*NVAR);

	err = 0;
	err = clSetKernelArg(kernel_initialize_variables, 0, sizeof(int), &nelr);
	err |= clSetKernelArg(kernel_initialize_variables, 1, sizeof(cl_mem),&variables);
	err |= clSetKernelArg(kernel_initialize_variables, 2, sizeof(cl_mem),&ff_variable);
	CHKERR(err, "Failed to set kernel arguments!");
	// Get the maximum work group size for executing the kernel on the device
	//err = clGetKernelWorkGroupInfo(kernel_initialize_variables, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), (void *) &local_size, NULL);
	CHKERR(err, "Failed to retrieve kernel_initialize_variables work group info!");
	local_size = 1;//std::min(local_size, (size_t)nelr);
	global_size = nelr;
	err = clEnqueueNDRangeKernel(commands, kernel_initialize_variables, 1, NULL, &global_size, NULL, 0, NULL, &ocdTempEvent);
	err = clFinish(commands);
	START_TIMER(ocdTempEvent, OCD_TIMER_KERNEL, "CFD Init Kernels", ocdTempTimer)
	END_TIMER(ocdTempTimer)
	CHKERR(err, "Failed to execute kernel [kernel_initialize_variables]! 0");


	cl_mem old_variables = alloc<float>(context, nelr*NVAR);
	cl_mem fluxes = alloc<float>(context, nelr*NVAR);
	cl_mem step_factors = alloc<float>(context, nelr);
	clFinish(commands);
	cl_mem fc_momentum_x = alloc<float>(context, nelr*NDIM);
	cl_mem fc_momentum_y = alloc<float>(context, nelr*NDIM);
	cl_mem fc_momentum_z = alloc<float>(context, nelr*NDIM);
	cl_mem fc_density_energy = alloc<float>(context, nelr*NDIM);
	clFinish(commands);

	// make sure all memory is floatly allocated before we start timing
	err = 0;
	err = clSetKernelArg(kernel_initialize_variables, 0, sizeof(int), &nelr);
	err |= clSetKernelArg(kernel_initialize_variables, 1, sizeof(cl_mem),&old_variables);
	err |= clSetKernelArg(kernel_initialize_variables, 2, sizeof(cl_mem),&ff_variable);
	CHKERR(err, "Failed to set kernel arguments!");
	// Get the maximum work group size for executing the kernel on the device
	err = clGetKernelWorkGroupInfo(kernel_initialize_variables, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), (void *) &local_size, NULL);
	CHKERR(err, "Failed to retrieve kernel_initialize_variables work group info!");
	err = clEnqueueNDRangeKernel(commands, kernel_initialize_variables, 1, NULL, &global_size, NULL, 0, NULL, &ocdTempEvent);
	clFinish(commands);
	START_TIMER(ocdTempEvent, OCD_TIMER_KERNEL, "CFD Init Kernels", ocdTempTimer)
	END_TIMER(ocdTempTimer)
	CHKERR(err, "Failed to execute kernel [kernel_initialize_variables]! 1");
	err = 0;
	err = clSetKernelArg(kernel_initialize_variables, 0, sizeof(int), &nelr);
	err |= clSetKernelArg(kernel_initialize_variables, 1, sizeof(cl_mem),&fluxes);
Author: CharudattaSChitale, Project: OpenDwarfs, Lines: 67, Source: cfd.cpp


Example 5: finish

 int finish()              { return clFinish(commands); }
Author: gitter-badger, Project: OpenPV, Lines: 1, Source: CLKernel.hpp


Example 6: ops_par_loop_advec_mom_kernel_mass_flux_z


//......... some code omitted here .........

  buildOpenCLKernels_advec_mom_kernel_mass_flux_z(xdim0, ydim0, xdim1, ydim1);

  // set up OpenCL thread blocks
  size_t globalWorkSize[3] = {
      ((x_size - 1) / OPS_block_size_x + 1) * OPS_block_size_x,
      ((y_size - 1) / OPS_block_size_y + 1) * OPS_block_size_y,
      ((z_size - 1) / OPS_block_size_z + 1) * OPS_block_size_z};
  size_t localWorkSize[3] = {OPS_block_size_x, OPS_block_size_y,
                             OPS_block_size_z};

  // set up initial pointers
  int d_m[OPS_MAX_DIM];
#ifdef OPS_MPI
  for (int d = 0; d < dim; d++)
    d_m[d] =
        args[0].dat->d_m[d] + OPS_sub_dat_list[args[0].dat->index]->d_im[d];
#else
  for (int d = 0; d < dim; d++)
    d_m[d] = args[0].dat->d_m[d];
#endif
  int base0 = 1 * 1 * (start[0] * args[0].stencil->stride[0] -
                       args[0].dat->base[0] - d_m[0]);
  base0 = base0 +
          args[0].dat->size[0] * 1 * (start[1] * args[0].stencil->stride[1] -
                                      args[0].dat->base[1] - d_m[1]);
  base0 = base0 +
          args[0].dat->size[0] * 1 * args[0].dat->size[1] * 1 *
              (start[2] * args[0].stencil->stride[2] - args[0].dat->base[2] -
               d_m[2]);

#ifdef OPS_MPI
  for (int d = 0; d < dim; d++)
    d_m[d] =
        args[1].dat->d_m[d] + OPS_sub_dat_list[args[1].dat->index]->d_im[d];
#else
  for (int d = 0; d < dim; d++)
    d_m[d] = args[1].dat->d_m[d];
#endif
  int base1 = 1 * 1 * (start[0] * args[1].stencil->stride[0] -
                       args[1].dat->base[0] - d_m[0]);
  base1 = base1 +
          args[1].dat->size[0] * 1 * (start[1] * args[1].stencil->stride[1] -
                                      args[1].dat->base[1] - d_m[1]);
  base1 = base1 +
          args[1].dat->size[0] * 1 * args[1].dat->size[1] * 1 *
              (start[2] * args[1].stencil->stride[2] - args[1].dat->base[2] -
               d_m[2]);

  ops_H_D_exchanges_device(args, 2);
  ops_halo_exchanges(args, 2, range);
  ops_H_D_exchanges_device(args, 2);

  if (OPS_diags > 1) {
    ops_timers_core(&c2, &t2);
    OPS_kernels[134].mpi_time += t2 - t1;
  }

  if (globalWorkSize[0] > 0 && globalWorkSize[1] > 0 && globalWorkSize[2] > 0) {

    clSafeCall(clSetKernelArg(OPS_opencl_core.kernel[134], 0, sizeof(cl_mem),
                              (void *)&arg0.data_d));
    clSafeCall(clSetKernelArg(OPS_opencl_core.kernel[134], 1, sizeof(cl_mem),
                              (void *)&arg1.data_d));
    clSafeCall(clSetKernelArg(OPS_opencl_core.kernel[134], 2, sizeof(cl_int),
                              (void *)&base0));
    clSafeCall(clSetKernelArg(OPS_opencl_core.kernel[134], 3, sizeof(cl_int),
                              (void *)&base1));
    clSafeCall(clSetKernelArg(OPS_opencl_core.kernel[134], 4, sizeof(cl_int),
                              (void *)&x_size));
    clSafeCall(clSetKernelArg(OPS_opencl_core.kernel[134], 5, sizeof(cl_int),
                              (void *)&y_size));
    clSafeCall(clSetKernelArg(OPS_opencl_core.kernel[134], 6, sizeof(cl_int),
                              (void *)&z_size));

    // call/enque opencl kernel wrapper function
    clSafeCall(clEnqueueNDRangeKernel(
        OPS_opencl_core.command_queue, OPS_opencl_core.kernel[134], 3, NULL,
        globalWorkSize, localWorkSize, 0, NULL, NULL));
  }
  if (OPS_diags > 1) {
    clSafeCall(clFinish(OPS_opencl_core.command_queue));
  }

  if (OPS_diags > 1) {
    ops_timers_core(&c1, &t1);
    OPS_kernels[134].time += t1 - t2;
  }

  ops_set_dirtybit_device(args, 2);
  ops_set_halo_dirtybit3(&args[0], range);

  if (OPS_diags > 1) {
    // Update kernel record
    ops_timers_core(&c2, &t2);
    OPS_kernels[134].mpi_time += t2 - t1;
    OPS_kernels[134].transfer += ops_compute_transfer(dim, start, end, &arg0);
    OPS_kernels[134].transfer += ops_compute_transfer(dim, start, end, &arg1);
  }
}
Author: gihanmudalige, Project: OPS, Lines: 101, Source: advec_mom_kernel_mass_flux_z_opencl_kernel.cpp


Example 7: ocl_call


//......... some code omitted here .........
	case INTSXP:
	    ptr = INTEGER(arg);
	    al = sizeof(int);
	    break;
	case LGLSXP:
	    ptr = LOGICAL(arg);
	    al = sizeof(int);
	    break;
	case RAWSXP:
	    if (inherits(arg, "clFloat")) {
		ptr = RAW(arg);
		ndiv = al = sizeof(float);
		break;
	    }
	default:
	    Rf_error("only numeric or logical kernel arguments are supported");
	    /* no-ops but needed to make the compiler happy */
	    ptr = 0;
	    al = 0;
	}
	n = LENGTH(arg);
	if (ndiv != 1) n /= ndiv;
	if (n == 1) {/* scalar */
	    if ((last_ocl_error = clSetKernelArg(kernel, an++, al, ptr)) != CL_SUCCESS)
		Rf_error("Failed to set scalar kernel argument %d (size=%d, error code %d)", an, al, last_ocl_error);
	} else {
	    cl_mem input = clCreateBuffer(context,  CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,  al * n, ptr, &last_ocl_error);
	    if (!input)
		Rf_error("Unable to create buffer (%d elements, %d bytes each) for vector argument %d (oclError %d)", n, al, an, last_ocl_error);
	    if (!occ->mem_objects)
		occ->mem_objects = arg_alloc(0, 32);
	    arg_add(occ->mem_objects, input);
#if 0 /* we used this before CL_MEM_USE_HOST_PTR */
	    if ((last_ocl_error = clEnqueueWriteBuffer(commands, input, CL_TRUE, 0, al * n, ptr, 0, NULL, NULL)) != CL_SUCCESS)
		Rf_error("Failed to transfer data (%d elements) for vector argument %d (oclError %d)", n, an, last_ocl_error);
#endif
	    if ((last_ocl_error = clSetKernelArg(kernel, an++, sizeof(cl_mem), &input)) != CL_SUCCESS)
		Rf_error("Failed to set vector kernel argument %d (size=%d, length=%d, error %d)", an, al, n, last_ocl_error);
	    /* clReleaseMemObject(input); */
	}
	args = CDR(args);
    }

    if ((last_ocl_error = clEnqueueNDRangeKernel(commands, kernel, wdim, NULL, wdims, NULL, 0, NULL, async ? &occ->event : NULL)) != CL_SUCCESS)
	ocl_err("Kernel execution");

    if (async) { /* asynchronous call -> get out and return the context */
#if USE_OCL_COMPLETE_CALLBACK
	last_ocl_error = clSetEventCallback(occ->event, CL_COMPLETE, ocl_complete_callback, occ);
#endif
	clFlush(commands); /* the specs don't guarantee execution unless clFlush is called */
	occ->ftres = ftres;
	occ->ftype = ftype;
	occ->on = on;
	Rf_setAttrib(octx, R_ClassSymbol, mkString("clCallContext"));
	UNPROTECT(1);
	return octx;
    }

    clFinish(commands);
    occ->finished = 1;

    /* we can release input memory objects now */
    if (occ->mem_objects) {
      arg_free(occ->mem_objects, (afin_t) clReleaseMemObject);
      occ->mem_objects = 0;
    }
    if (float_args) {
      arg_free(float_args, 0);
      float_args = occ->float_args = 0;
    }

    res = ftres ? Rf_allocVector(RAWSXP, on * sizeof(float)) : Rf_allocVector(REALSXP, on);
    if (ftype == FT_SINGLE) {
	if (ftres) {
	  if ((last_ocl_error = clEnqueueReadBuffer( commands, output, CL_TRUE, 0, sizeof(float) * on, RAW(res), 0, NULL, NULL )) != CL_SUCCESS)
		Rf_error("Unable to transfer result vector (%d float elements, oclError %d)", on, last_ocl_error);
	    PROTECT(res);
	    Rf_setAttrib(res, R_ClassSymbol, mkString("clFloat"));
	    UNPROTECT(1);
	} else {
	    /* float - need a temporary buffer */
	    float *fr = (float*) malloc(sizeof(float) * on);
	    double *r = REAL(res);
	    int i;
	    if (!fr)
		Rf_error("unable to allocate memory for temporary single-precision output buffer");
	    occ->float_out = fr;
	    if ((last_ocl_error = clEnqueueReadBuffer( commands, output, CL_TRUE, 0, sizeof(float) * on, fr, 0, NULL, NULL )) != CL_SUCCESS)
		Rf_error("Unable to transfer result vector (%d float elements, oclError %d)", on, last_ocl_error);
	    for (i = 0; i < on; i++)
		r[i] = fr[i];
	}
    } else if ((last_ocl_error = clEnqueueReadBuffer( commands, output, CL_TRUE, 0, sizeof(double) * on, REAL(res), 0, NULL, NULL )) != CL_SUCCESS)
	Rf_error("Unable to transfer result vector (%d double elements, oclError %d)", on, last_ocl_error);

    ocl_call_context_fin(octx);
    UNPROTECT(1);
    return res;
}
Author: mprymek, Project: OpenCL, Lines: 101, Source: ocl.c


Example 8: gws_test

//Do the proper test using different sizes.
static cl_ulong gws_test(size_t num, struct fmt_main * self) {

    cl_event myEvent;
    cl_int ret_code;
    cl_uint *tmpbuffer;
    cl_ulong startTime, endTime, runtime;
    int i, loops;

    //Prepare buffers.
    create_clobj(num, self);

    tmpbuffer = mem_alloc(sizeof(sha512_hash) * num);

    if (tmpbuffer == NULL) {
        fprintf(stderr, "Malloc failure in find_best_gws\n");
        exit(EXIT_FAILURE);
    }

    queue_prof = clCreateCommandQueue(context[ocl_gpu_id], devices[ocl_gpu_id],
            CL_QUEUE_PROFILING_ENABLE, &ret_code);
    HANDLE_CLERROR(ret_code, "Failed in clCreateCommandQueue");

    // Set salt.
    set_salt(get_salt("$6$saltstring$"));
    salt->initial = salt->rounds - get_multiple(salt->rounds, HASH_LOOPS);

    // Set keys
    for (i = 0; i < num; i++) {
        set_key("aaabaabaaa", i);
    }
    //** Get execution time **//
    HANDLE_CLERROR(clEnqueueWriteBuffer(queue_prof, salt_buffer, CL_FALSE, 0,
            sizeof(sha512_salt), salt, 0, NULL, &myEvent),
            "Failed in clEnqueueWriteBuffer");

    HANDLE_CLERROR(clFinish(queue_prof), "Failed in clFinish");
    HANDLE_CLERROR(clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_SUBMIT,
            sizeof(cl_ulong), &startTime, NULL),
            "Failed in clGetEventProfilingInfo I");
    HANDLE_CLERROR(clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_END,
            sizeof(cl_ulong), &endTime, NULL),
            "Failed in clGetEventProfilingInfo II");
    HANDLE_CLERROR(clReleaseEvent(myEvent), "Failed in clReleaseEvent");
    runtime = endTime - startTime;

    //** Get execution time **//
    HANDLE_CLERROR(clEnqueueWriteBuffer(queue_prof, pass_buffer, CL_FALSE, 0,
            sizeof(sha512_password) * num, plaintext, 0, NULL, &myEvent),
            "Failed in clEnqueueWriteBuffer");

    HANDLE_CLERROR(clFinish(queue_prof), "Failed in clFinish");
    HANDLE_CLERROR(clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_SUBMIT,
            sizeof(cl_ulong), &startTime, NULL),
            "Failed in clGetEventProfilingInfo I");
    HANDLE_CLERROR(clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_END,
            sizeof(cl_ulong), &endTime, NULL),
            "Failed in clGetEventProfilingInfo II");
    HANDLE_CLERROR(clReleaseEvent(myEvent), "Failed in clReleaseEvent");
    runtime += endTime - startTime;

    //** Get execution time **//
    if (gpu(source_in_use) || use_local(source_in_use)) {
        ret_code = clEnqueueNDRangeKernel(queue_prof, prepare_kernel,
            1, NULL, &num, &local_work_size, 0, NULL, &myEvent);

        HANDLE_CLERROR(clFinish(queue_prof), "Failed in clFinish");
        HANDLE_CLERROR(clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_SUBMIT,
            sizeof(cl_ulong), &startTime, NULL),
            "Failed in clGetEventProfilingInfo I");
        HANDLE_CLERROR(clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_END,
            sizeof(cl_ulong), &endTime, NULL),
            "Failed in clGetEventProfilingInfo II");
        HANDLE_CLERROR(clReleaseEvent(myEvent), "Failed in clReleaseEvent");
        runtime += endTime - startTime;
    }

    loops = gpu(source_in_use) || use_local(source_in_use) ? (salt->rounds / HASH_LOOPS) : 1;

    //** Get execution time **//
    for (i = 0; i < loops; i++)
    {
        ret_code = clEnqueueNDRangeKernel(queue_prof, crypt_kernel,
               1, NULL, &num, &local_work_size, 0, NULL, &myEvent);

        HANDLE_CLERROR(clFinish(queue_prof), "Failed in clFinish");
        HANDLE_CLERROR(clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_SUBMIT,
            sizeof(cl_ulong), &startTime, NULL),
            "Failed in clGetEventProfilingInfo I");
        HANDLE_CLERROR(clGetEventProfilingInfo(myEvent, CL_PROFILING_COMMAND_END,
            sizeof(cl_ulong), &endTime, NULL),
            "Failed in clGetEventProfilingInfo II");
        HANDLE_CLERROR(clReleaseEvent(myEvent), "Failed in clReleaseEvent");
        runtime += endTime - startTime;
    }

    //** Get execution time **//
    HANDLE_CLERROR(clEnqueueReadBuffer(queue_prof, hash_buffer, CL_FALSE, 0,
            sizeof(sha512_hash) * num, tmpbuffer, 0, NULL, &myEvent),
            "Failed in clEnqueueReadBuffer");
//......... some code omitted here .........
Author: bhargavz, Project: pac4mac, Lines: 101, Source: opencl_cryptsha512_fmt.c


Example 9: opencl_scanhash

static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
				int64_t __maybe_unused max_nonce)
{
	const int thr_id = thr->id;
	struct opencl_thread_data *thrdata = thr->cgpu_data;
	struct cgpu_info *gpu = thr->cgpu;
	_clState *clState = clStates[thr_id];
	const cl_kernel *kernel = &clState->kernel;
	const int dynamic_us = opt_dynamic_interval * 1000;
	cl_bool blocking;

	cl_int status;
	size_t globalThreads[1];
	size_t localThreads[1] = { clState->wsize };
	unsigned int threads;
	int64_t hashes;

	if (gpu->dynamic)
		blocking = CL_TRUE;
	else
		blocking = CL_FALSE;

	/* This finish flushes the readbuffer set with CL_FALSE later */
	if (!blocking)
		clFinish(clState->commandQueue);

	if (gpu->dynamic) {
		struct timeval diff;
		suseconds_t gpu_us;

		gettimeofday(&gpu->tv_gpuend, NULL);
		timersub(&gpu->tv_gpuend, &gpu->tv_gpustart, &diff);
		gpu_us = diff.tv_sec * 1000000 + diff.tv_usec;
		if (likely(gpu_us >= 0)) {
			gpu->gpu_us_average = (gpu->gpu_us_average + gpu_us * 0.63) / 1.63;

			/* Try to not let the GPU be out for longer than 
			 * opt_dynamic_interval in ms, but increase
			 * intensity when the system is idle in dynamic mode */
			if (gpu->gpu_us_average > dynamic_us) {
				if (gpu->intensity > MIN_INTENSITY)
					--gpu->intensity;
			} else if (gpu->gpu_us_average < dynamic_us / 2) {
				if (gpu->intensity < MAX_INTENSITY)
					++gpu->intensity;
			}
		}
	}
	set_threads_hashes(clState->vwidth, &threads, &hashes, globalThreads,
			   localThreads[0], gpu->intensity);
	if (hashes > gpu->max_hashes)
		gpu->max_hashes = hashes;

	status = thrdata->queue_kernel_parameters(clState, &work->blk, globalThreads[0]);
	if (unlikely(status != CL_SUCCESS)) {
		applog(LOG_ERR, "Error: clSetKernelArg of all params failed.");
		return -1;
	}

	/* MAXBUFFERS entry is used as a flag to say nonces exist */
	if (thrdata->res[FOUND]) {
		/* Clear the buffer again */
		status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, blocking, 0,
				BUFFERSIZE, blank_res, 0, NULL, NULL);
		if (unlikely(status != CL_SUCCESS)) {
			applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed.");
			return -1;
		}
		if (unlikely(thrdata->last_work)) {
			applog(LOG_DEBUG, "GPU %d found something in last work?", gpu->device_id);
			postcalc_hash_async(thr, thrdata->last_work, thrdata->res);
			thrdata->last_work = NULL;
		} else {
			applog(LOG_DEBUG, "GPU %d found something?", gpu->device_id);
			postcalc_hash_async(thr, work, thrdata->res);
		}
		memset(thrdata->res, 0, BUFFERSIZE);
		if (!blocking)
			clFinish(clState->commandQueue);
	}

	gettimeofday(&gpu->tv_gpustart, NULL);

	if (clState->goffset) {
		size_t global_work_offset[1];

		global_work_offset[0] = work->blk.nonce;
		status = clEnqueueNDRangeKernel(clState->commandQueue, *kernel, 1, global_work_offset,
						globalThreads, localThreads, 0,  NULL, NULL);
	} else
		status = clEnqueueNDRangeKernel(clState->commandQueue, *kernel, 1, NULL,
						globalThreads, localThreads, 0,  NULL, NULL);
	if (unlikely(status != CL_SUCCESS)) {
		applog(LOG_ERR, "Error %d: Enqueueing kernel onto command queue. (clEnqueueNDRangeKernel)", status);
		return -1;
	}

	status = clEnqueueReadBuffer(clState->commandQueue, clState->outputBuffer, blocking, 0,
			BUFFERSIZE, thrdata->res, 0, NULL, NULL);
	if (unlikely(status != CL_SUCCESS)) {
//......... some code omitted here .........
Author: nushor, Project: cgminer, Lines: 101, Source: driver-opencl.c


Example 10: runKernel

void runKernel(void)
{

	cl_int err;
	cl_event event;
	size_t global_item_size_max = 16;
	size_t global_item_size = cl_width * cl_height;
	size_t global_item_size2[] = {cl_width, cl_height};
	char *output;
	int i;
	/* local item size :
	 * the number of work item
	 * in a work group in each diamension
	 *
	 * the only constraint for the global_work_size is
	 * that it must be a multiple of the local_work_size (for each dimension).
	 */
	size_t local_item_size = 64;
	size_t local_item_size2[] = {64, 8};
	//this will update our system by calculating new velocity and updating the positions of our particles
	//Make sure OpenGL is done using our VBOs
	glFinish();
	// map OpenGL buffer object for writing from OpenCL
	//this passes in the vector of VBO buffer objects (position and color)
	err = clEnqueueAcquireGLObjects(command_queue, 2, cl_pbos, 0, NULL, NULL);
	checkError("acquireGLObjects", err);
	clFinish(command_queue);

	//execute the kernel
	//err = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL,
	//		&global_item_size,
	//		//&local_item_size,
	//		NULL,
	//		0, NULL, &event);

	err = clEnqueueNDRangeKernel(command_queue, kernel, 2, NULL,
	                             global_item_size2,
	                             local_item_size2,
	                             //NULL,
	                             0, NULL, &event);

	checkError("clEnqueueNDRangeKernel", err);
	err = clEnqueueNDRangeKernel(command_queue, kernel_max, 1, NULL,
	                             &global_item_size_max,
	                             //&local_item_size,
	                             NULL,
	                             0, NULL, &event);
	checkError("clEnqueueNDRangeKernel max", err);


	///* Transfer result to host */
	output = malloc(4 * 4);
	err = clEnqueueReadBuffer(command_queue, mobj, CL_TRUE, 0, 4 * 4 * sizeof(char), output, 0, NULL, NULL);
	checkError("clEnqueueReadBuffer", err);

	free(output);
	//clFinish(command_queue);


	//Release the VBOs so OpenGL can play with them
	clEnqueueReleaseGLObjects(command_queue, 2, cl_pbos, 0, NULL, NULL);
	checkError("releaseGLObjects", err);
	clFlush(command_queue);
	clFinish(command_queue);
}
Author: meyr, Project: clglPlayer, Lines: 65, Source: algorithm.c


Example 11: main


//.........这里部分代码省略.........

    // Create the input and output arrays in device memory for our calculation
    //
    input = clCreateBuffer(context,  CL_MEM_READ_ONLY,  sizeof(float) * count, NULL, NULL);
    output = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * count, NULL, NULL);
    if (!input || !output)
    {
        printf("Error: Failed to allocate device memory!\n");
        exit(1);
    }
    timer t = createTimer();
    for(int i =0;i<rep;i++){
        initData(data);
        // Write our data set into the input array in device memory 
        //
        err = clEnqueueWriteBuffer(commands, input, CL_TRUE, 0, sizeof(float) * count, data, 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            printf("Error: Failed to write to source array!\n");
            exit(1);
        }
     
        // Set the arguments to our compute kernel
        //
        err = 0;
        err  = clSetKernelArg(kernel, 0, sizeof(cl_mem), &input);
        err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &output);
        err |= clSetKernelArg(kernel, 2, sizeof(unsigned int), &count);
        if (err != CL_SUCCESS)
        {
            printf("Error: Failed to set kernel arguments! %d\n", err);
            exit(1);
        }
     
        // Get the maximum work group size for executing the kernel on the device
        //
        err = clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, NULL);
        if (err != CL_SUCCESS)
        {
            printf("Error: Failed to retrieve kernel work group info! %d\n", err);
            exit(1);
        }
     
        // Execute the kernel over the entire range of our 1d input data set
        // using the maximum number of work group items for this device
        //
        
        global = count;
        err = clEnqueueNDRangeKernel(commands, kernel, 1, NULL, &global, &local, 0, NULL, NULL);
        if (err)
        {
            printf("Error: Failed to execute kernel!\n");
            return EXIT_FAILURE;
        }
     
        // Wait for the command commands to get serviced before reading back results
        //
        clFinish(commands);
     
        // Read back the results from the device to verify the output
        //
        err = clEnqueueReadBuffer( commands, output, CL_TRUE, 0, sizeof(float) * count, results, 0, NULL, NULL );  
        if (err != CL_SUCCESS)
        {
            printf("Error: Failed to read output array! %d\n", err);
            exit(1);
        }
    }
    double timeEnd = getTime(t);
    
    // Validate our results
    //
    correct = 0;
    for(int i = 0; i < arraySize; i++)
    {
        if(results[i] >= 0){
            correct++;
            if(i==0){
               printf("%d",results[i]);
            }else{
               printf(",%d",results[i]);
            }
        }
    }
    printf("\n");
    

    // Print a brief summary detailing the results
    printf("Computed '%d/%d' values to 1!\n", correct, arraySize);
    printf("TIME- %f\n",timeEnd);
    
    // Shutdown and cleanup
    clReleaseMemObject(input);
    clReleaseMemObject(output);
    clReleaseProgram(program);
    clReleaseKernel(kernel);
    clReleaseCommandQueue(commands);
    clReleaseContext(context);
	return 0;
}
Author: TrevorDev, Project: CollatzConjecture-openCL, Lines: 101, Source: openCL-allToOne.c


Example 12: Extrae_OpenCL_clCreateCommandQueue

void Extrae_OpenCL_clCreateCommandQueue (cl_command_queue queue,
	cl_device_id device, cl_command_queue_properties properties)
{
	if (!Extrae_OpenCL_lookForOpenCLQueue (queue, NULL))
	{
		cl_int err;
		char _threadname[THREAD_INFO_NAME_LEN];
		char _hostname[HOST_NAME_MAX];
		char *_device_type;
		int prev_threadid, found, idx;
		cl_device_type device_type;
		cl_event event;

		idx = nCommandQueues;
		CommandQueues = (RegisteredCommandQueue_t*) realloc (
			CommandQueues,
			sizeof(RegisteredCommandQueue_t)*(nCommandQueues+1));
		if (CommandQueues == NULL)
		{
			fprintf (stderr, PACKAGE_NAME": Fatal error! Failed to allocate memory for OpenCL Command Queues\n");
			exit (-1);
		}

		CommandQueues[idx].queue = queue;
		CommandQueues[idx].isOutOfOrder =
			(properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) != 0;

		err = clGetDeviceInfo (device, CL_DEVICE_TYPE, sizeof(device_type), &device_type, NULL);
		if (err == CL_SUCCESS)
		{
			if (device_type  == CL_DEVICE_TYPE_GPU)
				_device_type = "GPU";
			else if (device_type == CL_DEVICE_TYPE_CPU)
				_device_type = "CPU";
			else
				_device_type = "Other";
		}
		else
			_device_type = "Unknown";

		/* Was the thread created before (i.e. did we executed a cudadevicereset?) */
		if (gethostname(_hostname, HOST_NAME_MAX) == 0)
			sprintf (_threadname, "OpenCL-%s-CQ%d-%s", _device_type, 1+idx,
			  _hostname);
		else
			sprintf (_threadname, "OpenCL-%s-CQ%d-%s", _device_type, 1+idx,
			  "unknown-host");

		prev_threadid = Extrae_search_thread_name (_threadname, &found);

		if (found)
		{
			/* If thread name existed, reuse its thread id */
			CommandQueues[idx].threadid = prev_threadid;
		}
		else
		{
			/* For timing purposes we change num of threads here instead of doing Backend_getNumberOfThreads() + CUDAdevices*/
			Backend_ChangeNumberOfThreads (Backend_getNumberOfThreads() + 1);
			CommandQueues[idx].threadid = Backend_getNumberOfThreads()-1;

			/* Set thread name */
			Extrae_set_thread_name (CommandQueues[idx].threadid, _threadname);
		}

		CommandQueues[idx].nevents = 0;

#ifdef CL_VERSION_1_2
		err = clEnqueueBarrierWithWaitList (queue, 0, NULL, &event);
#else
		err = clEnqueueBarrier (queue);
		if (err == CL_SUCCESS)
			err = clEnqueueMarker (queue, &event);
#endif
		CommandQueues[idx].host_reference_time = TIME;

		if (err == CL_SUCCESS)
		{
			err = clFinish(queue);
			if (err != CL_SUCCESS)
			{
				fprintf (stderr, PACKAGE_NAME": Error in clFinish (error = %d)! Dying...\n", err);
				exit (-1);
			}

			err = clGetEventProfilingInfo (event, CL_PROFILING_COMMAND_SUBMIT,
				sizeof(cl_ulong), &(CommandQueues[idx].device_reference_time),
				NULL);
			if (err != CL_SUCCESS)
			{
				fprintf (stderr, PACKAGE_NAME": Error in clGetEventProfilingInfo (error = %d)! Dying...\n", err);
				exit (-1);
			}
		}
		else
		{
			fprintf (stderr, PACKAGE_NAME": Error while looking for clock references in host & accelerator\n");
			exit (-1);
		}

//......... some code omitted here .........
Author: polca-project, Project: polca-toolbox, Lines: 101, Source: opencl_common.c


Example 13: main


//......... some code omitted here .........
        exit(1);
    }

    // Get the preferred workgroup size multiple
    //
    err = clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(preferred_workgroup_size), &preferred_workgroup_size, NULL);
    if (err != CL_SUCCESS)
    {
        printf("Error: Failed to retrieve kernel work group info! %d\n", err);
        exit(1);
    }

    // Get the preferred workgroup size
	//
	err = clGetKernelWorkGroupInfo(kernel, device_id, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(max_workgroup_size), &max_workgroup_size, NULL);
	if (err != CL_SUCCESS)
	{
		printf("Error: Failed to retrieve kernel work group info! %d\n", err);
		exit(1);
	}

    // Create the output array in device memory for our calculation
    //
    output = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * count, NULL, NULL);
    if (!input || !output)
    {
        printf("Error: Failed to allocate device memory!\n");
        exit(1);
    }

    // Write our data set into the input array in device memory
    //
    err = clEnqueueWriteBuffer(commands, input, CL_TRUE, 0, sizeof(float) * count, data, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        printf("Error: Failed to write to source array!\n");
        exit(1);
    }

    // Set the arguments to our compute kernel
    //
    err = 0;
    err  = clSetKernelArg(kernel, 0, sizeof(cl_mem), &input);
    err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &output);
    err |= clSetKernelArg(kernel, 2, sizeof(unsigned int), &count);
    if (err != CL_SUCCESS)
    {
        printf("Error: Failed to set kernel arguments! %d\n", err);
        exit(1);
    }

    // Execute the kernel over the entire range of our 1d input data set
    // using the maximum number of work group items for this device
    //
//    global = count;
    global = local;
    err = clEnqueueNDRangeKernel(commands, kernel, 1, NULL, &global, &local, 0, NULL, NULL);
    if (err)
    {
        printf("Error: Failed to execute kernel!\n");
        return EXIT_FAILURE;
    }

    // Wait for the command commands to get serviced before reading back results
    //
    clFinish(commands);

    // Read back the results from the device to verify the output
    //
    err = clEnqueueReadBuffer( commands, output, CL_TRUE, 0, sizeof(float) * count, results, 0, NULL, NULL );
    if (err != CL_SUCCESS)
    {
        printf("Error: Failed to read output array! %d\n", err);
        exit(1);
    }

    // Validate our results
    //
    correct = 0;
    for(i = 0; i < count; i++)
    {
        if((results[i]) == data[i] * data[i])
            correct++;
    }

    // Print a brief summary detailing the results
    //
    printf("Computed '%d/%d' correct values!\n", correct, count);

    // Shutdown and cleanup
    //
    clReleaseMemObject(input);
    clReleaseMemObject(output);
    clReleaseProgram(program);
    clReleaseKernel(kernel);
    clReleaseCommandQueue(commands);
    clReleaseContext(context);

    return 0;
}
Author: InternetofAwesome, Project: opencl_demo, Lines: 101, Source: main.c


Example 14: main

int main()
{
	cl_device_id device_id = NULL;
	cl_context context = NULL;
	cl_command_queue command_queue = NULL;
	cl_mem memobj = NULL;
	cl_program program = NULL;
	cl_kernel kernel = NULL;
	cl_platform_id platform_id = NULL;
	cl_uint ret_num_devices;
	cl_uint ret_num_platforms;
	cl_int ret;

	char string[MEM_SIZE];

	FILE *fp;
	char fileName[] = "./hello.cl";
	char *source_str;
	size_t source_size;

	/* Load the source code containing the kernel */
	fp = fopen(fileName, "r");
	if (!fp) {
		fprintf(stderr, "Failed to load kernel.\n");
		exit(1);
	}
	source_str = (char*)malloc(MAX_SOURCE_SIZE);
	source_size = fread(source_str, 1, MAX_SOURCE_SIZE, fp);
	fclose(fp);

	/* Get Platform and Device Info */
	ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms);
	ret = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, &ret_num_devices);

	/* Create OpenCL context */
	context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret);

	/* Create Command Queue */
	command_queue = clCreateCommandQueue(context, device_id, 0, &ret);

	/* Create Memory Buffer */
	memobj = clCreateBuffer(context, CL_MEM_READ_WRITE, MEM_SIZE * sizeof(char), NULL, &ret);

	/* Create Kernel Program from the source */
	program = clCreateProgramWithSource(context, 1, (const char **)&source_str,
	                                    (const size_t *)&source_size, &ret);

	/* Build Kernel Program */
	ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);

	/* Create OpenCL Kernel */
	kernel = clCreateKernel(program, "hello", &ret);

	/* Set OpenCL Kernel Parameters */
	ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&memobj);

	/* Execute OpenCL Kernel */
	ret = clEnqueueTask(command_queue, kernel, 0, NULL, NULL);

	/* Copy results from the memory buffer */
	ret = clEnqueueReadBuffer(command_queue, memobj, CL_TRUE, 0,
	                          MEM_SIZE * sizeof(char), string, 0, NULL, NULL);

	/* Display Result */
	puts(string);

	/* Finalization */
	ret = clFlush(command_queue);
	ret = clFinish(command_queue);
	ret = clReleaseKernel(kernel);
	ret = clReleaseProgram(program);
	ret = clReleaseMemObject(memobj);
	ret = clReleaseCommandQueue(command_queue);
	ret = clReleaseContext(context);

	free(source_str);

	system("Pause");
	return 0;
}
Author: daves14, Project: workspace, Lines: 80, Source: main.cpp


示例15: sizeof

void btParticlesDynamicsWorld::runIntegrateMotionKernel()
{
    cl_int ciErrNum;
	if(m_useCpuControls[SIMSTAGE_INTEGRATE_MOTION]->m_active)
	{
		// CPU version
#if 1
		// read from GPU
		unsigned int memSize = sizeof(btVector3) * m_numParticles;
		ciErrNum = clEnqueueReadBuffer(m_cqCommandQue, m_dPos, CL_TRUE, 0, memSize, &(m_hPos[0]), 0, NULL, NULL);
//......... some code omitted here .........
