C++ clCreateKernel函数代码示例

OStack程序员社区-中国程序员成长平台 › 门户 › 编程› C++›C++教程

原作者: [db:作者] 来自: [db:来源] 收藏邀请

本文整理汇总了C++中clCreateKernel函数的典型用法代码示例。如果您正苦于以下问题：C++ clCreateKernel函数的具体用法？C++ clCreateKernel怎么用？C++ clCreateKernel使用的例子？那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。

在下文中一共展示了clCreateKernel函数的20个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于我们的系统推荐出更棒的C++代码示例。

示例1: CreateProgramAndKernel

    // Function to read in kernel from uncompiled source, create the OCL program and build the OCL program 
    // **************************************************************************************************
    int CreateProgramAndKernel(cl_context cxGPUContext, cl_device_id* cdDevices, const char *kernel_name, cl_kernel *kernel, bool bDouble)
    {
        cl_program cpProgram;
        size_t szSourceLen;
        cl_int ciErrNum = CL_SUCCESS; 

        // Read the kernel in from file
        shrLog("\nLoading Uncompiled kernel from .cl file, using %s\n", clSourcefile);
        char* cPathAndFile = shrFindFilePath(clSourcefile, cExecutablePath);
        oclCheckError(cPathAndFile != NULL, shrTRUE);
        char* pcSource = oclLoadProgSource(cPathAndFile, "", &szSourceLen);
        oclCheckError(pcSource != NULL, shrTRUE);

	// Check OpenCL version -> vec3 types are supported only from version 1.1 and above
	char cOCLVersion[32];
	clGetDeviceInfo(cdDevices[0], CL_DEVICE_VERSION, sizeof(cOCLVersion), &cOCLVersion, 0);

	int iVec3Length = 3;
	if( strncmp("OpenCL 1.0", cOCLVersion, 10) == 0 ) {
		iVec3Length = 4;
	}


		//for double precision
		char *pcSourceForDouble;
		std::stringstream header;
		if (bDouble)
		{
			header << "#define REAL double";
			header << std::endl;
			header << "#define REAL4 double4";
			header << std::endl;
			header << "#define REAL3 double" << iVec3Length;
			header << std::endl;
			header << "#define ZERO3 {0.0, 0.0, 0.0" << ((iVec3Length == 4) ? ", 0.0}" : "}");
			header << std::endl;
		}
		else
		{
			header << "#define REAL float";
			header << std::endl;
			header << "#define REAL4 float4";
			header << std::endl;
			header << "#define REAL3 float" << iVec3Length;
			header << std::endl;
			header << "#define ZERO3 {0.0f, 0.0f, 0.0f" << ((iVec3Length == 4) ? ", 0.0f}" : "}");
			header << std::endl;
		}
		
		header << pcSource;
		pcSourceForDouble = (char *)malloc(header.str().size() + 1);
		szSourceLen = header.str().size();
#ifdef WIN32
        strcpy_s(pcSourceForDouble, szSourceLen + 1, header.str().c_str());
#else
        strcpy(pcSourceForDouble, header.str().c_str());
#endif

        // create the program 
        cpProgram = clCreateProgramWithSource(cxGPUContext, 1, (const char **)&pcSourceForDouble, &szSourceLen, &ciErrNum);
        oclCheckError(ciErrNum, CL_SUCCESS);
        shrLog("clCreateProgramWithSource\n"); 

        // Build the program with 'mad' Optimization option
#ifdef MAC
	char *flags = "-cl-fast-relaxed-math -DMAC";
#else
	char *flags = "-cl-fast-relaxed-math";
#endif
        ciErrNum = clBuildProgram(cpProgram, 0, NULL, flags, NULL, NULL);
        if (ciErrNum != CL_SUCCESS)
        {
            // write out standard error, Build Log and PTX, then cleanup and exit
            shrLogEx(LOGBOTH | ERRORMSG, ciErrNum, STDERROR);
            oclLogBuildInfo(cpProgram, oclGetFirstDev(cxGPUContext));
            oclLogPtx(cpProgram, oclGetFirstDev(cxGPUContext), "oclNbody.ptx");
            oclCheckError(ciErrNum, CL_SUCCESS); 
        }
        shrLog("clBuildProgram\n"); 

        // create the kernel
        *kernel = clCreateKernel(cpProgram, kernel_name, &ciErrNum);
        oclCheckError(ciErrNum, CL_SUCCESS); 
        shrLog("clCreateKernel\n"); 

		size_t wgSize;
		ciErrNum = clGetKernelWorkGroupInfo(*kernel, cdDevices[0], CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wgSize, NULL);
		if (wgSize == 64) {
		  shrLog(
			 "ERROR: Minimum work-group size 256 required by this application is not supported on this device.\n");
		  exit(0);
		}
	
		free(pcSourceForDouble);

        return 0;
    }

开发者ID:AswinMohanASU，项目名称:nvidia-opencl-examples，代码行数:99，代码来源:oclBodySystemOpenclLaunch.cpp

示例2: calloc


//.........这里部分代码省略.........
		memcpy(&length, w + 289, 4);
		w = binaries[gpu]; remaining = binary_sizes[gpu];
		if (!advance(&w, &remaining, "ELF"))
			{patchbfi = 0; goto build;}
		w++; remaining--;
		if (!advance(&w, &remaining, "ELF")) {
			/* 32 bit builds only one ELF */
			w--; remaining++;
		}
		w--; remaining++;
		w += start; remaining -= start;
		if (opt_debug)
			applog(LOG_DEBUG, "At %p (%u rem. bytes), to begin patching",
				w, remaining);
		patch_opcodes(w, length);

		status = clReleaseProgram(clState->program);
		if (status != CL_SUCCESS) {
			applog(LOG_ERR, "Error: Releasing program. (clReleaseProgram)");
			return NULL;
		}

		clState->program = clCreateProgramWithBinary(clState->context, 1, &devices[gpu], &binary_sizes[gpu], (const unsigned char **)&binaries[gpu], &status, NULL);
		if (status != CL_SUCCESS) {
			applog(LOG_ERR, "Error: Loading Binary into cl_program (clCreateProgramWithBinary)");
			return NULL;
		}

		clRetainProgram(clState->program);
		if (status != CL_SUCCESS) {
			applog(LOG_ERR, "Error: Retaining Program (clRetainProgram)");
			return NULL;
		}
	}

	free(source);
	free(rawsource);

	/* Save the binary to be loaded next time */
	binaryfile = fopen(binaryfilename, "wb");
	if (!binaryfile) {
		/* Not a fatal problem, just means we build it again next time */
		if (opt_debug)
			applog(LOG_DEBUG, "Unable to create file %s", binaryfilename);
	} else {
		if (unlikely(fwrite(binaries[gpu], 1, binary_sizes[gpu], binaryfile) != binary_sizes[gpu])) {
			applog(LOG_ERR, "Unable to fwrite to binaryfile");
			return NULL;
		}
		fclose(binaryfile);
	}
	if (binaries[gpu])
		free(binaries[gpu]);
built:
	free(binaries);
	free(binary_sizes);

	applog(LOG_INFO, "Initialising kernel %s with%s BFI_INT patching, %d vectors and worksize %d",
	       filename, patchbfi ? "" : "out", clState->preferred_vwidth, clState->work_size);

	/* create a cl program executable for all the devices specified */
	status = clBuildProgram(clState->program, 1, &devices[gpu], NULL, NULL, NULL);
	if (status != CL_SUCCESS) {
		applog(LOG_ERR, "Error: Building Program (clBuildProgram)");
		size_t logSize;
		status = clGetProgramBuildInfo(clState->program, devices[gpu], CL_PROGRAM_BUILD_LOG, 0, NULL, &logSize);

		char *log = malloc(logSize);
		status = clGetProgramBuildInfo(clState->program, devices[gpu], CL_PROGRAM_BUILD_LOG, logSize, log, NULL);
		applog(LOG_INFO, "%s", log);
		return NULL;
	}

	/* get a kernel object handle for a kernel with the given name */
	clState->kernel = clCreateKernel(clState->program, "search", &status);
	if (status != CL_SUCCESS) {
		applog(LOG_ERR, "Error: Creating Kernel from program. (clCreateKernel)");
		return NULL;
	}

	/////////////////////////////////////////////////////////////////
	// Create an OpenCL command queue
	/////////////////////////////////////////////////////////////////
	clState->commandQueue = clCreateCommandQueue(clState->context, devices[gpu],
						     CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &status);
	if (status != CL_SUCCESS) /* Try again without OOE enable */
		clState->commandQueue = clCreateCommandQueue(clState->context, devices[gpu], 0 , &status);
	if (status != CL_SUCCESS) {
		applog(LOG_ERR, "Creating Command Queue. (clCreateCommandQueue)");
		return NULL;
	}

	clState->outputBuffer = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, BUFFERSIZE, NULL, &status);
	if (status != CL_SUCCESS) {
		applog(LOG_ERR, "Error: clCreateBuffer (outputBuffer)");
		return NULL;
	}

	return clState;
}

开发者ID:furyan，项目名称:cgminer，代码行数:101，代码来源:ocl.c

示例3: main


//.........这里部分代码省略.........

        if (ret != CL_SUCCESS)
        {
                printf("error: call to 'clCreateProgramWithSource' failed\n");
                exit(1);
        }
        printf("program=%p\n", program);

        /* Build program */
        ret = clBuildProgram(program, 1, &device, NULL, NULL, NULL);
        if (ret != CL_SUCCESS )
        {
                size_t size;
                char *log;

                /* Get log size */
                clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG,0, NULL, &size);

                /* Allocate log and print */
                log = malloc(size);
                clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG,size, log, NULL);
                printf("error: call to 'clBuildProgram' failed:\n%s\n", log);
                
                /* Free log and exit */
                free(log);
                exit(1);
        }

        printf("program built\n");
        printf("\n");
        
        /* Create a Kernel Object */
        cl_kernel kernel;
        kernel = clCreateKernel(program, "logb_float", &ret);
        if (ret != CL_SUCCESS)
        {
                printf("error: call to 'clCreateKernel' failed\n");
                exit(1);
        }
        
        /* Create and allocate host buffers */
        size_t num_elem = 10;
        
        /* Create and init host side src buffer 0 */
        cl_float *src_0_host_buffer;
        src_0_host_buffer = malloc(num_elem * sizeof(cl_float));
        for (int i = 0; i < num_elem; i++)
                src_0_host_buffer[i] = (cl_float)(2.0);
        
        /* Create and init device side src buffer 0 */
        cl_mem src_0_device_buffer;
        src_0_device_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY, num_elem * sizeof(cl_float), NULL, &ret);
        if (ret != CL_SUCCESS)
        {
                printf("error: could not create source buffer\n");
                exit(1);
        }        
        ret = clEnqueueWriteBuffer(command_queue, src_0_device_buffer, CL_TRUE, 0, num_elem * sizeof(cl_float), src_0_host_buffer, 0, NULL, NULL);
        if (ret != CL_SUCCESS)
        {
                printf("error: call to 'clEnqueueWriteBuffer' failed\n");
                exit(1);
        }

        /* Create host dst buffer */
        cl_float *dst_host_buffer;

开发者ID:xianggong，项目名称:m2c-llvm-devtools-host，代码行数:67，代码来源:logb_float_src.c

示例4: clCreateFromGLTexture3D

void OpenCLExecuter::ocl_filter_shared(void)
{
	cl_int err;							// debugging variables
	size_t szParmDataBytes;				// Byte size of context information        
	cl_mem src_buffer;					// OpenCL device source buffer
	cl_mem dst_buffer;					// OpenCL device source buffer
	cl_sampler sampler;					// OpenCL sampler
	cl_kernel ckKernel;					// OpenCL kernel

	int iNumElements = volobj->texwidth*volobj->texheight*volobj->texdepth; // Length of float arrays to process

	// set Local work size dimensions
//	size_t local_threads[3] ={256,256,64};
	// set Global work size dimensions
//	size_t global_threads[3] ={roundup((int) volobj->texwidth/local_threads[0], 0)*local_threads[0], roundup((int) volobj->texheight/local_threads[1], 0)*local_threads[1], roundup((int) volobj->texdepth/local_threads[2], 0)*local_threads[2]};

	// set Global work size dimensions
	size_t global_threads[3] ={volobj->texwidth, volobj->texheight, volobj->texdepth};

	// allocate the source buffer memory object
	src_buffer = clCreateFromGLTexture3D (ocl_wrapper->context, CL_MEM_READ_WRITE, GL_TEXTURE_3D, 0, volobj->TEXTURE3D_RED, &err);
	printf("OPENCL: clCreateFromGLTexture3D: %s\n", ocl_wrapper->get_error(err));

	// allocate the destination buffer memory object
	dst_buffer = clCreateBuffer (ocl_wrapper->context, CL_MEM_READ_WRITE,  sizeof(unsigned char) * iNumElements, NULL, &err);
	printf("OPENCL: clCreateBuffer: %s\n", ocl_wrapper->get_error(err));

	// create a sampler object
	sampler = clCreateSampler(ocl_wrapper->context, CL_FALSE, CL_ADDRESS_CLAMP, CL_FILTER_NEAREST, &err);
	printf("OPENCL: clCreateSampler: %s\n", ocl_wrapper->get_error(err));
 
    // Create the kernel
	ckKernel = clCreateKernel (cpProgram, "myFunc", &err);
	printf("OPENCL: clCreateKernel: %s\n", ocl_wrapper->get_error(err));
  
	// Set the Argument values
	err = clSetKernelArg (ckKernel, 0, sizeof(cl_mem), (void*)&src_buffer);
	printf("OPENCL: clSetKernelArg: %s\n", ocl_wrapper->get_error(err));
	err = clSetKernelArg (ckKernel, 1, sizeof(cl_mem), (void*)&dst_buffer);
	printf("OPENCL: clSetKernelArg: %s\n", ocl_wrapper->get_error(err));
	err = clSetKernelArg (ckKernel, 2, sizeof(sampler), (void*)&sampler);
	printf("OPENCL: clSetKernelArg: %s\n", ocl_wrapper->get_error(err));

	size_t local;
	err = clGetKernelWorkGroupInfo(ckKernel, ocl_wrapper->devices[ocl_wrapper->deviceUsed], CL_KERNEL_LOCAL_MEM_SIZE , sizeof(local), &local, NULL);
	printf("OPENCL: clGetKernelWorkGroupInfo (kernel memory): %s\n", ocl_wrapper->get_error(err));
	printf("OPENCL: Kernel local memory use: %d Bytes\n", (int)local);

	// grab input data from OpenGL, compute, copy the results back to OpenGL
	// Runs asynchronous to host, up until blocking clFinish at the end

	glFinish();
	glFlush();
	
	// grab the OpenGL texture object for read/writing from OpenCL
	err = clEnqueueAcquireGLObjects(ocl_wrapper->commandQue, 1, &src_buffer, 0,NULL,NULL);
	printf("OPENCL: clEnqueueAcquireGLObjects: %s\n", ocl_wrapper->get_error(err));

	// Execute a kernel
	err = clEnqueueNDRangeKernel (ocl_wrapper->commandQue, ckKernel, 3, NULL, global_threads, NULL, 0, NULL, NULL);
	printf("OPENCL: clEnqueueNDRangeKernel: %s\n", ocl_wrapper->get_error(err));

	/*
	// Blocking read of results from GPU to Host
	int size = volobj->texwidth*volobj->texheight*volobj->texdepth;
	unsigned char* result = new unsigned char[size];
	err = clEnqueueReadBuffer (ocl_wrapper->commandQue, dst_buffer, CL_TRUE, 0, sizeof(unsigned char) * iNumElements, result, 0, NULL, NULL);
	printf("OPENCL: clEnqueueReadBuffer: %s\n", ocl_wrapper->get_error(err));
	for(int i=0; i<size; i++) volobj->texture3d[3*i+0] = result[i];
	delete[] result;
	*/

	// copy OpenCL buffer to OpenGl texture
	size_t corigin[3] = {0,0,0};
	size_t cdimensions[3] = {(unsigned int)volobj->texwidth, (unsigned int)volobj->texheight, (unsigned int)volobj->texdepth};
	err = clEnqueueCopyBufferToImage(ocl_wrapper->commandQue , dst_buffer, src_buffer, 0, corigin, cdimensions, 0, NULL, NULL);
	printf("OPENCL: clEnqueueCopyBufferToImage: %s\n", ocl_wrapper->get_error(err));

	//make sure we block until we are done.
	//err = clFinish(ocl_wrapper->commandQue);
	//printf("OPENCL: clFinish: %s\n", ocl_wrapper->get_error(err));
	
	//release opengl objects now
	err = clEnqueueReleaseGLObjects(ocl_wrapper->commandQue, 1, &src_buffer, 0,0,0);
	printf("OPENCL: clEnqueueAcquireGLObjects: %s\n", ocl_wrapper->get_error(err));

	// Cleanup allocated objects
	printf("OPENCL: Releasing kernel memory\n");
    if(ckKernel)clReleaseKernel(ckKernel); 
   
    //need to release any other OpenCL memory objects here
    if(src_buffer)clReleaseMemObject(src_buffer);
    if(dst_buffer)clReleaseMemObject(dst_buffer);
}

开发者ID:ut666，项目名称:VolViewer，代码行数:94，代码来源:OpenCLExecuter.cpp

示例5: roundup

void OpenCLExecuter::ocl_parrallelReduction(void)
{
	cl_int err;							// debugging variables
	size_t szParmDataBytes;				// Byte size of context information        
	cl_mem src_buffer;					// OpenCL device source buffer
	cl_mem tmp_buffer;					// OpenCL device source buffer
	cl_mem dst_buffer;					// OpenCL device source buffer
	size_t szGlobalWorkSize;			// 1D var for Total # of work items
	size_t szLocalWorkSize;				// 1D var for # of work items in the work group
	size_t numWorkGroups;
	cl_kernel ckKernel;					// OpenCL kernel

	int iNumElements = 65536; //65536 // Length of float arrays to process

	// set Local work size dimensions
	szLocalWorkSize = 512;
	// set Global work size dimensions
	szGlobalWorkSize = roundup((int) iNumElements/szLocalWorkSize, 0)*szLocalWorkSize;  
	//szGlobalWorkSize = iNumElements;
	numWorkGroups = (float)szGlobalWorkSize/(float)szLocalWorkSize;
	printf("OPENCL: number of elements: %d\n", (int)iNumElements);
	printf("OPENCL: local worksize: %d\n", (int)szLocalWorkSize);
	printf("OPENCL: global worksize: %d\n", (int)szGlobalWorkSize);
	printf("OPENCL: work groups: %d\n", (int)(numWorkGroups));
	
	//temp array
	int* data = new int[iNumElements];

	for(int i=0; i<iNumElements; i++)
		data[i] = randomFloat(1.0, (float)iNumElements);

	data[iNumElements/2] = -100.0;

	//for(int i=0; i<iNumElements; i++)
	//	printf("data: %d\n", data[i]);

	size_t global_threads[1] ={iNumElements};

	// allocate the source buffer memory object
	src_buffer = clCreateBuffer (ocl_wrapper->context, CL_MEM_READ_ONLY,  sizeof(int) * iNumElements, NULL, &err);
	printf("OPENCL: clCreateBuffer: %s\n", ocl_wrapper->get_error(err));
		
	// allocate the temp buffer memory object
	tmp_buffer = clCreateBuffer (ocl_wrapper->context, CL_MEM_READ_WRITE,  sizeof(int) * iNumElements, NULL, &err);
	printf("OPENCL: clCreateBuffer: %s\n", ocl_wrapper->get_error(err));
	
	// allocate the destination buffer memory object
	dst_buffer = clCreateBuffer (ocl_wrapper->context, CL_MEM_WRITE_ONLY,  sizeof(int) * iNumElements, NULL, &err);
	printf("OPENCL: clCreateBuffer: %s\n", ocl_wrapper->get_error(err));

    // Create the kernel
	ckKernel = clCreateKernel (cpProgram, "min_reduce", &err);
	printf("OPENCL: clCreateKernel: %s\n", ocl_wrapper->get_error(err));
  
	// Set the Argument values
	err = clSetKernelArg (ckKernel, 0, sizeof(cl_mem), (void*)&src_buffer);
	printf("OPENCL: clSetKernelArg: %s\n", ocl_wrapper->get_error(err));
	err = clSetKernelArg (ckKernel, 1, sizeof(int)*szLocalWorkSize, NULL);
	printf("OPENCL: clSetKernelArg: %s\n", ocl_wrapper->get_error(err));
	err = clSetKernelArg (ckKernel, 2, sizeof(int), (void*)&iNumElements);
	printf("OPENCL: clSetKernelArg: %s\n", ocl_wrapper->get_error(err));
	err = clSetKernelArg (ckKernel, 3, sizeof(cl_mem), (void*)&dst_buffer);
	printf("OPENCL: clSetKernelArg: %s\n", ocl_wrapper->get_error(err));

	// Copy input data to GPU, compute, copy results back
	// Runs asynchronous to host, up until blocking read at end
	
	int numb_iterations = sqrt((float)numWorkGroups);
	numb_iterations=0;
	bool cont = true;

	Timer timer;
	timer.startTimer();
	//for(int i=0; i<numb_iterations; i++)
	while(cont)
	{
		// Write data from host to GPU
		err = clEnqueueWriteBuffer (ocl_wrapper->commandQue, src_buffer, CL_FALSE, 0, sizeof(int) * iNumElements, data, 0, NULL, NULL);
		printf("OPENCL: clEnqueueWriteBuffer: %s\n", ocl_wrapper->get_error(err));
	
		// Launch kernel 
		err = clEnqueueNDRangeKernel (ocl_wrapper->commandQue, ckKernel, 1, NULL, &szGlobalWorkSize, &szLocalWorkSize, 0, NULL, NULL);
		printf("OPENCL: clEnqueueNDRangeKernel: %s\n", ocl_wrapper->get_error(err));

		// Blocking read of results from GPU to Host
		err = clEnqueueReadBuffer (ocl_wrapper->commandQue, dst_buffer, CL_TRUE, 0, sizeof(int) * iNumElements, data, 0, NULL,  NULL);
		printf("OPENCL: clEnqueueReadBuffer: %s\n", ocl_wrapper->get_error(err));
		
		numb_iterations++;
		if(data[1]==0) cont = false;

		//printf("min: %d\n", data[0]);
		for(int i=0; i<numWorkGroups; i++)
			printf("min: %d\n", data[i]);
	}
	timer.endTimer("GPU find min");

	timer.startTimer();
	int min=iNumElements;
	for(int i=0; i<iNumElements; i++)
//.........这里部分代码省略.........

开发者ID:ut666，项目名称:VolViewer，代码行数:101，代码来源:OpenCLExecuter.cpp

示例6: printf

void OpenCLExecuter::ocl_filter_multi(void)
{
	cl_int err;										// debugging variables
	size_t szParmDataBytes;							// Byte size of context information        

	cl_mem src_buffer[MAX_DEVICES];					// OpenCL device source buffer
	cl_mem dst_buffer[MAX_DEVICES];					// OpenCL device source buffer
	cl_command_queue queues[MAX_DEVICES];			// OpenCL device queue
	cl_kernel ckKernel[MAX_DEVICES];				// OpenCL kernel

	cl_event gpuDone[MAX_DEVICES];

//	int iNumElements = volobj->texwidth*volobj->texheight*volobj->texdepth*3; // Length of float arrays to process

	int xdim, ydim, zdim;
	xdim = (float)volobj->texwidth; // (float)ocl_wrapper->numDevices;
	ydim = (float)volobj->texheight; // (float)ocl_wrapper->numDevices;
	zdim = (float)volobj->texdepth / (float)ocl_wrapper->numDevices;

	//Length of array to process
	int iNumElements = (xdim*ydim*zdim);
	size_t global_threads[3] = {xdim, ydim, zdim};
	
	//temp array
	unsigned char** data = new unsigned char*[ocl_wrapper->numDevices];

	for(int i=0; i<ocl_wrapper->numDevices; i++)
		data[i] = new unsigned char[iNumElements];

	for(int i=0; i<ocl_wrapper->numDevices; i++)
	{
		printf("OPENCL: Computing Device%d\n", i);

		//create the command queue we will use to execute OpenCL commands
		queues[i] = clCreateCommandQueue(ocl_wrapper->context, ocl_wrapper->devices[i], 0, &err);
		printf("OPENCL: clCreateCommandQueue: %s\n", ocl_wrapper->get_error(err));
		
		// allocate the source buffer memory object
		src_buffer[i] = clCreateBuffer (ocl_wrapper->context, CL_MEM_READ_ONLY,  sizeof(unsigned char) * iNumElements, NULL, &err);
		printf("OPENCL: clCreateBuffer: %s\n", ocl_wrapper->get_error(err));
		
		// allocate the destination buffer memory object
		dst_buffer[i] = clCreateBuffer (ocl_wrapper->context, CL_MEM_WRITE_ONLY,  sizeof(unsigned char) * iNumElements, NULL, &err);
		printf("OPENCL: clCreateBuffer: %s\n", ocl_wrapper->get_error(err));

		// Create the kernel
		ckKernel[i] = clCreateKernel (cpProgram, "myFunc", &err);
		printf("OPENCL: clCreateKernel: %s\n", ocl_wrapper->get_error(err));
  
		// Set the Argument values
		err = clSetKernelArg (ckKernel[i], 0, sizeof(cl_mem), (void*)&src_buffer[i]);
		printf("OPENCL: clSetKernelArg: %s\n", ocl_wrapper->get_error(err));
		err = clSetKernelArg (ckKernel[i], 1, sizeof(cl_mem), (void*)&dst_buffer[i]);
		printf("OPENCL: clSetKernelArg: %s\n", ocl_wrapper->get_error(err));
		err = clSetKernelArg (ckKernel[i], 2, sizeof(int), (void*)&global_threads[0]);
		printf("OPENCL: clSetKernelArg: %s\n", ocl_wrapper->get_error(err));
		err = clSetKernelArg (ckKernel[i], 3, sizeof(int), (void*)&global_threads[1]);
		printf("OPENCL: clSetKernelArg: %s\n", ocl_wrapper->get_error(err));
		err = clSetKernelArg (ckKernel[i], 4, sizeof(int), (void*)&global_threads[2]);
		printf("OPENCL: clSetKernelArg: %s\n", ocl_wrapper->get_error(err));
	
		//Prepare data to upload
		int iOffsetElements = (xdim*ydim*zdim*i);
		for(int j=iOffsetElements; j<iNumElements+iOffsetElements; j++)
			data[i][j-iOffsetElements] = volobj->texture3d[3*j+0];

		// Write data from host to GPU
		err = clEnqueueWriteBuffer (queues[i], src_buffer[i], CL_FALSE, 0, sizeof(unsigned char) * iNumElements, data[i], 0, NULL, NULL);
		printf("OPENCL: clEnqueueWriteBuffer: %s\n", ocl_wrapper->get_error(err));
	}

	for(int i=0; i<ocl_wrapper->numDevices; i++)
	{
		// Launch kernel 
		err = clEnqueueNDRangeKernel (queues[i], ckKernel[i], 3, NULL, global_threads, NULL, 0, NULL, NULL);
		printf("OPENCL: clEnqueueNDRangeKernel: %s\n", ocl_wrapper->get_error(err));
	}

	for(int i=0; i<ocl_wrapper->numDevices; i++)
	{
		// Blocking read of results from GPU to Host
		err = clEnqueueReadBuffer (queues[i], dst_buffer[i], CL_TRUE, 0, sizeof(unsigned char) * iNumElements, data[i], 0, NULL,  &gpuDone[i]);
		printf("OPENCL: clEnqueueReadBuffer: %s\n", ocl_wrapper->get_error(err));
	}

	// Synchronize with the GPUs
    printf("OPENCL: Waiting for devices to sync\n");
	clWaitForEvents(ocl_wrapper->numDevices, gpuDone);

	for(int i=0; i<ocl_wrapper->numDevices; i++)
	{
		//read data back
		int iOffsetElements = (xdim*ydim*zdim*i);
		for(int j=iOffsetElements; j<iNumElements+iOffsetElements; j++)
			volobj->texture3d[3*j+0] = data[i][j-iOffsetElements];
	}

	for(int i=0; i<ocl_wrapper->numDevices; i++)
	{
		// Cleanup allocated objects
//.........这里部分代码省略.........

开发者ID:ut666，项目名称:VolViewer，代码行数:101，代码来源:OpenCLExecuter.cpp

示例7: clCreateBuffer

void OpenCLExecuter::ocl_filterBoundingBox(int channel, int window_size)
{
	cl_int err;							// debugging variables
	size_t szParmDataBytes;				// Byte size of context information        
	cl_mem src_buffer;					// OpenCL device source buffer
	cl_mem bbmin_buffer;				// OpenCL device source buffer
	cl_mem bbmax_buffer;				// OpenCL device source buffer
	size_t szGlobalWorkSize;			// 1D var for Total # of work items
	size_t szLocalWorkSize;				// 1D var for # of work items in the work group
	cl_kernel ckKernel;					// OpenCL kernel

	cl_int4 minbb;
	cl_int4 maxbb;

	minbb.s[0] = minbb.s[1] = minbb.s[2] = 8192;
	maxbb.s[0] = maxbb.s[1] = maxbb.s[2] = -8192;

	int iNumElements = 3*volobj->texwidth*volobj->texheight*volobj->texdepth; // Length of float arrays to process

	size_t global_threads[3] ={volobj->texwidth, volobj->texheight, volobj->texdepth};

	// allocate the source buffer memory object
	src_buffer = clCreateBuffer (ocl_wrapper->context, CL_MEM_READ_ONLY,  sizeof(unsigned char) * iNumElements, NULL, &err);
	printf("OPENCL: clCreateBuffer: %s\n", ocl_wrapper->get_error(err));
		
	// allocate the destination buffer memory object
	bbmin_buffer = clCreateBuffer (ocl_wrapper->context, CL_MEM_READ_WRITE,  sizeof(cl_int4), NULL, &err);
	printf("OPENCL: clCreateBuffer: %s\n", ocl_wrapper->get_error(err));

 	bbmax_buffer = clCreateBuffer (ocl_wrapper->context, CL_MEM_READ_WRITE,  sizeof(cl_int4), NULL, &err);
	printf("OPENCL: clCreateBuffer: %s\n", ocl_wrapper->get_error(err));

	// Create the kernel
	ckKernel = clCreateKernel (cpProgram, "myFunc", &err);
	printf("OPENCL: clCreateKernel: %s\n", ocl_wrapper->get_error(err));
  
	// Set the Argument values
	err = clSetKernelArg (ckKernel, 0, sizeof(cl_mem), (void*)&src_buffer);
	printf("OPENCL: clSetKernelArg: %s\n", ocl_wrapper->get_error(err));
	err = clSetKernelArg (ckKernel, 1, sizeof(cl_mem), (void*)&bbmin_buffer);
	printf("OPENCL: clSetKernelArg: %s\n", ocl_wrapper->get_error(err));
	err = clSetKernelArg (ckKernel, 2, sizeof(cl_mem), (void*)&bbmax_buffer);
	printf("OPENCL: clSetKernelArg: %s\n", ocl_wrapper->get_error(err));
	err = clSetKernelArg (ckKernel, 2, sizeof(int), (void*)&volobj->texwidth);
	printf("OPENCL: clSetKernelArg: %s\n", ocl_wrapper->get_error(err));
	err = clSetKernelArg (ckKernel, 3, sizeof(int), (void*)&volobj->texheight);
	printf("OPENCL: clSetKernelArg: %s\n", ocl_wrapper->get_error(err));
	err = clSetKernelArg (ckKernel, 4, sizeof(int), (void*)&volobj->texdepth);
	printf("OPENCL: clSetKernelArg: %s\n", ocl_wrapper->get_error(err));
	err = clSetKernelArg (ckKernel, 5, sizeof(int), (void*)&channel);
	printf("OPENCL: clSetKernelArg: %s\n", ocl_wrapper->get_error(err));

	// Copy input data to GPU, compute, copy results back
	// Runs asynchronous to host, up until blocking read at end

	// Write data from host to GPU
	err = clEnqueueWriteBuffer (ocl_wrapper->commandQue, src_buffer, CL_FALSE, 0, sizeof(unsigned char) * iNumElements, volobj->texture3d, 0, NULL, NULL);
	printf("OPENCL: clEnqueueWriteBuffer: %s\n", ocl_wrapper->get_error(err));
	
	err = clEnqueueWriteBuffer (ocl_wrapper->commandQue, bbmin_buffer, CL_FALSE, 0, sizeof(cl_int4), (void*)&minbb, 0, NULL, NULL);
	printf("OPENCL: clEnqueueWriteBuffer: %s\n", ocl_wrapper->get_error(err));

	err = clEnqueueWriteBuffer (ocl_wrapper->commandQue, bbmax_buffer, CL_FALSE, 0, sizeof(cl_int4), (void*)&maxbb, 0, NULL, NULL);
	printf("OPENCL: clEnqueueWriteBuffer: %s\n", ocl_wrapper->get_error(err));

	// Launch kernel
	err = clEnqueueNDRangeKernel (ocl_wrapper->commandQue, ckKernel, 3, NULL, global_threads, NULL, 0, NULL, NULL);
	printf("OPENCL: clEnqueueNDRangeKernel: %s\n", ocl_wrapper->get_error(err));

	// Blocking read of results from GPU to Host
	err = clEnqueueReadBuffer (ocl_wrapper->commandQue, bbmin_buffer, CL_TRUE, 0, sizeof(cl_int4), (void*)&minbb, 0, NULL,  NULL);
	printf("OPENCL: clEnqueueReadBuffer: %s\n", ocl_wrapper->get_error(err));

	err = clEnqueueReadBuffer (ocl_wrapper->commandQue, bbmax_buffer, CL_TRUE, 0, sizeof(cl_int4), (void*)&maxbb, 0, NULL,  NULL);
	printf("OPENCL: clEnqueueReadBuffer: %s\n", ocl_wrapper->get_error(err));

	// Cleanup allocated objects
	printf("OPENCL: Releasing kernel memory\n");
    if(ckKernel)clReleaseKernel(ckKernel); 
   
    //need to release any other OpenCL memory objects here
    if(src_buffer)clReleaseMemObject(src_buffer);
    if(bbmin_buffer)clReleaseMemObject(bbmin_buffer);
    if(bbmax_buffer)clReleaseMemObject(bbmax_buffer);
	
	maxbb.s[0] += (float)window_size/2.0;
	maxbb.s[1] += (float)window_size/2.0;
	maxbb.s[2] += (float)window_size/2.0;

	minbb.s[0] -= (float)window_size/2.0;
	minbb.s[1] -= (float)window_size/2.0;
	minbb.s[2] -= (float)window_size/2.0;

	maxbb.s[0] += 2;
	maxbb.s[1] += 2;
	maxbb.s[2] += 2;

	minbb.s[0] -= 2;
	minbb.s[1] -= 2;
	minbb.s[2] -= 2;	
//.........这里部分代码省略.........

开发者ID:ut666，项目名称:VolViewer，代码行数:101，代码来源:OpenCLExecuter.cpp

示例8: main

int main(void) {
    // se crea los 2 vectores de entrada
    int i;
    const int LIST_SIZE = 1024;
    int *A = (int*)malloc(sizeof(int)*LIST_SIZE);
    int *B = (int*)malloc(sizeof(int)*LIST_SIZE);
    for(i = 0; i < LIST_SIZE; i++) {
        A[i] = i;
        B[i] = LIST_SIZE - i;
    }
 
    // cargamos el kernel en source_str
    FILE *fp;
    char *source_str;
    size_t source_size;
 
    fp = fopen("vector_add_kernel.cl", "r");
    if (!fp) {
        fprintf(stderr, "Failed to load kernel.\n");
        exit(1);
    }
    source_str = (char*)malloc(MAX_SOURCE_SIZE);
    source_size = fread( source_str, 1, MAX_SOURCE_SIZE, fp);
    fclose( fp );
 
    // obtenemos las plataformas y informacion de los devices
    cl_platform_id platform_id = NULL;
    cl_device_id device_id = NULL;   
    cl_uint ret_num_devices;
    cl_uint ret_num_platforms;
    cl_int ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms);
    ret = clGetDeviceIDs( platform_id, CL_DEVICE_TYPE_DEFAULT, 1, 
            &device_id, &ret_num_devices);
 
    // creamos un contexto OpenCL
    cl_context context = clCreateContext( NULL, 1, &device_id, NULL, NULL, &ret);
 
    // creamos la cola de comandos
    cl_command_queue command_queue = clCreateCommandQueue(context, device_id, 0, &ret);
 
    // creamos el buffer de memoria en el device para cada vector
    cl_mem a_mem_obj = clCreateBuffer(context, CL_MEM_READ_ONLY, 
            LIST_SIZE * sizeof(int), NULL, &ret);
    cl_mem b_mem_obj = clCreateBuffer(context, CL_MEM_READ_ONLY,
            LIST_SIZE * sizeof(int), NULL, &ret);
    cl_mem c_mem_obj = clCreateBuffer(context, CL_MEM_WRITE_ONLY, 
            LIST_SIZE * sizeof(int), NULL, &ret);
 
    // copiamos los vectores A y B a sus respectivas memorias buffer
    ret = clEnqueueWriteBuffer(command_queue, a_mem_obj, CL_TRUE, 0,
            LIST_SIZE * sizeof(int), A, 0, NULL, NULL);
    ret = clEnqueueWriteBuffer(command_queue, b_mem_obj, CL_TRUE, 0, 
            LIST_SIZE * sizeof(int), B, 0, NULL, NULL);
 
    // creamos un programa para el kernel 
    cl_program program = clCreateProgramWithSource(context, 1, 
            (const char **)&source_str, (const size_t *)&source_size, &ret);
 
    // generamos el programa
    ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);
 
    // creamos el kernel 
    cl_kernel kernel = clCreateKernel(program, "vector_add", &ret);
 
    // establecemos los argumentos del kernel 
    ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&a_mem_obj);
    ret = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&b_mem_obj);
    ret = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&c_mem_obj);
 
    // ejecutamos el kernel de la lista
    size_t global_item_size = LIST_SIZE; 
    size_t local_item_size = 64; // dividimos los work items en grupos de 64
    ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, 
            &global_item_size, &local_item_size, 0, NULL, NULL);
 
    // copiamos la memoria buffer C del device hacia la variable local C
    int *C = (int*)malloc(sizeof(int)*LIST_SIZE);
    ret = clEnqueueReadBuffer(command_queue, c_mem_obj, CL_TRUE, 0, 
            LIST_SIZE * sizeof(int), C, 0, NULL, NULL);
 
    // muestra el resultado
    for(i = 0; i < LIST_SIZE; i++)
        printf("%d + %d = %d\n", A[i], B[i], C[i]);
 
    
    free(A);
    free(B);
    free(C);
    return 0;
}

开发者ID:pioh123，项目名称:parallel，代码行数:90，代码来源:vector.c

示例9: main


//.........这里部分代码省略.........
     u_d = clCreateBuffer(context, CL_MEM_READ_WRITE, size, NULL, &status);
     chk(status,"clCreatebuffer");
     f_d = clCreateBuffer(context, CL_MEM_READ_WRITE, size1, NULL, &status);
     chk(status, "clCreatebuffer");

     // perform computing on GPU
     // copy data from host to device
     status = clEnqueueWriteBuffer(cmdQueue, u_d, CL_FALSE, 0, size, u_h, 0, NULL, NULL);
     chk(status,"ClEnqueueWriteBuffer");
     status = clEnqueueWriteBuffer(cmdQueue, f_d, CL_FALSE, 0, size1, f_h, 0, NULL, NULL);
     chk(status, "clEnqueueWriteBuffer");
     
     // create program with source code
     cl_program program = clCreateProgramWithSource(context,1,(const char**)&programSource, NULL, &status);
     chk(status, "clCreateProgramWithSource");

     // Compile program for the device
     status = clBuildProgram(program, numDevices, devices, NULL, NULL,NULL);
      // chk(status, "ClBuildProgram");
      if(status != CL_SUCCESS){
      printf("clBuildProgram failed (%d) \n", status);
      size_t log_size;
      clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
      
      char *log = (char *) malloc(log_size);
      clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG, log_size, log, NULL);
      printf("%s\n", log);
      exit(-1);
     }
      printf("successfully built program \n");
      
     // Create lattice-boltzman kernel
     cl_kernel kernel, kernel1;
     kernel = clCreateKernel(program, "lbiteration", &status);
     kernel1 = clCreateKernel(program, "Denrho", &status);
     chk(status, "clCreateKernel");
      printf("successfully create kernel \n");
     
     // Associate the input and output buffers with the kernel
     status = clSetKernelArg(kernel,0, sizeof(cl_mem), &f_d);
     status |= clSetKernelArg(kernel1,0, sizeof(cl_mem), &u_d);
     status |= clSetKernelArg(kernel1,1, sizeof(cl_mem), &f_d);
     status |= clSetKernelArg(kernel, 1, sizeof(int), &ArraySizeX);
     status |= clSetKernelArg(kernel1,2, sizeof(int), &ArraySizeX);
     status |= clSetKernelArg(kernel, 2, sizeof(int), &ArraySizeY);
     status |= clSetKernelArg(kernel1,3, sizeof(int),&ArraySizeY);
     chk(status, "clSerKernelArg");
    
     // set the work dimensions
     size_t localworksize[2] = {BLOCK_SIZE_X,BLOCK_SIZE_Y};
     int nBLOCKSX = (ArraySizeX-2)/(BLOCK_SIZE_X -2);
     int nBLOCKSY = (ArraySizeY-2)/(BLOCK_SIZE_Y -2);
     size_t globalworksize[2] = {nBLOCKSX*BLOCK_SIZE_X,nBLOCKSY*BLOCK_SIZE_Y};

     // loop the kernel
     for( nsteps = 0; nsteps < 100; nsteps++){
     status = clEnqueueNDRangeKernel(cmdQueue, kernel, 2, NULL, globalworksize,localworksize,0,NULL,&event);
     clWaitForEvents(1 , &event);
     clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START,
           sizeof(time_start), &time_start, NULL);
     clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END,
           sizeof(time_end), &time_end, NULL);
     total_time += time_end - time_start;
     }
     printf("Good so far \n");
     status = clEnqueueNDRangeKernel(cmdQueue, kernel1, 2, NULL, globalworksize,localworksize,0,NULL,&event);

开发者ID:hietwll，项目名称:parallel-lattice-Boltzmann，代码行数:67，代码来源:Lattice_BoltzmannOpenCL.c

示例10: setup_opencl

//---------------------------------------------------------------------
// Set up the OpenCL environment.
//---------------------------------------------------------------------
void setup_opencl(int argc, char *argv[])
{
  cl_int err_code;
  char *source_dir = "EP";
  if (argc > 1) source_dir = argv[1];

#ifdef TIMER_DETAIL
  if (timers_enabled) {
    int i;
    for (i = T_OPENCL_API; i < T_END; i++) timer_clear(i);
  }
#endif

  DTIMER_START(T_OPENCL_API);

  // 1. Find the default device type and get a device for the device type
  device_type = clu_GetDefaultDeviceType();
  device      = clu_GetAvailableDevice(device_type);
  device_name = clu_GetDeviceName(device);

  // 2. Create a context for the specified device
  context = clCreateContext(NULL, 1, &device, NULL, NULL, &err_code);
  clu_CheckError(err_code, "clCreateContext()");

  // 3. Create a command queue
  cmd_queue = clCreateCommandQueue(context, device, 0, &err_code);
  clu_CheckError(err_code, "clCreateCommandQueue()");

  DTIMER_STOP(T_OPENCL_API);

  // 4. Build the program
  DTIMER_START(T_BUILD);
  char *source_file;
  char build_option[30];
  sprintf(build_option, "-DM=%d -I.", M);
  if (device_type == CL_DEVICE_TYPE_CPU) {
    source_file = "ep_cpu.cl";
    GROUP_SIZE = 16;
  } else {
    source_file = "ep_gpu.cl";
    GROUP_SIZE = 64;
  }
  program = clu_MakeProgram(context, device, source_dir, source_file,
                            build_option);
  DTIMER_STOP(T_BUILD);

  // 5. Create buffers
  DTIMER_START(T_BUFFER_CREATE);

  gq_size  = np / GROUP_SIZE * NQ * sizeof(double);
  gsx_size = np / GROUP_SIZE * sizeof(double);
  gsy_size = np / GROUP_SIZE * sizeof(double);

  pgq = clCreateBuffer(context, CL_MEM_READ_WRITE, gq_size, NULL, &err_code);
  clu_CheckError(err_code, "clCreateBuffer() for pgq");

  pgsx = clCreateBuffer(context, CL_MEM_READ_WRITE, gsx_size,NULL, &err_code);
  clu_CheckError(err_code, "clCreateBuffer() for pgsx");

  pgsy = clCreateBuffer(context, CL_MEM_READ_WRITE, gsy_size,NULL, &err_code);
  clu_CheckError(err_code, "clCreateBuffer() for pgsy");

  DTIMER_STOP(T_BUFFER_CREATE);

  // 6. Create a kernel
  DTIMER_START(T_OPENCL_API);
  kernel = clCreateKernel(program, "embar", &err_code);
  clu_CheckError(err_code, "clCreateKernel()");
  DTIMER_STOP(T_OPENCL_API);
}

开发者ID:NatTuck，项目名称:cakemark，代码行数:73，代码来源:ep.c

示例11: main

int
main(int argc, char** argv)
{


   srand(1000);
   int i;

   unsigned int size_A = WA * HA;
   unsigned int mem_size_A = sizeof(float) * size_A;
   float* h_A = (float*) malloc(mem_size_A);

   unsigned int size_B = WB * HB;
   unsigned int mem_size_B = sizeof(float) * size_B;
   float* h_B = (float*) malloc(mem_size_B);


   randomInit(h_A, size_A);
   randomInit(h_B, size_B);


   unsigned int size_C = WC * HC;
   unsigned int mem_size_C = sizeof(float) * size_C;
   float* h_C = (float*) malloc(mem_size_C);

   cl_context clGPUContext;
   cl_command_queue clCommandQue;
   cl_program clProgram;
   cl_kernel clKernel;
   cl_event mm;

   size_t dataBytes;
   size_t kernelLength;
   cl_int errcode;


   cl_mem d_A;
   cl_mem d_B;
   cl_mem d_C;


   clGPUContext = clCreateContextFromType(0,
                   CL_DEVICE_TYPE_GPU,
                   NULL, NULL, &errcode);



   errcode = clGetContextInfo(clGPUContext,
              CL_CONTEXT_DEVICES, 0, NULL,
              &dataBytes);
   cl_device_id *clDevices = (cl_device_id *)
              malloc(dataBytes);
   errcode |= clGetContextInfo(clGPUContext,
              CL_CONTEXT_DEVICES, dataBytes,
              clDevices, NULL);



   clCommandQue = clCreateCommandQueue(clGPUContext,
                  clDevices[0], CL_QUEUE_PROFILING_ENABLE, &errcode);



   d_C = clCreateBuffer(clGPUContext,
          CL_MEM_READ_WRITE,
          mem_size_A, NULL, &errcode);
   d_A = clCreateBuffer(clGPUContext,
          CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
          mem_size_A, h_A, &errcode);
   d_B = clCreateBuffer(clGPUContext,
          CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
          mem_size_B, h_B, &errcode);


   FILE* fp = fopen("hw2.cl", "r");
   fseek (fp , 0 , SEEK_END);
   const size_t lSize = ftell(fp);
   rewind(fp);
   unsigned char* buffer;
   buffer = (unsigned char*) malloc (lSize);
   fread(buffer, 1, lSize, fp);
   fclose(fp);

   cl_int status;
   clProgram = clCreateProgramWithBinary(clGPUContext,
                1, (const cl_device_id *)clDevices,
                &lSize, (const unsigned char**)&buffer,
                &status, &errcode);
   errcode = clBuildProgram(clProgram, 0, NULL, NULL,
                NULL, NULL);


   errcode = clBuildProgram(clProgram, 0,
              NULL, NULL, NULL, NULL);


   clKernel = clCreateKernel(clProgram,
               "MM", &errcode);


//.........这里部分代码省略.........

开发者ID:hemantjp，项目名称:HW2，代码行数:101，代码来源:hw2.c

示例12: clCreateKernel

  
  // Create the kernel
  cl_kernel bones_kernel_<algorithm_name>_0 = clCreateKernel(bones_program, "bones_kernel_<algorithm_name>_0", &bones_errors); error_check(bones_errors);
  
  // Set all the arguments to the kernel function
  int bones_num_args = 0;
  <kernel_argument_list>
  // Start the kernel
  size_t bones_global_worksize[] = {<parallelism>};
  bones_errors = clEnqueueNDRangeKernel(bones_queue,bones_kernel_<algorithm_name>_0,1,NULL,bones_global_worksize,NULL,0,NULL,&bones_event); error_check(bones_errors);
  
  // Synchronize and clean-up the kernel
  clFinish(bones_queue);
  clReleaseKernel(bones_kernel_<algorithm_name>_0);

开发者ID:gjvdbraak，项目名称:bones，代码行数:13，代码来源:default.host.c

示例13: crackMD5

int crackMD5(char *hash, char *cs, int passlen) {

	clut_device dev;	// device struct
	cl_event  evt;      // performance measurement event
	cl_kernel kernel;	// execution kernel
	cl_int ret;			// error code

	double td;
	int cs_len, sync_flag;
	long chunk, disp;
	unsigned char bin_hash[HASH_SIZE];

	cs_len = strlen(cs);
	sync_flag = 0;
	strToBin(hash, bin_hash, 2*HASH_SIZE);

	disp = DISPOSITIONS(cs_len, passlen);
	chunk = DISP_PER_CORE(disp, AVAILABLE_THREADS);

	debug("HOST", "Numero di disposizione da calcolare per stream processing unit = %lu\n", chunk);

	clut_open_device(&dev, PATH_TO_KERNEL);
	clut_print_device_info(&dev);


	/* ----------------------------------------- Create execution kernel ----------------------------------------- */
	kernel = clCreateKernel(dev.program, KERNEL_NAME, &ret);
	clut_check_err(ret, "Fallita la creazione del kernel");


	/* ----------------------------------- Create memory buffers on the device ----------------------------------- */
	cl_mem dchunk = clCreateBuffer(dev.context, CL_MEM_READ_WRITE, sizeof(long), NULL, &ret);
	if (ret)
		clut_panic(ret, "Fallita l'allocazione della memoria sul device per la memorizzazione del chunk");

	cl_mem dhash = clCreateBuffer(dev.context, CL_MEM_READ_ONLY, HASH_SIZE * sizeof(unsigned char), NULL, &ret);
	if (ret)
		clut_panic(ret, "Fallita l'allocazione della memoria sul device per la memorizzazione dell'hash");

	cl_mem charset = clCreateBuffer(dev.context, CL_MEM_READ_ONLY, cs_len * sizeof(char), NULL, &ret);
	if (ret)
		clut_panic(ret, "Fallita l'allocazione della memoria sul device per la memorizzazione del charset");

	cl_mem charset_size = clCreateBuffer(dev.context, CL_MEM_READ_ONLY, sizeof(int), NULL, &ret);
	if (ret)
		clut_panic(ret, "Fallita l'allocazione della memoria sul device per la memorizzazione della taglia del charset");

	cl_mem dpasslen = clCreateBuffer(dev.context, CL_MEM_READ_ONLY, sizeof(int), NULL, &ret);
	if (ret)
		clut_panic(ret, "Fallita l'allocazione della memoria sul device per la memorizzazione della taglia del charset");

	//cl_mem sync = clCreateBuffer(dev.context, CL_MEM_READ_WRITE, AVAILABLE_CORES * sizeof(int), NULL, &ret);
	cl_mem sync = clCreateBuffer(dev.context, CL_MEM_READ_WRITE, sizeof(int), NULL, &ret);
	if (ret)
		clut_panic(ret, "Fallita l'allocazione della memoria sul device per la memorizzazione del flag di sync");

	cl_mem dcracked = clCreateBuffer(dev.context, CL_MEM_READ_WRITE, HASH_SIZE, NULL, &ret);
	if (ret)
		clut_panic(ret, "Fallita l'allocazione della memoria sul device per la memorizzazione della password in chiaro");

	cl_mem computed_hash = clCreateBuffer(dev.context, CL_MEM_READ_WRITE, HASH_SIZE * sizeof(unsigned char), NULL, &ret);
	if (ret)
		clut_panic(ret, "Fallita l'allocazione della memoria sul device per la memorizzazione della password in chiaro");


	/* ----------------------------------- Write memory buffers on the device ------------------------------------ */
	ret = clEnqueueWriteBuffer(dev.queue, dchunk, CL_TRUE, 0, sizeof(long), &chunk, 0, NULL, NULL);
	if(ret)
	   clut_panic(ret, "Fallita la scrittura del chunk sul buffer di memoria del device");

	ret = clEnqueueWriteBuffer(dev.queue, dhash, CL_TRUE, 0, HASH_SIZE * sizeof(unsigned char), (int *)bin_hash, 0, NULL, NULL);
	if(ret)
	   clut_panic(ret, "Fallita la scrittura dell'hash sul buffer di memoria del device");

	ret = clEnqueueWriteBuffer(dev.queue, charset, CL_TRUE, 0, cs_len * sizeof(char), cs, 0, NULL, NULL);
	if(ret)
	   clut_panic(ret, "Fallita la scrittura de