• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    公众号

C++ cudaGetDevice函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了C++中cudaGetDevice函数的典型用法代码示例。如果您正苦于以下问题:C++ cudaGetDevice函数的具体用法?C++ cudaGetDevice怎么用?C++ cudaGetDevice使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了cudaGetDevice函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的C++代码示例。

示例1: switch

// 内部使用的  
// 如果当前未初始化直接在GPU分配内存  
// 如果当前在CPU,则在GPU上分配内存并且复制到GPU  
// 如果数据已经在GPU则啥也不做  
// Internal helper: make sure the data lives on the GPU.
// - UNINITIALIZED: allocate device memory and zero-fill it.
// - HEAD_AT_CPU: allocate device memory if needed, then copy host data over.
// - HEAD_AT_GPU / SYNCED: data is already on the GPU; nothing to do.
inline void SyncedMemory::to_gpu() {
#ifndef CPU_ONLY
  switch (head_) {
  case UNINITIALIZED:
    // Remember which device owns the allocation so it can be freed there.
    CUDA_CHECK(cudaGetDevice(&gpu_device_));
    // Allocate size_ bytes on the current device.
    CUDA_CHECK(cudaMalloc(&gpu_ptr_, size_));
    // Zero-initialize the fresh allocation.
    caffe_gpu_memset(size_, 0, gpu_ptr_);
    head_ = HEAD_AT_GPU;
    own_gpu_data_ = true;
    break;
  case HEAD_AT_CPU:
    // Lazily allocate the device buffer the first time we sync from CPU.
    if (gpu_ptr_ == NULL) {
      CUDA_CHECK(cudaGetDevice(&gpu_device_));
      CUDA_CHECK(cudaMalloc(&gpu_ptr_, size_));
      own_gpu_data_ = true;
    }
    // Copy host data to the device; both copies are now in sync.
    caffe_gpu_memcpy(size_, cpu_ptr_, gpu_ptr_);
    head_ = SYNCED;
    break;
  case HEAD_AT_GPU:
  case SYNCED:
    break;
  }
#else
  NO_GPU;
#endif
}
开发者ID:runaway,项目名称:studycaffe,代码行数:34,代码来源:syncedmem.cpp


示例2: cuda_assert

        // Releases every device allocation owned by this pattern-data object.
        // cudaFree must run on the device that performed the allocation, so the
        // destructor first switches back to device_id if another device is
        // current.
        ~cuda_pattern_data()
        {
            int current_id;
            cuda_assert( cudaGetDevice(&current_id) );
            if ( current_id != device_id ) cuda_assert( cudaSetDevice( device_id ) );

            if ( ar ) cuda_assert( cudaFree(ar) );
            if ( dim ) cuda_assert( cudaFree(dim) );
            if ( I_diff ) cuda_assert( cudaFree(I_diff) );
            if ( I_exp ) cuda_assert( cudaFree(I_exp) );
            // Bug fix: the guard previously tested I_exp while freeing
            // I_zigmoid, leaking I_zigmoid whenever I_exp was null and
            // skipping the null check on the pointer actually freed.
            if ( I_zigmoid ) cuda_assert( cudaFree(I_zigmoid) );
            if ( diag ) cuda_assert( cudaFree(diag) );
            if ( ug ) cuda_assert( cudaFree(ug) );
            if ( cache ) cuda_assert( cudaFree(cache) );
            if ( beams ) cuda_assert( cudaFree(beams) );
            if ( kt_factor ) cuda_assert( cudaFree(kt_factor) );
            if ( gvec ) cuda_assert( cudaFree(gvec) );
            if ( tilt ) cuda_assert( cudaFree(tilt) );

            // Null out all pointers so a stale handle cannot double-free.
            // Bug fix: beams and kt_factor were previously left dangling.
            ar = 0;
            dim = 0;
            I_diff = 0;
            I_exp = 0;
            I_zigmoid = 0;
            diag = 0;
            ug = 0;
            cache = 0;
            beams = 0;
            kt_factor = 0;
            gvec = 0;
            tilt = 0;
        }
开发者ID:fengwang,项目名称:larbed-refinement,代码行数:30,代码来源:cuda_rotated_pattern_data.hpp


示例3: THCState_getCurrentDeviceProperties

/* Returns a pointer to the cached cudaDeviceProp entry for the CUDA device
   that is currently active on this thread. */
struct cudaDeviceProp* THCState_getCurrentDeviceProperties(THCState* state)
{
  int device = -1;
  THCudaCheck(cudaGetDevice(&device));
  return state->deviceProperties + device;
}
开发者ID:ASAPPinc,项目名称:cutorch,代码行数:7,代码来源:THCGeneral.c


示例4: printf

void Engine::DeviceQuery() {
  cudaDeviceProp prop;
  int device;
  if (cudaSuccess != cudaGetDevice(&device)) {
    printf("No cuda device present.\n");
    return;
  }
  CUDA_CHECK(cudaGetDeviceProperties(&prop, device));
  LOG(INFO) << "Device id:                     " << device;
  LOG(INFO) << "Major revision number:         " << prop.major;
  LOG(INFO) << "Minor revision number:         " << prop.minor;
  LOG(INFO) << "Name:                          " << prop.name;
  LOG(INFO) << "Total global memory:           " << prop.totalGlobalMem;
  LOG(INFO) << "Total shared memory per block: " << prop.sharedMemPerBlock;
  LOG(INFO) << "Total registers per block:     " << prop.regsPerBlock;
  LOG(INFO) << "Warp size:                     " << prop.warpSize;
  LOG(INFO) << "Maximum memory pitch:          " << prop.memPitch;
  LOG(INFO) << "Maximum threads per block:     " << prop.maxThreadsPerBlock;
  LOG(INFO) << "Maximum dimension of block:    "
      << prop.maxThreadsDim[0] << ", " << prop.maxThreadsDim[1] << ", "
      << prop.maxThreadsDim[2];
  LOG(INFO) << "Maximum dimension of grid:     "
      << prop.maxGridSize[0] << ", " << prop.maxGridSize[1] << ", "
      << prop.maxGridSize[2];
  LOG(INFO) << "Clock rate:                    " << prop.clockRate;
  LOG(INFO) << "Total constant memory:         " << prop.totalConstMem;
  LOG(INFO) << "Texture alignment:             " << prop.textureAlignment;
  LOG(INFO) << "Concurrent copy and execution: "
      << (prop.deviceOverlap ? "Yes" : "No");
  LOG(INFO) << "Number of multiprocessors:     " << prop.multiProcessorCount;
  LOG(INFO) << "Kernel execution timeout:      "
      << (prop.kernelExecTimeoutEnabled ? "Yes" : "No");
  return;
}
开发者ID:airxcy,项目名称:ITF_Tracker,代码行数:34,代码来源:common.cpp


示例5: THCudaMemGetInfoCached

/* Reports free/total device memory, with freeBytes augmented by the bytes
   currently held (cached) by the THC device allocator. largestBlock is an
   optimistic estimate and may be refined by the allocator's cacheInfo. */
cudaError_t THCudaMemGetInfoCached(THCState *state,  size_t* freeBytes, size_t* totalBytes, size_t* largestBlock)
{
  THCDeviceAllocator* allocator = state->cudaDeviceAllocator;
  size_t cachedBytes = 0;
  *largestBlock = 0;

  /* Query the raw numbers from the CUDA runtime first. */
  cudaError_t err = cudaMemGetInfo(freeBytes, totalBytes);
  if (err != cudaSuccess)
    return err;

  int device;
  err = cudaGetDevice(&device);
  if (err != cudaSuccess)
    return err;

  /* Optimistic guess: assume the whole free region is one block. */
  *largestBlock = *freeBytes;

  /* Fold in memory the caching allocator is holding for this device. */
  if (allocator->cacheInfo != NULL)
    allocator->cacheInfo(allocator->state, device, &cachedBytes, largestBlock);

  /* Adjust the reported free byte count; largestBlock is unused for now. */
  *freeBytes += cachedBytes;
  return cudaSuccess;
}
开发者ID:HustlehardInc,项目名称:pytorch,代码行数:26,代码来源:THCGeneral.cpp


示例6: printf

void Caffe::DeviceQuery() {
  cudaDeviceProp prop;
  int device;
  if (cudaSuccess != cudaGetDevice(&device)) {
    printf("No cuda device present.\n");
    return;
  }
  CUDA_CHECK(cudaGetDeviceProperties(&prop, device));
  printf("Device id:                     %d\n", device);
  printf("Major revision number:         %d\n", prop.major);
  printf("Minor revision number:         %d\n", prop.minor);
  printf("Name:                          %s\n", prop.name);
  printf("Total global memory:           %lu\n", prop.totalGlobalMem);
  printf("Total shared memory per block: %lu\n", prop.sharedMemPerBlock);
  printf("Total registers per block:     %d\n", prop.regsPerBlock);
  printf("Warp size:                     %d\n", prop.warpSize);
  printf("Maximum memory pitch:          %lu\n", prop.memPitch);
  printf("Maximum threads per block:     %d\n", prop.maxThreadsPerBlock);
  printf("Maximum dimension of block:    %d, %d, %d\n",
      prop.maxThreadsDim[0], prop.maxThreadsDim[1], prop.maxThreadsDim[2]);
  printf("Maximum dimension of grid:     %d, %d, %d\n",
      prop.maxGridSize[0], prop.maxGridSize[1], prop.maxGridSize[2]);
  printf("Clock rate:                    %d\n", prop.clockRate);
  printf("Total constant memory:         %lu\n", prop.totalConstMem);
  printf("Texture alignment:             %lu\n", prop.textureAlignment);
  printf("Concurrent copy and execution: %s\n",
      (prop.deviceOverlap ? "Yes" : "No"));
  printf("Number of multiprocessors:     %d\n", prop.multiProcessorCount);
  printf("Kernel execution timeout:      %s\n",
      (prop.kernelExecTimeoutEnabled ? "Yes" : "No"));
  return;
}
开发者ID:FuchenUSTC,项目名称:caffe-c3d,代码行数:32,代码来源:common.cpp


示例7: cudaGetDeviceCount

// Builds one tab page per CUDA device found on the system, wires up the
// device-selection signals/slots, and announces the currently active device.
void GpuDeviceInformationDialog::setupGpuDeviceTabPages()
{
	
	int numDevs = 0;
	cudaGetDeviceCount(&numDevs);

	// Show the device count in the dialog title.
	this->setWindowTitle(QString("GPU Device Information (") + QString::number(numDevs) + QString(" devices found)"));

	for(int i = 0; i < numDevs; i++)
	{
		cudaDeviceProp devProp;
		cudaGetDeviceProperties(&devProp, i);

		// Each device gets its own tab page, labelled with the device name.
		// Ownership passes to the tab widget (Qt parent-child), so no delete here.
		QWidget* deviceTabPage = new GpuDeviceInformationDialogTabPage(devProp, i);
		
		this->tabWidget->addTab(deviceTabPage, devProp.name);

		// Two-way wiring: the tab can request a device change, and the dialog
		// broadcasts device changes back to every tab.
		connect(deviceTabPage, SIGNAL(setMainComputeDevice(int)), this, SLOT(on_setMainComputeDevice(int)));
		connect(this, SIGNAL(hasChangedMainComputeDevice(int)), deviceTabPage, SLOT(on_hasChangedMainComputeDevice(int)));

	}

	// Tell all tabs which device is currently selected so they can highlight it.
	int currentComputeDevice;
	cudaGetDevice(&currentComputeDevice);

	emit hasChangedMainComputeDevice(currentComputeDevice);
}
开发者ID:apartridge,项目名称:GpuRayTracer,代码行数:27,代码来源:GpuDeviceInformationDialog.cpp


示例8: __declspec

// Exported wrapper: returns the index of the CUDA device currently active
// on the calling thread (0 if the query fails, since the value is
// default-initialized and cudaGetDevice's status is not checked).
__declspec(dllexport) int __stdcall GetDevice()
{
    int currentDevice = 0;
    cudaGetDevice(&currentDevice);
    return currentDevice;
}
开发者ID:dtegunov,项目名称:warp,代码行数:7,代码来源:Device.cpp


示例9: rcrackiThreadEntryPoint

// start processing of jobs
void rcrackiThread::rcrackiThreadEntryPoint()
{
#if GPU
	if(gpu != 0 && cudaGetDevice(&cudaDevId) == CUDA_SUCCESS) {
		cudaBuffCount = 0x2000;
		cudaChainSize = 100;

		cudaDeviceProp deviceProp;
		if(cudaGetDeviceProperties(&deviceProp, cudaDevId) == CUDA_SUCCESS) {
			switch(deviceProp.major) {
			case 1: ; break;
			case 2:
				cudaBuffCount = 0x4000;
				cudaChainSize = 200;
				break;
			}
		}
		cudaBuffCount = rcuda::GetChainsBufferSize(cudaBuffCount);
	}
	else
#endif
		cudaDevId = -1;

	if (falseAlarmChecker) {
		if (falseAlarmCheckerO) {
			CheckAlarmO();
		}
		else {
			CheckAlarm();
		}
	}
	else {
		PreCalculate();
	}
}
开发者ID:ChunHungLiu,项目名称:ctf-writeup,代码行数:36,代码来源:rcrackiThread.cpp


示例10: TryToAddSingleFitStream

// Attempts to create one more SingleFitStream and register it with the
// stream manager. Returns false only when no stream at all could be
// created, signalling the caller to fall back to the CPU fit path.
bool TryToAddSingleFitStream(void * vpsM, WorkerInfoQueue* q){
#ifdef ION_COMPILE_CUDA
  int dev_id = 0;
  cudaStreamManager * psM = (cudaStreamManager *) vpsM;
  SingleFitStream * temp;
  cudaGetDevice( &dev_id );
  int i;
    try{ // exception handling to allow fallback to CPU Fit if not a single stream could be created
      temp =  new SingleFitStream(q);
      // Manager takes ownership of the stream unit; i is its slot index.
      i = psM->addStreamUnit( temp);
      std::cout <<"CUDA: Device " <<  dev_id <<  " Single Fit stream " << i <<" created " << std::endl;
      psM->printMemoryUsage();
    }
    catch(cudaException& e)
    {
      cout << e.what() << endl;
      if(psM->getNumStreams() > 0){ 
        // At least one stream exists already: stop adding more but keep GPU path.
        cout << "CUDA: Device " << dev_id<< " could not create more than " << psM->getNumStreams() << " Single Fit streams" << std::endl;       
        psM->printMemoryUsage();
      }else{
        // Zero streams: GPU path unusable, tell the caller to use the CPU.
        std::cout << "CUDA: Device " << dev_id << " no Single Fit streams could be created >>>>>>>>>>>>>>>>> FALLING BACK TO CPU!"<< std::endl;
        return false;
      }
    }

#endif
  return true;
}
开发者ID:bdiegel,项目名称:TS,代码行数:28,代码来源:cudaWrapper.cpp


示例11: getCurrentDeviceProperties

// Returns the cached cudaDeviceProp for the CUDA device currently active
// on this thread. Errors from the device query are routed through checkCuda.
const cudaDeviceProp& getCurrentDeviceProperties() {
  int device = 0;
  auto err = cudaGetDevice(&device);
  // Bug fix: the message previously named cudaGetDeviceCount although the
  // failing call is cudaGetDevice, which would mislead error diagnosis.
  checkCuda(err, std::string("CUDA ERROR: cudaGetDevice "));

  return getDeviceProperties(device);
}
开发者ID:JohnJPS,项目名称:fbcuda,代码行数:7,代码来源:CachedDeviceProperties.cpp


示例12: m_initialized

  // Constructor: verifies a CUDA device is present, queries its properties,
  // enforces the minimum compute capability (1.1), and initializes the
  // max/min candidate buffer.
  GpuSurfDetectorInternal::GpuSurfDetectorInternal(GpuSurfConfiguration config) : 
    m_initialized(false),
    m_config(config)
  {
    int deviceCount;
    int device;
    cudaError_t err;
    cudaGetDeviceCount(&deviceCount);
    ASRL_ASSERT_GT(deviceCount,0,"There are no CUDA capable devices present");
    
	
    err = cudaGetDevice(&device);
    ASRL_ASSERT_EQ(err,cudaSuccess, "Unable to get the CUDA device: " << cudaGetErrorString(err));		
    //std::cout << "Found device " << device << std::endl;
    err = cudaGetDeviceProperties(&m_deviceProp,device);
    ASRL_ASSERT_EQ(err,cudaSuccess, "Unable to get the CUDA device properties: " << cudaGetErrorString(err));		

    // Bug fix: the original asserted major >= 1 AND minor >= 1, which
    // wrongly rejects newer devices whose minor revision is 0 (e.g.
    // compute capability 2.0). Encode the version as major*10+minor and
    // require at least 11 (i.e. capability 1.1).
    ASRL_ASSERT_GE(m_deviceProp.major * 10 + m_deviceProp.minor, 11,
                   "Minimum compute capability 1.1 is necessary");

    m_maxmin.init(ASRL_SURF_MAX_CANDIDATES,false);
    m_maxmin.memset(0);

  }
开发者ID:dongmingdmdm,项目名称:camnavi,代码行数:25,代码来源:GpuSurfDetectorInternal.cpp


示例13: cudaGetDevice

// Returns the maximum supported second extent (height) of a 2D texture on
// the currently selected CUDA device (cudaDeviceProp::maxTexture2D[1]).
int CUDADevicesService::getMaximumTexture2DHeight() {
	int device;
	cudaGetDevice(&device);
	// Bug fix: the original heap-allocated a cudaDeviceProp with `new` and
	// never deleted it, leaking on every call. A stack object suffices.
	cudaDeviceProp devProperties;
	cudaGetDeviceProperties(&devProperties, device);
	return devProperties.maxTexture2D[1];
}
开发者ID:christiantinauer,项目名称:relaxometry,代码行数:7,代码来源:CUDADevicesService.cpp


示例14: cutorch_streamWaitFor

/*
   Usage:
   cutorch.streamWaitFor(waiterStream, {waitForStream1, ..., waitForStreamN})
   for streams on the current device. Creates a one-way barrier where
   waiterStream waits for waitForStream1-N to reach the current point.
*/
/* Lua binding: cutorch.streamWaitFor(waiterStream, {streams...}).
   Makes waiterStream (on the current device) wait until every listed stream
   has reached this point, using one recorded event per awaited stream. */
static int cutorch_streamWaitFor(lua_State *L)
{
  THCState *state = cutorch_getstate(L);

  int curDev = -1;
  THCudaCheck(cudaGetDevice(&curDev));

  /* Check that the waiting stream is in bounds; this will error out if not */
  int waitingId = (int) luaL_checknumber(L, 1);
  cudaStream_t streamWaiting =
    THCState_getDeviceStream(state, curDev, waitingId);

  /* Validate the streams that we are waiting on */
  int streams = checkAndCountListOfStreams(L, state, 2, curDev);

  if (streams < 1) {
    /* nothing to synchronize */
    return 0;
  }
  /* One-way dependency; streamWaiting will wait for the list of streams to
     wait on to complete execution of pending scheduled kernels/events.
     NOTE(review): the malloc result is not checked before use. */
  cudaEvent_t * events = (cudaEvent_t*)malloc(sizeof(cudaEvent_t) * streams);
  /* Record one event per awaited stream at its current position. */
  createSingleDeviceEvents(L, state, 2, curDev, events);
  /* Then, make the waiter wait on each event; the events can be destroyed
     immediately since cudaStreamWaitEvent captures their state. */
  for (int i = 0; i < streams; i++) {
    THCudaCheck(cudaStreamWaitEvent(streamWaiting, events[i], 0));
    THCudaCheck(cudaEventDestroy(events[i]));
  }
  free(events);
  return 0;
}
开发者ID:ASAPPinc,项目名称:cutorch,代码行数:37,代码来源:init.c


示例15: gpu_print_properties

void gpu_print_properties(FILE* out){
  int device = -1;
  gpu_safe( cudaGetDevice(&device) );
  
  cudaDeviceProp prop;
  gpu_safe( cudaGetDeviceProperties(&prop, device) ); 
  
  int MiB = 1024 * 1024;
  int kiB = 1024;
  
  fprintf(out, "     Device number: %d\n", device);
  fprintf(out, "       Device name: %s\n", prop.name);
  fprintf(out, "     Global Memory: %d MiB\n", (int)(prop.totalGlobalMem/MiB));
  fprintf(out, "     Shared Memory: %d kiB/block\n", (int)(prop.sharedMemPerBlock/kiB));
  fprintf(out, "   Constant memory: %d kiB\n", (int)(prop.totalConstMem/kiB));
  fprintf(out, "         Registers: %d per block\n", (int)(prop.regsPerBlock/kiB));
  fprintf(out, "         Warp size: %d threads\n", (int)(prop.warpSize));
  //fprintf(out, "  Max memory pitch: %d bytes\n", (int)(prop.memPitch));
  fprintf(out, " Texture alignment: %d bytes\n", (int)(prop.textureAlignment));
  fprintf(out, " Max threads/block: %d\n", prop.maxThreadsPerBlock);
  fprintf(out, "    Max block size: %d x %d x %d threads\n", prop.maxThreadsDim[X], prop.maxThreadsDim[Y], prop.maxThreadsDim[Z]);
  fprintf(out, "     Max grid size: %d x %d x %d blocks\n", prop.maxGridSize[X], prop.maxGridSize[Y], prop.maxGridSize[Z]);
  fprintf(out, "Compute capability: %d.%d\n", prop.major, prop.minor);
  fprintf(out, "        Clock rate: %d MHz\n", prop.clockRate/1000);
  fprintf(out, "   Multiprocessors: %d\n", prop.multiProcessorCount);
  fprintf(out, "   Timeout enabled: %d\n", prop.kernelExecTimeoutEnabled);
  fprintf(out, "      Compute mode: %d\n", prop.computeMode);
  fprintf(out, "    Device overlap: %d\n", prop.deviceOverlap);
  fprintf(out, "Concurrent kernels: %d\n", prop.concurrentKernels);
  fprintf(out, "        Integrated: %d\n", prop.integrated);
  fprintf(out, "  Can map host mem: %d\n", prop.canMapHostMemory);
  
}
开发者ID:LStoleriu,项目名称:hotspin,代码行数:33,代码来源:gpu_properties.cpp


示例16: cutorch_streamBarrier

/*
   Usage:
   cutorch.streamBarrier({stream1, stream2, ..., streamN})
   applies to streams for the current device. Creates a N-way barrier
   to synchronize all of the streams given
*/
/* Lua binding: cutorch.streamBarrier({streams...}).
   Creates an N-way barrier across the listed streams on the current device:
   every stream records an event, then every stream waits on all events. */
static int cutorch_streamBarrier(lua_State *L)
{
  THCState *state = cutorch_getstate(L);

  int curDev = -1;
  THCudaCheck(cudaGetDevice(&curDev));

  int streams = checkAndCountListOfStreams(L, state, 1, curDev);

  if (streams < 2) {
    /* nothing to synchronize together */
    return 0;
  }
  /* Multi-way dependency (barrier); all streams must complete execution
     of pending scheduled kernels/events.
     NOTE(review): the malloc result is not checked before use. */
  cudaEvent_t * events = (cudaEvent_t*)malloc(sizeof(cudaEvent_t) * streams);
  /* First, create an event and record them for all streams */
  int eventsCreated =  createSingleDeviceEvents(L, state, 1, curDev, events);

  /* Then, wait on the event. Each stream is actually waiting on itself here
     too, but that's harmless and isn't worth weeding out. */
  waitSingleDeviceEvents(L, state, 1, curDev, events, eventsCreated);
  /* Events can be destroyed once the waits are enqueued. */
  for (int i = 0; i < eventsCreated; i++)
    THCudaCheck(cudaEventDestroy(events[i]));

  free(events);
  return 0;
}
开发者ID:ASAPPinc,项目名称:cutorch,代码行数:34,代码来源:init.c


示例17: cudppMoveToFrontTransform

/**
 * @brief Performs the Move-to-Front Transform
 *
 * Performs a parallel move-to-front transform on 1,048,576 elements.
 * The MTF uses a scan-based algorithm to parallelize the computation.
 * The MTF uses a scan-based algorithm described in our paper "Parallel
 * Lossless Data Compression on the GPU". (See the \ref references bibliography).
 *
 * - Currently, the MTF can only be performed on 1,048,576 (uchar) elements.
 * - The transformed string is written to \a d_mtfOut.
 *
 * @param[in] planHandle Handle to plan for MTF
 * @param[out] d_out Output data
 * @param[in] d_in Input data
 * @param[in] numElements Number of elements
 * @returns CUDPPResult indicating success or error condition
 *
 * @see cudppPlan, CUDPPConfiguration, CUDPPAlgorithm
 */
// Performs the parallel Move-to-Front transform described in the CUDPP
// documentation. Validates the device (compute capability >= 2.0) and the
// plan (must be an MTF plan over uchar data) before dispatching.
CUDPP_DLL
CUDPPResult cudppMoveToFrontTransform(CUDPPHandle planHandle,
                                      unsigned char *d_in,
                                      unsigned char *d_out,
                                      size_t numElements)
{
    // MTF is only supported on devices with compute capability 2.0+.
    int dev;
    cudaGetDevice(&dev);

    cudaDeviceProp devProps;
    cudaGetDeviceProperties(&devProps, dev);

    if ((int)devProps.major < 2)
        return CUDPP_ERROR_ILLEGAL_CONFIGURATION;

    CUDPPMtfPlan *plan =
        (CUDPPMtfPlan *) getPlanPtrFromHandle<CUDPPMtfPlan>(planHandle);

    // Guard clauses: reject bad handles and mismatched plan configurations.
    if (plan == NULL)
        return CUDPP_ERROR_INVALID_HANDLE;
    if (plan->m_config.algorithm != CUDPP_MTF)
        return CUDPP_ERROR_INVALID_PLAN;
    if (plan->m_config.datatype != CUDPP_UCHAR)
        return CUDPP_ERROR_ILLEGAL_CONFIGURATION;

    cudppMtfDispatch(d_in, d_out, numElements, plan);
    return CUDPP_SUCCESS;
}
开发者ID:UIKit0,项目名称:cudpp,代码行数:54,代码来源:cudpp.cpp


示例18: getDevCapability

/**
 * Returns the compute capability of the selected GPU.
 * @return the compute capability in the integer format (210 means
 * version 2.1)
 */
int getDevCapability() {
    cudaDeviceProp devProp;
    int dev;
    cudaGetDevice(&dev);
    cutilSafeCall(cudaGetDeviceProperties(&devProp, dev));
    return devProp.major*100+devProp.minor*10;
}
开发者ID:edanssandes,项目名称:MASA-CUDAlign,代码行数:12,代码来源:cuda_util.cpp


示例19: CUDA_CHECK

// Tree-based multi-GPU gradient aggregation step. Sums gradients pushed by
// child solvers into this solver's diff buffer, then either forwards the
// accumulated gradients to the parent (inner node) or rescales them by the
// solver count (root node).
void P2PSync<Dtype>::on_gradients_ready(Timer* timer, ostringstream* timing) {
#ifndef CPU_ONLY
#ifdef DEBUG
  // Sanity check: this callback must run on the solver's own device.
  int device;
  CUDA_CHECK(cudaGetDevice(&device));
  CHECK(device == solver_->param().device_id());
#endif

  // Sum children gradients as they appear in the queue
  for (int i = 0; i < children_.size(); ++i) {
    timer->Start();
    // Blocks until some child has finished writing into parent_grads_.
    P2PSync<Dtype> *child = queue_.pop();
    Dtype* src = child->parent_grads_;
    Dtype* dst = diff_;

#ifdef DEBUG
    // The popped entry must be one of our own children, and both buffers
    // must live on this solver's device.
    bool ok = false;
    for (int j = 0; j < children_.size(); ++j) {
      if (child == children_[j]) {
        ok = true;
      }
    }
    CHECK(ok);
    cudaPointerAttributes attributes;
    CUDA_CHECK(cudaPointerGetAttributes(&attributes, src));
    CHECK(attributes.device == device);
    CUDA_CHECK(cudaPointerGetAttributes(&attributes, dst));
    CHECK(attributes.device == device);
#endif

    // dst += src (element-wise accumulation of the child's gradients).
    caffe_gpu_add(size_, src, dst, dst);
    *timing << " add_grad: " << timer->MilliSeconds();
  }

  // Send gradients to parent
  if (parent_) {
    timer->Start();
    Dtype* src = diff_;
    Dtype* dst = parent_grads_;

#ifdef DEBUG
    // src stays on this device; dst is the staging buffer on the parent's
    // device.
    cudaPointerAttributes attributes;
    CUDA_CHECK(cudaPointerGetAttributes(&attributes, src));
    CHECK(attributes.device == device);
    CUDA_CHECK(cudaPointerGetAttributes(&attributes, dst));
    CHECK(attributes.device == parent_->solver_->param().device_id());
#endif

    // Peer-to-peer copy of the accumulated gradients, then notify the
    // parent only after the copy has fully completed.
    CUDA_CHECK(cudaMemcpyAsync(dst, src, size_ * sizeof(Dtype),  //
        cudaMemcpyDeviceToDevice, cudaStreamDefault));
    CUDA_CHECK(cudaStreamSynchronize(cudaStreamDefault));
    parent_->queue_.push(this);
    *timing << " send_grad: " << timer->MilliSeconds();
  } else {
    // Loss functions divide gradients by the batch size, so to compensate
    // for split batch, the root solver divides by number of solvers.
    caffe_gpu_scal(size_, Dtype(1.0 / Caffe::solver_count()), diff_);
  }
#endif
}
开发者ID:bbshocking,项目名称:caffe,代码行数:60,代码来源:parallel.cpp


示例20: checkDeviceMeetComputeSpec

// Verifies that the current CUDA device meets the sample's minimum runtime
// and compute-capability requirements; exits the process otherwise.
// NOTE(review): argc/argv are unused here (kept for the common CUDA-sample
// signature), and failure exits with EXIT_SUCCESS by sample convention.
void checkDeviceMeetComputeSpec(int argc, char **argv)
{
    int device = 0;
    cudaGetDevice(&device);

    if (checkCUDAProfile(device, MIN_RUNTIME_VERSION, MIN_COMPUTE_VERSION))
    {
        fprintf(stderr,"\nCUDA Capable Device %d, meets minimum required specs.\n", device);
    }
    else
    {
        // Decode the packed version constants for the message:
        // MIN_COMPUTE_VERSION packs major/minor in nibbles (base 16),
        // MIN_RUNTIME_VERSION uses CUDA's major*1000 + minor*10 encoding.
        fprintf(stderr, "\nNo configuration with minimum compute capabilities found.  Exiting...\n");
        fprintf(stderr, "This sample requires:\n");
        fprintf(stderr, "\tCUDA Compute Capability >= %d.%d is required\n", MIN_COMPUTE_VERSION/16, MIN_COMPUTE_VERSION%16);
        fprintf(stderr, "\tCUDA Runtime Version    >= %d.%d is required\n", MIN_RUNTIME_VERSION/1000, (MIN_RUNTIME_VERSION%100)/10);

        // cudaDeviceReset causes the driver to clean up all state. While
        // not mandatory in normal operation, it is good practice.  It is also
        // needed to ensure correct operation when the application is being
        // profiled. Calling cudaDeviceReset causes all profile data to be
        // flushed before the application exits
        cudaDeviceReset();
        exit(EXIT_SUCCESS);
    }
}
开发者ID:ajperalt,项目名称:nvidia-cuda-7.0-samples,代码行数:25,代码来源:volumeFiltering.cpp



注:本文中的cudaGetDevice函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
C++ cudaGetDeviceCount函数代码示例发布时间:2022-05-30
下一篇:
C++ cudaGLSetGLDevice函数代码示例发布时间:2022-05-30
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap