C++ cudaEventElapsedTime函数代码示例

OStack程序员社区-中国程序员成长平台 › 门户 › 编程 › C++ › C++教程

原作者: [db:作者] 来自: [db:来源] 收藏邀请

本文整理汇总了C++中cudaEventElapsedTime函数的典型用法代码示例。如果您正苦于以下问题：C++ cudaEventElapsedTime函数的具体用法？C++ cudaEventElapsedTime怎么用？C++ cudaEventElapsedTime使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。

在下文中一共展示了cudaEventElapsedTime函数的20个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于我们的系统推荐出更棒的C++代码示例。

示例1: TimerGPU::read

/**
 * Closes the current measurement interval and reports its length.
 *
 * Records stop_ on stream_, blocks the host until that event has completed,
 * then queries the time between start_ and stop_.
 *
 * @return Interval reported by cudaEventElapsedTime (milliseconds). The
 *         result is zero-initialised so that a failed query never hands an
 *         indeterminate value back to the caller.
 */
float TimerGPU::read() {
  cudaEventRecord(stop_, stream_);
  cudaEventSynchronize(stop_);
  float elapsed_ms = 0.0f;
  cudaEventElapsedTime(&elapsed_ms, start_, stop_);
  return elapsed_ms;
}

开发者ID:bbferka，项目名称:simtrack，代码行数:7，代码来源:utilities.cpp

示例2: runBenchmark

/**
 * Times `iterations` calls of nbody->update() with CUDA events and prints
 * the measured time and derived throughput figures to stdout.
 *
 * One untimed update is issued first so that start-up cost is kept out of
 * the measurement. Every CUDA call, including the synchronisation on the
 * stop event, goes through cutilSafeCall.
 *
 * @param iterations Number of timed update() calls.
 */
void runBenchmark(int iterations)
{
    // Value printed as "flops per interaction" in the report below.
    // NOTE(review): computePerfStats is not visible here — confirm it uses
    // the same figure.
    const int flopsPerInteraction = 20;

    // once without timing to prime the GPU
    nbody->update(activeParams.m_timestep);

    cutilSafeCall(cudaEventRecord(startEvent, 0));

    for (int i = 0; i < iterations; ++i)
    {
        nbody->update(activeParams.m_timestep);
    }

    cutilSafeCall(cudaEventRecord(stopEvent, 0));
    // Checked like the other event calls so that a failure while waiting is
    // reported instead of being silently ignored.
    cutilSafeCall(cudaEventSynchronize(stopEvent));

    float milliseconds = 0;
    cutilSafeCall( cudaEventElapsedTime(&milliseconds, startEvent, stopEvent));
    double interactionsPerSecond = 0;
    double gflops = 0;
    // Presumably fills the first two arguments by reference; they are
    // printed right after this call.
    computePerfStats(interactionsPerSecond, gflops, milliseconds, iterations);

    printf("%d bodies, total time for %d iterations: %0.3f ms\n",
           numBodies, iterations, milliseconds);
    printf("= %0.3f billion interactions per second\n", interactionsPerSecond);
    printf("= %0.3f GFLOP/s at %d flops per interaction\n", gflops, flopsPerInteraction);
}

开发者ID:AnkurAnandapu，项目名称:ocelot-fork，代码行数:26，代码来源:nbody.cpp

示例3: CUDA::Timer::Stop

//-----------------------------------------------------------------------------
// Stops the timer: records mStop on stream 0, blocks the host until that
// event has completed, stores the interval between mStart and mStop in mTime
// and sets the state to CT_STOPPED.
void CUDA::Timer::Stop ()
{
    cudaEventRecord(mStop, 0);
    cudaEventSynchronize(mStop);
    // cudaEventElapsedTime reports the interval in milliseconds.
    cudaEventElapsedTime(&mTime, mStart, mStop);
    mState = CT_STOPPED;
}

开发者ID:segfault11，项目名称:TwoScale2D，代码行数:8，代码来源:cuda.cpp

示例4: record_async_times

/*
 * Walks the asynchronous marker list of `tset` and, for every pair of
 * consecutive markers up to the one returned by get_last_async(), adds the
 * elapsed time between their CUDA events to the owning timer (and to the
 * sub-timer with a matching label, if the marker carries one).
 *
 * Each visited marker, and the marker the walk stops on when it is not
 * NULL, has its timerID reset to INVALID_TIMERID.
 *
 * Returns the sum of all intervals that were added. Intervals are the
 * millisecond value from cudaEventElapsedTime multiplied by 1e3.
 *
 * Assumes that all recorded events have completed.
 */
static pb_Timestamp record_async_times(struct pb_TimerSet* tset)
{
  struct pb_async_time_marker_list * const end_marker = get_last_async(tset);
  struct pb_async_time_marker_list * cur = tset->async_markers;
  pb_Timestamp async_total = 0;

  while (cur != end_marker) {
    float elapsed_ms;
    cudaEventElapsedTime(&elapsed_ms,
                         *((cudaEvent_t *)cur->marker),
                         *((cudaEvent_t *)cur->next->marker));
    pb_Timestamp delta = (pb_Timestamp) (elapsed_ms * 1e3);

    tset->timers[cur->timerID].elapsed += delta;

    if (cur->label != NULL) {
      /* Credit the first sub-timer whose label matches this marker. */
      struct pb_SubTimer *sub;
      for (sub = tset->sub_timer_list[cur->timerID]->subtimer_list;
           sub != NULL;
           sub = sub->next) {
        if (strcmp(sub->label, cur->label) == 0) {
          sub->timer.elapsed += delta;
          break;
        }
      }
    }

    async_total += delta;
    cur->timerID = INVALID_TIMERID;
    cur = cur->next;
  }

  /* The marker the walk stopped on is invalidated as well. */
  if (cur != NULL) {
    cur->timerID = INVALID_TIMERID;
  }

  return async_total;
}

开发者ID:anshumang，项目名称:lammps-analytics，代码行数:35，代码来源:parboil_cuda.c

示例5: Timer::MicroSeconds

/**
 * Returns the last measured interval in microseconds.
 *
 * Logs a warning and returns 0 when the timer has never been run. A timer
 * that is still running is stopped first. On the CUDA backend in GPU mode
 * the interval comes from the GPU start/stop events (milliseconds scaled by
 * 1000); otherwise it is the difference of the CPU timestamps.
 */
float Timer::MicroSeconds() {
  if (!has_run_at_least_once()) {
    LOG(WARNING)<< "Timer has never been run before reading time.";
    return 0;
  }
  if (running()) {
    Stop();
  }

  const bool on_cuda_device = Caffe::mode() == Caffe::GPU
      && Caffe::GetDefaultDevice()->backend() == BACKEND_CUDA;
  if (!on_cuda_device) {
    // CPU path: plain difference of the two host timestamps.
    elapsed_microseconds_ = (stop_cpu_ - start_cpu_).total_microseconds();
    return elapsed_microseconds_;
  }

#ifdef CPU_ONLY
  NO_GPU;
#else
#ifdef USE_CUDA
  CUDA_CHECK(cudaEventElapsedTime(&elapsed_milliseconds_, start_gpu_,
          stop_gpu_));
  // CUDA events only report milliseconds, so scale to microseconds.
  elapsed_microseconds_ = elapsed_milliseconds_ * 1000;
#endif  // USE_CUDA
#endif  // CPU_ONLY
  return elapsed_microseconds_;
}

开发者ID:strin，项目名称:caffe-opencl，代码行数:25，代码来源:benchmark.cpp

示例6: GetTimeMillis

/*
 * Returns the time since timerStart was recorded, truncated to whole
 * milliseconds.
 *
 * Records timerStop on stream 0, waits for it to complete and queries the
 * interval between timerStart and timerStop. The value is zero-initialised
 * so that a failed query is never converted from an indeterminate float.
 */
unsigned int GetTimeMillis () {
  float elapsedTime = 0.0f;
  cudaEventRecord(timerStop,0);
  cudaEventSynchronize(timerStop);
  cudaEventElapsedTime(&elapsedTime, timerStart, timerStop);
  return (unsigned int)(elapsedTime);
}

开发者ID:smallGum，项目名称:gpuocelot，代码行数:7，代码来源:tictoc.c

示例7: time_invocation_cuda

  double time_invocation_cuda(std::size_t num_trials, Function f, Arg1 arg1, Arg2 arg2, Arg3 arg3)
{
  cudaEvent_t start, stop;
  cudaEventCreate(&start);
  cudaEventCreate(&stop);

  cudaEventRecord(start);
  for(std::size_t i = 0;
      i < num_trials;
      ++i)
  {
    f(arg1,arg2,arg3);
  }
  cudaEventRecord(stop);
  cudaThreadSynchronize();

  float msecs = 0;
  cudaEventElapsedTime(&msecs, start, stop);

  cudaEventDestroy(start);
  cudaEventDestroy(stop);

  // return mean msecs
  return msecs / num_trials;
}

开发者ID:egaburov，项目名称:bulk，代码行数:25，代码来源:time_invocation_cuda.hpp

示例8: main

int main()
{
	cudaEvent_t start;
	cudaEvent_t end;
	float duration;

	const float overestimateRate = 0.01f;
	const float errorRate = 0.01f;
	Tokenizer tokenizer( overestimateRate, errorRate );

	/************** Test counting string tokens *************/
	TextReader reader;

	cudaEventCreate( &start );
	cudaEventRecord( start, 0 );

	reader.Read();
	tokenizer.StartTokenizing( 
		reader.GetCharBuffer(), 
		reader.GetOffsetBuffer(), 
		reader.GetCharBufferSize(), 
		reader.GetOffsetBufferSize() );
	
	cudaEventCreate( &end );
	cudaEventRecord( end, 0 );
	cudaEventSynchronize( end );

	cudaEventElapsedTime( &duration, start, end );
	printf( "Time taken: %.3lf milliseconds\n", duration );

	tokenizer.GetFrequency( "a" );
}

开发者ID:YSZhuoyang，项目名称:CountMinParallel，代码行数:32，代码来源:Main.cpp

示例9: Elapsed

	// Waits for the stop event to complete and returns the time between
	// start and stop as reported by cudaEventElapsedTime (milliseconds).
	// The result is zero-initialised so that a failed query does not return
	// an indeterminate value.
	float Elapsed()
	{
		float elapsed = 0.0f;
		cudaEventSynchronize(stop);
		cudaEventElapsedTime(&elapsed, start, stop);
		return elapsed;
	}

开发者ID:tmquan，项目名称:hetero，代码行数:7，代码来源:timer.hpp

示例10: runCuda

void runCuda()
{
	//////////////////////
	// Timing cuda call //
	//////////////////////
	float time;
	cudaEvent_t start, stop;
	cudaEventCreate(&start);
	cudaEventCreate(&stop);
	cudaEventRecord(start, 0);

	// Map OpenGL buffer object for writing from CUDA on a single GPU
	// No data is moved (Win & Linux). When mapped to CUDA, OpenGL should not use this buffer
	dptr=NULL;

	vbo = mesh->getVBO();
	vbosize = mesh->getVBOsize();

	nbo = mesh->getNBO();
	nbosize = mesh->getNBOsize();

#if RGBONLY == 1
	float newcbo[] = {0.0, 1.0, 0.0, 
					0.0, 0.0, 1.0, 
					1.0, 0.0, 0.0};
	cbo = newcbo;
	cbosize = 9;
#elif RGBONLY == 0
	vec3 defaultColor(0.5f, 0.5f, 0.5f);
	mesh->changeColor(defaultColor);
	cbo = mesh->getCBO();
	cbosize = mesh->getCBOsize();
#endif

	ibo = mesh->getIBO();
	ibosize = mesh->getIBOsize();

	cudaGLMapBufferObject((void**)&dptr, pbo);

	updateCamera();

	cudaRasterizeCore(cam, dptr, glm::vec2(width, height), frame, vbo, vbosize, cbo, cbosize, ibo, ibosize, nbo, nbosize, lights, lightsize, alpha, beta, displayMode);
	cudaGLUnmapBufferObject(pbo);

	vbo = NULL;
	cbo = NULL;
	ibo = NULL;

	frame++;
	fpstracker++;

	//////////////////////
	// Timing cuda call //
	//////////////////////
	cudaEventRecord(stop, 0);
	cudaEventSynchronize(stop);
	cudaEventElapsedTime(&time, start, stop);
	printf("runCuda runtime: %3.1f ms \n", time);
}

开发者ID:mchen15，项目名称:Project4-Rasterizer，代码行数:59，代码来源:main.cpp

示例11: libcgt::cuda::Event::synchronizeAndGetMillisecondsElapsed

// Blocks until m_stop has completed, then returns the number of milliseconds
// between m_start and m_stop as reported by cudaEventElapsedTime.
// The result is zero-initialised so that a failed query does not return an
// indeterminate value.
float libcgt::cuda::Event::synchronizeAndGetMillisecondsElapsed()
{
	cudaEventSynchronize( m_stop );

	float ms = 0.0f;
	cudaEventElapsedTime( &ms, m_start, m_stop );
	return ms;
}

开发者ID:zxwglzi，项目名称:libcgt，代码行数:8，代码来源:Event.cpp

示例12: CUDAImpl::_StopTimer

//----------------------------------------------------------------------------//
// Records _stop on stream 0, waits for it to complete and returns the time
// between _start and _stop as reported by cudaEventElapsedTime
// (milliseconds). The value is zero-initialised so that a failed query does
// not return an indeterminate result.
double CUDAImpl::_StopTimer()
{
    cudaEventRecord(_stop, 0);
    cudaEventSynchronize(_stop);
    float elapsedMs = 0.0f;
    cudaEventElapsedTime(&elapsedMs, _start, _stop);
    return elapsedMs;
}

开发者ID:karlssonper，项目名称:gpuip，代码行数:9，代码来源:cuda.cpp

示例13: CudaTimer::Split

// Returns the time in seconds since the previous split and starts a new
// interval.
//
// Records `end`, waits for the device, reads the start-to-end interval, and
// then swaps the two events so the event just recorded becomes the start of
// the next interval. The interval is zero-initialised so that a failed
// query does not return an indeterminate value.
double CudaTimer::Split() {
	cudaEventRecord(end);
	cudaDeviceSynchronize();
	float elapsedMs = 0.0f;
	cudaEventElapsedTime(&elapsedMs, start, end);
	start.Swap(end);
	// cudaEventElapsedTime reports milliseconds; convert to seconds.
	return (elapsedMs / 1000.0);
}

开发者ID:BillOmg，项目名称:moderngpu，代码行数:8，代码来源:mgpucontext.cpp

示例14: contractTT

// Contracts the two tensor trains TT1 and TT2 (n cores each), accumulating
// one scalar per (a, b) index pair of the first cores, and prints timing,
// throughput and the accumulated result.
//
// The first and last cores of both trains are copied to the host once and
// re-prepared for every index pair. The function resets the device and
// terminates the process with exit(0); it does not return to the caller.
void contractTT(sTensorGPU *TT1, sTensorGPU *TT2, const int n, const int size)
{
	cublasHandle_t blas;
	cublasCreate(&blas);
	type total=0;

	// Two scratch tensors that the chained contractions ping-pong between.
	sTensorGPU scratchA = emptyTensor(size*size,2);
	sTensorGPU scratchB = emptyTensor(size*size*2,3);
	cudaEvent_t tic;
	cudaEventCreate(&tic);
	cudaEvent_t toc;
	cudaEventCreate(&toc);

	cudaEventRecord(tic, NULL);
	const int countA = TT1[0].size[0];
	const int countB = TT2[0].size[0];
	const int last = n - 1;

	sTensorCPU firstA = copyToCPU(TT1[0]);
	sTensorCPU firstB = copyToCPU(TT2[0]);
	sTensorCPU lastA = copyToCPU(TT1[last]);
	sTensorCPU lastB = copyToCPU(TT2[last]);

	for (int a = 0; a < countA; a++)
	{
		TT1[0] = prepareTensorStart(firstA, a);
		TT1[last] = prepareTensorEnd(lastA, a);

		for (int b = 0; b < countB; b++)
		{
			TT2[0] = prepareTensorStart(firstB, b);
			TT2[last] = prepareTensorEnd(lastB, b);

			contractTensor(blas, TT1[0], TT2[0], scratchA);
			// Remaining cores; this index is independent of `a` above.
			for (int core = 1; core < n; core++)
			{
				contractTensor(blas, scratchA, TT1[core], scratchB);
				contractTensor(blas, scratchB, TT2[core], scratchA, 2);
			}

			// Blocking copy of the first element of the final result.
			type partial = 0;
			cudaMemcpy(&partial, scratchA.deviceData, sizeof(type), cudaMemcpyDeviceToHost);
			total += partial;
		}
	}
	cudaEventRecord(toc, NULL);
	cudaEventSynchronize(toc);

	float msecTotal = 0.0f;
	cudaEventElapsedTime(&msecTotal, tic, toc);
	printf("Time: %.3fms\n", msecTotal);
	printf("Ops: %.0f\n", bops);
	double gigaFlops = (bops * 1.0e-9f) / (msecTotal / 1000.0f);
	printf("Perf= %.2f GFlop/s\n", gigaFlops);

	cublasDestroy(blas);
	cudaDeviceReset();

	printf("%.5e \n", total);
	exit(0);
}

开发者ID:thomas-hoer，项目名称:cuTT，代码行数:58，代码来源:bigSizeTensors.cpp

示例15: gpuNUFFT::GpuNUFFTOperator::stopTiming

// Ends the current timing interval: records `stop` on stream 0, waits for
// it, and returns the time between `start` and `stop` as reported by
// cudaEventElapsedTime (milliseconds). Each CUDA call is passed through
// HANDLE_ERROR.
float gpuNUFFT::GpuNUFFTOperator::stopTiming()
{
  float elapsedMs;

  HANDLE_ERROR( cudaEventRecord(stop, 0) );
  HANDLE_ERROR( cudaEventSynchronize(stop) );
  HANDLE_ERROR( cudaEventElapsedTime(&elapsedMs, start, stop) );

  return elapsedMs;
}

开发者ID:davidssmith，项目名称:TRON，代码行数:9，代码来源:gpuNUFFT_operator.cpp

示例16: NVEncFilter::filter

// Runs this filter on one input frame and, when m_bCheckPerformance is set,
// accumulates the time spent into m_dFilterTimeMs / m_nFilterRunCount.
//
// Steps, in order:
//   1. Optionally record the start event.
//   2. For a null input, report zero output frames and clear the first slot.
//   3. In overwrite mode, with a valid input and an empty first output slot,
//      use the input frame itself as the single output.
//   4. Call run_filter().
//   5. When not overwriting and frames were produced, copy timestamp /
//      duration / flags / picstruct from the input as selected by
//      m_nPathThrough.
//   6. Optionally record and wait on the finish event and add the elapsed
//      milliseconds to the running total.
//
// Returns the status of run_filter(), or NV_ENC_ERR_INVALID_CALL when
// timestamp pass-through is requested but the filter did not produce exactly
// one frame. CUDA event failures are logged and otherwise ignored.
NVENCSTATUS NVEncFilter::filter(FrameInfo *pInputFrame, FrameInfo **ppOutputFrames, int *pOutputFrameNum) {
    cudaError_t cudaerr = cudaSuccess;
    if (m_bCheckPerformance) {
        cudaerr = cudaEventRecord(*m_peFilterStart.get());
        if (cudaerr != cudaSuccess) {
            AddMessage(RGY_LOG_ERROR, _T("failed cudaEventRecord(m_peFilterStart): %s.\n"), char_to_tstring(cudaGetErrorString(cudaerr)).c_str());
        }
    }

    // NOTE(review): ppOutputFrames and pOutputFrameNum are dereferenced here
    // without a null check, although ppOutputFrames is null-checked below.
    if (pInputFrame == nullptr) {
        *pOutputFrameNum = 0;
        ppOutputFrames[0] = nullptr;
    }
    if (m_pParam
        && m_pParam->bOutOverwrite // overwrite mode?
        && pInputFrame != nullptr && pInputFrame->ptr != nullptr // is there an input frame?
        && ppOutputFrames != nullptr && ppOutputFrames[0] == nullptr) { // can the output slot be set?
        ppOutputFrames[0] = pInputFrame;
        *pOutputFrameNum = 1;
    }
    const auto ret = run_filter(pInputFrame, ppOutputFrames, pOutputFrameNum);
    const int nOutFrame = *pOutputFrameNum;
    // NOTE(review): m_pParam is dereferenced unconditionally here although it
    // is null-checked above, and pInputFrame is dereferenced below even though
    // it may be nullptr when run_filter still produces frames — confirm the
    // callers' guarantees.
    if (!m_pParam->bOutOverwrite && nOutFrame > 0) {
        if (m_nPathThrough & FILTER_PATHTHROUGH_TIMESTAMP) {
            if (nOutFrame != 1) {
                AddMessage(RGY_LOG_ERROR, _T("timestamp path through can only be applied to 1-in/1-out filter.\n"));
                return NV_ENC_ERR_INVALID_CALL;
            } else {
                ppOutputFrames[0]->timestamp = pInputFrame->timestamp;
                ppOutputFrames[0]->duration  = pInputFrame->duration;
            }
        }
        for (int i = 0; i < nOutFrame; i++) {
            if (m_nPathThrough & FILTER_PATHTHROUGH_FLAGS)     ppOutputFrames[i]->flags     = pInputFrame->flags;
            if (m_nPathThrough & FILTER_PATHTHROUGH_PICSTRUCT) ppOutputFrames[i]->picstruct = pInputFrame->picstruct;
        }
    }
    if (m_bCheckPerformance) {
        cudaerr = cudaEventRecord(*m_peFilterFin.get());
        if (cudaerr != cudaSuccess) {
            AddMessage(RGY_LOG_ERROR, _T("failed cudaEventRecord(m_peFilterFin): %s.\n"), char_to_tstring(cudaGetErrorString(cudaerr)).c_str());
        }
        // Block until the finish event completes so the interval can be read.
        cudaerr = cudaEventSynchronize(*m_peFilterFin.get());
        if (cudaerr != cudaSuccess) {
            AddMessage(RGY_LOG_ERROR, _T("failed cudaEventSynchronize(m_peFilterFin): %s.\n"), char_to_tstring(cudaGetErrorString(cudaerr)).c_str());
        }
        // Starts at zero, so a failed query adds nothing to the total while
        // the run counter is still incremented.
        float time_ms = 0.0f;
        cudaerr = cudaEventElapsedTime(&time_ms, *m_peFilterStart.get(), *m_peFilterFin.get());
        if (cudaerr != cudaSuccess) {
            AddMessage(RGY_LOG_ERROR, _T("failed cudaEventElapsedTime(m_peFilterStart - m_peFilterFin): %s.\n"), char_to_tstring(cudaGetErrorString(cudaerr)).c_str());
        }
        m_dFilterTimeMs += time_ms;
        m_nFilterRunCount++;
    }
    return ret;
}

开发者ID:ming-hai，项目名称:NVEnc，代码行数:56，代码来源:NVEncFilter.cpp

示例17: sobel1

void sobel1(int *h_result, unsigned int *h_pic, int xsize, int ysize, int thresh)
{

	
	int *d_result;
	unsigned int *d_pic;
	
	 
	int resultSize = xsize * ysize  * 3 * sizeof(int);
	int picSize = xsize * ysize * sizeof(int);

	 
	cudaMalloc( (void**)&d_result, resultSize);
	if( !d_result) {
		exit(-1);
	}
	cudaMalloc( (void**)&d_pic, picSize);
	if( !d_pic) {
		exit(-1);
	}

	 
	cudaMemcpy(d_result, h_result, resultSize, cudaMemcpyHostToDevice);
	cudaMemcpy(d_pic, h_pic, picSize, cudaMemcpyHostToDevice);
	
	 
	
	dim3 threadsPerBlock(BLOCKSIZE, BLOCKSIZE);
	dim3 numBlocks(ceil((float)ysize/(float)threadsPerBlock.x), ceil((float)xsize/(float)threadsPerBlock.y));
	
	 
	cudaEvent_t start, stop;
	cudaEventCreate(&start);
	cudaEventCreate(&stop);
{	__set_CUDAConfig(numBlocks, threadsPerBlock ); 
          
	d_sobel1 (d_result, d_pic, xsize, ysize, thresh);}
          
	
	 
	cudaEventSynchronize(stop);
	float elapsedTime;
	cudaEventElapsedTime(&elapsedTime, start, stop);
	cudaEventDestroy(start);
	cudaEventDestroy(stop);

	 
	 
	cudaMemcpy(h_result, d_result, resultSize, cudaMemcpyDeviceToHost);
	cudaMemcpy(h_pic, d_pic, picSize, cudaMemcpyDeviceToHost);

	 
	cudaFree(d_result);
	cudaFree(d_pic);
	
}

开发者ID:drolfe00，项目名称:CUDAVerificationkernels，代码行数:56，代码来源:cudaSobel.cpp

示例18: dslashCUDA

// execute kernel
double dslashCUDA(int niter) {

  cudaEvent_t start, end;
  cudaEventCreate(&start);
  cudaEventCreate(&end);
  cudaEventRecord(start, 0);

  for (int i = 0; i < niter; i++) {
    switch (test_type) {
    case 0:
      if (transfer) {
	dslashQuda(spinorOut->V(), spinor->V(), &inv_param, parity);
      } else {
	//inv_param.input_location = QUDA_CUDA_FIELD_LOCATION;
	//inv_param.output_location = QUDA_CUDA_FIELD_LOCATION;
	//dslashQuda(cudaSpinorOut->V(), cudaSpinor->V(), &inv_param, parity);
	dirac->Dslash(*cudaSpinorOut, *cudaSpinor, parity);
      }
      break;
    case 1:
    case 2:
      if (transfer) {
	MatQuda(spinorOut->V(), spinor->V(), &inv_param);
      } else {
	dirac->M(*cudaSpinorOut, *cudaSpinor);
      }
      break;
    case 3:
    case 4:
      if (transfer) {
	MatDagMatQuda(spinorOut->V(), spinor->V(), &inv_param);
      } else {
	dirac->MdagM(*cudaSpinorOut, *cudaSpinor);
      }
      break;
    }
  }
    
  cudaEventRecord(end, 0);
  cudaEventSynchronize(end);
  float runTime;
  cudaEventElapsedTime(&runTime, start, end);
  cudaEventDestroy(start);
  cudaEventDestroy(end);

  double secs = runTime / 1000; //stopwatchReadSeconds();

  // check for errors
  cudaError_t stat = cudaGetLastError();
  if (stat != cudaSuccess)
    printfQuda("with ERROR: %s\n", cudaGetErrorString(stat));

  return secs;
}

开发者ID:kpetrov，项目名称:quda，代码行数:55，代码来源:dslash_test.cpp

示例19: stop_timing_cuda

/*
 * Ends a timing interval, prints it, and destroys both events.
 *
 * start    : event recorded at the beginning of the interval.
 * stop     : event that is recorded here (stream 0) and waited on.
 * info_str : label printed in front of the measured time.
 *
 * Both *start and *stop are destroyed before returning and must not be
 * used again by the caller.
 */
void stop_timing_cuda(cudaEvent_t* start,cudaEvent_t* stop, char* info_str) {
  /* cudaEventElapsedTime writes through a float*, so the local is a plain
     float regardless of how realw is configured. It starts at zero so a
     failed query prints 0 instead of an indeterminate value. */
  float time = 0.0f;
  // stops events
  cudaEventRecord( *stop, 0 );
  cudaEventSynchronize( *stop );
  cudaEventElapsedTime( &time, *start, *stop );
  cudaEventDestroy( *start );
  cudaEventDestroy( *stop );
  // user output
  printf("%s: Execution Time = %f ms\n",info_str,time);
}

开发者ID:Kerilk，项目名称:specfem3d_globe，代码行数:11，代码来源:helper_functions_gpu.c

示例20: dslashCUDA

// execute kernel
double dslashCUDA() {

  printfQuda("Executing %d kernel loops...\n", loops);
  fflush(stdout);

  if (test_type < 2)
    dirac->Tune(*cudaSpinorOut, *cudaSpinor, *tmp);
  else
    dirac->Tune(cudaSpinorOut->Even(), cudaSpinor->Even(), *tmp);

  cudaEvent_t start, end;
  cudaEventCreate(&start);
  cudaEventRecord(start, 0);
  cudaEventSynchronize(start);

  for (int i = 0; i < loops; i++) {
    switch (test_type) {
    case 0:
      if (transfer) {
	dslashQuda(spinorOut->V(), spinor->V(), &inv_param, parity);
      } else {
	dirac->Dslash(*cudaSpinorOut, *cudaSpinor, parity);
      }
      break;
    case 1:
    case 2:
      if (transfer) {
	MatQuda(spinorOut->V(), spinor->V(), &inv_param);
      } else {
	dirac->M(*cudaSpinorOut, *cudaSpinor);
      }
      break;
    }
  }
    
  cudaEventCreate(&end);
  cudaEventRecord(end, 0);
  cudaEventSynchronize(end);
  float runTime;
  cudaEventElapsedTime(&runTime, start, end);
  cudaEventDestroy(start);
  cudaEventDestroy(end);

  double secs = runTime / 1000; //stopwatchReadSeconds();

  // check for errors
  cudaError_t stat = cudaGetLastError();
  if (stat != cudaSuccess)
    printf("with ERROR: %s\n", cudaGetErrorString(stat));

  printf("done.\n\n");

  return secs;
}

开发者ID:fwinter，项目名称:quda，代码行数:55，代码来源:domain_wall_dslash_test.cpp

注：本文中的cudaEventElapsedTime函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。