One problem is that the logical sequence of your program is incorrect. This line assigns to `data` a pointer
provided by the CUDA API:
err = cudaHostAlloc((void**)&data, data_file_size, cudaHostAllocMapped);
This line then overwrites that value, with a new one:
data = (uint32_t*) mmap(0, data_file_size, PROT_READ, MAP_PRIVATE, data_file, 0);
At that point, the value of `data` is no longer recognized by the CUDA API as a pinned memory region, so when you call this:
err = cudaHostGetDevicePointer((void**)&dev_data, (void*)data, 0);
you get an error, because the pointer now held in `data` (the one returned by `mmap`) is not one the CUDA API recognizes.
EDIT: (based on this question)
Apart from that issue, it seems that if you change the file handling from read-only to read-write, this process can be made to work (it throws no runtime errors). Here's a complete example (which doesn't contain the above logical flaw) that demonstrates this (I previously created a `test.dat` file of size 566316 bytes):
$ cat t706.cu
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdint.h>
/*
 * Demonstration: memory-map "test.dat" read-write, pin the mapping with
 * cudaHostRegister, then ask the runtime for the matching device pointer
 * with cudaHostGetDevicePointer.
 *
 * Prints the file size on stdout. On any failure a diagnostic is printed and
 * the process exits with EXIT_FAILURE (process teardown releases whatever was
 * acquired up to that point). Returns EXIT_SUCCESS when every step succeeds.
 */
int main(void)
{
    /* Device 0 must be able to map host memory, otherwise nothing below applies. */
    cudaDeviceProp cuda_prop;
    cudaError_t err = cudaGetDeviceProperties(&cuda_prop, 0);
    if (err != cudaSuccess) {
        fprintf(stderr, "cudaGetDeviceProperties fail: %s\n", cudaGetErrorString(err));
        exit(EXIT_FAILURE);
    }
    if (!cuda_prop.canMapHostMemory)
        exit(EXIT_FAILURE);

    err = cudaSetDeviceFlags(cudaDeviceMapHost);
    if (err != cudaSuccess) {
        fprintf(stderr, "cudaSetDeviceFlags fail: %s\n", cudaGetErrorString(err));
        exit(EXIT_FAILURE);
    }

    /* The file is opened read-write so the mapping can be PROT_READ|PROT_WRITE. */
    int data_file = open("test.dat", O_RDWR);
    if (data_file < 0) {
        perror("open");
        exit(EXIT_FAILURE);
    }

    struct stat buf;
    if (fstat(data_file, &buf) != 0) {
        perror("fstat");
        exit(EXIT_FAILURE);
    }

    /* Keep the size in size_t: it is passed as a length to mmap and cudaHostRegister. */
    size_t data_file_size = (size_t)buf.st_size;
    printf("data_file_size = %zu\n", data_file_size);

    char *data = (char *) mmap(0, data_file_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, data_file, 0);
    if (data == MAP_FAILED) {
        printf("mmap failure\n");
        exit(EXIT_FAILURE);
    }

    /*
     * Pin the mapped range (this is the call marked "ERROR HERE" in the
     * original listing).
     * NOTE(review): cudaHostRegisterMapped is the flag documented for memory
     * whose device pointer is queried afterwards; cudaHostRegisterDefault is
     * kept because it is what this demonstration was run with - confirm.
     */
    err = cudaHostRegister(data, data_file_size, cudaHostRegisterDefault);
    if (err != cudaSuccess) {
        printf("cudaHostRegister fail\n");
        exit(EXIT_FAILURE);
    }

    char *dev_data;
    err = cudaHostGetDevicePointer((void**)&dev_data, (void*)data, 0);
    if (err == cudaErrorMemoryAllocation) {
        printf("cudaHostGetDevicePointer - Mem Alloc Err\n");
        exit(EXIT_FAILURE);
    } else if (err == cudaErrorInvalidValue) {
        printf("cudaHostGetDevicePointer - Invalid Val Err\n");
        exit(EXIT_FAILURE);
    } else if (err != cudaSuccess) {
        /* Any other error code used to fall through unnoticed. */
        fprintf(stderr, "cudaHostGetDevicePointer fail: %s\n", cudaGetErrorString(err));
        exit(EXIT_FAILURE);
    }

    /* Release in reverse order of acquisition. */
    err = cudaHostUnregister(data);
    if (err != cudaSuccess)
        fprintf(stderr, "cudaHostUnregister fail: %s\n", cudaGetErrorString(err));
    if (munmap(data, data_file_size) != 0)
        perror("munmap");
    if (close(data_file) != 0)
        perror("close");

    return EXIT_SUCCESS;
}
$ nvcc -arch=sm_30 -o t706 t706.cu
$ ./t706
data_file_size = 566316
$