• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    公众号

Python driver.mem_get_info函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中pycuda.driver.mem_get_info函数的典型用法代码示例。如果您正苦于以下问题:Python mem_get_info函数的具体用法?Python mem_get_info怎么用?Python mem_get_info使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了mem_get_info函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: allocate

    def allocate(self, size):
        """Allocate ``size`` bytes from the pool, writing a trace to ``self.logfile``.

        Before delegating to ``DeviceMemoryPool.allocate`` this logs the
        requested size, the free memory recorded after the previous
        allocation, the current ``cuda.mem_get_info()`` figures, the pool's
        held/active block counts and a description of the call site, followed
        by a histogram of the blocks currently tracked in ``self.blocks``.
        When ``self.interactive`` is set, it waits for Enter before allocating.

        Returns whatever ``DeviceMemoryPool.allocate`` returns.
        """
        from traceback import extract_stack
        call_chain = tuple(frame[2] for frame in extract_stack())
        description = self.describe(call_chain, size)

        # Count tracked blocks per (size, description) pair.
        histogram = {}
        for block_key in self.blocks.itervalues():
            bsize, descr = block_key
            histogram[bsize, descr] = histogram.get((bsize, descr), 0) + 1

        from pytools import common_prefix
        # Shared leading part of all descriptions; stripped below to keep the
        # histogram lines short.
        cpfx = common_prefix(descr for bsize, descr in histogram)

        header_values = (
            (size, self.last_free)
            + cuda.mem_get_info()
            + (self.held_blocks, self.active_blocks, description))
        header = (
            "\n  Allocation of size %d occurring "
            "(mem: last_free:%d, free: %d, total:%d) (pool: held:%d, active:%d):"
            "\n      at: %s") % header_values
        print >> self.logfile, header

        for (bsize, descr), count in sorted(histogram.iteritems()):
            line = "  %s (%d bytes): %dx" % (descr[len(cpfx):], bsize, count)
            print >> self.logfile, line

        if self.interactive:
            raw_input("  [Enter]")

        result = DeviceMemoryPool.allocate(self, size)
        # Remember what this block is for, and how much memory was left
        # right after it was handed out.
        self.blocks[result] = size, description
        self.last_free = cuda.mem_get_info()[0]
        return result
开发者ID:minrk,项目名称:PyCUDA,代码行数:33,代码来源:tools.py


示例2: test_memleak

def test_memleak():
    """Run the encoder test 100 times and log how much free GPU memory was lost.

    Each iteration opens a context on device 0, logs the free/total memory,
    tears the context down and then runs ``test_encoder`` with random
    dimensions. The free memory seen in the first iteration is compared with
    the free memory measured in one final context after the loop.
    """
    log.info("test_memleak()")
    from pycuda import driver
    ctx_flags = driver.ctx_flags.SCHED_AUTO | driver.ctx_flags.MAP_HOST

    # always use the first device for this test
    baseline_free = None
    for iteration in range(100):
        ctx = driver.Device(0).make_context(flags=ctx_flags)
        if baseline_free is None:
            baseline_free = driver.mem_get_info()[0]
        free_now, total_now = driver.mem_get_info()
        log.info("%s%% free_memory: %s MB, total_memory: %s MB", str(iteration).rjust(3), free_now/1024/1024, total_now/1024/1024)
        ctx.pop()
        ctx.detach()

        # random frame size (multiples of 8) and image count for this round
        width = random.randint(16, 128)*8
        height = random.randint(16, 128)*8
        count = random.randint(2, 10)
        test_encoder(encoder_module, options={}, dimensions=[(width, height)], n_images=count)

    # measure once more in a fresh context to see what was not given back
    ctx = driver.Device(0).make_context(flags=ctx_flags)
    final_free = driver.mem_get_info()[0]
    ctx.pop()
    ctx.detach()
    log.info("memory lost: %s MB", (baseline_free-final_free)/1024/1024)
开发者ID:svn2github,项目名称:Xpra,代码行数:25,代码来源:test_nvenc.py


示例3: __init__

    def __init__(self, init_data, n_generators):
        """Create a CUDA context, compile the kernels and allocate device buffers.

        Parameters
        ----------
        init_data :
            Array whose bytes are copied to the device; its ``shape[0]`` is
            stored as ``self.width_mat``.
            NOTE(review): the 2D launch grid below is derived from
            ``shape[0]`` only, which suggests a square matrix - confirm.
        n_generators :
            Number of cuRAND generator states to allocate and initialise.
        """

        # A new context is made current on ``curr_gpu`` and the kernel source
        # is compiled inside it.
        self.ctx = curr_gpu.make_context()
        self.module = pycuda.compiler.SourceModule(kernels_cuda_src, no_extern_c=True)
        (free, total) = cuda.mem_get_info()
        print(("Global memory occupancy:%f%% free" % (free * 100 / total)))
        print(("Global free memory :%i Mo free" % (free / 10 ** 6)))

        ################################################################################################################

        # Device copy of the input data.
        self.width_mat = np.int32(init_data.shape[0])
        #        self.gpu_init_data = ga.to_gpu(init_data)
        self.gpu_init_data = cuda.mem_alloc(init_data.nbytes)
        cuda.memcpy_htod(self.gpu_init_data, init_data)

        # Zero-filled float32 host buffer with the same shape as the input,
        # mirrored on the device below.
        self.cpu_new_data = np.zeros_like(init_data, dtype=np.float32)
        print("size new data = ", self.cpu_new_data.nbytes / 10 ** 6)
        (free, total) = cuda.mem_get_info()
        print(("Global memory occupancy:%f%% free" % (free * 100 / total)))
        print(("Global free memory :%i Mo free" % (free / 10 ** 6)))

        self.gpu_new_data = cuda.mem_alloc(self.cpu_new_data.nbytes)
        cuda.memcpy_htod(self.gpu_new_data, self.cpu_new_data)
        #        self.gpu_new_data = ga.to_gpu(self.cpu_new_data)

        # One float32 accumulator per row/column index, zeroed on host and device.
        self.cpu_vect_sum = np.zeros((self.width_mat,), dtype=np.float32)
        self.gpu_vect_sum = cuda.mem_alloc(self.cpu_vect_sum.nbytes)
        cuda.memcpy_htod(self.gpu_vect_sum, self.cpu_vect_sum)
        #        self.gpu_vect_sum = ga.to_gpu(self.cpu_vect_sum)
        ################################################################################################################
        # Handles to the kernels defined in the compiled module.
        self.init_rng = self.module.get_function("init_rng")
        self.gen_rand_mat = self.module.get_function("gen_rand_mat")
        self.sum_along_axis = self.module.get_function("sum_along_axis")
        self.norm_along_axis = self.module.get_function("norm_along_axis")
        self.init_vect_sum = self.module.get_function("init_vect_sum")
        self.copy_mat = self.module.get_function("copy_mat")
        ################################################################################################################
        # Allocate one curandStateXORWOW per generator and seed them on the
        # device (fixed seed 1, offset 0), 64 threads per block.
        self.n_generators = n_generators
        seed = 1
        self.rng_states = cuda.mem_alloc(
            n_generators
            * characterize.sizeof("curandStateXORWOW", "#include <curand_kernel.h>")
        )
        self.init_rng(
            np.int32(n_generators),
            self.rng_states,
            np.uint64(seed),
            np.uint64(0),
            block=(64, 1, 1),
            grid=(n_generators // 64 + 1, 1),
        )
        # NOTE(review): the result of this query is never used.
        (free, total) = cuda.mem_get_info()

        # Launch geometry stored for later kernel calls: 32x32 threads per
        # block, with enough blocks in each direction to cover ``width_mat``.
        size_block_x = 32
        size_block_y = 32
        n_blocks_x = int(self.width_mat) // (size_block_x) + 1
        n_blocks_y = int(self.width_mat) // (size_block_y) + 1
        self.grid = (n_blocks_x, n_blocks_y, 1)
        self.block = (size_block_x, size_block_y, 1)
开发者ID:koszullab,项目名称:centroID,代码行数:59,代码来源:cuda_lib.py


示例4: swap_out_to_CPU

def swap_out_to_CPU(elem):
	"""Move one data package from GPU memory into a host NumPy buffer.

	``elem`` is a ``(u, ss, sp)`` key into ``data_list``. The device buffer
	is copied into a new NumPy array, freed, and the package is moved from
	``gpu_list`` to ``cpu_list``.

	Returns True on success, False when the host does not have enough free
	memory (according to ``cpu_mem_check``) or the host buffer cannot be
	allocated.
	"""
	u, ss, sp = elem
	dp = data_list[u][ss][sp]
	nbytes = dp.data_bytes
	log_memory = log_type in ['memory']

	# Check first that the host has room for the data.
	MemFree = cpu_mem_check()

	if log_memory:
		log_str = "CPU MEM CEHCK Before swap out: %s Free, %s Maximum, %s Want to use"%(print_bytes(MemFree),'-',print_bytes(nbytes))
		log(log_str,'memory',log_type)

	if nbytes > MemFree:
		# not enough memory for swap out to CPU
		return False

	# The check above can still be invalidated by another process allocating
	# in the meantime, so the allocation itself is guarded as well.
	try:
		buf = numpy.empty((dp.data_memory_shape), dtype= dp.data_contents_memory_dtype)
	except Exception:
		# Host allocation failed. ``Exception`` rather than a bare except,
		# so KeyboardInterrupt/SystemExit are not swallowed.
		return False

	# do the swap out
	#cuda.memcpy_dtoh_async(buf, dp.devptr, stream=stream[1])
	cuda.memcpy_dtoh(buf, dp.devptr)
	ctx.synchronize()

	# Release the device copy and mark the package as living in host memory.
	dp.devptr.free()
	dp.devptr = None
	dp.data = buf
	dp.data_dtype = numpy.ndarray
	dp.memory_type = 'memory'

	gpu_list.remove(elem)
	cpu_list.append(elem)

	if log_memory:
		fm,tm = cuda.mem_get_info()
		log_str = "GPU MEM CEHCK After swap out: %s Free, %s Maximum, %s Want to use"%(print_bytes(fm),print_bytes(tm),print_bytes(nbytes))
		log(log_str,'memory',log_type)

	return True
开发者ID:Anukura,项目名称:Vivaldi,代码行数:54,代码来源:GPU_unit.py


示例5: show_GPU_mem

def show_GPU_mem():
    """Print the free and used GPU memory in Mbytes and as a share of the total."""
    import pycuda.driver as cuda

    # Query the driver once, so that every printed figure describes the same
    # snapshot (repeated queries can return different values).
    free_bytes, total_bytes = cuda.mem_get_info()

    total = float(total_bytes)
    mem_free = float(free_bytes)
    mem_free_per = mem_free/total
    mem_used = float(total_bytes - free_bytes)
    mem_used_per = mem_used/total

    # Single-argument print(...) behaves identically as a Python 2 statement
    # and as a Python 3 function call.
    print('\nGPU memory available {0} Mbytes, {1} % of total \n'.format(
        mem_free/1024**2, 100*mem_free_per))

    print('GPU memory used {0} Mbytes, {1} % of total \n'.format(
        mem_used/1024**2, 100*mem_used_per))
开发者ID:jtksai,项目名称:PyCOOL,代码行数:13,代码来源:misc_functions.py


示例6: swap_out_to_hard_disk

def swap_out_to_hard_disk(elem):
	"""Write one host-resident data package to a swap file on disk.

	``elem`` is a ``(u, ss, sp)`` key into ``data_list``. The package's
	``data`` is written to a file named after ``rank`` and the key, the
	in-memory copy is dropped, and the package is moved from ``cpu_list``
	to ``hard_list``.

	Returns True on success, False when ``df`` reports less available
	space than the package needs.
	"""
	u, ss, sp = elem
	dp = data_list[u][ss][sp]
	nbytes = dp.data_bytes
	log_memory = log_type in ['memory']

	# Ask ``df`` how much space is available in the current directory.
	file_name = '%d_temp'%(rank)
	os.system('df . > %s'%(file_name))

	with open(file_name) as f:
		df_output = f.read()

	# Kept in its own name: the original reused ``ss`` here, which clobbered
	# the key component unpacked from ``elem`` and corrupted the swap file
	# name built below.
	df_fields = df_output.split()

	# Token 10 is the "Available" column of the first data row.
	# NOTE(review): df usually reports 1K blocks, not bytes, while this value
	# is compared with a byte count - confirm the intended unit.
	avail = int(df_fields[10])

	if log_memory:
		log_str = "HARD disk MEM CEHCK Before swap out: %s Free, %s Maximum, %s Want to use"%(print_bytes(avail),'-',print_bytes(nbytes))
		log(log_str,'memory',log_type)

	if nbytes > avail:
		# not enough room on disk for the swap file
		return False

	# temp file name, "temp_data, rank, u, ss, sp"
	file_name = 'temp_data, %s, %s, %s, %s'%(rank, u, ss, sp)
	with open(file_name,'wb') as f:
		f.write(dp.data)

	# Drop the in-memory copy and remember where the data went.
	dp.data = None
	dp.hard_disk = file_name
	dp.memory_type = 'hard_disk'

	cpu_list.remove(elem)
	hard_list.append(elem)

	if log_memory:
		# NOTE(review): the message says "CPU MEM" but the figures come from
		# the GPU driver - confirm which was intended.
		fm,tm = cuda.mem_get_info()
		log_str = "CPU MEM CEHCK After swap out: %s Free, %s Maximum, %s Want to use"%(print_bytes(fm),print_bytes(tm),print_bytes(nbytes))
		log(log_str,'memory',log_type)

	return True
开发者ID:Anukura,项目名称:Vivaldi,代码行数:50,代码来源:GPU_unit.py


示例7: run

    def run(self):
        """Run one FFT on device ``self.number`` and put the result on ``self.output_cpu``.

        Initialises the driver, creates a context on the device, uploads
        ``self.input_cpu``, executes a ``Plan`` on it and pushes the
        downloaded result into ``self.output_cpu``. ``p`` and ``m`` are read
        from the enclosing module.
        """
        drv.init()
        # Only the shape of a0 is used, to size the FFT plan.
        a0=numpy.zeros((p,),dtype=numpy.complex64)
        self.dev = drv.Device(self.number)
        self.ctx = self.dev.make_context()
        # To verify whether all the memory is freed before the next allocation
        # (this does not happen in multithreading).
        print drv.mem_get_info() 
        # NOTE(review): this empty array is replaced two lines below by the
        # to_gpu() upload - the first allocation looks redundant.
        self.gpu_a = garray.empty((self.input_cpu.size,), dtype=numpy.complex64)
        self.gpu_b = garray.zeros_like(self.gpu_a)
        self.gpu_a = garray.to_gpu(self.input_cpu)
        plan = Plan(a0.shape,context=self.ctx)
        plan.execute(self.gpu_a, self.gpu_b, batch=p/m)
        self.temp = self.gpu_b.get()
        # NOTE(review): bare ``output_cpu`` (not ``self.output_cpu``) - this
        # relies on a module-level name; confirm it is intended.
        print output_cpu._closed
        self.output_cpu.put(self.temp)
开发者ID:bbkiwi,项目名称:SpyderWork,代码行数:15,代码来源:2DFFTNoMulti.py


示例8: init_module

def init_module():
    """Select a CUDA device, create the module context and pre-compile the kernels.

    Does nothing when ``context_wrapper`` is already set. Otherwise the
    selected device's context is stored in the module globals ``context`` and
    ``context_wrapper``, kernel sources are generated for every entry of
    ``COLORSPACES_MAP`` and compiled, and the context is popped again before
    returning.
    """
    global context, context_wrapper
    if context_wrapper is not None:
        return
    log_sys_info()
    device_id, device = select_device()
    context = device.make_context(flags=driver.ctx_flags.SCHED_YIELD | driver.ctx_flags.MAP_HOST)
    # make_context() leaves the new context current; the finally clause
    # guarantees it is popped even when kernel generation or compilation
    # fails (previously it stayed on the stack in that case).
    try:
        debug("testing with context=%s", context)
        debug("api version=%s", context.get_api_version())
        free, total = driver.mem_get_info()
        debug("using device %s",  device_info(device))
        debug("memory: free=%sMB, total=%sMB",  int(free/1024/1024), int(total/1024/1024))
        context_wrapper = CudaContextWrapper(context)

        #generate kernel sources:
        for rgb_format, yuv_formats in COLORSPACES_MAP.items():
            m = gen_rgb_to_yuv_kernels(rgb_format, yuv_formats)
            KERNELS_MAP.update(m)
        _kernel_names_ = sorted(set(x[0] for x in KERNELS_MAP.values()))
        log.info("%s csc_nvcuda kernels: %s", len(_kernel_names_), ", ".join(_kernel_names_))

        #now, pre-compile the kernels:
        for src_format, dst_format in KERNELS_MAP.keys():
            get_CUDA_kernel(device_id, src_format, dst_format)
    finally:
        context.pop()
开发者ID:svn2github,项目名称:Xpra,代码行数:25,代码来源:colorspace_converter.py


示例9: filter

    def filter(self, video_input):
        """
        Apply every receptive-field filter to the input video.

        ``video_input`` has either 2 dimensions, with ``shape[1] == self.size``,
        or 3 dimensions, with ``shape[1] * shape[2] == self.size``. It is
        resized via ``video_input.resize`` to ``(shape[0], self.size)`` before
        being uploaded.

        The filters are generated and applied in batches sized from the free
        GPU memory; the transposed output array is returned.
        """
        if len(video_input.shape) == 2:
            # already rasterized: one row per frame
            assert video_input.shape[1] == self.size
        else:
            # frames still have two spatial dimensions
            assert (video_input.shape[1]*video_input.shape[2] ==
                    self.size)
        n_frames = video_input.shape[0]
        # rasterizing inputs
        video_input.resize((n_frames, self.size))

        d_video = parray.to_gpu(video_input)
        d_output = parray.empty((self.num_neurons, n_frames), self.dtype)

        # Batch size: the number of filters fitting in 3/4 of the free memory,
        # rounded down to an even number and capped at the neuron count.
        free_bytes, _total_bytes = cuda.mem_get_info()
        batch = (free_bytes // self.dtype.itemsize) * 3 // 4 // self.size
        batch -= batch % 2
        self.ONE_TIME_FILTERS = min(batch, self.num_neurons)

        blas = la.cublashandle()
        for start in np.arange(0, self.num_neurons, self.ONE_TIME_FILTERS):
            count = min(self.ONE_TIME_FILTERS, self.num_neurons - start)
            self.generate_filters(startbias=start, N_filters=count)
            la.dot(self.filters, d_video, opb='t',
                   C=d_output[start: start+count],
                   handle=blas)
        del self.filters
        return d_output.T()
开发者ID:neurokernel,项目名称:retina,代码行数:33,代码来源:vrf.py


示例10: filter

    def filter(self, V):
        """
        Filter a video V
        Must set up parameters of CS RF first

        Parameters
        ----------
        V : 3D ndarray, with shape (num_frames, Px, Py)

        Returns
        -------
        the filtered output by the gabor filters specified in self
        output is a PitchArray with shape (num_neurons, num_frames),
        jth row of which is the output of jth gabor filter

        """
        d_output = parray.empty((self.num_neurons, V.shape[0]), self.dtype)
        d_video = parray.to_gpu(V.reshape(V.shape[0], V.shape[1]*V.shape[2]))

        free, total = cuda.mem_get_info()
        # Number of filters generated per batch: what fits in 3/4 of the free
        # memory. Floor division keeps this an integer on Python 3 as well
        # (plain "/" would give a float there); for the integer operands
        # involved it gives the same result as Python 2 "/".
        self.ONE_TIME_FILTERS = (free // self.dtype.itemsize) * 3 // 4 // self.Pxall // self.Pyall

        handle = la.cublashandle()
        for i in np.arange(0, self.num_neurons, self.ONE_TIME_FILTERS):
            Nfilters = min(self.ONE_TIME_FILTERS, self.num_neurons - i)
            self.generate_visual_receptive_fields(startbias = i, N_filters = Nfilters)
            # The output pointer is offset by i rows (ld * i elements) so
            # that each batch lands in its own slice of d_output.
            cublasDgemm(
                handle.handle, 't', 'n',
                V.shape[0], int(Nfilters), self.Pxall*self.Pyall,
                self.dx*self.dy,
                d_video.gpudata, d_video.ld,
                self.filters.gpudata, self.filters.ld,
                0,
                int(int(d_output.gpudata)+int(d_output.ld*i*d_output.dtype.itemsize)),
                d_output.ld)
        return d_output.T()
开发者ID:bionet,项目名称:vtem,代码行数:28,代码来源:vrf.py


示例11: init_cuda

def init_cuda():
    """Initialize CUDA functionality

    This function attempts to load the necessary interfaces
    (hardware connectivity) to run CUDA-based filtering. This
    function should only need to be run once per session.

    If the config var (set via mne.set_config or in ENV)
    MNE_USE_CUDA == 'true', this function will be executed when
    importing mne. If this variable is not set, this function can
    be manually executed.

    On success the module globals ``cuda_capable`` (set to True) and the
    three element-wise kernels are populated; on any failure a warning is
    logged and ``cuda_capable`` stays False.
    """
    global cuda_capable
    global cuda_multiply_inplace_c128
    global cuda_halve_c128
    global cuda_real_c128
    if cuda_capable is True:
        logger.info("CUDA previously enabled, currently %s available memory" % sizeof_fmt(mem_get_info()[0]))
        return
    # Triage possible errors for informative messaging
    cuda_capable = False
    try:
        import pycuda.gpuarray
        import pycuda.driver
    except ImportError:
        logger.warning("module pycuda not found, CUDA not enabled")
        return
    try:
        # Initialize CUDA; happens with importing autoinit
        import pycuda.autoinit  # noqa, analysis:ignore
    except ImportError:
        logger.warning("pycuda.autoinit could not be imported, likely " "a hardware error, CUDA not enabled")
        return
    # Make sure scikits.cuda is installed
    try:
        from scikits.cuda import fft as cudafft
    except ImportError:
        logger.warning("module scikits.cuda not found, CUDA not " "enabled")
        return

    # Make our multiply inplace kernel
    from pycuda.elementwise import ElementwiseKernel

    # let's construct our own CUDA multiply in-place function
    cuda_multiply_inplace_c128 = ElementwiseKernel(
        "pycuda::complex<double> *a, pycuda::complex<double> *b", "b[i] *= a[i]", "multiply_inplace"
    )
    cuda_halve_c128 = ElementwiseKernel("pycuda::complex<double> *a", "a[i] /= 2.0", "halve_value")
    cuda_real_c128 = ElementwiseKernel("pycuda::complex<double> *a", "a[i] = real(a[i])", "real_value")

    # Make sure we can use 64-bit FFTs
    try:
        cudafft.Plan(16, np.float64, np.complex128)  # will get auto-GC'ed
    except Exception:
        # Any failure to build the plan is treated as "unsupported".
        # ``Exception`` rather than a bare except, so that
        # KeyboardInterrupt/SystemExit still propagate.
        logger.warning("Device does not support 64-bit FFTs, " "CUDA not enabled")
        return
    cuda_capable = True
    # Figure out limit for CUDA FFT calculations
    logger.info("Enabling CUDA with %s available memory" % sizeof_fmt(mem_get_info()[0]))
开发者ID:TanayGahlot,项目名称:mne-python,代码行数:59,代码来源:cuda.py


示例12: is_memory_enough

    def is_memory_enough(a):
        """Return True when twice ``sys.getsizeof(a)`` is below the free GPU memory.

        Returns an explicit False otherwise (the original fell off the end
        and returned None).

        NOTE(review): ``sys.getsizeof`` is shallow; for objects that do not
        own their data it can under-report badly - confirm this is the
        intended size measure.
        """
        try:
            rest, _total = driver.mem_get_info()
        except driver.LogicError: # child thread cannot use context from the main thread...
            # the following does not work yet

            from pycuda import tools
            import skcuda

            driver.init()
            context = tools.make_default_context() # try to make as new context, but cannot deactivate the old context stack
            device = context.get_device()
            skcuda.misc.init_context(device)
            rest, _total = driver.mem_get_info()

        return (sys.getsizeof(a) * 2) < rest
开发者ID:macronucleus,项目名称:Chromagnon,代码行数:17,代码来源:fftgpu.py


示例13: is_gpu_memory_enough

 def is_gpu_memory_enough(self, a):
     """Return True when ``a`` is considered to fit in the free GPU memory.

     Without CUDA (module flag ``CUDA`` is falsy) the answer is always True.
     Otherwise twice ``sys.getsizeof(a)`` must be below the free memory
     reported by the driver; an explicit False is returned when it is not
     (the original returned None in that case).
     """
     if not CUDA:
         return True
     rest, _total = driver.mem_get_info()
     return (sys.getsizeof(a) * 2) < rest
开发者ID:macronucleus,项目名称:Chromagnon,代码行数:8,代码来源:fftmanager.py


示例14: init_all_devices

def init_all_devices():
    """Probe every CUDA device once and return the list of usable device ids.

    The result is cached in the module global ``DEVICES`` (with a
    description per device in ``DEVICE_INFO``); later calls return the cached
    list. A device is added to the list only when it can map host memory and
    a context can be created on it and queried without raising.
    """
    global DEVICES, DEVICE_INFO
    if DEVICES is not None:
        # already probed
        return  DEVICES
    log.info("CUDA initialization (this may take a few seconds)")
    driver.init()
    DEVICES = []
    DEVICE_INFO = {}
    log("CUDA driver version=%s", driver.get_driver_version())
    ngpus = driver.Device.count()
    if ngpus==0:
        log.info("CUDA %s / PyCUDA %s, no devices found", ".".join([str(x) for x in driver.get_version()]), pycuda.VERSION_TEXT)
        return DEVICES
    da = driver.device_attribute
    cf = driver.ctx_flags
    for i in range(ngpus):
        device = None
        context = None
        # fallback description, used in the error message if Device(i) fails
        devinfo = "gpu %i" % i
        try:
            device = driver.Device(i)
            devinfo = device_info(device)
            log(" + testing device %s: %s", i, devinfo)
            DEVICE_INFO[i] = devinfo
            host_mem = device.get_attribute(da.CAN_MAP_HOST_MEMORY)
            if not host_mem:
                log.warn("skipping device %s (cannot map host memory)", devinfo)
                continue
            context = device.make_context(flags=cf.SCHED_YIELD | cf.MAP_HOST)
            # the context is popped in the finally clause below, whatever
            # happens while querying the device
            try:
                log("   created context=%s", context)
                log("   api version=%s", context.get_api_version())
                free, total = driver.mem_get_info()
                log("   memory: free=%sMB, total=%sMB",  int(free/1024/1024), int(total/1024/1024))
                log("   multi-processors: %s, clock rate: %s", device.get_attribute(da.MULTIPROCESSOR_COUNT), device.get_attribute(da.CLOCK_RATE))
                log("   max block sizes: (%s, %s, %s)", device.get_attribute(da.MAX_BLOCK_DIM_X), device.get_attribute(da.MAX_BLOCK_DIM_Y), device.get_attribute(da.MAX_BLOCK_DIM_Z))
                log("   max grid sizes: (%s, %s, %s)", device.get_attribute(da.MAX_GRID_DIM_X), device.get_attribute(da.MAX_GRID_DIM_Y), device.get_attribute(da.MAX_GRID_DIM_Z))
                max_width = device.get_attribute(da.MAXIMUM_TEXTURE2D_WIDTH)
                max_height = device.get_attribute(da.MAXIMUM_TEXTURE2D_HEIGHT)
                log("   maximum texture size: %sx%s", max_width, max_height)
                log("   max pitch: %s", device.get_attribute(da.MAX_PITCH))
                SMmajor, SMminor = device.compute_capability()
                # major version in the high nibble, minor in the low one
                compute = (SMmajor<<4) + SMminor
                log("   compute capability: %#x (%s.%s)", compute, SMmajor, SMminor)
                if i==0:
                    #we print the list info "header" from inside the loop
                    #so that the log output is bunched up together
                    # NOTE(review): the header is only printed when device 0
                    # gets this far - it is lost if device 0 is skipped or fails.
                    log.info("CUDA %s / PyCUDA %s, found %s device%s:",
                             ".".join([str(x) for x in driver.get_version()]), pycuda.VERSION_TEXT, ngpus, engs(ngpus))
                DEVICES.append(i)
                log.info("  + %s (memory: %s%% free, compute: %s.%s)", device_info(device), 100*free/total, SMmajor, SMminor)
            finally:
                context.pop()
        except Exception as e:
            # a failing device is logged and left out of DEVICES; the loop
            # carries on with the next one
            log.error("error on device %s: %s", devinfo, e)
    return DEVICES
开发者ID:svn2github,项目名称:Xpra,代码行数:56,代码来源:cuda_context.py


示例15: select_device

def select_device(preferred_device_id=-1, preferred_device_name=None, min_compute=0):
    """Pick a CUDA device and return ``(device_id, device)``.

    Devices whose ``DEVICE_STATE`` entry is True (or missing) are tried
    first, the remaining ones second. Within a list, a device matching
    ``preferred_device_id`` or containing ``preferred_device_name`` in its
    description is returned immediately; otherwise the device with the
    highest percentage of free memory is chosen. Devices below
    ``min_compute`` are ignored.

    The id and name preferences default to the "device-id" and "device-name"
    values from ``get_pref``. Returns ``(-1, None)`` when nothing is suitable.
    """
    if preferred_device_name is None:
        preferred_device_name = get_pref("device-name")
    if preferred_device_id<0:
        device_id = get_pref("device-id")
        # guard against a missing preference: ``None >= 0`` raises on Python 3
        if device_id is not None and device_id>=0:
            preferred_device_id = device_id
    devices = init_all_devices()
    free_pct = 0
    cf = driver.ctx_flags
    #split device list according to device state:
    ok_devices = [device_id for device_id in devices if DEVICE_STATE.get(device_id, True) is True]
    nok_devices = [device_id for device_id in devices if DEVICE_STATE.get(device_id, True) is not True]
    # A tuple of pairs (not a dict literal) so that the "OK" list is always
    # tested before the "failing" one, whatever the dict ordering rules of
    # the running interpreter.
    for list_name, device_list in (("OK", ok_devices), ("failing", nok_devices)):
        selected_device_id = None
        selected_device = None
        log("will test %s device%s from %s list: %s", len(device_list), engs(device_list), list_name, device_list)
        for device_id in device_list:
            context = None
            try:
                device = driver.Device(device_id)
                log("select_device: testing device %s: %s", device_id, device_info(device))
                context = device.make_context(flags=cf.SCHED_YIELD | cf.MAP_HOST)
                log("created context=%s", context)
                free, total = driver.mem_get_info()
                log("memory: free=%sMB, total=%sMB",  int(free/1024/1024), int(total/1024/1024))
                tpct = 100*free/total
                SMmajor, SMminor = device.compute_capability()
                compute = (SMmajor<<4) + SMminor
                if compute<min_compute:
                    log("ignoring device %s: compute capability %#x (minimum %#x required)", device_info(device), compute, min_compute)
                elif device_id==preferred_device_id:
                    l = log
                    if len(device_list)>1:
                        l = log.info
                    l("device matches preferred device id %s: %s", preferred_device_id, device_info(device))
                    return device_id, device
                elif preferred_device_name and device_info(device).find(preferred_device_name)>=0:
                    log("device matches preferred device name: %s", preferred_device_name)
                    return device_id, device
                elif tpct>free_pct:
                    # best candidate so far: most free memory
                    selected_device = device
                    selected_device_id = device_id
                    free_pct = tpct
            finally:
                # the test context is always released, also on early return
                if context:
                    context.pop()
                    context.detach()
        if selected_device_id is not None and selected_device_id>=0 and selected_device:
            l = log
            if len(devices)>1:
                l = log.info
            # Report the device actually selected; the loop variables
            # ``device_id``/``device`` hold the last device tested instead.
            l("selected device %s: %s", selected_device_id, device_info(selected_device))
            return selected_device_id, selected_device
    return -1, None
开发者ID:svn2github,项目名称:Xpra,代码行数:56,代码来源:cuda_context.py


示例16: ShowGPUInfo

def ShowGPUInfo():
    """Print the share of free global memory and every attribute of every device."""
    (free,total) = driver.mem_get_info()
    print('Global memory occupancy:%f%% free' % (free*100 / total))
    for devicenum in range(driver.Device.count()):
        device = driver.Device(devicenum)
        attrs = device.get_attributes()
        #Beyond this point is just pretty printing
        print('\n===Attributes for device %d' % devicenum)
        # items() exists on both Python 2 and 3; iteritems() is Python 2 only
        for (key,value) in attrs.items():
            print('    %s:%s' % (str(key), str(value)))
开发者ID:zweiein,项目名称:NeuralNetworkBasedLogisticRegression,代码行数:10,代码来源:nnet_LR_gpu.py


示例17: mem_check_and_malloc

def mem_check_and_malloc(bytes):
	"""Allocate ``bytes`` on the GPU, freeing pooled buffers first if needed.

	When the free memory is smaller than the request, entries of
	``data_pool`` are freed one by one until their accumulated ``usage``
	covers the request (or the pool is exhausted), and the free memory is
	queried again.

	Returns ``(True, devptr)`` on success and ``(False, None)`` when there
	is still not enough free memory.
	"""
	fm,tm = cuda.mem_get_info()
	log_memory = log_type in ['memory']

	if log_memory:
		log_str = "RANK %d, GPU MEM CEHCK before malloc: %s Free, %s Maximum, %s Want to use"%(rank, print_bytes(fm),print_bytes(tm),print_bytes(bytes))
		log(log_str,'memory',log_type)

	if fm < bytes:
		# not enough free memory: release buffers held in the data pool
		print("BUFFER POOL")
		size = fm
		# iterate over a copy, entries are removed from the pool as we go
		for elem in list(data_pool):
			usage = elem['usage']
			devptr = elem['devptr']
			devptr.free()
			# formatted by hand so the output is the same whether print is
			# a statement (Python 2) or a function (Python 3)
			print("FREE data %s" % (usage,))
			size += usage
			data_pool.remove(elem)
			if size >= bytes: break

		# ask the driver again: ``size`` is only an estimate
		fm,tm = cuda.mem_get_info()

	if fm >= bytes:
		# we have enough memory, just malloc
		devptr = cuda.mem_alloc(bytes)

		if log_memory:
			fm,tm = cuda.mem_get_info()
			log_str = "RANK %d, GPU MALLOC AFTER: %s Free, %s Maximum, %s Want to use"%(rank, print_bytes(fm),print_bytes(tm),print_bytes(bytes))
			log(log_str, 'memory', log_type)
		return True, devptr

	# we don't have enough memory
	return False, None
开发者ID:davidhildebrand,项目名称:Vivaldi_public,代码行数:42,代码来源:GPU_unit.py


示例18: meminfo

    def meminfo(self,kernel,k=-1,o=-1,threads=[],name=""):
        """Log the resource usage and occupancy of ``kernel``.

        The log line holds the kernel's local, shared, register and constant
        sizes, its maximum threads per block, ``threads[0]``, the occupancy
        computed for that thread count, and the percentage of free device
        memory. ``name``, ``k`` and ``o`` only label the line.
        """
        free_bytes, total_bytes = cuda.mem_get_info()

        # resource figures reported by the compiled kernel
        shared_bytes = kernel.shared_size_bytes
        reg_count = kernel.num_regs
        local_bytes = kernel.local_size_bytes
        const_bytes = kernel.const_size_bytes
        max_threads = kernel.max_threads_per_block

        record = ctools.OccupancyRecord(
            ctools.DeviceData(), threads[0],
            shared_mem=shared_bytes, registers=reg_count)

        fields = (name, k, o, local_bytes, shared_bytes, reg_count,
                  const_bytes, max_threads, threads[0], record.occupancy,
                  (free_bytes*100)/total_bytes)
        util.log.info("%s(%03d,%d)=L:%d,S:%d,R:%d,C:%d,MT:%d,T:%d,OC:%f,Free:%d" % fields)
开发者ID:andrewbolster,项目名称:multiuserDSM,代码行数:11,代码来源:gpu.py


示例19: run

    def run(self):
        """Set up the CUDA device and serve requests from ``self.argqueue`` forever.

        A context is created on ``self.device``, ``self.r_kernels`` is
        compiled, and the kernel handles are stored on ``self``. Each queue
        item is ``(func_name, *args)``; for a known ``func_name`` the method
        of that name is called and ``(func_name, result)`` is put on
        ``self.resqueue``, otherwise ``None`` is put there.

        Raises ``pycuda._driver.MemoryError`` (after logging) when the
        device cannot be initialised.
        """
        try:
            #Initialise this device
            self.local.dev = cuda.Device(self.device)
            self.local.ctx = self.local.dev.make_context()
            # NOTE(review): make_context() is documented to make the new
            # context current already - confirm this extra push is wanted.
            self.local.ctx.push()
            (free,total)=cuda.mem_get_info()
            util.log.info("Initialising CUDA device %d:(%.2f%% Free)"%(self.device,(free*100.0/total)))
        except pycuda._driver.MemoryError:
            util.log.info("Balls")
            raise

        #Initialise the kernel
        self.local.kernels=SourceModule(self.r_kernels)

        #Kernels
        self.k_osbprepare=self.local.kernels.get_function("lk_osbprepare_permutations")
        self.k_osbsolve=self.local.kernels.get_function("solve_permutations")
        self.k_osblk=self.local.kernels.get_function("lk_max_permutations")
        self.k_solve=self.local.kernels.get_function("solve")
        self.k_isboptimise=self.local.kernels.get_function("isb_optimise_pk")
        self.k_isboptimise_inc=self.local.kernels.get_function("isb_optimise_inc")
        self.k_calcpsd=self.local.kernels.get_function("calc_psd")
        self.k_osb_optimise_p=self.local.kernels.get_function("osb_optimise_p")

        # Requests that map one-to-one onto a method of the same name.
        known_requests=('osb_optimise_p','isb_optimise_p','isb_optimise_inc',
                        'mipb_update_cost','calc_psd')

        #loop to empty queue
        while True:
            #grab args from queue (block until received)
            queueitem=self.argqueue.get()
            func=queueitem[0]
            args=queueitem[1:]

            if func in known_requests:
                result=getattr(self,func)(*args)
                self.resqueue.put((func,result))
            else:
                # unknown request: answer with None so the caller is not
                # left waiting
                self.resqueue.put(None)

            self.argqueue.task_done()#nothing seems to get past this
开发者ID:andrewbolster,项目名称:multiuserDSM,代码行数:54,代码来源:gpu.py


示例20: cuda_mem_check

def cuda_mem_check(device_dictionary,cache_size,arrays):
    """Check that the per-thread cache and the input arrays fit on the GPU.

    The per-thread limit is the smaller of a fixed 512*1000 bytes and the
    device memory left after the input arrays, divided by the device's
    multiprocessor count and its maximum threads per multiprocessor (both
    read from ``device_dictionary``). The process exits when the cache
    (``cache_size`` * 4 bytes) reaches that limit or when the arrays reach
    the total device memory; otherwise a summary table is printed.
    """

    module_logger.info('Checking if the system has enought memory on device.')

    # total size of everything that has to be transferred
    input_size = sum(array.nbytes for array in arrays)

    cache_size_bytes = cache_size *4

    _free, total = driver.mem_get_info()

    max_mem_size=512*1000

    threads_on_device_a = device_dictionary['MULTIPROCESSOR_COUNT']
    threads_on_device_b = device_dictionary['MAX_THREADS_PER_MULTIPROCESSOR']
    memory_limit=(total-input_size)/threads_on_device_a/threads_on_device_b

    limitator=min(max_mem_size,memory_limit)

    if cache_size_bytes >= limitator:

        message = ("Cache memory per thread ("+bytes2human(cache_size_bytes)+") is greater than memory "
                   "limitation per thread ("+bytes2human(limitator)+")")
        module_logger.error(message)
        exit()

    elif input_size >= total:

        message = ("The arrays to transfer ("+bytes2human(input_size)+") is greater than global memory "
                   "limitations ("+bytes2human(total)+")")
        module_logger.error(message)
        exit()

    else:

        headers=("Cache size per thread","Maximum memory size per thread")
        printdata=(bytes2human(cache_size_bytes),bytes2human(limitator))
        stype('\n'+'Memory limitation status on device:')
        table = tabulate.tabulate(zip(headers,printdata), headers=['Variable Name', 'Value'],
                                  tablefmt='rst')
        stype (table+'\n')

        module_logger.ok('The system has enought memory to perform the calculation.')
        module_logger.info('Using '+bytes2human(cache_size_bytes)+' out of '+bytes2human(limitator)+'.')

        # module_logger.warning("Warning: The cuda kernel will use max capacity of graphics procesors, the screen could "
        #                "become unresponsible during the process.")

        warning_text = (bcolors.WARNING +"Warning: The cuda kernel will use max capacity of graphics procesors,"
                        +'\n the screen could become unresponsible during the process.'+ bcolors.ENDC)
        stype('\n'+warning_text+'\n')
开发者ID:pablogsal,项目名称:HADES,代码行数:52,代码来源:cuda_toolbox.py



注:本文中的pycuda.driver.mem_get_info函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python driver.memcpy_dtod函数代码示例发布时间:2022-05-25
下一篇:
Python driver.mem_alloc函数代码示例发布时间:2022-05-25
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap