This article collects and summarizes typical usage examples of the pycuda.driver.mem_get_info function in Python. If you are struggling with questions like "what exactly does mem_get_info do?", "how is mem_get_info used?", or "where can I find mem_get_info examples?", the curated code samples below should help.
Twenty code examples of mem_get_info are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help our system recommend better Python code samples.
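Before the examples, a quick orientation: mem_get_info() takes no arguments and returns a (free, total) tuple, in bytes, for the device behind the current CUDA context, so a context must already exist when it is called. The following minimal sketch is not taken from the examples below; it assumes only a working PyCUDA install and uses pycuda.autoinit to create the context, with illustrative variable names:

import pycuda.autoinit  # noqa: F401 -- importing this creates a default context
import pycuda.driver as cuda

# mem_get_info() reports memory for the device of the *current* context
free_bytes, total_bytes = cuda.mem_get_info()
used_bytes = total_bytes - free_bytes
print("GPU memory: %d MB free / %d MB total (%.1f%% used)" % (
    free_bytes // 1024**2, total_bytes // 1024**2,
    100.0 * used_bytes / total_bytes))

Most of the examples below follow this same pattern: create or obtain a context, call mem_get_info() before and/or after an allocation, and use the free/total figures for logging, leak detection, or sizing decisions.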
Example 1: allocate
def allocate(self, size):
    from traceback import extract_stack
    stack = tuple(frm[2] for frm in extract_stack())
    description = self.describe(stack, size)

    histogram = {}
    for bsize, descr in self.blocks.itervalues():
        histogram[bsize, descr] = histogram.get((bsize, descr), 0) + 1

    from pytools import common_prefix
    cpfx = common_prefix(descr for bsize, descr in histogram)

    print >> self.logfile, \
            "\n  Allocation of size %d occurring " \
            "(mem: last_free:%d, free: %d, total:%d) (pool: held:%d, active:%d):" \
            "\n      at: %s" % (
                (size, self.last_free)
                + cuda.mem_get_info()
                + (self.held_blocks, self.active_blocks,
                   description))

    hist_items = sorted(list(histogram.iteritems()))
    for (bsize, descr), count in hist_items:
        print >> self.logfile, \
                "  %s (%d bytes): %dx" % (descr[len(cpfx):], bsize, count)

    if self.interactive:
        raw_input("  [Enter]")

    result = DeviceMemoryPool.allocate(self, size)
    self.blocks[result] = size, description
    self.last_free, _ = cuda.mem_get_info()
    return result
Author: minrk, Project: PyCUDA, Lines: 33, Source: tools.py
Example 2: test_memleak
def test_memleak():
    log.info("test_memleak()")
    from pycuda import driver
    # use the first device for this test
    start_free_memory = None
    for i in range(100):
        d = driver.Device(0)
        context = d.make_context(flags=driver.ctx_flags.SCHED_AUTO | driver.ctx_flags.MAP_HOST)
        if start_free_memory is None:
            start_free_memory, _ = driver.mem_get_info()
        free_memory, total_memory = driver.mem_get_info()
        log.info("%s%% free_memory: %s MB, total_memory: %s MB", str(i).rjust(3), free_memory/1024/1024, total_memory/1024/1024)
        context.pop()
        context.detach()
        w = random.randint(16, 128)*8
        h = random.randint(16, 128)*8
        n = random.randint(2, 10)
        test_encoder(encoder_module, options={}, dimensions=[(w, h)], n_images=n)

    d = driver.Device(0)
    context = d.make_context(flags=driver.ctx_flags.SCHED_AUTO | driver.ctx_flags.MAP_HOST)
    end_free_memory, _ = driver.mem_get_info()
    context.pop()
    context.detach()
    log.info("memory lost: %s MB", (start_free_memory-end_free_memory)/1024/1024)
Author: svn2github, Project: Xpra, Lines: 25, Source: test_nvenc.py
Example 3: __init__
def __init__(self, init_data, n_generators):
    self.ctx = curr_gpu.make_context()
    self.module = pycuda.compiler.SourceModule(kernels_cuda_src, no_extern_c=True)
    (free, total) = cuda.mem_get_info()
    print("Global memory occupancy: %f%% free" % (free * 100 / total))
    print("Global free memory: %i MB" % (free / 10 ** 6))
    ################################################################################################################
    self.width_mat = np.int32(init_data.shape[0])
    # self.gpu_init_data = ga.to_gpu(init_data)
    self.gpu_init_data = cuda.mem_alloc(init_data.nbytes)
    cuda.memcpy_htod(self.gpu_init_data, init_data)

    self.cpu_new_data = np.zeros_like(init_data, dtype=np.float32)
    print("size of new data (MB) =", self.cpu_new_data.nbytes / 10 ** 6)
    (free, total) = cuda.mem_get_info()
    print("Global memory occupancy: %f%% free" % (free * 100 / total))
    print("Global free memory: %i MB" % (free / 10 ** 6))
    self.gpu_new_data = cuda.mem_alloc(self.cpu_new_data.nbytes)
    cuda.memcpy_htod(self.gpu_new_data, self.cpu_new_data)
    # self.gpu_new_data = ga.to_gpu(self.cpu_new_data)

    self.cpu_vect_sum = np.zeros((self.width_mat,), dtype=np.float32)
    self.gpu_vect_sum = cuda.mem_alloc(self.cpu_vect_sum.nbytes)
    cuda.memcpy_htod(self.gpu_vect_sum, self.cpu_vect_sum)
    # self.gpu_vect_sum = ga.to_gpu(self.cpu_vect_sum)
    ################################################################################################################
    self.init_rng = self.module.get_function("init_rng")
    self.gen_rand_mat = self.module.get_function("gen_rand_mat")
    self.sum_along_axis = self.module.get_function("sum_along_axis")
    self.norm_along_axis = self.module.get_function("norm_along_axis")
    self.init_vect_sum = self.module.get_function("init_vect_sum")
    self.copy_mat = self.module.get_function("copy_mat")
    ################################################################################################################
    self.n_generators = n_generators
    seed = 1
    self.rng_states = cuda.mem_alloc(
        n_generators
        * characterize.sizeof("curandStateXORWOW", "#include <curand_kernel.h>")
    )
    self.init_rng(
        np.int32(n_generators),
        self.rng_states,
        np.uint64(seed),
        np.uint64(0),
        block=(64, 1, 1),
        grid=(n_generators // 64 + 1, 1),
    )
    (free, total) = cuda.mem_get_info()

    size_block_x = 32
    size_block_y = 32
    n_blocks_x = int(self.width_mat) // size_block_x + 1
    n_blocks_y = int(self.width_mat) // size_block_y + 1
    self.grid = (n_blocks_x, n_blocks_y, 1)
    self.block = (size_block_x, size_block_y, 1)
Author: koszullab, Project: centroID, Lines: 59, Source: cuda_lib.py
Example 4: swap_out_to_CPU
def swap_out_to_CPU(elem):
    # prepare variables
    u, ss, sp = elem
    dp = data_list[u][ss][sp]
    bytes = dp.data_bytes

    # we are about to swap this data out to the CPU,
    # so first check that the CPU has enough free memory
    MemFree = cpu_mem_check()
    if log_type in ['memory']:
        fm, tm = cuda.mem_get_info()
        log_str = "CPU MEM CHECK before swap out: %s Free, %s Maximum, %s Want to use" % (print_bytes(MemFree), '-', print_bytes(bytes))
        log(log_str, 'memory', log_type)
    if bytes > MemFree:
        # not enough memory to swap out to the CPU
        return False

    # we have enough memory, so we can swap out
    # (unless another process allocates memory during this swap-out operation)
    try:
        buf = numpy.empty(dp.data_memory_shape, dtype=dp.data_contents_memory_dtype)
    except MemoryError:
        # the memory allocation on the CPU failed after all
        return False

    # do the swap out
    #cuda.memcpy_dtoh_async(buf, dp.devptr, stream=stream[1])
    cuda.memcpy_dtoh(buf, dp.devptr)
    ctx.synchronize()

    dp.devptr.free()
    dp.devptr = None
    dp.data = buf
    dp.data_dtype = numpy.ndarray
    dp.memory_type = 'memory'

    gpu_list.remove(elem)
    cpu_list.append(elem)

    if log_type in ['memory']:
        fm, tm = cuda.mem_get_info()
        log_str = "GPU MEM CHECK after swap out: %s Free, %s Maximum, %s Want to use" % (print_bytes(fm), print_bytes(tm), print_bytes(bytes))
        log(log_str, 'memory', log_type)
    return True
Author: Anukura, Project: Vivaldi, Lines: 54, Source: GPU_unit.py
Example 5: show_GPU_mem
def show_GPU_mem():
    import pycuda.driver as cuda

    # query once so the "free" and "used" figures come from a single
    # consistent snapshot of mem_get_info()
    mem_free, mem_total = [float(x) for x in cuda.mem_get_info()]
    mem_used = mem_total - mem_free

    print '\nGPU memory available {0} Mbytes, {1} % of total \n'.format(
        mem_free/1024**2, 100*mem_free/mem_total)
    print 'GPU memory used {0} Mbytes, {1} % of total \n'.format(
        mem_used/1024**2, 100*mem_used/mem_total)
Author: jtksai, Project: PyCOOL, Lines: 13, Source: misc_functions.py
Example 6: swap_out_to_hard_disk
def swap_out_to_hard_disk(elem):
    # prepare variables
    u, ss, sp = elem
    dp = data_list[u][ss][sp]
    bytes = dp.data_bytes

    # we are about to swap this data out from the CPU to the hard disk,
    # so first check that the hard disk has enough free space
    file_name = '%d_temp' % (rank)
    os.system('df . > %s' % (file_name))
    f = open(file_name)
    s = f.read()
    f.close()
    # use a separate name for the split fields: "ss" is already bound
    # above as part of "elem" and must not be clobbered
    fields = s.split()
    # get the available space; df reports it in 1K blocks
    avail = int(fields[10]) * 1024

    if log_type in ['memory']:
        fm, tm = cuda.mem_get_info()
        log_str = "HARD DISK MEM CHECK before swap out: %s Free, %s Maximum, %s Want to use" % (print_bytes(avail), '-', print_bytes(bytes))
        log(log_str, 'memory', log_type)
    if bytes > avail:
        # not enough space to make a swap file on the hard disk
        return False

    # we have enough hard disk space to make the swap file
    # temp file name: "temp_data, rank, u, ss, sp"
    file_name = 'temp_data, %s, %s, %s, %s' % (rank, u, ss, sp)
    f = open(file_name, 'wb')
    f.write(dp.data)
    f.close()

    dp.data = None
    dp.hard_disk = file_name
    dp.memory_type = 'hard_disk'

    cpu_list.remove(elem)
    hard_list.append(elem)

    if log_type in ['memory']:
        fm, tm = cuda.mem_get_info()
        log_str = "CPU MEM CHECK after swap out: %s Free, %s Maximum, %s Want to use" % (print_bytes(fm), print_bytes(tm), print_bytes(bytes))
        log(log_str, 'memory', log_type)
    return True
Author: Anukura, Project: Vivaldi, Lines: 50, Source: GPU_unit.py
Example 7: run
def run(self):
    drv.init()
    a0 = numpy.zeros((p,), dtype=numpy.complex64)
    self.dev = drv.Device(self.number)
    self.ctx = self.dev.make_context()
    # to verify whether all the memory is freed before the next allocation
    # (this does not happen in multithreading)
    print drv.mem_get_info()
    self.gpu_a = garray.empty((self.input_cpu.size,), dtype=numpy.complex64)
    self.gpu_b = garray.zeros_like(self.gpu_a)
    self.gpu_a = garray.to_gpu(self.input_cpu)
    plan = Plan(a0.shape, context=self.ctx)
    plan.execute(self.gpu_a, self.gpu_b, batch=p/m)
    self.temp = self.gpu_b.get()
    print output_cpu._closed
    self.output_cpu.put(self.temp)
Author: bbkiwi, Project: SpyderWork, Lines: 15, Source: 2DFFTNoMulti.py
Example 8: init_module
def init_module():
    global context, context_wrapper
    if context_wrapper is not None:
        return
    log_sys_info()
    device_id, device = select_device()
    context = device.make_context(flags=driver.ctx_flags.SCHED_YIELD | driver.ctx_flags.MAP_HOST)
    debug("testing with context=%s", context)
    debug("api version=%s", context.get_api_version())
    free, total = driver.mem_get_info()
    debug("using device %s", device_info(device))
    debug("memory: free=%sMB, total=%sMB", int(free/1024/1024), int(total/1024/1024))
    context_wrapper = CudaContextWrapper(context)

    #generate kernel sources:
    for rgb_format, yuv_formats in COLORSPACES_MAP.items():
        m = gen_rgb_to_yuv_kernels(rgb_format, yuv_formats)
        KERNELS_MAP.update(m)
    _kernel_names_ = sorted(set([x[0] for x in KERNELS_MAP.values()]))
    log.info("%s csc_nvcuda kernels: %s", len(_kernel_names_), ", ".join(_kernel_names_))

    #now, pre-compile the kernels:
    for src_format, dst_format in KERNELS_MAP.keys():
        get_CUDA_kernel(device_id, src_format, dst_format)
    context.pop()
Author: svn2github, Project: Xpra, Lines: 25, Source: colorspace_converter.py
Example 9: filter
def filter(self, video_input):
    """
    Perform RF filtering on an input video for all the RFs.
    """
    if len(video_input.shape) == 2:
        # input has 2 dimensions
        assert video_input.shape[1] == self.size
    else:
        # input has 3 dimensions
        assert (video_input.shape[1]*video_input.shape[2] == self.size)
        # rasterize the input
        video_input.resize((video_input.shape[0], self.size))

    d_video = parray.to_gpu(video_input)
    d_output = parray.empty((self.num_neurons, video_input.shape[0]),
                            self.dtype)

    free, total = cuda.mem_get_info()
    self.ONE_TIME_FILTERS = ((free // self.dtype.itemsize)
                             * 3 // 4 // self.size)
    self.ONE_TIME_FILTERS -= self.ONE_TIME_FILTERS % 2
    self.ONE_TIME_FILTERS = min(self.ONE_TIME_FILTERS, self.num_neurons)

    handle = la.cublashandle()
    for i in np.arange(0, self.num_neurons, self.ONE_TIME_FILTERS):
        Nfilters = min(self.ONE_TIME_FILTERS, self.num_neurons - i)
        self.generate_filters(startbias=i, N_filters=Nfilters)
        la.dot(self.filters, d_video, opb='t',
               C=d_output[i: i+Nfilters],
               handle=handle)
    del self.filters
    return d_output.T()
Author: neurokernel, Project: retina, Lines: 33, Source: vrf.py
Example 10: filter
def filter(self, V):
    """
    Filter a video V.

    Must set up the parameters of the CS RFs first.

    Parameters
    ----------
    V : 3D ndarray, with shape (num_frames, Px, Py)

    Returns
    -------
    The filtered output of the gabor filters specified in self:
    a PitchArray with shape (num_neurons, num_frames), whose jth
    row is the output of the jth gabor filter.
    """
    d_output = parray.empty((self.num_neurons, V.shape[0]), self.dtype)
    d_video = parray.to_gpu(V.reshape(V.shape[0], V.shape[1]*V.shape[2]))

    free, total = cuda.mem_get_info()
    self.ONE_TIME_FILTERS = (free / self.dtype.itemsize) * 3/4 / self.Pxall / self.Pyall

    handle = la.cublashandle()
    for i in np.arange(0, self.num_neurons, self.ONE_TIME_FILTERS):
        Nfilters = min(self.ONE_TIME_FILTERS, self.num_neurons - i)
        self.generate_visual_receptive_fields(startbias=i, N_filters=Nfilters)
        cublasDgemm(handle.handle, 't', 'n', V.shape[0], int(Nfilters),
                    self.Pxall*self.Pyall, self.dx*self.dy,
                    d_video.gpudata, d_video.ld,
                    self.filters.gpudata, self.filters.ld, 0,
                    int(int(d_output.gpudata) + int(d_output.ld*i*d_output.dtype.itemsize)),
                    d_output.ld)
    return d_output.T()
Author: bionet, Project: vtem, Lines: 28, Source: vrf.py
Example 11: init_cuda
def init_cuda():
    """Initialize CUDA functionality.

    This function attempts to load the necessary interfaces
    (hardware connectivity) to run CUDA-based filtering. It
    should only need to be run once per session.

    If the config var (set via mne.set_config or in ENV)
    MNE_USE_CUDA == 'true', this function will be executed when
    importing mne. If this variable is not set, the function can
    be executed manually.
    """
    global cuda_capable
    global cuda_multiply_inplace_c128
    global cuda_halve_c128
    global cuda_real_c128
    if cuda_capable is True:
        logger.info("CUDA previously enabled, currently %s available memory"
                    % sizeof_fmt(mem_get_info()[0]))
        return
    # Triage possible errors for informative messaging
    cuda_capable = False
    try:
        import pycuda.gpuarray
        import pycuda.driver
    except ImportError:
        logger.warning("module pycuda not found, CUDA not enabled")
        return
    try:
        # Initialize CUDA; happens when importing autoinit
        import pycuda.autoinit  # noqa, analysis:ignore
    except ImportError:
        logger.warning("pycuda.autoinit could not be imported, likely "
                       "a hardware error, CUDA not enabled")
        return
    # Make sure scikits.cuda is installed
    try:
        from scikits.cuda import fft as cudafft
    except ImportError:
        logger.warning("module scikits.cuda not found, CUDA not enabled")
        return

    # Make our multiply in-place kernel
    from pycuda.elementwise import ElementwiseKernel
    # let's construct our own CUDA multiply in-place function
    cuda_multiply_inplace_c128 = ElementwiseKernel(
        "pycuda::complex<double> *a, pycuda::complex<double> *b",
        "b[i] *= a[i]", "multiply_inplace")
    cuda_halve_c128 = ElementwiseKernel(
        "pycuda::complex<double> *a", "a[i] /= 2.0", "halve_value")
    cuda_real_c128 = ElementwiseKernel(
        "pycuda::complex<double> *a", "a[i] = real(a[i])", "real_value")

    # Make sure we can use 64-bit FFTs
    try:
        cudafft.Plan(16, np.float64, np.complex128)  # will get auto-GC'ed
    except Exception:
        logger.warning("Device does not support 64-bit FFTs, CUDA not enabled")
        return
    cuda_capable = True
    # Figure out the limit for CUDA FFT calculations
    logger.info("Enabling CUDA with %s available memory"
                % sizeof_fmt(mem_get_info()[0]))
Author: TanayGahlot, Project: mne-python, Lines: 59, Source: cuda.py
Example 12: is_memory_enough
def is_memory_enough(a):
    try:
        rest, total = driver.mem_get_info()
    except driver.LogicError:
        # a child thread cannot use the context from the main thread...
        # the following does not work yet
        from pycuda import tools
        import skcuda
        driver.init()
        # try to make a new context (cannot deactivate the old context stack)
        context = tools.make_default_context()
        device = context.get_device()
        skcuda.misc.init_context(device)
        rest, total = driver.mem_get_info()
    if (sys.getsizeof(a) * 2) < rest:
        return True
Author: macronucleus, Project: Chromagnon, Lines: 17, Source: fftgpu.py
Example 13: is_gpu_memory_enough
def is_gpu_memory_enough(self, a):
    if CUDA:
        rest, total = driver.mem_get_info()
        if (sys.getsizeof(a) * 2) < rest:
            return True
    else:
        return True
Author: macronucleus, Project: Chromagnon, Lines: 8, Source: fftmanager.py
Example 14: init_all_devices
def init_all_devices():
    global DEVICES, DEVICE_INFO
    if DEVICES is not None:
        return DEVICES
    log.info("CUDA initialization (this may take a few seconds)")
    driver.init()
    DEVICES = []
    DEVICE_INFO = {}
    log("CUDA driver version=%s", driver.get_driver_version())
    ngpus = driver.Device.count()
    if ngpus==0:
        log.info("CUDA %s / PyCUDA %s, no devices found",
                 ".".join([str(x) for x in driver.get_version()]), pycuda.VERSION_TEXT)
        return DEVICES
    da = driver.device_attribute
    cf = driver.ctx_flags
    for i in range(ngpus):
        device = None
        context = None
        devinfo = "gpu %i" % i
        try:
            device = driver.Device(i)
            devinfo = device_info(device)
            log(" + testing device %s: %s", i, devinfo)
            DEVICE_INFO[i] = devinfo
            host_mem = device.get_attribute(da.CAN_MAP_HOST_MEMORY)
            if not host_mem:
                log.warn("skipping device %s (cannot map host memory)", devinfo)
                continue
            context = device.make_context(flags=cf.SCHED_YIELD | cf.MAP_HOST)
            try:
                log("   created context=%s", context)
                log("   api version=%s", context.get_api_version())
                free, total = driver.mem_get_info()
                log("   memory: free=%sMB, total=%sMB", int(free/1024/1024), int(total/1024/1024))
                log("   multi-processors: %s, clock rate: %s",
                    device.get_attribute(da.MULTIPROCESSOR_COUNT), device.get_attribute(da.CLOCK_RATE))
                log("   max block sizes: (%s, %s, %s)",
                    device.get_attribute(da.MAX_BLOCK_DIM_X),
                    device.get_attribute(da.MAX_BLOCK_DIM_Y),
                    device.get_attribute(da.MAX_BLOCK_DIM_Z))
                log("   max grid sizes: (%s, %s, %s)",
                    device.get_attribute(da.MAX_GRID_DIM_X),
                    device.get_attribute(da.MAX_GRID_DIM_Y),
                    device.get_attribute(da.MAX_GRID_DIM_Z))
                max_width = device.get_attribute(da.MAXIMUM_TEXTURE2D_WIDTH)
                max_height = device.get_attribute(da.MAXIMUM_TEXTURE2D_HEIGHT)
                log("   maximum texture size: %sx%s", max_width, max_height)
                log("   max pitch: %s", device.get_attribute(da.MAX_PITCH))
                SMmajor, SMminor = device.compute_capability()
                compute = (SMmajor<<4) + SMminor
                log("   compute capability: %#x (%s.%s)", compute, SMmajor, SMminor)
                if i==0:
                    #we print the list info "header" from inside the loop
                    #so that the log output is bunched up together
                    log.info("CUDA %s / PyCUDA %s, found %s device%s:",
                             ".".join([str(x) for x in driver.get_version()]),
                             pycuda.VERSION_TEXT, ngpus, engs(ngpus))
                DEVICES.append(i)
                log.info(" + %s (memory: %s%% free, compute: %s.%s)",
                         device_info(device), 100*free/total, SMmajor, SMminor)
            finally:
                context.pop()
        except Exception as e:
            log.error("error on device %s: %s", devinfo, e)
    return DEVICES
Author: svn2github, Project: Xpra, Lines: 56, Source: cuda_context.py
Example 15: select_device
def select_device(preferred_device_id=-1, preferred_device_name=None, min_compute=0):
    if preferred_device_name is None:
        preferred_device_name = get_pref("device-name")
    if preferred_device_id<0:
        device_id = get_pref("device-id")
        if device_id>=0:
            preferred_device_id = device_id
    devices = init_all_devices()
    global DEVICE_STATE
    free_pct = 0
    cf = driver.ctx_flags
    #split device list according to device state:
    ok_devices = [device_id for device_id in devices if DEVICE_STATE.get(device_id, True) is True]
    nok_devices = [device_id for device_id in devices if DEVICE_STATE.get(device_id, True) is not True]
    for list_name, device_list in {"OK" : ok_devices, "failing" : nok_devices}.items():
        #use -1 (not None) so the ">=0" test below can succeed:
        selected_device_id = -1
        selected_device = None
        log("will test %s device%s from %s list: %s", len(device_list), engs(device_list), list_name, device_list)
        for device_id in device_list:
            context = None
            try:
                device = driver.Device(device_id)
                log("select_device: testing device %s: %s", device_id, device_info(device))
                context = device.make_context(flags=cf.SCHED_YIELD | cf.MAP_HOST)
                log("created context=%s", context)
                free, total = driver.mem_get_info()
                log("memory: free=%sMB, total=%sMB", int(free/1024/1024), int(total/1024/1024))
                tpct = 100*free/total
                SMmajor, SMminor = device.compute_capability()
                compute = (SMmajor<<4) + SMminor
                if compute<min_compute:
                    log("ignoring device %s: compute capability %#x (minimum %#x required)",
                        device_info(device), compute, min_compute)
                elif device_id==preferred_device_id:
                    l = log
                    if len(device_list)>1:
                        l = log.info
                    l("device matches preferred device id %s: %s", preferred_device_id, device_info(device))
                    return device_id, device
                elif preferred_device_name and device_info(device).find(preferred_device_name)>=0:
                    log("device matches preferred device name: %s", preferred_device_name)
                    return device_id, device
                elif tpct>free_pct:
                    selected_device = device
                    selected_device_id = device_id
                    free_pct = tpct
            finally:
                if context:
                    context.pop()
                    context.detach()
        if selected_device_id>=0 and selected_device:
            l = log
            if len(devices)>1:
                l = log.info
            #log the device we actually selected (not the last one tested):
            l("selected device %s: %s", selected_device_id, device_info(selected_device))
            return selected_device_id, selected_device
    return -1, None
Author: svn2github, Project: Xpra, Lines: 56, Source: cuda_context.py
Example 16: ShowGPUInfo
def ShowGPUInfo():
    (free, total) = driver.mem_get_info()
    print('Global memory occupancy: %f%% free' % (free*100 / total))
    for devicenum in range(driver.Device.count()):
        device = driver.Device(devicenum)
        attrs = device.get_attributes()
        # beyond this point is just pretty printing
        print('\n===Attributes for device %d' % devicenum)
        for (key, value) in attrs.iteritems():
            print('  %s:%s' % (str(key), str(value)))
Author: zweiein, Project: NeuralNetworkBasedLogisticRegression, Lines: 10, Source: nnet_LR_gpu.py
Example 17: mem_check_and_malloc
def mem_check_and_malloc(bytes):
    fm, tm = cuda.mem_get_info()
    if log_type in ['memory']:
        log_str = "RANK %d, GPU MEM CHECK before malloc: %s Free, %s Maximum, %s Want to use" % (rank, print_bytes(fm), print_bytes(tm), print_bytes(bytes))
        log(log_str, 'memory', log_type)
    if fm < bytes:
        # we don't have enough memory: free entries from the data pool
        # until enough has been reclaimed
        print "BUFFER POOL"
        size = fm
        for elem in list(data_pool):
            usage = elem['usage']
            devptr = elem['devptr']
            devptr.free()
            print "FREE data", usage
            size += usage
            data_pool.remove(elem)
            if size >= bytes:
                break
        fm, tm = cuda.mem_get_info()
    if fm >= bytes:
        # we have enough memory, just malloc
        afm, tm = cuda.mem_get_info()
        devptr = cuda.mem_alloc(bytes)
        bfm, tm = cuda.mem_get_info()
        if log_type in ['memory']:
            fm, tm = cuda.mem_get_info()
            log_str = "RANK %d, GPU MALLOC after: %s Free, %s Maximum, %s Want to use" % (rank, print_bytes(fm), print_bytes(tm), print_bytes(bytes))
            log(log_str, 'memory', log_type)
        return True, devptr
    # we still don't have enough memory
    return False, None
Author: davidhildebrand, Project: Vivaldi_public, Lines: 42, Source: GPU_unit.py
Example 18: meminfo
def meminfo(self, kernel, k=-1, o=-1, threads=[], name=""):
    (free, total) = cuda.mem_get_info()
    shared = kernel.shared_size_bytes
    regs = kernel.num_regs
    local = kernel.local_size_bytes
    const = kernel.const_size_bytes
    mbpt = kernel.max_threads_per_block
    devdata = ctools.DeviceData()
    occupancy = ctools.OccupancyRecord(devdata, threads[0], shared_mem=shared, registers=regs)
    util.log.info("%s(%03d,%d)=L:%d,S:%d,R:%d,C:%d,MT:%d,T:%d,OC:%f,Free:%d" % (
        name, k, o, local, shared, regs, const, mbpt, threads[0],
        occupancy.occupancy, (free*100)/total))
Author: andrewbolster, Project: multiuserDSM, Lines: 11, Source: gpu.py
Example 19: run
def run(self):
    try:
        # initialise this device
        self.local.dev = cuda.Device(self.device)
        self.local.ctx = self.local.dev.make_context()
        self.local.ctx.push()
        (free, total) = cuda.mem_get_info()
        util.log.info("Initialising CUDA device %d: (%.2f%% free)" % (self.device, (free*100.0/total)))
    except pycuda._driver.MemoryError:
        util.log.info("Balls")
        raise

    # initialise the kernels
    self.local.kernels = SourceModule(self.r_kernels)
    gridmax = 65535
    # kernel handles
    self.k_osbprepare = self.local.kernels.get_function("lk_osbprepare_permutations")
    self.k_osbsolve = self.local.kernels.get_function("solve_permutations")
    self.k_osblk = self.local.kernels.get_function("lk_max_permutations")
    self.k_solve = self.local.kernels.get_function("solve")
    self.k_isboptimise = self.local.kernels.get_function("isb_optimise_pk")
    self.k_isboptimise_inc = self.local.kernels.get_function("isb_optimise_inc")
    self.k_calcpsd = self.local.kernels.get_function("calc_psd")
    self.k_osb_optimise_p = self.local.kernels.get_function("osb_optimise_p")

    # loop to empty the queue
    while True:
        # grab args from the queue (block until received)
        queueitem = self.argqueue.get()
        func = queueitem[0]
        args = queueitem[1:]
        if func == 'osb_optimise_p':
            result = self.osb_optimise_p(*args)
            self.resqueue.put((func, result))
        elif func == 'isb_optimise_p':
            result = self.isb_optimise_p(*args)
            self.resqueue.put((func, result))
        elif func == 'isb_optimise_inc':
            result = self.isb_optimise_inc(*args)
            self.resqueue.put((func, result))
        elif func == 'mipb_update_cost':
            result = self.mipb_update_cost(*args)
            self.resqueue.put((func, result))
        elif func == 'calc_psd':
            result = self.calc_psd(*args)
            self.resqueue.put((func, result))
        else:
            self.resqueue.put(None)
        self.argqueue.task_done()  # nothing seems to get past this
Author: andrewbolster, Project: multiuserDSM, Lines: 54, Source: gpu.py
Example 20: cuda_mem_check
def cuda_mem_check(device_dictionary, cache_size, arrays):
    """Check that the GPU device has enough memory
    to perform the computation."""
    module_logger.info('Checking if the system has enough memory on the device.')
    input_size = 0
    for array in arrays:
        input_size = input_size + array.nbytes
    cache_size_bytes = cache_size * 4
    free, total = driver.mem_get_info()
    max_mem_size = 512 * 1000
    memory_limit = (total - input_size) / device_dictionary['MULTIPROCESSOR_COUNT'] / device_dictionary['MAX_THREADS_PER_MULTIPROCESSOR']
    limitator = min(max_mem_size, memory_limit)
    if cache_size_bytes >= limitator:
        module_logger.error("Cache memory per thread (" + bytes2human(cache_size_bytes) + ") is greater than the memory "
                            "limitation per thread (" + bytes2human(limitator) + ")")
        exit()
    elif input_size >= total:
        module_logger.error("The arrays to transfer (" + bytes2human(input_size) + ") are greater than the global memory "
                            "limitation (" + bytes2human(total) + ")")
        exit()
    else:
        headers = ("Cache size per thread", "Maximum memory size per thread")
        printdata = (bytes2human(cache_size_bytes), bytes2human(limitator))
        stype('\n' + 'Memory limitation status on device:')
        stype(tabulate.tabulate(zip(headers, printdata), headers=['Variable Name', 'Value'],
                                tablefmt='rst') + '\n')
        module_logger.ok('The system has enough memory to perform the calculation.')
        module_logger.info('Using ' + bytes2human(cache_size_bytes) + ' out of ' + bytes2human(limitator) + '.')
        # module_logger.warning("Warning: The cuda kernel will use the max capacity of the graphics "
        #                       "processors; the screen could become unresponsive during the process.")
        stype('\n' + bcolors.WARNING + "Warning: The cuda kernel will use the max capacity of the graphics processors,"
              + '\n         the screen could become unresponsive during the process.' + bcolors.ENDC + '\n')
Author: pablogsal, Project: HADES, Lines: 52, Source: cuda_toolbox.py
Note: The pycuda.driver.mem_get_info function examples on this page were compiled by 纯净天空 from GitHub, MSDocs, and other source-code and documentation hosting platforms. The code snippets were selected from open-source projects contributed by various developers, and copyright belongs to the original authors. Consult the corresponding project's License before distributing or using the code. Do not reproduce without permission.