本文整理汇总了Python中pycuda.driver.memcpy_dtod函数的典型用法代码示例。如果您正苦于以下问题：Python memcpy_dtod函数的具体用法？Python memcpy_dtod怎么用？Python memcpy_dtod使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了memcpy_dtod函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: _read_external_input
def _read_external_input(self):
    """
    Push the next buffered input frame to the GPU and refill the buffer.

    While the input file is not exhausted, or buffered frames remain, one
    frame of ``self.I_ext`` (located ``self.frame_count * self.I_ext.ld``
    elements into the array) is copied device-to-device into
    ``self.synapse_state``, starting ``self.total_synapses`` elements in.
    Once ``self._one_time_import`` frames have been consumed, the next chunk
    is read from ``self.input_h5file`` and uploaded into ``self.I_ext``; a
    short final chunk is zero-padded to the full buffer length.
    """
    if not self.input_eof or self.frame_count < self.frames_in_buffer:
        state_itemsize = self.synapse_state.dtype.itemsize
        dst_addr = int(self.synapse_state.gpudata) + self.total_synapses * state_itemsize
        frame_offset = self.frame_count * self.I_ext.ld * self.I_ext.dtype.itemsize
        src_addr = int(self.I_ext.gpudata) + frame_offset
        cuda.memcpy_dtod(int(dst_addr), int(src_addr), self.num_input * state_itemsize)
        self.frame_count += 1
    else:
        self.logger.info('Input end of file reached. Subsequent behaviour is undefined.')

    # Nothing to reload until the whole buffer has been consumed, and
    # nothing left to load once the file has been read to its end.
    if self.frame_count < self._one_time_import or self.input_eof:
        return

    source = self.input_h5file.root.array
    total_rows = source.shape[0]
    chunk = self._one_time_import

    # Clamp the read window to the end of the file.
    if total_rows - self.file_pointer < chunk:
        stop = total_rows
    else:
        stop = self.file_pointer + chunk
    h_ext = source.read(self.file_pointer, stop)

    if h_ext.shape[0] == self.I_ext.shape[0]:
        # Full chunk: upload as-is and restart frame counting.
        self.I_ext.set(h_ext)
        self.file_pointer += chunk
        self.frame_count = 0
    else:
        # Short chunk: remember how many frames are real, then zero-pad
        # along axis 0 so the upload matches the buffer length.
        self.frames_in_buffer = h_ext.shape[0]
        pad_shape = list(h_ext.shape)
        pad_shape[0] = chunk - h_ext.shape[0]
        h_ext = np.concatenate((h_ext, np.zeros(pad_shape)), axis=0)
        self.I_ext.set(h_ext)
        self.file_pointer = total_rows

    if self.file_pointer == total_rows:
        self.input_eof = True
开发者ID:LuisMoralesAlonso,项目名称:neurokernel,代码行数:29,代码来源:LPU.py
示例2: _gpuarray_copy
def _gpuarray_copy(array):
    """
    Return a new ``GPUArray`` holding a device-side copy of ``array``.

    The new array has the same shape and dtype as ``array`` and is created
    with the same allocator.

    Raises:
        RuntimeError: if ``array`` is not contiguous (``array.flags.forc``
            is false), since a single linear copy would not be valid.
    """
    if not array.flags.forc:
        raise RuntimeError('only contiguous arrays may be copied.')
    new = GPUArray(array.shape, array.dtype, allocator=array.allocator)
    # An empty array has nothing to transfer; PyCUDA leaves ``gpudata`` unset
    # for zero-sized arrays, so issuing the copy anyway would fail.
    if array.nbytes:
        drv.memcpy_dtod(new.gpudata, array.gpudata, array.nbytes)
    return new
开发者ID:ALEXGUOQ,项目名称:chainer,代码行数:7,代码来源:cuda.py
示例3: copy
def copy(self):
    """
    Return a new ``GPUArray`` holding a device-side copy of this array.

    The new array has the same shape and dtype as ``self``.

    Raises:
        RuntimeError: if this array is not contiguous (``self.flags.forc``
            is false), since a single linear copy would not be valid.
    """
    if not self.flags.forc:
        raise RuntimeError("only contiguous arrays may be copied.")
    new = GPUArray(self.shape, self.dtype)
    # An empty array has nothing to transfer; PyCUDA leaves ``gpudata`` unset
    # for zero-sized arrays, so issuing the copy anyway would fail.
    if self.nbytes:
        drv.memcpy_dtod(new.gpudata, self.gpudata, self.nbytes)
    return new
开发者ID:hannes-brt,项目名称:pycuda,代码行数:7,代码来源:gpuarray.py
示例4: _read_external_input
def _read_external_input(self):
    """
    Copy the next input frame into the synapse state and refill the buffer.

    One frame of ``self.I_ext`` is copied device-to-device into
    ``self.synapse_state`` (past its first ``self.total_synapses`` elements)
    as long as the file is not exhausted or buffered frames remain. After
    ``self._one_time_import`` frames have been consumed, the next chunk is
    read from ``self.input_h5file`` into ``self.I_ext``, zero-padded if the
    file ends before a full chunk is available.
    """
    frames_pending = not self.input_eof or self.frame_count < self.frames_in_buffer
    if frames_pending:
        # Copy the current frame from the input buffer to the synapse state.
        elem_size = self.synapse_state.dtype.itemsize
        dest = int(int(self.synapse_state.gpudata) + self.total_synapses * elem_size)
        src = int(
            int(self.I_ext.gpudata)
            + self.frame_count * self.I_ext.ld * self.I_ext.dtype.itemsize
        )
        cuda.memcpy_dtod(dest, src, self.num_input * elem_size)
        self.frame_count += 1
    else:
        self.log_info("Input end of file reached. " "Subsequent behaviour is undefined.")

    # Only read from the file once every buffered frame has been consumed
    # and the file still has data left.
    buffer_consumed = self.frame_count >= self._one_time_import
    if not buffer_consumed or self.input_eof:
        return

    h5_array = self.input_h5file.root.array
    input_ld = h5_array.shape[0]
    remaining = input_ld - self.file_pointer
    read_stop = (
        input_ld
        if remaining < self._one_time_import
        else self.file_pointer + self._one_time_import
    )
    h_ext = h5_array.read(self.file_pointer, read_stop)

    n_read = h_ext.shape[0]
    if n_read == self.I_ext.shape[0]:
        self.I_ext.set(h_ext)
        self.file_pointer += self._one_time_import
        self.frame_count = 0
    else:
        # Fewer frames than the buffer holds: record the real frame count
        # and pad the remainder with zeros before uploading.
        padding_shape = list(h_ext.shape)
        self.frames_in_buffer = n_read
        padding_shape[0] = self._one_time_import - n_read
        h_ext = np.concatenate((h_ext, np.zeros(padding_shape)), axis=0)
        self.I_ext.set(h_ext)
        self.file_pointer = input_ld

    if self.file_pointer == input_ld:
        self.input_eof = True
开发者ID:yiyin,项目名称:neurokernel,代码行数:33,代码来源:LPU.py
示例5: swapHashTableValues
def swapHashTableValues(new_vals):
    """
    Replace the module's ``table_values`` global with ``new_vals``.

    The current contents of ``table_values`` are first saved into a freshly
    allocated device buffer, then overwritten with the same number of bytes
    taken from ``new_vals.gpudata``.

    Returns:
        The device allocation holding the previous table contents.
    """
    # get_global yields (device_ptr, size_in_bytes)
    table_ptr, table_nbytes = mod.get_global('table_values')

    # Back up the current table before it is overwritten.
    backup = cuda.mem_alloc(table_nbytes)
    cuda.memcpy_dtod(backup, table_ptr, table_nbytes)

    # NOTE(review): assumes new_vals holds at least table_nbytes bytes.
    cuda.memcpy_dtod(table_ptr, new_vals.gpudata, table_nbytes)
    return backup
开发者ID:AdrianLsk,项目名称:permutohedral_pycuda,代码行数:7,代码来源:filter_pycuda.py
示例6: cache_z
def cache_z(self, z):
    """
    Upload the real and imaginary parts of ``z`` to ``self.xd`` / ``self.yd``.

    Each part is converted to an aligned, writeable, owned, C-contiguous
    double array, staged in a temporary GPU array, and then copied
    device-to-device into the destination held on ``self``.
    """
    layout = ['A', 'W', 'O', 'C']
    real_host = np.require(z.real, dtype=np.double, requirements=layout)
    imag_host = np.require(z.imag, dtype=np.double, requirements=layout)

    real_dev = gpuarray.to_gpu(real_host)
    imag_dev = gpuarray.to_gpu(imag_host)

    cuda.memcpy_dtod(self.xd, real_dev.ptr, real_dev.nbytes)
    cuda.memcpy_dtod(self.yd, imag_dev.ptr, imag_dev.nbytes)
开发者ID:abelfunctions,项目名称:abelfunctions,代码行数:7,代码来源:riemanntheta_omegas.py
示例7: matvec
def matvec(self, v):
    """
    Apply this operator to the flattened matrix ``v`` using GPU buffers.

    ``v`` is reshaped to ``(self.D, self.D)`` and uploaded into ``self.xG``.
    A chain of ``eps_l_*`` (when ``self.left`` is true) or ``eps_r_*``
    kernels is then run over the ``A1G``/``A2G`` lists, ping-ponging between
    two work buffers, and the outcome is combined into ``self.xG`` with
    ``cublasZaxpy``.

    Returns:
        The contents of ``self.xG`` copied back to the host and flattened.
    """
    x = v.reshape((self.D, self.D))
    self.xG.set(x)
    # Seed the second work buffer with x through a device-to-device copy;
    # the two commented-out lines are earlier variants of the same copy.
    #self.out2.set(self.xG)
    #self.out2[:] = self.xG
    cd.memcpy_dtod(self.out2.gpudata, self.xG.gpudata, self.xG.nbytes)
    # Pair each work buffer with its "_p" companion (presumably a device
    # pointer array consumed by the batched kernels — TODO confirm).
    out = [self.out, self.out_p]
    out2 = [self.out2, self.out2_p]
    if self.left: #Multiplying from the left, but x is a col. vector, so use mat_dagger
        for k in range(len(self.A1G)):
            if self.use_batch:
                eps_l_noop_batch(out2[1], self.A1G_p[k], self.A2G_p[k], out[0],
                                 self.tmp_p, self.tmp2_p, self.tmp2, self.hdl)
            else:
                eps_l_noop_strm_dev(out2[0], self.A1G[k], self.A2G[k], out[0],
                                    self.tmp, self.tmp2, self.ones, self.zeros,
                                    self.streams, self.hdl)
            # Swap roles so that the buffer passed as out[0] above becomes
            # the out2 argument of the next step. x was copied into out2 and
            # out2[0] is read as the result below, so each kernel presumably
            # reads its first argument and writes out[0] — TODO confirm.
            out, out2 = out2, out
        Ehx = out2[0]
        if self.pseudo:
            QEQhx = Ehx - self.lG * m.adot(self.r, x)
            #res = QEQhx.mul_add(-sp.exp(-1.j * self.p), self.xG, 1)
            # Presumably accumulates alpha * QEQhx into self.xG in place
            # (BLAS axpy convention) — confirm against the cublas wrapper.
            cb.cublasZaxpy(self.hdl, self.D**2, -sp.exp(-1.j * self.p),
                           QEQhx.gpudata, 1, self.xG.gpudata, 1)
            res = self.xG
        else:
            #res = Ehx.mul_add(-sp.exp(-1.j * self.p), self.xG, 1)
            cb.cublasZaxpy(self.hdl, self.D**2, -sp.exp(-1.j * self.p),
                           Ehx.gpudata, 1, self.xG.gpudata, 1)
            res = self.xG
    else:
        # Right-hand variant: same scheme, but the lists are walked from the
        # last index down to 0 and the phase factor uses +1.j instead of -1.j.
        for k in range(len(self.A2G) - 1, -1, -1):
            if self.use_batch:
                eps_r_noop_batch(out2[1], self.A1G_p[k], self.A2G_p[k], out[0],
                                 self.tmp_p, self.tmp2_p, self.tmp2, self.hdl)
            else:
                eps_r_noop_strm_dev(out2[0], self.A1G[k], self.A2G[k], out[0],
                                    self.tmp, self.tmp2, self.ones, self.zeros,
                                    self.streams, self.hdl)
            out, out2 = out2, out
        Ex = out2[0]
        if self.pseudo:
            QEQx = Ex - self.rG * m.adot(self.l, x)
            #res = QEQx.mul_add(-sp.exp(1.j * self.p), self.xG, 1)
            cb.cublasZaxpy(self.hdl, self.D**2, -sp.exp(1.j * self.p),
                           QEQx.gpudata, 1, self.xG.gpudata, 1)
            res = self.xG
        else:
            #res = Ex.mul_add(-sp.exp(1.j * self.p), self.xG, 1)
            cb.cublasZaxpy(self.hdl, self.D**2, -sp.exp(1.j * self.p),
                           Ex.gpudata, 1, self.xG.gpudata, 1)
            res = self.xG
    # Every branch leaves res bound to self.xG; download it and flatten.
    return res.get().ravel()
开发者ID:amilsted,项目名称:evoMPS,代码行数:59,代码来源:cuda_alternatives.py
示例8: set_data
def set_data(filenames, file_count,subb, config, count, cur, img_mean, gpu_data, gpu_data_remote, ctx, icomm,img_batch_empty):
    """
    Load one file's worth of image data, preprocess it and stage it on the GPU.

    The batch is loaded from hickle and/or LMDB depending on
    ``config['data_source']``, ``img_mean`` is subtracted, and ``subb``
    minibatches are sliced along the last axis, cropped/mirrored and
    uploaded into ``gpu_data``. After a "calc_finished" message is received
    on ``icomm``, each ``gpu_data`` entry is copied device-to-device into
    the matching ``gpu_data_remote`` entry and "copy_finished" is sent back.

    NOTE(review): ``rank``, ``batch_size``, ``mode`` and ``n_files`` are not
    parameters or locals here; presumably they are module-level globals —
    verify against the rest of the file.

    Returns:
        ``count`` incremented once per minibatch processed.
    """
    load_time = time.time()
    data=None
    # aa = config['rank']+count/subb*size
    # img_list = range(aa*config['file_batch_size'],(aa+1)*config['file_batch_size'],1)
    #print rank, img_list
    if config['data_source'] in ['hkl','both']:
        data_hkl = hkl.load(str(filenames[file_count]))# c01b
        data = data_hkl
    if config['data_source'] in ['lmdb', 'both']:
        # When both sources are enabled the LMDB batch is the one used below.
        data_lmdb = lmdb_load_cur(cur,config,img_batch_empty)
        data = data_lmdb
    if config['data_source']=='both':
        # Print a few elements of the difference between the two sources.
        if config['rank']==0: print (rank,(data_hkl-data_lmdb)[1,0:3,1,1].tolist())
    load_time = time.time()-load_time #)*
    sub_time = time.time() #(
    data = data -img_mean
    sub_time = time.time()-sub_time
    crop_time = time.time() #(
    for minibatch_index in range(subb):
        count+=1
        # NOTE(review): the slice start uses config['batch_size'] but the
        # stop uses the bare name batch_size — confirm they always agree.
        batch_data = data[:,:,:,minibatch_index*config['batch_size']:(minibatch_index+1)*batch_size]
        if mode == 'train':
            rand_arr = get_rand3d(config['random'], count+(rank+1)*n_files*(subb))
        else:
            # Fixed values outside training.
            rand_arr = np.float32([0.5, 0.5, 0])
        batch_data = crop_and_mirror(batch_data, rand_arr, flag_batch=config['batch_crop_mirror'],cropsize=config['input_width'])
        gpu_data[minibatch_index].set(batch_data)
    crop_time = time.time() - crop_time #)
    #print 'load_time: %f (load %f, sub %f, crop %f)' % (load_time+crop_time+sub_time, load_time,sub_time, crop_time)
    # wait for computation on last file to finish
    msg = icomm.recv(source=MPI.ANY_SOURCE,tag=35)
    assert msg == "calc_finished"
    for minibatch_index in range(subb):
        # copy from preload area
        drv.memcpy_dtod(gpu_data_remote[minibatch_index].ptr,
                        gpu_data[minibatch_index].ptr,
                        gpu_data[minibatch_index].dtype.itemsize *
                        gpu_data[minibatch_index].size
                        )
    # Synchronize the context before announcing that the copies are done.
    ctx.synchronize()
    icomm.isend("copy_finished",dest=0,tag=55)
    return count
开发者ID:hma02,项目名称:platoon,代码行数:59,代码来源:proc_load_mpi.py
示例9: copy
def copy(self):
    """
    Return a duplicate of this array.

    A new array is obtained from ``self._new_like_me()``; when this array is
    non-empty its device memory (``mem_size`` elements) is copied into it.
    """
    duplicate = self._new_like_me()
    if not self.size:
        # Nothing to transfer for an empty array.
        return duplicate
    nbytes = self.mem_size * self.dtype.itemsize
    cuda.memcpy_dtod(duplicate.gpudata, self.gpudata, nbytes)
    return duplicate
开发者ID:bionet,项目名称:vtem,代码行数:9,代码来源:parray.py
示例10: _loadInput
def _loadInput(self, stim):
    """
    Push a new stimulus frame into the device-side frame buffer.

    ``stim`` must be a non-empty 2-D numpy array; it is converted to
    unsigned bytes. The frames in ``self.d_stimBuf`` are shifted one slot
    toward the front, the new frame is written into the last slot
    (``self.nrT - 1``), a working copy of the buffer is made in
    ``self.d_scalingStimBuf``, and the response buffers ``self.d_respV1c``
    and ``self.d_resp`` are reset to zero.
    """
    logging.debug('loadInput')

    # shortcuts
    n_pixels = self.nrX * self.nrY
    n_pixel_dirs = self.nrX * self.nrY * self.nrDirs

    # validate and normalise the input
    stim_type = type(stim)
    assert stim_type.__module__ == "numpy", "stim must be numpy array"
    assert stim_type.__name__ == "ndarray", "stim must be numpy.ndarray"
    assert stim.size > 0, "stim cannot be []"
    stim = stim.astype(np.ubyte)
    rows, cols = stim.shape
    logging.debug("- stim shape={0}x{1}".format(rows, cols))

    launch = dict(block=(128, 1, 1), grid=(int(iDivUp(n_pixels, 128)), 1))

    # Shift d_stimBuf in time by one frame (frame i moves to frame i-1),
    # using the project's own device-side memcpy kernel.
    for frame in xrange(1, self.nrT):
        dst_ptr = np.intp(self.d_stimBuf) + self.szXY * (frame - 1)
        src_ptr = np.intp(self.d_stimBuf) + self.szXY * frame
        self.dev_memcpy_dtod(dst_ptr, src_ptr, np.int32(n_pixels), **launch)

    # Place the new stimulus in the last slot (newest frame at nrT-1).
    # \TODO implement RGB support
    newest_ptr = np.intp(self.d_stimBuf) + self.szXY * (self.nrT - 1)
    self.dev_split_gray(newest_ptr, cuda.In(stim), np.int32(stim.size), **launch)

    # create working copy of d_stimBuf
    cuda.memcpy_dtod(self.d_scalingStimBuf, self.d_stimBuf, self.szXY * self.nrT)

    # Reset V1complex responses to 0 by uploading a zero-filled host array.
    # \FIXME not sure how to use memset...doesn't seem to give expected
    # result
    zeros_v1c = np.zeros(n_pixel_dirs).astype(np.float32)
    cuda.memcpy_htod(self.d_respV1c, zeros_v1c)

    # Zero d_resp, which holds the response to every space-time orientation
    # (nrFilters) at every scale (nrScales) for each pixel (nrX*nrY).
    zeros_resp = np.zeros(n_pixels * self.nrFilters * self.nrScales).astype(np.float32)
    cuda.memcpy_htod(self.d_resp, zeros_resp)
开发者ID:UCI-CARL,项目名称:MotionEnergy,代码行数:55,代码来源:motionenergy.py
示例11: _update_buffer
def _update_buffer(self):
    """
    Store the current neuron states in the slots the buffers point at.

    ``self.V`` is copied in full into ``self.buffer.gpot_buffer`` at the
    position selected by ``gpot_current``; the first
    ``self.my_num_spike_neurons`` entries of ``self.spike_state`` are copied
    into ``self.buffer.spike_buffer`` at ``spike_current``. Each copy is
    skipped when the corresponding neuron count is not positive.
    """
    if self.my_num_gpot_neurons > 0:
        gpot = self.buffer.gpot_buffer
        slot_offset = self.buffer.gpot_current * gpot.ld * gpot.dtype.itemsize
        cuda.memcpy_dtod(int(gpot.gpudata) + slot_offset,
                         self.V.gpudata,
                         self.V.nbytes)

    if self.my_num_spike_neurons > 0:
        spike = self.buffer.spike_buffer
        slot_offset = self.buffer.spike_current * spike.ld * spike.dtype.itemsize
        nbytes = int(self.spike_state.dtype.itemsize * self.my_num_spike_neurons)
        cuda.memcpy_dtod(int(spike.gpudata) + slot_offset,
                         self.spike_state.gpudata,
                         nbytes)
开发者ID:prabindh,项目名称:neurokernel,代码行数:11,代码来源:LPU.py
示例12: arrayp2g
def arrayp2g(pary):
    """
    Convert a PitchArray into a newly allocated GPUArray.

    An array with ``M == 1`` is copied with one linear device-to-device
    transfer; otherwise ``PitchTrans`` is used to translate from the
    source leading dimension ``pary.ld`` to that of the result.
    """
    from pycuda.gpuarray import GPUArray

    converted = GPUArray(pary.shape, pary.dtype)
    if not pary.size:
        # Empty input: nothing to copy.
        return converted

    if pary.M == 1:
        nbytes = pary.mem_size * pary.dtype.itemsize
        cuda.memcpy_dtod(converted.gpudata, pary.gpudata, nbytes)
    else:
        PitchTrans(pary.shape, converted.gpudata, _pd(converted.shape),
                   pary.gpudata, pary.ld, pary.dtype)
    return converted
开发者ID:bionet,项目名称:vtem,代码行数:11,代码来源:parray.py
示例13: _set_state
def _set_state(self, k, v):
    """
    Initialise the state array ``self.states[k]``.

    If ``k`` names a parameter in ``self.params_dict`` the state is copied
    from that parameter's device memory. Otherwise the state is filled with
    a constant: ``v`` itself when it is a float, or else the value stored
    under key ``v`` in the class-level ``states`` mapping.
    """
    if k in self.params_dict:
        param = self.params_dict[k]
        cuda.memcpy_dtod(self.states[k].gpudata, param.gpudata, param.nbytes)
        return

    if isinstance(v, float):
        fill_value = v
    else:
        # v refers to another entry of the class-level states table.
        cls = type(self)
        assert(v in cls.states)
        fill_value = cls.states[v]
    self.states[k].fill(self.floattype(fill_value))
开发者ID:chungheng,项目名称:neurodriver,代码行数:12,代码来源:NDComponent.py
示例14: update
def update(self):
    """
    Advance ``self.ul_gpu`` by one step using four weighted stages.

    The current ``ul`` is saved to ``ul_prev``; then ``update_pre`` and
    ``update_ul`` are launched once per stage with the coefficient pairs
    (0, 1/6), (0.5, 1/3), (0.5, 1/3), (1, 1/6). These look like the
    classical RK4 weights — TODO confirm against the kernels.
    """
    # Scalars are converted to fixed-width numpy types for the kernel calls.
    nn, ne, nne = np.int32([self.nn, self.ne, self.nne])
    dt, de, vf = np.float64([self.dt, self.de, self.vf])

    launch = dict(block=(256, 1, 1), grid=(self.nn // 256 + 1, 1))

    stage_tmp_coeffs = np.float32([0, 0.5, 0.5, 1])
    stage_sum_coeffs = np.float32([1./6, 1./3, 1./3, 1./6])

    # Keep a copy of the state from the start of the step.
    cuda.memcpy_dtod(self.ul_prev_gpu, self.ul_gpu, self.ul.nbytes)

    for c_tmp, c_sum in zip(stage_tmp_coeffs, stage_sum_coeffs):
        self.update_pre(nn, nne, vf, c_tmp,
                        self.ul_gpu, self.ul_prev_gpu, self.ul_tmp_gpu,
                        self.kl_gpu, self.el_sum_gpu, **launch)
        self.update_ul(nn, ne, nne, dt, de, vf, c_sum,
                       self.ul_gpu, self.ul_tmp_gpu,
                       self.kl_gpu, self.el_sum_gpu, **launch)
开发者ID:wbkifun,项目名称:my_research,代码行数:14,代码来源:dg_modal_gpu.py
示例15: stepFunction
def stepFunction():
    """
    Render the current concentration field, then advance the simulation.

    The output concentration is copied into the plotting buffer, scaled by
    ``0.5 / max``, converted to unsigned chars and sent to the screen array.
    In "float" precision mode ``nIterationsPerPlot`` iterations are then
    run. Every 25th call (when ``plotting`` is set) the maximum and the
    summed concentration are recorded and plotted.
    """
    global animIter
    cuda.memcpy_dtod(plotDataFloat_d.ptr, concentrationOut_d.ptr, concentrationOut_d.nbytes)
    peak = gpuarray.max(plotDataFloat_d).get()
    multiplyByScalarReal(cudaPre(0.5 / peak), plotDataFloat_d)
    floatToUchar(plotDataFloat_d, plotDataChars_d)
    copyToScreenArray()

    if cudaP == "float":
        for _ in range(nIterationsPerPlot):
            oneIteration_tex()

    if plotting and animIter % 25 == 0:
        maxVals.append(peak)
        sumConc.append(gpuarray.sum(concentrationIn_d).get())
        plotData(maxVals, sumConc)
    animIter += 1
开发者ID:bvillasen,项目名称:percolation,代码行数:14,代码来源:percolation3D.py
示例16: update_other_rest
def update_other_rest(self, gpot_data, my_num_gpot_neurons, num_virtual_gpot_neurons):
    """
    Write externally supplied values into every delay step of the buffer.

    The non-empty arrays in ``gpot_data`` are packed back to back into a
    temporary device array of ``num_virtual_gpot_neurons`` doubles. That
    array is then copied into ``self.gpot_buffer`` once per delay step,
    starting ``my_num_gpot_neurons`` elements into each step's slot. Does
    nothing when ``self.num_gpot_neurons`` is not positive.
    """
    if not self.num_gpot_neurons > 0:
        return

    d_other_rest = garray.zeros(num_virtual_gpot_neurons, np.double)

    # Pack the host arrays contiguously, tracking the running byte offset.
    byte_offset = 0
    for chunk in gpot_data.itervalues():
        if len(chunk) > 0:
            cuda.memcpy_htod(int(d_other_rest.gpudata) + byte_offset, chunk)
            byte_offset += chunk.nbytes

    for step in range(self.gpot_delay_steps):
        buf = self.gpot_buffer
        element_index = buf.ld * step + int(my_num_gpot_neurons)
        cuda.memcpy_dtod(int(buf.gpudata) + element_index * buf.dtype.itemsize,
                         d_other_rest.gpudata,
                         d_other_rest.nbytes)
开发者ID:prabindh,项目名称:neurokernel,代码行数:14,代码来源:LPU.py
示例17: _get_external_input
def _get_external_input(self):
    """
    Fetch the next input and copy it into the synapse state array.

    The input comes from ``self.input_generator.next_input()``. A plain
    numpy array is first uploaded into ``self.I_ext``; any other object is
    used directly as the device-side source through its ``gpudata``.
    ``self.num_input`` elements are then copied device-to-device into
    ``self.synapse_state``, starting ``self.total_synapses`` elements in.
    """
    input_ext = self.input_generator.next_input()

    # use of intermediate I_ext can possibly be avoided
    if type(input_ext) is np.ndarray:
        self.I_ext.set(input_ext)
        source = self.I_ext
    else:
        source = input_ext

    itemsize = self.synapse_state.dtype.itemsize
    dest = int(int(self.synapse_state.gpudata) + self.total_synapses * itemsize)
    cuda.memcpy_dtod(dest, int(source.gpudata), self.num_input * itemsize)
开发者ID:neurokernel,项目名称:lamina,代码行数:16,代码来源:LPU.py
示例18: stepFunction
def stepFunction():
    """
    Render the current concentration field, then advance the simulation.

    When ``showActivity`` is set the per-block activity flags are cleared
    and recomputed first. The output concentration is copied into the
    plotting buffer and scaled by ``100 / max``. The simulation then runs
    ``nIterationsPerPlot`` texture iterations in "float" mode, or half that
    many ``oneIteration_sh`` calls otherwise. Every 25th call (when
    ``plotting`` is set) the maximum and the summed concentration are
    recorded and plotted.
    """
    global animIter

    if showActivity:
        cuda.memset_d8(activeBlocks_d.ptr, 0, nBlocks)
        findActivityKernel(cudaPre(1.e-10), concentrationIn_d, activeBlocks_d,
                           grid=grid2D, block=block2D)
        getActivityKernel(activeBlocks_d, activeThreads_d, grid=grid2D, block=block2D)

    cuda.memcpy_dtod(plotData_d.ptr, concentrationOut_d.ptr, concentrationOut_d.nbytes)
    peak = gpuarray.max(plotData_d).get()
    scalePlotData(100. / peak, plotData_d, np.uint8(showActivity), activeThreads_d)

    if cudaP == "float":
        for _ in range(nIterationsPerPlot):
            oneIteration_tex()
    else:
        for _ in range(nIterationsPerPlot // 2):
            oneIteration_sh()

    if plotting and animIter % 25 == 0:
        maxVals.append(peak)
        sumConc.append(gpuarray.sum(concentrationIn_d).get())
        plotData(maxVals, sumConc)
    animIter += 1
开发者ID:bvillasen,项目名称:percolation,代码行数:16,代码来源:percolation2D.py
示例19: _update_buffer
def _update_buffer(self):
    """
    Update circular buffer of past neuron states.

    The first ``total_num_gpot_neurons`` entries of ``self.V`` and the first
    ``total_num_spike_neurons`` entries of ``self.spike_state`` are copied
    device-to-device into the slots selected by ``gpot_current`` and
    ``spike_current``. Each copy is skipped when its count is not positive.
    """
    if self.total_num_gpot_neurons > 0:
        gpot = self.buffer.gpot_buffer
        slot_offset = self.buffer.gpot_current * gpot.ld * gpot.dtype.itemsize
        nbytes = self.V.dtype.itemsize * self.total_num_gpot_neurons
        cuda.memcpy_dtod(int(gpot.gpudata) + slot_offset, self.V.gpudata, nbytes)

    if self.total_num_spike_neurons > 0:
        spike = self.buffer.spike_buffer
        slot_offset = self.buffer.spike_current * spike.ld * spike.dtype.itemsize
        nbytes = int(self.spike_state.dtype.itemsize * self.total_num_spike_neurons)
        cuda.memcpy_dtod(int(spike.gpudata) + slot_offset,
                         self.spike_state.gpudata,
                         nbytes)
开发者ID:neurokernel,项目名称:neurodriver,代码行数:16,代码来源:LPU.py
示例20: _assign
def _assign(self, value):
    """
    Assign ``value`` to this tensor and return ``self``.

    Supported values:
      * int / float: fills the tensor, using a driver memset when the
        tensor is contiguous and an "assign" op tree otherwise;
      * GPUTensor: a device-to-device copy when both tensors are contiguous
        and share a dtype, otherwise an "assign" op tree;
      * OpTreeNode: built and executed as an "assign" op tree;
      * numpy.ndarray: forwarded to ``self.set``.

    Raises:
        TypeError: for any other type of ``value``.
    """
    if isinstance(value, (int, float)):
        if not self.is_contiguous:
            # Non-contiguous tensors go through the copy kernel.
            OpTreeNode.build("assign", self, value)
            return self

        # Contiguous: use the driver memset matching the element width.
        # The converted scalar's bytes are reinterpreted as an unsigned
        # integer of that width.
        value = self.dtype.type(value)
        itemsize = self.dtype.itemsize
        if itemsize == 1:
            memset, fmt = drv.memset_d8, 'B'
        elif itemsize == 2:
            memset, fmt = drv.memset_d16, 'H'
        else:
            # NOTE(review): every other item size takes the 32-bit path.
            memset, fmt = drv.memset_d32, 'I'
        memset(self.gpudata, unpack_from(fmt, value)[0], self.size)
        return self

    if isinstance(value, GPUTensor):
        # TODO: add an is_binary_compat like function
        same_layout = (self.is_contiguous and value.is_contiguous
                       and self.dtype == value.dtype)
        if same_layout:
            drv.memcpy_dtod(self.gpudata, value.gpudata, self.nbytes)
        else:
            OpTreeNode.build("assign", self, value)
        return self

    if isinstance(value, OpTreeNode):
        # collapse and execute an op tree as a kernel
        OpTreeNode.build("assign", self, value)
        return self

    if isinstance(value, np.ndarray):
        # assign to numpy array (same as set())
        self.set(value)
        return self

    raise TypeError("Invalid type for assignment: %s" % type(value))
开发者ID:KayneWest,项目名称:nervanagpu,代码行数:45,代码来源:nervanagpu.py
注：本文中的pycuda.driver.memcpy_dtod函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。
请发表评论