本文整理汇总了Python中pycuda.gpuarray.zeros_like函数的典型用法代码示例。如果您正苦于以下问题:Python zeros_like函数的具体用法?Python zeros_like怎么用?Python zeros_like使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了zeros_like函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: __init__
def __init__(self, name, input_shape, n_out, epsW=0.001, epsB=0.002,
             initW=0.01, initB=0.0, weight=None, bias=None):
    """Set up a fully connected ('fc') layer and allocate its GPU buffers.

    The hyper-parameters are stored on the instance as given.
    ``input_shape`` is unpacked into ``(inputSize, batchSize)`` and the
    weight matrix shape is ``(n_out, inputSize)``.

    When ``weight`` / ``bias`` is ``None`` the values are drawn from
    ``np.random.randn`` and scaled by ``initW`` / ``initB``; otherwise the
    supplied array is uploaded. In both cases the uploaded array is cast to
    float32, and a zero-filled gradient buffer of matching shape and dtype
    is created next to it.
    """
    Layer.__init__(self, name, 'fc')

    self.epsW, self.epsB = epsW, epsB
    self.initW, self.initB = initW, initB

    self.inputShape = input_shape
    self.inputSize, self.batchSize = input_shape
    self.outputSize = n_out
    self.weightShape = (self.outputSize, self.inputSize)

    # Choose the host-side starting values, then upload and cast once.
    host_weight = weight
    if host_weight is None:
        host_weight = np.random.randn(*self.weightShape) * self.initW
    self.weight = gpuarray.to_gpu(host_weight).astype(np.float32)

    host_bias = bias
    if host_bias is None:
        host_bias = np.random.randn(self.outputSize, 1) * self.initB
    self.bias = gpuarray.to_gpu(host_bias).astype(np.float32)

    self.weightGrad = gpuarray.zeros_like(self.weight)
    self.biasGrad = gpuarray.zeros_like(self.bias)
开发者ID:smessing,项目名称:striate,代码行数:27,代码来源:layer.py
示例2: add_cld
def add_cld(
    self,
    name,
    proj_mats,
    offset_mats,
    cloud_xyz,
    kernel,
    scale_params,
    r_traj,
    r_traj_K,
    l_traj,
    l_traj_K,
    update_ptrs=False,
):
    """Register a cloud through ``GPUContext.add_cld`` and store its trajectories.

    The four trajectory arrays are padded with ``gpu_pad`` and appended to
    the matching per-cloud lists. A zero-filled buffer shaped like each
    padded left/right trajectory is appended to ``l_traj_w`` / ``r_traj_w``,
    and the first dimension of the original ``l_traj`` / ``r_traj`` is
    recorded in ``l_traj_dims`` / ``r_traj_dims``.

    ``self.update_ptrs()`` is called only at the very end, and only when
    ``update_ptrs`` is true.
    """
    # The base call must not refresh pointers: that happens after the
    # trajectory buffers below have been added.
    GPUContext.add_cld(self, name, proj_mats, offset_mats, cloud_xyz, kernel, scale_params, update_ptrs=False)

    traj_shape = (MAX_TRAJ_LEN, DATA_DIM)
    traj_K_shape = (MAX_TRAJ_LEN, MAX_CLD_SIZE)

    self.r_traj.append(gpu_pad(r_traj, traj_shape))
    self.r_traj_K.append(gpu_pad(r_traj_K, traj_K_shape))
    self.l_traj.append(gpu_pad(l_traj, traj_shape))
    self.l_traj_K.append(gpu_pad(l_traj_K, traj_K_shape))

    # Zero buffers mirror the padded trajectories that were just appended.
    newest_right = self.r_traj[-1]
    self.r_traj_w.append(gpuarray.zeros_like(newest_right))
    newest_left = self.l_traj[-1]
    self.l_traj_w.append(gpuarray.zeros_like(newest_left))

    self.l_traj_dims.append(l_traj.shape[0])
    self.r_traj_dims.append(r_traj.shape[0])

    if update_ptrs:
        self.update_ptrs()
开发者ID:rll,项目名称:lfd,代码行数:32,代码来源:batchtps.py
示例3: __init__
def __init__(self, bend_coefs, N, QN, NON, NR, x_nd, K_nn, rot_coef,
             QN_gpu=None, WQN_gpu=None, NON_gpu=None, NHN_gpu=None):
    """Store the solver inputs and make sure the GPU buffers exist.

    Every entry of ``bend_coefs`` must be a key of ``NON``. The host-side
    inputs are kept on the instance unchanged, and ``x_nd.shape`` is
    unpacked into ``self.n, self.d``.

    Each ``*_gpu`` argument is used as given when provided. When it is
    ``None``:
      * ``QN_gpu``  -- ``QN`` is uploaded;
      * ``WQN_gpu`` -- a zero buffer shaped like ``QN_gpu`` is created;
      * ``NON_gpu`` -- ``NON[b]`` is uploaded for every ``b`` in
        ``bend_coefs``, keyed by ``b``;
      * ``NHN_gpu`` -- a zero buffer shaped like the ``NON_gpu`` entry of
        the first bending coefficient is created.
    """
    for coef in bend_coefs:
        assert coef in NON, 'no solver found for bending coefficient {}'.format(coef)

    self.rot_coef = rot_coef
    self.n, self.d = x_nd.shape
    self.bend_coefs = bend_coefs
    self.N = N
    self.QN = QN
    self.NON = NON
    self.NR = NR
    self.x_nd = x_nd
    self.K_nn = K_nn

    ## set up GPU memory
    self.QN_gpu = gpuarray.to_gpu(self.QN) if QN_gpu is None else QN_gpu
    self.WQN_gpu = gpuarray.zeros_like(self.QN_gpu) if WQN_gpu is None else WQN_gpu

    if NON_gpu is None:
        NON_gpu = {coef: gpuarray.to_gpu(self.NON[coef]) for coef in bend_coefs}
    self.NON_gpu = NON_gpu

    if NHN_gpu is None:
        first_coef = bend_coefs[0]
        NHN_gpu = gpuarray.zeros_like(self.NON_gpu[first_coef])
    self.NHN_gpu = NHN_gpu

    self.valid = True
开发者ID:antingshen,项目名称:lfd,代码行数:33,代码来源:transformations.py
示例4: _init_weights
def _init_weights(self, weight_shape, bias_shape):
    """Create any missing weight/bias buffers and their gradient buffers.

    * ``self.weight`` (when ``None``): an identity matrix for the layer
      named ``'noise'`` (which requires a square ``weight_shape``),
      otherwise ``randn(weight_shape, np.float32) * self.initW``.
    * ``self.bias`` (when ``None``): filled with ``self.initB`` when that
      value is positive, otherwise zeros.

    The shapes of both buffers are then checked against the requested
    shapes, zero gradient buffers are allocated, and, if ``self.momW`` is
    positive, missing increment buffers are created as zeros.
    """
    if self.weight is None:
        if self.name != 'noise':
            host_weight = randn(weight_shape, np.float32) * self.initW
        else:
            assert(weight_shape[0] == weight_shape[1])
            host_weight = np.eye(weight_shape[0], dtype = np.float32)
        self.weight = gpuarray.to_gpu(host_weight)

    if self.bias is None:
        if self.initB > 0.0:
            host_bias = np.ones(bias_shape, dtype=np.float32) * self.initB
            self.bias = gpuarray.to_gpu(host_bias)
        else:
            self.bias = gpuarray.zeros(bias_shape, dtype=np.float32)

    Assert.eq(self.weight.shape, weight_shape)
    Assert.eq(self.bias.shape, bias_shape)

    self.weightGrad = gpuarray.zeros_like(self.weight)
    self.biasGrad = gpuarray.zeros_like(self.bias)

    if self.momW > 0.0:
        if self.weightIncr is None:
            self.weightIncr = gpuarray.zeros_like(self.weight)
        if self.biasIncr is None:
            self.biasIncr = gpuarray.zeros_like(self.bias)
        # NOTE(review): these two checks are assumed to belong to the
        # momentum branch (the listing lost its indentation) -- confirm
        # against the upstream file.
        Assert.eq(self.weightIncr.shape, weight_shape)
        Assert.eq(self.biasIncr.shape, bias_shape)
开发者ID:tesatory,项目名称:fastnet-noisy,代码行数:28,代码来源:layer.py
示例5: rfftn
def rfftn(self):
    """Execute ``self.plan`` on ``self.arr`` and wrap the two outputs.

    Three zero buffers shaped like ``self.arr`` are allocated: one is passed
    as the second positional argument of ``plan.execute`` and the other two
    receive ``data_out_re`` / ``data_out_im``. The two output buffers are
    returned together as a ``CUDAArray``.
    """
    # Original author's note: it seems that we can just take half of the
    # original fft in both outputs so that we match what was here originally.
    zero_input = gpuarray.zeros_like(self.arr)
    out_re = gpuarray.zeros_like(self.arr)
    out_im = gpuarray.zeros_like(self.arr)
    self.plan.execute(self.arr, zero_input,
                      data_out_re=out_re, data_out_im=out_im)
    return CUDAArray(out_re, out_im)
开发者ID:mattbierbaum,项目名称:cuda-plasticity,代码行数:8,代码来源:CUDAGridArray.py
示例6: same_reduce_multiview
def same_reduce_multiview(target, vec, num_view):
    """Run the multiview "same" kernel and return the summed result.

    ``_same_reduce_multiview_`` is launched as a single block with
    ``target.size`` threads, writing into two scratch buffers shaped like
    ``target``. The first scratch buffer is then viewed as one row and
    accumulated with ``add_row_sum_to_vec`` into a 1x1 float32 GPU array,
    whose single element is copied back to the host and returned.
    """
    scratch = gpuarray.zeros_like(target)
    ids = gpuarray.zeros_like(target)

    _same_reduce_multiview_(target, vec, scratch, ids, I(num_view),
                            block=(target.size, 1, 1), grid=(1, 1))

    as_row = scratch.reshape((1, scratch.size))
    total = gpuarray.to_gpu(np.zeros((1, 1)).astype(np.float32))
    add_row_sum_to_vec(total, as_row)
    return total.get()[0, 0]
开发者ID:rjpower,项目名称:fastnet,代码行数:11,代码来源:cuda_kernel.py
示例7: __init__
def __init__(self, gpu_detector, ndaq=1):
    """Allocate per-channel DAQ buffers on the GPU and load the DAQ kernels.

    Every buffer has ``gpu_detector.nchannels * ndaq`` entries. The two
    "earliest time" buffers are left uninitialised (``ga.empty``); the
    history and charge buffers start at zero. References to the detector's
    GPU structures are copied from ``gpu_detector``, and the ``daq.cu``
    module is compiled and wrapped in ``GPUFuncs``.
    """
    n_slots = gpu_detector.nchannels*ndaq

    self.earliest_time_gpu = ga.empty(n_slots, dtype=np.float32)
    self.earliest_time_int_gpu = ga.empty(n_slots, dtype=np.uint32)

    # zeros_like copies shape and dtype from the uint32 buffer above.
    self.channel_history_gpu = ga.zeros_like(self.earliest_time_int_gpu)
    self.channel_q_int_gpu = ga.zeros_like(self.earliest_time_int_gpu)
    self.channel_q_gpu = ga.zeros(len(self.earliest_time_int_gpu),
                                  dtype=np.float32)

    self.detector_gpu = gpu_detector.detector_gpu
    self.solid_id_map_gpu = gpu_detector.solid_id_map
    self.solid_id_to_channel_index_gpu = gpu_detector.solid_id_to_channel_index_gpu

    self.module = get_cu_module('daq.cu', options=cuda_options,
                                include_source_directory=True)
    self.gpu_funcs = GPUFuncs(self.module)

    self.ndaq = ndaq
    self.stride = gpu_detector.nchannels
开发者ID:BenLand100,项目名称:chroma,代码行数:15,代码来源:daq.py
示例8: ewsum
def ewsum(d_a, d_w):
    """Run the two-stage ``ewsum`` GPU kernels on ``d_a`` with ``d_w``.

    Stage one (``ewsum_kernel``) fills a temporary buffer shaped like
    ``d_a``. Stage two (``ewsum_sum_kernel``) reduces that temporary into
    the returned float32 GPU array, whose leading dimension is
    ``d_a.shape[0] // d_w.shape[0]`` and whose remaining dimensions match
    ``d_a``.

    Both launches are one-dimensional: threads per block are capped at
    ``MAX_BLOCK_SIZE`` and the grid holds enough blocks to cover every
    element (at least one block).

    Performance notes carried over from the original author (YORI): this is
    faster than the CPU if ``num_w`` is large and ``non_width`` is small --
    when ``num_w`` is large the for loop is small, and when ``non_width`` is
    large more threads are necessary.
    """
    width = d_a.shape[0]
    total_dim = d_a.size
    num_w = d_w.shape[0]

    d_tmp_out = gpuarray.zeros_like(d_a)
    thread_size = min(d_a.size, MAX_BLOCK_SIZE)
    block_size = max(int(math.ceil(d_a.size / float(thread_size))), 1)
    ewsum_kernel(d_a, d_w, d_tmp_out,
                 numpy.int32(num_w), numpy.int32(width), numpy.int32(total_dim),
                 block=(thread_size, 1, 1), grid=(block_size, 1, 1))

    # TODO: There HAS to be a better way to do this
    # Explicit floor division: the result is used as an array dimension, so
    # it must stay an int under Python 3 / true division as well.
    x = width // num_w
    d_out = gpuarray.zeros((x,) + d_a.shape[1:], numpy.float32)
    thread_size = min(d_out.size, MAX_BLOCK_SIZE)
    block_size = max(int(math.ceil(d_out.size / float(thread_size))), 1)
    ewsum_sum_kernel(d_tmp_out, d_out,
                     numpy.int32(num_w), numpy.int32(width), numpy.int32(total_dim),
                     block=(thread_size, 1, 1), grid=(block_size, 1, 1))
    return d_out
开发者ID:Captricity,项目名称:sciguppy,代码行数:28,代码来源:misc.py
示例9: test_cublasDcopy
def test_cublasDcopy(self):
    """Check that ``cublasDcopy`` copies a float64 vector between GPU arrays."""
    host_vec = np.random.rand(5).astype(np.float64)
    src_gpu = gpuarray.to_gpu(host_vec)
    dst_gpu = gpuarray.zeros_like(src_gpu)

    # Arguments: handle, element count, then each device buffer followed by
    # the literal 1 (presumably the BLAS stride -- confirm against cuBLAS docs).
    cublas.cublasDcopy(self.cublas_handle, src_gpu.size,
                       src_gpu.gpudata, 1,
                       dst_gpu.gpudata, 1)

    assert np.allclose(dst_gpu.get(), src_gpu.get())
开发者ID:Brainiarc7,项目名称:scikit-cuda,代码行数:7,代码来源:test_cublas.py
示例10: execute
def execute(self):
    """Average all same-shaped images from ``self.images_iterator`` on the GPU.

    The first image fixes the reference ``shape`` and becomes the object
    stored in ``self.resulting_image``; a copy of its pixel data seeds the
    GPU accumulator. Later images whose ``shape`` differs from the first
    one's are skipped and not counted. The accumulated sum is divided in
    place by the number of images used and written back into
    ``self.resulting_image.image``.

    If the iterator yields nothing there is nothing to average, so the
    method returns without touching ``self.resulting_image`` (this path
    used to fail with a ``TypeError`` from ``None /= 0``).
    """
    accumulator = None   # running sum on the GPU
    staging = None       # reusable GPU buffer for the image being added
    ref_shape = None
    used = 0             # number of images that contributed to the sum

    for itr_img in self.images_iterator:
        if accumulator is None:
            # First image: take a private host copy so the upload does not
            # alias the source image's own pixel buffer.
            host_copy = np.ndarray(shape=itr_img.image.shape,
                                   dtype=itr_img.image.dtype)
            host_copy[:] = itr_img.image[:]
            self.resulting_image = itr_img
            accumulator = gpuarray.to_gpu(host_copy)
            staging = gpuarray.zeros_like(accumulator)
            ref_shape = itr_img.shape
            used = 1
            continue

        if ref_shape != itr_img.shape:
            # Mismatched images are ignored and do not affect the divisor.
            continue

        staging.set(itr_img.image)
        accumulator += staging
        used += 1

    if accumulator is None:
        return

    accumulator /= used
    self.resulting_image.image[:] = accumulator.get()
开发者ID:simon-r,项目名称:SerialPhotoMerge,代码行数:35,代码来源:mergeAverageImage.py
示例11: softmax_back
def softmax_back(d_a, d_error, s):
    """Launch ``softmax_back_kernel`` and return its newly allocated output.

    The kernel receives ``d_a``, ``d_error``, a zero-initialised output
    shaped like ``d_a``, ``s`` as float32 and the element count as int32.
    The launch is one-dimensional: threads per block are capped at
    ``MAX_BLOCK_SIZE`` and the grid holds enough blocks to cover every
    element (at least one block).
    """
    result = gpuarray.zeros_like(d_a)
    n_elems = result.size
    threads = min(n_elems, MAX_BLOCK_SIZE)
    blocks = max(int(math.ceil(n_elems / float(threads))), 1)
    softmax_back_kernel(
        d_a, d_error, result, numpy.float32(s), numpy.int32(n_elems),
        block=(threads, 1, 1), grid=(blocks, 1, 1))
    return result
开发者ID:Captricity,项目名称:sciguppy,代码行数:7,代码来源:misc.py
示例12: map_elementwise_max
def map_elementwise_max(self, op, field_expr):
    """Evaluate ``field_expr`` and run the element-wise max kernel on it.

    The kernel record is looked up on ``self.executor`` by the evaluated
    field's dtype and invoked with the input buffer, a zero-initialised
    output buffer of the same shape and dtype, and the record's
    ``mb_count``. The output array is returned. ``op`` is not used here.
    """
    source = self.rec(field_expr)
    result = gpuarray.zeros_like(source)

    kernel_rec = self.executor.get_elwise_max_kernel(source.dtype)
    launch_grid = (kernel_rec.grid_dim, 1)
    kernel_rec.func.prepared_call(
        launch_grid, source.gpudata, result.gpudata, kernel_rec.mb_count)

    return result
开发者ID:felipeh,项目名称:hedge,代码行数:10,代码来源:execute.py
示例13: rectify_back
def rectify_back(d_a, d_error, inplace=False):
    """Launch ``rectify_back_kernel`` and return the array it wrote to.

    With ``inplace=True`` the kernel writes into ``d_a`` itself (and ``d_a``
    is returned); otherwise it writes into a new zero-initialised array
    shaped like ``d_a``. The launch is one-dimensional: threads per block
    are capped at ``MAX_BLOCK_SIZE`` and the grid holds enough blocks to
    cover every element (at least one block).
    """
    result = d_a if inplace else gpuarray.zeros_like(d_a)

    n_elems = result.size
    threads = min(n_elems, MAX_BLOCK_SIZE)
    blocks = max(int(math.ceil(n_elems / float(threads))), 1)
    rectify_back_kernel(
        d_a, d_error, result, numpy.int32(n_elems),
        block=(threads, 1, 1), grid=(blocks, 1, 1))
    return result
开发者ID:Captricity,项目名称:sciguppy,代码行数:10,代码来源:misc.py
示例14: exp
def exp(d_a, mode=MathModes.ACC):
    """Element-wise exponential of a GPU array.

    ``MathModes.ACC`` (the default) delegates to ``cumath.exp``. Any other
    mode launches ``exp_fast_kernel`` into a new zero-initialised array
    shaped like ``d_a``, using a one-dimensional launch whose threads per
    block are capped at ``MAX_BLOCK_SIZE``.
    """
    if mode == MathModes.ACC:
        return cumath.exp(d_a)

    result = gpuarray.zeros_like(d_a)
    n_elems = d_a.size
    threads = min(n_elems, MAX_BLOCK_SIZE)
    blocks = max(int(math.ceil(n_elems / float(threads))), 1)
    exp_fast_kernel(
        d_a, result, numpy.int32(n_elems),
        block=(threads, 1, 1), grid=(blocks, 1, 1))
    return result
开发者ID:Captricity,项目名称:sciguppy,代码行数:10,代码来源:expit.py
示例15: __init__
def __init__(self, name, type, epsW, epsB, initW, initB, momW, momB, wc, weight, bias,
             weightIncr, biasIncr, weightShape, biasShape):
    """Set up a weighted layer: hyper-parameters, GPU weights and buffers.

    ``epsW``, ``epsB``, ``momW``, ``momB`` and ``wc`` are converted with
    ``F``; ``initW`` / ``initB`` are stored unchanged.

    * ``weight``: when ``None``, initialised as
      ``randn(weightShape, np.float32) * initW``; otherwise uploaded as
      given.
    * ``bias``: when ``None``, filled with ``initB`` if that is positive,
      else zeros; otherwise uploaded and cast to float32.
    * ``weightIncr`` / ``biasIncr``: only set up when ``momW`` is positive
      -- zeros when ``None``, otherwise uploaded as given.

    A one-line notice is written to ``sys.stderr`` for every array that is
    taken from the caller rather than freshly initialised.
    """
    Layer.__init__(self, name, type)

    self.epsW = F(epsW)
    self.epsB = F(epsB)
    self.initW = initW
    self.initB = initB
    self.momW = F(momW)
    self.momB = F(momB)
    self.wc = F(wc)

    # sys.stderr.write emits the same text as the former Python-2-only
    # ``print >> sys.stderr`` statements and is valid on Python 2 and 3.
    if weight is None:
        self.weight = gpuarray.to_gpu(randn(weightShape, np.float32) * self.initW)
    else:
        sys.stderr.write('init weight from disk\n')
        # Uploaded with the caller's dtype; no float32 cast is applied here.
        self.weight = gpuarray.to_gpu(weight)

    if bias is None:
        if self.initB > 0.0:
            self.bias = gpuarray.to_gpu((np.ones(biasShape, dtype=np.float32) * self.initB))
        else:
            self.bias = gpuarray.zeros(biasShape, dtype=np.float32)
    else:
        sys.stderr.write('init bias from disk\n')
        self.bias = gpuarray.to_gpu(bias).astype(np.float32)

    self.weightGrad = gpuarray.zeros_like(self.weight)
    self.biasGrad = gpuarray.zeros_like(self.bias)

    # Both increment buffers are gated on momW (not momB), as before.
    if self.momW > 0.0:
        if weightIncr is None:
            self.weightIncr = gpuarray.zeros_like(self.weight)
        else:
            sys.stderr.write('init weightIncr from disk\n')
            self.weightIncr = gpuarray.to_gpu(weightIncr)

        if biasIncr is None:
            self.biasIncr = gpuarray.zeros_like(self.bias)
        else:
            sys.stderr.write('init biasIncr from disk\n')
            self.biasIncr = gpuarray.to_gpu(biasIncr)
开发者ID:phecy,项目名称:striate,代码行数:43,代码来源:layer.py
示例16: test_2d_fp_surfaces
def test_2d_fp_surfaces(self):
    """Round-trip a 2D array through a layered CUDA surface for several dtypes.

    For each dtype in the list a kernel is compiled from the source
    template below. The random array is first written into the surface
    (``rw == 0``), then read back with the row/column coordinates swapped
    (``rw == 1``), and the result is asserted to equal the transpose of the
    original host array.
    """
    orden = "C"
    npoints = 32

    for prec in [np.int16,np.float32,np.float64,np.complex64,np.complex128]:
        prec_str = dtype_to_ctype(prec)
        # Choose the replacement text for the 'fpName' placeholder.
        # NOTE(review): the kernel template as shown here contains no
        # 'fpName' token, so that substitution looks like a no-op -- confirm.
        if prec == np.complex64: fpName_str = 'fp_tex_cfloat'
        elif prec == np.complex128: fpName_str = 'fp_tex_cdouble'
        elif prec == np.float64: fpName_str = 'fp_tex_double'
        else: fpName_str = prec_str
        A_cpu = np.zeros([npoints,npoints],order=orden,dtype=prec)
        A_cpu[:] = np.random.rand(npoints,npoints)[:]
        A_gpu = gpuarray.to_gpu(A_cpu) # Array randomized
        # Kernel template: 'cuPres' is replaced by the C type name below.
        # The write branch addresses the surface as (row, col) and the read
        # branch as (col, row), which is what produces the transpose.
        myKernRW = '''
#include <pycuda-helpers.hpp>
surface<void, cudaSurfaceType2DLayered> mtx_tex;
__global__ void copy_texture(cuPres *dest, int rw)
{
int row = blockIdx.x*blockDim.x + threadIdx.x;
int col = blockIdx.y*blockDim.y + threadIdx.y;
int layer = 1;
int tid = row + col*blockDim.x*gridDim.x ;
if (rw==0){
cuPres aux = dest[tid];
fp_surf2DLayeredwrite(aux, mtx_tex, row, col, layer,cudaBoundaryModeClamp);}
else {
cuPres aux = 0;
fp_surf2DLayeredread(&aux, mtx_tex, col, row, layer, cudaBoundaryModeClamp);
dest[tid] = aux;
}
}
'''
        myKernRW = myKernRW.replace('fpName',fpName_str)
        myKernRW = myKernRW.replace('cuPres',prec_str)
        modW = SourceModule(myKernRW)
        copy_texture = modW.get_function("copy_texture")
        mtx_tex = modW.get_surfref("mtx_tex")
        cuBlock = (8,8,1)
        if cuBlock[0]>npoints:
            cuBlock = (npoints,npoints,1)
        # Ceiling division: one extra block per axis when npoints is not a
        # multiple of the block edge.
        cuGrid = (npoints//cuBlock[0]+1*(npoints % cuBlock[0] != 0 ),npoints//cuBlock[1]+1*(npoints % cuBlock[1] != 0 ),1)
        copy_texture.prepare('Pi')#,texrefs=[mtx_tex])
        A_gpu2 = gpuarray.zeros_like(A_gpu) # To initialize surface with zeros
        cudaArray = drv.gpuarray_to_array(A_gpu2,orden,allowSurfaceBind=True)
        A_cpu = A_gpu.get() # To remember original array
        mtx_tex.set_array(cudaArray)
        copy_texture.prepared_call(cuGrid,cuBlock,A_gpu.gpudata, np.int32(0)) # Write random array
        copy_texture.prepared_call(cuGrid,cuBlock,A_gpu.gpudata, np.int32(1)) # Read, but transposed
        # After the second call A_gpu holds what was read back from the surface.
        assert np.sum(np.abs(A_gpu.get()-np.transpose(A_cpu))) == np.array(0,dtype=prec)
        A_gpu.gpudata.free()
开发者ID:FreddieWitherden,项目名称:pycuda,代码行数:54,代码来源:test_driver.py
示例17: expit_back
def expit_back(d_a, d_error):
    """Launch ``expit_back_kernel`` and return its newly allocated output.

    Per the original description the kernel implements

        out = in * (1 - in) * error

    The output is a zero-initialised array shaped like ``d_a``. The launch
    is one-dimensional: threads per block are capped at ``MAX_BLOCK_SIZE``
    and the grid holds enough blocks to cover every element.
    """
    result = gpuarray.zeros_like(d_a)
    n_elems = d_a.size
    threads = min(n_elems, MAX_BLOCK_SIZE)
    blocks = max(int(math.ceil(n_elems / float(threads))), 1)
    expit_back_kernel(
        d_a, d_error, result, numpy.int32(n_elems),
        block=(threads, 1, 1), grid=(blocks, 1, 1))
    return result
开发者ID:Captricity,项目名称:sciguppy,代码行数:11,代码来源:expit.py
示例18: robust_pca
def robust_pca(D):
    """
    Parallel RPCA using ALM, adapted from https://github.com/nwbirnie/rpca.
    Takes and returns numpy arrays.

    ``D`` is uploaded to the GPU as ``M``. ``L``, ``S`` and ``Y`` start as
    zero arrays shaped like ``M`` and are updated until
    ``converged(M, L, S)`` is true; ``L`` and ``S`` are then copied back to
    the host and returned as a ``(L, S)`` tuple.

    The shape of ``M`` is printed once before iterating.
    """
    M = gpuarray.to_gpu(D)
    L = gpuarray.zeros_like(M)
    S = gpuarray.zeros_like(M)
    Y = gpuarray.zeros_like(M)

    # Function-call form prints the same text on Python 2 and is valid on 3.
    print(M.shape)

    mu = (M.shape[0] * M.shape[1]) / (4.0 * L1Norm(M))
    lamb = max(M.shape) ** -0.5
    inv_mu = mu ** -1  # loop-invariant, hoisted out of the iteration

    while not converged(M, L, S):
        L = svd_shrink(M - S - inv_mu * Y, mu)
        S = shrink(M - L + inv_mu * Y, lamb * mu)
        Y = Y + mu * (M - L - S)

    return L.get(), S.get()
开发者ID:cs205-surveillance,项目名称:cs205-surveillance,代码行数:20,代码来源:rpca_cuda.py
示例19: expit
def expit(d_a, mode=MathModes.ACC):
    """Apply the expit (sigmoid) function element-wise on the GPU.

        expit(x) = 1 / (1 + exp(-x))

    ``MathModes.FAST`` selects ``expit_fast_kernel``; every other mode uses
    ``expit_kernel``. The result is written into a new zero-initialised
    array shaped like ``d_a``, which is returned.
    """
    if mode == MathModes.FAST:
        kernel = expit_fast_kernel
    else:
        kernel = expit_kernel

    result = gpuarray.zeros_like(d_a)
    n_elems = d_a.size
    threads = min(n_elems, MAX_BLOCK_SIZE)
    blocks = max(int(math.ceil(n_elems / float(threads))), 1)
    kernel(d_a, result, numpy.int32(n_elems),
           block=(threads, 1, 1), grid=(blocks, 1, 1))
    return result
开发者ID:Captricity,项目名称:sciguppy,代码行数:12,代码来源:expit.py
示例20: __init__
def __init__(self, mesh, context=None):
    '''Allocate GPU work buffers, copy helpers and FFT plans for ``mesh``.

    Args:
        mesh: The mesh on which the solver will operate. The dimensionality
            is deduced from ``mesh.dimension``.
        context: Stored as ``self._context``; not otherwise used here.

    Two complex128 GPU buffers with twice the mesh extent along every axis
    are created (``fgreentr`` and the zero-filled ``tmpspace``). The
    ``_fgreen<dim>d`` / ``_mirror<dim>d`` methods are selected by mesh
    dimension, two device-to-device copy helpers between the mesh-sized and
    the doubled layout are prepared, and the Green's function is evaluated
    on the mesh, mirrored and passed through ``cu_fft.fft`` with
    ``self.fgreentr`` as the second argument.
    '''
    # create the mesh grid and compute the greens function on it
    self.mesh = mesh
    self._context = context
    mesh_shape = self.mesh.shape  # nz, ny, (nx)
    mesh_shape2 = [2*n for n in mesh_shape]  # 2*nz, 2*ny, (2*nx)
    mesh_distances = list(reversed(self.mesh.distances))  # dz, dy, dx
    self.fgreentr = gpuarray.empty(mesh_shape2, dtype=np.complex128)
    self.tmpspace = gpuarray.zeros_like(self.fgreentr)
    sizeof_complex = np.dtype(np.complex128).itemsize

    # dimensionality function dispatch
    dim = self.mesh.dimension
    dim_tag = str(dim) + 'd'
    self._fgreen = getattr(self, '_fgreen' + dim_tag)
    self._mirror = getattr(self, '_mirror' + dim_tag)
    copy_fn = {'3d' : get_Memcpy3D_d2d, '2d': get_Memcpy2D_d2d}
    memcpy_nd = copy_fn[dim_tag]
    dim_args = self.mesh.shape

    # Pitches/heights describe the mesh-sized array on one side and the
    # doubled work array on the other.
    self._cpyrho2tmp = memcpy_nd(
        src=None, dst=self.tmpspace,  # None because src(rho) not yet known
        src_pitch=self.mesh.nx*sizeof_complex,
        dst_pitch=2*self.mesh.nx*sizeof_complex,
        dim_args=dim_args,
        itemsize=sizeof_complex,
        src_height=self.mesh.ny,
        dst_height=2*self.mesh.ny)
    self._cpytmp2rho = memcpy_nd(
        src=self.tmpspace, dst=None,  # None because dst(rho) not yet known
        src_pitch=2*self.mesh.nx*sizeof_complex,
        dst_pitch=self.mesh.nx*sizeof_complex,
        dim_args=dim_args,
        itemsize=sizeof_complex,
        src_height=2*self.mesh.ny,
        dst_height=self.mesh.ny)

    # ``range`` instead of the Python-2-only ``xrange``: the loop is tiny
    # (one entry per mesh axis), so this is equivalent and portable.
    mesh_arr = [-mesh_distances[i]/2 + np.arange(mesh_shape[i]+1)
                * mesh_distances[i]
                for i in range(dim)
                ]
    # mesh_arr is [mz, my, mx]
    mesh_grids = np.meshgrid(*mesh_arr, indexing='ij')
    fgreen = self._fgreen(*mesh_grids)
    fgreen = self._mirror(fgreen)

    self.plan_forward = cu_fft.Plan(self.tmpspace.shape, in_dtype=np.complex128,
                                    out_dtype=np.complex128)
    self.plan_backward = cu_fft.Plan(self.tmpspace.shape, in_dtype=np.complex128,
                                     out_dtype=np.complex128)
    cu_fft.fft(gpuarray.to_gpu(fgreen), self.fgreentr, plan=self.plan_forward)
开发者ID:giadarol,项目名称:PyPIC,代码行数:53,代码来源:FFT_solver.py
注:本文中的pycuda.gpuarray.zeros_like函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论