本文整理汇总了Python中pycuda.gpuarray.zeros函数的典型用法代码示例。如果您正苦于以下问题：Python zeros函数的具体用法？Python zeros怎么用？Python zeros使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了zeros函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: _initialize_gpu_ds
def _initialize_gpu_ds(self):
    """
    Allocate the GPU-side arrays used by this object.

    Creates the synapse state buffer, the voltage buffer (``self.V`` is
    None when ``my_num_gpot_neurons`` is not positive), the spike state
    buffer, and -- when the public neuron lists are non-empty -- the
    uploaded index lists, the projection buffers and the callables
    returned by ``_extract_projection_gpot_func`` /
    ``_extract_projection_spike_func``.
    """
    # One slot per synapse plus one per input neuron.
    num_state_slots = int(self.total_synapses) + len(self.input_neuron_list)
    self.synapse_state = garray.zeros(num_state_slots, np.float64)

    has_gpot = self.my_num_gpot_neurons > 0
    self.V = (garray.zeros(int(self.my_num_gpot_neurons), np.float64)
              if has_gpot else None)

    if self.my_num_spike_neurons > 0:
        self.spike_state = garray.zeros(
            int(self.my_num_spike_neurons), np.int32)

    num_public_gpot = len(self.public_gpot_list)
    if num_public_gpot > 0:
        self.public_gpot_list_g = garray.to_gpu(self.public_gpot_list)
        self.projection_gpot = garray.zeros(num_public_gpot, np.double)
        self._extract_gpot = self._extract_projection_gpot_func()

    num_public_spike = len(self.public_spike_list)
    if num_public_spike > 0:
        # Spike indices are stored relative to ``spike_shift`` as int32.
        shifted_ids = self.public_spike_list - self.spike_shift
        self.public_spike_list_g = garray.to_gpu(shifted_ids.astype(np.int32))
        self.projection_spike = garray.zeros(num_public_spike, np.int32)
        self._extract_spike = self._extract_projection_spike_func()
开发者ID:LuisMoralesAlonso,项目名称:neurokernel,代码行数:25,代码来源:LPU.py
示例2: get_next_batch
def get_next_batch(self, batch_size):
    """
    Return the next batch of columns from the current GPU batch.

    Starts the reader and loads a GPU batch on first use.  If fewer than
    ``batch_size`` columns remain past ``self.index`` (or exactly that
    many), the remaining columns are returned, the index is reset to 0 and
    the next GPU batch is loaded; otherwise ``batch_size`` columns are
    returned and the index advances.

    Returns a ``BatchData(data, labels, epoch)``.
    """
    if self._reader is None:
        self._start_read()
    if self._gpu_batch is None:
        self._fill_reserved_data()

    current = self._gpu_batch
    height, width = current.data.shape
    start = self.index

    labels = current.labels[start:start + batch_size]

    exhausted = start + batch_size >= width
    num_cols = (width - start) if exhausted else batch_size

    # Copy columns [start, start + num_cols) into a fresh device array.
    data = gpuarray.zeros((height, num_cols), dtype=np.float32)
    gpu_partial_copy_to(current.data, data, 0, height, start, start + num_cols)

    if exhausted:
        self.index = 0
        self._fill_reserved_data()
    else:
        self.index += batch_size

    # NOTE(review): the epoch is read from self._gpu_batch *after* the
    # possible refill above, so it may describe the newly loaded batch
    # rather than the one the data came from -- confirm this is intended.
    return BatchData(data, labels, self._gpu_batch.epoch)
开发者ID:tesatory,项目名称:fastnet,代码行数:29,代码来源:data.py
示例3: _initialize_gpu_ds
def _initialize_gpu_ds(self):
    """
    Setup GPU arrays.

    Allocates the synapse state buffer (never smaller than one element),
    the voltage buffer (``self.V`` is None when ``total_num_gpot_neurons``
    is not positive) and the spike state buffer, then uploads whichever
    output/input port index arrays are present.
    """
    state_slots = int(self.total_synapses) + len(self.input_neuron_list)
    # max(..., 1) guarantees a non-empty allocation.
    self.synapse_state = garray.zeros(max(state_slots, 1), np.float64)

    self.V = (garray.zeros(int(self.total_num_gpot_neurons), np.float64)
              if self.total_num_gpot_neurons > 0 else None)

    if self.total_num_spike_neurons > 0:
        self.spike_state = garray.zeros(
            int(self.total_num_spike_neurons), np.int32)

    self.block_extract = (256, 1, 1)

    if len(self.out_ports_ids_gpot) > 0:
        self.out_ports_ids_gpot_g = garray.to_gpu(self.out_ports_ids_gpot)
        self.sel_out_gpot_ids_g = garray.to_gpu(self.sel_out_gpot_ids)
        self._extract_gpot = self._extract_projection_gpot_func()

    if len(self.out_ports_ids_spk) > 0:
        # Spike port ids are stored relative to ``spike_shift`` as int32.
        relative_ids = self.out_ports_ids_spk - self.spike_shift
        self.out_ports_ids_spk_g = garray.to_gpu(relative_ids.astype(np.int32))
        self.sel_out_spk_ids_g = garray.to_gpu(self.sel_out_spk_ids)
        self._extract_spike = self._extract_projection_spike_func()

    if self.ports_in_gpot_mem_ind is not None:
        self.inds_gpot = garray.to_gpu(self.sel_in_gpot_ids)

    if self.ports_in_spk_mem_ind is not None:
        self.inds_spike = garray.to_gpu(self.sel_in_spk_ids)
开发者ID:yiyin,项目名称:neurokernel,代码行数:35,代码来源:LPU.py
示例4: riemanntheta_high_dim
def riemanntheta_high_dim(X, Yinv, T, z, g, rad, max_points = 10000000):
    """
    Accumulate the oscillatory part over integer points in partitions.

    The ``(2*R + 1)**g`` integer points (``R = get_rad(T, rad)``) are
    processed in chunks of at most ``max_points``: each chunk is generated
    by ``box_points``, cached on the ``RiemannThetaCuda`` instance and
    evaluated at ``z``; the partial results are summed.

    Parameters
    ----------
    X, Yinv, T :
        Passed to ``cache_omega_real`` / ``cache_omega_imag`` / ``get_rad``.
    z :
        Evaluation point; wrapped as ``np.array([z])``.
    g :
        Exponent of the point count and per-point buffer width.
    rad :
        Passed to ``get_rad``.
    max_points : int
        Maximum number of integer points handled per partition.

    Returns
    -------
    The accumulated sum of ``compute_v_without_derivs`` results.

    NOTE(review): the source's indentation was lost; the ``if`` below is
    assumed to guard only the buffer allocation -- confirm.
    """
    parRiemann = RiemannThetaCuda(1, 512)
    # Initialize parRiemann.
    parRiemann.compile(g)
    parRiemann.cache_omega_real(X)
    parRiemann.cache_omega_imag(Yinv, T)
    # Compile the box_points program.
    point_finder = func1()
    R = get_rad(T, rad)
    print(R)
    num_int_points = (2*R + 1)**g
    num_partitions = num_int_points // max_points
    num_final_partition = num_int_points - num_partitions*max_points
    osc_part = 0 + 0*1.j
    if num_partitions > 0:
        # ``np.int`` was only an alias of the builtin and no longer exists
        # in current NumPy; ``int`` is the exact equivalent.
        S = gpuarray.zeros(int(max_points * g), dtype=np.double)
    # The print calls below are written so that they emit the same text
    # under both Python 2 and Python 3.
    print("Required number of iterations")
    print(num_partitions)
    print("")
    for p in range(num_partitions):
        print(p)
        print("")
        S = box_points(point_finder, max_points*p, max_points*(p+1), g, R, S)
        parRiemann.cache_intpoints(S, gpu_already=True)
        osc_part += parRiemann.compute_v_without_derivs(np.array([z]))
    # Final (possibly partial) partition with the leftover points.
    S = gpuarray.zeros(int(num_final_partition * g), dtype=np.double)
    print("%s %s" % (num_partitions*max_points, num_int_points))
    S = box_points(point_finder, num_partitions*max_points, num_int_points,
                   g, R, S)
    parRiemann.cache_intpoints(S, gpu_already=True)
    osc_part += parRiemann.compute_v_without_derivs(np.array([z]))
    print(osc_part)
    return osc_part
开发者ID:abelfunctions,项目名称:abelfunctions,代码行数:32,代码来源:box_points.py
示例5: __init__
def __init__( self, s_dict, synapse_state, dt, debug=False):
    """
    Upload the synapse parameters in ``s_dict`` to the GPU.

    Parameters
    ----------
    s_dict : mapping
        Keys read here: 'id', 'pre', 'ar', 'ad', 'gmax',
        'num_dendrites_cond', 'num_dendrites_I', 'I_pre', 'cond_pre'
        and 'reverse'.
    synapse_state :
        Stored unchanged as ``self.cond``.
    dt :
        Stored as ``self.dt``.
    debug : bool
        Stored as ``self.debug``.
    """
    def upload(key, dtype):
        # Convert s_dict[key] to ``dtype`` and copy it to the GPU.
        return garray.to_gpu(np.asarray(s_dict[key], dtype=dtype))

    def counts_for_ids(key):
        # Flat int32 array holding s_dict[key][i] for every i in s_dict['id'].
        values = [s_dict[key][i] for i in s_dict['id']]
        return np.asarray(values, dtype=np.int32).flatten()

    self.debug = debug
    self.dt = dt
    self.num = len(s_dict['id'])

    self.pre = upload('pre', np.int32)
    self.ar = upload('ar', np.float64)
    self.ad = upload('ad', np.float64)
    self.gmax = upload('gmax', np.float64)

    # Three zero-initialised per-synapse float64 buffers.
    self.a0 = garray.zeros((self.num,), dtype=np.float64)
    self.a1 = garray.zeros((self.num,), dtype=np.float64)
    self.a2 = garray.zeros((self.num,), dtype=np.float64)
    self.cond = synapse_state

    dendrite_cond_counts = counts_for_ids('num_dendrites_cond')
    dendrite_counts = counts_for_ids('num_dendrites_I')

    self._cum_num_dendrite = garray.to_gpu(_0_cumsum(dendrite_counts))
    self._cum_num_dendrite_cond = garray.to_gpu(
        _0_cumsum(dendrite_cond_counts))
    self._num_dendrite = garray.to_gpu(dendrite_counts)
    self._num_dendrite_cond = garray.to_gpu(dendrite_cond_counts)

    self._pre = upload('I_pre', np.int32)
    self._cond_pre = upload('cond_pre', np.int32)
    self._V_rev = upload('reverse', np.double)
    self.I = garray.zeros(self.num, np.double)

    #self._update_I_cond = self._get_update_I_cond_func()
    self._update_I_non_cond = self._get_update_I_non_cond_func()
    self.update = self._get_gpu_kernel()
开发者ID:yiyin,项目名称:neurokernel,代码行数:32,代码来源:AlphaSynapsePre.py
示例6: compute_v_without_derivs
def compute_v_without_derivs(self, Xs, Yinvs, Ts):
    """
    Run the finite-sum kernel (no derivatives) for a set of omegas.

    Parameters
    ----------
    Xs, Yinvs, Ts : array_like
        Converted to aligned, writeable, owned, C-contiguous double arrays
        and uploaded to the GPU.  NOTE(review): presumably K stacked
        g-by-g matrices, since K is derived as ``Xs.size // g**2`` --
        confirm against callers.

    Returns
    -------
    ``sum_reduction`` of the real buffer plus ``1.0j`` times
    ``sum_reduction`` of the imaginary buffer.
    """
    # Turn the parts of omega into gpuarrays.
    flags = ['A', 'W', 'O', 'C']
    Xs = np.require(Xs, dtype=np.double, requirements=flags)
    Yinvs = np.require(Yinvs, dtype=np.double, requirements=flags)
    Ts = np.require(Ts, dtype=np.double, requirements=flags)
    Xs_d = gpuarray.to_gpu(Xs)
    Yinvs_d = gpuarray.to_gpu(Yinvs)
    Ts_d = gpuarray.to_gpu(Ts)
    # N = the number of integer points to sum over,
    # K = the number of different omegas to compute the function at.
    # Floor division keeps both integral on Python 3 (true division would
    # yield floats, which are invalid as array sizes and grid dimensions)
    # and is identical to the original ``/`` on Python 2 integers.
    N = self.Sd.size // self.g
    K = Xs.size // (self.g**2)
    # Create room on the gpu for the real and imaginary finite sums.
    fsum_reald = gpuarray.zeros(N*K, dtype=np.double)
    fsum_imagd = gpuarray.zeros(N*K, dtype=np.double)
    # Turn all scalars into numpy data types.
    Nd = np.int32(N)
    Kd = np.int32(K)
    gd = np.int32(self.g)
    blocksize = (self.tilewidth, self.tileheight, 1)
    gridsize = (N//self.tilewidth + 1, K//self.tileheight + 1, 1)
    self.finite_sum_without_derivs(fsum_reald, fsum_imagd, Xs_d, Yinvs_d, Ts_d,
                                   self.Sd, gd, Nd, Kd,
                                   block=blocksize,
                                   grid=gridsize)
    cuda.Context.synchronize()
    fsums_real = self.sum_reduction(fsum_reald, N, K, Kd, Nd)
    fsums_imag = self.sum_reduction(fsum_imagd, N, K, Kd, Nd)
    return fsums_real + 1.0j*fsums_imag
开发者ID:abelfunctions,项目名称:abelfunctions,代码行数:29,代码来源:riemanntheta_omegas.py
示例7: prepare_for_train
def prepare_for_train(data, label):
    """
    Prepare shape bookkeeping and GPU buffers for one training batch.

    When the batch size (``data.shape[3]``) differs from ``self.batchSize``
    every layer is resized and the per-layer shape lists and output
    buffers are rebuilt.  The data and label are then wrapped/uploaded and
    the final output buffer is (re)allocated if its shape changed.

    Parameters
    ----------
    data :
        4-dimensional array (asserted).
    label :
        Uploaded and cast to float32 unless it is already a ``GPUArray``;
        stored reshaped to ``(label.size, 1)``.

    NOTE(review): ``self``, ``rank`` and ``comm`` are not parameters here;
    they must come from an enclosing scope -- confirm against the full file.
    NOTE(review): the source's indentation was lost; placing everything up
    to and including the layer loop inside the batch-size check is a
    reconstruction -- confirm.
    """
    assert len(data.shape) == 4
    if data.shape[3] != self.batchSize:
        self.batchSize = data.shape[3]
        for layer in self.layers:
            layer.change_batch_size(self.batchSize)
        self.inputShapes = None
        self.imgShapes = None
        self.outputs = []
        self.grads = []
        self.local_outputs = []
        self.local_grads = []
        # Floor division keeps the shape entries integral on Python 3 and
        # matches the original ``/`` on Python 2 integers.
        self.imgShapes = [(self.numColor, self.imgSize // 2,
                           self.imgSize // 2, self.batchSize)]
        # Fixed: the original read ``self.numColr`` (typo for ``numColor``,
        # which is the attribute used everywhere else in this function).
        self.inputShapes = [(self.numColor * (self.imgSize ** 2) // 4,
                             self.batchSize)]
        for layer in self.layers:
            outputShape = layer.get_output_shape()
            row = outputShape[0] * outputShape[1] * outputShape[2]
            col = outputShape[3]
            if layer.type == 'softmax':
                # The softmax output is widened by the communicator size.
                row *= comm.Get_size()
                outputShape = (outputShape[0] * comm.Get_size(), 1, 1,
                               outputShape[3])
            self.inputShapes.append((row, col))
            self.imgShapes.append(outputShape)
            area = make_area(outputShape)
            self.outputs.append(virtual_array(rank, area=area))
            self.local_outputs.append(
                gpuarray.zeros((row, col), dtype=np.float32))
            #inputShape = self.inputShapes[-2]
            #if layer.type == 'fc':
            #  inputShape = (inputShape[0] * comm.Get_size(), inputShape[1])
            #  self.local_grads.append(gpuarray.zeros(inputShape, dtype = np.float32))
            #  area = make_plain_area(inputShape)
            #else:
            #  self.local_grads.append(gpuarray.zeros(inputShape, dtype= np.float32))
            #  area = make_area(self.imgShapes[-2])
            #self.grads.append(virtual_array(rank, area = area))

    area = make_area((self.numColor, self.imgSize // 2, self.imgSize // 2,
                      self.batchSize))
    self.data = virtual_array(
        rank,
        local=gpuarray.to_gpu(data.__getitem__(area.to_slice())),
        area=area)

    if not isinstance(label, GPUArray):
        self.label = gpuarray.to_gpu(label).astype(np.float32)
    else:
        self.label = label
    self.label = self.label.reshape((label.size, 1))

    # NOTE(review): the batch size is read from data.shape[3] above but the
    # case counter advances by data.shape[1] -- confirm which is intended.
    self.numCase += data.shape[1]

    outputShape = self.inputShapes[-1]
    if self.output is None or self.output.shape != outputShape:
        self.output = gpuarray.zeros(outputShape, dtype=np.float32)
开发者ID:iskandr,项目名称:striate,代码行数:60,代码来源:fastnet.py
示例8: logreg_cost
def logreg_cost(self, label, output):
    """
    Update ``self.cost`` and ``self.batchCorrect`` for one batch.

    ``self.cost`` is reallocated as a zeroed ``(batchSize, 1)`` float32
    array when its first dimension does not match ``self.batchSize``.
    A scratch array of the same shape is filled by ``find_col_max_id``
    from ``output`` and compared with ``label`` via ``same_reduce``; the
    cost itself is written by ``logreg_cost_col_reduce``.
    """
    column_shape = (self.batchSize, 1)
    if self.cost.shape[0] != column_shape[0]:
        self.cost = gpuarray.zeros(column_shape, dtype=np.float32)

    predicted_ids = gpuarray.zeros(column_shape, dtype=np.float32)
    find_col_max_id(predicted_ids, output)
    self.batchCorrect = same_reduce(label, predicted_ids)

    logreg_cost_col_reduce(output, label, self.cost)
开发者ID:phecy,项目名称:striate,代码行数:7,代码来源:layer.py
示例9: update_ptrs
def update_ptrs(self):
    """
    Rebuild every cached ``get_gpu_ptrs`` result and the scratch buffers.

    Refreshes the ``*_ptrs`` attributes from their source attributes,
    reallocates the temporary arrays used for warping cost computations,
    uploads ``self.dims`` as int32 and finally marks the pointers valid.
    """
    def refresh(pairs):
        # For each (target, source) pair: self.<target> = ptrs of self.<source>.
        for target, source in pairs:
            setattr(self, target, get_gpu_ptrs(getattr(self, source)))

    refresh((
        ('tps_param_ptrs', 'tps_params'),
        ('trans_d_ptrs', 'trans_d'),
        ('lin_dd_ptrs', 'lin_dd'),
        ('w_nd_ptrs', 'w_nd'),
    ))

    # Per-bending-coefficient matrices are kept in dicts keyed by coefficient.
    for coef in self.bend_coefs:
        self.proj_mat_ptrs[coef] = get_gpu_ptrs(self.proj_mats[coef])
        self.offset_mat_ptrs[coef] = get_gpu_ptrs(self.offset_mats[coef])

    refresh((
        ('pt_ptrs', 'pts'),
        ('kernel_ptrs', 'kernels'),
        ('pt_w_ptrs', 'pts_w'),
        ('pt_t_ptrs', 'pts_t'),
        ('corr_cm_ptrs', 'corr_cm'),
        ('corr_rm_ptrs', 'corr_rm'),
        ('r_coef_ptrs', 'r_coefs'),
        ('c_coef_rn_ptrs', 'c_coefs_rn'),
        ('c_coef_cn_ptrs', 'c_coefs_cn'),
    ))

    # temporary space for warping cost computations
    self.warp_err = gpuarray.zeros((self.N, MAX_CLD_SIZE), np.float32)
    self.bend_res_mat = gpuarray.zeros((DATA_DIM * self.N, DATA_DIM),
                                       np.float32)
    # One DATA_DIM-row slice of the stacked matrix per index in range(N).
    self.bend_res = [
        self.bend_res_mat[k * DATA_DIM : (k + 1) * DATA_DIM]
        for k in range(self.N)
    ]
    self.bend_res_ptrs = get_gpu_ptrs(self.bend_res)

    self.dims_gpu = gpuarray.to_gpu(np.array(self.dims, dtype=np.int32))
    self.ptrs_valid = True
开发者ID:rll,项目名称:lfd,代码行数:27,代码来源:batchtps.py
示例10: compute_v_without_derivs
def compute_v_without_derivs(self, Z):
    """
    Run the finite-sum kernel (no derivatives) for the points in ``Z``.

    Parameters
    ----------
    Z : ndarray
        Complex input; its real and imaginary parts are converted to
        aligned, writeable, owned, C-contiguous double arrays and uploaded.
        The imaginary part's device array is also kept as ``self.yd``.

    Returns
    -------
    ``sum_reduction`` of the real buffer plus ``1.0j`` times
    ``sum_reduction`` of the imaginary buffer.
    """
    # Turn the numpy set Z into gpuarrays.
    flags = ['A', 'W', 'O', 'C']
    x = np.require(Z.real, dtype=np.double, requirements=flags)
    y = np.require(Z.imag, dtype=np.double, requirements=flags)
    xd = gpuarray.to_gpu(x)
    yd = gpuarray.to_gpu(y)
    self.yd = yd
    # N = the number of integer points to sum over,
    # K = the number of values to compute the function at.
    # Floor division keeps both integral on Python 3 (true division would
    # yield floats, which are invalid as array sizes and grid dimensions)
    # and is identical to the original ``/`` on Python 2 integers.
    N = self.Sd.size // self.g
    K = Z.size // self.g
    # Create room on the gpu for the real and imaginary finite sums.
    fsum_reald = gpuarray.zeros(N*K, dtype=np.double)
    fsum_imagd = gpuarray.zeros(N*K, dtype=np.double)
    # Make all scalars into numpy data types.
    Nd = np.int32(N)
    Kd = np.int32(K)
    gd = np.int32(self.g)
    blocksize = (self.tilewidth, self.tileheight, 1)
    gridsize = (N//self.tilewidth + 1, K//self.tileheight + 1, 1)
    self.finite_sum_without_derivs(fsum_reald, fsum_imagd, xd, yd,
                                   self.Sd, gd, Nd, Kd,
                                   block=blocksize,
                                   grid=gridsize)
    cuda.Context.synchronize()
    fsums_real = self.sum_reduction(fsum_reald, N, K, Kd, Nd)
    fsums_imag = self.sum_reduction(fsum_imagd, N, K, Kd, Nd)
    return fsums_real + 1.0j*fsums_imag
开发者ID:abelfunctions,项目名称:abelfunctions,代码行数:30,代码来源:riemanntheta_cuda.py
示例11: setup_pdf_eval
def setup_pdf_eval(self, event_hit, event_time, event_charge, min_twidth,
                   trange, min_qwidth, qrange, min_bin_content=10,
                   time_only=True):
    """Prepare the GPU arrays needed to evaluate the PDF for one event.

    With pdf_eval the PDF is evaluated at a single point per channel
    while the Monte Carlo runs.  The effective bin around the point of
    interest can be as small as (`min_twidth`, `min_qwidth`) but grows
    until at least `min_bin_content` Monte Carlo events fall inside it.

    event_hit: ndarray
        Hit or not-hit status for each channel in the detector.
    event_time: ndarray
        Hit time for each channel in the detector.  If channel
        not hit, the time will be ignored.
    event_charge: ndarray
        Integrated charge for each channel in the detector.
        If channel not hit, the charge will be ignored.
    min_twidth: float
        Minimum bin size in the time dimension
    trange: (float, float)
        Range of time dimension in PDF
    min_qwidth: float
        Minimum bin size in charge dimension
    qrange: (float, float)
        Range of charge dimension in PDF
    min_bin_content: int
        The bin will be expanded to include at least this many events
    time_only: bool
        If True, only the time observable will be used in the PDF.
        Must be True; anything else fails an assertion.
    """
    num_channels = len(event_hit)
    self.event_nhit = count_nonzero(event_hit)

    # Hit offset -> channel id: indices of the non-zero entries of event_hit.
    hit_to_channel = np.where(event_hit)[0].astype(np.uint32)
    self.map_hit_offset_to_channel_id = hit_to_channel
    self.map_hit_offset_to_channel_id_gpu = ga.to_gpu(hit_to_channel)

    # Channel id -> hit offset: running hit count minus one, clamped at 0.
    channel_to_hit = np.maximum(0, event_hit.cumsum() - 1).astype(np.uint32)
    self.map_channel_id_to_hit_offset = channel_to_hit
    self.map_channel_id_to_hit_offset_gpu = ga.to_gpu(channel_to_hit)

    # Per-channel event data on the device.
    self.event_hit_gpu = ga.to_gpu(event_hit.astype(np.uint32))
    self.event_time_gpu = ga.to_gpu(event_time.astype(np.float32))
    self.event_charge_gpu = ga.to_gpu(event_charge.astype(np.float32))

    # Per-channel counters, zero-initialised.
    self.eval_hitcount_gpu = ga.zeros(num_channels, dtype=np.uint32)
    self.eval_bincount_gpu = ga.zeros(num_channels, dtype=np.uint32)

    # min_bin_content slots per hit, pre-filled with 1e9.
    self.nearest_mc_gpu = ga.empty(shape=self.event_nhit * min_bin_content,
                                   dtype=np.float32)
    self.nearest_mc_gpu.fill(1e9)

    self.min_twidth = min_twidth
    self.trange = trange
    self.min_qwidth = min_qwidth
    self.qrange = qrange
    self.min_bin_content = min_bin_content

    assert time_only # Only support time right now
    self.time_only = time_only
开发者ID:BenLand100,项目名称:chroma,代码行数:59,代码来源:pdf.py
示例12: fprop
def fprop(self, input, output):
    """
    Forward pass writing into ``output`` in place.

    Steps: reduce ``input`` into a ``(1, batchSize)`` row with
    ``col_max_reduce``, combine it with ``input`` into ``output`` using
    ``alpha=-1``, replace ``output`` by its element-wise exponential,
    accumulate a column-sum row and divide ``output`` by it.

    NOTE(review): this looks like a column-wise softmax; the helper
    semantics are inferred from their names -- confirm.
    """
    # Local names avoid shadowing the ``max`` / ``sum`` builtins.
    col_max = gpuarray.zeros((1, self.batchSize), dtype=np.float32)
    col_max_reduce(col_max, input)
    add_vec_to_cols(input, col_max, output, alpha=-1)

    gpu_copy_to(cumath.exp(output), output)

    col_sum = gpuarray.zeros(col_max.shape, dtype=np.float32)
    add_col_sum_to_vec(col_sum, output, alpha=0)
    div_vec_to_cols(output, col_sum)
开发者ID:smessing,项目名称:striate,代码行数:8,代码来源:layer.py
示例13: createHashTable
def createHashTable(kd, vd, capacity):
    """
    Initialise the hash-table globals of the compiled module ``mod``.

    Writes ``capacity`` to the ``table_capacity`` global, then allocates
    a zeroed float32 array of ``capacity*vd`` rows, an integer array of
    ``capacity*2`` rows filled with -1, and a zeroed short array of
    ``capacity*kd`` rows, and copies from each into the ``table_values``,
    ``table_entries`` and ``table_keys`` globals respectively.

    Parameters
    ----------
    kd : int
        Multiplier for the length of the keys array.
    vd : int
        Multiplier for the length of the values array.
    capacity : int
        Table capacity.

    NOTE(review): the C reference (kept in the comments) stores each
    buffer's *address* in the symbol, whereas ``memcpy_dtod`` copies the
    buffer's leading bytes; the arrays are also local and released on
    return -- confirm this is what the kernels expect.
    """
    table_capacity_gpu, _ = mod.get_global('table_capacity')
    cuda.memcpy_htod(table_capacity_gpu, np.uint([capacity]))
    # CUDA_SAFE_CALL(cudaMemcpyToSymbol(table_capacity,
    #                                   &capacity,
    #                                   sizeof(unsigned int)));

    table_vals_gpu, table_vals_size = mod.get_global('table_values')  # pointer-2-pointer
    values_gpu = gpuarray.zeros((capacity*vd, 1), dtype=np.float32)
    # cuda.memset_d32(values_gpu.gpudata, 0, values_gpu.size)
    cuda.memcpy_dtod(table_vals_gpu, values_gpu.gpudata, table_vals_size)
    # float *values;
    # allocateCudaMemory((void**)&values, capacity*vd*sizeof(float));
    # CUDA_SAFE_CALL(cudaMemset((void *)values, 0, capacity*vd*sizeof(float)));
    # CUDA_SAFE_CALL(cudaMemcpyToSymbol(table_values,
    #                                   &values,
    #                                   sizeof(float *)));

    table_entries, table_entries_size = mod.get_global('table_entries')
    # ``np.int`` was only an alias of the builtin ``int`` and has been
    # removed from NumPy; ``dtype=int`` selects exactly the same dtype.
    entries_gpu = gpuarray.empty((capacity*2, 1), dtype=int)
    entries_gpu.fill(-1)
    # cuda.memset_d32(entries_gpu.gpudata, 1, entries_gpu.size)
    cuda.memcpy_dtod(table_entries, entries_gpu.gpudata, table_entries_size)
    # int *entries;
    # allocateCudaMemory((void **)&entries, capacity*2*sizeof(int));
    # CUDA_SAFE_CALL(cudaMemset((void *)entries, -1, capacity*2*sizeof(int)));
    # CUDA_SAFE_CALL(cudaMemcpyToSymbol(table_entries,
    #                                   &entries,
    #                                   sizeof(unsigned int *)));

    ########################################
    # Assuming LINEAR_D_MEMORY not defined #
    ########################################
    # #ifdef LINEAR_D_MEMORY
    # char *ranks;
    # allocateCudaMemory((void**)&ranks, capacity*sizeof(char));
    # CUDA_SAFE_CALL(cudaMemcpyToSymbol(table_rank,
    #                                   &ranks,
    #                                   sizeof(char *)));
    #
    # signed short *zeros;
    # allocateCudaMemory((void**)&zeros, capacity*sizeof(signed short));
    # CUDA_SAFE_CALL(cudaMemcpyToSymbol(table_zeros,
    #                                   &zeros,
    #                                   sizeof(char *)));
    #
    # #else
    table_keys_gpu, table_keys_size = mod.get_global('table_keys')
    keys_gpu = gpuarray.zeros((capacity*kd, 1), dtype=np.short)
    # cuda.memset_d32(keys_gpu.gpudata, 0, keys_gpu.size)
    cuda.memcpy_dtod(table_keys_gpu, keys_gpu.gpudata, table_keys_size)
开发者ID:AdrianLsk,项目名称:permutohedral_pycuda,代码行数:58,代码来源:filter_pycuda.py
示例14: logreg_cost_multiview
def logreg_cost_multiview(self, label, output, num_view):
    """
    Update ``self.cost`` and ``self.batchCorrect`` for a multi-view batch.

    Parameters
    ----------
    label :
        Passed to ``same_reduce_multiview`` and ``logreg_cost_col_reduce``.
    output :
        2-D array; only its first ``batch_size // num_view`` columns are
        used for the cost.
    num_view : int
        Number of views the batch is divided into.
    """
    # Floor division: ``unit`` is used as an array dimension and a column
    # bound, so it must stay an integer on Python 3 as well (identical to
    # the original ``/`` on Python 2 integers).
    unit = self.batch_size // num_view
    if self.cost.shape[0] != unit:
        self.cost = gpuarray.zeros((unit, 1), dtype=np.float32)

    maxid = gpuarray.zeros((self.batch_size, 1), dtype=np.float32)
    find_col_max_id(maxid, output)
    self.batchCorrect = same_reduce_multiview(label, maxid, num_view)

    # The cost is computed on columns [0, unit) of ``output`` only.
    tmp = gpuarray.zeros((output.shape[0], unit), dtype=np.float32)
    gpu_partial_copy_to(output, tmp, 0, output.shape[0], 0, unit)
    logreg_cost_col_reduce(tmp, label, self.cost)
开发者ID:alemagnani,项目名称:fastnet,代码行数:10,代码来源:layer.py
示例15: fprop
def fprop(self, input, output, train=TRAIN):
    """
    Forward pass writing into ``output`` in place.

    Steps: reduce ``input`` into a ``(1, batchSize)`` row with
    ``col_max_reduce``, combine it with ``input`` into ``output`` using
    ``alpha=-1``, apply ``eltwise_exp``, accumulate a column-sum row and
    divide ``output`` by it.  When the module-level ``PFout`` flag is
    truthy the result is printed with ``print_matrix``.

    ``train`` is accepted but not used here.

    NOTE(review): this looks like a column-wise softmax; the helper
    semantics are inferred from their names -- confirm.
    """
    # Local names avoid shadowing the ``max`` / ``sum`` builtins.
    col_max = gpuarray.zeros((1, self.batchSize), dtype=np.float32)
    col_max_reduce(col_max, input)
    add_vec_to_cols(input, col_max, output, alpha=-1)

    eltwise_exp(output)

    col_sum = gpuarray.zeros(col_max.shape, dtype=np.float32)
    add_col_sum_to_vec(col_sum, output, alpha=0)
    div_vec_to_cols(output, col_sum)

    if PFout:
        print_matrix(output, self.name)
开发者ID:phecy,项目名称:striate,代码行数:10,代码来源:layer.py
示例16: __init__
def __init__(self, A1, A2, left, use_batch=False):
    """Creates a new LinearOperator interface to the superoperator E.

    This is a wrapper to be used with SciPy's sparse linear algebra routines.

    Parameters
    ----------
    A1 : ndarray
        Ket parameter tensor.
    A2 : ndarray
        Bra parameter tensor.
    left : bool
        Whether to multiply with a vector to the left (or to the right).
    use_batch : bool
        When true, batch pointer arrays are built with ``get_batch_ptrs``;
        otherwise the ``*_p`` attributes are set to None.

    NOTE(review): the source's indentation was lost; the ones/zeros/streams
    setup is assumed to run regardless of ``use_batch`` -- confirm.
    """
    num_first = len(A1[0])

    # Device copies of both tensors, plus two sets of scratch arrays shaped
    # like the first group of A1.
    self.A1G = [[garr.to_gpu(a) for a in A1k] for A1k in A1]
    self.A2G = [[garr.to_gpu(a) for a in A2k] for A2k in A2]
    self.tmp = [garr.empty_like(a) for a in self.A1G[0]]
    self.tmp2 = [garr.empty_like(a) for a in self.A1G[0]]

    self.use_batch = use_batch
    self.left = left

    self.D = A1[0].shape[1]
    self.shape = (self.D**2, self.D**2)
    self.dtype = sp.dtype(A1[0][0].dtype)

    self.calls = 0

    self.out = garr.empty((self.D, self.D), dtype=self.dtype)
    self.xG = garr.empty((self.D, self.D), dtype=self.dtype)

    if not use_batch:
        self.A1G_p = None
        self.A2G_p = None
        self.tmp_p = None
        self.tmp2_p = None
        self.xG_p = None
        self.out_p = None
    else:
        self.A1G_p = [get_batch_ptrs(group) for group in self.A1G]
        self.A2G_p = [get_batch_ptrs(group) for group in self.A2G]
        self.tmp_p = get_batch_ptrs(self.tmp)
        self.tmp2_p = get_batch_ptrs(self.tmp2)
        self.xG_p = get_batch_ptrs([self.xG] * num_first)
        self.out_p = get_batch_ptrs([self.out] * num_first)

    # One-element complex device scalars: a list of ones and a list of zeros.
    self.ones = [garr.zeros(1, dtype=sp.complex128) for _ in range(num_first)]
    self.ones = [scalar.fill(1) for scalar in self.ones]
    self.zeros = [garr.zeros(1, dtype=sp.complex128) for _ in range(num_first)]

    # One CUDA stream per index along the first axis of A1[0].
    self.streams = [cd.Stream() for _ in range(A1[0].shape[0])]

    self.hdl = cb.cublasCreate()
开发者ID:amilsted,项目名称:evoMPS,代码行数:55,代码来源:cuda_alternatives.py
示例17: get_next_batch
def get_next_batch(self, batch_size):
    """
    Return the next batch as ``BatchData(data, labels, epoch)``.

    Starts the reader and loads the first batch on demand.

    * Single-view mode (``self.multiview`` false): columns are copied out
      of the current GPU batch.  When ``batch_size`` or fewer columns
      remain, the remainder is returned, the index is reset and the next
      batch is loaded.
    * Multi-view mode: the CPU batch is treated as ``num_view`` equally
      wide groups of columns; the same column window is taken from every
      group, concatenated, and copied to the GPU.

    The epoch is captured before any refill, so it always describes the
    batch the returned data came from.
    """
    if self._reader is None:
        self._start_read()
    if self._gpu_batch is None:
        self._fill_reserved_data()

    if not self.multiview:
        height, width = self._gpu_batch.data.shape
        gpu_data = self._gpu_batch.data
        gpu_labels = self._gpu_batch.labels
        epoch = self._gpu_batch.epoch
        if self.index + batch_size >= width:
            # Not enough columns left: return the tail and load the next batch.
            width = width - self.index
            labels = gpu_labels[self.index:self.index + batch_size]
            data = gpuarray.zeros((height, width), dtype=np.float32)
            gpu_partial_copy_to(gpu_data, data, 0, height,
                                self.index, self.index + width)
            self.index = 0
            self._fill_reserved_data()
        else:
            labels = gpu_labels[self.index:self.index + batch_size]
            data = gpuarray.zeros((height, batch_size), dtype=np.float32)
            gpu_partial_copy_to(gpu_data, data, 0, height,
                                self.index, self.index + batch_size)
            self.index += batch_size
    else:
        # multiview provider
        # number of views should be 10
        # when using multiview, do not pre-move data and labels to gpu
        height, width = self._cpu_batch.data.shape
        cpu_data = self._cpu_batch.data
        cpu_labels = self._cpu_batch.labels
        epoch = self._cpu_batch.epoch
        # Floor division: ``width`` is used as a slice bound and array
        # dimension, so it must stay an integer on Python 3 (identical to
        # the original ``/=`` on Python 2 integers).
        width //= self.num_view
        if self.index + batch_size >= width:
            batch_size = width - self.index
        labels = cpu_labels[self.index:self.index + batch_size]
        data = np.zeros((height, batch_size * self.num_view),
                        dtype=np.float32)
        for i in range(self.num_view):
            src_start = self.index + width * i
            data[:, i * batch_size:(i + 1) * batch_size] = \
                cpu_data[:, src_start:src_start + batch_size]
        data = copy_to_gpu(np.require(data, requirements='C'))
        labels = copy_to_gpu(np.require(labels, requirements='C'))
        # NOTE(review): with integer division this is 0 whenever the sum is
        # below ``width`` (and 1 when it equals it), so the index does not
        # advance between calls; ``%`` may have been intended -- confirm.
        self.index = (self.index + batch_size) // width
    return BatchData(data, labels, epoch)
开发者ID:rjpower,项目名称:fastnet,代码行数:53,代码来源:data.py
示例18: cuda_hogbom
def cuda_hogbom(gpu_dirty,gpu_dpsf,gpu_cpsf,thresh=0.2,damp=1,gain=0.1,prefix='test'):
    """
    Use CUDA to implement the Hogbom CLEAN algorithm

    A nice description of the algorithm is given by the NRAO, here:
    http://www.cv.nrao.edu/~abridle/deconvol/node8.html

    Parameters:
    * gpu_dirty: The dirty image (2D array; modified in place)
    * gpu_dpsf: The dirty beam psf, passed to the beam-subtraction kernel
    * gpu_cpsf: The clean beam psf, passed to the beam-subtraction kernel
    * thresh: User-defined threshold to stop iteration, as a fraction of
      the max pixel intensity (float)
    * damp: The damping factor to scale the dirty beam by (not used in
      this function body)
    * gain: Multiplies the current maximum before it is handed to the
      beam-subtraction kernel
    * prefix: prefix for output image file names (not used in this
      function body)

    Returns the tuple ``(gpu_dirty, gpu_pmodel, gpu_clean)``.
    """
    height, width = np.shape(gpu_dirty)
    ## Grid parameters - #improvable#
    tsize = 8
    blocksize = (int(tsize), int(tsize), 1)          # threads per block (x,y,z)
    gridsize = (width // tsize, height // tsize)     # thread blocks (x,y)
    ## Setup clean image and point source model
    gpu_pmodel = gpu.zeros([height, width], dtype=np.float32)
    gpu_clean = gpu.zeros([height, width], dtype=np.float32)
    ## Setup GPU constants
    gpu_max_id = gpu.to_gpu(np.int32(0))
    imax = gpu_getmax(gpu_dirty)
    thresh_val = np.float32(thresh*imax)
    ## Steps 1-3 - Iterate until threshold has been reached
    # The print calls in this function are written so that they emit the
    # same text under both Python 2 and Python 3.
    t_start = time.time()
    i = 0
    while abs(imax) > thresh_val:
        if i % 100 == 0:
            print("Hogbom iteration %s" % i)
        ## Step 1 - Find max
        find_max_kernel(gpu_dirty, gpu_max_id, imax, np.int32(width),
                        np.int32(height), gpu_pmodel,
                        block=blocksize, grid=gridsize)
        ## Step 2 - Subtract the beam (assume that it is normalized to have max 1)
        ##          This kernel simultaneously reconstructs the CLEANed image.
        if PLOTME:
            max_id = gpu_max_id.get()
            # ``//`` keeps the row index integral on Python 3.
            print("Subtracting dirty beam " + str(i) + ", maxval=%0.8f" % imax
                  + ' at x=' + str(max_id % width)
                  + ', y=' + str(max_id // width))
        sub_beam_kernel(gpu_dirty, gpu_dpsf, gpu_max_id, gpu_clean, gpu_cpsf,
                        np.float32(gain*imax), np.int32(width),
                        np.int32(height), block=blocksize, grid=gridsize)
        i += 1
        ## Step 3 - Find maximum value using gpuarray
        imax = gpu_getmax(gpu_dirty)
    t_end = time.time()
    t_full = t_end - t_start
    print("Hogbom execution time %0.5f" % t_full + ' s')
    # Guard: if the threshold is already met no iteration runs and the
    # original per-iteration average divided by zero.
    if i > 0:
        print("\t%0.5f" % (t_full/i) + ' s per iteration')
    ## Step 4 - Add the residuals back in
    add_noise_kernel(gpu_dirty, gpu_clean, np.float32(width+height))
    return gpu_dirty, gpu_pmodel, gpu_clean
开发者ID:shaoguangleo,项目名称:autoFits,代码行数:51,代码来源:gICLEAN.py
示例19: __init__
def __init__(self,**params):
    """
    Build the projection, then mirror its sparse weights onto the GPU.

    Hack-ish way to avoid initialisation until the weights are transferred:
    ``apply_output_fns_init`` is forced to False while the parent
    constructor runs and restored afterwards, at which point
    ``apply_learn_output_fns`` is called if the flag is set.
    """
    should_apply = self.apply_output_fns_init
    params['apply_output_fns_init'] = False
    super(GPUSparseCFProjection,self).__init__(**params)

    # Transferring the weights:
    self.pycuda_stream = cuda.Stream()
    transposed = self.weights.toSparseArray().transpose()
    self.weights_gpu = cusparse.CSR.to_CSR(transposed)

    # Row and column indices for the *transposed* matrix, sorted by
    # (row, column). Used for Hebbian learning and normalisation.
    nzcols, nzrows = self.weights.nonzero()
    index_pairs = sorted(zip(nzrows, nzcols))
    nzrows = [row for row, _ in index_pairs]
    nzcols = [col for _, col in index_pairs]

    # Page-locked host memory for the activity so that the GPU can transfer
    # data to main memory without the involvement of the CPU.
    self.activity = cuda.pagelocked_empty(self.activity.shape, np.float32)

    dim0 = self.weights_gpu.shape[0]
    dim1 = self.weights_gpu.shape[1]
    self.activity_gpu_buffer = gpuarray.zeros(shape=(dim0,), dtype=np.float32)
    self.input_buffer_pagelocked = cuda.pagelocked_empty(
        shape=(dim1,), dtype=np.float32,
        mem_flags=cuda.host_alloc_flags.WRITECOMBINED)
    self.input_buffer = gpuarray.zeros(shape=(dim1,), dtype=np.float32)
    self.norm_total_gpu = gpuarray.zeros(shape=(dim0,), dtype=np.float32)

    # Getting the index lists on the GPU:
    self.nzcount = self.weights.getnnz()
    self.nzrows_gpu = gpuarray.to_gpu(np.array(nzrows, np.int32))
    self.nzcols_gpu = gpuarray.to_gpu(np.array(nzcols, np.int32))

    # Helper array of ones for normalization:
    self.norm_ones_gpu = gpuarray.to_gpu(np.array([1.0] * dim1, np.float32))

    # Kernel that applies the normalisation:
    self.normalize_kernel = ElementwiseKernel(
        "int *nzrows, float *norm_total, float *weights",
        "weights[i] *= norm_total[nzrows[i]]",
        "divisive_normalize")

    # Kernel that calculates the learning:
    self.hebbian_kernel = ElementwiseKernel(
        "float single_conn_lr, int *row, int *col, float *src_activity, float *dest_activity, float *result",
        "result[i] += single_conn_lr * src_activity[col[i]] * dest_activity[row[i]]",
        "hebbian_learning")

    # Restore the flag that was suppressed for the parent constructor.
    params['apply_output_fns_init'] = should_apply
    self.apply_output_fns_init = should_apply
    if self.apply_output_fns_init:
        self.apply_learn_output_fns()
开发者ID:Tasignotas,项目名称:topographica_mirror,代码行数:51,代码来源:projection.py
示例20: append_layer
def append_layer(self, layer):
    """
    Register ``layer`` and allocate its output and gradient buffers.

    The layer's 4-element output shape is flattened to
    ``(shape[0]*shape[1]*shape[2], shape[3])`` for the output buffer.  The
    gradient buffer takes the shape that was last in ``self.inputShapes``
    before this call, so that list must already be non-empty.

    A one-line summary ``name[type]:shape`` is written to stderr.
    """
    self.layers.append(layer)
    if layer.type == 'conv':
        self.numConv += 1

    outputShape = layer.get_output_shape()
    row = outputShape[0] * outputShape[1] * outputShape[2]
    col = outputShape[3]

    self.inputShapes.append((row, col))
    self.imgShapes.append(outputShape)

    self.outputs.append(gpuarray.zeros((row, col), dtype=np.float32))
    # inputShapes[-2] is the entry that preceded the one appended above.
    self.grads.append(gpuarray.zeros(self.inputShapes[-2], dtype=np.float32))

    # ``print >> sys.stderr, ...`` is Python 2 only (a TypeError at run time
    # on Python 3); an explicit write emits the same text on both.
    sys.stderr.write('%s[%s]:%s\n' % (layer.name, layer.type, outputShape))
开发者ID:iskandr,项目名称:striate,代码行数:14,代码来源:fastnet.py
注：本文中的pycuda.gpuarray.zeros函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。
请发表评论