This article collects typical usage examples of the sum function from Python's pycuda.gpuarray module. If you have been wondering what exactly pycuda.gpuarray.sum does, how to call it, or what real-world usage looks like, the hand-picked code examples below may help.
Twenty code examples of the sum function are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
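Before the project examples, here is a minimal, self-contained sketch of the basic call pattern (the array size and dtype are illustrative only, not taken from any example below):

import numpy as np
import pycuda.autoinit                  # creates a CUDA context on import
import pycuda.gpuarray as gpuarray

# Copy a host array to the device, reduce it there, and fetch the scalar result.
x = np.random.rand(1 << 20).astype(np.float32)
x_gpu = gpuarray.to_gpu(x)

total = gpuarray.sum(x_gpu)             # returns a 0-d GPUArray holding the sum
print(total.get(), x.sum())             # .get() copies the result back to the host

gpuarray.sum also accepts an allocator argument for the result buffer, as Example 16 below demonstrates.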
Example 1: apply_mds_parallel2
def apply_mds_parallel2(self):
    print("Applying parallel MDS via SMACOF...")
    current_time = time.clock()
    graph_d = gpu.to_gpu(np.float32(self.graph))
    row_sum_d = gpu.to_gpu(np.float32(np.zeros(self.N)))
    score_current_d = gpu.to_gpu(np.float32(np.random.uniform(0, 10, size=self.N)))
    score_next_d = gpu.to_gpu(np.float32(np.zeros(self.N)))
    sigma_d = gpu.to_gpu(np.float32(np.zeros(self.N)))
    delta_d = gpu.to_gpu(np.float32(np.zeros(self.N)))
    mds2_kernel = cuda_compile(_kernel_source, 'mds2_kernel')
    stress = 1
    while (stress > 0.001):
        mds2_kernel(
            graph_d,
            row_sum_d,
            score_current_d,
            score_next_d,
            sigma_d,
            delta_d,
            np.int32(self.N),
            block=(1024, 1, 1),
            grid=(int(self.N / 1024 + 1), int(1))
        )
        score_current_d = score_next_d
        score_next_d = gpu.to_gpu(np.float32(np.zeros(self.N)))
        stress = gpu.sum(sigma_d).get() / gpu.sum(delta_d).get()
    self.outdata = score_current_d.get()
    print("Time to apply parallel MDS: %6.2f s" % (time.clock() - current_time))
Developer: physicsistic, Project: recommend_songs, Lines: 28, Source file: isomap_parallel.py
Example 2: psiDerivativecomputations
def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
    ARD = (len(lengthscale)!=1)
    N,M,Q = self.get_dimensions(Z, variational_posterior)
    psi1_gpu = self.gpuCache['psi1_gpu']
    psi2n_gpu = self.gpuCache['psi2n_gpu']
    l_gpu = self.gpuCache['l_gpu']
    Z_gpu = self.gpuCache['Z_gpu']
    mu_gpu = self.gpuCache['mu_gpu']
    S_gpu = self.gpuCache['S_gpu']
    gamma_gpu = self.gpuCache['gamma_gpu']
    dvar_gpu = self.gpuCache['dvar_gpu']
    dl_gpu = self.gpuCache['dl_gpu']
    dZ_gpu = self.gpuCache['dZ_gpu']
    dmu_gpu = self.gpuCache['dmu_gpu']
    dS_gpu = self.gpuCache['dS_gpu']
    dgamma_gpu = self.gpuCache['dgamma_gpu']
    grad_l_gpu = self.gpuCache['grad_l_gpu']
    grad_mu_gpu = self.gpuCache['grad_mu_gpu']
    grad_S_gpu = self.gpuCache['grad_S_gpu']
    grad_gamma_gpu = self.gpuCache['grad_gamma_gpu']
    log_denom1_gpu = self.gpuCache['log_denom1_gpu']
    log_denom2_gpu = self.gpuCache['log_denom2_gpu']
    log_gamma_gpu = self.gpuCache['log_gamma_gpu']
    log_gamma1_gpu = self.gpuCache['log_gamma1_gpu']
    if self.GPU_direct:
        dL_dpsi1_gpu = dL_dpsi1
        dL_dpsi2_gpu = dL_dpsi2
        dL_dpsi0_sum = gpuarray.sum(dL_dpsi0).get()
    else:
        dL_dpsi1_gpu = self.gpuCache['dL_dpsi1_gpu']
        dL_dpsi2_gpu = self.gpuCache['dL_dpsi2_gpu']
        dL_dpsi1_gpu.set(np.asfortranarray(dL_dpsi1))
        dL_dpsi2_gpu.set(np.asfortranarray(dL_dpsi2))
        dL_dpsi0_sum = dL_dpsi0.sum()
    self.reset_derivative()
    # t=self.g_psi1compDer(dvar_gpu,dl_gpu,dZ_gpu,dmu_gpu,dS_gpu,dL_dpsi1_gpu,psi1_gpu, np.float64(variance),l_gpu,Z_gpu,mu_gpu,S_gpu, np.int32(N), np.int32(M), np.int32(Q), block=(self.threadnum,1,1), grid=(self.blocknum,1),time_kernel=True)
    # print 'g_psi1compDer '+str(t)
    # t=self.g_psi2compDer(dvar_gpu,dl_gpu,dZ_gpu,dmu_gpu,dS_gpu,dL_dpsi2_gpu,psi2n_gpu, np.float64(variance),l_gpu,Z_gpu,mu_gpu,S_gpu, np.int32(N), np.int32(M), np.int32(Q), block=(self.threadnum,1,1), grid=(self.blocknum,1),time_kernel=True)
    # print 'g_psi2compDer '+str(t)
    self.g_psi1compDer.prepared_call((self.blocknum,1),(self.threadnum,1,1),dvar_gpu.gpudata,dl_gpu.gpudata,dZ_gpu.gpudata,dmu_gpu.gpudata,dS_gpu.gpudata,dgamma_gpu.gpudata,dL_dpsi1_gpu.gpudata,psi1_gpu.gpudata, log_denom1_gpu.gpudata, log_gamma_gpu.gpudata, log_gamma1_gpu.gpudata, np.float64(variance),l_gpu.gpudata,Z_gpu.gpudata,mu_gpu.gpudata,S_gpu.gpudata,gamma_gpu.gpudata,np.int32(N), np.int32(M), np.int32(Q))
    self.g_psi2compDer.prepared_call((self.blocknum,1),(self.threadnum,1,1),dvar_gpu.gpudata,dl_gpu.gpudata,dZ_gpu.gpudata,dmu_gpu.gpudata,dS_gpu.gpudata,dgamma_gpu.gpudata,dL_dpsi2_gpu.gpudata,psi2n_gpu.gpudata, log_denom2_gpu.gpudata, log_gamma_gpu.gpudata, log_gamma1_gpu.gpudata, np.float64(variance),l_gpu.gpudata,Z_gpu.gpudata,mu_gpu.gpudata,S_gpu.gpudata,gamma_gpu.gpudata,np.int32(N), np.int32(M), np.int32(Q))
    dL_dvar = dL_dpsi0_sum + gpuarray.sum(dvar_gpu).get()
    sum_axis(grad_mu_gpu,dmu_gpu,N*Q,self.blocknum)
    dL_dmu = grad_mu_gpu.get()
    sum_axis(grad_S_gpu,dS_gpu,N*Q,self.blocknum)
    dL_dS = grad_S_gpu.get()
    sum_axis(grad_gamma_gpu,dgamma_gpu,N*Q,self.blocknum)
    dL_dgamma = grad_gamma_gpu.get()
    dL_dZ = dZ_gpu.get()
    if ARD:
        sum_axis(grad_l_gpu,dl_gpu,Q,self.blocknum)
        dL_dlengscale = grad_l_gpu.get()
    else:
        dL_dlengscale = gpuarray.sum(dl_gpu).get()
    return dL_dvar, dL_dlengscale, dL_dZ, dL_dmu, dL_dS, dL_dgamma
Developer: Arthurkorn, Project: GPy, Lines: 60, Source file: ssrbf_psi_gpucomp.py
Example 3: _sum_axis
def _sum_axis(x_gpu, axis=None, out=None, calc_mean=False, ddof=0,
              keepdims=False):
    global _global_cublas_allocator
    assert isinstance(ddof, numbers.Integral)
    if axis is None or len(x_gpu.shape) <= 1:
        out_shape = (1,)*len(x_gpu.shape) if keepdims else ()
        if calc_mean == False:
            return gpuarray.sum(x_gpu).reshape(out_shape)
        else:
            return gpuarray.sum(x_gpu).reshape(out_shape) / (x_gpu.dtype.type(x_gpu.size-ddof))
    if axis < 0:
        axis += 2
    if axis > 1:
        raise ValueError('invalid axis')
    if x_gpu.flags.c_contiguous:
        n, m = x_gpu.shape[1], x_gpu.shape[0]
        lda = x_gpu.shape[1]
        trans = "n" if axis == 0 else "t"
        sum_axis, out_axis = (m, n) if axis == 0 else (n, m)
    else:
        n, m = x_gpu.shape[0], x_gpu.shape[1]
        lda = x_gpu.shape[0]
        trans = "t" if axis == 0 else "n"
        sum_axis, out_axis = (n, m) if axis == 0 else (m, n)
    if calc_mean:
        alpha = (1.0 / (sum_axis-ddof))
    else:
        alpha = 1.0
    if (x_gpu.dtype == np.complex64):
        gemv = cublas.cublasCgemv
    elif (x_gpu.dtype == np.float32):
        gemv = cublas.cublasSgemv
    elif (x_gpu.dtype == np.complex128):
        gemv = cublas.cublasZgemv
    elif (x_gpu.dtype == np.float64):
        gemv = cublas.cublasDgemv
    alloc = _global_cublas_allocator
    ons = ones((sum_axis, ), x_gpu.dtype, allocator=alloc)
    if keepdims:
        out_shape = (1, out_axis) if axis == 0 else (out_axis, 1)
    else:
        out_shape = (out_axis,)
    if out is None:
        out = gpuarray.empty(out_shape, x_gpu.dtype, alloc)
    else:
        assert out.dtype == x_gpu.dtype
        assert out.size >= out_axis
    gemv(_global_cublas_handle, trans, n, m,
         alpha, x_gpu.gpudata, lda,
         ons.gpudata, 1, 0.0, out.gpudata, 1)
    return out
Developer: Eric89GXL, Project: scikits.cuda, Lines: 59, Source file: misc.py
Example 4: Average_TotalProbabilityP
def Average_TotalProbabilityP( self, Psi1_GPU, Psi2_GPU, Psi3_GPU, Psi4_GPU):
    temp = gpuarray.sum( Psi1_GPU*Psi1_GPU.conj() ).get()
    temp += gpuarray.sum( Psi2_GPU*Psi2_GPU.conj() ).get()
    temp += gpuarray.sum( Psi3_GPU*Psi3_GPU.conj() ).get()
    temp += gpuarray.sum( Psi4_GPU*Psi4_GPU.conj() ).get()
    return temp * self.dPx*self.dPy
Developer: cabrer7, Project: PyWignerCUDA, Lines: 8, Source file: GPU_DiracDaviau2D.py
Example 5: gpu_sharpen
def gpu_sharpen(kernel, orig_image):
    # allocate memory for input and output
    curr_im, next_im = np.array(orig_image, dtype=np.float64), np.array(orig_image, dtype=np.float64)
    # Get image data
    height, width = np.int32(orig_image.shape)
    N = height * width
    print("Processing %d x %d image" % (width, height))
    # Allocate device memory and copy host to device
    start_transfer = time.time()
    d_curr = gpu.to_gpu(curr_im)
    d_next = gpu.to_gpu(next_im)
    stop_transfer = time.time()
    host_to_device = stop_transfer - start_transfer
    print("host to device transfer time: " + str(host_to_device))
    # Block size (threads per block)
    b_size = (32, 32, 1)
    # Grid size (blocks per grid)
    g_size = (int(np.ceil(float(width)/float(b_size[0]))), int(np.ceil(float(height)/float(b_size[1]))))
    # Initialize the GPU event trackers for timing
    start_gpu_time = cu.Event()
    end_gpu_time = cu.Event()
    start_gpu_time.record()
    # Compute the image's initial mean and variance
    init_mean = np.float64(gpu.sum(d_curr).get())/N
    var = ReductionKernel(dtype_out=np.float64, neutral="0", reduce_expr="a+b",
                          map_expr="(x[i]-mu)*(x[i]-mu)/size",
                          arguments="double* x, double mu, double size")
    init_variance = var(d_curr, np.float64(init_mean), np.float64(N)).get()
    variance = 0
    total = 0
    # while variance is less than a 20% difference from the initial variance, continue to sharpen
    while variance < 1.2 * init_variance:
        kernel(d_curr, EPSILON, d_next, height, width, block=b_size, grid=g_size)
        # Swap references to the images, next_im => curr_im
        d_curr, d_next = d_next, d_curr
        # calculate mean and variance
        mean = np.float64(gpu.sum(d_curr).get())/N
        variance = var(d_curr, np.float64(mean), np.float64(N)).get()
        print("Mean = %f, Variance = %f" % (mean, variance))
    end_gpu_time.record()
    end_gpu_time.synchronize()
    gpu_time = start_gpu_time.time_till(end_gpu_time)*1e-3
    print("GPU Time: %f" % gpu_time)
    return d_curr.get()
Developer: ealehman, Project: image_video_edit, Lines: 58, Source file: sharpen.py
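Since this page is about gpuarray.sum, note that the variance in Example 5 could also be computed without a custom ReductionKernel by combining elementwise arithmetic with the sum reduction. A rough sketch, reusing the names d_curr and N from the example above (gpu is the pycuda.gpuarray alias used there):

mean = float(gpu.sum(d_curr).get()) / N
variance = float(gpu.sum((d_curr - mean) ** 2).get()) / N

The ReductionKernel version fuses the map and reduce steps into a single kernel launch, whereas this sketch issues an elementwise kernel followed by a separate reduction.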
Example 6: gCOVAR
def gCOVAR(data1, data2):
    dA1 = gpuarray.to_gpu(data1.astype(np.float32))
    dA2 = gpuarray.to_gpu(data2.astype(np.float32))
    dM1 = gpuarray.sum(dA1)/len(data1)
    dM2 = gpuarray.sum(dA2)/len(data1)
    covar = np.float64(kn.kCOVAR(dA1, dA2, dM1, dM2).get()/len(data1))
    return covar
Developer: darien, Project: fatsheet, Lines: 9, Source file: __init__.py
Example 7: Norm_P_GPU
def Norm_P_GPU( self, Psi1, Psi2, Psi3, Psi4):
    norm = gpuarray.sum( Psi1.__abs__()**2 ).get()
    norm += gpuarray.sum( Psi2.__abs__()**2 ).get()
    norm += gpuarray.sum( Psi3.__abs__()**2 ).get()
    norm += gpuarray.sum( Psi4.__abs__()**2 ).get()
    norm = np.sqrt(norm*self.dPx * self.dPy )
    return norm
Developer: cabrer7, Project: PyWignerCUDA, Lines: 9, Source file: GPU_DiracDaviau2D.py
Example 8: gCORREL
def gCORREL(data1, data2):
    dA1 = gpuarray.to_gpu(data1.astype(np.float32))
    dA2 = gpuarray.to_gpu(data2.astype(np.float32))
    dM1 = gpuarray.sum(dA1)/len(data1)
    dM2 = gpuarray.sum(dA2)/len(data1)
    correl = np.float64(kn.kCOVAR(dA1, dA2, dM1, dM2).get() / \
        (kn.kSTDEV(dA1, dM1).get() * kn.kSTDEV(dA2, dM2).get())**.5)
    return correl
Developer: darien, Project: fatsheet, Lines: 10, Source file: __init__.py
Example 9: Norm_GPU
def Norm_GPU( self, Psi1, Psi2, Psi3, Psi4):
    norm = gpuarray.sum( Psi1.__abs__()**2 ).get()
    norm += gpuarray.sum( Psi2.__abs__()**2 ).get()
    norm += gpuarray.sum( Psi3.__abs__()**2 ).get()
    norm += gpuarray.sum( Psi4.__abs__()**2 ).get()
    norm = np.sqrt(norm*self.dX * self.dY * self.dZ )
    #print ' norm GPU = ', norm
    return norm
Developer: cabrer7, Project: PyWignerCUDA, Lines: 11, Source file: GPU_Dirac3D.py
Example 10: gen_summary_stats
def gen_summary_stats(data):
    lb = data[data < mask]
    ub = data[data > mask]
    prob_ub = ub / lb
    n_ub = ub.size
    n_lb = lb.size
    mean_ub = gpuarray.sum(ub) / n_ub
    mean_lb = gpuarray.sum(lb) / n_lb
    var_ub = (ub - mean_ub)**2 / n_ub
    var_lb = (lb - mean_lb)**2 / n_lb
Developer: twiecki, Project: sim_drift_gpu, Lines: 11, Source file: sim_drift_gpu.py
Example 11: get_wigner_time
def get_wigner_time(self, wigner_current, wigner_init, t):
    """
    Calculate the integral:
        int_{H(x, p, t) > -Ip} [wigner_current(x,p) - wigner_init(x,p)] dxdp
    :param wigner_current: gpuarray containing current Wigner function
    :param wigner_init: gpuarray containing initial Wigner function
    :param t: current time
    :return: float
    """
    # If kernel calculating the wigner time is not present, compile it
    try:
        wigner_time_mapper = self._wigner_time_mapper
    except AttributeError:
        # Allocate memory to map
        self._tmp_wigner_time = gpuarray.empty(self.rho.shape, np.float64)
        wigner_time_mapper = self._wigner_time_mapper = SourceModule(
            self.wigner_time_mapper_cuda_code.format(
                cuda_consts=self.cuda_consts, K=self.K, V=self.V
            ),
        ).get_function("Kernel")
    wigner_time_mapper(self._tmp_wigner_time, wigner_current, wigner_init, t, **self.rho_mapper_params)
    return gpuarray.sum(self._tmp_wigner_time).get() * self.wigner_dxdp
Developer: andregcampos, Project: FastWigner, Lines: 27, Source file: rho_vneumann_cuda_1d.py
Example 12: ERA_probe
def ERA_probe(self, iters=1):
    exits2_gpu = self.thr.empty_like(self.exits_gpu)
    print('i, eMod, eSup')
    for i in range(iters):
        exits2_gpu = self.Pmod(self.exits_gpu)
        #
        self.error_mod.append(gpuarray.sum(abs(self.exits_gpu - exits2_gpu)**2).get()/self.diffNorm)
        #
        exits = exits2_gpu.get()
        self.Psup_probe(exits)
        #
        self.thr.to_device(makeExits2(self.sample, self.probe, self.coords, exits), dest=self.exits_gpu)
        #
        self.error_sup.append(gpuarray.sum(abs(self.exits_gpu - exits2_gpu)**2).get()/self.diffNorm)
        #
        update_progress(i / max(1.0, float(iters-1)), 'ERA probe', i, self.error_mod[-1], self.error_sup[-1])
Developer: andyofmelbourne, Project: Ptychography, Lines: 16, Source file: Ptychography_2dsample_2dprobe_farfield_gpu.py
Example 13: bloch_single_step_propagation
def bloch_single_step_propagation(self, dbeta):
    """
    Perform a single step propagation with respect to the inverse temperature via the Bloch equation.
    The final Wigner function is not normalized.
    :param dbeta: (float) the inverse temperature step size
    :return: self.wignerfunction
    """
    self.p2theta_transform()
    self.bloch_expV_bulk(self.wigner_theta_x, dbeta, **self.V_bulk_mapper_params)
    self.bloch_expV_boundary(self.wigner_theta_x, dbeta, **self.V_boundary_mapper_params)
    self.theta2p_transform()
    self.x2lambda_transform()
    self.bloch_expK_bulk(self.wigner_p_lambda, dbeta, **self.K_bulk_mapper_params)
    self.bloch_expK_boundary(self.wigner_p_lambda, dbeta, **self.K_boundary_mapper_params)
    self.lambda2x_transform()
    self.p2theta_transform()
    self.bloch_expV_bulk(self.wigner_theta_x, dbeta, **self.V_bulk_mapper_params)
    self.bloch_expV_boundary(self.wigner_theta_x, dbeta, **self.V_boundary_mapper_params)
    self.theta2p_transform()
    # normalize
    self.wignerfunction /= gpuarray.sum(self.wignerfunction).get() * self.dXdP
    return self.wignerfunction
Developer: andregcampos, Project: FastWigner, Lines: 26, Source file: wigner_bloch_cuda_1d.py
Example 14: get_wigner
def get_wigner(self):
    """
    Transform the density matrix saved in self.rho into the unnormalized Wigner function
    :return: self.wignerfunction
    """
    # Create the density matrix out of the wavefunction
    self.psi2rho(self.wavefunction, self.wignerfunction, **self.wigner_mapper_params)
    # Step 1: Rotate by +45 degrees
    # Shear X
    cufft.fft_Z2Z(self.wignerfunction, self.wignerfunction, self.plan_Z2Z_ax1)
    self.phase_shearX(self.wignerfunction, **self.wigner_mapper_params)
    cufft.ifft_Z2Z(self.wignerfunction, self.wignerfunction, self.plan_Z2Z_ax1)
    # Shear Y
    cufft.fft_Z2Z(self.wignerfunction, self.wignerfunction, self.plan_Z2Z_ax0)
    self.phase_shearY(self.wignerfunction, **self.wigner_mapper_params)
    cufft.ifft_Z2Z(self.wignerfunction, self.wignerfunction, self.plan_Z2Z_ax0)
    # Shear X
    cufft.fft_Z2Z(self.wignerfunction, self.wignerfunction, self.plan_Z2Z_ax1)
    self.phase_shearX(self.wignerfunction, **self.wigner_mapper_params)
    cufft.ifft_Z2Z(self.wignerfunction, self.wignerfunction, self.plan_Z2Z_ax1)
    # Step 2: FFT the Blokhintsev function
    self.sign_flip(self.wignerfunction, **self.wigner_mapper_params)
    cufft.ifft_Z2Z(self.wignerfunction, self.wignerfunction, self.plan_Z2Z_ax0)
    self.sign_flip(self.wignerfunction, **self.wigner_mapper_params)
    # normalize
    self.wignerfunction /= gpuarray.sum(self.wignerfunction).get().real * self.wigner_dXdP
    return self.wignerfunction
Developer: dibondar, Project: AccurateWigner, Lines: 33, Source file: schrodinger_wigner_cuda_1d.py
Example 15: _sum_axis
def _sum_axis(x_gpu, axis=None, out=None, calc_mean=False):
    global _global_cublas_allocator
    if axis is None:
        if calc_mean == False:
            return gpuarray.sum(x_gpu).get()
        else:
            return gpuarray.sum(x_gpu).get() / x_gpu.dtype.type(x_gpu.size)
    if axis < 0:
        axis += 2
    if axis > 1:
        raise ValueError('invalid axis')
    if x_gpu.flags.c_contiguous:
        n, m = x_gpu.shape[1], x_gpu.shape[0]
        lda = x_gpu.shape[1]
        trans = "n" if axis == 0 else "t"
        sum_axis, out_axis = (m, n) if axis == 0 else (n, m)
    else:
        n, m = x_gpu.shape[0], x_gpu.shape[1]
        lda = x_gpu.shape[0]
        trans = "t" if axis == 0 else "n"
        sum_axis, out_axis = (n, m) if axis == 0 else (m, n)
    alpha = (1.0 / sum_axis) if calc_mean else 1.0
    if (x_gpu.dtype == np.complex64):
        gemv = cublas.cublasCgemv
    elif (x_gpu.dtype == np.float32):
        gemv = cublas.cublasSgemv
    elif (x_gpu.dtype == np.complex128):
        gemv = cublas.cublasZgemv
    elif (x_gpu.dtype == np.float64):
        gemv = cublas.cublasDgemv
    alloc = _global_cublas_allocator
    ons = ones((sum_axis, ), x_gpu.dtype, alloc)
    if out is None:
        out = gpuarray.empty((out_axis, ), x_gpu.dtype, alloc)
    else:
        assert out.dtype == x_gpu.dtype
        assert out.size >= out_axis
    gemv(_global_cublas_handle, trans, n, m,
         alpha, x_gpu.gpudata, lda,
         ons.gpudata, 1, 0.0, out.gpudata, 1)
    return out
Developer: oursland, Project: scikits.cuda, Lines: 47, Source file: misc.py
Example 16: test_sum_allocator
def test_sum_allocator(self):
    import pycuda.tools
    pool = pycuda.tools.DeviceMemoryPool()
    rng = np.random.randint(low=512, high=1024)
    a = gpuarray.arange(rng, dtype=np.int32)
    b = gpuarray.sum(a)
    c = gpuarray.sum(a, allocator=pool.allocate)
    # Test that we get the correct results
    assert b.get() == rng*(rng-1)//2
    assert c.get() == rng*(rng-1)//2
    # Test that result arrays were allocated with the appropriate allocator
    assert b.allocator == a.allocator
    assert c.allocator == pool.allocate
Developer: rutsky, Project: pycuda, Lines: 17, Source file: test_gpuarray.py
Example 17: gSTDEV
def gSTDEV(data1):
    dA = gpuarray.to_gpu(data1.astype(np.float32))
    dM = gpuarray.sum(dA)/len(data1)
    hR = kn.kSTDEV(dA, dM).get()
    stdev = np.float64((hR/(len(data1)-1))**.5)
    return stdev
Developer: darien, Project: fatsheet, Lines: 8, Source file: __init__.py
Example 18: calibrate_learning_rate
def calibrate_learning_rate(self, data_provider):
    lr_multiplier = []
    for data, targets in data_provider:
        _, gradients = self.training_pass(data, targets)
        lr_multiplier.append([float((grad.size / gpuarray.sum(grad.__abs__())).get()) for grad in gradients])
    lr_multiplier = np.array(lr_multiplier).mean(0)
    lr_multiplier /= lr_multiplier.max()
    self.lr_multiplier = lr_multiplier.tolist()
Developer: liyangdal, Project: hebel, Lines: 8, Source file: neural_net.py
Example 19: calibrate_learning_rate
def calibrate_learning_rate(self, data_provider, mini_batches=None):
    lr_multiplier = []
    for i, (data, targets) in enumerate(data_provider):
        if mini_batches is not None and i > mini_batches: break
        _, gradients = self.training_pass(data, targets)
        lr_multiplier.append([float((grad.size / gpuarray.sum(grad.__abs__())).get()) for grad in gradients])
    lr_multiplier = np.array(lr_multiplier).mean(0)
    lr_multiplier /= lr_multiplier.max()
    self.lr_multiplier = lr_multiplier.tolist()
Developer: hani1986ye, Project: hebel, Lines: 9, Source file: neural_net.py
Example 20: test_sum
def test_sum(self):
    from pycuda.curandom import rand as curand
    a_gpu = curand((200000,))
    a = a_gpu.get()
    sum_a = np.sum(a)
    sum_a_gpu = gpuarray.sum(a_gpu).get()
    assert abs(sum_a_gpu-sum_a)/abs(sum_a) < 1e-4
Developer: rutsky, Project: pycuda, Lines: 10, Source file: test_gpuarray.py
Note: the pycuda.gpuarray.sum examples in this article were compiled by 纯净天空 from source-code and documentation hosting platforms such as GitHub and MSDocs. The code snippets are selected from open-source projects contributed by their developers; copyright of the source code belongs to the original authors, and distribution and use should follow each project's License. Do not repost without permission.