This article collects typical usage examples of the Python function pycuda.gpuarray.to_gpu. If you are wondering what to_gpu does, how to call it, or where to find working examples, the curated snippets below should help.
Below are 20 code examples of the to_gpu function, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help our system recommend better Python code examples.
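Before the examples, here is a minimal self-contained sketch (ours, not taken from any of the projects below) of the basic to_gpu round trip: upload a NumPy array to the device, compute on it, and copy the result back with get().

import numpy as np
import pycuda.autoinit  # creates a CUDA context on import
from pycuda import gpuarray

a = gpuarray.to_gpu(np.arange(8, dtype=np.float32))  # host -> device copy
b = (2 * a + 1).get()                                # compute on device, then device -> host copy
print(b)  # [ 1.  3.  5.  7.  9. 11. 13. 15.]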
Example 1: main
import numpy
import pycuda.autoinit
import pycuda.driver as drv
from pycuda import gpuarray

def main(dtype):
    from pycuda.elementwise import get_linear_combination_kernel
    lc_kernel, lc_texrefs = get_linear_combination_kernel((
        (True, dtype, dtype),
        (True, dtype, dtype)
        ), dtype)

    for size_exp in range(10, 26):
        size = 1 << size_exp

        from pycuda.curandom import rand
        a = gpuarray.to_gpu(numpy.array(5, dtype=dtype))
        x = rand(size, dtype=dtype)
        b = gpuarray.to_gpu(numpy.array(7, dtype=dtype))
        y = rand(size, dtype=dtype)
        z = gpuarray.empty_like(x)

        start = drv.Event()
        stop = drv.Event()
        start.record()
        for i in range(20):
            a.bind_to_texref_ext(lc_texrefs[0], allow_double_hack=True)
            b.bind_to_texref_ext(lc_texrefs[1], allow_double_hack=True)
            lc_kernel.prepared_call(x._grid, x._block,
                                    x.gpudata, y.gpudata, z.gpudata, x.mem_size)
        stop.record()
        stop.synchronize()

        print(size, size_exp, stop.time_since(start))
Developer: sluo1989, Project: computing, Lines of code: 32, Source: GpuScalarMult.py
Example 2: calculate_circuit_graph_vertex_data_device
def calculate_circuit_graph_vertex_data_device(d_D, d_C, length):
    logger = logging.getLogger('eulercuda.pyeulertour.calculate_circuit_graph_vertex_data_device')
    logger.info("started.")
    mod = SourceModule("""
    __global__ void calculateCircuitGraphVertexData(unsigned int *D, unsigned int *C, unsigned int ecount)
    {
        unsigned int tid = (blockDim.x * blockDim.y * gridDim.x * blockIdx.y)
                         + (blockDim.x * blockDim.y * blockIdx.x)
                         + (blockDim.x * threadIdx.y) + threadIdx.x;
        if (tid < ecount)
        {
            unsigned int c = D[tid];
            atomicExch(C + c, 1);
        }
    }
    """)
    calculate_circuit_graph_vertex_data = mod.get_function('calculateCircuitGraphVertexData')
    block_dim, grid_dim = getOptimalLaunchConfiguration(length, 512)
    logger.info('block_dim = %s, grid_dim = %s' % (block_dim, grid_dim))
    np_d_D = gpuarray.to_gpu(d_D)
    np_d_C = gpuarray.to_gpu(d_C)
    calculate_circuit_graph_vertex_data(
        np_d_D,
        np_d_C,
        np.uintc(length),
        block=block_dim, grid=grid_dim
    )
    # GPUArray.get(ary) copies the device results back into the caller's host arrays.
    np_d_D.get(d_D)
    np_d_C.get(d_C)
    # devdata = pycuda.tools.DeviceData()
    # orec = pycuda.tools.OccupancyRecord(devdata, block_dim[0] * grid_dim[1])
    # logger.info("Occupancy = %s" % (orec.occupancy * 100))
    logger.info("Finished. Leaving.")
    return d_D, d_C
Developer: zenlc2000, Project: pycuda-euler, Lines of code: 32, Source: pyeulertour.py
Example 3: test_cublasDgemmBatched
def test_cublasDgemmBatched(self):
    l, m, k, n = 11, 7, 5, 3
    A = np.random.rand(l, m, k).astype(np.float64)
    B = np.random.rand(l, k, n).astype(np.float64)
    C_res = np.einsum('nij,njk->nik', A, B)
    a_gpu = gpuarray.to_gpu(A)
    b_gpu = gpuarray.to_gpu(B)
    c_gpu = gpuarray.empty((l, m, n), np.float64)
    alpha = np.float64(1.0)
    beta = np.float64(0.0)
    a_arr = bptrs(a_gpu)
    b_arr = bptrs(b_gpu)
    c_arr = bptrs(c_gpu)
    # cuBLAS is column-major, so the operands are swapped: computing
    # B^T * A^T in column-major order yields A * B in row-major C.
    cublas.cublasDgemmBatched(self.cublas_handle, 'n', 'n',
                              n, m, k, alpha,
                              b_arr.gpudata, n,
                              a_arr.gpudata, k,
                              beta, c_arr.gpudata, n, l)
    assert np.allclose(C_res, c_gpu.get())
Developer: Brainiarc7, Project: scikit-cuda, Lines of code: 25, Source: test_cublas.py
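The operand swap in this test is the standard row-major trick: a C-contiguous (m, k) array is bit-identical to a column-major (k, m) array, and (AB)^T = B^T A^T. A tiny NumPy sketch of the identity (illustrative only, no GPU needed):

import numpy as np

A = np.random.rand(7, 5)
B = np.random.rand(5, 3)
# Asking cuBLAS for B^T A^T in column-major order is the same as computing
# (A B)^T, whose column-major bytes read back as A B in row-major layout.
assert np.allclose((B.T @ A.T).T, A @ B)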
Example 4: gpu_sweep_col_mult
def gpu_sweep_col_mult(X, y):
    """ X * y = X across the columns """
    if type(X) == GPUArray:
        gX = X
    else:
        gX = to_gpu(np.asarray(X, dtype=np.float32))
    if type(y) == GPUArray:
        gy = y
    else:
        gy = to_gpu(np.asarray(y, dtype=np.float32))
    dims = np.asarray(X.shape, dtype=np.int32)
    if devinfo.max_block_threads >= 1024:
        blocksize = 32
    else:
        blocksize = 16
    gridsize = int(dims[0] / blocksize) + 1
    shared = 4 * blocksize
    if gX.flags.c_contiguous:
        func = CUDA_Kernels.get_function("sweep_columns_mult")
    else:
        func = CUDA_Kernels.get_function("sweep_columns_mult_cm")
    func(gX, gy, dims[0], dims[1], block=(blocksize, blocksize, 1),
         grid=(gridsize, 1), shared=shared)
    if type(X) != GPUArray:
        # copy the device result back into the caller's host array
        X[:] = gX.get()
Developer: brodyh, Project: dpmix, Lines of code: 31, Source: cuda_functions.py
Example 5: cache_z
def cache_z(self, z):
    x = np.require(z.real, dtype=np.double, requirements=['A', 'W', 'O', 'C'])
    y = np.require(z.imag, dtype=np.double, requirements=['A', 'W', 'O', 'C'])
    xd = gpuarray.to_gpu(x)
    yd = gpuarray.to_gpu(y)
    # device-to-device copies into the persistent buffers self.xd and self.yd
    cuda.memcpy_dtod(self.xd, xd.ptr, xd.nbytes)
    cuda.memcpy_dtod(self.yd, yd.ptr, yd.nbytes)
Developer: abelfunctions, Project: abelfunctions, Lines of code: 7, Source: riemanntheta_omegas.py
Example 6: _init_weights
def _init_weights(self, weight_shape, bias_shape):
    if self.weight is None:
        if self.name == 'noise':
            assert weight_shape[0] == weight_shape[1]
            self.weight = gpuarray.to_gpu(np.eye(weight_shape[0], dtype=np.float32))
        else:
            self.weight = gpuarray.to_gpu(randn(weight_shape, np.float32) * self.initW)
    if self.bias is None:
        if self.initB > 0.0:
            self.bias = gpuarray.to_gpu(np.ones(bias_shape, dtype=np.float32) * self.initB)
        else:
            self.bias = gpuarray.zeros(bias_shape, dtype=np.float32)
    Assert.eq(self.weight.shape, weight_shape)
    Assert.eq(self.bias.shape, bias_shape)
    self.weightGrad = gpuarray.zeros_like(self.weight)
    self.biasGrad = gpuarray.zeros_like(self.bias)
    if self.momW > 0.0:
        if self.weightIncr is None:
            self.weightIncr = gpuarray.zeros_like(self.weight)
        if self.biasIncr is None:
            self.biasIncr = gpuarray.zeros_like(self.bias)
        Assert.eq(self.weightIncr.shape, weight_shape)
        Assert.eq(self.biasIncr.shape, bias_shape)
Developer: tesatory, Project: fastnet-noisy, Lines of code: 28, Source: layer.py
Example 7: gpu_sweep_row_div
def gpu_sweep_row_div(X, y):
    """ X / y = X down the rows """
    if type(X) == GPUArray:
        gX = X
    else:
        gX = to_gpu(np.asarray(X, dtype=np.float32))
    if type(y) == GPUArray:
        gy = y
    else:
        gy = to_gpu(np.asarray(y, dtype=np.float32))
    dims = np.asarray(X.shape, dtype=np.int32)
    if devinfo.max_block_threads >= 1024:
        blocksize = 32
    else:
        blocksize = 16
    gridsize = int(dims[0] / blocksize) + 1
    shared = int(4 * dims[1])
    if gX.flags.c_contiguous:
        func = CUDA_Kernels.get_function("sweep_rows_div")
    else:
        func = CUDA_Kernels.get_function("sweep_rows_div_cm")
    func(gX, gy, dims[0], dims[1], block=(blocksize, blocksize, 1),
         grid=(gridsize, 1), shared=shared)
    if type(X) != GPUArray:
        # copy the device result back into the caller's host array
        X[:] = gX.get()
Developer: brodyh, Project: dpmix, Lines of code: 31, Source: cuda_functions.py
Example 8: cuda_dot3
from pycuda import gpuarray
import skcuda.linalg as linalg

def cuda_dot3(A, b):
    print("cuda_dot3", A.shape, b.shape)
    # send b to GPU
    b_gpu = gpuarray.to_gpu(b)
    # transpose b on GPU
    bt_gpu = linalg.transpose(b_gpu)
    # remove b for now
    b_gpu.gpudata.free()
    del b_gpu
    # send A to GPU
    A_gpu = gpuarray.to_gpu(A)
    temp_gpu = linalg.dot(bt_gpu, A_gpu)
    bt_gpu.gpudata.free()
    del bt_gpu
    A_gpu.gpudata.free()
    del A_gpu
    # send b to GPU again
    b_gpu = gpuarray.to_gpu(b)
    c_gpu = linalg.dot(temp_gpu, b_gpu)
    temp_gpu.gpudata.free()
    del temp_gpu
    b_gpu.gpudata.free()
    del b_gpu
    # theoretically possible to move into RAM, force cleanup on GPU and then
    # return from RAM, but most likely not necessary
    return c_gpu.get()
Developer: sneshyba, Project: ice3, Lines of code: 32, Source: facetbrightnessstuff3.py
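Read end to end, cuda_dot3 computes b.T @ A @ b while freeing each intermediate as soon as it has been consumed. A host-side sketch of the equivalent computation (shapes illustrative; running the GPU version needs a CUDA device and an initialized skcuda.linalg):

import numpy as np

A = np.random.rand(6, 6)
b = np.random.rand(6, 4)
expected = b.T @ A @ b          # shape (4, 4)
# On a CUDA machine: assert np.allclose(cuda_dot3(A, b), expected)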
Example 9: test_set_by_inds_from_inds
def test_set_by_inds_from_inds(self):
    dest_gpu = gpuarray.to_gpu(np.zeros(5, dtype=np.float32))
    ind_dest = gpuarray.to_gpu(np.array([0, 2, 4]))
    src_gpu = gpuarray.to_gpu(np.arange(5, 10, dtype=np.float32))
    ind_src = gpuarray.to_gpu(np.array([2, 3, 4]))
    gpu.set_by_inds_from_inds(dest_gpu, ind_dest, src_gpu, ind_src)
    assert np.allclose(dest_gpu.get(), np.array([7, 0, 8, 0, 9], dtype=np.float32))
Developer: NeuralSci, Project: neurokernel, Lines of code: 7, Source: test_gpu.py
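The set_by_inds_from_inds helper under test is not shown in this excerpt. A minimal sketch of the gather/scatter it performs, written in the same ElementwiseKernel style as Example 10 below (the signature and the index dtype are our assumptions, not neurokernel's actual code):

import numpy as np
import pycuda.autoinit
from pycuda import gpuarray
from pycuda.elementwise import ElementwiseKernel

# dest[ind_dest[i]] = src[ind_src[i]] for every i
_set_from_inds = ElementwiseKernel(
    "float *dest, long *ind_dest, float *src, long *ind_src",
    "dest[ind_dest[i]] = src[ind_src[i]]")

def set_by_inds_from_inds(dest_gpu, ind_dest, src_gpu, ind_src):
    _set_from_inds(dest_gpu, ind_dest, src_gpu, ind_src,
                   range=slice(0, len(ind_dest), 1))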
Example 10: set_by_inds
def set_by_inds(self, inds, data):
    """
    Set mapped data by integer indices.

    Parameters
    ----------
    inds : sequence of int
        Integer indices of data elements to update.
    data : numpy.ndarray
        Data to assign.
    """
    assert len(np.shape(inds)) == 1
    assert issubclass(inds.dtype.type, numbers.Integral)
    N = len(inds)
    assert N == len(data)
    if not isinstance(inds, gpuarray.GPUArray):
        inds = gpuarray.to_gpu(inds)
    if not isinstance(data, gpuarray.GPUArray):
        data = gpuarray.to_gpu(data)
    # Allocate data array if it doesn't exist:
    if not self.data:
        self.data = gpuarray.empty(N, data.dtype)
    else:
        assert self.data.dtype == data.dtype
    try:
        func = self.set_by_inds.cache[inds.dtype]
    except KeyError:
        inds_ctype = tools.dtype_to_ctype(inds.dtype)
        v = "{data_ctype} *dest, {inds_ctype} *inds, {data_ctype} *src".format(
            data_ctype=self.data_ctype, inds_ctype=inds_ctype)
        func = elementwise.ElementwiseKernel(v, "dest[inds[i]] = src[i]")
        self.set_by_inds.cache[inds.dtype] = func
    func(self.data, inds, data, range=slice(0, N, 1))
Developer: MariyaS, Project: neurokernel, Lines of code: 35, Source: pm_gpu.py
Example 11: main
def main():
    import numpy as np
    import pycuda.autoinit
    from pycuda import gpuarray

    from skdata import toy

    from hebel import memory_pool
    from hebel.data_providers import BatchDataProvider
    from hebel.models import NeuralNetRegression
    from hebel.optimizers import SGD
    from hebel.parameter_updaters import SimpleSGDUpdate
    from hebel.monitors import SimpleProgressMonitor
    from hebel.schedulers import exponential_scheduler

    # Get data
    data_cpu, targets_cpu = toy.Boston().regression_task()
    data = gpuarray.to_gpu(data_cpu.astype(np.float32), allocator=memory_pool.allocate)
    targets = gpuarray.to_gpu(targets_cpu.astype(np.float32), allocator=memory_pool.allocate)
    data_provider = BatchDataProvider(data, targets)

    # Create model object
    model = NeuralNetRegression(n_in=data_cpu.shape[1], n_out=targets_cpu.shape[1],
                                layers=[100], activation_function='relu')

    # Create optimizer object
    optimizer = SGD(model, SimpleSGDUpdate, data_provider, data_provider,
                    learning_rate_schedule=exponential_scheduler(.1, .9999),
                    early_stopping=True)
    optimizer.run(3000)
Developer: DavidDJChen, Project: hebel, Lines of code: 28, Source: neural_net_regression_example.py
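This example (and Example 15 below) passes allocator=memory_pool.allocate so that to_gpu draws device memory from hebel's pool instead of calling cudaMalloc for every upload. The same idea works with PyCUDA's own pooled allocator; a minimal sketch:

import numpy as np
import pycuda.autoinit
from pycuda import gpuarray
from pycuda.tools import DeviceMemoryPool

pool = DeviceMemoryPool()
# Allocations are served from (and returned to) the pool, which
# amortizes the cost of repeated cudaMalloc/cudaFree calls.
x = gpuarray.to_gpu(np.random.rand(1024).astype(np.float32),
                    allocator=pool.allocate)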
Example 12: cuda_ageSols
def cuda_ageSols(sols):
    """ ages solutions by incrementing their age gene """
    # get num sols
    num_sols = len(sols)
    # convert to numpy arrays
    sols_arr = numpy.array(sols, numpy.float32)
    ones_arr = numpy.zeros_like(sols, numpy.float32)
    ones_arr[:, constants.AGE_GENE] = 1
    # copy each to gpu
    sols_gpu = gpuarray.to_gpu(sols_arr)
    mask_gpu = gpuarray.to_gpu(ones_arr)
    # debug
    if debug == True:
        print(mask_gpu.view())
    # apply mask
    aged_sols_gpu = sols_gpu + mask_gpu
    # copy back to the host and return the aged solutions
    sols = aged_sols_gpu.get().tolist()
    return sols
Developer: adamuas, Project: coevondm, Lines of code: 25, Source: cudaInterface.py
Example 13: _initialize_gpu_ds
def _initialize_gpu_ds(self):
    """
    Setup GPU arrays.
    """
    self.synapse_state = garray.zeros(
        int(self.total_synapses) + len(self.input_neuron_list), np.float64)
    if self.my_num_gpot_neurons > 0:
        self.V = garray.zeros(int(self.my_num_gpot_neurons), np.float64)
    else:
        self.V = None
    if self.my_num_spike_neurons > 0:
        self.spike_state = garray.zeros(int(self.my_num_spike_neurons), np.int32)
    if len(self.public_gpot_list) > 0:
        self.public_gpot_list_g = garray.to_gpu(self.public_gpot_list)
        self.projection_gpot = garray.zeros(len(self.public_gpot_list), np.double)
        self._extract_gpot = self._extract_projection_gpot_func()
    if len(self.public_spike_list) > 0:
        self.public_spike_list_g = garray.to_gpu(
            (self.public_spike_list - self.spike_shift).astype(np.int32))
        self.projection_spike = garray.zeros(len(self.public_spike_list), np.int32)
        self._extract_spike = self._extract_projection_spike_func()
Developer: LuisMoralesAlonso, Project: neurokernel, Lines of code: 25, Source: LPU.py
Example 14: main_no_tex
def main_no_tex(dtype):
    lc_kernel = get_lin_comb_kernel_no_tex((
        (True, dtype, dtype),
        (True, dtype, dtype)
        ), dtype)

    for size_exp in range(10, 26):
        size = 1 << size_exp

        from pycuda.curandom import rand
        a = gpuarray.to_gpu(numpy.array(5, dtype=dtype))
        x = rand(size, dtype=dtype)
        b = gpuarray.to_gpu(numpy.array(7, dtype=dtype))
        y = rand(size, dtype=dtype)
        z = gpuarray.empty_like(x)

        start = drv.Event()
        stop = drv.Event()
        start.record()
        for i in range(20):
            lc_kernel.prepared_call(x._grid, x._block,
                                    a.gpudata, x.gpudata,
                                    b.gpudata, y.gpudata,
                                    z.gpudata, x.mem_size)
        stop.record()
        stop.synchronize()

        print(size, size_exp, stop.time_since(start))
Developer: sluo1989, Project: computing, Lines of code: 31, Source: GpuScalarMult.py
Example 15: test_neural_net_regression
def test_neural_net_regression(self):
    for _ in range(20):
        N = 10000  # Number of data points
        D = 100    # Dimensionality of exogenous data
        P = 50     # Dimensionality of endogenous data
        W_true = 10 * np.random.rand(D, P) - 5
        b_true = 100 * np.random.rand(P) - 50
        X = np.random.randn(N, D)
        Y = np.dot(X, W_true) + b_true[np.newaxis, :] + np.random.randn(N, P)
        W_lstsq = np.linalg.lstsq(np.c_[np.ones((N, 1)), X], Y)[0]
        b_lstsq = W_lstsq[0]
        W_lstsq = W_lstsq[1:]

        data_provider = BatchDataProvider(
            gpuarray.to_gpu(X.astype(np.float32), allocator=memory_pool.allocate),
            gpuarray.to_gpu(Y.astype(np.float32), allocator=memory_pool.allocate))

        model = NeuralNetRegression([], n_in=D, n_out=P)
        optimizer = SGD(model, SimpleSGDUpdate,
                        data_provider, data_provider,
                        learning_rate_schedule=constant_scheduler(10.),
                        early_stopping=True)
        optimizer.run(100)

        self.assertLess(np.abs(W_lstsq - model.top_layer.W.get()).max(),
                        1e-5)
Developer: amit2014, Project: hebel, Lines of code: 30, Source: hebel_test.py
Example 16: generate
def generate(self, width, height, real_axis_range, imag_axis_range, tasks):
    if not is_gpu_accelerated():
        self._logger.error(
            'No GPU acceleration is available, please use CPU.')
        return
    # The uninitialized host buffers are uploaded only to size the device
    # arrays; the kernel overwrites every element.
    iterations = np.empty(width * height, np.int32)
    iterations_gpu = gpuarray.to_gpu(iterations)
    z_values = np.empty(width * height, np.float32)
    z_values_gpu = gpuarray.to_gpu(z_values)
    cmin = complex(real_axis_range[0], imag_axis_range[0])
    cmax = complex(real_axis_range[1], imag_axis_range[1])
    dc = cmax - cmin
    # Round the grid size up so the blocks cover all width x height pixels.
    dx, mx = divmod(width, self._block_size[0])
    dy, my = divmod(height, self._block_size[1])
    grid_size = ((dx + (mx > 0)), (dy + (my > 0)))
    self._get_pixel_iterations(
        iterations_gpu, z_values_gpu,
        np.int32(width), np.int32(height),
        np.complex64(cmin), np.complex64(dc),
        block=self._block_size, grid=grid_size)
    return (iterations_gpu, z_values_gpu, abs(dc))
Developer: gkostadinov, Project: py-mandelbrot, Lines of code: 27, Source: mandelbrot_gpu.py
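The divmod arithmetic in this example is just ceiling division: enough blocks to cover every pixel. A reusable helper (our sketch, not part of the project) makes the intent explicit:

def blocks_needed(extent, block):
    """Smallest grid dimension whose blocks cover `extent` threads."""
    return (extent + block - 1) // block

# e.g. a 1000 x 700 image with 16 x 16 blocks:
# (blocks_needed(1000, 16), blocks_needed(700, 16)) == (63, 44)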
Example 17: _pre_run
def _pre_run(self):
    assert self.LPU_obj
    assert all([var in self.memory_manager.variables
                for var in self.variables.keys()])
    for var, d in self.variables.items():
        v_dict = self.memory_manager.variables[var]
        if not d['uids']:
            # list() the dict views so they can be indexed below
            uids = list(v_dict['uids'].keys())
            inds = list(v_dict['uids'].values())
            o = np.argsort(inds)
            d['uids'] = [uids[i] for i in o]
            self.src_inds[var] = garray.to_gpu(np.arange(len(d['uids'])))
        else:
            uids = []
            inds = []
            for uid in d['uids']:
                try:
                    inds.append(v_dict['uids'][uid])
                    uids.append(uid)
                except KeyError:
                    pass  # skip uids that are not present
            inds = np.array(inds, np.int32)
            o = np.argsort(inds)
            self.src_inds[var] = garray.to_gpu(inds[o])
            d['uids'] = [uids[i] for i in o]
        self._d_output[var] = garray.empty(len(d['uids']),
                                           v_dict['buffer'].dtype)
        d['output'] = np.zeros(len(d['uids']), v_dict['buffer'].dtype)
    self.pre_run()
Developer: chungheng, Project: neurodriver, Lines of code: 29, Source: BaseOutputProcessor.py
Example 18: add_neurons
def add_neurons(self, number, func, W, B):
    """Add prepared neurons to the SLFN, merge with existing ones.

    Adds a number of specific neurons to the SLFN network. Weights and
    biases must be provided for that function. If neurons of such type
    already exist, they are merged together.

    Args:
        number (int): the number of new neurons to add
        func (str): transformation function of hidden layer. Linear function
            creates a linear model.
        W (matrix): a 2-D matrix of neuron weights, size (`inputs` * `number`)
        B (vector): a 1-D vector of neuron biases, size (`number` * 1)
    """
    ntypes = [nr[1] for nr in self.neurons]  # existing types of neurons
    if func in ntypes:
        # add to an existing neuron type
        i = ntypes.index(func)
        nn0, _, devW, devB = self.neurons[i]
        number = nn0 + number
        devW = gpuarray.to_gpu(np.hstack((devW.get(), W)))
        devB = gpuarray.to_gpu(np.hstack((devB.get(), B)))
        self.neurons[i] = (number, func, devW, devB)
    else:
        # create a new neuron type
        devW = gpuarray.to_gpu(W)
        devB = gpuarray.to_gpu(B)
        self.neurons.append((number, func, devW, devB))
    self.reset()
    self.B = None
Developer: IstanbulBoy, Project: hpelm, Lines of code: 30, Source: slfn_skcuda.py
Example 19: __init__
def __init__(self, name, type, epsW, epsB, initW, initB, momW, momB, wc,
             weight, bias, weightIncr, biasIncr, disableBprop=False):
    Layer.__init__(self, name, type, disableBprop)
    self.epsW = F(epsW)
    self.epsB = F(epsB)
    self.initW = initW
    self.initB = initB
    self.momW = F(momW)
    self.momB = F(momB)
    self.wc = F(wc)

    if weight is not None:
        self.weight = gpuarray.to_gpu(weight)  # .astype(np.float32)
    else:
        self.weight = None
    if bias is not None:
        self.bias = gpuarray.to_gpu(bias).astype(np.float32)
    else:
        self.bias = None

    if self.momW > 0.0:
        if weightIncr is not None:
            self.weightIncr = gpuarray.to_gpu(weightIncr)
        else:
            self.weightIncr = None
        if biasIncr is not None:
            self.biasIncr = gpuarray.to_gpu(biasIncr)
        else:
            self.biasIncr = None
Developer: tesatory, Project: fastnet-noisy, Lines of code: 32, Source: layer.py
Example 20: add_batch
def add_batch(self, X, T, wc=None):
    """Add a batch of training data to an iterative solution, weighted if needed.

    The batch is processed as a whole; the training data is split in the
    `ELM.add_data()` method. With parameters HH_out, HT_out, the output is
    placed into those matrices instead of the model.

    Args:
        X (matrix): input data matrix, size (N * `inputs`)
        T (matrix): output data matrix, size (N * `outputs`)
        wc (vector): weights for data samples, one weight per sample, size (N * 1)
        HH_out, HT_out (matrix, optional): output matrices to add the batch
            result into, always given together
    """
    devH = self._project(X, dev=True)
    T = np.array(T, order="C", dtype=self.precision)
    devT = gpuarray.to_gpu(T)
    if wc is not None:  # apply weights if given
        w = np.array(wc**0.5, dtype=self.precision)[:, None]  # re-shape to column matrix
        devWC = gpuarray.to_gpu(w)
        misc.mult_matvec(devH, devWC, axis=0, out=devH)
        misc.mult_matvec(devT, devWC, axis=0, out=devT)
    if self.HH is None:  # initialize space for self.HH, self.HT
        self.HT = misc.zeros((self.L, self.outputs), dtype=self.precision)
        self.HH = linalg.eye(self.L, dtype=self.precision)
        self.HH *= self.norm
    linalg.add_dot(devH, devT, self.HT, transa='T')
    if self.precision is np.float64:
        linalg.add_dot(devH, devH, self.HH, transa='T')
    else:
        cublas.cublasSsyrk(self.handle, 'L', 'N', self.L, X.shape[0], 1,
                           devH.ptr, self.L, 1, self.HH.ptr, self.L)
Developer: IstanbulBoy, Project: hpelm, Lines of code: 31, Source: slfn_skcuda.py
Note: The pycuda.gpuarray.to_gpu examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other source-code and documentation platforms. The snippets were selected from open-source projects contributed by many developers, and copyright in each snippet remains with its original author. Consult the corresponding project's license before redistributing or reusing the code; do not reproduce this compilation without permission.