• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    公众号

Python nervanagpu.NervanaGPU类代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中nervanagpu.NervanaGPU的典型用法代码示例。如果您正苦于以下问题:Python NervanaGPU类的具体用法?Python NervanaGPU怎么用?Python NervanaGPU使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。



在下文中一共展示了NervanaGPU类的18个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: __init__

 def __init__(self, rng_seed, stochastic_round=False, device_id=0):
     self.ng = NervanaGPU(stochastic_round=stochastic_round)
     logger.info("Initialized NervanaGPU with stochastic_round=%s",
                 stochastic_round)
     self.rng_seed = rng_seed
     self.rng_init()
     self.device_id = device_id if device_id is not None else 0
开发者ID:YouVentures,项目名称:neon,代码行数:7,代码来源:gpu.py


示例2: __init__

 def __init__(self, rng_seed, stochastic_round=False, device_id=0):
     import pycuda.driver as drv
     drv.init()
     global ctx
     ctx = drv.Device(device_id).make_context()
     import atexit
     atexit.register(ctx.pop)
     self.ng = NervanaGPU(stochastic_round=stochastic_round)
     logger.info("Initialized NervanaGPU with stochastic_round=%s",
                 stochastic_round)
     self.rng_seed = rng_seed
     self.rng_init()
     self.device_id = device_id if device_id is not None else 0
开发者ID:neuroidss,项目名称:neon,代码行数:13,代码来源:gpu.py


示例3: NervanaGPU

import numpy         as np
import pycuda.driver as drv
from nervanagpu      import NervanaGPU
from pycuda.autoinit import context
from operator        import mul

print context.get_device().name()

np.set_printoptions(threshold=8193, linewidth=600, formatter={'int':lambda x: "%10d" % x,'float':lambda x: "% .3f" % x})

dtype  = np.float16
cpu    = 1
repeat = 1

ng = NervanaGPU(stochastic_round=False, bench=True)

pool = ng.pool_layer(
    "max",
    64,         # N
    64,1,64,64, # C,D,H,W
    4,1,2,2,    # J,T,R,S
    0,0,0,0,    # padding
    4,1,2,2)    # strides

dimI = pool.dimI
dimO = pool.dimO

# colapse pooling dimensions into one
# this allows for easy cpu pooling in numpy
def slicable(dim, pad=0):
开发者ID:KayneWest,项目名称:nervanagpu,代码行数:30,代码来源:pool_test.py


示例4: range

    
    # Swap A and B to map from C order to Fortran 
    for r in range(repeat):
        cublas.cublasSgemm(handle, opB, opA, n, m, k, 1.0, B.gpudata, ldb, A.gpudata, lda, 0.0, C.gpudata, ldc)

    end.record()
    end.synchronize()
    msecs = end.time_since(start) / repeat
    gflops = (m * n * k * 2.0) / (msecs * 1000000.0)
    print "%7.3f msecs %4.0f gflops (%s_%s   : %d,%d,%d)" % (msecs,gflops,"cublas",op,m,n,k)

    return gflops

np.set_printoptions(threshold=8193, linewidth=600, formatter={'float':lambda x: "% .0f" % x})

ng = NervanaGPU(stochastic_round=False, bench=True)

for dtype in (np.float16,np.float32):
    
    for K, C, N in ((3072,3072*1,32),(3072,3072*1,64),(3072,3072*1,96),(3072,3072*1,128),
                    (3072,3072*2,32),(3072,3072*2,64),(3072,3072*2,96),(3072,3072*2,128),
                    (3072,3072*3,32),(3072,3072*3,64),(3072,3072*3,96),(3072,3072*3,128),
                    (3072,3072*4,32),(3072,3072*4,64),(3072,3072*4,96),(3072,3072*4,128),): 
                    #(3072,3072,32+128*0),(3072,3072,64+128*0),(3072,3072,96+128*0),(3072,3072,128+128*0),
                    #(3072,3072,32+128*1),(3072,3072,64+128*1),(3072,3072,96+128*1),(3072,3072,128+128*1),
                    #(3072,3072,32+128*2),(3072,3072,64+128*2),(3072,3072,96+128*2),(3072,3072,128+128*2),
                    #(3072,3072,32+128*3),(3072,3072,64+128*3),(3072,3072,96+128*3),(3072,3072,128+128*3),): 
        for op,  dimA,  dimB,  dimC in (
          ("nn", (K,C), (C,N), (K,N) ),  # fprop
          ("tn", (K,C), (K,N), (C,N) ),  # bprop
          ("nt", (K,N), (C,N), (K,C) )): # update
开发者ID:KayneWest,项目名称:nervanagpu,代码行数:30,代码来源:cublas2.py


示例5: NervanaGPU

import numpy as np
import pycuda.autoinit
from nervanagpu import NervanaGPU
nrv = NervanaGPU(default_dtype=np.float32)

a = nrv.array(np.random.randn(200,200))
b = nrv.empty_like(a)
b[:] = a**2
assert not np.any(np.isnan(b.get())), "Shouldn't have any nan's here"

开发者ID:awni,项目名称:nervanagpu,代码行数:9,代码来源:pow_failure.py


示例6: range

    # Swap A and B to map from C order to Fortran 
    for r in range(repeat):
        cublas.cublasSgemm(handle, opB, opA, n, m, k, alpha, B.gpudata, ldb, A.gpudata, lda, beta, C.gpudata, ldc)

    if repeat > 1:
        end.record()
        end.synchronize()
        msecs = end.time_since(start) / repeat
        gflops = (m * n * k * 2.0) / (msecs * 1000000.0)
        print("%7.3f msecs %4.0f gflops (%s_%s   : %d,%d,%d)" %
              (msecs,gflops,"cublas",op,m,n,k))


np.set_printoptions(threshold=8193, linewidth=600, formatter={'float':lambda x: "% .0f" % x})

ng = NervanaGPU(stochastic_round=0, bench=0)

small_1  = (1,2,3,4,5,6,7,8,9,16,32,64,65,72,120,127,128,192)
medium_1 = (32,64,128,192,778,785,786,787,794)
big_1    = (32,64,128,1532,1535,1536,1537,1540,3073,4095)

small_2  = (8,16,32,64,72,96,120,128,192)
medium_2 = (32,64,128,192,256,786-32,786-16,786,786+16,786+32)
big_2    = (32,64,128,1536-80,1536-64,1536,1536+64,1536+80,3072,4096)

# sharedDim = (4096,4096)
# devA1s = ng.empty(sharedDim, dtype=np.float32)
# devB1s = ng.empty(sharedDim, dtype=np.float32)
# devC1s = ng.empty(sharedDim, dtype=np.float32)
# devA2s = ng.empty(sharedDim, dtype=np.float32)
# devB2s = ng.empty(sharedDim, dtype=np.float32)
开发者ID:leonardt,项目名称:nervanagpu,代码行数:31,代码来源:cublas_test.py


示例7: NervanaGPU

# 
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
from nervanagpu import NervanaGPU
from pycuda.autoinit import context

print context.get_device().name()

np.set_printoptions(threshold=8193, linewidth=600, formatter={'float':lambda x: "% .0f" % x})

ng = NervanaGPU(stochastic_round=False, bench=True)

dtype  = np.float16
repeat = 1
cpu    = 1  # Set CPU to 1 to check against CPU

for data_type in ("All Ones", "Random Data",): #"All Ones", "Random Data"
    print data_type
    for size in ((3072,3072,3072*2),): #(4095,4095,4095) 
        m, n, k = size
        for op in ("tn","nn","nt"): #"tn","nn","nt"

            dimA = (m,k) if op[0] == 'n' else (k,m)
            dimB = (k,n) if op[1] == 'n' else (n,k)
            dimC = (m,n)
开发者ID:KayneWest,项目名称:nervanagpu,代码行数:30,代码来源:gemm_test2.py


示例8: range

    
    # Swap A and B to map from C order to Fortran 
    for r in range(repeat):
        cublas.cublasSgemm(handle, opB, opA, n, m, k, alpha, B.gpudata, ldb, A.gpudata, lda, beta, C.gpudata, ldc)

    end.record()
    end.synchronize()
    msecs = end.time_since(start) / repeat
    gflops = (m * n * k * 2.0) / (msecs * 1000000.0)
    print("%7.3f msecs %4.0f gflops (%s_%s   : %d,%d,%d)" %
          (msecs,gflops,"cublas",op,m,n,k))


np.set_printoptions(threshold=8193, linewidth=600, formatter={'float':lambda x: "% .0f" % x})

ng = NervanaGPU(stochastic_round=False, bench=True)

repeat = 1

for dtype in (np.float16, np.float32,):
    
    for K, C, N in ((32,4096,1512),):

        for alpha, beta in ((1.0,0.0), (0.5,0.5)):

            for op,  dimA,  dimB,  dimC in (
              ("nn", (K,C), (C,N), (K,N) ),  # fprop
              ("tn", (K,C), (K,N), (C,N) ),  # bprop
              ("nt", (K,N), (C,N), (K,C) ),): # update

                devA1 = ng.empty(dimA, dtype=dtype)
开发者ID:leonardt,项目名称:nervanagpu,代码行数:30,代码来源:cublas.py


示例9: __init__

    def __init__(self, rng_seed, stochastic_round=False, device_id=0,
                 num_dev=2):
        drv.init()
        self.num_dev = num_dev

        if device_id == 0:
            self.dev_list = range(num_dev)
        else:
            self.dev_list = device_id

        assert len(self.dev_list) == self.num_dev
        assert self.num_dev <= drv.Device.count()

        self.ctxs = []
        self.devs = []
        self._strms = []
        self._redstrms = []

        self._events = []
        self._redevents = []

        self.async = True
        self._nostrms = [None for i in self.dev_list]

        for i in self.dev_list:
            self.devs.append(drv.Device(i))

        for dev in self.devs:
            self.ctxs.append(
                dev.make_context(drv.ctx_flags.SCHED_BLOCKING_SYNC))
            self._strms.append(drv.Stream())
            self._redstrms.append(drv.Stream())
            self._events.append(drv.Event())
            self._redevents.append(drv.Event())
            drv.Context.pop()

        self.ctxs[0].push()
        atexit.register(drv.Context.pop)
        MGPUTensor.ctxs = self.ctxs
        MGPUTensor.num_dev = num_dev

        self.ng = NervanaGPU(stochastic_round=stochastic_round)
        logger.info("Initialized %d device NervanaGPU, stochastic_round=%s",
                    num_dev, stochastic_round)
        self.ng.block = None
        self.rng_seed = rng_seed
        self.rng_init()

        # Setup the pairwise contexts
        # TODO clean up this code to avoid indexing
        for dev1, ctx1 in zip(self.devs, self.ctxs):
            ctx1.push()
            for dev2, ctx2 in zip(self.devs, self.ctxs):
                if dev1 == dev2:
                    continue
                if dev1.can_access_peer(dev2):
                    ctx1.enable_peer_access(ctx2)
                else:
                    print('Cannot enable peer access between '
                          '{:d} and {:d}'.format(dev1, dev2))
            ctx1.pop()
开发者ID:neuroidss,项目名称:neon,代码行数:61,代码来源:mgpu.py


示例10: NervanaGPU

# Note GoogLeNet2 only fits in fp16 currently.  I need to work out delta sharing in inception layers.
nets = ("Alexnet","Overfeat","GoogLeNet1","GoogLeNet2","VGG","VGG_E",)

#Available dtypes: np.float16, np.float32
dtypes = (np.float16,np.float32)

# number of full iterations
loops       = 10
# show bechmark details for each layer
layer_bench = 0
# show layer stats after each operation
print_stats = 0
# run network with all zeros to see speed difference
zeros       = 0

ng = NervanaGPU(bench=layer_bench)

# common convolutional layer settings
conv11    = { "R":11, "S":11, "pad_h":2, "pad_w":2, "str_h":4, "str_w":4 }
conv11p0  = { "R":11, "S":11, "pad_h":0, "pad_w":0, "str_h":4, "str_w":4 }
conv7     = { "R":7,  "S":7,  "pad_h":3, "pad_w":3, "str_h":2, "str_w":2 }
conv5     = { "R":5,  "S":5,  "pad_h":2, "pad_w":2 }
conv5p0   = { "R":5,  "S":5,  "pad_h":0, "pad_w":0 }
conv3     = { "R":3,  "S":3,  "pad_h":1, "pad_w":1 }
conv2     = { "R":2,  "S":2,  "pad_h":0, "pad_w":0, "str_h":2, "str_w":2 }
conv1     = { "R":1,  "S":1,  "pad_h":0, "pad_w":0 }

# traditional pooling
pool2s2p0 = { "R":2, "S":2 }
pool3s2p0 = { "R":3, "S":3, "str_h":2, "str_w":2 }
pool3s2p1 = { "R":3, "S":3, "str_h":2, "str_w":2, "pad_h":1, "pad_w":1 }
开发者ID:3upperm2n,项目名称:convnet-benchmarks,代码行数:31,代码来源:convnet-benchmarks.py


示例11: start_bench

dtype  = np.float16
repeat = 20

start, end = (drv.Event(), drv.Event())

def start_bench():
    start.record()

def end_bench(op):
    end.record()
    end.synchronize()
    msecs  = end.time_since(start) / repeat
    gflops = conv.flops / (msecs * 1000000.0)
    print "%7.3f msecs %8.3f gflops (%s: %s)" % (msecs, gflops, op, conv)

ng = NervanaGPU(stochastic_round=False, bench=True)

# Create a cuDNN context
cudnn = libcudnn.cudnnCreate()

C_desc = libcudnn.cudnnCreateConvolutionDescriptor()
I_desc = libcudnn.cudnnCreateTensorDescriptor()
O_desc = libcudnn.cudnnCreateTensorDescriptor()
E_desc = libcudnn.cudnnCreateTensorDescriptor()
B_desc = libcudnn.cudnnCreateTensorDescriptor()
F_desc = libcudnn.cudnnCreateFilterDescriptor()
U_desc = libcudnn.cudnnCreateFilterDescriptor()

# Set some options and tensor dimensions
NCHW_fmt  = libcudnn.cudnnTensorFormat['CUDNN_TENSOR_NCHW']
cu_dtype  = libcudnn.cudnnDataType['CUDNN_DATA_FLOAT']
开发者ID:KayneWest,项目名称:nervanagpu,代码行数:31,代码来源:cudnn.py


示例12: print

from pycuda.autoinit   import context
from nervanagpu        import NervanaGPU
from nervanagpu.layers import DataLayer, ConvLayer, PoolLayer, FullLayer
print(context.get_device().name())

# Compare results here:
# https://github.com/soumith/convnet-benchmarks

# number of full iterations
loops       = 10
# show bechmark details for each layer
layer_bench = 0
# show layer stats after each operation
print_stats = 0

ng = NervanaGPU(bench=layer_bench)

# don't learn, just benchmark
momentum      = 0.0
learning_rate = 0.0

# common convolutional layer settings
conv3   = { "R":3, "S":3, "pad_h":1, "pad_w":1 }
conv1   = { "R":1, "S":1, "pad_h":0, "pad_w":0 }

# traditional pooling
pool2   = { "op":"max", "R":2, "S":2 }
pool3   = { "op":"max", "R":3, "S":3, "str_h":2, "str_w":2 }

# maxout pooling
pool1j2 = { "op":"max", "J":2 } # maxout in the fc layers
开发者ID:ekelsen,项目名称:nervanagpu,代码行数:31,代码来源:convnet-benchmarks.py


示例13: NervanaGPU

#!/usr/bin/python

import numpy         as np
import pycuda.driver as drv
from nervanagpu      import NervanaGPU
from pycuda.autoinit import context
from ipdb            import set_trace

np.set_printoptions(threshold=8192*4, linewidth=600, formatter={'int':lambda x: "%2d" % x,'float':lambda x: "%2.0f" % x})

ng = NervanaGPU(stochastic_round=0, bench=1)

dtype  = np.float32 # np.float16 or np.float32
repeat = 50          # repeat count for benchmarking
ones   = 0          # simpler data for debugging
cpu    = 0          # valdiate against numpy
size   = 32         # 32, 64, 128, None=auto

X = 100   # Batch Size
N = 32   # Minibatch Size
C = 3072  # Input  Features
K = 3072  # Output Features
Nin = True

dimW = (K,C)
if Nin:
    dimI = (X,C,N)
    dimO = (X,K,N)
else:
    dimI = (X,N,C)
    dimO = (X,N,K)
开发者ID:chagge,项目名称:nervanagpu,代码行数:31,代码来源:batched_dot_test.py


示例14: run

def run():
    ng = NervanaGPU(stochastic_round=False)

    dt = np.float32
    # N: Number of images in mini-batch
    # C: Number of input feature maps
    # K: Number of output feature maps

    # D: Depth  of input image
    # H: Height of input image
    # W: Width  of input image

    # T: Depth  of filter kernel
    # R: Height of filter kernel
    # S: Width  of filter kernel
    # 
    # * images:      (numColors, imgSizeY, imgSizeX, numImages) with stride given
    # * filters:     (numColors, filterPixels, numFilters) if conv
    # *              (numModules, numColors, filterPixels, numFilters) otherwise
    # *
    # * targets:     (numFilters, numModulesY, numModulesX, numImages)

    N = 128
    C = 3
    K = 64

    D = 1
    H = 64
    W = 64

    T = 1
    R = 8
    S = 8

    pad_h = pad_w = 0
    str_h = str_w = 4

    layer = ng.conv_layer(dt, N, C, K,
            D=D, H=H, W=W,
            T=T, R=R, S=S,
            pad_d=0, pad_h=pad_h, pad_w=pad_w,
            str_d=1, str_h=str_h, str_w=str_w,
            grid_P=0, grid_Q=0, update_size=None)

    numImages = N 
    numFilters = K

    numModulesY = int(math.ceil(float(H - R + 1 + 2*pad_h) / str_h))
    numModulesX = int(math.ceil(float(W - S + 1 + 2*pad_w) / str_w))

    print "Num Modules ", numModulesX, numModulesY


    # Set up images, filters, and outputs
    # imgd = np.loadtxt("im1.txt")
    # img = np.zeros((64, 64, 3))
    # print imgd.shape
    # for i in range(3):
    #     img[:, :, i] = imgd[i*64:(i+1)*64, :]
    # hostImages = np.tile(img)

    hostImages = np.random.rand(C, H, W, N)
    hostFilters = np.random.uniform(low=0.0, high=1.0, size=(C, S*R, numFilters)) #np.ones((C, S*R, numFilters)) #
    hostOutputs = np.zeros((numFilters, numModulesY, numModulesX, N))

    print "Input sum", np.sum(hostImages)

    # Run cc2 kernel    
    devI = ng.array(hostImages, dtype=dt)
    devF = ng.array(hostFilters, dtype=dt)
    devO = ng.array(hostOutputs, dtype=dt)

    ng.fprop_cuda_conv(layer, devI, devF, devO)

    print "CC2 input sum: ", np.sum(devI.asnumpyarray())
    print "CC2 output sum: ", np.sum(devO.asnumpyarray())

    # Run maxwel kernel
    # images: (C * H * W, N)
    # filters:  (C * S * R , numFilters)
    # outputs:  (numFilters * numModulesX * numModulesY, N)
    devI = ng.array(hostImages.reshape((C*H*W, N)), dtype=dt)
    devF = ng.array(hostFilters.reshape((C*S*R, numFilters)), dtype=dt)
    devO2 = ng.array(hostOutputs.reshape(numFilters*numModulesX*numModulesY, N), dtype=dt)

    ng.fprop_conv(layer, devI, devF, devO2)
    print "NG input sum: ", np.sum(devI.asnumpyarray())
    print "NG output sum: ", np.sum(devO2.asnumpyarray())

    hostOutputs1 = np.reshape(devO.asnumpyarray(), devO2.shape)
    hostOutputs2 = devO2.asnumpyarray()

    for i in xrange(hostOutputs1.shape[0]):
       for j in xrange(hostOutputs1.shape[1]):
           assert(abs(hostOutputs1[i, j] - hostOutputs2[i, j]) < 1e-4)
开发者ID:jcoreyes,项目名称:nervanagpu,代码行数:95,代码来源:testcudaconv.py


示例15: set

import pycuda.driver as drv
from nervanagpu      import NervanaGPU
from pycuda.autoinit import context
from operator        import mul

print context.get_device().name()

np.set_printoptions(threshold=8193, linewidth=600, formatter={'int':lambda x: "%10d" % x,'float':lambda x: "% .0f" % x})

ops  = set(("update",)) # "fprop","bprop","update"
ones = 0
cpu  = 0  # Set CPU to 1 to check against CPU
repeat = 1
dtype = np.float32

ng = NervanaGPU(stochastic_round=False, bench=True)

conv = ng.conv_layer(
    dtype,
    16,3,8,    # N,C,K
    1,64,64,   # D,H,W
    1,3,3,     # T,R,S
    0,1,1,     # padding
    1,1,1)     # strides


dimI = conv.dimI
dimF = conv.dimF
dimO = conv.dimO

# colapse outer dimensions into one and preserve inner dimension
开发者ID:KayneWest,项目名称:nervanagpu,代码行数:31,代码来源:conv_test.py


示例16: GPU

class GPU(Backend):
    """
    Sets up a NervanaGPU based backend for matrix operations.
    Note that some functions defined in the generic Backend class such as are
    cross-map pooling and normalization and adaDelta are not implemented for
    this backend.
    """
    default_dtype = np.float32

    def __init__(self, rng_seed, stochastic_round=False, device_id=0):
        self.ng = NervanaGPU(stochastic_round=stochastic_round)
        logger.info("Initialized NervanaGPU with stochastic_round=%s",
                    stochastic_round)
        self.rng_seed = rng_seed
        self.rng_init()
        self.device_id = device_id if device_id is not None else 0

    def __getstate__(self):
        """
        Defines what and how we go about serializing an instance of this class.

        Returns:
            self.__dict__: The full contents of the backend class instance,
                           except for the mem_pool which is on device and
                           cannot be serialized.
        """
        if hasattr(self, 'mem_pool') and self.mem_pool is not None:
            self.mem_pool_pickle = {'shape': self.mem_pool.shape,
                                    'dtype': np.float32}
            self.mem_pool = None

        return self.__dict__

    def __setstate__(self, state):
        """
        Defines how we go about deserializing into an instance of this class.

        Arguments:
            self.__dict__: The full contents of the backend class instance,
                           except for the mem_pool which is on device and
                           cannot be serialized.
        """
        self.__dict__.update(state)
        self.mem_pool = self.ng.empty(self.mem_pool_pickle['shape'],
                                      dtype=self.mem_pool_pickle['dtype'])

    def init_mempool(self, shape, dtype=default_dtype):
        """
        Allocates a memory pool for temporary storage
        """
        self.mem_pool = self.ng.empty(shape, dtype=dtype)

    def alloc_host_mem(self, shape, dtype):
        return drv.pagelocked_empty(shape, dtype, order="C", mem_flags=0)

    def create_stream(self):
        return drv.Stream()

    def async_copy(self, dest, src, stream=None):
        drv.memcpy_htod_async(dest.gpudata, src, stream)

    def rng_init(self):
        """
        Initialize and seed the pseudo random number genrator. Random numbers
        are generated on the host using numpy, then transfered to device.
        """
        seed = None
        if 'rng_seed' in self.__dict__:
            seed = self.rng_seed
            logger.info("Seeding random number generator with: %s", str(seed))
        np.random.seed(seed)

    def flop_timing_init(self, decorate_fc, decorate_conv, decorate_ew):
        """
        Initialize FLOP timing.  Wraps the specified MOP calls via a decorator
        to record elapsed time and number of operations.

        Arguments:
           decorate_fc (list): string giving the function names of fully
                               connected layer forward/backward/update calls
                               to time.
           decorate_conv (list): string giving the function names of
                                 convolutional layer forward/backward/update
                                 calls to time.
           decorate_ew (list): string giving the function names of element-wise
                               calls to time.

        Notes:
            Must be called prior to first flop_timing_start call
        """
        self.start = drv.Event()
        self.end = drv.Event()
        self.flop_timer = FlopsDecorator(self)
        self.flop_timer.decorate(decorate_fc=decorate_fc,
                                 decorate_conv=decorate_conv,
                                 decorate_ew=decorate_ew)

    def flop_timinig_start(self):
        """
        Start a new FLOP timer.
#.........这里部分代码省略.........
开发者ID:YouVentures,项目名称:neon,代码行数:101,代码来源:gpu.py


示例17: NervanaGPU

# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Using just cublas compare N as the contiguous dimension verses the non-contiguous dimension.

import numpy as np
import pycuda.driver as drv
from nervanagpu import NervanaGPU
from pycuda.autoinit import context
from scikits.cuda import cublas

print context.get_device().name()

ng = NervanaGPU(stochastic_round=False, bench=True)

handle = cublas.cublasCreate()

start, end = (drv.Event(), drv.Event())

def cublas_dot(op, A, B, C, repeat=1, warmup=False):

    lda = A.shape[0]
    ldb = B.shape[0]
    ldc = C.shape[0]

    m = C.shape[0]
    n = C.shape[1]
    k = A.shape[1] if op[0] == 'n' else A.shape[0]
开发者ID:KayneWest,项目名称:nervanagpu,代码行数:30,代码来源:minibatch_layout_diff.py


示例18: MGPU

class MGPU(GPU):
    default_dtype = np.float32
    num_dev = 1
    is_dist = True

    def __init__(self, rng_seed, stochastic_round=False, device_id=0,
                 num_dev=2):
        drv.init()
        self.num_dev = num_dev

        if device_id == 0:
            self.dev_list = range(num_dev)
        else:
            self.dev_list = device_id

        assert len(self.dev_list) == self.num_dev
        assert self.num_dev <= drv.Device.count()

        self.ctxs = []
        self.devs = []
        self._strms = []
        self._redstrms = []

        self._events = []
        self._redevents = []

        self.async = True
        self._nostrms = [None for i in self.dev_list]

        for i in self.dev_list:
            self.devs.append(drv.Device(i))

        for dev in self.devs:
            self.ctxs.append(
                dev.make_context(drv.ctx_flags.SCHED_BLOCKING_SYNC))
            self._strms.append(drv.Stream())
            self._redstrms.append(drv.Stream())
            self._events.append(drv.Event())
            self._redevents.append(drv.Event())
            drv.Context.pop()

        self.ctxs[0].push()
        atexit.register(drv.Context.pop)
        MGPUTensor.ctxs = self.ctxs
        MGPUTensor.num_dev = num_dev

        self.ng = NervanaGPU(stochastic_round=stochastic_round)
        logger.info("Initialized %d device NervanaGPU, stochastic_round=%s",
                    num_dev, stochastic_round)
        self.ng.block = None
        self.rng_seed = rng_seed
        self.rng_init()

        # Setup the pairwise contexts
        # TODO clean up this code to avoid indexing
        for dev1, ctx1 in zip(self.devs, self.ctxs):
            ctx1.push()
            for dev2, ctx2 in zip(self.devs, self.ctxs):
                if dev1 == dev2:
                    continue
                if dev1.can_access_peer(dev2):
                    ctx1.enable_peer_access(ctx2)
                else:
                    print('Cannot enable peer access between '
                          '{:d} and {:d}'.format(dev1, dev2))
            ctx1.pop()

    def make_events(self):
        evtlist = []
        for ctx in self.ctxs:
            ctx.push()
            evtlist.append(drv.Event())
            ctx.pop()
        return evtlist

    # These definitions are for performing grouped context commands
    # This is experimental and should remove _stack for actual usage
    def begin_stack(self, block, identifier):
        if block == Block.update:
            self.ng.block = Block.update
            self.call_stack = []
        else:
            pass

    def end_stack(self, block, identifier):
        if block == Block.update:
            self.ng.block = None
            for idx, ctx in enumerate(self.ctxs):
                ctx.push()
                self.ng.stream = self.strms[idx]
                for method, args, kwargs in self.call_stack:
                    myargs = [a._tensorlist[idx] if isinstance(
                        a, MGPUTensor) else a for a in args]
                    mykwargs = {k: v._tensorlist[idx] if isinstance(
                        v, MGPUTensor) else v for k, v in kwargs.iteritems()}
                    getattr(super(MGPU, self), method)(*myargs, **mykwargs)
                self.ng.stream = None
                ctx.pop()
            self.call_stack = None
        else:
#.........这里部分代码省略.........
开发者ID:neuroidss,项目名称:neon,代码行数:101,代码来源:mgpu.py



注:本文中的nervanagpu.NervanaGPU类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python grace.status函数代码示例发布时间:2022-05-27
下一篇:
Python helpers.url_for函数代码示例发布时间:2022-05-27
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap