Python tensor.clip函数代码示例

本文整理汇总了Python中theano.tensor.clip函数的典型用法代码示例。


示例1: get_output_for

    def get_output_for(self, inputs, **kwargs):
        """Combine per-slice area estimates into a per-patient volume estimate.

        ``inputs`` unpacks into four symbolic variables: the per-slice area
        means, the per-slice area sigmas, a "not padded" indicator and the
        slice distances. Each of them is sliced with ``[:, :-1]`` / ``[:, 1:]``,
        so neighbouring entries along axis 1 are paired up.

        Returns the concatenation along axis 1 of the summed volume mean and
        the volume sigma, each turned into a column via ``dimshuffle(0, 'x')``.
        """
        mu_area, sigma_area, is_not_padded, slicedists = inputs
        eps = 1e-2

        # Undo the input scaling on both area statistics.
        mu_area = mu_area / self.rescale_input
        sigma_area = sigma_area / self.rescale_input

        # A pair is kept only if both neighbours are flagged as not padded
        # (presumably the indicator holds 0/1 values -- TODO confirm).
        pair_mask = is_not_padded[:, :-1] + is_not_padded[:, 1:] > 1.5

        # Distance attached to each pair of neighbouring slices.
        heights = slicedists[:, :-1]

        # Mean volume per pair. The product is clipped from below before the
        # square root so the argument of sqrt never drops under eps.
        lower_mu, upper_mu = mu_area[:, :-1], mu_area[:, 1:]
        root_term = T.sqrt(T.clip(lower_mu * upper_mu, eps, utils.maxfloat))
        pair_mu = (lower_mu + upper_mu + root_term) * heights / 3.0
        pair_mu = pair_mu * pair_mask

        # Sigma per pair, masked the same way.
        lower_sigma, upper_sigma = sigma_area[:, :-1], sigma_area[:, 1:]
        pair_sigma = heights*(lower_sigma + upper_sigma) / 3.0
        pair_sigma = pair_sigma * pair_mask

        # Per-patient statistics: means add up, sigmas add in quadrature.
        patient_mu = T.sum(pair_mu, axis=1)
        patient_var = T.sum(pair_sigma**2, axis=1)
        patient_sigma = T.sqrt(T.clip(patient_var, eps, utils.maxfloat))

        columns = [
            patient_mu.dimshuffle(0, 'x'),
            patient_sigma.dimshuffle(0, 'x')]
        return T.concatenate(columns, axis=1)

示例2: _modify_updates

 def _modify_updates(self, updates):
     """Force the pending update of selected biases to zero, in place.

     For each of ``zero_hidbias`` / ``zero_visbias`` that is truthy, the
     entry of ``updates`` keyed by ``self.hidbias`` / ``self.visbias`` is
     replaced by that same expression clipped to the range [0, 0].
     """
     flag_and_bias = (("zero_hidbias", "hidbias"),
                      ("zero_visbias", "visbias"))
     for flag_name, bias_name in flag_and_bias:
         if not getattr(self, flag_name):
             continue
         bias = getattr(self, bias_name)
         # clip(x, 0, 0) maps every element of the pending update to 0.
         updates[bias] = tensor.clip(updates[bias], 0, 0)

示例3: build_and_train_model

	def build_and_train_model(self,n_hu,n_hl):
		"""Build the network, compile a training function and run the training loop.

		``n_hu`` and ``n_hl`` are forwarded unchanged to ``self.define_layers``.
		The network is stored on ``self.model``. Nothing is returned; the loss
		of every epoch is printed.
		"""
		print('Building Model')

		# Symbolic integer matrices for the input and the target phrase.
		input_phrase = T.imatrix('train_inputmatrix')
		labels = T.imatrix('trainphrase_matrix')

		network = self.define_layers(input_phrase,labels,n_hu,n_hl)

		print("Defining loss")
		#Prediction or loss
		# NOTE(review): `prediction` is an empty list here, so `prediction[0]`
		# below raises IndexError. The statements that filled it appear to be
		# missing from this snippet -- restore them before use.
		prediction = []

		# NOTE(review): `l` is not defined inside this method; presumably a
		# module-level object providing define_loss -- confirm.
		loss = l.define_loss(prediction[0],prediction[1])
		self.model = network
		#define params
		params = lasagne.layers.get_all_params(network)
		updates = lasagne.updates.adadelta(loss,params)

		#run test

		# One call performs an Adadelta step and returns the loss together with
		# the two prediction outputs.
		train_fn = theano.function([input_phrase,labels],[loss, prediction[0], prediction[1]],updates=updates,allow_input_downcast=True)

		print("Model and params defined now training")
		epoch = 0
		for epoch in range(self.end_epoch):
			# train_loss, train_pred and start_time are assigned but never read.
			train_loss = 0
			train_pred = []
			start_time = time.time()
			# Every epoch is a single call on the full training matrices.
			# `loss` is rebound from the symbolic expression to the returned value.
			loss, predicted, phrase = train_fn(self.train_inputmatrix,self.trainphrase_matrix)
			print('Training Loss: ' + str(loss) + ' Train Epoch ' + str(epoch))

示例4: gaussian_likelihood_diagonal_variance

def gaussian_likelihood_diagonal_variance(t, mu, sig, dim):
    """Gaussian likelihood along the first dimension, diagonal covariance.

    t   : TensorVariable (target)
    mu  : FullyConnected (Linear), used as the mean
    sig : FullyConnected (Softplus), used as the diagonal variance
    dim : First dimension of the target vector t (not used by the
          computation; kept so existing callers keep working)

    Returns the density reduced over axis 0:
    ``exp(-0.5 * sum((t - mu)**2 / sig)) / prod(sqrt(2 * pi * sig))``.

    The previous version summed ``exp(...)`` over axis 0 instead of
    exponentiating the summed exponent. That is not the joint density of a
    diagonal Gaussian, whose per-dimension factors multiply.
    """
    # First clip sig so that neither the division nor the square root below
    # sees a zero or infinite variance.
    sig_clip = T.clip(sig, 1e-40, 1e40)

    # With a diagonal covariance the normalisation term is just the product of
    # the per-dimension factors sqrt(2 * pi * sig_i).
    sig_time_2pi = T.sqrt(sig_clip * 2 * math.pi)

    # The product can underflow to 0 (or overflow), hence the second clip.
    normalization_coeff = T.clip(T.prod(sig_time_2pi, axis=0), 1e-40, 1e40)

    # Diagonal covariance again: term-by-term division replaces the inverse
    # matrix product. Sum the exponents first, then exponentiate once.
    exponent = (- 0.5 * (t - mu) * (t - mu) / sig_clip).sum(axis=0)
    pdf = T.exp(exponent) / normalization_coeff
    return pdf

示例5: rmsprop

    def rmsprop(self, lr, tparams, grads, inp_list, cost, params):
        """Compile the two Theano functions of an RMSProp optimiser.

        :param lr: symbolic learning rate; the only input of ``f_update``
        :param tparams: mapping name -> shared parameter variable
        :param grads: gradient expressions, in the iteration order of ``tparams``
        :param inp_list: inputs of the cost graph
        :param cost: cost expression returned by ``f_grad_shared``
        :param params: dict providing "grad_clip", "decay_rate" and "smooth_eps"
        :return: ``(f_grad_shared, f_update, zipped_grads, running_grads2, updir)``

        ``f_grad_shared`` stores the gradients and refreshes the running
        average of squared gradients; ``f_update`` applies the step.

        The closing brackets, the ``else:`` and the ``(rg2, ...)`` tuple of the
        clipped branch were missing in the source and are restored here.
        """
        clip = params["grad_clip"]
        decay_rate = tensor.constant(params["decay_rate"], dtype=theano.config.floatX)
        smooth_eps = tensor.constant(params["smooth_eps"], dtype=theano.config.floatX)

        # One zero-initialised accumulator per parameter for the gradient and
        # for the running mean of the squared gradient.
        zipped_grads = [
            theano.shared(np.zeros_like(p.get_value()), name="%s_grad" % k) for k, p in tparams.items()
        ]
        running_grads2 = [
            theano.shared(np.zeros_like(p.get_value()), name="%s_rgrad2" % k) for k, p in tparams.items()
        ]

        zgup = [(zg, g) for zg, g in zip(zipped_grads, grads)]
        if clip > 0.0:
            # Only the squared-gradient statistic sees the clipped gradient;
            # zgup above stores the gradient unclipped.
            rg2up = [
                (
                    rg2,
                    tensor.clip(decay_rate * rg2 + (1 - decay_rate) * (tensor.clip(g, -clip, clip) ** 2), 0.0, np.inf),
                )
                for rg2, g in zip(running_grads2, grads)
            ]
        else:
            rg2up = [
                (rg2, tensor.clip(decay_rate * rg2 + (1 - decay_rate) * (g ** 2), 0.0, np.inf))
                for rg2, g in zip(running_grads2, grads)
            ]

        f_grad_shared = theano.function(inp_list, cost, updates=zgup + rg2up, name="rmsprop_f_grad_shared")

        updir = [theano.shared(p.get_value() * numpy_floatX(0.0), name="%s_updir" % k) for k, p in tparams.items()]
        updir_new = [
            (ud, -lr * zg / (tensor.sqrt(rg2) + smooth_eps)) for ud, zg, rg2 in zip(updir, zipped_grads, running_grads2)
        ]
        # udn[1] is the step expression itself, so the parameter moves by the
        # step computed in this very call.
        param_up = [(p, p + udn[1]) for p, udn in zip(tparams.values(), updir_new)]
        f_update = theano.function(
            [lr], [], updates=updir_new + param_up, on_unused_input="ignore", name="rmsprop_f_update"
        )

        return f_grad_shared, f_update, zipped_grads, running_grads2, updir

示例6: custom_loss

def custom_loss(y_true, y_pred):
  """Squared log error per element plus a penalty on the totals.

  Both tensors are clipped from below at 0.001. The first term is the mean
  over the last axis of the squared difference of ``log(x + 1)``; the second
  is ``CMC_PENALTY`` (read from the enclosing scope) times the squared
  difference of ``log(sum(x) + 1)``.
  """
  epsilon = 0.001
  pred_floored = T.clip(y_pred, epsilon, np.inf)
  true_floored = T.clip(y_true, epsilon, np.inf)

  # Element-wise part.
  log_pred = T.log(pred_floored + 1.)
  log_true = T.log(true_floored + 1.)
  elementwise_term = T.mean(T.square(log_pred-log_true), axis=-1)

  # Part that compares the totals.
  log_pred_total = T.log(T.sum(pred_floored)+1)
  log_true_total = T.log(T.sum(true_floored)+1)
  total_term = CMC_PENALTY*T.square(log_pred_total-log_true_total)

  return elementwise_term + total_term

示例7: redo_theano

    def redo_theano(self):
        """(Re)create the shared state and compile every Theano function of the model.

        Creates the shared vectors ``self.h``, ``self.v`` and ``self.new_h`` and
        the compiled functions ``init_h_v``, ``coding_obj_grad``, ``try_step``,
        ``accept_h``, ``get_h``, ``code_learning_obj``, ``code_learning_step``
        and ``train_predictor``.

        The ``T.clip(`` call that bounds the ``pred_g`` update was left
        unclosed in the source; its closing parenthesis is restored here.
        """
        # Current code and current input, both zero-initialised.
        self.h = shared(N.zeros(self.nhid, dtype=floatX), name="h")
        self.v = shared(N.zeros(self.nvis, dtype=floatX), name="v")

        input_v = T.vector()
        assert input_v.type.dtype == floatX

        # Store an input in v and initialise h with the predictor's output.
        self.init_h_v = function([input_v], updates={self.h: self.predict(input_v), self.v: input_v})

        # The objective must be a scalar and its gradient w.r.t. h a vector.
        coding_obj = self.coding_obj(self.v, self.h)
        assert len(coding_obj.type.broadcastable) == 0

        coding_grad = T.grad(coding_obj, self.h)
        assert len(coding_grad.type.broadcastable) == 1

        self.coding_obj_grad = function([], [coding_obj, coding_grad])

        # Candidate code written by try_step and committed by accept_h.
        self.new_h = shared(N.zeros(self.nhid, dtype=floatX), name="new_h")

        alpha = T.scalar(name="alpha")

        outside_grad = T.vector(name="outside_grad")

        # Multiplicative step on h, kept inside [1e-10, 1e4].
        new_h = T.clip(self.h * T.exp(-alpha * outside_grad), 1e-10, 1e4)

        new_obj = self.coding_obj(self.v, new_h)

        self.try_step = function([alpha, outside_grad], updates={self.new_h: new_h}, outputs=new_obj)

        self.accept_h = function([], updates={self.h: self.new_h})

        self.get_h = function([], self.h)

        V = T.matrix(name="V")
        H = T.matrix(name="H")

        coding_obj_batch = self.coding_obj_batch(V, H)

        self.code_learning_obj = function([V, H], coding_obj_batch)

        # Plain gradient step on W for the batch coding objective.
        learning_grad = T.grad(coding_obj_batch, self.W)
        self.code_learning_step = function([V, H, alpha], updates={self.W: self.W - alpha * learning_grad})

        # The predictor is trained to regress the codes H from the inputs V.
        pred_obj = T.mean(T.sqr(self.predict(V) - H))

        predictor_params = [self.pred_W, self.pred_b, self.pred_g]

        pred_grads = T.grad(pred_obj, wrt=predictor_params)

        predictor_updates = {}

        for param, grad in zip(predictor_params, pred_grads):
            predictor_updates[param] = param - alpha * grad

        # Keep pred_g inside [0.5, 1000] after its gradient step.
        predictor_updates[self.pred_g] = T.clip(
            predictor_updates[self.pred_g], N.cast[floatX](0.5), N.cast[floatX](1000.0)
        )

        self.train_predictor = function([V, H, alpha], updates=predictor_updates)

示例8: get_constraint_updates

    def get_constraint_updates(self):
        """Build the dictionary of parameter-constraint updates.

        Constraints are stacked in this order: optional scalar ``lambd``,
        optional (max-)unit-norm columns for ``Wv``, ``scalar_norms`` >= 1,
        then the upper bounds in ``self.clip_max`` and the lower bounds in
        ``self.clip_min`` (both keyed by parameter name).

        :return: OrderedDict mapping shared variable -> constrained expression

        Two fixes over the earlier version:
        * the clipping loops looked up the pending update with the parameter
          *name*, while the dictionary is keyed by the variable itself, so an
          earlier constraint on the same parameter was silently discarded;
        * the lower-bound clip used the raw parameter as its upper limit, which
          could undo a pending update that exceeds the raw value.
        """
        constraint_updates = OrderedDict()
        if self.flags['scalar_lambd']:
            constraint_updates[self.lambd] = T.mean(self.lambd) * T.ones_like(self.lambd)

        # constrain filters to have unit norm
        if self.flags['wv_norm'] in ('unit', 'max_unit'):
            wv = constraint_updates.get(self.Wv, self.Wv)
            wv_norm = T.sqrt(T.sum(wv**2, axis=0))
            if self.flags['wv_norm'] == 'unit':
                constraint_updates[self.Wv] = wv / wv_norm
            elif self.flags['wv_norm'] == 'max_unit':
                # Only columns whose norm exceeds 1 are shrunk.
                constraint_updates[self.Wv] = wv / wv_norm * T.minimum(wv_norm, 1.0)

        constraint_updates[self.scalar_norms] = T.maximum(1.0, self.scalar_norms)

        ## clip parameters to maximum values (if applicable)
        for (k, v) in self.clip_max.items():
            assert k in [param.name for param in self.params()]
            param = getattr(self, k)
            # Start from the pending update, if any, so constraints compose.
            current = constraint_updates.get(param, param)
            constraint_updates[param] = T.minimum(current, v)

        ## clip parameters to minimum values (if applicable)
        for (k, v) in self.clip_min.items():
            assert k in [param.name for param in self.params()]
            param = getattr(self, k)
            current = constraint_updates.get(param, param)
            constraint_updates[param] = T.maximum(current, v)

        return constraint_updates

示例9: train

 def train(self, X, evalinter=10):
     """Train this gradient-descent NMF on the matrix X.

     Builds the regularised squared reconstruction cost of ``self.X`` against
     ``dot(self.W, self.H)``, compiles the update functions and delegates the
     iterations to ``self.trainloop``.

     :param X: forwarded to ``self.trainloop``
     :param evalinter: forwarded to ``self.trainloop``
     :return: ``(W, H, err)`` -- the values of the two factors and whatever
         ``self.trainloop`` returned

     NOTE(review): both ``theano.function(`` calls were truncated in the
     source. ``inputs=[]`` and the ``outputs`` chosen below are a
     reconstruction; confirm against what ``self.trainloop`` expects from
     ``trainf``.
     """
     # define errors and cost
     tErr = (1./2.) * ((self.X - T.dot(self.W, self.H))**2).sum()
     tReg = (1./2.) * ((self.W**2).sum() * self.Wreg + (self.H**2).sum() * self.Hreg)
     tCost = tErr + tReg
     # get gradients
     gW, gH = T.grad(tCost, [self.W, self.H])
     # define updates and function; clipping at 0 keeps both factors
     # non-negative after each step. np.inf replaces the alias np.infty, which
     # NumPy 2.0 removed.
     updW = (self.W, T.clip(self.W - self.lr * gW, 0, np.inf))
     updH = (self.H, T.clip(self.H - self.lr * gH, 0, np.inf))
     trainf = theano.function(
         inputs=[],
         outputs=[tErr],
         updates=[updW, updH]
     )
     # Divides every row of W by its row sum. Not used further in this method.
     normf = theano.function(
         inputs=[],
         outputs=[],
         updates=[
             (self.W, (self.W.T/T.sum(self.W, axis=1)).T),
         ]
     )
     # train loop
     err = self.trainloop(X, trainf=trainf, evalinter=evalinter)
     return self.W.get_value(), self.H.get_value(), err

示例10: init_process

def init_process(model, gaussian, delta, fn_type):
    """Build the network and compile either its training or its evaluation function.

    :param model: network identifier; only 'jy' is supported
    :param gaussian: forwarded to ``JY_cnn``; also selects which layer is
        reported as the pre-Gaussian output
    :param delta: forwarded to ``JY_cnn``
    :param fn_type: 'train', or 'val' / 'test'
    :return: ``(func, network)``
    :raises ValueError: for an unknown ``model`` or ``fn_type`` (these
        previously surfaced as NameError / UnboundLocalError)

    The number of input tensors comes from ``scales``, which is read from the
    enclosing scope. The ``T.mean(`` call computing ``test_acc`` was truncated
    in the source; its ``dtype`` argument is restored here.
    """
    print("Building model and compiling functions...")
    # Prepare Theano variables for inputs and targets
    import theano.tensor as T
    input_var_list = [T.tensor4('inputs{}'.format(i))
                      for i in range(scales)]
    target_var = T.imatrix('targets')

    # Create network model
    if model == 'jy':
        print('Building JY CNN...')
        network = JY_cnn(input_var_list, gaussian, delta)
        learning_rate = 0.006
    # elif model == 'fcrnn':
    #     print('Building FCRNN...')
    #     network = FCRNN(input_var_list, delta)
    #     learning_rate = 0.0005
    else:
        raise ValueError('unknown model: {!r}'.format(model))

    print('defining loss function')
    prediction = lasagne.layers.get_output(network)
    # Keep probabilities strictly inside (0, 1) so the log in the
    # cross-entropy stays finite.
    prediction = T.clip(prediction, 1e-7, 1.0 - 1e-7)
    loss = lasagne.objectives.binary_crossentropy(prediction, target_var)
    loss = loss.mean()

    print('defining update')
    params = lasagne.layers.get_all_params(network, trainable=True)
    updates = lasagne.updates.nesterov_momentum(
            loss, params, learning_rate=learning_rate, momentum=0.9)
    # updates = lasagne.updates.adagrad(loss, params, learning_rate=learning_rate)

    print('defining testing method')
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_prediction = T.clip(test_prediction, 1e-7, 1.0 - 1e-7)

    #frame prediction
    layer_list = lasagne.layers.get_all_layers(network)
    gauss_layer = layer_list[-3]
    pre_gauss_layer = layer_list[-4] if gaussian else layer_list[-3]
    gauss_pred = lasagne.layers.get_output(gauss_layer, deterministic=True)
    pre_gauss_pred = lasagne.layers.get_output(pre_gauss_layer, deterministic=True)

    test_loss = lasagne.objectives.binary_crossentropy(test_prediction, target_var)
    test_loss = test_loss.mean()
    test_pred_result = T.argmax(test_prediction, axis=1)
    target_result = T.argmax(target_var, axis=1)
    test_acc = T.mean(T.eq(test_pred_result, target_result),
                      dtype=theano.config.floatX)

    if fn_type == 'train':
        print('compiling training function')
        func = theano.function(input_var_list + [target_var], 
                    [loss, prediction, gauss_pred, pre_gauss_pred], updates=updates)
    elif fn_type == 'val' or fn_type == 'test':
        print('compiling validation and testing function')
        func = theano.function(input_var_list + [target_var], 
                    [test_loss, test_acc, test_pred_result, test_prediction, gauss_pred, pre_gauss_pred])
    else:
        raise ValueError('unknown fn_type: {!r}'.format(fn_type))

    return func, network

示例11: lcn_std_diff

def lcn_std_diff(x,size=9):
    """Local contrast normalisation of one 48x48 image.

    :param x: symbolic image, reshaped to (1, 1, 48, 48)
    :param size: side length of the square Gaussian window
    :return: ``(p - local_mean) / local_std`` shifted so its minimum is 0

    NOTE(review): the continuation lines of both ``conv2d`` calls and of both
    slices were missing in the source. The 'full' convolution followed by a
    central 48x48 crop is a reconstruction -- verify against the original
    implementation before relying on it.
    """
    # Function borrowed from bengioe_util
    p = x.reshape((1,1,48,48))
    #p = (p-TT.mean(p))/T.std(p)
    g = gaussian(size,1.591/size)
    g = numpy.float32(g.reshape((1,1,size,size)))
    # Integer division: `size/2` is a float on Python 3 and cannot be used as
    # a slice bound.
    half = size // 2
    full = 48 + size - 1
    mean = TT.nnet.conv.conv2d(p,TT.constant(g),
                               (1,1,48,48),(1,1,size,size),
                               'full').reshape((full, full))
    mean = mean[half:48+half,
                half:48+half]
    meansq = TT.nnet.conv.conv2d(TT.sqr(p),TT.constant(g),
                                 (1,1,48,48),(1,1,size,size),
                                 'full').reshape((full, full))
    meansq = meansq[half:48+half,
                    half:48+half]
    # Local variance is E[p^2] - E[p]^2; clip away negative round-off.
    var = meansq - TT.sqr(mean)
    var = TT.clip(var, 0, 1e30)
    std = TT.sqrt(var)
    # Flooring the local std at its mean avoids amplifying flat regions.
    std = TT.clip(std, TT.mean(std), 1e30)
    out = (p - mean) / std
    return out - out.min()

示例12: sigmoid_readout_old

def sigmoid_readout_old(operators, v_in, h_L, g):
    """Readout layer with activation ``g`` and a binary cross-entropy cost.

    :param operators: list of [weight, bias] with shapes (n_hidden, n_visible)
        and (n_visible, )
    :param v_in: observed sequence; rows ``1:`` are the targets for the
        predictions made at rows ``:-1``
    :param h_L: shape (timesteps, n_hidden)
    :param g: activation applied to ``dot(h_L, weight) + bias``
    :return: tuple ``(v_sample, cost, monitor, None)`` where ``v_sample`` has
        shape (timesteps, n_visible)

    ``cost`` is the cross-entropy of the clipped prediction against the next
    input; ``monitor`` is the same quantity computed on the rounded sample.
    Both are summed and divided by ``v_in.shape[0]``.
    """
    weight = operators[0]
    bias = operators[1]
    v_pred = g(T.dot(h_L, weight) + bias)  # broadcastable bias??
    # Clip both sides away from 0 and 1 so the logarithms stay finite.
    v_pred_c = T.clip(v_pred, 1.0e-7, 1.0 - 1.0e-7)
    v_in_c = T.clip(v_in, 1.0e-7, 1.0 - 1.0e-7)

    # Cost:
    cost = -T.xlogx.xlogy0(v_in_c[1:], v_pred_c[:-1]) - T.xlogx.xlogy0(1 - v_in_c[1:], 1 - v_pred_c[:-1])
    cost = cost.sum() / v_in.shape[0]

    # Sample is just rounded to nearest integer:
    v_sample = T.round(v_pred)
    v_sample_c = T.clip(v_sample, 1.0e-7, 1.0 - 1.0e-7)

    # Monitor (needs to return something... for now):
    monitor = -T.xlogx.xlogy0(v_in_c[1:], v_sample_c[:-1]) - T.xlogx.xlogy0(1 - v_in_c[1:], 1 - v_sample_c[:-1])
    monitor = monitor.sum() / v_in.shape[0]

    return v_sample, cost, monitor, None

示例13: softmax_readout

def softmax_readout(operators, v_in, h_L, external):
    """Softmax readout layer with a cross-entropy cost.

    :param operators: list of [weight, bias] with shapes (n_hidden, n_visible)
        and (n_visible, )
    :param v_in: observed sequence; rows ``1:`` are the targets for the
        predictions made at rows ``:-1``
    :param h_L: shape (timesteps, n_hidden)
    :param external: not used by this readout
    :return: tuple ``(v_sample, cost, monitor, None)`` where ``v_sample`` has
        shape (timesteps, n_visible)

    ``cost`` and ``monitor`` are currently the same quantity: the mean of
    ``crossent`` between the clipped prediction and the clipped next input.
    """
    weight = operators[0]
    bias = operators[1]

    v_pred = softmax(T.dot(h_L, weight) + bias)  # broadcastable bias??
    # Clip both sides away from 0 and 1 so the logarithms stay finite.
    v_pred_c = T.clip(v_pred, 1.0e-7, 1.0 - 1.0e-7)
    v_in_c = T.clip(v_in, 1.0e-7, 1.0 - 1.0e-7)

    # Sampled value is drawn from the multinomial defined by the softmax
    # output. The clipped copy of the sample was never read (the monitor
    # below uses v_pred_c), so it is no longer built.
    v_sample = rng.multinomial(pvals=v_pred, dtype=theano.config.floatX)

    # Cost:
    # cost = 1000 * ((v_pred[:-1] - v_in[1:]) ** 2).mean()
    # cost = -T.xlogx.xlogy0(v_in_c[1:], v_pred_c[:-1]) - \
    #       T.xlogx.xlogy0(1 - v_in_c[1:], 1 - v_pred_c[:-1])
    cost = crossent(v_pred_c[:-1], v_in_c[1:])
    cost = cost.mean()

    # Monitor:
    # monitor = -T.xlogx.xlogy0(v_in_c[1:], v_sample_c[:-1]) - \
    #          T.xlogx.xlogy0(1 - v_in_c[1:], 1 - v_sample_c[:-1])
    # TODO: changed monitor to v_pred_c!!!
    monitor = crossent(v_pred_c[:-1], v_in_c[1:])
    monitor = monitor.mean()

    return v_sample, cost, monitor, None

示例14: sigmoid_readout

def sigmoid_readout(operators, v_in, h_L, external):
    """Sigmoid readout layer with a cross-entropy cost.

    :param operators: list of [weight, bias] with shapes (n_hidden, n_visible)
        and (n_visible, )
    :param v_in: observed sequence; rows ``1:`` are the targets for the
        predictions made at rows ``:-1``
    :param h_L: shape (timesteps, n_hidden)
    :param external: not used by this readout
    :return: tuple ``(v_sample, cost, monitor, None)`` where ``v_sample`` has
        shape (timesteps, n_visible)

    ``cost`` is the mean of ``crossent`` on the clipped prediction;
    ``monitor`` is the same measure on the clipped, rounded sample.
    """
    weight = operators[0]
    bias = operators[1]
    v_pred = sigmoid(T.dot(h_L, weight) + bias)  # broadcastable bias??
    # Clip both sides away from 0 and 1 so the logarithms stay finite.
    v_pred_c = T.clip(v_pred, 1.0e-7, 1.0 - 1.0e-7)
    v_in_c = T.clip(v_in, 1.0e-7, 1.0 - 1.0e-7)

    # Sample is just rounded to nearest integer:
    v_sample = T.round(v_pred)
    # NOTE(review): `eps` is not defined in this function; presumably a
    # module-level constant (the clips above use the literal 1.0e-7) -- confirm.
    v_sample_c = T.clip(v_sample, eps, 1.0 - eps)

    # Cost:
    # cost = 1000 * ((v_pred[:-1] - v_in[1:]) ** 2).mean()
    # cost = -T.xlogx.xlogy0(v_in_c[1:], v_pred_c[:-1]) - \
    #       T.xlogx.xlogy0(1 - v_in_c[1:], 1 - v_pred_c[:-1])
    cost = crossent(v_pred_c[:-1], v_in_c[1:])  # TODO: v_sample_c !!!
    cost = cost.mean()

    # Monitor:
    # monitor = -T.xlogx.xlogy0(v_in_c[1:], v_sample_c[:-1]) - \
    #          T.xlogx.xlogy0(1 - v_in_c[1:], 1 - v_sample_c[:-1])
    monitor = crossent(v_sample_c[:-1], v_in_c[1:])
    monitor = monitor.mean()

    return v_sample, cost, monitor, None

示例15: get_constraint_updates

    def get_constraint_updates(self):
        """Build the dictionary of parameter-constraint updates.

        Constraints are stacked in this order: optional rescaling of ``Wv`` by
        ``avg_hact_std``, the upper bounds in ``self.clip_max``, the lower
        bounds in ``self.clip_min`` (both keyed by parameter name) and finally
        the optional scalar ``lambd``.

        :return: OrderedDict mapping shared variable -> constrained expression

        Two fixes over the earlier version:
        * the upper-bound clip started from the raw parameter and therefore
          overwrote a pending update (e.g. the ``unit_std`` rescaling of Wv);
        * the lower-bound clip used the raw parameter as its upper limit, which
          could undo a pending update that exceeds the raw value.
        """
        updates = OrderedDict()

        ## unit-variance constraint on hidden-unit activations ##
        if self.flags['unit_std']:
            updates[self.Wv] = self.Wv / self.avg_hact_std

        ## clip parameters to maximum values (if applicable)
        for (k, v) in self.clip_max.items():
            assert k in [param.name for param in self.params()]
            param = getattr(self, k)
            # Start from the pending update, if any, so constraints compose.
            current = updates.get(param, param)
            updates[param] = T.minimum(current, v)

        ## clip parameters to minimum values (if applicable)
        for (k, v) in self.clip_min.items():
            assert k in [param.name for param in self.params()]
            param = getattr(self, k)
            current = updates.get(param, param)
            updates[param] = T.maximum(current, v)

        ## constrain lambd to be a scalar
        if self.flags['scalar_lambd']:
            lambd = updates.get(self.lambd, self.lambd)
            updates[self.lambd] = T.mean(lambd) * T.ones_like(lambd)

        return updates

示例16: __init__

    def __init__(self, rng, input, filter_shape, image_shape, W=None, bias=False, padding='valid',activation=T.nnet.relu):
        """Convolution layer whose activated output is clipped to [0.001, 0.999].

        :param rng: random generator providing ``uniform(low, high, size)``
        :param input: symbolic input of the layer
        :param filter_shape: filter shape; index 1 must equal ``image_shape[1]``
        :param image_shape: shape of ``input``
        :param W: optional existing weight variable to reuse; a new one is
            drawn uniformly in +/- sqrt(6 / (fan_in + fan_out)) when omitted
        :param bias: add a zero-initialised, per-filter bias when truthy
        :param padding: border mode handed to the convolution
        :param activation: non-linearity applied before clipping

        NOTE(review): the arguments of ``theano.shared(`` and ``K.conv2d(`` and
        the ``else:`` of the bias branch were missing in the source. The calls
        below are a reconstruction; the ``K.conv2d`` keywords follow the Keras
        1.x Theano-backend signature -- confirm against the Keras version in use.
        """
        assert image_shape[1] == filter_shape[1]
        self.input = input
        fan_in = numpy.prod(filter_shape[1:])
        fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]))
        # initialize weights with random weights
        W_bound = numpy.sqrt(6. / (fan_in + fan_out))
        # `is None`, not `== None`: W may be a Theano variable, for which `==`
        # is not a plain boolean comparison.
        if W is None:
            W = theano.shared(
                numpy.asarray(
                    rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
                    dtype=theano.config.floatX
                ),
                borrow=True
            )
        self.W = W

        conv_out = K.conv2d(
            input,
            self.W,
            border_mode=padding,
            dim_ordering='th',
            image_shape=image_shape,
            filter_shape=filter_shape
        )
        if bias:
            b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX)
            self.b = theano.shared(value=b_values, borrow=True)
            # The bias is broadcast along every axis except axis 1.
            self.output = T.clip(activation(conv_out + self.b.dimshuffle('x', 0, 'x', 'x')), 0.001, 0.999)
            self.params = [self.W, self.b]
        else:
            self.output = T.clip(activation(conv_out), 0.001, 0.999)
            self.params = [self.W]

示例17: unet_crossentropy_loss_sampled

    def unet_crossentropy_loss_sampled(y_true, y_pred):
        """Loss computed on an equal-sized random sample of both classes.

        The predictions are clipped to [1e-4, 1 - 1e-4] and flattened. The
        indices of the positive (1) and negative (0) targets are shuffled and
        truncated to the size of the smaller class before the loss is taken.

        :return: scalar loss; when it is NaN the mean positive prediction is
            returned instead

        Fix: the NaN fallback used a Python ``if`` on the symbolic
        ``T.isnan(...)``. A Theano variable is truthy when the graph is built,
        so the fallback always replaced the loss. ``T.switch`` makes the
        selection when the graph is evaluated.
        """
        epsilon = 1.0e-4
        y_pred_clipped = T.flatten(T.clip(y_pred, epsilon, 1.0-epsilon))
        y_true = T.flatten(y_true)
        # this seems to work
        # it is super ugly though and I am sure there is a better way to do it
        # but I am struggling with theano to cooperate
        # filter the right indices
        classPos = 1
        classNeg = 0
        indPos   = T.eq(y_true, classPos).nonzero()[0]
        indNeg   = T.eq(y_true, classNeg).nonzero()[0]
        #pos      = y_true[ indPos ]
        #neg      = y_true[ indNeg ]

        # shuffle
        n = indPos.shape[0]
        indPos = indPos[UNET.srng.permutation(n=n)]
        n = indNeg.shape[0]
        indNeg = indNeg[UNET.srng.permutation(n=n)]
        # take equal number of samples depending on which class has less
        n_samples = T.cast(T.min([ indPos.shape[0], indNeg.shape[0]]), dtype='int64')
        #n_samples = T.cast(T.min([T.sum(y_true), T.sum(1-y_true)]), dtype='int64')

        indPos = indPos[:n_samples]
        indNeg = indNeg[:n_samples]
        # NOTE(review): the negative term uses log(p), not log(1 - p) as in the
        # commented-out line below -- confirm this is intended.
        #loss_vector = -T.mean(T.log(y_pred_clipped[indPos])) - T.mean(T.log(1-y_pred_clipped[indNeg]))
        loss_vector = -T.mean(T.log(y_pred_clipped[indPos])) - T.mean(T.log(y_pred_clipped[indNeg]))
        loss_vector = T.clip(loss_vector, epsilon, 1.0-epsilon)
        average_loss = T.mean(loss_vector)
        fallback_loss = T.mean( y_pred_clipped[indPos])
        return T.switch(T.isnan(average_loss), fallback_loss, average_loss)

示例18: exe

    def exe(self, mainloop):
        """Rescale the pending updates so their norm never exceeds ``self.weight_norm``.

        An entry of ``mainloop.updates`` is processed when the string form of
        its key contains one of ``self.keys`` and none of ``self.waivers``.
        With ``self.is_vector`` the norm is taken per column (axis 0),
        otherwise over the whole tensor.

        Fix: the ``else:`` separating the two cases was missing, so the
        whole-tensor rescaling always overwrote the per-column one.
        """
        for k in list(mainloop.updates.keys()):
            name = str(k)
            for key in self.keys:
                if key not in name:
                    continue
                # A single matching waiver exempts the parameter.
                if any(waiver in name for waiver in self.waivers):
                    continue

                updated_param = mainloop.updates[k]

                if self.is_vector:
                    col_norms = T.sqrt(T.sqr(updated_param).sum(axis=0))
                    desired_norms = T.clip(col_norms, 0, self.weight_norm)
                    # The 1e-7 guards the division for all-zero columns.
                    ratio = (desired_norms / (1e-7 + col_norms))
                    mainloop.updates[k] = updated_param * ratio
                else:
                    norm = T.sqrt(T.sqr(updated_param).sum())
                    desired_norm = T.clip(norm, 0, self.weight_norm)
                    ratio = (desired_norm / (1e-7 + norm))
                    mainloop.updates[k] = updated_param * ratio

示例19: cost

 def cost(self):
   """
   Build the cost expression selected by ``self.loss``
   ('ce', 'priori', 'entropy' or 'sse').

   The docstring delimiters and three ``else:`` lines were missing in the
   source and are restored here; without them the dense 'ce' fallback and the
   second 'sse' return overrode or were shadowed by the code before them.

   :rtype: (theano.Variable | None, dict[theano.Variable,theano.Variable] | None)
   :returns: cost, known_grads
   """
   known_grads = None
   if self.loss == 'ce' or self.loss == 'priori':
     if self.attrs.get("target", "").endswith("[sparse:coo]"):
       assert isinstance(self.y, tuple)
       assert len(self.y) == 3
       from NativeOp import crossentropy_softmax_and_gradient_z_sparse
       y_mask = self.network.j[self.attrs.get("target", "").replace("[sparse:coo]", "[sparse:coo:2:0]")]
       ce, grad_z = crossentropy_softmax_and_gradient_z_sparse(
         self.z, self.index, self.y[0], self.y[1], self.y[2], y_mask)
       return self.norm * T.sum(ce), {self.z: grad_z}
     if self.y_data_flat.type == T.ivector().type:
       # Use crossentropy_softmax_1hot to have a more stable and more optimized gradient calculation.
       # Theano fails to use it automatically; I guess our self.i indexing is too confusing.
       #idx = self.index.flatten().dimshuffle(0,'x').repeat(self.y_m.shape[1],axis=1) # faster than line below
       #nll, pcx = T.nnet.crossentropy_softmax_1hot(x=self.y_m * idx, y_idx=self.y_data_flat * self.index.flatten())
       nll, pcx = T.nnet.crossentropy_softmax_1hot(x=self.y_m[self.i], y_idx=self.y_data_flat[self.i])
       #nll, pcx = T.nnet.crossentropy_softmax_1hot(x=self.y_m, y_idx=self.y_data_flat)
       #nll = -T.log(T.nnet.softmax(self.y_m)[self.i,self.y_data_flat[self.i]])
       #z_c = T.exp(self.z[:,self.y])
       #nll = -T.log(z_c / T.sum(z_c,axis=2,keepdims=True))
       #nll, pcx = T.nnet.crossentropy_softmax_1hot(x=self.y_m, y_idx=self.y_data_flat)
       #nll = T.set_subtensor(nll[self.j], T.constant(0.0))
     else:
       # Targets that are not an int vector: clip the probabilities before the
       # log so it stays finite.
       nll = -T.dot(T.log(T.clip(self.p_y_given_x[self.i], 1.e-38, 1.e20)), self.y_data_flat[self.i].T)
     return self.norm * T.sum(nll), known_grads
   elif self.loss == 'entropy':
     h_e = T.exp(self.y_m) #(TB)
     pcx = T.clip((h_e / T.sum(h_e, axis=1, keepdims=True)).reshape((self.index.shape[0],self.index.shape[1],self.attrs['n_out'])), 1.e-6, 1.e6) # TBD
     ee = -T.sum(pcx[self.i] * T.log(pcx[self.i])) # TB
     #nll, pcxs = T.nnet.crossentropy_softmax_1hot(x=self.y_m[self.i], y_idx=self.y[self.i])
     nll, _ = T.nnet.crossentropy_softmax_1hot(x=self.y_m, y_idx=self.y_data_flat) # TB
     ce = nll.reshape(self.index.shape) * self.index # TB
     y = self.y_data_flat.reshape(self.index.shape) * self.index # TB
     f = T.any(T.gt(y,0), axis=0) # B
     return T.sum(f * T.sum(ce, axis=0) + (1-f) * T.sum(ee, axis=0)), known_grads
     #return T.sum(T.switch(T.gt(T.sum(y,axis=0),0), T.sum(ce, axis=0), -T.sum(ee, axis=0))), known_grads
     #return T.switch(T.gt(T.sum(self.y_m[self.i]),0), T.sum(nll), -T.sum(pcx * T.log(pcx))), known_grads
   elif self.loss == 'priori':
     # NOTE(review): unreachable -- 'priori' is already handled by the first
     # branch of this chain. Confirm which of the two is intended.
     pcx = self.p_y_given_x[self.i, self.y_data_flat[self.i]]
     pcx = T.clip(pcx, 1.e-38, 1.e20)  # For pcx near zero, the gradient will likely explode.
     return -T.sum(T.log(pcx)), known_grads
   elif self.loss == 'sse':
     if self.y_data_flat.dtype.startswith('int'):
       # Integer targets: build a one-hot encoding and compare it with the
       # predicted distribution.
       y_f = T.cast(T.reshape(self.y_data_flat, (self.y_data_flat.shape[0] * self.y_data_flat.shape[1]), ndim=1), 'int32')
       y_oh = T.eq(T.shape_padleft(T.arange(self.attrs['n_out']), y_f.ndim), T.shape_padright(y_f, 1))
       return T.mean(T.sqr(self.p_y_given_x[self.i] - y_oh[self.i])), known_grads
     else:
       #return T.sum(T.sum(T.sqr(self.y_m - self.y.reshape(self.y_m.shape)), axis=1)[self.i]), known_grads
       return T.sum(T.sqr(self.y_m[self.i] - self.y_data_flat.reshape(self.y_m.shape)[self.i])), known_grads
       #return T.sum(T.sum(T.sqr(self.z - (self.y.reshape((self.index.shape[0], self.index.shape[1], self.attrs['n_out']))[:self.z.shape[0]])), axis=2).flatten()[self.i]), known_grads
       #y_z = T.set_subtensor(T.zeros((self.index.shape[0],self.index.shape[1],self.attrs['n_out']), dtype='float32')[:self.z.shape[0]], self.z).flatten()
       #return T.sum(T.sqr(y_z[self.i] - self.y[self.i])), known_grads
       #return T.sum(T.sqr(self.y_m - self.y[:self.z.shape[0]*self.index.shape[1]]).flatten()[self.i]), known_grads
   else:
     assert False, "unknown loss: %s" % self.loss

示例20: kl_divergence

def kl_divergence(y_true, y_pred):
    """Mean of ``y_true * (log(y_true) - log(y_pred))`` over all elements.

    Both tensors are first clipped to ``[epsilon, 1 - epsilon]`` so that the
    logarithms stay finite. ``epsilon`` is not defined in this function; it
    is read from the enclosing scope.
    """
    lower, upper = epsilon, 1.0 - epsilon
    y_pred = T.clip(y_pred, lower, upper)
    y_true = T.clip(y_true, lower, upper)

    log_ratio = T.log(y_true) - T.log(y_pred)
    return T.mean(y_true * ( log_ratio))









