This article collects typical usage examples of the Python function theano.tensor.mod. If you have been wondering exactly how to use the mod function, what it does, or what calling it looks like in practice, the curated examples below should help.
The article presents 20 code examples of the mod function, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
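Before diving into the examples, here is a minimal standalone sketch of the function itself: theano.tensor.mod builds a symbolic elementwise remainder and follows Python's sign convention, so the result takes the sign of the divisor.

import numpy as np
import theano
import theano.tensor as T

# Compile a small function computing the elementwise remainder modulo 3.
x = T.dvector('x')
f = theano.function([x], T.mod(x, 3))
print(f(np.array([7., -7., 4.5])))  # -> [1.  2.  1.5]  (note -7 % 3 == 2, as in Python)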
Example 1: AdaMaxAvg2
def AdaMaxAvg2(ws, objective, alpha=.01, beta1=.1, beta2=.001, beta3=0.01, n_accum=1):
    if n_accum == 1:
        return AdaMaxAvg(ws, objective, alpha, beta1, beta2, beta3)
    print 'AdaMax_Avg2', 'alpha:',alpha,'beta1:',beta1,'beta2:',beta2,'beta3:',beta3,'n_accum:',n_accum

    gs = G.ndict.T_grad(objective.sum(), ws, disconnected_inputs='raise')

    new = OrderedDict()

    from theano.ifelse import ifelse
    it = G.sharedf(0.)
    new[it] = it + 1
    reset = T.eq(T.mod(it, n_accum), 0)
    update = T.eq(T.mod(it, n_accum), n_accum - 1)

    ws_avg = []
    for j in range(len(ws)):
        w_avg = {}
        for i in ws[j]:
            _w = ws[j][i]
            _g = gs[j][i]
            #_g = T.switch(T.isnan(_g),T.zeros_like(_g),_g) #remove NaN's
            mom1 = G.sharedf(_w.get_value() * 0.)
            _max = G.sharedf(_w.get_value() * 0.)
            w_avg[i] = G.sharedf(_w.get_value())
            g_sum = G.sharedf(_w.get_value() * 0.)

            new[g_sum] = ifelse(reset, _g, g_sum + _g)
            new[mom1] = ifelse(update, (1-beta1) * mom1 + beta1 * new[g_sum], mom1)
            new[_max] = ifelse(update, T.maximum((1-beta2)*_max, abs(new[g_sum]) + 1e-8), _max)
            new[_w] = ifelse(update, _w + alpha * new[mom1] / new[_max], _w)
            new[w_avg[i]] = ifelse(update, beta3 * new[_w] + (1.-beta3) * w_avg[i], w_avg[i])
        ws_avg += [w_avg]
    return new, ws_avg
Author: gburt, Project: iaf, Lines of code: 34, Source file: optim.py
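The accumulation logic above keys everything off T.mod of a shared step counter: with n_accum = 4, reset fires on the first step of each cycle and update on the last. A minimal standalone sketch of just those two flags (outside the optimizer, with hypothetical names):

import theano
import theano.tensor as T

n_accum = 4
it = theano.shared(0., name='it')
reset = T.eq(T.mod(it, n_accum), 0)             # first step of each accumulation cycle
update = T.eq(T.mod(it, n_accum), n_accum - 1)  # last step of each cycle
step = theano.function([], [reset, update], updates=[(it, it + 1)])
print([tuple(int(v) for v in step()) for _ in range(8)])
# -> [(1, 0), (0, 0), (0, 0), (0, 1), (1, 0), (0, 0), (0, 0), (0, 1)]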
Example 2: custom_svrg1
def custom_svrg1(loss, params, m=100, learning_rate=0.01):
    grads = theano.grad(loss, params)

    updates = OrderedDict()

    it_num = theano.shared(np.cast['int16'](0.))
    it = it_num + 1

    for param, grad in zip(params, grads):
        value = param.get_value(borrow=True)

        mu = theano.shared(np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable)
        grad_w_tilde = theano.shared(np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable)
        new_grad_w_tilde = theano.ifelse.ifelse(T.eq(it, m), grad, grad_w_tilde)

        mu_acc = theano.shared(np.zeros(value.shape, dtype=value.dtype), broadcastable=param.broadcastable)

        updates[param] = param - learning_rate * (grad - grad_w_tilde + mu)
        updates[grad_w_tilde] = new_grad_w_tilde
        updates[mu] = theano.ifelse.ifelse(T.eq(T.mod(it, m), 0), mu_acc, mu)
        updates[mu_acc] = theano.ifelse.ifelse(T.eq(T.mod(it, m), 0), 0*mu_acc, mu_acc + grad)
        updates[it_num] = theano.ifelse.ifelse(T.eq(it, m), np.cast['int16'](1), np.cast['int16'](m))

    return updates
Author: justanothercoder, Project: NaturalGradient, Lines of code: 28, Source file: custom_updates.py
Example 3: in_transit
def in_transit(self, t, r=0.0, texp=None):
    """Get a list of timestamps that are in transit

    Args:
        t (vector): A vector of timestamps to be evaluated.
        r (Optional): The radii of the planets.
        texp (Optional[float]): The exposure time.

    Returns:
        The indices of the timestamps that are in transit.

    """
    z = tt.zeros_like(self.a)
    r = tt.as_tensor_variable(r) + z
    R = self.r_star + z

    # Wrap the times into time since transit
    hp = 0.5 * self.period
    dt = tt.mod(self._warp_times(t) - self.t0 + hp, self.period) - hp

    if self.ecc is None:
        # Equation 14 from Winn (2010)
        k = r / R
        arg = tt.square(1 + k) - tt.square(self.b)
        factor = R / (self.a * self.sin_incl)
        hdur = hp * tt.arcsin(factor * tt.sqrt(arg)) / np.pi
        t_start = -hdur
        t_end = hdur
        flag = z
    else:
        M_contact = self.contact_points_op(
            self.a, self.ecc, self.cos_omega, self.sin_omega,
            self.cos_incl + z, self.sin_incl + z, R + r)
        flag = M_contact[2]

        t_start = (M_contact[0] - self.M0) / self.n
        t_start = tt.mod(t_start + hp, self.period) - hp
        t_end = (M_contact[1] - self.M0) / self.n
        t_end = tt.mod(t_end + hp, self.period) - hp

        t_start = tt.switch(tt.gt(t_start, 0.0),
                            t_start - self.period, t_start)
        t_end = tt.switch(tt.lt(t_end, 0.0),
                          t_end + self.period, t_end)

    if texp is not None:
        t_start -= 0.5*texp
        t_end += 0.5*texp

    mask = tt.any(tt.and_(dt >= t_start, dt <= t_end), axis=-1)
    result = ifelse(tt.all(tt.eq(flag, 0)),
                    tt.arange(t.size)[mask],
                    tt.arange(t.size))

    return result
Author: dfm, Project: exoplanet, Lines of code: 57, Source file: keplerian.py
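The recurring idiom in this example is phase folding: tt.mod(t - t0 + hp, period) - hp wraps arbitrary timestamps into the window [-period/2, period/2) around the nearest transit. A standalone numeric sketch with made-up values (period 10, transit epoch t0 = 2):

import numpy as np
import theano
import theano.tensor as tt

period, t0 = 10.0, 2.0
hp = 0.5 * period
t = tt.dvector('t')
dt = tt.mod(t - t0 + hp, period) - hp  # time since the nearest transit, in [-5, 5)

fold = theano.function([t], dt)
print(fold(np.array([2.0, 6.9, 7.1, 31.5])))  # -> [ 0.   4.9 -4.9 -0.5]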
Example 4: ShiftConv
def ShiftConv(w_t_g, s_t, N):
    shift = 2.*s_t - 1.
    Z = T.mod(shift + N, N)
    simj = 1 - (Z - T.floor(Z))
    imj = T.mod(T.arange(N) + T.iround(T.floor(Z)), N)
    w_t_g_roll_1 = T.roll(w_t_g, -T.iround(T.floor(Z)))
    w_t_g_roll_2 = T.roll(w_t_g, -(T.iround(T.floor(Z))+1))
    w_t_s = w_t_g_roll_1*simj + w_t_g_roll_2*(1-simj)
    return w_t_s
Author: chiggum, Project: Neural-Turing-Machines, Lines of code: 9, Source file: ntm_v1.py
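A hedged usage sketch for the function above (assuming ShiftConv is in scope): T.mod wraps the signed shift 2*s_t - 1 into [0, N), and a fractional shift is realised as linear interpolation between two integer rolls of the weighting vector.

import numpy as np
import theano
import theano.tensor as T

w = T.dvector('w')
s = T.dscalar('s')
f = theano.function([w, s], ShiftConv(w, s, 5))

w0 = np.array([1., 0., 0., 0., 0.])
print(f(w0, 1.0))   # integer shift: a single roll        -> [0. 0. 0. 0. 1.]
print(f(w0, 0.75))  # fractional shift: mass interpolated -> [0.5 0.  0.  0.  0.5]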
Example 5: get_stencil
def get_stencil(self, t, r=None, texp=None):
    if r is None or texp is None:
        return tt.shape_padright(t)

    z = tt.zeros_like(self.a)
    r = tt.as_tensor_variable(r)
    R = self.r_star + z
    hp = 0.5 * self.period

    if self.ecc is None:
        # Equation 14 from Winn (2010)
        k = r / self.r_star
        arg1 = tt.square(1 + k) - tt.square(self.b)
        arg2 = tt.square(1 - k) - tt.square(self.b)
        factor = R / (self.a * self.sin_incl)
        hdur1 = hp * tt.arcsin(factor * tt.sqrt(arg1)) / np.pi
        hdur2 = hp * tt.arcsin(factor * tt.sqrt(arg2)) / np.pi
        ts = [-hdur1, -hdur2, hdur2, hdur1]
        flag = z
    else:
        M_contact1 = self.contact_points_op(
            self.a, self.ecc, self.cos_omega, self.sin_omega,
            self.cos_incl + z, self.sin_incl + z, R + r)
        M_contact2 = self.contact_points_op(
            self.a, self.ecc, self.cos_omega, self.sin_omega,
            self.cos_incl + z, self.sin_incl + z, R - r)

        flag = M_contact1[2] + M_contact2[2]

        ts = [
            tt.mod((M_contact1[0]-self.M0)/self.n+hp, self.period)-hp,
            tt.mod((M_contact2[0]-self.M0)/self.n+hp, self.period)-hp,
            tt.mod((M_contact2[1]-self.M0)/self.n+hp, self.period)-hp,
            tt.mod((M_contact1[1]-self.M0)/self.n+hp, self.period)-hp
        ]

    start = self.period * tt.floor((tt.min(t) - self.t0) / self.period)
    end = self.period * (tt.ceil((tt.max(t) - self.t0) / self.period) + 1)
    start += self.t0
    end += self.t0

    tout = []
    for i in range(4):
        if z.ndim < 1:
            tout.append(ts[i] + tt.arange(start, end, self.period))
        else:
            tout.append(theano.scan(
                fn=lambda t0, s0, e0, p0: t0 + tt.arange(s0, e0, p0),
                sequences=[ts[i], start, end, self.period],
            )[0].flatten())

    ts = tt.sort(tt.concatenate(tout))
    return ts, flag
Author: dfm, Project: exoplanet, Lines of code: 53, Source file: keplerian.py
Example 6: __init__
def __init__(self, **kwargs):
    super(ConcatConv, self).__init__(**kwargs)

    inputs = T.concatenate([s.output for s in self.sources], axis=2)  # (time, batch, input-dim = row * features)
    time = inputs.shape[0]
    batch = inputs.shape[1]

    if self.status[0]:
        self.input = T.concatenate([s.Output for s in self.sources], axis=3)  # (batch, stack_size, row, time)
    else:
        inputs2 = inputs.reshape((time, batch, inputs.shape[2], self.filter_shape[1]))  # (time, batch, row, stack)
        self.input = inputs2.dimshuffle(1, 3, 2, 0)  # (batch, stack_size, row, time)
    self.input.name = "conv_layer_input_final"

    if self.pool_params[0][1] > 1:
        xp = T.constant(self.pool_params[0][1], 'int32')
        self.input = T.concatenate([self.input, T.zeros((batch, self.filter_shape[1], self.input.shape[2],
                                                         xp - T.mod(self.input.shape[3], xp)), 'float32')], axis=3)
        self.index = T.concatenate([self.index, T.zeros((xp - T.mod(self.index.shape[0], xp), batch), 'int8')], axis=0)

    if self.modes[0] == "valid":
        if self.filter_shape[3] > 1:
            idx = int(self.filter_shape[3] / 2)
            self.index = self.index[idx:-idx]

    self.Output = self.run_cnn(
        inputs=self.input,
        filter_shape=self.filter_shape,
        params=self.pool_params,
        modes=self.modes,
        others=self.other_params
    )

    if self.attrs['batch_norm']:
        self.Output = self.batch_norm(
            self.Output.dimshuffle(0, 2, 3, 1).reshape(
                (self.Output.shape[0] * self.Output.shape[2] * self.Output.shape[3],
                 self.Output.shape[1])
            ),
            self.attrs['n_features']
        ).reshape((self.Output.shape[0],
                   self.Output.shape[2],
                   self.Output.shape[3],
                   self.Output.shape[1])).dimshuffle(0, 3, 1, 2)

    # our CRNN only accepts a 3D tensor (time, batch, dim),
    # so we have to convert the output back to a 3D tensor
    output2 = self.Output.dimshuffle(3, 0, 1, 2)  # (time, batch, features, out-row)
    self.output = output2.reshape((output2.shape[0], output2.shape[1],
                                   output2.shape[2] * output2.shape[3]))  # (time, batch, out-dim)
Author: atuxhe, Project: returnn, Lines of code: 50, Source file: NetworkCNNLayer.py
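The pooling preparation above uses xp - T.mod(size, xp) to compute how many zero frames bring the time axis up to a multiple of the pool width xp (the appended frames are flagged as padding in self.index). A standalone sketch of that arithmetic; note that an exact multiple still receives a full extra block of xp zeros, which (xp - size % xp) % xp would avoid:

import theano
import theano.tensor as T

size = T.iscalar('size')
xp = T.constant(3, 'int32')
pad = xp - T.mod(size, xp)
f = theano.function([size], pad)
print(f(7), f(8), f(9))  # -> 2 1 3  (an exact multiple still pads a full block)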
Example 7: input_row_from_variables
def input_row_from_variables(ori_ip, dest_ip, ori_lat, ori_long, dest_lat, dest_long, ori_type, dest_type, dist):
    '''Create an input row for the MLP from the inputs'''
    input_row = tensor.zeros([input_size])

    offset = 0

    ips = [ori_ip, dest_ip]
    for ip in ips:
        for _ in range(4):
            input_row = add_one_shot(input_row, offset, tensor.mod(ip, 256))
            ip = tensor.int_div(ip, 256)
            offset += 256

    for lat_, long_ in [(ori_lat, ori_long), (dest_lat, dest_long)]:
        translated_lat = tensor.iround((coordinate_size-1)*(lat_/180 + 0.5))
        input_row = add_thermo(input_row, offset, translated_lat)
        offset += coordinate_size

        translated_long = tensor.iround((coordinate_size-1)*(long_/360 + 0.5))
        input_row = add_thermo(input_row, offset, translated_long)
        offset += coordinate_size

    for type_ in [ori_type, dest_type]:
        input_row = add_one_shot(input_row, offset, type_ + 1)
        offset += type_size

    translated_dist = tensor.iround((dest_size-1)*(tensor.minimum(1, dist/max_earth_distance)))
    input_row = add_thermo(input_row, offset, translated_dist)
    # could be useful if we want to add something
    offset += dest_size

    return input_row
Author: Mr-Kumar-Abhishek, Project: pings, Lines of code: 34, Source file: theano_play.py
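The inner loop peels one byte at a time off an IPv4 address packed into a single integer: tensor.mod(ip, 256) extracts the lowest byte and tensor.int_div(ip, 256) shifts the rest down. A standalone sketch:

import theano
import theano.tensor as T

ip_in = T.lscalar('ip')
x = ip_in
parts = []
for _ in range(4):
    parts.append(T.mod(x, 256))  # lowest byte first
    x = T.int_div(x, 256)        # shift the remaining bytes down

to_bytes = theano.function([ip_in], parts)
# 192.168.0.1 packed big-endian: 192*256**3 + 168*256**2 + 0*256 + 1
print(to_bytes(3232235521))  # -> [array(1), array(0), array(168), array(192)]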
Example 8: init_train_updates
def init_train_updates(self):
    step = self.variables.step
    previous_delta = self.variables.prev_delta
    previous_gradient = self.variables.prev_gradient

    n_parameters = count_parameters(self)
    parameters = list(iter_parameters(self))
    param_vector = parameters2vector(self)

    gradients = T.grad(self.variables.error_func, wrt=parameters)
    full_gradient = T.concatenate([grad.flatten() for grad in gradients])

    beta = self.update_function(previous_gradient, full_gradient,
                                previous_delta)

    parameter_delta = ifelse(
        T.eq(T.mod(self.variables.epoch, n_parameters), 1),
        -full_gradient,
        -full_gradient + beta * previous_delta
    )
    updated_parameters = param_vector + step * parameter_delta

    updates = [
        (previous_gradient, full_gradient),
        (previous_delta, parameter_delta),
    ]
    parameter_updates = setup_parameter_updates(parameters,
                                                updated_parameters)
    updates.extend(parameter_updates)

    return updates
Author: EdwardBetts, Project: neupy, Lines of code: 30, Source file: conjgrad.py
Example 9: time_mask
def time_mask(update_freq, maxlen, batch_size):
    '''
    update_freq - after how many time steps the hiddens
                  should be updated.
    maxlen - maximum length of the input sequence.
    batch_size - batch size for training.
    '''
    new_mask = tensor.alloc(1, maxlen)
    qw = tensor.extra_ops.cumsum(new_mask)
    qw2 = tensor.switch(tensor.eq(tensor.mod(qw, update_freq), 0), 1, 0)
    temp = qw2
    for i in range(batch_size - 1):
        qw2 = tensor.concatenate([qw2, temp], axis=0)
    qw2 = qw2.reshape([batch_size, maxlen])
    qw2 = qw2.T
    new_mask = qw2

    if update_freq == 1:
        return new_mask, None, None

    ones_array = numpy.ones([1, maxlen])
    cumsum = numpy.cumsum(ones_array)
    mod_array = [int(i % update_freq) for i in cumsum]
    mod_array = numpy.asarray(mod_array)
    alpha_mask = numpy.where(mod_array == 0)[0]

    interpolation_mask = []
    for i in reversed(range(update_freq)):
        interpolation_mask.append(((i+1)*1.0)/update_freq)

    return new_mask, alpha_mask, interpolation_mask
Author: anirudh9119, Project: mscale, Lines of code: 31, Source file: lm.py
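The symbolic part of the mask marks every update_freq-th time step: cumsum over a vector of ones yields the step numbers 1..maxlen, and T.mod picks out those divisible by update_freq. A standalone sketch for one sequence:

import theano
import theano.tensor as T

maxlen, update_freq = 8, 3
steps = T.extra_ops.cumsum(T.alloc(1, maxlen))  # 1, 2, ..., maxlen
mask = T.switch(T.eq(T.mod(steps, update_freq), 0), 1, 0)
print(theano.function([], mask)())  # -> [0 0 1 0 0 1 0 0]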
Example 10: ShiftConv
def ShiftConv(w_t_g, s_t, N, num_shifts):
    # pad = (num_shifts//2, (num_shifts-1)//2)
    # w_t_g_pd_ = T.concatenate([w_t_g[(-pad[0]-1):-1], w_t_g, w_t_g[:(pad[1])]])
    # w_t_g_pd = w_t_g_pd_.dimshuffle('x','x','x', 0)
    # filter = s_t.dimshuffle('x', 'x', 'x', 0)
    # convolution = T.nnet.conv2d(w_t_g_pd, filter,
    #                             input_shape=(1, 1, 1, N + pad[0] + pad[1]),
    #                             filter_shape=(1, 1, 1, num_shifts),
    #                             subsample=(1, 1),
    #                             border_mode='valid')
    # w_t_s = convolution[0, 0, 0, :]
    shift = 2.*s_t - 1.
    Z = T.mod(shift + N, N)
    simj = 1 - (Z - T.floor(Z))
    imj = T.mod(T.arange(N) + T.iround(T.floor(Z)), N)
    w_t_g_roll_1 = T.roll(w_t_g, -T.iround(T.floor(Z)))
    w_t_g_roll_2 = T.roll(w_t_g, -(T.iround(T.floor(Z))+1))
    w_t_s = w_t_g_roll_1*simj + w_t_g_roll_2*(1-simj)
    return w_t_s
Author: chiggum, Project: Neural-Turing-Machines, Lines of code: 19, Source file: ntm_v2.py
Example 11: fprop
def fprop(self, X):
    idx = X[0]
    X = X[1:]
    z = theano.ifelse.ifelse(T.neq(T.mod(idx, self.N), 0),
                             T.zeros((X[0].shape[0]*self.num_sample,
                                      self.nout),
                                     dtype=X[0].dtype),
                             self.inner_fn(X))
    z.name = self.name
    return z
Author: anirudh9119, Project: SpeechSyn, Lines of code: 10, Source file: layer.py
Example 12: train_givens
def train_givens(self, batch_index, batch_size):
    '''
    batch_index is a Theano variable.
    '''
    # compute the GPU batch index; these will all be Theano variables
    solver_batches_per_gpu_batch = T.cast(T.int_div(self.num_GPU_store, batch_size), 'int32')
    real_batch_index = T.cast(T.mod(batch_index, solver_batches_per_gpu_batch), 'int32')
    givens = {self.X_batch_var: self.GPU_X_train[real_batch_index*batch_size:(real_batch_index+1)*batch_size]}
    givens[self.y_batch_var] = self.GPU_y_train[real_batch_index*batch_size:(real_batch_index+1)*batch_size]
    return givens
Author: Sandy4321, Project: caffe-theano-conversion, Lines of code: 12, Source file: dataset.py
Example 13: pooling
def pooling(self, inp, input_dim):
    # inp: (time, batch, dim) -> (batch, time, dim)
    inp_shuffle = inp.dimshuffle(1, 0, 2)
    n_timestep = inp_shuffle.shape[1]

    # downsample the time axis by two (max over the selected window)
    output, _ = theano.scan(
        fn=lambda timestep: T.max(inp_shuffle[:, timestep:timestep+1, :], axis=1),
        sequences=T.arange(0, T.floor(n_timestep/2))*2
    )
    # n_timestep is symbolic, so the odd-length check has to stay inside the
    # graph (a plain Python `if` cannot branch on a tensor's runtime value)
    output = theano.ifelse.ifelse(T.neq(T.mod(n_timestep, 2), 0),
                                  T.concatenate([output, inp[-1:, :, :]], axis=0),
                                  output)
    return output
Author: jazzsaxmafia, Project: m_CNN, Lines of code: 12, Source file: m_CNN.py
Example 14: get_phase
def get_phase(states):
    v, w = states
    angle = T.switch(w > 0,
                     np.pi * v.clip(0, 1),
                     w * (np.pi / T.abs_(T.min(w))))
    mean = T.arctan2(T.sin(angle).mean(axis=-1),
                     T.cos(angle).mean(axis=-1))

    ### calculate angles around the mean
    angle = T.mod(angle + (np.pi - mean[:, None]), 2*np.pi) - np.pi
    std = T.sqrt((angle**2).mean(-1))
    return std
Author: ctn-archive, Project: hunsberger-neco2014, Lines of code: 13, Source file: neurons.py
Example 15: fprop_step
def fprop_step(state_below, index, state_before, W, U, b):
    state_now = state_before.copy()
    index = self.num_modules -\
        tensor.nonzero(tensor.mod(index+1, self.M))[0].shape[0]
    this_range = index * self.module_dim
    z = tensor.dot(state_below, W[:, :this_range]) +\
        tensor.dot(state_before, U[:, :this_range]) +\
        b[:this_range]
    z = tensor.tanh(z)
    state_now = tensor.set_subtensor(state_now[:, :this_range], z)
    return state_now
Author: zhangmeishan, Project: pylearn2, Lines of code: 13, Source file: rnn.py
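The index arithmetic implements clockwork-RNN scheduling: with module periods self.M sorted ascending, counting the nonzero remainders of (index+1) mod M tells how many of the leading modules are active at this step. A standalone numeric sketch with hypothetical periods [1, 2, 4, 8]:

import numpy as np
import theano
import theano.tensor as T

M = T.constant(np.array([1, 2, 4, 8]))  # module periods, ascending
num_modules = 4
index = T.iscalar('index')
# modules whose period does not divide (index + 1) stay frozen this step
active = num_modules - T.nonzero(T.mod(index + 1, M))[0].shape[0]
f = theano.function([index], active)
print([int(f(i)) for i in range(8)])  # -> [1, 2, 1, 3, 1, 2, 1, 4]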
Example 16: calc_time_gate
def calc_time_gate(time_input_n):
    # Broadcast the time across all units
    t_broadcast = time_input_n.dimshuffle([0, 'x'])
    # Get the time within the period
    in_cycle_time = T.mod(t_broadcast + shift_broadcast, period_broadcast)
    # Find the phase
    is_up_phase = T.le(in_cycle_time, on_mid_broadcast)
    is_down_phase = T.gt(in_cycle_time, on_mid_broadcast)*T.le(in_cycle_time, on_end_broadcast)
    # Set the mask
    sleep_wake_mask = T.switch(is_up_phase, in_cycle_time/on_mid_broadcast,
                               T.switch(is_down_phase,
                                        (on_end_broadcast-in_cycle_time)/on_mid_broadcast,
                                        off_slope*(in_cycle_time/period_broadcast)))
    return sleep_wake_mask
Author: HenryWoodOTC, Project: time_lstm, Lines of code: 15, Source file: plstm.py
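T.mod here folds absolute time into each unit's oscillation cycle, after which the gate ramps up, ramps down, and otherwise leaks. A standalone numeric sketch for a single unit (hypothetical values: period 10, fully open at time 2, closed again at 4, leak 0.001), mirroring the switch structure above:

import numpy as np
import theano
import theano.tensor as T

t = T.dvector('t')
period, on_end, on_mid, off_slope = 10., 4., 2., 1e-3
phase = T.mod(t, period)  # time within the current cycle
gate = T.switch(T.le(phase, on_mid), phase / on_mid,
                T.switch(T.le(phase, on_end), (on_end - phase) / on_mid,
                         off_slope * phase / period))
f = theano.function([t], gate)
print(f(np.array([0., 1., 2., 3., 4., 7., 12.])))
# -> [0.     0.5    1.     0.5    0.     0.0007 1.    ]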
Example 17: AdaMax2
def AdaMax2(w, objective, alpha=.01, beta1=.1, beta2=.001, n_accum=2):
    print 'AdaMax2', 'alpha:',alpha,'beta1:',beta1,'beta2:',beta2, 'n_accum:', n_accum
    g = T.grad(objective.sum(), w, disconnected_inputs='warn')

    new = OrderedDict()

    from theano.ifelse import ifelse
    it = G.sharedf(0.)
    new[it] = it + 1
    reset = T.eq(T.mod(new[it], n_accum), 0)
    update = T.eq(T.mod(new[it], n_accum), n_accum-1)

    for i in range(len(w)):
        mom1 = G.sharedf(w[i].get_value() * 0.)
        _max = G.sharedf(w[i].get_value() * 0.)
        g_sum = G.sharedf(w[i].get_value() * 0.)

        #gi = T.switch(T.isnan(gi),T.zeros_like(gi),gi) #remove NaN's
        new[g_sum] = ifelse(reset, g[i], g_sum + g[i])
        new[mom1] = ifelse(update, (1-beta1) * mom1 + beta1 * new[g_sum], mom1)
        new[_max] = ifelse(update, T.maximum((1-beta2)*_max, abs(new[g_sum]) + 1e-8), _max)
        new[w[i]] = ifelse(update, w[i] + alpha * new[mom1] / new[_max], w[i])

    return new
Author: gburt, Project: iaf, Lines of code: 24, Source file: optim.py
Example 18: step
def step(input_step, previous_activation, time_step, W_in, W_self, biases):
    new_activation = previous_activation.copy()

    modzero = T.nonzero(T.eq(T.mod(time_step, self.group_labels), 0))[0]

    W_in_now = T.flatten(W_in[:, modzero, :], outdim=2)
    W_self_now = T.flatten(W_self[:, modzero, :], outdim=2)
    biases_now = T.flatten(biases[modzero, :])

    activation = T.dot(input_step, W_in_now)
    activation += T.dot(previous_activation, W_self_now)
    activation += biases_now
    activation = self.activation_function(activation)

    modzero_activation_changes = (modzero * self.group_size) + (
        T.ones((modzero.shape[0], self.group_size), dtype='int32') * T.arange(self.group_size, dtype='int32')).T
    modzero_flatten = T.flatten(modzero_activation_changes).astype('int32')
    new_activation = T.set_subtensor(new_activation[:, modzero_flatten], activation)

    time_step += 1
    return new_activation, time_step
Author: ZenCCoding, Project: clockworkrnn-1, Lines of code: 16, Source file: Clockwork.py
Example 19: attend
def attend(self, y_p):
    inp, updates = 0, {}
    for i in range(len(self.base)):
        for g in range(self.n_glm):
            B, C, I, H, W_att_in, b_att_in = self.get(y_p, i, g)
            z_i = self.distance(C, H)
            w_i = self.softmax(z_i, I)
            if self.attrs['momentum'] == 'conv2d':
                F = self.item('F', i)
                context = F.shape[3]
                padding = T.zeros((2, context/2, C.shape[1]), 'float32')
                att = T.concatenate([padding, T.stack([self.item('att', i), w_i]), padding], axis=1)  # 2TB
                v_i = T.nnet.sigmoid(T.dot(T.nnet.conv2d(border_mode='valid',
                                                         input=att.dimshuffle(2, 'x', 0, 1),  # B12T
                                                         filters=F).dimshuffle(3, 0, 2, 1),
                                           self.item('U', i)).reshape((C.shape[0], C.shape[1])))
                w_i *= v_i
                w_i = w_i / w_i.sum(axis=0, keepdims=True)
            elif self.attrs['momentum'] == 'mono':  # gating function
                idx = T.arange(z_i.shape[0], dtype='float32').dimshuffle(0, 'x').repeat(w_i.shape[1], axis=1)  # TB
                d_i = idx - T.sum(self.item('att', i) * idx, axis=0, keepdims=True)
                f_i = T.nnet.sigmoid(T.dot(T.tanh(T.dot(d_i.dimshuffle(0, 1, 'x'), self.item('D_in', i))),
                                           self.item("D_out", i)) + self.item('Db_out', i))[:, :, 0]
                w_i = T.exp(-z_i) * f_i * I
                w_i = w_i / w_i.sum(axis=0, keepdims=True)
            self.glimpses[i].append(T.sum(C * w_i.dimshuffle(0, 1, 'x').repeat(C.shape[2], axis=2), axis=0))
        if self.attrs['smooth']:
            updates[self.state_vars['datt_%d' % i]] = w_i - self.state_vars['att_%d' % i]
        if self.attrs['store']:
            updates[self.state_vars['att_%d' % i]] = theano.gradient.disconnected_grad(w_i)
        if self.attrs['align']:
            Q, K = self.align(w_i, self.item("Q", i))
            updates[self.state_vars['Q_%d' % i]] = Q
            updates[self.state_vars['K_%d' % i]] = K
        if self.attrs['accumulator'] == 'rnn':
            def rnn(x_t, w_t, c_p):
                c = x_t * w_t + c_p * (numpy.float32(1.) - w_t)
                return T.switch(T.ge(c, 0), c, T.exp(c) - 1)
            zT, _ = theano.scan(rnn, sequences=[B, w_i.dimshuffle(0, 1, 'x').repeat(B.shape[2], axis=2)],
                                outputs_info=[T.zeros_like(B[0])])
            z = zT[-1]
        else:
            if self.attrs['nbest'] == 1:
                z = B[T.argmax(w_i, axis=0), T.arange(w_i.shape[1])]
            else:
                z = T.sum(B * w_i.dimshuffle(0, 1, 'x').repeat(B.shape[2], axis=2), axis=0)
        inp += T.dot(z, W_att_in) + b_att_in
    inp = ifelse(T.eq(T.mod(self.n[0], self.attrs['ndec']), 0), inp,
                 T.zeros((self.n.shape[0], self.layer.attrs['n_out'] * 4), 'float32'))
    return inp, updates
Author: atuxhe, Project: returnn, Lines of code: 47, Source file: RecurrentTransform.py
Example 20: __init__
def __init__(self, base, momentum=0.1, oracle=False, msteps=100, esteps=200, **kwargs):
    kwargs['loss'] = 'ce'
    super(UnsupervisedOutputLayer, self).__init__(**kwargs)
    if base:
        self.set_attr('base', base[0].name)
    self.set_attr('momentum', momentum)
    self.set_attr('oracle', oracle)
    self.set_attr('msteps', msteps)
    self.set_attr('esteps', esteps)
    eps = T.constant(1e-30, 'float32')
    pc = theano.gradient.disconnected_grad(base[1].output)  # TBV
    pc = print_to_file('pc', pc)
    pcx = base[0].output  # TBV

    self.cnt = self.add_param(theano.shared(numpy.zeros((1,), 'float32'), 'cnt'),
                              custom_update=T.constant(1, 'float32'))
    domax = T.ge(T.mod(T.cast(self.cnt[0], 'int32'), numpy.int32(msteps + esteps)), esteps)

    hyp = T.mean(pcx, axis=1, keepdims=True)
    hyp = hyp / hyp.sum(axis=2, keepdims=True)

    self.hyp = self.add_param(
        theano.shared(numpy.ones((self.attrs['n_out'],), 'float32') / numpy.float32(self.attrs['n_out']), 'hyp'), 'hyp',
        custom_update=T.mean(hyp[:, 0, :], axis=0),
        custom_update_condition=domax,
        custom_update_normalized=True,
        custom_update_exp_average=1. / (1. - momentum))
    hyp = numpy.float32(1. - momentum) * hyp + numpy.float32(momentum) * self.hyp.dimshuffle('x', 'x', 0).repeat(
        hyp.shape[1], axis=1).repeat(hyp.shape[0], axis=0)

    order = T.argsort(self.hyp)[::-1]
    # order = print_to_file('order', order)
    shyp = hyp[:, :, order]
    spcx = pcx[:, :, order]
    # spcx = print_to_file('pcx', spcx)
    # shyp = print_to_file('shyp', shyp)

    K = numpy.float32(1. / (1. - momentum)) * T.sum(T.sum(pc * T.log(pc / shyp), axis=2), axis=0)
    Q = -T.sum(T.sum(pcx * T.log(pcx), axis=2), axis=0)
    # K = print_to_file('K', K)
    # Q = print_to_file('Q', Q)

    self.L = T.sum(T.switch(domax, Q, K))
    self.y_m = spcx.reshape((spcx.shape[0] * spcx.shape[1], spcx.shape[2]))
Author: atuxhe, Project: returnn, Lines of code: 47, Source file: NetworkOutputLayer.py
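domax uses T.mod over a persistent counter to alternate the two training phases: within each block of msteps + esteps iterations, the first esteps steps use the K loss and the remaining msteps steps use the Q loss. A standalone sketch of the schedule:

import theano
import theano.tensor as T

msteps, esteps = 3, 2
cnt = T.iscalar('cnt')
domax = T.ge(T.mod(cnt, msteps + esteps), esteps)  # 0 -> K phase, 1 -> Q phase
f = theano.function([cnt], domax)
print([int(f(i)) for i in range(10)])  # -> [0, 0, 1, 1, 1, 0, 0, 1, 1, 1]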
Note: The theano.tensor.mod examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other source-code and documentation platforms. The snippets were selected from open-source projects contributed by many developers; copyright belongs to the original authors, and distribution or use should follow the corresponding project's license. Do not reproduce without permission.