This article collects typical usage examples of the Python class utils.AttributeDict. If you are unsure how AttributeDict is used in practice, or what real-world calls to it look like, the curated class examples below may help.
The following shows 20 code examples of the AttributeDict class, drawn from several open-source projects and sorted by popularity by default.
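None of the projects below reproduces the AttributeDict class itself on this page; each ships its own copy in a local utils module. As a rough orientation only (an assumed, typical implementation, not the code of any particular repository listed here), such a class is usually just a dict whose entries are also reachable as attributes:

class AttributeDict(dict):
    """A dict whose entries can also be read and written as attributes."""

    def __getattr__(self, name):
        try:
            return self[name]
        except KeyError:
            raise AttributeError(name)

    def __setattr__(self, name, value):
        self[name] = value


# Usage in the style of the examples below
d = AttributeDict()
d.train_ind = [0, 1, 2]             # attribute write becomes a dict entry
assert d['train_ind'] == [0, 1, 2]  # ... and is visible through dict access
print(d.get('missing'))             # plain dict methods keep working -> None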
Example 1: encoder

def encoder(input_, path_name, input_noise_std=0, noise_std=[]):
    h = input_

    logger.info('  0: noise %g' % input_noise_std)
    if input_noise_std > 0.:
        h = h + self.noise_like(h) * input_noise_std

    d = AttributeDict()
    d.unlabeled = self.new_activation_dict()
    d.labeled = self.new_activation_dict()
    d.labeled.z[0] = self.labeled(h)
    d.unlabeled.z[0] = self.unlabeled(h)
    prev_dim = input_dim
    for i, (spec, _, act_f) in layers[1:]:
        d.labeled.h[i - 1], d.unlabeled.h[i - 1] = self.split_lu(h)
        noise = noise_std[i] if i < len(noise_std) else 0.
        curr_dim, z, m, s, h = self.f(h, prev_dim, spec, i, act_f,
                                      path_name=path_name,
                                      noise_std=noise)
        assert self.layer_dims.get(i) in (None, curr_dim)
        self.layer_dims[i] = curr_dim
        d.labeled.z[i], d.unlabeled.z[i] = self.split_lu(z)
        d.unlabeled.s[i] = s
        d.unlabeled.m[i] = m
        prev_dim = curr_dim
    d.labeled.h[i], d.unlabeled.h[i] = self.split_lu(h)
    return d

Developer: fulldecent, Project: LRE, Lines: 27, Source file: ladder.py
Example 2: decoder

def decoder(self, clean, corr):
    est = self.new_activation_dict()
    costs = AttributeDict()
    costs.denois = AttributeDict()
    for i, ((_, spec), act_f) in self.layers[::-1]:
        z_corr = corr.unlabeled.z[i]
        z_clean = clean.unlabeled.z[i]
        z_clean_s = clean.unlabeled.s.get(i)
        z_clean_m = clean.unlabeled.m.get(i)

        # It's the last layer
        if i == len(self.layers) - 1:
            fspec = (None, None)
            ver = corr.unlabeled.h[i]
            ver_dim = self.layer_dims[i]
            top_g = True
        else:
            fspec = self.layers[i + 1][1][0]
            ver = est.z.get(i + 1)
            ver_dim = self.layer_dims.get(i + 1)
            top_g = False

        z_est = self.g(z_lat=z_corr,
                       z_ver=ver,
                       in_dims=ver_dim,
                       out_dims=self.layer_dims[i],
                       num=i,
                       fspec=fspec,
                       top_g=top_g)

        # The first layer
        if z_clean_s:
            z_est_norm = (z_est - z_clean_m) / z_clean_s
        else:
            z_est_norm = z_est

        se = SquaredError('denois' + str(i))
        costs.denois[i] = se.apply(z_est_norm.flatten(2),
                                   z_clean.flatten(2)) \
            / np.prod(self.layer_dims[i], dtype=floatX)
        costs.denois[i].name = 'denois' + str(i)

        # Store references for later use
        est.z[i] = z_est
        est.h[i] = apply_act(z_est, act_f)
        est.s[i] = None
        est.m[i] = None

    return est, costs

Developer: codeaudit, Project: ladder_network, Lines: 48, Source file: ladder.py
Example 3: decoder

def decoder(self, clean, corr, batch_size):
    get_unlabeled = lambda x: x[batch_size:] if x is not None else x
    est = self.new_activation_dict()
    costs = AttributeDict()
    costs.denois = AttributeDict()
    for i, ((_, spec), act_f) in self.layers[::-1]:
        z_corr = get_unlabeled(corr.z[i])
        z_clean = get_unlabeled(clean.z[i])
        z_clean_s = get_unlabeled(clean.s.get(i))
        z_clean_m = get_unlabeled(clean.m.get(i))

        # It's the last layer
        if i == len(self.layers) - 1:
            fspec = (None, None)
            ver = get_unlabeled(corr.h[i])
            ver_dim = self.layer_dims[i]
            top_g = True
        else:
            fspec = self.layers[i + 1][1][0]
            ver = est.z.get(i + 1)
            ver_dim = self.layer_dims.get(i + 1)
            top_g = False

        z_est = self.g(
            z_lat=z_corr, z_ver=ver, in_dims=ver_dim, out_dims=self.layer_dims[i], num=i, fspec=fspec, top_g=top_g
        )

        # For semi-supervised version
        if z_clean_s:
            z_est_norm = (z_est - z_clean_m) / z_clean_s
        else:
            z_est_norm = z_est
        z_est_norm = z_est

        se = SquaredError("denois" + str(i))
        costs.denois[i] = se.apply(z_est_norm.flatten(2), z_clean.flatten(2)) / np.prod(
            self.layer_dims[i], dtype=floatX
        )
        costs.denois[i].name = "denois" + str(i)

        # Store references for later use
        est.z[i] = z_est
        est.h[i] = apply_act(z_est, act_f)
        est.s[i] = None
        est.m[i] = None
    return est, costs

Developer: mohammadpz, Project: ladder_network, Lines: 46, Source file: ladder.py
Example 4: get_mnist_data_dict

def get_mnist_data_dict(unlabeled_samples, valid_set_size, test_set=False):
    train_set = MNIST(("train",))
    # Make sure the MNIST data is in right format
    train_set.data_sources = (
        (train_set.data_sources[0] / 255.).astype(numpy.float32),
        train_set.data_sources[1])

    # Take all indices and permutate them
    all_ind = numpy.arange(train_set.num_examples)
    rng = numpy.random.RandomState(seed=1)
    rng.shuffle(all_ind)

    data = AttributeDict()

    # Choose the training set
    data.train = train_set
    data.train_ind = all_ind[:unlabeled_samples]

    # Then choose validation set from the remaining indices
    data.valid = train_set
    data.valid_ind = numpy.setdiff1d(all_ind, data.train_ind)[:valid_set_size]
    logger.info('Using %d examples for validation' % len(data.valid_ind))

    # Only touch test data if requested
    if test_set:
        data.test = MNIST(("test",))
        data.test_ind = numpy.arange(data.test.num_examples)

    return data

Developer: codeaudit, Project: ladder_network, Lines: 28, Source file: datasets.py
Example 5: load_and_log_params

def load_and_log_params(cli_params):
    cli_params = AttributeDict(cli_params)
    if cli_params.get('load_from'):
        p = load_df(cli_params.load_from, 'params').to_dict()[0]
        p = AttributeDict(p)
        for key in cli_params.iterkeys():
            if key not in p:
                p[key] = None
        new_params = cli_params
        loaded = True
    else:
        p = cli_params
        new_params = {}
        loaded = False

        # Make dseed seed unless specified explicitly
        if p.get('dseed') is None and p.get('seed') is not None:
            p['dseed'] = p['seed']

    logger.info('== COMMAND LINE ==')
    logger.info(' '.join(sys.argv))

    logger.info('== PARAMETERS ==')
    for k, v in p.iteritems():
        if new_params.get(k) is not None:
            p[k] = new_params[k]
            replace_str = "<- " + str(new_params.get(k))
        else:
            replace_str = ""
        logger.info(" {:20}: {:<20} {}".format(k, v, replace_str))

    return p, loaded

Developer: lude-ma, Project: ladder, Lines: 31, Source file: run.py
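Note that this snippet, like several others on this page, is Python 2 code: iterkeys() and iteritems() no longer exist on Python 3 dicts. A rough Python 3 sketch of the same override-merging idea might look as follows (the helper name merge_params is made up for illustration and is not part of the project):

def merge_params(loaded, cli):
    # Start from the stored parameters and let command-line values win.
    p = AttributeDict(loaded)
    for key, value in cli.items():      # items() replaces iteritems()/iterkeys()
        if value is not None:
            p[key] = value              # command-line value overrides the stored one
    return p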
Example 6: setup_data

def setup_data(p, test_set=False):
    dataset_class, training_set_size = {"cifar10": (CIFAR10, 40000), "mnist": (MNIST, 50000)}[p.dataset]

    # Allow overriding the default from command line
    if p.get("unlabeled_samples") is not None:
        training_set_size = p.unlabeled_samples

    train_set = dataset_class("train")

    # Make sure the MNIST data is in right format
    if p.dataset == "mnist":
        d = train_set.data_sources[train_set.sources.index("features")]
        assert numpy.all(d <= 1.0) and numpy.all(d >= 0.0), "Make sure data is in float format and in range 0 to 1"

    # Take all indices and permutate them
    all_ind = numpy.arange(train_set.num_examples)
    if p.get("dseed"):
        rng = numpy.random.RandomState(seed=p.dseed)
        rng.shuffle(all_ind)

    d = AttributeDict()

    # Choose the training set
    d.train = train_set
    d.train_ind = all_ind[:training_set_size]

    # Then choose validation set from the remaining indices
    d.valid = train_set
    d.valid_ind = numpy.setdiff1d(all_ind, d.train_ind)[: p.valid_set_size]
    logger.info("Using %d examples for validation" % len(d.valid_ind))

    # Only touch test data if requested
    if test_set:
        d.test = dataset_class("test")
        d.test_ind = numpy.arange(d.test.num_examples)

    # Setup optional whitening, only used for Cifar-10
    in_dim = train_set.data_sources[train_set.sources.index("features")].shape[1:]
    if len(in_dim) > 1 and p.whiten_zca > 0:
        assert numpy.product(in_dim) == p.whiten_zca, "Need %d whitening dimensions, not %d" % (
            numpy.product(in_dim),
            p.whiten_zca,
        )

    cnorm = ContrastNorm(p.contrast_norm) if p.contrast_norm != 0 else None

    def get_data(d, i):
        data = d.get_data(request=i)[d.sources.index("features")]
        # Fuel provides Cifar in uint8, convert to float32
        data = numpy.require(data, dtype=numpy.float32)
        return data if cnorm is None else cnorm.apply(data)

    if p.whiten_zca > 0:
        logger.info("Whitening using %d ZCA components" % p.whiten_zca)
        whiten = ZCA()
        whiten.fit(p.whiten_zca, get_data(d.train, d.train_ind))
    else:
        whiten = None

    return in_dim, d, whiten, cnorm

Developer: MultiPath, Project: ladder, Lines: 59, Source file: run.py
Example 7: setup_data

def setup_data(p, test_set=False):
    dataset_class, training_set_size = {
        'cifar10': (CIFAR10, 40000),
        'mnist': (MNIST, 50000),
        'reddit': (SubredditTopPhotosFeatures22, 20000)
    }[p.dataset]

    # Allow overriding the default from command line
    if p.get('unlabeled_samples') is not None:
        training_set_size = p.unlabeled_samples

    train_set = dataset_class(("train",))

    # Take all indices and permutate them
    all_ind = numpy.arange(train_set.num_examples)
    if p.get('dseed'):
        rng = numpy.random.RandomState(seed=p.dseed)
        rng.shuffle(all_ind)

    d = AttributeDict()

    # Choose the training set
    d.train = train_set
    d.train_ind = all_ind[:training_set_size]

    # Then choose validation set from the remaining indices
    d.valid = train_set
    d.valid_ind = numpy.setdiff1d(all_ind, d.train_ind)[:p.valid_set_size]
    logger.info('Using %d examples for validation' % len(d.valid_ind))

    # Only touch test data if requested
    if test_set:
        d.test = dataset_class(("test",))
        d.test_ind = numpy.arange(d.test.num_examples)

    # Setup optional whitening, only used for Cifar-10
    in_dim = train_set.data_sources[train_set.sources.index('features')].shape[1:]
    if len(in_dim) > 1 and p.whiten_zca > 0:
        assert numpy.product(in_dim) == p.whiten_zca, \
            'Need %d whitening dimensions, not %d' % (numpy.product(in_dim),
                                                      p.whiten_zca)

    cnorm = ContrastNorm(p.contrast_norm) if p.contrast_norm != 0 else None

    def get_data(d, i):
        data = d.get_data(request=i)[d.sources.index('features')]
        # Fuel provides Cifar in uint8, convert to float32
        data = numpy.require(data, dtype=numpy.float32)
        return data if cnorm is None else cnorm.apply(data)

    if p.whiten_zca > 0:
        logger.info('Whitening using %d ZCA components' % p.whiten_zca)
        whiten = ZCA()
        whiten.fit(p.whiten_zca, get_data(d.train, d.train_ind))
    else:
        whiten = None

    return in_dim, d, whiten, cnorm

Developer: youralien, Project: ladder, Lines: 57, Source file: run.py
Example 8: _load_extends_settings

def _load_extends_settings(self, section_name, store):
    """
    Loads all settings from other template(s) specified by a section's
    'extends' setting.

    This method walks a dependency tree of sections from bottom up. Each
    step is a group of settings for a section in the form of a dictionary.
    A 'master' dictionary is updated with the settings at each step. This
    causes the next group of settings to override the previous, and so on.
    The 'section_name' settings are at the top of the dependency tree.
    """
    section = store[section_name]
    extends = section.get('extends')
    if extends is None:
        return
    if DEBUG_CONFIG:
        log.debug('%s extends %s' % (section_name, extends))
    extensions = [section]
    while True:
        extends = section.get('extends', None)
        if not extends:
            break
        try:
            section = store[extends]
            if section in extensions:
                exts = ', '.join([self._get_section_name(x['__name__'])
                                  for x in extensions])
                raise exception.ConfigError(
                    "Cyclical dependency between sections %s. "
                    "Check your EXTENDS settings." % exts)
            extensions.insert(0, section)
        except KeyError:
            raise exception.ConfigError(
                "%s can't extend non-existent section %s" %
                (section_name, extends))
    transform = AttributeDict()
    for extension in extensions:
        transform.update(extension)
    store[section_name] = transform

Developer: fauziharoon, Project: metapathways2, Lines: 39, Source file: config.py
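The merge order matters here: sections lower in the 'extends' chain are inserted at the front of extensions, so each later update() call lets the extending section override what it inherits, and the merged result is an AttributeDict whose keys read as attributes. A hypothetical two-section illustration (the section contents are invented, and it assumes an AttributeDict like the one sketched at the top of this page):

base  = {'flavor': 'm1.small', 'image': 'ami-123'}
child = {'flavor': 'm1.large'}          # "extends" base and overrides one key

merged = AttributeDict()
for section in (base, child):           # bottom of the dependency tree first
    merged.update(section)

assert merged.flavor == 'm1.large'      # the extending section wins
assert merged.image == 'ami-123'        # inherited keys survive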
Example 9: doPreprocessing

def doPreprocessing(self):
    results = AttributeDict()
    results.dataset = []
    for i in range(len(self.params.dataset)):
        # shall we just load it?
        filename = '%s/preprocessing-%s%s.mat' % (self.params.dataset[i].savePath, self.params.dataset[i].saveFile, self.params.saveSuffix)
        if self.params.dataset[i].preprocessing.load and os.path.isfile(filename):
            r = loadmat(filename)
            print('Loading file %s ...' % filename)
            results.dataset[i].preprocessing = r.results_preprocessing
        else:
            # or shall we actually calculate it?
            p = deepcopy(self.params)
            p.dataset = self.params.dataset[i]
            d = AttributeDict()
            d.preprocessing = np.copy(SeqSLAM.preprocessing(p))
            results.dataset.append(d)

            if self.params.dataset[i].preprocessing.save:
                results_preprocessing = results.dataset[i].preprocessing
                savemat(filename, {'results_preprocessing': results_preprocessing})

    return results

Developer: breezeflutter, Project: pySeqSLAM, Lines: 23, Source file: seqslam.py
Example 10: encoder

def encoder(self, input_, path_name, input_noise_std, noise_std):
    h = input_

    h = h + (self.rstream.normal(size=h.shape).astype(floatX) *
             input_noise_std)

    d = AttributeDict()
    d.unlabeled = self.new_activation_dict()
    d.labeled = self.new_activation_dict()
    d.labeled.z[0], d.unlabeled.z[0] = self.split_lu(h)
    prev_dim = self.input_dim
    for i, (spec, act_f) in self.layers[1:]:
        d.labeled.h[i - 1], d.unlabeled.h[i - 1] = self.split_lu(h)
        noise = noise_std[i] if i < len(noise_std) else 0.
        curr_dim, z, m, s, h = self.f(h, prev_dim, spec, i, act_f,
                                      path_name=path_name,
                                      noise_std=noise)
        self.layer_dims[i] = curr_dim
        d.labeled.z[i], d.unlabeled.z[i] = self.split_lu(z)
        d.unlabeled.s[i] = s
        d.unlabeled.m[i] = m
        prev_dim = curr_dim
    d.labeled.h[i], d.unlabeled.h[i] = self.split_lu(h)

    return d

Developer: codeaudit, Project: ladder_network, Lines: 24, Source file: ladder.py
Example 11: setup_data

def setup_data(p, test_set=False):
    dataset_class, training_set_size = {
        'cifar10': (CIFAR10, 40000),
        'mnist': (MNIST, 50000),
    }[p.dataset]

    # Allow overriding the default from command line
    if p.get('unlabeled_samples') is not None:
        training_set_size = p.unlabeled_samples

    train_set = dataset_class("train")

    # Make sure the MNIST data is in right format
    if p.dataset == 'mnist':
        d = train_set.data_sources[train_set.sources.index('features')]
        assert numpy.all(d <= 1.0) and numpy.all(d >= 0.0), \
            'Make sure data is in float format and in range 0 to 1'

    # Take all indices and permutate them
    all_ind = numpy.arange(train_set.num_examples)
    if p.get('dseed'):
        rng = numpy.random.RandomState(seed=p.dseed)
        rng.shuffle(all_ind)

    d = AttributeDict()

    # Choose the training set
    d.train = train_set
    d.train_ind = all_ind[:training_set_size]

    # Then choose validation set from the remaining indices
    d.valid = train_set
    d.valid_ind = numpy.setdiff1d(all_ind, d.train_ind)[:p.valid_set_size]
    logger.info('Using %d examples for validation' % len(d.valid_ind))

    # Only touch test data if requested
    if test_set:
        d.test = dataset_class("test")
        d.test_ind = numpy.arange(d.test.num_examples)

    in_dim = train_set.data_sources[train_set.sources.index('features')].shape[1:]

    def get_data(d, i):
        data = d.get_data(request=i)[d.sources.index('features')]
        # Fuel provides Cifar in uint8, convert to float32
        data = numpy.require(data, dtype=numpy.float32)
        return data if cnorm is None else cnorm.apply(data)

    return in_dim, d

Developer: msevrens, Project: ladder-1, Lines: 49, Source file: run.py
Example 12: AttributeDict

import sys

from utils import AttributeDict
from tagger_exp import TaggerExperiment

p = AttributeDict()
p.encoder_proj = (3000, 2000, 1000)
p.input_noise = 0.2
p.class_cost_x = 0.
p.zhat_init_value = 0.5

p.n_iterations = 3
p.n_groups = 4

p.lr = 0.001
p.labeled_samples = 1000
p.save_freq = 50
p.seed = 1
p.num_epochs = 150
p.batch_size = 100
p.valid_batch_size = 100

p.objects_per_sample = 2
p.dataset = 'freq20-2mnist'
p.input_type = 'continuous'

if __name__ == '__main__':
    if len(sys.argv) == 2 and sys.argv[1] == '--pretrain':
        p.save_to = 'freq20-2mnist-pretraining'
        experiment = TaggerExperiment(p)
        experiment.train()
    elif len(sys.argv) == 3 and sys.argv[1] == '--continue':

Developer: CuriousAI, Project: tagger, Lines: 31, Source file: runner-freq20-2mnist.py
Example 13: AttributeDict

from utils import AttributeDict
from tagger_exp import TaggerExperiment

p = AttributeDict()
p.encoder_proj = (2000, 1000, 500)
p.input_noise = 0.2
p.class_cost_x = 0
p.zhat_init_value = 0.26  # mean of the input data.

p.n_iterations = 3
p.n_groups = 4

p.lr = 0.0004
p.seed = 10
p.num_epochs = 100
p.batch_size = 100
p.valid_batch_size = 100

p.dataset = 'shapes50k20x20'
p.input_type = 'binary'
p.save_to = 'shapes50k20x20'

if __name__ == '__main__':
    experiment = TaggerExperiment(p)
    experiment.train()

Developer: CuriousAI, Project: tagger, Lines: 26, Source file: runner-shapes50k20x20.py
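Examples 12 and 13 both use AttributeDict as a flat hyperparameter container. Downstream code (compare the setup_data examples) typically reads required settings as attributes and optional ones through dict.get(), so missing keys fall back gracefully. A small hypothetical consumer (the function name and output format are illustrative only):

def summarize(p):
    # required keys read as attributes, optional ones via .get()
    labeled = p.get('labeled_samples', 'all')
    return 'dataset=%s  lr=%g  epochs=%d  labeled=%s' % (
        p.dataset, p.lr, p.num_epochs, labeled)

print(summarize(p))   # with the p built above: dataset=shapes50k20x20 ...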
Example 14: demo

def demo():

    # set the parameters

    # start with default parameters
    params = defaultParameters()

    # Nordland spring dataset
    ds = AttributeDict()
    ds.name = 'spring'

    try:
        path = os.environ['DATASET_1_PATH']
    except:
        path = '../datasets/nordland/64x32-grayscale-1fps/spring'
        print "Warning: Environment variable DATASET_1_PATH not found! Trying '"+path+"'"

    ds.imagePath = path
    ds.prefix = 'images-'
    ds.extension = '.png'
    ds.suffix = ''
    ds.imageSkip = 100     # use every n-nth image
    ds.imageIndices = range(1, 35700, ds.imageSkip)
    ds.savePath = 'results'
    ds.saveFile = '%s-%d-%d-%d' % (ds.name, ds.imageIndices[0], ds.imageSkip, ds.imageIndices[-1])

    ds.preprocessing = AttributeDict()
    ds.preprocessing.save = 1
    ds.preprocessing.load = 0  #1
    #ds.crop=[1 1 60 32]  # x0 y0 x1 y1  cropping will be done AFTER resizing!
    ds.crop = []

    spring = ds

    ds2 = deepcopy(ds)
    # Nordland winter dataset
    ds2.name = 'winter'
    #ds.imagePath = '../datasets/nordland/64x32-grayscale-1fps/winter'
    try:
        path = os.environ['DATASET_2_PATH']
    except:
        path = '../datasets/nordland/64x32-grayscale-1fps/winter'
        print "Warning: Environment variable DATASET_2_PATH not found! Trying '"+path+"'"

    ds2.saveFile = '%s-%d-%d-%d' % (ds2.name, ds2.imageIndices[0], ds2.imageSkip, ds2.imageIndices[-1])
    # ds.crop=[5 1 64 32]
    ds2.crop = []

    winter = ds2

    params.dataset = [spring, winter]

    # load old results or re-calculate?
    params.differenceMatrix.load = 0
    params.contrastEnhanced.load = 0
    params.matching.load = 0

    # where to save / load the results
    params.savePath = 'results'

    ## now process the dataset
    ss = SeqSLAM(params)
    t1 = time.time()
    results = ss.run()
    t2 = time.time()
    print "time taken: "+str(t2-t1)

    ## show some results
    if len(results.matches) > 0:
        m = results.matches[:, 0]  # The LARGER the score, the WEAKER the match.
        thresh = 0.9   # you can calculate a precision-recall plot by varying this threshold
        m[results.matches[:, 1] > thresh] = np.nan  # remove the weakest matches
        plt.plot(m, '.')   # ideally, this would only be the diagonal
        plt.title('Matchings')
        plt.show()
    else:
        print "Zero matches"

Developer: breezeflutter, Project: pySeqSLAM, Lines: 76, Source file: demo.py
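The demo builds its dataset configuration as nested AttributeDicts, so options read like ds.preprocessing.load. Note that each nesting level must itself be an AttributeDict; a plain inner dict would not offer attribute access. A minimal illustration, assuming the sketch at the top of this page:

ds = AttributeDict()
ds.preprocessing = AttributeDict()   # nested level must also be an AttributeDict
ds.preprocessing.load = 0
ds.preprocessing.save = 1
print(ds.preprocessing.load)         # -> 0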
Example 15: setup_data

def setup_data(p, test_set=False):
    if p.dataset in ['cifar10', 'mnist']:
        dataset_class, training_set_size = {
            'cifar10': (CIFAR10, 40000),
            'mnist': (MNIST, 50000),
        }[p.dataset]
    else:
        from fuel.datasets import H5PYDataset
        from fuel.utils import find_in_data_path
        from functools import partial
        fn = p.dataset
        fn = os.path.join(fn, fn + '.hdf5')
        def dataset_class(which_sets):
            return H5PYDataset(file_or_path=find_in_data_path(fn),
                               which_sets=which_sets,
                               load_in_memory=True)
        training_set_size = None

    train_set = dataset_class(["train"])

    # Allow overriding the default from command line
    if p.get('unlabeled_samples') is not None and p.unlabeled_samples >= 0:
        training_set_size = p.unlabeled_samples
    elif training_set_size is None:
        training_set_size = train_set.num_examples

    # Make sure the MNIST data is in right format
    if p.dataset == 'mnist':
        d = train_set.data_sources[train_set.sources.index('features')]
        assert numpy.all(d <= 1.0) and numpy.all(d >= 0.0), \
            'Make sure data is in float format and in range 0 to 1'

    # Take all indices and permutate them
    all_ind = numpy.arange(train_set.num_examples)
    if p.get('dseed'):
        rng = numpy.random.RandomState(seed=p.dseed)
        rng.shuffle(all_ind)

    d = AttributeDict()

    # Choose the training set
    d.train = train_set
    d.train_ind = all_ind[:training_set_size]

    # Then choose validation set from the remaining indices
    d.valid = train_set
    d.valid_ind = numpy.setdiff1d(all_ind, d.train_ind)[:p.valid_set_size]
    logger.info('Using %d examples for validation' % len(d.valid_ind))

    # Only touch test data if requested
    if test_set:
        d.test = dataset_class(["test"])
        d.test_ind = numpy.arange(d.test.num_examples)

    # Setup optional whitening, only used for Cifar-10
    in_dim = train_set.data_sources[train_set.sources.index('features')].shape[1:]
    if len(in_dim) > 1 and p.whiten_zca > 0:
        assert numpy.product(in_dim) == p.whiten_zca, \
            'Need %d whitening dimensions, not %d' % (numpy.product(in_dim),
                                                      p.whiten_zca)

    cnorm = ContrastNorm(p.contrast_norm) if p.contrast_norm != 0 else None

    def get_data(d, i):
        data = d.get_data(request=i)[d.sources.index('features')]
        # Fuel provides Cifar in uint8, convert to float32
        data = numpy.require(data, dtype=numpy.float32)
        return data if cnorm is None else cnorm.apply(data)

    if p.whiten_zca > 0:
        logger.info('Whitening using %d ZCA components' % p.whiten_zca)
        whiten = ZCA()
        whiten.fit(p.whiten_zca, get_data(d.train, d.train_ind))
    else:
        whiten = None

    return in_dim, d, whiten, cnorm

Developer: fulldecent, Project: LRE, Lines: 76, Source file: run.py
Example 16: setup_data

def setup_data(p, test_set=False):
    dataset_class = {
        'cifar10': (CIFAR10),
        'jos': (JOS),
        'mnist': (MNIST),
    }[p.dataset]
    training_set_size = p.unlabeled_samples

    # Allow overriding the default from command line
    if p.get('unlabeled_samples') is not None:
        training_set_size = p.unlabeled_samples

    train_set = dataset_class(["train"])

    # Make sure the MNIST data is in right format
    if p.dataset == 'mnist':
        d = train_set.data_sources[train_set.sources.index('features')]
        assert numpy.all(d <= 1.0) and numpy.all(d >= 0.0), \
            'Make sure data is in float format and in range 0 to 1'

    # Take all indices and permutate them
    all_ind = numpy.arange(train_set.num_examples)
    if p.get('dseed'):
        rng = numpy.random.RandomState(seed=p.dseed)
        rng.shuffle(all_ind)

    d = AttributeDict()

    # Choose the training set
    d.train = train_set
    d.train_ind = all_ind[:training_set_size]

    # Then choose validation set from the remaining indices
    d.valid = train_set
    d.valid_ind = numpy.setdiff1d(all_ind, d.train_ind)[:p.valid_set_size]
    logger.info('Using %d examples for validation' % len(d.valid_ind))

    # Only touch test data if requested
    if test_set:
        d.test = dataset_class(["test"])
        d.test_ind = numpy.arange(d.test.num_examples)

    # Setup optional whitening, only used for Cifar-10
    fn = find_in_data_path(train_set.filename)
    #iprint(fn)
    s1 = H5PYDataset(fn, ("train",))
    handle = s1.open()
    in_dim = s1.get_data(handle, slice(0, 1))[0].shape[1:]
    s1.close(handle)
    #in_dim = train_set.data_sources[train_set.sources.index('features')].shape[1:]
    if len(in_dim) > 1 and p.whiten_zca > 0:
        assert numpy.product(in_dim) == p.whiten_zca, \
            'Need %d whitening dimensions, not %d' % (numpy.product(in_dim),
                                                      p.whiten_zca)

    cnorm = ContrastNorm(p.contrast_norm) if p.contrast_norm != 0 else None

    def get_data(d, i):
        data = d.get_data(request=list(i))[d.sources.index('features')]
        # Fuel provides Cifar in uint8, convert to float32
        data = numpy.require(data, dtype=numpy.float32)
        return data if cnorm is None else cnorm.apply(data)

    if p.whiten_zca > 0:
        logger.info('Whitening using %d ZCA components' % p.whiten_zca)
        whiten = ZCA()
        whiten.fit(p.whiten_zca, get_data(d.train, d.train_ind))
    else:
        whiten = None

    return in_dim, d, whiten, cnorm

Developer: josvr, Project: ladder, Lines: 71, Source file: run.py
Example 17: setup_data

def setup_data(p, test_set=False):
    # Both CIFAR10 and MNIST are pre-packaged HDF5 datasets
    # p.dataset is the command-line argument that selects either cifar10 or mnist
    dataset_class, training_set_size = {
        'cifar10': (CIFAR10, 40000),
        'mnist': (MNIST, 50000),
    }[p.dataset]

    # The number of unlabeled samples can be specified on the command line
    # Allow overriding the default from command line
    if p.get('unlabeled_samples') is not None:
        training_set_size = p.unlabeled_samples

    # Select the 'train' split of the dataset
    train_set = dataset_class("train")

    # Make sure the MNIST data is in right format
    # Sanity-check MNIST: all values must be floats in the range 0 to 1
    if p.dataset == 'mnist':
        # 'features' has shape 60000*1*28*28 (num_examples*channels*height*width);
        # MNIST is grayscale, so channels = 1
        d = train_set.data_sources[train_set.sources.index('features')]
        assert numpy.all(d <= 1.0) and numpy.all(d >= 0.0), \
            'Make sure data is in float format and in range 0 to 1'

    # Randomly shuffle the sample order
    # Take all indices and permutate them
    all_ind = numpy.arange(train_set.num_examples)
    if p.get('dseed'):
        # Build a random generator from dseed to shuffle the sample indices
        rng = numpy.random.RandomState(seed=p.dseed)
        rng.shuffle(all_ind)

    d = AttributeDict()

    # Choose the training set
    d.train = train_set
    # The indices have been shuffled at this point;
    # take the first training_set_size of them as the training set (indices)
    d.train_ind = all_ind[:training_set_size]

    # Use part of the remaining data as the validation set
    # Then choose validation set from the remaining indices
    d.valid = train_set
    # Remove the training samples from the full index set; the rest becomes the validation set
    d.valid_ind = numpy.setdiff1d(all_ind, d.train_ind)[:p.valid_set_size]
    logger.info('Using %d examples for validation' % len(d.valid_ind))

    # If a test set is requested, generate its indices
    # Only touch test data if requested
    if test_set:
        d.test = dataset_class("test")
        d.test_ind = numpy.arange(d.test.num_examples)

    # Setup optional whitening, only used for Cifar-10
    # Feature dimensionality; shape[1:] is the shape of a single sample
    in_dim = train_set.data_sources[train_set.sources.index('features')].shape[1:]
    if len(in_dim) > 1 and p.whiten_zca > 0:
        assert numpy.product(in_dim) == p.whiten_zca, \
            'Need %d whitening dimensions, not %d' % (numpy.product(in_dim),
                                                      p.whiten_zca)

    # If a contrast-normalization parameter is given, create the normalizer
    cnorm = ContrastNorm(p.contrast_norm) if p.contrast_norm != 0 else None

    def get_data(d, i):
        data = d.get_data(request=i)[d.sources.index('features')]
        # Fuel provides Cifar in uint8, convert to float32
        # Make sure the items are float32
        data = numpy.require(data, dtype=numpy.float32)
        # TODO ContrastNorm.apply
        return data if cnorm is None else cnorm.apply(data)

    if p.whiten_zca > 0:
        logger.info('Whitening using %d ZCA components' % p.whiten_zca)
        # TODO ZCA
        whiten = ZCA()
        whiten.fit(p.whiten_zca, get_data(d.train, d.train_ind))
    else:
        whiten = None

    return in_dim, d, whiten, cnorm

Developer: ryukinkou, Project: ladder_customized, Lines: 84, Source file: run.py
Example 18: __init__

def __init__(self, *args, **kwargs):
    AttributeDict.__init__(self)

    self.env = Environment.getInstance()
    self.resourceType = self.__class__.__name__
    self.isUpdated = False

    seen = set()
    if not hasattr(self, '_schema'):
        raise Fail("Resource failed to define a valid _schema")

    # union global schema with local schema
    schema = copy.deepcopy(self._schema)
    for key in self.s_globalSchema:
        if not key in schema:
            schema[key] = self.s_globalSchema[key]

    resolvedArgs = { }
    keys = schema.keys()
    keysLen = len(keys)
    index = 0

    # resolve unnamed arguments with names corresponding to the order
    # they were passed to Resource's ctor and their relative definitions
    # in the subclass' ResourceArgumentSchema (which is an OrderedDict,
    # so as to retain this ordering information).
    for arg in args:
        if index < keysLen:
            key = keys[index]
            resolvedArgs[keys[index]] = arg
        else:
            raise InvalidArgument("Invalid unnamed argument %s provided to resource %s" % (arg, str(self)))
        index += 1

    for arg in kwargs:
        if arg in resolvedArgs:
            raise InvalidArgument("Invalid mixture of named and unnamed arguments provided to resource %s, possibly around argument %s" % (str(self), arg))
        else:
            resolvedArgs[arg] = kwargs[arg]

    utils.log("Initializing resource '%s' with args: %s" % (self.resourceType, resolvedArgs))

    # validate resource arguments
    output = schema.validate(resolvedArgs)
    for key in output:
        self[key] = output[key]

    self.subscriptions = {
        'immediate': set(),
        'delayed': set()
    }

    for sub in self.subscribes:
        if len(sub) == 2:
            action, resource = sub
            immediate = False
        else:
            action, resource, immediate = sub

        resource.subscribe(action, self, immediate)

    for sub in self.notifies:
        self.subscribe(*sub)

    self._validate()
    self._register()
    utils.log("Added new resource '%s'" % (str(self), ))

Developer: Stamped, Project: stamped-bootstrap, Lines: 69, Source file: resource.py
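Example 18 shows the other common pattern: subclassing AttributeDict so that validated constructor arguments become both dictionary entries and attributes. A stripped-down sketch of that idea follows (the class name, schema handling, and keys are invented for illustration and are not the Stamped code; it assumes an AttributeDict like the one sketched at the top of this page):

class ValidatedConfig(AttributeDict):
    # invented schema: key -> expected type
    _schema = {'path': str, 'retries': int}

    def __init__(self, **kwargs):
        AttributeDict.__init__(self)
        for key, expected in self._schema.items():
            value = kwargs.get(key)
            if value is not None and not isinstance(value, expected):
                raise TypeError('%s must be %s' % (key, expected.__name__))
            self[key] = value          # validated args become attributes

cfg = ValidatedConfig(path='/tmp/run', retries=3)
print(cfg.path, cfg.retries)           # attribute access to validated values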
Example 19: apply_tagger

def apply_tagger(self, x, apply_noise, y=None):
    """ Build one path of Tagger """
    mb_size = x.shape[1]
    input_shape = (self.p.n_groups, mb_size) + self.in_dim
    in_dim = np.prod(self.in_dim)

    # Add noise
    x_corr = self.corrupt(x) if apply_noise else x

    # Repeat input
    x_corr = T.repeat(x_corr, self.p.n_groups, 0)

    # Compute v
    if self.p.input_type == 'binary':
        v = None
    elif self.p.input_type == 'continuous':
        v = self.weight(1., 'v')
        v = v * T.alloc(1., *input_shape)
        # Cap to positive range
        v = nn.exp_inv_sinh(v)

    d = AttributeDict()
    if y:
        d.pred = []
        d.class_error, d.class_cost = [], []
        # here we have the book-keeping of z and m for the visualizations.
        d.z = []
        d.m = []
    else:
        d.denoising_cost, d.ami_score, d.ami_score_per_sample = [], [], []

    assert self.p.n_iterations >= 1

    # z_hat is the value for the next iteration of tagger.
    # z is the current iteration tagger input
    # m is the current iteration mask input
    # m_hat is the value for the next iteration of tagger.
    # m_lh is the mask likelihood.
    # z_delta is the gradient of z, which depends on x, z and m.

    for step in xrange(self.p.n_iterations):
        # Encoder
        # =======

        # Compute m, z and z_hat_pre_bin
        if step == 0:
            # No values from previous iteration, so let's make them up
            m, z = self.init_m_z(input_shape)
            z_hat_pre_bin = None

            # let's keep in the bookkeeping for the visualizations.
            if y:
                d.z.append(z)
                d.m.append(m)
        else:
            # Feed in the previous iteration's estimates
            z = z_hat
            m = m_hat

        # Compute m_lh
        m_lh = self.m_lh(x_corr, z, v)
        z_delta = self.f_z_deriv(x_corr, z, m)

        z_tilde = z_hat_pre_bin if z_hat_pre_bin is not None else z

        # Concatenate all inputs
        inputs = [z_tilde, z_delta, m, m_lh]
        inputs = T.concatenate(inputs, axis=2)

        # Projection, batch-normalization and activation to a hidden layer
        z = self.proj(inputs, in_dim * 4, self.p.encoder_proj[0])

        z -= z.mean((0, 1), keepdims=True)
        z /= T.sqrt(z.var((0, 1), keepdims=True) + np.float32(1e-10))
        z += self.bias(0.0 * np.ones(self.p.encoder_proj[0]), 'b')

        h = self.apply_act(z, 'relu')

        # The first dimension is the group. Let's flatten together with
        # minibatch in order to have parametric mapping compute all groups
        # in parallel
        h, undo_flatten = flatten_first_two_dims(h)

        # Parametric Mapping
        # ==================

        self.ladder.apply(None, self.y, h)
        ladder_encoder_output = undo_flatten(self.ladder.act.corr.unlabeled.h[len(self.p.encoder_proj) - 1])
        ladder_decoder_output = undo_flatten(self.ladder.act.est.z[0])

        # Decoder
        # =======

        # compute z_hat
        z_u = self.proj(ladder_decoder_output, self.p.encoder_proj[0], in_dim, scope='z_u')

        z_u -= z_u.mean((0, 1), keepdims=True)
        z_u /= T.sqrt(z_u.var((0, 1), keepdims=True) + np.float32(1e-10))

        z_hat = self.weight(np.ones(in_dim), 'c1') * z_u + self.bias(np.zeros(in_dim), 'b1')
        z_hat = z_hat.reshape(input_shape)

        # compute m_hat
        # ... (the rest of this method is omitted in the source listing) ...

Developer: CuriousAI, Project: tagger, Lines: 101, Source file: tagger.py
Example 20: setup_data

def setup_data(p, use_unlabeled=True, use_labeled=True):
    assert use_unlabeled or use_labeled, 'Cannot train without cost'
    dataset_class = DATASETS[p.dataset]
    dataset = dataset_class(p)

    train_ind = dataset.trn.ind

    if 'labeled_samples' not in p or p.labeled_samples == 0:
        n_labeled = len(train_ind)
    else:
        n_labeled = p.labeled_samples
    if 'unlabeled_samples' not in p:
        n_unlabeled = len(train_ind)
    else:
        n_unlabeled = p.unlabeled_samples

    assert p.batch_size <= n_labeled, "batch size too large"
    assert len(train_ind) >= n_labeled
    assert len(train_ind) >= n_unlabeled, "not enough training samples"
    assert n_labeled <= n_unlabeled, \
        "at least as many unlabeled