本文整理汇总了Python中mvpa2.generators.splitters.Splitter类的典型用法代码示例。如果您正苦于以下问题:Python Splitter类的具体用法?Python Splitter怎么用?Python Splitter使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Splitter类的17个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: test_custom_split
def test_custom_split(self):
    """Check `Splitter` on partitions produced by a `CustomPartitioner`.

    First a half-split is simulated with two explicit chunk groups, then a
    fully customized partitioning (explicit working and validation chunks)
    is split by two differently configured splitters that are expected to
    behave identically.

    NOTE(review): the expected sample counts imply that ``self.data`` holds
    10 samples per chunk for chunks 0..9 -- the fixture is defined outside
    this block, confirm there.
    """
    # simulate half splitter
    hs = CustomPartitioner([(None, [0, 1, 2, 3, 4]), (None, [5, 6, 7, 8, 9])])
    spl = Splitter(attr='partitions')
    splits = [list(spl.generate(p)) for p in hs.generate(self.data)]
    self.assertEqual(len(splits), 2)

    for p in splits:
        self.assertEqual(len(p), 2)
        self.assertEqual(p[0].nsamples, 50)
        self.assertEqual(p[1].nsamples, 50)

    assert_array_equal(splits[0][1].sa['chunks'].unique, [0, 1, 2, 3, 4])
    assert_array_equal(splits[0][0].sa['chunks'].unique, [5, 6, 7, 8, 9])
    assert_array_equal(splits[1][1].sa['chunks'].unique, [5, 6, 7, 8, 9])
    assert_array_equal(splits[1][0].sa['chunks'].unique, [0, 1, 2, 3, 4])

    # check fully customized split with working and validation set specified
    cs = CustomPartitioner([([0, 3, 4], [5, 9])])
    # we want to discard the unselected partition of the data, hence
    # attr_values; these two splitters should do exactly the same thing
    splitters = (Splitter(attr='partitions', attr_values=[1, 2]),
                 Splitter(attr='partitions', ignore_values=(0,)))
    for spl in splitters:
        splits = [list(spl.generate(p)) for p in cs.generate(self.data)]
        self.assertEqual(len(splits), 1)

        for p in splits:
            self.assertEqual(len(p), 2)
            self.assertEqual(p[0].nsamples, 30)
            self.assertEqual(p[1].nsamples, 20)

        # assert_array_equal also reports a shape mismatch cleanly, which a
        # bare `(a == [..]).all()` does not
        assert_array_equal(splits[0][1].sa['chunks'].unique, [5, 9])
        assert_array_equal(splits[0][0].sa['chunks'].unique, [0, 3, 4])
开发者ID:psederberg,项目名称:PyMVPA,代码行数:35,代码来源:test_splitter.py
示例2: test_repeated_features
def test_repeated_features(self):
    """Run a feature-counting measure once per ``fa.nonbogus_targets`` split.

    The per-split results are concatenated along the feature axis by
    `RepeatedMeasure`; the checks below do not rely on the order in which
    the splits are produced.
    """
    class CountFeatures(Measure):
        # NOTE(review): presumably lets the measure be called without a
        # prior training step -- confirm against `Measure`
        is_trained = True

        def _call(self, ds):
            unique_targets = list(ds.fa['nonbogus_targets'].unique)
            return Dataset([ds.nfeatures],
                           fa={'nonbogus_targets': unique_targets})

    counter = CountFeatures()
    splitter = Splitter('fa.nonbogus_targets')
    nsplits = len(list(splitter.generate(self.dataset)))
    assert_equal(nsplits, 3)

    repeated = RepeatedMeasure(counter, splitter, concat_as='features')
    res = repeated(self.dataset)
    assert_equal(res.shape, (1, nsplits))

    # Because of https://github.com/numpy/numpy/issues/641 a
    # list(set(...)) construct is used, so the order of
    # nonbogus_targets.unique can vary from run to run.  There is no
    # guarantee that 18 comes first (a questionable assumption anyway),
    # hence the checks below do not require any specific order.
    # Because of https://github.com/numpy/numpy/issues/3759 the bool mask
    # cannot be obtained with a plain array == None either, so it is
    # built element by element.
    none_mask = np.array(
        [value == None for value in res.fa.nonbogus_targets])  # noqa: E711
    counts = res.samples[0]
    assert_array_equal(res.samples[0, none_mask], [18])
    assert_array_equal(res.samples[0, ~none_mask], [1, 1])

    on_python2 = sys.version_info[0] < 3
    if on_python2:
        # with python2 order seems to be consistent
        assert_array_equal(counts, [18, 1, 1])
开发者ID:andreirusu,项目名称:PyMVPA,代码行数:30,代码来源:test_datameasure.py
示例3: test_label_splitter
def test_label_splitter(self):
    """Split odd/even partitions defined on ``targets`` and check contents.

    Each partitioned dataset is split on its ``partitions`` attribute and
    the unique targets of every resulting piece are compared against the
    expected pair of labels.
    """
    partitioner = OddEvenPartitioner(attr='targets')
    splitter = Splitter(attr='partitions')
    splits = [list(splitter.generate(part))
              for part in partitioner.generate(self.data)]

    # (split index, piece index) -> unique targets expected in that piece
    expectations = (
        ((0, 0), [0, 2]),
        ((0, 1), [1, 3]),
        ((1, 0), [1, 3]),
        ((1, 1), [0, 2]),
    )
    for (isplit, ipiece), targets in expectations:
        assert_array_equal(splits[isplit][ipiece].sa['targets'].unique,
                           targets)
开发者ID:psederberg,项目名称:PyMVPA,代码行数:10,代码来源:test_splitter.py
示例4: test_simplest_cv_pat_gen
def test_simplest_cv_pat_gen(self):
    """Check the splits of a leave-one-chunk-out (``cvtype=1``) partitioning.

    Every partitioned dataset is split on ``partitions`` into two pieces;
    the second piece must contain exactly the i-th chunk.

    NOTE(review): the expected counts (10 folds, 90/10 samples) depend on
    the ``self.data`` fixture defined outside this block.
    """
    # create the generator
    nfs = NFoldPartitioner(cvtype=1)
    spl = Splitter(attr='partitions')

    # now get the xval pattern sets (One-Fold CV)
    xvpat = [list(spl.generate(p)) for p in nfs.generate(self.data)]

    self.assertEqual(len(xvpat), 10)

    for i, p in enumerate(xvpat):
        self.assertEqual(len(p), 2)
        self.assertEqual(p[0].nsamples, 90)
        self.assertEqual(p[1].nsamples, 10)
        # the i-th fold holds out the i-th chunk
        self.assertEqual(p[1].chunks[0], i)
开发者ID:psederberg,项目名称:PyMVPA,代码行数:14,代码来源:test_splitter.py
示例5: test_slicing
def test_slicing(self):
    """Check when `Splitter` returns views of the data and when copies.

    The test inspects ``ndarray.base`` of the split samples.  How numpy
    reports ``.base`` for a view of a view differs between numpy versions,
    so a small probe selects the matching comparison function first.
    """
    hs = HalfPartitioner()
    spl = Splitter(attr="partitions")
    splits = list(hs.generate(self.data))
    for s in splits:
        # partitioned dataset shared the data
        assert_true(s.samples.base is self.data.samples)
    splits = [list(spl.generate(p)) for p in hs.generate(self.data)]

    # with numpy 1.7.0b1 "chaining" was deprecated so let's create
    # check function appropriate for the given numpy version
    _a = np.arange(5)
    __a = _a[:4][:3]
    if __a.base is _a:
        # 1.7.0b1
        def is_the_same_base(x, base=self.data.samples):
            return x.base is base
    elif __a.base.base is _a:
        # prior 1.7.0b1
        def is_the_same_base(x, base=self.data.samples):
            return x.base.base is base
    else:
        raise RuntimeError("Unknown handling of .base by numpy")

    for s in splits:
        # we get slicing all the time
        assert_true(is_the_same_base(s[0].samples))
        assert_true(is_the_same_base(s[1].samples))

    spl = Splitter(attr="partitions", noslicing=True)
    splits = [list(spl.generate(p)) for p in hs.generate(self.data)]
    for s in splits:
        # no slicing at all
        assert_false(s[0].samples.base is self.data.samples)
        assert_false(s[1].samples.base is self.data.samples)

    nfs = NFoldPartitioner()
    spl = Splitter(attr="partitions")
    splits = [list(spl.generate(p)) for p in nfs.generate(self.data)]
    for i, s in enumerate(splits):
        # training only first and last split
        if i == 0 or i == len(splits) - 1:
            assert_true(is_the_same_base(s[0].samples))
        else:
            assert_true(s[0].samples.base is None)
        # we get slicing all the time
        assert_true(is_the_same_base(s[1].samples))

    # alternating chunk ids: 0, 1, 0, 1, ...
    step_ds = Dataset(np.random.randn(20, 2),
                      sa={"chunks": np.tile([0, 1], 10)})
    oes = OddEvenPartitioner()
    spl = Splitter(attr="partitions")
    splits = list(oes.generate(step_ds))
    for s in splits:
        # partitioned dataset shared the data
        assert_true(s.samples.base is step_ds.samples)
    splits = [list(spl.generate(p)) for p in oes.generate(step_ds)]
    assert_equal(len(splits), 2)
    for s in splits:
        # we get slicing all the time
        assert_true(is_the_same_base(s[0].samples, step_ds.samples))
        assert_true(is_the_same_base(s[1].samples, step_ds.samples))
开发者ID:pckillerbrici,项目名称:PyMVPA,代码行数:60,代码来源:test_splitter.py
示例6: test_repeated_features
def test_repeated_features(self):
    """Run a feature-counting measure once per ``fa.nonbogus_targets`` split.

    The per-split feature counts are concatenated along the feature axis by
    `RepeatedMeasure` and compared against the expected counts.
    """
    # Diagnostic output; written as calls with a single argument so the
    # lines parse (and print the same text) on both Python 2 and Python 3.
    print(self.dataset)
    print(self.dataset.fa.nonbogus_targets)

    class CountFeatures(Measure):
        # NOTE(review): presumably lets the measure be called without a
        # prior training step -- confirm against `Measure`
        is_trained = True

        def _call(self, ds):
            return ds.nfeatures

    cf = CountFeatures()
    spl = Splitter('fa.nonbogus_targets')
    nsplits = len(list(spl.generate(self.dataset)))
    assert_equal(nsplits, 3)
    rm = RepeatedMeasure(cf, spl, concat_as='features')
    res = rm(self.dataset)
    assert_equal(res.shape, (1, nsplits))
    # NOTE(review): relies on the splits coming out in a fixed order
    assert_array_equal(res.samples[0], [18, 1, 1])
开发者ID:arnaudsj,项目名称:PyMVPA,代码行数:16,代码来源:test_datameasure.py
示例7: test_counted_splitting
def test_counted_splitting(self):
    """Check `NFoldPartitioner` with a limited ``count`` of partitions.

    For every selection strategy listed in ``Partitioner._STRATEGIES`` and
    several requested counts, the number of generated splits, the
    ``lastpartitionset`` flag of every split and the chunks chosen by the
    strategy are verified.
    """
    spl = Splitter(attr='partitions')
    # count > #chunks, should result in 10 splits
    nchunks = len(self.data.sa['chunks'].unique)
    # (requested count, expected number of splits)
    count_targets = [(nchunks * 2, nchunks),
                     (nchunks, nchunks),
                     (nchunks - 1, nchunks - 1),
                     (3, 3),
                     (0, 0),
                     (1, 1)]
    for strategy in Partitioner._STRATEGIES:
        for count, target in count_targets:
            nfs = NFoldPartitioner(cvtype=1, count=count,
                                   selection_strategy=strategy)
            splits = [list(spl.generate(p))
                      for p in nfs.generate(self.data)]
            self.assertEqual(len(splits), target)
            chosenchunks = [int(s[1].uniquechunks) for s in splits]

            # Test if configuration matches as well
            nsplits_cfg = len(nfs.get_partition_specs(self.data))
            self.assertEqual(nsplits_cfg, target)

            # Check if "lastsplit" dsattr was assigned appropriately
            nsplits = len(splits)
            if nsplits > 0:
                # dummy-proof testing of last split
                for ds_ in splits[-1]:
                    self.assertTrue(ds_.a.lastpartitionset)
                # test all now: the flag must be set for the last split
                # only.  The right-hand side is computed separately --
                # written as `a == b == c` it would be a chained
                # comparison, and as a bare statement it asserts nothing.
                for isplit, split in enumerate(splits):
                    is_last = isplit == nsplits - 1
                    for ds_ in split:
                        self.assertEqual(bool(ds_.a.lastpartitionset),
                                         is_last)

            # Check results of different strategies
            if strategy == 'first':
                # list(...) so the comparison also holds where range()
                # is not a list
                self.assertEqual(chosenchunks, list(range(target)))
            elif strategy == 'equidistant':
                if target == 3:
                    self.assertEqual(chosenchunks, [0, 3, 7])
            elif strategy == 'random':
                # none is selected twice
                self.assertEqual(len(set(chosenchunks)), len(chosenchunks))
                self.assertEqual(target, len(chosenchunks))
            else:
                raise RuntimeError("Add unittest for strategy %s"
                                   % strategy)
开发者ID:psederberg,项目名称:PyMVPA,代码行数:46,代码来源:test_splitter.py
示例8: _forward_dataset
def _forward_dataset(self, ds):
if self.__chunks_attr is None:
return self._forward_dataset_helper(ds)
else:
# strip down dataset to speedup local processing
if self.__attr_strategy == "remove":
keep_sa = []
else:
keep_sa = None
proc_ds = ds.copy(deep=False, sa=keep_sa, fa=[], a=[])
# process all chunks individually
# use a customsplitter to speed-up splitting
spl = Splitter(self.__chunks_attr)
dses = [self._forward_dataset_helper(d) for d in spl.generate(proc_ds)]
# and merge them again
mds = vstack(dses)
# put back attributes
mds.fa.update(ds.fa)
mds.a.update(ds.a)
return mds
开发者ID:robbisg,项目名称:PyMVPA,代码行数:20,代码来源:filters.py
示例9: test_svms
def test_svms(self, clf):
    """Train `clf` on one split and check its estimates and probabilities.

    The ``estimates`` conditional attribute (and ``probabilities`` when the
    classifier offers and enables it) is switched on temporarily.  The
    switch is undone in a ``finally`` clause so that a failing assertion or
    a training error does not leave `clf` with altered attributes.
    """
    knows_probabilities = \
        'probabilities' in clf.ca.keys() and clf.params.probability
    enable_ca = ['estimates']
    if knows_probabilities:
        enable_ca += ['probabilities']

    clf.ca.change_temporarily(enable_ca=enable_ca)
    try:
        spl = Splitter('train', count=2)
        traindata, testdata = list(spl.generate(datasets['uni2small']))
        clf.train(traindata)
        predicts = clf.predict(testdata.samples)
        # values should be different from predictions for SVMs we have
        self.assertTrue(np.any(predicts != clf.ca.estimates))

        if knows_probabilities and clf.ca.is_set('probabilities'):
            # XXX test more thoroughly what we are getting here ;-)
            self.assertEqual(len(clf.ca.probabilities),
                             len(testdata.samples))
    finally:
        # always restore the classifier's original attribute state
        clf.ca.reset_changed_temporarily()
开发者ID:Anhmike,项目名称:PyMVPA,代码行数:20,代码来源:test_clf.py
示例10: test_exclude_targets_combinations
def test_exclude_targets_combinations():
    """Check n-fold partitioning chained with target-combination exclusion.

    A dataset with 4 labels and 3 chunks is partitioned by an
    `NFoldPartitioner` followed by an
    `ExcludeTargetsCombinationsPartitioner` with ``k=2``.  The second
    dataset of every split is inspected for its unique targets and chunks.
    """
    exclusion = ExcludeTargetsCombinationsPartitioner(
        k=2, targets_attr="targets", space="partitions")
    partitioner = ChainNode([NFoldPartitioner(), exclusion],
                            space="partitions")

    from mvpa2.misc.data_generators import normal_feature_dataset

    ds = normal_feature_dataset(snr=0.0, nlabels=4, perlabel=3, nchunks=3,
                                nonbogus_features=[0, 1, 2, 3], nfeatures=4)
    partitions = list(partitioner.generate(ds))
    assert_equal(len(partitions), 3 * 6)

    splitter = Splitter("partitions")
    target_combs = []
    target_chunk_combs = []
    for partition in partitions:
        _, second = list(splitter.generate(partition))[:2]
        targets = tuple(np.unique(second.targets))
        chunks = tuple(np.unique(second.chunks))
        target_combs.append(targets)
        target_chunk_combs.append(targets + chunks)

    # just 6 possible combinations of 2 out of 4
    assert_equal(len(set(target_combs)), 6)
    # all unique
    assert_equal(len(set(target_chunk_combs)), 3 * 6)
开发者ID:hanke,项目名称:PyMVPA,代码行数:20,代码来源:test_generators.py
示例11: test_half_split
def test_half_split(self):
    """Check the splits obtained from `HalfPartitioner` partitions.

    NOTE(review): the expected counts and chunk ids depend on the
    ``self.data`` fixture defined outside this block.
    """
    hs = HalfPartitioner()
    spl = Splitter(attr='partitions')
    splits = [list(spl.generate(p)) for p in hs.generate(self.data)]

    self.assertEqual(len(splits), 2)

    for p in splits:
        self.assertEqual(len(p), 2)
        self.assertEqual(p[0].nsamples, 50)
        self.assertEqual(p[1].nsamples, 50)

    assert_array_equal(splits[0][1].sa['chunks'].unique, [0, 1, 2, 3, 4])
    assert_array_equal(splits[0][0].sa['chunks'].unique, [5, 6, 7, 8, 9])
    assert_array_equal(splits[1][1].sa['chunks'].unique, [5, 6, 7, 8, 9])
    assert_array_equal(splits[1][0].sa['chunks'].unique, [0, 1, 2, 3, 4])

    # check if it works on pure odd and even chunk ids
    moresplits = [list(spl.generate(p))
                  for p in hs.generate(splits[0][0])]

    for split in moresplits:
        # identity check: `!= None` would go through the dataset's own
        # comparison operators
        self.assertTrue(split[0] is not None)
        self.assertTrue(split[1] is not None)
开发者ID:psederberg,项目名称:PyMVPA,代码行数:24,代码来源:test_splitter.py
示例12: test_odd_even_split
def test_odd_even_split(self):
    """Check the splits obtained from `OddEvenPartitioner` partitions.

    NOTE(review): the expected counts and chunk ids depend on the
    ``self.data`` fixture defined outside this block.
    """
    oes = OddEvenPartitioner()
    spl = Splitter(attr='partitions')
    splits = [list(spl.generate(p)) for p in oes.generate(self.data)]

    self.assertEqual(len(splits), 2)

    for p in splits:
        self.assertEqual(len(p), 2)
        self.assertEqual(p[0].nsamples, 50)
        self.assertEqual(p[1].nsamples, 50)

    assert_array_equal(splits[0][1].sa['chunks'].unique, [1, 3, 5, 7, 9])
    assert_array_equal(splits[0][0].sa['chunks'].unique, [0, 2, 4, 6, 8])
    assert_array_equal(splits[1][0].sa['chunks'].unique, [1, 3, 5, 7, 9])
    assert_array_equal(splits[1][1].sa['chunks'].unique, [0, 2, 4, 6, 8])

    # check if it works on pure odd and even chunk ids
    moresplits = [list(spl.generate(p))
                  for p in oes.generate(splits[0][0])]

    for split in moresplits:
        # identity check: `!= None` would go through the dataset's own
        # comparison operators
        self.assertTrue(split[0] is not None)
        self.assertTrue(split[1] is not None)
开发者ID:PepGardiola,项目名称:PyMVPA,代码行数:24,代码来源:test_splitter.py
示例13: test_slicing
def test_slicing(self):
    """Check when `Splitter` returns views of the data and when copies.

    The test inspects ``ndarray.base`` of the split samples.  Whether a
    view of a view reports the original array directly as ``.base`` or
    only as ``.base.base`` depends on the numpy version, so the comparison
    is chosen by probing numpy instead of hard-coding ``.base.base``.
    """
    # probe how this numpy reports .base for a view of a view
    _a = np.arange(5)
    __a = _a[:4][:3]
    if __a.base is _a:
        # chained bases are collapsed
        def is_the_same_base(x, base=self.data.samples):
            return x.base is base
    elif __a.base.base is _a:
        # chained bases are preserved
        def is_the_same_base(x, base=self.data.samples):
            return x.base.base is base
    else:
        raise RuntimeError("Unknown handling of .base by numpy")

    hs = HalfPartitioner()
    spl = Splitter(attr='partitions')
    splits = list(hs.generate(self.data))
    for s in splits:
        # partitioned dataset shared the data
        assert_true(s.samples.base is self.data.samples)
    splits = [list(spl.generate(p)) for p in hs.generate(self.data)]
    for s in splits:
        # we get slicing all the time
        assert_true(is_the_same_base(s[0].samples))
        assert_true(is_the_same_base(s[1].samples))

    spl = Splitter(attr='partitions', noslicing=True)
    splits = [list(spl.generate(p)) for p in hs.generate(self.data)]
    for s in splits:
        # no slicing at all
        assert_false(s[0].samples.base is self.data.samples)
        assert_false(s[1].samples.base is self.data.samples)

    nfs = NFoldPartitioner()
    spl = Splitter(attr='partitions')
    splits = [list(spl.generate(p)) for p in nfs.generate(self.data)]
    for i, s in enumerate(splits):
        # training only first and last split
        if i == 0 or i == len(splits) - 1:
            assert_true(is_the_same_base(s[0].samples))
        else:
            assert_true(s[0].samples.base is None)
        # we get slicing all the time
        assert_true(is_the_same_base(s[1].samples))

    # alternating chunk ids: 0, 1, 0, 1, ...
    step_ds = Dataset(np.random.randn(20, 2),
                      sa={'chunks': np.tile([0, 1], 10)})
    oes = OddEvenPartitioner()
    spl = Splitter(attr='partitions')
    splits = list(oes.generate(step_ds))
    for s in splits:
        # partitioned dataset shared the data
        assert_true(s.samples.base is step_ds.samples)
    splits = [list(spl.generate(p)) for p in oes.generate(step_ds)]
    assert_equal(len(splits), 2)
    for s in splits:
        # we get slicing all the time
        assert_true(is_the_same_base(s[0].samples, step_ds.samples))
        assert_true(is_the_same_base(s[1].samples, step_ds.samples))
开发者ID:psederberg,项目名称:PyMVPA,代码行数:43,代码来源:test_splitter.py
示例14: test_splitter
def test_splitter():
    """Exercise `Splitter` with defaults, custom values, feature attributes
    and as part of a `ChainNode`.
    """
    ds = give_data()

    # --- split with defaults ---------------------------------------------
    by_chunks = Splitter('chunks')
    # calling the splitter directly is expected to raise
    assert_raises(NotImplementedError, by_chunks, ds)

    chunk_splits = list(by_chunks.generate(ds))
    assert_equal(len(chunk_splits), len(ds.sa['chunks'].unique))
    for piece in chunk_splits:
        # it should have performed basic slicing!
        assert_true(piece.samples.base is ds.samples)
        assert_equal(len(piece.sa['chunks'].unique), 1)
        assert_true('lastsplit' in piece.a)
    assert_true(chunk_splits[-1].a.lastsplit)

    # --- now again, more customized --------------------------------------
    by_targets = Splitter('targets',
                          attr_values=[0, 1, 1, 2, 3, 3, 3],
                          count=4,
                          noslicing=True)
    target_splits = list(by_targets.generate(ds))
    assert_equal(len(target_splits), 4)
    for piece in target_splits:
        # it should NOT have performed basic slicing!
        assert_false(piece.samples.base is ds.samples)
        assert_equal(len(piece.sa['targets'].unique), 1)
        assert_equal(len(piece.sa['chunks'].unique), 10)
    assert_true(target_splits[-1].a.lastsplit)

    # two should be identical
    assert_array_equal(target_splits[1].samples, target_splits[2].samples)

    # --- now go wild and split by feature attribute ----------------------
    ds.fa['roi'] = np.repeat([0, 1], 5)
    # splitter should auto-detect that this is a feature attribute
    by_roi = Splitter('roi')
    roi_splits = list(by_roi.generate(ds))
    assert_equal(len(roi_splits), 2)
    for piece in roi_splits:
        assert_true(piece.samples.base is ds.samples)
        assert_equal(len(piece.fa['roi'].unique), 1)
        assert_equal(piece.shape, (100, 5))

    # --- and finally test chained splitters ------------------------------
    chained = ChainNode([by_targets, by_roi, by_chunks])
    chained_splits = list(chained.generate(ds))
    # 4 target splits and 2 roi splits each and 10 chunks each
    assert_equal(len(chained_splits), 80)
开发者ID:Soletmons,项目名称:PyMVPA,代码行数:47,代码来源:test_generators.py
示例15: _sl_call
def _sl_call(self, dataset, roi_ids, nproc):
"""Call to GNBSearchlight
"""
# Local bindings
gnb = self.gnb
params = gnb.params
generator = self.generator
errorfx = self.errorfx
qe = self.queryengine
## if False:
## class A(Learner):
## pass
## self = A()
## import numpy as np
## from mvpa2.clfs.gnb import GNB
## from mvpa2.generators.partition import NFoldPartitioner
## from mvpa2.misc.errorfx import mean_mismatch_error
## from mvpa2.testing.datasets import datasets as tdatasets
## from mvpa2.datasets import Dataset
## from mvpa2.misc.neighborhood import IndexQueryEngine, Sphere
## from mvpa2.clfs.distance import absmin_distance
## import time
## if __debug__:
## from mvpa2.base import debug
## debug.active += ['SLC.*']
## # XXX is it that ugly?
## debug.active.pop(debug.active.index('SLC_'))
## debug.metrics += ['reltime']
## dataset = tdatasets['3dlarge'].copy()
## dataset.fa['voxel_indices'] = dataset.fa.myspace
## sphere = Sphere(radius=1,
## distance_func=absmin_distance)
## qe = IndexQueryEngine(myspace=sphere)
## # Fracisco's data
## #dataset = ds_fp
## qe = IndexQueryEngine(voxel_indices=sphere)
## qe.train(dataset)
## roi_ids = np.arange(dataset.nfeatures)
## gnb = GNB()
## params = gnb.params
## generator = NFoldPartitioner()
## errorfx = mean_mismatch_error
if __debug__:
time_start = time.time()
targets_sa_name = gnb.get_space()
targets_sa = dataset.sa[targets_sa_name]
if __debug__:
debug_slc_ = 'SLC_' in debug.active
# get the dataset information into easy vars
X = dataset.samples
if len(X.shape) != 2:
raise ValueError, \
'Unlike GNB, GNBSearchlight (for now) operates on already' \
'flattened datasets'
labels = targets_sa.value
ulabels = targets_sa.unique
nlabels = len(ulabels)
label2index = dict((l, il) for il, l in enumerate(ulabels))
labels_numeric = np.array([label2index[l] for l in labels])
ulabels_numeric = [label2index[l] for l in ulabels]
# set the feature dimensions
nsamples = len(X)
nrois = len(roi_ids)
s_shape = X.shape[1:] # shape of a single sample
# The shape of results
r_shape = (nrois,) + X.shape[2:]
#
# Everything toward optimization ;)
#
# Silly Yarik thinks that it might be worth to pre-compute
# statistics per each feature within a block of the samples
# which always come together in splits -- most often it is a
# (chunk, label) combination, but since we simply use a
# generator -- who knows! Therefore lets figure out what are
# those blocks and operate on them instead of original samples.
#
# After additional thinking about this -- probably it would be
# just minor additional improvements (ie not worth it) but
# since it is coded already -- let it be so
# 1. Query generator for the splits we will have
if __debug__:
debug('SLC',
'Phase 1. Initializing partitions using %s on %s'
% (generator, dataset))
# Lets just create a dummy ds which will store for us actual sample
# indicies
# XXX we could make it even more lightweight I guess...
dataset_indicies = Dataset(np.arange(nsamples), sa=dataset.sa)
splitter = Splitter(attr=generator.get_space())
splits = list(tuple(splitter.generate(ds_))
#.........这里部分代码省略.........
开发者ID:arnaudsj,项目名称:PyMVPA,代码行数:101,代码来源:gnbsearchlight.py
示例16: test_n_group_split
def test_n_group_split(self):
    """Test NGroupPartitioner together with the reversal of the order
    of the datasets produced by the splitter.

    NOTE(review): the expected counts and chunk ids depend on the
    ``self.data`` fixture defined outside this block.
    """
    # Test 2 groups like HalfSplitter first
    hs = NGroupPartitioner(2)

    # isreversed is 0 or 1: it selects the splitter configuration and also
    # serves as an index into each split below
    for isreversed in (0, 1):
        if isreversed:
            spl = Splitter(attr='partitions', reverse=True)
        else:
            spl = Splitter(attr='partitions')

        splits = [list(spl.generate(p)) for p in hs.generate(self.data)]
        self.assertEqual(len(splits), 2)

        for p in splits:
            self.assertEqual(len(p), 2)
            self.assertEqual(p[0].nsamples, 50)
            self.assertEqual(p[1].nsamples, 50)

        assert_array_equal(splits[0][1 - isreversed].sa['chunks'].unique,
                           [0, 1, 2, 3, 4])
        assert_array_equal(splits[0][isreversed].sa['chunks'].unique,
                           [5, 6, 7, 8, 9])
        assert_array_equal(splits[1][1 - isreversed].sa['chunks'].unique,
                           [5, 6, 7, 8, 9])
        assert_array_equal(splits[1][isreversed].sa['chunks'].unique,
                           [0, 1, 2, 3, 4])

    # check if it works on pure odd and even chunk ids
    moresplits = [list(spl.generate(p))
                  for p in hs.generate(splits[0][0])]

    for split in moresplits:
        # identity check: `!= None` would go through the dataset's own
        # comparison operators
        self.assertTrue(split[0] is not None)
        self.assertTrue(split[1] is not None)

    # now test more groups
    s5 = NGroupPartitioner(5)

    # get the splits
    for isreversed in (0, 1):
        if isreversed:
            spl = Splitter(attr='partitions', reverse=True)
        else:
            spl = Splitter(attr='partitions')
        splits = [list(spl.generate(p)) for p in s5.generate(self.data)]

        # must have 5 splits (one per group)
        self.assertEqual(len(splits), 5)

        # check split content
        assert_array_equal(splits[0][1 - isreversed].sa['chunks'].unique,
                           [0, 1])
        assert_array_equal(splits[0][isreversed].sa['chunks'].unique,
                           [2, 3, 4, 5, 6, 7, 8, 9])
        assert_array_equal(splits[1][1 - isreversed].sa['chunks'].unique,
                           [2, 3])
        assert_array_equal(splits[1][isreversed].sa['chunks'].unique,
                           [0, 1, 4, 5, 6, 7, 8, 9])
        # ...
        assert_array_equal(splits[4][1 - isreversed].sa['chunks'].unique,
                           [8, 9])
        assert_array_equal(splits[4][isreversed].sa['chunks'].unique,
                           [0, 1, 2, 3, 4, 5, 6, 7])

    # Test for too many groups
    def splitcall(spl, dat):
        # materialize the generator so that the error is raised inside
        # the call checked by assertRaises
        return list(spl.generate(dat))

    s20 = NGroupPartitioner(20)
    self.assertRaises(ValueError, splitcall, s20, self.data)
开发者ID:psederberg,项目名称:PyMVPA,代码行数:71,代码来源:test_splitter.py
示例17: _sl_call
def _sl_call(self, dataset, roi_ids, nproc):
"""Call to SimpleStatBaseSearchlight
"""
# Local bindings
generator = self.generator
qe = self.queryengine
errorfx = self.errorfx
if __debug__:
time_start = time.time()
targets_sa_name = self._get_space()
targets_sa = dataset.sa[targets_sa_name]
if __debug__:
debug_slc_ = 'SLC_' in debug.active
# get the dataset information into easy vars
X = dataset.samples
if len(X.shape) != 2:
raise ValueError(
'Unlike a classifier, %s (for now) operates on already'
'flattened datasets' % (self.__class__.__name__))
labels = targets_sa.value
ulabels = targets_sa.unique
nlabels = len(ulabels)
label2index = dict((l, il) for il, l in enumerate(ulabels))
labels_numeric = np.array([label2index[l] for l in labels])
self._ulabels_numeric = [label2index[l] for l in ulabels]
# set the feature dimensions
nsamples = len(X)
nrois = len(roi_ids)
s_shape = X.shape[1:] # shape of a single sample
# The shape of results
r_shape = (nrois,) + X.shape[2:]
#
# Everything toward optimization ;)
#
# Silly Yarik thinks that it might be worth to pre-compute
# statistics per each feature within a block of the samples
# which always come together in splits -- most often it is a
# (chunk, label) combination, but since we simply use a
# generator -- who knows! Therefore lets figure out what are
# those blocks and operate on them instead of original samples.
#
# After additional thinking about this -- probably it would be
# just minor additional improvements (ie not worth it) but
# since it is coded already -- let it be so
# 1. Query generator for the splits we will have
if __debug__:
debug('SLC',
'Phase 1. Initializing partitions using %s on %s'
% (generator, dataset))
# Lets just create a dummy ds which will store for us actual sample
# indicies
# XXX we could make it even more lightweight I guess...
dataset_indicies = Dataset(np.arange(nsamples), sa=dataset.sa)
splitter = Splitter(attr=generator.get_space())
partitions = list(generator.generate(dataset_indicies))
if __debug__:
for p in partitions:
if not (np.all(p.sa[targets_sa_name].value == labels)):
raise NotImplementedError(
"%s does not yet support partitioners altering the targets "
"(e.g. permutators)" % self.__class__)
nsplits = len(partitions)
# ATM we need to keep the splits instead since they are used
# in two places in the code: step 2 and 5
splits = list(tuple(splitter.generate(ds_)) for ds_ in partitions)
del partitions # not used any longer
# 2. Figure out the new 'chunks x labels' blocks of combinations
# of samples
if __debug__:
debug('SLC',
'Phase 2. Blocking data for %i splits and %i labels'
% (nsplits, nlabels))
# array of indicies for label, split1, split2, ...
# through which we will pass later on to figure out
# unique combinations
combinations = np.ones((nsamples, 1+nsplits), dtype=int)*-1
# labels
combinations[:, 0] = labels_numeric
for ipartition, (split1, split2) in enumerate(splits):
combinations[split1.samples[:, 0], 1+ipartition] = 1
combinations[split2.samples[:, 0], 1+ipartition] = 2
# Check for over-sampling, i.e. no same sample used twice here
if not (len(np.unique(split1.samples[:, 0])) == len(split1) and
len(np.unique(split2.samples[:, 0])) == len(split2)):
raise RuntimeError(
"%s needs a partitioner which does not reuse "
"the same the same samples more than once"
% self.__class__)
# sample descriptions -- should be unique for
# samples within the same block
descriptions = [tuple(c) for c in combinations]
#.........这里部分代码省略.........
开发者ID:Arthurkorn,项目名称:PyMVPA,代码行数:101,代码来源:adhocsearchlightbase.py
注:本文中的mvpa2.generators.splitters.Splitter类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论