This article collects typical usage examples of the Python class mvpa2.clfs.meta.SplitClassifier. If you are wondering what the SplitClassifier class is for, how to use it, or where to find real-world examples, the curated class code examples below may help.
A total of 11 code examples of the SplitClassifier class are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
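Before the collected test snippets, here is a minimal, self-contained sketch of typical SplitClassifier usage, so the pattern shared by the examples below is easier to follow. It is an illustration only: it assumes a working PyMVPA 2.x installation, and the synthetic dataset parameters, the choice of SMLR as the slave classifier, and the import paths other than mvpa2.clfs.meta.SplitClassifier itself are assumptions based on the usual PyMVPA 2.x layout, not something taken verbatim from the examples below.

# Minimal usage sketch (assumed PyMVPA 2.x import paths; parameters are illustrative)
from mvpa2.clfs.meta import SplitClassifier
from mvpa2.clfs.smlr import SMLR
from mvpa2.generators.partition import NFoldPartitioner
from mvpa2.misc.data_generators import normal_feature_dataset

# Synthetic dataset: 2 classes, 4 features (2 informative), 4 chunks
ds = normal_feature_dataset(perlabel=50, nlabels=2, nfeatures=4,
                            nonbogus_features=[0, 1], snr=3, nchunks=4)

# One slave classifier is trained per partition produced by NFoldPartitioner;
# enable_ca=['stats'] collects cross-split confusion statistics during training
sclf = SplitClassifier(SMLR(), NFoldPartitioner(),
                       enable_ca=['stats', 'training_stats'])
sclf.train(ds)

print(sclf.ca.stats.error)     # mean prediction error across splits
print(len(sclf.clfs))          # number of trained slave classifiers (one per chunk here)
print(sclf.ca.stats.matrix)    # pooled confusion matrix

The snippets below follow this same pattern: they either train a SplitClassifier directly and inspect its stats, or feed its get_sensitivity_analyzer() and ConfusionBasedError outputs into RFE and other sensitivity analyses.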
Example 1: test_split_classifier_extended
def test_split_classifier_extended(self, clf_):
    clf2 = clf_.clone()
    ds = datasets['uni2%s' % self._get_clf_ds(clf2)]
    clf = SplitClassifier(clf=clf_,  # SameSignClassifier(),
                          enable_ca=['stats', 'feature_ids'])
    clf.train(ds)                    # train the beast
    error = clf.ca.stats.error

    cv = CrossValidation(clf2, NFoldPartitioner(), postproc=mean_sample(),
                         enable_ca=['stats', 'training_stats'])
    cverror = cv(ds).samples.squeeze()

    if 'non-deterministic' not in clf.__tags__:
        self.assertTrue(abs(error - cverror) < 0.01,
                        msg="We should get the same error using split classifier as"
                            " using CrossValidation. Got %s and %s"
                            % (error, cverror))
    if cfg.getboolean('tests', 'labile', default='yes'):
        self.assertTrue(error < 0.25,
                        msg="clf should generalize more or less fine. "
                            "Got error %s" % error)
    self.assertEqual(len(clf.ca.stats.sets), len(ds.UC),
                     msg="Should have 1 confusion per each split")
    self.assertEqual(len(clf.clfs), len(ds.UC),
                     msg="Should have number of classifiers equal # of epochs")
Developer: Anhmike, Project: PyMVPA, Lines of code: 26, Source file: test_clf.py
Example 2: __test_matthias_question
def __test_matthias_question(self):
    rfe_clf = LinearCSVMC(C=1)
    rfesvm_split = SplitClassifier(rfe_clf)
    clf = \
        FeatureSelectionClassifier(
            clf=LinearCSVMC(C=1),
            feature_selection=RFE(
                sensitivity_analyzer=rfesvm_split.get_sensitivity_analyzer(
                    combiner=first_axis_mean,
                    transformer=np.abs),
                transfer_error=ConfusionBasedError(
                    rfesvm_split,
                    confusion_state="confusion"),
                stopping_criterion=FixedErrorThresholdStopCrit(0.20),
                feature_selector=FractionTailSelector(
                    0.2, mode='discard', tail='lower'),
                update_sensitivity=True))

    no_permutations = 1000
    permutator = AttributePermutator('targets', count=no_permutations)
    cv = CrossValidation(clf, NFoldPartitioner(),
                         null_dist=MCNullDist(permutator, tail='left'),
                         enable_ca=['stats'])
    error = cv(datasets['uni2small'])
    self.assertTrue(error < 0.4)
    self.assertTrue(cv.ca.null_prob < 0.05)
Developer: PepGardiola, Project: PyMVPA, Lines of code: 27, Source file: test_rfe.py
Example 3: test_splitclf_sensitivities
def test_splitclf_sensitivities():
    datasets = [normal_feature_dataset(perlabel=100, nlabels=2,
                                       nfeatures=4,
                                       nonbogus_features=[0, i + 1],
                                       snr=1, nchunks=2)
                for i in xrange(2)]

    sclf = SplitClassifier(SMLR(),
                           NFoldPartitioner())
    analyzer = sclf.get_sensitivity_analyzer()

    senses1 = analyzer(datasets[0])
    senses2 = analyzer(datasets[1])

    for senses in senses1, senses2:
        # This should be False when comparing two folds
        assert_false(np.allclose(senses.samples[0],
                                 senses.samples[2]))
        assert_false(np.allclose(senses.samples[1],
                                 senses.samples[3]))

    # Moreover, with new data we should have got different results
    # (i.e. it must have retrained correctly)
    for s1, s2 in zip(senses1, senses2):
        assert_false(np.allclose(s1, s2))

    # and we should have "selected" the "correct" voxels
    for i, senses in enumerate((senses1, senses2)):
        assert_equal(set(np.argsort(np.max(np.abs(senses), axis=0))[-2:]),
                     set((0, i + 1)))
Developer: Anhmike, Project: PyMVPA, Lines of code: 29, Source file: test_senses.py
Example 4: test_split_clf
def test_split_clf(self):
    # set up the classifier
    sclf = SplitClassifier(SMLR(),
                           NFoldPartitioner())
    analyzer = sclf.get_sensitivity_analyzer()
    senses = analyzer(self.dataset)

    # This should be False when comparing two folds
    assert_false(np.allclose(senses.samples[0], senses.samples[2]))
Developer: psederberg, Project: PyMVPA, Lines of code: 11, Source file: test_senses.py
Example 5: test_harvesting
def test_harvesting(self):
    """Basic testing of harvesting based on SplitClassifier
    """
    ds = self.data_bin_1
    clf = SplitClassifier(clf=SameSignClassifier(),
                          enable_ca=['stats', 'training_stats'],
                          harvest_attribs=['clf.ca.training_time'],
                          descr="DESCR")
    clf.train(ds)                    # train the beast
    # Number of harvested items should be equal to number of chunks
    self.assertEqual(
        len(clf.ca.harvested['clf.ca.training_time']), len(ds.UC))
    # if we can blame multiple inheritance and ClassWithCollections.__init__
    self.assertEqual(clf.descr, "DESCR")
Developer: arnaudsj, Project: PyMVPA, Lines of code: 14, Source file: test_clf.py
Example 6: __test_fspipeline_with_split_classifier
def __test_fspipeline_with_split_classifier(self, basic_clf):
    #basic_clf = LinearNuSVMC()
    multi_clf = MulticlassClassifier(clf=basic_clf)
    #svm_weigths = LinearSVMWeights(svm)

    # Proper RFE: aggregate sensitivities across multiple splits,
    # but also due to multi class those need to be aggregated
    # somehow. Transfer error here should be 'leave-1-out' error
    # of split classifier itself
    sclf = SplitClassifier(clf=basic_clf)
    rfe = RFE(sensitivity_analyzer=
                  sclf.get_sensitivity_analyzer(
                      enable_ca=["sensitivities"]),
              transfer_error=trans_error,
              feature_selector=FeatureSelectionPipeline(
                  [FractionTailSelector(0.5),
                   FixedNElementTailSelector(1)]),
              train_pmeasure=True)

    # and we get sensitivity analyzer which works on splits and uses
    # sensitivity
    selected_features = rfe(self.dataset)
Developer: andreirusu, Project: PyMVPA, Lines of code: 22, Source file: test_datameasure.py
Example 7: test_split_classifier
def test_split_classifier(self):
    ds = self.data_bin_1
    clf = SplitClassifier(clf=SameSignClassifier(),
                          enable_ca=['stats', 'training_stats',
                                     'feature_ids'])
    clf.train(ds)                    # train the beast
    error = clf.ca.stats.error
    tr_error = clf.ca.training_stats.error

    clf2 = clf.clone()
    cv = CrossValidation(clf2, NFoldPartitioner(), postproc=mean_sample(),
                         enable_ca=['stats', 'training_stats'])
    cverror = cv(ds)
    cverror = cverror.samples.squeeze()
    tr_cverror = cv.ca.training_stats.error

    self.assertEqual(error, cverror,
                     msg="We should get the same error using split classifier as"
                         " using CrossValidation. Got %s and %s"
                         % (error, cverror))
    self.assertEqual(tr_error, tr_cverror,
                     msg="We should get the same training error using split classifier as"
                         " using CrossValidation. Got %s and %s"
                         % (tr_error, tr_cverror))
    self.assertEqual(clf.ca.stats.percent_correct,
                     100,
                     msg="Dummy clf should train perfectly")
    # CV and SplitClassifier should get the same confusion matrices
    assert_array_equal(clf.ca.stats.matrix,
                       cv.ca.stats.matrix)

    self.assertEqual(len(clf.ca.stats.sets),
                     len(ds.UC),
                     msg="Should have 1 confusion per each split")
    self.assertEqual(len(clf.clfs), len(ds.UC),
                     msg="Should have number of classifiers equal # of epochs")
    self.assertEqual(clf.predict(ds.samples), list(ds.targets),
                     msg="Should classify correctly")

    # feature_ids must be list of lists, and since it is not
    # feature-selecting classifier used - we expect all features
    # to be utilized
    #  NOT ANYMORE -- for BoostedClassifier we have now union of all
    #  used features across slave classifiers. That makes
    #  semantics clear. If you need to get deeper -- use upcoming
    #  harvesting facility ;-)
    #self.assertEqual(len(clf.feature_ids), len(ds.uniquechunks))
    #self.assertTrue(np.array([len(ids) == ds.nfeatures
    #                          for ids in clf.feature_ids]).all())

    # Just check if we get it at all ;-)
    summary = clf.summary()
Developer: Anhmike, Project: PyMVPA, Lines of code: 54, Source file: test_clf.py
Example 8: test_split_clf_on_chainpartitioner
def test_split_clf_on_chainpartitioner(self):
    # pretty much a smoke test for #156
    ds = datasets['uni2small']
    part = ChainNode([NFoldPartitioner(cvtype=1),
                      Balancer(attr='targets', count=2,
                               limit='partitions', apply_selection=True)])
    partitions = list(part.generate(ds))
    sclf = SplitClassifier(sample_clf_lin, part, enable_ca=['stats', 'splits'])
    sclf.train(ds)
    pred = sclf.predict(ds)
    assert_equal(len(pred), len(ds))  # rudimentary check
    assert_equal(len(sclf.ca.splits), len(partitions))
    assert_equal(len(sclf.clfs), len(partitions))

    # now let's do sensitivity analyzer just in case
    sclf.untrain()
    sensana = sclf.get_sensitivity_analyzer()
    sens = sensana(ds)
    # basic check that sensitivities varied across splits
    from mvpa2.mappers.fx import FxMapper
    sens_stds = FxMapper('samples', np.std, uattrs=['targets'])(sens)
    assert_true(np.any(sens_stds != 0))
Developer: Anhmike, Project: PyMVPA, Lines of code: 22, Source file: test_clf.py
Example 9: test_rfe
def test_rfe(self, clf):
    # sensitivity analyser and transfer error quantifier use the SAME clf!
    sens_ana = clf.get_sensitivity_analyzer(postproc=maxofabs_sample())
    pmeasure = ProxyMeasure(clf, postproc=BinaryFxNode(mean_mismatch_error,
                                                       'targets'))
    cvmeasure = CrossValidation(clf, NFoldPartitioner(),
                                errorfx=mean_mismatch_error,
                                postproc=mean_sample())

    rfesvm_split = SplitClassifier(clf, OddEvenPartitioner())

    # explore few recipes
    for rfe, data in [
            # because the clf is already trained when computing the sensitivity
            # map, prevent retraining for transfer error calculation
            # Use absolute of the svm weights as sensitivity
            (RFE(sens_ana,
                 pmeasure,
                 Splitter('train'),
                 fselector=FixedNElementTailSelector(1),
                 train_pmeasure=False),
             self.get_data()),
            # use cross-validation within training to get error for the stopping point
            # but use full training data to derive sensitivity
            (RFE(sens_ana,
                 cvmeasure,
                 Repeater(2),  # give the same full dataset to sens_ana and cvmeasure
                 fselector=FractionTailSelector(
                     0.70,
                     mode='select', tail='upper'),
                 train_pmeasure=True),
             normal_feature_dataset(perlabel=20, nchunks=5, nfeatures=200,
                                    nonbogus_features=[0, 1], snr=1.5)),
            # use cross-validation (via SplitClassifier) and get mean
            # of normed sensitivities across those splits
            (RFE(rfesvm_split.get_sensitivity_analyzer(
                     postproc=ChainMapper([FxMapper('features', l2_normed),
                                           FxMapper('samples', np.mean),
                                           FxMapper('samples', np.abs)])),
                 ConfusionBasedError(rfesvm_split, confusion_state='stats'),
                 Repeater(2),  # we will use the same full cv-training dataset
                 fselector=FractionTailSelector(
                     0.50,
                     mode='select', tail='upper'),
                 stopping_criterion=NBackHistoryStopCrit(BestDetector(), 10),
                 train_pmeasure=False,  # we just extract it from existing confusion
                 update_sensitivity=True),
             normal_feature_dataset(perlabel=28, nchunks=7, nfeatures=200,
                                    nonbogus_features=[0, 1], snr=1.5))
            ]:
        # prep data
        # data = datasets['uni2medium']
        data_nfeatures = data.nfeatures

        rfe.train(data)
        resds = rfe(data)

        # fail if orig datasets are changed
        self.assertTrue(data.nfeatures == data_nfeatures)

        # check that the features set with the least error is selected
        if len(rfe.ca.errors):
            e = np.array(rfe.ca.errors)
            if isinstance(rfe._fselector, FixedNElementTailSelector):
                self.assertTrue(resds.nfeatures == data_nfeatures - e.argmin())
            else:
                imin = np.argmin(e)
                if 'does_feature_selection' in clf.__tags__:
                    # if clf is smart it might figure it out right away
                    assert_array_less(imin, len(e))
                else:
                    # in this case we can even check if we had actual
                    # going down/up trend... although -- why up???
                    self.assertTrue(1 < imin < len(e) - 1)
        else:
            self.assertTrue(resds.nfeatures == data_nfeatures)

        # silly check if nfeatures is in decreasing order
        nfeatures = np.array(rfe.ca.nfeatures).copy()
        nfeatures.sort()
        self.assertTrue((nfeatures[::-1] == rfe.ca.nfeatures).all())

        # check if history has elements for every step
        self.assertTrue(set(rfe.ca.history)
                        == set(range(len(np.array(rfe.ca.errors)))))
        # Last (the largest number) can be present multiple times even
        # if we remove 1 feature at a time -- just need to stop well
        # in advance when we have more than 1 feature left ;)
        self.assertTrue(rfe.ca.nfeatures[-1]
                        == len(np.where(rfe.ca.history
                                        == max(rfe.ca.history))[0]))
Developer: PepGardiola, Project: PyMVPA, Lines of code: 93, Source file: test_rfe.py
Example 10: test_analyzer_with_split_classifier
def test_analyzer_with_split_classifier(self, clfds):
    """Test analyzers in split classifier
    """
    clf, ds = clfds  # unroll the tuple

    # We need to skip some LARSes here
    _sclf = str(clf)
    if 'LARS(' in _sclf and "type='stepwise'" in _sclf:
        # ADD KnownToFail thingie from NiPy
        return

    # To not waste too much time testing, let's limit to 3 splits
    nsplits = 3
    partitioner = NFoldPartitioner(count=nsplits)
    mclf = SplitClassifier(clf=clf,
                           partitioner=partitioner,
                           enable_ca=['training_stats',
                                      'stats'])
    sana = mclf.get_sensitivity_analyzer(  # postproc=absolute_features(),
        pass_attr=['fa.nonbogus_targets'],
        enable_ca=["sensitivities"])

    ulabels = ds.uniquetargets
    nlabels = len(ulabels)
    # Can't rely on splitcfg since count-limit is done in __call__
    assert(nsplits == len(list(partitioner.generate(ds))))
    sens = sana(ds)
    assert('nonbogus_targets' in sens.fa)  # were they passed?
    # TODO: those few do not expose biases
    if not len(set(clf.__tags__).intersection(('lars', 'glmnet', 'gpr'))):
        assert('biases' in sens.sa)
        # print sens.sa.biases

    # It should return either ...
    #  nlabels * nsplits
    req_nsamples = [nlabels * nsplits]
    if nlabels == 2:
        # A single sensitivity in case of binary
        req_nsamples += [nsplits]
    else:
        # and for pairs in case of multiclass
        req_nsamples += [(nlabels * (nlabels - 1) / 2) * nsplits]
        # and for 1-vs-1 embedded within Multiclass operating on
        # pairs (e.g. SMLR)
        req_nsamples += [req_nsamples[-1] * 2]

        # Also for regression_based -- they can do multiclass
        # but only 1 sensitivity is provided
        if 'regression_based' in clf.__tags__:
            req_nsamples += [nsplits]

    # # of features should correspond
    self.assertEqual(sens.shape[1], ds.nfeatures)
    # # of samples/sensitivities should also be reasonable
    self.assertTrue(sens.shape[0] in req_nsamples)

    # Check if labels are present
    self.assertTrue('splits' in sens.sa)
    self.assertTrue('targets' in sens.sa)
    # should be 1D -- otherwise dtype object
    self.assertTrue(sens.sa.targets.ndim == 1)

    sens_ulabels = sens.sa['targets'].unique
    # Some labels might be pairs (tuples) so ndarray would be of
    # dtype object and we would need to get them all
    if sens_ulabels.dtype is np.dtype('object'):
        sens_ulabels = np.unique(
            reduce(lambda x, y: x + y, [list(x) for x in sens_ulabels]))
    assert_array_equal(sens_ulabels, ds.sa['targets'].unique)

    errors = [x.percent_correct
              for x in sana.clf.ca.stats.matrices]

    # lets go through all sensitivities and see if we selected the right
    # features
    #if 'meta' in clf.__tags__ and len(sens.samples[0].nonzero()[0])<2:
    if '5%' in clf.descr \
           or (nlabels > 2 and 'regression_based' in clf.__tags__):
        # Some meta classifiers (5% of ANOVA) are too harsh ;-)
        # if we get less than 2 features with non-zero sensitivities we
        # cannot really test
        # Also -- regression based classifiers performance for multiclass
        # is expected to suck in general
        return

    if cfg.getboolean('tests', 'labile', default='yes'):
        for conf_matrix in [sana.clf.ca.training_stats] \
                           + sana.clf.ca.stats.matrices:
            self.assertTrue(
                conf_matrix.percent_correct >= 70,
                msg="We must have trained on each one more or "
                    "less correctly. Got %f%% correct on %d labels" %
                    (conf_matrix.percent_correct,
                     nlabels))

    # Since now we have per split and possibly per label -- lets just find
    # mean per each feature per label across splits
    sensm = FxMapper('samples', lambda x: np.sum(x),
                     uattrs=['targets']).forward(sens)
    sensgm = maxofabs_sample().forward(sensm)  # global max of abs of means

    # ......... the rest of the code is omitted here .........
Developer: andreirusu, Project: PyMVPA, Lines of code: 101, Source file: test_datameasure.py
Example 11: test_regressions
def test_regressions(self, regr):
    """Simple tests on regressions
    """
    if not externals.exists('scipy'):
        raise SkipTest
    else:
        from mvpa2.misc.errorfx import corr_error
    ds = datasets['chirp_linear']
    # we want numeric labels to maintain the previous behavior, especially
    # since we deal with regressions here
    ds.sa.targets = AttributeMap().to_numeric(ds.targets)

    cve = CrossValidation(regr, NFoldPartitioner(), postproc=mean_sample(),
                          errorfx=corr_error, enable_ca=['training_stats', 'stats'])
    # check the default
    #self.assertTrue(cve.transerror.errorfx is corr_error)

    corr = np.asscalar(cve(ds).samples)

    # Our CorrErrorFx should never return NaN
    self.assertTrue(not np.isnan(corr))
    self.assertTrue(corr == cve.ca.stats.stats['CCe'])

    splitregr = SplitClassifier(
        regr, partitioner=OddEvenPartitioner(),
        enable_ca=['training_stats', 'stats'])
    splitregr.train(ds)
    split_corr = splitregr.ca.stats.stats['CCe']
    split_corr_tr = splitregr.ca.training_stats.stats['CCe']

    for confusion, error in (
            (cve.ca.stats, corr),
            (splitregr.ca.stats, split_corr),
            (splitregr.ca.training_stats, split_corr_tr),
            ):
        #TODO: test confusion statistics
        # Part of it for now -- CCe
        for conf in confusion.summaries:
            stats = conf.stats
            if cfg.getboolean('tests', 'labile', default='yes'):
                self.assertTrue(stats['CCe'] < 0.5)
            self.assertEqual(stats['CCe'], stats['Summary CCe'])

        s0 = confusion.as_string(short=True)
        s1 = confusion.as_string(short=False)

        for s in [s0, s1]:
            self.assertTrue(len(s) > 10,
                            msg="We should get some string representation "
                                "of regression summary. Got %s" % s)
        if cfg.getboolean('tests', 'labile', default='yes'):
            self.assertTrue(error < 0.2,
                            msg="Regressions should perform well on a simple "
                                "dataset. Got correlation error of %s " % error)

        # Test access to summary statistics
        # YOH: lets start making testing more reliable.
        #      p-value for such accident to have is verrrry tiny,
        #      so if regression works -- it better has at least 0.5 ;)
        #      otherwise fix it! ;)
        # YOH: not now -- issues with libsvr in SG and linear kernel
        if cfg.getboolean('tests', 'labile', default='yes'):
            self.assertTrue(confusion.stats['CCe'] < 0.5)

    # just to check if it works fine
    split_predictions = splitregr.predict(ds.samples)
Developer: Anhmike, Project: PyMVPA, Lines of code: 66, Source file: test_regr.py
Note: the mvpa2.clfs.meta.SplitClassifier class examples in this article were compiled by 纯净天空 from source-code and documentation hosting platforms such as GitHub/MSDocs. The code snippets were selected from open-source projects contributed by various developers; copyright of the source code remains with the original authors, and for distribution and use please refer to the License of the corresponding project. Do not reproduce without permission.