This article collects typical usage examples of the Python function mvpa2.datasets.base.dataset_wizard. If you have been wondering how to call dataset_wizard, what its arguments look like in practice, or simply want to see it used in real code, the curated examples below should help.
A total of 20 code examples of dataset_wizard are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
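Before turning to the project examples, here is a minimal sketch of the basic call, assuming a standard PyMVPA 2 installation; the array values, labels, and chunk ids are invented purely for illustration:

import numpy as np
from mvpa2.datasets.base import dataset_wizard

# hypothetical toy data: 4 samples x 3 features
samples = np.arange(12, dtype=float).reshape((4, 3))
ds = dataset_wizard(samples=samples,
                    targets=[0, 0, 1, 1],   # one label per sample
                    chunks=[0, 1, 2, 3])    # one chunk id per sample

assert ds.nsamples == 4 and ds.nfeatures == 3
# targets and chunks become per-sample attributes: ds.sa.targets and ds.sa.chunks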
Example 1: test_correct_dimensions_order
def test_correct_dimensions_order(self, clf):
    """To check if known/present Classifiers are working properly
    with samples being first dimension. Started to worry about
    possible problems while looking at sg where samples are 2nd
    dimension
    """
    # specially crafted dataset -- if dimensions are flipped over
    # the same storage, problem becomes unseparable. Like in this case
    # incorrect order of dimensions lead to equal samples [0, 1, 0]
    traindatas = [
        dataset_wizard(samples=np.array([ [0, 0, 1.0],
                                          [1, 0, 0] ]), targets=[0, 1]),
        dataset_wizard(samples=np.array([ [0, 0.0],
                                          [1, 1] ]), targets=[0, 1])]

    clf.ca.change_temporarily(enable_ca = ['training_stats'])
    for traindata in traindatas:
        clf.train(traindata)
        self.assertEqual(clf.ca.training_stats.percent_correct, 100.0,
            "Classifier %s must have 100%% correct learning on %s. Has %f" %
            (`clf`, traindata.samples, clf.ca.training_stats.percent_correct))

        # and we must be able to predict every original sample thus
        for i in xrange(traindata.nsamples):
            sample = traindata.samples[i,:]
            predicted = clf.predict([sample])
            self.assertEqual([predicted], traindata.targets[i],
                "We must be able to predict sample %s using " % sample +
                "classifier %s" % `clf`)
    clf.ca.reset_changed_temporarily()
Developer: Anhmike, Project: PyMVPA, Lines of code: 30, Source file: test_clf.py
Example 2: test_feature_selection_classifier
def test_feature_selection_classifier(self):
    from mvpa2.featsel.base import \
         SensitivityBasedFeatureSelection
    from mvpa2.featsel.helpers import \
         FixedNElementTailSelector

    # should give lowest weight to the feature with lowest index
    sens_ana = SillySensitivityAnalyzer()
    # should give lowest weight to the feature with highest index
    sens_ana_rev = SillySensitivityAnalyzer(mult=-1)

    # corresponding feature selections
    feat_sel = SensitivityBasedFeatureSelection(sens_ana,
        FixedNElementTailSelector(1, mode='discard'))
    feat_sel_rev = SensitivityBasedFeatureSelection(sens_ana_rev,
        FixedNElementTailSelector(1))

    samples = np.array([ [0, 0, -1], [1, 0, 1], [-1, -1, 1],
                         [-1, 0, 1], [1, -1, 1] ])

    testdata3 = dataset_wizard(samples=samples, targets=1)
    # dummy train data so proper mapper gets created
    traindata = dataset_wizard(samples=np.array([ [0, 0, -1], [1, 0, 1] ]),
                               targets=[1, 2])

    # targets
    res110 = [1, 1, 1, -1, -1]
    res011 = [-1, 1, -1, 1, -1]

    # first classifier -- 0th feature should be discarded
    clf011 = FeatureSelectionClassifier(self.clf_sign, feat_sel,
                                        enable_ca=['feature_ids'])

    self.clf_sign.ca.change_temporarily(enable_ca=['estimates'])
    clf011.train(traindata)

    self.assertEqual(clf011.predict(testdata3.samples), res011)
    # just silly test if we get values assigned in the 'ProxyClassifier'
    self.assertTrue(len(clf011.ca.estimates) == len(res110),
                    msg="We need to pass values into ProxyClassifier")
    self.clf_sign.ca.reset_changed_temporarily()

    self.assertEqual(clf011.mapper._oshape, (2,),
        "Feature selection classifier had to be trained on 2 features")

    # second classifier -- last feature should be discarded
    clf011 = FeatureSelectionClassifier(self.clf_sign, feat_sel_rev)
    clf011.train(traindata)
    self.assertEqual(clf011.predict(testdata3.samples), res110)
Developer: Anhmike, Project: PyMVPA, Lines of code: 50, Source file: test_clf.py
Example 3: test_coarsen_chunks
def test_coarsen_chunks(self):
    """Just basic testing for now"""
    chunks = [1,1,2,2,3,3,4,4]
    ds = dataset_wizard(samples=np.arange(len(chunks)).reshape(
        (len(chunks),1)), targets=[1]*8, chunks=chunks)
    coarsen_chunks(ds, nchunks=2)
    chunks1 = coarsen_chunks(chunks, nchunks=2)
    self.assertTrue((chunks1 == ds.chunks).all())
    self.assertTrue((chunks1 == np.asarray([0,0,0,0,1,1,1,1])).all())

    ds2 = dataset_wizard(samples=np.arange(len(chunks)).reshape(
        (len(chunks),1)), targets=[1]*8, chunks=range(len(chunks)))
    coarsen_chunks(ds2, nchunks=2)
    self.assertTrue((chunks1 == ds.chunks).all())
Developer: armaneshaghi, Project: PyMVPA, Lines of code: 14, Source file: test_datasetfx.py
Example 4: dumb_feature_binary_dataset
def dumb_feature_binary_dataset():
    """Very simple binary (2 labels) dataset
    """
    data = [
        [1, 0],
        [1, 1],
        [2, 0],
        [2, 1],
        [3, 0],
        [3, 1],
        [4, 0],
        [4, 1],
        [5, 0],
        [5, 1],
        [6, 0],
        [6, 1],
        [7, 0],
        [7, 1],
        [8, 0],
        [8, 1],
        [9, 0],
        [9, 1],
        [10, 0],
        [10, 1],
        [11, 0],
        [11, 1],
        [12, 0],
        [12, 1],
    ]
    regs = ([0] * 12) + ([1] * 12)
    return dataset_wizard(samples=np.array(data), targets=regs, chunks=range(len(regs)))
Developer: reka-daniel, Project: PyMVPA, Lines of code: 32, Source file: data_generators.py
Example 5: dumb_feature_dataset
def dumb_feature_dataset():
    """Create a very simple dataset with 2 features and 3 labels
    """
    data = [
        [1, 0],
        [1, 1],
        [2, 0],
        [2, 1],
        [3, 0],
        [3, 1],
        [4, 0],
        [4, 1],
        [5, 0],
        [5, 1],
        [6, 0],
        [6, 1],
        [7, 0],
        [7, 1],
        [8, 0],
        [8, 1],
        [9, 0],
        [9, 1],
        [10, 0],
        [10, 1],
        [11, 0],
        [11, 1],
        [12, 0],
        [12, 1],
    ]
    regs = ([1] * 8) + ([2] * 8) + ([3] * 8)
    return dataset_wizard(samples=np.array(data), targets=regs, chunks=range(len(regs)))
Developer: reka-daniel, Project: PyMVPA, Lines of code: 32, Source file: data_generators.py
Example 6: linear1d_gaussian_noise
def linear1d_gaussian_noise(size=100, slope=0.5, intercept=1.0, x_min=-2.0, x_max=3.0, sigma=0.2):
    """A straight line with some Gaussian noise.
    """
    x = np.linspace(start=x_min, stop=x_max, num=size)
    noise = np.random.randn(size) * sigma
    y = x * slope + intercept + noise
    return dataset_wizard(samples=x[:, None], targets=y)
Developer: reka-daniel, Project: PyMVPA, Lines of code: 7, Source file: data_generators.py
Example 7: test_DissimilarityConsistencyMeasure
def test_DissimilarityConsistencyMeasure():
    targets = np.tile(xrange(3),2)
    chunks = np.repeat(np.array((0,1)),3)
    # correct results
    cres1 = 0.41894348
    cres2 = np.array([[ 0.16137995, 0.73062639, 0.59441713]])
    dc1 = data[0:3,:] - np.mean(data[0:3,:],0)
    dc2 = data[3:6,:] - np.mean(data[3:6,:],0)
    center = squareform(np.corrcoef(pdist(dc1,'correlation'),pdist(dc2,'correlation')),
                        checks=False).reshape((1,-1))
    dsm1 = stats.rankdata(pdist(data[0:3,:],'correlation').reshape((1,-1)))
    dsm2 = stats.rankdata(pdist(data[3:6,:],'correlation').reshape((1,-1)))
    spearman = squareform(np.corrcoef(np.vstack((dsm1,dsm2))),
                          checks=False).reshape((1,-1))

    ds = dataset_wizard(samples=data, targets=targets, chunks=chunks)
    dscm = DissimilarityConsistencyMeasure()
    res1 = dscm(ds)
    dscm_c = DissimilarityConsistencyMeasure(center_data=True)
    res2 = dscm_c(ds)
    dscm_sp = DissimilarityConsistencyMeasure(consistency_metric='spearman')
    res3 = dscm_sp(ds)

    ds.append(ds)
    chunks = np.repeat(np.array((0,1,2,)),4)
    ds.sa['chunks'] = chunks
    res4 = dscm(ds)

    assert_almost_equal(np.mean(res1.samples),cres1)
    assert_array_almost_equal(res2.samples, center)
    assert_array_almost_equal(res3.samples, spearman)
    assert_array_almost_equal(res4.samples,cres2)
Developer: mfalkiewicz, Project: PyMVPA, Lines of code: 31, Source file: test_rsa.py
Example 8: test_PDist
def test_PDist():
    targets = np.tile(xrange(3),2)
    chunks = np.repeat(np.array((0,1)),3)
    ds = dataset_wizard(samples=data, targets=targets, chunks=chunks)
    data_c = data - np.mean(data,0)
    # DSM matrix elements should come out as samples of one feature
    # to be in line with what e.g. a classifier returns -- facilitates
    # collection in a searchlight ...
    euc = pdist(data, 'euclidean')[None].T
    pear = pdist(data, 'correlation')[None].T
    city = pdist(data, 'cityblock')[None].T
    center_sq = squareform(pdist(data_c,'correlation'))

    # Now center each chunk separately
    dsm1 = PDist()
    dsm2 = PDist(pairwise_metric='euclidean')
    dsm3 = PDist(pairwise_metric='cityblock')
    dsm4 = PDist(center_data=True,square=True)
    assert_array_almost_equal(dsm1(ds).samples,pear)
    assert_array_almost_equal(dsm2(ds).samples,euc)

    dsm_res = dsm3(ds)
    assert_array_almost_equal(dsm_res.samples,city)
    # length correspondings to a single triangular matrix
    assert_equal(len(dsm_res.sa.pairs), len(ds) * (len(ds) - 1) / 2)
    # generate label pairs actually reflect the vectorform generated by
    # squareform()
    dsm_res_square = squareform(dsm_res.samples.T[0])
    for i, p in enumerate(dsm_res.sa.pairs):
        assert_equal(dsm_res_square[p[0], p[1]], dsm_res.samples[i, 0])

    dsm_res = dsm4(ds)
    assert_array_almost_equal(dsm_res.samples,center_sq)
    # sample attributes are carried over
    assert_almost_equal(ds.sa.targets, dsm_res.sa.targets)
Developer: Arthurkorn, Project: PyMVPA, Lines of code: 33, Source file: test_rsa.py
Example 9: _test_mcasey20120222
def _test_mcasey20120222():  # pragma: no cover
    # http://lists.alioth.debian.org/pipermail/pkg-exppsy-pymvpa/2012q1/002034.html

    # This one is conditioned on allowing # of samples to be changed
    # by the mapper provided to MappedClassifier. See
    # https://github.com/yarikoptic/PyMVPA/tree/_tent/allow_ch_nsamples

    import numpy as np
    from mvpa2.datasets.base import dataset_wizard
    from mvpa2.generators.partition import NFoldPartitioner
    from mvpa2.mappers.base import ChainMapper
    from mvpa2.mappers.svd import SVDMapper
    from mvpa2.mappers.fx import mean_group_sample
    from mvpa2.clfs.svm import LinearCSVMC
    from mvpa2.clfs.meta import MappedClassifier
    from mvpa2.measures.base import CrossValidation

    mapper = ChainMapper([mean_group_sample(['targets','chunks']),
                          SVDMapper()])
    clf = MappedClassifier(LinearCSVMC(), mapper)
    cvte = CrossValidation(clf, NFoldPartitioner(),
                           enable_ca=['repetition_results', 'stats'])

    ds = dataset_wizard(
        samples=np.arange(32).reshape((8, -1)),
        targets=[1, 1, 2, 2, 1, 1, 2, 2],
        chunks=[1, 1, 1, 1, 2, 2, 2, 2])

    errors = cvte(ds)
Developer: beausievers, Project: PyMVPA, Lines of code: 29, Source file: test_usecases.py
Example 10: test_feature_selection_classifier_with_regression
def test_feature_selection_classifier_with_regression(self):
    from mvpa2.featsel.base import \
         SensitivityBasedFeatureSelection
    from mvpa2.featsel.helpers import \
         FixedNElementTailSelector
    if sample_clf_reg is None:
        # none regression was found, so nothing to test
        return
    # should give lowest weight to the feature with lowest index
    sens_ana = SillySensitivityAnalyzer()

    # corresponding feature selections
    feat_sel = SensitivityBasedFeatureSelection(sens_ana,
        FixedNElementTailSelector(1, mode='discard'))

    # now test with regression-based classifier. The problem is
    # that it is determining predictions twice from values and
    # then setting the values from the results, which the second
    # time is set to predictions. The final outcome is that the
    # values are actually predictions...
    dat = dataset_wizard(samples=np.random.randn(4, 10),
                         targets=[-1, -1, 1, 1])
    clf_reg = FeatureSelectionClassifier(sample_clf_reg, feat_sel)
    clf_reg.train(dat)
    _ = clf_reg.predict(dat.samples)
    self.failIf((np.array(clf_reg.ca.estimates)
                 - clf_reg.ca.predictions).sum()==0,
                msg="Values were set to the predictions in %s." %
                sample_clf_reg)
Developer: Anhmike, Project: PyMVPA, Lines of code: 29, Source file: test_clf.py
Example 11: test_mapper_vs_zscore
def test_mapper_vs_zscore():
    """Test by comparing to results of elderly z-score function
    """
    # data: 40 sample feature line in 20d space (40x20; samples x features)
    dss = [
        dataset_wizard(np.concatenate(
            [np.arange(40) for i in range(20)]).reshape(20,-1).T,
            targets=1, chunks=1),
        ] + datasets.values()

    for ds in dss:
        ds1 = deepcopy(ds)
        ds2 = deepcopy(ds)

        zsm = ZScoreMapper(chunks_attr=None)
        assert_raises(RuntimeError, zsm.forward, ds1.samples)
        idhashes = (idhash(ds1), idhash(ds1.samples))
        zsm.train(ds1)
        idhashes_train = (idhash(ds1), idhash(ds1.samples))
        assert_equal(idhashes, idhashes_train)

        # forward dataset
        ds1z_ds = zsm.forward(ds1)
        idhashes_forwardds = (idhash(ds1), idhash(ds1.samples))
        # must not modify samples in place!
        assert_equal(idhashes, idhashes_forwardds)

        # forward samples explicitly
        ds1z = zsm.forward(ds1.samples)
        idhashes_forward = (idhash(ds1), idhash(ds1.samples))
        assert_equal(idhashes, idhashes_forward)

        zscore(ds2, chunks_attr=None)
        assert_array_almost_equal(ds1z, ds2.samples)
        assert_array_equal(ds1.samples, ds.samples)
Developer: Anhmike, Project: PyMVPA, Lines of code: 35, Source file: test_zscoremapper.py
Example 12: test_origid_handling
def test_origid_handling():
    ds = dataset_wizard(np.atleast_2d(np.arange(35)).T)
    ds.init_origids('both')
    ok_(ds.nsamples == 35)
    assert_equal(len(np.unique(ds.sa.origids)), 35)
    assert_equal(len(np.unique(ds.fa.origids)), 1)
    selector = [3, 7, 10, 15]
    subds = ds[selector]
    assert_array_equal(subds.sa.origids, ds.sa.origids[selector])

    # Now if we request new origids if they are present we could
    # expect different behavior
    assert_raises(ValueError, subds.init_origids, 'both', mode='raises')
    sa_origids = subds.sa.origids.copy()
    fa_origids = subds.fa.origids.copy()
    for s in ('both', 'samples', 'features'):
        assert_raises(RuntimeError, subds.init_origids, s, mode='raise')
        subds.init_origids(s, mode='existing')
        # we should have the same origids as before
        assert_array_equal(subds.sa.origids, sa_origids)
        assert_array_equal(subds.fa.origids, fa_origids)

    # Lets now change, which should be default behavior
    subds.init_origids('both')
    assert_equal(len(sa_origids), len(subds.sa.origids))
    assert_equal(len(fa_origids), len(subds.fa.origids))
    # values should change though
    ok_((sa_origids != subds.sa.origids).any())
    ok_((fa_origids != subds.fa.origids).any())
Developer: psederberg, Project: PyMVPA, Lines of code: 29, Source file: test_datasetng.py
Example 13: test_idhash
def test_idhash():
    ds = dataset_wizard(np.arange(12).reshape((4, 3)),
                        targets=1, chunks=1)
    origid = ds.idhash
    #XXX BUG -- no assurance that labels would become an array... for now -- do manually
    ds.targets = np.array([3, 1, 2, 3])   # change all labels
    ok_(origid != ds.idhash,
        msg="Changing all targets should alter dataset's idhash")

    origid = ds.idhash

    z = ds.targets[1]
    assert_equal(origid, ds.idhash,
                 msg="Accessing shouldn't change idhash")
    z = ds.chunks
    assert_equal(origid, ds.idhash,
                 msg="Accessing shouldn't change idhash")
    z[2] = 333
    ok_(origid != ds.idhash,
        msg="Changing value in attribute should change idhash")

    origid = ds.idhash
    ds.samples[1, 1] = 1000
    ok_(origid != ds.idhash,
        msg="Changing value in data should change idhash")

    origid = ds.idhash
    orig_labels = ds.targets #.copy()
    ds.sa.targets = range(len(ds))
    ok_(origid != ds.idhash,
        msg="Chaging attribute also changes idhash")

    ds.targets = orig_labels
    ok_(origid == ds.idhash,
        msg="idhash should be restored after reassigning orig targets")
Developer: psederberg, Project: PyMVPA, Lines of code: 35, Source file: test_datasetng.py
Example 14: pure_multivariate_signal
def pure_multivariate_signal(patterns, signal2noise = 1.5, chunks=None, targets=[0, 1]):
    """ Create a 2d dataset with a clear multivariate signal, but no
    univariate information.

    ::

      %%%%%%%%%
      % O % X %
      %%%%%%%%%
      % X % O %
      %%%%%%%%%
    """

    # start with noise
    data = np.random.normal(size=(4*patterns, 2))

    # add signal
    data[:2*patterns, 1] += signal2noise

    data[2*patterns:4*patterns, 1] -= signal2noise
    data[:patterns, 0] -= signal2noise
    data[2*patterns:3*patterns, 0] -= signal2noise

    data[patterns:2*patterns, 0] += signal2noise
    data[3*patterns:4*patterns, 0] += signal2noise

    # two conditions
    regs = np.array((targets[0:1] * patterns) + (targets[1:2] * 2 * patterns) + (targets[0:1] * patterns))

    if chunks is None:
        chunks = range(len(data))
    return dataset_wizard(samples=data, targets=regs, chunks=chunks)
Developer: psederberg, Project: PyMVPA, Lines of code: 31, Source file: data_generators.py
Example 15: test_aggregation
def test_aggregation(self):
    data = dataset_wizard(np.arange( 20 ).reshape((4, 5)), targets=1, chunks=1)
    ag_data = aggregate_features(data, np.mean)
    ok_(ag_data.nsamples == 4)
    ok_(ag_data.nfeatures == 1)
    assert_array_equal(ag_data.samples[:, 0], [2, 7, 12, 17])
Developer: armaneshaghi, Project: PyMVPA, Lines of code: 8, Source file: test_datasetfx.py
Example 16: setUp
def setUp(self):
    self.clf_sign = SameSignClassifier()
    self.clf_less1 = Less1Classifier()

    # simple binary dataset
    self.data_bin_1 = dataset_wizard(
        samples=[[0,0],[-10,-1],[1,0.1],[1,-1],[-1,1]],
        targets=[1, 1, 1, -1, -1],  # labels
        chunks=[0, 1, 2, 2, 3])     # chunks
Developer: Anhmike, Project: PyMVPA, Lines of code: 9, Source file: test_clf.py
Example 17: test_nfold_random_counted_selection_partitioner_huge
def test_nfold_random_counted_selection_partitioner_huge(self):
    # Just test that it completes in a reasonable time and does
    # not blow up as if would do if it was not limited by count
    kwargs = dict(count=10)
    ds = dataset_wizard(np.arange(1000).reshape((-1, 1)), targets=range(1000), chunks=range(500) * 2)
    split_partitions_random = [
        tuple(x.sa.partitions) for x in NFoldPartitioner(100, selection_strategy="random", **kwargs).generate(ds)
    ]
    assert_equal(len(split_partitions_random), 10)  # we get just 10
Developer: pckillerbrici, Project: PyMVPA, Lines of code: 9, Source file: test_splitter.py
Example 18: test_str
def test_str():
    args = ( np.arange(12, dtype=np.int8).reshape((4, 3)),
             range(4),
             [1, 1, 2, 2])
    for iargs in range(1, len(args)):
        ds = dataset_wizard(*(args[:iargs]))
        ds_s = str(ds)
        ok_(ds_s.startswith('<Dataset: 4x3@int8'))
        ok_(ds_s.endswith('>'))
Developer: psederberg, Project: PyMVPA, Lines of code: 9, Source file: test_datasetng.py
Example 19: test_nonfinite_features_removal
def test_nonfinite_features_removal(self):
    r = np.random.normal(size=(4, 5))
    ds = dataset_wizard(r, targets=1, chunks=1)
    ds.samples[2,0]=np.NaN
    ds.samples[3,3]=np.Inf

    dsc = remove_nonfinite_features(ds)

    self.assertTrue(dsc.nfeatures == 3)
    assert_array_equal(ds[:, [1, 2, 4]].samples, dsc.samples)
Developer: Anhmike, Project: PyMVPA, Lines of code: 10, Source file: test_datasetfx.py
Example 20: test_invar_features_removal
def test_invar_features_removal(self):
    r = np.random.normal(size=(3, 1))
    ds = dataset_wizard(samples=np.hstack((np.zeros((3, 2)), r)), targets=1)

    self.failUnless(ds.nfeatures == 3)

    dsc = remove_invariant_features(ds)

    self.failUnless(dsc.nfeatures == 1)
    self.failUnless((dsc.samples == r).all())
Developer: psederberg, Project: PyMVPA, Lines of code: 10, Source file: test_datasetfx.py
Note: The mvpa2.datasets.base.dataset_wizard examples in this article were compiled by 纯净天空 from source-code and documentation platforms such as GitHub and MSDocs. The code snippets are taken from open-source projects contributed by various developers; copyright remains with the original authors, and any distribution or use should follow the corresponding project's License. Do not reproduce without permission.