This article collects typical usage examples of the Python function mvpa.datasets.base.dataset_wizard. If you have been wondering exactly how dataset_wizard is used in practice, the hand-picked code examples below may help.
Presented below are 20 code examples of dataset_wizard, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
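Before the examples, here is a minimal sketch of the typical call. This snippet is not taken from the collection below; it assumes the same PyMVPA 0.x generation used by these examples (the package imports as mvpa, not the later mvpa2), and the sample values are made up for illustration:

import numpy as np
from mvpa.datasets.base import dataset_wizard

# Four samples with three features each; targets and chunks are given
# per sample and become ndarray attributes of the resulting dataset.
ds = dataset_wizard(samples=np.arange(12).reshape((4, 3)),
                    targets=[0, 0, 1, 1],
                    chunks=[0, 0, 1, 1])

print ds.nsamples, ds.nfeatures   # -> 4 3  (Python 2, as in the examples below)
print ds.targets                  # -> [0 0 1 1]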
Example 1: test_correct_dimensions_order
def test_correct_dimensions_order(self, clf):
    """To check if known/present Classifiers are working properly
    with samples being first dimension. Started to worry about
    possible problems while looking at sg where samples are 2nd
    dimension
    """
    # specially crafted dataset -- if dimensions are flipped over
    # the same storage, the problem becomes inseparable. Like in this case
    # an incorrect order of dimensions leads to equal samples [0, 1, 0]
    traindatas = [
        dataset_wizard(samples=np.array([ [0, 0, 1.0],
                                          [1, 0, 0] ]), targets=[0, 1]),
        dataset_wizard(samples=np.array([ [0, 0.0],
                                          [1, 1] ]), targets=[0, 1])]

    clf.ca.change_temporarily(enable_ca=['training_stats'])
    for traindata in traindatas:
        clf.train(traindata)
        self.failUnlessEqual(
            clf.ca.training_stats.percent_correct, 100.0,
            "Classifier %s must have 100%% correct learning on %s. Has %f" %
            (`clf`, traindata.samples, clf.ca.training_stats.percent_correct))
        # and we must be able to predict every original sample thus
        for i in xrange(traindata.nsamples):
            sample = traindata.samples[i, :]
            predicted = clf.predict([sample])
            self.failUnlessEqual(
                [predicted], traindata.targets[i],
                "We must be able to predict sample %s using " % sample +
                "classifier %s" % `clf`)
    clf.ca.reset_changed_temporarily()
Author: esc, Project: PyMVPA, Lines: 30, Source: test_clf.py
Example 2: test_feature_selection_classifier
def test_feature_selection_classifier(self):
    from mvpa.featsel.base import \
         SensitivityBasedFeatureSelection
    from mvpa.featsel.helpers import \
         FixedNElementTailSelector

    # should give lowest weight to the feature with lowest index
    sens_ana = SillySensitivityAnalyzer()
    # should give lowest weight to the feature with highest index
    sens_ana_rev = SillySensitivityAnalyzer(mult=-1)

    # corresponding feature selections
    feat_sel = SensitivityBasedFeatureSelection(sens_ana,
        FixedNElementTailSelector(1, mode='discard'))
    feat_sel_rev = SensitivityBasedFeatureSelection(sens_ana_rev,
        FixedNElementTailSelector(1))

    samples = np.array([ [0, 0, -1], [1, 0, 1], [-1, -1, 1],
                         [-1, 0, 1], [1, -1, 1] ])

    testdata3 = dataset_wizard(samples=samples, targets=1)
    # dummy train data so a proper mapper gets created
    traindata = dataset_wizard(samples=np.array([ [0, 0, -1], [1, 0, 1] ]),
                               targets=[1, 2])

    # targets
    res110 = [1, 1, 1, -1, -1]
    res011 = [-1, 1, -1, 1, -1]

    # first classifier -- 0th feature should be discarded
    clf011 = FeatureSelectionClassifier(self.clf_sign, feat_sel,
                                        enable_ca=['feature_ids'])

    self.clf_sign.ca.change_temporarily(enable_ca=['estimates'])
    clf011.train(traindata)

    self.failUnlessEqual(clf011.predict(testdata3.samples), res011)
    # just a silly test to check that values get assigned in the 'ProxyClassifier'
    self.failUnless(len(clf011.ca.estimates) == len(res110),
                    msg="We need to pass values into ProxyClassifier")
    self.clf_sign.ca.reset_changed_temporarily()

    self.failUnlessEqual(clf011.mapper._oshape, (2,),
        "Feature selection classifier had to be trained on 2 features")

    # second classifier -- last feature should be discarded
    clf011 = FeatureSelectionClassifier(self.clf_sign, feat_sel_rev)
    clf011.train(traindata)
    self.failUnlessEqual(clf011.predict(testdata3.samples), res110)
Author: esc, Project: PyMVPA, Lines: 50, Source: test_clf.py
Example 3: test_coarsen_chunks
def test_coarsen_chunks(self):
    """Just basic testing for now"""
    chunks = [1, 1, 2, 2, 3, 3, 4, 4]
    ds = dataset_wizard(samples=np.arange(len(chunks)).reshape(
        (len(chunks), 1)), targets=[1] * 8, chunks=chunks)
    coarsen_chunks(ds, nchunks=2)
    chunks1 = coarsen_chunks(chunks, nchunks=2)
    self.failUnless((chunks1 == ds.chunks).all())
    self.failUnless((chunks1 == np.asarray([0, 0, 0, 0, 1, 1, 1, 1])).all())

    ds2 = dataset_wizard(samples=np.arange(len(chunks)).reshape(
        (len(chunks), 1)), targets=[1] * 8, chunks=range(len(chunks)))
    coarsen_chunks(ds2, nchunks=2)
    self.failUnless((chunks1 == ds2.chunks).all())
Author: B-Rich, Project: PyMVPA, Lines: 14, Source: test_datasetfx.py
Example 4: test_feature_selection_classifier_with_regression
def test_feature_selection_classifier_with_regression(self):
    from mvpa.featsel.base import \
         SensitivityBasedFeatureSelection
    from mvpa.featsel.helpers import \
         FixedNElementTailSelector
    if sample_clf_reg is None:
        # no regression-capable classifier was found, so nothing to test
        return
    # should give lowest weight to the feature with lowest index
    sens_ana = SillySensitivityAnalyzer()

    # corresponding feature selection
    feat_sel = SensitivityBasedFeatureSelection(sens_ana,
        FixedNElementTailSelector(1, mode='discard'))

    # now test with a regression-based classifier. The problem is
    # that it is determining predictions twice from values and
    # then setting the values from the results, which the second
    # time is set to predictions. The final outcome is that the
    # values are actually predictions...
    dat = dataset_wizard(samples=np.random.randn(4, 10),
                         targets=[-1, -1, 1, 1])
    clf_reg = FeatureSelectionClassifier(sample_clf_reg, feat_sel)
    clf_reg.train(dat)
    _ = clf_reg.predict(dat.samples)
    self.failIf((np.array(clf_reg.ca.estimates)
                 - clf_reg.ca.predictions).sum() == 0,
                msg="Values were set to the predictions in %s." %
                    sample_clf_reg)
Author: esc, Project: PyMVPA, Lines: 29, Source: test_clf.py
Example 5: dumb_feature_binary_dataset
def dumb_feature_binary_dataset():
    """Very simple binary (2 labels) dataset
    """
    data = [
        [1, 0],
        [1, 1],
        [2, 0],
        [2, 1],
        [3, 0],
        [3, 1],
        [4, 0],
        [4, 1],
        [5, 0],
        [5, 1],
        [6, 0],
        [6, 1],
        [7, 0],
        [7, 1],
        [8, 0],
        [8, 1],
        [9, 0],
        [9, 1],
        [10, 0],
        [10, 1],
        [11, 0],
        [11, 1],
        [12, 0],
        [12, 1],
    ]
    regs = ([0] * 12) + ([1] * 12)
    return dataset_wizard(samples=np.array(data), targets=regs,
                          chunks=range(len(regs)))
Author: arokem, Project: PyMVPA, Lines: 32, Source: data_generators.py
Example 6: dumb_feature_dataset
def dumb_feature_dataset():
    """Create a very simple dataset with 2 features and 3 labels
    """
    data = [
        [1, 0],
        [1, 1],
        [2, 0],
        [2, 1],
        [3, 0],
        [3, 1],
        [4, 0],
        [4, 1],
        [5, 0],
        [5, 1],
        [6, 0],
        [6, 1],
        [7, 0],
        [7, 1],
        [8, 0],
        [8, 1],
        [9, 0],
        [9, 1],
        [10, 0],
        [10, 1],
        [11, 0],
        [11, 1],
        [12, 0],
        [12, 1],
    ]
    regs = ([1] * 8) + ([2] * 8) + ([3] * 8)
    return dataset_wizard(samples=np.array(data), targets=regs,
                          chunks=range(len(regs)))
Author: arokem, Project: PyMVPA, Lines: 32, Source: data_generators.py
Example 7: linear1d_gaussian_noise
def linear1d_gaussian_noise(size=100, slope=0.5, intercept=1.0,
                            x_min=-2.0, x_max=3.0, sigma=0.2):
    """A straight line with some Gaussian noise.
    """
    x = np.linspace(start=x_min, stop=x_max, num=size)
    noise = np.random.randn(size) * sigma
    y = x * slope + intercept + noise
    return dataset_wizard(samples=x[:, None], targets=y)
Author: arokem, Project: PyMVPA, Lines: 7, Source: data_generators.py
Example 8: test_origid_handling
def test_origid_handling():
    ds = dataset_wizard(np.atleast_2d(np.arange(35)).T)
    ds.init_origids('both')
    ok_(ds.nsamples == 35)
    assert_equal(len(np.unique(ds.sa.origids)), 35)
    assert_equal(len(np.unique(ds.fa.origids)), 1)
    selector = [3, 7, 10, 15]
    subds = ds[selector]
    assert_array_equal(subds.sa.origids, ds.sa.origids[selector])

    # Now, if we request new origids while they are already present,
    # we could expect different behavior
    assert_raises(ValueError, subds.init_origids, 'both', mode='raises')
    sa_origids = subds.sa.origids.copy()
    fa_origids = subds.fa.origids.copy()
    for s in ('both', 'samples', 'features'):
        assert_raises(RuntimeError, subds.init_origids, s, mode='raise')
        subds.init_origids(s, mode='existing')
        # we should have the same origids as before
        assert_array_equal(subds.sa.origids, sa_origids)
        assert_array_equal(subds.fa.origids, fa_origids)

    # Let's now change them, which should be the default behavior
    subds.init_origids('both')
    assert_equal(len(sa_origids), len(subds.sa.origids))
    assert_equal(len(fa_origids), len(subds.fa.origids))
    # values should change though
    ok_((sa_origids != subds.sa.origids).any())
    ok_((fa_origids != subds.fa.origids).any())
Author: geeragh, Project: PyMVPA, Lines: 29, Source: test_datasetng.py
Example 9: test_idhash
def test_idhash():
    ds = dataset_wizard(np.arange(12).reshape((4, 3)),
                        targets=1, chunks=1)
    origid = ds.idhash

    #XXX BUG -- no assurance that labels would become an array... for now -- do manually
    ds.targets = np.array([3, 1, 2, 3])   # change all labels
    ok_(origid != ds.idhash,
        msg="Changing all targets should alter dataset's idhash")

    origid = ds.idhash

    z = ds.targets[1]
    assert_equal(origid, ds.idhash,
                 msg="Accessing shouldn't change idhash")
    z = ds.chunks
    assert_equal(origid, ds.idhash,
                 msg="Accessing shouldn't change idhash")
    z[2] = 333
    ok_(origid != ds.idhash,
        msg="Changing value in attribute should change idhash")

    origid = ds.idhash
    ds.samples[1, 1] = 1000
    ok_(origid != ds.idhash,
        msg="Changing value in data should change idhash")

    origid = ds.idhash
    orig_labels = ds.targets #.copy()
    ds.permute_targets()
    ok_(origid != ds.idhash,
        msg="Permutation also changes idhash")

    ds.targets = orig_labels
    ok_(origid == ds.idhash,
        msg="idhash should be restored after reassigning orig targets")
Author: geeragh, Project: PyMVPA, Lines: 35, Source: test_datasetng.py
Example 10: pure_multivariate_signal
def pure_multivariate_signal(patterns, signal2noise=1.5, chunks=None,
                             targets=[0, 1]):
    """ Create a 2d dataset with a clear multivariate signal, but no
    univariate information.

    ::

      %%%%%%%%%
      % O % X %
      %%%%%%%%%
      % X % O %
      %%%%%%%%%
    """
    # start with noise
    data = np.random.normal(size=(4 * patterns, 2))

    # add signal
    data[:2 * patterns, 1] += signal2noise
    data[2 * patterns:4 * patterns, 1] -= signal2noise
    data[:patterns, 0] -= signal2noise
    data[2 * patterns:3 * patterns, 0] -= signal2noise
    data[patterns:2 * patterns, 0] += signal2noise
    data[3 * patterns:4 * patterns, 0] += signal2noise

    # two conditions
    regs = np.array((targets[0:1] * patterns) +
                    (targets[1:2] * 2 * patterns) +
                    (targets[0:1] * patterns))

    if chunks is None:
        chunks = range(len(data))
    return dataset_wizard(samples=data, targets=regs, chunks=chunks)
Author: esc, Project: PyMVPA, Lines: 31, Source: data_generators.py
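A hypothetical usage sketch for the generator above (not part of the original snippet; the parameter values are chosen for illustration): with patterns=50 it produces a 200-sample, 2-feature XOR-style dataset in which neither feature alone separates the two classes, but the pair of features does.

# Assumed to run in a context where numpy (as np) and
# pure_multivariate_signal are already imported, as in data_generators.py.
ds = pure_multivariate_signal(patterns=50, signal2noise=1.5)
print ds.nsamples, ds.nfeatures   # -> 200 2
print np.unique(ds.targets)       # -> [0 1]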
Example 11: test_aggregation
def test_aggregation(self):
    data = dataset_wizard(np.arange(20).reshape((4, 5)), targets=1, chunks=1)
    ag_data = aggregate_features(data, np.mean)
    ok_(ag_data.nsamples == 4)
    ok_(ag_data.nfeatures == 1)
    assert_array_equal(ag_data.samples[:, 0], [2, 7, 12, 17])
Author: B-Rich, Project: PyMVPA, Lines: 8, Source: test_datasetfx.py
Example 12: setUp
def setUp(self):
    self.clf_sign = SameSignClassifier()
    self.clf_less1 = Less1Classifier()

    # simple binary dataset
    self.data_bin_1 = dataset_wizard(
        samples=[[0, 0], [-10, -1], [1, 0.1], [1, -1], [-1, 1]],
        targets=[1, 1, 1, -1, -1],   # labels
        chunks=[0, 1, 2, 2, 3])      # chunks
Author: esc, Project: PyMVPA, Lines: 9, Source: test_clf.py
Example 13: test_str
def test_str():
    args = (np.arange(12, dtype=np.int8).reshape((4, 3)),
            range(4),
            [1, 1, 2, 2])
    for iargs in range(1, len(args)):
        ds = dataset_wizard(*(args[:iargs]))
        ds_s = str(ds)
        ok_(ds_s.startswith('<Dataset: 4x3@int8'))
        ok_(ds_s.endswith('>'))
Author: geeragh, Project: PyMVPA, Lines: 9, Source: test_datasetng.py
Example 14: test_mergeds2
def test_mergeds2():
    """Test composition of new datasets by addition of existing ones
    """
    data = dataset_wizard([range(5)], targets=1, chunks=1)
    assert_array_equal(data.UT, [1])

    # a simple sequence has to be a single pattern
    assert_equal(data.nsamples, 1)
    # check correct pattern layout (1x5)
    assert_array_equal(data.samples, [[0, 1, 2, 3, 4]])

    # check for single labels and origin
    assert_array_equal(data.targets, [1])
    assert_array_equal(data.chunks, [1])

    # now try adding a pattern with the wrong shape
    assert_raises(DatasetError,
                  data.append,
                  dataset_wizard(np.ones((2, 3)), targets=1, chunks=1))

    # now add two real patterns
    dss = datasets['uni2large'].samples
    data.append(dataset_wizard(dss[:2, :5], targets=2, chunks=2))
    assert_equal(data.nfeatures, 5)
    assert_array_equal(data.targets, [1, 2, 2])
    assert_array_equal(data.chunks, [1, 2, 2])

    # test automatic origins
    data.append(dataset_wizard(dss[3:5, :5], targets=3, chunks=[0, 1]))
    assert_array_equal(data.chunks, [1, 2, 2, 0, 1])

    # test unique class labels
    assert_array_equal(data.UT, [1, 2, 3])

    # test wrong label length
    assert_raises(ValueError, dataset_wizard, dss[:4, :5],
                  targets=[1, 2, 3], chunks=2)

    # test wrong origin length
    assert_raises(ValueError, dataset_wizard, dss[:4, :5],
                  targets=[1, 2, 3, 4], chunks=[2, 2, 2])
Author: geeragh, Project: PyMVPA, Lines: 42, Source: test_datasetng.py
Example 15: test_invar_features_removal
def test_invar_features_removal(self):
    r = np.random.normal(size=(3, 1))
    ds = dataset_wizard(samples=np.hstack((np.zeros((3, 2)), r)),
                        targets=1)

    self.failUnless(ds.nfeatures == 3)

    dsc = remove_invariant_features(ds)

    self.failUnless(dsc.nfeatures == 1)
    self.failUnless((dsc.samples == r).all())
Author: B-Rich, Project: PyMVPA, Lines: 11, Source: test_datasetfx.py
Example 16: sin_modulated
def sin_modulated(n_instances, n_features, flat=False, noise=0.4):
    """ Generate a (quite) complex multidimensional non-linear dataset

    Used for regression testing. The target is the rounded sin of the
    sum of squared features, plus uniform noise.
    """
    if flat:
        data = np.arange(0.0, 1.0, 1.0 / n_instances) * np.pi
        data.resize(n_instances, n_features)
    else:
        data = np.random.rand(n_instances, n_features) * np.pi
    label = np.sin((data ** 2).sum(1)).round()
    label += np.random.rand(label.size) * noise
    return dataset_wizard(samples=data, targets=label)
Author: arokem, Project: PyMVPA, Lines: 14, Source: data_generators.py
Example 17: test_arrayattributes
def test_arrayattributes():
    samples = np.arange(12).reshape((4, 3))
    labels = range(4)
    chunks = [1, 1, 2, 2]
    ds = dataset_wizard(samples, labels, chunks)

    for a in (ds.samples, ds.targets, ds.chunks):
        ok_(isinstance(a, np.ndarray))

    ds.targets = labels
    ok_(isinstance(ds.targets, np.ndarray))

    ds.chunks = chunks
    ok_(isinstance(ds.chunks, np.ndarray))
Author: geeragh, Project: PyMVPA, Lines: 14, Source: test_datasetng.py
Example 18: test_combined_samplesfeature_selection
def test_combined_samplesfeature_selection():
    data = dataset_wizard(np.arange(20).reshape((4, 5)).view(myarray),
                          targets=[1, 2, 3, 4],
                          chunks=[5, 6, 7, 8])

    # array subclass survives
    ok_(isinstance(data.samples, myarray))

    ok_(data.nsamples == 4)
    ok_(data.nfeatures == 5)
    sel = data[[0, 3], [1, 2]]
    ok_(sel.nsamples == 2)
    ok_(sel.nfeatures == 2)
    assert_array_equal(sel.targets, [1, 4])
    assert_array_equal(sel.chunks, [5, 8])
    assert_array_equal(sel.samples, [[1, 2], [16, 17]])
    # array subclass survives
    ok_(isinstance(sel.samples, myarray))

    # should yield the same result if done sequentially
    sel2 = data[:, [1, 2]]
    sel2 = sel2[[0, 3]]
    assert_array_equal(sel.samples, sel2.samples)
    ok_(sel2.nsamples == 2)
    ok_(sel2.nfeatures == 2)
    # array subclass survives
    ok_(isinstance(sel.samples, myarray))

    assert_raises(ValueError, data.__getitem__, (1, 2, 3))

    # test correct behavior when selecting just single rows/columns
    single = data[0]
    ok_(single.nsamples == 1)
    ok_(single.nfeatures == 5)
    assert_array_equal(single.samples, [[0, 1, 2, 3, 4]])
    single = data[:, 0]
    ok_(single.nsamples == 4)
    ok_(single.nfeatures == 1)
    assert_array_equal(single.samples, [[0], [5], [10], [15]])
    single = data[1, 1]
    ok_(single.nsamples == 1)
    ok_(single.nfeatures == 1)
    assert_array_equal(single.samples, [[6]])
    # array subclass survives
    ok_(isinstance(single.samples, myarray))
Author: geeragh, Project: PyMVPA, Lines: 47, Source: test_datasetng.py
Example 19: chirp_linear
def chirp_linear(n_instances, n_features=4, n_nonbogus_features=2,
                 data_noise=0.4, noise=0.1):
    """ Generates a simple dataset for linear regressions

    Generates a chirp signal, copies it into n_nonbogus_features out of
    n_features (each on top of Gaussian noise), and then uses the signal
    itself, with additional noise, as the labels.
    """
    x = np.linspace(0, 1, n_instances)
    y = np.sin((10 * np.pi * x ** 2))
    data = np.random.normal(size=(n_instances, n_features)) * data_noise
    for i in xrange(n_nonbogus_features):
        data[:, i] += y[:]

    labels = y + np.random.normal(size=(n_instances,)) * noise

    return dataset_wizard(samples=data, targets=labels)
Author: arokem, Project: PyMVPA, Lines: 17, Source: data_generators.py
Example 20: test_samplesgroup_mapper
def test_samplesgroup_mapper():
    data = np.arange(24).reshape(8, 3)
    labels = [0, 1] * 4
    chunks = np.repeat(np.array((0, 1)), 4)

    # correct results
    csamples = [[3, 4, 5], [6, 7, 8], [15, 16, 17], [18, 19, 20]]
    clabels = [0, 1, 0, 1]
    cchunks = [0, 0, 1, 1]

    ds = dataset_wizard(samples=data, targets=labels, chunks=chunks)
    # add some feature attribute -- just to check
    ds.fa['checker'] = np.arange(3)
    ds.init_origids('samples')

    m = mean_group_sample(['targets', 'chunks'])
    mds = m.forward(ds)
    assert_array_equal(mds.samples, csamples)
    # FAs should simply remain the same
    assert_array_equal(mds.fa.checker, np.arange(3))

    # now without grouping
    m = mean_sample()
    # forwarding just the samples should yield the same result
    assert_array_equal(m.forward(ds.samples),
                       m.forward(ds).samples)

    # directly apply to dataset, using an untrained mapper
    m = mean_group_sample(['targets', 'chunks'])
    mapped = ds.get_mapped(m)

    assert_equal(mapped.nsamples, 4)
    assert_equal(mapped.nfeatures, 3)
    assert_array_equal(mapped.samples, csamples)
    assert_array_equal(mapped.targets, clabels)
    assert_array_equal(mapped.chunks, cchunks)
    # make sure origids get regenerated
    assert_array_equal([s.count('+') for s in mapped.sa.origids], [1] * 4)

    # imbalanced dataset -- let's remove the 0th sample so there is no
    # target 0 in the 0th chunk
    ds_ = ds[[0, 1, 3, 5]]
    mapped = ds_.get_mapped(m)
    ok_(len(mapped) == 3)
    ok_(not None in mapped.sa.origids)
Author: esc, Project: PyMVPA, Lines: 46, Source: test_fxmapper.py
Note: The mvpa.datasets.base.dataset_wizard examples in this article were compiled by 纯净天空 from source-code and documentation platforms such as GitHub/MSDocs. The code snippets were selected from open-source projects contributed by various developers, and copyright remains with the original authors; for distribution and use, please refer to the license of the corresponding project. Do not reproduce without permission.