This article collects typical usage examples of the Python class pySPACE.resources.dataset_defs.base.BaseDataset. If you are wondering what the BaseDataset class does, how to use it, or what real code that uses it looks like, the curated examples below should help.
The following presents 20 code examples of the BaseDataset class, taken from the pyspace project and sorted by popularity by default.
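Before diving into the examples, here is a minimal sketch of the BaseDataset calls that recur throughout them (load, load_meta_data, get_data, store, store_meta_data). The dataset directory names are invented for illustration, and the sketch assumes a configured pySPACE installation whose storage directory contains a dataset with a metadata.yaml file:

import os
import pySPACE
from pySPACE.resources.dataset_defs.base import BaseDataset

# Hypothetical dataset directory below the pySPACE storage folder.
dataset_dir = os.path.join(pySPACE.configuration.storage, "my_collection")

# Read only the metadata.yaml of the dataset ...
meta_data = BaseDataset.load_meta_data(dataset_dir)
# ... or load the complete dataset (the concrete type is chosen from the meta data).
dataset = BaseDataset.load(dataset_dir)

# Access the samples of run 0, split 0 of the training part,
# as done in several examples below.
train_data = dataset.get_data(0, 0, "train")

# Store the dataset (or only its meta data) at a new, hypothetical location.
target_dir = os.path.join(pySPACE.configuration.storage, "my_collection_copy")
dataset.store(target_dir)
BaseDataset.store_meta_data(target_dir, dataset.meta_data)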
Example 1: _merge_pickle_files
def _merge_pickle_files(self, target_collection_path, source_collection_pathes,
                        train_set_name_suffix, target_collection_params):
    """ Merge all collections in source_collection_pathes and store them \
        in the target collection"""
    # load a first collection, in which the data of all other collections
    # is assembled
    target_collection = BaseDataset.load(source_collection_pathes[0])
    try:
        author = pwd.getpwuid(os.getuid())[4]
    except:
        author = "unknown"
        self._log("Author could not be resolved.", level=logging.WARNING)
    date = time.strftime("%Y%m%d_%H_%M_%S")
    # Delete node_chain file name
    try:
        target_collection.meta_data.pop("node_chain_file_name")
    except:
        pass
    # Update meta data and store it
    k = "test" if self.reverse else "train"
    target_collection_params["__INPUT_DATASET__"][k] = \
        [s_c_p.split(os.sep)[-2] for s_c_p in source_collection_pathes]
    target_collection_params["__RESULT_DIRECTORY__"] = self.result_directory
    target_collection.meta_data.update({
        "author": author,
        "date": date,
        "dataset_directory": target_collection_path,
        "train_test": True,
        "parameter_setting": target_collection_params,
        "input_collection_name": source_collection_pathes[0][len(
            pySPACE.configuration.storage):]
    })
    # merge data of all other collections to target collection
    for source_collection_path in source_collection_pathes[1:]:
        source_collection = BaseDataset.load(source_collection_path)
        for run in source_collection.get_run_numbers():
            for split in source_collection.get_split_numbers():
                data = source_collection.get_data(run, split,
                                                  train_set_name_suffix)
                target_data = target_collection.get_data(run, split,
                                                         train_set_name_suffix)
                # actual data is stored in a list that has to be extended
                target_data.extend(data)
    # if only test data was given, the "Rest_vs" collection is stored as
    # training data
    if not self.reverse and "test" == train_set_name_suffix:
        # exchange the "test" in key tuple to "train" before storing
        for key in target_collection.data.keys():
            assert("test" == key[2])
            value = target_collection.data.pop(key)
            key = (key[0], key[1], "train")
            target_collection.data[key] = value
    target_collection.store(target_collection_path)
Contributor: BioinformaticsArchive, Project: pyspace, Code lines: 57, Source file: merge.py
Example 2: store
def store(self, result_dir, s_format="None"):
    if not s_format == "None":
        self._log("The format %s is not supported!" % s_format, level=logging.CRITICAL)
        return
    # Update the meta data
    author = get_author()
    self.update_meta_data({"type": "only output of individual nodes stored",
                           "storage_format": s_format,
                           "author": author,
                           "data_pattern": "no data stored"})
    # Store meta data
    BaseDataset.store_meta_data(result_dir, self.meta_data)
Contributor: MMKrell, Project: pyspace, Code lines: 13, Source file: dummy.py
Example 3: __init__
def __init__(self, dataset_dir, command_template, parametrization,
             run_number, split_number, operation_result_dir,
             hide_parameters=[]):
    super(WEKAFilterProcess, self).__init__()
    # Determine the directory in which the process' results
    # are stored
    result_collection_name = dataset_dir.split(os.sep)[-2]
    for parameter_name, parameter_value in parametrization.iteritems():
        # If this is a parameter that should not be hidden, then we have to
        # encode it in the result collection name
        if not parameter_name in hide_parameters:
            result_collection_name += "{__%s__:%s}" % (parameter_name.upper(),
                                                       parameter_value)
    self.result_directory = os.path.join(operation_result_dir,
                                         result_collection_name)
    # Create directory for intermediate results if it does not exist yet
    create_directory(self.result_directory
                     + os.sep + "data_run%s" % run_number)
    # Create collection
    collection = BaseDataset.load(dataset_dir)
    # The parametrization that is independent of the collection type
    # and the specific weka command template that is executed
    self.params = {"dataset_name": dataset_dir.replace('/', '_'),
                   "dataset_dir": dataset_dir,
                   "run_number": run_number,
                   "split_number": split_number,
                   "weka_class_path": pySPACE.configuration.weka_class_path,
                   "temp_results": self.result_directory}
    # Load the abbreviations
    abbreviations_file = open(os.path.join(pySPACE.configuration.spec_dir,
                                           'operations/weka_templates',
                                           'abbreviations.yaml'), 'r')
    self.abbreviations = yaml.load(abbreviations_file)
    # Add custom parameters for the weka command template
    for parameter_name, parameter_value in parametrization.iteritems():
        # Auto-expand abbreviations
        if parameter_value in self.abbreviations:
            parameter_value = self.abbreviations[parameter_value]
        self.params[parameter_name] = parameter_value
    # Build the WEKA command by repeatedly replacing all placeholders in
    # the template
    while True:
        instantiated_template = command_template % self.params
        if instantiated_template == command_template:
            # All placeholders replaced
            self.weka_command = instantiated_template
            break
        else:
            # We have to continue since we are not converged
            command_template = instantiated_template
    self.handler_class = None
Contributor: Crespo911, Project: pyspace, Code lines: 60, Source file: weka_filter.py
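The while loop at the end of Example 3 keeps re-applying Python's %-formatting until the command stops changing, so placeholders that are introduced by earlier substitutions get resolved as well. The following self-contained sketch illustrates the idea; the template string and parameter names are invented and are not part of pySPACE or WEKA:

# Toy template whose first expansion introduces another placeholder.
params = {"cmd": "java -cp %(weka_class_path)s",
          "weka_class_path": "/opt/weka/weka.jar"}
command_template = "%(cmd)s weka.filters.AllFilter"

while True:
    instantiated_template = command_template % params
    if instantiated_template == command_template:
        # Nothing changed anymore, i.e. no placeholders are left.
        break
    # Not converged yet: the expansion may have introduced new placeholders.
    command_template = instantiated_template

print(instantiated_template)  # java -cp /opt/weka/weka.jar weka.filters.AllFilter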
Example 4: _createProcesses
def _createProcesses(cls, processes, result_directory, operation_spec,
                     parameter_settings, input_collections, command_template):
    # For each combination of classifier, input-collection and
    # run number, create one WEKA_process
    for dataset_dir in input_collections:
        collection = BaseDataset.load(dataset_dir)
        # Determine the number of iterations and splits to be used
        iterations = collection.meta_data["runs"]
        splits = collection.meta_data["splits"]
        if "runs" in operation_spec:
            assert(iterations in [1, operation_spec["runs"]])
            iterations = operation_spec["runs"]
        if "cv_folds" in operation_spec:
            assert(splits in [1, operation_spec["cv_folds"]])
            splits = operation_spec["cv_folds"]
        for parametrization in parameter_settings:
            for run_number in range(iterations):
                process = WEKAClassificationProcess(dataset_dir,
                                                    command_template,
                                                    parametrization,
                                                    splits,
                                                    run_number,
                                                    result_directory)
                processes.put(process)
    # give executing process the sign that creation is now finished
    processes.put(False)
Contributor: AlexanderFabisch, Project: pyspace, Code lines: 28, Source file: weka_classification.py
Example 5: _copy_file
def _copy_file(self, source_collection_path, target_collection_path,
               train_set_name_suffix):
    """ Copy a dataset to a new destination

    **Parameters**

        :source_collection_path:
            The path to the dataset that has to be copied.

        :target_collection_path:
            The path to where the dataset should be copied.

        :train_set_name_suffix:
            Either 'train' or 'test'. Specifies if the target dataset is
            handled as training or testing data.
    """
    source_collection = BaseDataset.load(source_collection_path)
    # if only test data was given, the "Rest_vs" collection is stored as
    # training data
    if self.reverse and "test" == train_set_name_suffix:
        # exchange the "test" in key tuple to "train" before storing
        for key in source_collection.data.keys():
            assert("test" == key[2])
            value = source_collection.data.pop(key)
            key = (key[0], key[1], "train")
            source_collection.data[key] = value
    # we store the data in the same format as before
    source_collection.store(target_collection_path,
                            source_collection.meta_data["storage_format"])
Contributor: MMKrell, Project: pyspace, Code lines: 29, Source file: merge.py
Example 6: store
def store(self, result_dir, s_format="None"):
    if not s_format == "None":
        self._log("The format %s is not supported!" % s_format, level=logging.CRITICAL)
        return
    # Update the meta data
    try:
        author = pwd.getpwuid(os.getuid())[4]
    except:
        author = "unknown"
        self._log("Author could not be resolved.", level=logging.WARNING)
    self.update_meta_data({"type": "only output of individual nodes stored",
                           "storage_format": s_format,
                           "author": author,
                           "data_pattern": "no data stored"})
    # Store meta data
    BaseDataset.store_meta_data(result_dir, self.meta_data)
Contributor: AlexanderFabisch, Project: pyspace, Code lines: 17, Source file: dummy.py
Example 7: create
def create(cls, operation_spec, result_directory, debug=False, input_paths=[]):
    """
    A factory method that creates a WEKA operation based on the
    information given in the operation specification operation_spec
    """
    assert(operation_spec["type"] == "weka_classification")
    # Determine all parameter combinations that should be tested
    parameter_settings = cls._get_parameter_space(operation_spec)
    # Read the command template from a file
    template_file = open(os.path.join(pySPACE.configuration.spec_dir,
                                      "operations",
                                      "weka_templates",
                                      operation_spec["template"]),
                         'r')
    command_template = template_file.read()
    template_file.close()
    # number of processes
    if "runs" in operation_spec:
        number_processes = len(input_paths) * len(parameter_settings) * \
                           operation_spec["runs"]
    else:  # approximate the number of processes
        runs = []
        for dataset_dir in input_paths:
            collection = BaseDataset.load(dataset_dir)
            runs.append(collection.meta_data["runs"])
        runs = max(runs)
        number_processes = len(input_paths) * len(parameter_settings) * \
                           runs
    if debug == True:
        # To better debug creation of processes we don't limit the queue
        # and create all processes before executing them
        processes = processing.Queue()
        cls._createProcesses(processes, result_directory, operation_spec,
                             parameter_settings, input_paths,
                             command_template)
        # create and return the weka operation object
        return cls(processes, operation_spec, result_directory,
                   number_processes)
    else:
        # Create all processes by calling a recursive helper method in
        # another thread so that already created processes can be executed in
        # parallel. Therefore a queue is used whose size is limited to
        # guarantee that not too many objects are created (because this costs
        # memory). However, the actual number of 100 is arbitrary and might
        # be reviewed.
        processes = processing.Queue(100)
        create_process = processing.Process(target=cls._createProcesses,
                                            args=(processes, result_directory, operation_spec,
                                                  parameter_settings, input_paths,
                                                  command_template))
        create_process.start()
        # create and return the weka operation object
        return cls(processes, operation_spec, result_directory,
                   number_processes, create_process)
Contributor: AlexanderFabisch, Project: pyspace, Code lines: 57, Source file: weka_classification.py
Example 8: _get_result_dataset_dir
def _get_result_dataset_dir(base_dir, input_dataset_dir,
                            parameter_setting, hide_parameters):
    """ Determines the name of the result directory

    Determines the name of the result directory based on the
    input_dataset_dir, the node_chain_name and the parameter setting.
    """
    input_name = input_dataset_dir.strip(os.sep).split(os.sep)[-1]
    input_name = input_name.strip("{}")
    # If the input is already the result of an operation
    if input_name.count("}{") > 0:
        input_name_parts = input_name.split("}{")
        input_name = input_name_parts[0]
    # Load the input meta data
    dataset_dir = os.sep.join([pySPACE.configuration.storage,
                               input_dataset_dir])
    dataset_md = BaseDataset.load_meta_data(dataset_dir)
    # We are going to change the parameter_setting and don't want to
    # interfere with later runs so we work on a copy
    parameter_setting = copy.deepcopy(parameter_setting)
    # Ignore pseudo parameter "__PREPARE_OPERATION__"
    if "__PREPARE_OPERATION__" in parameter_setting:
        parameter_setting.pop("__PREPARE_OPERATION__")
    # Add the input parameters meta data to the given parameter setting
    if "parameter_setting" in dataset_md:
        parameter_setting.update(dataset_md["parameter_setting"])
    # We have to remove ' characters from the parameter value since
    # Weka does ignore them
    for key, value in parameter_setting.iteritems():
        if isinstance(value, basestring) and value.count("'") > 1:
            parameter_setting[key] = eval(value)
    # Determine the result_directory name
    # The string between key and value was changed from ":" to "#"
    # because of problems in Windows and with Windows file servers
    parameter_str = "}{".join(("%s#%s" % (key, value))
                              for key, value in parameter_setting.iteritems()
                              if key not in hide_parameters)
    result_name = "{%s}" % input_name
    if parameter_str != "":
        result_name += "{%s}" % (parameter_str)
    # Determine the path where this result will be stored
    # and create the directory if necessary
    result_dir = base_dir
    result_dir += os.sep + result_name
    create_directory(result_dir)
    return result_dir
Contributor: AlexanderFabisch, Project: pyspace, Code lines: 56, Source file: node_chain.py
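To make the directory naming scheme of Example 8 concrete, the fragment below rebuilds a result name by hand. The input name and parameter values are made up for illustration; only the "{input}{key#value}" pattern itself comes from the code above:

# Invented example values.
input_name = "my_eeg_dataset"
parameter_setting = {"__LOWER_CUTOFF__": 0.4, "__UPPER_CUTOFF__": 4.0}
hide_parameters = []

parameter_str = "}{".join("%s#%s" % (key, value)
                          for key, value in parameter_setting.items()
                          if key not in hide_parameters)
result_name = "{%s}" % input_name
if parameter_str != "":
    result_name += "{%s}" % parameter_str

# e.g. {my_eeg_dataset}{__LOWER_CUTOFF__#0.4}{__UPPER_CUTOFF__#4.0}
# (the key order may differ, since it follows the dictionary iteration order)
print(result_name)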
Example 9: __call__
def __call__(self):
    """ Executes this process on the respective modality """
    # Restore configuration
    pySPACE.configuration = self.configuration
    # reduce log_level for processing a second time and
    # set communication possibility for nodes to backend
    pySPACE.configuration.min_log_level = self.min_log_level
    pySPACE.configuration.logging_com = self.handler_args
    pySPACE.configuration.backend_com = self.backend_com
    ############## Prepare benchmarking ##############
    super(NodeChainProcess, self).pre_benchmarking()
    # Load the data and check that it can be processed
    # Note: This can not be done in the object's constructor since in
    #       that case the whole input would need to be pickled
    #       when doing the remote call
    abs_dataset_dir = os.sep.join([self.storage,
                                   self.rel_dataset_dir])
    input_collection = BaseDataset.load(abs_dataset_dir)
    # We have to remember parameters used for generating this specific
    # input dataset
    if 'parameter_setting' in input_collection.meta_data.keys():
        # but not __INPUT_DATASET__ and __RESULT_DIRECTORY__
        for k, v in input_collection.meta_data['parameter_setting'].items():
            if k not in ["__INPUT_DATASET__", "__RESULT_DIRECTORY__"]:
                self.parameter_setting[k] = v
    NodeChainProcess._check_node_chain_dataset_consistency(self.node_chain,
                                                           input_collection)
    ############## Do the actual benchmarking ##############
    self._log("Start benchmarking run %s of node_chain %s on dataset %s"
              % (self.run,
                 self.node_chain_spec,
                 self.rel_dataset_dir))
    # Do the actual benchmarking for this collection/node_chain combination
    try:
        result_collection = \
            self.node_chain.benchmark(input_collection=input_collection,
                                      run=self.run,
                                      persistency_directory=self.persistency_dir,
                                      store_node_chain=self.store_node_chain)
    except Exception, exception:
        # Send Exception to Logger
        import traceback
        print traceback.format_exc()
        self._log(traceback.format_exc(), level=logging.ERROR)
        raise
Contributor: AlexanderFabisch, Project: pyspace, Code lines: 55, Source file: node_chain.py
Example 10: _copy_pickle_file
def _copy_pickle_file(self, source_collection_path, target_collection_path,
                      train_set_name_suffix):
    source_collection = BaseDataset.load(source_collection_path)
    # if only test data was given, the "Rest_vs" collection is stored as
    # training data
    if self.reverse and "test" == train_set_name_suffix:
        # exchange the "test" in key tuple to "train" before storing
        for key in source_collection.data.keys():
            assert("test" == key[2])
            value = source_collection.data.pop(key)
            key = (key[0], key[1], "train")
            source_collection.data[key] = value
    source_collection.store(target_collection_path)
Contributor: BioinformaticsArchive, Project: pyspace, Code lines: 14, Source file: merge.py
Example 11: create
def create(cls, operation_spec, result_directory, debug=False, input_paths=[]):
    """ A factory method that creates an Analysis operation based on the
        information given in the operation specification operation_spec
    """
    assert(operation_spec["type"] == "analysis")
    input_path = operation_spec["input_path"]
    summary = BaseDataset.load(os.path.join(pySPACE.configuration.storage,
                                            input_path))
    data_dict = summary.data
    # Determine the parameters that should be analyzed
    parameters = operation_spec["parameters"]
    # Determine the metrics that should be plotted
    metrics = operation_spec["metrics"]
    # Determine how many processes will be created
    number_parameter_values = [len(set(data_dict[param])) for param in parameters]
    number_processes = cls._numberOfProcesses(0, number_parameter_values) + 1
    if debug == True:
        # To better debug creation of processes we don't limit the queue
        # and create all processes before executing them
        processes = processing.Queue()
        cls._createProcesses(processes, result_directory, data_dict, parameters,
                             metrics, True)
        return cls(processes, operation_spec, result_directory, number_processes)
    else:
        # Create all plot processes by calling a recursive helper method in
        # another thread so that already created processes can be executed
        # although creation of processes is not finished yet. Therefore a queue
        # is used whose size is limited to guarantee that not too many objects
        # are created (since this costs memory). However, the actual number
        # of 100 is arbitrary and might be changed according to the system at hand.
        processes = processing.Queue(100)
        create_process = processing.Process(target=cls._createProcesses,
                                            args=(processes, result_directory, data_dict,
                                                  parameters, metrics, True))
        create_process.start()
        # create and return the operation object
        return cls(processes, operation_spec, result_directory, number_processes, create_process)
Contributor: AlexanderFabisch, Project: pyspace, Code lines: 41, Source file: analysis.py
Example 12: test_time_series_storing
def test_time_series_storing(self):
    if os.path.exists('tmp') is False:
        os.makedirs('tmp')
    source = SimpleTimeSeriesSourceNode()
    sink = TimeSeriesSinkNode()
    sink.register_input_node(source)
    sink.set_run_number(0)
    sink.process_current_split()
    result_collection = sink.get_result_dataset()
    result_collection.store('tmp')
    #sink.store_results("test_time_series_storing.tmp")
    reloaded_collection = BaseDataset.load('tmp')
    reloader = TimeSeriesSourceNode()
    reloader.set_input_dataset(reloaded_collection)
    #set_permanent_attributes(time_series_file = "test_time_series_storing.tmp")
    orig_data = list(source.request_data_for_testing())
    restored_data = list(reloader.request_data_for_testing())
    # Check that the two lists have the same length
    self.assertEqual(len(orig_data), len(restored_data),
                     "Numbers of time series before storing and after reloading are not equal!")
    # Check that there is a one-to-one correspondence
    for orig_datapoint, orig_label in orig_data:
        found = False
        for restored_datapoint, restored_label in restored_data:
            found |= (orig_datapoint.view(numpy.ndarray) == restored_datapoint.view(numpy.ndarray)).all() \
                     and (orig_label == restored_label)
            if found: break
        self.assert_(found,
                     "One of the original time series cannot be found after reloading")
    shutil.rmtree('tmp')  # Cleaning up...
Contributor: Crespo911, Project: pyspace, Code lines: 38, Source file: test_time_series_sink.py
Example 13: create
def create(cls, operation_spec, result_directory, debug=False, input_paths=[]):
    """
    A factory method that creates a statistic operation based on the
    information given in the operation specification operation_spec.
    If debug is TRUE the creation of the statistic processes will not
    be in a separated thread.
    """
    assert(operation_spec["type"] == "statistic")
    input_path = operation_spec["input_path"]
    tabular = BaseDataset.load(os.path.join(pySPACE.configuration.storage, input_path)).data
    if operation_spec.has_key("filter"):
        conditions = csv_analysis.empty_dict(tabular)
        for key, l in operation_spec["filter"].items():
            conditions[key].extend(l)
        tabular = csv_analysis.strip_dict(tabular, conditions)
    metric = operation_spec.get("metric", "Balanced_accuracy")
    parameter = operation_spec.get("parameter", "__Dataset__")
    rel_par = operation_spec.get("related_parameters", ["__Dataset__", "Key_Run", "Key_Fold"])
    average = operation_spec.get("average", None)
    if average in rel_par:
        rel_par.remove(average)
    if metric in rel_par:
        rel_par.remove(metric)
    if parameter in rel_par:
        rel_par.remove(parameter)
    reduced_tabular = cls.reduce_tabular(tabular, rel_par, metric, parameter, average)
    number_processes = 1
    processes = processing.Queue()
    cls._createProcesses(processes, result_directory, reduced_tabular)
    import shutil
    shutil.copy2(os.path.join(pySPACE.configuration.storage, input_path, "results.csv"),
                 os.path.join(result_directory, "results.csv"))
    shutil.copy2(os.path.join(pySPACE.configuration.storage, input_path, "metadata.yaml"),
                 os.path.join(result_directory, "metadata.yaml"))
    # create and return the statistic operation object
    return cls(processes, operation_spec, result_directory, number_processes)
Contributor: AlexanderFabisch, Project: pyspace, Code lines: 38, Source file: statistic.py
Example 14: __call__
#......... part of the code omitted here .........
    # Determine names of the original data sets the input
    # datasets are based on
    base_dataset1 = dataset_name1.strip("}{").split("}{")[0]
    base_dataset2 = dataset_name2.strip("}{").split("}{")[0]
    # Determine target dataset name and create directory
    # for it
    mixed_base_dataset = "%s_vs_%s" % (base_dataset1,
                                       base_dataset2)
    target_dataset_name = dataset_name1.replace(base_dataset1,
                                                mixed_base_dataset)
    target_dataset_dir = os.sep.join([self.result_directory,
                                      target_dataset_name])
    create_directory(os.sep.join([target_dataset_dir, "data_run0"]))
    if splitted:
        # For each split, copy the train data from dataset 1 and
        # the test data from dataset 2 to the target dataset
        for source_train_file_name in glob.glob(os.sep.join([dataset_dir1,
                                                             "data_run0",
                                                             "*_sp*_train.*"])):
            # TODO: We have $n$ train sets and $n$ test sets, we
            #       could use all $n*n$ combinations
            target_train_file_name = source_train_file_name.replace(dataset_dir1,
                                                                    target_dataset_dir)
            if source_train_file_name.endswith("arff"):
                self._copy_arff_file(source_train_file_name,
                                     target_train_file_name,
                                     base_dataset1,
                                     mixed_base_dataset)
            else:
                os.symlink(source_train_file_name,
                           target_train_file_name)
            source_test_file_name = source_train_file_name.replace(dataset_dir1,
                                                                   dataset_dir2)
            source_test_file_name = source_test_file_name.replace("train.",
                                                                  "test.")
            target_test_file_name = target_train_file_name.replace("train.",
                                                                   "test.")
            if source_train_file_name.endswith("arff"):
                self._copy_arff_file(source_test_file_name,
                                     target_test_file_name,
                                     base_dataset2,
                                     mixed_base_dataset)
            else:
                os.symlink(source_test_file_name,
                           target_test_file_name)
    else:
        # Use the data set from dataset 1 as training set and
        # the data set from dataset 2 as test data
        for source_train_file_name in glob.glob(os.sep.join([dataset_dir1,
                                                             "data_run0",
                                                             "*_sp*_test.*"])):
            target_train_file_name = source_train_file_name.replace("test.",
                                                                    "train.")
            target_train_file_name = target_train_file_name.replace(dataset_dir1,
                                                                    target_dataset_dir)
            if source_train_file_name.endswith("arff"):
                self._copy_arff_file(source_train_file_name,
                                     target_train_file_name,
                                     base_dataset1,
                                     mixed_base_dataset)
            else:
                os.symlink(source_train_file_name,
                           target_train_file_name)
            source_test_file_name = source_train_file_name.replace(dataset_dir1,
                                                                   dataset_dir2)
            target_test_file_name = target_train_file_name.replace("train.",
                                                                   "test.")
            if source_train_file_name.endswith("arff"):
                self._copy_arff_file(source_test_file_name,
                                     target_test_file_name,
                                     base_dataset2,
                                     mixed_base_dataset)
            else:
                os.symlink(source_test_file_name,
                           target_test_file_name)
    # Write metadata.yaml based on input meta data
    input_dataset1_meta = BaseDataset.load_meta_data(dataset_dir1)
    output_dataset_meta = dict(input_dataset1_meta)
    output_dataset_meta['train_test'] = True
    output_dataset_meta['date'] = time.strftime("%Y%m%d_%H_%M_%S")
    try:
        output_dataset_meta['author'] = pwd.getpwuid(os.getuid())[4]
    except:
        self._log("Author could not be resolved.", level=logging.WARNING)
        output_dataset_meta['author'] = "unknown"
    BaseDataset.store_meta_data(target_dataset_dir, output_dataset_meta)

############## Clean up after benchmarking ##############
super(ShuffleProcess, self).post_benchmarking()
Contributor: AlexanderFabisch, Project: pyspace, Code lines: 101, Source file: shuffle.py
Example 15: prepare_training
#......... part of the code omitted here .........
            flow_spec = file(self.potentials[key]["prewindowing_flow"]))
    self.node_chains[key][0].set_generator(flow_generator(key))
    flow = open(self.potentials[key]["prewindowing_flow"])
elif self.operation == "prewindowed_train":
    if self.potentials[key].has_key("stream") and self.potentials[key]["stream"] == True:
        self.node_chains[key] = NodeChainFactory.flow_from_yaml(Flow_Class = NodeChain,
            flow_spec = file(self.potentials[key]["postprocess_flow"]))
        # create windower
        online_logger.info("Creating Windower")
        online_logger.info(self.potentials[key]["windower_spec_path_train"])
        self.node_chains[key][0].set_windower_spec_file(os.path.join(spec_base, "node_chains", "windower", self.potentials[key]["windower_spec_path_train"]))
        replace_start_and_end_markers = True
    else:
        self.node_chains[key] = NodeChainFactory.flow_from_yaml(Flow_Class = NodeChain, flow_spec = file(self.potentials[key]["postprocess_flow"]))
        replace_start_and_end_markers = False
    final_collection = TimeSeriesDataset()
    final_collection_path = os.path.join(self.prewindowed_data_directory, key, "all_train_data")
    # delete previous training collection
    if os.path.exists(final_collection_path):
        online_logger.info("deleting old training data collection for " + key)
        shutil.rmtree(final_collection_path)
    # load all prewindowed collections and
    # append data to the final collection
    prewindowed_sets = \
        glob.glob(os.path.join(self.prewindowed_data_directory, key, "*"))
    if len(prewindowed_sets) == 0:
        online_logger.error("Couldn't find data, please do prewindowing first!")
        raise Exception
    online_logger.info("concatenating prewindowed data from " + str(prewindowed_sets))
    for s, d in enumerate(prewindowed_sets):
        collection = BaseDataset.load(d)
        data = collection.get_data(0, 0, "train")
        for d, (sample, label) in enumerate(data):
            if replace_start_and_end_markers:
                # in case we concatenate multiple 'Window' labeled
                # sets we have to remove every start- and endmarker
                for k in sample.marker_name.keys():
                    # find '{S,s} 8' or '{S,s} 9'
                    m = re.match("^s\s{0,2}[8,9]{1}$", k, re.IGNORECASE)
                    if m is not None:
                        online_logger.info(str("remove %s from %d %d" % (m.group(), s, d)))
                        del(sample.marker_name[m.group()])
                if s == len(prewindowed_sets)-1 and \
                        d == len(data)-1:
                    # insert endmarker
                    sample.marker_name["S 9"] = [0.0]
                    online_logger.info("added endmarker" + str(s) + " " + str(d))
                if s == 0 and d == 0:
                    # insert startmarker
                    sample.marker_name["S 8"] = [0.0]
                    online_logger.info("added startmarker" + str(s) + " " + str(d))
            final_collection.add_sample(sample, label, True)
    # save final collection (just for debugging)
    os.mkdir(final_collection_path)
    final_collection.store(final_collection_path)
    online_logger.info("stored final collection at " + final_collection_path)
Contributor: AlexanderFabisch, Project: pyspace, Code lines: 66, Source file: trainer.py
Example 16: consolidate
def consolidate(self):
    """ Consolidates the results obtained by the single processes into a consistent structure
        of collections that are stored on the file system.
    """
    # Consolidate the results
    directory_pattern = os.sep.join([self.result_directory, "{*",])
    dataset_pathes = glob.glob(directory_pattern)
    # For all collections found
    for dataset_path in dataset_pathes:
        # Load their meta_data
        meta_data = BaseDataset.load_meta_data(dataset_path)
        # Determine author and date
        try:
            author = pwd.getpwuid(os.getuid())[4]
        except:
            author = "unknown"
            self._log("Author could not be resolved.", level=logging.WARNING)
        date = time.strftime("%Y%m%d_%H_%M_%S")
        # Update meta data and store it
        meta_data.update({"author": author, "date": date})
        BaseDataset.store_meta_data(dataset_path, meta_data)
        # Copy the input dataset specification file to the result
        # directory in order to make later analysis of
        # the results more easy
        input_meta_path = os.sep.join([pySPACE.configuration.storage,
                                       meta_data["input_collection_name"]])
        input_meta = BaseDataset.load_meta_data(input_meta_path)
        BaseDataset.store_meta_data(dataset_path, input_meta,
                                    file_name="input_metadata.yaml")
    # Check if some results consist of several runs
    # and update the meta data in this case
    # TODO: This is not a clean solution
    for dataset_dir in glob.glob(os.sep.join([self.result_directory,
                                              "*"])):
        if not os.path.isdir(dataset_dir): continue
        # There can be either run dirs, persistency dirs, or both of them.
        # Check whichever there are more of. If both exist, their numbers
        # are supposed to be equal.
        nr_run_dirs = len(glob.glob(os.sep.join([dataset_dir,
                                                 "data_run*"])))
        nr_per_dirs = len(glob.glob(os.sep.join([dataset_dir,
                                                 "persistency_run*"])))
        nr_runs = max(nr_run_dirs, nr_per_dirs)
        if nr_runs > 1:
            collection_meta = BaseDataset.load_meta_data(dataset_dir)
            collection_meta["runs"] = nr_runs
            BaseDataset.store_meta_data(dataset_dir, collection_meta)
    # If we don't create a feature vector or time series collection,
    # we evaluated our classification using a classification performance sink.
    # The resulting files should be merged to one csv tabular.
    pathlist = glob.glob(os.path.join(self.result_directory, "results_*"))
    if len(pathlist) > 0:
        # Do the consolidation the same way as for WekaClassificationOperation
        self._log("Consolidating results ...")
        # We load and store the results once into a PerformanceResultSummary
        # This does the necessary consolidation...
        self._log("Reading intermediate results...")
        result_collection = PerformanceResultSummary(dataset_dir=self.result_directory)
        self._log("done")
        self._log("Storing result collection")
        result_collection.store(self.result_directory)
        self._log("done")
        PerformanceResultSummary.merge_traces(self.result_directory)
        if not (self.compression == False):
            # Since we get one result summary,
            # we don't need the numerous folders.
            # So we zip them to make the whole folder more easily visible.
            import zipfile
            cwd = os.getcwd()
            os.chdir(self.result_directory)
            # If there are too many or too large folders, problems may occur.
            # In this case we want to log it, try 64 bit mode, and then skip the zipping.
            try:
                pathlist = glob.glob(os.path.join(self.result_directory, "{*}"))
                if not self.compression == "delete":
#......... remainder of this code omitted here .........