本文整理汇总了Python中util.read_dfile函数的典型用法代码示例。如果您正苦于以下问题:Python read_dfile函数的具体用法?Python read_dfile怎么用?Python read_dfile使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了read_dfile函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: make_halo
def make_halo(search_distances, scan_distances, ratios=None):
    """Return the 'hal' organism object to work on.

    Args:
        search_distances: passed through unchanged to org.Microbe.
        scan_distances: passed through unchanged to org.Microbe.
        ratios: optional ratio matrix; when it is not None, a
            microbes_online network factory with
            max_operon_size=ratios.num_rows / 20 is added as well.

    Returns:
        The org.Microbe instance created for organism code 'hal'.
    """
    keggfile = util.read_dfile(KEGG_FILE_PATH, comment='#')
    gofile = util.read_dfile(GO_FILE_PATH)
    rsatdb = rsat.RsatDatabase(RSAT_BASE_URL, CACHE_DIR,
                               'Halobacterium sp', 64091)
    mo_db = microbes_online.MicrobesOnline(CACHE_DIR)
    stringfile = 'testdata/string_links_64091.tab'

    nw_factories = []
    # stringfile is a fixed literal here, so the else branch is only a
    # safeguard should the path ever be made configurable
    if stringfile is not None:
        nw_factories.append(
            stringdb.get_network_factory('hal', stringfile, 0.5))
    else:
        # logging.warn is a deprecated alias of logging.warning
        logging.warning("no STRING file specified !")

    if ratios is not None:
        nw_factories.append(microbes_online.get_network_factory(
            mo_db, max_operon_size=ratios.num_rows / 20, weight=0.5))

    keggorg = util.make_dfile_map(keggfile, 1, 3)['hal']
    rsat_organism = rsatdb.get_rsat_organism(keggorg)
    rsat_info = org.RsatSpeciesInfo(rsatdb, keggorg, rsat_organism, 64091)
    gotax = util.make_dfile_map(gofile, 0, 1)[rsat_info.go_species()]
    return org.Microbe('hal', keggorg, rsat_info, gotax, mo_db, nw_factories,
                       search_distances, scan_distances, True, None)
开发者ID:BioinformaticsArchive,项目名称:cmonkey2,代码行数:25,代码来源:testutil.py
示例2: make_halo
def make_halo(search_distances, scan_distances):
    """Create the 'hal' organism object through an org.MicrobeFactory.

    Both distance arguments are forwarded unchanged to the factory's
    create() call.
    """
    kegg_table = util.read_dfile(KEGG_FILE_PATH, comment='#')
    go_table = util.read_dfile(GO_FILE_PATH)
    rsat_database = rsat.RsatDatabase(RSAT_BASE_URL, CACHE_DIR)
    microbes_online_db = microbes_online.MicrobesOnline()

    factory = org.MicrobeFactory(
        org.make_kegg_code_mapper(kegg_table),
        org.make_rsat_organism_mapper(rsat_database),
        org.make_go_taxonomy_mapper(go_table),
        microbes_online_db,
        [])
    return factory.create('hal', search_distances, scan_distances)
开发者ID:cplaisier,项目名称:cmonkey-python,代码行数:13,代码来源:halo_genes_test.py
示例3: __get_kegg_data
def __get_kegg_data(self):
    """Return (ncbi_code, kegg_entry) for the configured organism code.

    The user KEGG file is preferred over the system one. When
    self['ncbi_code'] is None and the organism code appears in the
    KEGG-to-NCBI map, self['ncbi_code'] is filled in from that map.

    Raises:
        Exception: if neither KEGG file exists.
        KeyError: if the organism code is missing from the KEGG map.
    """
    organism_code = self['organism_code']

    # the user-level file takes precedence over the system-wide one
    for candidate in (USER_KEGG_FILE_PATH, SYSTEM_KEGG_FILE_PATH):
        if os.path.exists(candidate):
            keggfile = util.read_dfile(candidate, comment='#')
            break
    else:
        raise Exception('KEGG file not found !!')

    entry_by_code = util.make_dfile_map(keggfile, 1, 3)
    ncbi_by_code = util.make_dfile_map(keggfile, 1, 2)

    if self['ncbi_code'] is None and organism_code in ncbi_by_code:
        self['ncbi_code'] = ncbi_by_code[organism_code]
    return self['ncbi_code'], entry_by_code[organism_code]
开发者ID:BioinformaticsArchive,项目名称:cmonkey2,代码行数:14,代码来源:cmonkey_run.py
示例4: __make_organism
def __make_organism(self):
    """Build a mock organism from the Halobacterium test data files."""
    feature_table = util.read_dfile('testdata/Halobacterium_sp_features',
                                    comment='--')
    features = {}
    for row in feature_table.lines:
        # columns 3-5 hold the location; column 6 == 'R' sets the
        # boolean flag passed as the last Location argument
        location = st.Location(row[3], int(row[4]), int(row[5]),
                               row[6] == 'R')
        features[row[0]] = st.Feature(row[0], row[1], row[2], location)

    names_table = util.read_dfile(
        'testdata/Halobacterium_sp_feature_names', comment='--')
    synonyms = th.create_from_rsat_feature_names(names_table)
    return MockOrganismWithSynonyms('64091', features, synonyms)
开发者ID:cplaisier,项目名称:cmonkey-python,代码行数:15,代码来源:operon_nw_test.py
示例5: read_edges2
def read_edges2(filename, organism, ratios):
    """Read edges from a preprocessed file, much faster to debug.

    Relies on sep, organism_code and normalized from the enclosing scope.
    Edges rejected by can_add_edge() are counted and left out. Unless
    normalized is set, the edges are passed through
    normalize_edges_to_max_score() with the highest score seen.
    """
    logging.info("stringdb.read_edges2()")
    dfile = util.read_dfile(filename, sep)
    thesaurus = organism.thesaurus()

    # restrict to genes of the ratio matrix when one is given
    cano_genes = None
    if ratios:
        cano_genes = set(thesaurus[row] for row in ratios.row_names
                         if row in thesaurus)

    edges = []
    highest = 0.0
    skipped = 0
    for row in dfile.lines:
        first = patches.patch_string_gene(organism_code, row[0])
        second = patches.patch_string_gene(organism_code, row[1])
        weight = float(row[2])
        # ignored edges still contribute to the maximum score
        highest = max(weight, highest)
        if not can_add_edge(first, second, thesaurus, cano_genes):
            skipped += 1
            continue
        edges.append((intern(first), intern(second), weight))

    if not normalized:
        edges = normalize_edges_to_max_score(edges, highest)

    logging.info("stringdb.read_edges2(), %d edges read, %d edges ignored",
                 len(edges), skipped)
    return edges
开发者ID:msGenDev,项目名称:cmonkey-python,代码行数:32,代码来源:stringdb.py
示例6: test_motif_scoring
def test_motif_scoring(self):
    """Integration test: run one MEME scoring pass on the halo ratios."""
    search_distances = {'upstream': (-20, 150)}
    scan_distances = {'upstream': (-30, 250)}

    # the factory is configured with the nochange and center/scale filters
    filters = [dm.nochange_filter, dm.center_scale_filter]
    matrix_factory = dm.DataMatrixFactory(filters)
    ratio_file = util.read_dfile('example_data/hal/halo_ratios5.tsv',
                                 has_header=True, quote='\"')
    ratio_matrix = matrix_factory.create_from(ratio_file)
    organism = testutil.make_halo(search_distances, scan_distances,
                                  ratio_matrix)
    membership = FakeMembership()

    meme_settings = {
        'schedule': lambda i: True,
        'version': '4.3.0',
        'global_background': False,
        'arg_mod': 'zoops',
        'nmotifs_rvec': 'c(rep(1, num_iterations/3), rep(2, num_iterations/3))',
        'use_revcomp': 'True',
        'max_width': 24,
        'background_order': 3,
    }
    motif_settings = {
        'schedule': lambda i: True,
        'scaling': ('scaling_const', 1.0),
    }
    config_params = {
        'memb.min_cluster_rows_allowed': 3,
        'memb.max_cluster_rows_allowed': 70,
        'multiprocessing': False,
        'num_clusters': 1,
        'output_dir': 'out',
        'debug': {},
        'search_distances': {'upstream': (-20, 150)},
        'num_iterations': 2000,
        'MEME': meme_settings,
        'Motifs': motif_settings,
    }

    scoring = motif.MemeScoringFunction(organism, membership, ratio_matrix,
                                        config_params=config_params)
    # the test only checks that compute() runs; its result is not inspected
    scoring.compute({'iteration': 100})
开发者ID:BioinformaticsArchive,项目名称:cmonkey2,代码行数:30,代码来源:meme430_test.py
示例7: __sequences_for_genes
def __sequences_for_genes(self, seqtype, genes, distance):
    """Return {gene: (Location, sequence)} for the given gene aliases.

    Sequences of the requested type are loaded from the comma-separated
    file in self.__seq_filenames on first use and kept in self.__seqs
    with gene names and sequences upper-cased. The distance argument is
    not used by this implementation.
    """
    if seqtype not in self.__seqs:
        logging.info('loading %s sequences' % seqtype)
        seq_table = util.read_dfile(self.__seq_filenames[seqtype], sep=',')
        self.__seqs[seqtype] = {}
        for row in seq_table.lines:
            self.__seqs[seqtype][row[0].upper()] = row[1].upper()
        logging.info('loaded %i %s sequences' % (len(self.__seqs[seqtype]), seqtype))

    found = {}
    for alias in genes:
        # aliases missing from the thesaurus are skipped silently
        if alias not in self.thesaurus():
            continue
        gene = self.thesaurus()[alias]
        # genes without a loaded sequence are skipped silently as well
        if gene not in self.__seqs[seqtype]:
            continue
        # the sequence has to be returned as a (location, sequence) pair
        # even though the Location itself is only a placeholder
        placeholder = st.Location(gene, 0, 0, False)
        found[gene] = (placeholder, self.__seqs[seqtype][gene])
    return found
开发者ID:dreiss-isb,项目名称:cmonkey-python,代码行数:25,代码来源:organism.py
示例8: test_motif_scoring
def test_motif_scoring(self):
    """Integration test: run one MEME 4.3.0 scoring pass on the halo ratios.

    The dangling triple quote that followed the last statement was removed;
    it opened an unterminated string literal and made the block
    unparseable.
    """
    search_distances = {'upstream': (-20, 150)}
    scan_distances = {'upstream': (-30, 250)}

    matrix_factory = dm.DataMatrixFactory([dm.nochange_filter,
                                           dm.center_scale_filter])
    infile = util.read_dfile('halo_ratios5.tsv', has_header=True, quote='\"')
    ratio_matrix = matrix_factory.create_from(infile)

    meme_suite = meme.MemeSuite430(remove_tempfiles=True)
    sequence_filters = [
        motif.unique_filter,
        motif.get_remove_low_complexity_filter(meme_suite),
        motif.get_remove_atgs_filter(search_distances['upstream']),
    ]

    organism = make_halo(ratio_matrix, search_distances, scan_distances)
    membership = FakeMembership()
    config_params = {'memb.min_cluster_rows_allowed': 3,
                     'memb.max_cluster_rows_allowed': 70,
                     'multiprocessing': False,
                     'num_clusters': 1,
                     'output_dir': 'out',
                     'num_iterations': 2000}

    func = motif.MemeScoringFunction(organism, membership, ratio_matrix,
                                     meme_suite,
                                     sequence_filters=sequence_filters,
                                     scaling_func=lambda iter: 1.0,
                                     num_motif_func=motif.default_nmotif_fun,
                                     config_params=config_params)
    # the test only checks that compute() runs; its result is not inspected
    func.compute({'iteration': 100})
开发者ID:cplaisier,项目名称:cmonkey-python,代码行数:31,代码来源:meme430_test.py
示例9: test_read_with_semicolon_header_and_comments
def test_read_with_semicolon_header_and_comments(self):
    """Reads a semicolon delimited file with a header and comments"""
    dfile = util.read_dfile("testdata/withcomments.ssv", sep=';',
                            has_header=True, comment='#')
    lines = dfile.lines
    # assertEqual replaces the deprecated assertEquals alias
    self.assertEqual(2, len(lines))
    self.assertEqual(["header1", "header2"], dfile.header)
开发者ID:dreiss-isb,项目名称:cmonkey-python,代码行数:7,代码来源:util_test.py
示例10: test_read_with_quotes
def test_read_with_quotes(self):
    """Reads a semicolon delimited file with quotes"""
    dfile = util.read_dfile("testdata/withquotes.ssv", sep=';',
                            has_header=False, comment='#', quote='"')
    lines = dfile.lines
    # assertEqual replaces the deprecated assertEquals alias
    self.assertEqual(["value11", "value12"], lines[0])
    self.assertEqual(["value21", "value22"], lines[1])
开发者ID:dreiss-isb,项目名称:cmonkey-python,代码行数:7,代码来源:util_test.py
示例11: test_read_with_tabs
def test_read_with_tabs(self):
    """Reads a tab delimited file"""
    dfile = util.read_dfile("testdata/simple.tsv")
    lines = dfile.lines
    # assertEqual replaces the deprecated assertEquals alias
    self.assertEqual(["value11", "value12"], lines[0])
    self.assertEqual(["value21", "value22"], lines[1])
    # read without has_header, so no header is expected
    self.assertIsNone(dfile.header)
开发者ID:dreiss-isb,项目名称:cmonkey-python,代码行数:7,代码来源:util_test.py
示例12: prepare_ensemble_matrix
def prepare_ensemble_matrix(ratiofile, outdir, n, kmin):
    """Read ratiofile into a data matrix and hand it to split_matrix().

    Nothing is read or split when ratiofile does not exist; the function
    then returns None after creating the matrix factory.
    """
    factory = DataMatrixFactory([nochange_filter, center_scale_filter])
    if not os.path.exists(ratiofile):
        return

    ratio_table = util.read_dfile(ratiofile, has_header=True, quote='\"')
    ratios = factory.create_from(ratio_table)
    split_matrix(ratios, outdir, n, kmin, ratios.num_columns)
开发者ID:msGenDev,项目名称:cmonkey-python,代码行数:7,代码来源:datamatrix.py
示例13: __make_ref_operon_pairs
def __make_ref_operon_pairs(self):
    """Return the reference operon pairs used for comparison."""
    ref_table = util.read_dfile('testdata/operon_reftable.tsv',
                                has_header=True, quote='"')
    # columns 1 and 2 of every row form one reference pair
    return [(row[1], row[2]) for row in ref_table.lines]
开发者ID:cplaisier,项目名称:cmonkey-python,代码行数:8,代码来源:operon_nw_test.py
示例14: read_edges3
def read_edges3(filename):
    """Read edges from a preprocessed CSV file, much faster to debug.

    Returns a list of [column 1, column 2, float(column 3)] lists, one
    per data row.
    """
    logging.info("stringdb.read_edges3()")
    edge_table = util.read_dfile(filename, sep=",", has_header=True,
                                 quote='"')
    return [[row[1], row[2], float(row[3])] for row in edge_table.lines]
开发者ID:cplaisier,项目名称:cmonkey-python,代码行数:8,代码来源:stringdb.py
示例15: read_edges2
def read_edges2(filename):
    """Read edges from a preprocessed file, much faster to debug.

    Relies on sep from the enclosing scope. Returns a list of
    (column 0, column 1, float(column 2)) tuples, one per row.
    """
    logging.info("\x1b[31mstringdb:\t\x1b[0mreading interaction network - stringdb.read_edges2()")
    edge_table = util.read_dfile(filename, sep)
    return [(row[0], row[1], float(row[2])) for row in edge_table.lines]
开发者ID:cplaisier,项目名称:cmonkey-python,代码行数:8,代码来源:stringdb.py
示例16: test_read_with_empty_lines
def test_read_with_empty_lines(self):
    """Reads a semicolon delimited file containing empty lines"""
    dfile = util.read_dfile("testdata/withemptylines.ssv", sep=';',
                            has_header=True, comment='#', quote='"')
    lines = dfile.lines
    # assertEqual replaces the deprecated assertEquals alias
    self.assertEqual(["header1", "header2"], dfile.header)
    self.assertEqual(2, len(lines))
    self.assertEqual(["value11", "value12"], lines[0])
    self.assertEqual(["value21", "value22"], lines[1])
开发者ID:dreiss-isb,项目名称:cmonkey-python,代码行数:9,代码来源:util_test.py
示例17: read_csv
def read_csv(cls, name, infile, cutoff=None, sep=','):
    """Read a set type from a CSV file.

    Column 0 of each row names the set and column 1 (upper-cased) is
    added to that set with the value 1. The cutoff argument is accepted
    but not used here.
    """
    table = util.read_dfile(infile, sep)
    sets_by_name = {}
    for row in table.lines:
        set_name = row[0]
        # create the discrete set the first time its name shows up
        if sets_by_name.get(set_name) is None:
            sets_by_name[set_name] = EnrichmentSet('discrete')
        sets_by_name[set_name].add(row[1].upper(), 1)
    return SetType(name, sets_by_name)
开发者ID:dreiss-isb,项目名称:cmonkey-python,代码行数:9,代码来源:set_enrichment.py
示例18: make_microbe
def make_microbe(self):
    """Return the organism object to work on.

    When no STRING file is configured, one is fetched through
    util.get_url_cached() into the cache directory, named after the
    organism's NCBI taxonomy id, and stored in self['string_file'].
    A STRING network factory and a microbes_online network factory are
    then handed to the org.MicrobeFactory that creates the organism.
    """
    keggfile = util.read_dfile(KEGG_FILE_PATH, comment='#')
    gofile = util.read_dfile(GO_FILE_PATH)
    rsatdb = rsat.RsatDatabase(RSAT_BASE_URL, self['cache_dir'])
    mo_db = microbes_online.MicrobesOnline()
    stringfile = self.config_params['string_file']
    kegg_mapper = org.make_kegg_code_mapper(keggfile)
    rsat_mapper = org.make_rsat_organism_mapper(rsatdb)

    # automatically download STRING file
    if stringfile is None:
        rsat_info = rsat_mapper(kegg_mapper(self['organism_code']))
        ncbi_code = rsat_info.taxonomy_id
        # a single pre-formatted argument prints the same text under the
        # Python 2 print statement and the Python 3 print function
        print("NCBI CODE IS:  %s" % (ncbi_code,))
        url = STRING_URL_PATTERN % ncbi_code
        stringfile = "%s/%s.gz" % (self['cache_dir'], ncbi_code)
        self['string_file'] = stringfile
        logging.info("Automatically using STRING file in '%s'", stringfile)
        util.get_url_cached(url, stringfile)

    nw_factories = []
    # stringfile is always set by now (configured or downloaded above);
    # the else branch is kept only as a safeguard
    if stringfile is not None:
        nw_factories.append(stringdb.get_network_factory2(
            self['organism_code'], stringfile, 0.5))
    else:
        # logging.warn is a deprecated alias of logging.warning
        logging.warning("no STRING file specified !")

    nw_factories.append(microbes_online.get_network_factory(
        mo_db, max_operon_size=self.ratio_matrix.num_rows / 20,
        weight=0.5))

    org_factory = org.MicrobeFactory(kegg_mapper,
                                     rsat_mapper,
                                     org.make_go_taxonomy_mapper(gofile),
                                     mo_db,
                                     nw_factories)
    return org_factory.create(self['organism_code'],
                              self['search_distances'],
                              self['scan_distances'])
开发者ID:cplaisier,项目名称:cmonkey-python,代码行数:40,代码来源:cmonkey_run.py
示例19: create_from_delimited_file2
def create_from_delimited_file2(dfile):
    """Create a thesaurus from a delimited file where the format is
    <original>SEPARATOR<alt1>;<alt2>;...
    ...

    Args:
        dfile: either a path (str), which is read as a comma-separated
            file without header, or an object exposing the parsed rows
            through a ``lines`` attribute.

    Returns:
        A dict mapping every upper-cased alternative, and the upper-cased
        original itself, to the upper-cased original.

    Empty alternatives (from ``a;;b`` or a trailing ``;``) are skipped so
    that no '' key ends up in the thesaurus, and a row without an
    alternatives column only maps the original to itself.
    """
    if isinstance(dfile, str):
        dfile = util.read_dfile(dfile, sep=',', has_header=False)

    result = {}
    for line in dfile.lines:
        original = line[0].upper()  # original should map to itself
        result[original] = original
        alternatives = line[1].split(';') if len(line) > 1 else []
        for alternative in alternatives:
            if alternative:
                result[alternative.upper()] = original
    return result
开发者ID:cplaisier,项目名称:cmonkey-python,代码行数:13,代码来源:thesaurus.py
示例20: read_edges2
def read_edges2(filename):
    """Read edges from a preprocessed file, much faster to debug.

    Relies on sep, organism_code and normalized from the enclosing scope.

    Returns:
        A list of (gene1, gene2, score) tuples with both gene names passed
        through patches.patch_string_gene(). Unless normalized is set, the
        scores are run through normalize_edges_to_max_score() using the
        highest score found in the file.
    """
    logging.info("stringdb.read_edges2()")
    dfile = util.read_dfile(filename, sep)
    result = []
    max_score = 0.0
    for line in dfile.lines:
        score = float(line[2])
        max_score = max(score, max_score)
        result.append((patches.patch_string_gene(organism_code, line[0]),
                       patches.patch_string_gene(organism_code, line[1]),
                       score))

    if not normalized:
        # The edges are immutable tuples, so the normalized scores can only
        # come back through the return value; previously it was discarded.
        # NOTE(review): normalize_edges_to_max_score is defined outside this
        # block - the None check keeps the old list should it ever work
        # in place and return nothing.
        normalized_edges = normalize_edges_to_max_score(result, max_score)
        if normalized_edges is not None:
            result = normalized_edges
    return result
开发者ID:dreiss-isb,项目名称:cmonkey-python,代码行数:15,代码来源:stringdb.py
注:本文中的util.read_dfile函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论