• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    公众号

Python util.read_dfile函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中util.read_dfile函数的典型用法代码示例。如果您正苦于以下问题：Python read_dfile函数的具体用法？Python read_dfile怎么用？Python read_dfile使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了read_dfile函数的20个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: make_halo

def make_halo(search_distances, scan_distances, ratios=None):
    """Return the organism object ('hal') to work on.

    Args:
        search_distances: passed through unchanged to ``org.Microbe``.
        scan_distances: passed through unchanged to ``org.Microbe``.
        ratios: optional ratio matrix. When given, a Microbes Online network
            factory is registered in addition to the STRING one, using
            ``ratios.num_rows / 20`` as its maximum operon size.

    Returns:
        The ``org.Microbe`` instance built for organism code 'hal'.
    """
    keggfile = util.read_dfile(KEGG_FILE_PATH, comment='#')
    gofile = util.read_dfile(GO_FILE_PATH)
    rsatdb = rsat.RsatDatabase(RSAT_BASE_URL, CACHE_DIR,
                               'Halobacterium sp', 64091)
    mo_db = microbes_online.MicrobesOnline(CACHE_DIR)
    stringfile = 'testdata/string_links_64091.tab'

    # The STRING file path is a fixed literal, so the former
    # "if stringfile != None ... else: warn" guard could never take its
    # else branch; the STRING network factory is always registered.
    nw_factories = [stringdb.get_network_factory('hal', stringfile, 0.5)]

    if ratios is not None:
        nw_factories.append(microbes_online.get_network_factory(
            mo_db, max_operon_size=ratios.num_rows / 20, weight=0.5))

    # NOTE(review): columns 1/3 of the KEGG file and 0/1 of the GO file are
    # presumably code -> organism name and species -> taxonomy id; confirm
    # against util.make_dfile_map and the data files.
    keggorg = util.make_dfile_map(keggfile, 1, 3)['hal']
    rsat_organism = rsatdb.get_rsat_organism(keggorg)
    rsat_info = org.RsatSpeciesInfo(rsatdb, keggorg, rsat_organism, 64091)
    gotax = util.make_dfile_map(gofile, 0, 1)[rsat_info.go_species()]
    return org.Microbe('hal', keggorg, rsat_info, gotax, mo_db, nw_factories,
                       search_distances, scan_distances, True, None)
开发者ID:BioinformaticsArchive,项目名称:cmonkey2,代码行数:25,代码来源:testutil.py


示例2: make_halo

def make_halo(search_distances, scan_distances):
    """Create the 'hal' organism through an ``org.MicrobeFactory``.

    The KEGG and GO delimited files are read first, then the RSAT database
    and Microbes Online handles are created; the factory is built from
    mappers derived from those and from an empty list of network factories.

    Args:
        search_distances: forwarded to ``MicrobeFactory.create``.
        scan_distances: forwarded to ``MicrobeFactory.create``.

    Returns:
        Whatever ``MicrobeFactory.create('hal', ...)`` returns.
    """
    kegg_dfile = util.read_dfile(KEGG_FILE_PATH, comment='#')
    go_dfile = util.read_dfile(GO_FILE_PATH)
    rsat_database = rsat.RsatDatabase(RSAT_BASE_URL, CACHE_DIR)
    microbes_db = microbes_online.MicrobesOnline()

    kegg_mapper = org.make_kegg_code_mapper(kegg_dfile)
    rsat_mapper = org.make_rsat_organism_mapper(rsat_database)
    go_mapper = org.make_go_taxonomy_mapper(go_dfile)
    network_factories = []  # this variant registers no networks

    factory = org.MicrobeFactory(kegg_mapper, rsat_mapper, go_mapper,
                                 microbes_db, network_factories)
    return factory.create('hal', search_distances, scan_distances)
开发者ID:cplaisier,项目名称:cmonkey-python,代码行数:13,代码来源:halo_genes_test.py


示例3: __get_kegg_data

 def __get_kegg_data(self):
     """Look up the NCBI code and the KEGG map entry for this run's organism.

     The user-level KEGG file is preferred; the system-level file is used
     only when the user-level one does not exist. If ``self['ncbi_code']``
     is None and the organism code is present in the KEGG file, the NCBI
     code is filled in from it.

     Returns:
         A tuple ``(self['ncbi_code'], kegg_map[organism_code])``.

     Raises:
         Exception: if neither KEGG file exists.
         KeyError: if the organism code is missing from the KEGG map.
     """
     organism_code = self['organism_code']

     # first existing candidate wins; for/else raises when none exists
     for kegg_path in (USER_KEGG_FILE_PATH, SYSTEM_KEGG_FILE_PATH):
         if os.path.exists(kegg_path):
             keggfile = util.read_dfile(kegg_path, comment='#')
             break
     else:
         raise Exception('KEGG file not found !!')

     # NOTE(review): column 1 is used as the key for both maps; columns 3
     # and 2 presumably hold the organism name and NCBI code - confirm.
     kegg_map = util.make_dfile_map(keggfile, 1, 3)
     kegg2ncbi = util.make_dfile_map(keggfile, 1, 2)

     ncbi_unset = self['ncbi_code'] is None
     if ncbi_unset and organism_code in kegg2ncbi:
         self['ncbi_code'] = kegg2ncbi[organism_code]
     return self['ncbi_code'], kegg_map[organism_code]
开发者ID:BioinformaticsArchive,项目名称:cmonkey2,代码行数:14,代码来源:cmonkey_run.py


示例4: __make_organism

 def __make_organism(self):
     """Build a mock organism from the Halobacterium test data files.

     Every row of 'testdata/Halobacterium_sp_features' becomes an
     ``st.Feature`` keyed by the row's first column; synonyms are created
     from 'testdata/Halobacterium_sp_feature_names'. Lines starting with
     '--' are treated as comments in both files.

     Returns:
         A ``MockOrganismWithSynonyms`` for code '64091'.
     """
     feature_rows = util.read_dfile('testdata/Halobacterium_sp_features',
                                    comment='--').lines
     features = {}
     for row in feature_rows:
         # columns 4 and 5 are integers; column 6 equal to 'R' sets the
         # last Location argument to True
         location = st.Location(row[3], int(row[4]), int(row[5]),
                                row[6] == 'R')
         features[row[0]] = st.Feature(row[0], row[1], row[2], location)

     names_dfile = util.read_dfile(
         'testdata/Halobacterium_sp_feature_names', comment='--')
     synonyms = th.create_from_rsat_feature_names(names_dfile)
     return MockOrganismWithSynonyms('64091', features, synonyms)
开发者ID:cplaisier,项目名称:cmonkey-python,代码行数:15,代码来源:operon_nw_test.py


示例5: read_edges2

    def read_edges2(filename, organism, ratios):
        """Read edges from a preprocessed file (faster to debug).

        Each line yields (node1, node2, score). Both node names are passed
        through ``patches.patch_string_gene``; an edge is kept only when
        ``can_add_edge`` accepts it. ``sep``, ``organism_code``,
        ``normalized`` and ``can_add_edge`` come from the enclosing scope.

        Args:
            filename: path of the delimited edge file.
            organism: object providing ``thesaurus()``.
            ratios: when truthy, its ``row_names`` that appear in the
                thesaurus are mapped to canonical gene names and handed to
                ``can_add_edge``; otherwise None is passed instead.

        Returns:
            List of (node1, node2, score) tuples; scores are passed through
            ``normalize_edges_to_max_score`` when ``normalized`` is false.
        """
        logging.info("stringdb.read_edges2()")
        dfile = util.read_dfile(filename, sep)
        thesaurus = organism.thesaurus()

        cano_genes = None
        if ratios:
            cano_genes = {thesaurus[name] for name in ratios.row_names
                          if name in thesaurus}

        edges = []
        skipped = 0
        top_score = 0.0
        for fields in dfile.lines:
            gene_a = patches.patch_string_gene(organism_code, fields[0])
            gene_b = patches.patch_string_gene(organism_code, fields[1])
            weight = float(fields[2])
            # the maximum is tracked over all lines, including ignored ones
            top_score = max(weight, top_score)

            if not can_add_edge(gene_a, gene_b, thesaurus, cano_genes):
                skipped += 1
                continue
            edges.append((intern(gene_a), intern(gene_b), weight))

        if not normalized:
            edges = normalize_edges_to_max_score(edges, top_score)

        logging.info("stringdb.read_edges2(), %d edges read, %d edges ignored",
                     len(edges), skipped)
        return edges
开发者ID:msGenDev,项目名称:cmonkey-python,代码行数:32,代码来源:stringdb.py


示例6: test_motif_scoring

    def test_motif_scoring(self):
        """Run the MEME motif scoring end to end on the halo example data.

        Builds the ratio matrix from 'example_data/hal/halo_ratios5.tsv',
        creates the organism through ``testutil.make_halo`` and calls
        ``MemeScoringFunction.compute`` for iteration 100. No assertion is
        made on the result; the test passes when the call completes.
        """
        search_distances = {'upstream': (-20, 150)}
        scan_distances = {'upstream': (-30, 250)}

        filters = [dm.nochange_filter, dm.center_scale_filter]
        factory = dm.DataMatrixFactory(filters)
        ratios_dfile = util.read_dfile('example_data/hal/halo_ratios5.tsv',
                                       has_header=True, quote='\"')
        ratio_matrix = factory.create_from(ratios_dfile)
        organism = testutil.make_halo(search_distances, scan_distances,
                                      ratio_matrix)
        membership = FakeMembership()

        meme_settings = {
            'schedule': lambda i: True,
            'version': '4.3.0',
            'global_background': False,
            'arg_mod': 'zoops',
            'nmotifs_rvec': 'c(rep(1, num_iterations/3), rep(2, num_iterations/3))',
            'use_revcomp': 'True',
            'max_width': 24,
            'background_order': 3,
        }
        motif_settings = {
            'schedule': lambda i: True,
            'scaling': ('scaling_const', 1.0),
        }
        config_params = {
            'memb.min_cluster_rows_allowed': 3,
            'memb.max_cluster_rows_allowed': 70,
            'multiprocessing': False,
            'num_clusters': 1,
            'output_dir': 'out',
            'debug': {},
            'search_distances': {'upstream': (-20, 150)},
            'num_iterations': 2000,
            'MEME': meme_settings,
            'Motifs': motif_settings,
        }

        scoring = motif.MemeScoringFunction(organism, membership, ratio_matrix,
                                            config_params=config_params)
        scoring.compute({'iteration': 100})
开发者ID:BioinformaticsArchive,项目名称:cmonkey2,代码行数:30,代码来源:meme430_test.py


示例7: __sequences_for_genes

    def __sequences_for_genes(self, seqtype, genes, distance):
        """Retrieve the specified sequences from the supplied genomic data.

        On first use of a sequence type, the comma-separated file
        ``self.__seq_filenames[seqtype]`` is read and cached in
        ``self.__seqs[seqtype]`` as upper-cased first column ->
        upper-cased second column.

        Args:
            seqtype: key into ``self.__seq_filenames`` / ``self.__seqs``.
            genes: iterable of gene aliases. Aliases missing from the
                thesaurus, and genes without a loaded sequence, are
                silently skipped.
            distance: not used by this implementation.

        Returns:
            Dict mapping the thesaurus gene name to a
            ``(st.Location(gene, 0, 0, False), sequence)`` pair.
        """
        if seqtype not in self.__seqs:
            logging.info('loading %s sequences', seqtype)
            dfile = util.read_dfile(self.__seq_filenames[seqtype], sep=',')
            # Build the mapping locally and publish it only once complete,
            # so a failure while reading does not leave a partial cache
            # entry that later calls would treat as fully loaded.
            loaded = {}
            for line in dfile.lines:
                loaded[line[0].upper()] = line[1].upper()
            self.__seqs[seqtype] = loaded
            logging.info('loaded %i %s sequences', len(loaded), seqtype)

        seqs = self.__seqs[seqtype]
        thesaurus = self.thesaurus()  # looked up once instead of per alias
        result = {}
        for alias in genes:
            if alias not in thesaurus:
                continue
            gene = thesaurus[alias]
            if gene in seqs:
                # note that we have to return the sequence as a
                # (location, sequence) pair even if we do not actually use
                # the Location
                result[gene] = (st.Location(gene, 0, 0, False), seqs[gene])
        return result
开发者ID:dreiss-isb,项目名称:cmonkey-python,代码行数:25,代码来源:organism.py


示例8: test_motif_scoring

    def test_motif_scoring(self):
        """Run the MEME motif scoring end to end with explicit filters.

        Builds the ratio matrix from 'halo_ratios5.tsv', sets up a
        ``MemeSuite430`` with the unique / low-complexity / ATG-removal
        sequence filters and calls ``MemeScoringFunction.compute`` for
        iteration 100.
        """
        search_distances = {'upstream': (-20, 150)}
        scan_distances = {'upstream': (-30, 250)}

        filters = [dm.nochange_filter, dm.center_scale_filter]
        factory = dm.DataMatrixFactory(filters)
        ratios_dfile = util.read_dfile('halo_ratios5.tsv', has_header=True,
                                       quote='\"')
        ratio_matrix = factory.create_from(ratios_dfile)

        meme_suite = meme.MemeSuite430(remove_tempfiles=True)
        low_complexity_filter = motif.get_remove_low_complexity_filter(meme_suite)
        atgs_filter = motif.get_remove_atgs_filter(search_distances['upstream'])
        sequence_filters = [motif.unique_filter,
                            low_complexity_filter,
                            atgs_filter]

        organism = make_halo(ratio_matrix, search_distances, scan_distances)
        membership = FakeMembership()
        config_params = {
            'memb.min_cluster_rows_allowed': 3,
            'memb.max_cluster_rows_allowed': 70,
            'multiprocessing': False,
            'num_clusters': 1,
            'output_dir': 'out',
            'num_iterations': 2000,
        }
        scoring = motif.MemeScoringFunction(
            organism, membership, ratio_matrix, meme_suite,
            sequence_filters=sequence_filters,
            scaling_func=lambda iter: 1.0,
            num_motif_func=motif.default_nmotif_fun,
            config_params=config_params)
        matrix = scoring.compute({'iteration': 100})
        """
开发者ID:cplaisier,项目名称:cmonkey-python,代码行数:31,代码来源:meme430_test.py


示例9: test_read_with_semicolon_header_and_comments

 def test_read_with_semicolon_header_and_comments(self):
     """Reads a semicolon delimited file with a header and comments.

     Expects exactly two data lines and the header
     ["header1", "header2"] when '#' lines are treated as comments.
     """
     dfile = util.read_dfile("testdata/withcomments.ssv", sep=';',
                             has_header=True, comment='#')
     lines = dfile.lines
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12
     self.assertEqual(2, len(lines))
     self.assertEqual(["header1", "header2"], dfile.header)
开发者ID:dreiss-isb,项目名称:cmonkey-python,代码行数:7,代码来源:util_test.py


示例10: test_read_with_quotes

 def test_read_with_quotes(self):
     """Reads a semicolon delimited file with quotes.

     Expects the quote character '"' to be stripped from the values of
     both data lines.
     """
     dfile = util.read_dfile("testdata/withquotes.ssv", sep=';',
                             has_header=False, comment='#', quote='"')
     lines = dfile.lines
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12
     self.assertEqual(["value11", "value12"], lines[0])
     self.assertEqual(["value21", "value22"], lines[1])
开发者ID:dreiss-isb,项目名称:cmonkey-python,代码行数:7,代码来源:util_test.py


示例11: test_read_with_tabs

 def test_read_with_tabs(self):
     """Reads a tab delimited file.

     Uses the default arguments of ``util.read_dfile`` and expects two
     data lines and no header.
     """
     dfile = util.read_dfile("testdata/simple.tsv")
     lines = dfile.lines
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12
     self.assertEqual(["value11", "value12"], lines[0])
     self.assertEqual(["value21", "value22"], lines[1])
     self.assertIsNone(dfile.header)
开发者ID:dreiss-isb,项目名称:cmonkey-python,代码行数:7,代码来源:util_test.py


示例12: prepare_ensemble_matrix

def prepare_ensemble_matrix(ratiofile, outdir, n, kmin):
    """Read a ratio file and split the resulting matrix via split_matrix.

    Does nothing when ``ratiofile`` does not exist.

    Args:
        ratiofile: path of the ratio file (read with a header and '"' as
            quote character).
        outdir: forwarded to ``split_matrix``.
        n: forwarded to ``split_matrix``.
        kmin: forwarded to ``split_matrix``.
    """
    factory = DataMatrixFactory([nochange_filter, center_scale_filter])
    if not os.path.exists(ratiofile):
        return

    ratios_dfile = util.read_dfile(ratiofile, has_header=True, quote='\"')
    ratio_matrix = factory.create_from(ratios_dfile)
    split_matrix(ratio_matrix, outdir, n, kmin, ratio_matrix.num_columns)
开发者ID:msGenDev,项目名称:cmonkey-python,代码行数:7,代码来源:datamatrix.py


示例13: __make_ref_operon_pairs

 def __make_ref_operon_pairs(self):
     """Return the reference operon pairs used for comparison.

     Each data line of 'testdata/operon_reftable.tsv' contributes one
     (second column, third column) tuple, in file order.
     """
     table = util.read_dfile('testdata/operon_reftable.tsv',
                             has_header=True, quote='"')
     return [(row[1], row[2]) for row in table.lines]
开发者ID:cplaisier,项目名称:cmonkey-python,代码行数:8,代码来源:operon_nw_test.py


示例14: read_edges3

 def read_edges3(filename):
     """Read edges from a preprocessed file (faster to debug).

     The file is comma-separated with a header and '"' quoting. Each data
     line yields a list [second column, third column, float(fourth column)].
     """
     logging.info("stringdb.read_edges3()")
     rows = util.read_dfile(filename, sep=",", has_header=True,
                            quote='"').lines
     return [[row[1], row[2], float(row[3])] for row in rows]
开发者ID:cplaisier,项目名称:cmonkey-python,代码行数:8,代码来源:stringdb.py


示例15: read_edges2

 def read_edges2(filename):
     """Read edges from a preprocessed file (faster to debug).

     The separator ``sep`` comes from the enclosing scope. Each line yields
     a tuple (first column, second column, float(third column)).
     """
     logging.info("\x1b[31mstringdb:\t\x1b[0mreading interaction network - stringdb.read_edges2()")
     rows = util.read_dfile(filename, sep).lines
     return [(row[0], row[1], float(row[2])) for row in rows]
开发者ID:cplaisier,项目名称:cmonkey-python,代码行数:8,代码来源:stringdb.py


示例16: test_read_with_empty_lines

 def test_read_with_empty_lines(self):
     """Reads a semicolon delimited file containing empty lines.

     Expects the header ["header1", "header2"] and exactly two data lines,
     i.e. the empty lines do not show up in ``dfile.lines``.
     """
     dfile = util.read_dfile("testdata/withemptylines.ssv", sep=';',
                             has_header=True, comment='#', quote='"')
     lines = dfile.lines
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12
     self.assertEqual(["header1", "header2"], dfile.header)
     self.assertEqual(2, len(lines))
     self.assertEqual(["value11", "value12"], lines[0])
     self.assertEqual(["value21", "value22"], lines[1])
开发者ID:dreiss-isb,项目名称:cmonkey-python,代码行数:9,代码来源:util_test.py


示例17: read_csv

 def read_csv(cls, name, infile, cutoff=None, sep=','):
     """Read a set type from a CSV file.

     The first column names the set, the second column is the member; the
     member is upper-cased and added with value 1 to a 'discrete'
     ``EnrichmentSet`` that is created on first sight of the set name.

     Args:
         name: name given to the resulting ``SetType``.
         infile: path handed to ``util.read_dfile``.
         cutoff: not used by this implementation.
         sep: column separator.

     Returns:
         ``SetType(name, sets)``.
     """
     sets = {}
     for row in util.read_dfile(infile, sep).lines:
         set_name = row[0]
         try:
             members = sets[set_name]
         except KeyError:
             members = sets[set_name] = EnrichmentSet('discrete')
         members.add(row[1].upper(), 1)
     return SetType(name, sets)
开发者ID:dreiss-isb,项目名称:cmonkey-python,代码行数:9,代码来源:set_enrichment.py


示例18: make_microbe

    def make_microbe(self):
        """Return the organism object to work on.

        Reads the KEGG and GO files, sets up the RSAT and Microbes Online
        handles and registers two network factories (STRING and operon)
        before creating the organism through ``org.MicrobeFactory``.

        When no STRING file is configured, one is derived from the
        organism's NCBI code, stored in ``self['string_file']`` and fetched
        through ``util.get_url_cached``.

        Returns:
            The organism created for ``self['organism_code']`` with this
            run's search and scan distances.
        """
        keggfile = util.read_dfile(KEGG_FILE_PATH, comment='#')
        gofile = util.read_dfile(GO_FILE_PATH)
        rsatdb = rsat.RsatDatabase(RSAT_BASE_URL, self['cache_dir'])
        mo_db = microbes_online.MicrobesOnline()
        stringfile = self.config_params['string_file']
        kegg_mapper = org.make_kegg_code_mapper(keggfile)
        rsat_mapper = org.make_rsat_organism_mapper(rsatdb)

        # automatically download STRING file
        if stringfile is None:
            rsat_info = rsat_mapper(kegg_mapper(self['organism_code']))
            ncbi_code = rsat_info.taxonomy_id
            # single-argument print() emits the same text on Python 2 and 3
            # (the old print statement was a syntax error on Python 3)
            print("NCBI CODE IS:  %s" % (ncbi_code,))
            url = STRING_URL_PATTERN % ncbi_code
            stringfile = "%s/%s.gz" % (self['cache_dir'], ncbi_code)
            self['string_file'] = stringfile
            logging.info("Automatically using STRING file in '%s'", stringfile)
            util.get_url_cached(url, stringfile)

        # stringfile is never None at this point (it was either configured
        # or just derived above), so the STRING factory is always added and
        # the former "no STRING file specified" branch was unreachable.
        nw_factories = [stringdb.get_network_factory2(
            self['organism_code'], stringfile, 0.5)]

        # NOTE(review): '/' is integer division only on Python 2; confirm
        # the expected type of max_operon_size before porting to Python 3.
        nw_factories.append(microbes_online.get_network_factory(
                mo_db, max_operon_size=self.ratio_matrix.num_rows / 20,
                weight=0.5))

        org_factory = org.MicrobeFactory(kegg_mapper,
                                         rsat_mapper,
                                         org.make_go_taxonomy_mapper(gofile),
                                         mo_db,
                                         nw_factories)
        return org_factory.create(self['organism_code'],
                                  self['search_distances'],
                                  self['scan_distances'])
开发者ID:cplaisier,项目名称:cmonkey-python,代码行数:40,代码来源:cmonkey_run.py


示例19: create_from_delimited_file2

def create_from_delimited_file2(dfile):
    """Create a thesaurus from a delimited file of the form
    <original>SEPARATOR<alt1>;<alt2>;...

    Args:
        dfile: either a path (str), which is read as a comma-separated
            file without header, or an object exposing ``lines``.

    Returns:
        Dict mapping every upper-cased alternative, and the upper-cased
        original itself, to the upper-cased original. Later lines
        overwrite earlier entries for the same key.
    """
    if isinstance(dfile, str):
        dfile = util.read_dfile(dfile, sep=',', has_header=False)

    thesaurus = {}
    for row in dfile.lines:
        canonical = row[0].upper()
        thesaurus[canonical] = canonical  # original should map to itself
        thesaurus.update((alias.upper(), canonical)
                         for alias in row[1].split(';'))
    return thesaurus
开发者ID:cplaisier,项目名称:cmonkey-python,代码行数:13,代码来源:thesaurus.py


示例20: read_edges2

 def read_edges2(filename):
     """Read edges from a preprocessed file (faster to debug).

     ``sep``, ``organism_code`` and ``normalized`` come from the enclosing
     scope. Each line yields a tuple (node1, node2, score) where both node
     names are passed through ``patches.patch_string_gene`` and the score
     is ``float`` of the third column.

     Returns:
         List of edge tuples; when ``normalized`` is false the scores are
         passed through ``normalize_edges_to_max_score``.
     """
     logging.info("stringdb.read_edges2()")
     dfile = util.read_dfile(filename, sep)
     result = []
     max_score = 0.0
     for line in dfile.lines:
         score = float(line[2])
         max_score = max(score, max_score)
         result.append((patches.patch_string_gene(organism_code, line[0]),
                        patches.patch_string_gene(organism_code, line[1]),
                        score))
     if not normalized:
         # Edges are immutable tuples, so their scores cannot be rescaled
         # in place; the return value used to be discarded here. Use the
         # returned list when the helper provides one, and keep `result`
         # when it returns None (i.e. it replaced list items in place).
         rescaled = normalize_edges_to_max_score(result, max_score)
         if rescaled is not None:
             result = rescaled
     return result
开发者ID:dreiss-isb,项目名称:cmonkey-python,代码行数:15,代码来源:stringdb.py



注:本文中的util.read_dfile函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python util.read_file函数代码示例发布时间:2022-05-26
下一篇:
Python util.range_check函数代码示例发布时间:2022-05-26
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap