• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    公众号

Python file.mkstempfname函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中util.file.mkstempfname函数的典型用法代码示例。如果您正苦于以下问题:Python mkstempfname函数的具体用法?Python mkstempfname怎么用?Python mkstempfname使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了mkstempfname函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: filter_lastal_bam

def filter_lastal_bam(inBam, db, outBam, JVMmemory=None):
    ''' Restrict input reads to those that align to the given
        reference database using LASTAL.
    '''
    # convert BAM to paired FASTQ
    inReads1 = mkstempfname('.1.fastq')
    inReads2 = mkstempfname('.2.fastq')
    tools.picard.SamToFastqTool().execute(inBam, inReads1, inReads2)

    # look for hits in inReads1 and inReads2
    hitList1 = mkstempfname('.1.hits')
    hitList2 = mkstempfname('.2.hits')
    lastal_get_hits(inReads1, db, hitList1)
    os.unlink(inReads1)
    lastal_get_hits(inReads2, db, hitList2)
    os.unlink(inReads2)

    # merge hits
    hitList = mkstempfname('.hits')
    with open(hitList, 'wt') as outf:
        subprocess.check_call(['sort', '-u', hitList1, hitList2], stdout=outf)
    os.unlink(hitList1)
    os.unlink(hitList2)

    # filter original BAM file against keep list
    tools.picard.FilterSamReadsTool().execute(inBam, False, hitList, outBam, JVMmemory=JVMmemory)
    os.unlink(hitList)
开发者ID:gzluo,项目名称:viral-ngs,代码行数:27,代码来源:taxon_filter.py


示例2: blastn_chunked_fasta

def blastn_chunked_fasta(fasta, db, chunkSize=1000000):
    """
    Helper function: blastn a fasta file, overcoming apparent memory leaks on
    an input with many query sequences, by splitting it into multiple chunks
    and running a new blastn process on each chunk. Return a list of output
    filenames containing hits
    """
    blastnPath = tools.blast.BlastnTool().install_and_get_path()

    hits_files = []
    with open(fasta, "rt") as fastaFile:
        record_iter = SeqIO.parse(fastaFile, "fasta")
        for batch in batch_iterator(record_iter, chunkSize):
            chunk_fasta = mkstempfname('.fasta')
            with open(chunk_fasta, "wt") as handle:
                SeqIO.write(batch, handle, "fasta")
            batch = None

            chunk_hits = mkstempfname('.hits.txt')
            blastnCmd = [blastnPath, '-db', db, '-word_size', '16', '-evalue', '1e-6', '-outfmt', '6', '-max_target_seqs',
                         '2', '-query', chunk_fasta, '-out', chunk_hits]
            log.debug(' '.join(blastnCmd))
            subprocess.check_call(blastnCmd)

            os.unlink(chunk_fasta)
            hits_files.append(chunk_hits)

    return hits_files
开发者ID:gzluo,项目名称:viral-ngs,代码行数:28,代码来源:taxon_filter.py


示例3: align_and_fix

def align_and_fix(inBam, refFasta, outBamAll=None, outBamFiltered=None,
    novoalign_options='', JVMmemory=None):
    ''' Take reads, align to reference with Novoalign, mark duplicates
        with Picard, realign indels with GATK, and optionally filter
        final file to mapped/non-dupe reads.
    '''
    if not (outBamAll or outBamFiltered):
        log.warn("are you sure you meant to do nothing?")
        return
    
    bam_aligned = mkstempfname('.aligned.bam')
    tools.novoalign.NovoalignTool().execute(
        inBam, refFasta, bam_aligned,
        options=novoalign_options.split(), JVMmemory=JVMmemory)
    
    bam_mkdup = mkstempfname('.mkdup.bam')
    tools.picard.MarkDuplicatesTool().execute(
        [bam_aligned], bam_mkdup,
        picardOptions=['CREATE_INDEX=true'], JVMmemory=JVMmemory)
    os.unlink(bam_aligned)
    
    bam_realigned = mkstempfname('.realigned.bam')
    tools.gatk.GATKTool().local_realign(
        bam_mkdup, refFasta, bam_realigned, JVMmemory=JVMmemory)
    os.unlink(bam_mkdup)
    
    if outBamAll:
        shutil.copyfile(bam_realigned, outBamAll)
        tools.picard.BuildBamIndexTool().execute(outBamAll)
    if outBamFiltered:
        tools.samtools.SamtoolsTool().view(
            ['-b', '-q', '1', '-F', '1028'],
            bam_realigned, outBamFiltered)
        tools.picard.BuildBamIndexTool().execute(outBamFiltered)
    os.unlink(bam_realigned)
开发者ID:ACEGID-Senegal,项目名称:viral-ngs,代码行数:35,代码来源:read_utils.py


示例4: multi_db_deplete_bam

def multi_db_deplete_bam(inBam, refDbs, deplete_method, outBam, **kwargs):

    tmpDb = None
    if len(refDbs)>1 and not any(
            not os.path.exists(db)  # indexed db prefix
            or os.path.isdir(db)       # indexed db in directory
            or (os.path.isfile(db) and ('.tar' in db or '.tgz' in db or '.zip' in db)) # packaged indexed db
            for db in refDbs):
        # this is a scenario where all refDbs are unbuilt fasta
        # files. we can simplify and speed up execution by
        # concatenating them all and running deplete_method
        # just once
        tmpDb = mkstempfname('.fasta')
        merge_compressed_files(refDbs, tmpDb, sep='\n')
        refDbs = [tmpDb]

    samtools = tools.samtools.SamtoolsTool()
    tmpBamIn = inBam
    for db in refDbs:
        if not samtools.isEmpty(tmpBamIn):
            tmpBamOut = mkstempfname('.bam')
            deplete_method(tmpBamIn, db, tmpBamOut, **kwargs)
            if tmpBamIn != inBam:
                os.unlink(tmpBamIn)
            tmpBamIn = tmpBamOut
    shutil.copyfile(tmpBamIn, outBam)

    if tmpDb:
        os.unlink(tmpDb)
开发者ID:broadinstitute,项目名称:viral-ngs,代码行数:29,代码来源:taxon_filter.py


示例5: deplete_blastn

def deplete_blastn(inFastq, outFastq, refDbs) :
    'Use blastn to remove reads that match at least one of the databases.'
    
    ## Get tools
    noBlastHits_v3Path = os.path.join(util.file.get_scripts_path(),
                                      'noBlastHits_v3.py')
    
    ## Convert to fasta
    inFasta = mkstempfname('.fasta')
    read_utils.fastq_to_fasta(inFastq, inFasta)
    
    ## Run blastn using each of the databases in turn
    blastOutFiles = []
    for db in refDbs :
        log.info("running blastn on %s against %s", inFastq, db)
        blastOutFiles += blastn_chunked_fasta(inFasta, db)

    ## Combine results from different databases
    blastOutCombined = mkstempfname('.txt')
    catCmd = ['cat'] + blastOutFiles
    log.debug(' '.join(catCmd) + '> ' + blastOutCombined)
    with open(blastOutCombined, 'wt') as outf:
        subprocess.check_call(catCmd, stdout = outf)

    ## run noBlastHits_v3.py to extract reads with no blast hits
    # TODO: slurp the small amount of code in this script into here
    noBlastHitsCmd = ['python', noBlastHits_v3Path, '-b', blastOutCombined,
                     '-r', inFastq, '-m', 'nohit']
    log.debug(' '.join(noBlastHitsCmd) + '> ' + outFastq)
    with util.file.open_or_gzopen(outFastq, 'wt') as outf :
        subprocess.check_call(noBlastHitsCmd, stdout = outf)
开发者ID:ACEGID-Senegal,项目名称:viral-ngs,代码行数:31,代码来源:taxon_filter.py


示例6: trimmomatic

def trimmomatic(inFastq1, inFastq2, pairedOutFastq1, pairedOutFastq2,
        clipFasta):
    trimmomaticPath = tools.trimmomatic.TrimmomaticTool().install_and_get_path()
    tmpUnpaired1 = mkstempfname()
    tmpUnpaired2 = mkstempfname()

    #  This java program has a lot of argments...
    javaCmd = ['java', '-Xmx2g',
        '-Djava.io.tmpdir='+tempfile.tempdir,
        '-classpath',
        trimmomaticPath,
        'org.usadellab.trimmomatic.TrimmomaticPE',
        inFastq1,
        inFastq2,
        pairedOutFastq1,
        tmpUnpaired1,
        pairedOutFastq2,
        tmpUnpaired2,
        'LEADING:20', 'TRAILING:20', 'SLIDINGWINDOW:4:25', 'MINLEN:30',
        'ILLUMINACLIP:{}:2:30:12'.format(clipFasta)
        ]

    log.debug(' '.join(javaCmd))
    subprocess.check_call(javaCmd)
    os.unlink(tmpUnpaired1)
    os.unlink(tmpUnpaired2)
开发者ID:mlin,项目名称:viral-ngs,代码行数:26,代码来源:taxon_filter.py


示例7: trimmomatic

def trimmomatic(
    inFastq1,
    inFastq2,
    pairedOutFastq1,
    pairedOutFastq2,
    clipFasta,
    unpairedOutFastq1=None,
    unpairedOutFastq2=None,
    leading_q_cutoff=15,
    trailing_q_cutoff=15,
    minlength_to_keep=30,
    sliding_window_size=4,
    sliding_window_q_cutoff=25
):
    '''Trim read sequences with Trimmomatic.'''
    trimmomaticPath = tools.trimmomatic.TrimmomaticTool().install_and_get_path()
    unpairedFastq1 = unpairedOutFastq1 or mkstempfname()
    unpairedFastq2 = unpairedOutFastq2 or mkstempfname()

    javaCmd = []

    # the conda version wraps the jar file with a shell script
    if trimmomaticPath.endswith(".jar"):
        #  This java program has a lot of argments...
        javaCmd.extend(
            [
                'java', '-Xmx2g', '-Djava.io.tmpdir=' + tempfile.tempdir, '-classpath', trimmomaticPath,
                'org.usadellab.trimmomatic.TrimmomaticPE'
            ]
        )
    else:
        javaCmd.extend([trimmomaticPath, "PE"])

    # Explicitly use Phred-33 quality scores
    javaCmd.extend(['-phred33'])

    javaCmd.extend(
        [
            inFastq1, inFastq2, pairedOutFastq1, unpairedFastq1, pairedOutFastq2, unpairedFastq2,
            'LEADING:{leading_q_cutoff}'.format(leading_q_cutoff=leading_q_cutoff),
            'TRAILING:{trailing_q_cutoff}'.format(trailing_q_cutoff=trailing_q_cutoff),
            'SLIDINGWINDOW:{sliding_window_size}:{sliding_window_q_cutoff}'.format(
                sliding_window_size=sliding_window_size,
                sliding_window_q_cutoff=sliding_window_q_cutoff,
            ), 
            'MINLEN:{minlength_to_keep}'.format(minlength_to_keep=minlength_to_keep),
            'ILLUMINACLIP:{clipFasta}:2:30:12'.format(clipFasta=clipFasta)
        ]
    )

    log.debug(' '.join(javaCmd))
    util.misc.run_and_print(javaCmd, check=True)

    if not unpairedOutFastq1:
        os.unlink(unpairedFastq1)
    if not unpairedOutFastq2:
        os.unlink(unpairedFastq2)
开发者ID:yesimon,项目名称:viral-ngs,代码行数:57,代码来源:taxon_filter.py


示例8: rmdup_mvicuna_bam

def rmdup_mvicuna_bam(inBam, outBam, JVMmemory=None):
    ''' Remove duplicate reads from BAM file using M-Vicuna. The
        primary advantage to this approach over Picard's MarkDuplicates tool
        is that Picard requires that input reads are aligned to a reference,
        and M-Vicuna can operate on unaligned reads.
    '''

    # Convert BAM -> FASTQ pairs per read group and load all read groups
    tempDir = tempfile.mkdtemp()
    tools.picard.SamToFastqTool().per_read_group(inBam, tempDir, picardOptions=['VALIDATION_STRINGENCY=LENIENT'])
    read_groups = [x[1:] for x in tools.samtools.SamtoolsTool().getHeader(inBam) if x[0] == '@RG']
    read_groups = [dict(pair.split(':', 1) for pair in rg) for rg in read_groups]

    # Collect FASTQ pairs for each library
    lb_to_files = {}
    for rg in read_groups:
        lb_to_files.setdefault(rg.get('LB', 'none'), set())
        fname = rg['ID']
        if 'PU' in rg:
            fname = rg['PU']
        lb_to_files[rg.get('LB', 'none')].add(os.path.join(tempDir, fname))
    log.info("found %d distinct libraries and %d read groups", len(lb_to_files), len(read_groups))

    # For each library, merge FASTQs and run rmdup for entire library
    readList = mkstempfname('.keep_reads.txt')
    for lb, files in lb_to_files.items():
        log.info("executing M-Vicuna DupRm on library " + lb)

        # create merged FASTQs per library
        infastqs = (mkstempfname('.1.fastq'), mkstempfname('.2.fastq'))
        for d in range(2):
            with open(infastqs[d], 'wt') as outf:
                for fprefix in files:
                    fn = '%s_%d.fastq' % (fprefix, d + 1)
                    if os.path.isfile(fn):
                        with open(fn, 'rt') as inf:
                            for line in inf:
                                outf.write(line)
                        os.unlink(fn)
                    else:
                        log.warn(
                            """no reads found in %s,
                                    assuming that's because there's no reads in that read group""", fn
                        )

        # M-Vicuna DupRm to see what we should keep (append IDs to running file)
        if os.path.getsize(infastqs[0]) > 0 or os.path.getsize(infastqs[1]) > 0:
            mvicuna_fastqs_to_readlist(infastqs[0], infastqs[1], readList)
        for fn in infastqs:
            os.unlink(fn)

    # Filter original input BAM against keep-list
    tools.picard.FilterSamReadsTool().execute(inBam, False, readList, outBam, JVMmemory=JVMmemory)
    return 0
开发者ID:tianyabeef,项目名称:viral-ngs,代码行数:54,代码来源:read_utils.py


示例9: filter_lastal

def filter_lastal(inFastq, refDb, outFastq):
    ''' Restrict input reads to those that align to the given
        reference database using LASTAL.  Also, remove duplicates with prinseq.
    '''
    assert outFastq.endswith('.fastq')
    tempFilePath = mkstempfname('.hits')
    lastalPath = tools.last.Lastal().install_and_get_path()
    mafSortPath = tools.last.MafSort().install_and_get_path()
    mafConvertPath = tools.last.MafConvert().install_and_get_path()
    prinseqPath = tools.prinseq.PrinseqTool().install_and_get_path()
    noBlastLikeHitsPath = os.path.join( util.file.get_scripts_path(),
                                        'noBlastLikeHits.py')

    # each pipe separated cmd gets own line
    # unfortunately, it doesn't seem to work to do .format(**locals()) on the
    # final string as opposed to the individual parts.
    lastalCmd = ' '.join([
        '{lastalPath} -Q1 {refDb} {inFastq}'.format(**locals()),
        '| {mafSortPath} -n2'.format(**locals()),
        '| {mafConvertPath} tab /dev/stdin > {tempFilePath}'.format(**locals()),
        ])
    log.debug(lastalCmd)
    assert not os.system(lastalCmd)

    # filter inFastq against lastal hits
    filteredFastq = mkstempfname('.filtered.fastq')
    with open(filteredFastq, 'wt') as outf:
        noBlastLikeHitsCmd = [
            noBlastLikeHitsPath, '-b', tempFilePath, '-r', inFastq, '-m', 'hit']
        log.debug(' '.join(noBlastLikeHitsCmd) + ' > ' + filteredFastq)
        subprocess.check_call(noBlastLikeHitsCmd, stdout=outf)
    
    # remove duplicate reads and reads with multiple Ns
    if os.path.getsize(filteredFastq) == 0:
        # prinseq-lite fails on empty file input (which can happen in real life
        # if no reads match the refDb) so handle this scenario specially
        log.info("output is empty: no reads in input match refDb")
        shutil.copyfile(filteredFastq, outFastq)
    else:
        prinseqCmd = [
            'perl', prinseqPath,
                '-ns_max_n', '1',
                '-derep', '1',
                '-fastq', filteredFastq,
                '-out_bad', 'null',
                '-line_width', '0',
                '-out_good', outFastq[:-6]
            ]
        log.debug(' '.join(prinseqCmd))
        subprocess.check_call(prinseqCmd)
    os.unlink(filteredFastq)
开发者ID:ACEGID-Senegal,项目名称:viral-ngs,代码行数:51,代码来源:taxon_filter.py


示例10: deplete_blastn_paired

def deplete_blastn_paired(infq1, infq2, outfq1, outfq2, refDbs):
    tmpfq1_a = mkstempfname('.fastq')
    tmpfq1_b = mkstempfname('.fastq')
    tmpfq2_b = mkstempfname('.fastq')
    tmpfq2_c = mkstempfname('.fastq')
    # deplete fq1
    deplete_blastn(infq1, tmpfq1_a, refDbs)
    # purge fq2 of read pairs lost in fq1
    # (this should significantly speed up the second run of deplete_blastn)
    read_utils.purge_unmated(tmpfq1_a, infq2, tmpfq1_b, tmpfq2_b)
    # deplete fq2
    deplete_blastn(tmpfq2_b, tmpfq2_c, refDbs)
    # purge fq1 of read pairs lost in fq2
    read_utils.purge_unmated(tmpfq1_b, tmpfq2_c, outfq1, outfq2)
开发者ID:mlin,项目名称:viral-ngs,代码行数:14,代码来源:taxon_filter.py


示例11: deplete_blastn_paired

def deplete_blastn_paired(infq1, infq2, outfq1, outfq2, refDbs, threads):
    'Use blastn to remove reads that match at least one of the databases.'
    tmpfq1_a = mkstempfname('.fastq')
    tmpfq1_b = mkstempfname('.fastq')
    tmpfq2_b = mkstempfname('.fastq')
    tmpfq2_c = mkstempfname('.fastq')
    # deplete fq1
    deplete_blastn(infq1, tmpfq1_a, refDbs)
    # purge fq2 of read pairs lost in fq1
    # (this should significantly speed up the second run of deplete_blastn)
    read_utils.purge_unmated(tmpfq1_a, infq2, tmpfq1_b, tmpfq2_b)
    # deplete fq2
    deplete_blastn(tmpfq2_b, tmpfq2_c, refDbs, threads)
    # purge fq1 of read pairs lost in fq2
    read_utils.purge_unmated(tmpfq1_b, tmpfq2_c, outfq1, outfq2)
开发者ID:yesimon,项目名称:viral-ngs,代码行数:15,代码来源:taxon_filter.py


示例12: main_reheader_bams

def main_reheader_bams(args):
    ''' Copy BAM files while renaming elements of the BAM header.
        The mapping file specifies which (key, old value, new value) mappings. For
        example:
            LB  lib1  lib_one
            SM  sample1 Sample_1
            SM  sample2 Sample_2
            SM  sample3 Sample_3
            CN  broad   BI
            FN  in1.bam out1.bam
            FN  in2.bam out2.bam
    '''
    # read mapping file
    mapper = dict((a+':'+b, a+':'+c) for a,b,c in util.file.read_tabfile(args.rgMap) if a != 'FN')
    files = list((b,c) for a,b,c in util.file.read_tabfile(args.rgMap) if a == 'FN')
    header_file = mkstempfname('.sam')
    # read and convert bam headers
    for inBam, outBam in files:
        if os.path.isfile(inBam):
            with open(header_file, 'wt') as outf:
                for row in tools.samtools.SamtoolsTool().getHeader(inBam):
                    if row[0] == '@RG':
                        row = [mapper.get(x, x) for x in row]
                    outf.write('\t'.join(row)+'\n')
            # write new bam with new header
            tools.samtools.SamtoolsTool().reheader(inBam, header_file, outBam)
    os.unlink(header_file)
    return 0
开发者ID:gzluo,项目名称:viral-ngs,代码行数:28,代码来源:read_utils.py


示例13: fastq_to_bam

def fastq_to_bam(inFastq1, inFastq2, outBam, sampleName=None, header=None,
                 JVMmemory=tools.picard.FastqToSamTool.jvmMemDefault, picardOptions=None):
    ''' Convert a pair of fastq paired-end read files and optional text header
        to a single bam file.
    '''
    picardOptions = picardOptions or []

    if header:
        fastqToSamOut = mkstempfname('.bam')
    else:
        fastqToSamOut = outBam
    if sampleName is None:
        sampleName = 'Dummy'  # Will get overwritten by rehead command
    if header:
        # With the header option, rehead will be called after FastqToSam.
        # This will invalidate any md5 file, which would be a slow to construct
        # on our own, so just disallow and let the caller run md5sum if desired.
        if any(opt.lower() == 'CREATE_MD5_FILE=True'.lower() for opt in picardOptions):
            raise Exception("""CREATE_MD5_FILE is not allowed with '--header.'""")
    tools.picard.FastqToSamTool().execute(
        inFastq1,
        inFastq2,
        sampleName,
        fastqToSamOut,
        picardOptions=picardOptions,
        JVMmemory=JVMmemory)

    if header:
        tools.samtools.SamtoolsTool().reheader(fastqToSamOut, header, outBam)

    return 0
开发者ID:gzluo,项目名称:viral-ngs,代码行数:31,代码来源:read_utils.py


示例14: align_and_count_hits

def align_and_count_hits(inBam, refFasta, outCounts, includeZeros=False,
                  JVMmemory=None, threads=1):
    ''' Take reads, align to reference with Novoalign and return aligned
        read counts for each reference sequence.
    '''

    bam_aligned = mkstempfname('.aligned.bam')
    tools.novoalign.NovoalignTool().execute(
        inBam,
        refFasta,
        bam_aligned,
        options=['-r', 'Random'],
        JVMmemory=JVMmemory)

    samtools = tools.samtools.SamtoolsTool()
    seqs = list(dict(x.split(':', 1) for x in row[1:])['SN']
        for row in samtools.getHeader(bam_aligned)
        if row[0]=='@SQ')

    with util.file.open_or_gzopen(outCounts, 'w') as outf:
        for seq in seqs:
            n = samtools.count(bam_aligned, regions=[seq])
            if n>0 or includeZeros:
                outf.write("{}\t{}\n".format(seq, n))

    os.unlink(bam_aligned)
开发者ID:gzluo,项目名称:viral-ngs,代码行数:26,代码来源:read_utils.py


示例15: lastal_get_hits

def lastal_get_hits(
    inFastq,
    db,
    outList,
    max_gapless_alignments_per_position=1,
    min_length_for_initial_matches=5,
    max_length_for_initial_matches=50,
    max_initial_matches_per_position=100
):
    filteredFastq = mkstempfname('.filtered.fastq')
    lastal_chunked_fastq(
        inFastq,
        db,
        filteredFastq,
        max_gapless_alignments_per_position=max_gapless_alignments_per_position,
        min_length_for_initial_matches=min_length_for_initial_matches,
        max_length_for_initial_matches=max_length_for_initial_matches,
        max_initial_matches_per_position=max_initial_matches_per_position
    )

    with open(outList, 'wt') as outf:
        with open(filteredFastq, 'rt') as inf:
            line_num = 0
            for line in inf:
                if (line_num % 4) == 0:
                    seq_id = line.rstrip('\n\r')[1:]
                    if seq_id.endswith('/1') or seq_id.endswith('/2'):
                        seq_id = seq_id[:-2]
                    outf.write(seq_id + '\n')
                line_num += 1

    os.unlink(filteredFastq)
开发者ID:yesimon,项目名称:viral-ngs,代码行数:32,代码来源:taxon_filter.py


示例16: split_bam

def split_bam(inBam, outBams):
    '''Split BAM file equally into several output BAM files. '''
    samtools = tools.samtools.SamtoolsTool()
    picard = tools.picard.PicardTools()

    # get totalReadCount and maxReads
    # maxReads = totalReadCount / num files, but round up to the nearest
    # even number in order to keep read pairs together (assuming the input
    # is sorted in query order and has no unmated reads, which can be
    # accomplished by Picard RevertSam with SANITIZE=true)
    totalReadCount = samtools.count(inBam)
    maxReads = int(math.ceil(float(totalReadCount) / len(outBams) / 2) * 2)
    log.info("splitting %d reads into %d files of %d reads each", totalReadCount, len(outBams), maxReads)

    # load BAM header into memory
    header = samtools.getHeader(inBam)
    if 'SO:queryname' not in header[0]:
        raise Exception('Input BAM file must be sorted in queryame order')

    # dump to bigsam
    bigsam = mkstempfname('.sam')
    samtools.view([], inBam, bigsam)

    # split bigsam into little ones
    with util.file.open_or_gzopen(bigsam, 'rt') as inf:
        for outBam in outBams:
            log.info("preparing file " + outBam)
            tmp_sam_reads = mkstempfname('.sam')
            with open(tmp_sam_reads, 'wt') as outf:
                for row in header:
                    outf.write('\t'.join(row) + '\n')
                for _ in range(maxReads):
                    line = inf.readline()
                    if not line:
                        break
                    outf.write(line)
                if outBam == outBams[-1]:
                    for line in inf:
                        outf.write(line)
            picard.execute(
                "SamFormatConverter", [
                    'INPUT=' + tmp_sam_reads, 'OUTPUT=' + outBam, 'VERBOSITY=WARNING'
                ],
                JVMmemory='512m'
            )
            os.unlink(tmp_sam_reads)
    os.unlink(bigsam)
开发者ID:tianyabeef,项目名称:viral-ngs,代码行数:47,代码来源:read_utils.py


示例17: lastal_get_hits

def lastal_get_hits(inFastq, db, outList):
    lastalPath = tools.last.Lastal().install_and_get_path()
    mafSortPath = tools.last.MafSort().install_and_get_path()
    mafConvertPath = tools.last.MafConvert().install_and_get_path()
    noBlastLikeHitsPath = os.path.join( util.file.get_scripts_path(),
                                        'noBlastLikeHits.py')
    
    lastalOut = mkstempfname('.lastal')
    with open(lastalOut, 'wt') as outf:
        cmd = [lastalPath, '-Q1', db, inFastq]
        log.debug(' '.join(cmd) + ' > ' + lastalOut)
        subprocess.check_call(cmd, stdout=outf)
    # everything below this point in this method should be replaced with
    # our own code that just reads lastal output and makes a list of read names
    
    mafSortOut = mkstempfname('.mafsort')
    with open(mafSortOut, 'wt') as outf:
        with open(lastalOut, 'rt') as inf:
            cmd = [mafSortPath, '-n2']
            log.debug('cat ' + lastalOut + ' | ' + ' '.join(cmd) + ' > ' + mafSortOut)
            subprocess.check_call(cmd, stdin=inf, stdout=outf)
    os.unlink(lastalOut)
    
    mafConvertOut = mkstempfname('.mafconvert')
    with open(mafConvertOut, 'wt') as outf:
        cmd = [mafConvertPath, 'tab', mafSortOut]
        log.debug(' '.join(cmd) + ' > ' + mafConvertOut)
        subprocess.check_call(cmd, stdout=outf)
    os.unlink(mafSortOut)
    
    filteredFastq = mkstempfname('.filtered.fastq')
    with open(filteredFastq, 'wt') as outf:
        cmd = [noBlastLikeHitsPath, '-b', mafConvertOut, '-r', inFastq, '-m', 'hit']
        log.debug(' '.join(cmd) + ' > ' + filteredFastq)
        subprocess.check_call(cmd, stdout=outf)
    os.unlink(mafConvertOut)
    
    with open(outList, 'wt') as outf:
        with open(filteredFastq, 'rt') as inf:
            line_num = 0
            for line in inf:
                if (line_num % 4) == 0:
                    id = line.rstrip('\n\r')[1:]
                    if id.endswith('/1') or id.endswith('/2'):
                        id = id[:-2]
                    outf.write(id+'\n')
                line_num += 1
开发者ID:ACEGID-Senegal,项目名称:viral-ngs,代码行数:47,代码来源:taxon_filter.py


示例18: multi_db_deplete_bam

def multi_db_deplete_bam(inBam, refDbs, deplete_method, outBam):
    tmpBamIn = inBam
    for db in refDbs:
        tmpBamOut = mkstempfname('.bam')
        deplete_method(tmpBamIn, db, tmpBamOut)
        if tmpBamIn != inBam:
            os.unlink(tmpBamIn)
        tmpBamIn = tmpBamOut
    shutil.copyfile(tmpBamIn, outBam)
开发者ID:mlin,项目名称:viral-ngs,代码行数:9,代码来源:taxon_filter.py


示例19: main_deplete_human

def main_deplete_human(args):
    ''' Run the entire depletion pipeline: bmtagger, mvicuna, blastn.
        Optionally, use lastal to select a specific taxon of interest.'''

    # only RevertSam if inBam is already aligned
    # Most of the time the input will be unaligned
    # so we can save save time if we can skip RevertSam in the unaligned case
    #
    # via the SAM/BAM spec, if the file is aligned, an SQ line should be present
    # in the header. Using pysam, we can check this if header['SQ'])>0
    #   https://samtools.github.io/hts-specs/SAMv1.pdf

    # if the user has requested a revertBam
    revertBamOut = args.revertBam if args.revertBam else mkstempfname('.bam')

    bamToDeplete = args.inBam
    with pysam.AlignmentFile(args.inBam, 'rb', check_sq=False) as bam:
        # if it looks like the bam is aligned, revert it
        if 'SQ' in bam.header and len(bam.header['SQ'])>0:      
            tools.picard.RevertSamTool().execute(
                args.inBam, revertBamOut, picardOptions=['SORT_ORDER=queryname', 'SANITIZE=true']
            )
            bamToDeplete = revertBamOut
        else:
            # if we don't need to produce a revertBam file
            # but the user has specified one anyway
            # simply touch the output
            if args.revertBam:
                log.warning("An output was specified for 'revertBam', but the input is unaligned, so RevertSam was not needed. Touching the output.")
                util.file.touch(revertBamOut)
                # TODO: error out? run RevertSam anyway?

    multi_db_deplete_bam(
        bamToDeplete,
        args.bmtaggerDbs,
        deplete_bmtagger_bam,
        args.bmtaggerBam,
        threads=args.threads,
        JVMmemory=args.JVMmemory
    )

    # if the user has not specified saving a revertBam, we used a temp file and can remove it
    if not args.revertBam:
        os.unlink(revertBamOut)

    read_utils.rmdup_mvicuna_bam(args.bmtaggerBam, args.rmdupBam, JVMmemory=args.JVMmemory)
    multi_db_deplete_bam(
        args.rmdupBam,
        args.blastDbs,
        deplete_blastn_bam,
        args.blastnBam,
        threads=args.threads,
        JVMmemory=args.JVMmemory
    )
    if args.taxfiltBam and args.lastDb:
        filter_lastal_bam(args.blastnBam, args.lastDb, args.taxfiltBam, JVMmemory=args.JVMmemory)
    return 0
开发者ID:yesimon,项目名称:viral-ngs,代码行数:57,代码来源:taxon_filter.py


示例20: mvicuna_fastqs_to_readlist

def mvicuna_fastqs_to_readlist(inFastq1, inFastq2, readList):
    # Run M-Vicuna on FASTQ files
    outFastq1 = mkstempfname('.1.fastq')
    outFastq2 = mkstempfname('.2.fastq')
    tools.mvicuna.MvicunaTool().rmdup((inFastq1, inFastq2), (outFastq1, outFastq2), None)

    # Make a list of reads to keep
    with open(readList, 'at') as outf:
        for fq in (outFastq1, outFastq2):
            with util.file.open_or_gzopen(fq, 'rt') as inf:
                line_num = 0
                for line in inf:
                    if (line_num % 4) == 0:
                        idVal = line.rstrip('\n')[1:]
                        if idVal.endswith('/1'):
                            outf.write(idVal[:-2] + '\n')
                    line_num += 1
    os.unlink(outFastq1)
    os.unlink(outFastq2)
开发者ID:gzluo,项目名称:viral-ngs,代码行数:19,代码来源:read_utils.py



注:本文中的util.file.mkstempfname函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python forest.get_attributedelta_type函数代码示例发布时间:2022-05-26
下一篇:
Python encode.encodestring函数代码示例发布时间:2022-05-26
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap