本文整理汇总了Python中util.file.mkstempfname函数的典型用法代码示例。如果您正苦于以下问题:Python mkstempfname函数的具体用法?Python mkstempfname怎么用?Python mkstempfname使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了mkstempfname函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: filter_lastal_bam
def filter_lastal_bam(inBam, db, outBam, JVMmemory=None):
''' Restrict input reads to those that align to the given
reference database using LASTAL.
'''
# convert BAM to paired FASTQ
inReads1 = mkstempfname('.1.fastq')
inReads2 = mkstempfname('.2.fastq')
tools.picard.SamToFastqTool().execute(inBam, inReads1, inReads2)
# look for hits in inReads1 and inReads2
hitList1 = mkstempfname('.1.hits')
hitList2 = mkstempfname('.2.hits')
lastal_get_hits(inReads1, db, hitList1)
os.unlink(inReads1)
lastal_get_hits(inReads2, db, hitList2)
os.unlink(inReads2)
# merge hits
hitList = mkstempfname('.hits')
with open(hitList, 'wt') as outf:
subprocess.check_call(['sort', '-u', hitList1, hitList2], stdout=outf)
os.unlink(hitList1)
os.unlink(hitList2)
# filter original BAM file against keep list
tools.picard.FilterSamReadsTool().execute(inBam, False, hitList, outBam, JVMmemory=JVMmemory)
os.unlink(hitList)
开发者ID:gzluo,项目名称:viral-ngs,代码行数:27,代码来源:taxon_filter.py
示例2: blastn_chunked_fasta
def blastn_chunked_fasta(fasta, db, chunkSize=1000000):
"""
Helper function: blastn a fasta file, overcoming apparent memory leaks on
an input with many query sequences, by splitting it into multiple chunks
and running a new blastn process on each chunk. Return a list of output
filenames containing hits
"""
blastnPath = tools.blast.BlastnTool().install_and_get_path()
hits_files = []
with open(fasta, "rt") as fastaFile:
record_iter = SeqIO.parse(fastaFile, "fasta")
for batch in batch_iterator(record_iter, chunkSize):
chunk_fasta = mkstempfname('.fasta')
with open(chunk_fasta, "wt") as handle:
SeqIO.write(batch, handle, "fasta")
batch = None
chunk_hits = mkstempfname('.hits.txt')
blastnCmd = [blastnPath, '-db', db, '-word_size', '16', '-evalue', '1e-6', '-outfmt', '6', '-max_target_seqs',
'2', '-query', chunk_fasta, '-out', chunk_hits]
log.debug(' '.join(blastnCmd))
subprocess.check_call(blastnCmd)
os.unlink(chunk_fasta)
hits_files.append(chunk_hits)
return hits_files
开发者ID:gzluo,项目名称:viral-ngs,代码行数:28,代码来源:taxon_filter.py
示例3: align_and_fix
def align_and_fix(inBam, refFasta, outBamAll=None, outBamFiltered=None,
novoalign_options='', JVMmemory=None):
''' Take reads, align to reference with Novoalign, mark duplicates
with Picard, realign indels with GATK, and optionally filter
final file to mapped/non-dupe reads.
'''
if not (outBamAll or outBamFiltered):
log.warn("are you sure you meant to do nothing?")
return
bam_aligned = mkstempfname('.aligned.bam')
tools.novoalign.NovoalignTool().execute(
inBam, refFasta, bam_aligned,
options=novoalign_options.split(), JVMmemory=JVMmemory)
bam_mkdup = mkstempfname('.mkdup.bam')
tools.picard.MarkDuplicatesTool().execute(
[bam_aligned], bam_mkdup,
picardOptions=['CREATE_INDEX=true'], JVMmemory=JVMmemory)
os.unlink(bam_aligned)
bam_realigned = mkstempfname('.realigned.bam')
tools.gatk.GATKTool().local_realign(
bam_mkdup, refFasta, bam_realigned, JVMmemory=JVMmemory)
os.unlink(bam_mkdup)
if outBamAll:
shutil.copyfile(bam_realigned, outBamAll)
tools.picard.BuildBamIndexTool().execute(outBamAll)
if outBamFiltered:
tools.samtools.SamtoolsTool().view(
['-b', '-q', '1', '-F', '1028'],
bam_realigned, outBamFiltered)
tools.picard.BuildBamIndexTool().execute(outBamFiltered)
os.unlink(bam_realigned)
开发者ID:ACEGID-Senegal,项目名称:viral-ngs,代码行数:35,代码来源:read_utils.py
示例4: multi_db_deplete_bam
def multi_db_deplete_bam(inBam, refDbs, deplete_method, outBam, **kwargs):
tmpDb = None
if len(refDbs)>1 and not any(
not os.path.exists(db) # indexed db prefix
or os.path.isdir(db) # indexed db in directory
or (os.path.isfile(db) and ('.tar' in db or '.tgz' in db or '.zip' in db)) # packaged indexed db
for db in refDbs):
# this is a scenario where all refDbs are unbuilt fasta
# files. we can simplify and speed up execution by
# concatenating them all and running deplete_method
# just once
tmpDb = mkstempfname('.fasta')
merge_compressed_files(refDbs, tmpDb, sep='\n')
refDbs = [tmpDb]
samtools = tools.samtools.SamtoolsTool()
tmpBamIn = inBam
for db in refDbs:
if not samtools.isEmpty(tmpBamIn):
tmpBamOut = mkstempfname('.bam')
deplete_method(tmpBamIn, db, tmpBamOut, **kwargs)
if tmpBamIn != inBam:
os.unlink(tmpBamIn)
tmpBamIn = tmpBamOut
shutil.copyfile(tmpBamIn, outBam)
if tmpDb:
os.unlink(tmpDb)
开发者ID:broadinstitute,项目名称:viral-ngs,代码行数:29,代码来源:taxon_filter.py
示例5: deplete_blastn
def deplete_blastn(inFastq, outFastq, refDbs) :
'Use blastn to remove reads that match at least one of the databases.'
## Get tools
noBlastHits_v3Path = os.path.join(util.file.get_scripts_path(),
'noBlastHits_v3.py')
## Convert to fasta
inFasta = mkstempfname('.fasta')
read_utils.fastq_to_fasta(inFastq, inFasta)
## Run blastn using each of the databases in turn
blastOutFiles = []
for db in refDbs :
log.info("running blastn on %s against %s", inFastq, db)
blastOutFiles += blastn_chunked_fasta(inFasta, db)
## Combine results from different databases
blastOutCombined = mkstempfname('.txt')
catCmd = ['cat'] + blastOutFiles
log.debug(' '.join(catCmd) + '> ' + blastOutCombined)
with open(blastOutCombined, 'wt') as outf:
subprocess.check_call(catCmd, stdout = outf)
## run noBlastHits_v3.py to extract reads with no blast hits
# TODO: slurp the small amount of code in this script into here
noBlastHitsCmd = ['python', noBlastHits_v3Path, '-b', blastOutCombined,
'-r', inFastq, '-m', 'nohit']
log.debug(' '.join(noBlastHitsCmd) + '> ' + outFastq)
with util.file.open_or_gzopen(outFastq, 'wt') as outf :
subprocess.check_call(noBlastHitsCmd, stdout = outf)
开发者ID:ACEGID-Senegal,项目名称:viral-ngs,代码行数:31,代码来源:taxon_filter.py
示例6: trimmomatic
def trimmomatic(inFastq1, inFastq2, pairedOutFastq1, pairedOutFastq2,
clipFasta):
trimmomaticPath = tools.trimmomatic.TrimmomaticTool().install_and_get_path()
tmpUnpaired1 = mkstempfname()
tmpUnpaired2 = mkstempfname()
# This java program has a lot of argments...
javaCmd = ['java', '-Xmx2g',
'-Djava.io.tmpdir='+tempfile.tempdir,
'-classpath',
trimmomaticPath,
'org.usadellab.trimmomatic.TrimmomaticPE',
inFastq1,
inFastq2,
pairedOutFastq1,
tmpUnpaired1,
pairedOutFastq2,
tmpUnpaired2,
'LEADING:20', 'TRAILING:20', 'SLIDINGWINDOW:4:25', 'MINLEN:30',
'ILLUMINACLIP:{}:2:30:12'.format(clipFasta)
]
log.debug(' '.join(javaCmd))
subprocess.check_call(javaCmd)
os.unlink(tmpUnpaired1)
os.unlink(tmpUnpaired2)
开发者ID:mlin,项目名称:viral-ngs,代码行数:26,代码来源:taxon_filter.py
示例7: trimmomatic
def trimmomatic(
inFastq1,
inFastq2,
pairedOutFastq1,
pairedOutFastq2,
clipFasta,
unpairedOutFastq1=None,
unpairedOutFastq2=None,
leading_q_cutoff=15,
trailing_q_cutoff=15,
minlength_to_keep=30,
sliding_window_size=4,
sliding_window_q_cutoff=25
):
'''Trim read sequences with Trimmomatic.'''
trimmomaticPath = tools.trimmomatic.TrimmomaticTool().install_and_get_path()
unpairedFastq1 = unpairedOutFastq1 or mkstempfname()
unpairedFastq2 = unpairedOutFastq2 or mkstempfname()
javaCmd = []
# the conda version wraps the jar file with a shell script
if trimmomaticPath.endswith(".jar"):
# This java program has a lot of argments...
javaCmd.extend(
[
'java', '-Xmx2g', '-Djava.io.tmpdir=' + tempfile.tempdir, '-classpath', trimmomaticPath,
'org.usadellab.trimmomatic.TrimmomaticPE'
]
)
else:
javaCmd.extend([trimmomaticPath, "PE"])
# Explicitly use Phred-33 quality scores
javaCmd.extend(['-phred33'])
javaCmd.extend(
[
inFastq1, inFastq2, pairedOutFastq1, unpairedFastq1, pairedOutFastq2, unpairedFastq2,
'LEADING:{leading_q_cutoff}'.format(leading_q_cutoff=leading_q_cutoff),
'TRAILING:{trailing_q_cutoff}'.format(trailing_q_cutoff=trailing_q_cutoff),
'SLIDINGWINDOW:{sliding_window_size}:{sliding_window_q_cutoff}'.format(
sliding_window_size=sliding_window_size,
sliding_window_q_cutoff=sliding_window_q_cutoff,
),
'MINLEN:{minlength_to_keep}'.format(minlength_to_keep=minlength_to_keep),
'ILLUMINACLIP:{clipFasta}:2:30:12'.format(clipFasta=clipFasta)
]
)
log.debug(' '.join(javaCmd))
util.misc.run_and_print(javaCmd, check=True)
if not unpairedOutFastq1:
os.unlink(unpairedFastq1)
if not unpairedOutFastq2:
os.unlink(unpairedFastq2)
开发者ID:yesimon,项目名称:viral-ngs,代码行数:57,代码来源:taxon_filter.py
示例8: rmdup_mvicuna_bam
def rmdup_mvicuna_bam(inBam, outBam, JVMmemory=None):
''' Remove duplicate reads from BAM file using M-Vicuna. The
primary advantage to this approach over Picard's MarkDuplicates tool
is that Picard requires that input reads are aligned to a reference,
and M-Vicuna can operate on unaligned reads.
'''
# Convert BAM -> FASTQ pairs per read group and load all read groups
tempDir = tempfile.mkdtemp()
tools.picard.SamToFastqTool().per_read_group(inBam, tempDir, picardOptions=['VALIDATION_STRINGENCY=LENIENT'])
read_groups = [x[1:] for x in tools.samtools.SamtoolsTool().getHeader(inBam) if x[0] == '@RG']
read_groups = [dict(pair.split(':', 1) for pair in rg) for rg in read_groups]
# Collect FASTQ pairs for each library
lb_to_files = {}
for rg in read_groups:
lb_to_files.setdefault(rg.get('LB', 'none'), set())
fname = rg['ID']
if 'PU' in rg:
fname = rg['PU']
lb_to_files[rg.get('LB', 'none')].add(os.path.join(tempDir, fname))
log.info("found %d distinct libraries and %d read groups", len(lb_to_files), len(read_groups))
# For each library, merge FASTQs and run rmdup for entire library
readList = mkstempfname('.keep_reads.txt')
for lb, files in lb_to_files.items():
log.info("executing M-Vicuna DupRm on library " + lb)
# create merged FASTQs per library
infastqs = (mkstempfname('.1.fastq'), mkstempfname('.2.fastq'))
for d in range(2):
with open(infastqs[d], 'wt') as outf:
for fprefix in files:
fn = '%s_%d.fastq' % (fprefix, d + 1)
if os.path.isfile(fn):
with open(fn, 'rt') as inf:
for line in inf:
outf.write(line)
os.unlink(fn)
else:
log.warn(
"""no reads found in %s,
assuming that's because there's no reads in that read group""", fn
)
# M-Vicuna DupRm to see what we should keep (append IDs to running file)
if os.path.getsize(infastqs[0]) > 0 or os.path.getsize(infastqs[1]) > 0:
mvicuna_fastqs_to_readlist(infastqs[0], infastqs[1], readList)
for fn in infastqs:
os.unlink(fn)
# Filter original input BAM against keep-list
tools.picard.FilterSamReadsTool().execute(inBam, False, readList, outBam, JVMmemory=JVMmemory)
return 0
开发者ID:tianyabeef,项目名称:viral-ngs,代码行数:54,代码来源:read_utils.py
示例9: filter_lastal
def filter_lastal(inFastq, refDb, outFastq):
''' Restrict input reads to those that align to the given
reference database using LASTAL. Also, remove duplicates with prinseq.
'''
assert outFastq.endswith('.fastq')
tempFilePath = mkstempfname('.hits')
lastalPath = tools.last.Lastal().install_and_get_path()
mafSortPath = tools.last.MafSort().install_and_get_path()
mafConvertPath = tools.last.MafConvert().install_and_get_path()
prinseqPath = tools.prinseq.PrinseqTool().install_and_get_path()
noBlastLikeHitsPath = os.path.join( util.file.get_scripts_path(),
'noBlastLikeHits.py')
# each pipe separated cmd gets own line
# unfortunately, it doesn't seem to work to do .format(**locals()) on the
# final string as opposed to the individual parts.
lastalCmd = ' '.join([
'{lastalPath} -Q1 {refDb} {inFastq}'.format(**locals()),
'| {mafSortPath} -n2'.format(**locals()),
'| {mafConvertPath} tab /dev/stdin > {tempFilePath}'.format(**locals()),
])
log.debug(lastalCmd)
assert not os.system(lastalCmd)
# filter inFastq against lastal hits
filteredFastq = mkstempfname('.filtered.fastq')
with open(filteredFastq, 'wt') as outf:
noBlastLikeHitsCmd = [
noBlastLikeHitsPath, '-b', tempFilePath, '-r', inFastq, '-m', 'hit']
log.debug(' '.join(noBlastLikeHitsCmd) + ' > ' + filteredFastq)
subprocess.check_call(noBlastLikeHitsCmd, stdout=outf)
# remove duplicate reads and reads with multiple Ns
if os.path.getsize(filteredFastq) == 0:
# prinseq-lite fails on empty file input (which can happen in real life
# if no reads match the refDb) so handle this scenario specially
log.info("output is empty: no reads in input match refDb")
shutil.copyfile(filteredFastq, outFastq)
else:
prinseqCmd = [
'perl', prinseqPath,
'-ns_max_n', '1',
'-derep', '1',
'-fastq', filteredFastq,
'-out_bad', 'null',
'-line_width', '0',
'-out_good', outFastq[:-6]
]
log.debug(' '.join(prinseqCmd))
subprocess.check_call(prinseqCmd)
os.unlink(filteredFastq)
开发者ID:ACEGID-Senegal,项目名称:viral-ngs,代码行数:51,代码来源:taxon_filter.py
示例10: deplete_blastn_paired
def deplete_blastn_paired(infq1, infq2, outfq1, outfq2, refDbs):
tmpfq1_a = mkstempfname('.fastq')
tmpfq1_b = mkstempfname('.fastq')
tmpfq2_b = mkstempfname('.fastq')
tmpfq2_c = mkstempfname('.fastq')
# deplete fq1
deplete_blastn(infq1, tmpfq1_a, refDbs)
# purge fq2 of read pairs lost in fq1
# (this should significantly speed up the second run of deplete_blastn)
read_utils.purge_unmated(tmpfq1_a, infq2, tmpfq1_b, tmpfq2_b)
# deplete fq2
deplete_blastn(tmpfq2_b, tmpfq2_c, refDbs)
# purge fq1 of read pairs lost in fq2
read_utils.purge_unmated(tmpfq1_b, tmpfq2_c, outfq1, outfq2)
开发者ID:mlin,项目名称:viral-ngs,代码行数:14,代码来源:taxon_filter.py
示例11: deplete_blastn_paired
def deplete_blastn_paired(infq1, infq2, outfq1, outfq2, refDbs, threads):
'Use blastn to remove reads that match at least one of the databases.'
tmpfq1_a = mkstempfname('.fastq')
tmpfq1_b = mkstempfname('.fastq')
tmpfq2_b = mkstempfname('.fastq')
tmpfq2_c = mkstempfname('.fastq')
# deplete fq1
deplete_blastn(infq1, tmpfq1_a, refDbs)
# purge fq2 of read pairs lost in fq1
# (this should significantly speed up the second run of deplete_blastn)
read_utils.purge_unmated(tmpfq1_a, infq2, tmpfq1_b, tmpfq2_b)
# deplete fq2
deplete_blastn(tmpfq2_b, tmpfq2_c, refDbs, threads)
# purge fq1 of read pairs lost in fq2
read_utils.purge_unmated(tmpfq1_b, tmpfq2_c, outfq1, outfq2)
开发者ID:yesimon,项目名称:viral-ngs,代码行数:15,代码来源:taxon_filter.py
示例12: main_reheader_bams
def main_reheader_bams(args):
''' Copy BAM files while renaming elements of the BAM header.
The mapping file specifies which (key, old value, new value) mappings. For
example:
LB lib1 lib_one
SM sample1 Sample_1
SM sample2 Sample_2
SM sample3 Sample_3
CN broad BI
FN in1.bam out1.bam
FN in2.bam out2.bam
'''
# read mapping file
mapper = dict((a+':'+b, a+':'+c) for a,b,c in util.file.read_tabfile(args.rgMap) if a != 'FN')
files = list((b,c) for a,b,c in util.file.read_tabfile(args.rgMap) if a == 'FN')
header_file = mkstempfname('.sam')
# read and convert bam headers
for inBam, outBam in files:
if os.path.isfile(inBam):
with open(header_file, 'wt') as outf:
for row in tools.samtools.SamtoolsTool().getHeader(inBam):
if row[0] == '@RG':
row = [mapper.get(x, x) for x in row]
outf.write('\t'.join(row)+'\n')
# write new bam with new header
tools.samtools.SamtoolsTool().reheader(inBam, header_file, outBam)
os.unlink(header_file)
return 0
开发者ID:gzluo,项目名称:viral-ngs,代码行数:28,代码来源:read_utils.py
示例13: fastq_to_bam
def fastq_to_bam(inFastq1, inFastq2, outBam, sampleName=None, header=None,
JVMmemory=tools.picard.FastqToSamTool.jvmMemDefault, picardOptions=None):
''' Convert a pair of fastq paired-end read files and optional text header
to a single bam file.
'''
picardOptions = picardOptions or []
if header:
fastqToSamOut = mkstempfname('.bam')
else:
fastqToSamOut = outBam
if sampleName is None:
sampleName = 'Dummy' # Will get overwritten by rehead command
if header:
# With the header option, rehead will be called after FastqToSam.
# This will invalidate any md5 file, which would be a slow to construct
# on our own, so just disallow and let the caller run md5sum if desired.
if any(opt.lower() == 'CREATE_MD5_FILE=True'.lower() for opt in picardOptions):
raise Exception("""CREATE_MD5_FILE is not allowed with '--header.'""")
tools.picard.FastqToSamTool().execute(
inFastq1,
inFastq2,
sampleName,
fastqToSamOut,
picardOptions=picardOptions,
JVMmemory=JVMmemory)
if header:
tools.samtools.SamtoolsTool().reheader(fastqToSamOut, header, outBam)
return 0
开发者ID:gzluo,项目名称:viral-ngs,代码行数:31,代码来源:read_utils.py
示例14: align_and_count_hits
def align_and_count_hits(inBam, refFasta, outCounts, includeZeros=False,
JVMmemory=None, threads=1):
''' Take reads, align to reference with Novoalign and return aligned
read counts for each reference sequence.
'''
bam_aligned = mkstempfname('.aligned.bam')
tools.novoalign.NovoalignTool().execute(
inBam,
refFasta,
bam_aligned,
options=['-r', 'Random'],
JVMmemory=JVMmemory)
samtools = tools.samtools.SamtoolsTool()
seqs = list(dict(x.split(':', 1) for x in row[1:])['SN']
for row in samtools.getHeader(bam_aligned)
if row[0]=='@SQ')
with util.file.open_or_gzopen(outCounts, 'w') as outf:
for seq in seqs:
n = samtools.count(bam_aligned, regions=[seq])
if n>0 or includeZeros:
outf.write("{}\t{}\n".format(seq, n))
os.unlink(bam_aligned)
开发者ID:gzluo,项目名称:viral-ngs,代码行数:26,代码来源:read_utils.py
示例15: lastal_get_hits
def lastal_get_hits(
inFastq,
db,
outList,
max_gapless_alignments_per_position=1,
min_length_for_initial_matches=5,
max_length_for_initial_matches=50,
max_initial_matches_per_position=100
):
filteredFastq = mkstempfname('.filtered.fastq')
lastal_chunked_fastq(
inFastq,
db,
filteredFastq,
max_gapless_alignments_per_position=max_gapless_alignments_per_position,
min_length_for_initial_matches=min_length_for_initial_matches,
max_length_for_initial_matches=max_length_for_initial_matches,
max_initial_matches_per_position=max_initial_matches_per_position
)
with open(outList, 'wt') as outf:
with open(filteredFastq, 'rt') as inf:
line_num = 0
for line in inf:
if (line_num % 4) == 0:
seq_id = line.rstrip('\n\r')[1:]
if seq_id.endswith('/1') or seq_id.endswith('/2'):
seq_id = seq_id[:-2]
outf.write(seq_id + '\n')
line_num += 1
os.unlink(filteredFastq)
开发者ID:yesimon,项目名称:viral-ngs,代码行数:32,代码来源:taxon_filter.py
示例16: split_bam
def split_bam(inBam, outBams):
'''Split BAM file equally into several output BAM files. '''
samtools = tools.samtools.SamtoolsTool()
picard = tools.picard.PicardTools()
# get totalReadCount and maxReads
# maxReads = totalReadCount / num files, but round up to the nearest
# even number in order to keep read pairs together (assuming the input
# is sorted in query order and has no unmated reads, which can be
# accomplished by Picard RevertSam with SANITIZE=true)
totalReadCount = samtools.count(inBam)
maxReads = int(math.ceil(float(totalReadCount) / len(outBams) / 2) * 2)
log.info("splitting %d reads into %d files of %d reads each", totalReadCount, len(outBams), maxReads)
# load BAM header into memory
header = samtools.getHeader(inBam)
if 'SO:queryname' not in header[0]:
raise Exception('Input BAM file must be sorted in queryame order')
# dump to bigsam
bigsam = mkstempfname('.sam')
samtools.view([], inBam, bigsam)
# split bigsam into little ones
with util.file.open_or_gzopen(bigsam, 'rt') as inf:
for outBam in outBams:
log.info("preparing file " + outBam)
tmp_sam_reads = mkstempfname('.sam')
with open(tmp_sam_reads, 'wt') as outf:
for row in header:
outf.write('\t'.join(row) + '\n')
for _ in range(maxReads):
line = inf.readline()
if not line:
break
outf.write(line)
if outBam == outBams[-1]:
for line in inf:
outf.write(line)
picard.execute(
"SamFormatConverter", [
'INPUT=' + tmp_sam_reads, 'OUTPUT=' + outBam, 'VERBOSITY=WARNING'
],
JVMmemory='512m'
)
os.unlink(tmp_sam_reads)
os.unlink(bigsam)
开发者ID:tianyabeef,项目名称:viral-ngs,代码行数:47,代码来源:read_utils.py
示例17: lastal_get_hits
def lastal_get_hits(inFastq, db, outList):
lastalPath = tools.last.Lastal().install_and_get_path()
mafSortPath = tools.last.MafSort().install_and_get_path()
mafConvertPath = tools.last.MafConvert().install_and_get_path()
noBlastLikeHitsPath = os.path.join( util.file.get_scripts_path(),
'noBlastLikeHits.py')
lastalOut = mkstempfname('.lastal')
with open(lastalOut, 'wt') as outf:
cmd = [lastalPath, '-Q1', db, inFastq]
log.debug(' '.join(cmd) + ' > ' + lastalOut)
subprocess.check_call(cmd, stdout=outf)
# everything below this point in this method should be replaced with
# our own code that just reads lastal output and makes a list of read names
mafSortOut = mkstempfname('.mafsort')
with open(mafSortOut, 'wt') as outf:
with open(lastalOut, 'rt') as inf:
cmd = [mafSortPath, '-n2']
log.debug('cat ' + lastalOut + ' | ' + ' '.join(cmd) + ' > ' + mafSortOut)
subprocess.check_call(cmd, stdin=inf, stdout=outf)
os.unlink(lastalOut)
mafConvertOut = mkstempfname('.mafconvert')
with open(mafConvertOut, 'wt') as outf:
cmd = [mafConvertPath, 'tab', mafSortOut]
log.debug(' '.join(cmd) + ' > ' + mafConvertOut)
subprocess.check_call(cmd, stdout=outf)
os.unlink(mafSortOut)
filteredFastq = mkstempfname('.filtered.fastq')
with open(filteredFastq, 'wt') as outf:
cmd = [noBlastLikeHitsPath, '-b', mafConvertOut, '-r', inFastq, '-m', 'hit']
log.debug(' '.join(cmd) + ' > ' + filteredFastq)
subprocess.check_call(cmd, stdout=outf)
os.unlink(mafConvertOut)
with open(outList, 'wt') as outf:
with open(filteredFastq, 'rt') as inf:
line_num = 0
for line in inf:
if (line_num % 4) == 0:
id = line.rstrip('\n\r')[1:]
if id.endswith('/1') or id.endswith('/2'):
id = id[:-2]
outf.write(id+'\n')
line_num += 1
开发者ID:ACEGID-Senegal,项目名称:viral-ngs,代码行数:47,代码来源:taxon_filter.py
示例18: multi_db_deplete_bam
def multi_db_deplete_bam(inBam, refDbs, deplete_method, outBam):
tmpBamIn = inBam
for db in refDbs:
tmpBamOut = mkstempfname('.bam')
deplete_method(tmpBamIn, db, tmpBamOut)
if tmpBamIn != inBam:
os.unlink(tmpBamIn)
tmpBamIn = tmpBamOut
shutil.copyfile(tmpBamIn, outBam)
开发者ID:mlin,项目名称:viral-ngs,代码行数:9,代码来源:taxon_filter.py
示例19: main_deplete_human
def main_deplete_human(args):
''' Run the entire depletion pipeline: bmtagger, mvicuna, blastn.
Optionally, use lastal to select a specific taxon of interest.'''
# only RevertSam if inBam is already aligned
# Most of the time the input will be unaligned
# so we can save save time if we can skip RevertSam in the unaligned case
#
# via the SAM/BAM spec, if the file is aligned, an SQ line should be present
# in the header. Using pysam, we can check this if header['SQ'])>0
# https://samtools.github.io/hts-specs/SAMv1.pdf
# if the user has requested a revertBam
revertBamOut = args.revertBam if args.revertBam else mkstempfname('.bam')
bamToDeplete = args.inBam
with pysam.AlignmentFile(args.inBam, 'rb', check_sq=False) as bam:
# if it looks like the bam is aligned, revert it
if 'SQ' in bam.header and len(bam.header['SQ'])>0:
tools.picard.RevertSamTool().execute(
args.inBam, revertBamOut, picardOptions=['SORT_ORDER=queryname', 'SANITIZE=true']
)
bamToDeplete = revertBamOut
else:
# if we don't need to produce a revertBam file
# but the user has specified one anyway
# simply touch the output
if args.revertBam:
log.warning("An output was specified for 'revertBam', but the input is unaligned, so RevertSam was not needed. Touching the output.")
util.file.touch(revertBamOut)
# TODO: error out? run RevertSam anyway?
multi_db_deplete_bam(
bamToDeplete,
args.bmtaggerDbs,
deplete_bmtagger_bam,
args.bmtaggerBam,
threads=args.threads,
JVMmemory=args.JVMmemory
)
# if the user has not specified saving a revertBam, we used a temp file and can remove it
if not args.revertBam:
os.unlink(revertBamOut)
read_utils.rmdup_mvicuna_bam(args.bmtaggerBam, args.rmdupBam, JVMmemory=args.JVMmemory)
multi_db_deplete_bam(
args.rmdupBam,
args.blastDbs,
deplete_blastn_bam,
args.blastnBam,
threads=args.threads,
JVMmemory=args.JVMmemory
)
if args.taxfiltBam and args.lastDb:
filter_lastal_bam(args.blastnBam, args.lastDb, args.taxfiltBam, JVMmemory=args.JVMmemory)
return 0
开发者ID:yesimon,项目名称:viral-ngs,代码行数:57,代码来源:taxon_filter.py
示例20: mvicuna_fastqs_to_readlist
def mvicuna_fastqs_to_readlist(inFastq1, inFastq2, readList):
# Run M-Vicuna on FASTQ files
outFastq1 = mkstempfname('.1.fastq')
outFastq2 = mkstempfname('.2.fastq')
tools.mvicuna.MvicunaTool().rmdup((inFastq1, inFastq2), (outFastq1, outFastq2), None)
# Make a list of reads to keep
with open(readList, 'at') as outf:
for fq in (outFastq1, outFastq2):
with util.file.open_or_gzopen(fq, 'rt') as inf:
line_num = 0
for line in inf:
if (line_num % 4) == 0:
idVal = line.rstrip('\n')[1:]
if idVal.endswith('/1'):
outf.write(idVal[:-2] + '\n')
line_num += 1
os.unlink(outFastq1)
os.unlink(outFastq2)
开发者ID:gzluo,项目名称:viral-ngs,代码行数:19,代码来源:read_utils.py
注:本文中的util.file.mkstempfname函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。 |
请发表评论