本文整理汇总了Python中pyfastaq.sequences.file_reader函数的典型用法代码示例。如果您正苦于以下问题：Python file_reader函数的具体用法？Python file_reader怎么用？Python file_reader使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了file_reader函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: run
def run(description):
    '''Command-line entry point for "fastaq to_random_subset".

    Parses the command line, then copies each read of the input file to the
    output file with probability percent/100. When --mate_file is given, one
    record is taken from the mates file for every input record and a kept read
    is immediately followed by its mate, so the output is interleaved.

    description -- accepted for interface compatibility only; the help text
                   shown to the user is the fixed string below.

    Exits with status 1 if the mates file runs out of records before the
    input file does. The output file is closed on every exit path.
    '''
    parser = argparse.ArgumentParser(
        description = 'Takes a random subset of reads from a sequence file and optionally the corresponding read ' +
                      'from a mates file. Output is interleaved if mates file given',
        usage = 'fastaq to_random_subset [options] <infile> <outfile> <percent>')
    parser.add_argument('--mate_file', help='Name of mates file')
    parser.add_argument('--seed', help='Seed for random number generator. If not given, python\'s default is used', metavar='INT')
    parser.add_argument('infile', help='Name of input file')
    parser.add_argument('outfile', help='Name of output file')
    parser.add_argument('percent', type=float, help='Per cent probability of keeping any given read (pair) in [0,100]', metavar='FLOAT')
    options = parser.parse_args()

    # No type= is given for --seed, so it reaches random.seed() as a string
    # (or None when absent). Left as-is so existing seeds keep reproducing
    # the same subsets.
    random.seed(a=options.seed)

    seq_reader = sequences.file_reader(options.infile)
    fout = utils.open_file_write(options.outfile)

    try:
        if options.mate_file:
            mate_seq_reader = sequences.file_reader(options.mate_file)

        for seq in seq_reader:
            # The mate is always consumed, even when the pair is not kept,
            # so that the two files stay in step.
            if options.mate_file:
                try:
                    mate_seq = next(mate_seq_reader)
                except StopIteration:
                    print('Error! Didn\'t get mate for read', seq.id, file=sys.stderr)
                    sys.exit(1)

            if 100 * random.random() <= options.percent:
                print(seq, file=fout)
                if options.mate_file:
                    print(mate_seq, file=fout)
    finally:
        # Also runs when sys.exit() raises SystemExit above, so the output
        # handle is never left open.
        utils.close(fout)
开发者ID:martinghunt,项目名称:Fastaq,代码行数:32,代码来源:to_random_subset.py
示例2: test_file_reader_gff
def test_file_reader_gff(self):
    '''file_reader should yield seq1, seq2, ... from the good GFF files and raise sequences.Error on the bad ones'''
    readable = [
        os.path.join(data_dir, name)
        for name in (
            'sequences_test_gffv3.gff',
            'sequences_test_gffv3.no_FASTA_line.gff',
        )
    ]

    for path in readable:
        # Records are expected to be named seq1, seq2, ... in file order,
        # all with the same ten-base sequence.
        for index, record in enumerate(sequences.file_reader(path), start=1):
            expected = sequences.Fasta('seq' + str(index), 'ACGTACGTAC')
            self.assertEqual(record, expected)

    unreadable = [
        os.path.join(data_dir, name)
        for name in (
            'sequences_test_gffv3.no_seq.gff',
            'sequences_test_gffv3.no_seq.2.gff',
        )
    ]

    for path in unreadable:
        # The error may be raised either on construction or while iterating,
        # so both happen inside the assertRaises context.
        with self.assertRaises(sequences.Error):
            for _ in sequences.file_reader(path):
                pass
开发者ID:martinghunt,项目名称:Fastaq,代码行数:26,代码来源:sequences_test.py
示例3: interleave
def interleave(infile_1, infile_2, outfile):
    '''Writes the records of two sequence files to outfile, alternating one
    record from infile_1 with one record from infile_2.

    Raises Error if the two files do not contain the same number of records.
    The output file is closed on every exit path.'''
    seq_reader_1 = sequences.file_reader(infile_1)
    seq_reader_2 = sequences.file_reader(infile_2)
    f_out = utils.open_file_write(outfile)

    try:
        for seq_1 in seq_reader_1:
            try:
                seq_2 = next(seq_reader_2)
            except Exception as err:
                # Exception rather than a bare except: KeyboardInterrupt and
                # SystemExit must not be reported as a missing mate.
                raise Error('Error getting mate for sequence', seq_1.id, ' ... cannot continue') from err

            print(seq_1, file=f_out)
            print(seq_2, file=f_out)

        # Anything still left in the second file has no partner in the first.
        # Only normal exhaustion counts as "nothing left"; a reader failure
        # here now propagates instead of being silently swallowed.
        seq_2 = next(seq_reader_2, None)
        if seq_2 is not None:
            raise Error('Error getting mate for sequence', seq_2.id, ' ... cannot continue')
    finally:
        utils.close(f_out)
开发者ID:nds,项目名称:Fastaq,代码行数:25,代码来源:tasks.py
示例4: interleave
def interleave(infile_1, infile_2, outfile, suffix1=None, suffix2=None):
    '''Makes interleaved file from two sequence files. If used, will append suffix1 onto end
    of every sequence name in infile_1, unless it already ends with suffix1. Similar for suffix2.

    Raises Error if the two files do not contain the same number of records.
    The output file is closed on every exit path.'''
    seq_reader_1 = sequences.file_reader(infile_1)
    seq_reader_2 = sequences.file_reader(infile_2)
    f_out = utils.open_file_write(outfile)

    try:
        for seq_1 in seq_reader_1:
            try:
                seq_2 = next(seq_reader_2)
            except Exception as err:
                # Exception rather than a bare except: KeyboardInterrupt and
                # SystemExit must not be reported as a missing mate.
                raise Error('Error getting mate for sequence', seq_1.id, ' ... cannot continue') from err

            if suffix1 is not None and not seq_1.id.endswith(suffix1):
                seq_1.id += suffix1
            if suffix2 is not None and not seq_2.id.endswith(suffix2):
                seq_2.id += suffix2

            print(seq_1, file=f_out)
            print(seq_2, file=f_out)

        # Anything still left in the second file has no partner in the first.
        # Only normal exhaustion counts as "nothing left"; a reader failure
        # here now propagates instead of being silently swallowed.
        seq_2 = next(seq_reader_2, None)
        if seq_2 is not None:
            raise Error('Error getting mate for sequence', seq_2.id, ' ... cannot continue')
    finally:
        utils.close(f_out)
开发者ID:satta,项目名称:Fastaq,代码行数:32,代码来源:tasks.py
示例5: filter
def filter(
      infile,
      outfile,
      minlength=0,
      maxlength=float('inf'),
      regex=None,
      ids_file=None,
      invert=False,
      mate_in=None,
      mate_out=None,
      both_mates_pass=True,
    ):
    '''Copies the sequences of infile that pass every active test to outfile.

    A sequence passes when all of the following hold:
      * minlength <= len(seq) <= maxlength
      * regex is None, or regex matches somewhere in the sequence name
      * ids_file is None, or the sequence name is one of the lines of
        ids_file (trailing whitespace stripped)

    invert          -- write the sequences (or pairs) that would otherwise
                       have been dropped, and drop the rest
    mate_in         -- optional second file read in lockstep with infile.
                       Requires mate_out
    mate_out        -- file that receives the mate of every kept sequence
    both_mates_pass -- with mates: if True a pair is wanted only when both
                       reads pass; if False one passing read is enough

    Raises Error if mate_in is given without mate_out, or if a mate cannot
    be read for a sequence of infile. All output files opened here are
    closed on every exit path.

    NOTE(review): records left over in mate_in after infile is exhausted are
    not detected.
    '''
    ids_from_file = set()
    if ids_file is not None:
        f = utils.open_file_read(ids_file)
        try:
            for line in f:
                ids_from_file.add(line.rstrip())
        finally:
            utils.close(f)

    if mate_in and mate_out is None:
        raise Error('Error in filter! mate_in provided. Must also provide mate_out')

    # Compiled before any output file is opened so that an invalid pattern
    # cannot leave half-opened outputs behind.
    pattern = re.compile(regex) if regex is not None else None

    def passes(seq):
        return minlength <= len(seq) <= maxlength \
            and (pattern is None or pattern.search(seq.id) is not None) \
            and (ids_file is None or seq.id in ids_from_file)

    f_out = None
    f_out_mate = None

    try:
        if mate_in:
            seq_reader_mate = sequences.file_reader(mate_in)
            f_out_mate = utils.open_file_write(mate_out)

        seq_reader = sequences.file_reader(infile)
        f_out = utils.open_file_write(outfile)

        for seq in seq_reader:
            seq_passes = passes(seq)

            if mate_in:
                try:
                    seq_mate = next(seq_reader_mate)
                except Exception as err:
                    # Exception rather than a bare except: KeyboardInterrupt
                    # and SystemExit must not be reported as a missing mate.
                    raise Error('Error getting mate for sequence', seq.id, ' ... cannot continue') from err

                mate_passes = passes(seq_mate)
                want_the_pair = (seq_passes and mate_passes) \
                    or ((seq_passes or mate_passes) and not both_mates_pass)

                # "!=" on two booleans is XOR: invert flips the decision
                if want_the_pair != invert:
                    print(seq, file=f_out)
                    print(seq_mate, file=f_out_mate)
            elif seq_passes != invert:
                print(seq, file=f_out)
    finally:
        if f_out is not None:
            utils.close(f_out)
        if f_out_mate is not None:
            utils.close(f_out_mate)
开发者ID:nds,项目名称:Fastaq,代码行数:58,代码来源:tasks.py
示例6: fasta_to_fastq
def fasta_to_fastq(fasta_in, qual_in, outfile):
    '''Combines a FASTA file and its quality-score file into one output file.

    The two inputs are read in lockstep. For each pair of records the names
    must be identical; the whitespace-separated scores of the qual record are
    converted to integers and handed to the sequence's to_Fastq() method,
    whose result is written to outfile.

    Raises Error if the names of a pair differ, or if the qual file has fewer
    records than the FASTA file. The output file is closed on every exit path.

    NOTE(review): records left over in qual_in after fasta_in is exhausted
    are not detected.
    '''
    fa_reader = sequences.file_reader(fasta_in)
    qual_reader = sequences.file_reader(qual_in, read_quals=True)
    f_out = utils.open_file_write(outfile)

    try:
        for seq in fa_reader:
            try:
                qual = next(qual_reader)
            except StopIteration:
                # Report a short qual file as a proper Error instead of
                # leaking a bare StopIteration to the caller.
                raise Error('No more records in qual file. Cannot get qualities for sequence', seq.id)

            if seq.id != qual.id:
                raise Error('Mismatch in names from fasta and qual file', seq.id, qual.id)

            qual.seq = [int(x) for x in qual.seq.split()]
            print(seq.to_Fastq(qual.seq), file=f_out)
    finally:
        utils.close(f_out)
开发者ID:nds,项目名称:Fastaq,代码行数:15,代码来源:tasks.py
示例7: acgtn_only
def acgtn_only(infile, outfile):
    '''Copy infile to outfile, turning every character that is not a/c/g/t/n (either case) into an N'''
    out_handle = utils.open_file_write(outfile)
    reader = sequences.file_reader(infile)

    for record in reader:
        # replace_non_acgt() edits the record in place
        record.replace_non_acgt()
        print(record, file=out_handle)

    utils.close(out_handle)
开发者ID:satta,项目名称:Fastaq,代码行数:7,代码来源:tasks.py
示例8: count_sequences
def count_sequences(infile):
    '''Return how many records file_reader yields for infile (0 if none)'''
    return sum(1 for _ in sequences.file_reader(infile))
开发者ID:nds,项目名称:Fastaq,代码行数:7,代码来源:tasks.py
示例9: trim_contigs
def trim_contigs(infile, outfile, trim):
    '''Trims the ends of every sequence and widens every gap, writing the
    results to outfile.

    For each sequence of infile:
      * sequences shorter than 2 * trim are dropped
      * up to trim bases on either side of each gap reported by seq.gaps()
        are overwritten with N
      * trim bases are removed from each end, then any Ns left at the ends
        are removed
      * the sequence is written only if it still has a non-N character
    '''
    seq_reader = sequences.file_reader(infile)
    fout = utils.open_file_write(outfile)

    # Loop-invariant, so compiled once: matches any character other than n/N
    non_n_regex = re.compile('[^nN]')

    for seq in seq_reader:
        if len(seq) < 2 * trim:
            continue

        gaps = seq.gaps()
        bases = list(seq.seq)

        # extend the length of each gap
        # (presumably gap.start and gap.end are 0-based with end inclusive,
        # which is why the right-hand bound has the extra +1 - TODO confirm)
        for gap in gaps:
            left_start = max(gap.start - trim, 0)
            right_end = min(gap.end + trim + 1, len(seq))

            for i in range(left_start, gap.start):
                bases[i] = 'N'
            for i in range(gap.end, right_end):
                bases[i] = 'N'

        seq.seq = ''.join(bases)

        # trim start/end bases and tidy up any resulting Ns at either end of the trimmed seq
        seq.trim(trim, trim)
        seq.trim_Ns()

        # check that there is some non-N sequence left over
        if non_n_regex.search(seq.seq) is not None:
            print(seq, file=fout)

    utils.close(fout)
开发者ID:nds,项目名称:Fastaq,代码行数:34,代码来源:tasks.py
示例10: test_file_reader_fasta
def test_file_reader_fasta(self):
    '''file_reader should yield the records of the test fasta file, named 1, 2, ... in order'''
    fasta_path = os.path.join(data_dir, 'sequences_test.fa')

    for index, record in enumerate(sequences.file_reader(fasta_path), start=1):
        expected = sequences.Fasta(str(index), 'ACGTA')
        self.assertEqual(record, expected)
开发者ID:martinghunt,项目名称:Fastaq,代码行数:7,代码来源:sequences_test.py
示例11: to_fasta
def to_fasta(infile, outfile, line_length=60, strip_after_first_whitespace=False, check_unique=False):
    '''Writes every sequence of infile to outfile as FASTA.

    line_length                  -- value assigned to sequences.Fasta.line_length
                                    while writing; the previous value is
                                    restored afterwards, even on error
    strip_after_first_whitespace -- call strip_after_first_whitespace() on
                                    each sequence before writing it
    check_unique                 -- count how often each name occurs; after
                                    the whole file has been written, report
                                    every repeated name on stderr and raise
                                    Error

    Fastq records are rebuilt as Fasta(id, seq) before printing; all other
    records are printed unchanged.
    '''
    seq_reader = sequences.file_reader(infile)
    f_out = utils.open_file_write(outfile)
    original_line_length = sequences.Fasta.line_length
    sequences.Fasta.line_length = line_length
    used_names = {}

    try:
        for seq in seq_reader:
            if strip_after_first_whitespace:
                seq.strip_after_first_whitespace()

            if check_unique:
                used_names[seq.id] = used_names.get(seq.id, 0) + 1

            if isinstance(seq, sequences.Fastq):
                print(sequences.Fasta(seq.id, seq.seq), file=f_out)
            else:
                print(seq, file=f_out)
    finally:
        # line_length is a class attribute shared by every Fasta object, so it
        # must be put back even if reading or writing failed. Restored before
        # closing so a failing close cannot skip it.
        sequences.Fasta.line_length = original_line_length
        utils.close(f_out)

    if check_unique:
        all_unique = True

        for name, count in used_names.items():
            if count > 1:
                print('Sequence name "' + name + '" not unique. Found', count, 'times', file=sys.stderr)
                all_unique = False

        if not all_unique:
            raise Error('Not all sequence names unique. Cannot continue')
开发者ID:satta,项目名称:Fastaq,代码行数:33,代码来源:tasks.py
示例12: translate
def translate(infile, outfile, frame=0):
    '''Write the result of translate(frame=frame) for every sequence of infile to outfile'''
    reader = sequences.file_reader(infile)
    out_handle = utils.open_file_write(outfile)

    for record in reader:
        translated = record.translate(frame=frame)
        print(translated, file=out_handle)

    utils.close(out_handle)
开发者ID:nds,项目名称:Fastaq,代码行数:8,代码来源:tasks.py
示例13: reverse_complement
def reverse_complement(infile, outfile):
    '''Call revcomp() on every sequence of infile and write the results to outfile'''
    reader = sequences.file_reader(infile)
    out_handle = utils.open_file_write(outfile)

    for record in reader:
        # revcomp() edits the record in place
        record.revcomp()
        print(record, file=out_handle)

    utils.close(out_handle)
开发者ID:nds,项目名称:Fastaq,代码行数:9,代码来源:tasks.py
示例14: replace_bases
def replace_bases(infile, outfile, old, new):
    '''Call replace_bases(old, new) on every sequence of infile and write the results to outfile'''
    reader = sequences.file_reader(infile)
    out_handle = utils.open_file_write(outfile)

    for record in reader:
        # replace_bases() edits the record in place
        record.replace_bases(old, new)
        print(record, file=out_handle)

    utils.close(out_handle)
开发者ID:nds,项目名称:Fastaq,代码行数:9,代码来源:tasks.py
示例15: strip_illumina_suffix
def strip_illumina_suffix(infile, outfile):
    '''Call strip_illumina_suffix() on every sequence of infile and write the results to outfile'''
    reader = sequences.file_reader(infile)
    out_handle = utils.open_file_write(outfile)

    for record in reader:
        # strip_illumina_suffix() edits the record in place
        record.strip_illumina_suffix()
        print(record, file=out_handle)

    utils.close(out_handle)
开发者ID:nds,项目名称:Fastaq,代码行数:9,代码来源:tasks.py
示例16: split_by_fixed_size
def split_by_fixed_size(infile, outfiles_prefix, chunk_size, tolerance, skip_if_all_Ns=False):
    '''Splits fasta/q file into separate files, with up to (chunk_size + tolerance) bases in each file

    Output files are named outfiles_prefix.1, outfiles_prefix.2, ...

      * A sequence of length in [chunk_size, chunk_size + tolerance] gets a
        file of its own.
      * A longer sequence is cut into pieces of chunk_size bases, one file
        per piece. A final piece of at most tolerance bases is merged into
        the piece before it. Each piece is named <id>:<start>-<end> and a
        line "<piece name> TAB <original id> TAB <offset>" is written to
        outfiles_prefix.coords.
      * Sequences shorter than chunk_size are collected and written last,
        several per file, starting a new file whenever adding the next one
        would exceed chunk_size + tolerance bases.

    skip_if_all_Ns -- skip whole sequences, and individual pieces of long
                      sequences, for which is_all_Ns() is true
    '''
    file_count = 1
    small_sequences = []  # sequences shorter than chunk_size
    seq_reader = sequences.file_reader(infile)
    f_coords = utils.open_file_write(outfiles_prefix + '.coords')

    try:
        for seq in seq_reader:
            if skip_if_all_Ns and seq.is_all_Ns():
                continue

            if len(seq) < chunk_size:
                # copied because it is kept after the reader moves on
                small_sequences.append(copy.copy(seq))
            elif len(seq) <= chunk_size + tolerance:
                f = utils.open_file_write(outfiles_prefix + '.' + str(file_count))
                print(seq, file=f)
                utils.close(f)
                file_count += 1
            else:
                # make list of chunk coords as (start, end) pairs, end
                # exclusive. Every end is clamped to the sequence length, so
                # the last chunk can never claim bases that do not exist,
                # including when it would overshoot by exactly one base.
                chunks = [(x, min(x + chunk_size, len(seq))) for x in range(0, len(seq), chunk_size)]

                # a short final chunk is folded into its predecessor
                if len(chunks) > 1 and (chunks[-1][1] - chunks[-1][0]) <= tolerance:
                    chunks[-2] = (chunks[-2][0], chunks[-1][1])
                    chunks.pop()

                # write one output file per chunk
                offset = 0
                for chunk in chunks:
                    if not (skip_if_all_Ns and seq.is_all_Ns(start=chunk[0], end=chunk[1] - 1)):
                        f = utils.open_file_write(outfiles_prefix + '.' + str(file_count))
                        # 1-based, inclusive coordinates in the name
                        chunk_id = seq.id + ':' + str(chunk[0] + 1) + '-' + str(chunk[1])
                        print(sequences.Fasta(chunk_id, seq[chunk[0]:chunk[1]]), file=f)
                        print(chunk_id, seq.id, offset, sep='\t', file=f_coords)
                        utils.close(f)
                        file_count += 1

                    # advanced for skipped chunks too, so that offset stays the
                    # chunk's position in the original sequence
                    offset += chunk[1] - chunk[0]
    finally:
        # nothing below writes coordinates, so the handle can go now
        utils.close(f_coords)

    # write files of small sequences
    if len(small_sequences):
        f = utils.open_file_write(outfiles_prefix + '.' + str(file_count))
        file_count += 1
        base_count = 0

        for seq in small_sequences:
            # start a new file rather than exceed the size limit, but never
            # leave a file empty
            if base_count > 0 and base_count + len(seq) > chunk_size + tolerance:
                utils.close(f)
                f = utils.open_file_write(outfiles_prefix + '.' + str(file_count))
                file_count += 1
                base_count = 0

            print(seq, file=f)
            base_count += len(seq)

        utils.close(f)
开发者ID:nds,项目名称:Fastaq,代码行数:56,代码来源:tasks.py
示例17: trim
def trim(infile, outfile, start, end):
    '''Call trim(start, end) on every sequence of infile and write those that are not empty afterwards to outfile'''
    reader = sequences.file_reader(infile)
    out_handle = utils.open_file_write(outfile)

    for record in reader:
        record.trim(start, end)

        # nothing left after trimming: drop the record
        if not len(record):
            continue

        print(record, file=out_handle)

    utils.close(out_handle)
开发者ID:nds,项目名称:Fastaq,代码行数:10,代码来源:tasks.py
示例18: search_for_seq
def search_for_seq(infile, outfile, search_string):
    '''Write one tab-separated line to outfile for every hit that search(search_string)
    reports in each sequence of infile: sequence name, hit[0] + 1, hit[1]'''
    reader = sequences.file_reader(infile)
    out_handle = utils.open_file_write(outfile)

    for record in reader:
        for hit in record.search(search_string):
            # presumably hit[0] is a 0-based position made 1-based for output - TODO confirm
            fields = (record.id, hit[0] + 1, hit[1])
            print(*fields, sep='\t', file=out_handle)

    utils.close(out_handle)
开发者ID:nds,项目名称:Fastaq,代码行数:10,代码来源:tasks.py
示例19: to_fasta_union
def to_fasta_union(infile, outfile, seqname='union'):
    '''Join all sequences of infile, in file order, into a single Fasta record called seqname and write it to outfile'''
    # The whole input is consumed before the output file is opened
    joined = ''.join(record.seq for record in sequences.file_reader(infile))

    out_handle = utils.open_file_write(outfile)
    print(sequences.Fasta(seqname, joined), file=out_handle)
    utils.close(out_handle)
开发者ID:nds,项目名称:Fastaq,代码行数:10,代码来源:tasks.py
示例20: trim_Ns_at_end
def trim_Ns_at_end(infile, outfile):
    '''Call trim_Ns() on every sequence of infile and write those that are not empty afterwards to outfile'''
    reader = sequences.file_reader(infile)
    out_handle = utils.open_file_write(outfile)

    for record in reader:
        record.trim_Ns()

        # nothing left after trimming: drop the record
        if not len(record):
            continue

        print(record, file=out_handle)

    utils.close(out_handle)
开发者ID:nds,项目名称:Fastaq,代码行数:10,代码来源:tasks.py
注：本文中的pyfastaq.sequences.file_reader函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。
请发表评论