本文整理汇总了Python中pyfastaq.utils.close函数的典型用法代码示例。如果您正苦于以下问题：Python close函数的具体用法？Python close怎么用？Python close使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了close函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: acgtn_only
def acgtn_only(infile, outfile):
    '''Replace every non-acgtn (case insensitive) character with an N.

    Each sequence read from infile has replace_non_acgt() called on it and
    is then printed to outfile.
    '''
    f = utils.open_file_write(outfile)
    try:
        for seq in sequences.file_reader(infile):
            seq.replace_non_acgt()
            print(seq, file=f)
    finally:
        # Close the output even if reading or writing fails part-way through.
        utils.close(f)
开发者ID:satta,项目名称:Fastaq,代码行数:7,代码来源:tasks.py
示例2: stats_from_fai
def stats_from_fai(infile):
    '''Returns dictionary of length stats from an fai file.

    Keys are: longest, shortest, mean, total_length, N50, number.
    Lengths are read from the second tab-separated column of every line.
    If the file contains no lines, every value is 0.

    Raises Error if the lengths cannot be read or parsed.
    '''
    f = utils.open_file_read(infile)
    try:
        lengths = sorted((int(line.split('\t')[1]) for line in f), reverse=True)
    except Exception as err:
        # Exception (not a bare except) so KeyboardInterrupt/SystemExit still
        # propagate; the original cause is kept via exception chaining.
        raise Error('Error getting lengths from fai file ' + infile) from err
    finally:
        # Close the input on both the success and the failure path.
        utils.close(f)

    if not lengths:
        return {x: 0 for x in ('longest', 'shortest', 'mean', 'N50', 'total_length', 'number')}

    stats = {}
    stats['longest'] = max(lengths)
    stats['shortest'] = min(lengths)
    stats['total_length'] = sum(lengths)
    stats['mean'] = stats['total_length'] / len(lengths)
    stats['number'] = len(lengths)

    # N50: walking from the longest sequence down, the length at which the
    # running total first reaches half of the total length.
    cumulative_length = 0
    for length in lengths:
        cumulative_length += length
        if cumulative_length >= 0.5 * stats['total_length']:
            stats['N50'] = length
            break

    return stats
开发者ID:satta,项目名称:Fastaq,代码行数:27,代码来源:tasks.py
示例3: run
def run(description):
    '''Command-line entry point for "fastaq to_random_subset".

    Parses sys.argv, then writes each read of the input file to the output
    file with probability percent/100. When --mate_file is given, one mate is
    consumed for every input read and written straight after its read
    whenever that read is kept. Exits with status 1 if the mates file runs
    out before the input file does.

    The description argument is accepted but not used by this function.
    '''
    parser = argparse.ArgumentParser(
        description = 'Takes a random subset of reads from a sequence file and optionally the corresponding read ' +
                      'from a mates file. Output is interleaved if mates file given',
        usage = 'fastaq to_random_subset [options] <infile> <outfile> <percent>')
    parser.add_argument('--mate_file', help='Name of mates file')
    parser.add_argument('--seed', help='Seed for random number generator. If not given, python\'s default is used', metavar='INT')
    parser.add_argument('infile', help='Name of input file')
    parser.add_argument('outfile', help='Name of output file')
    parser.add_argument('percent', type=float, help='Per cent probability of keeping any given read (pair) in [0,100]', metavar='FLOAT')
    args = parser.parse_args()

    random.seed(a=args.seed)
    reads = sequences.file_reader(args.infile)
    out = utils.open_file_write(args.outfile)
    # Only set up a mates reader when a mates file was actually supplied.
    mates = sequences.file_reader(args.mate_file) if args.mate_file else None

    for read in reads:
        mate = None
        if mates is not None:
            # A mate is consumed for every read, kept or not, so the two
            # files stay in step.
            try:
                mate = next(mates)
            except StopIteration:
                print('Error! Didn\'t get mate for read', read.id, file=sys.stderr)
                sys.exit(1)

        if 100 * random.random() <= args.percent:
            print(read, file=out)
            if mates is not None:
                print(mate, file=out)

    utils.close(out)
开发者ID:martinghunt,项目名称:Fastaq,代码行数:32,代码来源:to_random_subset.py
示例4: trim_contigs
def trim_contigs(infile, outfile, trim):
    '''Widen gaps and trim the ends of every sequence, writing what remains.

    For each sequence in infile:
      * sequences shorter than 2 * trim are skipped;
      * for every gap returned by seq.gaps(), up to `trim` positions before
        gap.start and the positions gap.end .. gap.end + trim (capped at the
        sequence length) are overwritten with N;
      * seq.trim(trim, trim) and seq.trim_Ns() are applied;
      * the sequence is written to outfile only if it still contains at
        least one character other than n/N.
    '''
    # Compiled once rather than once per sequence: matches any non-N character.
    non_n_regex = re.compile('[^nN]')

    seq_reader = sequences.file_reader(infile)
    fout = utils.open_file_write(outfile)

    try:
        for seq in seq_reader:
            if len(seq) < 2 * trim:
                continue

            gaps = seq.gaps()
            bases = list(seq.seq)

            # extend the length of each gap
            for gap in gaps:
                left_start = max(gap.start - trim, 0)
                right_end = min(gap.end + trim + 1, len(seq))

                for i in range(left_start, gap.start):
                    bases[i] = 'N'
                for i in range(gap.end, right_end):
                    bases[i] = 'N'

            seq.seq = ''.join(bases)

            # trim start/end bases and tidy up any resulting Ns at either end of the trimmed seq
            seq.trim(trim, trim)
            seq.trim_Ns()

            # check that there is some non-N sequence left over
            if non_n_regex.search(seq.seq) is not None:
                print(seq, file=fout)
    finally:
        # Close the output even if processing a sequence raises.
        utils.close(fout)
开发者ID:nds,项目名称:Fastaq,代码行数:34,代码来源:tasks.py
示例5: fix_blast_coords
def fix_blast_coords(blast_file, coords_file, outfile):
    '''Rewrite a tabulated blast file, shifting coordinates and renaming hits.

    coords_file is turned into a dict by offset_coords_file_to_dict(). For
    every blast line whose first field is a key of that dict, element [1] of
    the dict value is added to fields 7 and 8 (indexes 6 and 7) and the first
    field is replaced with element [0] of the dict value. Lines without a tab
    character are dropped. Every kept line is written tab-delimited.
    '''
    coords_offset = offset_coords_file_to_dict(coords_file)
    fin = utils.open_file_read(blast_file)
    try:
        fout = utils.open_file_write(outfile)
        try:
            for line in fin:
                # blastn sticks a bunch of header lines in the tabulated
                # output file. Need to ignore them
                if '\t' not in line:
                    continue

                # Lines are supposed to be tab delimited. Sometimes they
                # have a space character following a tab character, so
                # split on whitespace. This is OK because the pipeline has already
                # removed whitespace from sequence names
                data = line.rstrip().split()
                if data[0] in coords_offset:
                    data[6] = str(int(data[6]) + coords_offset[data[0]][1])
                    data[7] = str(int(data[7]) + coords_offset[data[0]][1])
                    data[0] = coords_offset[data[0]][0]

                # always reconstruct the line, because of spaces bug mentioned above
                line = '\t'.join(data)
                print(line.rstrip(), file=fout)
        finally:
            # Close the output even if a line fails to parse.
            utils.close(fout)
    finally:
        # Close the input even if opening or writing the output fails.
        utils.close(fin)
开发者ID:martinghunt,项目名称:Farm_blast,代码行数:27,代码来源:utils.py
示例6: to_fasta
def to_fasta(infile, outfile, line_length=60, strip_after_first_whitespace=False, check_unique=False):
    '''Write every sequence of infile to outfile in FASTA format.

    line_length is assigned to the class attribute sequences.Fasta.line_length
    for the duration of the write and the previous value is restored after.
    If strip_after_first_whitespace is set, seq.strip_after_first_whitespace()
    is called on each sequence before it is written.
    If check_unique is set, names seen more than once are reported on stderr
    after the whole file has been written, and Error is raised.
    '''
    seq_reader = sequences.file_reader(infile)
    f_out = utils.open_file_write(outfile)
    original_line_length = sequences.Fasta.line_length
    sequences.Fasta.line_length = line_length
    used_names = {}

    try:
        for seq in seq_reader:
            if strip_after_first_whitespace:
                seq.strip_after_first_whitespace()

            if check_unique:
                used_names[seq.id] = used_names.get(seq.id, 0) + 1

            if isinstance(seq, sequences.Fastq):
                # Rebuild as Fasta from the id and bases only.
                print(sequences.Fasta(seq.id, seq.seq), file=f_out)
            else:
                print(seq, file=f_out)
    finally:
        # line_length is class-level state shared by all callers: always put
        # it back, and always close the output, even when the loop raises.
        sequences.Fasta.line_length = original_line_length
        utils.close(f_out)

    if check_unique:
        all_unique = True

        for name, count in used_names.items():
            if count > 1:
                print('Sequence name "' + name + '" not unique. Found', count, 'times', file=sys.stderr)
                all_unique = False

        if not all_unique:
            raise Error('Not all sequence names unique. Cannot continue')
开发者ID:satta,项目名称:Fastaq,代码行数:33,代码来源:tasks.py
示例7: test_get_next_from_file
def test_get_next_from_file(self):
    '''get_next_from_file() should read seqs from a good file, and raise an error on badly formatted files'''
    bad_names = [
        'sequences_test_fail_no_AT.fq',
        'sequences_test_fail_no_seq.fq',
        'sequences_test_fail_no_plus.fq',
        'sequences_test_fail_no_qual.fq',
    ]

    # Each malformed file must make the reader raise sequences.Error at some
    # point while it is being consumed.
    for bad_path in [os.path.join(data_dir, name) for name in bad_names]:
        handle = utils.open_file_read(bad_path)
        record = sequences.Fastq()
        with self.assertRaises(sequences.Error):
            while record.get_next_from_file(handle):
                pass
        utils.close(handle)

    # Every record read from the well-formed file must equal the expected one.
    good_path = os.path.join(data_dir, 'sequences_test_good_file.fq')
    try:
        handle = open(good_path)
    except IOError:
        print("Error opening '" + good_path + "'", file=sys.stderr)
        sys.exit(1)

    expected = sequences.Fastq('ID', 'ACGTA', 'IIIII')
    record = sequences.Fastq()
    while record.get_next_from_file(handle):
        self.assertEqual(record, expected)
    utils.close(handle)
开发者ID:martinghunt,项目名称:Fastaq,代码行数:29,代码来源:sequences_test.py
示例8: file_reader
def file_reader(fname):
    '''Generator yielding one Caf record at a time from the file fname.

    A single Caf object is created and passed to get_next_from_file() for
    every record, so the same object is yielded each time. Callers that need
    to keep a record should copy it before asking for the next one.
    '''
    f = utils.open_file_read(fname)
    try:
        c = Caf()
        while c.get_next_from_file(f):
            yield c
    finally:
        # Runs on normal exhaustion, on a parse error, and when the generator
        # is closed before being exhausted, so the handle is not left open.
        utils.close(f)
开发者ID:martinghunt,项目名称:Fastaq,代码行数:8,代码来源:caf.py
示例9: translate
def translate(infile, outfile, frame=0):
    '''Write seq.translate(frame=frame) for every sequence of infile to outfile.'''
    seq_reader = sequences.file_reader(infile)
    fout = utils.open_file_write(outfile)

    try:
        for seq in seq_reader:
            print(seq.translate(frame=frame), file=fout)
    finally:
        # Close the output even if reading or translating raises.
        utils.close(fout)
开发者ID:nds,项目名称:Fastaq,代码行数:8,代码来源:tasks.py
示例10: reverse_complement
def reverse_complement(infile, outfile):
    '''Call revcomp() on every sequence of infile and write it to outfile.'''
    seq_reader = sequences.file_reader(infile)
    fout = utils.open_file_write(outfile)

    try:
        for seq in seq_reader:
            seq.revcomp()
            print(seq, file=fout)
    finally:
        # Close the output even if reading or writing raises.
        utils.close(fout)
开发者ID:nds,项目名称:Fastaq,代码行数:9,代码来源:tasks.py
示例11: strip_illumina_suffix
def strip_illumina_suffix(infile, outfile):
    '''Call strip_illumina_suffix() on every sequence of infile and write it to outfile.'''
    seq_reader = sequences.file_reader(infile)
    f_out = utils.open_file_write(outfile)

    try:
        for seq in seq_reader:
            seq.strip_illumina_suffix()
            print(seq, file=f_out)
    finally:
        # Close the output even if reading or writing raises.
        utils.close(f_out)
开发者ID:nds,项目名称:Fastaq,代码行数:9,代码来源:tasks.py
示例12: replace_bases
def replace_bases(infile, outfile, old, new):
    '''Call replace_bases(old, new) on every sequence of infile and write it to outfile.'''
    seq_reader = sequences.file_reader(infile)
    f_out = utils.open_file_write(outfile)

    try:
        for seq in seq_reader:
            seq.replace_bases(old, new)
            print(seq, file=f_out)
    finally:
        # Close the output even if reading or writing raises.
        utils.close(f_out)
开发者ID:nds,项目名称:Fastaq,代码行数:9,代码来源:tasks.py
示例13: trim_Ns_at_end
def trim_Ns_at_end(infile, outfile):
    '''Call trim_Ns() on every sequence of infile and write the non-empty results to outfile.

    Sequences whose length is zero after trimming are not written.
    '''
    seq_reader = sequences.file_reader(infile)
    fout = utils.open_file_write(outfile)

    try:
        for seq in seq_reader:
            seq.trim_Ns()
            if len(seq):
                print(seq, file=fout)
    finally:
        # Close the output even if reading or writing raises.
        utils.close(fout)
开发者ID:nds,项目名称:Fastaq,代码行数:10,代码来源:tasks.py
示例14: search_for_seq
def search_for_seq(infile, outfile, search_string):
    '''Write one tab-separated line per hit of search_string in the sequences of infile.

    Hits come from seq.search(search_string). Each output line holds the
    sequence id, hit[0] + 1 and hit[1].
    '''
    seq_reader = sequences.file_reader(infile)
    fout = utils.open_file_write(outfile)

    try:
        for seq in seq_reader:
            hits = seq.search(search_string)
            for hit in hits:
                print(seq.id, hit[0]+1, hit[1], sep='\t', file=fout)
    finally:
        # Close the output even if reading or searching raises.
        utils.close(fout)
开发者ID:nds,项目名称:Fastaq,代码行数:10,代码来源:tasks.py
示例15: sort_by_name
def sort_by_name(infile, outfile):
    '''Sorts input sequence file by name, writes sorted output file.

    The sequences are loaded into a dict with file_to_dict() and written in
    the order given by Python's sorted() over the dict keys (presumably the
    sequence names - confirm against file_to_dict). This is Python's default
    ordering for the keys, which is not guaranteed to match "sort -d -k1,1".
    '''
    seqs = {}
    file_to_dict(infile, seqs)
    fout = utils.open_file_write(outfile)

    try:
        for name in sorted(seqs):
            print(seqs[name], file=fout)
    finally:
        # Close the output even if writing raises.
        utils.close(fout)
开发者ID:martinghunt,项目名称:Fastaq,代码行数:10,代码来源:tasks.py
示例16: to_fasta_union
def to_fasta_union(infile, outfile, seqname='union'):
    '''Concatenate all sequences of infile into one FASTA record named seqname.

    The whole input is read before the output file is opened. The joined
    sequence is written to outfile as a single sequences.Fasta record.
    '''
    joined = ''.join(seq.seq for seq in sequences.file_reader(infile))
    f_out = utils.open_file_write(outfile)

    try:
        print(sequences.Fasta(seqname, joined), file=f_out)
    finally:
        # Close the output even if writing raises.
        utils.close(f_out)
开发者ID:nds,项目名称:Fastaq,代码行数:10,代码来源:tasks.py
示例17: test_get_next_from_embl_file
def test_get_next_from_embl_file(self):
    '''Each record read from sequences_test.embl should equal the matching expected Fasta'''
    handle = utils.open_file_read(os.path.join(data_dir, 'sequences_test.embl'))
    record = sequences.Embl()
    index = 0

    # Record number index + 1 is expected to be named 'seq<index + 1>' and to
    # carry the sequence stored at expected_embl[index].
    while record.get_next_from_file(handle):
        expected = sequences.Fasta('seq' + str(index + 1), expected_embl[index])
        self.assertEqual(record, expected)
        index += 1

    utils.close(handle)
开发者ID:martinghunt,项目名称:Fastaq,代码行数:10,代码来源:sequences_test.py
示例18: trim
def trim(infile, outfile, start, end):
    '''Call trim(start, end) on every sequence of infile and write the non-empty results to outfile.

    Sequences whose length is zero after trimming are not written.
    '''
    seq_reader = sequences.file_reader(infile)
    fout = utils.open_file_write(outfile)

    try:
        for seq in seq_reader:
            seq.trim(start, end)
            if len(seq):
                print(seq, file=fout)
    finally:
        # Close the output even if reading or writing raises.
        utils.close(fout)
开发者ID:nds,项目名称:Fastaq,代码行数:10,代码来源:tasks.py
示例19: sort_by_size
def sort_by_size(infile, outfile, smallest_first=False):
    '''Sorts input sequence file by biggest sequence first, writes sorted output file.

    Set smallest_first=True to have smallest first. The sequences are loaded
    with file_to_dict() and ordered by len() using a stable sort.
    '''
    seqs = {}
    file_to_dict(infile, seqs)
    ordered = sorted(seqs.values(), key=len, reverse=not smallest_first)
    fout = utils.open_file_write(outfile)

    try:
        for seq in ordered:
            print(seq, file=fout)
    finally:
        # Close the output even if writing raises.
        utils.close(fout)
开发者ID:nds,项目名称:Fastaq,代码行数:10,代码来源:tasks.py
示例20: make_long_reads
def make_long_reads(infile, outfile, method='tiling', fixed_read_length=20000, tile_step=10000, gamma_shape=1.2, gamma_scale=6000, coverage=10, gamma_min_length=20000, seed=None, ins_skip=None, ins_window=None,):
    '''Write simulated long reads, cut from the sequences of infile, to outfile.

    method selects how reads are placed on each input sequence:
      * 'tiling':  reads of fixed_read_length starting every tile_step bases,
                   stopping once a read reaches the end of the sequence.
      * 'gamma':   read lengths drawn from numpy.random.gamma(gamma_shape,
                   scale=gamma_scale), redrawn until within
                   [gamma_min_length, len(seq)], at random start positions.
      * 'uniform': reads of fixed_read_length at random start positions.
    For 'gamma' and 'uniform', reads are generated until the summed read
    length reaches coverage * len(seq) minus half of the minimum read length.

    Sequences shorter than the relevant minimum length are skipped with a
    message on stderr. Each read is named <seq.id>_<start>_<end> with 1-based
    inclusive coordinates. If ins_skip is set, add_insertions(skip=ins_skip,
    window=ins_window) is called on each read before it is written.

    NOTE: seed is only passed to random.seed(); numpy.random is not seeded
    here, so the read lengths of the 'gamma' method are not controlled by it.
    '''
    assert method in ['tiling', 'gamma', 'uniform']
    assert ins_skip == ins_window == None or None not in [ins_skip, ins_window]
    if seed is not None:
        random.seed(a=seed)

    reader = sequences.file_reader(infile)
    out = utils.open_file_write(outfile)

    def too_short(seq, min_length):
        # Report on stderr and return True when seq is below min_length.
        if len(seq) < min_length:
            print('Skipping sequence', seq.id, 'because it is too short at', len(seq), 'bases', file=sys.stderr)
            return True
        return False

    def write_read(seq, start, stop):
        # Write seq[start:stop] as a read and return its length as measured
        # before any insertions are added.
        fa = sequences.Fasta('_'.join([seq.id, str(start + 1), str(stop)]), seq[start:stop])
        length = len(fa)
        if ins_skip:
            fa.add_insertions(skip=ins_skip, window=ins_window)
        print(fa, file=out)
        return length

    for seq in reader:
        if method == 'tiling':
            if too_short(seq, fixed_read_length):
                continue
            for tile_start in range(0, len(seq), tile_step):
                tile_stop = min(len(seq), tile_start + fixed_read_length)
                write_read(seq, tile_start, tile_stop)
                # The read that touches the end of the sequence is the last one.
                if tile_stop >= len(seq):
                    break
        elif method == 'gamma':
            if too_short(seq, gamma_min_length):
                continue
            bases_written = 0
            while bases_written < coverage * len(seq) - 0.5 * gamma_min_length:
                read_length = int(numpy.random.gamma(gamma_shape, scale=gamma_scale))
                # Redraw until the length is usable for this sequence.
                while read_length < gamma_min_length or read_length > len(seq):
                    read_length = int(numpy.random.gamma(gamma_shape, scale=gamma_scale))
                read_start = random.randint(0, len(seq) - read_length)
                bases_written += write_read(seq, read_start, read_start + read_length)
        elif method == 'uniform':
            if too_short(seq, fixed_read_length):
                continue
            bases_written = 0
            while bases_written < coverage * len(seq) - 0.5 * fixed_read_length:
                read_start = random.randint(0, len(seq) - fixed_read_length)
                bases_written += write_read(seq, read_start, read_start + fixed_read_length)

    utils.close(out)
开发者ID:nds,项目名称:Fastaq,代码行数:54,代码来源:tasks.py
注：本文中的pyfastaq.utils.close函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。
请发表评论