本文整理汇总了Python中toolshed.nopen函数的典型用法代码示例。如果您正苦于以下问题:Python nopen函数的具体用法?Python nopen怎么用?Python nopen使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了nopen函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: main
def main(prefix, name_re, min_samples, methylation_files):
    """Merge per-sample methylation files into three probe-by-sample tables.

    Writes ``{prefix}methylation.txt.gz``, ``{prefix}methylated.txt.gz`` and
    ``{prefix}counts.txt.gz``. Each table starts with a ``probe`` header row
    holding one column per input file, followed by one ``chrom:start`` row
    per site produced by ``bed_merge``.

    Args:
        prefix: output path prefix; "." is appended unless it already ends
            with "." or "/".
        name_re: regular expression whose first group is used as the sample
            name for a file; when it does not match (or has no group) the
            file's basename is used instead.
        min_samples: a site is skipped when fewer than this many of its
            values are reported as > 0 by ``tryfloat``.
        methylation_files: paths of the per-sample input files.
    """
    name_re = re.compile(r"%s" % name_re)
    if not prefix.endswith((".", "/")):
        prefix += "."

    def source_from_fname(fname):
        # search() returns None on no match (AttributeError) and groups()
        # is empty for a pattern without groups (IndexError); both fall
        # back to the bare file name.
        try:
            return name_re.search(fname).groups(0)[0]
        except (AttributeError, IndexError, TypeError):
            return op.basename(fname)

    fmt = "{chrom}:{start}\t{vals}\n"
    handles = []
    try:
        # Same opening order as before: methylation, methylated, counts.
        for suffix in ('methylation.txt.gz', 'methylated.txt.gz',
                       'counts.txt.gz'):
            handles.append(
                nopen('{prefix}{suffix}'.format(prefix=prefix, suffix=suffix),
                      'w'))
        fhm, fhme, fhc = handles

        iterables = [gen_iterable(f, source_from_fname)
                     for f in methylation_files]
        sources = [source_from_fname(f) for f in methylation_files]

        header = "probe\t%s" % "\t".join(sources) + "\n"
        for fh in (fhm, fhc, fhme):
            fh.write(header)

        for chrom, start, end, values, counts, meths in bed_merge(iterables,
                                                                  sources):
            if sum(tryfloat(v) > 0 for v in values) < min_samples:
                continue
            fhm.write(fmt.format(chrom=chrom, start=start,
                                 vals="\t".join(values)))
            fhc.write(fmt.format(chrom=chrom, start=start,
                                 vals="\t".join(counts)))
            fhme.write(fmt.format(chrom=chrom, start=start,
                                  vals="\t".join(meths)))
    finally:
        # Always close so the compressed streams are finalized, even when
        # merging fails part-way through.
        for fh in handles:
            fh.close()
开发者ID:brentp,项目名称:clustermodel,代码行数:28,代码来源:meth-matrix.py
示例2: main
def _is_fasta_path(path):
    """Return True when `path` has a FASTA extension (.fa/.fasta), ignoring
    a trailing .gz or .bz2 compression suffix."""
    name = op.basename(path).lower()
    for ext in (".gz", ".bz2"):
        if name.endswith(ext):
            name = name[:-len(ext)]
            break
    return name.endswith((".fasta", ".fa"))


def main(args):
    """Read the tag sequences, then pass every read to ``print_record``.

    Args:
        args: namespace providing ``tags`` (FASTA of tag sequences),
            ``reads`` (iterable of FASTA/FASTQ paths) and ``verbose``
            (write progress messages to stderr).
    """
    tags = {}
    if args.verbose:
        sys.stderr.write(">> reading in tag sequences...\n")
    with nopen(args.tags) as fasta:
        for name, seq in read_fasta(fasta):
            tags[name] = seq

    i = 0
    for fx in args.reads:
        if args.verbose:
            sys.stderr.write(">> processing %s...\n" % op.basename(fx))
        # process either fasta or fastq. The previous substring test
        # (".fa" in fx) also matched ".fastq", sending FASTQ files to the
        # FASTA parser; decide on the real extension instead.
        parse = read_fasta if _is_fasta_path(fx) else read_fastq
        with nopen(fx) as fh:
            for record in parse(fh):
                i += 1
                if i % 1000000 == 0 and args.verbose:
                    sys.stderr.write(">> processed %d reads...\n" % i)
                # FASTA records are (id, seq); FASTQ records carry a third
                # quality field that is not needed here.
                print_record(tags, record[0], record[1])
开发者ID:brwnj,项目名称:repertoire,代码行数:26,代码来源:find_seeds.py
示例3: main
def main(args):
    """Rewrite an iSSAKE contig FASTA with short names plus a metadata table.

    The input is read twice: once to total the ``reads`` field over all
    contigs, then again to write ``args.fasta_out`` (upper-cased sequences
    named ``contig_<n>``) and the tab-separated ``args.meta`` table.

    Args:
        args: namespace providing ``fasta_in``, ``fasta_out`` and ``meta``.
    """
    # fields encoded, in this order, in every iSSAKE record name
    issake_fields = "contig_id length reads avg_coverage seed v_region j_region".split()
    # columns kept in the metadata table
    meta_columns = "id v_region j_region length reads avg_coverage percent_of_total sequence".split()

    def parse_name(raw_name):
        # remove the iSSAKE labels so only "|"-separated values remain
        for label in ("size", "cov", "read", "seed:"):
            raw_name = raw_name.replace(label, "")
        return dict(zip(issake_fields, raw_name.split("|")))

    # first pass: total reads used in assembly
    total = 0.
    with nopen(args.fasta_in) as fasta:
        for name, _ in read_fasta(fasta):
            total += int(parse_name(name)['reads'])

    # second pass: emit the renamed contigs and their metadata
    with nopen(args.fasta_in) as fasta:
        with open(args.fasta_out, 'wb') as fasta_out:
            with open(args.meta, 'wb') as meta:
                # print header
                meta.write("\t".join(meta_columns) + "\n")
                for index, (name, seq) in enumerate(read_fasta(fasta)):
                    record = parse_name(name)
                    # want to shorten the read names
                    record['id'] = "contig_%d" % index
                    share = int(record['reads']) / total
                    record['percent_of_total'] = "%.6g" % (100 * share)
                    record['sequence'] = seq
                    row = [str(record[column]) for column in meta_columns]
                    meta.write("\t".join(row) + "\n")
                    write_fasta(fasta_out, record['id'], seq.upper())
开发者ID:brwnj,项目名称:repertoire,代码行数:27,代码来源:reads2meta.py
示例4: _set_structure
def _set_structure(self, structure):
"""
here, we want to intersect the query and subject bed files with the
structure.bed file and give each set of intervals in query and bed
that fall within (or have any overlap with) a unique, fake chromosome
so that all shuffling is within that chromosome.
in order to do this, we also have to create a fake genome file that
contains the lengths of those chromosomes.
"""
if structure in (None, ""): return
self.chrom = True # has to be by chromosome.
n_query_before = sum(1 for _ in nopen(self.query))
n_subject_before = sum(1 for _ in nopen(self.subject))
new_genome = open(mktemp(suffix='.fake_genome'), 'w')
structure = "<(cut -f 1-3 %s)" % structure
seen_segs = {}
for bed in ('query', 'subject', 'exclude', 'include'):
bed_path = getattr(self, "_" + bed, getattr(self, bed))
if not bed_path: continue
new_fh = open(mktemp(suffix='%s.fake' % bed), 'w')
for toks in reader("|bedtools intersect -wo -a %s -b '%s' \
| sort -k4,4 -k5,5g" % (structure, bed_path), header=False):
gtoks, btoks = toks[:3], toks[3:-1] # drop the bp overlap
new_chrom = "_".join(gtoks)
gtoks[1:] = map(int, gtoks[1:])
btoks[1:3] = map(int, btoks[1:3])
glen = gtoks[2] - gtoks[1] # fake chrom length.
if new_chrom.startswith('chr'): new_chrom = new_chrom[3:]
if not new_chrom in seen_segs:
# save it in the genome file.
print >>new_genome, "\t".join((new_chrom, str(glen)))
seen_segs[new_chrom] = True
# with partial overlap, we'll have a negative start or an
# end outside the genome... for now, just truncate.
# adjust the interval to its location the new chrom.
btoks[0] = new_chrom
btoks[1] = max(0, btoks[1] - gtoks[1]) # don't let it go below 0
# chop to end of fake chrom.
btoks[2] = min(btoks[2] - gtoks[1], glen - 1)
assert 0 <= btoks[1] <= btoks[2] < glen
btoks[1:3] = map(str, btoks[1:3])
print >>new_fh, "\t".join(btoks)
new_fh.close()
setattr(self, bed, new_fh.name)
new_genome.close()
self.genome_file = new_genome.name
开发者ID:brentp,项目名称:shuffler,代码行数:52,代码来源:shuffler.py
示例5: extend_bed
def extend_bed(fin, fout, bases):
    """Write `fin` to `fout` with every interval widened by `bases` in total.

    Half of `bases` is subtracted from the start and added to the end of
    each interval; both are clamped at 0. When a negative `bases` makes the
    start pass the end, both collapse to their midpoint.

    Args:
        fin: input BED path (anything ``nopen`` accepts).
        fout: output path.
        bases: total number of bases to extend by; may be negative.

    Returns:
        The name of the written output file.
    """
    # we're extending both a.bed and b.bed by this distance
    # so divide by 2. Floor division keeps the coordinates integral
    # regardless of interpreter version.
    bases //= 2
    with nopen(fout, 'w') as fh:
        src = nopen(fin)
        try:
            for line in src:
                toks = line.rstrip("\r\n").split("\t")
                start = max(0, int(toks[1]) - bases)
                end = max(0, int(toks[2]) + bases)
                if start > end:  # negative distances
                    start = end = (start + end) // 2
                assert start <= end
                toks[1:3] = [str(start), str(end)]
                fh.write("\t".join(toks) + "\n")
        finally:
            # the input handle used to be left for the garbage collector
            src.close()
    return fh.name
开发者ID:daler,项目名称:poverlap,代码行数:13,代码来源:poverlap.py
示例6: readfx
def readfx(fastx):
    """Yield ``(name, sequence, quality)`` for each FASTA/FASTQ record.

    `name` is the header text after the leading ">" or "@", up to the first
    space. `quality` is None for records without a "+" separator line and
    for a final record whose quality lines end before reaching the length
    of the sequence.

    Line endings are removed with ``rstrip("\\r\\n")`` rather than by
    chopping one character, so a last line without a newline keeps its
    final character and CRLF files do not leave a trailing "\\r".

    Args:
        fastx: path (or anything else ``nopen`` accepts) of the input.
    """
    with nopen(fastx) as fp:
        last = None  # header line already read ahead, if any
        while True:
            if not last:
                # scan forward to the next record header
                for l in fp:
                    if l[0] in '>@':
                        last = l.rstrip("\r\n")
                        break
            if not last:
                break
            name, seqs, last = last[1:].partition(" ")[0], [], None
            # collect sequence lines until the next marker line
            for l in fp:
                if l[0] in '@+>':
                    last = l.rstrip("\r\n")
                    break
                seqs.append(l.rstrip("\r\n"))
            if not last or last[0] != '+':
                # no "+" line follows: emit without quality
                yield name, ''.join(seqs), None
                if not last:
                    break
            else:
                # "+" line seen: read quality until it is as long as the
                # sequence
                seq, leng, seqs = ''.join(seqs), 0, []
                for l in fp:
                    qual = l.rstrip("\r\n")
                    seqs.append(qual)
                    leng += len(qual)
                    if leng >= len(seq):
                        last = None
                        yield name, seq, ''.join(seqs)
                        break
                if last:
                    # input ended before enough quality was read
                    yield name, seq, None
                    break
开发者ID:hjanime,项目名称:scgc_scripts,代码行数:31,代码来源:fastx.py
示例7: main
def main():
    """Parse the command line, echo an extended header and call ``run``.

    When either ``-r`` or ``-p`` is missing the help text is printed and the
    process exits with a non-zero status. If the first line of the regions
    file starts with "#" or its third column is not all digits, it is
    treated as a header and printed with two extra columns,
    ``slk_p`` and ``slk_sidak_p``.

    Returns:
        Whatever ``run(args)`` returns.
    """
    p = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    p.add_argument("-p", dest="pvals", help="BED containing all the p values"
                   " used to generate `regions`")
    p.add_argument("-r", dest="regions", help="BED containing all the regions")
    p.add_argument("-s", dest="step", type=int, default=50,
                   help="step size for acf calculation. should be the same "
                   " value as the step sent to -d arg for acf")
    # A space was missing between the two literals ("...-log10 ofthe...").
    p.add_argument("--mlog", dest="mlog", action="store_true",
                   default=False, help="do the correlation on the -log10 of "
                   "the p-values. Default is to do it on the raw values")
    p.add_argument("-N", dest="N", help="number of simulations to perform",
                   type=int, default=0)
    p.add_argument("-c", dest="c", help="column number containing the p-value"
                   " of interest", type=int, default=-1)
    p.add_argument("-z", dest="z", help="use z-score correction",
                   action="store_true")
    args = p.parse_args()
    if not (args.regions and args.pvals):
        import sys
        # print_help() returns None, so this exits with status 1
        sys.exit(not p.print_help())

    from toolshed import nopen
    regions_fh = nopen(args.regions)
    try:
        header = next(regions_fh)
    finally:
        # only the first line is needed; do not leave the handle open
        regions_fh.close()
    if header.startswith("#") or (not header.split("\t")[2].isdigit()):
        print("%s\tslk_p\tslk_sidak_p" % (header.rstrip("\r\n"),))
    return run(args)
开发者ID:ngthomas,项目名称:combined-pvalues,代码行数:27,代码来源:region_p.py
示例8: intersect
def intersect(ref, xref, peaks):
    """Intersect `peaks` with `ref` and write numbered sites to a temp file.

    ``bedtools intersect -wb`` output is sorted by chromosome, gene and
    start, grouped with ``grouper`` and de-duplicated on the ``peak``
    column. Within each group, sites not on the "-" strand are written
    as they are met, using their position in the de-duplicated group as the
    index; "-" strand sites are held back and written afterwards with
    indices counting down from the number of held sites.

    Args:
        ref: reference BED passed to ``bedtools -b``.
        xref: falsy, or a cross-reference source converted with
            ``xref_to_dict`` and handed to ``get_out``.
        peaks: peaks BED passed to ``bedtools -a``.

    Returns:
        Path of the temporary ``.bed`` file that was written.
    """
    import os

    if xref:
        xref = xref_to_dict(xref)

    # group the output by chr->gene->start
    cmd = ("|bedtools intersect -wb -a {peaks} -b {ref} "
           "| sort -k1,1 -k8,8 -k2,2n").format(peaks=peaks, ref=ref)
    cols = ['chrom', 'start', 'stop', 'peak', '_chrom',
            '_start', '_stop', 'gene', '_score', 'strand']

    # mkstemp() returns an already-open descriptor; wrap it instead of
    # discarding it and re-opening the path, which leaked the descriptor.
    fd, tmp_path = tempfile.mkstemp(suffix=".bed")
    tmp = os.fdopen(fd, 'wb')
    try:
        for g in grouper(nopen(cmd), cols):
            negs = []
            unique_sites = unique_everseen(
                g, lambda t: ret_item(t, cols, 'peak'))
            for i, l in enumerate(unique_sites, start=1):
                l = lparser(l, cols)
                # negative stranded sites
                if l['strand'] == "-":
                    # need to count through them up, saving l each time
                    negs.append(l)
                    continue
                # positive stranded sites
                tmp.write("\t".join(get_out(l, i, xref)) + "\n")
            for i, l in izip(count(len(negs), -1), negs):
                tmp.write("\t".join(get_out(l, i, xref)) + "\n")
    finally:
        tmp.close()
    return tmp_path
开发者ID:jayhesselberth,项目名称:polya,代码行数:25,代码来源:merge_sites.py
示例9: local_shuffle
def local_shuffle(bed, loc='500000'):
    """
    Print a randomized copy of `bed` in which every interval has been moved
    locally.

    Arguments:
        bed - input bed file
        loc - when it is all digits: the maximum distance (+ or -) by which
              each interval is shifted; start and end move by the same
              random offset and are clamped at 0.
              otherwise: a BED file of regions; each interval of `bed` gets
              a new start drawn from the `loc` region it overlaps and keeps
              its length. Intervals overlapping no region are skipped and
              their number is reported on stderr.
    """
    from random import randint

    if str(loc).isdigit():
        dist = abs(int(loc))
        with nopen(bed) as fh:
            for line in fh:
                toks = line.rstrip('\r\n').split('\t')
                offset = randint(-dist, dist)
                toks[1:3] = [str(max(0, int(pos) + offset))
                             for pos in toks[1:3]]
                print("\t".join(toks))
        return

    # we are using loc as the windows within which to shuffle
    assert os.path.exists(loc)
    tmp_bed = mktemp()
    with open(tmp_bed, 'w') as fh:
        # this step is so we don't have to track the number of columns in A:
        # the whole original row travels along as a 4th, SEP-joined column
        for toks in reader(bed, header=False):
            fh.write("%s\t%s\n" % ("\t".join(toks[:3]), SEP.join(toks)))

    missing = 0
    # we first find the b-interval that contains each a-interval by
    # using bedtools intersect
    cmd = "|bedtools intersect -wao -a {bed4} -b {loc}".format(bed4=tmp_bed,
                                                               loc=loc)
    for toks in reader(cmd, header=False):
        a = toks[:4][3].split(SEP)  # extract the full interval
        b = toks[4:]
        # last column of -wao output is the overlap; 0 means no region hit
        if int(b[-1]) == 0:
            missing += 1
            continue
        assert a[0] == b[0], ('chroms dont match', a, b)
        length = int(a[2]) - int(a[1])
        # doesn't care if the new interval is completely contained in b
        new_start = randint(int(b[1]), int(b[2]))
        # subtract half the time (the coin is always tossed first).
        if randint(0, 1) == 0 and new_start > length:
            new_end = new_start - length
        else:
            new_end = new_start + length
        if new_start < new_end:
            low, high = new_start, new_end
        else:
            low, high = new_end, new_start
        a[1], a[2] = str(low), str(high)
        print("\t".join(a))

    if missing > 0:
        message = ("found {missing} intervals in {bed} that "
                   " were not contained in {loc}"
                   .format(missing=missing, bed=bed, loc=loc))
        sys.stderr.write(message + "\n")
开发者ID:astatham,项目名称:poverlap,代码行数:60,代码来源:poverlap.py
示例10: main
def main():
    """Parse the command line, resolve the p-value column and call ``run``.

    When either ``-r`` or ``-p`` is missing the help text is printed and the
    process exits with a non-zero status. If the first line of the regions
    file starts with "#" or its third column is not all digits, it is
    treated as a header and printed with two extra columns,
    ``slk_p`` and ``slk_sidak_p``.

    ``-c`` may be a column name found in ``ts.header(args.pvals)``, in
    which case it is replaced by that column's 1-based position; any other
    value is converted with ``int``.

    Returns:
        Whatever ``run(args)`` returns.
    """
    p = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    p.add_argument("-p", dest="pvals", help="BED containing all the p values"
                   " used to generate `regions`")
    p.add_argument("-r", dest="regions", help="BED containing all the regions")
    p.add_argument("-s", "--step", dest="step", type=int, default=50,
                   help="step size for acf calculation. should be the same "
                   " value as the step sent to -d arg for acf")
    p.add_argument("-c", dest="c", help="column number containing the p-value"
                   " of interest", type=str, default=-1)
    p.add_argument("-z", dest="z", help="use z-score correction",
                   action="store_true")
    args = p.parse_args()
    if not (args.regions and args.pvals):
        import sys
        # print_help() returns None, so this exits with status 1
        sys.exit(not p.print_help())

    regions_fh = ts.nopen(args.regions)
    try:
        first_line = next(regions_fh)
    finally:
        # only the first line is needed; do not leave the handle open
        regions_fh.close()
    if first_line.startswith("#") or (
            not first_line.split("\t")[2].isdigit()):
        print("%s\tslk_p\tslk_sidak_p" % (first_line.rstrip("\r\n"),))

    pval_columns = ts.header(args.pvals)
    if args.c in pval_columns:
        args.c = pval_columns.index(args.c) + 1
    else:
        args.c = int(args.c)
    return run(args)
开发者ID:JinfengChen,项目名称:combined-pvalues,代码行数:27,代码来源:region_p.py
示例11: run_metric
def run_metric(cmd, metric=None):
    """
    Run `cmd` and reduce its output to a single number with `metric`.

    `metric` is either a string naming a shell command, e.g. "wc -l", that
    `cmd` is piped into, or a python callable that consumes lines of input
    and returns a single value, e.g.

        def mymetric(fh):
            val = 0
            for line in fh:
                val += float(line.split("\t")[4])
            return val

    The lines sent to the metric function will be the result of bedtools
    intersect -wa -- so that both the -a and -b intervals will be present
    in each line.

    When `metric` is None, `cmd` must be a ``(cmd, metric)`` pair.
    """
    if metric is None:
        # both values arrived packed in the first argument
        cmd, metric = cmd

    if not isinstance(metric, basestring):
        # callable metric: feed it the stdout of the running command
        proc = nopen("|%s" % cmd, mode=None)
        res = metric(proc.stdout)
        check_proc(proc, cmd)
        assert isinstance(res, (int, float))
        return res

    # string metric: let the shell do the reduction
    pipeline = "%s | %s" % (cmd, metric)
    return float(run(pipeline))
开发者ID:astatham,项目名称:poverlap,代码行数:27,代码来源:poverlap.py
示例12: read_regions
def read_regions(fregions):
    """Load a tab-separated regions file into ``{chrom: [(start, end), ...]}``.

    Lines starting with "#" and blank lines are skipped. Columns 1-3 of
    every other line are read as chromosome, start and end.

    Args:
        fregions: path of the regions file, or a falsy value.

    Returns:
        None when `fregions` is falsy, otherwise a dict mapping each
        chromosome to its list of ``(int start, int end)`` tuples in file
        order.
    """
    if not fregions:
        return None
    regions = {}
    fh = ts.nopen(fregions)
    try:
        for line in fh:
            # comments, and blank lines that used to raise IndexError
            if line[0] == "#" or not line.strip():
                continue
            toks = line.split("\t")
            regions.setdefault(toks[0], []).append(
                (int(toks[1]), int(toks[2])))
    finally:
        fh.close()
    return regions
开发者ID:JinfengChen,项目名称:combined-pvalues,代码行数:7,代码来源:manhattan.py
示例13: tofile
def tofile(fiter, fname):
    """Write the lines of `fiter` to `fname` and schedule it for deletion.

    Each line is written with its trailing "\\r"/"\\n" replaced by a single
    "\\n". After a successful write, ``os.unlink(fname)`` is registered with
    ``atexit``.

    Args:
        fiter: iterable of strings.
        fname: output path (anything ``nopen`` accepts in "w" mode).

    Returns:
        `fname`.
    """
    fh = nopen(fname, "w")
    try:
        for line in fiter:
            fh.write(line.rstrip("\r\n") + "\n")
    finally:
        # close even when the iterator raises, so the handle is not leaked
        fh.close()
    atexit.register(os.unlink, fname)
    return fname
开发者ID:brentp,项目名称:shuffler,代码行数:7,代码来源:__main__.py
示例14: protein
def protein(self):
    """Return the text served by the UCSC hgGene "getProteinSeq" endpoint
    for ``self.name`` as one string.

    Blank lines and every line containing ">" are dropped; the remaining
    lines are stripped and concatenated.
    """
    from toolshed import nopen
    base_url = "http://genome.ucsc.edu/cgi-bin/hgGene?hgg_do_getProteinSeq=1&hgg_gene="
    pieces = []
    for raw in nopen(base_url + self.name):
        stripped = raw.strip()
        if stripped and ">" not in raw:
            pieces.append(stripped)
    return "".join(pieces)
开发者ID:brentp,项目名称:cruzdb,代码行数:7,代码来源:models.py
示例15: main
def main():
    """Split a Cytoscape attributes file into one ``.eda`` file per feature.

    The first line of the input is discarded. Every following line is split
    at its first "="; the text after it, upper-cased, is the feature. A run
    of consecutive lines with the same feature goes to
    ``<uniqid>.<FEATURE>.eda``, where ``uniqid`` is the input's basename up
    to its first ".". Each output file starts with a ``feature<FEATURE>``
    line followed by one ``<left-hand side> = True`` line per input line.

    NOTE(review): a feature that reappears after a different one re-opens
    its file in "w" mode and overwrites the earlier run; the input is
    presumably grouped by feature -- confirm.
    """
    p = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    p.add_argument('attrs',
                   help='Cytoscape attributes file of overlapping features')
    args = p.parse_args()

    uniqid = os.path.basename(args.attrs).split(".", 1)[0]
    attrsfile = nopen(args.attrs)
    fileout = None
    try:
        # remove header
        attrsfile.readline()
        previous_feature = None
        for line in attrsfile:
            if not line.strip():
                # a blank line has no "=" and used to raise IndexError
                continue
            attr = line.rstrip("\r\n").split("=", 1)
            # everything before the "="
            cyto_info = attr[0].strip()
            feature = attr[1].strip().upper()
            if not (previous_feature and previous_feature == feature):
                # new feature: finish the previous file before starting
                # the next one instead of leaving it open
                if fileout is not None:
                    fileout.close()
                fileout = open("%s.%s.eda" % (uniqid, feature), 'w')
                fileout.write("feature%s\n" % feature)
            fileout.write("%s = True\n" % cyto_info)
            previous_feature = feature
    finally:
        if fileout is not None:
            fileout.close()
        attrsfile.close()
开发者ID:brwnj,项目名称:cu_projects,代码行数:29,代码来源:features_to_files.py
示例16: fastq_to_dict
def fastq_to_dict(fastq):
    """Read `fastq` fully into memory.

    Returns a dict mapping each record name yielded by ``read_fastx`` to
    ``{'seq': ..., 'qual': ...}``; a repeated name keeps its last record.
    """
    with nopen(fastq) as handle:
        return {
            name: {'seq': seq, 'qual': qual}
            for name, seq, qual in read_fastx(handle)
        }
开发者ID:brwnj,项目名称:repertoire,代码行数:7,代码来源:collapse_reads.py
示例17: convert_reads
def convert_reads(fq1s, fq2s, out=sys.stdout):
    """Convert every read of the given FASTQ files and write it to `out`.

    `fq1s` and `fq2s` are comma-separated lists that are paired up
    positionally. For each pair the first five lines of the first file are
    inspected: when the first and fifth lines agree up to their first space
    the file is treated as interleaved and read on its own. Otherwise reads
    are taken alternately from both files; a second file given as "NA"
    yields no reads (single-end mode).

    Each read is handed to ``convert_and_write_read`` together with its
    position modulo 2 in the combined stream.

    Args:
        fq1s: comma-separated first-in-pair FASTQ paths.
        fq2s: comma-separated second-in-pair FASTQ paths, or "NA" entries.
        out: writable handle that receives the converted reads.

    Returns:
        0.
    """
    # Counted over all file pairs; it used to be reset for every pair, so
    # the warning below only reflected the last one.
    lt80 = 0
    for fq1, fq2 in zip(fq1s.split(","), fq2s.split(",")):
        sys.stderr.write("converting reads in %s,%s\n" % (fq1, fq2))
        fq1 = nopen(fq1)

        # examines first five lines to detect if this is an interleaved
        # fastq file
        first_five = list(islice(fq1, 5))
        r1_header = first_five[0]
        r2_header = first_five[-1]
        already_interleaved = (r1_header.split(' ')[0] ==
                               r2_header.split(' ')[0])

        # put the inspected lines back in front of the stream, then take
        # the lines four at a time (one record each)
        q1_iter = izip(*[chain.from_iterable([first_five, fq1])] * 4)

        if fq2 != "NA":
            fq2 = nopen(fq2)
            q2_iter = izip(*[fq2] * 4)
        else:
            if already_interleaved:
                sys.stderr.write("detected interleaved fastq\n")
            else:
                sys.stderr.write(
                    "WARNING: running bwameth in single-end mode\n")
            # placeholder mates; skipped below because their name is None
            q2_iter = repeat((None, None, None, None))

        if already_interleaved:
            selected_iter = q1_iter
        else:
            selected_iter = chain.from_iterable(izip(q1_iter, q2_iter))

        for read_i, (name, seq, _, qual) in enumerate(selected_iter):
            if name is None:
                continue
            convert_and_write_read(name, seq, qual, read_i % 2, out)
            if len(seq) < 80:
                lt80 += 1

    out.flush()
    if lt80 > 50:
        sys.stderr.write("WARNING: %i reads with length < 80\n" % lt80)
        sys.stderr.write(" : this program is designed for long reads\n")
    return 0
开发者ID:brentp,项目名称:bwa-meth,代码行数:47,代码来源:bwameth.py
示例18: main
def main():
    """Print the matrisome table with rat ortholog annotation columns added.

    For every row, each ":"-separated name in the ``args.xref_col`` column
    is looked up among the ``orthonames`` of every cross-reference entry.
    For each hit the entry's rat names and scores are appended to the row,
    and for each rat name the annotations of every uniprot record listing
    it under ``ensemblp`` are appended as well. Multiple values are joined
    with ":".
    """
    args = get_args()

    def progress(message):
        if args.verbose:
            sys.stderr.write(message)

    progress(">> building gene orthology cross-reference...\n")
    xref = get_xref(args.xref)
    progress(">> building uniprot library...\n")
    uniprot = parse_uniprot_flat(args.uniprot)
    progress(">> annotating matrisome...\n")

    # (output column, key in a uniprot record), in output order
    uniprot_columns = [
        ('r_geneid', 'geneid'),
        ('r_gene_description', 'description'),
        ('r_uniprot', 'uniprotid'),
        ('r_interpro', 'interpro'),
        ('r_refseqn', 'refseqn'),
        ('r_refseqp', 'refseqp'),
        ('r_ensg', 'ensemblg'),
        ('r_enst', 'ensemblt'),
        ('r_ensp', 'ensemblp'),
    ]
    headerext = ['r_ENSRNOP', 'r_score']
    headerext.extend(column for column, _ in uniprot_columns)

    header = nopen(args.matrisome).readline().rstrip("\r\n").split("\t")
    header.extend(headerext)
    print("\t".join(header))

    for entry in reader(args.matrisome):
        # reset vars
        for column in headerext:
            entry[column] = ""

        # handle multiple entries delimited by ":"
        for entryname in entry[args.xref_col].split(":"):
            # looping over entire defaultdict each time
            for uid, ortholog in xref.iteritems():
                # find a matching ortholog
                for orthoname in ortholog['orthonames']:
                    if orthoname != entryname:
                        continue
                    # use the uid to get the rat names and scores
                    rat_pairs = izip(ortholog['ratnames'],
                                     ortholog['ratscores'])
                    for ratname, ratscore in rat_pairs:
                        entry['r_ENSRNOP'] += "%s:" % ratname
                        entry['r_score'] += "%s:" % ratscore
                        # for each rat ENSP, add the corresponding
                        # annotation(s)
                        for uniqueid, record in uniprot.iteritems():
                            for ensemblname in record['ensemblp']:
                                if ensemblname != ratname:
                                    continue
                                # all of the info for this uid
                                for column, key in uniprot_columns:
                                    entry[column] += ':'.join(record[key]) + ":"

        print("\t".join(entry[h].rstrip(":") for h in header))
开发者ID:brwnj,项目名称:cu_projects,代码行数:59,代码来源:xref_matrisome.py
示例19: readfq
def readfq(fq):
    """Yield one ``Fastq`` object per four non-blank lines of `fq`.

    Whitespace-only lines are ignored and "\\r"/"\\n" are stripped from both
    ends of every remaining line before the group of four is passed to
    ``Fastq``.

    Raises:
        AssertionError: when the input ends in the middle of a record.
    """
    with nopen(fq) as fh:
        fqclean = (x.strip("\r\n") for x in fh if x.strip())
        while True:
            rd = list(islice(fqclean, 4))
            if not rd:
                # A plain return ends the generator. Raising StopIteration
                # here is turned into RuntimeError by PEP 479.
                return
            assert all(rd) and len(rd) == 4
            yield Fastq(rd)
开发者ID:brwnj,项目名称:cu_projects,代码行数:8,代码来源:join_reads.py
示例20: readstat
def readstat(cifstat):
    """Yield one ``CifStat`` object per six non-blank lines of `cifstat`.

    Whitespace-only lines are ignored and "\\r"/"\\n" are stripped from both
    ends of every remaining line before the group of six is passed to
    ``CifStat``.

    Raises:
        AssertionError: when the input ends in the middle of a record.
    """
    with nopen(cifstat) as fh:
        clean = (x.strip("\r\n") for x in fh if x.strip())
        while True:
            rd = list(islice(clean, 6))
            if not rd:
                # A plain return ends the generator. Raising StopIteration
                # here is turned into RuntimeError by PEP 479.
                return
            assert all(rd) and len(rd) == 6
            yield CifStat(rd)
开发者ID:brwnj,项目名称:cu_projects,代码行数:8,代码来源:cifstat2mx.py
注:本文中的toolshed.nopen函数示例由纯净天空整理自GitHub/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论