本文整理汇总了Python中tabix.open函数的典型用法代码示例。如果您正苦于以下问题:Python tabix.open函数的具体用法?Python tabix.open怎么用?Python tabix.open使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了tabix.open函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: init_resource
def init_resource(self):
    """Open the tabix-indexed annotation resources named in the config.

    Reads the ``self.rv`` section of ``self.config``.  If that section has a
    ``dbsnp`` option, the file it points to is opened with tabix and stored
    in ``self.resources['dbsnp']``.  Every option of the section whose value
    ends in ``.featuredb`` is opened as well and collected in
    ``self.features`` as ``(option_name, tabix_handle)`` tuples.
    """
    def open_tabix(path):
        # Imported lazily so that pytabix is only required when a resource
        # is actually configured.  The import used to live inside the dbsnp
        # branch, which made ``tabix`` an unbound local in the feature loop
        # below whenever no dbsnp file was configured.
        import tabix
        return tabix.open(path)

    for rname in ['dbsnp']:
        if self.config.has_option(self.rv, rname):
            self.resources[rname] = open_tabix(self.config.get(self.rv, rname))

    self.features = []
    for rname in self.config.options(self.rv):
        featdb = self.config.get(self.rv, rname)
        if featdb.endswith('.featuredb'):
            self.features.append((rname, open_tabix(featdb)))
开发者ID:zwdzwd,项目名称:transvar,代码行数:12,代码来源:annodb.py
示例2: get_cadd
def get_cadd(config, chrom, start, ref, alt):
    """Return the CADD score stored for a single-position variant.

    The tabix-indexed score file is taken from
    ``config['data_paths']['cadd']['whole_genome_cadd']`` and queried for the
    region ``chrom:start-start``.

    Parameters
    ----------
    config : mapping holding the path of the CADD file (see above).
    chrom, start : chromosome and position of the variant.
    ref, alt : reference and alternate alleles.

    Returns
    -------
    float
        ``float(record[5])`` of the first record whose fourth column equals
        ``alt``.  ``numpy.nan`` is returned (and a warning logged) when the
        file cannot be opened, the query fails, a record's third column
        differs from ``ref``, or no record matches ``alt``.
    """
    tabix_fp = config['data_paths']['cadd']['whole_genome_cadd']
    try:
        tb = tabix.open(tabix_fp)
    except Exception:
        # Best effort: a missing or unreadable score file is not fatal.
        logging.warning('{0} not available'.format(tabix_fp))
        return np.nan

    try:
        records = tb.querys(str(chrom) + ':' + str(start) + '-' + str(start))
    except Exception:
        logging.warning('Error when trying to query {0}-{1}-{2}-{3}'.format(chrom, start, ref, alt))
        return np.nan

    # NOTE(review): columns are presumably chrom, pos, ref, alt, raw score,
    # scaled score -- confirm against the CADD file in use.
    for record in records:
        if record[2] != ref:
            logging.warning('Reference {0} is not the one in CADD for entry {1}-{2}-{3}-{4}'.format(ref, chrom, start, ref, alt))
            return np.nan
        if record[3] == alt:
            return float(record[5])

    # The placeholders used to be {0}-{1}-{2}-{4} over (alt, chrom, start,
    # ref, alt), which logged "alt-chrom-start-alt" instead of the variant.
    logging.warning('I do not find a cadd entry for {0}-{1}-{2}-{3}'.format(chrom, start, ref, alt))
    return np.nan
开发者ID:DavidTamborero,项目名称:MTBR,代码行数:30,代码来源:ct_lib.py
示例3: checkAndOpen
def checkAndOpen(db):
    """Validate a bgzipped, tabix-indexed database file and open it.

    Returns a tabix handle for ``db``, or ``None`` when the ``tabix`` module
    has not been imported, the file does not exist, it cannot be read as
    gzip, or it is empty.  Files that are uncompressed ``.vcf``, lack a
    ``.tbi`` index, or have an unexpected extension are reported through
    ``abortWithMessage``.
    """
    if 'tabix' not in sys.modules:
        return None

    db = os.path.expanduser(db)
    if not os.path.exists(db):
        return None

    extension = os.path.splitext(db)[1]
    has_index = os.path.exists(db + ".tbi")

    if extension != ".gz" or not has_index:
        if extension == ".vcf":
            abortWithMessage("Error: database file {0} must compressed with bgzip".format(db))
        elif extension == ".gz":
            abortWithMessage("Compressed database is not tabix indexed")
        else:
            abortWithMessage("Error opening database files: {0}".format(db))
        # Only reached if abortWithMessage returns instead of exiting.
        return None

    try:
        # gzip reports a corrupt file on the first read, not on open, so the
        # read belongs inside the try; ``with`` closes the probe handle.
        with gzip.open(db) as database:
            first_line = database.readline()
    except IOError:
        print("WARNING: could not open {}".format(db))
        return None

    # readline() signals end-of-file with an empty result; it never raises
    # StopIteration, so the previous empty-file check could not trigger.
    if not first_line:
        print("Empty file {}".format(db))
        return None

    return tabix.open(db)
开发者ID:blachlylab,项目名称:mucor,代码行数:30,代码来源:databases.py
示例4: get_job_results
def get_job_results(job_id, job=None):
    """Stream a job's EPACTS results as tab-separated text.

    Query-string filters (all optional):

    * ``region`` -- only rows returned by a tabix query for this region.
    * ``non-monomorphic`` -- only rows whose ``AC`` column is > 0.
    * ``max-pvalue`` -- only rows whose ``PVALUE`` column is numeric and
      below the given threshold.

    Returns a ``Response`` whose body is the (normalised) header line
    followed by every row that passes all requested filters.

    Raises ``Exception`` when a filter needs a column the file lacks.
    """
    filters = request.args.to_dict()
    epacts_filename = job.relative_path("output.epacts.gz")

    with gzip.open(epacts_filename, "rt") as f:
        header = f.readline().rstrip('\n').split('\t')
    # Normalise EPACTS column names; str.split always yields at least one
    # field, but a second one is not guaranteed.
    if header[0] == "#CHROM":
        header[0] = "CHROM"
    if len(header) > 1 and header[1] == "BEG":
        header[1] = "BEGIN"
    headerpos = {x: i for i, x in enumerate(header)}

    def read_all_rows():
        # Text mode so rows are str, and the handle is closed when done.
        with gzip.open(epacts_filename, "rt") as handle:
            next(handle, None)  # skip the header line
            for line in handle:
                yield line.rstrip("\n").split("\t")

    region = filters.get("region", "")
    if region:
        tb = tabix.open(epacts_filename)
        # The previous code queried with chrom/start_pos/end_pos, which were
        # never defined.
        # NOTE(review): assumes the filter uses tabix region syntax
        # ("chrom:start-end") -- confirm against the client.
        indata = tb.querys(region)
    else:
        indata = read_all_rows()

    pass_tests = []
    if filters.get("non-monomorphic", False):
        if "AC" not in headerpos:
            raise Exception("Column AC not found")
        ac_index = headerpos["AC"]

        def mono_pass(row):
            return float(row[ac_index]) > 0
        pass_tests.append(mono_pass)

    if "max-pvalue" in filters:
        if "PVALUE" not in headerpos:
            raise Exception("Column PVALUE not found")
        pval_index = headerpos["PVALUE"]
        thresh = float(filters.get("max-pvalue", 1))

        def pval_pass(row):
            return row[pval_index] != "NA" and float(row[pval_index]) < thresh
        pass_tests.append(pval_pass)

    def pass_row(row):
        return all(test(row) for test in pass_tests)

    def generate():
        yield "\t".join(header) + "\n"
        # Both row sources yield data rows only (no header, no trailing
        # newline), so nothing is skipped and the newline is added here.
        for row in indata:
            if pass_row(row):
                yield "\t".join(row) + "\n"

    return Response(generate(), mimetype="text/plain")
开发者ID:statgen,项目名称:gasp,代码行数:60,代码来源:api_blueprint.py
示例5: extract_CADD_score
def extract_CADD_score(arguments, q):
    """Annotate one VCF record with CADD scores and queue it as strings.

    Parameters
    ----------
    arguments : ``(vcf_record, caddfile)`` pair; ``caddfile`` is the path of
        a tabix-indexed CADD score file.
    q : queue-like object; the annotated row is ``put`` on it.

    Returns
    -------
    list of str
        The VCF columns CHROM, POS, ID, REF, ALT, QUAL, FILTER and INFO of
        the record.  ``INFO['RAWCADD']`` / ``INFO['PHREDCADD']`` are taken
        from columns 5 and 6 of the first CADD row whose fourth column
        equals the record's first ALT allele, and stay 0 without a match.
    """
    vcf_record, caddfile = arguments
    tb = tabix.open(caddfile)

    # Specific for CADD files
    # FIXME: get info about chr or not from provided VCF file
    chromosome = (vcf_record.CHROM).replace("chr", "")
    vcf_record.INFO["RAWCADD"] = 0
    vcf_record.INFO["PHREDCADD"] = 0

    records = tb.query(chromosome, vcf_record.POS - 1, vcf_record.POS)

    # Look for matching mutation
    # Works for SNVs, InDels optimisation is ongoing
    for rec in records:
        if rec[3] == vcf_record.ALT[0]:
            # FIXME: Make requested fields optional through arguments
            vcf_record.INFO["RAWCADD"] = rec[4]
            vcf_record.INFO["PHREDCADD"] = rec[5]
            break

    # ``str(QUAL) or '.'`` never produced '.', because str(None) is the
    # truthy string 'None'; test for a missing QUAL explicitly instead.
    qual = '.' if vcf_record.QUAL is None else str(vcf_record.QUAL)

    # workaround since multiprocess can't handle VCF record class objects
    # FIXME: use VCF class records rather than this ugly string
    annotated = VCF_WRITER._map(
        str,
        [vcf_record.CHROM, vcf_record.POS, vcf_record.ID, vcf_record.REF],
    ) + [
        VCF_WRITER._format_alt(vcf_record.ALT),
        qual,
        VCF_WRITER._format_filter(vcf_record.FILTER),
        VCF_WRITER._format_info(vcf_record.INFO),
    ]

    # Return results to Queue
    q.put(annotated)
    return annotated
开发者ID:jdeligt,项目名称:Genetics,代码行数:29,代码来源:Annotate_CADD_Scores_In_VCF.py
示例6: get_exons
def get_exons(chrom, start, stop, file):
    """Return, as a list, the records tabix yields for a region of *file*.

    ``file`` is opened with ``tabix.open`` and queried with
    ``(chrom, start, stop)``; the resulting iterator is materialised.
    """
    handle = tabix.open(file)
    return [hit for hit in handle.query(chrom, start, stop)]
开发者ID:niab,项目名称:exac-scripts,代码行数:7,代码来源:variants_reader.py
示例7: __init__
def __init__(self, _snp, _ref, _vcf, _restrict,
             _num_ctrls, _window, _match_context):
    """Store the query SNP and open the reference, VCF and restriction files.

    ``_ref`` is loaded with pyfasta, ``_vcf`` (and ``_restrict`` unless it is
    None) with tabix.  ``chromToKey`` maps the first whitespace-separated
    token of every FASTA key to the full key.  When ``_match_context`` is
    non-negative, the SNP's context is computed via ``GetContext``.
    """
    self.snp = _snp
    self.ref = pyfasta.Fasta(_ref)
    self.vcf = tabix.open(_vcf)
    self.restrict = None if _restrict is None else tabix.open(_restrict)

    # FASTA keys may carry a description; index them by their first token.
    self.chromToKey = {
        fasta_key.split()[0]: fasta_key for fasta_key in self.ref.keys()
    }

    self.num_ctrls = _num_ctrls
    self.window = _window
    self.match_context = _match_context
    if self.match_context >= 0:
        self.snp_context = self.GetContext(self.snp)
开发者ID:mgymrek,项目名称:non-coding-annotations,代码行数:17,代码来源:annotation_score.py
示例8: main
def main(args):
    """Write chrom / position / c10 coverage rows for the loaded coordinates.

    ``loadCoords(args.bedFile)`` supplies a chromosome and 1-based
    positions.  Each position is looked up in that chromosome's coverage
    file and one tab-separated line (chromosome, position, seventh column)
    is written to ``args.outFile`` per record.  The output file is created
    even when no chromosome was loaded.
    """
    chrom, coords = loadCoords(args.bedFile)
    tb = None
    if chrom:
        # The handle returned by tabix.open() used to be thrown away while
        # ``tb`` stayed an empty string, so every ``tb.query`` call failed.
        tb = tabix.open(
            "/home/evansj/me/data/ExAC/coverage/ftp.broadinstitute.org/pub/ExAC_release/current/coverage/Panel.chr%s.coverage.txt.gz"
            % (chrom,)
        )
    with open(args.outFile, "w") as fout:
        if tb is not None:
            for st in coords:
                # st is 1-idx in coords
                # tabix needs 0-based
                for record in tb.query(chrom, st - 1, st):
                    thisChrom, pos, _mean, _median, _c1, _c5, c10, _c15 = record[0:8]
                    print("\t".join((thisChrom, pos, c10)), file=fout)
开发者ID:samesense,项目名称:target_exac_setup,代码行数:18,代码来源:pullGeneCov.py
示例9: tabix_vcf
def tabix_vcf(vcf_file, in_chr, in_start, in_stop):
    """A generator to get records in a VCF given a location.

    ``in_chr`` is converted with ``str`` and ``in_start`` / ``in_stop`` with
    ``int`` before querying.  This is best effort: if the file cannot be
    opened or the query fails, the generator simply ends without yielding
    further records.
    """
    chrom = str(in_chr)
    start = int(in_start)
    stop = int(in_stop)
    try:
        vcf_tb = tabix.open(vcf_file)
        for rec in vcf_tb.query(chrom, start, stop):
            yield rec
    except Exception:
        # Narrowed from a bare ``except:`` so that KeyboardInterrupt and
        # SystemExit are no longer swallowed silently.
        return
开发者ID:Jana-A,项目名称:WTSI.DDD-VET,代码行数:9,代码来源:parsing_setups.py
示例10: get_tabixhandle
def get_tabixhandle(path):
    """Return a tabix handle for *path* after checking its name and index.

    Raises TabixError if *path* does not end with '.gz' or if no
    '<path>.tbi' index file exists.
    """
    looks_compressed = path.endswith('.gz')
    if not looks_compressed:
        raise TabixError("File {0} does not end with '.gz'".format(path))

    if os.path.isfile(path + '.tbi'):
        return tabix.open(path)

    raise TabixError("No index could be found for {0}".format(path))
开发者ID:moonso,项目名称:genmod,代码行数:11,代码来源:read_tabix_files.py
示例11: __init__
def __init__(self, args):
    """Load transcripts and open the tabix and BAM inputs named in *args*.

    Progress messages are written to stderr before each step.
    """
    self.args = args

    # Transcript models parsed from the GENCODE GTF, plus a lookup by ID.
    print('Loading transcripts...', file=sys.stderr)
    self.tx_infos = self._parse_tx_infos(args.gencode_gtf)
    self.tx_info_by_id = {
        tx_info.transcript_id: tx_info for tx_info in self.tx_infos
    }

    # The same GTF is also opened through tabix.
    print('Opening tabix file...', file=sys.stderr)
    gtf_handle = tabix.open(args.gencode_gtf)
    self.tabix = gtf_handle

    # Alignment input.
    print('Opening BAM file...', file=sys.stderr)
    bam_handle = pysam.AlignmentFile(args.alignment_bam, 'r')
    self.sam_file = bam_handle
开发者ID:holtgrewe,项目名称:linc_splice,代码行数:12,代码来源:map_linc.py
示例12: ld_expand
def ld_expand(df, ld_beds):
    """
    Expand a set of SNVs into all SNVs with LD >= 0.8 and return a BedTool of
    the expanded SNPs.

    Parameters
    ----------
    df : pandas.DataFrame
        Pandas dataframe with SNVs. The index is of the form chrom:pos where
        pos is the one-based position of the SNV. The columns are chrom,
        start, end. chrom, start, end make a zero-based bed file with the SNV
        coordinates.

    ld_beds : dict
        Dict whose keys are chromosomes and whose values are filenames of
        tabixed LD bed files. The LD bed files should be formatted like this:
            chr1    14463   14464   14464:51479:0.254183
        where the first three columns indicate the zero-based coordinates of
        a SNV and the fourth column has the one-based coordinate of that SNV,
        the one-based coordinate of another SNV on the same chromosome, and
        the LD between these SNVs (all separated by colons).

    Returns
    -------
    bt : pybedtools.BedTool
        Sorted BedTool with the input SNVs and the SNVs they are in LD with.
        The name column of every interval is the index label of the input
        SNV it was derived from.
    """
    import pybedtools as pbt
    import tabix

    out_snps = []
    for chrom in ld_beds:
        t = tabix.open(ld_beds[chrom])
        tdf = df[df['chrom'].astype(str) == chrom]
        for ind in tdf.index:
            # ``.ix`` was removed from pandas; ``.loc`` is the label-based
            # equivalent for this lookup.
            p = tdf.loc[ind, 'end']
            out_snps.append('{}\t{}\t{}\t{}\n'.format(chrom, p - 1, p, ind))
            try:
                # Plain iteration replaces the Python-2-only ``r.next()``.
                for n in t.query('{}'.format(chrom), p - 1, p):
                    _, p2, r2 = n[-1].split(':')
                    if float(r2) >= 0.8:
                        out_snps.append('{}\t{}\t{}\t{}\n'.format(
                            n[0], int(p2) - 1, int(p2), ind))
            except tabix.TabixError:
                # No LD data can be queried for this SNV; keep the SNV only.
                continue
    bt = pbt.BedTool(''.join(out_snps), from_string=True)
    bt = bt.sort()
    return bt
开发者ID:cdeboever3,项目名称:cdpybio,代码行数:52,代码来源:analysis.py
示例13: get_genotypes
def get_genotypes(CpG_location, tb_file="/path/to/file/DF_meth_variants.gz",
                  n_columns=782):
    """Return the tabix records of a region as a pandas DataFrame.

    Parameters
    ----------
    CpG_location : region string passed to ``tabix.querys``.
    tb_file : path of the tabix-indexed file to query (defaults to the
        previously hard-coded location).
    n_columns : number of DataFrame columns; each record must supply exactly
        this many fields after its first three.

    Returns
    -------
    pandas.DataFrame
        One row per record, holding ``record[3:]``, with integer column
        labels ``0 .. n_columns - 1``.
    """
    import tabix
    import pandas as pd

    tb = tabix.open(tb_file)
    # Collect all rows first and build the frame once: growing a DataFrame
    # with ``df.loc[num] = ...`` copies it for every row.  ``range`` also
    # replaces the Python-2-only ``xrange``.
    rows = [record[3:] for record in tb.querys(CpG_location)]
    return pd.DataFrame(rows, columns=range(n_columns))
开发者ID:CrystalHumphries,项目名称:MethylationCorrelationBlock,代码行数:13,代码来源:try_library.py
示例14: test_same_aa_different_positions
def test_same_aa_different_positions(self):
    """same_aa() returns no pairs when the Protein_position values differ."""
    vcf_lines = make_vcf_header()
    for position, residue in ((5, 2), (7, 3), (8, 4)):
        vcf_lines.append(make_vcf_line(
            pos=position, extra='Protein_position={}'.format(residue)))
    self.write_vcf(vcf_lines)

    handle = tabix.open(self.path)
    # Positions 7 and 8 are annotated with residues 3 and 4 respectively.
    candidate_pairs = [[('1', 7), ('1', 8)]]

    self.assertEqual(same_aa(handle, candidate_pairs), [])
开发者ID:jeremymcrae,项目名称:clinical-filter,代码行数:14,代码来源:test_multinucleotide_variants.py
示例15: test_same_aa
def test_same_aa(self):
    """same_aa() keeps a pair whose variants share a Protein_position."""
    # Build a VCF with two variants annotated with the same residue.
    vcf_lines = make_vcf_header()
    for position in (2, 4):
        vcf_lines.append(make_vcf_line(pos=position, extra='Protein_position=1'))
    self.write_vcf(vcf_lines)

    handle = tabix.open(self.path)
    candidate_pairs = [[('1', 2), ('1', 4)]]
    expected = [[('1', 2), ('1', 4)]]

    self.assertEqual(same_aa(handle, candidate_pairs), expected)
开发者ID:jeremymcrae,项目名称:clinical-filter,代码行数:14,代码来源:test_multinucleotide_variants.py
示例16: __search_pos_bdg
# Build one DataFrame per bedGraph file for the region
# self.chrom:self.beg-self.end and store it in self.pd_frame, keyed by the
# file name.  Each frame has the columns 'pos' (positions) and 'con' (signal
# values); x-axis tick positions and labels are stored on self for the first
# file only.
# NOTE(review): the indentation of this snippet was lost, so the exact
# nesting of the statements below cannot be confirmed from here -- check the
# upstream file before relying on these notes.
# NOTE(review): xrange is Python-2 only.
def __search_pos_bdg(self):
self.pd_frame = {}
for i,bdg_file in enumerate(self.l_bdg_file):
# Records of this bedGraph file that tabix returns for the region.
tb = tabix.open( bdg_file )
record = tb.query( self.chrom, self.beg, self.end )
l_pos = []
l_cons = []
# Tick list starts at beg+1; self.end is appended after the scan.
l_xticks = [ self.beg+1 ]
# Tick spacing: a tenth of the region length, rounded down to a power of ten.
# NOTE(review): for regions shorter than 10 this takes log10(0) -- confirm
# callers never pass such a region.
bin_size = int( (self.end-self.beg)/10 )
bin_size = 10**int(np.log10(bin_size))
# 0 acts as "no position seen yet".
pre_pos = 0
for rec in record:
# Walk every position from rec[1] up to (not including) rec[2]; rec[3] is
# the value recorded for each of them.
for pos in xrange( int(rec[1]),int(rec[2]) ):
cons = float(rec[3])
if pre_pos == 0:
pre_pos = int(rec[1])
""" Only consider the given region. """
if self.__is_intersect(pos):
""" If bedGraph has gaps, """
if pos > pre_pos+1:
""" Using zero to fill bedGraph gaps """
# Positions skipped since the previous one get an explicit 0.0 value.
for p in xrange( pre_pos+1,pos ):
l_pos.append( p )
l_cons.append( 0.0 )
l_pos.append( pos )
l_cons.append(cons )
# Positions that are multiples of bin_size become x ticks.
if pos % bin_size == 0:
l_xticks.append( pos )
pre_pos = pos
l_xticks.append( self.end )
data = { 'pos':l_pos, 'con':l_cons }
self.pd_frame[ bdg_file ] = pd.DataFrame( data )
# Tick positions and their string labels come from the first file (i == 0).
if i == 0:
self.l_xpos = l_xticks
self.l_xticks = [ str(tick) for tick in l_xticks ]
开发者ID:yzqheart,项目名称:ChIP,代码行数:49,代码来源:PlotChip_IGV.py
示例17: __init__
def __init__(self, task_queue, results_queue, families={}, phased=False,
             vep=False, cadd_raw=False, cadd_file=None, cadd_1000g=None,
             cadd_exac=None, cadd_ESP=None, cadd_InDels=None,
             thousand_g=None, exac=None, dbNSFP=None, strict=False,
             verbosity=False):
    """Store the worker's settings and open every configured annotation file.

    Each of ``cadd_file``, ``cadd_1000g``, ``cadd_exac``, ``cadd_ESP``,
    ``cadd_InDels``, ``thousand_g``, ``exac`` and ``dbNSFP`` may be a path
    to a tabix-indexed file.  Truthy values are replaced on the instance by
    the handle returned from ``tabix.open``; falsy values are stored as
    given.  ``any_cadd_info`` is True when at least one of the five CADD
    files was supplied.

    NOTE(review): ``families={}`` is a shared mutable default; it is safe
    only as long as no instance mutates it -- kept to leave the signature
    unchanged.
    """
    Process.__init__(self)
    self.task_queue = task_queue
    self.families = families
    self.results_queue = results_queue
    self.verbosity = verbosity
    self.phased = phased
    self.vep = vep
    self.cadd_raw = cadd_raw
    self.cadd_file = cadd_file
    self.cadd_1000g = cadd_1000g
    self.cadd_exac = cadd_exac
    self.cadd_ESP = cadd_ESP
    self.cadd_InDels = cadd_InDels
    self.thousand_g = thousand_g
    self.exac = exac
    self.dbNSFP = dbNSFP
    self.strict = strict

    # CADD sources: opening any of them marks CADD information as available.
    self.any_cadd_info = False
    for attr in ('cadd_file', 'cadd_1000g', 'cadd_exac', 'cadd_ESP',
                 'cadd_InDels'):
        path = getattr(self, attr)
        if path:
            setattr(self, attr, tabix.open(path))
            self.any_cadd_info = True

    # Other annotation sources.  The dbNSFP branch used to re-open
    # ``self.exac`` and overwrite it, so dbNSFP itself was never opened;
    # each attribute is now replaced by its own handle.
    for attr in ('thousand_g', 'exac', 'dbNSFP'):
        path = getattr(self, attr)
        if path:
            setattr(self, attr, tabix.open(path))
开发者ID:gpcr,项目名称:genmod,代码行数:44,代码来源:variant_consumer.py
示例18: test_same_aa_missing_protein_positions
def test_same_aa_missing_protein_positions(self):
    """same_aa() returns no pairs when a variant lacks a Protein_position."""
    # A variant without a listed protein position (i.e. residue number)
    # could be affecting the splice site, so a pair containing it cannot be
    # used.
    vcf_lines = make_vcf_header()
    vcf_lines.append(make_vcf_line(pos=5))
    vcf_lines.append(make_vcf_line(pos=7))
    annotated_line = make_vcf_line(pos=8, extra='Protein_position=4')
    vcf_lines.append(annotated_line)
    self.write_vcf(vcf_lines)

    handle = tabix.open(self.path)
    candidate_pairs = [[('1', 7), ('1', 8)]]

    self.assertEqual(same_aa(handle, candidate_pairs), [])
开发者ID:jeremymcrae,项目名称:clinical-filter,代码行数:17,代码来源:test_multinucleotide_variants.py
示例19: get_1mb_snps
def get_1mb_snps(snp_file='snps_all.gz',
                 pos_file='newMethPosFile.txt_2-3_col_1',
                 window=1000000):
    """Collect the SNPs found within *window* bases of each listed position.

    Parameters
    ----------
    snp_file : tabix-indexed SNP file to query.
    pos_file : tab-separated text file; the first column is the chromosome
        and the second the position.
    window : number of bases searched on either side of each position.

    The defaults reproduce the previously hard-coded file names and the
    1 Mb window.

    Returns
    -------
    dict
        Keys are the fourth column of every record found; all values are 0.
    """
    import tabix

    tb = tabix.open(snp_file)
    snps = {}
    with open(pos_file) as f:
        for line in f:
            if not line.strip():
                continue  # tolerate blank lines, e.g. at the end of the file
            a = line.rstrip('\n').rsplit('\t')
            center = int(a[1])
            # Clamp at 1: near the start of a chromosome the subtraction
            # goes negative and yields an invalid region string.
            start = max(1, center - window)
            stop = center + window
            region = a[0] + ":" + str(start) + "-" + str(stop)
            for record in tb.querys(region):
                snps[record[3]] = 0
    return snps
开发者ID:CrystalHumphries,项目名称:MethylationCorrelationBlock,代码行数:17,代码来源:get_1mb_snps.py
示例20: test_screen_pairs_nonstandard_pair
def test_screen_pairs_nonstandard_pair(self):
    """screen_pairs() drops a 'pair' that holds three variants."""
    # Build a VCF with five variants and no extra annotation.
    vcf_lines = make_vcf_header()
    for position in (2, 4, 5, 7, 8):
        vcf_lines.append(make_vcf_line(pos=position))
    self.write_vcf(vcf_lines)

    handle = tabix.open(self.path)

    # The first 'pair' has three variants in it; 'pairs' where n != 2 are
    # excluded, so only the second one is expected back.
    triple = [('1', 2), ('1', 4), ('1', 5)]
    proper_pair = [('1', 7), ('1', 8)]
    candidate_pairs = [triple, proper_pair]

    self.assertEqual(
        screen_pairs(handle, candidate_pairs, is_not_indel),
        [[('1', 7), ('1', 8)]],
    )
开发者ID:jeremymcrae,项目名称:clinical-filter,代码行数:18,代码来源:test_multinucleotide_variants.py
注:本文中的tabix.open函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论