本文整理汇总了Python中utils.parseTable函数的典型用法代码示例。如果您正苦于以下问题：Python parseTable函数的具体用法？Python parseTable怎么用？Python parseTable使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了parseTable函数的20个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: makeFoldTable
def makeFoldTable(annotFile,analysisName,testName,controlName,testMMR,controlMMR,testIdxFile,controlIdxFile,outputFolder,epsilon = 1):
    '''
    builds the per-guide log2 ratio table and writes it to disk

    for every row of the test idx table the count in the third column is
    divided by the sample's MMR value and offset by epsilon; the same is done
    for the row at the same position in the control idx table, and the log2 of
    test/control is reported
    output columns: guide ID, gene, log2 ratio, scaled test count, scaled control count
    rows are written in the order of the test idx table (no sorting is done here)
    returns the path of the written table
    '''
    # only the guide -> gene mapping is needed here
    guideDict, _ = makeAnnotDict(annotFile)

    testRows = utils.parseTable(testIdxFile, '\t')
    controlRows = utils.parseTable(controlIdxFile, '\t')

    foldTable = [['GUIDE_ID', 'GENE', 'LOG2_RATIO', testName, controlName]]
    for position, testRow in enumerate(testRows):
        guideID = testRow[0]
        gene = guideDict[guideID]

        # scale the raw count by the MMR value, then add the pseudocount
        scaledTest = float(testRow[2]) / testMMR + epsilon
        # the control row is taken by position, not matched by guide ID
        scaledControl = float(controlRows[position][2]) / controlMMR + epsilon

        ratio = numpy.log2(scaledTest / scaledControl)
        foldTable.append([guideID, gene, ratio, round(scaledTest, 4), round(scaledControl, 4)])

    outputFile = '%s%s_log2Ratio.txt' % (outputFolder, analysisName)
    utils.unParseTable(foldTable, outputFile, '\t')
    return outputFile
开发者ID:BoulderLabs,项目名称:pipeline,代码行数:33,代码来源:processGeckoBam.py
示例2: makeSEDict
def makeSEDict(enhancerFile, name, superOnly=True):
    """
    Build a dict of enhancer attributes keyed by "<name>_<first column>".

    Rows whose first field starts with "#" are skipped. A row whose first
    field starts with "R" is treated as the header and is used to locate the
    "isSuper" column. For every remaining row the entry holds
    "signal" (column 6 minus column 7) and "rank" (second-to-last column).
    When superOnly is true, only rows whose isSuper value equals 1 are kept.
    """
    seDict = {}
    for row in utils.parseTable(enhancerFile, "\t"):
        leadingChar = row[0][0]
        if leadingChar == "#":
            continue
        if leadingChar == "R":
            # header row: remember where the isSuper flag lives
            supColumn = row.index("isSuper")
            continue

        # the flag is only consulted when filtering was requested
        if superOnly and int(row[supColumn]) != 1:
            continue

        signal = float(row[6]) - float(row[7])
        rank = int(row[-2])
        seDict[name + "_" + row[0]] = {"rank": rank, "signal": signal}
    return seDict
开发者ID:jdimatteo,项目名称:pipeline,代码行数:33,代码来源:dynamicEnhancer.py
示例3: makeEnhancerSignalTable
def makeEnhancerSignalTable(mergedRegionMap,medianDict,analysisName,genome,outputFolder):
    '''
    makes a table where each row is a region and each dataset column is the
    background subtracted signal (floored at 0) divided by that dataset's median

    the region map is expected to carry 6 leading descriptive columns followed
    by one (signal, background) column pair per dataset, in the iteration
    order of medianDict
    returns the path of the written table
    '''
    regionRows = utils.parseTable(mergedRegionMap, '\t')
    namesList = list(medianDict)

    header = ['REGION_ID', 'CHROM', 'START', 'STOP', 'NUM_LOCI', 'CONSTITUENT_SIZE'] + namesList
    signalTable = [header]

    # skip the header row of the region map
    for row in regionRows[1:]:
        outRow = row[0:6]
        for position, datasetName in enumerate(namesList):
            # dataset columns come in (signal, background) pairs after column 5
            signalColumn = 6 + 2 * position
            backgroundColumn = signalColumn + 1
            corrected = float(row[signalColumn]) - float(row[backgroundColumn])
            if corrected < 0:
                corrected = 0
            outRow.append(corrected / medianDict[datasetName])
        signalTable.append(outRow)

    outputFile = "%s%s_%s_signalTable.txt" % (outputFolder, genome, analysisName)
    print("WRITING MEDIAN NORMALIZED SIGNAL TABLE TO %s" % (outputFile))
    utils.unParseTable(signalTable, outputFile, '\t')
    return outputFile
开发者ID:zhouhufeng,项目名称:pipeline,代码行数:29,代码来源:clusterEnhancer.py
示例4: callMergeSupers
def callMergeSupers(dataFile,superFile1,superFile2,name1,name2,mergeName,genome,parentFolder):
    '''
    merges two super enhancer files and runs ROSE on the merged regions

    if the expected ROSE enhancer-to-gene table can already be parsed it is
    returned right away; otherwise the regions are merged, the generated bash
    script is run, and the output path is returned once utils.checkOutput
    reports it
    exits the interpreter if the output never shows up
    '''
    genomeTag = genome.upper()
    mergedGFFFile = '%s%s_%s_MERGED_REGIONS_-0_+0.gff' % (parentFolder, genomeTag, mergeName)
    roseOutput = "%s%s_ROSE/%s_%s_MERGED_REGIONS_-0_+0_SuperEnhancers_ENHANCER_TO_GENE.txt" % (parentFolder, name1, genomeTag, mergeName)

    # a successful parse means a previous run already produced the output
    try:
        utils.parseTable(roseOutput, '\t')
    except IOError:
        pass
    else:
        print("ROSE OUTPUT ALREADY FOUND HERE %s" % (roseOutput))
        return roseOutput

    print("MERGING ENHANCER REGIONS FROM %s and %s" % (superFile1, superFile2))
    mergedGFF = mergeCollections(superFile1, superFile2, name1, name2, mergedGFFFile)

    # build the ROSE bash script for the merged regions and run it
    roseBashFile = callRoseMerged(dataFile, mergedGFF, name1, name2, parentFolder)
    print('i can has rose bash file %s' % (roseBashFile))
    os.system('bash %s' % (roseBashFile))

    # wait for / verify the output before handing it back
    if not utils.checkOutput(roseOutput, 1, 30):
        print("ERROR: ROSE CALL ON MERGED REGIONS FAILED")
        sys.exit()
    return roseOutput
开发者ID:afederation,项目名称:pipeline,代码行数:35,代码来源:dynamicEnhancer.py
示例5: makeBedCollection
def makeBedCollection(bedFileList):
    '''
    reads every bed file in the list and pools all regions into one collection
    each locus gets the bed file's base name (text before the first '.') as its ID

    only rows with at least 3 columns, a first column starting with 'chr',
    and integer values in columns 2 and 3 are used; everything else
    (e.g. header lines) is silently skipped
    '''
    bedLoci = []
    print("MAKING BED COLLECTION FOR:")
    for bedFile in bedFileList:
        bedName = bedFile.split('/')[-1].split('.')[0]
        print(bedName)

        for fields in utils.parseTable(bedFile, '\t'):
            if len(fields) < 3:
                continue
            if fields[0][0:3] != 'chr':
                continue
            try:
                coords = [int(fields[1]), int(fields[2])]
                locus = utils.Locus(fields[0], min(coords), max(coords), '.', bedName)
                bedLoci.append(locus)
            except ValueError:
                # non-numeric coordinates: not a region row, ignore it
                continue

    print("IDENTIFIED %s BED REGIONS" % (len(bedLoci)))
    return utils.LocusCollection(bedLoci, 50)
开发者ID:linlabcode,项目名称:pipeline,代码行数:28,代码来源:bamPlot_turbo.py
示例6: makeSEDict
def makeSEDict(enhancerFile,name,superOnly = True):
    '''
    makes an attribute dict for enhancers keyed by name + '_' + region ID

    lines starting with '#' are ignored; the line starting with 'R' is the
    header and provides the position of the 'isSuper' column
    each entry stores 'signal' (column 6 - column 7) and 'rank' (second to last column)
    with superOnly set, only rows flagged isSuper == 1 are included
    '''
    seDict = {}
    enhancerTable = utils.parseTable(enhancerFile, '\t')
    for fields in enhancerTable:
        marker = fields[0][0]
        if marker == '#':
            continue
        if marker == 'R':
            supColumn = fields.index('isSuper')
            continue

        # short-circuits so the isSuper column is only read when filtering
        keepRow = (not superOnly) or int(fields[supColumn]) == 1
        if keepRow:
            signal = float(fields[6]) - float(fields[7])
            rank = int(fields[-2])
            enhancerID = name + '_' + fields[0]
            seDict[enhancerID] = {'rank': rank, 'signal': signal}
    return seDict
开发者ID:zhouhufeng,项目名称:pipeline,代码行数:33,代码来源:dynamicEnhancer.py
示例7: loadGenome
def loadGenome(genome_build,config_file = ''):
    '''
    loads annotation for a genome into a genome object

    genome_build is matched case-insensitively against the built-in table
    (HG19, RN6, MM10)
    config_file is an optional tab-delimited table; after its first (header)
    line, the first column of each line must read BUILD:field:path and
    overwrites the default path of that field for that build
    the path part may itself contain ':' and blank lines are ignored

    returns the Genome object with the tf_file, motif_convert and
    motif_database features attached (plus mask for HG19)
    exits the interpreter if the build is not in the table
    '''
    genome_build = genome_build.upper()

    #this nested dictionary has all of the useful information and likely will have to be
    #edited so it can be configured any time
    genomeDict = {
        'HG19':{'annot_file':'%sannotation/hg19_refseq.ucsc' % (pipeline_dir),
                'genome_directory':'/storage/cylin/grail/genomes/Homo_sapiens/UCSC/hg19/Sequence/Chromosomes/',
                'tf_file':'%s/annotation/TFlist_NMid_hg19.txt' % (whereAmI),
                'mask_file':'/storage/cylin/grail/genomes/Homo_sapiens/UCSC/hg19/Annotation/Masks/hg19_encode_blacklist.bed',
                'motif_convert':'%s/annotation/MotifDictionary.txt' % (whereAmI),
                'motif_database':'%s/annotation/VertebratePWMs.txt' % (whereAmI),
                },
        'RN6':{'annot_file':'%sannotation/rn6_refseq.ucsc' % (pipeline_dir),
               'genome_directory':'/storage/cylin/grail/genomes/Rattus_norvegicus/UCSC/rn6/Sequence/Chromosomes/',
               'tf_file':'%s/annotation/TFlist_NMid_rn6.txt' % (whereAmI),
               'motif_convert':'%s/annotation/MotifDictionary.txt' % (whereAmI),
               'motif_database':'%s/annotation/VertebratePWMs.txt' % (whereAmI),
               },
        'MM10':{'annot_file':'%sannotation/mm10_refseq.ucsc' % (pipeline_dir),
                'genome_directory':'/storage/cylin/grail/genomes/Mus_musculus/UCSC/mm10/Sequence/Chromosomes/',
                'tf_file':'%s/annotation/TFlist_NMid_mm10.txt' % (whereAmI),
                'motif_convert':'%s/annotation/MotifDictionary.txt' % (whereAmI),
                'motif_database':'%s/annotation/VertebratePWMs.txt' % (whereAmI),
                }
        }

    #allow an optional config file to overwrite default paths
    if len(config_file) > 0:
        config_table = utils.parseTable(config_file,'\t')
        for line in config_table[1:]:
            #blank lines carry no override and would break the unpacking below
            if not line or not line[0]:
                continue
            #split at most twice so a ':' inside the path stays part of the path
            (build,field,feature_path) = line[0].split(':',2)
            genomeDict[build.upper()][field.lower()] = feature_path

    if genome_build not in genomeDict:
        print('ERROR: UNSUPPORTED GENOME BUILD %s. EXITING NOW' % (genome_build))
        sys.exit()
    else:
        print('USING BUILD %s WITH FOLLOWING FIELDS:' % (genome_build))
        print(genomeDict[genome_build])

    #now attempt to load the genome
    buildFields = genomeDict[genome_build]
    genome = Genome(genome_build,buildFields['genome_directory'],buildFields['annot_file'])

    #adding additional optional features
    genome.addFeature('tf_file',buildFields['tf_file'])
    #only the HG19 entry ships with a mask file
    if genome_build == 'HG19':
        genome.addFeature('mask',buildFields['mask_file'])
    genome.addFeature('motif_convert',buildFields['motif_convert'])
    genome.addFeature('motif_database',buildFields['motif_database'])
    return genome
开发者ID:linlabcode,项目名称:pipeline,代码行数:59,代码来源:CRC3.py
示例8: makeEnhancerSignalTable
def makeEnhancerSignalTable(nameDict,mergedRegionMap,medianDict,analysisName,genome,outputFolder):
    '''
    makes a table where each row is a region and each dataset column is the
    signal (background subtracted when the dataset has one, floored at 0)
    divided by that dataset's median

    datasets are taken in sorted name order; after the 6 leading descriptive
    columns, a dataset with nameDict[name]['background'] == True consumes two
    columns of the region map (signal, background) and any other dataset
    consumes one
    returns the path of the written table
    '''
    regionRows = utils.parseTable(mergedRegionMap, '\t')
    namesList = sorted(nameDict)

    signalTable = [['REGION_ID', 'CHROM', 'START', 'STOP', 'NUM_LOCI', 'CONSTITUENT_SIZE'] + namesList]
    print("len of %s for namesList" % (len(namesList)))
    print(namesList)

    for row in regionRows[1:]:
        outRow = row[0:6]
        # cursor over the data columns, advanced dataset by dataset
        column = 6
        for datasetName in namesList:
            if nameDict[datasetName]['background'] == True:
                signalColumn = column
                backgroundColumn = column + 1
                column += 2
                try:
                    signal = float(row[signalColumn]) - float(row[backgroundColumn])
                except IndexError:
                    # row is shorter than the dataset layout implies: dump context and stop
                    print(row)
                    print(len(row))
                    print(signalColumn)
                    print(backgroundColumn)
                    sys.exit()
            else:
                signalColumn = column
                column += 1
                signal = float(row[signalColumn])

            if signal < 0:
                signal = 0
            outRow.append(signal / medianDict[datasetName])
        signalTable.append(outRow)

    outputFile = "%s%s_%s_signalTable.txt" % (outputFolder, genome, analysisName)
    print("WRITING MEDIAN NORMALIZED SIGNAL TABLE TO %s" % (outputFile))
    utils.unParseTable(signalTable, outputFile, '\t')
    return outputFile
开发者ID:BoulderLabs,项目名称:pipeline,代码行数:57,代码来源:clusterEnhancer.py
示例9: findMotifs
def findMotifs(subpeakFasta,bg_path,candidate_tf_list, projectFolder, analysis_name, motifConvertFile, motifDatabaseFile):
    '''
    runs fimo on the subpeak fasta for every motif of the candidate TFs

    the motif convert table maps motif name (column 1) to TF name (column 2)
    the fimo command is saved to <projectFolder>FIMO/<analysis_name>_fimo.sh
    and then executed directly; its stdout is redirected to
    <projectFolder>FIMO/<subpeak fasta base name>_fimo.txt

    candidate_tf_list is sorted in place
    raises KeyError if a candidate TF has no entry in the motif convert table
    returns the path of the fimo output file
    '''
    fimoFolder = utils.formatFolder(projectFolder + 'FIMO/', True)
    subpeak_name = subpeakFasta.split('/')[-1].split('.')[0]
    output = '%s%s_fimo.txt' % (fimoFolder,subpeak_name)

    # dict keyed by TF name holding the list of its motif names
    motifDatabase = utils.parseTable(motifConvertFile, '\t')
    motifDatabaseDict = {}
    for line in motifDatabase:
        motifDatabaseDict.setdefault(line[1], []).append(line[0])

    candidate_tf_list.sort()
    print(candidate_tf_list)

    # collect the motifs of all candidate TFs, duplicates removed
    motif_list = []
    for tf in candidate_tf_list:
        motif_list += motifDatabaseDict[tf]
    motif_list = utils.uniquify(motif_list)

    fimoCmd = 'fimo'
    fimoCmd += ''.join(" --motif '%s'" % (str(motif)) for motif in motif_list)
    #fimoCmd += ' --thresh 1e-5' #if you want to increase stringency
    fimoCmd += ' -verbosity 1'
    fimoCmd += ' -text'
    fimoCmd += ' -oc ' + projectFolder + 'FIMO'
    fimoCmd += ' --bgfile %s' % (bg_path)
    fimoCmd += ' ' + motifDatabaseFile + ' '
    fimoCmd += subpeakFasta
    fimoCmd += ' > '+ output
    print(fimoCmd)

    # keep a copy of the command as a bash script; the context manager closes
    # the handle even if a write fails
    fimo_bash_path = '%s%s_fimo.sh' % (fimoFolder,analysis_name)
    with open(fimo_bash_path,'w') as fimo_bash:
        fimo_bash.write('#!/usr/bin/bash\n\n')
        fimo_bash.write(fimoCmd)

    # NOTE(review): the command is a shell string built from caller-supplied
    # paths (needed for the '>' redirect); paths must be trusted and must not
    # contain shell metacharacters
    # subprocess.call blocks until fimo has finished
    subprocess.call(fimoCmd, shell=True)
    return output
开发者ID:linlabcode,项目名称:pipeline,代码行数:52,代码来源:CRC3.py
示例10: assignEnhancerRank
def assignEnhancerRank(enhancerToGeneFile,enhancerFile1,enhancerFile2,name1,name2,rankOutput=''):
    '''
    for every region in the enhancerToGene table, looks up the best (lowest)
    rank among the overlapping enhancers of each of the two enhancer files
    and appends both ranks as new columns

    a region without any overlap in a file gets the size of that file's
    enhancer collection as its rank, i.e. it ranks last
    returns the updated table when rankOutput is empty, otherwise writes the
    table to rankOutput and returns nothing
    '''
    enhancerToGene = utils.parseTable(enhancerToGeneFile, '\t')

    enhancerCollection1 = makeSECollection(enhancerFile1, name1, False)
    enhancerCollection2 = makeSECollection(enhancerFile2, name2, False)
    enhancerDict1 = makeSEDict(enhancerFile1, name1, False)
    enhancerDict2 = makeSEDict(enhancerFile2, name2, False)

    def bestRank(collection, attributeDict, locus):
        # lowest rank among overlapping enhancers, or last place if none overlap
        overlapping = collection.getOverlap(locus, 'both')
        if len(overlapping) == 0:
            return len(collection)
        return min([attributeDict[hit.ID()]['rank'] for hit in overlapping])

    # extend the header, then every data row, in place
    enhancerToGene[0] += ['%s_rank' % name1, '%s_rank' % name2]
    for row in enhancerToGene[1:]:
        regionLocus = utils.Locus(row[1], row[2], row[3], '.', row[0])
        rank1 = bestRank(enhancerCollection1, enhancerDict1, regionLocus)
        rank2 = bestRank(enhancerCollection2, enhancerDict2, regionLocus)
        row += [rank1, rank2]

    if len(rankOutput) == 0:
        return enhancerToGene
    utils.unParseTable(enhancerToGene, rankOutput, '\t')
开发者ID:zhouhufeng,项目名称:pipeline,代码行数:50,代码来源:dynamicEnhancer.py
示例11: makeSignalDict
def makeSignalDict(mappedGFFFile, controlMappedGFFFile=''):
    '''
    makes a dict of signal keyed by the first column of the mapped gff table

    the signal is read from the third column of each row after the header
    when a control table is given, the value at the same row position in the
    control is subtracted and negative results are set to 0.0
    the result is a defaultdict, so unknown keys read as 0.0
    '''
    print('\t called makeSignalDict on %s (ctrl: %s)' % (mappedGFFFile, controlMappedGFFFile))

    signalDict = defaultdict(float)
    mappedGFF = utils.parseTable(mappedGFFFile, '\t')

    if len(controlMappedGFFFile) == 0:
        # no control: take the mapped signal as is
        for row in mappedGFF[1:]:
            signalDict[row[0]] = float(row[2])
        return signalDict

    controlGFF = utils.parseTable(controlMappedGFFFile, '\t')
    for rowNumber, row in enumerate(mappedGFF[1:], 1):
        # control rows are paired with mapped rows by position
        signal = float(row[2]) - float(controlGFF[rowNumber][2])
        if signal < 0:
            signal = 0.0
        signalDict[row[0]] = signal
    return signalDict
开发者ID:afederation,项目名称:pipeline,代码行数:23,代码来源:ROSE2_geneMapper.py
示例12: getMedianSignalEnhancer
def getMedianSignalEnhancer(enhancerFile,name,dataFile):
    '''
    returns the median of column 6 of an enhancer table

    the first 6 lines of the table are treated as header and skipped
    the data table is still loaded from dataFile although its content (and
    name) is not used for the calculation
    '''
    # kept from the original flow: the result is never read in this function
    pipeline_dfci.loadDataTable(dataFile)

    signalValues = []
    for row in utils.parseTable(enhancerFile, '\t')[6:]:
        signalValues.append(float(row[6]))
    return numpy.median(signalValues)
开发者ID:linlabcode,项目名称:pipeline,代码行数:14,代码来源:dynamicEnhancer_meta.py
示例13: getSignalVector
def getSignalVector(regionFile,name,dataFile):
    '''
    returns the signal column of a dataset from a region table as a list of floats

    the column is found by looking up the file name of the dataset's bam
    (dataDict[name]['bam']) in the header row of the region table
    '''
    dataDict = pipeline_dfci.loadDataTable(dataFile)
    regionTable = utils.parseTable(regionFile, '\t')

    # the header labels each dataset column with the bam's file name
    bamName = dataDict[name]['bam'].split('/')[-1]
    colID = regionTable[0].index(bamName)

    signalVector = []
    for row in regionTable[1:]:
        signalVector.append(float(row[colID]))
    return signalVector
开发者ID:linlabcode,项目名称:pipeline,代码行数:15,代码来源:dynamicEnhancer_meta.py
示例14: makeAnnotDict
def makeAnnotDict(annotFile):
    '''
    reads the annotation table and returns two lookups

    guideDict maps the second column (guide ID) to the first column (gene)
    geneDict maps the first column (gene) to the list of its guide IDs
    the first line of the table is skipped as header
    both are defaultdicts, so unknown keys read as '' and [] respectively
    '''
    guideDict = defaultdict(str)
    geneDict = defaultdict(list)
    for row in utils.parseTable(annotFile, '\t')[1:]:
        gene = row[0]
        guideID = row[1]
        guideDict[guideID] = gene
        geneDict[gene].append(guideID)
    return guideDict, geneDict
开发者ID:BoulderLabs,项目名称:pipeline,代码行数:16,代码来源:processGeckoBam.py
示例15: callMergeSupers
def callMergeSupers(dataFile, superFile1, superFile2, name1, name2, mergeName, genome, parentFolder):
    """
    Merge two super enhancer files and run ROSE on the merged regions.

    If the expected ROSE enhancer-to-gene table can already be parsed it is
    returned right away. Otherwise the regions are merged, the generated bash
    script is run, and the expected output path is returned once
    utils.checkOutput reports it. If the expected file does not appear, the
    ROSE folder is searched for a file matching
    "_SuperEnhancers_ENHANCER_TO_GENE.txt" and that path is returned.

    Exits the interpreter if the ROSE folder contains no visible files.
    """
    genomeTag = genome.upper()
    mergedGFFFile = "%s%s_%s_MERGED_REGIONS_-0_+0.gff" % (parentFolder, genomeTag, mergeName)

    # check to make sure this hasn't been done yet
    roseOutput = "%s%s_ROSE/%s_%s_MERGED_REGIONS_-0_+0_SuperEnhancers_ENHANCER_TO_GENE.txt" % (
        parentFolder,
        name1,
        genomeTag,
        mergeName,
    )
    try:
        # a successful parse is used as the "output exists" test
        utils.parseTable(roseOutput, "\t")
        print("ROSE OUTPUT ALREADY FOUND HERE %s" % (roseOutput))
        return roseOutput
    except IOError:
        print("MERGING ENHANCER REGIONS FROM %s and %s" % (superFile1, superFile2))
        mergedGFF = mergeCollections(superFile1, superFile2, name1, name2, mergedGFFFile)

        # call rose on the merged regions
        roseBashFile = callRoseMerged(dataFile, mergedGFF, name1, name2, parentFolder)
        print("i can has rose bash file %s" % (roseBashFile))

        # run the bash command
        os.system("bash %s" % (roseBashFile))

        # check for and return output
        if utils.checkOutput(roseOutput, 1, 10):
            return roseOutput

        # try finding it w/ a different name
        roseFolder = "%s%s_ROSE/" % (parentFolder, name1)
        roseFileList = [x for x in os.listdir(roseFolder) if x[0] != "."]  # no hidden files
        if len(roseFileList) == 0:
            print("No files found in %s" % (roseFolder))
            sys.exit()

        enhancerToGeneFile = getFile("_SuperEnhancers_ENHANCER_TO_GENE.txt", roseFileList, roseFolder)
        # hand the located file back so this path returns a path like the others
        return enhancerToGeneFile
开发者ID:lg72cu,项目名称:pipeline,代码行数:46,代码来源:dynamicEnhancer.py
示例16: getMedianSignal
def getMedianSignal(enhancerFile, name, dataFile):
    """
    Return the median enhancer signal of an enhancer table.

    The first 6 lines of the table are treated as header and skipped. When
    the dataset's "background" entry is itself a key of the data table, the
    signal is column 6 minus column 7; otherwise it is column 6 alone.
    """
    dataDict = pipeline_dfci.loadDataTable(dataFile)
    enhancerTable = utils.parseTable(enhancerFile, "\t")

    backgroundName = dataDict[name]["background"]
    subtractBackground = backgroundName in dataDict

    enhancerVector = []
    for row in enhancerTable[6:]:
        if subtractBackground:
            enhancerVector.append(float(row[6]) - float(row[7]))
        else:
            enhancerVector.append(float(row[6]))
    return numpy.median(enhancerVector)
开发者ID:lg72cu,项目名称:pipeline,代码行数:17,代码来源:dynamicEnhancer.py
示例17: makeBedCollection
def makeBedCollection(bedFileList):
    '''
    takes in a list of bedFiles and makes a single huge collection
    each locus has as its ID the base name of the bed file (text before the first '.')

    rows with fewer than 3 columns (e.g. blank lines or single-field header
    lines) cannot describe a region and are skipped
    columns 1-3 of every other row are passed to utils.Locus unchanged
    '''
    bedLoci = []
    print("MAKING BED COLLECTION FOR:")
    for bedFile in bedFileList:
        bedName = bedFile.split('/')[-1].split('.')[0]
        print(bedName)
        bed = utils.parseTable(bedFile, '\t')
        for line in bed:
            # guard against short rows instead of failing with an IndexError
            if len(line) < 3:
                continue
            bedLocus = utils.Locus(line[0], line[1], line[2], '.', bedName)
            bedLoci.append(bedLocus)
    return utils.LocusCollection(bedLoci, 50)
开发者ID:afederation,项目名称:pipeline,代码行数:18,代码来源:bamPlot_turbo.py
示例18: makeSECollection
def makeSECollection(enhancerFile,name,superOnly = True):
    '''
    returns a locus collection built from an enhancer table

    rows whose first field starts with '#' or 'R' are skipped
    each remaining row becomes a locus from columns 1-3 with
    name + '_' + first column as its ID
    with superOnly set, reading stops at the first row whose last column is 0
    '''
    enhancerLoci = []
    for fields in utils.parseTable(enhancerFile, '\t'):
        leadingChar = fields[0][0]
        if leadingChar == '#' or leadingChar == 'R':
            continue

        # stops at the first non-super row; later rows are not examined
        if superOnly and int(fields[-1]) == 0:
            break

        locusID = name + '_' + fields[0]
        enhancerLoci.append(utils.Locus(fields[1], fields[2], fields[3], '.', locusID))
    return utils.LocusCollection(enhancerLoci, 50)
开发者ID:BoulderLabs,项目名称:pipeline,代码行数:19,代码来源:clusterEnhancer.py
示例19: makeMedianDict
def makeMedianDict(nameDict):
    '''
    for each dataset returns the median background subtracted enhancer signal

    the table at nameDict[name]['enhancerFile'] is read, its first 6 lines
    are skipped as header, and the median of (column 6 - column 7) is stored
    under the dataset name
    '''
    medianDict = {}
    for name in nameDict:
        enhancerRows = utils.parseTable(nameDict[name]['enhancerFile'], '\t')

        correctedSignal = []
        # data rows start after the 6 header lines
        for row in enhancerRows[6:]:
            correctedSignal.append(float(row[6]) - float(row[7]))

        medianDict[name] = numpy.median(correctedSignal)
    return medianDict
开发者ID:zhouhufeng,项目名称:pipeline,代码行数:19,代码来源:clusterEnhancer.py
示例20: filterGFF
def filterGFF(gffFile,chromList):
    '''
    takes in a gff and filters out all lines that don't belong to a chrom in the chromList

    a line is kept when its first column is a member of chromList, no matter
    how often that chrom is listed there
    the chroms of the dropped lines are printed once each
    returns the list of kept lines
    '''
    gff = utils.parseTable(gffFile,'\t')

    # membership test instead of count() == 1, which rejected chroms that
    # appear more than once in chromList and rescanned the list for every line
    allowedChroms = set(chromList)

    filteredGFF = []
    excludeList = []
    for line in gff:
        if line[0] in allowedChroms:
            filteredGFF.append(line)
        else:
            excludeList.append(line[0])

    excludeList = utils.uniquify(excludeList)
    if len(excludeList) > 0:
        print("EXCLUDED GFF REGIONS FROM THE FOLLOWING CHROMS: %s" % (','.join(excludeList)))
    return filteredGFF
开发者ID:linlabcode,项目名称:pipeline,代码行数:19,代码来源:ROSE2_META.py
注：本文中的utils.parseTable函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。
请发表评论