本文整理汇总了Python中textblob.tb函数的典型用法代码示例（示例中的tb通常是 `from textblob import TextBlob as tb` 引入的TextBlob别名）。如果您正苦于以下问题：Python tb函数的具体用法？Python tb怎么用？Python tb使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了tb函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: parse_book
def parse_book(book_file):
    """Split a book file into chapters and return them as a list of blobs.

    A line consisting only of the capital letters A-Z is treated as a chapter
    title: it closes the chapter collected so far and is itself left out of
    the text. Blank lines are skipped. Every other line is passed through
    ``preprocess`` and added to the current chapter. The lines of a chapter
    are joined with single spaces before being wrapped with ``tb``.

    :param book_file: path of the text file to read.
    :return: list with one ``tb`` object per non-empty chapter, in file order.
    """
    # chapter titles are all caps and only one word
    title_pattern = re.compile("^[A-Z]+$")
    book = []
    chapter = []
    with open(book_file, 'r') as f:
        for line in f:
            line = line.rstrip()
            if not line:
                continue
            if title_pattern.match(line):
                # if there's something in the chapter, put it in the book
                if chapter:
                    book.append(tb(' '.join(chapter)))
                    chapter = []
            else:
                # preprocess line and put into chapter
                chapter.append(preprocess(line))
    # The last chapter has no following title to close it. Guard it the same
    # way as inside the loop so an empty file, or a file ending on a title,
    # does not add an empty blob to the book.
    if chapter:
        book.append(tb(' '.join(chapter)))
    return book
开发者ID:genbien,项目名称:GoT,代码行数:26,代码来源:most_common_words_in_chapters_tfidf.py
示例2: rankDocs
def rankDocs(keywordList, doclistTuples):
    """Score every document against the keywords and return the ten best.

    :param keywordList: keywords handed unchanged to ``scoreDoc``.
    :param doclistTuples: sequence of entries where ``entry[0]`` is used as
        the result key and ``entry[1]`` is UTF-8 encoded text.
    :return: list of at most ten ``(key, score)`` pairs, highest score first.
    """
    entries = list(doclistTuples)
    # Decode and wrap each document once; the same blobs serve both as the
    # corpus and as the individual documents being scored.
    blobs = [tb(entry[1].decode('utf-8')) for entry in entries]
    scores = {}
    for entry, blob in zip(entries, blobs):
        scores[entry[0]] = scoreDoc(keywordList, blob, blobs)
    ranked = sorted(scores.items(), key=lambda item: item[1], reverse=True)
    return ranked[:10]
开发者ID:kenumovies,项目名称:question-answer-system,代码行数:8,代码来源:tfidf_scores.py
示例3: setBlob
def setBlob(self, blob_):
    """Store a cleaned (and, if enabled, stemmed) blob of ``blob_`` on ``self.blob``.

    Characters not contained in ``printable`` are dropped first. When
    ``self.stemming`` is truthy each word is lower-cased, passed through
    ``stem`` and the results are concatenated, each preceded by one space.

    :param blob_: raw text to clean.
    """
    # Join explicitly: on Python 3 ``filter`` over a string returns an
    # iterator, not a string, which ``tb`` cannot consume.
    paragraph = "".join(ch for ch in blob_ if ch in printable)
    blob = tb(paragraph)
    newBlob = ""
    if self.stemming:
        # Every stem keeps its leading space, so the text is identical to
        # what repeated ``+=`` concatenation produced.
        newBlob = "".join(" " + stem(word.lower()) for word in blob.words)
    # NOTE(review): with stemming disabled this stores an empty blob. The
    # original indentation was lost, so confirm whether the unstemmed text
    # was meant to be kept in that case.
    self.blob = tb(newBlob)
开发者ID:luciencd,项目名称:suggestr,代码行数:9,代码来源:courseSimilarity.py
示例4: main
def main():
    """Command-line loop that compares a new blog against the stored writings.

    The blog to analyse comes from ``--inFile`` or from ``--blog`` (with
    optional ``--author`` / ``--title``). On every pass the stored writings
    are reloaded from ``Writings/writings.json`` and analysed, the supplied
    blog (if any) is compared against them, and the user is prompted for
    another file path until ``quit``, ``Quit`` or ``q`` is entered.
    """
    # Takes in commandLine args, and sorts variables if necessary.
    parser = argparse.ArgumentParser(description='Analyze Blogs.', formatter_class=RawTextHelpFormatter)
    parser.add_argument('-b', '--blog', help='Manually enter the blog text here as a string. Formatted like:\n\nauthor: "authors name"\ntitle: "title"\nblog: "blog text"', default=None)
    parser.add_argument('-a', '--author', help='Enter the authors name as a string', default=None)
    parser.add_argument('-t', '--title', help='Enter the blogs title as a string', default=None)
    parser.add_argument('-i', '--inFile', help='Enter the path to a plain text file with the blog entry in it', default=None)
    args = parser.parse_args()
    # Save variables from commandline args
    newBlogFile = args.inFile
    newBlogText = args.blog
    newBlogAuthor = args.author
    newBlogTitle = args.title
    while True:
        # Dictionary of 2 dictionaries, good and bad features, and their relevant metadata.
        # count is the number of blogs that have been passed through. This is necessary for updates.
        features = {
            label: {"count": 0, "words": [], "names": 0.0, "religion": 0.0, "weaponry": 0.0, "government": 0.0}
            for label in ("good", "bad")
        }
        json_data = importJSON("Writings/writings.json")  # get JSON data, creating a dictionary-like object
        # Lists of writings currently in the JSON file.
        badBlogList = [tb(blog["post"]) for blog in json_data["writings"]["bad"]]
        goodBlogList = [tb(blog["post"]) for blog in json_data["writings"]["good"]]
        # Analyze the current data; "bad" is processed before "good", as before.
        for label, blogList in (("bad", badBlogList), ("good", goodBlogList)):
            analysisResults = analyzeBlogs(blogList)
            features[label].update({
                "count": len(blogList),
                "words": analysisResults.outputsWordsArray,
                "names": analysisResults.namesScore,
                "religion": analysisResults.religionScore,
                "weaponry": analysisResults.weaponryScore,
                "government": analysisResults.governmentScore,
            })
        print("Current writings in database have been analyzed... \nRunning comparisons against provided writing...\n ----------------------------")
        newBlog = None
        # Analyze new file; a file path takes precedence over inline text.
        if newBlogFile is not None:
            newBlog = buildNewBlog(newBlogFile)
        elif newBlogText is not None:
            newBlog = buildNewBlog(None, newBlogAuthor, newBlogTitle, newBlogText)
        if newBlog is not None:
            analyzeNewBlog(newBlog.post, goodBlogList, badBlogList, features)
        print ("Please enter another file for analysis. or 'quit' to quit.\n")
        newBlogFile = input('File path: ')
        if newBlogFile in ("quit", "Quit", "q"):
            break
    print("Closing program...")
开发者ID:dfrank8,项目名称:NaturalLanguageProcessor,代码行数:55,代码来源:NaturalLanguage.py
示例5: readcontent
def readcontent(self):
    """Load the three field description files and wrap each one with ``tb``.

    Reads ``Cs.txt``, ``Is.txt`` and ``It.txt`` from the current working
    directory, lower-cases their contents and stores both the raw text
    (``CS_Fild``, ``Is_filed``, ``IT_field``) and the blobs (``Cs``, ``Is``,
    ``It``) on the instance. ``self.bloblist`` holds the blobs in that order.
    """
    # Open all three before reading, as before, but inside ``with`` so the
    # handles are closed even if a read fails.
    with open('Cs.txt', 'r') as cs_file, open('Is.txt', 'r') as is_file, open('It.txt', 'r') as it_file:
        self.CS_Fild = cs_file.read().lower()
        self.Is_filed = is_file.read().lower()
        self.IT_field = it_file.read().lower()
    self.Cs = tb(self.CS_Fild)
    self.Is = tb(self.Is_filed)
    self.It = tb(self.IT_field)
    self.bloblist = [self.Cs, self.Is, self.It]
开发者ID:hassanabdelhalim23,项目名称:Recommendation-System,代码行数:12,代码来源:anyalzedoc.py
示例6: extract
def extract(text):
    """Return the 15 highest-scoring words of ``text`` as a single string.

    The corpus is built from the ``post_text`` column of ``clean_text.csv``.
    Each distinct word of ``text`` is scored with ``tfidf`` against that
    corpus.

    :param text: text whose main words are wanted.
    :return: the top words in descending score order, each followed by one
        space (so a non-empty result ends with a space).
    """
    with open("clean_text.csv") as csv_file:
        corpus = [tb(record['post_text']) for record in csv.DictReader(csv_file)]
    target = tb(text)
    weights = {}
    for term in target.words:
        weights[term] = tfidf(term, target, corpus)
    ranked = sorted(weights.items(), key=lambda pair: pair[1], reverse=True)
    return ''.join(term + ' ' for term, _ in ranked[:15])
开发者ID:AlexandrShestak,项目名称:nlp,代码行数:13,代码来源:MainWordsExtractor.py
示例7: analyzeNewBlog
def analyzeNewBlog(blog, goodBlogList, badBlogList, features):
    """Score a new blog against the "good" and "bad" feature sets and file it.

    The blog's term densities (names, religion, weaponry, government) are
    computed per word. For each feature set the score is raised by
    ``weight * 100`` for every non-stop-word entry of ``"words"`` that occurs
    in the blog text, and lowered by the absolute difference between the
    set's density and the blog's density for each category. If the two
    scores differ by at least 0.5, the blog is appended (as a ``tb`` object)
    to the list with the higher score.

    :param blog: text of the new blog post.
    :param goodBlogList: list that receives the blog when it is marked good.
    :param badBlogList: list that receives the blog when it is flagged bad.
    :param features: mapping ``{"good": {...}, "bad": {...}}``; each inner
        mapping is read for ``"words"`` (pairs indexed as ``[0]`` word and
        ``[1]`` weight) and the four density keys.
    """
    # Fetch each term list once and not on every token.
    government_terms = terms.governmentTerms()
    weapons_terms = terms.weaponsTerms()
    female_names = terms.femaleNames()
    male_names = terms.maleNames()
    religious_terms = terms.religiousTerms()
    stop_words = terms.stopWords()

    # Get word densities of the new blog.
    namesCount = religionCount = weaponryCount = governmentCount = wordCount = 0
    # Iterate ``.words``: iterating the blob itself yields single characters,
    # which can never match a term and would inflate the word count.
    for word in tb(blog).words:
        wordCount += 1
        if word in government_terms:
            governmentCount += 1
        if word in weapons_terms:
            weaponryCount += 1
        if word in female_names or word in male_names:
            namesCount += 1
        if word in religious_terms:
            religionCount += 1
    # A blog without words has density 0 everywhere; this avoids dividing by
    # zero. ``float`` keeps the division true division on Python 2.
    divisor = float(wordCount) if wordCount else 1.0
    analysisOutputs = AnalysisObject(namesCount / divisor, religionCount / divisor, weaponryCount / divisor, governmentCount / divisor, None)

    # Label used in the output and the new blog's density for each category.
    density_checks = {
        "religion": ("Religion", analysisOutputs.religionScore),
        "government": ("Government", analysisOutputs.governmentScore),
        "weaponry": ("Weaponry", analysisOutputs.weaponryScore),
        "names": ("Names", analysisOutputs.namesScore),
    }

    # Compare to the analyzed ones.
    scores = {"good": 0.0, "bad": 0.0}
    for upperKey in features:
        print ("\nComparing this blog to " + upperKey.upper() + " blogs:\n")
        for lowerKey in features[upperKey]:
            if lowerKey == "words":
                for word in features[upperKey][lowerKey]:
                    # Substring test against the raw blog text, as before.
                    if word[0] not in stop_words and word[0] in blog:
                        print ("Word found in " + upperKey + " blog: " + word[0])
                        # If a word is found, update the score relative to its TFIDF score.
                        scores[upperKey] += word[1] * 100
            elif lowerKey in density_checks:
                # Penalise by how far the blog's density is from the set's.
                label, density = density_checks[lowerKey]
                variance = abs(features[upperKey][lowerKey] - density)
                scores[upperKey] -= variance
                # All four categories now print the absolute value; religion
                # previously printed the signed difference.
                print (label + " variance: " + str(variance))
    print ("\nFinal Scores:\n" + "Bad: " + str(scores["bad"]) + "\nGood: " + str(scores["good"]) + "\n")
    if abs(scores["good"] - scores["bad"]) < .5:
        print ("This post does not trend towards 'good' or 'bad'.")
    elif scores["good"] > scores["bad"]:
        print ("This post has been marked as 'good'.")
        # Add the blog to the list so a later analysis over this list includes it.
        goodBlogList.append(tb(blog))
    else:
        print ("This post has been flagged as 'bad'.")
        badBlogList.append(tb(blog))
    print ("\n---------------------------------------")
开发者ID:dfrank8,项目名称:NaturalLanguageProcessor,代码行数:49,代码来源:NaturalLanguage.py
示例8: get_tfidf_values
def get_tfidf_values(self, sentence):
    """Return the weighted ``[ds, nmd, tec]`` totals for ``sentence``.

    The sentence is wrapped with ``tb`` and appended to ``self.bloblist``; a
    copy of that list is the corpus for scoring. Single words are scored
    with ``self.__tfidf__(..., 1)`` and adjacent word pairs with
    ``self.__tfidf__(..., 2)``. Each total is the sum, over all scored
    terms, of the matching ``__*_check__`` result times the term's score.

    :param sentence: text to analyse.
    :return: list ``[ds, nmd, tec]``.
    """
    blob = tb(sentence)
    self.bloblist.append(blob)
    corpus = list(self.bloblist)

    tokens = list(blob.words)
    bigrams = [Word(left + ' ' + right) for left, right in zip(tokens, tokens[1:])]

    def by_score(item):
        return item[1]

    pair_scores = {}
    for bigram in bigrams:
        pair_scores[bigram] = self.__tfidf__(bigram, blob, corpus, 2)
    ranked_pairs = sorted(pair_scores.items(), key=by_score, reverse=True)

    single_scores = {}
    for token in blob.words:
        single_scores[token] = self.__tfidf__(token, blob, corpus, 1)
    ranked_singles = sorted(single_scores.items(), key=by_score, reverse=True)

    ds = 0
    nmd = 0
    tec = 0
    # Single words first, then pairs, each group in descending score order.
    for term, weight in ranked_singles + ranked_pairs:
        ds += self.__ds_check__(term) * weight
        nmd += self.__nmd_check__(term) * weight
        tec += self.__tec_check__(term) * weight
    return [ds, nmd, tec]
开发者ID:rfire01,项目名称:SAGLET,代码行数:26,代码来源:tfidfAnalyze.py
示例9: features_pos_tag
def features_pos_tag(self):
    """Return the share of each part-of-speech tag in the essay's text fields.

    ``title``, ``short``, ``need`` and ``essay`` are joined with ``.`` and
    tagged. The result starts from a copy of ``tag_dict`` and overrides the
    entry of every tag that occurs with ``count / total``.

    :return: tuple of ratios, one per entry of ``tag_list``, in that order.
    """
    text = '.'.join((self.title, self.short, self.need, self.essay))
    tag_counts = Counter()
    for _, tag in tb(text).tags:
        tag_counts[tag] += 1
    tag_total = sum(tag_counts.values())
    ratios = tag_dict.copy()
    for tag, occurrences in tag_counts.items():
        ratios[tag] = float(occurrences) / tag_total
    return tuple(ratios[tag] for tag in tag_list)
开发者ID:hippozhu,项目名称:kdd2104,代码行数:7,代码来源:essay_feature.py
示例10: get_tweet_info
def get_tweet_info(tweet):
    """Flatten a tweet object into a plain dict and attach its sentiment.

    :param tweet: object exposing ``id_str``, ``user.id``, ``created_at``,
        ``text``, ``coordinates``, ``favorite_count``, ``retweet_count`` and
        a mapping ``entities``.
    :return: dict with those fields plus ``hashtags`` and ``media`` (``None``
        when absent from ``entities``) and ``sentiment``, a dict holding the
        ``polarity`` and ``subjectivity`` reported by ``tb`` for the text.
    """
    info = {}
    info['tweet_id'] = tweet.id_str
    info['created_by_id'] = tweet.user.id
    info['created_at'] = tweet.created_at
    info['text'] = tweet.text
    info['coordinates'] = tweet.coordinates
    # Note: favorite_count is only non-zero for an original tweet. Getting
    # it for a retweet would require looking up the original tweet itself.
    info['favorite_count'] = tweet.favorite_count
    info['retweet_count'] = tweet.retweet_count
    # The 'favorited' field is deliberately left out: it only says whether
    # the authenticated user favorited the tweet.

    for entity_name in ('hashtags', 'media'):
        if entity_name in tweet.entities:
            info[entity_name] = tweet.entities[entity_name]
        else:
            info[entity_name] = None

    # Get Sentiment
    analysed = tb(tweet.text)
    info['sentiment'] = {
        'polarity': analysed.sentiment.polarity,
        'subjectivity': analysed.sentiment.subjectivity,
    }
    return info
开发者ID:zdellison,项目名称:cs194_project,代码行数:32,代码来源:views.py
示例11: stemming
def stemming(doc):
    """Tokenize ``doc``, drop stop words, lemmatize the rest and wrap with ``tb``.

    :param doc: raw document text.
    :return: ``tb`` object built from the lemmatized tokens joined by spaces.
    """
    kept = []
    for token in toker.tokenize(doc):
        if token in cachedStopWords:
            continue
        kept.append(lemma.lemmatize(token))
    return tb(" ".join(kept))
开发者ID:goeastagent,项目名称:recdoc,代码行数:7,代码来源:nlp_test_wang_stemming.py
示例12: __init__
def __init__(self, graph):
    """Collect a ``tb`` blob of the abstract of every node in ``graph``.

    Nodes whose abstract cannot be used are reported on stdout and skipped.

    :param graph: graph object providing ``nodes()`` and a per-node
        attribute mapping ``graph.node[node]``.
    """
    self.bloblist = []
    for node in graph.nodes():
        try:
            self.bloblist.append(tb(graph.node[node]['abstract']))
        except (KeyError, TypeError):
            # KeyError: the node has no 'abstract' attribute.
            # TypeError: presumably what ``tb`` raises for a non-string
            # abstract such as None (verify against the library).
            # The text matches the old ``print "...", node`` output, which
            # put a second space before the node.
            print("No abstract for node  %s" % (node,))
开发者ID:alextaylorjones,项目名称:NS202-Visualization-Of-Metro-Maps,代码行数:7,代码来源:concept_helper.py
示例13: buildTestData
def buildTestData(self):
    """Build one ``tb`` blob per entry of ``self.dev`` for testing.

    The ``'content'`` strings of each entry are joined with ``'. '`` and the
    resulting text is stored in ``self.testBloblist`` under the entry's key.
    ``self.testBloblistLength`` is set to the number of blobs.
    """
    self.testBloblist = {}
    # ``items()`` works on Python 2 and 3; ``iteritems()`` is Python 2 only.
    for key, entry in self.dev.items():
        content = '. '.join(entry['content'])
        # Strings are immutable, so the result of ``replace`` must be
        # assigned. Previously it was discarded and the doubled full stops
        # stayed in the text.
        content = content.replace('..', '.')
        self.testBloblist[key] = tb(content)
    self.testBloblistLength = len(self.testBloblist)
开发者ID:vswamy,项目名称:summarizer,代码行数:7,代码来源:tf_idf_stemmer.py
示例14: extract
def extract(storyString):
    """Analyse every sentence of ``storyString``.

    :param storyString: story text; ``tb`` splits it into sentences.
    :return: list of ``analyze_sent_semantics`` results, one per sentence,
        in sentence order.
    """
    return [analyze_sent_semantics(sentence) for sentence in tb(storyString).sentences]
开发者ID:gmittal,项目名称:aar-nlp-research-2016,代码行数:7,代码来源:text_parse.py
示例15: main
def main():
    """Print the TF-IDF ranking of the words found in the ``documents`` folder.

    ``.docx`` files contribute the text of each paragraph. ``.html`` /
    ``.htm`` files contribute their extracted title and article text. PDF
    handling is not implemented and other file types are ignored. All
    collected text is joined into a single document, its non-stop-words are
    scored with ``tfidf`` and printed in descending score order.
    """
    dataList = []
    for f in os.listdir('documents'):
        # os.path.join instead of a hard-coded backslash, so the path is
        # also valid outside Windows.
        filePath = os.path.join('documents', f)
        fileExtension = os.path.splitext(filePath)[1].lower()
        if fileExtension == '.docx':
            print('')
            doc = docxDocument(filePath)
            for p in doc.paragraphs:
                dataList.append(p.text)
        elif fileExtension == '.pdf':
            print('')
            # TODO: extract text from PDF documents
        elif fileExtension in ('.html', '.htm'):
            print('')
            with codecs.open(filePath, errors='ignore') as myfile:
                source = myfile.read()
            article = Document(source).summary()
            title = Document(source).title()
            soup = BeautifulSoup(article, 'lxml')
            # Title and body on one line, separated by a full stop.
            final = replaceTwoOrMore((title.replace('\n', ' ').replace('\r', '') + '.' + soup.text.replace('\n', ' ').replace('\r', '')))
            dataList.append(final)
        else:
            print('')  # undetected document type
        print('')
    # Load the stop words once. They used to be reloaded for every word
    # inside the comprehension while this cached copy went unused.
    cachedStopWords = set(stopwords.words("english"))
    combined = ' '.join(dataList)
    bloblist = [tb(combined)]
    for i, blob in enumerate(bloblist):
        print("Top words in document {}".format(i + 1))
        scores = {word: tfidf(word, blob, bloblist) for word in blob.words if word not in cachedStopWords}
        sorted_words = sorted(scores.items(), key=lambda x: x[1], reverse=True)
        for word, score in sorted_words:
            print("\tWord: {}, TF-IDF: {}".format(word, round(score, 5)))
开发者ID:adamstein,项目名称:mayhem,代码行数:59,代码来源:run.py
示例16: rankSentences
def rankSentences(keywordList, doclist):
    """Rank the entries of ``doclist`` by summed keyword TF-IDF.

    :param keywordList: keywords to score with.
    :param doclist: sequence of entries where ``entry[0]`` is used as the
        result key and ``entry[1]`` is UTF-8 encoded text.
    :return: list of at most ten ``(key, {'score': ..., 'object': ...})``
        pairs, highest score first.
    """
    entries = list(doclist)
    # The original read the undefined name ``doclistEntries`` here; the
    # ``doclist`` parameter is the only candidate in scope.
    docList = [tb(entry[1].decode('utf-8')) for entry in entries]
    # The IDF of a keyword depends only on the corpus, so compute it once.
    keywordIDFs = {}
    for w in keywordList:
        keywordIDFs[w] = idf(w, docList)
    scores = {}
    # The original looped ``for i in enumerate(doclist)`` but used an
    # undefined ``doc``; pair each entry with its already-built blob.
    for entry, text in zip(entries, docList):
        sums = 0
        for w in keywordList:
            sums = sums + (tf(w, text) * keywordIDFs[w])
        # Was ``scores=[doc[0]]={...}``, a chained assignment that rebinds
        # ``scores`` and then fails while unpacking the dict.
        # NOTE(review): 'object' stores the name ``Sentence`` exactly as the
        # original did; confirm whether the entry or its blob was intended.
        scores[entry[0]] = {'score': sums, 'object': Sentence}
    bestMatches = sorted(scores.items(), key=lambda x: x[1]['score'], reverse=True)
    # ``[:0]`` always returned an empty list. Ten matches mirrors the
    # sibling ranking helper ``rankDocs``. TODO confirm the intended count.
    return bestMatches[:10]
开发者ID:ajdime21,项目名称:QA_System,代码行数:17,代码来源:rankDocs.py
示例17: main
def main():
    """Print the ratio of average TF-IDF between the summary and the input.

    ``summaryList.txt`` and ``input.txt`` are each expected to contain a
    Python literal that iterates over comments. Every comment is reduced to
    its non-stop-words and wrapped with ``tb`` before the averages are
    computed with ``calculate_tfidf_average``.
    """
    def load_comments(path):
        # ``with`` closes the file; the original left both handles open.
        with open(path) as handle:
            # NOTE(review): eval() executes arbitrary code found in the
            # file. If these files only hold plain literals,
            # ast.literal_eval would be the safe replacement. Confirm the
            # file format before switching.
            return eval(handle.read())

    summary_text = load_comments("summaryList.txt")
    input_text = load_comments("input.txt")
    summarycommentlist = [tb(' '.join(get_nonstop_words(comment))) for comment in summary_text]
    inputcommentlist = [tb(' '.join(get_nonstop_words(comment))) for comment in input_text]
    tf_idf_summary = calculate_tfidf_average(summarycommentlist)
    tf_idf_input = calculate_tfidf_average(inputcommentlist)
    # Function-call form works on Python 2 and 3. The two spaces reproduce
    # the output of the old ``print "Retention Rate = ", value`` statement.
    print("Retention Rate =  %s" % (tf_idf_summary / tf_idf_input,))
开发者ID:vikasnar,项目名称:CSCI544-Team-7,代码行数:17,代码来源:RetentionRate.py
示例18: make_bloblist
def make_bloblist(bloblist, corpus_path='/home/ashar/nltk_data/corpora/abc/rural.txt'):
    """Append one ``tb`` blob per blank-line-separated block of a corpus file.

    The file is read in full, lower-cased and split on ``"\\n\\n"``.

    :param bloblist: list that receives the blobs; it is modified in place.
    :param corpus_path: file to read. Defaults to the path that used to be
        hard-coded, so existing callers are unaffected.
    """
    # ``with`` closes the file; the original never closed it.
    with open(corpus_path, 'r') as f:
        var = f.read().lower()
    for block in var.split("\n\n"):
        # Only byte strings (Python 2 ``str``) need decoding; Python 3 text
        # is already unicode and has no ``decode`` method.
        if isinstance(block, bytes):
            block = block.decode('utf-8')
        bloblist.append(tb(block))
开发者ID:stillbreeze,项目名称:KAPA,代码行数:8,代码来源:indf.py
示例19: input_tags
def input_tags(infile):
    """Read ``pid,tag`` lines from ``infile`` into the module-level collections.

    For every line the first two comma-separated fields are taken as ``pid``
    and ``tag``. Both are wrapped with ``tb``, joined by a tab and split on
    the tab again. When that yields a second element, it is stored in
    ``tag_dict`` under ``pid``, the raw ``tag`` is appended to
    ``tag_dict_values`` and ``(pid, tag)`` is appended to ``ww``. Lines for
    which it fails are skipped.

    :param infile: path of the UTF-8 text file (undecodable bytes are ignored).
    :raises IndexError: if a line contains no comma (unchanged behaviour).
    """
    with codecs.open(infile, 'r', encoding='utf-8', errors='ignore') as f:
        for line in f:
            # Split once, not once per field.
            fields = line.split(',')
            pid = fields[0]
            tag = fields[1]
            # ``except Exception`` keeps the best-effort handling but, unlike
            # a bare ``except``, lets KeyboardInterrupt/SystemExit through.
            try:
                x = tb(str(pid)) + '\t' + tb(str(tag))
            except Exception:
                x = tb(str(pid))
            try:
                both = {pid: x.split('\t')[1]}
            except Exception:
                # No usable second element after the tab: skip this line.
                continue
            tag_dict.update(both)
            tag_dict_values.append(tag)
            ww.append((pid, tag))
开发者ID:velikooutro,项目名称:python_code_snippets,代码行数:18,代码来源:groupby_repeat.py
示例20: parseQuery
def parseQuery(q):
    """Return the UTF-8 encoded stems of the content words of query ``q``.

    The query is lower-cased and POS-tagged through ``tb``. Only words tagged
    as a noun (``NN*``), adjective (``JJ*``) or cardinal number (``CD``) are
    kept, and each is stemmed.

    :param q: query text.
    :return: list of stemmed keywords, encoded as UTF-8, in query order.
    """
    keepTags = frozenset(['NNP', 'NNPS', 'NN', 'NNS', 'JJ', 'JJR', 'JJS', 'CD'])
    # Build one stemmer for the whole query and not one per keyword.
    stemmer = PorterStemmer()
    return [
        stemmer.stem_word(word).encode('utf-8')
        for word, tag in tb(q.lower()).tags
        if tag in keepTags
    ]
开发者ID:kenumovies,项目名称:question-answer-system,代码行数:9,代码来源:parse_query.py
注：本文中的textblob.tb函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。
请发表评论