This article compiles typical usage examples of the Python textblob.TextBlob class. If you are wondering what TextBlob is for, how to use it, or what real-world code built on it looks like, the curated examples below should help.
The 20 TextBlob code examples shown below are sorted by popularity by default.
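Before diving in, here is a minimal sketch of the TextBlob calls the snippets below rely on. The sample sentence is made up; note that translate() and detect_language() call the Google Translate web API, so they need network access, and they were removed from textblob in release 0.16+ (the projects below target older releases).

from textblob import TextBlob

blob = TextBlob("TextBlob makes simple NLP tasks short and readable.")

print(blob.tags)          # part-of-speech tags, e.g. [('TextBlob', 'NNP'), ...]
print(blob.ngrams(n=2))   # list of 2-grams (WordList objects)
print(blob.words)         # tokenized words, punctuation stripped
print(blob.sentiment)     # Sentiment(polarity=..., subjectivity=...)

# network-dependent calls (Google Translate web API; removed in textblob >= 0.16):
# print(blob.detect_language())
# print(blob.translate(to="es"))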
Example 1: _translate_message
def _translate_message(bot, broadcast_list, context):
    if context and "autotranslate" in context:
        _autotranslate = context["autotranslate"]
        origin_language = _get_room_language(bot, _autotranslate["conv_id"])
        for send in broadcast_list:
            target_conversation_id = send[0]
            response = send[1]
            target_language = _get_room_language(bot, target_conversation_id)
            if origin_language != target_language:
                logger.debug("translating {} to {}".format(origin_language, target_language))
                translated = _autotranslate["event_text"]
                try:
                    en_blob = TextBlob(_autotranslate["event_text"])
                    translated = "{0}".format(en_blob.translate(to=target_language))
                    # translated = gs.translate(_autotranslate["event_text"], target_language)
                except Exception:
                    logger.debug("translation API returned string unchanged")
                finally:
                    if _autotranslate["event_text"] != translated:
                        # mutate the original response by reference
                        response.extend([
                            hangups.ChatMessageSegment('\n', hangups.SegmentType.LINE_BREAK),
                            hangups.ChatMessageSegment('(' + translated + ')')])
Author: 0xD3ADB33F, Project: hangoutsbot, Lines: 25, Source file: _DEPRECATED_syncrooms_autotranslate.py
Example 2: check_speech_patterns
def check_speech_patterns(text):
    PATTERNS = {
        ("PRP", "DT"),
        ("CC", "VBD"),
        ("VB", "RB"),
        ("VB", "PRP$"),
        ("NN", "POS"),
        ("NN", "MD", "VB"),
        ("VB", "PRP$", "NN"),
        ("MD", "VB", "VBN"),
        ("NN", "IN", "PRP$"),
        ("IN", "PRP$", "JJ"),
        ("VB", "PRP", "DT", "NN"),
        ("VBD", "RB", "JJ", "NNS"),
        ("NNP", "NNP", "NNP", "NNP"),
        ("PRP$", "NN", "CC", "PRP"),
        ("NNP", "NNP", "NNP", "NNP", "NNP"),
        ("NN", "IN", "DT", "NNS", "IN"),
        ("PRP$", "NN", "IN", "DT", "NN"),
        ("IN", "DT", "NN", "WDT", "VBZ"),
        ("NN", "IN", "PRP$", "JJ", "NN"),
        ("DT", "NN", "IN", "NN", "NN")
    }
    blob = TextBlob(text)
    # check every 2- to 5-gram's POS-tag sequence against the pattern set
    for i in range(2, 6):
        ngrams = blob.ngrams(n=i)
        for gram in ngrams:
            str_gram = " ".join(gram)
            gram_blob = TextBlob(str_gram)
            tags = gram_blob.tags
            lst1, lst2 = zip(*tags)
            if lst2 in PATTERNS:
                return True
    return False
Author: code-11, Project: BloombergSarcasm, Lines: 34, Source file: sentiment_analysis_3.py
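A quick, hypothetical way to exercise this helper (POS tagging requires the NLTK corpora that textblob fetches via python -m textblob.download_corpora); the expected results assume the tagger labels the phrases as noted:

# "run fast" should tag as ("VB", "RB"), one of the 2-gram patterns
print(check_speech_patterns("She can run fast"))   # expected: True
# too short to contain any 2- to 5-gram pattern
print(check_speech_patterns("Wow"))                # expected: False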
Example 3: scrape
def scrape(self, links=[], ads=True, translator=False):
    responses = []
    data = []
    if ads:
        for link in links:
            r = requests.get(link)
            responses.append(r)
    else:
        for link in links:
            r = requests.get(link)
            text = unidecode(r.text)
            html = lxml.html.fromstring(text)
            ad_links = html.xpath("//div[@class='cat']/a/@href")  # renamed to avoid shadowing the links parameter
            for link in ad_links:
                if len(self.base_urls) > 1 or len(self.base_urls[0]) > 3:
                    time.sleep(random.randint(5, 27))
                try:
                    responses.append(requests.get(link))
                    print link
                except requests.exceptions.ConnectionError:
                    print "hitting connection error"
                    continue
    for r in responses:
        values = {}  # fresh dict per response so entries in data don't all alias one object
        text = r.text
        html = lxml.html.fromstring(text)
        values["title"] = html.xpath("//div[@id='postingTitle']/a/h1")[0].text_content()
        values["link"] = unidecode(r.url)
        values["new_keywords"] = []
        try:
            values["images"] = html.xpath("//img/@src")
        except IndexError:
            values["images"] = "weird index error"
        pre_decode_text = html.xpath("//div[@class='postingBody']")[0].text_content().replace("\n", "").replace("\r", "")
        values["text_body"] = pre_decode_text
        try:
            # note the @ before class; without it this xpath never matches
            values["posted_at"] = html.xpath("//div[@class='adInfo']")[0].text_content().replace("\n", " ").replace("\r", "")
        except IndexError:
            values["posted_at"] = "not given"
        values["scraped_at"] = str(datetime.datetime.now())
        body_blob = TextBlob(values["text_body"])
        title_blob = TextBlob(values["title"])
        values["language"] = body_blob.detect_language()  # requires the internet - makes use of google translate api
        values["polarity"] = body_blob.polarity
        values["subjectivity"] = body_blob.sentiment[1]
        if values["language"] != "en" and not translator:
            values["translated_body"] = body_blob.translate(from_lang="es")
            values["translated_title"] = title_blob.translate(from_lang="es")
        else:
            values["translated_body"] = "none"
            values["translated_title"] = "none"
        values["phone_numbers"] = self.phone_number_parse(values)
        data.append(values)
    return data
Author: John-Keating, Project: investa_gator_v2, Lines: 60, Source file: crawler.py
Example 4: findLanguage
def findLanguage(reducedList3):
    languageMap = {}
    currentNumber = 0
    shuffle(reducedList3)
    for i in reducedList3:
        if currentNumber < 5000:
            if len(i[0]) > 5:
                try:
                    b = TextBlob(unicode(i[0]))
                    currentLanguage = b.detect_language()
                    if currentLanguage in languageMap:
                        languageMap[currentLanguage] += 1
                    else:
                        languageMap[currentLanguage] = 1
                except:
                    pass
                currentNumber += 1
                print currentNumber
    listOfWords = []
    for i in languageMap:
        for x in range(0, languageMap[i]):
            listOfWords.append(i)
    listOfWordsCounter = collections.Counter(listOfWords)
    print 'Best Languages:', listOfWordsCounter.most_common(5)
    print languageMap
Author: AbhiAgarwal, Project: classes, Lines: 29, Source file: Agarwal-Homework3.py
Example 5: extract
def extract(ngrams, dataset, doc_id):
    # extract keywords
    print 'Extracting keywords'
    for i, ngram in enumerate(ngrams):
        doc = doc_id[i]
        if field not in dataset[doc]:
            dataset[doc][field] = set()
        if doc > 0 and doc % 1000 == 0:
            print '\t', doc
        for kw in filter(lambda k: '_' in k, ngram):
            keyword = kw.replace('_', ' ')
            kw_tb = TextBlob(keyword)
            # filter out punctuation, etc (make sure that there are two non-punc words)
            if len(kw_tb.words) < 2:
                continue
            # add keywords which are all proper nouns
            # (compare against set(), not {} -- a set is never equal to an empty dict)
            distinct_tags = set(t[1] for t in kw_tb.tags)
            if distinct_tags - {'NNP', 'NNPS'} == set():
                dataset[doc][field].add(kw_tb.lower())
                continue
            # add noun phrases
            for np in kw_tb.lower().noun_phrases:
                dataset[doc][field].add(np)
    return kw_set_to_list(dataset)
Author: MattL920, Project: nasaMining, Lines: 32, Source file: extract.py
Example 6: tag_documents_text
def tag_documents_text(client):
    documents = client['cornell']['documents']
    for doc in documents.find():
        blob = TextBlob(doc['text'], pos_tagger=PerceptronTagger())
        parsed_blob = blob.parse()
        documents.update({'name': doc['name']}, {'$set': {'parsed_perceptron': parsed_blob}})
Author: matheuscas, Project: fuzzy_opinion_mining, Lines: 7, Source file: model.py
Example 7: update_book
def update_book(book):
    blob = TextBlob(book.description)
    if blob.detect_language() == 'en':
        description = ''
        nouns = filter(lambda x: x[1] == 'NN' or x[1] == 'NNP', blob.tags)
        for noun, tag in nouns:
            description += noun + " "
            if len(noun) > 2:
                description += TextBlob(noun).translate(to='ko').string + " "
    else:
        description = book.description
    book_document = search.Document(
        doc_id=book.ISBN,
        fields=[
            search.TextField(name='title', value=remove_punc(book.title)),
            search.TextField(name='author', value=remove_punc(book.author)),
            search.TextField(name='description', value=remove_punc(description))
        ]
    )
    index = get_book_index()
    index.put(book_document)
Author: ZeroPage, Project: zp-library-old, Lines: 27, Source file: library_search.py
Example 8: process_status
def process_status(status, lang):
    text = ""
    # translate
    if lang == 'en':
        text = status['text']
    else:
        blob = TextBlob(status['text'])
        try:
            text = str(blob.translate())
        except textblob.exceptions.NotTranslated:
            text = status['text']
    # sentiment analysis
    sentiment = TextBlob(text).sentiment
    return {
        "created_at": 1000 * int(time.mktime((status['created_at']).timetuple()))
        , "id_str": status['id_str']
        , "text": text
        , "sentiment": {"polarity": sentiment[0], "subjectivity": sentiment[1]}
        , "retweet_count": status['retweet_count']
        , "in_reply_to_status_id_str": status['in_reply_to_status_id_str']
        , "geo": status['geo']
        , "retweeted": status['retweeted']
        , "in_reply_to_user_id_str": status['in_reply_to_user_id_str']
    }
Author: fourvvvv, Project: dt-twitter-network, Lines: 27, Source file: status_wall.py
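A minimal smoke test with a hypothetical status dict; lang='en' skips the network-dependent translate branch, so only the local sentiment analysis runs (assumes the module's own time/textblob imports):

import datetime

status = {
    "created_at": datetime.datetime(2015, 6, 1, 12, 0),
    "id_str": "42",
    "text": "hello world",
    "retweet_count": 0,
    "in_reply_to_status_id_str": None,
    "geo": None,
    "retweeted": False,
    "in_reply_to_user_id_str": None,
}
print(process_status(status, "en"))  # sentiment computed locally by TextBlob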
Example 9: matchRhyme
def matchRhyme(word1, word2):
    # append a Hindi marker word ("text") so short inputs survive translation;
    # its English rendering is stripped off again below
    word1 += "टेक्स्ट"
    word2 += "टेक्स्ट"
    str1 = " " + word1
    str2 = " " + word2
    rhymeMeter = 0  # initialize so the function never returns an unbound name
    hindi_blob1 = TextBlob(str1)
    hindi_blob2 = TextBlob(str2)
    transliteratedtxt1 = str(hindi_blob1.translate(from_lang="hi", to='en'))[:-5]
    transliteratedtxt2 = str(hindi_blob2.translate(from_lang="hi", to='en'))[:-5]
    word1Index = len(transliteratedtxt1)
    word2Index = len(transliteratedtxt2)
    # matching last character if they are the same
    if transliteratedtxt1[word1Index-1] == transliteratedtxt2[word2Index-1]:
        # matching if the second-to-last characters are the same matra (vowel sign)
        if (transliteratedtxt1[word1Index-2] == transliteratedtxt2[word2Index-2]
                and transliteratedtxt1[word1Index-2] in "aeiou"):
            rhymeMeter = 5
        elif transliteratedtxt1[word1Index-2] != transliteratedtxt2[word2Index-2]:
            rhymeMeter = 4
    return rhymeMeter
Author: ManasMahanta, Project: Final_project, Lines: 33, Source file: PhraseClassify.py
Example 10: _german
def _german(self, text):
    blob = TextBlob(text)
    try:
        return str(blob.translate(to="en"))
    except:
        return text
Author: firecurious, Project: kokoro, Lines: 7, Source file: translate.py
Example 11: on_command
def on_command(self, msg, stdin, stdout, reply):
    # pre-process args
    # this might mess up if "from" or "to" is left out and
    # the message contains "from" or "to"
    self._push_character(msg["args"], "from", "-", 1)
    self._push_character(msg["args"], "to", "-", 1)
    try:
        args = self.parser.parse_args(msg["args"][1:])
    except (argparse.ArgumentError, SystemExit):
        return
    # get message from the appropriate place
    if args.message:
        message = " ".join(args.message)
    else:
        message = stdin.read().strip()
    # translate
    from_lang = args.from_language
    to_lang = args.to_language
    message = TextBlob(message)
    try:
        translated = message.translate(from_lang=from_lang, to=to_lang)
    except:
        pass
    else:
        print(translated, file=stdout)
Author: bet0x, Project: smartbot, Lines: 28, Source file: translate.py
Example 12: getKeywords
def getKeywords(text, useless):
    # compare words by value (== rather than is) and build a new list
    # instead of mutating the blob while iterating over it
    # (TextBlob itself has no remove() method)
    words = TextBlob(text).words
    return [word for word in words if word not in useless]
Author: anmousyon, Project: python, Lines: 7, Source file: db_fill.py
Example 13: gen_translate
def gen_translate(msg, fromlang=None, outputlang='en'):
    try:
        blob = TextBlob(msg)
        blob = blob.translate(from_lang=fromlang, to=outputlang)
        return str(blob)
    except NotTranslated:
        return msg
Author: ubuntor, Project: cslbot, Lines: 7, Source file: textutils.py
Example 14: getEntities
def getEntities(parser, tweet, xEntities):
    try:
        spacyParsedObject = parser(tweet)
        sentence = TextBlob(tweet)
        textblobTaggedObject = sentence.parse().split()
        patterntaggedObject = tag(tweet, tokenize=True)
        for word in patterntaggedObject:
            word, wordtag = word
            if wordtag == "NNP" or wordtag == "NN" or wordtag == "PRP":
                v = str(word)
                v = v.strip()
                if v not in xEntities:
                    xEntities[v] = str(wordtag)
        for taggedObject in textblobTaggedObject:
            for word in taggedObject:
                word, wordtag = word[0], word[1]
                if wordtag == "NNP" or wordtag == "NN" or wordtag == "PRP":
                    v = str(word)
                    v = v.strip()
                    if v not in xEntities:
                        xEntities[v] = str(wordtag)
        for word in spacyParsedObject:
            if word.tag_ == "NNP" or word.tag_ == "NN" or word.tag_ == "PRP":
                v = str(word)
                v = v.strip()
                if v not in xEntities:
                    xEntities[v] = str(word.tag_)
        return xEntities
    except Exception as e:
        return e
Author: project-spinoza-dev, Project: tsakpy, Lines: 30, Source file: getEntities.py
Example 15: hi
def hi(bot, trigger):
    lang_codes = ['af', 'ga', 'sq', 'it', 'ar', 'ja', 'az', 'kn', 'eu', 'ko', 'bn', 'la', 'en']
    trans = TextBlob('Greetings dear ' + trigger.nick + ' on the road of life ')
    ind = randint(0, 12)
    trans = trans.translate(to=lang_codes[ind])
    saying = str(trans)
    bot.say(saying)
Author: Amanda-Clark, Project: IRC_Bot_Code, Lines: 7, Source file: Amanda.py
Example 16: sentiment_pattern
def sentiment_pattern(text, gram_n=6):
    blob = TextBlob(text)
    ngrams = blob.ngrams(n=gram_n)
    sentiment_list = []
    datalist = []
    for gram in ngrams:
        str_gram = " ".join(gram)
        print str_gram
        data = (0, 0, str_gram, None)
        datalist.append(Datapoint(*data))
        # alternative kept from the original (TextBlob's built-in sentiment):
        #   sentiment = TextBlob(str_gram).sentiment[0]
        #   sentiment_list.append(1 if sentiment > 0 else -1 if sentiment < 0 else 0)
    predictor = pickle.load(open("predictor.pickle", "rb"))
    prediction = predictor.predict(datalist)
    # map the classifier's 0-4 output onto -1/0/1
    for sentiment in prediction:
        sentiment = int(sentiment)
        if sentiment < 2: sentiment_list.append(-1)
        if sentiment == 2: sentiment_list.append(0)
        if sentiment > 2: sentiment_list.append(1)
    print sentiment_list
    return sentiment_list
Author: code-11, Project: BloombergSarcasm, Lines: 31, Source file: sentiment_analysis_2.py
Example 17: tokenize
def tokenize(text, spell=False, stem=False, lemma=False, lower=False, stop=False):
    # lowercase, remove non-alphas and punctuation
    b = TextBlob(unicode(text, 'utf8'))
    if spell:
        b = b.correct()
    words = b.words
    if lower:
        words = words.lower()
    if lemma:
        words = words.lemmatize()
    if stem:
        words = [stemmer.stem(w) for w in words]
    if stop:
        tokens = [w.encode('utf-8') for w in words if w.isalpha() and w not in stopwords]
    else:
        tokens = [w.encode('utf-8') for w in words if w.isalpha()]
    # (the original also carried a commented-out variant that expanded the
    # tokens into 1..n-grams via TextBlob(' '.join(tokens)).ngrams(i))
    return tokens
Author: potatochip, Project: kojak, Lines: 33, Source file: text_processors.py
Example 18: answer
def answer(question):
    global IsAnswer, detected, u
    IsAnswer = True
    DetectLang = TextBlob(question)
    detected = DetectLang.detect_language()
    if detected == 'en':
        print("language detected: en")
        u = 'en'
        print(len(words), "len(words)")
        low = question.lower()
        questions = re.sub(r'[^\w]', ' ', low).split()  # list
        BadWords(questions)
        print(questions)

        def writeout(words, question, IsAnswer):
            r = []
            if len(words) > 3000:
                a1 = len(questions)
                for x in range(0, a1):
                    words.remove(random.choice(words))
                print(len(words), "len(words)")
            os.remove('newwords.txt')
            file = open('newwords.txt', 'w')
            words.extend(questions)
            r.extend(words)
            s = ' '.join(r)
            file.write(s)

        writeout(words, question, IsAnswer)
        randomthought()
    else:
        u = detected
        print("language detected:", u)
        randomthought()
Author: tinypirates, Project: minniebot-mk2, Lines: 34, Source file: current.py
Example 19: nounize
def nounize(aline):
    words = ''
    aline = TextBlob(aline.decode('ascii', errors='replace'))
    for word, tag in aline.tags:
        if tag == 'NN':
            word = random.choice(postnouns).strip()
        words = words + ' ' + word
    return words
Author: therewasaguy, Project: recursive-etiquette-poetry-generator, Lines: 8, Source file: etiquette.py
Example 20: gen_translate
def gen_translate(msg, fromlang, outputlang):
    try:
        blob = TextBlob(msg)
        # FIXME: language detection is broken.
        blob = blob.translate(from_lang=fromlang, to=outputlang)
        return str(blob)
    except NotTranslated:
        return msg
Author: ComputerScienceHouse, Project: cslbot, Lines: 8, Source file: textutils.py
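Both gen_translate variants (Examples 13 and 20) wrap the same TextBlob.translate() call; a hypothetical invocation, which needs network access for the translation API:

print(gen_translate("bonjour tout le monde", "fr", "en"))  # -> "hello everyone" (or similar)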
Note: the textblob.TextBlob class examples in this article were compiled by 纯净天空 from GitHub/MSDocs and other source-code and documentation platforms; the snippets come from open-source projects by their respective contributors, and copyright remains with the original authors. Consult each project's License before reusing or distributing a snippet; do not reproduce without permission.