• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    公众号

Python wordnet.synset函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中nltk.corpus.wordnet.synset函数的典型用法代码示例。如果您正苦于以下问题:Python synset函数的具体用法?Python synset怎么用?Python synset使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了synset函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: process_verb

def process_verb(verb):
    """Map a camel-case verb phrase onto the closest YouTube action verb.

    verb: string ending with a newline; its capitalised segments are the
    candidate verbs (e.g. "PersonCuttingBread\n").
    Returns the best match from youtube_setof_verbs.txt by Wu-Palmer
    similarity, or '<>' when nothing matches.
    """
    verb = verb[:-1]  # drop the trailing newline character
    with open('youtube_setof_verbs.txt') as f:
        verb_dict = f.read().split('\n')

    max_score = 0
    finl_verb = (verb, '<>')
    # Split the camel-case string into capitalised segments.
    verb_list = re.findall('[A-Z][^A-Z]*', verb)

    for prob_verb in verb_list:
        if prob_verb.endswith('ing'):
            prob_verb = prob_verb[:-3]  # strip the 'ing' suffix
            if prob_verb.lower() == 'cutt':
                prob_verb = 'cut'  # 'cutting' doubles the consonant
        if not wn.synsets(prob_verb):
            continue
        try:
            v1 = wn.synset(prob_verb + '.v.01')
        except Exception:
            finl_verb = (prob_verb, '<>')
            continue
        for yout_verb in verb_dict:
            if yout_verb == '':
                continue
            # BUG FIX: the original wrapped the whole dictionary scan in one
            # bare except, so a single entry without a '.v.01' sense aborted
            # the scan AND overwrote any match already found.
            try:
                v2 = wn.synset(yout_verb + '.v.01')
            except Exception:
                continue
            score = v1.wup_similarity(v2)
            # wup_similarity may return None (no common ancestor) --
            # None > 0 raises TypeError on Python 3.
            if score is not None and score > max_score:
                finl_verb = (prob_verb, yout_verb)
                max_score = score

    return finl_verb[1]
开发者ID:sumitb,项目名称:YouTube2Action,代码行数:32,代码来源:word.py


示例2: process_subj

def process_subj(subj, flag):
    """Map a comma-separated subject/object list onto the closest YouTube term.

    flag == 1 matches against youtube_setof_subjects.txt, flag == 2 against
    youtube_setof_objects.txt.  A single candidate (no comma) is returned
    unchanged; otherwise the best match by Wu-Palmer similarity is returned,
    or '<>' when nothing matches.
    """
    if flag == 1:
        with open('youtube_setof_subjects.txt') as f:
            subj_dict = f.read().split('\n')
    elif flag == 2:
        with open('youtube_setof_objects.txt') as f:
            subj_dict = f.read().split('\n')

    max_score = 0
    finl_subj = (subj, '<>')
    subj_list = subj.split(',')

    if len(subj_list) == 1:
        # Single candidate: no dictionary matching is performed.
        return subj
    for prob_subj in subj_list:
        prob_subj = prob_subj.strip()
        if not wn.synsets(prob_subj):
            continue
        try:
            v1 = wn.synset(prob_subj + '.n.01')
        except Exception:
            finl_subj = (prob_subj, '<>')
            continue
        for yout_subj in subj_dict:
            if yout_subj == '':
                continue
            # BUG FIX: as in process_verb, a dictionary entry without an
            # '.n.01' sense must not abort the scan or discard earlier hits.
            try:
                v2 = wn.synset(yout_subj + '.n.01')
            except Exception:
                continue
            score = v1.wup_similarity(v2)
            # wup_similarity may return None; guard before comparing.
            if score is not None and score > max_score:
                finl_subj = (prob_subj, yout_subj)
                max_score = score

    return finl_subj[1]
开发者ID:sumitb,项目名称:YouTube2Action,代码行数:34,代码来源:word.py


示例3: preprocess_docs

def preprocess_docs():
    """Yield (train, topic, buf) per document.

    buf lists, for each non-stopword token of the title, the token and its
    mean WordNet path similarity to the 'grain.n.08' and 'wheat.n.02'
    reference synsets (0 when no sense is comparable).
    """
    stopwords = nltk.corpus.stopwords.words('english')
    corpus = list(filtered_corpus())
    # Hoist the two reference synsets out of the per-token loop.
    grain_ref = wn.synset('grain.n.08')
    wheat_ref = wn.synset('wheat.n.02')
    counter = 0
    for train, topic, title, text in corpus:
        if counter % 10 == 0:
            # Progress indicator, rewritten in place (carriage return).
            sys.stdout.write("%.2f %%\r" % (counter * 100.0 / len(corpus)))
            sys.stdout.flush()
        counter += 1
        tokens = [t for t in nltk.word_tokenize(title) if t.lower() not in stopwords]
        buf = []
        for word in tokens:
            synsets = wn.synsets(word)
            # BUG FIX: the original appended the wheat score to the `grain`
            # list and vice versa, mislabelling the yielded tuple fields.
            grain_scores = [s.path_similarity(grain_ref) for s in synsets]
            wheat_scores = [s.path_similarity(wheat_ref) for s in synsets]
            grain_scores = [v for v in grain_scores if v is not None]
            wheat_scores = [v for v in wheat_scores if v is not None]
            grain = sum(grain_scores) * 1.0 / len(grain_scores) if grain_scores else 0
            wheat = sum(wheat_scores) * 1.0 / len(wheat_scores) if wheat_scores else 0
            buf.append((word, grain, wheat))
        yield train, topic, buf
    sys.stdout.write("\n")
开发者ID:Sentimentron,项目名称:CS909-Excercise8,代码行数:33,代码来源:pre713.py


示例4: get_score

def get_score(tags, groups):
    """Score how well image tags match the query groups.

    tags:   iterable of objects with a .text attribute, or None.
    groups: iterable of lowercase query terms.
    Returns the mean per-match score (substring hit = 2.0, otherwise the
    WordNet path similarity when >= 0.3), or 0 when nothing matched.
    """
    sscore = 0
    scount = 0
    illegal_word = 0

    if tags is not None:
        for g in groups:
            # BUG FIX: the original iterated `k.tags` although `k` is not
            # defined in this scope; the bare except silently swallowed the
            # resulting NameError on every call.  It also mixed tabs and
            # spaces, which Python 3 rejects outright.
            for x in tags:
                try:
                    tag_text = str(x.text).lower()
                    if g in tag_text:
                        # Substring hit scores a flat 2.0.
                        sscore += 2.0
                        scount += 1
                    else:
                        # Otherwise fall back to WordNet path similarity.
                        tag = wn.synset(tag_text + '.n.01')
                        group = wn.synset(g + '.n.01')
                        sem = wn.path_similarity(group, tag)
                        if sem >= 0.3:
                            sscore += sem
                            scount += 1
                except Exception:
                    # Tag has no noun synset, or similarity was None.
                    illegal_word += 1
    if scount != 0:
        return sscore / scount
    return 0
开发者ID:tushar19,项目名称:Web-Image-Ranking-Retrieval,代码行数:28,代码来源:imgsearch.py


示例5: getSenseSimilarity

def getSenseSimilarity(worda, wordb):

	"""
	Find similarity between the word senses of two words.

	Prints path similarity, Wu-Palmer similarity and both definitions for
	every sense pair whose path similarity is defined.
	"""

	wordasynsets = wn.synsets(worda)
	wordbsynsets = wn.synsets(wordb)

	# NOTE(review): NLTK 3.0 turned Synset.name and Synset.definition into
	# methods; the original used the pre-3.0 attribute form (`syns.name`),
	# which on modern NLTK passes a bound-method repr to wn.synset().
	synsetnamea = [wn.synset(str(syns.name())) for syns in wordasynsets]
	synsetnameb = [wn.synset(str(syns.name())) for syns in wordbsynsets]

	for sseta in synsetnamea:
		for ssetb in synsetnameb:

			pathsim = sseta.path_similarity(ssetb)
			wupsim = sseta.wup_similarity(ssetb)

			# path_similarity returns None for incomparable senses.
			if pathsim is not None:
				print("Path Sim Score: ", pathsim, " WUP Sim Score: ", wupsim,
				      "\t", sseta.definition(), "\t", ssetb.definition())
开发者ID:dxd132630,项目名称:NeoPythonic,代码行数:27,代码来源:similarity.py


示例6: probability

def probability(tokens, category, dictionary, total):
    """Score tokens against a dictionary.

    category == "sense": returns the average, over tokens, of the summed
    WordNet path similarity between the token's synset name and each
    dictionary sense, weighted by the sense's dictionary value.
    Any other category: returns the summed relative frequency of the
    tokens in the dictionary (count / total instances, 0 for unknowns).

    The original mixed tabs and spaces, which raises TabError on Python 3;
    indentation is normalised here and the dead `dic` alias removed.
    """
    if category == "sense":
        if len(tokens) == 0:
            return 0
        total_score = 0
        for token in tokens:
            for dict_sense in dictionary:
                score = wn.synset(token).path_similarity(wn.synset(dict_sense))
                # path_similarity returns None for incomparable senses.
                if score is not None:
                    total_score += score * dictionary[dict_sense]
        return total_score / len(tokens)

    p = 0
    for token in tokens:
        token_prob = dictionary[token] if token in dictionary else 0
        p += token_prob / float(total)
    return p
开发者ID:aiqiliu,项目名称:AskReddit-analytics,代码行数:26,代码来源:titleAnalysis.py


示例7: get_similar_words

def get_similar_words(word):
    """Collect lemma and hypernym names related to *word* from WordNet.

    The first noun sense is consulted first; verb senses are only tried
    when the noun lookups produced nothing.  Results containing anything
    but letters (e.g. underscored compounds such as "domestic_animal")
    and the word itself are filtered out, since 2-gram search is not
    implemented here.
    """
    noun_lemmas = []
    noun_hypernyms = []
    verb_lemmas = []
    verb_hypernyms = []

    try:
        noun_lemmas = [str(lem.name()) for lem in wn.synset(word + '.n.01').lemmas()]
    except WordNetError:
        pass
    try:
        noun_hypernyms = [str(lem.name()).split('.')[0]
                          for lem in wn.synset(word + '.n.01').hypernyms()]
    except WordNetError:
        pass

    if len(noun_lemmas) == 0 and len(noun_hypernyms) == 0:
        # Only try verbs if there are no similar nouns.
        try:
            verb_lemmas = [str(lem.name()) for lem in wn.synset(word + '.v.01').lemmas()]
        except WordNetError:
            pass
        try:
            verb_hypernyms = [str(lem.name()).split('.')[0]
                              for lem in wn.synset(word + '.v.01').hypernyms()]
        except WordNetError:
            pass

    candidates = noun_lemmas + noun_hypernyms + verb_lemmas + verb_hypernyms
    alpha_only = re.compile('^[a-zA-Z]+$')
    return filter(lambda w: alpha_only.match(w) and w != word, candidates)
开发者ID:seowyanyi,项目名称:cs3245-4,代码行数:32,代码来源:helper.py


示例8: expand_queries

def expand_queries(file):
    '''
    For each term in a query, takes the first synset of the word from
    wordnet and adds all synonyms of that synset.  Prints the original
    query followed by the expanded query for every <text> line of `file`.
    '''
    def _append_synonyms(full_q, word, looking_for):
        # Append every lemma name of `looking_for` that is not the word itself.
        for new_word in wn.synset(looking_for).lemma_names:  # was .definition
            if new_word.lower() != word.lower():
                full_q = full_q + ' ' + str(new_word)
        return full_q

    fh = open(file)
    for sentence in fh:
        sentence = sentence.strip()
        if sentence.find('<text>') == -1:
            continue
        query = sentence[sentence.find('>') + 1: sentence.rfind('<')]
        tagged = nltk.pos_tag(nltk.wordpunct_tokenize(query.lower()))
        full_q = query
        for word, pos in tagged:
            if word in stopwords.words('english'):
                continue
            looking_for = str(word) + '.' + str(get_wordnet_pos(pos)) + '.01'
            synsets = wn.synsets(word)
            # NOTE(review): membership is tested against the *string repr*
            # of the synset list, as in the original -- a substring hack;
            # comparing synset names directly would be safer.
            if looking_for in str(synsets):
                full_q = _append_synonyms(full_q, word, looking_for)
            elif wn.morphy(word) is not None:
                # Retry with the stemmed form of the word.
                word = wn.morphy(word)
                looking_for = str(word) + '.' + str(get_wordnet_pos(pos)) + '.01'
                print(str(looking_for) + ' THIS IS WORD')
                synsets = wn.synsets(word)
                if looking_for in str(synsets):
                    full_q = _append_synonyms(full_q, word, looking_for)
        print(query + ' ' + full_q)
开发者ID:britth,项目名称:inls890-microblog,代码行数:33,代码来源:wordnetExpansion.py


示例9: get_similarity

	def get_similarity(self, word1, word2):
		"""Compute word similarity using the WordNet semantic dictionary.

		Each word is reduced to its base form with wn.morphy when WordNet
		knows one; the result is the maximum path similarity over all
		sense pairs, or 0 when no pair is comparable.
		"""
		# Stem each word when a base form exists.
		if wn.morphy(word1.lower()) is not None:
			word1 = wn.morphy(word1.lower())
		if wn.morphy(word2.lower()) is not None:
			word2 = wn.morphy(word2.lower())
		word1_synsets = wn.synsets(word1)
		word2_synsets = wn.synsets(word2)
		sim = 0

		for syn1 in word1_synsets:
			w1 = wn.synset(syn1.name())
			for syn2 in word2_synsets:
				w2 = wn.synset(syn2.name())
				tmp = w1.path_similarity(w2)
				# BUG FIX: path_similarity may return None (no common
				# ancestor); None > 0 raises TypeError on Python 3.
				if tmp is not None and tmp > sim:
					sim = tmp
		return sim
开发者ID:cnspica,项目名称:ASExtractor,代码行数:28,代码来源:EnKeywordExtraction.py


示例10: print_other_lexical_rel

def print_other_lexical_rel():
    """Print antonym and entailment examples from WordNet."""
    good1 = wn.synset('good.a.01')
    # The original also called wn.lemmas('good') here and discarded the
    # result -- dead code, removed.
    print("Antonyms of 'good': " + str(good1.lemmas()[0].antonyms()))
    print("")
    print("Entailment of 'walk': " + str(wn.synset('walk.v.01').entailments()))
    print("")
开发者ID:anirudhcoder,项目名称:Natural-Language-Processing,代码行数:7,代码来源:hw2-part2-wordnet-examples.py


示例11: overlapCount

    def overlapCount(self, sentence):
        """Count word/verb overlap between a wiki sentence and the question.

        Starts at 1 so sentences with no overlap can still be guessed.
        Verb pairs add their WordNet path similarity (identical verbs
        score 1); question words add 1 for an exact hit, 0.1 for a
        case-insensitive hit.
        """
        count = 1

        sWiki = TextBlob(self.arrayToString(sentence))
        sVerbs = self.getVerbs(sWiki)

        # Compare verbs via WordNet path similarity.
        for sverb in sVerbs:
            synv = wn.synset(sverb + '.v.01')
            for qverb in self.questionVerbs:
                synq = wn.synset(qverb + '.v.01')
                sim = synv.path_similarity(synq)
                # BUG FIX: path_similarity returns None for unrelated verbs;
                # the original added None to count, raising TypeError.
                if sim is not None:
                    count += sim

        # Remove stop words AFTER POS tags were taken from the raw sentence.
        s = self.removeStopWords(sentence)
        sLower = self.removeStopWords(sentence.lower())

        for word in self.qList:
            if word in s:
                count += 1
            elif word.lower() in sLower:
                count += 0.1
        return count
开发者ID:FlyingGroundhogs,项目名称:QASystem,代码行数:26,代码来源:VOverlap.py


示例12: compare

 def compare(self, word1, word2):
     """Return the Wu-Palmer similarity between the first synsets of two words."""
     name_a = wn.synsets(word1)[0].name
     name_b = wn.synsets(word2)[0].name
     synset_a = wn.synset(name_a)
     synset_b = wn.synset(name_b)
     return synset_a.wup_similarity(synset_b)
开发者ID:danjamker,项目名称:N-Fly,代码行数:7,代码来源:WordNet.py


示例13: is_ingredient

def is_ingredient(word):
    """
    Return True if the word is an ingredient, False otherwise.

    >>> is_ingredient('milk')
    True
    >>> is_ingredient('blackberries')
    True
    >>> is_ingredient('Canada')
    False
    >>> is_ingredient('breakfast')
    False
    >>> is_ingredient('dish')
    False
    """
    rejected = set(wordnet.synset(name) for name in
                   ('meal.n.01', 'meal.n.02', 'dish.n.02', 'vitamin.n.01'))
    accepted = set(wordnet.synset(name) for name in
                   ('food.n.01', 'food.n.02'))
    for word_synset in wordnet.synsets(word, wordnet.NOUN):
        # The hypernym closure of the sense, plus the sense itself.
        ancestry = set(word_synset.closure(lambda s: s.hypernyms()))
        ancestry.add(word_synset)
        # A reject ancestor vetoes the sense before accepts are consulted.
        if ancestry & rejected:
            return False
        if ancestry & accepted:
            return True
    # Fall back to the static ingredient word list.
    return word in wordlists.ingredients
开发者ID:JoshRosen,项目名称:cmps140_creative_cooking_assistant,代码行数:29,代码来源:ingredients.py


示例14: ontoList

	def ontoList(self, synset):
		"""Build the list of non-empty ontology facets for `synset`.

		All parts of speech share the same six facets; verbs additionally
		get their frame strings, placed first.  (The original duplicated
		the six shared computations across both branches.)
		"""
		ln = wn.synset(synset).lexname.split('.')[1]
		hyper = self.lemmatize(self.getHypernyms(synset))
		definition = self.getDefinition(synset)
		lemmas = self.lemmatize(self.getLemmas(synset))
		examples = self.getExamples(synset)
		hypo = self.lemmatize(self.getHyponyms(synset))
		ontologyList = [ln, lemmas, examples, hypo, definition, hyper]

		if self.pos == 'v':
			# Verb frame strings, e.g. "Somebody ----s something".
			strings = [s.replace("_", " ") for s in self.getFrameStrings(synset)]
			ontologyList.insert(0, strings)

		# Keep only non-empty facets.
		returnList = list()
		for facet in ontologyList:
			if facet:
				returnList.append(facet)
		return returnList
开发者ID:aankit,项目名称:centrality,代码行数:25,代码来源:wnQuery_dev.py


示例15: calculate_and_write_edge_weigthings_for_synsets

def calculate_and_write_edge_weigthings_for_synsets(synset_filenames_dict, file_name):
  """Compute and persist an edge weighting for every unordered synset pair.

  The weighting combines LCH similarity (zeroed below 2.0) with the
  co-occurrence of the two synsets' file sets, normalized by the global
  maximum (zeroed below 0.4, weighted x4).  Results are flushed to
  `file_name` in batches of ~1000 edges to bound memory use.
  """
  max_co_occurrence = calculate_max_co_occurrence(synset_filenames_dict)
  edge_weigthings_for_synsets = dict()
  how_many_added = 0
  how_many_done = 0
  how_many_to_do = len(synset_filenames_dict.keys()) * (len(synset_filenames_dict.keys())-1)
  # Truncate/initialise the output file before the append-only batches.
  write_edge_weightings_to_file(dict(), file_name)

  # BUG FIX: dict.iteritems() is Python-2-only; items() works on both.
  for synset1, filenames1 in synset_filenames_dict.items():
    for synset2, filenames2 in synset_filenames_dict.items():
      if synset1 < synset2:  # visit each unordered pair exactly once
        how_many_done += 1
        similarity = wn.synset(synset1).lch_similarity(wn.synset(synset2))
        co_occurence = len(set(filenames1).intersection(set(filenames2)))
        # Explicit float division: under Python 2 the original int/int
        # division floored almost every ratio to 0.
        normalized_co_occurrence = co_occurence / float(max_co_occurrence)
        if similarity < 2.0:
          similarity = 0
        if normalized_co_occurrence < 0.4:
          normalized_co_occurrence = 0
        edge_weighting = similarity + 4*normalized_co_occurrence
        if edge_weighting != 0:
          edge_weigthings_for_synsets[(synset1, synset2)] = edge_weighting
          how_many_added += 1
        if how_many_added > 1000:
          print_status("Done with " + str(how_many_done) + " von " + str(how_many_to_do) + "\n")
          write_edge_weightings_to_file(edge_weigthings_for_synsets, file_name, append_to_file=True)
          edge_weigthings_for_synsets = dict()
          how_many_added = 0
  write_edge_weightings_to_file(edge_weigthings_for_synsets, file_name, append_to_file=True)
开发者ID:nicolas-fricke,项目名称:semmul2013-group1,代码行数:30,代码来源:mcl_keyword_clustering.py


示例16: get_message

def get_message(message_parser):
  """Parse a "<number>|<text>" message into a need/supply dict.

  The message is classified as a need or a supply by synonym overlap,
  then (category, count) pairs are extracted, with each noun coerced to
  "food" or "water" via WordNet Wu-Palmer similarity.
  """
  message_split =  message_parser.split("|")
  mobile_number = message_split[0]
  need_synonyms = ["require", "want", "motivation", "motive", "ask", "call for", "demand", "involve", "necessitate", "need", "postulate", "take", "indigence", "pauperism", "pauperization", "penury"]
  supply_synonyms = ["issue", "furnish", "provide", "render", "add", "append", "cater", "ply", "provision", "supplying", "afford", "yield", "commit", "consecrate", "dedicate", "devote", "spring", "springiness", "impart", "leave", "pass on", "ease up", "give way", "move over", "render", "feed", "generate", "return", "throw", "chip in", "contribute", "kick in", "grant", "pay", "break", "cave in", "collapse", "fall in", "founder", "hand", "pass", "reach", "turn over", "have", "hold", "make", "establish", "open", "apply", "gift", "present", "sacrifice"]
  tokens = nltk.word_tokenize(message_split[1])
  need = len(set(tokens) & set(need_synonyms)) > 0
  need_json = {"need": True} if need else {"supply": True}
  need_json.update({"number": mobile_number})
  # Words naming units/containers, plus the need/supply verbs themselves,
  # must not become categories.  BUG FIX: the original membership list
  # embedded the two synonym *lists* as elements, so a token could never
  # equal them and those words were never skipped.
  skip_words = set(["cups", "cup", "packets", "packet", "bottle", "bottles",
                    "bundle", "bundles", "packages", "package"])
  skip_words.update(need_synonyms)
  skip_words.update(supply_synonyms)
  current_count = None
  tagged_tokens = nltk.pos_tag(tokens)
  for token, tag in tagged_tokens:
    if tag == 'CD':
      current_count = get_integer(token)
    elif tag == 'DT':
      current_count = 1
    elif tag in ['NNS', 'NN']:
      if token in skip_words:
        continue
      current_category = token
      c = wn.synsets(current_category)
      food = wn.synset('food.n.01').wup_similarity(c[0])
      water = wn.synset('water.n.01').wup_similarity(c[0])
      current_category = "food" if food > water else "water"
      # A noun with no preceding number/determiner defaults to count 1.
      # (Replaces the original try/except NameError hack around an
      # occasionally-unbound current_count, and drops its debug print.)
      if current_count is None:
        current_count = 1
      need_json.update({current_category: current_count})
      current_count = None
  return need_json
开发者ID:iamsiva11,项目名称:Disaster-whatsapp-bot,代码行数:35,代码来源:message_parser.py


示例17: define

def define(word, Webster, bestdef, changed, old_topic, new_topic):
	"""Define a word, if desired by the user, and if the topic has changed.

	Returns a (definition_changed, definition_text) pair: the existing
	Webster definition is kept when the topic is unchanged or the user
	accepts the carry-over; otherwise WordNet noun senses are offered one
	by one (skipping `bestdef`) and, failing that, the user is asked to
	supply a definition.
	"""
	import answer
	if ((Webster != "") and (not changed)):
		return (False, Webster)
	if (Webster == ""):
		answer.write("The word " + word + " was not defined under the topic " + old_topic + ".")
	else:
		asked = ask.getPlay("The word " + word + " was defined under the topic " + old_topic + " as " + Webster + ".\nDo you want this meaning to carry over to the new topic " + new_topic + "?  ")
		if yes(asked):
			return (False, Webster)
	undone = True
	dno = 1
	while (undone):
		if (dno == bestdef):
			dno += 1  # skip the sense the caller already selected
		string = word + ".n." + str(dno)
		try:
			if (dno < len(wordnet.synsets(word, pos = wordnet.NOUN))):
				asked = ask.getPlay("Does " + wordnet.synset(string).definition + " work for your usage of " + word + "?  ")
				# BUG FIX: the original tested yes(ask) -- the `ask` module
				# object -- instead of the user's reply `asked`, so an
				# accepting answer could never end the loop here.
				undone = not yes(asked)
				newdef = wordnet.synset(string).definition
				dno += 1
			else:
				newdef = ask.getPlay("Then how would you define " + word + "?  ")
				undone = False
		except(Exception):
			newdef = ask.getPlay("How would you define " + word + "?  ")
			undone = False
	return (True, newdef)
开发者ID:jjs0sbw,项目名称:revelator,代码行数:28,代码来源:define.py


示例18: similarity

def similarity(word1, word2, tag):
    """Resnik similarity between the first `tag` senses of word1 and word2.

    tag: WordNet POS letter, e.g. 'n' or 'v'.
    """
    obj1 = wn.synset(word1 + "." + tag + ".01")
    obj2 = wn.synset(word2 + "." + tag + ".01")
    # Information content from the Brown corpus.  The original loaded the
    # same file a second time into an unused `semcor_ic` -- dead code.
    brown_ic = wordnet_ic.ic('ic-brown.dat')
    return obj1.res_similarity(obj2, brown_ic)
开发者ID:Lightyagami1,项目名称:exploratoryProject,代码行数:8,代码来源:project.py


示例19: wsd

 def wsd(self, sent, target, tag=None):
     """Disambiguate `target` within `sent`; print and return the best sense.

     Scores the senses (optionally constrained by POS `tag`) and takes the
     greedy best of the top 10.
     """
     if tag is None:
         self.scoring(sent, target)
     else:
         self.scoring(sent, target, tag)
     sense = self.getGreedyBestSenses(10)
     # Parenthesised single-value print is valid on both Python 2 and 3;
     # the original used the Python-2-only statement form.
     print(wordnet.synset(sense).definition)
     return sense
开发者ID:blodstone,项目名称:CCS590v2,代码行数:8,代码来源:AdaptedLesk.py


示例20: get_relative_similarity

def get_relative_similarity(a, b):
    '''
        Return the WordNet path similarity between the first noun senses
        of words a and b.  Used when deciding whether to merge clusters.
    '''
    synset_a = wn.synset("%s.n.01" % a)
    synset_b = wn.synset("%s.n.01" % b)
    return synset_a.path_similarity(synset_b)
开发者ID:ParinSanghavi,项目名称:Mining-Quality-Parameters-from-Yelp-Reviews-for-Improving-Businesses,代码行数:8,代码来源:cluster_yelp.py



注:本文中的nltk.corpus.wordnet.synset函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python wordnet.synsets函数代码示例发布时间:2022-05-27
下一篇:
Python wordnet.path_similarity函数代码示例发布时间:2022-05-27
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap