• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    公众号

Python tag.map_tag函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中nltk.tag.map_tag函数的典型用法代码示例。如果您正苦于以下问题:Python map_tag函数的具体用法?Python map_tag怎么用?Python map_tag使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了map_tag函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: get_sentiment_count_data

def _emotion_features(product_id):
	"""Compute the sentiment/POS feature dict for one product id.

	Reads the module-level ``products`` mapping. Features:
	  - ``Fscore``: formality-style score from universal-POS counts
	  - ``eStrength``: mean opinion score over emotion words
	  - ``eRatio``: emotion words / total words
	  - ``posToAllRatio``: positive opinions / all scored opinions
	"""
	dist = nltk.FreqDist(products[product_id]['all_pos'].split())
	# Collapse Penn Treebank tags into universal-tagset counts.
	new_dist = Counter()
	for tag, count in dist.iteritems():
		new_dist[map_tag('en-ptb', 'universal', tag)] += count
	# NOTE(review): dist['UH'] reads the raw PTB interjection count while the
	# other terms use universal tags -- preserved as-is, but confirm intent.
	Fscore = 0.5 * ((new_dist['NOUN'] + new_dist['ADJ'] + new_dist['ADP'] + new_dist['DET'])
	                - (dist['UH'] + new_dist['VERB'] + new_dist['ADV'] + new_dist['PRON']) + 100)
	neg_count = 0
	pos_count = 0
	suma = 0
	emotion_words = 0
	for review in products[product_id]['reviews']:
		for feature, adjective, score in review['opinions']:
			if score is not None:
				if score < 0:
					neg_count += 1
				else:
					pos_count += 1
				suma += score
				emotion_words += 1
	nwords = len(products[product_id]['all_text'].split())
	return {
		'Fscore': Fscore,
		'eStrength': suma * 1.0 / emotion_words,
		'eRatio': emotion_words * 1.0 / nwords,
		'posToAllRatio': pos_count * 1.0 / (pos_count + neg_count),
	}

def get_sentiment_count_data(train, test):
	"""Build scaled sentiment-count feature matrices for train/test product ids.

	Fix: the original duplicated the whole per-product feature computation for
	the test and train loops; it is now factored into ``_emotion_features``.

	:param train: iterable of product ids used to fit vectorizer/scaler.
	:param test: iterable of product ids transformed with train statistics.
	:return: (train feature dicts, test feature dicts, scaled X_train, scaled X_test)
	"""
	sent_count_test = [_emotion_features(pid) for pid in test]
	sent_count_train = [_emotion_features(pid) for pid in train]

	# Vectorize the dicts, then standardize using statistics fit on train only.
	v = DictVectorizer(sparse=False)
	X_sent_train = v.fit_transform(sent_count_train)
	X_sent_test = v.transform(sent_count_test)
	scaler = preprocessing.StandardScaler().fit(X_sent_train)
	X_train = scaler.transform(X_sent_train)
	X_test = scaler.transform(X_sent_test)

	return sent_count_train, sent_count_test, X_train, X_test
开发者ID:sergiooramas,项目名称:music-genre-classification,代码行数:60,代码来源:genre_classification.py


示例2: tag

    def tag(self, tokens):
        """Tag *tokens* with the wrapped model; when ``self.tagmap`` is set,
        convert each tag to the universal tagset via ``map_tag``."""
        tagged = self.model.tag(tokens)
        if self.tagmap:
            tagged = [(word, map_tag(self.tagmap, "universal", pos))
                      for word, pos in tagged]
        return tagged
开发者ID:lrei,项目名称:twitter_annotator,代码行数:7,代码来源:seq.py


示例3: _get_parsed_sent

    def _get_parsed_sent(self, grid, pos_in_tree, tagset=None):
        """Reassemble a parse tree from the columns of one CoNLL sentence grid.

        :param grid: rows of CoNLL columns for one sentence.
        :param pos_in_tree: if false, single-leaf POS subtrees are replaced
            by ``(word, tag)`` tuples after parsing.
        :param tagset: optional target tagset; when it differs from
            ``self._tagset`` the POS column is converted via ``map_tag``.
        :return: a tree of type ``self._tree_class``.
        """
        words = self._get_column(grid, self._colmap['words'])
        pos_tags = self._get_column(grid, self._colmap['pos'])
        if tagset and tagset != self._tagset:
            pos_tags = [map_tag(self._tagset, tagset, t) for t in pos_tags]
        parse_tags = self._get_column(grid, self._colmap['tree'])

        # Rebuild a bracketed tree string; each parse cell has the shape
        # "<open-brackets>*<close-brackets>" with '*' marking the leaf slot.
        treestr = ''
        for (word, pos_tag, parse_tag) in zip(words, pos_tags, parse_tags):
            # Escape literal parentheses so they don't break the bracketing.
            if word == '(': word = '-LRB-'
            if word == ')': word = '-RRB-'
            if pos_tag == '(': pos_tag = '-LRB-'
            if pos_tag == ')': pos_tag = '-RRB-'
            (left, right) = parse_tag.split('*')
            right = right.count(')')*')' # only keep ')'.
            treestr += '%s (%s %s) %s' % (left, pos_tag, word, right)
        try:
            tree = self._tree_class.fromstring(treestr)
        except (ValueError, IndexError):
            # Fragment without a single root: wrap it in the default root label.
            tree = self._tree_class.fromstring('(%s %s)' %
                                          (self._root_label, treestr))

        if not pos_in_tree:
            # Move POS tags out of the tree into (word, tag) leaf tuples.
            for subtree in tree.subtrees():
                for i, child in enumerate(subtree):
                    if (isinstance(child, Tree) and len(child)==1 and
                        isinstance(child[0], string_types)):
                        subtree[i] = (child[0], child.label())

        return tree
开发者ID:Weiming-Hu,项目名称:text-based-six-degree,代码行数:30,代码来源:conll.py


示例4: compute_pos_tag

    def compute_pos_tag(tokens):
        """One-hot encode the universal POS tag of each token.

        :param tokens: list of word tokens.
        :return: one 11-element 0/1 list per token, indexed by ``lookup``;
            a tag absent from ``lookup`` produces an all-zero vector.
        """
        lookup = {
            'VERB': 0,
            'NOUN': 1,
            'PRON': 2,
            'ADJ': 3,
            'ADV': 4,
            'ADP': 5,
            'CONJ': 6,
            'DET': 7,
            'NUM': 8,
            'PRT': 9,
            'X': 10
        }

        # Tag with the default tagger, then map PTB tags to the universal set.
        universal_tags = [map_tag('en-ptb', 'universal', ptb_tag)
                          for _, ptb_tag in nltk.pos_tag(tokens)]

        encoded = []
        for tag in universal_tags:
            one_hot = numpy.zeros(11)
            if tag in lookup:
                one_hot[lookup[tag]] = 1
            encoded.append(one_hot.tolist())
        return encoded
开发者ID:ProjetPP,项目名称:PPP-QuestionParsing-ML-Standalone,代码行数:26,代码来源:preprocessing.py


示例5: _get_parsed_sent

    def _get_parsed_sent(self, grid, pos_in_tree, tagset=None):
        """Reassemble a parse tree from the columns of one CoNLL sentence grid.

        :param grid: rows of CoNLL columns for one sentence.
        :param pos_in_tree: if false, single-leaf POS subtrees are replaced
            by ``(word, tag)`` tuples after parsing.
        :param tagset: optional target tagset; when it differs from
            ``self._tagset`` the POS column is converted via ``map_tag``.
        :return: a tree of type ``self._tree_class``.
        """
        words = self._get_column(grid, self._colmap["words"])
        pos_tags = self._get_column(grid, self._colmap["pos"])
        if tagset and tagset != self._tagset:
            pos_tags = [map_tag(self._tagset, tagset, t) for t in pos_tags]
        parse_tags = self._get_column(grid, self._colmap["tree"])

        # Rebuild a bracketed tree string; each parse cell has the shape
        # "<open-brackets>*<close-brackets>" with '*' marking the leaf slot.
        treestr = ""
        for (word, pos_tag, parse_tag) in zip(words, pos_tags, parse_tags):
            # Escape literal parentheses so they don't break the bracketing.
            if word == "(":
                word = "-LRB-"
            if word == ")":
                word = "-RRB-"
            if pos_tag == "(":
                pos_tag = "-LRB-"
            if pos_tag == ")":
                pos_tag = "-RRB-"
            (left, right) = parse_tag.split("*")
            right = right.count(")") * ")"  # only keep ')'.
            treestr += "%s (%s %s) %s" % (left, pos_tag, word, right)
        try:
            tree = self._tree_class.parse(treestr)
        except (ValueError, IndexError):
            # Fragment without a single root: wrap it in the default root label.
            tree = self._tree_class.parse("(%s %s)" % (self._root_label, treestr))

        if not pos_in_tree:
            # Move POS tags out of the tree into (word, tag) leaf tuples.
            for subtree in tree.subtrees():
                for i, child in enumerate(subtree):
                    if isinstance(child, Tree) and len(child) == 1 and isinstance(child[0], compat.string_types):
                        subtree[i] = (child[0], child.label())

        return tree
开发者ID:haadkhan,项目名称:cerebri,代码行数:32,代码来源:conll.py


示例6: tagged_paras

 def tagged_paras(self, fileids=None, tagset=None):
     """
     :return: the given file(s) as a list of
         paragraphs, each encoded as a list of sentences, which are
         in turn encoded as lists of ``(word,tag)`` tuples.
     :rtype: list(list(list(tuple(str,str))))
     """
     # Only map tags when a different target tagset was requested.
     if tagset and tagset != self._tagset:
         def tag_mapping_function(t):
             return map_tag(self._tagset, tagset, t)
     else:
         tag_mapping_function = None
     views = [
         TaggedCorpusView(
             fileid,
             enc,
             True,
             True,
             True,
             self._sep,
             self._word_tokenizer,
             self._sent_tokenizer,
             self._para_block_reader,
             tag_mapping_function,
         )
         for (fileid, enc) in self.abspaths(fileids, True)
     ]
     return concat(views)
开发者ID:prz3m,项目名称:kind2anki,代码行数:28,代码来源:tagged.py


示例7: _get_chunked_words

    def _get_chunked_words(self, grid, chunk_types, tagset=None):
        """Convert one CoNLL sentence grid into a shallow chunk tree.

        :param grid: rows of CoNLL columns for one sentence.
        :param chunk_types: chunk labels to keep; any other chunk is treated
            as 'O'. ``None`` keeps every chunk type.
        :param tagset: optional target tagset; when it differs from
            ``self._tagset`` the POS column is converted via ``map_tag``.
        :return: a two-level ``Tree`` rooted at ``self._root_label`` whose
            children are chunk subtrees and un-chunked ``(word, tag)`` pairs.
        """
        # n.b.: this method is very similar to conllstr2tree.
        words = self._get_column(grid, self._colmap['words'])
        pos_tags = self._get_column(grid, self._colmap['pos'])
        if tagset and tagset != self._tagset:
            pos_tags = [map_tag(self._tagset, tagset, t) for t in pos_tags]
        chunk_tags = self._get_column(grid, self._colmap['chunk'])

        # stack[0] is the root; stack[-1] is the currently open chunk, if any.
        stack = [Tree(self._root_label, [])]

        for (word, pos_tag, chunk_tag) in zip(words, pos_tags, chunk_tags):
            # IOB chunk tags look like 'B-NP' / 'I-NP' / 'O'.
            if chunk_tag == 'O':
                state, chunk_type = 'O', ''
            else:
                (state, chunk_type) = chunk_tag.split('-')
            # If it's a chunk we don't care about, treat it as O.
            if chunk_types is not None and chunk_type not in chunk_types:
                state = 'O'
            # Treat a mismatching I like a B.
            if state == 'I' and chunk_type != stack[-1].label():
                state = 'B'
            # For B or O: close any open chunk.
            if state in 'BO' and len(stack) == 2:
                stack.pop()
            # For B: start a new chunk.
            if state == 'B':
                new_chunk = Tree(chunk_type, [])
                stack[-1].append(new_chunk)
                stack.append(new_chunk)
            # Add the word token.
            stack[-1].append((word, pos_tag))

        return stack[0]
开发者ID:Weiming-Hu,项目名称:text-based-six-degree,代码行数:33,代码来源:conll.py


示例8: _get_iob_words

 def _get_iob_words(self, grid, tagset=None):
     """Return ``(word, pos, chunk)`` triples for one CoNLL sentence grid,
     converting POS tags to *tagset* when it differs from ``self._tagset``."""
     pos_tags = self._get_column(grid, self._colmap["pos"])
     if tagset and tagset != self._tagset:
         pos_tags = [map_tag(self._tagset, tagset, t) for t in pos_tags]
     words = self._get_column(grid, self._colmap["words"])
     chunks = self._get_column(grid, self._colmap["chunk"])
     return list(zip(words, pos_tags, chunks))
开发者ID:haadkhan,项目名称:cerebri,代码行数:7,代码来源:conll.py


示例9: _tag

 def _tag(self, t, tagset=None):
     """Extract ``(word, tag)`` pairs from a normalized bracketed string,
     converting tags to *tagset* when it differs from ``self._tagset``."""
     pairs = TAGWORD.findall(self._normalize(t))
     if tagset and tagset != self._tagset:
         return [(w, map_tag(self._tagset, tagset, p)) for (p, w) in pairs]
     return [(w, p) for (p, w) in pairs]
开发者ID:prz3m,项目名称:kind2anki,代码行数:7,代码来源:bracket_parse.py


示例10: tagged_words

 def tagged_words(self, fileids=None, tagset=None):
     """
     :return: the given file(s) as a list of tagged
         words and punctuation symbols, encoded as tuples
         ``(word,tag)``.
     :rtype: list(tuple(str,str))
     """
     # Only map tags when a different target tagset was requested.
     if tagset and tagset != self._tagset:
         def tag_mapping_function(t):
             return map_tag(self._tagset, tagset, t)
     else:
         tag_mapping_function = None
     views = [
         TaggedCorpusView(
             fileid,
             enc,
             True,
             False,
             False,
             self._sep,
             self._word_tokenizer,
             self._sent_tokenizer,
             self._para_block_reader,
             tag_mapping_function,
         )
         for (fileid, enc) in self.abspaths(fileids, True)
     ]
     return concat(views)
开发者ID:prz3m,项目名称:kind2anki,代码行数:28,代码来源:tagged.py


示例11: _tag

 def _tag(self, sent, tagset=None):
     """Extract ``(word, tag)`` pairs from a bracketed sentence string,
     converting tags to *tagset* when it differs from ``self._tagset``."""
     pairs = TAGWORD.findall(sent)
     if tagset and tagset != self._tagset:
         return [(w, map_tag(self._tagset, tagset, t)) for (t, w) in pairs]
     return [(w, t) for (t, w) in pairs]
开发者ID:prz3m,项目名称:kind2anki,代码行数:7,代码来源:sinica_treebank.py


示例12: count_ADJ

def count_ADJ(text):
    """Return the fraction of tokens in *text* tagged as universal 'ADJ'.

    Fixes: returns 0 for empty input instead of raising ZeroDivisionError,
    and uses ``FreqDist.get`` with a default instead of a None check.
    """
    word_list = nltk.word_tokenize(text)
    if not word_list:
        # Empty text: the original divided by len(word_list) == 0 and crashed.
        return 0
    tag_word = nltk.pos_tag(word_list)
    # Map PTB tags to the universal tagset before counting.
    tag_fd = nltk.FreqDist(map_tag('en-ptb', 'universal', tag) for (word, tag) in tag_word)
    return tag_fd.get('ADJ', 0) / len(word_list)
开发者ID:yunitata,项目名称:PAN15,代码行数:8,代码来源:feature_extractor.py


示例13: count_DET

def count_DET(text):
    """Return the fraction of tokens in *text* tagged as universal 'DET'.

    Fixes: returns 0 for empty input instead of raising ZeroDivisionError,
    and uses ``FreqDist.get`` with a default instead of a None check.
    """
    word_list = nltk.word_tokenize(text)
    if not word_list:
        # Empty text: the original divided by len(word_list) == 0 and crashed.
        return 0
    tag_word = nltk.pos_tag(word_list)
    # Map PTB tags to the universal tagset before counting.
    tag_fd = nltk.FreqDist(map_tag('en-ptb', 'universal', tag) for (word, tag) in tag_word)
    return tag_fd.get('DET', 0) / len(word_list)
开发者ID:yunitata,项目名称:PAN15,代码行数:8,代码来源:feature_extractor.py


示例14: get_last_word_types

def get_last_word_types(text):
    """Return the universal POS tag of the final token of *text*."""
    tokens = nltk.word_tokenize(text)
    tagged = pos_tag(tokens)
    last_ptb_tag = tagged[-1][1]

    # known types
    # ['NOUN','VERB','CONJ','PRON','ADP', 'PRT', 'DET']
    return map_tag("en-ptb", "universal", last_ptb_tag)
开发者ID:helderm,项目名称:shalk,代码行数:8,代码来源:dbload.py


示例15: _tag

 def _tag(self, t, tagset=None):
     """Return ``(word, tag)`` pairs restored to surface order.

     Matches are ``(order, tag, word)`` triples; sorting on the numeric
     order field re-sequences the tokens before the field is dropped.
     """
     triples = SORTTAGWRD.findall(self._normalize(t, ordered = True))
     tagged_sent = sorted((int(o), w, p) for (o, p, w) in triples)
     if tagset and tagset != self._tagset:
         return [(w, map_tag(self._tagset, tagset, p)) for (o, w, p) in tagged_sent]
     return [(w, p) for (o, w, p) in tagged_sent]
开发者ID:Copper-Head,项目名称:nltk,代码行数:8,代码来源:bracket_parse.py


示例16: count_X

def count_X(text):
    """Return the fraction of tokens in *text* tagged as universal 'X'.

    Fixes: returns 0 for empty input instead of raising ZeroDivisionError,
    and uses ``FreqDist.get`` with a default instead of a None check.
    """
    word_list = nltk.word_tokenize(text)
    if not word_list:
        # Empty text: the original divided by len(word_list) == 0 and crashed.
        return 0
    tag_word = nltk.pos_tag(word_list)
    # Map PTB tags to the universal tagset before counting.
    tag_fd = nltk.FreqDist(map_tag('en-ptb', 'universal', tag) for (word, tag) in tag_word)
    return tag_fd.get('X', 0) / len(word_list)
开发者ID:yunitata,项目名称:PAN15,代码行数:8,代码来源:feature_extractor.py


示例17: count_PRO

def count_PRO(text):
    """Return the fraction of tokens in *text* tagged as universal 'PRON'.

    Fixes: returns 0 for empty input instead of raising ZeroDivisionError,
    and uses ``FreqDist.get`` with a default instead of a None check.
    """
    word_list = nltk.word_tokenize(text)
    if not word_list:
        # Empty text: the original divided by len(word_list) == 0 and crashed.
        return 0
    tag_word = nltk.pos_tag(word_list)
    # Map PTB tags to the universal tagset before counting.
    tag_fd = nltk.FreqDist(map_tag('en-ptb', 'universal', tag) for (word, tag) in tag_word)
    return tag_fd.get('PRON', 0) / len(word_list)
开发者ID:yunitata,项目名称:PAN15,代码行数:8,代码来源:feature_extractor.py


示例18: count_CONJ

def count_CONJ(text):
    """Return the fraction of tokens in *text* tagged as universal 'CONJ'.

    Fixes: returns 0 for empty input instead of raising ZeroDivisionError,
    and uses ``FreqDist.get`` with a default instead of a None check.
    """
    word_list = nltk.word_tokenize(text)
    if not word_list:
        # Empty text: the original divided by len(word_list) == 0 and crashed.
        return 0
    tag_word = nltk.pos_tag(word_list)
    # Map PTB tags to the universal tagset before counting.
    tag_fd = nltk.FreqDist(map_tag('en-ptb', 'universal', tag) for (word, tag) in tag_word)
    return tag_fd.get('CONJ', 0) / len(word_list)
开发者ID:yunitata,项目名称:PAN15,代码行数:8,代码来源:feature_extractor.py


示例19: tagged_sents

 def tagged_sents(self, fileids=None, tagset=None):
     """Return the given file(s) as tagged sentences, optionally converting
     tags from ``self._tagset`` to *tagset*."""
     if tagset and tagset != self._tagset:
         def tag_mapping_function(t):
             return map_tag(self._tagset, tagset, t)
     else:
         tag_mapping_function = None
     views = [IndianCorpusView(fileid, enc, True, True, tag_mapping_function)
              for (fileid, enc) in self.abspaths(fileids, True)]
     return concat(views)
开发者ID:CaptainAL,项目名称:Spyder,代码行数:8,代码来源:indian.py


示例20: get_list_list_pos_from_list_list_tokens

def get_list_list_pos_from_list_list_tokens(list_list_tokens):
	"""For each token list, return the UTF-8-encoded universal POS tag list.

	NOTE: Python 2 code -- tokens are byte strings that must be decoded
	before tagging, and the tags are re-encoded on the way out.
	"""
	list_list_pos = []
	for sentence_tokens in list_list_tokens:
		# pos tagger needs decoded (unicode) tokens
		decoded_tokens = [tok.decode('utf8') for tok in sentence_tokens]
		tagged = pos_tag(decoded_tokens)
		universal_tags = [map_tag('en-ptb', 'universal', tag).encode('utf8')
		                  for word, tag in tagged]
		list_list_pos.append(universal_tags)
	return list_list_pos
开发者ID:sagar-sinha,项目名称:Authorship_attribution,代码行数:8,代码来源:project_data_processing.py



注:本文中的nltk.tag.map_tag函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python tag.pos_tag函数代码示例发布时间:2022-05-27
下一篇:
Python wordnet.WordNetLemmatizer类代码示例发布时间:2022-05-27
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4 © 2001-2023 极客世界. | Sitemap