• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    公众号

Python xmldocs.XMLCorpusView类代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中nltk.corpus.reader.xmldocs.XMLCorpusView的典型用法代码示例。如果您正苦于以下问题：Python XMLCorpusView类的具体用法？Python XMLCorpusView怎么用？Python XMLCorpusView使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。



在下文中一共展示了XMLCorpusView类的10个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: __init__

 def __init__(self, filename, **kwargs):
     """Initialise the view over the preprocessed morphosyntax file.

     :param filename: Handed to ``XML_Tool`` together with the fixed
         name ``'ann_morphosyntax.xml'``.
     :param kwargs: Only ``tags`` is read here (defaults to ``None``).
     """
     self.tagspec = '.*/seg/fs'
     self.tags = kwargs.pop('tags', None)
     self.xml_tool = XML_Tool(filename, 'ann_morphosyntax.xml')
     # The base view reads from whatever path the preprocessing step returns.
     preprocessed = self.xml_tool.build_preprocessed_file()
     XMLCorpusView.__init__(self, preprocessed, self.tagspec)
开发者ID:prz3m,项目名称:kind2anki,代码行数:7,代码来源:nkjp.py


示例2: __init__

    def __init__(self, fileid, sent, tag, strip_space, stem):
        """
        Create the view and read the document header once up front.

        :param fileid: The name of the underlying file.
        :param sent: If true, include sentence bracketing.
        :param tag: The name of the tagset to use, or None for no tags.
        :param strip_space: If true, strip spaces from word tokens.
        :param stem: If true, then substitute stems for words.
        """
        # Whole sentences are matched when bracketing is requested,
        # otherwise the individual c/w elements inside them.
        tagspec = '.*/s' if sent else '.*/s/(.*/)?(c|w)'

        self._sent, self._tag = sent, tag
        self._strip_space, self._stem = strip_space, stem

        # Header fields start out empty; they are set before the header
        # is read with ``self.handle_header`` below.
        self.title = None  #: Title of the document.
        self.author = None  #: Author of the document.
        self.editor = None  #: Editor
        self.resps = None  #: Statement of responsibility

        XMLCorpusView.__init__(self, fileid, tagspec)

        # One pass over the stream dedicated to the teiHeader element.
        self._open()
        self.read_block(self._stream, '.*/teiHeader$', self.handle_header)
        self.close()

        # Start over with a fresh tag context after the header pass.
        self._tag_context = {0: ()}
开发者ID:Journo-App,项目名称:flask-by-example,代码行数:31,代码来源:bnc.py


示例3: __init__

 def __init__(self, filename, **kwargs):
     """Initialise the view over the preprocessed ``text.xml`` file.

     :param filename: Handed to ``XML_Tool`` together with the fixed
         name ``'text.xml'``.
     :param kwargs: Only ``mode`` is read here (defaults to ``0``).
     """
     self.mode = kwargs.pop('mode', 0)
     self.tagspec = '.*/div/ab'
     self.segm_dict = {}
     # Preprocess the XML first; the base class is initialised on the result.
     self.xml_tool = XML_Tool(filename, 'text.xml')
     preprocessed = self.xml_tool.build_preprocessed_file()
     XMLCorpusView.__init__(self, preprocessed, self.tagspec)
开发者ID:esabelhaus,项目名称:secret-octo-dubstep,代码行数:8,代码来源:nkjp.py


示例4: read_block

 def read_block(self, stream, tagspec=None, elt_handler=None):
     """Read one block via the base class and drop its ``None`` entries.

     :param stream: Stream forwarded to ``XMLCorpusView.read_block``.
     :param tagspec: Forwarded unchanged to the base implementation.
     :param elt_handler: Forwarded unchanged to the base implementation.
     :return: A list of the base-class results that are not ``None``.
     """
     block = XMLCorpusView.read_block(self, stream, tagspec, elt_handler)
     return [elt for elt in block if elt is not None]
开发者ID:prz3m,项目名称:kind2anki,代码行数:7,代码来源:mte.py


示例5: __init__

    def __init__(self, fileid, unit, bracket_sent, pos_tag, sem_tag):
        """
        Store the view options and initialise the underlying XML view.

        :param fileid: The name of the underlying file.
        :param unit: One of `'token'`, `'word'`, or `'chunk'`.
        :param bracket_sent: If true, include sentence bracketing.
        :param pos_tag: Whether to include part-of-speech tags.
        :param sem_tag: Whether to include semantic tags, namely WordNet lemma
            and OOV named entity status.
        """
        self._unit = unit
        self._sent = bracket_sent
        self._pos_tag = pos_tag
        self._sem_tag = sem_tag

        # Match whole sentences when bracketing, otherwise the punc/wf
        # elements directly under each sentence.
        tagspec = '.*/s' if bracket_sent else '.*/s/(punc|wf)'
        XMLCorpusView.__init__(self, fileid, tagspec)
开发者ID:brymaven,项目名称:nltk,代码行数:18,代码来源:semcor.py


示例6: handle_query

 def handle_query(self):
     """Read every block from the underlying stream into a single list.

     Opens the stream, calls ``XMLCorpusView.read_block`` repeatedly
     until it returns an empty block, and closes the stream again.

     :return: A list with the items of all blocks, in reading order.
     """
     self._open()
     header = []
     try:
         while True:
             segm = XMLCorpusView.read_block(self, self._stream)
             if not segm:
                 break
             header.extend(segm)
     finally:
         # Close even when read_block raises, so the stream is not
         # left open on this object.
         self.close()
     return header
开发者ID:esabelhaus,项目名称:secret-octo-dubstep,代码行数:10,代码来源:nkjp.py


示例7: _detect_encoding

    def _detect_encoding(self, fileid):
        """Return the encoding named on the first line of ``fileid``.

        The first line is searched for an ``encoding="..."`` or
        ``encoding='...'`` attribute. If neither is found, the decision is
        delegated to ``XMLCorpusView._detect_encoding``.

        :param fileid: Either a ``PathPointer`` (opened through its own
            ``open()`` method) or a path accepted by the builtin ``open``.
        :return: The declared encoding name, or the base-class result.
        """
        if isinstance(fileid, PathPointer):
            # NOTE(review): assumes the object returned by
            # PathPointer.open() has a close() method -- confirm.
            stream = fileid.open()
        else:
            stream = open(fileid, 'rb')
        try:
            s = stream.readline()
        finally:
            # Only the first line is needed; release the handle right away.
            stream.close()

        # A binary read yields bytes on Python 3, which the str patterns
        # below cannot search. latin-1 maps every byte to one character,
        # so decoding can never fail. On Python 2 ``bytes is str`` and the
        # line is left untouched.
        if isinstance(s, bytes) and not isinstance(s, str):
            s = s.decode('latin-1')

        # Double-quoted form is tried first, then the single-quoted form.
        for pattern in (r'encoding="([^"]+)"', r"encoding='([^']+)'"):
            m = re.search(pattern, s)
            if m:
                return m.group(1)

        return XMLCorpusView._detect_encoding(self, fileid)
开发者ID:IMAmuseum,项目名称:getty-vocab-reconciliation,代码行数:12,代码来源:getty.py


示例8: read_block

    def read_block(self, stream, tagspec=None, elt_handler=None):
        """
        Read all remaining blocks and return their parts joined by spaces.

        ``tagspec`` and ``elt_handler`` are accepted but not forwarded to
        the base-class ``read_block``.

        :return: A one-element list holding the space-joined text.
        """
        pieces = []
        block = XMLCorpusView.read_block(self, stream)
        # An empty block marks the end of the stream's content.
        while len(block) != 0:
            pieces.extend(block)
            block = XMLCorpusView.read_block(self, stream)

        return [' '.join(pieces)]
开发者ID:esabelhaus,项目名称:secret-octo-dubstep,代码行数:13,代码来源:nkjp.py


示例9: __init__

 def __init__(self, fileid, tagspec, elt_handler=None):
     """Initialise the view by delegating to ``XMLCorpusView.__init__``.

     :param fileid: Forwarded unchanged to the base class.
     :param tagspec: Forwarded unchanged to the base class.
     :param elt_handler: Forwarded unchanged to the base class
         (defaults to ``None``).
     """
     # No extra state is set up here; all arguments pass straight through.
     XMLCorpusView.__init__(self, fileid, tagspec, elt_handler)
开发者ID:Copper-Head,项目名称:nltk,代码行数:2,代码来源:mte.py


示例10: zip

    np.savetxt("lsa_model.csv", matrix, delimiter="\t")  # raw output

    doc_2d = []
    for doc, file in zip(matrix, filenames):  # reduce the data to 2 dimensions
        # print(file, "\n", doc, "\n\n")    # debug msg
        doc_2d.append(TSNE().fit_transform(doc).tolist()[0])

    matrix = np.asarray(doc_2d)  # update matrix array

    # raw output
    np.savetxt("lsa_reduced.csv", matrix, delimiter="\t")  # raw output

    # build list of tags from the metadata
    metadata = pd.DataFrame(index=filenames, columns=["Tags"])

    view = XMLCorpusView("txt/export-abstracts.xml", ".*/article")
    iter = view.iterate_from(0)
    for entry in iter:
        metadata.loc[entry.attrib["{http://www.w3.org/XML/1998/namespace}id"] + ".txt", "Tags"] = entry.attrib["type"]

    metadata.to_csv("lsa_metadata.csv")

    ##############################################################################
    # CLUSTERING

    print("clustering ...\n")

    # af = AffinityPropagation(damping=0.9, affinity="euclidean", preference=-50).fit(matrix)
    af = AffinityPropagation().fit(matrix)  # default

    cluster_centers_indices = af.cluster_centers_indices_
开发者ID:stefanpernes,项目名称:word-embedding,代码行数:31,代码来源:lsa.py



注：本文中的nltk.corpus.reader.xmldocs.XMLCorpusView类示例由纯净天空整理自GitHub/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python reuters.categories函数代码示例发布时间:2022-05-27
下一篇:
Python xmldocs.XMLCorpusReader类代码示例发布时间:2022-05-27
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap