
Python treebank.parsed_sents Function Code Examples


This article collects and organizes typical usage examples of the Python function nltk.corpus.treebank.parsed_sents. If you are wondering what parsed_sents does and how it is used in practice, the curated code examples below should help.



The following presents 20 code examples of the parsed_sents function, ordered by popularity.
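Before the examples, here is a minimal sketch of the basic call patterns (assuming the treebank sample data has been installed, e.g. via nltk.download('treebank')):

from nltk.corpus import treebank

# all parsed sentences in NLTK's 10% WSJ sample, as nltk.Tree objects
trees = treebank.parsed_sents()
print(len(trees))

# parsed sentences of a single file, selected by fileid
t = treebank.parsed_sents('wsj_0001.mrg')[0]
print(t.leaves())    # the sentence's tokens
t.pretty_print()     # ASCII drawing of the parse tree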

Example 1: pcfg_demo

def pcfg_demo():
    """
    A demonstration showing how C{WeightedGrammar}s can be created and used.
    """

    from nltk.corpus import treebank
    from nltk import treetransforms
    from nltk import induce_pcfg
    from nltk.parse import pchart

    pcfg_prods = toy_pcfg1.productions()

    pcfg_prod = pcfg_prods[2]
    print('A PCFG production:', repr(pcfg_prod))
    print('    pcfg_prod.lhs()  =>', repr(pcfg_prod.lhs()))
    print('    pcfg_prod.rhs()  =>', repr(pcfg_prod.rhs()))
    print('    pcfg_prod.prob() =>', repr(pcfg_prod.prob()))
    print()

    grammar = toy_pcfg2
    print('A PCFG grammar:', repr(grammar))
    print('    grammar.start()       =>', repr(grammar.start()))
    print('    grammar.productions() =>', end=' ')
    # Use str.replace(...) to line-wrap the output.
    print(repr(grammar.productions()).replace(',', ',\n' + ' ' * 26))
    print()

    print('Coverage of input words by a grammar:')
    # NLTK 2's grammar.covers() returned a boolean; in NLTK 3 the equivalent
    # check is check_coverage(), which raises ValueError on uncovered words.
    for words in (['a', 'boy'], ['a', 'girl']):
        try:
            grammar.check_coverage(words)
            print(words, True)
        except ValueError:
            print(words, False)

    # extract productions from three trees and induce the PCFG
    print("Induce PCFG grammar from treebank data:")

    productions = []
    for item in treebank.fileids()[:2]:  # treebank.items in older NLTK versions
        for tree in treebank.parsed_sents(item):
            # perform optional tree transformations, e.g.:
            tree.collapse_unary(collapsePOS=False)
            tree.chomsky_normal_form(horzMarkov=2)

            productions += tree.productions()

    S = Nonterminal('S')
    grammar = induce_pcfg(S, productions)
    print(grammar)
    print()

    print("Parse sentence using induced grammar:")

    parser = pchart.InsideChartParser(grammar)
    parser.trace(3)

    # doesn't work as tokens are different:
    #sent = treebank.tokenized('wsj_0001.mrg')[0]

    sent = treebank.parsed_sents('wsj_0001.mrg')[0].leaves()
    print(sent)
    for parse in parser.parse(sent):  # nbest_parse() in older NLTK versions
        print(parse)
Developer: ggosline | Project: taxonparser | Lines: 60 | Source: grammar.py


Example 2: grammar_development_with_treebank

def grammar_development_with_treebank():
    from nltk.corpus import treebank

    t = treebank.parsed_sents("wsj_0001.mrg")[0]
    print(t)
    # _grammar_filter is a helper defined elsewhere in ch08.py
    print("identify verbs for SV in VP -> SV S", [
        subtree for tree in treebank.parsed_sents() for subtree in tree.subtrees(_grammar_filter)
    ])
Developer: prashiyn | Project: nltk-examples | Lines: 8 | Source: ch08.py


Example 3: learn_treebank

def learn_treebank(files=None, markov_order=None):
    """
    Learn a PCFG from the Penn Treebank, and return it.
    
    By default, this learns from NLTK's 10% sample of the Penn Treebank.
    You can give the filename of a Treebank file; 'wsj-02-21.mrg' will
    learn from the entire training section of Treebank.
    """
    if files is None: bank = treebank.parsed_sents()
    else: bank = treebank.parsed_sents(files)
    return learn_trees(bank, collapse=True, markov_order=markov_order)
Developer: salmanahmad | Project: 6.863 | Lines: 11 | Source: learn_pcfg.py


Example 4: grammarDevelopmen

def grammarDevelopmen():
    import nltk
    from nltk.corpus import treebank

    print("page 315 8.6  Grammar Development")
    print("=============== Treebanks and Grammars ===============")
    t = treebank.parsed_sents('wsj_0001.mrg')[0]
    print(t)

    # renamed from `filter` to avoid shadowing the builtin;
    # NLTK 3 uses .label() where NLTK 2 used .node
    def vp_filter(tree):
        child_nodes = [child.label() for child in tree if isinstance(child, nltk.Tree)]
        return (tree.label() == 'VP') and ('S' in child_nodes)

    print([subtree for tree in treebank.parsed_sents() for subtree in tree.subtrees(vp_filter)])
Developer: hbdhj | Project: python | Lines: 12 | Source: chapter8.py


Example 5: test

def test():
    """Do some tree drawing tests."""
    def print_tree(n, tree, sentence=None, ansi=True, **xargs):
        print()
        print('{0}: "{1}"'.format(n, ' '.join(sentence or tree.leaves())))
        print(tree)
        print()
        drawtree = TreePrettyPrinter(tree, sentence)
        try:
            print(drawtree.text(unicodelines=ansi, ansi=ansi, **xargs))
        except (UnicodeDecodeError, UnicodeEncodeError):
            print(drawtree.text(unicodelines=False, ansi=False, **xargs))

    from nltk.corpus import treebank
    for n in [0, 1440, 1591, 2771, 2170]:
        tree = treebank.parsed_sents()[n]
        print_tree(n, tree, nodedist=2, maxwidth=8)
    print()
    print('ASCII version:')
    print(TreePrettyPrinter(tree).text(nodedist=2))

    tree = Tree.fromstring(
        '(top (punct 8) (smain (noun 0) (verb 1) (inf (verb 5) (inf (verb 6) '
        '(conj (inf (pp (prep 2) (np (det 3) (noun 4))) (verb 7)) (inf (verb 9)) '
        '(vg 10) (inf (verb 11)))))) (punct 12))', read_leaf=int)
    sentence = ('Ze had met haar moeder kunnen gaan winkelen ,'
                ' zwemmen of terrassen .'.split())
    print_tree('Discontinuous tree', tree, sentence, nodedist=2)
Developer: CaptainAL | Project: Spyder | Lines: 28 | Source: treeprettyprinter.py


Example 6: main

def main(transform_func=None, n=10):
    # assumes: from nltk.parse.stanford import StanfordParser
    #          from nltk.corpus import treebank
    parser = StanfordParser(
        path_to_jar="/cs/fs/home/hxiao/code/stanford-parser-full-2015-01-30/stanford-parser.jar",
        path_to_models_jar="/cs/fs/home/hxiao/code/stanford-parser-full-2015-01-30/stanford-parser-3.5.1-models.jar",
        model_path="edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz"
    )

    test_sents = treebank.sents()[-n:]

    print("len(test_sents) = %d" % len(test_sents))

    if transform_func and callable(transform_func):
        print("transforming it using", transform_func)
        test_sents = [[transform_func(w) for w in s]
                      for s in test_sents]  # transform it

    print(test_sents[:10])

    print("predicting")
    pred_parses = parser.parse_sents(test_sents)

    # align gold trees with the predicted ones; the original zipped the whole
    # corpus against only the last n predictions, mis-pairing the parses
    gold_parses = treebank.parsed_sents()[-n:]

    print("evaluating")

    correct_n = gold_n = predicted_n = 0.0

    for gparse, pparse in zip(gold_parses, pred_parses):
        cn, gn, pn = precision_and_recall_stat(get_nodes_with_range(gparse),
                                               get_nodes_with_range(pparse))
        correct_n += cn
        gold_n += gn
        predicted_n += pn

    # "Prediction" in the original was a typo for "Precision"
    print("Precision: %f, Recall: %f" % (correct_n / predicted_n, correct_n / gold_n))
Developer: xiaohan2012 | Project: capitalization-restoration-train | Lines: 35 | Source: parse.py


Example 7: sentences

def sentences():
    for f in treebank.fileids():
        for t in treebank.parsed_sents(f):
            t.chomsky_normal_form(horzMarkov=1)
            t.collapse_unary(collapsePOS=True)

            yield (t, t.leaves())
Developer: brucespang | Project: pcyk | Lines: 7 | Source: pcyk.py
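A short driver may make the shape of this generator's output clearer. This is a sketch; it assumes the sentences() function from Example 7 is in scope:

import itertools
from nltk.corpus import treebank  # required by sentences()

# print the first three (tree, tokens) pairs produced by the generator
for tree, words in itertools.islice(sentences(), 3):
    print(len(words), words[:8])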


Example 8: convert_wsj

def convert_wsj(file_obj):
    import sys
    from nltk.corpus import treebank

    sys.stderr.write("Converting Penn Treebank sampler...\n")
    tb = TreebankConverter()  # defined elsewhere in nltk_contrib's demo module
    for sentence in treebank.parsed_sents():
        tb.add_sentence(sentence)
    tb.write(file_obj)
Developer: Sandy4321 | Project: nltk_contrib | Lines: 8 | Source: demo.py


Example 9: main

def main():
    # assumes: from collections import deque
    #          from nltk.corpus import treebank
    #          from nltk.tree import ParentedTree
    answers = open('coref_key.txt', 'r')
    this_correct = 0
    correct = 0
    total = 0
    prev_sentences = deque()
    for file in FILENAMES:
        this_correct = 0
        this_total = 0
        prev_sentences.clear()
        for tree in treebank.parsed_sents(file):

            tree = ParentedTree.convert(tree)

            for pronoun, np_node in find_pronouns(tree):

                # i = 0
                # for t in list(prev_sentences)[-3:]:
                #     t.pretty_print()
                #     print("-"*25)
                #     i = i + 1
                #     if i == 3: break
                proposed = hobbs_to_string(hobbs(np_node, pronoun.lower(), prev_sentences))
                tree.pretty_print()

                actual = answers.readline()

                if proposed == actual[:-1]:
                    update_pronoun_results(pronoun, 1)
                    correct += 1
                    this_correct += 1

                update_pronoun_results(pronoun, 0)
                total += 1
                this_total += 1

                print("Pronoun: '" + pronoun + "'   Proposed: '" + proposed + "'   Actual: '" + actual + "'")

                if total: print("Overall:\tCorrect:", correct, "\tTotal:", total, "\tPercentage:", correct / float(total), "\n")

                print("*" * 100)
                print("*" * 100)
            prev_sentences.append(tree)
        print("-" * 50)
        if this_correct: print(file, ":\tCorrect:", this_correct, "\tTotal:", this_total, "\tPercentage:", this_correct / float(this_total), "\n")
        if total: print("Overall:\tCorrect:", correct, "\tTotal:", total, "\tPercentage:", correct / float(total), "\n")
        print("-" * 50)

    print("Male correct:", PRONOUN_RESULTS['male'], "\tMale total:", PRONOUN_RESULTS['male_total'], "\tPercent correct:", PRONOUN_RESULTS['male_pct'])
    print("Female correct:", PRONOUN_RESULTS['female'], "\tFemale total:", PRONOUN_RESULTS['female_total'], "\tPercent correct:", PRONOUN_RESULTS['female_pct'])
    print("Neutral correct:", PRONOUN_RESULTS['neutral'], "\tNeutral total:", PRONOUN_RESULTS['neutral_total'], "\tPercent correct:", PRONOUN_RESULTS['neutral_pct'])
    print("Plural correct:", PRONOUN_RESULTS['they'], "\tPlural total:", PRONOUN_RESULTS['they_total'], "\tPercent correct:", PRONOUN_RESULTS['they_pct'])
    print("Reflexive correct:", PRONOUN_RESULTS['reflexive'], "\tReflexive total:", PRONOUN_RESULTS['reflexive_total'], "\tPercent correct:", PRONOUN_RESULTS['reflexive_pct'])
    print("Total correct:", correct, "\tTotal:", total, "\tPercent correct:", correct / float(total))
Developer: treyfeldman | Project: Hobb-s-Algorithm | Lines: 56 | Source: HobbsImplementation.py


Example 10: getTrees

def getTrees(source,size):
    '''Load the trees from source, return first SIZE trees'''
    if source=='treebank':
        from nltk.corpus import treebank
        trees = treebank.parsed_sents()
        #inds = random.permutation(range(0,len(trees)))[0:size]
        trees = trees[:size]
        return trees
    else:
        return list()
Developer: Jsalim | Project: NLP-Stuff | Lines: 10 | Source: PCFG_util.py


Example 11: TreebankNoTraces

def TreebankNoTraces():
    tb = []
    for t in treebank.parsed_sents():
        if t.label() != "S": continue
        RemoveFunctionTags(t)
        RemoveTraces(t)
        t.collapse_unary(collapsePOS = True, collapseRoot = True)
        t.chomsky_normal_form()
        tb.append(t)
    return tb
Developer: weitongruan | Project: Comp150NLP | Lines: 10 | Source: pset4.py


Example 12: learn_treebank

def learn_treebank(trees=None):
    """
    Learn a PCFG from the Penn Treebank, and return it.
    
    By default, this learns from NLTK's 10% sample of the Penn Treebank.
    You can also pass a set of trees.
    """
    if trees is None: bank = treebank.parsed_sents()
    else: bank = trees
    return learn_trees(bank, collapse=True)
Developer: JakeBrawer | Project: org | Lines: 10 | Source: learn_pcfg.py


Example 13: write_example_tree

def write_example_tree(features, f):
    filename = features['_filename']
    sen = features['_sentence_id']
    phr = features['_phrase_id']
    tree = treebank.parsed_sents(filename)[sen]
    phrase = tree[tree.treepositions('preorder')[phr]]
    l = treebank_helper.get_label(phrase)
    treebank_helper.set_label(phrase, '***' + l + '***')
    f.write(str(tree))
    f.write('\n')
    treebank_helper.set_label(phrase, l)
Developer: EddieNejadi | Project: Machine_Learning | Lines: 11 | Source: funtag.py


Example 14: treebank_accessor

def treebank_accessor():
  '''
  Function that reads the Penn treebank and returns all the trees 
  for each sentence in the corpus.
  '''
  trees = []

  for i in range(1, TREEBANK_FILES + 1):
    file_number = "%03d" % (i,)
    t = treebank.parsed_sents('wsj_0' + file_number + '.mrg')

    for sentence in range(len(t)):
      # For each sentence in the file, convert to a tree and add it to trees[]
      trees.append(t[sentence])

  return trees
Developer: barbaragabriela | Project: inlp-probabilistic-parsing | Lines: 16 | Source: helper.py


Example 15: get_treebank_rules

def get_treebank_rules(cutoff=0, include_counts=False):
    all_rules = cache_utils.cache_get('treebank_rules', 'rules')
    if not all_rules:
        log('Generating lexical rules from Penn Treebank', 4)
        from nltk.corpus import treebank
        all_rules = dict()
        for tree in treebank.parsed_sents():
            for rule, count in lexical_rules(tree).items():
                all_rules[rule] = all_rules.get(rule, 0) + count

        cache_utils.cache_set('treebank_rules', 'rules', all_rules)

    if include_counts:
        return {k: v for (k, v) in all_rules.items() if v > cutoff}
    else:
        rules_set = set([rule for rule, count in all_rules.items() if count > cutoff])
        return rules_set
Developer: snyderp | Project: cs412-scorer | Lines: 17 | Source: syntactic_formation.py


Example 16: read_treebank_files

def read_treebank_files(files, extractor, fe):
    """Read the listed treebank files and collect function tagging examples
    from each tree.

    The user-provided feature extractor is applied to each phrase in each 
    tree. The extracted feature dicts and the true function tags for each
    phrase are stored in two separate lists, which are returned.
    """
    X = []
    Y = []
    for filename in files:
        scount = 0
        for tree in treebank.parsed_sents(filename):
            tree = ParentedTree.convert(tree)
            treebank_helper.postprocess(tree)
            find_examples_in_tree(tree, X, Y, extractor, fe, filename, scount, 0)
            scount += 1
    return X, Y
Developer: EddieNejadi | Project: Machine_Learning | Lines: 18 | Source: funtag.py


Example 17: create_forests

    def create_forests(self, filename=None, treelist=None, clear=False):
        """ This will read sentences to parse. One sentence per line, no periods etc.

        :param filename: treeset file to read when no treelist is given and NLTK is unavailable
        :param treelist: list of pre-parsed input trees, used instead of reading a file
        :param clear: start with empty
        """
        filename = filename or Document.get_default_treeset_file()

        forests = []
        input_trees = []

        shared_lexicon = load_lexicon(Document.get_default_lexicon_file())
        print('loaded shared_lexicon: ', shared_lexicon)
        if treelist:
            input_trees = treelist
        elif has_nltk:
            print(f"reading trees {NLTK_TREE_RANGE[0]}-{NLTK_TREE_RANGE[1]} from NLTK's treebank")
            for i in range(*NLTK_TREE_RANGE):  # 199
                trees = treebank.parsed_sents(f'wsj_0{str(i).rjust(3, "0")}.mrg')
                for j, tree in enumerate(trees):
                    tree.chomsky_normal_form()
                    tree.collapse_unary()
                    input_trees.append(as_list(tree))
        else:
            readfile = open(filename, 'r')
            for line in readfile:
                line = line.strip()
                if line:
                    if line.startswith('[') and line.endswith(']'):
                        input_trees.append(ast.literal_eval(line))
                    else:
                        input_trees.append(line)

        for input_tree in input_trees:
            syn = classes.SyntaxAPI()
            syn.lexicon = shared_lexicon
            if isinstance(input_tree, list):
                syn.input_tree = input_tree
            else:
                syn.input_text = input_tree
            forest = Forest(heading_text=str(input_tree), syntax=syn)
            forests.append(forest)
        return forests
Developer: jpurma | Project: Kataja | Lines: 43 | Source: Document.py


Example 18: get_trees

def get_trees(fileids=None, verbose=False):
	""" 
	Get the CNF trees for the treebank fileids given, or for the entire treebank
	"""
	if not fileids:
		# Get the Penn Treebank corpus
		fileids = treebank.fileids()

	# Get the sentence-trees in each file
	tree_lists = [treebank.parsed_sents(file_id) for file_id in fileids]
	trees = [sent for sent_list in tree_lists for sent in sent_list]
	if verbose:
		print("obtained", len(trees), "trees from the corpus.")

	cnf_trees = [ctc.convert_tree(t) for t in trees]
	if verbose:
		print("converted", len(trees), "trees to cnf.")

	return cnf_trees
Developer: hmc-cs159-spring2016 | Project: banana | Lines: 19 | Source: main.py


Example 19: train_pcfg

def train_pcfg():
    # assumes: import nltk
    #          from nltk.corpus import treebank
    print('training grammar')
    productions = []
    # print(len(treebank.fileids()))
    trees = []
    # use fewer than all 199 files for a smaller grammar and quicker training
    for fileid in treebank.fileids()[0:20]:
        for tree in treebank.parsed_sents(fileid):
            # perform optional tree transformations, e.g.:
            # Remove branches A->B->C into A->B+C so we can avoid infinite
            # productions
            tree.collapse_unary(collapsePOS=False)
            # Turn A->(B,C,D) into A->B,C+D->D (binarization required by CKY parsers)
            # horizontal and vertical Markovization: remember parents and siblings in tree
            #     This gives a performance boost, but makes the grammar HUGE
            #     If we use these we would need to implement a tag forgetting method
            # tree.chomsky_normal_form(horzMarkov=0, vertMarkov=0)
            tree.chomsky_normal_form()
            productions += tree.productions()
    S = nltk.Nonterminal('S')
    grammar = nltk.induce_pcfg(S, productions)
    print("grammar trained!")
    return grammar
Developer: owenst | Project: geotweets | Lines: 23 | Source: write.py


Example 20: getAvgNodeLength

import nltk
from nltk.corpus import treebank
from nltk.probability import *
from nltk.grammar import *

### RETRIEVE ALL TREES AND THEN SELECT THE FIRST 100.
all_trees = treebank.parsed_sents()
trees_100 = all_trees[0:100]

### FUNCTION EXTRACTING LEAVES OF NODES WITH LABEL AS A PARAMETER OF getAvgNodeLength().
def getAvgNodeLength(label):

    l_leaves = list()
    for tree in trees_100:
        for node in tree:
            if node.label() == label:
                l_leaves.append(node.leaves())

### CREATED OWN LIST OF PUNCTUATION TO EXCLUDE SINCE USING string.punctuation WOULD
### HAVE DELETED WORDS SUCH AS "Dr.", "World-Wide", "U.S.", etc. WHICH ARE OF INTEREST.
    punct = [u"*", u",", u"&", u"'"]

    # `l_sbj` in the original was an undefined leftover name; the list being
    # filtered is l_leaves. Iterate over a copy so removal doesn't skip items.
    for wordlist in l_leaves:
        for word in list(wordlist):
            for i in punct:
                if i in word:
                    wordlist.remove(word)
                    break

### CREATE LIST OF LENGTHS (IN WORDS) OF NODES.
    l_len = list()
    for wordlist in l_leaves:
        l_len.append(len(wordlist))

    # (The source snippet is truncated at this point; returning the mean is
    #  the natural completion for getAvgNodeLength.)
    return sum(l_len) / len(l_len) if l_len else 0.0
Developer: albertomh | Project: py-nltk-parsing | Lines: 31 | Source: avg+viterbi.py



Note: The nltk.corpus.treebank.parsed_sents examples in this article were compiled by 纯净天空 from source-code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by their original authors; copyright remains with those authors, and redistribution or reuse should follow each project's license. Please do not republish without permission.

