本文整理汇总了Python中viterbi.viterbi函数的典型用法代码示例。如果您正苦于以下问题:Python viterbi函数的具体用法?Python viterbi怎么用?Python viterbi使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了viterbi函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: learning
def learning(T):
    """Train a structured-perceptron tagger for *T* epochs over stdin.

    Each stdin line holds space-separated "word_tag" tokens. The corpus,
    tag inventory and observed tag-bigram transitions are collected first;
    then every epoch viterbi-decodes each sentence and nudges the weights
    toward the gold feature vector and away from the predicted one.

    Returns (weight, possible_tags, transition).
    """
    weight = defaultdict(lambda: uniform(-1.0, 1.0))
    possible_tags = set(["<s>", "</s>"])
    transition = set()
    corpus = []
    for line in iter(sys.stdin.readline, ""):
        words, tags = [], []
        prev_tag = "<s>"
        for token in line.rstrip().split():
            (word, tag) = token.split('_')
            words.append(word)
            tags.append(tag)
            possible_tags.add(tag)
            transition.add(" ".join([prev_tag, tag]))
            prev_tag = tag
        # Close the sentence with the end-of-sentence transition.
        transition.add(" ".join([prev_tag, "</s>"]))
        corpus.append((words, tags))
    total = len(corpus)
    for epoch in range(T):
        for idx, (words, gold_tags) in enumerate(corpus):
            sys.stdout.write("\rIteration %d, linenum %d / %d" % (epoch + 1, idx + 1, total))
            sys.stdout.flush()
            predicted = viterbi(weight, words, possible_tags, transition)
            gold_phi = create_feature(words, gold_tags)
            pred_phi = create_feature(words, predicted)
            update_weight(weight, gold_phi, pred_phi)
    return (weight, possible_tags, transition)
开发者ID:biligee,项目名称:nlp_programming_tutorial,代码行数:26,代码来源:learning.py
示例2: evaluate
def evaluate(model, examples, gold, label=None):
    """Score *model* on *examples* against *gold* labels.

    Reports two classification summaries: one for the plain per-token
    argmax of the network output, and one for the viterbi-smoothed
    prediction over the same output scores.
    """
    raw = model.predict({'input': examples },
                        batch_size=config.batch_size)['output']
    output = list(raw)
    # Per-token argmax over the label axis, flattened to one long sequence.
    pred = np.argmax(np.asarray(output), axis=2).flatten()
    # Viterbi decode over the concatenated score matrix.
    vpred = viterbi.viterbi(np.concatenate(output), *viterbi_probabilities)
    plain = common.classification_summary(gold, pred)
    smoothed = common.classification_summary(gold, vpred)
    return plain + '\n' + 'w/viterbi ' + smoothed
开发者ID:billy322,项目名称:BioNLP-2016,代码行数:7,代码来源:rnn.py
示例3: seg
def seg(inp):
    """segmenter main function

    Splits *inp* into candidate segmentations (an odd trailing character
    is carried separately as *tail*), scores each candidate with the
    viterbi decoder, and returns the best-scoring path, or False when
    nothing was decodable.

    Fixes vs. original: `is not None` instead of `== None`, the local
    result list no longer shadows the function name, and a dead debug
    loop over the results was removed.
    """
    tail = ""
    # An odd-length input keeps its last character aside as the tail.
    if len(inp) % 2 != 0:
        tail = inp[-1]
        inp = inp[:-1]
    #load wubimap
    for line in wubi:
        wubi_map[line[0]] = int(line[1])
    #all segments here
    segments = []
    all_seg(inp, segments, 0, tail)
    #find viterbi path every segment
    vit = []
    for segline in segments:
        ans = vt.viterbi(segline, uni_map, big_map, wd_map, wubi)
        if ans is not None:
            #ans[1] = ans[1] - 20*len(ans[0])
            vit.append(ans)
    # Ascending sort by score: the best path ends up last.
    vit.sort(key=lambda path: path[1], reverse=False)
    #return max viterbi path
    if len(vit) > 0:
        return vit[-1][0]
    else:
        return False
开发者ID:ChestnutHeng,项目名称:Wubi-Kernel,代码行数:34,代码来源:segment.py
示例4: viterbi_decode
def viterbi_decode(self, seq):
    """Return a copy of *seq* whose labels are the viterbi-optimal path
    under this model's node/edge potentials."""
    node_pot, edge_pot = self.build_potentials(seq)
    best_path, _ = viterbi(node_pot, edge_pot)
    decoded = seq.copy_sequence()
    decoded.y = best_path
    return decoded
开发者ID:Joao-M-Almeida,项目名称:lxmls-toolkit,代码行数:7,代码来源:hmm_n_order.py
示例5: decode
def decode(self, initials):
    """Decode an initialism into its most likely word sequence.

    Candidate states are all words starting with any observed letter;
    viterbi then picks the best sequence under the model's start,
    transition and emission probabilities. Timing is logged.
    """
    timer = Timer()
    # Gather every word that could explain one of the observed letters.
    candidates = set()
    for letter in initials:
        candidates.update(self.words_by_letter[letter])
    logger.info("Searching %s possible states", len(candidates))
    result = viterbi.viterbi(initials, candidates, self.start_p,
                             self.transition_p, self.emission_p)
    logger.info("Decoding %r took %s s", initials, timer.elapsed())
    return result
开发者ID:shirlston,项目名称:initialisms,代码行数:9,代码来源:decode.py
示例6: t_BMES
def t_BMES():
    """Smoke-test BMES segmentation: viterbi-decode each sample sentence
    and print it with '|' inserted after word-final (E) and single (S) tags."""
    PI, A, B = build()
    S = B.keys()
    # Every state needs an initial probability; default missing ones to 0.
    for k in S:
        if k not in PI:
            PI[k] = 0.0
    for sen in samples:
        Y = tuple(sen)
        prob, X = viterbi(Y, S, PI, A, B)
        # 'ES' tags close a word, so append the '|' boundary marker there.
        print u''.join(sen[i] + (X[i] in 'ES' and '|' or '') for i in xrange(len(sen)))
开发者ID:Catentropy,项目名称:mylab,代码行数:10,代码来源:t_wordseg.py
示例7: test_trellis
def test_trellis(self):
    """Verify the viterbi trellis log-probabilities against precomputed
    values, to 3 decimal places."""
    # viterbi returns a triple; the middle element is the trellis matrix.
    _, actual, _ = viterbi(self.obs, self.A, self.B, self.pi)
    # Expected log-probabilities for hidden states "H" and "L" at each step.
    expected = LMatrix(("H", "L"),
                       xrange(len(self.obs)),
                       data = np.array([
                           [ -2.737, -5.474, -8.211, -11.533, -14.007, -17.329, -19.54, -22.862, -25.657],
                           [ -3.322, -6.059, -8.796, -10.948, -14.007, -16.481, -19.54, -22.014, -24.487]
                       ])
                       )
    for s in actual.rlabels:
        for t in actual.clabels:
            self.assertAlmostEqual(actual[s,t], expected[s,t], 3)
开发者ID:xiaohan2012,项目名称:irem,代码行数:12,代码来源:t_viterbi.py
示例8: evaluate
def evaluate():
    """Viterbi-tag a CoNLL test split with perceptron + CCA features.

    Reads 'inputs/eng.test<sys.argv[2]>', writes 'word POS gold predicted'
    lines to the results file, and keeps 'progress_test.txt' updated with
    a percent-complete / ETA estimate. Model parameters are loaded into
    module-level globals by the get_* helpers.
    """
    global possible_tags
    global strings
    global cca_length
    # Load model components shared through module globals.
    get_words()
    get_strings()
    get_alpha()
    get_phi()
    get_regExp()
    # get_codeWords()
    get_cca()
    # cca_length = len(cca1['amended'])
    cca_length = 20
    data = open('inputs/eng.test{0}'.format(sys.argv[2]), 'r')
    s = 'outputs_cca_pos_egw30_rounding_currentOnly/result_{0}_{1}.txt'.format(sys.argv[2], sys.argv[1])
    output = open(s, 'w')
    # Copy the header line, then skip the following blank line.
    line = data.readline()
    output.write('{0}\n\n'.format(line.strip()))
    line = data.readline()
    vals = get_sentence(data)
    sentence = vals[0]
    correct_tags = vals[1]
    POS = vals[2]
    count = 0
    time1 = 0.0
    time2 = 0.0
    avg_time = 0.0
    time_val = 0.0
    first = True
    while sentence:
        #------------------------
        #-------TIME-STATS-------
        #------------------------
        count += 1
        time2 = time()
        if not first:
            # Running average of per-sentence time -> remaining-time estimate.
            avg_time = (avg_time*(count-1)+(time2-time1))/count
            time_val = int((avg_time)*(number_of_sentences-count))
        first = False
        progress = open('progress_test.txt', 'w')
        progress.write('Percent complete:\n{0}/{1} = {2}%\n\nTime remaining: \n{3} h {4} min {5} sec'.format(int(count), int(number_of_sentences), float(count*100)/float(number_of_sentences), time_val/3600, (time_val%3600)/60, time_val%60))
        time1 = time2
        progress.close()
        #--------------------------
        #--------------------------
        tags = viterbi.viterbi(sentence, POS, phi, possible_tags, alpha, strings, Words, regExp, codes, cca1, cca_length)
        for i in range(len(sentence)):
            output.write('{0} {1} {2} {3}\n'.format(sentence[i], POS[i][0], correct_tags[i], tags[i]))
        output.write('\n')
        vals = get_sentence(data)
        sentence = vals[0]
        correct_tags = vals[1]
        POS = vals[2]
开发者ID:NweZinOo,项目名称:perceptron-crf,代码行数:53,代码来源:perceptron_test.py
示例9: perceptron
def perceptron(print_alpha = 0, mult = 0, import_alpha = 0):
    """Train the structured-perceptron tagger until convergence or T_DEFAULT epochs.

    mult selects multiplicative updates (scale weights by mult_factor)
    instead of additive ones (shift by add_factor); import_alpha reloads
    previously saved weights; print_alpha dumps the weights after each
    epoch. Training data is re-read from sys.argv[2] every epoch, and
    weights are maintained in the module-level alpha / alpha_average.
    """
    global alpha
    global alpha_average
    global possible_tags
    global strings
    global strings_abr
    global add_factor
    global mult_factor
    init_phi_alpha(mult)
    get_strings()
    if import_alpha:
        read_alpha()
    alpha_average = copy.deepcopy(alpha)
    for t in range(T_DEFAULT):
        print '---{0}---'.format(t)
        sys.stdout.flush()
        # Stays True only if every sentence this epoch was tagged correctly.
        dont_repeat = True
        data = open(sys.argv[2], 'r')
        vals = get_sentence_and_tags(data)
        j = 0
        while vals:
            sentence = vals[0]
            correct_tags = vals[1]
            result = viterbi.viterbi(sentence, phi, possible_tags, alpha, strings, strings_abr, mult)
            z = result[0]
            indices = result[1]
            if not z == correct_tags:
                dont_repeat = False
                correct_indices = get_indices(sentence, correct_tags)
                if mult:
                    # Multiplicative update: demote predicted, promote gold features.
                    for i in indices:
                        alpha[i] = float(alpha[i])/mult_factor
                    for i in correct_indices:
                        alpha[i] = float(alpha[i])*mult_factor
                else:
                    # Additive perceptron update.
                    for i in indices:
                        alpha[i] += -1*add_factor
                    for i in correct_indices:
                        alpha[i] += add_factor
            else:
                j += 1
            # Averaged perceptron: accumulate the current weights per sentence.
            for i in range(len(alpha)):
                alpha_average[i] += alpha[i]
            vals = get_sentence_and_tags(data)
        data.close()
        if dont_repeat:
            print 'SUCCESS!!!'
            break
        # print 'number correct: {0}'.format(j)
        if print_alpha:
            write_alpha(t)
开发者ID:ROZBEH,项目名称:hmm-perceptron,代码行数:51,代码来源:perceptron.py
示例10: run
def run(self):
    """Train an HMM and run viterbi decoding.

    In test mode, trains on the train file, decodes the test file (timing
    the decode) and returns (predicted, actual, tokens). Otherwise splits
    the training data for cross-validation and returns (predicted, actual).
    """
    if self.isTest:
        print "Running HMM"
        h = HiddenMarkovModel(self.train_file,smoothed=self.smoothing)
        print "Running Viterbi"
        # Wall-clock the decode with two clock() timestamps.
        toc = time.clock()
        predicted = viterbi(h,self.test_file, test = False)
        tic = time.clock()
        print "Viterbi ran in %f seconds"%(tic-toc)
        actual, tokens = zip(*self.parse_file(self.test_answers))
        return (predicted,actual,tokens)
    else:
        print "Splitting Data"
        (train,test) = self.splitCV(self.parse_file(self.train_file),self.cv_validation_percentage)
        print "Converting Lists"
        # Re-serialize the split back into the text formats the model/decoder expect.
        train_text = "".join(["%s %s\n" % (p,t) for [p,t] in train])
        test_text = "".join(["%s\n" % t for [p,t] in test])
        print "Running HMM"
        h = HiddenMarkovModel(text=train_text, smoothed=self.smoothing)
        print "Running Viterbi"
        predicted = viterbi(h,text=test_text, test=False)
        actual = self.getActual(test)
        return (predicted,actual)
开发者ID:amm385,项目名称:POS,代码行数:23,代码来源:Analyzer.py
示例11: t_wordseg
def t_wordseg():
    """Smoke-test word segmentation: add '|'-boundary state variants to the
    emission table, then viterbi-decode and print each sample."""
    PI, A, B = build(True)
    # NOTE: B.keys() snapshots the keys (a list in Python 2), so mutating
    # B inside this loop is safe here.
    for k in B.keys():
        if '|' == k[-1]:
            # Boundary-marked key: its bare form emits itself with probability 1.
            B[k[:-1]] = {k[:-1]: 1.0}
        else:
            # Plain key: add a boundary-marked twin sharing the same emissions.
            B[k + '|'] = B[k]
    S = B.keys()
    # Every state needs an initial probability; default missing ones to 0.
    for k in S:
        if k not in PI:
            PI[k] = 0.0
    for sen in samples:
        Y = tuple(sen)
        prob, X = viterbi(Y, S, PI, A, B)
        print u''.join(X)
开发者ID:Catentropy,项目名称:mylab,代码行数:15,代码来源:t_wordseg.py
示例12: __cut
def __cut(sentence):
    """Cut *sentence* into (word, flag) pairs via a BMES HMM viterbi decode.

    B..E tag spans yield one multi-character word, S tags yield single
    characters, and any trailing remainder after the last completed span
    is yielded as-is. Each yielded pair carries the POS flag from the
    decoded state.

    Fix vs. original: the cursor variable no longer shadows the builtin
    `next`.
    """
    prob, pos_list = viterbi.viterbi(sentence, char_state_tab_P, start_P, trans_P, emit_P)
    begin, nxt = 0, 0  # nxt = index just past the last emitted segment
    for i, char in enumerate(sentence):
        pos = pos_list[i][0]
        if pos == 'B':
            begin = i
        elif pos == 'E':
            yield pair(sentence[begin:i+1], pos_list[i][1])
            nxt = i+1
        elif pos == 'S':
            yield pair(char, pos_list[i][1])
            nxt = i+1
    # Emit any trailing characters not covered by a completed B..E or S span.
    if nxt < len(sentence):
        yield pair(sentence[nxt:], pos_list[nxt][1])
开发者ID:smerdy,项目名称:newsmosaic,代码行数:16,代码来源:__init__.py
示例13: viterbi_run
def viterbi_run(training, test_file):
    """Build HMM tables (transitions, likelihoods) from *training* and
    viterbi-tag every sentence of *test_file*.

    NOTE(review): this excerpt appears truncated — error_list, new_sentences
    and count are initialized but the visible body never uses vit_sent or
    returns anything; confirm against the full source.
    """
    #returns a list of sentence list containing tuples (word,part of speech)
    corpus_list = viterbi.corpus_list(training)
    #creates a dictionary of corpus part of speech tag : occurences
    corpus_dictionary = viterbi.corpus_dictionary(training)
    #pos_keys
    keys = viterbi.key_list(corpus_dictionary)
    #creates the prior_probabilities transitions table for the entire corpus
    prior_probabilities_table = viterbi.transition_table(corpus_dictionary,corpus_list)
    #creates a word dictionary
    #word: list of part of speeches and increment occurences of word as part of speech
    word_dic = viterbi.word_dic(corpus_list,keys)
    #word_keys
    words = viterbi.key_list(word_dic)
    #likelihood_table
    likelihood_table = viterbi.word_freq(corpus_dictionary,word_dic)
    #Emissions and Transitions
    sentences = viterbi.corpus_list_2(test_file)
    error_list = []
    error_list_i = []
    new_sentences = []
    count = 0
    for sentence in sentences:
        trans = viterbi.sentence_tag(sentence,keys,words,likelihood_table)
        s_pos = viterbi.sentence_pos(trans)
        transition_table = viterbi.transition_probabilities(trans,s_pos,prior_probabilities_table,keys)
        observed_like = viterbi.observed_likelihoods(sentence,s_pos,trans,likelihood_table,words,keys)
        vit_sent = viterbi.viterbi(observed_like,sentence,s_pos,transition_table)
开发者ID:lesliemanrique1,项目名称:HMM-Part-of-Speech-Tagger,代码行数:46,代码来源:viterbi_run.py
示例14: evaluate
def evaluate():
    """Viterbi-tag each sentence of an input file and write 'word tag' lines.

    Reads the input path from sys.argv[4] and the output path from
    sys.argv[5]; sentences are separated by blank lines in the output.
    Model parameters are loaded into module-level globals by the get_*
    helpers.

    Fix vs. original: both file handles were opened and never closed;
    context managers now guarantee they are released.
    """
    global possible_tags
    global strings
    global strings_abr
    # Load model components shared through module globals.
    get_words()
    get_strings()
    get_alpha()
    get_phi()
    get_regExp()
    with open(sys.argv[4], 'r') as data, open(sys.argv[5], 'w') as output:
        sentence = get_sentence(data)
        while sentence:
            tags = viterbi.viterbi(sentence, phi, possible_tags, alpha, strings, strings_abr, Words, regExp)
            for i in range(len(sentence)):
                output.write('{} {}\n'.format(sentence[i], tags[i]))
            output.write('\n')
            sentence = get_sentence(data)
开发者ID:ROZBEH,项目名称:hmm-perceptron,代码行数:18,代码来源:perceptron_test.py
示例15: gen_couplet
def gen_couplet(transition_prob_tree, output_prob_tree, unigram_freq, first_half):
assert type(first_half) == unicode
couplet_length = len(first_half)
visible_words = np.array([first_half[i] for i in range (couplet_length)])
hidden_candidate_words = np.array([u' ' for _ in range(top_k_word*couplet_length)]).reshape(top_k_word, couplet_length)
output_prob = np.random.rand(top_k_word, couplet_length)
for i in range(couplet_length):
key = first_half[i]
if not output_prob_tree.has_key(key):
print '%s, Cannot generate couplet' % key
return ''
hash_leaf = output_prob_tree[key]
hidden_candidate_words[:,i], output_prob[:,i] = gen_candidates(first_half, hash_leaf, top_k_word)
for i in range(couplet_length):
candidate = u''
for j in range(top_k_word):
candidate += hidden_candidate_words[j, i]
try:
transition_prob, init_prob = init_model(transition_prob_tree, unigram_freq, hidden_candidate_words, top_k_word)
except:
return ''
optimal_path, prob = viterbi(transition_prob, output_prob, init_prob, [], visible_words, top_k_word, top_k_candidate)
optimal_path = deal_repeat(first_half, optimal_path)
results = []
for i in range(optimal_path.shape[0]):
second_half = ''
for j in range(optimal_path.shape[1]):
second_half += hidden_candidate_words[optimal_path[i, j], j]
score = ranking_function(output_prob_tree, first_half, second_half)
results.append((score, second_half))
results = sorted(results, reverse=True)[:top_k_output]
return results
开发者ID:dnc1994,项目名称:ReKan,代码行数:39,代码来源:gen_couplets.py
示例16: perceptron
def perceptron(print_alpha = 0):
    """Train the structured-perceptron tagger (additive updates) until
    convergence or T_DEFAULT epochs.

    Training data is re-read from sys.argv[1] each epoch; weights live in
    the module-level alpha, with an averaged copy in alpha_average.
    print_alpha dumps the weights after each epoch.
    """
    global possible_tags
    global strings
    global strings_abr
    global add_factor
    get_regExp()
    get_strings()
    get_tags()
    for t in range(T_DEFAULT):
        print '---{0}---'.format(t)
        sys.stdout.flush()
        # Stays True only if every sentence this epoch was tagged correctly.
        dont_repeat = True
        data = open(sys.argv[1], 'r')
        vals = get_sentence_and_tags(data)
        j = 0
        while vals:
            sentence = vals[0]
            correct_tags = vals[1]
            tags = viterbi(sentence, phi, possible_tags, alpha, strings, strings_abr, Words, regExp)
            indices = get_indices(sentence, tags)
            if not tags == correct_tags:
                dont_repeat = False
                correct_indices = get_indices(sentence, correct_tags)
                # Additive perceptron update: demote predicted, promote gold.
                for i in indices:
                    alpha[i] += -1*add_factor
                for i in correct_indices:
                    alpha[i] += add_factor
            else:
                j += 1
            # Averaged perceptron: accumulate current weights per sentence.
            for i in alpha:
                alpha_average[i] += alpha[i]
            vals = get_sentence_and_tags(data)
        data.close()
        if dont_repeat:
            print 'SUCCESS!!!'
            break
        print 'number correct: {0}'.format(j)
        if print_alpha:
            write_alpha(t)
开发者ID:ROZBEH,项目名称:hmm-perceptron,代码行数:39,代码来源:perceptron.py
示例17: k_fold_cross_valid_known
def k_fold_cross_valid_known(k, parsed, known, discounts):
    """k-fold cross-validation of the HMM tagger over several discount values.

    For each fold and each discount: builds count tables, viterbi-decodes
    every test sentence, and accumulates per-token accuracy. Prints
    per-fold accuracy and, at the end, the average accuracy per discount.
    """
    res = defaultdict(list)
    for train, test in _fold(parsed, k):
        for discount in discounts:
            print 'train: ', len(train), 'test: ', len(test)
            tag2id, word2id = build_dict(parsed)
            # Inverse maps: id -> tag/word, for turning viterbi output back into tokens.
            id2tag = {v: k for k, v in tag2id.iteritems()}
            id2word = {v: k for k, v in word2id.iteritems()}
            emission, transition = _counter_known(parsed, train, known,
                    0.85, tag2id, word2id, discount)
            count_ok, count_total = 0., 0.
            for i, seq in enumerate(test):
                out = viterbi(seq, transition, emission, word2id, tag2id)
                # seq[1:-1] — presumably strips sentence boundary markers; verify against _compare.
                ok, total = _compare(seq[1:-1], id_to_token(out, id2word, id2tag))
                count_ok += ok; count_total += total
                if DEBUG:
                    print 'evaluating', i, 'th sentence.', count_ok/count_total, 'so far.'
            res[discount].append(count_ok/count_total)
            print 'Fold accuracy: ', res[discount][-1], 'discount: ', discount
    for d in res:
        print 'discount:', d, '->', 'avg:', np.mean(res[d])
开发者ID:liusiqi43,项目名称:ox-computational-linguistics,代码行数:22,代码来源:eval.py
示例18: test_model
def test_model(corpus):
    """Evaluate the bigram HMM tagger on up to 1000 sentences of *corpus*
    and write the results to file (the trigram path is commented out)."""
    cp = corpus.corpus_sentence
    # Split each (word, pos) paragraph into parallel word / POS lists.
    word_list = list()
    pos_list = list()
    for paragraph in cp:
        text = []
        pos = []
        for tp in paragraph:
            text.append(tp[0])
            pos.append(tp[1])
        word_list.append(text)
        pos_list.append(pos)
    initp, trans_bi, emiss = corpus.get_statistics_model(tri_gram=False)
    _, trans_tri, emiss = corpus.get_statistics_model(tri_gram=True)
    bigram_result = []
    trigram_result = []
    count = 0
    # Decode at most 1000 paragraphs with the bigram model.
    for paragraph in word_list:
        pos_bi = vtb.viterbi(paragraph, corpus.pos_list_sentence, initp, trans_bi, emiss)
        # pos_tri = vtb.viterbi_trigram(paragraph, corpus.pos_list_sentence, initp, trans_tri, emiss)
        bigram_result.append(pos_bi)
        # trigram_result.append(pos_tri)
        print(count)
        count += 1
        if count == 1000:
            break
    tp, tn, fp, fn, other = evaluate_sentence(pos_list[0:1000], bigram_result)
    write_results_to_file("test/test_model_orchid_bigram", word_list[0:1000], pos_list, bigram_result, tp, tn, fp, fn, other, test_text="bigram model test")
开发者ID:myscloud,项目名称:Question-Generation-Thai,代码行数:38,代码来源:test_model.py
示例19: predict_one
def predict_one(weight, words, possible_tags, transition):
    """Viterbi-decode one sentence and return its tags as a space-joined string."""
    tags = viterbi(weight, words, possible_tags, transition)
    return " ".join(tags)
开发者ID:biligee,项目名称:nlp_programming_tutorial,代码行数:2,代码来源:predict.py
示例20: viterbi
import numpy as np
from viterbi import viterbi
if __name__ == '__main__':
    # Toy HMM: 2 hidden states, 3 observation symbols.
    n_hid = 2
    n_obs = 3
    # Uniform hidden-state transition matrix.
    trans_hid = np.array( [ [0.5,0.5], [0.5,0.5] ] )
    # Per-state emission probabilities over the 3 observation symbols.
    trans_obs = np.array( [ [0.5,0.4,0.1], [0.4,0.1, 0.5] ])
    solver = viterbi(n_hid, n_obs, trans_hid, trans_obs)
    # Fixed observation sequence to decode.
    obs = np.array( [0,1,1,0,2,0,2,2,2,0,2,2,2,2,2,0,0,1,1,2] )
    # Most likely path of hidden states given the observations.
    mlp = solver.get_MLP(obs)
    print mlp
开发者ID:EmCeeEs,项目名称:machine_learning,代码行数:18,代码来源:chimp.py
注:本文中的viterbi.viterbi函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。 |
请发表评论