本文整理汇总了Python中tamil.utf8.get_letters函数的典型用法代码示例。如果您正苦于以下问题：Python get_letters函数的具体用法？Python get_letters怎么用？Python get_letters使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了get_letters函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: removeSuffix
def removeSuffix(self, word):
    """Strip the longest matching suffix from ``word``.

    The word is split into letters with ``utf8.get_letters`` and reversed, so
    that suffix matching becomes a prefix test against the entries of
    ``self.reversed_suffixes``.  When a suffix matches, its letters are
    removed from the end of the word; if ``self.replace_suffixes`` maps the
    removed suffix to a replacement, that replacement is appended.

    Returns a ``(word, removed)`` tuple where ``removed`` is True only when a
    suffix was stripped.
    """
    if not self.possible_suffixes:
        # Build the suffix tables when they have not been set up yet.
        self.setSuffixes()
        self.prepareSuffixes()

    letters = utf8.get_letters(word)
    backwards = copy.copy(letters)
    backwards.reverse()
    reversed_word = u"".join(backwards)

    # The comparison is "<=", so a later candidate of equal length wins.
    best = ""
    for candidate in self.reversed_suffixes:
        if reversed_word.startswith(candidate) and len(best) <= len(candidate):
            best = candidate

    if len(best) == 0:
        return word, False

    # Pop one letter per letter of the matched suffix; they come off the end
    # of the word in reverse order and are flipped back afterwards.
    popped = [letters.pop() for _ in range(len(utf8.get_letters(best)))]
    popped.reverse()
    stem = u"".join(letters)

    # Rule to replace the removed suffix, if one is registered.
    replacement = self.replace_suffixes.get(u"".join(popped), None)
    if replacement:
        stem = stem + replacement
    return stem, True
开发者ID:Ezhil-Language-Foundation,项目名称:open-tamil,代码行数:33,代码来源:morphology.py
示例2: test_reverse_words
def test_reverse_words(self):
    """Unit test: reversing a Tamil word swaps its first and last letters."""
    # print() with a single argument behaves the same on Python 2 and 3,
    # unlike the print statement, which is a syntax error on Python 3.
    print(utf8.get_letters(u"இந்த"))
    print(u"".join(utf8.get_letters(u"இந்த")))
    for word in u"இந்த (C) tamil முத்தையா அண்ணாமலை 2013 இந்த ஒரு எழில் தமிழ் நிரலாக்க மொழி உதாரணம்".split():
        rword = utf8.reverse_word(word)
        print(u"%s %s" % (word, rword))
        # The first letter of the reversed word must be the last letter of
        # the original; assertEqual reports both values on failure.
        self.assertEqual(utf8.get_letters(rword)[0], utf8.get_letters(word)[-1])
开发者ID:tk120404,项目名称:open-tamil,代码行数:9,代码来源:letter_tests.py
示例3: test_istamil
def test_istamil(self):
    """Check ``utf8.istamil`` letter by letter on Tamil and mixed text."""
    zz = u"முத்தையா அண்ணாமலை எந்த ஒரு தெரிந்த அல்லது தெரியாத எழுத்துருவாகவிருந்தாலும் அதனை மேல்தட்டில் உள்ளிட்டு கீழே உள்ள முடியும்"
    for z in zz.split(u" "):
        print("********** t/f ********")
        letters = utf8.get_letters(z)
        flags = [utf8.istamil(letter) for letter in letters]
        for x, y in zip(flags, letters):
            print("%s => %s" % (y, x))
        # Every letter of a pure Tamil word must be classified as Tamil.
        assert all(flags)

    z = u"முத்தையா அண்ணாமலை"
    assert any(utf8.istamil(letter) for letter in utf8.get_letters(z))

    correct = [True, True, True, True, False, True, True, True, True, True, False, False, False, False, False]
    # Build a real list: on Python 3 map() returns an iterator, which never
    # compares equal to a list, so the original comparison always failed.
    actual = [utf8.istamil(letter) for letter in utf8.get_letters(u"முத்தையா அண்ணாமலை 2013")]
    assert actual == correct
开发者ID:tk120404,项目名称:open-tamil,代码行数:13,代码来源:letter_tests.py
示例4: test_entity
def test_entity(self):
    """A WordEntity exposes its word, its letters and its (row, col)."""
    text = u"nuthin"
    entity = WordEntity(text, row=5, col=6)

    self.assertEqual(entity.word, text)
    # Letters must match what get_letters yields for the same text.
    expected_letters = utf8.get_letters(u"nuthin")
    self.assertEqual(entity.letters, expected_letters)
    position = (entity.row, entity.col)
    self.assertEqual(position, (5, 6))
    self.assertTrue(entity.isWord())
开发者ID:Ezhil-Language-Foundation,项目名称:open-tamil,代码行数:7,代码来源:solthiruthi_dom.py
示例5: getWordCount
def getWordCount(self, word):
    """Return the count recorded for ``word``.

    The trie node for the word is obtained from ``self.isWord`` and its
    ``count`` mapping is indexed by the word's final letter.

    Raises:
        Exception: if ``self.isWord`` reports that the word is not present.
    """
    found, node = self.isWord(word, ret_ref_trie=True)
    if found:
        final_letter = utf8.get_letters(word)[-1]
        return node.count[final_letter]
    raise Exception(u"Word does not exist in Trie")
开发者ID:msathia,项目名称:Ezhil-Lang,代码行数:7,代码来源:datastore.py
示例6: norvig_suggestor
def norvig_suggestor(word, alphabets=None, nedits=1, limit=float("inf")):
    """Return the set of candidate words reachable from ``word`` by edits.

    Candidates are produced with the edit operations of Norvig's spelling
    corrector (delete, transpose, replace, insert), applied to whole letters
    as produced by ``get_letters`` rather than to single code points, so that
    a multi-code-point letter is never cut in half.

    Args:
        word: the word to edit, either a string or an already split list of
            letters.
        alphabets: letters used for replacements and insertions; falls back
            to ``tamil_letters`` when empty or None.
        nedits: number of edit rounds to apply (1 = single edits).
        limit: soft cap on the number of candidates collected when
            ``nedits`` is greater than 1; it is not applied to a single
            round of edits.

    Returns:
        A set of candidate strings.
    """
    if not alphabets:
        alphabets = tamil_letters
    # An already split word (list of letters) is used as is.
    letters = list(word) if isinstance(word, list) else list(get_letters(word))

    # Recursive method for edit distance > 1: apply one more round of single
    # edits to every candidate of the previous round.
    if nedits > 1:
        result = []
        for alternate in norvig_suggestor(letters, alphabets, nedits - 1, limit - len(result)):
            if len(result) > limit:
                break
            result.extend(norvig_suggestor(alternate, alphabets, 1, limit - len(result)))
        return set(result)

    # Every (head, tail) split of the word, kept as lists of letters.  The
    # split point is ``idx`` on both sides; the previous ``[:idx-1]`` head
    # dropped a letter at each split and wrapped around at idx == 0.
    splits = [(letters[:idx], letters[idx:]) for idx in range(len(letters) + 1)]
    join = u"".join

    deletes = [join(head + tail[1:]) for head, tail in splits if tail]
    transposes = [
        join(head + [tail[1], tail[0]] + tail[2:])
        for head, tail in splits
        if len(tail) > 1
    ]
    replaces = [
        join(head + [letter] + tail[1:])
        for head, tail in splits
        if tail
        for letter in alphabets
    ]
    inserts = [join(head + [letter] + tail) for head, tail in splits for letter in alphabets]
    # The former "replaces2" list (letter + tail, head discarded) is gone: it
    # produced truncated candidates more than one edit away from the word.
    return set(deletes + transposes + replaces + inserts)
开发者ID:Ezhil-Language-Foundation,项目名称:open-tamil,代码行数:25,代码来源:suggestions.py
示例7: test_letter_extract_with_ascii
def test_letter_extract_with_ascii(self):
    """Letters of mixed Tamil/ASCII text are extracted one unit at a time."""
    letters = utf8.get_letters(u"கூவிளம் is என்பது also என்ன a சீர்")
    # Formatted print() call: works on Python 2 and 3 and emits the same
    # text as the former print statement.
    print("len ==== >  %d" % len(letters))
    assert len(letters) == 25
    for pos, letter in enumerate(letters):
        print(u"%d %s" % (pos, letter))
    # The lone ASCII "a" sits four letters from the end.
    assert letters[-4] == u"a"
开发者ID:srikanthlogic,项目名称:open-tamil,代码行数:7,代码来源:letter_tests.py
示例8: test_classifier
def test_classifier(self):
    """``utf8.classify_letter`` labels each letter of a mixed string."""
    expected = (
        ['english'] * 3
        + ['digit'] * 4
        + ['kuril', 'nedil', 'uyirmei', 'vallinam', 'uyirmei']
    )
    data = [
        utf8.classify_letter(letter)
        for letter in utf8.get_letters(u"abc1230அஆரெட்டை")
    ]
    self.assertEqual(data, expected)
开发者ID:msathia,项目名称:Ezhil-Lang,代码行数:7,代码来源:letter_tests2.py
示例9: test_words_to_letters
def test_words_to_letters(self):
    """Each space-separated word splits into the expected number of letters."""
    k1 = u"இந்தக் குளிர்ல டெய்லி தலைக்கு குளிக்கற நல்லவங்க இருக்கறதாலதான் கோவை இப்படி சூப்பரா இருக்காம்"
    word_length = [4, 4, 3, 4, 5, 6, 9, 2, 4, 4, 5]
    tokens = k1.split(' ')
    for position, token in enumerate(tokens):
        letter_count = len(get_letters(token))
        print('w# ', position, letter_count)
        self.assertEqual(word_length[position], letter_count)
开发者ID:Ezhil-Language-Foundation,项目名称:open-tamil,代码行数:7,代码来源:keechu.py
示例10: test_letter_extract_from_code_pts
def test_letter_extract_from_code_pts(self):
    """Code points are grouped into the expected 15 letters."""
    letters = utf8.get_letters(u"கூவிளம் என்பது என்ன சீர்")
    assert len(letters) == 15
    for numbered in enumerate(letters):
        print(u"%d %s" % numbered)
    # The last letter extracted is the final letter of the text.
    assert letters[-1] == u"ர்"
开发者ID:tk120404,项目名称:open-tamil,代码行数:7,代码来源:letter_tests.py
示例11: get
def get(word):
    """Build a ``Feature`` describing the letter make-up of ``word``.

    Whitespace is stripped and inner spaces removed, the word is split into
    letters, and the feature object is filled with: the letter count, the
    unigram score, the best bigram score, per-class letter counts normalised
    by the letter count, and scores for the first and last letter.

    NOTE(review): an empty word is not handled here; the normalisation
    divides by the letter count and the first/last letter are indexed.

    Returns:
        The populated ``Feature`` instance.
    """
    word = word.strip()
    word = word.replace(u' ', u'')
    letters = utf8.get_letters(word)
    F = Feature()
    F.nletters = len(letters) * 1.0
    F.unigscore = unigram_score(letters)
    F.bigscore = max(bigram_scores(letters))
    for letter in letters:
        try:
            rtl = reverse_transliterate(letter)
            # str.startswith accepts a tuple of prefixes.  The previous list
            # comprehension reused the loop variable's name, which on
            # Python 2 overwrote the letter being classified below.
            if rtl.startswith(('a', 'e', 'i', 'o', 'u')):
                F.vowels += 1.0
        except Exception:
            # Best effort: a letter that cannot be transliterated simply
            # does not contribute to the vowel count.
            pass
        kind = utf8.classify_letter(letter)
        if kind == 'kuril':
            F.kurils += 1
        elif kind == 'nedil':
            F.nedils += 1
        elif kind == 'ayudham':
            F.ayudhams += 1
        elif kind == 'vallinam':
            F.vallinams += 1
        elif kind == 'mellinam':
            F.mellinams += 1
        elif kind == 'idayinam':
            F.idayinams += 1
        elif kind in ['english', 'digit']:
            continue
        elif kind == 'tamil_or_grantham':
            F.granthams += 1

    # Normalise every count by the number of letters, each exactly once
    # (vallinams used to be divided twice).
    F.kurils /= F.nletters
    F.nedils /= F.nletters
    F.ayudhams /= F.nletters
    F.vallinams /= F.nletters
    F.mellinams /= F.nletters
    F.idayinams /= F.nletters
    F.granthams /= F.nletters
    F.vowels /= F.nletters

    # NOTE(review): "+= F.x + c" doubles any existing value before adding c;
    # kept as written — confirm that this is intended.
    if letters[0] in utf8.uyir_letters:
        F.first += 1.0
    if letters[0] in utf8.mei_letters:
        F.first += F.first + 0.25
    if letters[0] in utf8.uyirmei_letters:
        F.first += F.first + 0.05
    if letters[-1] in utf8.uyir_letters:
        F.last += 1.0
    if letters[-1] in utf8.mei_letters:
        F.last += F.last + 0.25
    if letters[-1] in utf8.uyirmei_letters:
        F.last += F.last + 0.05
    return F
开发者ID:Ezhil-Language-Foundation,项目名称:open-tamil,代码行数:59,代码来源:preprocess.py
示例12: anagram
def anagram(request, word):
    """Return the anagrams of ``word`` found in the TamilVU dictionary.

    The result is serialised as a JSON array in a UTF-8 ``HttpResponse``.
    ``request`` is accepted for the view signature but is not used.
    """
    # Only the dictionary object is needed; the size returned alongside it
    # is not used.
    TVU, _ = DictionaryBuilder.create(TamilVU)
    actual = list(wordutils.anagrams(word, TVU))
    # ensure_ascii=False keeps the Tamil text readable in the payload.
    json_string = json.dumps(actual, ensure_ascii=False)
    # Create the response with an explicit content type and encoding.
    return HttpResponse(json_string, content_type="application/json; charset=utf-8")
开发者ID:Ezhil-Language-Foundation,项目名称:open-tamil,代码行数:9,代码来源:views.py
示例13: test_tamil_only_words
def test_tamil_only_words(self):
    """``utf8.get_tamil_words`` drops the English words from a sentence."""
    s = u"உடனே உடனே seventh heaven எழுதினால் செய்திப் பத்திரிகை போஆகிவிடும் அசோகமித்திரன் நேர்காணல்"
    # Expected result: the sentence without its English part, split on spaces.
    expected = s.replace(u"seventh heaven ", u"").split(u" ")
    found = utf8.get_tamil_words(utf8.get_letters(s))
    if LINUX:
        for group in (expected, found):
            print(u"|".join(group))
    self.assertEqual(found, expected)
开发者ID:nomad-vino,项目名称:open-tamil,代码行数:9,代码来源:letter_tests.py
示例14: getAllWordsPrefix
def getAllWordsPrefix(self, prefix):
    """Placeholder for listing every stored word that starts with ``prefix``.

    The method is deliberately disabled.  The draft that used to follow the
    raise (never reachable) looked up the prefix with
    ``self.isWord(prefix, ret_ref_trie=True)`` and collected the words with
    ``self.getAllWordsHelper``.

    Raises:
        NotImplementedError: always.  It is a subclass of ``Exception``, so
            callers that catch ``Exception`` keep working.
    """
    raise NotImplementedError("NOT IMPLEMENTED RIGHT")
开发者ID:msathia,项目名称:Ezhil-Lang,代码行数:9,代码来源:datastore.py
示例15: keech
def keech(request, k1):
    """Return, as JSON, the letter count of each space-separated word of ``k1``.

    The payload maps the position of every word to its number of letters and
    is sent as a UTF-8 ``HttpResponse``.
    """
    letter_counts = {
        position: len(get_letters(token))
        for position, token in enumerate(k1.split(' '))
    }
    payload = json.dumps(letter_counts, ensure_ascii=False)
    # Response object carrying an explicit content type and encoding.
    return HttpResponse(payload, content_type="application/json; charset=utf-8")
开发者ID:Ezhil-Language-Foundation,项目名称:open-tamil,代码行数:10,代码来源:views.py
示例16: test_words
def test_words(self):
    """``utf8.get_words`` with ``tamil_only=False`` keeps every word."""
    _str = u"உடனே random elevator jazz உடனே எழுதினால் செய்திப் பத்திரிகை போஆகிவிடும் அசோகமித்திரன் நேர்காணல்"
    expected = _str.split(u" ")
    found = utf8.get_words(utf8.get_letters(_str), tamil_only=False)
    if LINUX:
        for group in (expected, found):
            print(u"|".join(group))
    self.assertEqual(found, expected)
开发者ID:nomad-vino,项目名称:open-tamil,代码行数:10,代码来源:letter_tests.py
示例17: xkcd
def xkcd():
    """Smoke-check ``RemoveCaseSuffix.removeSuffix`` on three sample words."""
    obj = RemoveCaseSuffix()
    # (input word, expected stem) pairs.
    cases = [
        (u"பதிவிற்க்கு", u"பதிவிற்"),
        (u"கட்டளைகளை", u"கட்டளைக"),
        (u"அவர்கள்", u"அவர்"),
    ]
    for w, x in cases:
        stem, removed = obj.removeSuffix(w)
        assert removed
        print(utf8.get_letters(w), '->', removed)
        assert stem == x
开发者ID:atvKumar,项目名称:open-tamil,代码行数:10,代码来源:morphology.py
示例18: test_words
def test_words(self):
    """``utf8.get_words`` reassembles the letters into the original words."""
    string = u"உடனே random elevator jazz உடனே எழுதினால் செய்திப் பத்திரிகை போஆகிவிடும் அசோகமித்திரன் நேர்காணல்"
    words = string.split(u" ")
    letters = utf8.get_letters(string)
    outWords = utf8.get_words(letters)
    # Single-argument print() calls are valid on both Python 2 and 3; the
    # print statement is a syntax error on Python 3.
    print(u"|".join(words))
    print(u"|".join(outWords))
    assert outWords == words
开发者ID:srikanthlogic,项目名称:open-tamil,代码行数:11,代码来源:letter_tests.py
示例19: test_letter_extract_yield_with_ascii
def test_letter_extract_yield_with_ascii(self):
    """The iterable and list letter extractors agree on mixed text."""
    ta_str = u"கூவிளம் is என்பது also என்ன a சீர்"
    letters = list(utf8.get_letters_iterable(ta_str))
    act_letters = utf8.get_letters(ta_str)
    print( "len ==== > " , len(letters),"get_letters CALL = ",len(act_letters) )
    assert len(letters) == len(act_letters)
    if LINUX:
        for numbered in enumerate(letters):
            print(u"%d %s" % numbered)
    # The lone ASCII "a" sits four letters from the end.
    self.assertEqual(letters[-4], u"a")
开发者ID:nomad-vino,项目名称:open-tamil,代码行数:11,代码来源:letter_tests.py
示例20: test_tamil_only_words
def test_tamil_only_words(self):
    """``utf8.get_tamil_words`` drops the English words from a sentence."""
    string = u"உடனே உடனே seventh heaven எழுதினால் செய்திப் பத்திரிகை போஆகிவிடும் அசோகமித்திரன் நேர்காணல்"
    # Expected result: the sentence without its English part, split on spaces.
    words = string.replace(u"seventh heaven ", u"").split(u" ")
    letters = utf8.get_letters(string)
    outWords = utf8.get_tamil_words(letters)
    # Single-argument print() calls are valid on both Python 2 and 3; the
    # print statement is a syntax error on Python 3.
    print(u"|".join(words))
    print(u"|".join(outWords))
    assert outWords == words
开发者ID:srikanthlogic,项目名称:open-tamil,代码行数:11,代码来源:letter_tests.py
注：本文中的tamil.utf8.get_letters函数示例由纯净天空整理自GitHub/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。
请发表评论