• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    公众号

Python utf8.get_letters函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中tamil.utf8.get_letters函数的典型用法代码示例。如果您正苦于以下问题:Python get_letters函数的具体用法是什么?Python get_letters怎么用?有哪些get_letters的使用例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了get_letters函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: removeSuffix

 def removeSuffix(self, word):
     """Strip the longest matching suffix from ``word``.

     When ``self.possible_suffixes`` is empty the suffix tables are set up
     first.  The word is reversed letter by letter and tested with
     ``startswith`` against every entry of ``self.reversed_suffixes``; the
     longest matching entry wins and, on equal length, the later entry.
     If ``self.replace_suffixes`` maps the removed suffix to a truthy
     value, that value is appended to the remaining stem.

     Returns a ``(word, removed)`` tuple.  ``removed`` is True only when a
     suffix was stripped; otherwise ``word`` comes back unchanged.
     """
     if not self.possible_suffixes:
         # nothing prepared yet: populate the suffix tables
         self.setSuffixes()
         self.prepareSuffixes()

     letters = utf8.get_letters(word)
     # reverse by letter (not by code point) so suffixes become prefixes
     flipped = u"".join(letters[::-1])

     best = ""
     for candidate in self.reversed_suffixes:
         if flipped.startswith(candidate) and len(candidate) >= len(best):
             best = candidate

     if not best:
         return word, False

     # peel one letter off the end of the word per letter of the match
     tail = []
     for _ in utf8.get_letters(best):
         tail.append(letters.pop())
     tail.reverse()
     stem = u"".join(letters)

     # rule to replace (rather than merely drop) the suffix
     replacement = self.replace_suffixes.get(u"".join(tail), None)
     if replacement:
         stem = stem + replacement
     return stem, True
开发者ID:Ezhil-Language-Foundation,项目名称:open-tamil,代码行数:33,代码来源:morphology.py


示例2: test_reverse_words

 def test_reverse_words( self ):
     """Check that reverse_word() reverses a string letter by letter.

     For every whitespace-separated word of the sample sentence, the first
     letter of the reversed word must equal the last letter of the word.
     """
     # print() with a single argument behaves the same on Python 2 and 3;
     # the former print statements were a SyntaxError on Python 3.
     print(utf8.get_letters(u"இந்த"))
     print(u"".join(utf8.get_letters(u"இந்த")))
     for word in u"இந்த (C) tamil முத்தையா அண்ணாமலை 2013 இந்த ஒரு எழில் தமிழ் நிரலாக்க மொழி உதாரணம்".split():
         rword = utf8.reverse_word(word)
         print(u"%s %s" % (word, rword))
         self.assertEqual( utf8.get_letters(rword)[0], utf8.get_letters(word)[-1] )
     return
开发者ID:tk120404,项目名称:open-tamil,代码行数:9,代码来源:letter_tests.py


示例3: test_istamil

 def test_istamil( self ):
     """Check utf8.istamil() letter by letter on Tamil and mixed text."""
     zz = u"முத்தையா அண்ணாமலை எந்த ஒரு தெரிந்த அல்லது தெரியாத எழுத்துருவாகவிருந்தாலும் அதனை மேல்தட்டில் உள்ளிட்டு கீழே உள்ள முடியும்"
     for z in zz.split(u" "):
         print("********** t/f ********")
         letters = utf8.get_letters(z)
         flags = list(map(utf8.istamil, letters))
         for x,y in zip(flags, letters):
             print("%s => %s"%(y,x))
         # every letter of a pure-Tamil word must be reported as Tamil;
         # checked once per word instead of once per printed letter
         assert all(flags)
     
     z = u"முத்தையா அண்ணாமலை"
     assert( any( map( utf8.istamil, utf8.get_letters( z ) ) ) )
     
     correct = [True, True, True, True, False, True, True, True, True, True, False, False, False, False, False]
     # map() returns an iterator on Python 3, which never compares equal to
     # a list, so materialise it before comparing
     assert( list(map(utf8.istamil,utf8.get_letters(u"முத்தையா அண்ணாமலை 2013"))) == correct )
开发者ID:tk120404,项目名称:open-tamil,代码行数:13,代码来源:letter_tests.py


示例4: test_entity

 def test_entity(self):
     """A WordEntity exposes its word, its letters and its (row, col) position."""
     text = u"nuthin"
     entity = WordEntity(text, row=5, col=6)
     self.assertEqual(entity.word, text)
     expected_letters = utf8.get_letters(u"nuthin")
     self.assertEqual(entity.letters, expected_letters)
     position = (entity.row, entity.col)
     self.assertEqual(position, (5, 6))
     self.assertTrue(entity.isWord())
开发者ID:Ezhil-Language-Foundation,项目名称:open-tamil,代码行数:7,代码来源:solthiruthi_dom.py


示例5: getWordCount

 def getWordCount(self,word):
     """Return the count stored for ``word``.

     The count is read from the trie node returned by ``isWord`` and is
     keyed by the last letter of ``word``.

     Raises:
         Exception: if ``isWord`` does not report ``word`` as present.
     """
     found, node = self.isWord(word, ret_ref_trie=True)
     if found:
         last_letter = utf8.get_letters(word)[-1]
         return node.count[last_letter]
     raise Exception(u"Word does not exist in Trie")
开发者ID:msathia,项目名称:Ezhil-Lang,代码行数:7,代码来源:datastore.py


示例6: norvig_suggestor

def norvig_suggestor(word,alphabets=None,nedits=1,limit=float("inf")):
    """Generate spelling candidates within ``nedits`` edits of ``word``.

    Edits are applied to whole letters (the items returned by
    ``get_letters``), never to individual code points, so a multi-code-point
    letter is always deleted, swapped or replaced as a unit.

    Args:
        word: a string, or a list of letters that is used as-is.
        alphabets: letters used for replacements and insertions; falls back
            to ``tamil_letters`` when empty or None.
        nedits: maximum edit distance; values above 1 recurse.
        limit: soft cap used only while expanding ``nedits > 1``; it is not
            applied to the distance-1 candidate set.

    Returns:
        A set of candidate strings.
    """
    if not alphabets:
        alphabets = tamil_letters
    letters = list(word if isinstance(word, list) else get_letters(word))

    # recursive method for edit distance > 1
    if nedits > 1:
        result = []
        for nAlternate in norvig_suggestor(letters,alphabets,nedits-1,limit-len(result)):
            if len(result) > limit:
                break
            result.extend( norvig_suggestor(nAlternate,alphabets,1,limit-len(result)) )
        return set(result)

    join = u"".join
    # Every (prefix, suffix) cut of the word.  The previous ``[:idx-1]``
    # slice wrapped around for idx == 0 and silently dropped one letter for
    # every other idx, so each "single" edit removed an extra letter.
    ta_splits     = [(letters[:idx], letters[idx:]) for idx in range(len(letters) + 1)]
    ta_deletes    = [join(a + b[1:]) for a, b in ta_splits if b]
    ta_transposes = [join(a + [b[1], b[0]] + b[2:]) for a, b in ta_splits if len(b) > 1]
    # only replace when there is a letter to replace; otherwise it is an insert
    ta_replaces   = [join(a + [c] + b[1:]) for a, b in ta_splits if b for c in alphabets]
    # retained from the original: one alphabet letter followed by each suffix
    ta_replaces2  = [join([c] + b) for a, b in ta_splits for c in alphabets]
    ta_inserts    = [join(a + [c] + b) for a, b in ta_splits for c in alphabets]
    # TODO: add a normalizing pass word words in vowel+consonant forms to eliminate dangling ligatures
    return set(ta_deletes + ta_transposes + ta_replaces + ta_replaces2 + ta_inserts )
开发者ID:Ezhil-Language-Foundation,项目名称:open-tamil,代码行数:25,代码来源:suggestions.py


示例7: test_letter_extract_with_ascii

 def test_letter_extract_with_ascii(self):
     """get_letters() splits mixed Tamil/ASCII text into 25 letters.

     Spaces and ASCII characters count as letters of their own; the fourth
     item from the end of the sample is the lone ASCII "a".
     """
     letters = utf8.get_letters(u"கூவிளம் is என்பது also என்ன a சீர்")
     # print() call instead of the Python 2 print statement, which is a
     # SyntaxError on Python 3
     print("len ==== >  %d" % len(letters))
     assert(len(letters) == 25 )
     for pos,letter in  enumerate(letters):
         print(u"%d %s"%(pos,letter))
     assert( letters[-4] == u"a" )
开发者ID:srikanthlogic,项目名称:open-tamil,代码行数:7,代码来源:letter_tests.py


示例8: test_classifier

 def test_classifier(self):
     """classify_letter() labels each letter of a mixed ASCII/digit/Tamil string."""
     expected = (
         ['english']*3
         + ['digit']*4
         + ['kuril','nedil','uyirmei','vallinam','uyirmei']
     )
     sample = utf8.get_letters(u"abc1230அஆரெட்டை")
     data = [utf8.classify_letter(letter) for letter in sample]
     self.assertEqual(data,expected)
开发者ID:msathia,项目名称:Ezhil-Lang,代码行数:7,代码来源:letter_tests2.py


示例9: test_words_to_letters

 def test_words_to_letters(self):
     """Each space-separated word of the sample has the expected letter count."""
     sentence = u"இந்தக் குளிர்ல டெய்லி தலைக்கு குளிக்கற நல்லவங்க இருக்கறதாலதான் கோவை இப்படி சூப்பரா இருக்காம்"
     expected_lengths = [4,4,3,4,5,6,9,2,4,4,5]
     tokens = sentence.split(' ')
     for position, token in enumerate(tokens):
         letter_count = len( get_letters(token) )
         print('w# ',position, letter_count )
         self.assertEqual( expected_lengths[position], letter_count)
开发者ID:Ezhil-Language-Foundation,项目名称:open-tamil,代码行数:7,代码来源:keechu.py


示例10: test_letter_extract_from_code_pts

 def test_letter_extract_from_code_pts(self):
     """get_letters() yields 15 letters for the sample phrase, the last being u"ர்"."""
     phrase = u"கூவிளம் என்பது என்ன சீர்"
     letters = utf8.get_letters(phrase)
     assert len(letters) == 15
     for index, item in enumerate(letters):
         print(u"%d %s"%(index,item))
     # the item left over from the loop is the final letter of the phrase
     assert letters[-1] == (u"ர்")
开发者ID:tk120404,项目名称:open-tamil,代码行数:7,代码来源:letter_tests.py


示例11: get

 def get(word):
     """Build the Feature record for ``word``.

     Whitespace is stripped and inner spaces removed before the word is
     split into letters.  The record stores the letter count, the unigram
     score, the maximum bigram score, per-class letter frequencies (each
     divided once by the letter count) and scores for the first and last
     letter.

     Returns:
         The populated ``Feature`` instance.
     """
     word = word.strip()
     word = word.replace(u' ',u'')
     letters = utf8.get_letters(word)
     F = Feature()
     F.nletters = len(letters)*1.0
     F.unigscore = unigram_score(letters)
     F.bigscore = max(bigram_scores(letters))

     # letter class reported by classify_letter -> Feature counter to bump;
     # classes not listed here ('english', 'digit', ...) are not counted
     counters = {
         'kuril': 'kurils',
         'nedil': 'nedils',
         'ayudham': 'ayudhams',
         'vallinam': 'vallinams',
         'mellinam': 'mellinams',
         'idayinam': 'idayinams',
         'tamil_or_grantham': 'granthams',
     }
     for letter in letters:
         try:
             rtl = reverse_transliterate(letter)
             # A tuple argument tests all vowels at once.  The previous list
             # comprehension reused the loop variable name, which on Python 2
             # leaked and replaced the letter classified below.
             if rtl.startswith(('a','e','i','o','u')):
                 F.vowels += 1.0
         except Exception:
             # best effort: a letter that cannot be transliterated simply
             # does not contribute to the vowel count
             pass

         attr = counters.get(utf8.classify_letter(letter))
         if attr is not None:
             setattr(F, attr, getattr(F, attr) + 1)

     # Turn the raw counts into frequencies.  Each counter is divided exactly
     # once; the original divided ``vallinams`` twice.
     # NOTE(review): consumers calibrated against the doubly-divided
     # ``vallinams`` value should be re-checked.
     for attr in ('kurils', 'nedils', 'ayudhams', 'vallinams', 'mellinams',
                  'idayinams', 'granthams', 'vowels'):
         setattr(F, attr, getattr(F, attr) / F.nletters)

     # NOTE(review): ``x += x + c`` doubles the current value before adding
     # ``c``; kept exactly as in the original -- confirm this is intended.
     if letters[0] in utf8.uyir_letters:
         F.first += 1.0
     if letters[0] in utf8.mei_letters:
         F.first += F.first + 0.25
     if letters[0] in utf8.uyirmei_letters:
         F.first += F.first + 0.05

     if letters[-1] in utf8.uyir_letters:
         F.last += 1.0
     if letters[-1] in utf8.mei_letters:
         F.last += F.last + 0.25
     if letters[-1] in utf8.uyirmei_letters:
         F.last += F.last + 0.05

     return F
开发者ID:Ezhil-Language-Foundation,项目名称:open-tamil,代码行数:59,代码来源:preprocess.py


示例12: anagram

def anagram(request,word):
    """Return the anagrams of ``word`` as a JSON HTTP response.

    Anagrams are looked up with ``wordutils.anagrams`` in the dictionary
    that ``DictionaryBuilder.create(TamilVU)`` builds.  The body is a JSON
    array serialised without ASCII escaping and served as UTF-8.
    """
    # The predicate dictionary and the letter count computed here before
    # were never used, so they are no longer built on every request.
    TVU, _TVU_size = DictionaryBuilder.create(TamilVU)
    actual = list(wordutils.anagrams(word,TVU))
    json_string = json.dumps(actual,ensure_ascii = False)
    # creating a Response object to set the content type and the encoding
    response = HttpResponse(json_string,content_type="application/json; charset=utf-8" )
    return response
开发者ID:Ezhil-Language-Foundation,项目名称:open-tamil,代码行数:9,代码来源:views.py


示例13: test_tamil_only_words

 def test_tamil_only_words(self):
     """get_tamil_words() returns the sentence's words minus the English ones."""
     s = u"உடனே உடனே seventh heaven எழுதினால் செய்திப் பத்திரிகை போஆகிவிடும் அசோகமித்திரன் நேர்காணல்"
     # expected result: the same sentence with the English phrase cut out
     expected = s.replace(u"seventh heaven ",u"").split(u" ")
     actual = utf8.get_tamil_words( utf8.get_letters( s ) )
     if LINUX:
         print( u"|".join(expected) )
         print( u"|".join(actual) )
     self.assertEqual( actual, expected )
开发者ID:nomad-vino,项目名称:open-tamil,代码行数:9,代码来源:letter_tests.py


示例14: getAllWordsPrefix

 def getAllWordsPrefix(self,prefix):
     """Currently disabled: always raises ``Exception("NOT IMPLEMENTED RIGHT")``.

     The statements after the ``raise`` are unreachable.  They sketch the
     lookup: start from the trie node for ``prefix`` and let
     ``getAllWordsHelper`` fill the result list.
     """
     raise Exception("NOT IMPLEMENTED RIGHT")
     matches = []
     found, ref_trie, ref_word_limits = self.isWord(prefix, ret_ref_trie=True)
     if found:
         matches.append(prefix)
     self.getAllWordsHelper(ref_trie, ref_word_limits, utf8.get_letters(prefix), matches)
     return matches
开发者ID:msathia,项目名称:Ezhil-Lang,代码行数:9,代码来源:datastore.py


示例15: keech

def keech(request,k1):
    """Return the letter count of each space-separated word of ``k1`` as JSON.

    The response body is a JSON object mapping the word's position in
    ``k1.split(' ')`` to the number of letters ``get_letters`` finds in it.
    """
    lengths = {
        position: len(get_letters(token))
        for position, token in enumerate(k1.split(' '))
    }
    payload = json.dumps(lengths,ensure_ascii = False)
    # wrap in a response object so content type and encoding are explicit
    return HttpResponse(payload,content_type="application/json; charset=utf-8" )
开发者ID:Ezhil-Language-Foundation,项目名称:open-tamil,代码行数:10,代码来源:views.py


示例16: test_words

 def test_words(self):
     """get_words(tamil_only=False) reproduces the space-separated words."""
     _str = u"உடனே random elevator jazz உடனே எழுதினால் செய்திப் பத்திரிகை போஆகிவிடும் அசோகமித்திரன் நேர்காணல்"
     expected = _str.split(u" ")
     actual = utf8.get_words( utf8.get_letters( _str ), tamil_only = False )
     if LINUX:
         print( u"|".join(expected) )
         print( u"|".join(actual) )
     self.assertEqual( actual, expected )
开发者ID:nomad-vino,项目名称:open-tamil,代码行数:10,代码来源:letter_tests.py


示例17: xkcd

def xkcd():
    """Run RemoveCaseSuffix.removeSuffix on three sample words and check the stems."""
    stemmer = RemoveCaseSuffix()
    # (input word, expected stem) pairs
    cases = [
        (u"பதிவிற்க்கு", u"பதிவிற்"),
        (u"கட்டளைகளை", u"கட்டளைக"),
        (u"அவர்கள்", u"அவர்"),
    ]
    for source, wanted in cases:
        outcome = stemmer.removeSuffix(source)
        stem, removed = outcome[0], outcome[1]
        assert(removed)
        print(utf8.get_letters(source),'->',removed)
        assert(stem == wanted)
    return
开发者ID:atvKumar,项目名称:open-tamil,代码行数:10,代码来源:morphology.py


示例18: test_words

    def test_words(self):
        """get_words() reproduces the space-separated words of a mixed sentence."""
        # named ``text`` so the local does not shadow the stdlib ``string`` module
        text = u"உடனே random elevator jazz உடனே எழுதினால் செய்திப் பத்திரிகை போஆகிவிடும் அசோகமித்திரன் நேர்காணல்"
        words = text.split(u" ")

        letters = utf8.get_letters( text )
        outWords = utf8.get_words( letters )

        # print() calls: the Python 2 print statement is a SyntaxError on
        # Python 3, and the single-argument form behaves the same on both
        print(u"|".join(words))
        print(u"|".join(outWords))

        assert( outWords == words )
开发者ID:srikanthlogic,项目名称:open-tamil,代码行数:11,代码来源:letter_tests.py


示例19: test_letter_extract_yield_with_ascii

 def test_letter_extract_yield_with_ascii(self):
     """get_letters_iterable() yields as many letters as get_letters() returns."""
     ta_str = u"கூவிளம் is என்பது also என்ன a சீர்"
     # drain the iterable into a list so it can be measured and indexed
     letters = list(utf8.get_letters_iterable(ta_str))
     act_letters = utf8.get_letters(ta_str)
     print( "len ==== > " , len(letters),"get_letters CALL = ",len(act_letters) )
     assert len(letters) == len(act_letters)
     for pos, letter in enumerate(letters):
         if LINUX:
             print( u"%d %s"%(pos,letter) )
     self.assertEqual( letters[-4], u"a" )
开发者ID:nomad-vino,项目名称:open-tamil,代码行数:11,代码来源:letter_tests.py


示例20: test_tamil_only_words

    def test_tamil_only_words(self):
        """get_tamil_words() returns the sentence's words minus the English ones."""
        # named ``text`` so the local does not shadow the stdlib ``string`` module
        text = u"உடனே உடனே seventh heaven எழுதினால் செய்திப் பத்திரிகை போஆகிவிடும் அசோகமித்திரன் நேர்காணல்"
        # expected result: the same sentence with the English phrase cut out
        words = text.replace(u"seventh heaven ",u"").split(u" ")

        letters = utf8.get_letters( text )
        outWords = utf8.get_tamil_words( letters )

        # print() calls: the Python 2 print statement is a SyntaxError on
        # Python 3, and the single-argument form behaves the same on both
        print(u"|".join(words))
        print(u"|".join(outWords))

        assert( outWords == words )
开发者ID:srikanthlogic,项目名称:open-tamil,代码行数:11,代码来源:letter_tests.py



注:本文中的tamil.utf8.get_letters函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python tamproxy.Timer类代码示例发布时间:2022-05-27
下一篇:
Python tambo.Transport类代码示例发布时间:2022-05-27
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap