本文整理汇总了Golang中github.com/kljensen/snowball/snowballword.SnowballWord类的典型用法代码示例。如果您正苦于以下问题:Golang SnowballWord类的具体用法?Golang SnowballWord怎么用?Golang SnowballWord使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了SnowballWord类的18个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Golang代码示例。
示例1: removeVerbEnding
// removeVerbEnding looks for a verb ending between word.RVstart and the
// end of the word and strips it. It reports whether an ending was removed.
//
// The candidate list is passed to FirstSuffixIn in the order shown, so the
// order of the literals must not be changed.
func removeVerbEnding(word *snowballword.SnowballWord) bool {
	ending, endingRunes := word.FirstSuffixIn(word.RVstart, len(word.RS),
		"уйте", "ейте", "ыть", "ыло", "ыли", "ыла", "уют", "ует",
		"нно", "йте", "ишь", "ить", "ите", "ило", "или", "ила",
		"ешь", "ете", "ены", "ено", "ена", "ят", "ют", "ыт", "ым",
		"ыл", "ую", "уй", "ть", "ны", "но", "на", "ло", "ли", "ла",
		"ит", "им", "ил", "ет", "ен", "ем", "ей", "ю", "н", "л", "й",
	)
	if ending == "" {
		// No verb ending present.
		return false
	}
	n := len(endingRunes)

	switch ending {
	case "ла", "на", "ете", "йте", "ли", "й", "л", "ем", "н",
		"ло", "но", "ет", "ют", "ны", "ть", "ешь", "нно":
		// "Group 1" verb endings are only removed when they follow
		// а (a) or я (ia) in RV.
		if !precededByARinRV(word, n) {
			return false
		}
	}

	word.RemoveLastNRunes(n)
	return true
}
开发者ID:kljensen,项目名称:snowball,代码行数:28,代码来源:step1.go
示例2: findRegions
// findRegions computes the start offsets (as indexes into word.RS) of the
// regions R1, R2 and RV.
//
// R1 and R2 come from romance.VnvSuffix. RV is chosen as follows:
//   - if the word starts with "par", "col" or "tap", RV starts right after
//     that prefix;
//   - else if the word has at least three runes and its first two are
//     vowels, RV starts after the third rune;
//   - else RV starts after the first vowel that is not the first rune;
//   - if none of these apply, RV is empty (it starts at the end of the word).
func findRegions(word *snowballword.SnowballWord) (r1start, r2start, rvstart int) {
	// R1 & R2 are defined in the standard manner.
	r1start = romance.VnvSuffix(word, isLowerVowel, 0)
	r2start = romance.VnvSuffix(word, isLowerVowel, r1start)

	n := len(word.RS)

	// RV is empty unless one of the cases below matches.
	rvstart = n

	prefix, prefixRunes := word.FirstPrefix("par", "col", "tap")
	switch {
	case prefix != "":
		// Special prefixes: RV is everything to their right.
		rvstart = len(prefixRunes)
	case n >= 3 && isLowerVowel(word.RS[0]) && isLowerVowel(word.RS[1]):
		// Two leading vowels: RV is the region after the third letter.
		rvstart = 3
	default:
		// Region after the first vowel not at the beginning of the word.
		for i := 1; i < n; i++ {
			if isLowerVowel(word.RS[i]) {
				rvstart = i + 1
				break
			}
		}
	}
	return r1start, r2start, rvstart
}
开发者ID:kljensen,项目名称:snowball,代码行数:35,代码来源:common.go
示例3: step2
// step2 removes the "и" suffix, searching from word.RVstart, and reports
// whether it was removed.
func step2(word *snowballword.SnowballWord) bool {
	removed, _ := word.RemoveFirstSuffixIn(word.RVstart, "и")
	return removed != ""
}
开发者ID:kljensen,项目名称:snowball,代码行数:9,代码来源:step2.go
示例4: step5
// step5 undoubles non-vowel endings: when the word ends in "enn", "onn",
// "ett", "ell" or "eill", the last rune is dropped.
//
// NOTE(review): the function reports false unconditionally, even when a
// rune was removed. Callers are not visible here, so this is preserved
// as-is; confirm whether the return value is meant to signal a change.
func step5(word *snowballword.SnowballWord) bool {
	if doubled, _ := word.FirstSuffix("enn", "onn", "ett", "ell", "eill"); doubled != "" {
		word.RemoveLastNRunes(1)
	}
	return false
}
开发者ID:kljensen,项目名称:snowball,代码行数:10,代码来源:step5.go
示例5: preprocess
// preprocess computes the R1, R2 and RV start offsets for word via
// findRegions and stores them on the word.
func preprocess(word *snowballword.SnowballWord) {
	word.R1start, word.R2start, word.RVstart = findRegions(word)
}
开发者ID:kljensen,项目名称:snowball,代码行数:8,代码来源:preprocess.go
示例6: step0
// step0 strips a trailing "'s'", "'s" or "'" from the word and reports
// whether anything was removed.
func step0(w *snowballword.SnowballWord) bool {
	found, foundRunes := w.FirstSuffix("'s'", "'s", "'")
	if found != "" {
		w.RemoveLastNRunes(len(foundRunes))
		return true
	}
	return false
}
开发者ID:kljensen,项目名称:snowball,代码行数:10,代码来源:step0.go
示例7: step2a
// step2a removes verb suffixes beginning with "y".
//
// It looks for one of the listed suffixes between word.RVstart and the end
// of the word and deletes it only when the rune immediately before the
// suffix is "u". It reports whether a suffix was deleted.
func step2a(word *snowballword.SnowballWord) bool {
	ending, endingRunes := word.FirstSuffixIn(word.RVstart, len(word.RS),
		"ya", "ye", "yan", "yen", "yeron", "yendo",
		"yo", "yó", "yas", "yes", "yais", "yamos",
	)
	if ending == "" {
		return false
	}

	n := len(endingRunes)

	// Index of the rune directly in front of the suffix; negative when the
	// suffix is the whole word.
	before := len(word.RS) - n - 1
	if before < 0 || word.RS[before] != 'u' {
		return false
	}

	word.RemoveLastNRunes(n)
	return true
}
开发者ID:kljensen,项目名称:snowball,代码行数:15,代码来源:step2a.go
示例8: step3
// step3 removes a derivational suffix ("ост" or "ость"), searching from
// word.R2start so that the whole ending lies in R2. It reports whether a
// suffix was removed.
func step3(word *snowballword.SnowballWord) bool {
	removed, _ := word.RemoveFirstSuffixIn(word.R2start, "ост", "ость")
	return removed != ""
}
开发者ID:kljensen,项目名称:snowball,代码行数:13,代码来源:step3.go
示例9: r1r2
// r1r2 returns the starting points of the regions R1 and R2.
//
// R1 is the region after the first non-vowel following a vowel, or the
// null region at the end of the word if there is no such non-vowel.
// Words beginning with "gener", "commun" or "arsen" are exceptions: for
// them R1 starts immediately after that prefix.
//
// R2 is the region after the first non-vowel following a vowel in R1, or
// the null region at the end of the word if there is no such non-vowel.
//
// See http://snowball.tartarus.org/texts/r1r2.html
func r1r2(word *snowballword.SnowballWord) (r1start, r2start int) {
	if prefix, _ := word.FirstPrefix("gener", "commun", "arsen"); prefix != "" {
		// The special prefixes are plain ASCII, so the string length
		// equals the number of runes.
		r1start = len(prefix)
	} else {
		r1start = romance.VnvSuffix(word, isLowerVowel, 0)
	}
	r2start = romance.VnvSuffix(word, isLowerVowel, r1start)
	return r1start, r2start
}
开发者ID:kljensen,项目名称:snowball,代码行数:24,代码来源:common.go
示例10: trimLeftApostrophes
// trimLeftApostrophes removes every leading apostrophe from word.RS and
// shifts R1start and R2start left by the number of runes removed.
// (Slight variation from the NLTK implementation, in which only the first
// apostrophe is removed.)
//
// The apostrophes are counted explicitly rather than by reusing the range
// index: a range index stops at len-1 when the loop runs to completion, so
// a word made up only of apostrophes would otherwise keep its last one
// (and a lone "'" would not be trimmed at all).
func trimLeftApostrophes(word *snowballword.SnowballWord) {
	numApostrophes := 0
	for _, r := range word.RS {
		if r != '\'' {
			break
		}
		numApostrophes++
	}
	if numApostrophes == 0 {
		return
	}

	word.RS = word.RS[numApostrophes:]

	// Shift the region starts along with the runes. A region start is an
	// index into word.RS, so it must never become negative.
	word.R1start -= numApostrophes
	if word.R1start < 0 {
		word.R1start = 0
	}
	word.R2start -= numApostrophes
	if word.R2start < 0 {
		word.R2start = 0
	}
}
开发者ID:kljensen,项目名称:snowball,代码行数:22,代码来源:common.go
示例11: removePerfectiveGerundEnding
// removePerfectiveGerundEnding looks for a perfective gerund ending between
// word.RVstart and the end of the word and strips it. It reports whether
// an ending was removed.
func removePerfectiveGerundEnding(word *snowballword.SnowballWord) bool {
	ending, endingRunes := word.FirstSuffixIn(word.RVstart, len(word.RS),
		"ившись", "ывшись", "вшись", "ивши", "ывши", "вши", "ив", "ыв", "в",
	)
	if ending == "" {
		return false
	}
	n := len(endingRunes)

	switch ending {
	case "в", "вши", "вшись":
		// "Group 1" perfective gerund endings are only removed when they
		// follow а (a) or я (ia) in RV.
		if !precededByARinRV(word, n) {
			return false
		}
	}

	word.RemoveLastNRunes(n)
	return true
}
开发者ID:kljensen,项目名称:snowball,代码行数:23,代码来源:step1.go
示例12: removeAdjectivalEnding
// removeAdjectivalEnding removes an adjective ending (searching from
// word.RVstart) and, if one was removed, also strips an optional participle
// ending that precedes it. It reports whether an adjective ending was
// removed; the participle step does not affect the result.
func removeAdjectivalEnding(word *snowballword.SnowballWord) bool {
	adjective, _ := word.RemoveFirstSuffixIn(word.RVstart,
		"ими", "ыми", "его", "ого", "ему", "ому", "ее", "ие",
		"ые", "ое", "ей", "ий", "ый", "ой", "ем", "им", "ым",
		"ом", "их", "ых", "ую", "юю", "ая", "яя", "ою", "ею",
	)
	if adjective == "" {
		return false
	}

	// An adjective ending was removed. Now look for an optional participle
	// ending in what remains.
	participle, participleRunes := word.FirstSuffixIn(word.RVstart, len(word.RS),
		"ивш", "ывш", "ующ",
		"ем", "нн", "вш", "ющ", "щ",
	)
	if participle == "" {
		return true
	}

	switch participle {
	case "ем", "нн", "вш", "ющ", "щ":
		// "Group 1" participle endings are only removed when they follow
		// а (a) or я (ia) in RV.
		if !precededByARinRV(word, len(participleRunes)) {
			return true
		}
	}

	word.RemoveLastNRunes(len(participleRunes))
	return true
}
开发者ID:kljensen,项目名称:snowball,代码行数:37,代码来源:step1.go
示例13: step1
// step1 removes standard suffixes, all of which must occur in RV.
//
// It first searches for a PERFECTIVE GERUND ending; if one is removed, the
// step ends there. Otherwise it tries to remove a REFLEXIVE ending and then
// searches in turn for (1) an ADJECTIVAL, (2) a VERB or (3) a NOUN ending,
// stopping as soon as one of (1) to (3) has been removed.
//
// It returns true when a perfective gerund, adjectival, verb or noun
// ending was removed. Removing only a reflexive ending does not count.
func step1(word *snowballword.SnowballWord) bool {
	// A perfective gerund ending terminates the step immediately.
	if removePerfectiveGerundEnding(word) {
		return true
	}

	// Reflexive endings are removed unconditionally; the result is
	// deliberately ignored.
	word.RemoveFirstSuffixIn(word.RVstart, "ся", "сь")

	// Adjectival first, then verb; || short-circuits, so the verb endings
	// are only tried when no adjectival ending was removed.
	if removeAdjectivalEnding(word) || removeVerbEnding(word) {
		return true
	}

	// Finally, noun endings.
	noun, _ := word.RemoveFirstSuffixIn(word.RVstart,
		"иями", "ями", "иях", "иям", "ием", "ией", "ами", "ях",
		"ям", "ья", "ью", "ье", "ом", "ой", "ов", "ия", "ию",
		"ий", "ии", "ие", "ем", "ей", "еи", "ев", "ах", "ам",
		"я", "ю", "ь", "ы", "у", "о", "й", "и", "е", "а",
	)
	return noun != ""
}
开发者ID:kljensen,项目名称:snowball,代码行数:49,代码来源:step1.go
示例14: step2b
// step2b removes one of the verb suffixes listed below when it is found
// between word.RVstart and the end of the word.
//
// For "en", "es", "éis" and "emos" there is an extra rule: after deletion,
// if the word now ends in "gu", the "u" is deleted too (the "gu" need not
// be in RV). It reports whether a suffix was removed.
func step2b(word *snowballword.SnowballWord) bool {
	ending, endingRunes := word.FirstSuffixIn(word.RVstart, len(word.RS),
		"iésemos", "iéramos", "iríamos", "eríamos", "aríamos", "ásemos",
		"áramos", "ábamos", "isteis", "iríais", "iremos", "ieseis",
		"ierais", "eríais", "eremos", "asteis", "aríais", "aremos",
		"íamos", "irías", "irían", "iréis", "ieses", "iesen", "ieron",
		"ieras", "ieran", "iendo", "erías", "erían", "eréis", "aseis",
		"arías", "arían", "aréis", "arais", "abais", "íais", "iste",
		"iría", "irás", "irán", "imos", "iese", "iera", "idos", "idas",
		"ería", "erás", "erán", "aste", "ases", "asen", "aría", "arás",
		"arán", "aron", "aras", "aran", "ando", "amos", "ados", "adas",
		"abas", "aban", "ías", "ían", "éis", "áis", "iré", "irá", "ido",
		"ida", "eré", "erá", "emos", "ase", "aré", "ará", "ara", "ado",
		"ada", "aba", "ís", "ía", "ió", "ir", "id", "es", "er", "en",
		"ed", "as", "ar", "an", "ad",
	)
	if ending == "" {
		return false
	}

	// Every matched suffix is deleted.
	word.RemoveLastNRunes(len(endingRunes))

	switch ending {
	case "en", "es", "éis", "emos":
		// If the remaining word ends in "gu", drop the "u" as well.
		if gu, _ := word.FirstSuffix("gu"); gu != "" {
			word.RemoveLastNRunes(1)
		}
	}
	return true
}
开发者ID:kljensen,项目名称:snowball,代码行数:40,代码来源:step2b.go
示例15: step1a
// step1a normalizes various special "s"-endings and reports whether the
// word was changed:
//
//   - "sses" is replaced by "ss";
//   - "ies" / "ied" is replaced by "i" if preceded by more than one letter,
//     otherwise by "ie" (so ties -> tie, cries -> cri);
//   - "us" / "ss" are left alone;
//   - "s" is deleted if the preceding word part contains a vowel not
//     immediately before the s (so gas and this retain the s, gaps and
//     kiwis lose it).
func step1a(w *snowballword.SnowballWord) bool {
	ending, endingRunes := w.FirstSuffix("sses", "ied", "ies", "us", "ss", "s")

	switch ending {
	case "sses":
		w.ReplaceSuffixRunes(endingRunes, []rune("ss"), true)
		return true

	case "ies", "ied":
		// The suffix is three runes long, so a total length above four
		// means more than one letter precedes it.
		replacement := []rune("ie")
		if len(w.RS) > 4 {
			replacement = []rune("i")
		}
		w.ReplaceSuffixRunes(endingRunes, replacement, true)
		return true

	case "s":
		// Look at every rune except the final "s" and the rune right
		// before it.
		limit := len(w.RS) - 2
		for i, r := range w.RS {
			if i >= limit {
				break
			}
			if isLowerVowel(r) {
				w.RemoveLastNRunes(len(endingRunes))
				return true
			}
		}
	}

	// "us", "ss", no suffix at all, or an "s" with no qualifying vowel:
	// nothing to do.
	return false
}
开发者ID:kljensen,项目名称:snowball,代码行数:47,代码来源:step1a.go
示例16: step4
// step4 deletes one of the following suffixes when it fits in R2:
//
//	al, ance, ence, er, ic, able, ible, ant, ement, ment,
//	ent, ism, ate, iti, ous, ive, ize
//
// The suffix "ion" is deleted only if it is preceded by "s" or "t".
// It reports whether a suffix was deleted.
func step4(w *snowballword.SnowballWord) bool {
	// Find the first matching ending anywhere at the end of the word.
	ending, endingRunes := w.FirstSuffix(
		"ement", "ance", "ence", "able", "ible", "ment",
		"ent", "ant", "ism", "ate", "iti", "ous", "ive",
		"ize", "ion", "al", "er", "ic",
	)
	n := len(endingRunes)

	// Nothing found, or the ending does not fit in R2: do nothing.
	if ending == "" || n > len(w.RS)-w.R2start {
		return false
	}

	if ending == "ion" {
		// "ion" needs an "s" or "t" right in front of it, i.e. as the
		// fourth rune from the end.
		total := len(w.RS)
		if total < 4 {
			return false
		}
		if prev := w.RS[total-4]; prev != 's' && prev != 't' {
			return false
		}
	}

	w.RemoveLastNRunes(n)
	return true
}
开发者ID:kljensen,项目名称:snowball,代码行数:45,代码来源:step4.go
示例17: step1b
// step1b normalizes various "ly" and "ed" suffixes.
//
//   - "eed" / "eedly": replaced by "ee" if the suffix lies in R1. The
//     function returns true for these suffixes whether or not the
//     replacement happened.
//   - "ed" / "edly" / "ing" / "ingly": deleted if the preceding word part
//     contains a vowel. After the deletion:
//   - if the word ends "at", "bl" or "iz", an "e" is added;
//   - if the word ends with one of the listed doubles, the last letter
//     is removed;
//   - otherwise, if the word is short, an "e" is added.
//
// It returns false when no suffix matched or when the word part before an
// "ed"/"ing"-type suffix has no vowel.
func step1b(w *snowballword.SnowballWord) bool {
	ending, endingRunes := w.FirstSuffix("eedly", "ingly", "edly", "ing", "eed", "ed")
	if ending == "" {
		// No suffix found.
		return false
	}

	if ending == "eed" || ending == "eedly" {
		// Replace by "ee" if the suffix is in R1.
		if len(endingRunes) <= len(w.RS)-w.R1start {
			w.ReplaceSuffixRunes(endingRunes, []rune("ee"), true)
		}
		return true
	}

	// Remaining suffixes: "ed", "edly", "ing", "ingly". They are only
	// handled when the part of the word before the suffix has a vowel.
	stemLen := len(w.RS) - len(endingRunes)
	vowelInStem := false
	for i, r := range w.RS {
		if i >= stemLen {
			break
		}
		if isLowerVowel(r) {
			vowelInStem = true
			break
		}
	}
	if !vowelInStem {
		return false
	}

	// This case is a two-step transformation and, because of the way the
	// `ReplaceSuffix` method is implemented, information about R1 and R2
	// would be lost between the two steps. Remember the original values
	// so they can be restored at the end.
	savedR1, savedR2 := w.R1start, w.R2start

	// Delete the suffix...
	w.RemoveLastNRunes(len(endingRunes))

	// ...and inspect what the word ends with after the deletion.
	tail, tailRunes := w.FirstSuffix("at", "bl", "iz", "bb", "dd", "ff", "gg", "mm", "nn", "pp", "rr", "tt")
	switch tail {
	case "":
		// If the word is short, add "e". By definition, R1 and R2 are the
		// empty string for short words, so they are set here and the
		// restore step below is skipped.
		if isShortWord(w) {
			w.RS = append(w.RS, 'e')
			w.R1start = len(w.RS)
			w.R2start = len(w.RS)
			return true
		}
	case "at", "bl", "iz":
		// If the word ends "at", "bl" or "iz", add "e".
		w.ReplaceSuffixRunes(tailRunes, []rune(tail+"e"), true)
	case "bb", "dd", "ff", "gg", "mm", "nn", "pp", "rr", "tt":
		// If the word ends with a double, remove the last letter. Note
		// that "double" does not include all possible doubles, just
		// those listed above.
		w.RemoveLastNRunes(1)
	}

	// Because of the double replacement, R1 and R2 are fixed up manually:
	// restore the saved values, capped at the new word length.
	end := len(w.RS)
	w.R1start = savedR1
	if w.R1start >= end {
		w.R1start = end
	}
	w.R2start = savedR2
	if w.R2start >= end {
		w.R2start = end
	}
	return true
}
开发者ID:kljensen,项目名称:snowball,代码行数:98,代码来源:step1b.go
示例18: printDebug
// printDebug logs the word's debug representation (w.DebugString()) via
// the standard logger when debug is true, and does nothing otherwise.
func printDebug(debug bool, w *snowballword.SnowballWord) {
	if !debug {
		return
	}
	log.Println(w.DebugString())
}
开发者ID:kljensen,项目名称:snowball,代码行数:5,代码来源:stem.go
注:本文中的github.com/kljensen/snowball/snowballword.SnowballWord类示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论