• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    公众号

Golang snowballword.SnowballWord类代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Golang中github.com/kljensen/snowball/snowballword.SnowballWord的典型用法代码示例。如果您正苦于以下问题:Golang SnowballWord类的具体用法?Golang SnowballWord怎么用?Golang SnowballWord使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。



在下文中一共展示了SnowballWord类的18个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Golang代码示例。

示例1: removeVerbEnding

// Remove verb endings and return true if one was removed.
//
func removeVerbEnding(word *snowballword.SnowballWord) bool {
	suffix, suffixRunes := word.FirstSuffixIn(word.RVstart, len(word.RS),
		"уйте", "ейте", "ыть", "ыло", "ыли", "ыла", "уют", "ует",
		"нно", "йте", "ишь", "ить", "ите", "ило", "или", "ила",
		"ешь", "ете", "ены", "ено", "ена", "ят", "ют", "ыт", "ым",
		"ыл", "ую", "уй", "ть", "ны", "но", "на", "ло", "ли", "ла",
		"ит", "им", "ил", "ет", "ен", "ем", "ей", "ю", "н", "л", "й",
	)
	switch suffix {
	case "ла", "на", "ете", "йте", "ли", "й", "л", "ем", "н",
		"ло", "но", "ет", "ют", "ны", "ть", "ешь", "нно":

		// These are "Group 1" verb endings.
		// Group 1 endings must follow а (a) or я (ia) in RV.
		if precededByARinRV(word, len(suffixRunes)) == false {
			suffix = ""
		}

	}

	if suffix != "" {
		word.RemoveLastNRunes(len(suffixRunes))
		return true
	}
	return false
}
开发者ID:kljensen,项目名称:snowball,代码行数:28,代码来源:step1.go


示例2: findRegions

// Find the starting point of the regions R1, R2, & RV
//
func findRegions(word *snowballword.SnowballWord) (r1start, r2start, rvstart int) {

	// R1 & R2 are defined in the standard manner.
	r1start = romance.VnvSuffix(word, isLowerVowel, 0)
	r2start = romance.VnvSuffix(word, isLowerVowel, r1start)

	// Set RV, by default, as empty.
	rvstart = len(word.RS)

	// Handle the three special cases: "par", "col", & "tap"
	//
	prefix, prefixRunes := word.FirstPrefix("par", "col", "tap")
	if prefix != "" {
		rvstart = len(prefixRunes)
		return
	}

	// If the word begins with two vowels, RV is the region after the third letter
	if len(word.RS) >= 3 && isLowerVowel(word.RS[0]) && isLowerVowel(word.RS[1]) {
		rvstart = 3
		return
	}

	// Otherwise the region after the first vowel not at the beginning of the word.
	for i := 1; i < len(word.RS); i++ {
		if isLowerVowel(word.RS[i]) {
			rvstart = i + 1
			return
		}
	}

	return
}
开发者ID:kljensen,项目名称:snowball,代码行数:35,代码来源:common.go


示例3: step2

// Step 2 is the removal of the "и" suffix.
//
func step2(word *snowballword.SnowballWord) bool {
	suffix, _ := word.RemoveFirstSuffixIn(word.RVstart, "и")
	if suffix != "" {
		return true
	}
	return false
}
开发者ID:kljensen,项目名称:snowball,代码行数:9,代码来源:step2.go


示例4: step5

// Step 5 Undouble non-vowel endings
//
func step5(word *snowballword.SnowballWord) bool {

	suffix, _ := word.FirstSuffix("enn", "onn", "ett", "ell", "eill")
	if suffix != "" {
		word.RemoveLastNRunes(1)
	}
	return false
}
开发者ID:kljensen,项目名称:snowball,代码行数:10,代码来源:step5.go


示例5: preprocess

func preprocess(word *snowballword.SnowballWord) {

	r1start, r2start, rvstart := findRegions(word)
	word.R1start = r1start
	word.R2start = r2start
	word.RVstart = rvstart

}
开发者ID:kljensen,项目名称:snowball,代码行数:8,代码来源:preprocess.go


示例6: step0

// Step 0 is to strip off apostrophes and "s".
//
func step0(w *snowballword.SnowballWord) bool {
	suffix, suffixRunes := w.FirstSuffix("'s'", "'s", "'")
	if suffix == "" {
		return false
	}
	w.RemoveLastNRunes(len(suffixRunes))
	return true
}
开发者ID:kljensen,项目名称:snowball,代码行数:10,代码来源:step0.go


示例7: step2a

// Step 2a is the removal of verb suffixes beginning y,
// Search for the longest among the following suffixes
// in RV, and if found, delete if preceded by u.
//
func step2a(word *snowballword.SnowballWord) bool {
	suffix, suffixRunes := word.FirstSuffixIn(word.RVstart, len(word.RS), "ya", "ye", "yan", "yen", "yeron", "yendo", "yo", "yó", "yas", "yes", "yais", "yamos")
	if suffix != "" {
		idx := len(word.RS) - len(suffixRunes) - 1
		if idx >= 0 && word.RS[idx] == 117 {
			word.RemoveLastNRunes(len(suffixRunes))
			return true
		}
	}
	return false
}
开发者ID:kljensen,项目名称:snowball,代码行数:15,代码来源:step2a.go


示例8: step3

// Step 3 is the removal of the derivational suffix.
//
func step3(word *snowballword.SnowballWord) bool {

	// Search for a DERIVATIONAL ending in R2 (i.e. the entire
	// ending must lie in R2), and if one is found, remove it.

	suffix, _ := word.RemoveFirstSuffixIn(word.R2start, "ост", "ость")
	if suffix != "" {
		return true
	}
	return false
}
开发者ID:kljensen,项目名称:snowball,代码行数:13,代码来源:step3.go


示例9: r1r2

// Find the starting point of the two regions R1 & R2.
//
// R1 is the region after the first non-vowel following a vowel,
// or is the null region at the end of the word if there is no
// such non-vowel.
//
// R2 is the region after the first non-vowel following a vowel
// in R1, or is the null region at the end of the word if there
// is no such non-vowel.
//
// See http://snowball.tartarus.org/texts/r1r2.html
//
func r1r2(word *snowballword.SnowballWord) (r1start, r2start int) {

	specialPrefix, _ := word.FirstPrefix("gener", "commun", "arsen")

	if specialPrefix != "" {
		r1start = len(specialPrefix)
	} else {
		r1start = romance.VnvSuffix(word, isLowerVowel, 0)
	}
	r2start = romance.VnvSuffix(word, isLowerVowel, r1start)
	return
}
开发者ID:kljensen,项目名称:snowball,代码行数:24,代码来源:common.go


示例10: trimLeftApostrophes

// Trim off leading apostropes.  (Slight variation from
// NLTK implementation here, in which only the first is removed.)
//
func trimLeftApostrophes(word *snowballword.SnowballWord) {
	var (
		numApostrophes int
		r              rune
	)

	for numApostrophes, r = range word.RS {

		// Check for "'", which is unicode code point 39
		if r != 39 {
			break
		}
	}
	if numApostrophes > 0 {
		word.RS = word.RS[numApostrophes:]
		word.R1start = word.R1start - numApostrophes
		word.R2start = word.R2start - numApostrophes
	}
}
开发者ID:kljensen,项目名称:snowball,代码行数:22,代码来源:common.go


示例11: removePerfectiveGerundEnding

// Remove perfective gerund endings and return true if one was removed.
//
func removePerfectiveGerundEnding(word *snowballword.SnowballWord) bool {
	suffix, suffixRunes := word.FirstSuffixIn(word.RVstart, len(word.RS),
		"ившись", "ывшись", "вшись", "ивши", "ывши", "вши", "ив", "ыв", "в",
	)
	switch suffix {
	case "в", "вши", "вшись":

		// These are "Group 1" perfective gerund endings.
		// Group 1 endings must follow а (a) or я (ia) in RV.
		if precededByARinRV(word, len(suffixRunes)) == false {
			suffix = ""
		}

	}

	if suffix != "" {
		word.RemoveLastNRunes(len(suffixRunes))
		return true
	}
	return false
}
开发者ID:kljensen,项目名称:snowball,代码行数:23,代码来源:step1.go


示例12: removeAdjectivalEnding

// Remove adjectival endings and return true if one was removed.
//
func removeAdjectivalEnding(word *snowballword.SnowballWord) bool {

	// Remove adjectival endings.  Start by looking for
	// an adjective ending.
	//
	suffix, _ := word.RemoveFirstSuffixIn(word.RVstart,
		"ими", "ыми", "его", "ого", "ему", "ому", "ее", "ие",
		"ые", "ое", "ей", "ий", "ый", "ой", "ем", "им", "ым",
		"ом", "их", "ых", "ую", "юю", "ая", "яя", "ою", "ею",
	)
	if suffix != "" {

		// We found an adjective ending.  Remove optional participle endings.
		//
		newSuffix, newSuffixRunes := word.FirstSuffixIn(word.RVstart, len(word.RS),
			"ивш", "ывш", "ующ",
			"ем", "нн", "вш", "ющ", "щ",
		)
		switch newSuffix {
		case "ем", "нн", "вш", "ющ", "щ":

			// These are "Group 1" participle endings.
			// Group 1 endings must follow а (a) or я (ia) in RV.
			if precededByARinRV(word, len(newSuffixRunes)) == false {
				newSuffix = ""
			}
		}

		if newSuffix != "" {
			word.RemoveLastNRunes(len(newSuffixRunes))
		}
		return true
	}
	return false
}
开发者ID:kljensen,项目名称:snowball,代码行数:37,代码来源:step1.go


示例13: step1

// Step 1 is the removal of standard suffixes, all of which must
// occur in RV.
//
//
// Search for a PERFECTIVE GERUND ending. If one is found remove it, and
// that is then the end of step 1. Otherwise try and remove a REFLEXIVE
// ending, and then search in turn for (1) an ADJECTIVAL, (2) a VERB or
// (3) a NOUN ending. As soon as one of the endings (1) to (3) is found
// remove it, and terminate step 1.
//
func step1(word *snowballword.SnowballWord) bool {

	// `stop` will be used to signal early termination
	var stop bool

	// Search for a PERFECTIVE GERUND ending
	stop = removePerfectiveGerundEnding(word)
	if stop {
		return true
	}

	// Next remove reflexive endings
	word.RemoveFirstSuffixIn(word.RVstart, "ся", "сь")

	// Next remove adjectival endings
	stop = removeAdjectivalEnding(word)
	if stop {
		return true
	}

	// Next remove verb endings
	stop = removeVerbEnding(word)
	if stop {
		return true
	}

	// Next remove noun endings
	suffix, _ := word.RemoveFirstSuffixIn(word.RVstart,
		"иями", "ями", "иях", "иям", "ием", "ией", "ами", "ях",
		"ям", "ья", "ью", "ье", "ом", "ой", "ов", "ия", "ию",
		"ий", "ии", "ие", "ем", "ей", "еи", "ев", "ах", "ам",
		"я", "ю", "ь", "ы", "у", "о", "й", "и", "е", "а",
	)
	if suffix != "" {
		return true
	}

	return false
}
开发者ID:kljensen,项目名称:snowball,代码行数:49,代码来源:step1.go


示例14: step2b

// Step 2b is the removal of verb suffixes beginning y,
// Search for the longest among the following suffixes
// in RV, and if found, delete if preceded by u.
//
func step2b(word *snowballword.SnowballWord) bool {
	suffix, suffixRunes := word.FirstSuffixIn(word.RVstart, len(word.RS),
		"iésemos", "iéramos", "iríamos", "eríamos", "aríamos", "ásemos",
		"áramos", "ábamos", "isteis", "iríais", "iremos", "ieseis",
		"ierais", "eríais", "eremos", "asteis", "aríais", "aremos",
		"íamos", "irías", "irían", "iréis", "ieses", "iesen", "ieron",
		"ieras", "ieran", "iendo", "erías", "erían", "eréis", "aseis",
		"arías", "arían", "aréis", "arais", "abais", "íais", "iste",
		"iría", "irás", "irán", "imos", "iese", "iera", "idos", "idas",
		"ería", "erás", "erán", "aste", "ases", "asen", "aría", "arás",
		"arán", "aron", "aras", "aran", "ando", "amos", "ados", "adas",
		"abas", "aban", "ías", "ían", "éis", "áis", "iré", "irá", "ido",
		"ida", "eré", "erá", "emos", "ase", "aré", "ará", "ara", "ado",
		"ada", "aba", "ís", "ía", "ió", "ir", "id", "es", "er", "en",
		"ed", "as", "ar", "an", "ad",
	)
	switch suffix {
	case "":
		return false

	case "en", "es", "éis", "emos":

		// Delete, and if preceded by gu delete the u (the gu need not be in RV)
		word.RemoveLastNRunes(len(suffixRunes))
		guSuffix, _ := word.FirstSuffix("gu")
		if guSuffix != "" {
			word.RemoveLastNRunes(1)
		}

	default:

		// Delete
		word.RemoveLastNRunes(len(suffixRunes))
	}
	return true
}
开发者ID:kljensen,项目名称:snowball,代码行数:40,代码来源:step2b.go


示例15: step1a

// Step 1a is normalization of various special "s"-endings.
//
func step1a(w *snowballword.SnowballWord) bool {

	suffix, suffixRunes := w.FirstSuffix("sses", "ied", "ies", "us", "ss", "s")
	switch suffix {

	case "sses":

		// Replace by ss
		w.ReplaceSuffixRunes(suffixRunes, []rune("ss"), true)
		return true

	case "ies", "ied":

		// Replace by i if preceded by more than one letter,
		// otherwise by ie (so ties -> tie, cries -> cri).

		var repl string
		if len(w.RS) > 4 {
			repl = "i"
		} else {
			repl = "ie"
		}
		w.ReplaceSuffixRunes(suffixRunes, []rune(repl), true)
		return true

	case "us", "ss":

		// Do nothing
		return false

	case "s":

		// Delete if the preceding word part contains a vowel
		// not immediately before the s (so gas and this retain
		// the s, gaps and kiwis lose it)
		//
		for i := 0; i < len(w.RS)-2; i++ {
			if isLowerVowel(w.RS[i]) {
				w.RemoveLastNRunes(len(suffixRunes))
				return true
			}
		}
	}
	return false
}
开发者ID:kljensen,项目名称:snowball,代码行数:47,代码来源:step1a.go


示例16: step4

// al, ance, ence, er, ic, able, ible, ant, ement, ment,
// ent, ism, ate, iti, ous, ive, ize
// delete
//
// ion
// delete if preceded by s or t
func step4(w *snowballword.SnowballWord) bool {

	// Find all endings in R1
	suffix, suffixRunes := w.FirstSuffix(
		"ement", "ance", "ence", "able", "ible", "ment",
		"ent", "ant", "ism", "ate", "iti", "ous", "ive",
		"ize", "ion", "al", "er", "ic",
	)

	// If it does not fit in R2, do nothing.
	if len(suffixRunes) > len(w.RS)-w.R2start {
		return false
	}

	// Handle special cases
	switch suffix {
	case "":
		return false

	case "ion":
		// Replace by og if preceded by l
		// l = 108
		rsLen := len(w.RS)
		if rsLen >= 4 {
			switch w.RS[rsLen-4] {
			case 115, 116:
				w.RemoveLastNRunes(len(suffixRunes))
				return true
			}

		}
		return false
	}

	// Handle basic replacements
	w.RemoveLastNRunes(len(suffixRunes))
	return true

}
开发者ID:kljensen,项目名称:snowball,代码行数:45,代码来源:step4.go


示例17: step1b

// Step 1b is the normalization of various "ly" and "ed" sufficies.
//
func step1b(w *snowballword.SnowballWord) bool {

	suffix, suffixRunes := w.FirstSuffix("eedly", "ingly", "edly", "ing", "eed", "ed")

	switch suffix {

	case "":
		// No suffix found
		return false

	case "eed", "eedly":

		// Replace by ee if in R1
		if len(suffixRunes) <= len(w.RS)-w.R1start {
			w.ReplaceSuffixRunes(suffixRunes, []rune("ee"), true)
		}
		return true

	case "ed", "edly", "ing", "ingly":
		hasLowerVowel := false
		for i := 0; i < len(w.RS)-len(suffixRunes); i++ {
			if isLowerVowel(w.RS[i]) {
				hasLowerVowel = true
				break
			}
		}
		if hasLowerVowel {

			// This case requires a two-step transformation and, due
			// to the way we've implemented the `ReplaceSuffix` method
			// here, information about R1 and R2 would be lost between
			// the two.  Therefore, we need to keep track of the
			// original R1 & R2, so that we may set them below, at the
			// end of this case.
			//
			originalR1start := w.R1start
			originalR2start := w.R2start

			// Delete if the preceding word part contains a vowel
			w.RemoveLastNRunes(len(suffixRunes))

			// ...and after the deletion...

			newSuffix, newSuffixRunes := w.FirstSuffix("at", "bl", "iz", "bb", "dd", "ff", "gg", "mm", "nn", "pp", "rr", "tt")
			switch newSuffix {

			case "":

				// If the word is short, add "e"
				if isShortWord(w) {

					// By definition, r1 and r2 are the empty string for
					// short words.
					w.RS = append(w.RS, []rune("e")...)
					w.R1start = len(w.RS)
					w.R2start = len(w.RS)
					return true
				}

			case "at", "bl", "iz":

				// If the word ends "at", "bl" or "iz" add "e"
				w.ReplaceSuffixRunes(newSuffixRunes, []rune(newSuffix+"e"), true)

			case "bb", "dd", "ff", "gg", "mm", "nn", "pp", "rr", "tt":

				// If the word ends with a double remove the last letter.
				// Note that, "double" does not include all possible doubles,
				// just those shown above.
				//
				w.RemoveLastNRunes(1)
			}

			// Because we did a double replacement, we need to fix
			// R1 and R2 manually. This is just becase of how we've
			// implemented the `ReplaceSuffix` method.
			//
			rsLen := len(w.RS)
			if originalR1start < rsLen {
				w.R1start = originalR1start
			} else {
				w.R1start = rsLen
			}
			if originalR2start < rsLen {
				w.R2start = originalR2start
			} else {
				w.R2start = rsLen
			}

			return true
		}

	}

	return false
}
开发者ID:kljensen,项目名称:snowball,代码行数:98,代码来源:step1b.go


示例18: printDebug

func printDebug(debug bool, w *snowballword.SnowballWord) {
	if debug {
		log.Println(w.DebugString())
	}
}
开发者ID:kljensen,项目名称:snowball,代码行数:5,代码来源:stem.go



注:本文中的github.com/kljensen/snowball/snowballword.SnowballWord类示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Golang specs.Expect函数代码示例发布时间:2022-05-23
下一篇:
Golang config.ReadDefault函数代码示例发布时间:2022-05-23
热门推荐
热门话题
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap