本文整理汇总了C#中TokenStream类的典型用法代码示例。如果您正苦于以下问题:C# TokenStream类的具体用法?C# TokenStream怎么用?C# TokenStream使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
TokenStream类属于命名空间,在下文中一共展示了TokenStream类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的C#代码示例。
示例1: TypeTokenFilter
/// <summary>
/// Creates a <seealso cref="TypeTokenFilter"/> that filters tokens by their type attribute.
/// </summary>
/// <param name="version"> the Lucene match version </param>
/// <param name="enablePositionIncrements"> whether position increments are preserved for removed tokens </param>
/// <param name="input"> the <seealso cref="TokenStream"/> to consume </param>
/// <param name="stopTypes"> the token types to match against </param>
/// <param name="useWhiteList"> if true, keep only tokens whose type is in stopTypes; otherwise drop them </param>
public TypeTokenFilter(Version version, bool enablePositionIncrements, TokenStream input, HashSet<string> stopTypes, bool useWhiteList)
    : base(version, enablePositionIncrements, input)
{
    this.stopTypes = stopTypes;
    this.useWhiteList = useWhiteList;
    typeAttribute = AddAttribute<ITypeAttribute>();
}
开发者ID:Cefa68000,项目名称:lucenenet,代码行数:7,代码来源:TypeTokenFilter.cs
示例2: IndonesianStemFilter
/// <summary>
/// Creates a new <seealso cref="IndonesianStemFilter"/>.
/// <para>
/// When <paramref name="stemDerivational"/> is <c>false</c>, only inflectional
/// suffixes (particles and possessive pronouns) are stemmed.
/// </para>
/// </summary>
public IndonesianStemFilter(TokenStream input, bool stemDerivational)
    : base(input)
{
    termAtt = AddAttribute<ICharTermAttribute>();
    keywordAtt = AddAttribute<IKeywordAttribute>();
    this.stemDerivational = stemDerivational;
}
开发者ID:ChristopherHaws,项目名称:lucenenet,代码行数:14,代码来源:IndonesianStemFilter.cs
示例3: FilteringTokenFilter
/// <summary>
/// Creates a new <seealso cref="FilteringTokenFilter"/>. </summary>
/// <param name="version"> the Lucene match version </param>
/// <param name="in"> the <seealso cref="TokenStream"/> to consume </param>
public FilteringTokenFilter(LuceneVersion version, TokenStream @in)
    : base(@in)
{
    this.version = version;
    // Position increments are honored by default.
    this.enablePositionIncrements = true;
    posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
}
开发者ID:ChristopherHaws,项目名称:lucenenet,代码行数:11,代码来源:FilteringTokenFilter.cs
示例4: CompoundWordTokenFilterBase
/// <summary>
/// Creates a new <seealso cref="CompoundWordTokenFilterBase"/> over <paramref name="input"/>.
/// </summary>
/// <param name="matchVersion"> the Lucene match version </param>
/// <param name="input"> the <seealso cref="TokenStream"/> to decompose </param>
/// <param name="dictionary"> dictionary of candidate subwords </param>
/// <param name="minWordSize"> minimum token length to attempt decomposition on; must be non-negative </param>
/// <param name="minSubwordSize"> minimum subword length to emit; must be non-negative </param>
/// <param name="maxSubwordSize"> maximum subword length to emit; must be non-negative </param>
/// <param name="onlyLongestMatch"> if true, emit only the longest matching subword </param>
protected CompoundWordTokenFilterBase(LuceneVersion matchVersion, TokenStream input, CharArraySet dictionary, int minWordSize, int minSubwordSize, int maxSubwordSize, bool onlyLongestMatch)
    : base(input)
{
    // Attribute registration first (shared with the wrapped stream), as in the original.
    termAtt = AddAttribute<ICharTermAttribute>() as CharTermAttribute;
    offsetAtt = AddAttribute<IOffsetAttribute>();
    posIncAtt = AddAttribute<IPositionIncrementAttribute>();

    // Reject invalid sizes before finishing construction.
    if (minWordSize < 0)
    {
        throw new System.ArgumentException("minWordSize cannot be negative");
    }
    if (minSubwordSize < 0)
    {
        throw new System.ArgumentException("minSubwordSize cannot be negative");
    }
    if (maxSubwordSize < 0)
    {
        throw new System.ArgumentException("maxSubwordSize cannot be negative");
    }

    this.matchVersion = matchVersion;
    this.dictionary = dictionary;
    this.minWordSize = minWordSize;
    this.minSubwordSize = minSubwordSize;
    this.maxSubwordSize = maxSubwordSize;
    this.onlyLongestMatch = onlyLongestMatch;
    // Queue of pending decompounded tokens produced ahead of consumption.
    this.tokens = new LinkedList<CompoundToken>();
}
开发者ID:Cefa68000,项目名称:lucenenet,代码行数:27,代码来源:CompoundWordTokenFilterBase.cs
示例5: AddScriptMacro
/// <summary>
/// Parses <paramref name="Script"/> as a macro declaration and queues it for
/// emission on the current context.
/// </summary>
public void AddScriptMacro(String Script)
{
    var tokens = new TokenStream(new StringIterator(Script), Context);
    var macro = Parse.ParseMacroDeclaration(tokens, Context);
    // Tag the declaration with the owning context before queueing it.
    macro.OwnerContextID = Context.ID;
    Context.PendingEmission.Add(macro);
}
开发者ID:Blecki,项目名称:EtcScript,代码行数:7,代码来源:Environment.cs
示例6: NGramTokenFilter
/// <summary>
/// Creates an NGramTokenFilter emitting n-grams between <paramref name="minGram"/>
/// and <paramref name="maxGram"/> characters in length. </summary>
/// <param name="version"> Lucene version to enable correct position increments.
/// See <a href="#version">above</a> for details. </param>
/// <param name="input"> <seealso cref="TokenStream"/> holding the input to be tokenized </param>
/// <param name="minGram"> the smallest n-gram to generate; must be at least 1 </param>
/// <param name="maxGram"> the largest n-gram to generate; must be >= minGram </param>
public NGramTokenFilter(Version version, TokenStream input, int minGram, int maxGram) : base(new CodepointCountFilter(version, input, minGram, int.MaxValue))
{
    this.version = version;
    // Character handling depends on the match version (surrogate-aware from 4.4 on).
    if (version.onOrAfter(Version.LUCENE_44))
    {
        this.charUtils = CharacterUtils.getInstance(version);
    }
    else
    {
        this.charUtils = CharacterUtils.Java4Instance;
    }
    if (minGram < 1)
    {
        throw new System.ArgumentException("minGram must be greater than zero");
    }
    if (minGram > maxGram)
    {
        throw new System.ArgumentException("minGram must not be greater than maxGram");
    }
    this.minGram = minGram;
    this.maxGram = maxGram;
    if (version.onOrAfter(Version.LUCENE_44))
    {
        // 4.4+ streams use real position attributes.
        posIncAtt = addAttribute(typeof(PositionIncrementAttribute));
        posLenAtt = addAttribute(typeof(PositionLengthAttribute));
    }
    else
    {
        // Pre-4.4 compatibility: anonymous placeholder attribute implementations.
        posIncAtt = new PositionIncrementAttributeAnonymousInnerClassHelper(this);
        posLenAtt = new PositionLengthAttributeAnonymousInnerClassHelper(this);
    }
}
开发者ID:paulirwin,项目名称:lucene.net,代码行数:32,代码来源:NGramTokenFilter.cs
示例7: CodepointCountFilter
/// <summary>
/// Creates a new <seealso cref="CodepointCountFilter"/>. Tokens whose
/// <seealso cref="CharTermAttribute"/> code-point count falls below
/// <paramref name="min"/> or above <paramref name="max"/> are filtered out. </summary>
/// <param name="version"> the Lucene match version </param>
/// <param name="in"> the <seealso cref="TokenStream"/> to consume </param>
/// <param name="min"> the minimum length </param>
/// <param name="max"> the maximum length </param>
public CodepointCountFilter(LuceneVersion version, TokenStream @in, int min, int max)
    : base(version, @in)
{
    termAtt = AddAttribute<ICharTermAttribute>();
    this.min = min;
    this.max = max;
}
开发者ID:Cefa68000,项目名称:lucenenet,代码行数:15,代码来源:CodepointCountFilter.cs
示例8: NorwegianMinimalStemFilter
/// <summary>
/// Creates a new <seealso cref="NorwegianMinimalStemFilter"/>. </summary>
/// <param name="input"> the <seealso cref="TokenStream"/> to stem </param>
/// <param name="flags"> set to <seealso cref="NorwegianLightStemmer#BOKMAAL"/>,
/// <seealso cref="NorwegianLightStemmer#NYNORSK"/>, or both. </param>
public NorwegianMinimalStemFilter(TokenStream input, int flags)
    : base(input)
{
    termAtt = AddAttribute<ICharTermAttribute>();
    keywordAttr = AddAttribute<IKeywordAttribute>();
    this.stemmer = new NorwegianMinimalStemmer(flags);
}
开发者ID:ChristopherHaws,项目名称:lucenenet,代码行数:11,代码来源:NorwegianMinimalStemFilter.cs
示例9: CapitalizationFilter
/// <summary>
/// Creates a CapitalizationFilter with the specified parameters. </summary>
/// <param name="in"> input tokenstream </param>
/// <param name="onlyFirstWord"> should each word be capitalized or all of the words? </param>
/// <param name="keep"> a keep word list. Each word that should be kept separated by whitespace. </param>
/// <param name="forceFirstLetter"> Force the first letter to be capitalized even if it is in the keep list. </param>
/// <param name="okPrefix"> do not change word capitalization if a word begins with something in this list. </param>
/// <param name="minWordLength"> how long the word needs to be to get capitalization applied. If the
/// minWordLength is 3, "and" > "And" but "or" stays "or". </param>
/// <param name="maxWordCount"> if the token contains more then maxWordCount words, the capitalization is
/// assumed to be correct. </param>
/// <param name="maxTokenLength"> maximum token length — exact semantics not visible here; TODO confirm against the filter implementation </param>
/// <exception cref="ArgumentOutOfRangeException"> if minWordLength is negative, or
/// maxWordCount or maxTokenLength is less than 1 </exception>
public CapitalizationFilter(TokenStream @in, bool onlyFirstWord, CharArraySet keep, bool forceFirstLetter, ICollection<char[]> okPrefix, int minWordLength, int maxWordCount, int maxTokenLength)
    : base(@in)
{
    // LUCENENET: The guard clauses were copied here from the version of Lucene.
    // Apparently, the tests were not ported from 4.8.0 because they expected this and the
    // original tests did not. Adding them anyway because there is no downside to this.
    //
    // FIX: the single-string ArgumentOutOfRangeException constructor treats its
    // argument as the *parameter name*, not the message, so the intended text never
    // reached callers. Pass (paramName, message) explicitly.
    if (minWordLength < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(minWordLength), "minWordLength must be greater than or equal to zero");
    }
    if (maxWordCount < 1)
    {
        throw new ArgumentOutOfRangeException(nameof(maxWordCount), "maxWordCount must be greater than zero");
    }
    if (maxTokenLength < 1)
    {
        throw new ArgumentOutOfRangeException(nameof(maxTokenLength), "maxTokenLength must be greater than zero");
    }
    this.onlyFirstWord = onlyFirstWord;
    this.keep = keep;
    this.forceFirstLetter = forceFirstLetter;
    this.okPrefix = okPrefix;
    this.minWordLength = minWordLength;
    this.maxWordCount = maxWordCount;
    this.maxTokenLength = maxTokenLength;
    termAtt = AddAttribute<ICharTermAttribute>();
}
开发者ID:ChristopherHaws,项目名称:lucenenet,代码行数:40,代码来源:CapitalizationFilter.cs
示例10: EdgeNGramTokenFilter
/// <summary>
/// Creates an EdgeNGramTokenFilter producing n-grams of sizes minGram..maxGram
/// taken from one edge (front or back) of each input token.
/// </summary>
/// <param name="version"> the Lucene match version </param>
/// <param name="input"> the <seealso cref="TokenStream"/> to consume </param>
/// <param name="side"> which edge of the token the n-grams are anchored to </param>
/// <param name="minGram"> the smallest n-gram to generate; must be at least 1 </param>
/// <param name="maxGram"> the largest n-gram to generate; must be >= minGram </param>
public EdgeNGramTokenFilter(LuceneVersion version, TokenStream input, Side side, int minGram, int maxGram)
    : base(input)
{
    // NOTE(review): this null comparison only compiles if LuceneVersion is a reference
    // type or nullable in this codebase — confirm; for a plain enum it is a dead check.
    if (version == null)
    {
        throw new System.ArgumentException("version must not be null");
    }
    // Side.BACK was removed in Lucene 4.4; callers are redirected to ReverseStringFilter.
    if (version.OnOrAfter(LuceneVersion.LUCENE_44) && side == Side.BACK)
    {
        throw new System.ArgumentException("Side.BACK is not supported anymore as of Lucene 4.4, use ReverseStringFilter up-front and afterward");
    }
    // NOTE(review): same concern as above — a non-nullable enum Side can never be null.
    if (side == null)
    {
        throw new System.ArgumentException("sideLabel must be either front or back");
    }
    if (minGram < 1)
    {
        throw new System.ArgumentException("minGram must be greater than zero");
    }
    if (minGram > maxGram)
    {
        throw new System.ArgumentException("minGram must not be greater than maxGram");
    }
    this.version = version;
    // NOTE(review): 'onOrAfter' casing differs from 'OnOrAfter' used above — confirm
    // which member actually exists; this looks like a Java-to-C# conversion artifact.
    this.charUtils = version.onOrAfter(LuceneVersion.LUCENE_44) ? CharacterUtils.getInstance(version) : CharacterUtils.Java4Instance;
    this.minGram = minGram;
    this.maxGram = maxGram;
    this.side = side;
}
开发者ID:Cefa68000,项目名称:lucenenet,代码行数:34,代码来源:EdgeNGramTokenFilter.cs
示例11: GetTokenOutliningAction
/// <summary>
/// Inspects the next token and reports whether it starts or ends an outlining
/// (code-folding) node, writing the node key and action into the ref parameters.
/// Unrecognized tokens leave both ref parameters untouched.
/// </summary>
public override void GetTokenOutliningAction(TokenStream tokenStream, ref string outliningKey, ref OutliningNodeAction tokenAction)
{
    Token token = tokenStream.Peek();

    // Curly-brace folding can be disabled in the editor configuration;
    // region folding is always reported.
    bool isCurlyBrace = token.Key == "OpenCurlyBraceToken" || token.Key == "CloseCurlyBraceToken";
    if (isCurlyBrace && g.Config.b_Ed_CodeFold == false)
        return;

    switch (token.Key)
    {
        case "OpenCurlyBraceToken":
            outliningKey = "CodeBlock";
            tokenAction = OutliningNodeAction.Start;
            break;
        case "CloseCurlyBraceToken":
            outliningKey = "CodeBlock";
            tokenAction = OutliningNodeAction.End;
            break;
        case "RegionStartToken":
            outliningKey = "CodeRegion";
            tokenAction = OutliningNodeAction.Start;
            break;
        case "RegionEndToken":
            outliningKey = "CodeRegion";
            tokenAction = OutliningNodeAction.End;
            break;
    }
}
开发者ID:Bloodknight,项目名称:TorqueDev,代码行数:26,代码来源:CSemanticParser.cs
示例12: SnowballFilter
/// <summary>
/// Creates a <seealso cref="SnowballFilter"/> that stems tokens from
/// <paramref name="input"/> using the supplied Snowball stemmer program.
/// </summary>
public SnowballFilter(TokenStream input, SnowballProgram stemmer)
    : base(input)
{
    this.termAtt = AddAttribute<ICharTermAttribute>();
    this.keywordAttr = AddAttribute<IKeywordAttribute>();
    this.stemmer = stemmer;
}
开发者ID:ChristopherHaws,项目名称:lucenenet,代码行数:7,代码来源:SnowballFilter.cs
示例13: TypeTokenFilter
/// <summary>
/// Create a new <seealso cref="TypeTokenFilter"/>. </summary>
/// <param name="version"> the Lucene match version </param>
/// <param name="input"> the <seealso cref="TokenStream"/> to consume </param>
/// <param name="stopTypes"> the types to filter </param>
/// <param name="useWhiteList"> if true, then tokens whose type is in stopTypes will
/// be kept, otherwise they will be filtered out </param>
public TypeTokenFilter(LuceneVersion version, TokenStream input, IEnumerable<string> stopTypes, bool useWhiteList)
    : base(version, input)
{
    typeAttribute = AddAttribute<ITypeAttribute>();
    // Copy into a set for O(1) membership tests regardless of the source collection.
    this.stopTypes = new HashSet<string>(stopTypes);
    this.useWhiteList = useWhiteList;
}
开发者ID:ChristopherHaws,项目名称:lucenenet,代码行数:14,代码来源:TypeTokenFilter.cs
示例14: Parse
/// <summary>
/// Parses a complete expression from <paramref name="reader"/> and returns the
/// single resulting operand; throws SyntaxError on malformed input.
/// </summary>
public LaxExpression Parse(TokenStream reader)
{
    // Remember where the expression started so errors can point at it.
    var start = reader.Next.CodeRange;
    try
    {
        ParseStream(reader);
        //Finish the stack
        while (opStack.Count > 0)
        {
            CompleteStack();
        }
        // A well-formed expression reduces to exactly one operand.
        if (operandStack.Count != 1)
            throw new SyntaxError(start, "Expected operator");
    }
    catch (SyntaxError)
    {
        // Syntax errors already carry location info; propagate unchanged so the
        // release-build catch-all below does not re-wrap them.
        throw;
    }
#if !DEBUG
    // Release builds wrap any unexpected failure as a SyntaxError at the current
    // token; debug builds let it surface for easier diagnosis.
    catch (Exception ex)
    {
        throw new SyntaxError(reader.Current.CodeRange, ex);
    }
#endif
    // NOTE(review): this repeats the check inside the try — presumably defensive,
    // but confirm whether the duplicate (with a different message) is intentional.
    if (operandStack.Count != 1)
        throw new SyntaxError(start, "Expected only one operator left");
    return operandStack.Pop();
}
开发者ID:hultqvist,项目名称:lax,代码行数:32,代码来源:ExpressionParser.cs
示例15: GermanStemFilter
/// <summary>
/// Builds a GermanStemFilter that uses an exclusiontable.
/// </summary>
/// <param name="_in"> the <seealso cref="TokenStream"/> to stem </param>
/// <param name="exclusiontable"> terms excluded from stemming </param>
/// <param name="normalizeDin2">Specifies if the DIN-2007-2 style stemmer should be used in addition to DIN1. This
/// will cause words with 'ae', 'ue', or 'oe' in them (expanded umlauts) to be first converted to 'a', 'u', and 'o'
/// respectively, before the DIN1 stemmer is invoked.</param>
public GermanStemFilter(TokenStream _in, ISet<string> exclusiontable, bool normalizeDin2)
    : base(_in)
{
    exclusionSet = exclusiontable;
    // DIN2 stemmer pre-normalizes expanded umlauts before the DIN1 pass.
    if (normalizeDin2)
    {
        stemmer = new GermanDIN2Stemmer();
    }
    else
    {
        stemmer = new GermanStemmer();
    }
    termAtt = AddAttribute<ITermAttribute>();
}
开发者ID:Cefa68000,项目名称:lucenenet,代码行数:15,代码来源:GermanStemFilter.cs
示例16: DoesNotChangeStateAsUnderlyingEnumeratorIsTraversed
/// <summary>
/// Advancing a TokenStream must yield new positions without mutating any
/// previously obtained position: re-checks every earlier position after each
/// Advance() call.
/// </summary>
public void DoesNotChangeStateAsUnderlyingEnumeratorIsTraversed()
{
    var pos0 = new TokenStream(Tokens());
    pos0.Current.ShouldBe(upper, "ABC", 1, 1);

    var pos1 = pos0.Advance();
    // pos0 is unaffected by advancing.
    pos0.Current.ShouldBe(upper, "ABC", 1, 1);
    pos1.Current.ShouldBe(lower, "def", 1, 4);

    var pos2 = pos1.Advance();
    pos0.Current.ShouldBe(upper, "ABC", 1, 1);
    pos1.Current.ShouldBe(lower, "def", 1, 4);
    pos2.Current.ShouldBe(upper, "GHI", 1, 7);

    var pos3 = pos2.Advance();
    pos0.Current.ShouldBe(upper, "ABC", 1, 1);
    pos1.Current.ShouldBe(lower, "def", 1, 4);
    pos2.Current.ShouldBe(upper, "GHI", 1, 7);
    pos3.Current.ShouldBe(TokenKind.EndOfInput, "", 1, 10);

    // Advancing past end-of-input returns the same terminal stream instance.
    pos3.Advance().ShouldBeSameAs(pos3);
}
开发者ID:plioi,项目名称:parsley,代码行数:25,代码来源:TokenStreamTests.cs
示例17: ParseImport
/// <summary>
/// Parses an import statement of the form
/// <c>[from dotted.name] import dotted.name [as alias]</c>.
/// </summary>
private Executable ParseImport(TokenStream tokens, int indention)
{
    tokens.SkipWhitespace();

    Token firstToken = null;
    List<Token> fromChain = null;

    // Optional "from x.y.z" prefix.
    if (tokens.PeekValue() == "from")
    {
        firstToken = tokens.Pop();
        fromChain = ParseDotChainForImport(tokens);
    }

    // "import" is mandatory; it anchors the statement when "from" was absent.
    firstToken = firstToken ?? tokens.PopExpected("import");
    List<Token> importChain = ParseDotChainForImport(tokens);

    // Optional "as alias" — the alias must be a single token, not a dotted chain.
    List<Token> asChain = null;
    if (tokens.PopIfPresent("as"))
    {
        asChain = ParseDotChainForImport(tokens);
        if (asChain.Count > 1) throw new ParserException(asChain[0], "Expected: variable");
    }

    return new ImportStatement(firstToken, importChain, fromChain, asChain == null ? null : asChain[0]);
}
开发者ID:blakeohare,项目名称:crython,代码行数:26,代码来源:ExecutableParser.cs
示例18: Create
/// <summary>
/// Wraps <paramref name="input"/> in a <seealso cref="TrimFilter"/> configured
/// from this factory's match version and offset-update setting.
/// </summary>
public override TokenStream Create(TokenStream input)
{
    // The TrimFilter constructor used here is marked obsolete; suppress deliberately.
#pragma warning disable 612, 618
    return new TrimFilter(luceneMatchVersion, input, updateOffsets);
#pragma warning restore 612, 618
}
开发者ID:ChristopherHaws,项目名称:lucenenet,代码行数:7,代码来源:TrimFilterFactory.cs
示例19: assertEquals
// Compares two token streams token-by-token on a few core attributes
// (term text, position increment, start/end offsets), then verifies both
// streams end together and agree on the final offset.
// TODO: test other attributes as well.
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void assertEquals(String s, org.apache.lucene.analysis.TokenStream left, org.apache.lucene.analysis.TokenStream right) throws Exception
public virtual void assertEquals(string s, TokenStream left, TokenStream right)
{
    left.reset();
    right.reset();

    CharTermAttribute termL = left.addAttribute(typeof(CharTermAttribute));
    CharTermAttribute termR = right.addAttribute(typeof(CharTermAttribute));
    OffsetAttribute offsetL = left.addAttribute(typeof(OffsetAttribute));
    OffsetAttribute offsetR = right.addAttribute(typeof(OffsetAttribute));
    PositionIncrementAttribute posL = left.addAttribute(typeof(PositionIncrementAttribute));
    PositionIncrementAttribute posR = right.addAttribute(typeof(PositionIncrementAttribute));

    // Walk the left stream; the right stream must produce a matching token each time.
    while (left.incrementToken())
    {
        assertTrue("wrong number of tokens for input: " + s, right.incrementToken());
        assertEquals("wrong term text for input: " + s, termL.ToString(), termR.ToString());
        assertEquals("wrong position for input: " + s, posL.PositionIncrement, posR.PositionIncrement);
        assertEquals("wrong start offset for input: " + s, offsetL.startOffset(), offsetR.startOffset());
        assertEquals("wrong end offset for input: " + s, offsetL.endOffset(), offsetR.endOffset());
    }
    // The right stream must be exhausted as well.
    assertFalse("wrong number of tokens for input: " + s, right.incrementToken());

    left.end();
    right.end();
    assertEquals("wrong final offset for input: " + s, offsetL.endOffset(), offsetR.endOffset());

    left.close();
    right.close();
}
开发者ID:Cefa68000,项目名称:lucenenet,代码行数:30,代码来源:TestDuelingAnalyzers.cs
示例20: ChineseFilter
/// <summary>
/// Builds a <seealso cref="ChineseFilter"/> over <paramref name="in"/>,
/// initializing the stop table from the STOP_WORDS constant.
/// </summary>
public ChineseFilter(TokenStream @in)
    : base(@in)
{
    termAtt = AddAttribute<ICharTermAttribute>();
    // NOTE(review): the trailing 'false' is presumably the ignoreCase flag of
    // CharArraySet — confirm against its constructor signature.
    stopTable = new CharArraySet(LuceneVersion.LUCENE_CURRENT, Arrays.AsList(STOP_WORDS), false);
}
开发者ID:ChristopherHaws,项目名称:lucenenet,代码行数:7,代码来源:ChineseFilter.cs
注:本文中的TokenStream类示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论