本文整理汇总了Java中org.apache.lucene.analysis.ja.dict.UserDictionary类的典型用法代码示例。如果您正苦于以下问题:Java UserDictionary类的具体用法?Java UserDictionary怎么用?Java UserDictionary使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。
UserDictionary类属于org.apache.lucene.analysis.ja.dict包,在下文中一共展示了UserDictionary类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Java代码示例。
示例1: getUserDictionary
import org.apache.lucene.analysis.ja.dict.UserDictionary; //导入依赖的package包/类
/**
 * Loads the Kuromoji user dictionary referenced by {@code USER_DICT_OPTION}.
 *
 * @param env      environment handed to {@code Analysis.getReaderFromFile}
 * @param settings settings handed to {@code Analysis.getReaderFromFile}
 * @return the dictionary produced by {@code UserDictionary.open}, or {@code null} when
 *         {@code Analysis.getReaderFromFile} returns no reader
 * @throws ElasticsearchException if an {@link IOException} occurs while reading or closing the source
 */
public static UserDictionary getUserDictionary(Environment env, Settings settings) {
    // try-with-resources skips close() for a null resource and records a close() failure as a
    // suppressed exception instead of letting it replace the primary one, as a finally-close would.
    try (Reader reader = Analysis.getReaderFromFile(env, settings, USER_DICT_OPTION)) {
        if (reader == null) {
            return null;
        }
        return UserDictionary.open(reader);
    } catch (IOException e) {
        throw new ElasticsearchException("failed to load kuromoji user dictionary", e);
    }
}
开发者ID:justor,项目名称:elasticsearch_my,代码行数:17,代码来源:KuromojiTokenizerFactory.java
示例2: userDictionary
import org.apache.lucene.analysis.ja.dict.UserDictionary; //导入依赖的package包/类
/**
 * Builds a {@link UserDictionary} from dictionary rows held in memory.
 *
 * <p>Every array element is written as one line (terminated by {@code '\n'}) and the
 * resulting text is handed to {@code UserDictionary.open}.
 *
 * @param userDictArray dictionary rows, or {@code null} when no user dictionary is wanted
 * @return the parsed dictionary; {@code null} when {@code userDictArray} is {@code null}
 *         (the original author also notes that {@code open} returns {@code null} for empty input)
 * @throws UDFArgumentException if anything is thrown while opening the dictionary
 */
@Nullable
private static UserDictionary userDictionary(@Nullable final String[] userDictArray)
        throws UDFArgumentException {
    if (userDictArray == null) {
        return null;
    }

    // Join the rows into a single newline-terminated document.
    final StringBuilder lines = new StringBuilder();
    for (int i = 0; i < userDictArray.length; i++) {
        lines.append(userDictArray[i]);
        lines.append('\n');
    }
    final String dictionaryText = lines.toString();
    final Reader source = new StringReader(dictionaryText);

    try {
        final UserDictionary dictionary = UserDictionary.open(source); // null if empty
        return dictionary;
    } catch (Throwable failure) {
        final String message =
            "Failed to create user dictionary based on the given array<string>: " + failure;
        throw new UDFArgumentException(message);
    }
}
开发者ID:apache,项目名称:incubator-hivemall,代码行数:20,代码来源:KuromojiUDF.java
示例3: getUserDictionary
import org.apache.lucene.analysis.ja.dict.UserDictionary; //导入依赖的package包/类
/**
 * Reads the Kuromoji user dictionary configured under {@code USER_DICT_OPTION}.
 *
 * @param env      environment passed through to {@code Analysis.getReaderFromFile}
 * @param settings settings passed through to {@code Analysis.getReaderFromFile}
 * @return the result of {@code UserDictionary.open}, or {@code null} if
 *         {@code Analysis.getReaderFromFile} yields {@code null}
 * @throws ElasticsearchException wrapping any {@link IOException} raised while reading or closing
 */
public static UserDictionary getUserDictionary(final Environment env, final Settings settings) {
    // A null resource is legal in try-with-resources (no close() is attempted), and a failing
    // close() is attached as suppressed rather than hiding an exception thrown by open().
    try (final Reader reader = Analysis.getReaderFromFile(env, settings, USER_DICT_OPTION)) {
        if (reader == null) {
            return null;
        }
        return UserDictionary.open(reader);
    } catch (final IOException e) {
        throw new ElasticsearchException("failed to load kuromoji user dictionary", e);
    }
}
开发者ID:codelibs,项目名称:elasticsearch-analysis-ja,代码行数:17,代码来源:KuromojiTokenizerFactory.java
示例4: inform
import org.apache.lucene.analysis.ja.dict.UserDictionary; //导入依赖的package包/类
/**
 * Loads the user dictionary from {@code userDictionaryPath} when a path is configured;
 * otherwise clears {@code userDictionary}.
 *
 * <p>The resource is decoded with {@code userDictionaryEncoding}, falling back to
 * {@code IOUtils.UTF_8} when no encoding is set. The decoder is configured to report
 * malformed or unmappable input rather than silently replacing it.
 *
 * @param loader resource loader used to open the dictionary resource
 * @throws IOException if the resource cannot be opened, decoded, read, or closed
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
    if (userDictionaryPath != null) {
        // The stream was previously never closed; try-with-resources releases it on
        // both the success path and every failure path.
        try (InputStream stream = loader.openResource(userDictionaryPath)) {
            String encoding = userDictionaryEncoding;
            if (encoding == null) {
                encoding = IOUtils.UTF_8;
            }
            CharsetDecoder decoder = Charset.forName(encoding).newDecoder()
                .onMalformedInput(CodingErrorAction.REPORT)
                .onUnmappableCharacter(CodingErrorAction.REPORT);
            Reader reader = new InputStreamReader(stream, decoder);
            userDictionary = new UserDictionary(reader);
        }
    } else {
        userDictionary = null;
    }
}
开发者ID:europeana,项目名称:search,代码行数:18,代码来源:JapaneseTokenizerFactory.java
示例5: readDict
import org.apache.lucene.analysis.ja.dict.UserDictionary; //导入依赖的package包/类
/**
 * Reads the test user dictionary {@code userdict.txt} from the classpath location of
 * {@code TestJapaneseTokenizer}, decoding it as UTF-8.
 *
 * @return the dictionary built from the resource
 * @throws RuntimeException if the resource is missing, or wrapping any {@link IOException}
 *         raised while reading or closing it
 */
public static UserDictionary readDict() {
    InputStream is = TestJapaneseTokenizer.class.getResourceAsStream("userdict.txt");
    if (is == null) {
        throw new RuntimeException("Cannot find userdict.txt in test classpath!");
    }
    // Closing the reader closes the wrapped stream; a close() failure is recorded as
    // suppressed instead of replacing an exception thrown while parsing.
    try (Reader reader = new InputStreamReader(is, StandardCharsets.UTF_8)) {
        return new UserDictionary(reader);
    } catch (IOException ioe) {
        throw new RuntimeException(ioe);
    }
}
开发者ID:europeana,项目名称:search,代码行数:17,代码来源:TestJapaneseTokenizer.java
示例6: inform
import org.apache.lucene.analysis.ja.dict.UserDictionary; //导入依赖的package包/类
/**
 * Initialises the factory from {@code args}: resolves the tokenization mode, loads the
 * optional user dictionary, and reads the discard-punctuation flag (default {@code true}).
 *
 * <p>When {@code USER_DICT_PATH} is set, the resource is decoded with the encoding given by
 * {@code USER_DICT_ENCODING}, falling back to {@code IOUtils.UTF_8}. The decoder reports
 * malformed or unmappable input instead of silently replacing it.
 *
 * @param loader resource loader used to open the dictionary resource
 * @throws IOException if the resource cannot be opened, decoded, read, or closed
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
    mode = getMode(args);
    String userDictionaryPath = args.get(USER_DICT_PATH);
    if (userDictionaryPath != null) {
        InputStream stream = loader.openResource(userDictionaryPath);
        // The stream was previously left open; release it whether or not parsing succeeds.
        try {
            String encoding = args.get(USER_DICT_ENCODING);
            if (encoding == null) {
                encoding = IOUtils.UTF_8;
            }
            CharsetDecoder decoder = Charset.forName(encoding).newDecoder()
                .onMalformedInput(CodingErrorAction.REPORT)
                .onUnmappableCharacter(CodingErrorAction.REPORT);
            Reader reader = new InputStreamReader(stream, decoder);
            userDictionary = new UserDictionary(reader);
        } finally {
            stream.close();
        }
    } else {
        userDictionary = null;
    }
    discardPunctuation = getBoolean(DISCARD_PUNCTUATION, true);
}
开发者ID:pkarmstr,项目名称:NYBC,代码行数:21,代码来源:JapaneseTokenizerFactory.java
示例7: readDict
import org.apache.lucene.analysis.ja.dict.UserDictionary; //导入依赖的package包/类
/**
 * Loads {@code userdict.txt} from the classpath location of {@code TestJapaneseTokenizer}
 * and parses it into a {@link UserDictionary} using {@code IOUtils.CHARSET_UTF_8}.
 *
 * @return the dictionary built from the resource
 * @throws RuntimeException if the resource cannot be found, or wrapping any
 *         {@link IOException} raised while reading or closing it
 */
public static UserDictionary readDict() {
    final InputStream resource = TestJapaneseTokenizer.class.getResourceAsStream("userdict.txt");
    if (null == resource) {
        throw new RuntimeException("Cannot find userdict.txt in test classpath!");
    }

    try {
        final UserDictionary dictionary;
        try {
            dictionary = new UserDictionary(new InputStreamReader(resource, IOUtils.CHARSET_UTF_8));
        } finally {
            // Always release the stream; a failure here is handled by the outer catch.
            resource.close();
        }
        return dictionary;
    } catch (IOException failure) {
        throw new RuntimeException(failure);
    }
}
开发者ID:pkarmstr,项目名称:NYBC,代码行数:17,代码来源:TestJapaneseTokenizer.java
示例8: KuromojiAnalyzerProvider
import org.apache.lucene.analysis.ja.dict.UserDictionary; //导入依赖的package包/类
/**
 * Creates the provider and builds its {@code JapaneseAnalyzer} from the given settings.
 *
 * <p>Stop words come from {@code Analysis.parseStopWords} (with the analyzer's default stop
 * set passed as the default), while the tokenization mode and user dictionary are resolved
 * through {@code KuromojiTokenizerFactory}. The analyzer's default stop tags are used as-is.
 *
 * @param indexSettings index settings; also supplies the index creation version
 * @param env           environment used when resolving stop words and the user dictionary
 * @param name          name forwarded to the superclass constructor
 * @param settings      settings forwarded to the superclass and to the lookups above
 */
public KuromojiAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);

    final Set<?> configuredStopWords = Analysis.parseStopWords(
        env,
        indexSettings.getIndexVersionCreated(),
        settings,
        JapaneseAnalyzer.getDefaultStopSet());
    final JapaneseTokenizer.Mode tokenizerMode = KuromojiTokenizerFactory.getMode(settings);
    final UserDictionary dictionary = KuromojiTokenizerFactory.getUserDictionary(env, settings);

    // The analyzer receives its own copy of the stop words.
    final CharArraySet stopSet = CharArraySet.copy(configuredStopWords);
    analyzer = new JapaneseAnalyzer(dictionary, tokenizerMode, stopSet, JapaneseAnalyzer.getDefaultStopTags());
}
开发者ID:justor,项目名称:elasticsearch_my,代码行数:9,代码来源:KuromojiAnalyzerProvider.java
示例9: KuromojiAnalyzerProvider
import org.apache.lucene.analysis.ja.dict.UserDictionary; //导入依赖的package包/类
/**
 * Builds the provider's {@code JapaneseAnalyzer}.
 *
 * <p>Resolution order: stop words via {@code Analysis.parseStopWords}, tokenization mode and
 * user dictionary via {@code KuromojiTokenizerFactory}, then a copy of the stop words and the
 * analyzer's default stop tags.
 *
 * @param indexSettings index settings; its creation version is used when parsing stop words
 * @param env           environment used for stop-word and user-dictionary lookup
 * @param name          name forwarded to the superclass constructor
 * @param settings      settings forwarded to the superclass and used for all lookups
 */
public KuromojiAnalyzerProvider(final IndexSettings indexSettings, final Environment env, final String name, final Settings settings) {
    super(indexSettings, name, settings);

    final Set<?> parsedStopWords =
        Analysis.parseStopWords(env, indexSettings.getIndexVersionCreated(), settings, JapaneseAnalyzer.getDefaultStopSet());
    final JapaneseTokenizer.Mode selectedMode = KuromojiTokenizerFactory.getMode(settings);
    final UserDictionary userDict = KuromojiTokenizerFactory.getUserDictionary(env, settings);

    analyzer = new JapaneseAnalyzer(
        userDict,
        selectedMode,
        CharArraySet.copy(parsedStopWords),
        JapaneseAnalyzer.getDefaultStopTags());
}
开发者ID:codelibs,项目名称:elasticsearch-analysis-ja,代码行数:9,代码来源:KuromojiAnalyzerProvider.java
示例10: JapaneseAnalyzer
import org.apache.lucene.analysis.ja.dict.UserDictionary; //导入依赖的package包/类
/**
 * Creates an analyzer with the given dictionary, mode, stop words, and stop tags.
 *
 * @param userDict  user dictionary stored on this analyzer
 * @param mode      tokenization mode stored on this analyzer
 * @param stopwords stop words, forwarded to the superclass constructor
 * @param stoptags  stop tags stored on this analyzer
 */
public JapaneseAnalyzer(
        UserDictionary userDict,
        Mode mode,
        CharArraySet stopwords,
        Set<String> stoptags) {
    super(stopwords);
    this.stoptags = stoptags;
    this.mode = mode;
    this.userDict = userDict;
}
开发者ID:europeana,项目名称:search,代码行数:7,代码来源:JapaneseAnalyzer.java
示例11: JapaneseTokenizer
import org.apache.lucene.analysis.ja.dict.UserDictionary; //导入依赖的package包/类
/**
* Create a new JapaneseTokenizer.
* <p>
* Obtains the dictionary resources, wires up the optional user dictionary,
* derives the search/extended/compound flags from {@code mode}, resets the
* internal state, and registers each dictionary under its {@code Type}.
*
* @param factory the AttributeFactory to use
* @param input Reader containing text
* @param userDictionary Optional: if non-null, user dictionary.
* @param discardPunctuation true if punctuation tokens should be dropped from the output.
* @param mode tokenization mode.
*/
public JapaneseTokenizer
(AttributeFactory factory, Reader input, UserDictionary userDictionary, boolean discardPunctuation, Mode mode) {
super(factory, input);
// Dictionary resources are obtained through their getInstance() accessors.
dictionary = TokenInfoDictionary.getInstance();
fst = dictionary.getFST();
unkDictionary = UnknownDictionary.getInstance();
characterDefinition = unkDictionary.getCharacterDefinition();
this.userDictionary = userDictionary;
costs = ConnectionCosts.getInstance();
fstReader = fst.getBytesReader();
// The user FST and its reader exist only when a user dictionary was supplied.
if (userDictionary != null) {
userFST = userDictionary.getFST();
userFSTReader = userFST.getBytesReader();
} else {
userFST = null;
userFSTReader = null;
}
this.discardPunctuation = discardPunctuation;
// Translate the mode into three flags; any mode other than SEARCH or EXTENDED
// turns all three off.
switch(mode){
case SEARCH:
searchMode = true;
extendedMode = false;
outputCompounds = true;
break;
case EXTENDED:
searchMode = true;
extendedMode = true;
outputCompounds = false;
break;
default:
searchMode = false;
extendedMode = false;
outputCompounds = false;
break;
}
// Point the buffer at this tokenizer's input, then reset the remaining state.
buffer.reset(this.input);
resetState();
// Register each dictionary under its Type; the USER entry is null when no
// user dictionary was given.
dictionaryMap.put(Type.KNOWN, dictionary);
dictionaryMap.put(Type.UNKNOWN, unkDictionary);
dictionaryMap.put(Type.USER, userDictionary);
}
开发者ID:europeana,项目名称:search,代码行数:53,代码来源:JapaneseTokenizer.java
示例12: JapaneseAnalyzer
import org.apache.lucene.analysis.ja.dict.UserDictionary; //导入依赖的package包/类
/**
 * Creates an analyzer for the given version with the supplied dictionary, mode,
 * stop words, and stop tags.
 *
 * @param matchVersion version forwarded to the superclass constructor
 * @param userDict     user dictionary stored on this analyzer
 * @param mode         tokenization mode stored on this analyzer
 * @param stopwords    stop words, forwarded to the superclass constructor
 * @param stoptags     stop tags stored on this analyzer
 */
public JapaneseAnalyzer(
        Version matchVersion,
        UserDictionary userDict,
        Mode mode,
        CharArraySet stopwords,
        Set<String> stoptags) {
    super(matchVersion, stopwords);
    this.stoptags = stoptags;
    this.mode = mode;
    this.userDict = userDict;
}
开发者ID:pkarmstr,项目名称:NYBC,代码行数:7,代码来源:JapaneseAnalyzer.java
示例13: JapaneseTokenizer
import org.apache.lucene.analysis.ja.dict.UserDictionary; //导入依赖的package包/类
/**
* Create a new JapaneseTokenizer.
* <p>
* Obtains the dictionary resources, wires up the optional user dictionary,
* derives the search/extended/compound flags from {@code mode}, resets the
* internal state, and registers each dictionary under its {@code Type}.
*
* @param input Reader containing text
* @param userDictionary Optional: if non-null, user dictionary.
* @param discardPunctuation true if punctuation tokens should be dropped from the output.
* @param mode tokenization mode.
*/
public JapaneseTokenizer(Reader input, UserDictionary userDictionary, boolean discardPunctuation, Mode mode) {
super(input);
// Dictionary resources are obtained through their getInstance() accessors.
dictionary = TokenInfoDictionary.getInstance();
fst = dictionary.getFST();
unkDictionary = UnknownDictionary.getInstance();
characterDefinition = unkDictionary.getCharacterDefinition();
this.userDictionary = userDictionary;
costs = ConnectionCosts.getInstance();
fstReader = fst.getBytesReader();
// The user FST and its reader exist only when a user dictionary was supplied.
if (userDictionary != null) {
userFST = userDictionary.getFST();
userFSTReader = userFST.getBytesReader();
} else {
userFST = null;
userFSTReader = null;
}
this.discardPunctuation = discardPunctuation;
// Translate the mode into three flags; any mode other than SEARCH or EXTENDED
// turns all three off.
switch(mode){
case SEARCH:
searchMode = true;
extendedMode = false;
outputCompounds = true;
break;
case EXTENDED:
searchMode = true;
extendedMode = true;
outputCompounds = false;
break;
default:
searchMode = false;
extendedMode = false;
outputCompounds = false;
break;
}
buffer.reset(null); // deliberately null: best-effort NPE for consumers that don't call reset()
resetState();
// Register each dictionary under its Type; the USER entry is null when no
// user dictionary was given.
dictionaryMap.put(Type.KNOWN, dictionary);
dictionaryMap.put(Type.UNKNOWN, unkDictionary);
dictionaryMap.put(Type.USER, userDictionary);
}
开发者ID:pkarmstr,项目名称:NYBC,代码行数:51,代码来源:JapaneseTokenizer.java
示例14: JapaneseTokenizer
import org.apache.lucene.analysis.ja.dict.UserDictionary; //导入依赖的package包/类
/**
 * Convenience constructor that delegates to the factory-taking constructor with
 * {@code DEFAULT_TOKEN_ATTRIBUTE_FACTORY}.
 *
 * @param input              reader supplying the text to tokenize
 * @param userDictionary     optional user dictionary; may be {@code null}
 * @param discardPunctuation {@code true} to drop punctuation tokens from the output
 * @param mode               tokenization mode
 */
public JapaneseTokenizer(
        Reader input,
        UserDictionary userDictionary,
        boolean discardPunctuation,
        Mode mode) {
    this(
        DEFAULT_TOKEN_ATTRIBUTE_FACTORY,
        input,
        userDictionary,
        discardPunctuation,
        mode);
}
开发者ID:europeana,项目名称:search,代码行数:14,代码来源:JapaneseTokenizer.java
示例15: JapaneseTokenizer
import org.apache.lucene.analysis.ja.dict.UserDictionary; //导入依赖的package包/类
/**
 * Convenience constructor that delegates to the factory-taking constructor with
 * {@code AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY}.
 *
 * @param input              reader supplying the text to tokenize
 * @param userDictionary     optional user dictionary; may be {@code null}
 * @param discardPunctuation {@code true} to drop punctuation tokens from the output
 * @param mode               tokenization mode
 */
public JapaneseTokenizer(
        Reader input,
        UserDictionary userDictionary,
        boolean discardPunctuation,
        Mode mode) {
    this(
        AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY,
        input,
        userDictionary,
        discardPunctuation,
        mode);
}
开发者ID:yintaoxue,项目名称:read-open-source-code,代码行数:14,代码来源:JapaneseTokenizer.java
注:本文中的org.apache.lucene.analysis.ja.dict.UserDictionary类示例整理自GitHub/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论