本文整理汇总了Java中com.cybozu.labs.langdetect.LangDetectException类的典型用法代码示例。如果您正苦于以下问题:Java LangDetectException类的具体用法?Java LangDetectException怎么用?Java LangDetectException使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
LangDetectException类属于com.cybozu.labs.langdetect包,在下文中一共展示了LangDetectException类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Java代码示例。
示例1: getDetectorForLanguage
import com.cybozu.labs.langdetect.LangDetectException; //导入依赖的package包/类
/**
 * Gets a customized detector for a given language.
 *
 * <p>A prior is assigned to every language returned by
 * {@code getSupportedLanguages()}: {@code 0.8} for the requested language,
 * {@code 0.5} for English when it is not the requested language, and
 * {@code 0.1} for everything else.
 *
 * TODO(P1) Adding priority on the language seems to be relatively useless.
 * To be reviewed.
 *
 * @param language the language code that should receive the highest prior
 * @return a {@link Detector} customized for that language
 * @throws LangDetectException if {@code DetectorFactory.create()} or
 *         {@code setPriorMap} fails
 */
private Detector getDetectorForLanguage(String language) throws LangDetectException {
    Detector detector = DetectorFactory.create();
    // Diamond operator instead of the raw HashMap, so the allocation is type-checked.
    HashMap<String, Double> priorityMap = new HashMap<>();
    for (String supportedLanguage : getSupportedLanguages()) {
        if (supportedLanguage.equals(language)) {
            priorityMap.put(supportedLanguage, 0.8);
        } else if ("en".equals(supportedLanguage)) {
            // Reaching this branch already implies the requested language is not "en".
            priorityMap.put(supportedLanguage, 0.5);
        } else {
            priorityMap.put(supportedLanguage, 0.1);
        }
    }
    detector.setPriorMap(priorityMap);
    return detector;
}
开发者ID:box,项目名称:mojito,代码行数:29,代码来源:LanguageDetectionService.java
示例2: main
import com.cybozu.labs.langdetect.LangDetectException; //导入依赖的package包/类
/**
 * Command-line entry point: binds {@code args} onto a {@code Main} instance
 * through JCommander, then either prints the usage text (when the help flag
 * is set) or runs the instance.
 *
 * @param args raw command-line arguments
 * @throws IOException declared for the calls made here (presumably {@code run()})
 * @throws LangDetectException declared for the calls made here (presumably {@code run()})
 */
public static void main(String[] args) throws IOException, LangDetectException {
    final Main app = new Main();
    final JCommander commander;
    try {
        commander = new JCommander(app, args);
    } catch (ParameterException badArguments) {
        // Arguments could not be parsed: report on stderr and stop without running.
        System.err.println(badArguments.getMessage());
        badArguments.printStackTrace();
        return;
    }

    if (!app.help) {
        app.run();
        return;
    }
    commander.usage();
}
开发者ID:exciteproject,项目名称:refext,代码行数:19,代码来源:Main.java
示例3: LanguageAnalyzer
import com.cybozu.labs.langdetect.LangDetectException; //导入依赖的package包/类
/**
 * Creates the analyzer and, if {@link DetectorFactory} has no languages
 * loaded yet, loads the language profiles found under {@code profiles/} in
 * the jar that provides the {@link Detector} class.
 *
 * @throws LangDetectException if the collected profiles cannot be loaded
 * @throws IOException if the jar or one of its profile entries cannot be read
 */
public LanguageAnalyzer() throws LangDetectException, IOException {
    // solution for loading detector profiles from jar taken from:
    // http://stackoverflow.com/a/15332031
    String dirname = "profiles/";
    // Decode profiles as UTF-8 explicitly; the platform default charset would
    // corrupt non-ASCII n-grams on machines whose default is not UTF-8.
    Charset utf8 = Charset.forName("UTF-8");
    ClassLoader loader = Detector.class.getClassLoader();
    Enumeration<URL> en = loader.getResources(dirname);
    List<String> profiles = new ArrayList<>();
    if (en.hasMoreElements()) {
        URL url = en.nextElement();
        JarURLConnection urlcon = (JarURLConnection) url.openConnection();
        try (JarFile jar = urlcon.getJarFile()) {
            Enumeration<JarEntry> entries = jar.entries();
            while (entries.hasMoreElements()) {
                JarEntry jarEntry = entries.nextElement();
                String entry = jarEntry.getName();
                // The "profiles/" directory entry also matches the prefix, but
                // only regular entries hold profile content.
                if (jarEntry.isDirectory() || !entry.startsWith(dirname)) {
                    continue;
                }
                try (InputStream in = loader.getResourceAsStream(entry)) {
                    profiles.add(IOUtils.toString(in, utf8));
                }
            }
        }
    }
    if (DetectorFactory.getLangList().isEmpty()) {
        DetectorFactory.loadProfile(profiles);
    }
}
开发者ID:exciteproject,项目名称:refext,代码行数:27,代码来源:LanguageAnalyzer.java
示例4: detect
import com.cybozu.labs.langdetect.LangDetectException; //导入依赖的package包/类
/**
 * Wraps the Cybozu library and detects the language over a specified
 * text.
 *
 * <p>While the {@code detector} field is still {@code null}, the models
 * named in {@code profiles} are first read from the {@code cybozu/}
 * classpath folder and handed to {@code DetectorFactory.loadProfile}. A new
 * detector is then created on every call.
 *
 * @param text the text to analyze.
 * @return the code of the language detected
 * @throws LangDetectException when the model can't be loaded
 */
public String detect(String text) throws LangDetectException {
    if (detector == null) {
        // retrieve the language database embedded in the jar
        // load the models inside an array then put them in
        // the library
        String[] models = new String[profiles.length];
        for (int i = 0; i < profiles.length; i++) {
            // try-with-resources closes the classpath stream on every path;
            // it used to be left open.
            try (InputStream s = getClass().getClassLoader()
                    .getResourceAsStream("cybozu/" + profiles[i])) {
                models[i] = IOUtils.toString(s, "UTF-8");
            } catch (IOException ex) {
                Logger.getLogger(CybozuLanguageDetectorAnnotator.class.getName()).log(
                        Level.SEVERE, "Cannot load cybozu model " + profiles[i], ex);
            }
        }
        DetectorFactory.loadProfile(Arrays.asList(models));
    }
    detector = DetectorFactory.create();
    detector.append(text);
    return detector.detect();
}
开发者ID:ailab-uniud,项目名称:distiller-CORE,代码行数:34,代码来源:CybozuLanguageDetectorAnnotator.java
示例5: annotate
import com.cybozu.labs.langdetect.LangDetectException; //导入依赖的package包/类
/**
 * Runs the Cybozu library on the text of the given
 * {@link it.uniud.ailab.dcore.persistence.DocumentComponent} and stores the
 * detected language on it as a {@link Locale}.
 * Note: the component supports only components written in a single language
 * and with no children.
 *
 * @param blackboard the current blackboard
 * @param component the component to analyze
 */
@Override
public void annotate(Blackboard blackboard,DocumentComponent component) {
    final String languageCode;
    try {
        languageCode = detect(component.getText());
    } catch (LangDetectException ex) {
        throw new AnnotationException(this,"CybozuLanguageDetector - error during language detection",ex);
    }

    // An empty code means detection produced nothing usable.
    if (languageCode.isEmpty()) {
        throw new AnnotationException(this,"CybozuLanguageDetector could not detect a language");
    }

    component.setLanguage(Locale.forLanguageTag(languageCode));
}
开发者ID:ailab-uniud,项目名称:distiller-CORE,代码行数:25,代码来源:CybozuLanguageDetectorAnnotator.java
示例6: detectLanguage
import com.cybozu.labs.langdetect.LangDetectException; //导入依赖的package包/类
/**
 * Runs langdetect over {@code content} and converts every reported
 * candidate into a {@code DetectedLanguage}.
 *
 * @param content the text to inspect
 * @return the detected languages; an empty list when {@code content} is
 *         empty or detection fails
 */
@Override
protected List<DetectedLanguage> detectLanguage(String content) {
    List<DetectedLanguage> found = Lists.newArrayList();
    if (!StringUtils.isEmpty(content)) {
        try {
            Detector detector = DetectorFactory.create();
            detector.append(content);
            for (Language candidate : detector.getProbabilities()) {
                found.add(new DetectedLanguage(candidate.lang, candidate.prob));
            }
        } catch (LangDetectException ignored) {
            // Best effort: a detection failure is not reported and the
            // (empty) list is returned as-is.
        }
    }
    return found;
}
开发者ID:smalldirector,项目名称:solr-multilingual-analyzer,代码行数:18,代码来源:MultiLangDetectLanguageIdentifierUpdateProcessor.java
示例7: detectLanguage
import com.cybozu.labs.langdetect.LangDetectException; //导入依赖的package包/类
/**
 * Detects the languages of {@code content}, keeping only those listed in
 * {@code autoDetectQueryLocales} (or all of them when that collection is
 * empty).
 *
 * @param content the text to inspect
 * @return the accepted languages; an empty list for blank input or when
 *         detection fails
 */
private List<DetectedLanguage> detectLanguage(String content) {
    // to be consistent with the tika impl?
    if (content.trim().isEmpty()) {
        log.debug("No input text to detect language from, returning empty list");
        return Collections.emptyList();
    }

    try {
        Detector detector = DetectorFactory.create();
        detector.append(content);
        ArrayList<Language> candidates = detector.getProbabilities();
        ArrayList<DetectedLanguage> accepted = new ArrayList<>();
        for (Language candidate : candidates) {
            // Skip a candidate only when a locale filter exists and excludes it.
            if (autoDetectQueryLocales.size() != 0 && !autoDetectQueryLocales.contains(candidate.lang)) {
                continue;
            }
            accepted.add(new DetectedLanguage(candidate.lang, candidate.prob));
        }
        return accepted;
    } catch (LangDetectException e) {
        log.debug("Could not determine language, returning empty list: ", e);
        return Collections.emptyList();
    }
}
开发者ID:Alfresco,项目名称:community-edition-old,代码行数:25,代码来源:AbstractQParser.java
示例8: detect
import com.cybozu.labs.langdetect.LangDetectException; //导入依赖的package包/类
/**
 * Detects the language of {@code text} with langdetect.
 *
 * @param text the text to inspect
 * @return a result built from the first entry of the probability list, or
 *         {@code UNKNOWN} when the list is empty or the library fails
 */
@Override
public DetectionResult detect(String text) {
    try {
        Detector detector = DetectorFactory.create();
        detector.append(text);
        // Kept inside the try: upstream langdetect declares getProbabilities()
        // to throw LangDetectException as well (e.g. text without usable features).
        List<Language> results = detector.getProbabilities();
        if (!results.isEmpty()) {
            Language bestLang = results.get(0);
            return new DetectionResult(bestLang.lang, bestLang.prob);
        }
        return UNKNOWN;
    } catch (LangDetectException e) {
        // TODO(skreft): log the reason
        return UNKNOWN;
    }
}
开发者ID:deezer,项目名称:weslang,代码行数:20,代码来源:DetectionServiceImplLanguageDetection.java
示例9: loadData
import com.cybozu.labs.langdetect.LangDetectException; //导入依赖的package包/类
/**
 * Loads the langdetect profiles named in {@code languages} from the
 * {@code langdetect-profiles/} classpath folder into {@code DetectorFactory}
 * and fixes the detector seed to 0. Later calls return immediately once the
 * {@code loaded} flag is set.
 *
 * NOTE(review): the flag is set before loading finishes, so a failed load is
 * not retried by a later call - confirm this is intended.
 *
 * @throws IOException if a profile resource is missing or cannot be read
 * @throws LangDetectException if the library rejects the profiles
 */
public static synchronized void loadData() throws IOException, LangDetectException {
    if (loaded) {
        return;
    }
    loaded = true;
    List<String> profileData = new ArrayList<String>();
    Charset encoding = Charset.forName("UTF-8");
    for (String language : languages) {
        InputStream stream = LangDetectLanguageIdentifierUpdateProcessor.class.getResourceAsStream("langdetect-profiles/" + language);
        if (stream == null) {
            // Name the missing resource instead of failing with a NullPointerException below.
            throw new IOException("Missing langdetect profile resource: langdetect-profiles/" + language);
        }
        BufferedReader reader = new BufferedReader(new InputStreamReader(stream, encoding));
        try {
            profileData.add(new String(IOUtils.toCharArray(reader)));
        } finally {
            // Close the reader even when reading fails.
            reader.close();
        }
    }
    DetectorFactory.loadProfile(profileData);
    DetectorFactory.setSeed(0);
}
开发者ID:pkarmstr,项目名称:NYBC,代码行数:17,代码来源:LangDetectLanguageIdentifierUpdateProcessorFactory.java
示例10: detectLanguage
import com.cybozu.labs.langdetect.LangDetectException; //导入依赖的package包/类
/**
 * Detects the languages of {@code content} and converts each langdetect
 * result into a {@code DetectedLanguage}.
 *
 * @param content the text to inspect
 * @return the detected languages; an empty list for blank input or when
 *         detection fails
 */
@Override
protected List<DetectedLanguage> detectLanguage(String content) {
    // to be consistent with the tika impl?
    if (content.trim().isEmpty()) {
        log.debug("No input text to detect language from, returning empty list");
        return Collections.emptyList();
    }

    try {
        Detector detector = DetectorFactory.create();
        detector.append(content);
        ArrayList<Language> ranked = detector.getProbabilities();
        ArrayList<DetectedLanguage> converted = new ArrayList<DetectedLanguage>(ranked.size());
        for (Language entry : ranked) {
            converted.add(new DetectedLanguage(entry.lang, entry.prob));
        }
        return converted;
    } catch (LangDetectException e) {
        log.debug("Could not determine language, returning empty list: ", e);
        return Collections.emptyList();
    }
}
开发者ID:pkarmstr,项目名称:NYBC,代码行数:22,代码来源:LangDetectLanguageIdentifierUpdateProcessor.java
示例11: process
import com.cybozu.labs.langdetect.LangDetectException; //导入依赖的package包/类
/**
 * Detects the language of the document text and stores it on the CAS.
 * When the text is shorter than {@code minTextLenght} and a title is
 * available, the title is prepended first; detection only runs when the
 * resulting text is longer than {@code minTextLenght}.
 *
 * @param jCas the CAS holding the document to analyse
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
    String title = getTitle(jCas);
    String text = jCas.getDocumentText();
    // Check for a missing document text before it is first dereferenced.
    if (text == null) {
        return;
    }
    // add title to text if too small
    if (text.length() < minTextLenght && title != null && title.length() > 0) {
        text = title + " " + text;
    }
    // only detect if text is long enough
    if (text.length() > minTextLenght) {
        // TODO maybe cut if text too long --> slower
        try {
            jCas.setDocumentLanguage(detect(text));
        } catch (LangDetectException e) {
            LOG.warn("error detecting language for {}, {}",
                    getHeaderDocId(jCas), e);
        }
    }
}
开发者ID:BlueBrain,项目名称:bluima,代码行数:25,代码来源:LanguageDetectionAnnotator.java
示例12: SpeechFrame
import com.cybozu.labs.langdetect.LangDetectException; //导入依赖的package包/类
/**
 * Creates new form speechFrame: builds the components, puts the controls in
 * their initial state, shows the start hint and loads the langdetect
 * profiles from {@code Constants.PATH_TO_PROFILES}.
 */
public SpeechFrame() {
    initComponents();

    // Initial control state.
    txtLog.setEnabled(false);
    cbxEditMode.setSelected(false);
    btnSave.setEnabled(false);
    btnDelete.setEnabled(false);
    setStatus(bundle.getString("TO START, CLICK ON 'NEW LOG'"));

    try {
        DetectorFactory.loadProfile(Constants.PATH_TO_PROFILES);
    } catch (LangDetectException ex) {
        // A profile-loading failure is logged and the frame is still constructed.
        Logger.getLogger(SpeechFrame.class.getName()).log(Level.SEVERE, null, ex);
    }
}
开发者ID:ShahNami,项目名称:Speech,代码行数:19,代码来源:SpeechFrame.java
示例13: initProfiles
import com.cybozu.labs.langdetect.LangDetectException; //导入依赖的package包/类
/**
 * Initialise the language profiles needed by the detector. This
 * initialisation has to be performed only once.
 *
 * <p>Every resource matching {@code profilePath} is read as UTF-8 text and
 * the resulting strings are passed to {@code DetectorFactory.loadProfile}.
 * Failures are logged as warnings and not rethrown.
 */
private void initProfiles() {
    PathMatchingResourcePatternResolver resolver =
            new PathMatchingResourcePatternResolver();
    List<String> profiles = new ArrayList<>();
    DetectorFactory.setSeed(0L);
    try {
        for (Resource rs : resolver.getResources(profilePath)) {
            // Close each resource stream (it was previously leaked) and decode
            // it as UTF-8 rather than with the platform default charset.
            try (java.io.InputStream in = rs.getInputStream()) {
                StringWriter writer = new StringWriter();
                IOUtils.copy(in, writer, "UTF-8");
                profiles.add(writer.toString());
            }
        }
        DetectorFactory.loadProfile(profiles);
    } catch (IOException | LangDetectException ex) {
        LOGGER.warn(ex);
    }
}
开发者ID:Tanaguru,项目名称:Tanaguru,代码行数:21,代码来源:LanguageDetector.java
示例14: langDetection
import com.cybozu.labs.langdetect.LangDetectException; //导入依赖的package包/类
/**
 * Detects the language of the merged body text and, on success, records it
 * in the {@code lang} field together with the method name in the
 * {@code lang_method} field.
 *
 * @param textLength passed through to {@code getMergedBodyText}
 * @param parserField passed through to {@code getMergedBodyText}
 * @return the detected locale, or {@code null} when the text is empty,
 *         detection fails, or no language is found
 */
protected Locale langDetection(int textLength, ParserFieldEnum parserField) {
    final String text = getMergedBodyText(textLength, " ", parserField);
    if (StringUtils.isEmpty(text)) {
        return null;
    }

    final Locale detected;
    try {
        detected = Lang.langDetection(text, text.length());
    } catch (LangDetectException e) {
        Logging.warn(e);
        return null;
    }

    if (detected != null) {
        addField(ParserFieldEnum.lang, detected.getLanguage());
        addField(ParserFieldEnum.lang_method, "ngram recognition");
    }
    return detected;
}
开发者ID:jaeksoft,项目名称:opensearchserver,代码行数:21,代码来源:ParserResultItem.java
示例15: LanguageDetectionService
import com.cybozu.labs.langdetect.LangDetectException; //导入依赖的package包/类
/**
 * Creates the service. When {@code DetectorFactory} has no languages yet,
 * every resource matching {@code profiles/*} is read as UTF-8 and loaded as
 * a langdetect profile. The list of supported languages is then captured as
 * an unmodifiable view.
 *
 * @throws RuntimeException if the profile resources cannot be listed, read
 *         or loaded
 */
public LanguageDetectionService() {
    if (!DetectorFactory.getLangList().isEmpty()) {
        logger.debug("langdetect profiles are already initialized");
    } else {
        logger.debug("Initialize langdetect with profiles");

        final Resource[] profileResources;
        try {
            profileResources = new PathMatchingResourcePatternResolver().getResources("profiles/*");
        } catch (IOException ex) {
            throw new RuntimeException("Cannot get the list of resources maching langdetect profiles", ex);
        }

        List<String> jsonProfiles = new ArrayList<>();
        for (Resource profile : profileResources) {
            String filename = profile.getFilename();
            logger.debug("Add profile for: {}", filename);
            final String json;
            try {
                json = Resources.toString(profile.getURL(), StandardCharsets.UTF_8);
            } catch (Exception e) {
                throw new RuntimeException("Cannot load langdetect profile for " + filename, e);
            }
            jsonProfiles.add(json);
        }

        try {
            logger.debug("Load profiles");
            DetectorFactory.loadProfile(jsonProfiles);
        } catch (LangDetectException lde) {
            throw new RuntimeException("Cannot load langdetect profiles", lde);
        }
    }

    logger.debug("Sets langdetect supported languages");
    supportedLanguages = Collections.unmodifiableList(DetectorFactory.getLangList());
}
开发者ID:box,项目名称:mojito,代码行数:41,代码来源:LanguageDetectionService.java
示例16: getProcessors
import com.cybozu.labs.langdetect.LangDetectException; //导入依赖的package包/类
/**
 * Loads the langdetect profiles from the classpath, then exposes the
 * language-detection processor factory under
 * {@code LangDetectProcessor.TYPE}.
 *
 * @param parameters processor parameters; only {@code parameters.env} is used
 * @return a map containing the single langdetect processor factory
 * @throws ElasticsearchException wrapping any failure to load the profiles
 */
@Override
public Map<String, Processor.Factory> getProcessors(Processor.Parameters parameters) {
    try {
        SecureDetectorFactory.loadProfileFromClassPath(parameters.env);
    } catch (LangDetectException | URISyntaxException | IOException loadFailure) {
        throw new ElasticsearchException(loadFailure);
    }

    Processor.Factory langDetectFactory = new LangDetectProcessor.Factory();
    Map<String, Processor.Factory> processors = new HashMap<>(1);
    processors.put(LangDetectProcessor.TYPE, langDetectFactory);
    return processors;
}
开发者ID:spinscale,项目名称:elasticsearch-ingest-langdetect,代码行数:13,代码来源:IngestLangDetectPlugin.java
示例17: identifyLanguage
import com.cybozu.labs.langdetect.LangDetectException; //导入依赖的package包/类
/**
 * Identifies the language of the visible text of an HTML page.
 *
 * @param html the HTML source to analyse
 * @return the detected language code, or {@code UNKNOWN_LANGUAGE} when the
 *         page has no text, detection fails, no candidate is reported, or
 *         the best candidate does not exceed {@code PROBABILITY_THRESHOLD}
 * @throws IOException declared by the overridden method
 */
@Override
public String identifyLanguage(String html)
        throws IOException
{
    // extracting plain html text
    Document doc = Jsoup.parse(html);
    String text = doc.text();
    // we might have removed everything -> no lang
    if (text.isEmpty()) {
        return UNKNOWN_LANGUAGE;
    }
    try {
        Detector detector = DetectorFactory.create();
        detector.append(text);
        String detectedLang = detector.detect();
        ArrayList<Language> detectedProbabilities = detector.getProbabilities();
        // The candidate list may come back empty; guard before get(0) so the
        // method does not fail with an IndexOutOfBoundsException.
        if (detectedProbabilities.isEmpty()) {
            return UNKNOWN_LANGUAGE;
        }
        if (detectedProbabilities.get(0).prob > PROBABILITY_THRESHOLD) {
            return detectedLang;
        }
        return UNKNOWN_LANGUAGE;
    }
    catch (LangDetectException e) {
        return UNKNOWN_LANGUAGE;
    }
}
开发者ID:dkpro,项目名称:dkpro-c4corpus,代码行数:32,代码来源:CybozuLanguageIdentifier.java
示例18: buildSerialPipes
import com.cybozu.labs.langdetect.LangDetectException; //导入依赖的package包/类
/**
 * Assembles the pipe sequence: the fixed input pipes, one pipe per requested
 * feature, the offset conjunctions, and the final vector/label conversion
 * pipes.
 *
 * <p>Each conjunction string is a {@code ;}-separated list of integers in
 * which the token {@code min} stands for a minus sign.
 *
 * @param featureNames names resolved through {@code FeaturePipeProvider}
 * @param replacements passed to {@code TargetReplacementPipe}
 * @param conjunctions encoded offset conjunctions, one entry per conjunction
 * @return the assembled {@code SerialPipes}
 * @throws LangDetectException declared for the calls made while building the pipes
 * @throws IOException declared for the calls made while building the pipes
 */
private SerialPipes buildSerialPipes(List<String> featureNames, List<String> replacements,
        List<String> conjunctions) throws LangDetectException, IOException {
    ArrayList<Pipe> pipeline = new ArrayList<Pipe>();
    pipeline.add(new LineGroupString2TokenSequence());
    pipeline.add(new AddTargetToLinePipe(6));
    pipeline.add(new LineToTargetTextPipe());
    pipeline.add(new TargetReplacementPipe(replacements));

    FeaturePipeProvider provider = new FeaturePipeProvider();
    for (String featureName : featureNames) {
        pipeline.add(provider.getPipe(featureName));
    }

    int[][] offsets = new int[conjunctions.size()][];
    int row = 0;
    for (String encoded : conjunctions) {
        // "min" encodes the minus sign of a negative offset.
        String[] parts = encoded.replace("min", "-").split(";");
        int[] parsed = new int[parts.length];
        for (int col = 0; col < parts.length; col++) {
            parsed[col] = Integer.parseInt(parts[col]);
        }
        offsets[row++] = parsed;
    }

    pipeline.add(new OffsetConjunctions(offsets));
    pipeline.add(new TokenSequence2FeatureVectorSequence(false, false));
    pipeline.add(new Target2LabelSequence());
    return new SerialPipes(pipeline);
}
开发者ID:exciteproject,项目名称:refext,代码行数:34,代码来源:ReferenceExtractorTrainer.java
示例19: readObject
import com.cybozu.labs.langdetect.LangDetectException; //导入依赖的package包/类
/**
 * Custom deserialization hook: consumes the serial-version int, restores
 * {@code feature} and {@code csvSeparator} in that order, and builds a fresh
 * {@code LanguageAnalyzer} instead of reading one from the stream.
 *
 * @param in the stream to read this object's state from
 * @throws IOException if reading from the stream fails
 * @throws ClassNotFoundException if a serialized class cannot be resolved
 * @throws LangDetectException if the new {@code LanguageAnalyzer} cannot be created
 */
private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException, LangDetectException {
    // The serial version is read only to advance the stream; its value is unused.
    in.readInt();

    feature = (String) in.readObject();
    csvSeparator = (String) in.readObject();
    languageAnalyzer = new LanguageAnalyzer();
}
开发者ID:exciteproject,项目名称:refext,代码行数:9,代码来源:LanguagePipe.java
示例20: analyze
import com.cybozu.labs.langdetect.LangDetectException; //导入依赖的package包/类
/**
 * Detects the language of the given text.
 *
 * @param string the text to analyse
 * @return the language code reported by the detector, or {@code "unknown"}
 *         when the library throws a {@link LangDetectException}
 */
public String analyze(String string) {
    final Detector detector;
    try {
        detector = DetectorFactory.create();
        detector.append(string);
        return detector.detect();
    } catch (LangDetectException e) {
        // Any detection failure maps to the "unknown" marker.
        return "unknown";
    }
}
开发者ID:exciteproject,项目名称:refext,代码行数:42,代码来源:LanguageAnalyzer.java
注:本文中的com.cybozu.labs.langdetect.LangDetectException类示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论