本文整理汇总了Java中com.optimaize.langdetect.profiles.LanguageProfile类的典型用法代码示例。如果您正苦于以下问题:Java LanguageProfile类的具体用法?Java LanguageProfile怎么用?Java LanguageProfile使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
LanguageProfile类属于com.optimaize.langdetect.profiles包,在下文中一共展示了LanguageProfile类的19个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Java代码示例。
示例1: getLanguageDetector
import com.optimaize.langdetect.profiles.LanguageProfile; //导入依赖的package包/类
/**
 * Returns the shared language detector, building it on first use.
 *
 * <p>The detector is built from the profiles of every {@code Language} constant,
 * looked up by the lower-cased value of {@code getAbrev()}.
 *
 * @return the cached detector; still {@code null} if the profiles could not be read
 *         (the {@link IOException} is logged, not rethrown)
 */
private static LanguageDetector getLanguageDetector(){
    if (languageDetector == null){
        try {
            List<String> languages = new ArrayList<>();
            for (Language lg : Language.values()) {
                // Locale.ROOT keeps the lower-casing locale-independent; with the default
                // locale an abbreviation containing 'I' becomes a dotless i under tr/az locales.
                languages.add(lg.getAbrev().toLowerCase(java.util.Locale.ROOT));
            }
            List<LanguageProfile> languageProfiles = new LanguageProfileReader().read(languages);
            languageDetector = LanguageDetectorBuilder.create(NgramExtractors.standard())
                    .withProfiles(languageProfiles).build();
        }
        catch (IOException e) {
            // Best effort: log and fall through, leaving the field unset so a later call retries.
            LoggerFactory.getLogger(Translator.class).error("Translator.getLanguageDetector", e);
        }
    }
    return languageDetector;
}
开发者ID:Kaysoro,项目名称:KaellyBot,代码行数:18,代码来源:Translator.java
示例2: main
import com.optimaize.langdetect.profiles.LanguageProfile; //导入依赖的package包/类
/**
 * Command-line entry point: builds a language profile from a plain-text file and writes it
 * to the current working directory.
 *
 * @param args exactly three values: language code, path of the plain-text file,
 *             and the minimal n-gram frequency (parsed with {@link Integer#parseInt(String)})
 * @throws IOException if the input file cannot be read or the profile cannot be written
 */
public static void main(String[] args) throws IOException {
    if (args.length != 3) {
        System.out.println("Usage: " + LanguageDetectionTrainer.class.getName() + " <languageCode> <plainTextFile> <minimalFrequency>");
        System.exit(1);
    }
    String langCode = args[0];
    String fileName = args[1];
    int minimalFrequency = Integer.parseInt(args[2]);
    String text;
    // try-with-resources: the reader was previously never closed.
    // NOTE(review): FileReader decodes with the platform default charset - confirm that is intended.
    try (FileReader reader = new FileReader(fileName)) {
        text = IOUtils.toString(reader);
    }
    TextObjectFactory textObjectFactory = CommonTextObjectFactories.forIndexingCleanText();
    TextObject inputText = textObjectFactory.create().append(text);
    LanguageProfile languageProfile = new LanguageProfileBuilder(langCode)
            .ngramExtractor(NgramExtractors.standard())
            .minimalFrequency(minimalFrequency)
            .addText(inputText)
            .build();
    File outputDir = new File(System.getProperty("user.dir"));  // current dir
    new LanguageProfileWriter().writeToDirectory(languageProfile, outputDir);
    System.out.println("Language profile written to " + new File(outputDir, langCode).getAbsolutePath());
}
开发者ID:languagetool-org,项目名称:languagetool,代码行数:21,代码来源:LanguageDetectionTrainer.java
示例3: initLanguageDetector
import com.optimaize.langdetect.profiles.LanguageProfile; //导入依赖的package包/类
/**
 * Sets up {@code languageDetector} from all built-in language profiles and
 * {@code textObjectFactory} with the large-text detection preset.
 *
 * @throws IOException if the built-in profiles cannot be read
 */
private void initLanguageDetector() throws IOException {
    LanguageProfileReader profileReader = new LanguageProfileReader();
    List<LanguageProfile> builtInProfiles = profileReader.readAllBuiltIn();

    LanguageDetectorBuilder detectorBuilder = LanguageDetectorBuilder
            .create(NgramExtractors.standard())
            .withProfiles(builtInProfiles);
    languageDetector = detectorBuilder.build();

    textObjectFactory = CommonTextObjectFactories.forDetectingOnLargeText();
}
开发者ID:victorward,项目名称:recruitervision,代码行数:8,代码来源:ParsingServiceImpl.java
示例4: doInitialize
import com.optimaize.langdetect.profiles.LanguageProfile; //导入依赖的package包/类
/**
 * Builds the language detector from all built-in profiles and selects the
 * large-text text-object factory.
 *
 * @param aContext the UIMA context (not read by this method)
 * @throws ResourceInitializationException wrapping the {@link IOException} raised while reading profiles
 */
@Override
public void doInitialize(UimaContext aContext) throws ResourceInitializationException {
    try {
        List<LanguageProfile> builtInProfiles = new LanguageProfileReader().readAllBuiltIn();
        LanguageDetectorBuilder detectorBuilder = LanguageDetectorBuilder
                .create(NgramExtractors.standard())
                .withProfiles(builtInProfiles);
        languageDetector = detectorBuilder.build();
        textObjectFactory = CommonTextObjectFactories.forDetectingOnLargeText();
    } catch (IOException readFailure) {
        throw new ResourceInitializationException(readFailure);
    }
}
开发者ID:dstl,项目名称:baleen,代码行数:14,代码来源:DocumentLanguage.java
示例5: testLanguageDetectorErrorRate
import com.optimaize.langdetect.profiles.LanguageProfile; //导入依赖的package包/类
/**
 * Runs the optimaize detector over every line returned by {@code EuroParlUtils.readLines()}
 * (format: language code, TAB, text) and prints the percentage of lines whose top-ranked
 * language does not match the expected code.
 */
@Test
public void testLanguageDetectorErrorRate() throws IOException {
    // Profiles for the target languages only.
    List<LanguageProfile> profiles = new LanguageProfileReader().read(Arrays.asList(TARGET_LANGUAGES_FOR_YALDER));

    com.optimaize.langdetect.LanguageDetector detector = LanguageDetectorBuilder.create(NgramExtractors.standard())
            .withProfiles(profiles)
            .build();

    // Short-clean-text preset; CommonTextObjectFactories.forDetectingOnLargeText() is the alternative.
    TextObjectFactory factory = CommonTextObjectFactories.forDetectingShortCleanText();

    SummaryStatistics stats = new SummaryStatistics();

    int hits = 0;
    int misses = 0;
    for (String line : EuroParlUtils.readLines()) {
        String[] fields = line.split("\t", 2);
        String expected = fields[0];
        List<DetectedLanguage> ranked = detector.getProbabilities(factory.forText(fields[1]));

        // A hit requires at least one candidate and the best one matching the expected code.
        boolean correct = !ranked.isEmpty()
                && ranked.get(0).getLocale().getLanguage().equals(expected);
        if (correct) {
            hits++;
        } else {
            misses++;
        }
    }

    double missPercentage = 100.0 * (double)misses/(double)(misses + hits);
    stats.addValue(missPercentage);
    System.out.println(String.format("Total miss ratio = %.2f%%", missPercentage));

    System.out.println(String.format("Min = %.2f%%, max =  %.2f%%, mean =  %.2f%%, std deviation = %f",
            stats.getMin(), stats.getMax(), stats.getMean(), stats.getStandardDeviation()));
}
开发者ID:kkrugler,项目名称:yalder,代码行数:40,代码来源:OtherDetectorsTest.java
示例6: withProfile
import com.optimaize.langdetect.profiles.LanguageProfile; //导入依赖的package包/类
/**
 * Registers one language profile with this builder.
 *
 * @param languageProfile the profile to add
 * @return this builder
 * @throws IllegalStateException if a profile for the same locale was added already (must be a userland bug).
 * @throws IllegalArgumentException if the profile lacks one of the gram lengths the configured
 *                                  NgramExtractor handles
 */
public LanguageDetectorBuilder withProfile(LanguageProfile languageProfile) throws IllegalStateException {
    boolean alreadyAdded = langsAdded.contains(languageProfile.getLocale());
    if (alreadyAdded) {
        throw new IllegalStateException("A language profile for language "+languageProfile.getLocale()+" was added already!");
    }

    // Every gram length the extractor produces must be present in the profile.
    for (Integer requiredLength : ngramExtractor.getGramLengths()) {
        if (languageProfile.getGramLengths().contains(requiredLength)) {
            continue;
        }
        throw new IllegalArgumentException("The NgramExtractor is set to handle "+requiredLength+"-grams but the given language profile for "+languageProfile.getLocale()+" does not support this!");
    }

    langsAdded.add(languageProfile.getLocale());
    languageProfiles.add(languageProfile);
    return this;
}
开发者ID:optimaize,项目名称:language-detector,代码行数:17,代码来源:LanguageDetectorBuilder.java
示例7: withProfiles
import com.optimaize.langdetect.profiles.LanguageProfile; //导入依赖的package包/类
/**
 * Adds each of the given profiles, in iteration order, via {@code withProfile}.
 * Profiles added before a failing one stay registered.
 *
 * @param languageProfiles the profiles to add
 * @return this builder
 * @throws IllegalStateException if a profile for the same language was added already (must be a userland bug).
 */
public LanguageDetectorBuilder withProfiles(Iterable<LanguageProfile> languageProfiles) throws IllegalStateException {
    for (LanguageProfile each : languageProfiles) {
        this.withProfile(each);
    }
    return this;
}
开发者ID:optimaize,项目名称:language-detector,代码行数:10,代码来源:LanguageDetectorBuilder.java
示例8: removeLanguageProfile
import com.optimaize.langdetect.profiles.LanguageProfile; //导入依赖的package包/类
/**
 * Drops every loaded LanguageProfile whose locale language equals the given string,
 * e.g. after {@link #loadAllBuiltInLanguageProfiles()}.
 *
 * @param isoString the ISO string of the LanguageProfile to be removed.
 * @return this validator
 */
public LanguageProfileValidator removeLanguageProfile(final String isoString) {
    Predicate<LanguageProfile> hasGivenLanguage = new Predicate<LanguageProfile>() {
        @Override
        public boolean apply(LanguageProfile candidate) {
            String candidateLanguage = candidate.getLocale().getLanguage();
            return candidateLanguage.equals(isoString);
        }
    };
    Iterables.removeIf(this.languageProfiles, hasGivenLanguage);
    return this;
}
开发者ID:optimaize,项目名称:language-detector,代码行数:14,代码来源:LanguageProfileValidator.java
示例9: create
import com.optimaize.langdetect.profiles.LanguageProfile; //导入依赖的package包/类
/**
 * Builds the n-gram probability table for the given profiles.
 *
 * <p>Each n-gram maps to an array with one slot per profile (in iteration order); a slot holds
 * the n-gram's frequency in that profile divided by the profile's total occurrences of grams
 * of that n-gram's length. Slots for profiles that lack the n-gram stay at 0.
 *
 * @param languageProfiles the profiles to index
 * @param gramLengths for example [1,2,3]
 * @throws java.lang.IllegalArgumentException if languageProfiles or gramLengths is empty, or if one of the
 *                                            languageProfiles does not have the grams of the required sizes.
 */
@NotNull
public static NgramFrequencyData create(@NotNull Collection<LanguageProfile> languageProfiles, @NotNull Collection<Integer> gramLengths) throws IllegalArgumentException {
    if (languageProfiles.isEmpty()) {
        throw new IllegalArgumentException("No languageProfiles provided!");
    }
    if (gramLengths.isEmpty()) {
        throw new IllegalArgumentException("No gramLengths provided!");
    }

    final int languageCount = languageProfiles.size();
    Map<String, double[]> probabilitiesByNgram = new HashMap<>();
    List<LdLocale> locales = new ArrayList<>();

    int column = 0;  // position of the current profile in every probability array
    for (LanguageProfile profile : languageProfiles) {
        locales.add(profile.getLocale());

        for (Integer gramLength : gramLengths) {
            boolean supported = profile.getGramLengths().contains(gramLength);
            if (!supported) {
                throw new IllegalArgumentException("The language profile for "+profile.getLocale()+" does not contain "+gramLength+"-grams!");
            }

            for (Map.Entry<String, Integer> entry : profile.iterateGrams(gramLength)) {
                String ngram = entry.getKey();
                Integer count = entry.getValue();

                // Lazily create the per-language array the first time an n-gram is seen.
                double[] perLanguage = probabilitiesByNgram.get(ngram);
                if (perLanguage == null) {
                    perLanguage = new double[languageCount];
                    probabilitiesByNgram.put(ngram, perLanguage);
                }

                double probability = count.doubleValue() / profile.getNumGramOccurrences(ngram.length());
                perLanguage[column] = probability;
            }
        }
        column++;
    }

    return new NgramFrequencyData(probabilitiesByNgram, locales);
}
开发者ID:optimaize,项目名称:language-detector,代码行数:39,代码来源:NgramFrequencyData.java
示例10: makeDetector
import com.optimaize.langdetect.profiles.LanguageProfile; //导入依赖的package包/类
/**
 * Creates a detector from every language profile found in the directory named by the
 * "directory" parameter, configured with the "alpha" parameter (default {@code DEFAULT_ALPHA}),
 * the optional "seed" parameter, and a short-text threshold of 50.
 *
 * @throws IOException if the profiles cannot be read
 */
private LanguageDetector makeDetector() throws IOException {
    double alpha = getParamDouble("alpha", DEFAULT_ALPHA);
    String directoryParam = requireParamString("directory");
    Optional<Long> seed = Optional.fromNullable(getParamLongOrNull("seed"));

    File profileDir = new File(directoryParam + "/");
    List<LanguageProfile> profiles = new LanguageProfileReader().readAll(profileDir);

    LanguageDetectorBuilder builder = LanguageDetectorBuilder.create(NgramExtractors.standard())
            .alpha(alpha)
            .seed(seed)
            .shortTextAlgorithm(50)
            .withProfiles(profiles);
    return builder.build();
}
开发者ID:optimaize,项目名称:language-detector,代码行数:18,代码来源:CommandLineInterface.java
示例11: DataLanguageDetectorImplTest
import com.optimaize.langdetect.profiles.LanguageProfile; //导入依赖的package包/类
public DataLanguageDetectorImplTest() throws IOException {
List<LanguageProfile> languageProfiles = new LanguageProfileReader().readAllBuiltIn();
shortDetector = LanguageDetectorBuilder.create(NgramExtractors.standard())
.shortTextAlgorithm(100)
.withProfiles(languageProfiles)
.build();
longDetector = LanguageDetectorBuilder.create(NgramExtractors.standard())
.shortTextAlgorithm(0)
.withProfiles(new LanguageProfileReader().readAllBuiltIn())
.build();
}
开发者ID:optimaize,项目名称:language-detector,代码行数:14,代码来源:DataLanguageDetectorImplTest.java
示例12: makeNewDetector
import com.optimaize.langdetect.profiles.LanguageProfile; //导入依赖的package包/类
/**
 * Builds a detector for en, fr, nl and de from the old-format profiles stored as
 * classpath resources under {@code /languages/}, converting each to a {@link LanguageProfile}.
 *
 * @throws IOException if a profile resource cannot be read
 */
private LanguageDetector makeNewDetector() throws IOException {
    LanguageDetectorBuilder builder = LanguageDetectorBuilder.create(NgramExtractors.standard())
            .shortTextAlgorithm(50)
            .prefixFactor(1.5)
            .suffixFactor(2.0);

    LangProfileReader langProfileReader = new LangProfileReader();
    for (String language : ImmutableList.of("en", "fr", "nl", "de")) {
        LangProfile langProfile;
        // try-with-resources: the resource stream was previously never closed by this method.
        try (java.io.InputStream profileStream = LanguageDetectorImplTest.class.getResourceAsStream("/languages/" + language)) {
            langProfile = langProfileReader.read(profileStream);
        }
        LanguageProfile languageProfile = OldLangProfileConverter.convert(langProfile);
        builder.withProfile(languageProfile);
    }

    return builder.build();
}
开发者ID:optimaize,项目名称:language-detector,代码行数:16,代码来源:LanguageDetectorImplTest.java
示例13: LanguageIdentifier
import com.optimaize.langdetect.profiles.LanguageProfile; //导入依赖的package包/类
public LanguageIdentifier() {
try {
List<LanguageProfile> profiles = loadProfiles(getLanguageCodes());
languageDetector = LanguageDetectorBuilder.create(NgramExtractors.standard())
.minimalConfidence(MINIMAL_CONFIDENCE)
.withProfiles(profiles)
.build();
textObjectFactory = CommonTextObjectFactories.forDetectingOnLargeText();
} catch (IOException e) {
throw new RuntimeException("Could not set up language identifier", e);
}
}
开发者ID:languagetool-org,项目名称:languagetool,代码行数:13,代码来源:LanguageIdentifier.java
示例14: loadProfiles
import com.optimaize.langdetect.profiles.LanguageProfile; //导入依赖的package包/类
/**
 * Reads the profiles for the given language codes, then appends a profile for each entry of
 * {@code externalLangCodes} whose resource {@code /<code>/<code>.profile} exists.
 *
 * @param langCodes codes of the profiles to read through {@link LanguageProfileReader}
 * @return the list returned by the reader, extended with the available external profiles
 * @throws IOException if a profile cannot be read
 */
private List<LanguageProfile> loadProfiles(List<String> langCodes) throws IOException {
    List<LanguageProfile> result = new LanguageProfileReader().read(langCodes);

    for (String code : externalLangCodes) {
        String resourcePath = "/" + code + "/" + code + ".profile";
        if (!JLanguageTool.getDataBroker().resourceExists(resourcePath)) {
            continue;  // not all languages are always available
        }
        try (InputStream profileStream = JLanguageTool.getDataBroker().getFromResourceDirAsStream(resourcePath)) {
            LanguageProfile externalProfile = new LanguageProfileReader().read(profileStream);
            result.add(externalProfile);
        }
    }

    return result;
}
开发者ID:languagetool-org,项目名称:languagetool,代码行数:14,代码来源:LanguageIdentifier.java
示例15: checkIfNonEnglish
import com.optimaize.langdetect.profiles.LanguageProfile; //导入依赖的package包/类
/**
 * Tries to name the language of a post's text.
 *
 * <p>The text is the post body after {@code stripBody}, {@code stripTags} and removal of all
 * punctuation runs. The optimaize detector (all built-in profiles) is asked first. If it gives
 * no answer, the result depends on the text length:
 * <ul>
 *   <li>more than 50 chars: the Tika detector's language, or {@code null} if that is empty;</li>
 *   <li>fewer than 50 chars: {@code null};</li>
 *   <li>exactly 50 chars: {@code "Gibberish"} if {@code checkIfNoCodeBlock(post)} holds, else {@code null}.</li>
 * </ul>
 *
 * @param post the post to inspect
 * @return the language reported by the detector, {@code "Gibberish"}, or {@code null} when nothing
 *         could be determined or an {@link IOException} occurred
 */
public static String checkIfNonEnglish(Post post){
    String dataToCheck = stripTags(stripBody(post)).replaceAll("\\p{Punct}+", "");

    try {
        List<LanguageProfile> languageProfiles = new LanguageProfileReader().readAllBuiltIn();
        com.optimaize.langdetect.LanguageDetector optimaizeDetector = LanguageDetectorBuilder.create(NgramExtractors.standard())
                .withProfiles(languageProfiles)
                .build();

        TextObjectFactory textObjectFactory = CommonTextObjectFactories.forDetectingOnLargeText();
        TextObject textObject = textObjectFactory.forText(dataToCheck);
        Optional<LdLocale> lang = optimaizeDetector.detect(textObject);

        if (lang.isPresent()) {
            return lang.get().getLanguage();
        }

        if (dataToCheck.length() > 50) {
            org.apache.tika.language.detect.LanguageDetector tikaDetector = new OptimaizeLangDetector().loadModels();
            // try-with-resources closes the writer even when append/getLanguage throws.
            try (LanguageWriter writer = new LanguageWriter(tikaDetector)) {
                writer.append(dataToCheck);
                LanguageResult result = writer.getLanguage();
                String tikaLang = result.getLanguage();
                return tikaLang.isEmpty() ? null : tikaLang;
            }
        }
        if (dataToCheck.length() < 50) {
            return null;
        }

        // NOTE(review): only reachable when the length is exactly 50; preserved as-is, but one of
        // the two comparisons above was probably meant to be inclusive - confirm intent.
        if (checkIfNoCodeBlock(post)) {
            return "Gibberish";
        }
        return null;
    }
    catch (IOException e){
        // Best effort: report the failure and answer "unknown".
        e.printStackTrace();
    }
    return null;
}
开发者ID:SOBotics,项目名称:Natty,代码行数:50,代码来源:CheckUtils.java
示例16: testLanguageDetectorPerformance
import com.optimaize.langdetect.profiles.LanguageProfile; //导入依赖的package包/类
/**
 * Times the optimaize detector over every line returned by {@code EuroParlUtils.readLines()}
 * (format: language code, TAB, text). Performs 10 runs, printing each run's duration and
 * error rate, then prints the shortest duration.
 */
@Test
public void testLanguageDetectorPerformance() throws IOException {
    // Set the root log level to INFO (intended to quiet language-detector's logging).
    System.setProperty("logging.root.level", "INFO");
    Logger.getRootLogger().setLevel(Level.INFO);

    // Profiles for the target languages only.
    List<LanguageProfile> profiles = new LanguageProfileReader().read(Arrays.asList(TARGET_LANGUAGES_FOR_YALDER));

    com.optimaize.langdetect.LanguageDetector detector = LanguageDetectorBuilder.create(NgramExtractors.standard())
            .withProfiles(profiles)
            .build();

    // Short-clean-text preset; CommonTextObjectFactories.forDetectingOnLargeText() is the alternative.
    TextObjectFactory factory = CommonTextObjectFactories.forDetectingShortCleanText();

    List<String> corpus = EuroParlUtils.readLines();

    // Keep the fastest of 10 runs.
    long fastest = Long.MAX_VALUE;
    for (int run = 1; run <= 10; run++) {
        int hits = 0;
        int misses = 0;

        long startedAt = System.currentTimeMillis();
        for (String line : corpus) {
            String[] fields = line.split("\t", 2);
            String expected = fields[0];
            List<DetectedLanguage> ranked = detector.getProbabilities(factory.forText(fields[1]));

            boolean correct = !ranked.isEmpty()
                    && ranked.get(0).getLocale().getLanguage().equals(expected);
            if (correct) {
                hits++;
            } else {
                misses++;
            }
        }
        long elapsed = System.currentTimeMillis() - startedAt;

        System.out.println(String.format("Run #%d duration = %dms", run, elapsed));
        System.out.println(String.format("Run #%d error rate = %f%%", run, 100.0 * (double)misses/(double)(misses + hits)));
        fastest = Math.min(fastest, elapsed);
    }

    System.out.println(String.format("Best duration = %dms", fastest));
}
开发者ID:kkrugler,项目名称:yalder,代码行数:50,代码来源:OtherDetectorsTest.java
示例17: loadLanguageProfile
import com.optimaize.langdetect.profiles.LanguageProfile; //导入依赖的package包/类
/**
 * Adds a single {@link LanguageProfile} to the profiles held by this validator.
 *
 * @param languageProfile the profile to add
 * @return this validator, for call chaining
 */
public LanguageProfileValidator loadLanguageProfile(LanguageProfile languageProfile) {
    languageProfiles.add(languageProfile);
    return this;
}
开发者ID:optimaize,项目名称:language-detector,代码行数:8,代码来源:LanguageProfileValidator.java
示例18: loadLanguageProfiles
import com.optimaize.langdetect.profiles.LanguageProfile; //导入依赖的package包/类
/**
 * Adds all of the given {@link LanguageProfile}s to the profiles held by this validator.
 *
 * @param languageProfiles the profiles to add
 * @return this validator, for call chaining
 */
public LanguageProfileValidator loadLanguageProfiles(Collection<LanguageProfile> languageProfiles) {
    // The parameter shadows the field, so the field must be qualified with "this".
    final Collection<LanguageProfile> incoming = languageProfiles;
    this.languageProfiles.addAll(incoming);
    return this;
}
开发者ID:optimaize,项目名称:language-detector,代码行数:8,代码来源:LanguageProfileValidator.java
示例19: forAll
import com.optimaize.langdetect.profiles.LanguageProfile; //导入依赖的package包/类
/**
 * Builds the n-gram frequency data of a single gram size across all built-in language profiles.
 *
 * @param gramSize the only gram length to include
 * @throws IOException if the built-in profiles cannot be read
 */
private static NgramFrequencyData forAll(int gramSize) throws IOException {
    return NgramFrequencyData.create(
            new LanguageProfileReader().readAllBuiltIn(),
            ImmutableSet.of(gramSize));
}
开发者ID:optimaize,项目名称:language-detector,代码行数:5,代码来源:NgramFrequencyDataTest.java
注:本文中的com.optimaize.langdetect.profiles.LanguageProfile类示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论