• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    公众号

Java LmReaders类代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Java中edu.berkeley.nlp.lm.io.LmReaders的典型用法代码示例。如果您正苦于以下问题:Java LmReaders类的具体用法是什么?Java LmReaders怎么用?有哪些Java LmReaders的使用例子?那么,这里精选的类代码示例或许可以为您提供帮助。



LmReaders类属于edu.berkeley.nlp.lm.io包,在下文中一共展示了LmReaders类的13个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Java代码示例。

示例1: estimateLanguaModel

import edu.berkeley.nlp.lm.io.LmReaders; //导入依赖的package包/类
/**
 * Estimates a Kneser-Ney language model from a single text file by delegating to
 * {@code LmReaders.createKneserNeyLmFromTextFiles} with default {@code ConfigOptions}.
 *
 * @param order    n-gram order passed to the estimator
 * @param textPath path of the text file used as the only training input
 * @param arpaPath path of the file handed to the estimator as its ARPA target
 */
@Override
public void estimateLanguaModel(int order, String textPath, String arpaPath) {
	// Word indexer using the ARPA reader's start / end / unknown symbols.
	final StringWordIndexer indexer = new StringWordIndexer();
	indexer.setStartSymbol(ArpaLmReader.START_SYMBOL);
	indexer.setEndSymbol(ArpaLmReader.END_SYMBOL);
	indexer.setUnkSymbol(ArpaLmReader.UNK_SYMBOL);

	// The estimator expects a list of input files; there is exactly one here.
	final List<String> corpusFiles = new ArrayList<String>();
	corpusFiles.add(textPath);

	final File arpaTarget = new File(arpaPath);
	LmReaders.createKneserNeyLmFromTextFiles(corpusFiles, indexer, order, arpaTarget, new ConfigOptions());
}
 
开发者ID:nicolashernandez,项目名称:dev-star,代码行数:17,代码来源:BerkeleyLanguageModel.java


示例2: BerkeleyRawLanguageModel

import edu.berkeley.nlp.lm.io.LmReaders; //导入依赖的package包/类
public BerkeleyRawLanguageModel(File berkeleyLm) {
  if (!berkeleyLm.isFile()) {
    throw new RuntimeException("You need to specify a BerkeleyLM file: " + berkeleyLm);
  }
  File vocabFile = new File(berkeleyLm.getParent(), "vocab_cs.gz");
  if (!vocabFile.exists()) {
    throw new RuntimeException("No vocabulary file 'vocab_cs.gz' found in the BerkeleyLM directory: " + vocabFile);
  }
  map = LmReaders.readNgramMapFromBinary(berkeleyLm.getAbsolutePath(), vocabFile.getAbsolutePath());
  /* For some reason, this crashes with IndexOutOfBoundsException:
  System.out.println("---START");
  Map<List<String>, LongRef> mapForOrder = map.getMapForOrder(2);
  for (Map.Entry<List<String>, LongRef> entry : mapForOrder.entrySet()) {
    System.out.println("E: " + entry.getKey());
  }
  System.out.println("---DONE");*/
}
 
开发者ID:languagetool-org,项目名称:languagetool,代码行数:18,代码来源:BerkeleyRawLanguageModel.java


示例3: createFromFiles

import edu.berkeley.nlp.lm.io.LmReaders; //导入依赖的package包/类
/**
 * Estimates a Kneser-Ney n-gram model from pretokenized sentences, writes it as an
 * ARPA file and loads the result via {@code loadFromArpaFile}.
 * Expects one sentence per line, tokens separated by space.
 *
 * @see edu.berkeley.nlp.lm.io.MakeKneserNeyArpaFromText
 * @param src_dir directory containing the training text files
 * @param accept_file_regex_pattern regular expression a file name must match
 *        (as by {@link String#matches(String)}) for the file to be used
 * @param order n-gram order
 * @param arpa_filename ARPA file to write (possibly null, in which case
 *        {@code <name of src_dir>.arpa.gz} inside {@code src_dir} is used)
 * @param discount Kneser-Ney discount applied to every order; a negative value
 *        leaves {@code kneserNeyDiscounts} as {@code null}
 *        (presumably letting the library choose its own discounts - confirm)
 * @param mincount Kneser-Ney minimum count applied to every order
 * @return the model loaded from the ARPA file
 * @throws IllegalArgumentException if {@code src_dir} is not a directory
 * @throws IllegalStateException if the directory content cannot be listed
 */
public static BerkeleyLM<String> createFromFiles(final String src_dir, final String accept_file_regex_pattern, final int order, String arpa_filename, double discount, int mincount) {
	File src_dir_ = new File(src_dir);
	if (!src_dir_.isDirectory()) {
		throw new IllegalArgumentException(String.format("Expected directory but got %s", src_dir_.getAbsolutePath()));
	}
	String src_dir_name = src_dir_.getName();

	File[] matched = src_dir_.listFiles(new FilenameFilter() {
		@Override
		public boolean accept(File dir, String name) {
			return new File(dir, name).isFile() && name.matches(accept_file_regex_pattern);
		}
	});
	// listFiles returns null on an I/O error; fail with a meaningful message instead of an NPE.
	if (matched == null) {
		throw new IllegalStateException(String.format("Could not list files in %s", src_dir_.getAbsolutePath()));
	}
	// Use paths that include the source directory: bare file names would be
	// resolved against the current working directory, not against src_dir.
	String[] paths = new String[matched.length];
	for (int i = 0; i < matched.length; i++) {
		paths[i] = matched[i].getPath();
	}
	List<String> files = Arrays.asList(paths);

	File arpa_file;
	if (arpa_filename == null) {
		arpa_file = new File(src_dir, src_dir_name + ".arpa.gz");
	} else {
		arpa_file = new File(arpa_filename);
	}

	final StringWordIndexer wordIndexer = new StringWordIndexer();
	wordIndexer.setStartSymbol(ArpaLmReader.START_SYMBOL);
	wordIndexer.setEndSymbol(ArpaLmReader.END_SYMBOL);
	wordIndexer.setUnkSymbol(ArpaLmReader.UNK_SYMBOL);

	ConfigOptions opts = new ConfigOptions();
	opts.kneserNeyMinCounts = new double[order];
	Arrays.fill(opts.kneserNeyMinCounts, mincount);
	if (discount < 0) {
		opts.kneserNeyDiscounts = null;
	} else {
		opts.kneserNeyDiscounts = new double[order];
		Arrays.fill(opts.kneserNeyDiscounts, discount);
	}
	opts.unknownWordLogProb = Properties.knUnkLog10Prob();

	// Pass the options configured above; a fresh ConfigOptions would silently
	// discard mincount, discount and the unknown-word log probability.
	LmReaders.createKneserNeyLmFromTextFiles(files, wordIndexer, order, arpa_file, opts);
	return loadFromArpaFile(arpa_file.getAbsolutePath());
}
 
开发者ID:tudarmstadt-lt,项目名称:topicrawler,代码行数:49,代码来源:BerkeleyLM.java


示例4: LMGrammarBerkeley

import edu.berkeley.nlp.lm.io.LmReaders; //导入依赖的package包/类
/**
 * Creates a grammar backed by a Berkeley LM read from {@code lm_file}.
 * The file is first read as a binary LM; if that read throws a
 * {@link RuntimeException}, it is read as an ARPA file instead and the result is
 * passed through {@code ArrayEncodedCachingLmWrapper.wrapWithCacheThreadSafe}.
 *
 * @param order   n-gram order, forwarded to the superclass and to the ARPA reader
 * @param lm_file path of the language model file
 * @throws RuntimeException if {@code lm_file} does not exist
 */
public LMGrammarBerkeley(int order, String lm_file) {
  super(order);
  vocabIdToMyIdMapping = new int[10];

  final File lmLocation = new File(lm_file);
  if (!lmLocation.exists()) {
    throw new RuntimeException("Can't read lm_file '" + lm_file + "'");
  }

  // Optional request logging: route everything to the dedicated handler only.
  if (logRequests) {
    logger.addHandler(logHandler);
    logger.setLevel(Level.FINEST);
    logger.setUseParentHandlers(false);
  }

  try {
    // First attempt: binary format (gzipped binaries included).
    lm = (ArrayEncodedNgramLanguageModel<String>) LmReaders.<String>readLmBinary(lm_file);
    LOG.info("Loading Berkeley LM from binary {}", lm_file);
  } catch (RuntimeException binaryReadFailure) {
    // Fallback: treat the file as ARPA.
    final ConfigOptions arpaOptions = new ConfigOptions();
    LOG.info("Loading Berkeley LM from ARPA file {}", lm_file);
    final StringWordIndexer indexer = new StringWordIndexer();
    final ArrayEncodedNgramLanguageModel<String> arpaModel =
        LmReaders.readArrayEncodedLmFromArpa(lm_file, false, indexer, arpaOptions, order);

    lm = ArrayEncodedCachingLmWrapper.wrapWithCacheThreadSafe(arpaModel);
  }

  // Resolve (or register) the index of the unknown-word symbol.
  this.unkIndex = lm.getWordIndexer().getOrAddIndex(
      lm.getWordIndexer().getUnkSymbol());
}
 
开发者ID:apache,项目名称:incubator-joshua,代码行数:29,代码来源:LMGrammarBerkeley.java


示例5: createLM

import edu.berkeley.nlp.lm.io.LmReaders; //导入依赖的package包/类
/**
 * Submits a job to {@code jobs} that builds a 4-gram Kneser-Ney language model
 * from {@code fileName} and scores every sentence of {@code corpus} with it.
 * The scores are stored in {@code lm[index]}, one entry per corpus row.
 * {@code InvitationModel.latch} is counted down when the job ends, whether it
 * completed normally or failed, so that threads waiting on the latch never hang.
 *
 * @param fileName training text file; {@code fileName + ".lm"} is also handed to
 *                 the reader (presumably the file the model is written to - confirm)
 * @param lm       result table; row {@code index} is replaced by this job
 * @param index    row of {@code lm} to fill
 * @param corpus   sentences to score, one {@code int[]} per sentence
 */
public static void createLM(final String fileName, final float lm[][],
		final int index, final int corpus[][]) {

	jobs.execute(new Runnable() {

		@Override
		public void run() {
			try {
				log.info("Creating language model");

				final int lmOrder = 4;
				final List<String> inputFiles = new ArrayList<String>();
				inputFiles.add(fileName);
				final StringWordIndexer wordIndexer = new StringWordIndexer();
				wordIndexer.setStartSymbol(ArpaLmReader.START_SYMBOL);
				wordIndexer.setEndSymbol(ArpaLmReader.END_SYMBOL);
				wordIndexer.setUnkSymbol(ArpaLmReader.UNK_SYMBOL);

				final NgramLanguageModel<String> createdLM = LmReaders
						.readContextEncodedKneserNeyLmFromTextFile(inputFiles,
								wordIndexer, lmOrder, new ConfigOptions(),
								new File(fileName + ".lm"));

				lm[index] = new float[corpus.length];

				for (int i = 0; i < corpus.length; i++) {
					int sent[] = corpus[i];
					lm[index][i] = getLMProb(createdLM, sent);
				}

				log.info(".");
			} finally {
				// Always release the latch: if the job throws, a waiter on the
				// latch would otherwise block forever.
				InvitationModel.latch.countDown();
			}
		}

	});
}
 
开发者ID:amirkamran,项目名称:InvitationModel,代码行数:38,代码来源:InvitationModel.java


示例6: readLmFromFile

import edu.berkeley.nlp.lm.io.LmReaders; //导入依赖的package包/类
/**
 * Loads the language model from {@code lmfile} and stores it in {@code lm},
 * wrapped by {@code ArrayEncodedCachingLmWrapper.wrapWithCacheNotThreadSafe}.
 * A file whose name ends with ".b", ".bi", ".bin" or "binary" is read as a
 * binary LM; any other file is read as ARPA.
 *
 * @param lmfile path of the language model file
 */
private void readLmFromFile(String lmfile) {
	System.err.println("Loading language model from " + lmfile);
	final StringWordIndexer indexer = new StringWordIndexer();

	// Decide on the format purely from the file name suffix.
	boolean binaryFormat = false;
	for (final String suffix : new String[] { ".b", ".bi", ".bin", "binary" }) {
		if (lmfile.endsWith(suffix)) {
			binaryFormat = true;
			break;
		}
	}

	final NgramLanguageModel<String> loaded;
	if (binaryFormat) {
		loaded = LmReaders.readLmBinary(lmfile);
	} else {
		loaded = LmReaders.readArrayEncodedLmFromArpa(lmfile, false, indexer);
	}

	final ArrayEncodedNgramLanguageModel<String> arrayEncoded =
			(ArrayEncodedNgramLanguageModel<String>) loaded;
	lm = ArrayEncodedCachingLmWrapper.wrapWithCacheNotThreadSafe(arrayEncoded);
}
 
开发者ID:siddBanPsu,项目名称:WikiKreator,代码行数:21,代码来源:MyBerkeleyLm.java


示例7: readLmFromFile

import edu.berkeley.nlp.lm.io.LmReaders; //导入依赖的package包/类
/**
 * Reads the language model stored in {@code lmfile} into {@code lm}.
 * Files named with one of the suffixes ".b", ".bi", ".bin" or "binary" go
 * through the binary reader, everything else through the ARPA reader; the
 * result is cast to an array-encoded model and wrapped in a non-thread-safe cache.
 *
 * @param lmfile path of the language model file
 */
private void readLmFromFile(String lmfile) {
	System.err.println("Loading language model from " + lmfile);
	final StringWordIndexer wordIndex = new StringWordIndexer();

	final boolean arpaFormat = !(lmfile.endsWith(".b")
			|| lmfile.endsWith(".bi")
			|| lmfile.endsWith(".bin")
			|| lmfile.endsWith("binary"));

	NgramLanguageModel<String> model;
	if (arpaFormat) {
		model = LmReaders.readArrayEncodedLmFromArpa(lmfile, false, wordIndex);
	} else {
		model = LmReaders.readLmBinary(lmfile);
	}

	lm = ArrayEncodedCachingLmWrapper.wrapWithCacheNotThreadSafe(
			(ArrayEncodedNgramLanguageModel<String>) model);
}
 
开发者ID:siddBanPsu,项目名称:WikiKreator,代码行数:15,代码来源:LMReader.java


示例8: loadLanguageModel

import edu.berkeley.nlp.lm.io.LmReaders; //导入依赖的package包/类
/**
 * Reads an ARPA file as an array-encoded language model and hands it to
 * {@code setLM}. Ranked prob/backoff storage is switched off and the
 * unknown-word log probability is set to -10; the maximum order is
 * {@code Integer.MAX_VALUE}.
 *
 * @param arpaPath path of the ARPA file to load
 */
@Override
public  void loadLanguageModel(String arpaPath) {
	final String arpaLocation = new File(arpaPath).getPath();

	final ConfigOptions options = new ConfigOptions();
	options.storeRankedProbBackoffs = false;
	options.unknownWordLogProb = -10.0f;

	final StringWordIndexer indexer = new StringWordIndexer();
	setLM(LmReaders.readArrayEncodedLmFromArpa(
			arpaLocation, false, indexer, options, Integer.MAX_VALUE));
}
 
开发者ID:nicolashernandez,项目名称:dev-star,代码行数:15,代码来源:BerkeleyLanguageModel.java


示例9: BerkeleyLanguageModel

import edu.berkeley.nlp.lm.io.LmReaders; //导入依赖的package包/类
public BerkeleyLanguageModel(File berkeleyLm) {
  if (!berkeleyLm.isFile()) {
    throw new RuntimeException("You need to specify a BerkeleyLM file (*.blm.gz): " + berkeleyLm);
  }
  File vocabFile = new File(berkeleyLm.getParent(), "vocab_cs.gz");
  if (!vocabFile.exists()) {
    throw new RuntimeException("No vocabulary file 'vocab_cs.gz' found in the BerkeleyLM directory: " + vocabFile);
  }
  lm = LmReaders.readGoogleLmBinary(berkeleyLm.getAbsolutePath(), vocabFile.getAbsolutePath());
}
 
开发者ID:languagetool-org,项目名称:languagetool,代码行数:11,代码来源:BerkeleyLanguageModel.java


示例10: readFromBinary

import edu.berkeley.nlp.lm.io.LmReaders; //导入依赖的package包/类
/**
 * Reads a binary language model from {@code file} and wraps it in a
 * {@code BerkeleyLM}. The model returned by {@code LmReaders.readLmBinary} is
 * cast, unchecked, to an array-encoded model.
 *
 * @param file binary language model file
 * @param <W>  word type of the model
 * @return a {@code BerkeleyLM} wrapping the model that was read
 */
public static <W> BerkeleyLM<W> readFromBinary(File file) {
	final String binaryPath = file.getAbsolutePath();
	@SuppressWarnings("unchecked")
	final ArrayEncodedNgramLanguageModel<W> model =
			(ArrayEncodedNgramLanguageModel<W>) LmReaders.readLmBinary(binaryPath);
	return new BerkeleyLM<W>(model);
}
 
开发者ID:tudarmstadt-lt,项目名称:topicrawler,代码行数:6,代码来源:LanguageModelHelper.java


示例11: saveAsBinary

import edu.berkeley.nlp.lm.io.LmReaders; //导入依赖的package包/类
/**
 * Writes the model held by {@code blm} to {@code file} by delegating to
 * {@code LmReaders.writeLmBinary}.
 *
 * @param blm  wrapper whose {@code get()} result is written
 * @param file target file; its absolute path is passed to the writer
 * @param <W>  word type of the model
 */
public static <W> void saveAsBinary(BerkeleyLM<W> blm, File file) {
	LmReaders.writeLmBinary(blm.get(), file.getAbsolutePath());
}
 
开发者ID:tudarmstadt-lt,项目名称:topicrawler,代码行数:4,代码来源:LanguageModelHelper.java


示例12: createFileFromText

import edu.berkeley.nlp.lm.io.LmReaders; //导入依赖的package包/类
/**
 * Playground routine: estimates a trigram Kneser-Ney model from
 * {@code src/test/resources/test.txt}, writes it to
 * {@code _svnignore/test.arpa.gz}, reads that ARPA file back as an
 * array-encoded model and prints the log probability of a few sample n-grams
 * to standard output.
 */
public static void createFileFromText() {
	final String txtfile = "src/test/resources/test.txt";
	final String arpafile = "_svnignore/test.arpa.gz";
	final int order = 3;

	final StringWordIndexer wordIndexer = new StringWordIndexer();
	wordIndexer.setStartSymbol(ArpaLmReader.START_SYMBOL);
	wordIndexer.setEndSymbol(ArpaLmReader.END_SYMBOL);
	wordIndexer.setUnkSymbol(ArpaLmReader.UNK_SYMBOL);

	final ConfigOptions opts = new ConfigOptions();
	opts.kneserNeyDiscounts = new double[] { 0.75f, 0.6f, 0.6f };
	opts.kneserNeyMinCounts = new double[] { 0, 0, 0, 0, 0, 0, 0 };

	// Pass 1: feed the text into the Kneser-Ney callback.
	final TextReader<String> reader = new TextReader<String>(Arrays.asList(txtfile), wordIndexer);
	final KneserNeyLmReaderCallback<String> kneserNeyReader = new KneserNeyLmReaderCallback<String>(wordIndexer, order, opts);
	reader.parse(kneserNeyReader);

	// Pass 2: hand the callback's content to the file-writing callback for the ARPA file.
	kneserNeyReader.parse(new KneserNeyFileWritingLmReaderCallback<String>(new File(arpafile), wordIndexer));

	// Read the ARPA file back (second argument false, as in the original call).
	final NgramLanguageModel<String> lm = LmReaders.readArrayEncodedLmFromArpa(arpafile, false);

	// Sample n-grams to score, printed in this order.
	final String[][] queries = {
			{ "Hallo" },
			{ "schöne", "neue", "welt" },
			{ "schöne", "neue", "pups" },
			{ "schöne", "neue", "globus" },
			{ "schöne", "neue", "erde" },
	};
	for (final String[] query : queries) {
		System.out.println(lm.getLogProb(Arrays.asList(query)));
	}
}
 
开发者ID:tudarmstadt-lt,项目名称:topicrawler,代码行数:48,代码来源:BerkeleyLmPlayground.java


示例13: loadLanguageModel

import edu.berkeley.nlp.lm.io.LmReaders; //导入依赖的package包/类
/**
 * Reads a binary language model from {@code binaryPath} and stores it in
 * {@code this.blm}. The method itself returns nothing.
 *
 * @param binaryPath path of the binary language model file
 */
@Override
public  void loadLanguageModel(String binaryPath) {
		 // The reader's result is cast to the raw ArrayEncodedNgramLanguageModel type.
		 this.blm =(ArrayEncodedNgramLanguageModel) LmReaders.readLmBinary(binaryPath);
	}
 
开发者ID:nicolashernandez,项目名称:dev-star,代码行数:8,代码来源:BinaryBerkeleyLanguageModel.java



注:本文中的edu.berkeley.nlp.lm.io.LmReaders类示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Java AnnotationEntry类代码示例发布时间:2022-05-23
下一篇:
Java Video类代码示例发布时间:2022-05-23
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap