
Python gen_word2vec.skipgram Function Code Examples


This article collects typical usage examples of the Python function tensorflow.models.embedding.gen_word2vec.skipgram. If you are unsure what skipgram does, how to call it, or what real-world usage looks like, the curated examples below should help.



Five code examples of the skipgram function are presented below, ordered by popularity by default.
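Before diving in, here is a minimal, self-contained sketch of calling the op. It is a sketch under assumptions, not canonical usage: it assumes an old TensorFlow 0.x release in which the tensorflow.models.embedding package and its compiled gen_word2vec kernels still ship, and the corpus path "text8" and the hyperparameter values are placeholders.

import tensorflow as tf
from tensorflow.models.embedding import gen_word2vec as word2vec

# skipgram scans a whitespace-tokenized text file and returns a tuple of
# tensors: the vocabulary words and their counts, the number of words per
# epoch, the current epoch, a running count of processed words, and
# (examples, labels) batches of center/context word ids for training.
(words, counts, words_per_epoch, current_epoch, total_words_processed,
 examples, labels) = word2vec.skipgram(filename="text8",  # placeholder corpus
                                       batch_size=128,
                                       window_size=5,
                                       min_count=5,
                                       subsample=1e-3)

with tf.Session() as sess:
    vocab, vocab_counts = sess.run([words, counts])  # materialize the vocabulary
    print("Vocab size:", len(vocab))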

Example 1: build_graph

  def build_graph(self):
    """Build the graph for the full model."""
    opts = self._options
    # The training data. A text file.
    (words, counts, words_per_epoch, self._epoch, self._words, examples,
     labels) = word2vec.skipgram(filename=opts.train_data,
                                 batch_size=opts.batch_size,
                                 window_size=opts.window_size,
                                 min_count=opts.min_count,
                                 subsample=opts.subsample)
    (opts.vocab_words, opts.vocab_counts,
     opts.words_per_epoch) = self._session.run([words, counts, words_per_epoch])
    opts.vocab_size = len(opts.vocab_words)
    print("Data file: ", opts.train_data)
    print("Vocab size: ", opts.vocab_size - 1, " + UNK")
    print("Words per epoch: ", opts.words_per_epoch)
    self._examples = examples
    self._labels = labels
    self._id2word = opts.vocab_words
    for i, w in enumerate(self._id2word):
      self._word2id[w] = i
    true_logits, sampled_logits = self.forward(examples, labels)
    loss = self.nce_loss(true_logits, sampled_logits)
    tf.scalar_summary("NCE loss", loss)
    self._loss = loss
    self.optimize(loss)

    # Properly initialize all variables.
    tf.initialize_all_variables().run()

    self.saver = tf.train.Saver()
Developer: debaratidas1994 | Project: tensorflow | Lines: 31 | Source file: word2vec.py
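Example 1 delegates to forward and nce_loss helpers that are not shown. For orientation, here is a hedged sketch of what those two steps typically compute in skip-gram models with sampled losses; the embedding matrices self._w_in and self._w_out (shape [vocab_size, emb_dim]) and the sampler parameters are illustrative assumptions, not code from this project.

  def forward(self, examples, labels):
    opts = self._options
    # Embeddings of the center words: [batch_size, emb_dim].
    emb = tf.nn.embedding_lookup(self._w_in, examples)
    # Draw negative samples from a distorted unigram distribution.
    labels_matrix = tf.reshape(tf.cast(labels, tf.int64), [opts.batch_size, 1])
    sampled_ids, _, _ = tf.nn.fixed_unigram_candidate_sampler(
        true_classes=labels_matrix,
        num_true=1,
        num_sampled=opts.num_samples,
        unique=True,
        range_max=opts.vocab_size,
        distortion=0.75,
        unigrams=opts.vocab_counts.tolist())
    # Logit for each true (center, context) pair: [batch_size].
    true_w = tf.nn.embedding_lookup(self._w_out, labels)
    true_logits = tf.reduce_sum(tf.mul(emb, true_w), 1)
    # Logits against the sampled negatives: [batch_size, num_samples].
    sampled_w = tf.nn.embedding_lookup(self._w_out, sampled_ids)
    sampled_logits = tf.matmul(emb, sampled_w, transpose_b=True)
    return true_logits, sampled_logits

  def nce_loss(self, true_logits, sampled_logits):
    # True pairs should score high, sampled negatives low.
    true_xent = tf.nn.sigmoid_cross_entropy_with_logits(
        true_logits, tf.ones_like(true_logits))
    sampled_xent = tf.nn.sigmoid_cross_entropy_with_logits(
        sampled_logits, tf.zeros_like(sampled_logits))
    return (tf.reduce_sum(true_xent) +
            tf.reduce_sum(sampled_xent)) / self._options.batch_size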


Example 2: build_graph

  def build_graph(self):
    #get the training data
    (words, counts, words_per_epoch, current_epoch, total_words_processed,
     examples, labels) = word2vec.skipgram(filename=word_config.train_data_path,
                                           batch_size=word_config.batch_size,
                                           window_size=word_config.window_size,
                                           min_count=word_config.min_count,
                                           subsample=word_config.subsample)
    # vocab_words, vocab_counts, words_per_epoch = self._sess.run([words, counts, words_per_epoch])
    if tf.gfile.Exists(os.path.join(word_config.output_dir, 'vocab.txt')):
      vocab_words, vocab_counts = self.load_vocab()
    else:
      vocab_words, vocab_counts = self._sess.run([words, counts])

    vocab_size = len(vocab_words)
    print("Data file: ", word_config.train_data_path)
    print("Vocab size: ", vocab_size - 1, " + UNK")
    # print("Words per epoch: ", words_per_epoch)

    self._id2word = vocab_words
    for idx, word in enumerate(self._id2word):  # avoid shadowing the builtin id()
      self._word2id[word] = idx

    w_embed_in = tf.Variable(tf.random_uniform([vocab_size, word_config.embed_size],
                                                -0.5 / word_config.embed_size, 0.5 / word_config.embed_size),
                             name="w_embed_in")
    w_embed_out = tf.Variable(tf.zeros([vocab_size, word_config.embed_size]), name="w_embed_out")

    self.param_summary(w_embed_in)
    self.param_summary(w_embed_out)

    # learning_rate = tf.Variable(word_config.learning_rate, trainable=False, name="learning_rate")

    global_step = tf.Variable(0, trainable=False, name="global_step")

    total_words = words_per_epoch * word_config.max_steps

    # Linear decay: cast to float before dividing; integer tensor division
    # truncates, which would make the decay factor jump straight from 1 to 0.
    learning_rate = word_config.learning_rate * tf.maximum(
        0.0001, 1.0 - tf.cast(total_words_processed, tf.float32) / tf.cast(total_words, tf.float32))

    inc = global_step.assign_add(1)
    with tf.control_dependencies([inc]):
      train = word2vec.neg_train(w_embed_in, w_embed_out, examples, labels, learning_rate, vocab_counts.tolist(),
                                 word_config.nr_neg_samples)

    self._vocab_words = vocab_words
    self._vocab_counts = vocab_counts
    self._vocab_size = vocab_size
    self._w_embed_in = w_embed_in
    self._w_embed_out = w_embed_out
    self._train = train
    self._examples = examples
    self._labels = labels
    self._global_step = global_step
    self._current_epoch = current_epoch
    self._total_words_processed = total_words_processed
    self._learning_rate = learning_rate
    print("end of build graph")
Developer: ioanachelu | Project: word2vec | Lines: 57 | Source file: word2vec.py


Example 3: build_graph

  def build_graph(self):
	"""Build the model graph."""
	opts = self._options

	# The training data. A text file.
	(words, counts, words_per_epoch, current_epoch, total_words_processed,
	 examples, labels) = word2vec.skipgram(filename=opts.train_data,
										   batch_size=opts.batch_size,
										   window_size=opts.window_size,
										   min_count=opts.min_count,
										   subsample=opts.subsample)
	(opts.vocab_words, opts.vocab_counts, opts.words_per_epoch) = self._session.run([words, counts, words_per_epoch])
	opts.vocab_size = len(opts.vocab_words)
	print("Data file: ", opts.train_data)
	print("Vocab size: ", opts.vocab_size - 1, " + UNK")
	print("Words per epoch: ", opts.words_per_epoch)

	self._id2word = opts.vocab_words
	for i, w in enumerate(self._id2word):
	  self._word2id[w] = i

	# Declare all variables we need.
	# Input words embedding: [vocab_size, emb_dim]
	w_in = tf.Variable(
		tf.random_uniform([opts.vocab_size, opts.emb_dim], -0.5 / opts.emb_dim, 0.5 / opts.emb_dim), name="w_in")

	# Output words embedding: [vocab_size, emb_dim]
	w_out = tf.Variable(tf.zeros([opts.vocab_size, opts.emb_dim]), name="w_out")

	# Global step: scalar, i.e., shape []
	global_step = tf.Variable(0, name="global_step")

	# Linear learning rate decay.
	words_to_train = float(opts.words_per_epoch * opts.epochs_to_train)
	lr = opts.learning_rate * tf.maximum(0.0001, 1.0 - tf.cast(total_words_processed, tf.float32) / words_to_train)

	# Training nodes.
	inc = global_step.assign_add(1)
	with tf.control_dependencies([inc]):
	  train = word2vec.neg_train(w_in,
								 w_out,
								 examples,
								 labels,
								 lr,
								 vocab_count=opts.vocab_counts.tolist(),
								 num_negative_samples=opts.num_samples)

	self._w_in = w_in
	self._examples = examples
	self._labels = labels
	self._lr = lr
	self._train = train
	self.step = global_step
	self._epoch = current_epoch
	self._words = total_words_processed
Developer: ngthuydiem | Project: skytutor | Lines: 56 | Source file: train_with_tensorflow.py
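Examples 2 and 3 do not use a tf.train.Optimizer: word2vec.neg_train is a fused op that, each time it runs, pulls the next batch from the skipgram reader and applies one in-place negative-sampling SGD update to the two embedding matrices. A hedged usage sketch, where the session object and the stopping rule are assumptions:

# Each run of the fused op consumes one batch and updates w_in / w_out
# in place; training is just running the op until enough epochs pass.
while session.run(model._epoch) < opts.epochs_to_train:  # assumed stopping rule
  session.run(model._train)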


Example 4: build_graph

  def build_graph(self):
    opts = self._options
    (words, counts, words_per_epoch, self._epoch, self._words, examples,
     labels) = word2vec.skipgram(filename="text8",
                                 batch_size=opts.batch_size,
                                 window_size=opts.window_size,
                                 min_count=opts.min_count,
                                 subsample=0)
    (opts.vocab_words, opts.vocab_counts,
     opts.words_per_epoch) = self._session.run([words, counts, words_per_epoch])
    opts.vocab_size = len(opts.vocab_words)
    print("Data file: text8")  # the corpus filename is hardcoded above
    print("Vocab size: ", opts.vocab_size - 1, " + UNK")
    print("Words per epoch: ", opts.words_per_epoch)
    self._examples = examples
    self._labels = labels
    self._id2word = opts.vocab_words
    for i, w in enumerate(self._id2word):
      self._word2id[w] = i
    true_logits, sampled_logits = self.forward(examples, labels)
    loss = self.nce_loss(true_logits, sampled_logits)
    tf.scalar_summary("NCE loss", loss)
    self._loss = loss
    self.optimize(loss)
Developer: kingtaurus | Project: cs224d | Lines: 24 | Source file: tensorflow_word2vec.py


Example 5: build_graph

  def build_graph(self):
    """Build the graph for the full model."""
    opts = self._options
    # The training data. A text file.
    (words, counts, words_per_epoch, self._epoch, self._words, examples,
     labels) = word2vec.skipgram(filename=opts.train_data,
                                 batch_size=opts.batch_size,
                                 window_size=opts.window_size,
                                 min_count=opts.min_count,
                                 subsample=opts.subsample)

    ###NEW: read sampling corpus (=all files in same dir as train_data except for training data)
    full_path = os.path.realpath(opts.train_data)
    path, filename = os.path.split(full_path)
    sampling_files = []
    for file in os.listdir(path):
        # parentheses needed: otherwise "and" binds tighter than "or"
        if (file.endswith(".txt") or file.endswith(".tok")) and file != filename:
            sampling_files.append(path+"/"+file)
    print("Files for sampling: ", ", ".join(sampling_files))

    #write new file as concat of all sampling files
    sample_data = opts.train_data+".sample"
    sample_train_data = sample_data+".train"
    o = codecs.open(sample_data, "w", "utf8")
    oo = codecs.open(sample_train_data, "w", "utf8")
    for sampling_file in sampling_files:
        f = open(sampling_file,"r")
        t = f.read()
        o.write(t.decode("utf8")+" ") #concat all files
        oo.write(t.decode("utf8")+" ")
        f.close()
    o.close()
    t = codecs.open(opts.train_data, "r", "utf8")
    oo.write(t.read())  # codecs.open already decodes; a second .decode() would fail on non-ASCII text
    t.close()
    oo.close()

    # The sampling data. A text file.
    (words_samples, counts_samples, words_per_epoch_samples, b_epoch_samples, b_words_samples, examples_samples,
     labels_samples) = word2vec.skipgram(filename=sample_data,
                                 batch_size=opts.batch_size,
                                 window_size=opts.window_size,
                                 min_count=opts.min_count,
                                 subsample=opts.subsample)

    #Sampling plus training data for getting full vocabulary for embeddings
    (words_samples_train, counts_samples_train, words_per_epoch_samples_train, b_epoch_samples_train, b_words_samples_train, examples_samples_train,
     labels_samples_train) = word2vec.skipgram(filename=sample_train_data,
                                 batch_size=opts.batch_size,
                                 window_size=opts.window_size,
                                 min_count=opts.min_count,
                                 subsample=opts.subsample)

    # Fetch each corpus's own words_per_epoch tensor alongside its vocabulary.
    (opts.all_words, opts.all_counts,
     all_words_per_epoch) = self._session.run(
        [words_samples_train, counts_samples_train, words_per_epoch_samples_train])

    (opts.sample_words, opts.sample_counts,
     sample_words_per_epoch) = self._session.run(
        [words_samples, counts_samples, words_per_epoch_samples])

    #first add sample words
    for s in opts.sample_words:
        last_index = len(self._word2id)
        self._word2id.setdefault(s,last_index)

    (opts.vocab_words, opts.vocab_counts,
     opts.words_per_epoch) = self._session.run([words, counts, words_per_epoch])

    #then add training words
    for v in opts.vocab_words:
        last_index = len(self._word2id)
        self._word2id.setdefault(v,last_index)

    print("Word2id: ", self._word2id)

    opts.vocab_size = len(self._word2id) # NOTE: the train and sample vocabularies overlap, so take the size of word2id (their proper union) rather than summing the two vocabulary sizes
    print("Sample file: ", sample_data)
    print("Data file: ", opts.train_data)


    print("Vocab size: ", opts.vocab_size - 1, " + UNK")
    print("Words per epoch: ", opts.words_per_epoch)
    self._examples = examples_samples
    self._labels = labels_samples
    #self._id2word = opts.all_words
    #for i, w in enumerate(self._id2word):
    for (w,i) in self._word2id.iteritems():
      self._id2word[i] = w

    print("id2word: ", self._id2word)

    true_logits, sampled_logits = self.forward(examples_samples, labels_samples)
    loss = self.nce_loss(true_logits, sampled_logits)
    tf.scalar_summary("NCE loss", loss)
    self._loss = loss
    self.optimize(loss)

    # Properly initialize all variables.
    tf.initialize_all_variables().run()

    self.saver = tf.train.Saver()
Developer: juliakreutzer | Project: loons | Lines: 100 | Source file: loons.py



Note: The tensorflow.models.embedding.gen_word2vec.skipgram examples in this article were compiled by 纯净天空 from source code and documentation on GitHub, MSDocs, and similar platforms. The snippets were selected from open-source projects contributed by their respective authors; copyright remains with the original authors, and any use or redistribution should follow each project's license. Please do not reproduce without permission.

