This article collects typical usage examples of the Python function tensorflow.models.embedding.gen_word2vec.skipgram. If you are wondering what skipgram does, how to call it, or what real-world usages look like, the curated code examples below may help.
Five code examples of the skipgram function are shown below, ordered by popularity by default.
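For orientation, here is a minimal sketch of how the op is typically invoked. The corpus file name "text8" and all hyperparameter values are placeholders, and the legacy TensorFlow 0.x API (tf.Session, the tensorflow.models.embedding package) is assumed:

import tensorflow as tf
from tensorflow.models.embedding import gen_word2vec as word2vec

# The skipgram op reads a plain-text corpus, builds the vocabulary and
# emits batches of (center word id, context word id) pairs.
(words, counts, words_per_epoch, current_epoch, total_words_processed,
 examples, labels) = word2vec.skipgram(filename="text8",
                                       batch_size=128,
                                       window_size=5,
                                       min_count=5,
                                       subsample=1e-3)

with tf.Session() as sess:
    # The vocabulary tensors are constant after the corpus has been scanned.
    vocab_words, vocab_counts = sess.run([words, counts])
    print("Vocab size:", len(vocab_words))
    # Each run of (examples, labels) yields one training batch.
    batch_examples, batch_labels = sess.run([examples, labels])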
Example 1: build_graph
def build_graph(self):
    """Build the graph for the full model."""
    opts = self._options
    # The training data. A text file.
    (words, counts, words_per_epoch, self._epoch, self._words, examples,
     labels) = word2vec.skipgram(filename=opts.train_data,
                                 batch_size=opts.batch_size,
                                 window_size=opts.window_size,
                                 min_count=opts.min_count,
                                 subsample=opts.subsample)
    (opts.vocab_words, opts.vocab_counts,
     opts.words_per_epoch) = self._session.run([words, counts, words_per_epoch])
    opts.vocab_size = len(opts.vocab_words)
    print("Data file: ", opts.train_data)
    print("Vocab size: ", opts.vocab_size - 1, " + UNK")
    print("Words per epoch: ", opts.words_per_epoch)
    self._examples = examples
    self._labels = labels
    self._id2word = opts.vocab_words
    for i, w in enumerate(self._id2word):
        self._word2id[w] = i
    true_logits, sampled_logits = self.forward(examples, labels)
    loss = self.nce_loss(true_logits, sampled_logits)
    tf.scalar_summary("NCE loss", loss)
    self._loss = loss
    self.optimize(loss)
    # Properly initialize all variables.
    tf.initialize_all_variables().run()
    self.saver = tf.train.Saver()
Contributor: debaratidas1994, Project: tensorflow, Lines: 31, Source: word2vec.py
Example 2: build_graph
def build_graph(self):
    # Get the training data.
    (words, counts, words_per_epoch, current_epoch, total_words_processed,
     examples, labels) = word2vec.skipgram(filename=word_config.train_data_path,
                                           batch_size=word_config.batch_size,
                                           window_size=word_config.window_size,
                                           min_count=word_config.min_count,
                                           subsample=word_config.subsample)
    # vocab_words, vocab_counts, words_per_epoch = self._sess.run([words, counts, words_per_epoch])
    if tf.gfile.Exists(os.path.join(word_config.output_dir, 'vocab.txt')):
        vocab_words, vocab_counts = self.load_vocab()
    else:
        vocab_words, vocab_counts = self._sess.run([words, counts])
    vocab_size = len(vocab_words)
    print("Data file: ", word_config.train_data_path)
    print("Vocab size: ", vocab_size - 1, " + UNK")
    # print("Words per epoch: ", words_per_epoch)
    self._id2word = vocab_words
    for id, word in enumerate(self._id2word):
        self._word2id[word] = id
    w_embed_in = tf.Variable(tf.random_uniform([vocab_size, word_config.embed_size],
                                               -0.5 / word_config.embed_size,
                                               0.5 / word_config.embed_size),
                             name="w_embed_in")
    w_embed_out = tf.Variable(tf.zeros([vocab_size, word_config.embed_size]), name="w_embed_out")
    self.param_summary(w_embed_in)
    self.param_summary(w_embed_out)
    # learning_rate = tf.Variable(word_config.learning_rate, trainable=False, name="learning_rate")
    global_step = tf.Variable(0, trainable=False, name="global_step")
    total_words = words_per_epoch * word_config.max_steps
    learning_rate = word_config.learning_rate * tf.maximum(
        0.0001, tf.cast(1 - total_words_processed / total_words, tf.float32))
    inc = global_step.assign_add(1)
    with tf.control_dependencies([inc]):
        train = word2vec.neg_train(w_embed_in, w_embed_out, examples, labels, learning_rate,
                                   vocab_counts.tolist(), word_config.nr_neg_samples)
    self._vocab_words = vocab_words
    self._vocab_counts = vocab_counts
    self._vocab_size = vocab_size
    self._w_embed_in = w_embed_in
    self._w_embed_out = w_embed_out
    self._train = train
    self._examples = examples
    self._labels = labels
    self._global_step = global_step
    self._current_epoch = current_epoch
    self._total_words_processed = total_words_processed
    self._learning_rate = learning_rate
    print("end of build graph")
Contributor: ioanachelu, Project: word2vec, Lines: 57, Source: word2vec.py
Example 3: build_graph
def build_graph(self):
    """Build the model graph."""
    opts = self._options
    # The training data. A text file.
    (words, counts, words_per_epoch, current_epoch, total_words_processed,
     examples, labels) = word2vec.skipgram(filename=opts.train_data,
                                           batch_size=opts.batch_size,
                                           window_size=opts.window_size,
                                           min_count=opts.min_count,
                                           subsample=opts.subsample)
    (opts.vocab_words, opts.vocab_counts,
     opts.words_per_epoch) = self._session.run([words, counts, words_per_epoch])
    opts.vocab_size = len(opts.vocab_words)
    print("Data file: ", opts.train_data)
    print("Vocab size: ", opts.vocab_size - 1, " + UNK")
    print("Words per epoch: ", opts.words_per_epoch)
    self._id2word = opts.vocab_words
    for i, w in enumerate(self._id2word):
        self._word2id[w] = i
    # Declare all variables we need.
    # Input words embedding: [vocab_size, emb_dim]
    w_in = tf.Variable(
        tf.random_uniform([opts.vocab_size, opts.emb_dim],
                          -0.5 / opts.emb_dim, 0.5 / opts.emb_dim),
        name="w_in")
    # Output words embedding: [vocab_size, emb_dim]
    w_out = tf.Variable(tf.zeros([opts.vocab_size, opts.emb_dim]), name="w_out")
    # Global step: scalar, i.e., shape [].
    global_step = tf.Variable(0, name="global_step")
    # Linear learning rate decay.
    words_to_train = float(opts.words_per_epoch * opts.epochs_to_train)
    lr = opts.learning_rate * tf.maximum(
        0.0001, 1.0 - tf.cast(total_words_processed, tf.float32) / words_to_train)
    # Training nodes.
    inc = global_step.assign_add(1)
    with tf.control_dependencies([inc]):
        train = word2vec.neg_train(w_in,
                                   w_out,
                                   examples,
                                   labels,
                                   lr,
                                   vocab_count=opts.vocab_counts.tolist(),
                                   num_negative_samples=opts.num_samples)
    self._w_in = w_in
    self._examples = examples
    self._labels = labels
    self._lr = lr
    self._train = train
    self.step = global_step
    self._epoch = current_epoch
    self._words = total_words_processed
Contributor: ngthuydiem, Project: skytutor, Lines: 56, Source: train_with_tensorflow.py
Example 4: build_graph
def build_graph(self):
    opts = self._options
    (words, counts, words_per_epoch, self._epoch, self._words, examples,
     labels) = word2vec.skipgram(filename="text8",
                                 batch_size=opts.batch_size,
                                 window_size=opts.window_size,
                                 min_count=opts.min_count,
                                 subsample=0)
    (opts.vocab_words, opts.vocab_counts,
     opts.words_per_epoch) = self._session.run([words, counts, words_per_epoch])
    opts.vocab_size = len(opts.vocab_words)
    print("Data file: ", opts.train_data)
    print("Vocab size: ", opts.vocab_size - 1, " + UNK")
    print("Words per epoch: ", opts.words_per_epoch)
    self._examples = examples
    self._labels = labels
    self._id2word = opts.vocab_words
    for i, w in enumerate(self._id2word):
        self._word2id[w] = i
    true_logits, sampled_logits = self.forward(examples, labels)
    loss = self.nce_loss(true_logits, sampled_logits)
    tf.scalar_summary("NCE loss", loss)
    self._loss = loss
    self.optimize(loss)
Contributor: kingtaurus, Project: cs224d, Lines: 24, Source: tensorflow_word2vec.py
Example 5: build_graph
def build_graph(self):
    """Build the graph for the full model."""
    opts = self._options
    # The training data. A text file.
    (words, counts, words_per_epoch, self._epoch, self._words, examples,
     labels) = word2vec.skipgram(filename=opts.train_data,
                                 batch_size=opts.batch_size,
                                 window_size=opts.window_size,
                                 min_count=opts.min_count,
                                 subsample=opts.subsample)
    # NEW: read sampling corpus (= all files in the same dir as train_data except the training data).
    full_path = os.path.realpath(opts.train_data)
    path, filename = os.path.split(full_path)
    sampling_files = []
    for file in os.listdir(path):
        if (file.endswith(".txt") or file.endswith(".tok")) and file != filename:
            sampling_files.append(path + "/" + file)
    print("Files for sampling: ", ", ".join(sampling_files))
    # Write a new file as the concatenation of all sampling files.
    sample_data = opts.train_data + ".sample"
    sample_train_data = sample_data + ".train"
    o = codecs.open(sample_data, "w", "utf8")
    oo = codecs.open(sample_train_data, "w", "utf8")
    for sampling_file in sampling_files:
        f = open(sampling_file, "r")
        t = f.read()
        o.write(t.decode("utf8") + " ")  # concat all files
        oo.write(t.decode("utf8") + " ")
        f.close()
    o.close()
    t = codecs.open(opts.train_data, "r", "utf8")
    oo.write(t.read())
    t.close()
    oo.close()
    # The sampling data. A text file.
    (words_samples, counts_samples, words_per_epoch_samples, b_epoch_samples,
     b_words_samples, examples_samples,
     labels_samples) = word2vec.skipgram(filename=sample_data,
                                         batch_size=opts.batch_size,
                                         window_size=opts.window_size,
                                         min_count=opts.min_count,
                                         subsample=opts.subsample)
    # Sampling plus training data for getting the full vocabulary for embeddings.
    (words_samples_train, counts_samples_train, words_per_epoch_samples_train,
     b_epoch_samples_train, b_words_samples_train, examples_samples_train,
     labels_samples_train) = word2vec.skipgram(filename=sample_train_data,
                                               batch_size=opts.batch_size,
                                               window_size=opts.window_size,
                                               min_count=opts.min_count,
                                               subsample=opts.subsample)
    (opts.all_words, opts.all_counts,
     all_words_per_epoch) = self._session.run([words_samples_train, counts_samples_train, words_per_epoch])
    (opts.sample_words, opts.sample_counts,
     sample_words_per_epoch) = self._session.run([words_samples, counts_samples, words_per_epoch])
    # First add sample words.
    for s in opts.sample_words:
        last_index = len(self._word2id)
        self._word2id.setdefault(s, last_index)
    (opts.vocab_words, opts.vocab_counts,
     opts.words_per_epoch) = self._session.run([words, counts, words_per_epoch])
    # Then add training words.
    for v in opts.vocab_words:
        last_index = len(self._word2id)
        self._word2id.setdefault(v, last_index)
    print("Word2id: ", self._word2id)
    opts.vocab_size = len(self._word2id)  # NOTE: wc20(train)+wc(sample) != wc20(train+sample) -> therefore use word2id (proper union)
    print("Sample file: ", sample_data)
    print("Data file: ", opts.train_data)
    print("Vocab size: ", opts.vocab_size - 1, " + UNK")
    print("Words per epoch: ", opts.words_per_epoch)
    self._examples = examples_samples
    self._labels = labels_samples
    # self._id2word = opts.all_words
    # for i, w in enumerate(self._id2word):
    for (w, i) in self._word2id.iteritems():
        self._id2word[i] = w
    print("id2word: ", self._id2word)
    true_logits, sampled_logits = self.forward(examples_samples, labels_samples)
    loss = self.nce_loss(true_logits, sampled_logits)
    tf.scalar_summary("NCE loss", loss)
    self._loss = loss
    self.optimize(loss)
    # Properly initialize all variables.
    tf.initialize_all_variables().run()
    self.saver = tf.train.Saver()
Contributor: juliakreutzer, Project: loons, Lines: 100, Source: loons.py
Note: the tensorflow.models.embedding.gen_word2vec.skipgram examples in this article were compiled by 纯净天空 from source-code and documentation hosting platforms such as GitHub and MSDocs. The code snippets are taken from open-source projects contributed by various developers, and copyright remains with the original authors. Please refer to each project's license before distributing or using the code, and do not republish without permission.