• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    公众号

Python utils.TextLoader类代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中utils.TextLoader的典型用法代码示例。如果您正苦于以下问题：Python TextLoader类的具体用法？Python TextLoader怎么用？Python TextLoader使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。



在下文中一共展示了TextLoader类的19个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: test

def test(test_args):
    """Evaluate the latest saved model on a test file and print its perplexity.

    The configuration pickled at training time is loaded from
    ``test_args.save_dir`` and used to rebuild the graph. If a checkpoint is
    found in ``args.save_dir`` it is restored; otherwise the freshly
    initialised variables are evaluated as they are.

    Args:
        test_args: options object; ``save_dir`` and ``test_file`` are read.
    """
    started_at = time.time()

    config_path = os.path.join(test_args.save_dir, 'config.pkl')
    with open(config_path) as config_file:
        args = cPickle.load(config_file)

    data_loader = TextLoader(args, train=False)
    test_data = data_loader.read_dataset(test_args.test_file)

    args.word_vocab_size = data_loader.word_vocab_size
    print("Word vocab size: " + str(data_loader.word_vocab_size) + "\n")

    print("Begin testing...")
    # If using gpu:
    # gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9)
    # gpu_config = tf.ConfigProto(log_device_placement=False, gpu_options=gpu_options)
    # add parameters to the tf session -> tf.Session(config=gpu_config)
    with tf.Graph().as_default(), tf.Session() as sess:
        uniform_init = tf.random_uniform_initializer(-args.init_scale, args.init_scale)
        with tf.variable_scope("model", reuse=None, initializer=uniform_init):
            mtest = WordLM(args, is_training=False, is_testing=True)

        saver = tf.train.Saver(tf.all_variables())
        tf.initialize_all_variables().run()

        # Restore the checkpoint recorded for the save directory, if there is one.
        checkpoint = tf.train.get_checkpoint_state(args.save_dir)
        if checkpoint and checkpoint.model_checkpoint_path:
            saver.restore(sess, checkpoint.model_checkpoint_path)

        # tf.no_op() is passed where run_epoch expects an op to run per step.
        test_perplexity = run_epoch(sess, mtest, test_data, data_loader, tf.no_op())
        print("Test Perplexity: %.3f" % test_perplexity)
        print("Test time: %.0f" % (time.time() - started_at))
开发者ID:agora-at-uoe,项目名称:tf-tutorial,代码行数:33,代码来源:test.py


示例2: train

def train(args):
    """Train the model, saving a checkpoint every ``args.save_every`` batches.

    The run configuration and the loader's ``(chars, vocab)`` pair are pickled
    into ``args.save_dir`` before training starts.

    Args:
        args: options namespace; ``vocab_size`` is set on it from the loader.
    """
    loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
    args.vocab_size = loader.vocab_size

    config_path = os.path.join(args.save_dir, 'config.pkl')
    with open(config_path, 'w') as config_file:
        cPickle.dump(args, config_file)
    vocab_path = os.path.join(args.save_dir, 'chars_vocab.pkl')
    with open(vocab_path, 'w') as vocab_file:
        cPickle.dump((loader.chars, loader.vocab), vocab_file)

    model = Model(args)

    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        saver = tf.train.Saver(tf.all_variables())
        for epoch in xrange(args.num_epochs):
            # Learning rate is scaled by decay_rate once per epoch.
            decayed_lr = args.learning_rate * (args.decay_rate ** epoch)
            sess.run(tf.assign(model.lr, decayed_lr))
            loader.reset_batch_pointer()
            state = model.initial_state.eval()
            for batch in xrange(loader.num_batches):
                tick = time.time()
                x, y = loader.next_batch()
                # The previous batch's final state is fed back as the initial state.
                feed = {model.input_data: x,
                        model.targets: y,
                        model.initial_state: state}
                fetches = [model.cost, model.final_state, model.train_op]
                train_loss, state, _ = sess.run(fetches, feed)
                tock = time.time()

                step = epoch * loader.num_batches + batch
                total_steps = args.num_epochs * loader.num_batches
                print("{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}".format(
                    step, total_steps, epoch, train_loss, tock - tick))

                if step % args.save_every == 0:
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=step)
                    print("model saved to {}".format(checkpoint_path))
开发者ID:nakosung,项目名称:char-rnn-tensorflow,代码行数:32,代码来源:train.py


示例3: train

def train(args):
    """Train the model, optionally resuming from a previously saved run.

    When ``args.init_from`` is set, the directory must contain ``config.pkl``,
    ``chars_vocab.pkl`` and a checkpoint. The saved configuration and
    vocabulary are compared with the current ones before the checkpoint is
    restored. The current configuration and vocabulary are then pickled into
    ``args.save_dir`` and training runs for ``args.num_epochs`` epochs.

    Args:
        args: options namespace; ``vocab_size`` is set on it from the loader.

    Raises:
        AssertionError: if a required file or checkpoint is missing, or the
            saved model or vocabulary disagrees with the current run.
    """
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
    args.vocab_size = data_loader.vocab_size

    # check compatibility if training is continued from previously saved model
    if args.init_from is not None:
        # check if all necessary files exist
        assert os.path.isdir(args.init_from)," %s must be a a path" % args.init_from
        assert os.path.isfile(os.path.join(args.init_from,"config.pkl")),"config.pkl file does not exist in path %s"%args.init_from
        assert os.path.isfile(os.path.join(args.init_from,"chars_vocab.pkl")),"chars_vocab.pkl file does not exist in path %s" % args.init_from
        ckpt = tf.train.get_checkpoint_state(args.init_from)
        assert ckpt,"No checkpoint found"
        assert ckpt.model_checkpoint_path,"No model path found in checkpoint"

        # open old config and check if models are compatible
        # The pickles are written in binary mode below, so read them in binary
        # mode too; a text-mode read fails on Python 3.
        # NOTE(review): unpickling executes code from the file; only point
        # init_from at directories you trust.
        with open(os.path.join(args.init_from, 'config.pkl'), 'rb') as f:
            saved_model_args = cPickle.load(f)
        need_be_same=["model","rnn_size","num_layers","seq_length"]
        for checkme in need_be_same:
            assert vars(saved_model_args)[checkme]==vars(args)[checkme],"Command line argument and saved model disagree on '%s' "%checkme

        # open saved vocab/dict and check if vocabs/dicts are compatible
        with open(os.path.join(args.init_from, 'chars_vocab.pkl'), 'rb') as f:
            saved_chars, saved_vocab = cPickle.load(f)
        assert saved_chars==data_loader.chars, "Data and loaded model disagreee on character set!"
        assert saved_vocab==data_loader.vocab, "Data and loaded model disagreee on dictionary mappings!"

    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.chars, data_loader.vocab), f)

    model = Model(args)

    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        saver = tf.train.Saver(tf.all_variables())
        # restore model
        if args.init_from is not None:
            saver.restore(sess, ckpt.model_checkpoint_path)
        for e in range(args.num_epochs):
            # Learning rate is scaled by decay_rate once per epoch.
            sess.run(tf.assign(model.lr, args.learning_rate * (args.decay_rate ** e)))
            data_loader.reset_batch_pointer()
            state = model.initial_state.eval()
            for b in range(data_loader.num_batches):
                start = time.time()
                x, y = data_loader.next_batch()
                feed = {model.input_data: x, model.targets: y, model.initial_state: state}
                train_loss, state, _ = sess.run([model.cost, model.final_state, model.train_op], feed)
                end = time.time()

                step = e * data_loader.num_batches + b
                print("{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}"
                      .format(step,
                              args.num_epochs * data_loader.num_batches,
                              e, train_loss, end - start))

                # Always save after the very last batch so the final weights are kept.
                is_last_batch = (e == args.num_epochs - 1
                                 and b == data_loader.num_batches - 1)
                if step % args.save_every == 0 or is_last_batch:
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=step)
                    print("model saved to {}".format(checkpoint_path))
开发者ID:owen-d,项目名称:tensorflow_practice,代码行数:59,代码来源:train.py


示例4: train

def train(args):
    """Train the model, logging train and validation loss to ``log.csv``.

    Every ``args.save_every`` batches the model is evaluated on
    ``data_loader.val_batches`` and a checkpoint is written. After each epoch
    the accumulated per-batch log is written to ``log.csv`` in
    ``args.save_dir``; batches without an evaluation get ``None`` as val_loss.

    Args:
        args: options namespace; ``vocab_size`` is set on it from the loader.
    """
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
    args.vocab_size = data_loader.vocab_size

    with open(os.path.join(args.save_dir, 'config.pkl'), 'w') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'w') as f:
        cPickle.dump((data_loader.chars, data_loader.vocab), f)

    model = Model(args)

    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        saver = tf.train.Saver(tf.all_variables())
        train_loss_iterations = {'iteration': [], 'epoch': [], 'train_loss': [], 'val_loss': []}

        for e in xrange(args.num_epochs):
            # Learning rate is scaled by decay_rate once per epoch.
            sess.run(tf.assign(model.lr, args.learning_rate * (args.decay_rate ** e)))
            data_loader.reset_batch_pointer()
            state = model.initial_state.eval()
            for b in xrange(data_loader.num_batches):
                start = time.time()
                x, y = data_loader.next_batch()
                feed = {model.input_data: x, model.targets: y, model.initial_state: state}
                train_loss, state, _ = sess.run([model.cost, model.final_state, model.train_op], feed)
                end = time.time()
                batch_idx = e * data_loader.num_batches + b
                print("{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}"
                      .format(batch_idx,
                              args.num_epochs * data_loader.num_batches,
                              e, train_loss, end - start))
                train_loss_iterations['iteration'].append(batch_idx)
                train_loss_iterations['epoch'].append(e)
                train_loss_iterations['train_loss'].append(train_loss)

                if batch_idx % args.save_every == 0:

                    # evaluate
                    state_val = model.initial_state.eval()
                    avg_val_loss = 0
                    for x_val, y_val in data_loader.val_batches:
                        feed_val = {model.input_data: x_val, model.targets: y_val, model.initial_state: state_val}
                        # Fetch only the cost and state. Fetching train_op here
                        # would apply optimiser updates computed from the
                        # validation batches.
                        val_loss, state_val = sess.run([model.cost, model.final_state], feed_val)
                        avg_val_loss += val_loss / len(data_loader.val_batches)
                    print('val_loss: {:.3f}'.format(avg_val_loss))
                    train_loss_iterations['val_loss'].append(avg_val_loss)

                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=batch_idx)
                    print("model saved to {}".format(checkpoint_path))
                else:
                    # Keep the val_loss column aligned with the other columns.
                    train_loss_iterations['val_loss'].append(None)

            # Rewrite the full log once per epoch.
            pd.DataFrame(data=train_loss_iterations,
                         columns=train_loss_iterations.keys()).to_csv(os.path.join(args.save_dir, 'log.csv'))
开发者ID:gfortaine,项目名称:grid-lstm-tensorflow,代码行数:55,代码来源:train.py


示例5: TestUtilsMethods

class TestUtilsMethods(unittest.TestCase):
    """Checks for TextLoader using the fixture data under ``tests/test_data``."""

    def setUp(self):
        """Create a loader with batch_size=2 and seq_length=5 for every test."""
        self.data_loader = TextLoader("tests/test_data", batch_size=2, seq_length=5)

    def test_init(self):
        """Print the loader's vocab, tensor and vocab size; asserts nothing."""
        loader = self.data_loader
        print (loader.vocab)
        print (loader.tensor)
        print (loader.vocab_size)

    def test_build_vocab(self):
        """build_vocab returns the expected word-to-index mapping and its inverse."""
        expected_words = ["I", "love", "cat"]
        vocab, vocab_inv = self.data_loader.build_vocab(["I", "love", "cat", "cat"])
        print (vocab, vocab_inv)

        # Must include I, love, and cat
        self.assertItemsEqual(vocab, expected_words)
        self.assertDictEqual(vocab, {'I': 0, 'love': 2, 'cat': 1})

        self.assertItemsEqual(vocab_inv, expected_words)

    def test_batch_vocab(self):
        """x without its first item holds the same items as y without its last."""
        print (np.array(self.data_loader.x_batches).shape)
        # Same two rows of the first batch as before, checked in the same order.
        for row in (0, 1):
            self.assertItemsEqual(self.data_loader.x_batches[0][row][1:],
                                  self.data_loader.y_batches[0][row][:-1])
开发者ID:OuYag,项目名称:word-rnn-tensorflow,代码行数:26,代码来源:test_utils.py


示例6: train

def train(args):
    """Train the model and write a final checkpoint once all epochs finish.

    ``args.save_dir`` is created if it does not exist. The configuration and
    the loader's ``(chars, vocab)`` pair are pickled there so a model can be
    reloaded for sampling. Checkpoints are written every ``args.save_every``
    batches and once more after the last epoch.

    Args:
        args: options namespace; ``vocab_size`` is set on it from the loader.
    """
    loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
    args.vocab_size = loader.vocab_size

    # Make sure there is somewhere to write the pickles and checkpoints.
    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    config_path = os.path.join(args.save_dir, 'config.pkl')
    with open(config_path, 'wb') as config_file:
        cPickle.dump(args, config_file)
    vocab_path = os.path.join(args.save_dir, 'chars_vocab.pkl')
    with open(vocab_path, 'wb') as vocab_file:
        cPickle.dump((loader.chars, loader.vocab), vocab_file)

    model = Model(args)

    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        saver = tf.train.Saver(tf.all_variables())
        for epoch in range(args.num_epochs):
            # Learning rate is scaled by decay_rate once per epoch.
            decayed_lr = args.learning_rate * (args.decay_rate ** epoch)
            sess.run(tf.assign(model.lr, decayed_lr))
            loader.reset_batch_pointer()
            state = model.initial_state.eval()
            for batch in range(loader.num_batches):
                tick = time.time()
                x, y = loader.next_batch()
                feed = {model.input_data: x,
                        model.targets: y,
                        model.initial_state: state}
                fetches = [model.cost, model.final_state, model.train_op]
                train_loss, state, _ = sess.run(fetches, feed)
                tock = time.time()

                step = epoch * loader.num_batches + batch
                total_steps = args.num_epochs * loader.num_batches
                print("{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}".format(
                    step, total_steps, epoch, train_loss, tock - tick))

                if step % args.save_every == 0:
                    checkpoint_path = os.path.join(args.save_dir, 'models.ckpt')
                    saver.save(sess, checkpoint_path, global_step=step)
                    print("models saved to {}".format(checkpoint_path))

        # Save the final state under the total number of batches run.
        final_path = os.path.join(args.save_dir, 'models.ckpt')
        saver.save(sess, final_path,
                   global_step=args.num_epochs * loader.num_batches)
开发者ID:Zbot21,项目名称:char-rnn-tensorflow,代码行数:43,代码来源:train.py


示例7: train

def train(args):
    """Train the model and print the mean validation loss after every epoch.

    The loader is driven through two named splits, ``'train'`` and
    ``'validation'``. Checkpoints are written every ``args.save_every``
    training batches. The validation pass fetches only the cost and final
    state, not the training op.

    Args:
        args: options namespace; printed on entry, and ``vocab_size`` is set
            on it from the loader.
    """
    print(args)
    loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
    args.vocab_size = loader.vocab_size

    config_path = os.path.join(args.save_dir, 'config.pkl')
    with open(config_path, 'wb') as config_file:
        cPickle.dump(args, config_file)
    vocab_path = os.path.join(args.save_dir, 'chars_vocab.pkl')
    with open(vocab_path, 'wb') as vocab_file:
        cPickle.dump((loader.chars, loader.vocab), vocab_file)

    model = Model(args)

    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        saver = tf.train.Saver(tf.all_variables())
        for epoch in range(args.num_epochs):
            # Learning rate is scaled by decay_rate once per epoch.
            decayed_lr = args.learning_rate * (args.decay_rate ** epoch)
            sess.run(tf.assign(model.lr, decayed_lr))
            loader.reset_batch_pointer('train')

            state = model.initial_state.eval()
            for batch in xrange(loader.ntrain):
                tick = time.time()
                x, y = loader.next_batch('train')

                feed = {model.input_data: x,
                        model.targets: y,
                        model.initial_state: state}
                fetches = [model.cost, model.final_state, model.train_op]
                train_loss, state, _ = sess.run(fetches, feed)
                tock = time.time()

                step = epoch * loader.ntrain + batch
                total_steps = args.num_epochs * loader.ntrain
                print("{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}".format(
                    step, total_steps, epoch, train_loss, tock - tick))

                if step % args.save_every == 0:
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=step)
                    print("model saved to {}".format(checkpoint_path))

            # eval validation loss
            loader.reset_batch_pointer('validation')
            validation_state = model.initial_state.eval()
            loss_total = 0
            for _ in xrange(loader.nvalidation):
                x, y = loader.next_batch('validation')
                feed = {model.input_data: x,
                        model.targets: y,
                        model.initial_state: validation_state}
                batch_loss, validation_state = sess.run(
                    [model.cost, model.final_state], feed)
                loss_total += batch_loss

            validation_loss = loss_total / loader.nvalidation
            print("validation loss is {}".format(validation_loss))
开发者ID:jiongye,项目名称:char-rnn-tensorflow,代码行数:56,代码来源:train.py


示例8: train

def train(args):

    data_loader = TextLoader(args.data_path, args.batch_size, args.seq_length)
    args.vocab_size = data_loader.vocab_size
    args.file_size = data_loader.file_size
    print("Vocab size: ",args.vocab_size)
    print("File size: ",args.file_size)
    args.lower_bound = 0 #If we know the entropy then we set it to this
    data_info = {}
    if args.info_path is not None:
        assert os.path.isfile(args.info_path),"Info file not found in the path: %s"%args.info_path

        #Open the info file
        with open(args.info_path, 'rb') as f:
            data_info = json.load(f)
            #Assuming we know entropy
            args.lower_bound = data_info['Entropy']
            print(data_info)

    # check compatibility if training is continued from previously saved model
    if args.init_from is not None:
        # check if all necessary files exist 
        assert os.path.isdir(args.init_from)," %s must be a a path" % args.init_from
        assert os.path.isfile(os.path.join(args.init_from,"config.pkl")),"config.pkl file does not exist in path %s"%args.init_from
        assert os.path.isfile(os.path.join(args.init_from,"chars_vocab.pkl")),"chars_vocab.pkl.pkl file does not exist in path %s" % args.init_from
        ckpt = tf.train.get_checkpoint_state(args.init_from)
        assert ckpt,"No checkpoint found"
        assert ckpt.model_checkpoint_path,"No model path found in checkpoint"

        # open old config and check if models are compatible
        with open(os.path.join(args.init_from, 'config.pkl'), 'rb') as f:
            saved_model_args = cPickle.load(f)
        need_be_same=["model","rnn_size","num_layers","seq_length"]
        for checkme in need_be_same:
            assert vars(saved_model_args)[checkme]==vars(args)[checkme],"Command line argument and saved model disagree on '%s' "%checkme
        
        # open saved vocab/dict and check if vocabs/dicts are compatible
        with open(os.path.join(args.init_from, 'chars_vocab.pkl'), 'rb') as f:
            saved_chars, saved_vocab = cPickle.load(f)
        assert saved_chars==data_loader.chars, "Data and loaded model disagree on character set!"
        assert saved_vocab==data_loader.vocab, "Data and loaded model disagree on dictionary mappings!"
        
    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.chars, data_loader.vocab), f)
        
    
    ##################################################
    # Get the model
    ##################################################
    model = Model(args)
    print("model Loaded")

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(tf.global_variables())
        writer = tf.summary.FileWriter(args.summary_dir,sess.graph)
        # restore model
        if args.init_from is not None:
            saver.restore(sess, ckpt.model_checkpoint_path)
        
        ######################################################
        # Perform the training
        #####################################################
        for e in range(args.num_epochs):
            sess.run(tf.assign(model.lr, args.learning_rate * (args.decay_rate ** e)))
            data_loader.reset_batch_pointer() #Need to check what this does
            state = sess.run(model.initial_state) #What is this initial state
            cumul_loss = 0
             
            for b in range(data_loader.num_batches):
                start = time.time()
                x, y = data_loader.next_batch()
                feed = {model.input_data: x, model.targets: y}
                
                for i, (c, h) in enumerate(model.initial_state):
                    feed[c] = state[i].c
                    feed[h] = state[i].h
                summary, train_loss, state, _ = sess.run([model.merged_summaries, model.cost, model.final_state, model.train_op], feed) #what is the training loss
                train_loss /= np.log(2)
                cumul_loss += train_loss
                end = time.time()
                print("{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}" \
                    .format(e * data_loader.num_batches + b,
                            args.num_epochs * data_loader.num_batches,
                            e, train_loss, end - start))
                if (e * data_loader.num_batches + b) % args.save_every == 0\
                    or (e==args.num_epochs-1 and b == data_loader.num_batches-1): # save for the last result
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step = e * data_loader.num_batches + b)
                    print("model saved to {}".format(checkpoint_path))

                if b%10 == 0:
                    writer.add_summary(summary,e*data_loader.num_batches + b)
             
            cumul_loss /= data_loader.num_batches
            print("Epoch {}: Cumulative Loss for the epoch: {:.3f}".format(e,cumul_loss))
            if (abs(cumul_loss - args.lower_bound) < 0.1):
                print("Stopping Training as we get a good loss.. :) ... ") 
#.........这里部分代码省略.........
开发者ID:jessehui,项目名称:NN_compression,代码行数:101,代码来源:train.py


示例9: main

def main(_):
  pp.pprint(FLAGS.__flags)

  if not os.path.exists(FLAGS.checkpoint_dir):
    print(" [*] Creating checkpoint directory...")
    os.makedirs(FLAGS.checkpoint_dir)

  data_loader = TextLoader(os.path.join(FLAGS.data_dir, FLAGS.dataset_name),
                           FLAGS.batch_size, FLAGS.seq_length)
  vocab_size = data_loader.vocab_size
  valid_size = 50
  valid_window = 100

  with tf.variable_scope('model'):
    train_model = CharRNN(vocab_size, FLAGS.batch_size, FLAGS.rnn_size,
                          FLAGS.layer_depth, FLAGS.num_units, FLAGS.rnn_type,
                          FLAGS.seq_length, FLAGS.keep_prob,
                          FLAGS.grad_clip)

  with tf.variable_scope('model', reuse=True):
    simple_model = CharRNN(vocab_size, 1, FLAGS.rnn_size,
                           FLAGS.layer_depth, FLAGS.num_units, FLAGS.rnn_type,
                           1, FLAGS.keep_prob,
                           FLAGS.grad_clip)

  with tf.variable_scope('model', reuse=True):
    valid_model = CharRNN(vocab_size, FLAGS.batch_size, FLAGS.rnn_size,
                          FLAGS.layer_depth, FLAGS.num_units, FLAGS.rnn_type,
                          FLAGS.seq_length, FLAGS.keep_prob,
                          FLAGS.grad_clip)

  with tf.Session() as sess:
    tf.global_variables_initializer().run()

    train_model.load(sess, FLAGS.checkpoint_dir, FLAGS.dataset_name)

    best_val_pp = float('inf')
    best_val_epoch = 0
    valid_loss = 0
    valid_perplexity = 0
    start = time.time()

    if FLAGS.export:
      print("Eval...")
      final_embeddings = train_model.embedding.eval(sess)
      emb_file = os.path.join(FLAGS.data_dir, FLAGS.dataset_name, 'emb.npy')
      print("Embedding shape: {}".format(final_embeddings.shape))
      np.save(emb_file, final_embeddings)

    else: # Train
      current_step = 0
      similarity, valid_examples, _ = compute_similarity(train_model, valid_size, valid_window, 6)

      # save hyper-parameters
      cPickle.dump(FLAGS.__flags, open(FLAGS.log_dir + "/hyperparams.pkl", 'wb'))

      # run it!
      for e in range(FLAGS.num_epochs):
        data_loader.reset_batch_pointer()

        # decay learning rate
        sess.run(tf.assign(train_model.lr, FLAGS.learning_rate))

        # iterate by batch
        for b in range(data_loader.num_batches):
          x, y = data_loader.next_batch()
          res, time_batch = run_epochs(sess, x, y, train_model)
          train_loss = res["loss"]
          train_perplexity = np.exp(train_loss)
          iterate = e * data_loader.num_batches + b

          # print log
          print("{}/{} (epoch {}) loss = {:.2f}({:.2f}) perplexity(train/valid) = {:.2f}({:.2f}) time/batch = {:.2f} chars/sec = {:.2f}k"\
              .format(e * data_loader.num_batches + b,
                      FLAGS.num_epochs * data_loader.num_batches,
                      e, train_loss, valid_loss, train_perplexity, valid_perplexity,
                      time_batch, (FLAGS.batch_size * FLAGS.seq_length) / time_batch / 1000))

          current_step = tf.train.global_step(sess, train_model.global_step)

        # validate
        valid_loss = 0

        for vb in range(data_loader.num_valid_batches):
          res, valid_time_batch = run_epochs(sess, data_loader.x_valid[vb], data_loader.y_valid[vb], valid_model, False)
          valid_loss += res["loss"]

        valid_loss = valid_loss / data_loader.num_valid_batches
        valid_perplexity = np.exp(valid_loss)

        print("### valid_perplexity = {:.2f}, time/batch = {:.2f}".format(valid_perplexity, valid_time_batch))

        log_str = ""

        # Generate sample
        smp1 = simple_model.sample(sess, data_loader.chars, data_loader.vocab, UNK_ID, 5, u"我喜歡做")
        smp2 = simple_model.sample(sess, data_loader.chars, data_loader.vocab, UNK_ID, 5, u"他吃飯時會用")
        smp3 = simple_model.sample(sess, data_loader.chars, data_loader.vocab, UNK_ID, 5, u"人類總要重複同樣的")
        smp4 = simple_model.sample(sess, data_loader.chars, data_loader.vocab, UNK_ID, 5, u"天色暗了,好像快要")

#.........这里部分代码省略.........
开发者ID:indiejoseph,项目名称:chinese-char-rnn,代码行数:101,代码来源:train.py


示例10: train

def train(args):
    """Train the word-level model with summaries, optionally resuming a run.

    When ``args.init_from`` is set, the directory must hold ``config.pkl``,
    ``words_vocab.pkl`` and a checkpoint whose settings and vocabulary agree
    with the current run. Training then continues from the epoch and batch
    pointers read from the restored model. Summaries go to ``args.log_dir``
    and checkpoints to ``args.save_dir``.

    Args:
        args: options namespace; ``vocab_size`` is set from the loader, and
            ``init_from`` is reset to ``None`` after the first resumed epoch
            has picked up its batch pointer.

    Raises:
        AssertionError: if a required file or checkpoint is missing, or the
            saved model or vocabulary disagrees with the current run.
    """
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length, args.input_encoding)
    args.vocab_size = data_loader.vocab_size

    # Resuming: verify the previous run is present and compatible.
    if args.init_from is not None:
        saved_config_path = os.path.join(args.init_from, "config.pkl")
        saved_vocab_path = os.path.join(args.init_from, "words_vocab.pkl")

        assert os.path.isdir(args.init_from), " %s must be a path" % args.init_from
        assert os.path.isfile(saved_config_path), "config.pkl file does not exist in path %s" % args.init_from
        assert os.path.isfile(saved_vocab_path), "words_vocab.pkl.pkl file does not exist in path %s" % args.init_from
        ckpt = tf.train.get_checkpoint_state(args.init_from)
        assert ckpt, "No checkpoint found"
        assert ckpt.model_checkpoint_path, "No model path found in checkpoint"

        # These settings must match between the saved run and this one.
        with open(saved_config_path, 'rb') as config_file:
            saved_model_args = cPickle.load(config_file)
        for checkme in ["model", "rnn_size", "num_layers", "seq_length"]:
            assert vars(saved_model_args)[checkme] == vars(args)[checkme], "Command line argument and saved model disagree on '%s' " % checkme

        # The saved word list and mapping must equal the loader's.
        with open(saved_vocab_path, 'rb') as vocab_file:
            saved_words, saved_vocab = cPickle.load(vocab_file)
        assert saved_words == data_loader.words, "Data and loaded model disagree on word set!"
        assert saved_vocab == data_loader.vocab, "Data and loaded model disagree on dictionary mappings!"

    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as config_file:
        cPickle.dump(args, config_file)
    with open(os.path.join(args.save_dir, 'words_vocab.pkl'), 'wb') as vocab_file:
        cPickle.dump((data_loader.words, data_loader.vocab), vocab_file)

    model = Model(args)

    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(args.log_dir)
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_mem)
    session_config = tf.ConfigProto(gpu_options=gpu_options)

    with tf.Session(config=session_config) as sess:
        train_writer.add_graph(sess.graph)
        tf.global_variables_initializer().run()
        saver = tf.train.Saver(tf.global_variables())
        if args.init_from is not None:
            saver.restore(sess, ckpt.model_checkpoint_path)

        # Start at the epoch stored in the model.
        for e in range(model.epoch_pointer.eval(), args.num_epochs):
            decayed_lr = args.learning_rate * (args.decay_rate ** e)
            sess.run(tf.assign(model.lr, decayed_lr))
            data_loader.reset_batch_pointer()
            state = sess.run(model.initial_state)
            speed = 0
            if args.init_from is None:
                # Fresh (or already resumed) run: record the current epoch in the model.
                sess.run(model.epoch_pointer.assign(e))
            else:
                # First epoch after a restore: continue from the saved batch
                # pointer, then clear init_from so later epochs start at the
                # pointer left by reset_batch_pointer().
                data_loader.pointer = model.batch_pointer.eval()
                args.init_from = None

            for b in range(data_loader.pointer, data_loader.num_batches):
                tick = time.time()
                x, y = data_loader.next_batch()
                feed = {model.input_data: x,
                        model.targets: y,
                        model.initial_state: state,
                        model.batch_time: speed}
                fetches = [merged, model.cost, model.final_state,
                           model.train_op, model.inc_batch_pointer_op]
                summary, train_loss, state, _, _ = sess.run(fetches, feed)

                step = e * data_loader.num_batches + b
                train_writer.add_summary(summary, step)
                speed = time.time() - tick

                # The progress line is printed only when step is a multiple of batch_size.
                if step % args.batch_size == 0:
                    print("{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}".format(
                        step, args.num_epochs * data_loader.num_batches,
                        e, train_loss, speed))

                # Always save after the very last batch as well.
                is_last_batch = (e == args.num_epochs - 1
                                 and b == data_loader.num_batches - 1)
                if step % args.save_every == 0 or is_last_batch:
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=step)
                    print("model saved to {}".format(checkpoint_path))
        train_writer.close()
开发者ID:Sr-vZ,项目名称:word-rnn-tensorflow,代码行数:76,代码来源:train.py


示例11: train2

def train2(args):
    """Train the constrained model, resuming from the latest checkpoint if any.

    Checkpoints are saved with ``global_step = epoch + 1``, so the number after
    the last ``-`` in the latest checkpoint path is the epoch to resume from.
    A progress line with an ETA is printed for every batch; the ETA is
    refreshed every 25 batches from the time those 25 batches took.

    Args:
        args: options namespace; ``vocab_size`` is set on it from the loader.
    """
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length, args.reprocess)
    args.vocab_size = data_loader.vocab_size

    totalTask = args.num_epochs * data_loader.num_batches

    lastCheckpoint = tf.train.latest_checkpoint(args.save_dir)
    if lastCheckpoint is None:
        startEpoch = 0
    else:
        print("Last checkpoint : {}".format(lastCheckpoint))
        # The suffix after the last "-" is the global_step passed to saver.save below.
        startEpoch = int(lastCheckpoint.split("-")[-1])

    # Two spaces after "=" reproduce the output of the original print statement.
    print("startEpoch =  {}".format(startEpoch))

    with open(os.path.join(args.save_dir, 'config.pkl'), 'w') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'w') as f:
        cPickle.dump((data_loader.chars, data_loader.vocab), f)

    model = ConstrainedModel(args)

    etaCount = 0
    etaString = "-"
    etaStart = time.time()
    etaTime = 0

    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        saver = tf.train.Saver(tf.all_variables())
        if startEpoch > 0: # load latest checkpoint
            print("Loading last checkpoint")
            saver.restore(sess, lastCheckpoint)

        for e in xrange(startEpoch, args.num_epochs):
            sess.run(tf.assign(model.lr, decayForEpoch(args, e)))
            data_loader.reset_batch_pointer()
            state = model.initial_state.eval()
            for b in xrange(data_loader.num_batches):
                start = time.time()
                x, y, con = data_loader.next_batch()

                feed = {model.input_data: x, model.targets: y, model.initial_state: state, model.con_data:con}
                train_loss, state, _ = sess.run([model.cost, model.final_state, model.train_op], feed)
                end = time.time()

                taskNum = (e * data_loader.num_batches + b)
                etaCount += 1
                if etaCount % 25 == 0:
                    # duration covers the last 25 batches, so remaining / 25
                    # is the number of such windows still to run. Divide by
                    # 25.0: integer division floors on Python 2 and would
                    # report an ETA of 0 with fewer than 25 batches left.
                    duration = time.time() - etaStart
                    etaTime = (totalTask - (taskNum + 1)) / 25.0 * duration
                    m, s = divmod(etaTime, 60)
                    h, m = divmod(m, 60)
                    etaString = "%d:%02d:%02d" % (h, m, s)
                    etaStart = time.time()

                print("{}/{} (epoch {}), loss = {:.3f}, time/batch = {:.3f}, ETA: {} ({})"
                      .format(taskNum, totalTask, e, train_loss, end - start,
                              time.ctime(time.time() + etaTime), etaString))

            # save_every counts epochs here; the final epoch is always saved.
            if (e + 1) % args.save_every == 0 or e == args.num_epochs - 1:
                checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step = e + 1)
                print("model saved to {}".format(checkpoint_path))
开发者ID:supasorn,项目名称:constrained-char-rnn,代码行数:65,代码来源:train.py


示例12: train

def train(args):
    """Set up data, model and session for training a character-level model.

    Builds a ``TextLoader`` over ``args.data_dir``, optionally checks that the
    checkpoint directory ``args.init_from`` is compatible with the current
    arguments and data, writes ``config.pkl`` and ``chars_vocab.pkl`` into
    ``args.save_dir``, constructs the model, creates the validation summaries
    and finally opens a session in which all variables are initialized and
    listed.

    NOTE: this listing is truncated after the variable report; the remainder
    of the session body is not shown here.

    Args:
        args: argument namespace. Attributes read here: ``data_dir``,
            ``batch_size``, ``seq_length``, ``init_from``, ``save_dir`` and,
            when resuming, ``model``, ``rnn_size`` and ``num_layers``.
            ``args.vocab_size`` is overwritten with the loader's vocab size.

    Raises:
        AssertionError: if ``args.init_from`` is set but is not a directory,
            lacks ``config.pkl`` / ``chars_vocab.pkl`` / a checkpoint, or the
            saved configuration or vocabulary disagrees with the current one.
    """
    print("training on \'"+args.data_dir+"\'")
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
    args.vocab_size = data_loader.vocab_size

    # check compatibility if training is continued from previously saved model
    if args.init_from is not None:
        print("RELOADING FROM CHECKPOINT")
        config_path = os.path.join(args.init_from, "config.pkl")
        vocab_path = os.path.join(args.init_from, "chars_vocab.pkl")

        # check if all necessary files exist
        assert os.path.isdir(args.init_from), "%s must be a directory" % args.init_from
        assert os.path.isfile(config_path), "config.pkl file does not exist in path %s" % args.init_from
        assert os.path.isfile(vocab_path), "chars_vocab.pkl file does not exist in path %s" % args.init_from
        ckpt = tf.train.get_checkpoint_state(args.init_from)
        assert ckpt, "No checkpoint found"
        assert ckpt.model_checkpoint_path, "No model path found in checkpoint"

        # NOTE(review): unpickling executes arbitrary code; only point
        # --init_from at directories you trust.
        # The pickles are written below with 'wb', so they are read back in
        # binary mode as well to keep the two sides consistent.
        with open(config_path, 'rb') as f:
            saved_model_args = cPickle.load(f)
        # open old config and check if models are compatible
        need_be_same = ["model", "rnn_size", "num_layers", "seq_length"]
        for checkme in need_be_same:
            assert vars(saved_model_args)[checkme] == vars(args)[checkme], \
                "Command line argument and saved model disagree on '%s' " % checkme

        # open saved vocab/dict and check if vocabs/dicts are compatible
        with open(vocab_path, 'rb') as f:
            saved_chars, saved_vocab = cPickle.load(f)
        assert saved_chars == data_loader.chars, "Data and loaded model disagree on character set!"
        assert saved_vocab == data_loader.vocab, "Data and loaded model disagree on dictionary mappings!"

    # Persist the arguments and vocabulary used for this run in the save directory.
    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        cPickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'wb') as f:
        cPickle.dump((data_loader.chars, data_loader.vocab), f)

    print("====================================")
    printargs(args)
    print("====================================")
    model = Model(args)

    def validateonce(expectationdropout=True, TrueIfVal_FalseIfTrain=True):
        """Run one evaluation pass and collect per-target statistics.

        Uses the enclosing ``sess``, which is bound later in ``train``, so
        this may only be called inside the session block.

        Args:
            expectationdropout: forwarded to ``model.resetweights``.
            TrueIfVal_FalseIfTrain: when true, batches come from
                ``data_loader.next_batch_te()``; otherwise from
                ``next_batch_tr()``. Either way exactly
                ``data_loader.num_batches_te`` batches are consumed.

        Returns:
            Tuple ``(losses, truths, entrps, allprobs, testtimeperbatch)``:
            an array of ``-log2`` probabilities of each target, the
            concatenated targets, prediction entropies and probabilities,
            and the mean wall-clock seconds per batch.
        """
        data_loader.reset_batch_pointers()
        model.resetweights(expectationdropout=expectationdropout)
        state = model.resetstate()
        start = time.time()
        losses = []
        # NOTE(review): the training pointer is captured *after*
        # reset_batch_pointers() has already run, so the value restored at
        # the end is the post-reset one -- confirm this is intended.
        backupptrtr = data_loader.pointer_tr
        entrps = None
        truths = None
        allprobs = None
        for b in range(data_loader.num_batches_te):
            if TrueIfVal_FalseIfTrain:
                x, y = data_loader.next_batch_te()
            else:
                x, y = data_loader.next_batch_tr()
            # shapes of x and y are (batchsize, seqlength); each element is an integer from 0 to (vocabsize-1)
            feed = {model.input_data: x, model.targets: y, model.initial_state: state}
            feed = model.extrafeed(feed)
            state, probs, entropies = sess.run([model.final_state, model.probs, model.pred_entropy], feed)
            theseprobs = np.reshape(probs, (1, args.batch_size, args.seq_length, args.vocab_size))
            thesey = np.reshape(y, (args.batch_size, args.seq_length))
            allprobs = tryconcat(allprobs, theseprobs, axis=2)
            truths = tryconcat(truths, thesey, axis=1)
            y = y.flatten()
            # Probability assigned to the true target of every position,
            # gathered in one indexing operation instead of a Python loop.
            losses.extend(-np.log2(probs[np.arange(y.size), y]))
            thesentropies = np.reshape(entropies, (1, args.batch_size, args.seq_length))
            entrps = tryconcat(entrps, thesentropies, axis=2)
        data_loader.pointer_tr = backupptrtr
        end = time.time()
        testtimeperbatch = (end-start) / float(data_loader.num_batches_te)
        return (np.array(losses), truths, entrps, allprobs, testtimeperbatch)

    # for tensorboard
    valsumplh_cost = tf.placeholder(tf.float32, (1,), name="validation_summary_placeholder_cost")
    valsumplh_pent = tf.placeholder(tf.float32, (1,), name="validation_summary_placeholder_prediction_entropy")
    #reduce_sum fixes tensorflow scalar handling being weird (vector of size 1)
    valsumscs_cost = tf.scalar_summary('cost_val', tf.reduce_sum(valsumplh_cost))
    valsumscs_pent = tf.scalar_summary('prediction_entropy_val', tf.reduce_sum(valsumplh_pent))
    sumwriter = tf.train.SummaryWriter(args.save_dir, graph=tf.get_default_graph())

    befstarttime = time.time()

    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        saver = tf.train.Saver(tf.all_variables())

        # Report every variable; trainable ones are flagged with "@@@" and
        # counted towards the size estimate.
        print("====================================")
        allvars = tf.all_variables()
        trainablevars = tf.trainable_variables()
        trainableMB = 0
        for tvar in allvars:
            if tvar in trainablevars:
                print("@@@ "+tvar.name+" -- "+str(tvar.get_shape()))
                # 4 bytes per element -- presumably float32 weights; TODO confirm
                trainableMB += 4*tvar.get_shape().num_elements()
            else:
                print(tvar.name+" -- "+str(tvar.get_shape()))
        print(" ")
        print("trainable megabytes: "+str(float(trainableMB)/1e6))
#.........这里部分代码省略.........
开发者ID:jasonbunk,项目名称:char-rnn-tensorflow,代码行数:101,代码来源:train.py


示例13: train

def train(args):
    """Train the model, resuming from a checkpoint in ``args.save_dir`` if one exists.

    Loads the data, creates ``args.save_dir`` when missing, restores the
    architecture arguments (``rnn_size``, ``num_layers``, ``model``) from a
    previous ``config.pkl`` when a checkpoint is found, saves the arguments
    and vocabulary, builds the model and runs the batch loop.

    NOTE: this listing is truncated inside the ``try`` block; the end of the
    batch loop and the matching ``except``/``finally`` are not shown.

    Args:
        args: argument namespace. Attributes read here: ``data_dir``,
            ``batch_size``, ``seq_length``, ``save_dir``, ``model``,
            ``num_epochs``, ``decay_steps`` and ``decay_rate``.
            ``vocab_size`` is set from the data loader.
    """
    # Create the data_loader object, which loads up all of our batches, vocab dictionary, etc.
    # from utils.py (and creates them if they don't already exist).
    # These files go in the data directory.
    data_loader = TextLoader(args.data_dir, args.batch_size, args.seq_length)
    args.vocab_size = data_loader.vocab_size

    load_model = False
    if not os.path.exists(args.save_dir):
        print("Creating directory %s" % args.save_dir)
        os.mkdir(args.save_dir)
    elif (os.path.exists(os.path.join(args.save_dir, 'config.pkl'))):
        # Trained model already exists
        ckpt = tf.train.get_checkpoint_state(args.save_dir)
        if ckpt and ckpt.model_checkpoint_path:
            # NOTE(review): the pickle is opened in text mode here and written in
            # text mode ('w') below -- consistent, but binary mode is the safer choice.
            with open(os.path.join(args.save_dir, 'config.pkl')) as f:
                saved_args = cPickle.load(f)
                # The saved architecture wins over the command line so the
                # checkpoint can be restored into the model built below.
                args.rnn_size = saved_args.rnn_size
                args.num_layers = saved_args.num_layers
                args.model = saved_args.model
                print("Found a previous checkpoint. Overwriting model description arguments to:")
                print(" model: {}, rnn_size: {}, num_layers: {}".format(
                    saved_args.model, saved_args.rnn_size, saved_args.num_layers))
                load_model = True

    # Save all arguments to config.pkl in the save directory -- NOT the data directory.
    with open(os.path.join(args.save_dir, 'config.pkl'), 'w') as f:
        cPickle.dump(args, f)
    # Save a tuple of the characters list and the vocab dictionary to chars_vocab.pkl in
    # the save directory -- NOT the data directory.
    with open(os.path.join(args.save_dir, 'chars_vocab.pkl'), 'w') as f:
        cPickle.dump((data_loader.chars, data_loader.vocab), f)

    # Create the model!
    print("Building the model")
    model = Model(args)

    config = tf.ConfigProto(log_device_placement=False)
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        tf.initialize_all_variables().run()
        saver = tf.train.Saver(model.save_variables_list())
        if (load_model):
            print("Loading saved parameters")
            saver.restore(sess, ckpt.model_checkpoint_path)
        # Progress counters are read from the model (after any restore above),
        # so a resumed run continues from the values held there.
        global_epoch_fraction = sess.run(model.global_epoch_fraction)
        global_seconds_elapsed = sess.run(model.global_seconds_elapsed)
        if load_model: print("Resuming from global epoch fraction {:.3f},"
                " total trained time: {}, learning rate: {}".format(
                global_epoch_fraction, global_seconds_elapsed, sess.run(model.lr)))
        data_loader.cue_batch_pointer_to_epoch_fraction(global_epoch_fraction)
        # Fractional part of the epoch counter, converted to a batch index
        # within the current epoch.
        initial_batch_step = int((global_epoch_fraction
                - int(global_epoch_fraction)) * data_loader.total_batch_count)
        # Run args.num_epochs further epochs, starting at the whole-number
        # part of the epoch counter.
        epoch_range = (int(global_epoch_fraction),
                args.num_epochs + int(global_epoch_fraction))
        writer = tf.train.SummaryWriter(args.save_dir, graph=tf.get_default_graph())
        outputs = [model.cost, model.final_state, model.train_op, model.summary_op]
        is_lstm = args.model == 'lstm'
        global_step = epoch_range[0] * data_loader.total_batch_count + initial_batch_step
        try:
            for e in xrange(*epoch_range):
                # e iterates through the training epochs.
                # Reset the model state, so it does not carry over from the end of the previous epoch.
                state = sess.run(model.initial_state)
                batch_range = (initial_batch_step, data_loader.total_batch_count)
                # Only the first (resumed) epoch starts mid-way; later epochs start at batch 0.
                initial_batch_step = 0
                for b in xrange(*batch_range):
                    global_step += 1
                    if global_step % args.decay_steps == 0:
                        # Set the model.lr element of the model to track
                        # the appropriately decayed learning rate.
                        current_learning_rate = sess.run(model.lr)
                        current_learning_rate *= args.decay_rate
                        sess.run(tf.assign(model.lr, current_learning_rate))
                        print("Decayed learning rate to {}".format(current_learning_rate))
                    start = time.time()
                    # Pull the next batch inputs (x) and targets (y) from the data loader.
                    x, y = data_loader.next_batch()

                    # feed is a dictionary of variable references and respective values for initialization.
                    # Initialize the model's input data and target data from the batch,
                    # and initialize the model state to the final state from the previous batch, so that
                    # model state is accumulated and carried over between batches.
                    feed = {model.input_data: x, model.targets: y}
                    if is_lstm:
                        # LSTM state entries are (c, h) pairs; feed both parts of each entry.
                        for i, (c, h) in enumerate(model.initial_state):
                            feed[c] = state[i].c
                            feed[h] = state[i].h
                    else:
                        # Other cell types: each state entry is fed as a single value.
                        for i, c in enumerate(model.initial_state):
                            feed[c] = state[i]
                    # Run the session! Specifically, tell TensorFlow to compute the graph to calculate
                    # the values of cost, final state, and the training op.
                    # Cost is used to monitor progress.
                    # Final state is used to carry over the state into the next batch.
                    # Training op is not used, but we want it to be calculated, since that calculation
                    # is what updates parameter states (i.e. that is where the training happens).
                    train_loss, state, _, summary = sess.run(outputs, feed)
                    elapsed = time.time() - start
                    global_seconds_elapsed += elapsed
#.........这里部分代码省略.........
开发者ID:Navdevl,项目名称:chatbot-rnn,代码行数:101,代码来源:train.py


示例14: train


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python utils.Timer类代码示例发布时间:2022-05-26
下一篇:
Python utils.Parser类代码示例发布时间:2022-05-26
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap