
Python optim.Adam Class Code Examples


This article collects typical usage examples of torch.optim.Adam in Python. If you are wondering how exactly the Adam class works, how to use it, or what real Adam usage looks like, the curated class code examples below may help.



A total of 20 code examples of the Adam class are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
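
Before the collected examples, here is a minimal, self-contained sketch of the typical Adam workflow: construct the optimizer over the model's parameters, then repeat zero_grad / backward / step. The model and data are placeholders, not taken from any of the repositories below. Note also that many of the examples below target older PyTorch releases, where Variable, volatile=True, and loss.data[0] were still in use; in current PyTorch these correspond to plain tensors, torch.no_grad(), and loss.item().

import torch
from torch import nn
from torch.optim import Adam

# Placeholder model and data, only to illustrate the optimizer workflow.
model = nn.Linear(10, 1)
inputs = torch.randn(32, 10)
targets = torch.randn(32, 1)

# Common constructor arguments: learning rate, betas, and weight decay.
optimizer = Adam(model.parameters(), lr=1e-3, betas=(0.9, 0.999), weight_decay=0.0)
loss_fn = nn.MSELoss()

for step in range(100):
    optimizer.zero_grad()                      # clear gradients from the previous step
    loss = loss_fn(model(inputs), targets)
    loss.backward()                            # compute gradients
    optimizer.step()                           # apply the Adam update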

Example 1: pretrain

 def pretrain(self, train_data, corrupter, tester):
     src, rel, dst = train_data
     n_train = len(src)
     optimizer = Adam(self.mdl.parameters())
     #optimizer = SGD(self.mdl.parameters(), lr=1e-4)
     n_epoch = self.config.n_epoch
     n_batch = self.config.n_batch
     best_perf = 0
     for epoch in range(n_epoch):
         epoch_loss = 0
         rand_idx = t.randperm(n_train)
         src = src[rand_idx]
         rel = rel[rand_idx]
         dst = dst[rand_idx]
         src_corrupted, dst_corrupted = corrupter.corrupt(src, rel, dst)
         src_cuda = src.cuda()
         rel_cuda = rel.cuda()
         dst_cuda = dst.cuda()
         src_corrupted = src_corrupted.cuda()
         dst_corrupted = dst_corrupted.cuda()
         for s0, r, t0, s1, t1 in batch_by_num(n_batch, src_cuda, rel_cuda, dst_cuda, src_corrupted, dst_corrupted,
                                               n_sample=n_train):
             self.mdl.zero_grad()
             loss = t.sum(self.mdl.pair_loss(Variable(s0), Variable(r), Variable(t0), Variable(s1), Variable(t1)))
             loss.backward()
             optimizer.step()
             self.mdl.constraint()
             epoch_loss += loss.data[0]
         logging.info('Epoch %d/%d, Loss=%f', epoch + 1, n_epoch, epoch_loss / n_train)
         if (epoch + 1) % self.config.epoch_per_test == 0:
             test_perf = tester()
             if test_perf > best_perf:
                 self.save(os.path.join(config().task.dir, self.config.model_file))
                 best_perf = test_perf
     return best_perf
Developer ID: cai-lw, Project: KBGAN, Lines: 35, Source file: trans_d.py


Example 2: __init__

    def __init__(self, memory, nb_status, nb_actions, action_noise=None,
                 gamma=0.99, tau=0.001, normalize_observations=True,
                 batch_size=128, observation_range=(-5., 5.), action_range=(-1., 1.),
                 actor_lr=1e-4, critic_lr=1e-3):
        self.nb_status = nb_status
        self.nb_actions = nb_actions
        self.action_range = action_range
        self.observation_range = observation_range
        self.normalize_observations = normalize_observations

        self.actor = Actor(self.nb_status, self.nb_actions)
        self.actor_target = Actor(self.nb_status, self.nb_actions)
        self.actor_optim = Adam(self.actor.parameters(), lr=actor_lr)

        self.critic = Critic(self.nb_status, self.nb_actions)
        self.critic_target = Critic(self.nb_status, self.nb_actions)
        self.critic_optim = Adam(self.critic.parameters(), lr=critic_lr)

        # Create replay buffer
        self.memory = memory  # SequentialMemory(limit=args.rmsize, window_length=args.window_length)
        self.action_noise = action_noise

        # Hyper-parameters
        self.batch_size = batch_size
        self.tau = tau
        self.discount = gamma

        if self.normalize_observations:
            self.obs_rms = RunningMeanStd()
        else:
            self.obs_rms = None
Developer ID: megvii-rl, Project: pytorch-gym, Lines: 31, Source file: ddpg.py


Example 3: pretrain

 def pretrain(self, train_data, corrupter, tester):
     src, rel, dst = train_data
     n_train = len(src)
     n_epoch = self.config.n_epoch
     n_batch = self.config.n_batch
     optimizer = Adam(self.mdl.parameters(), weight_decay=self.weight_decay)
     best_perf = 0
     for epoch in range(n_epoch):
         epoch_loss = 0
         if epoch % self.config.sample_freq == 0:
             rand_idx = t.randperm(n_train)
             src = src[rand_idx]
             rel = rel[rand_idx]
             dst = dst[rand_idx]
             src_corrupted, rel_corrupted, dst_corrupted = corrupter.corrupt(src, rel, dst)
             src_corrupted = src_corrupted.cuda()
             rel_corrupted = rel_corrupted.cuda()
             dst_corrupted = dst_corrupted.cuda()
         for ss, rs, ts in batch_by_num(n_batch, src_corrupted, rel_corrupted, dst_corrupted, n_sample=n_train):
             self.mdl.zero_grad()
             label = t.zeros(len(ss)).type(t.LongTensor).cuda()
             loss = t.sum(self.mdl.softmax_loss(Variable(ss), Variable(rs), Variable(ts), label))
             loss.backward()
             optimizer.step()
             epoch_loss += loss.data[0]
         logging.info('Epoch %d/%d, Loss=%f', epoch + 1, n_epoch, epoch_loss / n_train)
         if (epoch + 1) % self.config.epoch_per_test == 0:
             test_perf = tester()
             if test_perf > best_perf:
                 self.save(os.path.join(config().task.dir, self.config.model_file))
                 best_perf = test_perf
     return best_perf
Developer ID: cai-lw, Project: KBGAN, Lines: 32, Source file: distmult.py


Example 4: _init_optimizers

    def _init_optimizers(self):
        if self.generator_optim is None or self.critic_optim is None:
            from torch.optim import Adam
            trainable_generator_params = (
                p for p in self.generator.parameters() if p.requires_grad)
            trainable_critic_params = (
                p for p in self.critic.parameters() if p.requires_grad)

            self.generator_optim = Adam(
                trainable_generator_params, lr=0.0001, betas=(0, 0.9))
            self.critic_optim = Adam(
                trainable_critic_params, lr=0.0001, betas=(0, 0.9))
Developer ID: JohnVinyard, Project: zounds, Lines: 12, Source file: wgan.py


Example 5: __init__

    def __init__(self, nb_status, nb_actions, args, writer):
        self.clip_actor_grad = args.clip_actor_grad
        self.nb_status = nb_status * args.window_length
        self.nb_actions = nb_actions
        self.discrete = args.discrete
        self.pic = args.pic
        self.writer = writer
        self.select_time = 0        
        if self.pic:
            self.nb_status = args.pic_status
        
        # Create Actor and Critic Network
        net_cfg = {
            'hidden1':args.hidden1, 
            'hidden2':args.hidden2, 
            'use_bn':args.bn,
            'init_method':args.init_method
        }
        if args.pic:
            self.cnn = CNN(1, args.pic_status)
            self.cnn_target = CNN(1, args.pic_status)
            self.cnn_optim = Adam(self.cnn.parameters(), lr=args.crate)
        self.actor = Actor(self.nb_status, self.nb_actions, **net_cfg)
        self.actor_target = Actor(self.nb_status, self.nb_actions, **net_cfg)
        self.actor_optim  = Adam(self.actor.parameters(), lr=args.prate)

        self.critic = Critic(self.nb_status, self.nb_actions, **net_cfg)
        self.critic_target = Critic(self.nb_status, self.nb_actions, **net_cfg)
        self.critic_optim  = Adam(self.critic.parameters(), lr=args.rate)

        hard_update(self.actor_target, self.actor) # Make sure target is with the same weight
        hard_update(self.critic_target, self.critic)
        if args.pic:
            hard_update(self.cnn_target, self.cnn)
        
        #Create replay buffer
        self.memory = rpm(args.rmsize) # SequentialMemory(limit=args.rmsize, window_length=args.window_length)
        self.random_process = Myrandom(size=nb_actions)

        # Hyper-parameters
        self.batch_size = args.batch_size
        self.tau = args.tau
        self.discount = args.discount
        self.depsilon = 1.0 / args.epsilon

        # 
        self.epsilon = 1.0
        self.s_t = None # Most recent state
        self.a_t = None # Most recent action
        self.use_cuda = args.cuda
        # 
        if self.use_cuda: self.cuda()
Developer ID: megvii-rl, Project: pytorch-gym, Lines: 52, Source file: ddpg.py


Example 6: __init__

    def __init__(self, nb_status, nb_actions, args):
        self.num_actor = 3

        self.nb_status = nb_status * args.window_length
        self.nb_actions = nb_actions
        self.discrete = args.discrete
        self.pic = args.pic
        if self.pic:
            self.nb_status = args.pic_status
        
        # Create Actor and Critic Network
        net_cfg = {
            'hidden1':args.hidden1, 
            'hidden2':args.hidden2, 
            'use_bn':args.bn
        }
        if args.pic:
            self.cnn = CNN(3, args.pic_status)
            self.cnn_optim = Adam(self.cnn.parameters(), lr=args.crate)
        self.actors = [Actor(self.nb_status, self.nb_actions) for _ in range(self.num_actor)]
        self.actor_targets = [Actor(self.nb_status, self.nb_actions) for _ in
                              range(self.num_actor)]
        self.actor_optims = [Adam(self.actors[i].parameters(), lr=args.prate) for i in range(self.num_actor)]

        self.critic = Critic(self.nb_status, self.nb_actions, **net_cfg)
        self.critic_target = Critic(self.nb_status, self.nb_actions, **net_cfg)
        self.critic_optim  = Adam(self.critic.parameters(), lr=args.rate)

        for i in range(self.num_actor):
            hard_update(self.actor_targets[i], self.actors[i])  # Make sure target is with the same weight
        hard_update(self.critic_target, self.critic)
        
        #Create replay buffer
        self.memory = rpm(args.rmsize) # SequentialMemory(limit=args.rmsize, window_length=args.window_length)
        self.random_process = Myrandom(size=nb_actions)

        # Hyper-parameters
        self.batch_size = args.batch_size
        self.tau = args.tau
        self.discount = args.discount
        self.depsilon = 1.0 / args.epsilon

        # 
        self.epsilon = 1.0
        self.s_t = None # Most recent state
        self.a_t = None # Most recent action
        self.use_cuda = args.cuda
        # 
        if self.use_cuda: self.cuda()
Developer ID: megvii-rl, Project: pytorch-gym, Lines: 49, Source file: ddpg.py


Example 7: __init__

    def __init__(self, gamma, tau, hidden_size, num_inputs, action_space):

        self.num_inputs = num_inputs
        self.action_space = action_space

        self.actor = Actor(hidden_size, self.num_inputs, self.action_space)
        self.actor_target = Actor(hidden_size, self.num_inputs, self.action_space)
        self.actor_optim = Adam(self.actor.parameters(), lr=1e-4)

        self.critic = Critic(hidden_size, self.num_inputs, self.action_space)
        self.critic_target = Critic(hidden_size, self.num_inputs, self.action_space)
        self.critic_optim = Adam(self.critic.parameters(), lr=1e-3)

        self.gamma = gamma
        self.tau = tau

        hard_update(self.actor_target, self.actor)  # Make sure target is with the same weight
        hard_update(self.critic_target, self.critic)
Developer ID: GuanyuGao, Project: thermal_project, Lines: 18, Source file: ddpg.py


Example 8: __init__

    def __init__(self, gamma, tau, hidden_size, num_inputs, action_space):
        self.action_space = action_space
        self.num_inputs = num_inputs
        
        self.model = Policy(hidden_size, num_inputs, action_space)
        self.target_model = Policy(hidden_size, num_inputs, action_space)
        self.optimizer = Adam(self.model.parameters(), lr=1e-3)

        self.gamma = gamma
        self.tau = tau

        hard_update(self.target_model, self.model)
Developer ID: lenvdv, Project: pytorch-ddpg-naf, Lines: 12, Source file: naf.py


Example 9: train

    def train(self, data):

        data = data['data']

        self.network.train()

        optimizer = Adam(trainable_parameters(self.network), lr=1e-5)

        for epoch, batch in self._driver(data):
            self.network.zero_grad()

            # choose a batch of anchors
            indices, anchor = self._select_batch(data)
            anchor_v = self._variable(anchor)
            a = self._apply_network_and_normalize(anchor_v)

            # choose negative examples
            negative_indices, negative = self._select_batch(data)
            negative_v = self._variable(negative)
            n = self._apply_network_and_normalize(negative_v)

            # choose a deformation for this batch and apply it to produce the
            # positive examples
            deformation = choice(self.deformations)
            positive = deformation(anchor, data[indices, ...]) \
                .astype(np.float32)
            positive_v = self._variable(positive)
            p = self._apply_network_and_normalize(positive_v)

            error = self.loss.forward(a, p, n)
            error.backward()
            optimizer.step()

            self.on_batch_complete(
                epoch=epoch,
                batch=batch,
                error=float(error.data.cpu().numpy().squeeze()),
                deformation=deformation.__name__)

        return self.network
Developer ID: JohnVinyard, Project: zounds, Lines: 40, Source file: embedding.py


Example 10: learn

def learn(learning_rate, iterations, x, y, validation=None, stop_early=False, run_comment=''):
    # Define a neural network using high-level modules.
    writer = SummaryWriter(comment=run_comment)
    model = Sequential(
        Linear(len(x[0]), len(y[0]), bias=True)  # n inputs -> 1 output
    )
    loss_fn = BCEWithLogitsLoss(reduction='sum')  # reduction=mean converges slower.
    # TODO: Add an option to twiddle pos_weight, which lets us trade off precision and recall. Maybe also graph using add_pr_curve(), which can show how that tradeoff is going.
    optimizer = Adam(model.parameters(),lr=learning_rate)

    if validation:
        validation_ins, validation_outs = validation
        previous_validation_loss = None
    with progressbar(range(iterations)) as bar:
        for t in bar:
            y_pred = model(x)  # Make predictions.
            loss = loss_fn(y_pred, y)
            writer.add_scalar('loss', loss, t)
            if validation:
                validation_loss = loss_fn(model(validation_ins), validation_outs)
                if stop_early:
                    if previous_validation_loss is not None and previous_validation_loss < validation_loss:
                        print('Stopping early at iteration {t} because validation error rose.'.format(t=t))
                        model.load_state_dict(previous_model)
                        break
                    else:
                        previous_validation_loss = validation_loss
                        previous_model = model.state_dict()
                writer.add_scalar('validation_loss', validation_loss, t)
            writer.add_scalar('training_accuracy_per_tag', accuracy_per_tag(model, x, y), t)
            optimizer.zero_grad()  # Zero the gradients.
            loss.backward()  # Compute gradients.
            optimizer.step()

    # Horizontal axis is what confidence. Vertical is how many samples were that confidence.
    writer.add_histogram('confidence', confidences(model, x), t)
    writer.close()
    return model
Developer ID: mozilla, Project: fathom, Lines: 38, Source file: train.py


Example 11: train

    def train(self, training_data: TrainingData) -> None:
        x_train, y_train, x_val, y_val, vocab, class_to_i, i_to_class = preprocess_dataset(training_data)
        self.class_to_i = class_to_i
        self.i_to_class = i_to_class

        log.info('Batchifying data')
        train_batches = batchify(x_train, y_train, shuffle=True)
        val_batches = batchify(x_val, y_val, shuffle=False)
        self.model = ElmoModel(len(i_to_class), dropout=self.dropout)
        if CUDA:
            self.model = self.model.cuda()
        log.info(f'Parameters:\n{self.parameters()}')
        log.info(f'Model:\n{self.model}')
        parameters = list(self.model.classifier.parameters())
        for mix in self.model.elmo._scalar_mixes:
            parameters.extend(list(mix.parameters()))
        self.optimizer = Adam(parameters)
        self.criterion = nn.CrossEntropyLoss()
        self.scheduler = lr_scheduler.ReduceLROnPlateau(self.optimizer, patience=5, verbose=True, mode='max')
        temp_prefix = get_tmp_filename()
        self.model_file = f'{temp_prefix}.pt'
        manager = TrainingManager([
            BaseLogger(log_func=log.info), TerminateOnNaN(), EarlyStopping(monitor='test_acc', patience=10, verbose=1),
            MaxEpochStopping(100), ModelCheckpoint(create_save_model(self.model), self.model_file, monitor='test_acc')
        ])
        log.info('Starting training')
        epoch = 0
        while True:
            self.model.train()
            train_acc, train_loss, train_time = self.run_epoch(train_batches)
            random.shuffle(train_batches)

            self.model.eval()
            test_acc, test_loss, test_time = self.run_epoch(val_batches, train=False)

            stop_training, reasons = manager.instruct(
                train_time, train_loss, train_acc,
                test_time, test_loss, test_acc
            )

            if stop_training:
                log.info(' '.join(reasons))
                break
            else:
                self.scheduler.step(test_acc)
            epoch += 1
Developer ID: Pinafore, Project: qb, Lines: 46, Source file: elmo.py


Example 12: __init__

    def __init__(self, args, exp_model, logging_func):
        self.args = args

        # Exploration Model
        self.exp_model = exp_model

        self.log = logging_func["log"]

        # Experience Replay
        self.replay = ExpReplay(args.exp_replay_size, args.stale_limit, exp_model, args, priority=self.args.prioritized)

        # DQN and Target DQN
        model = get_models(args.model)
        self.dqn = model(actions=args.actions, atoms=args.atoms)
        self.target_dqn = model(actions=args.actions, atoms=args.atoms)

        dqn_params = 0
        for weight in self.dqn.parameters():
            weight_params = 1
            for s in weight.size():
                weight_params *= s
            dqn_params += weight_params
        print("Distrib DQN has {:,} parameters.".format(dqn_params))

        self.target_dqn.eval()

        if args.gpu:
            print("Moving models to GPU.")
            self.dqn.cuda()
            self.target_dqn.cuda()

        # Optimizer
        self.optimizer = Adam(self.dqn.parameters(), lr=args.lr)
        # self.optimizer = RMSprop(self.dqn.parameters(), lr=args.lr)

        self.T = 0
        self.target_sync_T = -self.args.t_max
Developer ID: tabzraz, Project: RL, Lines: 37, Source file: DQN_Distribution_Agent.py


Example 13: DDPG

class DDPG(object):
    def __init__(self, nb_status, nb_actions, args, writer):
        self.clip_actor_grad = args.clip_actor_grad
        self.nb_status = nb_status * args.window_length
        self.nb_actions = nb_actions
        self.discrete = args.discrete
        self.pic = args.pic
        self.writer = writer
        self.select_time = 0        
        if self.pic:
            self.nb_status = args.pic_status
        
        # Create Actor and Critic Network
        net_cfg = {
            'hidden1':args.hidden1, 
            'hidden2':args.hidden2, 
            'use_bn':args.bn,
            'init_method':args.init_method
        }
        if args.pic:
            self.cnn = CNN(1, args.pic_status)
            self.cnn_target = CNN(1, args.pic_status)
            self.cnn_optim = Adam(self.cnn.parameters(), lr=args.crate)
        self.actor = Actor(self.nb_status, self.nb_actions, **net_cfg)
        self.actor_target = Actor(self.nb_status, self.nb_actions, **net_cfg)
        self.actor_optim  = Adam(self.actor.parameters(), lr=args.prate)

        self.critic = Critic(self.nb_status, self.nb_actions, **net_cfg)
        self.critic_target = Critic(self.nb_status, self.nb_actions, **net_cfg)
        self.critic_optim  = Adam(self.critic.parameters(), lr=args.rate)

        hard_update(self.actor_target, self.actor) # Make sure target is with the same weight
        hard_update(self.critic_target, self.critic)
        if args.pic:
            hard_update(self.cnn_target, self.cnn)
        
        #Create replay buffer
        self.memory = rpm(args.rmsize) # SequentialMemory(limit=args.rmsize, window_length=args.window_length)
        self.random_process = Myrandom(size=nb_actions)

        # Hyper-parameters
        self.batch_size = args.batch_size
        self.tau = args.tau
        self.discount = args.discount
        self.depsilon = 1.0 / args.epsilon

        # 
        self.epsilon = 1.0
        self.s_t = None # Most recent state
        self.a_t = None # Most recent action
        self.use_cuda = args.cuda
        # 
        if self.use_cuda: self.cuda()

    def normalize(self, pic):
        pic = pic.swapaxes(0, 2).swapaxes(1, 2)
        return pic

    def update_policy(self):
        # Sample batch
        state_batch, action_batch, reward_batch, \
            next_state_batch, terminal_batch = self.memory.sample_batch(self.batch_size)

        # Prepare for the target q batch
        if self.pic:
            state_batch = np.array([self.normalize(x) for x in state_batch])
            state_batch = to_tensor(state_batch, volatile=True)
            state_batch = self.cnn(state_batch)
            next_state_batch = np.array([self.normalize(x) for x in next_state_batch])
            next_state_batch = to_tensor(next_state_batch, volatile=True)
            next_state_batch = self.cnn_target(next_state_batch)
            next_q_values = self.critic_target([
                next_state_batch,
                self.actor_target(next_state_batch)
            ])
        else:
            next_q_values = self.critic_target([
                to_tensor(next_state_batch, volatile=True),
                self.actor_target(to_tensor(next_state_batch, volatile=True)),
            ])
        # print('batch of picture is ok')
        next_q_values.volatile = False

        target_q_batch = to_tensor(reward_batch) + \
            self.discount * to_tensor((1 - terminal_batch.astype(np.float))) * next_q_values

        # Critic update
        self.critic.zero_grad()
        if self.pic: self.cnn.zero_grad()

        if self.pic:
            state_batch.volatile = False
            q_batch = self.critic([state_batch, to_tensor(action_batch)])
        else:
            q_batch = self.critic([to_tensor(state_batch), to_tensor(action_batch)])

        # print(reward_batch, next_q_values*self.discount, target_q_batch, terminal_batch.astype(np.float))
        value_loss = criterion(q_batch, target_q_batch)
        value_loss.backward()
        self.critic_optim.step()
#......... remaining code omitted .........
Developer ID: megvii-rl, Project: pytorch-gym, Lines: 101, Source file: ddpg.py


Example 14: RnnGuesser

class RnnGuesser(AbstractGuesser):
    def __init__(self, config_num):
        super(RnnGuesser, self).__init__(config_num)
        if self.config_num is not None:
            guesser_conf = conf['guessers']['qanta.guesser.rnn.RnnGuesser'][self.config_num]
            self.gradient_clip = guesser_conf['gradient_clip']
            self.n_hidden_units = guesser_conf['n_hidden_units']
            self.n_hidden_layers = guesser_conf['n_hidden_layers']
            self.nn_dropout = guesser_conf['dropout']
            self.batch_size = guesser_conf['batch_size']
            self.use_wiki = guesser_conf['use_wiki']
            self.n_wiki_sentences = guesser_conf['n_wiki_sentences']
            self.wiki_title_replace_token = guesser_conf['wiki_title_replace_token']
            self.lowercase = guesser_conf['lowercase']

            self.random_seed = guesser_conf['random_seed']

        self.page_field: Optional[Field] = None
        self.qanta_id_field: Optional[Field] = None
        self.text_field: Optional[Field] = None
        self.n_classes = None
        self.emb_dim = None
        self.model_file = None

        self.model: Optional[RnnModel] = None
        self.optimizer = None
        self.criterion = None
        self.scheduler = None

    @property
    def ans_to_i(self):
        return self.page_field.vocab.stoi

    @property
    def i_to_ans(self):
        return self.page_field.vocab.itos

    def parameters(self):
        return conf['guessers']['qanta.guesser.rnn.RnnGuesser'][self.config_num]

    def train(self, training_data):
        log.info('Loading Quiz Bowl dataset')
        train_iter, val_iter, dev_iter = QuizBowl.iters(
            batch_size=self.batch_size, lower=self.lowercase,
            use_wiki=self.use_wiki, n_wiki_sentences=self.n_wiki_sentences,
            replace_title_mentions=self.wiki_title_replace_token,
            sort_within_batch=True
        )
        log.info(f'Training Data={len(training_data[0])}')
        log.info(f'N Train={len(train_iter.dataset.examples)}')
        log.info(f'N Test={len(val_iter.dataset.examples)}')
        fields: Dict[str, Field] = train_iter.dataset.fields
        self.page_field = fields['page']
        self.n_classes = len(self.ans_to_i)
        self.qanta_id_field = fields['qanta_id']
        self.emb_dim = 300

        self.text_field = fields['text']
        log.info(f'Text Vocab={len(self.text_field.vocab)}')

        log.info('Initializing Model')
        self.model = RnnModel(
            self.n_classes,
            text_field=self.text_field,
            emb_dim=self.emb_dim,
            n_hidden_units=self.n_hidden_units, n_hidden_layers=self.n_hidden_layers,
            nn_dropout=self.nn_dropout
        )
        if CUDA:
            self.model = self.model.cuda()
        log.info(f'Parameters:\n{self.parameters()}')
        log.info(f'Model:\n{self.model}')
        self.optimizer = Adam(self.model.parameters())
        self.criterion = nn.CrossEntropyLoss()
        self.scheduler = lr_scheduler.ReduceLROnPlateau(self.optimizer, patience=5, verbose=True, mode='max')

        temp_prefix = get_tmp_filename()
        self.model_file = f'{temp_prefix}.pt'
        manager = TrainingManager([
            BaseLogger(log_func=log.info), TerminateOnNaN(), EarlyStopping(monitor='test_acc', patience=10, verbose=1),
            MaxEpochStopping(100), ModelCheckpoint(create_save_model(self.model), self.model_file, monitor='test_acc')
        ])

        log.info('Starting training')

        epoch = 0
        while True:
            self.model.train()
            train_acc, train_loss, train_time = self.run_epoch(train_iter)

            self.model.eval()
            test_acc, test_loss, test_time = self.run_epoch(val_iter)

            stop_training, reasons = manager.instruct(
                train_time, train_loss, train_acc,
                test_time, test_loss, test_acc
            )

            if stop_training:
                log.info(' '.join(reasons))
#......... remaining code omitted .........
Developer ID: Pinafore, Project: qb, Lines: 101, Source file: rnn.py


Example 15: DDPG

class DDPG(object):
    def __init__(self, memory, nb_status, nb_actions, action_noise=None,
                 gamma=0.99, tau=0.001, normalize_observations=True,
                 batch_size=128, observation_range=(-5., 5.), action_range=(-1., 1.),
                 actor_lr=1e-4, critic_lr=1e-3):
        self.nb_status = nb_status
        self.nb_actions = nb_actions
        self.action_range = action_range
        self.observation_range = observation_range
        self.normalize_observations = normalize_observations

        self.actor = Actor(self.nb_status, self.nb_actions)
        self.actor_target = Actor(self.nb_status, self.nb_actions)
        self.actor_optim = Adam(self.actor.parameters(), lr=actor_lr)

        self.critic = Critic(self.nb_status, self.nb_actions)
        self.critic_target = Critic(self.nb_status, self.nb_actions)
        self.critic_optim = Adam(self.critic.parameters(), lr=critic_lr)

        # Create replay buffer
        self.memory = memory  # SequentialMemory(limit=args.rmsize, window_length=args.window_length)
        self.action_noise = action_noise

        # Hyper-parameters
        self.batch_size = batch_size
        self.tau = tau
        self.discount = gamma

        if self.normalize_observations:
            self.obs_rms = RunningMeanStd()
        else:
            self.obs_rms = None

    def pi(self, obs, apply_noise=True, compute_Q=True):
        obs = np.array([obs])
        action = to_numpy(self.actor(to_tensor(obs))).squeeze(0)
        if compute_Q:
            q = self.critic([to_tensor(obs), to_tensor(action)]).cpu().data
        else:
            q = None

        if self.action_noise is not None and apply_noise:
            noise = self.action_noise()
            assert noise.shape == action.shape
            action += noise

        action = np.clip(action, self.action_range[0], self.action_range[1])
        return action, q[0][0]

    def store_transition(self, obs0, action, reward, obs1, terminal1):
        self.memory.append(obs0, action, reward, obs1, terminal1)
        if self.normalize_observations:
            self.obs_rms.update(np.array([obs0]))

    def train(self):
        # Get a batch.
        batch = self.memory.sample(batch_size=self.batch_size)

        next_q_values = self.critic_target([
            to_tensor(batch['obs1'], volatile=True),
            self.actor_target(to_tensor(batch['obs1'], volatile=True))])
        next_q_values.volatile = False

        target_q_batch = to_tensor(batch['rewards']) + \
                         self.discount * to_tensor(1 - batch['terminals1'].astype('float32')) * next_q_values

        self.critic.zero_grad()
        q_batch = self.critic([to_tensor(batch['obs0']), to_tensor(batch['actions'])])
        value_loss = criterion(q_batch, target_q_batch)
        value_loss.backward()
        self.critic_optim.step()

        self.actor.zero_grad()
        policy_loss = -self.critic([to_tensor(batch['obs0']), self.actor(to_tensor(batch['obs0']))]).mean()
        policy_loss.backward()
        self.actor_optim.step()

        # Target update
        soft_update(self.actor_target, self.actor, self.tau)
        soft_update(self.critic_target, self.critic, self.tau)

        return value_loss.cpu().data[0], policy_loss.cpu().data[0]

    def initialize(self):
        hard_update(self.actor_target, self.actor)  # Make sure target is with the same weight
        hard_update(self.critic_target, self.critic)

    def update_target_net(self):
        soft_update(self.actor_target, self.actor, self.tau)
        soft_update(self.critic_target, self.critic, self.tau)

    def reset(self):
        if self.action_noise is not None:
            self.action_noise.reset()

    def cuda(self):
        self.actor.cuda()
        self.actor_target.cuda()
        self.critic.cuda()
        self.critic_target.cuda()
Developer ID: megvii-rl, Project: pytorch-gym, Lines: 100, Source file: ddpg.py


Example 16: DDPG

class DDPG(object):
    def __init__(self, nb_status, nb_actions, args):
        self.num_actor = 3

        self.nb_status = nb_status * args.window_length
        self.nb_actions = nb_actions
        self.discrete = args.discrete

        # Create Actor and Critic Network
        net_cfg = {
            'hidden1': args.hidden1,
            'hidden2': args.hidden2,
            'use_bn': args.bn
        }
        self.actors = [Actor(self.nb_status, self.nb_actions) for _ in range(self.num_actor)]
        self.actor_targets = [Actor(self.nb_status, self.nb_actions) for _ in
                              range(self.num_actor)]
        self.actor_optims = [Adam(self.actors[i].parameters(), lr=args.prate) for i in range(self.num_actor)]

        self.critic = Critic(self.nb_status, self.nb_actions, **net_cfg)
        self.critic_target = Critic(self.nb_status, self.nb_actions, **net_cfg)
        self.critic_optim = Adam(self.critic.parameters(), lr=args.rate)

        for i in range(self.num_actor):
            hard_update(self.actor_targets[i], self.actors[i])  # Make sure target is with the same weight
        hard_update(self.critic_target, self.critic)

        # Create replay buffer
        self.memory = rpm(args.rmsize)  # SequentialMemory(limit=args.rmsize, window_length=args.window_length)
        self.random_process = Myrandom(size=nb_actions)

        # Hyper-parameters
        self.batch_size = args.batch_size
        self.tau = args.tau
        self.discount = args.discount
        self.depsilon = 1.0 / args.epsilon

        # 
        self.epsilon = 1.0
        self.s_t = None  # Most recent state
        self.a_t = None  # Most recent action
        self.use_cuda = args.cuda
        # 
        if self.use_cuda: self.cuda()

    def update_policy(self, train_actor=True):
        # Sample batch
        state_batch, action_batch, reward_batch, \
        next_state_batch, terminal_batch = self.memory.sample_batch(self.batch_size)

        # Prepare for the target q batch
        next_q_values = 0
        for i in range(self.num_actor):
            next_q_values = next_q_values + self.critic_target([
                to_tensor(next_state_batch, volatile=True),
                self.actor_targets[i](to_tensor(next_state_batch, volatile=True)),
            ])
        # print('batch of picture is ok')
        next_q_values = next_q_values / self.num_actor
        next_q_values.volatile = False

        target_q_batch = to_tensor(reward_batch) + \
                         self.discount * to_tensor((1 - terminal_batch.astype(np.float))) * next_q_values

        # Critic update
        self.critic.zero_grad()
        q_batch = self.critic([to_tensor(state_batch), to_tensor(action_batch)])

        # print(reward_batch, next_q_values*self.discount, target_q_batch, terminal_batch.astype(np.float))
        value_loss = criterion(q_batch, target_q_batch)
        value_loss.backward()
        self.critic_optim.step()

        sum_policy_loss = 0
        for i in range(self.num_actor):
            self.actors[i].zero_grad()

            policy_loss = -self.critic([
                to_tensor(state_batch),
                self.actors[i](to_tensor(state_batch))
            ])

            policy_loss = policy_loss.mean()
            policy_loss.backward()
            if train_actor:
                self.actor_optims[i].step()
            sum_policy_loss += policy_loss

            # Target update
            soft_update(self.actor_targets[i], self.actors[i], self.tau)

        soft_update(self.critic_target, self.critic, self.tau)

        return -sum_policy_loss / self.num_actor, value_loss

    def cuda(self):
        for i in range(self.num_actor):
            self.actors[i].cuda()
            self.actor_targets[i].cuda()
        self.critic.cuda()
#......... remaining code omitted .........
Developer ID: megvii-rl, Project: pytorch-gym, Lines: 101, Source file: ddpg.py


Example 17: _run

def _run(data_processor,
         data_file_name,
         dataset,
         data_generator,
         num_batches,
         vocabulary_size,
         number_examples,
         context_size,
         num_noise_words,
         vec_dim,
         num_epochs,
         batch_size,
         lr,
         model_ver,
         vec_combine_method,
         save_all):
    '''
    Averagely, the time consumption:
    max_generated_batches = 5
        CPU:
            backward time: 600~650 ms
            sampling time: 1 ms
            forward time:  5~7 ms
        GPU:
            backward time: 3 ms
            sampling time: 72 ms
            forward time:  1~2 ms
    Should rewrite sampling to speed up on GPU

    DocTag2Vec on CPU:
        121882 words/s, 8 workers
        processing one document time = 650~850 ms
        training on 173403030 raw words (68590824 effective words) took 646.2s, 106138 effective words/s

    Data Generation, the major bottleneck is still generation, maybe due to the lock:
        GPU (Desktop)
            generating batch time: 1200~2001 ms, (1508425387839, 1508425389840)
            transfer batch to Torch: 1 ms, (1508425389840, 1508425389841)
            #worker = 1: 300~600 words/s
            #worker = 8: 600~4000 words/s (around 2500 often)
            After changing to torch.sampler, getting worse, data-prepare time is not stable
        CPU (Mac)
            #worker = 8:
                generating batch time: 1200~1527 ms, (1508424953768, 1508424955295)
                transfer batch to Torch: 1 ms, (1508424955295, 1508424955296)
                Generating one example time: 2~5 ms, (1508458881118, 1508458881122)
                Generating one document time: 50~400 ms, (1508458881118, 1508458881122)
                Generating one batch time: 650~700 ms, (1508458880690, 1508458881122)
            After changing to torch.sampler
                Generating one example time: 4~7 ms
                Generating one batch time: 900~1200 ms

    '''

    model = DistributedMemory(
        vec_dim,
        num_docs=len(dataset),
        num_words=vocabulary_size)

    cost_func = NegativeSampling()
    optimizer = Adam(params=model.parameters(), lr=lr)
    logger = logging.getLogger('root')

    if torch.cuda.is_available():
        model.cuda()
        logger.info("Running on GPU - CUDA")
    else:
        logger.info("Running on CPU")

    logger.info("Dataset comprised of {:d} documents.".format(len(dataset)))
    logger.info("Vocabulary size is {:d}.\n".format(vocabulary_size))
    logger.info("Training started.")

    best_loss = float_info.max
    prev_model_file_path = ""

    progbar = Progbar(num_batches, batch_size=batch_size, total_examples = number_examples)

    for epoch_i in range(num_epochs):
        epoch_start_time = time.time()
        loss = []

        for batch_i in range(num_batches):
            start_time = current_milli_time()
            batch = next(data_generator)
            current_time = current_milli_time()
            print('data-prepare time: %d ms' % (round(current_time - start_time)))

            start_time = current_milli_time()
            x = model.forward(
                batch.context_ids,
                batch.doc_ids,
                batch.target_noise_ids)
            x = cost_func.forward(x)
            loss.append(x.data[0])
            print('forward time: %d ms' % round(current_milli_time() - start_time))

            start_time = current_milli_time()
            model.zero_grad()
            x.backward()
#......... remaining code omitted .........
Developer ID: memray, Project: paragraph-vectors, Lines: 101, Source file: train.py


Example 18: ImageFolder

    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

dataset = ImageFolder(opt.data_path, transform=transform)

dataloader = DataLoader(dataset, batch_size=opt.batch_size, shuffle=True, num_workers=opt.num_workers, drop_last=True)

netd = NetD(opt)
netg = NetG(opt)

if opt.netd_path:
    netd.load_state_dict(torch.load(opt.netd_path, map_location=lambda storage, loc: storage))
if opt.netg_path:
    netg.load_state_dict(torch.load(opt.netg_path, map_location=lambda storage, loc: storage))

optimizer_g = Adam(netg.parameters(), opt.lr1, betas=(opt.beta1, 0.999))
optimizer_d = Adam(netd.parameters(), opt.lr2, betas=(opt.beta1, 0.999))

criterion = nn.BCELoss()

true_labels = Variable(torch.ones(opt.batch_size))
fake_labels = Variable(torch.zeros(opt.batch_size))
fix_noises = Variable(torch.randn(opt.batch_size, opt.nz, 1, 1))
noises = Variable(torch.randn(opt.batch_size, opt.nz, 1, 1))

if opt.use_gpu:
    netd.cuda()
    netg.cuda()
    criterion.cuda()
    true_labels, fake_labels = true_labels.cuda(), fake_labels.cuda()
    fix_noises, noises = fix_noises.cuda(), noises.cuda()
Developer ID: HadXu, Project: machine-learning, Lines: 31, Source file: main.py
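
Example 18 stops after constructing the two Adam optimizers; the training loop itself is not shown in the excerpt. Purely as a hypothetical sketch (reusing the names netd, netg, criterion, optimizer_d, optimizer_g, true_labels, fake_labels and noises defined above, and not code from the original repository), a typical alternating update with these two optimizers might look like this:

for ii, (imgs, _) in enumerate(dataloader):
    real_imgs = Variable(imgs)
    if opt.use_gpu:
        real_imgs = real_imgs.cuda()

    # Discriminator step: push real images toward label 1 and generated images toward label 0.
    optimizer_d.zero_grad()
    error_d_real = criterion(netd(real_imgs), true_labels)
    noises.data.copy_(torch.randn(opt.batch_size, opt.nz, 1, 1))
    fake_imgs = netg(noises).detach()          # detach so this step does not update the generator
    error_d_fake = criterion(netd(fake_imgs), fake_labels)
    (error_d_real + error_d_fake).backward()
    optimizer_d.step()

    # Generator step: try to make the discriminator output 1 on generated images.
    optimizer_g.zero_grad()
    fake_imgs = netg(noises)
    error_g = criterion(netd(fake_imgs), true_labels)
    error_g.backward()
    optimizer_g.step()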


Example 19: DDPG

class DDPG(object):
    def __init__(self, gamma, tau, hidden_size, num_inputs, action_space):

        self.num_inputs = num_inputs
        self.action_space = action_space

        self.actor = Actor(hidden_size, self.num_inputs, self.action_space)
        self.actor_target = Actor(hidden_size, self.num_inputs, self.action_space)
        self.actor_optim = Adam(self.actor.parameters(), lr=1e-4)

        self.critic = Critic(hidden_size, self.num_inputs, self.action_space)
        self.critic_target = Critic(hidden_size, self.num_inputs, self.action_space)
        self.critic_optim = Adam(self.critic.parameters(), lr=1e-3)

        self.g
#......... remaining code omitted .........
