This article collects and organizes typical usage examples of the Java class burlap.mdp.singleagent.environment.EnvironmentOutcome. If you have been wondering what exactly the EnvironmentOutcome class does, how to use it, or where to find examples of it in use, the curated class examples below should help.
The EnvironmentOutcome class belongs to the burlap.mdp.singleagent.environment package. Twenty code examples of the class are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java code examples.
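For orientation before the examples, here is a minimal sketch of how an EnvironmentOutcome is typically produced and consumed. The class and method names (EnvironmentOutcomeDemo, printOutcome) are purely illustrative, and the Environment and Action passed in are assumed to come from your own BURLAP domain setup; only the public fields o, a, op, r, and terminated used throughout the examples below are relied on.

import burlap.mdp.core.action.Action;
import burlap.mdp.singleagent.environment.Environment;
import burlap.mdp.singleagent.environment.EnvironmentOutcome;

public class EnvironmentOutcomeDemo {

    /**
     * Executes one action in the given environment and prints the fields of the
     * resulting EnvironmentOutcome (o = previous state, a = action taken,
     * op = next state, r = reward, terminated = whether op is terminal).
     */
    public static void printOutcome(Environment env, Action a) {
        EnvironmentOutcome eo = env.executeAction(a);
        System.out.println("previous state: " + eo.o);
        System.out.println("action taken:   " + eo.a);
        System.out.println("next state:     " + eo.op);
        System.out.println("reward:         " + eo.r);
        System.out.println("terminated:     " + eo.terminated);
    }
}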
Example 1: addExperience

import burlap.mdp.singleagent.environment.EnvironmentOutcome; //import the required package/class

@Override
public void addExperience(EnvironmentOutcome eo) {
    // If this is the first frame of the episode, add the o frame.
    if (currentFrameHistory.historyLength == 0) {
        currentFrameHistory = addFrame(((ALEState)eo.o).getScreen());
    }
    // If this experience ends in a terminal state,
    // the terminal frame will never be used so don't add it.
    FrameHistory op;
    if (eo.terminated) {
        op = new FrameHistory(currentFrameHistory.index, 0);
    } else {
        op = addFrame(((ALEState)eo.op).getScreen());
    }
    experiences[next] = new FrameExperience(currentFrameHistory, actionSet.map(eo.a), op, eo.r, eo.terminated);
    next = (next+1) % experiences.length;
    size = Math.min(size+1, experiences.length);
    currentFrameHistory = op;
}

Developer: h2r, Project: burlap_caffe, Lines: 23, Source: FrameExperienceMemory.java
Example 2: updateQFunction

import burlap.mdp.singleagent.environment.EnvironmentOutcome; //import the required package/class

@Override
public void updateQFunction(List<EnvironmentOutcome> samples) {
    // fill up experience replay
    if (runningRandomPolicy) {
        if (totalSteps >= replayStartSize) {
            System.out.println("Replay sufficiently filled. Beginning training...");
            setLearningPolicy(trainingPolicy);
            runningRandomPolicy = false;
            // reset stale update timer
            this.stepsSinceStale = 1;
        }
        return;
    }
    // only update every updateFreq steps
    if (totalSteps % updateFreq == 0) {
        ((DQN)vfa).updateQFunction(samples, (DQN)staleVfa);
    }
}

Developer: h2r, Project: burlap_caffe, Lines: 24, Source: DeepQLearner.java
Example 3: executeAction

import burlap.mdp.singleagent.environment.EnvironmentOutcome; //import the required package/class

@Override
public EnvironmentOutcome executeAction(Action a) {
    State startState = this.currentObservation();
    ActionController ac = this.actionControllerMap.get(a.actionName());
    int delay = ac.executeAction(a);
    if (delay > 0) {
        try {
            Thread.sleep(delay);
        } catch(InterruptedException e) {
            e.printStackTrace();
        }
    }
    State finalState = this.currentObservation();
    this.lastReward = this.rewardFunction.reward(startState, a, finalState);
    EnvironmentOutcome eo = new EnvironmentOutcome(startState, a, finalState, this.lastReward, this.isInTerminalState());
    return eo;
}

Developer: h2r, Project: burlapcraft, Lines: 23, Source: MinecraftEnvironment.java
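Example 3 above (and Example 13 later in this article) follows the same observe-act-observe-score pattern when building the outcome. The abstract sketch below distills that pattern; the class name SketchEnvironment and the applyAction/reward hooks are illustrative assumptions of this article, not BURLAP APIs, and the remaining Environment methods are left abstract for a concrete subclass to supply.

import burlap.mdp.core.action.Action;
import burlap.mdp.core.state.State;
import burlap.mdp.singleagent.environment.Environment;
import burlap.mdp.singleagent.environment.EnvironmentOutcome;

public abstract class SketchEnvironment implements Environment {

    protected double lastReward = 0.;

    @Override
    public EnvironmentOutcome executeAction(Action a) {
        State startState = this.currentObservation();              // observe before acting
        this.applyAction(a);                                        // hypothetical hook: apply the action's effect
        State finalState = this.currentObservation();               // observe after acting
        this.lastReward = this.reward(startState, a, finalState);   // hypothetical hook: score the transition
        return new EnvironmentOutcome(startState, a, finalState,
                this.lastReward, this.isInTerminalState());
    }

    @Override
    public double lastReward() {
        return this.lastReward;
    }

    /** Hypothetical hook: apply the action's effect to the underlying system. */
    protected abstract void applyAction(Action a);

    /** Hypothetical hook: compute the reward for the observed transition. */
    protected abstract double reward(State s, Action a, State sPrime);
}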
Example 4: executeAction

import burlap.mdp.singleagent.environment.EnvironmentOutcome; //import the required package/class

@Override
public EnvironmentOutcome executeAction(Action ga) {
    State prevState = this.currentState;
    synchronized(this.nextAction){
        this.nextAction.val = ga;
        this.nextAction.notifyAll();
    }
    synchronized(this.nextState){
        while(this.nextState.val == null){
            try{
                nextState.wait();
            } catch(InterruptedException ex){
                ex.printStackTrace();
            }
        }
        this.nextState.val = null;
    }
    EnvironmentOutcome eo = new EnvironmentOutcome(prevState, ga, this.currentState, this.lastReward, this.curStateIsTerminal);
    return eo;
}

Developer: jmacglashan, Project: burlap, Lines: 26, Source: LearningAgentToSGAgentInterface.java
Example 5: collectDataFrom

import burlap.mdp.singleagent.environment.EnvironmentOutcome; //import the required package/class

@Override
public SARSData collectDataFrom(State s, SampleModel model, int maxSteps, SARSData intoDataset) {
    if(intoDataset == null){
        intoDataset = new SARSData();
    }
    State curState = s;
    int nsteps = 0;
    boolean terminated = model.terminal(s);
    while(!terminated && nsteps < maxSteps){
        List<Action> gas = ActionUtils.allApplicableActionsForTypes(this.actionTypes, curState);
        Action ga = gas.get(RandomFactory.getMapped(0).nextInt(gas.size()));
        EnvironmentOutcome eo = model.sample(curState, ga);
        intoDataset.add(curState, ga, eo.r, eo.op);
        curState = eo.op;
        terminated = eo.terminated;
        nsteps++;
    }
    return intoDataset;
}

Developer: jmacglashan, Project: burlap, Lines: 27, Source: SARSCollector.java
Example 6: computeF

import burlap.mdp.singleagent.environment.EnvironmentOutcome; //import the required package/class

public double computeF(PrioritizedSearchNode parentNode, Action generatingAction, HashableState successorState, EnvironmentOutcome eo) {
    double cumR = 0.;
    int d = 0;
    if(parentNode != null){
        double pCumR = cumulatedRewardMap.get(parentNode.s);
        cumR = pCumR + eo.r;
        int pD = depthMap.get(parentNode.s);
        if(!(generatingAction instanceof Option)){
            d = pD + 1;
        }
        else{
            d = pD + ((EnvironmentOptionOutcome)eo).numSteps();
        }
    }
    double H = heuristic.h(successorState.s());
    lastComputedCumR = cumR;
    lastComputedDepth = d;
    double weightedE = this.epsilon * this.epsilonWeight(d);
    double F = cumR + ((1. + weightedE)*H);
    return F;
}

Developer: jmacglashan, Project: burlap, Lines: 25, Source: DynamicWeightedAStar.java
Example 7: transitions

import burlap.mdp.singleagent.environment.EnvironmentOutcome; //import the required package/class

@Override
public List<TransitionProb> transitions(State s, Action a) {
    if(!(this.stateModel instanceof FullStateModel)){
        throw new RuntimeException("Factored Model cannot enumerate transition distribution, because the state model does not implement FullStateModel");
    }
    List<StateTransitionProb> stps = ((FullStateModel)this.stateModel).stateTransitions(s, a);
    List<TransitionProb> tps = new ArrayList<TransitionProb>(stps.size());
    for(StateTransitionProb stp : stps){
        double r = this.rf.reward(s, a, stp.s);
        boolean t = this.tf.isTerminal(stp.s);
        TransitionProb tp = new TransitionProb(stp.p, new EnvironmentOutcome(s, a, stp.s, r, t));
        tps.add(tp);
    }
    return tps;
}

Developer: jmacglashan, Project: burlap, Lines: 19, Source: FactoredModel.java
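As a small illustration of consuming the TransitionProb list produced above, the sketch below computes the probability-weighted immediate reward of a state-action pair. It relies only on the public p and eo fields of TransitionProb visible in Examples 7 and 19; the import path for TransitionProb is an assumption inferred from the package layout of the other classes in this article.

import java.util.List;
import burlap.mdp.singleagent.model.TransitionProb;

public class ExpectedReward {

    /**
     * Computes the expected one-step reward over an enumerated transition
     * distribution by weighting each outcome's reward by its probability.
     */
    public static double expectedReward(List<TransitionProb> transitions) {
        double expected = 0.;
        for (TransitionProb tp : transitions) {
            expected += tp.p * tp.eo.r;   // probability times that outcome's reward
        }
        return expected;
    }
}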
Example 8: actUntilTerminalOrMaxSteps

import burlap.mdp.singleagent.environment.EnvironmentOutcome; //import the required package/class

/**
 * Causes the agent to act for some fixed number of steps. The agent's belief is automatically
 * updated by this method using the specified {@link BeliefUpdate}.
 * The agent's action selection for the current belief state is defined by
 * the {@link #getAction(burlap.mdp.singleagent.pomdp.beliefstate.BeliefState)} method. The observation, action, and reward
 * sequence is saved in an {@link Episode} object and returned.
 * @param maxSteps the maximum number of steps to take in the environment
 * @return an {@link Episode} that records the observation, action, and reward sequence.
 */
public Episode actUntilTerminalOrMaxSteps(int maxSteps){
    Episode ea = new Episode();
    ea.initializeInState(this.environment.currentObservation());
    int c = 0;
    while(!this.environment.isInTerminalState() && c < maxSteps){
        Action ga = this.getAction(this.curBelief);
        EnvironmentOutcome eo = environment.executeAction(ga);
        ea.transition(ga, eo.op, eo.r);
        //update our belief
        this.curBelief = this.updater.update(this.curBelief, eo.op, eo.a);
        c++;
    }
    return ea;
}

Developer: jmacglashan, Project: burlap, Lines: 28, Source: BeliefAgent.java
Example 9: sampleExperiences

import burlap.mdp.singleagent.environment.EnvironmentOutcome; //import the required package/class

@Override
public List<EnvironmentOutcome> sampleExperiences(int n) {
    List<FrameExperience> samples = sampleFrameExperiences(n);
    List<EnvironmentOutcome> sampleOutcomes = new ArrayList<>(samples.size());
    for (FrameExperience exp : samples) {
        sampleOutcomes.add(new EnvironmentOutcome(exp.o, actionSet.get(exp.a), exp.op, exp.r, exp.terminated));
    }
    return sampleOutcomes;
}

Developer: h2r, Project: burlap_caffe, Lines: 12, Source: FrameExperienceMemory.java
Example 10: executeAction

import burlap.mdp.singleagent.environment.EnvironmentOutcome; //import the required package/class

public EnvironmentOutcome executeAction(ALEAction a) {
    // save start state
    State startState = currentState;
    // perform action
    boolean closed = io.act(a.aleCode);
    if (closed) {
        // the FIFO stream was closed
        throw new RuntimeException("ALE FIFO stream closed");
    }
    // Obtain the screen matrix
    Mat screen = io.getScreen();
    // Get RLData
    RLData rlData = io.getRLData();
    // Update Environment State
    lastReward = rlData.reward;
    isTerminal = rlData.isTerminal;
    currentState = new ALEState(screen);
    if (terminateOnEndLife) {
        if (rlData.isTerminal) {
            isTerminal = true;
            currentLives = 0;
        } else if (rlData.lives != currentLives) {
            isTerminal = true;
            currentLives = rlData.lives;
        }
    } else {
        isTerminal = rlData.isTerminal;
        currentLives = rlData.lives;
    }
    return new EnvironmentOutcome(startState, a, currentState, lastReward, isTerminal);
}

Developer: h2r, Project: burlap_ale, Lines: 38, Source: ALEEnvironment.java
Example 11: runLearningEpisode

import burlap.mdp.singleagent.environment.EnvironmentOutcome; //import the required package/class

@Override
public Episode runLearningEpisode(Environment env, int maxSteps) {
    //initialize our episode object with the initial state of the environment
    Episode e = new Episode(env.currentObservation());
    //behave until a terminal state or max steps is reached
    State curState = env.currentObservation();
    int steps = 0;
    while(!env.isInTerminalState() && (steps < maxSteps || maxSteps == -1)){
        //select an action
        Action a = this.learningPolicy.action(curState);
        //take the action and observe outcome
        EnvironmentOutcome eo = env.executeAction(a);
        //record result
        e.transition(eo);
        //get the max Q value of the resulting state if it's not terminal, 0 otherwise
        double maxQ = eo.terminated ? 0. : this.value(eo.op);
        //update the old Q-value
        QValue oldQ = this.storedQ(curState, a);
        oldQ.q = oldQ.q + this.learningRate * (eo.r + this.gamma * maxQ - oldQ.q);
        //update state pointer to next environment state observed
        curState = eo.op;
        steps++;
    }
    return e;
}

Developer: jmacglashan, Project: burlap_examples, Lines: 36, Source: QLTutorial.java
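A typical way to drive a learning agent like the one above is to run it for several episodes against an Environment, resetting the environment between episodes. The sketch below assumes the standard BURLAP LearningAgent and Episode APIs; the agent and environment are assumed to be constructed elsewhere (the QLTutorial constructor arguments are not shown in this article).

import burlap.behavior.singleagent.Episode;
import burlap.behavior.singleagent.learning.LearningAgent;
import burlap.mdp.singleagent.environment.Environment;

public class LearningLoop {

    /**
     * Runs a fixed number of learning episodes, resetting the environment between them.
     */
    public static void run(LearningAgent agent, Environment env, int episodes, int maxStepsPerEpisode) {
        for (int i = 0; i < episodes; i++) {
            Episode e = agent.runLearningEpisode(env, maxStepsPerEpisode);
            System.out.println("episode " + i + " took " + e.maxTimeStep() + " steps");
            env.resetEnvironment(); // start the next episode from a fresh initial state
        }
    }
}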
Example 12: executeAction

import burlap.mdp.singleagent.environment.EnvironmentOutcome; //import the required package/class

/**
 * Executes the specified action in this environment
 *
 * @param a the Action that is to be performed in this environment.
 * @return the resulting observation and reward transition from applying the given Action in this environment.
 */
@Override
public EnvironmentOutcome executeAction(Action a) {
    ShodanStateOil currentState = currentObservation();
    if(a.actionName().equals(ACTION_OPEN))
        shodan.setOpen(true);
    else {
        assert a.actionName().equals(ACTION_CLOSE);
        shodan.setOpen(false);
    }
    //run the model for another 30 days
    for(int day=0; day<30; day++)
        state.schedule.step(state);
    /*
    System.out.println(a.actionName() + " " + state.getFishers().get(0).getRegulation().allowedAtSea(null,state) +
            " " + state.getMap().getPorts().iterator().next().getGasPricePerLiter()
    );
    */
    ShodanStateOil newState = currentObservation();
    return new EnvironmentOutcome(
            currentState,
            a,
            newState,
            lastReward(),
            isInTerminalState()
    );
}

Developer: CarrKnight, Project: POSEIDON, Lines: 41, Source: ShodanEnvironment.java
Example 13: executeAction

import burlap.mdp.singleagent.environment.EnvironmentOutcome; //import the required package/class

@Override
public EnvironmentOutcome executeAction(Action a) {
    State startState = this.currentObservation();
    ActionPublisher ap = this.actionPublishers.get(a.actionName());
    if(ap == null){
        throw new RuntimeException("AbstractRosEnvironment has no ActionPublisher available to handle action " + a.toString());
    }
    int delay = ap.publishAction(a);
    if(delay > 0){
        try {
            Thread.sleep(delay);
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
    }
    State finalState = this.currentObservation();
    this.lastReward = this.getMostRecentRewardSignal(startState, a, finalState);
    EnvironmentOutcome eo = new EnvironmentOutcome(startState, a, finalState, this.lastReward, this.isInTerminalState());
    if(this.isInTerminalState()){
        this.handleEnterTerminalState();
    }
    return eo;
}

Developer: h2r, Project: burlap_rosbridge, Lines: 32, Source: AbstractRosEnvironment.java
Example 14: sample

import burlap.mdp.singleagent.environment.EnvironmentOutcome; //import the required package/class

@Override
public EnvironmentOutcome sample(State s, Action a) {
    if(!(a instanceof Option)){
        return model.sample(s, a);
    }
    Option o = (Option)a;
    SimulatedEnvironment env = new SimulatedEnvironment(model, s);
    return o.control(env, discount);
}

Developer: jmacglashan, Project: burlap, Lines: 12, Source: BFSMarkovOptionModel.java
Example 15: control

import burlap.mdp.singleagent.environment.EnvironmentOutcome; //import the required package/class

public static EnvironmentOptionOutcome control(Option o, Environment env, double discount){
    Random rand = RandomFactory.getMapped(0);
    State initial = env.currentObservation();
    State cur = initial;
    Episode episode = new Episode(cur);
    Episode history = new Episode(cur);
    double roll;
    double pT;
    int nsteps = 0;
    double r = 0.;
    double cd = 1.;
    do{
        Action a = o.policy(cur, history);
        EnvironmentOutcome eo = env.executeAction(a);
        nsteps++;
        r += cd*eo.r;
        cur = eo.op;
        cd *= discount;
        history.transition(a, eo.op, eo.r);
        AnnotatedAction annotatedAction = new AnnotatedAction(a, o.toString() + "(" + nsteps + ")");
        episode.transition(annotatedAction, eo.op, r);
        pT = o.probabilityOfTermination(eo.op, history);
        roll = rand.nextDouble();
    }while(roll > pT && !env.isInTerminalState());
    EnvironmentOptionOutcome eoo = new EnvironmentOptionOutcome(initial, o, cur, r, env.isInTerminalState(), discount, episode);
    return eoo;
}

Developer: jmacglashan, Project: burlap, Lines: 38, Source: Option.java
Example 16: executeAction

import burlap.mdp.singleagent.environment.EnvironmentOutcome; //import the required package/class

@Override
public EnvironmentOutcome executeAction(burlap.mdp.core.action.Action ga) {
    if(this.curState == null){
        this.blockUntilStateReceived();
    }
    if(!(ga instanceof RLGlueDomain.RLGlueActionType)){
        throw new RuntimeException("RLGlueEnvironment cannot execute actions that are not instances of RLGlueDomain.RLGlueSpecification.");
    }
    State prevState = this.curState;
    int actionId = ((RLGlueDomain.RLGlueActionType)ga).getInd();
    synchronized (nextAction) {
        this.nextStateReference.val = null;
        this.nextAction.val = actionId;
        this.nextAction.notifyAll();
    }
    DPrint.cl(debugCode, "Set action (" + this.nextAction.val + ")");
    State toRet;
    synchronized (this.nextStateReference) {
        while(this.nextStateReference.val == null){
            try{
                DPrint.cl(debugCode, "Waiting for state from RLGlue Server...");
                nextStateReference.wait();
            } catch(InterruptedException ex){
                ex.printStackTrace();
            }
        }
        toRet = this.curState;
        this.nextStateReference.val = null;
    }
    EnvironmentOutcome eo = new EnvironmentOutcome(prevState, ga, toRet, this.lastReward, this.curStateIsTerminal);
    return eo;
}

Developer: jmacglashan, Project: burlap, Lines: 41, Source: RLGlueAgent.java
Example 17: FixedSizeMemory

import burlap.mdp.singleagent.environment.EnvironmentOutcome; //import the required package/class

/**
 * Initializes with the size of the memory and whether the most recent memory should always be included
 * in the returned results from the sampling memory.
 * @param size the number of experiences to store
 * @param alwaysIncludeMostRecent if true, then the result of {@link #sampleExperiences(int)} will always include the most recent experience and is a uniform random sampling for the n-1 samples.
 *                                If false, then it is a pure random sample with replacement.
 */
public FixedSizeMemory(int size, boolean alwaysIncludeMostRecent) {
    if(size < 1){
        throw new RuntimeException("FixedSizeMemory requires memory size > 0; requested size was " + size);
    }
    this.alwaysIncludeMostRecent = alwaysIncludeMostRecent;
    this.memory = new EnvironmentOutcome[size];
}

Developer: jmacglashan, Project: burlap, Lines: 15, Source: FixedSizeMemory.java
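Assuming FixedSizeMemory exposes the same addExperience/sampleExperiences pair used by FrameExperienceMemory in Examples 1 and 9, a replay buffer built from it might be exercised roughly as below. This is a hedged sketch: the import path for FixedSizeMemory is not shown in this article and is an assumption, so check where the class lives in your BURLAP version.

import java.util.List;
import burlap.behavior.singleagent.learning.experiencereplay.FixedSizeMemory; // assumed path
import burlap.mdp.core.action.Action;
import burlap.mdp.singleagent.environment.Environment;
import burlap.mdp.singleagent.environment.EnvironmentOutcome;

public class ReplayBufferSketch {

    public static void fillAndSample(Environment env, List<Action> actions) {
        // keep the 1000 most recent experiences; always include the newest one in samples
        FixedSizeMemory memory = new FixedSizeMemory(1000, true);
        // collect one transition per action into the replay memory
        for (Action a : actions) {
            EnvironmentOutcome eo = env.executeAction(a);
            memory.addExperience(eo);                              // mirrors Example 1 (assumed available here)
        }
        // draw a random minibatch for a learning update
        List<EnvironmentOutcome> batch = memory.sampleExperiences(32); // mirrors Example 9
        System.out.println("sampled " + batch.size() + " experiences");
    }
}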
Example 18: updateModel

import burlap.mdp.singleagent.environment.EnvironmentOutcome; //import the required package/class

@Override
public void updateModel(EnvironmentOutcome eo) {
    HashableState sh = this.hashingFactory.hashState(eo.o);
    HashableState shp = this.hashingFactory.hashState(eo.op);
    if(eo.terminated){
        this.terminalStates.add(shp);
    }
    StateActionNode san = this.getOrCreateActionNode(sh, eo.a);
    san.update(eo.r, shp);
}

Developer: jmacglashan, Project: burlap, Lines: 15, Source: TabularModel.java
Example 19: transitions

import burlap.mdp.singleagent.environment.EnvironmentOutcome; //import the required package/class

@Override
public List<TransitionProb> transitions(State s, Action a) {
    List<TransitionProb> tps = sourceModel.transitions(s, a);
    for(TransitionProb tp : tps){
        EnvironmentOutcome eo = tp.eo;
        this.modifyEO(eo);
    }
    return tps;
}

Developer: jmacglashan, Project: burlap, Lines: 10, Source: RMaxModel.java
Example 20: modifyEO

import burlap.mdp.singleagent.environment.EnvironmentOutcome; //import the required package/class

protected void modifyEO(EnvironmentOutcome eo){
    double oldPotential = potentialFunction.potentialValue(eo.o);
    double nextPotential = 0.;
    if(!eo.terminated){
        nextPotential = potentialFunction.potentialValue(eo.op);
    }
    double bonus = gamma * nextPotential - oldPotential;
    eo.r = eo.r + bonus;
    if(!KWIKModel.Helper.stateTransitionsModeled(this, actionsTypes, eo.o)){
        eo.terminated = true;
    }
}

Developer: jmacglashan, Project: burlap, Lines: 14, Source: RMaxModel.java
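The bonus computed above is the standard potential-based shaping correction, r' = r + gamma * phi(s') - phi(s), with the potential of a terminal next state taken to be zero. The standalone sketch below shows just that correction applied to an EnvironmentOutcome; it is not the RMaxModel implementation itself, and a plain ToDoubleFunction stands in for the potential function so no BURLAP shaping import needs to be assumed.

import java.util.function.ToDoubleFunction;
import burlap.mdp.core.state.State;
import burlap.mdp.singleagent.environment.EnvironmentOutcome;

public class PotentialShaping {

    /**
     * Applies the potential-based shaping correction r' = r + gamma * phi(s') - phi(s)
     * to an outcome, mirroring the bonus computed in Example 20.
     */
    public static void shape(EnvironmentOutcome eo, ToDoubleFunction<State> potential, double gamma) {
        double oldPotential = potential.applyAsDouble(eo.o);
        double nextPotential = eo.terminated ? 0. : potential.applyAsDouble(eo.op);
        eo.r = eo.r + (gamma * nextPotential - oldPotential);
    }
}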
Note: The burlap.mdp.singleagent.environment.EnvironmentOutcome examples in this article are collected from source code and documentation hosted on platforms such as GitHub and MSDocs, with code snippets drawn from open-source projects contributed by their authors. Copyright of the source code remains with the original authors; please consult each project's license before distributing or using it, and do not republish without permission.