This article collects typical usage examples of the Java class burlap.mdp.singleagent.SADomain. If you are unsure what SADomain is for or how to use it, the curated examples below should help.
The SADomain class belongs to the burlap.mdp.singleagent package. Twenty code examples of SADomain are shown below, ordered by popularity by default.
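Before working through the individual examples, the snippet below sketches the pattern most of them share: obtain an SADomain from a domain generator, hand it to a planner or learner, and roll out the resulting policy against the domain's model. It is a minimal, self-contained illustration assembled from the grid-world examples further down (Examples 5, 17, and 20); the class name SADomainSketch and the specific discount, convergence delta, and iteration values are illustrative choices, not taken from any one project.

import burlap.behavior.policy.Policy;
import burlap.behavior.policy.PolicyUtils;
import burlap.behavior.singleagent.Episode;
import burlap.behavior.singleagent.planning.stochastic.valueiteration.ValueIteration;
import burlap.domain.singleagent.gridworld.GridWorldDomain;
import burlap.domain.singleagent.gridworld.GridWorldTerminalFunction;
import burlap.domain.singleagent.gridworld.state.GridAgent;
import burlap.domain.singleagent.gridworld.state.GridWorldState;
import burlap.mdp.core.state.State;
import burlap.mdp.singleagent.SADomain;
import burlap.statehashing.SimpleHashableStateFactory;

public class SADomainSketch {

    public static void main(String[] args) {
        // Configure a stock domain generator; generateDomain() returns the SADomain.
        GridWorldDomain gwd = new GridWorldDomain(11, 11);
        gwd.setMapToFourRooms();
        gwd.setTf(new GridWorldTerminalFunction(10, 10)); // terminate at the goal cell
        SADomain domain = gwd.generateDomain();

        // Initial state with the agent in the lower-left corner.
        State s = new GridWorldState(new GridAgent(0, 0));

        // BURLAP planners and learners take the SADomain in their constructors.
        ValueIteration vi = new ValueIteration(domain, 0.99,
                new SimpleHashableStateFactory(), 0.001, 100);
        Policy p = vi.planFromState(s);

        // Roll out the resulting policy against the domain's model.
        Episode e = PolicyUtils.rollout(p, s, domain.getModel());
        System.out.println("Steps taken: " + e.maxTimeStep());
    }
}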
Example 1: DeepQLearner
import burlap.mdp.singleagent.SADomain; // import the class this example depends on
public DeepQLearner(SADomain domain, double gamma, int replayStartSize, Policy policy, DQN vfa, StateMapping stateMapping) {
    super(domain, gamma, vfa, stateMapping);
    if (replayStartSize > 0) {
        System.out.println(String.format("Starting with random policy for %d frames", replayStartSize));
        this.replayStartSize = replayStartSize;
        this.trainingPolicy = policy;
        setLearningPolicy(new RandomPolicy(domain));
        runningRandomPolicy = true;
    } else {
        setLearningPolicy(policy);
        runningRandomPolicy = false;
    }
}
Developer: h2r, Project: burlap_caffe, Lines: 17, Source file: DeepQLearner.java
Example 2: generateDomain
import burlap.mdp.singleagent.SADomain; // import the class this example depends on
@Override
public SADomain generateDomain() {
    SADomain domain = new SADomain();
    domain.addActionTypes(
            new UniversalActionType(ACTION_NORTH),
            new UniversalActionType(ACTION_SOUTH),
            new UniversalActionType(ACTION_EAST),
            new UniversalActionType(ACTION_WEST));
    GridWorldStateModel smodel = new GridWorldStateModel();
    RewardFunction rf = new ExampleRF();
    TerminalFunction tf = new ExampleTF();
    domain.setModel(new FactoredModel(smodel, rf, tf));
    return domain;
}
Developer: honzaMaly, Project: kusanagi, Lines: 21, Source file: ExampleGridWorld.java
Example 3: generateDomain
import burlap.mdp.singleagent.SADomain; // import the class this example depends on
@Override
public SADomain generateDomain() {
    SADomain domain = new SADomain();
    domain.addActionTypes(
            new UniversalActionType(NextActionEnumerations.YES.name()),
            new UniversalActionType(NextActionEnumerations.NO.name()));
    // unknown reward
    RewardFunction rf = (state, action, state1) -> defaultReward;
    // no terminal state
    TerminalFunction tf = state -> false;
    domain.setModel(new FactoredModel(model, rf, tf));
    return domain;
}
Developer: honzaMaly, Project: kusanagi, Lines: 18, Source file: DecisionDomainGenerator.java
Example 4: IPSS
import burlap.mdp.singleagent.SADomain; // import the class this example depends on
public static void IPSS(){
    InvertedPendulum ip = new InvertedPendulum();
    ip.physParams.actionNoise = 0.;
    RewardFunction rf = new InvertedPendulum.InvertedPendulumRewardFunction(Math.PI/8.);
    TerminalFunction tf = new InvertedPendulum.InvertedPendulumTerminalFunction(Math.PI/8.);
    ip.setRf(rf);
    ip.setTf(tf);
    SADomain domain = ip.generateDomain();
    State initialState = new InvertedPendulumState();
    SparseSampling ss = new SparseSampling(domain, 1, new SimpleHashableStateFactory(), 10, 1);
    ss.setForgetPreviousPlanResults(true);
    ss.toggleDebugPrinting(false);
    Policy p = new GreedyQPolicy(ss);
    Episode e = PolicyUtils.rollout(p, initialState, domain.getModel(), 500);
    System.out.println("Num steps: " + e.maxTimeStep());
    Visualizer v = CartPoleVisualizer.getCartPoleVisualizer();
    new EpisodeSequenceVisualizer(v, domain, Arrays.asList(e));
}
Developer: jmacglashan, Project: burlap_examples, Lines: 24, Source file: ContinuousDomainTutorial.java
Example 5: main
import burlap.mdp.singleagent.SADomain; // import the class this example depends on
public static void main(String[] args) {
    GridWorldDomain gw = new GridWorldDomain(11,11); // 11x11 grid world
    gw.setMapToFourRooms(); // four-rooms layout
    gw.setProbSucceedTransitionDynamics(0.8); // stochastic transitions with 0.8 success rate
    SADomain domain = gw.generateDomain(); // generate the grid world domain
    // set up the initial state
    State s = new GridWorldState(new GridAgent(0, 0), new GridLocation(10, 10, "loc0"));
    // create the visualizer and explorer
    Visualizer v = GridWorldVisualizer.getVisualizer(gw.getMap());
    VisualExplorer exp = new VisualExplorer(domain, v, s);
    // set the control keys to w-s-a-d
    exp.addKeyAction("w", GridWorldDomain.ACTION_NORTH, "");
    exp.addKeyAction("s", GridWorldDomain.ACTION_SOUTH, "");
    exp.addKeyAction("a", GridWorldDomain.ACTION_WEST, "");
    exp.addKeyAction("d", GridWorldDomain.ACTION_EAST, "");
    exp.initGUI();
}
Developer: jmacglashan, Project: burlap_examples, Lines: 24, Source file: HelloGridWorld.java
Example 6: generateDomain
import burlap.mdp.singleagent.SADomain; // import the class this example depends on
@Override
public SADomain generateDomain() {
    SADomain domain = new SADomain();
    domain.addActionTypes(
            new UniversalActionType(ACTION_NORTH),
            new UniversalActionType(ACTION_SOUTH),
            new UniversalActionType(ACTION_EAST),
            new UniversalActionType(ACTION_WEST));
    GridWorldStateModel smodel = new GridWorldStateModel();
    RewardFunction rf = new ExampleRF(this.goalx, this.goaly);
    TerminalFunction tf = new ExampleTF(this.goalx, this.goaly);
    domain.setModel(new FactoredModel(smodel, rf, tf));
    return domain;
}
Developer: jmacglashan, Project: burlap_examples, Lines: 21, Source file: ExampleGridWorld.java
Example 7: main
import burlap.mdp.singleagent.SADomain; // import the class this example depends on
public static void main(String [] args){
    ExampleGridWorld gen = new ExampleGridWorld();
    gen.setGoalLocation(10, 10);
    SADomain domain = gen.generateDomain();
    State initialState = new EXGridState(0, 0);
    SimulatedEnvironment env = new SimulatedEnvironment(domain, initialState);
    Visualizer v = gen.getVisualizer();
    VisualExplorer exp = new VisualExplorer(domain, env, v);
    exp.addKeyAction("w", ACTION_NORTH, "");
    exp.addKeyAction("s", ACTION_SOUTH, "");
    exp.addKeyAction("d", ACTION_EAST, "");
    exp.addKeyAction("a", ACTION_WEST, "");
    exp.initGUI();
}
Developer: jmacglashan, Project: burlap_examples, Lines: 21, Source file: ExampleGridWorld.java
Example 8: processCommand
import burlap.mdp.singleagent.SADomain; // import the class this example depends on
@Override
public void processCommand(ICommandSender p_71515_1_, String[] p_71515_2_) {
    MinecraftDomainGenerator mdg = new MinecraftDomainGenerator();
    SADomain domain = mdg.generateDomain();
    State in = MinecraftStateGeneratorHelper.getCurrentState(BurlapCraft.currentDungeon);
    List<State> reachable = StateReachability.getReachableStates(in, domain, new SimpleHashableStateFactory());
    for(State s : reachable){
        OOState os = (OOState)s;
        BCAgent a = (BCAgent)os.object(CLASS_AGENT);
        System.out.println(a.x + ", " + a.y + ", " + a.z + ", " + a.rdir + ", " + a.vdir + ", " + a.selected);
    }
    System.out.println(reachable.size());
}
Developer: h2r, Project: burlapcraft, Lines: 17, Source file: CommandReachable.java
Example 9: testSimpleHashFactoryIdentifierDependent
import burlap.mdp.singleagent.SADomain; // import the class this example depends on
@Test
public void testSimpleHashFactoryIdentifierDependent() {
    SADomain domain = (SADomain)this.gridWorldTest.getDomain();
    State startState = this.gridWorldTest.generateState();
    HashableStateFactory factory = new SimpleHashableStateFactory(false);
    Set<HashableState> hashedStates = this.getReachableHashedStates(startState, domain, factory);
    assert(hashedStates.size() == 104);
    Set<HashableState> renamedStates = new HashSet<HashableState>();
    for (HashableState state : hashedStates) {
        State source = state.s();
        State renamed = this.renameObjects((GridWorldState)source.copy());
        HashableState renamedHashed = factory.hashState(renamed);
        renamedStates.add(renamedHashed);
    }
    hashedStates.addAll(renamedStates);
    assert(hashedStates.size() == 208);
}
Developer: jmacglashan, Project: burlap, Lines: 19, Source file: TestHashing.java
Example 10: DifferentiableSparseSampling
import burlap.mdp.singleagent.SADomain; // import the class this example depends on
/**
 * Initializes. The model of this planner will automatically be set to a {@link CustomRewardModel} using the provided reward function.
 * @param domain the problem domain
 * @param rf the differentiable reward function
 * @param gamma the discount factor
 * @param hashingFactory the hashing factory used to compare state equality
 * @param h the planning horizon
 * @param c how many samples from the transition dynamics to use. Set to -1 to use the full (unsampled) transition dynamics.
 * @param boltzBeta the Boltzmann beta parameter for the differentiable Boltzmann (softmax) backup equation. The larger the value, the more deterministic the backup; smaller values produce a softer (more uniform) operator.
 */
public DifferentiableSparseSampling(SADomain domain, DifferentiableRF rf, double gamma, HashableStateFactory hashingFactory, int h, int c, double boltzBeta){
    this.solverInit(domain, gamma, hashingFactory);
    this.h = h;
    this.c = c;
    this.rf = rf;
    this.boltzBeta = boltzBeta;
    this.nodesByHeight = new HashMap<SparseSampling.HashedHeightState, DiffStateNode>();
    this.rootLevelQValues = new HashMap<HashableState, DifferentiableSparseSampling.QAndQGradient>();
    this.rfDim = rf.numParameters();
    this.vinit = new VanillaDiffVinit(new ConstantValueFunction(), rf);
    this.model = new CustomRewardModel(domain.getModel(), rf);
    this.operator = new DifferentiableSoftmaxOperator(boltzBeta);
    this.debugCode = 6368290;
}
Developer: jmacglashan, Project: burlap, Lines: 29, Source file: DifferentiableSparseSampling.java
Example 11: main
import burlap.mdp.singleagent.SADomain; // import the class this example depends on
/**
 * Main function to test the domain.
 * Note: the termination conditions are not checked when testing the domain this way, so it is
 * impossible to win or die, which may trigger bugs. To enable them, uncomment the code in the "update" function.
 *
 * @param args command line args
 */
public static void main(String[] args) {
    FrostbiteDomain fd = new FrostbiteDomain();
    SADomain d = fd.generateDomain();
    State s = new FrostbiteState();
    Visualizer vis = FrostbiteVisualizer.getVisualizer();
    VisualExplorer exp = new VisualExplorer(d, vis, s);
    exp.addKeyAction("a", ACTION_WEST, "");
    exp.addKeyAction("d", ACTION_EAST, "");
    exp.addKeyAction("w", ACTION_NORTH, "");
    exp.addKeyAction("s", ACTION_SOUTH, "");
    exp.addKeyAction("x", ACTION_IDLE, "");
    exp.initGUI();
}
Developer: jmacglashan, Project: burlap, Lines: 24, Source file: FrostbiteDomain.java
Example 12: ActionSet
import burlap.mdp.singleagent.SADomain; // import the class this example depends on
public ActionSet(SADomain domain) {
    List<Action> actionList = ActionUtils.allApplicableActionsForTypes(domain.getActionTypes(), null);
    size = actionList.size();
    actions = new Action[size];
    actionList.toArray(actions);
    initActionMap();
}
Developer: h2r, Project: burlap_caffe, Lines: 9, Source file: ActionSet.java
Example 13: generateDomain
import burlap.mdp.singleagent.SADomain; // import the class this example depends on
@Override
public SADomain generateDomain() {
    SADomain domain = new SADomain();
    // add an action type to the domain for each named ALE action
    for (String actionName : actionNames) {
        domain.addActionType(new UniversalActionType(new ALEAction(actionName)));
    }
    return domain;
}
Developer: h2r, Project: burlap_ale, Lines: 12, Source file: ALEDomainGenerator.java
Example 14: ALEVisualExplorer
import burlap.mdp.singleagent.SADomain; // import the class this example depends on
public ALEVisualExplorer(SADomain domain, Environment env, Visualizer painter, boolean human) {
    super(domain, env, painter,
            (int)(ALEDomainConstants.ALEScreenWidth * widthRatio),
            (int)(ALEDomainConstants.ALEScreenHeight * heightRatio));
    if (human) {
        enableHumanInput();
    }
}
Developer: h2r, Project: burlap_ale, Lines: 10, Source file: ALEVisualExplorer.java
Example 15: learnPolicy
import burlap.mdp.singleagent.SADomain; // import the class this example depends on
@Override
public Policy learnPolicy(SADomain domain, List<Episode> episodes, int numberOfStates, int numberOfSamplesToUse) {
    // create the reward function features to use
    LocationFeatures features = new LocationFeatures(numberOfStates);
    // create a reward function that is linear with respect to those features and starts
    // with small random parameter values
    LinearStateDifferentiableRF rf = new LinearStateDifferentiableRF(features, numberOfStates);
    for (int i = 0; i < rf.numParameters() - 1; i++) {
        rf.setParameter(i, RandomFactory.getMapped(0).nextDouble() * 0.2 - 0.1);
    }
    // set the last "dummy state" parameter to a large negative number since we do not want to go there
    rf.setParameter(rf.numParameters() - 1, MLIRLWithGuard.minReward);
    // use either DifferentiableVI or DifferentiableSparseSampling for planning. The latter enables
    // receding-horizon IRL, but you will probably want a fairly large horizon for this kind of reward function.
    HashableStateFactory hashingFactory = new SimpleHashableStateFactory();
    // DifferentiableVI dplanner = new DifferentiableVI(domain, rf, 0.99, beta, hashingFactory, 0.01, 100);
    DifferentiableSparseSampling dplanner = new DifferentiableSparseSampling(domain, rf, 0.99, hashingFactory, (int) Math.sqrt(numberOfStates), numberOfSamplesToUse, beta);
    dplanner.toggleDebugPrinting(doNotPrintDebug);
    // define the IRL problem
    MLIRLRequest request = new MLIRLRequest(domain, dplanner, episodes, rf);
    request.setBoltzmannBeta(beta);
    // run MLIRL on it
    MLIRL irl = new MLIRLWithGuard(request, 0.1, 0.1, steps);
    irl.performIRL();
    return new GreedyQPolicy((QProvider) request.getPlanner());
}
Developer: honzaMaly, Project: kusanagi, Lines: 34, Source file: PolicyLearningServiceImpl.java
Example 16: VITutorial
import burlap.mdp.singleagent.SADomain; // import the class this example depends on
public VITutorial(SADomain domain, double gamma,
                  HashableStateFactory hashingFactory, ValueFunction vinit, int numIterations){
    this.solverInit(domain, gamma, hashingFactory);
    this.vinit = vinit;
    this.numIterations = numIterations;
    this.valueFunction = new HashMap<HashableState, Double>();
}
Developer: jmacglashan, Project: burlap_examples, Lines: 8, Source file: VITutorial.java
Example 17: main
import burlap.mdp.singleagent.SADomain; // import the class this example depends on
public static void main(String [] args){
    GridWorldDomain gwd = new GridWorldDomain(11, 11);
    gwd.setTf(new GridWorldTerminalFunction(10, 10));
    gwd.setMapToFourRooms();
    // only go in the intended direction 80% of the time
    gwd.setProbSucceedTransitionDynamics(0.8);
    SADomain domain = gwd.generateDomain();
    // get the initial state with the agent at 0,0
    State s = new GridWorldState(new GridAgent(0, 0));
    // set up VI with a 0.99 discount factor, a value function initialization that
    // initializes all states to value 0, and 30 iterations over the state space
    VITutorial vi = new VITutorial(domain, 0.99, new SimpleHashableStateFactory(),
            new ConstantValueFunction(0.0), 30);
    // run planning from our initial state
    Policy p = vi.planFromState(s);
    // evaluate the policy with one rollout and visualize the trajectory
    Episode ea = PolicyUtils.rollout(p, s, domain.getModel());
    Visualizer v = GridWorldVisualizer.getVisualizer(gwd.getMap());
    new EpisodeSequenceVisualizer(v, domain, Arrays.asList(ea));
}
Developer: jmacglashan, Project: burlap_examples, Lines: 31, Source file: VITutorial.java
Example 18: main
import burlap.mdp.singleagent.SADomain; // import the class this example depends on
public static void main(String[] args) {
    MountainCar mcGen = new MountainCar();
    SADomain domain = mcGen.generateDomain();
    StateGenerator rStateGen = new MCRandomStateGenerator(mcGen.physParams);
    SARSCollector collector = new SARSCollector.UniformRandomSARSCollector(domain);
    SARSData dataset = collector.collectNInstances(rStateGen, domain.getModel(), 5000, 20, null);
    NormalizedVariableFeatures features = new NormalizedVariableFeatures()
            .variableDomain("x", new VariableDomain(mcGen.physParams.xmin, mcGen.physParams.xmax))
            .variableDomain("v", new VariableDomain(mcGen.physParams.vmin, mcGen.physParams.vmax));
    FourierBasis fb = new FourierBasis(features, 4);
    LSPI lspi = new LSPI(domain, 0.99, new DenseCrossProductFeatures(fb, 3), dataset);
    Policy p = lspi.runPolicyIteration(30, 1e-6);
    Visualizer v = MountainCarVisualizer.getVisualizer(mcGen);
    VisualActionObserver vob = new VisualActionObserver(v);
    vob.initGUI();
    SimulatedEnvironment env = new SimulatedEnvironment(domain,
            new MCState(mcGen.physParams.valleyPos(), 0));
    EnvironmentServer envServ = new EnvironmentServer(env, vob);
    for(int i = 0; i < 100; i++){
        PolicyUtils.rollout(p, envServ);
        envServ.resetEnvironment();
    }
    System.out.println("Finished");
}
Developer: jmacglashan, Project: burlap_examples, Lines: 34, Source file: MCVideo.java
Example 19: QLTutorial
import burlap.mdp.singleagent.SADomain; // import the class this example depends on
public QLTutorial(SADomain domain, double gamma, HashableStateFactory hashingFactory,
                  QFunction qinit, double learningRate, double epsilon){
    this.solverInit(domain, gamma, hashingFactory);
    this.qinit = qinit;
    this.learningRate = learningRate;
    this.qValues = new HashMap<HashableState, List<QValue>>();
    this.learningPolicy = new EpsilonGreedy(this, epsilon);
}
Developer: jmacglashan, Project: burlap_examples, Lines: 11, Source file: QLTutorial.java
Example 20: main
import burlap.mdp.singleagent.SADomain; // import the class this example depends on
public static void main(String[] args) {
    GridWorldDomain gwd = new GridWorldDomain(11, 11);
    gwd.setMapToFourRooms();
    gwd.setProbSucceedTransitionDynamics(0.8);
    gwd.setTf(new GridWorldTerminalFunction(10, 10));
    SADomain domain = gwd.generateDomain();
    // get the initial state with the agent at 0,0
    State s = new GridWorldState(new GridAgent(0, 0));
    // create the environment
    SimulatedEnvironment env = new SimulatedEnvironment(domain, s);
    // create the Q-learning agent
    QLTutorial agent = new QLTutorial(domain, 0.99, new SimpleHashableStateFactory(),
            new ConstantValueFunction(), 0.1, 0.1);
    // run Q-learning and store the resulting episodes in a list
    List<Episode> episodes = new ArrayList<Episode>(1000);
    for(int i = 0; i < 1000; i++){
        episodes.add(agent.runLearningEpisode(env));
        env.resetEnvironment();
    }
    Visualizer v = GridWorldVisualizer.getVisualizer(gwd.getMap());
    new EpisodeSequenceVisualizer(v, domain, episodes);
}
Developer: jmacglashan, Project: burlap_examples, Lines: 31, Source file: QLTutorial.java
Note: the burlap.mdp.singleagent.SADomain examples in this article were collected from source code and documentation platforms such as GitHub and MSDocs. The snippets come from open-source projects contributed by their developers; copyright remains with the original authors, and distribution and use are subject to the license of the corresponding project. Do not repost without permission.