Java GridWorldDomain类代码示例

本文整理汇总了Java中burlap.domain.singleagent.gridworld.GridWorldDomain的典型用法代码示例。如果您正苦于以下问题:Java GridWorldDomain类的具体用法?Java GridWorldDomain怎么用?Java GridWorldDomain使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。


示例1: GridWorldDQN

import burlap.domain.singleagent.gridworld.GridWorldDomain; //导入依赖的package包/类
/**
 * Sets up the grid world task and the DQN solver for this example.
 *
 * Builds an 11x11 {@link GridWorldDomain} generator and its domain, a terminal function based on
 * the {@code PF_AT_LOCATION} propositional function, a goal condition wrapping that terminal
 * function, an initial state, a state hashing factory, a simulated environment and the DQN.
 *
 * @param solverFile forwarded unchanged to the {@code DQN} constructor
 * @param gamma forwarded unchanged to the {@code DQN} constructor (presumably the discount factor; confirm)
 */
public GridWorldDQN(String solverFile, double gamma) {

        //create the domain
        gwdg = new GridWorldDomain(11, 11);
        // NOTE(review): rf is assigned here but never referenced again in the visible code
        rf = new UniformCostRF();
        // terminal function is looked up by name among the propositional functions the generator produces
        tf = new SinglePFTF(PropositionalFunction.findPF(gwdg.generatePfs(), GridWorldDomain.PF_AT_LOCATION));
        domain = gwdg.generateDomain();

        goalCondition = new TFGoalCondition(tf);

        //set up the initial state of the task
        initialState = new GridWorldState(new GridAgent(0, 0), new GridLocation(10, 10, "loc0"));

        //set up the state hashing system for tabular algorithms
        hashingFactory = new SimpleHashableStateFactory();

        //set up the environment for learning algorithms
        env = new SimulatedEnvironment(domain, initialState);

        // NOTE(review): actionSet is not defined anywhere in this excerpt; confirm where it is initialised
        dqn = new DQN(solverFile, actionSet, new NNGridStateConverter(), gamma);

示例2: main

import burlap.domain.singleagent.gridworld.GridWorldDomain; //导入依赖的package包/类
/**
 * Builds a four-rooms grid world with stochastic transitions and attaches a
 * {@code VisualExplorer} with w/s/a/d key bindings for the four movement actions.
 *
 * NOTE(review): no call that opens the explorer window is visible in this excerpt.
 *
 * @param args not read by the visible code
 */
public static void main(String[] args) {

		GridWorldDomain gw = new GridWorldDomain(11,11); //11x11 grid world
		gw.setMapToFourRooms(); //four rooms layout
		gw.setProbSucceedTransitionDynamics(0.8); //stochastic transitions with 0.8 success rate
		Domain domain = gw.generateDomain(); //generate the grid world domain

		//setup initial state: agent set to (0, 0), location 0 set to (10, 10)
		State s = GridWorldDomain.getOneAgentOneLocationState(domain);
		GridWorldDomain.setAgent(s, 0, 0);
		GridWorldDomain.setLocation(s, 0, 10, 10);

		//create visualizer and explorer
		Visualizer v = GridWorldVisualizer.getVisualizer(gw.getMap());
		VisualExplorer exp = new VisualExplorer(domain, v, s);

		//set control keys to use w-s-a-d (north, south, west, east)
		exp.addKeyAction("w", GridWorldDomain.ACTIONNORTH);
		exp.addKeyAction("s", GridWorldDomain.ACTIONSOUTH);
		exp.addKeyAction("a", GridWorldDomain.ACTIONWEST);
		exp.addKeyAction("d", GridWorldDomain.ACTIONEAST);



示例3: BasicBehavior

import burlap.domain.singleagent.gridworld.GridWorldDomain; //导入依赖的package包/类
/**
 * Initialises the shared task objects for the example: an 11x11 grid world domain, a uniform-cost
 * reward function, a terminal function on the {@code PFATLOCATION} propositional function, the
 * matching goal condition, an initial state, a hashing factory and a simulated environment.
 */
public BasicBehavior(){
		gwdg = new GridWorldDomain(11, 11);
		domain = gwdg.generateDomain();

		rf = new UniformCostRF();
		// the terminal function is driven by the domain's at-location propositional function
		tf = new SinglePFTF(domain.getPropFunction(GridWorldDomain.PFATLOCATION));
		goalCondition = new TFGoalCondition(tf);

		// one agent and one location object; agent set to (0, 0), location 0 set to (10, 10)
		initialState = GridWorldDomain.getOneAgentNLocationState(domain, 1);
		GridWorldDomain.setAgent(initialState, 0, 0);
		GridWorldDomain.setLocation(initialState, 0, 10, 10);

		hashingFactory = new SimpleHashableStateFactory();

		env = new SimulatedEnvironment(domain, rf, tf, initialState);

		// Optional live visualisation, left disabled: wraps env in an EnvironmentServer with a visual observer.
//		VisualActionObserver observer = new VisualActionObserver(domain, GridWorldVisualizer.getVisualizer(gwdg.getMap()));
//		observer.initGUI();
//		env = new EnvironmentServer(env, observer);
//		((SADomain)domain).addActionObserverForAllAction(observer);

示例4: getRandomInitialState

import burlap.domain.singleagent.gridworld.GridWorldDomain; //导入依赖的package包/类
/**
 * Returns a random initial state in a world with no location objects. The agent will not be placed in any cell that contains an obstacle,
 * but any other cell is equiprobable.
 * @param gridWorldGenerator the grid world generator containing the map
 * @param d the domain object to which the state will be associated
 * @return a state with the agent in a random free position.
 */
public static State getRandomInitialState(GridWorldDomain gridWorldGenerator, Domain d) {
	Random r = RandomFactory.getMapped(0);
	State s = new MutableState();
	int [][] map = gridWorldGenerator.getMap();
	int rx = 0;
	int ry = 0;
		rx = r.nextInt(map.length);
		ry = r.nextInt(map[0].length);
	}while(map[rx][ry] == 1);
	ObjectInstance agent = new MutableObjectInstance(d.getObjectClass(CLASSAGENT), CLASSAGENT+0);
	agent.setValue(ATTX, rx);
	agent.setValue(ATTY, ry);
	return s;

示例5: BasicBehavior

import burlap.domain.singleagent.gridworld.GridWorldDomain; //导入依赖的package包/类
/**
 * Initialises the shared task objects for the example: an 11x11 grid world generator, a terminal
 * function at cell (10, 10), the matching goal condition, the generated domain, an initial state,
 * a hashing factory and a simulated environment.
 */
public BasicBehavior(){
		gwdg = new GridWorldDomain(11, 11);
		// NOTE(review): tf is not handed to gwdg in the visible code before generateDomain(); confirm it is wired in
		tf = new GridWorldTerminalFunction(10, 10);
		goalCondition = new TFGoalCondition(tf);
		domain = gwdg.generateDomain();

		// agent constructed with (0, 0); location "loc0" constructed with (10, 10)
		initialState = new GridWorldState(new GridAgent(0, 0), new GridLocation(10, 10, "loc0"));
		hashingFactory = new SimpleHashableStateFactory();

		env = new SimulatedEnvironment(domain, initialState);

		// Optional live visualisation, left disabled: attaches a visual observer to the environment.
//		VisualActionObserver observer = new VisualActionObserver(domain, GridWorldVisualizer.getVisualizer(gwdg.getMap()));
//		observer.initGUI();
//		env.addObservers(observer);

示例6: main

import burlap.domain.singleagent.gridworld.GridWorldDomain; //导入依赖的package包/类
/**
 * Builds a four-rooms grid world with stochastic transitions and attaches a
 * {@code VisualExplorer} with w/s/a/d key bindings for the four movement actions.
 *
 * NOTE(review): no call that opens the explorer window is visible in this excerpt.
 *
 * @param args not read by the visible code
 */
public static void main(String[] args) {

		GridWorldDomain gw = new GridWorldDomain(11,11); //11x11 grid world
		gw.setMapToFourRooms(); //four rooms layout
		gw.setProbSucceedTransitionDynamics(0.8); //stochastic transitions with 0.8 success rate
		SADomain domain = gw.generateDomain(); //generate the grid world domain

		//setup initial state: agent constructed with (0, 0), location "loc0" with (10, 10)
		State s = new GridWorldState(new GridAgent(0, 0), new GridLocation(10, 10, "loc0"));

		//create visualizer and explorer
		Visualizer v = GridWorldVisualizer.getVisualizer(gw.getMap());
		VisualExplorer exp = new VisualExplorer(domain, v, s);

		//set control keys to use w-s-a-d (north, south, west, east)
		// the third argument is an empty string for every binding (presumably action parameters; confirm)
		exp.addKeyAction("w", GridWorldDomain.ACTION_NORTH, "");
		exp.addKeyAction("s", GridWorldDomain.ACTION_SOUTH, "");
		exp.addKeyAction("a", GridWorldDomain.ACTION_WEST, "");
		exp.addKeyAction("d", GridWorldDomain.ACTION_EAST, "");



示例7: testOnGridWorld

import burlap.domain.singleagent.gridworld.GridWorldDomain; //导入依赖的package包/类
/**
 * Prepares a small grid world task: domain, state parser, reward and terminal functions,
 * goal condition, initial state and hashing factory.
 *
 * NOTE(review): the excerpt ends after the hashing factory is created; nothing that runs a
 * learner or asserts a result is visible here.
 */
public void testOnGridWorld() {
    //create the domain from an int[10][2] map array (all cells default to 0)
    gwdg = new GridWorldDomain(new int[10][2]);
    domain = gwdg.generateDomain();

    //create the state parser
    sp = new GridWorldStateParser(domain);

    //define the task
    rf = new UniformCostRF();

    tf = new SinglePFTF(domain.getPropFunction(GridWorldDomain.PFATLOCATION));
    goalCondition = new TFGoalCondition(tf);

    //set up the initial state of the tasks: agent set to (0, 0), location 0 set to (9, 1)
    initialState = gwdg.getOneAgentOneLocationState(domain);
    gwdg.setAgent(initialState, 0, 0);
    gwdg.setLocation(initialState, 0, 9, 1);

    //set up the state hashing system
    hashingFactory = new FeatureStateHashFactory();

示例8: vectorizeState

import burlap.domain.singleagent.gridworld.GridWorldDomain; //导入依赖的package包/类
/**
 * Writes the agent's position into {@code input} by putting the value 1 at the flat index
 * {@code y * width + x}, where the width comes from the grid world generator.
 *
 * NOTE(review): no other entries of {@code input} are written here; presumably the caller
 * supplies a zeroed buffer — confirm.
 *
 * @param state the state to encode; cast to {@code GridWorldState} without a type check
 * @param input the buffer that receives the encoding
 */
public void vectorizeState(State state, FloatPointer input) {
    GridWorldState gwState = (GridWorldState) state;

    int width = gwdg.getWidth();


    // read the agent object's x and y variables from the state
    ObjectInstance agent = gwState.object(GridWorldDomain.CLASS_AGENT);
    int x = (Integer)agent.get(GridWorldDomain.VAR_X);
    int y = (Integer)agent.get(GridWorldDomain.VAR_Y);

    // row-major flat index of the agent's cell
    input.put((long)(y*width + x), 1);

示例9: main

import burlap.domain.singleagent.gridworld.GridWorldDomain; //导入依赖的package包/类
/**
 * Plans on an 11x11 grid world with {@code VITutorial}, rolls out the resulting policy once
 * from the initial state and opens an episode visualizer on that single episode.
 *
 * @param args not read by the visible code
 */
public static void main(String [] args){

		GridWorldDomain gwd = new GridWorldDomain(11, 11);

		//only go in intended direction 80% of the time
		// NOTE(review): no call configuring stochastic transitions follows this comment in the visible code

		Domain domain = gwd.generateDomain();

		//get initial state with agent in 0,0
		State s = GridWorldDomain.getOneAgentNoLocationState(domain);
		GridWorldDomain.setAgent(s, 0, 0);

		//all transitions return -1
		RewardFunction rf = new UniformCostRF();

		//terminate in top right corner
		TerminalFunction tf = new GridWorldTerminalFunction(10, 10);

		//setup vi with 0.99 discount factor, a value
		//function initialization that initializes all states to value 0, and which will
		//run for 30 iterations over the state space
		VITutorial vi = new VITutorial(domain, rf, tf, 0.99, new SimpleHashableStateFactory(),
				new ValueFunctionInitialization.ConstantValueFunctionInitialization(0.0), 30);

		//run planning from our initial state
		Policy p = vi.planFromState(s);

		//evaluate the policy with one rollout and visualize the trajectory
		EpisodeAnalysis ea = p.evaluateBehavior(s, rf, tf);

		Visualizer v = GridWorldVisualizer.getVisualizer(gwd.getMap());
		new EpisodeSequenceVisualizer(v, domain, Arrays.asList(ea));


示例10: main

import burlap.domain.singleagent.gridworld.GridWorldDomain; //导入依赖的package包/类
/**
 * Sets up an 11x11 grid world, a simulated environment and a {@code QLTutorial} agent, then
 * opens an episode visualizer on the list of recorded episodes.
 *
 * @param args not read by the visible code
 */
public static void main(String[] args) {

		GridWorldDomain gwd = new GridWorldDomain(11, 11);

		Domain domain = gwd.generateDomain();

		//get initial state with agent in 0,0
		State s = GridWorldDomain.getOneAgentNoLocationState(domain);
		GridWorldDomain.setAgent(s, 0, 0);

		//all transitions return -1
		RewardFunction rf = new UniformCostRF();

		//terminate in top right corner
		TerminalFunction tf = new GridWorldTerminalFunction(10, 10);

		//create environment
		SimulatedEnvironment env = new SimulatedEnvironment(domain,rf, tf, s);

		//create Q-learning
		// NOTE(review): 0.99 is presumably the discount factor; the meaning of the two trailing 0.1 arguments is not visible here
		QLTutorial agent = new QLTutorial(domain, 0.99, new SimpleHashableStateFactory(),
				new ValueFunctionInitialization.ConstantValueFunctionInitialization(), 0.1, 0.1);

		//run Q-learning and store results in a list
		List<EpisodeAnalysis> episodes = new ArrayList<EpisodeAnalysis>(1000);
		// NOTE(review): the loop body and its closing brace are missing from this excerpt; nothing
		// visible adds to `episodes` or uses `agent`/`env` — confirm against the original source
		for(int i = 0; i < 1000; i++){

		Visualizer v = GridWorldVisualizer.getVisualizer(gwd.getMap());
		new EpisodeSequenceVisualizer(v, domain, episodes);


示例11: getStateRenderLayer

import burlap.domain.singleagent.gridworld.GridWorldDomain; //导入依赖的package包/类
/**
 * Returns a {@link StateRenderLayer} for the Macro-Cell GridWorld Visualizer and reward weights associated with a set of MacroCell Propositional functions.
 * @param map a map of the grid world
 * @param propFunctions the macro cell propositional functions
 * @param rewardMap the reward weights associated with the propositional function names
 * @return a {@link StateRenderLayer}
 */
// Assembles the render layer from two static painters (map, then macro-cell reward weights)
// and one object-class painter for the agent.
public static StateRenderLayer getStateRenderLayer(int [][] map, MacroCellGridWorld.InMacroCellPF[] propFunctions, Map<String, Double> rewardMap){
	StateRenderLayer r = new StateRenderLayer();
	// static painters are registered in this order: map first, reward weights second
	r.addStaticPainter(new GridWorldVisualizer.MapPainter(map));
	r.addStaticPainter(new MacroCellRewardWeightPainter(map, propFunctions, rewardMap));
	// agent objects are painted by a gray CellPainter (first argument 1 is presumably a shape code; confirm)
	r.addObjectClassPainter(GridWorldDomain.CLASSAGENT, new GridWorldVisualizer.CellPainter(1, Color.gray, map));
	return r;

示例12: launchExplorer

import burlap.domain.singleagent.gridworld.GridWorldDomain; //导入依赖的package包/类
/**
 * Creates a visual explorer that you can use to record trajectories. Use the "`" key to reset to a random initial state.
 * Use the w, s, d, and a keys to move north, south, east, and west, respectively. To enable recording,
 * first open up the shell and type: "rec -b" (you only need to type this once). Then you can move in the explorer as normal.
 * Each demonstration begins after an environment reset.
 * After each demonstration that you want to keep, go back to the shell and type "rec -r".
 * If you reset the environment before you type that,
 * the episode will be discarded. To temporarily view the episodes you've created, in the shell type "episode -v". To actually record your
 * episodes to file, type "rec -w path/to/save/directory base_file_name". For example: "rec -w irl_demos demo".
 * A recommendation for examples is to record two demonstrations that both go to the pink cell while avoiding blue ones
 * and do so from two different start locations on the left (if you keep resetting the environment, it will change where the agent starts).
 */
// Builds a simulated environment from this object's domain and initial state and wraps it in an
// 800x800 VisualExplorer with w/s/d/a bound to north/south/east/west.
// NOTE(review): no call that opens the explorer window is visible in this excerpt.
public void launchExplorer() {
    SimulatedEnvironment env = new SimulatedEnvironment(this.domain, this.initialState);
    VisualExplorer exp = new VisualExplorer(this.domain, env, this.v, 800, 800);
    // the third argument is an empty string for every binding (presumably action parameters; confirm)
    exp.addKeyAction("w", GridWorldDomain.ACTION_NORTH, "");
    exp.addKeyAction("s", GridWorldDomain.ACTION_SOUTH, "");
    exp.addKeyAction("d", GridWorldDomain.ACTION_EAST, "");
    exp.addKeyAction("a", GridWorldDomain.ACTION_WEST, "");

    // key-based episode recording is left disabled
    //exp.enableEpisodeRecording("r", "f", "irlDemo");


示例13: IRLExample

import burlap.domain.singleagent.gridworld.GridWorldDomain; //导入依赖的package包/类
/**
 * Initialises the example's fields: a 5x5 grid world generator, its domain, a state generator
 * built from a base state, and a visualizer for the generator's map.
 */
public IRLExample(){

		this.gwd = new GridWorldDomain(5 ,5);
		this.domain = gwd.generateDomain();
		// basicState() and LeftSideGen are defined outside this excerpt
		State bs = this.basicState();
		this.sg = new LeftSideGen(5, bs);
		this.v = GridWorldVisualizer.getVisualizer(this.gwd.getMap());


示例14: launchExplorer

import burlap.domain.singleagent.gridworld.GridWorldDomain; //导入依赖的package包/类
/**
 * Creates a visual explorer that you can use to record trajectories. Use the "`" key to reset to a random initial state.
 * Use the w, s, d, and a keys to move north, south, east, and west, respectively. To enable recording,
 * first open up the shell and type: "rec -b" (you only need to type this once). Then you can move in the explorer as normal.
 * Each demonstration begins after an environment reset.
 * After each demonstration that you want to keep, go back to the shell and type "rec -r".
 * If you reset the environment before you type that,
 * the episode will be discarded. To temporarily view the episodes you've created, in the shell type "episode -v". To actually record your
 * episodes to file, type "rec -w path/to/save/directory base_file_name". For example: "rec -w irl_demos demo".
 * A recommendation for examples is to record two demonstrations that both go to the pink cell while avoiding blue ones
 * and do so from two different start locations on the left (if you keep resetting the environment, it will change where the agent starts).
 */
// Builds a simulated environment from this object's domain and state generator and wraps it in an
// 800x800 VisualExplorer with w/s/d/a bound to north/south/east/west.
// NOTE(review): no call that opens the explorer window is visible in this excerpt.
public void launchExplorer(){
	SimulatedEnvironment env = new SimulatedEnvironment(this.domain, this.sg);
	VisualExplorer exp = new VisualExplorer(this.domain, env, this.v, 800, 800);
	// the third argument is an empty string for every binding (presumably action parameters; confirm)
	exp.addKeyAction("w", GridWorldDomain.ACTION_NORTH, "");
	exp.addKeyAction("s", GridWorldDomain.ACTION_SOUTH, "");
	exp.addKeyAction("d", GridWorldDomain.ACTION_EAST, "");
	exp.addKeyAction("a", GridWorldDomain.ACTION_WEST, "");

	// key-based episode recording is left disabled
	//exp.enableEpisodeRecording("r", "f", "irlDemo");


示例15: main

import burlap.domain.singleagent.gridworld.GridWorldDomain; //导入依赖的package包/类
/**
 * Plans on an 11x11 grid world with {@code VITutorial}, rolls out the resulting policy once
 * from the initial state and opens an episode visualizer on that single episode.
 *
 * @param args not read by the visible code
 */
public static void main(String [] args){

		GridWorldDomain gwd = new GridWorldDomain(11, 11);
		// terminal function at cell (10, 10) is installed on the generator before the domain is built
		gwd.setTf(new GridWorldTerminalFunction(10, 10));

		//only go in intended direction 80% of the time
		// NOTE(review): no call configuring stochastic transitions follows this comment in the visible code

		SADomain domain = gwd.generateDomain();

		//get initial state with agent in 0,0
		State s = new GridWorldState(new GridAgent(0, 0));

		//setup vi with 0.99 discount factor, a value
		//function initialization that initializes all states to value 0, and which will
		//run for 30 iterations over the state space
		VITutorial vi = new VITutorial(domain, 0.99, new SimpleHashableStateFactory(),
				new ConstantValueFunction(0.0), 30);

		//run planning from our initial state
		Policy p = vi.planFromState(s);

		//evaluate the policy with one rollout and visualize the trajectory
		Episode ea = PolicyUtils.rollout(p, s, domain.getModel());

		Visualizer v = GridWorldVisualizer.getVisualizer(gwd.getMap());
		new EpisodeSequenceVisualizer(v, domain, Arrays.asList(ea));


示例16: main

import burlap.domain.singleagent.gridworld.GridWorldDomain; //导入依赖的package包/类
/**
 * Sets up an 11x11 grid world, a simulated environment and a {@code QLTutorial} agent, then
 * opens an episode visualizer on the list of recorded episodes.
 *
 * @param args not read by the visible code
 */
public static void main(String[] args) {

		GridWorldDomain gwd = new GridWorldDomain(11, 11);
		// terminal function at cell (10, 10) is installed on the generator before the domain is built
		gwd.setTf(new GridWorldTerminalFunction(10, 10));

		SADomain domain = gwd.generateDomain();

		//get initial state with agent in 0,0
		State s = new GridWorldState(new GridAgent(0, 0));

		//create environment
		SimulatedEnvironment env = new SimulatedEnvironment(domain, s);

		//create Q-learning
		// NOTE(review): 0.99 is presumably the discount factor; the meaning of the two trailing 0.1 arguments is not visible here
		QLTutorial agent = new QLTutorial(domain, 0.99, new SimpleHashableStateFactory(),
				new ConstantValueFunction(), 0.1, 0.1);

		//run Q-learning and store results in a list
		List<Episode> episodes = new ArrayList<Episode>(1000);
		// NOTE(review): the loop body and its closing brace are missing from this excerpt; nothing
		// visible adds to `episodes` or uses `agent`/`env` — confirm against the original source
		for(int i = 0; i < 1000; i++){

		Visualizer v = GridWorldVisualizer.getVisualizer(gwd.getMap());
		new EpisodeSequenceVisualizer(v, domain, episodes);


示例17: main

import burlap.domain.singleagent.gridworld.GridWorldDomain; //导入依赖的package包/类
/**
 * Smoke-runs a {@code DeepNNModel} on an 11x11 grid world: builds a feature-vector state,
 * creates the model, then performs 100 steps that each execute a randomly chosen grounded
 * action and feed the transition to {@code model.updateModel}.
 *
 * @param args not read by the visible code
 */
public static void main(String[] args) {
    GridWorldDomain skbdg = new GridWorldDomain(11, 11);

    Domain d = skbdg.generateDomain();
    FeatureStateGenerator fsg = new FeatureStateGenerator(new MockGWStateToFeatureVectorGenerator(d));
    State s = fsg.fromState(skbdg.getOneAgentOneLocationState(d));

    StateHashFactory hashingFactory = new FeatureStateHashFactory();

    // the feature-vector length is passed to the model; the trailing 10 is not explained in this excerpt
    // NOTE(review): fsg.fromState is applied to s, which is itself already the result of fsg.fromState
    DeepNNModel model = new DeepNNModel(d, "res/gridworld_solver.prototxt", fsg.fromState(s).features.length, hashingFactory, 10);

    // Try it out: ground every domain action with the agent class name as its only parameter
    List<Action> actions = d.getActions();
    List<GroundedAction> gas = new ArrayList<GroundedAction>();
    for (Action a : actions)
        gas.add(new GroundedAction(a, new String[]{GridWorldDomain.CLASSAGENT}));

    // Each iteration picks a grounded action uniformly at random (not specifically north)
    for (int i = 0; i < 100; i++) {
        System.out.println("Episode " + i);
        GroundedAction ga = gas.get((int) (Math.random() * gas.size()));
        State sp = fsg.fromState(ga.executeIn(s));
        // every transition is reported with reward -0.1 and a false final flag (presumably "terminal"; confirm)
        model.updateModel(s, ga, sp, -0.1, false);
        s = sp;

示例18: MockGWStateToFeatureVectorGenerator

import burlap.domain.singleagent.gridworld.GridWorldDomain; //导入依赖的package包/类
/**
 * Caches the domain's x and y attributes and derives the grid dimensions from their limits.
 *
 * @param domain the domain whose {@code ATTX} and {@code ATTY} attributes are read
 */
public MockGWStateToFeatureVectorGenerator(Domain domain) {
    this.domain = domain;
    x = domain.getAttribute(GridWorldDomain.ATTX);
    y = domain.getAttribute(GridWorldDomain.ATTY);
    // dimension = (upper limit - lower limit) truncated to int, plus one
    height = (int) (y.upperLim - y.lowerLim) + 1;
    width = (int) (x.upperLim - x.lowerLim) + 1;

示例19: generateFeatureVectorFrom

import burlap.domain.singleagent.gridworld.GridWorldDomain; //导入依赖的package包/类
/**
 * Encodes the agent's position as a one-hot vector of length {@code height * width}.
 *
 * @param s the state to encode; the first object of the agent class is used
 * @return an array that is 1 at the agent's flat cell index and 0 everywhere else
 */
public double[] generateFeatureVectorFrom(State s) {
    double[] features = new double[height * width];
    ObjectInstance agent = s.getObjectsOfTrueClass(GridWorldDomain.CLASSAGENT).get(0);
    // flat index = (x - x lower limit) + (y - y lower limit) * width
    int pos = agent.getDiscValForAttribute(GridWorldDomain.ATTX) - (int) (x.lowerLim) +
            (agent.getDiscValForAttribute(GridWorldDomain.ATTY) - (int) (y.lowerLim)) * width;
    for (int i = 0; i < features.length; i++)
        features[i] = (i == pos) ? 1 : 0;
    return features;

示例20: fourierBasisLearner

import burlap.domain.singleagent.gridworld.GridWorldDomain; //导入依赖的package包/类
/**
 * Returns a factory named "Fourier Basis" whose agents are {@code GradientDescentSarsaLam}
 * learners using a linear value function approximator over a Fourier basis of the agent
 * object's features.
 *
 * @param domain passed to each generated learner
 * @param rf passed to each generated learner
 * @param tf passed to each generated learner
 * @param hashFactory not referenced in the visible code
 * @return the learning agent factory
 */
private static LearningAgentFactory fourierBasisLearner(final Domain domain, final RewardFunction rf, final TerminalFunction tf, StateHashFactory hashFactory) {
    return new LearningAgentFactory() {
        public String getAgentName() {
            return "Fourier Basis";

        public LearningAgent generateAgent() {
            // Fourier basis built over the agent class's concatenated feature vector; the second argument is 4 (presumably the basis order; confirm)
            FeatureDatabase fd = new FourierBasis(new ConcatenatedObjectFeatureVectorGenerator(true, GridWorldDomain.CLASSAGENT), 4);
            // NOTE(review): 0.99 is presumably the discount factor; the meaning of the remaining numeric arguments is not visible here
            return new GradientDescentSarsaLam(domain, rf, tf, 0.99, new LinearVFA(fd, 1.0), 0.02, 10000, 0.5);









