This article collects typical usage examples of the Java class burlap.mdp.core.action.Action. If you have been wondering what the Action class does and how to use it, the curated examples below should help.
The Action class belongs to the burlap.mdp.core.action package. Twenty code examples are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java examples.
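Before the examples, a minimal sketch of the interface itself may help. This snippet is not taken from any of the projects below; it only assumes the standard BURLAP 3 API, in which Action declares actionName() and copy(), and SimpleAction (used in Example 6) is an unparameterized implementation identified by name alone.

import burlap.mdp.core.action.Action;
import burlap.mdp.core.action.SimpleAction;

public class ActionBasics {
    public static void main(String[] args) {
        // a SimpleAction is fully described by its name
        Action north = new SimpleAction("north");
        System.out.println(north.actionName()); // prints "north"
        // Actions are copyable, so callers can hold them without aliasing
        Action copy = north.copy();
        System.out.println(copy.actionName()); // prints "north"
    }
}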
Example 1: sample
import burlap.mdp.core.action.Action; // import the required package/class
@Override
public DecisionState sample(State state, Action action) {
    List<StateTransitionProb> reachableStates;
    try {
        reachableStates = stateTransitions(state, action);
    } catch (NullPointerException e) {
        //no transitions defined for this state/action: fall back to a dead-end state
        reachableStates = Collections.singletonList(new StateTransitionProb(deadEnd, 1.0));
    }
    Collections.shuffle(reachableStates);
    //sample a random roll and walk the cumulative distribution
    double randomThreshold = Math.random(), sumOfProbability = 0;
    for (StateTransitionProb reachableState : reachableStates) {
        sumOfProbability = sumOfProbability + reachableState.p;
        if (randomThreshold <= sumOfProbability) {
            return ((DecisionState) reachableState.s).copy();
        }
    }
    throw new IndexOutOfBoundsException("No state found!");
}
Author: honzaMaly, Project: kusanagi, Lines: 21, Source: DecisionModel.java
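The loop above is a standard roulette-wheel (inverse-CDF) sample: draw $u \sim \mathrm{Uniform}[0,1)$ and return the first state whose cumulative mass reaches $u$,

$$k = \min\Bigl\{\, i : \textstyle\sum_{j \le i} p_j \ge u \,\Bigr\},$$

so each state $s_i$ is chosen with probability $p_i$. The preceding shuffle changes the visiting order but not these marginal probabilities.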
Example 2: qValue
import burlap.mdp.core.action.Action; // import the required package/class
@Override
public double qValue(State s, Action a) {
    if(this.model.terminal(s)){
        return 0.;
    }
    //what are the possible outcomes?
    List<TransitionProb> tps = ((FullModel)this.model).transitions(s, a);
    //aggregate over each possible outcome
    double q = 0.;
    for(TransitionProb tp : tps){
        //what is the reward for this transition?
        double r = tp.eo.r;
        //what is the value of the next state?
        double vp = this.valueFunction.get(this.hashingFactory.hashState(tp.eo.op));
        //add the contribution, weighted by the transition probability
        //and discounting the value of the next state
        q += tp.p * (r + this.gamma * vp);
    }
    return q;
}
Author: jmacglashan, Project: burlap_examples, Lines: 27, Source: VITutorial.java
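The loop computes the standard Bellman backup for a discounted MDP, with the transition model supplying both the reward and the successor distribution:

$$Q(s,a) \;=\; \sum_{s'} T(s' \mid s, a)\,\bigl[\, r(s,a,s') + \gamma\, V(s') \,\bigr].$$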
Example 3: qValues
import burlap.mdp.core.action.Action; // import the required package/class
@Override
public List<QValue> qValues(State s) {
    //first get the hashed state
    HashableState sh = this.hashingFactory.hashState(s);
    //check whether we already have stored values
    List<QValue> qs = this.qValues.get(sh);
    //create and add initialized Q-values if we don't have them stored for this state
    if(qs == null){
        List<Action> actions = this.applicableActions(s);
        qs = new ArrayList<QValue>(actions.size());
        //create a Q-value for each action
        for(Action a : actions){
            //add a Q-value with the initialized value
            qs.add(new QValue(s, a, this.qinit.qValue(s, a)));
        }
        //store this for later
        this.qValues.put(sh, qs);
    }
    return qs;
}
Author: jmacglashan, Project: burlap_examples, Lines: 24, Source: QLTutorial.java
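A common consumer of this method is greedy action selection. The helper below is hypothetical (it is not part of the tutorial) and assumes BURLAP's QValue exposes its estimate and action as the public fields q and a:

protected Action greedyAction(State s) {
    List<QValue> qs = this.qValues(s);
    QValue best = qs.get(0);
    for (QValue qv : qs) {
        //keep the entry with the largest Q estimate
        if (qv.q > best.q) {
            best = qv;
        }
    }
    return best.a;
}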
Example 4: actionDir
import burlap.mdp.core.action.Action; // import the required package/class
protected int actionDir(Action a){
    int adir = -1;
    if(a.actionName().equals(ACTION_NORTH)){
        adir = 0;
    }
    else if(a.actionName().equals(ACTION_SOUTH)){
        adir = 1;
    }
    else if(a.actionName().equals(ACTION_EAST)){
        adir = 2;
    }
    else if(a.actionName().equals(ACTION_WEST)){
        adir = 3;
    }
    return adir;
}
Author: jmacglashan, Project: burlap_examples, Lines: 17, Source: ExampleOOGridWorld.java
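If the ACTION_* names are compile-time String constants, as they are in the BURLAP tutorial domain, the same mapping can be written as a string switch (Java 7+); this is purely a stylistic alternative:

protected int actionDir(Action a) {
    switch (a.actionName()) {
        case ACTION_NORTH: return 0;
        case ACTION_SOUTH: return 1;
        case ACTION_EAST:  return 2;
        case ACTION_WEST:  return 3;
        default:           return -1; //unknown action
    }
}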
Example 5: executeAction
import burlap.mdp.core.action.Action; // import the required package/class
@Override
public EnvironmentOutcome executeAction(Action a) {
    State startState = this.currentObservation();
    ActionController ac = this.actionControllerMap.get(a.actionName());
    //give the controller time to carry out the action before observing the result
    int delay = ac.executeAction(a);
    if (delay > 0) {
        try {
            Thread.sleep(delay);
        } catch(InterruptedException e) {
            e.printStackTrace();
        }
    }
    State finalState = this.currentObservation();
    this.lastReward = this.rewardFunction.reward(startState, a, finalState);
    EnvironmentOutcome eo = new EnvironmentOutcome(startState, a, finalState, this.lastReward, this.isInTerminalState());
    return eo;
}
Author: h2r, Project: burlapcraft, Lines: 23, Source: MinecraftEnvironment.java
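executeAction is half of the standard BURLAP Environment loop. The hypothetical helper below sketches the other half under the same API shown above (currentObservation(), executeAction(), isInTerminalState(), and the public r field on EnvironmentOutcome):

import burlap.behavior.policy.Policy;
import burlap.mdp.core.action.Action;
import burlap.mdp.singleagent.environment.Environment;
import burlap.mdp.singleagent.environment.EnvironmentOutcome;

public static double runEpisode(Environment env, Policy policy, int maxSteps) {
    double totalReward = 0.;
    int steps = 0;
    while (!env.isInTerminalState() && steps < maxSteps) {
        //ask the policy for an action in the current observation
        Action a = policy.action(env.currentObservation());
        //step the environment and accumulate the reward
        EnvironmentOutcome eo = env.executeAction(a);
        totalReward += eo.r;
        steps++;
    }
    return totalReward;
}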
Example 6: allApplicableActions
import burlap.mdp.core.action.Action; // import the required package/class
@Override
public List<Action> allApplicableActions(State s) {
    BCAgent a = (BCAgent)((GenericOOState)s).object(CLASS_AGENT);
    List<ObjectInstance> blocks = ((OOState)s).objectsOfClass(HelperNameSpace.CLASS_BLOCK);
    for (ObjectInstance block : blocks) {
        if (HelperActions.blockIsOneOf(Block.getBlockById(((BCBlock)block).type), HelperActions.dangerBlocks)) {
            int dangerX = ((BCBlock)block).x;
            int dangerY = ((BCBlock)block).y;
            int dangerZ = ((BCBlock)block).z;
            //the action is inapplicable if a danger block is at the agent's position or directly below it
            if (((a.x == dangerX) && (a.y - 1 == dangerY) && (a.z == dangerZ))
                    || ((a.x == dangerX) && (a.y == dangerY) && (a.z == dangerZ))) {
                return new ArrayList<Action>();
            }
        }
    }
    //otherwise the check passes
    return Arrays.<Action>asList(new SimpleAction(typeName));
}
Author: h2r, Project: burlapcraft, Lines: 20, Source: MinecraftActionType.java
Example 7: publishAction
import burlap.mdp.core.action.Action; // import the required package/class
@Override
public int publishAction(Action a) {
    Timer timer = new Timer();
    PublishTask pt = new PublishTask();
    timer.schedule(pt, 0, this.period);
    if(this.synchronous){
        synchronized(pt) {
            //block until the publish task reports that it has finished
            while(!pt.finished()) {
                try {
                    pt.wait();
                } catch(InterruptedException e) {
                    e.printStackTrace();
                }
            }
        }
    }
    return this.delayTime;
}
Author: h2r, Project: burlap_rosbridge, Lines: 20, Source: RepeatingActionPublisher.java
Example 8: action
import burlap.mdp.core.action.Action; // import the required package/class
@Override
public Action action(State s) {
    if(this.dp == null){
        throw new RuntimeException("The valueFunction used by this Policy is not defined; therefore, the policy is undefined.");
    }
    if(this.dp.hasCachedPlanForState(s)){
        Action ga = this.dp.querySelectedActionForState(s);
        //the surrounding if condition should be sufficient for null cases, but double-check just to be safe
        if(ga == null){
            throw new PolicyUndefinedException();
        }
        return ga;
    }
    throw new PolicyUndefinedException();
}
Author: jmacglashan, Project: burlap, Lines: 18, Source: SDPlannerPolicy.java
Example 9: collectDataFrom
import burlap.mdp.core.action.Action; // import the required package/class
@Override
public SARSData collectDataFrom(State s, SampleModel model, int maxSteps, SARSData intoDataset) {
    if(intoDataset == null){
        intoDataset = new SARSData();
    }
    State curState = s;
    int nsteps = 0;
    boolean terminated = model.terminal(s);
    while(!terminated && nsteps < maxSteps){
        //choose uniformly among all applicable actions and sample a transition from the model
        List<Action> gas = ActionUtils.allApplicableActionsForTypes(this.actionTypes, curState);
        Action ga = gas.get(RandomFactory.getMapped(0).nextInt(gas.size()));
        EnvironmentOutcome eo = model.sample(curState, ga);
        intoDataset.add(curState, ga, eo.r, eo.op);
        curState = eo.op;
        terminated = eo.terminated;
        nsteps++;
    }
    return intoDataset;
}
Author: jmacglashan, Project: burlap, Lines: 27, Source: SARSCollector.java
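A hedged usage sketch, assuming collector is a SARSCollector configured with the domain's action types: passing null for the last argument lets the method allocate a fresh SARSData, as the implementation above shows.

//collect up to 100 random (s, a, r, s') transitions starting from initialState
SARSData dataset = collector.collectDataFrom(initialState, model, 100, null);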
Example 10: estimateQs
import burlap.mdp.core.action.Action; // import the required package/class
/**
 * Estimates and returns the Q-values for this node. Q-values and used state samples are forgotten after this call completes.
 * @return a {@link List} of the estimated Q-values for each action.
 */
public List<QValue> estimateQs(){
    List<Action> gas = SparseSampling.this.applicableActions(this.sh.s());
    List<QValue> qs = new ArrayList<QValue>(gas.size());
    for(Action ga : gas){
        if(this.height <= 0){
            //leaf node: fall back to the value-function initialization
            qs.add(new QValue(this.sh.s(), ga, SparseSampling.this.vinit.value(this.sh.s())));
        }
        else{
            double q;
            if(!SparseSampling.this.computeExactValueFunction){
                q = this.sampledQEstimate(ga);
            }
            else{
                q = this.exactQValue(ga);
            }
            qs.add(new QValue(this.sh.s(), ga, q));
        }
    }
    return qs;
}
Author: jmacglashan, Project: burlap, Lines: 27, Source: SparseSampling.java
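For orientation, the sampled branch roughly computes the sparse-sampling estimate of Kearns, Mansour, and Ng: with $C$ successor samples per action at height $h$,

$$\hat{Q}_h(s,a) \;=\; \frac{1}{C} \sum_{i=1}^{C} \bigl[\, r_i + \gamma\, \hat{V}_{h-1}(s'_i) \,\bigr], \qquad \hat{V}_h(s) \;=\; \max_a \hat{Q}_h(s,a),$$

with the height-0 base case supplied by the value-function initialization, as in the code above.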
Example 11: allApplicableActions
import burlap.mdp.core.action.Action; // import the required package/class
@Override
public List<Action> allApplicableActions(State s) {
    List<Action> res = new ArrayList<Action>();
    if(!(s instanceof OOState)){
        throw new RuntimeException("Cannot get object-parameterized grounded actions in state, because " + s.getClass().getName() + " does not implement OOState");
    }
    //otherwise we need to do parameter binding
    List<List<String>> bindings = OOStateUtilities.getPossibleBindingsGivenParamOrderGroups((OOState)s, this.getParameterClasses(), this.getParameterOrderGroups());
    for(List<String> params : bindings){
        String[] paramArray = params.toArray(new String[params.size()]);
        ObjectParameterizedAction ga = this.generateAction(paramArray);
        if(this.applicableInState(s, ga)) {
            res.add(ga);
        }
    }
    return res;
}
Author: jmacglashan, Project: burlap, Lines: 25, Source: ObjectParameterizedActionType.java
Example 12: action
import burlap.mdp.core.action.Action; // import the required package/class
@Override
public Action action(State s) {
    synchronized(this){
        //block until another thread supplies the next action
        while(this.nextAction == null){
            try {
                this.wait();
            } catch(InterruptedException e) {
                e.printStackTrace();
            }
        }
    }
    Action toTake = this.nextAction;
    this.nextAction = null;
    return toTake;
}
Author: jmacglashan, Project: burlap, Lines: 17, Source: ManualAgentsCommands.java
Example 13: UCTStateNode
import burlap.mdp.core.action.Action; // import the required package/class
/**
 * Initializes the UCT state node.
 * @param s the state that this node wraps
 * @param d the depth of the node
 * @param actionTypes the possible OO-MDP actions that can be taken
 * @param constructor a {@link UCTActionNode} factory that can be used to create ActionNodes for each of the actions.
 */
public UCTStateNode(HashableState s, int d, List<ActionType> actionTypes, UCTActionConstructor constructor){
    state = s;
    depth = d;
    n = 0;
    actionNodes = new ArrayList<UCTActionNode>();
    List<Action> actions = ActionUtils.allApplicableActionsForTypes(actionTypes, s.s());
    //create an action node for every applicable action
    for(Action a : actions){
        UCTActionNode an = constructor.generate(a);
        actionNodes.add(an);
    }
}
Author: jmacglashan, Project: burlap, Lines: 24, Source: UCTStateNode.java
Example 14: getAgentSynchronizedActionSelection
import burlap.mdp.core.action.Action; // import the required package/class
/**
 * This method returns the action for a single agent by a synchronized sampling of this joint policy,
 * which enables multiple agents to query this policy object and act according to the same selected joint
 * actions from it. This is useful when decisions are made by a "referee" who selects the joint action
 * that dictates the behavior of each agent. The synchronization is implemented by selecting a joint action.
 * Each time an agent queries for their action, it is drawn from the previously sampled joint action.
 * A new joint action is only selected after each agent defined in this object's {@link #agentsInJointPolicy} member
 * has queried this method for their action or until an action for a different state is queried (that is, *either* condition
 * will cause the joint action to be resampled).
 * @param agentNum the agent whose action in this joint policy is being queried
 * @param s the state in which the action is to be selected.
 * @return the single agent action to be taken according to the synchronized joint action that was selected.
 */
public Action getAgentSynchronizedActionSelection(int agentNum, State s){
    if(this.lastSyncedState == null || !this.lastSyncedState.equals(s)){
        //then reset the synchronization
        this.lastSyncedState = s;
        this.agentsSynchronizedSoFar.clear();
        this.lastSynchronizedJointAction = (JointAction)this.action(s);
    }
    Action a = this.lastSynchronizedJointAction.action(agentNum);
    this.agentsSynchronizedSoFar.add(agentNum);
    if(this.agentsSynchronizedSoFar.size() == this.agentsInJointPolicy.size()){
        //then we're finished getting the actions for all agents, so enable the next query
        this.lastSyncedState = null;
        this.agentsSynchronizedSoFar.clear();
    }
    return a;
}
Author: jmacglashan, Project: burlap, Lines: 34, Source: JointPolicy.java
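A hedged usage sketch of the synchronization contract described above: because neither the state nor the agent set changes between the two calls, both agents receive their components of the same sampled joint action.

//a "referee" distributing one sampled joint action to two agents
Action a0 = jointPolicy.getAgentSynchronizedActionSelection(0, s);
Action a1 = jointPolicy.getAgentSynchronizedActionSelection(1, s);
//after all agents have queried, the next call resamples the joint action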
Example 15: policyDistribution
import burlap.mdp.core.action.Action; // import the required package/class
@Override
public List<ActionProb> policyDistribution(State s) {
    if(!(this.sourcePolicy instanceof EnumerablePolicy)){
        throw new RuntimeException("Cannot return policy distribution because source policy does not implement EnumerablePolicy");
    }
    List<Action> unmodeled = KWIKModel.Helper.unmodeledActions(model, allActionTypes, s);
    if(!unmodeled.isEmpty()){
        //distribute probability uniformly across the unmodeled actions
        List<ActionProb> aps = new ArrayList<ActionProb>(unmodeled.size());
        double p = 1./(double)unmodeled.size();
        for(Action ga : unmodeled){
            aps.add(new ActionProb(ga, p));
        }
        return aps;
    }
    return ((EnumerablePolicy)this.sourcePolicy).policyDistribution(s);
}
Author: jmacglashan, Project: burlap, Lines: 21, Source: UnmodeledFavoredPolicy.java
Example 16: sample
import burlap.mdp.core.action.Action; // import the required package/class
@Override
public State sample(State s, Action a) {
    s = s.copy();
    double[] directionProbs = transitionDynamics[actionInd(a.actionName())];
    //roulette-wheel sample a movement direction, as in Example 1
    double roll = rand.nextDouble();
    double curSum = 0.;
    int dir = 0;
    for(int i = 0; i < directionProbs.length; i++){
        curSum += directionProbs[i];
        if(roll < curSum){
            dir = i;
            break;
        }
    }
    int[] dcomps = movementDirectionFromIndex(dir);
    return move(s, dcomps[0], dcomps[1]);
}
Author: jmacglashan, Project: burlap, Lines: 22, Source: GridWorldDomain.java
Example 17: reward
import burlap.mdp.core.action.Action; // import the required package/class
@Override
public double reward(State s, Action a, State sprime) {
    double[] features;
    if(this.rfFeaturesAreForNextState){
        features = this.rfFvGen.features(sprime);
    }
    else{
        features = this.rfFvGen.features(s);
    }
    //the reward is the dot product of the feature vector and the parameters
    double sum = 0.;
    for(int i = 0; i < features.length; i++){
        sum += features[i] * this.parameters[i];
    }
    return sum;
}
Author: jmacglashan, Project: burlap, Lines: 18, Source: LinearDiffRFVInit.java
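In other words, the reward is linear in a state feature vector $\phi$ with learned parameters $\theta$:

$$r(s, a, s') \;=\; \theta^{\top} \phi(x), \qquad x = \begin{cases} s' & \text{if the features are computed for the next state,} \\ s & \text{otherwise.} \end{cases}$$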
Example 18: getNode
import burlap.mdp.core.action.Action; // import the required package/class
/**
 * Returns the policy node that stores the action preferences for a state.
 * @param sh the (hashed) state of the {@link BoltzmannActor.PolicyNode} to return
 * @return the {@link BoltzmannActor.PolicyNode} object for the given input state.
 */
protected PolicyNode getNode(HashableState sh){
    List<Action> gas = ActionUtils.allApplicableActionsForTypes(this.actionTypes, sh.s());
    PolicyNode node = this.preferences.get(sh);
    if(node == null){
        //lazily create the node with a zero preference for every applicable action
        node = new PolicyNode(sh);
        for(Action ga : gas){
            node.addPreference(new ActionPreference(ga, 0.0));
        }
        this.preferences.put(sh, node);
    }
    return node;
}
Author: jmacglashan, Project: burlap, Lines: 23, Source: BoltzmannActor.java
Example 19: sample
import burlap.mdp.core.action.Action; // import the required package/class
@Override
public State sample(State s, Action a) {
    s = s.copy();
    double baseForce = 0.;
    if(a.actionName().equals(CartPoleDomain.ACTION_LEFT)){
        baseForce = -physParams.actionForce;
    }
    else if(a.actionName().equals(CartPoleDomain.ACTION_RIGHT)){
        baseForce = physParams.actionForce;
    }
    //perturb the base force with uniform noise in [-actionNoise, actionNoise]
    double roll = RandomFactory.getMapped(0).nextDouble() * (2 * physParams.actionNoise) - physParams.actionNoise;
    double force = baseForce + roll;
    return updateState(s, force);
}
Author: jmacglashan, Project: burlap, Lines: 20, Source: IPModel.java
Example 20: evaluate
import burlap.mdp.core.action.Action; // import the required package/class
@Override
public double evaluate(State s, Action a) {
    List<StateFeature> features = this.stateActionFeatures.features(s, a);
    //the value is the dot product of the active features and their weights
    double val = 0.;
    for(StateFeature sf : features){
        double prod = sf.value * this.getWeight(sf.id);
        val += prod;
    }
    //cache the evaluation context for a later gradient computation
    this.currentValue = val;
    this.currentGradient = null;
    this.currentFeatures = features;
    this.lastState = s;
    this.lastAction = a;
    return val;
}
Author: jmacglashan, Project: burlap, Lines: 17, Source: LinearVFA.java
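Like Example 17, this is a linear function approximator, here over state-action features:

$$\hat{f}_w(s, a) \;=\; \sum_{i \in \mathcal{F}(s,a)} w_i \, \phi_i(s, a),$$

where $\mathcal{F}(s,a)$ is the set of features active for the pair; caching the active features makes the gradient with respect to $w$ (simply $\phi(s,a)$) cheap to produce afterwards.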
Note: The burlap.mdp.core.action.Action class examples in this article were collected from GitHub, MSDocs, and other source-code and documentation hosting platforms. The snippets are selected from open-source projects contributed by their developers, and copyright remains with the original authors. For redistribution and use, refer to the license of the corresponding project; do not repost without permission.