• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    公众号

Python mdp.getPossibleActions函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中mdp.getPossibleActions函数的典型用法代码示例。如果您正苦于以下问题：Python getPossibleActions函数的具体用法？Python getPossibleActions怎么用？Python getPossibleActions使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了getPossibleActions函数的20个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: __init__

    def __init__(self, mdp, discount = 0.9, iterations = 100):
        """
          Build the agent and run batch value iteration on ``mdp``.

          Args:
              mdp: object providing getStates(), getPossibleActions(state)
                  and isTerminal(state).
              discount: discount factor, stored on self.discount.
              iterations: number of full sweeps over the state space.

          self.values is only replaced once a sweep has finished, so any
          lookups of self.values made by self.getQValue during a sweep see
          the previous sweep's values.
        """
        self.mdp = mdp
        self.discount = discount
        self.iterations = iterations
        self.values = util.Counter() # A Counter is a dict with default 0

        for _ in range(iterations):
            updatedValues = self.values.copy()
            for state in mdp.getStates():
                if mdp.isTerminal(state):
                    continue
                qValues = [self.getQValue(state, action)
                           for action in mdp.getPossibleActions(state)]
                # A non-terminal state without actions keeps its previous
                # value instead of crashing max() on an empty list.
                if qValues:
                    updatedValues[state] = max(qValues)
            self.values = updatedValues
开发者ID:tsaieugene,项目名称:reinforcement,代码行数:29,代码来源:valueIterationAgents.py


示例2: __init__

    def __init__(self, mdp, discount=0.9, iterations=100):
        """
          Construct the agent and perform ``iterations`` sweeps of value
          iteration over ``mdp``.

          Every sweep reads from a snapshot of the values taken at the start
          of the sweep and writes the results into self.values.  Terminal
          states are skipped.  A non-terminal state with no available actions
          gets the value 0.

          mdp methods used here:
              mdp.getStates()
              mdp.getPossibleActions(state)
              mdp.getTransitionStatesAndProbs(state, action)
              mdp.getReward(state, action, nextState)
              mdp.isTerminal(state)
        """
        self.mdp = mdp
        self.discount = discount
        self.iterations = iterations
        self.values = util.Counter()  # A Counter is a dict with default 0

        # Write value iteration code here
        "*** YOUR CODE HERE ***"
        self.optimalActionInState = collections.defaultdict(None)
        for _ in range(iterations):
            snapshot = self.values.copy()
            for state in mdp.getStates():
                if self.mdp.isTerminal(state):
                    continue
                if mdp.getPossibleActions(state):
                    best = float("-inf")
                else:
                    best = 0
                for action in mdp.getPossibleActions(state):
                    outcomes = self.mdp.getTransitionStatesAndProbs(state, action)
                    expected = 0
                    for successor, chance in outcomes:
                        payoff = self.mdp.getReward(state, action, successor)
                        expected += chance * (payoff + self.discount * snapshot[successor])
                    best = max(best, expected)
                self.values[state] = best
开发者ID:CatcherGG,项目名称:Small-Projects,代码行数:34,代码来源:valueIterationAgents.py


示例3: __init__

    def __init__(self, mdp, discount = 0.9, iterations = 100):
        """
          Build the agent and run batch value iteration on ``mdp``.

          Args:
              mdp: object providing getStates(), getPossibleActions(state),
                  getTransitionStatesAndProbs(state, action),
                  getReward(state, action, nextState) and isTerminal(state).
              discount: discount factor, stored on self.discount.
              iterations: number of sweeps, stored on self.iterations.

          New values are collected in a helper Counter during a sweep and
          copied into self.values only after the sweep, so each sweep reads
          the previous sweep's values.  Terminal states and states without
          actions are never written.
        """
        self.mdp = mdp
        self.discount = discount
        self.iterations = iterations
        self.values = util.Counter() # A Counter is a dict with default 0
        helper_vector = util.Counter() # Values computed during the current sweep

        def q_value(state, action):
            """Return the expected discounted return of ``action`` in ``state``."""
            return sum([prob * (mdp.getReward(state, action, nextState)
                                + self.discount * self.values[nextState])
                        for nextState, prob in mdp.getTransitionStatesAndProbs(state, action)])

        for _ in range(self.iterations):
            for state in mdp.getStates():
                if mdp.isTerminal(state):
                    continue
                # Query the action list once per state instead of re-asking
                # the mdp for every use.
                actions = mdp.getPossibleActions(state)
                if not actions:
                    continue
                helper_vector[state] = max(q_value(state, action) for action in actions)
            for state in helper_vector:
                self.values[state] = helper_vector[state]
开发者ID:johnwilkey,项目名称:cs188,代码行数:31,代码来源:valueIterationAgents.py


示例4: __init__

    def __init__(self, mdp, discount = 0.9, iterations = 100):
        """
          Build the agent and run batch value iteration on ``mdp``.

          Args:
              mdp: object providing getStates(), getPossibleActions(state),
                  getTransitionStatesAndProbs(state, action),
                  getReward(state, action, nextState) and isTerminal(state).
              discount: discount factor, stored on self.discount.
              iterations: number of sweeps, stored on self.iterations.

          Results are stored in two plain dicts:
              self.vTable[state]          -> state value after the last sweep
              self.qTable[state][action]  -> Q-value under the final vTable
          self.values is created as an empty Counter and is not filled here.
          self.depth ends at iterations + 1 (it starts at 1 and is bumped
          once per sweep).
        """
        self.mdp = mdp
        self.discount = discount
        self.iterations = iterations
        self.values = util.Counter() # A Counter is a dict with default 0

        states = mdp.getStates()
        self.vTable = {}
        self.qTable = {}
        for state in states:
            self.vTable[state] = 0
            self.qTable[state] = {}
            for action in mdp.getPossibleActions(state):
                self.qTable[state][action] = 0

        def q_values(state):
            """Return {action: Q(state, action)} computed from self.vTable."""
            table = {}
            for action in mdp.getPossibleActions(state):
                table[action] = sum(
                    prob * (mdp.getReward(state, action, nextState)
                            + self.discount * self.vTable[nextState])
                    for nextState, prob in mdp.getTransitionStatesAndProbs(state, action))
            return table

        self.depth = 1
        while self.depth < self.iterations + 1:
            nextTable = {}
            for state in states:
                if mdp.isTerminal(state):
                    nextTable[state] = 0
                    continue
                self.qTable[state] = q_values(state)
                # Take the true maximum rather than comparing against a magic
                # lower bound; a state with no actions is worth 0.
                if self.qTable[state]:
                    nextTable[state] = max(self.qTable[state].values())
                else:
                    nextTable[state] = 0
            # Swap only after the whole sweep so every state was computed
            # from the previous sweep's values.
            self.vTable = nextTable
            self.depth += 1

        # Refresh the Q-table so it matches the final state values.
        for state in states:
            self.qTable[state] = q_values(state)
开发者ID:ChristopherKai,项目名称:ai,代码行数:59,代码来源:valueIterationAgents.py


示例5: __init__

    def __init__(self, mdp, discount = 0.9, iterations = 100):
        """
          Construct the agent and perform ``iterations`` sweeps of value
          iteration over ``mdp``.

          Each sweep fills a fresh Counter from the values of the previous
          sweep and then installs it as self.values.  A state with no
          available actions is given the value 0.

          mdp methods used here:
              mdp.getStates()
              mdp.getPossibleActions(state)
              mdp.getTransitionStatesAndProbs(state, action)
              mdp.getReward(state, action, nextState)
        """
        self.mdp = mdp
        self.discount = discount
        self.iterations = iterations
        self.values = util.Counter() # A Counter is a dict with default 0

        # Write value iteration code here
        "*** YOUR CODE HERE ***"
        sweepsLeft = iterations
        while sweepsLeft > 0:
            sweepsLeft -= 1
            freshValues = util.Counter()
            for state in mdp.getStates():
                candidates = []
                for action in mdp.getPossibleActions(state):
                    expected = 0
                    for successor, chance in mdp.getTransitionStatesAndProbs(state, action):
                        payoff = mdp.getReward(state, action, successor)
                        expected = expected + (chance * (payoff + (discount * self.values[successor])))
                    candidates.append(expected)

                if len(mdp.getPossibleActions(state)) != 0:
                    freshValues[state] = max(candidates)
                else:
                    freshValues[state] = 0
            self.values = freshValues
开发者ID:NicusVictoria,项目名称:INFOB2KI-C,代码行数:54,代码来源:valueIterationAgents.py


示例6: __init__

  def __init__(self, mdp, discount = 0.9, iterations = 100):
    """
      Build the agent, run batch value iteration on ``mdp`` and derive a
      greedy policy from the result.

      Args:
          mdp: object providing getStates(), getPossibleActions(state),
              getTransitionStatesAndProbs(state, action),
              getReward(state, action, nextState) and isTerminal(state).
          discount: discount factor, stored on self.discount.
          iterations: number of sweeps; self.iterations keeps this value.

      After construction:
          self.values  -> Counter of state values from the last sweep
          self.policy  -> dict mapping each state to its best action, or
                          None for terminal states and states without actions
    """
    self.mdp = mdp
    self.discount = discount
    self.iterations = iterations
    self.values = util.Counter() # A Counter is a dict with default 0
    self.delta = 0

    # Count sweeps with a throwaway variable so self.iterations still holds
    # the configured number after construction.
    for _ in range(iterations):
        batchValues = util.Counter()
        for state in mdp.getStates():
            if mdp.isTerminal(state):
                continue
            candidates = []
            for action in mdp.getPossibleActions(state):
                expectedValue = 0
                expectedReward = 0
                for nextState, prob in mdp.getTransitionStatesAndProbs(state, action):
                    expectedValue = expectedValue + self.values[nextState] * prob
                    expectedReward = expectedReward + mdp.getReward(state, action, nextState) * prob
                candidates.append(expectedReward + expectedValue * discount)
            # Use the real maximum instead of a fixed lower bound; a state
            # without actions keeps the Counter default of 0.
            if candidates:
                batchValues[state] = max(candidates)
        self.values = batchValues

    self.policy = {}
    for state in mdp.getStates():
        actions = [] if mdp.isTerminal(state) else mdp.getPossibleActions(state)
        if not actions:
            self.policy[state] = None
            continue
        qValues = [self.getQValue(state, action) for action in actions]
        # index(max(...)) selects the first action among ties.
        self.policy[state] = actions[qValues.index(max(qValues))]
开发者ID:zxcsvd,项目名称:CSSE413,代码行数:51,代码来源:valueIterationAgents.py


示例7: __init__

    def __init__(self, mdp, discount = 0.9, iterations = 100):
        """
          Construct the agent and perform ``iterations`` sweeps of value
          iteration over ``mdp``.

          Rewards here depend on the state only: a state's new value is
          mdp.getReward(state) plus the discounted best expected value of
          its successors, taken from a snapshot made at the start of the
          sweep.  States with no available actions are left untouched.

          mdp methods used here:
              mdp.getStates()
              mdp.getPossibleActions(state)
              mdp.getTransitionStatesAndProbs(state, action)
              mdp.getReward(state)
        """
        self.mdp = mdp
        self.discount = discount
        self.iterations = iterations
        self.values = util.Counter() # A Counter is a dict with default 0

        # Write value iteration code here
        "*** YOUR CODE HERE ***"
        for _ in range(iterations):
            snapshot = self.values.copy()
            for state in mdp.getStates():
                actions = mdp.getPossibleActions(state)
                if len(actions) == 0:
                    continue
                expectations = []
                for action in actions:
                    outcomes = mdp.getTransitionStatesAndProbs(state, action)
                    expected = 0
                    for (successor, chance) in outcomes:
                        expected += (chance * snapshot[successor])
                    expectations.append(expected)
                bestExpectation = max(expectations)
                self.values[state] = mdp.getReward(state) + (discount * bestExpectation)
开发者ID:yousefhesy,项目名称:Drop7,代码行数:34,代码来源:valueIterationAgents.py


示例8: __init__

 def __init__(self, mdp, discount = 0.9, iterations = 100):
   """
     Construct the agent and perform ``iterations`` sweeps of value
     iteration over ``mdp``.

     Each sweep reads successor values from a copy of self.values taken
     at the start of the sweep and writes the best action value of every
     state back into self.values.

     mdp methods used here:
         mdp.getStates()
         mdp.getPossibleActions(state)
         mdp.getTransitionStatesAndProbs(state, action)
         mdp.getReward(state, action, nextState)
   """
   self.mdp = mdp
   self.discount = discount
   self.iterations = iterations
   self.values = util.Counter() # A Counter is a dict with default 0

   "*** YOUR CODE HERE ***"
   for _ in range(0, iterations):
     # Freeze last sweep's values so updates made during this sweep are
     # not read back within the same sweep.
     frozen = self.values.copy()
     for state in mdp.getStates():
       # Accumulates the expected return of each action.
       actionTotals = util.Counter()
       for action in mdp.getPossibleActions(state):
         outcomes = mdp.getTransitionStatesAndProbs(state, action)
         for successor, chance in outcomes:
           payoff = mdp.getReward( state, action, successor)
           actionTotals[action] += chance * (payoff + discount * frozen[successor])
       # Look up the total stored under the key that argMax() selects.
       winner = actionTotals.argMax()
       self.values[state] = actionTotals[winner]
开发者ID:lyeechong,项目名称:ai,代码行数:34,代码来源:valueIterationAgents.py


示例9: __init__

 def __init__(self, mdp, discount = 0.9, iterations = 100):
   """
     Construct the agent and perform ``iterations`` sweeps of value
     iteration over ``mdp``.

     Successor values are read from a copy of self.values made at the
     start of each sweep; the best action value per state is written
     into self.values.

     mdp methods used here:
         mdp.getStates()
         mdp.getPossibleActions(state)
         mdp.getTransitionStatesAndProbs(state, action)
         mdp.getReward(state, action, nextState)
   """
   self.mdp = mdp
   self.discount = discount
   self.iterations = iterations
   self.values = util.Counter() # A Counter is a dict with default 0

   "*** YOUR CODE HERE ***"
   for _ in range(iterations):
       previous = self.values.copy()
       for state in mdp.getStates():
           totals = util.Counter()
           for action in mdp.getPossibleActions(state):
               outcomes = mdp.getTransitionStatesAndProbs(state, action)
               for successor, chance in outcomes:
                   payoff = mdp.getReward( state, action, successor)
                   totals[action] += chance * (payoff + discount * previous[successor])
           chosen = totals.argMax()
           self.values[state] = totals[chosen]
开发者ID:fdarko,项目名称:reinforcement,代码行数:26,代码来源:valueIterationAgents.py


示例10: __init__

    def __init__(self, mdp, discount = 0.9, iterations = 100):
        """
          Build the agent and run batch value iteration on ``mdp``.

          Args:
              mdp: object providing getStates() and
                  getPossibleActions(state).
              discount: discount factor, stored on self.discount.
              iterations: number of sweeps, stored on self.iterations.

          Each sweep collects the new values in a separate Counter and
          installs it as self.values only when the sweep is complete, so
          lookups of self.values made by self.computeQValueFromValues
          during a sweep see the previous sweep's values.
        """
        self.mdp = mdp
        self.discount = discount
        self.iterations = iterations
        self.values = util.Counter() # A Counter is a dict with default 0

        states = mdp.getStates()
        for _ in range(iterations):
            # Keep self.values a util.Counter (not a plain dict) so it still
            # has the Counter type and methods after the first sweep.
            newValues = util.Counter()
            for state in states:
                qValues = util.Counter()
                for action in mdp.getPossibleActions(state):
                    qValues[action] = self.computeQValueFromValues(state, action)
                newValues[state] = qValues[qValues.argMax()]
            self.values = newValues
开发者ID:jmanalus,项目名称:CS-188,代码行数:30,代码来源:valueIterationAgents.py


示例11: __init__

    def __init__(self, mdp, discount = 0.9, iterations = 100):
        """
          Construct the agent and perform ``iterations`` sweeps of value
          iteration over ``mdp``.

          self.vks holds a copy of the values as they were at the start of
          the most recent sweep; new values are written into self.values.

          mdp methods used here:
              mdp.getStates()
              mdp.getPossibleActions(state)
              mdp.getTransitionStatesAndProbs(state, action)
              mdp.getReward(state, action, nextState)
        """
        self.mdp = mdp
        self.discount = discount
        self.iterations = iterations
        self.values = util.Counter() # A Counter is a dict with default 0

        # Write value iteration code here
        "*** YOUR CODE HERE ***"
        self.vks = util.Counter()
        for _ in range(0,iterations):
            self.vks = self.values.copy()
            for state in mdp.getStates():
                actionTotals = util.Counter()
                for action in mdp.getPossibleActions(state):
                    # Create the entry even if the action has no outcomes.
                    actionTotals[action] = 0
                    for successor, chance in self.mdp.getTransitionStatesAndProbs(state, action):
                        payoff = self.mdp.getReward(state, action, successor)
                        actionTotals[action] += chance*(payoff + self.discount*(self.vks[successor]))
                winner = actionTotals.argMax()
                self.values[state] = actionTotals[winner]
开发者ID:rohanrrc,项目名称:projects,代码行数:33,代码来源:valueIterationAgents.py


示例12: __init__

    def __init__(self, mdp, discount=0.9, iterations=100):
        """
          Build the agent and run batch value iteration on ``mdp``.

          Args:
              mdp: object providing getStates(), getPossibleActions(state),
                  getTransitionStatesAndProbs(state, action) and
                  getReward(state, action, nextState).
              discount: discount factor, stored on self.discount.
              iterations: number of sweeps; self.iterations keeps this
                  value after construction.

          Each sweep reads successor values from a copy of self.values made
          at the start of the sweep.  States without actions are skipped and
          keep their current value.
        """
        self.mdp = mdp
        self.discount = discount
        self.iterations = iterations
        self.values = util.Counter()  # A Counter is a dict with default 0

        # Loop on a throwaway counter instead of counting self.iterations
        # down, which would leave the attribute at 0.
        for _ in range(iterations):
            prev_values = self.values.copy()
            for state in mdp.getStates():
                actions = mdp.getPossibleActions(state)
                if not actions:
                    continue
                self.values[state] = max(
                    sum(prob * (mdp.getReward(state, act, state1) + discount * prev_values[state1])
                        for state1, prob in mdp.getTransitionStatesAndProbs(state, act))
                    for act in actions)
开发者ID:dragoon,项目名称:edX-AI-course,代码行数:27,代码来源:valueIterationAgents.py


示例13: __init__

    def __init__(self, mdp, discount = 0.9, iterations = 100):
        """
          Your value iteration agent should take an mdp on
          construction, run the indicated number of iterations
          and then act according to the resulting policy.

          Values are stored in self.values under (state, iteration) tuple
          keys, where the iteration number is self.count (1 .. iterations).
          States with no possible actions are skipped.

          NOTE(review): self.getQValue is called while self.count holds the
          current iteration number; it presumably reads the entries of the
          previous iteration -- confirm against getQValue.

          Uses sys.maxint and dict.iteritems, which exist only in Python 2.

          Author - Shandheap Shanmuganathan
        """
        self.mdp = mdp
        self.discount = discount
        self.iterations = iterations
        self.values = util.Counter() # A Counter is a dict with default values as 0
        # Current iteration number; also the second component of the keys
        # written into self.values below.
        self.count = 1
        while self.count <= iterations:
          for state in mdp.getStates():
            possibleActions = mdp.getPossibleActions(state)
            if len(possibleActions) == 0:
              continue
            # Q-values of every action except "exit".
            QValues = {}
            for action in possibleActions:
              if action == "exit":
                # The "exit" action is valued directly by the reward for
                # moving into the hard-coded 'TERMINAL_STATE'.
                finalScore = self.mdp.getReward(state, action, 'TERMINAL_STATE')
                self.values[state, self.count] = finalScore
                continue
              else:
                QValues[action] = self.getQValue(state, action)
            # maxAction is tracked but not used afterwards in this method.
            maxAction = None
            # Sentinel: the smallest Python 2 int, meaning "nothing found yet".
            maxQ = -sys.maxint - 1
            for key, value in QValues.iteritems():
              if value > maxQ:
                maxAction = key
                maxQ = value
            # Only store a value when at least one Q-value beat the sentinel;
            # this overwrites an "exit" score stored above for the same state.
            if maxQ != -sys.maxint - 1:
              self.values[state, self.count] = maxQ
          self.count += 1
开发者ID:shandheap,项目名称:Reinforcement-Learner,代码行数:35,代码来源:valueIterationAgents.py


示例14: __init__

 def __init__(self, mdp, discount = 0.9, iterations = 100):
   """
     Construct the agent and perform ``iterations`` sweeps of value
     iteration over ``mdp``.

     Each sweep works on a copy of self.values and installs the copy as
     self.values once every state has been processed.  A state with no
     actions is set to 0.

     mdp methods used here:
         mdp.getStates()
         mdp.getPossibleActions(state)
   """
   self.mdp = mdp
   self.discount = discount
   self.iterations = iterations
   self.values = util.Counter() # A Counter is a dict with default 0
   self.qvalues = util.Counter()

   states = mdp.getStates()

   for _ in range(self.iterations):
       pending = self.values.copy()
       for state in states:
           candidates = [self.getQValue(state,action)
                         for action in mdp.getPossibleActions(state)]
           if candidates:
               pending[state] = max(candidates)
           else:
               pending[state] = 0
       self.values = pending
开发者ID:Hannah-Jiang,项目名称:Artificial-Intellenge,代码行数:31,代码来源:valueIterationAgents.py


示例15: __init__

 def __init__(self, mdp, discount = 0.9, iterations = 100):
   """
     Construct the agent and perform ``iterations`` sweeps of value
     iteration over ``mdp``.

     Each sweep works on a copy of self.values; every non-terminal state
     receives the largest value self.getQValue returns over its possible
     actions, and the copy becomes self.values when the sweep is done.

     mdp methods used here:
         mdp.getStates()
         mdp.getPossibleActions(state)
         mdp.isTerminal(state)
   """
   "*** YOUR CODE HERE ***"
   self.mdp = mdp
   self.discount = discount
   self.iterations = iterations
   self.values = util.Counter() # value of each state; a Counter is a dict with default 0

   # one pass per requested iteration
   for _ in xrange(iterations):
     pending = self.values.copy()
     for state in mdp.getStates():
       if mdp.isTerminal(state):
         continue
       candidates = []
       for action in mdp.getPossibleActions(state):
         candidates.append(self.getQValue(state, action))
       pending[state] = max(candidates)
     self.values = pending
开发者ID:saagar,项目名称:cs182,代码行数:32,代码来源:valueIterationAgents.py


示例16: __init__

  def __init__(self, mdp, discount = 0.9, iterations = 100):
    """
      Build the agent and run batch value iteration on ``mdp``.

      Args:
          mdp: object providing getStates(), getPossibleActions(state),
              getTransitionStatesAndProbs(state, action),
              getReward(state, action, nextState) and isTerminal(state).
          discount: discount factor, stored on self.discount.
          iterations: number of sweeps, stored on self.iterations.

      Each sweep fills a fresh Counter from the previous sweep's values
      and then installs it as self.values.  Terminal states and states
      without actions are not written and so read as the Counter
      default of 0.
    """
    self.mdp = mdp
    self.discount = discount
    self.iterations = iterations
    self.values = util.Counter() # A Counter is a dict with default 0

    for _ in range(iterations):
        nextValues = util.Counter()
        for state in mdp.getStates():
            if mdp.isTerminal(state):
                continue
            qValues = []
            for action in mdp.getPossibleActions(state):
                qValue = 0
                for (nextState, prob) in mdp.getTransitionStatesAndProbs(state, action):
                    reward = mdp.getReward(state, action, nextState)
                    qValue += prob * (reward + discount*self.values[nextState])
                qValues.append(qValue)
            # Only assign when the state actually has actions, so a maximum
            # computed for a different state can never be reused here.
            if qValues:
                nextValues[state] = max(qValues)
        self.values = nextValues
开发者ID:kkansky,项目名称:CS-221,代码行数:35,代码来源:valueIterationAgents.py


示例17: __init__

 def __init__(self, mdp, discount = 0.9, iterations = 100):
   """
     Construct the agent and perform ``iterations`` sweeps of value
     iteration over ``mdp``.

     For each state and action, every state of the mdp is considered as
     a possible successor, weighted by self.T(state, action, successor).
     Successor values come from a deep copy of self.values made at the
     start of the sweep.

     mdp methods used here:
         mdp.getStates()
         mdp.getPossibleActions(state)
         mdp.getReward(state, action, nextState)
   """
   self.mdp = mdp
   self.discount = discount
   self.iterations = iterations
   self.values = util.Counter() # A Counter is a dict with default 0

   for _ in range(iterations):
       frozen = copy.deepcopy(self.values)
       for state in mdp.getStates():
           totals = util.Counter()
           for action in mdp.getPossibleActions(state):
               for successor in mdp.getStates():
                   totals[action] += self.T(state,action,successor) * (mdp.getReward(state,action,successor) + discount*frozen[successor])
           winner = totals.argMax()
           self.values[state] = totals[winner]
开发者ID:danielrich,项目名称:pacman-rl,代码行数:25,代码来源:valueIterationAgents.py


示例18: __init__

 def __init__(self, mdp, discount = 0.9, iterations = 100):
   """
     Construct the agent and perform ``iterations`` sweeps of value
     iteration over ``mdp``.

     Successor values are read from a copy of self.values made at the
     start of each sweep.  A state with no actions gets the value 0.

     mdp methods used here:
         mdp.getStates()
         mdp.getPossibleActions(state)
         mdp.getTransitionStatesAndProbs(state, action)
         mdp.getReward(state, action, nextState)
   """
   self.mdp = mdp
   self.discount = discount
   self.iterations = iterations
   self.values = util.Counter() # A Counter is a dict with default 0

   "*** YOUR CODE HERE ***"
   for _ in range(iterations):
       previous = self.values.copy()
       for state in mdp.getStates():
           candidates = []
           for action in mdp.getPossibleActions(state):
               outcomes = mdp.getTransitionStatesAndProbs(state, action)
               expected = 0
               for successor, chance in outcomes:
                   payoff = mdp.getReward(state, action, successor)
                   expected += chance * (payoff + discount * previous[successor])
               candidates.append(expected)
           if candidates:
               self.values[state] = max(candidates)
           else:
               self.values[state] = max([0])
开发者ID:HaiYiMao,项目名称:cs188,代码行数:28,代码来源:valueIterationAgents.py


示例19: __init__

    def __init__(self, mdp, discount = 0.9, iterations = 100):
        """
          Build the agent and run batch value iteration on ``mdp``.

          Args:
              mdp: object providing getStates(), getPossibleActions(state),
                  getTransitionStatesAndProbs(state, action) and
                  getReward(state, action, nextState).
              discount: discount factor, stored on self.discount.
              iterations: number of sweeps; self.iterations keeps this
                  value after construction.

          Each sweep reads successor values from a copy of self.values made
          at the start of the sweep.  A state without actions gets the
          value 0.
        """
        self.mdp = mdp
        self.discount = discount
        self.iterations = iterations
        self.values = util.Counter() # A Counter is a dict with default 0

        # Loop on a throwaway counter instead of counting self.iterations
        # down, which would leave the attribute at 0.
        for _ in range(iterations):
            previousValues = self.values.copy()
            for state in self.mdp.getStates():
                qValues = {}
                for action in mdp.getPossibleActions(state):
                    qValues[action] = 0
                    for (nextState, prob) in self.mdp.getTransitionStatesAndProbs(state, action):
                        qValues[action] += prob * (mdp.getReward(state, action, nextState)
                                                   + self.discount * previousValues[nextState])
                # Test for "no actions" explicitly rather than catching the
                # ValueError that max() raises on an empty sequence.
                self.values[state] = max(qValues.values()) if qValues else 0
开发者ID:Nickiller,项目名称:pacman,代码行数:31,代码来源:valueIterationAgents.py


示例20: __init__

 def __init__(self, mdp, discount = 0.9, iterations = 100):
   """
     Build the agent and run batch value iteration on ``mdp``.

     Args:
         mdp: object providing getStates(), getPossibleActions(state),
             getTransitionStatesAndProbs(state, action) and
             getReward(state, action, nextState).
         discount: discount factor, stored on self.discount.
         iterations: stored on self.iterations.  The values are first
             seeded with mdp.getReward(state, 'Stop', state); the sweep
             counter starts at 1, so iterations - 1 sweeps follow (none
             when iterations <= 1).

     Each sweep collects the new values in a separate Counter and copies
     them into self.values only after the sweep is complete.
   """
   self.mdp = mdp
   self.discount = discount
   self.iterations = iterations
   self.values = util.Counter() # A Counter is a dict with default 0

   for state in mdp.getStates():
     self.values[state] = mdp.getReward(state, 'Stop', state)

   currentIterationCounter = 1
   # Compare with "<" rather than "!=" so the loop also terminates when
   # iterations is 0 or negative.
   while currentIterationCounter < self.iterations:
     newValues = util.Counter()
     for state in mdp.getStates():
       tempValues = util.Counter()
       for action in mdp.getPossibleActions(state):
         for newState, prob in mdp.getTransitionStatesAndProbs(state, action):
           tempValues[action] += prob*(mdp.getReward(state, action, newState)+self.discount*self.values[newState])
       newValues[state] = tempValues[tempValues.argMax()]
     currentIterationCounter += 1
     for state in mdp.getStates():
       self.values[state] = newValues[state]
开发者ID:StonyBrookUniversity,项目名称:reinforcement,代码行数:34,代码来源:valueIterationAgents.py



注:本文中的mdp.getPossibleActions函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python mdp.getStates函数代码示例发布时间:2022-05-27
下一篇:
Python mdp.activate_extension函数代码示例发布时间:2022-05-27
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap