This article collects typical usage examples of the Python class pybrain.rl.agents.LearningAgent. If you are unsure what LearningAgent does, how to use it, or want to see it in context, the curated class examples here may help.
The following shows 20 code examples of the LearningAgent class, sorted by popularity by default.
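All of the examples follow the same basic pattern: wrap a value module (an ActionValueTable or ActionValueNetwork) and a learner (Q, SARSA, NFQ, ...) in a LearningAgent, connect the agent to a task through an Experiment, and then alternate interaction with calls to agent.learn() and agent.reset(). Below is a minimal sketch of that loop, using the small maze from Example 4; the import paths follow the PyBrain tutorial and should be verified against your installed version.

import numpy as np
from pybrain.rl.environments.mazes import Maze, MDPMazeTask
from pybrain.rl.learners.valuebased import ActionValueTable
from pybrain.rl.learners import Q
from pybrain.rl.agents import LearningAgent
from pybrain.rl.experiments import Experiment

# 5x5 maze: 1 = wall, 0 = free; the goal is the free cell at (3, 3)
structure = np.array([[1, 1, 1, 1, 1],
                      [1, 0, 0, 0, 1],
                      [1, 0, 1, 0, 1],
                      [1, 0, 1, 0, 1],
                      [1, 1, 1, 1, 1]])
environment = Maze(structure, (3, 3))
controller = ActionValueTable(structure.size, 4)   # one row per state, one column per action
controller.initialize(1.)                          # optimistic initial values encourage exploration
agent = LearningAgent(controller, Q())             # value module + learner = agent
experiment = Experiment(MDPMazeTask(environment), agent)

for _ in range(50):
    experiment.doInteractions(30)   # collect observations, actions and rewards into the agent's history
    agent.learn()                   # update the Q-table from that history
    agent.reset()                   # clear the history before the next batch

The same skeleton appears throughout the examples below; only the environment, task, value module, and learner change.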
Example 1: train
def train():
    # Make the environment
    environment = TwentyFortyEightEnvironment()
    # The task is the game this time
    task = environment
    # Make the reinforcement learning agent (use a network because inputs are continuous)
    network = ActionValueNetwork(task.nSenses, task.nActions)
    # Use NFQ, the neural-fitted variant of Q-learning, since the controller is a network rather than a table
    learner = NFQ()
    learner.gamma = GAMMA
    agent = LearningAgent(network, learner)
    # Set up an experiment
    experiment = EpisodicExperiment(task, agent)
    # Train the learner
    meanScores = []
    for i in xrange(LEARNING_EPOCHS):
        experiment.doEpisodes(GAMES_PER_EPOCH)
        print "Iteration ", i, " With mean score ", task.meanScore, "Max block achieved ", environment.maxGameBlock
        meanScores.append(task.meanScore)
        agent.learn()
        agent.reset()
    params = {"learningEpochs": LEARNING_EPOCHS, "gamesPerEpoch": GAMES_PER_EPOCH, "gamma": GAMMA}
    return meanScores, params, agent
Author: Aggregates | Project: MI_HW2 | Lines: 31 | Source: RLNFQ.py
Example 2: q_learning_table
def q_learning_table():
    controller = ActionValueTable(36, 4)
    learner = Q()
    controller.initialize(1.)
    agent = LearningAgent(controller, learner)
    score_list = []
    turn_list = []
    # 100 extra iterations compared with the neural-network version's training budget
    for i in range(600):
        print_state(agent.module.getValue, 'table')
        score, turn = play(agent, 'table')
        score_list.append(score)
        turn_list.append(turn)
        agent.learn()
        agent.reset()
        print i, int(numpy.mean(score_list)), max(score_list), score, turn
    with open('./agent.dump', 'w') as f:
        pickle.dump(agent, f)
    with open('./score.dump', 'w') as f:
        pickle.dump([score_list, turn_list], f)
Author: kokukuma | Project: reinforcement_learning_2048 | Lines: 26 | Source: pybrain_rl_simple2.py
Example 3: getAction
def getAction(self):
    # pick an action with Boltzmann exploration / Q-learning
    if self.nextAction is None:
        action = LearningAgent.getAction(self)
        self.lastaction = action
        return action
    else:
        # suggestion from the supervisor, applied with a tolerance threshold
        if self.tolerance is not None:
            if (self.expectedReward * (1 + self.tolerance)) > self.module.getActionValue(self.nextAction):
                action = self.nextAction
                self.lastaction = action
                self.nextAction = None
                return action
            else:
                # act independently
                action = LearningAgent.getAction(self)
                self.lastaction = action
                return action
        # suggestion from the supervisor, without tolerance
        else:
            action = self.nextAction
            self.lastaction = action
            self.nextAction = None
            return action
Author: pieschtz | Project: learning-on-traffic-lights | Lines: 30 | Source: lowlevelagent.py
Example 4: test_maze
def test_maze():
    # simplified version of the reinforcement learning tutorial example
    structure = np.array([[1, 1, 1, 1, 1],
                          [1, 0, 0, 0, 1],
                          [1, 0, 1, 0, 1],
                          [1, 0, 1, 0, 1],
                          [1, 1, 1, 1, 1]])
    shape = np.array(structure.shape)
    environment = Maze(structure, tuple(shape - 2))
    controller = ActionValueTable(shape.prod(), 4)
    controller.initialize(1.)
    learner = Q()
    agent = LearningAgent(controller, learner)
    task = MDPMazeTask(environment)
    experiment = Experiment(task, agent)
    for i in range(30):
        experiment.doInteractions(30)
        agent.learn()
        agent.reset()
    controller.params.reshape(shape.prod(), 4).max(1).reshape(*shape)
    # (0, 0) is upper left and (0, N) is upper right, so flip matrix upside down to match NESW action order
    greedy_policy = np.argmax(controller.params.reshape(shape.prod(), 4), 1)
    greedy_policy = np.flipud(np.array(list('NESW'))[greedy_policy].reshape(shape))
    maze = np.flipud(np.array(list(' #'))[structure])
    print('Maze map:')
    print('\n'.join(''.join(row) for row in maze))
    print('Greedy policy:')
    print('\n'.join(''.join(row) for row in greedy_policy))
    assert '\n'.join(''.join(row) for row in greedy_policy) == 'NNNNN\nNSNNN\nNSNNN\nNEENN\nNNNNN'
Author: gabrielhuang | Project: pybrain | Lines: 31 | Source: test_maze.py
Example 5: initExperiment
def initExperiment(alg, optimistic=True):
    env = Maze(envmatrix, (7, 7))
    # create task
    task = MDPMazeTask(env)
    # create value table and initialize with ones
    table = ActionValueTable(81, 4)
    if optimistic:
        table.initialize(1.)
    else:
        table.initialize(0.)
    # create agent with controller and learner - use SARSA(), Q() or QLambda() here
    learner = alg()
    # standard exploration is e-greedy, but a different type can be chosen as well
    # learner.explorer = BoltzmannExplorer()
    agent = LearningAgent(table, learner)
    agent.batchMode = False
    experiment = Experiment(task, agent)
    experiment.allRewards = []
    return experiment
Author: bgrant | Project: portfolio | Lines: 25 | Source: td.py
Example 6: run_bbox
def run_bbox(verbose=False):
    n_features = n_actions = max_time = -1
    if bbox.is_level_loaded():
        bbox.reset_level()
    else:
        bbox.load_level("../levels/train_level.data", verbose=1)
        n_features = bbox.get_num_of_features()
        n_actions = bbox.get_num_of_actions()
        max_time = bbox.get_max_time()
    av_table = ActionValueTable(n_features, n_actions)
    av_table.initialize(0.2)
    print av_table._params
    learner = Q(0.5, 0.1)
    learner._setExplorer(EpsilonGreedyExplorer(0.4))
    agent = LearningAgent(av_table, learner)
    environment = GameEnvironment()
    task = GameTask(environment)
    experiment = Experiment(task, agent)
    while environment.finish_flag:
        experiment.doInteractions(1)
        agent.learn()
    bbox.finish(verbose=1)
Author: tsvvladimir95 | Project: simple_bot | Lines: 26 | Source: bot.py
Example 7: __init__
def __init__(self, name, num_states, num_actions, epsilon=0.3, gamma=0.99, alpha=0.95):
    self.controller = ActionValueTable(num_states, num_actions)
    self.controller.initialize(np.random.rand(num_states * num_actions))
    self.learner = Q(gamma=gamma, alpha=alpha)
    self.learner.batchMode = False
    self.learner.explorer.epsilon = epsilon
    LearningAgent.__init__(self, self.controller, self.learner)
    Agent.__init__(self, name)
Author: phelps-sg | Project: py-abm | Lines: 8 | Source: abm.py
Example 8: __init__
def __init__(self, _id, module, learner=None):
    # variables of this class
    self.id = _id
    self.horizontal_edge = lane.getEdgeID(trafficlights.getControlledLanes(self.id)[0])
    self.vertical_edge = lane.getEdgeID(trafficlights.getControlledLanes(str(_id))[2])
    # variables used by the parent class
    self.horizontalLoad = []
    self.verticalLoad = []
    self.averageHorizontal = []
    self.averageVertical = []
    self.nextAction = None
    self.expectedReward = None
    self.tolerance = None
    LearningAgent.__init__(self, module, learner)
Author: pieschtz | Project: learning-on-traffic-lights | Lines: 14 | Source: lowlevelagent.py
Example 9: learn
def learn(client):
    av_table = ActionValueNetwork(4, 1)
    learner = Reinforce()
    agent = LearningAgent(av_table, learner)
    env = CarEnvironment(client)
    task = CarTask(env)
    experiment = ContinuousExperiment(task, agent)
    while True:
        experiment.doInteractionsAndLearn(1)
        agent.learn()
Author: alongubkin | Project: talkingcar | Lines: 14 | Source: client_.py
Example 10: __init__
def __init__(self, module, learner=None):
    '''
    Constructor
    '''
    LearningAgent.__init__(self, module, learner)
    self.__rules = []
    self.__states = {}
    self.__input = {}
    self.__buffer = {}
    # self.__rules.append(BackOffRule())
    self.__rules.append(BackOffRule2())
    self.__rules.append(LocomotionPrimitives())
    self.__states["driveBackStartTime"] = AgentMind.__driveBackStartTime
    self.__states["__lostTrackTurnStartTime"] = AgentMind.__lostTrackTurnStartTime
Author: dtbinh | Project: Lingadrome | Lines: 14 | Source: AgentMind.py
Example 11: Pause
class QAlgorithm:
    def Pause(self):  # if the menu says pause, pause execution
        while self.state == 1:
            time.sleep(.05)
        return True

    def Quit(self):  # if the menu says quit, stop running
        self.process.terminate()
        return False

    def Start(self):  # starts the bot
        if self.process is None:
            self.runBot()
            # self.process = multiprocessing.Process(target=self.runBot, args=[])
            # self.process.start()
        return True

    def CheckState(self):  # checks which state the menu says to be in
        if self.state == 0:
            self.Start()
        elif self.state == 1:
            self.Pause()
        elif self.state == 2:
            self.Quit()

    def GameOver(self):  # checks whether the state requires the bot to pause or quit, or whether the game is over
        return self.CheckState() or self.sr.checkEndGame(self.endBox, self.gameOver)

    def __init__(self, rewardBox, box, gameOver, endGame, scoreArea):
        self.reward = rewardBox
        self.bbox = box
        self.environment = TEnviroment(box)  # custom environment class
        if os.path.isfile("bot.txt"):
            self.controller = pickle.load(open("bot.txt", "rb"))
        else:
            self.controller = ActionValueNetwork(50**2, 4)  # arguments: (framerate * maxPlaytime, number of actions)
        self.learner = Q()
        gf = {0: self.GameOver}
        self.agent = LearningAgent(self.controller, self.learner)
        self.task = TTask(self.environment, scoreArea, gf)  # needs a custom task
        self.experiment = EpisodicExperiment(self.task, self.agent)
        self.process = None
        self.endBox = endGame

    def runBot(self):  # runs the bot for a single episode
        self.experiment.doEpisodes()
        self.agent.learn()
        self.agent.reset()
        file = open("bot.txt", "wb+")
        pickle.dump(self.controller, file)
Author: Diesel9012 | Project: GameLearningAI | Lines: 50 | Source: QAlgorithm.py
Example 12: learn
def learn(self, number_of_iterations):
    learner = Q(0.2, 0.8)
    task = CartMovingTask(self.environment)
    self.controller = ActionValueTable(
        reduce(lambda x, y: x * y, map(lambda x: len(x), self.ranges)), self.force_granularity
    )
    self.controller.initialize(1.0)
    agent = LearningAgent(self.controller, learner)
    experiment = Experiment(task, agent)
    for i in range(number_of_iterations):
        experiment.doInteractions(1)
        agent.learn()
        agent.reset()
    with open("test.pcl", "w+") as f:
        pickle.dump(self.controller, f)
Author: pawel-k | Project: pendulum | Lines: 15 | Source: ReinforcedController.py
Example 13: maze
def maze():
    # import sys, time
    pylab.gray()
    pylab.ion()
    # The goal appears to be in the upper right
    structure = [
        "!!!!!!!!!!",
        "! !  ! ! !",
        "! !! ! ! !",
        "!    !   !",
        "! !!!!!! !",
        "! ! !    !",
        "! ! !!!! !",
        "!        !",
        "! !!!!!  !",
        "!   !    !",
        "!!!!!!!!!!",
    ]
    structure = np.array([[ord(c) - ord(" ") for c in row] for row in structure])
    shape = np.array(structure.shape)
    environment = Maze(structure, tuple(shape - 2))
    controller = ActionValueTable(shape.prod(), 4)
    controller.initialize(1.0)
    learner = Q()
    agent = LearningAgent(controller, learner)
    task = MDPMazeTask(environment)
    experiment = Experiment(task, agent)
    for i in range(100):
        experiment.doInteractions(100)
        agent.learn()
        agent.reset()
    # 4 actions, one row of Q-values per maze cell;
    # max(1) gives/plots the biggest action value for that square
    pylab.pcolor(controller.params.reshape(shape.prod(), 4).max(1).reshape(*shape))
    pylab.draw()
    # (0, 0) is upper left and (0, N) is upper right, so flip matrix upside down to match NESW action order
    greedy_policy = np.argmax(controller.params.reshape(shape.prod(), 4), 1)
    greedy_policy = np.flipud(np.array(list("NESW"))[greedy_policy].reshape(shape))
    maze = np.flipud(np.array(list(" #"))[structure])
    print("Maze map:")
    print("\n".join("".join(row) for row in maze))
    print("Greedy policy:")
    print("\n".join("".join(row) for row in greedy_policy))
Author: nvaller | Project: pug-ann | Lines: 45 | Source: example.py
Example 14: run
def run():
    """
    number of states is:
        current value: 0-20
    number of actions:
        Stand=0, Hit=1
    """
    # define the action-value table
    av_table = ActionValueTable(MAX_VAL, MIN_VAL)
    av_table.initialize(0.)
    # define the Q-learning agent
    q_learner = Q(Q_ALPHA, Q_GAMMA)
    q_learner._setExplorer(EpsilonGreedyExplorer(0.0))
    agent = LearningAgent(av_table, q_learner)
    # define the environment
    env = BlackjackEnv()
    # define the task
    task = BlackjackTask(env, verbosity=VERBOSE)
    # finally, define the experiment
    experiment = Experiment(task, agent)
    # ready to go, start the process
    for _ in range(NB_ITERATION):
        experiment.doInteractions(1)
        if task.lastreward != 0:
            if VERBOSE:
                print "Agent learn"
            agent.learn()
    print '|First State|Choice 0 (Stand)|Choice 1 (Hit)|Relative value of Standing over Hitting|'
    print '|:-------:|:-------|:-----|:-----|'
    for i in range(MAX_VAL):
        print '| %s | %s | %s | %s |' % (
            (i + 1),
            av_table.getActionValues(i)[0],
            av_table.getActionValues(i)[1],
            av_table.getActionValues(i)[0] - av_table.getActionValues(i)[1]
        )
Author: Petlefeu | Project: Q_Blackjack | Lines: 43 | Source: main.py
Example 15: __init__
def __init__(self, x, y, brain, learner, env):
    LearningAgent.__init__(self, brain.net, learner)
    self.cellType = 3
    self.brain = brain
    self.module = brain.net
    self.learner = learner
    self.env = env
    self.color = cell.BLACK
    self.x = x
    self.y = y
    self.num_interactions = 0
    self.age = 0
    self.colddown = 0
    self.speed = self.Speeds[0]
    self.energy = self.MaxEnergy
    self.food_sensor = 0
    self.hunger_sensor = 0
    self.target = [-1, -1]
Author: xjie0403 | Project: communication-swarm-intelligence | Lines: 19 | Source: stupid_animat.py
Example 16: __init__
class RL:
    def __init__(self):
        self.av_table = ActionValueTable(4, 5)
        self.av_table.initialize(0.1)
        learner = SARSA()
        learner._setExplorer(EpsilonGreedyExplorer(0.0))
        self.agent = LearningAgent(self.av_table, learner)
        env = HASSHEnv()
        task = HASSHTask(env)
        self.experiment = Experiment(task, self.agent)

    def go(self):
        global rl_params
        rassh.core.constants.rl_params = self.av_table.params.reshape(4, 5)[0]
        self.experiment.doInteractions(1)
        self.agent.learn()
Author: savamarius | Project: rassh | Lines: 20 | Source: rl.py
Example 17: main
def main():
    # if os.path.exists('./agent.dump'):
    #     with open('./agent.dump') as f:
    #         agent = pickle.load(f)
    # else:
    controller = ActionValueNetwork(9, 4)
    learner = NFQ()
    agent = LearningAgent(controller, learner)
    score_list = []
    for i in range(10000):
        score = play(agent)
        score_list.append(score)
        # With the Q learner this raised:
        #   TypeError: only length-1 arrays can be converted to Python scalars
        #   (in pybrain/rl/learners/valuebased/q.py)
        # => switching the learner from Q to NFQ made it work.
        # => http://stackoverflow.com/questions/23755927/pybrain-training-a-actionvaluenetwork-doesnt-properly-work
        #agent.learn()
        agent.reset()
        #data = [[0,0,0,0], [0,0,0,0], [0,0,0,2], [0,0,0,2]]
        data = [[0,0,2], [0,0,0], [0,0,2]]
        agent.integrateObservation(numpy.array(data).ravel())
        move = agent.getAction()
        print i, int(numpy.mean(score_list)), max(score_list), move
    with open('./agent.dump', 'w') as f:
        pickle.dump(agent, f)
    with open('./score.dump', 'w') as f:
        pickle.dump(score_list, f)
Author: kokukuma | Project: reinforcement_learning_2048 | Lines: 34 | Source: pybrain_rl_simple.py
Example 18: explore_maze
def explore_maze():
    # simplified version of the reinforcement learning tutorial example
    structure = [
        list("!!!!!!!!!!"),
        list("! !  ! ! !"),
        list("! !! ! ! !"),
        list("!    !   !"),
        list("! !!!!!! !"),
        list("! ! !    !"),
        list("! ! !!!! !"),
        list("!        !"),
        list("! !!!!!  !"),
        list("!   !    !"),
        list("!!!!!!!!!!"),
    ]
    structure = np.array([[ord(c) - ord(" ") for c in row] for row in structure])
    shape = np.array(structure.shape)
    environment = Maze(structure, tuple(shape - 2))
    controller = ActionValueTable(shape.prod(), 4)
    controller.initialize(1.0)
    learner = Q()
    agent = LearningAgent(controller, learner)
    task = MDPMazeTask(environment)
    experiment = Experiment(task, agent)
    for i in range(30):
        experiment.doInteractions(30)
        agent.learn()
        agent.reset()
    controller.params.reshape(shape.prod(), 4).max(1).reshape(*shape)
    # (0, 0) is upper left and (0, N) is upper right, so flip matrix upside down to match NESW action order
    greedy_policy = np.argmax(controller.params.reshape(shape.prod(), 4), 1)
    greedy_policy = np.flipud(np.array(list("NESW"))[greedy_policy].reshape(shape))
    maze = np.flipud(np.array(list(" #"))[structure])
    print("Maze map:")
    print("\n".join("".join(row) for row in maze))
    print("Greedy policy:")
    print("\n".join("".join(row) for row in greedy_policy))
    assert "\n".join("".join(row) for row in greedy_policy) == "NNNNN\nNSNNN\nNSNNN\nNEENN\nNNNNN"
Author: nvaller | Project: pug-ann | Lines: 40 | Source: example.py
Example 19: someEpisodes
def someEpisodes(game_env, net, discountFactor=0.99, maxSteps=100, avgOver=1, returnEvents=False):
    """ Return the fitness value for one episode of play, given the policy defined by a neural network. """
    task = GameTask(game_env)
    game_env.recordingEnabled = True
    game_env.reset()
    net.reset()
    task.maxSteps = maxSteps
    agent = LearningAgent(net)
    agent.learning = False
    agent.logging = False
    exper = EpisodicExperiment(task, agent)
    fitness = 0
    for _ in range(avgOver):
        rs = exper.doEpisodes(1)
        # add a slight bonus for more exploration, if rewards are identical
        fitness += len(set(game_env._allEvents)) * 1e-6
        # the true, discounted reward
        fitness += sum([sum([v * discountFactor ** step for step, v in enumerate(r)]) for r in rs])
    fitness /= avgOver
    if returnEvents:
        return fitness, game_env._allEvents
    else:
        return fitness
Author: sarobe | Project: VGDLEntityCreator | Lines: 23 | Source: nomodel_pomdp.py
Example 20: PlayYourCardsRight
class PlayYourCardsRight(Feature):

    def __init__(self, text_to_speech, speech_to_text):
        Feature.__init__(self)

        # set up the AV table
        self.av_table = GameTable(13, 2)
        if self.av_table.loadParameters() == False:
            self.av_table.initialize(0.)

        # set up a Q-learning agent
        learner = Q(0.5, 0.0)
        learner._setExplorer(EpsilonGreedyExplorer(0.0))
        self.agent = LearningAgent(self.av_table, learner)

        # set up game interaction
        self.game_interaction = GameInteraction(text_to_speech, speech_to_text)

        # set up the environment
        environment = GameEnvironment(self.game_interaction)

        # set up the task
        task = GameTask(environment, self.game_interaction)

        # set up the experiment
        self.experiment = Experiment(task, self.agent)

    @property
    def is_speaking(self):
        return self.game_interaction.is_speaking

    def _thread(self, args):
        # let's play our cards right!
        while not self.is_stop:
            self.experiment.doInteractions(1)
            self.agent.learn()
            self.av_table.saveParameters()
Author: MYMSK4K | Project: SaltwashAR | Lines: 37 | Source: playyourcardsright.py
Note: the pybrain.rl.agents.LearningAgent class examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other source-code and documentation platforms. The snippets come from open-source projects contributed by their respective authors; copyright remains with the original authors, and redistribution and use are subject to each project's license. Do not republish without permission.