本文整理汇总了Python中pybrain.rl.experiments.Experiment类的典型用法代码示例。如果您正苦于以下问题:Python Experiment类的具体用法?Python Experiment怎么用?Python Experiment使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Experiment类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: testMaze
def testMaze():
    """Train a tabular Q-learner on a small 5x5 maze and check its greedy policy.

    Simplified version of the reinforcement learning tutorial example. The
    function prints the maze map and the greedy policy (one of N/E/S/W per
    cell) and then asserts that the policy equals a fixed expected layout.

    Raises:
        AssertionError: if the greedy policy differs from the expected one.
    """
    # Cells equal to 1 are drawn as '#' in the printed map, cells equal to 0 as ' '.
    structure = np.array([[1, 1, 1, 1, 1],
                          [1, 0, 0, 0, 1],
                          [1, 0, 1, 0, 1],
                          [1, 0, 1, 0, 1],
                          [1, 1, 1, 1, 1]])
    shape = np.array(structure.shape)
    n_states = shape.prod()
    environment = Maze(structure, tuple(shape - 2))
    controller = ActionValueTable(n_states, 4)
    controller.initialize(1.)
    learner = Q()
    agent = LearningAgent(controller, learner)
    task = MDPMazeTask(environment)
    experiment = Experiment(task, agent)
    for _ in range(3):
        experiment.doInteractions(40)
        # Interactions alone only collect samples; without learn() the value
        # table keeps its initial value and the greedy policy is 'N' everywhere.
        agent.learn()
        agent.reset()
    # NOTE(review): exploration is stochastic, so the exact policy asserted
    # below may still vary between runs - confirm the test is stable.
    # (0, 0) is upper left and (0, N) is upper right, so flip matrix upside down to match NESW action order
    greedy_policy = np.argmax(controller.params.reshape(n_states, 4), 1)
    greedy_policy = np.flipud(np.array(list('NESW'))[greedy_policy].reshape(shape))
    maze = np.flipud(np.array(list(' #'))[structure])
    print('Maze map:')
    print('\n'.join(''.join(row) for row in maze))
    print('Greedy policy:')
    policy_text = '\n'.join(''.join(row) for row in greedy_policy)
    print(policy_text)
    assert policy_text == 'NNNNN\nNSNNN\nNSNNN\nNEENN\nNNNNN'
开发者ID:gabrielhuang,项目名称:pybrain,代码行数:29,代码来源:optimizationtest.py
示例2: testNet
def testNet(learner, moduleNet, env, maxPlaneStartDist, stepSize, numAngs, thermRadius):
    """Run the given network without exploration and plot the plane's distance.

    The ``env`` argument is replaced immediately by a freshly constructed
    ``contEnv.contThermEnvironment`` built from the remaining arguments, so the
    plane starts over. The agent then performs 100 single interactions and the
    value of ``env.distPlane()`` before and after each one is plotted.
    """
    # Exploration off: epsilon-greedy explorer constructed with 0
    from pybrain.rl.explorers.discrete.egreedy import EpsilonGreedyExplorer
    learner._setExplorer(EpsilonGreedyExplorer(0))
    agent = LearningAgent(moduleNet, learner)

    # Reset by building a brand-new environment (shadows the parameter)
    env = contEnv.contThermEnvironment(maxPlaneStartDist, stepSize, numAngs, thermRadius)
    from simpleThermalTask import SimpThermTask
    thermal_task = SimpThermTask(env)
    from pybrain.rl.experiments import Experiment
    experiment = Experiment(thermal_task, agent)

    # One distance sample up front, then one more after every interaction
    n_steps = 100
    distances = [env.distPlane()]
    for _ in range(n_steps):
        experiment.doInteractions(1)
        distances.append(env.distPlane())

    # Show the recorded distances
    import matplotlib.pyplot as plt
    plt.figure(1)
    plt.plot(distances, 'o')
    plt.ylabel('Distance from center of thermal')
    plt.xlabel('Interaction iteration')
    plt.title('Test Results for Neural Fitted Q Learner')
    plt.show()
开发者ID:ThermalSoaring,项目名称:Machine-Learning-Policy-Formation,代码行数:28,代码来源:main.py
示例3: run_bbox
def run_bbox(verbose=False):
    """Play one bbox level with a tabular Q-learning agent.

    The level is reset if already loaded, otherwise loaded from
    ``../levels/train_level.data``. The agent then interacts one step at a
    time while ``environment.finish_flag`` is truthy, and ``bbox.finish`` is
    called at the end. ``verbose`` is accepted but not used by this body.
    """
    n_features = n_actions = max_time = -1

    if not bbox.is_level_loaded():
        bbox.load_level("../levels/train_level.data", verbose=1)
    else:
        bbox.reset_level()

    # NOTE(review): indentation was lost in this copy; these queries are
    # assumed to run on both branches - confirm against the original file.
    n_features = bbox.get_num_of_features()
    n_actions = bbox.get_num_of_actions()
    max_time = bbox.get_max_time()

    q_table = ActionValueTable(n_features, n_actions)
    q_table.initialize(0.2)
    print(q_table._params)

    q_learner = Q(0.5, 0.1)
    q_learner._setExplorer(EpsilonGreedyExplorer(0.4))
    agent = LearningAgent(q_table, q_learner)

    environment = GameEnvironment()
    game_task = GameTask(environment)
    experiment = Experiment(game_task, agent)

    # NOTE(review): learn() is assumed to belong to the loop body - confirm.
    while environment.finish_flag:
        experiment.doInteractions(1)
        agent.learn()

    bbox.finish(verbose=1)
开发者ID:tsvvladimir95,项目名称:simple_bot,代码行数:26,代码来源:bot.py
示例4: initExperiment
def initExperiment(alg, optimistic=True):
    """Assemble a maze experiment driven by the learner that ``alg`` creates.

    Args:
        alg: zero-argument callable returning a learner instance
            (e.g. SARSA, Q or QLambda).
        optimistic: when true the 81x4 value table is initialised to 1.0,
            otherwise to 0.0.

    Returns:
        The Experiment, with an extra empty ``allRewards`` list attached.
    """
    maze_env = Maze(envmatrix, (7, 7))
    maze_task = MDPMazeTask(maze_env)

    # Action-value table; the starting value depends on `optimistic`
    value_table = ActionValueTable(81, 4)
    value_table.initialize(1. if optimistic else 0.)

    # The default explorer is e-greedy; a different one could be assigned,
    # e.g. learner.explorer = BoltzmannExplorer()
    agent = LearningAgent(value_table, alg())
    agent.batchMode = False

    result = Experiment(maze_task, agent)
    result.allRewards = []
    return result
开发者ID:bgrant,项目名称:portfolio,代码行数:25,代码来源:td.py
示例5: learn
def learn(self, number_of_iterations):
    """Train a tabular Q-learning controller and pickle it to ``test.pcl``.

    A fresh ActionValueTable is stored in ``self.controller``; its first
    dimension is the product of the lengths of ``self.ranges`` and its second
    is ``self.force_granularity``. The agent then runs
    ``number_of_iterations`` cycles of one interaction, learn and reset.

    Args:
        number_of_iterations: number of interact/learn/reset cycles to run.
    """
    learner = Q(0.2, 0.8)
    task = CartMovingTask(self.environment)

    # Product of the range lengths, computed without the Python-2-only
    # builtin `reduce` (an empty `self.ranges` yields 1 instead of raising).
    n_states = 1
    for value_range in self.ranges:
        n_states *= len(value_range)

    self.controller = ActionValueTable(n_states, self.force_granularity)
    self.controller.initialize(1.0)
    agent = LearningAgent(self.controller, learner)
    experiment = Experiment(task, agent)
    for _ in range(number_of_iterations):
        experiment.doInteractions(1)
        agent.learn()
        agent.reset()

    # Pickle data is binary: text mode ("w+") raises TypeError on Python 3.
    # NOTE(review): the dump is assumed to happen once, after the loop.
    with open("test.pcl", "wb") as f:
        pickle.dump(self.controller, f)
开发者ID:pawel-k,项目名称:pendulum,代码行数:15,代码来源:ReinforcedController.py
示例6: __init__
def __init__(self, event_queue_name, hub_queue_name):
    """Wire up the SQS-backed dog environment, task, SARSA agent and experiment.

    Args:
        event_queue_name: name of the queue fetched from the SQS connection
            and handed to the environment.
        hub_queue_name: passed through to DogEnv unchanged.
    """
    super().__init__()

    # Environment: event queue configured to deliver MHMessage objects
    self.conn = boto.sqs.connect_to_region(constants.REGION)
    self.event_queue = self.conn.get_queue(event_queue_name)
    self.event_queue.set_message_class(MHMessage)
    self.env = DogEnv(
        DogEnv.ALL_QUIET,
        DogEnv.ALL_QUIET,
        self.event_queue,
        hub_queue_name,
    )
    self.env.delay = self.episodes == 1

    # Task wrapping the environment
    self.task = QuietDogTask(self.env)

    # Value table, every entry starting at one
    # TODO: Get number of states from DogEnv
    n_states = 2 * 5 * 4
    n_actions = 5 * 4
    self.table = ActionValueTable(n_states, n_actions)
    self.table.initialize(1.)

    # Learner (SARSA(), Q() or QLambda() fit here) with Boltzmann
    # exploration replacing the default e-greedy explorer
    self.learner = SARSA()
    self.learner.explorer = BoltzmannExplorer()

    # Agent and experiment
    self.agent = DogAgent(self.table, self.learner)
    self.experiment = Experiment(self.task, self.agent)
开发者ID:jasonboyer,项目名称:dcs,代码行数:28,代码来源:rl_op.py
示例7: initExperiment
def initExperiment(learnalg='Q', history=None, binEdges='10s',
                   scriptfile='./rlRunExperiment_v2.pl',
                   resetscript='./rlResetExperiment.pl'):
    """Build, or restore from ``history``, an Experiment around OmnetEnvironment.

    Args:
        learnalg: 'Q' for a tabular Q-learner or 'NFQ' for a network learner.
        history: optional mapping from a previous run. Its 'data', 'rewards'
            and 'nruns' entries are restored onto the environment, task and
            experiment; for 'Q' its 'av_table' entry is reused as controller.
        binEdges: '10s', '30s', 'lessperturbed' or None, selecting which
            module-level bin-edge table is handed to environment and task.
        scriptfile: passed through to OmnetEnvironment.
        resetscript: passed through to OmnetEnvironment.

    Returns:
        The configured Experiment, with ``nruns`` set to 0 or restored.

    Raises:
        ValueError: if ``binEdges`` or ``learnalg`` is not a supported value.
    """
    if binEdges == '10s':
        centerBinEdges = centerBinEdges_10s
    elif binEdges == '30s':
        centerBinEdges = centerBinEdges_30s
    elif binEdges == 'lessperturbed':
        centerBinEdges = centerBinEdges_10s_lessperturbed
    elif binEdges is None:
        centerBinEdges = None
    else:
        raise ValueError("No bins for given binEdges setting")

    # Fail fast: reject an unknown algorithm before the environment is built.
    if learnalg not in ('Q', 'NFQ'):
        raise ValueError("learnalg unknown")

    env = OmnetEnvironment(centerBinEdges, scriptfile, resetscript)
    if history is not None:
        env.data = history['data']

    task = OmnetTask(env, centerBinEdges)
    if history is not None:
        task.allrewards = history['rewards']

    if learnalg == 'Q':
        nstates = env.numSensorBins ** env.numSensors
        if history is None:
            av_table = ActionValueTable(nstates, env.numActions)
            av_table.initialize(1.)
        else:
            av_table = history['av_table']
        learner = Q(0.1, 0.9)  # alpha, gamma
        learner._setExplorer(EpsilonGreedyExplorer(0.05))  # epsilon
    else:  # 'NFQ' (validated above)
        av_table = ActionValueNetwork(env.numSensors, env.numActions)
        learner = NFQ()

    agent = LearningAgent(av_table, learner)
    experiment = Experiment(task, agent)
    experiment.nruns = 0 if history is None else history['nruns']
    return experiment
开发者ID:bgrant,项目名称:portfolio,代码行数:46,代码来源:manet_learner.py
示例8: maze
def maze():
    """Train a tabular Q-learner on a text-defined maze, plotting values as it learns.

    Runs 100 rounds of 100 interactions. After each round the agent learns
    and is reset, and the per-cell maximum action value is redrawn with
    pylab. Finally the maze map and the greedy policy (one of N/E/S/W per
    cell) are printed.
    """
    pylab.gray()
    pylab.ion()
    # The goal appears to be in the upper right
    # NOTE(review): the rows below do not all have the same length in this
    # copy (runs of spaces may have been collapsed) - confirm against the
    # original source before running.
    structure = [
        "!!!!!!!!!!",
        "! ! ! ! !",
        "! !! ! ! !",
        "! ! !",
        "! !!!!!! !",
        "! ! ! !",
        "! ! !!!! !",
        "! !",
        "! !!!!! !",
        "! ! !",
        "!!!!!!!!!!",
    ]
    # ' ' maps to 0 and '!' to 1
    structure = np.array([[ord(c) - ord(" ") for c in row] for row in structure])
    shape = np.array(structure.shape)
    n_states = shape.prod()
    environment = Maze(structure, tuple(shape - 2))
    controller = ActionValueTable(n_states, 4)
    controller.initialize(1.0)
    learner = Q()
    agent = LearningAgent(controller, learner)
    task = MDPMazeTask(environment)
    experiment = Experiment(task, agent)
    for _ in range(100):
        experiment.doInteractions(100)
        agent.learn()
        agent.reset()
        # 4 actions per state; max(1) picks the best action value of each
        # cell. The grid size comes from `shape` rather than a hard-coded
        # 81 / 9x9, which does not match a table sized from this maze.
        pylab.pcolor(controller.params.reshape(n_states, 4).max(1).reshape(*shape))
        pylab.draw()
    # (0, 0) is upper left and (0, N) is upper right, so flip matrix upside down to match NESW action order
    greedy_policy = np.argmax(controller.params.reshape(n_states, 4), 1)
    greedy_policy = np.flipud(np.array(list("NESW"))[greedy_policy].reshape(shape))
    maze = np.flipud(np.array(list(" #"))[structure])
    print("Maze map:")
    print("\n".join("".join(row) for row in maze))
    print("Greedy policy:")
    print("\n".join("".join(row) for row in greedy_policy))
开发者ID:nvaller,项目名称:pug-ann,代码行数:45,代码来源:example.py
示例9: run
def run():
    """Train a Q-learning agent on the blackjack task and print its value table.

    Per the original notes: states are the current hand value (0-20) and the
    two actions are Stand=0 and Hit=1. After NB_ITERATION single interactions
    a Markdown table of the learned action values is printed.
    """
    # Action-value table
    av_table = ActionValueTable(MAX_VAL, MIN_VAL)
    av_table.initialize(0.)

    # Q-learning agent with an epsilon of 0.0
    q_learner = Q(Q_ALPHA, Q_GAMMA)
    q_learner._setExplorer(EpsilonGreedyExplorer(0.0))
    agent = LearningAgent(av_table, q_learner)

    # Environment, task and experiment
    env = BlackjackEnv()
    task = BlackjackTask(env, verbosity=VERBOSE)
    experiment = Experiment(task, agent)

    # NOTE(review): indentation was lost in this copy; learn() is assumed to
    # run only after a non-zero reward - confirm against the original file.
    for _ in range(NB_ITERATION):
        experiment.doInteractions(1)
        if task.lastreward == 0:
            continue
        if VERBOSE:
            print("Agent learn")
        agent.learn()

    print('|First State|Choice 0 (Stand)|Choice 1 (Hit)|Relative value of Standing over Hitting|')
    print('|:-------:|:-------|:-----|:-----|')
    for state in range(MAX_VAL):
        values = av_table.getActionValues(state)
        stand_value = values[0]
        hit_value = values[1]
        print('| %s | %s | %s | %s |' % (
            (state + 1),
            stand_value,
            hit_value,
            stand_value - hit_value
        ))
开发者ID:Petlefeu,项目名称:Q_Blackjack,代码行数:43,代码来源:main.py
示例10: __init__
def __init__(self, mode):
    """Create the environment, controller, agent, task and experiment for ``mode``.

    Args:
        mode: prefix used to look up the '<mode>Database' and
            '<mode>GroundTruth' configuration entries; also stored on the
            instance and handed to the environment.
    """
    self.mode = mode
    cu.mem('Reinforcement Learning Started')

    database = config.get(mode + 'Database')
    ground_truth = config.get(mode + 'GroundTruth')
    self.environment = BoxSearchEnvironment(database, mode, ground_truth)

    self.controller = QNetwork()
    cu.mem('QNetwork controller created')

    # The agent starts without a learner
    self.learner = None
    self.agent = BoxSearchAgent(self.controller, self.learner)

    task_ground_truth = config.get(mode + 'GroundTruth')
    self.task = BoxSearchTask(self.environment, task_ground_truth)
    self.experiment = Experiment(self.task, self.agent)
开发者ID:jccaicedo,项目名称:localization-agent,代码行数:10,代码来源:TrackerRunner.py
示例11: __init__
class RL:
    """SARSA agent over a small action-value table, stepped one interaction at a time."""

    # Dimensions handed to ActionValueTable and reused when reshaping its
    # parameters in go(), so the two can never drift apart.
    N_STATES = 4
    N_ACTIONS = 5

    def __init__(self):
        """Build the value table, SARSA learner, agent, task and experiment."""
        self.av_table = ActionValueTable(self.N_STATES, self.N_ACTIONS)
        self.av_table.initialize(0.1)
        learner = SARSA()
        learner._setExplorer(EpsilonGreedyExplorer(0.0))
        self.agent = LearningAgent(self.av_table, learner)
        env = HASSHEnv()
        task = HASSHTask(env)
        self.experiment = Experiment(task, self.agent)

    def go(self):
        """Publish the first table row, then run one interaction and learn."""
        # The row is written to the constants module attribute; the former
        # `global rl_params` declaration was never assigned and was removed.
        table = self.av_table.params.reshape(self.N_STATES, self.N_ACTIONS)
        rassh.core.constants.rl_params = table[0]
        self.experiment.doInteractions(1)
        self.agent.learn()
开发者ID:savamarius,项目名称:rassh,代码行数:20,代码来源:rl.py
示例12: __init__
def __init__(self):
    """Set up the game environment, Q-learning agent, task and experiment."""
    self.environment = GameEnv()

    # Table sized from the environment's outdim (first) and indim (second)
    value_table = ActionValueTable(self.environment.outdim, self.environment.indim)
    value_table.initialize(0.)  # todo: save & restore agents state

    q_learner = Q()
    q_learner._setExplorer(EpsilonGreedyExplorer())

    self.agent = LearningAgent(value_table, q_learner)
    self.task = GameTask(self.environment)
    self.experiment = Experiment(self.task, self.agent)
开发者ID:zmuda,项目名称:iwium,代码行数:12,代码来源:bot.py
示例13: explore_maze
def explore_maze():
    """Train a tabular Q-learner on a text-defined maze and print its greedy policy.

    Simplified version of the reinforcement learning tutorial example. Runs
    30 rounds of 30 interactions, learning and resetting the agent after
    each round, then prints the maze map and the greedy policy (one of
    N/E/S/W per cell).
    """
    # NOTE(review): the rows below do not all have the same length in this
    # copy (runs of spaces may have been collapsed) - confirm against the
    # original source before running.
    structure = [
        list("!!!!!!!!!!"),
        list("! ! ! ! !"),
        list("! !! ! ! !"),
        list("! ! !"),
        list("! !!!!!! !"),
        list("! ! ! !"),
        list("! ! !!!! !"),
        list("! !"),
        list("! !!!!! !"),
        list("! ! !"),
        list("!!!!!!!!!!"),
    ]
    # ' ' maps to 0 and '!' to 1
    structure = np.array([[ord(c) - ord(" ") for c in row] for row in structure])
    shape = np.array(structure.shape)
    n_states = shape.prod()
    environment = Maze(structure, tuple(shape - 2))
    controller = ActionValueTable(n_states, 4)
    controller.initialize(1.0)
    learner = Q()
    agent = LearningAgent(controller, learner)
    task = MDPMazeTask(environment)
    experiment = Experiment(task, agent)
    for _ in range(30):
        experiment.doInteractions(30)
        agent.learn()
        agent.reset()
    # (0, 0) is upper left and (0, N) is upper right, so flip matrix upside down to match NESW action order
    greedy_policy = np.argmax(controller.params.reshape(n_states, 4), 1)
    greedy_policy = np.flipud(np.array(list("NESW"))[greedy_policy].reshape(shape))
    maze = np.flipud(np.array(list(" #"))[structure])
    print("Maze map:")
    print("\n".join("".join(row) for row in maze))
    print("Greedy policy:")
    print("\n".join("".join(row) for row in greedy_policy))
    # No fixed policy is asserted here: the former check compared against a
    # 5-row, 5-column policy string, which an 11-row maze can never equal.
开发者ID:nvaller,项目名称:pug-ann,代码行数:40,代码来源:example.py
示例14: PlayYourCardsRight
class PlayYourCardsRight(Feature):
    """Feature that plays a card game through a Q-learning agent and speech I/O."""

    def __init__(self, text_to_speech, speech_to_text):
        """Build the value table, agent, game interaction, task and experiment."""
        Feature.__init__(self)

        # Value table: start from zeros only when loading reports False.
        # The comparison is kept as `== False` so that other return values
        # (e.g. None) are not treated as a failed load.
        self.av_table = GameTable(13, 2)
        load_result = self.av_table.loadParameters()
        if load_result == False:
            self.av_table.initialize(0.)

        # Q-learning agent with an epsilon of 0.0
        q_learner = Q(0.5, 0.0)
        q_learner._setExplorer(EpsilonGreedyExplorer(0.0))
        self.agent = LearningAgent(self.av_table, q_learner)

        # Game interaction shared by the environment and the task
        self.game_interaction = GameInteraction(text_to_speech, speech_to_text)
        game_env = GameEnvironment(self.game_interaction)
        game_task = GameTask(game_env, self.game_interaction)

        self.experiment = Experiment(game_task, self.agent)

    @property
    def is_speaking(self):
        """Forward the ``is_speaking`` value of the game interaction."""
        return self.game_interaction.is_speaking

    def _thread(self, args):
        """Play, learn and save the table repeatedly until ``is_stop`` is set."""
        # NOTE(review): indentation was lost in this copy; saving is assumed
        # to happen inside the loop, once per round - confirm.
        while True:
            if self.is_stop:
                break
            self.experiment.doInteractions(1)
            self.agent.learn()
            self.av_table.saveParameters()
开发者ID:MYMSK4K,项目名称:SaltwashAR,代码行数:37,代码来源:playyourcardsright.py
示例15: __init__
def __init__(self, text_to_speech, speech_to_text):
    """Build the value table, Q-learning agent, game interaction and experiment.

    Args:
        text_to_speech: handed to GameInteraction.
        speech_to_text: handed to GameInteraction.
    """
    Feature.__init__(self)

    # Value table: initialise to zero only when loading reports False
    # (comparison kept as `== False` on purpose).
    self.av_table = GameTable(13, 2)
    loaded = self.av_table.loadParameters()
    if loaded == False:
        self.av_table.initialize(0.)

    # Q-learning agent, explorer epsilon 0.0
    q_learner = Q(0.5, 0.0)
    q_learner._setExplorer(EpsilonGreedyExplorer(0.0))
    self.agent = LearningAgent(self.av_table, q_learner)

    # Interaction object shared by environment and task
    self.game_interaction = GameInteraction(text_to_speech, speech_to_text)
    game_env = GameEnvironment(self.game_interaction)
    game_task = GameTask(game_env, self.game_interaction)

    self.experiment = Experiment(game_task, self.agent)
开发者ID:MYMSK4K,项目名称:SaltwashAR,代码行数:24,代码来源:playyourcardsright.py
示例16: Environment
import pickle
import time
# Create environment
sub_env = Environment(20, 20)
world = World(sub_env)

# Brain for the animat: a previously trained network stored as a pickle.
# The file is opened in binary mode (required for pickles on Python 3) and
# closed as soon as loading finishes.
# NOTE(security): pickle.load can execute arbitrary code from the file;
# only load a 'neuro.net' that comes from a trusted source.
with open('neuro.net', 'rb') as f:
    trained_net = pickle.load(f)
brain = BrainController(trained_net)

# Learning method we use
#learner = PolicyGradientLearner()
learner = ENAC()
learner._setLearningRate(0.2)

# Create an animat
animat = StupidAnimat(trained_net, learner, sub_env)

# Establish a task
task = InteractTask(world, animat)
brain.validate_net()
experiment = Experiment(task, animat)

# Train forever: interact, learn, reset, re-validate, then pause 3 seconds.
while True:
    experiment.doInteractions(10000)
    animat.learn()
    animat.reset()
    brain.validate_net()
    time.sleep(3)
开发者ID:xjie0403,项目名称:communication-swarm-intelligence,代码行数:30,代码来源:test_stupid.py
示例17: BoxSearchRunner
class BoxSearchRunner():
def __init__(self, mode):
    """Create the environment, controller, agent, task and experiment for ``mode``.

    Args:
        mode: prefix used for the '<mode>Database' and '<mode>GroundTruth'
            configuration lookups; also stored on the instance.
    """
    self.mode = mode
    cu.mem('Reinforcement Learning Started')

    db_path = config.get(mode + 'Database')
    env_ground_truth = config.get(mode + 'GroundTruth')
    self.environment = BoxSearchEnvironment(db_path, mode, env_ground_truth)

    self.controller = QNetwork()
    cu.mem('QNetwork controller created')

    # No learner yet; the agent is created with None
    self.learner = None
    self.agent = BoxSearchAgent(self.controller, self.learner)

    task_ground_truth = config.get(mode + 'GroundTruth')
    self.task = BoxSearchTask(self.environment, task_ground_truth)
    self.experiment = Experiment(self.task, self.agent)
def runEpoch(self, interactions, maxImgs):
    """Run ``maxImgs`` episodes of at most ``interactions`` steps each.

    After every episode the agent learns and is reset, and the environment
    loads its next episode. The elapsed time is reported through ``cu.toc``.
    """
    episodes_done = 0
    started = cu.tic()
    while episodes_done < maxImgs:
        steps = 0
        # Stop on episode end or once the interaction budget is used up
        while not self.environment.episodeDone and steps < interactions:
            self.experiment._oneInteraction()
            steps += 1
        self.agent.learn()
        self.agent.reset()
        self.environment.loadNextEpisode()
        episodes_done += 1
    started = cu.toc('Run epoch with ' + str(maxImgs) + ' episodes', started)
def run(self):
    """Dispatch to train() or test() according to ``self.mode``.

    In 'train' mode the agent's memory is made persistent and its replay
    memory is started before training. In 'test' mode persistence is turned
    off. Any other mode does nothing.
    """
    mode = self.mode
    if mode == 'train':
        self.agent.persistMemory = True
        n_images = len(self.environment.imageList)
        self.agent.startReplayMemory(n_images, config.geti('trainInteractions'))
        self.train()
    elif mode == 'test':
        self.agent.persistMemory = False
        self.test()
def train(self):
    """Run the three-phase schedule: exploration, epsilon annealing, exploitation.

    Phase 1 runs 'explorationEpochs' epochs with epsilon fixed at 1.0 and no
    learner attached. Phase 2 attaches a QLearning learner and lowers
    epsilon linearly towards 'minTrainingEpsilon' over
    'epsilonGreedyEpochs' epochs. Phase 3 runs 'exploitLearningEpochs' more
    epochs at the final epsilon and copies the network snapshot file after
    each one. Every epoch flushes the task statistics and calls
    doValidation.
    """
    networkFile = (config.get('networkDir') + config.get('snapshotPrefix')
                   + '_iter_' + config.get('trainingIterationsPerBatch') + '.caffemodel')
    interactions = config.geti('trainInteractions')
    minEpsilon = config.getf('minTrainingEpsilon')
    # Plain len(): the former "/1" was a no-op on Python 2 and would turn
    # the episode count into a float on Python 3.
    epochSize = len(self.environment.imageList)
    epsilon = 1.0
    self.controller.setEpsilonGreedy(epsilon, self.environment.sampleAction)
    epoch = 1

    # Phase 1: pure exploration
    exEpochs = config.geti('explorationEpochs')
    while epoch <= exEpochs:
        s = cu.tic()
        print('Epoch {} : Exploration (epsilon=1.0)'.format(epoch))
        self.runEpoch(interactions, len(self.environment.imageList))
        self.task.flushStats()
        self.doValidation(epoch)
        cu.toc('Epoch done in ', s)
        epoch += 1

    # Learning starts here: attach the learner to the agent
    self.learner = QLearning()
    self.agent.learner = self.learner

    # Phase 2: epsilon-greedy with linearly decreasing epsilon
    egEpochs = config.geti('epsilonGreedyEpochs')
    while epoch <= egEpochs + exEpochs:
        s = cu.tic()
        epsilon = epsilon - (1.0 - minEpsilon) / float(egEpochs)
        if epsilon < minEpsilon:
            epsilon = minEpsilon
        self.controller.setEpsilonGreedy(epsilon, self.environment.sampleAction)
        print('Epoch {} (epsilon-greedy:{:5.3f})'.format(epoch, epsilon))
        self.runEpoch(interactions, epochSize)
        self.task.flushStats()
        self.doValidation(epoch)
        cu.toc('Epoch done in ', s)
        epoch += 1

    # Phase 3: exploitation at the final epsilon, snapshotting every epoch
    maxEpochs = config.geti('exploitLearningEpochs') + exEpochs + egEpochs
    while epoch <= maxEpochs:
        s = cu.tic()
        print('Epoch {} (exploitation mode: epsilon={:5.3f})'.format(epoch, epsilon))
        self.runEpoch(interactions, epochSize)
        self.task.flushStats()
        self.doValidation(epoch)
        cu.toc('Epoch done in ', s)
        shutil.copy(networkFile, networkFile + '.' + str(epoch))
        epoch += 1
def test(self):
    """Run one epoch over every image using the configured test epsilon."""
    n_interactions = config.geti('testInteractions')
    test_epsilon = config.getf('testEpsilon')
    self.controller.setEpsilonGreedy(test_epsilon)
    n_episodes = len(self.environment.imageList)
    self.runEpoch(n_interactions, n_episodes)
def doValidation(self, epoch):
if epoch % config.geti('validationEpochs') != 0:
return
auxRL = BoxSearchRunner('test')
auxRL.run()
indexType = config.get('evaluationIndexType')
category = config.get('category')
if indexType == 'pascal':
categories, catIndex = bse.get20Categories()
elif indexType == 'relations':
categories, catIndex = bse.getCategories()
elif indexType == 'finetunedRelations':
categories, catIndex = bse.getRelationCategories()
if category in categories:
catI = categories.index(category)
else:
catI = -1
#.........这里部分代码省略.........
开发者ID:jccaicedo,项目名称:localization-agent,代码行数:101,代码来源:TrackerRunner.py
示例18: ActionValueTable
# Action-value table with `matrix_size` rows and 2 columns, every entry
# starting at one.
# (alternative: table = ActionValueTable(matrix_size, matrix_size))
table = ActionValueTable(matrix_size, 2)
table.initialize(1.)

# Learner: SARSA(), Q() or QLambda() can be used here. The default
# exploration is e-greedy; another explorer may be assigned, e.g.
# learner.explorer = BoltzmannExplorer()
learner = Q()

# Agent and experiment
agent = LearningAgent(table, learner)
experiment = Experiment(task, agent)

# Plot setup
pylab.gray()
pylab.ion()

# Endless training loop (a bounded `for i in range(100)` was the alternative)
while True:
    # One batch of interactions, then learn from it and clear the history
    experiment.doInteractions(matrix_size)
    agent.learn()
    agent.reset()
    # Show the current table
    # (alternative shape: table.params.reshape(matrix_size, matrix_size))
    print(table.params.reshape(matrix_size,2))
开发者ID:paba,项目名称:reinforcement_learning,代码行数:31,代码来源:td_menu_sequential.py
示例19: WorldInteraction
# Set up the predator's Q-learning experiment inside a shared world.
world = WorldInteraction()
# Value table sized by the predator's state count and its number of actions.
predTable = ActionValueTable(
PredatorInteraction.NSTATES,
len(PredatorInteraction.ACTIONS)
)
# Every table entry starts at zero.
predTable.initialize(0.)
# Q-learner built from the module-level ALPHA and GAMMA, exploring with an
# epsilon-greedy explorer built from EPSILON.
predLearner = Q(ALPHA, GAMMA)
predLearner._setExplorer(EpsilonGreedyExplorer(EPSILON))
predAgent = LearningAgent(predTable, predLearner)
# Environment and task are both built on top of the same world object.
predEnv = PredatorEnvironment(world)
predTask = PredatorTask(predEnv)
predExp = Experiment(predTask, predAgent)
try:
for t in xrange(MAX_TIME):
print 't = %d' % t
world.t = t
predExp.doInteractions(1)
predAgent.learn()
print 'Colors vs. Q-table:'
table_print(predTable._params, PredatorInteraction.NSTATES)
print
except KeyboardInterrupt:
pass
finally:
开发者ID:ericmarcincuddy,项目名称:cs263c,代码行数:30,代码来源:animats.py
示例20: ActionValueTable
# Script setup: value table, SARSA learner, agent, experiment and the
# iteration counts used by the learning phase.
# create value table and initialize with ones
table = ActionValueTable(numStates, numActions)
table.initialize(1.)
# create agent with controller and learner - use SARSA(), Q() or QLambda() here
# learner = QLambda()
learner = SARSA()
# learner = Q()
# standard exploration is e-greedy, but a different type can be chosen as well
# learner.explorer = BoltzmannExplorer()
# create agent
agent = LearningAgent(table, learner)
# create experiment
experiment = Experiment(task, agent)
# prepare plotting (currently disabled)
# pylab.gray()
# pylab.ion()
# Learning phase
# Num iterations used for PROHA Workshop preliminary evaluation
# numIterations = 1600
numIterations = 1500
numInteractions = 600
# Num iterations used for PROHA and PROLE slides
# numIterations = 10
# numInteractions = 3
开发者ID:polca-project,项目名称:polca-toolbox,代码行数:31,代码来源:trainer.py
注:本文中的pybrain.rl.experiments.Experiment类示例由纯净天空整理自GitHub/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论