This article collects typical usage examples of the Python class pybrain.rl.experiments.EpisodicExperiment. If you have been wondering what EpisodicExperiment is for, how to use it, or where to find working examples, the hand-picked class examples below should help.
The following sections present 20 code examples of the EpisodicExperiment class, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
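Before the individual examples, here is a minimal sketch of the usage pattern nearly all of them share: build an episodic task around an environment, wrap a controller network in an agent, hand both to an EpisodicExperiment, and call doEpisodes(). The cart-pole task, PGPE learner, and layer sizes in this sketch are illustrative assumptions, not taken from any particular example below.

# Minimal sketch of the common EpisodicExperiment pattern (assumed setup:
# cart-pole balancing optimized with PGPE; the concrete task, network and
# learner differ from example to example below).
from pybrain.tools.shortcuts import buildNetwork
from pybrain.rl.environments.cartpole import CartPoleEnvironment, BalanceTask
from pybrain.optimization import PGPE
from pybrain.rl.agents import OptimizationAgent
from pybrain.rl.experiments import EpisodicExperiment

env = CartPoleEnvironment()
task = BalanceTask(env, 200)                    # episodic task wrapping the environment
net = buildNetwork(task.outdim, 5, task.indim)  # controller: observations in, actions out
agent = OptimizationAgent(net, PGPE())          # black-box learner wrapped around the controller
experiment = EpisodicExperiment(task, agent)
experiment.doEpisodes(100)                      # run 100 episodes; the learner updates between them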
Example 1: __init__

class BaggerBot:
    def __init__(self, host, port, net=None):
        self.conn = ServerConnection(host, port)
        self.env = self.conn.env
        self.conn.join()
        self.task = SurviveTask(self.env, self.conn)
        self.net = buildNetwork(self.env.outdim, 4, self.env.indim, outclass=TanhLayer)
        self.agent = OptimizationAgent(self.net, PGPE())
        self.experiment = EpisodicExperiment(self.task, self.agent)

    def wait_connected(self):
        self.conn.wait_connected()

    def train(self):
        '''
        Infinitely play the game. Figure out the next move(s), parse incoming
        data, discard all that, do stupid stuff and die :)
        '''
        while self.env.in_game:
            # Ask to be spawned
            logging.info('Requesting spawn...')
            self.conn.send_spawn()
            while not self.env.playing:
                self.conn.parse_pregame()
            while self.env.playing:
                self.experiment.doEpisodes(100)

Author: Remboooo, Project: LoBotomy, Lines: 26, Source: baggerbot.py
Example 2: train

def train():
    # Make the environment
    environment = TwentyFortyEightEnvironment()
    # Store the environment as the task
    task = environment
    # Set up the neural network
    neuralNet = buildNetwork(task.nSenses, HIDDEN_NODES, task.nActions)
    # Use a Genetic Algorithm as the trainer
    trainer = GA(populationSize=20, topProportion=0.2, elitism=False,
                 eliteProportion=0.25, mutationProb=0.1,
                 mutationStdDev=0.2, tournament=False,
                 tournamentSize=2)
    agent = OptimizationAgent(neuralNet, trainer)
    # Set up an experiment
    experiment = EpisodicExperiment(task, agent)
    # Train the network
    meanScores = []
    print "Starting HillClimberNN"
    for i in xrange(LEARNING_EPOCHS):
        experiment.doEpisodes(GAMES_PER_EPOCH)
        print "Training Iteration", i, "With mean score ", task.meanScore, "Max block achieved ", environment.maxGameBlock
        environment.maxGameBlock = 0
        meanScores.append(task.meanScore)
    params = {"learningEpochs": LEARNING_EPOCHS, "gamesPerEpoch": GAMES_PER_EPOCH, "hiddenNodes": HIDDEN_NODES}
    return meanScores, params, experiment

Author: Aggregates, Project: MI_HW2, Lines: 33, Source: hillclimberNN.py
Example 3: train

def train(self, episodes, maxSteps):
    avgReward = 0

    # set up environment and task
    self.env = InfoMaxEnv(self.objectNames, self.actionNames, self.numCategories)
    self.task = InfoMaxTask(self.env, maxSteps=maxSteps,
                            do_decay_beliefs=True, uniformInitialBeliefs=True)

    # create neural net and learning agent
    self.params = buildNetwork(self.task.outdim, self.task.indim,
                               bias=True, outclass=SoftmaxLayer)

    if self._PGPE:
        self.agent = OptimizationAgent(self.params, PGPE(minimize=False, verbose=False))
    elif self._CMAES:
        self.agent = OptimizationAgent(self.params, CMAES(minimize=False, verbose=False))

    # init and perform experiment
    exp = EpisodicExperiment(self.task, self.agent)
    for i in range(episodes):
        exp.doEpisodes(1)
        avgReward += self.task.getTotalReward()
        print "reward episode ", i, self.task.getTotalReward()

    # print average reward over training
    print "\naverage reward over training = ", avgReward/episodes

    # save trained network
    self._saveWeights()

Author: Kenkoko, Project: ua-ros-pkg, Lines: 31, Source: agent.py
Example 4: train

def train():
    # Make the environment
    environment = TwentyFortyEightEnvironment()
    # The task is the game this time
    task = environment
    # Make the reinforcement learning agent (use a network because inputs are continuous)
    network = ActionValueNetwork(task.nSenses, task.nActions)
    # Use Q learning for updating the table (NFQ is for networks)
    learner = NFQ()
    learner.gamma = GAMMA
    agent = LearningAgent(network, learner)
    # Set up an experiment
    experiment = EpisodicExperiment(task, agent)
    # Train the learner
    meanScores = []
    for i in xrange(LEARNING_EPOCHS):
        experiment.doEpisodes(GAMES_PER_EPOCH)
        print "Iteration ", i, " With mean score ", task.meanScore, "Max block achieved ", environment.maxGameBlock
        meanScores.append(task.meanScore)
        agent.learn()
        agent.reset()
    params = {"learningEpochs": LEARNING_EPOCHS, "gamesPerEpoch": GAMES_PER_EPOCH, "gamma": GAMMA}
    return meanScores, params, agent

Author: Aggregates, Project: MI_HW2, Lines: 31, Source: RLNFQ.py
Example 5: main

def main():
    """
    The task represents one full simulation. Therefore it is episodic.
    Each episode calls performAction after passing getObservation to the agent.
    Once isFinished is true, the reward is returned and one simulation is done.

    The net is the neural network. It has 7 input nodes, a hidden layer of 5
    nodes, and 2 output nodes. It is a feed-forward network using sigmoid
    activation functions.

    OptimizationAgent(module, learner)
    EpisodicExperiment.optimizer = learner
    learner.setEvaluator(task, module)
    optimizer.learn()
    """
    task = LanderTask(batchSize=1)
    net = buildNetwork(task.indim, 5, task.outdim)
    learner = StochasticHillClimber()
    agent = OptimizationAgent(net, learner)
    experiment = EpisodicExperiment(task, agent)
    experiment.doEpisodes(100000)

    tasks = [LanderTask(environment=Lander(acceleration=float(i)))
             for i in range(1, 4)]
    test_size = 1000
    for task in tasks:
        print("Running task with acceleration {}".format(task.env.acceleration))
        success = 0
        for _ in range(test_size):
            task.env.reset()
            while not task.isFinished():
                observation = task.getObservation()
                action = net.activate(observation)
                task.performAction(action)
            print("Finished a simulation with result {}".format(task.env.status))
            if task.env.status == 'landed':
                success += 1
        print("Succeeded {} times out of {}".format(success, test_size))

Author: andschwa, Project: uidaho-cs470-moonlander, Lines: 38, Source: main.py
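The docstring in Example 5 spells out what EpisodicExperiment does internally when given an OptimizationAgent: it hands the task and module to the learner via setEvaluator and then calls learn. As a hedged sketch of that wiring, the same training could be driven without the experiment wrapper; the task, net, and StochasticHillClimber names below are assumed to be the ones defined in Example 5.

# Sketch of driving the optimizer directly, following the wiring listed in the
# docstring above (assumes the task, net, and learner from Example 5).
learner = StochasticHillClimber()
learner.setEvaluator(task, net)   # the episodic task scores each candidate network
learner.maxEvaluations = 1000     # stop criterion, in place of experiment.doEpisodes(...)
learner.learn()                   # runs the episodes internally and returns when done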
Example 6: someEpisodes

def someEpisodes(game_env, net, discountFactor=0.99, maxSteps=100, avgOver=1, returnEvents=False):
    """ Return the fitness value for one episode of play, given the policy defined by a neural network. """
    task = GameTask(game_env)
    game_env.recordingEnabled = True
    game_env.reset()
    net.reset()
    task.maxSteps = maxSteps
    agent = LearningAgent(net)
    agent.learning = False
    agent.logging = False
    exper = EpisodicExperiment(task, agent)
    fitness = 0
    for _ in range(avgOver):
        rs = exper.doEpisodes(1)
        # add a slight bonus for more exploration, if rewards are identical
        fitness += len(set(game_env._allEvents)) * 1e-6
        # the true, discounted reward
        fitness += sum([sum([v * discountFactor ** step for step, v in enumerate(r)]) for r in rs])
    fitness /= avgOver
    if returnEvents:
        return fitness, game_env._allEvents
    else:
        return fitness

Author: sarobe, Project: VGDLEntityCreator, Lines: 23, Source: nomodel_pomdp.py
Example 7: agent

# the task is the game this time
task = environment
# make the reinforcement learning agent (use a network because inputs are continuous)
controller = ActionValueNetwork(task.nsenses, task.nactions)
# use Q learning for updating the table (NFQ is for networks)
learner = NFQ()
agent = LearningAgent(controller, learner)
# set up an experiment
experiment = EpisodicExperiment(task, agent)

meanscores = []
m = 0.0
for i in xrange(learning_eps):
    print i
    experiment.doEpisodes(games_per_ep)
    meanscores.append(task.meanscore)
    if meanscores[-1] > m:
        m = meanscores[-1]
        f = open("bestRL.pkl", 'w')
        pickle.dump(agent, f)
        f.close()
    agent.learn()
    agent.reset()

Author: jskye, Project: uon.2014.comp3330.hwa2.alevmy, Lines: 29, Source: runRL.py
Example 8: CartPoleEnvironment

env = CartPoleEnvironment()
if render:
    renderer = CartPoleRenderer()
    env.setRenderer(renderer)
    renderer.start()

module = ActionValueNetwork(4, 3)

task = DiscreteBalanceTask(env, 100)
learner = NFQ()
learner.explorer.epsilon = 0.4

agent = LearningAgent(module, learner)
testagent = LearningAgent(module, None)
experiment = EpisodicExperiment(task, agent)

def plotPerformance(values, fig):
    plt.figure(fig.number)
    plt.clf()
    plt.plot(values, 'o-')
    plt.gcf().canvas.draw()
    # Without the next line, the pyplot plot won't actually show up.
    plt.pause(0.001)

performance = []
if not render:
    pf_fig = plt.figure()

while(True):

Author: vascobailao, Project: PYTHON, Lines: 30, Source: NFQ.py
Example 9: run

def run(arg):
    task = arg[0]
    parameters = arg[1]
    #print "run with", parameters
    seed = parameters["seed"]
    process_id = hash(multiprocessing.current_process()._identity)
    numpy.random.seed(seed + process_id)

    render = False
    plot = False

    plt.ion()

    env = CartPoleEnvironment()
    if render:
        renderer = CartPoleRenderer()
        env.setRenderer(renderer)
        renderer.start()

    task_class = getattr(cp, task)
    task = task_class(env, parameters["MaxRunsPerEpisode"])
    testtask = task_class(env, parameters["MaxRunsPerEpisodeTest"])
    #print "dim: ", task.indim, task.outdim

    # two input states and 4 actions
    module = ActionValueNetwork(task.outdim, task.indim)

    learner = NFQ()
    # % of random actions
    learner.explorer.epsilon = parameters["ExplorerEpsilon"]

    agent = LearningAgent(module, learner)
    testagent = LearningAgent(module, None)
    experiment = EpisodicExperiment(task, agent)
    testexperiment = EpisodicExperiment(testtask, testagent)

    def plotPerformance(values, fig):
        plt.figure(fig.number)
        plt.clf()
        plt.plot(values, 'o-')
        plt.gcf().canvas.draw()
        # Without the next line, the pyplot plot won't actually show up.
        plt.pause(0.001)

    performance = []

    if plot:
        pf_fig = plt.figure()

    m = parameters["MaxTotalEpisodes"] / parameters["EpisodesPerLearn"]
    for episode in range(0, m):
        # one learning step after one episode of world-interaction
        experiment.doEpisodes(parameters["EpisodesPerLearn"])
        agent.learn(1)

        #renderer.drawPlot()

        # test performance (these real-world experiences are not used for training)
        if plot:
            env.delay = True

        if (episode) % parameters["TestAfter"] == 0:
            #print "Evaluating at episode: ", episode

            #experiment.agent = testagent
            r = mean([sum(x) for x in testexperiment.doEpisodes(parameters["TestWith"])])

            env.delay = False
            testagent.reset()
            #experiment.agent = agent

            performance.append(r)
            if plot:
                plotPerformance(performance, pf_fig)

            # print "reward avg", r
            # print "explorer epsilon", learner.explorer.epsilon
            # print "num episodes", agent.history.getNumSequences()
            # print "update step", len(performance)

    # print "done"
    return performance

    #print "network", json.dumps(module.bn.net.E, indent=2)

Author: nairboon, Project: bnrl, Lines: 94, Source: NFQ.py
Example 10: RecurrentNetwork

from pybrain.rl.environments.timeseries.timeseries import MonthlySnPEnvironment
from pybrain.rl.learners.directsearch.rrl import RRL
from pybrain.structure import RecurrentNetwork
from pybrain.structure import LinearLayer, SigmoidLayer, TanhLayer, BiasUnit
from pybrain.structure import FullConnection
from pybrain.rl.agents import LearningAgent
from pybrain.rl.experiments import EpisodicExperiment
from numpy import sign, round
from matplotlib import pyplot

net = RecurrentNetwork()
# Single linear layer with bias unit, and single tanh layer. The linear layer is what's optimised.
net.addInputModule(BiasUnit(name='bias'))
net.addOutputModule(TanhLayer(1, name='out'))
net.addRecurrentConnection(FullConnection(net['out'], net['out'], name='c3'))
net.addInputModule(LinearLayer(1, name='in'))
net.addConnection(FullConnection(net['in'], net['out'], name='c1'))
net.addConnection(FullConnection(net['bias'], net['out'], name='c2'))
net.sortModules()
net._setParameters([-8.79227886e-02, -8.29319017e+02, 1.25946474e+00])
print(net._params)

env = MonthlySnPEnvironment()
task = MaximizeReturnTask(env)
learner = RRL()  # ENAC() #Q_LinFA(2,1)
agent = LearningAgent(net, learner)
exp = EpisodicExperiment(task, agent)

exp.doEpisodes(10)

Author: samstern, Project: MSc-Project, Lines: 30, Source: episodicSnP.py
Example 11: CCRLEnvironment

__author__ = 'Stubborn'

from pybrain.rl.environments.ode import CCRLEnvironment
from pybrain.rl.environments.ode.tasks import CCRLGlasTask
from pybrain.tools.shortcuts import buildNetwork
from pybrain.structure.modules.tanhlayer import TanhLayer
from pybrain.optimization import PGPE
from pybrain.rl.agents import OptimizationAgent
from pybrain.rl.experiments import EpisodicExperiment

environment = CCRLEnvironment()
task = CCRLGlasTask(environment)

net = buildNetwork(len(task.getObservation()), 4, environment.indim, outclass=TanhLayer)

agent = OptimizationAgent(net, PGPE())
experiment = EpisodicExperiment(task, agent)

for updates in range(20000):
    experiment.doEpisodes(1)

Author: AkselBH, Project: QLearning, Lines: 21, Source: Pybrain+tutvideo+3+Robotarm.py
Example 12: Bool

# create environment
# Options: Bool(OpenGL), Bool(Realtime simu. while client is connected), ServerIP(default: localhost), Port(default: 21560)
env = ShipSteeringEnvironment(False)
# create task
task = GoNorthwardTask(env, maxsteps=500)
# create controller network
net = buildNetwork(task.outdim, task.indim, outclass=TanhLayer)
# create agent with controller and learner
agent = FiniteDifferenceAgent(net, SPLA())
# learning options
agent.learner.gd.alpha = 0.3      # step size of \mu adaption
agent.learner.gdSig.alpha = 0.15  # step size of \sigma adaption
agent.learner.gd.momentum = 0.0
batch = 2   # number of samples per gradient estimate (was: 2; more here due to stochastic setting)
# create experiment
experiment = EpisodicExperiment(task, agent)
prnts = 1   # frequency of console output
epis = 2000 / batch / prnts

# actual roll outs
filename = "dataSPLA08NoRew" + repr(int(random.random()*1000000.0)) + ".dat"
wf = open(filename, 'wb')
for updates in range(epis):
    for i in range(prnts):
        experiment.doEpisodes(batch)  # execute #batch episodes
        agent.learn()                 # learn from the gathered experience
        agent.reset()                 # reset agent and environment
    # print out related data
    stp = (updates + 1) * batch * prnts
    print "Step: ", runs, "/", stp, "Best: ", agent.learner.best, "Base: ", agent.learner.baseline, "Reward: ", agent.learner.reward
    wf.write(repr(stp) + "\n")

Author: HKou, Project: pybrain, Lines: 31, Source: shipbenchSPLA.py
Example 13: GrowTask

# create task
task = GrowTask(env)
# create controller network (flat network)
net = buildNetwork(32, 10, 12)
# create agent with controller and learner
agent = FiniteDifferenceAgent(net, SPLA())
# learning options
agent.learner.gd.alpha = 0.05
agent.learner.gdSig.alpha = 0.1
agent.learner.gd.momentum = 0.0
agent.learner.epsilon = 2.0
agent.learner.initSigmas()

sr = []
experiment = EpisodicExperiment(task, agent)
for updates in range(1000):
    # training step
    for i in range(5):
        experiment.doEpisodes(10)
        agent.learn()
        print "parameters:", agent.module.params
        agent.reset()

    # evaluation step (learning disabled)
    agent.disableLearning()
    experiment.doEpisodes(50)

    # append mean reward to sr array
    ret = []
    for n in range(agent.history.getNumSequences()):
        state, action, reward, _ = agent.history.getSequence(n)

Author: HKou, Project: pybrain, Lines: 31, Source: flexCubeNAC.py
Example 14: run_experiment

def run_experiment():
    # Create the controller network
    HIDDEN_NODES = 4

    RUNS = 2
    BATCHES = 1
    PRINTS = 1
    EPISODES = 500

    env = None
    start_state_net = None

    run_results = []

    # Set up plotting tools for the experiments
    tools = ExTools(BATCHES, PRINTS)

    # Run the experiment
    for run in range(RUNS):
        if run == 0:
            continue

        # If an environment already exists, shut it down
        if env:
            env.closeSocket()

        # Create the environment
        env = create_environment()

        # Create the task
        task = Pa10MovementTask(env)

        # Create the neural network. Only create the network once so it retains
        # the same starting values for each run.
        if start_state_net:
            net = start_state_net.copy()
        else:
            # Create the initial neural network
            net = create_network(
                in_nodes=env.obsLen,
                hidden_nodes=HIDDEN_NODES,
                out_nodes=env.actLen
            )
            start_state_net = net.copy()

        # Create the learning agent
        learner = HillClimber(storeAllEvaluations=True)
        agent = OptimizationAgent(net, learner)
        tools.agent = agent

        # Create the experiment
        experiment = EpisodicExperiment(task, agent)

        # Perform all episodes in the run
        for episode in range(EPISODES):
            experiment.doEpisodes(BATCHES)

        # Calculate results
        all_results = agent.learner._allEvaluations
        max_result = np.max(all_results)
        min_result = np.min(all_results)
        avg_result = np.sum(all_results) / len(all_results)
        run_results.append((run, max_result, min_result, avg_result))

        # Make the results directory if it does not exist
        if not os.path.exists(G_RESULTS_DIR):
            os.mkdir(G_RESULTS_DIR)

        # Write all results to the results file
        with open(os.path.join(G_RESULTS_DIR, 'run_%d.txt' % run), 'w+') as f:
            # Store the calculated max, min, avg
            f.write('RUN, MAX, MIN, AVG\n')
            f.write('%d, %f, %f, %f\n' % (run, max_result, min_result, avg_result))

            # Store all results from this run
            f.write('EPISODE, REWARD\n')
            for episode, result in enumerate(all_results):
                f.write('%d, %f\n' % (episode, result))

    return

Author: evansneath, Project: surgicalsim, Lines: 80, Source: start_environment.py
Example 15: run

def run(nao, pad):
    # ################################
    # choose bottom cam, so nao can see object when standing next to it
    nao.camera.selectCam(1)

    env = grabbingEnvironment(nao)
    #env.connect(nao)

    task = grabbingTask(env)

    net = buildNetwork(len(task.getObservation()), 8, env.indim, bias=True, recurrent=True)
    print env.indim
    #net = ActionValueNetwork(5,4)
    #, outclass=TanhLayer)
    #, hiddenclass=TanhLayer, outclass=TanhLayer

    # not correct right now..
    # TODO: train into RL Modules, dataset needs to be merged with exploration data
    #generateTraining.generateTraining().runDeltaMovements(nao,net,env,pad)

    #module = ActionValueNetwork(3, 3)
    #module = NeuronLayer(40)

    #agent = LearningAgent(net, SARSA())
    #learner = PolicyGradientLearner()
    #learner._setExplorer(StateDependentExplorer(3,3))
    #learner._setModule(module)
    #agent = LearningAgent(module, learner)
    #agent = LearningAgent(net, ENAC())
    #agent = LearningAgent(net, Reinforce())

    #learner = NFQ()
    #learner.explorer.epsilon = 0.4
    #agent = LearningAgent(net, learner)

    testagent = OptimizationAgent(net, None, env)
    #agent = LearningAgent(module, Q())
    #agent = LearningAgent(module, QLambda())
    learner = grabbingPGPE(storeAllEvaluations=True, verbose=True, epsilon=1.0, deltamax=5.0, sigmaLearningRate=0.1, learningRate=0.2)
    agent = OptimizationAgent(net, learner, env)
    #agent = OptimizationAgent(net, SimpleSPSA(storeAllEvaluations = True, verbose = True))
    #agent = OptimizationAgent(net, HillClimber(storeAllEvaluations = True, verbose = True))
    #agent = OptimizationAgent(net, RandomSearch(storeAllEvaluations = True, verbose = True))

    experiment = EpisodicExperiment(task, agent)
    # only for optimizationAgent
    #experiment.doOptimization = True

    # only for simulator!
    nao.fractionMaxSpeed = 1.0

    print "#env"
    print "  sensors:", env.outdim
    print "  actions:", env.indim
    print "  discreteStates:", env.discreteStates
    print "  discreteActions:", env.discreteActions
    print
    print "#task"
    print "  sensor_limits:", task.sensor_limits
    print "  actor_limits:", task.actor_limits
    print "  epilen: ", task.epiLen
    print "#EpisodicTask"
    print "  discount:", task.discount
    print "  batchsize:", task.batchSize
    print
    print "#PGPE"
    print "  exploration type:", grabbingPGPE().exploration
    print "  LearningRate:", grabbingPGPE().learningRate
    print "  sigmaLearningRate:", grabbingPGPE().sigmaLearningRate
    print "  epsilon:", grabbingPGPE().epsilon
    print "  wDecay:", grabbingPGPE().wDecay
    print "  momentum:", grabbingPGPE().momentum
    print "  rprop:", grabbingPGPE().rprop

    # # switch this to True if you want to see the cart balancing the pole (slower)
    # render = False
    #
    # plt.ion()
    #
    # env = CartPoleEnvironment()
    # if render:
    #     renderer = CartPoleRenderer()
    #     env.setRenderer(renderer)
    #     renderer.start()
    #
    # module = ActionValueNetwork(4, 3)
#......... (part of the code is omitted here) .........

Author: c0de2014, Project: nao-control, Lines: 101, Source: grabbingTest.py
Example 16: run

def run(arg):
    task = arg[0]
    parameters = arg[1]
    #print "run with", parameters
    seed = parameters["seed"]
    process_id = hash(multiprocessing.current_process()._identity)
    numpy.random.seed(seed + process_id)

    render = False
    plot = False

    plt.ion()

    env = CartPoleEnvironment()
    if render:
        renderer = CartPoleRenderer()
        env.setRenderer(renderer)
        renderer.start()

    task_class = getattr(cp, task)
    task = task_class(env, parameters["MaxRunsPerEpisode"])
    testtask = task_class(env, parameters["MaxRunsPerEpisodeTest"], desiredValue=None)
    #print "dim: ", task.indim, task.outdim

    from pybrain.tools.shortcuts import buildNetwork
    from pybrain.rl.agents import OptimizationAgent
    from pybrain.optimization import PGPE

    module = buildNetwork(task.outdim, task.indim, bias=False)
    # create agent with controller and learner (and its options)
    # % of random actions
    #learner.explorer.epsilon = parameters["ExplorerEpsilon"]

    agent = OptimizationAgent(module, PGPE(storeAllEvaluations=True, storeAllEvaluated=False, maxEvaluations=None, desiredEvaluation=1, verbose=False))
    #
    # print agent
    # from pprint import pprint
    # pprint (vars(agent.learner))
    testagent = LearningAgent(module, None)
    experiment = EpisodicExperiment(task, agent)
    testexperiment = EpisodicExperiment(testtask, testagent)

    def plotPerformance(values, fig):
        plt.figure(fig.number)
        plt.clf()
        plt.plot(values, 'o-')
        plt.gcf().canvas.draw()
        # Without the next line, the pyplot plot won't actually show up.
        plt.pause(0.001)

    performance = []

    if plot:
        pf_fig = plt.figure()

    m = parameters["MaxTotalEpisodes"] / parameters["EpisodesPerLearn"]
    for episode in range(0, m):
        # one learning step after one episode of world-interaction
        experiment.doEpisodes(parameters["EpisodesPerLearn"])
        #agent.learn(1)

        #renderer.drawPlot()

        # test performance (these real-world experiences are not used for training)
        if plot:
            env.delay = True

        if (episode) % parameters["TestAfter"] == 0:
            #print "Evaluating at episode: ", episode

            #experiment.agent = testagent
            #r = mean([sum(x) for x in testexperiment.doEpisodes(parameters["TestWith"])])

            #for i in range(0,parameters["TestWith"]):
            #    y = testexperiment.doEpisodes(1)
            #    print (agent.learner._allEvaluated)
            #
            #
            # from pprint import pprint
            # pprint (vars(task))

            l = parameters["TestWith"]
            task.N = parameters["MaxRunsPerEpisodeTest"]
            experiment.doEpisodes(l)
            task.N = parameters["MaxRunsPerEpisode"]

            resList = (agent.learner._allEvaluations)[-l:-1]
#......... (part of the code is omitted here) .........

Author: nairboon, Project: bnrl, Lines: 101, Source: PGPE.py
Example 17: mazeEnv

                       [1, 0, 0, 0, 0, 0, 0, 0, 1],
                       [1, 1, 1, 1, 1, 1, 1, 1, 1]])
side = 9
goal = 3, 2
env = mazeEnv(structure, goal)  # use maze environment for now; note pos is Y,X

# our own task and environment for later
#env = policyEnv()
thetask = MDPMazeTaskEpisodic(env)

# create neural net; create and train agent
theparams = buildNetwork(thetask.outdim, thetask.indim, bias=False)
agent = OptimizationAgent(theparams, CMAES())
exp = EpisodicExperiment(thetask, agent)

# train agent
exp.doEpisodes(NUM_EPISODES)

print "\ntotal reward = ", thetask.getTotalReward()
#print "\n"
#print "initial weights: "; print theparams.params
print "\n"
print "NOTE positions below are (Y,X)"
print "\n"

print "getting observation 1"
print "robot = ", thetask.getObservation()
print "goal = ", goal
print "reward: ", thetask.getReward()

Author: krylenko, Project: python, Lines: 31, Source: INFOMAX__policyWrapper.py
Example 18: train

def train(self, size, goal, initPose, mapSelect, envSelect, episodes, maxSteps, goalTol, randomizeInitPose):
    avgReward = 0

    # set up environment and task
    self.env = mazeEnv(size, goal, initPose, mapSelect, envSelect, randomizeInitPose)
    self.task = MDPMazeTaskEpisodic(self.env, maxSteps, goalTol)

    # create neural net and learning agent
    self.params = buildNetwork(self.task.outdim, 48, self.task.indim,
                               bias=True, outclass=SoftmaxLayer)

    if self._PGPE:
        self.agent = OptimizationAgent(self.params, PGPE(minimize=True, verbose=False))
    elif self._CMAES:
        self.agent = OptimizationAgent(self.params, CMAES(minimize=True, verbose=False))

    # init experiment
    exp = EpisodicExperiment(self.task, self.agent)

    for i in range(0, episodes):
        exp.doEpisodes()
        avgReward += self.task.getTotalReward()
        print "reward episode ", i, self.task.getTotalReward()

    # print average reward over training
    print "\naverage reward over training = ", avgReward/episodes

    # import weights into network and save network
    if self._PGPE:
        for i in range(len(self.params.params)):
            self.params.params[i] = self.agent.learner.current[i]
        pickle.dump(self.params, open('policyNet.pkl', 'w'))
    elif self._CMAES:
        ################ following code came from WWInfoMaxCMAES.py script from ICDL 2010 paper
        arz = randn(self.agent.learner.numParameters, self.agent.learner.batchSize)
        arx = tile(self.agent.learner.center.reshape(self.agent.learner.numParameters, 1),
                   (1, self.agent.learner.batchSize)) + \
              self.agent.learner.stepSize * dot(dot(self.agent.learner.B, self.agent.learner.D), arz)

        # Go through the parameters and pick the current best
        arfitness = zeros(self.agent.learner.batchSize)
        for k in xrange(self.agent.learner.batchSize):
            self.agent.learner.wrappingEvaluable._setParameters(arx[:, k])
            arfitness[k] = self.agent.learner._BlackBoxOptimizer__evaluator(self.agent.learner.wrappingEvaluable)

        # Sort by fitness and compute weighted mean into center
        tmp = sorted(map(lambda (x, y): (y, x), enumerate(ravel(arfitness))))
        arfitness = array(map(lambda x: x[0], tmp))
        arindex = array(map(lambda x: int(x[1]), tmp))
        arz = arz[:, arindex]
        curparams = arx[:, arindex[0]]

        # update network weights with selected parameters
        for i in range(len(self.params.params)):
            self.params.params[i] = curparams[i]

        # save trained network
        pickle.dump(self.params, open('policyNet.pkl', 'w'))

Author: krylenko, Project: python, Lines: 61, Source: INFOMAX__pybrainNode.py
Example 19: run

def run(arg):
    task = arg[0]
    parameters = arg[1]
    #print "run with", task, parameters
    seed = parameters["seed"]
    process_id = hash(multiprocessing.current_process()._identity)
    numpy.random.seed(seed)

    render = False
    plot = False

    plt.ion()

    env = CartPoleEnvironment()
    env.randomInitialization = False
    if render:
        renderer = CartPoleRenderer()
        env.setRenderer(renderer)
        renderer.start()

    task_class = getattr(cp, task)
    task = task_class(env, 50)
    #print "dim: ", task.indim, task.outdim

    # two input states and 4 actions
    bmodule = ActionValueRAND(task.outdim, task.indim)
    rlearner = RAND()
    blearner = RAND()
    # % of random actions
    bagent = LearningAgent(bmodule, rlearner)

    from pybrain.tools.shortcuts import buildNetwork
    from pybrain.rl.agents import OptimizationAgent
    from pybrain.optimization import PGPE

    module = buildNetwork(task.outdim, task.indim, bias=False)
    # create agent with controller and learner (and its options)
    # % of random actions
    #learner.explorer.epsilon = parameters["ExplorerEpsilon"]

    agent = OptimizationAgent(module, PGPE(storeAllEvaluations=True, storeAllEvaluated=True, maxEvaluations=None, verbose=False))
    testagent = LearningAgent(module, None)

    pgpeexperiment = EpisodicExperiment(task, agent)
    randexperiment = EpisodicExperiment(task, bagent)

    def plotPerformance(values, fig):
        plt.figure(fig.number)
        plt.clf()
        plt.plot(values, 'o-')
        plt.gcf().canvas.draw()
        # Without the next line, the pyplot plot won't actually show up.
        plt.pause(0.001)

    performance = []

    if plot:
        pf_fig = plt.figure()

    m = parameters["MaxTotalEpisodes"] / parameters["EpisodesPerLearn"]

    ## train pgpe
    for episode in range(0, 50):
        # one learning step after one episode of world-interaction
        y = pgpeexperiment.doEpisodes(1)

    be, bf = agent.learner._bestFound()
    print be, bf

    print "generate data"
    be.numActions = 1
    gdagent = LearningAgent(be, blearner)
    experiment = EpisodicExperiment(task, gdagent)
    for episode in range(0, 1000):
        # print episode, " of 1000"
        # one learning step after one episode of world-interaction
        y = experiment.doEpisodes(1)
        # print y
        x = randexperiment.doEpisodes(1)
        # print len(y[0])

        #renderer.drawPlot()

        # test performance (these real-world experiences are not used for training)
        if plot:
            env.delay = True
#......... (part of the code is omitted here) .........

Author: nairboon, Project: bnrl, Lines: 101, Source: trainnet.py
Example 20: BalanceTask

env.setRenderer(CartPoleRenderer())
env.getRenderer().start()
env.delay = (episodes == 1)

# create task
task = BalanceTask(env, epilen)
# create controller network
net = buildNetwork(4, 1, bias=False)
# create agent and set parameters from command line
agent = LearningAgent(net, None)
agent.module._setParameters([float(sys.argv[1]), float(sys.argv[2]), float(sys.argv[3]), float(sys.argv[4])])
# create experiment
experiment = EpisodicExperiment(task, agent)
experiment.doEpisodes(episodes)

# run environment
ret = []
for n in range(agent.history.getNumSequences()):
    returns = agent.history.getSequence(n)
    reward = returns[2]
    ret.append(sum(reward, 0).item())

# print results
print ret, "mean:", mean(ret)
#env.getRenderer().stop()

Author: Boblogic07, Project: pybrain, Lines: 28, Source: play_cartpole.py
Note: The pybrain.rl.experiments.EpisodicExperiment class examples in this article were compiled by 纯净天空 from source-code and documentation platforms such as GitHub and MSDocs. The code snippets were selected from open-source projects contributed by their respective developers, and copyright remains with the original authors; please consult each project's license before redistributing or reusing the code, and do not republish without permission.