38#ifndef LEARNING_AGENT_H
39#define LEARNING_AGENT_H
45#include "environment.h"
46#include "instructions/set.h"
47#include "log/laLogger.h"
48#include "mutator/mutationParameters.h"
49#include "tpg/tpgExecutionEngine.h"
50#include "tpg/tpgGraph.h"
52#include "learn/evaluationResult.h"
54#include "learn/learningEnvironment.h"
55#include "learn/learningParameters.h"
78 std::shared_ptr<TPG::TPGGraph>
tpg;
82 std::pair<const TPG::TPGVertex*, std::shared_ptr<EvaluationResult>>
98 std::map<const TPG::TPGVertex*, std::shared_ptr<EvaluationResult>>
114 std::vector<std::reference_wrapper<Log::LALogger>>
loggers;
131 p.nbRegisters, p.nbProgramConstant),
133 archive(p.archiveSize, p.archivingProbability)
202 virtual std::shared_ptr<EvaluationResult>
evaluateJob(
223 std::shared_ptr<Learn::EvaluationResult>& previousResult)
const;
236 virtual std::multimap<std::shared_ptr<EvaluationResult>,
267 std::multimap<std::shared_ptr<EvaluationResult>,
284 uint64_t
train(
volatile bool& altTraining,
bool printProgressBar);
307 const std::multimap<std::shared_ptr<EvaluationResult>,
329 std::shared_ptr<EvaluationResult>>&
355 virtual std::shared_ptr<Learn::Job>
makeJob(
370 virtual std::queue<std::shared_ptr<Learn::Job>>
makeJobs(
382 void init(uint64_t seed = 0);
The Environment class contains all information needed to execute a Program.
Definition: environment.h:84
Class for storing a set of Instruction.
Definition: set.h:53
This class embeds roots for the simulations.
Definition: job.h:53
Class used to control the learning steps of a TPGGraph within a given LearningEnvironment.
Definition: learningAgent.h:63
void addLogger(Log::LALogger &logger)
Adds a LALogger to the loggers vector.
Definition: learningAgent.cpp:79
std::shared_ptr< TPG::TPGGraph > getTPGGraph()
Getter for the TPGGraph built by the LearningAgent.
Definition: learningAgent.cpp:49
uint64_t maxNbThreads
Control the maximum number of threads when running in parallel.
Definition: learningAgent.h:105
const Archive & getArchive() const
Getter for the Archive filled by the LearningAgent.
Definition: learningAgent.cpp:54
virtual std::multimap< std::shared_ptr< EvaluationResult >, const TPG::TPGVertex * > evaluateAllRoots(uint64_t generationNumber, LearningMode mode)
Evaluate all root TPGVertex of the TPGGraph.
Definition: learningAgent.cpp:163
LearningParameters params
Parameters for the learning process.
Definition: learningAgent.h:75
virtual void trainOneGeneration(uint64_t generationNumber)
Train the TPGGraph for one generation.
Definition: learningAgent.cpp:187
std::map< const TPG::TPGVertex *, std::shared_ptr< EvaluationResult > > resultsPerRoot
Map associating root TPG::TPGVertex to their EvaluationResult.
Definition: learningAgent.h:99
LearningAgent(LearningEnvironment &le, const Instructions::Set &iSet, const LearningParameters &p, const TPG::TPGFactory &factory=TPG::TPGFactory())
Constructor for LearningAgent.
Definition: learningAgent.h:127
Archive archive
Archive used during the training process.
Definition: learningAgent.h:72
void init(uint64_t seed=0)
Initialize the LearningAgent.
Definition: learningAgent.cpp:64
void forgetPreviousResults()
This method resets the previously registered scores per root.
Definition: learningAgent.cpp:415
virtual std::queue< std::shared_ptr< Learn::Job > > makeJobs(Learn::LearningMode mode, TPG::TPGGraph *tpgGraph=nullptr)
Puts all roots into jobs to be able to use them in simulation later.
Definition: learningAgent.cpp:401
Mutator::RNG rng
Random Number Generator for this Learning Agent.
Definition: learningAgent.h:102
LearningEnvironment & learningEnvironment
LearningEnvironment with which the LearningAgent will interact.
Definition: learningAgent.h:66
uint64_t train(volatile bool &altTraining, bool printProgressBar)
Train the TPGGraph for a given number of generations.
Definition: learningAgent.cpp:266
virtual void decimateWorstRoots(std::multimap< std::shared_ptr< EvaluationResult >, const TPG::TPGVertex * > &results)
Removes from the TPGGraph the root TPGVertex with the worst results.
Definition: learningAgent.cpp:231
std::vector< std::reference_wrapper< Log::LALogger > > loggers
Set of LALogger called throughout the training process.
Definition: learningAgent.h:114
std::pair< const TPG::TPGVertex *, std::shared_ptr< EvaluationResult > > bestRoot
Definition: learningAgent.h:83
bool isRootEvalSkipped(const TPG::TPGVertex &root, std::shared_ptr< Learn::EvaluationResult > &previousResult) const
Method detecting whether a root should be evaluated again.
Definition: learningAgent.cpp:86
virtual ~LearningAgent()=default
Default destructor for polymorphism.
Mutator::RNG & getRNG()
Getter for the RNG used by the LearningAgent.
Definition: learningAgent.cpp:59
void keepBestPolicy()
This method keeps only the bestRoot policy in the TPGGraph.
Definition: learningAgent.cpp:363
std::shared_ptr< TPG::TPGGraph > tpg
TPGGraph built during the learning process.
Definition: learningAgent.h:78
Environment env
Environment for executing Program of the LearningAgent.
Definition: learningAgent.h:69
virtual std::shared_ptr< EvaluationResult > evaluateJob(TPG::TPGExecutionEngine &tee, const Job &job, uint64_t generationNumber, LearningMode mode, LearningEnvironment &le) const
Evaluates policy starting from the given root.
Definition: learningAgent.cpp:105
virtual std::shared_ptr< Learn::Job > makeJob(int num, Learn::LearningMode mode, int idx=0, TPG::TPGGraph *tpgGraph=nullptr)
Takes a given root index and creates a job containing it. Useful for example in adversarial mode wher...
Definition: learningAgent.cpp:381
const std::pair< const TPG::TPGVertex *, std::shared_ptr< EvaluationResult > > & getBestRoot() const
Get the best root TPG::TPGVertex encountered since the last init.
Definition: learningAgent.cpp:358
void updateEvaluationRecords(const std::multimap< std::shared_ptr< EvaluationResult >, const TPG::TPGVertex * > &results)
Update the bestRoot and resultsPerRoot attributes.
Definition: learningAgent.cpp:310
Interface for creating a Learning Environment.
Definition: learningEnvironment.h:80
uint64_t getNbActions() const
Get the number of actions available for this LearningEnvironment.
Definition: learningEnvironment.h:131
Learning Agent logger class that will be called during LearningAgent executions.
Definition: laLogger.h:64
Definition: tpgExecutionEngine.h:56
Factory for creating all elements constituting a TPG.
Definition: tpgFactory.h:34
Class for storing a Tangled-Program-Graph.
Definition: tpgGraph.h:54
Abstract class representing the vertices of a TPGGraph.
Definition: tpgVertex.h:49
Definition: adversarialEvaluationResult.h:45
LearningMode
Different modes in which the LearningEnvironment can be reset.
Definition: learningEnvironment.h:58
Structure for simplifying the transmission of LearningParameters to functions.
Definition: learningParameters.h:53
Mutator::MutationParameters mutation
Definition: learningParameters.h:56
TPGParameters tpg
Parameters for TPGMutator.
Definition: mutationParameters.h:200
size_t nbActions
Number of TPGAction vertices of the initialized TPGGraph.
Definition: mutationParameters.h:55