GEGELATI
learningAgent.h
#ifndef LEARNING_AGENT_H
#define LEARNING_AGENT_H

#include <map>
#include <queue>

#include "archive.h"
#include "environment.h"
#include "instructions/set.h"
#include "log/laLogger.h"
#include "mutator/mutationParameters.h"
#include "tpg/tpgExecutionEngine.h"
#include "tpg/tpgGraph.h"

#include "learn/evaluationResult.h"
#include "learn/job.h"
#include "learn/learningEnvironment.h"
#include "learn/learningParameters.h"
namespace Learn {

    /// Class used to control the learning steps of a TPGGraph within a given
    /// LearningEnvironment.
    class LearningAgent
    {
      protected:
        /// LearningEnvironment with which the LearningAgent will interact.
        LearningEnvironment& learningEnvironment;

        /// Environment for executing Program of the LearningAgent.
        Environment env;

        /// Archive used during the training process.
        Archive archive;

        /// Parameters for the learning process.
        LearningParameters params;

        /// TPGGraph built during the learning process.
        std::shared_ptr<TPG::TPGGraph> tpg;

        /// Best root TPG::TPGVertex encountered since the last init, and its
        /// EvaluationResult.
        std::pair<const TPG::TPGVertex*, std::shared_ptr<EvaluationResult>>
            bestRoot{nullptr, nullptr};

        /// Map associating root TPG::TPGVertex to their EvaluationResult.
        std::map<const TPG::TPGVertex*, std::shared_ptr<EvaluationResult>>
            resultsPerRoot;

        /// Random Number Generator for this Learning Agent.
        Mutator::RNG rng;

        /// Control the maximum number of threads when running in parallel.
        uint64_t maxNbThreads = 1;

        /// Set of LALogger called throughout the training process.
        std::vector<std::reference_wrapper<Log::LALogger>> loggers;

      public:
        /// Constructor for LearningAgent.
        LearningAgent(LearningEnvironment& le, const Instructions::Set& iSet,
                      const LearningParameters& p,
                      const TPG::TPGFactory& factory = TPG::TPGFactory())
            : learningEnvironment{le}, env(iSet, le.getDataSources(),
                                           p.nbRegisters, p.nbProgramConstant),
              tpg(factory.createTPGGraph(env)), params{p},
              archive(p.archiveSize, p.archivingProbability)
        {
            // Override the number of actions from the parameters.
            this->params.mutation.tpg.nbActions =
                this->learningEnvironment.getNbActions();
        };

        /// Default destructor for polymorphism.
        virtual ~LearningAgent() = default;

        /// Getter for the TPGGraph built by the LearningAgent.
        std::shared_ptr<TPG::TPGGraph> getTPGGraph();

        /// Getter for the Archive filled by the LearningAgent.
        const Archive& getArchive() const;

        /// Getter for the RNG used by the LearningAgent.
        Mutator::RNG& getRNG();

        /// Adds a LALogger to the loggers vector.
        void addLogger(Log::LALogger& logger);

        /// Evaluates the policy starting from the given root.
        virtual std::shared_ptr<EvaluationResult> evaluateJob(
            TPG::TPGExecutionEngine& tee, const Job& job,
            uint64_t generationNumber, LearningMode mode,
            LearningEnvironment& le) const;

        /// Method detecting whether a root should be evaluated again.
        bool isRootEvalSkipped(
            const TPG::TPGVertex& root,
            std::shared_ptr<Learn::EvaluationResult>& previousResult) const;

        /// Evaluate all root TPGVertex of the TPGGraph.
        virtual std::multimap<std::shared_ptr<EvaluationResult>,
                              const TPG::TPGVertex*>
        evaluateAllRoots(uint64_t generationNumber, LearningMode mode);

        /// Train the TPGGraph for one generation.
        virtual void trainOneGeneration(uint64_t generationNumber);

        /// Removes from the TPGGraph the root TPGVertex with the worst
        /// results.
        virtual void decimateWorstRoots(
            std::multimap<std::shared_ptr<EvaluationResult>,
                          const TPG::TPGVertex*>& results);

        /// Train the TPGGraph for a given number of generations.
        uint64_t train(volatile bool& altTraining, bool printProgressBar);

        /// Update the bestRoot and resultsPerRoot attributes.
        void updateEvaluationRecords(
            const std::multimap<std::shared_ptr<EvaluationResult>,
                                const TPG::TPGVertex*>& results);

        /// This method resets the previously registered scores per root.
        void forgetPreviousResults();

        /// Get the best root TPG::TPGVertex encountered since the last init.
        const std::pair<const TPG::TPGVertex*,
                        std::shared_ptr<EvaluationResult>>&
        getBestRoot() const;

        /// This method keeps only the bestRoot policy in the TPGGraph.
        void keepBestPolicy();

        /// Takes a given root index and creates a Job containing it (useful,
        /// for example, in adversarial mode).
        virtual std::shared_ptr<Learn::Job> makeJob(
            int num, Learn::LearningMode mode, int idx = 0,
            TPG::TPGGraph* tpgGraph = nullptr);

        /// Puts all roots into Jobs to be able to use them in simulation
        /// later.
        virtual std::queue<std::shared_ptr<Learn::Job>> makeJobs(
            Learn::LearningMode mode, TPG::TPGGraph* tpgGraph = nullptr);

        /// Initialize the LearningAgent.
        void init(uint64_t seed = 0);
    };
}; // namespace Learn

#endif
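
For orientation, here is a minimal usage sketch of the LearningAgent API declared above. It is not taken from the GEGELATI sources: MyEnvironment is a hypothetical user-defined LearningEnvironment subclass, the Instructions::Set is assumed to be filled with instructions elsewhere, the LearningParameters are assumed to be configured elsewhere, the include paths may differ in a real project, and the exact semantics of the altTraining flag and of train()'s return value are assumptions; only members declared in this header are called on the agent.

#include <memory>

#include "instructions/set.h"
#include "learn/learningAgent.h"        // or the project's umbrella header, depending on setup
#include "learn/learningParameters.h"

#include "myEnvironment.h"              // hypothetical user-defined LearningEnvironment subclass

int main()
{
    // Hypothetical building blocks provided by the user (not part of this header).
    MyEnvironment le;                   // derives from Learn::LearningEnvironment
    Instructions::Set iSet;             // assumed to be filled with Instruction objects here
    Learn::LearningParameters params;   // assumed to be configured elsewhere

    // Build the agent and initialize it with a fixed seed.
    Learn::LearningAgent agent(le, iSet, params);
    agent.init(0);

    // Train; other code may set `stop` to true to interrupt training early
    // (assumed meaning of the altTraining parameter). train() returns a
    // uint64_t per the declaration above; its exact meaning is left to the
    // implementation in learningAgent.cpp.
    volatile bool stop = false;
    uint64_t lastGeneration = agent.train(stop, /* printProgressBar = */ true);
    (void)lastGeneration;

    // Keep only the best policy found and retrieve the resulting graph.
    agent.keepBestPolicy();
    std::shared_ptr<TPG::TPGGraph> bestPolicy = agent.getTPGGraph();

    return bestPolicy != nullptr ? 0 : 1;
}

The virtual methods declared above (evaluateJob, evaluateAllRoots, trainOneGeneration, decimateWorstRoots, makeJob, makeJobs) are the natural extension points for specializing the training loop in a subclass.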