36#ifndef ADVERSARIAL_LEARNING_AGENT_H
37#define ADVERSARIAL_LEARNING_AGENT_H
39#include "learn/adversarialEvaluationResult.h"
40#include "learn/adversarialJob.h"
41#include "learn/adversarialLearningAgent.h"
42#include "learn/adversarialLearningEnvironment.h"
43#include "learn/parallelLearningAgent.h"
114 std::map<uint64_t, std::pair<std::shared_ptr<EvaluationResult>,
115 std::shared_ptr<Job>>>&
117 std::multimap<std::shared_ptr<EvaluationResult>,
119 std::map<uint64_t, Archive*>& archiveMap)
override;
160 std::multimap<std::shared_ptr<Learn::EvaluationResult>,
196 virtual std::shared_ptr<EvaluationResult>
evaluateJob(
227 std::queue<std::shared_ptr<Learn::Job>>
makeJobs(
Class for storing a set of Instruction objects.
Definition: set.h:53
Class used to control the learning steps of a TPGGraph within a given LearningEnvironment,...
Definition: adversarialLearningAgent.h:73
AdversarialLearningAgent(LearningEnvironment &le, const Instructions::Set &iSet, const LearningParameters &p, size_t agentsPerEval=2, const TPG::TPGFactory &factory=TPG::TPGFactory())
Constructor for AdversarialLearningAgent.
Definition: adversarialLearningAgent.h:136
std::multimap< std::shared_ptr< Learn::EvaluationResult >, const TPG::TPGVertex * > evaluateAllRoots(uint64_t generationNumber, Learn::LearningMode mode) override
Evaluate all root TPGVertex of the TPGGraph.
Definition: adversarialLearningAgent.cpp:42
size_t agentsPerEvaluation
Number of agents per evaluation (e.g. 2 in tic-tac-toe).
Definition: adversarialLearningAgent.h:87
void evaluateAllRootsInParallelCompileResults(std::map< uint64_t, std::pair< std::shared_ptr< EvaluationResult >, std::shared_ptr< Job > > > &resultsPerJobMap, std::multimap< std::shared_ptr< EvaluationResult >, const TPG::TPGVertex * > &results, std::map< uint64_t, Archive * > &archiveMap) override
Subfunction of evaluateAllRootsInParallel which handles the gathering of results and the merge of the...
Definition: adversarialLearningAgent.cpp:56
std::vector< const TPG::TPGVertex * > champions
Champions of the last generation.
Definition: adversarialLearningAgent.h:82
virtual std::shared_ptr< EvaluationResult > evaluateJob(TPG::TPGExecutionEngine &tee, const Job &job, uint64_t generationNumber, LearningMode mode, LearningEnvironment &le) const override
Evaluates the policy starting from the given root, taking the adversarial setting into account.
Definition: adversarialLearningAgent.cpp:125
std::queue< std::shared_ptr< Learn::Job > > makeJobs(Learn::LearningMode mode, TPG::TPGGraph *tpgGraph=nullptr) override
Puts all roots into AdversarialJob to be able to use them in simulation later. The difference with th...
Definition: adversarialLearningAgent.cpp:180
This class embeds roots for the simulations.
Definition: job.h:53
Interface for creating a Learning Environment.
Definition: learningEnvironment.h:80
Class used to control the learning steps of a TPGGraph within a given LearningEnvironment,...
Definition: parallelLearningAgent.h:66
Definition: tpgExecutionEngine.h:56
Factory for creating all elements constituting a TPG.
Definition: tpgFactory.h:34
Class for storing a Tangled-Program-Graph.
Definition: tpgGraph.h:54
Abstract class representing the vertices of a TPGGraph.
Definition: tpgVertex.h:49
Definition: adversarialEvaluationResult.h:45
LearningMode
Different modes in which the LearningEnvironment can be reset.
Definition: learningEnvironment.h:58
Structure for simplifying the transmission of LearningParameters to functions.
Definition: learningParameters.h:53