11 #ifndef WRENCH_PILOTJOBMANAGER_H
12 #define WRENCH_PILOTJOBMANAGER_H
17 #include "wrench/services/Service.h"
18 #include "wrench/services/storage/storage_helpers/FileLocation.h"
27 class ExecutionController;
56 class JobManager :
public Service {
64 std::shared_ptr<CompoundJob> createCompoundJob(std::string name);
66 std::shared_ptr<StandardJob> createStandardJob(
const std::vector<std::shared_ptr<WorkflowTask>>& tasks,
67 const std::map<std::shared_ptr<DataFile>, std::shared_ptr<FileLocation> >& file_locations,
68 std::vector<std::tuple<std::shared_ptr<DataFile>, std::shared_ptr<FileLocation>, std::shared_ptr<FileLocation> >> pre_file_copies,
69 std::vector<std::tuple<std::shared_ptr<DataFile>, std::shared_ptr<FileLocation>, std::shared_ptr<FileLocation> >> post_file_copies,
70 std::vector<std::tuple<std::shared_ptr<DataFile>, std::shared_ptr<FileLocation> >> cleanup_file_deletions);
72 std::shared_ptr<StandardJob> createStandardJob(
const std::vector<std::shared_ptr<WorkflowTask>>& tasks,
73 std::map<std::shared_ptr<DataFile>, std::vector<std::shared_ptr<FileLocation>>> file_locations,
74 std::vector<std::tuple<std::shared_ptr<DataFile>, std::shared_ptr<FileLocation>, std::shared_ptr<FileLocation> >> pre_file_copies,
75 std::vector<std::tuple<std::shared_ptr<DataFile>, std::shared_ptr<FileLocation>, std::shared_ptr<FileLocation> >> post_file_copies,
76 std::vector<std::tuple<std::shared_ptr<DataFile>, std::shared_ptr<FileLocation> >> cleanup_file_deletions);
79 std::shared_ptr<StandardJob> createStandardJob(
const std::vector<std::shared_ptr<WorkflowTask>>& tasks,
80 const std::map<std::shared_ptr<DataFile>, std::shared_ptr<FileLocation>>& file_locations);
82 std::shared_ptr<StandardJob> createStandardJob(
const std::vector<std::shared_ptr<WorkflowTask>>& tasks,
83 std::map<std::shared_ptr<DataFile>, std::vector<std::shared_ptr<FileLocation>>> file_locations);
85 std::shared_ptr<StandardJob> createStandardJob(
const std::shared_ptr<WorkflowTask>&task,
86 const std::map<std::shared_ptr<DataFile>, std::shared_ptr<FileLocation>>& file_locations);
88 std::shared_ptr<StandardJob> createStandardJob(
const std::shared_ptr<WorkflowTask>&task,
89 std::map<std::shared_ptr<DataFile>, std::vector<std::shared_ptr<FileLocation>>> file_locations);
91 std::shared_ptr<StandardJob> createStandardJob(
const std::vector<std::shared_ptr<WorkflowTask>>& tasks);
93 std::shared_ptr<StandardJob> createStandardJob(
const std::shared_ptr<WorkflowTask>&task);
95 std::shared_ptr<PilotJob> createPilotJob();
97 void submitJob(
const std::shared_ptr<StandardJob>& job,
const std::shared_ptr<ComputeService>& compute_service,
98 std::map<std::string, std::string> service_specific_args = {});
100 void submitJob(
const std::shared_ptr<CompoundJob>& job,
const std::shared_ptr<ComputeService>& compute_service,
101 std::map<std::string, std::string> service_specific_args = {});
103 void submitJob(
const std::shared_ptr<PilotJob>& job,
const std::shared_ptr<ComputeService>& compute_service,
104 std::map<std::string, std::string> service_specific_args = {});
106 void terminateJob(
const std::shared_ptr<StandardJob>& job);
108 void terminateJob(
const std::shared_ptr<CompoundJob>& job);
110 void terminateJob(
const std::shared_ptr<PilotJob>& job);
112 simgrid::s4u::Mailbox *getCreatorMailbox();
114 unsigned long getNumRunningPilotJobs()
const;
120 ~JobManager()
override;
124 friend class ExecutionController;
127 explicit JobManager(std::string hostname, simgrid::s4u::Mailbox *creator_mailbox);
139 void dispatchJob(
const std::shared_ptr<CompoundJob>& job);
/**
 * @brief Declaration of the message-handling step.
 *
 * @return a boolean; its meaning is set by the out-of-view definition
 *         (presumably whether the caller should keep processing - confirm)
 */
bool
processNextMessage();
144 processStandardJobCompletion(
const std::shared_ptr<StandardJob>& job, std::shared_ptr<ComputeService> compute_service);
147 processStandardJobFailure(std::shared_ptr<StandardJob> job, std::shared_ptr<ComputeService> compute_service);
150 processCompoundJobCompletion(
const std::shared_ptr<CompoundJob>& job, std::shared_ptr<ComputeService> compute_service);
153 processCompoundJobFailure(
const std::shared_ptr<CompoundJob>& job, std::shared_ptr<ComputeService> compute_service);
155 void processPilotJobStart(
const std::shared_ptr<PilotJob>& job, std::shared_ptr<ComputeService> compute_service);
157 void processPilotJobExpiration(
const std::shared_ptr<PilotJob>& job, std::shared_ptr<ComputeService> compute_service);
159 void processPilotJobFailure(
const std::shared_ptr<PilotJob>& job, std::shared_ptr<ComputeService> compute_service, std::shared_ptr<FailureCause> cause);
162 simgrid::s4u::Mailbox *creator_mailbox;
164 std::vector<std::shared_ptr<CompoundJob>> jobs_to_dispatch;
165 std::set<std::shared_ptr<CompoundJob>> jobs_dispatched;
167 unsigned long num_running_pilot_jobs = 0;
171 std::map<std::shared_ptr<CompoundJob>, std::shared_ptr<StandardJob>> cjob_to_sjob_map;
172 std::map<std::shared_ptr<CompoundJob>, std::shared_ptr<PilotJob>> cjob_to_pjob_map;
182 #endif //WRENCH_PILOTJOBMANAGER_H