JobManager.h
1 
11 #ifndef WRENCH_PILOTJOBMANAGER_H
12 #define WRENCH_PILOTJOBMANAGER_H
13 
14 #include <vector>
15 #include <set>
16 
17 #include "wrench/services/Service.h"
18 
19 namespace wrench {
20 
// Forward declarations of the project types referenced by JobManager below.
// NOTE(review): FileLocation and FailureCause are also used by JobManager but
// are not forward-declared here — presumably they arrive via
// "wrench/services/Service.h"; confirm.
// A workflow management system (WMS).
21  class WMS;
22  class Workflow;
// A computational task in a Workflow.
23  class WorkflowTask;
// A data file used/produced by a WorkflowTask in a Workflow.
24  class WorkflowFile;
// Abstraction of a job used for executing tasks in a Workflow.
25  class WorkflowJob;
// A pilot (i.e., non-standard) workflow job that can be submitted to a ComputeService by a WMS.
26  class PilotJob;
// A standard (i.e., non-pilot) workflow job that can be submitted to a ComputeService by a WMS.
27  class StandardJob;
28  class ComputeService;
29  class StorageService;
30 
31  /***********************/
33  /***********************/
34 
35 
/**
 * @brief A helper daemon, co-located with and explicitly started by a WMS.
 *        Its public interface creates, submits, terminates and forgets
 *        workflow jobs and reports pending/running pilot jobs.
 *
 * The constructor is protected and WMS is a friend, so instances are meant
 * to be created by a WMS rather than directly by user code.
 */
40  class JobManager : public Service {
41 
42  public:
43 
44 
/** @brief Stop the job manager. */
45  void stop() override;
46 
/** @brief Kill the job manager (brutally terminate the daemon, clears all jobs). */
47  void kill();
48 
49 
/**
 * @brief Create a standard job.
 * @param tasks                  tasks that make up the job
 * @param file_locations         a FileLocation for each WorkflowFile
 * @param pre_file_copies        (file, location, location) tuples — presumably
 *                               source/destination copies done before the tasks run; confirm
 * @param post_file_copies       (file, location, location) tuples — presumably
 *                               source/destination copies done after the tasks run; confirm
 * @param cleanup_file_deletions (file, location) tuples — presumably files to
 *                               delete once the job is done; confirm
 * @return pointer to the created StandardJob
 */
50  StandardJob *createStandardJob(std::vector<WorkflowTask *> tasks,
51  std::map<WorkflowFile *, std::shared_ptr<FileLocation> > file_locations,
52  std::vector<std::tuple<WorkflowFile *, std::shared_ptr<FileLocation> , std::shared_ptr<FileLocation> >> pre_file_copies,
53  std::vector<std::tuple<WorkflowFile *, std::shared_ptr<FileLocation> , std::shared_ptr<FileLocation> >> post_file_copies,
54  std::vector<std::tuple<WorkflowFile *, std::shared_ptr<FileLocation> >> cleanup_file_deletions);
55 
56 
/**
 * @brief Create a standard job from tasks and file locations only
 *        (overload without file copies or cleanup deletions).
 * @param tasks          tasks that make up the job
 * @param file_locations a FileLocation for each WorkflowFile
 * @return pointer to the created StandardJob
 */
57  StandardJob *createStandardJob(std::vector<WorkflowTask *> tasks,
58  std::map<WorkflowFile *,
59  std::shared_ptr<FileLocation> > file_locations);
60 
// NOTE(review): listing line 61 is missing, so the two lines below are only
// the tail of a declaration (its return type, name and first parameter are
// not visible). It looks like another createStandardJob overload — restore
// the head from the original header before compiling.
62  std::map<WorkflowFile *,
63  std::shared_ptr<FileLocation> > file_locations);
64 
// NOTE(review): listing line 65 is missing as well. The generated summary for
// this file lists `PilotJob * createPilotJob()` ("Create a pilot job.",
// defined in JobManager.cpp), which has no declaration in this listing —
// presumably it belongs here; confirm.
66 
/**
 * @brief Submit a job to a compute service.
 * @param job                   the job to submit
 * @param compute_service       the compute service to submit to
 * @param service_specific_args string key/value arguments; defaults to an empty map
 */
67  void submitJob(WorkflowJob *job, std::shared_ptr<ComputeService> compute_service, std::map<std::string, std::string> service_specific_args = {});
68 
/** @brief Terminate a job (standard or pilot) that hasn't completed/expired/failed yet. */
69  void terminateJob(WorkflowJob *);
70 
/** @brief Forget a job (to free memory, only once a job has completed or failed). */
71  void forgetJob(WorkflowJob *job);
72 
/** @brief Get the list of currently pending pilot jobs. */
73  std::set<PilotJob *> getPendingPilotJobs();
74 
/** @brief Get the list of currently running pilot jobs. */
75  std::set<PilotJob *> getRunningPilotJobs();
76  /***********************/
78  /***********************/
79 
/** @brief Destructor, which kills the daemon (and clears all the jobs). */
80  ~JobManager() override;
81 
82  protected:
83 
// WMS is a friend so that it can call the protected constructor below.
84  friend class WMS;
85 
/**
 * @brief Constructor.
 * @param wms the WMS this job manager is associated with
 */
86  explicit JobManager(std::shared_ptr<WMS> wms);
87 
88  /***********************/
90  /***********************/
91 
92  private:
93 
// Override of Service's main() — presumably the daemon's main routine; confirm.
94  int main() override;
// Presumably handles one incoming message and returns whether to keep
// running — confirm against JobManager.cpp.
95  bool processNextMessage();
// Handlers for job lifecycle events; each receives the job concerned and the
// compute service involved (the failure handler also receives the cause).
96  void processStandardJobCompletion(StandardJob *job, std::shared_ptr<ComputeService> compute_service);
97  void processStandardJobFailure(StandardJob *job, std::shared_ptr<ComputeService> compute_service, std::shared_ptr<FailureCause> cause);
98  void processPilotJobStart(PilotJob *job, std::shared_ptr<ComputeService> compute_service);
99  void processPilotJobExpiration(PilotJob *job, std::shared_ptr<ComputeService> compute_service);
100 
101 
102 
103  // Relevant WMS (shared ownership with whoever constructed this manager)
104  std::shared_ptr<WMS> wms;
105 
106  // Job map: raw job pointer -> owning unique_ptr for that job
107  std::map<WorkflowJob*, std::unique_ptr<WorkflowJob>> jobs;
108 
109  // Job lists: non-owning pointers grouped by job state (standard jobs)
110  std::set<StandardJob *> pending_standard_jobs;
111  std::set<StandardJob *> running_standard_jobs;
112  std::set<StandardJob *> completed_standard_jobs;
113  std::set<StandardJob *> failed_standard_jobs;
114 
// Same grouping for pilot jobs (there is no "failed" set for pilot jobs)
115  std::set<PilotJob *> pending_pilot_jobs;
116  std::set<PilotJob *> running_pilot_jobs;
117  std::set<PilotJob *> completed_pilot_jobs;
118 
119  };
120 
121  /***********************/
123  /***********************/
124 
125 };
126 
127 #endif //WRENCH_PILOTJOBMANAGER_H
void forgetJob(WorkflowJob *job)
Forget a job (to free memory, only once a job has completed or failed)
Definition: JobManager.cpp:454
~JobManager() override
Destructor, which kills the daemon (and clears all the jobs)
Definition: JobManager.cpp:48
PilotJob * createPilotJob()
Create a pilot job.
Definition: JobManager.cpp:228
void kill()
Kill the job manager (brutally terminate the daemon, clears all jobs)
Definition: JobManager.cpp:55
std::set< PilotJob * > getPendingPilotJobs()
Get the list of currently pending pilot jobs.
Definition: JobManager.cpp:442
void stop() override
Stop the job manager.
Definition: JobManager.cpp:66
A helper daemon (co-located with and explicitly started by a WMS), which is used to handle all job ex...
Definition: JobManager.h:40
void submitJob(WorkflowJob *job, std::shared_ptr< ComputeService > compute_service, std::map< std::string, std::string > service_specific_args={})
Submit a job to a compute service.
Definition: JobManager.cpp:258
A standard (i.e., non-pilot) workflow job that can be submitted to a ComputeService by a WMS (via a J...
Definition: StandardJob.h:37
void terminateJob(WorkflowJob *)
Terminate a job (standard or pilot) that hasn't completed/expired/failed yet.
Definition: JobManager.cpp:375
Definition: Alarm.cpp:20
JobManager(std::shared_ptr< WMS > wms)
Constructor.
Definition: JobManager.cpp:38
A computational task in a Workflow.
Definition: WorkflowTask.h:31
Abstraction of a job used for executing tasks in a Workflow.
Definition: WorkflowJob.h:34
std::set< PilotJob * > getRunningPilotJobs()
Get the list of currently running pilot jobs.
Definition: JobManager.cpp:434
A pilot (i.e., non-standard) workflow job that can be submitted to a ComputeService by a WMS (via a J...
Definition: PilotJob.h:29
A service that can be added to the simulation and that can be used by a WMS when executing a workflow...
Definition: Service.h:26
A data file used/produced by a WorkflowTask in a Workflow.
Definition: WorkflowFile.h:26
A workflow management system (WMS)
Definition: WMS.h:43
StandardJob * createStandardJob(std::vector< WorkflowTask * > tasks, std::map< WorkflowFile *, std::shared_ptr< FileLocation > > file_locations, std::vector< std::tuple< WorkflowFile *, std::shared_ptr< FileLocation >, std::shared_ptr< FileLocation > >> pre_file_copies, std::vector< std::tuple< WorkflowFile *, std::shared_ptr< FileLocation >, std::shared_ptr< FileLocation > >> post_file_copies, std::vector< std::tuple< WorkflowFile *, std::shared_ptr< FileLocation > >> cleanup_file_deletions)
Create a standard job.
Definition: JobManager.cpp:94