10 #ifndef WRENCH_BAREMETALCOMPUTESERVICE_H
11 #define WRENCH_BAREMETALCOMPUTESERVICE_H
16 #include "wrench/services/compute/ComputeService.h"
17 #include "BareMetalComputeServiceProperty.h"
18 #include "BareMetalComputeServiceMessagePayload.h"
19 #include "wrench/services/helper_services/host_state_change_detector/HostStateChangeDetector.h"
30 class ActionExecutionService;
52 WRENCH_PROPERTY_COLLECTION_TYPE default_property_values = {
58 WRENCH_MESSAGE_PAYLOADCOLLECTION_TYPE default_messagepayload_values = {
92 const std::map<std::string, std::tuple<unsigned long, double>> compute_resources,
93 std::string scratch_space_mount_point,
94 WRENCH_PROPERTY_COLLECTION_TYPE property_list = {},
95 WRENCH_MESSAGE_PAYLOADCOLLECTION_TYPE messagepayload_list = {}
100 const std::vector<std::string> compute_hosts,
101 std::string scratch_space_mount_point,
102 WRENCH_PROPERTY_COLLECTION_TYPE property_list = {},
103 WRENCH_MESSAGE_PAYLOADCOLLECTION_TYPE messagepayload_list = {}
/**
 * Declaration of the compound-job submission entry point; marked `override`,
 * so it replaces a virtual declared in a base class.
 *
 * @param job the compound job, passed as a shared pointer
 * @param service_specific_args string-to-string map of extra arguments,
 *        taken by const reference (cannot be modified here)
 */
114 void submitCompoundJob(std::shared_ptr<CompoundJob> job,
const std::map<std::string, std::string> &service_specific_args)
override;
/**
 * Declaration of the compound-job termination entry point; marked `override`,
 * so it replaces a virtual declared in a base class.
 *
 * @param job the compound job to act on, passed as a shared pointer
 */
116 void terminateCompoundJob(std::shared_ptr<CompoundJob> job)
override;
122 friend class JobManager;
/**
 * Declaration of the service-specific argument validation hook (an
 * `override` of a base-class virtual).
 *
 * @param job the compound job the arguments belong to
 * @param service_specific_args string-to-string argument map; taken by
 *        NON-const reference, so the implementation is able to modify it
 *        (NOTE(review): whether it actually does is not visible here)
 */
124 void validateServiceSpecificArguments(std::shared_ptr<CompoundJob> job,
125 std::map<std::string, std::string> &service_specific_args)
override;
129 std::map<std::string, std::tuple<unsigned long, double>> compute_resources,
130 WRENCH_PROPERTY_COLLECTION_TYPE property_list,
131 WRENCH_MESSAGE_PAYLOADCOLLECTION_TYPE messagepayload_list,
133 std::shared_ptr<PilotJob> pj, std::string suffix,
134 std::shared_ptr<StorageService> scratch_space);
137 std::map<std::string, std::tuple<unsigned long, double>> compute_resources,
138 WRENCH_PROPERTY_COLLECTION_TYPE property_list,
139 WRENCH_MESSAGE_PAYLOADCOLLECTION_TYPE messagepayload_list,
140 std::shared_ptr<StorageService> scratch_space);
/**
 * Declaration of a parameterless helper that returns nothing.
 * NOTE(review): presumably checks this service's configured properties;
 * how failures are reported is not visible in this listing.
 */
142 void validateProperties();
/**
 * Declaration of the instance set-up helper.
 *
 * NOTE(review): the listing's own numbering jumps from 149 to 151 here, so a
 * parameter line may be missing from this view — confirm against the real
 * header before relying on this signature.
 *
 * @param hostname name of a host, by const reference
 * @param compute_resources map from a string key to an
 *        (unsigned long, double) tuple — presumably host name to
 *        (cores, RAM); TODO confirm
 * @param property_list property collection, passed by value
 * @param messagepayload_list message-payload collection, passed by value
 * @param pj a pilot job, passed as a shared pointer
 */
146 void initiateInstance(
const std::string &hostname,
147 std::map<std::string, std::tuple<unsigned long, double>> compute_resources,
148 WRENCH_PROPERTY_COLLECTION_TYPE property_list,
149 WRENCH_MESSAGE_PAYLOADCOLLECTION_TYPE messagepayload_list,
151 std::shared_ptr<PilotJob> pj);
// Alarm handle; explicitly starts out as nullptr (no alarm).
156 std::shared_ptr<Alarm> death_alarm =
nullptr;
// Pilot job associated with this service instance.
// NOTE(review): presumably null when the service is not run inside a pilot job — confirm.
157 std::shared_ptr<PilotJob> containing_pilot_job;
// Per-standard-job set of data files (keyed by job pointer, unordered).
160 std::unordered_map<std::shared_ptr<StandardJob> , std::set<std::shared_ptr<DataFile>>> files_in_scratch;
// Compound jobs currently tracked by the service.
162 std::set<std::shared_ptr<CompoundJob>> current_jobs;
// Actions bucketed by state: two sets plus a vector for the "ready" ones
// (the vector is the only one of the three containers that keeps insertion order).
164 std::set<std::shared_ptr<Action>> not_ready_actions;
165 std::vector<std::shared_ptr<Action>> ready_actions;
166 std::set<std::shared_ptr<Action>> dispatched_actions;
// Integer counter stored per compound job.
167 std::unordered_map<std::shared_ptr<CompoundJob>,
int> num_dispatched_actions_for_cjob;
/**
 * Declaration of a parameterless helper that returns nothing.
 * NOTE(review): presumably removes files from scratch space; the
 * implementation is not part of this listing.
 */
178 void cleanUpScratch();
/**
 * Declaration of the service termination helper.
 *
 * @param send_failure_notifications boolean flag (presumably controls whether
 *        failure notifications are sent — confirm in the implementation)
 * @param termination_cause a ComputeService::TerminationCause value
 */
184 void terminate(
bool send_failure_notifications, ComputeService::TerminationCause termination_cause);
/**
 * Declaration of the handler for a finished action.
 *
 * @param action the action concerned, passed as a shared pointer
 */
186 void processActionDone(std::shared_ptr<Action> action);
/**
 * Declaration of the handler for a compound-job termination request.
 *
 * @param job the compound job named in the request
 * @param answer_mailbox raw (non-owning) pointer to a SimGrid mailbox —
 *        presumably where the reply is sent; TODO confirm
 */
188 void processCompoundJobTerminationRequest(std::shared_ptr<CompoundJob> job, simgrid::s4u::Mailbox *answer_mailbox);
/**
 * Declaration of the message-processing step; takes no arguments.
 *
 * @return a boolean (NOTE(review): its meaning — e.g. "keep running" vs
 *         "stop" — is not visible in this listing; confirm)
 */
190 bool processNextMessage();
/**
 * Declaration of a parameterless helper that returns nothing.
 * NOTE(review): presumably hands the entries of `ready_actions` over for
 * execution — confirm in the implementation.
 */
192 void dispatchReadyActions();
/**
 * Declaration of the helper that terminates one compound job.
 *
 * @param job the compound job concerned
 * @param termination_cause a ComputeService::TerminationCause value
 */
195 void terminateCurrentCompoundJob(std::shared_ptr<CompoundJob> job, ComputeService::TerminationCause termination_cause);
/**
 * Declaration of the handler for a resource-information request.
 *
 * @param answer_mailbox raw (non-owning) pointer to a SimGrid mailbox —
 *        presumably the reply destination; TODO confirm
 * @param key string selecting what is requested (accepted values are not
 *        visible in this listing)
 */
197 void processGetResourceInformation(simgrid::s4u::Mailbox *answer_mailbox,
const std::string &key);
/**
 * Declaration of the handler for a compound-job submission.
 *
 * @param answer_mailbox raw (non-owning) pointer to a SimGrid mailbox —
 *        presumably the reply destination; TODO confirm
 * @param job the submitted compound job
 * @param service_specific_arguments string-to-string argument map, taken by
 *        NON-const reference (so the callee is able to modify it)
 */
201 void processSubmitCompoundJob(simgrid::s4u::Mailbox *answer_mailbox, std::shared_ptr<CompoundJob> job,
202 std::map<std::string, std::string> &service_specific_arguments);
/**
 * Declaration of the handler for an "is there at least one host with
 * available resources" query.
 *
 * @param answer_mailbox raw (non-owning) pointer to a SimGrid mailbox —
 *        presumably the reply destination; TODO confirm
 * @param num_cores number of cores asked about
 * @param ram amount of RAM asked about (unit not stated in this listing)
 */
204 void processIsThereAtLeastOneHostWithAvailableResources(
205 simgrid::s4u::Mailbox *answer_mailbox,
unsigned long num_cores,
double ram);
/**
 * Declaration of the clean-up hook; marked `override`, so it replaces a
 * virtual declared in a base class.
 *
 * @param has_terminated_cleanly boolean flag (presumably true when the
 *        service ended normally — confirm against the base class)
 * @param return_value integer return value handed to the hook
 */
215 void cleanup(
bool has_terminated_cleanly,
int return_value)
override;
/**
 * Declaration of a static helper that parses a resource specification string.
 *
 * @param spec the specification text, by const reference (the accepted
 *        format is not visible in this listing)
 * @return a (string, unsigned long) tuple — NOTE(review): presumably
 *         (host name, number of cores); confirm in the implementation
 */
219 static std::tuple<std::string, unsigned long>
parseResourceSpec(
const std::string &spec);
// Shared handle to a HostStateChangeDetector helper service.
224 std::shared_ptr<HostStateChangeDetector> host_state_change_monitor;
// Shared handle to an ActionExecutionService (only forward-declared in this header).
226 std::shared_ptr<ActionExecutionService> action_execution_service;
236 #endif //WRENCH_BAREMETALCOMPUTESERVICE_H
static const std::string PILOT_JOB_EXPIRED_MESSAGE_PAYLOAD
The number of bytes in the control message sent by the daemon to state that a pilot job has expired.
Definition: ComputeServiceMessagePayload.h:60
static const std::string IS_THERE_AT_LEAST_ONE_HOST_WITH_AVAILABLE_RESOURCES_ANSWER_MESSAGE_PAYLOAD
The number of bytes in the control message sent by the daemon to state whether one host has some resources a...
Definition: ComputeServiceMessagePayload.h:78
static const std::string SUBMIT_COMPOUND_JOB_ANSWER_MESSAGE_PAYLOAD
The number of bytes in the control message sent by the daemon to acknowledge a compound job submissio...
Definition: ComputeServiceMessagePayload.h:43
static const std::string TERMINATE_PILOT_JOB_ANSWER_MESSAGE_PAYLOAD
The number of bytes in the control message sent by the daemon to acknowledge a pilot job termination.
Definition: ComputeServiceMessagePayload.h:66
static const std::string SUBMIT_STANDARD_JOB_REQUEST_MESSAGE_PAYLOAD
The number of bytes in the control message sent to the daemon to submit a standard job.
Definition: ComputeServiceMessagePayload.h:27
static const std::string COMPOUND_JOB_DONE_MESSAGE_PAYLOAD
The number of bytes in the control message sent by the daemon to state that it has completed a compou...
Definition: ComputeServiceMessagePayload.h:45
static const std::string DAEMON_STOPPED_MESSAGE_PAYLOAD
The number of bytes in the control message sent by the daemon to confirm it has terminated.
Definition: ServiceMessagePayload.h:37
static const std::string TERMINATE_STANDARD_JOB_ANSWER_MESSAGE_PAYLOAD
The number of bytes in the control message sent by the daemon to acknowledge a standard job terminati...
Definition: ComputeServiceMessagePayload.h:37
static const std::string RESOURCE_DESCRIPTION_REQUEST_MESSAGE_PAYLOAD
The number of bytes in the control message sent to the daemon to request information on its resources...
Definition: ComputeServiceMessagePayload.h:74
static const std::string IS_THERE_AT_LEAST_ONE_HOST_WITH_AVAILABLE_RESOURCES_REQUEST_MESSAGE_PAYLOAD
The number of bytes in the control message sent to the daemon to ask if one host has some resou...
Definition: ComputeServiceMessagePayload.h:80
The compute service base class.
Definition: ComputeService.h:34
A cloud-based compute service that manages a set of physical hosts and controls access to their resou...
Definition: CloudComputeService.h:36
static const std::string TERMINATE_PILOT_JOB_REQUEST_MESSAGE_PAYLOAD
The number of bytes in the control message sent to the daemon to terminate a pilot job.
Definition: ComputeServiceMessagePayload.h:64
A batch-scheduled compute service that manages a set of compute hosts and con...
Definition: BatchComputeService.h:49
static const std::string STANDARD_JOB_DONE_MESSAGE_PAYLOAD
The number of bytes in the control message sent by the daemon to state that it has completed a standa...
Definition: ComputeServiceMessagePayload.h:31
static const std::string STANDARD_JOB_FAILED_MESSAGE_PAYLOAD
The number of bytes in the control message sent by the daemon to state that a running standard job ha...
Definition: ComputeServiceMessagePayload.h:33
Definition: Action.cpp:28
static const std::string TERMINATE_STANDARD_JOB_REQUEST_MESSAGE_PAYLOAD
The number of bytes in the control message sent to the daemon to terminate a standard job.
Definition: ComputeServiceMessagePayload.h:35
static const std::string SUBMIT_PILOT_JOB_ANSWER_MESSAGE_PAYLOAD
The number of bytes in the control message sent from the daemon to acknowledge a pilot job submission...
Definition: ComputeServiceMessagePayload.h:56
static const std::string COMPOUND_JOB_FAILED_MESSAGE_PAYLOAD
The number of bytes in the control message sent by the daemon to state that a running compound job ha...
Definition: ComputeServiceMessagePayload.h:47
static std::tuple< std::string, unsigned long > parseResourceSpec(const std::string &spec)
Helper static method to parse resource specifications to the <cores,ram> format.
Definition: ActionExecutionService.cpp:76
static const std::string RESOURCE_DESCRIPTION_ANSWER_MESSAGE_PAYLOAD
The number of bytes in the control message sent by the daemon to state information on its resources.
Definition: ComputeServiceMessagePayload.h:76
static const std::string STOP_DAEMON_MESSAGE_PAYLOAD
The number of bytes in the control message sent to the daemon to terminate it.
Definition: ServiceMessagePayload.h:35
static const std::string PILOT_JOB_STARTED_MESSAGE_PAYLOAD
The number of bytes in the control message sent by the daemon to state that a pilot job has started.
Definition: ComputeServiceMessagePayload.h:58
static const std::string SUBMIT_STANDARD_JOB_ANSWER_MESSAGE_PAYLOAD
The number of bytes in the control message sent by the daemon to acknowledge a standard job submissio...
Definition: ComputeServiceMessagePayload.h:29
static const std::string JOB_TYPE_NOT_SUPPORTED_MESSAGE_PAYLOAD
The number of bytes in the control message sent by the daemon to state that it does not support the t...
Definition: ComputeServiceMessagePayload.h:24
static const std::string SUBMIT_PILOT_JOB_REQUEST_MESSAGE_PAYLOAD
The number of bytes in the control message sent to the daemon to submit a pilot job.
Definition: ComputeServiceMessagePayload.h:54
static const std::string SUBMIT_COMPOUND_JOB_REQUEST_MESSAGE_PAYLOAD
The number of bytes in the control message sent to the daemon to submit a compound job.
Definition: ComputeServiceMessagePayload.h:41
static const std::string PILOT_JOB_FAILED_MESSAGE_PAYLOAD
The number of bytes in the control message sent by the daemon to state that a pilot job has failed.
Definition: ComputeServiceMessagePayload.h:62
static const std::string TERMINATE_COMPOUND_JOB_REQUEST_MESSAGE_PAYLOAD
The number of bytes in the control message sent to the daemon to terminate a compound job.
Definition: ComputeServiceMessagePayload.h:49
static const std::string TERMINATE_COMPOUND_JOB_ANSWER_MESSAGE_PAYLOAD
The number of bytes in the control message sent by the daemon to acknowledge a compound job terminati...
Definition: ComputeServiceMessagePayload.h:51