|
| HTCondorCentralManagerService (const std::string &hostname, double negotiator_startup_overhead, std::set< std::shared_ptr< ComputeService >> compute_services, std::map< std::string, std::string > property_list={}, std::map< std::string, double > messagepayload_list={}) |
| Constructor. More...
|
|
| ~HTCondorCentralManagerService () override |
| Destructor.
|
|
void | addComputeService (std::shared_ptr< ComputeService > compute_service) |
| Add a new 'child' compute service. More...
|
|
bool | jobCanRunSomewhere (std::shared_ptr< WorkflowJob > job, std::map< std::string, std::string > service_specific_arguments) |
| Helper function to check whether a job can run on at least one child compute service. More...
|
|
bool | jobKindIsSupported (const std::shared_ptr< WorkflowJob > &job, std::map< std::string, std::string > service_specific_arguments) |
| Helper function to check whether a job kind is supported. More...
|
|
void | submitPilotJob (std::shared_ptr< PilotJob > job, const std::map< std::string, std::string > &service_specific_arguments) override |
| Asynchronously submit a pilot job to the HTCondor service. More...
|
|
void | submitStandardJob (std::shared_ptr< StandardJob > job, const std::map< std::string, std::string > &service_specific_arguments) override |
| Submit a standard job to the HTCondor service. More...
|
|
void | terminatePilotJob (std::shared_ptr< PilotJob > job) override |
| Terminate a pilot job on the compute service. More...
|
|
void | terminateStandardJob (std::shared_ptr< StandardJob > job) override |
| Terminate a standard job on the compute service (virtual) More...
|
|
| ComputeService (const std::string &hostname, std::string service_name, std::string mailbox_name_prefix, std::string scratch_space_mount_point) |
| Constructor. More...
|
|
std::map< std::string, double > | getCoreFlopRate () |
| Get the per-core flop rate of the compute service's hosts. More...
|
|
double | getFreeScratchSpaceSize () |
| Get the free space on the compute service's scratch storage space. More...
|
|
std::vector< std::string > | getHosts () |
| Get the list of the compute service's compute hosts. More...
|
|
std::map< std::string, double > | getMemoryCapacity () |
| Get the RAM capacities for each of the compute service's hosts. More...
|
|
unsigned long | getNumHosts () |
| Get the number of hosts that the compute service manages. More...
|
|
std::map< std::string, double > | getPerHostAvailableMemoryCapacity () |
| Get RAM availability for each of the compute service's hosts. More...
|
|
std::map< std::string, unsigned long > | getPerHostNumCores () |
| Get core counts for each of the compute service's hosts. More...
|
|
std::map< std::string, unsigned long > | getPerHostNumIdleCores () |
| Get idle core counts for each of the compute service's hosts. More...
|
|
std::shared_ptr< StorageService > | getScratch () |
| Method that returns the compute service's scratch space's storage service. More...
|
|
unsigned long | getTotalNumCores () |
| Get the total core count for all hosts of the compute service. More...
|
|
virtual unsigned long | getTotalNumIdleCores () |
| Get the total idle core count for all hosts of the compute service. More...
|
|
double | getTotalScratchSpaceSize () |
| Get the total capacity of the compute service's scratch storage space. More...
|
|
double | getTTL () |
| Get the time-to-live of the compute service. More...
|
|
bool | hasScratch () |
| Checks if the compute service has a scratch space. More...
|
|
virtual bool | isThereAtLeastOneHostWithIdleResources (unsigned long num_cores, double ram) |
| Method to find out if, right now, the compute service has at least one host with some number of idle cores and some available RAM. More...
|
|
void | stop () override |
| Stop the compute service - must be called by the stop() method of derived classes.
|
|
bool | supportsPilotJobs () |
| Get whether the compute service supports pilot jobs or not. More...
|
|
bool | supportsStandardJobs () |
| Get whether the compute service supports standard jobs or not. More...
|
|
void | terminateJob (std::shared_ptr< WorkflowJob > job) |
| Terminate a previously-submitted job (which may or may not be running yet) More...
|
|
void | assertServiceIsUp () |
| Throws an exception if the service is not up. More...
|
|
std::string | getHostname () |
| Get the name of the host on which the service is / will be running. More...
|
|
double | getMessagePayloadValue (std::string) |
| Get a message payload of the Service as a double. More...
|
|
double | getNetworkTimeoutValue () |
| Returns the service's network timeout value. More...
|
|
bool | getPropertyValueAsBoolean (std::string) |
| Get a property of the Service as a boolean. More...
|
|
double | getPropertyValueAsDouble (std::string) |
| Get a property of the Service as a double. More...
|
|
std::string | getPropertyValueAsString (std::string) |
| Get a property of the Service as a string. More...
|
|
unsigned long | getPropertyValueAsUnsignedLong (std::string) |
| Get a property of the Service as an unsigned long. More...
|
|
bool | isUp () |
| Returns true if the service is UP, false otherwise. More...
|
|
void | resume () |
| Resume the service. More...
|
|
void | setNetworkTimeoutValue (double value) |
| Sets the service's network timeout value. More...
|
|
void | setStateToDown () |
| Set the state of the service to DOWN.
|
|
void | start (std::shared_ptr< Service > this_service, bool daemonize, bool auto_restart) |
| Start the service. More...
|
|
void | suspend () |
| Suspend the service.
|
|
| S4U_Daemon (std::string hostname, std::string process_name_prefix, std::string mailbox_prefix) |
| Constructor (daemon with a mailbox) More...
|
|
virtual | ~S4U_Daemon () |
|
virtual void | cleanup (bool has_returned_from_main, int return_value) |
| Cleanup function called when the daemon terminates (for whatever reason). The default behavior is to throw an exception if the host is off. This method should be overridden in a daemon that implements some fault-tolerant behavior, or is naturally tolerant. More...
|
|
void | createLifeSaver (std::shared_ptr< S4U_Daemon > reference) |
| Create a life saver for the daemon. More...
|
|
std::string | getName () |
| Retrieve the process name. More...
|
|
int | getReturnValue () |
| Returns the value returned by main() (if the daemon has returned from main) More...
|
|
S4U_Daemon::State | getState () |
| Get the daemon's state. More...
|
|
bool | hasReturnedFromMain () |
| Returns true if the daemon has returned from main() (i.e., not brutally killed) More...
|
|
bool | isDaemonized () |
| Return the daemonized status of the daemon. More...
|
|
bool | isSetToAutoRestart () |
| Return the auto-restart status of the daemon. More...
|
|
std::pair< bool, int > | join () |
| Join (i.e., wait for) the daemon. More...
|
|
void | resumeActor () |
| Resume the daemon/actor.
|
|
void | setupOnExitFunction () |
| Sets up the on_exit function for the actor.
|
|
void | startDaemon (bool _daemonized, bool _auto_restart) |
| Start the daemon. More...
|
|
void | suspendActor () |
| Suspend the daemon/actor.
|
|
|
enum | State { UP,
DOWN,
SUSPENDED
} |
| Daemon states. More...
|
|
static void | cleanupTrackedServices () |
| Go through the tracked services and remove all entries with a refcount of 1!
|
|
static void | clearTrackedServices () |
| Forget all tracked services.
|
|
static void | increaseNumCompletedServicesCount () |
| Increase the completed service count.
|
|
std::string | hostname |
| The name of the host on which the daemon is running.
|
|
std::string | initial_mailbox_name |
| The initial name of the daemon's mailbox.
|
|
LifeSaver * | life_saver = nullptr |
| The daemon's life saver.
|
|
std::string | mailbox_name |
| The current name of the daemon's mailbox.
|
|
std::string | process_name |
| The name of the daemon.
|
|
Simulation * | simulation |
| A pointer to the simulation object.
|
|
static constexpr unsigned long | ALL_CORES = ULONG_MAX |
| A convenient constant to mean "use all cores of a physical host" whenever a number of cores is needed when instantiating compute services.
|
|
static constexpr double | ALL_RAM = DBL_MAX |
| A convenient constant to mean "use all ram of a physical host" whenever a ram capacity is needed when instantiating compute services.
|
|
| ComputeService (const std::string &hostname, std::string service_name, std::string mailbox_name_prefix, std::shared_ptr< StorageService > scratch_space) |
| Constructor. More...
|
|
std::shared_ptr< StorageService > | getScratchSharedPtr () |
| Get a shared pointer to the compute service's scratch storage space. More...
|
|
void | submitJob (std::shared_ptr< WorkflowJob > job, const std::map< std::string, std::string > &={}) |
| Submit a job to the compute service. More...
|
|
| Service (std::string hostname, std::string process_name_prefix, std::string mailbox_name_prefix) |
| Constructor. More...
|
|
| ~Service () |
| Destructor.
|
|
template<class T > |
std::shared_ptr< T > | getSharedPtr () |
| Method to retrieve the shared_ptr to a service. More...
|
|
void | serviceSanityCheck () |
| Check whether the service is properly configured and running. More...
|
|
void | setMessagePayload (std::string, double) |
| Set a message payload of the Service. More...
|
|
void | setMessagePayloads (std::map< std::string, double > default_messagepayload_values, std::map< std::string, double > overriden_messagepayload_values) |
| Set default and user-defined message payloads. More...
|
|
void | setProperties (std::map< std::string, std::string > default_property_values, std::map< std::string, std::string > overriden_property_values) |
| Set default and user-defined properties. More...
|
|
void | setProperty (std::string, std::string) |
| Set a property of the Service. More...
|
|
void | acquireDaemonLock () |
| Lock the daemon's lock.
|
|
void | killActor () |
| Kill the daemon/actor (does nothing if already dead) More...
|
|
void | releaseDaemonLock () |
| Unlock the daemon's lock.
|
|
void | runMainMethod () |
| Method that runs the user-defined main method (that's called by the S4U actor class)
|
|
static void | assertServiceIsUp (std::shared_ptr< Service > s) |
| Assert for the service being up. More...
|
|
template<class T > |
static std::shared_ptr< T > | getServiceByName (std::string name) |
| Method to retrieve the shared_ptr to a service based on the service's name (not efficient) More...
|
|
std::shared_ptr< StorageService > | scratch_space_storage_service |
| A scratch storage service associated to the compute service.
|
|
std::map< std::string, double > | messagepayload_list |
| The service's messagepayload list.
|
|
std::string | name |
| The service's name.
|
|
double | network_timeout = 30.0 |
| The time (in seconds) after which a service that doesn't send back a reply (control) message causes a NetworkTimeOut exception. (default: 30 seconds; if <0, never times out)
|
|
std::map< std::string, std::string > | property_list |
| The service's property list.
|
|
unsigned int | num_starts = 0 |
| The number of times that this daemon has started (i.e., 1 + number of restarts)
|
|
State | state |
| The service's state.
|
|
An HTCondor central manager service implementation.