Commit 8c4b8048 authored by Michael Mercier
Browse files

Merge branch 'master' of gitlab.inria.fr:batsim/batsim

parents a01b9897 20c496ed
# A C++ Batch Scheduler for Batsim
#
# This step clones the batsched repository, checks out a given commit,
# then builds and installs it with CMake.
#
# You have to specify the desired "commit_hash" to be built.
# Optionally you can specify the working directory ("work_dir").
#
# Usage example:
# global:
# batsched_commit: 1a2e210e
# setup:
# - commit_hash: $${batsched_commit}
# # Optional
# - work_dir: /tmp
# Working directory used when "work_dir" is not given in the setup section.
- work_dir: /root
# Same dependencies as Batsim
# First command block: create work_dir and clone batsched into it (SSH URL).
# Second command block: check out commit_hash, then configure a DEBUG build
# in ./build, compile it and run "make install".
- get_it:
- exec_in: |
mkdir -p $${work_dir}
cd $${work_dir}
git clone git@gitlab.inria.fr:batsim/batsched.git
- exec_in: |
cd $${work_dir}/batsched
git checkout $${commit_hash}
mkdir -p build
cd build
cmake -DCMAKE_BUILD_TYPE=DEBUG ..
make
make install
......@@ -3,7 +3,7 @@ Experiment tools
This directory contains scripts that can be used to make experimenting with Batsim easier.
These scripts are written in Python 2 (because Execo does not handle Python 3 yet).
These scripts are written in Python 2 but should be compatible with Python 3.
Dependencies
------------
......@@ -18,9 +18,9 @@ the moment.
#### Installation
``` bash
git clone https://github.com/mickours/execo.git
cd $${simctn_dir}
cd execo
git checkout 725fddc # cwd support
git checkout 406f4fa06 # cwd support
# Make sure no other execo installation is present before the next step;
# otherwise, conflicts might occur (if needed, run: pip2 uninstall execo)
pip2 install --user .
......
#!/usr/bin/python2
# Should now work with both python2 and python3
import argparse
import yaml
......@@ -56,7 +57,7 @@ def signal_handler(signal, frame):
def instance_id_from_comb(comb, hash_length):
    """Return a short identifier derived from a parameter combination.

    The combination is flattened with ``flatten_dict``, the string
    representation of the result is hashed with SHA-1, and the first
    ``hash_length`` characters of the hexadecimal digest are returned.

    :param comb: the combination to identify (passed to ``flatten_dict``).
    :param hash_length: number of leading hex-digest characters to keep.
    :returns: the truncated hexadecimal SHA-1 digest, as a string.
    """
    fdict = flatten_dict(comb)
    # hashlib only accepts bytes on Python 3, so the text is encoded before
    # hashing. The previous un-encoded call raised TypeError there and made
    # this statement unreachable.
    return hashlib.sha1(str(fdict).encode()).hexdigest()[:hash_length]
def retrieve_dirs_from_instances(variables,
variables_declaration_order,
......@@ -287,37 +288,39 @@ class WorkerLifeCycleHandler(ProcessLifecycleHandler):
local_rank = self.local_rank,
cmd = instance_command))
create_dir_if_not_exists('{base_output_dir}/instances/output/'.format(
base_output_dir = self.data.base_output_directory))
# Launching the process
if self.hostname == 'localhost':
process = Process(cmd = instance_command,
kill_subprocesses = True,
cwd = self.data.base_working_directory,
lifecycle_handlers = [self])
lifecycle_handlers = [self],
stdout_handlers = ['{out}/instances/output/{iid}.stdout'.format(
out = self.data.base_output_directory,
iid = self.instance_id)],
stderr_handlers = ['{out}/instances/output/{iid}.stderr'.format(
out = self.data.base_output_directory,
iid = self.instance_id)])
process.start()
else:
process = SshProcess(cmd = instance_command,
host = self.host,
kill_subprocesses = True,
cwd = self.data.base_working_directory,
lifecycle_handlers = [self])
lifecycle_handlers = [self],
stdout_handlers = ['{out}/instances/output/{iid}.stdout'.format(
out = self.data.base_output_directory,
iid = self.instance_id)],
stderr_handlers = ['{out}/instances/output/{iid}.stderr'.format(
out = self.data.base_output_directory,
iid = self.instance_id)])
process.start()
def end(self, process):
assert(self.comb != None)
# Let's log the process's output
create_dir_if_not_exists('{base_output_dir}/instances/output/'.format(
base_output_dir = self.data.base_output_directory))
write_string_into_file(process.stdout,
'{base_output_dir}/instances/output/{iid}.stdout'.format(
base_output_dir = self.data.base_output_directory,
iid = self.instance_id))
write_string_into_file(process.stderr,
'{base_output_dir}/instances/output/{iid}.stderr'.format(
base_output_dir = self.data.base_output_directory,
iid = self.instance_id))
# Let's mark whether the computation was successful
if process.finished_ok:
self.data.sweeper.done(self.comb)
......
#!/usr/bin/python2
# Should now work with both python2 and python3
import argparse
from execo import *
......@@ -342,12 +343,6 @@ class BatsimLifecycleHandler(ProcessLifecycleHandler):
self.execution_data.nb_started += 1
def end(self, process):
# Let's write stdout and stderr to files
write_string_into_file(process.stdout, '{output_dir}/batsim.stdout'.format(
output_dir = self.execution_data.output_directory))
write_string_into_file(process.stderr, '{output_dir}/batsim.stderr'.format(
output_dir = self.execution_data.output_directory))
# Let's check whether the process was successful
if (process.exit_code != 0) or process.timeouted or process.killed or process.error:
self.execution_data.failure = True
......@@ -389,12 +384,6 @@ class SchedLifecycleHandler(ProcessLifecycleHandler):
logger.info("Sched started")
self.execution_data.nb_started += 1
def end(self, process):
# Let's write stdout and stderr to files
write_string_into_file(process.stdout, '{output_dir}/sched.stdout'.format(
output_dir = self.execution_data.output_directory))
write_string_into_file(process.stderr, '{output_dir}/sched.stderr'.format(
output_dir = self.execution_data.output_directory))
# Let's check whether the process was successful
if (process.exit_code != 0) or process.timeouted or process.killed or process.error:
self.execution_data.failure = True
......@@ -458,22 +447,22 @@ def execute_command(command,
shell = True,
kill_subprocesses = True,
name = command_name,
cwd = working_directory)
cwd = working_directory,
stdout_handlers = ['{out}/{name}.stdout'.format(
out = output_script_output_dir,
name = command_name)],
stderr_handlers = ['{out}/{name}.stderr'.format(
out = output_script_output_dir,
name = command_name)])
logger.info("Executing command: {cmd}".format(cmd=command))
# Let's create the script logging directory if needed
create_dir_if_not_exists(output_script_output_dir)
# Let's start the process
cmd_process.start().wait()
# Let's write command outputs
create_dir_if_not_exists(output_script_output_dir)
write_string_into_file(cmd_process.stdout, '{out}/{name}.stdout'.format(
out = output_script_output_dir,
name = command_name))
write_string_into_file(cmd_process.stderr, '{out}/{name}.stderr'.format(
out = output_script_output_dir,
name = command_name))
return cmd_process.finished_ok and not cmd_process.error and cmd_process.exit_code == 0
def socket_in_use(sock):
......@@ -489,7 +478,6 @@ def wait_for_batsim_to_open_connection(execution_data,
while remaining_time > 0 and not socket_in_use(sock) and not execution_data.batsim_process.ended:
time.sleep(seconds_to_sleep)
remaining_time -= seconds_to_sleep
#logger.debug("Batsim stderr: {}".format(execution_data.batsim_process.stderr))
return socket_in_use(sock)
......@@ -558,7 +546,11 @@ def execute_one_instance(working_directory,
name = "batsim_process",
cwd = working_directory,
timeout = timeout,
lifecycle_handlers = [batsim_lifecycle_handler])
lifecycle_handlers = [batsim_lifecycle_handler],
stdout_handlers = ['{out}/batsim.stdout'.format(
out = output_directory)],
stderr_handlers = ['{out}/batsim.stderr'.format(
out = output_directory)])
sched_process = Process(cmd = 'bash {sched_script}'.format(
sched_script = sched_script_filename),
......@@ -567,7 +559,11 @@ def execute_one_instance(working_directory,
name = "sched_process",
cwd = working_directory,
timeout = timeout,
lifecycle_handlers = [sched_lifecycle_handler])
lifecycle_handlers = [sched_lifecycle_handler],
stdout_handlers = ['{out}/sched.stdout'.format(
out = output_directory)],
stderr_handlers = ['{out}/sched.stderr'.format(
out = output_directory)])
# Let's create a shared execution data, which will be given to LC handlers
execution_data = InstanceExecutionData(batsim_process = batsim_process,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment