Commit d5d67929 authored by Samuel GAIST

[experiments] pre-commit cleanup

parent 85b35556
@@ -61,31 +61,37 @@ logger = logging.getLogger(__name__)
 def run_experiment(configuration, name, force, use_docker, use_local, quiet):
-    '''Run experiments locally'''
+    """Run experiments locally"""

     def load_result(executor):
-        '''Loads the result of an experiment, in a single go'''
+        """Loads the result of an experiment, in a single go"""

         f = CachedDataSource()
-        assert f.setup(os.path.join(executor.cache,
-                       executor.data['result']['path'] + '.data'),
-                       executor.prefix)
+        success = f.setup(
+            os.path.join(executor.cache, executor.data["result"]["path"] + ".data"),
+            executor.prefix,
+        )
+        if not success:
+            raise RuntimeError("Failed to setup cached data source")
         data, start, end = f[0]
         return data

     def print_results(executor):
         data = load_result(executor)
-        r = reindent(simplejson.dumps(data.as_dict(), indent=2,
-                     cls=NumpyJSONEncoder), 2)
+        r = reindent(
+            simplejson.dumps(data.as_dict(), indent=2, cls=NumpyJSONEncoder), 2
+        )
         logger.info(" Results:\n%s", r)

     def reindent(s, n):
-        '''Re-indents output so it is more visible'''
-        margin = n * ' '
-        return margin + ('\n' + margin).join(s.split('\n'))
+        """Re-indents output so it is more visible"""
+        margin = n * " "
+        return margin + ("\n" + margin).join(s.split("\n"))

     def simplify_time(s):
-        '''Re-writes the time so it is easier to understand it'''
+        """Re-writes the time so it is easier to understand it"""

         minute = 60.0
         hour = 60 * minute
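Aside: the `assert f.setup(...)` call above is replaced with an explicit check because `assert` statements are stripped entirely when Python runs with the `-O` flag, so a setup failure would otherwise pass silently. A minimal standalone sketch of the pattern (the `FakeSource` class is hypothetical, for illustration only; the real class is `CachedDataSource`):

    class FakeSource:
        """Hypothetical stand-in for CachedDataSource."""

        def setup(self, path, prefix):
            # Mimics the API seen in the diff: returns False on failure
            return False

    source = FakeSource()

    # Fragile: this whole statement disappears under `python -O`
    # assert source.setup("cache/result.data", "prefix")

    # Robust: the check always runs and fails loudly (raises here)
    if not source.setup("cache/result.data", "prefix"):
        raise RuntimeError("Failed to setup cached data source")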
@@ -110,7 +116,7 @@ def run_experiment(configuration, name, force, use_docker, use_local, quiet):
         return "%d days %d h %d m %.2f s" % (days, hours, minutes, seconds)

     def simplify_size(s):
-        '''Re-writes the size so it is easier to understand it'''
+        """Re-writes the size so it is easier to understand it"""

         kb = 1024.0
         mb = kb * kb
@@ -129,14 +135,17 @@ def run_experiment(configuration, name, force, use_docker, use_local, quiet):
     def index_experiment_databases(cache_path, experiment):
         for block_name, infos in experiment.datasets.items():
-            view = infos['database'].view(infos['protocol'], infos['set'])
-            filename = toPath(hashDataset(infos['database'].name,
-                              infos['protocol'],
-                              infos['set']),
-                              suffix='.db')
+            view = infos["database"].view(infos["protocol"], infos["set"])
+            filename = toPath(
+                hashDataset(infos["database"].name, infos["protocol"], infos["set"]),
+                suffix=".db",
+            )
             database_index_path = os.path.join(cache_path, filename)
             if not os.path.exists(database_index_path):
-                logger.info("Index for database %s not found, building it", infos['database'].name)
+                logger.info(
+                    "Index for database %s not found, building it",
+                    infos["database"].name,
+                )
                 view.index(database_index_path)

     dataformat_cache = {}
@@ -144,14 +153,19 @@ def run_experiment(configuration, name, force, use_docker, use_local, quiet):
     algorithm_cache = {}
     library_cache = {}

-    experiment = Experiment(configuration.path, name,
-                            dataformat_cache, database_cache,
-                            algorithm_cache, library_cache)
+    experiment = Experiment(
+        configuration.path,
+        name,
+        dataformat_cache,
+        database_cache,
+        algorithm_cache,
+        library_cache,
+    )

     if not experiment.valid:
         logger.error("Failed to load the experiment `%s':", name)
         for e in experiment.errors:
-            logger.error(' * %s', e)
+            logger.error(" * %s", e)
         return 1

     if not os.path.exists(configuration.cache):
@@ -173,45 +187,58 @@ def run_experiment(configuration, name, force, use_docker, use_local, quiet):
         executable = None  # use the default

         if use_docker:
-            env = value['configuration']['environment']
-            search_key = '%s (%s)' % (env['name'], env['version'])
+            env = value["configuration"]["environment"]
+            search_key = "%s (%s)" % (env["name"], env["version"])
             if search_key not in host:
-                logger.error("Cannot execute block `%s' on environment `%s': "
-                             "environment was not found' - please install it",
-                             key, search_key)
+                logger.error(
+                    "Cannot execute block `%s' on environment `%s': "
+                    "environment was not found' - please install it",
+                    key,
+                    search_key,
+                )
                 return 1

         if use_docker:
-            executor = DockerExecutor(host, configuration.path,
-                                      value['configuration'],
-                                      configuration.cache, dataformat_cache,
-                                      database_cache, algorithm_cache,
-                                      library_cache)
+            executor = DockerExecutor(
+                host,
+                configuration.path,
+                value["configuration"],
+                configuration.cache,
+                dataformat_cache,
+                database_cache,
+                algorithm_cache,
+                library_cache,
+            )
         else:
-            executor = LocalExecutor(configuration.path,
-                                     value['configuration'],
-                                     configuration.cache, dataformat_cache,
-                                     database_cache, algorithm_cache,
-                                     library_cache,
-                                     configuration.database_paths)
+            executor = LocalExecutor(
+                configuration.path,
+                value["configuration"],
+                configuration.cache,
+                dataformat_cache,
+                database_cache,
+                algorithm_cache,
+                library_cache,
+                configuration.database_paths,
+            )

         if not executor.valid:
-            logger.error(
-                "Failed to load the execution information for `%s':", key)
+            logger.error("Failed to load the execution information for `%s':", key)
             for e in executor.errors:
-                logger.error(' * %s', e)
+                logger.error(" * %s", e)
             return 1

         if executor.outputs_exist and not force:
-            logger.info("Skipping execution of `%s' for block `%s' "
-                        "- outputs exist", executor.algorithm.name, key)
+            logger.info(
+                "Skipping execution of `%s' for block `%s' " "- outputs exist",
+                executor.algorithm.name,
+                key,
+            )
             if executor.analysis and not quiet:
                 logger.extra(" Outputs produced:")
                 print_results(executor)
             continue

-        logger.info("Running `%s' for block `%s'",
-                    executor.algorithm.name, key)
+        logger.info("Running `%s' for block `%s'", executor.algorithm.name, key)

         if executable is not None:
             logger.extra(" -> using executable at `%s'", executable)
         else:
@@ -220,49 +247,53 @@ def run_experiment(configuration, name, force, use_docker, use_local, quiet):
         with executor:
             result = executor.process()

-        if result['status'] != 0:
+        if result["status"] != 0:
             logger.error("Block did not execute properly - outputs were reset")
-            logger.error(" Standard output:\n%s",
-                         reindent(result['stdout'], 4))
-            logger.error(" Standard error:\n%s",
-                         reindent(result['stderr'], 4))
-            logger.error(" Captured user error:\n%s",
-                         reindent(result['user_error'], 4))
-            logger.error(" Captured system error:\n%s",
-                         reindent(result['system_error'], 4))
-            logger.extra(" Environment: %s" % 'default environment')
+            logger.error(" Standard output:\n%s", reindent(result["stdout"], 4))
+            logger.error(" Standard error:\n%s", reindent(result["stderr"], 4))
+            logger.error(
+                " Captured user error:\n%s", reindent(result["user_error"], 4)
+            )
+            logger.error(
+                " Captured system error:\n%s", reindent(result["system_error"], 4)
+            )
+            logger.extra(" Environment: %s" % "default environment")
             return 1
         elif use_docker:
-            stats = result['statistics']
-            cpu_stats = stats['cpu']
-            data_stats = stats['data']
+            stats = result["statistics"]
+            cpu_stats = stats["cpu"]
+            data_stats = stats["data"]

-            cpu_total = cpu_stats['total']
+            cpu_total = cpu_stats["total"]
             # Likely means that GPU was used
             if not cpu_total:
                 cpu_total = 1.0

-            logger.extra(" CPU time (user, system, total, percent): "
-                         "%s, %s, %s, %d%%",
-                         simplify_time(cpu_stats['user']),
-                         simplify_time(cpu_stats['system']),
-                         simplify_time(cpu_total),
-                         100. * (cpu_stats['user'] + cpu_stats['system']) /
-                         cpu_total)
-            logger.extra(" Memory usage: %s",
-                         simplify_size(stats['memory']['rss']))
-            logger.extra(" Cached input read: %s, %s",
-                         simplify_time(data_stats['time']['read']),
-                         simplify_size(data_stats['volume']['read']))
-            logger.extra(" Cached output write: %s, %s",
-                         simplify_time(data_stats['time']['write']),
-                         simplify_size(data_stats['volume']['write']))
-            logger.extra(" Communication time: %s (%d%%)",
-                         simplify_time(data_stats['network']['wait_time']),
-                         100. * data_stats['network']['wait_time'] /
-                         cpu_total)
+            logger.extra(
+                " CPU time (user, system, total, percent): " "%s, %s, %s, %d%%",
+                simplify_time(cpu_stats["user"]),
+                simplify_time(cpu_stats["system"]),
+                simplify_time(cpu_total),
+                100.0 * (cpu_stats["user"] + cpu_stats["system"]) / cpu_total,
+            )
+            logger.extra(" Memory usage: %s", simplify_size(stats["memory"]["rss"]))
+            logger.extra(
+                " Cached input read: %s, %s",
+                simplify_time(data_stats["time"]["read"]),
+                simplify_size(data_stats["volume"]["read"]),
+            )
+            logger.extra(
+                " Cached output write: %s, %s",
+                simplify_time(data_stats["time"]["write"]),
+                simplify_size(data_stats["volume"]["write"]),
+            )
+            logger.extra(
+                " Communication time: %s (%d%%)",
+                simplify_time(data_stats["network"]["wait_time"]),
+                100.0 * data_stats["network"]["wait_time"] / cpu_total,
+            )
         else:
-            logger.extra(" Environment: %s" % 'local environment')
+            logger.extra(" Environment: %s" % "local environment")

         if not quiet:
             if executor.analysis:
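One behavioral detail preserved through the reformatting above: `cpu_total` is forced to 1.0 when the container reports zero CPU time (which, per the source comment, likely means the work ran on a GPU), so the percentage computations never divide by zero. The guard in isolation, with made-up numbers:

    cpu_user, cpu_system, cpu_total = 2.5, 0.5, 0.0

    # Zero total CPU time likely means GPU execution; avoid dividing by zero
    if not cpu_total:
        cpu_total = 1.0

    percent = 100.0 * (cpu_user + cpu_system) / cpu_total
    print("%d%%" % percent)  # prints "300%" for these illustrative values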
@@ -270,10 +301,10 @@ def run_experiment(configuration, name, force, use_docker, use_local, quiet):
             logger.extra(" Outputs produced:")
             if executor.analysis:
-                logger.extra(" * %s", executor.data['result']['path'])
+                logger.extra(" * %s", executor.data["result"]["path"])
             else:
-                for name, details in executor.data['outputs'].items():
-                    logger.extra(" * %s", details['path'])
+                for name, details in executor.data["outputs"].items():
+                    logger.extra(" * %s", details["path"])
         else:
             logger.info("Done")
@@ -281,21 +312,26 @@ def run_experiment(configuration, name, force, use_docker, use_local, quiet):
 def caches_impl(configuration, name, ls, delete, checksum):
-    '''List all cache files involved in this experiment'''
+    """List all cache files involved in this experiment"""

     dataformat_cache = {}
     database_cache = {}
     algorithm_cache = {}
     library_cache = {}

-    experiment = Experiment(configuration.path, name,
-                            dataformat_cache, database_cache,
-                            algorithm_cache, library_cache)
+    experiment = Experiment(
+        configuration.path,
+        name,
+        dataformat_cache,
+        database_cache,
+        algorithm_cache,
+        library_cache,
+    )

     if not experiment.valid:
         logger.error("Failed to load the experiment `%s':", name)
         for e in experiment.errors:
-            logger.error(' * %s', e)
+            logger.error(" * %s", e)
         return 1

     scheduled = experiment.setup()
@@ -303,45 +339,47 @@ def caches_impl(configuration, name, ls, delete, checksum):
     block_list = []
     for key, value in scheduled.items():
         block = {
-            'name': key,
-            'algorithm': value['configuration']['algorithm'],
-            'is_analyser': False,
-            'paths': []
+            "name": key,
+            "algorithm": value["configuration"]["algorithm"],
+            "is_analyser": False,
+            "paths": [],
         }

-        if 'outputs' in value['configuration']:  # normal block
-            for name, data in value['configuration']['outputs'].items():
-                block['paths'].append(data['path'])
+        if "outputs" in value["configuration"]:  # normal block
+            for name, data in value["configuration"]["outputs"].items():
+                block["paths"].append(data["path"])
         else:  # analyzer
-            block['is_analyser'] = True
-            block['paths'].append(value['configuration']['result']['path'])
+            block["is_analyser"] = True
+            block["paths"].append(value["configuration"]["result"]["path"])

         block_list.append(block)

     for block in block_list:
-        block_type = 'analyzer' if block['is_analyser'] else 'algorithm'
-        logger.info("block: `%s'", block['name'])
-        logger.info(" %s: `%s'", block_type, block['algorithm'])
+        block_type = "analyzer" if block["is_analyser"] else "algorithm"
+        logger.info("block: `%s'", block["name"])
+        logger.info(" %s: `%s'", block_type, block["algorithm"])

-        for path in block['paths']:
+        for path in block["paths"]:
             # prefix cache path
             path = os.path.join(configuration.cache, path)
             logger.info(" output: `%s'", path)

             if ls:
-                for file in glob.glob(path + '.*'):
-                    logger.info(' %s' % file)
+                for file in glob.glob(path + ".*"):
+                    logger.info(" %s" % file)

             if delete:
-                for file in glob.glob(path + '.*'):
+                for file in glob.glob(path + ".*"):
                     logger.info("removing `%s'...", file)
                     os.unlink(file)

                 common.recursive_rmdir_if_empty(
-                    os.path.dirname(path), configuration.cache)
+                    os.path.dirname(path), configuration.cache
+                )

             if checksum:
-                assert load_data_index(configuration.cache, path + '.data')
+                if not load_data_index(configuration.cache, path + ".data"):
+                    logger.error("Failed to load data index for {}".format(path))
                 logger.info("index for `%s' can be loaded and checksums", path)

     return 0
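The delete branch above pairs the removal of every file sharing a cache stem with a cleanup of directories left empty. A rough standalone equivalent of that pattern, using only the standard library (`rmdir_if_empty` is a guess at what `common.recursive_rmdir_if_empty` does, not its actual implementation):

    import glob
    import os

    def rmdir_if_empty(path, stop):
        # Walk upwards, removing empty directories, but never past `stop`
        while path != stop and os.path.isdir(path) and not os.listdir(path):
            os.rmdir(path)
            path = os.path.dirname(path)

    def delete_cache_entry(cache, relative_path):
        prefix = os.path.join(cache, relative_path)
        # A cache entry is stored as several files sharing one stem
        for filename in glob.glob(prefix + ".*"):
            os.unlink(filename)
        rmdir_if_empty(os.path.dirname(prefix), cache)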
@@ -383,13 +421,21 @@ def pull_impl(webapi, prefix, names, force, indentation, format_cache):
     from .algorithms import pull_impl as algorithms_pull
     from .databases import pull_impl as databases_pull

-    status, names = common.pull(webapi, prefix, 'experiment', names,
-                                ['declaration', 'description'], force,
-                                indentation)
+    if indentation == 0:
+        indentation = 4
+
+    status, names = common.pull(
+        webapi,
+        prefix,
+        "experiment",
+        names,
+        ["declaration", "description"],
+        force,
+        indentation,
+    )

     if status != 0:
-        logger.error(
-            "could not find any matching experiments - widen your search")
+        logger.error("could not find any matching experiments - widen your search")
         return status

     # see what dataformats one needs to pull
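`pull_impl` (and `plot_impl` further down) now normalizes a zero indentation to four spaces once at the top, instead of adding `+ 2` at each nested call site. The effect on log output, in a toy form (the `report` helper is illustrative, not part of the codebase):

    def report(message, indentation=0):
        # Zero means the caller did not pick a width; default to four spaces
        if indentation == 0:
            indentation = 4
        print(" " * indentation + message)

    report("pulling experiment")       # indented four spaces by default
    report("pulling toolchain", 8)     # explicit, deeper indent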
@@ -410,18 +456,37 @@ def pull_impl(webapi, prefix, names, force, indentation, format_cache):
     # downloads any formats to which we depend on
     format_cache = {}
     library_cache = {}
-    tc_status, _ = common.pull(webapi, prefix, 'toolchain',
-                               toolchains, ['declaration', 'description'],
-                               force, indentation + 2)
-    db_status = databases_pull(webapi, prefix, databases, force,
-                               indentation + 2, format_cache)
-    algo_status = algorithms_pull(webapi, prefix, algorithms, force,
-                                  indentation + 2, format_cache, library_cache)
+    tc_status, _ = common.pull(
+        webapi,
+        prefix,
+        "toolchain",
+        toolchains,
+        ["declaration", "description"],
+        force,
+        indentation,
+    )
+    db_status = databases_pull(
+        webapi, prefix, databases, force, indentation, format_cache
+    )
+    algo_status = algorithms_pull(
+        webapi, prefix, algorithms, force, indentation, format_cache, library_cache
+    )

     return status + tc_status + db_status + algo_status


-def plot_impl(webapi, configuration, prefix, names, remote_results, show, force, indentation, format_cache, outputfolder=None):
+def plot_impl(
+    webapi,
+    configuration,
+    prefix,
+    names,
+    remote_results,
+    show,
+    force,
+    indentation,
+    format_cache,
+    outputfolder=None,
+):
     """Plots experiments from the server.

     Parameters:
@@ -463,7 +528,10 @@ def plot_impl(webapi, configuration, prefix, names, remote_results, show, force,
     """

     status = 0
-    RESULTS_SIMPLE_TYPE_NAMES = ('int32', 'float32', 'bool', 'string')
+    RESULTS_SIMPLE_TYPE_NAMES = ("int32", "float32", "bool", "string")
+
+    if indentation == 0:
+        indentation = 4

     if remote_results:
         if outputfolder is None:
@@ -477,7 +545,11 @@ def plot_impl(webapi, configuration, prefix, names, remote_results, show, force,
     for name in names:
         if not remote_results:
             if outputfolder is None:
-                output_folder = os.path.join(configuration.path, common.TYPE_PLURAL['experiment'], name.rsplit('/', 1)[0])
+                output_folder = os.path.join(
+                    configuration.path,
+                    common.TYPE_PLURAL["experiment"],
+                    name.rsplit("/", 1)[0],
+                )
             else:
                 # check if directory exists else create
                 if not os.path.isdir(outputfolder):
@@ -486,22 +558,46 @@ def plot_impl(webapi, configuration, prefix, names, remote_results, show, force,
         check_plottable = False
         if not os.path.exists(configuration.cache) or remote_results:
-            experiment = simplejson.loads(simplejson.dumps(common.fetch_object(webapi, "experiment", name, ['results'])))
-            results = experiment['results']['analysis']
+            experiment = simplejson.loads(
+                simplejson.dumps(
+                    common.fetch_object(webapi, "experiment", name, ["results"])
+                )
+            )
+            results = experiment["results"]["analysis"]
             for key, value in results.iteritems():