Commit 8ee9b4ae authored by Amir MOHAMMADI
Browse files

eval: Handle failed copying too

parent 6cb3a7f5
Pipeline #27556 failed with stage
in 28 minutes and 53 seconds
......@@ -22,7 +22,7 @@ from import create_directories_safe
logger = logging.getLogger(__name__)
def copy_one_step(train_dir, global_step, save_dir):
def copy_one_step(train_dir, global_step, save_dir, fail_on_error=False):
for path in glob('{}/model.ckpt-{}.*'.format(train_dir, global_step)):
dst = os.path.join(save_dir, os.path.basename(path))
if os.path.isfile(dst):
......@@ -34,6 +34,8 @@ def copy_one_step(train_dir, global_step, save_dir):
"Failed to copy `%s' over to `%s'", path, dst,
if fail_on_error:
def save_n_best_models(train_dir, save_dir, evaluated_file,
......@@ -255,7 +257,11 @@ def eval(estimator, eval_input_fn, hooks, run_once, eval_interval_secs, name,
# copy over the checkpoint before evaluating since it might
# disappear after evaluation.
copy_one_step(estimator.model_dir, global_step, eval_dir)
copy_one_step(estimator.model_dir, global_step, eval_dir, fail_on_error=True)
except Exception:
# skip testing this checkpoint
# evaluate based on the just copied checkpoint_path
checkpoint_path = checkpoint_path.replace(estimator.model_dir, eval_dir)
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment