# -*- coding: utf-8 -*-
# Apache Software License 2.0
#
# Copyright (c) 2018, Christophe Duong
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Implementations of commands for Jupyter
"""
import datetime
import logging
import os.path
import sys
from os import makedirs
from aiscalator.core.config import AiscalatorConfig
from aiscalator.core.config import convert_to_format
from aiscalator.core.log_regex_analyzer import LogRegexAnalyzer
from aiscalator.core.utils import check_notebook_dir
from aiscalator.core.utils import copy_replace
from aiscalator.core.utils import data_file
from aiscalator.core.utils import notebook_file
from aiscalator.core.utils import subprocess_run
from aiscalator.core.utils import wait_for_jupyter_lab
from aiscalator.jupyter.docker_image import build
def _prepare_docker_env(conf: AiscalatorConfig, program, reason):
"""
Assembles the list of commands to execute a docker run call
When calling "docker run ...", this function also adds a set of
additional parameters to mount the proper volumes and expose the
correct environment for the call in the docker image mapped to the
host directories. This is done so only some specific data and code
folders are accessible within the docker image.
Parameters
----------
conf : AiscalatorConfig
Configuration object for the step
program : List
the rest of the commands to execute as part of
the docker run call
Returns
-------
List
The full Array of Strings representing the commands to execute
in the docker run call
"""
logger = logging.getLogger(__name__)
commands = [
"docker", "run", "--name", conf.step_container_name() + "_" + reason,
"--rm"
]
for env in conf.user_env_file(conf.step_field("task.env")):
if os.path.isfile(env):
commands += ["--env-file", env]
commands += _prepare_docker_image_env(conf)
code_path = conf.step_file_path('task.code_path')
if conf.has_step_field('task.code_format'):
from_format = conf.step_field('task.code_format')
else:
from_format = "py"
from_format += ':'
if conf.has_step_field('task.jupytext_format'):
from_format += conf.step_field('task.jupytext_format')
else:
from_format += "percent"
notebook, _ = notebook_file(code_path)
check_notebook_dir(logger, notebook, from_format)
commands += [
"--mount", "type=bind,source=" + os.path.dirname(notebook) +
",target=/home/jovyan/work/notebook/",
]
commands += _prepare_task_env(conf)
if conf.has_step_field("task.execution_dir_path"):
execution_dir_path = conf.step_file_path('task.execution_dir_path')
if execution_dir_path:
makedirs(execution_dir_path, exist_ok=True)
commands += [
"--mount", "type=bind,source=" +
execution_dir_path +
",target=/home/jovyan/work/notebook_run/"
]
commands += program
return commands
def _prepare_docker_image_env(conf: AiscalatorConfig):
"""
Assemble the list of volumes to mount specific to
building the docker image
Parameters
----------
conf : AiscalatorConfig
Configuration object for the step
Returns
-------
list
list of commands to bind those volumes
"""
commands = []
if conf.config_path() is not None:
commands += [
"--mount",
"type=bind,source=" + os.path.realpath(conf.config_path()) +
",target="
"/home/jovyan/work/" + os.path.basename(conf.config_path()),
]
if conf.has_step_field("docker_image.apt_repository_path"):
apt_repo = conf.step_file_path('docker_image.apt_repository_path')
if apt_repo and os.path.isfile(apt_repo):
commands += [
"--mount", "type=bind,source=" + apt_repo +
",target=/home/jovyan/work/apt_repository.txt",
]
if conf.has_step_field("docker_image.apt_package_path"):
apt_packages = conf.step_file_path('docker_image.apt_package_path')
if apt_packages and os.path.isfile(apt_packages):
commands += [
"--mount", "type=bind,source=" + apt_packages +
",target=/home/jovyan/work/apt_packages.txt",
]
if conf.has_step_field("docker_image.requirements_path"):
requirements = conf.step_file_path('docker_image.requirements_path')
if requirements and os.path.isfile(requirements):
commands += [
"--mount", "type=bind,source=" + requirements +
",target=/home/jovyan/work/requirements.txt",
]
if conf.has_step_field("docker_image.lab_extension_path"):
lab_extensions = conf.step_file_path('docker_image.lab_extension_path')
if lab_extensions and os.path.isfile(lab_extensions):
commands += [
"--mount", "type=bind,source=" + lab_extensions +
",target=/home/jovyan/work/lab_extensions.txt",
]
# allow to pass a list of extra options like ["--network", "bridge"]
if conf.has_step_field("docker_image.docker_extra_options"):
commands += conf.step_field("docker_image.docker_extra_options")
return commands
def _prepare_task_env(conf: AiscalatorConfig):
"""
Assemble the list of volumes to mount specific to
the task execution
Parameters
----------
conf : AiscalatorConfig
Configuration object for the step
Returns
-------
list
list of commands to bind those volumes
"""
commands = []
if conf.root_dir():
commands += _mount_path(conf, "task.modules_src_path",
"/home/jovyan/work/modules/")
commands += _mount_path(conf, "task.input_data_path",
"/home/jovyan/work/data/input/",
readonly=True)
commands += _mount_path(conf, "task.output_data_path",
"/home/jovyan/work/data/output/",
make_dirs=True)
return commands
def _mount_path(conf: AiscalatorConfig, field, target_path,
readonly=False, make_dirs=False):
"""
Returu commands to mount path from list field into the
docker image when running.
Parameters
----------
conf : AiscalatorConfig
Configuration object for the step
field : str
the field in the configuration step that contains the path
target_path : str
where to mount them inside the container
readonly : bool
flag to mount the path as read-only
make_dirs : bool
flag to create the folder on the host before mounting if
it doesn't exists.
Returns
-------
list
commands to mount all the paths from the field
"""
commands = []
if conf.has_step_field(field):
for value in conf.step_field(field):
# TODO handle URL
for i in value:
if make_dirs:
makedirs(os.path.realpath(conf.root_dir() + value[i]),
exist_ok=True)
if os.path.exists(conf.root_dir() + value[i]):
commands += [
"--mount",
"type=bind,source=" +
os.path.realpath(conf.root_dir() + value[i]) +
",target=" + os.path.join(target_path, i) +
(",readonly" if readonly else "")
]
return commands
[docs]def jupyter_run(conf: AiscalatorConfig, prepare_only=False,
param=None, param_raw=None):
"""
Executes the step in browserless mode using papermill
Parameters
----------
conf : AiscalatorConfig
Configuration object for the step
prepare_only : bool
Indicates if papermill should replace the parameters of the
notebook only or it should execute all the cells too
Returns
-------
string
the path to the output notebook resulting from the execution
of this step
"""
logger = logging.getLogger(__name__)
conf.validate_config()
docker_image = build(conf)
if not docker_image:
raise Exception("Failed to build docker image")
notebook, _ = notebook_file(conf.step_file_path('task.code_path'))
notebook = os.path.join("/home/jovyan/work/notebook/",
os.path.basename(notebook))
notebook_output = conf.step_notebook_output_path(notebook)
commands = _prepare_docker_env(conf, [
docker_image, "bash", "start-papermill.sh",
"papermill",
notebook, notebook_output
], "run_" + conf.step_name() + "_"
# add timestamp to name to handle multiple concurrent runs
+ datetime.datetime.now().strftime('%Y%m%d_%H%M%S'))
if prepare_only:
commands.append("--prepare-only")
parameters = conf.step_extract_parameters()
if parameters:
commands += parameters
if param:
for parameter in param:
commands += ["-p", parameter[0], parameter[1]]
if param_raw:
for raw_parameter in param_raw:
commands += ["-r", raw_parameter[0], raw_parameter[1]]
log = LogRegexAnalyzer()
logger.info("Running...: %s", " ".join(commands))
returncode = subprocess_run(commands, log_function=log.grep_logs)
if returncode:
logger.error("Run was not successful, returned status code is: "
+ str(returncode))
sys.exit(returncode)
return os.path.join(conf.step_file_path('task.execution_dir_path'),
os.path.basename(notebook_output))
[docs]def jupyter_edit(conf: AiscalatorConfig, param=None, param_raw=None):
"""
Starts a Jupyter Lab environment configured to edit the focused step
Parameters
----------
conf : AiscalatorConfig
Configuration object for the step
param : list
list of tuples of parameters
param_raw : list
list of tuples of raw parameters
Returns
-------
string
Url of the running jupyter lab
"""
logger = logging.getLogger(__name__)
conf.validate_config()
docker_image = build(conf)
if docker_image:
# TODO: shutdown other jupyter lab still running
notebook, _ = notebook_file(conf.step_field('task.code_path'))
notebook = os.path.basename(notebook)
if conf.step_extract_parameters() or param or param_raw:
jupyter_run(conf, prepare_only=True,
param=param,
param_raw=param_raw)
commands = _prepare_docker_env(conf, [
# TODO: improve port publishing
"-p", "10000:8888",
"-p", "4040:4040",
docker_image, "start.sh",
'jupyter', 'lab'
], "edit")
return wait_for_jupyter_lab(commands, logger, notebook,
10000, "work/notebook")
raise Exception("Failed to build docker image")
[docs]def jupyter_new(name, path, output_format="hocon"):
"""
Starts a Jupyter Lab environment configured to edit a brand new step
Parameters
----------
name : str
name of the new step
path : str
path to where the new step files should be created
output_format : str
the format of the new configuration file to produce
Returns
-------
string
Url of the running jupyter lab
"""
step_file = os.path.join(path, name, name) + '.conf'
if os.path.dirname(step_file):
makedirs(os.path.dirname(step_file), exist_ok=True)
copy_replace(data_file("../config/template/step.conf"), step_file,
pattern="Untitled", replace_value=name)
if output_format != 'hocon':
file = os.path.join(path, name, name) + '.' + output_format
step_file = convert_to_format(step_file, output=file,
output_format=output_format)
notebook = os.path.join(path, name, 'notebook', name) + '.ipynb'
if os.path.dirname(notebook):
makedirs(os.path.dirname(notebook), exist_ok=True)
copy_replace(data_file("../config/template/notebook.json"), notebook)
open(os.path.join(path, name, "apt_repository.txt"), 'a').close()
open(os.path.join(path, name, "apt_packages.txt"), 'a').close()
open(os.path.join(path, name, "requirements.txt"), 'a').close()
open(os.path.join(path, name, "lab_extensions.txt"), 'a').close()
jupyter_edit(AiscalatorConfig(config=step_file,
step_selection=name))