Thanks to visit codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 63 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,14 +1,76 @@
# 0.2.7
# 0.3.0

- Caliban now authenticates AI Platform job submissions using the authentication
provided by `gcloud auth login`, rather than requiring a service account key.
This significantly simplifies the setup required for a first time user.

- `caliban cloud` now checks if the image exists remotely before issuing a
`docker push` command on the newly built image
(https://github.com/google/caliban/pull/36)

- Big internal refactor to make it easier to work on code, increase test
coverage, add new backends (https://github.com/google/caliban/pull/32)

- Add `schema` validation for `.calibanconfig.json`. This makes it much easier
to add configuration knobs: https://github.com/google/caliban/pull/37

- Custom base image support (https://github.com/google/caliban/pull/39), thanks
to https://github.com/google/caliban/pull/20 from @sagravat.
`.calibanconfig.json` now supports a `"base_image"` key. For the value, you can
supply:
- a Docker base image of your own
- a dict of the form `{"cpu": "base_image", "gpu": "base_image"}` with both
entries optional, of course.

Two more cool features.

First, if you use a format string, like `"my_image-{}:latest"`, the format
block `{}` will be filled in with either `cpu` or `gpu`, depending on the mode
Caliban is using.

Second, we now have native support for [Google's Deep Learning
VMs](https://cloud.google.com/ai-platform/deep-learning-vm/docs/introduction)
as base images. The actual VM containers [live
here](https://console.cloud.google.com/gcr/images/deeplearning-platform-release/GLOBAL).
If you provide any of the following strings, Caliban will expand them out to
the actual base image location:

```
dlvm:pytorch-cpu
dlvm:pytorch-cpu-1.0
dlvm:pytorch-cpu-1.1
dlvm:pytorch-cpu-1.2
dlvm:pytorch-cpu-1.3
dlvm:pytorch-cpu-1.4
dlvm:pytorch-gpu
dlvm:pytorch-gpu-1.0
dlvm:pytorch-gpu-1.1
dlvm:pytorch-gpu-1.2
dlvm:pytorch-gpu-1.3
dlvm:pytorch-gpu-1.4
dlvm:tf-cpu
dlvm:tf-cpu-1.0
dlvm:tf-cpu-1.13
dlvm:tf-cpu-1.14
dlvm:tf-cpu-1.15
dlvm:tf-gpu
dlvm:tf-gpu-1.0
dlvm:tf-gpu-1.13
dlvm:tf-gpu-1.14
dlvm:tf-gpu-1.15
dlvm:tf2-cpu
dlvm:tf2-cpu-2.0
dlvm:tf2-cpu-2.1
dlvm:tf2-cpu-2.2
dlvm:tf2-gpu
dlvm:tf2-gpu-2.0
dlvm:tf2-gpu-2.1
dlvm:tf2-gpu-2.2
```

Format strings work here as well! So, `"dlvm:pytorch-{}-1.4"` is a totally valid
base image.

# 0.2.6

- Prepared for a variety of base images by setting up a cloud build matrix:
Expand Down
1 change: 0 additions & 1 deletion caliban/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -373,7 +373,6 @@ def dry_run_arg(parser):


def container_parser(parser):

executing_parser(parser)

image_tag_arg(parser)
Expand Down
115 changes: 92 additions & 23 deletions caliban/config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
import os
import sys
from enum import Enum
from typing import Any, Dict, List, Optional
from typing import Any, Dict, List, Optional, Tuple

import schema as s

Expand All @@ -35,7 +35,7 @@ class JobMode(str, Enum):

@staticmethod
def parse(label: str) -> "JobMode":
    """Parses a label like 'cpu', ' GPU ' into the matching JobMode member.

    Strips surrounding whitespace and upcases before the Enum lookup, so
    padded or lower-case values read from user config resolve correctly.

    Raises:
      ValueError: if the normalized label is not a valid JobMode value.
    """
    # Diff artifact in SOURCE left two consecutive returns; only the
    # stripped variant (the added line) is the intended behavior.
    return JobMode(label.upper().strip())


DRY_RUN_FLAG = "--dry_run"
Expand All @@ -58,33 +58,79 @@ def parse(label):
"type": "ACCELERATOR_TYPE_UNSPECIFIED"
}

# Dictionary of the DLVM "Platform" to a sequence of versions that are
# currently available as DLVMs. The full list of images is here:
# https://console.cloud.google.com/gcr/images/deeplearning-platform-release/GLOBAL/
# A `None` entry stands for the unversioned (latest) image for that platform;
# versioned entries expand to image names like `pytorch-gpu.1-4` (see
# `_dlvm_config` below).
DLVMS = {
    "pytorch": [None, "1.0", "1.1", "1.2", "1.3", "1.4"],
    "tf": [None, "1.0", "1.13", "1.14", "1.15"],
    "tf2": [None, "2.0", "2.1", "2.2"],
}

# Schema for Caliban Config


def _dlvm_config(job_mode: JobMode) -> Dict[str, str]:
    """Generates a dict of custom DLVM image identifier -> the actual image ID
    available from GCR, for a single job mode.

    Args:
      job_mode: the JobMode (cpu or gpu) to generate identifiers for.

    Returns:
      Dict mapping identifiers like 'dlvm:pytorch-gpu-1.4' to full GCR image
      IDs like 'gcr.io/deeplearning-platform-release/pytorch-gpu.1-4'.
    """
    # JobMode mixes in str, so lower() yields the plain 'cpu' / 'gpu' string.
    mode = job_mode.lower()

    def with_version(s: str, version: Optional[str], sep: str) -> str:
        """Appends sep + version to s; returns s unchanged when version is None."""
        return f"{s}{sep}{version}" if version else s

    # Fix: the original inner helper was annotated `-> str` but returns a pair.
    def entry(lib: str, version: Optional[str]) -> Tuple[str, str]:
        """Returns one (caliban identifier, GCR image ID) pair."""
        base = f"gcr.io/deeplearning-platform-release/{lib}-{mode}"
        k = with_version(f"dlvm:{lib}-{mode}", version, "-")
        # GCR DLVM image names join a dash-separated version with a dot,
        # e.g. gcr.io/deeplearning-platform-release/pytorch-gpu.1-4
        v = with_version(base, version.replace('.', '-') if version else None, ".")
        return k, v

    return dict(
        entry(lib, v) for lib, versions in DLVMS.items() for v in versions)


# This is a dictionary of some identifier like 'dlvm:pytorch-1.0' to the actual
# Docker image ID, covering both CPU and GPU variants.
DLVM_CONFIG = dict(_dlvm_config(JobMode.CPU))
DLVM_CONFIG.update(_dlvm_config(JobMode.GPU))


def expand_image(image: str) -> str:
    """Maps a special `dlvm:`-prefixed identifier to its full GCR image ID.

    Any string that is not one of the known identifiers is returned unchanged.
    """
    if image in DLVM_CONFIG:
        return DLVM_CONFIG[image]
    return image


# Schema for the "apt_packages" config entry: either a flat list of package
# names (installed in both modes) or a dict with optional "gpu"/"cpu" lists.
AptPackages = s.Or(
    [str], {
        s.Optional("gpu", default=list): [str],
        s.Optional("cpu", default=list): [str]
    },
    error=""""apt_packages" entry must be a dictionary or list, not '{}'""")

# A single image entry: any string, run through expand_image so `dlvm:`
# shorthands are replaced with full GCR image IDs during validation.
Image = s.And(str, s.Use(expand_image))

# Schema for the "base_image" config entry: either one image string, or a
# dict with optional "gpu"/"cpu" images (missing modes default to None).
BaseImage = s.Or(
    Image, {
        s.Optional("gpu", default=None): Image,
        s.Optional("cpu", default=None): Image
    },
    error=
    """"base_image" entry must be a string OR dict with 'cpu' and 'gpu' keys, not '{}'"""
)

# Schema for the `.calibanconfig.json` user configuration file. The SOURCE
# diff artifact interleaved the old and new dict bodies, producing duplicate
# s.Optional keys; this is the deduplicated (post-change) schema.
CalibanConfig = s.Schema({
    s.Optional("build_time_credentials", default=False): bool,
    s.Optional("default_mode", default=JobMode.CPU): s.Use(JobMode.parse),
    s.Optional("project_id"): s.And(str, len),
    s.Optional("cloud_key"): s.And(str, len),
    s.Optional("base_image", default=None): BaseImage,
    s.Optional("apt_packages", default=AptPackages.validate({})): AptPackages,

    # Allow extra entries without killing the schema to allow for backwards
    # compatibility.
    s.Optional(str): str,
})

# Accessors
Expand Down Expand Up @@ -143,10 +189,6 @@ def extract_region(m: Dict[str, Any]) -> ct.Region:
return DEFAULT_REGION


def extract_zone(m: Dict[str, Any]) -> str:
    """Returns the default zone (suffix '-a') within the configured region."""
    region = extract_region(m)
    return f"{region}-a"


def extract_cloud_key(m: Dict[str, Any]) -> Optional[str]:
"""Returns the Google service account key filepath specified in the args;
defaults to the $GOOGLE_APPLICATION_CREDENTIALS variable.
Expand All @@ -170,6 +212,33 @@ def apt_packages(conf: CalibanConfig, mode: JobMode) -> List[str]:
return packages


def base_image(conf: CalibanConfig, mode: JobMode) -> Optional[str]:
    """Returns a custom base image, if the user has supplied one in the
    calibanconfig.

    If the custom base image has a marker for a format string, like
    'pytorch-{}', this method will fill it in with the current mode
    (cpu or gpu).

    Args:
      conf: parsed calibanconfig dict.
      mode: the current JobMode; its lowercase value fills any '{}' marker.

    Returns:
      The (possibly expanded) base image string, or None when no image is
      configured for this mode.
    """
    mode_s = mode.lower()

    image = conf.get("base_image")
    if image is None:
        return None

    if isinstance(image, str):
        ret = image
    else:
        # Dictionary case. The BaseImage schema defaults missing modes to
        # None, so a config that only declares (say) a gpu image yields None
        # here in cpu mode; the original indexed unconditionally and would
        # crash on None.format(...).
        ret = image.get(mode_s)
        if ret is None:
            return None

    # We run expand_image again in case the user has included a format {} in
    # the string (e.g. 'dlvm:pytorch-{}-1.4').
    return expand_image(ret.format(mode_s))


def caliban_config(conf_path: str = CALIBAN_CONFIG) -> CalibanConfig:
"""Returns a dict that represents a `.calibanconfig.json` file if present,
empty dictionary otherwise.
Expand Down
14 changes: 2 additions & 12 deletions caliban/docker/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,7 @@
import subprocess
from enum import Enum
from pathlib import Path
from typing import (Any, Callable, Dict, List, NamedTuple, NewType, Optional,
Union)
from typing import Any, Dict, List, NamedTuple, NewType, Optional, Union

from absl import logging
from blessings import Terminal
Expand Down Expand Up @@ -458,7 +457,6 @@ def _extra_dir_entries(workdir: str, user_id: int, user_group: int,
def _dockerfile_template(
job_mode: c.JobMode,
workdir: Optional[str] = None,
base_image_fn: Optional[Callable[[c.JobMode], str]] = None,
package: Optional[Union[List, u.Package]] = None,
requirements_path: Optional[str] = None,
conda_env_path: Optional[str] = None,
Expand Down Expand Up @@ -486,11 +484,6 @@ def _dockerfile_template(
Most functions that call _dockerfile_template pass along any kwargs that they
receive. It should be enough to add kwargs here, then rely on that mechanism
to pass them along, vs adding kwargs all the way down the call chain.

Supply a custom base_image_fn (function from job_mode -> image ID) to inject
more complex Docker commands into the Caliban environments by, for example,
building your own image on top of the TF base images, then using that.

"""
uid = os.getuid()
gid = os.getgid()
Expand All @@ -502,10 +495,7 @@ def _dockerfile_template(
if workdir is None:
workdir = DEFAULT_WORKDIR

if base_image_fn is None:
base_image_fn = base_image_id

base_image = base_image_fn(job_mode)
base_image = c.base_image(caliban_config, job_mode) or base_image_id(job_mode)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

much better using config


dockerfile = """
FROM {base_image}
Expand Down
2 changes: 1 addition & 1 deletion caliban/platform/notebook.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def run_notebook(job_mode: c.JobMode,
docker_args = ["-p", "{}:{}".format(port, port)] + run_args

ps.run_interactive(job_mode,
entrypoint="/opt/conda/envs/caliban/bin/python",
entrypoint="python",
entrypoint_args=jupyter_args,
run_args=docker_args,
inject_notebook=inject_arg,
Expand Down
Loading