fix(dockerutil): GetImageMetadata: detect correct usr lib dir based on os release (#127)

johnstcn · web-flow · commit a556bac7b6cd · 2025-03-06T12:43:04.000Z
We had been mounting GPU libraries into the destination /usr/lib inside the inner container by default. This doesn't hold true for Redhat-based distributions, who use /usr/lib64 instead.

- Adds the capability to sniff /etc/os-release inside the inner container
- Modifies the destination path for bind-mounting GPU libraries to the inner container based on the detected OS release ID
- Adds integration testing for Envbox+NVidia GPUs (not run by default in CI)
- Attempts to automatically set CODER_USR_LIB_DIR if not specified.
- Adds the capability to manually set the inner mount path via CODER_INNER_USR_LIB_DIR.
diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
@@ -8,7 +8,7 @@ on:
   pull_request:
 
   workflow_dispatch:
-    
+
 permissions:
   actions: read
   checks: none
@@ -209,7 +209,7 @@ jobs:
           category: "Trivy"
 
       - name: Upload Trivy scan results as an artifact
-        uses: actions/upload-artifact@v3
+        uses: actions/upload-artifact@v4
         with:
           name: trivy
           path: trivy-results.sarif
diff --git a/Makefile b/Makefile
@@ -39,4 +39,4 @@ test:
 
 .PHONY: test-integration
 test-integration:
-	go test -v -count=1 -tags=integration ./integration/
+	CODER_TEST_INTEGRATION=1 go test -v -count=1 ./integration/
diff --git a/README.md b/README.md
@@ -21,6 +21,7 @@ The environment variables can be used to configure various aspects of the inner
 | `CODER_DOCKER_BRIDGE_CIDR`     | The bridge CIDR to start the Docker daemon with.                                                                                                                                                                                                                                                                                                                                                                                                                                                                               | false    |
 | `CODER_MOUNTS`                 | A list of mounts to mount into the inner container. Mounts default to `rw`. Ex: `CODER_MOUNTS=/home/coder:/home/coder,/var/run/mysecret:/var/run/mysecret:ro`                                                                                                                                                                                                                                                                                                                                                                  | false    |
 | `CODER_USR_LIB_DIR`            | The mountpoint of the host `/usr/lib` directory. Only required when using GPUs.                                                                                                                                                                                                                                                                                                                                                                                                                                                | false    |
+| `CODER_INNER_USR_LIB_DIR`      | The inner /usr/lib mountpoint. This is automatically detected based on `/etc/os-release` in the inner image, but may optionally be overridden.                                                                                                                                                                                                                                                                                                                                                                                 | false    |
 | `CODER_ADD_TUN`                | If `CODER_ADD_TUN=true` add a TUN device to the inner container.                                                                                                                                                                                                                                                                                                                                                                                                                                                               | false    |
 | `CODER_ADD_FUSE`               | If `CODER_ADD_FUSE=true` add a FUSE device to the inner container.                                                                                                                                                                                                                                                                                                                                                                                                                                                             | false    |
 | `CODER_ADD_GPU`                | If `CODER_ADD_GPU=true` add detected GPUs and related files to the inner container. Requires setting `CODER_USR_LIB_DIR` and mounting in the hosts `/usr/lib/` directory.                                                                                                                                                                                                                                                                                                                                                      | false    |
@@ -43,7 +44,7 @@ It is not possible to develop `envbox` effectively using a containerized environ
 
 If a login is required to pull images from a private repository, create a secret following the instructions from the [Kubernetes Documentation](https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/#create-a-secret-by-providing-credentials-on-the-command-line) as such:
 
-```
+```shell
 kubectl -n <coder namespace> create secret docker-registry regcred \
         --docker-server=<your-registry-server> \
         --docker-username=<your-name> \
@@ -53,7 +54,7 @@ kubectl -n <coder namespace> create secret docker-registry regcred \
 
 Then reference the secret in your template as such:
 
-```
+```shell
 env {
   name = "CODER_IMAGE_PULL_SECRET"
   value_from {
@@ -93,11 +94,60 @@ When passing through GPUs to the inner container, you may end up using associate
 
 Envbox will detect these mounts and pass them inside the inner container it creates, so that GPU-aware tools run inside the inner container can still utilize these libraries.
 
+Here's an example Docker command to run a GPU-enabled workload in Envbox. Note the following:
+
+1) The NVidia container runtime must be installed on the host (`--runtime=nvidia`).
+2) `CODER_ADD_GPU=true` must be set to enable GPU-specific functionality.
+3) When `CODER_ADD_GPU` is set, it is required to also set `CODER_USR_LIB_DIR` to a location where the relevant host directory has been mounted inside the outer container. In the below example, this is `/usr/lib/x86_64-linux-gnu` on the underlying host. It is mounted into the container under the path `/var/coder/usr/lib`. We then set `CODER_USR_LIB_DIR=/var/coder/usr/lib`. The actual location inside the container is not important **as long as it does not overwrite any pre-existing directories containing system libraries**.
+4) The location to mount the libraries in the inner container is determined by the distribution ID in the `/etc/os-release` of the inner container. If the automatically determined location is incorrect (e.g. `nvidia-smi` complains about not being able to find libraries), you can set it manually via `CODER_INNER_USR_LIB_DIR`.
+
+   > Note: this step is required in case user workloads require libraries from the underlying host that are not added in by the container runtime.
+
+```shell
+docker run -it --rm \
+  --runtime=nvidia \
+  --gpus=all \
+  --name=envbox-gpu-test \
+  -v /tmp/envbox/docker:/var/lib/coder/docker \
+  -v /tmp/envbox/containers:/var/lib/coder/containers \
+  -v /tmp/envbox/sysbox:/var/lib/sysbox \
+  -v /tmp/envbox/docker:/var/lib/docker \
+  -v /usr/src:/usr/src:ro \
+  -v /lib/modules:/lib/modules:ro \
+  -v /usr/lib/x86_64-linux-gnu:/var/coder/usr/lib \
+  --privileged \
+  -e CODER_INNER_IMAGE=nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda10.2 \
+  -e CODER_INNER_USERNAME=root \
+  -e CODER_ADD_GPU=true \
+  -e CODER_USR_LIB_DIR=/var/coder/usr/lib \
+  envbox:latest /envbox docker
+```
+
+To validate GPU functionality, you can run the following commands:
+
+1) To validate that the container runtime correctly passed the required GPU tools into the outer container, run:
+
+   ```shell
+   docker exec -it envbox-gpu-test nvidia-smi
+   ```
+
+2) To validate the same inside the inner container, run:
+
+   ```shell
+   docker exec -it envbox-gpu-test docker exec -it workspace_cvm nvidia-smi
+   ```
+
+3) To validate that the sample CUDA application inside the container runs correctly:
+
+   ```shell
+   docker exec -it envbox-gpu-test docker exec -it workspace_cvm /tmp/vectorAdd
+   ```
+
 ## Hacking
 
 Here's a simple one-liner to run the `codercom/enterprise-minimal:ubuntu` image in Envbox using Docker:
 
-```
+```shell
 docker run -it --rm \
   -v /tmp/envbox/docker:/var/lib/coder/docker \
   -v /tmp/envbox/containers:/var/lib/coder/containers \
diff --git a/cli/docker.go b/cli/docker.go
@@ -98,6 +98,7 @@ var (
 	EnvMemory               = "CODER_MEMORY"
 	EnvAddGPU               = "CODER_ADD_GPU"
 	EnvUsrLibDir            = "CODER_USR_LIB_DIR"
+	EnvInnerUsrLibDir       = "CODER_INNER_USR_LIB_DIR"
 	EnvDockerConfig         = "CODER_DOCKER_CONFIG"
 	EnvDebug                = "CODER_DEBUG"
 	EnvDisableIDMappedMount = "CODER_DISABLE_IDMAPPED_MOUNT"
@@ -135,6 +136,7 @@ type flags struct {
 	boostrapScript       string
 	containerMounts      string
 	hostUsrLibDir        string
+	innerUsrLibDir       string
 	dockerConfig         string
 	cpus                 int
 	memory               int
@@ -370,6 +372,7 @@ func dockerCmd() *cobra.Command {
 	cliflag.StringVarP(cmd.Flags(), &flags.boostrapScript, "boostrap-script", "", EnvBootstrap, "", "The script to use to bootstrap the container. This should typically install and start the agent.")
 	cliflag.StringVarP(cmd.Flags(), &flags.containerMounts, "mounts", "", EnvMounts, "", "Comma separated list of mounts in the form of '<source>:<target>[:options]' (e.g. /var/lib/docker:/var/lib/docker:ro,/usr/src:/usr/src).")
 	cliflag.StringVarP(cmd.Flags(), &flags.hostUsrLibDir, "usr-lib-dir", "", EnvUsrLibDir, "", "The host /usr/lib mountpoint. Used to detect GPU drivers to mount into inner container.")
+	cliflag.StringVarP(cmd.Flags(), &flags.innerUsrLibDir, "inner-usr-lib-dir", "", EnvInnerUsrLibDir, "", "The inner /usr/lib mountpoint. This is automatically detected based on /etc/os-release in the inner image, but may optionally be overridden.")
 	cliflag.StringVarP(cmd.Flags(), &flags.dockerConfig, "docker-config", "", EnvDockerConfig, "/root/.docker/config.json", "The path to the docker config to consult when pulling an image.")
 	cliflag.BoolVarP(cmd.Flags(), &flags.addTUN, "add-tun", "", EnvAddTun, false, "Add a TUN device to the inner container.")
 	cliflag.BoolVarP(cmd.Flags(), &flags.addFUSE, "add-fuse", "", EnvAddFuse, false, "Add a FUSE device to the inner container.")
@@ -523,7 +526,7 @@ func runDockerCVM(ctx context.Context, log slog.Logger, client dockerutil.Client
 	// of the user so that we can chown directories to the namespaced UID inside
 	// the inner container as well as whether we should be starting the container
 	// with /sbin/init or something simple like 'sleep infinity'.
-	imgMeta, err := dockerutil.GetImageMetadata(ctx, client, flags.innerImage, flags.innerUsername)
+	imgMeta, err := dockerutil.GetImageMetadata(ctx, log, client, flags.innerImage, flags.innerUsername)
 	if err != nil {
 		return xerrors.Errorf("get image metadata: %w", err)
 	}
@@ -534,6 +537,8 @@ func runDockerCVM(ctx context.Context, log slog.Logger, client dockerutil.Client
 		slog.F("uid", imgMeta.UID),
 		slog.F("gid", imgMeta.GID),
 		slog.F("has_init", imgMeta.HasInit),
+		slog.F("os_release", imgMeta.OsReleaseID),
+		slog.F("home_dir", imgMeta.HomeDir),
 	)
 
 	uid, err := strconv.ParseInt(imgMeta.UID, 10, 32)
@@ -614,16 +619,33 @@ func runDockerCVM(ctx context.Context, log slog.Logger, client dockerutil.Client
 			})
 		}
 
+		innerUsrLibDir := imgMeta.UsrLibDir()
+		if flags.innerUsrLibDir != "" {
+			log.Info(ctx, "overriding auto-detected inner usr lib dir ",
+				slog.F("before", innerUsrLibDir),
+				slog.F("after", flags.innerUsrLibDir))
+			innerUsrLibDir = flags.innerUsrLibDir
+		}
 		for _, bind := range binds {
 			// If the bind has a path that points to the host-mounted /usr/lib
 			// directory we need to remap it to /usr/lib inside the container.
 			mountpoint := bind.Path
 			if strings.HasPrefix(mountpoint, flags.hostUsrLibDir) {
 				mountpoint = filepath.Join(
-					"/usr/lib",
+					// Note: we used to mount into /usr/lib, but this can change
+					// based on the distro inside the container.
+					innerUsrLibDir,
 					strings.TrimPrefix(mountpoint, strings.TrimSuffix(flags.hostUsrLibDir, "/")),
 				)
 			}
+			// Even though xunix.GPUs checks for duplicate mounts, we need to check
+			// for duplicates again here after remapping the path.
+			if slices.ContainsFunc(mounts, func(m xunix.Mount) bool {
+				return m.Mountpoint == mountpoint
+			}) {
+				log.Debug(ctx, "skipping duplicate mount", slog.F("path", mountpoint))
+				continue
+			}
 			mounts = append(mounts, xunix.Mount{
 				Source:     bind.Path,
 				Mountpoint: mountpoint,
diff --git a/dockerutil/image.go b/dockerutil/image.go
@@ -9,6 +9,7 @@ import (
 	"strings"
 	"time"
 
+	"cdr.dev/slog"
 	dockertypes "github.com/docker/docker/api/types"
 	"github.com/docker/docker/api/types/container"
 	"github.com/docker/docker/api/types/filters"
@@ -22,6 +23,23 @@ import (
 
 const diskFullStorageDriver = "vfs"
 
+// Adapted from https://github.com/NVIDIA/libnvidia-container/blob/v1.15.0/src/nvc_container.c#L152-L165
+var UsrLibDirs = map[string]string{
+	// Debian-based distros use a multi-arch directory.
+	"debian": usrLibMultiarchDir,
+	"ubuntu": usrLibMultiarchDir,
+	// Fedora and Redhat use the standard /usr/lib64.
+	"fedora": "/usr/lib64",
+	"rhel":   "/usr/lib64",
+	// Fall back to the standard /usr/lib.
+	"linux": "/usr/lib",
+}
+
+// /etc/os-release is the standard location for system identification data on
+// Linux systems running systemd.
+// Ref: https://www.freedesktop.org/software/systemd/man/latest/os-release.html
+var etcOsRelease = "/etc/os-release"
+
 type PullImageConfig struct {
 	Client     Client
 	Image      string
@@ -148,15 +166,16 @@ func processImagePullEvents(r io.Reader, fn ImagePullProgressFn) error {
 }
 
 type ImageMetadata struct {
-	UID     string
-	GID     string
-	HomeDir string
-	HasInit bool
+	UID         string
+	GID         string
+	HomeDir     string
+	HasInit     bool
+	OsReleaseID string
 }
 
 // GetImageMetadata returns metadata about an image such as the UID/GID of the
 // provided username and whether it contains an /sbin/init that we should run.
-func GetImageMetadata(ctx context.Context, client Client, img, username string) (ImageMetadata, error) {
+func GetImageMetadata(ctx context.Context, log slog.Logger, client Client, img, username string) (ImageMetadata, error) {
 	// Creating a dummy container to inspect the filesystem.
 	created, err := client.ContainerCreate(ctx,
 		&container.Config{
@@ -226,14 +245,58 @@ func GetImageMetadata(ctx context.Context, client Client, img, username string)
 		return ImageMetadata{}, xerrors.Errorf("no users returned for username %s", username)
 	}
 
+	// Read the /etc/os-release file to get the ID of the OS.
+	// We only care about the ID field.
+	var osReleaseID string
+	out, err = ExecContainer(ctx, client, ExecConfig{
+		ContainerID: inspect.ID,
+		Cmd:         "cat",
+		Args:        []string{etcOsRelease},
+	})
+	if err != nil {
+		log.Error(ctx, "read os-release", slog.Error(err))
+		log.Error(ctx, "falling back to linux for os-release ID")
+		osReleaseID = "linux"
+	} else {
+		osReleaseID = GetOSReleaseID(out)
+	}
+
 	return ImageMetadata{
-		UID:     users[0].Uid,
-		GID:     users[0].Gid,
-		HomeDir: users[0].HomeDir,
-		HasInit: initExists,
+		UID:         users[0].Uid,
+		GID:         users[0].Gid,
+		HomeDir:     users[0].HomeDir,
+		HasInit:     initExists,
+		OsReleaseID: osReleaseID,
 	}, nil
 }
 
+// UsrLibDir returns the path to the /usr/lib directory for the given
+// operating system determined by the /etc/os-release file.
+func (im ImageMetadata) UsrLibDir() string {
+	if val, ok := UsrLibDirs[im.OsReleaseID]; ok && val != "" {
+		return val
+	}
+	return UsrLibDirs["linux"] // fallback
+}
+
+// GetOSReleaseID returns the ID of the operating system from the
+// raw contents of /etc/os-release.
+func GetOSReleaseID(raw []byte) string {
+	var osReleaseID string
+	for _, line := range strings.Split(string(raw), "\n") {
+		if strings.HasPrefix(line, "ID=") {
+			osReleaseID = strings.TrimPrefix(line, "ID=")
+			// The value may be quoted.
+			osReleaseID = strings.Trim(osReleaseID, "\"")
+			break
+		}
+	}
+	if osReleaseID == "" {
+		return "linux"
+	}
+	return osReleaseID
+}
+
 func DefaultLogImagePullFn(log buildlog.Logger) func(ImagePullEvent) error {
 	var (
 		// Avoid spamming too frequently, the messages can come quickly
diff --git a/dockerutil/image_linux_amd64.go b/dockerutil/image_linux_amd64.go
@@ -0,0 +1,5 @@
+package dockerutil
+
+// usrLibMultiarchDir is defined for arm64 and amd64 architectures.
+// Envbox is not published for other architectures.
+var usrLibMultiarchDir = "/usr/lib/aarch64-linux-gnu"
diff --git a/dockerutil/image_linux_arm64.go b/dockerutil/image_linux_arm64.go
@@ -0,0 +1,5 @@
+package dockerutil
+
+// usrLibMultiarchDir is defined for arm64 and amd64 architectures.
+// Envbox is not published for other architectures.
+var usrLibMultiarchDir = "/usr/lib/x86_64-linux-gnu"
diff --git a/integration/docker_test.go b/integration/docker_test.go
@@ -1,6 +1,3 @@
-//go:build integration
-// +build integration
-
 package integration_test
 
 import (
@@ -24,6 +21,9 @@ import (
 
 func TestDocker(t *testing.T) {
 	t.Parallel()
+	if val, ok := os.LookupEnv("CODER_TEST_INTEGRATION"); !ok || val != "1" {
+		t.Skip("integration tests are skipped unless CODER_TEST_INTEGRATION=1")
+	}
 
 	// Dockerd just tests that dockerd can spin up and function correctly.
 	t.Run("Dockerd", func(t *testing.T) {
diff --git a/integration/gpu_test.go b/integration/gpu_test.go
diff --git a/integration/integrationtest/docker.go b/integration/integrationtest/docker.go