diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index f3ffc86..078d698 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -8,7 +8,7 @@ on: pull_request: workflow_dispatch: - + permissions: actions: read checks: none @@ -37,13 +37,13 @@ jobs: # Install Go! - uses: actions/setup-go@v3 with: - go-version: "~1.23" + go-version: "~1.24" # Check for Go linting errors! - name: Lint Go uses: golangci/golangci-lint-action@v6.1.1 with: - version: v1.62.2 + version: v1.64.8 args: "--out-${NO_FUTURE}format colored-line-number" - name: Lint shell scripts @@ -98,7 +98,7 @@ jobs: - uses: actions/setup-go@v3 with: - go-version: "~1.23" + go-version: "~1.24" # Sadly the new "set output" syntax (of writing env vars to # $GITHUB_OUTPUT) does not work on both powershell and bash so we use the @@ -127,7 +127,7 @@ jobs: run: go test ./... integration-tests: - runs-on: ubuntu-20.04 + runs-on: ubuntu-22.04 timeout-minutes: 20 steps: - name: Install dependencies @@ -136,7 +136,7 @@ jobs: - uses: actions/setup-go@v3 with: - go-version: "~1.23" + go-version: "~1.24" # Sadly the new "set output" syntax (of writing env vars to # $GITHUB_OUTPUT) does not work on both powershell and bash so we use the @@ -165,12 +165,12 @@ jobs: run: go test -tags=integration ./... 
build: - runs-on: ubuntu-20.04 + runs-on: ubuntu-22.04 steps: - uses: actions/checkout@v3 - uses: actions/setup-go@v3 with: - go-version: "~1.23" + go-version: "~1.24" - name: Go Cache Paths id: go-cache-paths @@ -209,7 +209,7 @@ jobs: category: "Trivy" - name: Upload Trivy scan results as an artifact - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: trivy path: trivy-results.sarif @@ -223,7 +223,7 @@ jobs: - name: Setup Go uses: actions/setup-go@v3 with: - go-version: "~1.23" + go-version: "~1.24" - name: Go Cache Paths id: go-cache-paths @@ -245,7 +245,7 @@ jobs: uses: github/codeql-action/analyze@v2 publish: - runs-on: ubuntu-20.04 + runs-on: ubuntu-22.04 if: github.ref == 'refs/heads/main' steps: - name: Docker Login @@ -262,7 +262,7 @@ jobs: - uses: actions/setup-go@v3 with: - go-version: "~1.23" + go-version: "~1.24" - name: build image run: make -j build/image/envbox diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index a19e65a..ccba56b 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -31,11 +31,11 @@ jobs: include: - os: ubuntu-22.04 arch: linux/amd64 - sha: f02ffb48eae99d6c884c9aa0378070cc716d028f58e87deec5ae00a41b706fe8 + sha: b7ac389e5a19592cadf16e0ca30e40919516128f6e1b7f99e1cb4ff64554172e arch-suffix: amd64 - os: depot-ubuntu-22.04-arm arch: linux/arm64 - sha: d9267eb176190b96dcfa29ba4c4c685a26a4a1aca1d7f15deb31ec33ed63de15 + sha: 16d80123ba53058cf90f5a68686e297621ea97942602682e34b3352783908f91 arch-suffix: arm64 runs-on: ${{ matrix.os }} steps: @@ -57,7 +57,7 @@ jobs: - uses: actions/setup-go@v3 with: - go-version: "~1.23" + go-version: "~1.24" - name: Go Cache Paths id: go-cache-paths diff --git a/.golangci.yaml b/.golangci.yaml index e92087f..06f75af 100644 --- a/.golangci.yaml +++ b/.golangci.yaml @@ -202,6 +202,7 @@ run: concurrency: 4 skip-dirs: - node_modules + - cli/cliflag skip-files: - scripts/rules.go timeout: 5m @@ -218,7 +219,6 @@ linters: - 
errcheck - errname - errorlint - - exportloopref - forcetypeassert - gocritic - gocyclo diff --git a/Makefile b/Makefile index 0ca723b..8a22340 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ PROJECT_ROOT := $(shell git rev-parse --show-toplevel) GO_FILES := $(shell git ls-files '*.go' '*.sum') IMAGE_FILES := $(shell find deploy) ARCH ?= linux/amd64 -SYSBOX_SHA ?= f02ffb48eae99d6c884c9aa0378070cc716d028f58e87deec5ae00a41b706fe8 +SYSBOX_SHA ?= b7ac389e5a19592cadf16e0ca30e40919516128f6e1b7f99e1cb4ff64554172e .PHONY: clean clean: @@ -32,3 +32,11 @@ fmt/go: .PHONY: fmt/md fmt/md: go run github.com/Kunde21/markdownfmt/v3/cmd/markdownfmt@v3.1.0 -w ./README.md + +.PHONY: test +test: + go test -v -count=1 ./... + +.PHONY: test-integration +test-integration: + CODER_TEST_INTEGRATION=1 go test -v -count=1 ./integration/ diff --git a/README.md b/README.md index 7ed5bda..d09c4a5 100644 --- a/README.md +++ b/README.md @@ -19,8 +19,10 @@ The environment variables can be used to configure various aspects of the inner | `CODER_INNER_HOSTNAME` | The hostname to use for the inner container. | false | | `CODER_IMAGE_PULL_SECRET` | The docker credentials to use when pulling the inner container. The recommended way to do this is to create an [Image Pull Secret](https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/#create-a-secret-by-providing-credentials-on-the-command-line) and then reference the secret using an [environment variable](https://kubernetes.io/docs/tasks/inject-data-application/distribute-credentials-secure/#define-container-environment-variables-using-secret-data). See below for example. | false | | `CODER_DOCKER_BRIDGE_CIDR` | The bridge CIDR to start the Docker daemon with. | false | +| `CODER_BOOTSTRAP_SCRIPT` | The script to use to bootstrap the container. This should typically install and start the agent. | false | | `CODER_MOUNTS` | A list of mounts to mount into the inner container. Mounts default to `rw`. 
Ex: `CODER_MOUNTS=/home/coder:/home/coder,/var/run/mysecret:/var/run/mysecret:ro` | false | | `CODER_USR_LIB_DIR` | The mountpoint of the host `/usr/lib` directory. Only required when using GPUs. | false | +| `CODER_INNER_USR_LIB_DIR` | The inner /usr/lib mountpoint. This is automatically detected based on `/etc/os-release` in the inner image, but may optionally be overridden. | false | | `CODER_ADD_TUN` | If `CODER_ADD_TUN=true` add a TUN device to the inner container. | false | | `CODER_ADD_FUSE` | If `CODER_ADD_FUSE=true` add a FUSE device to the inner container. | false | | `CODER_ADD_GPU` | If `CODER_ADD_GPU=true` add detected GPUs and related files to the inner container. Requires setting `CODER_USR_LIB_DIR` and mounting in the hosts `/usr/lib/` directory. | false | @@ -43,7 +45,7 @@ It is not possible to develop `envbox` effectively using a containerized environ If a login is required to pull images from a private repository, create a secret following the instructions from the [Kubernetes Documentation](https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/#create-a-secret-by-providing-credentials-on-the-command-line) as such: -``` +```shell kubectl -n create secret docker-registry regcred \ --docker-server= \ --docker-username= \ @@ -53,7 +55,7 @@ kubectl -n create secret docker-registry regcred \ Then reference the secret in your template as such: -``` +```shell env { name = "CODER_IMAGE_PULL_SECRET" value_from { @@ -86,3 +88,86 @@ env { > } > } > ``` + +## GPUs + +When passing through GPUs to the inner container, you may end up using associated tooling such as the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/index.html) or the [NVIDIA GPU Operator](https://docs.nvidia.com/datacenter/cloud-native/gpu-operator/latest/index.html). These will inject required utilities and libraries inside the inner container. 
You can verify this by directly running (without Envbox) a barebones image like `debian:bookworm` and running `mount` or `nvidia-smi` inside the container. + +Envbox will detect these mounts and pass them inside the inner container it creates, so that GPU-aware tools run inside the inner container can still utilize these libraries. + +Here's an example Docker command to run a GPU-enabled workload in Envbox. Note the following: + +1) The NVidia container runtime must be installed on the host (`--runtime=nvidia`). +2) `CODER_ADD_GPU=true` must be set to enable GPU-specific functionality. +3) When `CODER_ADD_GPU` is set, it is required to also set `CODER_USR_LIB_DIR` to a location where the relevant host directory has been mounted inside the outer container. In the below example, this is `/usr/lib/x86_64-linux-gnu` on the underlying host. It is mounted into the container under the path `/var/coder/usr/lib`. We then set `CODER_USR_LIB_DIR=/var/coder/usr/lib`. The actual location inside the container is not important **as long as it does not overwrite any pre-existing directories containing system libraries**. +4) The location to mount the libraries in the inner container is determined by the distribution ID in the `/etc/os-release` of the inner container. If the automatically determined location is incorrect (e.g. `nvidia-smi` complains about not being able to find libraries), you can set it manually via `CODER_INNER_USR_LIB_DIR`. + + > Note: this step is required in case user workloads require libraries from the underlying host that are not added in by the container runtime. 
+ +```shell +docker run -it --rm \ + --runtime=nvidia \ + --gpus=all \ + --name=envbox-gpu-test \ + -v /tmp/envbox/docker:/var/lib/coder/docker \ + -v /tmp/envbox/containers:/var/lib/coder/containers \ + -v /tmp/envbox/sysbox:/var/lib/sysbox \ + -v /tmp/envbox/docker:/var/lib/docker \ + -v /usr/src:/usr/src:ro \ + -v /lib/modules:/lib/modules:ro \ + -v /usr/lib/x86_64-linux-gnu:/var/coder/usr/lib \ + --privileged \ + -e CODER_INNER_IMAGE=nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda10.2 \ + -e CODER_INNER_USERNAME=root \ + -e CODER_ADD_GPU=true \ + -e CODER_USR_LIB_DIR=/var/coder/usr/lib \ + envbox:latest /envbox docker +``` + +To validate GPU functionality, you can run the following commands: + +1) To validate that the container runtime correctly passed the required GPU tools into the outer container, run: + + ```shell + docker exec -it envbox-gpu-test nvidia-smi + ``` + +2) To validate the same inside the inner container, run: + + ```shell + docker exec -it envbox-gpu-test docker exec -it workspace_cvm nvidia-smi + ``` + +3) To validate that the sample CUDA application inside the container runs correctly: + + ```shell + docker exec -it envbox-gpu-test docker exec -it workspace_cvm /tmp/vectorAdd + ``` + +## Hacking + +Here's a simple one-liner to run the `codercom/enterprise-minimal:ubuntu` image in Envbox using Docker: + +```shell +docker run -it --rm \ + -v /tmp/envbox/docker:/var/lib/coder/docker \ + -v /tmp/envbox/containers:/var/lib/coder/containers \ + -v /tmp/envbox/sysbox:/var/lib/sysbox \ + -v /tmp/envbox/docker:/var/lib/docker \ + -v /usr/src:/usr/src:ro \ + -v /lib/modules:/lib/modules:ro \ + --privileged \ + -e CODER_INNER_IMAGE=codercom/enterprise-minimal:ubuntu \ + -e CODER_INNER_USERNAME=coder \ + envbox:latest /envbox docker +``` + +This will store persistent data under `/tmp/envbox`. 
+ +## Troubleshooting + +### `failed to write to cgroup.procs: write /sys/fs/cgroup/docker//init.scope/cgroup.procs: operation not supported: unknown` + +This issue occurs in Docker if you have `cgroupns-mode` set to `private`. To validate, add `--cgroupns=host` to your `docker run` invocation and re-run. + +To permanently set this as the default in your Docker daemon, add `"default-cgroupns-mode": "host"` to your `/etc/docker/daemon.json` and restart Docker. diff --git a/cli/docker.go b/cli/docker.go index f48ea64..1573612 100644 --- a/cli/docker.go +++ b/cli/docker.go @@ -7,11 +7,15 @@ import ( "io" "net/url" "os" + "os/exec" + "os/signal" "path" "path/filepath" "sort" "strconv" "strings" + "syscall" + "time" "github.com/docker/docker/api/types/container" "github.com/google/go-containerregistry/pkg/name" @@ -98,6 +102,7 @@ var ( EnvMemory = "CODER_MEMORY" EnvAddGPU = "CODER_ADD_GPU" EnvUsrLibDir = "CODER_USR_LIB_DIR" + EnvInnerUsrLibDir = "CODER_INNER_USR_LIB_DIR" EnvDockerConfig = "CODER_DOCKER_CONFIG" EnvDebug = "CODER_DEBUG" EnvDisableIDMappedMount = "CODER_DISABLE_IDMAPPED_MOUNT" @@ -135,6 +140,7 @@ type flags struct { boostrapScript string containerMounts string hostUsrLibDir string + innerUsrLibDir string dockerConfig string cpus int memory int @@ -155,11 +161,24 @@ func dockerCmd() *cobra.Command { Short: "Create a docker-based CVM", RunE: func(cmd *cobra.Command, args []string) (err error) { var ( - ctx = cmd.Context() - log = slog.Make(slogjson.Sink(cmd.ErrOrStderr()), slogkubeterminate.Make()).Leveled(slog.LevelDebug) - blog buildlog.Logger = buildlog.JSONLogger{Encoder: json.NewEncoder(os.Stderr)} + ctx, cancel = context.WithCancel(cmd.Context()) //nolint + log = slog.Make(slogjson.Sink(cmd.ErrOrStderr()), slogkubeterminate.Make()).Leveled(slog.LevelDebug) + blog buildlog.Logger = buildlog.JSONLogger{Encoder: json.NewEncoder(os.Stderr)} ) + // We technically leak a context here, but it's impact is negligible. 
+ signalCtx, signalCancel := context.WithCancel(ctx) + sigs := make(chan os.Signal, 1) + signal.Notify(sigs, syscall.SIGTERM, syscall.SIGINT, syscall.SIGWINCH) + + // Spawn a goroutine to wait for a signal. + go func() { + defer signalCancel() + log.Info(ctx, "waiting for signal") + <-sigs + log.Info(ctx, "got signal, canceling context") + }() + if flags.noStartupLogs { log = slog.Make(slogjson.Sink(io.Discard)) blog = buildlog.NopLogger{} @@ -167,6 +186,7 @@ func dockerCmd() *cobra.Command { httpClient, err := xhttp.Client(log, flags.extraCertsPath) if err != nil { + //nolint return xerrors.Errorf("http client: %w", err) } @@ -206,13 +226,19 @@ func dockerCmd() *cobra.Command { // Start sysbox-mgr and sysbox-fs in order to run // sysbox containers. case err := <-background.New(ctx, log, "sysbox-mgr", sysboxArgs...).Run(): - blog.Info(sysboxErrMsg) - //nolint - log.Fatal(ctx, "sysbox-mgr exited", slog.Error(err)) + if ctx.Err() == nil { + blog.Info(sysboxErrMsg) + //nolint + log.Critical(ctx, "sysbox-mgr exited", slog.Error(err)) + panic(err) + } case err := <-background.New(ctx, log, "sysbox-fs").Run(): - blog.Info(sysboxErrMsg) - //nolint - log.Fatal(ctx, "sysbox-fs exited", slog.Error(err)) + if ctx.Err() == nil { + blog.Info(sysboxErrMsg) + //nolint + log.Critical(ctx, "sysbox-fs exited", slog.Error(err)) + panic(err) + } } }() @@ -314,7 +340,7 @@ func dockerCmd() *cobra.Command { ) } - err = runDockerCVM(ctx, log, client, blog, flags) + bootstrapExecID, err := runDockerCVM(ctx, log, client, blog, flags) if err != nil { // It's possible we failed because we ran out of disk while // pulling the image. 
We should restart the daemon and use @@ -343,7 +369,7 @@ func dockerCmd() *cobra.Command { }() log.Debug(ctx, "reattempting container creation") - err = runDockerCVM(ctx, log, client, blog, flags) + bootstrapExecID, err = runDockerCVM(ctx, log, client, blog, flags) } if err != nil { blog.Errorf("Failed to run envbox: %v", err) @@ -351,6 +377,44 @@ func dockerCmd() *cobra.Command { } } + go func() { + defer cancel() + + <-signalCtx.Done() + log.Debug(ctx, "ctx canceled, forwarding signal to inner container") + + if bootstrapExecID == "" { + log.Debug(ctx, "no bootstrap exec id, skipping") + return + } + + shutdownCtx, shutdownCancel := context.WithTimeout(context.Background(), time.Second*90) + defer shutdownCancel() + + bootstrapPID, err := dockerutil.GetExecPID(shutdownCtx, client, bootstrapExecID) + if err != nil { + log.Error(shutdownCtx, "get exec pid", slog.Error(err)) + } + + log.Debug(shutdownCtx, "killing container", slog.F("bootstrap_pid", bootstrapPID)) + + // The PID returned is the PID _outside_ the container... + out, err := exec.CommandContext(shutdownCtx, "kill", "-TERM", strconv.Itoa(bootstrapPID)).CombinedOutput() //nolint:gosec + if err != nil { + log.Error(shutdownCtx, "kill bootstrap process", slog.Error(err), slog.F("output", string(out))) + return + } + + log.Debug(shutdownCtx, "sent kill signal waiting for process to exit") + err = dockerutil.WaitForExit(shutdownCtx, client, bootstrapExecID) + if err != nil { + log.Error(shutdownCtx, "wait for exit", slog.Error(err)) + return + } + + log.Debug(shutdownCtx, "bootstrap process successfully exited") + }() + return nil }, } @@ -370,6 +434,7 @@ func dockerCmd() *cobra.Command { cliflag.StringVarP(cmd.Flags(), &flags.boostrapScript, "boostrap-script", "", EnvBootstrap, "", "The script to use to bootstrap the container. 
This should typically install and start the agent.") cliflag.StringVarP(cmd.Flags(), &flags.containerMounts, "mounts", "", EnvMounts, "", "Comma separated list of mounts in the form of ':[:options]' (e.g. /var/lib/docker:/var/lib/docker:ro,/usr/src:/usr/src).") cliflag.StringVarP(cmd.Flags(), &flags.hostUsrLibDir, "usr-lib-dir", "", EnvUsrLibDir, "", "The host /usr/lib mountpoint. Used to detect GPU drivers to mount into inner container.") + cliflag.StringVarP(cmd.Flags(), &flags.innerUsrLibDir, "inner-usr-lib-dir", "", EnvInnerUsrLibDir, "", "The inner /usr/lib mountpoint. This is automatically detected based on /etc/os-release in the inner image, but may optionally be overridden.") cliflag.StringVarP(cmd.Flags(), &flags.dockerConfig, "docker-config", "", EnvDockerConfig, "/root/.docker/config.json", "The path to the docker config to consult when pulling an image.") cliflag.BoolVarP(cmd.Flags(), &flags.addTUN, "add-tun", "", EnvAddTun, false, "Add a TUN device to the inner container.") cliflag.BoolVarP(cmd.Flags(), &flags.addFUSE, "add-fuse", "", EnvAddFuse, false, "Add a FUSE device to the inner container.") @@ -387,34 +452,31 @@ func dockerCmd() *cobra.Command { return cmd } -func runDockerCVM(ctx context.Context, log slog.Logger, client dockerutil.Client, blog buildlog.Logger, flags flags) error { +func runDockerCVM(ctx context.Context, log slog.Logger, client dockerutil.Client, blog buildlog.Logger, flags flags) (string, error) { fs := xunix.GetFS(ctx) - - // Set our OOM score to something really unfavorable to avoid getting killed - // in memory-scarce scenarios. 
err := xunix.SetOOMScore(ctx, "self", "-1000") if err != nil { - return xerrors.Errorf("set oom score: %w", err) + return "", xerrors.Errorf("set oom score: %w", err) } - ref, err := name.NewTag(flags.innerImage) + ref, err := name.ParseReference(flags.innerImage) if err != nil { - return xerrors.Errorf("parse ref: %w", err) + return "", xerrors.Errorf("parse ref: %w", err) } var dockerAuth dockerutil.AuthConfig if flags.imagePullSecret != "" { - dockerAuth, err = dockerutil.AuthConfigFromString(flags.imagePullSecret, ref.RegistryStr()) + dockerAuth, err = dockerutil.AuthConfigFromString(flags.imagePullSecret, ref.Context().RegistryStr()) if err != nil { - return xerrors.Errorf("parse auth config: %w", err) + return "", xerrors.Errorf("parse auth config: %w", err) } } log.Info(ctx, "checking for docker config file", slog.F("path", flags.dockerConfig)) if _, err := fs.Stat(flags.dockerConfig); err == nil { log.Info(ctx, "detected file", slog.F("image", flags.innerImage)) - dockerAuth, err = dockerutil.AuthConfigFromPath(flags.dockerConfig, ref.RegistryStr()) + dockerAuth, err = dockerutil.AuthConfigFromPath(flags.dockerConfig, ref.Context().RegistryStr()) if err != nil && !xerrors.Is(err, os.ErrNotExist) { - return xerrors.Errorf("auth config from file: %w", err) + return "", xerrors.Errorf("auth config from file: %w", err) } } @@ -427,7 +489,7 @@ func runDockerCVM(ctx context.Context, log slog.Logger, client dockerutil.Client // Add any user-specified mounts to our mounts list. extraMounts, err := parseMounts(flags.containerMounts) if err != nil { - return xerrors.Errorf("read mounts: %w", err) + return "", xerrors.Errorf("read mounts: %w", err) } mounts = append(mounts, extraMounts...) 
@@ -439,7 +501,7 @@ func runDockerCVM(ctx context.Context, log slog.Logger, client dockerutil.Client blog.Info("Creating TUN device") dev, err := xunix.CreateTUNDevice(ctx, OuterTUNPath) if err != nil { - return xerrors.Errorf("creat tun device: %w", err) + return "", xerrors.Errorf("creat tun device: %w", err) } devices = append(devices, container.DeviceMapping{ @@ -454,7 +516,7 @@ func runDockerCVM(ctx context.Context, log slog.Logger, client dockerutil.Client blog.Info("Creating FUSE device") dev, err := xunix.CreateFuseDevice(ctx, OuterFUSEPath) if err != nil { - return xerrors.Errorf("create fuse device: %w", err) + return "", xerrors.Errorf("create fuse device: %w", err) } devices = append(devices, container.DeviceMapping{ @@ -476,7 +538,7 @@ func runDockerCVM(ctx context.Context, log slog.Logger, client dockerutil.Client ) err = fs.Chown(device.PathOnHost, UserNamespaceOffset, UserNamespaceOffset) if err != nil { - return xerrors.Errorf("chown device %q: %w", device.PathOnHost, err) + return "", xerrors.Errorf("chown device %q: %w", device.PathOnHost, err) } } @@ -489,7 +551,7 @@ func runDockerCVM(ctx context.Context, log slog.Logger, client dockerutil.Client ProgressFn: dockerutil.DefaultLogImagePullFn(blog), }) if err != nil { - return xerrors.Errorf("pull image: %w", err) + return "", xerrors.Errorf("pull image: %w", err) } log.Debug(ctx, "remounting /sys") @@ -497,19 +559,19 @@ func runDockerCVM(ctx context.Context, log slog.Logger, client dockerutil.Client // After image pull we remount /sys so sysbox can have appropriate perms to create a container. 
err = xunix.MountFS(ctx, "/sys", "/sys", "", "remount", "rw") if err != nil { - return xerrors.Errorf("remount /sys: %w", err) + return "", xerrors.Errorf("remount /sys: %w", err) } if flags.addGPU { if flags.hostUsrLibDir == "" { - return xerrors.Errorf("when using GPUs, %q must be specified", EnvUsrLibDir) + return "", xerrors.Errorf("when using GPUs, %q must be specified", EnvUsrLibDir) } // Unmount GPU drivers in /proc as it causes issues when creating any // container in some cases (even the image metadata container). _, err = xunix.TryUnmountProcGPUDrivers(ctx, log) if err != nil { - return xerrors.Errorf("unmount /proc GPU drivers: %w", err) + return "", xerrors.Errorf("unmount /proc GPU drivers: %w", err) } } @@ -523,9 +585,9 @@ func runDockerCVM(ctx context.Context, log slog.Logger, client dockerutil.Client // of the user so that we can chown directories to the namespaced UID inside // the inner container as well as whether we should be starting the container // with /sbin/init or something simple like 'sleep infinity'. 
- imgMeta, err := dockerutil.GetImageMetadata(ctx, client, flags.innerImage, flags.innerUsername) + imgMeta, err := dockerutil.GetImageMetadata(ctx, log, client, flags.innerImage, flags.innerUsername) if err != nil { - return xerrors.Errorf("get image metadata: %w", err) + return "", xerrors.Errorf("get image metadata: %w", err) } blog.Infof("Detected entrypoint user '%s:%s' with home directory %q", imgMeta.UID, imgMeta.UID, imgMeta.HomeDir) @@ -534,15 +596,17 @@ func runDockerCVM(ctx context.Context, log slog.Logger, client dockerutil.Client slog.F("uid", imgMeta.UID), slog.F("gid", imgMeta.GID), slog.F("has_init", imgMeta.HasInit), + slog.F("os_release", imgMeta.OsReleaseID), + slog.F("home_dir", imgMeta.HomeDir), ) uid, err := strconv.ParseInt(imgMeta.UID, 10, 32) if err != nil { - return xerrors.Errorf("parse image uid: %w", err) + return "", xerrors.Errorf("parse image uid: %w", err) } gid, err := strconv.ParseInt(imgMeta.GID, 10, 32) if err != nil { - return xerrors.Errorf("parse image gid: %w", err) + return "", xerrors.Errorf("parse image gid: %w", err) } for _, m := range mounts { @@ -563,13 +627,13 @@ func runDockerCVM(ctx context.Context, log slog.Logger, client dockerutil.Client mounter := xunix.Mounter(ctx) err := mounter.Mount("", m.Source, "", []string{"remount,rw"}) if err != nil { - return xerrors.Errorf("remount: %w", err) + return "", xerrors.Errorf("remount: %w", err) } } err := fs.Chmod(m.Source, 0o2755) if err != nil { - return xerrors.Errorf("chmod mountpoint %q: %w", m.Source, err) + return "", xerrors.Errorf("chmod mountpoint %q: %w", m.Source, err) } var ( @@ -596,14 +660,14 @@ func runDockerCVM(ctx context.Context, log slog.Logger, client dockerutil.Client // user. 
err = fs.Chown(m.Source, shiftedUID, shiftedGID) if err != nil { - return xerrors.Errorf("chown mountpoint %q: %w", m.Source, err) + return "", xerrors.Errorf("chown mountpoint %q: %w", m.Source, err) } } if flags.addGPU { devs, binds, err := xunix.GPUs(ctx, log, flags.hostUsrLibDir) if err != nil { - return xerrors.Errorf("find gpus: %w", err) + return "", xerrors.Errorf("find gpus: %w", err) } for _, dev := range devs { @@ -614,16 +678,33 @@ func runDockerCVM(ctx context.Context, log slog.Logger, client dockerutil.Client }) } + innerUsrLibDir := imgMeta.UsrLibDir() + if flags.innerUsrLibDir != "" { + log.Info(ctx, "overriding auto-detected inner usr lib dir ", + slog.F("before", innerUsrLibDir), + slog.F("after", flags.innerUsrLibDir)) + innerUsrLibDir = flags.innerUsrLibDir + } for _, bind := range binds { // If the bind has a path that points to the host-mounted /usr/lib // directory we need to remap it to /usr/lib inside the container. mountpoint := bind.Path if strings.HasPrefix(mountpoint, flags.hostUsrLibDir) { mountpoint = filepath.Join( - "/usr/lib", + // Note: we used to mount into /usr/lib, but this can change + // based on the distro inside the container. + innerUsrLibDir, strings.TrimPrefix(mountpoint, strings.TrimSuffix(flags.hostUsrLibDir, "/")), ) } + // Even though xunix.GPUs checks for duplicate mounts, we need to check + // for duplicates again here after remapping the path. + if slices.ContainsFunc(mounts, func(m xunix.Mount) bool { + return m.Mountpoint == mountpoint + }) { + log.Debug(ctx, "skipping duplicate mount", slog.F("path", mountpoint)) + continue + } mounts = append(mounts, xunix.Mount{ Source: bind.Path, Mountpoint: mountpoint, @@ -634,6 +715,13 @@ func runDockerCVM(ctx context.Context, log slog.Logger, client dockerutil.Client } blog.Info("Creating workspace...") + // If imgMeta.HasInit is true, we just use flags.boostrapScript as the entrypoint. + // But if it's false, we need to run /sbin/init as the entrypoint. 
+ // We need to mount or run some exec command that injects a systemd service for starting + // the coder agent. + + // We need to check that if PID1 is systemd (or /sbin/init) that systemd propagates SIGTERM + // to service units. If it doesn't then this solution doesn't help us. // Create the inner container. containerID, err := dockerutil.CreateContainer(ctx, client, &dockerutil.ContainerConfig{ @@ -650,14 +738,14 @@ func runDockerCVM(ctx context.Context, log slog.Logger, client dockerutil.Client MemoryLimit: int64(flags.memory), }) if err != nil { - return xerrors.Errorf("create container: %w", err) + return "", xerrors.Errorf("create container: %w", err) } blog.Info("Pruning images to free up disk...") // Prune images to avoid taking up any unnecessary disk from the user. _, err = dockerutil.PruneImages(ctx, client) if err != nil { - return xerrors.Errorf("prune images: %w", err) + return "", xerrors.Errorf("prune images: %w", err) } // TODO fix iptables when istio detected. @@ -665,7 +753,7 @@ func runDockerCVM(ctx context.Context, log slog.Logger, client dockerutil.Client blog.Info("Starting up workspace...") err = client.ContainerStart(ctx, containerID, container.StartOptions{}) if err != nil { - return xerrors.Errorf("start container: %w", err) + return "", xerrors.Errorf("start container: %w", err) } log.Debug(ctx, "creating bootstrap directory", slog.F("directory", imgMeta.HomeDir)) @@ -685,7 +773,7 @@ func runDockerCVM(ctx context.Context, log slog.Logger, client dockerutil.Client Args: []string{"-p", bootDir}, }) if err != nil { - return xerrors.Errorf("make bootstrap dir: %w", err) + return "", xerrors.Errorf("make bootstrap dir: %w", err) } cpuQuota, err := xunix.ReadCPUQuota(ctx, log) @@ -707,34 +795,54 @@ func runDockerCVM(ctx context.Context, log slog.Logger, client dockerutil.Client } blog.Info("Envbox startup complete!") - - // The bootstrap script doesn't return since it execs the agent - // meaning that it can get pretty noisy if we were to 
log by default. - // In order to allow users to discern issues getting the bootstrap script - // to complete successfully we pipe the output to stdout if - // CODER_DEBUG=true. - debugWriter := io.Discard - if flags.debug { - debugWriter = os.Stdout + if flags.boostrapScript == "" { + return "", nil } - // Bootstrap the container if a script has been provided. blog.Infof("Bootstrapping workspace...") - err = dockerutil.BootstrapContainer(ctx, client, dockerutil.BootstrapConfig{ - ContainerID: containerID, - User: imgMeta.UID, - Script: flags.boostrapScript, - // We set this because the default behavior is to download the agent - // to /tmp/coder.XXXX. This causes a race to happen where we finish - // downloading the binary but before we can execute systemd remounts - // /tmp. - Env: []string{fmt.Sprintf("BINARY_DIR=%s", bootDir)}, - StdOutErr: debugWriter, + + bootstrapExec, err := client.ContainerExecCreate(ctx, containerID, container.ExecOptions{ + User: imgMeta.UID, + Cmd: []string{"/bin/sh", "-s"}, + Env: []string{fmt.Sprintf("BINARY_DIR=%s", bootDir)}, + AttachStdin: true, + AttachStdout: true, + AttachStderr: true, + Detach: true, }) if err != nil { - return xerrors.Errorf("boostrap container: %w", err) + return "", xerrors.Errorf("create exec: %w", err) + } + + resp, err := client.ContainerExecAttach(ctx, bootstrapExec.ID, container.ExecStartOptions{}) + if err != nil { + return "", xerrors.Errorf("attach exec: %w", err) } - return nil + _, err = io.Copy(resp.Conn, strings.NewReader(flags.boostrapScript)) + if err != nil { + return "", xerrors.Errorf("copy stdin: %w", err) + } + err = resp.CloseWrite() + if err != nil { + return "", xerrors.Errorf("close write: %w", err) + } + + go func() { + defer resp.Close() + go func() { + // Also close the response reader when the context is canceled. 
+ defer resp.Close() + <-ctx.Done() + }() + rd := io.LimitReader(resp.Reader, 1<<10) + _, err := io.Copy(blog, rd) + if err != nil { + log.Error(ctx, "copy bootstrap output", slog.Error(err)) + } + log.Debug(ctx, "bootstrap output copied") + }() + + return bootstrapExec.ID, nil } //nolint:revive diff --git a/cli/docker_test.go b/cli/docker_test.go index 88e9bc3..9f05201 100644 --- a/cli/docker_test.go +++ b/cli/docker_test.go @@ -110,6 +110,11 @@ func TestDocker(t *testing.T) { image: "gcr.io/ubuntu:24.04", success: true, }, + { + name: "RegistryRepositorySha", + image: "gcr.io/images/helloworld@sha256:13e101dd511a26a2147e123456bdff5845c9461aaa53d856845745b063001234", + success: true, + }, } for _, tc := range testcases { diff --git a/cmd/envbox/main.go b/cmd/envbox/main.go index 2dfdb94..d7bb87e 100644 --- a/cmd/envbox/main.go +++ b/cmd/envbox/main.go @@ -14,7 +14,6 @@ func main() { _, _ = fmt.Fprintln(os.Stderr, err.Error()) os.Exit(1) } - // We exit the main thread while keepin all the other procs goin strong. runtime.Goexit() } diff --git a/deploy/Dockerfile b/deploy/Dockerfile index a8435c6..1bf3ee3 100644 --- a/deploy/Dockerfile +++ b/deploy/Dockerfile @@ -6,7 +6,7 @@ ARG TARGETARCH # We don't hardcode it here because we have to be able to build both # amd and arm ARG SYSBOX_SHA -ARG SYSBOX_VERSION="0.6.5" +ARG SYSBOX_VERSION="0.6.7" ARG SYSBOX_DEB="sysbox-ce_$SYSBOX_VERSION-0.linux_$TARGETARCH.deb" # Copy configuration files to appropriate locations diff --git a/dockerutil/client.go b/dockerutil/client.go index 00dc4b9..c860fbc 100644 --- a/dockerutil/client.go +++ b/dockerutil/client.go @@ -82,16 +82,39 @@ func parseConfig(cfg dockercfg.Config, reg string) (AuthConfig, error) { } if secret != "" { - if username == "" { - return AuthConfig{ - IdentityToken: secret, - }, nil + return toAuthConfig(username, secret), nil + } + + // This to preserve backwards compatibility with older variants of envbox + // that didn't mandate a hostname key in the config file. 
We just take the + // first valid auth config we find and use that. + for _, auth := range cfg.AuthConfigs { + if auth.IdentityToken != "" { + return toAuthConfig("", auth.IdentityToken), nil } - return AuthConfig{ - Username: username, - Password: secret, - }, nil + + if auth.Username != "" && auth.Password != "" { + return toAuthConfig(auth.Username, auth.Password), nil + } + + username, secret, err = dockercfg.DecodeBase64Auth(auth) + if err == nil && secret != "" { + return toAuthConfig(username, secret), nil + } + // Invalid auth config, skip it. } return AuthConfig{}, xerrors.Errorf("no auth config found for registry %s: %w", reg, os.ErrNotExist) } + +func toAuthConfig(username, secret string) AuthConfig { + if username == "" { + return AuthConfig{ + IdentityToken: secret, + } + } + return AuthConfig{ + Username: username, + Password: secret, + } +} diff --git a/dockerutil/container.go b/dockerutil/container.go index 2731cbc..ca27375 100644 --- a/dockerutil/container.go +++ b/dockerutil/container.go @@ -115,6 +115,7 @@ func BootstrapContainer(ctx context.Context, client Client, conf BootstrapConfig Stdin: strings.NewReader(conf.Script), Env: conf.Env, StdOutErr: conf.StdOutErr, + Detach: conf.Detach, }) if err != nil { err = xerrors.Errorf("boostrap container (%s): %w", out, err) diff --git a/dockerutil/dockerfake/client.go b/dockerutil/dockerfake/client.go index 7c7d012..8e348dc 100644 --- a/dockerutil/dockerfake/client.go +++ b/dockerutil/dockerfake/client.go @@ -163,7 +163,9 @@ func (m MockClient) ContainerExecCreate(ctx context.Context, name string, config func (m MockClient) ContainerExecInspect(ctx context.Context, id string) (dockertypes.ContainerExecInspect, error) { if m.ContainerExecInspectFn == nil { - return dockertypes.ContainerExecInspect{}, nil + return dockertypes.ContainerExecInspect{ + Pid: 123, + }, nil } return m.ContainerExecInspectFn(ctx, id) diff --git a/dockerutil/exec.go b/dockerutil/exec.go index 8f27f47..c3821c6 100644 --- 
a/dockerutil/exec.go +++ b/dockerutil/exec.go @@ -4,11 +4,14 @@ import ( "bytes" "context" "io" + "time" dockertypes "github.com/docker/docker/api/types" + "github.com/docker/docker/api/types/container" "golang.org/x/xerrors" "github.com/coder/envbox/xio" + "github.com/coder/retry" ) type ExecConfig struct { @@ -25,8 +28,8 @@ type ExecConfig struct { // ExecContainer runs a command in a container. It returns the output and any error. // If an error occurs during the execution of the command, the output is appended to the error. func ExecContainer(ctx context.Context, client Client, config ExecConfig) ([]byte, error) { - exec, err := client.ContainerExecCreate(ctx, config.ContainerID, dockertypes.ExecConfig{ - Detach: true, + exec, err := client.ContainerExecCreate(ctx, config.ContainerID, container.ExecOptions{ + Detach: config.Detach, Cmd: append([]string{config.Cmd}, config.Args...), User: config.User, AttachStderr: true, @@ -92,3 +95,39 @@ func ExecContainer(ctx context.Context, client Client, config ExecConfig) ([]byt return buf.Bytes(), nil } + +func GetExecPID(ctx context.Context, client Client, execID string) (int, error) { + for r := retry.New(time.Second, time.Second); r.Wait(ctx); { + inspect, err := client.ContainerExecInspect(ctx, execID) + if err != nil { + return 0, xerrors.Errorf("exec inspect: %w", err) + } + + if inspect.Pid == 0 { + continue + } + return inspect.Pid, nil + } + + return 0, ctx.Err() +} + +func WaitForExit(ctx context.Context, client Client, execID string) error { + for r := retry.New(time.Second, time.Second); r.Wait(ctx); { + inspect, err := client.ContainerExecInspect(ctx, execID) + if err != nil { + return xerrors.Errorf("exec inspect: %w", err) + } + + if inspect.Running { + continue + } + + if inspect.ExitCode > 0 { + return xerrors.Errorf("exit code %d", inspect.ExitCode) + } + + return nil + } + return ctx.Err() +} diff --git a/dockerutil/image.go b/dockerutil/image.go index ffb2bdb..f49cba0 100644 --- a/dockerutil/image.go 
+++ b/dockerutil/image.go @@ -15,6 +15,8 @@ import ( "github.com/docker/docker/api/types/image" "golang.org/x/xerrors" + "cdr.dev/slog" + "github.com/coder/envbox/buildlog" "github.com/coder/envbox/xunix" "github.com/coder/retry" @@ -22,6 +24,23 @@ import ( const diskFullStorageDriver = "vfs" +// Adapted from https://github.com/NVIDIA/libnvidia-container/blob/v1.15.0/src/nvc_container.c#L152-L165 +var UsrLibDirs = map[string]string{ + // Debian-based distros use a multi-arch directory. + "debian": usrLibMultiarchDir, + "ubuntu": usrLibMultiarchDir, + // Fedora and Redhat use the standard /usr/lib64. + "fedora": "/usr/lib64", + "rhel": "/usr/lib64", + // Fall back to the standard /usr/lib. + "linux": "/usr/lib", +} + +// /etc/os-release is the standard location for system identification data on +// Linux systems running systemd. +// Ref: https://www.freedesktop.org/software/systemd/man/latest/os-release.html +var etcOsRelease = "/etc/os-release" + type PullImageConfig struct { Client Client Image string @@ -148,15 +167,16 @@ func processImagePullEvents(r io.Reader, fn ImagePullProgressFn) error { } type ImageMetadata struct { - UID string - GID string - HomeDir string - HasInit bool + UID string + GID string + HomeDir string + HasInit bool + OsReleaseID string } // GetImageMetadata returns metadata about an image such as the UID/GID of the // provided username and whether it contains an /sbin/init that we should run. -func GetImageMetadata(ctx context.Context, client Client, img, username string) (ImageMetadata, error) { +func GetImageMetadata(ctx context.Context, log slog.Logger, client Client, img, username string) (ImageMetadata, error) { // Creating a dummy container to inspect the filesystem. 
created, err := client.ContainerCreate(ctx, &container.Config{ @@ -226,14 +246,58 @@ func GetImageMetadata(ctx context.Context, client Client, img, username string) return ImageMetadata{}, xerrors.Errorf("no users returned for username %s", username) } + // Read the /etc/os-release file to get the ID of the OS. + // We only care about the ID field. + var osReleaseID string + out, err = ExecContainer(ctx, client, ExecConfig{ + ContainerID: inspect.ID, + Cmd: "cat", + Args: []string{etcOsRelease}, + }) + if err != nil { + log.Error(ctx, "read os-release", slog.Error(err)) + log.Error(ctx, "falling back to linux for os-release ID") + osReleaseID = "linux" + } else { + osReleaseID = GetOSReleaseID(out) + } + return ImageMetadata{ - UID: users[0].Uid, - GID: users[0].Gid, - HomeDir: users[0].HomeDir, - HasInit: initExists, + UID: users[0].Uid, + GID: users[0].Gid, + HomeDir: users[0].HomeDir, + HasInit: initExists, + OsReleaseID: osReleaseID, }, nil } +// UsrLibDir returns the path to the /usr/lib directory for the given +// operating system determined by the /etc/os-release file. +func (im ImageMetadata) UsrLibDir() string { + if val, ok := UsrLibDirs[im.OsReleaseID]; ok && val != "" { + return val + } + return UsrLibDirs["linux"] // fallback +} + +// GetOSReleaseID returns the ID of the operating system from the +// raw contents of /etc/os-release. +func GetOSReleaseID(raw []byte) string { + var osReleaseID string + for _, line := range strings.Split(string(raw), "\n") { + if strings.HasPrefix(line, "ID=") { + osReleaseID = strings.TrimPrefix(line, "ID=") + // The value may be quoted. 
+ osReleaseID = strings.Trim(osReleaseID, "\"") + break + } + } + if osReleaseID == "" { + return "linux" + } + return osReleaseID +} + func DefaultLogImagePullFn(log buildlog.Logger) func(ImagePullEvent) error { var ( // Avoid spamming too frequently, the messages can come quickly diff --git a/dockerutil/image_linux_amd64.go b/dockerutil/image_linux_amd64.go new file mode 100644 index 0000000..5952a29 --- /dev/null +++ b/dockerutil/image_linux_amd64.go @@ -0,0 +1,5 @@ +package dockerutil + +// usrLibMultiarchDir is defined for arm64 and amd64 architectures. +// Envbox is not published for other architectures. +var usrLibMultiarchDir = "/usr/lib/x86_64-linux-gnu" diff --git a/dockerutil/image_linux_arm64.go b/dockerutil/image_linux_arm64.go new file mode 100644 index 0000000..577c1be --- /dev/null +++ b/dockerutil/image_linux_arm64.go @@ -0,0 +1,5 @@ +package dockerutil + +// usrLibMultiarchDir is defined for arm64 and amd64 architectures. +// Envbox is not published for other architectures. +var usrLibMultiarchDir = "/usr/lib/aarch64-linux-gnu" diff --git a/go.mod b/go.mod index 2cd31e0..de07e31 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,6 @@ module github.com/coder/envbox -go 1.23.3 +go 1.24.1 // There are a few minor changes we make to Tailscale that we're slowly upstreaming. 
Compare here: // https://github.com/tailscale/tailscale/compare/main...coder:tailscale:main @@ -22,12 +22,12 @@ require ( github.com/spf13/afero v1.11.0 github.com/spf13/cobra v1.7.0 github.com/spf13/pflag v1.0.5 - github.com/stretchr/testify v1.9.0 + github.com/stretchr/testify v1.10.0 github.com/vishvananda/netlink v1.2.1-beta.2 - golang.org/x/crypto v0.28.0 + golang.org/x/crypto v0.36.0 golang.org/x/exp v0.0.0-20240222234643-814bf88cf225 golang.org/x/mod v0.19.0 - golang.org/x/sys v0.27.0 + golang.org/x/sys v0.31.0 golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 k8s.io/mount-utils v0.26.2 k8s.io/utils v0.0.0-20230406110748-d93618cff8a2 @@ -92,13 +92,13 @@ require ( github.com/fatih/color v1.17.0 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect github.com/fxamacker/cbor/v2 v2.4.0 // indirect - github.com/go-chi/chi/v5 v5.1.0 // indirect - github.com/go-jose/go-jose/v4 v4.0.2 // indirect + github.com/go-chi/chi/v5 v5.2.2 // indirect + github.com/go-jose/go-jose/v4 v4.0.5 // indirect github.com/go-logr/logr v1.4.2 // indirect github.com/go-logr/stdr v1.2.2 // indirect github.com/go-ole/go-ole v1.2.6 // indirect github.com/go-sql-driver/mysql v1.8.1 // indirect - github.com/go-viper/mapstructure/v2 v2.2.1 // indirect + github.com/go-viper/mapstructure/v2 v2.3.0 // indirect github.com/godbus/dbus/v5 v5.1.0 // indirect github.com/gogo/protobuf v1.3.2 // indirect github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect @@ -201,11 +201,11 @@ require ( go.uber.org/atomic v1.11.0 // indirect go4.org/mem v0.0.0-20220726221520-4f986261bf13 // indirect go4.org/netipx v0.0.0-20230728180743-ad4cb58a6516 // indirect - golang.org/x/net v0.30.0 // indirect + golang.org/x/net v0.38.0 // indirect golang.org/x/oauth2 v0.23.0 // indirect - golang.org/x/sync v0.9.0 // indirect - golang.org/x/term v0.25.0 // indirect - golang.org/x/text v0.20.0 // indirect + golang.org/x/sync v0.12.0 // indirect + golang.org/x/term v0.30.0 // indirect + 
golang.org/x/text v0.23.0 // indirect golang.org/x/time v0.5.0 // indirect golang.org/x/tools v0.23.0 // indirect golang.zx2c4.com/wintun v0.0.0-20230126152724-0fa3db229ce2 // indirect diff --git a/go.sum b/go.sum index 152894b..92b33e4 100644 --- a/go.sum +++ b/go.sum @@ -175,10 +175,10 @@ github.com/gin-gonic/gin v1.9.1 h1:4idEAncQnU5cB7BeOkPtxjfCSye0AAm1R0RVIqJ+Jmg= github.com/gin-gonic/gin v1.9.1/go.mod h1:hPrL7YrpYKXt5YId3A/Tnip5kqbEAP+KLuI3SUcPTeU= github.com/github/fakeca v0.1.0 h1:Km/MVOFvclqxPM9dZBC4+QE564nU4gz4iZ0D9pMw28I= github.com/github/fakeca v0.1.0/go.mod h1:+bormgoGMMuamOscx7N91aOuUST7wdaJ2rNjeohylyo= -github.com/go-chi/chi/v5 v5.1.0 h1:acVI1TYaD+hhedDJ3r54HyA6sExp3HfXq7QWEEY/xMw= -github.com/go-chi/chi/v5 v5.1.0/go.mod h1:DslCQbL2OYiznFReuXYUmQ2hGd1aDpCnlMNITLSKoi8= -github.com/go-jose/go-jose/v4 v4.0.2 h1:R3l3kkBds16bO7ZFAEEcofK0MkrAJt3jlJznWZG0nvk= -github.com/go-jose/go-jose/v4 v4.0.2/go.mod h1:WVf9LFMHh/QVrmqrOfqun0C45tMe3RoiKJMPvgWwLfY= +github.com/go-chi/chi/v5 v5.2.2 h1:CMwsvRVTbXVytCk1Wd72Zy1LAsAh9GxMmSNWLHCG618= +github.com/go-chi/chi/v5 v5.2.2/go.mod h1:L2yAIGWB3H+phAw1NxKwWM+7eUH/lU8pOMm5hHcoops= +github.com/go-jose/go-jose/v4 v4.0.5 h1:M6T8+mKZl/+fNNuFHvGIzDz7BTLQPIounk/b9dw3AaE= +github.com/go-jose/go-jose/v4 v4.0.5/go.mod h1:s3P1lRrkT8igV8D9OjyL4WRyHvjB6a4JSllnOrmmBOA= github.com/go-logr/logr v1.2.0/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= @@ -201,8 +201,8 @@ github.com/go-sql-driver/mysql v1.8.1 h1:LedoTUt/eveggdHS9qUFC1EFSa8bU2+1pZjSRpv github.com/go-sql-driver/mysql v1.8.1/go.mod h1:wEBSXgmK//2ZFJyE+qWnIsVGmvmEKlqwuVSjsCm7DZg= github.com/go-test/deep v1.0.8 h1:TDsG77qcSprGbC6vTN8OuXp5g+J+b5Pcguhf7Zt61VM= github.com/go-test/deep v1.0.8/go.mod h1:5C2ZWiW0ErCdrYzpqxLbTX7MG14M9iiw8DgHncVwcsE= -github.com/go-viper/mapstructure/v2 v2.2.1 
h1:ZAaOCxANMuZx5RCeg0mBdEZk7DZasvvZIxtHqx8aGss= -github.com/go-viper/mapstructure/v2 v2.2.1/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM= +github.com/go-viper/mapstructure/v2 v2.3.0 h1:27XbWsHIqhbdR5TIC911OfYvgSaW93HM+dX7970Q7jk= +github.com/go-viper/mapstructure/v2 v2.3.0/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM= github.com/gobwas/httphead v0.0.0-20180130184737-2c6c146eadee/go.mod h1:L0fX3K22YWvt/FAX9NnzrNzcI4wNYi9Yku4O0LKYflo= github.com/gobwas/httphead v0.1.0 h1:exrUm0f4YX0L7EBwZHuCF4GDp8aJfVeBrlLQrs6NqWU= github.com/gobwas/httphead v0.1.0/go.mod h1:O/RXo79gxV8G+RqlR/otEwx4Q36zl9rqC5u12GKvMCM= @@ -483,8 +483,8 @@ github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/ github.com/stretchr/testify v1.7.2/go.mod h1:R6va5+xMeoiuVRoj+gSkQ7d3FALtqAAGI1FQKckRals= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= -github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= -github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= +github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/swaggest/assertjson v1.9.0 h1:dKu0BfJkIxv/xe//mkCrK5yZbs79jL7OVf9Ija7o2xQ= github.com/swaggest/assertjson v1.9.0/go.mod h1:b+ZKX2VRiUjxfUIal0HDN85W0nHPAYUbYH5WkkSsFsU= github.com/tailscale/certstore v0.1.1-0.20220316223106-78d6e1c49d8d h1:K3j02b5j2Iw1xoggN9B2DIEkhWGheqFOeDkdJdBrJI8= @@ -596,8 +596,8 @@ golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACk golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto 
v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= -golang.org/x/crypto v0.28.0 h1:GBDwsMXVQi34v5CCYUm2jkJvu4cbtru2U4TN2PSyQnw= -golang.org/x/crypto v0.28.0/go.mod h1:rmgy+3RHxRZMyY0jjAJShp2zgEdOqj2AO7U0pYmeQ7U= +golang.org/x/crypto v0.36.0 h1:AnAEvhDddvBdpY+uR+MyHmuZzzNqXSe/GvuDeob5L34= +golang.org/x/crypto v0.36.0/go.mod h1:Y4J0ReaxCR1IMaabaSMugxJES1EpwhBHhv2bDHklZvc= golang.org/x/exp v0.0.0-20240222234643-814bf88cf225 h1:LfspQV/FYTatPTr/3HzIcmiUFH7PGP+OQ6mgDYo3yuQ= golang.org/x/exp v0.0.0-20240222234643-814bf88cf225/go.mod h1:CxmFvTBINI24O/j8iY7H1xHzx2i4OsyguNBmN/uPtqc= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= @@ -618,8 +618,8 @@ golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96b golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.1.0/go.mod h1:Cx3nUiGt4eDBEyega/BKRp+/AlGL8hYe7U9odMt2Cco= golang.org/x/net v0.3.0/go.mod h1:MBQ8lrhLObU/6UmLb4fmbmk5OcyYmqtbGd/9yIeKjEE= -golang.org/x/net v0.30.0 h1:AcW1SDZMkb8IpzCdQUaIq2sP4sZ4zw+55h6ynffypl4= -golang.org/x/net v0.30.0/go.mod h1:2wGyMJ5iFasEhkwi13ChkO/t1ECNC4X4eBKkVFyYFlU= +golang.org/x/net v0.38.0 h1:vRMAPTMaeGqVhG5QyLJHqNDwecKTomGeqbnfZyKlBI8= +golang.org/x/net v0.38.0/go.mod h1:ivrbrMbzFq5J41QOQh0siUuly180yBYtLp+CKbEaFx8= golang.org/x/oauth2 v0.23.0 h1:PbgcYx2W7i4LvjJWEbf0ngHV6qJYr86PkAV3bXdLEbs= golang.org/x/oauth2 v0.23.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -629,8 +629,8 @@ golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.1.0/go.mod 
h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.9.0 h1:fEo0HyrW1GIgZdpbhCRO0PkJajUS5H9IFUztCgEo2jQ= -golang.org/x/sync v0.9.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.12.0 h1:MHc5BpPuC30uJk597Ri8TV3CNZcTLu6B6z4lJy+g6Jw= +golang.org/x/sync v0.12.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -660,14 +660,14 @@ golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.3.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.4.1-0.20230131160137-e7d7f63158de/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.27.0 h1:wBqf8DvsY9Y/2P8gAfPDEYNuS30J4lPHJxXSb/nJZ+s= -golang.org/x/sys v0.27.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.31.0 h1:ioabZlmFYtWhL+TRYpcnNlLwhyxaM9kWTDEmfnprqik= +golang.org/x/sys v0.31.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.1.0/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.3.0/go.mod h1:q750SLmJuPmVoN1blW3UFBPREJfb1KmY3vwxfr+nFDA= -golang.org/x/term v0.25.0 h1:WtHI/ltw4NvSUig5KARz9h521QvRC8RmF/cuYqifU24= -golang.org/x/term v0.25.0/go.mod h1:RPyXicDX+6vLxogjjRxjgD2TKtmAO6NZBsBRfrOLu7M= +golang.org/x/term v0.30.0 h1:PQ39fJZ+mfadBm0y5WlL4vlM7Sx1Hgf13sMIY2+QS9Y= +golang.org/x/term 
v0.30.0/go.mod h1:NYYFdzHoI5wRh/h5tDMdMqCqPJZEuNqVR5xJLd/n67g= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= @@ -675,8 +675,8 @@ golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ= golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.5.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= -golang.org/x/text v0.20.0 h1:gK/Kv2otX8gz+wn7Rmb3vT96ZwuoxnQlY+HlJVj7Qug= -golang.org/x/text v0.20.0/go.mod h1:D4IsuqiFMhST5bX19pQ9ikHC2GsaKyk/oF+pn3ducp4= +golang.org/x/text v0.23.0 h1:D71I7dUrlY+VX0gQShAThNGHFxZ13dGLBHQLVl1mJlY= +golang.org/x/text v0.23.0/go.mod h1:/BLNzu4aZCJ1+kcD0DNRotWKage4q2rGVAg4o22unh4= golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk= golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= diff --git a/integration/docker_test.go b/integration/docker_test.go index 1419ff5..61c961d 100644 --- a/integration/docker_test.go +++ b/integration/docker_test.go @@ -1,6 +1,3 @@ -//go:build integration -// +build integration - package integration_test import ( @@ -24,6 +21,9 @@ import ( func TestDocker(t *testing.T) { t.Parallel() + if val, ok := os.LookupEnv("CODER_TEST_INTEGRATION"); !ok || val != "1" { + t.Skip("integration tests are skipped unless CODER_TEST_INTEGRATION=1") + } // Dockerd just tests that dockerd can spin up and function correctly. t.Run("Dockerd", func(t *testing.T) { @@ -240,28 +240,53 @@ func TestDocker(t *testing.T) { require.Equal(t, "1000", strings.TrimSpace(string(out))) // Validate that memory limit is being applied to the inner container. 
- out, err = integrationtest.ExecInnerContainer(t, pool, integrationtest.ExecConfig{ + // First check under cgroupv2 path. + if out, err = integrationtest.ExecInnerContainer(t, pool, integrationtest.ExecConfig{ ContainerID: resource.Container.ID, - Cmd: []string{"cat", "/sys/fs/cgroup/memory/memory.limit_in_bytes"}, - }) - require.NoError(t, err) - require.Equal(t, expectedMemoryLimit, strings.TrimSpace(string(out))) + Cmd: []string{"cat", "/sys/fs/cgroup/memory.max"}, + }); err == nil { + require.Equal(t, expectedMemoryLimit, strings.TrimSpace(string(out))) + } else { // fall back to cgroupv1 path. + out, err = integrationtest.ExecInnerContainer(t, pool, integrationtest.ExecConfig{ + ContainerID: resource.Container.ID, + Cmd: []string{"cat", "/sys/fs/cgroup/memory/memory.limit_in_bytes"}, + }) + require.NoError(t, err) + require.Equal(t, expectedMemoryLimit, strings.TrimSpace(string(out))) + } - periodStr, err := integrationtest.ExecInnerContainer(t, pool, integrationtest.ExecConfig{ + // Validate the cpu limits are being applied to the inner container. + // First check under cgroupv2 path. + var quota, period int64 + if out, err = integrationtest.ExecInnerContainer(t, pool, integrationtest.ExecConfig{ ContainerID: resource.Container.ID, - Cmd: []string{"cat", "/sys/fs/cgroup/cpu,cpuacct/cpu.cfs_period_us"}, - }) - require.NoError(t, err) - period, err := strconv.ParseInt(strings.TrimSpace(string(periodStr)), 10, 64) - require.NoError(t, err) + Cmd: []string{"cat", "/sys/fs/cgroup/cpu.max"}, + }); err == nil { + // out is in the format "period quota" + // e.g. "100000 100000" + fields := strings.Fields(string(out)) + require.Len(t, fields, 2) + period, err = strconv.ParseInt(fields[0], 10, 64) + require.NoError(t, err) + quota, err = strconv.ParseInt(fields[1], 10, 64) + require.NoError(t, err) + } else { // fall back to cgroupv1 path. 
+ periodStr, err := integrationtest.ExecInnerContainer(t, pool, integrationtest.ExecConfig{ + ContainerID: resource.Container.ID, + Cmd: []string{"cat", "/sys/fs/cgroup/cpu,cpuacct/cpu.cfs_period_us"}, + }) + require.NoError(t, err) + period, err = strconv.ParseInt(strings.TrimSpace(string(periodStr)), 10, 64) + require.NoError(t, err) - quotaStr, err := integrationtest.ExecInnerContainer(t, pool, integrationtest.ExecConfig{ - ContainerID: resource.Container.ID, - Cmd: []string{"cat", "/sys/fs/cgroup/cpu/cpu.cfs_quota_us"}, - }) - require.NoError(t, err) - quota, err := strconv.ParseInt(strings.TrimSpace(string(quotaStr)), 10, 64) - require.NoError(t, err) + quotaStr, err := integrationtest.ExecInnerContainer(t, pool, integrationtest.ExecConfig{ + ContainerID: resource.Container.ID, + Cmd: []string{"cat", "/sys/fs/cgroup/cpu/cpu.cfs_quota_us"}, + }) + require.NoError(t, err) + quota, err = strconv.ParseInt(strings.TrimSpace(string(quotaStr)), 10, 64) + require.NoError(t, err) + } // Validate that the CPU limit is being applied to the inner container. actualLimit := float64(quota) / float64(period) @@ -365,7 +390,101 @@ func TestDocker(t *testing.T) { // This indicates we've made it all the way to end // of the logs we attempt to push. - require.True(t, recorder.ContainsLog("Bootstrapping workspace...")) + require.True(t, recorder.ContainsLog("Envbox startup complete!")) + }) + + // This test provides backwards compatibility for older variants of envbox that may specify a + // Docker Auth config without a hostname key. + t.Run("NoHostnameAuthConfig", func(t *testing.T) { + t.Parallel() + + var ( + dir = integrationtest.TmpDir(t) + binds = integrationtest.DefaultBinds(t, dir) + ) + + pool, err := dockertest.NewPool("") + require.NoError(t, err) + + // Create some listeners for the Docker and Coder + // services we'll be running with self signed certs. 
+ bridgeIP := integrationtest.DockerBridgeIP(t) + coderListener, err := net.Listen("tcp", fmt.Sprintf("%s:0", bridgeIP)) + require.NoError(t, err) + defer coderListener.Close() + coderAddr := tcpAddr(t, coderListener) + + registryListener, err := net.Listen("tcp", fmt.Sprintf("%s:0", bridgeIP)) + require.NoError(t, err) + err = registryListener.Close() + require.NoError(t, err) + registryAddr := tcpAddr(t, registryListener) + + coderCert := integrationtest.GenerateTLSCertificate(t, "host.docker.internal", coderAddr.IP.String()) + dockerCert := integrationtest.GenerateTLSCertificate(t, "host.docker.internal", registryAddr.IP.String()) + + // Startup our fake Coder "control-plane". + recorder := integrationtest.FakeBuildLogRecorder(t, coderListener, coderCert) + + certDir := integrationtest.MkdirAll(t, dir, "certs") + + // Write the Coder cert disk. + coderCertPath := filepath.Join(certDir, "coder_cert.pem") + coderKeyPath := filepath.Join(certDir, "coder_key.pem") + integrationtest.WriteCertificate(t, coderCert, coderCertPath, coderKeyPath) + coderCertMount := integrationtest.BindMount(certDir, "/tmp/certs", false) + + // Write the Registry cert to disk. + regCertPath := filepath.Join(certDir, "registry_cert.crt") + regKeyPath := filepath.Join(certDir, "registry_key.pem") + integrationtest.WriteCertificate(t, dockerCert, regCertPath, regKeyPath) + + username := "coder" + password := "helloworld" + + // Start up the docker registry and push an image + // to it that we can reference. 
+ image := integrationtest.RunLocalDockerRegistry(t, pool, integrationtest.RegistryConfig{ + HostCertPath: regCertPath, + HostKeyPath: regKeyPath, + Image: integrationtest.UbuntuImage, + TLSPort: strconv.Itoa(registryAddr.Port), + PasswordDir: dir, + Username: username, + Password: password, + }) + + type authConfigs struct { + Auths map[string]dockerutil.AuthConfig `json:"auths"` + } + + auths := authConfigs{ + Auths: map[string]dockerutil.AuthConfig{ + "": {Username: username, Password: password}, + }, + } + + authStr, err := json.Marshal(auths) + require.NoError(t, err) + + envs := []string{ + integrationtest.EnvVar(cli.EnvAgentToken, "faketoken"), + integrationtest.EnvVar(cli.EnvAgentURL, fmt.Sprintf("https://%s:%d", "host.docker.internal", coderAddr.Port)), + integrationtest.EnvVar(cli.EnvExtraCertsPath, "/tmp/certs"), + integrationtest.EnvVar(cli.EnvBoxPullImageSecretEnvVar, string(authStr)), + } + + // Run the envbox container. + _ = integrationtest.RunEnvbox(t, pool, &integrationtest.CreateDockerCVMConfig{ + Image: image.String(), + Username: "coder", + Envs: envs, + OuterMounts: append(binds, coderCertMount), + }) + + // This indicates we've made it all the way to end + // of the logs we attempt to push. + require.True(t, recorder.ContainsLog("Envbox startup complete!")) }) // This tests the inverse of SelfSignedCerts. 
We assert that @@ -425,6 +544,9 @@ func TestDocker(t *testing.T) { HostKeyPath: regKeyPath, Image: integrationtest.UbuntuImage, TLSPort: strconv.Itoa(registryAddr.Port), + PasswordDir: dir, + Username: "user", + Password: "password", }) envs := []string{ @@ -506,6 +628,59 @@ func TestDocker(t *testing.T) { require.NoError(t, err) require.Equal(t, "hello\n", string(output)) }) + t.Run("HandleSignals", func(t *testing.T) { + t.Parallel() + + pool, err := dockertest.NewPool("") + require.NoError(t, err) + + var ( + tmpdir = integrationtest.TmpDir(t) + binds = integrationtest.DefaultBinds(t, tmpdir) + ) + homeDir := filepath.Join(tmpdir, "home") + err = os.MkdirAll(homeDir, 0o777) + require.NoError(t, err) + + binds = append(binds, integrationtest.BindMount(homeDir, "/home/coder", false)) + + envs := []string{fmt.Sprintf("%s=%s:%s", cli.EnvMounts, "/home/coder", "/home/coder")} + // Run the envbox container. + resource := integrationtest.RunEnvbox(t, pool, &integrationtest.CreateDockerCVMConfig{ + Image: integrationtest.UbuntuImage, + Username: "root", + OuterMounts: binds, + Envs: envs, + BootstrapScript: sigtrapScript, + }) + + _, err = integrationtest.ExecInnerContainer(t, pool, integrationtest.ExecConfig{ + ContainerID: resource.Container.ID, + Cmd: []string{"/bin/sh", "-c", "stat /home/coder/foo"}, + }) + require.Error(t, err) + + // Simulate a shutdown. + integrationtest.StopContainer(t, pool, resource.Container.ID, 30*time.Second) + + err = resource.Close() + require.NoError(t, err) + + t.Logf("envbox %q started successfully, recreating...", resource.Container.ID) + // Run the envbox container. 
+ resource = integrationtest.RunEnvbox(t, pool, &integrationtest.CreateDockerCVMConfig{ + Image: integrationtest.UbuntuImage, + Username: "root", + OuterMounts: binds, + Envs: envs, + BootstrapScript: sigtrapScript, + }) + _, err = integrationtest.ExecInnerContainer(t, pool, integrationtest.ExecConfig{ + ContainerID: resource.Container.ID, + Cmd: []string{"/bin/sh", "-c", "stat /home/coder/foo"}, + }) + require.NoError(t, err) + }) } func requireSliceNoContains(t *testing.T, ss []string, els ...string) { @@ -538,3 +713,17 @@ func tcpAddr(t testing.TB, l net.Listener) *net.TCPAddr { require.True(t, ok) return tcpAddr } + +const sigtrapScript = `#!/bin/bash +cleanup() { + echo "HANDLING A SIGNAL!" && touch /home/coder/foo && echo "touched file" + exit 0 +} + +trap 'cleanup' INT TERM + +while true; do + echo "Working..." + sleep 1 +done +` diff --git a/integration/gpu_test.go b/integration/gpu_test.go new file mode 100644 index 0000000..f3fc1ac --- /dev/null +++ b/integration/gpu_test.go @@ -0,0 +1,305 @@ +package integration_test + +import ( + "context" + "os" + "os/exec" + "slices" + "strings" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/coder/envbox/integration/integrationtest" +) + +func TestDocker_Nvidia(t *testing.T) { + t.Parallel() + if val, ok := os.LookupEnv("CODER_TEST_INTEGRATION"); !ok || val != "1" { + t.Skip("integration tests are skipped unless CODER_TEST_INTEGRATION=1") + } + // Only run this test if the nvidia container runtime is detected. + // Check if the nvidia runtime is available using `docker info`. + // The docker client doesn't expose this information so we need to fetch it ourselves. + if !slices.Contains(dockerRuntimes(t), "nvidia") { + t.Skip("this test requires nvidia runtime to be available") + } + + t.Run("Ubuntu", func(t *testing.T) { + t.Parallel() + ctx, cancel := context.WithCancel(context.Background()) + t.Cleanup(cancel) + + // Start the envbox container. 
+ ctID := startEnvboxCmd(ctx, t, integrationtest.UbuntuImage, "root", + "-v", "/usr/lib/x86_64-linux-gnu:/var/coder/usr/lib", + "--env", "CODER_ADD_GPU=true", + "--env", "CODER_USR_LIB_DIR=/var/coder/usr/lib", + "--runtime=nvidia", + "--gpus=all", + ) + + // Assert that we can run nvidia-smi in the inner container. + assertInnerNvidiaSMI(ctx, t, ctID) + }) + + t.Run("Redhat", func(t *testing.T) { + t.Parallel() + ctx, cancel := context.WithCancel(context.Background()) + t.Cleanup(cancel) + + // Start the envbox container. + ctID := startEnvboxCmd(ctx, t, integrationtest.RedhatImage, "root", + "-v", "/usr/lib/x86_64-linux-gnu:/var/coder/usr/lib", + "--env", "CODER_ADD_GPU=true", + "--env", "CODER_USR_LIB_DIR=/var/coder/usr/lib", + "--runtime=nvidia", + "--gpus=all", + ) + + // Assert that we can run nvidia-smi in the inner container. + assertInnerNvidiaSMI(ctx, t, ctID) + + // Make sure dnf still works. This checks for a regression due to + // gpuExtraRegex matching `libglib.so` in the outer container. + // This had a dependency on `libpcre.so.3` which would cause dnf to fail. + out, err := execContainerCmd(ctx, t, ctID, "docker", "exec", "workspace_cvm", "dnf") + if !assert.NoError(t, err, "failed to run dnf in the inner container") { + t.Logf("dnf output:\n%s", strings.TrimSpace(out)) + } + + // Make sure libglib.so is not present in the inner container. + out, err = execContainerCmd(ctx, t, ctID, "docker", "exec", "workspace_cvm", "ls", "-1", "/usr/lib/x86_64-linux-gnu/libglib*") + // An error is expected here. + assert.Error(t, err, "libglib should not be present in the inner container") + assert.Contains(t, out, "No such file or directory", "libglib should not be present in the inner container") + }) + + t.Run("InnerUsrLibDirOverride", func(t *testing.T) { + t.Parallel() + ctx, cancel := context.WithCancel(context.Background()) + t.Cleanup(cancel) + + // Start the envbox container. 
+ ctID := startEnvboxCmd(ctx, t, integrationtest.UbuntuImage, "root", + "-v", "/usr/lib/x86_64-linux-gnu:/var/coder/usr/lib", + "--env", "CODER_ADD_GPU=true", + "--env", "CODER_USR_LIB_DIR=/var/coder/usr/lib", + "--env", "CODER_INNER_USR_LIB_DIR=/usr/lib/coder", + "--runtime=nvidia", + "--gpus=all", + ) + + // Assert that the libraries end up in the expected location in the inner + // container. + out, err := execContainerCmd(ctx, t, ctID, "docker", "exec", "workspace_cvm", "ls", "-1", "/usr/lib/coder") + require.NoError(t, err, "inner usr lib dir override failed") + require.Regexp(t, `(?i)(libgl|nvidia|vulkan|cuda)`, out) + }) + + t.Run("EmptyHostUsrLibDir", func(t *testing.T) { + t.Parallel() + ctx, cancel := context.WithCancel(context.Background()) + t.Cleanup(cancel) + emptyUsrLibDir := t.TempDir() + + // Start the envbox container. + ctID := startEnvboxCmd(ctx, t, integrationtest.UbuntuImage, "root", + "-v", emptyUsrLibDir+":/var/coder/usr/lib", + "--env", "CODER_ADD_GPU=true", + "--env", "CODER_USR_LIB_DIR=/var/coder/usr/lib", + "--runtime=nvidia", + "--gpus=all", + ) + + ofs := outerFiles(ctx, t, ctID, "/usr/lib/x86_64-linux-gnu/libnv*") + // Assert invariant: the outer container has the files we expect. + require.NotEmpty(t, ofs, "failed to list outer container files") + // Assert that expected files are available in the inner container. + assertInnerFiles(ctx, t, ctID, "/usr/lib/x86_64-linux-gnu/libnv*", ofs...) + assertInnerNvidiaSMI(ctx, t, ctID) + }) + + t.Run("CUDASample", func(t *testing.T) { + t.Parallel() + + ctx, cancel := context.WithCancel(context.Background()) + t.Cleanup(cancel) + + // Start the envbox container. + ctID := startEnvboxCmd(ctx, t, integrationtest.CUDASampleImage, "root", + "-v", "/usr/lib/x86_64-linux-gnu:/var/coder/usr/lib", + "--env", "CODER_ADD_GPU=true", + "--env", "CODER_USR_LIB_DIR=/var/coder/usr/lib", + "--runtime=nvidia", + "--gpus=all", + ) + + // Assert that we can run nvidia-smi in the inner container. 
+ assertInnerNvidiaSMI(ctx, t, ctID) + + // Assert that /tmp/vectorAdd runs successfully in the inner container. + _, err := execContainerCmd(ctx, t, ctID, "docker", "exec", "workspace_cvm", "/tmp/vectorAdd") + require.NoError(t, err, "failed to run /tmp/vectorAdd in the inner container") + }) +} + +// dockerRuntimes returns the list of container runtimes available on the host. +// It does this by running `docker info` and parsing the output. +func dockerRuntimes(t *testing.T) []string { + t.Helper() + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + cmd := exec.CommandContext(ctx, "docker", "info", "--format", "{{ range $k, $v := .Runtimes}}{{ println $k }}{{ end }}") + out, err := cmd.CombinedOutput() + require.NoError(t, err, "failed to get docker runtimes: %s", out) + raw := strings.TrimSpace(string(out)) + return strings.Split(raw, "\n") +} + +// outerFiles returns the list of files in the outer container matching the +// given pattern. It does this by running `ls -1` in the outer container. +func outerFiles(ctx context.Context, t *testing.T, containerID, pattern string) []string { + t.Helper() + // We need to use /bin/sh -c to avoid the shell interpreting the glob. + out, err := execContainerCmd(ctx, t, containerID, "/bin/sh", "-c", "ls -1 "+pattern) + require.NoError(t, err, "failed to list outer container files") + files := strings.Split(strings.TrimSpace(out), "\n") + slices.Sort(files) + return files +} + +// assertInnerFiles checks that all the files matching the given pattern exist in the +// inner container. +func assertInnerFiles(ctx context.Context, t *testing.T, containerID, pattern string, expected ...string) { + t.Helper() + + // Get the list of files in the inner container. + // We need to use /bin/sh -c to avoid the shell interpreting the glob. 
+ out, err := execContainerCmd(ctx, t, containerID, "docker", "exec", "workspace_cvm", "/bin/sh", "-c", "ls -1 "+pattern) + require.NoError(t, err, "failed to list inner container files") + innerFiles := strings.Split(strings.TrimSpace(out), "\n") + + // Check that the expected files exist in the inner container. + missingFiles := make([]string, 0) + for _, expectedFile := range expected { + if !slices.Contains(innerFiles, expectedFile) { + missingFiles = append(missingFiles, expectedFile) + } + } + require.Empty(t, missingFiles, "missing files in inner container: %s", strings.Join(missingFiles, ", ")) +} + +// assertInnerNvidiaSMI checks that nvidia-smi runs successfully in the inner +// container. +func assertInnerNvidiaSMI(ctx context.Context, t *testing.T, containerID string) { + t.Helper() + // Assert that we can run nvidia-smi in the inner container. + out, err := execContainerCmd(ctx, t, containerID, "docker", "exec", "workspace_cvm", "nvidia-smi") + require.NoError(t, err, "failed to run nvidia-smi in the inner container") + require.Contains(t, out, "NVIDIA-SMI", "nvidia-smi output does not contain NVIDIA-SMI") +} + +// startEnvboxCmd starts the envbox container with the given arguments. +// Ideally we would use ory/dockertest for this, but it doesn't support +// specifying the runtime. We have alternatively used the docker client library, +// but a nice property of using the docker cli is that if a test fails, you can +// just run the command manually to debug! +func startEnvboxCmd(ctx context.Context, t *testing.T, innerImage, innerUser string, addlArgs ...string) (containerID string) { + t.Helper() + + var ( + tmpDir = integrationtest.TmpDir(t) + binds = integrationtest.DefaultBinds(t, tmpDir) + cancelCtx, cancel = context.WithCancel(ctx) + ) + t.Cleanup(cancel) + + // Unfortunately ory/dockertest does not allow us to specify runtime. + // We're instead going to just run the container directly via the docker cli. 
+ startEnvboxArgs := []string{ + "run", + "--detach", + "--rm", + "--privileged", + "--env", "CODER_INNER_IMAGE=" + innerImage, + "--env", "CODER_INNER_USERNAME=" + innerUser, + } + for _, bind := range binds { + bindParts := []string{bind.Source, bind.Target} + if bind.ReadOnly { + bindParts = append(bindParts, "ro") + } + startEnvboxArgs = append(startEnvboxArgs, []string{"-v", strings.Join(bindParts, ":")}...) + } + startEnvboxArgs = append(startEnvboxArgs, addlArgs...) + startEnvboxArgs = append(startEnvboxArgs, "envbox:latest", "/envbox", "docker") + t.Logf("envbox docker cmd: docker %s", strings.Join(startEnvboxArgs, " ")) + + // Start the envbox container without attaching. + startEnvboxCmd := exec.CommandContext(cancelCtx, "docker", startEnvboxArgs...) + out, err := startEnvboxCmd.CombinedOutput() + require.NoError(t, err, "failed to start envbox container") + containerID = strings.TrimSpace(string(out)) + t.Logf("envbox container ID: %s", containerID) + t.Cleanup(func() { + if t.Failed() { + // Dump the logs if the test failed. + logsCmd := exec.Command("docker", "logs", containerID) + out, err := logsCmd.CombinedOutput() + if err != nil { + t.Logf("failed to read logs: %s", err) + } + t.Logf("envbox logs:\n%s", string(out)) + } + // Stop the envbox container. + stopEnvboxCmd := exec.Command("docker", "rm", "-f", containerID) + out, err := stopEnvboxCmd.CombinedOutput() + if err != nil { + t.Errorf("failed to stop envbox container: %s", out) + } + }) + + // Wait for the Docker CVM to come up. 
+ waitCtx, waitCancel := context.WithTimeout(cancelCtx, 5*time.Minute) + defer waitCancel() +WAITLOOP: + for { + select { + case <-waitCtx.Done(): + t.Fatal("timed out waiting for inner container to come up") + default: + execCmd := exec.CommandContext(cancelCtx, "docker", "exec", containerID, "docker", "inspect", "workspace_cvm") + out, err := execCmd.CombinedOutput() + if err != nil { + t.Logf("waiting for inner container to come up:\n%s", string(out)) + <-time.After(time.Second) + continue WAITLOOP + } + t.Logf("inner container is up") + break WAITLOOP + } + } + + return containerID +} + +func execContainerCmd(ctx context.Context, t *testing.T, containerID string, cmdArgs ...string) (string, error) { + t.Helper() + + execArgs := []string{"exec", containerID} + execArgs = append(execArgs, cmdArgs...) + t.Logf("exec cmd: docker %s", strings.Join(execArgs, " ")) + execCmd := exec.CommandContext(ctx, "docker", execArgs...) + out, err := execCmd.CombinedOutput() + if err != nil { + t.Logf("exec cmd failed: %s\n%s", err.Error(), string(out)) + } else { + t.Logf("exec cmd success: %s", out) + } + return strings.TrimSpace(string(out)), err +} diff --git a/integration/integrationtest/docker.go b/integration/integrationtest/docker.go index 6b5e07c..b6f5ce4 100644 --- a/integration/integrationtest/docker.go +++ b/integration/integrationtest/docker.go @@ -33,18 +33,23 @@ import ( const ( // DockerdImage is a large image (~1GB) and should only be used to test // dockerd. - DockerdImage = "gcr.io/coder-dev-1/sreya/enterprise-base:ubuntu" + DockerdImage = "us-docker.pkg.dev/coder-v2-images-public/public/envbox/enterprise-base:ubuntu" // HelloWorldImage is useful for testing a CVM's dockerd is functioning // correctly - HelloWorldImage = "gcr.io/coder-dev-1/sreya/hello-world" + HelloWorldImage = "us-docker.pkg.dev/coder-v2-images-public/public/envbox/hello-world" // UbuntuImage is just vanilla ubuntu (80MB) but the user is set to a non-root // user . 
- UbuntuImage = "gcr.io/coder-dev-1/sreya/ubuntu-coder" + UbuntuImage = "us-docker.pkg.dev/coder-v2-images-public/public/envbox/ubuntu-coder" + // RedhatImage is the Red Hat UBI9 image as of 2025-03-05. + RedhatImage = "registry.access.redhat.com/ubi9/ubi:9.5" + // CUDASampleImage is a CUDA sample image from NVIDIA's container registry. + // It contains a binary /tmp/vectorAdd which can be run to test the CUDA setup. + CUDASampleImage = "nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda10.2" // RegistryImage is used to assert that we add certs // correctly to the docker daemon when pulling an image // from a registry with a self signed cert. - registryImage = "gcr.io/coder-dev-1/sreya/registry" + registryImage = "us-docker.pkg.dev/coder-v2-images-public/public/envbox/registry" registryTag = "2.8.3" ) @@ -301,6 +306,31 @@ func ExecEnvbox(t *testing.T, pool *dockertest.Pool, conf ExecConfig) ([]byte, e return buf.Bytes(), nil } +func StopContainer(t *testing.T, pool *dockertest.Pool, id string, to time.Duration) { + t.Helper() + + err := pool.Client.KillContainer(docker.KillContainerOptions{ + ID: id, + Signal: docker.SIGTERM, + }) + require.NoError(t, err) + + ctx, cancel := context.WithTimeout(context.Background(), to) + defer cancel() + for r := retry.New(time.Second, time.Second); r.Wait(ctx); { + cnt, err := pool.Client.InspectContainer(id) + require.NoError(t, err) + + if cnt.State.Running { + continue + } + + return + } + + t.Fatalf("timed out waiting for container %s to stop", id) +} + // cmdLineEnvs returns args passed to the /envbox command // but using their env var alias.
func cmdLineEnvs(c *CreateDockerCVMConfig) []string { diff --git a/integration/integrationtest/os.go b/integration/integrationtest/os.go index 45aacd7..dc13318 100644 --- a/integration/integrationtest/os.go +++ b/integration/integrationtest/os.go @@ -17,6 +17,12 @@ func TmpDir(t *testing.T) string { tmpdir, err := os.MkdirTemp("", strings.ReplaceAll(t.Name(), "/", "_")) require.NoError(t, err) t.Logf("using tmpdir %s", tmpdir) + t.Cleanup(func() { + if !t.Failed() { + // Could be useful in case of test failure. + _ = os.RemoveAll(tmpdir) + } + }) return tmpdir } diff --git a/sysboxutil/manager.go b/sysboxutil/manager.go index 61ea658..d3f9bef 100644 --- a/sysboxutil/manager.go +++ b/sysboxutil/manager.go @@ -5,8 +5,9 @@ import ( "os" "time" - "github.com/coder/envbox/xunix" "golang.org/x/xerrors" + + "github.com/coder/envbox/xunix" ) const ManagerSocketPath = "/run/sysbox/sysmgr.sock" diff --git a/xunix/gpu.go b/xunix/gpu.go index a494ab5..a9129d5 100644 --- a/xunix/gpu.go +++ b/xunix/gpu.go @@ -6,6 +6,7 @@ import ( "os" "path/filepath" "regexp" + "slices" "sort" "strings" @@ -17,9 +18,10 @@ import ( ) var ( - gpuMountRegex = regexp.MustCompile("(?i)(nvidia|vulkan|cuda)") - gpuExtraRegex = regexp.MustCompile("(?i)(libgl|nvidia|vulkan|cuda)") - gpuEnvRegex = regexp.MustCompile("(?i)nvidia") + gpuMountRegex = regexp.MustCompile(`(?i)(nvidia|vulkan|cuda)`) + gpuExtraRegex = regexp.MustCompile(`(?i)(libgl(e|sx|\.)|nvidia|vulkan|cuda)`) + gpuEnvRegex = regexp.MustCompile(`(?i)nvidia`) + sharedObjectRegex = regexp.MustCompile(`\.so(\.[0-9\.]+)?$`) ) func GPUEnvs(ctx context.Context) []string { @@ -38,6 +40,7 @@ func GPUEnvs(ctx context.Context) []string { func GPUs(ctx context.Context, log slog.Logger, usrLibDir string) ([]Device, []mount.MountPoint, error) { var ( + afs = GetFS(ctx) mounter = Mounter(ctx) devices = []Device{} binds = []mount.MountPoint{} @@ -63,6 +66,22 @@ func GPUs(ctx context.Context, log slog.Logger, usrLibDir string) ([]Device, []m // If it's not in 
/dev treat it as a bind mount. binds = append(binds, m) + // We also want to find any symlinks that point to the target. + // This is important for the nvidia driver as it mounts the driver + // files with the driver version appended to the end, and creates + // symlinks that point to the actual files. + links, err := SameDirSymlinks(afs, m.Path) + if err != nil { + log.Error(ctx, "find symlinks", slog.F("path", m.Path), slog.Error(err)) + } else { + for _, link := range links { + log.Debug(ctx, "found symlink", slog.F("link", link), slog.F("target", m.Path)) + binds = append(binds, mount.MountPoint{ + Path: link, + Opts: []string{"ro"}, + }) + } + } } } @@ -103,7 +122,11 @@ func usrLibGPUs(ctx context.Context, log slog.Logger, usrLibDir string) ([]mount return nil } - if filepath.Ext(path) != ".so" || !gpuExtraRegex.MatchString(path) { + if !gpuExtraRegex.MatchString(path) { + return nil + } + + if !sharedObjectRegex.MatchString(path) { return nil } @@ -175,6 +198,75 @@ func recursiveSymlinks(afs FS, mountpoint string, path string) ([]string, error) return paths, nil } +// SameDirSymlinks returns all links in the same directory as `target` that +// point to target, either indirectly or directly. Only symlinks in the same +// directory as `target` are considered. +func SameDirSymlinks(afs FS, target string) ([]string, error) { + // Get the list of files in the directory of the target. + fis, err := afero.ReadDir(afs, filepath.Dir(target)) + if err != nil { + return nil, xerrors.Errorf("read dir %q: %w", filepath.Dir(target), err) + } + + // Do an initial pass to map all symlinks to their destinations. + allLinks := make(map[string]string) + for _, fi := range fis { + // Ignore non-symlinks. 
+ if fi.Mode()&os.ModeSymlink == 0 { + continue + } + + absPath := filepath.Join(filepath.Dir(target), fi.Name()) + link, err := afs.Readlink(filepath.Join(filepath.Dir(target), fi.Name())) + if err != nil { + return nil, xerrors.Errorf("readlink %q: %w", fi.Name(), err) + } + + if !filepath.IsAbs(link) { + link = filepath.Join(filepath.Dir(target), link) + } + allLinks[absPath] = link + } + + // Now we can start checking for symlinks that point to the target. + var ( + found = make([]string, 0) + // Set an arbitrary upper limit to prevent infinite loops. + maxIterations = 10 + ) + for range maxIterations { + var foundThisTime bool + for linkName, linkDest := range allLinks { + // Ignore symlinks that point outside of target's directory. + if filepath.Dir(linkName) != filepath.Dir(target) { + continue + } + + // If the symlink points to the target, add it to the list. + if linkDest == target { + if !slices.Contains(found, linkName) { + found = append(found, linkName) + foundThisTime = true + } + } + + // If the symlink points to another symlink that we already determined + // points to the target, add it to the list. + if slices.Contains(found, linkDest) { + if !slices.Contains(found, linkName) { + found = append(found, linkName) + foundThisTime = true + } + } + } + // If we didn't find any new symlinks, we're done. + if !foundThisTime { + break + } + } + return found, nil +} + // TryUnmountProcGPUDrivers unmounts any GPU-related mounts under /proc as it causes // issues when creating any container in some cases. Errors encountered while // unmounting are treated as non-fatal. 
diff --git a/xunix/gpu_test.go b/xunix/gpu_test.go index 4cbf5f0..4324fcf 100644 --- a/xunix/gpu_test.go +++ b/xunix/gpu_test.go @@ -2,10 +2,13 @@ package xunix_test import ( "context" + "os" "path/filepath" + "sort" "testing" "github.com/spf13/afero" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "k8s.io/mount-utils" @@ -56,14 +59,20 @@ func TestGPUs(t *testing.T) { expectedUsrLibFiles = []string{ filepath.Join(usrLibMountpoint, "nvidia", "libglxserver_nvidia.so"), filepath.Join(usrLibMountpoint, "libnvidia-ml.so"), + filepath.Join(usrLibMountpoint, "nvidia", "libglxserver_nvidia.so.1"), } - // fakeUsrLibFiles are files that should be written to the "mounted" - // /usr/lib directory. It includes files that shouldn't be returned. - fakeUsrLibFiles = append([]string{ + // fakeUsrLibFiles are files that we do not expect to be returned + // bind mounts for. + fakeUsrLibFiles = []string{ filepath.Join(usrLibMountpoint, "libcurl-gnutls.so"), - filepath.Join(usrLibMountpoint, "nvidia", "libglxserver_nvidia.so.1"), - }, expectedUsrLibFiles...) + filepath.Join(usrLibMountpoint, "libglib.so"), + } + + // allUsrLibFiles are all the files that should be written to the + // "mounted" /usr/lib directory. It includes files that shouldn't + // be returned. + allUsrLibFiles = append(expectedUsrLibFiles, fakeUsrLibFiles...) 
) ctx := xunix.WithFS(context.Background(), fs) @@ -90,15 +99,19 @@ func TestGPUs(t *testing.T) { err := fs.MkdirAll(filepath.Join(usrLibMountpoint, "nvidia"), 0o755) require.NoError(t, err) - for _, file := range fakeUsrLibFiles { + for _, file := range allUsrLibFiles { _, err = fs.Create(file) require.NoError(t, err) } + for _, mp := range mounter.MountPoints { + _, err = fs.Create(mp.Path) + require.NoError(t, err) + } devices, binds, err := xunix.GPUs(ctx, log, usrLibMountpoint) require.NoError(t, err) require.Len(t, devices, 2, "unexpected 2 nvidia devices") - require.Len(t, binds, 3, "expected 4 nvidia binds") + require.Len(t, binds, 4, "expected 4 nvidia binds") require.Contains(t, binds, mount.MountPoint{ Device: "/dev/sda1", Path: "/usr/local/nvidia", @@ -110,5 +123,111 @@ func TestGPUs(t *testing.T) { Opts: []string{"ro"}, }) } + for _, file := range fakeUsrLibFiles { + require.NotContains(t, binds, mount.MountPoint{ + Path: file, + Opts: []string{"ro"}, + }) + } }) } + +func Test_SameDirSymlinks(t *testing.T) { + t.Parallel() + + var ( + ctx = context.Background() + // We need to test with a real filesystem as the fake one doesn't + // support creating symlinks. + tmpDir = t.TempDir() + // We do test with the interface though! + afs = xunix.GetFS(ctx) + ) + + // Create some files in the temporary directory. + _, err := os.Create(filepath.Join(tmpDir, "file1.real")) + require.NoError(t, err, "create file") + _, err = os.Create(filepath.Join(tmpDir, "file2.real")) + require.NoError(t, err, "create file2") + _, err = os.Create(filepath.Join(tmpDir, "file3.real")) + require.NoError(t, err, "create file3") + _, err = os.Create(filepath.Join(tmpDir, "file4.real")) + require.NoError(t, err, "create file4") + + // Create a symlink to the file in the temporary directory. + // This needs to be done by the real os package. 
+ err = os.Symlink(filepath.Join(tmpDir, "file1.real"), filepath.Join(tmpDir, "file1.link1")) + require.NoError(t, err, "create first symlink") + + // Create another symlink to the previous symlink. + err = os.Symlink(filepath.Join(tmpDir, "file1.link1"), filepath.Join(tmpDir, "file1.link2")) + require.NoError(t, err, "create second symlink") + + // Create a symlink in a subdirectory, i.e. not in the same directory as its target. + err = os.MkdirAll(filepath.Join(tmpDir, "dir"), 0o755) + require.NoError(t, err, "create dir") + // Create a symlink from file2 to inside the dir. + err = os.Symlink(filepath.Join(tmpDir, "file2.real"), filepath.Join(tmpDir, "dir", "file2.link1")) + require.NoError(t, err, "create dir symlink") + + // Create a symlink with a relative path. To do this, we need to + // change the working directory to the temporary directory. + oldWorkingDir, err := os.Getwd() + require.NoError(t, err, "get working dir") + // Change the working directory to the temporary directory. + require.NoError(t, os.Chdir(tmpDir), "change working dir") + err = os.Symlink(filepath.Join(tmpDir, "file4.real"), "file4.link1") + require.NoError(t, err, "create relative symlink") + // Change the working directory back to the original. + require.NoError(t, os.Chdir(oldWorkingDir), "change working dir back") + + for _, tt := range []struct { + name string + expected []string + }{ + { + // Two symlinks to the same file. + name: "file1.real", + expected: []string{ + filepath.Join(tmpDir, "file1.link1"), + filepath.Join(tmpDir, "file1.link2"), + }, + }, + { + // Mid-way in the symlink chain. + name: "file1.link1", + expected: []string{ + filepath.Join(tmpDir, "file1.link2"), + }, + }, + { + // End of the symlink chain. + name: "file1.link2", + expected: []string{}, + }, + { + // The only symlink to this file is in a subdirectory, so it is ignored. + name: "file2.real", + expected: []string{}, + }, + { + // No symlinks to this file. + name: "file3.real", + expected: []string{}, + }, + { + // One relative symlink.
+ name: "file4.real", + expected: []string{filepath.Join(tmpDir, "file4.link1")}, + }, + } { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + fullPath := filepath.Join(tmpDir, tt.name) + actual, err := xunix.SameDirSymlinks(afs, fullPath) + require.NoError(t, err, "find symlink") + sort.Strings(actual) + assert.Equal(t, tt.expected, actual, "find symlinks %q", tt.name) + }) + } +}