14 changes: 12 additions & 2 deletions .gitignore
@@ -9,13 +9,23 @@ mlr3torch*.tgz
*~
docs
inst/doc
*.html
**/.DS_Store
/doc/
/Meta/
CRAN-SUBMISSION
paper/data
.idea/
.vsc/
paper/data
paper/data/
paper/benchmark/registry
.vscode/
paper/benchmark/registry-linux-cpu/
paper/benchmark/registry-macos/
paper/benchmark/registry-linux-gpu/
paper/benchmark/registry-linux-gpu/**
paper/benchmark/registry-linux-gpu-optimizer/
paper/benchmark/registry-linux-gpu-old/
paper/paper.aux
paper/paper.fdb_latexmk
paper/paper.fls
paper/paper.log
18 changes: 18 additions & 0 deletions paper/.Rprofile
@@ -0,0 +1,18 @@
# Set the HTTP user agent to identify the OS, so that P3M (Posit Package Manager) can detect binary compatibility
options(HTTPUserAgent = sprintf("R/%s R (%s)", getRversion(), paste(getRversion(), R.version["platform"], R.version["arch"], R.version["os"])))

# Ensure curl is used for downloading packages
options(download.file.method = "curl")

# Enable verbose output for curl and again set the HTTP user agent
options(download.file.extra = paste(
# Follow redirects, show errors, and display the HTTP status and URL
'-fsSL -w "%{stderr}curl: HTTP %{http_code} %{url_effective}\n"',
# Configure the R user agent header to install Linux binary packages
sprintf('--header "User-Agent: R (%s)"', paste(getRversion(), R.version["platform"], R.version["arch"], R.version["os"]))
))

# For Ubuntu (Jammy): use the P3M Linux binary repository
options(
repos = c(CRAN = "https://packagemanager.posit.co/cran/__linux__/jammy/latest")
)
145 changes: 145 additions & 0 deletions paper/README.md
@@ -0,0 +1,145 @@
# Reproducing the Results

## Computational Environment

For reproducibility, two Linux Docker images are provided, one for CPU and one for CUDA GPU: https://doi.org/10.5281/zenodo.17152009.

You can, e.g., download the images via the [zenodo-client](https://pypi.org/project/zenodo-client/) library:

```bash
# pip install zenodo-client
export ZENODO_API_TOKEN=<your-token>
# for CPU:
zenodo-client download 17140855 IMAGE_CPU.tar.gz
```

By default, the downloaded files are stored in `~/.data/zenodo`.

At the time of writing, the images are also hosted on Docker Hub, but this is not permanent storage:
https://hub.docker.com/repository/docker/sebffischer/mlr3torch-jss/general

The `Dockerfile`s used to create the images are available in the `paper/envs` directory.

After downloading the images, you can load them into Docker, e.g. via:

```bash
docker load -i IMAGE_CPU.tar.gz
```

When using another container runtime such as `enroot`, a workaround is to load the image with Docker on a system that has it installed, push it to a Docker Hub repository, and then pull it from there with `enroot`:

```bash
enroot import docker://sebffischer/mlr3torch-jss:cpu
enroot create --name mlr3torch-jss:cpu sebffischer+mlr3torch-jss+cpu.sqsh
```

To start the container using `Docker`, run:

```bash
docker run -it --rm -v <parent-dir-to-mlr3torch>:/mnt/data/mlr3torch sebffischer/mlr3torch-jss:cpu
```
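
For the GPU benchmarks, the GPUs have to be passed through to the container. A sketch, assuming the CUDA image is tagged `gpu` (as suggested by the `.sqsh` name used in the Slurm scripts) and the NVIDIA Container Toolkit is installed:

```bash
# Same mount as above, additionally exposing all NVIDIA GPUs to the container
docker run --gpus all -it --rm \
  -v <parent-dir-to-mlr3torch>:/mnt/data/mlr3torch \
  sebffischer/mlr3torch-jss:gpu
```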

To start the container using `enroot`, run:

```bash
enroot start \
--mount <parent-dir-to-mlr3torch>:/mnt/data \
mlr3torch-jss:cpu bash
```

Some of the code expects this directory structure, so make sure to mount the directory as shown above.
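
As a rough sketch (paths inferred from the mounts above and the Slurm scripts in `paper/benchmark/`), the scripts expect to find the repository at `/mnt/data/mlr3torch` inside the container:

```bash
# Expected layout inside the container (a sketch, not an exhaustive listing):
#   /mnt/data/                  <- <parent-dir-to-mlr3torch> mounted here
#   /mnt/data/mlr3torch/        <- the mlr3torch repository
#   /mnt/data/mlr3torch/paper/  <- working directory for the commands below
ls /mnt/data/mlr3torch/paper
```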

## Running the Benchmark

Note that while the benchmark uses `batchtools` to define the experiments, we do not use it for job submission; this ensures that all GPU benchmarks (and likewise all CPU benchmarks) are run on the same machine.

### Running locally

Note that it is important to have enough RAM available; otherwise, the benchmark results will not be comparable.
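
For example, you can check the memory available inside the container before starting the runs (assuming a standard Linux userland):

```bash
# Show total, used, and available memory in human-readable units
free -h
```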

To run the benchmarks locally, first change into the `./paper` directory.

To run the GPU benchmarks (using the CUDA Docker image) on Linux, run:

```bash
Rscript benchmark/linux-gpu.R
```

To run the CPU benchmarks (using the CPU Docker image) on Linux, run:

```bash
Rscript benchmark/linux-cpu.R
```

To run the benchmark that compares "ignite" with standard optimizers (using the CUDA Docker image) on Linux, run:

```bash
Rscript benchmark/linux-gpu-optimizer.R
```

The results are stored in:

* `paper/benchmark/result-linux-gpu.rds`
* `paper/benchmark/result-linux-cpu.rds`
* `paper/benchmark/result-linux-gpu-optimizer.rds`
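
The saved objects can be inspected directly in R; a minimal sketch (the exact structure is determined by `benchmark/summarize.R`):

```r
result = readRDS("paper/benchmark/result-linux-cpu.rds")
str(result)  # inspect the structure of the summarized benchmark results
```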

There are also example Slurm scripts that need to be adapted to the specific cluster and job submission system:

* `paper/benchmark/benchmark_gpu.sh`
* `paper/benchmark/benchmark_gpu_optimizer.sh`
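
After adapting the partition, account, and paths to your cluster, the scripts can be submitted in the usual way, e.g.:

```bash
sbatch paper/benchmark/benchmark_gpu.sh
```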

### Running a Subset of the Jobs

To run a subset of the jobs, you can adjust the runner scripts to do something along the lines of:

```r
reg = loadRegistry("~/mlr3torch/paper/benchmark/registry", writeable = TRUE)
tbl = unwrap(getJobTable(reg))
# Select, e.g., all CPU jobs for one specific configuration
ids = tbl[device == "cpu" & n_layers == 10 & latent == 250 & jit & optimizer == "adamw" & repl == 1, ]$job.id
# Submit the jobs one at a time in random order
for (id in sample(ids)) {
  submitJobs(id)
  Sys.sleep(0.1)
}
```
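
After the selected jobs have finished, their results can be aggregated with the same helper the runner scripts use (a sketch; `summarize()` is defined in `benchmark/summarize.R`, and the output file name below is hypothetical):

```r
source(here::here("paper", "benchmark", "summarize.R"))
result = summarize(ids)
# Hypothetical output path; the full runner scripts write e.g. result-linux-cpu.rds
saveRDS(result, here::here("paper", "benchmark", "result-subset.rds"))
```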

### Generating the Plots

For the main benchmark shown in the paper, run:

```bash
Rscript paper/benchmark/plot_benchmark.R
```

For the comparison of "ignite" with standard optimizers, run:

```bash
Rscript paper/benchmark/plot_optimizer.R
```

These commands generate the files:

* `paper/benchmark/plot_benchmark.png`
* `paper/benchmark/plot_benchmark_relative.png`
* `paper/benchmark/plot_optimizer.png`

## Running the Paper Code

In the Docker container, run the following code from the `./paper` directory.
This requires access to an NVIDIA GPU and the CUDA Docker image.

```r
knitr::knit('paper_code.Rmd')
```

The result of the above is `paper_code.md`.
The ROC plot is postprocessed using the `roc.R` script.
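
A minimal sketch of that step, assuming `roc.R` lives in the `./paper` directory and takes no command-line arguments:

```bash
Rscript roc.R
```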

To demonstrate reproducibility of the code on CPU (see paper Appendix A), we include a considerably simplified version of the paper code, in which the tasks are subset to only a few rows and some other hyperparameters are adjusted.
This means the results are not meaningful, but the code can be run on a CPU in a short amount of time.
Use the Linux CPU Docker image for this.

```r
knitr::knit('paper_code_cheap_cpu.Rmd')
```

The results of running this on the CPU container are included in `paper_code_cheap_cpu.md`.
1 change: 1 addition & 0 deletions paper/batchtools.conf.R
@@ -0,0 +1 @@
cluster.functions = batchtools::makeClusterFunctionsInteractive()
95 changes: 95 additions & 0 deletions paper/benchmark/benchmark.R
@@ -0,0 +1,95 @@
library(batchtools)
library(mlr3misc)

setup = function(reg_path, python_path, work_dir) {
reg = makeExperimentRegistry(
file.dir = reg_path,
work.dir = work_dir,
packages = "checkmate"
)
reg$cluster.functions = makeClusterFunctionsInteractive()

source(here::here("paper/benchmark/time_rtorch.R"))

batchExport(list(
time_rtorch = time_rtorch # nolint
))

addProblem(
"runtime_train",
data = NULL,
fun = function(
epochs,
batch_size,
n_layers,
latent,
n,
p,
optimizer,
device,
...
) {
problem = list(
epochs = assert_int(epochs),
batch_size = assert_int(batch_size),
n_layers = assert_int(n_layers),
latent = assert_int(latent),
n = assert_int(n),
p = assert_int(p),
optimizer = assert_choice(
optimizer,
c("ignite_adamw", "adamw", "sgd", "ignite_sgd")
),
device = assert_choice(device, c("cuda", "cpu", "mps"))
)

problem
}
)

addAlgorithm("pytorch", fun = function(instance, job, data, jit, ...) {
f = function(..., python_path) {
library(reticulate)
x = try(
{
#reticulate::use_python("/opt/homebrew/Caskroom/mambaforge/base/bin/python3", required = TRUE)
reticulate::use_python(python_path, required = TRUE)
reticulate::source_python(here::here("paper/benchmark/time_pytorch.py"))
print(reticulate::py_config())
time_pytorch(...) # nolint
},
silent = TRUE
)
print(x)
}
args = c(instance, list(seed = job$seed, jit = jit, python_path = python_path))
#do.call(f, args)
callr::r(f, args = args)
})

addAlgorithm("rtorch", fun = function(instance, job, opt_type, jit, ...) {
assert_choice(opt_type, c("standard", "ignite"))
if (opt_type == "ignite") {
instance$optimizer = paste0("ignite_", instance$optimizer)
}
#do.call(time_rtorch, args = c(instance, list(seed = job$seed, jit = jit))) # nolint
callr::r(time_rtorch, args = c(instance, list(seed = job$seed, jit = jit))) # nolint
})

addAlgorithm("mlr3torch", fun = function(instance, job, opt_type, jit, ...) {
if (opt_type == "ignite") {
instance$optimizer = paste0("ignite_", instance$optimizer)
}
callr::r(
time_rtorch, # nolint
args = c(instance, list(seed = job$seed, mlr3torch = TRUE, jit = jit))
)
#do.call(time_rtorch, args = c(instance, list(seed = job$seed, mlr3torch = TRUE, jit = jit)))
})
}

# global config:
REPLS = 10L
EPOCHS = 20L
N = 2000L
P = 1000L
21 changes: 21 additions & 0 deletions paper/benchmark/benchmark_gpu.sh
@@ -0,0 +1,21 @@
#!/bin/bash
#SBATCH --job-name=mlr3torch-benchmark
#SBATCH --partition=mcml-hgx-a100-80x4
#SBATCH --gres=gpu:4
#SBATCH --qos=mcml
#SBATCH --ntasks=1
#SBATCH --time=48:00:00
#SBATCH --exclusive
#SBATCH --output=mlr3torch-benchmark-%j.out

cd /dss/dssmcmlfs01/pr74ze/pr74ze-dss-0001/ru48nas2/
enroot create --force --name mlr3torch-jss sebffischer+mlr3torch-jss+gpu.sqsh

enroot start \
--mount /dss/dssmcmlfs01/pr74ze/pr74ze-dss-0001/ru48nas2/:/mnt/data \
mlr3torch-jss bash -c "
cd /mnt/data/mlr3torch/paper
Rscript -e \"
source('benchmark/linux-gpu.R')
\"
"
21 changes: 21 additions & 0 deletions paper/benchmark/benchmark_gpu_optimizer.sh
@@ -0,0 +1,21 @@
#!/bin/bash
#SBATCH --job-name=mlr3torch-benchmark
#SBATCH --partition=mcml-hgx-a100-80x4
#SBATCH --gres=gpu:4
#SBATCH --qos=mcml
#SBATCH --ntasks=1
#SBATCH --time=48:00:00
#SBATCH --exclusive
#SBATCH --output=mlr3torch-benchmark-%j.out

cd /dss/dssmcmlfs01/pr74ze/pr74ze-dss-0001/ru48nas2/
enroot create --force --name mlr3torch-jss sebffischer+mlr3torch-jss+gpu.sqsh

enroot start \
--mount /dss/dssmcmlfs01/pr74ze/pr74ze-dss-0001/ru48nas2/:/mnt/data \
mlr3torch-jss bash -c "
cd /mnt/data/mlr3torch/paper
Rscript -e \"
source('benchmark/linux-gpu-optimizer.R')
\"
"
56 changes: 56 additions & 0 deletions paper/benchmark/linux-cpu.R
@@ -0,0 +1,56 @@
library(here)

setwd(here("paper"))
source(here("paper", "benchmark", "benchmark.R"))

setup(
here("paper", "benchmark", "registry-linux-cpu"),
# This path points to the Python installation inside the Docker container, so no need to change it
"/opt/venv/bin/python3",
here("paper")
)

problem_design = expand.grid(
list(
n = N,
p = P,
epochs = EPOCHS,
optimizer = c("sgd", "adamw"),
batch_size = 32L,
device = "cpu",
n_layers = c(0L, 4L, 8L, 12L, 16L),
latent = c(100L, 200L, 400L)
),
stringsAsFactors = FALSE
)

addExperiments(
prob.designs = list(
runtime_train = problem_design
),
algo.designs = list(
rtorch = data.frame(
jit = FALSE,
opt_type = "ignite"
),
mlr3torch = data.frame(
jit = FALSE,
opt_type = "ignite"
),
pytorch = data.frame(
jit = FALSE
)
),
repls = REPLS
)

tbl = unwrap(getJobTable())

for (id in sample(tbl$job.id)) {
submitJobs(id)
Sys.sleep(0.1)
}

source(here("paper", "benchmark", "summarize.R"))
result = summarize(tbl$job.id)
saveRDS(result, here("paper", "benchmark", "result-linux-cpu.rds"))