@@ -13,13 +13,46 @@ setup_linux_system_environment: &setup_linux_system_environment
1313 name : Set Up System Environment
1414 no_output_timeout : " 1h"
1515 command : |
16- set -e
17- # Set up CircleCI GPG keys for apt, if needed
16+ set -ex
17+
18+ # Set up CircleCI GPG keys for apt, if needed
1819 curl -L https://packagecloud.io/circleci/trusty/gpgkey | sudo apt-key add -
1920
21+ # Stop background apt updates. Hypothetically, the kill should not
22+ # be necessary, because stop is supposed to send a kill signal to
23+ # the process, but we've added it for good luck. Also
24+ # hypothetically, it's supposed to be unnecessary to block until
25+ # the services have stopped. We also have those loops for good luck.
26+ # If you like, try deleting them and seeing if it still works.
27+ sudo systemctl stop apt-daily.service || true
28+ sudo systemctl kill --kill-who=all apt-daily.service || true
29+
30+ sudo systemctl stop unattended-upgrades.service || true
31+ sudo systemctl kill --kill-who=all unattended-upgrades.service || true
32+
33+ # wait until `apt-get update` has been killed
34+ while systemctl is-active --quiet apt-daily.service
35+ do
36+ sleep 1;
37+ done
38+ while systemctl is-active --quiet unattended-upgrades.service
39+ do
40+ sleep 1;
41+ done
42+
43+ # See if we actually were successful
44+ systemctl list-units --all | cat
45+
46+ sudo apt-get purge -y unattended-upgrades
47+
48+ cat /etc/apt/sources.list
49+
50+ ps auxfww | grep '[a]pt' || true   # diagnostic only: grep exits 1 on no match, which must not abort under `set -e`
51+ ps auxfww | grep '[d]pkg' || true  # bracketed first letter keeps grep from listing its own process
52+
2053pytorch_tutorial_build_defaults : &pytorch_tutorial_build_defaults
2154 machine :
22- image : default
55+ image : ubuntu-1604:201903-01
2356 steps :
2457 - checkout
2558 - run :
@@ -30,28 +63,43 @@ pytorch_tutorial_build_defaults: &pytorch_tutorial_build_defaults
3063 command : |
3164 set -e
3265
66+ # Set up NVIDIA docker repo
3367 curl -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add -
34- echo "deb https://nvidia.github.io/libnvidia-container/ubuntu14.04/amd64 /" | sudo tee -a /etc/apt/sources.list.d/nvidia-docker.list
35- echo "deb https://nvidia.github.io/nvidia-container-runtime/ubuntu14.04/amd64 /" | sudo tee -a /etc/apt/sources.list.d/nvidia-docker.list
36- echo "deb https://nvidia.github.io/nvidia-docker/ubuntu14.04/amd64 /" | sudo tee -a /etc/apt/sources.list.d/nvidia-docker.list
37-
38- sudo apt-get -qq update
39- sudo apt-get -qq remove linux-image-generic linux-headers-generic linux-generic
40- sudo apt-get -qq install \
68+ echo "deb https://nvidia.github.io/libnvidia-container/ubuntu16.04/amd64 /" | sudo tee -a /etc/apt/sources.list.d/nvidia-docker.list
69+ echo "deb https://nvidia.github.io/nvidia-container-runtime/ubuntu16.04/amd64 /" | sudo tee -a /etc/apt/sources.list.d/nvidia-docker.list
70+ echo "deb https://nvidia.github.io/nvidia-docker/ubuntu16.04/amd64 /" | sudo tee -a /etc/apt/sources.list.d/nvidia-docker.list
71+
72+ sudo apt-get -y update
73+ sudo apt-get -y remove linux-image-generic linux-headers-generic linux-generic docker-ce
74+ # WARNING: Docker version is hardcoded here; you must update the
75+ # version number below for docker-ce and nvidia-docker2 to get newer
76+ # versions of Docker. We hardcode these numbers because we kept
77+ # getting broken CI when Docker would update their docker version,
78+ # and nvidia-docker2 would be out of date for a day until they
79+ # released a newer version of their package.
80+ #
81+ # How to figure out what the correct versions of these packages are?
82+ # My preferred method is to start a Docker instance of the correct
83+ # Ubuntu version (e.g., docker run -it ubuntu:16.04) and then ask
84+ # apt what the packages you need are. Note that the CircleCI image
85+ # comes with Docker.
86+ sudo apt-get -y install \
4187 linux-headers-$(uname -r) \
4288 linux-image-generic \
4389 moreutils \
44- nvidia-docker2 \
45- expect-dev \
46- docker-ce=18.06.2~ce~3-0~ubuntu
90+ docker-ce=5:18.09.4~3-0~ubuntu-xenial \
91+ nvidia-container-runtime=2.0.0+docker18.09.4-1 \
92+ nvidia-docker2=2.0.3+docker18.09.4-1 \
93+ expect-dev
4794
4895 sudo pkill -SIGHUP dockerd
4996
5097 sudo pip -q install awscli==1.16.35
5198
5299 if [ -n "${CUDA_VERSION}" ]; then
53- wget 'https://s3.amazonaws.com/ossci-linux/nvidia_driver/NVIDIA-Linux-x86_64-396.26.run'
54- sudo /bin/bash ./NVIDIA-Linux-x86_64-396.26.run -s --no-drm
100+ DRIVER_FN="NVIDIA-Linux-x86_64-410.104.run"
101+ wget "https://s3.amazonaws.com/ossci-linux/nvidia_driver/$DRIVER_FN"
102+ sudo /bin/bash "$DRIVER_FN" -s --no-drm || (sudo cat /var/log/nvidia-installer.log && false)
55103 nvidia-smi
56104 fi
57105
0 commit comments