diff --git a/backup.config-example b/backup.config-example index 9e1e4f157..a7a51bb32 100644 --- a/backup.config-example +++ b/backup.config-example @@ -64,6 +64,13 @@ GHE_NUM_SNAPSHOTS=10 # WARNING: this feature is in beta. #GHE_PARALLEL_MAX_JOBS=2 +# Sets the maximum number of rsync jobs to run in parallel. Defaults to the +# configured GHE_PARALLEL_MAX_JOBS, or the number of available processing +# units on the machine. +# +# WARNING: this feature is in beta. +#GHE_PARALLEL_RSYNC_MAX_JOBS=3 + # When jobs are running in parallel wait as needed to avoid starting new jobs # when the system's load average is not below the specified percentage. Defaults to # unrestricted. diff --git a/share/github-backup-utils/ghe-backup-config b/share/github-backup-utils/ghe-backup-config index 13247f43c..65ad13ada 100755 --- a/share/github-backup-utils/ghe-backup-config +++ b/share/github-backup-utils/ghe-backup-config @@ -85,10 +85,18 @@ ghe_parallel_check() { if [ -n "$GHE_PARALLEL_MAX_JOBS" ]; then GHE_PARALLEL_COMMAND_OPTIONS="-j $GHE_PARALLEL_MAX_JOBS" + # Default the maximum number of rsync jobs to GHE_PARALLEL_MAX_JOBS, if not set. + # This currently applies only to ghe-restore-repositories. + : ${GHE_PARALLEL_RSYNC_MAX_JOBS:="$GHE_PARALLEL_MAX_JOBS"} + fi + + if [ -n "$GHE_PARALLEL_RSYNC_MAX_JOBS" ]; then + GHE_PARALLEL_RSYNC_COMMAND_OPTIONS="-j $GHE_PARALLEL_RSYNC_MAX_JOBS" fi if [ -n "$GHE_PARALLEL_MAX_LOAD" ]; then GHE_PARALLEL_COMMAND_OPTIONS+=" -l $GHE_PARALLEL_MAX_LOAD" + GHE_PARALLEL_RSYNC_COMMAND_OPTIONS+=" -l $GHE_PARALLEL_MAX_LOAD" fi } diff --git a/share/github-backup-utils/ghe-restore-repositories b/share/github-backup-utils/ghe-restore-repositories index c87c8b421..201f20092 100755 --- a/share/github-backup-utils/ghe-restore-repositories +++ b/share/github-backup-utils/ghe-restore-repositories @@ -10,6 +10,9 @@ set -e # shellcheck source=share/github-backup-utils/ghe-backup-config . 
"$( dirname "${BASH_SOURCE[0]}" )/ghe-backup-config" +# Check to make sure moreutils parallel is installed and working properly +ghe_parallel_check + # Show usage and bail with no arguments [ -z "$*" ] && print_usage @@ -140,20 +143,36 @@ fi # rsync all the repository networks to the git server where they belong. # One rsync invocation per server available. bm_start "$(basename $0) - Restoring repository networks" +rsync_commands=() for file_list in $tempdir/*.rsync; do if $CLUSTER; then server=$(basename $file_list .rsync) else server=$host fi - ghe_verbose "* Transferring repository networks to $server ..." - ghe-rsync -avrR --delete \ - -e "ssh -q $opts -p $port $ssh_config_file_opt -l $user" \ - --rsync-path="sudo -u git rsync" \ - --files-from=$file_list \ - "$GHE_DATA_DIR/$GHE_RESTORE_SNAPSHOT/repositories/./" \ - "$server:$GHE_REMOTE_DATA_USER_DIR/repositories/" 1>&3 + + rsync_commands+=(" + if [ -n \"$GHE_VERBOSE\" ]; then + echo \"* Transferring repository networks to $server ($file_list) ...\" 1>&3 + fi + + ghe-rsync -avrR --delete \ + -e \"ssh -q $opts -p $port $ssh_config_file_opt -l $user\" \ + --rsync-path=\"sudo -u git rsync\" \ + --files-from=$file_list \ + \"$GHE_DATA_DIR/$GHE_RESTORE_SNAPSHOT/repositories/./\" \ + \"$server:$GHE_REMOTE_DATA_USER_DIR/repositories/\" 1>&3 + ") done + +if [ "$GHE_PARALLEL_ENABLED" = "yes" ]; then + $GHE_PARALLEL_COMMAND $GHE_PARALLEL_RSYNC_COMMAND_OPTIONS -- "${rsync_commands[@]}" +else + for c in "${rsync_commands[@]}"; do + eval "$c" + done +fi + bm_end "$(basename $0) - Restoring repository networks" # Tell dgit about the repositories restored diff --git a/test/test-ghe-restore-parallel.sh b/test/test-ghe-restore-parallel.sh index 835cbdd29..35d85d2d0 100755 --- a/test/test-ghe-restore-parallel.sh +++ b/test/test-ghe-restore-parallel.sh @@ -4,6 +4,12 @@ set -e export GHE_PARALLEL_ENABLED=yes +# Use a temp dir to avoid rsync file issues in parallel execution: +# we imitate the remote server with local files, and 
running rsync in parallel may cause +# race conditions when two processes write to the same folder +parallel_rsync_tempdir=$(mktemp -d -t backup-utils-restore-temp-XXXXXX) +export GHE_EXTRA_RSYNC_OPTS="--copy-dirlinks --temp-dir=$parallel_rsync_tempdir" + TESTS_DIR="$PWD/$(dirname "$0")" # shellcheck source=test/test-ghe-restore.sh . "$TESTS_DIR/test-ghe-restore.sh"