diff --git a/bin/ghe-restore b/bin/ghe-restore index b34ba977c..5dcac04d7 100755 --- a/bin/ghe-restore +++ b/bin/ghe-restore @@ -239,8 +239,14 @@ ghe-ssh "$GHE_HOSTNAME" -- 'ghe-import-authorized-keys' < "$GHE_RESTORE_SNAPSHOT if $cluster; then echo "Restoring storage data ..." - ghe-restore-alambic-cluster "$GHE_HOSTNAME" 1>&3 - + if ghe-ssh "$GHE_HOSTNAME" test -f /data/github/current/script/storage-cluster-restore-routes; then + ghe_verbose "* Using ghe-restore-alambic-cluster-ng to restore" + # This restore script is faster but only available in + # GHE >= 2.6.3 + ghe-restore-alambic-cluster-ng "$GHE_HOSTNAME" 1>&3 + else + ghe-restore-alambic-cluster "$GHE_HOSTNAME" 1>&3 + fi echo "Restoring custom Git hooks ..." ghe-restore-git-hooks-cluster "$GHE_HOSTNAME" 1>&3 elif [ "$GHE_VERSION_MAJOR" -ge 2 ]; then diff --git a/share/github-backup-utils/ghe-restore-alambic-cluster-ng b/share/github-backup-utils/ghe-restore-alambic-cluster-ng new file mode 100644 index 000000000..d640b07f4 --- /dev/null +++ b/share/github-backup-utils/ghe-restore-alambic-cluster-ng @@ -0,0 +1,123 @@ +#!/usr/bin/env bash +#/ Usage: ghe-restore-alambic-cluster-ng +#/ +#/ Restore storage objects from an rsync snapshot to a GitHub Cluster. +#/ +#/ Note: This script typically isn't called directly. It's invoked by the +#/ ghe-restore command when restoring into a cluster. +set -e + +# Bring in the backup configuration +cd $(dirname "$0")/../.. +. share/github-backup-utils/ghe-backup-config + +# Show usage and bail with no arguments +[ -z "$*" ] && print_usage + +# Grab host arg +GHE_HOSTNAME="$1" + +# The snapshot to restore should be set by the ghe-restore command but this lets +# us run this script directly. +: ${GHE_RESTORE_SNAPSHOT:=current} + +# Find the objets to restore +storage_paths=$(cd $GHE_DATA_DIR/$GHE_RESTORE_SNAPSHOT/ && find storage -mindepth 4 -maxdepth 4 -type f -printf '%p %s\n' | cut -d / -f2-) + +# No need to restore anything, early exit +if [ -z "$storage_paths" ]; then + echo "Warning: Storage backup missing. Skipping ..." + exit 0 +fi + +# Perform a host-check and establish GHE_REMOTE_XXX variables. +ghe_remote_version_required "$GHE_HOSTNAME" + +# Split host:port into parts +port=$(ssh_port_part "$GHE_HOSTNAME") +host=$(ssh_host_part "$GHE_HOSTNAME") + +# Add user / -l option +user="${host%@*}" +[ "$user" = "$host" ] && user="admin" + +# Generate SSH config for forwarding +config="" +hostnames=$(ghe-ssh "$GHE_HOSTNAME" ghe-config --get-regexp cluster.*.hostname | cut -d ' ' -f 2) +for hostname in $hostnames; do + config="$config +Host $hostname + ServerAliveInterval 60 + ProxyCommand ssh -q $GHE_EXTRA_SSH_OPTS -p $port $user@$host nc.openbsd %h %p" +done + +config_file=$(mktemp -t cluster-backup-restore-XXXXXX) +echo "$config" > "$config_file" + +# Stores a list of "oid size" tuples. +tmp_list=$(mktemp -t cluster-backup-restore-XXXXXX) +to_restore=$(mktemp -t cluster-backup-restore-XXXXXX) + +opts="$GHE_EXTRA_SSH_OPTS -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -o PasswordAuthentication=no" +tempdir=$(mktemp -d) + +cleanup() { + rm -rf $tempdir $config_file $tmp_list $to_restore + true +} + +trap 'cleanup' EXIT + +# Find the routes (servers) for each storage object available locally +# Sends a list of " " tuples with the following format: +# +# # OID bytes +# b8a48b6b122b4ef8175348d1d6fbd846d3b3ccc8fd7552b79f91125c4958e43b 5592001 +# b851fd1f147c644a9de778f19090ea785b415c69e2a2fba35a65144fa2753ab9 7340032 +# b65f657194ca6202c17b5062e4afc11843fc892a3f2febef8ac10971db7689a8 5591634 +# b63c30f6f885e59282c2aa22cfca846516b5e72621c10a58140fb04d133e2c17 5592492 +# ... +OLDIFS=$IFS; IFS=$'\n' +for path in $storage_paths; do + oid=$(echo $path | cut -d ' ' -f 1 | awk -F/ '{print $(NF)}') + size=$(echo $path | cut -d ' ' -f 2) + echo $oid $size +done > $tmp_list +IFS=$OLDIFS + +ghe_verbose "* Sending the object list to the server..." + +# The server receives the list of objects and returns the list servers where the objects will be sent. +# The format of the list returned by the server: +# +# # OID SERVER1 SERVER2 SERVER2 +# b8a48b6b122b4ef8175348d1d6fbd846d3b3ccc8fd7552b79f91125c4958e43b server1 server2 server3 +# bc4cdd292e6b5387df2a42a907fcd5f3b6804a5d5ab427184faea5ef118d635e server1 server2 server3 +cat $tmp_list | ghe-ssh "$GHE_HOSTNAME" github-env ./bin/storage-cluster-restore-routes \ + | while read obj; do + ghe_verbose "Received route: $obj" + oid=$(echo $obj | cut -d ' ' -f1) + oid_c1=$(echo $oid | cut -c1) + oid_c2=$(echo $oid | cut -c1-2) + oid_c3=$(echo $oid | cut -c3-4) + for server in $(echo $obj | cut -d ' ' -f2-); do + ghe_verbose "Adding $oid_c1/$oid_c2/$oid_c3/$oid to $tempdir/$server.rsync" + echo "$oid_c1/$oid_c2/$oid_c3/$oid" >> $tempdir/$server.rsync + done + echo "$obj" >> $to_restore +done + +# rsync all the objects to the storage server where they belong. +# One rsync invocation per server available. +for route in $tempdir/*.rsync; do + ghe_verbose "* rsync data to $(basename $route .rsync) ..." + ghe-rsync -arHR --delete \ + -e "ssh -q $opts -p $port -F $config_file -l $user" \ + --rsync-path="sudo -u git rsync" \ + --files-from=$route \ + "$GHE_DATA_DIR/$GHE_RESTORE_SNAPSHOT/storage/./" \ + "$(basename $route .rsync):$GHE_REMOTE_DATA_USER_DIR/storage/" +done + +ghe_verbose "* Update storage database after the restore ..." +cat $to_restore | ghe-ssh "$GHE_HOSTNAME" github-env ./bin/storage-cluster-restore-finalize