#!/usr/bin/env bash

# Secrets-manager backend selector read by the sm_* functions below.
# Only the "aws" branch is implemented; the "ibm" branches exit with an error.
AVP_TYPE="aws"  # I haven't added support for IBM
# Separator placed between the segments of a secret name (e.g. account/cluster/secret).
# NOTE: the variable name is misspelled, but the functions in this file reference it
# under exactly this name, so it must not be renamed in isolation.
DELIMIITER="/"

# Prints the current timestamp wrapped in square brackets, followed by a newline.
# The format string (date, time, and a %3N fractional part) is handed to `date` as-is.
function logts() {
  local now
  now="$(date '+%Y-%m-%d %H:%M:%S.%3N')"
  printf '[%s]\n' "${now}"
}

# Configures the AWS CLI from the SM_AWS_* environment variables.
# Globals read:    AVP_TYPE, SM_AWS_ACCESS_KEY_ID, SM_AWS_SECRET_ACCESS_KEY,
#                  SM_AWS_SESSION_TOKEN (optional), SM_AWS_REGION
# Globals written: AWS_REGION (exported)
# Exits 1 when AVP_TYPE is "ibm" (not supported).
function sm_login() {
  if [[ "$AVP_TYPE" == "aws" ]]; then
    echo "Logging into AWS SecretsManager ..."
    # Every value is quoted so that it reaches the CLI as exactly one argument
    aws configure set aws_access_key_id "${SM_AWS_ACCESS_KEY_ID}"
    aws configure set aws_secret_access_key "${SM_AWS_SECRET_ACCESS_KEY}"
    # The session token is only configured when one was supplied
    if [[ -n "${SM_AWS_SESSION_TOKEN}" ]]; then
      aws configure set aws_session_token "${SM_AWS_SESSION_TOKEN}"
    fi
    aws configure set default.region "${SM_AWS_REGION}"
    export AWS_REGION="${SM_AWS_REGION}"
    aws configure list
  elif [[ "$AVP_TYPE" == "ibm" ]]; then
    echo "IBM SecretsManager not yet supported"
    exit 1
  fi
}

# Lists the names of all secrets whose name starts with "<account><delim><cluster><delim>".
# Arguments: $1 - account, $2 - cluster id
# Globals read:    AVP_TYPE, DELIMIITER
# Globals written: ACCOUNT, CLUSTER_ID, LIST_SECRETS_OUTPUT, return_code
# Outputs: one secret name per line (extracted from the CLI's YAML output with yq).
# Exits with the aws CLI's status if the listing fails; exits 1 for AVP_TYPE "ibm".
# Note: pipefail is switched off for the aws call and switched ON afterwards,
# whatever its state was on entry.
function sm_list_cluster_secrets() {
  ACCOUNT=$1
  CLUSTER_ID=$2

  if [[ "$AVP_TYPE" == "aws" ]]; then
    set +o pipefail
    # The filter is quoted so the name prefix is passed as a single argument
    LIST_SECRETS_OUTPUT="$(aws secretsmanager list-secrets --output yaml --no-cli-pager --filters "Key=name,Values=${ACCOUNT}${DELIMIITER}${CLUSTER_ID}${DELIMIITER}")"
    return_code=$?
    if [[ "${return_code}" -ne 0 ]]; then
      echo "aws: secretmanager list-secrets failed with error: $return_code"
      exit "${return_code}"
    fi
    set -o pipefail

    echo "$LIST_SECRETS_OUTPUT" | yq -r '.SecretList[].Name'
  elif [[ "$AVP_TYPE" == "ibm" ]]; then
    echo "IBM SecretsManager not yet supported"
    exit 1
  fi
}

# Creates the secret if it has no current value, updates it if the value differs,
# and leaves it untouched if the stored value already matches.
# Arguments: $1 - secret name
#            $2 - secret value
#            $3 - tags (passed verbatim to --tags)
#            $4 - retry marker (default 0); when 0, a failed create is retried once after 120s
# Globals read:    AVP_TYPE
# Globals written: SECRET_NAME, SECRET_VALUE, SECRET_TAGS, RETRY, CURRENT_SECRET_VALUE, rc
# Exits 1 if the create fails on the retry attempt, if update-secret fails, or for AVP_TYPE "ibm".
# Note: pipefail is switched ON before returning, whatever its state was on entry.
function sm_update_secret(){
  SECRET_NAME=$1
  SECRET_VALUE=$2
  SECRET_TAGS=$3
  RETRY="${4:-0}"

  echo "Secret Manager: Updating $SECRET_NAME with tags $SECRET_TAGS"
  if [[ "$AVP_TYPE" == "aws" ]]; then
    # There's a different command to run depending on whether we are creating or updating a secret.
    # It's annoying there isn't an upsert/idempotent command as we don't really care either way, but
    # we need to look up whether the secret exists anyway, so we may as well skip the update when
    # the secret exists and is already set to the value we want.

    # Get the current value (errors are discarded: a failed lookup yields an empty value
    # and is therefore handled by the "create" branch below)
    set +o pipefail
    CURRENT_SECRET_VALUE=$(aws secretsmanager get-secret-value --secret-id "${SECRET_NAME}" --output json 2> /dev/null | jq -r .SecretString)

    if [[ "$CURRENT_SECRET_VALUE" == "" ]]; then
      # Create the secret
      echo "- Secret $SECRET_NAME creating"
      aws secretsmanager create-secret --name "${SECRET_NAME}" --secret-string "${SECRET_VALUE}" --tags "${SECRET_TAGS}"
      rc=$?
      if [[ "${rc}" -eq 0 ]]; then
        echo "- Secret $SECRET_NAME created"
      elif [[ "${RETRY}" -eq 0 ]]; then
        # One retry only: the recursive call passes "1" as the retry marker
        echo "Retrying secret update"
        sleep 120
        sm_update_secret "${SECRET_NAME}" "${SECRET_VALUE}" "${SECRET_TAGS}" "1"
      else
        exit 1
      fi
    elif [[ "$SECRET_VALUE" != "$CURRENT_SECRET_VALUE" ]]; then
      # Update the secret
      echo "- Secret $SECRET_NAME updating"
      aws secretsmanager update-secret --secret-id "${SECRET_NAME}" --secret-string "${SECRET_VALUE}" || exit 1
      # NOTE(review): the status of tag-resource is not checked; a tagging failure is still reported as "updated"
      aws secretsmanager tag-resource --secret-id "${SECRET_NAME}" --tags "${SECRET_TAGS}"
      echo "- Secret $SECRET_NAME updated"
    else
      # No change
      echo "- Secret $SECRET_NAME unchanged"
    fi
    set -o pipefail
  elif [[ "$AVP_TYPE" == "ibm" ]]; then
    echo "IBM SecretsManager not yet supported"
    exit 1
  fi
}

# Deletes a secret immediately (no recovery window).
# Arguments: $1 - secret name
# Globals read:    AVP_TYPE
# Globals written: SECRET_NAME
# Exits 1 if the delete fails (the CLI's stderr is discarded) or for AVP_TYPE "ibm".
# Note: pipefail is switched ON before returning, whatever its state was on entry.
function sm_delete_secret(){
  SECRET_NAME=$1
  if [[ "$AVP_TYPE" == "aws" ]]; then

    # Delete the secret; the id is quoted so it is always passed as one argument
    set +o pipefail
    aws secretsmanager delete-secret --force-delete-without-recovery --secret-id "${SECRET_NAME}" --output json 2> /dev/null || exit 1
    set -o pipefail

  elif [[ "$AVP_TYPE" == "ibm" ]]; then
    echo "IBM SecretsManager not yet supported"
    exit 1
  fi
}

# Prints the SecretString of a secret to stdout.
# Arguments: $1 - secret name
#            $2 - stored in SECRET_ENV_VAR but not otherwise used by this function
# Globals read:    AVP_TYPE
# Globals written: SECRET_NAME, SECRET_ENV_VAR
# For AVP_TYPE "ibm" an empty line is printed and the shell exits 1.
# Note: pipefail is switched ON before returning, whatever its state was on entry.
function sm_get_secret(){
  SECRET_NAME=$1
  SECRET_ENV_VAR=$2
  if [[ "$AVP_TYPE" == "aws" ]]; then
    # Get the current value and print it (CLI errors are discarded)
    # echo "- Getting Secret $SECRET_NAME to set in env var $SECRET_ENV_VAR"
    # export $SECRET_ENV_VAR=$(aws secretsmanager get-secret-value --secret-id ${SECRET_NAME} --output json 2> /dev/null | jq -r .SecretString)
    set +o pipefail
    aws secretsmanager get-secret-value --secret-id "${SECRET_NAME}" --output json 2> /dev/null | jq -r .SecretString
    set -o pipefail

  elif [[ "$AVP_TYPE" == "ibm" ]]; then
    # echo "IBM SecretsManager not yet supported"
    echo ""
    exit 1
  fi
}

# Prints one field of a secret whose SecretString is itself a JSON document.
# Arguments: $1 - secret name
#            $2 - key, appended after "." to form the jq filter applied to the SecretString
# Globals read:    AVP_TYPE
# Globals written: SECRET_NAME, SECRET_KEY
# For AVP_TYPE "ibm" an empty line is printed and the shell exits 1.
# Note: pipefail is switched ON before returning, whatever its state was on entry.
function sm_get_secret_value(){
  SECRET_NAME=$1
  SECRET_KEY=$2
  if [[ "$AVP_TYPE" == "aws" ]]; then
    set +o pipefail
    # First jq extracts the SecretString, second jq selects the requested key from it
    aws secretsmanager get-secret-value --secret-id "${SECRET_NAME}" --output json 2> /dev/null \
      | jq -r .SecretString \
      | jq -r ".${SECRET_KEY}"
    set -o pipefail

  elif [[ "$AVP_TYPE" == "ibm" ]]; then
    # echo "IBM SecretsManager not yet supported"
    echo ""
    exit 1
  fi
}

# Writes the SecretString of a secret into a file.
# Arguments: $1 - secret name
#            $2 - destination file (overwritten)
# Globals read:    AVP_TYPE
# Globals written: SECRET_NAME, SECRET_FILE
# Exits 1 for AVP_TYPE "ibm".
# Note: pipefail is switched ON before returning, whatever its state was on entry.
function sm_get_secret_file(){
  SECRET_NAME=$1
  SECRET_FILE=$2
  if [[ "$AVP_TYPE" == "aws" ]]; then
    # Get the current value and send it to the file passed in
    echo "- Getting Secret $SECRET_NAME to set in file $SECRET_FILE"
    set +o pipefail
    # The redirect target is quoted: an unquoted path containing whitespace is an "ambiguous redirect"
    aws secretsmanager get-secret-value --secret-id "${SECRET_NAME}" --output json 2> /dev/null | jq -r .SecretString > "${SECRET_FILE}"
    set -o pipefail

  elif [[ "$AVP_TYPE" == "ibm" ]]; then
    echo "IBM SecretsManager not yet supported"
    exit 1
  fi
}

# Verifies that a secret exists and, optionally, that a list of keys in its JSON
# SecretString are present and non-empty.
# Arguments: $1 - secret name
#            $2 - comma-separated list of keys to validate (optional)
#            $3 - "false" to downgrade a missing key to a warning (default: true)
# Globals read:    AVP_TYPE
# Globals written: SECRET_NAME, LOOKUP_KEYS, ENFORCE_VALIDATION, secret_key, secret_value
# Exits 1 if the secret does not exist, or if a key is missing/empty/null while
# validation is enforced. For AVP_TYPE "ibm" an empty line is printed and the shell exits 1.
# Note: pipefail is switched ON before returning, whatever its state was on entry.
function sm_verify_secret_exists(){
  SECRET_NAME=$1
  LOOKUP_KEYS=$2
  ENFORCE_VALIDATION=${3:-true}
  echo "ENFORCE_VALIDATION:${ENFORCE_VALIDATION}"

  if [[ "$AVP_TYPE" == "aws" ]]; then
    echo "- Verifying Secret $SECRET_NAME exists"
    set +o pipefail
    if ! aws secretsmanager describe-secret --secret-id "${SECRET_NAME}" --output json; then
      echo "Error: Secret ${SECRET_NAME} does not exist"
      exit 1
    fi

    if [[ -n "${LOOKUP_KEYS}" ]]; then
      echo "Secret Keys to validate ${LOOKUP_KEYS}"

      # Fetch the secret once; every key is then looked up in this local copy
      local secret_string
      secret_string=$(aws secretsmanager get-secret-value --secret-id "${SECRET_NAME}" --output json | jq -r .SecretString)

      # Commas become spaces and the unquoted expansion splits the list into keys
      # shellcheck disable=SC2086
      for secret_key in ${LOOKUP_KEYS//,/ }; do
        secret_value=$(jq -r ".${secret_key}" <<< "${secret_string}")
        # NOTE(review): this logs the first 4 characters of the secret value
        echo "String value for secret key $secret_key: ${secret_value:0:4}<snip>"
        if [[ -z "${secret_value}" || "$secret_value" == null ]]; then
          if [[ "${ENFORCE_VALIDATION}" = false ]]; then
            echo "*****Warning: Secret key $secret_key does not exist or it is missing value in Secret ${SECRET_NAME}"
          else
            echo "Error: Secret key $secret_key does not exist or it is missing value in Secret ${SECRET_NAME}"
            exit 1
          fi
        fi
      done
    fi

    set -o pipefail
  elif [[ "$AVP_TYPE" == "ibm" ]]; then
    # echo "IBM SecretsManager not yet supported"
    echo ""
    exit 1
  fi
}

# Creates/updates the account-level secret "<account><delim><secret name>" via sm_update_secret.
# Arguments: $1 - account, $2 - secret name, $3 - secret value, $4 - tags
# Globals read:    DELIMIITER, TAGS (fallback only)
# Globals written: ACCOUNT, SECRET_NAME, SECRET_VALUE, SECRET_TAGS
function sm_update_account_secret() {
  ACCOUNT=$1
  SECRET_NAME=$2
  SECRET_VALUE=$3
  SECRET_TAGS=$4

  # Forward the tags that were passed in; fall back to the global TAGS only when
  # none were given, so callers that relied on that global keep working.
  sm_update_secret "${ACCOUNT}${DELIMIITER}${SECRET_NAME}" "${SECRET_VALUE}" "${SECRET_TAGS:-${TAGS}}"
}

# Creates/updates the cluster-level secret "<account><delim><cluster><delim><secret name>"
# via sm_update_secret.
# Arguments: $1 - account, $2 - cluster id, $3 - secret name, $4 - secret value, $5 - tags
# Globals read:    DELIMIITER, TAGS (fallback only)
# Globals written: ACCOUNT, CLUSTER_ID, SECRET_NAME, SECRET_VALUE, SECRET_TAGS
function sm_update_cluster_secret() {
  ACCOUNT=$1
  CLUSTER_ID=$2
  SECRET_NAME=$3
  SECRET_VALUE=$4
  SECRET_TAGS=$5

  # Forward the tags that were passed in; fall back to the global TAGS only when
  # none were given, so callers that relied on that global keep working.
  sm_update_secret "${ACCOUNT}${DELIMIITER}${CLUSTER_ID}${DELIMIITER}${SECRET_NAME}" "${SECRET_VALUE}" "${SECRET_TAGS:-${TAGS}}"
}

# Creates/updates the instance-level secret
# "<account><delim><cluster><delim><mas instance><delim><secret name>" via sm_update_secret.
# Arguments: $1 - account, $2 - cluster id, $3 - MAS instance id, $4 - secret name,
#            $5 - secret value, $6 - tags
# Globals read:    DELIMIITER, TAGS (fallback only)
# Globals written: ACCOUNT, CLUSTER_ID, MAS_INSTANCE_ID, SECRET_NAME, SECRET_VALUE, SECRET_TAGS
function sm_update_mas_secret() {
  ACCOUNT=$1
  CLUSTER_ID=$2
  MAS_INSTANCE_ID=$3
  SECRET_NAME=$4
  SECRET_VALUE=$5
  SECRET_TAGS=$6

  # Forward the tags that were passed in; fall back to the global TAGS only when
  # none were given, so callers that relied on that global keep working.
  sm_update_secret "${ACCOUNT}${DELIMIITER}${CLUSTER_ID}${DELIMIITER}${MAS_INSTANCE_ID}${DELIMIITER}${SECRET_NAME}" "${SECRET_VALUE}" "${SECRET_TAGS:-${TAGS}}"
}

# Prints the cluster-level secret "<account><delim><cluster><delim><secret name>" via sm_get_secret.
# Arguments: $1 - account, $2 - cluster id, $3 - secret name,
#            $4 - forwarded unchanged as the second argument of sm_get_secret
# Globals read:    DELIMIITER
# Globals written: ACCOUNT, CLUSTER_ID, SECRET_NAME, SECRET_ENV_VAR
function sm_get_cluster_secret() {
  ACCOUNT=$1
  CLUSTER_ID=$2
  SECRET_NAME=$3
  SECRET_ENV_VAR=$4

  # Quoted so the composed name always reaches sm_get_secret as a single argument
  sm_get_secret "${ACCOUNT}${DELIMIITER}${CLUSTER_ID}${DELIMIITER}${SECRET_NAME}" "${SECRET_ENV_VAR}"
}


# Fetches ARN of secret named in first param (e.g. "development/mas-c6/ibm_entitlement") and sets it as the value of the variable named in the second param.
# This is often needed due to the random suffix e.g. "-fdP1Lx" that AWS seems to apply when secrets are created
# exit 1 if something goes wrong
# Example usage:
#   export SECRET_ARN_IBM_ENTITLEMENT
#   sm_get_secret_arn "${SECRET_NAME_IBM_ENTITLEMENT}" "SECRET_ARN_IBM_ENTITLEMENT"
#   echo "${SECRET_NAME_IBM_ENTITLEMENT} ARN: ${SECRET_ARN_IBM_ENTITLEMENT}"
function sm_get_secret_arn() {
  # $1 - secret name to look up
  # $2 - NAME of the caller's variable that receives the ARN
  local _SECRET_NAME="$1"
  # Declared separately from the assignment so `local` does not mask the pipeline status
  local _SECRET_ARN
  set +o pipefail
  _SECRET_ARN=$(aws secretsmanager describe-secret --secret-id "${_SECRET_NAME}" --output json | jq -r .ARN)
  set -o pipefail

  # The pipeline status only reflects jq, so detect failure from the output instead:
  # empty output, or the literal "null" that jq -r prints when the field is absent.
  if [[ -z "${_SECRET_ARN}" || "${_SECRET_ARN}" == "null" ]]; then
    echo "Failed to get ARN for AWS SM secret named ${_SECRET_NAME}"
    exit 1
  fi

  # printf -v assigns the value verbatim; unlike eval it never re-parses it as shell code
  printf -v "$2" '%s' "${_SECRET_ARN}"

}

# Clones one branch of a git repository into LOCAL_DIR, then returns to the directory
# the caller was in.
# Arguments: $1 - git host, $2 - organisation, $3 - repository, $4 - branch,
#            $5 - directory to clone into (must exist),
#            $6 - "false" to clone over HTTPS using GITHUB_PAT, otherwise the path of
#                 the SSH private key to use
# Globals read:    GITHUB_PAT (HTTPS mode only)
# Globals written: GITHUB_HOST, GITHUB_ORG, GITHUB_REPO, GIT_BRANCH, LOCAL_DIR, SSH_PATH, CURRENT_DIR
# Exits 1 if LOCAL_DIR cannot be entered or if the clone fails.
function clone_target_git_repo() {
  GITHUB_HOST=$1
  GITHUB_ORG=$2
  GITHUB_REPO=$3
  GIT_BRANCH=$4
  LOCAL_DIR=$5
  SSH_PATH=$6
  CURRENT_DIR=$PWD
  # Never clone into whatever directory we happen to be in if LOCAL_DIR is unusable
  cd "$LOCAL_DIR" || exit 1
  if [ "$SSH_PATH" == "false" ]; then
    echo ""
    # The token is masked in the logged command line
    echo "$(logts) git clone https://git:****@$GITHUB_HOST/$GITHUB_ORG/$GITHUB_REPO.git -b $GIT_BRANCH"
    echo "-------------------------------------------------"
    git clone "https://git:${GITHUB_PAT}@${GITHUB_HOST}/${GITHUB_ORG}/${GITHUB_REPO}.git" -b "$GIT_BRANCH" || exit 1
  else
    echo ""
    echo "$(logts) git -c \"core.sshCommand=ssh -i $SSH_PATH -F /dev/null\" clone git@$GITHUB_HOST:$GITHUB_ORG/$GITHUB_REPO.git -b $GIT_BRANCH"
    echo "-------------------------------------------------"
    git -c "core.sshCommand=ssh -i $SSH_PATH -F /dev/null" clone "git@${GITHUB_HOST}:${GITHUB_ORG}/${GITHUB_REPO}.git" -b "$GIT_BRANCH" || exit 1
  fi
  # Go back to where the caller started ("cd $PWD" would be a no-op)
  cd "$CURRENT_DIR" || exit 1
}

# Stages everything in LOCAL_DIR, commits if anything was staged, then rebases and pushes
# to GIT_BRANCH, retrying a failed push. Finally returns to the caller's directory.
# Arguments: $1 - git host, $2 - organisation, $3 - repository (all three are only stored),
#            $4 - branch, $5 - directory of the local clone, $6 - commit message
# Globals written: GITHUB_HOST, GITHUB_ORG, GITHUB_REPO, GIT_BRANCH, LOCAL_DIR, COMMIT_MSG,
#                  CURRENT_DIR, FILES_ADDED_OUTPUT, FILES_ADDED (exported), return_code,
#                  retries, interval, index
# Exits non-zero if any git step other than the push fails, or if the push still fails
# after the initial attempt plus 5 retries (30s apart).
function save_to_target_git_repo() {
  GITHUB_HOST=$1
  GITHUB_ORG=$2
  GITHUB_REPO=$3
  GIT_BRANCH=$4
  LOCAL_DIR="$5"
  COMMIT_MSG="$6"

  CURRENT_DIR=$PWD

  echo "git: Changing to directory $LOCAL_DIR"
  cd "$LOCAL_DIR" || exit 1

  echo ""
  echo "$(logts) git add -v ."
  echo "-------------------------------------------------"
  FILES_ADDED_OUTPUT="$(git add -v .)"
  return_code=$?
  if [[ "${return_code}" -ne 0 ]]; then
    echo "git: Add files failed with error: $return_code"
    exit "${return_code}"
  fi
  # Number of non-blank lines printed by "git add -v"; xargs trims wc's padding
  FILES_ADDED=$(echo "$FILES_ADDED_OUTPUT" | awk NF | wc -l | xargs)
  export FILES_ADDED
  echo "git: Added ${FILES_ADDED} files"

  if [ "$FILES_ADDED" != "0" ]; then
    echo ""
    echo "$(logts) git commit -m \"$COMMIT_MSG\""
    echo "-------------------------------------------------"
    git commit -m "$COMMIT_MSG" || exit 1
    retries=5
    interval=30
    index=0
    while true; do
      echo ""
      echo "$(logts) git fetch origin $GIT_BRANCH"
      echo "-------------------------------------------------"
      git fetch origin "$GIT_BRANCH" || exit 1

      echo ""
      echo "$(logts) git pull origin --rebase"
      echo "-------------------------------------------------"
      git pull origin --rebase || exit 1

      echo ""
      echo "$(logts) git pull origin $GIT_BRANCH --rebase"
      echo "-------------------------------------------------"
      git pull origin "$GIT_BRANCH" --rebase || exit 1

      echo ""
      echo "$(logts) git push -u origin $GIT_BRANCH"
      echo "-------------------------------------------------"
      git push -u origin "$GIT_BRANCH"
      return_code=$?
      if [[ "${return_code}" -eq 0 ]]; then
        echo "git: Pushing changes to branch $GIT_BRANCH success"
        sleep 10
        break
      fi

      if [[ ${index} -eq ${retries} ]]; then
        echo "git: Pushing changes to branch $GIT_BRANCH failed even after $retries retries, exit with error"
        exit 1
      fi
      echo "git: Pushing changes to branch $GIT_BRANCH failed, retry after $interval sec ..."
      sleep "$interval"
      # Plain assignment: "((index++))" has a non-zero status when index is 0
      index=$((index + 1))
    done
  else
    echo "No changes found so no commit made"
  fi

  # Go back to where the caller started ("cd $PWD" would be a no-op)
  cd "$CURRENT_DIR" || exit 1
}

# Recursively deletes a local clone directory.
# Arguments: $1 - directory to delete
# Globals written: CLONE_DIR
# Exits 1 if rm fails.
function remove_git_repo_clone() {
  CLONE_DIR=$1
  echo "git: Deleting git clone directory $CLONE_DIR"
  # Quoted (and preceded by --) so that exactly the named path is removed, never the
  # pieces of it that word splitting or globbing would produce
  rm -rf -- "$CLONE_DIR" || exit 1
}





# Releases a lock: deletes the lock branch on the remote and removes the local clone.
# Does nothing when the clone directory is not present.
# Arguments: $1 - lock branch name, $2 - local clone directory
# Globals written: GIT_LOCK_BRANCH, GITOPS_REPO_DIR
# Exits 1 if the remote branch delete or the directory removal fails.
function unlock_git_repo() {
  GIT_LOCK_BRANCH=$1
  GITOPS_REPO_DIR=$2

  # Nothing to release without a clone
  [[ -d "${GITOPS_REPO_DIR}" ]] || return 0

  printf '\n%s\n%s\n' \
    "$(logts) git push origin --delete ${GIT_LOCK_BRANCH}" \
    "-------------------------------------------------"
  git -C "${GITOPS_REPO_DIR}" push origin --delete "${GIT_LOCK_BRANCH}" || exit 1

  printf '\n%s\n%s\n' \
    "$(logts) rm -rf \"${GITOPS_REPO_DIR}\"" \
    "-------------------------------------------------"
  rm -rf "${GITOPS_REPO_DIR}" || exit 1
}





# Prints (without a trailing newline) the lock branch name
#   lock.<lock name>.<account id>.<cluster id>[.<mas instance id>]
# The instance suffix is only appended when a fourth argument is given.
# Arguments: $1 - lock name, $2 - account id, $3 - cluster id, $4 - MAS instance id (optional)
# Globals written: LOCK_NAME, ACCOUNT_ID, CLUSTER_ID, MAS_INSTANCE_ID
function git_lock_branch_name() {

  LOCK_NAME=$1
  ACCOUNT_ID=$2
  CLUSTER_ID=$3
  MAS_INSTANCE_ID=$4

  local branch="lock.${LOCK_NAME}.${ACCOUNT_ID}.${CLUSTER_ID}"
  if [[ -n "${MAS_INSTANCE_ID}" ]]; then
    branch+=".${MAS_INSTANCE_ID}"
  fi
  printf '%s' "${branch}"

}



# Clones the target repo and attempts to create a new branch ($GIT_LOCK_BRANCH) on the remote.
# If this lock branch already exists, this means another process is currently making a change and could lead to a merge conflict.
# This function retries a limited number of times to acquire the lock branch, and returns non-zero if this does not succeed.
# In this way, we guarantee sequential execution order across any script that shares the same $GIT_LOCK_BRANCH name.
# If the function successfully acquires the lock branch, it registers unlock_git_repo as an exit trap, to ensure we do not
# leave the lock branch in place (which would permanently block any other invocations of this function with the same GIT_LOCK_BRANCH).
# For consistency, it is recommended to use the git_lock_branch_name function above to generate GIT_LOCK_BRANCH, i.e.
#   GIT_LOCK_BRANCH=$(git_lock_branch_name "${LOCK_NAME}" ${ACCOUNT_ID} "${CLUSTER_ID}" "${MAS_INSTANCE_ID}")
# In order to commit and push at the end of any script that uses this function, use save_and_unlock_target_git_repo.
function clone_and_lock_target_git_repo() {
  # Arguments:
  #   $1 - git host, $2 - organisation, $3 - repository, $4 - branch to clone
  #   $5 - directory to clone into (the clone ends up in $5/$3)
  #   $6 - "false" for HTTPS, otherwise the SSH key path (forwarded to clone_target_git_repo)
  #   $7 - name of the lock branch
  #   $8 - maximum number of attempts (default 100)
  #   $9 - seconds to wait between attempts (default 20)
  # Returns 0 once the lock is held, 1 when all attempts are used up.
  # All variables below are deliberately left global, as in the rest of this file.

  GITHUB_HOST=$1
  GITHUB_ORG=$2
  GITHUB_REPO=$3
  GIT_BRANCH=$4
  LOCAL_DIR=$5
  SSH_PATH=$6
  GIT_LOCK_BRANCH=$7

  RETRIES=${8:-100}
  # The delay is the NINTH argument; reading $8 here would make it mirror RETRIES
  RETRY_DELAY_SECONDS=${9:-20}

  GITOPS_REPO_DIR="${LOCAL_DIR}/${GITHUB_REPO}"
  LOCKFILE_NAME='.lock'

  for (( c=1; c<=RETRIES; c++ )); do
    echo ""
    echo "clone_and_lock_git_repo: attempt ${c} of ${RETRIES}"
    echo "================================================="

    # Remove any clones created by prior attempts
    rm -rf "${GITOPS_REPO_DIR}"

    clone_target_git_repo "${GITHUB_HOST}" "${GITHUB_ORG}" "${GITHUB_REPO}" "${GIT_BRANCH}" "${LOCAL_DIR}" "${SSH_PATH}"

    # If the lock branch exists currently on the remote, retry after a delay
    echo
    echo "$(logts) git ls-remote --heads origin ${GIT_LOCK_BRANCH}"
    echo "-------------------------------------------------"
    LS_REMOTE_STDOUT=$(git -C "${GITOPS_REPO_DIR}" ls-remote --heads origin "${GIT_LOCK_BRANCH}")
    if [[ -n "${LS_REMOTE_STDOUT}"  ]]; then
      echo "Lock branch ${GIT_LOCK_BRANCH} currently in use by another process, retry in ${RETRY_DELAY_SECONDS}s"
      echo "..."
      sleep "${RETRY_DELAY_SECONDS}"
      continue
    fi
    # NOTE: >1 invocation may pass the initial "git ls-remote" check above
    # because of the non-zero delay between this call and the subsequent git push that creates the branch
    # The "git ls-remote" check is NOT DEFINITIVE, it is merely an optimization to avoid doing unnecessary work where
    # there is sufficient desynchronization between parallel runs of this script

    # Create the lock branch locally
    echo
    echo "$(logts) git checkout -b ${GIT_LOCK_BRANCH}"
    echo "-------------------------------------------------"
    git -C "${GITOPS_REPO_DIR}" checkout -b "${GIT_LOCK_BRANCH}"

    # To definitively acquire the "lock", we attempt to create, commit and push a temporary "lock file";
    # This will mean that, amongst n processes running in parallel and in sync (i.e. where all processes have passed the initial git ls-remote check),
    # at most 1 process will be able to successfully perform the push below.

    # Additionally, we need to ensure the commit hash generated by git is unique amongst all concurrent processes competing for the lock.
    # The commit hash is generated from tree hash (e.g. file content), parent commit hash, committer information, commit message and timestamp (with second-level precision).

    # It's entirely possible that these could all be the same across 2 or more competing processes. If this happens, 2 or more processes may successfully
    # execute the push below.
    # One process will create the branch (reporting "[new branch]"), the others will see that the remote has the same commit hashes in its history and will just report "Everything up-to-date".

    # If this happens, 2 or more competing processes will have successfully acquired the lock, which defeats the point of the lock and will likely result
    # in an unresolvable merge conflict in one or more of the competing processes when they attempt to merge their updates to GIT_BRANCH.

    # To fix this, we need to ensure each process generates a unique commit hash. The easiest way to do this (without requiring additional parameters)
    # is to stick a UUID in the lockfile. This will result in a different tree hash and thus overall commit hash in all competing processes.
    cat /proc/sys/kernel/random/uuid > "${GITOPS_REPO_DIR}/${LOCKFILE_NAME}"
    echo ""
    echo "Created ${GITOPS_REPO_DIR}/${LOCKFILE_NAME} with content:"
    cat "${GITOPS_REPO_DIR}/${LOCKFILE_NAME}"

    echo
    echo "$(logts) git add ${LOCKFILE_NAME}"
    echo "-------------------------------------------------"
    git -C "${GITOPS_REPO_DIR}" add "${LOCKFILE_NAME}"

    echo
    echo "$(logts) git commit -m 'Acquire lock branch'"
    echo "-------------------------------------------------"
    git -C "${GITOPS_REPO_DIR}" commit -m 'Acquire lock branch'

    echo
    echo "$(logts) git push --atomic -u origin ${GIT_LOCK_BRANCH}"
    echo "-------------------------------------------------"
    git -C "${GITOPS_REPO_DIR}" push --atomic -u origin "${GIT_LOCK_BRANCH}"
    GIT_PUSH_RC=$?

    if [ "${GIT_PUSH_RC}" == "0" ]; then
      # Now we've created the remote lock branch, we are blocking any other invocations of this script
      # Register an exit trap to ensure we delete the remote branch whatever happens.
      # The two arguments are shell-quoted with %q because the trap string is re-parsed when it fires.
      trap "unlock_git_repo $(printf '%q %q' "${GIT_LOCK_BRANCH}" "${GITOPS_REPO_DIR}")" EXIT

      echo ""
      echo "acquired lock on branch ${GIT_LOCK_BRANCH}; ensuring that we have the latest from ${GIT_BRANCH}..."

      # It's possible that *conflicting* commits (i.e. from another run sharing the same GIT_LOCK_BRANCH)
      # have been made to GIT_BRANCH between cloning GIT_BRANCH here (clone_target_git_repo call above)
      # and successfully acquiring GIT_LOCK_BRANCH

      # The sequence of events that hit this race condition are as follows:
      # - this process clones GIT_BRANCH that has commits up to X
      # - another process with the same GIT_LOCK_BRANCH pushes commit Y to GIT_BRANCH and deletes the GIT_LOCK_BRANCH
      # - this process successfully acquires GIT_LOCK_BRANCH and so proceeds
      # - but the version of GIT_BRANCH cloned in this process does not have commit Y in it, so the GIT_LOCK_BRANCH does not include Y

      # because Y (in this case) originates from a process sharing GIT_LOCK_BRANCH, it's likely that it affects the same
      # file that this process is about to update and so is likely to lead to an unresolvable merge conflict when we attempt to
      # merge GIT_LOCK_BRANCH updates back into GIT_BRANCH in the save_and_unlock_target_git_repo call at the end

      # So, now we've acquired GIT_LOCK_BRANCH (thus ensuring no further conflicting commits can be made to master),
      # we need to ensure we are basing our changes on the latest version of GIT_BRANCH.

      # This is achieved by rebasing GIT_LOCK_BRANCH on GIT_BRANCH then forcing the remote lock branch to line up using a --force push
      # NOTE: --force is safe here since we are the sole current "owners" of GIT_LOCK_BRANCH
      # so any commits made to GIT_LOCK_BRANCH by any other concurrent processes can be disregarded

      # NOTE: of course other processes could be making other commits to git at any time during execution of this process.
      # this is fine since they *must* be from processes that do not share GIT_LOCK_BRANCH, so will not affect
      # the same files updated by this process and so will be auto-mergable.

      echo
      echo "$(logts) git pull origin $GIT_BRANCH --rebase"
      echo "-------------------------------------------------"
      git -C "${GITOPS_REPO_DIR}" pull origin "${GIT_BRANCH}" --rebase || exit 1

      echo
      echo "$(logts) git push --force -u origin ${GIT_LOCK_BRANCH}"
      echo "-------------------------------------------------"
      git -C "${GITOPS_REPO_DIR}" push --force -u "origin" "${GIT_LOCK_BRANCH}"
      echo "================================================="

      return 0
    fi

    echo ""
    echo "failed to acquire Lock branch ${GIT_LOCK_BRANCH}, retry in ${RETRY_DELAY_SECONDS}s"
    echo "..."
    sleep "${RETRY_DELAY_SECONDS}"

  done

  echo "clone_and_lock_git_repo: non-recoverable failure"
  echo "================================================="
  return 1

}


# rebases then attempts to push the git repo
# if the push fails, the rebase and push will be retried after RETRY_DELAY_SECONDS delay up to RETRIES times
# the retry logic is there in case another process pushes a commit to the repo in between the rebase and push, causing errors like this:
#    ! [rejected]        master -> master (fetch first)
#    ! [remote rejected] master -> master (cannot lock ref 'refs/heads/master': is at x but expected y)
function git_push_with_retries {
  # $1 - repository name, $2 - branch, $3 - parent directory of the clone,
  # $4 - maximum number of attempts (default 20), $5 - seconds between attempts (default 30)
  # Returns 0 as soon as a push succeeds, 1 once every attempt has failed.
  GITHUB_REPO="$1"
  GIT_BRANCH="$2"
  LOCAL_DIR="$3"
  RETRIES=${4:-20}
  RETRY_DELAY_SECONDS=${5:-30}

  GITOPS_REPO_DIR="${LOCAL_DIR}/${GITHUB_REPO}"

  c=1
  while (( c <= RETRIES )); do

    printf '\n%s\n%s\n' \
      "git_push_with_retries: attempt ${c} of ${RETRIES}" \
      "================================================="

    # Rebase onto the remote branch first; its status is deliberately not checked
    printf '\n%s\n%s\n' \
      "$(logts) git pull origin $GIT_BRANCH --rebase" \
      "-------------------------------------------------"
    git -C "${GITOPS_REPO_DIR}" pull origin "${GIT_BRANCH}" --rebase

    printf '\n%s\n%s\n' \
      "$(logts) git push -u origin ${GIT_BRANCH}" \
      "-------------------------------------------------"
    git -C "${GITOPS_REPO_DIR}" push -u origin "${GIT_BRANCH}"
    rc=$?
    if [[ $rc == "0" ]]; then
      printf '%s\n' \
        "" \
        "git_push_with_retries: success" \
        "================================================="
      return 0
    fi

    printf '%s\n' \
      "" \
      "git_push_with_retries: failed (rc: ${rc}), retry in ${RETRY_DELAY_SECONDS}s" \
      "..."
    sleep $RETRY_DELAY_SECONDS

    (( c++ ))
  done

  printf '%s\n' \
    "" \
    "git_push_with_retries: non-recoverable failure" \
    "================================================="
  return 1
}


# Intended to be called at the end of a script that uses the clone_and_lock_target_git_repo function above
# after modifications to config files have been applied to the local clone
# This pushes the changes to the lock branch (GIT_LOCK_BRANCH), and squash merges them to the main branch (GIT_BRANCH)
# If RETURNVARNAME_MODIFIED is passed a variable name, that variable will be set to 1 if there are changes in GIT_LOCK_BRANCH that are not in GIT_BRANCH, or 0 otherwise
# Example usage:
#   save_and_unlock_target_git_repo "${GITHUB_REPO}" "${GIT_BRANCH}" "${GITOPS_WORKING_DIR}" "${GIT_COMMIT_MSG}" "${GIT_LOCK_BRANCH}" WAS_MODIFIED
#   if [[ "${WAS_MODIFIED}" == "1" ]]; then
#     echo "Configuration was modified"
#   fi
function save_and_unlock_target_git_repo {
  # $1 - repository name, $2 - main branch, $3 - parent directory of the clone,
  # $4 - commit message, $5 - lock branch,
  # $6 - (optional) NAME of a caller variable set to 1/0 depending on whether the squash
  #      merge left anything to commit on the main branch
  # Exits non-zero if any git step fails (a commit status of 1 is tolerated),
  # including when the final push is still failing after all retries.
  echo
  echo "save_and_unlock_target_git_repo"
  echo "================================================="
  GITHUB_REPO="$1"
  GIT_BRANCH="$2"
  LOCAL_DIR="$3"
  # Use the message that was passed in; the global GIT_COMMIT_MSG is only a fallback
  # for callers that relied on it being read directly.
  COMMIT_MSG="${4:-${GIT_COMMIT_MSG}}"
  GIT_LOCK_BRANCH="$5"
  RETURNVARNAME_MODIFIED="$6"

  LOCKFILE_NAME='.lock'
  GITOPS_REPO_DIR="${LOCAL_DIR}/${GITHUB_REPO}"

  # Delete the .lock file
  rm -rf "${GITOPS_REPO_DIR}/${LOCKFILE_NAME}"

  # commit and push all changes
  echo
  echo "$(logts) git add -v ."
  echo "-------------------------------------------------"
  git -C "${GITOPS_REPO_DIR}" add -v . || exit 1

  echo
  echo "$(logts) git commit -m ${COMMIT_MSG}"
  echo "-------------------------------------------------"
  git -C "${GITOPS_REPO_DIR}" commit -m "${COMMIT_MSG}"

  export RC=$?
  echo "commit returned $RC"
  # A commit status of 1 is let through; anything else non-zero aborts
  if [[ "${RC}" -ne 0 && "${RC}" -ne 1 ]]; then
    echo "exit $RC"
    exit "${RC}"
  fi

  echo
  echo "$(logts) git push -u origin ${GIT_LOCK_BRANCH}"
  echo "-------------------------------------------------"
  git -C "${GITOPS_REPO_DIR}" push -u origin "${GIT_LOCK_BRANCH}" || exit 1

  # Merge back to master
  echo
  echo "$(logts) git switch ${GIT_BRANCH}"
  echo "-------------------------------------------------"
  git -C "${GITOPS_REPO_DIR}" switch "${GIT_BRANCH}" || exit 1

  echo
  echo "$(logts) git pull origin $GIT_BRANCH --rebase"
  echo "-------------------------------------------------"
  git -C "${GITOPS_REPO_DIR}" pull origin "${GIT_BRANCH}" --rebase || exit 1

  echo
  echo "$(logts) git merge --squash ${GIT_LOCK_BRANCH}"
  echo "-------------------------------------------------"
  git -C "${GITOPS_REPO_DIR}" merge --squash "${GIT_LOCK_BRANCH}" || exit 1


  if [[ -n "${RETURNVARNAME_MODIFIED}" ]]; then
    # Nameref: assigning to RETURNVAR_MODIFIED writes the caller's variable
    local -n RETURNVAR_MODIFIED="${RETURNVARNAME_MODIFIED}"
    local GIT_STATUS
    GIT_STATUS=$(git -C "${GITOPS_REPO_DIR}" status --porcelain=v1)
    if [[ -n "$GIT_STATUS" ]]; then
      RETURNVAR_MODIFIED=1
    else
      RETURNVAR_MODIFIED=0
    fi
  fi

  echo
  echo "$(logts) git commit -m ${COMMIT_MSG}"
  echo "-------------------------------------------------"
  git -C "${GITOPS_REPO_DIR}" commit -m "${COMMIT_MSG}"

  export RC=$?
  echo "commit returned $RC"
  if [[ "${RC}" -ne 0 && "${RC}" -ne 1 ]]; then
    echo exit $RC
    exit "${RC}"
  fi

  # Do not report success (or release the lock as if all was well) when the push never went through
  git_push_with_retries "${GITHUB_REPO}" "${GIT_BRANCH}" "${LOCAL_DIR}" || exit 1


  # unlock_git_repo exit trap function registered in clone_and_lock_target_git_repo
  # takes care of deleting remote branch and local clone
  # we'll do it here too anyway just to be sure
  # (note since this will also delete the repo dir from the system, when the exit trap reruns this script
  # it won't repeat the branch delete (which could cause problems since another process could have since re-created the lock branch))
  unlock_git_repo "${GIT_LOCK_BRANCH}" "${GITOPS_REPO_DIR}"


  echo
  echo "save_and_unlock_target_git_repo: success"
  echo "================================================="

}


# Logs into ArgoCD, retrying on failure.
# Arguments: $1 - number of retries after the first attempt (default 20)
#            $2 - seconds to wait between attempts (default 30)
# Globals read:    ARGOCD_URL, ARGOCD_USERNAME, ARGOCD_PASSWORD (all required)
# Globals written: retries, interval, index, return_code
# Exits 1 if a required variable is empty or if every attempt fails.
function argocd_login() {
  retries=${1:-20}
  interval=${2:-30}
  index=0
  echo "argo:argocd_login : Logging into ArgoCD ..."
  while true; do
    if [[ -z "${ARGOCD_URL}" || -z "${ARGOCD_USERNAME}" || -z "${ARGOCD_PASSWORD}" ]]; then
      echo "argo:argocd_login : ARGOCD_URL, ARGOCD_USERNAME and ARGOCD_PASSWORD environment variables must be set"
      exit 1
    fi
    # NOTE(review): this logs the first 8 characters of the password
    echo "argo:argocd_login : ARGOCD_URL=$ARGOCD_URL ARGOCD_USERNAME=$ARGOCD_USERNAME ARGOCD_PASSWORD=${ARGOCD_PASSWORD:0:8}<snip> ..."
    # Quoted so credentials containing whitespace or glob characters are passed intact
    argocd login "${ARGOCD_URL}" --username "${ARGOCD_USERNAME}" --password "${ARGOCD_PASSWORD}" --insecure --skip-test-tls --grpc-web
    return_code=$?
    echo "argo:argocd_login : return_code=$return_code"

    if [[ "${return_code}" -eq 0 ]]; then
      echo "argo:argocd_login : ArgoCD login success"
      break
    fi

    if [[ ${index} -eq ${retries} ]]; then
      echo "argo:argocd_login : Timeout argocd_login failed, exit with error"
      exit 1
    fi
    sleep "$interval"
    # Plain assignment: "((index++))" has a non-zero status when index is 0
    index=$((index + 1))
  done
}


# Triggers a forced sync of an ArgoCD application and logs the CLI's exit status.
# The status is only reported (and stored in RC); it does not fail the function.
# Arguments: $1 - application name
# Globals written: APP_NAME, RC
function argocd_sync(){
  APP_NAME=$1
  echo
  echo "Force Application $APP_NAME to Sync ..."
  # Quoted so the name is always passed as exactly one argument
  argocd app sync "${APP_NAME}" --force --timeout 30 --assumeYes --grpc-web
  RC=$?
  echo "ArgoCD response for Force Application $APP_NAME to Sync: $RC"
}

# Triggers a sync with --prune of an ArgoCD application and logs the CLI's exit status.
# The status is only reported (and stored in RC); it does not fail the function.
# Arguments: $1 - application name
# Globals written: APP_NAME, RC
function argocd_prune_sync(){
  APP_NAME=$1
  echo "Force Application $APP_NAME to Sync with --prune ..."
  # Quoted so the name is always passed as exactly one argument
  argocd app sync "${APP_NAME}" --prune --grpc-web
  RC=$?
  echo "ArgoCD response for Force Application $APP_NAME to Sync with --prune: $RC"
}

# Requests a hard refresh of an ArgoCD application and logs the CLI's exit status.
# The status is only reported (and stored in RC); it does not fail the function.
# Arguments: $1 - application name
# Globals written: APP_NAME, RC
function argocd_hard_refresh(){
  APP_NAME=$1
  echo "Force Application $APP_NAME to hard refresh ..."
  # Quoted so the name is always passed as exactly one argument
  argocd app get "${APP_NAME}" --hard-refresh --grpc-web
  RC=$?
  echo "ArgoCD response for Force Application $APP_NAME to hard refresh: $RC"
}

# Blocks until an application no longer appears in an ArgoCD project's application list,
# polling every 30 seconds. There is no timeout.
# Arguments: $1 - application name, $2 - project name
# Globals written: APPLICATION, PROJECT, WAIT_PERIOD (total seconds slept so far), APP
# NOTE(review): errors from "argocd app list" are discarded, so a failing CLI call
# produces empty output and is treated the same as "application deleted".
function check_argo_app_deleted(){
  APPLICATION=$1
  PROJECT=$2
  WAIT_PERIOD=0
  echo "argo: Checking if $APPLICATION in project $PROJECT is deleted."
  while true; do
    # The name is handed to jq as data (--arg) rather than spliced into the jq program
    APP=$(argocd app list -p "${PROJECT}" -o json 2> /dev/null \
      | jq -r --arg name "${APPLICATION}" '.[] | select(.metadata.name == $name)')
    if [[ -z "$APP" ]]; then
      echo "Application deleted"
      break
    else
      echo "Application $APPLICATION still found, Waiting 30s before checking application again"
      sleep 30
      WAIT_PERIOD=$((WAIT_PERIOD + 30))
    fi
  done
}


# Fetches URL for cluster with cluster id supplied in the first param (e.g. "mas-c6") and sets it as the value of the variable named in the second param
# exit 1 if something goes wrong
# Example usage:
#   export CLUSTER_URL
#   argocd_get_cluster_url "${CLUSTER_ID}" "CLUSTER_URL"
#   echo "Cluster URL: ${CLUSTER_URL}"
function argocd_get_cluster_url(){
  # $1 - cluster id to look up
  # $2 - NAME of the caller's variable that receives the server URL
  local _CLUSTER_ID=$1
  # Declared separately from the assignment so `local` does not mask the command status
  local _CLUSTER_URL
  _CLUSTER_URL=$(argocd cluster get "${_CLUSTER_ID}" -o server)

  # Failure is detected from empty output
  if [[ -z "${_CLUSTER_URL}" ]]; then
    echo "Failed to get URL for cluster ${_CLUSTER_ID} from ArgoCD"
    exit 1
  fi

  # printf -v assigns the value verbatim; unlike eval it never re-parses it as shell code
  # (URLs may legitimately contain characters such as "&", "?" or ";")
  printf -v "$2" '%s' "${_CLUSTER_URL}"

}



# Blocks until an ArgoCD application reports sync status "Synced" AND operation phase
# "Succeeded", polling every 30 seconds. There is no timeout.
# On every 5th unsuccessful check a sync is re-triggered for the cluster watcher
# application (when one is given) and for the application itself.
# Arguments: $1 - application name
#            $2 - cluster watcher application name (optional)
#            $3 - namespace (only logged)
# Globals written: APPLICATION, CLUSTER_WATCHER, NAMESPACE, COUNT, APP_RES_YAML, SYNC_STATUS, PHASE
function check_argo_app_synced() {
  APPLICATION=$1
  CLUSTER_WATCHER=$2
  NAMESPACE=$3
  COUNT=0
  echo "argo:check_argo_app_synced : APPLICATION=$APPLICATION CLUSTER_WATCHER=$CLUSTER_WATCHER NAMESPACE=$NAMESPACE"
  while true; do
    echo "argo:check_argo_app_synced : Checking sync status for $APPLICATION"
    APP_RES_YAML=$(argocd app get "${APPLICATION}" -o json 2> /dev/null)
    # The document is passed to jq byte-for-byte; an unquoted echo would word-split it
    # and expand any glob characters it contains
    SYNC_STATUS=$(printf '%s\n' "${APP_RES_YAML}" | jq -r .status.sync.status)

    # We also check phase is "Succeeded" (not e.g. "Running") to ensure we wait for any post-sync hooks to complete
    PHASE=$(printf '%s\n' "${APP_RES_YAML}" | jq -r .status.operationState.phase)

    echo "argo:check_argo_app_synced : SYNC_STATUS=$SYNC_STATUS PHASE=${PHASE}"
    if [[ "$SYNC_STATUS" == "Synced" && "${PHASE}" == "Succeeded" ]]; then
      echo "Sync Status is Synced and Phase is Succeeded"
      break
    else
      # Plain assignment: "((COUNT++))" has a non-zero status when COUNT is 0
      COUNT=$((COUNT + 1))
      echo "argo:check_argo_app_synced : Sync Status is $SYNC_STATUS, Phase is ${PHASE}. Waiting 30s before checking status and phase again - $COUNT"
      if (( COUNT % 5 == 0 )); then
        if [[ -n "$CLUSTER_WATCHER" ]]; then
          argocd_sync "$CLUSTER_WATCHER"
        fi
        argocd_sync "$APPLICATION"
      fi
      sleep 30
    fi
  done
}


# Blocks until an ArgoCD application reports health status "Healthy", polling every 30 seconds.
# Arguments: $1 - application name
#            $2 - maximum number of checks (optional; without it there is no timeout)
# Globals written: APPLICATION, MAX_CHECKS, COUNT, HEALTH_STATUS
# Exits 1 once MAX_CHECKS unsuccessful checks have been made.
function check_argo_app_healthy() {
  APPLICATION=$1
  MAX_CHECKS=$2
  COUNT=0
  while true; do
    if [[ -n "$MAX_CHECKS" ]]; then
      echo "argo:check_argo_app_healthy : Checking health status for $APPLICATION up to $MAX_CHECKS times"
    else
      echo "argo:check_argo_app_healthy : Checking health status for $APPLICATION with no timeout"
    fi
    HEALTH_STATUS=$(argocd app get "${APPLICATION}" -o json 2> /dev/null | jq -r .status.health.status)
    if [[ "$HEALTH_STATUS" == "Healthy" ]]; then
      echo "Health Status is Healthy"
      break
    fi

    # Plain assignment: "((COUNT++))" has a non-zero status when COUNT is 0
    COUNT=$((COUNT + 1))
    echo "argo:check_argo_app_healthy : Health Status is $HEALTH_STATUS, Waiting 30s before checking status again - $COUNT"
    # Give up BEFORE sleeping: there is no point waiting 30s for a check that will never run.
    # -ge (rather than -eq) also terminates when MAX_CHECKS is 0.
    if [[ -n "$MAX_CHECKS" && "${COUNT}" -ge "${MAX_CHECKS}" ]]; then
      echo "argo:check_argo_app_healthy : App not healthy after $MAX_CHECKS checks, exiting"
      exit 1
    fi
    sleep 30
  done
}

# Rejects application names longer than 63 characters.
# Arguments: $1 - application name
# Globals written: APPLICATION
# Prints an error and exits 1 when the name is too long; otherwise returns 0 silently.
function validate_app_name(){
  APPLICATION=$1

  local -r max_length=63
  (( ${#APPLICATION} <= max_length )) && return 0

  echo "Application name cannot be longer than 63 characters: $APPLICATION"
  exit 1
}

# Look for storage class names matching "generated(<type>)" where <type> is a string
# If it matches, return "<type>" or "" otherwise (used to indicate that no match was found).
# E.g. "generated(efs)" returns "efs". "gp3" returns ""
#
# Arguments:
#   $1 - storage class name, or a "generated(<type>)" directive
#
# The result is written to stdout as a single line (an empty line when there is no match).
# NOTE: the pattern is not anchored, so the directive is recognised anywhere within the input.
function parse_storage_class_name() {
  INPUT_STORAGE_CLASS_NAME="$1"
  if [[ "$INPUT_STORAGE_CLASS_NAME" =~ generated\((.*)\) ]]; then
    printf '%s\n' "${BASH_REMATCH[1]}"
    # Stop here so the "no match" output below is not also emitted after a match
    return 0
  fi
  echo ""
}

# If the first parameter matches a "generated(<type>)" directive (see parse_storage_class_name() above),
# this will return a suitable name containing all necessary delimiters for a K8S storage class resource:
# ${STORAGE_CLASS_TYPE}-${CLUSTER_ID}-${MAS_INSTANCE_ID}-${MAS_APP_ID}-${DEPENDENCY}
# e.g. "efs-noble5-inst1-manage-db2"
#
# For efs (i.e. "generated(efs)"), the output of this function is also used in generate_storage_class_def() (below) to lookup
# an EFS instance in AWS by its CreationToken.
#
# Resulting name must not be longer than 64 characters (due to CreationToken field length restrictions):
#   STORAGE_CLASS_TYPE: 6 chars max
#   CLUSTER_ID: 12 chars max
#   MAS_INSTANCE_ID: 12 chars max
#   MAS_APP_ID: 16 chars max
#   DEPENDENCY: 8 chars max
#
#   After the 4 chars reserved for delimiters this results in names of max 58 in length, leaving
#   us with 6 chars of headroom in case of future requirements.
#   NOTE: these limits are not enforced by this function.
#
# If the first parameter does not match the "generated(<type>)" directive, this indicates that a named
# storage class was specified, so this function will just return the first parameter unchanged.
#
# Arguments:
#   $1 - storage class name, or a "generated(<type>)" directive
#   $2 - cluster ID
#   $3 - MAS instance ID
#   $4 - MAS app ID
#   $5 - dependency
#
# The name is written to stdout; the function returns 0.
function generate_storage_class_name() {
  INPUT_STORAGE_CLASS_NAME="$1"
  CLUSTER_ID="${2}"
  MAS_INSTANCE_ID="${3}"
  MAS_APP_ID="${4}"
  DEPENDENCY="${5}"

  STORAGE_CLASS_TYPE=$(parse_storage_class_name "${INPUT_STORAGE_CLASS_NAME}")
  if [[ -z "${STORAGE_CLASS_TYPE}" ]]; then
    # Named storage class: pass it through unchanged
    echo "${INPUT_STORAGE_CLASS_NAME}"
    # "return" rather than "exit" so that a direct (non-subshell) call does not terminate the calling script
    return 0
  fi

  echo "${STORAGE_CLASS_TYPE}-${CLUSTER_ID}-${MAS_INSTANCE_ID}-${MAS_APP_ID}-${DEPENDENCY}"
}

# Adds the storage class definition yaml to STORAGE_CLASS_DEFINITIONS_SO_FAR based on INPUT_STORAGE_CLASS_NAME, then echoes (i.e. "returns") STORAGE_CLASS_DEFINITIONS_SO_FAR
# All other parameters are used to generate the name for the storage class (see generate_storage_class_name() above).
# 
# If INPUT_STORAGE_CLASS_NAME is a named storage class (indicating that no storage class definition is required), STORAGE_CLASS_DEFINITIONS_SO_FAR will be echoed unchanged
# If it is a "generated(<type>)" directive, YAML describing the storage class type will be added to STORAGE_CLASS_DEFINITIONS_SO_FAR
#
# Currently supports <type>:
#   efs - EFS file system ID will be fetched from AWS account looking up EFS by CreationToken matching the generated storage class name.
#         NOTE: Assumes aws CLI already logged into an AWS account with creds authorised to run "aws efs describe-file-systems"
#
# If <type> is not supported, an error will be echoed to stderr and function will exit 1
#
# Example usage:
#   export STORAGE_CLASS_DEFINITIONS
#
#   STORAGE_CLASS_DEFINITIONS=$(generate_storage_class_def "generated(efs)" "cluster1" "inst1" "app1" "dep1" "${STORAGE_CLASS_DEFINITIONS}") || exit 1
#   STORAGE_CLASS_DEFINITIONS=$(generate_storage_class_def "generated(efs)" "cluster1" "inst1" "app1" "dep2" "${STORAGE_CLASS_DEFINITIONS}") || exit 1
#   STORAGE_CLASS_DEFINITIONS=$(generate_storage_class_def "gp3"            "cluster1" "inst1" "app1" "dep3" "${STORAGE_CLASS_DEFINITIONS}") || exit 1
# 
#   echo "${STORAGE_CLASS_DEFINITIONS}"
#
#   # Output:
#   #
#   # efs-cluster1-inst1-app1-dep1:
#   #   provisioner: efs.csi.aws.com
#   #   parameters:
#   #     fileSystemId: fs-000000000000001
#   #     directoryPerms: "777"
#   #     basePath: /rosa-efs-cluster1-inst1-app1-dep1
#   #     gid: "0"
#   #     uid: "0"
#   #     provisioningMode: efs-ap
#   # efs-cluster1-inst1-app1-dep2:
#   #   provisioner: efs.csi.aws.com
#   #   parameters:
#   #     fileSystemId: fs-000000000000002
#   #     directoryPerms: "777"
#   #     basePath: /rosa-efs-cluster1-inst1-app1-dep2
#   #     gid: "0"
#   #     uid: "0"
#   #     provisioningMode: efs-ap
function generate_storage_class_def() {
  # Arguments:
  #   $1 - storage class name, or a "generated(<type>)" directive
  #   $2 - cluster ID
  #   $3 - MAS instance ID
  #   $4 - MAS app ID
  #   $5 - dependency
  #   $6 - YAML dict of the storage class definitions accumulated so far
  INPUT_STORAGE_CLASS_NAME="$1"
  CLUSTER_ID="${2}"
  MAS_INSTANCE_ID="${3}"
  MAS_APP_ID="${4}"
  DEPENDENCY="${5}"
  STORAGE_CLASS_DEFINITIONS_SO_FAR="$6"

  STORAGE_CLASS_NAME=$(generate_storage_class_name "${INPUT_STORAGE_CLASS_NAME}" "${CLUSTER_ID}" "${MAS_INSTANCE_ID}" "${MAS_APP_ID}" "${DEPENDENCY}" )

  STORAGE_CLASS_TYPE=$(parse_storage_class_name "${INPUT_STORAGE_CLASS_NAME}")
  if [[ -z "${STORAGE_CLASS_TYPE}" ]]; then
    # not a special generated(x) directive, so must be a named storage class
    # do not add any new storage class definitions; echo back what the caller passed in
    # (the 6th parameter, not whatever global STORAGE_CLASS_DEFINITIONS happens to hold)
    echo "${STORAGE_CLASS_DEFINITIONS_SO_FAR}"
    return 0
  elif [[ "${STORAGE_CLASS_TYPE}" == "efs" ]]; then

    # The EFS instance is looked up by a CreationToken equal to the generated storage class name
    CREATION_TOKEN="${STORAGE_CLASS_NAME}"
    EFS_FILESYSTEM_ID=$(aws efs describe-file-systems --creation-token "${CREATION_TOKEN}"  --query 'FileSystems[0].FileSystemId' --output text)
    # An empty result or the literal text "None" is treated as "no matching file system"
    if [[ -z "${EFS_FILESYSTEM_ID}" || "${EFS_FILESYSTEM_ID}" == "None" ]]; then
      echo "Unable to find EFS Instance with CreationToken ${CREATION_TOKEN}. Aborting." >&2
      exit 1
    fi

    STORAGE_CLASS_DEF='{"provisioner": "efs.csi.aws.com", "parameters": { "fileSystemId": "'"${EFS_FILESYSTEM_ID}"'", "directoryPerms": "777", "basePath": "/rosa-'"${STORAGE_CLASS_NAME}"'", "gid": "0", "uid": "0", "provisioningMode": "efs-ap" }}'
  else
    echo "Unsupported StorageClass type: ${STORAGE_CLASS_TYPE}" >&2
    exit 1
  fi

  # Add storage class to dict STORAGE_CLASS_DEFINITIONS_SO_FAR using STORAGE_CLASS_NAME as the key
  # No-op if key already exists
  # Expansions are quoted so the expression always reaches yq as a single argument
  echo "${STORAGE_CLASS_DEFINITIONS_SO_FAR}" | yq e '(.'"${STORAGE_CLASS_NAME}"' // '"${STORAGE_CLASS_DEF}"') as $x | .'"${STORAGE_CLASS_NAME}"' = $x' -
}