feat(deploy): implement blue-green deployment strategy

This commit replaces the previous deployment mechanism with a blue-green strategy to lay the groundwork for zero-downtime deployments. Key changes: Introduces a deploy-blue-green.sh script to manage "blue" and "green" container sets, creating versioned releases. Updates the Anubis gatekeeper template to dynamically route traffic based on the active deployment color, allowing for seamless traffic switching. Modifies Docker Compose files to include color-specific labels and environment variables. Adapts the GitHub Actions workflow to execute the new blue-green deployment process. Removes the old, now-obsolete deployment and health check scripts. Note: Automated rollback on health check failure is not yet implemented. Downgrades can be performed manually by switching the active color.
2025-06-12 16:56:36 -07:00 · 2025-06-12 16:56:36 -07:00 · 30ce126a07
commit 30ce126a07
parent a58a0e642a
19 changed files with 1166 additions and 591 deletions
--- a/server/scripts/common-lib.sh
+++ b/server/scripts/common-lib.sh
@@ -0,0 +1,382 @@
+#!/bin/bash
+# Common library for deployment scripts
+# Source this file in other scripts: source "${SCRIPT_DIR}/common-lib.sh"
+
+# Shared constants: values are assigned first and frozen afterwards.
+BLUE_COLOR="blue"
+GREEN_COLOR="green"
+CORE_PROJECT_NAME="pkmntrade-club"
+DEPLOYMENT_LABEL="deployment.color"
+# Retry tuning: an existing non-empty value wins over the default of 5.
+RETRY_MAX_ATTEMPTS="${RETRY_MAX_ATTEMPTS:-5}"
+RETRY_DELAY="${RETRY_DELAY:-5}"
+readonly BLUE_COLOR GREEN_COLOR CORE_PROJECT_NAME DEPLOYMENT_LABEL
+readonly RETRY_MAX_ATTEMPTS RETRY_DELAY
+
+# Dry run helper function
+# Usage: execute_if_not_dry "description" command [args...]
+execute_if_not_dry() {
+    local description="$1"
+    shift
+    
+    if [ "$DRY_RUN" = true ]; then
+        indent_output echo "[DRY RUN] Would execute: $description"
+        indent_output echo " Command: $*"
+    else
+        "$@"
+    fi
+}
+
+# Execute with error handling
+# Usage: execute_or_fail "description" command [args...]
+execute_or_fail() {
+    local description="$1"
+    shift
+    
+    if [ "$DRY_RUN" = true ]; then
+        indent_output echo "[DRY RUN] Would execute: $description"
+        indent_output echo " Command: $*"
+    else
+        if ! "$@"; then
+            echo "❌ Error: Failed to $description"
+            exit 1
+        fi
+    fi
+}
+
+# Run a non-critical command; a failure is reported as a warning and ignored.
+# In dry-run mode (DRY_RUN is "true") the command is only described.
+# Usage: execute_or_warn "description" command [args...]
+execute_or_warn() {
+    local what="$1"
+    shift
+
+    case "$DRY_RUN" in
+        true)
+            indent_output echo "[DRY RUN] Would execute: $what"
+            indent_output echo " Command: $*"
+            ;;
+        *)
+            "$@" || echo "⚠️  Warning: Failed to $what (continuing anyway)"
+            ;;
+    esac
+}
+
+# Retry a command with exponential backoff
+retry() {
+    local max_attempts=$RETRY_MAX_ATTEMPTS
+    local delay=$RETRY_DELAY
+    local attempt=1
+    local exit_code=0
+    
+    until "$@"; do
+        exit_code=$?
+        
+        if [ "$attempt" -ge "$max_attempts" ]; then
+            echo "❌ Command failed after $max_attempts attempts: $*" >&2
+            return $exit_code
+        fi
+        
+        echo "⚠️  Attempt $attempt failed, retrying in ${delay}s..." >&2
+        sleep "$delay"
+        
+        # Exponential backoff
+        delay=$((delay * 2))
+        attempt=$((attempt + 1))
+    done
+    
+    if [ $attempt -gt 1 ]; then
+        echo "✅ Command succeeded after $attempt attempts"
+    fi
+    
+    return 0
+} 
+
+# Run a shell command string on the deployment target.
+# When DEPLOY_HOST is non-empty the string is sent through ssh to the host
+# alias "deploy"; otherwise it is evaluated by a local bash.
+# All arguments are joined into one command string.
+run_on_target() {
+    if [[ -z "${DEPLOY_HOST}" ]]; then
+        bash -c -- "$*"
+        return
+    fi
+
+    ssh deploy "$*"
+}
+
+# Function to check if a variable is set
+require_var() {
+    local var_name=$1
+    local var_value=${!var_name}
+    
+    if [ -z "$var_value" ]; then
+        echo "Error: ${var_name} not set" >&2
+        exit 1
+    fi
+}
+
+# Print the active deployment color derived from running containers:
+# the only color that has containers, the more recently created one when
+# both colors run, or "none" when neither does.
+get_current_color() {
+    local n_blue=$(docker ps --filter "label=${DEPLOYMENT_LABEL}=${BLUE_COLOR}" -q 2>/dev/null | wc -l)
+    local n_green=$(docker ps --filter "label=${DEPLOYMENT_LABEL}=${GREEN_COLOR}" -q 2>/dev/null | wc -l)
+
+    # No blue containers: the answer is green, or nothing at all
+    if [ "$n_blue" -eq 0 ]; then
+        if [ "$n_green" -eq 0 ]; then
+            echo "none"
+        else
+            echo "$GREEN_COLOR"
+        fi
+        return
+    fi
+
+    # Blue containers only
+    if [ "$n_green" -eq 0 ]; then
+        echo "$BLUE_COLOR"
+        return
+    fi
+
+    # Both colors running: compare the creation timestamps of the first listed
+    # container of each color as strings; a failed inspect counts as the epoch
+    local blue_id=$(docker ps -q --filter "label=${DEPLOYMENT_LABEL}=${BLUE_COLOR}" | head -1)
+    local blue_created=$(docker inspect --format='{{.Created}}' "$blue_id" 2>/dev/null || echo '1970-01-01')
+    local green_id=$(docker ps -q --filter "label=${DEPLOYMENT_LABEL}=${GREEN_COLOR}" | head -1)
+    local green_created=$(docker inspect --format='{{.Created}}' "$green_id" 2>/dev/null || echo '1970-01-01')
+
+    if [[ ! "$blue_created" > "$green_created" ]]; then
+        echo "$GREEN_COLOR"
+    else
+        echo "$BLUE_COLOR"
+    fi
+}
+
+# Function to get deployment state (none, blue, green, both)
+get_deployment_state() {
+    local blue_count=$(docker ps --filter "label=${DEPLOYMENT_LABEL}=${BLUE_COLOR}" -q 2>/dev/null | wc -l)
+    local green_count=$(docker ps --filter "label=${DEPLOYMENT_LABEL}=${GREEN_COLOR}" -q 2>/dev/null | wc -l)
+    
+    if [ "$blue_count" -gt 0 ] && [ "$green_count" -gt 0 ]; then
+        echo "both"
+    elif [ "$blue_count" -gt 0 ]; then
+        echo "$BLUE_COLOR"
+    elif [ "$green_count" -gt 0 ]; then
+        echo "$GREEN_COLOR"
+    else
+        echo "none"
+    fi
+}
+
+# Function to check if deployment is in progress
+is_deployment_in_progress() {
+    local blue_count=$(docker ps --filter "label=${DEPLOYMENT_LABEL}=${BLUE_COLOR}" -q 2>/dev/null | wc -l)
+    local green_count=$(docker ps --filter "label=${DEPLOYMENT_LABEL}=${GREEN_COLOR}" -q 2>/dev/null | wc -l)
+    
+    if [ "$blue_count" -gt 0 ] && [ "$green_count" -gt 0 ]; then
+        return 0  # true - deployment in progress
+    else
+        return 1  # false - no deployment in progress
+    fi
+}
+
+# Function to switch color
+switch_color() {
+    local current=$1
+    if [ "$current" = "$BLUE_COLOR" ]; then
+        echo "$GREEN_COLOR"
+    else
+        echo "$BLUE_COLOR"
+    fi
+}
+
+# Print the compose project name for a color: "<CORE_PROJECT_NAME>-<color>".
+# Arguments: $1 - deployment color
+get_project_name() {
+    printf '%s-%s\n' "$CORE_PROJECT_NAME" "$1"
+}
+
+# Print the docker compose file arguments.
+# The same compose file is used for both staging and production, so the
+# result does not depend on PROD.
+get_compose_files() {
+    printf '%s\n' "-f docker-compose_web.yml"
+}
+
+# Send SIGHUP to the gatekeeper manager container.
+# Best effort: a missing container or docker error is silently ignored.
+refresh_gatekeepers() {
+    local manager="${CORE_PROJECT_NAME}-gatekeeper-manager-1"
+
+    echo "🔄 Refreshing gatekeepers..."
+    docker kill -s SIGHUP "$manager" 2>/dev/null || true
+}
+
+# Function to count containers by filter
+count_containers() {
+    local filters=$1
+    docker ps ${filters} -q 2>/dev/null | wc -l | tr -d '\n' || echo 0
+}
+
+# Print the contents of the ".previous_version" file found under the given
+# path, read on the deployment target.
+# Arguments: $1 - path of the "current" release link
+get_previous_release_path() {
+    local link_dir=$1
+    local marker="${link_dir}/.previous_version"
+    local recorded=$(run_on_target "cat '${marker}'")
+
+    echo "${recorded}"
+}
+
+# Function to stop and remove the previous release's containers for a color
+cleanup_color_containers() {
+    local color=$1
+    local project_name=$(get_project_name "$color")
+    # Use CLEANUP_RELEASE_PATH if set, otherwise default to the previous release.
+    # This is crucial for rollbacks to use the correct compose file for cleanup.
+    local release_path=${CLEANUP_RELEASE_PATH:-$(get_previous_release_path "${CURRENT_LINK_PATH}")}
+
+    echo "🛑 Stopping $color containers from release: ${release_path}"
+    run_on_target "cd '${release_path}' && docker compose -p '${project_name}' stop --timeout 30 2>/dev/null || true"
+
+    echo "🗑️  Removing $color containers from release: ${release_path}"
+    run_on_target "cd '${release_path}' && docker compose -p '${project_name}' down --remove-orphans 2>/dev/null || true"
+}
+
+# Print a message followed by a once-per-second countdown, then " done!".
+# Arguments: $1 - number of seconds to wait
+#            $2 - message printed before the countdown
+wait_with_countdown() {
+    local seconds=$1
+    local message=$2
+    # Keep the loop counter local so a caller's own `i` is not clobbered
+    local i
+
+    echo -n "$message"
+    for ((i = seconds; i > 0; i--)); do
+        echo -n " $i"
+        sleep 1
+    done
+    echo " done!"
+}
+
+get_web_service_name() {
+    echo "web" # hardcoded for now
+}
+# Standard environment validation
+validate_deployment_env() {
+    require_var "REPO_PROJECT_PATH"
+    require_var "PROD"
+    require_var "REPLICA_COUNT"
+    if [ "$PROD" = "true" ]; then
+        require_var "PRODUCTION_DOMAIN"
+    else
+        require_var "STAGING_DOMAIN"
+    fi
+
+    # Set derived variables
+    export CURRENT_LINK_PATH="${REPO_PROJECT_PATH}/current"
+    export RELEASES_PATH="${REPO_PROJECT_PATH}/releases"
+    export REPLICA_COUNT="${REPLICA_COUNT}"
+}
+
+# Report the status of every running container.
+# A container counts as unhealthy when its `docker ps` status text contains
+# "unhealthy"; every other status is reported as healthy.
+# Outputs: one "❌ Unhealthy" or "✅ Healthy" line per container on stdout
+# Returns: the number of unhealthy containers, clamped to 255
+get_health_check_status() {
+    # TODO: instead get the health check status from gatus container's api
+    local statuses
+    local name state
+    local unhealthy_count=0
+
+    # A failing docker call is treated as "no containers"
+    statuses=$(docker ps --format '{{.Names}} {{.Status}}') || statuses=""
+
+    # read splits off the container name and leaves the rest of the line,
+    # unglobbed, as the status text
+    while IFS=' ' read -r name state; do
+        [ -n "$name" ] || continue
+        if [[ "$state" == *"unhealthy"* ]]; then
+            unhealthy_count=$((unhealthy_count + 1))
+            echo "❌ Unhealthy: $name [$state]"
+        else
+            echo "✅ Healthy: $name [$state]"
+        fi
+    done <<< "$statuses"
+
+    # Exit statuses wrap modulo 256; clamp so 256 unhealthy containers can
+    # never be mistaken for success
+    if [ "$unhealthy_count" -gt 255 ]; then
+        return 255
+    fi
+    return "$unhealthy_count"
+}
+
+# Poll until the number of healthy containers of a compose service equals
+# the expected count.
+# Arguments: $1 - compose project name
+#            $2 - compose service name
+#            $3 - expected number of healthy containers
+# Returns:   0 once the count matches, 1 after 60 polls 5 seconds apart
+wait_for_healthy_containers() {
+    local project_name=$1
+    local service_name=$2
+    local expected_count=$3
+    local max_attempts=60  # 5 minutes with 5-second intervals
+    local attempt=0
+    # Local so the counter does not leak into the caller's scope
+    local healthy_count
+
+    echo "⏳ Waiting for $service_name containers to be healthy..."
+
+    while [ "$attempt" -lt "$max_attempts" ]; do
+        healthy_count=$(count_containers "--filter label=com.docker.compose.project=${project_name} --filter label=com.docker.compose.service=${service_name} --filter health=healthy")
+
+        if [[ "$healthy_count" -eq "$expected_count" ]]; then
+            echo "✅ All $service_name containers are healthy ($healthy_count/$expected_count)"
+            return 0
+        fi
+
+        echo "⏳ Healthy containers: $healthy_count/$expected_count (attempt $((attempt+1))/$max_attempts)"
+        sleep 5
+        attempt=$((attempt + 1))
+    done
+
+    echo "❌ Timeout waiting for $service_name containers to be healthy"
+    return 1
+}
+
+# List the releases under <project>/releases, most recently modified first.
+# Arguments: $1 - project root holding "releases/" and the "current" symlink
+# Outputs:   one indented line per release, tagged [CURRENT] when the
+#            "current" symlink resolves to it and [FAILED] when it contains
+#            a ".failed" marker; "No releases found" when nothing is listed
+list_releases() {
+    local REPO_PROJECT_PATH=$1
+    local RELEASES_PATH="${REPO_PROJECT_PATH}/releases"
+    local CURRENT_LINK_PATH="${REPO_PROJECT_PATH}/current"
+    # Loop state is local so same-named variables of the caller survive
+    local release version status
+    local current_target=""
+    local found=false
+
+    echo "📋 Available releases:"
+    if [ ! -d "$RELEASES_PATH" ]; then
+        indent_output echo "No releases found"
+        return 0
+    fi
+
+    # Resolve the symlink once instead of once per release
+    if [ -L "$CURRENT_LINK_PATH" ]; then
+        current_target=$(readlink -f "$CURRENT_LINK_PATH") || current_target=""
+    fi
+
+    # Consume ls output line by line so paths containing spaces stay intact;
+    # ls is kept only for its newest-first (-t) ordering
+    while IFS= read -r release; do
+        [ -n "$release" ] || continue
+        found=true
+        version=$(basename "$release")
+        status=""
+
+        # Check if it's current
+        if [ -n "$current_target" ] && [ "$current_target" = "$(realpath "$release")" ]; then
+            status=" [CURRENT]"
+        fi
+
+        # Check if it failed
+        if [ -f "${release}/.failed" ]; then
+            status="${status} [FAILED]"
+        fi
+
+        indent_output echo "- ${version}${status}"
+    done < <(ls -dt -- "${RELEASES_PATH}"/* 2>/dev/null)
+
+    # An existing but empty releases directory gets the same notice
+    if [ "$found" != true ]; then
+        indent_output echo "No releases found"
+    fi
+}
+
+# Function to get image tag from deployment
+get_deployment_image_tag() {
+    local color=$1
+    local container=$(docker ps --filter "label=com.docker.compose.project=${CORE_PROJECT_NAME}-${color}" --format '{{.Names}}'| head -1)
+    if [ -n "$container" ]; then
+        docker inspect "${container}" --format '{{index .Config.Labels "deployment.image_tag"}}'
+    else
+        echo "unknown"
+    fi
+}
+
+# Function to run a command and prefix its output
+# Usage: prefix_output "PREFIX" command [args...]
+# Example: prefix_output "  |  " docker ps
+# Example: prefix_output "  => " docker compose ps
+prefix_output() {
+    local prefix="  "
+    
+    if [ $# -lt 2 ]; then
+        echo "Error: prefix_output requires at least 2 arguments" >&2
+        return 1
+    fi
+    
+    prefix="$1"
+    shift
+    
+    # Run the command and prefix each line of output
+    "$@" 2>&1 | sed "s/^/${prefix}/"
+    
+    # Return the exit code of the original command (not sed)
+    return ${PIPESTATUS[0]}
+}
+
+# Run a command and indent every line of its output.
+# Usage: indent_output [INDENT_STRING] command [args...]
+# Example: indent_output docker ps           # Uses the default of 3 spaces
+# Example: indent_output "    " docker ps    # Uses 4 spaces
+# A first argument made up solely of whitespace is taken as the indent
+# string; anything else is treated as the start of the command.
+indent_output() {
+    local whitespace_only='^[[:space:]]+$'
+
+    if [[ "$1" =~ $whitespace_only ]]; then
+        local custom_indent="$1"
+        shift
+        prefix_output "$custom_indent" "$@"
+        return
+    fi
+
+    # No explicit indent given: fall back to three spaces
+    prefix_output "   " "$@"
+}
+
+# Print a header line, then run a command with its output indented by two
+# spaces.
+# Usage: run_with_header "HEADER" command [args...]
+# Example: run_with_header "Docker Containers:" docker ps
+run_with_header() {
+    local title="$1"
+    shift
+
+    echo "$title"
+    indent_output "  " "$@"
+}