pkmntrade.club/server/gatus/config.template.yaml
badbl0cks c87d73435b
feat: Enhance gatekeeper resilience and host handling
This commit significantly improves the gatekeeper system's robustness, monitoring capabilities, and simplifies host header management for backend services.

Key changes include:

**Gatekeeper Health, Management & Resilience:**
- Implemented active health checking for individual gatekeeper containers within the `gatekeeper-manager` service.
    - The manager now periodically curls the `/metrics` endpoint of each gatekeeper container.
    - Reports health status to a new Gatus `services_gatekeeper` endpoint.
    - Automatically attempts to restart the gatekeeper stack if any gatekeeper instance is unhealthy or if the expected number of gatekeepers is not running.
- Refactored the `gatekeeper-manager` shell script for improved state management and signal handling:
    - Introduced `STARTED`, `RESTARTING`, `TERMINATING` state flags for more controlled operations.
    - Enhanced SIGTERM and SIGHUP handling to gracefully manage gatekeeper lifecycles.
    - Added `apk add curl` to ensure `curl` is available in the manager container.
- Renamed the gatekeeper Docker Compose template from `docker-compose_gatekeeper.template.yml` to `gatekeepers.template.yml` and its output to `gatekeepers.yml`.
- Updated `dockergen-gatekeeper` to watch the new template file and notify the correct `gatekeeper-manager` service instance (e.g., `pkmntrade-club-gatekeeper-manager-1`).
- Discover services that should be protected by looking for a `gatekeeper=true` label.

**Host Header Management & `ALLOWED_HOSTS` Simplification:**
- HAProxy configuration (`haproxy.cfg`) now consistently sets the `Host` HTTP header for requests to all backend services (e.g., `pkmntrade.club`, `staging.pkmntrade.club`). This centralizes and standardizes host information.
- Consequently, explicit `ALLOWED_HOSTS` environment variables have been removed from the `web` and `celery` service definitions in `docker-compose_web.yml` and `docker-compose_staging.yml`. Backend Django applications should now rely on the `Host` header set by HAProxy for request validation.
- The `gatekeepers.template.yml` now defines a `TARGET_HOST` environment variable for proxied services (e.g., `web`, `web-staging`). This aligns with the ALLOWED_HOSTS on the target to ensure requests aren't blocked.

**Gatus Monitoring & Configuration Updates:**
- In Gatus configuration (`gatus/config.template.yaml`):
    - The "Redis" external service endpoint has been renamed to "Cache" for better clarity and to fit the theme of simple names.
    - A new external service endpoint "Gatekeeper" has been added to monitor the overall health reported by the `gatekeeper-manager`.
    - Health checks for "Web Worker" endpoints (both main and staging) now include the appropriate `Host` header (e.g., `Host: pkmntrade.club`) to ensure accurate health assessments by Django.
- In `docker-compose_core.yml`, the `curl` commands used by `db-redis-healthcheck` for database and cache health now append `|| true`. This prevents the script from exiting on a curl error (e.g., timeout, connection refused), ensuring that the failure is still reported to Gatus via the `success=false` parameter rather than the script terminating prematurely.

These changes collectively make the gatekeeper system more fault-tolerant, provide better visibility into its status, and streamline the configuration of backend applications by standardizing how they receive host information.
2025-05-23 16:16:59 -07:00

154 lines
3.7 KiB
YAML

# Storage backend for Gatus: PostgreSQL. The connection URL comes from the
# ${GATUS_DATABASE_URL} placeholder (expected to be substituted from the
# environment), so no credentials are committed in this template.
storage:
  type: postgres
  path: "${GATUS_DATABASE_URL}"
# Web server settings. read-buffer-size is the request read buffer in bytes
# (per the Gatus docs it also caps the maximum request header size).
web:
  read-buffer-size: 32768
# Self-connectivity probe: Gatus checks 1.1.1.1:53 every 60s. Per the Gatus
# docs, endpoint checks are skipped while this target is unreachable, so an
# outage of the monitor's own network is not reported as service failures.
connectivity:
  checker:
    target: 1.1.1.1:53
    interval: 60s
# Push-based endpoints: Gatus does not poll these. Other services report
# their own health to Gatus, and every entry is guarded by the shared
# ${GATUS_TOKEN}. Each entry opts into the e-mail alert defined under
# `alerting.email`.
external-endpoints:
  - name: Database
    group: Services
    token: "${GATUS_TOKEN}"
    alerts:
      - type: email

  - name: Cache
    group: Services
    token: "${GATUS_TOKEN}"
    alerts:
      - type: email

  - name: Gatekeeper
    group: Services
    token: "${GATUS_TOKEN}"
    alerts:
      - type: email
# Endpoints polled by Gatus itself. Static entries come first; the
# per-container "Web Worker" entries at the end are generated by the
# template directives from the container list passed in as the template
# context.
endpoints:
  # --- Expirations: fail when less than 720h (30 days) of domain
  # registration or 240h (10 days) of certificate validity remain.
  - name: Domain
    group: Expirations
    url: "https://pkmntrade.club"
    interval: 1h
    conditions:
      - "[DOMAIN_EXPIRATION] > 720h"
    alerts:
      - type: email

  - name: Certificate
    group: Expirations
    url: "https://pkmntrade.club"
    interval: 1h
    conditions:
      - "[CERTIFICATE_EXPIRATION] > 240h"
    alerts:
      - type: email

  # --- DNS: ask three public resolvers for the A record of pkmntrade.club
  # and require a NOERROR response code from each.
  - name: Cloudflare
    group: DNS
    url: "1.1.1.1"
    interval: 60s
    dns:
      query-name: "pkmntrade.club"
      query-type: "A"
    conditions:
      - "[DNS_RCODE] == NOERROR"
    alerts:
      - type: email

  - name: Google
    group: DNS
    url: "8.8.8.8"
    interval: 60s
    dns:
      query-name: "pkmntrade.club"
      query-type: "A"
    conditions:
      - "[DNS_RCODE] == NOERROR"
    alerts:
      - type: email

  - name: Quad9
    group: DNS
    url: "9.9.9.9"
    interval: 60s
    dns:
      query-name: "pkmntrade.club"
      query-type: "A"
    conditions:
      - "[DNS_RCODE] == NOERROR"
    alerts:
      - type: email

  # --- Internal services reached by container/service host name.
  - name: Load Balancer
    group: Services
    url: "http://loba/"
    interval: 60s
    conditions:
      - "[STATUS] == 200"
      - "[BODY] == OK/HEALTHY"
    alerts:
      - type: email

  - name: Feedback
    group: Main
    url: "http://pkmntrade-club-feedback-1:3000/"
    interval: 60s
    conditions:
      - "[STATUS] == 200"
    alerts:
      - type: email

  # --- Generated entries. The directives below split the containers by
  # their "com.docker.compose.service" label into `web` and `web-staging`
  # lists, sort each list by container name so the rendered order is
  # stable, and emit one endpoint per container.
{{ $all_containers := . }}
{{ $web_containers := list }}
{{ $web_staging_containers := list }}
{{ range $container := $all_containers }}
{{ $serviceLabel := index $container.Labels "com.docker.compose.service" }}
{{ if eq $serviceLabel "web" }}
{{ $web_containers = append $web_containers $container }}
{{ end }}
{{ if eq $serviceLabel "web-staging" }}
{{ $web_staging_containers = append $web_staging_containers $container }}
{{ end }}
{{ end }}
{{ $web_containers = sortObjectsByKeysAsc $web_containers "Name" }}
{{ $web_staging_containers = sortObjectsByKeysAsc $web_staging_containers "Name" }}
  # Production web workers: probed directly on port 8000 with the Host
  # header set to the production domain.
{{ range $container := $web_containers }}
{{ $containerNumber := index $container.Labels "com.docker.compose.container-number" }}
  - name: "Web Worker {{ $containerNumber }}"
    group: Main
    url: "http://{{ $container.Name }}:8000/health/"
    headers:
      Host: "pkmntrade.club"
    interval: 60s
    conditions:
      - "[STATUS] == 200"
      # - "[BODY] == OK/HEALTHY"
    alerts:
      - type: email
{{ end }}
  # Staging web workers: same probe, but with the staging Host header and
  # reported under the Staging group (so the repeated "Web Worker N" names
  # are kept apart from the production ones by their group).
{{ range $container := $web_staging_containers }}
{{ $containerNumber := index $container.Labels "com.docker.compose.container-number" }}
  - name: "Web Worker {{ $containerNumber }}"
    group: Staging
    url: "http://{{ $container.Name }}:8000/health/"
    headers:
      Host: "staging.pkmntrade.club"
    interval: 60s
    conditions:
      - "[STATUS] == 200"
      # - "[BODY] == OK/HEALTHY"
    alerts:
      - type: email
{{ end }}
# E-mail alerting. All SMTP settings are injected through ${...}
# placeholders so that no credentials live in this template.
alerting:
  email:
    from: "${GATUS_SMTP_FROM}"
    username: "${GATUS_SMTP_USER}"
    password: "${GATUS_SMTP_PASS}"
    host: "${GATUS_SMTP_HOST}"
    # Deliberately unquoted: presumably the substituted value must parse as
    # a YAML integer, which quoting would turn into a string. Verify before
    # changing.
    port: ${GATUS_SMTP_PORT}
    to: "${GATUS_SMTP_TO}"
    client:
      # Keep TLS certificate verification enabled for the SMTP connection.
      insecure: false
    # Defaults inherited by every `alerts: - type: email` entry above:
    # alert after 3 consecutive failures, resolve after 2 consecutive
    # successes, and send a follow-up message on resolution.
    default-alert:
      enabled: true
      failure-threshold: 3
      success-threshold: 2
      send-on-resolved: true