feat: Implement dynamic Gatekeeper proxy and enhance service health monitoring
- **Implemented Dynamic Gatekeeper (Anubis) Proxy:** - Introduced Anubis as a Gatekeeper proxy layer for services (`web`, `web-staging`, `feedback`, `health`). - Added `docker-gen` setup (`docker-compose_gatekeeper.template.yml`, `gatekeeper-manager`) to dynamically configure Anubis instances based on container labels (`enable_gatekeeper=true`). - Updated HAProxy to route traffic through the respective Gatekeeper services. - **Enhanced Service Health Monitoring & Checks:** - Integrated `django-health-check` into the Django application, providing detailed health endpoints (e.g., `/health/`). - Replaced the custom health check view with `django-health-check` URLs. - Added `psutil` for system metrics in health checks. - Made Gatus configuration dynamic using `docker-gen` (`config.template.yaml`), allowing automatic discovery and monitoring of service instances (e.g., web workers). - Externalized Gatus SMTP credentials to environment variables. - Strengthened `docker-compose_core.yml` with a combined `db-redis-healthcheck` service reporting to Gatus. - Added explicit health checks for `db` and `redis` services in `docker-compose.yml`. - **Improved Docker & Compose Configuration:** - Added `depends_on` conditions in `docker-compose.yml` for `web` and `celery` services to wait for the database. - Updated `ALLOWED_HOSTS` in `docker-compose_staging.yml` and `docker-compose_web.yml` to include internal container names for Gatekeeper communication. - Set `DEBUG=False` for staging services. - Removed `.env.production` from `.gitignore` (standardized to `.env`). - Streamlined `scripts/entrypoint.sh` by removing the call to the no-longer-present `/deploy.sh`. - **Dependency Updates:** - Added `django-health-check>=3.18.3` and `psutil>=7.0.0` to `pyproject.toml` and `uv.lock`. - Updated `settings.py` to include `health_check` apps, configuration, and use `REDIS_URL` consistently. 
- **Streamlined deployment script used in GHA:** - Updated the workflow to copy new server files and create a new `.env` file in the temporary directory before moving them into place. - Consolidated the stopping and removal of old containers into a single step for better clarity and efficiency. - Reduced container downtime by rearranging stop/start steps.
This commit is contained in:
parent
f530790f6c
commit
6aa15d1af9
16 changed files with 487 additions and 162 deletions
|
|
@ -1,5 +1,5 @@
|
|||
services:
|
||||
db-healthcheck:
|
||||
db-redis-healthcheck:
|
||||
image: stephenc/postgresql-cli:latest
|
||||
command:
|
||||
- "sh"
|
||||
|
|
@ -9,26 +9,47 @@ services:
|
|||
sleep 30;
|
||||
while true; do
|
||||
pg_output=$$(pg_isready -d ${DJANGO_DATABASE_URL} 2>&1);
|
||||
exit_code=$$?;
|
||||
if [ $$exit_code -eq 0 ]; then
|
||||
success="true";
|
||||
error="";
|
||||
pg_exit_code=$$?;
|
||||
if [ $$pg_exit_code -eq 0 ]; then
|
||||
pg_success="true";
|
||||
pg_error="";
|
||||
else
|
||||
success="false";
|
||||
error="$$pg_output";
|
||||
pg_success="false";
|
||||
pg_error="$$pg_output";
|
||||
fi;
|
||||
curl -s -f -X POST \
|
||||
--connect-timeout 10 \
|
||||
--max-time 15 \
|
||||
--header "Authorization: Bearer ${GATUS_TOKEN}" \
|
||||
http://health:8080/api/v1/endpoints/db_pg-isready/external?success=$$success&error=$$error;
|
||||
if [ "$$success" = "true" ]; then
|
||||
-G --data-urlencode "success=$$pg_success" --data-urlencode "error=$$pg_error" http://health:8080/api/v1/endpoints/services_database/external;
|
||||
if [ "$$pg_success" = "true" ]; then
|
||||
echo " Database is OK";
|
||||
sleep 60;
|
||||
else
|
||||
echo "Database is not OK: $$pg_output";
|
||||
exit 1;
|
||||
fi;
|
||||
|
||||
redis_output=$$(echo -e "ping\nquit" | curl -v --max-time 10 --connect-timeout 10 telnet://redis:6379 2>&1); printf '%s\n' "$$redis_output" | grep -q "+PONG";
|
||||
redis_exit_code=$$?;
|
||||
if [ $$redis_exit_code -eq 0 ]; then
|
||||
redis_success="true";
|
||||
redis_error="";
|
||||
else
|
||||
redis_success="false";
|
||||
redis_error="$$redis_output";
|
||||
fi;
|
||||
curl -s -f -X POST \
|
||||
--connect-timeout 10 \
|
||||
--max-time 15 \
|
||||
--header "Authorization: Bearer ${GATUS_TOKEN}" \
|
||||
-G --data-urlencode "success=$$redis_success" --data-urlencode "error=$$redis_error" http://health:8080/api/v1/endpoints/services_redis/external;
|
||||
if [ "$$redis_success" = "true" ]; then
|
||||
echo " Redis is OK";
|
||||
else
|
||||
echo "Redis is not OK: $$redis_output";
|
||||
exit 1;
|
||||
fi;
|
||||
sleep 60;
|
||||
done
|
||||
env_file:
|
||||
- .env
|
||||
|
|
@ -46,41 +67,114 @@ services:
|
|||
feedback:
|
||||
restart: always
|
||||
image: getfider/fider:stable
|
||||
labels:
|
||||
- "enable_gatekeeper=true"
|
||||
env_file:
|
||||
- .env
|
||||
cadvisor:
|
||||
volumes:
|
||||
- /:/rootfs:ro
|
||||
- /var/run:/var/run:ro
|
||||
- /sys:/sys:ro
|
||||
- /var/lib/docker/:/var/lib/docker:ro
|
||||
- /dev/disk/:/dev/disk:ro
|
||||
privileged: true
|
||||
devices:
|
||||
- /dev/kmsg
|
||||
image: gcr.io/cadvisor/cadvisor:v0.52.1
|
||||
# cadvisor:
|
||||
# volumes:
|
||||
# - /:/rootfs:ro
|
||||
# - /var/run:/var/run:ro
|
||||
# - /sys:/sys:ro
|
||||
# - /var/lib/docker/:/var/lib/docker:ro
|
||||
# - /dev/disk/:/dev/disk:ro
|
||||
# privileged: true
|
||||
# devices:
|
||||
# - /dev/kmsg
|
||||
# image: gcr.io/cadvisor/cadvisor:v0.52.1
|
||||
redis:
|
||||
image: redis:latest
|
||||
restart: always
|
||||
ports:
|
||||
- 6379:6379
|
||||
# anubis:
|
||||
# image: ghcr.io/techarohq/anubis:latest
|
||||
# env_file:
|
||||
# - .env
|
||||
# dockergen:
|
||||
# image: jwilder/docker-gen:latest
|
||||
# container_name: dockergen_gatus_config
|
||||
# command: -watch -notify-sighup gatus_service -only-exposed /app/config.template.yml /app/config.yaml
|
||||
# restart: unless-stopped
|
||||
# volumes:
|
||||
# - /var/run/docker.sock:/tmp/docker.sock:ro
|
||||
# - ./gatus:/app
|
||||
# depends_on:
|
||||
# - health
|
||||
healthcheck:
|
||||
test: ["CMD", "redis-cli", "ping"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
start_period: 10s
|
||||
dockergen-health:
|
||||
image: nginxproxy/docker-gen:latest
|
||||
command: -wait 15s -watch /gatus/config.template.yaml /gatus/config.yaml
|
||||
restart: unless-stopped
|
||||
volumes:
|
||||
- /var/run/docker.sock:/tmp/docker.sock:ro
|
||||
- ./gatus:/gatus
|
||||
dockergen-gatekeeper:
|
||||
image: nginxproxy/docker-gen:latest
|
||||
# docker-gen parses its options with Go's flag package, which stops at the
# first positional argument, so -notify-sighup must precede the template path
# or it is silently ignored.
# NOTE(review): -notify-sighup takes a container name/ID; confirm that
# "gatekeeper-manager" matches the running container's actual name.
command: -wait 15s -watch -notify-sighup gatekeeper-manager /gatekeeper/docker-compose_gatekeeper.template.yml /gatekeeper/docker-compose_gatekeeper.yml
|
||||
restart: unless-stopped
|
||||
volumes:
|
||||
- /var/run/docker.sock:/tmp/docker.sock:ro
|
||||
- ./:/gatekeeper
|
||||
gatekeeper-manager:
|
||||
image: docker:latest
|
||||
restart: always
|
||||
stop_signal: SIGTERM
|
||||
volumes:
|
||||
- /srv:/srv:ro
|
||||
- /var/run/docker.sock:/var/run/docker.sock
|
||||
environment:
|
||||
- REFRESH_INTERVAL=60
|
||||
entrypoint: ["/bin/sh", "-c"]
|
||||
command:
|
||||
- |
|
||||
set -eu -o pipefail
|
||||
|
||||
COMPOSE_FILE_PATH="/srv/pkmntrade-club/docker-compose_gatekeeper.yml"
|
||||
PROJECT_DIR_PATH="/srv/pkmntrade-club"
|
||||
PROJECT_NAME_TAG="gatekeepers"
|
||||
|
||||
gatekeeper_down() {
|
||||
echo "$(date +'%Y-%m-%d %H:%M:%S') [INFO]: Taking gatekeepers down (Project: $$PROJECT_NAME_TAG)..."
|
||||
cd "$$PROJECT_DIR_PATH"
|
||||
if ! docker compose -p "$$PROJECT_NAME_TAG" -f "$$COMPOSE_FILE_PATH" down; then
|
||||
echo "$(date +'%Y-%m-%d %H:%M:%S') [WARN]: 'docker compose down' for $$PROJECT_NAME_TAG encountered an issue, but proceeding."
|
||||
fi
|
||||
}
|
||||
|
||||
gatekeeper_up() {
|
||||
echo "$(date +'%Y-%m-%d %H:%M:%S') [INFO]: Bringing gatekeepers up/updating (Project: $$PROJECT_NAME_TAG, File: $$COMPOSE_FILE_PATH)..."
|
||||
cd "$$PROJECT_DIR_PATH"
|
||||
if ! docker compose -p "$$PROJECT_NAME_TAG" -f "$$COMPOSE_FILE_PATH" up -d --remove-orphans; then
|
||||
echo "$(date +'%Y-%m-%d %H:%M:%S') [ERROR]: 'docker compose up' for $$PROJECT_NAME_TAG failed. Will retry."
|
||||
fi
|
||||
}
|
||||
|
||||
handle_sigterm() {
|
||||
echo "$(date +'%Y-%m-%d %H:%M:%S') [INFO]: SIGTERM received. Initiating graceful shutdown for gatekeepers."
|
||||
gatekeeper_down
|
||||
echo "$(date +'%Y-%m-%d %H:%M:%S') [INFO]: Gatekeepers shut down. Gatekeeper Manager exiting."
|
||||
exit 0
|
||||
}
|
||||
|
||||
handle_sighup() {
|
||||
echo "$(date +'%Y-%m-%d %H:%M:%S') [INFO]: SIGHUP received. Restarting gatekeepers."
|
||||
gatekeeper_down
|
||||
gatekeeper_up
|
||||
echo "$(date +'%Y-%m-%d %H:%M:%S') [INFO]: Gatekeepers restarted following SIGHUP."
|
||||
}
|
||||
|
||||
trap 'handle_sigterm' SIGTERM
|
||||
trap 'handle_sighup' SIGHUP
|
||||
|
||||
echo "$(date +'%Y-%m-%d %H:%M:%S') [INFO]: Gatekeeper Manager started."
|
||||
|
||||
echo "$(date +'%Y-%m-%d %H:%M:%S') [INFO]: Periodic refresh enabled: $$REFRESH_INTERVAL seconds."
|
||||
while true; do
|
||||
gatekeeper_up
|
||||
|
||||
# Running 'sleep' in the background and blocking on 'wait $!' lets trapped signals interrupt the wait immediately.
|
||||
sleep $$REFRESH_INTERVAL &
|
||||
# '|| true' ensures the loop continues if 'wait' is killed by a handled signal (SIGHUP/SIGTERM)
|
||||
# SIGTERM handler exits completely, so loop won't continue. SIGHUP handler doesn't exit.
|
||||
wait $! || true
|
||||
|
||||
echo "$(date +'%Y-%m-%d %H:%M:%S') [INFO]: Periodic refresh triggered."
|
||||
done
|
||||
health:
|
||||
image: twinproduction/gatus:latest
|
||||
restart: always
|
||||
labels:
|
||||
- "enable_gatekeeper=true"
|
||||
env_file:
|
||||
- .env
|
||||
environment:
|
||||
|
|
|
|||
37
server/docker-compose_gatekeeper.template.yml
Normal file
37
server/docker-compose_gatekeeper.template.yml
Normal file
|
|
@ -0,0 +1,37 @@
|
|||
services:
|
||||
{{ $all_containers := whereLabelValueMatches . "enable_gatekeeper" "true" }}
|
||||
{{ $all_containers = sortObjectsByKeysAsc $all_containers "Name" }}
|
||||
|
||||
{{ range $container := $all_containers }}
|
||||
{{ $serviceLabel := index $container.Labels "com.docker.compose.service" }}
|
||||
{{ $containerNumber := index $container.Labels "com.docker.compose.container-number" }}
|
||||
{{ $port := "" }}
|
||||
{{ if eq $serviceLabel "web" }}
|
||||
{{ $port = ":8000" }}
|
||||
{{ end }}
|
||||
{{ if eq $serviceLabel "web-staging" }}
|
||||
{{ $port = ":8000" }}
|
||||
{{ end }}
|
||||
{{ if eq $serviceLabel "feedback" }}
|
||||
{{ $port = ":3000" }}
|
||||
{{ end }}
|
||||
{{ if eq $serviceLabel "health" }}
|
||||
{{ $port = ":8080" }}
|
||||
{{ end }}
|
||||
gatekeeper-{{ $serviceLabel }}-{{ $containerNumber }}:
|
||||
image: ghcr.io/techarohq/anubis:latest
|
||||
container_name: pkmntrade-club-gatekeeper-{{ $serviceLabel }}-{{ $containerNumber }}
|
||||
env_file:
|
||||
- .env
|
||||
environment:
|
||||
- TARGET=http://{{ $container.Name }}{{ $port }}
|
||||
networks:
|
||||
default:
|
||||
aliases:
|
||||
- pkmntrade-club-gatekeeper-{{ $serviceLabel }}
|
||||
- gatekeeper-{{ $serviceLabel }}
|
||||
{{ end }}
|
||||
networks:
|
||||
default:
|
||||
name: pkmntrade-club_default
|
||||
external: true
|
||||
|
|
@ -3,15 +3,17 @@ x-common: &common
|
|||
restart: always
|
||||
env_file:
|
||||
- .env
|
||||
environment:
|
||||
- DEBUG=True
|
||||
- DISABLE_SIGNUPS=True
|
||||
- PUBLIC_HOST=staging.pkmntrade.club
|
||||
- ALLOWED_HOSTS=staging.pkmntrade.club,127.0.0.1
|
||||
|
||||
services:
|
||||
web-staging:
|
||||
<<: *common
|
||||
environment:
|
||||
- DEBUG=False
|
||||
- DISABLE_SIGNUPS=True
|
||||
- PUBLIC_HOST=staging.pkmntrade.club
|
||||
- ALLOWED_HOSTS=staging.pkmntrade.club,127.0.0.1,pkmntrade-club-web-staging-1,pkmntrade-club-web-staging-2
|
||||
labels:
|
||||
- "enable_gatekeeper=true"
|
||||
deploy:
|
||||
mode: replicated
|
||||
replicas: 2
|
||||
|
|
@ -23,4 +25,9 @@ services:
|
|||
# start_period: 30s
|
||||
celery-staging:
|
||||
<<: *common
|
||||
environment:
|
||||
- DEBUG=False
|
||||
- DISABLE_SIGNUPS=True
|
||||
- PUBLIC_HOST=staging.pkmntrade.club
|
||||
- ALLOWED_HOSTS=staging.pkmntrade.club,127.0.0.1,pkmntrade-club-celery-staging-1
|
||||
command: ["celery", "-A", "pkmntrade_club.django_project", "worker", "-l", "INFO", "-B", "-E"]
|
||||
|
|
@ -2,11 +2,6 @@ x-common: &common
|
|||
restart: always
|
||||
env_file:
|
||||
- .env
|
||||
environment:
|
||||
- DEBUG=False
|
||||
- DISABLE_SIGNUPS=True
|
||||
- PUBLIC_HOST=pkmntrade.club
|
||||
- ALLOWED_HOSTS=pkmntrade.club,127.0.0.1
|
||||
|
||||
services:
|
||||
web:
|
||||
|
|
@ -15,6 +10,13 @@ services:
|
|||
entrypoint: ["/ko-app/httpdebug", "--bind", ":8000"]
|
||||
#image: badbl0cks/pkmntrade-club:stable
|
||||
#command: ["granian", "--interface", "wsgi", "pkmntrade_club.django_project.wsgi:app", "--host", "0.0.0.0", "--port", "8000", "--workers", "1", "--workers-kill-timeout", "180", "--access-log"]
|
||||
environment:
|
||||
- DEBUG=False
|
||||
- DISABLE_SIGNUPS=True
|
||||
- PUBLIC_HOST=pkmntrade.club
|
||||
- ALLOWED_HOSTS=pkmntrade.club,127.0.0.1,pkmntrade-club-web-1,pkmntrade-club-web-2,pkmntrade-club-web-3,pkmntrade-club-web-4
|
||||
labels:
|
||||
- "enable_gatekeeper=true"
|
||||
deploy:
|
||||
mode: replicated
|
||||
replicas: 4
|
||||
|
|
@ -24,7 +26,12 @@ services:
|
|||
# timeout: 10s
|
||||
# retries: 3
|
||||
# start_period: 30s
|
||||
celery:
|
||||
<<: *common
|
||||
image: badbl0cks/pkmntrade-club:stable
|
||||
command: ["celery", "-A", "pkmntrade_club.django_project", "worker", "-l", "INFO", "-B", "-E"]
|
||||
# celery:
|
||||
# <<: *common
|
||||
# image: badbl0cks/pkmntrade-club:stable
|
||||
# environment:
|
||||
# - DEBUG=False
|
||||
# - DISABLE_SIGNUPS=True
|
||||
# - PUBLIC_HOST=pkmntrade.club
|
||||
# - ALLOWED_HOSTS=pkmntrade.club,127.0.0.1,pkmntrade-club-celery-1,pkmntrade-club-celery-2
|
||||
# command: ["celery", "-A", "pkmntrade_club.django_project", "worker", "-l", "INFO", "-B", "-E"]
|
||||
145
server/gatus/config.template.yaml
Normal file
145
server/gatus/config.template.yaml
Normal file
|
|
@ -0,0 +1,145 @@
|
|||
storage:
|
||||
type: postgres
|
||||
path: "${GATUS_DATABASE_URL}"
|
||||
web:
|
||||
read-buffer-size: 32768
|
||||
connectivity:
|
||||
checker:
|
||||
target: 1.1.1.1:53
|
||||
interval: 60s
|
||||
external-endpoints:
|
||||
- name: Database
|
||||
group: Services
|
||||
token: "${GATUS_TOKEN}"
|
||||
alerts:
|
||||
- type: email
|
||||
- name: Redis
|
||||
group: Services
|
||||
token: "${GATUS_TOKEN}"
|
||||
alerts:
|
||||
- type: email
|
||||
endpoints:
|
||||
- name: Domain
|
||||
group: Expirations
|
||||
url: "https://pkmntrade.club"
|
||||
interval: 1h
|
||||
conditions:
|
||||
- "[DOMAIN_EXPIRATION] > 720h"
|
||||
alerts:
|
||||
- type: email
|
||||
- name: Certificate
|
||||
group: Expirations
|
||||
url: "https://pkmntrade.club"
|
||||
interval: 1h
|
||||
conditions:
|
||||
- "[CERTIFICATE_EXPIRATION] > 240h"
|
||||
alerts:
|
||||
- type: email
|
||||
- name: Cloudflare
|
||||
group: DNS
|
||||
url: "1.1.1.1"
|
||||
interval: 60s
|
||||
dns:
|
||||
query-name: "pkmntrade.club"
|
||||
query-type: "A"
|
||||
conditions:
|
||||
- "[DNS_RCODE] == NOERROR"
|
||||
alerts:
|
||||
- type: email
|
||||
- name: Google
|
||||
group: DNS
|
||||
url: "8.8.8.8"
|
||||
interval: 60s
|
||||
dns:
|
||||
query-name: "pkmntrade.club"
|
||||
query-type: "A"
|
||||
conditions:
|
||||
- "[DNS_RCODE] == NOERROR"
|
||||
alerts:
|
||||
- type: email
|
||||
- name: Quad9
|
||||
group: DNS
|
||||
url: "9.9.9.9"
|
||||
interval: 60s
|
||||
dns:
|
||||
query-name: "pkmntrade.club"
|
||||
query-type: "A"
|
||||
conditions:
|
||||
- "[DNS_RCODE] == NOERROR"
|
||||
alerts:
|
||||
- type: email
|
||||
- name: HAProxy
|
||||
group: Load Balancer
|
||||
url: "http://loba/"
|
||||
interval: 60s
|
||||
conditions:
|
||||
- "[STATUS] == 200"
|
||||
- "[BODY] == OK/HEALTHY"
|
||||
alerts:
|
||||
- type: email
|
||||
- name: Feedback
|
||||
group: Services
|
||||
url: "http://feedback:3000/"
|
||||
interval: 60s
|
||||
conditions:
|
||||
- "[STATUS] == 200"
|
||||
alerts:
|
||||
- type: email
|
||||
{{ $all_containers := . }}
|
||||
{{ $web_containers := list }}
|
||||
{{ $web_staging_containers := list }}
|
||||
|
||||
{{ range $container := $all_containers }}
|
||||
{{ $serviceLabel := index $container.Labels "com.docker.compose.service" }}
|
||||
{{ if eq $serviceLabel "web" }}
|
||||
{{ $web_containers = append $web_containers $container }}
|
||||
{{ end }}
|
||||
{{ if eq $serviceLabel "web-staging" }}
|
||||
{{ $web_staging_containers = append $web_staging_containers $container }}
|
||||
{{ end }}
|
||||
{{ end }}
|
||||
|
||||
{{ $web_containers = sortObjectsByKeysAsc $web_containers "Name" }}
|
||||
{{ $web_staging_containers = sortObjectsByKeysAsc $web_staging_containers "Name" }}
|
||||
|
||||
{{ range $container := $web_containers }}
|
||||
{{ $containerNumber := index $container.Labels "com.docker.compose.container-number" }}
|
||||
- name: "Web Worker {{ $containerNumber }}"
|
||||
group: Main
|
||||
url: "http://{{ $container.Name }}:8000/health/"
|
||||
interval: 60s
|
||||
conditions:
|
||||
- "[STATUS] == 200"
|
||||
# - "[BODY] == OK/HEALTHY"
|
||||
alerts:
|
||||
- type: email
|
||||
{{ end }}
|
||||
|
||||
{{ range $container := $web_staging_containers }}
|
||||
{{ $containerNumber := index $container.Labels "com.docker.compose.container-number" }}
|
||||
- name: "Web Worker {{ $containerNumber }}"
|
||||
group: Staging
|
||||
url: "http://{{ $container.Name }}:8000/health/"
|
||||
interval: 60s
|
||||
conditions:
|
||||
- "[STATUS] == 200"
|
||||
# - "[BODY] == OK/HEALTHY"
|
||||
alerts:
|
||||
- type: email
|
||||
{{ end }}
|
||||
|
||||
alerting:
|
||||
email:
|
||||
from: "${GATUS_SMTP_FROM}"
|
||||
username: "${GATUS_SMTP_USER}"
|
||||
password: "${GATUS_SMTP_PASS}"
|
||||
host: "${GATUS_SMTP_HOST}"
|
||||
port: ${GATUS_SMTP_PORT}
|
||||
to: "${GATUS_SMTP_TO}"
|
||||
client:
|
||||
insecure: false
|
||||
default-alert:
|
||||
enabled: true
|
||||
failure-threshold: 3
|
||||
success-threshold: 2
|
||||
send-on-resolved: true
|
||||
|
|
@ -8,14 +8,19 @@ connectivity:
|
|||
target: 1.1.1.1:53
|
||||
interval: 60s
|
||||
external-endpoints:
|
||||
- name: pg_isready
|
||||
group: db
|
||||
- name: Database
|
||||
group: Services
|
||||
token: "${GATUS_TOKEN}"
|
||||
alerts:
|
||||
- type: email
|
||||
- name: Redis
|
||||
group: Services
|
||||
token: "${GATUS_TOKEN}"
|
||||
alerts:
|
||||
- type: email
|
||||
endpoints:
|
||||
- name: Domain
|
||||
group: expirations
|
||||
group: Expirations
|
||||
url: "https://pkmntrade.club"
|
||||
interval: 1h
|
||||
conditions:
|
||||
|
|
@ -23,7 +28,7 @@ endpoints:
|
|||
alerts:
|
||||
- type: email
|
||||
- name: Certificate
|
||||
group: expirations
|
||||
group: Expirations
|
||||
url: "https://pkmntrade.club"
|
||||
interval: 1h
|
||||
conditions:
|
||||
|
|
@ -31,7 +36,7 @@ endpoints:
|
|||
alerts:
|
||||
- type: email
|
||||
- name: Cloudflare
|
||||
group: dns
|
||||
group: DNS
|
||||
url: "1.1.1.1"
|
||||
interval: 60s
|
||||
dns:
|
||||
|
|
@ -42,7 +47,7 @@ endpoints:
|
|||
alerts:
|
||||
- type: email
|
||||
- name: Google
|
||||
group: dns
|
||||
group: DNS
|
||||
url: "8.8.8.8"
|
||||
interval: 60s
|
||||
dns:
|
||||
|
|
@ -53,7 +58,7 @@ endpoints:
|
|||
alerts:
|
||||
- type: email
|
||||
- name: Quad9
|
||||
group: dns
|
||||
group: DNS
|
||||
url: "9.9.9.9"
|
||||
interval: 60s
|
||||
dns:
|
||||
|
|
@ -64,7 +69,7 @@ endpoints:
|
|||
alerts:
|
||||
- type: email
|
||||
- name: HAProxy
|
||||
group: loadbalancer
|
||||
group: Load Balancer
|
||||
url: "http://loba/"
|
||||
interval: 60s
|
||||
conditions:
|
||||
|
|
@ -73,60 +78,22 @@ endpoints:
|
|||
alerts:
|
||||
- type: email
|
||||
- name: Feedback
|
||||
group: backends
|
||||
group: Services
|
||||
url: "http://feedback:3000/"
|
||||
interval: 60s
|
||||
conditions:
|
||||
- "[STATUS] == 200"
|
||||
alerts:
|
||||
- type: email
|
||||
- name: Web Worker 1
|
||||
group: backends
|
||||
url: "http://pkmntrade-club-web-1:8000/health/"
|
||||
interval: 60s
|
||||
conditions:
|
||||
- "[STATUS] == 200"
|
||||
#- "[BODY] == OK/HEALTHY"
|
||||
#- [BODY].database == UP
|
||||
# must return json like {"database": "UP"} first
|
||||
alerts:
|
||||
- type: email
|
||||
- name: Web Worker 2
|
||||
group: backends
|
||||
url: "http://pkmntrade-club-web-2:8000/health/"
|
||||
interval: 60s
|
||||
conditions:
|
||||
- "[STATUS] == 200"
|
||||
#- "[BODY] == OK/HEALTHY"
|
||||
alerts:
|
||||
- type: email
|
||||
- name: Web Worker 3
|
||||
group: backends
|
||||
url: "http://pkmntrade-club-web-3:8000/health/"
|
||||
interval: 60s
|
||||
conditions:
|
||||
- "[STATUS] == 200"
|
||||
#- "[BODY] == OK/HEALTHY"
|
||||
alerts:
|
||||
- type: email
|
||||
- name: Web Worker 4
|
||||
group: backends
|
||||
url: "http://pkmntrade-club-web-4:8000/health/"
|
||||
interval: 60s
|
||||
conditions:
|
||||
- "[STATUS] == 200"
|
||||
#- "[BODY] == OK/HEALTHY"
|
||||
alerts:
|
||||
- type: email
|
||||
# todo: add cadvisor checks via api https://github.com/google/cadvisor/blob/master/docs/api.md
|
||||
|
||||
alerting:
|
||||
email:
|
||||
from: noreply@pkmntrade.club
|
||||
username: dd2cd354-de6d-4fa4-bfe8-31c961cb4e90
|
||||
password: 1622e8a1-9a45-4a7f-8071-cccca29d8675
|
||||
host: smtp.tem.scaleway.com
|
||||
port: 465
|
||||
to: rob@badblocks.email
|
||||
from: "${GATUS_SMTP_FROM}"
|
||||
username: "${GATUS_SMTP_USER}"
|
||||
password: "${GATUS_SMTP_PASS}"
|
||||
host: "${GATUS_SMTP_HOST}"
|
||||
port: ${GATUS_SMTP_PORT}
|
||||
to: "${GATUS_SMTP_TO}"
|
||||
client:
|
||||
insecure: false
|
||||
default-alert:
|
||||
|
|
|
|||
|
|
@ -21,7 +21,7 @@ defaults
|
|||
timeout http-request 120s
|
||||
option httplog
|
||||
|
||||
frontend web_frontend
|
||||
frontend haproxy_entrypoint
|
||||
bind :443 ssl crt /certs/crt.pem verify required ca-file /certs/ca.pem
|
||||
use_backend %[req.hdr(host),lower,word(1,:)] # strip out port from host
|
||||
|
||||
|
|
@ -34,17 +34,17 @@ backend basic_check
|
|||
|
||||
backend pkmntrade.club
|
||||
balance leastconn
|
||||
server-template web- 10 web:8000 check resolvers docker_resolver init-addr libc,none
|
||||
server-template gatekeeper-web- 4 gatekeeper-web:8000 check resolvers docker_resolver init-addr libc,none
|
||||
|
||||
backend staging.pkmntrade.club
|
||||
balance leastconn
|
||||
server-template web-staging- 10 web-staging:8000 check resolvers docker_resolver init-addr libc,none
|
||||
server-template gatekeeper-web-staging- 4 gatekeeper-web-staging:8000 check resolvers docker_resolver init-addr libc,none
|
||||
|
||||
backend feedback.pkmntrade.club
|
||||
server feedback-1 feedback:3000
|
||||
server-template gatekeeper-feedback- 4 gatekeeper-feedback:8000 check resolvers docker_resolver init-addr libc,none
|
||||
|
||||
backend health.pkmntrade.club
|
||||
server health-1 health:8080
|
||||
server-template gatekeeper-health- 4 gatekeeper-health:8000 check resolvers docker_resolver init-addr libc,none
|
||||
|
||||
#EOF - trailing newline required
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue