From 8ec67acc07e02af29476e3936af15797573373cc Mon Sep 17 00:00:00 2001 From: Andrew Baker Date: Sun, 31 May 2026 14:31:55 +0200 Subject: [PATCH 1/4] fix(backup): surface real MariaDB error when read_only quiesce fails db_exec captured stderr to /dev/null, so a failed `SET GLOBAL read_only=ON` only logged "did not take effect" with no cause. Capture stderr into _DB_LAST_ERR and log it in db_lock_mysql's fallback branch so the actual MariaDB error (privilege, auth, etc.) is visible for diagnosis. Co-Authored-By: Claude Opus 4.8 (1M context) --- pi-image-backup.sh | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/pi-image-backup.sh b/pi-image-backup.sh index 83dd42c..55064c5 100755 --- a/pi-image-backup.sh +++ b/pi-image-backup.sh @@ -163,6 +163,7 @@ _DB_ROOT_PASSWORD="" _DB_LOCK_PID="" _DB_CONN_ID="" _DB_LOCK_TAG="pi2s3-lock-$$" +_DB_LAST_ERR="" _DB_RO_CHANGED=false # Sentinel recording that WE flipped the server read-only. Lets the next backup # recover a stale read-only state if a previous run was hard-killed (SIGKILL/power @@ -356,13 +357,19 @@ _write_standby_sync_marker() { # If container is empty, runs mariadb/mysql locally with MYSQL_PWD in the environment. db_exec() { local _c="$1" _pw="$2"; shift 2 + # Capture stderr into _DB_LAST_ERR (not /dev/null) so callers can log the + # real MariaDB error on failure, while stdout still carries only query output. + local _ef _rc; _ef=$(mktemp 2>/dev/null || echo "/tmp/pi2s3-dbexec.$$") if [[ -n "${_c}" ]]; then docker exec -e "MYSQL_PWD=${_pw}" "${_c}" \ - mariadb -u root --batch --silent "$@" 2>/dev/null + mariadb -u root --batch --silent "$@" 2>"${_ef}" else - MYSQL_PWD="${_pw}" mariadb -u root --batch --silent "$@" 2>/dev/null \ - || MYSQL_PWD="${_pw}" mysql -u root --batch --silent "$@" 2>/dev/null + MYSQL_PWD="${_pw}" mariadb -u root --batch --silent "$@" 2>"${_ef}" \ + || MYSQL_PWD="${_pw}" mysql -u root --batch --silent "$@" 2>"${_ef}" fi + _rc=$? 
+ _DB_LAST_ERR=$(cat "${_ef}" 2>/dev/null); rm -f "${_ef}" + return ${_rc} } # Run a single PostgreSQL statement. @@ -522,18 +529,21 @@ db_lock_mysql() { # Flip to read-only. read_only blocks the (non-SUPER) app user; super_read_only # (MySQL only) additionally blocks SUPER users — best-effort so MariaDB is fine. + local _set_err="" db_exec "${_DB_CONTAINER}" "${_DB_ROOT_PASSWORD}" \ - -e "SET GLOBAL read_only=ON;" 2>/dev/null || true + -e "SET GLOBAL read_only=ON;" >/dev/null || true + _set_err="${_DB_LAST_ERR}" db_exec "${_DB_CONTAINER}" "${_DB_ROOT_PASSWORD}" \ - -e "SET GLOBAL super_read_only=ON;" 2>/dev/null || true + -e "SET GLOBAL super_read_only=ON;" >/dev/null || true db_exec "${_DB_CONTAINER}" "${_DB_ROOT_PASSWORD}" \ - -e "FLUSH LOGS;" 2>/dev/null || true + -e "FLUSH LOGS;" >/dev/null || true # Verify it took effect before trusting the snapshot. _ro=$(db_exec "${_DB_CONTAINER}" "${_DB_ROOT_PASSWORD}" \ -e "SELECT @@global.read_only;" | tail -1 || true) if [[ "${_ro}" != "1" ]]; then log " WARNING: SET GLOBAL read_only did not take effect — falling back to STOP_DOCKER" + [[ -n "${_set_err}" ]] && log " mariadb: ${_set_err}" _DB_CONTAINER=""; _DB_ENGINE=""; return 0 fi From 08435974718fceb207188a5e48565be31625de48 Mon Sep 17 00:00:00 2001 From: Andrew Baker Date: Sun, 31 May 2026 15:40:25 +0200 Subject: [PATCH 2/4] feat(backup): add --db-check diagnostic mode Reports DB detection + whether the read-only quiesce engages (current user, version, prior/after read_only, SET errors), then exits without imaging. Briefly toggles read_only and restores it. Zero downtime; reusable for diagnosing why a backup falls back to STOP_DOCKER. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- pi-image-backup.sh | 61 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/pi-image-backup.sh b/pi-image-backup.sh index 55064c5..4070b56 100755 --- a/pi-image-backup.sh +++ b/pi-image-backup.sh @@ -120,6 +120,7 @@ VERIFY=false VERIFY_DATE="" STALE_CHECK=false COST=false +DB_CHECK=false for arg in "$@"; do case "$arg" in @@ -132,6 +133,7 @@ for arg in "$@"; do --no-stop-docker) STOP_DOCKER=false ;; --stale-check) STALE_CHECK=true ;; --cost) COST=true ;; + --db-check) DB_CHECK=true ;; --help) echo "Usage: pi-image-backup.sh [options] (no args) Run nightly backup --force Skip duplicate-check (run even if today's backup exists) @@ -143,6 +145,7 @@ for arg in "$@"; do --stale-check Alert via ntfy if latest backup is older than STALE_BACKUP_HOURS --cost Show S3 storage used and estimated monthly cost --no-stop-docker Skip Docker stop (for daytime test runs, no downtime) + --db-check Diagnose DB detection + read-only quiesce, then exit (no imaging) --help Show this help Config: ${SCRIPT_DIR}/config.env @@ -631,6 +634,64 @@ db_unlock() { log " DB read-write — writes unblocked." } +# ── DB diagnostic (--db-check) ──────────────────────────────────────────────── +# Reports how the DB would be detected and whether the read-only quiesce works, +# without imaging anything. For MySQL/MariaDB it briefly toggles read_only and +# restores it, logging the connecting user and any error. Safe to run any time. +db_check() { + log "========================================================" + log " pi2s3 — DB quiesce check" + log "========================================================" + log " DB_CONTAINER=${DB_CONTAINER} DB_ENGINE=${DB_ENGINE}" + if ! db_resolve_target; then + log " RESULT: no DB resolved — backup would use STOP_DOCKER (downtime)." 
+ exit 0 + fi + log " Resolved: engine=${_DB_ENGINE} location=${_DB_CONTAINER:-}" + + if [[ "${_DB_ENGINE}" == "postgres" ]]; then + local _v + _v=$(db_exec_pg "${_DB_CONTAINER}" "${_DB_ROOT_PASSWORD}" "SELECT version();" || true) + if [[ -n "${_v}" ]]; then + log " Connected. ${_v}" + log " RESULT: OK — PostgreSQL CHECKPOINT path will be used (zero downtime)." + else + log " RESULT: could not connect (check DB_PG_USER) — would use STOP_DOCKER." + fi + exit 0 + fi + + # MySQL / MariaDB + log " current_user: $(db_exec "${_DB_CONTAINER}" "${_DB_ROOT_PASSWORD}" -e "SELECT CURRENT_USER();" | tail -1)" + log " version: $(db_exec "${_DB_CONTAINER}" "${_DB_ROOT_PASSWORD}" -e "SELECT VERSION();" | tail -1)" + local _ro0 _ro1 + _ro0=$(db_exec "${_DB_CONTAINER}" "${_DB_ROOT_PASSWORD}" -e "SELECT @@global.read_only;" | tail -1 || true) + log " prior read_only=${_ro0:-} super_read_only=$(db_exec "${_DB_CONTAINER}" "${_DB_ROOT_PASSWORD}" -e "SELECT @@global.super_read_only;" | tail -1 || true)" + if [[ "${_ro0}" == "1" ]]; then + log " Server is already read-only (replica?) — pi2s3 would leave it untouched. OK." + exit 0 + fi + + log " Attempting SET GLOBAL read_only=ON ..." + db_exec "${_DB_CONTAINER}" "${_DB_ROOT_PASSWORD}" -e "SET GLOBAL read_only=ON;" >/dev/null || true + log " read_only SET error: ${_DB_LAST_ERR:-}" + db_exec "${_DB_CONTAINER}" "${_DB_ROOT_PASSWORD}" -e "SET GLOBAL super_read_only=ON;" >/dev/null || true + log " super_read_only SET error: ${_DB_LAST_ERR:-}" + _ro1=$(db_exec "${_DB_CONTAINER}" "${_DB_ROOT_PASSWORD}" -e "SELECT @@global.read_only;" | tail -1 || true) + log " read_only after SET=${_ro1:-}" + # Restore + db_exec "${_DB_CONTAINER}" "${_DB_ROOT_PASSWORD}" -e "SET GLOBAL super_read_only=OFF;" >/dev/null || true + db_exec "${_DB_CONTAINER}" "${_DB_ROOT_PASSWORD}" -e "SET GLOBAL read_only=OFF;" >/dev/null || true + + if [[ "${_ro1}" == "1" ]]; then + log " RESULT: OK — read-only quiesce works (zero downtime). Restored to read-write." 
+ else + log " RESULT: FAILED — read_only would not engage; backup falls back to STOP_DOCKER (downtime)." + fi + exit 0 +} +[[ "${DB_CHECK}" == "true" ]] && db_check + # ── Site availability probe ─────────────────────────────────────────────────── # Pings the site every PROBE_INTERVAL seconds during partition imaging. # Cache-busted via query param + no-cache headers so every request hits PHP/DB. From eb4a1d7761c9d8d32ecf1c4e668b4c028a3abde0 Mon Sep 17 00:00:00 2001 From: Andrew Baker Date: Sun, 31 May 2026 15:42:09 +0200 Subject: [PATCH 3/4] fix(backup): fall back to mysql client in container exec MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The container branch of db_exec hardcoded the `mariadb` client. MySQL and older MariaDB images ship only `mysql`, so `docker exec ... mariadb` failed with 'executable not found' and the read-only quiesce silently no-op'd — causing a fallback to STOP_DOCKER (downtime). Now tries mariadb then mysql, mirroring the native branch. Co-Authored-By: Claude Opus 4.8 (1M context) --- pi-image-backup.sh | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/pi-image-backup.sh b/pi-image-backup.sh index 4070b56..8d43696 100755 --- a/pi-image-backup.sh +++ b/pi-image-backup.sh @@ -364,8 +364,14 @@ db_exec() { # real MariaDB error on failure, while stdout still carries only query output. local _ef _rc; _ef=$(mktemp 2>/dev/null || echo "/tmp/pi2s3-dbexec.$$") if [[ -n "${_c}" ]]; then + # Try the `mariadb` client, falling back to `mysql` — older MariaDB and + # MySQL images ship only the `mysql` binary (no `mariadb`), so a hardcoded + # `mariadb` exec fails with "executable not found" and the quiesce silently + # no-ops. Mirrors the native branch below. 
docker exec -e "MYSQL_PWD=${_pw}" "${_c}" \ - mariadb -u root --batch --silent "$@" 2>"${_ef}" + mariadb -u root --batch --silent "$@" 2>"${_ef}" \ + || docker exec -e "MYSQL_PWD=${_pw}" "${_c}" \ + mysql -u root --batch --silent "$@" 2>"${_ef}" else MYSQL_PWD="${_pw}" mariadb -u root --batch --silent "$@" 2>"${_ef}" \ || MYSQL_PWD="${_pw}" mysql -u root --batch --silent "$@" 2>"${_ef}" From 05b99eb1058af759b1a48c3099941d1cbd190451 Mon Sep 17 00:00:00 2001 From: Andrew Baker Date: Sun, 31 May 2026 15:46:34 +0200 Subject: [PATCH 4/4] docs: changelog + --db-check in README/AGENTS/llms.txt Document the container mysql-client fallback fix and the new --db-check diagnostic mode across CHANGELOG (1.10.0 Fixed/Added), README options, AGENTS.md troubleshooting, and pi2s3.com/llms.txt. Co-Authored-By: Claude Opus 4.8 (1M context) --- AGENTS.md | 9 +++++++++ CHANGELOG.md | 6 ++++++ README.md | 1 + website/llms.txt | 1 + 4 files changed, 17 insertions(+) diff --git a/AGENTS.md b/AGENTS.md index deb6342..cfd13b3 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -98,6 +98,15 @@ auto-detecting it whether it runs in Docker or natively on the host: You normally don't touch any of this. Only edit `config.env` (`DB_ENGINE`, `DB_ROOT_PASSWORD`, `DB_PG_USER`) if the user reports the DB wasn't detected. +To confirm the quiesce will be zero-downtime *before* relying on it, run the +diagnostic (no imaging, no downtime): +```bash +bash ~/pi2s3/pi-image-backup.sh --db-check +``` +It prints the detected engine/container, the connecting user, and whether +`read_only` actually engages. If it reports FAILED, the backup would fall back +to stopping containers (brief downtime) — surface that to the user. + ## Scheduling `install.sh` installs a nightly cron job (2am by default). To change it, set diff --git a/CHANGELOG.md b/CHANGELOG.md index 05e3952..7555fbb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,12 @@ All notable changes to pi2s3 are documented here. 
- **Native (non-Docker) database detection** (`lib/containers.sh`) — `DB_CONTAINER="auto"` now detects a database running **natively on the host** (`mariadbd`/`mysqld`/`postgres` processes), not just in Docker. Native MySQL/MariaDB previously fell back to a stop-the-service downtime; it now uses the zero-downtime path (set `DB_ROOT_PASSWORD` — there is no container env to read it from). Native PostgreSQL with peer auth needs no password. - **`DB_ENGINE` and `DB_PG_USER` config** (`config.env.example`) — `DB_ENGINE` (`auto` | `mysql` | `mariadb` | `postgres`) forces the engine for an explicit native install where auto-detection can't see a container. `DB_PG_USER` is the PostgreSQL superuser used for `CHECKPOINT`. - **`AGENTS.md` + `pi2s3.com/llms.txt`** — agent-facing instructions so an AI assistant (e.g. Claude) pointed at the repo or the site can install pi2s3 and run a backup unattended ("backup my site with pi2s3"). +- **`--db-check` diagnostic mode** (`pi-image-backup.sh`) — reports DB detection (engine, container/native), connecting user, version, and whether the read-only quiesce actually engages, then exits without imaging. Briefly toggles `read_only` and restores it (zero downtime). Use it to confirm a backup will be zero-downtime before relying on it. + +### Fixed + +- **Container DB client now falls back to `mysql`** (`pi-image-backup.sh`) — `db_exec` hardcoded the `mariadb` client for the Docker path. MySQL images (and MariaDB before 10.5) ship only the `mysql` binary, so `docker exec … mariadb` failed with "executable not found", the read-only quiesce silently no-op'd, and the backup fell back to `STOP_DOCKER` — causing avoidable downtime. Now tries `mariadb` then `mysql`, mirroring the native branch. (Found in production: a MySQL 8.0 analytics container was auto-detected and the quiesce silently failed.) +- **Silent quiesce failures now logged** (`pi-image-backup.sh`) — `db_exec` captured stderr to `/dev/null`, so a failed quiesce gave no cause.
It now captures stderr into `_DB_LAST_ERR` and `db_lock_mysql` logs the real MariaDB/MySQL error in its fallback branch. ### Changed diff --git a/README.md b/README.md index bb7a6d0..67d40b4 100644 --- a/README.md +++ b/README.md @@ -355,6 +355,7 @@ pi-image-backup.sh [options] --stale-check Ntfy alert if latest backup is older than STALE_BACKUP_HOURS --cost Show S3 storage used and estimated monthly cost --no-stop-docker Skip Docker stop (for daytime test runs with no downtime) + --db-check Diagnose DB detection + read-only quiesce, then exit (no imaging) --help Show usage ``` diff --git a/website/llms.txt b/website/llms.txt index 8884fde..b9b54f0 100644 --- a/website/llms.txt +++ b/website/llms.txt @@ -28,6 +28,7 @@ Never run `pi-image-restore.sh` unless explicitly asked — it overwrites a disk - `bash pi-image-backup.sh --force` — run a backup now - `bash pi-image-backup.sh --dry-run` — show the plan, upload nothing +- `bash pi-image-backup.sh --db-check` — confirm DB detection + zero-downtime quiesce (no imaging) - `bash pi-image-backup.sh --list` / `--verify` — list / verify S3 backups - `bash pi-image-backup.sh --cost` — S3 usage and estimated monthly cost - `bash install.sh --iam-policy` — print the minimum AWS IAM policy