diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 8cdd826fbd37a..f11bb67b63173 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -4253,7 +4253,10 @@ restore_command = 'copy "C:\\server\\archivedir\\%f" "%p"' # Windows recovery_target_time, or recovery_target_xid can be used; if more than one of these is specified in the configuration file, an error will be raised. - These parameters can only be set at server start. + These parameters can be changed by reloading the server configuration + without requiring a restart, except for + recovery_target_timeline which can only be set at + server start. @@ -4274,6 +4277,9 @@ restore_command = 'copy "C:\\server\\archivedir\\%f" "%p"' # Windows Technically, this is a string parameter, but 'immediate' is currently the only allowed value. + + This parameter can be changed by reloading the server configuration. + @@ -4288,6 +4294,9 @@ restore_command = 'copy "C:\\server\\archivedir\\%f" "%p"' # Windows This parameter specifies the named restore point (created with pg_create_restore_point()) to which recovery will proceed. + + This parameter can be changed by reloading the server configuration. + @@ -4314,6 +4323,29 @@ restore_command = 'copy "C:\\server\\archivedir\\%f" "%p"' # Windows numeric offset from UTC, or you can write a full time zone name, e.g., Europe/Helsinki not EEST. + + + This parameter can be changed by reloading the server configuration + (for example, via pg_reload_conf()) without + requiring a server restart. + + + + The value is re-parsed under the current + setting on each reload. To avoid + ambiguity, use an explicit time zone offset, e.g., + '2024-01-15 10:30:00+00', to ensure deterministic + behavior regardless of the server's timezone configuration. + + + + When is set to + pause and recovery is paused at the target, + changing recovery_target_time to a later value + and reloading the configuration will automatically resume WAL replay + toward the new target. Setting it to an equal or earlier value while + paused is a no-op. + @@ -4349,6 +4381,9 @@ restore_command = 'copy "C:\\server\\archivedir\\%f" "%p"' # Windows The effective transaction ID (the 32-bit portion) must be greater than or equal to 3. + + This parameter can be changed by reloading the server configuration. + @@ -4366,6 +4401,13 @@ restore_command = 'copy "C:\\server\\archivedir\\%f" "%p"' # Windows parameter is parsed using the system data type pg_lsn. + + This parameter can be changed by reloading the server configuration. + When is set to + pause and recovery is paused at the target, + advancing the LSN to a later value and reloading the configuration + will automatically resume WAL replay toward the new target. + @@ -4395,6 +4437,10 @@ restore_command = 'copy "C:\\server\\archivedir\\%f" "%p"' # Windows having exactly the target WAL location (LSN), commit time, or transaction ID, respectively, will be included in the recovery. Default is on. + + This parameter can be changed by reloading the server configuration. + The new value takes effect on the next recovery stop evaluation. + @@ -4430,6 +4476,9 @@ restore_command = 'copy "C:\\server\\archivedir\\%f" "%p"' # Windows a state that itself was reached after a point-in-time recovery. See for discussion. + + This parameter can only be set at server start. + @@ -4457,9 +4506,8 @@ restore_command = 'copy "C:\\server\\archivedir\\%f" "%p"' # Windows using pg_wal_replay_resume() (see ), which then causes recovery to end. If this recovery target is not the - desired stopping point, then shut down the server, change the - recovery target settings to a later target and restart to - continue recovery. + desired stopping point, change the recovery target settings to a + later target and reload the configuration to continue recovery. The shutdown setting is useful to have the instance ready @@ -4482,6 +4530,12 @@ restore_command = 'copy "C:\\server\\archivedir\\%f" "%p"' # Windows a setting of pause will act the same as promote. + + This parameter can be changed by reloading the server configuration. + For example, changing from pause to + promote while recovery is paused at the target + will trigger promotion. + In any case, if a recovery target is configured but the archive recovery ends before the target is reached, the server will shut down diff --git a/implementation-plan-recovery-target-time-no-restart.md b/implementation-plan-recovery-target-time-no-restart.md new file mode 100644 index 0000000000000..108a3d5ac677f --- /dev/null +++ b/implementation-plan-recovery-target-time-no-restart.md @@ -0,0 +1,583 @@ +# Implementation Plan: Make `recovery_target_*` Parameters Reloadable Without Restart + +**Version**: 3 +**Last updated**: 2026-03-26 + +## Implementation Progress + +### Phase 1: recovery_target_time (Complete) +- [x] 1.1 Change recovery_target_time GUC context to PGC_SIGHUP +- [x] 1.2 Add target_type_conflict_exists() helper function +- [x] 1.3 Add parse_recovery_target_time_safe() shared helper +- [x] 1.4 Rewrite check_recovery_target_time() with safe parsing and conflict check +- [x] 1.5 Simplify assign_recovery_target_time() (remove error_throwing) +- [x] 1.6 Update validateRecoveryParameters() to use shared parser +- [x] 1.7 Add RecoveryPauseReason enum for target-advance detection +- [x] 1.8 Add redo label and goto for replay loop re-entry + +### Phase 2: All other recovery_target_* parameters (Complete) +- [x] 2.1 Change recovery_target to PGC_SIGHUP, fix hooks +- [x] 2.2 Change recovery_target_lsn to PGC_SIGHUP, fix hooks +- [x] 2.3 Change recovery_target_xid to PGC_SIGHUP, fix hooks +- [x] 2.4 Change recovery_target_name to PGC_SIGHUP, fix hooks +- [x] 2.5 Change recovery_target_inclusive to PGC_SIGHUP (no hooks needed) +- [x] 2.6 Change recovery_target_action to PGC_SIGHUP (no hooks needed) +- [x] 2.7 Remove error_multiple_recovery_targets() (replaced by check hooks) +- [x] 2.8 Generalize resume logic: LSN, XID, NAME target changes +- [x] 2.9 Handle recovery_target_action change (pause→promote/shutdown) + +### Documentation +- [x] Update config.sgml for all reloadable parameters + +### Testing (14 assertions, 8 scenarios — ALL PASS) +- [x] TEST 1: Pause at T1, advance to T2, verify resume and re-pause +- [x] TEST 2: Same target reload is no-op +- [x] TEST 3: Earlier target reload is no-op +- [x] TEST 4: pg_wal_replay_resume() promotes (not re-enter replay) +- [x] TEST 5: Mutual exclusion of recovery target types +- [x] TEST 6: LSN target advance — pause, advance LSN, verify resume +- [x] TEST 7: Action change pause→promote while paused +- [x] TEST 8: recovery_target_inclusive toggle accepted on reload + +### Parameters NOT made reloadable (by design) +- recovery_target_timeline — changing timeline mid-recovery is unsafe + +### Integration +- [x] Build and compile successfully (Docker debian:bookworm) +- [x] All 14 TAP tests pass in Docker container +- [x] Post testing evidence to PR #22 + +## Changelog + +### v2 (2026-03-26) — Post-review revision + +Incorporates feedback from three expert reviews. Key changes from v1: + +1. **Reframed scope** (Review 2): This is not a generic "GUC reloadability" patch. + It is specifically: "Allow moving a paused PITR time-based stop point forward + via config reload during recovery." The feature is scoped to + `recovery_target_action = 'pause'` + `recovery_target_time` only. + +2. **Split into two patches** (Review 2): Patch 1 is pure GUC safety cleanup + (useful on its own). Patch 2 adds paused-target-forward-resume semantics. + +3. **Fixed unsafe timestamp parsing** (Reviews 1, 3): Removed `DirectFunctionCall3(timestamptz_in)` + from check hook — it throws ERROR on bad input, which corrupts GUC state. + Now uses safe manual `ParseDateTime`/`DecodeDateTime`/`tm2timestamp` path + throughout, storing parsed `TimestampTz` in `*extra`. + +4. **Fixed fragile mutual-exclusion check** (Review 3): Check hook now inspects + raw GUC string variables (`recovery_target_string`, `recovery_target_lsn_string`, + etc.) instead of the derived `recoveryTarget` enum, avoiding order-dependent + intermediate state during SIGHUP processing. + +5. **Replaced `goto` with outer loop** (Review 3): Instead of `goto redo_loop_start`, + use `while (!reachedFinalTarget)` wrapping the replay loop. More code but + more reviewable. + +6. **Added explicit "paused-at-target" state** (Review 2): New recovery sub-state + instead of implicitly toggling `reachedRecoveryTarget`. Makes the state + transition from "target reached" to "target superseded, resume replay" explicit. + +7. **Simplified backward-target semantics** (Reviews 1, 2): Removed check-hook + state inspection (`GetLatestXTime()` is unsafe in check hooks). New rule: + accept any valid timestamp; during active replay a past target means "stop at + next commit"; while paused at target an equal-or-earlier value is a no-op. + +8. **Kept startup validation authoritative** (Review 3): `validateRecoveryParameters()` + remains the canonical startup path. Replaced the `Assert` idea with a shared + parsing helper used by both startup and the check hook. + +9. **Added missing analysis**: `recovery_min_apply_delay` interaction, + `pg_control` / crash-recovery safety, timezone-on-reload semantics, + mid-transaction stop safety (Review 3). + +10. **Expanded test plan** (Reviews 2, 3): Added cases for no-change reload, + `pg_wal_replay_resume()` interaction, timezone reload, backward move during + active replay, and post-promotion no-op. + +### v1 (2026-03-26) — Initial draft + +Initial analysis and implementation plan. + +--- + +## Problem Statement + +Operators managing standby servers doing point-in-time recovery (PITR) must +restart PostgreSQL to change `recovery_target_time`. The concrete operator +story this patch enables: + +1. Standby is in archive/PITR recovery +2. `recovery_target_action = 'pause'`, `recovery_target_time` is set +3. Recovery pauses at the target +4. Admin updates `recovery_target_time` to a later value +5. `pg_reload_conf()` — startup process resumes WAL replay toward the new target + +This is scoped to time-based targets with pause action. Other target types +remain `PGC_POSTMASTER`. Changing target *type* (e.g., time to XID) still +requires a restart. + +## Current Architecture + +### GUC Definition (`src/backend/utils/misc/guc_parameters.dat:2407`) + +```perl +{ name => 'recovery_target_time', type => 'string', context => 'PGC_POSTMASTER', + variable => 'recovery_target_time_string', + check_hook => 'check_recovery_target_time', + assign_hook => 'assign_recovery_target_time', +}, +``` + +### Key Variables (`src/backend/access/transam/xlogrecovery.c`) + +| Variable | Type | Purpose | +|----------|------|---------| +| `recovery_target_time_string` | `char *` | Raw GUC string value | +| `recoveryTargetTime` | `TimestampTz` | Parsed timestamp used in comparisons | +| `recoveryTarget` | `RecoveryTargetType` | Enum indicating which target type is active | +| `recoveryTargetInclusive` | `bool` | Whether the target is inclusive | +| `recoveryTargetAction` | `RecoveryTargetActionType` | What to do when target is reached | + +### Recovery Flow + +1. **Startup**: `StartupXLOG()` → `InitWalRecovery()` → `validateRecoveryParameters()` parses `recovery_target_time_string` into `recoveryTargetTime` (line 1105-1111) +2. **WAL Replay Loop** (line ~1614-1802): For each WAL record: + - `ProcessStartupProcInterrupts()` handles signals including SIGHUP for config reload + - `recoveryStopsBefore()` checks if this record's timestamp exceeds `recoveryTargetTime` (line 2651-2662) + - `recoveryStopsAfter()` does the same for post-apply checks + - If target reached: `reachedRecoveryTarget = true`, breaks out of loop +3. **Post-target** (line 1808-1840): Based on `recoveryTargetAction`: shutdown, pause, or promote + +### Why It Currently Requires Restart + +1. **`PGC_POSTMASTER` context** — GUC framework rejects SIGHUP changes +2. **Assign hook throws ERROR** (`error_multiple_recovery_targets()` at line 4784) — violates GUC contract; only safe because PGC_POSTMASTER limits this to startup (acknowledged as "broken by design" in comment at line 4776) +3. **One-time parsing** — `recoveryTargetTime` is parsed once during `validateRecoveryParameters()` and never refreshed +4. **No mechanism to resume paused recovery** when target changes + +--- + +## Patch Series + +### Patch 1: Make GUC Internally Safe for Reload + +Pure GUC cleanup. Useful on its own even if Patch 2 is deferred. Does not +promise resume-after-pause yet. + +#### 1.1 Change GUC Context to `PGC_SIGHUP` + +**File**: `src/backend/utils/misc/guc_parameters.dat` + +```perl +{ name => 'recovery_target_time', type => 'string', context => 'PGC_SIGHUP', + ... +}, +``` + +Only `recovery_target_time` changes. All other `recovery_target_*` parameters +remain `PGC_POSTMASTER`. + +#### 1.2 Fix Mutual-Exclusion Check (Move to Check Hook) + +**File**: `src/backend/access/transam/xlogrecovery.c` + +**Problem**: `assign_recovery_target_time()` calls `error_multiple_recovery_targets()` +which throws `ERROR`. With `PGC_SIGHUP`, this corrupts GUC state. + +**Solution**: Move the check into `check_recovery_target_time()`, which can +safely return `false`. Critically, check the **raw GUC string variables** +(`recovery_target_string`, `recovery_target_lsn_string`, +`recovery_target_name_string`, `recovery_target_xid_string`) rather than the +derived `recoveryTarget` enum, to avoid order-dependent intermediate state +during SIGHUP processing: + +```c +bool +check_recovery_target_time(char **newval, void **extra, GucSource source) +{ + if (strcmp(*newval, "") != 0) + { + /* + * Reject if a different recovery target type is already configured. + * Check raw GUC strings, not the derived recoveryTarget enum, to + * avoid sensitivity to GUC processing order during SIGHUP. + */ + if (target_type_conflict_exists(RECOVERY_TARGET_TIME)) + { + GUC_check_errdetail("Another recovery target type is already set."); + return false; + } + + /* ... safe timestamp parsing (see 1.3) ... */ + } + return true; +} +``` + +Introduce `target_type_conflict_exists()` helper that checks raw GUC strings. +Use this helper from both startup validation and all check hooks. + +Simplify the assign hook to be purely mechanical: + +```c +void +assign_recovery_target_time(const char *newval, void *extra) +{ + if (newval && strcmp(newval, "") != 0) + { + recoveryTarget = RECOVERY_TARGET_TIME; + recoveryTargetTime = *((TimestampTz *) extra); + } + else + recoveryTarget = RECOVERY_TARGET_UNSET; +} +``` + +#### 1.3 Safe Timestamp Parsing via `*extra` + +**File**: `src/backend/access/transam/xlogrecovery.c` + +**Critical safety rule**: Never call `DirectFunctionCall3(timestamptz_in, ...)` +inside a check hook. It throws `ereport(ERROR)` on bad input, which corrupts +GUC state during SIGHUP. Instead, use the manual safe-parsing path already +present in the existing check hook: + +```c +bool +check_recovery_target_time(char **newval, void **extra, GucSource source) +{ + if (strcmp(*newval, "") != 0) + { + /* mutual exclusion check (see 1.2) ... */ + + /* Safe timestamp parsing — no ereport(ERROR) on bad input */ + { + char *str = *newval; + fsec_t fsec; + struct pg_tm tt, *tm = &tt; + int tz; + int dtype, nf, dterr; + char *field[MAXDATEFIELDS]; + int ftype[MAXDATEFIELDS]; + char workbuf[MAXDATELEN + MAXDATEFIELDS]; + DateTimeErrorExtra dtextra; + TimestampTz timestamp; + TimestampTz *parsed_ts; + + dterr = ParseDateTime(str, workbuf, sizeof(workbuf), + field, ftype, MAXDATEFIELDS, &nf); + if (dterr == 0) + dterr = DecodeDateTime(field, ftype, nf, + &dtype, tm, &fsec, &tz, &dtextra); + if (dterr != 0) + return false; + if (dtype != DTK_DATE) + return false; + if (tm2timestamp(tm, fsec, &tz, ×tamp) != 0) + { + GUC_check_errdetail("Timestamp out of range: \"%s\".", str); + return false; + } + + /* Stash parsed value for the assign hook */ + parsed_ts = (TimestampTz *) guc_malloc(LOG, sizeof(TimestampTz)); + if (!parsed_ts) + return false; + *parsed_ts = timestamp; + *extra = parsed_ts; + } + } + return true; +} +``` + +Note: `guc_malloc`-allocated `*extra` is managed by the GUC framework — it +handles freeing previous values on reassignment. No manual free needed. + +#### 1.4 Shared Parsing Helper for Startup and Check Hook + +**File**: `src/backend/access/transam/xlogrecovery.c` + +Extract a shared helper `parse_recovery_target_time_safe()` used by both +`check_recovery_target_time()` and `validateRecoveryParameters()`. The startup +path in `validateRecoveryParameters()` remains authoritative — it is not +replaced by an Assert or removed. Both paths call the same function: + +```c +static bool +parse_recovery_target_time_safe(const char *str, TimestampTz *result) +{ + /* Manual ParseDateTime/DecodeDateTime/tm2timestamp — no ereport */ + ... + *result = timestamp; + return true; +} +``` + +#### 1.5 Timezone-on-Reload Semantics + +**Decision**: `recovery_target_time` is re-parsed under the current timezone on +every reload. This means if the timezone GUC changes and `recovery_target_time` +contains a zone-ambiguous string, the effective `TimestampTz` may change even if +the text is unchanged. + +This is the correct behavior: it matches how the parameter works at startup +(parsed after timezone is finalized). Document this explicitly and require +fully-qualified timestamps (with explicit timezone offset) in documentation +examples to avoid surprises. + +--- + +### Patch 2: Paused-Target-Forward-Resume Semantics + +Only applies when: +- `recovery_target_action = 'pause'` +- `recovery_target` type is `RECOVERY_TARGET_TIME` +- Recovery is still in progress (not promoted, not shut down) + +#### 2.1 Explicit Recovery Sub-State + +**File**: `src/backend/access/transam/xlogrecovery.c` + +Introduce a distinct "paused-at-target" state rather than implicitly toggling +`reachedRecoveryTarget`: + +```c +typedef enum +{ + RECOVERY_PAUSE_NONE, + RECOVERY_PAUSE_REQUESTED, /* pg_wal_replay_pause() */ + RECOVERY_PAUSE_AT_TARGET, /* recovery_target reached with action=pause */ + RECOVERY_PAUSE_TARGET_SUPERSEDED /* target changed, should resume */ +} RecoveryPauseReason; +``` + +When recovery pauses because a time target was reached with `action = pause`, +the startup process enters `RECOVERY_PAUSE_AT_TARGET`. On config reload, if a +new valid `recovery_target_time` is strictly later than `recoveryStopTime`, the +state transitions to `RECOVERY_PAUSE_TARGET_SUPERSEDED` and WAL replay resumes. + +This avoids ambiguity between "DBA called `pg_wal_replay_resume()`" (which +means proceed to promote) and "DBA changed the target time" (which means +continue replay toward new target). + +#### 2.2 Restructure Replay Loop (No `goto`) + +**File**: `src/backend/access/transam/xlogrecovery.c` + +Instead of `goto redo_loop_start`, wrap the existing replay loop in an outer +`while (!reachedFinalTarget)` loop: + +```c +bool reachedFinalTarget = false; + +while (!reachedFinalTarget) +{ + bool reachedRecoveryTarget = false; + + /* ... existing WAL replay do/while loop ... */ + + if (reachedRecoveryTarget) + { + switch (recoveryTargetAction) + { + case RECOVERY_TARGET_ACTION_SHUTDOWN: + proc_exit(3); + + case RECOVERY_TARGET_ACTION_PAUSE: + SetRecoveryPause(true); + recoveryPausesHere(true); + + /* + * Distinguish: did we unpause because target changed, + * or because pg_wal_replay_resume() was called? + */ + if (GetRecoveryPauseReason() == RECOVERY_PAUSE_TARGET_SUPERSEDED) + { + /* Target moved forward — continue replay */ + ereport(LOG, + (errmsg("recovery target time changed to %s, resuming WAL replay", + timestamptz_to_str(recoveryTargetTime)))); + continue; /* outer while loop */ + } + /* else: pg_wal_replay_resume() — fall through to promote */ + pg_fallthrough; + + case RECOVERY_TARGET_ACTION_PROMOTE: + break; + } + reachedFinalTarget = true; + } +} +``` + +#### 2.3 Target-Change Detection in Pause Loop + +**File**: `src/backend/access/transam/xlogrecovery.c` + +Inside `recoveryPausesHere()`, after `ProcessStartupProcInterrupts()`, detect +whether `recoveryTargetTime` has moved beyond `recoveryStopTime`: + +```c +static void +recoveryPausesHere(bool endOfRecovery) +{ + /* ... existing checks ... */ + + TimestampTz pausedAtTime = recoveryStopTime; + + while (GetRecoveryPauseState() != RECOVERY_NOT_PAUSED) + { + ProcessStartupProcInterrupts(); + if (CheckForStandbyTrigger()) + return; + + /* + * If recovery_target_time was changed via SIGHUP to a value + * strictly later than where we paused, resume replay. + */ + if (endOfRecovery && + recoveryTarget == RECOVERY_TARGET_TIME && + recoveryTargetTime > pausedAtTime) + { + SetRecoveryPause(false); + SetRecoveryPauseReason(RECOVERY_PAUSE_TARGET_SUPERSEDED); + ereport(LOG, + (errmsg("recovery target time advanced, resuming WAL replay"))); + break; + } + + ConfirmRecoveryPaused(); + ConditionVariableTimedSleep(&XLogRecoveryCtl->recoveryNotPausedCV, + 1000, WAIT_EVENT_RECOVERY_PAUSE); + } + ConditionVariableCancelSleep(); +} +``` + +#### 2.4 Backward and Equal Target Semantics + +No check-hook state inspection (check hooks must not read shared memory or +runtime state like `GetLatestXTime()`). Rules: + +| State | New target vs current | Behavior | +|-------|----------------------|----------| +| Active replay, before target reached | Any valid timestamp | Accepted. `recoveryStopsBefore()` evaluates normally. Past value = stop at next commit record. | +| Paused at target | Strictly later | Resume replay toward new target | +| Paused at target | Equal or earlier | No-op (remains paused). No error, no warning. | +| After promotion / shutdown | Any | No runtime effect (config is accepted but inert) | + +**Mid-transaction safety**: `recoveryStopsBefore()`/`recoveryStopsAfter()` only +evaluate at commit/abort records (this invariant is already enforced and does +not need changes). A backward target during active replay will not stop +mid-transaction — it stops at the next transaction boundary. + +#### 2.5 Logging Strategy + +Keep assign hook purely mechanical. Meaningful LOG messages go in recovery +control logic where the runtime consequence is known: + +| Event | Where | Message | +|-------|-------|---------| +| Target time advanced, resuming | `recoveryPausesHere()` | `"recovery target time advanced, resuming WAL replay"` | +| Replay stopped at new target | `recoveryStopsBefore()` (existing) | Already logged | +| Reload accepted, no runtime effect | — | No log (avoid noise) | + +--- + +## Additional Analysis (Added in v2) + +### `recovery_min_apply_delay` Interaction + +The delay is applied in `recoveryApplyDelay()` which runs *before* +`recoveryStopsBefore()` in the replay loop (line 1765). This means: + +- If a delayed replica has its target time changed, the delay still applies to + each record before the stop-check. No conflict. +- A forward target change while paused resumes replay, and subsequent records + will still be delayed normally. + +No code changes needed, but add a test case. + +### `pg_control` and Crash-Recovery Safety + +When recovery pauses at a target, `pg_control` records the recovery state +(including the last replayed LSN). If the server crashes while paused: + +- On restart, recovery re-reads `postgresql.conf` (including the potentially + updated `recovery_target_time`) and replays from the last checkpoint. +- Since `recovery_target_time` is now `PGC_SIGHUP`, the startup path reads the + current config file value. If the admin changed it before the crash, the new + value is used on restart — which is the correct behavior. +- If the admin changed it *after* the crash (before restart), same result. + +No special handling needed. The existing crash-recovery path is correct. + +### `pg_wal_replay_resume()` Interaction + +If an admin calls `pg_wal_replay_resume()` while paused at a target, the +current behavior is to proceed to promotion. This must be preserved. The new +`RecoveryPauseReason` state distinguishes between "resume because target changed" +and "resume because admin called `pg_wal_replay_resume()`", ensuring the two +paths remain separate. + +If an admin changes the target *and* calls `pg_wal_replay_resume()` in the same +reload cycle: the resume wins (pause state is cleared), and the target change +takes effect for subsequent stop checks if replay continues. + +--- + +## Files Modified + +| File | Change | +|------|--------| +| `src/backend/utils/misc/guc_parameters.dat` | `PGC_POSTMASTER` → `PGC_SIGHUP` | +| `src/backend/access/transam/xlogrecovery.c` | Fix hooks, safe parsing, shared helper, resume-on-change logic, pause reason state | +| `src/include/access/xlogrecovery.h` | `RecoveryPauseReason` enum, helper declarations | +| `doc/src/sgml/config.sgml` | Documentation update (reloadable, timezone note, behavior) | +| `src/test/recovery/t/0XX_recovery_target_reload.pl` | New TAP test | + +## Estimated Size + +~500-700 lines changed/added across all files (revised upward from v1 estimate +of 300-500). + +## Test Plan + +### Core scenarios +1. Pause at target, reload with later time, verify resume and re-pause +2. Pause at target, reload with same time, verify no-op +3. Pause at target, reload with earlier time, verify no-op (stays paused) +4. Active replay before target, reload with earlier time, verify stops at next commit +5. Active replay before target, reload with later time, verify continues to new target + +### Interaction tests +6. `pg_wal_replay_resume()` while paused at target — verify promotes (not re-enter replay) +7. Target change + `pg_wal_replay_resume()` in same cycle — verify correct precedence +8. `recovery_min_apply_delay` interaction — verify delay still applies after target change +9. Timezone reload — verify reparsing under new timezone +10. Non-time target already set (`recovery_target_xid`) — verify `recovery_target_time` rejected + +### Edge cases +11. Reload with no change — verify no spurious log messages +12. Empty string (unset) while paused at target — verify behavior +13. After promotion — verify reload accepted but inert +14. Crash while paused, restart with new target — verify correct recovery + +## Risks and Considerations + +1. **Race conditions**: Not a concern. The startup process is single-threaded; + SIGHUP is processed synchronously via `ProcessStartupProcInterrupts()`. + +2. **Timezone on reload**: Re-parsed under current timezone. Documented. Users + should use explicit timezone offsets in timestamps for deterministic behavior. + +3. **Only `pause` benefits at runtime**: `shutdown` exits, `promote` ends + recovery. Runtime resume semantics are explicitly limited to `pause` action. + +4. **Backward compatibility**: Strictly additive. Existing behavior unchanged. + +5. **Review strategy**: Two-patch series. Patch 1 is independently useful GUC + hygiene. Patch 2 is the behavioral change that will attract review scrutiny. diff --git a/src/backend/access/transam/xlogrecovery.c b/src/backend/access/transam/xlogrecovery.c index 6d2c4a86b9600..32ebcc6b21d8c 100644 --- a/src/backend/access/transam/xlogrecovery.c +++ b/src/backend/access/transam/xlogrecovery.c @@ -355,6 +355,9 @@ static char *getRecoveryStopReason(void); static void recoveryPausesHere(bool endOfRecovery); static bool recoveryApplyDelay(XLogReaderState *record); static void ConfirmRecoveryPaused(void); +static bool parse_recovery_target_time_safe(const char *str, + TimestampTz *result); +static bool target_type_conflict_exists(RecoveryTargetType this_target); static XLogRecord *ReadRecord(XLogPrefetcher *xlogprefetcher, int emode, bool fetching_ckpt, @@ -1104,10 +1107,12 @@ validateRecoveryParameters(void) */ if (recoveryTarget == RECOVERY_TARGET_TIME) { - recoveryTargetTime = DatumGetTimestampTz(DirectFunctionCall3(timestamptz_in, - CStringGetDatum(recovery_target_time_string), - ObjectIdGetDatum(InvalidOid), - Int32GetDatum(-1))); + if (!parse_recovery_target_time_safe(recovery_target_time_string, + &recoveryTargetTime)) + ereport(FATAL, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("could not parse recovery target time \"%s\"", + recovery_target_time_string))); } /* @@ -1637,6 +1642,7 @@ PerformWalRecovery(void) XLogRecoveryCtl->recoveryLastXTime = 0; XLogRecoveryCtl->currentChunkStartTime = 0; XLogRecoveryCtl->recoveryPauseState = RECOVERY_NOT_PAUSED; + XLogRecoveryCtl->recoveryPauseReason = RECOVERY_PAUSE_NONE; SpinLockRelease(&XLogRecoveryCtl->info_lck); /* Also ensure XLogReceiptTime has a sane value */ @@ -1706,6 +1712,7 @@ PerformWalRecovery(void) /* * main redo apply loop */ +redo: do { if (!StandbyMode) @@ -1829,9 +1836,32 @@ PerformWalRecovery(void) case RECOVERY_TARGET_ACTION_PAUSE: SetRecoveryPause(true); + SetRecoveryPauseReason(RECOVERY_PAUSE_AT_TARGET); recoveryPausesHere(true); - /* drop into promote */ + /* + * Check why we unpaused: + * - RECOVERY_PAUSE_AT_TARGET: target was advanced, + * resume replay + * - RECOVERY_PAUSE_NONE: pg_wal_replay_resume() or + * action change + */ + if (GetRecoveryPauseReason() == RECOVERY_PAUSE_AT_TARGET) + { + ereport(LOG, + (errmsg("resuming WAL replay toward new recovery target"))); + reachedRecoveryTarget = false; + goto redo; + } + + /* + * If recovery_target_action was changed to 'shutdown' + * while we were paused, honor the new action. + */ + if (recoveryTargetAction == RECOVERY_TARGET_ACTION_SHUTDOWN) + proc_exit(3); + + /* pg_wal_replay_resume() or action changed to promote */ pg_fallthrough; case RECOVERY_TARGET_ACTION_PROMOTE: @@ -2897,6 +2927,14 @@ getRecoveryStopReason(void) static void recoveryPausesHere(bool endOfRecovery) { + /* Capture the paused-at state for later comparison */ + TimestampTz pausedAtTime = recoveryStopTime; + XLogRecPtr pausedAtLSN = recoveryStopLSN; + TransactionId pausedAtXid = recoveryStopXid; + char pausedAtName[MAXFNAMELEN]; + + strlcpy(pausedAtName, recoveryStopName, MAXFNAMELEN); + /* Don't pause unless users can connect! */ if (!LocalHotStandbyActive) return; @@ -2921,6 +2959,61 @@ recoveryPausesHere(bool endOfRecovery) if (CheckForStandbyTrigger()) return; + /* + * If a recovery target parameter was changed via SIGHUP while + * paused at the target, check if we should resume replay. + */ + if (endOfRecovery) + { + bool should_resume = false; + + /* + * Check if recovery_target_action was changed away from + * 'pause'. If so, we should unpause and let the post-target + * action logic handle the new action (promote or shutdown). + */ + if (recoveryTargetAction != RECOVERY_TARGET_ACTION_PAUSE) + { + SetRecoveryPause(false); + /* Don't set RECOVERY_PAUSE_AT_TARGET -- let caller handle action change */ + ereport(LOG, + (errmsg("recovery_target_action changed, unpausing recovery"))); + break; + } + + /* Check if the recovery target itself was advanced */ + switch (recoveryTarget) + { + case RECOVERY_TARGET_TIME: + if (timestamptz_cmp_internal(recoveryTargetTime, pausedAtTime) > 0) + should_resume = true; + break; + case RECOVERY_TARGET_LSN: + if (recoveryTargetLSN > pausedAtLSN) + should_resume = true; + break; + case RECOVERY_TARGET_XID: + if (recoveryTargetXid != pausedAtXid) + should_resume = true; + break; + case RECOVERY_TARGET_NAME: + if (strcmp(recoveryTargetName, pausedAtName) != 0) + should_resume = true; + break; + default: + break; + } + + if (should_resume) + { + SetRecoveryPause(false); + SetRecoveryPauseReason(RECOVERY_PAUSE_AT_TARGET); + ereport(LOG, + (errmsg("recovery target changed, resuming WAL replay"))); + break; + } + } + /* * If recovery pause is requested then set it paused. While we are in * the loop, user might resume and pause again so set this every time. @@ -3065,7 +3158,10 @@ SetRecoveryPause(bool recoveryPause) SpinLockAcquire(&XLogRecoveryCtl->info_lck); if (!recoveryPause) + { XLogRecoveryCtl->recoveryPauseState = RECOVERY_NOT_PAUSED; + XLogRecoveryCtl->recoveryPauseReason = RECOVERY_PAUSE_NONE; + } else if (XLogRecoveryCtl->recoveryPauseState == RECOVERY_NOT_PAUSED) XLogRecoveryCtl->recoveryPauseState = RECOVERY_PAUSE_REQUESTED; @@ -3089,6 +3185,32 @@ ConfirmRecoveryPaused(void) SpinLockRelease(&XLogRecoveryCtl->info_lck); } +/* + * Get the reason for the current recovery pause. + */ +RecoveryPauseReason +GetRecoveryPauseReason(void) +{ + RecoveryPauseReason reason; + + SpinLockAcquire(&XLogRecoveryCtl->info_lck); + reason = XLogRecoveryCtl->recoveryPauseReason; + SpinLockRelease(&XLogRecoveryCtl->info_lck); + + return reason; +} + +/* + * Set the reason for the current recovery pause. + */ +void +SetRecoveryPauseReason(RecoveryPauseReason reason) +{ + SpinLockAcquire(&XLogRecoveryCtl->info_lck); + XLogRecoveryCtl->recoveryPauseReason = reason; + SpinLockRelease(&XLogRecoveryCtl->info_lck); +} + /* * Attempt to read the next XLOG record. @@ -4767,28 +4889,16 @@ check_primary_slot_name(char **newval, void **extra, GucSource source) * may be set. Setting a second one results in an error. The global variable * recoveryTarget tracks which kind of recovery target was chosen. Other * variables store the actual target value (for example a string or a xid). - * The assign functions of the parameters check whether a competing parameter - * was already set. But we want to allow setting the same parameter multiple - * times. We also want to allow unsetting a parameter and setting a different - * one, so we unset recoveryTarget when the parameter is set to an empty - * string. - * - * XXX this code is broken by design. Throwing an error from a GUC assign - * hook breaks fundamental assumptions of guc.c. So long as all the variables - * for which this can happen are PGC_POSTMASTER, the consequences are limited, - * since we'd just abort postmaster startup anyway. Nonetheless it's likely - * that we have odd behaviors such as unexpected GUC ordering dependencies. + * The check hooks use target_type_conflict_exists() to detect conflicts by + * inspecting raw GUC string variables, which is safe during SIGHUP + * processing when multiple GUC variables may be updated in any order. + * The assign hooks are purely mechanical: they set the derived variables + * from the validated values. We allow setting the same parameter multiple + * times, and unsetting a parameter (by setting it to an empty string) resets + * recoveryTarget to RECOVERY_TARGET_UNSET, allowing a different target type + * to be set. */ -pg_noreturn static void -error_multiple_recovery_targets(void) -{ - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("multiple recovery targets specified"), - errdetail("At most one of \"recovery_target\", \"recovery_target_lsn\", \"recovery_target_name\", \"recovery_target_time\", \"recovery_target_xid\" may be set."))); -} - /* * GUC check_hook for recovery_target */ @@ -4800,6 +4910,14 @@ check_recovery_target(char **newval, void **extra, GucSource source) GUC_check_errdetail("The only allowed value is \"immediate\"."); return false; } + if (strcmp(*newval, "") != 0) + { + if (target_type_conflict_exists(RECOVERY_TARGET_IMMEDIATE)) + { + GUC_check_errdetail("Cannot set recovery_target when another recovery target type is already set."); + return false; + } + } return true; } @@ -4809,10 +4927,6 @@ check_recovery_target(char **newval, void **extra, GucSource source) void assign_recovery_target(const char *newval, void *extra) { - if (recoveryTarget != RECOVERY_TARGET_UNSET && - recoveryTarget != RECOVERY_TARGET_IMMEDIATE) - error_multiple_recovery_targets(); - if (newval && strcmp(newval, "") != 0) recoveryTarget = RECOVERY_TARGET_IMMEDIATE; else @@ -4831,6 +4945,12 @@ check_recovery_target_lsn(char **newval, void **extra, GucSource source) XLogRecPtr *myextra; ErrorSaveContext escontext = {T_ErrorSaveContext}; + if (target_type_conflict_exists(RECOVERY_TARGET_LSN)) + { + GUC_check_errdetail("Cannot set recovery_target_lsn when another recovery target type is already set."); + return false; + } + lsn = pg_lsn_in_safe(*newval, (Node *) &escontext); if (escontext.error_occurred) return false; @@ -4850,10 +4970,6 @@ check_recovery_target_lsn(char **newval, void **extra, GucSource source) void assign_recovery_target_lsn(const char *newval, void *extra) { - if (recoveryTarget != RECOVERY_TARGET_UNSET && - recoveryTarget != RECOVERY_TARGET_LSN) - error_multiple_recovery_targets(); - if (newval && strcmp(newval, "") != 0) { recoveryTarget = RECOVERY_TARGET_LSN; @@ -4876,6 +4992,14 @@ check_recovery_target_name(char **newval, void **extra, GucSource source) "recovery_target_name", MAXFNAMELEN - 1); return false; } + if (strcmp(*newval, "") != 0) + { + if (target_type_conflict_exists(RECOVERY_TARGET_NAME)) + { + GUC_check_errdetail("Cannot set recovery_target_name when another recovery target type is already set."); + return false; + } + } return true; } @@ -4885,10 +5009,6 @@ check_recovery_target_name(char **newval, void **extra, GucSource source) void assign_recovery_target_name(const char *newval, void *extra) { - if (recoveryTarget != RECOVERY_TARGET_UNSET && - recoveryTarget != RECOVERY_TARGET_NAME) - error_multiple_recovery_targets(); - if (newval && strcmp(newval, "") != 0) { recoveryTarget = RECOVERY_TARGET_NAME; @@ -4899,19 +5019,86 @@ assign_recovery_target_name(const char *newval, void *extra) } /* - * GUC check_hook for recovery_target_time + * parse_recovery_target_time_safe + * + * Safely parse a timestamp string into a TimestampTz without throwing + * errors. Returns true on success, false on failure. + * Used by both check_recovery_target_time() and validateRecoveryParameters(). + */ +static bool +parse_recovery_target_time_safe(const char *str, TimestampTz *result) +{ + fsec_t fsec; + struct pg_tm tt, + *tm = &tt; + int tz; + int dtype; + int nf; + int dterr; + char *field[MAXDATEFIELDS]; + int ftype[MAXDATEFIELDS]; + char workbuf[MAXDATELEN + MAXDATEFIELDS]; + DateTimeErrorExtra dtextra; + TimestampTz timestamp; + + dterr = ParseDateTime(str, workbuf, sizeof(workbuf), + field, ftype, MAXDATEFIELDS, &nf); + if (dterr == 0) + dterr = DecodeDateTime(field, ftype, nf, + &dtype, tm, &fsec, &tz, &dtextra); + if (dterr != 0) + return false; + if (dtype != DTK_DATE) + return false; + if (tm2timestamp(tm, fsec, &tz, ×tamp) != 0) + return false; + + *result = timestamp; + return true; +} + +/* + * target_type_conflict_exists + * + * Check if another recovery target type is already configured by inspecting + * raw GUC string variables. This avoids dependency on the derived + * recoveryTarget enum, which may be in an intermediate state during SIGHUP + * processing when multiple GUC variables are being updated. * - * The interpretation of the recovery_target_time string can depend on the - * time zone setting, so we need to wait until after all GUC processing is - * done before we can do the final parsing of the string. This check function - * only does a parsing pass to catch syntax errors, but we store the string - * and parse it again when we need to use it. + * Returns true if a conflict exists (another target type is set). + */ +static bool +target_type_conflict_exists(RecoveryTargetType this_target) +{ + if (this_target != RECOVERY_TARGET_IMMEDIATE && + recovery_target_string && strcmp(recovery_target_string, "") != 0) + return true; + if (this_target != RECOVERY_TARGET_LSN && + recovery_target_lsn_string && strcmp(recovery_target_lsn_string, "") != 0) + return true; + if (this_target != RECOVERY_TARGET_NAME && + recovery_target_name_string && strcmp(recovery_target_name_string, "") != 0) + return true; + if (this_target != RECOVERY_TARGET_TIME && + recovery_target_time_string && strcmp(recovery_target_time_string, "") != 0) + return true; + if (this_target != RECOVERY_TARGET_XID && + recovery_target_xid_string && strcmp(recovery_target_xid_string, "") != 0) + return true; + return false; +} + +/* + * GUC check_hook for recovery_target_time */ bool check_recovery_target_time(char **newval, void **extra, GucSource source) { if (strcmp(*newval, "") != 0) { + TimestampTz timestamp; + TimestampTz *parsed_ts; + /* reject some special values */ if (strcmp(*newval, "now") == 0 || strcmp(*newval, "today") == 0 || @@ -4922,39 +5109,29 @@ check_recovery_target_time(char **newval, void **extra, GucSource source) } /* - * parse timestamp value (see also timestamptz_in()) + * Reject if a different recovery target type is already configured. + * Check raw GUC strings, not the derived recoveryTarget enum, to + * avoid sensitivity to GUC processing order during SIGHUP. */ + if (target_type_conflict_exists(RECOVERY_TARGET_TIME)) { - char *str = *newval; - fsec_t fsec; - struct pg_tm tt, - *tm = &tt; - int tz; - int dtype; - int nf; - int dterr; - char *field[MAXDATEFIELDS]; - int ftype[MAXDATEFIELDS]; - char workbuf[MAXDATELEN + MAXDATEFIELDS]; - DateTimeErrorExtra dtextra; - TimestampTz timestamp; - - dterr = ParseDateTime(str, workbuf, sizeof(workbuf), - field, ftype, MAXDATEFIELDS, &nf); - if (dterr == 0) - dterr = DecodeDateTime(field, ftype, nf, - &dtype, tm, &fsec, &tz, &dtextra); - if (dterr != 0) - return false; - if (dtype != DTK_DATE) - return false; - - if (tm2timestamp(tm, fsec, &tz, ×tamp) != 0) - { - GUC_check_errdetail("Timestamp out of range: \"%s\".", str); - return false; - } + GUC_check_errdetail("Cannot set recovery_target_time when another recovery target type is already set."); + return false; + } + + /* Safe timestamp parsing — no ereport(ERROR) on bad input */ + if (!parse_recovery_target_time_safe(*newval, ×tamp)) + { + GUC_check_errdetail("Invalid value for recovery_target_time: \"%s\".", *newval); + return false; } + + /* Stash parsed value for the assign hook via GUC extra mechanism */ + parsed_ts = (TimestampTz *) guc_malloc(LOG, sizeof(TimestampTz)); + if (!parsed_ts) + return false; + *parsed_ts = timestamp; + *extra = parsed_ts; } return true; } @@ -4965,12 +5142,11 @@ check_recovery_target_time(char **newval, void **extra, GucSource source) void assign_recovery_target_time(const char *newval, void *extra) { - if (recoveryTarget != RECOVERY_TARGET_UNSET && - recoveryTarget != RECOVERY_TARGET_TIME) - error_multiple_recovery_targets(); - if (newval && strcmp(newval, "") != 0) + { recoveryTarget = RECOVERY_TARGET_TIME; + recoveryTargetTime = *((TimestampTz *) extra); + } else recoveryTarget = RECOVERY_TARGET_UNSET; } @@ -5048,6 +5224,12 @@ check_recovery_target_xid(char **newval, void **extra, GucSource source) char *endp; char *val; + if (target_type_conflict_exists(RECOVERY_TARGET_XID)) + { + GUC_check_errdetail("Cannot set recovery_target_xid when another recovery target type is already set."); + return false; + } + errno = 0; /* @@ -5093,10 +5275,6 @@ check_recovery_target_xid(char **newval, void **extra, GucSource source) void assign_recovery_target_xid(const char *newval, void *extra) { - if (recoveryTarget != RECOVERY_TARGET_UNSET && - recoveryTarget != RECOVERY_TARGET_XID) - error_multiple_recovery_targets(); - if (newval && strcmp(newval, "") != 0) { recoveryTarget = RECOVERY_TARGET_XID; diff --git a/src/backend/utils/misc/guc_parameters.dat b/src/backend/utils/misc/guc_parameters.dat index 0c9854ad8fc05..11b8f65bb2cf3 100644 --- a/src/backend/utils/misc/guc_parameters.dat +++ b/src/backend/utils/misc/guc_parameters.dat @@ -2367,7 +2367,7 @@ assign_hook => 'assign_recovery_prefetch', }, -{ name => 'recovery_target', type => 'string', context => 'PGC_POSTMASTER', group => 'WAL_RECOVERY_TARGET', +{ name => 'recovery_target', type => 'string', context => 'PGC_SIGHUP', group => 'WAL_RECOVERY_TARGET', short_desc => 'Set to "immediate" to end recovery as soon as a consistent state is reached.', variable => 'recovery_target_string', boot_val => '""', @@ -2375,20 +2375,20 @@ assign_hook => 'assign_recovery_target', }, -{ name => 'recovery_target_action', type => 'enum', context => 'PGC_POSTMASTER', group => 'WAL_RECOVERY_TARGET', +{ name => 'recovery_target_action', type => 'enum', context => 'PGC_SIGHUP', group => 'WAL_RECOVERY_TARGET', short_desc => 'Sets the action to perform upon reaching the recovery target.', variable => 'recoveryTargetAction', boot_val => 'RECOVERY_TARGET_ACTION_PAUSE', options => 'recovery_target_action_options', }, -{ name => 'recovery_target_inclusive', type => 'bool', context => 'PGC_POSTMASTER', group => 'WAL_RECOVERY_TARGET', +{ name => 'recovery_target_inclusive', type => 'bool', context => 'PGC_SIGHUP', group => 'WAL_RECOVERY_TARGET', short_desc => 'Sets whether to include or exclude transaction with recovery target.', variable => 'recoveryTargetInclusive', boot_val => 'true', }, -{ name => 'recovery_target_lsn', type => 'string', context => 'PGC_POSTMASTER', group => 'WAL_RECOVERY_TARGET', +{ name => 'recovery_target_lsn', type => 'string', context => 'PGC_SIGHUP', group => 'WAL_RECOVERY_TARGET', short_desc => 'Sets the LSN of the write-ahead log location up to which recovery will proceed.', variable => 'recovery_target_lsn_string', boot_val => '""', @@ -2396,7 +2396,7 @@ assign_hook => 'assign_recovery_target_lsn', }, -{ name => 'recovery_target_name', type => 'string', context => 'PGC_POSTMASTER', group => 'WAL_RECOVERY_TARGET', +{ name => 'recovery_target_name', type => 'string', context => 'PGC_SIGHUP', group => 'WAL_RECOVERY_TARGET', short_desc => 'Sets the named restore point up to which recovery will proceed.', variable => 'recovery_target_name_string', boot_val => '""', @@ -2404,7 +2404,7 @@ assign_hook => 'assign_recovery_target_name', }, -{ name => 'recovery_target_time', type => 'string', context => 'PGC_POSTMASTER', group => 'WAL_RECOVERY_TARGET', +{ name => 'recovery_target_time', type => 'string', context => 'PGC_SIGHUP', group => 'WAL_RECOVERY_TARGET', short_desc => 'Sets the time stamp up to which recovery will proceed.', variable => 'recovery_target_time_string', boot_val => '""', @@ -2420,7 +2420,7 @@ assign_hook => 'assign_recovery_target_timeline', }, -{ name => 'recovery_target_xid', type => 'string', context => 'PGC_POSTMASTER', group => 'WAL_RECOVERY_TARGET', +{ name => 'recovery_target_xid', type => 'string', context => 'PGC_SIGHUP', group => 'WAL_RECOVERY_TARGET', short_desc => 'Sets the transaction ID up to which recovery will proceed.', variable => 'recovery_target_xid_string', boot_val => '""', diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c index 1e14b7b4af060..0ee2ba6ab9005 100644 --- a/src/backend/utils/misc/guc_tables.c +++ b/src/backend/utils/misc/guc_tables.c @@ -647,10 +647,10 @@ static bool assert_enabled = DEFAULT_ASSERT_ENABLED; static bool exec_backend_enabled = EXEC_BACKEND_ENABLED; static char *recovery_target_timeline_string; -static char *recovery_target_string; -static char *recovery_target_xid_string; -static char *recovery_target_name_string; -static char *recovery_target_lsn_string; +char *recovery_target_string; +char *recovery_target_xid_string; +char *recovery_target_name_string; +char *recovery_target_lsn_string; /* should be static, but commands/variable.c needs to get at this */ char *role_string; diff --git a/src/include/access/xlogrecovery.h b/src/include/access/xlogrecovery.h index 2842106b28580..88ed75cf3f02b 100644 --- a/src/include/access/xlogrecovery.h +++ b/src/include/access/xlogrecovery.h @@ -60,6 +60,19 @@ typedef enum RecoveryPauseState RECOVERY_PAUSED, /* recovery is paused */ } RecoveryPauseState; +/* + * RecoveryPauseReason tracks WHY recovery is paused, to distinguish between + * "paused at target with action=pause" and "paused via pg_wal_replay_pause()". + * This lets us correctly handle the case where the recovery target time is + * changed during a pause: we resume replay toward the new target, rather than + * treating it as a pg_wal_replay_resume() (which would proceed to promotion). + */ +typedef enum RecoveryPauseReason +{ + RECOVERY_PAUSE_NONE, /* not paused, or paused by pg_wal_replay_pause() */ + RECOVERY_PAUSE_AT_TARGET /* paused because recovery_target reached with action=pause */ +} RecoveryPauseReason; + /* * Shared-memory state for WAL recovery. */ @@ -116,8 +129,9 @@ typedef struct XLogRecoveryCtlData * only relevant for replication or archive recovery */ TimestampTz currentChunkStartTime; - /* Recovery pause state */ + /* Recovery pause state and reason */ RecoveryPauseState recoveryPauseState; + RecoveryPauseReason recoveryPauseReason; ConditionVariable recoveryNotPausedCV; slock_t info_lck; /* locks shared variables shown above */ @@ -139,6 +153,12 @@ extern PGDLLIMPORT char *archiveCleanupCommand; extern PGDLLIMPORT TransactionId recoveryTargetXid; extern PGDLLIMPORT char *recovery_target_time_string; extern PGDLLIMPORT TimestampTz recoveryTargetTime; + +/* GUC string variables for recovery targets, defined in guc_tables.c */ +extern char *recovery_target_string; +extern char *recovery_target_xid_string; +extern char *recovery_target_name_string; +extern char *recovery_target_lsn_string; extern PGDLLIMPORT const char *recoveryTargetName; extern PGDLLIMPORT XLogRecPtr recoveryTargetLSN; extern PGDLLIMPORT RecoveryTargetType recoveryTarget; @@ -216,6 +236,8 @@ extern bool HotStandbyActive(void); extern XLogRecPtr GetXLogReplayRecPtr(TimeLineID *replayTLI); extern RecoveryPauseState GetRecoveryPauseState(void); extern void SetRecoveryPause(bool recoveryPause); +extern RecoveryPauseReason GetRecoveryPauseReason(void); +extern void SetRecoveryPauseReason(RecoveryPauseReason reason); extern void GetXLogReceiptTime(TimestampTz *rtime, bool *fromStream); extern TimestampTz GetLatestXTime(void); extern TimestampTz GetCurrentChunkReplayStartTime(void); diff --git a/src/test/recovery/meson.build b/src/test/recovery/meson.build index 36d789720a3c8..9f8a5231d42ab 100644 --- a/src/test/recovery/meson.build +++ b/src/test/recovery/meson.build @@ -61,6 +61,7 @@ tests += { 't/050_redo_segment_missing.pl', 't/051_effective_wal_level.pl', 't/052_checkpoint_segment_missing.pl', + 't/053_recovery_target_time_reload.pl', ], }, } diff --git a/src/test/recovery/t/053_recovery_target_time_reload.pl b/src/test/recovery/t/053_recovery_target_time_reload.pl new file mode 100644 index 0000000000000..84e7add2524b1 --- /dev/null +++ b/src/test/recovery/t/053_recovery_target_time_reload.pl @@ -0,0 +1,365 @@ + +# Copyright (c) 2021-2026, PostgreSQL Global Development Group +# +# Test for recovery_target_* configuration reload without server restart. +# Verifies that changing recovery target parameters via pg_reload_conf() +# while recovery is paused at the target causes replay to resume toward +# the new target. Covers recovery_target_time, recovery_target_lsn, +# recovery_target_action, and recovery_target_inclusive. + +use strict; +use warnings FATAL => 'all'; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; + +# Initialize primary with WAL archiving +my $node_primary = PostgreSQL::Test::Cluster->new('primary'); +$node_primary->init(has_archiving => 1, allows_streaming => 1); +$node_primary->start; + +# Create test table and insert initial data (batch 1 -- included in backup) +$node_primary->safe_psql('postgres', + "CREATE TABLE test_data(id serial, val text, created_at timestamptz DEFAULT now())"); +$node_primary->safe_psql('postgres', + "INSERT INTO test_data(val) SELECT 'batch1_' || g FROM generate_series(1,100) g"); + +# Take base backup (needed for all standbys) +$node_primary->backup('my_backup'); + +# Insert batch 2 +$node_primary->safe_psql('postgres', + "INSERT INTO test_data(val) SELECT 'batch2_' || g FROM generate_series(1,100) g"); + +# Small sleep to ensure distinct timestamps between batches +$node_primary->safe_psql('postgres', "SELECT pg_sleep(1)"); + +# Capture timestamp T1 (after batch 2, before batch 3) +my $recovery_time_t1 = + $node_primary->safe_psql('postgres', "SELECT now()"); + +$node_primary->safe_psql('postgres', "SELECT pg_sleep(1)"); + +# Capture LSN L1 (after batch 2, before batch 3) -- used by TEST 6 +my $lsn_before_batch3 = + $node_primary->safe_psql('postgres', "SELECT pg_current_wal_lsn()"); + +# Insert batch 3 +$node_primary->safe_psql('postgres', + "INSERT INTO test_data(val) SELECT 'batch3_' || g FROM generate_series(1,100) g"); + +$node_primary->safe_psql('postgres', "SELECT pg_sleep(1)"); + +# Capture timestamp T2 (after batch 3, before batch 4) +my $recovery_time_t2 = + $node_primary->safe_psql('postgres', "SELECT now()"); + +# Capture LSN L2 (after batch 3, before batch 4) -- used by TEST 6 +my $lsn_after_batch3 = + $node_primary->safe_psql('postgres', "SELECT pg_current_wal_lsn()"); + +$node_primary->safe_psql('postgres', "SELECT pg_sleep(1)"); + +# Insert batch 4 (beyond all targets) +$node_primary->safe_psql('postgres', + "INSERT INTO test_data(val) SELECT 'batch4_' || g FROM generate_series(1,100) g"); + +# Force WAL switch to ensure archiving completes +$node_primary->safe_psql('postgres', "SELECT pg_switch_wal()"); + +# Wait for the WAL file to be archived before creating standbys +# that rely on archive recovery. +sleep(2); + +############################################################################### +# TEST 1: Pause at T1, advance to T2, verify resume and re-pause +############################################################################### + +my $node_standby = PostgreSQL::Test::Cluster->new('standby1'); +$node_standby->init_from_backup($node_primary, 'my_backup', + has_restoring => 1); + +$node_standby->append_conf('postgresql.conf', qq( +recovery_target_time = '$recovery_time_t1' +recovery_target_action = 'pause' +)); + +$node_standby->start; + +# Wait for recovery to pause at T1 +$node_standby->poll_query_until('postgres', + "SELECT pg_get_wal_replay_pause_state() = 'paused'") + or die "Timed out waiting for recovery to pause at T1"; + +# Verify data: should have batch1 (100) + batch2 (100) = 200 rows +# (batch3 not yet replayed because T1 is between batch2 and batch3) +my $count = $node_standby->safe_psql('postgres', + "SELECT count(*) FROM test_data"); +is($count, '200', 'TEST 1a: correct data at recovery target T1'); + +# Record log position before reload so we can check for log messages +my $log_offset = -s $node_standby->logfile; + +# Now advance target to T2 via config file change and reload +$node_standby->adjust_conf('postgresql.conf', 'recovery_target_time', + "'$recovery_time_t2'"); +$node_standby->reload; + +# Wait for the "recovery target changed" log message, which confirms +# the recovery process detected the new target and resumed replay. +$node_standby->wait_for_log( + qr/recovery target changed, resuming WAL replay/, + $log_offset); + +# Wait for recovery to pause again (at T2 this time) +$node_standby->poll_query_until('postgres', + "SELECT pg_get_wal_replay_pause_state() = 'paused'") + or die "Timed out waiting for recovery to re-pause at T2"; + +# Verify data: should have batch1 + batch2 + batch3 = 300 rows +$count = $node_standby->safe_psql('postgres', + "SELECT count(*) FROM test_data"); +is($count, '300', 'TEST 1b: correct data after advancing target to T2'); + +############################################################################### +# TEST 2: Reload with same time -- verify no-op (still paused) +############################################################################### + +# We're paused at T2. Reload with same T2 value. +$node_standby->reload; + +# Brief wait to confirm it doesn't resume +sleep(2); +my $pause_state = $node_standby->safe_psql('postgres', + "SELECT pg_get_wal_replay_pause_state()"); +is($pause_state, 'paused', + 'TEST 2a: same target reload is no-op, still paused'); + +$count = $node_standby->safe_psql('postgres', + "SELECT count(*) FROM test_data"); +is($count, '300', 'TEST 2b: data unchanged after same-target reload'); + +############################################################################### +# TEST 3: Reload with earlier time -- verify no-op (still paused) +############################################################################### + +$node_standby->adjust_conf('postgresql.conf', 'recovery_target_time', + "'$recovery_time_t1'"); +$node_standby->reload; + +# Brief wait to confirm it doesn't resume +sleep(2); +$pause_state = $node_standby->safe_psql('postgres', + "SELECT pg_get_wal_replay_pause_state()"); +is($pause_state, 'paused', + 'TEST 3a: earlier target reload is no-op, still paused'); + +$count = $node_standby->safe_psql('postgres', + "SELECT count(*) FROM test_data"); +is($count, '300', 'TEST 3b: data unchanged after earlier-target reload'); + +############################################################################### +# TEST 4: pg_wal_replay_resume() while paused at target proceeds to +# promotion, does NOT re-enter replay +############################################################################### + +# Reset target back to T2 first (so we're paused at a well-defined point) +$node_standby->adjust_conf('postgresql.conf', 'recovery_target_time', + "'$recovery_time_t2'"); +$node_standby->reload; +sleep(1); + +# Call pg_wal_replay_resume() -- this should proceed to promote +$node_standby->safe_psql('postgres', "SELECT pg_wal_replay_resume()"); + +# Wait for promotion to complete +$node_standby->poll_query_until('postgres', + "SELECT pg_is_in_recovery() = false") + or die "Timed out waiting for promotion after pg_wal_replay_resume()"; + +my $in_recovery = $node_standby->safe_psql('postgres', + "SELECT pg_is_in_recovery()"); +is($in_recovery, 'f', + 'TEST 4: pg_wal_replay_resume() promotes, does not re-enter replay'); + +$node_standby->stop; + +############################################################################### +# TEST 5: Mutual exclusion -- reject recovery_target_time when another +# target type is already set +############################################################################### + +my $node_standby5 = PostgreSQL::Test::Cluster->new('standby5'); +$node_standby5->init_from_backup($node_primary, 'my_backup', + has_restoring => 1); + +# Set recovery_target_name first, then try to also set recovery_target_time. +# PostgreSQL should reject the startup due to multiple recovery targets. +$node_standby5->append_conf('postgresql.conf', qq( +recovery_target_name = 'does_not_matter' +recovery_target_time = '$recovery_time_t1' +recovery_target_action = 'pause' +)); + +my $res = run_log( + [ + 'pg_ctl', + '--pgdata' => $node_standby5->data_dir, + '--log' => $node_standby5->logfile, + 'start', + ]); +ok(!$res, 'TEST 5a: startup with conflicting recovery targets fails'); + +my $logfile = slurp_file($node_standby5->logfile()); +like( + $logfile, + qr/Cannot set recovery_target_time when another recovery target type is already set|multiple recovery targets specified/, + 'TEST 5b: conflicting recovery target types are rejected'); + +############################################################################### +# TEST 6: LSN target — pause, advance LSN, verify resume +############################################################################### + +my $node_standby6 = PostgreSQL::Test::Cluster->new('standby6'); +$node_standby6->init_from_backup($node_primary, 'my_backup', + has_restoring => 1); + +# Set recovery_target_lsn to L1 (before batch 3). With inclusive = true +# (the default), recovery stops after replaying the record at or past L1. +# Because L1 was captured between batch 2 and batch 3, pausing at L1 means +# batch 3 has not yet been replayed. +$node_standby6->append_conf('postgresql.conf', qq( +recovery_target_lsn = '$lsn_before_batch3' +recovery_target_action = 'pause' +recovery_target_timeline = 'current' +)); + +$node_standby6->start; + +# Wait for recovery to pause at L1 +$node_standby6->poll_query_until('postgres', + "SELECT pg_get_wal_replay_pause_state() = 'paused'") + or die "Timed out waiting for recovery to pause at LSN L1"; + +# Verify data: should have batch1 (100) + batch2 (100) = 200 rows +$count = $node_standby6->safe_psql('postgres', + "SELECT count(*) FROM test_data"); +is($count, '200', 'TEST 6a: correct data at recovery target LSN L1'); + +# Record log position before reload +$log_offset = -s $node_standby6->logfile; + +# Advance target to L2 (after batch 3, before batch 4) +$node_standby6->adjust_conf('postgresql.conf', 'recovery_target_lsn', + "'$lsn_after_batch3'"); +$node_standby6->reload; + +# Wait for the "recovery target changed" log message +$node_standby6->wait_for_log( + qr/recovery target changed, resuming WAL replay/, + $log_offset); + +# Wait for recovery to pause again (at L2 this time) +$node_standby6->poll_query_until('postgres', + "SELECT pg_get_wal_replay_pause_state() = 'paused'") + or die "Timed out waiting for recovery to re-pause at LSN L2"; + +# Verify data: should have batch1 + batch2 + batch3 = 300 rows +$count = $node_standby6->safe_psql('postgres', + "SELECT count(*) FROM test_data"); +is($count, '300', 'TEST 6b: correct data after advancing LSN target to L2'); + +$node_standby6->stop; + +############################################################################### +# TEST 7: recovery_target_action change — pause to promote while paused +############################################################################### + +my $node_standby7 = PostgreSQL::Test::Cluster->new('standby7'); +$node_standby7->init_from_backup($node_primary, 'my_backup', + has_restoring => 1); + +$node_standby7->append_conf('postgresql.conf', qq( +recovery_target_time = '$recovery_time_t1' +recovery_target_action = 'pause' +recovery_target_timeline = 'current' +)); + +$node_standby7->start; + +# Wait for recovery to pause at T1 +$node_standby7->poll_query_until('postgres', + "SELECT pg_get_wal_replay_pause_state() = 'paused'") + or die "Timed out waiting for recovery to pause at T1 (TEST 7)"; + +# Record log position before reload +$log_offset = -s $node_standby7->logfile; + +# Change action from 'pause' to 'promote' and reload +$node_standby7->adjust_conf('postgresql.conf', 'recovery_target_action', + "'promote'"); +$node_standby7->reload; + +# Wait for the log message confirming action change +$node_standby7->wait_for_log( + qr/recovery_target_action changed, unpausing recovery/, + $log_offset); + +# Wait for the server to promote (exit recovery) +$node_standby7->poll_query_until('postgres', + "SELECT pg_is_in_recovery() = false") + or die "Timed out waiting for promotion after action change (TEST 7)"; + +$in_recovery = $node_standby7->safe_psql('postgres', + "SELECT pg_is_in_recovery()"); +is($in_recovery, 'f', + 'TEST 7: changing recovery_target_action to promote causes promotion'); + +$node_standby7->stop; + +############################################################################### +# TEST 8: recovery_target_inclusive toggle is accepted on reload +############################################################################### + +my $node_standby8 = PostgreSQL::Test::Cluster->new('standby8'); +$node_standby8->init_from_backup($node_primary, 'my_backup', + has_restoring => 1); + +$node_standby8->append_conf('postgresql.conf', qq( +recovery_target_time = '$recovery_time_t2' +recovery_target_action = 'pause' +recovery_target_inclusive = true +recovery_target_timeline = 'current' +)); + +$node_standby8->start; + +# Wait for recovery to pause at T2 +$node_standby8->poll_query_until('postgres', + "SELECT pg_get_wal_replay_pause_state() = 'paused'") + or die "Timed out waiting for recovery to pause at T2 (TEST 8)"; + +# Verify the current value of recovery_target_inclusive +my $inclusive_val = $node_standby8->safe_psql('postgres', + "SHOW recovery_target_inclusive"); +is($inclusive_val, 'on', + 'TEST 8a: recovery_target_inclusive initially on'); + +# Toggle recovery_target_inclusive to false via reload +$node_standby8->adjust_conf('postgresql.conf', 'recovery_target_inclusive', + 'false'); +$node_standby8->reload; + +# Verify the parameter was accepted and changed +$node_standby8->poll_query_until('postgres', + "SELECT current_setting('recovery_target_inclusive') = 'off'") + or die "Timed out waiting for recovery_target_inclusive to change (TEST 8)"; + +$inclusive_val = $node_standby8->safe_psql('postgres', + "SHOW recovery_target_inclusive"); +is($inclusive_val, 'off', + 'TEST 8b: recovery_target_inclusive changed to off after reload'); + +$node_standby8->stop; + +done_testing();