821ff9b9ef
Adds a Kubernetes-ZVMA companion to the existing Windows-ZVM recipe:
- scripts/examples/zerto-zvma-send.ps1 - Zerto-side sender for both
pre and post phases, packages the Zerto* env vars into a structured
JSON body and POSTs to a {phase}-templated webhook URL.
- scripts/examples/zerto-receiver-notify.ps1 - server-side receiver
that posts a Slack/Teams notification, with phase-aware formatting
and ZertoForce highlighted on pre.
- scripts/examples/zerto-receiver-vm-healthcheck.ps1 - server-side
receiver that pings + port-probes each VM in VmDisplayNames after
failover and writes a per-run JSON report.
- scripts/examples/send-env-vars.ps1 + save-env-vars.ps1 - generic
env-dump client/receiver pair (the diagnostic that surfaced what
the ZVMA scripts-service container exposes).
- docs/recipes/zerto-zvma-pre-post.md - full walkthrough mirroring
the existing Windows-ZVM recipe's structure.
- README.md and docs/README.md - link the new recipe and examples.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
141 lines
4.9 KiB
PowerShell
141 lines
4.9 KiB
PowerShell
<#
|
|
.SYNOPSIS
|
|
Webhook-server-side receiver: post-failover VM health check. Pings each
|
|
VM in the VPG and probes a configurable TCP port; writes a per-run
|
|
report to disk.
|
|
|
|
.DESCRIPTION
|
|
Intended for the POST-recovery webhook only - on a Test or real Failover,
|
|
once the VMs are powered on at the recovery site, we can spot-check that
|
|
they responded to ICMP and that a known port is listening (RDP, SSH,
|
|
HTTP, etc).
|
|
|
|
Skips itself entirely on the pre-recovery phase (nothing's running yet)
|
|
and on $z.operation values that don't bring VMs up.
|
|
|
|
Wire up one endpoint:
|
|
/hook/zerto-post -> this script
|
|
DataPassing: [x] Stdin JSON
|
|
ResponseMode: async
|
|
|
|
.NOTES
|
|
VmDisplayNames is a comma-separated list for multi-VM VPGs; some Zerto
|
|
versions append parenthesised suffixes (e.g. "vm1(1)(1)(1)") to disambiguate
|
|
after Test failover. We strip the trailing parenthesised suffixes when
|
|
resolving DNS so the recovered hostname is what we ping.
|
|
|
|
Endpoint config:
|
|
ExecutorType: WindowsPowerShell or PowerShell 7
|
|
ScriptPath: C:\scripts\zerto-receiver-vm-healthcheck.ps1
|
|
DataPassing: [x] Stdin JSON
|
|
ResponseMode: async
|
|
TimeoutSeconds: 120 (this script does network I/O - bump from default)
|
|
#>
|
|
|
|
[CmdletBinding()]
param(
    # TCP port probed on every VM. 3389 = RDP; use 22 for Linux, 80/443 for a web tier.
    [int]
    $ProbePort = 3389,

    # Ping timeout in milliseconds.
    [int]
    $PingTimeout = 2000,

    # Directory that receives the per-run JSON report.
    [string]
    $ReportDir = 'C:\ProgramData\WebhookServer\zerto-healthchecks'
)

# Promote cmdlet errors to terminating errors for the rest of the script.
$ErrorActionPreference = 'Stop'
|
|
|
|
# --- read + parse payload -------------------------------------------------
# The request body arrives on stdin (DataPassing: Stdin JSON); read all of it
# before parsing.
$body = [Console]::In.ReadToEnd()
if ([string]::IsNullOrWhiteSpace($body)) {
    # $ErrorActionPreference is 'Stop', which would turn a plain Write-Error
    # into a terminating error and abort the script before 'exit 2' runs.
    # Force a non-terminating error here so the intended exit code is used.
    Write-Error 'Empty stdin.' -ErrorAction Continue
    exit 2
}
$p = $body | ConvertFrom-Json

# Shorthand for the payload fields used throughout the rest of the script.
$z     = $p.zerto
$phase = $p.phase
$op    = $z.operation

# Skip if this isn't a post-phase run for an op that powers VMs on.
if ($phase -ne 'post') {
    Write-Host "Phase '$phase' - nothing to check yet, skipping."
    exit 0
}
if ($op -notin @('Test','Failover','Move','FailoverBeforeCommit','FailoverDuringCommit')) {
    Write-Host "Operation '$op' doesn't bring VMs up; skipping."
    exit 0
}
|
|
|
|
# --- parse VM list --------------------------------------------------------
|
|
function Strip-ZertoSuffix {
    <#
    .SYNOPSIS
        Removes any run of trailing "(...)" groups from a VM display name.
    .EXAMPLE
        Strip-ZertoSuffix 'ubuntu-2404(1)(1)(1)'   # -> 'ubuntu-2404'
    #>
    param([string] $name)

    # One or more parenthesised groups, optionally followed by whitespace,
    # anchored at the end of the string.
    $trailingGroups = '(\([^)]*\))+\s*$'
    $withoutSuffix  = $name -replace $trailingGroups, ''
    return $withoutSuffix.Trim()
}
|
|
|
|
# vmDisplayNames is a single string; entries are separated by ',' or ';'.
$splitNames = $z.vmDisplayNames -split '[,;]'

# Trim each entry and drop the empty ones.
$rawNames = $splitNames |
    ForEach-Object { $_.Trim() } |
    Where-Object { $_ }

if (-not $rawNames) {
    Write-Warning 'No VM display names in payload - nothing to check.'
    exit 0
}
|
|
|
|
# --- run checks -----------------------------------------------------------
# For every VM: one ICMP echo and one TCP connect to $ProbePort. Each VM
# yields one result object; failures are recorded rather than thrown so a
# single unreachable VM does not abort the whole run.
#
# @( ... ) keeps $results an array even when the VPG contains a single VM.
$results = @(foreach ($raw in $rawNames) {
    $clean  = Strip-ZertoSuffix $raw
    $pingOk = $false
    $portOk = $false
    $err    = $null

    # Use the .NET Ping class instead of Test-Connection: the cmdlet's
    # -TimeoutSeconds parameter is not available in Windows PowerShell 5.1,
    # where the call fails parameter binding and every VM is reported as a
    # ping failure. Ping.Send works on both editions and takes the timeout
    # in milliseconds, so $PingTimeout is used as-is.
    $pinger = New-Object System.Net.NetworkInformation.Ping
    try {
        $reply  = $pinger.Send($clean, $PingTimeout)
        $pingOk = ($reply.Status -eq [System.Net.NetworkInformation.IPStatus]::Success)
    } catch {
        # Name resolution and argument errors surface here; keep the
        # innermost message, which is the informative one.
        $err = "ping: $($_.Exception.GetBaseException().Message)"
    } finally {
        $pinger.Dispose()
    }

    try {
        $portOk = (Test-NetConnection -ComputerName $clean -Port $ProbePort `
                -InformationLevel Quiet -WarningAction SilentlyContinue)
    } catch {
        # Append to any ping error already recorded; '-ne $null' drops the
        # empty slot when the ping step did not fail.
        $err = (@($err, "port: $($_.Exception.Message)") -ne $null) -join '; '
    }

    [pscustomobject]@{
        DisplayName = $raw
        Resolved    = $clean
        PingOk      = $pingOk
        PortOk      = $portOk
        ProbePort   = $ProbePort
        Error       = $err
    }
})
|
|
|
|
# --- write report ---------------------------------------------------------
if (-not (Test-Path $ReportDir)) {
    New-Item -ItemType Directory -Path $ReportDir -Force | Out-Null
}

# Build a filesystem-safe file name: <vpg>-<operation>-<UTC timestamp>.json
$safeVpg = ($z.vpgName -replace '[^A-Za-z0-9_.-]','_')
$stamp   = (Get-Date).ToUniversalTime().ToString('yyyyMMddTHHmmssZ')
$file    = Join-Path $ReportDir "$safeVpg-$op-$stamp.json"

# Force arrays before counting. In Windows PowerShell 5.1 a lone
# [pscustomobject] has no Count property, so for a single VM (or a single
# failing VM) the counts came back $null, the issue total was $null, and the
# script exited 0 despite a failed check. Wrapping in @() also makes 'vms'
# serialise as a JSON array regardless of how many VMs were checked.
$vmResults    = @($results)
$pingFailures = @($vmResults | Where-Object { -not $_.PingOk }).Count
$portFailures = @($vmResults | Where-Object { -not $_.PortOk }).Count

$report = [ordered]@{
    vpgName     = $z.vpgName
    operation   = $op
    phase       = $phase
    capturedAt  = $p.capturedAt
    completedAt = (Get-Date).ToUniversalTime().ToString('o')
    probePort   = $ProbePort
    vms         = $vmResults
    summary     = @{
        total        = $vmResults.Count
        pingFailures = $pingFailures
        portFailures = $portFailures
    }
}
$report | ConvertTo-Json -Depth 5 | Set-Content -Path $file -Encoding utf8

# Console output goes back via the webhook callback (if configured) so the
# Zerto-side script log shows a quick summary even though the call is async.
$bad = $pingFailures + $portFailures
Write-Host "[$op/$phase] $($z.vpgName): $($vmResults.Count) VM(s), $bad issue(s). Report: $file"

# Exit non-zero if anything failed, so the webhook server's failOnNonZeroExit
# turns this into a 502 for the caller (and shows up in the run history).
if ($bad -gt 0) { exit 1 }
|