kvm-backup-scripts/kvm-backup-zvol-iscsi.sh
James Coleman 25f11d5535 Update backup scripts for remote borg repo, add zvol-iscsi backup, and fix bugs
- Update copyright year to 2026
- Point BORG_REPO to remote backup host
- Fix RBD pid file name to avoid collision with images backup
- Fix snapshot count typo and off-by-one error in RBD cleanup
- Add error handling for rbd snap create
- Fix prune log messages to reference domain instead of image
- Add new kvm-backup-zvol-iscsi.sh for ZVol over iSCSI backups
2026-03-11 22:18:44 -05:00

309 lines
10 KiB
Bash

#!/bin/bash
# Copyright (c) 2026 Mr. Gecko's Media (James Coleman). http://mrgeckosmedia.com/
# This is for backing up ZFS zvol-backed VMs exported via iSCSI.
# Runs on the ZFS server where the zvols live. VM domains are
# discovered via virsh on remote libvirt hosts connected over iSCSI,
# matching the domain-driven approach of the other kvm-backup scripts.
# Lock file holding the PID of the run in progress; used to refuse
# overlapping runs.
PIDFILE=/tmp/backup-zvol-iscsi.pid
# A leftover file only blocks this run when the PID stored in it still
# belongs to a live process; a stale file is simply overwritten below.
if [[ -f $PIDFILE ]]; then
  PID=$(<"$PIDFILE")
  ps -p "$PID" >/dev/null && {
    echo "Backup process already running, exiting."
    exit 1
  }
fi
# Record this process as the active run.
echo "$BASHPID" >"$PIDFILE"
# Borg repository that receives every archive created by this script.
BORG_REPO="root@10.0.0.5:/media/Storage/Backup/kvm"
export BORG_REPO
# If the repository is protected by a passphrase, export it here
# (a literal value, or a command substitution that retrieves it).
# export BORG_PASSPHRASE=''
# Pre-set answers to borg's confirmation prompts so the script can run
# unattended.
export BORG_UNKNOWN_UNENCRYPTED_REPO_ACCESS_IS_OK="yes" \
  BORG_RELOCATED_REPO_ACCESS_IS_OK="yes"
export BORG_CHECK_I_KNOW_WHAT_I_AM_DOING="NO" \
  BORG_DELETE_I_KNOW_WHAT_I_AM_DOING="NO"
# Retention options handed to "borg prune"; an empty string disables
# pruning altogether.
PRUNE_OPTIONS='--keep-daily 7 --keep-weekly 4 --keep-monthly 6'
# How many ZFS snapshots are retained for each zvol.
SNAPSHOTS_KEEP=2
# Parent ZFS dataset under which the zvols to back up live.
ZFS_PARENT='tank/kvm'
# Remote libvirt hosts used for VM discovery and XML backup over SSH.
# Every entry is an SSH destination (e.g. "root@hostname"). The script
# runs virsh list and virsh domblklist on each host to find VMs with
# iSCSI-attached zvols and then backs up the matching local zvols.
REMOTE_HOSTS=(
  'root@kiki'
  'root@gaming-pc'
)
# Set to true to also discover and back up VMs from the local libvirt.
BACKUP_LOCAL='true'
# Remove the PID file on exit.
# Installed as the EXIT trap, so it runs on every exit path of the main
# shell. Uses "rm -f --" so that a PID file which is already gone (or a
# name starting with "-") does not produce an error while exiting.
# Globals: PIDFILE (read).
cleanup() {
  rm -f -- "$PIDFILE"
}
trap cleanup EXIT
# Optional first command-line argument: restrict the run to the domain
# with exactly this name. Empty means "all domains".
BACKUP_DOMAIN=$1
# Every snapshot taken during this run is named after the start time
# (date and time, with "-" instead of ":" in the time part).
SNAPSHOT_NAME="$(date +'%Y-%m-%dT%H-%M-%S')"
# Report a fatal error and abort the script with status 1.
# The PID file is removed by the EXIT trap that fires when the shell exits.
# Arguments: $1 - message to print.
# Outputs:   the message, followed by a newline, on stdout (the same
#            stream the rest of the script logs to).
# printf is used instead of echo so that a message such as "-n" or "-e"
# is printed literally rather than being taken as an echo option.
fail() {
  printf '%s\n' "$1"
  exit 1
}
# Destroy the oldest ZFS snapshots of a zvol, keeping the SNAPSHOTS_KEEP
# most recent ones.
# Arguments: $1 - zvol dataset name (e.g. "tank/kvm/MainServer").
# Globals:   SNAPSHOTS_KEEP (read).
# Outputs:   one "Removing snapshot: ..." line per destroyed snapshot.
# Exits the script via fail if the snapshot listing fails.
# NOTE: every snapshot of the zvol is counted and eligible for removal,
# not only the ones created by this script.
# All working variables are local: the caller iterates with its own
# counter while calling this function, so nothing here may leak out.
cleanupSnapshots() {
  local zvol="$1"
  local listing name idx excess
  local -a snapshots=()

  # "-s creation" sorts oldest first, so the front of the list is what
  # gets removed. The exit status is taken straight from the command
  # substitution, no temporary status file is needed.
  if ! listing=$(zfs list -t snapshot -o name -s creation -H -r "$zvol" 2>/dev/null); then
    fail "Snapshot listing failed for $zvol"
  fi

  while read -r name; do
    [[ -n "$name" ]] && snapshots+=("$name")
  done <<<"$listing"

  # Number of snapshots beyond the retention limit (<= 0: nothing to do).
  excess=$((${#snapshots[@]} - SNAPSHOTS_KEEP))
  for ((idx = 0; idx < excess; idx++)); do
    name=${snapshots[idx]}
    echo "Removing snapshot: $name"
    # Best effort: a snapshot that cannot be destroyed (e.g. it is held)
    # must not abort the backup run, but it should be visible in the log.
    if ! zfs destroy "$name"; then
      echo "Warning: Failed to remove snapshot: $name"
    fi
  done
}
# Extract the zvol name from an iSCSI by-path device path.
# Example input: /dev/disk/by-path/ip-10.0.100.6:3260-iscsi-iqn.2026-03.im.gec.host:MainServer-lun-0
# Example output: MainServer
# The zvol name is the target name portion of the IQN (after the last colon,
# before the -lun- suffix).
# Arguments: $1 - device path.
# Outputs:   the zvol name on stdout; nothing when $1 contains no
#            "-iscsi-" marker.
# Returns:   0 on success, 1 when $1 is not an iSCSI by-path name.
extractZvolName() {
  local by_path="$1"
  local target
  # Without the marker there is no IQN to parse; print nothing so the
  # caller's empty-name check catches it.
  [[ "$by_path" == *-iscsi-* ]] || return 1
  # Strip up to the FIRST "-iscsi-" (shortest prefix match). Stripping up
  # to the last one would cut into a target name that itself contains
  # "-iscsi-".
  target="${by_path#*-iscsi-}"
  # Strip the "-lun-*" suffix.
  target="${target%-lun-*}"
  # The target name is after the last colon.
  printf '%s\n' "${target##*:}"
}
# Back up domains from a virsh source.
# Usage: backupDomains "ssh_prefix" "host_label"
# ssh_prefix: empty string for local virsh, or "ssh user@host" for remote.
# host_label: label used in log messages (e.g. "local", "kiki").
#
# For every domain listed by "virsh list --all" (or only BACKUP_DOMAIN when
# that is set) the function:
#   1. collects the domain's iSCSI-backed disks whose zvol exists under
#      ZFS_PARENT,
#   2. snapshots each zvol, streams the snapshot device into a borg
#      archive named "<domain>-<dev>-{now}", prunes, and trims old
#      snapshots,
#   3. stores the domain XML in an archive named "<domain>-xml-{now}" and
#      prunes.
# Globals read: BACKUP_DOMAIN, ZFS_PARENT, SNAPSHOT_NAME, PRUNE_OPTIONS.
# Any error ends the script through fail, except a failed domain listing
# on a remote host, which only prints a warning and returns.
backupDomains() {
  local SSH_PREFIX="$1"
  local HOST_LABEL="$2"
  local listing line status disk_idx
  local DOMAIN DEV IMAGE ZVOL_NAME ZVOL SNAP_DEV
  local -a ssh_cmd=() prune_args=() domain_lines=() disk_lines=()
  local -a DEVS=() ZVOL_NAMES=() stage_rc=()

  # Split "ssh user@host" into words once instead of relying on unquoted
  # expansion at every call site.
  if [[ -n "$SSH_PREFIX" ]]; then
    read -r -a ssh_cmd <<<"$SSH_PREFIX"
  fi

  # PRUNE_OPTIONS is a shell-syntax option string from the configuration.
  # It is parsed here, on its own, so that domain names (which come from
  # remote hosts) never pass through eval.
  if [[ -n "$PRUNE_OPTIONS" ]]; then
    if ! eval "prune_args=($PRUNE_OPTIONS)"; then
      fail "Invalid PRUNE_OPTIONS: $PRUNE_OPTIONS"
    fi
  fi

  # Fetch the complete domain list before doing any work. Every ssh call
  # in this function reads from /dev/null: ssh forwards its stdin to the
  # remote side and would otherwise consume input that belongs to the
  # script.
  status=0
  if [[ -n "$SSH_PREFIX" ]]; then
    listing=$("${ssh_cmd[@]}" "virsh list --all" 2>/dev/null </dev/null) || status=$?
  else
    listing=$(virsh list --all) || status=$?
  fi
  if ((status != 0)); then
    if [[ -n "$SSH_PREFIX" ]]; then
      echo "Warning: Domain listing from $HOST_LABEL failed (host may be unreachable)"
      return 0
    fi
    fail "Local domain listing failed"
  fi

  mapfile -t domain_lines <<<"$listing"
  # The first two lines are the table header and its separator.
  for line in "${domain_lines[@]:2}"; do
    # Columns are "Id Name State"; only the name is needed.
    read -r _ DOMAIN _ <<<"$line"
    # If the domain is empty, skip.
    if [[ -z "$DOMAIN" ]]; then
      continue
    fi
    # If a backup domain was provided, only backup that domain.
    if [[ -n "$BACKUP_DOMAIN" ]] && [[ "$BACKUP_DOMAIN" != "$DOMAIN" ]]; then
      continue
    fi

    # Get the block devices for this domain.
    status=0
    if [[ -n "$SSH_PREFIX" ]]; then
      listing=$("${ssh_cmd[@]}" "virsh domblklist '$DOMAIN'" 2>/dev/null </dev/null) || status=$?
    else
      listing=$(virsh domblklist "$DOMAIN") || status=$?
    fi
    if ((status != 0)); then
      fail "Domain block listing failed for $DOMAIN ($HOST_LABEL)"
    fi

    DEVS=()
    ZVOL_NAMES=()
    mapfile -t disk_lines <<<"$listing"
    for line in "${disk_lines[@]:2}"; do
      read -r DEV IMAGE <<<"$line"
      # Ignore empty line or no image.
      if [[ -z "$IMAGE" ]] || [[ "$IMAGE" == "-" ]]; then
        continue
      fi
      # Only process iSCSI by-path devices.
      if ! [[ "$IMAGE" =~ -iscsi- ]]; then
        continue
      fi
      # Extract the zvol name from the iSCSI path.
      ZVOL_NAME=$(extractZvolName "$IMAGE")
      if [[ -z "$ZVOL_NAME" ]]; then
        echo "Warning: Could not extract zvol name from $IMAGE, skipping"
        continue
      fi
      # Verify the zvol exists locally.
      ZVOL="$ZFS_PARENT/$ZVOL_NAME"
      if ! zfs list "$ZVOL" &>/dev/null; then
        echo "Warning: zvol $ZVOL does not exist locally, skipping"
        continue
      fi
      DEVS+=("$DEV")
      ZVOL_NAMES+=("$ZVOL_NAME")
    done

    # For each iSCSI disk, snapshot the zvol and back it up.
    # Iterating over the precomputed index list keeps the loop immune to
    # helper functions that modify counter variables.
    for disk_idx in "${!DEVS[@]}"; do
      DEV=${DEVS[$disk_idx]}
      ZVOL_NAME=${ZVOL_NAMES[$disk_idx]}
      ZVOL="$ZFS_PARENT/$ZVOL_NAME"
      # Create a ZFS snapshot.
      echo "Creating snapshot: $ZVOL@$SNAPSHOT_NAME"
      if ! zfs snapshot "$ZVOL@$SNAPSHOT_NAME"; then
        fail "Failed to create snapshot for $ZVOL"
      fi
      # Read the raw disk image from the snapshot's block device.
      # This produces a portable raw image that can be restored to
      # any virtual disk or converted with qemu-img, without
      # requiring ZFS on the restore target.
      SNAP_DEV="/dev/zvol/$ZVOL@$SNAPSHOT_NAME"
      echo "Creating backup for $DOMAIN ($DEV [$ZVOL])"
      dd if="$SNAP_DEV" bs=4M status=none | pv | borg create \
        --verbose \
        --stats \
        --show-rc \
        "::$DOMAIN-$DEV-{now}" -
      stage_rc=("${PIPESTATUS[@]}")
      # Every stage must succeed. Checking borg alone would accept an
      # empty archive when dd cannot read the snapshot device.
      if ((stage_rc[0] != 0 || stage_rc[1] != 0 || stage_rc[2] != 0)); then
        fail "Failed to backup $DOMAIN ($DEV)"
      fi
      # Prune if options are configured.
      if [[ -n "$PRUNE_OPTIONS" ]]; then
        echo "Pruning backups for $DOMAIN ($DEV)"
        if ! borg prune --list \
          --show-rc \
          --glob-archives "$DOMAIN-$DEV-*" \
          "${prune_args[@]}"; then
          fail "Failed to prune $DOMAIN ($DEV)"
        fi
      fi
      # Cleanup old ZFS snapshots.
      cleanupSnapshots "$ZVOL"
    done

    # Backup the domain XML. PIPESTATUS is captured inside each branch,
    # directly after the pipeline it belongs to.
    echo "Backing up $DOMAIN xml ($HOST_LABEL)"
    if [[ -n "$SSH_PREFIX" ]]; then
      "${ssh_cmd[@]}" "virsh dumpxml '$DOMAIN'" 2>/dev/null </dev/null | borg create \
        --verbose \
        --stats \
        --show-rc \
        "::$DOMAIN-xml-{now}" -
      stage_rc=("${PIPESTATUS[@]}")
    else
      virsh dumpxml "$DOMAIN" | borg create \
        --verbose \
        --stats \
        --show-rc \
        "::$DOMAIN-xml-{now}" -
      stage_rc=("${PIPESTATUS[@]}")
    fi
    if ((stage_rc[0] != 0 || stage_rc[1] != 0)); then
      fail "Failed to backup $DOMAIN xml"
    fi
    # Prune if options are configured.
    if [[ -n "$PRUNE_OPTIONS" ]]; then
      echo "Pruning xml backups for $DOMAIN"
      if ! borg prune --list \
        --show-rc \
        --glob-archives "$DOMAIN-xml-*" \
        "${prune_args[@]}"; then
        fail "Failed to prune $DOMAIN xml"
      fi
    fi
  done
}
# Local libvirt is handled first, and only when BACKUP_LOCAL is "true".
case "$BACKUP_LOCAL" in
  true) backupDomains "" "local" ;;
esac
# Then every configured remote libvirt host. The label passed along is
# the part of the SSH destination after the "@".
for host_idx in "${!REMOTE_HOSTS[@]}"; do
  REMOTE_HOST=${REMOTE_HOSTS[$host_idx]}
  REMOTE_NAME=${REMOTE_HOST##*@}
  backupDomains "ssh $REMOTE_HOST" "$REMOTE_NAME"
done
# Finally let borg compact the repository to free space.
borg compact