Fix zvol-iscsi backup: fd conflict, case-insensitive zvol lookup, and snapdev handling

- Read the domain list on fd 3 so inner commands that read stdin cannot consume it
- Resolve zvol names case-insensitively to match lowercased iSCSI IQNs
- Toggle snapdev=visible/hidden so snapshot block devices appear for dd
- Wait for snapshot device with udevadm settle before reading
- Pass volsize to pv for accurate progress display
- Document volmode=dev prerequisite to avoid zvol_create_minors deadlock
This commit is contained in:
James Coleman 2026-03-12 17:55:31 -05:00
parent 25f11d5535
commit e9cae05639

View File

@ -5,6 +5,13 @@
# Runs on the ZFS server where the zvols live. VM domains are
# discovered via virsh on remote libvirt hosts connected over iSCSI,
# matching the domain-driven approach of the other kvm-backup scripts.
#
# Prerequisites:
#   Set volmode=dev on each zvol before running this script so that the
#   kernel does not scan snapshot devices for partitions. Set it once
#   per zvol, ahead of time; do not toggle it at runtime, because
#   combining a volmode change with the snapdev toggling done by this
#   script can deadlock in zvol_create_minors. For example:
#     zfs set volmode=dev tank/kvm/<name>
# PID file used to prevent overlapping runs.
PIDFILE="/tmp/backup-zvol-iscsi.pid"
@ -133,7 +140,7 @@ backupDomains() {
local HOST_LABEL="$2"
DOMLIST_STATUS_TMP="/tmp/backup-zvol-domlist-$HOST_LABEL-tmp"
while read -r _ DOMAIN _; do
while read -r _ DOMAIN _ <&3; do
# If the domain is empty, skip.
if [[ -z "$DOMAIN" ]]; then
continue
@ -166,12 +173,21 @@ backupDomains() {
continue
fi
# Verify the zvol exists locally.
ZVOL="$ZFS_PARENT/$ZVOL_NAME"
if ! zfs list "$ZVOL" &>/dev/null; then
echo "Warning: zvol $ZVOL does not exist locally, skipping"
# Resolve the zvol name case-insensitively, since iSCSI IQN
# target names are typically lowercased.
RESOLVED_NAME=$(zfs list -o name -H -r "$ZFS_PARENT" -d 1 | while read -r ZN; do
BN="${ZN##*/}"
if [[ "${BN,,}" == "${ZVOL_NAME,,}" ]]; then
echo "$BN"
break
fi
done)
if [[ -z "$RESOLVED_NAME" ]]; then
echo "Warning: zvol matching $ZFS_PARENT/$ZVOL_NAME does not exist locally, skipping"
continue
fi
ZVOL_NAME="$RESOLVED_NAME"
ZVOL="$ZFS_PARENT/$ZVOL_NAME"
DEVS+=("$DEV")
ZVOL_NAMES+=("$ZVOL_NAME")
@ -202,19 +218,33 @@ backupDomains() {
ZVOL_NAME=${ZVOL_NAMES[$i]}
ZVOL="$ZFS_PARENT/$ZVOL_NAME"
# Make snapshot block devices visible so we can read them.
zfs set snapdev=visible "$ZVOL"
# Create a ZFS snapshot.
echo "Creating snapshot: $ZVOL@$SNAPSHOT_NAME"
if ! zfs snapshot "$ZVOL@$SNAPSHOT_NAME"; then
fail "Failed to create snapshot for $ZVOL"
fi
# Wait for the snapshot block device to appear.
SNAP_DEV="/dev/zvol/$ZVOL@$SNAPSHOT_NAME"
udevadm settle
for _ in $(seq 1 30); do
[[ -e "$SNAP_DEV" ]] && break
sleep 1
done
if [[ ! -e "$SNAP_DEV" ]]; then
fail "Snapshot device $SNAP_DEV did not appear"
fi
# Read the raw disk image from the snapshot's block device.
# This produces a portable raw image that can be restored to
# any virtual disk or converted with qemu-img, without
# requiring ZFS on the restore target.
SNAP_DEV="/dev/zvol/$ZVOL@$SNAPSHOT_NAME"
ZVOL_SIZE=$(zfs list -Hp -o volsize "$ZVOL")
echo "Creating backup for $DOMAIN ($DEV [$ZVOL])"
if ! dd if="$SNAP_DEV" bs=4M status=none | pv | borg create \
if ! dd if="$SNAP_DEV" bs=4M status=none | pv -s "$ZVOL_SIZE" | borg create \
--verbose \
--stats \
--show-rc \
@ -233,6 +263,9 @@ backupDomains() {
fi
fi
# Hide snapshot devices again.
zfs set snapdev=hidden "$ZVOL"
# Cleanup old ZFS snapshots.
cleanupSnapshots "$ZVOL"
done
@ -267,7 +300,7 @@ backupDomains() {
fail "Failed to prune $DOMAIN xml"
fi
fi
done < <(
done 3< <(
if [[ -n "$SSH_PREFIX" ]]; then
$SSH_PREFIX "virsh list --all" 2>/dev/null | tail -n +3
else