diff --git a/lib/libc/sys/swapon.2 b/lib/libc/sys/swapon.2 --- a/lib/libc/sys/swapon.2 +++ b/lib/libc/sys/swapon.2 @@ -28,7 +28,7 @@ .\" @(#)swapon.2 8.1 (Berkeley) 6/4/93 .\" $FreeBSD$ .\" -.Dd March 30, 2020 +.Dd December 2, 2021 .Dt SWAPON 2 .Os .Sh NAME @@ -37,6 +37,8 @@ .Sh LIBRARY .Lb libc .Sh SYNOPSIS +.In vm/vm_param.h +.In vm/swap_pager.h .In unistd.h .Ft int .Fn swapon "const char *special" @@ -63,6 +65,39 @@ system call disables paging and swapping on the given device. All associated swap metadata are deallocated, and the device is made available for other purposes. +.Pp +The +.Fa special +argument points either to the string which names the special device +used for swapping, or to the following structure +.Bd -literal +struct swapoff_new_args { + const char *name_old_syscall; + const char *name; + u_int flags; +}; +.Ed +use of which allows augmenting the syscall behavior. +The +.Va name_old_syscall +member must be initialized to +.Dv NULL +for the system to recognize the extended syscall. +It is recommended to zero the whole structure for future compatibility, +due to padding. +.Pp +The +.Va name +string is the special device name, same as the legacy syscall argument. +The +.Va flags +argument takes the following flags: +.Bl -tag -width SWAPOFF_FORCE +.It Dv SWAPOFF_FORCE +Turns off the very conservative heuristic which otherwise prevents swapoff +if it is possible that the total amount of free memory and remaining swap +device space is insufficient for system operations. 
+.El .Sh RETURN VALUES If an error has occurred, a value of -1 is returned and .Va errno diff --git a/sbin/swapon/swapon.8 b/sbin/swapon/swapon.8 --- a/sbin/swapon/swapon.8 +++ b/sbin/swapon/swapon.8 @@ -28,7 +28,7 @@ .\" @(#)swapon.8 8.1 (Berkeley) 6/5/93 .\" $FreeBSD$ .\" -.Dd May 19, 2020 +.Dd November 29, 2021 .Dt SWAPON 8 .Os .Sh NAME @@ -42,9 +42,9 @@ .Nm swapoff .Oo Fl F Ar fstab .Oc -.Fl aLq | Ar +.Fl afLq | Ar .Nm swapctl -.Op Fl AghklmsU +.Op Fl AfghklmsU .Oo .Fl a Ar | @@ -125,9 +125,15 @@ written to standard output when a swap device is removed. Note that .Nm swapoff -will fail and refuse to remove a swap device if there is insufficient +will fail and refuse to remove a swap device if a very conservative +estimate reports that there is insufficient VM (memory + remaining swap devices) to run the system. The +.Fl f +option turns off this heuristic, which could deadlock the system +if there is insufficient swap space remaining. +.Pp +The .Nm swapoff utility must move swapped pages out of the device being removed which could diff --git a/sbin/swapon/swapon.c b/sbin/swapon/swapon.c --- a/sbin/swapon/swapon.c +++ b/sbin/swapon/swapon.c @@ -51,6 +51,7 @@ #include #include #include +#include #include #include @@ -78,7 +79,7 @@ static enum { SWAPON, SWAPOFF, SWAPCTL } orig_prog, which_prog = SWAPCTL; -static int Eflag, qflag; +static int Eflag, fflag, qflag; int main(int argc, char **argv) @@ -101,7 +102,7 @@ doall = 0; etc_fstab = NULL; - while ((ch = getopt(argc, argv, "AadEghklLmqsUF:")) != -1) { + while ((ch = getopt(argc, argv, "AadEfghklLmqsUF:")) != -1) { switch(ch) { case 'A': if (which_prog == SWAPCTL) { @@ -128,6 +129,12 @@ else usage(); break; + case 'f': + if (which_prog == SWAPOFF) + fflag = 1; + else + usage(); + break; case 'g': hflag = 'G'; break; @@ -782,12 +789,18 @@ static const char * swap_on_off_sfile(const char *name, int doingall) { + struct swapoff_new_args sa; int error; if (which_prog == SWAPON) error = Eflag ? 
swapon_trim(name) : swapon(name); - else /* SWAPOFF */ - error = swapoff(name); + else { /* SWAPOFF */ + bzero(&sa, sizeof(sa)); + sa.name = name; + if (fflag) + sa.flags |= SWAPOFF_FORCE; + error = swapoff(&sa); + } if (error == -1) { switch (errno) { @@ -820,7 +833,7 @@ fprintf(stderr, "[-F fstab] -aLq | [-E] file ...\n"); break; case SWAPOFF: - fprintf(stderr, "[-F fstab] -aLq | file ...\n"); + fprintf(stderr, "[-F fstab] -afLq | file ...\n"); break; case SWAPCTL: fprintf(stderr, "[-AghklmsU] [-a file ... | -d file ...]\n"); diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c --- a/sys/kern/vfs_bio.c +++ b/sys/kern/vfs_bio.c @@ -1458,9 +1458,16 @@ printf("Final sync complete\n"); /* - * Unmount filesystems. Swapoff before unmount, - * because file-backed swap is non-operational after unmount - * of the underlying filesystem. + * Unmount filesystems and perform swapoff, to quiesce + * the system as much as possible. In particular, no + * I/O should be initiated from top levels since it + * might be abruptly terminated by reset, or otherwise + * erroneously handled because other parts of the + * system are disabled. + * + * Swapoff before unmount, because file-backed swap is + * non-operational after unmount of the underlying + * filesystem. 
*/ if (!KERNEL_PANICKED()) { swapoff_all(); diff --git a/sys/vm/swap_pager.h b/sys/vm/swap_pager.h --- a/sys/vm/swap_pager.h +++ b/sys/vm/swap_pager.h @@ -69,6 +69,16 @@ #define SW_UNMAPPED 0x01 #define SW_CLOSING 0x04 +struct swapoff_new_args { + const char *name_old_syscall; + const char *name; + u_int flags; + u_int pad0; + uintptr_t pad1[8]; +}; + +#define SWAPOFF_FORCE 0x00000001 + #ifdef _KERNEL extern int swap_pager_avail; diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c --- a/sys/vm/swap_pager.c +++ b/sys/vm/swap_pager.c @@ -470,7 +470,7 @@ static int swapongeom(struct vnode *); static int swaponvp(struct thread *, struct vnode *, u_long); static int swapoff_one(struct swdevt *sp, struct ucred *cred, - bool ignore_check); + u_int flags); /* * Swap bitmap functions @@ -2499,15 +2499,38 @@ struct vnode *vp; struct nameidata nd; struct swdevt *sp; - int error; + struct swapoff_new_args sa; + int error, probe_byte; error = priv_check(td, PRIV_SWAPOFF); if (error) return (error); + /* + * Detect old vs. new-style swapoff(2) syscall. The first + * pointer in the memory pointed to by uap->name is NULL for + * the new variant. 
+ */ + probe_byte = fubyte(uap->name); + switch (probe_byte) { + case -1: + return (EFAULT); + case 0: + error = copyin(uap->name, &sa, sizeof(sa)); + if (error != 0) + return (error); + if ((sa.flags & ~(SWAPOFF_FORCE)) != 0) + return (EINVAL); + break; + default: + bzero(&sa, sizeof(sa)); + sa.name = uap->name; + break; + } + sx_xlock(&swdev_syscall_lock); - NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, UIO_USERSPACE, uap->name); + NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, UIO_USERSPACE, sa.name); error = namei(&nd); if (error) goto done; @@ -2524,14 +2547,14 @@ error = EINVAL; goto done; } - error = swapoff_one(sp, td->td_ucred, false); + error = swapoff_one(sp, td->td_ucred, sa.flags); done: sx_xunlock(&swdev_syscall_lock); return (error); } static int -swapoff_one(struct swdevt *sp, struct ucred *cred, bool ignore_check) +swapoff_one(struct swdevt *sp, struct ucred *cred, u_int flags) { u_long nblks; #ifdef MAC @@ -2561,7 +2584,7 @@ * means that we can lose swap data when filesystems go away, * which is arguably worse. */ - if (!ignore_check && + if ((flags & SWAPOFF_FORCE) == 0 && vm_free_count() + swap_pager_avail < nblks + nswap_lowat) return (ENOMEM); @@ -2612,7 +2635,7 @@ devname = devtoname(sp->sw_vp->v_rdev); else devname = "[file]"; - error = swapoff_one(sp, thread0.td_ucred, true); + error = swapoff_one(sp, thread0.td_ucred, SWAPOFF_FORCE); if (error != 0) { printf("Cannot remove swap device %s (error=%d), " "skipping.\n", devname, error);