Index: lib/libc/sys/getdirentries.2 =================================================================== --- lib/libc/sys/getdirentries.2 +++ lib/libc/sys/getdirentries.2 @@ -193,6 +193,8 @@ error occurred while reading from or writing to the file system. .It Bq Er EINTEGRITY Corrupted data was detected while reading from the file system. +.It Bq Er ENOENT +Directory unlinked but still open. .El .Sh SEE ALSO .Xr lseek 2 , Index: share/man/man4/linux.4 =================================================================== --- share/man/man4/linux.4 +++ share/man/man4/linux.4 @@ -145,6 +145,12 @@ does not emulate the Linux environment completely, and missed features may result in security vulnerabilities. Defaults to 1. +.It Va compat.linux32.true_i386_emulation +In the x86_64 (amd64) world enable the real i386 Linuxulator behavior. +For example, when set to 0, Linux uname -m will return "x86_64" even if +uname itself is an i386 Linux executable. When set to 1, Linux i386 +uname -m will return "i686". +Defaults to 0. 
.El .Sh FILES .Bl -tag -width /compat/linux/dev/shm -compact Index: sys/amd64/linux/linux_proto.h =================================================================== --- sys/amd64/linux/linux_proto.h +++ sys/amd64/linux/linux_proto.h @@ -1319,7 +1319,10 @@ syscallarg_t dummy; }; struct linux_rseq_args { - syscallarg_t dummy; + char rseq_l_[PADL_(struct linux_rseq *)]; struct linux_rseq * rseq; char rseq_r_[PADR_(struct linux_rseq *)]; + char rseq_len_l_[PADL_(uint32_t)]; uint32_t rseq_len; char rseq_len_r_[PADR_(uint32_t)]; + char flags_l_[PADL_(l_int)]; l_int flags; char flags_r_[PADR_(l_int)]; + char sig_l_[PADL_(uint32_t)]; uint32_t sig; char sig_r_[PADR_(uint32_t)]; }; struct linux_pidfd_send_signal_args { char pidfd_l_[PADL_(l_int)]; l_int pidfd; char pidfd_r_[PADR_(l_int)]; Index: sys/amd64/linux/linux_sysent.c =================================================================== --- sys/amd64/linux/linux_sysent.c +++ sys/amd64/linux/linux_sysent.c @@ -351,7 +351,7 @@ { .sy_narg = AS(linux_pkey_free_args), .sy_call = (sy_call_t *)linux_pkey_free, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 331 = linux_pkey_free */ { .sy_narg = AS(linux_statx_args), .sy_call = (sy_call_t *)linux_statx, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 332 = linux_statx */ { .sy_narg = 0, .sy_call = (sy_call_t *)linux_io_pgetevents, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 333 = linux_io_pgetevents */ - { .sy_narg = 0, .sy_call = (sy_call_t *)linux_rseq, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 334 = linux_rseq */ + { .sy_narg = AS(linux_rseq_args), .sy_call = (sy_call_t *)linux_rseq, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 334 = linux_rseq */ { .sy_narg = 0, .sy_call = (sy_call_t *)nosys, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_ABSENT }, /* 335 = nosys */ { .sy_narg = 0, .sy_call = (sy_call_t *)nosys, .sy_auevent = 
AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_ABSENT }, /* 336 = nosys */ { .sy_narg = 0, .sy_call = (sy_call_t *)nosys, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_ABSENT }, /* 337 = nosys */ Index: sys/amd64/linux/linux_systrace_args.c =================================================================== --- sys/amd64/linux/linux_systrace_args.c +++ sys/amd64/linux/linux_systrace_args.c @@ -2613,7 +2613,12 @@ } /* linux_rseq */ case 334: { - *n_args = 0; + struct linux_rseq_args *p = params; + uarg[a++] = (intptr_t)p->rseq; /* struct linux_rseq * */ + uarg[a++] = p->rseq_len; /* uint32_t */ + iarg[a++] = p->flags; /* l_int */ + uarg[a++] = p->sig; /* uint32_t */ + *n_args = 4; break; } /* linux_pidfd_send_signal */ @@ -6964,6 +6969,22 @@ break; /* linux_rseq */ case 334: + switch (ndx) { + case 0: + p = "userland struct linux_rseq *"; + break; + case 1: + p = "uint32_t"; + break; + case 2: + p = "l_int"; + break; + case 3: + p = "uint32_t"; + break; + default: + break; + }; break; /* linux_pidfd_send_signal */ case 424: @@ -8499,6 +8520,9 @@ case 333: /* linux_rseq */ case 334: + if (ndx == 0 || ndx == 1) + p = "int"; + break; /* linux_pidfd_send_signal */ case 424: if (ndx == 0 || ndx == 1) Index: sys/amd64/linux/syscalls.master =================================================================== --- sys/amd64/linux/syscalls.master +++ sys/amd64/linux/syscalls.master @@ -2039,7 +2039,12 @@ int linux_io_pgetevents(void); } 334 AUE_NULL STD { - int linux_rseq(void); + int linux_rseq( + struct linux_rseq *rseq, + uint32_t rseq_len, + l_int flags, + uint32_t sig + ); } ; Linux 5.0: 335-423 AUE_NULL UNIMPL nosys Index: sys/amd64/linux32/linux.h =================================================================== --- sys/amd64/linux32/linux.h +++ sys/amd64/linux32/linux.h @@ -637,4 +637,6 @@ void bsd_to_linux_regset32(const struct reg32 *b_reg, struct linux_pt_regset32 *l_regset); +extern bool linux32_true_i386_emulation; + #endif /* !_AMD64_LINUX_H_ */ 
Index: sys/amd64/linux32/linux32_proto.h =================================================================== --- sys/amd64/linux32/linux32_proto.h +++ sys/amd64/linux32/linux32_proto.h @@ -1470,7 +1470,10 @@ syscallarg_t dummy; }; struct linux_rseq_args { - syscallarg_t dummy; + char rseq_l_[PADL_(struct linux_rseq *)]; struct linux_rseq * rseq; char rseq_r_[PADR_(struct linux_rseq *)]; + char rseq_len_l_[PADL_(uint32_t)]; uint32_t rseq_len; char rseq_len_r_[PADR_(uint32_t)]; + char flags_l_[PADL_(l_int)]; l_int flags; char flags_r_[PADR_(l_int)]; + char sig_l_[PADL_(uint32_t)]; uint32_t sig; char sig_r_[PADR_(uint32_t)]; }; struct linux_semget_args { char key_l_[PADL_(l_key_t)]; l_key_t key; char key_r_[PADR_(l_key_t)]; Index: sys/amd64/linux32/linux32_sysent.c =================================================================== --- sys/amd64/linux32/linux32_sysent.c +++ sys/amd64/linux32/linux32_sysent.c @@ -403,7 +403,7 @@ { .sy_narg = AS(linux_statx_args), .sy_call = (sy_call_t *)linux_statx, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 383 = linux_statx */ { .sy_narg = AS(linux_arch_prctl_args), .sy_call = (sy_call_t *)linux_arch_prctl, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 384 = linux_arch_prctl */ { .sy_narg = 0, .sy_call = (sy_call_t *)linux_io_pgetevents, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 385 = linux_io_pgetevents */ - { .sy_narg = 0, .sy_call = (sy_call_t *)linux_rseq, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 386 = linux_rseq */ + { .sy_narg = AS(linux_rseq_args), .sy_call = (sy_call_t *)linux_rseq, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 386 = linux_rseq */ { .sy_narg = 0, .sy_call = (sy_call_t *)nosys, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_ABSENT }, /* 387 = nosys */ { .sy_narg = 0, .sy_call = (sy_call_t *)nosys, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = 
SY_THR_ABSENT }, /* 388 = nosys */ { .sy_narg = 0, .sy_call = (sy_call_t *)nosys, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_ABSENT }, /* 389 = nosys */ Index: sys/amd64/linux32/linux32_systrace_args.c =================================================================== --- sys/amd64/linux32/linux32_systrace_args.c +++ sys/amd64/linux32/linux32_systrace_args.c @@ -2867,7 +2867,12 @@ } /* linux_rseq */ case 386: { - *n_args = 0; + struct linux_rseq_args *p = params; + uarg[a++] = (intptr_t)p->rseq; /* struct linux_rseq * */ + uarg[a++] = p->rseq_len; /* uint32_t */ + iarg[a++] = p->flags; /* l_int */ + uarg[a++] = p->sig; /* uint32_t */ + *n_args = 4; break; } /* linux_semget */ @@ -7873,6 +7878,22 @@ break; /* linux_rseq */ case 386: + switch (ndx) { + case 0: + p = "userland struct linux_rseq *"; + break; + case 1: + p = "uint32_t"; + break; + case 2: + p = "l_int"; + break; + case 3: + p = "uint32_t"; + break; + default: + break; + }; break; /* linux_semget */ case 393: @@ -9924,6 +9945,9 @@ case 385: /* linux_rseq */ case 386: + if (ndx == 0 || ndx == 1) + p = "int"; + break; /* linux_semget */ case 393: if (ndx == 0 || ndx == 1) Index: sys/amd64/linux32/linux32_sysvec.c =================================================================== --- sys/amd64/linux32/linux32_sysvec.c +++ sys/amd64/linux32/linux32_sysvec.c @@ -885,6 +885,9 @@ static u_long linux32_maxvmem = LINUX32_MAXVMEM; SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxvmem, CTLFLAG_RW, &linux32_maxvmem, 0, ""); +bool linux32_true_i386_emulation = false; +SYSCTL_BOOL(_compat_linux32, OID_AUTO, true_i386_emulation, CTLFLAG_RWTUN, + &linux32_true_i386_emulation, 0, "Emulate the real i386"); static void linux32_fixlimit(struct rlimit *rl, int which) Index: sys/amd64/linux32/syscalls.master =================================================================== --- sys/amd64/linux32/syscalls.master +++ sys/amd64/linux32/syscalls.master @@ -2265,7 +2265,12 @@ int linux_io_pgetevents(void); } 386 
AUE_NULL STD { - int linux_rseq(void); + int linux_rseq( + struct linux_rseq *rseq, + uint32_t rseq_len, + l_int flags, + uint32_t sig + ); } 387-392 AUE_NULL UNIMPL nosys 393 AUE_NULL STD { Index: sys/arm64/linux/linux_proto.h =================================================================== --- sys/arm64/linux/linux_proto.h +++ sys/arm64/linux/linux_proto.h @@ -1126,7 +1126,10 @@ syscallarg_t dummy; }; struct linux_rseq_args { - syscallarg_t dummy; + char rseq_l_[PADL_(struct linux_rseq *)]; struct linux_rseq * rseq; char rseq_r_[PADR_(struct linux_rseq *)]; + char rseq_len_l_[PADL_(uint32_t)]; uint32_t rseq_len; char rseq_len_r_[PADR_(uint32_t)]; + char flags_l_[PADL_(l_int)]; l_int flags; char flags_r_[PADR_(l_int)]; + char sig_l_[PADL_(uint32_t)]; uint32_t sig; char sig_r_[PADR_(uint32_t)]; }; struct linux_kexec_file_load_args { syscallarg_t dummy; Index: sys/arm64/linux/linux_sysent.c =================================================================== --- sys/arm64/linux/linux_sysent.c +++ sys/arm64/linux/linux_sysent.c @@ -310,7 +310,7 @@ { .sy_narg = AS(linux_pkey_free_args), .sy_call = (sy_call_t *)linux_pkey_free, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 290 = linux_pkey_free */ { .sy_narg = AS(linux_statx_args), .sy_call = (sy_call_t *)linux_statx, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 291 = linux_statx */ { .sy_narg = 0, .sy_call = (sy_call_t *)linux_io_pgetevents, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 292 = linux_io_pgetevents */ - { .sy_narg = 0, .sy_call = (sy_call_t *)linux_rseq, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 293 = linux_rseq */ + { .sy_narg = AS(linux_rseq_args), .sy_call = (sy_call_t *)linux_rseq, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 293 = linux_rseq */ { .sy_narg = 0, .sy_call = (sy_call_t *)linux_kexec_file_load, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = 
SY_THR_STATIC }, /* 294 = linux_kexec_file_load */ { .sy_narg = 0, .sy_call = (sy_call_t *)nosys, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_ABSENT }, /* 295 = unimpl_md_syscall */ { .sy_narg = 0, .sy_call = (sy_call_t *)nosys, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_ABSENT }, /* 296 = unimpl_md_syscall */ Index: sys/arm64/linux/linux_systrace_args.c =================================================================== --- sys/arm64/linux/linux_systrace_args.c +++ sys/arm64/linux/linux_systrace_args.c @@ -2230,7 +2230,12 @@ } /* linux_rseq */ case 293: { - *n_args = 0; + struct linux_rseq_args *p = params; + uarg[a++] = (intptr_t)p->rseq; /* struct linux_rseq * */ + uarg[a++] = p->rseq_len; /* uint32_t */ + iarg[a++] = p->flags; /* l_int */ + uarg[a++] = p->sig; /* uint32_t */ + *n_args = 4; break; } /* linux_kexec_file_load */ @@ -6036,6 +6041,22 @@ break; /* linux_rseq */ case 293: + switch (ndx) { + case 0: + p = "userland struct linux_rseq *"; + break; + case 1: + p = "uint32_t"; + break; + case 2: + p = "l_int"; + break; + case 3: + p = "uint32_t"; + break; + default: + break; + }; break; /* linux_kexec_file_load */ case 294: @@ -7357,6 +7378,9 @@ case 292: /* linux_rseq */ case 293: + if (ndx == 0 || ndx == 1) + p = "int"; + break; /* linux_kexec_file_load */ case 294: /* linux_pidfd_send_signal */ Index: sys/arm64/linux/syscalls.master =================================================================== --- sys/arm64/linux/syscalls.master +++ sys/arm64/linux/syscalls.master @@ -1686,7 +1686,12 @@ int linux_io_pgetevents(void); } 293 AUE_NULL STD { - int linux_rseq(void); + int linux_rseq( + struct linux_rseq *rseq, + uint32_t rseq_len, + l_int flags, + uint32_t sig + ); } 294 AUE_NULL STD { int linux_kexec_file_load(void); Index: sys/compat/linprocfs/linprocfs.c =================================================================== --- sys/compat/linprocfs/linprocfs.c +++ sys/compat/linprocfs/linprocfs.c @@ -80,6 +80,7 @@ 
#include #include #include +#include #include #include @@ -104,6 +105,7 @@ #endif /* __i386__ || __amd64__ */ #include +#include #include #include #include @@ -1932,6 +1934,33 @@ return (error); } +/* + * Filler function for proc/self/oom_score_adj + * + */ +static int +linprocfs_dooomscoreadj(PFS_FILL_ARGS) +{ + struct linux_pemuldata *pem; + long oom; + + pem = pem_find(p); + if (pem == NULL || uio == NULL) + return (EOPNOTSUPP); + if (uio->uio_rw == UIO_READ) { + sbuf_printf(sb, "%d\n", pem->oom_score_adj); + } else { + sbuf_trim(sb); + sbuf_finish(sb); + oom = strtol(sbuf_data(sb), NULL, 10); + if (oom < LINUX_OOM_SCORE_ADJ_MIN || + oom > LINUX_OOM_SCORE_ADJ_MAX) + return (EINVAL); + pem->oom_score_adj = oom; + } + return (0); +} + /* * Constructor */ @@ -2018,6 +2047,8 @@ NULL, &procfs_candebug, NULL, PFS_RD|PFS_RAWRD); pfs_create_file(dir, "limits", &linprocfs_doproclimits, NULL, NULL, NULL, PFS_RD); + pfs_create_file(dir, "oom_score_adj", &linprocfs_dooomscoreadj, + procfs_attr_rw, &procfs_candebug, NULL, PFS_RDWR); /* /proc//task/... 
*/ dir = pfs_create_dir(dir, "task", linprocfs_dotaskattr, NULL, NULL, 0); Index: sys/compat/linux/linux_common.h =================================================================== --- sys/compat/linux/linux_common.h +++ sys/compat/linux/linux_common.h @@ -45,5 +45,4 @@ short lev, short *bev); void bsd_to_linux_poll_events(short bev, short *lev); - #endif /* _LINUX_COMMON_H_ */ Index: sys/compat/linux/linux_dummy.c =================================================================== --- sys/compat/linux/linux_dummy.c +++ sys/compat/linux/linux_dummy.c @@ -144,7 +144,6 @@ DUMMY(mount_setattr); /* Linux 4.18: */ DUMMY(io_pgetevents); -DUMMY(rseq); /* Linux 5.0: */ DUMMY(pidfd_send_signal); DUMMY(io_uring_setup); Index: sys/compat/linux/linux_emul.h =================================================================== --- sys/compat/linux/linux_emul.h +++ sys/compat/linux/linux_emul.h @@ -75,6 +75,7 @@ struct sx pem_sx; /* lock for this struct */ uint32_t persona; /* process execution domain */ uint32_t ptrace_flags; /* used by ptrace(2) */ + uint32_t oom_score_adj; /* /proc/self/oom_score_adj */ }; #define LINUX_PEM_XLOCK(p) sx_xlock(&(p)->pem_sx) Index: sys/compat/linux/linux_emul.c =================================================================== --- sys/compat/linux/linux_emul.c +++ sys/compat/linux/linux_emul.c @@ -163,6 +163,7 @@ pem = malloc(sizeof(*pem), M_LINUX, M_WAITOK | M_ZERO); sx_init(&pem->pem_sx, "lpemlk"); p->p_emuldata = pem; + pem->oom_score_adj = LINUX_OOM_SCORE_ADJ_MAX; } newtd->td_emuldata = em; @@ -187,8 +188,8 @@ pem = pem_find(p); KASSERT(pem != NULL, ("proc_exit: proc emuldata not found.\n")); pem->persona = 0; + pem->oom_score_adj = LINUX_OOM_SCORE_ADJ_MAX; } - } void Index: sys/compat/linux/linux_file.c =================================================================== --- sys/compat/linux/linux_file.c +++ sys/compat/linux/linux_file.c @@ -497,7 +497,7 @@ linux_dirent = (struct l_dirent*)lbuf; linux_dirent->d_ino = bdp->d_fileno; - 
linux_dirent->d_off = base + reclen; + linux_dirent->d_off = bdp->d_off; linux_dirent->d_reclen = linuxreclen; /* * Copy d_type to last byte of l_dirent buffer @@ -574,7 +574,7 @@ linux_dirent64 = (struct l_dirent64*)lbuf; linux_dirent64->d_ino = bdp->d_fileno; - linux_dirent64->d_off = base + reclen; + linux_dirent64->d_off = bdp->d_off; linux_dirent64->d_reclen = linuxreclen; linux_dirent64->d_type = bdp->d_type; strlcpy(linux_dirent64->d_name, bdp->d_name, @@ -631,7 +631,7 @@ linux_dirent = (struct l_dirent*)lbuf; linux_dirent->d_ino = bdp->d_fileno; - linux_dirent->d_off = linuxreclen; + linux_dirent->d_off = bdp->d_off; linux_dirent->d_reclen = bdp->d_namlen; strlcpy(linux_dirent->d_name, bdp->d_name, linuxreclen - offsetof(struct l_dirent, d_name)); @@ -1042,6 +1042,9 @@ char *name; int error; + if (args->count <= 0) + return (EINVAL); + if (!LUSECONVPATH(td)) { return (kern_readlinkat(td, AT_FDCWD, args->name, UIO_USERSPACE, args->buf, UIO_USERSPACE, args->count)); @@ -1060,6 +1063,9 @@ char *name; int error, dfd; + if (args->bufsiz <= 0) + return (EINVAL); + dfd = (args->dfd == LINUX_AT_FDCWD) ? 
AT_FDCWD : args->dfd; if (!LUSECONVPATH(td)) { return (kern_readlinkat(td, dfd, args->path, UIO_USERSPACE, Index: sys/compat/linux/linux_mib.h =================================================================== --- sys/compat/linux/linux_mib.h +++ sys/compat/linux/linux_mib.h @@ -58,10 +58,10 @@ #define LINUX_VERSION_STR LINUX_XKERNVERSTR(LINUX_KVERSION.LINUX_KPATCHLEVEL.LINUX_KSUBLEVEL) #define LINUX_KERNVER_2004000 LINUX_KERNVER(2,4,0) -#define LINUX_KERNVER_2006000 LINUX_KERNVER(2,6,0) #define LINUX_KERNVER_2006039 LINUX_KERNVER(2,6,39) +#define LINUX_KERNVER_5004000 LINUX_KERNVER(5,4,0) -#define linux_use26(t) (linux_kernver(t) >= LINUX_KERNVER_2006000) +#define linux_use54(t) (linux_kernver(t) >= LINUX_KERNVER_5004000) extern int linux_debug; extern int linux_default_openfiles; Index: sys/compat/linux/linux_misc.h =================================================================== --- sys/compat/linux/linux_misc.h +++ sys/compat/linux/linux_misc.h @@ -136,6 +136,7 @@ #define LINUX_P_ALL 0 #define LINUX_P_PID 1 #define LINUX_P_PGID 2 +#define LINUX_P_PIDFD 3 #define LINUX_RLIMIT_LOCKS 10 #define LINUX_RLIMIT_SIGPENDING 11 @@ -156,6 +157,10 @@ /* Linux seccomp flags */ #define LINUX_SECCOMP_GET_ACTION_AVAIL 2 +/* Linux /proc/self/oom_score_adj */ +#define LINUX_OOM_SCORE_ADJ_MIN -1000 +#define LINUX_OOM_SCORE_ADJ_MAX 1000 + #if defined(__aarch64__) || (defined(__amd64__) && !defined(COMPAT_LINUX32)) int linux_ptrace_status(struct thread *td, int pid, int status); #endif Index: sys/compat/linux/linux_misc.c =================================================================== --- sys/compat/linux/linux_misc.c +++ sys/compat/linux/linux_misc.c @@ -720,6 +720,11 @@ * the string returned by getauxval(AT_PLATFORM) needs * to remain "i686", though. 
*/ +#if defined(COMPAT_LINUX32) + if (linux32_true_i386_emulation) + strlcpy(utsname.machine, "i686", LINUX_MAX_UTSNAME); + else +#endif strlcpy(utsname.machine, "x86_64", LINUX_MAX_UTSNAME); #elif defined(__aarch64__) strlcpy(utsname.machine, "aarch64", LINUX_MAX_UTSNAME); @@ -1054,6 +1059,10 @@ LINUX_WCONTINUED | __WCLONE | __WNOTHREAD | __WALL)) return (EINVAL); + /* -INT_MIN is not defined. */ + if (args->pid == INT_MIN) + return (ESRCH); + options = 0; linux_to_bsd_waitopts(args->options, &options); @@ -1063,6 +1072,14 @@ */ options |= WEXITED | WTRAPPED; + /* + * As FreeBSD does not have __WALL option bit analogue explicitly set all + * possible option bits to emulate Linux __WALL wait option bit. The same + * for waitid system call. + */ + if ((args->options & __WALL) != 0) + options |= WUNTRACED | WCONTINUED | WLINUXCLONE; + if (args->pid == WAIT_ANY) { idtype = P_ALL; id = 0; @@ -1089,15 +1106,20 @@ { idtype_t idtype; int error, options; + struct proc *p; + pid_t id; + + if (args->options & ~(LINUX_WNOHANG | LINUX_WNOWAIT | LINUX_WEXITED | + LINUX_WSTOPPED | LINUX_WCONTINUED | __WCLONE | __WNOTHREAD | __WALL)) + return (EINVAL); options = 0; linux_to_bsd_waitopts(args->options, &options); + if ((args->options & __WALL) != 0) + options |= WEXITED | WTRAPPED | WUNTRACED | + WCONTINUED | WLINUXCLONE; - if (options & ~(WNOHANG | WNOWAIT | WEXITED | WUNTRACED | WCONTINUED)) - return (EINVAL); - if (!(options & (WEXITED | WUNTRACED | WCONTINUED))) - return (EINVAL); - + id = args->id; switch (args->idtype) { case LINUX_P_ALL: idtype = P_ALL; @@ -1108,15 +1130,23 @@ idtype = P_PID; break; case LINUX_P_PGID: - if (args->id <= 0) + if (linux_use54(td) && args->id == 0) { + p = td->td_proc; + PROC_LOCK(p); + id = p->p_pgid; + PROC_UNLOCK(p); + } else if (args->id <= 0) return (EINVAL); idtype = P_PGID; break; + case LINUX_P_PIDFD: + LINUX_RATELIMIT_MSG("unsupported waitid P_PIDFD idtype"); + return (ENOSYS); default: return (EINVAL); } - error = 
linux_common_wait(td, idtype, args->id, NULL, options, + error = linux_common_wait(td, idtype, id, NULL, options, args->rusage, args->info); td->td_retval[0] = 0; Index: sys/compat/linux/linux_rseq.h =================================================================== --- sys/compat/linux/linux_rseq.h +++ sys/compat/linux/linux_rseq.h @@ -1,7 +1,7 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * - * Copyright (c) 2019 Dmitry Chagin + * Copyright (c) 2022 Dmitry Chagin * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -23,27 +23,16 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. - * - * $FreeBSD$ */ -#ifndef _LINUX_COMMON_H_ -#define _LINUX_COMMON_H_ - -struct ifnet *ifname_linux_to_bsd(struct thread *td, - const char *lxname, char *bsdname); -void linux_ifflags(struct ifnet *ifp, short *flags); -int linux_ifhwaddr(struct ifnet *ifp, struct l_sockaddr *lsa); - -int linux_to_bsd_domain(int domain); -int bsd_to_linux_domain(int domain); -int bsd_to_linux_sockaddr(const struct sockaddr *sa, - struct l_sockaddr **lsa, socklen_t len); -int linux_to_bsd_sockaddr(const struct l_sockaddr *lsa, - struct sockaddr **sap, socklen_t *len); -void linux_to_bsd_poll_events(struct thread *td, int fd, - short lev, short *bev); -void bsd_to_linux_poll_events(short bev, short *lev); +#ifndef _LINUX_RSEQ_H_ +#define _LINUX_RSEQ_H_ +struct linux_rseq { + uint32_t cpu_id_start; + uint32_t cpu_id; + uint64_t rseq_cs; + uint32_t flags; +} __attribute__((aligned(4 * sizeof(uint64_t)))); -#endif /* _LINUX_COMMON_H_ */ +#endif /* _LINUX_RSEQ_H_ */ Index: sys/compat/linux/linux_rseq.c =================================================================== --- /dev/null +++ sys/compat/linux/linux_rseq.c @@ -0,0 +1,84 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2022 
Dmitry Chagin + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include + +#ifdef COMPAT_LINUX32 +#include +#include +#else +#include +#include +#endif +#include + +enum linux_rseq_cpu_id_state { + LINUX_RSEQ_CPU_ID_UNINITIALIZED = -1, + LINUX_RSEQ_CPU_ID_REGISTRATION_FAILED = -2, +}; + +enum linux_rseq_flags { + LINUX_RSEQ_FLAG_UNREGISTER = (1 << 0), +}; + +enum linux_rseq_cs_flags_bit { + LINUX_RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT = 0, + LINUX_RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT = 1, + LINUX_RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT = 2, +}; + +enum linux_rseq_cs_flags { + LINUX_RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT = + (1U << LINUX_RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT), + LINUX_RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL = + (1U << LINUX_RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT), + LINUX_RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE = + (1U << LINUX_RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT), +}; + +struct linux_rseq_cs { + uint32_t version; + uint32_t flags; + uint64_t start_ip; + uint64_t post_commit_offset; + uint64_t abort_ip; +} __attribute__((aligned(4 * sizeof(uint64_t)))); + + +int +linux_rseq(struct thread *td, struct linux_rseq_args *args) +{ + + return (ENOSYS); +} Index: sys/compat/linux/linux_socket.c =================================================================== --- sys/compat/linux/linux_socket.c +++ sys/compat/linux/linux_socket.c @@ -727,7 +727,7 @@ error = bsd_to_linux_sockaddr(sa, &lsa, len); if (error != 0) return (error); - + error = copyout(lsa, uaddr, len); free(lsa, M_SONAME); Index: sys/compat/linux/linux_stats.c =================================================================== --- sys/compat/linux/linux_stats.c +++ sys/compat/linux/linux_stats.c @@ -800,4 +800,3 @@ return (error); } - Index: sys/i386/linux/linux_proto.h =================================================================== --- sys/i386/linux/linux_proto.h +++ sys/i386/linux/linux_proto.h @@ -1463,7 +1463,10 @@ syscallarg_t dummy; }; struct linux_rseq_args { - syscallarg_t 
dummy; + char rseq_l_[PADL_(struct linux_rseq *)]; struct linux_rseq * rseq; char rseq_r_[PADR_(struct linux_rseq *)]; + char rseq_len_l_[PADL_(uint32_t)]; uint32_t rseq_len; char rseq_len_r_[PADR_(uint32_t)]; + char flags_l_[PADL_(l_int)]; l_int flags; char flags_r_[PADR_(l_int)]; + char sig_l_[PADL_(uint32_t)]; uint32_t sig; char sig_r_[PADR_(uint32_t)]; }; struct linux_semget_args { char key_l_[PADL_(l_key_t)]; l_key_t key; char key_r_[PADR_(l_key_t)]; Index: sys/i386/linux/linux_sysent.c =================================================================== --- sys/i386/linux/linux_sysent.c +++ sys/i386/linux/linux_sysent.c @@ -403,7 +403,7 @@ { .sy_narg = AS(linux_statx_args), .sy_call = (sy_call_t *)linux_statx, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 383 = linux_statx */ { .sy_narg = AS(linux_arch_prctl_args), .sy_call = (sy_call_t *)linux_arch_prctl, .sy_auevent = AUE_PRCTL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 384 = linux_arch_prctl */ { .sy_narg = 0, .sy_call = (sy_call_t *)linux_io_pgetevents, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 385 = linux_io_pgetevents */ - { .sy_narg = 0, .sy_call = (sy_call_t *)linux_rseq, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 386 = linux_rseq */ + { .sy_narg = AS(linux_rseq_args), .sy_call = (sy_call_t *)linux_rseq, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 386 = linux_rseq */ { .sy_narg = 0, .sy_call = (sy_call_t *)nosys, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_ABSENT }, /* 387 = nosys */ { .sy_narg = 0, .sy_call = (sy_call_t *)nosys, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_ABSENT }, /* 388 = nosys */ { .sy_narg = 0, .sy_call = (sy_call_t *)nosys, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_ABSENT }, /* 389 = nosys */ Index: sys/i386/linux/linux_systrace_args.c =================================================================== --- 
sys/i386/linux/linux_systrace_args.c +++ sys/i386/linux/linux_systrace_args.c @@ -2906,7 +2906,12 @@ } /* linux_rseq */ case 386: { - *n_args = 0; + struct linux_rseq_args *p = params; + uarg[a++] = (intptr_t)p->rseq; /* struct linux_rseq * */ + uarg[a++] = p->rseq_len; /* uint32_t */ + iarg[a++] = p->flags; /* l_int */ + uarg[a++] = p->sig; /* uint32_t */ + *n_args = 4; break; } /* linux_semget */ @@ -7950,6 +7955,22 @@ break; /* linux_rseq */ case 386: + switch (ndx) { + case 0: + p = "userland struct linux_rseq *"; + break; + case 1: + p = "uint32_t"; + break; + case 2: + p = "l_int"; + break; + case 3: + p = "uint32_t"; + break; + default: + break; + }; break; /* linux_semget */ case 393: @@ -10030,6 +10051,9 @@ case 385: /* linux_rseq */ case 386: + if (ndx == 0 || ndx == 1) + p = "int"; + break; /* linux_semget */ case 393: if (ndx == 0 || ndx == 1) Index: sys/i386/linux/syscalls.master =================================================================== --- sys/i386/linux/syscalls.master +++ sys/i386/linux/syscalls.master @@ -2283,7 +2283,12 @@ int linux_io_pgetevents(void); } 386 AUE_NULL STD { - int linux_rseq(void); + int linux_rseq( + struct linux_rseq *rseq, + uint32_t rseq_len, + l_int flags, + uint32_t sig + ); } 387-392 AUE_NULL UNIMPL nosys 393 AUE_NULL STD { Index: sys/kern/vfs_subr.c =================================================================== --- sys/kern/vfs_subr.c +++ sys/kern/vfs_subr.c @@ -5980,6 +5980,7 @@ vn_seqc_write_end(dvp); vn_seqc_write_end(vp); if (!rc) { + vp->v_vflag |= VV_UNLINKED; VFS_KNOTE_LOCKED(dvp, NOTE_WRITE | NOTE_LINK); VFS_KNOTE_LOCKED(vp, NOTE_DELETE); } Index: sys/kern/vfs_syscalls.c =================================================================== --- sys/kern/vfs_syscalls.c +++ sys/kern/vfs_syscalls.c @@ -4194,6 +4194,10 @@ error = EINVAL; goto fail; } + if (__predict_false((vp->v_vflag & VV_UNLINKED) != 0)) { + error = ENOENT; + goto fail; + } aiov.iov_base = buf; aiov.iov_len = count; auio.uio_iov = &aiov; 
Index: sys/modules/linux/Makefile =================================================================== --- sys/modules/linux/Makefile +++ sys/modules/linux/Makefile @@ -16,7 +16,7 @@ linux${SFX}_machdep.c linux_misc.c linux_signal.c \ linux_socket.c linux_stats.c linux_sysctl.c linux${SFX}_sysent.c \ linux${SFX}_sysvec.c linux_uid16.c linux_time.c \ - linux_timer.c linux_vdso.c \ + linux_timer.c linux_vdso.c linux_rseq.c \ opt_inet6.h opt_compat.h opt_posix.h opt_usb.h vnode_if.h \ device_if.h bus_if.h .if ${MACHINE_CPUARCH} == "i386" || ${MACHINE_CPUARCH} == "amd64" Index: sys/modules/linux64/Makefile =================================================================== --- sys/modules/linux64/Makefile +++ sys/modules/linux64/Makefile @@ -10,7 +10,7 @@ linux_event.c linux_futex.c linux_getcwd.c linux_ioctl.c linux_ipc.c \ linux_machdep.c linux_misc.c linux_ptrace.c linux_signal.c \ linux_socket.c linux_stats.c linux_sysctl.c linux_sysent.c \ - linux_sysvec.c linux_time.c linux_vdso.c linux_timer.c \ + linux_sysvec.c linux_time.c linux_vdso.c linux_timer.c linux_rseq.c \ opt_compat.h opt_inet6.h opt_posix.h opt_usb.h \ vnode_if.h device_if.h bus_if.h \ linux_support.s Index: sys/sys/vnode.h =================================================================== --- sys/sys/vnode.h +++ sys/sys/vnode.h @@ -270,7 +270,7 @@ #define VV_COPYONWRITE 0x0040 /* vnode is doing copy-on-write */ #define VV_SYSTEM 0x0080 /* vnode being used by kernel */ #define VV_PROCDEP 0x0100 /* vnode is process dependent */ -/* UNUSED 0x0200 */ +#define VV_UNLINKED 0x0200 /* unlinked but still open directory */ #define VV_DELETED 0x0400 /* should be removed */ #define VV_MD 0x0800 /* vnode backs the md device */ #define VV_FORCEINSMQ 0x1000 /* force the insmntque to succeed */