diff --git a/share/man/man9/unr.9 b/share/man/man9/unr.9 --- a/share/man/man9/unr.9 +++ b/share/man/man9/unr.9 @@ -29,17 +29,25 @@ .Os .Sh NAME .Nm new_unrhdr , +.Nm clean_unrhdr , .Nm clear_unrhdr , .Nm delete_unrhdr , .Nm alloc_unr , .Nm alloc_unr_specific , -.Nm free_unr +.Nm free_unr , +.Nm create_iter_unr , +.Nm next_iter_unr , +.Nm free_iter_unr .Nd "kernel unit number allocator" .Sh SYNOPSIS .In sys/systm.h .Ft "struct unrhdr *" .Fn new_unrhdr "int low" "int high" "struct mtx *mutex" .Ft void +.Fn clean_unrhdr "struct unrhdr *uh" +.Ft void +.Fn clean_unrhdrl "struct unrhdr *uh" +.Ft void .Fn clear_unrhdr "struct unrhdr *uh" .Ft void .Fn delete_unrhdr "struct unrhdr *uh" @@ -51,6 +59,12 @@ .Fn alloc_unr_specific "struct unrhdr *uh" "u_int item" .Ft void .Fn free_unr "struct unrhdr *uh" "u_int item" +.Ft void * +.Fn create_iter_unr "struct unrhdr *uh" +.Ft int +.Fn next_iter_unr "void *handle" +.Ft void +.Fn free_iter_unr "void *handle" .Sh DESCRIPTION The kernel unit number allocator is a generic facility, which allows to allocate unit numbers within a specified range. @@ -86,6 +100,26 @@ any units. To free all units use .Fn clear_unrhdr . +.It Fn clean_unrhdr uh +Freeing unit numbers might result in some internal memory becoming unused. +There are +.Nm unit +allocator consumers that cannot tolerate taking +.Xr malloc 9 +locks to free the memory, while having their unit mutex locked. +For this reason, free of the unused memory after delete is postponed +until the consumer can afford calling into the +.Xr malloc 9 +subsystem. +Call +.Fn clean_unrhdr uh +to do the cleanup. +In particular, this needs to be done before freeing a unr, if +a deletion of units could have been performed. +.It Fn clean_unrhdrl +Same as +.Fn clean_unrhdr , +but assumes that the unr mutex is already owned, if any. .It Fn alloc_unr uh Return a new unit number. The lowest free number is always allocated. @@ -110,6 +144,41 @@ This function may require allocating memory, and thus it can sleep. There is no pre-locked variant. .El +.Sh ITERATOR INTERFACE +The +.Nm unr +facility provides an interface to iterate over all allocated units +for the given +.Dv unrhdr . +Iterators are identified by an opaque handle. +More than one iterators can operate simultaneously; the iterator position +data is recorded only in the iterator handle. +.Pp +Consumers must ensure that the unit allocator is not modified between +calls to the iterator functions. +In particular, the internal allocator mutex cannot provide consistency, +because it is acquired and dropped inside the +.Fn next_iter_unr +function. +If the allocator was modified, it is safe to free the iterator with +.Fn free_iter_unr +method nevertheless. +.Bl -tag -width indent +.It Fn create_iter_unr uh +Create an iterator. +Return the handle that should be passed to other iterator functions. +.It Fn next_iter_unr handle +Return the value of the next unit. +Units are returned in ascending order. +A return value of +.Li \-1 +indicates the end of iteration, in which +case +.Li \-1 +is returned for all future calls. +.It Fn free_iter_unr handle +Free the iterator, handle is no longer valid. +.El .Sh CODE REFERENCES The above functions are implemented in .Pa sys/kern/subr_unit.c . diff --git a/sys/kern/kern_procctl.c b/sys/kern/kern_procctl.c --- a/sys/kern/kern_procctl.c +++ b/sys/kern/kern_procctl.c @@ -416,8 +416,21 @@ continue; if ((p2->p_treeflag & P_TREE_REAPER) != 0) reap_kill_sched(&tracker, p2); - if (alloc_unr_specific(pids, p2->p_pid) != p2->p_pid) + + /* + * Handle possible pid reuse. If we recorded + * p2 as killed but its p_flag2 does not + * confirm it, that means that the process + * terminated and its id was reused by other + * process in the reaper subtree. + * + * Unlocked read of p2->p_flag2 is fine, it is + * our thread that set the tested flag. + */ + if (alloc_unr_specific(pids, p2->p_pid) != p2->p_pid && + (atomic_load_int(&p2->p_flag2) & P2_REAPKILLED) != 0) continue; + if (p2 == td->td_proc) { if ((p2->p_flag & P_HADTHREADS) != 0 && (p2->p_flag2 & P2_WEXIT) == 0) { @@ -428,6 +441,11 @@ st = false; } PROC_LOCK(p2); + /* + * sapblk ensures that only one thread + * in the system sets this flag. + */ + p2->p_flag2 |= P2_REAPKILLED; if (st) r = thread_single(p2, SINGLE_NO_EXIT); (void)pksignal(p2, w->rk->rk_sig, w->ksi); @@ -445,6 +463,7 @@ PROC_LOCK(p2); if ((p2->p_flag2 & P2_WEXIT) == 0) { _PHOLD_LITE(p2); + p2->p_flag2 |= P2_REAPKILLED; PROC_UNLOCK(p2); w->target = p2; taskqueue_enqueue(taskqueue_thread, @@ -471,6 +490,9 @@ struct reap_kill_proc_work *w) { struct unrhdr pids; + void *ihandle; + struct proc *p2; + int pid; /* * pids records processes which were already signalled, to @@ -486,6 +508,17 @@ PROC_UNLOCK(td->td_proc); while (reap_kill_subtree_once(td, p, reaper, &pids, w)) ; + + ihandle = create_iter_unr(&pids); + while ((pid = next_iter_unr(ihandle)) != -1) { + p2 = pfind(pid); + if (p2 != NULL) { + p2->p_flag2 &= ~P2_REAPKILLED; + PROC_UNLOCK(p2); + } + } + free_iter_unr(ihandle); + out: clean_unrhdr(&pids); clear_unrhdr(&pids); diff --git a/sys/kern/subr_unit.c b/sys/kern/subr_unit.c --- a/sys/kern/subr_unit.c +++ b/sys/kern/subr_unit.c @@ -178,6 +178,12 @@ * For bitmaps the len field represents the number of allocated items. * * The bitmap is the same size as struct unr to optimize memory management. + * + * Two special ranges are not covered by unrs: + * - at the start of the allocator space, all elements in [low, low + first) + * are allocated; + * - at the end of the allocator space, all elements in [high - last, high] + * are free. */ struct unr { TAILQ_ENTRY(unr) list; @@ -192,7 +198,13 @@ CTASSERT((sizeof(struct unr) % sizeof(bitstr_t)) == 0); /* Number of bits we can store in the bitmap */ -#define NBITS (8 * sizeof(((struct unrb*)NULL)->map)) +#define NBITS (NBBY * sizeof(((struct unrb *)NULL)->map)) + +static inline bool +is_bitmap(struct unrhdr *uh, struct unr *up) +{ + return (up->ptr != uh && up->ptr != NULL); +} /* Is the unrb empty in at least the first len bits? */ static inline bool @@ -213,6 +225,120 @@ return (first_clear == -1); } +/* + * start: ipos = -1, upos = NULL; + * end: ipos = -1, upos = uh + */ +struct unrhdr_iter { + struct unrhdr *uh; + int ipos; + int upos_first_item; + void *upos; +}; + +void * +create_iter_unr(struct unrhdr *uh) +{ + struct unrhdr_iter *iter; + + iter = Malloc(sizeof(*iter)); + iter->ipos = -1; + iter->uh = uh; + iter->upos = NULL; + iter->upos_first_item = -1; + return (iter); +} + +static void +next_iter_unrl(struct unrhdr *uh, struct unrhdr_iter *iter) +{ + struct unr *up; + struct unrb *ub; + u_int y; + int c; + + if (iter->ipos == -1) { + if (iter->upos == uh) + return; + y = uh->low - 1; + if (uh->first == 0) { + up = TAILQ_FIRST(&uh->head); + if (up == NULL) { + iter->upos = uh; + return; + } + iter->upos = up; + if (up->ptr == NULL) + iter->upos = NULL; + } + } else { + y = iter->ipos; + } + + up = iter->upos; + + /* Special case for the compacted [low, first) run. */ + if (up == NULL) { + if (y + 1 < uh->low + uh->first) { + iter->ipos = y + 1; + return; + } + up = iter->upos = TAILQ_FIRST(&uh->head); + iter->upos_first_item = uh->low + uh->first; + } + + for (;;) { + if (y + 1 < iter->upos_first_item + up->len) { + if (up->ptr == uh) { + iter->ipos = y + 1; + return; + } else if (is_bitmap(uh, up)) { + ub = up->ptr; + bit_ffs_at(&ub->map[0], + (y + 1) % iter->upos_first_item, + NBITS, &c); + if (c != -1) { + iter->ipos = iter->upos_first_item + c; + return; + } + } + } + iter->upos_first_item += up->len; + y = iter->upos_first_item - 1; + up = iter->upos = TAILQ_NEXT((struct unr *)iter->upos, list); + if (iter->upos == NULL) { + iter->ipos = -1; + iter->upos = uh; + return; + } + } +} + +/* + * returns -1 on end, otherwise the next element + */ +int +next_iter_unr(void *handle) +{ + struct unrhdr *uh; + struct unrhdr_iter *iter; + + iter = handle; + uh = iter->uh; + if (uh->mtx != NULL) + mtx_lock(uh->mtx); + next_iter_unrl(uh, iter); + if (uh->mtx != NULL) + mtx_unlock(uh->mtx); + return (iter->ipos); +} + +void +free_iter_unr(void *handle) +{ + Free(handle); +} + #if defined(DIAGNOSTIC) || !defined(_KERNEL) /* * Consistency check function. @@ -233,7 +359,7 @@ z = 0; TAILQ_FOREACH(up, &uh->head, list) { z++; - if (up->ptr != uh && up->ptr != NULL) { + if (is_bitmap(uh, up)) { ub = up->ptr; KASSERT (up->len <= NBITS, ("UNR inconsistency: len %u max %zd (line %d)\n", @@ -396,12 +522,6 @@ check_unrhdr(uh, __LINE__); } -static __inline int -is_bitmap(struct unrhdr *uh, struct unr *up) -{ - return (up->ptr != uh && up->ptr != NULL); -} - /* * Look for sequence of items which can be combined into a bitmap, if * multiple are present, take the one which saves most memory. @@ -1010,10 +1130,93 @@ } } +#define TBASE 7 +#define XSIZE 10 +#define ISIZE 1000 + +static int +test_iter_compar(const void *a, const void *b) +{ + return (*(const int *)a - *(const int *)b); +} + static void -usage(char** argv) +test_iter_fill(int *vals, struct unrhdr *uh, int i, int v, int *res) { - printf("%s [-h] [-r REPETITIONS] [-v]\n", argv[0]); + int x; + + vals[i] = v; + x = alloc_unr_specific(uh, v); + if (x != v) { + VPRINTF("alloc_unr_specific failed %d %d\n", x, v); + *res = 1; + } +} + +static void +test_iter(void) +{ + struct unrhdr *uh; + void *ihandle; + int vals[ISIZE]; + int i, j, v, x, res; + + res = 0; + uh = new_unrhdr(TBASE, INT_MAX, NULL); + for (i = 0; i < XSIZE; i++) { + vals[i] = i + TBASE; + x = alloc_unr_specific(uh, i + TBASE); + if (x != i + TBASE) { + VPRINTF("alloc_unr_specific failed %d %d\n", x, + i + TBASE); + res = 1; + } + } + for (; i < ISIZE; i++) { + for (;;) { +again: + v = arc4random_uniform(INT_MAX); + if (v < TBASE) + goto again; + for (j = 0; j < i; j++) { + if (v == vals[j] || v + 1 == vals[j]) + goto again; + } + break; + } + test_iter_fill(vals, uh, i, v, &res); + i++, v++; + if (i < ISIZE) + test_iter_fill(vals, uh, i, v, &res); + } + qsort(vals, ISIZE, sizeof(vals[0]), test_iter_compar); + + ihandle = create_iter_unr(uh); + i = 0; + while ((v = next_iter_unr(ihandle)) != -1) { + if (vals[i] != v) { + VPRINTF("iter %d: iter %d != val %d\n", i, v, vals[i]); + if (res == 0) { + if (verbose) + print_unrhdr(uh); + res = 1; + } + } else { + VPRINTF("iter %d: val %d\n", i, v); + } + i++; + } + free_iter_unr(ihandle); + clean_unrhdr(uh); + clear_unrhdr(uh); + delete_unrhdr(uh); + exit(res); +} + +static void +usage(char **argv) +{ + printf("%s [-h] [-i] [-r REPETITIONS] [-v]\n", argv[0]); } int @@ -1025,11 +1228,16 @@ long reps = 1, m; int ch; u_int i; + bool testing_iter; verbose = false; + testing_iter = false; - while ((ch = getopt(argc, argv, "hr:v")) != -1) { + while ((ch = getopt(argc, argv, "hir:v")) != -1) { switch (ch) { + case 'i': + testing_iter = true; + break; case 'r': errno = 0; reps = strtol(optarg, NULL, 0); @@ -1050,6 +1258,10 @@ } setbuf(stdout, NULL); + + if (testing_iter) + test_iter(); + uh = new_unrhdr(0, count - 1, NULL); print_unrhdr(uh); diff --git a/sys/sys/proc.h b/sys/sys/proc.h --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -881,6 +881,7 @@ #define P2_WEXIT 0x00040000 /* exit just started, no external thread_single() is permitted */ +#define P2_REAPKILLED 0x00080000 /* Flags protected by proctree_lock, kept in p_treeflags. */ #define P_TREE_ORPHANED 0x00000001 /* Reparented, on orphan list */ diff --git a/sys/sys/systm.h b/sys/sys/systm.h --- a/sys/sys/systm.h +++ b/sys/sys/systm.h @@ -509,6 +509,9 @@ int alloc_unr_specific(struct unrhdr *uh, u_int item); int alloc_unrl(struct unrhdr *uh); void free_unr(struct unrhdr *uh, u_int item); +void *create_iter_unr(struct unrhdr *uh); +int next_iter_unr(void *handle); +void free_iter_unr(void *handle); struct unrhdr64 { uint64_t counter;