diff --git a/share/man/man4/geom.4 b/share/man/man4/geom.4 --- a/share/man/man4/geom.4 +++ b/share/man/man4/geom.4 @@ -418,6 +418,50 @@ a separate section below. .El .Pp +Because the non-blocking allocation of +.Vt "struct bio" +is frequently required, +.Va "biozone" , +the default +.Xr uma 9 +zone for +.Vt "struct bio" , +has some items preallocated and reserved by +.Xr uma_prealloc 9 +and +.Xr uma_zone_reserve 9 . +Only non-blocking +.Vt "struct bio" +allocation requests, including cloning, can use the reserved items. +The number of reserved items is configurable by the loader tunable +.Va kern.geom.reserved_new_bios . +.Pp +An I/O request originator may allocate +.Vt "struct bio" +out of its own +.Xr uma 9 +zone instead of +.Va "biozone" . +This is useful when an isolated +.Xr uma 9 +zone is desirable due to the nature of the I/O requests. +Write requests issued by the VM swap pager are a good example. +A swap write operation is meant to free up some pages, and therefore +should not involve any blocking memory allocation, including that of +.Vt "struct bio" . +The VM swap pager hence has an even higher demand for the +.Vt "struct bio" +reservation, and has its own +.Xr uma 9 +zone for that. +Its reservation is configurable by the loader tunable +.Va vm.swap_reserved_new_bios . +Other kernel subsystems may have their own +.Xr uma 9 +zones for +.Vt "struct bio" . +Please refer to their respective man pages for details. +.Pp (Stay tuned while the author rests his brain and fingers: more to come.) 
.Sh DIAGNOSTICS Several flags are provided for tracing diff --git a/share/man/man9/Makefile b/share/man/man9/Makefile --- a/share/man/man9/Makefile +++ b/share/man/man9/Makefile @@ -1128,11 +1128,15 @@ MLINKS+=g_attach.9 g_detach.9 MLINKS+=g_bio.9 bio.9 \ g_bio.9 g_alloc_bio.9 \ + g_bio.9 g_alloc_bio_uz.9 \ g_bio.9 g_clone_bio.9 \ + g_bio.9 g_ctor_bio.9 \ g_bio.9 g_destroy_bio.9 \ g_bio.9 g_duplicate_bio.9 \ g_bio.9 g_format_bio.9 \ + g_bio.9 g_io_new_uz.9 \ g_bio.9 g_new_bio.9 \ + g_bio.9 g_new_bio_uz.9 \ g_bio.9 g_print_bio.9 \ g_bio.9 g_reset_bio.9 MLINKS+=g_consumer.9 g_destroy_consumer.9 \ diff --git a/share/man/man9/g_bio.9 b/share/man/man9/g_bio.9 --- a/share/man/man9/g_bio.9 +++ b/share/man/man9/g_bio.9 @@ -27,11 +27,16 @@ .Os .Sh NAME .Nm g_new_bio , +.Nm g_new_bio_uz , +.Nm g_alloc_bio , +.Nm g_alloc_bio_uz , .Nm g_clone_bio , .Nm g_destroy_bio , .Nm g_format_bio , .Nm g_print_bio , -.Nm g_reset_bio +.Nm g_reset_bio , +.Nm g_io_new_uz , +.Nm g_ctor_bio .Nd "GEOM bio controlling functions" .Sh SYNOPSIS .In sys/bio.h @@ -39,8 +44,12 @@ .Ft "struct bio *" .Fn g_new_bio void .Ft "struct bio *" +.Fn g_new_bio_uz "uma_zone_t uz" +.Ft "struct bio *" .Fn g_alloc_bio void .Ft "struct bio *" +.Fn g_alloc_bio_uz "uma_zone_t uz" +.Ft "struct bio *" .Fn g_clone_bio "struct bio *bp" .Ft "struct bio *" .Fn g_duplicate_bio "struct bio *bp" @@ -55,6 +64,10 @@ .Fc .Ft void .Fn g_reset_bio "struct bio *bp" +.Ft uma_zone_t +.Fn g_io_new_uz "const char *name" +.Ft int +.Fn g_ctor_bio "void *mem" "int size" "void *arg" "int flags" .Sh DESCRIPTION A .Vt "struct bio" @@ -143,20 +156,42 @@ .It Va bio_parent Pointer to parent .Vt bio . +.It Va bio_uz +.Xr uma 9 +zone from which this +.Vt bio +structure is allocated. .El .Pp The .Fn g_new_bio -function allocates a new, empty +and +.Fn g_new_bio_uz +functions allocate a new, empty .Vt bio -structure. +structure from +.Va biozone , +the default +.Xr uma 9 +zone for +.Vt bio +and the supplied one, respectively. 
.Pp -.Fn g_alloc_bio +.Fn g_alloc_bio , +.Fn g_alloc_bio_uz - same as -.Fn g_new_bio , -but always succeeds (allocates bio with the +.Fn g_new_bio +and +.Fn g_new_bio_uz , +respectively, but always succeed at the possible cost of blocking (allocate +.Vt bio +with the .Dv M_WAITOK -malloc flag). +.Xr uma_zalloc 9 +flag). +Note that these functions do not allocate from the reserves even if the +.Xr uma 9 +zone has them, in order to preserve the reserves. .Pp The .Fn g_clone_bio @@ -169,7 +204,9 @@ .Va bio_length , .Va bio_offset , .Va bio_data , -.Va bio_attribute . +.Va bio_attribute +and +.Va bio_uz . The field .Va bio_parent in the clone points to the passed @@ -179,6 +216,12 @@ in the passed .Vt bio is incremented. +The same +.Xr uma 9 +zone as the argument +.Vt bio +is used to allocate the cloned +.Vt bio . .Pp This function should be used for every request which enters through the provider of a particular geom and needs to be scheduled down. @@ -197,9 +240,12 @@ .Fn g_duplicate_bio - same as .Fn g_clone_bio , -but always succeeds (allocates bio with the +but always succeeds at the possible cost of blocking (allocates +.Vt bio +with the .Dv M_WAITOK -malloc flag). +.Xr uma_zalloc 9 +flag). .Pp The .Fn g_destroy_bio @@ -256,6 +302,47 @@ structure created by other means, .Fn g_reset_bio should be used to initialize it and between transactions. +.Pp +The +.Fn g_io_new_uz +function creates a new +.Xr uma 9 +zone for +.Vt bio +with +.Fa name +as the zone name +by calling +.Xr uma_zcreate 9 +with suitable arguments. +The new zone has neither preallocated nor reserved items. +The caller should set them up with +.Xr uma_prealloc 9 +and +.Xr uma_zone_reserve 9 +before using it. +The allocated +.Xr uma 9 +zone can be freed by +.Xr uma_zdestroy 9 +when it is no longer needed. +.Pp +The +.Fn g_ctor_bio +function is the +.Xr uma 9 +zone item constructor for +.Vt bio . +Use this for zones created without +.Fn g_io_new_uz . 
+The zones created by +.Fn g_io_new_uz +automatically have +.Fn g_ctor_bio +as the item constructor. +There is no item destructor; +.Vt bio +does not depend on any external resources for now. .Sh RETURN VALUES The .Fn g_new_bio @@ -266,6 +353,19 @@ or .Dv NULL if an error occurred. +.Pp +The +.Fn g_io_new_uz +function returns a pointer to the allocated +.Xr uma 9 +zone, +or +.Dv NULL +if an error occurred. +.Pp +The +.Fn g_ctor_bio +function always returns zero to indicate a successful item construction. .Sh EXAMPLES Implementation of .Dq Dv NULL Ns -transformation , diff --git a/sys/geom/geom.h b/sys/geom/geom.h --- a/sys/geom/geom.h +++ b/sys/geom/geom.h @@ -58,6 +58,7 @@ struct g_configargs; struct disk_zone_args; struct thread; +struct uma_zone; typedef int g_config_t (struct g_configargs *ca); typedef void g_ctl_req_t (struct gctl_req *, struct g_class *cp, char const *verb); @@ -335,11 +336,15 @@ int g_io_getattr(const char *attr, struct g_consumer *cp, int *len, void *ptr); int g_io_zonecmd(struct disk_zone_args *zone_args, struct g_consumer *cp); int g_io_flush(struct g_consumer *cp); +struct uma_zone *g_io_new_uz(const char *name); int g_io_speedup(off_t shortage, u_int flags, size_t *resid, struct g_consumer *cp); void g_io_request(struct bio *bp, struct g_consumer *cp); +int g_ctor_bio(void *mem, int size, void *arg, int flags); struct bio *g_new_bio(void); +struct bio *g_new_bio_uz(struct uma_zone *uz); struct bio *g_alloc_bio(void); +struct bio *g_alloc_bio_uz(struct uma_zone *uz); void g_reset_bio(struct bio *); void * g_read_data(struct g_consumer *cp, off_t offset, off_t length, int *error); int g_write_data(struct g_consumer *cp, off_t offset, void *ptr, off_t length); diff --git a/sys/geom/geom_io.c b/sys/geom/geom_io.c --- a/sys/geom/geom_io.c +++ b/sys/geom/geom_io.c @@ -137,17 +137,34 @@ return (bp); } +static int __read_mostly reserved_new_bios = 65536; + struct bio * g_new_bio(void) +{ + return (g_new_bio_uz(biozone)); +} + +struct bio * 
+g_new_bio_uz(uma_zone_t uz) { struct bio *bp; - bp = uma_zalloc(biozone, M_NOWAIT | M_ZERO); + if (NULL == uz) + uz = biozone; + + bp = uma_zalloc_arg(uz, uz, M_NOWAIT | + ((uz != biozone) || (reserved_new_bios > 0) ? M_USE_RESERVE : 0)); + if (__predict_false(NULL == bp)) + printf( + "g_new_bio_uz(): failed to allocate from zone %s (sysctl: %s)\n", + uma_zone_name(uz), + uma_zone_unique_name(uz)); #ifdef KTR if (KTR_GEOM_ENABLED) { struct stack st; - CTR1(KTR_GEOM, "g_new_bio(): %p", bp); + CTR1(KTR_GEOM, "g_new_bio_uz(): %p", bp); stack_save(&st); CTRSTACK(KTR_GEOM, &st, 3); } @@ -157,15 +174,24 @@ struct bio * g_alloc_bio(void) +{ + return (g_alloc_bio_uz(biozone)); +} + +struct bio * +g_alloc_bio_uz(struct uma_zone *uz) { struct bio *bp; - bp = uma_zalloc(biozone, M_WAITOK | M_ZERO); + if (NULL == uz) + uz = biozone; + + bp = uma_zalloc_arg(uz, uz, M_WAITOK); #ifdef KTR if (KTR_GEOM_ENABLED) { struct stack st; - CTR1(KTR_GEOM, "g_alloc_bio(): %p", bp); + CTR1(KTR_GEOM, "g_alloc_bio_uz(): %p", bp); stack_save(&st); CTRSTACK(KTR_GEOM, &st, 3); } @@ -176,6 +202,8 @@ void g_destroy_bio(struct bio *bp) { + KASSERT(NULL != bp->bio_uz, ("null bio zone")); + #ifdef KTR if (KTR_GEOM_ENABLED) { struct stack st; @@ -185,7 +213,7 @@ CTRSTACK(KTR_GEOM, &st, 3); } #endif - uma_zfree(biozone, bp); + uma_zfree(bp->bio_uz, bp); } struct bio * @@ -193,8 +221,12 @@ { struct bio *bp2; - bp2 = uma_zalloc(biozone, M_NOWAIT | M_ZERO); - if (bp2 != NULL) { + KASSERT(NULL != bp->bio_uz, ("null bio zone")); + + bp2 = uma_zalloc_arg(bp->bio_uz, bp->bio_uz, M_NOWAIT | + ((bp->bio_uz != biozone) || (reserved_new_bios > 0) ? 
+ M_USE_RESERVE : 0)); + if (__predict_true(bp2 != NULL)) { bp2->bio_parent = bp; bp2->bio_cmd = bp->bio_cmd; /* @@ -220,7 +252,11 @@ bp2->bio_track_bp = bp->bio_track_bp; #endif bp->bio_children++; - } + } else + printf( + "g_clone_bio(): failed to allocate from zone %s (sysctl: %s)\n", + uma_zone_name(bp->bio_uz), + uma_zone_unique_name(bp->bio_uz)); #ifdef KTR if (KTR_GEOM_ENABLED) { struct stack st; @@ -238,7 +274,9 @@ { struct bio *bp2; - bp2 = uma_zalloc(biozone, M_WAITOK | M_ZERO); + KASSERT(NULL != bp->bio_uz, ("null bio zone")); + + bp2 = uma_zalloc_arg(bp->bio_uz, bp->bio_uz, M_WAITOK); bp2->bio_flags = bp->bio_flags & (BIO_UNMAPPED | BIO_VLIST | BIO_SWAP); bp2->bio_parent = bp; bp2->bio_cmd = bp->bio_cmd; @@ -265,8 +303,40 @@ void g_reset_bio(struct bio *bp) { + uma_zone_t uz; + + KASSERT(NULL != bp->bio_uz, ("null bio zone")); + uz = bp->bio_uz; bzero(bp, sizeof(*bp)); + bp->bio_uz = uz; +} + +int +g_ctor_bio(void *mem, int size __unused, void *arg, int flags __unused) +{ + struct bio *bp; + uma_zone_t uz; + + bp = (struct bio *)mem; + uz = (uma_zone_t)arg; + + KASSERT(sizeof(*bp) == size, ("bio size mismatch")); + KASSERT(NULL != uz, ("null bio zone")); + + memset(bp, 0, sizeof(*bp)); + bp->bio_uz = uz; + + return (0); +} + +uma_zone_t +g_io_new_uz(const char *name) +{ + return (uma_zcreate(name, sizeof (struct bio), + g_ctor_bio, NULL, + NULL, NULL, + 0, 0)); } void @@ -275,10 +345,15 @@ g_bioq_init(&g_bio_run_down); g_bioq_init(&g_bio_run_up); - biozone = uma_zcreate("g_bio", sizeof (struct bio), - NULL, NULL, - NULL, NULL, - 0, 0); + biozone = g_io_new_uz("g_bio"); + /* + * XXX the reservation of a uma(9) zone cannot be altered if it is serving + * any items. 
+ */ + if (reserved_new_bios > 0) { + uma_prealloc(biozone, reserved_new_bios); + uma_zone_reserve(biozone, reserved_new_bios); + } } int @@ -734,6 +809,9 @@ SYSCTL_INT(_kern_geom, OID_AUTO, inflight_transient_maps, CTLFLAG_RD, &inflight_transient_maps, 0, "Current count of the active transient maps"); +SYSCTL_INT(_kern_geom, OID_AUTO, reserved_new_bios, CTLFLAG_RDTUN, + &reserved_new_bios, 0, + "Number of reserved new bios for non-blocking allocation"); static int g_io_transient_map_bio(struct bio *bp) diff --git a/sys/sys/bio.h b/sys/sys/bio.h --- a/sys/sys/bio.h +++ b/sys/sys/bio.h @@ -76,6 +76,11 @@ struct disk; struct bio; struct vm_map; +struct uma_zone; +/* + * XXX The typedef of uma_zone_t here makes a typedef redefinition, even if + * the actual definition does not change. + */ typedef void bio_task_t(void *); @@ -113,6 +118,7 @@ u_int bio_inbed; /* Children safely home by now */ struct bio *bio_parent; /* Pointer to parent */ struct bintime bio_t0; /* Time request started */ + struct uma_zone *bio_uz; /* uma(9) zone from which this bio comes */ bio_task_t *bio_task; /* Task_queue handler */ void *bio_task_arg; /* Argument to above */