Index: head/sys/kern/subr_disk.c =================================================================== --- head/sys/kern/subr_disk.c (revision 335065) +++ head/sys/kern/subr_disk.c (revision 335066) @@ -1,269 +1,287 @@ /*- * SPDX-License-Identifier: Beerware * * ---------------------------------------------------------------------------- * "THE BEER-WARE LICENSE" (Revision 42): * wrote this file. As long as you retain this notice you * can do whatever you want with this stuff. If we meet some day, and you think * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp * ---------------------------------------------------------------------------- * * The bioq_disksort() (and the specification of the bioq API) * have been written by Luigi Rizzo and Fabio Checconi under the same * license as above. */ #include __FBSDID("$FreeBSD$"); #include "opt_geom.h" #include #include #include #include #include +#include #include /* Read/write tunable registered under the _debug sysctl node. When greater than zero, bioq_disksort() stops sorting and appends at the tail once head->batched exceeds this value; the default of 0 disables that limit. */ +static int bioq_batchsize = 0; +SYSCTL_INT(_debug, OID_AUTO, bioq_batchsize, CTLFLAG_RW, + &bioq_batchsize, 0, "BIOQ batch size"); + /*- * Disk error is the preface to plaintive error messages * about failing disk transfers. It prints messages of the form * "hp0g: BLABLABLA cmd=read fsbn 12345 of 12344-12347" * blkdone should be -1 if the position of the error is unknown. * The message is printed with printf. 
*/
void
disk_err(struct bio *bp, const char *what, int blkdone, int nl)
{
	daddr_t first_blk, last_blk;

	/*
	 * Message prefix: the device name when a cdev is attached,
	 * otherwise the disk name and unit, otherwise a placeholder.
	 */
	if (bp->bio_dev != NULL) {
		printf("%s: %s ", devtoname(bp->bio_dev), what);
	} else if (bp->bio_disk != NULL) {
		printf("%s%d: %s ", bp->bio_disk->d_name,
		    bp->bio_disk->d_unit, what);
	} else {
		printf("disk??: %s ", what);
	}

	/* Name the command; anything not listed is printed in hex. */
	switch (bp->bio_cmd) {
	case BIO_READ:
		printf("cmd=read ");
		break;
	case BIO_WRITE:
		printf("cmd=write ");
		break;
	case BIO_DELETE:
		printf("cmd=delete ");
		break;
	case BIO_GETATTR:
		printf("cmd=getattr ");
		break;
	case BIO_FLUSH:
		printf("cmd=flush ");
		break;
	default:
		printf("cmd=%x ", bp->bio_cmd);
		break;
	}

	first_blk = bp->bio_pblkno;

	/* A transfer of at most one block has no range to report. */
	if (bp->bio_bcount <= DEV_BSIZE) {
		printf("fsbn %jd%s", (intmax_t)first_blk, nl ? "\n" : "");
		return;
	}

	/* A negative blkdone means the failing block is unknown. */
	if (blkdone >= 0)
		printf("fsbn %jd of ", (intmax_t)(first_blk + blkdone));

	/* Range of blocks covered by the whole transfer. */
	last_blk = first_blk + (bp->bio_bcount - 1) / DEV_BSIZE;
	printf("%jd-%jd", (intmax_t)first_blk, (intmax_t)last_blk);

	if (nl)
		printf("\n");
}

/*
 * BIO queue implementation
 *
 * Please read carefully the description below before making any change
 * to the code, or you might change the behaviour of the data structure
 * in undesirable ways.
 *
 * A bioq stores disk I/O requests (bios), normally sorted according to
 * the distance of the requested position (bio->bio_offset) from the
 * current head position (bioq->last_offset) in the scan direction, i.e.
 *
 *	(uoff_t)(bio_offset - last_offset)
 *
 * Note that the cast to unsigned (uoff_t) is fundamental to ensure
 * that the distance is computed in the scan direction.
* * The main methods for manipulating the bioq are: * * bioq_disksort() performs an ordered insertion; * * bioq_first() return the head of the queue, without removing; * * bioq_takefirst() return and remove the head of the queue, * updating the 'current head position' as * bioq->last_offset = bio->bio_offset + bio->bio_length; * * When updating the 'current head position', we assume that the result of * bioq_takefirst() is dispatched to the device, so bioq->last_offset * represents the head position once the request is complete. * * If the bioq is manipulated using only the above calls, it starts * with a sorted sequence of requests with bio_offset >= last_offset, * possibly followed by another sorted sequence of requests with * 0 <= bio_offset < bioq->last_offset * * NOTE: historical behaviour was to ignore bio->bio_length in the * update, but its use tracks the head position in a better way. * Historical behaviour was also to update the head position when * the request under service is complete, rather than when the * request is extracted from the queue. However, the current API * has no method to update the head position; secondly, once * a request has been submitted to the disk, we have no idea of * the actual head position, so the final one is our best guess. * * --- Direct queue manipulation --- * * A bioq uses an underlying TAILQ to store requests, so we also * export methods to manipulate the TAILQ, in particular: * * bioq_insert_tail() insert an entry at the end. * It also creates a 'barrier' so all subsequent * insertions through bioq_disksort() will end up * after this entry; * * bioq_insert_head() insert an entry at the head, update * bioq->last_offset = bio->bio_offset so that * all subsequent insertions through bioq_disksort() * will end up after this entry; * * bioq_remove() remove a generic element from the queue, act as * bioq_takefirst() if invoked on the head of the queue. 
* * The semantic of these methods is the same as the operations * on the underlying TAILQ, but with additional guarantees on * subsequent bioq_disksort() calls. E.g. bioq_insert_tail() * can be useful for making sure that all previous ops are flushed * to disk before continuing. * * Updating bioq->last_offset on a bioq_insert_head() guarantees * that the bio inserted with the last bioq_insert_head() will stay * at the head of the queue even after subsequent bioq_disksort(). * * Note that when the direct queue manipulation functions are used, * the queue may contain multiple inversion points (i.e. more than * two sorted sequences of requests). * */ void bioq_init(struct bio_queue_head *head) { TAILQ_INIT(&head->queue); head->last_offset = 0; head->insert_point = NULL; + head->total = 0; + head->batched = 0; } void bioq_remove(struct bio_queue_head *head, struct bio *bp) { if (head->insert_point == NULL) { if (bp == TAILQ_FIRST(&head->queue)) head->last_offset = bp->bio_offset + bp->bio_length; } else if (bp == head->insert_point) head->insert_point = NULL; TAILQ_REMOVE(&head->queue, bp, bio_queue); + head->total--; } void bioq_flush(struct bio_queue_head *head, struct devstat *stp, int error) { struct bio *bp; while ((bp = bioq_takefirst(head)) != NULL) biofinish(bp, stp, error); } void bioq_insert_head(struct bio_queue_head *head, struct bio *bp) { if (head->insert_point == NULL) head->last_offset = bp->bio_offset; TAILQ_INSERT_HEAD(&head->queue, bp, bio_queue); + head->total++; + head->batched = 0; } void bioq_insert_tail(struct bio_queue_head *head, struct bio *bp) { TAILQ_INSERT_TAIL(&head->queue, bp, bio_queue); + head->total++; head->insert_point = bp; head->last_offset = bp->bio_offset; } struct bio * bioq_first(struct bio_queue_head *head) { return (TAILQ_FIRST(&head->queue)); } struct bio * bioq_takefirst(struct bio_queue_head *head) { struct bio *bp; bp = TAILQ_FIRST(&head->queue); if (bp != NULL) bioq_remove(head, bp); return (bp); } /* * Compute the 
sorting key. The cast to unsigned is * fundamental for correctness, see the description * near the beginning of the file. */ static inline uoff_t bioq_bio_key(struct bio_queue_head *head, struct bio *bp) { return ((uoff_t)(bp->bio_offset - head->last_offset)); } /* * Seek sort for disks. * * Sort all requests in a single queue while keeping * track of the current position of the disk with last_offset. * See above for details. */ void bioq_disksort(struct bio_queue_head *head, struct bio *bp) { struct bio *cur, *prev; uoff_t key; if ((bp->bio_flags & BIO_ORDERED) != 0) { /* * Ordered transactions can only be dispatched * after any currently queued transactions. They * also have barrier semantics - no transactions * queued in the future can pass them. */ bioq_insert_tail(head, bp); return; } + if (bioq_batchsize > 0 && head->batched > bioq_batchsize) { + bioq_insert_tail(head, bp); + return; + } + prev = NULL; key = bioq_bio_key(head, bp); cur = TAILQ_FIRST(&head->queue); if (head->insert_point) { prev = head->insert_point; cur = TAILQ_NEXT(head->insert_point, bio_queue); } while (cur != NULL && key >= bioq_bio_key(head, cur)) { prev = cur; cur = TAILQ_NEXT(cur, bio_queue); } if (prev == NULL) TAILQ_INSERT_HEAD(&head->queue, bp, bio_queue); else TAILQ_INSERT_AFTER(&head->queue, prev, bp, bio_queue); + head->total++; + head->batched++; } Index: head/sys/sys/bio.h =================================================================== --- head/sys/sys/bio.h (revision 335065) +++ head/sys/sys/bio.h (revision 335066) @@ -1,182 +1,184 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. 
and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)buf.h 8.9 (Berkeley) 3/30/95 * $FreeBSD$ */ #ifndef _SYS_BIO_H_ #define _SYS_BIO_H_ #include #include /* bio_cmd */ #define BIO_READ 0x01 /* Read I/O data */ #define BIO_WRITE 0x02 /* Write I/O data */ #define BIO_DELETE 0x03 /* TRIM or free blocks, i.e. 
mark as unused */ #define BIO_GETATTR 0x04 /* Get GEOM attributes of object */ #define BIO_FLUSH 0x05 /* Commit outstanding I/O now */ #define BIO_CMD0 0x06 /* Available for local hacks */ #define BIO_CMD1 0x07 /* Available for local hacks */ #define BIO_CMD2 0x08 /* Available for local hacks */ #define BIO_ZONE 0x09 /* Zone command */ /* bio_flags */ #define BIO_ERROR 0x01 /* An error occurred processing this bio. */ #define BIO_DONE 0x02 /* This bio is finished. */ #define BIO_ONQUEUE 0x04 /* This bio is in a queue & not yet taken. */ /* * This bio must be executed after all previous bios in the queue have been * executed, and before any successive bios can be executed. */ #define BIO_ORDERED 0x08 #define BIO_UNMAPPED 0x10 #define BIO_TRANSIENT_MAPPING 0x20 #define BIO_VLIST 0x40 #ifdef _KERNEL struct disk; struct bio; struct vm_map; /* Empty classifier tag, to prevent further classification. */ #define BIO_NOTCLASSIFIED (void *)(~0UL) typedef void bio_task_t(void *); /* * The bio structure describes an I/O operation in the kernel. */ struct bio { uint16_t bio_cmd; /* I/O operation. */ uint16_t bio_flags; /* General flags. */ uint16_t bio_cflags; /* Private use by the consumer. */ uint16_t bio_pflags; /* Private use by the provider. */ struct cdev *bio_dev; /* Device to do I/O on. */ struct disk *bio_disk; /* Valid below geom_disk.c only */ off_t bio_offset; /* Offset into file. */ long bio_bcount; /* Valid bytes in buffer. */ caddr_t bio_data; /* Memory, superblocks, indirect etc. */ struct vm_page **bio_ma; /* Or unmapped. */ int bio_ma_offset; /* Offset in the first page of bio_ma. */ int bio_ma_n; /* Number of pages in bio_ma. */ int bio_error; /* Errno for BIO_ERROR. */ long bio_resid; /* Remaining I/O in bytes. */ void (*bio_done)(struct bio *); void *bio_driver1; /* Private use by the provider. */ void *bio_driver2; /* Private use by the provider. */ void *bio_caller1; /* Private use by the consumer. */ void *bio_caller2; /* Private use by the consumer. 
*/ TAILQ_ENTRY(bio) bio_queue; /* Disksort queue. */ const char *bio_attribute; /* Attribute for BIO_[GS]ETATTR */ struct disk_zone_args bio_zone;/* Used for BIO_ZONE */ struct g_consumer *bio_from; /* GEOM linkage */ struct g_provider *bio_to; /* GEOM linkage */ off_t bio_length; /* Like bio_bcount */ off_t bio_completed; /* Inverse of bio_resid */ u_int bio_children; /* Number of spawned bios */ u_int bio_inbed; /* Children safely home by now */ struct bio *bio_parent; /* Pointer to parent */ struct bintime bio_t0; /* Time request started */ bio_task_t *bio_task; /* Task_queue handler */ void *bio_task_arg; /* Argument to above */ void *bio_classifier1; /* Classifier tag. */ void *bio_classifier2; /* Classifier tag. */ #ifdef DIAGNOSTIC void *_bio_caller1; void *_bio_caller2; uint8_t _bio_cflags; #endif #if defined(BUF_TRACKING) || defined(FULL_BUF_TRACKING) struct buf *bio_track_bp; /* Parent buf for tracking */ #endif /* XXX: these go away when bio chaining is introduced */ daddr_t bio_pblkno; /* physical block number */ }; struct uio; struct devstat; /* Head of a bio queue, as manipulated by the bioq_*() functions declared below. */ struct bio_queue_head { TAILQ_HEAD(bio_queue, bio) queue; /* Underlying TAILQ of queued bios. */ off_t last_offset; /* Reference position used for bioq_disksort() keys. */ struct bio *insert_point; /* Barrier entry set by bioq_insert_tail(); NULL if none. */ + int total; /* Number of bios currently on the queue. */ + int batched; /* Sorted bioq_disksort() insertions since the counter was last reset. */ }; extern struct vm_map *bio_transient_map; extern int bio_transient_maxcnt; void biodone(struct bio *bp); void biofinish(struct bio *bp, struct devstat *stat, int error); int biowait(struct bio *bp, const char *wchan); #if defined(BUF_TRACKING) || defined(FULL_BUF_TRACKING) void biotrack_buf(struct bio *bp, const char *location); /* Forwards to biotrack_buf() only when the bio has a tracking buf attached; the variant without buf tracking is an empty stub. */ static __inline void biotrack(struct bio *bp, const char *location) { if (bp->bio_track_bp != NULL) biotrack_buf(bp, location); } #else static __inline void biotrack(struct bio *bp __unused, const char *location __unused) { } #endif void bioq_disksort(struct bio_queue_head *ap, struct bio *bp); struct bio *bioq_first(struct bio_queue_head *head); struct bio *bioq_takefirst(struct bio_queue_head *head); void bioq_flush(struct bio_queue_head *head, struct 
devstat *stp, int error); void bioq_init(struct bio_queue_head *head); void bioq_insert_head(struct bio_queue_head *head, struct bio *bp); void bioq_insert_tail(struct bio_queue_head *head, struct bio *bp); void bioq_remove(struct bio_queue_head *head, struct bio *bp); int physio(struct cdev *dev, struct uio *uio, int ioflag); /* physread and physwrite are both aliases for physio(). */ #define physread physio #define physwrite physio #endif /* _KERNEL */ #endif /* !_SYS_BIO_H_ */