Index: head/sys/boot/common/bcache.c =================================================================== --- head/sys/boot/common/bcache.c (revision 298899) +++ head/sys/boot/common/bcache.c (revision 298900) @@ -1,437 +1,468 @@ /*- * Copyright (c) 1998 Michael Smith * Copyright 2015 Toomas Soome * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include __FBSDID("$FreeBSD$"); /* * Simple hashed block cache */ #include #include #include #include #include "bootstrap.h" /* #define BCACHE_DEBUG */ #ifdef BCACHE_DEBUG # define DEBUG(fmt, args...) printf("%s: " fmt "\n" , __func__ , ## args) #else # define DEBUG(fmt, args...) #endif struct bcachectl { daddr_t bc_blkno; int bc_count; }; /* * bcache per device node. cache is allocated on device first open and freed * on last close, to save memory. The issue there is the size; biosdisk * supports up to 31 (0x1f) devices. Classic setup would use single disk * to boot from, but this has changed with zfs. */ struct bcache { struct bcachectl *bcache_ctl; caddr_t bcache_data; u_int bcache_nblks; size_t ra; }; static u_int bcache_total_nblks; /* set by bcache_init */ static u_int bcache_blksize; /* set by bcache_init */ static u_int bcache_numdev; /* set by bcache_add_dev */ /* statistics */ static u_int bcache_units; /* number of devices with cache */ static u_int bcache_unit_nblks; /* nblocks per unit */ static u_int bcache_hits; static u_int bcache_misses; static u_int bcache_ops; static u_int bcache_bypasses; static u_int bcache_bcount; static u_int bcache_rablks; #define BHASH(bc, blkno) ((blkno) & ((bc)->bcache_nblks - 1)) #define BCACHE_LOOKUP(bc, blkno) \ ((bc)->bcache_ctl[BHASH((bc), (blkno))].bc_blkno != (blkno)) #define BCACHE_READAHEAD 256 #define BCACHE_MINREADAHEAD 32 static void bcache_invalidate(struct bcache *bc, daddr_t blkno); static void bcache_insert(struct bcache *bc, daddr_t blkno); static void bcache_free_instance(struct bcache *bc); /* * Initialise the cache for (nblks) of (bsize). */ void bcache_init(u_int nblks, size_t bsize) { /* set up control data */ bcache_total_nblks = nblks; bcache_blksize = bsize; } /* * add number of devices to bcache. we have to divide cache space * between the devices, so bcache_add_dev() can be used to set up the * number. The issue is, we need to get the number before actual allocations. * bcache_add_dev() is supposed to be called from device init() call, so the * assumption is, devsw dv_init is called for plain devices first, and * for zfs, last. */ void bcache_add_dev(int devices) { bcache_numdev += devices; } void * bcache_allocate(void) { u_int i; struct bcache *bc = malloc(sizeof (struct bcache)); int disks = bcache_numdev; if (disks == 0) disks = 1; /* safe guard */ if (bc == NULL) { errno = ENOMEM; return (bc); } /* * the bcache block count must be power of 2 for hash function */ i = fls(disks) - 1; /* highbit - 1 */ if (disks > (1 << i)) /* next power of 2 */ i++; bc->bcache_nblks = bcache_total_nblks >> i; bcache_unit_nblks = bc->bcache_nblks; bc->bcache_data = malloc(bc->bcache_nblks * bcache_blksize); if (bc->bcache_data == NULL) { /* dont error out yet. fall back to 32 blocks and try again */ bc->bcache_nblks = 32; bc->bcache_data = malloc(bc->bcache_nblks * bcache_blksize); } bc->bcache_ctl = malloc(bc->bcache_nblks * sizeof(struct bcachectl)); if ((bc->bcache_data == NULL) || (bc->bcache_ctl == NULL)) { bcache_free_instance(bc); errno = ENOMEM; return(NULL); } /* Flush the cache */ for (i = 0; i < bc->bcache_nblks; i++) { bc->bcache_ctl[i].bc_count = -1; bc->bcache_ctl[i].bc_blkno = -1; } bcache_units++; bc->ra = BCACHE_READAHEAD; /* optimistic read ahead */ return (bc); } void bcache_free(void *cache) { struct bcache *bc = cache; if (bc == NULL) return; bcache_free_instance(bc); bcache_units--; } /* * Handle a write request; write directly to the disk, and populate the * cache with the new values. */ static int write_strategy(void *devdata, int rw, daddr_t blk, size_t offset, size_t size, char *buf, size_t *rsize) { struct bcache_devdata *dd = (struct bcache_devdata *)devdata; struct bcache *bc = dd->dv_cache; daddr_t i, nblk; nblk = size / bcache_blksize; /* Invalidate the blocks being written */ for (i = 0; i < nblk; i++) { bcache_invalidate(bc, blk + i); } /* Write the blocks */ return (dd->dv_strategy(dd->dv_devdata, rw, blk, offset, size, buf, rsize)); } /* * Handle a read request; fill in parts of the request that can * be satisfied by the cache, use the supplied strategy routine to do * device I/O and then use the I/O results to populate the cache. */ static int read_strategy(void *devdata, int rw, daddr_t blk, size_t offset, size_t size, char *buf, size_t *rsize) { struct bcache_devdata *dd = (struct bcache_devdata *)devdata; struct bcache *bc = dd->dv_cache; size_t i, nblk, p_size, r_size, complete, ra; int result; daddr_t p_blk; caddr_t p_buf; if (bc == NULL) { errno = ENODEV; return (-1); } if (rsize != NULL) *rsize = 0; nblk = size / bcache_blksize; if ((nblk == 0 && size != 0) || offset != 0) nblk++; result = 0; complete = 1; /* Satisfy any cache hits up front, break on first miss */ for (i = 0; i < nblk; i++) { if (BCACHE_LOOKUP(bc, (daddr_t)(blk + i))) { bcache_misses += (nblk - i); complete = 0; if (nblk - i > BCACHE_MINREADAHEAD && bc->ra > BCACHE_MINREADAHEAD) bc->ra >>= 1; /* reduce read ahead */ break; } else { bcache_hits++; } } if (complete) { /* whole set was in cache, return it */ if (bc->ra < BCACHE_READAHEAD) bc->ra <<= 1; /* increase read ahead */ bcopy(bc->bcache_data + (bcache_blksize * BHASH(bc, blk)) + offset, buf, size); goto done; } /* * Fill in any misses. From check we have i pointing to first missing * block, read in all remaining blocks + readahead. * We have space at least for nblk - i before bcache wraps. */ p_blk = blk + i; p_buf = bc->bcache_data + (bcache_blksize * BHASH(bc, p_blk)); r_size = bc->bcache_nblks - BHASH(bc, p_blk); /* remaining blocks */ p_size = MIN(r_size, nblk - i); /* read at least those blocks */ ra = bc->bcache_nblks - BHASH(bc, p_blk + p_size); if (ra != bc->bcache_nblks) { /* do we have RA space? */ ra = MIN(bc->ra, ra); p_size += ra; } /* invalidate bcache */ for (i = 0; i < p_size; i++) { bcache_invalidate(bc, p_blk + i); } + r_size = 0; + /* + * with read-ahead, it may happen we are attempting to read past + * disk end, as bcache has no information about disk size. + * in such case we should get partial read if some blocks can be + * read or error, if no blocks can be read. + * in either case we should return the data in bcache and only + * return error if there is no data. + */ result = dd->dv_strategy(dd->dv_devdata, rw, p_blk, 0, p_size * bcache_blksize, p_buf, &r_size); - if (result) - goto done; - r_size /= bcache_blksize; for (i = 0; i < r_size; i++) bcache_insert(bc, p_blk + i); - bcache_rablks += ra; - bcopy(bc->bcache_data + (bcache_blksize * BHASH(bc, blk)) + offset, buf, - size); + /* update ra statistics */ + if (r_size != 0) { + if (r_size < p_size) + bcache_rablks += (p_size - r_size); + else + bcache_rablks += ra; + } + /* check how much data can we copy */ + for (i = 0; i < nblk; i++) { + if (BCACHE_LOOKUP(bc, (daddr_t)(blk + i))) + break; + } + + size = i * bcache_blksize; + if (size != 0) { + bcopy(bc->bcache_data + (bcache_blksize * BHASH(bc, blk)) + offset, + buf, size); + result = 0; + } + done: if ((result == 0) && (rsize != NULL)) *rsize = size; return(result); } /* * Requests larger than 1/2 cache size will be bypassed and go * directly to the disk. XXX tune this. */ int bcache_strategy(void *devdata, int rw, daddr_t blk, size_t offset, size_t size, char *buf, size_t *rsize) { struct bcache_devdata *dd = (struct bcache_devdata *)devdata; struct bcache *bc = dd->dv_cache; u_int bcache_nblks = 0; int nblk, cblk, ret; size_t csize, isize, total; bcache_ops++; if (bc != NULL) bcache_nblks = bc->bcache_nblks; /* bypass large requests, or when the cache is inactive */ if (bc == NULL || (offset == 0 && ((size * 2 / bcache_blksize) > bcache_nblks))) { DEBUG("bypass %d from %d", size / bcache_blksize, blk); bcache_bypasses++; return (dd->dv_strategy(dd->dv_devdata, rw, blk, offset, size, buf, rsize)); } /* normalize offset */ while (offset >= bcache_blksize) { blk++; offset -= bcache_blksize; } switch (rw) { case F_READ: nblk = size / bcache_blksize; if (offset || (size != 0 && nblk == 0)) nblk++; /* read at least one block */ ret = 0; total = 0; while(size) { cblk = bcache_nblks - BHASH(bc, blk); /* # of blocks left */ cblk = MIN(cblk, nblk); if (size <= bcache_blksize) csize = size; else { csize = cblk * bcache_blksize; if (offset) csize -= (bcache_blksize - offset); } ret = read_strategy(devdata, rw, blk, offset, csize, buf+total, &isize); - if (ret != 0) - return (ret); + + /* + * we may have error from read ahead, if we have read some data + * return partial read. + */ + if (ret != 0 || isize == 0) { + if (total != 0) + ret = 0; + break; + } blk += (offset+isize) / bcache_blksize; offset = 0; total += isize; size -= isize; nblk = size / bcache_blksize; } if (rsize) *rsize = total; return (ret); case F_WRITE: return write_strategy(devdata, rw, blk, offset, size, buf, rsize); } return -1; } /* * Free allocated bcache instance */ static void bcache_free_instance(struct bcache *bc) { if (bc != NULL) { if (bc->bcache_ctl) free(bc->bcache_ctl); if (bc->bcache_data) free(bc->bcache_data); free(bc); } } /* * Insert a block into the cache. */ static void bcache_insert(struct bcache *bc, daddr_t blkno) { u_int cand; cand = BHASH(bc, blkno); DEBUG("insert blk %llu -> %u # %d", blkno, cand, bcache_bcount); bc->bcache_ctl[cand].bc_blkno = blkno; bc->bcache_ctl[cand].bc_count = bcache_bcount++; } /* * Invalidate a block from the cache. */ static void bcache_invalidate(struct bcache *bc, daddr_t blkno) { u_int i; i = BHASH(bc, blkno); if (bc->bcache_ctl[i].bc_blkno == blkno) { bc->bcache_ctl[i].bc_count = -1; bc->bcache_ctl[i].bc_blkno = -1; DEBUG("invalidate blk %llu", blkno); } } #ifndef BOOT2 COMMAND_SET(bcachestat, "bcachestat", "get disk block cache stats", command_bcache); static int command_bcache(int argc, char *argv[]) { if (argc != 1) { command_errmsg = "wrong number of arguments"; return(CMD_ERROR); } printf("\ncache blocks: %d\n", bcache_total_nblks); printf("cache blocksz: %d\n", bcache_blksize); printf("cache readahead: %d\n", bcache_rablks); printf("unit cache blocks: %d\n", bcache_unit_nblks); printf("cached units: %d\n", bcache_units); printf("%d ops %d bypasses %d hits %d misses\n", bcache_ops, bcache_bypasses, bcache_hits, bcache_misses); return(CMD_OK); } #endif Index: head/sys/boot/efi/libefi/efipart.c =================================================================== --- head/sys/boot/efi/libefi/efipart.c (revision 298899) +++ head/sys/boot/efi/libefi/efipart.c (revision 298900) @@ -1,358 +1,366 @@ /*- * Copyright (c) 2010 Marcel Moolenaar * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include static EFI_GUID blkio_guid = BLOCK_IO_PROTOCOL; static EFI_GUID devpath_guid = DEVICE_PATH_PROTOCOL; static int efipart_init(void); static int efipart_strategy(void *, int, daddr_t, size_t, size_t, char *, size_t *); static int efipart_realstrategy(void *, int, daddr_t, size_t, size_t, char *, size_t *); static int efipart_open(struct open_file *, ...); static int efipart_close(struct open_file *); static void efipart_print(int); struct devsw efipart_dev = { .dv_name = "part", .dv_type = DEVT_DISK, .dv_init = efipart_init, .dv_strategy = efipart_strategy, .dv_open = efipart_open, .dv_close = efipart_close, .dv_ioctl = noioctl, .dv_print = efipart_print, .dv_cleanup = NULL }; /* * info structure to support bcache */ #define MAXPDDEV 31 /* see MAXDEV in libi386.h */ static struct pdinfo { int pd_unit; /* unit number */ int pd_open; /* reference counter */ void *pd_bcache; /* buffer cache data */ } pdinfo [MAXPDDEV]; static int npdinfo = 0; #define PD(dev) (pdinfo[(dev)->d_unit]) static int efipart_init(void) { EFI_BLOCK_IO *blkio; EFI_DEVICE_PATH *devpath, *devpathcpy, *tmpdevpath, *node; EFI_HANDLE *hin, *hout, *aliases, handle; EFI_STATUS status; UINTN sz; u_int n, nin, nout; int err; size_t devpathlen; sz = 0; hin = NULL; status = BS->LocateHandle(ByProtocol, &blkio_guid, 0, &sz, 0); if (status == EFI_BUFFER_TOO_SMALL) { hin = (EFI_HANDLE *)malloc(sz * 3); status = BS->LocateHandle(ByProtocol, &blkio_guid, 0, &sz, hin); if (EFI_ERROR(status)) free(hin); } if (EFI_ERROR(status)) return (efi_status_to_errno(status)); /* Filter handles to only include FreeBSD partitions. */ nin = sz / sizeof(EFI_HANDLE); hout = hin + nin; aliases = hout + nin; nout = 0; bzero(aliases, nin * sizeof(EFI_HANDLE)); for (n = 0; n < nin; n++) { status = BS->HandleProtocol(hin[n], &devpath_guid, (void **)&devpath); if (EFI_ERROR(status)) { continue; } node = devpath; devpathlen = DevicePathNodeLength(node); while (!IsDevicePathEnd(NextDevicePathNode(node))) { node = NextDevicePathNode(node); devpathlen += DevicePathNodeLength(node); } devpathlen += DevicePathNodeLength(NextDevicePathNode(node)); status = BS->HandleProtocol(hin[n], &blkio_guid, (void**)&blkio); if (EFI_ERROR(status)) continue; if (!blkio->Media->LogicalPartition) continue; /* * If we come across a logical partition of subtype CDROM * it doesn't refer to the CD filesystem itself, but rather * to any usable El Torito boot image on it. In this case * we try to find the parent device and add that instead as * that will be the CD filesystem. */ if (DevicePathType(node) == MEDIA_DEVICE_PATH && DevicePathSubType(node) == MEDIA_CDROM_DP) { devpathcpy = malloc(devpathlen); memcpy(devpathcpy, devpath, devpathlen); node = devpathcpy; while (!IsDevicePathEnd(NextDevicePathNode(node))) node = NextDevicePathNode(node); SetDevicePathEndNode(node); tmpdevpath = devpathcpy; status = BS->LocateDevicePath(&blkio_guid, &tmpdevpath, &handle); free(devpathcpy); if (EFI_ERROR(status)) continue; hout[nout] = handle; aliases[nout] = hin[n]; } else hout[nout] = hin[n]; nout++; pdinfo[npdinfo].pd_open = 0; pdinfo[npdinfo].pd_bcache = NULL; pdinfo[npdinfo].pd_unit = npdinfo; npdinfo++; } bcache_add_dev(npdinfo); err = efi_register_handles(&efipart_dev, hout, aliases, nout); free(hin); return (err); } static void efipart_print(int verbose) { char line[80]; EFI_BLOCK_IO *blkio; EFI_HANDLE h; EFI_STATUS status; u_int unit; for (unit = 0, h = efi_find_handle(&efipart_dev, 0); h != NULL; h = efi_find_handle(&efipart_dev, ++unit)) { sprintf(line, " %s%d:", efipart_dev.dv_name, unit); pager_output(line); status = BS->HandleProtocol(h, &blkio_guid, (void **)&blkio); if (!EFI_ERROR(status)) { sprintf(line, " %llu blocks", (unsigned long long)(blkio->Media->LastBlock + 1)); pager_output(line); if (blkio->Media->RemovableMedia) pager_output(" (removable)"); } pager_output("\n"); } } static int efipart_open(struct open_file *f, ...) { va_list args; struct devdesc *dev; EFI_BLOCK_IO *blkio; EFI_HANDLE h; EFI_STATUS status; va_start(args, f); dev = va_arg(args, struct devdesc*); va_end(args); h = efi_find_handle(&efipart_dev, dev->d_unit); if (h == NULL) return (EINVAL); status = BS->HandleProtocol(h, &blkio_guid, (void **)&blkio); if (EFI_ERROR(status)) return (efi_status_to_errno(status)); if (!blkio->Media->MediaPresent) return (EAGAIN); dev->d_opendata = blkio; PD(dev).pd_open++; if (PD(dev).pd_bcache == NULL) PD(dev).pd_bcache = bcache_allocate(); return (0); } static int efipart_close(struct open_file *f) { struct devdesc *dev; dev = (struct devdesc *)(f->f_devdata); if (dev->d_opendata == NULL) return (EINVAL); dev->d_opendata = NULL; PD(dev).pd_open--; if (PD(dev).pd_open == 0) { bcache_free(PD(dev).pd_bcache); PD(dev).pd_bcache = NULL; } return (0); } /* * efipart_readwrite() * Internal equivalent of efipart_strategy(), which operates on the * media-native block size. This function expects all I/O requests * to be within the media size and returns an error if such is not * the case. */ static int efipart_readwrite(EFI_BLOCK_IO *blkio, int rw, daddr_t blk, daddr_t nblks, char *buf) { EFI_STATUS status; if (blkio == NULL) return (ENXIO); if (blk < 0 || blk > blkio->Media->LastBlock) return (EIO); if ((blk + nblks - 1) > blkio->Media->LastBlock) return (EIO); switch (rw) { case F_READ: status = blkio->ReadBlocks(blkio, blkio->Media->MediaId, blk, nblks * blkio->Media->BlockSize, buf); break; case F_WRITE: if (blkio->Media->ReadOnly) return (EROFS); status = blkio->WriteBlocks(blkio, blkio->Media->MediaId, blk, nblks * blkio->Media->BlockSize, buf); break; default: return (ENOSYS); } if (EFI_ERROR(status)) printf("%s: rw=%d, status=%lu\n", __func__, rw, (u_long)status); return (efi_status_to_errno(status)); } static int efipart_strategy(void *devdata, int rw, daddr_t blk, size_t offset, size_t size, char *buf, size_t *rsize) { struct bcache_devdata bcd; struct devdesc *dev; dev = (struct devdesc *)devdata; bcd.dv_strategy = efipart_realstrategy; bcd.dv_devdata = devdata; bcd.dv_cache = PD(dev).pd_bcache; return (bcache_strategy(&bcd, rw, blk, offset, size, buf, rsize)); } static int efipart_realstrategy(void *devdata, int rw, daddr_t blk, size_t offset, size_t size, char *buf, size_t *rsize) { struct devdesc *dev = (struct devdesc *)devdata; EFI_BLOCK_IO *blkio; off_t off; char *blkbuf; size_t blkoff, blksz; int error; if (dev == NULL || blk < 0) return (EINVAL); blkio = dev->d_opendata; if (blkio == NULL) return (ENXIO); if (size == 0 || (size % 512) != 0) return (EIO); + off = blk * 512; + /* make sure we don't read past disk end */ + if ((off + size) / blkio->Media->BlockSize - 1 > + blkio->Media->LastBlock) { + size = blkio->Media->LastBlock + 1 - + off / blkio->Media->BlockSize; + size = size * blkio->Media->BlockSize; + } + if (rsize != NULL) *rsize = size; if (blkio->Media->BlockSize == 512) return (efipart_readwrite(blkio, rw, blk, size / 512, buf)); /* * The block size of the media is not 512B per sector. */ blkbuf = malloc(blkio->Media->BlockSize); if (blkbuf == NULL) return (ENOMEM); error = 0; - off = blk * 512; blk = off / blkio->Media->BlockSize; blkoff = off % blkio->Media->BlockSize; blksz = blkio->Media->BlockSize - blkoff; while (size > 0) { error = efipart_readwrite(blkio, rw, blk, 1, blkbuf); if (error) break; if (size < blksz) blksz = size; bcopy(blkbuf + blkoff, buf, blksz); buf += blksz; size -= blksz; blk++; blkoff = 0; blksz = blkio->Media->BlockSize; } free(blkbuf); return (error); } Index: head/sys/boot/i386/libi386/bioscd.c =================================================================== --- head/sys/boot/i386/libi386/bioscd.c (revision 298899) +++ head/sys/boot/i386/libi386/bioscd.c (revision 298900) @@ -1,422 +1,445 @@ /*- * Copyright (c) 1998 Michael Smith * Copyright (c) 2001 John H. Baldwin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * BIOS CD device handling for CD's that have been booted off of via no * emulation booting as defined in the El Torito standard. * * Ideas and algorithms from: * * - FreeBSD libi386/biosdisk.c * */ #include #include #include #include #include #include #include #include "libi386.h" #define BIOSCD_SECSIZE 2048 #define BUFSIZE (1 * BIOSCD_SECSIZE) #define MAXBCDEV 1 /* Major numbers for devices we frontend for. */ #define ACDMAJOR 117 #define CDMAJOR 15 #ifdef DISK_DEBUG # define DEBUG(fmt, args...) printf("%s: " fmt "\n" , __func__ , ## args) #else # define DEBUG(fmt, args...) #endif struct specification_packet { u_char sp_size; u_char sp_bootmedia; u_char sp_drive; u_char sp_controller; u_int sp_lba; u_short sp_devicespec; u_short sp_buffersegment; u_short sp_loadsegment; u_short sp_sectorcount; u_short sp_cylsec; u_char sp_head; }; /* * List of BIOS devices, translation from disk unit number to * BIOS unit number. */ static struct bcinfo { int bc_unit; /* BIOS unit number */ struct specification_packet bc_sp; int bc_open; /* reference counter */ void *bc_bcache; /* buffer cache data */ } bcinfo [MAXBCDEV]; static int nbcinfo = 0; #define BC(dev) (bcinfo[(dev)->d_unit]) static int bc_read(int unit, daddr_t dblk, int blks, caddr_t dest); static int bc_init(void); static int bc_strategy(void *devdata, int flag, daddr_t dblk, size_t offset, size_t size, char *buf, size_t *rsize); static int bc_realstrategy(void *devdata, int flag, daddr_t dblk, size_t offset, size_t size, char *buf, size_t *rsize); static int bc_open(struct open_file *f, ...); static int bc_close(struct open_file *f); static void bc_print(int verbose); struct devsw bioscd = { "cd", DEVT_CD, bc_init, bc_strategy, bc_open, bc_close, noioctl, bc_print, NULL }; /* * Translate between BIOS device numbers and our private unit numbers. */ int bc_bios2unit(int biosdev) { int i; DEBUG("looking for bios device 0x%x", biosdev); for (i = 0; i < nbcinfo; i++) { DEBUG("bc unit %d is BIOS device 0x%x", i, bcinfo[i].bc_unit); if (bcinfo[i].bc_unit == biosdev) return(i); } return(-1); } int bc_unit2bios(int unit) { if ((unit >= 0) && (unit < nbcinfo)) return(bcinfo[unit].bc_unit); return(-1); } /* * We can't quiz, we have to be told what device to use, so this functoin * doesn't do anything. Instead, the loader calls bc_add() with the BIOS * device number to add. */ static int bc_init(void) { return (0); } int bc_add(int biosdev) { if (nbcinfo >= MAXBCDEV) return (-1); bcinfo[nbcinfo].bc_unit = biosdev; v86.ctl = V86_FLAGS; v86.addr = 0x13; v86.eax = 0x4b01; v86.edx = biosdev; v86.ds = VTOPSEG(&bcinfo[nbcinfo].bc_sp); v86.esi = VTOPOFF(&bcinfo[nbcinfo].bc_sp); v86int(); if ((v86.eax & 0xff00) != 0) return (-1); printf("BIOS CD is cd%d\n", nbcinfo); nbcinfo++; bcache_add_dev(nbcinfo); /* register cd device in bcache */ return(0); } /* * Print information about disks */ static void bc_print(int verbose) { char line[80]; int i; for (i = 0; i < nbcinfo; i++) { sprintf(line, " cd%d: Device 0x%x\n", i, bcinfo[i].bc_sp.sp_devicespec); pager_output(line); } } /* * Attempt to open the disk described by (dev) for use by (f). */ static int bc_open(struct open_file *f, ...) { va_list ap; struct i386_devdesc *dev; va_start(ap, f); dev = va_arg(ap, struct i386_devdesc *); va_end(ap); if (dev->d_unit >= nbcinfo) { DEBUG("attempt to open nonexistent disk"); return(ENXIO); } BC(dev).bc_open++; if (BC(dev).bc_bcache == NULL) BC(dev).bc_bcache = bcache_allocate(); return(0); } static int bc_close(struct open_file *f) { struct i386_devdesc *dev; dev = (struct i386_devdesc *)f->f_devdata; BC(dev).bc_open--; if (BC(dev).bc_open == 0) { bcache_free(BC(dev).bc_bcache); BC(dev).bc_bcache = NULL; } return(0); } static int bc_strategy(void *devdata, int rw, daddr_t dblk, size_t offset, size_t size, char *buf, size_t *rsize) { struct bcache_devdata bcd; struct i386_devdesc *dev; dev = (struct i386_devdesc *)devdata; bcd.dv_strategy = bc_realstrategy; bcd.dv_devdata = devdata; bcd.dv_cache = BC(dev).bc_bcache; return (bcache_strategy(&bcd, rw, dblk, offset, size, buf, rsize)); } static int bc_realstrategy(void *devdata, int rw, daddr_t dblk, size_t offset, size_t size, char *buf, size_t *rsize) { struct i386_devdesc *dev; int unit; int blks; #ifdef BD_SUPPORT_FRAGS char fragbuf[BIOSCD_SECSIZE]; size_t fragsize; fragsize = size % BIOSCD_SECSIZE; #else if (size % BIOSCD_SECSIZE) return (EINVAL); #endif if (rw != F_READ) return(EROFS); dev = (struct i386_devdesc *)devdata; unit = dev->d_unit; blks = size / BIOSCD_SECSIZE; if (dblk % (BIOSCD_SECSIZE / DEV_BSIZE) != 0) return (EINVAL); dblk /= (BIOSCD_SECSIZE / DEV_BSIZE); DEBUG("read %d from %lld to %p", blks, dblk, buf); if (rsize) *rsize = 0; - if (blks && bc_read(unit, dblk, blks, buf)) { + if ((blks = bc_read(unit, dblk, blks, buf)) < 0) { DEBUG("read error"); return (EIO); + } else { + if (size / BIOSCD_SECSIZE > blks) { + if (rsize) + *rsize = blks * BIOSCD_SECSIZE; + return (0); + } } #ifdef BD_SUPPORT_FRAGS DEBUG("frag read %d from %lld+%d to %p", fragsize, dblk, blks, buf + (blks * BIOSCD_SECSIZE)); - if (fragsize && bc_read(unit, dblk + blks, 1, fragbuf)) { + if (fragsize && bc_read(unit, dblk + blks, 1, fragbuf) != 1) { + if (blks) { + if (rsize) + *rsize = blks * BIOSCD_SECSIZE; + return (0); + } DEBUG("frag read error"); return(EIO); } bcopy(fragbuf, buf + (blks * BIOSCD_SECSIZE), fragsize); #endif if (rsize) *rsize = size; return (0); } /* Max number of sectors to bounce-buffer at a time. */ #define CD_BOUNCEBUF 8 +/* return negative value for an error, otherwise blocks read */ static int bc_read(int unit, daddr_t dblk, int blks, caddr_t dest) { u_int maxfer, resid, result, retry, x; caddr_t bbuf, p, xp; static struct edd_packet packet; int biosdev; #ifdef DISK_DEBUG int error; #endif /* Just in case some idiot actually tries to read -1 blocks... */ if (blks < 0) return (-1); /* If nothing to do, just return succcess. */ if (blks == 0) return (0); /* Decide whether we have to bounce */ if (VTOP(dest) >> 20 != 0) { /* * The destination buffer is above first 1MB of * physical memory so we have to arrange a suitable * bounce buffer. */ x = min(CD_BOUNCEBUF, (unsigned)blks); bbuf = alloca(x * BIOSCD_SECSIZE); maxfer = x; } else { bbuf = NULL; maxfer = 0; } biosdev = bc_unit2bios(unit); resid = blks; p = dest; while (resid > 0) { if (bbuf) xp = bbuf; else xp = p; x = resid; if (maxfer > 0) x = min(x, maxfer); /* * Loop retrying the operation a couple of times. The BIOS * may also retry. */ for (retry = 0; retry < 3; retry++) { /* If retrying, reset the drive */ if (retry > 0) { v86.ctl = V86_FLAGS; v86.addr = 0x13; v86.eax = 0; v86.edx = biosdev; v86int(); } packet.len = sizeof(struct edd_packet); packet.count = x; packet.off = VTOPOFF(xp); packet.seg = VTOPSEG(xp); packet.lba = dblk; v86.ctl = V86_FLAGS; v86.addr = 0x13; v86.eax = 0x4200; v86.edx = biosdev; v86.ds = VTOPSEG(&packet); v86.esi = VTOPOFF(&packet); v86int(); result = V86_CY(v86.efl); if (result == 0) break; + /* fall back to 1 sector read */ + x = 1; } #ifdef DISK_DEBUG error = (v86.eax >> 8) & 0xff; #endif DEBUG("%d sectors from %lld to %p (0x%x) %s", x, dblk, p, VTOP(p), result ? "failed" : "ok"); DEBUG("unit %d status 0x%x", unit, error); + + /* still an error? break off */ + if (result != 0) + break; + if (bbuf != NULL) bcopy(bbuf, p, x * BIOSCD_SECSIZE); p += (x * BIOSCD_SECSIZE); dblk += x; resid -= x; } /* hexdump(dest, (blks * BIOSCD_SECSIZE)); */ - return(0); + + if (blks - resid == 0) + return (-1); /* read failed */ + + return (blks - resid); } /* * Return a suitable dev_t value for (dev). */ int bc_getdev(struct i386_devdesc *dev) { int biosdev, unit; int major; int rootdev; unit = dev->d_unit; biosdev = bc_unit2bios(unit); DEBUG("unit %d BIOS device %d", unit, biosdev); if (biosdev == -1) /* not a BIOS device */ return(-1); /* * XXX: Need to examine device spec here to figure out if SCSI or * ATAPI. No idea on how to figure out device number. All we can * really pass to the kernel is what bus and device on which bus we * were booted from, which dev_t isn't well suited to since those * number don't match to unit numbers very well. We may just need * to engage in a hack where we pass -C to the boot args if we are * the boot device. */ major = ACDMAJOR; unit = 0; /* XXX */ /* XXX: Assume partition 'a'. */ rootdev = MAKEBOOTDEV(major, 0, unit, 0); DEBUG("dev is 0x%x\n", rootdev); return(rootdev); } Index: head/sys/boot/i386/libi386/biosdisk.c =================================================================== --- head/sys/boot/i386/libi386/biosdisk.c (revision 298899) +++ head/sys/boot/i386/libi386/biosdisk.c (revision 298900) @@ -1,893 +1,905 @@ /*- * Copyright (c) 1998 Michael Smith * Copyright (c) 2012 Andrey V. Elsukov * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * BIOS disk device handling. * * Ideas and algorithms from: * * - NetBSD libi386/biosdisk.c * - FreeBSD biosboot/disk.c * */ #include #include #include #include #include #include #include #include "disk.h" #include "libi386.h" #ifdef LOADER_GELI_SUPPORT #include "cons.h" #include "drv.h" #include "gpt.h" #include "part.h" #include struct pentry { struct ptable_entry part; uint64_t flags; union { uint8_t bsd; uint8_t mbr; uuid_t gpt; uint16_t vtoc8; } type; STAILQ_ENTRY(pentry) entry; }; struct ptable { enum ptable_type type; uint16_t sectorsize; uint64_t sectors; STAILQ_HEAD(, pentry) entries; }; #include "geliboot.c" #endif /* LOADER_GELI_SUPPORT */ CTASSERT(sizeof(struct i386_devdesc) >= sizeof(struct disk_devdesc)); #define BIOS_NUMDRIVES 0x475 #define BIOSDISK_SECSIZE 512 #define BUFSIZE (1 * BIOSDISK_SECSIZE) #define DT_ATAPI 0x10 /* disk type for ATAPI floppies */ #define WDMAJOR 0 /* major numbers for devices we frontend for */ #define WFDMAJOR 1 #define FDMAJOR 2 #define DAMAJOR 4 #ifdef DISK_DEBUG # define DEBUG(fmt, args...) printf("%s: " fmt "\n" , __func__ , ## args) #else # define DEBUG(fmt, args...) #endif /* * List of BIOS devices, translation from disk unit number to * BIOS unit number. */ static struct bdinfo { int bd_unit; /* BIOS unit number */ int bd_cyl; /* BIOS geometry */ int bd_hds; int bd_sec; int bd_flags; #define BD_MODEINT13 0x0000 #define BD_MODEEDD1 0x0001 #define BD_MODEEDD3 0x0002 #define BD_MODEMASK 0x0003 #define BD_FLOPPY 0x0004 int bd_type; /* BIOS 'drive type' (floppy only) */ uint16_t bd_sectorsize; /* Sector size */ uint64_t bd_sectors; /* Disk size */ int bd_open; /* reference counter */ void *bd_bcache; /* buffer cache data */ } bdinfo [MAXBDDEV]; static int nbdinfo = 0; #define BD(dev) (bdinfo[(dev)->d_unit]) static int bd_read(struct disk_devdesc *dev, daddr_t dblk, int blks, caddr_t dest); static int bd_write(struct disk_devdesc *dev, daddr_t dblk, int blks, caddr_t dest); static int bd_int13probe(struct bdinfo *bd); static int bd_init(void); static int bd_strategy(void *devdata, int flag, daddr_t dblk, size_t offset, size_t size, char *buf, size_t *rsize); static int bd_realstrategy(void *devdata, int flag, daddr_t dblk, size_t offset, size_t size, char *buf, size_t *rsize); static int bd_open(struct open_file *f, ...); static int bd_close(struct open_file *f); static int bd_ioctl(struct open_file *f, u_long cmd, void *data); static void bd_print(int verbose); static void bd_cleanup(void); #ifdef LOADER_GELI_SUPPORT static enum isgeli { ISGELI_UNKNOWN, ISGELI_NO, ISGELI_YES }; static enum isgeli geli_status[MAXBDDEV][MAXTBLENTS]; int bios_read(void *vdev __unused, struct dsk *priv, off_t off, char *buf, size_t bytes); #endif /* LOADER_GELI_SUPPORT */ struct devsw biosdisk = { "disk", DEVT_DISK, bd_init, bd_strategy, bd_open, bd_close, bd_ioctl, bd_print, bd_cleanup }; /* * Translate between BIOS device numbers and our private unit numbers. */ int bd_bios2unit(int biosdev) { int i; DEBUG("looking for bios device 0x%x", biosdev); for (i = 0; i < nbdinfo; i++) { DEBUG("bd unit %d is BIOS device 0x%x", i, bdinfo[i].bd_unit); if (bdinfo[i].bd_unit == biosdev) return (i); } return (-1); } int bd_unit2bios(int unit) { if ((unit >= 0) && (unit < nbdinfo)) return (bdinfo[unit].bd_unit); return (-1); } /* * Quiz the BIOS for disk devices, save a little info about them. */ static int bd_init(void) { int base, unit, nfd = 0; #ifdef LOADER_GELI_SUPPORT geli_init(); #endif /* sequence 0, 0x80 */ for (base = 0; base <= 0x80; base += 0x80) { for (unit = base; (nbdinfo < MAXBDDEV); unit++) { #ifndef VIRTUALBOX /* * Check the BIOS equipment list for number * of fixed disks. */ if(base == 0x80 && (nfd >= *(unsigned char *)PTOV(BIOS_NUMDRIVES))) break; #endif bdinfo[nbdinfo].bd_open = 0; bdinfo[nbdinfo].bd_bcache = NULL; bdinfo[nbdinfo].bd_unit = unit; bdinfo[nbdinfo].bd_flags = unit < 0x80 ? BD_FLOPPY: 0; if (!bd_int13probe(&bdinfo[nbdinfo])) break; /* XXX we need "disk aliases" to make this simpler */ printf("BIOS drive %c: is disk%d\n", (unit < 0x80) ? ('A' + unit): ('C' + unit - 0x80), nbdinfo); nbdinfo++; if (base == 0x80) nfd++; } } bcache_add_dev(nbdinfo); return(0); } static void bd_cleanup(void) { disk_cleanup(&biosdisk); } /* * Try to detect a device supported by the legacy int13 BIOS */ static int bd_int13probe(struct bdinfo *bd) { struct edd_params params; v86.ctl = V86_FLAGS; v86.addr = 0x13; v86.eax = 0x800; v86.edx = bd->bd_unit; v86int(); if (V86_CY(v86.efl) || /* carry set */ (v86.ecx & 0x3f) == 0 || /* absurd sector number */ (v86.edx & 0xff) <= (unsigned)(bd->bd_unit & 0x7f)) /* unit # bad */ return (0); /* skip device */ /* Convert max cyl # -> # of cylinders */ bd->bd_cyl = ((v86.ecx & 0xc0) << 2) + ((v86.ecx & 0xff00) >> 8) + 1; /* Convert max head # -> # of heads */ bd->bd_hds = ((v86.edx & 0xff00) >> 8) + 1; bd->bd_sec = v86.ecx & 0x3f; bd->bd_type = v86.ebx & 0xff; bd->bd_flags |= BD_MODEINT13; /* Calculate sectors count from the geometry */ bd->bd_sectors = bd->bd_cyl * bd->bd_hds * bd->bd_sec; bd->bd_sectorsize = BIOSDISK_SECSIZE; DEBUG("unit 0x%x geometry %d/%d/%d", bd->bd_unit, bd->bd_cyl, bd->bd_hds, bd->bd_sec); /* Determine if we can use EDD with this device. */ v86.ctl = V86_FLAGS; v86.addr = 0x13; v86.eax = 0x4100; v86.edx = bd->bd_unit; v86.ebx = 0x55aa; v86int(); if (V86_CY(v86.efl) || /* carry set */ (v86.ebx & 0xffff) != 0xaa55 || /* signature */ (v86.ecx & EDD_INTERFACE_FIXED_DISK) == 0) return (1); /* EDD supported */ bd->bd_flags |= BD_MODEEDD1; if ((v86.eax & 0xff00) >= 0x3000) bd->bd_flags |= BD_MODEEDD3; /* Get disk params */ params.len = sizeof(struct edd_params); v86.ctl = V86_FLAGS; v86.addr = 0x13; v86.eax = 0x4800; v86.edx = bd->bd_unit; v86.ds = VTOPSEG(¶ms); v86.esi = VTOPOFF(¶ms); v86int(); if (!V86_CY(v86.efl)) { bd->bd_sectors = params.sectors; bd->bd_sectorsize = params.sector_size; } DEBUG("unit 0x%x flags %x, sectors %llu, sectorsize %u", bd->bd_unit, bd->bd_flags, bd->bd_sectors, bd->bd_sectorsize); return (1); } /* * Print information about disks */ static void bd_print(int verbose) { static char line[80]; struct disk_devdesc dev; int i; for (i = 0; i < nbdinfo; i++) { sprintf(line, " disk%d: BIOS drive %c:\n", i, (bdinfo[i].bd_unit < 0x80) ? ('A' + bdinfo[i].bd_unit): ('C' + bdinfo[i].bd_unit - 0x80)); pager_output(line); dev.d_dev = &biosdisk; dev.d_unit = i; dev.d_slice = -1; dev.d_partition = -1; if (disk_open(&dev, bdinfo[i].bd_sectorsize * bdinfo[i].bd_sectors, bdinfo[i].bd_sectorsize, (bdinfo[i].bd_flags & BD_FLOPPY) ? DISK_F_NOCACHE: 0) == 0) { sprintf(line, " disk%d", i); disk_print(&dev, line, verbose); disk_close(&dev); } } } /* * Attempt to open the disk described by (dev) for use by (f). * * Note that the philosophy here is "give them exactly what * they ask for". This is necessary because being too "smart" * about what the user might want leads to complications. * (eg. given no slice or partition value, with a disk that is * sliced - are they after the first BSD slice, or the DOS * slice before it?) */ static int bd_open(struct open_file *f, ...) { struct disk_devdesc *dev, rdev; int err, g_err; va_list ap; va_start(ap, f); dev = va_arg(ap, struct disk_devdesc *); va_end(ap); if (dev->d_unit < 0 || dev->d_unit >= nbdinfo) return (EIO); BD(dev).bd_open++; if (BD(dev).bd_bcache == NULL) BD(dev).bd_bcache = bcache_allocate(); err = disk_open(dev, BD(dev).bd_sectors * BD(dev).bd_sectorsize, BD(dev).bd_sectorsize, (BD(dev).bd_flags & BD_FLOPPY) ? DISK_F_NOCACHE: 0); #ifdef LOADER_GELI_SUPPORT static char gelipw[GELI_PW_MAXLEN]; char *passphrase; if (err) return (err); /* if we already know there is no GELI, skip the rest */ if (geli_status[dev->d_unit][dev->d_slice] != ISGELI_UNKNOWN) return (err); struct dsk dskp; struct ptable *table = NULL; struct ptable_entry part; struct pentry *entry; int geli_part = 0; dskp.drive = bd_unit2bios(dev->d_unit); dskp.type = dev->d_type; dskp.unit = dev->d_unit; dskp.slice = dev->d_slice; dskp.part = dev->d_partition; dskp.start = dev->d_offset; memcpy(&rdev, dev, sizeof(rdev)); /* to read the GPT table, we need to read the first sector */ rdev.d_offset = 0; /* We need the LBA of the end of the partition */ table = ptable_open(&rdev, BD(dev).bd_sectors, BD(dev).bd_sectorsize, ptblread); if (table == NULL) { DEBUG("Can't read partition table"); /* soft failure, return the exit status of disk_open */ return (err); } if (table->type == PTABLE_GPT) dskp.part = 255; STAILQ_FOREACH(entry, &table->entries, entry) { dskp.slice = entry->part.index; dskp.start = entry->part.start; if (is_geli(&dskp) == 0) { geli_status[dev->d_unit][dskp.slice] = ISGELI_YES; return (0); } if (geli_taste(bios_read, &dskp, entry->part.end - entry->part.start) == 0) { if ((passphrase = getenv("kern.geom.eli.passphrase")) != NULL) { /* Use the cached passphrase */ bcopy(passphrase, &gelipw, GELI_PW_MAXLEN); } if (geli_passphrase(&gelipw, dskp.unit, 'p', (dskp.slice > 0 ? dskp.slice : dskp.part), &dskp) == 0) { setenv("kern.geom.eli.passphrase", &gelipw, 1); bzero(gelipw, sizeof(gelipw)); geli_status[dev->d_unit][dskp.slice] = ISGELI_YES; geli_part++; } } else geli_status[dev->d_unit][dskp.slice] = ISGELI_NO; } /* none of the partitions on this disk have GELI */ if (geli_part == 0) { /* found no GELI */ geli_status[dev->d_unit][dev->d_slice] = ISGELI_NO; } #endif /* LOADER_GELI_SUPPORT */ return (err); } static int bd_close(struct open_file *f) { struct disk_devdesc *dev; dev = (struct disk_devdesc *)f->f_devdata; BD(dev).bd_open--; if (BD(dev).bd_open == 0) { bcache_free(BD(dev).bd_bcache); BD(dev).bd_bcache = NULL; } return (disk_close(dev)); } static int bd_ioctl(struct open_file *f, u_long cmd, void *data) { struct disk_devdesc *dev; dev = (struct disk_devdesc *)f->f_devdata; switch (cmd) { case DIOCGSECTORSIZE: *(u_int *)data = BD(dev).bd_sectorsize; break; case DIOCGMEDIASIZE: *(off_t *)data = BD(dev).bd_sectors * BD(dev).bd_sectorsize; break; default: return (ENOTTY); } return (0); } static int bd_strategy(void *devdata, int rw, daddr_t dblk, size_t offset, size_t size, char *buf, size_t *rsize) { struct bcache_devdata bcd; struct disk_devdesc *dev; dev = (struct disk_devdesc *)devdata; bcd.dv_strategy = bd_realstrategy; bcd.dv_devdata = devdata; bcd.dv_cache = BD(dev).bd_bcache; return (bcache_strategy(&bcd, rw, dblk + dev->d_offset, offset, size, buf, rsize)); } static int bd_realstrategy(void *devdata, int rw, daddr_t dblk, size_t offset, size_t size, char *buf, size_t *rsize) { struct disk_devdesc *dev = (struct disk_devdesc *)devdata; int blks; #ifdef BD_SUPPORT_FRAGS /* XXX: sector size */ char fragbuf[BIOSDISK_SECSIZE]; size_t fragsize; fragsize = size % BIOSDISK_SECSIZE; #else if (size % BD(dev).bd_sectorsize) panic("bd_strategy: %d bytes I/O not multiple of block size", size); #endif DEBUG("open_disk %p", dev); blks = size / BD(dev).bd_sectorsize; if (rsize) *rsize = 0; + if (dblk >= BD(dev).bd_sectors) { + DEBUG("IO past disk end %llu", (unsigned long long)dblk); + return (EIO); + } + + if (dblk + blks > BD(dev).bd_sectors) { + /* perform partial read */ + blks = BD(dev).bd_sectors - dblk; + size = blks * BD(dev).bd_sectorsize; + DEBUG("short read %d", blks); + } + switch(rw){ case F_READ: DEBUG("read %d from %lld to %p", blks, dblk, buf); if (blks && bd_read(dev, dblk, blks, buf)) { DEBUG("read error"); return (EIO); } #ifdef BD_SUPPORT_FRAGS /* XXX: sector size */ DEBUG("bd_strategy: frag read %d from %d+%d to %p", fragsize, dblk, blks, buf + (blks * BIOSDISK_SECSIZE)); if (fragsize && bd_read(od, dblk + blks, 1, fragsize)) { DEBUG("frag read error"); return(EIO); } bcopy(fragbuf, buf + (blks * BIOSDISK_SECSIZE), fragsize); #endif break; case F_WRITE : DEBUG("write %d from %d to %p", blks, dblk, buf); if (blks && bd_write(dev, dblk, blks, buf)) { DEBUG("write error"); return (EIO); } #ifdef BD_SUPPORT_FRAGS if(fragsize) { DEBUG("Attempted to write a frag"); return (EIO); } #endif break; default: /* DO NOTHING */ return (EROFS); } if (rsize) *rsize = size; return (0); } /* Max number of sectors to bounce-buffer if the request crosses a 64k boundary */ #define FLOPPY_BOUNCEBUF 18 static int bd_edd_io(struct disk_devdesc *dev, daddr_t dblk, int blks, caddr_t dest, int write) { static struct edd_packet packet; packet.len = sizeof(struct edd_packet); packet.count = blks; packet.off = VTOPOFF(dest); packet.seg = VTOPSEG(dest); packet.lba = dblk; v86.ctl = V86_FLAGS; v86.addr = 0x13; if (write) /* Should we Write with verify ?? 0x4302 ? */ v86.eax = 0x4300; else v86.eax = 0x4200; v86.edx = BD(dev).bd_unit; v86.ds = VTOPSEG(&packet); v86.esi = VTOPOFF(&packet); v86int(); return (V86_CY(v86.efl)); } static int bd_chs_io(struct disk_devdesc *dev, daddr_t dblk, int blks, caddr_t dest, int write) { u_int x, bpc, cyl, hd, sec; bpc = BD(dev).bd_sec * BD(dev).bd_hds; /* blocks per cylinder */ x = dblk; cyl = x / bpc; /* block # / blocks per cylinder */ x %= bpc; /* block offset into cylinder */ hd = x / BD(dev).bd_sec; /* offset / blocks per track */ sec = x % BD(dev).bd_sec; /* offset into track */ /* correct sector number for 1-based BIOS numbering */ sec++; if (cyl > 1023) /* CHS doesn't support cylinders > 1023. */ return (1); v86.ctl = V86_FLAGS; v86.addr = 0x13; if (write) v86.eax = 0x300 | blks; else v86.eax = 0x200 | blks; v86.ecx = ((cyl & 0xff) << 8) | ((cyl & 0x300) >> 2) | sec; v86.edx = (hd << 8) | BD(dev).bd_unit; v86.es = VTOPSEG(dest); v86.ebx = VTOPOFF(dest); v86int(); return (V86_CY(v86.efl)); } static int bd_io(struct disk_devdesc *dev, daddr_t dblk, int blks, caddr_t dest, int write) { u_int x, sec, result, resid, retry, maxfer; caddr_t p, xp, bbuf, breg; /* Just in case some idiot actually tries to read/write -1 blocks... */ if (blks < 0) return (-1); resid = blks; p = dest; /* Decide whether we have to bounce */ if (VTOP(dest) >> 20 != 0 || (BD(dev).bd_unit < 0x80 && (VTOP(dest) >> 16) != (VTOP(dest + blks * BD(dev).bd_sectorsize) >> 16))) { /* * There is a 64k physical boundary somewhere in the * destination buffer, or the destination buffer is above * first 1MB of physical memory so we have to arrange a * suitable bounce buffer. Allocate a buffer twice as large * as we need to. Use the bottom half unless there is a break * there, in which case we use the top half. */ x = min(FLOPPY_BOUNCEBUF, (unsigned)blks); bbuf = alloca(x * 2 * BD(dev).bd_sectorsize); if (((u_int32_t)VTOP(bbuf) & 0xffff0000) == ((u_int32_t)VTOP(bbuf + x * BD(dev).bd_sectorsize) & 0xffff0000)) { breg = bbuf; } else { breg = bbuf + x * BD(dev).bd_sectorsize; } maxfer = x; /* limit transfers to bounce region size */ } else { breg = bbuf = NULL; maxfer = 0; } while (resid > 0) { /* * Play it safe and don't cross track boundaries. * (XXX this is probably unnecessary) */ sec = dblk % BD(dev).bd_sec; /* offset into track */ x = min(BD(dev).bd_sec - sec, resid); if (maxfer > 0) x = min(x, maxfer); /* fit bounce buffer */ /* where do we transfer to? */ xp = bbuf == NULL ? p : breg; /* * Put your Data In, Put your Data out, * Put your Data In, and shake it all about */ if (write && bbuf != NULL) bcopy(p, breg, x * BD(dev).bd_sectorsize); /* * Loop retrying the operation a couple of times. The BIOS * may also retry. */ for (retry = 0; retry < 3; retry++) { /* if retrying, reset the drive */ if (retry > 0) { v86.ctl = V86_FLAGS; v86.addr = 0x13; v86.eax = 0; v86.edx = BD(dev).bd_unit; v86int(); } if (BD(dev).bd_flags & BD_MODEEDD1) result = bd_edd_io(dev, dblk, x, xp, write); else result = bd_chs_io(dev, dblk, x, xp, write); if (result == 0) break; } if (write) DEBUG("Write %d sector(s) from %p (0x%x) to %lld %s", x, p, VTOP(p), dblk, result ? "failed" : "ok"); else DEBUG("Read %d sector(s) from %lld to %p (0x%x) %s", x, dblk, p, VTOP(p), result ? "failed" : "ok"); if (result) { return(-1); } if (!write && bbuf != NULL) bcopy(breg, p, x * BD(dev).bd_sectorsize); p += (x * BD(dev).bd_sectorsize); dblk += x; resid -= x; } /* hexdump(dest, (blks * BD(dev).bd_sectorsize)); */ return(0); } static int bd_read(struct disk_devdesc *dev, daddr_t dblk, int blks, caddr_t dest) { #ifdef LOADER_GELI_SUPPORT struct dsk dskp; off_t p_off, diff; daddr_t alignlba; int err, n, alignblks; char *tmpbuf; /* if we already know there is no GELI, skip the rest */ if (geli_status[dev->d_unit][dev->d_slice] != ISGELI_YES) return (bd_io(dev, dblk, blks, dest, 0)); if (geli_status[dev->d_unit][dev->d_slice] == ISGELI_YES) { /* * Align reads to DEV_GELIBOOT_BSIZE bytes because partial * sectors cannot be decrypted. Round the requested LBA down to * nearest multiple of DEV_GELIBOOT_BSIZE bytes. */ alignlba = dblk & ~(daddr_t)((DEV_GELIBOOT_BSIZE / BIOSDISK_SECSIZE) - 1); /* * Round number of blocks to read up to nearest multiple of * DEV_GELIBOOT_BSIZE */ alignblks = blks + (dblk - alignlba) + ((DEV_GELIBOOT_BSIZE / BIOSDISK_SECSIZE) - 1) & ~(int)((DEV_GELIBOOT_BSIZE / BIOSDISK_SECSIZE) - 1); diff = (dblk - alignlba) * BIOSDISK_SECSIZE; /* * Use a temporary buffer here because the buffer provided by * the caller may be too small. */ tmpbuf = alloca(alignblks * BIOSDISK_SECSIZE); err = bd_io(dev, alignlba, alignblks, tmpbuf, 0); if (err) return (err); dskp.drive = bd_unit2bios(dev->d_unit); dskp.type = dev->d_type; dskp.unit = dev->d_unit; dskp.slice = dev->d_slice; dskp.part = dev->d_partition; dskp.start = dev->d_offset; /* GELI needs the offset relative to the partition start */ p_off = alignlba - dskp.start; err = geli_read(&dskp, p_off * BIOSDISK_SECSIZE, tmpbuf, alignblks * BIOSDISK_SECSIZE); if (err) return (err); bcopy(tmpbuf + diff, dest, blks * BIOSDISK_SECSIZE); return (0); } #endif /* LOADER_GELI_SUPPORT */ return (bd_io(dev, dblk, blks, dest, 0)); } static int bd_write(struct disk_devdesc *dev, daddr_t dblk, int blks, caddr_t dest) { return (bd_io(dev, dblk, blks, dest, 1)); } /* * Return the BIOS geometry of a given "fixed drive" in a format * suitable for the legacy bootinfo structure. Since the kernel is * expecting raw int 0x13/0x8 values for N_BIOS_GEOM drives, we * prefer to get the information directly, rather than rely on being * able to put it together from information already maintained for * different purposes and for a probably different number of drives. * * For valid drives, the geometry is expected in the format (31..0) * "000000cc cccccccc hhhhhhhh 00ssssss"; and invalid drives are * indicated by returning the geometry of a "1.2M" PC-format floppy * disk. And, incidentally, what is returned is not the geometry as * such but the highest valid cylinder, head, and sector numbers. */ u_int32_t bd_getbigeom(int bunit) { v86.ctl = V86_FLAGS; v86.addr = 0x13; v86.eax = 0x800; v86.edx = 0x80 + bunit; v86int(); if (V86_CY(v86.efl)) return 0x4f010f; return ((v86.ecx & 0xc0) << 18) | ((v86.ecx & 0xff00) << 8) | (v86.edx & 0xff00) | (v86.ecx & 0x3f); } /* * Return a suitable dev_t value for (dev). * * In the case where it looks like (dev) is a SCSI disk, we allow the number of * IDE disks to be specified in $num_ide_disks. There should be a Better Way. */ int bd_getdev(struct i386_devdesc *d) { struct disk_devdesc *dev; int biosdev; int major; int rootdev; char *nip, *cp; int i, unit; dev = (struct disk_devdesc *)d; biosdev = bd_unit2bios(dev->d_unit); DEBUG("unit %d BIOS device %d", dev->d_unit, biosdev); if (biosdev == -1) /* not a BIOS device */ return(-1); if (disk_open(dev, BD(dev).bd_sectors * BD(dev).bd_sectorsize, BD(dev).bd_sectorsize,(BD(dev).bd_flags & BD_FLOPPY) ? DISK_F_NOCACHE: 0) != 0) /* oops, not a viable device */ return (-1); else disk_close(dev); if (biosdev < 0x80) { /* floppy (or emulated floppy) or ATAPI device */ if (bdinfo[dev->d_unit].bd_type == DT_ATAPI) { /* is an ATAPI disk */ major = WFDMAJOR; } else { /* is a floppy disk */ major = FDMAJOR; } } else { /* assume an IDE disk */ major = WDMAJOR; } /* default root disk unit number */ unit = biosdev & 0x7f; /* XXX a better kludge to set the root disk unit number */ if ((nip = getenv("root_disk_unit")) != NULL) { i = strtol(nip, &cp, 0); /* check for parse error */ if ((cp != nip) && (*cp == 0)) unit = i; } rootdev = MAKEBOOTDEV(major, dev->d_slice + 1, unit, dev->d_partition); DEBUG("dev is 0x%x\n", rootdev); return(rootdev); } #ifdef LOADER_GELI_SUPPORT int bios_read(void *vdev __unused, struct dsk *priv, off_t off, char *buf, size_t bytes) { struct disk_devdesc dev; dev.d_dev = &biosdisk; dev.d_type = priv->type; dev.d_unit = priv->unit; dev.d_slice = priv->slice; dev.d_partition = priv->part; dev.d_offset = priv->start; off = off / BIOSDISK_SECSIZE; /* GELI gives us the offset relative to the partition start */ off += dev.d_offset; bytes = bytes / BIOSDISK_SECSIZE; return (bd_io(&dev, off, bytes, buf, 0)); } #endif /* LOADER_GELI_SUPPORT */