sys/kern/vfs_bio.c
@@ 155 lines skipped @@
 static int inmem(struct vnode *vp, daddr_t blkno);
 static void vm_hold_free_pages(struct buf *bp, int newbsize);
 static void vm_hold_load_pages(struct buf *bp, vm_offset_t from,
     vm_offset_t to);
 static void vfs_page_set_valid(struct buf *bp, vm_ooffset_t off, vm_page_t m);
 static void vfs_page_set_validclean(struct buf *bp, vm_ooffset_t off,
     vm_page_t m);
 static void vfs_clean_pages_dirty_buf(struct buf *bp);
-static void vfs_setdirty_locked_object(struct buf *bp);
+static void vfs_setdirty_range(struct buf *bp);
 static void vfs_vmio_invalidate(struct buf *bp);
 static void vfs_vmio_truncate(struct buf *bp, int npages);
 static void vfs_vmio_extend(struct buf *bp, int npages, int size);
 static int vfs_bio_clcheck(struct vnode *vp, int size,
     daddr_t lblkno, daddr_t blkno);
 static void breada(struct vnode *, daddr_t *, int *, int, struct ucred *, int,
     void (*)(struct buf *));
 static int buf_flush(struct vnode *vp, struct bufdomain *, int);
@@ 776 lines skipped @@
  * bit if the newly extended portion of the buffer does not contain
  * valid data.
  */
 static __inline void
 vfs_buf_test_cache(struct buf *bp, vm_ooffset_t foff, vm_offset_t off,
     vm_offset_t size, vm_page_t m)
 {
-    VM_OBJECT_ASSERT_LOCKED(m->object);
     /*
      * This function and its results are protected by higher level
      * synchronization requiring vnode and buf locks to page in and
      * validate pages.
      */
     if (bp->b_flags & B_CACHE) {
         int base = (foff + off) & PAGE_MASK;
         if (vm_page_is_valid(m, base, size) == 0)
@@ 1,879 lines skipped @@
     KASSERT(vp->v_object != NULL,
         ("vfs_vmio_iodone: vnode %p has no vm_object", vp));
     foff = bp->b_offset;
     KASSERT(bp->b_offset != NOOFFSET,
         ("vfs_vmio_iodone: bp %p has no buffer offset", bp));
     bogus = false;
     iosize = bp->b_bcount - bp->b_resid;
-    VM_OBJECT_WLOCK(obj);
     for (i = 0; i < bp->b_npages; i++) {
         resid = ((foff + PAGE_SIZE) & ~(off_t)PAGE_MASK) - foff;
         if (resid > iosize)
             resid = iosize;

         /*
          * cleanup bogus pages, restoring the originals
          */
         m = bp->b_pages[i];
         if (m == bogus_page) {
-            if (bogus == false) {
-                bogus = true;
-                VM_OBJECT_RLOCK(obj);
-            }
+            bogus = true;
             m = vm_page_lookup(obj, OFF_TO_IDX(foff));
             if (m == NULL)
                 panic("biodone: page disappeared!");
             bp->b_pages[i] = m;
         } else if ((bp->b_iocmd == BIO_READ) && resid > 0) {
             /*
              * In the write case, the valid and clean bits are
              * already changed correctly ( see bdwrite() ), so we
              * only need to do this here in the read case.
              */
             KASSERT((m->dirty & vm_page_bits(foff & PAGE_MASK,
                 resid)) == 0, ("vfs_vmio_iodone: page %p "
                 "has unexpected dirty bits", m));
             vfs_page_set_valid(bp, foff, m);
         }
         KASSERT(OFF_TO_IDX(foff) == m->pindex,
             ("vfs_vmio_iodone: foff(%jd)/pindex(%ju) mismatch",
             (intmax_t)foff, (uintmax_t)m->pindex));

         vm_page_sunbusy(m);
         foff = (foff + PAGE_SIZE) & ~(off_t)PAGE_MASK;
         iosize -= resid;
     }
-    if (bogus)
-        VM_OBJECT_RUNLOCK(obj);
     vm_object_pip_wakeupn(obj, bp->b_npages);
-    VM_OBJECT_WUNLOCK(obj);
     if (bogus && buf_mapped(bp)) {
         BUF_CHECK_MAPPED(bp);
         pmap_qenter(trunc_page((vm_offset_t)bp->b_data),
             bp->b_pages, bp->b_npages);
     }
 }
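
Note: the loop above walks the completed transfer a page at a time, clipping each step at the next page boundary. A standalone userland sketch of that arithmetic (not kernel code; PAGE_SIZE is hardcoded to 4096 here for illustration, the kernel takes it from machine headers):

#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE 4096
#define PAGE_MASK (PAGE_SIZE - 1)

int
main(void)
{
    int64_t foff = 6144;    /* bp->b_offset: buffer starts mid-page */
    int64_t iosize = 10240; /* bp->b_bcount - bp->b_resid */
    int64_t resid;

    while (iosize > 0) {
        /* Bytes from foff up to the next page boundary. */
        resid = ((foff + PAGE_SIZE) & ~(int64_t)PAGE_MASK) - foff;
        if (resid > iosize)
            resid = iosize;
        printf("pindex %jd: %jd bytes at offset %jd\n",
            (intmax_t)(foff / PAGE_SIZE), (intmax_t)resid, (intmax_t)foff);
        foff = (foff + PAGE_SIZE) & ~(int64_t)PAGE_MASK;
        iosize -= resid;
    }
    return (0);
}

With foff = 6144 and iosize = 10240 this prints steps of 2048, 4096, and 4096 bytes: a partial first page, then whole pages.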
 /*
@@ 111 lines skipped @@ vfs_vmio_extend(struct buf *bp, int desiredpages, int size)
     vm_page_t m;

     /*
      * Step 1, bring in the VM pages from the object, allocating
      * them if necessary.  We must clear B_CACHE if these pages
      * are not valid for the range covered by the buffer.
      */
     obj = bp->b_bufobj->bo_object;
-    VM_OBJECT_WLOCK(obj);
     if (bp->b_npages < desiredpages) {
         /*
          * We must allocate system pages since blocking
          * here could interfere with paging I/O, no
          * matter which process we are.
          *
          * Only exclusive busy can be tested here.
          * Blocking on shared busy might lead to
          * deadlocks once allocbuf() is called after
          * pages are vfs_busy_pages().
          */
+        VM_OBJECT_WLOCK(obj);
         (void)vm_page_grab_pages(obj,
             OFF_TO_IDX(bp->b_offset) + bp->b_npages,
             VM_ALLOC_SYSTEM | VM_ALLOC_IGN_SBUSY |
             VM_ALLOC_NOBUSY | VM_ALLOC_WIRED,
             &bp->b_pages[bp->b_npages], desiredpages - bp->b_npages);
+        VM_OBJECT_WUNLOCK(obj);
         bp->b_npages = desiredpages;
     }

     /*
      * Step 2.  We've loaded the pages into the buffer,
      * we have to figure out if we can still have B_CACHE
      * set.  Note that B_CACHE is set according to the
      * byte-granular range ( bcount and size ), not the
@@ 14 lines skipped @@ while ((bp->b_flags & B_CACHE) && toff < size) {
         if (tinc > (size - toff))
             tinc = size - toff;
         pi = ((bp->b_offset & PAGE_MASK) + toff) >> PAGE_SHIFT;
         m = bp->b_pages[pi];
         vfs_buf_test_cache(bp, bp->b_offset, toff, tinc, m);
         toff += tinc;
         tinc = PAGE_SIZE;
     }
-    VM_OBJECT_WUNLOCK(obj);

     /*
      * Step 3, fixup the KVA pmap.
      */
     if (buf_mapped(bp))
         bpmap_qenter(bp);
     else
         BUF_CHECK_UNMAPPED(bp);
@@ 562 lines skipped @@ vfs_clean_pages_dirty_buf(struct buf *bp)
     if ((bp->b_flags & B_VMIO) == 0 || bp->b_bufsize == 0)
         return;

     foff = bp->b_offset;
     KASSERT(bp->b_offset != NOOFFSET,
         ("vfs_clean_pages_dirty_buf: no buffer offset"));

-    VM_OBJECT_WLOCK(bp->b_bufobj->bo_object);
     vfs_busy_pages_acquire(bp);
-    vfs_setdirty_locked_object(bp);
+    vfs_setdirty_range(bp);
     for (i = 0; i < bp->b_npages; i++) {
         noff = (foff + PAGE_SIZE) & ~(off_t)PAGE_MASK;
         eoff = noff;
         if (eoff > bp->b_offset + bp->b_bufsize)
             eoff = bp->b_offset + bp->b_bufsize;
         m = bp->b_pages[i];
         vfs_page_set_validclean(bp, foff, m);
         /* vm_page_clear_dirty(m, foff & PAGE_MASK, eoff - foff); */
         foff = noff;
     }
     vfs_busy_pages_release(bp);
-    VM_OBJECT_WUNLOCK(bp->b_bufobj->bo_object);
 }
 static void
-vfs_setdirty_locked_object(struct buf *bp)
+vfs_setdirty_range(struct buf *bp)
 {
-    vm_object_t object;
-    int i;
-
-    object = bp->b_bufobj->bo_object;
-    VM_OBJECT_ASSERT_WLOCKED(object);
-
-    /*
-     * We qualify the scan for modified pages on whether the
-     * object has been flushed yet.
-     */
-    if ((object->flags & OBJ_MIGHTBEDIRTY) != 0) {

[inline comment thread on the deleted OBJ_MIGHTBEDIRTY check]
jeff (author): I deleted this condition because I didn't want to protect the flag with the object lock. However, I now believe a racy flag check is safe because we hold the busy lock on the constituent pages in these cases. I could re-institute it here although it does not seem like a significant burden to check dirty.
kib: OBJ_MIGHTBEDIRTY means that there might be (but not necessarily are) dirty pages in the object, so checking the flag should be an optimization always.
jeff (author): I understand that but in this case we no longer hold the object lock. So the check would race. I think this could be safe because we hold the page busy. So if there is a race to set the flag it can't involve this set of pages. If there is a race to clear the flag it similarly can't involve these pages and we may simply do extra work. It seems like a small optimization to avoid this code given the amount of page iteration that is done in general. The flag check will rely on somewhat obscure synchronization that may be fragile. This is why I removed it.
kib: From this PoV it seems to be more correct to not check the flag than to check it. But I think that it is only advisory and at worst the conversion of dirty pages to dirty buffers should happen somewhat later.
jeff (author): I feel ambivalent really. I would like to go forward with the patch as-is. If we find a workload where it really matters that we reduce the cpu time here we can look at the precise synchronization guarantees provided for MIGHTBEDIRTY optimizations. If you are ok with that can you approve of the commit?

     vm_offset_t boffset;
     vm_offset_t eoffset;
+    int i;

     /*
      * test the pages to see if they have been modified directly
      * by users through the VM system.
      */
     for (i = 0; i < bp->b_npages; i++)
         vm_page_test_dirty(bp->b_pages[i]);

     /*
      * Calculate the encompassing dirty range, boffset and eoffset,
      * (eoffset - boffset) bytes.
      */
     for (i = 0; i < bp->b_npages; i++) {
         if (bp->b_pages[i]->dirty)
             break;
     }
     boffset = (i << PAGE_SHIFT) - (bp->b_offset & PAGE_MASK);

     for (i = bp->b_npages - 1; i >= 0; --i) {
         if (bp->b_pages[i]->dirty) {
             break;
         }
     }
     eoffset = ((i + 1) << PAGE_SHIFT) - (bp->b_offset & PAGE_MASK);

     /*
      * Fit it to the buffer.
      */
     if (eoffset > bp->b_bcount)
         eoffset = bp->b_bcount;

     /*
      * If we have a good dirty range, merge with the existing
      * dirty range.
      */
     if (boffset < eoffset) {
         if (bp->b_dirtyoff > boffset)
             bp->b_dirtyoff = boffset;
         if (bp->b_dirtyend < eoffset)
             bp->b_dirtyend = eoffset;
     }
-    }
 }
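
For reference, vfs_setdirty_range()'s scan reduces to index-to-offset arithmetic: find the first and last dirty page, then convert page indices to byte offsets relative to the buffer start. A minimal userland sketch (the dirty flags, page size, and offsets below are made up for illustration; the kernel reads m->dirty while the pages are busied):

#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE (1 << PAGE_SHIFT)
#define PAGE_MASK (PAGE_SIZE - 1)

int
main(void)
{
    int dirty[4] = { 0, 1, 1, 0 };  /* stand-in for m->dirty != 0 */
    int npages = 4, i;
    long b_offset = 2048;           /* buffer starts mid-page */
    long boffset, eoffset;

    /* First dirty page -> start offset. */
    for (i = 0; i < npages; i++)
        if (dirty[i])
            break;
    boffset = ((long)i << PAGE_SHIFT) - (b_offset & PAGE_MASK);

    /* Last dirty page -> end offset. */
    for (i = npages - 1; i >= 0; --i)
        if (dirty[i])
            break;
    eoffset = ((long)(i + 1) << PAGE_SHIFT) - (b_offset & PAGE_MASK);

    if (boffset < eoffset)
        printf("dirty range: [%ld, %ld)\n", boffset, eoffset);
    return (0);
}

With a buffer starting 2048 bytes into its first page and pages 1-2 dirty, this reports the range [2048, 10240), which would then be merged into b_dirtyoff/b_dirtyend.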
 /*
  * Allocate the KVA mapping for an existing buffer.
  * If an unmapped buffer is provided but a mapped buffer is requested, take
  * also care to properly setup mappings between pages and KVA.
  */
 static void
 bp_unmapped_get_kva(struct buf *bp, daddr_t blkno, int size, int gbflags)
@@ 721 lines skipped @@
  * consistent.
  */
 void
 vfs_unbusy_pages(struct buf *bp)
 {
     int i;
     vm_object_t obj;
     vm_page_t m;
+    bool bogus;

     runningbufwakeup(bp);
     if (!(bp->b_flags & B_VMIO))
         return;

     obj = bp->b_bufobj->bo_object;
-    VM_OBJECT_WLOCK(obj);
+    bogus = false;
     for (i = 0; i < bp->b_npages; i++) {
         m = bp->b_pages[i];
         if (m == bogus_page) {
-            if (bogus == false) {
-                bogus = true;
-                VM_OBJECT_RLOCK(obj);
-            }
+            bogus = true;
             m = vm_page_lookup(obj, OFF_TO_IDX(bp->b_offset) + i);
             if (!m)
                 panic("vfs_unbusy_pages: page missing\n");
             bp->b_pages[i] = m;
             if (buf_mapped(bp)) {
                 BUF_CHECK_MAPPED(bp);
                 pmap_qenter(trunc_page((vm_offset_t)bp->b_data),
                     bp->b_pages, bp->b_npages);
             } else
                 BUF_CHECK_UNMAPPED(bp);
         }
         vm_page_sunbusy(m);
     }
-    if (bogus)
-        VM_OBJECT_RUNLOCK(obj);
     vm_object_pip_wakeupn(obj, bp->b_npages);
-    VM_OBJECT_WUNLOCK(obj);
 }
 /*
  * vfs_page_set_valid:
  *
  *	Set the valid bits in a page based on the supplied offset.   The
  *	range is restricted to the buffer's size.
  *
@@ 60 lines skipped @@
 /*
  * Acquire a shared busy on all pages in the buf.
  */
 void
 vfs_busy_pages_acquire(struct buf *bp)
 {
     int i;

-    VM_OBJECT_ASSERT_WLOCKED(bp->b_bufobj->bo_object);
     for (i = 0; i < bp->b_npages; i++)
         vm_page_busy_acquire(bp->b_pages[i], VM_ALLOC_SBUSY);
 }

 void
 vfs_busy_pages_release(struct buf *bp)
 {
     int i;

-    VM_OBJECT_ASSERT_WLOCKED(bp->b_bufobj->bo_object);
     for (i = 0; i < bp->b_npages; i++)
         vm_page_sunbusy(bp->b_pages[i]);
 }

 /*
  * This routine is called before a device strategy routine.
  * It is used to tell the VM system that paging I/O is in
  * progress, and treat the pages associated with the buffer
@@ 16 lines skipped @@ vfs_busy_pages(struct buf *bp, int clear_modify)
     if (!(bp->b_flags & B_VMIO))
         return;

     obj = bp->b_bufobj->bo_object;
     foff = bp->b_offset;
     KASSERT(bp->b_offset != NOOFFSET,
         ("vfs_busy_pages: no buffer offset"));
-    VM_OBJECT_WLOCK(obj);
     if ((bp->b_flags & B_CLUSTER) == 0) {
         vm_object_pip_add(obj, bp->b_npages);
         vfs_busy_pages_acquire(bp);
     }
     if (bp->b_bufsize != 0)
-        vfs_setdirty_locked_object(bp);
+        vfs_setdirty_range(bp);
     bogus = false;
     for (i = 0; i < bp->b_npages; i++) {
         m = bp->b_pages[i];

         vm_page_assert_sbusied(m);

         /*
          * When readying a buffer for a read ( i.e
          * clear_modify == 0 ), it is important to do
@@ 14 lines skipped @@ if (clear_modify) {
             vfs_page_set_validclean(bp, foff, m);
         } else if (vm_page_all_valid(m) &&
             (bp->b_flags & B_CACHE) == 0) {
             bp->b_pages[i] = bogus_page;
             bogus = true;
         }
         foff = (foff + PAGE_SIZE) & ~(off_t)PAGE_MASK;
     }
-    VM_OBJECT_WUNLOCK(obj);
     if (bogus && buf_mapped(bp)) {
         BUF_CHECK_MAPPED(bp);
         pmap_qenter(trunc_page((vm_offset_t)bp->b_data),
             bp->b_pages, bp->b_npages);
     }
 }
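
The bogus_page substitution above pairs with the restore loops in vfs_vmio_iodone() and vfs_unbusy_pages(): fully valid pages are hidden behind a sentinel so a read cannot clobber them, and completion recovers the real pages by their index in the object. A userland analogue of that substitute-and-restore pattern (all names below are illustrative stand-ins, not kernel API):

#include <stdio.h>

#define NPAGES 4

static int object_pages[NPAGES];    /* stand-in for the VM object's pages */
static int bogus_sentinel;          /* stand-in for bogus_page */

int
main(void)
{
    int *b_pages[NPAGES];
    int i, bogus = 0;

    for (i = 0; i < NPAGES; i++)
        b_pages[i] = &object_pages[i];

    /* "vfs_busy_pages": page 2 is already valid, hide it. */
    b_pages[2] = &bogus_sentinel;

    /* "vfs_unbusy_pages": restore originals by index lookup. */
    for (i = 0; i < NPAGES; i++) {
        if (b_pages[i] == &bogus_sentinel) {
            b_pages[i] = &object_pages[i];  /* like vm_page_lookup() */
            bogus = 1;
        }
    }
    if (bogus)
        printf("sentinel restored; kernel would pmap_qenter() here\n");
    return (0);
}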
 /*
@@ 16 lines skipped @@ vfs_bio_set_valid(struct buf *bp, int base, int size)
     /*
      * Fixup base to be relative to beginning of first page.
      * Set initial n to be the maximum number of bytes in the
      * first page that can be validated.
      */
     base += (bp->b_offset & PAGE_MASK);
     n = PAGE_SIZE - (base & PAGE_MASK);

-    VM_OBJECT_WLOCK(bp->b_bufobj->bo_object);
     /*
      * Busy may not be strictly necessary here because the pages are
      * unlikely to be fully valid and the vnode lock will synchronize
      * their access via getpages.  It is grabbed for consistency with
      * other page validation.
      */
     vfs_busy_pages_acquire(bp);
     for (i = base / PAGE_SIZE; size > 0 && i < bp->b_npages; ++i) {
         m = bp->b_pages[i];
         if (n > size)
             n = size;
         vm_page_set_valid_range(m, base & PAGE_MASK, n);
         base += n;
         size -= n;
         n = PAGE_SIZE;
     }
     vfs_busy_pages_release(bp);
-    VM_OBJECT_WUNLOCK(bp->b_bufobj->bo_object);
 }
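
The walk above may enter the first page mid-page, so the first chunk is clipped to the page boundary and every later chunk covers a whole page. A userland sketch of the same arithmetic (offsets and PAGE_SIZE are illustrative):

#include <stdio.h>

#define PAGE_SIZE 4096
#define PAGE_MASK (PAGE_SIZE - 1)

int
main(void)
{
    int base = 1024, size = 6144;   /* byte range within the buffer */
    int b_offset = 512;             /* bp->b_offset & PAGE_MASK */
    int i, n;

    base += b_offset;
    n = PAGE_SIZE - (base & PAGE_MASK); /* bytes left in the first page */
    for (i = base / PAGE_SIZE; size > 0; ++i) {
        if (n > size)
            n = size;
        printf("page %d: mark %d bytes valid from offset %d\n",
            i, n, base & PAGE_MASK);
        base += n;
        size -= n;
        n = PAGE_SIZE;
    }
    return (0);
}

With base 1024, size 6144, and the buffer starting 512 bytes into its first page, this validates 2560 bytes of page 0 and 3584 bytes of page 1.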
 /*
  * vfs_bio_clrbuf:
  *
  *	If the specified buffer is a non-VMIO buffer, clear the entire
  *	buffer.  If the specified buffer is a VMIO buffer, clear and
  *	validate only the previously invalid portions of the buffer.
@@ 9 lines skipped @@ vfs_bio_clrbuf(struct buf *bp)
     int i, j, mask, sa, ea, slide;

     if ((bp->b_flags & (B_VMIO | B_MALLOC)) != B_VMIO) {
         clrbuf(bp);
         return;
     }
     bp->b_flags &= ~B_INVAL;
     bp->b_ioflags &= ~BIO_ERROR;
-    VM_OBJECT_WLOCK(bp->b_bufobj->bo_object);
     vfs_busy_pages_acquire(bp);
-    if ((bp->b_npages == 1) && (bp->b_bufsize < PAGE_SIZE) &&

[inline comment thread on the deleted single-page fast path]
jeff (author): I think this is simply an optimization of the general case below for single pages. I don't believe that is common or necessary. I would appreciate another set of eyes to verify.
kib: It seems that indeed, this case is just an optimization. The code initially appeared in one of Dyson's commits from 1995 (r7694) without a useful commit message.

-        (bp->b_offset & PAGE_MASK) == 0) {
-        if (bp->b_pages[0] == bogus_page)
-            goto unlock;
-        mask = (1 << (bp->b_bufsize / DEV_BSIZE)) - 1;
-        VM_OBJECT_ASSERT_WLOCKED(bp->b_pages[0]->object);
-        if ((bp->b_pages[0]->valid & mask) == mask)
-            goto unlock;
-        if ((bp->b_pages[0]->valid & mask) == 0) {
-            pmap_zero_page_area(bp->b_pages[0], 0, bp->b_bufsize);
-            bp->b_pages[0]->valid |= mask;
-            goto unlock;
-        }
-    }
     sa = bp->b_offset & PAGE_MASK;
     slide = 0;
     for (i = 0; i < bp->b_npages; i++, sa = 0) {
         slide = imin(slide + PAGE_SIZE, bp->b_offset + bp->b_bufsize);
         ea = slide & PAGE_MASK;
         if (ea == 0)
             ea = PAGE_SIZE;
         if (bp->b_pages[i] == bogus_page)
             continue;
         j = sa / DEV_BSIZE;
         mask = ((1 << ((ea - sa) / DEV_BSIZE)) - 1) << j;
-        VM_OBJECT_ASSERT_WLOCKED(bp->b_pages[i]->object);
         if ((bp->b_pages[i]->valid & mask) == mask)
             continue;
         if ((bp->b_pages[i]->valid & mask) == 0)
             pmap_zero_page_area(bp->b_pages[i], sa, ea - sa);
         else {
             for (; sa < ea; sa += DEV_BSIZE, j++) {
                 if ((bp->b_pages[i]->valid & (1 << j)) == 0) {
                     pmap_zero_page_area(bp->b_pages[i],
                         sa, DEV_BSIZE);
                 }
             }
         }
-        bp->b_pages[i]->valid |= mask;
+        vm_page_set_valid_range(bp->b_pages[i], j * DEV_BSIZE,
+            roundup2(ea - sa, DEV_BSIZE));
     }
-unlock:
     vfs_busy_pages_release(bp);
-    VM_OBJECT_WUNLOCK(bp->b_bufobj->bo_object);
     bp->b_resid = 0;
 }
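
vfs_bio_clrbuf() tracks validity in DEV_BSIZE (512-byte) chunks, one bit per chunk of the page, so a sub-page range [sa, ea) becomes a contiguous run of mask bits. A tiny sketch of the mask arithmetic (the sa and ea values are illustrative):

#include <stdio.h>

#define DEV_BSIZE 512

int
main(void)
{
    int sa = 1024, ea = 3072;   /* byte range within one page */
    int j = sa / DEV_BSIZE;     /* first chunk covered: bit 2 */
    int mask = ((1 << ((ea - sa) / DEV_BSIZE)) - 1) << j;

    /* Four chunks starting at bit 2: 0b111100 == 0x3c. */
    printf("valid mask for [%d, %d): 0x%x\n", sa, ea, mask);
    return (0);
}

Four chunks starting at chunk 2 yield 0x3c, matching mask = ((1 << ((ea - sa) / DEV_BSIZE)) - 1) << j in the loop above.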
 void
 vfs_bio_bzero_buf(struct buf *bp, int base, int size)
 {
     vm_page_t m;
     int i, n;

@@ 394 lines skipped @@ vfs_bio_getpages(struct vnode *vp, vm_page_t *ma, int count,
     pgsin += pgsin_a;
     if (rahead != NULL)
         *rahead = pgsin_a;
     VM_CNT_INC(v_vnodein);
     VM_CNT_ADD(v_vnodepgsin, pgsin);
     br_flags = (mp != NULL && (mp->mnt_kern_flag & MNTK_UNMAPPED_BUFS)
         != 0) ? GB_UNMAPPED : 0;
-    VM_OBJECT_WLOCK(object);
 again:
     for (i = 0; i < count; i++)
         vm_page_busy_downgrade(ma[i]);
-    VM_OBJECT_WUNLOCK(object);

     lbnp = -1;
     for (i = 0; i < count; i++) {
         m = ma[i];

         /*
          * Pages are shared busy and the object lock is not
          * owned, which together allow for the pages'
@@ 42 lines skipped @@ for (; poff < poffe; poff += bsize) {
             } else {
                 bqrelse(bp);
             }
         }
         KASSERT(1 /* racy, enable for debugging */ ||
             vm_page_all_valid(m) || i == count - 1,
             ("buf %d %p invalid", i, m));
         if (i == count - 1 && lpart) {
-            VM_OBJECT_WLOCK(object);
             if (!vm_page_none_valid(m) &&
                 !vm_page_all_valid(m))
                 vm_page_zero_invalid(m, TRUE);
-            VM_OBJECT_WUNLOCK(object);
         }
 next_page:;
     }
 end_pages:

     VM_OBJECT_WLOCK(object);
     redo = false;
     for (i = 0; i < count; i++) {
@@ 11 lines skipped @@
          * read loop, and partial validity for the page at
          * index count - 1 could mean that the page was
          * invalidated or removed, so we must restart for
          * safety as well.
          */
         if (!vm_page_all_valid(ma[i]))
             redo = true;
     }
+    VM_OBJECT_WUNLOCK(object);
     if (redo && error == 0)
         goto again;
-    VM_OBJECT_WUNLOCK(object);
     return (error != 0 ? VM_PAGER_ERROR : VM_PAGER_OK);
 }
 #include "opt_ddb.h"
 #ifdef DDB
 #include <ddb/ddb.h>

 /* DDB command to show buffer data */
@@ 186 lines skipped @@