Index: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_rlock.h
===================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_rlock.h
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_rlock.h
@@ -78,6 +78,8 @@
 
 locked_range_t *rangelock_enter(rangelock_t *,
     uint64_t, uint64_t, rangelock_type_t);
+locked_range_t *rangelock_tryenter(rangelock_t *,
+    uint64_t, uint64_t, rangelock_type_t);
 void rangelock_exit(locked_range_t *);
 void rangelock_reduce(locked_range_t *, uint64_t, uint64_t);
 
Index: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_rlock.c
===================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_rlock.c
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_rlock.c
@@ -136,10 +136,11 @@
 }
 
 /*
- * Check if a write lock can be grabbed, or wait and recheck until available.
+ * Check if a write lock can be grabbed. If not, fail immediately or sleep and
+ * recheck until available, depending on the value of the "nonblock" parameter.
  */
-static void
-rangelock_enter_writer(rangelock_t *rl, locked_range_t *new)
+static boolean_t
+rangelock_enter_writer(rangelock_t *rl, locked_range_t *new, boolean_t nonblock)
 {
         avl_tree_t *tree = &rl->rl_tree;
         locked_range_t *lr;
@@ -169,7 +170,7 @@
                  */
                 if (avl_numnodes(tree) == 0) {
                         avl_add(tree, new);
-                        return;
+                        return (B_TRUE);
                 }
 
                 /*
@@ -190,8 +191,10 @@
                         goto wait;
 
                 avl_insert(tree, new, where);
-                return;
+                return (B_TRUE);
 wait:
+                if (nonblock)
+                        return (B_FALSE);
                 if (!lr->lr_write_wanted) {
                         cv_init(&lr->lr_write_cv, NULL, CV_DEFAULT, NULL);
                         lr->lr_write_wanted = B_TRUE;
@@ -373,10 +376,11 @@
 }
 
 /*
- * Check if a reader lock can be grabbed, or wait and recheck until available.
+ * Check if a reader lock can be grabbed. If not, fail immediately or sleep and
+ * recheck until available, depending on the value of the "nonblock" parameter.
  */
-static void
-rangelock_enter_reader(rangelock_t *rl, locked_range_t *new)
+static boolean_t
+rangelock_enter_reader(rangelock_t *rl, locked_range_t *new, boolean_t nonblock)
 {
         avl_tree_t *tree = &rl->rl_tree;
         locked_range_t *prev, *next;
@@ -397,6 +401,8 @@
          */
         if (prev && (off < prev->lr_offset + prev->lr_length)) {
                 if ((prev->lr_type == RL_WRITER) || (prev->lr_write_wanted)) {
+                        if (nonblock)
+                                return (B_FALSE);
                         if (!prev->lr_read_wanted) {
                                 cv_init(&prev->lr_read_cv,
                                     NULL, CV_DEFAULT, NULL);
@@ -421,6 +427,8 @@
                 if (off + len <= next->lr_offset)
                         goto got_lock;
                 if ((next->lr_type == RL_WRITER) || (next->lr_write_wanted)) {
+                        if (nonblock)
+                                return (B_FALSE);
                         if (!next->lr_read_wanted) {
                                 cv_init(&next->lr_read_cv,
                                     NULL, CV_DEFAULT, NULL);
@@ -439,6 +447,7 @@
          * locks and bumping ref counts (r_count).
          */
         rangelock_add_reader(tree, new, prev, where);
+        return (B_TRUE);
 }
 
 /*
@@ -448,13 +457,13 @@
  * the range lock structure for later unlocking (or reduce range if the
  * entire file is locked as RL_WRITER).
  */
-locked_range_t *
-rangelock_enter(rangelock_t *rl, uint64_t off, uint64_t len,
-    rangelock_type_t type)
+static locked_range_t *
+_rangelock_enter(rangelock_t *rl, uint64_t off, uint64_t len,
+    rangelock_type_t type, boolean_t nonblock)
 {
         ASSERT(type == RL_READER || type == RL_WRITER || type == RL_APPEND);
 
-        locked_range_t *new = kmem_alloc(sizeof (locked_range_t), KM_SLEEP);
+        locked_range_t *new = kmem_alloc(sizeof (*new), KM_SLEEP);
         new->lr_rangelock = rl;
         new->lr_offset = off;
         if (len + off < off)    /* overflow */
@@ -471,14 +480,32 @@
                 /*
                  * First check for the usual case of no locks
                  */
-                if (avl_numnodes(&rl->rl_tree) == 0)
+                if (avl_numnodes(&rl->rl_tree) == 0) {
                         avl_add(&rl->rl_tree, new);
-                else
-                        rangelock_enter_reader(rl, new);
-        } else
-                rangelock_enter_writer(rl, new); /* RL_WRITER or RL_APPEND */
+                } else if (!rangelock_enter_reader(rl, new, nonblock)) {
+                        kmem_free(new, sizeof (*new));
+                        new = NULL;
+                }
+        } else if (!rangelock_enter_writer(rl, new, nonblock)) {
+                kmem_free(new, sizeof (*new));
+                new = NULL;
+        }
         mutex_exit(&rl->rl_lock);
         return (new);
+}
+
+locked_range_t *
+rangelock_enter(rangelock_t *rl, uint64_t off, uint64_t len,
+    rangelock_type_t type)
+{
+        return (_rangelock_enter(rl, off, len, type, B_FALSE));
+}
+
+locked_range_t *
+rangelock_tryenter(rangelock_t *rl, uint64_t off, uint64_t len,
+    rangelock_type_t type)
+{
+        return (_rangelock_enter(rl, off, len, type, B_TRUE));
 }
 
 /*
Index: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
===================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
@@ -4498,13 +4498,27 @@
         end = IDX_TO_OFF(ma[count - 1]->pindex + 1);
 
         /*
-         * Lock a range covering all required and optional pages.
-         * Note that we need to handle the case of the block size growing.
+         * Try to lock a range covering all required and optional pages, to
+         * handle the case of the block size growing. It is not safe to block
+         * on the range lock since the owner may be waiting for the fault page
+         * to be unbusied.
          */
         for (;;) {
                 blksz = zp->z_blksz;
-                lr = rangelock_enter(&zp->z_rangelock, rounddown(start, blksz),
+                lr = rangelock_tryenter(&zp->z_rangelock,
+                    rounddown(start, blksz),
                     roundup(end, blksz) - rounddown(start, blksz), RL_READER);
+                if (lr == NULL) {
+                        if (rahead != NULL) {
+                                *rahead = 0;
+                                rahead = NULL;
+                        }
+                        if (rbehind != NULL) {
+                                *rbehind = 0;
+                                rbehind = NULL;
+                        }
+                        break;
+                }
                 if (blksz == zp->z_blksz)
                         break;
                 rangelock_exit(lr);
@@ -4515,7 +4529,8 @@
         obj_size = object->un_pager.vnp.vnp_size;
         zfs_vmobject_wunlock(object);
         if (IDX_TO_OFF(ma[count - 1]->pindex) >= obj_size) {
-                rangelock_exit(lr);
+                if (lr != NULL)
+                        rangelock_exit(lr);
                 ZFS_EXIT(zfsvfs);
                 return (zfs_vm_pagerret_bad);
         }
@@ -4543,7 +4558,8 @@
         error = dmu_read_pages(os, zp->z_id, ma, count, &pgsin_b, &pgsin_a,
             MIN(end, obj_size) - (end - PAGE_SIZE));
 
-        rangelock_exit(lr);
+        if (lr != NULL)
+                rangelock_exit(lr);
         ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
         ZFS_EXIT(zfsvfs);
 
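
For reference, a minimal sketch (not part of the patch) of how a caller might use
the new non-blocking interface. rangelock_tryenter(), rangelock_exit(), RL_READER,
and the z_rangelock field are taken from the patch above; the helper function
do_locked_read and its error-handling convention are hypothetical:

/*
 * Hypothetical caller sketch: rangelock_tryenter() returns NULL instead of
 * sleeping when the range is contended, so the caller must be prepared to
 * handle failure.  Only the rangelock API comes from the patch; the
 * surrounding function is illustrative.
 */
static int
do_locked_read(znode_t *zp, uint64_t off, uint64_t len)
{
        locked_range_t *lr;

        lr = rangelock_tryenter(&zp->z_rangelock, off, len, RL_READER);
        if (lr == NULL) {
                /*
                 * The range is held by another thread.  Blocking here could
                 * deadlock (e.g., against a thread waiting for a busy page,
                 * as in the zfs_getpages case above), so fail and let the
                 * caller retry later.
                 */
                return (EAGAIN);
        }

        /* ... perform the read under the range lock ... */

        rangelock_exit(lr);
        return (0);
}

Note the design choice this illustrates: rather than adding a flag argument to
the public rangelock_enter(), the patch keeps the blocking and non-blocking
entry points as separate thin wrappers around a common static _rangelock_enter(),
which frees the allocated locked_range_t and returns NULL on contention.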