Changeset View
Changeset View
Standalone View
Standalone View
sys/ofed/drivers/infiniband/core/ib_core_uverbs.c
- This file was added.
/*- | |||||
* SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0 | |||||
* | |||||
* Copyright (c) 2005 Mellanox Technologies. All rights reserved. | |||||
* Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved. | |||||
* Copyright 2019 Marvell. All rights reserved. | |||||
* | |||||
* This software is available to you under a choice of one of two | |||||
* licenses. You may choose to be licensed under the terms of the GNU | |||||
* General Public License (GPL) Version 2, available from the file | |||||
* COPYING in the main directory of this source tree, or the | |||||
* OpenIB.org BSD license below: | |||||
* | |||||
* Redistribution and use in source and binary forms, with or | |||||
* without modification, are permitted provided that the following | |||||
* conditions are met: | |||||
* | |||||
* - Redistributions of source code must retain the above | |||||
* copyright notice, this list of conditions and the following | |||||
* disclaimer. | |||||
* | |||||
* - Redistributions in binary form must reproduce the above | |||||
* copyright notice, this list of conditions and the following | |||||
* disclaimer in the documentation and/or other materials | |||||
* provided with the distribution. | |||||
* | |||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | |||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | |||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | |||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
* SOFTWARE. | |||||
*/ | |||||
#include <sys/cdefs.h> | |||||
__FBSDID("$FreeBSD$"); | |||||
#include <linux/xarray.h> | |||||
#include "uverbs.h" | |||||
#include "core_priv.h" | |||||
/** | |||||
* rdma_umap_priv_init() - Initialize the private data of a vma | |||||
* | |||||
* @priv: The already allocated private data | |||||
* @vma: The vm area struct that needs private data | |||||
* @entry: entry into the mmap_xa that needs to be linked with | |||||
* this vma | |||||
* | |||||
* Each time we map IO memory into user space this keeps track of the | |||||
* mapping. When the device is hot-unplugged we 'zap' the mmaps in user space | |||||
* to point to the zero page and allow the hot unplug to proceed. | |||||
* | |||||
* This is necessary for cases like PCI physical hot unplug as the actual BAR | |||||
* memory may vanish after this and access to it from userspace could MCE. | |||||
* | |||||
* RDMA drivers supporting disassociation must have their user space designed | |||||
* to cope in some way with their IO pages going to the zero page. | |||||
* | |||||
*/ | |||||
void rdma_umap_priv_init(struct rdma_umap_priv *priv, | |||||
struct vm_area_struct *vma, | |||||
struct rdma_user_mmap_entry *entry) | |||||
{ | |||||
struct ib_uverbs_file *ufile = vma->vm_file->private_data; | |||||
priv->vma = vma; | |||||
if (entry) { | |||||
kref_get(&entry->ref); | |||||
priv->entry = entry; | |||||
} | |||||
vma->vm_private_data = priv; | |||||
/* vm_ops is setup in ib_uverbs_mmap() to avoid module dependencies */ | |||||
mutex_lock(&ufile->umap_lock); | |||||
list_add(&priv->list, &ufile->umaps); | |||||
mutex_unlock(&ufile->umap_lock); | |||||
} | |||||
EXPORT_SYMBOL(rdma_umap_priv_init); | |||||
/** | |||||
* rdma_user_mmap_io() - Map IO memory into a process | |||||
* | |||||
* @ucontext: associated user context | |||||
* @vma: the vma related to the current mmap call | |||||
* @pfn: pfn to map | |||||
* @size: size to map | |||||
* @prot: pgprot to use in remap call | |||||
* @entry: mmap_entry retrieved from rdma_user_mmap_entry_get(), or NULL | |||||
* if mmap_entry is not used by the driver | |||||
* | |||||
* This is to be called by drivers as part of their mmap() functions if they | |||||
* wish to send something like PCI-E BAR memory to userspace. | |||||
* | |||||
* Return -EINVAL on wrong flags or size, -EAGAIN on failure to map. 0 on | |||||
* success. | |||||
*/ | |||||
int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma, | |||||
unsigned long pfn, unsigned long size, pgprot_t prot, | |||||
struct rdma_user_mmap_entry *entry) | |||||
{ | |||||
struct ib_uverbs_file *ufile = ucontext->ufile; | |||||
struct rdma_umap_priv *priv; | |||||
if (!(vma->vm_flags & VM_SHARED)) | |||||
return -EINVAL; | |||||
if (vma->vm_end - vma->vm_start != size) | |||||
return -EINVAL; | |||||
/* Driver is using this wrong, must be called by ib_uverbs_mmap */ | |||||
if (WARN_ON(!vma->vm_file || | |||||
vma->vm_file->private_data != ufile)) | |||||
return -EINVAL; | |||||
priv = kzalloc(sizeof(*priv), GFP_KERNEL); | |||||
if (!priv) | |||||
return -ENOMEM; | |||||
vma->vm_page_prot = prot; | |||||
if (io_remap_pfn_range(vma, vma->vm_start, pfn, size, prot)) { | |||||
kfree(priv); | |||||
return -EAGAIN; | |||||
} | |||||
rdma_umap_priv_init(priv, vma, entry); | |||||
return 0; | |||||
} | |||||
EXPORT_SYMBOL(rdma_user_mmap_io); | |||||
/** | |||||
* rdma_user_mmap_entry_get_pgoff() - Get an entry from the mmap_xa | |||||
* | |||||
* @ucontext: associated user context | |||||
* @pgoff: The mmap offset >> PAGE_SHIFT | |||||
* | |||||
* This function is called when a user tries to mmap with an offset (returned | |||||
* by rdma_user_mmap_get_offset()) it initially received from the driver. The | |||||
* rdma_user_mmap_entry was created by the function | |||||
* rdma_user_mmap_entry_insert(). This function increases the refcnt of the | |||||
* entry so that it won't be deleted from the xarray in the meantime. | |||||
* | |||||
* Return an reference to an entry if exists or NULL if there is no | |||||
* match. rdma_user_mmap_entry_put() must be called to put the reference. | |||||
*/ | |||||
struct rdma_user_mmap_entry * | |||||
rdma_user_mmap_entry_get_pgoff(struct ib_ucontext *ucontext, | |||||
unsigned long pgoff) | |||||
{ | |||||
struct rdma_user_mmap_entry *entry; | |||||
if (pgoff > U32_MAX) | |||||
return NULL; | |||||
xa_lock(&ucontext->mmap_xa); | |||||
entry = xa_load(&ucontext->mmap_xa, pgoff); | |||||
/* | |||||
* If refcount is zero, entry is already being deleted, driver_removed | |||||
* indicates that the no further mmaps are possible and we waiting for | |||||
* the active VMAs to be closed. | |||||
*/ | |||||
if (!entry || entry->start_pgoff != pgoff || entry->driver_removed || | |||||
!kref_get_unless_zero(&entry->ref)) | |||||
goto err; | |||||
xa_unlock(&ucontext->mmap_xa); | |||||
return entry; | |||||
err: | |||||
xa_unlock(&ucontext->mmap_xa); | |||||
return NULL; | |||||
} | |||||
EXPORT_SYMBOL(rdma_user_mmap_entry_get_pgoff); | |||||
/** | |||||
* rdma_user_mmap_entry_get() - Get an entry from the mmap_xa | |||||
* | |||||
* @ucontext: associated user context | |||||
* @vma: the vma being mmap'd into | |||||
* | |||||
* This function is like rdma_user_mmap_entry_get_pgoff() except that it also | |||||
* checks that the VMA is correct. | |||||
*/ | |||||
struct rdma_user_mmap_entry * | |||||
rdma_user_mmap_entry_get(struct ib_ucontext *ucontext, | |||||
struct vm_area_struct *vma) | |||||
{ | |||||
struct rdma_user_mmap_entry *entry; | |||||
if (!(vma->vm_flags & VM_SHARED)) | |||||
return NULL; | |||||
entry = rdma_user_mmap_entry_get_pgoff(ucontext, vma->vm_pgoff); | |||||
if (!entry) | |||||
return NULL; | |||||
if (entry->npages * PAGE_SIZE != vma->vm_end - vma->vm_start) { | |||||
rdma_user_mmap_entry_put(entry); | |||||
return NULL; | |||||
} | |||||
return entry; | |||||
} | |||||
EXPORT_SYMBOL(rdma_user_mmap_entry_get); | |||||
static void rdma_user_mmap_entry_free(struct kref *kref) | |||||
{ | |||||
struct rdma_user_mmap_entry *entry = | |||||
container_of(kref, struct rdma_user_mmap_entry, ref); | |||||
struct ib_ucontext *ucontext = entry->ucontext; | |||||
unsigned long i; | |||||
/* | |||||
* Erase all entries occupied by this single entry, this is deferred | |||||
* until all VMA are closed so that the mmap offsets remain unique. | |||||
*/ | |||||
xa_lock(&ucontext->mmap_xa); | |||||
for (i = 0; i < entry->npages; i++) | |||||
__xa_erase(&ucontext->mmap_xa, entry->start_pgoff + i); | |||||
xa_unlock(&ucontext->mmap_xa); | |||||
if (ucontext->device->mmap_free) | |||||
ucontext->device->mmap_free(entry); | |||||
} | |||||
/** | |||||
* rdma_user_mmap_entry_put() - Drop reference to the mmap entry | |||||
* | |||||
* @entry: an entry in the mmap_xa | |||||
* | |||||
* This function is called when the mapping is closed if it was | |||||
* an io mapping or when the driver is done with the entry for | |||||
* some other reason. | |||||
* Should be called after rdma_user_mmap_entry_get was called | |||||
* and entry is no longer needed. This function will erase the | |||||
* entry and free it if its refcnt reaches zero. | |||||
*/ | |||||
void rdma_user_mmap_entry_put(struct rdma_user_mmap_entry *entry) | |||||
{ | |||||
kref_put(&entry->ref, rdma_user_mmap_entry_free); | |||||
} | |||||
EXPORT_SYMBOL(rdma_user_mmap_entry_put); | |||||
/** | |||||
* rdma_user_mmap_entry_remove() - Drop reference to entry and | |||||
* mark it as unmmapable | |||||
* | |||||
* @entry: the entry to insert into the mmap_xa | |||||
* | |||||
* Drivers can call this to prevent userspace from creating more mappings for | |||||
* entry, however existing mmaps continue to exist and ops->mmap_free() will | |||||
* not be called until all user mmaps are destroyed. | |||||
*/ | |||||
void rdma_user_mmap_entry_remove(struct rdma_user_mmap_entry *entry) | |||||
{ | |||||
if (!entry) | |||||
return; | |||||
xa_lock(&entry->ucontext->mmap_xa); | |||||
entry->driver_removed = true; | |||||
xa_unlock(&entry->ucontext->mmap_xa); | |||||
kref_put(&entry->ref, rdma_user_mmap_entry_free); | |||||
} | |||||
EXPORT_SYMBOL(rdma_user_mmap_entry_remove); | |||||
/** | |||||
* rdma_user_mmap_entry_insert_range() - Insert an entry to the mmap_xa | |||||
* in a given range. | |||||
* | |||||
* @ucontext: associated user context. | |||||
* @entry: the entry to insert into the mmap_xa | |||||
* @length: length of the address that will be mmapped | |||||
* @min_pgoff: minimum pgoff to be returned | |||||
* @max_pgoff: maximum pgoff to be returned | |||||
* | |||||
* This function should be called by drivers that use the rdma_user_mmap | |||||
* interface for implementing their mmap syscall A database of mmap offsets is | |||||
* handled in the core and helper functions are provided to insert entries | |||||
* into the database and extract entries when the user calls mmap with the | |||||
* given offset. The function allocates a unique page offset in a given range | |||||
* that should be provided to user, the user will use the offset to retrieve | |||||
* information such as address to be mapped and how. | |||||
* | |||||
* Return: 0 on success and -ENOMEM on failure | |||||
*/ | |||||
int rdma_user_mmap_entry_insert_range(struct ib_ucontext *ucontext, | |||||
struct rdma_user_mmap_entry *entry, | |||||
size_t length, u32 min_pgoff, | |||||
u32 max_pgoff) | |||||
{ | |||||
struct ib_uverbs_file *ufile = ucontext->ufile; | |||||
u32 xa_first, xa_last, npages; | |||||
int err; | |||||
u32 i; | |||||
u32 j; | |||||
if (!entry) | |||||
return -EINVAL; | |||||
kref_init(&entry->ref); | |||||
entry->ucontext = ucontext; | |||||
/* | |||||
* We want the whole allocation to be done without interruption from a | |||||
* different thread. The allocation requires finding a free range and | |||||
* storing. During the xa_insert the lock could be released, possibly | |||||
* allowing another thread to choose the same range. | |||||
*/ | |||||
mutex_lock(&ufile->umap_lock); | |||||
xa_lock(&ucontext->mmap_xa); | |||||
/* We want to find an empty range */ | |||||
npages = (u32)DIV_ROUND_UP(length, PAGE_SIZE); | |||||
entry->npages = npages; | |||||
/* Find an empty range */ | |||||
for (i = min_pgoff, j = 0; (i + j) <= max_pgoff && j != npages; ) { | |||||
if (xa_load(&ucontext->mmap_xa, i + j) != NULL) { | |||||
if (unlikely(i + j == max_pgoff)) | |||||
break; | |||||
i = i + j + 1; | |||||
j = 0; | |||||
} else { | |||||
if (unlikely(i + j == max_pgoff)) | |||||
break; | |||||
j++; | |||||
} | |||||
} | |||||
if (j != npages) | |||||
goto err_unlock; | |||||
xa_first = i; | |||||
xa_last = i + j; | |||||
for (i = xa_first; i < xa_last; i++) { | |||||
err = __xa_insert(&ucontext->mmap_xa, i, entry, GFP_KERNEL); | |||||
if (err) | |||||
goto err_undo; | |||||
} | |||||
/* | |||||
* Internally the kernel uses a page offset, in libc this is a byte | |||||
* offset. Drivers should not return pgoff to userspace. | |||||
*/ | |||||
entry->start_pgoff = xa_first; | |||||
xa_unlock(&ucontext->mmap_xa); | |||||
mutex_unlock(&ufile->umap_lock); | |||||
return 0; | |||||
err_undo: | |||||
for (; i > xa_first; i--) | |||||
__xa_erase(&ucontext->mmap_xa, i - 1); | |||||
err_unlock: | |||||
xa_unlock(&ucontext->mmap_xa); | |||||
mutex_unlock(&ufile->umap_lock); | |||||
return -ENOMEM; | |||||
} | |||||
EXPORT_SYMBOL(rdma_user_mmap_entry_insert_range); | |||||
/** | |||||
* rdma_user_mmap_entry_insert() - Insert an entry to the mmap_xa. | |||||
* | |||||
* @ucontext: associated user context. | |||||
* @entry: the entry to insert into the mmap_xa | |||||
* @length: length of the address that will be mmapped | |||||
* | |||||
* This function should be called by drivers that use the rdma_user_mmap | |||||
* interface for handling user mmapped addresses. The database is handled in | |||||
* the core and helper functions are provided to insert entries into the | |||||
* database and extract entries when the user calls mmap with the given offset. | |||||
* The function allocates a unique page offset that should be provided to user, | |||||
* the user will use the offset to retrieve information such as address to | |||||
* be mapped and how. | |||||
* | |||||
* Return: 0 on success and -ENOMEM on failure | |||||
*/ | |||||
int rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext, | |||||
struct rdma_user_mmap_entry *entry, | |||||
size_t length) | |||||
{ | |||||
return rdma_user_mmap_entry_insert_range(ucontext, entry, length, 0, | |||||
U32_MAX); | |||||
} | |||||
EXPORT_SYMBOL(rdma_user_mmap_entry_insert); |