INTERVAL_TREE_DEFINE(struct umem_odp_node, rb, u64, __subtree_last,
node_start, node_last, static, rbt_ib_umem)
-static void ib_umem_notifier_start_account(struct ib_umem *item)
+static void ib_umem_notifier_start_account(struct ib_umem_odp *umem_odp)
{
- mutex_lock(&item->odp_data->umem_mutex);
+ mutex_lock(&umem_odp->umem_mutex);
/* Only update private counters for this umem if it has them.
* Otherwise skip it. All page faults will be delayed for this umem. */
- if (item->odp_data->mn_counters_active) {
- int notifiers_count = item->odp_data->notifiers_count++;
+ if (umem_odp->mn_counters_active) {
+ int notifiers_count = umem_odp->notifiers_count++;
if (notifiers_count == 0)
/* Initialize the completion object for waiting on
* notifiers. Since notifier_count is zero, no one
* should be waiting right now. */
- reinit_completion(&item->odp_data->notifier_completion);
+ reinit_completion(&umem_odp->notifier_completion);
}
- mutex_unlock(&item->odp_data->umem_mutex);
+ mutex_unlock(&umem_odp->umem_mutex);
}
-static void ib_umem_notifier_end_account(struct ib_umem *item)
+static void ib_umem_notifier_end_account(struct ib_umem_odp *umem_odp)
{
- mutex_lock(&item->odp_data->umem_mutex);
+ mutex_lock(&umem_odp->umem_mutex);
/* Only update private counters for this umem if it has them.
* Otherwise skip it. All page faults will be delayed for this umem. */
- if (item->odp_data->mn_counters_active) {
+ if (umem_odp->mn_counters_active) {
/*
* This sequence increase will notify the QP page fault that
* the page that is going to be mapped in the spte could have
* been freed.
*/
- ++item->odp_data->notifiers_seq;
- if (--item->odp_data->notifiers_count == 0)
- complete_all(&item->odp_data->notifier_completion);
+ ++umem_odp->notifiers_seq;
+ if (--umem_odp->notifiers_count == 0)
+ complete_all(&umem_odp->notifier_completion);
}
- mutex_unlock(&item->odp_data->umem_mutex);
+ mutex_unlock(&umem_odp->umem_mutex);
}
/* Account for a new mmu notifier in an ib_ucontext. */
}
}
-static int ib_umem_notifier_release_trampoline(struct ib_umem *item, u64 start,
- u64 end, void *cookie) {
+static int ib_umem_notifier_release_trampoline(struct ib_umem_odp *umem_odp,
+ u64 start, u64 end, void *cookie)
+{
+ struct ib_umem *umem = umem_odp->umem;
+
/*
* Increase the number of notifiers running, to
* prevent any further fault handling on this MR.
*/
- ib_umem_notifier_start_account(item);
- item->odp_data->dying = 1;
+ ib_umem_notifier_start_account(umem_odp);
+ umem_odp->dying = 1;
/* Make sure that the fact the umem is dying is out before we release
* all pending page faults. */
smp_wmb();
- complete_all(&item->odp_data->notifier_completion);
- item->context->invalidate_range(item, ib_umem_start(item),
- ib_umem_end(item));
+ complete_all(&umem_odp->notifier_completion);
+ umem->context->invalidate_range(umem_odp, ib_umem_start(umem),
+ ib_umem_end(umem));
return 0;
}
up_read(&context->umem_rwsem);
}
-static int invalidate_page_trampoline(struct ib_umem *item, u64 start,
+static int invalidate_page_trampoline(struct ib_umem_odp *item, u64 start,
u64 end, void *cookie)
{
ib_umem_notifier_start_account(item);
- item->context->invalidate_range(item, start, start + PAGE_SIZE);
+ item->umem->context->invalidate_range(item, start, start + PAGE_SIZE);
ib_umem_notifier_end_account(item);
return 0;
}
-static int invalidate_range_start_trampoline(struct ib_umem *item, u64 start,
- u64 end, void *cookie)
+static int invalidate_range_start_trampoline(struct ib_umem_odp *item,
+ u64 start, u64 end, void *cookie)
{
ib_umem_notifier_start_account(item);
- item->context->invalidate_range(item, start, end);
+ item->umem->context->invalidate_range(item, start, end);
return 0;
}
return ret;
}
-static int invalidate_range_end_trampoline(struct ib_umem *item, u64 start,
+static int invalidate_range_end_trampoline(struct ib_umem_odp *item, u64 start,
u64 end, void *cookie)
{
ib_umem_notifier_end_account(item);
.invalidate_range_end = ib_umem_notifier_invalidate_range_end,
};
-struct ib_umem *ib_alloc_odp_umem(struct ib_ucontext *context,
- unsigned long addr,
- size_t size)
+struct ib_umem_odp *ib_alloc_odp_umem(struct ib_ucontext *context,
+ unsigned long addr, size_t size)
{
struct ib_umem *umem;
struct ib_umem_odp *odp_data;
umem->odp_data = odp_data;
- return umem;
+ return odp_data;
out_page_list:
vfree(odp_data->page_list);
return ret_val;
}
-void ib_umem_odp_release(struct ib_umem *umem)
+void ib_umem_odp_release(struct ib_umem_odp *umem_odp)
{
+ struct ib_umem *umem = umem_odp->umem;
struct ib_ucontext *context = umem->context;
/*
* It is the driver's responsibility to ensure, before calling us,
* that the hardware will not attempt to access the MR any more.
*/
- ib_umem_odp_unmap_dma_pages(umem, ib_umem_start(umem),
+ ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem),
ib_umem_end(umem));
down_write(&context->umem_rwsem);
if (likely(ib_umem_start(umem) != ib_umem_end(umem)))
- rbt_ib_umem_remove(&umem->odp_data->interval_tree,
+ rbt_ib_umem_remove(&umem_odp->interval_tree,
&context->umem_tree);
context->odp_mrs_count--;
- if (!umem->odp_data->mn_counters_active) {
- list_del(&umem->odp_data->no_private_counters);
- complete_all(&umem->odp_data->notifier_completion);
+ if (!umem_odp->mn_counters_active) {
+ list_del(&umem_odp->no_private_counters);
+ complete_all(&umem_odp->notifier_completion);
}
/*
out:
up_read(&context->umem_rwsem);
- vfree(umem->odp_data->dma_list);
- vfree(umem->odp_data->page_list);
- kfree(umem->odp_data);
+ vfree(umem_odp->dma_list);
+ vfree(umem_odp->page_list);
+ kfree(umem_odp);
kfree(umem);
}
* @access_mask: access permissions needed for this page.
* @current_seq: sequence number for synchronization with invalidations.
* the sequence number is taken from
- * umem->odp_data->notifiers_seq.
+ * umem_odp->notifiers_seq.
*
* The function returns -EFAULT if the DMA mapping operation fails. It returns
* -EAGAIN if a concurrent invalidation prevents us from updating the page.
* umem.
*/
static int ib_umem_odp_map_dma_single_page(
- struct ib_umem *umem,
+ struct ib_umem_odp *umem_odp,
int page_index,
struct page *page,
u64 access_mask,
unsigned long current_seq)
{
+ struct ib_umem *umem = umem_odp->umem;
struct ib_device *dev = umem->context->device;
dma_addr_t dma_addr;
int stored_page = 0;
* handle case of a racing notifier. This check also allows us to bail
* early if we have a notifier running in parallel with us.
*/
- if (ib_umem_mmu_notifier_retry(umem, current_seq)) {
+ if (ib_umem_mmu_notifier_retry(umem_odp, current_seq)) {
ret = -EAGAIN;
goto out;
}
- if (!(umem->odp_data->dma_list[page_index])) {
+ if (!(umem_odp->dma_list[page_index])) {
dma_addr = ib_dma_map_page(dev,
page,
0, BIT(umem->page_shift),
ret = -EFAULT;
goto out;
}
- umem->odp_data->dma_list[page_index] = dma_addr | access_mask;
- umem->odp_data->page_list[page_index] = page;
+ umem_odp->dma_list[page_index] = dma_addr | access_mask;
+ umem_odp->page_list[page_index] = page;
umem->npages++;
stored_page = 1;
- } else if (umem->odp_data->page_list[page_index] == page) {
- umem->odp_data->dma_list[page_index] |= access_mask;
+ } else if (umem_odp->page_list[page_index] == page) {
+ umem_odp->dma_list[page_index] |= access_mask;
} else {
pr_err("error: got different pages in IB device and from get_user_pages. IB device page: %p, gup page: %p\n",
- umem->odp_data->page_list[page_index], page);
+ umem_odp->page_list[page_index], page);
/* Better remove the mapping now, to prevent any further
* damage. */
remove_existing_mapping = 1;
if (remove_existing_mapping && umem->context->invalidate_range) {
invalidate_page_trampoline(
- umem,
+ umem_odp,
ib_umem_start(umem) + (page_index >> umem->page_shift),
ib_umem_start(umem) + ((page_index + 1) >>
umem->page_shift),
*
* Pins the range of pages passed in the argument, and maps them to
* DMA addresses. The DMA addresses of the mapped pages is updated in
- * umem->odp_data->dma_list.
+ * umem_odp->dma_list.
*
* Returns the number of pages mapped in success, negative error code
* for failure.
* the function from completing its task.
* An -ENOENT error code indicates that userspace process is being terminated
* and mm was already destroyed.
- * @umem: the umem to map and pin
+ * @umem_odp: the umem to map and pin
* @user_virt: the address from which we need to map.
* @bcnt: the minimal number of bytes to pin and map. The mapping might be
* bigger due to alignment, and may also be smaller in case of an error
* range.
* @current_seq: the MMU notifiers sequance value for synchronization with
* invalidations. the sequance number is read from
- * umem->odp_data->notifiers_seq before calling this function
+ * umem_odp->notifiers_seq before calling this function
*/
-int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
- u64 access_mask, unsigned long current_seq)
+int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt,
+ u64 bcnt, u64 access_mask,
+ unsigned long current_seq)
{
+ struct ib_umem *umem = umem_odp->umem;
struct task_struct *owning_process = NULL;
struct mm_struct *owning_mm = NULL;
struct page **local_page_list = NULL;
break;
bcnt -= min_t(size_t, npages << PAGE_SHIFT, bcnt);
- mutex_lock(&umem->odp_data->umem_mutex);
+ mutex_lock(&umem_odp->umem_mutex);
for (j = 0; j < npages; j++, user_virt += PAGE_SIZE) {
if (user_virt & ~page_mask) {
p += PAGE_SIZE;
}
ret = ib_umem_odp_map_dma_single_page(
- umem, k, local_page_list[j],
+ umem_odp, k, local_page_list[j],
access_mask, current_seq);
if (ret < 0)
break;
p = page_to_phys(local_page_list[j]);
k++;
}
- mutex_unlock(&umem->odp_data->umem_mutex);
+ mutex_unlock(&umem_odp->umem_mutex);
if (ret < 0) {
/* Release left over pages when handling errors. */
}
EXPORT_SYMBOL(ib_umem_odp_map_dma_pages);
-void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt,
+void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt,
u64 bound)
{
+ struct ib_umem *umem = umem_odp->umem;
int idx;
u64 addr;
struct ib_device *dev = umem->context->device;
* faults from completion. We might be racing with other
* invalidations, so we must make sure we free each page only
* once. */
- mutex_lock(&umem->odp_data->umem_mutex);
+ mutex_lock(&umem_odp->umem_mutex);
for (addr = virt; addr < bound; addr += BIT(umem->page_shift)) {
idx = (addr - ib_umem_start(umem)) >> umem->page_shift;
- if (umem->odp_data->page_list[idx]) {
- struct page *page = umem->odp_data->page_list[idx];
- dma_addr_t dma = umem->odp_data->dma_list[idx];
+ if (umem_odp->page_list[idx]) {
+ struct page *page = umem_odp->page_list[idx];
+ dma_addr_t dma = umem_odp->dma_list[idx];
dma_addr_t dma_addr = dma & ODP_DMA_ADDR_MASK;
WARN_ON(!dma_addr);
/* on demand pinning support */
if (!umem->context->invalidate_range)
put_page(page);
- umem->odp_data->page_list[idx] = NULL;
- umem->odp_data->dma_list[idx] = 0;
+ umem_odp->page_list[idx] = NULL;
+ umem_odp->dma_list[idx] = 0;
umem->npages--;
}
}
- mutex_unlock(&umem->odp_data->umem_mutex);
+ mutex_unlock(&umem_odp->umem_mutex);
}
EXPORT_SYMBOL(ib_umem_odp_unmap_dma_pages);
return -EAGAIN;
next = rbt_ib_umem_iter_next(node, start, last - 1);
umem = container_of(node, struct ib_umem_odp, interval_tree);
- ret_val = cb(umem->umem, start, last, cookie) || ret_val;
+ ret_val = cb(umem, start, last, cookie) || ret_val;
}
return ret_val;
wake_up(&imr->q_leaf_free);
}
-void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
+void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start,
unsigned long end)
{
struct mlx5_ib_mr *mr;
const u64 umr_block_mask = (MLX5_UMR_MTT_ALIGNMENT /
sizeof(struct mlx5_mtt)) - 1;
u64 idx = 0, blk_start_idx = 0;
+ struct ib_umem *umem;
int in_block = 0;
u64 addr;
- if (!umem || !umem->odp_data) {
+ if (!umem_odp) {
pr_err("invalidation called on NULL umem or non-ODP umem\n");
return;
}
+ umem = umem_odp->umem;
- mr = umem->odp_data->private;
+ mr = umem_odp->private;
if (!mr || !mr->ibmr.pd)
return;
* estimate the cost of another UMR vs. the cost of bigger
* UMR.
*/
- if (umem->odp_data->dma_list[idx] &
+ if (umem_odp->dma_list[idx] &
(ODP_READ_ALLOWED_BIT | ODP_WRITE_ALLOWED_BIT)) {
if (!in_block) {
blk_start_idx = idx;
* needed.
*/
- ib_umem_odp_unmap_dma_pages(umem, start, end);
+ ib_umem_odp_unmap_dma_pages(umem_odp, start, end);
if (unlikely(!umem->npages && mr->parent &&
- !umem->odp_data->dying)) {
- WRITE_ONCE(umem->odp_data->dying, 1);
+ !umem_odp->dying)) {
+ WRITE_ONCE(umem_odp->dying, 1);
atomic_inc(&mr->parent->num_leaf_free);
- schedule_work(&umem->odp_data->work);
+ schedule_work(&umem_odp->work);
}
}
u64 addr = io_virt & MLX5_IMR_MTT_MASK;
int nentries = 0, start_idx = 0, ret;
struct mlx5_ib_mr *mtt;
- struct ib_umem *umem;
mutex_lock(&mr->umem->odp_data->umem_mutex);
odp = odp_lookup(ctx, addr, 1, mr);
if (nentries)
nentries++;
} else {
- umem = ib_alloc_odp_umem(ctx, addr, MLX5_IMR_MTT_SIZE);
- if (IS_ERR(umem)) {
+ odp = ib_alloc_odp_umem(ctx, addr, MLX5_IMR_MTT_SIZE);
+ if (IS_ERR(odp)) {
mutex_unlock(&mr->umem->odp_data->umem_mutex);
- return ERR_CAST(umem);
+ return ERR_CAST(odp);
}
- mtt = implicit_mr_alloc(mr->ibmr.pd, umem, 0, mr->access_flags);
+ mtt = implicit_mr_alloc(mr->ibmr.pd, odp->umem, 0,
+ mr->access_flags);
if (IS_ERR(mtt)) {
mutex_unlock(&mr->umem->odp_data->umem_mutex);
- ib_umem_release(umem);
+ ib_umem_release(odp->umem);
return ERR_CAST(mtt);
}
- odp = umem->odp_data;
odp->private = mtt;
- mtt->umem = umem;
+ mtt->umem = odp->umem;
mtt->mmkey.iova = addr;
mtt->parent = mr;
INIT_WORK(&odp->work, mr_leaf_free_action);
return imr;
}
-static int mr_leaf_free(struct ib_umem *umem, u64 start,
- u64 end, void *cookie)
+static int mr_leaf_free(struct ib_umem_odp *umem_odp, u64 start, u64 end,
+ void *cookie)
{
- struct mlx5_ib_mr *mr = umem->odp_data->private, *imr = cookie;
+ struct mlx5_ib_mr *mr = umem_odp->private, *imr = cookie;
+ struct ib_umem *umem = umem_odp->umem;
if (mr->parent != imr)
return 0;
- ib_umem_odp_unmap_dma_pages(umem,
- ib_umem_start(umem),
+ ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem),
ib_umem_end(umem));
- if (umem->odp_data->dying)
+ if (umem_odp->dying)
return 0;
- WRITE_ONCE(umem->odp_data->dying, 1);
+ WRITE_ONCE(umem_odp->dying, 1);
atomic_inc(&imr->num_leaf_free);
- schedule_work(&umem->odp_data->work);
+ schedule_work(&umem_odp->work);
return 0;
}
*/
smp_rmb();
- ret = ib_umem_odp_map_dma_pages(mr->umem, io_virt, size,
+ ret = ib_umem_odp_map_dma_pages(to_ib_umem_odp(mr->umem), io_virt, size,
access_mask, current_seq);
if (ret < 0)
np = ret;
mutex_lock(&odp->umem_mutex);
- if (!ib_umem_mmu_notifier_retry(mr->umem, current_seq)) {
+ if (!ib_umem_mmu_notifier_retry(to_ib_umem_odp(mr->umem),
+ current_seq)) {
/*
* No need to check whether the MTTs really belong to
* this MR, since ib_umem_odp_map_dma_pages already
struct work_struct work;
};
+static inline struct ib_umem_odp *to_ib_umem_odp(struct ib_umem *umem)
+{
+ return umem->odp_data;
+}
+
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem,
int access);
-struct ib_umem *ib_alloc_odp_umem(struct ib_ucontext *context,
- unsigned long addr,
- size_t size);
-
-void ib_umem_odp_release(struct ib_umem *umem);
+struct ib_umem_odp *ib_alloc_odp_umem(struct ib_ucontext *context,
+ unsigned long addr, size_t size);
+void ib_umem_odp_release(struct ib_umem_odp *umem_odp);
/*
* The lower 2 bits of the DMA address signal the R/W permissions for
#define ODP_DMA_ADDR_MASK (~(ODP_READ_ALLOWED_BIT | ODP_WRITE_ALLOWED_BIT))
-int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 start_offset, u64 bcnt,
- u64 access_mask, unsigned long current_seq);
+int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 start_offset,
+ u64 bcnt, u64 access_mask,
+ unsigned long current_seq);
-void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 start_offset,
+void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 start_offset,
u64 bound);
-typedef int (*umem_call_back)(struct ib_umem *item, u64 start, u64 end,
+typedef int (*umem_call_back)(struct ib_umem_odp *item, u64 start, u64 end,
void *cookie);
/*
* Call the callback on each ib_umem in the range. Returns the logical or of
struct ib_umem_odp *rbt_ib_umem_lookup(struct rb_root_cached *root,
u64 addr, u64 length);
-static inline int ib_umem_mmu_notifier_retry(struct ib_umem *item,
+static inline int ib_umem_mmu_notifier_retry(struct ib_umem_odp *umem_odp,
unsigned long mmu_seq)
{
/*
* This code is strongly based on the KVM code from
* mmu_notifier_retry. Should be called with
- * the relevant locks taken (item->odp_data->umem_mutex
+ * the relevant locks taken (umem_odp->umem_mutex
* and the ucontext umem_mutex semaphore locked for read).
*/
/* Do not allow page faults while the new ib_umem hasn't seen a state
* with zero notifiers yet, and doesn't have its own valid set of
* private counters. */
- if (!item->odp_data->mn_counters_active)
+ if (!umem_odp->mn_counters_active)
return 1;
- if (unlikely(item->odp_data->notifiers_count))
+ if (unlikely(umem_odp->notifiers_count))
return 1;
- if (item->odp_data->notifiers_seq != mmu_seq)
+ if (umem_odp->notifiers_seq != mmu_seq)
return 1;
return 0;
}
return -EINVAL;
}
-static inline struct ib_umem *ib_alloc_odp_umem(struct ib_ucontext *context,
- unsigned long addr,
- size_t size)
+static inline struct ib_umem_odp *
+ib_alloc_odp_umem(struct ib_ucontext *context, unsigned long addr, size_t size)
{
return ERR_PTR(-EINVAL);
}
-static inline void ib_umem_odp_release(struct ib_umem *umem) {}
+static inline void ib_umem_odp_release(struct ib_umem_odp *umem_odp) {}
#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */