/*
 * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the names of the copyright holders nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * Alternatively, this software may be distributed under the terms of the
 * GNU General Public License ("GPL") version 2 as published by the Free
 * Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <linux/module.h>
#include <linux/pid.h>
#include <linux/pid_namespace.h>
#include <linux/mutex.h>
#include <net/netlink.h>
#include <rdma/rdma_cm.h>
#include <rdma/rdma_netlink.h>

#include "core_priv.h"
#include "cma_priv.h"
#include "restrack.h"

/*
 * Sort array elements by the netlink attribute name
 */
static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
        [RDMA_NLDEV_ATTR_CHARDEV]               = { .type = NLA_U64 },
        [RDMA_NLDEV_ATTR_CHARDEV_ABI]           = { .type = NLA_U64 },
        [RDMA_NLDEV_ATTR_CHARDEV_NAME]          = { .type = NLA_NUL_STRING,
                                        .len = RDMA_NLDEV_ATTR_EMPTY_STRING },
        [RDMA_NLDEV_ATTR_CHARDEV_TYPE]          = { .type = NLA_NUL_STRING,
                                        .len = RDMA_NLDEV_ATTR_CHARDEV_TYPE_SIZE },
        [RDMA_NLDEV_ATTR_DEV_DIM]               = { .type = NLA_U8 },
        [RDMA_NLDEV_ATTR_DEV_INDEX]             = { .type = NLA_U32 },
        [RDMA_NLDEV_ATTR_DEV_NAME]              = { .type = NLA_NUL_STRING,
                                        .len = IB_DEVICE_NAME_MAX },
        [RDMA_NLDEV_ATTR_DEV_NODE_TYPE]         = { .type = NLA_U8 },
        [RDMA_NLDEV_ATTR_DEV_PROTOCOL]          = { .type = NLA_NUL_STRING,
                                        .len = RDMA_NLDEV_ATTR_EMPTY_STRING },
        [RDMA_NLDEV_ATTR_DRIVER]                = { .type = NLA_NESTED },
        [RDMA_NLDEV_ATTR_DRIVER_ENTRY]          = { .type = NLA_NESTED },
        [RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE]     = { .type = NLA_U8 },
        [RDMA_NLDEV_ATTR_DRIVER_STRING]         = { .type = NLA_NUL_STRING,
                                        .len = RDMA_NLDEV_ATTR_EMPTY_STRING },
        [RDMA_NLDEV_ATTR_DRIVER_S32]            = { .type = NLA_S32 },
        [RDMA_NLDEV_ATTR_DRIVER_S64]            = { .type = NLA_S64 },
        [RDMA_NLDEV_ATTR_DRIVER_U32]            = { .type = NLA_U32 },
        [RDMA_NLDEV_ATTR_DRIVER_U64]            = { .type = NLA_U64 },
        [RDMA_NLDEV_ATTR_FW_VERSION]            = { .type = NLA_NUL_STRING,
                                        .len = RDMA_NLDEV_ATTR_EMPTY_STRING },
        [RDMA_NLDEV_ATTR_LID]                   = { .type = NLA_U32 },
        [RDMA_NLDEV_ATTR_LINK_TYPE]             = { .type = NLA_NUL_STRING,
                                        .len = IFNAMSIZ },
        [RDMA_NLDEV_ATTR_LMC]                   = { .type = NLA_U8 },
        [RDMA_NLDEV_ATTR_NDEV_INDEX]            = { .type = NLA_U32 },
        [RDMA_NLDEV_ATTR_NDEV_NAME]             = { .type = NLA_NUL_STRING,
                                        .len = IFNAMSIZ },
        [RDMA_NLDEV_ATTR_NODE_GUID]             = { .type = NLA_U64 },
        [RDMA_NLDEV_ATTR_PORT_INDEX]            = { .type = NLA_U32 },
        [RDMA_NLDEV_ATTR_PORT_PHYS_STATE]       = { .type = NLA_U8 },
        [RDMA_NLDEV_ATTR_PORT_STATE]            = { .type = NLA_U8 },
        [RDMA_NLDEV_ATTR_RES_CM_ID]             = { .type = NLA_NESTED },
        [RDMA_NLDEV_ATTR_RES_CM_IDN]            = { .type = NLA_U32 },
        [RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY]       = { .type = NLA_NESTED },
        [RDMA_NLDEV_ATTR_RES_CQ]                = { .type = NLA_NESTED },
        [RDMA_NLDEV_ATTR_RES_CQE]               = { .type = NLA_U32 },
        [RDMA_NLDEV_ATTR_RES_CQN]               = { .type = NLA_U32 },
        [RDMA_NLDEV_ATTR_RES_CQ_ENTRY]          = { .type = NLA_NESTED },
        [RDMA_NLDEV_ATTR_RES_CTXN]              = { .type = NLA_U32 },
        [RDMA_NLDEV_ATTR_RES_DST_ADDR]          = {
                        .len = sizeof(struct __kernel_sockaddr_storage) },
        [RDMA_NLDEV_ATTR_RES_IOVA]              = { .type = NLA_U64 },
        [RDMA_NLDEV_ATTR_RES_KERN_NAME]         = { .type = NLA_NUL_STRING,
                                        .len = RDMA_NLDEV_ATTR_EMPTY_STRING },
        [RDMA_NLDEV_ATTR_RES_LKEY]              = { .type = NLA_U32 },
        [RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY]    = { .type = NLA_U32 },
        [RDMA_NLDEV_ATTR_RES_LQPN]              = { .type = NLA_U32 },
        [RDMA_NLDEV_ATTR_RES_MR]                = { .type = NLA_NESTED },
        [RDMA_NLDEV_ATTR_RES_MRLEN]             = { .type = NLA_U64 },
        [RDMA_NLDEV_ATTR_RES_MRN]               = { .type = NLA_U32 },
        [RDMA_NLDEV_ATTR_RES_MR_ENTRY]          = { .type = NLA_NESTED },
        [RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE]    = { .type = NLA_U8 },
        [RDMA_NLDEV_ATTR_RES_PD]                = { .type = NLA_NESTED },
        [RDMA_NLDEV_ATTR_RES_PDN]               = { .type = NLA_U32 },
        [RDMA_NLDEV_ATTR_RES_PD_ENTRY]          = { .type = NLA_NESTED },
        [RDMA_NLDEV_ATTR_RES_PID]               = { .type = NLA_U32 },
        [RDMA_NLDEV_ATTR_RES_POLL_CTX]          = { .type = NLA_U8 },
        [RDMA_NLDEV_ATTR_RES_PS]                = { .type = NLA_U32 },
        [RDMA_NLDEV_ATTR_RES_QP]                = { .type = NLA_NESTED },
        [RDMA_NLDEV_ATTR_RES_QP_ENTRY]          = { .type = NLA_NESTED },
        [RDMA_NLDEV_ATTR_RES_RKEY]              = { .type = NLA_U32 },
        [RDMA_NLDEV_ATTR_RES_RQPN]              = { .type = NLA_U32 },
        [RDMA_NLDEV_ATTR_RES_RQ_PSN]            = { .type = NLA_U32 },
        [RDMA_NLDEV_ATTR_RES_SQ_PSN]            = { .type = NLA_U32 },
        [RDMA_NLDEV_ATTR_RES_SRC_ADDR]          = {
                        .len = sizeof(struct __kernel_sockaddr_storage) },
        [RDMA_NLDEV_ATTR_RES_STATE]             = { .type = NLA_U8 },
        [RDMA_NLDEV_ATTR_RES_SUMMARY]           = { .type = NLA_NESTED },
        [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY]     = { .type = NLA_NESTED },
        [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR]= { .type = NLA_U64 },
        [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME]= { .type = NLA_NUL_STRING,
                                        .len = RDMA_NLDEV_ATTR_EMPTY_STRING },
        [RDMA_NLDEV_ATTR_RES_TYPE]              = { .type = NLA_U8 },
        [RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY]= { .type = NLA_U32 },
        [RDMA_NLDEV_ATTR_RES_USECNT]            = { .type = NLA_U64 },
        [RDMA_NLDEV_ATTR_SM_LID]                = { .type = NLA_U32 },
        [RDMA_NLDEV_ATTR_SUBNET_PREFIX]         = { .type = NLA_U64 },
        [RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]   = { .type = NLA_U32 },
        [RDMA_NLDEV_ATTR_STAT_MODE]             = { .type = NLA_U32 },
        [RDMA_NLDEV_ATTR_STAT_RES]              = { .type = NLA_U32 },
        [RDMA_NLDEV_ATTR_STAT_COUNTER]          = { .type = NLA_NESTED },
        [RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY]    = { .type = NLA_NESTED },
        [RDMA_NLDEV_ATTR_STAT_COUNTER_ID]       = { .type = NLA_U32 },
        [RDMA_NLDEV_ATTR_STAT_HWCOUNTERS]       = { .type = NLA_NESTED },
        [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY]  = { .type = NLA_NESTED },
        [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME] = { .type = NLA_NUL_STRING },
        [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE] = { .type = NLA_U64 },
        [RDMA_NLDEV_ATTR_SYS_IMAGE_GUID]        = { .type = NLA_U64 },
        [RDMA_NLDEV_ATTR_UVERBS_DRIVER_ID]      = { .type = NLA_U32 },
        [RDMA_NLDEV_NET_NS_FD]                  = { .type = NLA_U32 },
        [RDMA_NLDEV_SYS_ATTR_NETNS_MODE]        = { .type = NLA_U8 },
};
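
/*
 * Example (illustrative sketch): the doit/dumpit handlers below validate
 * incoming messages against this policy before touching any attribute,
 * along the lines of:
 *
 *      struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
 *
 *      err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
 *                                   nldev_policy, extack);
 *      if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
 *              return -EINVAL;
 *      index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
 */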

static int put_driver_name_print_type(struct sk_buff *msg, const char *name,
                                      enum rdma_nldev_print_type print_type)
{
        if (nla_put_string(msg, RDMA_NLDEV_ATTR_DRIVER_STRING, name))
                return -EMSGSIZE;
        if (print_type != RDMA_NLDEV_PRINT_TYPE_UNSPEC &&
            nla_put_u8(msg, RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE, print_type))
                return -EMSGSIZE;

        return 0;
}

static int _rdma_nl_put_driver_u32(struct sk_buff *msg, const char *name,
                                   enum rdma_nldev_print_type print_type,
                                   u32 value)
{
        if (put_driver_name_print_type(msg, name, print_type))
                return -EMSGSIZE;
        if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DRIVER_U32, value))
                return -EMSGSIZE;

        return 0;
}

static int _rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name,
                                   enum rdma_nldev_print_type print_type,
                                   u64 value)
{
        if (put_driver_name_print_type(msg, name, print_type))
                return -EMSGSIZE;
        if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_DRIVER_U64, value,
                              RDMA_NLDEV_ATTR_PAD))
                return -EMSGSIZE;

        return 0;
}

int rdma_nl_put_driver_u32(struct sk_buff *msg, const char *name, u32 value)
{
        return _rdma_nl_put_driver_u32(msg, name, RDMA_NLDEV_PRINT_TYPE_UNSPEC,
                                       value);
}
EXPORT_SYMBOL(rdma_nl_put_driver_u32);

int rdma_nl_put_driver_u32_hex(struct sk_buff *msg, const char *name,
                               u32 value)
{
        return _rdma_nl_put_driver_u32(msg, name, RDMA_NLDEV_PRINT_TYPE_HEX,
                                       value);
}
EXPORT_SYMBOL(rdma_nl_put_driver_u32_hex);

int rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name, u64 value)
{
        return _rdma_nl_put_driver_u64(msg, name, RDMA_NLDEV_PRINT_TYPE_UNSPEC,
                                       value);
}
EXPORT_SYMBOL(rdma_nl_put_driver_u64);

int rdma_nl_put_driver_u64_hex(struct sk_buff *msg, const char *name, u64 value)
{
        return _rdma_nl_put_driver_u64(msg, name, RDMA_NLDEV_PRINT_TYPE_HEX,
                                       value);
}
EXPORT_SYMBOL(rdma_nl_put_driver_u64_hex);

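/*
 * Example (illustrative, hypothetical driver code): a driver's restrack
 * fill callback can use the exported helpers above to emit named values,
 * e.g.
 *
 *      if (rdma_nl_put_driver_u32(msg, "sq_wqe_cnt", qp->sq.wqe_cnt))
 *              return -EMSGSIZE;
 *
 * The attribute name and field here are made up for illustration.
 */
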
static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device)
{
        if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index))
                return -EMSGSIZE;
        if (nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME,
                           dev_name(&device->dev)))
                return -EMSGSIZE;

        return 0;
}

static int fill_dev_info(struct sk_buff *msg, struct ib_device *device)
{
        char fw[IB_FW_VERSION_NAME_MAX];
        int ret = 0;
        u8 port;

        if (fill_nldev_handle(msg, device))
                return -EMSGSIZE;

        if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, rdma_end_port(device)))
                return -EMSGSIZE;

        BUILD_BUG_ON(sizeof(device->attrs.device_cap_flags) != sizeof(u64));
        if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
                              device->attrs.device_cap_flags,
                              RDMA_NLDEV_ATTR_PAD))
                return -EMSGSIZE;

        ib_get_device_fw_str(device, fw);
        /* Device without FW has strlen(fw) = 0 */
        if (strlen(fw) && nla_put_string(msg, RDMA_NLDEV_ATTR_FW_VERSION, fw))
                return -EMSGSIZE;

        if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_NODE_GUID,
                              be64_to_cpu(device->node_guid),
                              RDMA_NLDEV_ATTR_PAD))
                return -EMSGSIZE;
        if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SYS_IMAGE_GUID,
                              be64_to_cpu(device->attrs.sys_image_guid),
                              RDMA_NLDEV_ATTR_PAD))
                return -EMSGSIZE;
        if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_NODE_TYPE, device->node_type))
                return -EMSGSIZE;
        if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_DIM, device->use_cq_dim))
                return -EMSGSIZE;

        /*
         * The link type is determined from the first port. Devices such
         * as mlx4, which can have two different link types on the same
         * IB device, are best avoided in the future.
         */
        port = rdma_start_port(device);
        if (rdma_cap_opa_mad(device, port))
                ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "opa");
        else if (rdma_protocol_ib(device, port))
                ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "ib");
        else if (rdma_protocol_iwarp(device, port))
                ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "iw");
        else if (rdma_protocol_roce(device, port))
                ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "roce");
        else if (rdma_protocol_usnic(device, port))
                ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL,
                                     "usnic");
        return ret;
}

static int fill_port_info(struct sk_buff *msg,
                          struct ib_device *device, u32 port,
                          const struct net *net)
{
        struct net_device *netdev = NULL;
        struct ib_port_attr attr;
        int ret;
        u64 cap_flags = 0;

        if (fill_nldev_handle(msg, device))
                return -EMSGSIZE;

        if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port))
                return -EMSGSIZE;

        ret = ib_query_port(device, port, &attr);
        if (ret)
                return ret;

        if (rdma_protocol_ib(device, port)) {
                BUILD_BUG_ON((sizeof(attr.port_cap_flags) +
                                sizeof(attr.port_cap_flags2)) > sizeof(u64));
                cap_flags = attr.port_cap_flags |
                        ((u64)attr.port_cap_flags2 << 32);
                if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
                                      cap_flags, RDMA_NLDEV_ATTR_PAD))
                        return -EMSGSIZE;
                if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SUBNET_PREFIX,
                                      attr.subnet_prefix, RDMA_NLDEV_ATTR_PAD))
                        return -EMSGSIZE;
                if (nla_put_u32(msg, RDMA_NLDEV_ATTR_LID, attr.lid))
                        return -EMSGSIZE;
                if (nla_put_u32(msg, RDMA_NLDEV_ATTR_SM_LID, attr.sm_lid))
                        return -EMSGSIZE;
                if (nla_put_u8(msg, RDMA_NLDEV_ATTR_LMC, attr.lmc))
                        return -EMSGSIZE;
        }
        if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_STATE, attr.state))
                return -EMSGSIZE;
        if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_PHYS_STATE, attr.phys_state))
                return -EMSGSIZE;

        netdev = ib_device_get_netdev(device, port);
        if (netdev && net_eq(dev_net(netdev), net)) {
                ret = nla_put_u32(msg,
                                  RDMA_NLDEV_ATTR_NDEV_INDEX, netdev->ifindex);
                if (ret)
                        goto out;
                ret = nla_put_string(msg,
                                     RDMA_NLDEV_ATTR_NDEV_NAME, netdev->name);
        }

out:
        if (netdev)
                dev_put(netdev);
        return ret;
}

static int fill_res_info_entry(struct sk_buff *msg,
                               const char *name, u64 curr)
{
        struct nlattr *entry_attr;

        entry_attr = nla_nest_start_noflag(msg,
                                           RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY);
        if (!entry_attr)
                return -EMSGSIZE;

        if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME, name))
                goto err;
        if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR, curr,
                              RDMA_NLDEV_ATTR_PAD))
                goto err;

        nla_nest_end(msg, entry_attr);
        return 0;

err:
        nla_nest_cancel(msg, entry_attr);
        return -EMSGSIZE;
}

static int fill_res_info(struct sk_buff *msg, struct ib_device *device)
{
        static const char * const names[RDMA_RESTRACK_MAX] = {
                [RDMA_RESTRACK_PD] = "pd",
                [RDMA_RESTRACK_CQ] = "cq",
                [RDMA_RESTRACK_QP] = "qp",
                [RDMA_RESTRACK_CM_ID] = "cm_id",
                [RDMA_RESTRACK_MR] = "mr",
                [RDMA_RESTRACK_CTX] = "ctx",
        };

        struct nlattr *table_attr;
        int ret, i, curr;

        if (fill_nldev_handle(msg, device))
                return -EMSGSIZE;

        table_attr = nla_nest_start_noflag(msg, RDMA_NLDEV_ATTR_RES_SUMMARY);
        if (!table_attr)
                return -EMSGSIZE;

        for (i = 0; i < RDMA_RESTRACK_MAX; i++) {
                if (!names[i])
                        continue;
                curr = rdma_restrack_count(device, i,
                                           task_active_pid_ns(current));
                ret = fill_res_info_entry(msg, names[i], curr);
                if (ret)
                        goto err;
        }

        nla_nest_end(msg, table_attr);
        return 0;

err:
        nla_nest_cancel(msg, table_attr);
        return ret;
}

static int fill_res_name_pid(struct sk_buff *msg,
                             struct rdma_restrack_entry *res)
{
        /*
         * For user resources, userspace should read /proc/PID/comm to
         * get the name of the task.
         */
        if (rdma_is_kernel_res(res)) {
                if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME,
                    res->kern_name))
                        return -EMSGSIZE;
        } else {
                if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID,
                    task_pid_vnr(res->task)))
                        return -EMSGSIZE;
        }
        return 0;
}

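/*
 * Let the device driver append vendor-specific attributes for this
 * resource. A nonzero return means the driver could not add its
 * attributes; callers treat that as -EMSGSIZE.
 */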
static bool fill_res_entry(struct ib_device *dev, struct sk_buff *msg,
                           struct rdma_restrack_entry *res)
{
        if (!dev->ops.fill_res_entry)
                return false;
        return dev->ops.fill_res_entry(msg, res);
}

static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin,
                             struct rdma_restrack_entry *res, uint32_t port)
{
        struct ib_qp *qp = container_of(res, struct ib_qp, res);
        struct ib_device *dev = qp->device;
        struct ib_qp_init_attr qp_init_attr;
        struct ib_qp_attr qp_attr;
        int ret;

        ret = ib_query_qp(qp, &qp_attr, 0, &qp_init_attr);
        if (ret)
                return ret;

        if (port && port != qp_attr.port_num)
                return -EAGAIN;

        /* In create_qp() port is not set yet */
        if (qp_attr.port_num &&
            nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, qp_attr.port_num))
                goto err;

        if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qp->qp_num))
                goto err;
        if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC) {
                if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQPN,
                                qp_attr.dest_qp_num))
                        goto err;
                if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQ_PSN,
                                qp_attr.rq_psn))
                        goto err;
        }

        if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_SQ_PSN, qp_attr.sq_psn))
                goto err;

        if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC ||
            qp->qp_type == IB_QPT_XRC_INI || qp->qp_type == IB_QPT_XRC_TGT) {
                if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE,
                               qp_attr.path_mig_state))
                        goto err;
        }
        if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, qp->qp_type))
                goto err;
        if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, qp_attr.qp_state))
                goto err;

        if (!rdma_is_kernel_res(res) &&
            nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, qp->pd->res.id))
                goto err;

        if (fill_res_name_pid(msg, res))
                goto err;

        if (fill_res_entry(dev, msg, res))
                goto err;

        return 0;

err:    return -EMSGSIZE;
}
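
/*
 * Note: the -EAGAIN above is not an error; res_get_common_dumpit()
 * interprets it as "entry filtered out by the port check, move on to
 * the next one".
 */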

static int fill_res_cm_id_entry(struct sk_buff *msg, bool has_cap_net_admin,
                                struct rdma_restrack_entry *res, uint32_t port)
{
        struct rdma_id_private *id_priv =
                                container_of(res, struct rdma_id_private, res);
        struct ib_device *dev = id_priv->id.device;
        struct rdma_cm_id *cm_id = &id_priv->id;

        if (port && port != cm_id->port_num)
                return 0;

        if (cm_id->port_num &&
            nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, cm_id->port_num))
                goto err;

        if (id_priv->qp_num) {
                if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, id_priv->qp_num))
                        goto err;
                if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, cm_id->qp_type))
                        goto err;
        }

        if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PS, cm_id->ps))
                goto err;

        if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, id_priv->state))
                goto err;

        if (cm_id->route.addr.src_addr.ss_family &&
            nla_put(msg, RDMA_NLDEV_ATTR_RES_SRC_ADDR,
                    sizeof(cm_id->route.addr.src_addr),
                    &cm_id->route.addr.src_addr))
                goto err;
        if (cm_id->route.addr.dst_addr.ss_family &&
            nla_put(msg, RDMA_NLDEV_ATTR_RES_DST_ADDR,
                    sizeof(cm_id->route.addr.dst_addr),
                    &cm_id->route.addr.dst_addr))
                goto err;

        if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CM_IDN, res->id))
                goto err;

        if (fill_res_name_pid(msg, res))
                goto err;

        if (fill_res_entry(dev, msg, res))
                goto err;

        return 0;

err:    return -EMSGSIZE;
}

static int fill_res_cq_entry(struct sk_buff *msg, bool has_cap_net_admin,
                             struct rdma_restrack_entry *res, uint32_t port)
{
        struct ib_cq *cq = container_of(res, struct ib_cq, res);
        struct ib_device *dev = cq->device;

        if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQE, cq->cqe))
                goto err;
        if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
                              atomic_read(&cq->usecnt), RDMA_NLDEV_ATTR_PAD))
                goto err;

        /* Poll context is only valid for kernel CQs */
        if (rdma_is_kernel_res(res) &&
            nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_POLL_CTX, cq->poll_ctx))
                goto err;

        if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_DIM, (cq->dim != NULL)))
                goto err;

        if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQN, res->id))
                goto err;
        if (!rdma_is_kernel_res(res) &&
            nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN,
                        cq->uobject->context->res.id))
                goto err;

        if (fill_res_name_pid(msg, res))
                goto err;

        if (fill_res_entry(dev, msg, res))
                goto err;

        return 0;

err:    return -EMSGSIZE;
}

static int fill_res_mr_entry(struct sk_buff *msg, bool has_cap_net_admin,
                             struct rdma_restrack_entry *res, uint32_t port)
{
        struct ib_mr *mr = container_of(res, struct ib_mr, res);
        struct ib_device *dev = mr->pd->device;

        if (has_cap_net_admin) {
                if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RKEY, mr->rkey))
                        goto err;
                if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LKEY, mr->lkey))
                        goto err;
        }

        if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_MRLEN, mr->length,
                              RDMA_NLDEV_ATTR_PAD))
                goto err;

        if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id))
                goto err;

        if (!rdma_is_kernel_res(res) &&
            nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, mr->pd->res.id))
                goto err;

        if (fill_res_name_pid(msg, res))
                goto err;

        if (fill_res_entry(dev, msg, res))
                goto err;

        return 0;

err:    return -EMSGSIZE;
}

static int fill_res_pd_entry(struct sk_buff *msg, bool has_cap_net_admin,
                             struct rdma_restrack_entry *res, uint32_t port)
{
        struct ib_pd *pd = container_of(res, struct ib_pd, res);
        struct ib_device *dev = pd->device;

        if (has_cap_net_admin) {
                if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY,
                                pd->local_dma_lkey))
                        goto err;
                if ((pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) &&
                    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY,
                                pd->unsafe_global_rkey))
                        goto err;
        }
        if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
                              atomic_read(&pd->usecnt), RDMA_NLDEV_ATTR_PAD))
                goto err;

        if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, res->id))
                goto err;

        if (!rdma_is_kernel_res(res) &&
            nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN,
                        pd->uobject->context->res.id))
                goto err;

        if (fill_res_name_pid(msg, res))
                goto err;

        if (fill_res_entry(dev, msg, res))
                goto err;

        return 0;

err:    return -EMSGSIZE;
}

static int fill_stat_counter_mode(struct sk_buff *msg,
                                  struct rdma_counter *counter)
{
        struct rdma_counter_mode *m = &counter->mode;

        if (nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_MODE, m->mode))
                return -EMSGSIZE;

        if (m->mode == RDMA_COUNTER_MODE_AUTO)
                if ((m->mask & RDMA_COUNTER_MASK_QP_TYPE) &&
                    nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, m->param.qp_type))
                        return -EMSGSIZE;

        return 0;
}

static int fill_stat_counter_qp_entry(struct sk_buff *msg, u32 qpn)
{
        struct nlattr *entry_attr;

        entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP_ENTRY);
        if (!entry_attr)
                return -EMSGSIZE;

        if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn))
                goto err;

        nla_nest_end(msg, entry_attr);
        return 0;

err:
        nla_nest_cancel(msg, entry_attr);
        return -EMSGSIZE;
}

static int fill_stat_counter_qps(struct sk_buff *msg,
                                 struct rdma_counter *counter)
{
        struct rdma_restrack_entry *res;
        struct rdma_restrack_root *rt;
        struct nlattr *table_attr;
        struct ib_qp *qp = NULL;
        unsigned long id = 0;
        int ret = 0;

        table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP);
        if (!table_attr)
                return -EMSGSIZE;

        rt = &counter->device->res[RDMA_RESTRACK_QP];
        xa_lock(&rt->xa);
        xa_for_each(&rt->xa, id, res) {
                if (!rdma_is_visible_in_pid_ns(res))
                        continue;

                qp = container_of(res, struct ib_qp, res);
                if (qp->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
                        continue;

                if (!qp->counter || (qp->counter->id != counter->id))
                        continue;

                ret = fill_stat_counter_qp_entry(msg, qp->qp_num);
                if (ret)
                        goto err;
        }

        xa_unlock(&rt->xa);
        nla_nest_end(msg, table_attr);
        return 0;

err:
        xa_unlock(&rt->xa);
        nla_nest_cancel(msg, table_attr);
        return ret;
}
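
/*
 * Note the locking difference: unlike res_get_common_dumpit() below,
 * which drops the xarray lock while filling each entry, the walk above
 * holds xa_lock for the whole iteration.
 */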

static int fill_stat_hwcounter_entry(struct sk_buff *msg,
                                     const char *name, u64 value)
{
        struct nlattr *entry_attr;

        entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY);
        if (!entry_attr)
                return -EMSGSIZE;

        if (nla_put_string(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME,
                           name))
                goto err;
        if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE,
                              value, RDMA_NLDEV_ATTR_PAD))
                goto err;

        nla_nest_end(msg, entry_attr);
        return 0;

err:
        nla_nest_cancel(msg, entry_attr);
        return -EMSGSIZE;
}

static int fill_stat_counter_hwcounters(struct sk_buff *msg,
                                        struct rdma_counter *counter)
{
        struct rdma_hw_stats *st = counter->stats;
        struct nlattr *table_attr;
        int i;

        table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS);
        if (!table_attr)
                return -EMSGSIZE;

        for (i = 0; i < st->num_counters; i++)
                if (fill_stat_hwcounter_entry(msg, st->names[i], st->value[i]))
                        goto err;

        nla_nest_end(msg, table_attr);
        return 0;

err:
        nla_nest_cancel(msg, table_attr);
        return -EMSGSIZE;
}

static int fill_res_counter_entry(struct sk_buff *msg, bool has_cap_net_admin,
                                  struct rdma_restrack_entry *res,
                                  uint32_t port)
{
        struct rdma_counter *counter =
                container_of(res, struct rdma_counter, res);

        if (port && port != counter->port)
                return 0;

        /* Dump it even if the query failed */
        rdma_counter_query_stats(counter);

        if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, counter->port) ||
            nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, counter->id) ||
            fill_res_name_pid(msg, &counter->res) ||
            fill_stat_counter_mode(msg, counter) ||
            fill_stat_counter_qps(msg, counter) ||
            fill_stat_counter_hwcounters(msg, counter))
                return -EMSGSIZE;

        return 0;
}

static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
                          struct netlink_ext_ack *extack)
{
        struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
        struct ib_device *device;
        struct sk_buff *msg;
        u32 index;
        int err;

        err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
                                     nldev_policy, extack);
        if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
                return -EINVAL;

        index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);

        device = ib_device_get_by_index(sock_net(skb->sk), index);
        if (!device)
                return -EINVAL;

        msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
        if (!msg) {
                err = -ENOMEM;
                goto err;
        }

        nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
                        RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
                        0, 0);

        err = fill_dev_info(msg, device);
        if (err)
                goto err_free;

        nlmsg_end(msg, nlh);

        ib_device_put(device);
        return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);

err_free:
        nlmsg_free(msg);
err:
        ib_device_put(device);
        return err;
}

static int nldev_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
                          struct netlink_ext_ack *extack)
{
        struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
        struct ib_device *device;
        u32 index;
        int err;

        err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
                                     nldev_policy, extack);
        if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
                return -EINVAL;

        index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
        device = ib_device_get_by_index(sock_net(skb->sk), index);
        if (!device)
                return -EINVAL;

        if (tb[RDMA_NLDEV_ATTR_DEV_NAME]) {
                char name[IB_DEVICE_NAME_MAX] = {};

                nla_strlcpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
                            IB_DEVICE_NAME_MAX);
                err = ib_device_rename(device, name);
                goto done;
        }

        if (tb[RDMA_NLDEV_NET_NS_FD]) {
                u32 ns_fd;

                ns_fd = nla_get_u32(tb[RDMA_NLDEV_NET_NS_FD]);
                err = ib_device_set_netns_put(skb, device, ns_fd);
                goto put_done;
        }

        if (tb[RDMA_NLDEV_ATTR_DEV_DIM]) {
                u8 use_dim;

                use_dim = nla_get_u8(tb[RDMA_NLDEV_ATTR_DEV_DIM]);
                err = ib_device_set_dim(device, use_dim);
                goto done;
        }

done:
        ib_device_put(device);
put_done:
        return err;
}

static int _nldev_get_dumpit(struct ib_device *device,
                             struct sk_buff *skb,
                             struct netlink_callback *cb,
                             unsigned int idx)
{
        int start = cb->args[0];
        struct nlmsghdr *nlh;

        if (idx < start)
                return 0;

        nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
                        RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
                        0, NLM_F_MULTI);

        if (fill_dev_info(skb, device)) {
                nlmsg_cancel(skb, nlh);
                goto out;
        }

        nlmsg_end(skb, nlh);

        idx++;

out:    cb->args[0] = idx;
        return skb->len;
}

static int nldev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
{
        /*
         * There is no need to take a lock here, because we rely on
         * ib_core's locking.
         */
        return ib_enum_all_devs(_nldev_get_dumpit, skb, cb);
}

static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
                               struct netlink_ext_ack *extack)
{
        struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
        struct ib_device *device;
        struct sk_buff *msg;
        u32 index;
        u32 port;
        int err;

        err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
                                     nldev_policy, extack);
        if (err ||
            !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
            !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
                return -EINVAL;

        index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
        device = ib_device_get_by_index(sock_net(skb->sk), index);
        if (!device)
                return -EINVAL;

        port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
        if (!rdma_is_port_valid(device, port)) {
                err = -EINVAL;
                goto err;
        }

        msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
        if (!msg) {
                err = -ENOMEM;
                goto err;
        }

        nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
                        RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
                        0, 0);

        err = fill_port_info(msg, device, port, sock_net(skb->sk));
        if (err)
                goto err_free;

        nlmsg_end(msg, nlh);
        ib_device_put(device);

        return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);

err_free:
        nlmsg_free(msg);
err:
        ib_device_put(device);
        return err;
}

static int nldev_port_get_dumpit(struct sk_buff *skb,
                                 struct netlink_callback *cb)
{
        struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
        struct ib_device *device;
        int start = cb->args[0];
        struct nlmsghdr *nlh;
        u32 idx = 0;
        u32 ifindex;
        int err;
        unsigned int p;

        err = nlmsg_parse_deprecated(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
                                     nldev_policy, NULL);
        if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
                return -EINVAL;

        ifindex = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
        device = ib_device_get_by_index(sock_net(skb->sk), ifindex);
        if (!device)
                return -EINVAL;

        rdma_for_each_port (device, p) {
                /*
                 * The dumpit function returns all information from a
                 * specific index. This index is taken from the netlink
                 * request message sent by the user and is available in
                 * cb->args[0].
                 *
                 * Usually the user doesn't fill in this field, which
                 * causes everything to be returned.
                 */
                if (idx < start) {
                        idx++;
                        continue;
                }

                nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid,
                                cb->nlh->nlmsg_seq,
                                RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
                                                 RDMA_NLDEV_CMD_PORT_GET),
                                0, NLM_F_MULTI);

                if (fill_port_info(skb, device, p, sock_net(skb->sk))) {
                        nlmsg_cancel(skb, nlh);
                        goto out;
                }
                idx++;
                nlmsg_end(skb, nlh);
        }

out:
        ib_device_put(device);
        cb->args[0] = idx;
        return skb->len;
}
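
/*
 * Sketch of the netlink dump protocol used above: the core calls the
 * dumpit repeatedly until it returns 0. cb->args[0] persists the next
 * start index between calls, so a follow-up pass skips the ports that
 * were already delivered and the dump ends once nothing new is added.
 */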

static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
                              struct netlink_ext_ack *extack)
{
        struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
        struct ib_device *device;
        struct sk_buff *msg;
        u32 index;
        int ret;

        ret = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
                                     nldev_policy, extack);
        if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
                return -EINVAL;

        index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
        device = ib_device_get_by_index(sock_net(skb->sk), index);
        if (!device)
                return -EINVAL;

        msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
        if (!msg) {
                ret = -ENOMEM;
                goto err;
        }

        nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
                        RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
                        0, 0);

        ret = fill_res_info(msg, device);
        if (ret)
                goto err_free;

        nlmsg_end(msg, nlh);
        ib_device_put(device);
        return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);

err_free:
        nlmsg_free(msg);
err:
        ib_device_put(device);
        return ret;
}

static int _nldev_res_get_dumpit(struct ib_device *device,
                                 struct sk_buff *skb,
                                 struct netlink_callback *cb,
                                 unsigned int idx)
{
        int start = cb->args[0];
        struct nlmsghdr *nlh;

        if (idx < start)
                return 0;

        nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
                        RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
                        0, NLM_F_MULTI);

        if (fill_res_info(skb, device)) {
                nlmsg_cancel(skb, nlh);
                goto out;
        }
        nlmsg_end(skb, nlh);

        idx++;

out:
        cb->args[0] = idx;
        return skb->len;
}

static int nldev_res_get_dumpit(struct sk_buff *skb,
                                struct netlink_callback *cb)
{
        return ib_enum_all_devs(_nldev_res_get_dumpit, skb, cb);
}

struct nldev_fill_res_entry {
        int (*fill_res_func)(struct sk_buff *msg, bool has_cap_net_admin,
                             struct rdma_restrack_entry *res, u32 port);
        enum rdma_nldev_attr nldev_attr;
        enum rdma_nldev_command nldev_cmd;
        u8 flags;
        u32 entry;
        u32 id;
};

enum nldev_res_flags {
        NLDEV_PER_DEV = 1 << 0,
};

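/*
 * Per-restrack-type dispatch table: each entry maps a resource type to
 * its netlink command, nesting attributes, object-id attribute and fill
 * function. NLDEV_PER_DEV marks resources that are addressed per device
 * (a doit request for them must not carry a port index); the other
 * types require a port index.
 */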
static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = {
        [RDMA_RESTRACK_QP] = {
                .fill_res_func = fill_res_qp_entry,
                .nldev_cmd = RDMA_NLDEV_CMD_RES_QP_GET,
                .nldev_attr = RDMA_NLDEV_ATTR_RES_QP,
                .entry = RDMA_NLDEV_ATTR_RES_QP_ENTRY,
                .id = RDMA_NLDEV_ATTR_RES_LQPN,
        },
        [RDMA_RESTRACK_CM_ID] = {
                .fill_res_func = fill_res_cm_id_entry,
                .nldev_cmd = RDMA_NLDEV_CMD_RES_CM_ID_GET,
                .nldev_attr = RDMA_NLDEV_ATTR_RES_CM_ID,
                .entry = RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY,
                .id = RDMA_NLDEV_ATTR_RES_CM_IDN,
        },
        [RDMA_RESTRACK_CQ] = {
                .fill_res_func = fill_res_cq_entry,
                .nldev_cmd = RDMA_NLDEV_CMD_RES_CQ_GET,
                .nldev_attr = RDMA_NLDEV_ATTR_RES_CQ,
                .flags = NLDEV_PER_DEV,
                .entry = RDMA_NLDEV_ATTR_RES_CQ_ENTRY,
                .id = RDMA_NLDEV_ATTR_RES_CQN,
        },
        [RDMA_RESTRACK_MR] = {
                .fill_res_func = fill_res_mr_entry,
                .nldev_cmd = RDMA_NLDEV_CMD_RES_MR_GET,
                .nldev_attr = RDMA_NLDEV_ATTR_RES_MR,
                .flags = NLDEV_PER_DEV,
                .entry = RDMA_NLDEV_ATTR_RES_MR_ENTRY,
                .id = RDMA_NLDEV_ATTR_RES_MRN,
        },
        [RDMA_RESTRACK_PD] = {
                .fill_res_func = fill_res_pd_entry,
                .nldev_cmd = RDMA_NLDEV_CMD_RES_PD_GET,
                .nldev_attr = RDMA_NLDEV_ATTR_RES_PD,
                .flags = NLDEV_PER_DEV,
                .entry = RDMA_NLDEV_ATTR_RES_PD_ENTRY,
                .id = RDMA_NLDEV_ATTR_RES_PDN,
        },
        [RDMA_RESTRACK_COUNTER] = {
                .fill_res_func = fill_res_counter_entry,
                .nldev_cmd = RDMA_NLDEV_CMD_STAT_GET,
                .nldev_attr = RDMA_NLDEV_ATTR_STAT_COUNTER,
                .entry = RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY,
                .id = RDMA_NLDEV_ATTR_STAT_COUNTER_ID,
        },
};

static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
                               struct netlink_ext_ack *extack,
                               enum rdma_restrack_type res_type)
{
        const struct nldev_fill_res_entry *fe = &fill_entries[res_type];
        struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
        struct rdma_restrack_entry *res;
        struct ib_device *device;
        u32 index, id, port = 0;
        bool has_cap_net_admin;
        struct sk_buff *msg;
        int ret;

        ret = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
                                     nldev_policy, extack);
        if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !fe->id || !tb[fe->id])
                return -EINVAL;

        index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
        device = ib_device_get_by_index(sock_net(skb->sk), index);
        if (!device)
                return -EINVAL;

        if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
                port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
                if (!rdma_is_port_valid(device, port)) {
                        ret = -EINVAL;
                        goto err;
                }
        }

        if ((port && fe->flags & NLDEV_PER_DEV) ||
            (!port && ~fe->flags & NLDEV_PER_DEV)) {
                ret = -EINVAL;
                goto err;
        }

        id = nla_get_u32(tb[fe->id]);
        res = rdma_restrack_get_byid(device, res_type, id);
        if (IS_ERR(res)) {
                ret = PTR_ERR(res);
                goto err;
        }

        if (!rdma_is_visible_in_pid_ns(res)) {
                ret = -ENOENT;
                goto err_get;
        }

        msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
        if (!msg) {
                ret = -ENOMEM;
                goto err_get;
        }

        nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
                        RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, fe->nldev_cmd),
                        0, 0);

        if (fill_nldev_handle(msg, device)) {
                ret = -EMSGSIZE;
                goto err_free;
        }

        has_cap_net_admin = netlink_capable(skb, CAP_NET_ADMIN);
        ret = fe->fill_res_func(msg, has_cap_net_admin, res, port);
        rdma_restrack_put(res);
        if (ret)
                goto err_free;

        nlmsg_end(msg, nlh);
        ib_device_put(device);
        return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);

err_free:
        nlmsg_free(msg);
err_get:
        rdma_restrack_put(res);
err:
        ib_device_put(device);
        return ret;
}

static int res_get_common_dumpit(struct sk_buff *skb,
                                 struct netlink_callback *cb,
                                 enum rdma_restrack_type res_type)
{
        const struct nldev_fill_res_entry *fe = &fill_entries[res_type];
        struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
        struct rdma_restrack_entry *res;
        struct rdma_restrack_root *rt;
        int err, ret = 0, idx = 0;
        struct nlattr *table_attr;
        struct nlattr *entry_attr;
        struct ib_device *device;
        int start = cb->args[0];
        bool has_cap_net_admin;
        struct nlmsghdr *nlh;
        unsigned long id;
        u32 index, port = 0;
        bool filled = false;

        err = nlmsg_parse_deprecated(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
                                     nldev_policy, NULL);
        /*
         * Right now we require the device index to get the res
         * information, but it is possible to extend this code to return
         * all devices in one shot by checking the existence of
         * RDMA_NLDEV_ATTR_DEV_INDEX: if it doesn't exist, iterate over
         * all devices.
         *
         * That is not needed for now.
         */
        if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
                return -EINVAL;

        index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
        device = ib_device_get_by_index(sock_net(skb->sk), index);
        if (!device)
                return -EINVAL;

        /*
         * If no PORT_INDEX is supplied, return all objects of this
         * resource type from the device.
         */
1305         if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
1306                 port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
1307                 if (!rdma_is_port_valid(device, port)) {
1308                         ret = -EINVAL;
1309                         goto err_index;
1310                 }
1311         }
1312
1313         nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
1314                         RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, fe->nldev_cmd),
1315                         0, NLM_F_MULTI);
1316
1317         if (fill_nldev_handle(skb, device)) {
1318                 ret = -EMSGSIZE;
1319                 goto err;
1320         }
1321
1322         table_attr = nla_nest_start_noflag(skb, fe->nldev_attr);
1323         if (!table_attr) {
1324                 ret = -EMSGSIZE;
1325                 goto err;
1326         }
1327
1328         has_cap_net_admin = netlink_capable(cb->skb, CAP_NET_ADMIN);
1329
1330         rt = &device->res[res_type];
1331         xa_lock(&rt->xa);
1332         /*
1333          * FIXME: if the skip ahead is something common this loop should
1334          * use xas_for_each & xas_pause to optimize, we can have a lot of
1335          * objects.
1336          */
1337         xa_for_each(&rt->xa, id, res) {
1338                 if (!rdma_is_visible_in_pid_ns(res))
1339                         continue;
1340
1341                 if (idx < start || !rdma_restrack_get(res))
1342                         goto next;
1343
1344                 xa_unlock(&rt->xa);
1345
1346                 filled = true;
1347
1348                 entry_attr = nla_nest_start_noflag(skb, fe->entry);
1349                 if (!entry_attr) {
1350                         ret = -EMSGSIZE;
1351                         rdma_restrack_put(res);
1352                         goto msg_full;
1353                 }
1354
1355                 ret = fe->fill_res_func(skb, has_cap_net_admin, res, port);
1356                 rdma_restrack_put(res);
1357
1358                 if (ret) {
1359                         nla_nest_cancel(skb, entry_attr);
1360                         if (ret == -EMSGSIZE)
1361                                 goto msg_full;
1362                         if (ret == -EAGAIN)
1363                                 goto again;
1364                         goto res_err;
1365                 }
1366                 nla_nest_end(skb, entry_attr);
1367 again:          xa_lock(&rt->xa);
1368 next:           idx++;
1369         }
1370         xa_unlock(&rt->xa);
1371
1372 msg_full:
1373         nla_nest_end(skb, table_attr);
1374         nlmsg_end(skb, nlh);
1375         cb->args[0] = idx;
1376
1377         /*
1378          * No more entries to fill, cancel the message and
1379          * return 0 to mark end of dumpit.
1380          */
1381         if (!filled)
1382                 goto err;
1383
1384         ib_device_put(device);
1385         return skb->len;
1386
1387 res_err:
1388         nla_nest_cancel(skb, table_attr);
1389
1390 err:
1391         nlmsg_cancel(skb, nlh);
1392
1393 err_index:
1394         ib_device_put(device);
1395         return ret;
1396 }

#define RES_GET_FUNCS(name, type)                                              \
        static int nldev_res_get_##name##_dumpit(struct sk_buff *skb,          \
                                                 struct netlink_callback *cb)  \
        {                                                                      \
                return res_get_common_dumpit(skb, cb, type);                   \
        }                                                                      \
        static int nldev_res_get_##name##_doit(struct sk_buff *skb,            \
                                               struct nlmsghdr *nlh,           \
                                               struct netlink_ext_ack *extack) \
        {                                                                      \
                return res_get_common_doit(skb, nlh, extack, type);            \
        }

RES_GET_FUNCS(qp, RDMA_RESTRACK_QP);
RES_GET_FUNCS(cm_id, RDMA_RESTRACK_CM_ID);
RES_GET_FUNCS(cq, RDMA_RESTRACK_CQ);
RES_GET_FUNCS(pd, RDMA_RESTRACK_PD);
RES_GET_FUNCS(mr, RDMA_RESTRACK_MR);
RES_GET_FUNCS(counter, RDMA_RESTRACK_COUNTER);
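
/*
 * For reference, each RES_GET_FUNCS() instantiation above expands to a thin
 * pair of wrappers; e.g. RES_GET_FUNCS(qp, RDMA_RESTRACK_QP) is roughly
 * equivalent to:
 *
 *	static int nldev_res_get_qp_dumpit(struct sk_buff *skb,
 *					   struct netlink_callback *cb)
 *	{
 *		return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_QP);
 *	}
 *
 *	static int nldev_res_get_qp_doit(struct sk_buff *skb,
 *					 struct nlmsghdr *nlh,
 *					 struct netlink_ext_ack *extack)
 *	{
 *		return res_get_common_doit(skb, nlh, extack, RDMA_RESTRACK_QP);
 *	}
 */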

static LIST_HEAD(link_ops);
static DECLARE_RWSEM(link_ops_rwsem);

static const struct rdma_link_ops *link_ops_get(const char *type)
{
        const struct rdma_link_ops *ops;

        list_for_each_entry(ops, &link_ops, list) {
                if (!strcmp(ops->type, type))
                        goto out;
        }
        ops = NULL;
out:
        return ops;
}

void rdma_link_register(struct rdma_link_ops *ops)
{
        down_write(&link_ops_rwsem);
        if (WARN_ON_ONCE(link_ops_get(ops->type)))
                goto out;
        list_add(&ops->list, &link_ops);
out:
        up_write(&link_ops_rwsem);
}
EXPORT_SYMBOL(rdma_link_register);

void rdma_link_unregister(struct rdma_link_ops *ops)
{
        down_write(&link_ops_rwsem);
        list_del(&ops->list);
        up_write(&link_ops_rwsem);
}
EXPORT_SYMBOL(rdma_link_unregister);
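
/*
 * A minimal sketch of how a soft-RDMA provider might hook into this API
 * (illustrative only; the rxe_newlink name below is a hypothetical stand-in
 * for the provider's own handler):
 *
 *	static struct rdma_link_ops rxe_link_ops = {
 *		.type = "rxe",
 *		.newlink = rxe_newlink,
 *	};
 *
 * The provider calls rdma_link_register(&rxe_link_ops) at module init and
 * rdma_link_unregister(&rxe_link_ops) at module exit.  Declaring
 * MODULE_ALIAS_RDMA_LINK("rxe") additionally lets nldev_newlink() below
 * autoload the module on demand via request_module().
 */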

static int nldev_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
                          struct netlink_ext_ack *extack)
{
        struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
        char ibdev_name[IB_DEVICE_NAME_MAX];
        const struct rdma_link_ops *ops;
        char ndev_name[IFNAMSIZ];
        struct net_device *ndev;
        char type[IFNAMSIZ];
        int err;

        err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
                                     nldev_policy, extack);
        if (err || !tb[RDMA_NLDEV_ATTR_DEV_NAME] ||
            !tb[RDMA_NLDEV_ATTR_LINK_TYPE] || !tb[RDMA_NLDEV_ATTR_NDEV_NAME])
                return -EINVAL;

        nla_strlcpy(ibdev_name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
                    sizeof(ibdev_name));
        if (strchr(ibdev_name, '%'))
                return -EINVAL;

        nla_strlcpy(type, tb[RDMA_NLDEV_ATTR_LINK_TYPE], sizeof(type));
        nla_strlcpy(ndev_name, tb[RDMA_NLDEV_ATTR_NDEV_NAME],
                    sizeof(ndev_name));

        ndev = dev_get_by_name(sock_net(skb->sk), ndev_name);
        if (!ndev)
                return -ENODEV;

        down_read(&link_ops_rwsem);
        ops = link_ops_get(type);
#ifdef CONFIG_MODULES
        if (!ops) {
                up_read(&link_ops_rwsem);
                request_module("rdma-link-%s", type);
                down_read(&link_ops_rwsem);
                ops = link_ops_get(type);
        }
#endif
        err = ops ? ops->newlink(ibdev_name, ndev) : -EINVAL;
        up_read(&link_ops_rwsem);
        dev_put(ndev);

        return err;
}
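
/*
 * From user space these handlers back the iproute2 rdma tool; assuming an
 * rxe-style provider is present, something like:
 *
 *	$ rdma link add rxe0 type rxe netdev eth0
 *	$ rdma link delete rxe0
 *
 * maps to nldev_newlink() above and nldev_dellink() below, respectively.
 */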

static int nldev_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
                          struct netlink_ext_ack *extack)
{
        struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
        struct ib_device *device;
        u32 index;
        int err;

        err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
                                     nldev_policy, extack);
        if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
                return -EINVAL;

        index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
        device = ib_device_get_by_index(sock_net(skb->sk), index);
        if (!device)
                return -EINVAL;

        if (!(device->attrs.device_cap_flags & IB_DEVICE_ALLOW_USER_UNREG)) {
                ib_device_put(device);
                return -EINVAL;
        }

        ib_unregister_device_and_put(device);
        return 0;
}

static int nldev_get_chardev(struct sk_buff *skb, struct nlmsghdr *nlh,
                             struct netlink_ext_ack *extack)
{
        struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
        char client_name[RDMA_NLDEV_ATTR_CHARDEV_TYPE_SIZE];
        struct ib_client_nl_info data = {};
        struct ib_device *ibdev = NULL;
        struct sk_buff *msg;
        u32 index;
        int err;

        err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy,
                          extack);
        if (err || !tb[RDMA_NLDEV_ATTR_CHARDEV_TYPE])
                return -EINVAL;

        nla_strlcpy(client_name, tb[RDMA_NLDEV_ATTR_CHARDEV_TYPE],
                    sizeof(client_name));

        if (tb[RDMA_NLDEV_ATTR_DEV_INDEX]) {
                index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
                ibdev = ib_device_get_by_index(sock_net(skb->sk), index);
                if (!ibdev)
                        return -EINVAL;

                if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
                        data.port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
                        if (!rdma_is_port_valid(ibdev, data.port)) {
                                err = -EINVAL;
                                goto out_put;
                        }
                } else {
                        data.port = -1;
                }
        } else if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
                return -EINVAL;
        }

        msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
        if (!msg) {
                err = -ENOMEM;
                goto out_put;
        }
        nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
                        RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
                                         RDMA_NLDEV_CMD_GET_CHARDEV),
                        0, 0);
        if (!nlh) {
                err = -EMSGSIZE;
                goto out_nlmsg;
        }

        data.nl_msg = msg;
        err = ib_get_client_nl_info(ibdev, client_name, &data);
        if (err)
                goto out_nlmsg;

        err = nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CHARDEV,
                                huge_encode_dev(data.cdev->devt),
                                RDMA_NLDEV_ATTR_PAD);
        if (err)
                goto out_data;
        err = nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CHARDEV_ABI, data.abi,
                                RDMA_NLDEV_ATTR_PAD);
        if (err)
                goto out_data;
        if (nla_put_string(msg, RDMA_NLDEV_ATTR_CHARDEV_NAME,
                           dev_name(data.cdev))) {
                err = -EMSGSIZE;
                goto out_data;
        }

        nlmsg_end(msg, nlh);
        put_device(data.cdev);
        if (ibdev)
                ib_device_put(ibdev);
        return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);

out_data:
        put_device(data.cdev);
out_nlmsg:
        nlmsg_free(msg);
out_put:
        if (ibdev)
                ib_device_put(ibdev);
        return err;
}

static int nldev_sys_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
                              struct netlink_ext_ack *extack)
{
        struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
        struct sk_buff *msg;
        int err;

        err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
                          nldev_policy, extack);
        if (err)
                return err;

        msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
        if (!msg)
                return -ENOMEM;

        nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
                        RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
                                         RDMA_NLDEV_CMD_SYS_GET),
                        0, 0);
        if (!nlh) {
                nlmsg_free(msg);
                return -EMSGSIZE;
        }

        err = nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_NETNS_MODE,
                         (u8)ib_devices_shared_netns);
        if (err) {
                nlmsg_free(msg);
                return err;
        }
        nlmsg_end(msg, nlh);
        return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
}

static int nldev_set_sys_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
                                  struct netlink_ext_ack *extack)
{
        struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
        u8 enable;
        int err;

        err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
                          nldev_policy, extack);
        if (err || !tb[RDMA_NLDEV_SYS_ATTR_NETNS_MODE])
                return -EINVAL;

        enable = nla_get_u8(tb[RDMA_NLDEV_SYS_ATTR_NETNS_MODE]);
        /* Only 0 and 1 are supported */
        if (enable > 1)
                return -EINVAL;

        return rdma_compatdev_set(enable);
}
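
/*
 * These two handlers back the rdma tool's system-wide netns knob; assuming
 * a reasonably recent iproute2, usage looks roughly like:
 *
 *	$ rdma system show netns
 *	$ rdma system set netns exclusive
 *
 * where "shared" corresponds to RDMA_NLDEV_SYS_ATTR_NETNS_MODE == 1 and
 * "exclusive" to 0.
 */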

static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
                               struct netlink_ext_ack *extack)
{
        u32 index, port, mode, mask = 0, qpn, cntn = 0;
        struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
        struct ib_device *device;
        struct sk_buff *msg;
        int ret;

        ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
                          nldev_policy, extack);
        /* Currently only counters for QPs are supported */
        if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES] ||
            !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
            !tb[RDMA_NLDEV_ATTR_PORT_INDEX] || !tb[RDMA_NLDEV_ATTR_STAT_MODE])
                return -EINVAL;

        if (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP)
                return -EINVAL;

        index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
        device = ib_device_get_by_index(sock_net(skb->sk), index);
        if (!device)
                return -EINVAL;

        port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
        if (!rdma_is_port_valid(device, port)) {
                ret = -EINVAL;
                goto err;
        }

        msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
        if (!msg) {
                ret = -ENOMEM;
                goto err;
        }
        nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
                        RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
                                         RDMA_NLDEV_CMD_STAT_SET),
                        0, 0);
        if (!nlh) {
                ret = -EMSGSIZE;
                goto err_msg;
        }

        mode = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_MODE]);
        if (mode == RDMA_COUNTER_MODE_AUTO) {
                if (tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK])
                        mask = nla_get_u32(
                                tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]);

                ret = rdma_counter_set_auto_mode(device, port,
                                                 mask ? true : false, mask);
                if (ret)
                        goto err_msg;
        } else {
                /* Manual mode needs the QP number to bind */
                if (!tb[RDMA_NLDEV_ATTR_RES_LQPN]) {
                        ret = -EINVAL;
                        goto err_msg;
                }
                qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]);
                if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]) {
                        cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]);
                        ret = rdma_counter_bind_qpn(device, port, qpn, cntn);
                } else {
                        ret = rdma_counter_bind_qpn_alloc(device, port,
                                                          qpn, &cntn);
                }
                if (ret)
                        goto err_msg;

                if (fill_nldev_handle(msg, device) ||
                    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) ||
                    nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) ||
                    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) {
                        ret = -EMSGSIZE;
                        goto err_fill;
                }
        }

        nlmsg_end(msg, nlh);
        ib_device_put(device);
        return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);

err_fill:
        rdma_counter_unbind_qpn(device, port, qpn, cntn);
err_msg:
        nlmsg_free(msg);
err:
        ib_device_put(device);
        return ret;
}
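
/*
 * For illustration, these handlers back rdma tool invocations along the
 * lines of (device and QP numbers hypothetical):
 *
 *	$ rdma statistic qp set link mlx5_0/1 auto type on
 *	$ rdma statistic qp bind link mlx5_0/1 lqpn 178
 *	$ rdma statistic qp unbind link mlx5_0/1 cntn 4 lqpn 178
 *
 * i.e. auto-mode selection and manual bind via STAT_SET above, and unbind
 * via STAT_DEL below.
 */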

static int nldev_stat_del_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
                               struct netlink_ext_ack *extack)
{
        struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
        struct ib_device *device;
        struct sk_buff *msg;
        u32 index, port, qpn, cntn;
        int ret;

        ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
                          nldev_policy, extack);
        if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES] ||
            !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX] ||
            !tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID] ||
            !tb[RDMA_NLDEV_ATTR_RES_LQPN])
                return -EINVAL;

        if (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP)
                return -EINVAL;

        index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
        device = ib_device_get_by_index(sock_net(skb->sk), index);
        if (!device)
                return -EINVAL;

        port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
        if (!rdma_is_port_valid(device, port)) {
                ret = -EINVAL;
                goto err;
        }

        msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
        if (!msg) {
                ret = -ENOMEM;
                goto err;
        }
        nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
                        RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
                                         RDMA_NLDEV_CMD_STAT_SET),
                        0, 0);
        if (!nlh) {
                ret = -EMSGSIZE;
                goto err_msg;
        }

        cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]);
        qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]);
        ret = rdma_counter_unbind_qpn(device, port, qpn, cntn);
        if (ret)
                goto err_msg;

        if (fill_nldev_handle(msg, device) ||
            nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) ||
            nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) ||
            nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) {
                ret = -EMSGSIZE;
                goto err_fill;
        }

        nlmsg_end(msg, nlh);
        ib_device_put(device);
        return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);

err_fill:
        /* Restore the binding we just tore down */
        rdma_counter_bind_qpn(device, port, qpn, cntn);
err_msg:
        nlmsg_free(msg);
err:
        ib_device_put(device);
        return ret;
}

static int stat_get_doit_default_counter(struct sk_buff *skb,
                                         struct nlmsghdr *nlh,
                                         struct netlink_ext_ack *extack,
                                         struct nlattr *tb[])
{
        struct rdma_hw_stats *stats;
        struct nlattr *table_attr;
        struct ib_device *device;
        int ret, num_cnts, i;
        struct sk_buff *msg;
        u32 index, port;
        u64 v;

        if (!tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
                return -EINVAL;

        index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
        device = ib_device_get_by_index(sock_net(skb->sk), index);
        if (!device)
                return -EINVAL;

        if (!device->ops.alloc_hw_stats || !device->ops.get_hw_stats) {
                ret = -EINVAL;
                goto err;
        }

        port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
        if (!rdma_is_port_valid(device, port)) {
                ret = -EINVAL;
                goto err;
        }

        msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
        if (!msg) {
                ret = -ENOMEM;
                goto err;
        }

        nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
                        RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
                                         RDMA_NLDEV_CMD_STAT_GET),
                        0, 0);
        if (!nlh) {
                ret = -EMSGSIZE;
                goto err_msg;
        }

        if (fill_nldev_handle(msg, device) ||
            nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) {
                ret = -EMSGSIZE;
                goto err_msg;
        }

        stats = device->port_data ? device->port_data[port].hw_stats : NULL;
        if (!stats) {
                ret = -EINVAL;
                goto err_msg;
        }
        mutex_lock(&stats->lock);

        num_cnts = device->ops.get_hw_stats(device, stats, port, 0);
        if (num_cnts < 0) {
                ret = -EINVAL;
                goto err_stats;
        }

        table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS);
        if (!table_attr) {
                ret = -EMSGSIZE;
                goto err_stats;
        }
        for (i = 0; i < num_cnts; i++) {
                /* Include values accumulated by per-QP bound counters */
                v = stats->value[i] +
                        rdma_counter_get_hwstat_value(device, port, i);
                if (fill_stat_hwcounter_entry(msg, stats->names[i], v)) {
                        ret = -EMSGSIZE;
                        goto err_table;
                }
        }
        nla_nest_end(msg, table_attr);

        mutex_unlock(&stats->lock);
        nlmsg_end(msg, nlh);
        ib_device_put(device);
        return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);

err_table:
        nla_nest_cancel(msg, table_attr);
err_stats:
        mutex_unlock(&stats->lock);
err_msg:
        nlmsg_free(msg);
err:
        ib_device_put(device);
        return ret;
}
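
/*
 * This is what backs a plain per-port hardware-counter query from the rdma
 * tool, roughly "rdma statistic show link mlx5_0/1" (device name
 * illustrative), which dumps the counter table assembled above.
 */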

static int stat_get_doit_qp(struct sk_buff *skb, struct nlmsghdr *nlh,
                            struct netlink_ext_ack *extack, struct nlattr *tb[])
{
        enum rdma_nl_counter_mode mode;
        enum rdma_nl_counter_mask mask;
        struct ib_device *device;
        struct sk_buff *msg;
        u32 index, port;
        int ret;

        if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID])
                return nldev_res_get_counter_doit(skb, nlh, extack);

        if (!tb[RDMA_NLDEV_ATTR_STAT_MODE] ||
            !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
                return -EINVAL;

        index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
        device = ib_device_get_by_index(sock_net(skb->sk), index);
        if (!device)
                return -EINVAL;

        port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
        if (!rdma_is_port_valid(device, port)) {
                ret = -EINVAL;
                goto err;
        }

        msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
        if (!msg) {
                ret = -ENOMEM;
                goto err;
        }

        nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
                        RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
                                         RDMA_NLDEV_CMD_STAT_GET),
                        0, 0);
        if (!nlh) {
                ret = -EMSGSIZE;
                goto err_msg;
        }

        ret = rdma_counter_get_mode(device, port, &mode, &mask);
        if (ret)
                goto err_msg;

        if (fill_nldev_handle(msg, device) ||
            nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) ||
            nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_MODE, mode)) {
                ret = -EMSGSIZE;
                goto err_msg;
        }

        if ((mode == RDMA_COUNTER_MODE_AUTO) &&
            nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK, mask)) {
                ret = -EMSGSIZE;
                goto err_msg;
        }

        nlmsg_end(msg, nlh);
        ib_device_put(device);
        return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);

err_msg:
        nlmsg_free(msg);
err:
        ib_device_put(device);
        return ret;
}

static int nldev_stat_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
                               struct netlink_ext_ack *extack)
{
        struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
        int ret;

        ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
                          nldev_policy, extack);
        if (ret)
                return -EINVAL;

        if (!tb[RDMA_NLDEV_ATTR_STAT_RES])
                return stat_get_doit_default_counter(skb, nlh, extack, tb);

        switch (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES])) {
        case RDMA_NLDEV_ATTR_RES_QP:
                ret = stat_get_doit_qp(skb, nlh, extack, tb);
                break;

        default:
                ret = -EINVAL;
                break;
        }

        return ret;
}

static int nldev_stat_get_dumpit(struct sk_buff *skb,
                                 struct netlink_callback *cb)
{
        struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
        int ret;

        ret = nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
                          nldev_policy, NULL);
        if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES])
                return -EINVAL;

        switch (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES])) {
        case RDMA_NLDEV_ATTR_RES_QP:
                ret = nldev_res_get_counter_dumpit(skb, cb);
                break;

        default:
                ret = -EINVAL;
                break;
        }

        return ret;
}

static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
        [RDMA_NLDEV_CMD_GET] = {
                .doit = nldev_get_doit,
                .dump = nldev_get_dumpit,
        },
        [RDMA_NLDEV_CMD_GET_CHARDEV] = {
                .doit = nldev_get_chardev,
        },
        [RDMA_NLDEV_CMD_SET] = {
                .doit = nldev_set_doit,
                .flags = RDMA_NL_ADMIN_PERM,
        },
        [RDMA_NLDEV_CMD_NEWLINK] = {
                .doit = nldev_newlink,
                .flags = RDMA_NL_ADMIN_PERM,
        },
        [RDMA_NLDEV_CMD_DELLINK] = {
                .doit = nldev_dellink,
                .flags = RDMA_NL_ADMIN_PERM,
        },
        [RDMA_NLDEV_CMD_PORT_GET] = {
                .doit = nldev_port_get_doit,
                .dump = nldev_port_get_dumpit,
        },
        [RDMA_NLDEV_CMD_RES_GET] = {
                .doit = nldev_res_get_doit,
                .dump = nldev_res_get_dumpit,
        },
        [RDMA_NLDEV_CMD_RES_QP_GET] = {
                .doit = nldev_res_get_qp_doit,
                .dump = nldev_res_get_qp_dumpit,
        },
        [RDMA_NLDEV_CMD_RES_CM_ID_GET] = {
                .doit = nldev_res_get_cm_id_doit,
                .dump = nldev_res_get_cm_id_dumpit,
        },
        [RDMA_NLDEV_CMD_RES_CQ_GET] = {
                .doit = nldev_res_get_cq_doit,
                .dump = nldev_res_get_cq_dumpit,
        },
        [RDMA_NLDEV_CMD_RES_MR_GET] = {
                .doit = nldev_res_get_mr_doit,
                .dump = nldev_res_get_mr_dumpit,
        },
        [RDMA_NLDEV_CMD_RES_PD_GET] = {
                .doit = nldev_res_get_pd_doit,
                .dump = nldev_res_get_pd_dumpit,
        },
        [RDMA_NLDEV_CMD_SYS_GET] = {
                .doit = nldev_sys_get_doit,
        },
        [RDMA_NLDEV_CMD_SYS_SET] = {
                .doit = nldev_set_sys_set_doit,
        },
        [RDMA_NLDEV_CMD_STAT_SET] = {
                .doit = nldev_stat_set_doit,
                .flags = RDMA_NL_ADMIN_PERM,
        },
        [RDMA_NLDEV_CMD_STAT_GET] = {
                .doit = nldev_stat_get_doit,
                .dump = nldev_stat_get_dumpit,
        },
        [RDMA_NLDEV_CMD_STAT_DEL] = {
                .doit = nldev_stat_del_doit,
                .flags = RDMA_NL_ADMIN_PERM,
        },
};

void __init nldev_init(void)
{
        rdma_nl_register(RDMA_NL_NLDEV, nldev_cb_table);
}

void __exit nldev_exit(void)
{
        rdma_nl_unregister(RDMA_NL_NLDEV);
}

MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_NLDEV, 5);
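
/*
 * Note: the literal 5 above is expected to match the value of RDMA_NL_NLDEV
 * in the uapi enum; the resulting module alias lets the RDMA netlink core
 * autoload this code via request_module() the first time a client sends a
 * message to the NLDEV subsystem.
 */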