]> asedeno.scripts.mit.edu Git - linux.git/blob - drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
4f781358aef1f3120e37c9966d4bb6babc74d116
[linux.git] / drivers / net / ethernet / mellanox / mlxsw / spectrum_router.c
1 // SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
2 /* Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved */
3
4 #include <linux/kernel.h>
5 #include <linux/types.h>
6 #include <linux/rhashtable.h>
7 #include <linux/bitops.h>
8 #include <linux/in6.h>
9 #include <linux/notifier.h>
10 #include <linux/inetdevice.h>
11 #include <linux/netdevice.h>
12 #include <linux/if_bridge.h>
13 #include <linux/socket.h>
14 #include <linux/route.h>
15 #include <linux/gcd.h>
16 #include <linux/if_macvlan.h>
17 #include <linux/refcount.h>
18 #include <linux/jhash.h>
19 #include <net/netevent.h>
20 #include <net/neighbour.h>
21 #include <net/arp.h>
22 #include <net/ip_fib.h>
23 #include <net/ip6_fib.h>
24 #include <net/nexthop.h>
25 #include <net/fib_rules.h>
26 #include <net/ip_tunnels.h>
27 #include <net/l3mdev.h>
28 #include <net/addrconf.h>
29 #include <net/ndisc.h>
30 #include <net/ipv6.h>
31 #include <net/fib_notifier.h>
32 #include <net/switchdev.h>
33
34 #include "spectrum.h"
35 #include "core.h"
36 #include "reg.h"
37 #include "spectrum_cnt.h"
38 #include "spectrum_dpipe.h"
39 #include "spectrum_ipip.h"
40 #include "spectrum_mr.h"
41 #include "spectrum_mr_tcam.h"
42 #include "spectrum_router.h"
43 #include "spectrum_span.h"
44
45 struct mlxsw_sp_fib;
46 struct mlxsw_sp_vr;
47 struct mlxsw_sp_lpm_tree;
48 struct mlxsw_sp_rif_ops;
49
/* Router-wide state, reached in this file through mlxsw_sp->router. */
struct mlxsw_sp_router {
        struct mlxsw_sp *mlxsw_sp;
        struct mlxsw_sp_rif **rifs;
        struct mlxsw_sp_vr *vrs; /* array, scanned up to the MAX_VRS resource */
        struct rhashtable neigh_ht;
        struct rhashtable nexthop_group_ht;
        struct rhashtable nexthop_ht;
        struct list_head nexthop_list;
        struct {
                /* One tree for each protocol: IPv4 and IPv6 */
                struct mlxsw_sp_lpm_tree *proto_trees[2];
                /* Array of tree_count entries allocated at LPM init */
                struct mlxsw_sp_lpm_tree *trees;
                unsigned int tree_count;
        } lpm;
        struct {
                struct delayed_work dw;
                unsigned long interval; /* ms */
        } neighs_update;
        struct delayed_work nexthop_probe_dw;
#define MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL 5000 /* ms */
        struct list_head nexthop_neighs_list;
        struct list_head ipip_list;
        bool aborted;
        struct notifier_block fib_nb;
        struct notifier_block netevent_nb;
        struct notifier_block inetaddr_nb;
        struct notifier_block inet6addr_nb;
        const struct mlxsw_sp_rif_ops **rif_ops_arr;
        const struct mlxsw_sp_ipip_ops **ipip_ops_arr;
};
80
/* A router interface (RIF). Type-specific RIF structures embed this as
 * their first member ("common").
 */
struct mlxsw_sp_rif {
        struct list_head nexthop_list;
        struct list_head neigh_list;
        struct net_device *dev; /* NULL for underlay RIF */
        struct mlxsw_sp_fid *fid;
        unsigned char addr[ETH_ALEN];
        int mtu;
        u16 rif_index;
        u16 vr_id;
        const struct mlxsw_sp_rif_ops *ops;
        struct mlxsw_sp *mlxsw_sp;

        /* Counter indexes; each is meaningful only while the matching
         * *_valid flag is true (set on successful counter allocation,
         * cleared when the counter is freed).
         */
        unsigned int counter_ingress;
        bool counter_ingress_valid;
        unsigned int counter_egress;
        bool counter_egress_valid;
};
98
/* Parameters handed to a RIF type's setup() callback. */
struct mlxsw_sp_rif_params {
        struct net_device *dev;
        /* NOTE(review): presumably @lag selects which union member is
         * meaningful - confirm against the users of this structure.
         */
        union {
                u16 system_port;
                u16 lag_id;
        };
        u16 vid;
        bool lag;
};
108
/* Sub-port RIF: the generic RIF plus a reference count and the same
 * port/LAG/VID fields carried by struct mlxsw_sp_rif_params.
 */
struct mlxsw_sp_rif_subport {
        struct mlxsw_sp_rif common;
        refcount_t ref_count;
        union {
                u16 system_port;
                u16 lag_id;
        };
        u16 vid;
        bool lag;
};
119
/* IP-in-IP loopback RIF: the generic RIF plus loopback configuration and
 * underlay identifiers.
 */
struct mlxsw_sp_rif_ipip_lb {
        struct mlxsw_sp_rif common;
        struct mlxsw_sp_rif_ipip_lb_config lb_config;
        u16 ul_vr_id; /* Reserved for Spectrum-2. */
        u16 ul_rif_id; /* Reserved for Spectrum. */
};
126
/* Creation parameters for an IP-in-IP loopback RIF: the common RIF
 * parameters followed by the loopback configuration.
 */
struct mlxsw_sp_rif_params_ipip_lb {
        struct mlxsw_sp_rif_params common;
        struct mlxsw_sp_rif_ipip_lb_config lb_config;
};
131
/* Per-RIF-type operations table; a RIF points to its table via
 * mlxsw_sp_rif::ops.
 */
struct mlxsw_sp_rif_ops {
        enum mlxsw_sp_rif_type type;
        /* NOTE(review): presumably the allocation size of the type-specific
         * structure embedding struct mlxsw_sp_rif - confirm at the allocator.
         */
        size_t rif_size;

        void (*setup)(struct mlxsw_sp_rif *rif,
                      const struct mlxsw_sp_rif_params *params);
        int (*configure)(struct mlxsw_sp_rif *rif);
        void (*deconfigure)(struct mlxsw_sp_rif *rif);
        struct mlxsw_sp_fid * (*fid_get)(struct mlxsw_sp_rif *rif,
                                         struct netlink_ext_ack *extack);
        void (*fdb_del)(struct mlxsw_sp_rif *rif, const char *mac);
};
144
145 static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif);
146 static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree);
147 static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
148                                   struct mlxsw_sp_lpm_tree *lpm_tree);
149 static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
150                                      const struct mlxsw_sp_fib *fib,
151                                      u8 tree_id);
152 static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
153                                        const struct mlxsw_sp_fib *fib);
154
155 static unsigned int *
156 mlxsw_sp_rif_p_counter_get(struct mlxsw_sp_rif *rif,
157                            enum mlxsw_sp_rif_counter_dir dir)
158 {
159         switch (dir) {
160         case MLXSW_SP_RIF_COUNTER_EGRESS:
161                 return &rif->counter_egress;
162         case MLXSW_SP_RIF_COUNTER_INGRESS:
163                 return &rif->counter_ingress;
164         }
165         return NULL;
166 }
167
168 static bool
169 mlxsw_sp_rif_counter_valid_get(struct mlxsw_sp_rif *rif,
170                                enum mlxsw_sp_rif_counter_dir dir)
171 {
172         switch (dir) {
173         case MLXSW_SP_RIF_COUNTER_EGRESS:
174                 return rif->counter_egress_valid;
175         case MLXSW_SP_RIF_COUNTER_INGRESS:
176                 return rif->counter_ingress_valid;
177         }
178         return false;
179 }
180
181 static void
182 mlxsw_sp_rif_counter_valid_set(struct mlxsw_sp_rif *rif,
183                                enum mlxsw_sp_rif_counter_dir dir,
184                                bool valid)
185 {
186         switch (dir) {
187         case MLXSW_SP_RIF_COUNTER_EGRESS:
188                 rif->counter_egress_valid = valid;
189                 break;
190         case MLXSW_SP_RIF_COUNTER_INGRESS:
191                 rif->counter_ingress_valid = valid;
192                 break;
193         }
194 }
195
196 static int mlxsw_sp_rif_counter_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
197                                      unsigned int counter_index, bool enable,
198                                      enum mlxsw_sp_rif_counter_dir dir)
199 {
200         char ritr_pl[MLXSW_REG_RITR_LEN];
201         bool is_egress = false;
202         int err;
203
204         if (dir == MLXSW_SP_RIF_COUNTER_EGRESS)
205                 is_egress = true;
206         mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
207         err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
208         if (err)
209                 return err;
210
211         mlxsw_reg_ritr_counter_pack(ritr_pl, counter_index, enable,
212                                     is_egress);
213         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
214 }
215
216 int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp,
217                                    struct mlxsw_sp_rif *rif,
218                                    enum mlxsw_sp_rif_counter_dir dir, u64 *cnt)
219 {
220         char ricnt_pl[MLXSW_REG_RICNT_LEN];
221         unsigned int *p_counter_index;
222         bool valid;
223         int err;
224
225         valid = mlxsw_sp_rif_counter_valid_get(rif, dir);
226         if (!valid)
227                 return -EINVAL;
228
229         p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
230         if (!p_counter_index)
231                 return -EINVAL;
232         mlxsw_reg_ricnt_pack(ricnt_pl, *p_counter_index,
233                              MLXSW_REG_RICNT_OPCODE_NOP);
234         err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
235         if (err)
236                 return err;
237         *cnt = mlxsw_reg_ricnt_good_unicast_packets_get(ricnt_pl);
238         return 0;
239 }
240
241 static int mlxsw_sp_rif_counter_clear(struct mlxsw_sp *mlxsw_sp,
242                                       unsigned int counter_index)
243 {
244         char ricnt_pl[MLXSW_REG_RICNT_LEN];
245
246         mlxsw_reg_ricnt_pack(ricnt_pl, counter_index,
247                              MLXSW_REG_RICNT_OPCODE_CLEAR);
248         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
249 }
250
251 int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp *mlxsw_sp,
252                                struct mlxsw_sp_rif *rif,
253                                enum mlxsw_sp_rif_counter_dir dir)
254 {
255         unsigned int *p_counter_index;
256         int err;
257
258         p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
259         if (!p_counter_index)
260                 return -EINVAL;
261         err = mlxsw_sp_counter_alloc(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
262                                      p_counter_index);
263         if (err)
264                 return err;
265
266         err = mlxsw_sp_rif_counter_clear(mlxsw_sp, *p_counter_index);
267         if (err)
268                 goto err_counter_clear;
269
270         err = mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
271                                         *p_counter_index, true, dir);
272         if (err)
273                 goto err_counter_edit;
274         mlxsw_sp_rif_counter_valid_set(rif, dir, true);
275         return 0;
276
277 err_counter_edit:
278 err_counter_clear:
279         mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
280                               *p_counter_index);
281         return err;
282 }
283
284 void mlxsw_sp_rif_counter_free(struct mlxsw_sp *mlxsw_sp,
285                                struct mlxsw_sp_rif *rif,
286                                enum mlxsw_sp_rif_counter_dir dir)
287 {
288         unsigned int *p_counter_index;
289
290         if (!mlxsw_sp_rif_counter_valid_get(rif, dir))
291                 return;
292
293         p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
294         if (WARN_ON(!p_counter_index))
295                 return;
296         mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
297                                   *p_counter_index, false, dir);
298         mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
299                               *p_counter_index);
300         mlxsw_sp_rif_counter_valid_set(rif, dir, false);
301 }
302
303 static void mlxsw_sp_rif_counters_alloc(struct mlxsw_sp_rif *rif)
304 {
305         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
306         struct devlink *devlink;
307
308         devlink = priv_to_devlink(mlxsw_sp->core);
309         if (!devlink_dpipe_table_counter_enabled(devlink,
310                                                  MLXSW_SP_DPIPE_TABLE_NAME_ERIF))
311                 return;
312         mlxsw_sp_rif_counter_alloc(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
313 }
314
315 static void mlxsw_sp_rif_counters_free(struct mlxsw_sp_rif *rif)
316 {
317         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
318
319         mlxsw_sp_rif_counter_free(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
320 }
321
/* Number of distinct prefix lengths: 0 through the bit width of an IPv6
 * address, inclusive.
 */
#define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE + 1)

/* Bitmap with one bit per prefix length; bit N set means length N is used. */
struct mlxsw_sp_prefix_usage {
        DECLARE_BITMAP(b, MLXSW_SP_PREFIX_COUNT);
};

/* Iterate @prefix over every prefix length set in @prefix_usage. */
#define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \
        for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT)
330
331 static bool
332 mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1,
333                          struct mlxsw_sp_prefix_usage *prefix_usage2)
334 {
335         return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
336 }
337
338 static void
339 mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1,
340                           struct mlxsw_sp_prefix_usage *prefix_usage2)
341 {
342         memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
343 }
344
345 static void
346 mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage,
347                           unsigned char prefix_len)
348 {
349         set_bit(prefix_len, prefix_usage->b);
350 }
351
352 static void
353 mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage,
354                             unsigned char prefix_len)
355 {
356         clear_bit(prefix_len, prefix_usage->b);
357 }
358
/* Key of a FIB node: an address buffer sized for an IPv6 address, plus the
 * prefix length.
 */
struct mlxsw_sp_fib_key {
        unsigned char addr[sizeof(struct in6_addr)];
        unsigned char prefix_len;
};
363
/* Type of a FIB entry, stored in mlxsw_sp_fib_entry::type. */
enum mlxsw_sp_fib_entry_type {
        MLXSW_SP_FIB_ENTRY_TYPE_REMOTE,
        MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,
        MLXSW_SP_FIB_ENTRY_TYPE_TRAP,
        MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE,

        /* This is a special case of local delivery, where a packet should be
         * decapsulated on reception. Note that there is no corresponding ENCAP,
         * because that's a type of next hop, not of FIB entry. (There can be
         * several next hops in a REMOTE entry, and some of them may be
         * encapsulating entries.)
         */
        MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP,
        MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP,
};
379
380 struct mlxsw_sp_nexthop_group;
381
/* A node of a FIB, identified by @key and pointing back to its owning @fib. */
struct mlxsw_sp_fib_node {
        struct list_head entry_list;
        /* NOTE(review): presumably linked on mlxsw_sp_fib::node_list - confirm */
        struct list_head list;
        /* NOTE(review): presumably hashed into mlxsw_sp_fib::ht - confirm */
        struct rhash_head ht_node;
        struct mlxsw_sp_fib *fib;
        struct mlxsw_sp_fib_key key;
};
389
/* Decapsulation data embedded in struct mlxsw_sp_fib_entry. */
struct mlxsw_sp_fib_entry_decap {
        struct mlxsw_sp_ipip_entry *ipip_entry;
        u32 tunnel_index;
};
394
/* Protocol-independent part of a FIB entry; the IPv4 and IPv6 entry
 * structures embed it as "common".
 */
struct mlxsw_sp_fib_entry {
        struct list_head list;
        struct mlxsw_sp_fib_node *fib_node;
        enum mlxsw_sp_fib_entry_type type;
        struct list_head nexthop_group_node;
        struct mlxsw_sp_nexthop_group *nh_group;
        struct mlxsw_sp_fib_entry_decap decap; /* Valid for decap entries. */
};
403
/* IPv4 FIB entry: the common entry plus IPv4 route attributes. */
struct mlxsw_sp_fib4_entry {
        struct mlxsw_sp_fib_entry common;
        u32 tb_id;
        u32 prio;
        u8 tos;
        u8 type;
};
411
/* IPv6 FIB entry: the common entry plus a list of routes. */
struct mlxsw_sp_fib6_entry {
        struct mlxsw_sp_fib_entry common;
        struct list_head rt6_list;
        /* NOTE(review): presumably the number of items on rt6_list - confirm */
        unsigned int nrt6;
};
417
/* List wrapper around a kernel IPv6 route (struct fib6_info). */
struct mlxsw_sp_rt6 {
        struct list_head list;
        struct fib6_info *rt;
};
422
/* One LPM tree slot. A slot whose ref_count is 0 is free for reuse. */
struct mlxsw_sp_lpm_tree {
        u8 id; /* tree ID */
        unsigned int ref_count;
        enum mlxsw_sp_l3proto proto;
        unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT];
        struct mlxsw_sp_prefix_usage prefix_usage;
};
430
/* A FIB for one protocol within a virtual router. It holds a reference on
 * the LPM tree it is bound to.
 */
struct mlxsw_sp_fib {
        struct rhashtable ht;
        struct list_head node_list;
        struct mlxsw_sp_vr *vr;
        struct mlxsw_sp_lpm_tree *lpm_tree;
        enum mlxsw_sp_l3proto proto;
};
438
/* A virtual router. A slot counts as in use when any of its FIB or
 * multicast table pointers is non-NULL.
 */
struct mlxsw_sp_vr {
        u16 id; /* virtual router ID */
        u32 tb_id; /* kernel fib table id */
        unsigned int rif_count;
        struct mlxsw_sp_fib *fib4;
        struct mlxsw_sp_fib *fib6;
        struct mlxsw_sp_mr_table *mr_table[MLXSW_SP_L3_PROTO_MAX];
        struct mlxsw_sp_rif *ul_rif;
        refcount_t ul_rif_refcnt;
};
449
450 static const struct rhashtable_params mlxsw_sp_fib_ht_params;
451
452 static struct mlxsw_sp_fib *mlxsw_sp_fib_create(struct mlxsw_sp *mlxsw_sp,
453                                                 struct mlxsw_sp_vr *vr,
454                                                 enum mlxsw_sp_l3proto proto)
455 {
456         struct mlxsw_sp_lpm_tree *lpm_tree;
457         struct mlxsw_sp_fib *fib;
458         int err;
459
460         lpm_tree = mlxsw_sp->router->lpm.proto_trees[proto];
461         fib = kzalloc(sizeof(*fib), GFP_KERNEL);
462         if (!fib)
463                 return ERR_PTR(-ENOMEM);
464         err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params);
465         if (err)
466                 goto err_rhashtable_init;
467         INIT_LIST_HEAD(&fib->node_list);
468         fib->proto = proto;
469         fib->vr = vr;
470         fib->lpm_tree = lpm_tree;
471         mlxsw_sp_lpm_tree_hold(lpm_tree);
472         err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, lpm_tree->id);
473         if (err)
474                 goto err_lpm_tree_bind;
475         return fib;
476
477 err_lpm_tree_bind:
478         mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
479 err_rhashtable_init:
480         kfree(fib);
481         return ERR_PTR(err);
482 }
483
484 static void mlxsw_sp_fib_destroy(struct mlxsw_sp *mlxsw_sp,
485                                  struct mlxsw_sp_fib *fib)
486 {
487         mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, fib);
488         mlxsw_sp_lpm_tree_put(mlxsw_sp, fib->lpm_tree);
489         WARN_ON(!list_empty(&fib->node_list));
490         rhashtable_destroy(&fib->ht);
491         kfree(fib);
492 }
493
494 static struct mlxsw_sp_lpm_tree *
495 mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp)
496 {
497         static struct mlxsw_sp_lpm_tree *lpm_tree;
498         int i;
499
500         for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
501                 lpm_tree = &mlxsw_sp->router->lpm.trees[i];
502                 if (lpm_tree->ref_count == 0)
503                         return lpm_tree;
504         }
505         return NULL;
506 }
507
508 static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp,
509                                    struct mlxsw_sp_lpm_tree *lpm_tree)
510 {
511         char ralta_pl[MLXSW_REG_RALTA_LEN];
512
513         mlxsw_reg_ralta_pack(ralta_pl, true,
514                              (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
515                              lpm_tree->id);
516         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
517 }
518
519 static void mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp,
520                                    struct mlxsw_sp_lpm_tree *lpm_tree)
521 {
522         char ralta_pl[MLXSW_REG_RALTA_LEN];
523
524         mlxsw_reg_ralta_pack(ralta_pl, false,
525                              (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
526                              lpm_tree->id);
527         mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
528 }
529
530 static int
531 mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp,
532                                   struct mlxsw_sp_prefix_usage *prefix_usage,
533                                   struct mlxsw_sp_lpm_tree *lpm_tree)
534 {
535         char ralst_pl[MLXSW_REG_RALST_LEN];
536         u8 root_bin = 0;
537         u8 prefix;
538         u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD;
539
540         mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage)
541                 root_bin = prefix;
542
543         mlxsw_reg_ralst_pack(ralst_pl, root_bin, lpm_tree->id);
544         mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) {
545                 if (prefix == 0)
546                         continue;
547                 mlxsw_reg_ralst_bin_pack(ralst_pl, prefix, last_prefix,
548                                          MLXSW_REG_RALST_BIN_NO_CHILD);
549                 last_prefix = prefix;
550         }
551         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
552 }
553
554 static struct mlxsw_sp_lpm_tree *
555 mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
556                          struct mlxsw_sp_prefix_usage *prefix_usage,
557                          enum mlxsw_sp_l3proto proto)
558 {
559         struct mlxsw_sp_lpm_tree *lpm_tree;
560         int err;
561
562         lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp);
563         if (!lpm_tree)
564                 return ERR_PTR(-EBUSY);
565         lpm_tree->proto = proto;
566         err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, lpm_tree);
567         if (err)
568                 return ERR_PTR(err);
569
570         err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, prefix_usage,
571                                                 lpm_tree);
572         if (err)
573                 goto err_left_struct_set;
574         memcpy(&lpm_tree->prefix_usage, prefix_usage,
575                sizeof(lpm_tree->prefix_usage));
576         memset(&lpm_tree->prefix_ref_count, 0,
577                sizeof(lpm_tree->prefix_ref_count));
578         lpm_tree->ref_count = 1;
579         return lpm_tree;
580
581 err_left_struct_set:
582         mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
583         return ERR_PTR(err);
584 }
585
/* Destroy @lpm_tree by freeing it in the device. Called from
 * mlxsw_sp_lpm_tree_put() when the reference count reaches zero.
 */
static void mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp,
                                      struct mlxsw_sp_lpm_tree *lpm_tree)
{
        mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
}
591
592 static struct mlxsw_sp_lpm_tree *
593 mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
594                       struct mlxsw_sp_prefix_usage *prefix_usage,
595                       enum mlxsw_sp_l3proto proto)
596 {
597         struct mlxsw_sp_lpm_tree *lpm_tree;
598         int i;
599
600         for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
601                 lpm_tree = &mlxsw_sp->router->lpm.trees[i];
602                 if (lpm_tree->ref_count != 0 &&
603                     lpm_tree->proto == proto &&
604                     mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
605                                              prefix_usage)) {
606                         mlxsw_sp_lpm_tree_hold(lpm_tree);
607                         return lpm_tree;
608                 }
609         }
610         return mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage, proto);
611 }
612
613 static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree)
614 {
615         lpm_tree->ref_count++;
616 }
617
618 static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
619                                   struct mlxsw_sp_lpm_tree *lpm_tree)
620 {
621         if (--lpm_tree->ref_count == 0)
622                 mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree);
623 }
624
625 #define MLXSW_SP_LPM_TREE_MIN 1 /* tree 0 is reserved */
626
627 static int mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
628 {
629         struct mlxsw_sp_prefix_usage req_prefix_usage = {{ 0 } };
630         struct mlxsw_sp_lpm_tree *lpm_tree;
631         u64 max_trees;
632         int err, i;
633
634         if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_LPM_TREES))
635                 return -EIO;
636
637         max_trees = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_LPM_TREES);
638         mlxsw_sp->router->lpm.tree_count = max_trees - MLXSW_SP_LPM_TREE_MIN;
639         mlxsw_sp->router->lpm.trees = kcalloc(mlxsw_sp->router->lpm.tree_count,
640                                              sizeof(struct mlxsw_sp_lpm_tree),
641                                              GFP_KERNEL);
642         if (!mlxsw_sp->router->lpm.trees)
643                 return -ENOMEM;
644
645         for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
646                 lpm_tree = &mlxsw_sp->router->lpm.trees[i];
647                 lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN;
648         }
649
650         lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
651                                          MLXSW_SP_L3_PROTO_IPV4);
652         if (IS_ERR(lpm_tree)) {
653                 err = PTR_ERR(lpm_tree);
654                 goto err_ipv4_tree_get;
655         }
656         mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4] = lpm_tree;
657
658         lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
659                                          MLXSW_SP_L3_PROTO_IPV6);
660         if (IS_ERR(lpm_tree)) {
661                 err = PTR_ERR(lpm_tree);
662                 goto err_ipv6_tree_get;
663         }
664         mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6] = lpm_tree;
665
666         return 0;
667
668 err_ipv6_tree_get:
669         lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
670         mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
671 err_ipv4_tree_get:
672         kfree(mlxsw_sp->router->lpm.trees);
673         return err;
674 }
675
676 static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp)
677 {
678         struct mlxsw_sp_lpm_tree *lpm_tree;
679
680         lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6];
681         mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
682
683         lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
684         mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
685
686         kfree(mlxsw_sp->router->lpm.trees);
687 }
688
689 static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr)
690 {
691         return !!vr->fib4 || !!vr->fib6 ||
692                !!vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] ||
693                !!vr->mr_table[MLXSW_SP_L3_PROTO_IPV6];
694 }
695
696 static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
697 {
698         struct mlxsw_sp_vr *vr;
699         int i;
700
701         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
702                 vr = &mlxsw_sp->router->vrs[i];
703                 if (!mlxsw_sp_vr_is_used(vr))
704                         return vr;
705         }
706         return NULL;
707 }
708
709 static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
710                                      const struct mlxsw_sp_fib *fib, u8 tree_id)
711 {
712         char raltb_pl[MLXSW_REG_RALTB_LEN];
713
714         mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
715                              (enum mlxsw_reg_ralxx_protocol) fib->proto,
716                              tree_id);
717         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
718 }
719
720 static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
721                                        const struct mlxsw_sp_fib *fib)
722 {
723         char raltb_pl[MLXSW_REG_RALTB_LEN];
724
725         /* Bind to tree 0 which is default */
726         mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
727                              (enum mlxsw_reg_ralxx_protocol) fib->proto, 0);
728         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
729 }
730
731 static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
732 {
733         /* For our purpose, squash main, default and local tables into one */
734         if (tb_id == RT_TABLE_LOCAL || tb_id == RT_TABLE_DEFAULT)
735                 tb_id = RT_TABLE_MAIN;
736         return tb_id;
737 }
738
739 static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
740                                             u32 tb_id)
741 {
742         struct mlxsw_sp_vr *vr;
743         int i;
744
745         tb_id = mlxsw_sp_fix_tb_id(tb_id);
746
747         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
748                 vr = &mlxsw_sp->router->vrs[i];
749                 if (mlxsw_sp_vr_is_used(vr) && vr->tb_id == tb_id)
750                         return vr;
751         }
752         return NULL;
753 }
754
755 int mlxsw_sp_router_tb_id_vr_id(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
756                                 u16 *vr_id)
757 {
758         struct mlxsw_sp_vr *vr;
759
760         vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
761         if (!vr)
762                 return -ESRCH;
763         *vr_id = vr->id;
764
765         return 0;
766 }
767
768 static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr,
769                                             enum mlxsw_sp_l3proto proto)
770 {
771         switch (proto) {
772         case MLXSW_SP_L3_PROTO_IPV4:
773                 return vr->fib4;
774         case MLXSW_SP_L3_PROTO_IPV6:
775                 return vr->fib6;
776         }
777         return NULL;
778 }
779
780 static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
781                                               u32 tb_id,
782                                               struct netlink_ext_ack *extack)
783 {
784         struct mlxsw_sp_mr_table *mr4_table, *mr6_table;
785         struct mlxsw_sp_fib *fib4;
786         struct mlxsw_sp_fib *fib6;
787         struct mlxsw_sp_vr *vr;
788         int err;
789
790         vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
791         if (!vr) {
792                 NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported virtual routers");
793                 return ERR_PTR(-EBUSY);
794         }
795         fib4 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
796         if (IS_ERR(fib4))
797                 return ERR_CAST(fib4);
798         fib6 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
799         if (IS_ERR(fib6)) {
800                 err = PTR_ERR(fib6);
801                 goto err_fib6_create;
802         }
803         mr4_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
804                                              MLXSW_SP_L3_PROTO_IPV4);
805         if (IS_ERR(mr4_table)) {
806                 err = PTR_ERR(mr4_table);
807                 goto err_mr4_table_create;
808         }
809         mr6_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
810                                              MLXSW_SP_L3_PROTO_IPV6);
811         if (IS_ERR(mr6_table)) {
812                 err = PTR_ERR(mr6_table);
813                 goto err_mr6_table_create;
814         }
815
816         vr->fib4 = fib4;
817         vr->fib6 = fib6;
818         vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] = mr4_table;
819         vr->mr_table[MLXSW_SP_L3_PROTO_IPV6] = mr6_table;
820         vr->tb_id = tb_id;
821         return vr;
822
823 err_mr6_table_create:
824         mlxsw_sp_mr_table_destroy(mr4_table);
825 err_mr4_table_create:
826         mlxsw_sp_fib_destroy(mlxsw_sp, fib6);
827 err_fib6_create:
828         mlxsw_sp_fib_destroy(mlxsw_sp, fib4);
829         return ERR_PTR(err);
830 }
831
/* Destroy @vr's multicast tables and FIBs in the reverse order of their
 * creation. Each pointer is cleared right after its object is destroyed;
 * once all four are NULL the slot is reported unused by
 * mlxsw_sp_vr_is_used().
 */
static void mlxsw_sp_vr_destroy(struct mlxsw_sp *mlxsw_sp,
                                struct mlxsw_sp_vr *vr)
{
        mlxsw_sp_mr_table_destroy(vr->mr_table[MLXSW_SP_L3_PROTO_IPV6]);
        vr->mr_table[MLXSW_SP_L3_PROTO_IPV6] = NULL;
        mlxsw_sp_mr_table_destroy(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4]);
        vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] = NULL;
        mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib6);
        vr->fib6 = NULL;
        mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib4);
        vr->fib4 = NULL;
}
844
845 static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
846                                            struct netlink_ext_ack *extack)
847 {
848         struct mlxsw_sp_vr *vr;
849
850         tb_id = mlxsw_sp_fix_tb_id(tb_id);
851         vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
852         if (!vr)
853                 vr = mlxsw_sp_vr_create(mlxsw_sp, tb_id, extack);
854         return vr;
855 }
856
857 static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr)
858 {
859         if (!vr->rif_count && list_empty(&vr->fib4->node_list) &&
860             list_empty(&vr->fib6->node_list) &&
861             mlxsw_sp_mr_table_empty(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4]) &&
862             mlxsw_sp_mr_table_empty(vr->mr_table[MLXSW_SP_L3_PROTO_IPV6]))
863                 mlxsw_sp_vr_destroy(mlxsw_sp, vr);
864 }
865
866 static bool
867 mlxsw_sp_vr_lpm_tree_should_replace(struct mlxsw_sp_vr *vr,
868                                     enum mlxsw_sp_l3proto proto, u8 tree_id)
869 {
870         struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
871
872         if (!mlxsw_sp_vr_is_used(vr))
873                 return false;
874         if (fib->lpm_tree->id == tree_id)
875                 return true;
876         return false;
877 }
878
879 static int mlxsw_sp_vr_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
880                                         struct mlxsw_sp_fib *fib,
881                                         struct mlxsw_sp_lpm_tree *new_tree)
882 {
883         struct mlxsw_sp_lpm_tree *old_tree = fib->lpm_tree;
884         int err;
885
886         fib->lpm_tree = new_tree;
887         mlxsw_sp_lpm_tree_hold(new_tree);
888         err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id);
889         if (err)
890                 goto err_tree_bind;
891         mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
892         return 0;
893
894 err_tree_bind:
895         mlxsw_sp_lpm_tree_put(mlxsw_sp, new_tree);
896         fib->lpm_tree = old_tree;
897         return err;
898 }
899
900 static int mlxsw_sp_vrs_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
901                                          struct mlxsw_sp_fib *fib,
902                                          struct mlxsw_sp_lpm_tree *new_tree)
903 {
904         enum mlxsw_sp_l3proto proto = fib->proto;
905         struct mlxsw_sp_lpm_tree *old_tree;
906         u8 old_id, new_id = new_tree->id;
907         struct mlxsw_sp_vr *vr;
908         int i, err;
909
910         old_tree = mlxsw_sp->router->lpm.proto_trees[proto];
911         old_id = old_tree->id;
912
913         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
914                 vr = &mlxsw_sp->router->vrs[i];
915                 if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, old_id))
916                         continue;
917                 err = mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
918                                                    mlxsw_sp_vr_fib(vr, proto),
919                                                    new_tree);
920                 if (err)
921                         goto err_tree_replace;
922         }
923
924         memcpy(new_tree->prefix_ref_count, old_tree->prefix_ref_count,
925                sizeof(new_tree->prefix_ref_count));
926         mlxsw_sp->router->lpm.proto_trees[proto] = new_tree;
927         mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
928
929         return 0;
930
931 err_tree_replace:
932         for (i--; i >= 0; i--) {
933                 if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, new_id))
934                         continue;
935                 mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
936                                              mlxsw_sp_vr_fib(vr, proto),
937                                              old_tree);
938         }
939         return err;
940 }
941
942 static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
943 {
944         struct mlxsw_sp_vr *vr;
945         u64 max_vrs;
946         int i;
947
948         if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_VRS))
949                 return -EIO;
950
951         max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
952         mlxsw_sp->router->vrs = kcalloc(max_vrs, sizeof(struct mlxsw_sp_vr),
953                                         GFP_KERNEL);
954         if (!mlxsw_sp->router->vrs)
955                 return -ENOMEM;
956
957         for (i = 0; i < max_vrs; i++) {
958                 vr = &mlxsw_sp->router->vrs[i];
959                 vr->id = i;
960         }
961
962         return 0;
963 }
964
965 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp);
966
967 static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
968 {
969         /* At this stage we're guaranteed not to have new incoming
970          * FIB notifications and the work queue is free from FIBs
971          * sitting on top of mlxsw netdevs. However, we can still
972          * have other FIBs queued. Flush the queue before flushing
973          * the device's tables. No need for locks, as we're the only
974          * writer.
975          */
976         mlxsw_core_flush_owq();
977         mlxsw_sp_router_fib_flush(mlxsw_sp);
978         kfree(mlxsw_sp->router->vrs);
979 }
980
981 static struct net_device *
982 __mlxsw_sp_ipip_netdev_ul_dev_get(const struct net_device *ol_dev)
983 {
984         struct ip_tunnel *tun = netdev_priv(ol_dev);
985         struct net *net = dev_net(ol_dev);
986
987         return __dev_get_by_index(net, tun->parms.link);
988 }
989
990 u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev)
991 {
992         struct net_device *d = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
993
994         if (d)
995                 return l3mdev_fib_table(d) ? : RT_TABLE_MAIN;
996         else
997                 return l3mdev_fib_table(ol_dev) ? : RT_TABLE_MAIN;
998 }
999
1000 static struct mlxsw_sp_rif *
1001 mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
1002                     const struct mlxsw_sp_rif_params *params,
1003                     struct netlink_ext_ack *extack);
1004
1005 static struct mlxsw_sp_rif_ipip_lb *
1006 mlxsw_sp_ipip_ol_ipip_lb_create(struct mlxsw_sp *mlxsw_sp,
1007                                 enum mlxsw_sp_ipip_type ipipt,
1008                                 struct net_device *ol_dev,
1009                                 struct netlink_ext_ack *extack)
1010 {
1011         struct mlxsw_sp_rif_params_ipip_lb lb_params;
1012         const struct mlxsw_sp_ipip_ops *ipip_ops;
1013         struct mlxsw_sp_rif *rif;
1014
1015         ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
1016         lb_params = (struct mlxsw_sp_rif_params_ipip_lb) {
1017                 .common.dev = ol_dev,
1018                 .common.lag = false,
1019                 .lb_config = ipip_ops->ol_loopback_config(mlxsw_sp, ol_dev),
1020         };
1021
1022         rif = mlxsw_sp_rif_create(mlxsw_sp, &lb_params.common, extack);
1023         if (IS_ERR(rif))
1024                 return ERR_CAST(rif);
1025         return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
1026 }
1027
1028 static struct mlxsw_sp_ipip_entry *
1029 mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp,
1030                           enum mlxsw_sp_ipip_type ipipt,
1031                           struct net_device *ol_dev)
1032 {
1033         const struct mlxsw_sp_ipip_ops *ipip_ops;
1034         struct mlxsw_sp_ipip_entry *ipip_entry;
1035         struct mlxsw_sp_ipip_entry *ret = NULL;
1036
1037         ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
1038         ipip_entry = kzalloc(sizeof(*ipip_entry), GFP_KERNEL);
1039         if (!ipip_entry)
1040                 return ERR_PTR(-ENOMEM);
1041
1042         ipip_entry->ol_lb = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp, ipipt,
1043                                                             ol_dev, NULL);
1044         if (IS_ERR(ipip_entry->ol_lb)) {
1045                 ret = ERR_CAST(ipip_entry->ol_lb);
1046                 goto err_ol_ipip_lb_create;
1047         }
1048
1049         ipip_entry->ipipt = ipipt;
1050         ipip_entry->ol_dev = ol_dev;
1051
1052         switch (ipip_ops->ul_proto) {
1053         case MLXSW_SP_L3_PROTO_IPV4:
1054                 ipip_entry->parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev);
1055                 break;
1056         case MLXSW_SP_L3_PROTO_IPV6:
1057                 WARN_ON(1);
1058                 break;
1059         }
1060
1061         return ipip_entry;
1062
1063 err_ol_ipip_lb_create:
1064         kfree(ipip_entry);
1065         return ret;
1066 }
1067
1068 static void
1069 mlxsw_sp_ipip_entry_dealloc(struct mlxsw_sp_ipip_entry *ipip_entry)
1070 {
1071         mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common);
1072         kfree(ipip_entry);
1073 }
1074
1075 static bool
1076 mlxsw_sp_ipip_entry_saddr_matches(struct mlxsw_sp *mlxsw_sp,
1077                                   const enum mlxsw_sp_l3proto ul_proto,
1078                                   union mlxsw_sp_l3addr saddr,
1079                                   u32 ul_tb_id,
1080                                   struct mlxsw_sp_ipip_entry *ipip_entry)
1081 {
1082         u32 tun_ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
1083         enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
1084         union mlxsw_sp_l3addr tun_saddr;
1085
1086         if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
1087                 return false;
1088
1089         tun_saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ipip_entry->ol_dev);
1090         return tun_ul_tb_id == ul_tb_id &&
1091                mlxsw_sp_l3addr_eq(&tun_saddr, &saddr);
1092 }
1093
1094 static int
1095 mlxsw_sp_fib_entry_decap_init(struct mlxsw_sp *mlxsw_sp,
1096                               struct mlxsw_sp_fib_entry *fib_entry,
1097                               struct mlxsw_sp_ipip_entry *ipip_entry)
1098 {
1099         u32 tunnel_index;
1100         int err;
1101
1102         err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
1103                                   1, &tunnel_index);
1104         if (err)
1105                 return err;
1106
1107         ipip_entry->decap_fib_entry = fib_entry;
1108         fib_entry->decap.ipip_entry = ipip_entry;
1109         fib_entry->decap.tunnel_index = tunnel_index;
1110         return 0;
1111 }
1112
1113 static void mlxsw_sp_fib_entry_decap_fini(struct mlxsw_sp *mlxsw_sp,
1114                                           struct mlxsw_sp_fib_entry *fib_entry)
1115 {
1116         /* Unlink this node from the IPIP entry that it's the decap entry of. */
1117         fib_entry->decap.ipip_entry->decap_fib_entry = NULL;
1118         fib_entry->decap.ipip_entry = NULL;
1119         mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
1120                            1, fib_entry->decap.tunnel_index);
1121 }
1122
1123 static struct mlxsw_sp_fib_node *
1124 mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
1125                          size_t addr_len, unsigned char prefix_len);
1126 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
1127                                      struct mlxsw_sp_fib_entry *fib_entry);
1128
1129 static void
1130 mlxsw_sp_ipip_entry_demote_decap(struct mlxsw_sp *mlxsw_sp,
1131                                  struct mlxsw_sp_ipip_entry *ipip_entry)
1132 {
1133         struct mlxsw_sp_fib_entry *fib_entry = ipip_entry->decap_fib_entry;
1134
1135         mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, fib_entry);
1136         fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1137
1138         mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1139 }
1140
1141 static void
1142 mlxsw_sp_ipip_entry_promote_decap(struct mlxsw_sp *mlxsw_sp,
1143                                   struct mlxsw_sp_ipip_entry *ipip_entry,
1144                                   struct mlxsw_sp_fib_entry *decap_fib_entry)
1145 {
1146         if (mlxsw_sp_fib_entry_decap_init(mlxsw_sp, decap_fib_entry,
1147                                           ipip_entry))
1148                 return;
1149         decap_fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
1150
1151         if (mlxsw_sp_fib_entry_update(mlxsw_sp, decap_fib_entry))
1152                 mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1153 }
1154
1155 static struct mlxsw_sp_fib_entry *
1156 mlxsw_sp_router_ip2me_fib_entry_find(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
1157                                      enum mlxsw_sp_l3proto proto,
1158                                      const union mlxsw_sp_l3addr *addr,
1159                                      enum mlxsw_sp_fib_entry_type type)
1160 {
1161         struct mlxsw_sp_fib_entry *fib_entry;
1162         struct mlxsw_sp_fib_node *fib_node;
1163         unsigned char addr_prefix_len;
1164         struct mlxsw_sp_fib *fib;
1165         struct mlxsw_sp_vr *vr;
1166         const void *addrp;
1167         size_t addr_len;
1168         u32 addr4;
1169
1170         vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
1171         if (!vr)
1172                 return NULL;
1173         fib = mlxsw_sp_vr_fib(vr, proto);
1174
1175         switch (proto) {
1176         case MLXSW_SP_L3_PROTO_IPV4:
1177                 addr4 = be32_to_cpu(addr->addr4);
1178                 addrp = &addr4;
1179                 addr_len = 4;
1180                 addr_prefix_len = 32;
1181                 break;
1182         case MLXSW_SP_L3_PROTO_IPV6: /* fall through */
1183         default:
1184                 WARN_ON(1);
1185                 return NULL;
1186         }
1187
1188         fib_node = mlxsw_sp_fib_node_lookup(fib, addrp, addr_len,
1189                                             addr_prefix_len);
1190         if (!fib_node || list_empty(&fib_node->entry_list))
1191                 return NULL;
1192
1193         fib_entry = list_first_entry(&fib_node->entry_list,
1194                                      struct mlxsw_sp_fib_entry, list);
1195         if (fib_entry->type != type)
1196                 return NULL;
1197
1198         return fib_entry;
1199 }
1200
1201 /* Given an IPIP entry, find the corresponding decap route. */
1202 static struct mlxsw_sp_fib_entry *
1203 mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp,
1204                                struct mlxsw_sp_ipip_entry *ipip_entry)
1205 {
1206         static struct mlxsw_sp_fib_node *fib_node;
1207         const struct mlxsw_sp_ipip_ops *ipip_ops;
1208         struct mlxsw_sp_fib_entry *fib_entry;
1209         unsigned char saddr_prefix_len;
1210         union mlxsw_sp_l3addr saddr;
1211         struct mlxsw_sp_fib *ul_fib;
1212         struct mlxsw_sp_vr *ul_vr;
1213         const void *saddrp;
1214         size_t saddr_len;
1215         u32 ul_tb_id;
1216         u32 saddr4;
1217
1218         ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1219
1220         ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
1221         ul_vr = mlxsw_sp_vr_find(mlxsw_sp, ul_tb_id);
1222         if (!ul_vr)
1223                 return NULL;
1224
1225         ul_fib = mlxsw_sp_vr_fib(ul_vr, ipip_ops->ul_proto);
1226         saddr = mlxsw_sp_ipip_netdev_saddr(ipip_ops->ul_proto,
1227                                            ipip_entry->ol_dev);
1228
1229         switch (ipip_ops->ul_proto) {
1230         case MLXSW_SP_L3_PROTO_IPV4:
1231                 saddr4 = be32_to_cpu(saddr.addr4);
1232                 saddrp = &saddr4;
1233                 saddr_len = 4;
1234                 saddr_prefix_len = 32;
1235                 break;
1236         case MLXSW_SP_L3_PROTO_IPV6:
1237                 WARN_ON(1);
1238                 return NULL;
1239         }
1240
1241         fib_node = mlxsw_sp_fib_node_lookup(ul_fib, saddrp, saddr_len,
1242                                             saddr_prefix_len);
1243         if (!fib_node || list_empty(&fib_node->entry_list))
1244                 return NULL;
1245
1246         fib_entry = list_first_entry(&fib_node->entry_list,
1247                                      struct mlxsw_sp_fib_entry, list);
1248         if (fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP)
1249                 return NULL;
1250
1251         return fib_entry;
1252 }
1253
1254 static struct mlxsw_sp_ipip_entry *
1255 mlxsw_sp_ipip_entry_create(struct mlxsw_sp *mlxsw_sp,
1256                            enum mlxsw_sp_ipip_type ipipt,
1257                            struct net_device *ol_dev)
1258 {
1259         struct mlxsw_sp_ipip_entry *ipip_entry;
1260
1261         ipip_entry = mlxsw_sp_ipip_entry_alloc(mlxsw_sp, ipipt, ol_dev);
1262         if (IS_ERR(ipip_entry))
1263                 return ipip_entry;
1264
1265         list_add_tail(&ipip_entry->ipip_list_node,
1266                       &mlxsw_sp->router->ipip_list);
1267
1268         return ipip_entry;
1269 }
1270
1271 static void
1272 mlxsw_sp_ipip_entry_destroy(struct mlxsw_sp *mlxsw_sp,
1273                             struct mlxsw_sp_ipip_entry *ipip_entry)
1274 {
1275         list_del(&ipip_entry->ipip_list_node);
1276         mlxsw_sp_ipip_entry_dealloc(ipip_entry);
1277 }
1278
1279 static bool
1280 mlxsw_sp_ipip_entry_matches_decap(struct mlxsw_sp *mlxsw_sp,
1281                                   const struct net_device *ul_dev,
1282                                   enum mlxsw_sp_l3proto ul_proto,
1283                                   union mlxsw_sp_l3addr ul_dip,
1284                                   struct mlxsw_sp_ipip_entry *ipip_entry)
1285 {
1286         u32 ul_tb_id = l3mdev_fib_table(ul_dev) ? : RT_TABLE_MAIN;
1287         enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
1288
1289         if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
1290                 return false;
1291
1292         return mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, ul_dip,
1293                                                  ul_tb_id, ipip_entry);
1294 }
1295
1296 /* Given decap parameters, find the corresponding IPIP entry. */
1297 static struct mlxsw_sp_ipip_entry *
1298 mlxsw_sp_ipip_entry_find_by_decap(struct mlxsw_sp *mlxsw_sp,
1299                                   const struct net_device *ul_dev,
1300                                   enum mlxsw_sp_l3proto ul_proto,
1301                                   union mlxsw_sp_l3addr ul_dip)
1302 {
1303         struct mlxsw_sp_ipip_entry *ipip_entry;
1304
1305         list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
1306                             ipip_list_node)
1307                 if (mlxsw_sp_ipip_entry_matches_decap(mlxsw_sp, ul_dev,
1308                                                       ul_proto, ul_dip,
1309                                                       ipip_entry))
1310                         return ipip_entry;
1311
1312         return NULL;
1313 }
1314
1315 static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp,
1316                                       const struct net_device *dev,
1317                                       enum mlxsw_sp_ipip_type *p_type)
1318 {
1319         struct mlxsw_sp_router *router = mlxsw_sp->router;
1320         const struct mlxsw_sp_ipip_ops *ipip_ops;
1321         enum mlxsw_sp_ipip_type ipipt;
1322
1323         for (ipipt = 0; ipipt < MLXSW_SP_IPIP_TYPE_MAX; ++ipipt) {
1324                 ipip_ops = router->ipip_ops_arr[ipipt];
1325                 if (dev->type == ipip_ops->dev_type) {
1326                         if (p_type)
1327                                 *p_type = ipipt;
1328                         return true;
1329                 }
1330         }
1331         return false;
1332 }
1333
1334 bool mlxsw_sp_netdev_is_ipip_ol(const struct mlxsw_sp *mlxsw_sp,
1335                                 const struct net_device *dev)
1336 {
1337         return mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL);
1338 }
1339
1340 static struct mlxsw_sp_ipip_entry *
1341 mlxsw_sp_ipip_entry_find_by_ol_dev(struct mlxsw_sp *mlxsw_sp,
1342                                    const struct net_device *ol_dev)
1343 {
1344         struct mlxsw_sp_ipip_entry *ipip_entry;
1345
1346         list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
1347                             ipip_list_node)
1348                 if (ipip_entry->ol_dev == ol_dev)
1349                         return ipip_entry;
1350
1351         return NULL;
1352 }
1353
1354 static struct mlxsw_sp_ipip_entry *
1355 mlxsw_sp_ipip_entry_find_by_ul_dev(const struct mlxsw_sp *mlxsw_sp,
1356                                    const struct net_device *ul_dev,
1357                                    struct mlxsw_sp_ipip_entry *start)
1358 {
1359         struct mlxsw_sp_ipip_entry *ipip_entry;
1360
1361         ipip_entry = list_prepare_entry(start, &mlxsw_sp->router->ipip_list,
1362                                         ipip_list_node);
1363         list_for_each_entry_continue(ipip_entry, &mlxsw_sp->router->ipip_list,
1364                                      ipip_list_node) {
1365                 struct net_device *ipip_ul_dev =
1366                         __mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);
1367
1368                 if (ipip_ul_dev == ul_dev)
1369                         return ipip_entry;
1370         }
1371
1372         return NULL;
1373 }
1374
1375 bool mlxsw_sp_netdev_is_ipip_ul(const struct mlxsw_sp *mlxsw_sp,
1376                                 const struct net_device *dev)
1377 {
1378         return mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp, dev, NULL);
1379 }
1380
1381 static bool mlxsw_sp_netdevice_ipip_can_offload(struct mlxsw_sp *mlxsw_sp,
1382                                                 const struct net_device *ol_dev,
1383                                                 enum mlxsw_sp_ipip_type ipipt)
1384 {
1385         const struct mlxsw_sp_ipip_ops *ops
1386                 = mlxsw_sp->router->ipip_ops_arr[ipipt];
1387
1388         /* For deciding whether decap should be offloaded, we don't care about
1389          * overlay protocol, so ask whether either one is supported.
1390          */
1391         return ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV4) ||
1392                ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV6);
1393 }
1394
1395 static int mlxsw_sp_netdevice_ipip_ol_reg_event(struct mlxsw_sp *mlxsw_sp,
1396                                                 struct net_device *ol_dev)
1397 {
1398         struct mlxsw_sp_ipip_entry *ipip_entry;
1399         enum mlxsw_sp_l3proto ul_proto;
1400         enum mlxsw_sp_ipip_type ipipt;
1401         union mlxsw_sp_l3addr saddr;
1402         u32 ul_tb_id;
1403
1404         mlxsw_sp_netdev_ipip_type(mlxsw_sp, ol_dev, &ipipt);
1405         if (mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev, ipipt)) {
1406                 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
1407                 ul_proto = mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto;
1408                 saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
1409                 if (!mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
1410                                                           saddr, ul_tb_id,
1411                                                           NULL)) {
1412                         ipip_entry = mlxsw_sp_ipip_entry_create(mlxsw_sp, ipipt,
1413                                                                 ol_dev);
1414                         if (IS_ERR(ipip_entry))
1415                                 return PTR_ERR(ipip_entry);
1416                 }
1417         }
1418
1419         return 0;
1420 }
1421
/* Handle unregistration of overlay device @ol_dev: destroy its IPIP entry,
 * if it has one.
 */
static void mlxsw_sp_netdevice_ipip_ol_unreg_event(struct mlxsw_sp *mlxsw_sp,
						   struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (!ipip_entry)
		return;

	mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
}
1431
/* The overlay device of @ipip_entry is up: if a matching decap route exists,
 * promote it to an IPIP decap entry.
 */
static void
mlxsw_sp_ipip_entry_ol_up_event(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_ipip_entry *ipip_entry)
{
	struct mlxsw_sp_fib_entry *decap_route;

	decap_route = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp, ipip_entry);
	if (!decap_route)
		return;

	mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry, decap_route);
}
1443
1444 static int
1445 mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif, u16 ul_vr_id,
1446                         u16 ul_rif_id, bool enable)
1447 {
1448         struct mlxsw_sp_rif_ipip_lb_config lb_cf = lb_rif->lb_config;
1449         struct mlxsw_sp_rif *rif = &lb_rif->common;
1450         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
1451         char ritr_pl[MLXSW_REG_RITR_LEN];
1452         u32 saddr4;
1453
1454         switch (lb_cf.ul_protocol) {
1455         case MLXSW_SP_L3_PROTO_IPV4:
1456                 saddr4 = be32_to_cpu(lb_cf.saddr.addr4);
1457                 mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
1458                                     rif->rif_index, rif->vr_id, rif->dev->mtu);
1459                 mlxsw_reg_ritr_loopback_ipip4_pack(ritr_pl, lb_cf.lb_ipipt,
1460                             MLXSW_REG_RITR_LOOPBACK_IPIP_OPTIONS_GRE_KEY_PRESET,
1461                             ul_vr_id, ul_rif_id, saddr4, lb_cf.okey);
1462                 break;
1463
1464         case MLXSW_SP_L3_PROTO_IPV6:
1465                 return -EAFNOSUPPORT;
1466         }
1467
1468         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
1469 }
1470
1471 static int mlxsw_sp_netdevice_ipip_ol_update_mtu(struct mlxsw_sp *mlxsw_sp,
1472                                                  struct net_device *ol_dev)
1473 {
1474         struct mlxsw_sp_ipip_entry *ipip_entry;
1475         struct mlxsw_sp_rif_ipip_lb *lb_rif;
1476         int err = 0;
1477
1478         ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1479         if (ipip_entry) {
1480                 lb_rif = ipip_entry->ol_lb;
1481                 err = mlxsw_sp_rif_ipip_lb_op(lb_rif, lb_rif->ul_vr_id,
1482                                               lb_rif->ul_rif_id, true);
1483                 if (err)
1484                         goto out;
1485                 lb_rif->common.mtu = ol_dev->mtu;
1486         }
1487
1488 out:
1489         return err;
1490 }
1491
/* Handle overlay device @ol_dev going up; a no-op for devices without an
 * IPIP entry.
 */
static void mlxsw_sp_netdevice_ipip_ol_up_event(struct mlxsw_sp *mlxsw_sp,
						struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (!ipip_entry)
		return;

	mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);
}
1501
1502 static void
1503 mlxsw_sp_ipip_entry_ol_down_event(struct mlxsw_sp *mlxsw_sp,
1504                                   struct mlxsw_sp_ipip_entry *ipip_entry)
1505 {
1506         if (ipip_entry->decap_fib_entry)
1507                 mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1508 }
1509
/* Handle overlay device @ol_dev going down; a no-op for devices without an
 * IPIP entry.
 */
static void mlxsw_sp_netdevice_ipip_ol_down_event(struct mlxsw_sp *mlxsw_sp,
						  struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (!ipip_entry)
		return;

	mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
}
1519
1520 static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp,
1521                                          struct mlxsw_sp_rif *old_rif,
1522                                          struct mlxsw_sp_rif *new_rif);
1523 static int
1524 mlxsw_sp_ipip_entry_ol_lb_update(struct mlxsw_sp *mlxsw_sp,
1525                                  struct mlxsw_sp_ipip_entry *ipip_entry,
1526                                  bool keep_encap,
1527                                  struct netlink_ext_ack *extack)
1528 {
1529         struct mlxsw_sp_rif_ipip_lb *old_lb_rif = ipip_entry->ol_lb;
1530         struct mlxsw_sp_rif_ipip_lb *new_lb_rif;
1531
1532         new_lb_rif = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp,
1533                                                      ipip_entry->ipipt,
1534                                                      ipip_entry->ol_dev,
1535                                                      extack);
1536         if (IS_ERR(new_lb_rif))
1537                 return PTR_ERR(new_lb_rif);
1538         ipip_entry->ol_lb = new_lb_rif;
1539
1540         if (keep_encap)
1541                 mlxsw_sp_nexthop_rif_migrate(mlxsw_sp, &old_lb_rif->common,
1542                                              &new_lb_rif->common);
1543
1544         mlxsw_sp_rif_destroy(&old_lb_rif->common);
1545
1546         return 0;
1547 }
1548
1549 static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
1550                                         struct mlxsw_sp_rif *rif);
1551
1552 /**
1553  * Update the offload related to an IPIP entry. This always updates decap, and
1554  * in addition to that it also:
1555  * @recreate_loopback: recreates the associated loopback RIF
1556  * @keep_encap: updates next hops that use the tunnel netdevice. This is only
1557  *              relevant when recreate_loopback is true.
1558  * @update_nexthops: updates next hops, keeping the current loopback RIF. This
1559  *                   is only relevant when recreate_loopback is false.
1560  */
1561 int __mlxsw_sp_ipip_entry_update_tunnel(struct mlxsw_sp *mlxsw_sp,
1562                                         struct mlxsw_sp_ipip_entry *ipip_entry,
1563                                         bool recreate_loopback,
1564                                         bool keep_encap,
1565                                         bool update_nexthops,
1566                                         struct netlink_ext_ack *extack)
1567 {
1568         int err;
1569
1570         /* RIFs can't be edited, so to update loopback, we need to destroy and
1571          * recreate it. That creates a window of opportunity where RALUE and
1572          * RATR registers end up referencing a RIF that's already gone. RATRs
1573          * are handled in mlxsw_sp_ipip_entry_ol_lb_update(), and to take care
1574          * of RALUE, demote the decap route back.
1575          */
1576         if (ipip_entry->decap_fib_entry)
1577                 mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1578
1579         if (recreate_loopback) {
1580                 err = mlxsw_sp_ipip_entry_ol_lb_update(mlxsw_sp, ipip_entry,
1581                                                        keep_encap, extack);
1582                 if (err)
1583                         return err;
1584         } else if (update_nexthops) {
1585                 mlxsw_sp_nexthop_rif_update(mlxsw_sp,
1586                                             &ipip_entry->ol_lb->common);
1587         }
1588
1589         if (ipip_entry->ol_dev->flags & IFF_UP)
1590                 mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);
1591
1592         return 0;
1593 }
1594
1595 static int mlxsw_sp_netdevice_ipip_ol_vrf_event(struct mlxsw_sp *mlxsw_sp,
1596                                                 struct net_device *ol_dev,
1597                                                 struct netlink_ext_ack *extack)
1598 {
1599         struct mlxsw_sp_ipip_entry *ipip_entry =
1600                 mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1601         enum mlxsw_sp_l3proto ul_proto;
1602         union mlxsw_sp_l3addr saddr;
1603         u32 ul_tb_id;
1604
1605         if (!ipip_entry)
1606                 return 0;
1607
1608         /* For flat configuration cases, moving overlay to a different VRF might
1609          * cause local address conflict, and the conflicting tunnels need to be
1610          * demoted.
1611          */
1612         ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
1613         ul_proto = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]->ul_proto;
1614         saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
1615         if (mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
1616                                                  saddr, ul_tb_id,
1617                                                  ipip_entry)) {
1618                 mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1619                 return 0;
1620         }
1621
1622         return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1623                                                    true, false, false, extack);
1624 }
1625
1626 static int
1627 mlxsw_sp_netdevice_ipip_ul_vrf_event(struct mlxsw_sp *mlxsw_sp,
1628                                      struct mlxsw_sp_ipip_entry *ipip_entry,
1629                                      struct net_device *ul_dev,
1630                                      struct netlink_ext_ack *extack)
1631 {
1632         return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1633                                                    true, true, false, extack);
1634 }
1635
1636 static int
1637 mlxsw_sp_netdevice_ipip_ul_up_event(struct mlxsw_sp *mlxsw_sp,
1638                                     struct mlxsw_sp_ipip_entry *ipip_entry,
1639                                     struct net_device *ul_dev)
1640 {
1641         return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1642                                                    false, false, true, NULL);
1643 }
1644
1645 static int
1646 mlxsw_sp_netdevice_ipip_ul_down_event(struct mlxsw_sp *mlxsw_sp,
1647                                       struct mlxsw_sp_ipip_entry *ipip_entry,
1648                                       struct net_device *ul_dev)
1649 {
1650         /* A down underlay device causes encapsulated packets to not be
1651          * forwarded, but decap still works. So refresh next hops without
1652          * touching anything else.
1653          */
1654         return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1655                                                    false, false, true, NULL);
1656 }
1657
1658 static int
1659 mlxsw_sp_netdevice_ipip_ol_change_event(struct mlxsw_sp *mlxsw_sp,
1660                                         struct net_device *ol_dev,
1661                                         struct netlink_ext_ack *extack)
1662 {
1663         const struct mlxsw_sp_ipip_ops *ipip_ops;
1664         struct mlxsw_sp_ipip_entry *ipip_entry;
1665         int err;
1666
1667         ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1668         if (!ipip_entry)
1669                 /* A change might make a tunnel eligible for offloading, but
1670                  * that is currently not implemented. What falls to slow path
1671                  * stays there.
1672                  */
1673                 return 0;
1674
1675         /* A change might make a tunnel not eligible for offloading. */
1676         if (!mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev,
1677                                                  ipip_entry->ipipt)) {
1678                 mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1679                 return 0;
1680         }
1681
1682         ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1683         err = ipip_ops->ol_netdev_change(mlxsw_sp, ipip_entry, extack);
1684         return err;
1685 }
1686
1687 void mlxsw_sp_ipip_entry_demote_tunnel(struct mlxsw_sp *mlxsw_sp,
1688                                        struct mlxsw_sp_ipip_entry *ipip_entry)
1689 {
1690         struct net_device *ol_dev = ipip_entry->ol_dev;
1691
1692         if (ol_dev->flags & IFF_UP)
1693                 mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
1694         mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
1695 }
1696
1697 /* The configuration where several tunnels have the same local address in the
1698  * same underlay table needs special treatment in the HW. That is currently not
1699  * implemented in the driver. This function finds and demotes the first tunnel
1700  * with a given source address, except the one passed in in the argument
1701  * `except'.
1702  */
1703 bool
1704 mlxsw_sp_ipip_demote_tunnel_by_saddr(struct mlxsw_sp *mlxsw_sp,
1705                                      enum mlxsw_sp_l3proto ul_proto,
1706                                      union mlxsw_sp_l3addr saddr,
1707                                      u32 ul_tb_id,
1708                                      const struct mlxsw_sp_ipip_entry *except)
1709 {
1710         struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
1711
1712         list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
1713                                  ipip_list_node) {
1714                 if (ipip_entry != except &&
1715                     mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, saddr,
1716                                                       ul_tb_id, ipip_entry)) {
1717                         mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1718                         return true;
1719                 }
1720         }
1721
1722         return false;
1723 }
1724
1725 static void mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(struct mlxsw_sp *mlxsw_sp,
1726                                                      struct net_device *ul_dev)
1727 {
1728         struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
1729
1730         list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
1731                                  ipip_list_node) {
1732                 struct net_device *ipip_ul_dev =
1733                         __mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);
1734
1735                 if (ipip_ul_dev == ul_dev)
1736                         mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1737         }
1738 }
1739
1740 int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp,
1741                                      struct net_device *ol_dev,
1742                                      unsigned long event,
1743                                      struct netdev_notifier_info *info)
1744 {
1745         struct netdev_notifier_changeupper_info *chup;
1746         struct netlink_ext_ack *extack;
1747
1748         switch (event) {
1749         case NETDEV_REGISTER:
1750                 return mlxsw_sp_netdevice_ipip_ol_reg_event(mlxsw_sp, ol_dev);
1751         case NETDEV_UNREGISTER:
1752                 mlxsw_sp_netdevice_ipip_ol_unreg_event(mlxsw_sp, ol_dev);
1753                 return 0;
1754         case NETDEV_UP:
1755                 mlxsw_sp_netdevice_ipip_ol_up_event(mlxsw_sp, ol_dev);
1756                 return 0;
1757         case NETDEV_DOWN:
1758                 mlxsw_sp_netdevice_ipip_ol_down_event(mlxsw_sp, ol_dev);
1759                 return 0;
1760         case NETDEV_CHANGEUPPER:
1761                 chup = container_of(info, typeof(*chup), info);
1762                 extack = info->extack;
1763                 if (netif_is_l3_master(chup->upper_dev))
1764                         return mlxsw_sp_netdevice_ipip_ol_vrf_event(mlxsw_sp,
1765                                                                     ol_dev,
1766                                                                     extack);
1767                 return 0;
1768         case NETDEV_CHANGE:
1769                 extack = info->extack;
1770                 return mlxsw_sp_netdevice_ipip_ol_change_event(mlxsw_sp,
1771                                                                ol_dev, extack);
1772         case NETDEV_CHANGEMTU:
1773                 return mlxsw_sp_netdevice_ipip_ol_update_mtu(mlxsw_sp, ol_dev);
1774         }
1775         return 0;
1776 }
1777
1778 static int
1779 __mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
1780                                    struct mlxsw_sp_ipip_entry *ipip_entry,
1781                                    struct net_device *ul_dev,
1782                                    unsigned long event,
1783                                    struct netdev_notifier_info *info)
1784 {
1785         struct netdev_notifier_changeupper_info *chup;
1786         struct netlink_ext_ack *extack;
1787
1788         switch (event) {
1789         case NETDEV_CHANGEUPPER:
1790                 chup = container_of(info, typeof(*chup), info);
1791                 extack = info->extack;
1792                 if (netif_is_l3_master(chup->upper_dev))
1793                         return mlxsw_sp_netdevice_ipip_ul_vrf_event(mlxsw_sp,
1794                                                                     ipip_entry,
1795                                                                     ul_dev,
1796                                                                     extack);
1797                 break;
1798
1799         case NETDEV_UP:
1800                 return mlxsw_sp_netdevice_ipip_ul_up_event(mlxsw_sp, ipip_entry,
1801                                                            ul_dev);
1802         case NETDEV_DOWN:
1803                 return mlxsw_sp_netdevice_ipip_ul_down_event(mlxsw_sp,
1804                                                              ipip_entry,
1805                                                              ul_dev);
1806         }
1807         return 0;
1808 }
1809
1810 int
1811 mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
1812                                  struct net_device *ul_dev,
1813                                  unsigned long event,
1814                                  struct netdev_notifier_info *info)
1815 {
1816         struct mlxsw_sp_ipip_entry *ipip_entry = NULL;
1817         int err;
1818
1819         while ((ipip_entry = mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp,
1820                                                                 ul_dev,
1821                                                                 ipip_entry))) {
1822                 err = __mlxsw_sp_netdevice_ipip_ul_event(mlxsw_sp, ipip_entry,
1823                                                          ul_dev, event, info);
1824                 if (err) {
1825                         mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(mlxsw_sp,
1826                                                                  ul_dev);
1827                         return err;
1828                 }
1829         }
1830
1831         return 0;
1832 }
1833
1834 int mlxsw_sp_router_nve_promote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
1835                                       enum mlxsw_sp_l3proto ul_proto,
1836                                       const union mlxsw_sp_l3addr *ul_sip,
1837                                       u32 tunnel_index)
1838 {
1839         enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1840         struct mlxsw_sp_fib_entry *fib_entry;
1841         int err;
1842
1843         /* It is valid to create a tunnel with a local IP and only later
1844          * assign this IP address to a local interface
1845          */
1846         fib_entry = mlxsw_sp_router_ip2me_fib_entry_find(mlxsw_sp, ul_tb_id,
1847                                                          ul_proto, ul_sip,
1848                                                          type);
1849         if (!fib_entry)
1850                 return 0;
1851
1852         fib_entry->decap.tunnel_index = tunnel_index;
1853         fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
1854
1855         err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1856         if (err)
1857                 goto err_fib_entry_update;
1858
1859         return 0;
1860
1861 err_fib_entry_update:
1862         fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1863         mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1864         return err;
1865 }
1866
1867 void mlxsw_sp_router_nve_demote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
1868                                       enum mlxsw_sp_l3proto ul_proto,
1869                                       const union mlxsw_sp_l3addr *ul_sip)
1870 {
1871         enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
1872         struct mlxsw_sp_fib_entry *fib_entry;
1873
1874         fib_entry = mlxsw_sp_router_ip2me_fib_entry_find(mlxsw_sp, ul_tb_id,
1875                                                          ul_proto, ul_sip,
1876                                                          type);
1877         if (!fib_entry)
1878                 return;
1879
1880         fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1881         mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1882 }
1883
/* Key under which neighbour entries are stored in the neighbour hash table */
struct mlxsw_sp_neigh_key {
	/* Kernel neighbour the entry was created for */
	struct neighbour *n;
};
1887
1888 struct mlxsw_sp_neigh_entry {
1889         struct list_head rif_list_node;
1890         struct rhash_head ht_node;
1891         struct mlxsw_sp_neigh_key key;
1892         u16 rif;
1893         bool connected;
1894         unsigned char ha[ETH_ALEN];
1895         struct list_head nexthop_list; /* list of nexthops using
1896                                         * this neigh entry
1897                                         */
1898         struct list_head nexthop_neighs_list_node;
1899         unsigned int counter_index;
1900         bool counter_valid;
1901 };
1902
1903 static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
1904         .key_offset = offsetof(struct mlxsw_sp_neigh_entry, key),
1905         .head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node),
1906         .key_len = sizeof(struct mlxsw_sp_neigh_key),
1907 };
1908
1909 struct mlxsw_sp_neigh_entry *
1910 mlxsw_sp_rif_neigh_next(struct mlxsw_sp_rif *rif,
1911                         struct mlxsw_sp_neigh_entry *neigh_entry)
1912 {
1913         if (!neigh_entry) {
1914                 if (list_empty(&rif->neigh_list))
1915                         return NULL;
1916                 else
1917                         return list_first_entry(&rif->neigh_list,
1918                                                 typeof(*neigh_entry),
1919                                                 rif_list_node);
1920         }
1921         if (list_is_last(&neigh_entry->rif_list_node, &rif->neigh_list))
1922                 return NULL;
1923         return list_next_entry(neigh_entry, rif_list_node);
1924 }
1925
1926 int mlxsw_sp_neigh_entry_type(struct mlxsw_sp_neigh_entry *neigh_entry)
1927 {
1928         return neigh_entry->key.n->tbl->family;
1929 }
1930
1931 unsigned char *
1932 mlxsw_sp_neigh_entry_ha(struct mlxsw_sp_neigh_entry *neigh_entry)
1933 {
1934         return neigh_entry->ha;
1935 }
1936
1937 u32 mlxsw_sp_neigh4_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
1938 {
1939         struct neighbour *n;
1940
1941         n = neigh_entry->key.n;
1942         return ntohl(*((__be32 *) n->primary_key));
1943 }
1944
1945 struct in6_addr *
1946 mlxsw_sp_neigh6_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
1947 {
1948         struct neighbour *n;
1949
1950         n = neigh_entry->key.n;
1951         return (struct in6_addr *) &n->primary_key;
1952 }
1953
1954 int mlxsw_sp_neigh_counter_get(struct mlxsw_sp *mlxsw_sp,
1955                                struct mlxsw_sp_neigh_entry *neigh_entry,
1956                                u64 *p_counter)
1957 {
1958         if (!neigh_entry->counter_valid)
1959                 return -EINVAL;
1960
1961         return mlxsw_sp_flow_counter_get(mlxsw_sp, neigh_entry->counter_index,
1962                                          p_counter, NULL);
1963 }
1964
1965 static struct mlxsw_sp_neigh_entry *
1966 mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n,
1967                            u16 rif)
1968 {
1969         struct mlxsw_sp_neigh_entry *neigh_entry;
1970
1971         neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_KERNEL);
1972         if (!neigh_entry)
1973                 return NULL;
1974
1975         neigh_entry->key.n = n;
1976         neigh_entry->rif = rif;
1977         INIT_LIST_HEAD(&neigh_entry->nexthop_list);
1978
1979         return neigh_entry;
1980 }
1981
/* Release the memory of a neighbour entry. */
static void mlxsw_sp_neigh_entry_free(struct mlxsw_sp_neigh_entry *neigh_entry)
{
	kfree(neigh_entry);
}
1986
1987 static int
1988 mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp,
1989                             struct mlxsw_sp_neigh_entry *neigh_entry)
1990 {
1991         return rhashtable_insert_fast(&mlxsw_sp->router->neigh_ht,
1992                                       &neigh_entry->ht_node,
1993                                       mlxsw_sp_neigh_ht_params);
1994 }
1995
1996 static void
1997 mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
1998                             struct mlxsw_sp_neigh_entry *neigh_entry)
1999 {
2000         rhashtable_remove_fast(&mlxsw_sp->router->neigh_ht,
2001                                &neigh_entry->ht_node,
2002                                mlxsw_sp_neigh_ht_params);
2003 }
2004
2005 static bool
2006 mlxsw_sp_neigh_counter_should_alloc(struct mlxsw_sp *mlxsw_sp,
2007                                     struct mlxsw_sp_neigh_entry *neigh_entry)
2008 {
2009         struct devlink *devlink;
2010         const char *table_name;
2011
2012         switch (mlxsw_sp_neigh_entry_type(neigh_entry)) {
2013         case AF_INET:
2014                 table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST4;
2015                 break;
2016         case AF_INET6:
2017                 table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST6;
2018                 break;
2019         default:
2020                 WARN_ON(1);
2021                 return false;
2022         }
2023
2024         devlink = priv_to_devlink(mlxsw_sp->core);
2025         return devlink_dpipe_table_counter_enabled(devlink, table_name);
2026 }
2027
2028 static void
2029 mlxsw_sp_neigh_counter_alloc(struct mlxsw_sp *mlxsw_sp,
2030                              struct mlxsw_sp_neigh_entry *neigh_entry)
2031 {
2032         if (!mlxsw_sp_neigh_counter_should_alloc(mlxsw_sp, neigh_entry))
2033                 return;
2034
2035         if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &neigh_entry->counter_index))
2036                 return;
2037
2038         neigh_entry->counter_valid = true;
2039 }
2040
2041 static void
2042 mlxsw_sp_neigh_counter_free(struct mlxsw_sp *mlxsw_sp,
2043                             struct mlxsw_sp_neigh_entry *neigh_entry)
2044 {
2045         if (!neigh_entry->counter_valid)
2046                 return;
2047         mlxsw_sp_flow_counter_free(mlxsw_sp,
2048                                    neigh_entry->counter_index);
2049         neigh_entry->counter_valid = false;
2050 }
2051
2052 static struct mlxsw_sp_neigh_entry *
2053 mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
2054 {
2055         struct mlxsw_sp_neigh_entry *neigh_entry;
2056         struct mlxsw_sp_rif *rif;
2057         int err;
2058
2059         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
2060         if (!rif)
2061                 return ERR_PTR(-EINVAL);
2062
2063         neigh_entry = mlxsw_sp_neigh_entry_alloc(mlxsw_sp, n, rif->rif_index);
2064         if (!neigh_entry)
2065                 return ERR_PTR(-ENOMEM);
2066
2067         err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
2068         if (err)
2069                 goto err_neigh_entry_insert;
2070
2071         mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
2072         list_add(&neigh_entry->rif_list_node, &rif->neigh_list);
2073
2074         return neigh_entry;
2075
2076 err_neigh_entry_insert:
2077         mlxsw_sp_neigh_entry_free(neigh_entry);
2078         return ERR_PTR(err);
2079 }
2080
2081 static void
2082 mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp *mlxsw_sp,
2083                              struct mlxsw_sp_neigh_entry *neigh_entry)
2084 {
2085         list_del(&neigh_entry->rif_list_node);
2086         mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
2087         mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
2088         mlxsw_sp_neigh_entry_free(neigh_entry);
2089 }
2090
2091 static struct mlxsw_sp_neigh_entry *
2092 mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
2093 {
2094         struct mlxsw_sp_neigh_key key;
2095
2096         key.n = n;
2097         return rhashtable_lookup_fast(&mlxsw_sp->router->neigh_ht,
2098                                       &key, mlxsw_sp_neigh_ht_params);
2099 }
2100
2101 static void
2102 mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp)
2103 {
2104         unsigned long interval;
2105
2106 #if IS_ENABLED(CONFIG_IPV6)
2107         interval = min_t(unsigned long,
2108                          NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME),
2109                          NEIGH_VAR(&nd_tbl.parms, DELAY_PROBE_TIME));
2110 #else
2111         interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
2112 #endif
2113         mlxsw_sp->router->neighs_update.interval = jiffies_to_msecs(interval);
2114 }
2115
2116 static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
2117                                                    char *rauhtd_pl,
2118                                                    int ent_index)
2119 {
2120         struct net_device *dev;
2121         struct neighbour *n;
2122         __be32 dipn;
2123         u32 dip;
2124         u16 rif;
2125
2126         mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip);
2127
2128         if (!mlxsw_sp->router->rifs[rif]) {
2129                 dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
2130                 return;
2131         }
2132
2133         dipn = htonl(dip);
2134         dev = mlxsw_sp->router->rifs[rif]->dev;
2135         n = neigh_lookup(&arp_tbl, &dipn, dev);
2136         if (!n)
2137                 return;
2138
2139         netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip);
2140         neigh_event_send(n, NULL);
2141         neigh_release(n);
2142 }
2143
2144 #if IS_ENABLED(CONFIG_IPV6)
2145 static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2146                                                    char *rauhtd_pl,
2147                                                    int rec_index)
2148 {
2149         struct net_device *dev;
2150         struct neighbour *n;
2151         struct in6_addr dip;
2152         u16 rif;
2153
2154         mlxsw_reg_rauhtd_ent_ipv6_unpack(rauhtd_pl, rec_index, &rif,
2155                                          (char *) &dip);
2156
2157         if (!mlxsw_sp->router->rifs[rif]) {
2158                 dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
2159                 return;
2160         }
2161
2162         dev = mlxsw_sp->router->rifs[rif]->dev;
2163         n = neigh_lookup(&nd_tbl, &dip, dev);
2164         if (!n)
2165                 return;
2166
2167         netdev_dbg(dev, "Updating neighbour with IP=%pI6c\n", &dip);
2168         neigh_event_send(n, NULL);
2169         neigh_release(n);
2170 }
2171 #else
2172 static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2173                                                    char *rauhtd_pl,
2174                                                    int rec_index)
2175 {
2176 }
2177 #endif
2178
2179 static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
2180                                                    char *rauhtd_pl,
2181                                                    int rec_index)
2182 {
2183         u8 num_entries;
2184         int i;
2185
2186         num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
2187                                                                 rec_index);
2188         /* Hardware starts counting at 0, so add 1. */
2189         num_entries++;
2190
2191         /* Each record consists of several neighbour entries. */
2192         for (i = 0; i < num_entries; i++) {
2193                 int ent_index;
2194
2195                 ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i;
2196                 mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl,
2197                                                        ent_index);
2198         }
2199
2200 }
2201
/* Process one IPv6 record of a RAUHTD dump. */
static void mlxsw_sp_router_neigh_rec_ipv6_process(struct mlxsw_sp *mlxsw_sp,
						   char *rauhtd_pl,
						   int rec_index)
{
	/* An IPv6 record holds exactly one entry, so the record index
	 * doubles as the entry index.
	 */
	mlxsw_sp_router_neigh_ent_ipv6_process(mlxsw_sp, rauhtd_pl, rec_index);
}
2210
2211 static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
2212                                               char *rauhtd_pl, int rec_index)
2213 {
2214         switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) {
2215         case MLXSW_REG_RAUHTD_TYPE_IPV4:
2216                 mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl,
2217                                                        rec_index);
2218                 break;
2219         case MLXSW_REG_RAUHTD_TYPE_IPV6:
2220                 mlxsw_sp_router_neigh_rec_ipv6_process(mlxsw_sp, rauhtd_pl,
2221                                                        rec_index);
2222                 break;
2223         }
2224 }
2225
2226 static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl)
2227 {
2228         u8 num_rec, last_rec_index, num_entries;
2229
2230         num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
2231         last_rec_index = num_rec - 1;
2232
2233         if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM)
2234                 return false;
2235         if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) ==
2236             MLXSW_REG_RAUHTD_TYPE_IPV6)
2237                 return true;
2238
2239         num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
2240                                                                 last_rec_index);
2241         if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC)
2242                 return true;
2243         return false;
2244 }
2245
2246 static int
2247 __mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp,
2248                                        char *rauhtd_pl,
2249                                        enum mlxsw_reg_rauhtd_type type)
2250 {
2251         int i, num_rec;
2252         int err;
2253
2254         /* Make sure the neighbour's netdev isn't removed in the
2255          * process.
2256          */
2257         rtnl_lock();
2258         do {
2259                 mlxsw_reg_rauhtd_pack(rauhtd_pl, type);
2260                 err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
2261                                       rauhtd_pl);
2262                 if (err) {
2263                         dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour table\n");
2264                         break;
2265                 }
2266                 num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
2267                 for (i = 0; i < num_rec; i++)
2268                         mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
2269                                                           i);
2270         } while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
2271         rtnl_unlock();
2272
2273         return err;
2274 }
2275
2276 static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
2277 {
2278         enum mlxsw_reg_rauhtd_type type;
2279         char *rauhtd_pl;
2280         int err;
2281
2282         rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
2283         if (!rauhtd_pl)
2284                 return -ENOMEM;
2285
2286         type = MLXSW_REG_RAUHTD_TYPE_IPV4;
2287         err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
2288         if (err)
2289                 goto out;
2290
2291         type = MLXSW_REG_RAUHTD_TYPE_IPV6;
2292         err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
2293 out:
2294         kfree(rauhtd_pl);
2295         return err;
2296 }
2297
2298 static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp)
2299 {
2300         struct mlxsw_sp_neigh_entry *neigh_entry;
2301
2302         /* Take RTNL mutex here to prevent lists from changes */
2303         rtnl_lock();
2304         list_for_each_entry(neigh_entry, &mlxsw_sp->router->nexthop_neighs_list,
2305                             nexthop_neighs_list_node)
2306                 /* If this neigh have nexthops, make the kernel think this neigh
2307                  * is active regardless of the traffic.
2308                  */
2309                 neigh_event_send(neigh_entry->key.n, NULL);
2310         rtnl_unlock();
2311 }
2312
2313 static void
2314 mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp)
2315 {
2316         unsigned long interval = mlxsw_sp->router->neighs_update.interval;
2317
2318         mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw,
2319                                msecs_to_jiffies(interval));
2320 }
2321
2322 static void mlxsw_sp_router_neighs_update_work(struct work_struct *work)
2323 {
2324         struct mlxsw_sp_router *router;
2325         int err;
2326
2327         router = container_of(work, struct mlxsw_sp_router,
2328                               neighs_update.dw.work);
2329         err = mlxsw_sp_router_neighs_update_rauhtd(router->mlxsw_sp);
2330         if (err)
2331                 dev_err(router->mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity");
2332
2333         mlxsw_sp_router_neighs_update_nh(router->mlxsw_sp);
2334
2335         mlxsw_sp_router_neighs_update_work_schedule(router->mlxsw_sp);
2336 }
2337
2338 static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work)
2339 {
2340         struct mlxsw_sp_neigh_entry *neigh_entry;
2341         struct mlxsw_sp_router *router;
2342
2343         router = container_of(work, struct mlxsw_sp_router,
2344                               nexthop_probe_dw.work);
2345         /* Iterate over nexthop neighbours, find those who are unresolved and
2346          * send arp on them. This solves the chicken-egg problem when
2347          * the nexthop wouldn't get offloaded until the neighbor is resolved
2348          * but it wouldn't get resolved ever in case traffic is flowing in HW
2349          * using different nexthop.
2350          *
2351          * Take RTNL mutex here to prevent lists from changes.
2352          */
2353         rtnl_lock();
2354         list_for_each_entry(neigh_entry, &router->nexthop_neighs_list,
2355                             nexthop_neighs_list_node)
2356                 if (!neigh_entry->connected)
2357                         neigh_event_send(neigh_entry->key.n, NULL);
2358         rtnl_unlock();
2359
2360         mlxsw_core_schedule_dw(&router->nexthop_probe_dw,
2361                                MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL);
2362 }
2363
2364 static void
2365 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
2366                               struct mlxsw_sp_neigh_entry *neigh_entry,
2367                               bool removing);
2368
2369 static enum mlxsw_reg_rauht_op mlxsw_sp_rauht_op(bool adding)
2370 {
2371         return adding ? MLXSW_REG_RAUHT_OP_WRITE_ADD :
2372                         MLXSW_REG_RAUHT_OP_WRITE_DELETE;
2373 }
2374
2375 static int
2376 mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp,
2377                                 struct mlxsw_sp_neigh_entry *neigh_entry,
2378                                 enum mlxsw_reg_rauht_op op)
2379 {
2380         struct neighbour *n = neigh_entry->key.n;
2381         u32 dip = ntohl(*((__be32 *) n->primary_key));
2382         char rauht_pl[MLXSW_REG_RAUHT_LEN];
2383
2384         mlxsw_reg_rauht_pack4(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
2385                               dip);
2386         if (neigh_entry->counter_valid)
2387                 mlxsw_reg_rauht_pack_counter(rauht_pl,
2388                                              neigh_entry->counter_index);
2389         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
2390 }
2391
2392 static int
2393 mlxsw_sp_router_neigh_entry_op6(struct mlxsw_sp *mlxsw_sp,
2394                                 struct mlxsw_sp_neigh_entry *neigh_entry,
2395                                 enum mlxsw_reg_rauht_op op)
2396 {
2397         struct neighbour *n = neigh_entry->key.n;
2398         char rauht_pl[MLXSW_REG_RAUHT_LEN];
2399         const char *dip = n->primary_key;
2400
2401         mlxsw_reg_rauht_pack6(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
2402                               dip);
2403         if (neigh_entry->counter_valid)
2404                 mlxsw_reg_rauht_pack_counter(rauht_pl,
2405                                              neigh_entry->counter_index);
2406         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
2407 }
2408
2409 bool mlxsw_sp_neigh_ipv6_ignore(struct mlxsw_sp_neigh_entry *neigh_entry)
2410 {
2411         struct neighbour *n = neigh_entry->key.n;
2412
2413         /* Packets with a link-local destination address are trapped
2414          * after LPM lookup and never reach the neighbour table, so
2415          * there is no need to program such neighbours to the device.
2416          */
2417         if (ipv6_addr_type((struct in6_addr *) &n->primary_key) &
2418             IPV6_ADDR_LINKLOCAL)
2419                 return true;
2420         return false;
2421 }
2422
2423 static void
2424 mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp,
2425                             struct mlxsw_sp_neigh_entry *neigh_entry,
2426                             bool adding)
2427 {
2428         enum mlxsw_reg_rauht_op op = mlxsw_sp_rauht_op(adding);
2429         int err;
2430
2431         if (!adding && !neigh_entry->connected)
2432                 return;
2433         neigh_entry->connected = adding;
2434         if (neigh_entry->key.n->tbl->family == AF_INET) {
2435                 err = mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry,
2436                                                       op);
2437                 if (err)
2438                         return;
2439         } else if (neigh_entry->key.n->tbl->family == AF_INET6) {
2440                 if (mlxsw_sp_neigh_ipv6_ignore(neigh_entry))
2441                         return;
2442                 err = mlxsw_sp_router_neigh_entry_op6(mlxsw_sp, neigh_entry,
2443                                                       op);
2444                 if (err)
2445                         return;
2446         } else {
2447                 WARN_ON_ONCE(1);
2448                 return;
2449         }
2450
2451         if (adding)
2452                 neigh_entry->key.n->flags |= NTF_OFFLOADED;
2453         else
2454                 neigh_entry->key.n->flags &= ~NTF_OFFLOADED;
2455 }
2456
2457 void
2458 mlxsw_sp_neigh_entry_counter_update(struct mlxsw_sp *mlxsw_sp,
2459                                     struct mlxsw_sp_neigh_entry *neigh_entry,
2460                                     bool adding)
2461 {
2462         if (adding)
2463                 mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
2464         else
2465                 mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
2466         mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, true);
2467 }
2468
/* Context handed from the netevent notifier to a work item. Allocated by the
 * notifier path and freed by the work handler.
 */
struct mlxsw_sp_netevent_work {
        struct work_struct work;
        struct mlxsw_sp *mlxsw_sp;
        struct neighbour *n; /* set for neighbour updates only; left zeroed
                              * by mlxsw_sp_router_schedule_work()
                              */
};
2474
/* Work handler for a neighbour update: bring the driver's neighbour entry,
 * and the nexthops that use it, in line with the kernel neighbour's state.
 * Drops the neighbour reference and frees the work context before returning.
 */
static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
{
        struct mlxsw_sp_netevent_work *net_work =
                container_of(work, struct mlxsw_sp_netevent_work, work);
        struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
        struct mlxsw_sp_neigh_entry *neigh_entry;
        struct neighbour *n = net_work->n;
        unsigned char ha[ETH_ALEN];
        bool entry_connected;
        u8 nud_state, dead;

        /* If these parameters are changed after we release the lock,
         * then we are guaranteed to receive another event letting us
         * know about it.
         */
        read_lock_bh(&n->lock);
        memcpy(ha, n->ha, ETH_ALEN);
        nud_state = n->nud_state;
        dead = n->dead;
        read_unlock_bh(&n->lock);

        rtnl_lock();
        mlxsw_sp_span_respin(mlxsw_sp);

        /* Usable means a valid NUD state on a neighbour not marked dead. */
        entry_connected = nud_state & NUD_VALID && !dead;
        neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
        /* An unusable neighbour that was never tracked needs no work. */
        if (!entry_connected && !neigh_entry)
                goto out;
        if (!neigh_entry) {
                neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
                if (IS_ERR(neigh_entry))
                        goto out;
        }

        /* Use the hardware address snapshot taken under the lock above. */
        memcpy(neigh_entry->ha, ha, ETH_ALEN);
        mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected);
        mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected);

        /* Drop entries that are neither connected nor used by a nexthop. */
        if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
                mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);

out:
        rtnl_unlock();
        /* Release the reference taken when this work was queued. */
        neigh_release(n);
        kfree(net_work);
}
2521
2522 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp);
2523
2524 static void mlxsw_sp_router_mp_hash_event_work(struct work_struct *work)
2525 {
2526         struct mlxsw_sp_netevent_work *net_work =
2527                 container_of(work, struct mlxsw_sp_netevent_work, work);
2528         struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2529
2530         mlxsw_sp_mp_hash_init(mlxsw_sp);
2531         kfree(net_work);
2532 }
2533
2534 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp);
2535
2536 static void mlxsw_sp_router_update_priority_work(struct work_struct *work)
2537 {
2538         struct mlxsw_sp_netevent_work *net_work =
2539                 container_of(work, struct mlxsw_sp_netevent_work, work);
2540         struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2541
2542         __mlxsw_sp_router_init(mlxsw_sp);
2543         kfree(net_work);
2544 }
2545
2546 static int mlxsw_sp_router_schedule_work(struct net *net,
2547                                          struct notifier_block *nb,
2548                                          void (*cb)(struct work_struct *))
2549 {
2550         struct mlxsw_sp_netevent_work *net_work;
2551         struct mlxsw_sp_router *router;
2552
2553         if (!net_eq(net, &init_net))
2554                 return NOTIFY_DONE;
2555
2556         net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
2557         if (!net_work)
2558                 return NOTIFY_BAD;
2559
2560         router = container_of(nb, struct mlxsw_sp_router, netevent_nb);
2561         INIT_WORK(&net_work->work, cb);
2562         net_work->mlxsw_sp = router->mlxsw_sp;
2563         mlxsw_core_schedule_work(&net_work->work);
2564         return NOTIFY_DONE;
2565 }
2566
/* Netevent notifier callback of the router.
 *
 * Handles probe-time changes inline and defers neighbour, multipath hash and
 * forwarding-priority updates to work items. Returns NOTIFY_DONE, or
 * NOTIFY_BAD when a work context cannot be allocated.
 */
static int mlxsw_sp_router_netevent_event(struct notifier_block *nb,
                                          unsigned long event, void *ptr)
{
        struct mlxsw_sp_netevent_work *net_work;
        struct mlxsw_sp_port *mlxsw_sp_port;
        struct mlxsw_sp *mlxsw_sp;
        unsigned long interval;
        struct neigh_parms *p;
        struct neighbour *n;

        switch (event) {
        case NETEVENT_DELAY_PROBE_TIME_UPDATE:
                p = ptr;

                /* We don't care about changes in the default table. */
                if (!p->dev || (p->tbl->family != AF_INET &&
                                p->tbl->family != AF_INET6))
                        return NOTIFY_DONE;

                /* We are in atomic context and can't take RTNL mutex,
                 * so use RCU variant to walk the device chain.
                 */
                mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(p->dev);
                if (!mlxsw_sp_port)
                        return NOTIFY_DONE;

                /* Adopt the new probe time as the neighbour polling
                 * interval, in milliseconds.
                 */
                mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
                interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME));
                mlxsw_sp->router->neighs_update.interval = interval;

                mlxsw_sp_port_dev_put(mlxsw_sp_port);
                break;
        case NETEVENT_NEIGH_UPDATE:
                n = ptr;

                /* Only IPv4 and IPv6 neighbours are of interest. */
                if (n->tbl->family != AF_INET && n->tbl->family != AF_INET6)
                        return NOTIFY_DONE;

                mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(n->dev);
                if (!mlxsw_sp_port)
                        return NOTIFY_DONE;

                net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
                if (!net_work) {
                        mlxsw_sp_port_dev_put(mlxsw_sp_port);
                        return NOTIFY_BAD;
                }

                INIT_WORK(&net_work->work, mlxsw_sp_router_neigh_event_work);
                net_work->mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
                net_work->n = n;

                /* Take a reference to ensure the neighbour won't be
                 * destructed until we drop the reference in delayed
                 * work.
                 */
                neigh_clone(n);
                mlxsw_core_schedule_work(&net_work->work);
                mlxsw_sp_port_dev_put(mlxsw_sp_port);
                break;
        case NETEVENT_IPV4_MPATH_HASH_UPDATE:
        case NETEVENT_IPV6_MPATH_HASH_UPDATE:
                /* For these events ptr is passed on as the struct net. */
                return mlxsw_sp_router_schedule_work(ptr, nb,
                                mlxsw_sp_router_mp_hash_event_work);

        case NETEVENT_IPV4_FWD_UPDATE_PRIORITY_UPDATE:
                return mlxsw_sp_router_schedule_work(ptr, nb,
                                mlxsw_sp_router_update_priority_work);
        }

        return NOTIFY_DONE;
}
2639
2640 static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp)
2641 {
2642         int err;
2643
2644         err = rhashtable_init(&mlxsw_sp->router->neigh_ht,
2645                               &mlxsw_sp_neigh_ht_params);
2646         if (err)
2647                 return err;
2648
2649         /* Initialize the polling interval according to the default
2650          * table.
2651          */
2652         mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp);
2653
2654         /* Create the delayed works for the activity_update */
2655         INIT_DELAYED_WORK(&mlxsw_sp->router->neighs_update.dw,
2656                           mlxsw_sp_router_neighs_update_work);
2657         INIT_DELAYED_WORK(&mlxsw_sp->router->nexthop_probe_dw,
2658                           mlxsw_sp_router_probe_unresolved_nexthops);
2659         mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw, 0);
2660         mlxsw_core_schedule_dw(&mlxsw_sp->router->nexthop_probe_dw, 0);
2661         return 0;
2662 }
2663
2664 static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
2665 {
2666         cancel_delayed_work_sync(&mlxsw_sp->router->neighs_update.dw);
2667         cancel_delayed_work_sync(&mlxsw_sp->router->nexthop_probe_dw);
2668         rhashtable_destroy(&mlxsw_sp->router->neigh_ht);
2669 }
2670
2671 static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
2672                                          struct mlxsw_sp_rif *rif)
2673 {
2674         struct mlxsw_sp_neigh_entry *neigh_entry, *tmp;
2675
2676         list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list,
2677                                  rif_list_node) {
2678                 mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, false);
2679                 mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
2680         }
2681 }
2682
/* Kind of nexthop. mlxsw_sp_nexthop_group_update() dispatches on it to pick
 * the Ethernet or the IP-in-IP adjacency update path.
 */
enum mlxsw_sp_nexthop_type {
        MLXSW_SP_NEXTHOP_TYPE_ETH,
        MLXSW_SP_NEXTHOP_TYPE_IPIP,
};
2687
/* Key of the nexthop hash table (see mlxsw_sp_nexthop_ht_params): nexthops
 * are looked up by the kernel fib_nh pointer.
 */
struct mlxsw_sp_nexthop_key {
        struct fib_nh *fib_nh;
};
2691
/* A single nexthop, stored inline in its nexthop group's nexthops array. */
struct mlxsw_sp_nexthop {
        struct list_head neigh_list_node; /* member of neigh entry list */
        struct list_head rif_list_node;
        struct list_head router_list_node; /* member of the router's
                                            * nexthop_list
                                            */
        struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group
                                                * this belongs to
                                                */
        struct rhash_head ht_node;
        struct mlxsw_sp_nexthop_key key;
        /* Sized for an IPv6 address; compared as a struct in6_addr for IPv6
         * nexthop groups.
         */
        unsigned char gw_addr[sizeof(struct in6_addr)];
        int ifindex;
        int nh_weight;
        int norm_nh_weight;
        int num_adj_entries; /* number of consecutive adjacency entries
                              * written for this nexthop
                              */
        struct mlxsw_sp_rif *rif;
        u8 should_offload:1, /* set indicates this neigh is connected and
                              * should be put to KVD linear area of this group.
                              */
           offloaded:1, /* set in case the neigh is actually put into
                         * KVD linear area of this group.
                         */
           update:1; /* set indicates that MAC of this neigh should be
                      * updated in HW
                      */
        enum mlxsw_sp_nexthop_type type;
        /* The two members share storage; which one is meaningful presumably
         * follows 'type' - confirm against the code that assigns them.
         */
        union {
                struct mlxsw_sp_neigh_entry *neigh_entry;
                struct mlxsw_sp_ipip_entry *ipip_entry;
        };
        unsigned int counter_index; /* meaningful only if counter_valid */
        bool counter_valid;
};
2724
2725 struct mlxsw_sp_nexthop_group {
2726         void *priv;
2727         struct rhash_head ht_node;
2728         struct list_head fib_list; /* list of fib entries that use this group */
2729         struct neigh_table *neigh_tbl;
2730         u8 adj_index_valid:1,
2731            gateway:1; /* routes using the group use a gateway */
2732         u32 adj_index;
2733         u16 ecmp_size;
2734         u16 count;
2735         int sum_norm_weight;
2736         struct mlxsw_sp_nexthop nexthops[0];
2737 #define nh_rif  nexthops[0].rif
2738 };
2739
2740 void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp,
2741                                     struct mlxsw_sp_nexthop *nh)
2742 {
2743         struct devlink *devlink;
2744
2745         devlink = priv_to_devlink(mlxsw_sp->core);
2746         if (!devlink_dpipe_table_counter_enabled(devlink,
2747                                                  MLXSW_SP_DPIPE_TABLE_NAME_ADJ))
2748                 return;
2749
2750         if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &nh->counter_index))
2751                 return;
2752
2753         nh->counter_valid = true;
2754 }
2755
2756 void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp,
2757                                    struct mlxsw_sp_nexthop *nh)
2758 {
2759         if (!nh->counter_valid)
2760                 return;
2761         mlxsw_sp_flow_counter_free(mlxsw_sp, nh->counter_index);
2762         nh->counter_valid = false;
2763 }
2764
2765 int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp,
2766                                  struct mlxsw_sp_nexthop *nh, u64 *p_counter)
2767 {
2768         if (!nh->counter_valid)
2769                 return -EINVAL;
2770
2771         return mlxsw_sp_flow_counter_get(mlxsw_sp, nh->counter_index,
2772                                          p_counter, NULL);
2773 }
2774
2775 struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router,
2776                                                struct mlxsw_sp_nexthop *nh)
2777 {
2778         if (!nh) {
2779                 if (list_empty(&router->nexthop_list))
2780                         return NULL;
2781                 else
2782                         return list_first_entry(&router->nexthop_list,
2783                                                 typeof(*nh), router_list_node);
2784         }
2785         if (list_is_last(&nh->router_list_node, &router->nexthop_list))
2786                 return NULL;
2787         return list_next_entry(nh, router_list_node);
2788 }
2789
2790 bool mlxsw_sp_nexthop_offload(struct mlxsw_sp_nexthop *nh)
2791 {
2792         return nh->offloaded;
2793 }
2794
2795 unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh)
2796 {
2797         if (!nh->offloaded)
2798                 return NULL;
2799         return nh->neigh_entry->ha;
2800 }
2801
2802 int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index,
2803                              u32 *p_adj_size, u32 *p_adj_hash_index)
2804 {
2805         struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
2806         u32 adj_hash_index = 0;
2807         int i;
2808
2809         if (!nh->offloaded || !nh_grp->adj_index_valid)
2810                 return -EINVAL;
2811
2812         *p_adj_index = nh_grp->adj_index;
2813         *p_adj_size = nh_grp->ecmp_size;
2814
2815         for (i = 0; i < nh_grp->count; i++) {
2816                 struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];
2817
2818                 if (nh_iter == nh)
2819                         break;
2820                 if (nh_iter->offloaded)
2821                         adj_hash_index += nh_iter->num_adj_entries;
2822         }
2823
2824         *p_adj_hash_index = adj_hash_index;
2825         return 0;
2826 }
2827
2828 struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh)
2829 {
2830         return nh->rif;
2831 }
2832
2833 bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh)
2834 {
2835         struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
2836         int i;
2837
2838         for (i = 0; i < nh_grp->count; i++) {
2839                 struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];
2840
2841                 if (nh_iter->type == MLXSW_SP_NEXTHOP_TYPE_IPIP)
2842                         return true;
2843         }
2844         return false;
2845 }
2846
2847 static struct fib_info *
2848 mlxsw_sp_nexthop4_group_fi(const struct mlxsw_sp_nexthop_group *nh_grp)
2849 {
2850         return nh_grp->priv;
2851 }
2852
/* Lookup key for the nexthop group hash table. 'proto' selects which union
 * member the hash and compare callbacks read: 'fi' for IPv4, 'fib6_entry'
 * for IPv6.
 */
struct mlxsw_sp_nexthop_group_cmp_arg {
        enum mlxsw_sp_l3proto proto;
        union {
                struct fib_info *fi;
                struct mlxsw_sp_fib6_entry *fib6_entry;
        };
};
2860
2861 static bool
2862 mlxsw_sp_nexthop6_group_has_nexthop(const struct mlxsw_sp_nexthop_group *nh_grp,
2863                                     const struct in6_addr *gw, int ifindex,
2864                                     int weight)
2865 {
2866         int i;
2867
2868         for (i = 0; i < nh_grp->count; i++) {
2869                 const struct mlxsw_sp_nexthop *nh;
2870
2871                 nh = &nh_grp->nexthops[i];
2872                 if (nh->ifindex == ifindex && nh->nh_weight == weight &&
2873                     ipv6_addr_equal(gw, (struct in6_addr *) nh->gw_addr))
2874                         return true;
2875         }
2876
2877         return false;
2878 }
2879
2880 static bool
2881 mlxsw_sp_nexthop6_group_cmp(const struct mlxsw_sp_nexthop_group *nh_grp,
2882                             const struct mlxsw_sp_fib6_entry *fib6_entry)
2883 {
2884         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
2885
2886         if (nh_grp->count != fib6_entry->nrt6)
2887                 return false;
2888
2889         list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
2890                 struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh;
2891                 struct in6_addr *gw;
2892                 int ifindex, weight;
2893
2894                 ifindex = fib6_nh->fib_nh_dev->ifindex;
2895                 weight = fib6_nh->fib_nh_weight;
2896                 gw = &fib6_nh->fib_nh_gw6;
2897                 if (!mlxsw_sp_nexthop6_group_has_nexthop(nh_grp, gw, ifindex,
2898                                                          weight))
2899                         return false;
2900         }
2901
2902         return true;
2903 }
2904
2905 static int
2906 mlxsw_sp_nexthop_group_cmp(struct rhashtable_compare_arg *arg, const void *ptr)
2907 {
2908         const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = arg->key;
2909         const struct mlxsw_sp_nexthop_group *nh_grp = ptr;
2910
2911         switch (cmp_arg->proto) {
2912         case MLXSW_SP_L3_PROTO_IPV4:
2913                 return cmp_arg->fi != mlxsw_sp_nexthop4_group_fi(nh_grp);
2914         case MLXSW_SP_L3_PROTO_IPV6:
2915                 return !mlxsw_sp_nexthop6_group_cmp(nh_grp,
2916                                                     cmp_arg->fib6_entry);
2917         default:
2918                 WARN_ON(1);
2919                 return 1;
2920         }
2921 }
2922
2923 static int
2924 mlxsw_sp_nexthop_group_type(const struct mlxsw_sp_nexthop_group *nh_grp)
2925 {
2926         return nh_grp->neigh_tbl->family;
2927 }
2928
2929 static u32 mlxsw_sp_nexthop_group_hash_obj(const void *data, u32 len, u32 seed)
2930 {
2931         const struct mlxsw_sp_nexthop_group *nh_grp = data;
2932         const struct mlxsw_sp_nexthop *nh;
2933         struct fib_info *fi;
2934         unsigned int val;
2935         int i;
2936
2937         switch (mlxsw_sp_nexthop_group_type(nh_grp)) {
2938         case AF_INET:
2939                 fi = mlxsw_sp_nexthop4_group_fi(nh_grp);
2940                 return jhash(&fi, sizeof(fi), seed);
2941         case AF_INET6:
2942                 val = nh_grp->count;
2943                 for (i = 0; i < nh_grp->count; i++) {
2944                         nh = &nh_grp->nexthops[i];
2945                         val ^= nh->ifindex;
2946                 }
2947                 return jhash(&val, sizeof(val), seed);
2948         default:
2949                 WARN_ON(1);
2950                 return 0;
2951         }
2952 }
2953
2954 static u32
2955 mlxsw_sp_nexthop6_group_hash(struct mlxsw_sp_fib6_entry *fib6_entry, u32 seed)
2956 {
2957         unsigned int val = fib6_entry->nrt6;
2958         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
2959         struct net_device *dev;
2960
2961         list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
2962                 dev = mlxsw_sp_rt6->rt->fib6_nh->fib_nh_dev;
2963                 val ^= dev->ifindex;
2964         }
2965
2966         return jhash(&val, sizeof(val), seed);
2967 }
2968
2969 static u32
2970 mlxsw_sp_nexthop_group_hash(const void *data, u32 len, u32 seed)
2971 {
2972         const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = data;
2973
2974         switch (cmp_arg->proto) {
2975         case MLXSW_SP_L3_PROTO_IPV4:
2976                 return jhash(&cmp_arg->fi, sizeof(cmp_arg->fi), seed);
2977         case MLXSW_SP_L3_PROTO_IPV6:
2978                 return mlxsw_sp_nexthop6_group_hash(cmp_arg->fib6_entry, seed);
2979         default:
2980                 WARN_ON(1);
2981                 return 0;
2982         }
2983 }
2984
/* Parameters of the nexthop group hash table. Custom callbacks are used
 * because lookups are keyed by a struct mlxsw_sp_nexthop_group_cmp_arg
 * rather than by a field stored in the group.
 */
static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = {
        .head_offset = offsetof(struct mlxsw_sp_nexthop_group, ht_node),
        .hashfn      = mlxsw_sp_nexthop_group_hash,
        .obj_hashfn  = mlxsw_sp_nexthop_group_hash_obj,
        .obj_cmpfn   = mlxsw_sp_nexthop_group_cmp,
};
2991
2992 static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp,
2993                                          struct mlxsw_sp_nexthop_group *nh_grp)
2994 {
2995         if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
2996             !nh_grp->gateway)
2997                 return 0;
2998
2999         return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_group_ht,
3000                                       &nh_grp->ht_node,
3001                                       mlxsw_sp_nexthop_group_ht_params);
3002 }
3003
3004 static void mlxsw_sp_nexthop_group_remove(struct mlxsw_sp *mlxsw_sp,
3005                                           struct mlxsw_sp_nexthop_group *nh_grp)
3006 {
3007         if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
3008             !nh_grp->gateway)
3009                 return;
3010
3011         rhashtable_remove_fast(&mlxsw_sp->router->nexthop_group_ht,
3012                                &nh_grp->ht_node,
3013                                mlxsw_sp_nexthop_group_ht_params);
3014 }
3015
3016 static struct mlxsw_sp_nexthop_group *
3017 mlxsw_sp_nexthop4_group_lookup(struct mlxsw_sp *mlxsw_sp,
3018                                struct fib_info *fi)
3019 {
3020         struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
3021
3022         cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV4;
3023         cmp_arg.fi = fi;
3024         return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
3025                                       &cmp_arg,
3026                                       mlxsw_sp_nexthop_group_ht_params);
3027 }
3028
3029 static struct mlxsw_sp_nexthop_group *
3030 mlxsw_sp_nexthop6_group_lookup(struct mlxsw_sp *mlxsw_sp,
3031                                struct mlxsw_sp_fib6_entry *fib6_entry)
3032 {
3033         struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
3034
3035         cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV6;
3036         cmp_arg.fib6_entry = fib6_entry;
3037         return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
3038                                       &cmp_arg,
3039                                       mlxsw_sp_nexthop_group_ht_params);
3040 }
3041
/* Parameters of the nexthop hash table: nexthops are keyed by the whole
 * struct mlxsw_sp_nexthop_key embedded in each nexthop.
 */
static const struct rhashtable_params mlxsw_sp_nexthop_ht_params = {
        .key_offset = offsetof(struct mlxsw_sp_nexthop, key),
        .head_offset = offsetof(struct mlxsw_sp_nexthop, ht_node),
        .key_len = sizeof(struct mlxsw_sp_nexthop_key),
};
3047
3048 static int mlxsw_sp_nexthop_insert(struct mlxsw_sp *mlxsw_sp,
3049                                    struct mlxsw_sp_nexthop *nh)
3050 {
3051         return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_ht,
3052                                       &nh->ht_node, mlxsw_sp_nexthop_ht_params);
3053 }
3054
3055 static void mlxsw_sp_nexthop_remove(struct mlxsw_sp *mlxsw_sp,
3056                                     struct mlxsw_sp_nexthop *nh)
3057 {
3058         rhashtable_remove_fast(&mlxsw_sp->router->nexthop_ht, &nh->ht_node,
3059                                mlxsw_sp_nexthop_ht_params);
3060 }
3061
3062 static struct mlxsw_sp_nexthop *
3063 mlxsw_sp_nexthop_lookup(struct mlxsw_sp *mlxsw_sp,
3064                         struct mlxsw_sp_nexthop_key key)
3065 {
3066         return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_ht, &key,
3067                                       mlxsw_sp_nexthop_ht_params);
3068 }
3069
3070 static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp,
3071                                              const struct mlxsw_sp_fib *fib,
3072                                              u32 adj_index, u16 ecmp_size,
3073                                              u32 new_adj_index,
3074                                              u16 new_ecmp_size)
3075 {
3076         char raleu_pl[MLXSW_REG_RALEU_LEN];
3077
3078         mlxsw_reg_raleu_pack(raleu_pl,
3079                              (enum mlxsw_reg_ralxx_protocol) fib->proto,
3080                              fib->vr->id, adj_index, ecmp_size, new_adj_index,
3081                              new_ecmp_size);
3082         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl);
3083 }
3084
3085 static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
3086                                           struct mlxsw_sp_nexthop_group *nh_grp,
3087                                           u32 old_adj_index, u16 old_ecmp_size)
3088 {
3089         struct mlxsw_sp_fib_entry *fib_entry;
3090         struct mlxsw_sp_fib *fib = NULL;
3091         int err;
3092
3093         list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
3094                 if (fib == fib_entry->fib_node->fib)
3095                         continue;
3096                 fib = fib_entry->fib_node->fib;
3097                 err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, fib,
3098                                                         old_adj_index,
3099                                                         old_ecmp_size,
3100                                                         nh_grp->adj_index,
3101                                                         nh_grp->ecmp_size);
3102                 if (err)
3103                         return err;
3104         }
3105         return 0;
3106 }
3107
3108 static int __mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
3109                                      struct mlxsw_sp_nexthop *nh)
3110 {
3111         struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
3112         char ratr_pl[MLXSW_REG_RATR_LEN];
3113
3114         mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
3115                             true, MLXSW_REG_RATR_TYPE_ETHERNET,
3116                             adj_index, neigh_entry->rif);
3117         mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
3118         if (nh->counter_valid)
3119                 mlxsw_reg_ratr_counter_pack(ratr_pl, nh->counter_index, true);
3120         else
3121                 mlxsw_reg_ratr_counter_pack(ratr_pl, 0, false);
3122
3123         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
3124 }
3125
3126 int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
3127                             struct mlxsw_sp_nexthop *nh)
3128 {
3129         int i;
3130
3131         for (i = 0; i < nh->num_adj_entries; i++) {
3132                 int err;
3133
3134                 err = __mlxsw_sp_nexthop_update(mlxsw_sp, adj_index + i, nh);
3135                 if (err)
3136                         return err;
3137         }
3138
3139         return 0;
3140 }
3141
3142 static int __mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
3143                                           u32 adj_index,
3144                                           struct mlxsw_sp_nexthop *nh)
3145 {
3146         const struct mlxsw_sp_ipip_ops *ipip_ops;
3147
3148         ipip_ops = mlxsw_sp->router->ipip_ops_arr[nh->ipip_entry->ipipt];
3149         return ipip_ops->nexthop_update(mlxsw_sp, adj_index, nh->ipip_entry);
3150 }
3151
3152 static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
3153                                         u32 adj_index,
3154                                         struct mlxsw_sp_nexthop *nh)
3155 {
3156         int i;
3157
3158         for (i = 0; i < nh->num_adj_entries; i++) {
3159                 int err;
3160
3161                 err = __mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index + i,
3162                                                      nh);
3163                 if (err)
3164                         return err;
3165         }
3166
3167         return 0;
3168 }
3169
3170 static int
3171 mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
3172                               struct mlxsw_sp_nexthop_group *nh_grp,
3173                               bool reallocate)
3174 {
3175         u32 adj_index = nh_grp->adj_index; /* base */
3176         struct mlxsw_sp_nexthop *nh;
3177         int i;
3178         int err;
3179
3180         for (i = 0; i < nh_grp->count; i++) {
3181                 nh = &nh_grp->nexthops[i];
3182
3183                 if (!nh->should_offload) {
3184                         nh->offloaded = 0;
3185                         continue;
3186                 }
3187
3188                 if (nh->update || reallocate) {
3189                         switch (nh->type) {
3190                         case MLXSW_SP_NEXTHOP_TYPE_ETH:
3191                                 err = mlxsw_sp_nexthop_update
3192                                             (mlxsw_sp, adj_index, nh);
3193                                 break;
3194                         case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3195                                 err = mlxsw_sp_nexthop_ipip_update
3196                                             (mlxsw_sp, adj_index, nh);
3197                                 break;
3198                         }
3199                         if (err)
3200                                 return err;
3201                         nh->update = 0;
3202                         nh->offloaded = 1;
3203                 }
3204                 adj_index += nh->num_adj_entries;
3205         }
3206         return 0;
3207 }
3208
/* Forward declaration so the nexthop-group helpers below can call this
 * predicate; its definition is not in this part of the file.
 */
static bool
mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
				 const struct mlxsw_sp_fib_entry *fib_entry);
3212
3213 static int
3214 mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
3215                                     struct mlxsw_sp_nexthop_group *nh_grp)
3216 {
3217         struct mlxsw_sp_fib_entry *fib_entry;
3218         int err;
3219
3220         list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
3221                 if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node,
3222                                                       fib_entry))
3223                         continue;
3224                 err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
3225                 if (err)
3226                         return err;
3227         }
3228         return 0;
3229 }
3230
/* Forward declaration so mlxsw_sp_nexthop_fib_entries_refresh() below can
 * call this helper; its definition is not in this part of the file.
 */
static void
mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
				   enum mlxsw_reg_ralue_op op, int err);
3234
3235 static void
3236 mlxsw_sp_nexthop_fib_entries_refresh(struct mlxsw_sp_nexthop_group *nh_grp)
3237 {
3238         enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_WRITE;
3239         struct mlxsw_sp_fib_entry *fib_entry;
3240
3241         list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
3242                 if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node,
3243                                                       fib_entry))
3244                         continue;
3245                 mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
3246         }
3247 }
3248
/* Round an adjacency group size up to the next valid size, in place.
 *
 * Valid sizes are 1-64, 512, 1024, 2048 and 4096. Values up to 64 are left
 * as they are; anything above 2048 becomes 4096.
 */
static void mlxsw_sp_adj_grp_size_round_up(u16 *p_adj_grp_size)
{
	u16 size = *p_adj_grp_size;

	if (size <= 64)
		return;

	if (size <= 512)
		size = 512;
	else if (size <= 1024)
		size = 1024;
	else if (size <= 2048)
		size = 2048;
	else
		size = 4096;

	*p_adj_grp_size = size;
}
3265
/* Set the group size to the largest of 512, 1024, 2048 and 4096 that does
 * not exceed @alloc_size.
 *
 * When @alloc_size is below 512 the value behind @p_adj_grp_size is left
 * unchanged.
 */
static void mlxsw_sp_adj_grp_size_round_down(u16 *p_adj_grp_size,
					     unsigned int alloc_size)
{
	if (alloc_size < 512)
		return;

	if (alloc_size < 1024)
		*p_adj_grp_size = 512;
	else if (alloc_size < 2048)
		*p_adj_grp_size = 1024;
	else if (alloc_size < 4096)
		*p_adj_grp_size = 2048;
	else
		*p_adj_grp_size = 4096;
}
3278
3279 static int mlxsw_sp_fix_adj_grp_size(struct mlxsw_sp *mlxsw_sp,
3280                                      u16 *p_adj_grp_size)
3281 {
3282         unsigned int alloc_size;
3283         int err;
3284
3285         /* Round up the requested group size to the next size supported
3286          * by the device and make sure the request can be satisfied.
3287          */
3288         mlxsw_sp_adj_grp_size_round_up(p_adj_grp_size);
3289         err = mlxsw_sp_kvdl_alloc_count_query(mlxsw_sp,
3290                                               MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3291                                               *p_adj_grp_size, &alloc_size);
3292         if (err)
3293                 return err;
3294         /* It is possible the allocation results in more allocated
3295          * entries than requested. Try to use as much of them as
3296          * possible.
3297          */
3298         mlxsw_sp_adj_grp_size_round_down(p_adj_grp_size, alloc_size);
3299
3300         return 0;
3301 }
3302
3303 static void
3304 mlxsw_sp_nexthop_group_normalize(struct mlxsw_sp_nexthop_group *nh_grp)
3305 {
3306         int i, g = 0, sum_norm_weight = 0;
3307         struct mlxsw_sp_nexthop *nh;
3308
3309         for (i = 0; i < nh_grp->count; i++) {
3310                 nh = &nh_grp->nexthops[i];
3311
3312                 if (!nh->should_offload)
3313                         continue;
3314                 if (g > 0)
3315                         g = gcd(nh->nh_weight, g);
3316                 else
3317                         g = nh->nh_weight;
3318         }
3319
3320         for (i = 0; i < nh_grp->count; i++) {
3321                 nh = &nh_grp->nexthops[i];
3322
3323                 if (!nh->should_offload)
3324                         continue;
3325                 nh->norm_nh_weight = nh->nh_weight / g;
3326                 sum_norm_weight += nh->norm_nh_weight;
3327         }
3328
3329         nh_grp->sum_norm_weight = sum_norm_weight;
3330 }
3331
3332 static void
3333 mlxsw_sp_nexthop_group_rebalance(struct mlxsw_sp_nexthop_group *nh_grp)
3334 {
3335         int total = nh_grp->sum_norm_weight;
3336         u16 ecmp_size = nh_grp->ecmp_size;
3337         int i, weight = 0, lower_bound = 0;
3338
3339         for (i = 0; i < nh_grp->count; i++) {
3340                 struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3341                 int upper_bound;
3342
3343                 if (!nh->should_offload)
3344                         continue;
3345                 weight += nh->norm_nh_weight;
3346                 upper_bound = DIV_ROUND_CLOSEST(ecmp_size * weight, total);
3347                 nh->num_adj_entries = upper_bound - lower_bound;
3348                 lower_bound = upper_bound;
3349         }
3350 }
3351
3352 static void
3353 mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
3354                                struct mlxsw_sp_nexthop_group *nh_grp)
3355 {
3356         u16 ecmp_size, old_ecmp_size;
3357         struct mlxsw_sp_nexthop *nh;
3358         bool offload_change = false;
3359         u32 adj_index;
3360         bool old_adj_index_valid;
3361         u32 old_adj_index;
3362         int i;
3363         int err;
3364
3365         if (!nh_grp->gateway) {
3366                 mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3367                 return;
3368         }
3369
3370         for (i = 0; i < nh_grp->count; i++) {
3371                 nh = &nh_grp->nexthops[i];
3372
3373                 if (nh->should_offload != nh->offloaded) {
3374                         offload_change = true;
3375                         if (nh->should_offload)
3376                                 nh->update = 1;
3377                 }
3378         }
3379         if (!offload_change) {
3380                 /* Nothing was added or removed, so no need to reallocate. Just
3381                  * update MAC on existing adjacency indexes.
3382                  */
3383                 err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, false);
3384                 if (err) {
3385                         dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
3386                         goto set_trap;
3387                 }
3388                 return;
3389         }
3390         mlxsw_sp_nexthop_group_normalize(nh_grp);
3391         if (!nh_grp->sum_norm_weight)
3392                 /* No neigh of this group is connected so we just set
3393                  * the trap and let everthing flow through kernel.
3394                  */
3395                 goto set_trap;
3396
3397         ecmp_size = nh_grp->sum_norm_weight;
3398         err = mlxsw_sp_fix_adj_grp_size(mlxsw_sp, &ecmp_size);
3399         if (err)
3400                 /* No valid allocation size available. */
3401                 goto set_trap;
3402
3403         err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3404                                   ecmp_size, &adj_index);
3405         if (err) {
3406                 /* We ran out of KVD linear space, just set the
3407                  * trap and let everything flow through kernel.
3408                  */
3409                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n");
3410                 goto set_trap;
3411         }
3412         old_adj_index_valid = nh_grp->adj_index_valid;
3413         old_adj_index = nh_grp->adj_index;
3414         old_ecmp_size = nh_grp->ecmp_size;
3415         nh_grp->adj_index_valid = 1;
3416         nh_grp->adj_index = adj_index;
3417         nh_grp->ecmp_size = ecmp_size;
3418         mlxsw_sp_nexthop_group_rebalance(nh_grp);
3419         err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, true);
3420         if (err) {
3421                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
3422                 goto set_trap;
3423         }
3424
3425         if (!old_adj_index_valid) {
3426                 /* The trap was set for fib entries, so we have to call
3427                  * fib entry update to unset it and use adjacency index.
3428                  */
3429                 err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3430                 if (err) {
3431                         dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n");
3432                         goto set_trap;
3433                 }
3434                 return;
3435         }
3436
3437         err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
3438                                              old_adj_index, old_ecmp_size);
3439         mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3440                            old_ecmp_size, old_adj_index);
3441         if (err) {
3442                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
3443                 goto set_trap;
3444         }
3445
3446         /* Offload state within the group changed, so update the flags. */
3447         mlxsw_sp_nexthop_fib_entries_refresh(nh_grp);
3448
3449         return;
3450
3451 set_trap:
3452         old_adj_index_valid = nh_grp->adj_index_valid;
3453         nh_grp->adj_index_valid = 0;
3454         for (i = 0; i < nh_grp->count; i++) {
3455                 nh = &nh_grp->nexthops[i];
3456                 nh->offloaded = 0;
3457         }
3458         err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3459         if (err)
3460                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
3461         if (old_adj_index_valid)
3462                 mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3463                                    nh_grp->ecmp_size, nh_grp->adj_index);
3464 }
3465
3466 static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
3467                                             bool removing)
3468 {
3469         if (!removing)
3470                 nh->should_offload = 1;
3471         else
3472                 nh->should_offload = 0;
3473         nh->update = 1;
3474 }
3475
3476 static void
3477 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
3478                               struct mlxsw_sp_neigh_entry *neigh_entry,
3479                               bool removing)
3480 {
3481         struct mlxsw_sp_nexthop *nh;
3482
3483         list_for_each_entry(nh, &neigh_entry->nexthop_list,
3484                             neigh_list_node) {
3485                 __mlxsw_sp_nexthop_neigh_update(nh, removing);
3486                 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3487         }
3488 }
3489
3490 static void mlxsw_sp_nexthop_rif_init(struct mlxsw_sp_nexthop *nh,
3491                                       struct mlxsw_sp_rif *rif)
3492 {
3493         if (nh->rif)
3494                 return;
3495
3496         nh->rif = rif;
3497         list_add(&nh->rif_list_node, &rif->nexthop_list);
3498 }
3499
3500 static void mlxsw_sp_nexthop_rif_fini(struct mlxsw_sp_nexthop *nh)
3501 {
3502         if (!nh->rif)
3503                 return;
3504
3505         list_del(&nh->rif_list_node);
3506         nh->rif = NULL;
3507 }
3508
3509 static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp,
3510                                        struct mlxsw_sp_nexthop *nh)
3511 {
3512         struct mlxsw_sp_neigh_entry *neigh_entry;
3513         struct neighbour *n;
3514         u8 nud_state, dead;
3515         int err;
3516
3517         if (!nh->nh_grp->gateway || nh->neigh_entry)
3518                 return 0;
3519
3520         /* Take a reference of neigh here ensuring that neigh would
3521          * not be destructed before the nexthop entry is finished.
3522          * The reference is taken either in neigh_lookup() or
3523          * in neigh_create() in case n is not found.
3524          */
3525         n = neigh_lookup(nh->nh_grp->neigh_tbl, &nh->gw_addr, nh->rif->dev);
3526         if (!n) {
3527                 n = neigh_create(nh->nh_grp->neigh_tbl, &nh->gw_addr,
3528                                  nh->rif->dev);
3529                 if (IS_ERR(n))
3530                         return PTR_ERR(n);
3531                 neigh_event_send(n, NULL);
3532         }
3533         neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
3534         if (!neigh_entry) {
3535                 neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
3536                 if (IS_ERR(neigh_entry)) {
3537                         err = -EINVAL;
3538                         goto err_neigh_entry_create;
3539                 }
3540         }
3541
3542         /* If that is the first nexthop connected to that neigh, add to
3543          * nexthop_neighs_list
3544          */
3545         if (list_empty(&neigh_entry->nexthop_list))
3546                 list_add_tail(&neigh_entry->nexthop_neighs_list_node,
3547                               &mlxsw_sp->router->nexthop_neighs_list);
3548
3549         nh->neigh_entry = neigh_entry;
3550         list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list);
3551         read_lock_bh(&n->lock);
3552         nud_state = n->nud_state;
3553         dead = n->dead;
3554         read_unlock_bh(&n->lock);
3555         __mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID && !dead));
3556
3557         return 0;
3558
3559 err_neigh_entry_create:
3560         neigh_release(n);
3561         return err;
3562 }
3563
3564 static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp,
3565                                         struct mlxsw_sp_nexthop *nh)
3566 {
3567         struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
3568         struct neighbour *n;
3569
3570         if (!neigh_entry)
3571                 return;
3572         n = neigh_entry->key.n;
3573
3574         __mlxsw_sp_nexthop_neigh_update(nh, true);
3575         list_del(&nh->neigh_list_node);
3576         nh->neigh_entry = NULL;
3577
3578         /* If that is the last nexthop connected to that neigh, remove from
3579          * nexthop_neighs_list
3580          */
3581         if (list_empty(&neigh_entry->nexthop_list))
3582                 list_del(&neigh_entry->nexthop_neighs_list_node);
3583
3584         if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
3585                 mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
3586
3587         neigh_release(n);
3588 }
3589
3590 static bool mlxsw_sp_ipip_netdev_ul_up(struct net_device *ol_dev)
3591 {
3592         struct net_device *ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
3593
3594         return ul_dev ? (ul_dev->flags & IFF_UP) : true;
3595 }
3596
3597 static void mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp,
3598                                        struct mlxsw_sp_nexthop *nh,
3599                                        struct mlxsw_sp_ipip_entry *ipip_entry)
3600 {
3601         bool removing;
3602
3603         if (!nh->nh_grp->gateway || nh->ipip_entry)
3604                 return;
3605
3606         nh->ipip_entry = ipip_entry;
3607         removing = !mlxsw_sp_ipip_netdev_ul_up(ipip_entry->ol_dev);
3608         __mlxsw_sp_nexthop_neigh_update(nh, removing);
3609         mlxsw_sp_nexthop_rif_init(nh, &ipip_entry->ol_lb->common);
3610 }
3611
3612 static void mlxsw_sp_nexthop_ipip_fini(struct mlxsw_sp *mlxsw_sp,
3613                                        struct mlxsw_sp_nexthop *nh)
3614 {
3615         struct mlxsw_sp_ipip_entry *ipip_entry = nh->ipip_entry;
3616
3617         if (!ipip_entry)
3618                 return;
3619
3620         __mlxsw_sp_nexthop_neigh_update(nh, true);
3621         nh->ipip_entry = NULL;
3622 }
3623
3624 static bool mlxsw_sp_nexthop4_ipip_type(const struct mlxsw_sp *mlxsw_sp,
3625                                         const struct fib_nh *fib_nh,
3626                                         enum mlxsw_sp_ipip_type *p_ipipt)
3627 {
3628         struct net_device *dev = fib_nh->fib_nh_dev;
3629
3630         return dev &&
3631                fib_nh->nh_parent->fib_type == RTN_UNICAST &&
3632                mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, p_ipipt);
3633 }
3634
3635 static void mlxsw_sp_nexthop_type_fini(struct mlxsw_sp *mlxsw_sp,
3636                                        struct mlxsw_sp_nexthop *nh)
3637 {
3638         switch (nh->type) {
3639         case MLXSW_SP_NEXTHOP_TYPE_ETH:
3640                 mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
3641                 mlxsw_sp_nexthop_rif_fini(nh);
3642                 break;
3643         case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3644                 mlxsw_sp_nexthop_rif_fini(nh);
3645                 mlxsw_sp_nexthop_ipip_fini(mlxsw_sp, nh);
3646                 break;
3647         }
3648 }
3649
3650 static int mlxsw_sp_nexthop4_type_init(struct mlxsw_sp *mlxsw_sp,
3651                                        struct mlxsw_sp_nexthop *nh,
3652                                        struct fib_nh *fib_nh)
3653 {
3654         const struct mlxsw_sp_ipip_ops *ipip_ops;
3655         struct net_device *dev = fib_nh->fib_nh_dev;
3656         struct mlxsw_sp_ipip_entry *ipip_entry;
3657         struct mlxsw_sp_rif *rif;
3658         int err;
3659
3660         ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev);
3661         if (ipip_entry) {
3662                 ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
3663                 if (ipip_ops->can_offload(mlxsw_sp, dev,
3664                                           MLXSW_SP_L3_PROTO_IPV4)) {
3665                         nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
3666                         mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry);
3667                         return 0;
3668                 }
3669         }
3670
3671         nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
3672         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
3673         if (!rif)
3674                 return 0;
3675
3676         mlxsw_sp_nexthop_rif_init(nh, rif);
3677         err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
3678         if (err)
3679                 goto err_neigh_init;
3680
3681         return 0;
3682
3683 err_neigh_init:
3684         mlxsw_sp_nexthop_rif_fini(nh);
3685         return err;
3686 }
3687
/* IPv4 counterpart of mlxsw_sp_nexthop4_type_init(): forwards to the
 * generic mlxsw_sp_nexthop_type_fini().
 */
static void mlxsw_sp_nexthop4_type_fini(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_nexthop *nh)
{
	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
}
3693
3694 static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp,
3695                                   struct mlxsw_sp_nexthop_group *nh_grp,
3696                                   struct mlxsw_sp_nexthop *nh,
3697                                   struct fib_nh *fib_nh)
3698 {
3699         struct net_device *dev = fib_nh->fib_nh_dev;
3700         struct in_device *in_dev;
3701         int err;
3702
3703         nh->nh_grp = nh_grp;
3704         nh->key.fib_nh = fib_nh;
3705 #ifdef CONFIG_IP_ROUTE_MULTIPATH
3706         nh->nh_weight = fib_nh->fib_nh_weight;
3707 #else
3708         nh->nh_weight = 1;
3709 #endif
3710         memcpy(&nh->gw_addr, &fib_nh->fib_nh_gw4, sizeof(fib_nh->fib_nh_gw4));
3711         err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh);
3712         if (err)
3713                 return err;
3714
3715         mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
3716         list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
3717
3718         if (!dev)
3719                 return 0;
3720
3721         in_dev = __in_dev_get_rtnl(dev);
3722         if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
3723             fib_nh->fib_nh_flags & RTNH_F_LINKDOWN)
3724                 return 0;
3725
3726         err = mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh);
3727         if (err)
3728                 goto err_nexthop_neigh_init;
3729
3730         return 0;
3731
3732 err_nexthop_neigh_init:
3733         mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
3734         return err;
3735 }
3736
/* Tear down an IPv4 nexthop set up by mlxsw_sp_nexthop4_init().
 *
 * Undoes the type-specific binding first, then takes the nexthop off the
 * router's nexthop list, frees its counter and removes it from the
 * nexthop table.
 */
static void mlxsw_sp_nexthop4_fini(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_nexthop *nh)
{
	mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh);
	list_del(&nh->router_list_node);
	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
	mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
}
3745
3746 static void mlxsw_sp_nexthop4_event(struct mlxsw_sp *mlxsw_sp,
3747                                     unsigned long event, struct fib_nh *fib_nh)
3748 {
3749         struct mlxsw_sp_nexthop_key key;
3750         struct mlxsw_sp_nexthop *nh;
3751
3752         if (mlxsw_sp->router->aborted)
3753                 return;
3754
3755         key.fib_nh = fib_nh;
3756         nh = mlxsw_sp_nexthop_lookup(mlxsw_sp, key);
3757         if (WARN_ON_ONCE(!nh))
3758                 return;
3759
3760         switch (event) {
3761         case FIB_EVENT_NH_ADD:
3762                 mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh);
3763                 break;
3764         case FIB_EVENT_NH_DEL:
3765                 mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh);
3766                 break;
3767         }
3768
3769         mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3770 }
3771
3772 static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
3773                                         struct mlxsw_sp_rif *rif)
3774 {
3775         struct mlxsw_sp_nexthop *nh;
3776         bool removing;
3777
3778         list_for_each_entry(nh, &rif->nexthop_list, rif_list_node) {
3779                 switch (nh->type) {
3780                 case MLXSW_SP_NEXTHOP_TYPE_ETH:
3781                         removing = false;
3782                         break;
3783                 case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3784                         removing = !mlxsw_sp_ipip_netdev_ul_up(rif->dev);
3785                         break;
3786                 default:
3787                         WARN_ON(1);
3788                         continue;
3789                 }
3790
3791                 __mlxsw_sp_nexthop_neigh_update(nh, removing);
3792                 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3793         }
3794 }
3795
3796 static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp,
3797                                          struct mlxsw_sp_rif *old_rif,
3798                                          struct mlxsw_sp_rif *new_rif)
3799 {
3800         struct mlxsw_sp_nexthop *nh;
3801
3802         list_splice_init(&old_rif->nexthop_list, &new_rif->nexthop_list);
3803         list_for_each_entry(nh, &new_rif->nexthop_list, rif_list_node)
3804                 nh->rif = new_rif;
3805         mlxsw_sp_nexthop_rif_update(mlxsw_sp, new_rif);
3806 }
3807
3808 static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
3809                                            struct mlxsw_sp_rif *rif)
3810 {
3811         struct mlxsw_sp_nexthop *nh, *tmp;
3812
3813         list_for_each_entry_safe(nh, tmp, &rif->nexthop_list, rif_list_node) {
3814                 mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
3815                 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3816         }
3817 }
3818
3819 static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp,
3820                                    struct fib_info *fi)
3821 {
3822         const struct fib_nh *nh = fib_info_nh(fi, 0);
3823
3824         return nh->fib_nh_scope == RT_SCOPE_LINK ||
3825                mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, nh, NULL);
3826 }
3827
3828 static struct mlxsw_sp_nexthop_group *
3829 mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
3830 {
3831         unsigned int nhs = fib_info_num_path(fi);
3832         struct mlxsw_sp_nexthop_group *nh_grp;
3833         struct mlxsw_sp_nexthop *nh;
3834         struct fib_nh *fib_nh;
3835         int i;
3836         int err;
3837
3838         nh_grp = kzalloc(struct_size(nh_grp, nexthops, nhs), GFP_KERNEL);
3839         if (!nh_grp)
3840                 return ERR_PTR(-ENOMEM);
3841         nh_grp->priv = fi;
3842         INIT_LIST_HEAD(&nh_grp->fib_list);
3843         nh_grp->neigh_tbl = &arp_tbl;
3844
3845         nh_grp->gateway = mlxsw_sp_fi_is_gateway(mlxsw_sp, fi);
3846         nh_grp->count = nhs;
3847         fib_info_hold(fi);
3848         for (i = 0; i < nh_grp->count; i++) {
3849                 nh = &nh_grp->nexthops[i];
3850                 fib_nh = fib_info_nh(fi, i);
3851                 err = mlxsw_sp_nexthop4_init(mlxsw_sp, nh_grp, nh, fib_nh);
3852                 if (err)
3853                         goto err_nexthop4_init;
3854         }
3855         err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
3856         if (err)
3857                 goto err_nexthop_group_insert;
3858         mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
3859         return nh_grp;
3860
3861 err_nexthop_group_insert:
3862 err_nexthop4_init:
3863         for (i--; i >= 0; i--) {
3864                 nh = &nh_grp->nexthops[i];
3865                 mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
3866         }
3867         fib_info_put(fi);
3868         kfree(nh_grp);
3869         return ERR_PTR(err);
3870 }
3871
3872 static void
3873 mlxsw_sp_nexthop4_group_destroy(struct mlxsw_sp *mlxsw_sp,
3874                                 struct mlxsw_sp_nexthop_group *nh_grp)
3875 {
3876         struct mlxsw_sp_nexthop *nh;
3877         int i;
3878
3879         mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
3880         for (i = 0; i < nh_grp->count; i++) {
3881                 nh = &nh_grp->nexthops[i];
3882                 mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
3883         }
3884         mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
3885         WARN_ON_ONCE(nh_grp->adj_index_valid);
3886         fib_info_put(mlxsw_sp_nexthop4_group_fi(nh_grp));
3887         kfree(nh_grp);
3888 }
3889
3890 static int mlxsw_sp_nexthop4_group_get(struct mlxsw_sp *mlxsw_sp,
3891                                        struct mlxsw_sp_fib_entry *fib_entry,
3892                                        struct fib_info *fi)
3893 {
3894         struct mlxsw_sp_nexthop_group *nh_grp;
3895
3896         nh_grp = mlxsw_sp_nexthop4_group_lookup(mlxsw_sp, fi);
3897         if (!nh_grp) {
3898                 nh_grp = mlxsw_sp_nexthop4_group_create(mlxsw_sp, fi);
3899                 if (IS_ERR(nh_grp))
3900                         return PTR_ERR(nh_grp);
3901         }
3902         list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list);
3903         fib_entry->nh_group = nh_grp;
3904         return 0;
3905 }
3906
3907 static void mlxsw_sp_nexthop4_group_put(struct mlxsw_sp *mlxsw_sp,
3908                                         struct mlxsw_sp_fib_entry *fib_entry)
3909 {
3910         struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3911
3912         list_del(&fib_entry->nexthop_group_node);
3913         if (!list_empty(&nh_grp->fib_list))
3914                 return;
3915         mlxsw_sp_nexthop4_group_destroy(mlxsw_sp, nh_grp);
3916 }
3917
3918 static bool
3919 mlxsw_sp_fib4_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
3920 {
3921         struct mlxsw_sp_fib4_entry *fib4_entry;
3922
3923         fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
3924                                   common);
3925         return !fib4_entry->tos;
3926 }
3927
3928 static bool
3929 mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
3930 {
3931         struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;
3932
3933         switch (fib_entry->fib_node->fib->proto) {
3934         case MLXSW_SP_L3_PROTO_IPV4:
3935                 if (!mlxsw_sp_fib4_entry_should_offload(fib_entry))
3936                         return false;
3937                 break;
3938         case MLXSW_SP_L3_PROTO_IPV6:
3939                 break;
3940         }
3941
3942         switch (fib_entry->type) {
3943         case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
3944                 return !!nh_group->adj_index_valid;
3945         case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
3946                 return !!nh_group->nh_rif;
3947         case MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE:
3948         case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
3949         case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP:
3950                 return true;
3951         default:
3952                 return false;
3953         }
3954 }
3955
3956 static struct mlxsw_sp_nexthop *
3957 mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
3958                      const struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
3959 {
3960         int i;
3961
3962         for (i = 0; i < nh_grp->count; i++) {
3963                 struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3964                 struct fib6_info *rt = mlxsw_sp_rt6->rt;
3965
3966                 if (nh->rif && nh->rif->dev == rt->fib6_nh->fib_nh_dev &&
3967                     ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr,
3968                                     &rt->fib6_nh->fib_nh_gw6))
3969                         return nh;
3970                 continue;
3971         }
3972
3973         return NULL;
3974 }
3975
3976 static void
3977 mlxsw_sp_fib4_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
3978 {
3979         struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3980         int i;
3981
3982         if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL ||
3983             fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE ||
3984             fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP ||
3985             fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP) {
3986                 nh_grp->nexthops->key.fib_nh->fib_nh_flags |= RTNH_F_OFFLOAD;
3987                 return;
3988         }
3989
3990         for (i = 0; i < nh_grp->count; i++) {
3991                 struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3992
3993                 if (nh->offloaded)
3994                         nh->key.fib_nh->fib_nh_flags |= RTNH_F_OFFLOAD;
3995                 else
3996                         nh->key.fib_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
3997         }
3998 }
3999
4000 static void
4001 mlxsw_sp_fib4_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
4002 {
4003         struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
4004         int i;
4005
4006         if (!list_is_singular(&nh_grp->fib_list))
4007                 return;
4008
4009         for (i = 0; i < nh_grp->count; i++) {
4010                 struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
4011
4012                 nh->key.fib_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
4013         }
4014 }
4015
4016 static void
4017 mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
4018 {
4019         struct mlxsw_sp_fib6_entry *fib6_entry;
4020         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4021
4022         fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
4023                                   common);
4024
4025         if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL ||
4026             fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE) {
4027                 list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
4028                                  list)->rt->fib6_nh->fib_nh_flags |= RTNH_F_OFFLOAD;
4029                 return;
4030         }
4031
4032         list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
4033                 struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
4034                 struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh;
4035                 struct mlxsw_sp_nexthop *nh;
4036
4037                 nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6);
4038                 if (nh && nh->offloaded)
4039                         fib6_nh->fib_nh_flags |= RTNH_F_OFFLOAD;
4040                 else
4041                         fib6_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
4042         }
4043 }
4044
4045 static void
4046 mlxsw_sp_fib6_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
4047 {
4048         struct mlxsw_sp_fib6_entry *fib6_entry;
4049         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4050
4051         fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
4052                                   common);
4053         list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
4054                 struct fib6_info *rt = mlxsw_sp_rt6->rt;
4055
4056                 rt->fib6_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
4057         }
4058 }
4059
4060 static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
4061 {
4062         switch (fib_entry->fib_node->fib->proto) {
4063         case MLXSW_SP_L3_PROTO_IPV4:
4064                 mlxsw_sp_fib4_entry_offload_set(fib_entry);
4065                 break;
4066         case MLXSW_SP_L3_PROTO_IPV6:
4067                 mlxsw_sp_fib6_entry_offload_set(fib_entry);
4068                 break;
4069         }
4070 }
4071
4072 static void
4073 mlxsw_sp_fib_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
4074 {
4075         switch (fib_entry->fib_node->fib->proto) {
4076         case MLXSW_SP_L3_PROTO_IPV4:
4077                 mlxsw_sp_fib4_entry_offload_unset(fib_entry);
4078                 break;
4079         case MLXSW_SP_L3_PROTO_IPV6:
4080                 mlxsw_sp_fib6_entry_offload_unset(fib_entry);
4081                 break;
4082         }
4083 }
4084
4085 static void
4086 mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
4087                                    enum mlxsw_reg_ralue_op op, int err)
4088 {
4089         switch (op) {
4090         case MLXSW_REG_RALUE_OP_WRITE_DELETE:
4091                 return mlxsw_sp_fib_entry_offload_unset(fib_entry);
4092         case MLXSW_REG_RALUE_OP_WRITE_WRITE:
4093                 if (err)
4094                         return;
4095                 if (mlxsw_sp_fib_entry_should_offload(fib_entry))
4096                         mlxsw_sp_fib_entry_offload_set(fib_entry);
4097                 else
4098                         mlxsw_sp_fib_entry_offload_unset(fib_entry);
4099                 return;
4100         default:
4101                 return;
4102         }
4103 }
4104
4105 static void
4106 mlxsw_sp_fib_entry_ralue_pack(char *ralue_pl,
4107                               const struct mlxsw_sp_fib_entry *fib_entry,
4108                               enum mlxsw_reg_ralue_op op)
4109 {
4110         struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
4111         enum mlxsw_reg_ralxx_protocol proto;
4112         u32 *p_dip;
4113
4114         proto = (enum mlxsw_reg_ralxx_protocol) fib->proto;
4115
4116         switch (fib->proto) {
4117         case MLXSW_SP_L3_PROTO_IPV4:
4118                 p_dip = (u32 *) fib_entry->fib_node->key.addr;
4119                 mlxsw_reg_ralue_pack4(ralue_pl, proto, op, fib->vr->id,
4120                                       fib_entry->fib_node->key.prefix_len,
4121                                       *p_dip);
4122                 break;
4123         case MLXSW_SP_L3_PROTO_IPV6:
4124                 mlxsw_reg_ralue_pack6(ralue_pl, proto, op, fib->vr->id,
4125                                       fib_entry->fib_node->key.prefix_len,
4126                                       fib_entry->fib_node->key.addr);
4127                 break;
4128         }
4129 }
4130
4131 static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp,
4132                                         struct mlxsw_sp_fib_entry *fib_entry,
4133                                         enum mlxsw_reg_ralue_op op)
4134 {
4135         char ralue_pl[MLXSW_REG_RALUE_LEN];
4136         enum mlxsw_reg_ralue_trap_action trap_action;
4137         u16 trap_id = 0;
4138         u32 adjacency_index = 0;
4139         u16 ecmp_size = 0;
4140
4141         /* In case the nexthop group adjacency index is valid, use it
4142          * with provided ECMP size. Otherwise, setup trap and pass
4143          * traffic to kernel.
4144          */
4145         if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
4146                 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
4147                 adjacency_index = fib_entry->nh_group->adj_index;
4148                 ecmp_size = fib_entry->nh_group->ecmp_size;
4149         } else {
4150                 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
4151                 trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
4152         }
4153
4154         mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4155         mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
4156                                         adjacency_index, ecmp_size);
4157         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4158 }
4159
4160 static int mlxsw_sp_fib_entry_op_local(struct mlxsw_sp *mlxsw_sp,
4161                                        struct mlxsw_sp_fib_entry *fib_entry,
4162                                        enum mlxsw_reg_ralue_op op)
4163 {
4164         struct mlxsw_sp_rif *rif = fib_entry->nh_group->nh_rif;
4165         enum mlxsw_reg_ralue_trap_action trap_action;
4166         char ralue_pl[MLXSW_REG_RALUE_LEN];
4167         u16 trap_id = 0;
4168         u16 rif_index = 0;
4169
4170         if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
4171                 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
4172                 rif_index = rif->rif_index;
4173         } else {
4174                 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
4175                 trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
4176         }
4177
4178         mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4179         mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id,
4180                                        rif_index);
4181         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4182 }
4183
4184 static int mlxsw_sp_fib_entry_op_trap(struct mlxsw_sp *mlxsw_sp,
4185                                       struct mlxsw_sp_fib_entry *fib_entry,
4186                                       enum mlxsw_reg_ralue_op op)
4187 {
4188         char ralue_pl[MLXSW_REG_RALUE_LEN];
4189
4190         mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4191         mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
4192         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4193 }
4194
4195 static int mlxsw_sp_fib_entry_op_blackhole(struct mlxsw_sp *mlxsw_sp,
4196                                            struct mlxsw_sp_fib_entry *fib_entry,
4197                                            enum mlxsw_reg_ralue_op op)
4198 {
4199         enum mlxsw_reg_ralue_trap_action trap_action;
4200         char ralue_pl[MLXSW_REG_RALUE_LEN];
4201
4202         trap_action = MLXSW_REG_RALUE_TRAP_ACTION_DISCARD_ERROR;
4203         mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4204         mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, 0, 0);
4205         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4206 }
4207
4208 static int
4209 mlxsw_sp_fib_entry_op_ipip_decap(struct mlxsw_sp *mlxsw_sp,
4210                                  struct mlxsw_sp_fib_entry *fib_entry,
4211                                  enum mlxsw_reg_ralue_op op)
4212 {
4213         struct mlxsw_sp_ipip_entry *ipip_entry = fib_entry->decap.ipip_entry;
4214         const struct mlxsw_sp_ipip_ops *ipip_ops;
4215
4216         if (WARN_ON(!ipip_entry))
4217                 return -EINVAL;
4218
4219         ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
4220         return ipip_ops->fib_entry_op(mlxsw_sp, ipip_entry, op,
4221                                       fib_entry->decap.tunnel_index);
4222 }
4223
4224 static int mlxsw_sp_fib_entry_op_nve_decap(struct mlxsw_sp *mlxsw_sp,
4225                                            struct mlxsw_sp_fib_entry *fib_entry,
4226                                            enum mlxsw_reg_ralue_op op)
4227 {
4228         char ralue_pl[MLXSW_REG_RALUE_LEN];
4229
4230         mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4231         mlxsw_reg_ralue_act_ip2me_tun_pack(ralue_pl,
4232                                            fib_entry->decap.tunnel_index);
4233         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4234 }
4235
4236 static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
4237                                    struct mlxsw_sp_fib_entry *fib_entry,
4238                                    enum mlxsw_reg_ralue_op op)
4239 {
4240         switch (fib_entry->type) {
4241         case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
4242                 return mlxsw_sp_fib_entry_op_remote(mlxsw_sp, fib_entry, op);
4243         case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
4244                 return mlxsw_sp_fib_entry_op_local(mlxsw_sp, fib_entry, op);
4245         case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
4246                 return mlxsw_sp_fib_entry_op_trap(mlxsw_sp, fib_entry, op);
4247         case MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE:
4248                 return mlxsw_sp_fib_entry_op_blackhole(mlxsw_sp, fib_entry, op);
4249         case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
4250                 return mlxsw_sp_fib_entry_op_ipip_decap(mlxsw_sp,
4251                                                         fib_entry, op);
4252         case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP:
4253                 return mlxsw_sp_fib_entry_op_nve_decap(mlxsw_sp, fib_entry, op);
4254         }
4255         return -EINVAL;
4256 }
4257
4258 static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
4259                                  struct mlxsw_sp_fib_entry *fib_entry,
4260                                  enum mlxsw_reg_ralue_op op)
4261 {
4262         int err = __mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, op);
4263
4264         mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, err);
4265
4266         return err;
4267 }
4268
4269 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
4270                                      struct mlxsw_sp_fib_entry *fib_entry)
4271 {
4272         return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
4273                                      MLXSW_REG_RALUE_OP_WRITE_WRITE);
4274 }
4275
4276 static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp,
4277                                   struct mlxsw_sp_fib_entry *fib_entry)
4278 {
4279         return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
4280                                      MLXSW_REG_RALUE_OP_WRITE_DELETE);
4281 }
4282
4283 static int
4284 mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
4285                              const struct fib_entry_notifier_info *fen_info,
4286                              struct mlxsw_sp_fib_entry *fib_entry)
4287 {
4288         struct net_device *dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
4289         union mlxsw_sp_l3addr dip = { .addr4 = htonl(fen_info->dst) };
4290         u32 tb_id = mlxsw_sp_fix_tb_id(fen_info->tb_id);
4291         struct mlxsw_sp_ipip_entry *ipip_entry;
4292         struct fib_info *fi = fen_info->fi;
4293
4294         switch (fen_info->type) {
4295         case RTN_LOCAL:
4296                 ipip_entry = mlxsw_sp_ipip_entry_find_by_decap(mlxsw_sp, dev,
4297                                                  MLXSW_SP_L3_PROTO_IPV4, dip);
4298                 if (ipip_entry && ipip_entry->ol_dev->flags & IFF_UP) {
4299                         fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
4300                         return mlxsw_sp_fib_entry_decap_init(mlxsw_sp,
4301                                                              fib_entry,
4302                                                              ipip_entry);
4303                 }
4304                 if (mlxsw_sp_nve_ipv4_route_is_decap(mlxsw_sp, tb_id,
4305                                                      dip.addr4)) {
4306                         u32 t_index;
4307
4308                         t_index = mlxsw_sp_nve_decap_tunnel_index_get(mlxsw_sp);
4309                         fib_entry->decap.tunnel_index = t_index;
4310                         fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
4311                         return 0;
4312                 }
4313                 /* fall through */
4314         case RTN_BROADCAST:
4315                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
4316                 return 0;
4317         case RTN_BLACKHOLE:
4318                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE;
4319                 return 0;
4320         case RTN_UNREACHABLE: /* fall through */
4321         case RTN_PROHIBIT:
4322                 /* Packets hitting these routes need to be trapped, but
4323                  * can do so with a lower priority than packets directed
4324                  * at the host, so use action type local instead of trap.
4325                  */
4326                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
4327                 return 0;
4328         case RTN_UNICAST:
4329                 if (mlxsw_sp_fi_is_gateway(mlxsw_sp, fi))
4330                         fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
4331                 else
4332                         fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
4333                 return 0;
4334         default:
4335                 return -EINVAL;
4336         }
4337 }
4338
4339 static struct mlxsw_sp_fib4_entry *
4340 mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp,
4341                            struct mlxsw_sp_fib_node *fib_node,
4342                            const struct fib_entry_notifier_info *fen_info)
4343 {
4344         struct mlxsw_sp_fib4_entry *fib4_entry;
4345         struct mlxsw_sp_fib_entry *fib_entry;
4346         int err;
4347
4348         fib4_entry = kzalloc(sizeof(*fib4_entry), GFP_KERNEL);
4349         if (!fib4_entry)
4350                 return ERR_PTR(-ENOMEM);
4351         fib_entry = &fib4_entry->common;
4352
4353         err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry);
4354         if (err)
4355                 goto err_fib4_entry_type_set;
4356
4357         err = mlxsw_sp_nexthop4_group_get(mlxsw_sp, fib_entry, fen_info->fi);
4358         if (err)
4359                 goto err_nexthop4_group_get;
4360
4361         fib4_entry->prio = fen_info->fi->fib_priority;
4362         fib4_entry->tb_id = fen_info->tb_id;
4363         fib4_entry->type = fen_info->type;
4364         fib4_entry->tos = fen_info->tos;
4365
4366         fib_entry->fib_node = fib_node;
4367
4368         return fib4_entry;
4369
4370 err_nexthop4_group_get:
4371 err_fib4_entry_type_set:
4372         kfree(fib4_entry);
4373         return ERR_PTR(err);
4374 }
4375
4376 static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp,
4377                                         struct mlxsw_sp_fib4_entry *fib4_entry)
4378 {
4379         mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common);
4380         kfree(fib4_entry);
4381 }
4382
4383 static struct mlxsw_sp_fib4_entry *
4384 mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp,
4385                            const struct fib_entry_notifier_info *fen_info)
4386 {
4387         struct mlxsw_sp_fib4_entry *fib4_entry;
4388         struct mlxsw_sp_fib_node *fib_node;
4389         struct mlxsw_sp_fib *fib;
4390         struct mlxsw_sp_vr *vr;
4391
4392         vr = mlxsw_sp_vr_find(mlxsw_sp, fen_info->tb_id);
4393         if (!vr)
4394                 return NULL;
4395         fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV4);
4396
4397         fib_node = mlxsw_sp_fib_node_lookup(fib, &fen_info->dst,
4398                                             sizeof(fen_info->dst),
4399                                             fen_info->dst_len);
4400         if (!fib_node)
4401                 return NULL;
4402
4403         list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) {
4404                 if (fib4_entry->tb_id == fen_info->tb_id &&
4405                     fib4_entry->tos == fen_info->tos &&
4406                     fib4_entry->type == fen_info->type &&
4407                     mlxsw_sp_nexthop4_group_fi(fib4_entry->common.nh_group) ==
4408                     fen_info->fi) {
4409                         return fib4_entry;
4410                 }
4411         }
4412
4413         return NULL;
4414 }
4415
4416 static const struct rhashtable_params mlxsw_sp_fib_ht_params = {
4417         .key_offset = offsetof(struct mlxsw_sp_fib_node, key),
4418         .head_offset = offsetof(struct mlxsw_sp_fib_node, ht_node),
4419         .key_len = sizeof(struct mlxsw_sp_fib_key),
4420         .automatic_shrinking = true,
4421 };
4422
4423 static int mlxsw_sp_fib_node_insert(struct mlxsw_sp_fib *fib,
4424                                     struct mlxsw_sp_fib_node *fib_node)
4425 {
4426         return rhashtable_insert_fast(&fib->ht, &fib_node->ht_node,
4427                                       mlxsw_sp_fib_ht_params);
4428 }
4429
4430 static void mlxsw_sp_fib_node_remove(struct mlxsw_sp_fib *fib,
4431                                      struct mlxsw_sp_fib_node *fib_node)
4432 {
4433         rhashtable_remove_fast(&fib->ht, &fib_node->ht_node,
4434                                mlxsw_sp_fib_ht_params);
4435 }
4436
4437 static struct mlxsw_sp_fib_node *
4438 mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
4439                          size_t addr_len, unsigned char prefix_len)
4440 {
4441         struct mlxsw_sp_fib_key key;
4442
4443         memset(&key, 0, sizeof(key));
4444         memcpy(key.addr, addr, addr_len);
4445         key.prefix_len = prefix_len;
4446         return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params);
4447 }
4448
4449 static struct mlxsw_sp_fib_node *
4450 mlxsw_sp_fib_node_create(struct mlxsw_sp_fib *fib, const void *addr,
4451                          size_t addr_len, unsigned char prefix_len)
4452 {
4453         struct mlxsw_sp_fib_node *fib_node;
4454
4455         fib_node = kzalloc(sizeof(*fib_node), GFP_KERNEL);
4456         if (!fib_node)
4457                 return NULL;
4458
4459         INIT_LIST_HEAD(&fib_node->entry_list);
4460         list_add(&fib_node->list, &fib->node_list);
4461         memcpy(fib_node->key.addr, addr, addr_len);
4462         fib_node->key.prefix_len = prefix_len;
4463
4464         return fib_node;
4465 }
4466
4467 static void mlxsw_sp_fib_node_destroy(struct mlxsw_sp_fib_node *fib_node)
4468 {
4469         list_del(&fib_node->list);
4470         WARN_ON(!list_empty(&fib_node->entry_list));
4471         kfree(fib_node);
4472 }
4473
4474 static bool
4475 mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
4476                                  const struct mlxsw_sp_fib_entry *fib_entry)
4477 {
4478         return list_first_entry(&fib_node->entry_list,
4479                                 struct mlxsw_sp_fib_entry, list) == fib_entry;
4480 }
4481
4482 static int mlxsw_sp_fib_lpm_tree_link(struct mlxsw_sp *mlxsw_sp,
4483                                       struct mlxsw_sp_fib_node *fib_node)
4484 {
4485         struct mlxsw_sp_prefix_usage req_prefix_usage;
4486         struct mlxsw_sp_fib *fib = fib_node->fib;
4487         struct mlxsw_sp_lpm_tree *lpm_tree;
4488         int err;
4489
4490         lpm_tree = mlxsw_sp->router->lpm.proto_trees[fib->proto];
4491         if (lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
4492                 goto out;
4493
4494         mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
4495         mlxsw_sp_prefix_usage_set(&req_prefix_usage, fib_node->key.prefix_len);
4496         lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
4497                                          fib->proto);
4498         if (IS_ERR(lpm_tree))
4499                 return PTR_ERR(lpm_tree);
4500
4501         err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
4502         if (err)
4503                 goto err_lpm_tree_replace;
4504
4505 out:
4506         lpm_tree->prefix_ref_count[fib_node->key.prefix_len]++;
4507         return 0;
4508
4509 err_lpm_tree_replace:
4510         mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
4511         return err;
4512 }
4513
4514 static void mlxsw_sp_fib_lpm_tree_unlink(struct mlxsw_sp *mlxsw_sp,
4515                                          struct mlxsw_sp_fib_node *fib_node)
4516 {
4517         struct mlxsw_sp_lpm_tree *lpm_tree = fib_node->fib->lpm_tree;
4518         struct mlxsw_sp_prefix_usage req_prefix_usage;
4519         struct mlxsw_sp_fib *fib = fib_node->fib;
4520         int err;
4521
4522         if (--lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
4523                 return;
4524         /* Try to construct a new LPM tree from the current prefix usage
4525          * minus the unused one. If we fail, continue using the old one.
4526          */
4527         mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
4528         mlxsw_sp_prefix_usage_clear(&req_prefix_usage,
4529                                     fib_node->key.prefix_len);
4530         lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
4531                                          fib->proto);
4532         if (IS_ERR(lpm_tree))
4533                 return;
4534
4535         err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
4536         if (err)
4537                 goto err_lpm_tree_replace;
4538
4539         return;
4540
4541 err_lpm_tree_replace:
4542         mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
4543 }
4544
4545 static int mlxsw_sp_fib_node_init(struct mlxsw_sp *mlxsw_sp,
4546                                   struct mlxsw_sp_fib_node *fib_node,
4547                                   struct mlxsw_sp_fib *fib)
4548 {
4549         int err;
4550
4551         err = mlxsw_sp_fib_node_insert(fib, fib_node);
4552         if (err)
4553                 return err;
4554         fib_node->fib = fib;
4555
4556         err = mlxsw_sp_fib_lpm_tree_link(mlxsw_sp, fib_node);
4557         if (err)
4558                 goto err_fib_lpm_tree_link;
4559
4560         return 0;
4561
4562 err_fib_lpm_tree_link:
4563         fib_node->fib = NULL;
4564         mlxsw_sp_fib_node_remove(fib, fib_node);
4565         return err;
4566 }
4567
4568 static void mlxsw_sp_fib_node_fini(struct mlxsw_sp *mlxsw_sp,
4569                                    struct mlxsw_sp_fib_node *fib_node)
4570 {
4571         struct mlxsw_sp_fib *fib = fib_node->fib;
4572
4573         mlxsw_sp_fib_lpm_tree_unlink(mlxsw_sp, fib_node);
4574         fib_node->fib = NULL;
4575         mlxsw_sp_fib_node_remove(fib, fib_node);
4576 }
4577
4578 static struct mlxsw_sp_fib_node *
4579 mlxsw_sp_fib_node_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, const void *addr,
4580                       size_t addr_len, unsigned char prefix_len,
4581                       enum mlxsw_sp_l3proto proto)
4582 {
4583         struct mlxsw_sp_fib_node *fib_node;
4584         struct mlxsw_sp_fib *fib;
4585         struct mlxsw_sp_vr *vr;
4586         int err;
4587
4588         vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, NULL);
4589         if (IS_ERR(vr))
4590                 return ERR_CAST(vr);
4591         fib = mlxsw_sp_vr_fib(vr, proto);
4592
4593         fib_node = mlxsw_sp_fib_node_lookup(fib, addr, addr_len, prefix_len);
4594         if (fib_node)
4595                 return fib_node;
4596
4597         fib_node = mlxsw_sp_fib_node_create(fib, addr, addr_len, prefix_len);
4598         if (!fib_node) {
4599                 err = -ENOMEM;
4600                 goto err_fib_node_create;
4601         }
4602
4603         err = mlxsw_sp_fib_node_init(mlxsw_sp, fib_node, fib);
4604         if (err)
4605                 goto err_fib_node_init;
4606
4607         return fib_node;
4608
4609 err_fib_node_init:
4610         mlxsw_sp_fib_node_destroy(fib_node);
4611 err_fib_node_create:
4612         mlxsw_sp_vr_put(mlxsw_sp, vr);
4613         return ERR_PTR(err);
4614 }
4615
4616 static void mlxsw_sp_fib_node_put(struct mlxsw_sp *mlxsw_sp,
4617                                   struct mlxsw_sp_fib_node *fib_node)
4618 {
4619         struct mlxsw_sp_vr *vr = fib_node->fib->vr;
4620
4621         if (!list_empty(&fib_node->entry_list))
4622                 return;
4623         mlxsw_sp_fib_node_fini(mlxsw_sp, fib_node);
4624         mlxsw_sp_fib_node_destroy(fib_node);
4625         mlxsw_sp_vr_put(mlxsw_sp, vr);
4626 }
4627
4628 static struct mlxsw_sp_fib4_entry *
4629 mlxsw_sp_fib4_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
4630                               const struct mlxsw_sp_fib4_entry *new4_entry)
4631 {
4632         struct mlxsw_sp_fib4_entry *fib4_entry;
4633
4634         list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) {
4635                 if (fib4_entry->tb_id > new4_entry->tb_id)
4636                         continue;
4637                 if (fib4_entry->tb_id != new4_entry->tb_id)
4638                         break;
4639                 if (fib4_entry->tos > new4_entry->tos)
4640                         continue;
4641                 if (fib4_entry->prio >= new4_entry->prio ||
4642                     fib4_entry->tos < new4_entry->tos)
4643                         return fib4_entry;
4644         }
4645
4646         return NULL;
4647 }
4648
4649 static int
4650 mlxsw_sp_fib4_node_list_append(struct mlxsw_sp_fib4_entry *fib4_entry,
4651                                struct mlxsw_sp_fib4_entry *new4_entry)
4652 {
4653         struct mlxsw_sp_fib_node *fib_node;
4654
4655         if (WARN_ON(!fib4_entry))
4656                 return -EINVAL;
4657
4658         fib_node = fib4_entry->common.fib_node;
4659         list_for_each_entry_from(fib4_entry, &fib_node->entry_list,
4660                                  common.list) {
4661                 if (fib4_entry->tb_id != new4_entry->tb_id ||
4662                     fib4_entry->tos != new4_entry->tos ||
4663                     fib4_entry->prio != new4_entry->prio)
4664                         break;
4665         }
4666
4667         list_add_tail(&new4_entry->common.list, &fib4_entry->common.list);
4668         return 0;
4669 }
4670
4671 static int
4672 mlxsw_sp_fib4_node_list_insert(struct mlxsw_sp_fib4_entry *new4_entry,
4673                                bool replace, bool append)
4674 {
4675         struct mlxsw_sp_fib_node *fib_node = new4_entry->common.fib_node;
4676         struct mlxsw_sp_fib4_entry *fib4_entry;
4677
4678         fib4_entry = mlxsw_sp_fib4_node_entry_find(fib_node, new4_entry);
4679
4680         if (append)
4681                 return mlxsw_sp_fib4_node_list_append(fib4_entry, new4_entry);
4682         if (replace && WARN_ON(!fib4_entry))
4683                 return -EINVAL;
4684
4685         /* Insert new entry before replaced one, so that we can later
4686          * remove the second.
4687          */
4688         if (fib4_entry) {
4689                 list_add_tail(&new4_entry->common.list,
4690                               &fib4_entry->common.list);
4691         } else {
4692                 struct mlxsw_sp_fib4_entry *last;
4693
4694                 list_for_each_entry(last, &fib_node->entry_list, common.list) {
4695                         if (new4_entry->tb_id > last->tb_id)
4696                                 break;
4697                         fib4_entry = last;
4698                 }
4699
4700                 if (fib4_entry)
4701                         list_add(&new4_entry->common.list,
4702                                  &fib4_entry->common.list);
4703                 else
4704                         list_add(&new4_entry->common.list,
4705                                  &fib_node->entry_list);
4706         }
4707
4708         return 0;
4709 }
4710
4711 static void
4712 mlxsw_sp_fib4_node_list_remove(struct mlxsw_sp_fib4_entry *fib4_entry)
4713 {
4714         list_del(&fib4_entry->common.list);
4715 }
4716
4717 static int mlxsw_sp_fib_node_entry_add(struct mlxsw_sp *mlxsw_sp,
4718                                        struct mlxsw_sp_fib_entry *fib_entry)
4719 {
4720         struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
4721
4722         if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
4723                 return 0;
4724
4725         /* To prevent packet loss, overwrite the previously offloaded
4726          * entry.
4727          */
4728         if (!list_is_singular(&fib_node->entry_list)) {
4729                 enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
4730                 struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
4731
4732                 mlxsw_sp_fib_entry_offload_refresh(n, op, 0);
4733         }
4734
4735         return mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
4736 }
4737
4738 static void mlxsw_sp_fib_node_entry_del(struct mlxsw_sp *mlxsw_sp,
4739                                         struct mlxsw_sp_fib_entry *fib_entry)
4740 {
4741         struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
4742
4743         if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
4744                 return;
4745
4746         /* Promote the next entry by overwriting the deleted entry */
4747         if (!list_is_singular(&fib_node->entry_list)) {
4748                 struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
4749                 enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
4750
4751                 mlxsw_sp_fib_entry_update(mlxsw_sp, n);
4752                 mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
4753                 return;
4754         }
4755
4756         mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
4757 }
4758
4759 static int mlxsw_sp_fib4_node_entry_link(struct mlxsw_sp *mlxsw_sp,
4760                                          struct mlxsw_sp_fib4_entry *fib4_entry,
4761                                          bool replace, bool append)
4762 {
4763         int err;
4764
4765         err = mlxsw_sp_fib4_node_list_insert(fib4_entry, replace, append);
4766         if (err)
4767                 return err;
4768
4769         err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib4_entry->common);
4770         if (err)
4771                 goto err_fib_node_entry_add;
4772
4773         return 0;
4774
4775 err_fib_node_entry_add:
4776         mlxsw_sp_fib4_node_list_remove(fib4_entry);
4777         return err;
4778 }
4779
4780 static void
4781 mlxsw_sp_fib4_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
4782                                 struct mlxsw_sp_fib4_entry *fib4_entry)
4783 {
4784         mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib4_entry->common);
4785         mlxsw_sp_fib4_node_list_remove(fib4_entry);
4786
4787         if (fib4_entry->common.type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP)
4788                 mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, &fib4_entry->common);
4789 }
4790
4791 static void mlxsw_sp_fib4_entry_replace(struct mlxsw_sp *mlxsw_sp,
4792                                         struct mlxsw_sp_fib4_entry *fib4_entry,
4793                                         bool replace)
4794 {
4795         struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node;
4796         struct mlxsw_sp_fib4_entry *replaced;
4797
4798         if (!replace)
4799                 return;
4800
4801         /* We inserted the new entry before replaced one */
4802         replaced = list_next_entry(fib4_entry, common.list);
4803
4804         mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, replaced);
4805         mlxsw_sp_fib4_entry_destroy(mlxsw_sp, replaced);
4806         mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4807 }
4808
4809 static int
4810 mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp,
4811                          const struct fib_entry_notifier_info *fen_info,
4812                          bool replace, bool append)
4813 {
4814         struct mlxsw_sp_fib4_entry *fib4_entry;
4815         struct mlxsw_sp_fib_node *fib_node;
4816         int err;
4817
4818         if (mlxsw_sp->router->aborted)
4819                 return 0;
4820
4821         fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, fen_info->tb_id,
4822                                          &fen_info->dst, sizeof(fen_info->dst),
4823                                          fen_info->dst_len,
4824                                          MLXSW_SP_L3_PROTO_IPV4);
4825         if (IS_ERR(fib_node)) {
4826                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB node\n");
4827                 return PTR_ERR(fib_node);
4828         }
4829
4830         fib4_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info);
4831         if (IS_ERR(fib4_entry)) {
4832                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to create FIB entry\n");
4833                 err = PTR_ERR(fib4_entry);
4834                 goto err_fib4_entry_create;
4835         }
4836
4837         err = mlxsw_sp_fib4_node_entry_link(mlxsw_sp, fib4_entry, replace,
4838                                             append);
4839         if (err) {
4840                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n");
4841                 goto err_fib4_node_entry_link;
4842         }
4843
4844         mlxsw_sp_fib4_entry_replace(mlxsw_sp, fib4_entry, replace);
4845
4846         return 0;
4847
4848 err_fib4_node_entry_link:
4849         mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
4850 err_fib4_entry_create:
4851         mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4852         return err;
4853 }
4854
4855 static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
4856                                      struct fib_entry_notifier_info *fen_info)
4857 {
4858         struct mlxsw_sp_fib4_entry *fib4_entry;
4859         struct mlxsw_sp_fib_node *fib_node;
4860
4861         if (mlxsw_sp->router->aborted)
4862                 return;
4863
4864         fib4_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info);
4865         if (WARN_ON(!fib4_entry))
4866                 return;
4867         fib_node = fib4_entry->common.fib_node;
4868
4869         mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry);
4870         mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
4871         mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4872 }
4873
4874 static bool mlxsw_sp_fib6_rt_should_ignore(const struct fib6_info *rt)
4875 {
4876         /* Packets with link-local destination IP arriving to the router
4877          * are trapped to the CPU, so no need to program specific routes
4878          * for them.
4879          */
4880         if (ipv6_addr_type(&rt->fib6_dst.addr) & IPV6_ADDR_LINKLOCAL)
4881                 return true;
4882
4883         /* Multicast routes aren't supported, so ignore them. Neighbour
4884          * Discovery packets are specifically trapped.
4885          */
4886         if (ipv6_addr_type(&rt->fib6_dst.addr) & IPV6_ADDR_MULTICAST)
4887                 return true;
4888
4889         /* Cloned routes are irrelevant in the forwarding path. */
4890         if (rt->fib6_flags & RTF_CACHE)
4891                 return true;
4892
4893         return false;
4894 }
4895
4896 static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct fib6_info *rt)
4897 {
4898         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4899
4900         mlxsw_sp_rt6 = kzalloc(sizeof(*mlxsw_sp_rt6), GFP_KERNEL);
4901         if (!mlxsw_sp_rt6)
4902                 return ERR_PTR(-ENOMEM);
4903
4904         /* In case of route replace, replaced route is deleted with
4905          * no notification. Take reference to prevent accessing freed
4906          * memory.
4907          */
4908         mlxsw_sp_rt6->rt = rt;
4909         fib6_info_hold(rt);
4910
4911         return mlxsw_sp_rt6;
4912 }
4913
4914 #if IS_ENABLED(CONFIG_IPV6)
4915 static void mlxsw_sp_rt6_release(struct fib6_info *rt)
4916 {
4917         fib6_info_release(rt);
4918 }
4919 #else
4920 static void mlxsw_sp_rt6_release(struct fib6_info *rt)
4921 {
4922 }
4923 #endif
4924
4925 static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
4926 {
4927         mlxsw_sp_rt6_release(mlxsw_sp_rt6->rt);
4928         kfree(mlxsw_sp_rt6);
4929 }
4930
4931 static bool mlxsw_sp_fib6_rt_can_mp(const struct fib6_info *rt)
4932 {
4933         /* RTF_CACHE routes are ignored */
4934         return !(rt->fib6_flags & RTF_ADDRCONF) &&
4935                 rt->fib6_nh->fib_nh_gw_family;
4936 }
4937
4938 static struct fib6_info *
4939 mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry)
4940 {
4941         return list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
4942                                 list)->rt;
4943 }
4944
4945 static struct mlxsw_sp_fib6_entry *
4946 mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node,
4947                                  const struct fib6_info *nrt, bool replace)
4948 {
4949         struct mlxsw_sp_fib6_entry *fib6_entry;
4950
4951         if (!mlxsw_sp_fib6_rt_can_mp(nrt) || replace)
4952                 return NULL;
4953
4954         list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
4955                 struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
4956
4957                 /* RT6_TABLE_LOCAL and RT6_TABLE_MAIN share the same
4958                  * virtual router.
4959                  */
4960                 if (rt->fib6_table->tb6_id > nrt->fib6_table->tb6_id)
4961                         continue;
4962                 if (rt->fib6_table->tb6_id != nrt->fib6_table->tb6_id)
4963                         break;
4964                 if (rt->fib6_metric < nrt->fib6_metric)
4965                         continue;
4966                 if (rt->fib6_metric == nrt->fib6_metric &&
4967                     mlxsw_sp_fib6_rt_can_mp(rt))
4968                         return fib6_entry;
4969                 if (rt->fib6_metric > nrt->fib6_metric)
4970                         break;
4971         }
4972
4973         return NULL;
4974 }
4975
4976 static struct mlxsw_sp_rt6 *
4977 mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry,
4978                             const struct fib6_info *rt)
4979 {
4980         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4981
4982         list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
4983                 if (mlxsw_sp_rt6->rt == rt)
4984                         return mlxsw_sp_rt6;
4985         }
4986
4987         return NULL;
4988 }
4989
4990 static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp,
4991                                         const struct fib6_info *rt,
4992                                         enum mlxsw_sp_ipip_type *ret)
4993 {
4994         return rt->fib6_nh->fib_nh_dev &&
4995                mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->fib6_nh->fib_nh_dev, ret);
4996 }
4997
4998 static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp,
4999                                        struct mlxsw_sp_nexthop_group *nh_grp,
5000                                        struct mlxsw_sp_nexthop *nh,
5001                                        const struct fib6_info *rt)
5002 {
5003         const struct mlxsw_sp_ipip_ops *ipip_ops;
5004         struct mlxsw_sp_ipip_entry *ipip_entry;
5005         struct net_device *dev = rt->fib6_nh->fib_nh_dev;
5006         struct mlxsw_sp_rif *rif;
5007         int err;
5008
5009         ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev);
5010         if (ipip_entry) {
5011                 ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
5012                 if (ipip_ops->can_offload(mlxsw_sp, dev,
5013                                           MLXSW_SP_L3_PROTO_IPV6)) {
5014                         nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
5015                         mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry);
5016                         return 0;
5017                 }
5018         }
5019
5020         nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
5021         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
5022         if (!rif)
5023                 return 0;
5024         mlxsw_sp_nexthop_rif_init(nh, rif);
5025
5026         err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
5027         if (err)
5028                 goto err_nexthop_neigh_init;
5029
5030         return 0;
5031
5032 err_nexthop_neigh_init:
5033         mlxsw_sp_nexthop_rif_fini(nh);
5034         return err;
5035 }
5036
/* IPv6 nexthop type teardown; defers entirely to
 * mlxsw_sp_nexthop_type_fini().
 */
static void mlxsw_sp_nexthop6_type_fini(struct mlxsw_sp *mlxsw_sp,
                                        struct mlxsw_sp_nexthop *nh)
{
        mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
}
5042
5043 static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
5044                                   struct mlxsw_sp_nexthop_group *nh_grp,
5045                                   struct mlxsw_sp_nexthop *nh,
5046                                   const struct fib6_info *rt)
5047 {
5048         struct net_device *dev = rt->fib6_nh->fib_nh_dev;
5049
5050         nh->nh_grp = nh_grp;
5051         nh->nh_weight = rt->fib6_nh->fib_nh_weight;
5052         memcpy(&nh->gw_addr, &rt->fib6_nh->fib_nh_gw6, sizeof(nh->gw_addr));
5053         mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
5054
5055         list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
5056
5057         if (!dev)
5058                 return 0;
5059         nh->ifindex = dev->ifindex;
5060
5061         return mlxsw_sp_nexthop6_type_init(mlxsw_sp, nh_grp, nh, rt);
5062 }
5063
5064 static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp,
5065                                    struct mlxsw_sp_nexthop *nh)
5066 {
5067         mlxsw_sp_nexthop6_type_fini(mlxsw_sp, nh);
5068         list_del(&nh->router_list_node);
5069         mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
5070 }
5071
5072 static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp,
5073                                     const struct fib6_info *rt)
5074 {
5075         return rt->fib6_nh->fib_nh_gw_family ||
5076                mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL);
5077 }
5078
5079 static struct mlxsw_sp_nexthop_group *
5080 mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp,
5081                                struct mlxsw_sp_fib6_entry *fib6_entry)
5082 {
5083         struct mlxsw_sp_nexthop_group *nh_grp;
5084         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5085         struct mlxsw_sp_nexthop *nh;
5086         int i = 0;
5087         int err;
5088
5089         nh_grp = kzalloc(struct_size(nh_grp, nexthops, fib6_entry->nrt6),
5090                          GFP_KERNEL);
5091         if (!nh_grp)
5092                 return ERR_PTR(-ENOMEM);
5093         INIT_LIST_HEAD(&nh_grp->fib_list);
5094 #if IS_ENABLED(CONFIG_IPV6)
5095         nh_grp->neigh_tbl = &nd_tbl;
5096 #endif
5097         mlxsw_sp_rt6 = list_first_entry(&fib6_entry->rt6_list,
5098                                         struct mlxsw_sp_rt6, list);
5099         nh_grp->gateway = mlxsw_sp_rt6_is_gateway(mlxsw_sp, mlxsw_sp_rt6->rt);
5100         nh_grp->count = fib6_entry->nrt6;
5101         for (i = 0; i < nh_grp->count; i++) {
5102                 struct fib6_info *rt = mlxsw_sp_rt6->rt;
5103
5104                 nh = &nh_grp->nexthops[i];
5105                 err = mlxsw_sp_nexthop6_init(mlxsw_sp, nh_grp, nh, rt);
5106                 if (err)
5107                         goto err_nexthop6_init;
5108                 mlxsw_sp_rt6 = list_next_entry(mlxsw_sp_rt6, list);
5109         }
5110
5111         err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
5112         if (err)
5113                 goto err_nexthop_group_insert;
5114
5115         mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
5116         return nh_grp;
5117
5118 err_nexthop_group_insert:
5119 err_nexthop6_init:
5120         for (i--; i >= 0; i--) {
5121                 nh = &nh_grp->nexthops[i];
5122                 mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
5123         }
5124         kfree(nh_grp);
5125         return ERR_PTR(err);
5126 }
5127
5128 static void
5129 mlxsw_sp_nexthop6_group_destroy(struct mlxsw_sp *mlxsw_sp,
5130                                 struct mlxsw_sp_nexthop_group *nh_grp)
5131 {
5132         struct mlxsw_sp_nexthop *nh;
5133         int i = nh_grp->count;
5134
5135         mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
5136         for (i--; i >= 0; i--) {
5137                 nh = &nh_grp->nexthops[i];
5138                 mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
5139         }
5140         mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
5141         WARN_ON(nh_grp->adj_index_valid);
5142         kfree(nh_grp);
5143 }
5144
5145 static int mlxsw_sp_nexthop6_group_get(struct mlxsw_sp *mlxsw_sp,
5146                                        struct mlxsw_sp_fib6_entry *fib6_entry)
5147 {
5148         struct mlxsw_sp_nexthop_group *nh_grp;
5149
5150         nh_grp = mlxsw_sp_nexthop6_group_lookup(mlxsw_sp, fib6_entry);
5151         if (!nh_grp) {
5152                 nh_grp = mlxsw_sp_nexthop6_group_create(mlxsw_sp, fib6_entry);
5153                 if (IS_ERR(nh_grp))
5154                         return PTR_ERR(nh_grp);
5155         }
5156
5157         list_add_tail(&fib6_entry->common.nexthop_group_node,
5158                       &nh_grp->fib_list);
5159         fib6_entry->common.nh_group = nh_grp;
5160
5161         return 0;
5162 }
5163
5164 static void mlxsw_sp_nexthop6_group_put(struct mlxsw_sp *mlxsw_sp,
5165                                         struct mlxsw_sp_fib_entry *fib_entry)
5166 {
5167         struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
5168
5169         list_del(&fib_entry->nexthop_group_node);
5170         if (!list_empty(&nh_grp->fib_list))
5171                 return;
5172         mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, nh_grp);
5173 }
5174
5175 static int
5176 mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp,
5177                                struct mlxsw_sp_fib6_entry *fib6_entry)
5178 {
5179         struct mlxsw_sp_nexthop_group *old_nh_grp = fib6_entry->common.nh_group;
5180         int err;
5181
5182         fib6_entry->common.nh_group = NULL;
5183         list_del(&fib6_entry->common.nexthop_group_node);
5184
5185         err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
5186         if (err)
5187                 goto err_nexthop6_group_get;
5188
5189         /* In case this entry is offloaded, then the adjacency index
5190          * currently associated with it in the device's table is that
5191          * of the old group. Start using the new one instead.
5192          */
5193         err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
5194         if (err)
5195                 goto err_fib_node_entry_add;
5196
5197         if (list_empty(&old_nh_grp->fib_list))
5198                 mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, old_nh_grp);
5199
5200         return 0;
5201
5202 err_fib_node_entry_add:
5203         mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
5204 err_nexthop6_group_get:
5205         list_add_tail(&fib6_entry->common.nexthop_group_node,
5206                       &old_nh_grp->fib_list);
5207         fib6_entry->common.nh_group = old_nh_grp;
5208         return err;
5209 }
5210
5211 static int
5212 mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
5213                                 struct mlxsw_sp_fib6_entry *fib6_entry,
5214                                 struct fib6_info *rt)
5215 {
5216         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5217         int err;
5218
5219         mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
5220         if (IS_ERR(mlxsw_sp_rt6))
5221                 return PTR_ERR(mlxsw_sp_rt6);
5222
5223         list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
5224         fib6_entry->nrt6++;
5225
5226         err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
5227         if (err)
5228                 goto err_nexthop6_group_update;
5229
5230         return 0;
5231
5232 err_nexthop6_group_update:
5233         fib6_entry->nrt6--;
5234         list_del(&mlxsw_sp_rt6->list);
5235         mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5236         return err;
5237 }
5238
5239 static void
5240 mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp,
5241                                 struct mlxsw_sp_fib6_entry *fib6_entry,
5242                                 struct fib6_info *rt)
5243 {
5244         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5245
5246         mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt);
5247         if (WARN_ON(!mlxsw_sp_rt6))
5248                 return;
5249
5250         fib6_entry->nrt6--;
5251         list_del(&mlxsw_sp_rt6->list);
5252         mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
5253         mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5254 }
5255
5256 static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp,
5257                                          struct mlxsw_sp_fib_entry *fib_entry,
5258                                          const struct fib6_info *rt)
5259 {
5260         /* Packets hitting RTF_REJECT routes need to be discarded by the
5261          * stack. We can rely on their destination device not having a
5262          * RIF (it's the loopback device) and can thus use action type
5263          * local, which will cause them to be trapped with a lower
5264          * priority than packets that need to be locally received.
5265          */
5266         if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
5267                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
5268         else if (rt->fib6_type == RTN_BLACKHOLE)
5269                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE;
5270         else if (rt->fib6_flags & RTF_REJECT)
5271                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
5272         else if (mlxsw_sp_rt6_is_gateway(mlxsw_sp, rt))
5273                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
5274         else
5275                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
5276 }
5277
5278 static void
5279 mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry *fib6_entry)
5280 {
5281         struct mlxsw_sp_rt6 *mlxsw_sp_rt6, *tmp;
5282
5283         list_for_each_entry_safe(mlxsw_sp_rt6, tmp, &fib6_entry->rt6_list,
5284                                  list) {
5285                 fib6_entry->nrt6--;
5286                 list_del(&mlxsw_sp_rt6->list);
5287                 mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5288         }
5289 }
5290
5291 static struct mlxsw_sp_fib6_entry *
5292 mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp,
5293                            struct mlxsw_sp_fib_node *fib_node,
5294                            struct fib6_info *rt)
5295 {
5296         struct mlxsw_sp_fib6_entry *fib6_entry;
5297         struct mlxsw_sp_fib_entry *fib_entry;
5298         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5299         int err;
5300
5301         fib6_entry = kzalloc(sizeof(*fib6_entry), GFP_KERNEL);
5302         if (!fib6_entry)
5303                 return ERR_PTR(-ENOMEM);
5304         fib_entry = &fib6_entry->common;
5305
5306         mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
5307         if (IS_ERR(mlxsw_sp_rt6)) {
5308                 err = PTR_ERR(mlxsw_sp_rt6);
5309                 goto err_rt6_create;
5310         }
5311
5312         mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, mlxsw_sp_rt6->rt);
5313
5314         INIT_LIST_HEAD(&fib6_entry->rt6_list);
5315         list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
5316         fib6_entry->nrt6 = 1;
5317         err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
5318         if (err)
5319                 goto err_nexthop6_group_get;
5320
5321         fib_entry->fib_node = fib_node;
5322
5323         return fib6_entry;
5324
5325 err_nexthop6_group_get:
5326         list_del(&mlxsw_sp_rt6->list);
5327         mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5328 err_rt6_create:
5329         kfree(fib6_entry);
5330         return ERR_PTR(err);
5331 }
5332
5333 static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp,
5334                                         struct mlxsw_sp_fib6_entry *fib6_entry)
5335 {
5336         mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
5337         mlxsw_sp_fib6_entry_rt_destroy_all(fib6_entry);
5338         WARN_ON(fib6_entry->nrt6);
5339         kfree(fib6_entry);
5340 }
5341
5342 static struct mlxsw_sp_fib6_entry *
5343 mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
5344                               const struct fib6_info *nrt, bool replace)
5345 {
5346         struct mlxsw_sp_fib6_entry *fib6_entry, *fallback = NULL;
5347
5348         list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
5349                 struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
5350
5351                 if (rt->fib6_table->tb6_id > nrt->fib6_table->tb6_id)
5352                         continue;
5353                 if (rt->fib6_table->tb6_id != nrt->fib6_table->tb6_id)
5354                         break;
5355                 if (replace && rt->fib6_metric == nrt->fib6_metric) {
5356                         if (mlxsw_sp_fib6_rt_can_mp(rt) ==
5357                             mlxsw_sp_fib6_rt_can_mp(nrt))
5358                                 return fib6_entry;
5359                         if (mlxsw_sp_fib6_rt_can_mp(nrt))
5360                                 fallback = fallback ?: fib6_entry;
5361                 }
5362                 if (rt->fib6_metric > nrt->fib6_metric)
5363                         return fallback ?: fib6_entry;
5364         }
5365
5366         return fallback;
5367 }
5368
5369 static int
5370 mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry,
5371                                bool replace)
5372 {
5373         struct mlxsw_sp_fib_node *fib_node = new6_entry->common.fib_node;
5374         struct fib6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry);
5375         struct mlxsw_sp_fib6_entry *fib6_entry;
5376
5377         fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, replace);
5378
5379         if (replace && WARN_ON(!fib6_entry))
5380                 return -EINVAL;
5381
5382         if (fib6_entry) {
5383                 list_add_tail(&new6_entry->common.list,
5384                               &fib6_entry->common.list);
5385         } else {
5386                 struct mlxsw_sp_fib6_entry *last;
5387
5388                 list_for_each_entry(last, &fib_node->entry_list, common.list) {
5389                         struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(last);
5390
5391                         if (nrt->fib6_table->tb6_id > rt->fib6_table->tb6_id)
5392                                 break;
5393                         fib6_entry = last;
5394                 }
5395
5396                 if (fib6_entry)
5397                         list_add(&new6_entry->common.list,
5398                                  &fib6_entry->common.list);
5399                 else
5400                         list_add(&new6_entry->common.list,
5401                                  &fib_node->entry_list);
5402         }
5403
5404         return 0;
5405 }
5406
5407 static void
5408 mlxsw_sp_fib6_node_list_remove(struct mlxsw_sp_fib6_entry *fib6_entry)
5409 {
5410         list_del(&fib6_entry->common.list);
5411 }
5412
5413 static int mlxsw_sp_fib6_node_entry_link(struct mlxsw_sp *mlxsw_sp,
5414                                          struct mlxsw_sp_fib6_entry *fib6_entry,
5415                                          bool replace)
5416 {
5417         int err;
5418
5419         err = mlxsw_sp_fib6_node_list_insert(fib6_entry, replace);
5420         if (err)
5421                 return err;
5422
5423         err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
5424         if (err)
5425                 goto err_fib_node_entry_add;
5426
5427         return 0;
5428
5429 err_fib_node_entry_add:
5430         mlxsw_sp_fib6_node_list_remove(fib6_entry);
5431         return err;
5432 }
5433
5434 static void
5435 mlxsw_sp_fib6_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
5436                                 struct mlxsw_sp_fib6_entry *fib6_entry)
5437 {
5438         mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib6_entry->common);
5439         mlxsw_sp_fib6_node_list_remove(fib6_entry);
5440 }
5441
5442 static struct mlxsw_sp_fib6_entry *
5443 mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp,
5444                            const struct fib6_info *rt)
5445 {
5446         struct mlxsw_sp_fib6_entry *fib6_entry;
5447         struct mlxsw_sp_fib_node *fib_node;
5448         struct mlxsw_sp_fib *fib;
5449         struct mlxsw_sp_vr *vr;
5450
5451         vr = mlxsw_sp_vr_find(mlxsw_sp, rt->fib6_table->tb6_id);
5452         if (!vr)
5453                 return NULL;
5454         fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV6);
5455
5456         fib_node = mlxsw_sp_fib_node_lookup(fib, &rt->fib6_dst.addr,
5457                                             sizeof(rt->fib6_dst.addr),
5458                                             rt->fib6_dst.plen);
5459         if (!fib_node)
5460                 return NULL;
5461
5462         list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
5463                 struct fib6_info *iter_rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
5464
5465                 if (rt->fib6_table->tb6_id == iter_rt->fib6_table->tb6_id &&
5466                     rt->fib6_metric == iter_rt->fib6_metric &&
5467                     mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt))
5468                         return fib6_entry;
5469         }
5470
5471         return NULL;
5472 }
5473
5474 static void mlxsw_sp_fib6_entry_replace(struct mlxsw_sp *mlxsw_sp,
5475                                         struct mlxsw_sp_fib6_entry *fib6_entry,
5476                                         bool replace)
5477 {
5478         struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;
5479         struct mlxsw_sp_fib6_entry *replaced;
5480
5481         if (!replace)
5482                 return;
5483
5484         replaced = list_next_entry(fib6_entry, common.list);
5485
5486         mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, replaced);
5487         mlxsw_sp_fib6_entry_destroy(mlxsw_sp, replaced);
5488         mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5489 }
5490
5491 static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
5492                                     struct fib6_info *rt, bool replace)
5493 {
5494         struct mlxsw_sp_fib6_entry *fib6_entry;
5495         struct mlxsw_sp_fib_node *fib_node;
5496         int err;
5497
5498         if (mlxsw_sp->router->aborted)
5499                 return 0;
5500
5501         if (rt->fib6_src.plen)
5502                 return -EINVAL;
5503
5504         if (mlxsw_sp_fib6_rt_should_ignore(rt))
5505                 return 0;
5506
5507         fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->fib6_table->tb6_id,
5508                                          &rt->fib6_dst.addr,
5509                                          sizeof(rt->fib6_dst.addr),
5510                                          rt->fib6_dst.plen,
5511                                          MLXSW_SP_L3_PROTO_IPV6);
5512         if (IS_ERR(fib_node))
5513                 return PTR_ERR(fib_node);
5514
5515         /* Before creating a new entry, try to append route to an existing
5516          * multipath entry.
5517          */
5518         fib6_entry = mlxsw_sp_fib6_node_mp_entry_find(fib_node, rt, replace);
5519         if (fib6_entry) {
5520                 err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt);
5521                 if (err)
5522                         goto err_fib6_entry_nexthop_add;
5523                 return 0;
5524         }
5525
5526         fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt);
5527         if (IS_ERR(fib6_entry)) {
5528                 err = PTR_ERR(fib6_entry);
5529                 goto err_fib6_entry_create;
5530         }
5531
5532         err = mlxsw_sp_fib6_node_entry_link(mlxsw_sp, fib6_entry, replace);
5533         if (err)
5534                 goto err_fib6_node_entry_link;
5535
5536         mlxsw_sp_fib6_entry_replace(mlxsw_sp, fib6_entry, replace);
5537
5538         return 0;
5539
5540 err_fib6_node_entry_link:
5541         mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
5542 err_fib6_entry_create:
5543 err_fib6_entry_nexthop_add:
5544         mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5545         return err;
5546 }
5547
5548 static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp,
5549                                      struct fib6_info *rt)
5550 {
5551         struct mlxsw_sp_fib6_entry *fib6_entry;
5552         struct mlxsw_sp_fib_node *fib_node;
5553
5554         if (mlxsw_sp->router->aborted)
5555                 return;
5556
5557         if (mlxsw_sp_fib6_rt_should_ignore(rt))
5558                 return;
5559
5560         fib6_entry = mlxsw_sp_fib6_entry_lookup(mlxsw_sp, rt);
5561         if (WARN_ON(!fib6_entry))
5562                 return;
5563
5564         /* If route is part of a multipath entry, but not the last one
5565          * removed, then only reduce its nexthop group.
5566          */
5567         if (!list_is_singular(&fib6_entry->rt6_list)) {
5568                 mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt);
5569                 return;
5570         }
5571
5572         fib_node = fib6_entry->common.fib_node;
5573
5574         mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry);
5575         mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
5576         mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5577 }
5578
5579 static int __mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp,
5580                                             enum mlxsw_reg_ralxx_protocol proto,
5581                                             u8 tree_id)
5582 {
5583         char ralta_pl[MLXSW_REG_RALTA_LEN];
5584         char ralst_pl[MLXSW_REG_RALST_LEN];
5585         int i, err;
5586
5587         mlxsw_reg_ralta_pack(ralta_pl, true, proto, tree_id);
5588         err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
5589         if (err)
5590                 return err;
5591
5592         mlxsw_reg_ralst_pack(ralst_pl, 0xff, tree_id);
5593         err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
5594         if (err)
5595                 return err;
5596
5597         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
5598                 struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
5599                 char raltb_pl[MLXSW_REG_RALTB_LEN];
5600                 char ralue_pl[MLXSW_REG_RALUE_LEN];
5601
5602                 mlxsw_reg_raltb_pack(raltb_pl, vr->id, proto, tree_id);
5603                 err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb),
5604                                       raltb_pl);
5605                 if (err)
5606                         return err;
5607
5608                 mlxsw_reg_ralue_pack(ralue_pl, proto,
5609                                      MLXSW_REG_RALUE_OP_WRITE_WRITE, vr->id, 0);
5610                 mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
5611                 err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue),
5612                                       ralue_pl);
5613                 if (err)
5614                         return err;
5615         }
5616
5617         return 0;
5618 }
5619
5620 static struct mlxsw_sp_mr_table *
5621 mlxsw_sp_router_fibmr_family_to_table(struct mlxsw_sp_vr *vr, int family)
5622 {
5623         if (family == RTNL_FAMILY_IPMR)
5624                 return vr->mr_table[MLXSW_SP_L3_PROTO_IPV4];
5625         else
5626                 return vr->mr_table[MLXSW_SP_L3_PROTO_IPV6];
5627 }
5628
5629 static int mlxsw_sp_router_fibmr_add(struct mlxsw_sp *mlxsw_sp,
5630                                      struct mfc_entry_notifier_info *men_info,
5631                                      bool replace)
5632 {
5633         struct mlxsw_sp_mr_table *mrt;
5634         struct mlxsw_sp_vr *vr;
5635
5636         if (mlxsw_sp->router->aborted)
5637                 return 0;
5638
5639         vr = mlxsw_sp_vr_get(mlxsw_sp, men_info->tb_id, NULL);
5640         if (IS_ERR(vr))
5641                 return PTR_ERR(vr);
5642
5643         mrt = mlxsw_sp_router_fibmr_family_to_table(vr, men_info->info.family);
5644         return mlxsw_sp_mr_route_add(mrt, men_info->mfc, replace);
5645 }
5646
5647 static void mlxsw_sp_router_fibmr_del(struct mlxsw_sp *mlxsw_sp,
5648                                       struct mfc_entry_notifier_info *men_info)
5649 {
5650         struct mlxsw_sp_mr_table *mrt;
5651         struct mlxsw_sp_vr *vr;
5652
5653         if (mlxsw_sp->router->aborted)
5654                 return;
5655
5656         vr = mlxsw_sp_vr_find(mlxsw_sp, men_info->tb_id);
5657         if (WARN_ON(!vr))
5658                 return;
5659
5660         mrt = mlxsw_sp_router_fibmr_family_to_table(vr, men_info->info.family);
5661         mlxsw_sp_mr_route_del(mrt, men_info->mfc);
5662         mlxsw_sp_vr_put(mlxsw_sp, vr);
5663 }
5664
5665 static int
5666 mlxsw_sp_router_fibmr_vif_add(struct mlxsw_sp *mlxsw_sp,
5667                               struct vif_entry_notifier_info *ven_info)
5668 {
5669         struct mlxsw_sp_mr_table *mrt;
5670         struct mlxsw_sp_rif *rif;
5671         struct mlxsw_sp_vr *vr;
5672
5673         if (mlxsw_sp->router->aborted)
5674                 return 0;
5675
5676         vr = mlxsw_sp_vr_get(mlxsw_sp, ven_info->tb_id, NULL);
5677         if (IS_ERR(vr))
5678                 return PTR_ERR(vr);
5679
5680         mrt = mlxsw_sp_router_fibmr_family_to_table(vr, ven_info->info.family);
5681         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, ven_info->dev);
5682         return mlxsw_sp_mr_vif_add(mrt, ven_info->dev,
5683                                    ven_info->vif_index,
5684                                    ven_info->vif_flags, rif);
5685 }
5686
5687 static void
5688 mlxsw_sp_router_fibmr_vif_del(struct mlxsw_sp *mlxsw_sp,
5689                               struct vif_entry_notifier_info *ven_info)
5690 {
5691         struct mlxsw_sp_mr_table *mrt;
5692         struct mlxsw_sp_vr *vr;
5693
5694         if (mlxsw_sp->router->aborted)
5695                 return;
5696
5697         vr = mlxsw_sp_vr_find(mlxsw_sp, ven_info->tb_id);
5698         if (WARN_ON(!vr))
5699                 return;
5700
5701         mrt = mlxsw_sp_router_fibmr_family_to_table(vr, ven_info->info.family);
5702         mlxsw_sp_mr_vif_del(mrt, ven_info->vif_index);
5703         mlxsw_sp_vr_put(mlxsw_sp, vr);
5704 }
5705
5706 static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
5707 {
5708         enum mlxsw_reg_ralxx_protocol proto = MLXSW_REG_RALXX_PROTOCOL_IPV4;
5709         int err;
5710
5711         err = __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
5712                                                MLXSW_SP_LPM_TREE_MIN);
5713         if (err)
5714                 return err;
5715
5716         /* The multicast router code does not need an abort trap as by default,
5717          * packets that don't match any routes are trapped to the CPU.
5718          */
5719
5720         proto = MLXSW_REG_RALXX_PROTOCOL_IPV6;
5721         return __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
5722                                                 MLXSW_SP_LPM_TREE_MIN + 1);
5723 }
5724
5725 static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp,
5726                                      struct mlxsw_sp_fib_node *fib_node)
5727 {
5728         struct mlxsw_sp_fib4_entry *fib4_entry, *tmp;
5729
5730         list_for_each_entry_safe(fib4_entry, tmp, &fib_node->entry_list,
5731                                  common.list) {
5732                 bool do_break = &tmp->common.list == &fib_node->entry_list;
5733
5734                 mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry);
5735                 mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
5736                 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5737                 /* Break when entry list is empty and node was freed.
5738                  * Otherwise, we'll access freed memory in the next
5739                  * iteration.
5740                  */
5741                 if (do_break)
5742                         break;
5743         }
5744 }
5745
5746 static void mlxsw_sp_fib6_node_flush(struct mlxsw_sp *mlxsw_sp,
5747                                      struct mlxsw_sp_fib_node *fib_node)
5748 {
5749         struct mlxsw_sp_fib6_entry *fib6_entry, *tmp;
5750
5751         list_for_each_entry_safe(fib6_entry, tmp, &fib_node->entry_list,
5752                                  common.list) {
5753                 bool do_break = &tmp->common.list == &fib_node->entry_list;
5754
5755                 mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry);
5756                 mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
5757                 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5758                 if (do_break)
5759                         break;
5760         }
5761 }
5762
5763 static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp,
5764                                     struct mlxsw_sp_fib_node *fib_node)
5765 {
5766         switch (fib_node->fib->proto) {
5767         case MLXSW_SP_L3_PROTO_IPV4:
5768                 mlxsw_sp_fib4_node_flush(mlxsw_sp, fib_node);
5769                 break;
5770         case MLXSW_SP_L3_PROTO_IPV6:
5771                 mlxsw_sp_fib6_node_flush(mlxsw_sp, fib_node);
5772                 break;
5773         }
5774 }
5775
5776 static void mlxsw_sp_vr_fib_flush(struct mlxsw_sp *mlxsw_sp,
5777                                   struct mlxsw_sp_vr *vr,
5778                                   enum mlxsw_sp_l3proto proto)
5779 {
5780         struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
5781         struct mlxsw_sp_fib_node *fib_node, *tmp;
5782
5783         list_for_each_entry_safe(fib_node, tmp, &fib->node_list, list) {
5784                 bool do_break = &tmp->list == &fib->node_list;
5785
5786                 mlxsw_sp_fib_node_flush(mlxsw_sp, fib_node);
5787                 if (do_break)
5788                         break;
5789         }
5790 }
5791
5792 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
5793 {
5794         int i, j;
5795
5796         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
5797                 struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
5798
5799                 if (!mlxsw_sp_vr_is_used(vr))
5800                         continue;
5801
5802                 for (j = 0; j < MLXSW_SP_L3_PROTO_MAX; j++)
5803                         mlxsw_sp_mr_table_flush(vr->mr_table[j]);
5804                 mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
5805
5806                 /* If virtual router was only used for IPv4, then it's no
5807                  * longer used.
5808                  */
5809                 if (!mlxsw_sp_vr_is_used(vr))
5810                         continue;
5811                 mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
5812         }
5813 }
5814
5815 static void mlxsw_sp_router_fib_abort(struct mlxsw_sp *mlxsw_sp)
5816 {
5817         int err;
5818
5819         if (mlxsw_sp->router->aborted)
5820                 return;
5821         dev_warn(mlxsw_sp->bus_info->dev, "FIB abort triggered. Note that FIB entries are no longer being offloaded to this device.\n");
5822         mlxsw_sp_router_fib_flush(mlxsw_sp);
5823         mlxsw_sp->router->aborted = true;
5824         err = mlxsw_sp_router_set_abort_trap(mlxsw_sp);
5825         if (err)
5826                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n");
5827 }
5828
5829 struct mlxsw_sp_fib_event_work {
5830         struct work_struct work;
5831         union {
5832                 struct fib6_entry_notifier_info fen6_info;
5833                 struct fib_entry_notifier_info fen_info;
5834                 struct fib_rule_notifier_info fr_info;
5835                 struct fib_nh_notifier_info fnh_info;
5836                 struct mfc_entry_notifier_info men_info;
5837                 struct vif_entry_notifier_info ven_info;
5838         };
5839         struct mlxsw_sp *mlxsw_sp;
5840         unsigned long event;
5841 };
5842
5843 static void mlxsw_sp_router_fib4_event_work(struct work_struct *work)
5844 {
5845         struct mlxsw_sp_fib_event_work *fib_work =
5846                 container_of(work, struct mlxsw_sp_fib_event_work, work);
5847         struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
5848         bool replace, append;
5849         int err;
5850
5851         /* Protect internal structures from changes */
5852         rtnl_lock();
5853         mlxsw_sp_span_respin(mlxsw_sp);
5854
5855         switch (fib_work->event) {
5856         case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5857         case FIB_EVENT_ENTRY_APPEND: /* fall through */
5858         case FIB_EVENT_ENTRY_ADD:
5859                 replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
5860                 append = fib_work->event == FIB_EVENT_ENTRY_APPEND;
5861                 err = mlxsw_sp_router_fib4_add(mlxsw_sp, &fib_work->fen_info,
5862                                                replace, append);
5863                 if (err)
5864                         mlxsw_sp_router_fib_abort(mlxsw_sp);
5865                 fib_info_put(fib_work->fen_info.fi);
5866                 break;
5867         case FIB_EVENT_ENTRY_DEL:
5868                 mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info);
5869                 fib_info_put(fib_work->fen_info.fi);
5870                 break;
5871         case FIB_EVENT_RULE_ADD:
5872                 /* if we get here, a rule was added that we do not support.
5873                  * just do the fib_abort
5874                  */
5875                 mlxsw_sp_router_fib_abort(mlxsw_sp);
5876                 break;
5877         case FIB_EVENT_NH_ADD: /* fall through */
5878         case FIB_EVENT_NH_DEL:
5879                 mlxsw_sp_nexthop4_event(mlxsw_sp, fib_work->event,
5880                                         fib_work->fnh_info.fib_nh);
5881                 fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
5882                 break;
5883         }
5884         rtnl_unlock();
5885         kfree(fib_work);
5886 }
5887
5888 static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
5889 {
5890         struct mlxsw_sp_fib_event_work *fib_work =
5891                 container_of(work, struct mlxsw_sp_fib_event_work, work);
5892         struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
5893         bool replace;
5894         int err;
5895
5896         rtnl_lock();
5897         mlxsw_sp_span_respin(mlxsw_sp);
5898
5899         switch (fib_work->event) {
5900         case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5901         case FIB_EVENT_ENTRY_APPEND: /* fall through */
5902         case FIB_EVENT_ENTRY_ADD:
5903                 replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
5904                 err = mlxsw_sp_router_fib6_add(mlxsw_sp,
5905                                                fib_work->fen6_info.rt, replace);
5906                 if (err)
5907                         mlxsw_sp_router_fib_abort(mlxsw_sp);
5908                 mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
5909                 break;
5910         case FIB_EVENT_ENTRY_DEL:
5911                 mlxsw_sp_router_fib6_del(mlxsw_sp, fib_work->fen6_info.rt);
5912                 mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
5913                 break;
5914         case FIB_EVENT_RULE_ADD:
5915                 /* if we get here, a rule was added that we do not support.
5916                  * just do the fib_abort
5917                  */
5918                 mlxsw_sp_router_fib_abort(mlxsw_sp);
5919                 break;
5920         }
5921         rtnl_unlock();
5922         kfree(fib_work);
5923 }
5924
5925 static void mlxsw_sp_router_fibmr_event_work(struct work_struct *work)
5926 {
5927         struct mlxsw_sp_fib_event_work *fib_work =
5928                 container_of(work, struct mlxsw_sp_fib_event_work, work);
5929         struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
5930         bool replace;
5931         int err;
5932
5933         rtnl_lock();
5934         switch (fib_work->event) {
5935         case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5936         case FIB_EVENT_ENTRY_ADD:
5937                 replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
5938
5939                 err = mlxsw_sp_router_fibmr_add(mlxsw_sp, &fib_work->men_info,
5940                                                 replace);
5941                 if (err)
5942                         mlxsw_sp_router_fib_abort(mlxsw_sp);
5943                 mr_cache_put(fib_work->men_info.mfc);
5944                 break;
5945         case FIB_EVENT_ENTRY_DEL:
5946                 mlxsw_sp_router_fibmr_del(mlxsw_sp, &fib_work->men_info);
5947                 mr_cache_put(fib_work->men_info.mfc);
5948                 break;
5949         case FIB_EVENT_VIF_ADD:
5950                 err = mlxsw_sp_router_fibmr_vif_add(mlxsw_sp,
5951                                                     &fib_work->ven_info);
5952                 if (err)
5953                         mlxsw_sp_router_fib_abort(mlxsw_sp);
5954                 dev_put(fib_work->ven_info.dev);
5955                 break;
5956         case FIB_EVENT_VIF_DEL:
5957                 mlxsw_sp_router_fibmr_vif_del(mlxsw_sp,
5958                                               &fib_work->ven_info);
5959                 dev_put(fib_work->ven_info.dev);
5960                 break;
5961         case FIB_EVENT_RULE_ADD:
5962                 /* if we get here, a rule was added that we do not support.
5963                  * just do the fib_abort
5964                  */
5965                 mlxsw_sp_router_fib_abort(mlxsw_sp);
5966                 break;
5967         }
5968         rtnl_unlock();
5969         kfree(fib_work);
5970 }
5971
5972 static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work,
5973                                        struct fib_notifier_info *info)
5974 {
5975         struct fib_entry_notifier_info *fen_info;
5976         struct fib_nh_notifier_info *fnh_info;
5977
5978         switch (fib_work->event) {
5979         case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5980         case FIB_EVENT_ENTRY_APPEND: /* fall through */
5981         case FIB_EVENT_ENTRY_ADD: /* fall through */
5982         case FIB_EVENT_ENTRY_DEL:
5983                 fen_info = container_of(info, struct fib_entry_notifier_info,
5984                                         info);
5985                 fib_work->fen_info = *fen_info;
5986                 /* Take reference on fib_info to prevent it from being
5987                  * freed while work is queued. Release it afterwards.
5988                  */
5989                 fib_info_hold(fib_work->fen_info.fi);
5990                 break;
5991         case FIB_EVENT_NH_ADD: /* fall through */
5992         case FIB_EVENT_NH_DEL:
5993                 fnh_info = container_of(info, struct fib_nh_notifier_info,
5994                                         info);
5995                 fib_work->fnh_info = *fnh_info;
5996                 fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
5997                 break;
5998         }
5999 }
6000
6001 static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work,
6002                                        struct fib_notifier_info *info)
6003 {
6004         struct fib6_entry_notifier_info *fen6_info;
6005
6006         switch (fib_work->event) {
6007         case FIB_EVENT_ENTRY_REPLACE: /* fall through */
6008         case FIB_EVENT_ENTRY_APPEND: /* fall through */
6009         case FIB_EVENT_ENTRY_ADD: /* fall through */
6010         case FIB_EVENT_ENTRY_DEL:
6011                 fen6_info = container_of(info, struct fib6_entry_notifier_info,
6012                                          info);
6013                 fib_work->fen6_info = *fen6_info;
6014                 fib6_info_hold(fib_work->fen6_info.rt);
6015                 break;
6016         }
6017 }
6018
6019 static void
6020 mlxsw_sp_router_fibmr_event(struct mlxsw_sp_fib_event_work *fib_work,
6021                             struct fib_notifier_info *info)
6022 {
6023         switch (fib_work->event) {
6024         case FIB_EVENT_ENTRY_REPLACE: /* fall through */
6025         case FIB_EVENT_ENTRY_ADD: /* fall through */
6026         case FIB_EVENT_ENTRY_DEL:
6027                 memcpy(&fib_work->men_info, info, sizeof(fib_work->men_info));
6028                 mr_cache_hold(fib_work->men_info.mfc);
6029                 break;
6030         case FIB_EVENT_VIF_ADD: /* fall through */
6031         case FIB_EVENT_VIF_DEL:
6032                 memcpy(&fib_work->ven_info, info, sizeof(fib_work->ven_info));
6033                 dev_hold(fib_work->ven_info.dev);
6034                 break;
6035         }
6036 }
6037
6038 static int mlxsw_sp_router_fib_rule_event(unsigned long event,
6039                                           struct fib_notifier_info *info,
6040                                           struct mlxsw_sp *mlxsw_sp)
6041 {
6042         struct netlink_ext_ack *extack = info->extack;
6043         struct fib_rule_notifier_info *fr_info;
6044         struct fib_rule *rule;
6045         int err = 0;
6046
6047         /* nothing to do at the moment */
6048         if (event == FIB_EVENT_RULE_DEL)
6049                 return 0;
6050
6051         if (mlxsw_sp->router->aborted)
6052                 return 0;
6053
6054         fr_info = container_of(info, struct fib_rule_notifier_info, info);
6055         rule = fr_info->rule;
6056
6057         /* Rule only affects locally generated traffic */
6058         if (rule->iifindex == info->net->loopback_dev->ifindex)
6059                 return 0;
6060
6061         switch (info->family) {
6062         case AF_INET:
6063                 if (!fib4_rule_default(rule) && !rule->l3mdev)
6064                         err = -EOPNOTSUPP;
6065                 break;
6066         case AF_INET6:
6067                 if (!fib6_rule_default(rule) && !rule->l3mdev)
6068                         err = -EOPNOTSUPP;
6069                 break;
6070         case RTNL_FAMILY_IPMR:
6071                 if (!ipmr_rule_default(rule) && !rule->l3mdev)
6072                         err = -EOPNOTSUPP;
6073                 break;
6074         case RTNL_FAMILY_IP6MR:
6075                 if (!ip6mr_rule_default(rule) && !rule->l3mdev)
6076                         err = -EOPNOTSUPP;
6077                 break;
6078         }
6079
6080         if (err < 0)
6081                 NL_SET_ERR_MSG_MOD(extack, "FIB rules not supported");
6082
6083         return err;
6084 }
6085
6086 /* Called with rcu_read_lock() */
6087 static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
6088                                      unsigned long event, void *ptr)
6089 {
6090         struct mlxsw_sp_fib_event_work *fib_work;
6091         struct fib_notifier_info *info = ptr;
6092         struct mlxsw_sp_router *router;
6093         int err;
6094
6095         if (!net_eq(info->net, &init_net) ||
6096             (info->family != AF_INET && info->family != AF_INET6 &&
6097              info->family != RTNL_FAMILY_IPMR &&
6098              info->family != RTNL_FAMILY_IP6MR))
6099                 return NOTIFY_DONE;
6100
6101         router = container_of(nb, struct mlxsw_sp_router, fib_nb);
6102
6103         switch (event) {
6104         case FIB_EVENT_RULE_ADD: /* fall through */
6105         case FIB_EVENT_RULE_DEL:
6106                 err = mlxsw_sp_router_fib_rule_event(event, info,
6107                                                      router->mlxsw_sp);
6108                 if (!err || info->extack)
6109                         return notifier_from_errno(err);
6110                 break;
6111         case FIB_EVENT_ENTRY_ADD:
6112         case FIB_EVENT_ENTRY_REPLACE: /* fall through */
6113         case FIB_EVENT_ENTRY_APPEND:  /* fall through */
6114                 if (router->aborted) {
6115                         NL_SET_ERR_MSG_MOD(info->extack, "FIB offload was aborted. Not configuring route");
6116                         return notifier_from_errno(-EINVAL);
6117                 }
6118                 if (info->family == AF_INET) {
6119                         struct fib_entry_notifier_info *fen_info = ptr;
6120
6121                         if (fen_info->fi->fib_nh_is_v6) {
6122                                 NL_SET_ERR_MSG_MOD(info->extack, "IPv6 gateway with IPv4 route is not supported");
6123                                 return notifier_from_errno(-EINVAL);
6124                         }
6125                 }
6126                 break;
6127         }
6128
6129         fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
6130         if (WARN_ON(!fib_work))
6131                 return NOTIFY_BAD;
6132
6133         fib_work->mlxsw_sp = router->mlxsw_sp;
6134         fib_work->event = event;
6135
6136         switch (info->family) {
6137         case AF_INET:
6138                 INIT_WORK(&fib_work->work, mlxsw_sp_router_fib4_event_work);
6139                 mlxsw_sp_router_fib4_event(fib_work, info);
6140                 break;
6141         case AF_INET6:
6142                 INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work);
6143                 mlxsw_sp_router_fib6_event(fib_work, info);
6144                 break;
6145         case RTNL_FAMILY_IP6MR:
6146         case RTNL_FAMILY_IPMR:
6147                 INIT_WORK(&fib_work->work, mlxsw_sp_router_fibmr_event_work);
6148                 mlxsw_sp_router_fibmr_event(fib_work, info);
6149                 break;
6150         }
6151
6152         mlxsw_core_schedule_work(&fib_work->work);
6153
6154         return NOTIFY_DONE;
6155 }
6156
6157 struct mlxsw_sp_rif *
6158 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
6159                          const struct net_device *dev)
6160 {
6161         int i;
6162
6163         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
6164                 if (mlxsw_sp->router->rifs[i] &&
6165                     mlxsw_sp->router->rifs[i]->dev == dev)
6166                         return mlxsw_sp->router->rifs[i];
6167
6168         return NULL;
6169 }
6170
6171 static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif)
6172 {
6173         char ritr_pl[MLXSW_REG_RITR_LEN];
6174         int err;
6175
6176         mlxsw_reg_ritr_rif_pack(ritr_pl, rif);
6177         err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6178         if (err)
6179                 return err;
6180
6181         mlxsw_reg_ritr_enable_set(ritr_pl, false);
6182         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6183 }
6184
6185 static void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
6186                                           struct mlxsw_sp_rif *rif)
6187 {
6188         mlxsw_sp_router_rif_disable(mlxsw_sp, rif->rif_index);
6189         mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp, rif);
6190         mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif);
6191 }
6192
6193 static bool
6194 mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, struct net_device *dev,
6195                            unsigned long event)
6196 {
6197         struct inet6_dev *inet6_dev;
6198         bool addr_list_empty = true;
6199         struct in_device *idev;
6200
6201         switch (event) {
6202         case NETDEV_UP:
6203                 return rif == NULL;
6204         case NETDEV_DOWN:
6205                 idev = __in_dev_get_rtnl(dev);
6206                 if (idev && idev->ifa_list)
6207                         addr_list_empty = false;
6208
6209                 inet6_dev = __in6_dev_get(dev);
6210                 if (addr_list_empty && inet6_dev &&
6211                     !list_empty(&inet6_dev->addr_list))
6212                         addr_list_empty = false;
6213
6214                 /* macvlans do not have a RIF, but rather piggy back on the
6215                  * RIF of their lower device.
6216                  */
6217                 if (netif_is_macvlan(dev) && addr_list_empty)
6218                         return true;
6219
6220                 if (rif && addr_list_empty &&
6221                     !netif_is_l3_slave(rif->dev))
6222                         return true;
6223                 /* It is possible we already removed the RIF ourselves
6224                  * if it was assigned to a netdev that is now a bridge
6225                  * or LAG slave.
6226                  */
6227                 return false;
6228         }
6229
6230         return false;
6231 }
6232
6233 static enum mlxsw_sp_rif_type
6234 mlxsw_sp_dev_rif_type(const struct mlxsw_sp *mlxsw_sp,
6235                       const struct net_device *dev)
6236 {
6237         enum mlxsw_sp_fid_type type;
6238
6239         if (mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL))
6240                 return MLXSW_SP_RIF_TYPE_IPIP_LB;
6241
6242         /* Otherwise RIF type is derived from the type of the underlying FID. */
6243         if (is_vlan_dev(dev) && netif_is_bridge_master(vlan_dev_real_dev(dev)))
6244                 type = MLXSW_SP_FID_TYPE_8021Q;
6245         else if (netif_is_bridge_master(dev) && br_vlan_enabled(dev))
6246                 type = MLXSW_SP_FID_TYPE_8021Q;
6247         else if (netif_is_bridge_master(dev))
6248                 type = MLXSW_SP_FID_TYPE_8021D;
6249         else
6250                 type = MLXSW_SP_FID_TYPE_RFID;
6251
6252         return mlxsw_sp_fid_type_rif_type(mlxsw_sp, type);
6253 }
6254
6255 static int mlxsw_sp_rif_index_alloc(struct mlxsw_sp *mlxsw_sp, u16 *p_rif_index)
6256 {
6257         int i;
6258
6259         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
6260                 if (!mlxsw_sp->router->rifs[i]) {
6261                         *p_rif_index = i;
6262                         return 0;
6263                 }
6264         }
6265
6266         return -ENOBUFS;
6267 }
6268
6269 static struct mlxsw_sp_rif *mlxsw_sp_rif_alloc(size_t rif_size, u16 rif_index,
6270                                                u16 vr_id,
6271                                                struct net_device *l3_dev)
6272 {
6273         struct mlxsw_sp_rif *rif;
6274
6275         rif = kzalloc(rif_size, GFP_KERNEL);
6276         if (!rif)
6277                 return NULL;
6278
6279         INIT_LIST_HEAD(&rif->nexthop_list);
6280         INIT_LIST_HEAD(&rif->neigh_list);
6281         if (l3_dev) {
6282                 ether_addr_copy(rif->addr, l3_dev->dev_addr);
6283                 rif->mtu = l3_dev->mtu;
6284                 rif->dev = l3_dev;
6285         }
6286         rif->vr_id = vr_id;
6287         rif->rif_index = rif_index;
6288
6289         return rif;
6290 }
6291
6292 struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp,
6293                                            u16 rif_index)
6294 {
6295         return mlxsw_sp->router->rifs[rif_index];
6296 }
6297
6298 u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif)
6299 {
6300         return rif->rif_index;
6301 }
6302
6303 u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
6304 {
6305         return lb_rif->common.rif_index;
6306 }
6307
6308 u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
6309 {
6310         u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(lb_rif->common.dev);
6311         struct mlxsw_sp_vr *ul_vr;
6312
6313         ul_vr = mlxsw_sp_vr_get(lb_rif->common.mlxsw_sp, ul_tb_id, NULL);
6314         if (WARN_ON(IS_ERR(ul_vr)))
6315                 return 0;
6316
6317         return ul_vr->id;
6318 }
6319
6320 u16 mlxsw_sp_ipip_lb_ul_rif_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
6321 {
6322         return lb_rif->ul_rif_id;
6323 }
6324
6325 int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif)
6326 {
6327         return rif->dev->ifindex;
6328 }
6329
6330 const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif)
6331 {
6332         return rif->dev;
6333 }
6334
6335 struct mlxsw_sp_fid *mlxsw_sp_rif_fid(const struct mlxsw_sp_rif *rif)
6336 {
6337         return rif->fid;
6338 }
6339
6340 static struct mlxsw_sp_rif *
6341 mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
6342                     const struct mlxsw_sp_rif_params *params,
6343                     struct netlink_ext_ack *extack)
6344 {
6345         u32 tb_id = l3mdev_fib_table(params->dev);
6346         const struct mlxsw_sp_rif_ops *ops;
6347         struct mlxsw_sp_fid *fid = NULL;
6348         enum mlxsw_sp_rif_type type;
6349         struct mlxsw_sp_rif *rif;
6350         struct mlxsw_sp_vr *vr;
6351         u16 rif_index;
6352         int i, err;
6353
6354         type = mlxsw_sp_dev_rif_type(mlxsw_sp, params->dev);
6355         ops = mlxsw_sp->rif_ops_arr[type];
6356
6357         vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN, extack);
6358         if (IS_ERR(vr))
6359                 return ERR_CAST(vr);
6360         vr->rif_count++;
6361
6362         err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index);
6363         if (err) {
6364                 NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces");
6365                 goto err_rif_index_alloc;
6366         }
6367
6368         rif = mlxsw_sp_rif_alloc(ops->rif_size, rif_index, vr->id, params->dev);
6369         if (!rif) {
6370                 err = -ENOMEM;
6371                 goto err_rif_alloc;
6372         }
6373         dev_hold(rif->dev);
6374         mlxsw_sp->router->rifs[rif_index] = rif;
6375         rif->mlxsw_sp = mlxsw_sp;
6376         rif->ops = ops;
6377
6378         if (ops->fid_get) {
6379                 fid = ops->fid_get(rif, extack);
6380                 if (IS_ERR(fid)) {
6381                         err = PTR_ERR(fid);
6382                         goto err_fid_get;
6383                 }
6384                 rif->fid = fid;
6385         }
6386
6387         if (ops->setup)
6388                 ops->setup(rif, params);
6389
6390         err = ops->configure(rif);
6391         if (err)
6392                 goto err_configure;
6393
6394         for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++) {
6395                 err = mlxsw_sp_mr_rif_add(vr->mr_table[i], rif);
6396                 if (err)
6397                         goto err_mr_rif_add;
6398         }
6399
6400         mlxsw_sp_rif_counters_alloc(rif);
6401
6402         return rif;
6403
6404 err_mr_rif_add:
6405         for (i--; i >= 0; i--)
6406                 mlxsw_sp_mr_rif_del(vr->mr_table[i], rif);
6407         ops->deconfigure(rif);
6408 err_configure:
6409         if (fid)
6410                 mlxsw_sp_fid_put(fid);
6411 err_fid_get:
6412         mlxsw_sp->router->rifs[rif_index] = NULL;
6413         dev_put(rif->dev);
6414         kfree(rif);
6415 err_rif_alloc:
6416 err_rif_index_alloc:
6417         vr->rif_count--;
6418         mlxsw_sp_vr_put(mlxsw_sp, vr);
6419         return ERR_PTR(err);
6420 }
6421
6422 static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif)
6423 {
6424         const struct mlxsw_sp_rif_ops *ops = rif->ops;
6425         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6426         struct mlxsw_sp_fid *fid = rif->fid;
6427         struct mlxsw_sp_vr *vr;
6428         int i;
6429
6430         mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif);
6431         vr = &mlxsw_sp->router->vrs[rif->vr_id];
6432
6433         mlxsw_sp_rif_counters_free(rif);
6434         for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++)
6435                 mlxsw_sp_mr_rif_del(vr->mr_table[i], rif);
6436         ops->deconfigure(rif);
6437         if (fid)
6438                 /* Loopback RIFs are not associated with a FID. */
6439                 mlxsw_sp_fid_put(fid);
6440         mlxsw_sp->router->rifs[rif->rif_index] = NULL;
6441         dev_put(rif->dev);
6442         kfree(rif);
6443         vr->rif_count--;
6444         mlxsw_sp_vr_put(mlxsw_sp, vr);
6445 }
6446
/* Destroy the RIF associated with @dev, if one exists. */
void mlxsw_sp_rif_destroy_by_dev(struct mlxsw_sp *mlxsw_sp,
				 struct net_device *dev)
{
	struct mlxsw_sp_rif *rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);

	if (rif)
		mlxsw_sp_rif_destroy(rif);
}
6457
6458 static void
6459 mlxsw_sp_rif_subport_params_init(struct mlxsw_sp_rif_params *params,
6460                                  struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
6461 {
6462         struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
6463
6464         params->vid = mlxsw_sp_port_vlan->vid;
6465         params->lag = mlxsw_sp_port->lagged;
6466         if (params->lag)
6467                 params->lag_id = mlxsw_sp_port->lag_id;
6468         else
6469                 params->system_port = mlxsw_sp_port->local_port;
6470 }
6471
6472 static struct mlxsw_sp_rif_subport *
6473 mlxsw_sp_rif_subport_rif(const struct mlxsw_sp_rif *rif)
6474 {
6475         return container_of(rif, struct mlxsw_sp_rif_subport, common);
6476 }
6477
6478 static struct mlxsw_sp_rif *
6479 mlxsw_sp_rif_subport_get(struct mlxsw_sp *mlxsw_sp,
6480                          const struct mlxsw_sp_rif_params *params,
6481                          struct netlink_ext_ack *extack)
6482 {
6483         struct mlxsw_sp_rif_subport *rif_subport;
6484         struct mlxsw_sp_rif *rif;
6485
6486         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, params->dev);
6487         if (!rif)
6488                 return mlxsw_sp_rif_create(mlxsw_sp, params, extack);
6489
6490         rif_subport = mlxsw_sp_rif_subport_rif(rif);
6491         refcount_inc(&rif_subport->ref_count);
6492         return rif;
6493 }
6494
6495 static void mlxsw_sp_rif_subport_put(struct mlxsw_sp_rif *rif)
6496 {
6497         struct mlxsw_sp_rif_subport *rif_subport;
6498
6499         rif_subport = mlxsw_sp_rif_subport_rif(rif);
6500         if (!refcount_dec_and_test(&rif_subport->ref_count))
6501                 return;
6502
6503         mlxsw_sp_rif_destroy(rif);
6504 }
6505
6506 static int
6507 mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan,
6508                                struct net_device *l3_dev,
6509                                struct netlink_ext_ack *extack)
6510 {
6511         struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
6512         struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
6513         struct mlxsw_sp_rif_params params = {
6514                 .dev = l3_dev,
6515         };
6516         u16 vid = mlxsw_sp_port_vlan->vid;
6517         struct mlxsw_sp_rif *rif;
6518         struct mlxsw_sp_fid *fid;
6519         int err;
6520
6521         mlxsw_sp_rif_subport_params_init(&params, mlxsw_sp_port_vlan);
6522         rif = mlxsw_sp_rif_subport_get(mlxsw_sp, &params, extack);
6523         if (IS_ERR(rif))
6524                 return PTR_ERR(rif);
6525
6526         /* FID was already created, just take a reference */
6527         fid = rif->ops->fid_get(rif, extack);
6528         err = mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port, vid);
6529         if (err)
6530                 goto err_fid_port_vid_map;
6531
6532         err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, false);
6533         if (err)
6534                 goto err_port_vid_learning_set;
6535
6536         err = mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid,
6537                                         BR_STATE_FORWARDING);
6538         if (err)
6539                 goto err_port_vid_stp_set;
6540
6541         mlxsw_sp_port_vlan->fid = fid;
6542
6543         return 0;
6544
6545 err_port_vid_stp_set:
6546         mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
6547 err_port_vid_learning_set:
6548         mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
6549 err_fid_port_vid_map:
6550         mlxsw_sp_fid_put(fid);
6551         mlxsw_sp_rif_subport_put(rif);
6552         return err;
6553 }
6554
6555 void
6556 mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
6557 {
6558         struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
6559         struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid;
6560         struct mlxsw_sp_rif *rif = mlxsw_sp_fid_rif(fid);
6561         u16 vid = mlxsw_sp_port_vlan->vid;
6562
6563         if (WARN_ON(mlxsw_sp_fid_type(fid) != MLXSW_SP_FID_TYPE_RFID))
6564                 return;
6565
6566         mlxsw_sp_port_vlan->fid = NULL;
6567         mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid, BR_STATE_BLOCKING);
6568         mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
6569         mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
6570         mlxsw_sp_fid_put(fid);
6571         mlxsw_sp_rif_subport_put(rif);
6572 }
6573
6574 static int mlxsw_sp_inetaddr_port_vlan_event(struct net_device *l3_dev,
6575                                              struct net_device *port_dev,
6576                                              unsigned long event, u16 vid,
6577                                              struct netlink_ext_ack *extack)
6578 {
6579         struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(port_dev);
6580         struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
6581
6582         mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
6583         if (WARN_ON(!mlxsw_sp_port_vlan))
6584                 return -EINVAL;
6585
6586         switch (event) {
6587         case NETDEV_UP:
6588                 return mlxsw_sp_port_vlan_router_join(mlxsw_sp_port_vlan,
6589                                                       l3_dev, extack);
6590         case NETDEV_DOWN:
6591                 mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
6592                 break;
6593         }
6594
6595         return 0;
6596 }
6597
6598 static int mlxsw_sp_inetaddr_port_event(struct net_device *port_dev,
6599                                         unsigned long event,
6600                                         struct netlink_ext_ack *extack)
6601 {
6602         if (netif_is_bridge_port(port_dev) ||
6603             netif_is_lag_port(port_dev) ||
6604             netif_is_ovs_port(port_dev))
6605                 return 0;
6606
6607         return mlxsw_sp_inetaddr_port_vlan_event(port_dev, port_dev, event,
6608                                                  MLXSW_SP_DEFAULT_VID, extack);
6609 }
6610
6611 static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev,
6612                                          struct net_device *lag_dev,
6613                                          unsigned long event, u16 vid,
6614                                          struct netlink_ext_ack *extack)
6615 {
6616         struct net_device *port_dev;
6617         struct list_head *iter;
6618         int err;
6619
6620         netdev_for_each_lower_dev(lag_dev, port_dev, iter) {
6621                 if (mlxsw_sp_port_dev_check(port_dev)) {
6622                         err = mlxsw_sp_inetaddr_port_vlan_event(l3_dev,
6623                                                                 port_dev,
6624                                                                 event, vid,
6625                                                                 extack);
6626                         if (err)
6627                                 return err;
6628                 }
6629         }
6630
6631         return 0;
6632 }
6633
6634 static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev,
6635                                        unsigned long event,
6636                                        struct netlink_ext_ack *extack)
6637 {
6638         if (netif_is_bridge_port(lag_dev))
6639                 return 0;
6640
6641         return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event,
6642                                              MLXSW_SP_DEFAULT_VID, extack);
6643 }
6644
6645 static int mlxsw_sp_inetaddr_bridge_event(struct mlxsw_sp *mlxsw_sp,
6646                                           struct net_device *l3_dev,
6647                                           unsigned long event,
6648                                           struct netlink_ext_ack *extack)
6649 {
6650         struct mlxsw_sp_rif_params params = {
6651                 .dev = l3_dev,
6652         };
6653         struct mlxsw_sp_rif *rif;
6654
6655         switch (event) {
6656         case NETDEV_UP:
6657                 rif = mlxsw_sp_rif_create(mlxsw_sp, &params, extack);
6658                 if (IS_ERR(rif))
6659                         return PTR_ERR(rif);
6660                 break;
6661         case NETDEV_DOWN:
6662                 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6663                 mlxsw_sp_rif_destroy(rif);
6664                 break;
6665         }
6666
6667         return 0;
6668 }
6669
6670 static int mlxsw_sp_inetaddr_vlan_event(struct mlxsw_sp *mlxsw_sp,
6671                                         struct net_device *vlan_dev,
6672                                         unsigned long event,
6673                                         struct netlink_ext_ack *extack)
6674 {
6675         struct net_device *real_dev = vlan_dev_real_dev(vlan_dev);
6676         u16 vid = vlan_dev_vlan_id(vlan_dev);
6677
6678         if (netif_is_bridge_port(vlan_dev))
6679                 return 0;
6680
6681         if (mlxsw_sp_port_dev_check(real_dev))
6682                 return mlxsw_sp_inetaddr_port_vlan_event(vlan_dev, real_dev,
6683                                                          event, vid, extack);
6684         else if (netif_is_lag_master(real_dev))
6685                 return __mlxsw_sp_inetaddr_lag_event(vlan_dev, real_dev, event,
6686                                                      vid, extack);
6687         else if (netif_is_bridge_master(real_dev) && br_vlan_enabled(real_dev))
6688                 return mlxsw_sp_inetaddr_bridge_event(mlxsw_sp, vlan_dev, event,
6689                                                       extack);
6690
6691         return 0;
6692 }
6693
6694 static bool mlxsw_sp_rif_macvlan_is_vrrp4(const u8 *mac)
6695 {
6696         u8 vrrp4[ETH_ALEN] = { 0x00, 0x00, 0x5e, 0x00, 0x01, 0x00 };
6697         u8 mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 };
6698
6699         return ether_addr_equal_masked(mac, vrrp4, mask);
6700 }
6701
6702 static bool mlxsw_sp_rif_macvlan_is_vrrp6(const u8 *mac)
6703 {
6704         u8 vrrp6[ETH_ALEN] = { 0x00, 0x00, 0x5e, 0x00, 0x02, 0x00 };
6705         u8 mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 };
6706
6707         return ether_addr_equal_masked(mac, vrrp6, mask);
6708 }
6709
6710 static int mlxsw_sp_rif_vrrp_op(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
6711                                 const u8 *mac, bool adding)
6712 {
6713         char ritr_pl[MLXSW_REG_RITR_LEN];
6714         u8 vrrp_id = adding ? mac[5] : 0;
6715         int err;
6716
6717         if (!mlxsw_sp_rif_macvlan_is_vrrp4(mac) &&
6718             !mlxsw_sp_rif_macvlan_is_vrrp6(mac))
6719                 return 0;
6720
6721         mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
6722         err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6723         if (err)
6724                 return err;
6725
6726         if (mlxsw_sp_rif_macvlan_is_vrrp4(mac))
6727                 mlxsw_reg_ritr_if_vrrp_id_ipv4_set(ritr_pl, vrrp_id);
6728         else
6729                 mlxsw_reg_ritr_if_vrrp_id_ipv6_set(ritr_pl, vrrp_id);
6730
6731         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6732 }
6733
6734 static int mlxsw_sp_rif_macvlan_add(struct mlxsw_sp *mlxsw_sp,
6735                                     const struct net_device *macvlan_dev,
6736                                     struct netlink_ext_ack *extack)
6737 {
6738         struct macvlan_dev *vlan = netdev_priv(macvlan_dev);
6739         struct mlxsw_sp_rif *rif;
6740         int err;
6741
6742         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan->lowerdev);
6743         if (!rif) {
6744                 NL_SET_ERR_MSG_MOD(extack, "macvlan is only supported on top of router interfaces");
6745                 return -EOPNOTSUPP;
6746         }
6747
6748         err = mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
6749                                   mlxsw_sp_fid_index(rif->fid), true);
6750         if (err)
6751                 return err;
6752
6753         err = mlxsw_sp_rif_vrrp_op(mlxsw_sp, rif->rif_index,
6754                                    macvlan_dev->dev_addr, true);
6755         if (err)
6756                 goto err_rif_vrrp_add;
6757
6758         /* Make sure the bridge driver does not have this MAC pointing at
6759          * some other port.
6760          */
6761         if (rif->ops->fdb_del)
6762                 rif->ops->fdb_del(rif, macvlan_dev->dev_addr);
6763
6764         return 0;
6765
6766 err_rif_vrrp_add:
6767         mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
6768                             mlxsw_sp_fid_index(rif->fid), false);
6769         return err;
6770 }
6771
6772 void mlxsw_sp_rif_macvlan_del(struct mlxsw_sp *mlxsw_sp,
6773                               const struct net_device *macvlan_dev)
6774 {
6775         struct macvlan_dev *vlan = netdev_priv(macvlan_dev);
6776         struct mlxsw_sp_rif *rif;
6777
6778         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan->lowerdev);
6779         /* If we do not have a RIF, then we already took care of
6780          * removing the macvlan's MAC during RIF deletion.
6781          */
6782         if (!rif)
6783                 return;
6784         mlxsw_sp_rif_vrrp_op(mlxsw_sp, rif->rif_index, macvlan_dev->dev_addr,
6785                              false);
6786         mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
6787                             mlxsw_sp_fid_index(rif->fid), false);
6788 }
6789
6790 static int mlxsw_sp_inetaddr_macvlan_event(struct mlxsw_sp *mlxsw_sp,
6791                                            struct net_device *macvlan_dev,
6792                                            unsigned long event,
6793                                            struct netlink_ext_ack *extack)
6794 {
6795         switch (event) {
6796         case NETDEV_UP:
6797                 return mlxsw_sp_rif_macvlan_add(mlxsw_sp, macvlan_dev, extack);
6798         case NETDEV_DOWN:
6799                 mlxsw_sp_rif_macvlan_del(mlxsw_sp, macvlan_dev);
6800                 break;
6801         }
6802
6803         return 0;
6804 }
6805
6806 static int mlxsw_sp_router_port_check_rif_addr(struct mlxsw_sp *mlxsw_sp,
6807                                                struct net_device *dev,
6808                                                const unsigned char *dev_addr,
6809                                                struct netlink_ext_ack *extack)
6810 {
6811         struct mlxsw_sp_rif *rif;
6812         int i;
6813
6814         /* A RIF is not created for macvlan netdevs. Their MAC is used to
6815          * populate the FDB
6816          */
6817         if (netif_is_macvlan(dev) || netif_is_l3_master(dev))
6818                 return 0;
6819
6820         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
6821                 rif = mlxsw_sp->router->rifs[i];
6822                 if (rif && rif->dev && rif->dev != dev &&
6823                     !ether_addr_equal_masked(rif->dev->dev_addr, dev_addr,
6824                                              mlxsw_sp->mac_mask)) {
6825                         NL_SET_ERR_MSG_MOD(extack, "All router interface MAC addresses must have the same prefix");
6826                         return -EINVAL;
6827                 }
6828         }
6829
6830         return 0;
6831 }
6832
/* Dispatch an address event to the handler matching the type of @dev:
 * mlxsw port, LAG master, bridge master, VLAN device or macvlan. Any other
 * device type is ignored and 0 is returned.
 */
static int __mlxsw_sp_inetaddr_event(struct mlxsw_sp *mlxsw_sp,
				     struct net_device *dev,
				     unsigned long event,
				     struct netlink_ext_ack *extack)
{
	if (mlxsw_sp_port_dev_check(dev))
		return mlxsw_sp_inetaddr_port_event(dev, event, extack);

	if (netif_is_lag_master(dev))
		return mlxsw_sp_inetaddr_lag_event(dev, event, extack);

	if (netif_is_bridge_master(dev))
		return mlxsw_sp_inetaddr_bridge_event(mlxsw_sp, dev, event,
						      extack);

	if (is_vlan_dev(dev))
		return mlxsw_sp_inetaddr_vlan_event(mlxsw_sp, dev, event,
						    extack);

	if (netif_is_macvlan(dev))
		return mlxsw_sp_inetaddr_macvlan_event(mlxsw_sp, dev, event,
						       extack);

	return 0;
}
6854
6855 static int mlxsw_sp_inetaddr_event(struct notifier_block *nb,
6856                                    unsigned long event, void *ptr)
6857 {
6858         struct in_ifaddr *ifa = (struct in_ifaddr *) ptr;
6859         struct net_device *dev = ifa->ifa_dev->dev;
6860         struct mlxsw_sp_router *router;
6861         struct mlxsw_sp_rif *rif;
6862         int err = 0;
6863
6864         /* NETDEV_UP event is handled by mlxsw_sp_inetaddr_valid_event */
6865         if (event == NETDEV_UP)
6866                 goto out;
6867
6868         router = container_of(nb, struct mlxsw_sp_router, inetaddr_nb);
6869         rif = mlxsw_sp_rif_find_by_dev(router->mlxsw_sp, dev);
6870         if (!mlxsw_sp_rif_should_config(rif, dev, event))
6871                 goto out;
6872
6873         err = __mlxsw_sp_inetaddr_event(router->mlxsw_sp, dev, event, NULL);
6874 out:
6875         return notifier_from_errno(err);
6876 }
6877
6878 int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused,
6879                                   unsigned long event, void *ptr)
6880 {
6881         struct in_validator_info *ivi = (struct in_validator_info *) ptr;
6882         struct net_device *dev = ivi->ivi_dev->dev;
6883         struct mlxsw_sp *mlxsw_sp;
6884         struct mlxsw_sp_rif *rif;
6885         int err = 0;
6886
6887         mlxsw_sp = mlxsw_sp_lower_get(dev);
6888         if (!mlxsw_sp)
6889                 goto out;
6890
6891         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6892         if (!mlxsw_sp_rif_should_config(rif, dev, event))
6893                 goto out;
6894
6895         err = mlxsw_sp_router_port_check_rif_addr(mlxsw_sp, dev, dev->dev_addr,
6896                                                   ivi->extack);
6897         if (err)
6898                 goto out;
6899
6900         err = __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, ivi->extack);
6901 out:
6902         return notifier_from_errno(err);
6903 }
6904
/* Deferred IPv6 address event, allocated by mlxsw_sp_inet6addr_event() and
 * freed by the work function once processed.
 */
struct mlxsw_sp_inet6addr_event_work {
	struct work_struct work;	/* work item handed to the scheduler */
	struct mlxsw_sp *mlxsw_sp;	/* driver instance that got the event */
	struct net_device *dev;		/* held with dev_hold() until processed */
	unsigned long event;		/* notifier event code */
};
6911
6912 static void mlxsw_sp_inet6addr_event_work(struct work_struct *work)
6913 {
6914         struct mlxsw_sp_inet6addr_event_work *inet6addr_work =
6915                 container_of(work, struct mlxsw_sp_inet6addr_event_work, work);
6916         struct mlxsw_sp *mlxsw_sp = inet6addr_work->mlxsw_sp;
6917         struct net_device *dev = inet6addr_work->dev;
6918         unsigned long event = inet6addr_work->event;
6919         struct mlxsw_sp_rif *rif;
6920
6921         rtnl_lock();
6922
6923         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6924         if (!mlxsw_sp_rif_should_config(rif, dev, event))
6925                 goto out;
6926
6927         __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, NULL);
6928 out:
6929         rtnl_unlock();
6930         dev_put(dev);
6931         kfree(inet6addr_work);
6932 }
6933
6934 /* Called with rcu_read_lock() */
6935 static int mlxsw_sp_inet6addr_event(struct notifier_block *nb,
6936                                     unsigned long event, void *ptr)
6937 {
6938         struct inet6_ifaddr *if6 = (struct inet6_ifaddr *) ptr;
6939         struct mlxsw_sp_inet6addr_event_work *inet6addr_work;
6940         struct net_device *dev = if6->idev->dev;
6941         struct mlxsw_sp_router *router;
6942
6943         /* NETDEV_UP event is handled by mlxsw_sp_inet6addr_valid_event */
6944         if (event == NETDEV_UP)
6945                 return NOTIFY_DONE;
6946
6947         inet6addr_work = kzalloc(sizeof(*inet6addr_work), GFP_ATOMIC);
6948         if (!inet6addr_work)
6949                 return NOTIFY_BAD;
6950
6951         router = container_of(nb, struct mlxsw_sp_router, inet6addr_nb);
6952         INIT_WORK(&inet6addr_work->work, mlxsw_sp_inet6addr_event_work);
6953         inet6addr_work->mlxsw_sp = router->mlxsw_sp;
6954         inet6addr_work->dev = dev;
6955         inet6addr_work->event = event;
6956         dev_hold(dev);
6957         mlxsw_core_schedule_work(&inet6addr_work->work);
6958
6959         return NOTIFY_DONE;
6960 }
6961
6962 int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused,
6963                                    unsigned long event, void *ptr)
6964 {
6965         struct in6_validator_info *i6vi = (struct in6_validator_info *) ptr;
6966         struct net_device *dev = i6vi->i6vi_dev->dev;
6967         struct mlxsw_sp *mlxsw_sp;
6968         struct mlxsw_sp_rif *rif;
6969         int err = 0;
6970
6971         mlxsw_sp = mlxsw_sp_lower_get(dev);
6972         if (!mlxsw_sp)
6973                 goto out;
6974
6975         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6976         if (!mlxsw_sp_rif_should_config(rif, dev, event))
6977                 goto out;
6978
6979         err = mlxsw_sp_router_port_check_rif_addr(mlxsw_sp, dev, dev->dev_addr,
6980                                                   i6vi->extack);
6981         if (err)
6982                 goto out;
6983
6984         err = __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, i6vi->extack);
6985 out:
6986         return notifier_from_errno(err);
6987 }
6988
6989 static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
6990                              const char *mac, int mtu)
6991 {
6992         char ritr_pl[MLXSW_REG_RITR_LEN];
6993         int err;
6994
6995         mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
6996         err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6997         if (err)
6998                 return err;
6999
7000         mlxsw_reg_ritr_mtu_set(ritr_pl, mtu);
7001         mlxsw_reg_ritr_if_mac_memcpy_to(ritr_pl, mac);
7002         mlxsw_reg_ritr_op_set(ritr_pl, MLXSW_REG_RITR_RIF_CREATE);
7003         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
7004 }
7005
7006 static int
7007 mlxsw_sp_router_port_change_event(struct mlxsw_sp *mlxsw_sp,
7008                                   struct mlxsw_sp_rif *rif)
7009 {
7010         struct net_device *dev = rif->dev;
7011         u16 fid_index;
7012         int err;
7013
7014         fid_index = mlxsw_sp_fid_index(rif->fid);
7015
7016         err = mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, false);
7017         if (err)
7018                 return err;
7019
7020         err = mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, dev->dev_addr,
7021                                 dev->mtu);
7022         if (err)
7023                 goto err_rif_edit;
7024
7025         err = mlxsw_sp_rif_fdb_op(mlxsw_sp, dev->dev_addr, fid_index, true);
7026         if (err)
7027                 goto err_rif_fdb_op;
7028
7029         if (rif->mtu != dev->mtu) {
7030                 struct mlxsw_sp_vr *vr;
7031                 int i;
7032
7033                 /* The RIF is relevant only to its mr_table instance, as unlike
7034                  * unicast routing, in multicast routing a RIF cannot be shared
7035                  * between several multicast routing tables.
7036                  */
7037                 vr = &mlxsw_sp->router->vrs[rif->vr_id];
7038                 for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++)
7039                         mlxsw_sp_mr_rif_mtu_update(vr->mr_table[i],
7040                                                    rif, dev->mtu);
7041         }
7042
7043         ether_addr_copy(rif->addr, dev->dev_addr);
7044         rif->mtu = dev->mtu;
7045
7046         netdev_dbg(dev, "Updated RIF=%d\n", rif->rif_index);
7047
7048         return 0;
7049
7050 err_rif_fdb_op:
7051         mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, rif->addr, rif->mtu);
7052 err_rif_edit:
7053         mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, true);
7054         return err;
7055 }
7056
7057 static int mlxsw_sp_router_port_pre_changeaddr_event(struct mlxsw_sp_rif *rif,
7058                             struct netdev_notifier_pre_changeaddr_info *info)
7059 {
7060         struct netlink_ext_ack *extack;
7061
7062         extack = netdev_notifier_info_to_extack(&info->info);
7063         return mlxsw_sp_router_port_check_rif_addr(rif->mlxsw_sp, rif->dev,
7064                                                    info->dev_addr, extack);
7065 }
7066
7067 int mlxsw_sp_netdevice_router_port_event(struct net_device *dev,
7068                                          unsigned long event, void *ptr)
7069 {
7070         struct mlxsw_sp *mlxsw_sp;
7071         struct mlxsw_sp_rif *rif;
7072
7073         mlxsw_sp = mlxsw_sp_lower_get(dev);
7074         if (!mlxsw_sp)
7075                 return 0;
7076
7077         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
7078         if (!rif)
7079                 return 0;
7080
7081         switch (event) {
7082         case NETDEV_CHANGEMTU: /* fall through */
7083         case NETDEV_CHANGEADDR:
7084                 return mlxsw_sp_router_port_change_event(mlxsw_sp, rif);
7085         case NETDEV_PRE_CHANGEADDR:
7086                 return mlxsw_sp_router_port_pre_changeaddr_event(rif, ptr);
7087         }
7088
7089         return 0;
7090 }
7091
7092 static int mlxsw_sp_port_vrf_join(struct mlxsw_sp *mlxsw_sp,
7093                                   struct net_device *l3_dev,
7094                                   struct netlink_ext_ack *extack)
7095 {
7096         struct mlxsw_sp_rif *rif;
7097
7098         /* If netdev is already associated with a RIF, then we need to
7099          * destroy it and create a new one with the new virtual router ID.
7100          */
7101         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
7102         if (rif)
7103                 __mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_DOWN,
7104                                           extack);
7105
7106         return __mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_UP, extack);
7107 }
7108
7109 static void mlxsw_sp_port_vrf_leave(struct mlxsw_sp *mlxsw_sp,
7110                                     struct net_device *l3_dev)
7111 {
7112         struct mlxsw_sp_rif *rif;
7113
7114         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
7115         if (!rif)
7116                 return;
7117         __mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_DOWN, NULL);
7118 }
7119
7120 int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
7121                                  struct netdev_notifier_changeupper_info *info)
7122 {
7123         struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
7124         int err = 0;
7125
7126         /* We do not create a RIF for a macvlan, but only use it to
7127          * direct more MAC addresses to the router.
7128          */
7129         if (!mlxsw_sp || netif_is_macvlan(l3_dev))
7130                 return 0;
7131
7132         switch (event) {
7133         case NETDEV_PRECHANGEUPPER:
7134                 return 0;
7135         case NETDEV_CHANGEUPPER:
7136                 if (info->linking) {
7137                         struct netlink_ext_ack *extack;
7138
7139                         extack = netdev_notifier_info_to_extack(&info->info);
7140                         err = mlxsw_sp_port_vrf_join(mlxsw_sp, l3_dev, extack);
7141                 } else {
7142                         mlxsw_sp_port_vrf_leave(mlxsw_sp, l3_dev);
7143                 }
7144                 break;
7145         }
7146
7147         return err;
7148 }
7149
7150 static int __mlxsw_sp_rif_macvlan_flush(struct net_device *dev, void *data)
7151 {
7152         struct mlxsw_sp_rif *rif = data;
7153
7154         if (!netif_is_macvlan(dev))
7155                 return 0;
7156
7157         return mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
7158                                    mlxsw_sp_fid_index(rif->fid), false);
7159 }
7160
7161 static int mlxsw_sp_rif_macvlan_flush(struct mlxsw_sp_rif *rif)
7162 {
7163         if (!netif_is_macvlan_port(rif->dev))
7164                 return 0;
7165
7166         netdev_warn(rif->dev, "Router interface is deleted. Upper macvlans will not work\n");
7167         return netdev_walk_all_upper_dev_rcu(rif->dev,
7168                                              __mlxsw_sp_rif_macvlan_flush, rif);
7169 }
7170
7171 static void mlxsw_sp_rif_subport_setup(struct mlxsw_sp_rif *rif,
7172                                        const struct mlxsw_sp_rif_params *params)
7173 {
7174         struct mlxsw_sp_rif_subport *rif_subport;
7175
7176         rif_subport = mlxsw_sp_rif_subport_rif(rif);
7177         refcount_set(&rif_subport->ref_count, 1);
7178         rif_subport->vid = params->vid;
7179         rif_subport->lag = params->lag;
7180         if (params->lag)
7181                 rif_subport->lag_id = params->lag_id;
7182         else
7183                 rif_subport->system_port = params->system_port;
7184 }
7185
7186 static int mlxsw_sp_rif_subport_op(struct mlxsw_sp_rif *rif, bool enable)
7187 {
7188         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7189         struct mlxsw_sp_rif_subport *rif_subport;
7190         char ritr_pl[MLXSW_REG_RITR_LEN];
7191
7192         rif_subport = mlxsw_sp_rif_subport_rif(rif);
7193         mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_SP_IF,
7194                             rif->rif_index, rif->vr_id, rif->dev->mtu);
7195         mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
7196         mlxsw_reg_ritr_sp_if_pack(ritr_pl, rif_subport->lag,
7197                                   rif_subport->lag ? rif_subport->lag_id :
7198                                                      rif_subport->system_port,
7199                                   rif_subport->vid);
7200
7201         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
7202 }
7203
7204 static int mlxsw_sp_rif_subport_configure(struct mlxsw_sp_rif *rif)
7205 {
7206         int err;
7207
7208         err = mlxsw_sp_rif_subport_op(rif, true);
7209         if (err)
7210                 return err;
7211
7212         err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7213                                   mlxsw_sp_fid_index(rif->fid), true);
7214         if (err)
7215                 goto err_rif_fdb_op;
7216
7217         mlxsw_sp_fid_rif_set(rif->fid, rif);
7218         return 0;
7219
7220 err_rif_fdb_op:
7221         mlxsw_sp_rif_subport_op(rif, false);
7222         return err;
7223 }
7224
7225 static void mlxsw_sp_rif_subport_deconfigure(struct mlxsw_sp_rif *rif)
7226 {
7227         struct mlxsw_sp_fid *fid = rif->fid;
7228
7229         mlxsw_sp_fid_rif_set(fid, NULL);
7230         mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7231                             mlxsw_sp_fid_index(fid), false);
7232         mlxsw_sp_rif_macvlan_flush(rif);
7233         mlxsw_sp_rif_subport_op(rif, false);
7234 }
7235
7236 static struct mlxsw_sp_fid *
7237 mlxsw_sp_rif_subport_fid_get(struct mlxsw_sp_rif *rif,
7238                              struct netlink_ext_ack *extack)
7239 {
7240         return mlxsw_sp_fid_rfid_get(rif->mlxsw_sp, rif->rif_index);
7241 }
7242
7243 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_subport_ops = {
7244         .type                   = MLXSW_SP_RIF_TYPE_SUBPORT,
7245         .rif_size               = sizeof(struct mlxsw_sp_rif_subport),
7246         .setup                  = mlxsw_sp_rif_subport_setup,
7247         .configure              = mlxsw_sp_rif_subport_configure,
7248         .deconfigure            = mlxsw_sp_rif_subport_deconfigure,
7249         .fid_get                = mlxsw_sp_rif_subport_fid_get,
7250 };
7251
7252 static int mlxsw_sp_rif_vlan_fid_op(struct mlxsw_sp_rif *rif,
7253                                     enum mlxsw_reg_ritr_if_type type,
7254                                     u16 vid_fid, bool enable)
7255 {
7256         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7257         char ritr_pl[MLXSW_REG_RITR_LEN];
7258
7259         mlxsw_reg_ritr_pack(ritr_pl, enable, type, rif->rif_index, rif->vr_id,
7260                             rif->dev->mtu);
7261         mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
7262         mlxsw_reg_ritr_fid_set(ritr_pl, type, vid_fid);
7263
7264         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
7265 }
7266
7267 u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp)
7268 {
7269         return mlxsw_core_max_ports(mlxsw_sp->core) + 1;
7270 }
7271
7272 static int mlxsw_sp_rif_vlan_configure(struct mlxsw_sp_rif *rif)
7273 {
7274         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7275         u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
7276         int err;
7277
7278         err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, true);
7279         if (err)
7280                 return err;
7281
7282         err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7283                                      mlxsw_sp_router_port(mlxsw_sp), true);
7284         if (err)
7285                 goto err_fid_mc_flood_set;
7286
7287         err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7288                                      mlxsw_sp_router_port(mlxsw_sp), true);
7289         if (err)
7290                 goto err_fid_bc_flood_set;
7291
7292         err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7293                                   mlxsw_sp_fid_index(rif->fid), true);
7294         if (err)
7295                 goto err_rif_fdb_op;
7296
7297         mlxsw_sp_fid_rif_set(rif->fid, rif);
7298         return 0;
7299
7300 err_rif_fdb_op:
7301         mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7302                                mlxsw_sp_router_port(mlxsw_sp), false);
7303 err_fid_bc_flood_set:
7304         mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7305                                mlxsw_sp_router_port(mlxsw_sp), false);
7306 err_fid_mc_flood_set:
7307         mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
7308         return err;
7309 }
7310
7311 static void mlxsw_sp_rif_vlan_deconfigure(struct mlxsw_sp_rif *rif)
7312 {
7313         u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
7314         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7315         struct mlxsw_sp_fid *fid = rif->fid;
7316
7317         mlxsw_sp_fid_rif_set(fid, NULL);
7318         mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7319                             mlxsw_sp_fid_index(fid), false);
7320         mlxsw_sp_rif_macvlan_flush(rif);
7321         mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7322                                mlxsw_sp_router_port(mlxsw_sp), false);
7323         mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7324                                mlxsw_sp_router_port(mlxsw_sp), false);
7325         mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
7326 }
7327
7328 static struct mlxsw_sp_fid *
7329 mlxsw_sp_rif_vlan_fid_get(struct mlxsw_sp_rif *rif,
7330                           struct netlink_ext_ack *extack)
7331 {
7332         struct net_device *br_dev = rif->dev;
7333         u16 vid;
7334         int err;
7335
7336         if (is_vlan_dev(rif->dev)) {
7337                 vid = vlan_dev_vlan_id(rif->dev);
7338                 br_dev = vlan_dev_real_dev(rif->dev);
7339                 if (WARN_ON(!netif_is_bridge_master(br_dev)))
7340                         return ERR_PTR(-EINVAL);
7341         } else {
7342                 err = br_vlan_get_pvid(rif->dev, &vid);
7343                 if (err < 0 || !vid) {
7344                         NL_SET_ERR_MSG_MOD(extack, "Couldn't determine bridge PVID");
7345                         return ERR_PTR(-EINVAL);
7346                 }
7347         }
7348
7349         return mlxsw_sp_bridge_fid_get(rif->mlxsw_sp, br_dev, vid, extack);
7350 }
7351
7352 static void mlxsw_sp_rif_vlan_fdb_del(struct mlxsw_sp_rif *rif, const char *mac)
7353 {
7354         u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
7355         struct switchdev_notifier_fdb_info info;
7356         struct net_device *br_dev;
7357         struct net_device *dev;
7358
7359         br_dev = is_vlan_dev(rif->dev) ? vlan_dev_real_dev(rif->dev) : rif->dev;
7360         dev = br_fdb_find_port(br_dev, mac, vid);
7361         if (!dev)
7362                 return;
7363
7364         info.addr = mac;
7365         info.vid = vid;
7366         call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info,
7367                                  NULL);
7368 }
7369
7370 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_vlan_ops = {
7371         .type                   = MLXSW_SP_RIF_TYPE_VLAN,
7372         .rif_size               = sizeof(struct mlxsw_sp_rif),
7373         .configure              = mlxsw_sp_rif_vlan_configure,
7374         .deconfigure            = mlxsw_sp_rif_vlan_deconfigure,
7375         .fid_get                = mlxsw_sp_rif_vlan_fid_get,
7376         .fdb_del                = mlxsw_sp_rif_vlan_fdb_del,
7377 };
7378
7379 static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif)
7380 {
7381         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7382         u16 fid_index = mlxsw_sp_fid_index(rif->fid);
7383         int err;
7384
7385         err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index,
7386                                        true);
7387         if (err)
7388                 return err;
7389
7390         err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7391                                      mlxsw_sp_router_port(mlxsw_sp), true);
7392         if (err)
7393                 goto err_fid_mc_flood_set;
7394
7395         err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7396                                      mlxsw_sp_router_port(mlxsw_sp), true);
7397         if (err)
7398                 goto err_fid_bc_flood_set;
7399
7400         err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7401                                   mlxsw_sp_fid_index(rif->fid), true);
7402         if (err)
7403                 goto err_rif_fdb_op;
7404
7405         mlxsw_sp_fid_rif_set(rif->fid, rif);
7406         return 0;
7407
7408 err_rif_fdb_op:
7409         mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7410                                mlxsw_sp_router_port(mlxsw_sp), false);
7411 err_fid_bc_flood_set:
7412         mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7413                                mlxsw_sp_router_port(mlxsw_sp), false);
7414 err_fid_mc_flood_set:
7415         mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
7416         return err;
7417 }
7418
7419 static void mlxsw_sp_rif_fid_deconfigure(struct mlxsw_sp_rif *rif)
7420 {
7421         u16 fid_index = mlxsw_sp_fid_index(rif->fid);
7422         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7423         struct mlxsw_sp_fid *fid = rif->fid;
7424
7425         mlxsw_sp_fid_rif_set(fid, NULL);
7426         mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7427                             mlxsw_sp_fid_index(fid), false);
7428         mlxsw_sp_rif_macvlan_flush(rif);
7429         mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7430                                mlxsw_sp_router_port(mlxsw_sp), false);
7431         mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7432                                mlxsw_sp_router_port(mlxsw_sp), false);
7433         mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
7434 }
7435
7436 static struct mlxsw_sp_fid *
7437 mlxsw_sp_rif_fid_fid_get(struct mlxsw_sp_rif *rif,
7438                          struct netlink_ext_ack *extack)
7439 {
7440         return mlxsw_sp_bridge_fid_get(rif->mlxsw_sp, rif->dev, 0, extack);
7441 }
7442
7443 static void mlxsw_sp_rif_fid_fdb_del(struct mlxsw_sp_rif *rif, const char *mac)
7444 {
7445         struct switchdev_notifier_fdb_info info;
7446         struct net_device *dev;
7447
7448         dev = br_fdb_find_port(rif->dev, mac, 0);
7449         if (!dev)
7450                 return;
7451
7452         info.addr = mac;
7453         info.vid = 0;
7454         call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info,
7455                                  NULL);
7456 }
7457
7458 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_fid_ops = {
7459         .type                   = MLXSW_SP_RIF_TYPE_FID,
7460         .rif_size               = sizeof(struct mlxsw_sp_rif),
7461         .configure              = mlxsw_sp_rif_fid_configure,
7462         .deconfigure            = mlxsw_sp_rif_fid_deconfigure,
7463         .fid_get                = mlxsw_sp_rif_fid_fid_get,
7464         .fdb_del                = mlxsw_sp_rif_fid_fdb_del,
7465 };
7466
7467 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_vlan_emu_ops = {
7468         .type                   = MLXSW_SP_RIF_TYPE_VLAN,
7469         .rif_size               = sizeof(struct mlxsw_sp_rif),
7470         .configure              = mlxsw_sp_rif_fid_configure,
7471         .deconfigure            = mlxsw_sp_rif_fid_deconfigure,
7472         .fid_get                = mlxsw_sp_rif_vlan_fid_get,
7473         .fdb_del                = mlxsw_sp_rif_vlan_fdb_del,
7474 };
7475
7476 static struct mlxsw_sp_rif_ipip_lb *
7477 mlxsw_sp_rif_ipip_lb_rif(struct mlxsw_sp_rif *rif)
7478 {
7479         return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
7480 }
7481
7482 static void
7483 mlxsw_sp_rif_ipip_lb_setup(struct mlxsw_sp_rif *rif,
7484                            const struct mlxsw_sp_rif_params *params)
7485 {
7486         struct mlxsw_sp_rif_params_ipip_lb *params_lb;
7487         struct mlxsw_sp_rif_ipip_lb *rif_lb;
7488
7489         params_lb = container_of(params, struct mlxsw_sp_rif_params_ipip_lb,
7490                                  common);
7491         rif_lb = mlxsw_sp_rif_ipip_lb_rif(rif);
7492         rif_lb->lb_config = params_lb->lb_config;
7493 }
7494
7495 static int
7496 mlxsw_sp1_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif)
7497 {
7498         struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
7499         u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev);
7500         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7501         struct mlxsw_sp_vr *ul_vr;
7502         int err;
7503
7504         ul_vr = mlxsw_sp_vr_get(mlxsw_sp, ul_tb_id, NULL);
7505         if (IS_ERR(ul_vr))
7506                 return PTR_ERR(ul_vr);
7507
7508         err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr->id, 0, true);
7509         if (err)
7510                 goto err_loopback_op;
7511
7512         lb_rif->ul_vr_id = ul_vr->id;
7513         lb_rif->ul_rif_id = 0;
7514         ++ul_vr->rif_count;
7515         return 0;
7516
7517 err_loopback_op:
7518         mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
7519         return err;
7520 }
7521
7522 static void mlxsw_sp1_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
7523 {
7524         struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
7525         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7526         struct mlxsw_sp_vr *ul_vr;
7527
7528         ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id];
7529         mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr->id, 0, false);
7530
7531         --ul_vr->rif_count;
7532         mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
7533 }
7534
7535 static const struct mlxsw_sp_rif_ops mlxsw_sp1_rif_ipip_lb_ops = {
7536         .type                   = MLXSW_SP_RIF_TYPE_IPIP_LB,
7537         .rif_size               = sizeof(struct mlxsw_sp_rif_ipip_lb),
7538         .setup                  = mlxsw_sp_rif_ipip_lb_setup,
7539         .configure              = mlxsw_sp1_rif_ipip_lb_configure,
7540         .deconfigure            = mlxsw_sp1_rif_ipip_lb_deconfigure,
7541 };
7542
7543 const struct mlxsw_sp_rif_ops *mlxsw_sp1_rif_ops_arr[] = {
7544         [MLXSW_SP_RIF_TYPE_SUBPORT]     = &mlxsw_sp_rif_subport_ops,
7545         [MLXSW_SP_RIF_TYPE_VLAN]        = &mlxsw_sp_rif_vlan_emu_ops,
7546         [MLXSW_SP_RIF_TYPE_FID]         = &mlxsw_sp_rif_fid_ops,
7547         [MLXSW_SP_RIF_TYPE_IPIP_LB]     = &mlxsw_sp1_rif_ipip_lb_ops,
7548 };
7549
7550 static int
7551 mlxsw_sp_rif_ipip_lb_ul_rif_op(struct mlxsw_sp_rif *ul_rif, bool enable)
7552 {
7553         struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp;
7554         char ritr_pl[MLXSW_REG_RITR_LEN];
7555
7556         mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
7557                             ul_rif->rif_index, ul_rif->vr_id, IP_MAX_MTU);
7558         mlxsw_reg_ritr_loopback_protocol_set(ritr_pl,
7559                                              MLXSW_REG_RITR_LOOPBACK_GENERIC);
7560
7561         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
7562 }
7563
7564 static struct mlxsw_sp_rif *
7565 mlxsw_sp_ul_rif_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr,
7566                        struct netlink_ext_ack *extack)
7567 {
7568         struct mlxsw_sp_rif *ul_rif;
7569         u16 rif_index;
7570         int err;
7571
7572         err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index);
7573         if (err) {
7574                 NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces");
7575                 return ERR_PTR(err);
7576         }
7577
7578         ul_rif = mlxsw_sp_rif_alloc(sizeof(*ul_rif), rif_index, vr->id, NULL);
7579         if (!ul_rif)
7580                 return ERR_PTR(-ENOMEM);
7581
7582         mlxsw_sp->router->rifs[rif_index] = ul_rif;
7583         ul_rif->mlxsw_sp = mlxsw_sp;
7584         err = mlxsw_sp_rif_ipip_lb_ul_rif_op(ul_rif, true);
7585         if (err)
7586                 goto ul_rif_op_err;
7587
7588         return ul_rif;
7589
7590 ul_rif_op_err:
7591         mlxsw_sp->router->rifs[rif_index] = NULL;
7592         kfree(ul_rif);
7593         return ERR_PTR(err);
7594 }
7595
7596 static void mlxsw_sp_ul_rif_destroy(struct mlxsw_sp_rif *ul_rif)
7597 {
7598         struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp;
7599
7600         mlxsw_sp_rif_ipip_lb_ul_rif_op(ul_rif, false);
7601         mlxsw_sp->router->rifs[ul_rif->rif_index] = NULL;
7602         kfree(ul_rif);
7603 }
7604
7605 static struct mlxsw_sp_rif *
7606 mlxsw_sp_ul_rif_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
7607                     struct netlink_ext_ack *extack)
7608 {
7609         struct mlxsw_sp_vr *vr;
7610         int err;
7611
7612         vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, extack);
7613         if (IS_ERR(vr))
7614                 return ERR_CAST(vr);
7615
7616         if (refcount_inc_not_zero(&vr->ul_rif_refcnt))
7617                 return vr->ul_rif;
7618
7619         vr->ul_rif = mlxsw_sp_ul_rif_create(mlxsw_sp, vr, extack);
7620         if (IS_ERR(vr->ul_rif)) {
7621                 err = PTR_ERR(vr->ul_rif);
7622                 goto err_ul_rif_create;
7623         }
7624
7625         vr->rif_count++;
7626         refcount_set(&vr->ul_rif_refcnt, 1);
7627
7628         return vr->ul_rif;
7629
7630 err_ul_rif_create:
7631         mlxsw_sp_vr_put(mlxsw_sp, vr);
7632         return ERR_PTR(err);
7633 }
7634
7635 static void mlxsw_sp_ul_rif_put(struct mlxsw_sp_rif *ul_rif)
7636 {
7637         struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp;
7638         struct mlxsw_sp_vr *vr;
7639
7640         vr = &mlxsw_sp->router->vrs[ul_rif->vr_id];
7641
7642         if (!refcount_dec_and_test(&vr->ul_rif_refcnt))
7643                 return;
7644
7645         vr->rif_count--;
7646         mlxsw_sp_ul_rif_destroy(ul_rif);
7647         mlxsw_sp_vr_put(mlxsw_sp, vr);
7648 }
7649
7650 int mlxsw_sp_router_ul_rif_get(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
7651                                u16 *ul_rif_index)
7652 {
7653         struct mlxsw_sp_rif *ul_rif;
7654
7655         ASSERT_RTNL();
7656
7657         ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, NULL);
7658         if (IS_ERR(ul_rif))
7659                 return PTR_ERR(ul_rif);
7660         *ul_rif_index = ul_rif->rif_index;
7661
7662         return 0;
7663 }
7664
7665 void mlxsw_sp_router_ul_rif_put(struct mlxsw_sp *mlxsw_sp, u16 ul_rif_index)
7666 {
7667         struct mlxsw_sp_rif *ul_rif;
7668
7669         ASSERT_RTNL();
7670
7671         ul_rif = mlxsw_sp->router->rifs[ul_rif_index];
7672         if (WARN_ON(!ul_rif))
7673                 return;
7674
7675         mlxsw_sp_ul_rif_put(ul_rif);
7676 }
7677
7678 static int
7679 mlxsw_sp2_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif)
7680 {
7681         struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
7682         u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev);
7683         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7684         struct mlxsw_sp_rif *ul_rif;
7685         int err;
7686
7687         ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, NULL);
7688         if (IS_ERR(ul_rif))
7689                 return PTR_ERR(ul_rif);
7690
7691         err = mlxsw_sp_rif_ipip_lb_op(lb_rif, 0, ul_rif->rif_index, true);
7692         if (err)
7693                 goto err_loopback_op;
7694
7695         lb_rif->ul_vr_id = 0;
7696         lb_rif->ul_rif_id = ul_rif->rif_index;
7697
7698         return 0;
7699
7700 err_loopback_op:
7701         mlxsw_sp_ul_rif_put(ul_rif);
7702         return err;
7703 }
7704
7705 static void mlxsw_sp2_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
7706 {
7707         struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
7708         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7709         struct mlxsw_sp_rif *ul_rif;
7710
7711         ul_rif = mlxsw_sp_rif_by_index(mlxsw_sp, lb_rif->ul_rif_id);
7712         mlxsw_sp_rif_ipip_lb_op(lb_rif, 0, lb_rif->ul_rif_id, false);
7713         mlxsw_sp_ul_rif_put(ul_rif);
7714 }
7715
7716 static const struct mlxsw_sp_rif_ops mlxsw_sp2_rif_ipip_lb_ops = {
7717         .type                   = MLXSW_SP_RIF_TYPE_IPIP_LB,
7718         .rif_size               = sizeof(struct mlxsw_sp_rif_ipip_lb),
7719         .setup                  = mlxsw_sp_rif_ipip_lb_setup,
7720         .configure              = mlxsw_sp2_rif_ipip_lb_configure,
7721         .deconfigure            = mlxsw_sp2_rif_ipip_lb_deconfigure,
7722 };
7723
7724 const struct mlxsw_sp_rif_ops *mlxsw_sp2_rif_ops_arr[] = {
7725         [MLXSW_SP_RIF_TYPE_SUBPORT]     = &mlxsw_sp_rif_subport_ops,
7726         [MLXSW_SP_RIF_TYPE_VLAN]        = &mlxsw_sp_rif_vlan_emu_ops,
7727         [MLXSW_SP_RIF_TYPE_FID]         = &mlxsw_sp_rif_fid_ops,
7728         [MLXSW_SP_RIF_TYPE_IPIP_LB]     = &mlxsw_sp2_rif_ipip_lb_ops,
7729 };
7730
7731 static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp)
7732 {
7733         u64 max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
7734
7735         mlxsw_sp->router->rifs = kcalloc(max_rifs,
7736                                          sizeof(struct mlxsw_sp_rif *),
7737                                          GFP_KERNEL);
7738         if (!mlxsw_sp->router->rifs)
7739                 return -ENOMEM;
7740
7741         return 0;
7742 }
7743
7744 static void mlxsw_sp_rifs_fini(struct mlxsw_sp *mlxsw_sp)
7745 {
7746         int i;
7747
7748         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
7749                 WARN_ON_ONCE(mlxsw_sp->router->rifs[i]);
7750
7751         kfree(mlxsw_sp->router->rifs);
7752 }
7753
7754 static int
7755 mlxsw_sp_ipip_config_tigcr(struct mlxsw_sp *mlxsw_sp)
7756 {
7757         char tigcr_pl[MLXSW_REG_TIGCR_LEN];
7758
7759         mlxsw_reg_tigcr_pack(tigcr_pl, true, 0);
7760         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tigcr), tigcr_pl);
7761 }
7762
7763 static int mlxsw_sp_ipips_init(struct mlxsw_sp *mlxsw_sp)
7764 {
7765         mlxsw_sp->router->ipip_ops_arr = mlxsw_sp_ipip_ops_arr;
7766         INIT_LIST_HEAD(&mlxsw_sp->router->ipip_list);
7767         return mlxsw_sp_ipip_config_tigcr(mlxsw_sp);
7768 }
7769
7770 static void mlxsw_sp_ipips_fini(struct mlxsw_sp *mlxsw_sp)
7771 {
7772         WARN_ON(!list_empty(&mlxsw_sp->router->ipip_list));
7773 }
7774
7775 static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
7776 {
7777         struct mlxsw_sp_router *router;
7778
7779         /* Flush pending FIB notifications and then flush the device's
7780          * table before requesting another dump. The FIB notification
7781          * block is unregistered, so no need to take RTNL.
7782          */
7783         mlxsw_core_flush_owq();
7784         router = container_of(nb, struct mlxsw_sp_router, fib_nb);
7785         mlxsw_sp_router_fib_flush(router->mlxsw_sp);
7786 }
7787
7788 #ifdef CONFIG_IP_ROUTE_MULTIPATH
7789 static void mlxsw_sp_mp_hash_header_set(char *recr2_pl, int header)
7790 {
7791         mlxsw_reg_recr2_outer_header_enables_set(recr2_pl, header, true);
7792 }
7793
7794 static void mlxsw_sp_mp_hash_field_set(char *recr2_pl, int field)
7795 {
7796         mlxsw_reg_recr2_outer_header_fields_enable_set(recr2_pl, field, true);
7797 }
7798
7799 static void mlxsw_sp_mp4_hash_init(char *recr2_pl)
7800 {
7801         bool only_l3 = !init_net.ipv4.sysctl_fib_multipath_hash_policy;
7802
7803         mlxsw_sp_mp_hash_header_set(recr2_pl,
7804                                     MLXSW_REG_RECR2_IPV4_EN_NOT_TCP_NOT_UDP);
7805         mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV4_EN_TCP_UDP);
7806         mlxsw_reg_recr2_ipv4_sip_enable(recr2_pl);
7807         mlxsw_reg_recr2_ipv4_dip_enable(recr2_pl);
7808         if (only_l3)
7809                 return;
7810         mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_EN_IPV4);
7811         mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV4_PROTOCOL);
7812         mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_SPORT);
7813         mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_DPORT);
7814 }
7815
7816 static void mlxsw_sp_mp6_hash_init(char *recr2_pl)
7817 {
7818         bool only_l3 = !ip6_multipath_hash_policy(&init_net);
7819
7820         mlxsw_sp_mp_hash_header_set(recr2_pl,
7821                                     MLXSW_REG_RECR2_IPV6_EN_NOT_TCP_NOT_UDP);
7822         mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV6_EN_TCP_UDP);
7823         mlxsw_reg_recr2_ipv6_sip_enable(recr2_pl);
7824         mlxsw_reg_recr2_ipv6_dip_enable(recr2_pl);
7825         mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_NEXT_HEADER);
7826         if (only_l3) {
7827                 mlxsw_sp_mp_hash_field_set(recr2_pl,
7828                                            MLXSW_REG_RECR2_IPV6_FLOW_LABEL);
7829         } else {
7830                 mlxsw_sp_mp_hash_header_set(recr2_pl,
7831                                             MLXSW_REG_RECR2_TCP_UDP_EN_IPV6);
7832                 mlxsw_sp_mp_hash_field_set(recr2_pl,
7833                                            MLXSW_REG_RECR2_TCP_UDP_SPORT);
7834                 mlxsw_sp_mp_hash_field_set(recr2_pl,
7835                                            MLXSW_REG_RECR2_TCP_UDP_DPORT);
7836         }
7837 }
7838
7839 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
7840 {
7841         char recr2_pl[MLXSW_REG_RECR2_LEN];
7842         u32 seed;
7843
7844         seed = jhash(mlxsw_sp->base_mac, sizeof(mlxsw_sp->base_mac), 0);
7845         mlxsw_reg_recr2_pack(recr2_pl, seed);
7846         mlxsw_sp_mp4_hash_init(recr2_pl);
7847         mlxsw_sp_mp6_hash_init(recr2_pl);
7848
7849         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(recr2), recr2_pl);
7850 }
7851 #else
/* Stub used when CONFIG_IP_ROUTE_MULTIPATH is not set: nothing to program,
 * always succeeds.
 */
static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
{
	return 0;
}
7856 #endif
7857
7858 static int mlxsw_sp_dscp_init(struct mlxsw_sp *mlxsw_sp)
7859 {
7860         char rdpm_pl[MLXSW_REG_RDPM_LEN];
7861         unsigned int i;
7862
7863         MLXSW_REG_ZERO(rdpm, rdpm_pl);
7864
7865         /* HW is determining switch priority based on DSCP-bits, but the
7866          * kernel is still doing that based on the ToS. Since there's a
7867          * mismatch in bits we need to make sure to translate the right
7868          * value ToS would observe, skipping the 2 least-significant ECN bits.
7869          */
7870         for (i = 0; i < MLXSW_REG_RDPM_DSCP_ENTRY_REC_MAX_COUNT; i++)
7871                 mlxsw_reg_rdpm_pack(rdpm_pl, i, rt_tos2priority(i << 2));
7872
7873         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rdpm), rdpm_pl);
7874 }
7875
7876 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
7877 {
7878         bool usp = init_net.ipv4.sysctl_ip_fwd_update_priority;
7879         char rgcr_pl[MLXSW_REG_RGCR_LEN];
7880         u64 max_rifs;
7881         int err;
7882
7883         if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS))
7884                 return -EIO;
7885         max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
7886
7887         mlxsw_reg_rgcr_pack(rgcr_pl, true, true);
7888         mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
7889         mlxsw_reg_rgcr_usp_set(rgcr_pl, usp);
7890         err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
7891         if (err)
7892                 return err;
7893         return 0;
7894 }
7895
7896 static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
7897 {
7898         char rgcr_pl[MLXSW_REG_RGCR_LEN];
7899
7900         mlxsw_reg_rgcr_pack(rgcr_pl, false, false);
7901         mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
7902 }
7903
7904 int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
7905 {
7906         struct mlxsw_sp_router *router;
7907         int err;
7908
7909         router = kzalloc(sizeof(*mlxsw_sp->router), GFP_KERNEL);
7910         if (!router)
7911                 return -ENOMEM;
7912         mlxsw_sp->router = router;
7913         router->mlxsw_sp = mlxsw_sp;
7914
7915         router->inetaddr_nb.notifier_call = mlxsw_sp_inetaddr_event;
7916         err = register_inetaddr_notifier(&router->inetaddr_nb);
7917         if (err)
7918                 goto err_register_inetaddr_notifier;
7919
7920         router->inet6addr_nb.notifier_call = mlxsw_sp_inet6addr_event;
7921         err = register_inet6addr_notifier(&router->inet6addr_nb);
7922         if (err)
7923                 goto err_register_inet6addr_notifier;
7924
7925         INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_neighs_list);
7926         err = __mlxsw_sp_router_init(mlxsw_sp);
7927         if (err)
7928                 goto err_router_init;
7929
7930         err = mlxsw_sp_rifs_init(mlxsw_sp);
7931         if (err)
7932                 goto err_rifs_init;
7933
7934         err = mlxsw_sp_ipips_init(mlxsw_sp);
7935         if (err)
7936                 goto err_ipips_init;
7937
7938         err = rhashtable_init(&mlxsw_sp->router->nexthop_ht,
7939                               &mlxsw_sp_nexthop_ht_params);
7940         if (err)
7941                 goto err_nexthop_ht_init;
7942
7943         err = rhashtable_init(&mlxsw_sp->router->nexthop_group_ht,
7944                               &mlxsw_sp_nexthop_group_ht_params);
7945         if (err)
7946                 goto err_nexthop_group_ht_init;
7947
7948         INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_list);
7949         err = mlxsw_sp_lpm_init(mlxsw_sp);
7950         if (err)
7951                 goto err_lpm_init;
7952
7953         err = mlxsw_sp_mr_init(mlxsw_sp, &mlxsw_sp_mr_tcam_ops);
7954         if (err)
7955                 goto err_mr_init;
7956
7957         err = mlxsw_sp_vrs_init(mlxsw_sp);
7958         if (err)
7959                 goto err_vrs_init;
7960
7961         err = mlxsw_sp_neigh_init(mlxsw_sp);
7962         if (err)
7963                 goto err_neigh_init;
7964
7965         mlxsw_sp->router->netevent_nb.notifier_call =
7966                 mlxsw_sp_router_netevent_event;
7967         err = register_netevent_notifier(&mlxsw_sp->router->netevent_nb);
7968         if (err)
7969                 goto err_register_netevent_notifier;
7970
7971         err = mlxsw_sp_mp_hash_init(mlxsw_sp);
7972         if (err)
7973                 goto err_mp_hash_init;
7974
7975         err = mlxsw_sp_dscp_init(mlxsw_sp);
7976         if (err)
7977                 goto err_dscp_init;
7978
7979         mlxsw_sp->router->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
7980         err = register_fib_notifier(&mlxsw_sp->router->fib_nb,
7981                                     mlxsw_sp_router_fib_dump_flush);
7982         if (err)
7983                 goto err_register_fib_notifier;
7984
7985         return 0;
7986
7987 err_register_fib_notifier:
7988 err_dscp_init:
7989 err_mp_hash_init:
7990         unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
7991 err_register_netevent_notifier:
7992         mlxsw_sp_neigh_fini(mlxsw_sp);
7993 err_neigh_init:
7994         mlxsw_sp_vrs_fini(mlxsw_sp);
7995 err_vrs_init:
7996         mlxsw_sp_mr_fini(mlxsw_sp);
7997 err_mr_init:
7998         mlxsw_sp_lpm_fini(mlxsw_sp);
7999 err_lpm_init:
8000         rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
8001 err_nexthop_group_ht_init:
8002         rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
8003 err_nexthop_ht_init:
8004         mlxsw_sp_ipips_fini(mlxsw_sp);
8005 err_ipips_init:
8006         mlxsw_sp_rifs_fini(mlxsw_sp);
8007 err_rifs_init:
8008         __mlxsw_sp_router_fini(mlxsw_sp);
8009 err_router_init:
8010         unregister_inet6addr_notifier(&router->inet6addr_nb);
8011 err_register_inet6addr_notifier:
8012         unregister_inetaddr_notifier(&router->inetaddr_nb);
8013 err_register_inetaddr_notifier:
8014         kfree(mlxsw_sp->router);
8015         return err;
8016 }
8017
8018 void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
8019 {
8020         unregister_fib_notifier(&mlxsw_sp->router->fib_nb);
8021         unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
8022         mlxsw_sp_neigh_fini(mlxsw_sp);
8023         mlxsw_sp_vrs_fini(mlxsw_sp);
8024         mlxsw_sp_mr_fini(mlxsw_sp);
8025         mlxsw_sp_lpm_fini(mlxsw_sp);
8026         rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
8027         rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
8028         mlxsw_sp_ipips_fini(mlxsw_sp);
8029         mlxsw_sp_rifs_fini(mlxsw_sp);
8030         __mlxsw_sp_router_fini(mlxsw_sp);
8031         unregister_inet6addr_notifier(&mlxsw_sp->router->inet6addr_nb);
8032         unregister_inetaddr_notifier(&mlxsw_sp->router->inetaddr_nb);
8033         kfree(mlxsw_sp->router);
8034 }