]> asedeno.scripts.mit.edu Git - linux.git/blob - drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
cifs: fix GlobalMid_Lock bug in cifs_reconnect
[linux.git] / drivers / net / ethernet / mellanox / mlxsw / spectrum_router.c
1 // SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
2 /* Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved */
3
4 #include <linux/kernel.h>
5 #include <linux/types.h>
6 #include <linux/rhashtable.h>
7 #include <linux/bitops.h>
8 #include <linux/in6.h>
9 #include <linux/notifier.h>
10 #include <linux/inetdevice.h>
11 #include <linux/netdevice.h>
12 #include <linux/if_bridge.h>
13 #include <linux/socket.h>
14 #include <linux/route.h>
15 #include <linux/gcd.h>
16 #include <linux/if_macvlan.h>
17 #include <linux/refcount.h>
18 #include <linux/jhash.h>
19 #include <net/netevent.h>
20 #include <net/neighbour.h>
21 #include <net/arp.h>
22 #include <net/ip_fib.h>
23 #include <net/ip6_fib.h>
24 #include <net/fib_rules.h>
25 #include <net/ip_tunnels.h>
26 #include <net/l3mdev.h>
27 #include <net/addrconf.h>
28 #include <net/ndisc.h>
29 #include <net/ipv6.h>
30 #include <net/fib_notifier.h>
31 #include <net/switchdev.h>
32
33 #include "spectrum.h"
34 #include "core.h"
35 #include "reg.h"
36 #include "spectrum_cnt.h"
37 #include "spectrum_dpipe.h"
38 #include "spectrum_ipip.h"
39 #include "spectrum_mr.h"
40 #include "spectrum_mr_tcam.h"
41 #include "spectrum_router.h"
42 #include "spectrum_span.h"
43
44 struct mlxsw_sp_fib;
45 struct mlxsw_sp_vr;
46 struct mlxsw_sp_lpm_tree;
47 struct mlxsw_sp_rif_ops;
48
/* Router-wide state, reached in this file through mlxsw_sp->router. */
49 struct mlxsw_sp_router {
50         struct mlxsw_sp *mlxsw_sp;
51         struct mlxsw_sp_rif **rifs;
52         struct mlxsw_sp_vr *vrs; /* indexed 0..MAX_VRS-1 by the VR lookup helpers */
53         struct rhashtable neigh_ht;
54         struct rhashtable nexthop_group_ht;
55         struct rhashtable nexthop_ht;
56         struct list_head nexthop_list;
57         struct {
58                 /* One tree for each protocol: IPv4 and IPv6 */
59                 struct mlxsw_sp_lpm_tree *proto_trees[2];
60                 struct mlxsw_sp_lpm_tree *trees; /* array of tree_count entries */
61                 unsigned int tree_count; /* MAX_LPM_TREES minus the reserved trees */
62         } lpm;
63         struct {
64                 struct delayed_work dw;
65                 unsigned long interval; /* ms */
66         } neighs_update;
67         struct delayed_work nexthop_probe_dw;
68 #define MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL 5000 /* ms */
69         struct list_head nexthop_neighs_list;
70         struct list_head ipip_list;
71         bool aborted;
72         struct notifier_block fib_nb;
73         struct notifier_block netevent_nb;
74         struct notifier_block inetaddr_nb;
75         struct notifier_block inet6addr_nb;
76         const struct mlxsw_sp_rif_ops **rif_ops_arr;
77         const struct mlxsw_sp_ipip_ops **ipip_ops_arr;
78 };
79
/* Router interface (RIF) state shared by all RIF flavours; the type-specific
 * structures in this file embed it as their "common" member.
 */
80 struct mlxsw_sp_rif {
81         struct list_head nexthop_list;
82         struct list_head neigh_list;
83         struct net_device *dev; /* NULL for underlay RIF */
84         struct mlxsw_sp_fid *fid;
85         unsigned char addr[ETH_ALEN];
86         int mtu;
87         u16 rif_index; /* index used when packing the RITR register */
88         u16 vr_id;
89         const struct mlxsw_sp_rif_ops *ops;
90         struct mlxsw_sp *mlxsw_sp;
91
92         unsigned int counter_ingress; /* counter index, meaningful only when valid */
93         bool counter_ingress_valid;
94         unsigned int counter_egress; /* counter index, meaningful only when valid */
95         bool counter_egress_valid;
96 };
97
/* Parameters handed to the setup() callback of struct mlxsw_sp_rif_ops. */
98 struct mlxsw_sp_rif_params {
99         struct net_device *dev;
100         union {
101                 u16 system_port;
102                 u16 lag_id;
103         };
104         u16 vid;
105         bool lag; /* NOTE(review): presumably selects lag_id over system_port - confirm */
106 };
107
/* Sub-port RIF: the common RIF state plus a reference count and the same
 * port/LAG/VID fields that struct mlxsw_sp_rif_params carries.
 */
108 struct mlxsw_sp_rif_subport {
109         struct mlxsw_sp_rif common;
110         refcount_t ref_count;
111         union {
112                 u16 system_port;
113                 u16 lag_id;
114         };
115         u16 vid;
116         bool lag; /* NOTE(review): presumably selects lag_id over system_port - confirm */
117 };
118
/* IP-in-IP loopback RIF: the common RIF state plus the loopback
 * configuration and the underlay identifiers.
 */
119 struct mlxsw_sp_rif_ipip_lb {
120         struct mlxsw_sp_rif common;
121         struct mlxsw_sp_rif_ipip_lb_config lb_config;
122         u16 ul_vr_id; /* Reserved for Spectrum-2. */
123         u16 ul_rif_id; /* Reserved for Spectrum. */
124 };
125
/* RIF parameters extended with an IP-in-IP loopback configuration. */
126 struct mlxsw_sp_rif_params_ipip_lb {
127         struct mlxsw_sp_rif_params common;
128         struct mlxsw_sp_rif_ipip_lb_config lb_config;
129 };
130
/* Per-RIF-type operations table; each RIF points at one through rif->ops. */
131 struct mlxsw_sp_rif_ops {
132         enum mlxsw_sp_rif_type type;
133         size_t rif_size; /* NOTE(review): presumably the size of the type-specific RIF struct - confirm */
134
135         void (*setup)(struct mlxsw_sp_rif *rif,
136                       const struct mlxsw_sp_rif_params *params);
137         int (*configure)(struct mlxsw_sp_rif *rif);
138         void (*deconfigure)(struct mlxsw_sp_rif *rif);
139         struct mlxsw_sp_fid * (*fid_get)(struct mlxsw_sp_rif *rif,
140                                          struct netlink_ext_ack *extack);
141         void (*fdb_del)(struct mlxsw_sp_rif *rif, const char *mac);
142 };
143
144 static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif);
145 static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree);
146 static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
147                                   struct mlxsw_sp_lpm_tree *lpm_tree);
148 static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
149                                      const struct mlxsw_sp_fib *fib,
150                                      u8 tree_id);
151 static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
152                                        const struct mlxsw_sp_fib *fib);
153
154 static unsigned int *
155 mlxsw_sp_rif_p_counter_get(struct mlxsw_sp_rif *rif,
156                            enum mlxsw_sp_rif_counter_dir dir)
157 {
158         switch (dir) {
159         case MLXSW_SP_RIF_COUNTER_EGRESS:
160                 return &rif->counter_egress;
161         case MLXSW_SP_RIF_COUNTER_INGRESS:
162                 return &rif->counter_ingress;
163         }
164         return NULL;
165 }
166
167 static bool
168 mlxsw_sp_rif_counter_valid_get(struct mlxsw_sp_rif *rif,
169                                enum mlxsw_sp_rif_counter_dir dir)
170 {
171         switch (dir) {
172         case MLXSW_SP_RIF_COUNTER_EGRESS:
173                 return rif->counter_egress_valid;
174         case MLXSW_SP_RIF_COUNTER_INGRESS:
175                 return rif->counter_ingress_valid;
176         }
177         return false;
178 }
179
180 static void
181 mlxsw_sp_rif_counter_valid_set(struct mlxsw_sp_rif *rif,
182                                enum mlxsw_sp_rif_counter_dir dir,
183                                bool valid)
184 {
185         switch (dir) {
186         case MLXSW_SP_RIF_COUNTER_EGRESS:
187                 rif->counter_egress_valid = valid;
188                 break;
189         case MLXSW_SP_RIF_COUNTER_INGRESS:
190                 rif->counter_ingress_valid = valid;
191                 break;
192         }
193 }
194
195 static int mlxsw_sp_rif_counter_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
196                                      unsigned int counter_index, bool enable,
197                                      enum mlxsw_sp_rif_counter_dir dir)
198 {
199         char ritr_pl[MLXSW_REG_RITR_LEN];
200         bool is_egress = false;
201         int err;
202
203         if (dir == MLXSW_SP_RIF_COUNTER_EGRESS)
204                 is_egress = true;
205         mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
206         err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
207         if (err)
208                 return err;
209
210         mlxsw_reg_ritr_counter_pack(ritr_pl, counter_index, enable,
211                                     is_egress);
212         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
213 }
214
215 int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp,
216                                    struct mlxsw_sp_rif *rif,
217                                    enum mlxsw_sp_rif_counter_dir dir, u64 *cnt)
218 {
219         char ricnt_pl[MLXSW_REG_RICNT_LEN];
220         unsigned int *p_counter_index;
221         bool valid;
222         int err;
223
224         valid = mlxsw_sp_rif_counter_valid_get(rif, dir);
225         if (!valid)
226                 return -EINVAL;
227
228         p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
229         if (!p_counter_index)
230                 return -EINVAL;
231         mlxsw_reg_ricnt_pack(ricnt_pl, *p_counter_index,
232                              MLXSW_REG_RICNT_OPCODE_NOP);
233         err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
234         if (err)
235                 return err;
236         *cnt = mlxsw_reg_ricnt_good_unicast_packets_get(ricnt_pl);
237         return 0;
238 }
239
240 static int mlxsw_sp_rif_counter_clear(struct mlxsw_sp *mlxsw_sp,
241                                       unsigned int counter_index)
242 {
243         char ricnt_pl[MLXSW_REG_RICNT_LEN];
244
245         mlxsw_reg_ricnt_pack(ricnt_pl, counter_index,
246                              MLXSW_REG_RICNT_OPCODE_CLEAR);
247         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
248 }
249
250 int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp *mlxsw_sp,
251                                struct mlxsw_sp_rif *rif,
252                                enum mlxsw_sp_rif_counter_dir dir)
253 {
254         unsigned int *p_counter_index;
255         int err;
256
257         p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
258         if (!p_counter_index)
259                 return -EINVAL;
260         err = mlxsw_sp_counter_alloc(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
261                                      p_counter_index);
262         if (err)
263                 return err;
264
265         err = mlxsw_sp_rif_counter_clear(mlxsw_sp, *p_counter_index);
266         if (err)
267                 goto err_counter_clear;
268
269         err = mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
270                                         *p_counter_index, true, dir);
271         if (err)
272                 goto err_counter_edit;
273         mlxsw_sp_rif_counter_valid_set(rif, dir, true);
274         return 0;
275
276 err_counter_edit:
277 err_counter_clear:
278         mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
279                               *p_counter_index);
280         return err;
281 }
282
283 void mlxsw_sp_rif_counter_free(struct mlxsw_sp *mlxsw_sp,
284                                struct mlxsw_sp_rif *rif,
285                                enum mlxsw_sp_rif_counter_dir dir)
286 {
287         unsigned int *p_counter_index;
288
289         if (!mlxsw_sp_rif_counter_valid_get(rif, dir))
290                 return;
291
292         p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
293         if (WARN_ON(!p_counter_index))
294                 return;
295         mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
296                                   *p_counter_index, false, dir);
297         mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
298                               *p_counter_index);
299         mlxsw_sp_rif_counter_valid_set(rif, dir, false);
300 }
301
302 static void mlxsw_sp_rif_counters_alloc(struct mlxsw_sp_rif *rif)
303 {
304         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
305         struct devlink *devlink;
306
307         devlink = priv_to_devlink(mlxsw_sp->core);
308         if (!devlink_dpipe_table_counter_enabled(devlink,
309                                                  MLXSW_SP_DPIPE_TABLE_NAME_ERIF))
310                 return;
311         mlxsw_sp_rif_counter_alloc(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
312 }
313
314 static void mlxsw_sp_rif_counters_free(struct mlxsw_sp_rif *rif)
315 {
316         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
317
318         mlxsw_sp_rif_counter_free(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
319 }
320
/* Number of distinct prefix lengths: one per bit of an IPv6 address, plus
 * one for prefix length 0.
 */
321 #define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE + 1)
322
/* Bitmap with one bit per prefix length; bit N set means length N is used. */
323 struct mlxsw_sp_prefix_usage {
324         DECLARE_BITMAP(b, MLXSW_SP_PREFIX_COUNT);
325 };
326
/* Iterate @prefix over every prefix length whose bit is set in @prefix_usage. */
327 #define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \
328         for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT)
329
330 static bool
331 mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1,
332                          struct mlxsw_sp_prefix_usage *prefix_usage2)
333 {
334         return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
335 }
336
337 static void
338 mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1,
339                           struct mlxsw_sp_prefix_usage *prefix_usage2)
340 {
341         memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
342 }
343
344 static void
345 mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage,
346                           unsigned char prefix_len)
347 {
348         set_bit(prefix_len, prefix_usage->b);
349 }
350
351 static void
352 mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage,
353                             unsigned char prefix_len)
354 {
355         clear_bit(prefix_len, prefix_usage->b);
356 }
357
/* Key of a FIB node: an address buffer sized for IPv6 plus the prefix length. */
358 struct mlxsw_sp_fib_key {
359         unsigned char addr[sizeof(struct in6_addr)];
360         unsigned char prefix_len;
361 };
362
/* Kind of a FIB entry; stored in the "type" member of struct mlxsw_sp_fib_entry. */
363 enum mlxsw_sp_fib_entry_type {
364         MLXSW_SP_FIB_ENTRY_TYPE_REMOTE,
365         MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,
366         MLXSW_SP_FIB_ENTRY_TYPE_TRAP,
367         MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE,
368
369         /* This is a special case of local delivery, where a packet should be
370          * decapsulated on reception. Note that there is no corresponding ENCAP,
371          * because that's a type of next hop, not of FIB entry. (There can be
372          * several next hops in a REMOTE entry, and some of them may be
373          * encapsulating entries.)
374          */
375         MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP,
376         MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP,
377 };
378
379 struct mlxsw_sp_nexthop_group;
380
/* One node of a FIB, identified by its key (address and prefix length). */
381 struct mlxsw_sp_fib_node {
382         struct list_head entry_list;
383         struct list_head list; /* NOTE(review): presumably linked on fib->node_list - confirm */
384         struct rhash_head ht_node; /* NOTE(review): presumably hashed in fib->ht - confirm */
385         struct mlxsw_sp_fib *fib;
386         struct mlxsw_sp_fib_key key;
387 };
388
/* Decapsulation data carried by a FIB entry (see the "decap" member of
 * struct mlxsw_sp_fib_entry).
 */
389 struct mlxsw_sp_fib_entry_decap {
390         struct mlxsw_sp_ipip_entry *ipip_entry;
391         u32 tunnel_index;
392 };
393
/* Protocol-independent part of a FIB entry; embedded as "common" in the
 * IPv4 and IPv6 entry structures.
 */
394 struct mlxsw_sp_fib_entry {
395         struct list_head list;
396         struct mlxsw_sp_fib_node *fib_node;
397         enum mlxsw_sp_fib_entry_type type;
398         struct list_head nexthop_group_node;
399         struct mlxsw_sp_nexthop_group *nh_group;
400         struct mlxsw_sp_fib_entry_decap decap; /* Valid for decap entries. */
401 };
402
/* IPv4 FIB entry: the common entry plus IPv4 route attributes. */
403 struct mlxsw_sp_fib4_entry {
404         struct mlxsw_sp_fib_entry common;
405         u32 tb_id;
406         u32 prio;
407         u8 tos;
408         u8 type;
409 };
410
/* IPv6 FIB entry: the common entry plus a list of routes and a count. */
411 struct mlxsw_sp_fib6_entry {
412         struct mlxsw_sp_fib_entry common;
413         struct list_head rt6_list;
414         unsigned int nrt6; /* NOTE(review): presumably the length of rt6_list - confirm */
415 };
416
/* List wrapper around a kernel IPv6 route (struct fib6_info). */
417 struct mlxsw_sp_rt6 {
418         struct list_head list;
419         struct fib6_info *rt;
420 };
421
/* Bookkeeping for one LPM tree slot. A slot with ref_count == 0 is treated
 * as unused by the tree lookup helpers.
 */
422 struct mlxsw_sp_lpm_tree {
423         u8 id; /* tree ID */
424         unsigned int ref_count;
425         enum mlxsw_sp_l3proto proto;
426         unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT]; /* zeroed when the tree is created */
427         struct mlxsw_sp_prefix_usage prefix_usage; /* copied from the request at creation */
428 };
429
/* Per-protocol FIB of a virtual router, bound to one LPM tree. */
430 struct mlxsw_sp_fib {
431         struct rhashtable ht; /* initialised with mlxsw_sp_fib_ht_params */
432         struct list_head node_list; /* expected to be empty when the FIB is destroyed */
433         struct mlxsw_sp_vr *vr;
434         struct mlxsw_sp_lpm_tree *lpm_tree; /* the FIB holds a reference on this tree */
435         enum mlxsw_sp_l3proto proto;
436 };
437
/* Virtual router slot. The slot counts as "used" while any of fib4, fib6 or
 * the two mr_table pointers is non-NULL.
 */
438 struct mlxsw_sp_vr {
439         u16 id; /* virtual router ID */
440         u32 tb_id; /* kernel fib table id */
441         unsigned int rif_count;
442         struct mlxsw_sp_fib *fib4;
443         struct mlxsw_sp_fib *fib6;
444         struct mlxsw_sp_mr_table *mr_table[MLXSW_SP_L3_PROTO_MAX];
445         struct mlxsw_sp_rif *ul_rif;
446         refcount_t ul_rif_refcnt;
447 };
448
449 static const struct rhashtable_params mlxsw_sp_fib_ht_params;
450
451 static struct mlxsw_sp_fib *mlxsw_sp_fib_create(struct mlxsw_sp *mlxsw_sp,
452                                                 struct mlxsw_sp_vr *vr,
453                                                 enum mlxsw_sp_l3proto proto)
454 {
455         struct mlxsw_sp_lpm_tree *lpm_tree;
456         struct mlxsw_sp_fib *fib;
457         int err;
458
459         lpm_tree = mlxsw_sp->router->lpm.proto_trees[proto];
460         fib = kzalloc(sizeof(*fib), GFP_KERNEL);
461         if (!fib)
462                 return ERR_PTR(-ENOMEM);
463         err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params);
464         if (err)
465                 goto err_rhashtable_init;
466         INIT_LIST_HEAD(&fib->node_list);
467         fib->proto = proto;
468         fib->vr = vr;
469         fib->lpm_tree = lpm_tree;
470         mlxsw_sp_lpm_tree_hold(lpm_tree);
471         err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, lpm_tree->id);
472         if (err)
473                 goto err_lpm_tree_bind;
474         return fib;
475
476 err_lpm_tree_bind:
477         mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
478 err_rhashtable_init:
479         kfree(fib);
480         return ERR_PTR(err);
481 }
482
483 static void mlxsw_sp_fib_destroy(struct mlxsw_sp *mlxsw_sp,
484                                  struct mlxsw_sp_fib *fib)
485 {
486         mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, fib);
487         mlxsw_sp_lpm_tree_put(mlxsw_sp, fib->lpm_tree);
488         WARN_ON(!list_empty(&fib->node_list));
489         rhashtable_destroy(&fib->ht);
490         kfree(fib);
491 }
492
493 static struct mlxsw_sp_lpm_tree *
494 mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp)
495 {
496         static struct mlxsw_sp_lpm_tree *lpm_tree;
497         int i;
498
499         for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
500                 lpm_tree = &mlxsw_sp->router->lpm.trees[i];
501                 if (lpm_tree->ref_count == 0)
502                         return lpm_tree;
503         }
504         return NULL;
505 }
506
507 static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp,
508                                    struct mlxsw_sp_lpm_tree *lpm_tree)
509 {
510         char ralta_pl[MLXSW_REG_RALTA_LEN];
511
512         mlxsw_reg_ralta_pack(ralta_pl, true,
513                              (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
514                              lpm_tree->id);
515         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
516 }
517
518 static void mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp,
519                                    struct mlxsw_sp_lpm_tree *lpm_tree)
520 {
521         char ralta_pl[MLXSW_REG_RALTA_LEN];
522
523         mlxsw_reg_ralta_pack(ralta_pl, false,
524                              (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
525                              lpm_tree->id);
526         mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
527 }
528
529 static int
530 mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp,
531                                   struct mlxsw_sp_prefix_usage *prefix_usage,
532                                   struct mlxsw_sp_lpm_tree *lpm_tree)
533 {
534         char ralst_pl[MLXSW_REG_RALST_LEN];
535         u8 root_bin = 0;
536         u8 prefix;
537         u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD;
538
539         mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage)
540                 root_bin = prefix;
541
542         mlxsw_reg_ralst_pack(ralst_pl, root_bin, lpm_tree->id);
543         mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) {
544                 if (prefix == 0)
545                         continue;
546                 mlxsw_reg_ralst_bin_pack(ralst_pl, prefix, last_prefix,
547                                          MLXSW_REG_RALST_BIN_NO_CHILD);
548                 last_prefix = prefix;
549         }
550         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
551 }
552
553 static struct mlxsw_sp_lpm_tree *
554 mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
555                          struct mlxsw_sp_prefix_usage *prefix_usage,
556                          enum mlxsw_sp_l3proto proto)
557 {
558         struct mlxsw_sp_lpm_tree *lpm_tree;
559         int err;
560
561         lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp);
562         if (!lpm_tree)
563                 return ERR_PTR(-EBUSY);
564         lpm_tree->proto = proto;
565         err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, lpm_tree);
566         if (err)
567                 return ERR_PTR(err);
568
569         err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, prefix_usage,
570                                                 lpm_tree);
571         if (err)
572                 goto err_left_struct_set;
573         memcpy(&lpm_tree->prefix_usage, prefix_usage,
574                sizeof(lpm_tree->prefix_usage));
575         memset(&lpm_tree->prefix_ref_count, 0,
576                sizeof(lpm_tree->prefix_ref_count));
577         lpm_tree->ref_count = 1;
578         return lpm_tree;
579
580 err_left_struct_set:
581         mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
582         return ERR_PTR(err);
583 }
584
/* Destroy @lpm_tree; this only releases the tree in the device. */
static void mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp,
				      struct mlxsw_sp_lpm_tree *lpm_tree)
{
	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
}
590
591 static struct mlxsw_sp_lpm_tree *
592 mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
593                       struct mlxsw_sp_prefix_usage *prefix_usage,
594                       enum mlxsw_sp_l3proto proto)
595 {
596         struct mlxsw_sp_lpm_tree *lpm_tree;
597         int i;
598
599         for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
600                 lpm_tree = &mlxsw_sp->router->lpm.trees[i];
601                 if (lpm_tree->ref_count != 0 &&
602                     lpm_tree->proto == proto &&
603                     mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
604                                              prefix_usage)) {
605                         mlxsw_sp_lpm_tree_hold(lpm_tree);
606                         return lpm_tree;
607                 }
608         }
609         return mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage, proto);
610 }
611
612 static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree)
613 {
614         lpm_tree->ref_count++;
615 }
616
617 static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
618                                   struct mlxsw_sp_lpm_tree *lpm_tree)
619 {
620         if (--lpm_tree->ref_count == 0)
621                 mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree);
622 }
623
624 #define MLXSW_SP_LPM_TREE_MIN 1 /* tree 0 is reserved */
625
626 static int mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
627 {
628         struct mlxsw_sp_prefix_usage req_prefix_usage = {{ 0 } };
629         struct mlxsw_sp_lpm_tree *lpm_tree;
630         u64 max_trees;
631         int err, i;
632
633         if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_LPM_TREES))
634                 return -EIO;
635
636         max_trees = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_LPM_TREES);
637         mlxsw_sp->router->lpm.tree_count = max_trees - MLXSW_SP_LPM_TREE_MIN;
638         mlxsw_sp->router->lpm.trees = kcalloc(mlxsw_sp->router->lpm.tree_count,
639                                              sizeof(struct mlxsw_sp_lpm_tree),
640                                              GFP_KERNEL);
641         if (!mlxsw_sp->router->lpm.trees)
642                 return -ENOMEM;
643
644         for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
645                 lpm_tree = &mlxsw_sp->router->lpm.trees[i];
646                 lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN;
647         }
648
649         lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
650                                          MLXSW_SP_L3_PROTO_IPV4);
651         if (IS_ERR(lpm_tree)) {
652                 err = PTR_ERR(lpm_tree);
653                 goto err_ipv4_tree_get;
654         }
655         mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4] = lpm_tree;
656
657         lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
658                                          MLXSW_SP_L3_PROTO_IPV6);
659         if (IS_ERR(lpm_tree)) {
660                 err = PTR_ERR(lpm_tree);
661                 goto err_ipv6_tree_get;
662         }
663         mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6] = lpm_tree;
664
665         return 0;
666
667 err_ipv6_tree_get:
668         lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
669         mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
670 err_ipv4_tree_get:
671         kfree(mlxsw_sp->router->lpm.trees);
672         return err;
673 }
674
675 static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp)
676 {
677         struct mlxsw_sp_lpm_tree *lpm_tree;
678
679         lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6];
680         mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
681
682         lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
683         mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
684
685         kfree(mlxsw_sp->router->lpm.trees);
686 }
687
688 static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr)
689 {
690         return !!vr->fib4 || !!vr->fib6 ||
691                !!vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] ||
692                !!vr->mr_table[MLXSW_SP_L3_PROTO_IPV6];
693 }
694
695 static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
696 {
697         struct mlxsw_sp_vr *vr;
698         int i;
699
700         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
701                 vr = &mlxsw_sp->router->vrs[i];
702                 if (!mlxsw_sp_vr_is_used(vr))
703                         return vr;
704         }
705         return NULL;
706 }
707
708 static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
709                                      const struct mlxsw_sp_fib *fib, u8 tree_id)
710 {
711         char raltb_pl[MLXSW_REG_RALTB_LEN];
712
713         mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
714                              (enum mlxsw_reg_ralxx_protocol) fib->proto,
715                              tree_id);
716         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
717 }
718
719 static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
720                                        const struct mlxsw_sp_fib *fib)
721 {
722         char raltb_pl[MLXSW_REG_RALTB_LEN];
723
724         /* Bind to tree 0 which is default */
725         mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
726                              (enum mlxsw_reg_ralxx_protocol) fib->proto, 0);
727         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
728 }
729
730 static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
731 {
732         /* For our purpose, squash main, default and local tables into one */
733         if (tb_id == RT_TABLE_LOCAL || tb_id == RT_TABLE_DEFAULT)
734                 tb_id = RT_TABLE_MAIN;
735         return tb_id;
736 }
737
738 static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
739                                             u32 tb_id)
740 {
741         struct mlxsw_sp_vr *vr;
742         int i;
743
744         tb_id = mlxsw_sp_fix_tb_id(tb_id);
745
746         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
747                 vr = &mlxsw_sp->router->vrs[i];
748                 if (mlxsw_sp_vr_is_used(vr) && vr->tb_id == tb_id)
749                         return vr;
750         }
751         return NULL;
752 }
753
754 int mlxsw_sp_router_tb_id_vr_id(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
755                                 u16 *vr_id)
756 {
757         struct mlxsw_sp_vr *vr;
758
759         vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
760         if (!vr)
761                 return -ESRCH;
762         *vr_id = vr->id;
763
764         return 0;
765 }
766
767 static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr,
768                                             enum mlxsw_sp_l3proto proto)
769 {
770         switch (proto) {
771         case MLXSW_SP_L3_PROTO_IPV4:
772                 return vr->fib4;
773         case MLXSW_SP_L3_PROTO_IPV6:
774                 return vr->fib6;
775         }
776         return NULL;
777 }
778
779 static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
780                                               u32 tb_id,
781                                               struct netlink_ext_ack *extack)
782 {
783         struct mlxsw_sp_mr_table *mr4_table, *mr6_table;
784         struct mlxsw_sp_fib *fib4;
785         struct mlxsw_sp_fib *fib6;
786         struct mlxsw_sp_vr *vr;
787         int err;
788
789         vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
790         if (!vr) {
791                 NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported virtual routers");
792                 return ERR_PTR(-EBUSY);
793         }
794         fib4 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
795         if (IS_ERR(fib4))
796                 return ERR_CAST(fib4);
797         fib6 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
798         if (IS_ERR(fib6)) {
799                 err = PTR_ERR(fib6);
800                 goto err_fib6_create;
801         }
802         mr4_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
803                                              MLXSW_SP_L3_PROTO_IPV4);
804         if (IS_ERR(mr4_table)) {
805                 err = PTR_ERR(mr4_table);
806                 goto err_mr4_table_create;
807         }
808         mr6_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
809                                              MLXSW_SP_L3_PROTO_IPV6);
810         if (IS_ERR(mr6_table)) {
811                 err = PTR_ERR(mr6_table);
812                 goto err_mr6_table_create;
813         }
814
815         vr->fib4 = fib4;
816         vr->fib6 = fib6;
817         vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] = mr4_table;
818         vr->mr_table[MLXSW_SP_L3_PROTO_IPV6] = mr6_table;
819         vr->tb_id = tb_id;
820         return vr;
821
822 err_mr6_table_create:
823         mlxsw_sp_mr_table_destroy(mr4_table);
824 err_mr4_table_create:
825         mlxsw_sp_fib_destroy(mlxsw_sp, fib6);
826 err_fib6_create:
827         mlxsw_sp_fib_destroy(mlxsw_sp, fib4);
828         return ERR_PTR(err);
829 }
830
831 static void mlxsw_sp_vr_destroy(struct mlxsw_sp *mlxsw_sp,
832                                 struct mlxsw_sp_vr *vr)
833 {
834         mlxsw_sp_mr_table_destroy(vr->mr_table[MLXSW_SP_L3_PROTO_IPV6]);
835         vr->mr_table[MLXSW_SP_L3_PROTO_IPV6] = NULL;
836         mlxsw_sp_mr_table_destroy(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4]);
837         vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] = NULL;
838         mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib6);
839         vr->fib6 = NULL;
840         mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib4);
841         vr->fib4 = NULL;
842 }
843
844 static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
845                                            struct netlink_ext_ack *extack)
846 {
847         struct mlxsw_sp_vr *vr;
848
849         tb_id = mlxsw_sp_fix_tb_id(tb_id);
850         vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
851         if (!vr)
852                 vr = mlxsw_sp_vr_create(mlxsw_sp, tb_id, extack);
853         return vr;
854 }
855
856 static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr)
857 {
858         if (!vr->rif_count && list_empty(&vr->fib4->node_list) &&
859             list_empty(&vr->fib6->node_list) &&
860             mlxsw_sp_mr_table_empty(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4]) &&
861             mlxsw_sp_mr_table_empty(vr->mr_table[MLXSW_SP_L3_PROTO_IPV6]))
862                 mlxsw_sp_vr_destroy(mlxsw_sp, vr);
863 }
864
865 static bool
866 mlxsw_sp_vr_lpm_tree_should_replace(struct mlxsw_sp_vr *vr,
867                                     enum mlxsw_sp_l3proto proto, u8 tree_id)
868 {
869         struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
870
871         if (!mlxsw_sp_vr_is_used(vr))
872                 return false;
873         if (fib->lpm_tree->id == tree_id)
874                 return true;
875         return false;
876 }
877
878 static int mlxsw_sp_vr_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
879                                         struct mlxsw_sp_fib *fib,
880                                         struct mlxsw_sp_lpm_tree *new_tree)
881 {
882         struct mlxsw_sp_lpm_tree *old_tree = fib->lpm_tree;
883         int err;
884
885         fib->lpm_tree = new_tree;
886         mlxsw_sp_lpm_tree_hold(new_tree);
887         err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id);
888         if (err)
889                 goto err_tree_bind;
890         mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
891         return 0;
892
893 err_tree_bind:
894         mlxsw_sp_lpm_tree_put(mlxsw_sp, new_tree);
895         fib->lpm_tree = old_tree;
896         return err;
897 }
898
899 static int mlxsw_sp_vrs_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
900                                          struct mlxsw_sp_fib *fib,
901                                          struct mlxsw_sp_lpm_tree *new_tree)
902 {
903         enum mlxsw_sp_l3proto proto = fib->proto;
904         struct mlxsw_sp_lpm_tree *old_tree;
905         u8 old_id, new_id = new_tree->id;
906         struct mlxsw_sp_vr *vr;
907         int i, err;
908
909         old_tree = mlxsw_sp->router->lpm.proto_trees[proto];
910         old_id = old_tree->id;
911
912         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
913                 vr = &mlxsw_sp->router->vrs[i];
914                 if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, old_id))
915                         continue;
916                 err = mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
917                                                    mlxsw_sp_vr_fib(vr, proto),
918                                                    new_tree);
919                 if (err)
920                         goto err_tree_replace;
921         }
922
923         memcpy(new_tree->prefix_ref_count, old_tree->prefix_ref_count,
924                sizeof(new_tree->prefix_ref_count));
925         mlxsw_sp->router->lpm.proto_trees[proto] = new_tree;
926         mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
927
928         return 0;
929
930 err_tree_replace:
931         for (i--; i >= 0; i--) {
932                 if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, new_id))
933                         continue;
934                 mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
935                                              mlxsw_sp_vr_fib(vr, proto),
936                                              old_tree);
937         }
938         return err;
939 }
940
941 static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
942 {
943         struct mlxsw_sp_vr *vr;
944         u64 max_vrs;
945         int i;
946
947         if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_VRS))
948                 return -EIO;
949
950         max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
951         mlxsw_sp->router->vrs = kcalloc(max_vrs, sizeof(struct mlxsw_sp_vr),
952                                         GFP_KERNEL);
953         if (!mlxsw_sp->router->vrs)
954                 return -ENOMEM;
955
956         for (i = 0; i < max_vrs; i++) {
957                 vr = &mlxsw_sp->router->vrs[i];
958                 vr->id = i;
959         }
960
961         return 0;
962 }
963
964 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp);
965
966 static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
967 {
968         /* At this stage we're guaranteed not to have new incoming
969          * FIB notifications and the work queue is free from FIBs
970          * sitting on top of mlxsw netdevs. However, we can still
971          * have other FIBs queued. Flush the queue before flushing
972          * the device's tables. No need for locks, as we're the only
973          * writer.
974          */
975         mlxsw_core_flush_owq();
976         mlxsw_sp_router_fib_flush(mlxsw_sp);
977         kfree(mlxsw_sp->router->vrs);
978 }
979
980 static struct net_device *
981 __mlxsw_sp_ipip_netdev_ul_dev_get(const struct net_device *ol_dev)
982 {
983         struct ip_tunnel *tun = netdev_priv(ol_dev);
984         struct net *net = dev_net(ol_dev);
985
986         return __dev_get_by_index(net, tun->parms.link);
987 }
988
989 u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev)
990 {
991         struct net_device *d = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
992
993         if (d)
994                 return l3mdev_fib_table(d) ? : RT_TABLE_MAIN;
995         else
996                 return l3mdev_fib_table(ol_dev) ? : RT_TABLE_MAIN;
997 }
998
999 static struct mlxsw_sp_rif *
1000 mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
1001                     const struct mlxsw_sp_rif_params *params,
1002                     struct netlink_ext_ack *extack);
1003
1004 static struct mlxsw_sp_rif_ipip_lb *
1005 mlxsw_sp_ipip_ol_ipip_lb_create(struct mlxsw_sp *mlxsw_sp,
1006                                 enum mlxsw_sp_ipip_type ipipt,
1007                                 struct net_device *ol_dev,
1008                                 struct netlink_ext_ack *extack)
1009 {
1010         struct mlxsw_sp_rif_params_ipip_lb lb_params;
1011         const struct mlxsw_sp_ipip_ops *ipip_ops;
1012         struct mlxsw_sp_rif *rif;
1013
1014         ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
1015         lb_params = (struct mlxsw_sp_rif_params_ipip_lb) {
1016                 .common.dev = ol_dev,
1017                 .common.lag = false,
1018                 .lb_config = ipip_ops->ol_loopback_config(mlxsw_sp, ol_dev),
1019         };
1020
1021         rif = mlxsw_sp_rif_create(mlxsw_sp, &lb_params.common, extack);
1022         if (IS_ERR(rif))
1023                 return ERR_CAST(rif);
1024         return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
1025 }
1026
1027 static struct mlxsw_sp_ipip_entry *
1028 mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp,
1029                           enum mlxsw_sp_ipip_type ipipt,
1030                           struct net_device *ol_dev)
1031 {
1032         const struct mlxsw_sp_ipip_ops *ipip_ops;
1033         struct mlxsw_sp_ipip_entry *ipip_entry;
1034         struct mlxsw_sp_ipip_entry *ret = NULL;
1035
1036         ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
1037         ipip_entry = kzalloc(sizeof(*ipip_entry), GFP_KERNEL);
1038         if (!ipip_entry)
1039                 return ERR_PTR(-ENOMEM);
1040
1041         ipip_entry->ol_lb = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp, ipipt,
1042                                                             ol_dev, NULL);
1043         if (IS_ERR(ipip_entry->ol_lb)) {
1044                 ret = ERR_CAST(ipip_entry->ol_lb);
1045                 goto err_ol_ipip_lb_create;
1046         }
1047
1048         ipip_entry->ipipt = ipipt;
1049         ipip_entry->ol_dev = ol_dev;
1050
1051         switch (ipip_ops->ul_proto) {
1052         case MLXSW_SP_L3_PROTO_IPV4:
1053                 ipip_entry->parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev);
1054                 break;
1055         case MLXSW_SP_L3_PROTO_IPV6:
1056                 WARN_ON(1);
1057                 break;
1058         }
1059
1060         return ipip_entry;
1061
1062 err_ol_ipip_lb_create:
1063         kfree(ipip_entry);
1064         return ret;
1065 }
1066
1067 static void
1068 mlxsw_sp_ipip_entry_dealloc(struct mlxsw_sp_ipip_entry *ipip_entry)
1069 {
1070         mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common);
1071         kfree(ipip_entry);
1072 }
1073
1074 static bool
1075 mlxsw_sp_ipip_entry_saddr_matches(struct mlxsw_sp *mlxsw_sp,
1076                                   const enum mlxsw_sp_l3proto ul_proto,
1077                                   union mlxsw_sp_l3addr saddr,
1078                                   u32 ul_tb_id,
1079                                   struct mlxsw_sp_ipip_entry *ipip_entry)
1080 {
1081         u32 tun_ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
1082         enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
1083         union mlxsw_sp_l3addr tun_saddr;
1084
1085         if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
1086                 return false;
1087
1088         tun_saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ipip_entry->ol_dev);
1089         return tun_ul_tb_id == ul_tb_id &&
1090                mlxsw_sp_l3addr_eq(&tun_saddr, &saddr);
1091 }
1092
1093 static int
1094 mlxsw_sp_fib_entry_decap_init(struct mlxsw_sp *mlxsw_sp,
1095                               struct mlxsw_sp_fib_entry *fib_entry,
1096                               struct mlxsw_sp_ipip_entry *ipip_entry)
1097 {
1098         u32 tunnel_index;
1099         int err;
1100
1101         err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
1102                                   1, &tunnel_index);
1103         if (err)
1104                 return err;
1105
1106         ipip_entry->decap_fib_entry = fib_entry;
1107         fib_entry->decap.ipip_entry = ipip_entry;
1108         fib_entry->decap.tunnel_index = tunnel_index;
1109         return 0;
1110 }
1111
1112 static void mlxsw_sp_fib_entry_decap_fini(struct mlxsw_sp *mlxsw_sp,
1113                                           struct mlxsw_sp_fib_entry *fib_entry)
1114 {
1115         /* Unlink this node from the IPIP entry that it's the decap entry of. */
1116         fib_entry->decap.ipip_entry->decap_fib_entry = NULL;
1117         fib_entry->decap.ipip_entry = NULL;
1118         mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
1119                            1, fib_entry->decap.tunnel_index);
1120 }
1121
1122 static struct mlxsw_sp_fib_node *
1123 mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
1124                          size_t addr_len, unsigned char prefix_len);
1125 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
1126                                      struct mlxsw_sp_fib_entry *fib_entry);
1127
1128 static void
1129 mlxsw_sp_ipip_entry_demote_decap(struct mlxsw_sp *mlxsw_sp,
1130                                  struct mlxsw_sp_ipip_entry *ipip_entry)
1131 {
1132         struct mlxsw_sp_fib_entry *fib_entry = ipip_entry->decap_fib_entry;
1133
1134         mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, fib_entry);
1135         fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1136
1137         mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1138 }
1139
1140 static void
1141 mlxsw_sp_ipip_entry_promote_decap(struct mlxsw_sp *mlxsw_sp,
1142                                   struct mlxsw_sp_ipip_entry *ipip_entry,
1143                                   struct mlxsw_sp_fib_entry *decap_fib_entry)
1144 {
1145         if (mlxsw_sp_fib_entry_decap_init(mlxsw_sp, decap_fib_entry,
1146                                           ipip_entry))
1147                 return;
1148         decap_fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
1149
1150         if (mlxsw_sp_fib_entry_update(mlxsw_sp, decap_fib_entry))
1151                 mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1152 }
1153
1154 static struct mlxsw_sp_fib_entry *
1155 mlxsw_sp_router_ip2me_fib_entry_find(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
1156                                      enum mlxsw_sp_l3proto proto,
1157                                      const union mlxsw_sp_l3addr *addr,
1158                                      enum mlxsw_sp_fib_entry_type type)
1159 {
1160         struct mlxsw_sp_fib_entry *fib_entry;
1161         struct mlxsw_sp_fib_node *fib_node;
1162         unsigned char addr_prefix_len;
1163         struct mlxsw_sp_fib *fib;
1164         struct mlxsw_sp_vr *vr;
1165         const void *addrp;
1166         size_t addr_len;
1167         u32 addr4;
1168
1169         vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
1170         if (!vr)
1171                 return NULL;
1172         fib = mlxsw_sp_vr_fib(vr, proto);
1173
1174         switch (proto) {
1175         case MLXSW_SP_L3_PROTO_IPV4:
1176                 addr4 = be32_to_cpu(addr->addr4);
1177                 addrp = &addr4;
1178                 addr_len = 4;
1179                 addr_prefix_len = 32;
1180                 break;
1181         case MLXSW_SP_L3_PROTO_IPV6: /* fall through */
1182         default:
1183                 WARN_ON(1);
1184                 return NULL;
1185         }
1186
1187         fib_node = mlxsw_sp_fib_node_lookup(fib, addrp, addr_len,
1188                                             addr_prefix_len);
1189         if (!fib_node || list_empty(&fib_node->entry_list))
1190                 return NULL;
1191
1192         fib_entry = list_first_entry(&fib_node->entry_list,
1193                                      struct mlxsw_sp_fib_entry, list);
1194         if (fib_entry->type != type)
1195                 return NULL;
1196
1197         return fib_entry;
1198 }
1199
1200 /* Given an IPIP entry, find the corresponding decap route. */
1201 static struct mlxsw_sp_fib_entry *
1202 mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp,
1203                                struct mlxsw_sp_ipip_entry *ipip_entry)
1204 {
1205         static struct mlxsw_sp_fib_node *fib_node;
1206         const struct mlxsw_sp_ipip_ops *ipip_ops;
1207         struct mlxsw_sp_fib_entry *fib_entry;
1208         unsigned char saddr_prefix_len;
1209         union mlxsw_sp_l3addr saddr;
1210         struct mlxsw_sp_fib *ul_fib;
1211         struct mlxsw_sp_vr *ul_vr;
1212         const void *saddrp;
1213         size_t saddr_len;
1214         u32 ul_tb_id;
1215         u32 saddr4;
1216
1217         ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1218
1219         ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
1220         ul_vr = mlxsw_sp_vr_find(mlxsw_sp, ul_tb_id);
1221         if (!ul_vr)
1222                 return NULL;
1223
1224         ul_fib = mlxsw_sp_vr_fib(ul_vr, ipip_ops->ul_proto);
1225         saddr = mlxsw_sp_ipip_netdev_saddr(ipip_ops->ul_proto,
1226                                            ipip_entry->ol_dev);
1227
1228         switch (ipip_ops->ul_proto) {
1229         case MLXSW_SP_L3_PROTO_IPV4:
1230                 saddr4 = be32_to_cpu(saddr.addr4);
1231                 saddrp = &saddr4;
1232                 saddr_len = 4;
1233                 saddr_prefix_len = 32;
1234                 break;
1235         case MLXSW_SP_L3_PROTO_IPV6:
1236                 WARN_ON(1);
1237                 return NULL;
1238         }
1239
1240         fib_node = mlxsw_sp_fib_node_lookup(ul_fib, saddrp, saddr_len,
1241                                             saddr_prefix_len);
1242         if (!fib_node || list_empty(&fib_node->entry_list))
1243                 return NULL;
1244
1245         fib_entry = list_first_entry(&fib_node->entry_list,
1246                                      struct mlxsw_sp_fib_entry, list);
1247         if (fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP)
1248                 return NULL;
1249
1250         return fib_entry;
1251 }
1252
1253 static struct mlxsw_sp_ipip_entry *
1254 mlxsw_sp_ipip_entry_create(struct mlxsw_sp *mlxsw_sp,
1255                            enum mlxsw_sp_ipip_type ipipt,
1256                            struct net_device *ol_dev)
1257 {
1258         struct mlxsw_sp_ipip_entry *ipip_entry;
1259
1260         ipip_entry = mlxsw_sp_ipip_entry_alloc(mlxsw_sp, ipipt, ol_dev);
1261         if (IS_ERR(ipip_entry))
1262                 return ipip_entry;
1263
1264         list_add_tail(&ipip_entry->ipip_list_node,
1265                       &mlxsw_sp->router->ipip_list);
1266
1267         return ipip_entry;
1268 }
1269
1270 static void
1271 mlxsw_sp_ipip_entry_destroy(struct mlxsw_sp *mlxsw_sp,
1272                             struct mlxsw_sp_ipip_entry *ipip_entry)
1273 {
1274         list_del(&ipip_entry->ipip_list_node);
1275         mlxsw_sp_ipip_entry_dealloc(ipip_entry);
1276 }
1277
1278 static bool
1279 mlxsw_sp_ipip_entry_matches_decap(struct mlxsw_sp *mlxsw_sp,
1280                                   const struct net_device *ul_dev,
1281                                   enum mlxsw_sp_l3proto ul_proto,
1282                                   union mlxsw_sp_l3addr ul_dip,
1283                                   struct mlxsw_sp_ipip_entry *ipip_entry)
1284 {
1285         u32 ul_tb_id = l3mdev_fib_table(ul_dev) ? : RT_TABLE_MAIN;
1286         enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
1287
1288         if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
1289                 return false;
1290
1291         return mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, ul_dip,
1292                                                  ul_tb_id, ipip_entry);
1293 }
1294
1295 /* Given decap parameters, find the corresponding IPIP entry. */
1296 static struct mlxsw_sp_ipip_entry *
1297 mlxsw_sp_ipip_entry_find_by_decap(struct mlxsw_sp *mlxsw_sp,
1298                                   const struct net_device *ul_dev,
1299                                   enum mlxsw_sp_l3proto ul_proto,
1300                                   union mlxsw_sp_l3addr ul_dip)
1301 {
1302         struct mlxsw_sp_ipip_entry *ipip_entry;
1303
1304         list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
1305                             ipip_list_node)
1306                 if (mlxsw_sp_ipip_entry_matches_decap(mlxsw_sp, ul_dev,
1307                                                       ul_proto, ul_dip,
1308                                                       ipip_entry))
1309                         return ipip_entry;
1310
1311         return NULL;
1312 }
1313
1314 static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp,
1315                                       const struct net_device *dev,
1316                                       enum mlxsw_sp_ipip_type *p_type)
1317 {
1318         struct mlxsw_sp_router *router = mlxsw_sp->router;
1319         const struct mlxsw_sp_ipip_ops *ipip_ops;
1320         enum mlxsw_sp_ipip_type ipipt;
1321
1322         for (ipipt = 0; ipipt < MLXSW_SP_IPIP_TYPE_MAX; ++ipipt) {
1323                 ipip_ops = router->ipip_ops_arr[ipipt];
1324                 if (dev->type == ipip_ops->dev_type) {
1325                         if (p_type)
1326                                 *p_type = ipipt;
1327                         return true;
1328                 }
1329         }
1330         return false;
1331 }
1332
1333 bool mlxsw_sp_netdev_is_ipip_ol(const struct mlxsw_sp *mlxsw_sp,
1334                                 const struct net_device *dev)
1335 {
1336         return mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL);
1337 }
1338
1339 static struct mlxsw_sp_ipip_entry *
1340 mlxsw_sp_ipip_entry_find_by_ol_dev(struct mlxsw_sp *mlxsw_sp,
1341                                    const struct net_device *ol_dev)
1342 {
1343         struct mlxsw_sp_ipip_entry *ipip_entry;
1344
1345         list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
1346                             ipip_list_node)
1347                 if (ipip_entry->ol_dev == ol_dev)
1348                         return ipip_entry;
1349
1350         return NULL;
1351 }
1352
1353 static struct mlxsw_sp_ipip_entry *
1354 mlxsw_sp_ipip_entry_find_by_ul_dev(const struct mlxsw_sp *mlxsw_sp,
1355                                    const struct net_device *ul_dev,
1356                                    struct mlxsw_sp_ipip_entry *start)
1357 {
1358         struct mlxsw_sp_ipip_entry *ipip_entry;
1359
1360         ipip_entry = list_prepare_entry(start, &mlxsw_sp->router->ipip_list,
1361                                         ipip_list_node);
1362         list_for_each_entry_continue(ipip_entry, &mlxsw_sp->router->ipip_list,
1363                                      ipip_list_node) {
1364                 struct net_device *ipip_ul_dev =
1365                         __mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);
1366
1367                 if (ipip_ul_dev == ul_dev)
1368                         return ipip_entry;
1369         }
1370
1371         return NULL;
1372 }
1373
1374 bool mlxsw_sp_netdev_is_ipip_ul(const struct mlxsw_sp *mlxsw_sp,
1375                                 const struct net_device *dev)
1376 {
1377         return mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp, dev, NULL);
1378 }
1379
1380 static bool mlxsw_sp_netdevice_ipip_can_offload(struct mlxsw_sp *mlxsw_sp,
1381                                                 const struct net_device *ol_dev,
1382                                                 enum mlxsw_sp_ipip_type ipipt)
1383 {
1384         const struct mlxsw_sp_ipip_ops *ops
1385                 = mlxsw_sp->router->ipip_ops_arr[ipipt];
1386
1387         /* For deciding whether decap should be offloaded, we don't care about
1388          * overlay protocol, so ask whether either one is supported.
1389          */
1390         return ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV4) ||
1391                ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV6);
1392 }
1393
1394 static int mlxsw_sp_netdevice_ipip_ol_reg_event(struct mlxsw_sp *mlxsw_sp,
1395                                                 struct net_device *ol_dev)
1396 {
1397         struct mlxsw_sp_ipip_entry *ipip_entry;
1398         enum mlxsw_sp_l3proto ul_proto;
1399         enum mlxsw_sp_ipip_type ipipt;
1400         union mlxsw_sp_l3addr saddr;
1401         u32 ul_tb_id;
1402
1403         mlxsw_sp_netdev_ipip_type(mlxsw_sp, ol_dev, &ipipt);
1404         if (mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev, ipipt)) {
1405                 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
1406                 ul_proto = mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto;
1407                 saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
1408                 if (!mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
1409                                                           saddr, ul_tb_id,
1410                                                           NULL)) {
1411                         ipip_entry = mlxsw_sp_ipip_entry_create(mlxsw_sp, ipipt,
1412                                                                 ol_dev);
1413                         if (IS_ERR(ipip_entry))
1414                                 return PTR_ERR(ipip_entry);
1415                 }
1416         }
1417
1418         return 0;
1419 }
1420
/* Handle unregistration of overlay device @ol_dev: destroy its IPIP entry,
 * if it has one.
 */
static void mlxsw_sp_netdevice_ipip_ol_unreg_event(struct mlxsw_sp *mlxsw_sp,
						   struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *entry;

	entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (!entry)
		return;

	mlxsw_sp_ipip_entry_destroy(mlxsw_sp, entry);
}
1430
/* The overlay device of @ipip_entry went up: if a trap route exists for the
 * tunnel's source address, promote it to a decap route.
 */
static void
mlxsw_sp_ipip_entry_ol_up_event(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_ipip_entry *ipip_entry)
{
	struct mlxsw_sp_fib_entry *decap_route;

	decap_route = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp, ipip_entry);
	if (!decap_route)
		return;

	mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry, decap_route);
}
1442
1443 static int
1444 mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif, u16 ul_vr_id,
1445                         u16 ul_rif_id, bool enable)
1446 {
1447         struct mlxsw_sp_rif_ipip_lb_config lb_cf = lb_rif->lb_config;
1448         struct mlxsw_sp_rif *rif = &lb_rif->common;
1449         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
1450         char ritr_pl[MLXSW_REG_RITR_LEN];
1451         u32 saddr4;
1452
1453         switch (lb_cf.ul_protocol) {
1454         case MLXSW_SP_L3_PROTO_IPV4:
1455                 saddr4 = be32_to_cpu(lb_cf.saddr.addr4);
1456                 mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
1457                                     rif->rif_index, rif->vr_id, rif->dev->mtu);
1458                 mlxsw_reg_ritr_loopback_ipip4_pack(ritr_pl, lb_cf.lb_ipipt,
1459                             MLXSW_REG_RITR_LOOPBACK_IPIP_OPTIONS_GRE_KEY_PRESET,
1460                             ul_vr_id, ul_rif_id, saddr4, lb_cf.okey);
1461                 break;
1462
1463         case MLXSW_SP_L3_PROTO_IPV6:
1464                 return -EAFNOSUPPORT;
1465         }
1466
1467         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
1468 }
1469
1470 static int mlxsw_sp_netdevice_ipip_ol_update_mtu(struct mlxsw_sp *mlxsw_sp,
1471                                                  struct net_device *ol_dev)
1472 {
1473         struct mlxsw_sp_ipip_entry *ipip_entry;
1474         struct mlxsw_sp_rif_ipip_lb *lb_rif;
1475         int err = 0;
1476
1477         ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1478         if (ipip_entry) {
1479                 lb_rif = ipip_entry->ol_lb;
1480                 err = mlxsw_sp_rif_ipip_lb_op(lb_rif, lb_rif->ul_vr_id,
1481                                               lb_rif->ul_rif_id, true);
1482                 if (err)
1483                         goto out;
1484                 lb_rif->common.mtu = ol_dev->mtu;
1485         }
1486
1487 out:
1488         return err;
1489 }
1490
/* Overlay device @ol_dev went up: forward the event to its IPIP entry, if it
 * has one.
 */
static void mlxsw_sp_netdevice_ipip_ol_up_event(struct mlxsw_sp *mlxsw_sp,
						struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *entry;

	entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (!entry)
		return;

	mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, entry);
}
1500
1501 static void
1502 mlxsw_sp_ipip_entry_ol_down_event(struct mlxsw_sp *mlxsw_sp,
1503                                   struct mlxsw_sp_ipip_entry *ipip_entry)
1504 {
1505         if (ipip_entry->decap_fib_entry)
1506                 mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1507 }
1508
/* Overlay device @ol_dev went down: forward the event to its IPIP entry, if
 * it has one.
 */
static void mlxsw_sp_netdevice_ipip_ol_down_event(struct mlxsw_sp *mlxsw_sp,
						  struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *entry;

	entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (!entry)
		return;

	mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, entry);
}
1518
1519 static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp,
1520                                          struct mlxsw_sp_rif *old_rif,
1521                                          struct mlxsw_sp_rif *new_rif);
1522 static int
1523 mlxsw_sp_ipip_entry_ol_lb_update(struct mlxsw_sp *mlxsw_sp,
1524                                  struct mlxsw_sp_ipip_entry *ipip_entry,
1525                                  bool keep_encap,
1526                                  struct netlink_ext_ack *extack)
1527 {
1528         struct mlxsw_sp_rif_ipip_lb *old_lb_rif = ipip_entry->ol_lb;
1529         struct mlxsw_sp_rif_ipip_lb *new_lb_rif;
1530
1531         new_lb_rif = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp,
1532                                                      ipip_entry->ipipt,
1533                                                      ipip_entry->ol_dev,
1534                                                      extack);
1535         if (IS_ERR(new_lb_rif))
1536                 return PTR_ERR(new_lb_rif);
1537         ipip_entry->ol_lb = new_lb_rif;
1538
1539         if (keep_encap)
1540                 mlxsw_sp_nexthop_rif_migrate(mlxsw_sp, &old_lb_rif->common,
1541                                              &new_lb_rif->common);
1542
1543         mlxsw_sp_rif_destroy(&old_lb_rif->common);
1544
1545         return 0;
1546 }
1547
1548 static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
1549                                         struct mlxsw_sp_rif *rif);
1550
1551 /**
1552  * Update the offload related to an IPIP entry. This always updates decap, and
1553  * in addition to that it also:
1554  * @recreate_loopback: recreates the associated loopback RIF
1555  * @keep_encap: updates next hops that use the tunnel netdevice. This is only
1556  *              relevant when recreate_loopback is true.
1557  * @update_nexthops: updates next hops, keeping the current loopback RIF. This
1558  *                   is only relevant when recreate_loopback is false.
1559  */
1560 int __mlxsw_sp_ipip_entry_update_tunnel(struct mlxsw_sp *mlxsw_sp,
1561                                         struct mlxsw_sp_ipip_entry *ipip_entry,
1562                                         bool recreate_loopback,
1563                                         bool keep_encap,
1564                                         bool update_nexthops,
1565                                         struct netlink_ext_ack *extack)
1566 {
1567         int err;
1568
1569         /* RIFs can't be edited, so to update loopback, we need to destroy and
1570          * recreate it. That creates a window of opportunity where RALUE and
1571          * RATR registers end up referencing a RIF that's already gone. RATRs
1572          * are handled in mlxsw_sp_ipip_entry_ol_lb_update(), and to take care
1573          * of RALUE, demote the decap route back.
1574          */
1575         if (ipip_entry->decap_fib_entry)
1576                 mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1577
1578         if (recreate_loopback) {
1579                 err = mlxsw_sp_ipip_entry_ol_lb_update(mlxsw_sp, ipip_entry,
1580                                                        keep_encap, extack);
1581                 if (err)
1582                         return err;
1583         } else if (update_nexthops) {
1584                 mlxsw_sp_nexthop_rif_update(mlxsw_sp,
1585                                             &ipip_entry->ol_lb->common);
1586         }
1587
1588         if (ipip_entry->ol_dev->flags & IFF_UP)
1589                 mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);
1590
1591         return 0;
1592 }
1593
1594 static int mlxsw_sp_netdevice_ipip_ol_vrf_event(struct mlxsw_sp *mlxsw_sp,
1595                                                 struct net_device *ol_dev,
1596                                                 struct netlink_ext_ack *extack)
1597 {
1598         struct mlxsw_sp_ipip_entry *ipip_entry =
1599                 mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1600         enum mlxsw_sp_l3proto ul_proto;
1601         union mlxsw_sp_l3addr saddr;
1602         u32 ul_tb_id;
1603
1604         if (!ipip_entry)
1605                 return 0;
1606
1607         /* For flat configuration cases, moving overlay to a different VRF might
1608          * cause local address conflict, and the conflicting tunnels need to be
1609          * demoted.
1610          */
1611         ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
1612         ul_proto = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]->ul_proto;
1613         saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
1614         if (mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
1615                                                  saddr, ul_tb_id,
1616                                                  ipip_entry)) {
1617                 mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1618                 return 0;
1619         }
1620
1621         return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1622                                                    true, false, false, extack);
1623 }
1624
1625 static int
1626 mlxsw_sp_netdevice_ipip_ul_vrf_event(struct mlxsw_sp *mlxsw_sp,
1627                                      struct mlxsw_sp_ipip_entry *ipip_entry,
1628                                      struct net_device *ul_dev,
1629                                      struct netlink_ext_ack *extack)
1630 {
1631         return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1632                                                    true, true, false, extack);
1633 }
1634
1635 static int
1636 mlxsw_sp_netdevice_ipip_ul_up_event(struct mlxsw_sp *mlxsw_sp,
1637                                     struct mlxsw_sp_ipip_entry *ipip_entry,
1638                                     struct net_device *ul_dev)
1639 {
1640         return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1641                                                    false, false, true, NULL);
1642 }
1643
1644 static int
1645 mlxsw_sp_netdevice_ipip_ul_down_event(struct mlxsw_sp *mlxsw_sp,
1646                                       struct mlxsw_sp_ipip_entry *ipip_entry,
1647                                       struct net_device *ul_dev)
1648 {
1649         /* A down underlay device causes encapsulated packets to not be
1650          * forwarded, but decap still works. So refresh next hops without
1651          * touching anything else.
1652          */
1653         return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1654                                                    false, false, true, NULL);
1655 }
1656
1657 static int
1658 mlxsw_sp_netdevice_ipip_ol_change_event(struct mlxsw_sp *mlxsw_sp,
1659                                         struct net_device *ol_dev,
1660                                         struct netlink_ext_ack *extack)
1661 {
1662         const struct mlxsw_sp_ipip_ops *ipip_ops;
1663         struct mlxsw_sp_ipip_entry *ipip_entry;
1664         int err;
1665
1666         ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1667         if (!ipip_entry)
1668                 /* A change might make a tunnel eligible for offloading, but
1669                  * that is currently not implemented. What falls to slow path
1670                  * stays there.
1671                  */
1672                 return 0;
1673
1674         /* A change might make a tunnel not eligible for offloading. */
1675         if (!mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev,
1676                                                  ipip_entry->ipipt)) {
1677                 mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1678                 return 0;
1679         }
1680
1681         ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1682         err = ipip_ops->ol_netdev_change(mlxsw_sp, ipip_entry, extack);
1683         return err;
1684 }
1685
1686 void mlxsw_sp_ipip_entry_demote_tunnel(struct mlxsw_sp *mlxsw_sp,
1687                                        struct mlxsw_sp_ipip_entry *ipip_entry)
1688 {
1689         struct net_device *ol_dev = ipip_entry->ol_dev;
1690
1691         if (ol_dev->flags & IFF_UP)
1692                 mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
1693         mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
1694 }
1695
1696 /* The configuration where several tunnels have the same local address in the
1697  * same underlay table needs special treatment in the HW. That is currently not
1698  * implemented in the driver. This function finds and demotes the first tunnel
1699  * with a given source address, except the one passed in in the argument
1700  * `except'.
1701  */
1702 bool
1703 mlxsw_sp_ipip_demote_tunnel_by_saddr(struct mlxsw_sp *mlxsw_sp,
1704                                      enum mlxsw_sp_l3proto ul_proto,
1705                                      union mlxsw_sp_l3addr saddr,
1706                                      u32 ul_tb_id,
1707                                      const struct mlxsw_sp_ipip_entry *except)
1708 {
1709         struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
1710
1711         list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
1712                                  ipip_list_node) {
1713                 if (ipip_entry != except &&
1714                     mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, saddr,
1715                                                       ul_tb_id, ipip_entry)) {
1716                         mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1717                         return true;
1718                 }
1719         }
1720
1721         return false;
1722 }
1723
1724 static void mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(struct mlxsw_sp *mlxsw_sp,
1725                                                      struct net_device *ul_dev)
1726 {
1727         struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
1728
1729         list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
1730                                  ipip_list_node) {
1731                 struct net_device *ipip_ul_dev =
1732                         __mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);
1733
1734                 if (ipip_ul_dev == ul_dev)
1735                         mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1736         }
1737 }
1738
1739 int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp,
1740                                      struct net_device *ol_dev,
1741                                      unsigned long event,
1742                                      struct netdev_notifier_info *info)
1743 {
1744         struct netdev_notifier_changeupper_info *chup;
1745         struct netlink_ext_ack *extack;
1746
1747         switch (event) {
1748         case NETDEV_REGISTER:
1749                 return mlxsw_sp_netdevice_ipip_ol_reg_event(mlxsw_sp, ol_dev);
1750         case NETDEV_UNREGISTER:
1751                 mlxsw_sp_netdevice_ipip_ol_unreg_event(mlxsw_sp, ol_dev);
1752                 return 0;
1753         case NETDEV_UP:
1754                 mlxsw_sp_netdevice_ipip_ol_up_event(mlxsw_sp, ol_dev);
1755                 return 0;
1756         case NETDEV_DOWN:
1757                 mlxsw_sp_netdevice_ipip_ol_down_event(mlxsw_sp, ol_dev);
1758                 return 0;
1759         case NETDEV_CHANGEUPPER:
1760                 chup = container_of(info, typeof(*chup), info);
1761                 extack = info->extack;
1762                 if (netif_is_l3_master(chup->upper_dev))
1763                         return mlxsw_sp_netdevice_ipip_ol_vrf_event(mlxsw_sp,
1764                                                                     ol_dev,
1765                                                                     extack);
1766                 return 0;
1767         case NETDEV_CHANGE:
1768                 extack = info->extack;
1769                 return mlxsw_sp_netdevice_ipip_ol_change_event(mlxsw_sp,
1770                                                                ol_dev, extack);
1771         case NETDEV_CHANGEMTU:
1772                 return mlxsw_sp_netdevice_ipip_ol_update_mtu(mlxsw_sp, ol_dev);
1773         }
1774         return 0;
1775 }
1776
1777 static int
1778 __mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
1779                                    struct mlxsw_sp_ipip_entry *ipip_entry,
1780                                    struct net_device *ul_dev,
1781                                    unsigned long event,
1782                                    struct netdev_notifier_info *info)
1783 {
1784         struct netdev_notifier_changeupper_info *chup;
1785         struct netlink_ext_ack *extack;
1786
1787         switch (event) {
1788         case NETDEV_CHANGEUPPER:
1789                 chup = container_of(info, typeof(*chup), info);
1790                 extack = info->extack;
1791                 if (netif_is_l3_master(chup->upper_dev))
1792                         return mlxsw_sp_netdevice_ipip_ul_vrf_event(mlxsw_sp,
1793                                                                     ipip_entry,
1794                                                                     ul_dev,
1795                                                                     extack);
1796                 break;
1797
1798         case NETDEV_UP:
1799                 return mlxsw_sp_netdevice_ipip_ul_up_event(mlxsw_sp, ipip_entry,
1800                                                            ul_dev);
1801         case NETDEV_DOWN:
1802                 return mlxsw_sp_netdevice_ipip_ul_down_event(mlxsw_sp,
1803                                                              ipip_entry,
1804                                                              ul_dev);
1805         }
1806         return 0;
1807 }
1808
1809 int
1810 mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
1811                                  struct net_device *ul_dev,
1812                                  unsigned long event,
1813                                  struct netdev_notifier_info *info)
1814 {
1815         struct mlxsw_sp_ipip_entry *ipip_entry = NULL;
1816         int err;
1817
1818         while ((ipip_entry = mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp,
1819                                                                 ul_dev,
1820                                                                 ipip_entry))) {
1821                 err = __mlxsw_sp_netdevice_ipip_ul_event(mlxsw_sp, ipip_entry,
1822                                                          ul_dev, event, info);
1823                 if (err) {
1824                         mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(mlxsw_sp,
1825                                                                  ul_dev);
1826                         return err;
1827                 }
1828         }
1829
1830         return 0;
1831 }
1832
1833 int mlxsw_sp_router_nve_promote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
1834                                       enum mlxsw_sp_l3proto ul_proto,
1835                                       const union mlxsw_sp_l3addr *ul_sip,
1836                                       u32 tunnel_index)
1837 {
1838         enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1839         struct mlxsw_sp_fib_entry *fib_entry;
1840         int err;
1841
1842         /* It is valid to create a tunnel with a local IP and only later
1843          * assign this IP address to a local interface
1844          */
1845         fib_entry = mlxsw_sp_router_ip2me_fib_entry_find(mlxsw_sp, ul_tb_id,
1846                                                          ul_proto, ul_sip,
1847                                                          type);
1848         if (!fib_entry)
1849                 return 0;
1850
1851         fib_entry->decap.tunnel_index = tunnel_index;
1852         fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
1853
1854         err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1855         if (err)
1856                 goto err_fib_entry_update;
1857
1858         return 0;
1859
1860 err_fib_entry_update:
1861         fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1862         mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1863         return err;
1864 }
1865
1866 void mlxsw_sp_router_nve_demote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
1867                                       enum mlxsw_sp_l3proto ul_proto,
1868                                       const union mlxsw_sp_l3addr *ul_sip)
1869 {
1870         enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
1871         struct mlxsw_sp_fib_entry *fib_entry;
1872
1873         fib_entry = mlxsw_sp_router_ip2me_fib_entry_find(mlxsw_sp, ul_tb_id,
1874                                                          ul_proto, ul_sip,
1875                                                          type);
1876         if (!fib_entry)
1877                 return;
1878
1879         fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1880         mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1881 }
1882
/* Hash table key of a neighbour entry: the kernel neighbour pointer itself. */
struct mlxsw_sp_neigh_key {
        struct neighbour *n;    /* kernel neighbour this entry tracks */
};
1886
1887 struct mlxsw_sp_neigh_entry {
1888         struct list_head rif_list_node;
1889         struct rhash_head ht_node;
1890         struct mlxsw_sp_neigh_key key;
1891         u16 rif;
1892         bool connected;
1893         unsigned char ha[ETH_ALEN];
1894         struct list_head nexthop_list; /* list of nexthops using
1895                                         * this neigh entry
1896                                         */
1897         struct list_head nexthop_neighs_list_node;
1898         unsigned int counter_index;
1899         bool counter_valid;
1900 };
1901
1902 static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
1903         .key_offset = offsetof(struct mlxsw_sp_neigh_entry, key),
1904         .head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node),
1905         .key_len = sizeof(struct mlxsw_sp_neigh_key),
1906 };
1907
1908 struct mlxsw_sp_neigh_entry *
1909 mlxsw_sp_rif_neigh_next(struct mlxsw_sp_rif *rif,
1910                         struct mlxsw_sp_neigh_entry *neigh_entry)
1911 {
1912         if (!neigh_entry) {
1913                 if (list_empty(&rif->neigh_list))
1914                         return NULL;
1915                 else
1916                         return list_first_entry(&rif->neigh_list,
1917                                                 typeof(*neigh_entry),
1918                                                 rif_list_node);
1919         }
1920         if (list_is_last(&neigh_entry->rif_list_node, &rif->neigh_list))
1921                 return NULL;
1922         return list_next_entry(neigh_entry, rif_list_node);
1923 }
1924
1925 int mlxsw_sp_neigh_entry_type(struct mlxsw_sp_neigh_entry *neigh_entry)
1926 {
1927         return neigh_entry->key.n->tbl->family;
1928 }
1929
1930 unsigned char *
1931 mlxsw_sp_neigh_entry_ha(struct mlxsw_sp_neigh_entry *neigh_entry)
1932 {
1933         return neigh_entry->ha;
1934 }
1935
1936 u32 mlxsw_sp_neigh4_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
1937 {
1938         struct neighbour *n;
1939
1940         n = neigh_entry->key.n;
1941         return ntohl(*((__be32 *) n->primary_key));
1942 }
1943
1944 struct in6_addr *
1945 mlxsw_sp_neigh6_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
1946 {
1947         struct neighbour *n;
1948
1949         n = neigh_entry->key.n;
1950         return (struct in6_addr *) &n->primary_key;
1951 }
1952
1953 int mlxsw_sp_neigh_counter_get(struct mlxsw_sp *mlxsw_sp,
1954                                struct mlxsw_sp_neigh_entry *neigh_entry,
1955                                u64 *p_counter)
1956 {
1957         if (!neigh_entry->counter_valid)
1958                 return -EINVAL;
1959
1960         return mlxsw_sp_flow_counter_get(mlxsw_sp, neigh_entry->counter_index,
1961                                          p_counter, NULL);
1962 }
1963
1964 static struct mlxsw_sp_neigh_entry *
1965 mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n,
1966                            u16 rif)
1967 {
1968         struct mlxsw_sp_neigh_entry *neigh_entry;
1969
1970         neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_KERNEL);
1971         if (!neigh_entry)
1972                 return NULL;
1973
1974         neigh_entry->key.n = n;
1975         neigh_entry->rif = rif;
1976         INIT_LIST_HEAD(&neigh_entry->nexthop_list);
1977
1978         return neigh_entry;
1979 }
1980
/* Release the memory of a neighbour entry. Counterpart of
 * mlxsw_sp_neigh_entry_alloc(); nothing besides the allocation is undone.
 */
static void mlxsw_sp_neigh_entry_free(struct mlxsw_sp_neigh_entry *neigh_entry)
{
        kfree(neigh_entry);
}
1985
1986 static int
1987 mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp,
1988                             struct mlxsw_sp_neigh_entry *neigh_entry)
1989 {
1990         return rhashtable_insert_fast(&mlxsw_sp->router->neigh_ht,
1991                                       &neigh_entry->ht_node,
1992                                       mlxsw_sp_neigh_ht_params);
1993 }
1994
1995 static void
1996 mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
1997                             struct mlxsw_sp_neigh_entry *neigh_entry)
1998 {
1999         rhashtable_remove_fast(&mlxsw_sp->router->neigh_ht,
2000                                &neigh_entry->ht_node,
2001                                mlxsw_sp_neigh_ht_params);
2002 }
2003
2004 static bool
2005 mlxsw_sp_neigh_counter_should_alloc(struct mlxsw_sp *mlxsw_sp,
2006                                     struct mlxsw_sp_neigh_entry *neigh_entry)
2007 {
2008         struct devlink *devlink;
2009         const char *table_name;
2010
2011         switch (mlxsw_sp_neigh_entry_type(neigh_entry)) {
2012         case AF_INET:
2013                 table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST4;
2014                 break;
2015         case AF_INET6:
2016                 table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST6;
2017                 break;
2018         default:
2019                 WARN_ON(1);
2020                 return false;
2021         }
2022
2023         devlink = priv_to_devlink(mlxsw_sp->core);
2024         return devlink_dpipe_table_counter_enabled(devlink, table_name);
2025 }
2026
2027 static void
2028 mlxsw_sp_neigh_counter_alloc(struct mlxsw_sp *mlxsw_sp,
2029                              struct mlxsw_sp_neigh_entry *neigh_entry)
2030 {
2031         if (!mlxsw_sp_neigh_counter_should_alloc(mlxsw_sp, neigh_entry))
2032                 return;
2033
2034         if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &neigh_entry->counter_index))
2035                 return;
2036
2037         neigh_entry->counter_valid = true;
2038 }
2039
2040 static void
2041 mlxsw_sp_neigh_counter_free(struct mlxsw_sp *mlxsw_sp,
2042                             struct mlxsw_sp_neigh_entry *neigh_entry)
2043 {
2044         if (!neigh_entry->counter_valid)
2045                 return;
2046         mlxsw_sp_flow_counter_free(mlxsw_sp,
2047                                    neigh_entry->counter_index);
2048         neigh_entry->counter_valid = false;
2049 }
2050
2051 static struct mlxsw_sp_neigh_entry *
2052 mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
2053 {
2054         struct mlxsw_sp_neigh_entry *neigh_entry;
2055         struct mlxsw_sp_rif *rif;
2056         int err;
2057
2058         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
2059         if (!rif)
2060                 return ERR_PTR(-EINVAL);
2061
2062         neigh_entry = mlxsw_sp_neigh_entry_alloc(mlxsw_sp, n, rif->rif_index);
2063         if (!neigh_entry)
2064                 return ERR_PTR(-ENOMEM);
2065
2066         err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
2067         if (err)
2068                 goto err_neigh_entry_insert;
2069
2070         mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
2071         list_add(&neigh_entry->rif_list_node, &rif->neigh_list);
2072
2073         return neigh_entry;
2074
2075 err_neigh_entry_insert:
2076         mlxsw_sp_neigh_entry_free(neigh_entry);
2077         return ERR_PTR(err);
2078 }
2079
2080 static void
2081 mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp *mlxsw_sp,
2082                              struct mlxsw_sp_neigh_entry *neigh_entry)
2083 {
2084         list_del(&neigh_entry->rif_list_node);
2085         mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
2086         mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
2087         mlxsw_sp_neigh_entry_free(neigh_entry);
2088 }
2089
2090 static struct mlxsw_sp_neigh_entry *
2091 mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
2092 {
2093         struct mlxsw_sp_neigh_key key;
2094
2095         key.n = n;
2096         return rhashtable_lookup_fast(&mlxsw_sp->router->neigh_ht,
2097                                       &key, mlxsw_sp_neigh_ht_params);
2098 }
2099
2100 static void
2101 mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp)
2102 {
2103         unsigned long interval;
2104
2105 #if IS_ENABLED(CONFIG_IPV6)
2106         interval = min_t(unsigned long,
2107                          NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME),
2108                          NEIGH_VAR(&nd_tbl.parms, DELAY_PROBE_TIME));
2109 #else
2110         interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
2111 #endif
2112         mlxsw_sp->router->neighs_update.interval = jiffies_to_msecs(interval);
2113 }
2114
2115 static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
2116                                                    char *rauhtd_pl,
2117                                                    int ent_index)
2118 {
2119         struct net_device *dev;
2120         struct neighbour *n;
2121         __be32 dipn;
2122         u32 dip;
2123         u16 rif;
2124
2125         mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip);
2126
2127         if (!mlxsw_sp->router->rifs[rif]) {
2128                 dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
2129                 return;
2130         }
2131
2132         dipn = htonl(dip);
2133         dev = mlxsw_sp->router->rifs[rif]->dev;
2134         n = neigh_lookup(&arp_tbl, &dipn, dev);
2135         if (!n)
2136                 return;
2137
2138         netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip);
2139         neigh_event_send(n, NULL);
2140         neigh_release(n);
2141 }
2142
2143 #if IS_ENABLED(CONFIG_IPV6)
2144 static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2145                                                    char *rauhtd_pl,
2146                                                    int rec_index)
2147 {
2148         struct net_device *dev;
2149         struct neighbour *n;
2150         struct in6_addr dip;
2151         u16 rif;
2152
2153         mlxsw_reg_rauhtd_ent_ipv6_unpack(rauhtd_pl, rec_index, &rif,
2154                                          (char *) &dip);
2155
2156         if (!mlxsw_sp->router->rifs[rif]) {
2157                 dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
2158                 return;
2159         }
2160
2161         dev = mlxsw_sp->router->rifs[rif]->dev;
2162         n = neigh_lookup(&nd_tbl, &dip, dev);
2163         if (!n)
2164                 return;
2165
2166         netdev_dbg(dev, "Updating neighbour with IP=%pI6c\n", &dip);
2167         neigh_event_send(n, NULL);
2168         neigh_release(n);
2169 }
2170 #else
2171 static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2172                                                    char *rauhtd_pl,
2173                                                    int rec_index)
2174 {
2175 }
2176 #endif
2177
2178 static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
2179                                                    char *rauhtd_pl,
2180                                                    int rec_index)
2181 {
2182         u8 num_entries;
2183         int i;
2184
2185         num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
2186                                                                 rec_index);
2187         /* Hardware starts counting at 0, so add 1. */
2188         num_entries++;
2189
2190         /* Each record consists of several neighbour entries. */
2191         for (i = 0; i < num_entries; i++) {
2192                 int ent_index;
2193
2194                 ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i;
2195                 mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl,
2196                                                        ent_index);
2197         }
2198
2199 }
2200
/* Process the IPv6 record @rec_index of a RAUHTD dump. */
static void mlxsw_sp_router_neigh_rec_ipv6_process(struct mlxsw_sp *mlxsw_sp,
                                                   char *rauhtd_pl,
                                                   int rec_index)
{
        /* An IPv6 record holds exactly one entry, so the record index
         * doubles as the entry index.
         */
        mlxsw_sp_router_neigh_ent_ipv6_process(mlxsw_sp, rauhtd_pl, rec_index);
}
2209
2210 static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
2211                                               char *rauhtd_pl, int rec_index)
2212 {
2213         switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) {
2214         case MLXSW_REG_RAUHTD_TYPE_IPV4:
2215                 mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl,
2216                                                        rec_index);
2217                 break;
2218         case MLXSW_REG_RAUHTD_TYPE_IPV6:
2219                 mlxsw_sp_router_neigh_rec_ipv6_process(mlxsw_sp, rauhtd_pl,
2220                                                        rec_index);
2221                 break;
2222         }
2223 }
2224
2225 static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl)
2226 {
2227         u8 num_rec, last_rec_index, num_entries;
2228
2229         num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
2230         last_rec_index = num_rec - 1;
2231
2232         if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM)
2233                 return false;
2234         if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) ==
2235             MLXSW_REG_RAUHTD_TYPE_IPV6)
2236                 return true;
2237
2238         num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
2239                                                                 last_rec_index);
2240         if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC)
2241                 return true;
2242         return false;
2243 }
2244
2245 static int
2246 __mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp,
2247                                        char *rauhtd_pl,
2248                                        enum mlxsw_reg_rauhtd_type type)
2249 {
2250         int i, num_rec;
2251         int err;
2252
2253         /* Make sure the neighbour's netdev isn't removed in the
2254          * process.
2255          */
2256         rtnl_lock();
2257         do {
2258                 mlxsw_reg_rauhtd_pack(rauhtd_pl, type);
2259                 err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
2260                                       rauhtd_pl);
2261                 if (err) {
2262                         dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour table\n");
2263                         break;
2264                 }
2265                 num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
2266                 for (i = 0; i < num_rec; i++)
2267                         mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
2268                                                           i);
2269         } while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
2270         rtnl_unlock();
2271
2272         return err;
2273 }
2274
2275 static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
2276 {
2277         enum mlxsw_reg_rauhtd_type type;
2278         char *rauhtd_pl;
2279         int err;
2280
2281         rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
2282         if (!rauhtd_pl)
2283                 return -ENOMEM;
2284
2285         type = MLXSW_REG_RAUHTD_TYPE_IPV4;
2286         err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
2287         if (err)
2288                 goto out;
2289
2290         type = MLXSW_REG_RAUHTD_TYPE_IPV6;
2291         err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
2292 out:
2293         kfree(rauhtd_pl);
2294         return err;
2295 }
2296
2297 static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp)
2298 {
2299         struct mlxsw_sp_neigh_entry *neigh_entry;
2300
2301         /* Take RTNL mutex here to prevent lists from changes */
2302         rtnl_lock();
2303         list_for_each_entry(neigh_entry, &mlxsw_sp->router->nexthop_neighs_list,
2304                             nexthop_neighs_list_node)
2305                 /* If this neigh have nexthops, make the kernel think this neigh
2306                  * is active regardless of the traffic.
2307                  */
2308                 neigh_event_send(neigh_entry->key.n, NULL);
2309         rtnl_unlock();
2310 }
2311
2312 static void
2313 mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp)
2314 {
2315         unsigned long interval = mlxsw_sp->router->neighs_update.interval;
2316
2317         mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw,
2318                                msecs_to_jiffies(interval));
2319 }
2320
2321 static void mlxsw_sp_router_neighs_update_work(struct work_struct *work)
2322 {
2323         struct mlxsw_sp_router *router;
2324         int err;
2325
2326         router = container_of(work, struct mlxsw_sp_router,
2327                               neighs_update.dw.work);
2328         err = mlxsw_sp_router_neighs_update_rauhtd(router->mlxsw_sp);
2329         if (err)
2330                 dev_err(router->mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity");
2331
2332         mlxsw_sp_router_neighs_update_nh(router->mlxsw_sp);
2333
2334         mlxsw_sp_router_neighs_update_work_schedule(router->mlxsw_sp);
2335 }
2336
2337 static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work)
2338 {
2339         struct mlxsw_sp_neigh_entry *neigh_entry;
2340         struct mlxsw_sp_router *router;
2341
2342         router = container_of(work, struct mlxsw_sp_router,
2343                               nexthop_probe_dw.work);
2344         /* Iterate over nexthop neighbours, find those who are unresolved and
2345          * send arp on them. This solves the chicken-egg problem when
2346          * the nexthop wouldn't get offloaded until the neighbor is resolved
2347          * but it wouldn't get resolved ever in case traffic is flowing in HW
2348          * using different nexthop.
2349          *
2350          * Take RTNL mutex here to prevent lists from changes.
2351          */
2352         rtnl_lock();
2353         list_for_each_entry(neigh_entry, &router->nexthop_neighs_list,
2354                             nexthop_neighs_list_node)
2355                 if (!neigh_entry->connected)
2356                         neigh_event_send(neigh_entry->key.n, NULL);
2357         rtnl_unlock();
2358
2359         mlxsw_core_schedule_dw(&router->nexthop_probe_dw,
2360                                MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL);
2361 }
2362
2363 static void
2364 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
2365                               struct mlxsw_sp_neigh_entry *neigh_entry,
2366                               bool removing);
2367
2368 static enum mlxsw_reg_rauht_op mlxsw_sp_rauht_op(bool adding)
2369 {
2370         return adding ? MLXSW_REG_RAUHT_OP_WRITE_ADD :
2371                         MLXSW_REG_RAUHT_OP_WRITE_DELETE;
2372 }
2373
2374 static int
2375 mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp,
2376                                 struct mlxsw_sp_neigh_entry *neigh_entry,
2377                                 enum mlxsw_reg_rauht_op op)
2378 {
2379         struct neighbour *n = neigh_entry->key.n;
2380         u32 dip = ntohl(*((__be32 *) n->primary_key));
2381         char rauht_pl[MLXSW_REG_RAUHT_LEN];
2382
2383         mlxsw_reg_rauht_pack4(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
2384                               dip);
2385         if (neigh_entry->counter_valid)
2386                 mlxsw_reg_rauht_pack_counter(rauht_pl,
2387                                              neigh_entry->counter_index);
2388         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
2389 }
2390
2391 static int
2392 mlxsw_sp_router_neigh_entry_op6(struct mlxsw_sp *mlxsw_sp,
2393                                 struct mlxsw_sp_neigh_entry *neigh_entry,
2394                                 enum mlxsw_reg_rauht_op op)
2395 {
2396         struct neighbour *n = neigh_entry->key.n;
2397         char rauht_pl[MLXSW_REG_RAUHT_LEN];
2398         const char *dip = n->primary_key;
2399
2400         mlxsw_reg_rauht_pack6(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
2401                               dip);
2402         if (neigh_entry->counter_valid)
2403                 mlxsw_reg_rauht_pack_counter(rauht_pl,
2404                                              neigh_entry->counter_index);
2405         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
2406 }
2407
2408 bool mlxsw_sp_neigh_ipv6_ignore(struct mlxsw_sp_neigh_entry *neigh_entry)
2409 {
2410         struct neighbour *n = neigh_entry->key.n;
2411
2412         /* Packets with a link-local destination address are trapped
2413          * after LPM lookup and never reach the neighbour table, so
2414          * there is no need to program such neighbours to the device.
2415          */
2416         if (ipv6_addr_type((struct in6_addr *) &n->primary_key) &
2417             IPV6_ADDR_LINKLOCAL)
2418                 return true;
2419         return false;
2420 }
2421
2422 static void
2423 mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp,
2424                             struct mlxsw_sp_neigh_entry *neigh_entry,
2425                             bool adding)
2426 {
2427         enum mlxsw_reg_rauht_op op = mlxsw_sp_rauht_op(adding);
2428         int err;
2429
2430         if (!adding && !neigh_entry->connected)
2431                 return;
2432         neigh_entry->connected = adding;
2433         if (neigh_entry->key.n->tbl->family == AF_INET) {
2434                 err = mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry,
2435                                                       op);
2436                 if (err)
2437                         return;
2438         } else if (neigh_entry->key.n->tbl->family == AF_INET6) {
2439                 if (mlxsw_sp_neigh_ipv6_ignore(neigh_entry))
2440                         return;
2441                 err = mlxsw_sp_router_neigh_entry_op6(mlxsw_sp, neigh_entry,
2442                                                       op);
2443                 if (err)
2444                         return;
2445         } else {
2446                 WARN_ON_ONCE(1);
2447                 return;
2448         }
2449
2450         if (adding)
2451                 neigh_entry->key.n->flags |= NTF_OFFLOADED;
2452         else
2453                 neigh_entry->key.n->flags &= ~NTF_OFFLOADED;
2454 }
2455
2456 void
2457 mlxsw_sp_neigh_entry_counter_update(struct mlxsw_sp *mlxsw_sp,
2458                                     struct mlxsw_sp_neigh_entry *neigh_entry,
2459                                     bool adding)
2460 {
2461         if (adding)
2462                 mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
2463         else
2464                 mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
2465         mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, true);
2466 }
2467
2468 struct mlxsw_sp_netevent_work {
2469         struct work_struct work;
2470         struct mlxsw_sp *mlxsw_sp;
2471         struct neighbour *n;
2472 };
2473
2474 static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
2475 {
2476         struct mlxsw_sp_netevent_work *net_work =
2477                 container_of(work, struct mlxsw_sp_netevent_work, work);
2478         struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2479         struct mlxsw_sp_neigh_entry *neigh_entry;
2480         struct neighbour *n = net_work->n;
2481         unsigned char ha[ETH_ALEN];
2482         bool entry_connected;
2483         u8 nud_state, dead;
2484
2485         /* If these parameters are changed after we release the lock,
2486          * then we are guaranteed to receive another event letting us
2487          * know about it.
2488          */
2489         read_lock_bh(&n->lock);
2490         memcpy(ha, n->ha, ETH_ALEN);
2491         nud_state = n->nud_state;
2492         dead = n->dead;
2493         read_unlock_bh(&n->lock);
2494
2495         rtnl_lock();
2496         mlxsw_sp_span_respin(mlxsw_sp);
2497
2498         entry_connected = nud_state & NUD_VALID && !dead;
2499         neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
2500         if (!entry_connected && !neigh_entry)
2501                 goto out;
2502         if (!neigh_entry) {
2503                 neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
2504                 if (IS_ERR(neigh_entry))
2505                         goto out;
2506         }
2507
2508         memcpy(neigh_entry->ha, ha, ETH_ALEN);
2509         mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected);
2510         mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected);
2511
2512         if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
2513                 mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
2514
2515 out:
2516         rtnl_unlock();
2517         neigh_release(n);
2518         kfree(net_work);
2519 }
2520
2521 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp);
2522
2523 static void mlxsw_sp_router_mp_hash_event_work(struct work_struct *work)
2524 {
2525         struct mlxsw_sp_netevent_work *net_work =
2526                 container_of(work, struct mlxsw_sp_netevent_work, work);
2527         struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2528
2529         mlxsw_sp_mp_hash_init(mlxsw_sp);
2530         kfree(net_work);
2531 }
2532
2533 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp);
2534
2535 static void mlxsw_sp_router_update_priority_work(struct work_struct *work)
2536 {
2537         struct mlxsw_sp_netevent_work *net_work =
2538                 container_of(work, struct mlxsw_sp_netevent_work, work);
2539         struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2540
2541         __mlxsw_sp_router_init(mlxsw_sp);
2542         kfree(net_work);
2543 }
2544
2545 static int mlxsw_sp_router_schedule_work(struct net *net,
2546                                          struct notifier_block *nb,
2547                                          void (*cb)(struct work_struct *))
2548 {
2549         struct mlxsw_sp_netevent_work *net_work;
2550         struct mlxsw_sp_router *router;
2551
2552         if (!net_eq(net, &init_net))
2553                 return NOTIFY_DONE;
2554
2555         net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
2556         if (!net_work)
2557                 return NOTIFY_BAD;
2558
2559         router = container_of(nb, struct mlxsw_sp_router, netevent_nb);
2560         INIT_WORK(&net_work->work, cb);
2561         net_work->mlxsw_sp = router->mlxsw_sp;
2562         mlxsw_core_schedule_work(&net_work->work);
2563         return NOTIFY_DONE;
2564 }
2565
2566 static int mlxsw_sp_router_netevent_event(struct notifier_block *nb,
2567                                           unsigned long event, void *ptr)
2568 {
2569         struct mlxsw_sp_netevent_work *net_work;
2570         struct mlxsw_sp_port *mlxsw_sp_port;
2571         struct mlxsw_sp *mlxsw_sp;
2572         unsigned long interval;
2573         struct neigh_parms *p;
2574         struct neighbour *n;
2575
2576         switch (event) {
2577         case NETEVENT_DELAY_PROBE_TIME_UPDATE:
2578                 p = ptr;
2579
2580                 /* We don't care about changes in the default table. */
2581                 if (!p->dev || (p->tbl->family != AF_INET &&
2582                                 p->tbl->family != AF_INET6))
2583                         return NOTIFY_DONE;
2584
2585                 /* We are in atomic context and can't take RTNL mutex,
2586                  * so use RCU variant to walk the device chain.
2587                  */
2588                 mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(p->dev);
2589                 if (!mlxsw_sp_port)
2590                         return NOTIFY_DONE;
2591
2592                 mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
2593                 interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME));
2594                 mlxsw_sp->router->neighs_update.interval = interval;
2595
2596                 mlxsw_sp_port_dev_put(mlxsw_sp_port);
2597                 break;
2598         case NETEVENT_NEIGH_UPDATE:
2599                 n = ptr;
2600
2601                 if (n->tbl->family != AF_INET && n->tbl->family != AF_INET6)
2602                         return NOTIFY_DONE;
2603
2604                 mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(n->dev);
2605                 if (!mlxsw_sp_port)
2606                         return NOTIFY_DONE;
2607
2608                 net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
2609                 if (!net_work) {
2610                         mlxsw_sp_port_dev_put(mlxsw_sp_port);
2611                         return NOTIFY_BAD;
2612                 }
2613
2614                 INIT_WORK(&net_work->work, mlxsw_sp_router_neigh_event_work);
2615                 net_work->mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
2616                 net_work->n = n;
2617
2618                 /* Take a reference to ensure the neighbour won't be
2619                  * destructed until we drop the reference in delayed
2620                  * work.
2621                  */
2622                 neigh_clone(n);
2623                 mlxsw_core_schedule_work(&net_work->work);
2624                 mlxsw_sp_port_dev_put(mlxsw_sp_port);
2625                 break;
2626         case NETEVENT_IPV4_MPATH_HASH_UPDATE:
2627         case NETEVENT_IPV6_MPATH_HASH_UPDATE:
2628                 return mlxsw_sp_router_schedule_work(ptr, nb,
2629                                 mlxsw_sp_router_mp_hash_event_work);
2630
2631         case NETEVENT_IPV4_FWD_UPDATE_PRIORITY_UPDATE:
2632                 return mlxsw_sp_router_schedule_work(ptr, nb,
2633                                 mlxsw_sp_router_update_priority_work);
2634         }
2635
2636         return NOTIFY_DONE;
2637 }
2638
2639 static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp)
2640 {
2641         int err;
2642
2643         err = rhashtable_init(&mlxsw_sp->router->neigh_ht,
2644                               &mlxsw_sp_neigh_ht_params);
2645         if (err)
2646                 return err;
2647
2648         /* Initialize the polling interval according to the default
2649          * table.
2650          */
2651         mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp);
2652
2653         /* Create the delayed works for the activity_update */
2654         INIT_DELAYED_WORK(&mlxsw_sp->router->neighs_update.dw,
2655                           mlxsw_sp_router_neighs_update_work);
2656         INIT_DELAYED_WORK(&mlxsw_sp->router->nexthop_probe_dw,
2657                           mlxsw_sp_router_probe_unresolved_nexthops);
2658         mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw, 0);
2659         mlxsw_core_schedule_dw(&mlxsw_sp->router->nexthop_probe_dw, 0);
2660         return 0;
2661 }
2662
2663 static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
2664 {
2665         cancel_delayed_work_sync(&mlxsw_sp->router->neighs_update.dw);
2666         cancel_delayed_work_sync(&mlxsw_sp->router->nexthop_probe_dw);
2667         rhashtable_destroy(&mlxsw_sp->router->neigh_ht);
2668 }
2669
2670 static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
2671                                          struct mlxsw_sp_rif *rif)
2672 {
2673         struct mlxsw_sp_neigh_entry *neigh_entry, *tmp;
2674
2675         list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list,
2676                                  rif_list_node) {
2677                 mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, false);
2678                 mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
2679         }
2680 }
2681
/* Kind of a nexthop; selects which member of the anonymous union in
 * struct mlxsw_sp_nexthop is used and how the nexthop is written to
 * the adjacency table.
 */
enum mlxsw_sp_nexthop_type {
	MLXSW_SP_NEXTHOP_TYPE_ETH,	/* Ethernet nexthop (neigh_entry) */
	MLXSW_SP_NEXTHOP_TYPE_IPIP,	/* IP-in-IP nexthop (ipip_entry) */
};
2686
/* Hash key of a nexthop in the router's nexthop_ht table. */
struct mlxsw_sp_nexthop_key {
	struct fib_nh *fib_nh;	/* the pointer value itself is the key */
};
2690
2691 struct mlxsw_sp_nexthop {
2692         struct list_head neigh_list_node; /* member of neigh entry list */
2693         struct list_head rif_list_node;
2694         struct list_head router_list_node;
2695         struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group
2696                                                 * this belongs to
2697                                                 */
2698         struct rhash_head ht_node;
2699         struct mlxsw_sp_nexthop_key key;
2700         unsigned char gw_addr[sizeof(struct in6_addr)];
2701         int ifindex;
2702         int nh_weight;
2703         int norm_nh_weight;
2704         int num_adj_entries;
2705         struct mlxsw_sp_rif *rif;
2706         u8 should_offload:1, /* set indicates this neigh is connected and
2707                               * should be put to KVD linear area of this group.
2708                               */
2709            offloaded:1, /* set in case the neigh is actually put into
2710                          * KVD linear area of this group.
2711                          */
2712            update:1; /* set indicates that MAC of this neigh should be
2713                       * updated in HW
2714                       */
2715         enum mlxsw_sp_nexthop_type type;
2716         union {
2717                 struct mlxsw_sp_neigh_entry *neigh_entry;
2718                 struct mlxsw_sp_ipip_entry *ipip_entry;
2719         };
2720         unsigned int counter_index;
2721         bool counter_valid;
2722 };
2723
2724 struct mlxsw_sp_nexthop_group {
2725         void *priv;
2726         struct rhash_head ht_node;
2727         struct list_head fib_list; /* list of fib entries that use this group */
2728         struct neigh_table *neigh_tbl;
2729         u8 adj_index_valid:1,
2730            gateway:1; /* routes using the group use a gateway */
2731         u32 adj_index;
2732         u16 ecmp_size;
2733         u16 count;
2734         int sum_norm_weight;
2735         struct mlxsw_sp_nexthop nexthops[0];
2736 #define nh_rif  nexthops[0].rif
2737 };
2738
2739 void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp,
2740                                     struct mlxsw_sp_nexthop *nh)
2741 {
2742         struct devlink *devlink;
2743
2744         devlink = priv_to_devlink(mlxsw_sp->core);
2745         if (!devlink_dpipe_table_counter_enabled(devlink,
2746                                                  MLXSW_SP_DPIPE_TABLE_NAME_ADJ))
2747                 return;
2748
2749         if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &nh->counter_index))
2750                 return;
2751
2752         nh->counter_valid = true;
2753 }
2754
2755 void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp,
2756                                    struct mlxsw_sp_nexthop *nh)
2757 {
2758         if (!nh->counter_valid)
2759                 return;
2760         mlxsw_sp_flow_counter_free(mlxsw_sp, nh->counter_index);
2761         nh->counter_valid = false;
2762 }
2763
2764 int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp,
2765                                  struct mlxsw_sp_nexthop *nh, u64 *p_counter)
2766 {
2767         if (!nh->counter_valid)
2768                 return -EINVAL;
2769
2770         return mlxsw_sp_flow_counter_get(mlxsw_sp, nh->counter_index,
2771                                          p_counter, NULL);
2772 }
2773
2774 struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router,
2775                                                struct mlxsw_sp_nexthop *nh)
2776 {
2777         if (!nh) {
2778                 if (list_empty(&router->nexthop_list))
2779                         return NULL;
2780                 else
2781                         return list_first_entry(&router->nexthop_list,
2782                                                 typeof(*nh), router_list_node);
2783         }
2784         if (list_is_last(&nh->router_list_node, &router->nexthop_list))
2785                 return NULL;
2786         return list_next_entry(nh, router_list_node);
2787 }
2788
2789 bool mlxsw_sp_nexthop_offload(struct mlxsw_sp_nexthop *nh)
2790 {
2791         return nh->offloaded;
2792 }
2793
2794 unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh)
2795 {
2796         if (!nh->offloaded)
2797                 return NULL;
2798         return nh->neigh_entry->ha;
2799 }
2800
2801 int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index,
2802                              u32 *p_adj_size, u32 *p_adj_hash_index)
2803 {
2804         struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
2805         u32 adj_hash_index = 0;
2806         int i;
2807
2808         if (!nh->offloaded || !nh_grp->adj_index_valid)
2809                 return -EINVAL;
2810
2811         *p_adj_index = nh_grp->adj_index;
2812         *p_adj_size = nh_grp->ecmp_size;
2813
2814         for (i = 0; i < nh_grp->count; i++) {
2815                 struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];
2816
2817                 if (nh_iter == nh)
2818                         break;
2819                 if (nh_iter->offloaded)
2820                         adj_hash_index += nh_iter->num_adj_entries;
2821         }
2822
2823         *p_adj_hash_index = adj_hash_index;
2824         return 0;
2825 }
2826
2827 struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh)
2828 {
2829         return nh->rif;
2830 }
2831
2832 bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh)
2833 {
2834         struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
2835         int i;
2836
2837         for (i = 0; i < nh_grp->count; i++) {
2838                 struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];
2839
2840                 if (nh_iter->type == MLXSW_SP_NEXTHOP_TYPE_IPIP)
2841                         return true;
2842         }
2843         return false;
2844 }
2845
2846 static struct fib_info *
2847 mlxsw_sp_nexthop4_group_fi(const struct mlxsw_sp_nexthop_group *nh_grp)
2848 {
2849         return nh_grp->priv;
2850 }
2851
2852 struct mlxsw_sp_nexthop_group_cmp_arg {
2853         enum mlxsw_sp_l3proto proto;
2854         union {
2855                 struct fib_info *fi;
2856                 struct mlxsw_sp_fib6_entry *fib6_entry;
2857         };
2858 };
2859
2860 static bool
2861 mlxsw_sp_nexthop6_group_has_nexthop(const struct mlxsw_sp_nexthop_group *nh_grp,
2862                                     const struct in6_addr *gw, int ifindex,
2863                                     int weight)
2864 {
2865         int i;
2866
2867         for (i = 0; i < nh_grp->count; i++) {
2868                 const struct mlxsw_sp_nexthop *nh;
2869
2870                 nh = &nh_grp->nexthops[i];
2871                 if (nh->ifindex == ifindex && nh->nh_weight == weight &&
2872                     ipv6_addr_equal(gw, (struct in6_addr *) nh->gw_addr))
2873                         return true;
2874         }
2875
2876         return false;
2877 }
2878
2879 static bool
2880 mlxsw_sp_nexthop6_group_cmp(const struct mlxsw_sp_nexthop_group *nh_grp,
2881                             const struct mlxsw_sp_fib6_entry *fib6_entry)
2882 {
2883         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
2884
2885         if (nh_grp->count != fib6_entry->nrt6)
2886                 return false;
2887
2888         list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
2889                 struct fib6_nh *fib6_nh = &mlxsw_sp_rt6->rt->fib6_nh;
2890                 struct in6_addr *gw;
2891                 int ifindex, weight;
2892
2893                 ifindex = fib6_nh->fib_nh_dev->ifindex;
2894                 weight = fib6_nh->fib_nh_weight;
2895                 gw = &fib6_nh->fib_nh_gw6;
2896                 if (!mlxsw_sp_nexthop6_group_has_nexthop(nh_grp, gw, ifindex,
2897                                                          weight))
2898                         return false;
2899         }
2900
2901         return true;
2902 }
2903
2904 static int
2905 mlxsw_sp_nexthop_group_cmp(struct rhashtable_compare_arg *arg, const void *ptr)
2906 {
2907         const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = arg->key;
2908         const struct mlxsw_sp_nexthop_group *nh_grp = ptr;
2909
2910         switch (cmp_arg->proto) {
2911         case MLXSW_SP_L3_PROTO_IPV4:
2912                 return cmp_arg->fi != mlxsw_sp_nexthop4_group_fi(nh_grp);
2913         case MLXSW_SP_L3_PROTO_IPV6:
2914                 return !mlxsw_sp_nexthop6_group_cmp(nh_grp,
2915                                                     cmp_arg->fib6_entry);
2916         default:
2917                 WARN_ON(1);
2918                 return 1;
2919         }
2920 }
2921
2922 static int
2923 mlxsw_sp_nexthop_group_type(const struct mlxsw_sp_nexthop_group *nh_grp)
2924 {
2925         return nh_grp->neigh_tbl->family;
2926 }
2927
2928 static u32 mlxsw_sp_nexthop_group_hash_obj(const void *data, u32 len, u32 seed)
2929 {
2930         const struct mlxsw_sp_nexthop_group *nh_grp = data;
2931         const struct mlxsw_sp_nexthop *nh;
2932         struct fib_info *fi;
2933         unsigned int val;
2934         int i;
2935
2936         switch (mlxsw_sp_nexthop_group_type(nh_grp)) {
2937         case AF_INET:
2938                 fi = mlxsw_sp_nexthop4_group_fi(nh_grp);
2939                 return jhash(&fi, sizeof(fi), seed);
2940         case AF_INET6:
2941                 val = nh_grp->count;
2942                 for (i = 0; i < nh_grp->count; i++) {
2943                         nh = &nh_grp->nexthops[i];
2944                         val ^= nh->ifindex;
2945                 }
2946                 return jhash(&val, sizeof(val), seed);
2947         default:
2948                 WARN_ON(1);
2949                 return 0;
2950         }
2951 }
2952
2953 static u32
2954 mlxsw_sp_nexthop6_group_hash(struct mlxsw_sp_fib6_entry *fib6_entry, u32 seed)
2955 {
2956         unsigned int val = fib6_entry->nrt6;
2957         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
2958         struct net_device *dev;
2959
2960         list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
2961                 dev = mlxsw_sp_rt6->rt->fib6_nh.fib_nh_dev;
2962                 val ^= dev->ifindex;
2963         }
2964
2965         return jhash(&val, sizeof(val), seed);
2966 }
2967
2968 static u32
2969 mlxsw_sp_nexthop_group_hash(const void *data, u32 len, u32 seed)
2970 {
2971         const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = data;
2972
2973         switch (cmp_arg->proto) {
2974         case MLXSW_SP_L3_PROTO_IPV4:
2975                 return jhash(&cmp_arg->fi, sizeof(cmp_arg->fi), seed);
2976         case MLXSW_SP_L3_PROTO_IPV6:
2977                 return mlxsw_sp_nexthop6_group_hash(cmp_arg->fib6_entry, seed);
2978         default:
2979                 WARN_ON(1);
2980                 return 0;
2981         }
2982 }
2983
2984 static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = {
2985         .head_offset = offsetof(struct mlxsw_sp_nexthop_group, ht_node),
2986         .hashfn      = mlxsw_sp_nexthop_group_hash,
2987         .obj_hashfn  = mlxsw_sp_nexthop_group_hash_obj,
2988         .obj_cmpfn   = mlxsw_sp_nexthop_group_cmp,
2989 };
2990
2991 static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp,
2992                                          struct mlxsw_sp_nexthop_group *nh_grp)
2993 {
2994         if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
2995             !nh_grp->gateway)
2996                 return 0;
2997
2998         return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_group_ht,
2999                                       &nh_grp->ht_node,
3000                                       mlxsw_sp_nexthop_group_ht_params);
3001 }
3002
3003 static void mlxsw_sp_nexthop_group_remove(struct mlxsw_sp *mlxsw_sp,
3004                                           struct mlxsw_sp_nexthop_group *nh_grp)
3005 {
3006         if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
3007             !nh_grp->gateway)
3008                 return;
3009
3010         rhashtable_remove_fast(&mlxsw_sp->router->nexthop_group_ht,
3011                                &nh_grp->ht_node,
3012                                mlxsw_sp_nexthop_group_ht_params);
3013 }
3014
3015 static struct mlxsw_sp_nexthop_group *
3016 mlxsw_sp_nexthop4_group_lookup(struct mlxsw_sp *mlxsw_sp,
3017                                struct fib_info *fi)
3018 {
3019         struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
3020
3021         cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV4;
3022         cmp_arg.fi = fi;
3023         return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
3024                                       &cmp_arg,
3025                                       mlxsw_sp_nexthop_group_ht_params);
3026 }
3027
3028 static struct mlxsw_sp_nexthop_group *
3029 mlxsw_sp_nexthop6_group_lookup(struct mlxsw_sp *mlxsw_sp,
3030                                struct mlxsw_sp_fib6_entry *fib6_entry)
3031 {
3032         struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
3033
3034         cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV6;
3035         cmp_arg.fib6_entry = fib6_entry;
3036         return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
3037                                       &cmp_arg,
3038                                       mlxsw_sp_nexthop_group_ht_params);
3039 }
3040
3041 static const struct rhashtable_params mlxsw_sp_nexthop_ht_params = {
3042         .key_offset = offsetof(struct mlxsw_sp_nexthop, key),
3043         .head_offset = offsetof(struct mlxsw_sp_nexthop, ht_node),
3044         .key_len = sizeof(struct mlxsw_sp_nexthop_key),
3045 };
3046
3047 static int mlxsw_sp_nexthop_insert(struct mlxsw_sp *mlxsw_sp,
3048                                    struct mlxsw_sp_nexthop *nh)
3049 {
3050         return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_ht,
3051                                       &nh->ht_node, mlxsw_sp_nexthop_ht_params);
3052 }
3053
3054 static void mlxsw_sp_nexthop_remove(struct mlxsw_sp *mlxsw_sp,
3055                                     struct mlxsw_sp_nexthop *nh)
3056 {
3057         rhashtable_remove_fast(&mlxsw_sp->router->nexthop_ht, &nh->ht_node,
3058                                mlxsw_sp_nexthop_ht_params);
3059 }
3060
3061 static struct mlxsw_sp_nexthop *
3062 mlxsw_sp_nexthop_lookup(struct mlxsw_sp *mlxsw_sp,
3063                         struct mlxsw_sp_nexthop_key key)
3064 {
3065         return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_ht, &key,
3066                                       mlxsw_sp_nexthop_ht_params);
3067 }
3068
3069 static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp,
3070                                              const struct mlxsw_sp_fib *fib,
3071                                              u32 adj_index, u16 ecmp_size,
3072                                              u32 new_adj_index,
3073                                              u16 new_ecmp_size)
3074 {
3075         char raleu_pl[MLXSW_REG_RALEU_LEN];
3076
3077         mlxsw_reg_raleu_pack(raleu_pl,
3078                              (enum mlxsw_reg_ralxx_protocol) fib->proto,
3079                              fib->vr->id, adj_index, ecmp_size, new_adj_index,
3080                              new_ecmp_size);
3081         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl);
3082 }
3083
3084 static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
3085                                           struct mlxsw_sp_nexthop_group *nh_grp,
3086                                           u32 old_adj_index, u16 old_ecmp_size)
3087 {
3088         struct mlxsw_sp_fib_entry *fib_entry;
3089         struct mlxsw_sp_fib *fib = NULL;
3090         int err;
3091
3092         list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
3093                 if (fib == fib_entry->fib_node->fib)
3094                         continue;
3095                 fib = fib_entry->fib_node->fib;
3096                 err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, fib,
3097                                                         old_adj_index,
3098                                                         old_ecmp_size,
3099                                                         nh_grp->adj_index,
3100                                                         nh_grp->ecmp_size);
3101                 if (err)
3102                         return err;
3103         }
3104         return 0;
3105 }
3106
3107 static int __mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
3108                                      struct mlxsw_sp_nexthop *nh)
3109 {
3110         struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
3111         char ratr_pl[MLXSW_REG_RATR_LEN];
3112
3113         mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
3114                             true, MLXSW_REG_RATR_TYPE_ETHERNET,
3115                             adj_index, neigh_entry->rif);
3116         mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
3117         if (nh->counter_valid)
3118                 mlxsw_reg_ratr_counter_pack(ratr_pl, nh->counter_index, true);
3119         else
3120                 mlxsw_reg_ratr_counter_pack(ratr_pl, 0, false);
3121
3122         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
3123 }
3124
3125 int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
3126                             struct mlxsw_sp_nexthop *nh)
3127 {
3128         int i;
3129
3130         for (i = 0; i < nh->num_adj_entries; i++) {
3131                 int err;
3132
3133                 err = __mlxsw_sp_nexthop_update(mlxsw_sp, adj_index + i, nh);
3134                 if (err)
3135                         return err;
3136         }
3137
3138         return 0;
3139 }
3140
3141 static int __mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
3142                                           u32 adj_index,
3143                                           struct mlxsw_sp_nexthop *nh)
3144 {
3145         const struct mlxsw_sp_ipip_ops *ipip_ops;
3146
3147         ipip_ops = mlxsw_sp->router->ipip_ops_arr[nh->ipip_entry->ipipt];
3148         return ipip_ops->nexthop_update(mlxsw_sp, adj_index, nh->ipip_entry);
3149 }
3150
3151 static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
3152                                         u32 adj_index,
3153                                         struct mlxsw_sp_nexthop *nh)
3154 {
3155         int i;
3156
3157         for (i = 0; i < nh->num_adj_entries; i++) {
3158                 int err;
3159
3160                 err = __mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index + i,
3161                                                      nh);
3162                 if (err)
3163                         return err;
3164         }
3165
3166         return 0;
3167 }
3168
3169 static int
3170 mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
3171                               struct mlxsw_sp_nexthop_group *nh_grp,
3172                               bool reallocate)
3173 {
3174         u32 adj_index = nh_grp->adj_index; /* base */
3175         struct mlxsw_sp_nexthop *nh;
3176         int i;
3177         int err;
3178
3179         for (i = 0; i < nh_grp->count; i++) {
3180                 nh = &nh_grp->nexthops[i];
3181
3182                 if (!nh->should_offload) {
3183                         nh->offloaded = 0;
3184                         continue;
3185                 }
3186
3187                 if (nh->update || reallocate) {
3188                         switch (nh->type) {
3189                         case MLXSW_SP_NEXTHOP_TYPE_ETH:
3190                                 err = mlxsw_sp_nexthop_update
3191                                             (mlxsw_sp, adj_index, nh);
3192                                 break;
3193                         case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3194                                 err = mlxsw_sp_nexthop_ipip_update
3195                                             (mlxsw_sp, adj_index, nh);
3196                                 break;
3197                         }
3198                         if (err)
3199                                 return err;
3200                         nh->update = 0;
3201                         nh->offloaded = 1;
3202                 }
3203                 adj_index += nh->num_adj_entries;
3204         }
3205         return 0;
3206 }
3207
3208 static bool
3209 mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
3210                                  const struct mlxsw_sp_fib_entry *fib_entry);
3211
3212 static int
3213 mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
3214                                     struct mlxsw_sp_nexthop_group *nh_grp)
3215 {
3216         struct mlxsw_sp_fib_entry *fib_entry;
3217         int err;
3218
3219         list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
3220                 if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node,
3221                                                       fib_entry))
3222                         continue;
3223                 err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
3224                 if (err)
3225                         return err;
3226         }
3227         return 0;
3228 }
3229
3230 static void
3231 mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
3232                                    enum mlxsw_reg_ralue_op op, int err);
3233
3234 static void
3235 mlxsw_sp_nexthop_fib_entries_refresh(struct mlxsw_sp_nexthop_group *nh_grp)
3236 {
3237         enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_WRITE;
3238         struct mlxsw_sp_fib_entry *fib_entry;
3239
3240         list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
3241                 if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node,
3242                                                       fib_entry))
3243                         continue;
3244                 mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
3245         }
3246 }
3247
/* Round an adjacency group size up to a valid size, in place.
 *
 * Valid sizes are 1-64, 512, 1024, 2048 and 4096. Values up to 64 are left
 * untouched; anything above 2048 becomes 4096.
 */
static void mlxsw_sp_adj_grp_size_round_up(u16 *p_adj_grp_size)
{
	u16 requested = *p_adj_grp_size;

	if (requested <= 64)
		return;

	if (requested > 2048)
		*p_adj_grp_size = 4096;
	else if (requested > 1024)
		*p_adj_grp_size = 2048;
	else if (requested > 512)
		*p_adj_grp_size = 1024;
	else
		*p_adj_grp_size = 512;
}
3264
/* Pick the largest of 512, 1024, 2048 and 4096 that fits in alloc_size.
 *
 * When alloc_size is below 512, *p_adj_grp_size is left as it was.
 */
static void mlxsw_sp_adj_grp_size_round_down(u16 *p_adj_grp_size,
					     unsigned int alloc_size)
{
	if (alloc_size < 512)
		return;

	if (alloc_size < 1024)
		*p_adj_grp_size = 512;
	else if (alloc_size < 2048)
		*p_adj_grp_size = 1024;
	else if (alloc_size < 4096)
		*p_adj_grp_size = 2048;
	else
		*p_adj_grp_size = 4096;
}
3277
3278 static int mlxsw_sp_fix_adj_grp_size(struct mlxsw_sp *mlxsw_sp,
3279                                      u16 *p_adj_grp_size)
3280 {
3281         unsigned int alloc_size;
3282         int err;
3283
3284         /* Round up the requested group size to the next size supported
3285          * by the device and make sure the request can be satisfied.
3286          */
3287         mlxsw_sp_adj_grp_size_round_up(p_adj_grp_size);
3288         err = mlxsw_sp_kvdl_alloc_count_query(mlxsw_sp,
3289                                               MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3290                                               *p_adj_grp_size, &alloc_size);
3291         if (err)
3292                 return err;
3293         /* It is possible the allocation results in more allocated
3294          * entries than requested. Try to use as much of them as
3295          * possible.
3296          */
3297         mlxsw_sp_adj_grp_size_round_down(p_adj_grp_size, alloc_size);
3298
3299         return 0;
3300 }
3301
3302 static void
3303 mlxsw_sp_nexthop_group_normalize(struct mlxsw_sp_nexthop_group *nh_grp)
3304 {
3305         int i, g = 0, sum_norm_weight = 0;
3306         struct mlxsw_sp_nexthop *nh;
3307
3308         for (i = 0; i < nh_grp->count; i++) {
3309                 nh = &nh_grp->nexthops[i];
3310
3311                 if (!nh->should_offload)
3312                         continue;
3313                 if (g > 0)
3314                         g = gcd(nh->nh_weight, g);
3315                 else
3316                         g = nh->nh_weight;
3317         }
3318
3319         for (i = 0; i < nh_grp->count; i++) {
3320                 nh = &nh_grp->nexthops[i];
3321
3322                 if (!nh->should_offload)
3323                         continue;
3324                 nh->norm_nh_weight = nh->nh_weight / g;
3325                 sum_norm_weight += nh->norm_nh_weight;
3326         }
3327
3328         nh_grp->sum_norm_weight = sum_norm_weight;
3329 }
3330
3331 static void
3332 mlxsw_sp_nexthop_group_rebalance(struct mlxsw_sp_nexthop_group *nh_grp)
3333 {
3334         int total = nh_grp->sum_norm_weight;
3335         u16 ecmp_size = nh_grp->ecmp_size;
3336         int i, weight = 0, lower_bound = 0;
3337
3338         for (i = 0; i < nh_grp->count; i++) {
3339                 struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3340                 int upper_bound;
3341
3342                 if (!nh->should_offload)
3343                         continue;
3344                 weight += nh->norm_nh_weight;
3345                 upper_bound = DIV_ROUND_CLOSEST(ecmp_size * weight, total);
3346                 nh->num_adj_entries = upper_bound - lower_bound;
3347                 lower_bound = upper_bound;
3348         }
3349 }
3350
3351 static void
3352 mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
3353                                struct mlxsw_sp_nexthop_group *nh_grp)
3354 {
3355         u16 ecmp_size, old_ecmp_size;
3356         struct mlxsw_sp_nexthop *nh;
3357         bool offload_change = false;
3358         u32 adj_index;
3359         bool old_adj_index_valid;
3360         u32 old_adj_index;
3361         int i;
3362         int err;
3363
3364         if (!nh_grp->gateway) {
3365                 mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3366                 return;
3367         }
3368
3369         for (i = 0; i < nh_grp->count; i++) {
3370                 nh = &nh_grp->nexthops[i];
3371
3372                 if (nh->should_offload != nh->offloaded) {
3373                         offload_change = true;
3374                         if (nh->should_offload)
3375                                 nh->update = 1;
3376                 }
3377         }
3378         if (!offload_change) {
3379                 /* Nothing was added or removed, so no need to reallocate. Just
3380                  * update MAC on existing adjacency indexes.
3381                  */
3382                 err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, false);
3383                 if (err) {
3384                         dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
3385                         goto set_trap;
3386                 }
3387                 return;
3388         }
3389         mlxsw_sp_nexthop_group_normalize(nh_grp);
3390         if (!nh_grp->sum_norm_weight)
3391                 /* No neigh of this group is connected so we just set
3392                  * the trap and let everthing flow through kernel.
3393                  */
3394                 goto set_trap;
3395
3396         ecmp_size = nh_grp->sum_norm_weight;
3397         err = mlxsw_sp_fix_adj_grp_size(mlxsw_sp, &ecmp_size);
3398         if (err)
3399                 /* No valid allocation size available. */
3400                 goto set_trap;
3401
3402         err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3403                                   ecmp_size, &adj_index);
3404         if (err) {
3405                 /* We ran out of KVD linear space, just set the
3406                  * trap and let everything flow through kernel.
3407                  */
3408                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n");
3409                 goto set_trap;
3410         }
3411         old_adj_index_valid = nh_grp->adj_index_valid;
3412         old_adj_index = nh_grp->adj_index;
3413         old_ecmp_size = nh_grp->ecmp_size;
3414         nh_grp->adj_index_valid = 1;
3415         nh_grp->adj_index = adj_index;
3416         nh_grp->ecmp_size = ecmp_size;
3417         mlxsw_sp_nexthop_group_rebalance(nh_grp);
3418         err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, true);
3419         if (err) {
3420                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
3421                 goto set_trap;
3422         }
3423
3424         if (!old_adj_index_valid) {
3425                 /* The trap was set for fib entries, so we have to call
3426                  * fib entry update to unset it and use adjacency index.
3427                  */
3428                 err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3429                 if (err) {
3430                         dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n");
3431                         goto set_trap;
3432                 }
3433                 return;
3434         }
3435
3436         err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
3437                                              old_adj_index, old_ecmp_size);
3438         mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3439                            old_ecmp_size, old_adj_index);
3440         if (err) {
3441                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
3442                 goto set_trap;
3443         }
3444
3445         /* Offload state within the group changed, so update the flags. */
3446         mlxsw_sp_nexthop_fib_entries_refresh(nh_grp);
3447
3448         return;
3449
3450 set_trap:
3451         old_adj_index_valid = nh_grp->adj_index_valid;
3452         nh_grp->adj_index_valid = 0;
3453         for (i = 0; i < nh_grp->count; i++) {
3454                 nh = &nh_grp->nexthops[i];
3455                 nh->offloaded = 0;
3456         }
3457         err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3458         if (err)
3459                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
3460         if (old_adj_index_valid)
3461                 mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3462                                    nh_grp->ecmp_size, nh_grp->adj_index);
3463 }
3464
3465 static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
3466                                             bool removing)
3467 {
3468         if (!removing)
3469                 nh->should_offload = 1;
3470         else
3471                 nh->should_offload = 0;
3472         nh->update = 1;
3473 }
3474
3475 static void
3476 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
3477                               struct mlxsw_sp_neigh_entry *neigh_entry,
3478                               bool removing)
3479 {
3480         struct mlxsw_sp_nexthop *nh;
3481
3482         list_for_each_entry(nh, &neigh_entry->nexthop_list,
3483                             neigh_list_node) {
3484                 __mlxsw_sp_nexthop_neigh_update(nh, removing);
3485                 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3486         }
3487 }
3488
3489 static void mlxsw_sp_nexthop_rif_init(struct mlxsw_sp_nexthop *nh,
3490                                       struct mlxsw_sp_rif *rif)
3491 {
3492         if (nh->rif)
3493                 return;
3494
3495         nh->rif = rif;
3496         list_add(&nh->rif_list_node, &rif->nexthop_list);
3497 }
3498
3499 static void mlxsw_sp_nexthop_rif_fini(struct mlxsw_sp_nexthop *nh)
3500 {
3501         if (!nh->rif)
3502                 return;
3503
3504         list_del(&nh->rif_list_node);
3505         nh->rif = NULL;
3506 }
3507
3508 static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp,
3509                                        struct mlxsw_sp_nexthop *nh)
3510 {
3511         struct mlxsw_sp_neigh_entry *neigh_entry;
3512         struct neighbour *n;
3513         u8 nud_state, dead;
3514         int err;
3515
3516         if (!nh->nh_grp->gateway || nh->neigh_entry)
3517                 return 0;
3518
3519         /* Take a reference of neigh here ensuring that neigh would
3520          * not be destructed before the nexthop entry is finished.
3521          * The reference is taken either in neigh_lookup() or
3522          * in neigh_create() in case n is not found.
3523          */
3524         n = neigh_lookup(nh->nh_grp->neigh_tbl, &nh->gw_addr, nh->rif->dev);
3525         if (!n) {
3526                 n = neigh_create(nh->nh_grp->neigh_tbl, &nh->gw_addr,
3527                                  nh->rif->dev);
3528                 if (IS_ERR(n))
3529                         return PTR_ERR(n);
3530                 neigh_event_send(n, NULL);
3531         }
3532         neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
3533         if (!neigh_entry) {
3534                 neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
3535                 if (IS_ERR(neigh_entry)) {
3536                         err = -EINVAL;
3537                         goto err_neigh_entry_create;
3538                 }
3539         }
3540
3541         /* If that is the first nexthop connected to that neigh, add to
3542          * nexthop_neighs_list
3543          */
3544         if (list_empty(&neigh_entry->nexthop_list))
3545                 list_add_tail(&neigh_entry->nexthop_neighs_list_node,
3546                               &mlxsw_sp->router->nexthop_neighs_list);
3547
3548         nh->neigh_entry = neigh_entry;
3549         list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list);
3550         read_lock_bh(&n->lock);
3551         nud_state = n->nud_state;
3552         dead = n->dead;
3553         read_unlock_bh(&n->lock);
3554         __mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID && !dead));
3555
3556         return 0;
3557
3558 err_neigh_entry_create:
3559         neigh_release(n);
3560         return err;
3561 }
3562
3563 static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp,
3564                                         struct mlxsw_sp_nexthop *nh)
3565 {
3566         struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
3567         struct neighbour *n;
3568
3569         if (!neigh_entry)
3570                 return;
3571         n = neigh_entry->key.n;
3572
3573         __mlxsw_sp_nexthop_neigh_update(nh, true);
3574         list_del(&nh->neigh_list_node);
3575         nh->neigh_entry = NULL;
3576
3577         /* If that is the last nexthop connected to that neigh, remove from
3578          * nexthop_neighs_list
3579          */
3580         if (list_empty(&neigh_entry->nexthop_list))
3581                 list_del(&neigh_entry->nexthop_neighs_list_node);
3582
3583         if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
3584                 mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
3585
3586         neigh_release(n);
3587 }
3588
3589 static bool mlxsw_sp_ipip_netdev_ul_up(struct net_device *ol_dev)
3590 {
3591         struct net_device *ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
3592
3593         return ul_dev ? (ul_dev->flags & IFF_UP) : true;
3594 }
3595
3596 static void mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp,
3597                                        struct mlxsw_sp_nexthop *nh,
3598                                        struct mlxsw_sp_ipip_entry *ipip_entry)
3599 {
3600         bool removing;
3601
3602         if (!nh->nh_grp->gateway || nh->ipip_entry)
3603                 return;
3604
3605         nh->ipip_entry = ipip_entry;
3606         removing = !mlxsw_sp_ipip_netdev_ul_up(ipip_entry->ol_dev);
3607         __mlxsw_sp_nexthop_neigh_update(nh, removing);
3608         mlxsw_sp_nexthop_rif_init(nh, &ipip_entry->ol_lb->common);
3609 }
3610
3611 static void mlxsw_sp_nexthop_ipip_fini(struct mlxsw_sp *mlxsw_sp,
3612                                        struct mlxsw_sp_nexthop *nh)
3613 {
3614         struct mlxsw_sp_ipip_entry *ipip_entry = nh->ipip_entry;
3615
3616         if (!ipip_entry)
3617                 return;
3618
3619         __mlxsw_sp_nexthop_neigh_update(nh, true);
3620         nh->ipip_entry = NULL;
3621 }
3622
3623 static bool mlxsw_sp_nexthop4_ipip_type(const struct mlxsw_sp *mlxsw_sp,
3624                                         const struct fib_nh *fib_nh,
3625                                         enum mlxsw_sp_ipip_type *p_ipipt)
3626 {
3627         struct net_device *dev = fib_nh->fib_nh_dev;
3628
3629         return dev &&
3630                fib_nh->nh_parent->fib_type == RTN_UNICAST &&
3631                mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, p_ipipt);
3632 }
3633
3634 static void mlxsw_sp_nexthop_type_fini(struct mlxsw_sp *mlxsw_sp,
3635                                        struct mlxsw_sp_nexthop *nh)
3636 {
3637         switch (nh->type) {
3638         case MLXSW_SP_NEXTHOP_TYPE_ETH:
3639                 mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
3640                 mlxsw_sp_nexthop_rif_fini(nh);
3641                 break;
3642         case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3643                 mlxsw_sp_nexthop_rif_fini(nh);
3644                 mlxsw_sp_nexthop_ipip_fini(mlxsw_sp, nh);
3645                 break;
3646         }
3647 }
3648
3649 static int mlxsw_sp_nexthop4_type_init(struct mlxsw_sp *mlxsw_sp,
3650                                        struct mlxsw_sp_nexthop *nh,
3651                                        struct fib_nh *fib_nh)
3652 {
3653         const struct mlxsw_sp_ipip_ops *ipip_ops;
3654         struct net_device *dev = fib_nh->fib_nh_dev;
3655         struct mlxsw_sp_ipip_entry *ipip_entry;
3656         struct mlxsw_sp_rif *rif;
3657         int err;
3658
3659         ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev);
3660         if (ipip_entry) {
3661                 ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
3662                 if (ipip_ops->can_offload(mlxsw_sp, dev,
3663                                           MLXSW_SP_L3_PROTO_IPV4)) {
3664                         nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
3665                         mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry);
3666                         return 0;
3667                 }
3668         }
3669
3670         nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
3671         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
3672         if (!rif)
3673                 return 0;
3674
3675         mlxsw_sp_nexthop_rif_init(nh, rif);
3676         err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
3677         if (err)
3678                 goto err_neigh_init;
3679
3680         return 0;
3681
3682 err_neigh_init:
3683         mlxsw_sp_nexthop_rif_fini(nh);
3684         return err;
3685 }
3686
/* IPv4 counterpart of mlxsw_sp_nexthop4_type_init(); simply forwards to
 * the protocol-independent mlxsw_sp_nexthop_type_fini().
 */
static void mlxsw_sp_nexthop4_type_fini(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_nexthop *nh)
{
	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
}
3692
3693 static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp,
3694                                   struct mlxsw_sp_nexthop_group *nh_grp,
3695                                   struct mlxsw_sp_nexthop *nh,
3696                                   struct fib_nh *fib_nh)
3697 {
3698         struct net_device *dev = fib_nh->fib_nh_dev;
3699         struct in_device *in_dev;
3700         int err;
3701
3702         nh->nh_grp = nh_grp;
3703         nh->key.fib_nh = fib_nh;
3704 #ifdef CONFIG_IP_ROUTE_MULTIPATH
3705         nh->nh_weight = fib_nh->fib_nh_weight;
3706 #else
3707         nh->nh_weight = 1;
3708 #endif
3709         memcpy(&nh->gw_addr, &fib_nh->fib_nh_gw4, sizeof(fib_nh->fib_nh_gw4));
3710         err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh);
3711         if (err)
3712                 return err;
3713
3714         mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
3715         list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
3716
3717         if (!dev)
3718                 return 0;
3719
3720         in_dev = __in_dev_get_rtnl(dev);
3721         if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
3722             fib_nh->fib_nh_flags & RTNH_F_LINKDOWN)
3723                 return 0;
3724
3725         err = mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh);
3726         if (err)
3727                 goto err_nexthop_neigh_init;
3728
3729         return 0;
3730
3731 err_nexthop_neigh_init:
3732         mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
3733         return err;
3734 }
3735
3736 static void mlxsw_sp_nexthop4_fini(struct mlxsw_sp *mlxsw_sp,
3737                                    struct mlxsw_sp_nexthop *nh)
3738 {
3739         mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh);
3740         list_del(&nh->router_list_node);
3741         mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
3742         mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
3743 }
3744
3745 static void mlxsw_sp_nexthop4_event(struct mlxsw_sp *mlxsw_sp,
3746                                     unsigned long event, struct fib_nh *fib_nh)
3747 {
3748         struct mlxsw_sp_nexthop_key key;
3749         struct mlxsw_sp_nexthop *nh;
3750
3751         if (mlxsw_sp->router->aborted)
3752                 return;
3753
3754         key.fib_nh = fib_nh;
3755         nh = mlxsw_sp_nexthop_lookup(mlxsw_sp, key);
3756         if (WARN_ON_ONCE(!nh))
3757                 return;
3758
3759         switch (event) {
3760         case FIB_EVENT_NH_ADD:
3761                 mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh);
3762                 break;
3763         case FIB_EVENT_NH_DEL:
3764                 mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh);
3765                 break;
3766         }
3767
3768         mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3769 }
3770
3771 static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
3772                                         struct mlxsw_sp_rif *rif)
3773 {
3774         struct mlxsw_sp_nexthop *nh;
3775         bool removing;
3776
3777         list_for_each_entry(nh, &rif->nexthop_list, rif_list_node) {
3778                 switch (nh->type) {
3779                 case MLXSW_SP_NEXTHOP_TYPE_ETH:
3780                         removing = false;
3781                         break;
3782                 case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3783                         removing = !mlxsw_sp_ipip_netdev_ul_up(rif->dev);
3784                         break;
3785                 default:
3786                         WARN_ON(1);
3787                         continue;
3788                 }
3789
3790                 __mlxsw_sp_nexthop_neigh_update(nh, removing);
3791                 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3792         }
3793 }
3794
3795 static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp,
3796                                          struct mlxsw_sp_rif *old_rif,
3797                                          struct mlxsw_sp_rif *new_rif)
3798 {
3799         struct mlxsw_sp_nexthop *nh;
3800
3801         list_splice_init(&old_rif->nexthop_list, &new_rif->nexthop_list);
3802         list_for_each_entry(nh, &new_rif->nexthop_list, rif_list_node)
3803                 nh->rif = new_rif;
3804         mlxsw_sp_nexthop_rif_update(mlxsw_sp, new_rif);
3805 }
3806
3807 static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
3808                                            struct mlxsw_sp_rif *rif)
3809 {
3810         struct mlxsw_sp_nexthop *nh, *tmp;
3811
3812         list_for_each_entry_safe(nh, tmp, &rif->nexthop_list, rif_list_node) {
3813                 mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
3814                 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3815         }
3816 }
3817
3818 static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp,
3819                                    const struct fib_info *fi)
3820 {
3821         return fi->fib_nh->fib_nh_scope == RT_SCOPE_LINK ||
3822                mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, fi->fib_nh, NULL);
3823 }
3824
3825 static struct mlxsw_sp_nexthop_group *
3826 mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
3827 {
3828         struct mlxsw_sp_nexthop_group *nh_grp;
3829         struct mlxsw_sp_nexthop *nh;
3830         struct fib_nh *fib_nh;
3831         int i;
3832         int err;
3833
3834         nh_grp = kzalloc(struct_size(nh_grp, nexthops, fi->fib_nhs),
3835                          GFP_KERNEL);
3836         if (!nh_grp)
3837                 return ERR_PTR(-ENOMEM);
3838         nh_grp->priv = fi;
3839         INIT_LIST_HEAD(&nh_grp->fib_list);
3840         nh_grp->neigh_tbl = &arp_tbl;
3841
3842         nh_grp->gateway = mlxsw_sp_fi_is_gateway(mlxsw_sp, fi);
3843         nh_grp->count = fi->fib_nhs;
3844         fib_info_hold(fi);
3845         for (i = 0; i < nh_grp->count; i++) {
3846                 nh = &nh_grp->nexthops[i];
3847                 fib_nh = &fi->fib_nh[i];
3848                 err = mlxsw_sp_nexthop4_init(mlxsw_sp, nh_grp, nh, fib_nh);
3849                 if (err)
3850                         goto err_nexthop4_init;
3851         }
3852         err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
3853         if (err)
3854                 goto err_nexthop_group_insert;
3855         mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
3856         return nh_grp;
3857
3858 err_nexthop_group_insert:
3859 err_nexthop4_init:
3860         for (i--; i >= 0; i--) {
3861                 nh = &nh_grp->nexthops[i];
3862                 mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
3863         }
3864         fib_info_put(fi);
3865         kfree(nh_grp);
3866         return ERR_PTR(err);
3867 }
3868
3869 static void
3870 mlxsw_sp_nexthop4_group_destroy(struct mlxsw_sp *mlxsw_sp,
3871                                 struct mlxsw_sp_nexthop_group *nh_grp)
3872 {
3873         struct mlxsw_sp_nexthop *nh;
3874         int i;
3875
3876         mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
3877         for (i = 0; i < nh_grp->count; i++) {
3878                 nh = &nh_grp->nexthops[i];
3879                 mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
3880         }
3881         mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
3882         WARN_ON_ONCE(nh_grp->adj_index_valid);
3883         fib_info_put(mlxsw_sp_nexthop4_group_fi(nh_grp));
3884         kfree(nh_grp);
3885 }
3886
3887 static int mlxsw_sp_nexthop4_group_get(struct mlxsw_sp *mlxsw_sp,
3888                                        struct mlxsw_sp_fib_entry *fib_entry,
3889                                        struct fib_info *fi)
3890 {
3891         struct mlxsw_sp_nexthop_group *nh_grp;
3892
3893         nh_grp = mlxsw_sp_nexthop4_group_lookup(mlxsw_sp, fi);
3894         if (!nh_grp) {
3895                 nh_grp = mlxsw_sp_nexthop4_group_create(mlxsw_sp, fi);
3896                 if (IS_ERR(nh_grp))
3897                         return PTR_ERR(nh_grp);
3898         }
3899         list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list);
3900         fib_entry->nh_group = nh_grp;
3901         return 0;
3902 }
3903
3904 static void mlxsw_sp_nexthop4_group_put(struct mlxsw_sp *mlxsw_sp,
3905                                         struct mlxsw_sp_fib_entry *fib_entry)
3906 {
3907         struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3908
3909         list_del(&fib_entry->nexthop_group_node);
3910         if (!list_empty(&nh_grp->fib_list))
3911                 return;
3912         mlxsw_sp_nexthop4_group_destroy(mlxsw_sp, nh_grp);
3913 }
3914
3915 static bool
3916 mlxsw_sp_fib4_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
3917 {
3918         struct mlxsw_sp_fib4_entry *fib4_entry;
3919
3920         fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
3921                                   common);
3922         return !fib4_entry->tos;
3923 }
3924
3925 static bool
3926 mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
3927 {
3928         struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;
3929
3930         switch (fib_entry->fib_node->fib->proto) {
3931         case MLXSW_SP_L3_PROTO_IPV4:
3932                 if (!mlxsw_sp_fib4_entry_should_offload(fib_entry))
3933                         return false;
3934                 break;
3935         case MLXSW_SP_L3_PROTO_IPV6:
3936                 break;
3937         }
3938
3939         switch (fib_entry->type) {
3940         case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
3941                 return !!nh_group->adj_index_valid;
3942         case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
3943                 return !!nh_group->nh_rif;
3944         case MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE:
3945         case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
3946         case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP:
3947                 return true;
3948         default:
3949                 return false;
3950         }
3951 }
3952
3953 static struct mlxsw_sp_nexthop *
3954 mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
3955                      const struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
3956 {
3957         int i;
3958
3959         for (i = 0; i < nh_grp->count; i++) {
3960                 struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3961                 struct fib6_info *rt = mlxsw_sp_rt6->rt;
3962
3963                 if (nh->rif && nh->rif->dev == rt->fib6_nh.fib_nh_dev &&
3964                     ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr,
3965                                     &rt->fib6_nh.fib_nh_gw6))
3966                         return nh;
3967                 continue;
3968         }
3969
3970         return NULL;
3971 }
3972
3973 static void
3974 mlxsw_sp_fib4_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
3975 {
3976         struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3977         int i;
3978
3979         if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL ||
3980             fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE ||
3981             fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP ||
3982             fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP) {
3983                 nh_grp->nexthops->key.fib_nh->fib_nh_flags |= RTNH_F_OFFLOAD;
3984                 return;
3985         }
3986
3987         for (i = 0; i < nh_grp->count; i++) {
3988                 struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3989
3990                 if (nh->offloaded)
3991                         nh->key.fib_nh->fib_nh_flags |= RTNH_F_OFFLOAD;
3992                 else
3993                         nh->key.fib_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
3994         }
3995 }
3996
3997 static void
3998 mlxsw_sp_fib4_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
3999 {
4000         struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
4001         int i;
4002
4003         if (!list_is_singular(&nh_grp->fib_list))
4004                 return;
4005
4006         for (i = 0; i < nh_grp->count; i++) {
4007                 struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
4008
4009                 nh->key.fib_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
4010         }
4011 }
4012
4013 static void
4014 mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
4015 {
4016         struct mlxsw_sp_fib6_entry *fib6_entry;
4017         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4018
4019         fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
4020                                   common);
4021
4022         if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL ||
4023             fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE) {
4024                 list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
4025                                  list)->rt->fib6_nh.fib_nh_flags |= RTNH_F_OFFLOAD;
4026                 return;
4027         }
4028
4029         list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
4030                 struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
4031                 struct fib6_nh *fib6_nh = &mlxsw_sp_rt6->rt->fib6_nh;
4032                 struct mlxsw_sp_nexthop *nh;
4033
4034                 nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6);
4035                 if (nh && nh->offloaded)
4036                         fib6_nh->fib_nh_flags |= RTNH_F_OFFLOAD;
4037                 else
4038                         fib6_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
4039         }
4040 }
4041
4042 static void
4043 mlxsw_sp_fib6_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
4044 {
4045         struct mlxsw_sp_fib6_entry *fib6_entry;
4046         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4047
4048         fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
4049                                   common);
4050         list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
4051                 struct fib6_info *rt = mlxsw_sp_rt6->rt;
4052
4053                 rt->fib6_nh.fib_nh_flags &= ~RTNH_F_OFFLOAD;
4054         }
4055 }
4056
4057 static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
4058 {
4059         switch (fib_entry->fib_node->fib->proto) {
4060         case MLXSW_SP_L3_PROTO_IPV4:
4061                 mlxsw_sp_fib4_entry_offload_set(fib_entry);
4062                 break;
4063         case MLXSW_SP_L3_PROTO_IPV6:
4064                 mlxsw_sp_fib6_entry_offload_set(fib_entry);
4065                 break;
4066         }
4067 }
4068
4069 static void
4070 mlxsw_sp_fib_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
4071 {
4072         switch (fib_entry->fib_node->fib->proto) {
4073         case MLXSW_SP_L3_PROTO_IPV4:
4074                 mlxsw_sp_fib4_entry_offload_unset(fib_entry);
4075                 break;
4076         case MLXSW_SP_L3_PROTO_IPV6:
4077                 mlxsw_sp_fib6_entry_offload_unset(fib_entry);
4078                 break;
4079         }
4080 }
4081
4082 static void
4083 mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
4084                                    enum mlxsw_reg_ralue_op op, int err)
4085 {
4086         switch (op) {
4087         case MLXSW_REG_RALUE_OP_WRITE_DELETE:
4088                 return mlxsw_sp_fib_entry_offload_unset(fib_entry);
4089         case MLXSW_REG_RALUE_OP_WRITE_WRITE:
4090                 if (err)
4091                         return;
4092                 if (mlxsw_sp_fib_entry_should_offload(fib_entry))
4093                         mlxsw_sp_fib_entry_offload_set(fib_entry);
4094                 else
4095                         mlxsw_sp_fib_entry_offload_unset(fib_entry);
4096                 return;
4097         default:
4098                 return;
4099         }
4100 }
4101
4102 static void
4103 mlxsw_sp_fib_entry_ralue_pack(char *ralue_pl,
4104                               const struct mlxsw_sp_fib_entry *fib_entry,
4105                               enum mlxsw_reg_ralue_op op)
4106 {
4107         struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
4108         enum mlxsw_reg_ralxx_protocol proto;
4109         u32 *p_dip;
4110
4111         proto = (enum mlxsw_reg_ralxx_protocol) fib->proto;
4112
4113         switch (fib->proto) {
4114         case MLXSW_SP_L3_PROTO_IPV4:
4115                 p_dip = (u32 *) fib_entry->fib_node->key.addr;
4116                 mlxsw_reg_ralue_pack4(ralue_pl, proto, op, fib->vr->id,
4117                                       fib_entry->fib_node->key.prefix_len,
4118                                       *p_dip);
4119                 break;
4120         case MLXSW_SP_L3_PROTO_IPV6:
4121                 mlxsw_reg_ralue_pack6(ralue_pl, proto, op, fib->vr->id,
4122                                       fib_entry->fib_node->key.prefix_len,
4123                                       fib_entry->fib_node->key.addr);
4124                 break;
4125         }
4126 }
4127
4128 static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp,
4129                                         struct mlxsw_sp_fib_entry *fib_entry,
4130                                         enum mlxsw_reg_ralue_op op)
4131 {
4132         char ralue_pl[MLXSW_REG_RALUE_LEN];
4133         enum mlxsw_reg_ralue_trap_action trap_action;
4134         u16 trap_id = 0;
4135         u32 adjacency_index = 0;
4136         u16 ecmp_size = 0;
4137
4138         /* In case the nexthop group adjacency index is valid, use it
4139          * with provided ECMP size. Otherwise, setup trap and pass
4140          * traffic to kernel.
4141          */
4142         if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
4143                 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
4144                 adjacency_index = fib_entry->nh_group->adj_index;
4145                 ecmp_size = fib_entry->nh_group->ecmp_size;
4146         } else {
4147                 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
4148                 trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
4149         }
4150
4151         mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4152         mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
4153                                         adjacency_index, ecmp_size);
4154         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4155 }
4156
4157 static int mlxsw_sp_fib_entry_op_local(struct mlxsw_sp *mlxsw_sp,
4158                                        struct mlxsw_sp_fib_entry *fib_entry,
4159                                        enum mlxsw_reg_ralue_op op)
4160 {
4161         struct mlxsw_sp_rif *rif = fib_entry->nh_group->nh_rif;
4162         enum mlxsw_reg_ralue_trap_action trap_action;
4163         char ralue_pl[MLXSW_REG_RALUE_LEN];
4164         u16 trap_id = 0;
4165         u16 rif_index = 0;
4166
4167         if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
4168                 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
4169                 rif_index = rif->rif_index;
4170         } else {
4171                 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
4172                 trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
4173         }
4174
4175         mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4176         mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id,
4177                                        rif_index);
4178         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4179 }
4180
4181 static int mlxsw_sp_fib_entry_op_trap(struct mlxsw_sp *mlxsw_sp,
4182                                       struct mlxsw_sp_fib_entry *fib_entry,
4183                                       enum mlxsw_reg_ralue_op op)
4184 {
4185         char ralue_pl[MLXSW_REG_RALUE_LEN];
4186
4187         mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4188         mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
4189         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4190 }
4191
4192 static int mlxsw_sp_fib_entry_op_blackhole(struct mlxsw_sp *mlxsw_sp,
4193                                            struct mlxsw_sp_fib_entry *fib_entry,
4194                                            enum mlxsw_reg_ralue_op op)
4195 {
4196         enum mlxsw_reg_ralue_trap_action trap_action;
4197         char ralue_pl[MLXSW_REG_RALUE_LEN];
4198
4199         trap_action = MLXSW_REG_RALUE_TRAP_ACTION_DISCARD_ERROR;
4200         mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4201         mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, 0, 0);
4202         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4203 }
4204
4205 static int
4206 mlxsw_sp_fib_entry_op_ipip_decap(struct mlxsw_sp *mlxsw_sp,
4207                                  struct mlxsw_sp_fib_entry *fib_entry,
4208                                  enum mlxsw_reg_ralue_op op)
4209 {
4210         struct mlxsw_sp_ipip_entry *ipip_entry = fib_entry->decap.ipip_entry;
4211         const struct mlxsw_sp_ipip_ops *ipip_ops;
4212
4213         if (WARN_ON(!ipip_entry))
4214                 return -EINVAL;
4215
4216         ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
4217         return ipip_ops->fib_entry_op(mlxsw_sp, ipip_entry, op,
4218                                       fib_entry->decap.tunnel_index);
4219 }
4220
4221 static int mlxsw_sp_fib_entry_op_nve_decap(struct mlxsw_sp *mlxsw_sp,
4222                                            struct mlxsw_sp_fib_entry *fib_entry,
4223                                            enum mlxsw_reg_ralue_op op)
4224 {
4225         char ralue_pl[MLXSW_REG_RALUE_LEN];
4226
4227         mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4228         mlxsw_reg_ralue_act_ip2me_tun_pack(ralue_pl,
4229                                            fib_entry->decap.tunnel_index);
4230         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4231 }
4232
4233 static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
4234                                    struct mlxsw_sp_fib_entry *fib_entry,
4235                                    enum mlxsw_reg_ralue_op op)
4236 {
4237         switch (fib_entry->type) {
4238         case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
4239                 return mlxsw_sp_fib_entry_op_remote(mlxsw_sp, fib_entry, op);
4240         case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
4241                 return mlxsw_sp_fib_entry_op_local(mlxsw_sp, fib_entry, op);
4242         case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
4243                 return mlxsw_sp_fib_entry_op_trap(mlxsw_sp, fib_entry, op);
4244         case MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE:
4245                 return mlxsw_sp_fib_entry_op_blackhole(mlxsw_sp, fib_entry, op);
4246         case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
4247                 return mlxsw_sp_fib_entry_op_ipip_decap(mlxsw_sp,
4248                                                         fib_entry, op);
4249         case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP:
4250                 return mlxsw_sp_fib_entry_op_nve_decap(mlxsw_sp, fib_entry, op);
4251         }
4252         return -EINVAL;
4253 }
4254
4255 static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
4256                                  struct mlxsw_sp_fib_entry *fib_entry,
4257                                  enum mlxsw_reg_ralue_op op)
4258 {
4259         int err = __mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, op);
4260
4261         mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, err);
4262
4263         return err;
4264 }
4265
4266 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
4267                                      struct mlxsw_sp_fib_entry *fib_entry)
4268 {
4269         return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
4270                                      MLXSW_REG_RALUE_OP_WRITE_WRITE);
4271 }
4272
4273 static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp,
4274                                   struct mlxsw_sp_fib_entry *fib_entry)
4275 {
4276         return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
4277                                      MLXSW_REG_RALUE_OP_WRITE_DELETE);
4278 }
4279
4280 static int
4281 mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
4282                              const struct fib_entry_notifier_info *fen_info,
4283                              struct mlxsw_sp_fib_entry *fib_entry)
4284 {
4285         union mlxsw_sp_l3addr dip = { .addr4 = htonl(fen_info->dst) };
4286         u32 tb_id = mlxsw_sp_fix_tb_id(fen_info->tb_id);
4287         struct net_device *dev = fen_info->fi->fib_dev;
4288         struct mlxsw_sp_ipip_entry *ipip_entry;
4289         struct fib_info *fi = fen_info->fi;
4290
4291         switch (fen_info->type) {
4292         case RTN_LOCAL:
4293                 ipip_entry = mlxsw_sp_ipip_entry_find_by_decap(mlxsw_sp, dev,
4294                                                  MLXSW_SP_L3_PROTO_IPV4, dip);
4295                 if (ipip_entry && ipip_entry->ol_dev->flags & IFF_UP) {
4296                         fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
4297                         return mlxsw_sp_fib_entry_decap_init(mlxsw_sp,
4298                                                              fib_entry,
4299                                                              ipip_entry);
4300                 }
4301                 if (mlxsw_sp_nve_ipv4_route_is_decap(mlxsw_sp, tb_id,
4302                                                      dip.addr4)) {
4303                         u32 t_index;
4304
4305                         t_index = mlxsw_sp_nve_decap_tunnel_index_get(mlxsw_sp);
4306                         fib_entry->decap.tunnel_index = t_index;
4307                         fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
4308                         return 0;
4309                 }
4310                 /* fall through */
4311         case RTN_BROADCAST:
4312                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
4313                 return 0;
4314         case RTN_BLACKHOLE:
4315                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE;
4316                 return 0;
4317         case RTN_UNREACHABLE: /* fall through */
4318         case RTN_PROHIBIT:
4319                 /* Packets hitting these routes need to be trapped, but
4320                  * can do so with a lower priority than packets directed
4321                  * at the host, so use action type local instead of trap.
4322                  */
4323                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
4324                 return 0;
4325         case RTN_UNICAST:
4326                 if (mlxsw_sp_fi_is_gateway(mlxsw_sp, fi))
4327                         fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
4328                 else
4329                         fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
4330                 return 0;
4331         default:
4332                 return -EINVAL;
4333         }
4334 }
4335
4336 static struct mlxsw_sp_fib4_entry *
4337 mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp,
4338                            struct mlxsw_sp_fib_node *fib_node,
4339                            const struct fib_entry_notifier_info *fen_info)
4340 {
4341         struct mlxsw_sp_fib4_entry *fib4_entry;
4342         struct mlxsw_sp_fib_entry *fib_entry;
4343         int err;
4344
4345         fib4_entry = kzalloc(sizeof(*fib4_entry), GFP_KERNEL);
4346         if (!fib4_entry)
4347                 return ERR_PTR(-ENOMEM);
4348         fib_entry = &fib4_entry->common;
4349
4350         err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry);
4351         if (err)
4352                 goto err_fib4_entry_type_set;
4353
4354         err = mlxsw_sp_nexthop4_group_get(mlxsw_sp, fib_entry, fen_info->fi);
4355         if (err)
4356                 goto err_nexthop4_group_get;
4357
4358         fib4_entry->prio = fen_info->fi->fib_priority;
4359         fib4_entry->tb_id = fen_info->tb_id;
4360         fib4_entry->type = fen_info->type;
4361         fib4_entry->tos = fen_info->tos;
4362
4363         fib_entry->fib_node = fib_node;
4364
4365         return fib4_entry;
4366
4367 err_nexthop4_group_get:
4368 err_fib4_entry_type_set:
4369         kfree(fib4_entry);
4370         return ERR_PTR(err);
4371 }
4372
4373 static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp,
4374                                         struct mlxsw_sp_fib4_entry *fib4_entry)
4375 {
4376         mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common);
4377         kfree(fib4_entry);
4378 }
4379
4380 static struct mlxsw_sp_fib4_entry *
4381 mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp,
4382                            const struct fib_entry_notifier_info *fen_info)
4383 {
4384         struct mlxsw_sp_fib4_entry *fib4_entry;
4385         struct mlxsw_sp_fib_node *fib_node;
4386         struct mlxsw_sp_fib *fib;
4387         struct mlxsw_sp_vr *vr;
4388
4389         vr = mlxsw_sp_vr_find(mlxsw_sp, fen_info->tb_id);
4390         if (!vr)
4391                 return NULL;
4392         fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV4);
4393
4394         fib_node = mlxsw_sp_fib_node_lookup(fib, &fen_info->dst,
4395                                             sizeof(fen_info->dst),
4396                                             fen_info->dst_len);
4397         if (!fib_node)
4398                 return NULL;
4399
4400         list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) {
4401                 if (fib4_entry->tb_id == fen_info->tb_id &&
4402                     fib4_entry->tos == fen_info->tos &&
4403                     fib4_entry->type == fen_info->type &&
4404                     mlxsw_sp_nexthop4_group_fi(fib4_entry->common.nh_group) ==
4405                     fen_info->fi) {
4406                         return fib4_entry;
4407                 }
4408         }
4409
4410         return NULL;
4411 }
4412
4413 static const struct rhashtable_params mlxsw_sp_fib_ht_params = {
4414         .key_offset = offsetof(struct mlxsw_sp_fib_node, key),
4415         .head_offset = offsetof(struct mlxsw_sp_fib_node, ht_node),
4416         .key_len = sizeof(struct mlxsw_sp_fib_key),
4417         .automatic_shrinking = true,
4418 };
4419
4420 static int mlxsw_sp_fib_node_insert(struct mlxsw_sp_fib *fib,
4421                                     struct mlxsw_sp_fib_node *fib_node)
4422 {
4423         return rhashtable_insert_fast(&fib->ht, &fib_node->ht_node,
4424                                       mlxsw_sp_fib_ht_params);
4425 }
4426
4427 static void mlxsw_sp_fib_node_remove(struct mlxsw_sp_fib *fib,
4428                                      struct mlxsw_sp_fib_node *fib_node)
4429 {
4430         rhashtable_remove_fast(&fib->ht, &fib_node->ht_node,
4431                                mlxsw_sp_fib_ht_params);
4432 }
4433
4434 static struct mlxsw_sp_fib_node *
4435 mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
4436                          size_t addr_len, unsigned char prefix_len)
4437 {
4438         struct mlxsw_sp_fib_key key;
4439
4440         memset(&key, 0, sizeof(key));
4441         memcpy(key.addr, addr, addr_len);
4442         key.prefix_len = prefix_len;
4443         return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params);
4444 }
4445
4446 static struct mlxsw_sp_fib_node *
4447 mlxsw_sp_fib_node_create(struct mlxsw_sp_fib *fib, const void *addr,
4448                          size_t addr_len, unsigned char prefix_len)
4449 {
4450         struct mlxsw_sp_fib_node *fib_node;
4451
4452         fib_node = kzalloc(sizeof(*fib_node), GFP_KERNEL);
4453         if (!fib_node)
4454                 return NULL;
4455
4456         INIT_LIST_HEAD(&fib_node->entry_list);
4457         list_add(&fib_node->list, &fib->node_list);
4458         memcpy(fib_node->key.addr, addr, addr_len);
4459         fib_node->key.prefix_len = prefix_len;
4460
4461         return fib_node;
4462 }
4463
4464 static void mlxsw_sp_fib_node_destroy(struct mlxsw_sp_fib_node *fib_node)
4465 {
4466         list_del(&fib_node->list);
4467         WARN_ON(!list_empty(&fib_node->entry_list));
4468         kfree(fib_node);
4469 }
4470
4471 static bool
4472 mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
4473                                  const struct mlxsw_sp_fib_entry *fib_entry)
4474 {
4475         return list_first_entry(&fib_node->entry_list,
4476                                 struct mlxsw_sp_fib_entry, list) == fib_entry;
4477 }
4478
4479 static int mlxsw_sp_fib_lpm_tree_link(struct mlxsw_sp *mlxsw_sp,
4480                                       struct mlxsw_sp_fib_node *fib_node)
4481 {
4482         struct mlxsw_sp_prefix_usage req_prefix_usage;
4483         struct mlxsw_sp_fib *fib = fib_node->fib;
4484         struct mlxsw_sp_lpm_tree *lpm_tree;
4485         int err;
4486
4487         lpm_tree = mlxsw_sp->router->lpm.proto_trees[fib->proto];
4488         if (lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
4489                 goto out;
4490
4491         mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
4492         mlxsw_sp_prefix_usage_set(&req_prefix_usage, fib_node->key.prefix_len);
4493         lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
4494                                          fib->proto);
4495         if (IS_ERR(lpm_tree))
4496                 return PTR_ERR(lpm_tree);
4497
4498         err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
4499         if (err)
4500                 goto err_lpm_tree_replace;
4501
4502 out:
4503         lpm_tree->prefix_ref_count[fib_node->key.prefix_len]++;
4504         return 0;
4505
4506 err_lpm_tree_replace:
4507         mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
4508         return err;
4509 }
4510
4511 static void mlxsw_sp_fib_lpm_tree_unlink(struct mlxsw_sp *mlxsw_sp,
4512                                          struct mlxsw_sp_fib_node *fib_node)
4513 {
4514         struct mlxsw_sp_lpm_tree *lpm_tree = fib_node->fib->lpm_tree;
4515         struct mlxsw_sp_prefix_usage req_prefix_usage;
4516         struct mlxsw_sp_fib *fib = fib_node->fib;
4517         int err;
4518
4519         if (--lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
4520                 return;
4521         /* Try to construct a new LPM tree from the current prefix usage
4522          * minus the unused one. If we fail, continue using the old one.
4523          */
4524         mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
4525         mlxsw_sp_prefix_usage_clear(&req_prefix_usage,
4526                                     fib_node->key.prefix_len);
4527         lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
4528                                          fib->proto);
4529         if (IS_ERR(lpm_tree))
4530                 return;
4531
4532         err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
4533         if (err)
4534                 goto err_lpm_tree_replace;
4535
4536         return;
4537
4538 err_lpm_tree_replace:
4539         mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
4540 }
4541
4542 static int mlxsw_sp_fib_node_init(struct mlxsw_sp *mlxsw_sp,
4543                                   struct mlxsw_sp_fib_node *fib_node,
4544                                   struct mlxsw_sp_fib *fib)
4545 {
4546         int err;
4547
4548         err = mlxsw_sp_fib_node_insert(fib, fib_node);
4549         if (err)
4550                 return err;
4551         fib_node->fib = fib;
4552
4553         err = mlxsw_sp_fib_lpm_tree_link(mlxsw_sp, fib_node);
4554         if (err)
4555                 goto err_fib_lpm_tree_link;
4556
4557         return 0;
4558
4559 err_fib_lpm_tree_link:
4560         fib_node->fib = NULL;
4561         mlxsw_sp_fib_node_remove(fib, fib_node);
4562         return err;
4563 }
4564
4565 static void mlxsw_sp_fib_node_fini(struct mlxsw_sp *mlxsw_sp,
4566                                    struct mlxsw_sp_fib_node *fib_node)
4567 {
4568         struct mlxsw_sp_fib *fib = fib_node->fib;
4569
4570         mlxsw_sp_fib_lpm_tree_unlink(mlxsw_sp, fib_node);
4571         fib_node->fib = NULL;
4572         mlxsw_sp_fib_node_remove(fib, fib_node);
4573 }
4574
4575 static struct mlxsw_sp_fib_node *
4576 mlxsw_sp_fib_node_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, const void *addr,
4577                       size_t addr_len, unsigned char prefix_len,
4578                       enum mlxsw_sp_l3proto proto)
4579 {
4580         struct mlxsw_sp_fib_node *fib_node;
4581         struct mlxsw_sp_fib *fib;
4582         struct mlxsw_sp_vr *vr;
4583         int err;
4584
4585         vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, NULL);
4586         if (IS_ERR(vr))
4587                 return ERR_CAST(vr);
4588         fib = mlxsw_sp_vr_fib(vr, proto);
4589
4590         fib_node = mlxsw_sp_fib_node_lookup(fib, addr, addr_len, prefix_len);
4591         if (fib_node)
4592                 return fib_node;
4593
4594         fib_node = mlxsw_sp_fib_node_create(fib, addr, addr_len, prefix_len);
4595         if (!fib_node) {
4596                 err = -ENOMEM;
4597                 goto err_fib_node_create;
4598         }
4599
4600         err = mlxsw_sp_fib_node_init(mlxsw_sp, fib_node, fib);
4601         if (err)
4602                 goto err_fib_node_init;
4603
4604         return fib_node;
4605
4606 err_fib_node_init:
4607         mlxsw_sp_fib_node_destroy(fib_node);
4608 err_fib_node_create:
4609         mlxsw_sp_vr_put(mlxsw_sp, vr);
4610         return ERR_PTR(err);
4611 }
4612
4613 static void mlxsw_sp_fib_node_put(struct mlxsw_sp *mlxsw_sp,
4614                                   struct mlxsw_sp_fib_node *fib_node)
4615 {
4616         struct mlxsw_sp_vr *vr = fib_node->fib->vr;
4617
4618         if (!list_empty(&fib_node->entry_list))
4619                 return;
4620         mlxsw_sp_fib_node_fini(mlxsw_sp, fib_node);
4621         mlxsw_sp_fib_node_destroy(fib_node);
4622         mlxsw_sp_vr_put(mlxsw_sp, vr);
4623 }
4624
4625 static struct mlxsw_sp_fib4_entry *
4626 mlxsw_sp_fib4_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
4627                               const struct mlxsw_sp_fib4_entry *new4_entry)
4628 {
4629         struct mlxsw_sp_fib4_entry *fib4_entry;
4630
4631         list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) {
4632                 if (fib4_entry->tb_id > new4_entry->tb_id)
4633                         continue;
4634                 if (fib4_entry->tb_id != new4_entry->tb_id)
4635                         break;
4636                 if (fib4_entry->tos > new4_entry->tos)
4637                         continue;
4638                 if (fib4_entry->prio >= new4_entry->prio ||
4639                     fib4_entry->tos < new4_entry->tos)
4640                         return fib4_entry;
4641         }
4642
4643         return NULL;
4644 }
4645
4646 static int
4647 mlxsw_sp_fib4_node_list_append(struct mlxsw_sp_fib4_entry *fib4_entry,
4648                                struct mlxsw_sp_fib4_entry *new4_entry)
4649 {
4650         struct mlxsw_sp_fib_node *fib_node;
4651
4652         if (WARN_ON(!fib4_entry))
4653                 return -EINVAL;
4654
4655         fib_node = fib4_entry->common.fib_node;
4656         list_for_each_entry_from(fib4_entry, &fib_node->entry_list,
4657                                  common.list) {
4658                 if (fib4_entry->tb_id != new4_entry->tb_id ||
4659                     fib4_entry->tos != new4_entry->tos ||
4660                     fib4_entry->prio != new4_entry->prio)
4661                         break;
4662         }
4663
4664         list_add_tail(&new4_entry->common.list, &fib4_entry->common.list);
4665         return 0;
4666 }
4667
4668 static int
4669 mlxsw_sp_fib4_node_list_insert(struct mlxsw_sp_fib4_entry *new4_entry,
4670                                bool replace, bool append)
4671 {
4672         struct mlxsw_sp_fib_node *fib_node = new4_entry->common.fib_node;
4673         struct mlxsw_sp_fib4_entry *fib4_entry;
4674
4675         fib4_entry = mlxsw_sp_fib4_node_entry_find(fib_node, new4_entry);
4676
4677         if (append)
4678                 return mlxsw_sp_fib4_node_list_append(fib4_entry, new4_entry);
4679         if (replace && WARN_ON(!fib4_entry))
4680                 return -EINVAL;
4681
4682         /* Insert new entry before replaced one, so that we can later
4683          * remove the second.
4684          */
4685         if (fib4_entry) {
4686                 list_add_tail(&new4_entry->common.list,
4687                               &fib4_entry->common.list);
4688         } else {
4689                 struct mlxsw_sp_fib4_entry *last;
4690
4691                 list_for_each_entry(last, &fib_node->entry_list, common.list) {
4692                         if (new4_entry->tb_id > last->tb_id)
4693                                 break;
4694                         fib4_entry = last;
4695                 }
4696
4697                 if (fib4_entry)
4698                         list_add(&new4_entry->common.list,
4699                                  &fib4_entry->common.list);
4700                 else
4701                         list_add(&new4_entry->common.list,
4702                                  &fib_node->entry_list);
4703         }
4704
4705         return 0;
4706 }
4707
4708 static void
4709 mlxsw_sp_fib4_node_list_remove(struct mlxsw_sp_fib4_entry *fib4_entry)
4710 {
4711         list_del(&fib4_entry->common.list);
4712 }
4713
4714 static int mlxsw_sp_fib_node_entry_add(struct mlxsw_sp *mlxsw_sp,
4715                                        struct mlxsw_sp_fib_entry *fib_entry)
4716 {
4717         struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
4718
4719         if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
4720                 return 0;
4721
4722         /* To prevent packet loss, overwrite the previously offloaded
4723          * entry.
4724          */
4725         if (!list_is_singular(&fib_node->entry_list)) {
4726                 enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
4727                 struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
4728
4729                 mlxsw_sp_fib_entry_offload_refresh(n, op, 0);
4730         }
4731
4732         return mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
4733 }
4734
4735 static void mlxsw_sp_fib_node_entry_del(struct mlxsw_sp *mlxsw_sp,
4736                                         struct mlxsw_sp_fib_entry *fib_entry)
4737 {
4738         struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
4739
4740         if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
4741                 return;
4742
4743         /* Promote the next entry by overwriting the deleted entry */
4744         if (!list_is_singular(&fib_node->entry_list)) {
4745                 struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
4746                 enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
4747
4748                 mlxsw_sp_fib_entry_update(mlxsw_sp, n);
4749                 mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
4750                 return;
4751         }
4752
4753         mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
4754 }
4755
4756 static int mlxsw_sp_fib4_node_entry_link(struct mlxsw_sp *mlxsw_sp,
4757                                          struct mlxsw_sp_fib4_entry *fib4_entry,
4758                                          bool replace, bool append)
4759 {
4760         int err;
4761
4762         err = mlxsw_sp_fib4_node_list_insert(fib4_entry, replace, append);
4763         if (err)
4764                 return err;
4765
4766         err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib4_entry->common);
4767         if (err)
4768                 goto err_fib_node_entry_add;
4769
4770         return 0;
4771
4772 err_fib_node_entry_add:
4773         mlxsw_sp_fib4_node_list_remove(fib4_entry);
4774         return err;
4775 }
4776
4777 static void
4778 mlxsw_sp_fib4_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
4779                                 struct mlxsw_sp_fib4_entry *fib4_entry)
4780 {
4781         mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib4_entry->common);
4782         mlxsw_sp_fib4_node_list_remove(fib4_entry);
4783
4784         if (fib4_entry->common.type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP)
4785                 mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, &fib4_entry->common);
4786 }
4787
4788 static void mlxsw_sp_fib4_entry_replace(struct mlxsw_sp *mlxsw_sp,
4789                                         struct mlxsw_sp_fib4_entry *fib4_entry,
4790                                         bool replace)
4791 {
4792         struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node;
4793         struct mlxsw_sp_fib4_entry *replaced;
4794
4795         if (!replace)
4796                 return;
4797
4798         /* We inserted the new entry before replaced one */
4799         replaced = list_next_entry(fib4_entry, common.list);
4800
4801         mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, replaced);
4802         mlxsw_sp_fib4_entry_destroy(mlxsw_sp, replaced);
4803         mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4804 }
4805
4806 static int
4807 mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp,
4808                          const struct fib_entry_notifier_info *fen_info,
4809                          bool replace, bool append)
4810 {
4811         struct mlxsw_sp_fib4_entry *fib4_entry;
4812         struct mlxsw_sp_fib_node *fib_node;
4813         int err;
4814
4815         if (mlxsw_sp->router->aborted)
4816                 return 0;
4817
4818         fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, fen_info->tb_id,
4819                                          &fen_info->dst, sizeof(fen_info->dst),
4820                                          fen_info->dst_len,
4821                                          MLXSW_SP_L3_PROTO_IPV4);
4822         if (IS_ERR(fib_node)) {
4823                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB node\n");
4824                 return PTR_ERR(fib_node);
4825         }
4826
4827         fib4_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info);
4828         if (IS_ERR(fib4_entry)) {
4829                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to create FIB entry\n");
4830                 err = PTR_ERR(fib4_entry);
4831                 goto err_fib4_entry_create;
4832         }
4833
4834         err = mlxsw_sp_fib4_node_entry_link(mlxsw_sp, fib4_entry, replace,
4835                                             append);
4836         if (err) {
4837                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n");
4838                 goto err_fib4_node_entry_link;
4839         }
4840
4841         mlxsw_sp_fib4_entry_replace(mlxsw_sp, fib4_entry, replace);
4842
4843         return 0;
4844
4845 err_fib4_node_entry_link:
4846         mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
4847 err_fib4_entry_create:
4848         mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4849         return err;
4850 }
4851
4852 static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
4853                                      struct fib_entry_notifier_info *fen_info)
4854 {
4855         struct mlxsw_sp_fib4_entry *fib4_entry;
4856         struct mlxsw_sp_fib_node *fib_node;
4857
4858         if (mlxsw_sp->router->aborted)
4859                 return;
4860
4861         fib4_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info);
4862         if (WARN_ON(!fib4_entry))
4863                 return;
4864         fib_node = fib4_entry->common.fib_node;
4865
4866         mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry);
4867         mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
4868         mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4869 }
4870
4871 static bool mlxsw_sp_fib6_rt_should_ignore(const struct fib6_info *rt)
4872 {
4873         /* Packets with link-local destination IP arriving to the router
4874          * are trapped to the CPU, so no need to program specific routes
4875          * for them.
4876          */
4877         if (ipv6_addr_type(&rt->fib6_dst.addr) & IPV6_ADDR_LINKLOCAL)
4878                 return true;
4879
4880         /* Multicast routes aren't supported, so ignore them. Neighbour
4881          * Discovery packets are specifically trapped.
4882          */
4883         if (ipv6_addr_type(&rt->fib6_dst.addr) & IPV6_ADDR_MULTICAST)
4884                 return true;
4885
4886         /* Cloned routes are irrelevant in the forwarding path. */
4887         if (rt->fib6_flags & RTF_CACHE)
4888                 return true;
4889
4890         return false;
4891 }
4892
4893 static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct fib6_info *rt)
4894 {
4895         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4896
4897         mlxsw_sp_rt6 = kzalloc(sizeof(*mlxsw_sp_rt6), GFP_KERNEL);
4898         if (!mlxsw_sp_rt6)
4899                 return ERR_PTR(-ENOMEM);
4900
4901         /* In case of route replace, replaced route is deleted with
4902          * no notification. Take reference to prevent accessing freed
4903          * memory.
4904          */
4905         mlxsw_sp_rt6->rt = rt;
4906         fib6_info_hold(rt);
4907
4908         return mlxsw_sp_rt6;
4909 }
4910
4911 #if IS_ENABLED(CONFIG_IPV6)
4912 static void mlxsw_sp_rt6_release(struct fib6_info *rt)
4913 {
4914         fib6_info_release(rt);
4915 }
4916 #else
4917 static void mlxsw_sp_rt6_release(struct fib6_info *rt)
4918 {
4919 }
4920 #endif
4921
4922 static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
4923 {
4924         mlxsw_sp_rt6_release(mlxsw_sp_rt6->rt);
4925         kfree(mlxsw_sp_rt6);
4926 }
4927
4928 static bool mlxsw_sp_fib6_rt_can_mp(const struct fib6_info *rt)
4929 {
4930         /* RTF_CACHE routes are ignored */
4931         return !(rt->fib6_flags & RTF_ADDRCONF) && rt->fib6_nh.fib_nh_gw_family;
4932 }
4933
4934 static struct fib6_info *
4935 mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry)
4936 {
4937         return list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
4938                                 list)->rt;
4939 }
4940
4941 static struct mlxsw_sp_fib6_entry *
4942 mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node,
4943                                  const struct fib6_info *nrt, bool replace)
4944 {
4945         struct mlxsw_sp_fib6_entry *fib6_entry;
4946
4947         if (!mlxsw_sp_fib6_rt_can_mp(nrt) || replace)
4948                 return NULL;
4949
4950         list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
4951                 struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
4952
4953                 /* RT6_TABLE_LOCAL and RT6_TABLE_MAIN share the same
4954                  * virtual router.
4955                  */
4956                 if (rt->fib6_table->tb6_id > nrt->fib6_table->tb6_id)
4957                         continue;
4958                 if (rt->fib6_table->tb6_id != nrt->fib6_table->tb6_id)
4959                         break;
4960                 if (rt->fib6_metric < nrt->fib6_metric)
4961                         continue;
4962                 if (rt->fib6_metric == nrt->fib6_metric &&
4963                     mlxsw_sp_fib6_rt_can_mp(rt))
4964                         return fib6_entry;
4965                 if (rt->fib6_metric > nrt->fib6_metric)
4966                         break;
4967         }
4968
4969         return NULL;
4970 }
4971
4972 static struct mlxsw_sp_rt6 *
4973 mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry,
4974                             const struct fib6_info *rt)
4975 {
4976         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4977
4978         list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
4979                 if (mlxsw_sp_rt6->rt == rt)
4980                         return mlxsw_sp_rt6;
4981         }
4982
4983         return NULL;
4984 }
4985
4986 static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp,
4987                                         const struct fib6_info *rt,
4988                                         enum mlxsw_sp_ipip_type *ret)
4989 {
4990         return rt->fib6_nh.fib_nh_dev &&
4991                mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->fib6_nh.fib_nh_dev, ret);
4992 }
4993
4994 static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp,
4995                                        struct mlxsw_sp_nexthop_group *nh_grp,
4996                                        struct mlxsw_sp_nexthop *nh,
4997                                        const struct fib6_info *rt)
4998 {
4999         const struct mlxsw_sp_ipip_ops *ipip_ops;
5000         struct mlxsw_sp_ipip_entry *ipip_entry;
5001         struct net_device *dev = rt->fib6_nh.fib_nh_dev;
5002         struct mlxsw_sp_rif *rif;
5003         int err;
5004
5005         ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev);
5006         if (ipip_entry) {
5007                 ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
5008                 if (ipip_ops->can_offload(mlxsw_sp, dev,
5009                                           MLXSW_SP_L3_PROTO_IPV6)) {
5010                         nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
5011                         mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry);
5012                         return 0;
5013                 }
5014         }
5015
5016         nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
5017         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
5018         if (!rif)
5019                 return 0;
5020         mlxsw_sp_nexthop_rif_init(nh, rif);
5021
5022         err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
5023         if (err)
5024                 goto err_nexthop_neigh_init;
5025
5026         return 0;
5027
5028 err_nexthop_neigh_init:
5029         mlxsw_sp_nexthop_rif_fini(nh);
5030         return err;
5031 }
5032
/* Tear down the type-specific state of an IPv6 nexthop by delegating to
 * the protocol-independent mlxsw_sp_nexthop_type_fini().
 */
static void mlxsw_sp_nexthop6_type_fini(struct mlxsw_sp *mlxsw_sp,
                                        struct mlxsw_sp_nexthop *nh)
{
        /* No IPv6-specific work is needed. */
        mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
}
5038
5039 static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
5040                                   struct mlxsw_sp_nexthop_group *nh_grp,
5041                                   struct mlxsw_sp_nexthop *nh,
5042                                   const struct fib6_info *rt)
5043 {
5044         struct net_device *dev = rt->fib6_nh.fib_nh_dev;
5045
5046         nh->nh_grp = nh_grp;
5047         nh->nh_weight = rt->fib6_nh.fib_nh_weight;
5048         memcpy(&nh->gw_addr, &rt->fib6_nh.fib_nh_gw6, sizeof(nh->gw_addr));
5049         mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
5050
5051         list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
5052
5053         if (!dev)
5054                 return 0;
5055         nh->ifindex = dev->ifindex;
5056
5057         return mlxsw_sp_nexthop6_type_init(mlxsw_sp, nh_grp, nh, rt);
5058 }
5059
5060 static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp,
5061                                    struct mlxsw_sp_nexthop *nh)
5062 {
5063         mlxsw_sp_nexthop6_type_fini(mlxsw_sp, nh);
5064         list_del(&nh->router_list_node);
5065         mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
5066 }
5067
5068 static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp,
5069                                     const struct fib6_info *rt)
5070 {
5071         return rt->fib6_nh.fib_nh_gw_family ||
5072                mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL);
5073 }
5074
5075 static struct mlxsw_sp_nexthop_group *
5076 mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp,
5077                                struct mlxsw_sp_fib6_entry *fib6_entry)
5078 {
5079         struct mlxsw_sp_nexthop_group *nh_grp;
5080         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5081         struct mlxsw_sp_nexthop *nh;
5082         int i = 0;
5083         int err;
5084
5085         nh_grp = kzalloc(struct_size(nh_grp, nexthops, fib6_entry->nrt6),
5086                          GFP_KERNEL);
5087         if (!nh_grp)
5088                 return ERR_PTR(-ENOMEM);
5089         INIT_LIST_HEAD(&nh_grp->fib_list);
5090 #if IS_ENABLED(CONFIG_IPV6)
5091         nh_grp->neigh_tbl = &nd_tbl;
5092 #endif
5093         mlxsw_sp_rt6 = list_first_entry(&fib6_entry->rt6_list,
5094                                         struct mlxsw_sp_rt6, list);
5095         nh_grp->gateway = mlxsw_sp_rt6_is_gateway(mlxsw_sp, mlxsw_sp_rt6->rt);
5096         nh_grp->count = fib6_entry->nrt6;
5097         for (i = 0; i < nh_grp->count; i++) {
5098                 struct fib6_info *rt = mlxsw_sp_rt6->rt;
5099
5100                 nh = &nh_grp->nexthops[i];
5101                 err = mlxsw_sp_nexthop6_init(mlxsw_sp, nh_grp, nh, rt);
5102                 if (err)
5103                         goto err_nexthop6_init;
5104                 mlxsw_sp_rt6 = list_next_entry(mlxsw_sp_rt6, list);
5105         }
5106
5107         err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
5108         if (err)
5109                 goto err_nexthop_group_insert;
5110
5111         mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
5112         return nh_grp;
5113
5114 err_nexthop_group_insert:
5115 err_nexthop6_init:
5116         for (i--; i >= 0; i--) {
5117                 nh = &nh_grp->nexthops[i];
5118                 mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
5119         }
5120         kfree(nh_grp);
5121         return ERR_PTR(err);
5122 }
5123
5124 static void
5125 mlxsw_sp_nexthop6_group_destroy(struct mlxsw_sp *mlxsw_sp,
5126                                 struct mlxsw_sp_nexthop_group *nh_grp)
5127 {
5128         struct mlxsw_sp_nexthop *nh;
5129         int i = nh_grp->count;
5130
5131         mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
5132         for (i--; i >= 0; i--) {
5133                 nh = &nh_grp->nexthops[i];
5134                 mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
5135         }
5136         mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
5137         WARN_ON(nh_grp->adj_index_valid);
5138         kfree(nh_grp);
5139 }
5140
5141 static int mlxsw_sp_nexthop6_group_get(struct mlxsw_sp *mlxsw_sp,
5142                                        struct mlxsw_sp_fib6_entry *fib6_entry)
5143 {
5144         struct mlxsw_sp_nexthop_group *nh_grp;
5145
5146         nh_grp = mlxsw_sp_nexthop6_group_lookup(mlxsw_sp, fib6_entry);
5147         if (!nh_grp) {
5148                 nh_grp = mlxsw_sp_nexthop6_group_create(mlxsw_sp, fib6_entry);
5149                 if (IS_ERR(nh_grp))
5150                         return PTR_ERR(nh_grp);
5151         }
5152
5153         list_add_tail(&fib6_entry->common.nexthop_group_node,
5154                       &nh_grp->fib_list);
5155         fib6_entry->common.nh_group = nh_grp;
5156
5157         return 0;
5158 }
5159
5160 static void mlxsw_sp_nexthop6_group_put(struct mlxsw_sp *mlxsw_sp,
5161                                         struct mlxsw_sp_fib_entry *fib_entry)
5162 {
5163         struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
5164
5165         list_del(&fib_entry->nexthop_group_node);
5166         if (!list_empty(&nh_grp->fib_list))
5167                 return;
5168         mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, nh_grp);
5169 }
5170
5171 static int
5172 mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp,
5173                                struct mlxsw_sp_fib6_entry *fib6_entry)
5174 {
5175         struct mlxsw_sp_nexthop_group *old_nh_grp = fib6_entry->common.nh_group;
5176         int err;
5177
5178         fib6_entry->common.nh_group = NULL;
5179         list_del(&fib6_entry->common.nexthop_group_node);
5180
5181         err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
5182         if (err)
5183                 goto err_nexthop6_group_get;
5184
5185         /* In case this entry is offloaded, then the adjacency index
5186          * currently associated with it in the device's table is that
5187          * of the old group. Start using the new one instead.
5188          */
5189         err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
5190         if (err)
5191                 goto err_fib_node_entry_add;
5192
5193         if (list_empty(&old_nh_grp->fib_list))
5194                 mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, old_nh_grp);
5195
5196         return 0;
5197
5198 err_fib_node_entry_add:
5199         mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
5200 err_nexthop6_group_get:
5201         list_add_tail(&fib6_entry->common.nexthop_group_node,
5202                       &old_nh_grp->fib_list);
5203         fib6_entry->common.nh_group = old_nh_grp;
5204         return err;
5205 }
5206
5207 static int
5208 mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
5209                                 struct mlxsw_sp_fib6_entry *fib6_entry,
5210                                 struct fib6_info *rt)
5211 {
5212         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5213         int err;
5214
5215         mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
5216         if (IS_ERR(mlxsw_sp_rt6))
5217                 return PTR_ERR(mlxsw_sp_rt6);
5218
5219         list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
5220         fib6_entry->nrt6++;
5221
5222         err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
5223         if (err)
5224                 goto err_nexthop6_group_update;
5225
5226         return 0;
5227
5228 err_nexthop6_group_update:
5229         fib6_entry->nrt6--;
5230         list_del(&mlxsw_sp_rt6->list);
5231         mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5232         return err;
5233 }
5234
5235 static void
5236 mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp,
5237                                 struct mlxsw_sp_fib6_entry *fib6_entry,
5238                                 struct fib6_info *rt)
5239 {
5240         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5241
5242         mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt);
5243         if (WARN_ON(!mlxsw_sp_rt6))
5244                 return;
5245
5246         fib6_entry->nrt6--;
5247         list_del(&mlxsw_sp_rt6->list);
5248         mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
5249         mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5250 }
5251
5252 static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp,
5253                                          struct mlxsw_sp_fib_entry *fib_entry,
5254                                          const struct fib6_info *rt)
5255 {
5256         /* Packets hitting RTF_REJECT routes need to be discarded by the
5257          * stack. We can rely on their destination device not having a
5258          * RIF (it's the loopback device) and can thus use action type
5259          * local, which will cause them to be trapped with a lower
5260          * priority than packets that need to be locally received.
5261          */
5262         if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
5263                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
5264         else if (rt->fib6_type == RTN_BLACKHOLE)
5265                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE;
5266         else if (rt->fib6_flags & RTF_REJECT)
5267                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
5268         else if (mlxsw_sp_rt6_is_gateway(mlxsw_sp, rt))
5269                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
5270         else
5271                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
5272 }
5273
5274 static void
5275 mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry *fib6_entry)
5276 {
5277         struct mlxsw_sp_rt6 *mlxsw_sp_rt6, *tmp;
5278
5279         list_for_each_entry_safe(mlxsw_sp_rt6, tmp, &fib6_entry->rt6_list,
5280                                  list) {
5281                 fib6_entry->nrt6--;
5282                 list_del(&mlxsw_sp_rt6->list);
5283                 mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5284         }
5285 }
5286
5287 static struct mlxsw_sp_fib6_entry *
5288 mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp,
5289                            struct mlxsw_sp_fib_node *fib_node,
5290                            struct fib6_info *rt)
5291 {
5292         struct mlxsw_sp_fib6_entry *fib6_entry;
5293         struct mlxsw_sp_fib_entry *fib_entry;
5294         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5295         int err;
5296
5297         fib6_entry = kzalloc(sizeof(*fib6_entry), GFP_KERNEL);
5298         if (!fib6_entry)
5299                 return ERR_PTR(-ENOMEM);
5300         fib_entry = &fib6_entry->common;
5301
5302         mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
5303         if (IS_ERR(mlxsw_sp_rt6)) {
5304                 err = PTR_ERR(mlxsw_sp_rt6);
5305                 goto err_rt6_create;
5306         }
5307
5308         mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, mlxsw_sp_rt6->rt);
5309
5310         INIT_LIST_HEAD(&fib6_entry->rt6_list);
5311         list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
5312         fib6_entry->nrt6 = 1;
5313         err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
5314         if (err)
5315                 goto err_nexthop6_group_get;
5316
5317         fib_entry->fib_node = fib_node;
5318
5319         return fib6_entry;
5320
5321 err_nexthop6_group_get:
5322         list_del(&mlxsw_sp_rt6->list);
5323         mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5324 err_rt6_create:
5325         kfree(fib6_entry);
5326         return ERR_PTR(err);
5327 }
5328
5329 static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp,
5330                                         struct mlxsw_sp_fib6_entry *fib6_entry)
5331 {
5332         mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
5333         mlxsw_sp_fib6_entry_rt_destroy_all(fib6_entry);
5334         WARN_ON(fib6_entry->nrt6);
5335         kfree(fib6_entry);
5336 }
5337
5338 static struct mlxsw_sp_fib6_entry *
5339 mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
5340                               const struct fib6_info *nrt, bool replace)
5341 {
5342         struct mlxsw_sp_fib6_entry *fib6_entry, *fallback = NULL;
5343
5344         list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
5345                 struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
5346
5347                 if (rt->fib6_table->tb6_id > nrt->fib6_table->tb6_id)
5348                         continue;
5349                 if (rt->fib6_table->tb6_id != nrt->fib6_table->tb6_id)
5350                         break;
5351                 if (replace && rt->fib6_metric == nrt->fib6_metric) {
5352                         if (mlxsw_sp_fib6_rt_can_mp(rt) ==
5353                             mlxsw_sp_fib6_rt_can_mp(nrt))
5354                                 return fib6_entry;
5355                         if (mlxsw_sp_fib6_rt_can_mp(nrt))
5356                                 fallback = fallback ?: fib6_entry;
5357                 }
5358                 if (rt->fib6_metric > nrt->fib6_metric)
5359                         return fallback ?: fib6_entry;
5360         }
5361
5362         return fallback;
5363 }
5364
5365 static int
5366 mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry,
5367                                bool replace)
5368 {
5369         struct mlxsw_sp_fib_node *fib_node = new6_entry->common.fib_node;
5370         struct fib6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry);
5371         struct mlxsw_sp_fib6_entry *fib6_entry;
5372
5373         fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, replace);
5374
5375         if (replace && WARN_ON(!fib6_entry))
5376                 return -EINVAL;
5377
5378         if (fib6_entry) {
5379                 list_add_tail(&new6_entry->common.list,
5380                               &fib6_entry->common.list);
5381         } else {
5382                 struct mlxsw_sp_fib6_entry *last;
5383
5384                 list_for_each_entry(last, &fib_node->entry_list, common.list) {
5385                         struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(last);
5386
5387                         if (nrt->fib6_table->tb6_id > rt->fib6_table->tb6_id)
5388                                 break;
5389                         fib6_entry = last;
5390                 }
5391
5392                 if (fib6_entry)
5393                         list_add(&new6_entry->common.list,
5394                                  &fib6_entry->common.list);
5395                 else
5396                         list_add(&new6_entry->common.list,
5397                                  &fib_node->entry_list);
5398         }
5399
5400         return 0;
5401 }
5402
5403 static void
5404 mlxsw_sp_fib6_node_list_remove(struct mlxsw_sp_fib6_entry *fib6_entry)
5405 {
5406         list_del(&fib6_entry->common.list);
5407 }
5408
5409 static int mlxsw_sp_fib6_node_entry_link(struct mlxsw_sp *mlxsw_sp,
5410                                          struct mlxsw_sp_fib6_entry *fib6_entry,
5411                                          bool replace)
5412 {
5413         int err;
5414
5415         err = mlxsw_sp_fib6_node_list_insert(fib6_entry, replace);
5416         if (err)
5417                 return err;
5418
5419         err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
5420         if (err)
5421                 goto err_fib_node_entry_add;
5422
5423         return 0;
5424
5425 err_fib_node_entry_add:
5426         mlxsw_sp_fib6_node_list_remove(fib6_entry);
5427         return err;
5428 }
5429
5430 static void
5431 mlxsw_sp_fib6_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
5432                                 struct mlxsw_sp_fib6_entry *fib6_entry)
5433 {
5434         mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib6_entry->common);
5435         mlxsw_sp_fib6_node_list_remove(fib6_entry);
5436 }
5437
5438 static struct mlxsw_sp_fib6_entry *
5439 mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp,
5440                            const struct fib6_info *rt)
5441 {
5442         struct mlxsw_sp_fib6_entry *fib6_entry;
5443         struct mlxsw_sp_fib_node *fib_node;
5444         struct mlxsw_sp_fib *fib;
5445         struct mlxsw_sp_vr *vr;
5446
5447         vr = mlxsw_sp_vr_find(mlxsw_sp, rt->fib6_table->tb6_id);
5448         if (!vr)
5449                 return NULL;
5450         fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV6);
5451
5452         fib_node = mlxsw_sp_fib_node_lookup(fib, &rt->fib6_dst.addr,
5453                                             sizeof(rt->fib6_dst.addr),
5454                                             rt->fib6_dst.plen);
5455         if (!fib_node)
5456                 return NULL;
5457
5458         list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
5459                 struct fib6_info *iter_rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
5460
5461                 if (rt->fib6_table->tb6_id == iter_rt->fib6_table->tb6_id &&
5462                     rt->fib6_metric == iter_rt->fib6_metric &&
5463                     mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt))
5464                         return fib6_entry;
5465         }
5466
5467         return NULL;
5468 }
5469
5470 static void mlxsw_sp_fib6_entry_replace(struct mlxsw_sp *mlxsw_sp,
5471                                         struct mlxsw_sp_fib6_entry *fib6_entry,
5472                                         bool replace)
5473 {
5474         struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;
5475         struct mlxsw_sp_fib6_entry *replaced;
5476
5477         if (!replace)
5478                 return;
5479
5480         replaced = list_next_entry(fib6_entry, common.list);
5481
5482         mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, replaced);
5483         mlxsw_sp_fib6_entry_destroy(mlxsw_sp, replaced);
5484         mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5485 }
5486
5487 static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
5488                                     struct fib6_info *rt, bool replace)
5489 {
5490         struct mlxsw_sp_fib6_entry *fib6_entry;
5491         struct mlxsw_sp_fib_node *fib_node;
5492         int err;
5493
5494         if (mlxsw_sp->router->aborted)
5495                 return 0;
5496
5497         if (rt->fib6_src.plen)
5498                 return -EINVAL;
5499
5500         if (mlxsw_sp_fib6_rt_should_ignore(rt))
5501                 return 0;
5502
5503         fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->fib6_table->tb6_id,
5504                                          &rt->fib6_dst.addr,
5505                                          sizeof(rt->fib6_dst.addr),
5506                                          rt->fib6_dst.plen,
5507                                          MLXSW_SP_L3_PROTO_IPV6);
5508         if (IS_ERR(fib_node))
5509                 return PTR_ERR(fib_node);
5510
5511         /* Before creating a new entry, try to append route to an existing
5512          * multipath entry.
5513          */
5514         fib6_entry = mlxsw_sp_fib6_node_mp_entry_find(fib_node, rt, replace);
5515         if (fib6_entry) {
5516                 err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt);
5517                 if (err)
5518                         goto err_fib6_entry_nexthop_add;
5519                 return 0;
5520         }
5521
5522         fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt);
5523         if (IS_ERR(fib6_entry)) {
5524                 err = PTR_ERR(fib6_entry);
5525                 goto err_fib6_entry_create;
5526         }
5527
5528         err = mlxsw_sp_fib6_node_entry_link(mlxsw_sp, fib6_entry, replace);
5529         if (err)
5530                 goto err_fib6_node_entry_link;
5531
5532         mlxsw_sp_fib6_entry_replace(mlxsw_sp, fib6_entry, replace);
5533
5534         return 0;
5535
5536 err_fib6_node_entry_link:
5537         mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
5538 err_fib6_entry_create:
5539 err_fib6_entry_nexthop_add:
5540         mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5541         return err;
5542 }
5543
5544 static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp,
5545                                      struct fib6_info *rt)
5546 {
5547         struct mlxsw_sp_fib6_entry *fib6_entry;
5548         struct mlxsw_sp_fib_node *fib_node;
5549
5550         if (mlxsw_sp->router->aborted)
5551                 return;
5552
5553         if (mlxsw_sp_fib6_rt_should_ignore(rt))
5554                 return;
5555
5556         fib6_entry = mlxsw_sp_fib6_entry_lookup(mlxsw_sp, rt);
5557         if (WARN_ON(!fib6_entry))
5558                 return;
5559
5560         /* If route is part of a multipath entry, but not the last one
5561          * removed, then only reduce its nexthop group.
5562          */
5563         if (!list_is_singular(&fib6_entry->rt6_list)) {
5564                 mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt);
5565                 return;
5566         }
5567
5568         fib_node = fib6_entry->common.fib_node;
5569
5570         mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry);
5571         mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
5572         mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5573 }
5574
5575 static int __mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp,
5576                                             enum mlxsw_reg_ralxx_protocol proto,
5577                                             u8 tree_id)
5578 {
5579         char ralta_pl[MLXSW_REG_RALTA_LEN];
5580         char ralst_pl[MLXSW_REG_RALST_LEN];
5581         int i, err;
5582
5583         mlxsw_reg_ralta_pack(ralta_pl, true, proto, tree_id);
5584         err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
5585         if (err)
5586                 return err;
5587
5588         mlxsw_reg_ralst_pack(ralst_pl, 0xff, tree_id);
5589         err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
5590         if (err)
5591                 return err;
5592
5593         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
5594                 struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
5595                 char raltb_pl[MLXSW_REG_RALTB_LEN];
5596                 char ralue_pl[MLXSW_REG_RALUE_LEN];
5597
5598                 mlxsw_reg_raltb_pack(raltb_pl, vr->id, proto, tree_id);
5599                 err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb),
5600                                       raltb_pl);
5601                 if (err)
5602                         return err;
5603
5604                 mlxsw_reg_ralue_pack(ralue_pl, proto,
5605                                      MLXSW_REG_RALUE_OP_WRITE_WRITE, vr->id, 0);
5606                 mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
5607                 err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue),
5608                                       ralue_pl);
5609                 if (err)
5610                         return err;
5611         }
5612
5613         return 0;
5614 }
5615
5616 static struct mlxsw_sp_mr_table *
5617 mlxsw_sp_router_fibmr_family_to_table(struct mlxsw_sp_vr *vr, int family)
5618 {
5619         if (family == RTNL_FAMILY_IPMR)
5620                 return vr->mr_table[MLXSW_SP_L3_PROTO_IPV4];
5621         else
5622                 return vr->mr_table[MLXSW_SP_L3_PROTO_IPV6];
5623 }
5624
5625 static int mlxsw_sp_router_fibmr_add(struct mlxsw_sp *mlxsw_sp,
5626                                      struct mfc_entry_notifier_info *men_info,
5627                                      bool replace)
5628 {
5629         struct mlxsw_sp_mr_table *mrt;
5630         struct mlxsw_sp_vr *vr;
5631
5632         if (mlxsw_sp->router->aborted)
5633                 return 0;
5634
5635         vr = mlxsw_sp_vr_get(mlxsw_sp, men_info->tb_id, NULL);
5636         if (IS_ERR(vr))
5637                 return PTR_ERR(vr);
5638
5639         mrt = mlxsw_sp_router_fibmr_family_to_table(vr, men_info->info.family);
5640         return mlxsw_sp_mr_route_add(mrt, men_info->mfc, replace);
5641 }
5642
5643 static void mlxsw_sp_router_fibmr_del(struct mlxsw_sp *mlxsw_sp,
5644                                       struct mfc_entry_notifier_info *men_info)
5645 {
5646         struct mlxsw_sp_mr_table *mrt;
5647         struct mlxsw_sp_vr *vr;
5648
5649         if (mlxsw_sp->router->aborted)
5650                 return;
5651
5652         vr = mlxsw_sp_vr_find(mlxsw_sp, men_info->tb_id);
5653         if (WARN_ON(!vr))
5654                 return;
5655
5656         mrt = mlxsw_sp_router_fibmr_family_to_table(vr, men_info->info.family);
5657         mlxsw_sp_mr_route_del(mrt, men_info->mfc);
5658         mlxsw_sp_vr_put(mlxsw_sp, vr);
5659 }
5660
5661 static int
5662 mlxsw_sp_router_fibmr_vif_add(struct mlxsw_sp *mlxsw_sp,
5663                               struct vif_entry_notifier_info *ven_info)
5664 {
5665         struct mlxsw_sp_mr_table *mrt;
5666         struct mlxsw_sp_rif *rif;
5667         struct mlxsw_sp_vr *vr;
5668
5669         if (mlxsw_sp->router->aborted)
5670                 return 0;
5671
5672         vr = mlxsw_sp_vr_get(mlxsw_sp, ven_info->tb_id, NULL);
5673         if (IS_ERR(vr))
5674                 return PTR_ERR(vr);
5675
5676         mrt = mlxsw_sp_router_fibmr_family_to_table(vr, ven_info->info.family);
5677         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, ven_info->dev);
5678         return mlxsw_sp_mr_vif_add(mrt, ven_info->dev,
5679                                    ven_info->vif_index,
5680                                    ven_info->vif_flags, rif);
5681 }
5682
5683 static void
5684 mlxsw_sp_router_fibmr_vif_del(struct mlxsw_sp *mlxsw_sp,
5685                               struct vif_entry_notifier_info *ven_info)
5686 {
5687         struct mlxsw_sp_mr_table *mrt;
5688         struct mlxsw_sp_vr *vr;
5689
5690         if (mlxsw_sp->router->aborted)
5691                 return;
5692
5693         vr = mlxsw_sp_vr_find(mlxsw_sp, ven_info->tb_id);
5694         if (WARN_ON(!vr))
5695                 return;
5696
5697         mrt = mlxsw_sp_router_fibmr_family_to_table(vr, ven_info->info.family);
5698         mlxsw_sp_mr_vif_del(mrt, ven_info->vif_index);
5699         mlxsw_sp_vr_put(mlxsw_sp, vr);
5700 }
5701
5702 static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
5703 {
5704         enum mlxsw_reg_ralxx_protocol proto = MLXSW_REG_RALXX_PROTOCOL_IPV4;
5705         int err;
5706
5707         err = __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
5708                                                MLXSW_SP_LPM_TREE_MIN);
5709         if (err)
5710                 return err;
5711
5712         /* The multicast router code does not need an abort trap as by default,
5713          * packets that don't match any routes are trapped to the CPU.
5714          */
5715
5716         proto = MLXSW_REG_RALXX_PROTOCOL_IPV6;
5717         return __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
5718                                                 MLXSW_SP_LPM_TREE_MIN + 1);
5719 }
5720
5721 static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp,
5722                                      struct mlxsw_sp_fib_node *fib_node)
5723 {
5724         struct mlxsw_sp_fib4_entry *fib4_entry, *tmp;
5725
5726         list_for_each_entry_safe(fib4_entry, tmp, &fib_node->entry_list,
5727                                  common.list) {
5728                 bool do_break = &tmp->common.list == &fib_node->entry_list;
5729
5730                 mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry);
5731                 mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
5732                 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5733                 /* Break when entry list is empty and node was freed.
5734                  * Otherwise, we'll access freed memory in the next
5735                  * iteration.
5736                  */
5737                 if (do_break)
5738                         break;
5739         }
5740 }
5741
5742 static void mlxsw_sp_fib6_node_flush(struct mlxsw_sp *mlxsw_sp,
5743                                      struct mlxsw_sp_fib_node *fib_node)
5744 {
5745         struct mlxsw_sp_fib6_entry *fib6_entry, *tmp;
5746
5747         list_for_each_entry_safe(fib6_entry, tmp, &fib_node->entry_list,
5748                                  common.list) {
5749                 bool do_break = &tmp->common.list == &fib_node->entry_list;
5750
5751                 mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry);
5752                 mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
5753                 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5754                 if (do_break)
5755                         break;
5756         }
5757 }
5758
5759 static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp,
5760                                     struct mlxsw_sp_fib_node *fib_node)
5761 {
5762         switch (fib_node->fib->proto) {
5763         case MLXSW_SP_L3_PROTO_IPV4:
5764                 mlxsw_sp_fib4_node_flush(mlxsw_sp, fib_node);
5765                 break;
5766         case MLXSW_SP_L3_PROTO_IPV6:
5767                 mlxsw_sp_fib6_node_flush(mlxsw_sp, fib_node);
5768                 break;
5769         }
5770 }
5771
5772 static void mlxsw_sp_vr_fib_flush(struct mlxsw_sp *mlxsw_sp,
5773                                   struct mlxsw_sp_vr *vr,
5774                                   enum mlxsw_sp_l3proto proto)
5775 {
5776         struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
5777         struct mlxsw_sp_fib_node *fib_node, *tmp;
5778
5779         list_for_each_entry_safe(fib_node, tmp, &fib->node_list, list) {
5780                 bool do_break = &tmp->list == &fib->node_list;
5781
5782                 mlxsw_sp_fib_node_flush(mlxsw_sp, fib_node);
5783                 if (do_break)
5784                         break;
5785         }
5786 }
5787
5788 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
5789 {
5790         int i, j;
5791
5792         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
5793                 struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
5794
5795                 if (!mlxsw_sp_vr_is_used(vr))
5796                         continue;
5797
5798                 for (j = 0; j < MLXSW_SP_L3_PROTO_MAX; j++)
5799                         mlxsw_sp_mr_table_flush(vr->mr_table[j]);
5800                 mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
5801
5802                 /* If virtual router was only used for IPv4, then it's no
5803                  * longer used.
5804                  */
5805                 if (!mlxsw_sp_vr_is_used(vr))
5806                         continue;
5807                 mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
5808         }
5809 }
5810
5811 static void mlxsw_sp_router_fib_abort(struct mlxsw_sp *mlxsw_sp)
5812 {
5813         int err;
5814
5815         if (mlxsw_sp->router->aborted)
5816                 return;
5817         dev_warn(mlxsw_sp->bus_info->dev, "FIB abort triggered. Note that FIB entries are no longer being offloaded to this device.\n");
5818         mlxsw_sp_router_fib_flush(mlxsw_sp);
5819         mlxsw_sp->router->aborted = true;
5820         err = mlxsw_sp_router_set_abort_trap(mlxsw_sp);
5821         if (err)
5822                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n");
5823 }
5824
5825 struct mlxsw_sp_fib_event_work {
5826         struct work_struct work;
5827         union {
5828                 struct fib6_entry_notifier_info fen6_info;
5829                 struct fib_entry_notifier_info fen_info;
5830                 struct fib_rule_notifier_info fr_info;
5831                 struct fib_nh_notifier_info fnh_info;
5832                 struct mfc_entry_notifier_info men_info;
5833                 struct vif_entry_notifier_info ven_info;
5834         };
5835         struct mlxsw_sp *mlxsw_sp;
5836         unsigned long event;
5837 };
5838
5839 static void mlxsw_sp_router_fib4_event_work(struct work_struct *work)
5840 {
5841         struct mlxsw_sp_fib_event_work *fib_work =
5842                 container_of(work, struct mlxsw_sp_fib_event_work, work);
5843         struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
5844         bool replace, append;
5845         int err;
5846
5847         /* Protect internal structures from changes */
5848         rtnl_lock();
5849         mlxsw_sp_span_respin(mlxsw_sp);
5850
5851         switch (fib_work->event) {
5852         case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5853         case FIB_EVENT_ENTRY_APPEND: /* fall through */
5854         case FIB_EVENT_ENTRY_ADD:
5855                 replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
5856                 append = fib_work->event == FIB_EVENT_ENTRY_APPEND;
5857                 err = mlxsw_sp_router_fib4_add(mlxsw_sp, &fib_work->fen_info,
5858                                                replace, append);
5859                 if (err)
5860                         mlxsw_sp_router_fib_abort(mlxsw_sp);
5861                 fib_info_put(fib_work->fen_info.fi);
5862                 break;
5863         case FIB_EVENT_ENTRY_DEL:
5864                 mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info);
5865                 fib_info_put(fib_work->fen_info.fi);
5866                 break;
5867         case FIB_EVENT_RULE_ADD:
5868                 /* if we get here, a rule was added that we do not support.
5869                  * just do the fib_abort
5870                  */
5871                 mlxsw_sp_router_fib_abort(mlxsw_sp);
5872                 break;
5873         case FIB_EVENT_NH_ADD: /* fall through */
5874         case FIB_EVENT_NH_DEL:
5875                 mlxsw_sp_nexthop4_event(mlxsw_sp, fib_work->event,
5876                                         fib_work->fnh_info.fib_nh);
5877                 fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
5878                 break;
5879         }
5880         rtnl_unlock();
5881         kfree(fib_work);
5882 }
5883
5884 static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
5885 {
5886         struct mlxsw_sp_fib_event_work *fib_work =
5887                 container_of(work, struct mlxsw_sp_fib_event_work, work);
5888         struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
5889         bool replace;
5890         int err;
5891
5892         rtnl_lock();
5893         mlxsw_sp_span_respin(mlxsw_sp);
5894
5895         switch (fib_work->event) {
5896         case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5897         case FIB_EVENT_ENTRY_APPEND: /* fall through */
5898         case FIB_EVENT_ENTRY_ADD:
5899                 replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
5900                 err = mlxsw_sp_router_fib6_add(mlxsw_sp,
5901                                                fib_work->fen6_info.rt, replace);
5902                 if (err)
5903                         mlxsw_sp_router_fib_abort(mlxsw_sp);
5904                 mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
5905                 break;
5906         case FIB_EVENT_ENTRY_DEL:
5907                 mlxsw_sp_router_fib6_del(mlxsw_sp, fib_work->fen6_info.rt);
5908                 mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
5909                 break;
5910         case FIB_EVENT_RULE_ADD:
5911                 /* if we get here, a rule was added that we do not support.
5912                  * just do the fib_abort
5913                  */
5914                 mlxsw_sp_router_fib_abort(mlxsw_sp);
5915                 break;
5916         }
5917         rtnl_unlock();
5918         kfree(fib_work);
5919 }
5920
5921 static void mlxsw_sp_router_fibmr_event_work(struct work_struct *work)
5922 {
5923         struct mlxsw_sp_fib_event_work *fib_work =
5924                 container_of(work, struct mlxsw_sp_fib_event_work, work);
5925         struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
5926         bool replace;
5927         int err;
5928
5929         rtnl_lock();
5930         switch (fib_work->event) {
5931         case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5932         case FIB_EVENT_ENTRY_ADD:
5933                 replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
5934
5935                 err = mlxsw_sp_router_fibmr_add(mlxsw_sp, &fib_work->men_info,
5936                                                 replace);
5937                 if (err)
5938                         mlxsw_sp_router_fib_abort(mlxsw_sp);
5939                 mr_cache_put(fib_work->men_info.mfc);
5940                 break;
5941         case FIB_EVENT_ENTRY_DEL:
5942                 mlxsw_sp_router_fibmr_del(mlxsw_sp, &fib_work->men_info);
5943                 mr_cache_put(fib_work->men_info.mfc);
5944                 break;
5945         case FIB_EVENT_VIF_ADD:
5946                 err = mlxsw_sp_router_fibmr_vif_add(mlxsw_sp,
5947                                                     &fib_work->ven_info);
5948                 if (err)
5949                         mlxsw_sp_router_fib_abort(mlxsw_sp);
5950                 dev_put(fib_work->ven_info.dev);
5951                 break;
5952         case FIB_EVENT_VIF_DEL:
5953                 mlxsw_sp_router_fibmr_vif_del(mlxsw_sp,
5954                                               &fib_work->ven_info);
5955                 dev_put(fib_work->ven_info.dev);
5956                 break;
5957         case FIB_EVENT_RULE_ADD:
5958                 /* if we get here, a rule was added that we do not support.
5959                  * just do the fib_abort
5960                  */
5961                 mlxsw_sp_router_fib_abort(mlxsw_sp);
5962                 break;
5963         }
5964         rtnl_unlock();
5965         kfree(fib_work);
5966 }
5967
5968 static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work,
5969                                        struct fib_notifier_info *info)
5970 {
5971         struct fib_entry_notifier_info *fen_info;
5972         struct fib_nh_notifier_info *fnh_info;
5973
5974         switch (fib_work->event) {
5975         case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5976         case FIB_EVENT_ENTRY_APPEND: /* fall through */
5977         case FIB_EVENT_ENTRY_ADD: /* fall through */
5978         case FIB_EVENT_ENTRY_DEL:
5979                 fen_info = container_of(info, struct fib_entry_notifier_info,
5980                                         info);
5981                 fib_work->fen_info = *fen_info;
5982                 /* Take reference on fib_info to prevent it from being
5983                  * freed while work is queued. Release it afterwards.
5984                  */
5985                 fib_info_hold(fib_work->fen_info.fi);
5986                 break;
5987         case FIB_EVENT_NH_ADD: /* fall through */
5988         case FIB_EVENT_NH_DEL:
5989                 fnh_info = container_of(info, struct fib_nh_notifier_info,
5990                                         info);
5991                 fib_work->fnh_info = *fnh_info;
5992                 fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
5993                 break;
5994         }
5995 }
5996
5997 static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work,
5998                                        struct fib_notifier_info *info)
5999 {
6000         struct fib6_entry_notifier_info *fen6_info;
6001
6002         switch (fib_work->event) {
6003         case FIB_EVENT_ENTRY_REPLACE: /* fall through */
6004         case FIB_EVENT_ENTRY_APPEND: /* fall through */
6005         case FIB_EVENT_ENTRY_ADD: /* fall through */
6006         case FIB_EVENT_ENTRY_DEL:
6007                 fen6_info = container_of(info, struct fib6_entry_notifier_info,
6008                                          info);
6009                 fib_work->fen6_info = *fen6_info;
6010                 fib6_info_hold(fib_work->fen6_info.rt);
6011                 break;
6012         }
6013 }
6014
6015 static void
6016 mlxsw_sp_router_fibmr_event(struct mlxsw_sp_fib_event_work *fib_work,
6017                             struct fib_notifier_info *info)
6018 {
6019         switch (fib_work->event) {
6020         case FIB_EVENT_ENTRY_REPLACE: /* fall through */
6021         case FIB_EVENT_ENTRY_ADD: /* fall through */
6022         case FIB_EVENT_ENTRY_DEL:
6023                 memcpy(&fib_work->men_info, info, sizeof(fib_work->men_info));
6024                 mr_cache_hold(fib_work->men_info.mfc);
6025                 break;
6026         case FIB_EVENT_VIF_ADD: /* fall through */
6027         case FIB_EVENT_VIF_DEL:
6028                 memcpy(&fib_work->ven_info, info, sizeof(fib_work->ven_info));
6029                 dev_hold(fib_work->ven_info.dev);
6030                 break;
6031         }
6032 }
6033
6034 static int mlxsw_sp_router_fib_rule_event(unsigned long event,
6035                                           struct fib_notifier_info *info,
6036                                           struct mlxsw_sp *mlxsw_sp)
6037 {
6038         struct netlink_ext_ack *extack = info->extack;
6039         struct fib_rule_notifier_info *fr_info;
6040         struct fib_rule *rule;
6041         int err = 0;
6042
6043         /* nothing to do at the moment */
6044         if (event == FIB_EVENT_RULE_DEL)
6045                 return 0;
6046
6047         if (mlxsw_sp->router->aborted)
6048                 return 0;
6049
6050         fr_info = container_of(info, struct fib_rule_notifier_info, info);
6051         rule = fr_info->rule;
6052
6053         /* Rule only affects locally generated traffic */
6054         if (rule->iifindex == info->net->loopback_dev->ifindex)
6055                 return 0;
6056
6057         switch (info->family) {
6058         case AF_INET:
6059                 if (!fib4_rule_default(rule) && !rule->l3mdev)
6060                         err = -EOPNOTSUPP;
6061                 break;
6062         case AF_INET6:
6063                 if (!fib6_rule_default(rule) && !rule->l3mdev)
6064                         err = -EOPNOTSUPP;
6065                 break;
6066         case RTNL_FAMILY_IPMR:
6067                 if (!ipmr_rule_default(rule) && !rule->l3mdev)
6068                         err = -EOPNOTSUPP;
6069                 break;
6070         case RTNL_FAMILY_IP6MR:
6071                 if (!ip6mr_rule_default(rule) && !rule->l3mdev)
6072                         err = -EOPNOTSUPP;
6073                 break;
6074         }
6075
6076         if (err < 0)
6077                 NL_SET_ERR_MSG_MOD(extack, "FIB rules not supported");
6078
6079         return err;
6080 }
6081
6082 /* Called with rcu_read_lock() */
6083 static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
6084                                      unsigned long event, void *ptr)
6085 {
6086         struct mlxsw_sp_fib_event_work *fib_work;
6087         struct fib_notifier_info *info = ptr;
6088         struct mlxsw_sp_router *router;
6089         int err;
6090
6091         if (!net_eq(info->net, &init_net) ||
6092             (info->family != AF_INET && info->family != AF_INET6 &&
6093              info->family != RTNL_FAMILY_IPMR &&
6094              info->family != RTNL_FAMILY_IP6MR))
6095                 return NOTIFY_DONE;
6096
6097         router = container_of(nb, struct mlxsw_sp_router, fib_nb);
6098
6099         switch (event) {
6100         case FIB_EVENT_RULE_ADD: /* fall through */
6101         case FIB_EVENT_RULE_DEL:
6102                 err = mlxsw_sp_router_fib_rule_event(event, info,
6103                                                      router->mlxsw_sp);
6104                 if (!err || info->extack)
6105                         return notifier_from_errno(err);
6106                 break;
6107         case FIB_EVENT_ENTRY_ADD:
6108         case FIB_EVENT_ENTRY_REPLACE: /* fall through */
6109         case FIB_EVENT_ENTRY_APPEND:  /* fall through */
6110                 if (router->aborted) {
6111                         NL_SET_ERR_MSG_MOD(info->extack, "FIB offload was aborted. Not configuring route");
6112                         return notifier_from_errno(-EINVAL);
6113                 }
6114                 if (info->family == AF_INET) {
6115                         struct fib_entry_notifier_info *fen_info = ptr;
6116
6117                         if (fen_info->fi->fib_nh_is_v6) {
6118                                 NL_SET_ERR_MSG_MOD(info->extack, "IPv6 gateway with IPv4 route is not supported");
6119                                 return notifier_from_errno(-EINVAL);
6120                         }
6121                 }
6122                 break;
6123         }
6124
6125         fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
6126         if (WARN_ON(!fib_work))
6127                 return NOTIFY_BAD;
6128
6129         fib_work->mlxsw_sp = router->mlxsw_sp;
6130         fib_work->event = event;
6131
6132         switch (info->family) {
6133         case AF_INET:
6134                 INIT_WORK(&fib_work->work, mlxsw_sp_router_fib4_event_work);
6135                 mlxsw_sp_router_fib4_event(fib_work, info);
6136                 break;
6137         case AF_INET6:
6138                 INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work);
6139                 mlxsw_sp_router_fib6_event(fib_work, info);
6140                 break;
6141         case RTNL_FAMILY_IP6MR:
6142         case RTNL_FAMILY_IPMR:
6143                 INIT_WORK(&fib_work->work, mlxsw_sp_router_fibmr_event_work);
6144                 mlxsw_sp_router_fibmr_event(fib_work, info);
6145                 break;
6146         }
6147
6148         mlxsw_core_schedule_work(&fib_work->work);
6149
6150         return NOTIFY_DONE;
6151 }
6152
6153 struct mlxsw_sp_rif *
6154 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
6155                          const struct net_device *dev)
6156 {
6157         int i;
6158
6159         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
6160                 if (mlxsw_sp->router->rifs[i] &&
6161                     mlxsw_sp->router->rifs[i]->dev == dev)
6162                         return mlxsw_sp->router->rifs[i];
6163
6164         return NULL;
6165 }
6166
6167 static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif)
6168 {
6169         char ritr_pl[MLXSW_REG_RITR_LEN];
6170         int err;
6171
6172         mlxsw_reg_ritr_rif_pack(ritr_pl, rif);
6173         err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6174         if (err)
6175                 return err;
6176
6177         mlxsw_reg_ritr_enable_set(ritr_pl, false);
6178         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6179 }
6180
6181 static void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
6182                                           struct mlxsw_sp_rif *rif)
6183 {
6184         mlxsw_sp_router_rif_disable(mlxsw_sp, rif->rif_index);
6185         mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp, rif);
6186         mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif);
6187 }
6188
6189 static bool
6190 mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, struct net_device *dev,
6191                            unsigned long event)
6192 {
6193         struct inet6_dev *inet6_dev;
6194         bool addr_list_empty = true;
6195         struct in_device *idev;
6196
6197         switch (event) {
6198         case NETDEV_UP:
6199                 return rif == NULL;
6200         case NETDEV_DOWN:
6201                 idev = __in_dev_get_rtnl(dev);
6202                 if (idev && idev->ifa_list)
6203                         addr_list_empty = false;
6204
6205                 inet6_dev = __in6_dev_get(dev);
6206                 if (addr_list_empty && inet6_dev &&
6207                     !list_empty(&inet6_dev->addr_list))
6208                         addr_list_empty = false;
6209
6210                 /* macvlans do not have a RIF, but rather piggy back on the
6211                  * RIF of their lower device.
6212                  */
6213                 if (netif_is_macvlan(dev) && addr_list_empty)
6214                         return true;
6215
6216                 if (rif && addr_list_empty &&
6217                     !netif_is_l3_slave(rif->dev))
6218                         return true;
6219                 /* It is possible we already removed the RIF ourselves
6220                  * if it was assigned to a netdev that is now a bridge
6221                  * or LAG slave.
6222                  */
6223                 return false;
6224         }
6225
6226         return false;
6227 }
6228
6229 static enum mlxsw_sp_rif_type
6230 mlxsw_sp_dev_rif_type(const struct mlxsw_sp *mlxsw_sp,
6231                       const struct net_device *dev)
6232 {
6233         enum mlxsw_sp_fid_type type;
6234
6235         if (mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL))
6236                 return MLXSW_SP_RIF_TYPE_IPIP_LB;
6237
6238         /* Otherwise RIF type is derived from the type of the underlying FID. */
6239         if (is_vlan_dev(dev) && netif_is_bridge_master(vlan_dev_real_dev(dev)))
6240                 type = MLXSW_SP_FID_TYPE_8021Q;
6241         else if (netif_is_bridge_master(dev) && br_vlan_enabled(dev))
6242                 type = MLXSW_SP_FID_TYPE_8021Q;
6243         else if (netif_is_bridge_master(dev))
6244                 type = MLXSW_SP_FID_TYPE_8021D;
6245         else
6246                 type = MLXSW_SP_FID_TYPE_RFID;
6247
6248         return mlxsw_sp_fid_type_rif_type(mlxsw_sp, type);
6249 }
6250
6251 static int mlxsw_sp_rif_index_alloc(struct mlxsw_sp *mlxsw_sp, u16 *p_rif_index)
6252 {
6253         int i;
6254
6255         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
6256                 if (!mlxsw_sp->router->rifs[i]) {
6257                         *p_rif_index = i;
6258                         return 0;
6259                 }
6260         }
6261
6262         return -ENOBUFS;
6263 }
6264
6265 static struct mlxsw_sp_rif *mlxsw_sp_rif_alloc(size_t rif_size, u16 rif_index,
6266                                                u16 vr_id,
6267                                                struct net_device *l3_dev)
6268 {
6269         struct mlxsw_sp_rif *rif;
6270
6271         rif = kzalloc(rif_size, GFP_KERNEL);
6272         if (!rif)
6273                 return NULL;
6274
6275         INIT_LIST_HEAD(&rif->nexthop_list);
6276         INIT_LIST_HEAD(&rif->neigh_list);
6277         if (l3_dev) {
6278                 ether_addr_copy(rif->addr, l3_dev->dev_addr);
6279                 rif->mtu = l3_dev->mtu;
6280                 rif->dev = l3_dev;
6281         }
6282         rif->vr_id = vr_id;
6283         rif->rif_index = rif_index;
6284
6285         return rif;
6286 }
6287
6288 struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp,
6289                                            u16 rif_index)
6290 {
6291         return mlxsw_sp->router->rifs[rif_index];
6292 }
6293
6294 u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif)
6295 {
6296         return rif->rif_index;
6297 }
6298
6299 u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
6300 {
6301         return lb_rif->common.rif_index;
6302 }
6303
6304 u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
6305 {
6306         u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(lb_rif->common.dev);
6307         struct mlxsw_sp_vr *ul_vr;
6308
6309         ul_vr = mlxsw_sp_vr_get(lb_rif->common.mlxsw_sp, ul_tb_id, NULL);
6310         if (WARN_ON(IS_ERR(ul_vr)))
6311                 return 0;
6312
6313         return ul_vr->id;
6314 }
6315
6316 u16 mlxsw_sp_ipip_lb_ul_rif_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
6317 {
6318         return lb_rif->ul_rif_id;
6319 }
6320
6321 int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif)
6322 {
6323         return rif->dev->ifindex;
6324 }
6325
6326 const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif)
6327 {
6328         return rif->dev;
6329 }
6330
6331 struct mlxsw_sp_fid *mlxsw_sp_rif_fid(const struct mlxsw_sp_rif *rif)
6332 {
6333         return rif->fid;
6334 }
6335
6336 static struct mlxsw_sp_rif *
6337 mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
6338                     const struct mlxsw_sp_rif_params *params,
6339                     struct netlink_ext_ack *extack)
6340 {
6341         u32 tb_id = l3mdev_fib_table(params->dev);
6342         const struct mlxsw_sp_rif_ops *ops;
6343         struct mlxsw_sp_fid *fid = NULL;
6344         enum mlxsw_sp_rif_type type;
6345         struct mlxsw_sp_rif *rif;
6346         struct mlxsw_sp_vr *vr;
6347         u16 rif_index;
6348         int i, err;
6349
6350         type = mlxsw_sp_dev_rif_type(mlxsw_sp, params->dev);
6351         ops = mlxsw_sp->rif_ops_arr[type];
6352
6353         vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN, extack);
6354         if (IS_ERR(vr))
6355                 return ERR_CAST(vr);
6356         vr->rif_count++;
6357
6358         err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index);
6359         if (err) {
6360                 NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces");
6361                 goto err_rif_index_alloc;
6362         }
6363
6364         rif = mlxsw_sp_rif_alloc(ops->rif_size, rif_index, vr->id, params->dev);
6365         if (!rif) {
6366                 err = -ENOMEM;
6367                 goto err_rif_alloc;
6368         }
6369         dev_hold(rif->dev);
6370         mlxsw_sp->router->rifs[rif_index] = rif;
6371         rif->mlxsw_sp = mlxsw_sp;
6372         rif->ops = ops;
6373
6374         if (ops->fid_get) {
6375                 fid = ops->fid_get(rif, extack);
6376                 if (IS_ERR(fid)) {
6377                         err = PTR_ERR(fid);
6378                         goto err_fid_get;
6379                 }
6380                 rif->fid = fid;
6381         }
6382
6383         if (ops->setup)
6384                 ops->setup(rif, params);
6385
6386         err = ops->configure(rif);
6387         if (err)
6388                 goto err_configure;
6389
6390         for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++) {
6391                 err = mlxsw_sp_mr_rif_add(vr->mr_table[i], rif);
6392                 if (err)
6393                         goto err_mr_rif_add;
6394         }
6395
6396         mlxsw_sp_rif_counters_alloc(rif);
6397
6398         return rif;
6399
6400 err_mr_rif_add:
6401         for (i--; i >= 0; i--)
6402                 mlxsw_sp_mr_rif_del(vr->mr_table[i], rif);
6403         ops->deconfigure(rif);
6404 err_configure:
6405         if (fid)
6406                 mlxsw_sp_fid_put(fid);
6407 err_fid_get:
6408         mlxsw_sp->router->rifs[rif_index] = NULL;
6409         dev_put(rif->dev);
6410         kfree(rif);
6411 err_rif_alloc:
6412 err_rif_index_alloc:
6413         vr->rif_count--;
6414         mlxsw_sp_vr_put(mlxsw_sp, vr);
6415         return ERR_PTR(err);
6416 }
6417
6418 static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif)
6419 {
6420         const struct mlxsw_sp_rif_ops *ops = rif->ops;
6421         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6422         struct mlxsw_sp_fid *fid = rif->fid;
6423         struct mlxsw_sp_vr *vr;
6424         int i;
6425
6426         mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif);
6427         vr = &mlxsw_sp->router->vrs[rif->vr_id];
6428
6429         mlxsw_sp_rif_counters_free(rif);
6430         for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++)
6431                 mlxsw_sp_mr_rif_del(vr->mr_table[i], rif);
6432         ops->deconfigure(rif);
6433         if (fid)
6434                 /* Loopback RIFs are not associated with a FID. */
6435                 mlxsw_sp_fid_put(fid);
6436         mlxsw_sp->router->rifs[rif->rif_index] = NULL;
6437         dev_put(rif->dev);
6438         kfree(rif);
6439         vr->rif_count--;
6440         mlxsw_sp_vr_put(mlxsw_sp, vr);
6441 }
6442
/* Destroy the RIF bound to @dev, if one exists; otherwise do nothing. */
void mlxsw_sp_rif_destroy_by_dev(struct mlxsw_sp *mlxsw_sp,
                                 struct net_device *dev)
{
        struct mlxsw_sp_rif *found = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);

        if (found)
                mlxsw_sp_rif_destroy(found);
}
6453
6454 static void
6455 mlxsw_sp_rif_subport_params_init(struct mlxsw_sp_rif_params *params,
6456                                  struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
6457 {
6458         struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
6459
6460         params->vid = mlxsw_sp_port_vlan->vid;
6461         params->lag = mlxsw_sp_port->lagged;
6462         if (params->lag)
6463                 params->lag_id = mlxsw_sp_port->lag_id;
6464         else
6465                 params->system_port = mlxsw_sp_port->local_port;
6466 }
6467
6468 static struct mlxsw_sp_rif_subport *
6469 mlxsw_sp_rif_subport_rif(const struct mlxsw_sp_rif *rif)
6470 {
6471         return container_of(rif, struct mlxsw_sp_rif_subport, common);
6472 }
6473
6474 static struct mlxsw_sp_rif *
6475 mlxsw_sp_rif_subport_get(struct mlxsw_sp *mlxsw_sp,
6476                          const struct mlxsw_sp_rif_params *params,
6477                          struct netlink_ext_ack *extack)
6478 {
6479         struct mlxsw_sp_rif_subport *rif_subport;
6480         struct mlxsw_sp_rif *rif;
6481
6482         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, params->dev);
6483         if (!rif)
6484                 return mlxsw_sp_rif_create(mlxsw_sp, params, extack);
6485
6486         rif_subport = mlxsw_sp_rif_subport_rif(rif);
6487         refcount_inc(&rif_subport->ref_count);
6488         return rif;
6489 }
6490
6491 static void mlxsw_sp_rif_subport_put(struct mlxsw_sp_rif *rif)
6492 {
6493         struct mlxsw_sp_rif_subport *rif_subport;
6494
6495         rif_subport = mlxsw_sp_rif_subport_rif(rif);
6496         if (!refcount_dec_and_test(&rif_subport->ref_count))
6497                 return;
6498
6499         mlxsw_sp_rif_destroy(rif);
6500 }
6501
6502 static int
6503 mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan,
6504                                struct net_device *l3_dev,
6505                                struct netlink_ext_ack *extack)
6506 {
6507         struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
6508         struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
6509         struct mlxsw_sp_rif_params params = {
6510                 .dev = l3_dev,
6511         };
6512         u16 vid = mlxsw_sp_port_vlan->vid;
6513         struct mlxsw_sp_rif *rif;
6514         struct mlxsw_sp_fid *fid;
6515         int err;
6516
6517         mlxsw_sp_rif_subport_params_init(&params, mlxsw_sp_port_vlan);
6518         rif = mlxsw_sp_rif_subport_get(mlxsw_sp, &params, extack);
6519         if (IS_ERR(rif))
6520                 return PTR_ERR(rif);
6521
6522         /* FID was already created, just take a reference */
6523         fid = rif->ops->fid_get(rif, extack);
6524         err = mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port, vid);
6525         if (err)
6526                 goto err_fid_port_vid_map;
6527
6528         err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, false);
6529         if (err)
6530                 goto err_port_vid_learning_set;
6531
6532         err = mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid,
6533                                         BR_STATE_FORWARDING);
6534         if (err)
6535                 goto err_port_vid_stp_set;
6536
6537         mlxsw_sp_port_vlan->fid = fid;
6538
6539         return 0;
6540
6541 err_port_vid_stp_set:
6542         mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
6543 err_port_vid_learning_set:
6544         mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
6545 err_fid_port_vid_map:
6546         mlxsw_sp_fid_put(fid);
6547         mlxsw_sp_rif_subport_put(rif);
6548         return err;
6549 }
6550
6551 void
6552 mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
6553 {
6554         struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
6555         struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid;
6556         struct mlxsw_sp_rif *rif = mlxsw_sp_fid_rif(fid);
6557         u16 vid = mlxsw_sp_port_vlan->vid;
6558
6559         if (WARN_ON(mlxsw_sp_fid_type(fid) != MLXSW_SP_FID_TYPE_RFID))
6560                 return;
6561
6562         mlxsw_sp_port_vlan->fid = NULL;
6563         mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid, BR_STATE_BLOCKING);
6564         mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
6565         mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
6566         mlxsw_sp_fid_put(fid);
6567         mlxsw_sp_rif_subport_put(rif);
6568 }
6569
6570 static int mlxsw_sp_inetaddr_port_vlan_event(struct net_device *l3_dev,
6571                                              struct net_device *port_dev,
6572                                              unsigned long event, u16 vid,
6573                                              struct netlink_ext_ack *extack)
6574 {
6575         struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(port_dev);
6576         struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
6577
6578         mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
6579         if (WARN_ON(!mlxsw_sp_port_vlan))
6580                 return -EINVAL;
6581
6582         switch (event) {
6583         case NETDEV_UP:
6584                 return mlxsw_sp_port_vlan_router_join(mlxsw_sp_port_vlan,
6585                                                       l3_dev, extack);
6586         case NETDEV_DOWN:
6587                 mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
6588                 break;
6589         }
6590
6591         return 0;
6592 }
6593
6594 static int mlxsw_sp_inetaddr_port_event(struct net_device *port_dev,
6595                                         unsigned long event,
6596                                         struct netlink_ext_ack *extack)
6597 {
6598         if (netif_is_bridge_port(port_dev) ||
6599             netif_is_lag_port(port_dev) ||
6600             netif_is_ovs_port(port_dev))
6601                 return 0;
6602
6603         return mlxsw_sp_inetaddr_port_vlan_event(port_dev, port_dev, event,
6604                                                  MLXSW_SP_DEFAULT_VID, extack);
6605 }
6606
6607 static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev,
6608                                          struct net_device *lag_dev,
6609                                          unsigned long event, u16 vid,
6610                                          struct netlink_ext_ack *extack)
6611 {
6612         struct net_device *port_dev;
6613         struct list_head *iter;
6614         int err;
6615
6616         netdev_for_each_lower_dev(lag_dev, port_dev, iter) {
6617                 if (mlxsw_sp_port_dev_check(port_dev)) {
6618                         err = mlxsw_sp_inetaddr_port_vlan_event(l3_dev,
6619                                                                 port_dev,
6620                                                                 event, vid,
6621                                                                 extack);
6622                         if (err)
6623                                 return err;
6624                 }
6625         }
6626
6627         return 0;
6628 }
6629
6630 static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev,
6631                                        unsigned long event,
6632                                        struct netlink_ext_ack *extack)
6633 {
6634         if (netif_is_bridge_port(lag_dev))
6635                 return 0;
6636
6637         return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event,
6638                                              MLXSW_SP_DEFAULT_VID, extack);
6639 }
6640
6641 static int mlxsw_sp_inetaddr_bridge_event(struct mlxsw_sp *mlxsw_sp,
6642                                           struct net_device *l3_dev,
6643                                           unsigned long event,
6644                                           struct netlink_ext_ack *extack)
6645 {
6646         struct mlxsw_sp_rif_params params = {
6647                 .dev = l3_dev,
6648         };
6649         struct mlxsw_sp_rif *rif;
6650
6651         switch (event) {
6652         case NETDEV_UP:
6653                 rif = mlxsw_sp_rif_create(mlxsw_sp, &params, extack);
6654                 if (IS_ERR(rif))
6655                         return PTR_ERR(rif);
6656                 break;
6657         case NETDEV_DOWN:
6658                 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6659                 mlxsw_sp_rif_destroy(rif);
6660                 break;
6661         }
6662
6663         return 0;
6664 }
6665
6666 static int mlxsw_sp_inetaddr_vlan_event(struct mlxsw_sp *mlxsw_sp,
6667                                         struct net_device *vlan_dev,
6668                                         unsigned long event,
6669                                         struct netlink_ext_ack *extack)
6670 {
6671         struct net_device *real_dev = vlan_dev_real_dev(vlan_dev);
6672         u16 vid = vlan_dev_vlan_id(vlan_dev);
6673
6674         if (netif_is_bridge_port(vlan_dev))
6675                 return 0;
6676
6677         if (mlxsw_sp_port_dev_check(real_dev))
6678                 return mlxsw_sp_inetaddr_port_vlan_event(vlan_dev, real_dev,
6679                                                          event, vid, extack);
6680         else if (netif_is_lag_master(real_dev))
6681                 return __mlxsw_sp_inetaddr_lag_event(vlan_dev, real_dev, event,
6682                                                      vid, extack);
6683         else if (netif_is_bridge_master(real_dev) && br_vlan_enabled(real_dev))
6684                 return mlxsw_sp_inetaddr_bridge_event(mlxsw_sp, vlan_dev, event,
6685                                                       extack);
6686
6687         return 0;
6688 }
6689
6690 static bool mlxsw_sp_rif_macvlan_is_vrrp4(const u8 *mac)
6691 {
6692         u8 vrrp4[ETH_ALEN] = { 0x00, 0x00, 0x5e, 0x00, 0x01, 0x00 };
6693         u8 mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 };
6694
6695         return ether_addr_equal_masked(mac, vrrp4, mask);
6696 }
6697
6698 static bool mlxsw_sp_rif_macvlan_is_vrrp6(const u8 *mac)
6699 {
6700         u8 vrrp6[ETH_ALEN] = { 0x00, 0x00, 0x5e, 0x00, 0x02, 0x00 };
6701         u8 mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 };
6702
6703         return ether_addr_equal_masked(mac, vrrp6, mask);
6704 }
6705
6706 static int mlxsw_sp_rif_vrrp_op(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
6707                                 const u8 *mac, bool adding)
6708 {
6709         char ritr_pl[MLXSW_REG_RITR_LEN];
6710         u8 vrrp_id = adding ? mac[5] : 0;
6711         int err;
6712
6713         if (!mlxsw_sp_rif_macvlan_is_vrrp4(mac) &&
6714             !mlxsw_sp_rif_macvlan_is_vrrp6(mac))
6715                 return 0;
6716
6717         mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
6718         err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6719         if (err)
6720                 return err;
6721
6722         if (mlxsw_sp_rif_macvlan_is_vrrp4(mac))
6723                 mlxsw_reg_ritr_if_vrrp_id_ipv4_set(ritr_pl, vrrp_id);
6724         else
6725                 mlxsw_reg_ritr_if_vrrp_id_ipv6_set(ritr_pl, vrrp_id);
6726
6727         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6728 }
6729
6730 static int mlxsw_sp_rif_macvlan_add(struct mlxsw_sp *mlxsw_sp,
6731                                     const struct net_device *macvlan_dev,
6732                                     struct netlink_ext_ack *extack)
6733 {
6734         struct macvlan_dev *vlan = netdev_priv(macvlan_dev);
6735         struct mlxsw_sp_rif *rif;
6736         int err;
6737
6738         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan->lowerdev);
6739         if (!rif) {
6740                 NL_SET_ERR_MSG_MOD(extack, "macvlan is only supported on top of router interfaces");
6741                 return -EOPNOTSUPP;
6742         }
6743
6744         err = mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
6745                                   mlxsw_sp_fid_index(rif->fid), true);
6746         if (err)
6747                 return err;
6748
6749         err = mlxsw_sp_rif_vrrp_op(mlxsw_sp, rif->rif_index,
6750                                    macvlan_dev->dev_addr, true);
6751         if (err)
6752                 goto err_rif_vrrp_add;
6753
6754         /* Make sure the bridge driver does not have this MAC pointing at
6755          * some other port.
6756          */
6757         if (rif->ops->fdb_del)
6758                 rif->ops->fdb_del(rif, macvlan_dev->dev_addr);
6759
6760         return 0;
6761
6762 err_rif_vrrp_add:
6763         mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
6764                             mlxsw_sp_fid_index(rif->fid), false);
6765         return err;
6766 }
6767
6768 void mlxsw_sp_rif_macvlan_del(struct mlxsw_sp *mlxsw_sp,
6769                               const struct net_device *macvlan_dev)
6770 {
6771         struct macvlan_dev *vlan = netdev_priv(macvlan_dev);
6772         struct mlxsw_sp_rif *rif;
6773
6774         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan->lowerdev);
6775         /* If we do not have a RIF, then we already took care of
6776          * removing the macvlan's MAC during RIF deletion.
6777          */
6778         if (!rif)
6779                 return;
6780         mlxsw_sp_rif_vrrp_op(mlxsw_sp, rif->rif_index, macvlan_dev->dev_addr,
6781                              false);
6782         mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
6783                             mlxsw_sp_fid_index(rif->fid), false);
6784 }
6785
6786 static int mlxsw_sp_inetaddr_macvlan_event(struct mlxsw_sp *mlxsw_sp,
6787                                            struct net_device *macvlan_dev,
6788                                            unsigned long event,
6789                                            struct netlink_ext_ack *extack)
6790 {
6791         switch (event) {
6792         case NETDEV_UP:
6793                 return mlxsw_sp_rif_macvlan_add(mlxsw_sp, macvlan_dev, extack);
6794         case NETDEV_DOWN:
6795                 mlxsw_sp_rif_macvlan_del(mlxsw_sp, macvlan_dev);
6796                 break;
6797         }
6798
6799         return 0;
6800 }
6801
6802 static int mlxsw_sp_router_port_check_rif_addr(struct mlxsw_sp *mlxsw_sp,
6803                                                struct net_device *dev,
6804                                                const unsigned char *dev_addr,
6805                                                struct netlink_ext_ack *extack)
6806 {
6807         struct mlxsw_sp_rif *rif;
6808         int i;
6809
6810         /* A RIF is not created for macvlan netdevs. Their MAC is used to
6811          * populate the FDB
6812          */
6813         if (netif_is_macvlan(dev) || netif_is_l3_master(dev))
6814                 return 0;
6815
6816         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
6817                 rif = mlxsw_sp->router->rifs[i];
6818                 if (rif && rif->dev && rif->dev != dev &&
6819                     !ether_addr_equal_masked(rif->dev->dev_addr, dev_addr,
6820                                              mlxsw_sp->mac_mask)) {
6821                         NL_SET_ERR_MSG_MOD(extack, "All router interface MAC addresses must have the same prefix");
6822                         return -EINVAL;
6823                 }
6824         }
6825
6826         return 0;
6827 }
6828
/* Dispatch an address event to the handler matching the type of @dev:
 * port, LAG master, bridge master, VLAN device or macvlan. The first
 * matching type wins; any other device type is ignored.
 */
static int __mlxsw_sp_inetaddr_event(struct mlxsw_sp *mlxsw_sp,
                                     struct net_device *dev,
                                     unsigned long event,
                                     struct netlink_ext_ack *extack)
{
        if (mlxsw_sp_port_dev_check(dev))
                return mlxsw_sp_inetaddr_port_event(dev, event, extack);

        if (netif_is_lag_master(dev))
                return mlxsw_sp_inetaddr_lag_event(dev, event, extack);

        if (netif_is_bridge_master(dev))
                return mlxsw_sp_inetaddr_bridge_event(mlxsw_sp, dev, event,
                                                      extack);

        if (is_vlan_dev(dev))
                return mlxsw_sp_inetaddr_vlan_event(mlxsw_sp, dev, event,
                                                    extack);

        if (netif_is_macvlan(dev))
                return mlxsw_sp_inetaddr_macvlan_event(mlxsw_sp, dev, event,
                                                       extack);

        return 0;
}
6850
6851 static int mlxsw_sp_inetaddr_event(struct notifier_block *nb,
6852                                    unsigned long event, void *ptr)
6853 {
6854         struct in_ifaddr *ifa = (struct in_ifaddr *) ptr;
6855         struct net_device *dev = ifa->ifa_dev->dev;
6856         struct mlxsw_sp_router *router;
6857         struct mlxsw_sp_rif *rif;
6858         int err = 0;
6859
6860         /* NETDEV_UP event is handled by mlxsw_sp_inetaddr_valid_event */
6861         if (event == NETDEV_UP)
6862                 goto out;
6863
6864         router = container_of(nb, struct mlxsw_sp_router, inetaddr_nb);
6865         rif = mlxsw_sp_rif_find_by_dev(router->mlxsw_sp, dev);
6866         if (!mlxsw_sp_rif_should_config(rif, dev, event))
6867                 goto out;
6868
6869         err = __mlxsw_sp_inetaddr_event(router->mlxsw_sp, dev, event, NULL);
6870 out:
6871         return notifier_from_errno(err);
6872 }
6873
6874 int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused,
6875                                   unsigned long event, void *ptr)
6876 {
6877         struct in_validator_info *ivi = (struct in_validator_info *) ptr;
6878         struct net_device *dev = ivi->ivi_dev->dev;
6879         struct mlxsw_sp *mlxsw_sp;
6880         struct mlxsw_sp_rif *rif;
6881         int err = 0;
6882
6883         mlxsw_sp = mlxsw_sp_lower_get(dev);
6884         if (!mlxsw_sp)
6885                 goto out;
6886
6887         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6888         if (!mlxsw_sp_rif_should_config(rif, dev, event))
6889                 goto out;
6890
6891         err = mlxsw_sp_router_port_check_rif_addr(mlxsw_sp, dev, dev->dev_addr,
6892                                                   ivi->extack);
6893         if (err)
6894                 goto out;
6895
6896         err = __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, ivi->extack);
6897 out:
6898         return notifier_from_errno(err);
6899 }
6900
/* Deferred-work item carrying the arguments of an inet6addr notification. */
struct mlxsw_sp_inet6addr_event_work {
        struct work_struct work;
        struct mlxsw_sp *mlxsw_sp;
        /* Held with dev_hold() when queued, released by the work function. */
        struct net_device *dev;
        unsigned long event;
};
6907
6908 static void mlxsw_sp_inet6addr_event_work(struct work_struct *work)
6909 {
6910         struct mlxsw_sp_inet6addr_event_work *inet6addr_work =
6911                 container_of(work, struct mlxsw_sp_inet6addr_event_work, work);
6912         struct mlxsw_sp *mlxsw_sp = inet6addr_work->mlxsw_sp;
6913         struct net_device *dev = inet6addr_work->dev;
6914         unsigned long event = inet6addr_work->event;
6915         struct mlxsw_sp_rif *rif;
6916
6917         rtnl_lock();
6918
6919         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6920         if (!mlxsw_sp_rif_should_config(rif, dev, event))
6921                 goto out;
6922
6923         __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, NULL);
6924 out:
6925         rtnl_unlock();
6926         dev_put(dev);
6927         kfree(inet6addr_work);
6928 }
6929
6930 /* Called with rcu_read_lock() */
6931 static int mlxsw_sp_inet6addr_event(struct notifier_block *nb,
6932                                     unsigned long event, void *ptr)
6933 {
6934         struct inet6_ifaddr *if6 = (struct inet6_ifaddr *) ptr;
6935         struct mlxsw_sp_inet6addr_event_work *inet6addr_work;
6936         struct net_device *dev = if6->idev->dev;
6937         struct mlxsw_sp_router *router;
6938
6939         /* NETDEV_UP event is handled by mlxsw_sp_inet6addr_valid_event */
6940         if (event == NETDEV_UP)
6941                 return NOTIFY_DONE;
6942
6943         inet6addr_work = kzalloc(sizeof(*inet6addr_work), GFP_ATOMIC);
6944         if (!inet6addr_work)
6945                 return NOTIFY_BAD;
6946
6947         router = container_of(nb, struct mlxsw_sp_router, inet6addr_nb);
6948         INIT_WORK(&inet6addr_work->work, mlxsw_sp_inet6addr_event_work);
6949         inet6addr_work->mlxsw_sp = router->mlxsw_sp;
6950         inet6addr_work->dev = dev;
6951         inet6addr_work->event = event;
6952         dev_hold(dev);
6953         mlxsw_core_schedule_work(&inet6addr_work->work);
6954
6955         return NOTIFY_DONE;
6956 }
6957
6958 int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused,
6959                                    unsigned long event, void *ptr)
6960 {
6961         struct in6_validator_info *i6vi = (struct in6_validator_info *) ptr;
6962         struct net_device *dev = i6vi->i6vi_dev->dev;
6963         struct mlxsw_sp *mlxsw_sp;
6964         struct mlxsw_sp_rif *rif;
6965         int err = 0;
6966
6967         mlxsw_sp = mlxsw_sp_lower_get(dev);
6968         if (!mlxsw_sp)
6969                 goto out;
6970
6971         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6972         if (!mlxsw_sp_rif_should_config(rif, dev, event))
6973                 goto out;
6974
6975         err = mlxsw_sp_router_port_check_rif_addr(mlxsw_sp, dev, dev->dev_addr,
6976                                                   i6vi->extack);
6977         if (err)
6978                 goto out;
6979
6980         err = __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, i6vi->extack);
6981 out:
6982         return notifier_from_errno(err);
6983 }
6984
6985 static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
6986                              const char *mac, int mtu)
6987 {
6988         char ritr_pl[MLXSW_REG_RITR_LEN];
6989         int err;
6990
6991         mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
6992         err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6993         if (err)
6994                 return err;
6995
6996         mlxsw_reg_ritr_mtu_set(ritr_pl, mtu);
6997         mlxsw_reg_ritr_if_mac_memcpy_to(ritr_pl, mac);
6998         mlxsw_reg_ritr_op_set(ritr_pl, MLXSW_REG_RITR_RIF_CREATE);
6999         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
7000 }
7001
7002 static int
7003 mlxsw_sp_router_port_change_event(struct mlxsw_sp *mlxsw_sp,
7004                                   struct mlxsw_sp_rif *rif)
7005 {
7006         struct net_device *dev = rif->dev;
7007         u16 fid_index;
7008         int err;
7009
7010         fid_index = mlxsw_sp_fid_index(rif->fid);
7011
7012         err = mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, false);
7013         if (err)
7014                 return err;
7015
7016         err = mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, dev->dev_addr,
7017                                 dev->mtu);
7018         if (err)
7019                 goto err_rif_edit;
7020
7021         err = mlxsw_sp_rif_fdb_op(mlxsw_sp, dev->dev_addr, fid_index, true);
7022         if (err)
7023                 goto err_rif_fdb_op;
7024
7025         if (rif->mtu != dev->mtu) {
7026                 struct mlxsw_sp_vr *vr;
7027                 int i;
7028
7029                 /* The RIF is relevant only to its mr_table instance, as unlike
7030                  * unicast routing, in multicast routing a RIF cannot be shared
7031                  * between several multicast routing tables.
7032                  */
7033                 vr = &mlxsw_sp->router->vrs[rif->vr_id];
7034                 for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++)
7035                         mlxsw_sp_mr_rif_mtu_update(vr->mr_table[i],
7036                                                    rif, dev->mtu);
7037         }
7038
7039         ether_addr_copy(rif->addr, dev->dev_addr);
7040         rif->mtu = dev->mtu;
7041
7042         netdev_dbg(dev, "Updated RIF=%d\n", rif->rif_index);
7043
7044         return 0;
7045
7046 err_rif_fdb_op:
7047         mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, rif->addr, rif->mtu);
7048 err_rif_edit:
7049         mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, true);
7050         return err;
7051 }
7052
7053 static int mlxsw_sp_router_port_pre_changeaddr_event(struct mlxsw_sp_rif *rif,
7054                             struct netdev_notifier_pre_changeaddr_info *info)
7055 {
7056         struct netlink_ext_ack *extack;
7057
7058         extack = netdev_notifier_info_to_extack(&info->info);
7059         return mlxsw_sp_router_port_check_rif_addr(rif->mlxsw_sp, rif->dev,
7060                                                    info->dev_addr, extack);
7061 }
7062
7063 int mlxsw_sp_netdevice_router_port_event(struct net_device *dev,
7064                                          unsigned long event, void *ptr)
7065 {
7066         struct mlxsw_sp *mlxsw_sp;
7067         struct mlxsw_sp_rif *rif;
7068
7069         mlxsw_sp = mlxsw_sp_lower_get(dev);
7070         if (!mlxsw_sp)
7071                 return 0;
7072
7073         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
7074         if (!rif)
7075                 return 0;
7076
7077         switch (event) {
7078         case NETDEV_CHANGEMTU: /* fall through */
7079         case NETDEV_CHANGEADDR:
7080                 return mlxsw_sp_router_port_change_event(mlxsw_sp, rif);
7081         case NETDEV_PRE_CHANGEADDR:
7082                 return mlxsw_sp_router_port_pre_changeaddr_event(rif, ptr);
7083         }
7084
7085         return 0;
7086 }
7087
7088 static int mlxsw_sp_port_vrf_join(struct mlxsw_sp *mlxsw_sp,
7089                                   struct net_device *l3_dev,
7090                                   struct netlink_ext_ack *extack)
7091 {
7092         struct mlxsw_sp_rif *rif;
7093
7094         /* If netdev is already associated with a RIF, then we need to
7095          * destroy it and create a new one with the new virtual router ID.
7096          */
7097         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
7098         if (rif)
7099                 __mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_DOWN,
7100                                           extack);
7101
7102         return __mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_UP, extack);
7103 }
7104
7105 static void mlxsw_sp_port_vrf_leave(struct mlxsw_sp *mlxsw_sp,
7106                                     struct net_device *l3_dev)
7107 {
7108         struct mlxsw_sp_rif *rif;
7109
7110         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
7111         if (!rif)
7112                 return;
7113         __mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_DOWN, NULL);
7114 }
7115
7116 int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
7117                                  struct netdev_notifier_changeupper_info *info)
7118 {
7119         struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
7120         int err = 0;
7121
7122         /* We do not create a RIF for a macvlan, but only use it to
7123          * direct more MAC addresses to the router.
7124          */
7125         if (!mlxsw_sp || netif_is_macvlan(l3_dev))
7126                 return 0;
7127
7128         switch (event) {
7129         case NETDEV_PRECHANGEUPPER:
7130                 return 0;
7131         case NETDEV_CHANGEUPPER:
7132                 if (info->linking) {
7133                         struct netlink_ext_ack *extack;
7134
7135                         extack = netdev_notifier_info_to_extack(&info->info);
7136                         err = mlxsw_sp_port_vrf_join(mlxsw_sp, l3_dev, extack);
7137                 } else {
7138                         mlxsw_sp_port_vrf_leave(mlxsw_sp, l3_dev);
7139                 }
7140                 break;
7141         }
7142
7143         return err;
7144 }
7145
7146 static int __mlxsw_sp_rif_macvlan_flush(struct net_device *dev, void *data)
7147 {
7148         struct mlxsw_sp_rif *rif = data;
7149
7150         if (!netif_is_macvlan(dev))
7151                 return 0;
7152
7153         return mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
7154                                    mlxsw_sp_fid_index(rif->fid), false);
7155 }
7156
7157 static int mlxsw_sp_rif_macvlan_flush(struct mlxsw_sp_rif *rif)
7158 {
7159         if (!netif_is_macvlan_port(rif->dev))
7160                 return 0;
7161
7162         netdev_warn(rif->dev, "Router interface is deleted. Upper macvlans will not work\n");
7163         return netdev_walk_all_upper_dev_rcu(rif->dev,
7164                                              __mlxsw_sp_rif_macvlan_flush, rif);
7165 }
7166
7167 static void mlxsw_sp_rif_subport_setup(struct mlxsw_sp_rif *rif,
7168                                        const struct mlxsw_sp_rif_params *params)
7169 {
7170         struct mlxsw_sp_rif_subport *rif_subport;
7171
7172         rif_subport = mlxsw_sp_rif_subport_rif(rif);
7173         refcount_set(&rif_subport->ref_count, 1);
7174         rif_subport->vid = params->vid;
7175         rif_subport->lag = params->lag;
7176         if (params->lag)
7177                 rif_subport->lag_id = params->lag_id;
7178         else
7179                 rif_subport->system_port = params->system_port;
7180 }
7181
7182 static int mlxsw_sp_rif_subport_op(struct mlxsw_sp_rif *rif, bool enable)
7183 {
7184         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7185         struct mlxsw_sp_rif_subport *rif_subport;
7186         char ritr_pl[MLXSW_REG_RITR_LEN];
7187
7188         rif_subport = mlxsw_sp_rif_subport_rif(rif);
7189         mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_SP_IF,
7190                             rif->rif_index, rif->vr_id, rif->dev->mtu);
7191         mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
7192         mlxsw_reg_ritr_sp_if_pack(ritr_pl, rif_subport->lag,
7193                                   rif_subport->lag ? rif_subport->lag_id :
7194                                                      rif_subport->system_port,
7195                                   rif_subport->vid);
7196
7197         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
7198 }
7199
7200 static int mlxsw_sp_rif_subport_configure(struct mlxsw_sp_rif *rif)
7201 {
7202         int err;
7203
7204         err = mlxsw_sp_rif_subport_op(rif, true);
7205         if (err)
7206                 return err;
7207
7208         err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7209                                   mlxsw_sp_fid_index(rif->fid), true);
7210         if (err)
7211                 goto err_rif_fdb_op;
7212
7213         mlxsw_sp_fid_rif_set(rif->fid, rif);
7214         return 0;
7215
7216 err_rif_fdb_op:
7217         mlxsw_sp_rif_subport_op(rif, false);
7218         return err;
7219 }
7220
7221 static void mlxsw_sp_rif_subport_deconfigure(struct mlxsw_sp_rif *rif)
7222 {
7223         struct mlxsw_sp_fid *fid = rif->fid;
7224
7225         mlxsw_sp_fid_rif_set(fid, NULL);
7226         mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7227                             mlxsw_sp_fid_index(fid), false);
7228         mlxsw_sp_rif_macvlan_flush(rif);
7229         mlxsw_sp_rif_subport_op(rif, false);
7230 }
7231
7232 static struct mlxsw_sp_fid *
7233 mlxsw_sp_rif_subport_fid_get(struct mlxsw_sp_rif *rif,
7234                              struct netlink_ext_ack *extack)
7235 {
7236         return mlxsw_sp_fid_rfid_get(rif->mlxsw_sp, rif->rif_index);
7237 }
7238
7239 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_subport_ops = {
7240         .type                   = MLXSW_SP_RIF_TYPE_SUBPORT,
7241         .rif_size               = sizeof(struct mlxsw_sp_rif_subport),
7242         .setup                  = mlxsw_sp_rif_subport_setup,
7243         .configure              = mlxsw_sp_rif_subport_configure,
7244         .deconfigure            = mlxsw_sp_rif_subport_deconfigure,
7245         .fid_get                = mlxsw_sp_rif_subport_fid_get,
7246 };
7247
7248 static int mlxsw_sp_rif_vlan_fid_op(struct mlxsw_sp_rif *rif,
7249                                     enum mlxsw_reg_ritr_if_type type,
7250                                     u16 vid_fid, bool enable)
7251 {
7252         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7253         char ritr_pl[MLXSW_REG_RITR_LEN];
7254
7255         mlxsw_reg_ritr_pack(ritr_pl, enable, type, rif->rif_index, rif->vr_id,
7256                             rif->dev->mtu);
7257         mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
7258         mlxsw_reg_ritr_fid_set(ritr_pl, type, vid_fid);
7259
7260         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
7261 }
7262
7263 u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp)
7264 {
7265         return mlxsw_core_max_ports(mlxsw_sp->core) + 1;
7266 }
7267
7268 static int mlxsw_sp_rif_vlan_configure(struct mlxsw_sp_rif *rif)
7269 {
7270         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7271         u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
7272         int err;
7273
7274         err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, true);
7275         if (err)
7276                 return err;
7277
7278         err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7279                                      mlxsw_sp_router_port(mlxsw_sp), true);
7280         if (err)
7281                 goto err_fid_mc_flood_set;
7282
7283         err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7284                                      mlxsw_sp_router_port(mlxsw_sp), true);
7285         if (err)
7286                 goto err_fid_bc_flood_set;
7287
7288         err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7289                                   mlxsw_sp_fid_index(rif->fid), true);
7290         if (err)
7291                 goto err_rif_fdb_op;
7292
7293         mlxsw_sp_fid_rif_set(rif->fid, rif);
7294         return 0;
7295
7296 err_rif_fdb_op:
7297         mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7298                                mlxsw_sp_router_port(mlxsw_sp), false);
7299 err_fid_bc_flood_set:
7300         mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7301                                mlxsw_sp_router_port(mlxsw_sp), false);
7302 err_fid_mc_flood_set:
7303         mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
7304         return err;
7305 }
7306
7307 static void mlxsw_sp_rif_vlan_deconfigure(struct mlxsw_sp_rif *rif)
7308 {
7309         u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
7310         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7311         struct mlxsw_sp_fid *fid = rif->fid;
7312
7313         mlxsw_sp_fid_rif_set(fid, NULL);
7314         mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7315                             mlxsw_sp_fid_index(fid), false);
7316         mlxsw_sp_rif_macvlan_flush(rif);
7317         mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7318                                mlxsw_sp_router_port(mlxsw_sp), false);
7319         mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7320                                mlxsw_sp_router_port(mlxsw_sp), false);
7321         mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
7322 }
7323
7324 static struct mlxsw_sp_fid *
7325 mlxsw_sp_rif_vlan_fid_get(struct mlxsw_sp_rif *rif,
7326                           struct netlink_ext_ack *extack)
7327 {
7328         struct net_device *br_dev = rif->dev;
7329         u16 vid;
7330         int err;
7331
7332         if (is_vlan_dev(rif->dev)) {
7333                 vid = vlan_dev_vlan_id(rif->dev);
7334                 br_dev = vlan_dev_real_dev(rif->dev);
7335                 if (WARN_ON(!netif_is_bridge_master(br_dev)))
7336                         return ERR_PTR(-EINVAL);
7337         } else {
7338                 err = br_vlan_get_pvid(rif->dev, &vid);
7339                 if (err < 0 || !vid) {
7340                         NL_SET_ERR_MSG_MOD(extack, "Couldn't determine bridge PVID");
7341                         return ERR_PTR(-EINVAL);
7342                 }
7343         }
7344
7345         return mlxsw_sp_bridge_fid_get(rif->mlxsw_sp, br_dev, vid, extack);
7346 }
7347
7348 static void mlxsw_sp_rif_vlan_fdb_del(struct mlxsw_sp_rif *rif, const char *mac)
7349 {
7350         u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
7351         struct switchdev_notifier_fdb_info info;
7352         struct net_device *br_dev;
7353         struct net_device *dev;
7354
7355         br_dev = is_vlan_dev(rif->dev) ? vlan_dev_real_dev(rif->dev) : rif->dev;
7356         dev = br_fdb_find_port(br_dev, mac, vid);
7357         if (!dev)
7358                 return;
7359
7360         info.addr = mac;
7361         info.vid = vid;
7362         call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info,
7363                                  NULL);
7364 }
7365
7366 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_vlan_ops = {
7367         .type                   = MLXSW_SP_RIF_TYPE_VLAN,
7368         .rif_size               = sizeof(struct mlxsw_sp_rif),
7369         .configure              = mlxsw_sp_rif_vlan_configure,
7370         .deconfigure            = mlxsw_sp_rif_vlan_deconfigure,
7371         .fid_get                = mlxsw_sp_rif_vlan_fid_get,
7372         .fdb_del                = mlxsw_sp_rif_vlan_fdb_del,
7373 };
7374
7375 static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif)
7376 {
7377         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7378         u16 fid_index = mlxsw_sp_fid_index(rif->fid);
7379         int err;
7380
7381         err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index,
7382                                        true);
7383         if (err)
7384                 return err;
7385
7386         err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7387                                      mlxsw_sp_router_port(mlxsw_sp), true);
7388         if (err)
7389                 goto err_fid_mc_flood_set;
7390
7391         err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7392                                      mlxsw_sp_router_port(mlxsw_sp), true);
7393         if (err)
7394                 goto err_fid_bc_flood_set;
7395
7396         err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7397                                   mlxsw_sp_fid_index(rif->fid), true);
7398         if (err)
7399                 goto err_rif_fdb_op;
7400
7401         mlxsw_sp_fid_rif_set(rif->fid, rif);
7402         return 0;
7403
7404 err_rif_fdb_op:
7405         mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7406                                mlxsw_sp_router_port(mlxsw_sp), false);
7407 err_fid_bc_flood_set:
7408         mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7409                                mlxsw_sp_router_port(mlxsw_sp), false);
7410 err_fid_mc_flood_set:
7411         mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
7412         return err;
7413 }
7414
7415 static void mlxsw_sp_rif_fid_deconfigure(struct mlxsw_sp_rif *rif)
7416 {
7417         u16 fid_index = mlxsw_sp_fid_index(rif->fid);
7418         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7419         struct mlxsw_sp_fid *fid = rif->fid;
7420
7421         mlxsw_sp_fid_rif_set(fid, NULL);
7422         mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7423                             mlxsw_sp_fid_index(fid), false);
7424         mlxsw_sp_rif_macvlan_flush(rif);
7425         mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7426                                mlxsw_sp_router_port(mlxsw_sp), false);
7427         mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7428                                mlxsw_sp_router_port(mlxsw_sp), false);
7429         mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
7430 }
7431
7432 static struct mlxsw_sp_fid *
7433 mlxsw_sp_rif_fid_fid_get(struct mlxsw_sp_rif *rif,
7434                          struct netlink_ext_ack *extack)
7435 {
7436         return mlxsw_sp_bridge_fid_get(rif->mlxsw_sp, rif->dev, 0, extack);
7437 }
7438
7439 static void mlxsw_sp_rif_fid_fdb_del(struct mlxsw_sp_rif *rif, const char *mac)
7440 {
7441         struct switchdev_notifier_fdb_info info;
7442         struct net_device *dev;
7443
7444         dev = br_fdb_find_port(rif->dev, mac, 0);
7445         if (!dev)
7446                 return;
7447
7448         info.addr = mac;
7449         info.vid = 0;
7450         call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info,
7451                                  NULL);
7452 }
7453
7454 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_fid_ops = {
7455         .type                   = MLXSW_SP_RIF_TYPE_FID,
7456         .rif_size               = sizeof(struct mlxsw_sp_rif),
7457         .configure              = mlxsw_sp_rif_fid_configure,
7458         .deconfigure            = mlxsw_sp_rif_fid_deconfigure,
7459         .fid_get                = mlxsw_sp_rif_fid_fid_get,
7460         .fdb_del                = mlxsw_sp_rif_fid_fdb_del,
7461 };
7462
7463 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_vlan_emu_ops = {
7464         .type                   = MLXSW_SP_RIF_TYPE_VLAN,
7465         .rif_size               = sizeof(struct mlxsw_sp_rif),
7466         .configure              = mlxsw_sp_rif_fid_configure,
7467         .deconfigure            = mlxsw_sp_rif_fid_deconfigure,
7468         .fid_get                = mlxsw_sp_rif_vlan_fid_get,
7469         .fdb_del                = mlxsw_sp_rif_vlan_fdb_del,
7470 };
7471
7472 static struct mlxsw_sp_rif_ipip_lb *
7473 mlxsw_sp_rif_ipip_lb_rif(struct mlxsw_sp_rif *rif)
7474 {
7475         return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
7476 }
7477
7478 static void
7479 mlxsw_sp_rif_ipip_lb_setup(struct mlxsw_sp_rif *rif,
7480                            const struct mlxsw_sp_rif_params *params)
7481 {
7482         struct mlxsw_sp_rif_params_ipip_lb *params_lb;
7483         struct mlxsw_sp_rif_ipip_lb *rif_lb;
7484
7485         params_lb = container_of(params, struct mlxsw_sp_rif_params_ipip_lb,
7486                                  common);
7487         rif_lb = mlxsw_sp_rif_ipip_lb_rif(rif);
7488         rif_lb->lb_config = params_lb->lb_config;
7489 }
7490
7491 static int
7492 mlxsw_sp1_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif)
7493 {
7494         struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
7495         u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev);
7496         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7497         struct mlxsw_sp_vr *ul_vr;
7498         int err;
7499
7500         ul_vr = mlxsw_sp_vr_get(mlxsw_sp, ul_tb_id, NULL);
7501         if (IS_ERR(ul_vr))
7502                 return PTR_ERR(ul_vr);
7503
7504         err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr->id, 0, true);
7505         if (err)
7506                 goto err_loopback_op;
7507
7508         lb_rif->ul_vr_id = ul_vr->id;
7509         lb_rif->ul_rif_id = 0;
7510         ++ul_vr->rif_count;
7511         return 0;
7512
7513 err_loopback_op:
7514         mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
7515         return err;
7516 }
7517
7518 static void mlxsw_sp1_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
7519 {
7520         struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
7521         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7522         struct mlxsw_sp_vr *ul_vr;
7523
7524         ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id];
7525         mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr->id, 0, false);
7526
7527         --ul_vr->rif_count;
7528         mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
7529 }
7530
7531 static const struct mlxsw_sp_rif_ops mlxsw_sp1_rif_ipip_lb_ops = {
7532         .type                   = MLXSW_SP_RIF_TYPE_IPIP_LB,
7533         .rif_size               = sizeof(struct mlxsw_sp_rif_ipip_lb),
7534         .setup                  = mlxsw_sp_rif_ipip_lb_setup,
7535         .configure              = mlxsw_sp1_rif_ipip_lb_configure,
7536         .deconfigure            = mlxsw_sp1_rif_ipip_lb_deconfigure,
7537 };
7538
7539 const struct mlxsw_sp_rif_ops *mlxsw_sp1_rif_ops_arr[] = {
7540         [MLXSW_SP_RIF_TYPE_SUBPORT]     = &mlxsw_sp_rif_subport_ops,
7541         [MLXSW_SP_RIF_TYPE_VLAN]        = &mlxsw_sp_rif_vlan_emu_ops,
7542         [MLXSW_SP_RIF_TYPE_FID]         = &mlxsw_sp_rif_fid_ops,
7543         [MLXSW_SP_RIF_TYPE_IPIP_LB]     = &mlxsw_sp1_rif_ipip_lb_ops,
7544 };
7545
7546 static int
7547 mlxsw_sp_rif_ipip_lb_ul_rif_op(struct mlxsw_sp_rif *ul_rif, bool enable)
7548 {
7549         struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp;
7550         char ritr_pl[MLXSW_REG_RITR_LEN];
7551
7552         mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
7553                             ul_rif->rif_index, ul_rif->vr_id, IP_MAX_MTU);
7554         mlxsw_reg_ritr_loopback_protocol_set(ritr_pl,
7555                                              MLXSW_REG_RITR_LOOPBACK_GENERIC);
7556
7557         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
7558 }
7559
7560 static struct mlxsw_sp_rif *
7561 mlxsw_sp_ul_rif_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr,
7562                        struct netlink_ext_ack *extack)
7563 {
7564         struct mlxsw_sp_rif *ul_rif;
7565         u16 rif_index;
7566         int err;
7567
7568         err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index);
7569         if (err) {
7570                 NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces");
7571                 return ERR_PTR(err);
7572         }
7573
7574         ul_rif = mlxsw_sp_rif_alloc(sizeof(*ul_rif), rif_index, vr->id, NULL);
7575         if (!ul_rif)
7576                 return ERR_PTR(-ENOMEM);
7577
7578         mlxsw_sp->router->rifs[rif_index] = ul_rif;
7579         ul_rif->mlxsw_sp = mlxsw_sp;
7580         err = mlxsw_sp_rif_ipip_lb_ul_rif_op(ul_rif, true);
7581         if (err)
7582                 goto ul_rif_op_err;
7583
7584         return ul_rif;
7585
7586 ul_rif_op_err:
7587         mlxsw_sp->router->rifs[rif_index] = NULL;
7588         kfree(ul_rif);
7589         return ERR_PTR(err);
7590 }
7591
7592 static void mlxsw_sp_ul_rif_destroy(struct mlxsw_sp_rif *ul_rif)
7593 {
7594         struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp;
7595
7596         mlxsw_sp_rif_ipip_lb_ul_rif_op(ul_rif, false);
7597         mlxsw_sp->router->rifs[ul_rif->rif_index] = NULL;
7598         kfree(ul_rif);
7599 }
7600
7601 static struct mlxsw_sp_rif *
7602 mlxsw_sp_ul_rif_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
7603                     struct netlink_ext_ack *extack)
7604 {
7605         struct mlxsw_sp_vr *vr;
7606         int err;
7607
7608         vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, extack);
7609         if (IS_ERR(vr))
7610                 return ERR_CAST(vr);
7611
7612         if (refcount_inc_not_zero(&vr->ul_rif_refcnt))
7613                 return vr->ul_rif;
7614
7615         vr->ul_rif = mlxsw_sp_ul_rif_create(mlxsw_sp, vr, extack);
7616         if (IS_ERR(vr->ul_rif)) {
7617                 err = PTR_ERR(vr->ul_rif);
7618                 goto err_ul_rif_create;
7619         }
7620
7621         vr->rif_count++;
7622         refcount_set(&vr->ul_rif_refcnt, 1);
7623
7624         return vr->ul_rif;
7625
7626 err_ul_rif_create:
7627         mlxsw_sp_vr_put(mlxsw_sp, vr);
7628         return ERR_PTR(err);
7629 }
7630
7631 static void mlxsw_sp_ul_rif_put(struct mlxsw_sp_rif *ul_rif)
7632 {
7633         struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp;
7634         struct mlxsw_sp_vr *vr;
7635
7636         vr = &mlxsw_sp->router->vrs[ul_rif->vr_id];
7637
7638         if (!refcount_dec_and_test(&vr->ul_rif_refcnt))
7639                 return;
7640
7641         vr->rif_count--;
7642         mlxsw_sp_ul_rif_destroy(ul_rif);
7643         mlxsw_sp_vr_put(mlxsw_sp, vr);
7644 }
7645
7646 int mlxsw_sp_router_ul_rif_get(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
7647                                u16 *ul_rif_index)
7648 {
7649         struct mlxsw_sp_rif *ul_rif;
7650
7651         ASSERT_RTNL();
7652
7653         ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, NULL);
7654         if (IS_ERR(ul_rif))
7655                 return PTR_ERR(ul_rif);
7656         *ul_rif_index = ul_rif->rif_index;
7657
7658         return 0;
7659 }
7660
7661 void mlxsw_sp_router_ul_rif_put(struct mlxsw_sp *mlxsw_sp, u16 ul_rif_index)
7662 {
7663         struct mlxsw_sp_rif *ul_rif;
7664
7665         ASSERT_RTNL();
7666
7667         ul_rif = mlxsw_sp->router->rifs[ul_rif_index];
7668         if (WARN_ON(!ul_rif))
7669                 return;
7670
7671         mlxsw_sp_ul_rif_put(ul_rif);
7672 }
7673
7674 static int
7675 mlxsw_sp2_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif)
7676 {
7677         struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
7678         u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev);
7679         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7680         struct mlxsw_sp_rif *ul_rif;
7681         int err;
7682
7683         ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, NULL);
7684         if (IS_ERR(ul_rif))
7685                 return PTR_ERR(ul_rif);
7686
7687         err = mlxsw_sp_rif_ipip_lb_op(lb_rif, 0, ul_rif->rif_index, true);
7688         if (err)
7689                 goto err_loopback_op;
7690
7691         lb_rif->ul_vr_id = 0;
7692         lb_rif->ul_rif_id = ul_rif->rif_index;
7693
7694         return 0;
7695
7696 err_loopback_op:
7697         mlxsw_sp_ul_rif_put(ul_rif);
7698         return err;
7699 }
7700
7701 static void mlxsw_sp2_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
7702 {
7703         struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
7704         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7705         struct mlxsw_sp_rif *ul_rif;
7706
7707         ul_rif = mlxsw_sp_rif_by_index(mlxsw_sp, lb_rif->ul_rif_id);
7708         mlxsw_sp_rif_ipip_lb_op(lb_rif, 0, lb_rif->ul_rif_id, false);
7709         mlxsw_sp_ul_rif_put(ul_rif);
7710 }
7711
7712 static const struct mlxsw_sp_rif_ops mlxsw_sp2_rif_ipip_lb_ops = {
7713         .type                   = MLXSW_SP_RIF_TYPE_IPIP_LB,
7714         .rif_size               = sizeof(struct mlxsw_sp_rif_ipip_lb),
7715         .setup                  = mlxsw_sp_rif_ipip_lb_setup,
7716         .configure              = mlxsw_sp2_rif_ipip_lb_configure,
7717         .deconfigure            = mlxsw_sp2_rif_ipip_lb_deconfigure,
7718 };
7719
7720 const struct mlxsw_sp_rif_ops *mlxsw_sp2_rif_ops_arr[] = {
7721         [MLXSW_SP_RIF_TYPE_SUBPORT]     = &mlxsw_sp_rif_subport_ops,
7722         [MLXSW_SP_RIF_TYPE_VLAN]        = &mlxsw_sp_rif_vlan_emu_ops,
7723         [MLXSW_SP_RIF_TYPE_FID]         = &mlxsw_sp_rif_fid_ops,
7724         [MLXSW_SP_RIF_TYPE_IPIP_LB]     = &mlxsw_sp2_rif_ipip_lb_ops,
7725 };
7726
7727 static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp)
7728 {
7729         u64 max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
7730
7731         mlxsw_sp->router->rifs = kcalloc(max_rifs,
7732                                          sizeof(struct mlxsw_sp_rif *),
7733                                          GFP_KERNEL);
7734         if (!mlxsw_sp->router->rifs)
7735                 return -ENOMEM;
7736
7737         return 0;
7738 }
7739
7740 static void mlxsw_sp_rifs_fini(struct mlxsw_sp *mlxsw_sp)
7741 {
7742         int i;
7743
7744         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
7745                 WARN_ON_ONCE(mlxsw_sp->router->rifs[i]);
7746
7747         kfree(mlxsw_sp->router->rifs);
7748 }
7749
7750 static int
7751 mlxsw_sp_ipip_config_tigcr(struct mlxsw_sp *mlxsw_sp)
7752 {
7753         char tigcr_pl[MLXSW_REG_TIGCR_LEN];
7754
7755         mlxsw_reg_tigcr_pack(tigcr_pl, true, 0);
7756         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tigcr), tigcr_pl);
7757 }
7758
7759 static int mlxsw_sp_ipips_init(struct mlxsw_sp *mlxsw_sp)
7760 {
7761         mlxsw_sp->router->ipip_ops_arr = mlxsw_sp_ipip_ops_arr;
7762         INIT_LIST_HEAD(&mlxsw_sp->router->ipip_list);
7763         return mlxsw_sp_ipip_config_tigcr(mlxsw_sp);
7764 }
7765
7766 static void mlxsw_sp_ipips_fini(struct mlxsw_sp *mlxsw_sp)
7767 {
7768         WARN_ON(!list_empty(&mlxsw_sp->router->ipip_list));
7769 }
7770
7771 static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
7772 {
7773         struct mlxsw_sp_router *router;
7774
7775         /* Flush pending FIB notifications and then flush the device's
7776          * table before requesting another dump. The FIB notification
7777          * block is unregistered, so no need to take RTNL.
7778          */
7779         mlxsw_core_flush_owq();
7780         router = container_of(nb, struct mlxsw_sp_router, fib_nb);
7781         mlxsw_sp_router_fib_flush(router->mlxsw_sp);
7782 }
7783
7784 #ifdef CONFIG_IP_ROUTE_MULTIPATH
7785 static void mlxsw_sp_mp_hash_header_set(char *recr2_pl, int header)
7786 {
7787         mlxsw_reg_recr2_outer_header_enables_set(recr2_pl, header, true);
7788 }
7789
7790 static void mlxsw_sp_mp_hash_field_set(char *recr2_pl, int field)
7791 {
7792         mlxsw_reg_recr2_outer_header_fields_enable_set(recr2_pl, field, true);
7793 }
7794
7795 static void mlxsw_sp_mp4_hash_init(char *recr2_pl)
7796 {
7797         bool only_l3 = !init_net.ipv4.sysctl_fib_multipath_hash_policy;
7798
7799         mlxsw_sp_mp_hash_header_set(recr2_pl,
7800                                     MLXSW_REG_RECR2_IPV4_EN_NOT_TCP_NOT_UDP);
7801         mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV4_EN_TCP_UDP);
7802         mlxsw_reg_recr2_ipv4_sip_enable(recr2_pl);
7803         mlxsw_reg_recr2_ipv4_dip_enable(recr2_pl);
7804         if (only_l3)
7805                 return;
7806         mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_EN_IPV4);
7807         mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV4_PROTOCOL);
7808         mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_SPORT);
7809         mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_DPORT);
7810 }
7811
7812 static void mlxsw_sp_mp6_hash_init(char *recr2_pl)
7813 {
7814         bool only_l3 = !ip6_multipath_hash_policy(&init_net);
7815
7816         mlxsw_sp_mp_hash_header_set(recr2_pl,
7817                                     MLXSW_REG_RECR2_IPV6_EN_NOT_TCP_NOT_UDP);
7818         mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV6_EN_TCP_UDP);
7819         mlxsw_reg_recr2_ipv6_sip_enable(recr2_pl);
7820         mlxsw_reg_recr2_ipv6_dip_enable(recr2_pl);
7821         mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_NEXT_HEADER);
7822         if (only_l3) {
7823                 mlxsw_sp_mp_hash_field_set(recr2_pl,
7824                                            MLXSW_REG_RECR2_IPV6_FLOW_LABEL);
7825         } else {
7826                 mlxsw_sp_mp_hash_header_set(recr2_pl,
7827                                             MLXSW_REG_RECR2_TCP_UDP_EN_IPV6);
7828                 mlxsw_sp_mp_hash_field_set(recr2_pl,
7829                                            MLXSW_REG_RECR2_TCP_UDP_SPORT);
7830                 mlxsw_sp_mp_hash_field_set(recr2_pl,
7831                                            MLXSW_REG_RECR2_TCP_UDP_DPORT);
7832         }
7833 }
7834
7835 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
7836 {
7837         char recr2_pl[MLXSW_REG_RECR2_LEN];
7838         u32 seed;
7839
7840         seed = jhash(mlxsw_sp->base_mac, sizeof(mlxsw_sp->base_mac), 0);
7841         mlxsw_reg_recr2_pack(recr2_pl, seed);
7842         mlxsw_sp_mp4_hash_init(recr2_pl);
7843         mlxsw_sp_mp6_hash_init(recr2_pl);
7844
7845         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(recr2), recr2_pl);
7846 }
7847 #else
/* Stub used when CONFIG_IP_ROUTE_MULTIPATH is not set: nothing to program. */
static int
mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
{
	return 0;
}
7852 #endif
7853
7854 static int mlxsw_sp_dscp_init(struct mlxsw_sp *mlxsw_sp)
7855 {
7856         char rdpm_pl[MLXSW_REG_RDPM_LEN];
7857         unsigned int i;
7858
7859         MLXSW_REG_ZERO(rdpm, rdpm_pl);
7860
7861         /* HW is determining switch priority based on DSCP-bits, but the
7862          * kernel is still doing that based on the ToS. Since there's a
7863          * mismatch in bits we need to make sure to translate the right
7864          * value ToS would observe, skipping the 2 least-significant ECN bits.
7865          */
7866         for (i = 0; i < MLXSW_REG_RDPM_DSCP_ENTRY_REC_MAX_COUNT; i++)
7867                 mlxsw_reg_rdpm_pack(rdpm_pl, i, rt_tos2priority(i << 2));
7868
7869         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rdpm), rdpm_pl);
7870 }
7871
7872 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
7873 {
7874         bool usp = init_net.ipv4.sysctl_ip_fwd_update_priority;
7875         char rgcr_pl[MLXSW_REG_RGCR_LEN];
7876         u64 max_rifs;
7877         int err;
7878
7879         if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS))
7880                 return -EIO;
7881         max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
7882
7883         mlxsw_reg_rgcr_pack(rgcr_pl, true, true);
7884         mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
7885         mlxsw_reg_rgcr_usp_set(rgcr_pl, usp);
7886         err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
7887         if (err)
7888                 return err;
7889         return 0;
7890 }
7891
7892 static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
7893 {
7894         char rgcr_pl[MLXSW_REG_RGCR_LEN];
7895
7896         mlxsw_reg_rgcr_pack(rgcr_pl, false, false);
7897         mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
7898 }
7899
7900 int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
7901 {
7902         struct mlxsw_sp_router *router;
7903         int err;
7904
7905         router = kzalloc(sizeof(*mlxsw_sp->router), GFP_KERNEL);
7906         if (!router)
7907                 return -ENOMEM;
7908         mlxsw_sp->router = router;
7909         router->mlxsw_sp = mlxsw_sp;
7910
7911         router->inetaddr_nb.notifier_call = mlxsw_sp_inetaddr_event;
7912         err = register_inetaddr_notifier(&router->inetaddr_nb);
7913         if (err)
7914                 goto err_register_inetaddr_notifier;
7915
7916         router->inet6addr_nb.notifier_call = mlxsw_sp_inet6addr_event;
7917         err = register_inet6addr_notifier(&router->inet6addr_nb);
7918         if (err)
7919                 goto err_register_inet6addr_notifier;
7920
7921         INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_neighs_list);
7922         err = __mlxsw_sp_router_init(mlxsw_sp);
7923         if (err)
7924                 goto err_router_init;
7925
7926         err = mlxsw_sp_rifs_init(mlxsw_sp);
7927         if (err)
7928                 goto err_rifs_init;
7929
7930         err = mlxsw_sp_ipips_init(mlxsw_sp);
7931         if (err)
7932                 goto err_ipips_init;
7933
7934         err = rhashtable_init(&mlxsw_sp->router->nexthop_ht,
7935                               &mlxsw_sp_nexthop_ht_params);
7936         if (err)
7937                 goto err_nexthop_ht_init;
7938
7939         err = rhashtable_init(&mlxsw_sp->router->nexthop_group_ht,
7940                               &mlxsw_sp_nexthop_group_ht_params);
7941         if (err)
7942                 goto err_nexthop_group_ht_init;
7943
7944         INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_list);
7945         err = mlxsw_sp_lpm_init(mlxsw_sp);
7946         if (err)
7947                 goto err_lpm_init;
7948
7949         err = mlxsw_sp_mr_init(mlxsw_sp, &mlxsw_sp_mr_tcam_ops);
7950         if (err)
7951                 goto err_mr_init;
7952
7953         err = mlxsw_sp_vrs_init(mlxsw_sp);
7954         if (err)
7955                 goto err_vrs_init;
7956
7957         err = mlxsw_sp_neigh_init(mlxsw_sp);
7958         if (err)
7959                 goto err_neigh_init;
7960
7961         mlxsw_sp->router->netevent_nb.notifier_call =
7962                 mlxsw_sp_router_netevent_event;
7963         err = register_netevent_notifier(&mlxsw_sp->router->netevent_nb);
7964         if (err)
7965                 goto err_register_netevent_notifier;
7966
7967         err = mlxsw_sp_mp_hash_init(mlxsw_sp);
7968         if (err)
7969                 goto err_mp_hash_init;
7970
7971         err = mlxsw_sp_dscp_init(mlxsw_sp);
7972         if (err)
7973                 goto err_dscp_init;
7974
7975         mlxsw_sp->router->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
7976         err = register_fib_notifier(&mlxsw_sp->router->fib_nb,
7977                                     mlxsw_sp_router_fib_dump_flush);
7978         if (err)
7979                 goto err_register_fib_notifier;
7980
7981         return 0;
7982
7983 err_register_fib_notifier:
7984 err_dscp_init:
7985 err_mp_hash_init:
7986         unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
7987 err_register_netevent_notifier:
7988         mlxsw_sp_neigh_fini(mlxsw_sp);
7989 err_neigh_init:
7990         mlxsw_sp_vrs_fini(mlxsw_sp);
7991 err_vrs_init:
7992         mlxsw_sp_mr_fini(mlxsw_sp);
7993 err_mr_init:
7994         mlxsw_sp_lpm_fini(mlxsw_sp);
7995 err_lpm_init:
7996         rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
7997 err_nexthop_group_ht_init:
7998         rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
7999 err_nexthop_ht_init:
8000         mlxsw_sp_ipips_fini(mlxsw_sp);
8001 err_ipips_init:
8002         mlxsw_sp_rifs_fini(mlxsw_sp);
8003 err_rifs_init:
8004         __mlxsw_sp_router_fini(mlxsw_sp);
8005 err_router_init:
8006         unregister_inet6addr_notifier(&router->inet6addr_nb);
8007 err_register_inet6addr_notifier:
8008         unregister_inetaddr_notifier(&router->inetaddr_nb);
8009 err_register_inetaddr_notifier:
8010         kfree(mlxsw_sp->router);
8011         return err;
8012 }
8013
8014 void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
8015 {
8016         unregister_fib_notifier(&mlxsw_sp->router->fib_nb);
8017         unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
8018         mlxsw_sp_neigh_fini(mlxsw_sp);
8019         mlxsw_sp_vrs_fini(mlxsw_sp);
8020         mlxsw_sp_mr_fini(mlxsw_sp);
8021         mlxsw_sp_lpm_fini(mlxsw_sp);
8022         rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
8023         rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
8024         mlxsw_sp_ipips_fini(mlxsw_sp);
8025         mlxsw_sp_rifs_fini(mlxsw_sp);
8026         __mlxsw_sp_router_fini(mlxsw_sp);
8027         unregister_inet6addr_notifier(&mlxsw_sp->router->inet6addr_nb);
8028         unregister_inetaddr_notifier(&mlxsw_sp->router->inetaddr_nb);
8029         kfree(mlxsw_sp->router);
8030 }