asedeno.scripts.mit.edu Git - linux.git/blob - drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
Merge branch 'x86/cpu' into x86/urgent
[linux.git] / drivers / net / ethernet / mellanox / mlxsw / spectrum_router.c
1 // SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
2 /* Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved */
3
4 #include <linux/kernel.h>
5 #include <linux/types.h>
6 #include <linux/rhashtable.h>
7 #include <linux/bitops.h>
8 #include <linux/in6.h>
9 #include <linux/notifier.h>
10 #include <linux/inetdevice.h>
11 #include <linux/netdevice.h>
12 #include <linux/if_bridge.h>
13 #include <linux/socket.h>
14 #include <linux/route.h>
15 #include <linux/gcd.h>
16 #include <linux/random.h>
17 #include <linux/if_macvlan.h>
18 #include <linux/refcount.h>
19 #include <net/netevent.h>
20 #include <net/neighbour.h>
21 #include <net/arp.h>
22 #include <net/ip_fib.h>
23 #include <net/ip6_fib.h>
24 #include <net/fib_rules.h>
25 #include <net/ip_tunnels.h>
26 #include <net/l3mdev.h>
27 #include <net/addrconf.h>
28 #include <net/ndisc.h>
29 #include <net/ipv6.h>
30 #include <net/fib_notifier.h>
31 #include <net/switchdev.h>
32
33 #include "spectrum.h"
34 #include "core.h"
35 #include "reg.h"
36 #include "spectrum_cnt.h"
37 #include "spectrum_dpipe.h"
38 #include "spectrum_ipip.h"
39 #include "spectrum_mr.h"
40 #include "spectrum_mr_tcam.h"
41 #include "spectrum_router.h"
42 #include "spectrum_span.h"
43
44 struct mlxsw_sp_fib;
45 struct mlxsw_sp_vr;
46 struct mlxsw_sp_lpm_tree;
47 struct mlxsw_sp_rif_ops;
48
/* Router-wide state hanging off a struct mlxsw_sp (reached in this file as
 * mlxsw_sp->router).
 */
struct mlxsw_sp_router {
        struct mlxsw_sp *mlxsw_sp;
        struct mlxsw_sp_rif **rifs;
        /* Array of virtual routers; the lookup helpers in this file scan it
         * up to the MAX_VRS core resource.
         */
        struct mlxsw_sp_vr *vrs;
        struct rhashtable neigh_ht;
        struct rhashtable nexthop_group_ht;
        struct rhashtable nexthop_ht;
        struct list_head nexthop_list;
        struct {
                /* One tree for each protocol: IPv4 and IPv6 */
                struct mlxsw_sp_lpm_tree *proto_trees[2];
                /* Array of tree_count entries allocated by
                 * mlxsw_sp_lpm_init().
                 */
                struct mlxsw_sp_lpm_tree *trees;
                unsigned int tree_count;
        } lpm;
        struct {
                struct delayed_work dw;
                unsigned long interval; /* ms */
        } neighs_update;
        struct delayed_work nexthop_probe_dw;
#define MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL 5000 /* ms */
        struct list_head nexthop_neighs_list;
        struct list_head ipip_list;
        bool aborted;
        struct notifier_block fib_nb;
        struct notifier_block netevent_nb;
        struct notifier_block inetaddr_nb;
        struct notifier_block inet6addr_nb;
        const struct mlxsw_sp_rif_ops **rif_ops_arr;
        const struct mlxsw_sp_ipip_ops **ipip_ops_arr;
};
79
/* Common part of a router interface (RIF). Type-specific RIF structures in
 * this file embed it as their first member, named "common".
 */
struct mlxsw_sp_rif {
        struct list_head nexthop_list;
        struct list_head neigh_list;
        struct net_device *dev; /* NULL for underlay RIF */
        struct mlxsw_sp_fid *fid;
        unsigned char addr[ETH_ALEN];
        int mtu;
        u16 rif_index;
        u16 vr_id;
        const struct mlxsw_sp_rif_ops *ops;
        struct mlxsw_sp *mlxsw_sp;

        /* Counter indexes filled in by mlxsw_sp_rif_counter_alloc(). Each
         * index is only meaningful while its *_valid flag is true.
         */
        unsigned int counter_ingress;
        bool counter_ingress_valid;
        unsigned int counter_egress;
        bool counter_egress_valid;
};
97
/* Parameters passed to the setup() callback of struct mlxsw_sp_rif_ops. */
struct mlxsw_sp_rif_params {
        struct net_device *dev;
        /* The two members share storage.
         * NOTE(review): presumably "lag" below selects which one is
         * meaningful - confirm against the users of this structure.
         */
        union {
                u16 system_port;
                u16 lag_id;
        };
        u16 vid;
        bool lag;
};
107
/* RIF variant that extends the common RIF with a reference count and the
 * same port/LAG/VID fields that struct mlxsw_sp_rif_params carries.
 */
struct mlxsw_sp_rif_subport {
        struct mlxsw_sp_rif common;
        refcount_t ref_count;
        /* system_port and lag_id share storage. */
        union {
                u16 system_port;
                u16 lag_id;
        };
        u16 vid;
        bool lag;
};
118
/* RIF variant that extends the common RIF with a loopback configuration and
 * underlay identifiers.
 */
struct mlxsw_sp_rif_ipip_lb {
        struct mlxsw_sp_rif common;
        struct mlxsw_sp_rif_ipip_lb_config lb_config;
        u16 ul_vr_id; /* Reserved for Spectrum-2. */
        u16 ul_rif_id; /* Reserved for Spectrum. */
};
125
/* Common RIF parameters extended with a loopback configuration. */
struct mlxsw_sp_rif_params_ipip_lb {
        struct mlxsw_sp_rif_params common;
        struct mlxsw_sp_rif_ipip_lb_config lb_config;
};
130
/* Per-RIF-type operations table. Each struct mlxsw_sp_rif points at one of
 * these through its "ops" member.
 */
struct mlxsw_sp_rif_ops {
        enum mlxsw_sp_rif_type type;
        /* NOTE(review): presumably the size of the type-specific RIF
         * structure to allocate - confirm against the RIF creation path.
         */
        size_t rif_size;

        void (*setup)(struct mlxsw_sp_rif *rif,
                      const struct mlxsw_sp_rif_params *params);
        int (*configure)(struct mlxsw_sp_rif *rif);
        void (*deconfigure)(struct mlxsw_sp_rif *rif);
        struct mlxsw_sp_fid * (*fid_get)(struct mlxsw_sp_rif *rif,
                                         struct netlink_ext_ack *extack);
        void (*fdb_del)(struct mlxsw_sp_rif *rif, const char *mac);
};
143
/* Forward declarations of static helpers that are referenced before their
 * definitions.
 */
static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif);
static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree);
static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
                                  struct mlxsw_sp_lpm_tree *lpm_tree);
static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
                                     const struct mlxsw_sp_fib *fib,
                                     u8 tree_id);
static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
                                       const struct mlxsw_sp_fib *fib);
153
154 static unsigned int *
155 mlxsw_sp_rif_p_counter_get(struct mlxsw_sp_rif *rif,
156                            enum mlxsw_sp_rif_counter_dir dir)
157 {
158         switch (dir) {
159         case MLXSW_SP_RIF_COUNTER_EGRESS:
160                 return &rif->counter_egress;
161         case MLXSW_SP_RIF_COUNTER_INGRESS:
162                 return &rif->counter_ingress;
163         }
164         return NULL;
165 }
166
167 static bool
168 mlxsw_sp_rif_counter_valid_get(struct mlxsw_sp_rif *rif,
169                                enum mlxsw_sp_rif_counter_dir dir)
170 {
171         switch (dir) {
172         case MLXSW_SP_RIF_COUNTER_EGRESS:
173                 return rif->counter_egress_valid;
174         case MLXSW_SP_RIF_COUNTER_INGRESS:
175                 return rif->counter_ingress_valid;
176         }
177         return false;
178 }
179
180 static void
181 mlxsw_sp_rif_counter_valid_set(struct mlxsw_sp_rif *rif,
182                                enum mlxsw_sp_rif_counter_dir dir,
183                                bool valid)
184 {
185         switch (dir) {
186         case MLXSW_SP_RIF_COUNTER_EGRESS:
187                 rif->counter_egress_valid = valid;
188                 break;
189         case MLXSW_SP_RIF_COUNTER_INGRESS:
190                 rif->counter_ingress_valid = valid;
191                 break;
192         }
193 }
194
195 static int mlxsw_sp_rif_counter_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
196                                      unsigned int counter_index, bool enable,
197                                      enum mlxsw_sp_rif_counter_dir dir)
198 {
199         char ritr_pl[MLXSW_REG_RITR_LEN];
200         bool is_egress = false;
201         int err;
202
203         if (dir == MLXSW_SP_RIF_COUNTER_EGRESS)
204                 is_egress = true;
205         mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
206         err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
207         if (err)
208                 return err;
209
210         mlxsw_reg_ritr_counter_pack(ritr_pl, counter_index, enable,
211                                     is_egress);
212         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
213 }
214
215 int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp,
216                                    struct mlxsw_sp_rif *rif,
217                                    enum mlxsw_sp_rif_counter_dir dir, u64 *cnt)
218 {
219         char ricnt_pl[MLXSW_REG_RICNT_LEN];
220         unsigned int *p_counter_index;
221         bool valid;
222         int err;
223
224         valid = mlxsw_sp_rif_counter_valid_get(rif, dir);
225         if (!valid)
226                 return -EINVAL;
227
228         p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
229         if (!p_counter_index)
230                 return -EINVAL;
231         mlxsw_reg_ricnt_pack(ricnt_pl, *p_counter_index,
232                              MLXSW_REG_RICNT_OPCODE_NOP);
233         err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
234         if (err)
235                 return err;
236         *cnt = mlxsw_reg_ricnt_good_unicast_packets_get(ricnt_pl);
237         return 0;
238 }
239
240 static int mlxsw_sp_rif_counter_clear(struct mlxsw_sp *mlxsw_sp,
241                                       unsigned int counter_index)
242 {
243         char ricnt_pl[MLXSW_REG_RICNT_LEN];
244
245         mlxsw_reg_ricnt_pack(ricnt_pl, counter_index,
246                              MLXSW_REG_RICNT_OPCODE_CLEAR);
247         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
248 }
249
250 int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp *mlxsw_sp,
251                                struct mlxsw_sp_rif *rif,
252                                enum mlxsw_sp_rif_counter_dir dir)
253 {
254         unsigned int *p_counter_index;
255         int err;
256
257         p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
258         if (!p_counter_index)
259                 return -EINVAL;
260         err = mlxsw_sp_counter_alloc(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
261                                      p_counter_index);
262         if (err)
263                 return err;
264
265         err = mlxsw_sp_rif_counter_clear(mlxsw_sp, *p_counter_index);
266         if (err)
267                 goto err_counter_clear;
268
269         err = mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
270                                         *p_counter_index, true, dir);
271         if (err)
272                 goto err_counter_edit;
273         mlxsw_sp_rif_counter_valid_set(rif, dir, true);
274         return 0;
275
276 err_counter_edit:
277 err_counter_clear:
278         mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
279                               *p_counter_index);
280         return err;
281 }
282
283 void mlxsw_sp_rif_counter_free(struct mlxsw_sp *mlxsw_sp,
284                                struct mlxsw_sp_rif *rif,
285                                enum mlxsw_sp_rif_counter_dir dir)
286 {
287         unsigned int *p_counter_index;
288
289         if (!mlxsw_sp_rif_counter_valid_get(rif, dir))
290                 return;
291
292         p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
293         if (WARN_ON(!p_counter_index))
294                 return;
295         mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
296                                   *p_counter_index, false, dir);
297         mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
298                               *p_counter_index);
299         mlxsw_sp_rif_counter_valid_set(rif, dir, false);
300 }
301
302 static void mlxsw_sp_rif_counters_alloc(struct mlxsw_sp_rif *rif)
303 {
304         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
305         struct devlink *devlink;
306
307         devlink = priv_to_devlink(mlxsw_sp->core);
308         if (!devlink_dpipe_table_counter_enabled(devlink,
309                                                  MLXSW_SP_DPIPE_TABLE_NAME_ERIF))
310                 return;
311         mlxsw_sp_rif_counter_alloc(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
312 }
313
314 static void mlxsw_sp_rif_counters_free(struct mlxsw_sp_rif *rif)
315 {
316         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
317
318         mlxsw_sp_rif_counter_free(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
319 }
320
/* One more than the number of bits in an IPv6 address, so every prefix
 * length from 0 up to the full address width has its own slot.
 */
#define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE + 1)

/* Bitmap with one bit per prefix length; a set bit marks that length as
 * in use.
 */
struct mlxsw_sp_prefix_usage {
        DECLARE_BITMAP(b, MLXSW_SP_PREFIX_COUNT);
};

/* Iterate @prefix over every prefix length whose bit is set in
 * @prefix_usage, in the order for_each_set_bit() visits them.
 */
#define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \
        for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT)
329
330 static bool
331 mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1,
332                          struct mlxsw_sp_prefix_usage *prefix_usage2)
333 {
334         return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
335 }
336
337 static void
338 mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1,
339                           struct mlxsw_sp_prefix_usage *prefix_usage2)
340 {
341         memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
342 }
343
344 static void
345 mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage,
346                           unsigned char prefix_len)
347 {
348         set_bit(prefix_len, prefix_usage->b);
349 }
350
351 static void
352 mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage,
353                             unsigned char prefix_len)
354 {
355         clear_bit(prefix_len, prefix_usage->b);
356 }
357
/* Key of a FIB node: an address buffer large enough for an IPv6 address,
 * plus the prefix length.
 */
struct mlxsw_sp_fib_key {
        unsigned char addr[sizeof(struct in6_addr)];
        unsigned char prefix_len;
};
362
/* Kind of a FIB entry; stored in struct mlxsw_sp_fib_entry::type. */
enum mlxsw_sp_fib_entry_type {
        MLXSW_SP_FIB_ENTRY_TYPE_REMOTE,
        MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,
        MLXSW_SP_FIB_ENTRY_TYPE_TRAP,
        MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE,

        /* This is a special case of local delivery, where a packet should be
         * decapsulated on reception. Note that there is no corresponding ENCAP,
         * because that's a type of next hop, not of FIB entry. (There can be
         * several next hops in a REMOTE entry, and some of them may be
         * encapsulating entries.)
         */
        MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP,
        MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP,
};
378
/* Forward declaration; the full definition is not in this part of the file. */
struct mlxsw_sp_nexthop_group;

/* A FIB node, identified by its key (address and prefix length). */
struct mlxsw_sp_fib_node {
        struct list_head entry_list;
        /* NOTE(review): presumably links into mlxsw_sp_fib::node_list -
         * confirm against the node insertion path.
         */
        struct list_head list;
        struct rhash_head ht_node;
        struct mlxsw_sp_fib *fib;
        struct mlxsw_sp_fib_key key;
};
388
/* Decapsulation data embedded in struct mlxsw_sp_fib_entry. */
struct mlxsw_sp_fib_entry_decap {
        struct mlxsw_sp_ipip_entry *ipip_entry;
        u32 tunnel_index;
};
393
/* Protocol-independent part of a FIB entry. The IPv4 and IPv6 entry
 * structures embed it as their first member, named "common".
 */
struct mlxsw_sp_fib_entry {
        struct list_head list;
        struct mlxsw_sp_fib_node *fib_node;
        enum mlxsw_sp_fib_entry_type type;
        struct list_head nexthop_group_node;
        struct mlxsw_sp_nexthop_group *nh_group;
        struct mlxsw_sp_fib_entry_decap decap; /* Valid for decap entries. */
};
402
/* IPv4 FIB entry: the common entry plus IPv4-specific route attributes. */
struct mlxsw_sp_fib4_entry {
        struct mlxsw_sp_fib_entry common;
        u32 tb_id;
        u32 prio;
        u8 tos;
        u8 type;
};
410
/* IPv6 FIB entry: the common entry plus a list of routes.
 * NOTE(review): nrt6 is presumably the number of elements on rt6_list -
 * confirm against the code that maintains the list.
 */
struct mlxsw_sp_fib6_entry {
        struct mlxsw_sp_fib_entry common;
        struct list_head rt6_list;
        unsigned int nrt6;
};
416
/* List element wrapping a pointer to a kernel struct fib6_info. */
struct mlxsw_sp_rt6 {
        struct list_head list;
        struct fib6_info *rt;
};
421
/* One slot of the LPM tree pool kept in mlxsw_sp_router::lpm.trees. */
struct mlxsw_sp_lpm_tree {
        u8 id; /* tree ID */
        /* Zero means the slot is free; see mlxsw_sp_lpm_tree_find_unused(). */
        unsigned int ref_count;
        enum mlxsw_sp_l3proto proto;
        /* Zeroed when the tree is created. */
        unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT];
        /* Copy of the prefix usage the tree was created with. */
        struct mlxsw_sp_prefix_usage prefix_usage;
};
429
/* A FIB of one L3 protocol inside one virtual router. */
struct mlxsw_sp_fib {
        struct rhashtable ht;
        struct list_head node_list;
        struct mlxsw_sp_vr *vr;
        /* Tree the FIB is bound to; the FIB holds a reference on it. */
        struct mlxsw_sp_lpm_tree *lpm_tree;
        enum mlxsw_sp_l3proto proto;
};
437
/* A virtual router slot. The slot counts as in use when any of fib4, fib6
 * or the mr_table[] entries is non-NULL (see mlxsw_sp_vr_is_used()).
 */
struct mlxsw_sp_vr {
        u16 id; /* virtual router ID */
        u32 tb_id; /* kernel fib table id */
        unsigned int rif_count;
        struct mlxsw_sp_fib *fib4;
        struct mlxsw_sp_fib *fib6;
        struct mlxsw_sp_mr_table *mr_table[MLXSW_SP_L3_PROTO_MAX];
        struct mlxsw_sp_rif *ul_rif;
        refcount_t ul_rif_refcnt;
};
448
/* Declared here so mlxsw_sp_fib_create() can reference it.
 * NOTE(review): the initialized definition is presumably further down the
 * file - it is not visible in this part.
 */
static const struct rhashtable_params mlxsw_sp_fib_ht_params;
450
451 static struct mlxsw_sp_fib *mlxsw_sp_fib_create(struct mlxsw_sp *mlxsw_sp,
452                                                 struct mlxsw_sp_vr *vr,
453                                                 enum mlxsw_sp_l3proto proto)
454 {
455         struct mlxsw_sp_lpm_tree *lpm_tree;
456         struct mlxsw_sp_fib *fib;
457         int err;
458
459         lpm_tree = mlxsw_sp->router->lpm.proto_trees[proto];
460         fib = kzalloc(sizeof(*fib), GFP_KERNEL);
461         if (!fib)
462                 return ERR_PTR(-ENOMEM);
463         err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params);
464         if (err)
465                 goto err_rhashtable_init;
466         INIT_LIST_HEAD(&fib->node_list);
467         fib->proto = proto;
468         fib->vr = vr;
469         fib->lpm_tree = lpm_tree;
470         mlxsw_sp_lpm_tree_hold(lpm_tree);
471         err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, lpm_tree->id);
472         if (err)
473                 goto err_lpm_tree_bind;
474         return fib;
475
476 err_lpm_tree_bind:
477         mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
478 err_rhashtable_init:
479         kfree(fib);
480         return ERR_PTR(err);
481 }
482
483 static void mlxsw_sp_fib_destroy(struct mlxsw_sp *mlxsw_sp,
484                                  struct mlxsw_sp_fib *fib)
485 {
486         mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, fib);
487         mlxsw_sp_lpm_tree_put(mlxsw_sp, fib->lpm_tree);
488         WARN_ON(!list_empty(&fib->node_list));
489         rhashtable_destroy(&fib->ht);
490         kfree(fib);
491 }
492
493 static struct mlxsw_sp_lpm_tree *
494 mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp)
495 {
496         static struct mlxsw_sp_lpm_tree *lpm_tree;
497         int i;
498
499         for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
500                 lpm_tree = &mlxsw_sp->router->lpm.trees[i];
501                 if (lpm_tree->ref_count == 0)
502                         return lpm_tree;
503         }
504         return NULL;
505 }
506
507 static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp,
508                                    struct mlxsw_sp_lpm_tree *lpm_tree)
509 {
510         char ralta_pl[MLXSW_REG_RALTA_LEN];
511
512         mlxsw_reg_ralta_pack(ralta_pl, true,
513                              (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
514                              lpm_tree->id);
515         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
516 }
517
518 static void mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp,
519                                    struct mlxsw_sp_lpm_tree *lpm_tree)
520 {
521         char ralta_pl[MLXSW_REG_RALTA_LEN];
522
523         mlxsw_reg_ralta_pack(ralta_pl, false,
524                              (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
525                              lpm_tree->id);
526         mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
527 }
528
529 static int
530 mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp,
531                                   struct mlxsw_sp_prefix_usage *prefix_usage,
532                                   struct mlxsw_sp_lpm_tree *lpm_tree)
533 {
534         char ralst_pl[MLXSW_REG_RALST_LEN];
535         u8 root_bin = 0;
536         u8 prefix;
537         u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD;
538
539         mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage)
540                 root_bin = prefix;
541
542         mlxsw_reg_ralst_pack(ralst_pl, root_bin, lpm_tree->id);
543         mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) {
544                 if (prefix == 0)
545                         continue;
546                 mlxsw_reg_ralst_bin_pack(ralst_pl, prefix, last_prefix,
547                                          MLXSW_REG_RALST_BIN_NO_CHILD);
548                 last_prefix = prefix;
549         }
550         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
551 }
552
553 static struct mlxsw_sp_lpm_tree *
554 mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
555                          struct mlxsw_sp_prefix_usage *prefix_usage,
556                          enum mlxsw_sp_l3proto proto)
557 {
558         struct mlxsw_sp_lpm_tree *lpm_tree;
559         int err;
560
561         lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp);
562         if (!lpm_tree)
563                 return ERR_PTR(-EBUSY);
564         lpm_tree->proto = proto;
565         err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, lpm_tree);
566         if (err)
567                 return ERR_PTR(err);
568
569         err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, prefix_usage,
570                                                 lpm_tree);
571         if (err)
572                 goto err_left_struct_set;
573         memcpy(&lpm_tree->prefix_usage, prefix_usage,
574                sizeof(lpm_tree->prefix_usage));
575         memset(&lpm_tree->prefix_ref_count, 0,
576                sizeof(lpm_tree->prefix_ref_count));
577         lpm_tree->ref_count = 1;
578         return lpm_tree;
579
580 err_left_struct_set:
581         mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
582         return ERR_PTR(err);
583 }
584
/* Counterpart of mlxsw_sp_lpm_tree_create(): release the tree in the
 * device. The slot itself is not modified here.
 */
static void mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp,
                                      struct mlxsw_sp_lpm_tree *lpm_tree)
{
        mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
}
590
591 static struct mlxsw_sp_lpm_tree *
592 mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
593                       struct mlxsw_sp_prefix_usage *prefix_usage,
594                       enum mlxsw_sp_l3proto proto)
595 {
596         struct mlxsw_sp_lpm_tree *lpm_tree;
597         int i;
598
599         for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
600                 lpm_tree = &mlxsw_sp->router->lpm.trees[i];
601                 if (lpm_tree->ref_count != 0 &&
602                     lpm_tree->proto == proto &&
603                     mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
604                                              prefix_usage)) {
605                         mlxsw_sp_lpm_tree_hold(lpm_tree);
606                         return lpm_tree;
607                 }
608         }
609         return mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage, proto);
610 }
611
612 static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree)
613 {
614         lpm_tree->ref_count++;
615 }
616
617 static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
618                                   struct mlxsw_sp_lpm_tree *lpm_tree)
619 {
620         if (--lpm_tree->ref_count == 0)
621                 mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree);
622 }
623
624 #define MLXSW_SP_LPM_TREE_MIN 1 /* tree 0 is reserved */
625
626 static int mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
627 {
628         struct mlxsw_sp_prefix_usage req_prefix_usage = {{ 0 } };
629         struct mlxsw_sp_lpm_tree *lpm_tree;
630         u64 max_trees;
631         int err, i;
632
633         if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_LPM_TREES))
634                 return -EIO;
635
636         max_trees = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_LPM_TREES);
637         mlxsw_sp->router->lpm.tree_count = max_trees - MLXSW_SP_LPM_TREE_MIN;
638         mlxsw_sp->router->lpm.trees = kcalloc(mlxsw_sp->router->lpm.tree_count,
639                                              sizeof(struct mlxsw_sp_lpm_tree),
640                                              GFP_KERNEL);
641         if (!mlxsw_sp->router->lpm.trees)
642                 return -ENOMEM;
643
644         for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
645                 lpm_tree = &mlxsw_sp->router->lpm.trees[i];
646                 lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN;
647         }
648
649         lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
650                                          MLXSW_SP_L3_PROTO_IPV4);
651         if (IS_ERR(lpm_tree)) {
652                 err = PTR_ERR(lpm_tree);
653                 goto err_ipv4_tree_get;
654         }
655         mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4] = lpm_tree;
656
657         lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
658                                          MLXSW_SP_L3_PROTO_IPV6);
659         if (IS_ERR(lpm_tree)) {
660                 err = PTR_ERR(lpm_tree);
661                 goto err_ipv6_tree_get;
662         }
663         mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6] = lpm_tree;
664
665         return 0;
666
667 err_ipv6_tree_get:
668         lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
669         mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
670 err_ipv4_tree_get:
671         kfree(mlxsw_sp->router->lpm.trees);
672         return err;
673 }
674
675 static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp)
676 {
677         struct mlxsw_sp_lpm_tree *lpm_tree;
678
679         lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6];
680         mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
681
682         lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
683         mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
684
685         kfree(mlxsw_sp->router->lpm.trees);
686 }
687
688 static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr)
689 {
690         return !!vr->fib4 || !!vr->fib6 ||
691                !!vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] ||
692                !!vr->mr_table[MLXSW_SP_L3_PROTO_IPV6];
693 }
694
695 static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
696 {
697         struct mlxsw_sp_vr *vr;
698         int i;
699
700         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
701                 vr = &mlxsw_sp->router->vrs[i];
702                 if (!mlxsw_sp_vr_is_used(vr))
703                         return vr;
704         }
705         return NULL;
706 }
707
708 static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
709                                      const struct mlxsw_sp_fib *fib, u8 tree_id)
710 {
711         char raltb_pl[MLXSW_REG_RALTB_LEN];
712
713         mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
714                              (enum mlxsw_reg_ralxx_protocol) fib->proto,
715                              tree_id);
716         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
717 }
718
719 static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
720                                        const struct mlxsw_sp_fib *fib)
721 {
722         char raltb_pl[MLXSW_REG_RALTB_LEN];
723
724         /* Bind to tree 0 which is default */
725         mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
726                              (enum mlxsw_reg_ralxx_protocol) fib->proto, 0);
727         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
728 }
729
730 static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
731 {
732         /* For our purpose, squash main, default and local tables into one */
733         if (tb_id == RT_TABLE_LOCAL || tb_id == RT_TABLE_DEFAULT)
734                 tb_id = RT_TABLE_MAIN;
735         return tb_id;
736 }
737
738 static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
739                                             u32 tb_id)
740 {
741         struct mlxsw_sp_vr *vr;
742         int i;
743
744         tb_id = mlxsw_sp_fix_tb_id(tb_id);
745
746         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
747                 vr = &mlxsw_sp->router->vrs[i];
748                 if (mlxsw_sp_vr_is_used(vr) && vr->tb_id == tb_id)
749                         return vr;
750         }
751         return NULL;
752 }
753
754 int mlxsw_sp_router_tb_id_vr_id(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
755                                 u16 *vr_id)
756 {
757         struct mlxsw_sp_vr *vr;
758
759         vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
760         if (!vr)
761                 return -ESRCH;
762         *vr_id = vr->id;
763
764         return 0;
765 }
766
767 static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr,
768                                             enum mlxsw_sp_l3proto proto)
769 {
770         switch (proto) {
771         case MLXSW_SP_L3_PROTO_IPV4:
772                 return vr->fib4;
773         case MLXSW_SP_L3_PROTO_IPV6:
774                 return vr->fib6;
775         }
776         return NULL;
777 }
778
779 static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
780                                               u32 tb_id,
781                                               struct netlink_ext_ack *extack)
782 {
783         struct mlxsw_sp_mr_table *mr4_table, *mr6_table;
784         struct mlxsw_sp_fib *fib4;
785         struct mlxsw_sp_fib *fib6;
786         struct mlxsw_sp_vr *vr;
787         int err;
788
789         vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
790         if (!vr) {
791                 NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported virtual routers");
792                 return ERR_PTR(-EBUSY);
793         }
794         fib4 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
795         if (IS_ERR(fib4))
796                 return ERR_CAST(fib4);
797         fib6 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
798         if (IS_ERR(fib6)) {
799                 err = PTR_ERR(fib6);
800                 goto err_fib6_create;
801         }
802         mr4_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
803                                              MLXSW_SP_L3_PROTO_IPV4);
804         if (IS_ERR(mr4_table)) {
805                 err = PTR_ERR(mr4_table);
806                 goto err_mr4_table_create;
807         }
808         mr6_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
809                                              MLXSW_SP_L3_PROTO_IPV6);
810         if (IS_ERR(mr6_table)) {
811                 err = PTR_ERR(mr6_table);
812                 goto err_mr6_table_create;
813         }
814
815         vr->fib4 = fib4;
816         vr->fib6 = fib6;
817         vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] = mr4_table;
818         vr->mr_table[MLXSW_SP_L3_PROTO_IPV6] = mr6_table;
819         vr->tb_id = tb_id;
820         return vr;
821
822 err_mr6_table_create:
823         mlxsw_sp_mr_table_destroy(mr4_table);
824 err_mr4_table_create:
825         mlxsw_sp_fib_destroy(mlxsw_sp, fib6);
826 err_fib6_create:
827         mlxsw_sp_fib_destroy(mlxsw_sp, fib4);
828         return ERR_PTR(err);
829 }
830
831 static void mlxsw_sp_vr_destroy(struct mlxsw_sp *mlxsw_sp,
832                                 struct mlxsw_sp_vr *vr)
833 {
834         mlxsw_sp_mr_table_destroy(vr->mr_table[MLXSW_SP_L3_PROTO_IPV6]);
835         vr->mr_table[MLXSW_SP_L3_PROTO_IPV6] = NULL;
836         mlxsw_sp_mr_table_destroy(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4]);
837         vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] = NULL;
838         mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib6);
839         vr->fib6 = NULL;
840         mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib4);
841         vr->fib4 = NULL;
842 }
843
844 static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
845                                            struct netlink_ext_ack *extack)
846 {
847         struct mlxsw_sp_vr *vr;
848
849         tb_id = mlxsw_sp_fix_tb_id(tb_id);
850         vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
851         if (!vr)
852                 vr = mlxsw_sp_vr_create(mlxsw_sp, tb_id, extack);
853         return vr;
854 }
855
856 static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr)
857 {
858         if (!vr->rif_count && list_empty(&vr->fib4->node_list) &&
859             list_empty(&vr->fib6->node_list) &&
860             mlxsw_sp_mr_table_empty(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4]) &&
861             mlxsw_sp_mr_table_empty(vr->mr_table[MLXSW_SP_L3_PROTO_IPV6]))
862                 mlxsw_sp_vr_destroy(mlxsw_sp, vr);
863 }
864
865 static bool
866 mlxsw_sp_vr_lpm_tree_should_replace(struct mlxsw_sp_vr *vr,
867                                     enum mlxsw_sp_l3proto proto, u8 tree_id)
868 {
869         struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
870
871         if (!mlxsw_sp_vr_is_used(vr))
872                 return false;
873         if (fib->lpm_tree->id == tree_id)
874                 return true;
875         return false;
876 }
877
878 static int mlxsw_sp_vr_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
879                                         struct mlxsw_sp_fib *fib,
880                                         struct mlxsw_sp_lpm_tree *new_tree)
881 {
882         struct mlxsw_sp_lpm_tree *old_tree = fib->lpm_tree;
883         int err;
884
885         fib->lpm_tree = new_tree;
886         mlxsw_sp_lpm_tree_hold(new_tree);
887         err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id);
888         if (err)
889                 goto err_tree_bind;
890         mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
891         return 0;
892
893 err_tree_bind:
894         mlxsw_sp_lpm_tree_put(mlxsw_sp, new_tree);
895         fib->lpm_tree = old_tree;
896         return err;
897 }
898
899 static int mlxsw_sp_vrs_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
900                                          struct mlxsw_sp_fib *fib,
901                                          struct mlxsw_sp_lpm_tree *new_tree)
902 {
903         enum mlxsw_sp_l3proto proto = fib->proto;
904         struct mlxsw_sp_lpm_tree *old_tree;
905         u8 old_id, new_id = new_tree->id;
906         struct mlxsw_sp_vr *vr;
907         int i, err;
908
909         old_tree = mlxsw_sp->router->lpm.proto_trees[proto];
910         old_id = old_tree->id;
911
912         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
913                 vr = &mlxsw_sp->router->vrs[i];
914                 if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, old_id))
915                         continue;
916                 err = mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
917                                                    mlxsw_sp_vr_fib(vr, proto),
918                                                    new_tree);
919                 if (err)
920                         goto err_tree_replace;
921         }
922
923         memcpy(new_tree->prefix_ref_count, old_tree->prefix_ref_count,
924                sizeof(new_tree->prefix_ref_count));
925         mlxsw_sp->router->lpm.proto_trees[proto] = new_tree;
926         mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
927
928         return 0;
929
930 err_tree_replace:
931         for (i--; i >= 0; i--) {
932                 if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, new_id))
933                         continue;
934                 mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
935                                              mlxsw_sp_vr_fib(vr, proto),
936                                              old_tree);
937         }
938         return err;
939 }
940
941 static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
942 {
943         struct mlxsw_sp_vr *vr;
944         u64 max_vrs;
945         int i;
946
947         if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_VRS))
948                 return -EIO;
949
950         max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
951         mlxsw_sp->router->vrs = kcalloc(max_vrs, sizeof(struct mlxsw_sp_vr),
952                                         GFP_KERNEL);
953         if (!mlxsw_sp->router->vrs)
954                 return -ENOMEM;
955
956         for (i = 0; i < max_vrs; i++) {
957                 vr = &mlxsw_sp->router->vrs[i];
958                 vr->id = i;
959         }
960
961         return 0;
962 }
963
964 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp);
965
966 static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
967 {
968         /* At this stage we're guaranteed not to have new incoming
969          * FIB notifications and the work queue is free from FIBs
970          * sitting on top of mlxsw netdevs. However, we can still
971          * have other FIBs queued. Flush the queue before flushing
972          * the device's tables. No need for locks, as we're the only
973          * writer.
974          */
975         mlxsw_core_flush_owq();
976         mlxsw_sp_router_fib_flush(mlxsw_sp);
977         kfree(mlxsw_sp->router->vrs);
978 }
979
980 static struct net_device *
981 __mlxsw_sp_ipip_netdev_ul_dev_get(const struct net_device *ol_dev)
982 {
983         struct ip_tunnel *tun = netdev_priv(ol_dev);
984         struct net *net = dev_net(ol_dev);
985
986         return __dev_get_by_index(net, tun->parms.link);
987 }
988
989 u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev)
990 {
991         struct net_device *d = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
992
993         if (d)
994                 return l3mdev_fib_table(d) ? : RT_TABLE_MAIN;
995         else
996                 return l3mdev_fib_table(ol_dev) ? : RT_TABLE_MAIN;
997 }
998
999 static struct mlxsw_sp_rif *
1000 mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
1001                     const struct mlxsw_sp_rif_params *params,
1002                     struct netlink_ext_ack *extack);
1003
1004 static struct mlxsw_sp_rif_ipip_lb *
1005 mlxsw_sp_ipip_ol_ipip_lb_create(struct mlxsw_sp *mlxsw_sp,
1006                                 enum mlxsw_sp_ipip_type ipipt,
1007                                 struct net_device *ol_dev,
1008                                 struct netlink_ext_ack *extack)
1009 {
1010         struct mlxsw_sp_rif_params_ipip_lb lb_params;
1011         const struct mlxsw_sp_ipip_ops *ipip_ops;
1012         struct mlxsw_sp_rif *rif;
1013
1014         ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
1015         lb_params = (struct mlxsw_sp_rif_params_ipip_lb) {
1016                 .common.dev = ol_dev,
1017                 .common.lag = false,
1018                 .lb_config = ipip_ops->ol_loopback_config(mlxsw_sp, ol_dev),
1019         };
1020
1021         rif = mlxsw_sp_rif_create(mlxsw_sp, &lb_params.common, extack);
1022         if (IS_ERR(rif))
1023                 return ERR_CAST(rif);
1024         return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
1025 }
1026
1027 static struct mlxsw_sp_ipip_entry *
1028 mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp,
1029                           enum mlxsw_sp_ipip_type ipipt,
1030                           struct net_device *ol_dev)
1031 {
1032         const struct mlxsw_sp_ipip_ops *ipip_ops;
1033         struct mlxsw_sp_ipip_entry *ipip_entry;
1034         struct mlxsw_sp_ipip_entry *ret = NULL;
1035
1036         ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
1037         ipip_entry = kzalloc(sizeof(*ipip_entry), GFP_KERNEL);
1038         if (!ipip_entry)
1039                 return ERR_PTR(-ENOMEM);
1040
1041         ipip_entry->ol_lb = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp, ipipt,
1042                                                             ol_dev, NULL);
1043         if (IS_ERR(ipip_entry->ol_lb)) {
1044                 ret = ERR_CAST(ipip_entry->ol_lb);
1045                 goto err_ol_ipip_lb_create;
1046         }
1047
1048         ipip_entry->ipipt = ipipt;
1049         ipip_entry->ol_dev = ol_dev;
1050
1051         switch (ipip_ops->ul_proto) {
1052         case MLXSW_SP_L3_PROTO_IPV4:
1053                 ipip_entry->parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev);
1054                 break;
1055         case MLXSW_SP_L3_PROTO_IPV6:
1056                 WARN_ON(1);
1057                 break;
1058         }
1059
1060         return ipip_entry;
1061
1062 err_ol_ipip_lb_create:
1063         kfree(ipip_entry);
1064         return ret;
1065 }
1066
1067 static void
1068 mlxsw_sp_ipip_entry_dealloc(struct mlxsw_sp_ipip_entry *ipip_entry)
1069 {
1070         mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common);
1071         kfree(ipip_entry);
1072 }
1073
1074 static bool
1075 mlxsw_sp_ipip_entry_saddr_matches(struct mlxsw_sp *mlxsw_sp,
1076                                   const enum mlxsw_sp_l3proto ul_proto,
1077                                   union mlxsw_sp_l3addr saddr,
1078                                   u32 ul_tb_id,
1079                                   struct mlxsw_sp_ipip_entry *ipip_entry)
1080 {
1081         u32 tun_ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
1082         enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
1083         union mlxsw_sp_l3addr tun_saddr;
1084
1085         if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
1086                 return false;
1087
1088         tun_saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ipip_entry->ol_dev);
1089         return tun_ul_tb_id == ul_tb_id &&
1090                mlxsw_sp_l3addr_eq(&tun_saddr, &saddr);
1091 }
1092
1093 static int
1094 mlxsw_sp_fib_entry_decap_init(struct mlxsw_sp *mlxsw_sp,
1095                               struct mlxsw_sp_fib_entry *fib_entry,
1096                               struct mlxsw_sp_ipip_entry *ipip_entry)
1097 {
1098         u32 tunnel_index;
1099         int err;
1100
1101         err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
1102                                   1, &tunnel_index);
1103         if (err)
1104                 return err;
1105
1106         ipip_entry->decap_fib_entry = fib_entry;
1107         fib_entry->decap.ipip_entry = ipip_entry;
1108         fib_entry->decap.tunnel_index = tunnel_index;
1109         return 0;
1110 }
1111
1112 static void mlxsw_sp_fib_entry_decap_fini(struct mlxsw_sp *mlxsw_sp,
1113                                           struct mlxsw_sp_fib_entry *fib_entry)
1114 {
1115         /* Unlink this node from the IPIP entry that it's the decap entry of. */
1116         fib_entry->decap.ipip_entry->decap_fib_entry = NULL;
1117         fib_entry->decap.ipip_entry = NULL;
1118         mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
1119                            1, fib_entry->decap.tunnel_index);
1120 }
1121
1122 static struct mlxsw_sp_fib_node *
1123 mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
1124                          size_t addr_len, unsigned char prefix_len);
1125 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
1126                                      struct mlxsw_sp_fib_entry *fib_entry);
1127
1128 static void
1129 mlxsw_sp_ipip_entry_demote_decap(struct mlxsw_sp *mlxsw_sp,
1130                                  struct mlxsw_sp_ipip_entry *ipip_entry)
1131 {
1132         struct mlxsw_sp_fib_entry *fib_entry = ipip_entry->decap_fib_entry;
1133
1134         mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, fib_entry);
1135         fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1136
1137         mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1138 }
1139
1140 static void
1141 mlxsw_sp_ipip_entry_promote_decap(struct mlxsw_sp *mlxsw_sp,
1142                                   struct mlxsw_sp_ipip_entry *ipip_entry,
1143                                   struct mlxsw_sp_fib_entry *decap_fib_entry)
1144 {
1145         if (mlxsw_sp_fib_entry_decap_init(mlxsw_sp, decap_fib_entry,
1146                                           ipip_entry))
1147                 return;
1148         decap_fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
1149
1150         if (mlxsw_sp_fib_entry_update(mlxsw_sp, decap_fib_entry))
1151                 mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1152 }
1153
1154 static struct mlxsw_sp_fib_entry *
1155 mlxsw_sp_router_ip2me_fib_entry_find(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
1156                                      enum mlxsw_sp_l3proto proto,
1157                                      const union mlxsw_sp_l3addr *addr,
1158                                      enum mlxsw_sp_fib_entry_type type)
1159 {
1160         struct mlxsw_sp_fib_entry *fib_entry;
1161         struct mlxsw_sp_fib_node *fib_node;
1162         unsigned char addr_prefix_len;
1163         struct mlxsw_sp_fib *fib;
1164         struct mlxsw_sp_vr *vr;
1165         const void *addrp;
1166         size_t addr_len;
1167         u32 addr4;
1168
1169         vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
1170         if (!vr)
1171                 return NULL;
1172         fib = mlxsw_sp_vr_fib(vr, proto);
1173
1174         switch (proto) {
1175         case MLXSW_SP_L3_PROTO_IPV4:
1176                 addr4 = be32_to_cpu(addr->addr4);
1177                 addrp = &addr4;
1178                 addr_len = 4;
1179                 addr_prefix_len = 32;
1180                 break;
1181         case MLXSW_SP_L3_PROTO_IPV6: /* fall through */
1182         default:
1183                 WARN_ON(1);
1184                 return NULL;
1185         }
1186
1187         fib_node = mlxsw_sp_fib_node_lookup(fib, addrp, addr_len,
1188                                             addr_prefix_len);
1189         if (!fib_node || list_empty(&fib_node->entry_list))
1190                 return NULL;
1191
1192         fib_entry = list_first_entry(&fib_node->entry_list,
1193                                      struct mlxsw_sp_fib_entry, list);
1194         if (fib_entry->type != type)
1195                 return NULL;
1196
1197         return fib_entry;
1198 }
1199
1200 /* Given an IPIP entry, find the corresponding decap route. */
1201 static struct mlxsw_sp_fib_entry *
1202 mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp,
1203                                struct mlxsw_sp_ipip_entry *ipip_entry)
1204 {
1205         static struct mlxsw_sp_fib_node *fib_node;
1206         const struct mlxsw_sp_ipip_ops *ipip_ops;
1207         struct mlxsw_sp_fib_entry *fib_entry;
1208         unsigned char saddr_prefix_len;
1209         union mlxsw_sp_l3addr saddr;
1210         struct mlxsw_sp_fib *ul_fib;
1211         struct mlxsw_sp_vr *ul_vr;
1212         const void *saddrp;
1213         size_t saddr_len;
1214         u32 ul_tb_id;
1215         u32 saddr4;
1216
1217         ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1218
1219         ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
1220         ul_vr = mlxsw_sp_vr_find(mlxsw_sp, ul_tb_id);
1221         if (!ul_vr)
1222                 return NULL;
1223
1224         ul_fib = mlxsw_sp_vr_fib(ul_vr, ipip_ops->ul_proto);
1225         saddr = mlxsw_sp_ipip_netdev_saddr(ipip_ops->ul_proto,
1226                                            ipip_entry->ol_dev);
1227
1228         switch (ipip_ops->ul_proto) {
1229         case MLXSW_SP_L3_PROTO_IPV4:
1230                 saddr4 = be32_to_cpu(saddr.addr4);
1231                 saddrp = &saddr4;
1232                 saddr_len = 4;
1233                 saddr_prefix_len = 32;
1234                 break;
1235         case MLXSW_SP_L3_PROTO_IPV6:
1236                 WARN_ON(1);
1237                 return NULL;
1238         }
1239
1240         fib_node = mlxsw_sp_fib_node_lookup(ul_fib, saddrp, saddr_len,
1241                                             saddr_prefix_len);
1242         if (!fib_node || list_empty(&fib_node->entry_list))
1243                 return NULL;
1244
1245         fib_entry = list_first_entry(&fib_node->entry_list,
1246                                      struct mlxsw_sp_fib_entry, list);
1247         if (fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP)
1248                 return NULL;
1249
1250         return fib_entry;
1251 }
1252
1253 static struct mlxsw_sp_ipip_entry *
1254 mlxsw_sp_ipip_entry_create(struct mlxsw_sp *mlxsw_sp,
1255                            enum mlxsw_sp_ipip_type ipipt,
1256                            struct net_device *ol_dev)
1257 {
1258         struct mlxsw_sp_ipip_entry *ipip_entry;
1259
1260         ipip_entry = mlxsw_sp_ipip_entry_alloc(mlxsw_sp, ipipt, ol_dev);
1261         if (IS_ERR(ipip_entry))
1262                 return ipip_entry;
1263
1264         list_add_tail(&ipip_entry->ipip_list_node,
1265                       &mlxsw_sp->router->ipip_list);
1266
1267         return ipip_entry;
1268 }
1269
1270 static void
1271 mlxsw_sp_ipip_entry_destroy(struct mlxsw_sp *mlxsw_sp,
1272                             struct mlxsw_sp_ipip_entry *ipip_entry)
1273 {
1274         list_del(&ipip_entry->ipip_list_node);
1275         mlxsw_sp_ipip_entry_dealloc(ipip_entry);
1276 }
1277
1278 static bool
1279 mlxsw_sp_ipip_entry_matches_decap(struct mlxsw_sp *mlxsw_sp,
1280                                   const struct net_device *ul_dev,
1281                                   enum mlxsw_sp_l3proto ul_proto,
1282                                   union mlxsw_sp_l3addr ul_dip,
1283                                   struct mlxsw_sp_ipip_entry *ipip_entry)
1284 {
1285         u32 ul_tb_id = l3mdev_fib_table(ul_dev) ? : RT_TABLE_MAIN;
1286         enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
1287
1288         if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
1289                 return false;
1290
1291         return mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, ul_dip,
1292                                                  ul_tb_id, ipip_entry);
1293 }
1294
1295 /* Given decap parameters, find the corresponding IPIP entry. */
1296 static struct mlxsw_sp_ipip_entry *
1297 mlxsw_sp_ipip_entry_find_by_decap(struct mlxsw_sp *mlxsw_sp,
1298                                   const struct net_device *ul_dev,
1299                                   enum mlxsw_sp_l3proto ul_proto,
1300                                   union mlxsw_sp_l3addr ul_dip)
1301 {
1302         struct mlxsw_sp_ipip_entry *ipip_entry;
1303
1304         list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
1305                             ipip_list_node)
1306                 if (mlxsw_sp_ipip_entry_matches_decap(mlxsw_sp, ul_dev,
1307                                                       ul_proto, ul_dip,
1308                                                       ipip_entry))
1309                         return ipip_entry;
1310
1311         return NULL;
1312 }
1313
1314 static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp,
1315                                       const struct net_device *dev,
1316                                       enum mlxsw_sp_ipip_type *p_type)
1317 {
1318         struct mlxsw_sp_router *router = mlxsw_sp->router;
1319         const struct mlxsw_sp_ipip_ops *ipip_ops;
1320         enum mlxsw_sp_ipip_type ipipt;
1321
1322         for (ipipt = 0; ipipt < MLXSW_SP_IPIP_TYPE_MAX; ++ipipt) {
1323                 ipip_ops = router->ipip_ops_arr[ipipt];
1324                 if (dev->type == ipip_ops->dev_type) {
1325                         if (p_type)
1326                                 *p_type = ipipt;
1327                         return true;
1328                 }
1329         }
1330         return false;
1331 }
1332
1333 bool mlxsw_sp_netdev_is_ipip_ol(const struct mlxsw_sp *mlxsw_sp,
1334                                 const struct net_device *dev)
1335 {
1336         return mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL);
1337 }
1338
1339 static struct mlxsw_sp_ipip_entry *
1340 mlxsw_sp_ipip_entry_find_by_ol_dev(struct mlxsw_sp *mlxsw_sp,
1341                                    const struct net_device *ol_dev)
1342 {
1343         struct mlxsw_sp_ipip_entry *ipip_entry;
1344
1345         list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
1346                             ipip_list_node)
1347                 if (ipip_entry->ol_dev == ol_dev)
1348                         return ipip_entry;
1349
1350         return NULL;
1351 }
1352
1353 static struct mlxsw_sp_ipip_entry *
1354 mlxsw_sp_ipip_entry_find_by_ul_dev(const struct mlxsw_sp *mlxsw_sp,
1355                                    const struct net_device *ul_dev,
1356                                    struct mlxsw_sp_ipip_entry *start)
1357 {
1358         struct mlxsw_sp_ipip_entry *ipip_entry;
1359
1360         ipip_entry = list_prepare_entry(start, &mlxsw_sp->router->ipip_list,
1361                                         ipip_list_node);
1362         list_for_each_entry_continue(ipip_entry, &mlxsw_sp->router->ipip_list,
1363                                      ipip_list_node) {
1364                 struct net_device *ipip_ul_dev =
1365                         __mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);
1366
1367                 if (ipip_ul_dev == ul_dev)
1368                         return ipip_entry;
1369         }
1370
1371         return NULL;
1372 }
1373
1374 bool mlxsw_sp_netdev_is_ipip_ul(const struct mlxsw_sp *mlxsw_sp,
1375                                 const struct net_device *dev)
1376 {
1377         return mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp, dev, NULL);
1378 }
1379
1380 static bool mlxsw_sp_netdevice_ipip_can_offload(struct mlxsw_sp *mlxsw_sp,
1381                                                 const struct net_device *ol_dev,
1382                                                 enum mlxsw_sp_ipip_type ipipt)
1383 {
1384         const struct mlxsw_sp_ipip_ops *ops
1385                 = mlxsw_sp->router->ipip_ops_arr[ipipt];
1386
1387         /* For deciding whether decap should be offloaded, we don't care about
1388          * overlay protocol, so ask whether either one is supported.
1389          */
1390         return ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV4) ||
1391                ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV6);
1392 }
1393
1394 static int mlxsw_sp_netdevice_ipip_ol_reg_event(struct mlxsw_sp *mlxsw_sp,
1395                                                 struct net_device *ol_dev)
1396 {
1397         struct mlxsw_sp_ipip_entry *ipip_entry;
1398         enum mlxsw_sp_l3proto ul_proto;
1399         enum mlxsw_sp_ipip_type ipipt;
1400         union mlxsw_sp_l3addr saddr;
1401         u32 ul_tb_id;
1402
1403         mlxsw_sp_netdev_ipip_type(mlxsw_sp, ol_dev, &ipipt);
1404         if (mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev, ipipt)) {
1405                 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
1406                 ul_proto = mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto;
1407                 saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
1408                 if (!mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
1409                                                           saddr, ul_tb_id,
1410                                                           NULL)) {
1411                         ipip_entry = mlxsw_sp_ipip_entry_create(mlxsw_sp, ipipt,
1412                                                                 ol_dev);
1413                         if (IS_ERR(ipip_entry))
1414                                 return PTR_ERR(ipip_entry);
1415                 }
1416         }
1417
1418         return 0;
1419 }
1420
/* Handle unregistration of tunnel netdevice @ol_dev: destroy its IPIP
 * entry, if it has one.
 */
static void mlxsw_sp_netdevice_ipip_ol_unreg_event(struct mlxsw_sp *mlxsw_sp,
						   struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (!ipip_entry)
		return;

	mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
}
1430
/* React to the overlay device of @ipip_entry coming up: if a decap route
 * for the tunnel exists, promote it.
 */
static void
mlxsw_sp_ipip_entry_ol_up_event(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_ipip_entry *ipip_entry)
{
	struct mlxsw_sp_fib_entry *decap_fib_entry;

	decap_fib_entry = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp, ipip_entry);
	if (!decap_fib_entry)
		return;

	mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry,
					  decap_fib_entry);
}
1442
1443 static int
1444 mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif, u16 ul_vr_id,
1445                         u16 ul_rif_id, bool enable)
1446 {
1447         struct mlxsw_sp_rif_ipip_lb_config lb_cf = lb_rif->lb_config;
1448         struct mlxsw_sp_rif *rif = &lb_rif->common;
1449         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
1450         char ritr_pl[MLXSW_REG_RITR_LEN];
1451         u32 saddr4;
1452
1453         switch (lb_cf.ul_protocol) {
1454         case MLXSW_SP_L3_PROTO_IPV4:
1455                 saddr4 = be32_to_cpu(lb_cf.saddr.addr4);
1456                 mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
1457                                     rif->rif_index, rif->vr_id, rif->dev->mtu);
1458                 mlxsw_reg_ritr_loopback_ipip4_pack(ritr_pl, lb_cf.lb_ipipt,
1459                             MLXSW_REG_RITR_LOOPBACK_IPIP_OPTIONS_GRE_KEY_PRESET,
1460                             ul_vr_id, ul_rif_id, saddr4, lb_cf.okey);
1461                 break;
1462
1463         case MLXSW_SP_L3_PROTO_IPV6:
1464                 return -EAFNOSUPPORT;
1465         }
1466
1467         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
1468 }
1469
1470 static int mlxsw_sp_netdevice_ipip_ol_update_mtu(struct mlxsw_sp *mlxsw_sp,
1471                                                  struct net_device *ol_dev)
1472 {
1473         struct mlxsw_sp_ipip_entry *ipip_entry;
1474         struct mlxsw_sp_rif_ipip_lb *lb_rif;
1475         int err = 0;
1476
1477         ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1478         if (ipip_entry) {
1479                 lb_rif = ipip_entry->ol_lb;
1480                 err = mlxsw_sp_rif_ipip_lb_op(lb_rif, lb_rif->ul_vr_id,
1481                                               lb_rif->ul_rif_id, true);
1482                 if (err)
1483                         goto out;
1484                 lb_rif->common.mtu = ol_dev->mtu;
1485         }
1486
1487 out:
1488         return err;
1489 }
1490
/* Handle tunnel netdevice @ol_dev coming up: forward the event to its IPIP
 * entry, if it has one.
 */
static void mlxsw_sp_netdevice_ipip_ol_up_event(struct mlxsw_sp *mlxsw_sp,
						struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (!ipip_entry)
		return;

	mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);
}
1500
1501 static void
1502 mlxsw_sp_ipip_entry_ol_down_event(struct mlxsw_sp *mlxsw_sp,
1503                                   struct mlxsw_sp_ipip_entry *ipip_entry)
1504 {
1505         if (ipip_entry->decap_fib_entry)
1506                 mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1507 }
1508
/* Handle tunnel netdevice @ol_dev going down: forward the event to its IPIP
 * entry, if it has one.
 */
static void mlxsw_sp_netdevice_ipip_ol_down_event(struct mlxsw_sp *mlxsw_sp,
						  struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (!ipip_entry)
		return;

	mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
}
1518
1519 static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp,
1520                                          struct mlxsw_sp_rif *old_rif,
1521                                          struct mlxsw_sp_rif *new_rif);
1522 static int
1523 mlxsw_sp_ipip_entry_ol_lb_update(struct mlxsw_sp *mlxsw_sp,
1524                                  struct mlxsw_sp_ipip_entry *ipip_entry,
1525                                  bool keep_encap,
1526                                  struct netlink_ext_ack *extack)
1527 {
1528         struct mlxsw_sp_rif_ipip_lb *old_lb_rif = ipip_entry->ol_lb;
1529         struct mlxsw_sp_rif_ipip_lb *new_lb_rif;
1530
1531         new_lb_rif = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp,
1532                                                      ipip_entry->ipipt,
1533                                                      ipip_entry->ol_dev,
1534                                                      extack);
1535         if (IS_ERR(new_lb_rif))
1536                 return PTR_ERR(new_lb_rif);
1537         ipip_entry->ol_lb = new_lb_rif;
1538
1539         if (keep_encap)
1540                 mlxsw_sp_nexthop_rif_migrate(mlxsw_sp, &old_lb_rif->common,
1541                                              &new_lb_rif->common);
1542
1543         mlxsw_sp_rif_destroy(&old_lb_rif->common);
1544
1545         return 0;
1546 }
1547
1548 static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
1549                                         struct mlxsw_sp_rif *rif);
1550
1551 /**
1552  * Update the offload related to an IPIP entry. This always updates decap, and
1553  * in addition to that it also:
1554  * @recreate_loopback: recreates the associated loopback RIF
1555  * @keep_encap: updates next hops that use the tunnel netdevice. This is only
1556  *              relevant when recreate_loopback is true.
1557  * @update_nexthops: updates next hops, keeping the current loopback RIF. This
1558  *                   is only relevant when recreate_loopback is false.
1559  */
1560 int __mlxsw_sp_ipip_entry_update_tunnel(struct mlxsw_sp *mlxsw_sp,
1561                                         struct mlxsw_sp_ipip_entry *ipip_entry,
1562                                         bool recreate_loopback,
1563                                         bool keep_encap,
1564                                         bool update_nexthops,
1565                                         struct netlink_ext_ack *extack)
1566 {
1567         int err;
1568
1569         /* RIFs can't be edited, so to update loopback, we need to destroy and
1570          * recreate it. That creates a window of opportunity where RALUE and
1571          * RATR registers end up referencing a RIF that's already gone. RATRs
1572          * are handled in mlxsw_sp_ipip_entry_ol_lb_update(), and to take care
1573          * of RALUE, demote the decap route back.
1574          */
1575         if (ipip_entry->decap_fib_entry)
1576                 mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1577
1578         if (recreate_loopback) {
1579                 err = mlxsw_sp_ipip_entry_ol_lb_update(mlxsw_sp, ipip_entry,
1580                                                        keep_encap, extack);
1581                 if (err)
1582                         return err;
1583         } else if (update_nexthops) {
1584                 mlxsw_sp_nexthop_rif_update(mlxsw_sp,
1585                                             &ipip_entry->ol_lb->common);
1586         }
1587
1588         if (ipip_entry->ol_dev->flags & IFF_UP)
1589                 mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);
1590
1591         return 0;
1592 }
1593
1594 static int mlxsw_sp_netdevice_ipip_ol_vrf_event(struct mlxsw_sp *mlxsw_sp,
1595                                                 struct net_device *ol_dev,
1596                                                 struct netlink_ext_ack *extack)
1597 {
1598         struct mlxsw_sp_ipip_entry *ipip_entry =
1599                 mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1600         enum mlxsw_sp_l3proto ul_proto;
1601         union mlxsw_sp_l3addr saddr;
1602         u32 ul_tb_id;
1603
1604         if (!ipip_entry)
1605                 return 0;
1606
1607         /* For flat configuration cases, moving overlay to a different VRF might
1608          * cause local address conflict, and the conflicting tunnels need to be
1609          * demoted.
1610          */
1611         ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
1612         ul_proto = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]->ul_proto;
1613         saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
1614         if (mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
1615                                                  saddr, ul_tb_id,
1616                                                  ipip_entry)) {
1617                 mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1618                 return 0;
1619         }
1620
1621         return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1622                                                    true, false, false, extack);
1623 }
1624
1625 static int
1626 mlxsw_sp_netdevice_ipip_ul_vrf_event(struct mlxsw_sp *mlxsw_sp,
1627                                      struct mlxsw_sp_ipip_entry *ipip_entry,
1628                                      struct net_device *ul_dev,
1629                                      struct netlink_ext_ack *extack)
1630 {
1631         return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1632                                                    true, true, false, extack);
1633 }
1634
1635 static int
1636 mlxsw_sp_netdevice_ipip_ul_up_event(struct mlxsw_sp *mlxsw_sp,
1637                                     struct mlxsw_sp_ipip_entry *ipip_entry,
1638                                     struct net_device *ul_dev)
1639 {
1640         return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1641                                                    false, false, true, NULL);
1642 }
1643
1644 static int
1645 mlxsw_sp_netdevice_ipip_ul_down_event(struct mlxsw_sp *mlxsw_sp,
1646                                       struct mlxsw_sp_ipip_entry *ipip_entry,
1647                                       struct net_device *ul_dev)
1648 {
1649         /* A down underlay device causes encapsulated packets to not be
1650          * forwarded, but decap still works. So refresh next hops without
1651          * touching anything else.
1652          */
1653         return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1654                                                    false, false, true, NULL);
1655 }
1656
1657 static int
1658 mlxsw_sp_netdevice_ipip_ol_change_event(struct mlxsw_sp *mlxsw_sp,
1659                                         struct net_device *ol_dev,
1660                                         struct netlink_ext_ack *extack)
1661 {
1662         const struct mlxsw_sp_ipip_ops *ipip_ops;
1663         struct mlxsw_sp_ipip_entry *ipip_entry;
1664         int err;
1665
1666         ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1667         if (!ipip_entry)
1668                 /* A change might make a tunnel eligible for offloading, but
1669                  * that is currently not implemented. What falls to slow path
1670                  * stays there.
1671                  */
1672                 return 0;
1673
1674         /* A change might make a tunnel not eligible for offloading. */
1675         if (!mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev,
1676                                                  ipip_entry->ipipt)) {
1677                 mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1678                 return 0;
1679         }
1680
1681         ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1682         err = ipip_ops->ol_netdev_change(mlxsw_sp, ipip_entry, extack);
1683         return err;
1684 }
1685
1686 void mlxsw_sp_ipip_entry_demote_tunnel(struct mlxsw_sp *mlxsw_sp,
1687                                        struct mlxsw_sp_ipip_entry *ipip_entry)
1688 {
1689         struct net_device *ol_dev = ipip_entry->ol_dev;
1690
1691         if (ol_dev->flags & IFF_UP)
1692                 mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
1693         mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
1694 }
1695
1696 /* The configuration where several tunnels have the same local address in the
1697  * same underlay table needs special treatment in the HW. That is currently not
1698  * implemented in the driver. This function finds and demotes the first tunnel
1699  * with a given source address, except the one passed in in the argument
1700  * `except'.
1701  */
1702 bool
1703 mlxsw_sp_ipip_demote_tunnel_by_saddr(struct mlxsw_sp *mlxsw_sp,
1704                                      enum mlxsw_sp_l3proto ul_proto,
1705                                      union mlxsw_sp_l3addr saddr,
1706                                      u32 ul_tb_id,
1707                                      const struct mlxsw_sp_ipip_entry *except)
1708 {
1709         struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
1710
1711         list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
1712                                  ipip_list_node) {
1713                 if (ipip_entry != except &&
1714                     mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, saddr,
1715                                                       ul_tb_id, ipip_entry)) {
1716                         mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1717                         return true;
1718                 }
1719         }
1720
1721         return false;
1722 }
1723
1724 static void mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(struct mlxsw_sp *mlxsw_sp,
1725                                                      struct net_device *ul_dev)
1726 {
1727         struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
1728
1729         list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
1730                                  ipip_list_node) {
1731                 struct net_device *ipip_ul_dev =
1732                         __mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);
1733
1734                 if (ipip_ul_dev == ul_dev)
1735                         mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1736         }
1737 }
1738
1739 int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp,
1740                                      struct net_device *ol_dev,
1741                                      unsigned long event,
1742                                      struct netdev_notifier_info *info)
1743 {
1744         struct netdev_notifier_changeupper_info *chup;
1745         struct netlink_ext_ack *extack;
1746
1747         switch (event) {
1748         case NETDEV_REGISTER:
1749                 return mlxsw_sp_netdevice_ipip_ol_reg_event(mlxsw_sp, ol_dev);
1750         case NETDEV_UNREGISTER:
1751                 mlxsw_sp_netdevice_ipip_ol_unreg_event(mlxsw_sp, ol_dev);
1752                 return 0;
1753         case NETDEV_UP:
1754                 mlxsw_sp_netdevice_ipip_ol_up_event(mlxsw_sp, ol_dev);
1755                 return 0;
1756         case NETDEV_DOWN:
1757                 mlxsw_sp_netdevice_ipip_ol_down_event(mlxsw_sp, ol_dev);
1758                 return 0;
1759         case NETDEV_CHANGEUPPER:
1760                 chup = container_of(info, typeof(*chup), info);
1761                 extack = info->extack;
1762                 if (netif_is_l3_master(chup->upper_dev))
1763                         return mlxsw_sp_netdevice_ipip_ol_vrf_event(mlxsw_sp,
1764                                                                     ol_dev,
1765                                                                     extack);
1766                 return 0;
1767         case NETDEV_CHANGE:
1768                 extack = info->extack;
1769                 return mlxsw_sp_netdevice_ipip_ol_change_event(mlxsw_sp,
1770                                                                ol_dev, extack);
1771         case NETDEV_CHANGEMTU:
1772                 return mlxsw_sp_netdevice_ipip_ol_update_mtu(mlxsw_sp, ol_dev);
1773         }
1774         return 0;
1775 }
1776
1777 static int
1778 __mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
1779                                    struct mlxsw_sp_ipip_entry *ipip_entry,
1780                                    struct net_device *ul_dev,
1781                                    unsigned long event,
1782                                    struct netdev_notifier_info *info)
1783 {
1784         struct netdev_notifier_changeupper_info *chup;
1785         struct netlink_ext_ack *extack;
1786
1787         switch (event) {
1788         case NETDEV_CHANGEUPPER:
1789                 chup = container_of(info, typeof(*chup), info);
1790                 extack = info->extack;
1791                 if (netif_is_l3_master(chup->upper_dev))
1792                         return mlxsw_sp_netdevice_ipip_ul_vrf_event(mlxsw_sp,
1793                                                                     ipip_entry,
1794                                                                     ul_dev,
1795                                                                     extack);
1796                 break;
1797
1798         case NETDEV_UP:
1799                 return mlxsw_sp_netdevice_ipip_ul_up_event(mlxsw_sp, ipip_entry,
1800                                                            ul_dev);
1801         case NETDEV_DOWN:
1802                 return mlxsw_sp_netdevice_ipip_ul_down_event(mlxsw_sp,
1803                                                              ipip_entry,
1804                                                              ul_dev);
1805         }
1806         return 0;
1807 }
1808
1809 int
1810 mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
1811                                  struct net_device *ul_dev,
1812                                  unsigned long event,
1813                                  struct netdev_notifier_info *info)
1814 {
1815         struct mlxsw_sp_ipip_entry *ipip_entry = NULL;
1816         int err;
1817
1818         while ((ipip_entry = mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp,
1819                                                                 ul_dev,
1820                                                                 ipip_entry))) {
1821                 err = __mlxsw_sp_netdevice_ipip_ul_event(mlxsw_sp, ipip_entry,
1822                                                          ul_dev, event, info);
1823                 if (err) {
1824                         mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(mlxsw_sp,
1825                                                                  ul_dev);
1826                         return err;
1827                 }
1828         }
1829
1830         return 0;
1831 }
1832
1833 int mlxsw_sp_router_nve_promote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
1834                                       enum mlxsw_sp_l3proto ul_proto,
1835                                       const union mlxsw_sp_l3addr *ul_sip,
1836                                       u32 tunnel_index)
1837 {
1838         enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1839         struct mlxsw_sp_fib_entry *fib_entry;
1840         int err;
1841
1842         /* It is valid to create a tunnel with a local IP and only later
1843          * assign this IP address to a local interface
1844          */
1845         fib_entry = mlxsw_sp_router_ip2me_fib_entry_find(mlxsw_sp, ul_tb_id,
1846                                                          ul_proto, ul_sip,
1847                                                          type);
1848         if (!fib_entry)
1849                 return 0;
1850
1851         fib_entry->decap.tunnel_index = tunnel_index;
1852         fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
1853
1854         err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1855         if (err)
1856                 goto err_fib_entry_update;
1857
1858         return 0;
1859
1860 err_fib_entry_update:
1861         fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1862         mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1863         return err;
1864 }
1865
1866 void mlxsw_sp_router_nve_demote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
1867                                       enum mlxsw_sp_l3proto ul_proto,
1868                                       const union mlxsw_sp_l3addr *ul_sip)
1869 {
1870         enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
1871         struct mlxsw_sp_fib_entry *fib_entry;
1872
1873         fib_entry = mlxsw_sp_router_ip2me_fib_entry_find(mlxsw_sp, ul_tb_id,
1874                                                          ul_proto, ul_sip,
1875                                                          type);
1876         if (!fib_entry)
1877                 return;
1878
1879         fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1880         mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1881 }
1882
/* Hash table key of a neighbour entry: the kernel neighbour pointer itself. */
struct mlxsw_sp_neigh_key {
	struct neighbour *n;
};
1886
1887 struct mlxsw_sp_neigh_entry {
1888         struct list_head rif_list_node;
1889         struct rhash_head ht_node;
1890         struct mlxsw_sp_neigh_key key;
1891         u16 rif;
1892         bool connected;
1893         unsigned char ha[ETH_ALEN];
1894         struct list_head nexthop_list; /* list of nexthops using
1895                                         * this neigh entry
1896                                         */
1897         struct list_head nexthop_neighs_list_node;
1898         unsigned int counter_index;
1899         bool counter_valid;
1900 };
1901
1902 static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
1903         .key_offset = offsetof(struct mlxsw_sp_neigh_entry, key),
1904         .head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node),
1905         .key_len = sizeof(struct mlxsw_sp_neigh_key),
1906 };
1907
1908 struct mlxsw_sp_neigh_entry *
1909 mlxsw_sp_rif_neigh_next(struct mlxsw_sp_rif *rif,
1910                         struct mlxsw_sp_neigh_entry *neigh_entry)
1911 {
1912         if (!neigh_entry) {
1913                 if (list_empty(&rif->neigh_list))
1914                         return NULL;
1915                 else
1916                         return list_first_entry(&rif->neigh_list,
1917                                                 typeof(*neigh_entry),
1918                                                 rif_list_node);
1919         }
1920         if (list_is_last(&neigh_entry->rif_list_node, &rif->neigh_list))
1921                 return NULL;
1922         return list_next_entry(neigh_entry, rif_list_node);
1923 }
1924
1925 int mlxsw_sp_neigh_entry_type(struct mlxsw_sp_neigh_entry *neigh_entry)
1926 {
1927         return neigh_entry->key.n->tbl->family;
1928 }
1929
1930 unsigned char *
1931 mlxsw_sp_neigh_entry_ha(struct mlxsw_sp_neigh_entry *neigh_entry)
1932 {
1933         return neigh_entry->ha;
1934 }
1935
1936 u32 mlxsw_sp_neigh4_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
1937 {
1938         struct neighbour *n;
1939
1940         n = neigh_entry->key.n;
1941         return ntohl(*((__be32 *) n->primary_key));
1942 }
1943
1944 struct in6_addr *
1945 mlxsw_sp_neigh6_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
1946 {
1947         struct neighbour *n;
1948
1949         n = neigh_entry->key.n;
1950         return (struct in6_addr *) &n->primary_key;
1951 }
1952
1953 int mlxsw_sp_neigh_counter_get(struct mlxsw_sp *mlxsw_sp,
1954                                struct mlxsw_sp_neigh_entry *neigh_entry,
1955                                u64 *p_counter)
1956 {
1957         if (!neigh_entry->counter_valid)
1958                 return -EINVAL;
1959
1960         return mlxsw_sp_flow_counter_get(mlxsw_sp, neigh_entry->counter_index,
1961                                          p_counter, NULL);
1962 }
1963
1964 static struct mlxsw_sp_neigh_entry *
1965 mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n,
1966                            u16 rif)
1967 {
1968         struct mlxsw_sp_neigh_entry *neigh_entry;
1969
1970         neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_KERNEL);
1971         if (!neigh_entry)
1972                 return NULL;
1973
1974         neigh_entry->key.n = n;
1975         neigh_entry->rif = rif;
1976         INIT_LIST_HEAD(&neigh_entry->nexthop_list);
1977
1978         return neigh_entry;
1979 }
1980
/* Release the memory of @neigh_entry; counterpart of
 * mlxsw_sp_neigh_entry_alloc().
 */
static void mlxsw_sp_neigh_entry_free(struct mlxsw_sp_neigh_entry *neigh_entry)
{
	kfree(neigh_entry);
}
1985
1986 static int
1987 mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp,
1988                             struct mlxsw_sp_neigh_entry *neigh_entry)
1989 {
1990         return rhashtable_insert_fast(&mlxsw_sp->router->neigh_ht,
1991                                       &neigh_entry->ht_node,
1992                                       mlxsw_sp_neigh_ht_params);
1993 }
1994
1995 static void
1996 mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
1997                             struct mlxsw_sp_neigh_entry *neigh_entry)
1998 {
1999         rhashtable_remove_fast(&mlxsw_sp->router->neigh_ht,
2000                                &neigh_entry->ht_node,
2001                                mlxsw_sp_neigh_ht_params);
2002 }
2003
2004 static bool
2005 mlxsw_sp_neigh_counter_should_alloc(struct mlxsw_sp *mlxsw_sp,
2006                                     struct mlxsw_sp_neigh_entry *neigh_entry)
2007 {
2008         struct devlink *devlink;
2009         const char *table_name;
2010
2011         switch (mlxsw_sp_neigh_entry_type(neigh_entry)) {
2012         case AF_INET:
2013                 table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST4;
2014                 break;
2015         case AF_INET6:
2016                 table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST6;
2017                 break;
2018         default:
2019                 WARN_ON(1);
2020                 return false;
2021         }
2022
2023         devlink = priv_to_devlink(mlxsw_sp->core);
2024         return devlink_dpipe_table_counter_enabled(devlink, table_name);
2025 }
2026
2027 static void
2028 mlxsw_sp_neigh_counter_alloc(struct mlxsw_sp *mlxsw_sp,
2029                              struct mlxsw_sp_neigh_entry *neigh_entry)
2030 {
2031         if (!mlxsw_sp_neigh_counter_should_alloc(mlxsw_sp, neigh_entry))
2032                 return;
2033
2034         if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &neigh_entry->counter_index))
2035                 return;
2036
2037         neigh_entry->counter_valid = true;
2038 }
2039
2040 static void
2041 mlxsw_sp_neigh_counter_free(struct mlxsw_sp *mlxsw_sp,
2042                             struct mlxsw_sp_neigh_entry *neigh_entry)
2043 {
2044         if (!neigh_entry->counter_valid)
2045                 return;
2046         mlxsw_sp_flow_counter_free(mlxsw_sp,
2047                                    neigh_entry->counter_index);
2048         neigh_entry->counter_valid = false;
2049 }
2050
2051 static struct mlxsw_sp_neigh_entry *
2052 mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
2053 {
2054         struct mlxsw_sp_neigh_entry *neigh_entry;
2055         struct mlxsw_sp_rif *rif;
2056         int err;
2057
2058         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
2059         if (!rif)
2060                 return ERR_PTR(-EINVAL);
2061
2062         neigh_entry = mlxsw_sp_neigh_entry_alloc(mlxsw_sp, n, rif->rif_index);
2063         if (!neigh_entry)
2064                 return ERR_PTR(-ENOMEM);
2065
2066         err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
2067         if (err)
2068                 goto err_neigh_entry_insert;
2069
2070         mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
2071         list_add(&neigh_entry->rif_list_node, &rif->neigh_list);
2072
2073         return neigh_entry;
2074
2075 err_neigh_entry_insert:
2076         mlxsw_sp_neigh_entry_free(neigh_entry);
2077         return ERR_PTR(err);
2078 }
2079
2080 static void
2081 mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp *mlxsw_sp,
2082                              struct mlxsw_sp_neigh_entry *neigh_entry)
2083 {
2084         list_del(&neigh_entry->rif_list_node);
2085         mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
2086         mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
2087         mlxsw_sp_neigh_entry_free(neigh_entry);
2088 }
2089
2090 static struct mlxsw_sp_neigh_entry *
2091 mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
2092 {
2093         struct mlxsw_sp_neigh_key key;
2094
2095         key.n = n;
2096         return rhashtable_lookup_fast(&mlxsw_sp->router->neigh_ht,
2097                                       &key, mlxsw_sp_neigh_ht_params);
2098 }
2099
2100 static void
2101 mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp)
2102 {
2103         unsigned long interval;
2104
2105 #if IS_ENABLED(CONFIG_IPV6)
2106         interval = min_t(unsigned long,
2107                          NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME),
2108                          NEIGH_VAR(&nd_tbl.parms, DELAY_PROBE_TIME));
2109 #else
2110         interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
2111 #endif
2112         mlxsw_sp->router->neighs_update.interval = jiffies_to_msecs(interval);
2113 }
2114
2115 static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
2116                                                    char *rauhtd_pl,
2117                                                    int ent_index)
2118 {
2119         struct net_device *dev;
2120         struct neighbour *n;
2121         __be32 dipn;
2122         u32 dip;
2123         u16 rif;
2124
2125         mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip);
2126
2127         if (!mlxsw_sp->router->rifs[rif]) {
2128                 dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
2129                 return;
2130         }
2131
2132         dipn = htonl(dip);
2133         dev = mlxsw_sp->router->rifs[rif]->dev;
2134         n = neigh_lookup(&arp_tbl, &dipn, dev);
2135         if (!n)
2136                 return;
2137
2138         netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip);
2139         neigh_event_send(n, NULL);
2140         neigh_release(n);
2141 }
2142
2143 #if IS_ENABLED(CONFIG_IPV6)
2144 static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2145                                                    char *rauhtd_pl,
2146                                                    int rec_index)
2147 {
2148         struct net_device *dev;
2149         struct neighbour *n;
2150         struct in6_addr dip;
2151         u16 rif;
2152
2153         mlxsw_reg_rauhtd_ent_ipv6_unpack(rauhtd_pl, rec_index, &rif,
2154                                          (char *) &dip);
2155
2156         if (!mlxsw_sp->router->rifs[rif]) {
2157                 dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
2158                 return;
2159         }
2160
2161         dev = mlxsw_sp->router->rifs[rif]->dev;
2162         n = neigh_lookup(&nd_tbl, &dip, dev);
2163         if (!n)
2164                 return;
2165
2166         netdev_dbg(dev, "Updating neighbour with IP=%pI6c\n", &dip);
2167         neigh_event_send(n, NULL);
2168         neigh_release(n);
2169 }
2170 #else
2171 static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2172                                                    char *rauhtd_pl,
2173                                                    int rec_index)
2174 {
2175 }
2176 #endif
2177
2178 static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
2179                                                    char *rauhtd_pl,
2180                                                    int rec_index)
2181 {
2182         u8 num_entries;
2183         int i;
2184
2185         num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
2186                                                                 rec_index);
2187         /* Hardware starts counting at 0, so add 1. */
2188         num_entries++;
2189
2190         /* Each record consists of several neighbour entries. */
2191         for (i = 0; i < num_entries; i++) {
2192                 int ent_index;
2193
2194                 ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i;
2195                 mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl,
2196                                                        ent_index);
2197         }
2198
2199 }
2200
/* Process IPv6 record @rec_index of a RAUHTD dump. */
static void mlxsw_sp_router_neigh_rec_ipv6_process(struct mlxsw_sp *mlxsw_sp,
						   char *rauhtd_pl,
						   int rec_index)
{
	/* An IPv6 record carries exactly one entry, so the record index is
	 * also the entry index.
	 */
	mlxsw_sp_router_neigh_ent_ipv6_process(mlxsw_sp, rauhtd_pl, rec_index);
}
2209
2210 static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
2211                                               char *rauhtd_pl, int rec_index)
2212 {
2213         switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) {
2214         case MLXSW_REG_RAUHTD_TYPE_IPV4:
2215                 mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl,
2216                                                        rec_index);
2217                 break;
2218         case MLXSW_REG_RAUHTD_TYPE_IPV6:
2219                 mlxsw_sp_router_neigh_rec_ipv6_process(mlxsw_sp, rauhtd_pl,
2220                                                        rec_index);
2221                 break;
2222         }
2223 }
2224
2225 static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl)
2226 {
2227         u8 num_rec, last_rec_index, num_entries;
2228
2229         num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
2230         last_rec_index = num_rec - 1;
2231
2232         if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM)
2233                 return false;
2234         if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) ==
2235             MLXSW_REG_RAUHTD_TYPE_IPV6)
2236                 return true;
2237
2238         num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
2239                                                                 last_rec_index);
2240         if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC)
2241                 return true;
2242         return false;
2243 }
2244
2245 static int
2246 __mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp,
2247                                        char *rauhtd_pl,
2248                                        enum mlxsw_reg_rauhtd_type type)
2249 {
2250         int i, num_rec;
2251         int err;
2252
2253         /* Make sure the neighbour's netdev isn't removed in the
2254          * process.
2255          */
2256         rtnl_lock();
2257         do {
2258                 mlxsw_reg_rauhtd_pack(rauhtd_pl, type);
2259                 err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
2260                                       rauhtd_pl);
2261                 if (err) {
2262                         dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour table\n");
2263                         break;
2264                 }
2265                 num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
2266                 for (i = 0; i < num_rec; i++)
2267                         mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
2268                                                           i);
2269         } while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
2270         rtnl_unlock();
2271
2272         return err;
2273 }
2274
2275 static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
2276 {
2277         enum mlxsw_reg_rauhtd_type type;
2278         char *rauhtd_pl;
2279         int err;
2280
2281         rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
2282         if (!rauhtd_pl)
2283                 return -ENOMEM;
2284
2285         type = MLXSW_REG_RAUHTD_TYPE_IPV4;
2286         err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
2287         if (err)
2288                 goto out;
2289
2290         type = MLXSW_REG_RAUHTD_TYPE_IPV6;
2291         err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
2292 out:
2293         kfree(rauhtd_pl);
2294         return err;
2295 }
2296
2297 static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp)
2298 {
2299         struct mlxsw_sp_neigh_entry *neigh_entry;
2300
2301         /* Take RTNL mutex here to prevent lists from changes */
2302         rtnl_lock();
2303         list_for_each_entry(neigh_entry, &mlxsw_sp->router->nexthop_neighs_list,
2304                             nexthop_neighs_list_node)
2305                 /* If this neigh have nexthops, make the kernel think this neigh
2306                  * is active regardless of the traffic.
2307                  */
2308                 neigh_event_send(neigh_entry->key.n, NULL);
2309         rtnl_unlock();
2310 }
2311
2312 static void
2313 mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp)
2314 {
2315         unsigned long interval = mlxsw_sp->router->neighs_update.interval;
2316
2317         mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw,
2318                                msecs_to_jiffies(interval));
2319 }
2320
2321 static void mlxsw_sp_router_neighs_update_work(struct work_struct *work)
2322 {
2323         struct mlxsw_sp_router *router;
2324         int err;
2325
2326         router = container_of(work, struct mlxsw_sp_router,
2327                               neighs_update.dw.work);
2328         err = mlxsw_sp_router_neighs_update_rauhtd(router->mlxsw_sp);
2329         if (err)
2330                 dev_err(router->mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity");
2331
2332         mlxsw_sp_router_neighs_update_nh(router->mlxsw_sp);
2333
2334         mlxsw_sp_router_neighs_update_work_schedule(router->mlxsw_sp);
2335 }
2336
2337 static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work)
2338 {
2339         struct mlxsw_sp_neigh_entry *neigh_entry;
2340         struct mlxsw_sp_router *router;
2341
2342         router = container_of(work, struct mlxsw_sp_router,
2343                               nexthop_probe_dw.work);
2344         /* Iterate over nexthop neighbours, find those who are unresolved and
2345          * send arp on them. This solves the chicken-egg problem when
2346          * the nexthop wouldn't get offloaded until the neighbor is resolved
2347          * but it wouldn't get resolved ever in case traffic is flowing in HW
2348          * using different nexthop.
2349          *
2350          * Take RTNL mutex here to prevent lists from changes.
2351          */
2352         rtnl_lock();
2353         list_for_each_entry(neigh_entry, &router->nexthop_neighs_list,
2354                             nexthop_neighs_list_node)
2355                 if (!neigh_entry->connected)
2356                         neigh_event_send(neigh_entry->key.n, NULL);
2357         rtnl_unlock();
2358
2359         mlxsw_core_schedule_dw(&router->nexthop_probe_dw,
2360                                MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL);
2361 }
2362
2363 static void
2364 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
2365                               struct mlxsw_sp_neigh_entry *neigh_entry,
2366                               bool removing);
2367
2368 static enum mlxsw_reg_rauht_op mlxsw_sp_rauht_op(bool adding)
2369 {
2370         return adding ? MLXSW_REG_RAUHT_OP_WRITE_ADD :
2371                         MLXSW_REG_RAUHT_OP_WRITE_DELETE;
2372 }
2373
2374 static void
2375 mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp,
2376                                 struct mlxsw_sp_neigh_entry *neigh_entry,
2377                                 enum mlxsw_reg_rauht_op op)
2378 {
2379         struct neighbour *n = neigh_entry->key.n;
2380         u32 dip = ntohl(*((__be32 *) n->primary_key));
2381         char rauht_pl[MLXSW_REG_RAUHT_LEN];
2382
2383         mlxsw_reg_rauht_pack4(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
2384                               dip);
2385         if (neigh_entry->counter_valid)
2386                 mlxsw_reg_rauht_pack_counter(rauht_pl,
2387                                              neigh_entry->counter_index);
2388         mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
2389 }
2390
2391 static void
2392 mlxsw_sp_router_neigh_entry_op6(struct mlxsw_sp *mlxsw_sp,
2393                                 struct mlxsw_sp_neigh_entry *neigh_entry,
2394                                 enum mlxsw_reg_rauht_op op)
2395 {
2396         struct neighbour *n = neigh_entry->key.n;
2397         char rauht_pl[MLXSW_REG_RAUHT_LEN];
2398         const char *dip = n->primary_key;
2399
2400         mlxsw_reg_rauht_pack6(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
2401                               dip);
2402         if (neigh_entry->counter_valid)
2403                 mlxsw_reg_rauht_pack_counter(rauht_pl,
2404                                              neigh_entry->counter_index);
2405         mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
2406 }
2407
2408 bool mlxsw_sp_neigh_ipv6_ignore(struct mlxsw_sp_neigh_entry *neigh_entry)
2409 {
2410         struct neighbour *n = neigh_entry->key.n;
2411
2412         /* Packets with a link-local destination address are trapped
2413          * after LPM lookup and never reach the neighbour table, so
2414          * there is no need to program such neighbours to the device.
2415          */
2416         if (ipv6_addr_type((struct in6_addr *) &n->primary_key) &
2417             IPV6_ADDR_LINKLOCAL)
2418                 return true;
2419         return false;
2420 }
2421
2422 static void
2423 mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp,
2424                             struct mlxsw_sp_neigh_entry *neigh_entry,
2425                             bool adding)
2426 {
2427         if (!adding && !neigh_entry->connected)
2428                 return;
2429         neigh_entry->connected = adding;
2430         if (neigh_entry->key.n->tbl->family == AF_INET) {
2431                 mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry,
2432                                                 mlxsw_sp_rauht_op(adding));
2433         } else if (neigh_entry->key.n->tbl->family == AF_INET6) {
2434                 if (mlxsw_sp_neigh_ipv6_ignore(neigh_entry))
2435                         return;
2436                 mlxsw_sp_router_neigh_entry_op6(mlxsw_sp, neigh_entry,
2437                                                 mlxsw_sp_rauht_op(adding));
2438         } else {
2439                 WARN_ON_ONCE(1);
2440         }
2441 }
2442
2443 void
2444 mlxsw_sp_neigh_entry_counter_update(struct mlxsw_sp *mlxsw_sp,
2445                                     struct mlxsw_sp_neigh_entry *neigh_entry,
2446                                     bool adding)
2447 {
2448         if (adding)
2449                 mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
2450         else
2451                 mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
2452         mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, true);
2453 }
2454
2455 struct mlxsw_sp_netevent_work {
2456         struct work_struct work;
2457         struct mlxsw_sp *mlxsw_sp;
2458         struct neighbour *n;
2459 };
2460
2461 static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
2462 {
2463         struct mlxsw_sp_netevent_work *net_work =
2464                 container_of(work, struct mlxsw_sp_netevent_work, work);
2465         struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2466         struct mlxsw_sp_neigh_entry *neigh_entry;
2467         struct neighbour *n = net_work->n;
2468         unsigned char ha[ETH_ALEN];
2469         bool entry_connected;
2470         u8 nud_state, dead;
2471
2472         /* If these parameters are changed after we release the lock,
2473          * then we are guaranteed to receive another event letting us
2474          * know about it.
2475          */
2476         read_lock_bh(&n->lock);
2477         memcpy(ha, n->ha, ETH_ALEN);
2478         nud_state = n->nud_state;
2479         dead = n->dead;
2480         read_unlock_bh(&n->lock);
2481
2482         rtnl_lock();
2483         mlxsw_sp_span_respin(mlxsw_sp);
2484
2485         entry_connected = nud_state & NUD_VALID && !dead;
2486         neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
2487         if (!entry_connected && !neigh_entry)
2488                 goto out;
2489         if (!neigh_entry) {
2490                 neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
2491                 if (IS_ERR(neigh_entry))
2492                         goto out;
2493         }
2494
2495         memcpy(neigh_entry->ha, ha, ETH_ALEN);
2496         mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected);
2497         mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected);
2498
2499         if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
2500                 mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
2501
2502 out:
2503         rtnl_unlock();
2504         neigh_release(n);
2505         kfree(net_work);
2506 }
2507
2508 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp);
2509
2510 static void mlxsw_sp_router_mp_hash_event_work(struct work_struct *work)
2511 {
2512         struct mlxsw_sp_netevent_work *net_work =
2513                 container_of(work, struct mlxsw_sp_netevent_work, work);
2514         struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2515
2516         mlxsw_sp_mp_hash_init(mlxsw_sp);
2517         kfree(net_work);
2518 }
2519
2520 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp);
2521
2522 static void mlxsw_sp_router_update_priority_work(struct work_struct *work)
2523 {
2524         struct mlxsw_sp_netevent_work *net_work =
2525                 container_of(work, struct mlxsw_sp_netevent_work, work);
2526         struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2527
2528         __mlxsw_sp_router_init(mlxsw_sp);
2529         kfree(net_work);
2530 }
2531
2532 static int mlxsw_sp_router_schedule_work(struct net *net,
2533                                          struct notifier_block *nb,
2534                                          void (*cb)(struct work_struct *))
2535 {
2536         struct mlxsw_sp_netevent_work *net_work;
2537         struct mlxsw_sp_router *router;
2538
2539         if (!net_eq(net, &init_net))
2540                 return NOTIFY_DONE;
2541
2542         net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
2543         if (!net_work)
2544                 return NOTIFY_BAD;
2545
2546         router = container_of(nb, struct mlxsw_sp_router, netevent_nb);
2547         INIT_WORK(&net_work->work, cb);
2548         net_work->mlxsw_sp = router->mlxsw_sp;
2549         mlxsw_core_schedule_work(&net_work->work);
2550         return NOTIFY_DONE;
2551 }
2552
2553 static int mlxsw_sp_router_netevent_event(struct notifier_block *nb,
2554                                           unsigned long event, void *ptr)
2555 {
2556         struct mlxsw_sp_netevent_work *net_work;
2557         struct mlxsw_sp_port *mlxsw_sp_port;
2558         struct mlxsw_sp *mlxsw_sp;
2559         unsigned long interval;
2560         struct neigh_parms *p;
2561         struct neighbour *n;
2562
2563         switch (event) {
2564         case NETEVENT_DELAY_PROBE_TIME_UPDATE:
2565                 p = ptr;
2566
2567                 /* We don't care about changes in the default table. */
2568                 if (!p->dev || (p->tbl->family != AF_INET &&
2569                                 p->tbl->family != AF_INET6))
2570                         return NOTIFY_DONE;
2571
2572                 /* We are in atomic context and can't take RTNL mutex,
2573                  * so use RCU variant to walk the device chain.
2574                  */
2575                 mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(p->dev);
2576                 if (!mlxsw_sp_port)
2577                         return NOTIFY_DONE;
2578
2579                 mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
2580                 interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME));
2581                 mlxsw_sp->router->neighs_update.interval = interval;
2582
2583                 mlxsw_sp_port_dev_put(mlxsw_sp_port);
2584                 break;
2585         case NETEVENT_NEIGH_UPDATE:
2586                 n = ptr;
2587
2588                 if (n->tbl->family != AF_INET && n->tbl->family != AF_INET6)
2589                         return NOTIFY_DONE;
2590
2591                 mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(n->dev);
2592                 if (!mlxsw_sp_port)
2593                         return NOTIFY_DONE;
2594
2595                 net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
2596                 if (!net_work) {
2597                         mlxsw_sp_port_dev_put(mlxsw_sp_port);
2598                         return NOTIFY_BAD;
2599                 }
2600
2601                 INIT_WORK(&net_work->work, mlxsw_sp_router_neigh_event_work);
2602                 net_work->mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
2603                 net_work->n = n;
2604
2605                 /* Take a reference to ensure the neighbour won't be
2606                  * destructed until we drop the reference in delayed
2607                  * work.
2608                  */
2609                 neigh_clone(n);
2610                 mlxsw_core_schedule_work(&net_work->work);
2611                 mlxsw_sp_port_dev_put(mlxsw_sp_port);
2612                 break;
2613         case NETEVENT_IPV4_MPATH_HASH_UPDATE:
2614         case NETEVENT_IPV6_MPATH_HASH_UPDATE:
2615                 return mlxsw_sp_router_schedule_work(ptr, nb,
2616                                 mlxsw_sp_router_mp_hash_event_work);
2617
2618         case NETEVENT_IPV4_FWD_UPDATE_PRIORITY_UPDATE:
2619                 return mlxsw_sp_router_schedule_work(ptr, nb,
2620                                 mlxsw_sp_router_update_priority_work);
2621         }
2622
2623         return NOTIFY_DONE;
2624 }
2625
2626 static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp)
2627 {
2628         int err;
2629
2630         err = rhashtable_init(&mlxsw_sp->router->neigh_ht,
2631                               &mlxsw_sp_neigh_ht_params);
2632         if (err)
2633                 return err;
2634
2635         /* Initialize the polling interval according to the default
2636          * table.
2637          */
2638         mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp);
2639
2640         /* Create the delayed works for the activity_update */
2641         INIT_DELAYED_WORK(&mlxsw_sp->router->neighs_update.dw,
2642                           mlxsw_sp_router_neighs_update_work);
2643         INIT_DELAYED_WORK(&mlxsw_sp->router->nexthop_probe_dw,
2644                           mlxsw_sp_router_probe_unresolved_nexthops);
2645         mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw, 0);
2646         mlxsw_core_schedule_dw(&mlxsw_sp->router->nexthop_probe_dw, 0);
2647         return 0;
2648 }
2649
2650 static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
2651 {
2652         cancel_delayed_work_sync(&mlxsw_sp->router->neighs_update.dw);
2653         cancel_delayed_work_sync(&mlxsw_sp->router->nexthop_probe_dw);
2654         rhashtable_destroy(&mlxsw_sp->router->neigh_ht);
2655 }
2656
2657 static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
2658                                          struct mlxsw_sp_rif *rif)
2659 {
2660         struct mlxsw_sp_neigh_entry *neigh_entry, *tmp;
2661
2662         list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list,
2663                                  rif_list_node) {
2664                 mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, false);
2665                 mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
2666         }
2667 }
2668
/* Kind of a nexthop; selects which member of the anonymous union in
 * struct mlxsw_sp_nexthop is in use.
 */
enum mlxsw_sp_nexthop_type {
	MLXSW_SP_NEXTHOP_TYPE_ETH,	/* uses neigh_entry */
	MLXSW_SP_NEXTHOP_TYPE_IPIP,	/* uses ipip_entry */
};
2673
/* Hash table key of a nexthop (see mlxsw_sp_nexthop_ht_params): the
 * kernel fib_nh pointer.
 */
struct mlxsw_sp_nexthop_key {
	struct fib_nh *fib_nh;
};
2677
2678 struct mlxsw_sp_nexthop {
2679         struct list_head neigh_list_node; /* member of neigh entry list */
2680         struct list_head rif_list_node;
2681         struct list_head router_list_node;
2682         struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group
2683                                                 * this belongs to
2684                                                 */
2685         struct rhash_head ht_node;
2686         struct mlxsw_sp_nexthop_key key;
2687         unsigned char gw_addr[sizeof(struct in6_addr)];
2688         int ifindex;
2689         int nh_weight;
2690         int norm_nh_weight;
2691         int num_adj_entries;
2692         struct mlxsw_sp_rif *rif;
2693         u8 should_offload:1, /* set indicates this neigh is connected and
2694                               * should be put to KVD linear area of this group.
2695                               */
2696            offloaded:1, /* set in case the neigh is actually put into
2697                          * KVD linear area of this group.
2698                          */
2699            update:1; /* set indicates that MAC of this neigh should be
2700                       * updated in HW
2701                       */
2702         enum mlxsw_sp_nexthop_type type;
2703         union {
2704                 struct mlxsw_sp_neigh_entry *neigh_entry;
2705                 struct mlxsw_sp_ipip_entry *ipip_entry;
2706         };
2707         unsigned int counter_index;
2708         bool counter_valid;
2709 };
2710
2711 struct mlxsw_sp_nexthop_group {
2712         void *priv;
2713         struct rhash_head ht_node;
2714         struct list_head fib_list; /* list of fib entries that use this group */
2715         struct neigh_table *neigh_tbl;
2716         u8 adj_index_valid:1,
2717            gateway:1; /* routes using the group use a gateway */
2718         u32 adj_index;
2719         u16 ecmp_size;
2720         u16 count;
2721         int sum_norm_weight;
2722         struct mlxsw_sp_nexthop nexthops[0];
2723 #define nh_rif  nexthops[0].rif
2724 };
2725
2726 void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp,
2727                                     struct mlxsw_sp_nexthop *nh)
2728 {
2729         struct devlink *devlink;
2730
2731         devlink = priv_to_devlink(mlxsw_sp->core);
2732         if (!devlink_dpipe_table_counter_enabled(devlink,
2733                                                  MLXSW_SP_DPIPE_TABLE_NAME_ADJ))
2734                 return;
2735
2736         if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &nh->counter_index))
2737                 return;
2738
2739         nh->counter_valid = true;
2740 }
2741
2742 void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp,
2743                                    struct mlxsw_sp_nexthop *nh)
2744 {
2745         if (!nh->counter_valid)
2746                 return;
2747         mlxsw_sp_flow_counter_free(mlxsw_sp, nh->counter_index);
2748         nh->counter_valid = false;
2749 }
2750
2751 int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp,
2752                                  struct mlxsw_sp_nexthop *nh, u64 *p_counter)
2753 {
2754         if (!nh->counter_valid)
2755                 return -EINVAL;
2756
2757         return mlxsw_sp_flow_counter_get(mlxsw_sp, nh->counter_index,
2758                                          p_counter, NULL);
2759 }
2760
2761 struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router,
2762                                                struct mlxsw_sp_nexthop *nh)
2763 {
2764         if (!nh) {
2765                 if (list_empty(&router->nexthop_list))
2766                         return NULL;
2767                 else
2768                         return list_first_entry(&router->nexthop_list,
2769                                                 typeof(*nh), router_list_node);
2770         }
2771         if (list_is_last(&nh->router_list_node, &router->nexthop_list))
2772                 return NULL;
2773         return list_next_entry(nh, router_list_node);
2774 }
2775
2776 bool mlxsw_sp_nexthop_offload(struct mlxsw_sp_nexthop *nh)
2777 {
2778         return nh->offloaded;
2779 }
2780
2781 unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh)
2782 {
2783         if (!nh->offloaded)
2784                 return NULL;
2785         return nh->neigh_entry->ha;
2786 }
2787
2788 int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index,
2789                              u32 *p_adj_size, u32 *p_adj_hash_index)
2790 {
2791         struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
2792         u32 adj_hash_index = 0;
2793         int i;
2794
2795         if (!nh->offloaded || !nh_grp->adj_index_valid)
2796                 return -EINVAL;
2797
2798         *p_adj_index = nh_grp->adj_index;
2799         *p_adj_size = nh_grp->ecmp_size;
2800
2801         for (i = 0; i < nh_grp->count; i++) {
2802                 struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];
2803
2804                 if (nh_iter == nh)
2805                         break;
2806                 if (nh_iter->offloaded)
2807                         adj_hash_index += nh_iter->num_adj_entries;
2808         }
2809
2810         *p_adj_hash_index = adj_hash_index;
2811         return 0;
2812 }
2813
2814 struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh)
2815 {
2816         return nh->rif;
2817 }
2818
2819 bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh)
2820 {
2821         struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
2822         int i;
2823
2824         for (i = 0; i < nh_grp->count; i++) {
2825                 struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];
2826
2827                 if (nh_iter->type == MLXSW_SP_NEXTHOP_TYPE_IPIP)
2828                         return true;
2829         }
2830         return false;
2831 }
2832
2833 static struct fib_info *
2834 mlxsw_sp_nexthop4_group_fi(const struct mlxsw_sp_nexthop_group *nh_grp)
2835 {
2836         return nh_grp->priv;
2837 }
2838
2839 struct mlxsw_sp_nexthop_group_cmp_arg {
2840         enum mlxsw_sp_l3proto proto;
2841         union {
2842                 struct fib_info *fi;
2843                 struct mlxsw_sp_fib6_entry *fib6_entry;
2844         };
2845 };
2846
2847 static bool
2848 mlxsw_sp_nexthop6_group_has_nexthop(const struct mlxsw_sp_nexthop_group *nh_grp,
2849                                     const struct in6_addr *gw, int ifindex,
2850                                     int weight)
2851 {
2852         int i;
2853
2854         for (i = 0; i < nh_grp->count; i++) {
2855                 const struct mlxsw_sp_nexthop *nh;
2856
2857                 nh = &nh_grp->nexthops[i];
2858                 if (nh->ifindex == ifindex && nh->nh_weight == weight &&
2859                     ipv6_addr_equal(gw, (struct in6_addr *) nh->gw_addr))
2860                         return true;
2861         }
2862
2863         return false;
2864 }
2865
2866 static bool
2867 mlxsw_sp_nexthop6_group_cmp(const struct mlxsw_sp_nexthop_group *nh_grp,
2868                             const struct mlxsw_sp_fib6_entry *fib6_entry)
2869 {
2870         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
2871
2872         if (nh_grp->count != fib6_entry->nrt6)
2873                 return false;
2874
2875         list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
2876                 struct in6_addr *gw;
2877                 int ifindex, weight;
2878
2879                 ifindex = mlxsw_sp_rt6->rt->fib6_nh.nh_dev->ifindex;
2880                 weight = mlxsw_sp_rt6->rt->fib6_nh.nh_weight;
2881                 gw = &mlxsw_sp_rt6->rt->fib6_nh.nh_gw;
2882                 if (!mlxsw_sp_nexthop6_group_has_nexthop(nh_grp, gw, ifindex,
2883                                                          weight))
2884                         return false;
2885         }
2886
2887         return true;
2888 }
2889
2890 static int
2891 mlxsw_sp_nexthop_group_cmp(struct rhashtable_compare_arg *arg, const void *ptr)
2892 {
2893         const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = arg->key;
2894         const struct mlxsw_sp_nexthop_group *nh_grp = ptr;
2895
2896         switch (cmp_arg->proto) {
2897         case MLXSW_SP_L3_PROTO_IPV4:
2898                 return cmp_arg->fi != mlxsw_sp_nexthop4_group_fi(nh_grp);
2899         case MLXSW_SP_L3_PROTO_IPV6:
2900                 return !mlxsw_sp_nexthop6_group_cmp(nh_grp,
2901                                                     cmp_arg->fib6_entry);
2902         default:
2903                 WARN_ON(1);
2904                 return 1;
2905         }
2906 }
2907
2908 static int
2909 mlxsw_sp_nexthop_group_type(const struct mlxsw_sp_nexthop_group *nh_grp)
2910 {
2911         return nh_grp->neigh_tbl->family;
2912 }
2913
2914 static u32 mlxsw_sp_nexthop_group_hash_obj(const void *data, u32 len, u32 seed)
2915 {
2916         const struct mlxsw_sp_nexthop_group *nh_grp = data;
2917         const struct mlxsw_sp_nexthop *nh;
2918         struct fib_info *fi;
2919         unsigned int val;
2920         int i;
2921
2922         switch (mlxsw_sp_nexthop_group_type(nh_grp)) {
2923         case AF_INET:
2924                 fi = mlxsw_sp_nexthop4_group_fi(nh_grp);
2925                 return jhash(&fi, sizeof(fi), seed);
2926         case AF_INET6:
2927                 val = nh_grp->count;
2928                 for (i = 0; i < nh_grp->count; i++) {
2929                         nh = &nh_grp->nexthops[i];
2930                         val ^= nh->ifindex;
2931                 }
2932                 return jhash(&val, sizeof(val), seed);
2933         default:
2934                 WARN_ON(1);
2935                 return 0;
2936         }
2937 }
2938
2939 static u32
2940 mlxsw_sp_nexthop6_group_hash(struct mlxsw_sp_fib6_entry *fib6_entry, u32 seed)
2941 {
2942         unsigned int val = fib6_entry->nrt6;
2943         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
2944         struct net_device *dev;
2945
2946         list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
2947                 dev = mlxsw_sp_rt6->rt->fib6_nh.nh_dev;
2948                 val ^= dev->ifindex;
2949         }
2950
2951         return jhash(&val, sizeof(val), seed);
2952 }
2953
2954 static u32
2955 mlxsw_sp_nexthop_group_hash(const void *data, u32 len, u32 seed)
2956 {
2957         const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = data;
2958
2959         switch (cmp_arg->proto) {
2960         case MLXSW_SP_L3_PROTO_IPV4:
2961                 return jhash(&cmp_arg->fi, sizeof(cmp_arg->fi), seed);
2962         case MLXSW_SP_L3_PROTO_IPV6:
2963                 return mlxsw_sp_nexthop6_group_hash(cmp_arg->fib6_entry, seed);
2964         default:
2965                 WARN_ON(1);
2966                 return 0;
2967         }
2968 }
2969
2970 static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = {
2971         .head_offset = offsetof(struct mlxsw_sp_nexthop_group, ht_node),
2972         .hashfn      = mlxsw_sp_nexthop_group_hash,
2973         .obj_hashfn  = mlxsw_sp_nexthop_group_hash_obj,
2974         .obj_cmpfn   = mlxsw_sp_nexthop_group_cmp,
2975 };
2976
2977 static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp,
2978                                          struct mlxsw_sp_nexthop_group *nh_grp)
2979 {
2980         if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
2981             !nh_grp->gateway)
2982                 return 0;
2983
2984         return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_group_ht,
2985                                       &nh_grp->ht_node,
2986                                       mlxsw_sp_nexthop_group_ht_params);
2987 }
2988
2989 static void mlxsw_sp_nexthop_group_remove(struct mlxsw_sp *mlxsw_sp,
2990                                           struct mlxsw_sp_nexthop_group *nh_grp)
2991 {
2992         if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
2993             !nh_grp->gateway)
2994                 return;
2995
2996         rhashtable_remove_fast(&mlxsw_sp->router->nexthop_group_ht,
2997                                &nh_grp->ht_node,
2998                                mlxsw_sp_nexthop_group_ht_params);
2999 }
3000
3001 static struct mlxsw_sp_nexthop_group *
3002 mlxsw_sp_nexthop4_group_lookup(struct mlxsw_sp *mlxsw_sp,
3003                                struct fib_info *fi)
3004 {
3005         struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
3006
3007         cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV4;
3008         cmp_arg.fi = fi;
3009         return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
3010                                       &cmp_arg,
3011                                       mlxsw_sp_nexthop_group_ht_params);
3012 }
3013
3014 static struct mlxsw_sp_nexthop_group *
3015 mlxsw_sp_nexthop6_group_lookup(struct mlxsw_sp *mlxsw_sp,
3016                                struct mlxsw_sp_fib6_entry *fib6_entry)
3017 {
3018         struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
3019
3020         cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV6;
3021         cmp_arg.fib6_entry = fib6_entry;
3022         return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
3023                                       &cmp_arg,
3024                                       mlxsw_sp_nexthop_group_ht_params);
3025 }
3026
3027 static const struct rhashtable_params mlxsw_sp_nexthop_ht_params = {
3028         .key_offset = offsetof(struct mlxsw_sp_nexthop, key),
3029         .head_offset = offsetof(struct mlxsw_sp_nexthop, ht_node),
3030         .key_len = sizeof(struct mlxsw_sp_nexthop_key),
3031 };
3032
3033 static int mlxsw_sp_nexthop_insert(struct mlxsw_sp *mlxsw_sp,
3034                                    struct mlxsw_sp_nexthop *nh)
3035 {
3036         return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_ht,
3037                                       &nh->ht_node, mlxsw_sp_nexthop_ht_params);
3038 }
3039
3040 static void mlxsw_sp_nexthop_remove(struct mlxsw_sp *mlxsw_sp,
3041                                     struct mlxsw_sp_nexthop *nh)
3042 {
3043         rhashtable_remove_fast(&mlxsw_sp->router->nexthop_ht, &nh->ht_node,
3044                                mlxsw_sp_nexthop_ht_params);
3045 }
3046
3047 static struct mlxsw_sp_nexthop *
3048 mlxsw_sp_nexthop_lookup(struct mlxsw_sp *mlxsw_sp,
3049                         struct mlxsw_sp_nexthop_key key)
3050 {
3051         return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_ht, &key,
3052                                       mlxsw_sp_nexthop_ht_params);
3053 }
3054
3055 static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp,
3056                                              const struct mlxsw_sp_fib *fib,
3057                                              u32 adj_index, u16 ecmp_size,
3058                                              u32 new_adj_index,
3059                                              u16 new_ecmp_size)
3060 {
3061         char raleu_pl[MLXSW_REG_RALEU_LEN];
3062
3063         mlxsw_reg_raleu_pack(raleu_pl,
3064                              (enum mlxsw_reg_ralxx_protocol) fib->proto,
3065                              fib->vr->id, adj_index, ecmp_size, new_adj_index,
3066                              new_ecmp_size);
3067         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl);
3068 }
3069
3070 static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
3071                                           struct mlxsw_sp_nexthop_group *nh_grp,
3072                                           u32 old_adj_index, u16 old_ecmp_size)
3073 {
3074         struct mlxsw_sp_fib_entry *fib_entry;
3075         struct mlxsw_sp_fib *fib = NULL;
3076         int err;
3077
3078         list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
3079                 if (fib == fib_entry->fib_node->fib)
3080                         continue;
3081                 fib = fib_entry->fib_node->fib;
3082                 err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, fib,
3083                                                         old_adj_index,
3084                                                         old_ecmp_size,
3085                                                         nh_grp->adj_index,
3086                                                         nh_grp->ecmp_size);
3087                 if (err)
3088                         return err;
3089         }
3090         return 0;
3091 }
3092
3093 static int __mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
3094                                      struct mlxsw_sp_nexthop *nh)
3095 {
3096         struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
3097         char ratr_pl[MLXSW_REG_RATR_LEN];
3098
3099         mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
3100                             true, MLXSW_REG_RATR_TYPE_ETHERNET,
3101                             adj_index, neigh_entry->rif);
3102         mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
3103         if (nh->counter_valid)
3104                 mlxsw_reg_ratr_counter_pack(ratr_pl, nh->counter_index, true);
3105         else
3106                 mlxsw_reg_ratr_counter_pack(ratr_pl, 0, false);
3107
3108         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
3109 }
3110
3111 int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
3112                             struct mlxsw_sp_nexthop *nh)
3113 {
3114         int i;
3115
3116         for (i = 0; i < nh->num_adj_entries; i++) {
3117                 int err;
3118
3119                 err = __mlxsw_sp_nexthop_update(mlxsw_sp, adj_index + i, nh);
3120                 if (err)
3121                         return err;
3122         }
3123
3124         return 0;
3125 }
3126
3127 static int __mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
3128                                           u32 adj_index,
3129                                           struct mlxsw_sp_nexthop *nh)
3130 {
3131         const struct mlxsw_sp_ipip_ops *ipip_ops;
3132
3133         ipip_ops = mlxsw_sp->router->ipip_ops_arr[nh->ipip_entry->ipipt];
3134         return ipip_ops->nexthop_update(mlxsw_sp, adj_index, nh->ipip_entry);
3135 }
3136
3137 static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
3138                                         u32 adj_index,
3139                                         struct mlxsw_sp_nexthop *nh)
3140 {
3141         int i;
3142
3143         for (i = 0; i < nh->num_adj_entries; i++) {
3144                 int err;
3145
3146                 err = __mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index + i,
3147                                                      nh);
3148                 if (err)
3149                         return err;
3150         }
3151
3152         return 0;
3153 }
3154
3155 static int
3156 mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
3157                               struct mlxsw_sp_nexthop_group *nh_grp,
3158                               bool reallocate)
3159 {
3160         u32 adj_index = nh_grp->adj_index; /* base */
3161         struct mlxsw_sp_nexthop *nh;
3162         int i;
3163         int err;
3164
3165         for (i = 0; i < nh_grp->count; i++) {
3166                 nh = &nh_grp->nexthops[i];
3167
3168                 if (!nh->should_offload) {
3169                         nh->offloaded = 0;
3170                         continue;
3171                 }
3172
3173                 if (nh->update || reallocate) {
3174                         switch (nh->type) {
3175                         case MLXSW_SP_NEXTHOP_TYPE_ETH:
3176                                 err = mlxsw_sp_nexthop_update
3177                                             (mlxsw_sp, adj_index, nh);
3178                                 break;
3179                         case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3180                                 err = mlxsw_sp_nexthop_ipip_update
3181                                             (mlxsw_sp, adj_index, nh);
3182                                 break;
3183                         }
3184                         if (err)
3185                                 return err;
3186                         nh->update = 0;
3187                         nh->offloaded = 1;
3188                 }
3189                 adj_index += nh->num_adj_entries;
3190         }
3191         return 0;
3192 }
3193
3194 static bool
3195 mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
3196                                  const struct mlxsw_sp_fib_entry *fib_entry);
3197
3198 static int
3199 mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
3200                                     struct mlxsw_sp_nexthop_group *nh_grp)
3201 {
3202         struct mlxsw_sp_fib_entry *fib_entry;
3203         int err;
3204
3205         list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
3206                 if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node,
3207                                                       fib_entry))
3208                         continue;
3209                 err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
3210                 if (err)
3211                         return err;
3212         }
3213         return 0;
3214 }
3215
3216 static void
3217 mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
3218                                    enum mlxsw_reg_ralue_op op, int err);
3219
3220 static void
3221 mlxsw_sp_nexthop_fib_entries_refresh(struct mlxsw_sp_nexthop_group *nh_grp)
3222 {
3223         enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_WRITE;
3224         struct mlxsw_sp_fib_entry *fib_entry;
3225
3226         list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
3227                 if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node,
3228                                                       fib_entry))
3229                         continue;
3230                 mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
3231         }
3232 }
3233
/* Round an adjacency group size up, in place, to a valid size.
 *
 * Valid sizes for an adjacency group are 1-64, 512, 1024, 2048 and 4096.
 * Values up to 64 are left untouched; anything above 2048 becomes 4096.
 */
static void mlxsw_sp_adj_grp_size_round_up(u16 *p_adj_grp_size)
{
        u16 size = *p_adj_grp_size;

        if (size <= 64)
                return;

        if (size <= 512)
                size = 512;
        else if (size <= 1024)
                size = 1024;
        else if (size <= 2048)
                size = 2048;
        else
                size = 4096;

        *p_adj_grp_size = size;
}
3250
/* Set the group size to the largest of 512, 1024, 2048 and 4096 that does
 * not exceed @alloc_size. When @alloc_size is below 512 the group size is
 * left as it is.
 */
static void mlxsw_sp_adj_grp_size_round_down(u16 *p_adj_grp_size,
                                             unsigned int alloc_size)
{
        if (alloc_size < 512)
                return;

        if (alloc_size < 1024)
                *p_adj_grp_size = 512;
        else if (alloc_size < 2048)
                *p_adj_grp_size = 1024;
        else if (alloc_size < 4096)
                *p_adj_grp_size = 2048;
        else
                *p_adj_grp_size = 4096;
}
3263
3264 static int mlxsw_sp_fix_adj_grp_size(struct mlxsw_sp *mlxsw_sp,
3265                                      u16 *p_adj_grp_size)
3266 {
3267         unsigned int alloc_size;
3268         int err;
3269
3270         /* Round up the requested group size to the next size supported
3271          * by the device and make sure the request can be satisfied.
3272          */
3273         mlxsw_sp_adj_grp_size_round_up(p_adj_grp_size);
3274         err = mlxsw_sp_kvdl_alloc_count_query(mlxsw_sp,
3275                                               MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3276                                               *p_adj_grp_size, &alloc_size);
3277         if (err)
3278                 return err;
3279         /* It is possible the allocation results in more allocated
3280          * entries than requested. Try to use as much of them as
3281          * possible.
3282          */
3283         mlxsw_sp_adj_grp_size_round_down(p_adj_grp_size, alloc_size);
3284
3285         return 0;
3286 }
3287
3288 static void
3289 mlxsw_sp_nexthop_group_normalize(struct mlxsw_sp_nexthop_group *nh_grp)
3290 {
3291         int i, g = 0, sum_norm_weight = 0;
3292         struct mlxsw_sp_nexthop *nh;
3293
3294         for (i = 0; i < nh_grp->count; i++) {
3295                 nh = &nh_grp->nexthops[i];
3296
3297                 if (!nh->should_offload)
3298                         continue;
3299                 if (g > 0)
3300                         g = gcd(nh->nh_weight, g);
3301                 else
3302                         g = nh->nh_weight;
3303         }
3304
3305         for (i = 0; i < nh_grp->count; i++) {
3306                 nh = &nh_grp->nexthops[i];
3307
3308                 if (!nh->should_offload)
3309                         continue;
3310                 nh->norm_nh_weight = nh->nh_weight / g;
3311                 sum_norm_weight += nh->norm_nh_weight;
3312         }
3313
3314         nh_grp->sum_norm_weight = sum_norm_weight;
3315 }
3316
3317 static void
3318 mlxsw_sp_nexthop_group_rebalance(struct mlxsw_sp_nexthop_group *nh_grp)
3319 {
3320         int total = nh_grp->sum_norm_weight;
3321         u16 ecmp_size = nh_grp->ecmp_size;
3322         int i, weight = 0, lower_bound = 0;
3323
3324         for (i = 0; i < nh_grp->count; i++) {
3325                 struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3326                 int upper_bound;
3327
3328                 if (!nh->should_offload)
3329                         continue;
3330                 weight += nh->norm_nh_weight;
3331                 upper_bound = DIV_ROUND_CLOSEST(ecmp_size * weight, total);
3332                 nh->num_adj_entries = upper_bound - lower_bound;
3333                 lower_bound = upper_bound;
3334         }
3335 }
3336
/* Bring the device state of a nexthop group in line with its nexthops.
 *
 * For a non-gateway group only the FIB entries using the group are updated.
 * For a gateway group:
 *  - if no nexthop changed between "should be offloaded" and "offloaded",
 *    the existing adjacency entries are rewritten in place;
 *  - otherwise the weights are normalized, a new adjacency block is
 *    allocated and written, and the FIB entries are either updated (when
 *    the group had no valid adjacency index before) or mass-updated from
 *    the old block to the new one, after which the old block is freed.
 *
 * On any failure the set_trap path marks the group's adjacency index as
 * invalid, clears the offloaded state of all nexthops, updates the FIB
 * entries and frees the adjacency block currently recorded in the group.
 */
static void
mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
                               struct mlxsw_sp_nexthop_group *nh_grp)
{
        u16 ecmp_size, old_ecmp_size;
        struct mlxsw_sp_nexthop *nh;
        bool offload_change = false;
        u32 adj_index;
        bool old_adj_index_valid;
        u32 old_adj_index;
        int i;
        int err;

        /* NOTE(review): the return value of the update is ignored here. */
        if (!nh_grp->gateway) {
                mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
                return;
        }

        /* Detect nexthops whose desired offload state differs from the
         * current one; those that should become offloaded are flagged for
         * update.
         */
        for (i = 0; i < nh_grp->count; i++) {
                nh = &nh_grp->nexthops[i];

                if (nh->should_offload != nh->offloaded) {
                        offload_change = true;
                        if (nh->should_offload)
                                nh->update = 1;
                }
        }
        if (!offload_change) {
                /* Nothing was added or removed, so no need to reallocate. Just
                 * update MAC on existing adjacency indexes.
                 */
                err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, false);
                if (err) {
                        dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
                        goto set_trap;
                }
                return;
        }
        mlxsw_sp_nexthop_group_normalize(nh_grp);
        if (!nh_grp->sum_norm_weight)
                /* No neigh of this group is connected so we just set
                 * the trap and let everything flow through kernel.
                 */
                goto set_trap;

        ecmp_size = nh_grp->sum_norm_weight;
        err = mlxsw_sp_fix_adj_grp_size(mlxsw_sp, &ecmp_size);
        if (err)
                /* No valid allocation size available. */
                goto set_trap;

        err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
                                  ecmp_size, &adj_index);
        if (err) {
                /* We ran out of KVD linear space, just set the
                 * trap and let everything flow through kernel.
                 */
                dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n");
                goto set_trap;
        }
        /* Remember the previous adjacency block and switch the group over
         * to the newly allocated one before writing its entries.
         */
        old_adj_index_valid = nh_grp->adj_index_valid;
        old_adj_index = nh_grp->adj_index;
        old_ecmp_size = nh_grp->ecmp_size;
        nh_grp->adj_index_valid = 1;
        nh_grp->adj_index = adj_index;
        nh_grp->ecmp_size = ecmp_size;
        mlxsw_sp_nexthop_group_rebalance(nh_grp);
        err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, true);
        if (err) {
                /* NOTE(review): at this point the group records the new
                 * block, so set_trap frees only the new block. If the old
                 * block was valid it does not appear to be freed on this
                 * path - confirm whether this leaks KVD linear entries.
                 */
                dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
                goto set_trap;
        }

        if (!old_adj_index_valid) {
                /* The trap was set for fib entries, so we have to call
                 * fib entry update to unset it and use adjacency index.
                 */
                err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
                if (err) {
                        dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n");
                        goto set_trap;
                }
                return;
        }

        /* The old block is freed whether or not the mass update succeeded;
         * the error is only examined afterwards.
         */
        err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
                                             old_adj_index, old_ecmp_size);
        mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
                           old_ecmp_size, old_adj_index);
        if (err) {
                dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
                goto set_trap;
        }

        /* Offload state within the group changed, so update the flags. */
        mlxsw_sp_nexthop_fib_entries_refresh(nh_grp);

        return;

set_trap:
        /* Sample the validity flag before clearing it: it decides below
         * whether the block currently recorded in the group must be freed.
         */
        old_adj_index_valid = nh_grp->adj_index_valid;
        nh_grp->adj_index_valid = 0;
        for (i = 0; i < nh_grp->count; i++) {
                nh = &nh_grp->nexthops[i];
                nh->offloaded = 0;
        }
        err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
        if (err)
                dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
        if (old_adj_index_valid)
                mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
                                   nh_grp->ecmp_size, nh_grp->adj_index);
}
3450
3451 static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
3452                                             bool removing)
3453 {
3454         if (!removing)
3455                 nh->should_offload = 1;
3456         else
3457                 nh->should_offload = 0;
3458         nh->update = 1;
3459 }
3460
3461 static void
3462 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
3463                               struct mlxsw_sp_neigh_entry *neigh_entry,
3464                               bool removing)
3465 {
3466         struct mlxsw_sp_nexthop *nh;
3467
3468         list_for_each_entry(nh, &neigh_entry->nexthop_list,
3469                             neigh_list_node) {
3470                 __mlxsw_sp_nexthop_neigh_update(nh, removing);
3471                 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3472         }
3473 }
3474
3475 static void mlxsw_sp_nexthop_rif_init(struct mlxsw_sp_nexthop *nh,
3476                                       struct mlxsw_sp_rif *rif)
3477 {
3478         if (nh->rif)
3479                 return;
3480
3481         nh->rif = rif;
3482         list_add(&nh->rif_list_node, &rif->nexthop_list);
3483 }
3484
3485 static void mlxsw_sp_nexthop_rif_fini(struct mlxsw_sp_nexthop *nh)
3486 {
3487         if (!nh->rif)
3488                 return;
3489
3490         list_del(&nh->rif_list_node);
3491         nh->rif = NULL;
3492 }
3493
3494 static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp,
3495                                        struct mlxsw_sp_nexthop *nh)
3496 {
3497         struct mlxsw_sp_neigh_entry *neigh_entry;
3498         struct neighbour *n;
3499         u8 nud_state, dead;
3500         int err;
3501
3502         if (!nh->nh_grp->gateway || nh->neigh_entry)
3503                 return 0;
3504
3505         /* Take a reference of neigh here ensuring that neigh would
3506          * not be destructed before the nexthop entry is finished.
3507          * The reference is taken either in neigh_lookup() or
3508          * in neigh_create() in case n is not found.
3509          */
3510         n = neigh_lookup(nh->nh_grp->neigh_tbl, &nh->gw_addr, nh->rif->dev);
3511         if (!n) {
3512                 n = neigh_create(nh->nh_grp->neigh_tbl, &nh->gw_addr,
3513                                  nh->rif->dev);
3514                 if (IS_ERR(n))
3515                         return PTR_ERR(n);
3516                 neigh_event_send(n, NULL);
3517         }
3518         neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
3519         if (!neigh_entry) {
3520                 neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
3521                 if (IS_ERR(neigh_entry)) {
3522                         err = -EINVAL;
3523                         goto err_neigh_entry_create;
3524                 }
3525         }
3526
3527         /* If that is the first nexthop connected to that neigh, add to
3528          * nexthop_neighs_list
3529          */
3530         if (list_empty(&neigh_entry->nexthop_list))
3531                 list_add_tail(&neigh_entry->nexthop_neighs_list_node,
3532                               &mlxsw_sp->router->nexthop_neighs_list);
3533
3534         nh->neigh_entry = neigh_entry;
3535         list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list);
3536         read_lock_bh(&n->lock);
3537         nud_state = n->nud_state;
3538         dead = n->dead;
3539         read_unlock_bh(&n->lock);
3540         __mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID && !dead));
3541
3542         return 0;
3543
3544 err_neigh_entry_create:
3545         neigh_release(n);
3546         return err;
3547 }
3548
3549 static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp,
3550                                         struct mlxsw_sp_nexthop *nh)
3551 {
3552         struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
3553         struct neighbour *n;
3554
3555         if (!neigh_entry)
3556                 return;
3557         n = neigh_entry->key.n;
3558
3559         __mlxsw_sp_nexthop_neigh_update(nh, true);
3560         list_del(&nh->neigh_list_node);
3561         nh->neigh_entry = NULL;
3562
3563         /* If that is the last nexthop connected to that neigh, remove from
3564          * nexthop_neighs_list
3565          */
3566         if (list_empty(&neigh_entry->nexthop_list))
3567                 list_del(&neigh_entry->nexthop_neighs_list_node);
3568
3569         if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
3570                 mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
3571
3572         neigh_release(n);
3573 }
3574
3575 static bool mlxsw_sp_ipip_netdev_ul_up(struct net_device *ol_dev)
3576 {
3577         struct net_device *ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
3578
3579         return ul_dev ? (ul_dev->flags & IFF_UP) : true;
3580 }
3581
3582 static void mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp,
3583                                        struct mlxsw_sp_nexthop *nh,
3584                                        struct mlxsw_sp_ipip_entry *ipip_entry)
3585 {
3586         bool removing;
3587
3588         if (!nh->nh_grp->gateway || nh->ipip_entry)
3589                 return;
3590
3591         nh->ipip_entry = ipip_entry;
3592         removing = !mlxsw_sp_ipip_netdev_ul_up(ipip_entry->ol_dev);
3593         __mlxsw_sp_nexthop_neigh_update(nh, removing);
3594         mlxsw_sp_nexthop_rif_init(nh, &ipip_entry->ol_lb->common);
3595 }
3596
3597 static void mlxsw_sp_nexthop_ipip_fini(struct mlxsw_sp *mlxsw_sp,
3598                                        struct mlxsw_sp_nexthop *nh)
3599 {
3600         struct mlxsw_sp_ipip_entry *ipip_entry = nh->ipip_entry;
3601
3602         if (!ipip_entry)
3603                 return;
3604
3605         __mlxsw_sp_nexthop_neigh_update(nh, true);
3606         nh->ipip_entry = NULL;
3607 }
3608
3609 static bool mlxsw_sp_nexthop4_ipip_type(const struct mlxsw_sp *mlxsw_sp,
3610                                         const struct fib_nh *fib_nh,
3611                                         enum mlxsw_sp_ipip_type *p_ipipt)
3612 {
3613         struct net_device *dev = fib_nh->nh_dev;
3614
3615         return dev &&
3616                fib_nh->nh_parent->fib_type == RTN_UNICAST &&
3617                mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, p_ipipt);
3618 }
3619
3620 static void mlxsw_sp_nexthop_type_fini(struct mlxsw_sp *mlxsw_sp,
3621                                        struct mlxsw_sp_nexthop *nh)
3622 {
3623         switch (nh->type) {
3624         case MLXSW_SP_NEXTHOP_TYPE_ETH:
3625                 mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
3626                 mlxsw_sp_nexthop_rif_fini(nh);
3627                 break;
3628         case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3629                 mlxsw_sp_nexthop_rif_fini(nh);
3630                 mlxsw_sp_nexthop_ipip_fini(mlxsw_sp, nh);
3631                 break;
3632         }
3633 }
3634
3635 static int mlxsw_sp_nexthop4_type_init(struct mlxsw_sp *mlxsw_sp,
3636                                        struct mlxsw_sp_nexthop *nh,
3637                                        struct fib_nh *fib_nh)
3638 {
3639         const struct mlxsw_sp_ipip_ops *ipip_ops;
3640         struct net_device *dev = fib_nh->nh_dev;
3641         struct mlxsw_sp_ipip_entry *ipip_entry;
3642         struct mlxsw_sp_rif *rif;
3643         int err;
3644
3645         ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev);
3646         if (ipip_entry) {
3647                 ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
3648                 if (ipip_ops->can_offload(mlxsw_sp, dev,
3649                                           MLXSW_SP_L3_PROTO_IPV4)) {
3650                         nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
3651                         mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry);
3652                         return 0;
3653                 }
3654         }
3655
3656         nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
3657         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
3658         if (!rif)
3659                 return 0;
3660
3661         mlxsw_sp_nexthop_rif_init(nh, rif);
3662         err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
3663         if (err)
3664                 goto err_neigh_init;
3665
3666         return 0;
3667
3668 err_neigh_init:
3669         mlxsw_sp_nexthop_rif_fini(nh);
3670         return err;
3671 }
3672
/* IPv4 counterpart of mlxsw_sp_nexthop4_type_init(); simply forwards to the
 * protocol-independent type teardown.
 */
static void mlxsw_sp_nexthop4_type_fini(struct mlxsw_sp *mlxsw_sp,
                                        struct mlxsw_sp_nexthop *nh)
{
        mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
}
3678
3679 static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp,
3680                                   struct mlxsw_sp_nexthop_group *nh_grp,
3681                                   struct mlxsw_sp_nexthop *nh,
3682                                   struct fib_nh *fib_nh)
3683 {
3684         struct net_device *dev = fib_nh->nh_dev;
3685         struct in_device *in_dev;
3686         int err;
3687
3688         nh->nh_grp = nh_grp;
3689         nh->key.fib_nh = fib_nh;
3690 #ifdef CONFIG_IP_ROUTE_MULTIPATH
3691         nh->nh_weight = fib_nh->nh_weight;
3692 #else
3693         nh->nh_weight = 1;
3694 #endif
3695         memcpy(&nh->gw_addr, &fib_nh->nh_gw, sizeof(fib_nh->nh_gw));
3696         err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh);
3697         if (err)
3698                 return err;
3699
3700         mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
3701         list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
3702
3703         if (!dev)
3704                 return 0;
3705
3706         in_dev = __in_dev_get_rtnl(dev);
3707         if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
3708             fib_nh->nh_flags & RTNH_F_LINKDOWN)
3709                 return 0;
3710
3711         err = mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh);
3712         if (err)
3713                 goto err_nexthop_neigh_init;
3714
3715         return 0;
3716
3717 err_nexthop_neigh_init:
3718         mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
3719         return err;
3720 }
3721
3722 static void mlxsw_sp_nexthop4_fini(struct mlxsw_sp *mlxsw_sp,
3723                                    struct mlxsw_sp_nexthop *nh)
3724 {
3725         mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh);
3726         list_del(&nh->router_list_node);
3727         mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
3728         mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
3729 }
3730
3731 static void mlxsw_sp_nexthop4_event(struct mlxsw_sp *mlxsw_sp,
3732                                     unsigned long event, struct fib_nh *fib_nh)
3733 {
3734         struct mlxsw_sp_nexthop_key key;
3735         struct mlxsw_sp_nexthop *nh;
3736
3737         if (mlxsw_sp->router->aborted)
3738                 return;
3739
3740         key.fib_nh = fib_nh;
3741         nh = mlxsw_sp_nexthop_lookup(mlxsw_sp, key);
3742         if (WARN_ON_ONCE(!nh))
3743                 return;
3744
3745         switch (event) {
3746         case FIB_EVENT_NH_ADD:
3747                 mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh);
3748                 break;
3749         case FIB_EVENT_NH_DEL:
3750                 mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh);
3751                 break;
3752         }
3753
3754         mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3755 }
3756
3757 static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
3758                                         struct mlxsw_sp_rif *rif)
3759 {
3760         struct mlxsw_sp_nexthop *nh;
3761         bool removing;
3762
3763         list_for_each_entry(nh, &rif->nexthop_list, rif_list_node) {
3764                 switch (nh->type) {
3765                 case MLXSW_SP_NEXTHOP_TYPE_ETH:
3766                         removing = false;
3767                         break;
3768                 case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3769                         removing = !mlxsw_sp_ipip_netdev_ul_up(rif->dev);
3770                         break;
3771                 default:
3772                         WARN_ON(1);
3773                         continue;
3774                 }
3775
3776                 __mlxsw_sp_nexthop_neigh_update(nh, removing);
3777                 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3778         }
3779 }
3780
3781 static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp,
3782                                          struct mlxsw_sp_rif *old_rif,
3783                                          struct mlxsw_sp_rif *new_rif)
3784 {
3785         struct mlxsw_sp_nexthop *nh;
3786
3787         list_splice_init(&old_rif->nexthop_list, &new_rif->nexthop_list);
3788         list_for_each_entry(nh, &new_rif->nexthop_list, rif_list_node)
3789                 nh->rif = new_rif;
3790         mlxsw_sp_nexthop_rif_update(mlxsw_sp, new_rif);
3791 }
3792
3793 static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
3794                                            struct mlxsw_sp_rif *rif)
3795 {
3796         struct mlxsw_sp_nexthop *nh, *tmp;
3797
3798         list_for_each_entry_safe(nh, tmp, &rif->nexthop_list, rif_list_node) {
3799                 mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
3800                 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3801         }
3802 }
3803
3804 static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp,
3805                                    const struct fib_info *fi)
3806 {
3807         return fi->fib_nh->nh_scope == RT_SCOPE_LINK ||
3808                mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, fi->fib_nh, NULL);
3809 }
3810
3811 static struct mlxsw_sp_nexthop_group *
3812 mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
3813 {
3814         struct mlxsw_sp_nexthop_group *nh_grp;
3815         struct mlxsw_sp_nexthop *nh;
3816         struct fib_nh *fib_nh;
3817         int i;
3818         int err;
3819
3820         nh_grp = kzalloc(struct_size(nh_grp, nexthops, fi->fib_nhs),
3821                          GFP_KERNEL);
3822         if (!nh_grp)
3823                 return ERR_PTR(-ENOMEM);
3824         nh_grp->priv = fi;
3825         INIT_LIST_HEAD(&nh_grp->fib_list);
3826         nh_grp->neigh_tbl = &arp_tbl;
3827
3828         nh_grp->gateway = mlxsw_sp_fi_is_gateway(mlxsw_sp, fi);
3829         nh_grp->count = fi->fib_nhs;
3830         fib_info_hold(fi);
3831         for (i = 0; i < nh_grp->count; i++) {
3832                 nh = &nh_grp->nexthops[i];
3833                 fib_nh = &fi->fib_nh[i];
3834                 err = mlxsw_sp_nexthop4_init(mlxsw_sp, nh_grp, nh, fib_nh);
3835                 if (err)
3836                         goto err_nexthop4_init;
3837         }
3838         err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
3839         if (err)
3840                 goto err_nexthop_group_insert;
3841         mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
3842         return nh_grp;
3843
3844 err_nexthop_group_insert:
3845 err_nexthop4_init:
3846         for (i--; i >= 0; i--) {
3847                 nh = &nh_grp->nexthops[i];
3848                 mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
3849         }
3850         fib_info_put(fi);
3851         kfree(nh_grp);
3852         return ERR_PTR(err);
3853 }
3854
3855 static void
3856 mlxsw_sp_nexthop4_group_destroy(struct mlxsw_sp *mlxsw_sp,
3857                                 struct mlxsw_sp_nexthop_group *nh_grp)
3858 {
3859         struct mlxsw_sp_nexthop *nh;
3860         int i;
3861
3862         mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
3863         for (i = 0; i < nh_grp->count; i++) {
3864                 nh = &nh_grp->nexthops[i];
3865                 mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
3866         }
3867         mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
3868         WARN_ON_ONCE(nh_grp->adj_index_valid);
3869         fib_info_put(mlxsw_sp_nexthop4_group_fi(nh_grp));
3870         kfree(nh_grp);
3871 }
3872
3873 static int mlxsw_sp_nexthop4_group_get(struct mlxsw_sp *mlxsw_sp,
3874                                        struct mlxsw_sp_fib_entry *fib_entry,
3875                                        struct fib_info *fi)
3876 {
3877         struct mlxsw_sp_nexthop_group *nh_grp;
3878
3879         nh_grp = mlxsw_sp_nexthop4_group_lookup(mlxsw_sp, fi);
3880         if (!nh_grp) {
3881                 nh_grp = mlxsw_sp_nexthop4_group_create(mlxsw_sp, fi);
3882                 if (IS_ERR(nh_grp))
3883                         return PTR_ERR(nh_grp);
3884         }
3885         list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list);
3886         fib_entry->nh_group = nh_grp;
3887         return 0;
3888 }
3889
3890 static void mlxsw_sp_nexthop4_group_put(struct mlxsw_sp *mlxsw_sp,
3891                                         struct mlxsw_sp_fib_entry *fib_entry)
3892 {
3893         struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3894
3895         list_del(&fib_entry->nexthop_group_node);
3896         if (!list_empty(&nh_grp->fib_list))
3897                 return;
3898         mlxsw_sp_nexthop4_group_destroy(mlxsw_sp, nh_grp);
3899 }
3900
3901 static bool
3902 mlxsw_sp_fib4_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
3903 {
3904         struct mlxsw_sp_fib4_entry *fib4_entry;
3905
3906         fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
3907                                   common);
3908         return !fib4_entry->tos;
3909 }
3910
3911 static bool
3912 mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
3913 {
3914         struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;
3915
3916         switch (fib_entry->fib_node->fib->proto) {
3917         case MLXSW_SP_L3_PROTO_IPV4:
3918                 if (!mlxsw_sp_fib4_entry_should_offload(fib_entry))
3919                         return false;
3920                 break;
3921         case MLXSW_SP_L3_PROTO_IPV6:
3922                 break;
3923         }
3924
3925         switch (fib_entry->type) {
3926         case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
3927                 return !!nh_group->adj_index_valid;
3928         case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
3929                 return !!nh_group->nh_rif;
3930         case MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE:
3931         case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
3932         case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP:
3933                 return true;
3934         default:
3935                 return false;
3936         }
3937 }
3938
3939 static struct mlxsw_sp_nexthop *
3940 mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
3941                      const struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
3942 {
3943         int i;
3944
3945         for (i = 0; i < nh_grp->count; i++) {
3946                 struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3947                 struct fib6_info *rt = mlxsw_sp_rt6->rt;
3948
3949                 if (nh->rif && nh->rif->dev == rt->fib6_nh.nh_dev &&
3950                     ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr,
3951                                     &rt->fib6_nh.nh_gw))
3952                         return nh;
3953                 continue;
3954         }
3955
3956         return NULL;
3957 }
3958
3959 static void
3960 mlxsw_sp_fib4_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
3961 {
3962         struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3963         int i;
3964
3965         if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL ||
3966             fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE ||
3967             fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP ||
3968             fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP) {
3969                 nh_grp->nexthops->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD;
3970                 return;
3971         }
3972
3973         for (i = 0; i < nh_grp->count; i++) {
3974                 struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3975
3976                 if (nh->offloaded)
3977                         nh->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD;
3978                 else
3979                         nh->key.fib_nh->nh_flags &= ~RTNH_F_OFFLOAD;
3980         }
3981 }
3982
3983 static void
3984 mlxsw_sp_fib4_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
3985 {
3986         struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3987         int i;
3988
3989         if (!list_is_singular(&nh_grp->fib_list))
3990                 return;
3991
3992         for (i = 0; i < nh_grp->count; i++) {
3993                 struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3994
3995                 nh->key.fib_nh->nh_flags &= ~RTNH_F_OFFLOAD;
3996         }
3997 }
3998
3999 static void
4000 mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
4001 {
4002         struct mlxsw_sp_fib6_entry *fib6_entry;
4003         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4004
4005         fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
4006                                   common);
4007
4008         if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL ||
4009             fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE) {
4010                 list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
4011                                  list)->rt->fib6_nh.nh_flags |= RTNH_F_OFFLOAD;
4012                 return;
4013         }
4014
4015         list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
4016                 struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
4017                 struct mlxsw_sp_nexthop *nh;
4018
4019                 nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6);
4020                 if (nh && nh->offloaded)
4021                         mlxsw_sp_rt6->rt->fib6_nh.nh_flags |= RTNH_F_OFFLOAD;
4022                 else
4023                         mlxsw_sp_rt6->rt->fib6_nh.nh_flags &= ~RTNH_F_OFFLOAD;
4024         }
4025 }
4026
4027 static void
4028 mlxsw_sp_fib6_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
4029 {
4030         struct mlxsw_sp_fib6_entry *fib6_entry;
4031         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4032
4033         fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
4034                                   common);
4035         list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
4036                 struct fib6_info *rt = mlxsw_sp_rt6->rt;
4037
4038                 rt->fib6_nh.nh_flags &= ~RTNH_F_OFFLOAD;
4039         }
4040 }
4041
4042 static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
4043 {
4044         switch (fib_entry->fib_node->fib->proto) {
4045         case MLXSW_SP_L3_PROTO_IPV4:
4046                 mlxsw_sp_fib4_entry_offload_set(fib_entry);
4047                 break;
4048         case MLXSW_SP_L3_PROTO_IPV6:
4049                 mlxsw_sp_fib6_entry_offload_set(fib_entry);
4050                 break;
4051         }
4052 }
4053
4054 static void
4055 mlxsw_sp_fib_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
4056 {
4057         switch (fib_entry->fib_node->fib->proto) {
4058         case MLXSW_SP_L3_PROTO_IPV4:
4059                 mlxsw_sp_fib4_entry_offload_unset(fib_entry);
4060                 break;
4061         case MLXSW_SP_L3_PROTO_IPV6:
4062                 mlxsw_sp_fib6_entry_offload_unset(fib_entry);
4063                 break;
4064         }
4065 }
4066
4067 static void
4068 mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
4069                                    enum mlxsw_reg_ralue_op op, int err)
4070 {
4071         switch (op) {
4072         case MLXSW_REG_RALUE_OP_WRITE_DELETE:
4073                 return mlxsw_sp_fib_entry_offload_unset(fib_entry);
4074         case MLXSW_REG_RALUE_OP_WRITE_WRITE:
4075                 if (err)
4076                         return;
4077                 if (mlxsw_sp_fib_entry_should_offload(fib_entry))
4078                         mlxsw_sp_fib_entry_offload_set(fib_entry);
4079                 else
4080                         mlxsw_sp_fib_entry_offload_unset(fib_entry);
4081                 return;
4082         default:
4083                 return;
4084         }
4085 }
4086
4087 static void
4088 mlxsw_sp_fib_entry_ralue_pack(char *ralue_pl,
4089                               const struct mlxsw_sp_fib_entry *fib_entry,
4090                               enum mlxsw_reg_ralue_op op)
4091 {
4092         struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
4093         enum mlxsw_reg_ralxx_protocol proto;
4094         u32 *p_dip;
4095
4096         proto = (enum mlxsw_reg_ralxx_protocol) fib->proto;
4097
4098         switch (fib->proto) {
4099         case MLXSW_SP_L3_PROTO_IPV4:
4100                 p_dip = (u32 *) fib_entry->fib_node->key.addr;
4101                 mlxsw_reg_ralue_pack4(ralue_pl, proto, op, fib->vr->id,
4102                                       fib_entry->fib_node->key.prefix_len,
4103                                       *p_dip);
4104                 break;
4105         case MLXSW_SP_L3_PROTO_IPV6:
4106                 mlxsw_reg_ralue_pack6(ralue_pl, proto, op, fib->vr->id,
4107                                       fib_entry->fib_node->key.prefix_len,
4108                                       fib_entry->fib_node->key.addr);
4109                 break;
4110         }
4111 }
4112
4113 static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp,
4114                                         struct mlxsw_sp_fib_entry *fib_entry,
4115                                         enum mlxsw_reg_ralue_op op)
4116 {
4117         char ralue_pl[MLXSW_REG_RALUE_LEN];
4118         enum mlxsw_reg_ralue_trap_action trap_action;
4119         u16 trap_id = 0;
4120         u32 adjacency_index = 0;
4121         u16 ecmp_size = 0;
4122
4123         /* In case the nexthop group adjacency index is valid, use it
4124          * with provided ECMP size. Otherwise, setup trap and pass
4125          * traffic to kernel.
4126          */
4127         if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
4128                 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
4129                 adjacency_index = fib_entry->nh_group->adj_index;
4130                 ecmp_size = fib_entry->nh_group->ecmp_size;
4131         } else {
4132                 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
4133                 trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
4134         }
4135
4136         mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4137         mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
4138                                         adjacency_index, ecmp_size);
4139         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4140 }
4141
4142 static int mlxsw_sp_fib_entry_op_local(struct mlxsw_sp *mlxsw_sp,
4143                                        struct mlxsw_sp_fib_entry *fib_entry,
4144                                        enum mlxsw_reg_ralue_op op)
4145 {
4146         struct mlxsw_sp_rif *rif = fib_entry->nh_group->nh_rif;
4147         enum mlxsw_reg_ralue_trap_action trap_action;
4148         char ralue_pl[MLXSW_REG_RALUE_LEN];
4149         u16 trap_id = 0;
4150         u16 rif_index = 0;
4151
4152         if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
4153                 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
4154                 rif_index = rif->rif_index;
4155         } else {
4156                 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
4157                 trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
4158         }
4159
4160         mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4161         mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id,
4162                                        rif_index);
4163         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4164 }
4165
4166 static int mlxsw_sp_fib_entry_op_trap(struct mlxsw_sp *mlxsw_sp,
4167                                       struct mlxsw_sp_fib_entry *fib_entry,
4168                                       enum mlxsw_reg_ralue_op op)
4169 {
4170         char ralue_pl[MLXSW_REG_RALUE_LEN];
4171
4172         mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4173         mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
4174         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4175 }
4176
4177 static int mlxsw_sp_fib_entry_op_blackhole(struct mlxsw_sp *mlxsw_sp,
4178                                            struct mlxsw_sp_fib_entry *fib_entry,
4179                                            enum mlxsw_reg_ralue_op op)
4180 {
4181         enum mlxsw_reg_ralue_trap_action trap_action;
4182         char ralue_pl[MLXSW_REG_RALUE_LEN];
4183
4184         trap_action = MLXSW_REG_RALUE_TRAP_ACTION_DISCARD_ERROR;
4185         mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4186         mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, 0, 0);
4187         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4188 }
4189
4190 static int
4191 mlxsw_sp_fib_entry_op_ipip_decap(struct mlxsw_sp *mlxsw_sp,
4192                                  struct mlxsw_sp_fib_entry *fib_entry,
4193                                  enum mlxsw_reg_ralue_op op)
4194 {
4195         struct mlxsw_sp_ipip_entry *ipip_entry = fib_entry->decap.ipip_entry;
4196         const struct mlxsw_sp_ipip_ops *ipip_ops;
4197
4198         if (WARN_ON(!ipip_entry))
4199                 return -EINVAL;
4200
4201         ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
4202         return ipip_ops->fib_entry_op(mlxsw_sp, ipip_entry, op,
4203                                       fib_entry->decap.tunnel_index);
4204 }
4205
4206 static int mlxsw_sp_fib_entry_op_nve_decap(struct mlxsw_sp *mlxsw_sp,
4207                                            struct mlxsw_sp_fib_entry *fib_entry,
4208                                            enum mlxsw_reg_ralue_op op)
4209 {
4210         char ralue_pl[MLXSW_REG_RALUE_LEN];
4211
4212         mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4213         mlxsw_reg_ralue_act_ip2me_tun_pack(ralue_pl,
4214                                            fib_entry->decap.tunnel_index);
4215         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4216 }
4217
4218 static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
4219                                    struct mlxsw_sp_fib_entry *fib_entry,
4220                                    enum mlxsw_reg_ralue_op op)
4221 {
4222         switch (fib_entry->type) {
4223         case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
4224                 return mlxsw_sp_fib_entry_op_remote(mlxsw_sp, fib_entry, op);
4225         case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
4226                 return mlxsw_sp_fib_entry_op_local(mlxsw_sp, fib_entry, op);
4227         case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
4228                 return mlxsw_sp_fib_entry_op_trap(mlxsw_sp, fib_entry, op);
4229         case MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE:
4230                 return mlxsw_sp_fib_entry_op_blackhole(mlxsw_sp, fib_entry, op);
4231         case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
4232                 return mlxsw_sp_fib_entry_op_ipip_decap(mlxsw_sp,
4233                                                         fib_entry, op);
4234         case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP:
4235                 return mlxsw_sp_fib_entry_op_nve_decap(mlxsw_sp, fib_entry, op);
4236         }
4237         return -EINVAL;
4238 }
4239
4240 static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
4241                                  struct mlxsw_sp_fib_entry *fib_entry,
4242                                  enum mlxsw_reg_ralue_op op)
4243 {
4244         int err = __mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, op);
4245
4246         mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, err);
4247
4248         return err;
4249 }
4250
4251 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
4252                                      struct mlxsw_sp_fib_entry *fib_entry)
4253 {
4254         return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
4255                                      MLXSW_REG_RALUE_OP_WRITE_WRITE);
4256 }
4257
4258 static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp,
4259                                   struct mlxsw_sp_fib_entry *fib_entry)
4260 {
4261         return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
4262                                      MLXSW_REG_RALUE_OP_WRITE_DELETE);
4263 }
4264
4265 static int
4266 mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
4267                              const struct fib_entry_notifier_info *fen_info,
4268                              struct mlxsw_sp_fib_entry *fib_entry)
4269 {
4270         union mlxsw_sp_l3addr dip = { .addr4 = htonl(fen_info->dst) };
4271         u32 tb_id = mlxsw_sp_fix_tb_id(fen_info->tb_id);
4272         struct net_device *dev = fen_info->fi->fib_dev;
4273         struct mlxsw_sp_ipip_entry *ipip_entry;
4274         struct fib_info *fi = fen_info->fi;
4275
4276         switch (fen_info->type) {
4277         case RTN_LOCAL:
4278                 ipip_entry = mlxsw_sp_ipip_entry_find_by_decap(mlxsw_sp, dev,
4279                                                  MLXSW_SP_L3_PROTO_IPV4, dip);
4280                 if (ipip_entry && ipip_entry->ol_dev->flags & IFF_UP) {
4281                         fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
4282                         return mlxsw_sp_fib_entry_decap_init(mlxsw_sp,
4283                                                              fib_entry,
4284                                                              ipip_entry);
4285                 }
4286                 if (mlxsw_sp_nve_ipv4_route_is_decap(mlxsw_sp, tb_id,
4287                                                      dip.addr4)) {
4288                         u32 t_index;
4289
4290                         t_index = mlxsw_sp_nve_decap_tunnel_index_get(mlxsw_sp);
4291                         fib_entry->decap.tunnel_index = t_index;
4292                         fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
4293                         return 0;
4294                 }
4295                 /* fall through */
4296         case RTN_BROADCAST:
4297                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
4298                 return 0;
4299         case RTN_BLACKHOLE:
4300                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE;
4301                 return 0;
4302         case RTN_UNREACHABLE: /* fall through */
4303         case RTN_PROHIBIT:
4304                 /* Packets hitting these routes need to be trapped, but
4305                  * can do so with a lower priority than packets directed
4306                  * at the host, so use action type local instead of trap.
4307                  */
4308                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
4309                 return 0;
4310         case RTN_UNICAST:
4311                 if (mlxsw_sp_fi_is_gateway(mlxsw_sp, fi))
4312                         fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
4313                 else
4314                         fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
4315                 return 0;
4316         default:
4317                 return -EINVAL;
4318         }
4319 }
4320
4321 static struct mlxsw_sp_fib4_entry *
4322 mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp,
4323                            struct mlxsw_sp_fib_node *fib_node,
4324                            const struct fib_entry_notifier_info *fen_info)
4325 {
4326         struct mlxsw_sp_fib4_entry *fib4_entry;
4327         struct mlxsw_sp_fib_entry *fib_entry;
4328         int err;
4329
4330         fib4_entry = kzalloc(sizeof(*fib4_entry), GFP_KERNEL);
4331         if (!fib4_entry)
4332                 return ERR_PTR(-ENOMEM);
4333         fib_entry = &fib4_entry->common;
4334
4335         err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry);
4336         if (err)
4337                 goto err_fib4_entry_type_set;
4338
4339         err = mlxsw_sp_nexthop4_group_get(mlxsw_sp, fib_entry, fen_info->fi);
4340         if (err)
4341                 goto err_nexthop4_group_get;
4342
4343         fib4_entry->prio = fen_info->fi->fib_priority;
4344         fib4_entry->tb_id = fen_info->tb_id;
4345         fib4_entry->type = fen_info->type;
4346         fib4_entry->tos = fen_info->tos;
4347
4348         fib_entry->fib_node = fib_node;
4349
4350         return fib4_entry;
4351
4352 err_nexthop4_group_get:
4353 err_fib4_entry_type_set:
4354         kfree(fib4_entry);
4355         return ERR_PTR(err);
4356 }
4357
4358 static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp,
4359                                         struct mlxsw_sp_fib4_entry *fib4_entry)
4360 {
4361         mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common);
4362         kfree(fib4_entry);
4363 }
4364
4365 static struct mlxsw_sp_fib4_entry *
4366 mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp,
4367                            const struct fib_entry_notifier_info *fen_info)
4368 {
4369         struct mlxsw_sp_fib4_entry *fib4_entry;
4370         struct mlxsw_sp_fib_node *fib_node;
4371         struct mlxsw_sp_fib *fib;
4372         struct mlxsw_sp_vr *vr;
4373
4374         vr = mlxsw_sp_vr_find(mlxsw_sp, fen_info->tb_id);
4375         if (!vr)
4376                 return NULL;
4377         fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV4);
4378
4379         fib_node = mlxsw_sp_fib_node_lookup(fib, &fen_info->dst,
4380                                             sizeof(fen_info->dst),
4381                                             fen_info->dst_len);
4382         if (!fib_node)
4383                 return NULL;
4384
4385         list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) {
4386                 if (fib4_entry->tb_id == fen_info->tb_id &&
4387                     fib4_entry->tos == fen_info->tos &&
4388                     fib4_entry->type == fen_info->type &&
4389                     mlxsw_sp_nexthop4_group_fi(fib4_entry->common.nh_group) ==
4390                     fen_info->fi) {
4391                         return fib4_entry;
4392                 }
4393         }
4394
4395         return NULL;
4396 }
4397
4398 static const struct rhashtable_params mlxsw_sp_fib_ht_params = {
4399         .key_offset = offsetof(struct mlxsw_sp_fib_node, key),
4400         .head_offset = offsetof(struct mlxsw_sp_fib_node, ht_node),
4401         .key_len = sizeof(struct mlxsw_sp_fib_key),
4402         .automatic_shrinking = true,
4403 };
4404
4405 static int mlxsw_sp_fib_node_insert(struct mlxsw_sp_fib *fib,
4406                                     struct mlxsw_sp_fib_node *fib_node)
4407 {
4408         return rhashtable_insert_fast(&fib->ht, &fib_node->ht_node,
4409                                       mlxsw_sp_fib_ht_params);
4410 }
4411
4412 static void mlxsw_sp_fib_node_remove(struct mlxsw_sp_fib *fib,
4413                                      struct mlxsw_sp_fib_node *fib_node)
4414 {
4415         rhashtable_remove_fast(&fib->ht, &fib_node->ht_node,
4416                                mlxsw_sp_fib_ht_params);
4417 }
4418
4419 static struct mlxsw_sp_fib_node *
4420 mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
4421                          size_t addr_len, unsigned char prefix_len)
4422 {
4423         struct mlxsw_sp_fib_key key;
4424
4425         memset(&key, 0, sizeof(key));
4426         memcpy(key.addr, addr, addr_len);
4427         key.prefix_len = prefix_len;
4428         return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params);
4429 }
4430
4431 static struct mlxsw_sp_fib_node *
4432 mlxsw_sp_fib_node_create(struct mlxsw_sp_fib *fib, const void *addr,
4433                          size_t addr_len, unsigned char prefix_len)
4434 {
4435         struct mlxsw_sp_fib_node *fib_node;
4436
4437         fib_node = kzalloc(sizeof(*fib_node), GFP_KERNEL);
4438         if (!fib_node)
4439                 return NULL;
4440
4441         INIT_LIST_HEAD(&fib_node->entry_list);
4442         list_add(&fib_node->list, &fib->node_list);
4443         memcpy(fib_node->key.addr, addr, addr_len);
4444         fib_node->key.prefix_len = prefix_len;
4445
4446         return fib_node;
4447 }
4448
/* Unlink @fib_node from the list it was added to and free it. A node that
 * still has entries on its entry list triggers a warning but is freed
 * regardless.
 */
static void mlxsw_sp_fib_node_destroy(struct mlxsw_sp_fib_node *fib_node)
{
        list_del(&fib_node->list);
        WARN_ON(!list_empty(&fib_node->entry_list));
        kfree(fib_node);
}
4455
4456 static bool
4457 mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
4458                                  const struct mlxsw_sp_fib_entry *fib_entry)
4459 {
4460         return list_first_entry(&fib_node->entry_list,
4461                                 struct mlxsw_sp_fib_entry, list) == fib_entry;
4462 }
4463
/* Account for the prefix length of @fib_node in the LPM tree used for the
 * node's protocol.
 *
 * If the protocol's current tree already has a non-zero reference count for
 * this prefix length, only that count is incremented. Otherwise a tree whose
 * prefix usage additionally includes this prefix length is obtained and the
 * virtual routers are switched over to it before the count is incremented
 * on the new tree.
 *
 * Returns 0 on success, or the error from getting or installing the tree.
 */
static int mlxsw_sp_fib_lpm_tree_link(struct mlxsw_sp *mlxsw_sp,
                                      struct mlxsw_sp_fib_node *fib_node)
{
        struct mlxsw_sp_prefix_usage req_prefix_usage;
        struct mlxsw_sp_fib *fib = fib_node->fib;
        struct mlxsw_sp_lpm_tree *lpm_tree;
        int err;

        lpm_tree = mlxsw_sp->router->lpm.proto_trees[fib->proto];
        /* Prefix length already in use by the current tree. */
        if (lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
                goto out;

        /* Required usage = current usage plus the new prefix length. */
        mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
        mlxsw_sp_prefix_usage_set(&req_prefix_usage, fib_node->key.prefix_len);
        lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
                                         fib->proto);
        if (IS_ERR(lpm_tree))
                return PTR_ERR(lpm_tree);

        err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
        if (err)
                goto err_lpm_tree_replace;

out:
        lpm_tree->prefix_ref_count[fib_node->key.prefix_len]++;
        return 0;

err_lpm_tree_replace:
        /* Drop the tree obtained above; it was not installed. */
        mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
        return err;
}
4495
/* Drop the reference that @fib_node holds on its prefix length in the LPM
 * tree of the node's FIB.
 *
 * When the count for that prefix length reaches zero, an attempt is made to
 * switch to a tree whose prefix usage no longer includes it. Failures of
 * that attempt are not reported to the caller.
 */
static void mlxsw_sp_fib_lpm_tree_unlink(struct mlxsw_sp *mlxsw_sp,
                                         struct mlxsw_sp_fib_node *fib_node)
{
        struct mlxsw_sp_lpm_tree *lpm_tree = fib_node->fib->lpm_tree;
        struct mlxsw_sp_prefix_usage req_prefix_usage;
        struct mlxsw_sp_fib *fib = fib_node->fib;
        int err;

        /* Other nodes still use this prefix length: nothing more to do. */
        if (--lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
                return;
        /* Try to construct a new LPM tree from the current prefix usage
         * minus the unused one. If we fail, continue using the old one.
         */
        mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
        mlxsw_sp_prefix_usage_clear(&req_prefix_usage,
                                    fib_node->key.prefix_len);
        lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
                                         fib->proto);
        if (IS_ERR(lpm_tree))
                return;

        err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
        if (err)
                goto err_lpm_tree_replace;

        return;

err_lpm_tree_replace:
        /* Drop the tree obtained above; it was not installed. */
        mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
}
4526
4527 static int mlxsw_sp_fib_node_init(struct mlxsw_sp *mlxsw_sp,
4528                                   struct mlxsw_sp_fib_node *fib_node,
4529                                   struct mlxsw_sp_fib *fib)
4530 {
4531         int err;
4532
4533         err = mlxsw_sp_fib_node_insert(fib, fib_node);
4534         if (err)
4535                 return err;
4536         fib_node->fib = fib;
4537
4538         err = mlxsw_sp_fib_lpm_tree_link(mlxsw_sp, fib_node);
4539         if (err)
4540                 goto err_fib_lpm_tree_link;
4541
4542         return 0;
4543
4544 err_fib_lpm_tree_link:
4545         fib_node->fib = NULL;
4546         mlxsw_sp_fib_node_remove(fib, fib_node);
4547         return err;
4548 }
4549
/* Detach @fib_node from its FIB: unlink its prefix length from the LPM
 * tree, reset the node's FIB pointer and remove the node from the FIB's
 * hash table.
 */
static void mlxsw_sp_fib_node_fini(struct mlxsw_sp *mlxsw_sp,
                                   struct mlxsw_sp_fib_node *fib_node)
{
        /* Saved up front because the node's pointer is cleared below. */
        struct mlxsw_sp_fib *fib = fib_node->fib;

        mlxsw_sp_fib_lpm_tree_unlink(mlxsw_sp, fib_node);
        fib_node->fib = NULL;
        mlxsw_sp_fib_node_remove(fib, fib_node);
}
4559
4560 static struct mlxsw_sp_fib_node *
4561 mlxsw_sp_fib_node_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, const void *addr,
4562                       size_t addr_len, unsigned char prefix_len,
4563                       enum mlxsw_sp_l3proto proto)
4564 {
4565         struct mlxsw_sp_fib_node *fib_node;
4566         struct mlxsw_sp_fib *fib;
4567         struct mlxsw_sp_vr *vr;
4568         int err;
4569
4570         vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, NULL);
4571         if (IS_ERR(vr))
4572                 return ERR_CAST(vr);
4573         fib = mlxsw_sp_vr_fib(vr, proto);
4574
4575         fib_node = mlxsw_sp_fib_node_lookup(fib, addr, addr_len, prefix_len);
4576         if (fib_node)
4577                 return fib_node;
4578
4579         fib_node = mlxsw_sp_fib_node_create(fib, addr, addr_len, prefix_len);
4580         if (!fib_node) {
4581                 err = -ENOMEM;
4582                 goto err_fib_node_create;
4583         }
4584
4585         err = mlxsw_sp_fib_node_init(mlxsw_sp, fib_node, fib);
4586         if (err)
4587                 goto err_fib_node_init;
4588
4589         return fib_node;
4590
4591 err_fib_node_init:
4592         mlxsw_sp_fib_node_destroy(fib_node);
4593 err_fib_node_create:
4594         mlxsw_sp_vr_put(mlxsw_sp, vr);
4595         return ERR_PTR(err);
4596 }
4597
4598 static void mlxsw_sp_fib_node_put(struct mlxsw_sp *mlxsw_sp,
4599                                   struct mlxsw_sp_fib_node *fib_node)
4600 {
4601         struct mlxsw_sp_vr *vr = fib_node->fib->vr;
4602
4603         if (!list_empty(&fib_node->entry_list))
4604                 return;
4605         mlxsw_sp_fib_node_fini(mlxsw_sp, fib_node);
4606         mlxsw_sp_fib_node_destroy(fib_node);
4607         mlxsw_sp_vr_put(mlxsw_sp, vr);
4608 }
4609
/* Find the existing entry of @fib_node that @new4_entry should be placed
 * relative to.
 *
 * Walks the node's entry list and returns the first entry with the same
 * table id as @new4_entry whose TOS is not greater than the new one and
 * that either has a priority greater than or equal to the new one or a
 * lower TOS. Returns NULL when no such entry exists.
 *
 * NOTE(review): the early 'break' implies the list is kept sorted by
 * descending table id - confirm against the insertion helpers.
 */
static struct mlxsw_sp_fib4_entry *
mlxsw_sp_fib4_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
                              const struct mlxsw_sp_fib4_entry *new4_entry)
{
        struct mlxsw_sp_fib4_entry *fib4_entry;

        list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) {
                /* Skip entries of tables with a larger id. */
                if (fib4_entry->tb_id > new4_entry->tb_id)
                        continue;
                /* Reached a table with a smaller id: no match. */
                if (fib4_entry->tb_id != new4_entry->tb_id)
                        break;
                /* Same table: skip entries with a larger TOS. */
                if (fib4_entry->tos > new4_entry->tos)
                        continue;
                if (fib4_entry->prio >= new4_entry->prio ||
                    fib4_entry->tos < new4_entry->tos)
                        return fib4_entry;
        }

        return NULL;
}
4630
/* Append @new4_entry after the run of entries that starts at @fib4_entry
 * and shares the new entry's table id, TOS and priority.
 *
 * Warns and returns -EINVAL when @fib4_entry is NULL; returns 0 otherwise.
 */
static int
mlxsw_sp_fib4_node_list_append(struct mlxsw_sp_fib4_entry *fib4_entry,
                               struct mlxsw_sp_fib4_entry *new4_entry)
{
        struct mlxsw_sp_fib_node *fib_node;

        if (WARN_ON(!fib4_entry))
                return -EINVAL;

        fib_node = fib4_entry->common.fib_node;
        /* Advance to the first entry that differs in table id, TOS or
         * priority (or to the end of the list).
         */
        list_for_each_entry_from(fib4_entry, &fib_node->entry_list,
                                 common.list) {
                if (fib4_entry->tb_id != new4_entry->tb_id ||
                    fib4_entry->tos != new4_entry->tos ||
                    fib4_entry->prio != new4_entry->prio)
                        break;
        }

        /* Insert right before the position the walk stopped at. */
        list_add_tail(&new4_entry->common.list, &fib4_entry->common.list);
        return 0;
}
4652
/* Insert @new4_entry into the entry list of its FIB node.
 *
 * @replace: the new entry replaces an existing one, which must be found by
 *           mlxsw_sp_fib4_node_entry_find(); otherwise warns and returns
 *           -EINVAL.
 * @append:  the new entry is appended after the matching entries instead of
 *           being placed before them (takes precedence over @replace).
 *
 * Returns 0 on success or -EINVAL as described above and by the append
 * helper.
 */
static int
mlxsw_sp_fib4_node_list_insert(struct mlxsw_sp_fib4_entry *new4_entry,
                               bool replace, bool append)
{
        struct mlxsw_sp_fib_node *fib_node = new4_entry->common.fib_node;
        struct mlxsw_sp_fib4_entry *fib4_entry;

        fib4_entry = mlxsw_sp_fib4_node_entry_find(fib_node, new4_entry);

        if (append)
                return mlxsw_sp_fib4_node_list_append(fib4_entry, new4_entry);
        if (replace && WARN_ON(!fib4_entry))
                return -EINVAL;

        /* Insert new entry before replaced one, so that we can later
         * remove the second.
         */
        if (fib4_entry) {
                list_add_tail(&new4_entry->common.list,
                              &fib4_entry->common.list);
        } else {
                struct mlxsw_sp_fib4_entry *last;

                /* No reference entry was found: remember the last entry
                 * whose table id is not smaller than the new one, stopping
                 * at the first entry with a smaller table id.
                 */
                list_for_each_entry(last, &fib_node->entry_list, common.list) {
                        if (new4_entry->tb_id > last->tb_id)
                                break;
                        fib4_entry = last;
                }

                /* Insert after that entry, or at the head of the list when
                 * there is none.
                 */
                if (fib4_entry)
                        list_add(&new4_entry->common.list,
                                 &fib4_entry->common.list);
                else
                        list_add(&new4_entry->common.list,
                                 &fib_node->entry_list);
        }

        return 0;
}
4692
/* Remove @fib4_entry from the entry list it is linked on. */
static void
mlxsw_sp_fib4_node_list_remove(struct mlxsw_sp_fib4_entry *fib4_entry)
{
        list_del(&fib4_entry->common.list);
}
4698
4699 static int mlxsw_sp_fib_node_entry_add(struct mlxsw_sp *mlxsw_sp,
4700                                        struct mlxsw_sp_fib_entry *fib_entry)
4701 {
4702         struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
4703
4704         if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
4705                 return 0;
4706
4707         /* To prevent packet loss, overwrite the previously offloaded
4708          * entry.
4709          */
4710         if (!list_is_singular(&fib_node->entry_list)) {
4711                 enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
4712                 struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
4713
4714                 mlxsw_sp_fib_entry_offload_refresh(n, op, 0);
4715         }
4716
4717         return mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
4718 }
4719
4720 static void mlxsw_sp_fib_node_entry_del(struct mlxsw_sp *mlxsw_sp,
4721                                         struct mlxsw_sp_fib_entry *fib_entry)
4722 {
4723         struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
4724
4725         if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
4726                 return;
4727
4728         /* Promote the next entry by overwriting the deleted entry */
4729         if (!list_is_singular(&fib_node->entry_list)) {
4730                 struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
4731                 enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
4732
4733                 mlxsw_sp_fib_entry_update(mlxsw_sp, n);
4734                 mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
4735                 return;
4736         }
4737
4738         mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
4739 }
4740
4741 static int mlxsw_sp_fib4_node_entry_link(struct mlxsw_sp *mlxsw_sp,
4742                                          struct mlxsw_sp_fib4_entry *fib4_entry,
4743                                          bool replace, bool append)
4744 {
4745         int err;
4746
4747         err = mlxsw_sp_fib4_node_list_insert(fib4_entry, replace, append);
4748         if (err)
4749                 return err;
4750
4751         err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib4_entry->common);
4752         if (err)
4753                 goto err_fib_node_entry_add;
4754
4755         return 0;
4756
4757 err_fib_node_entry_add:
4758         mlxsw_sp_fib4_node_list_remove(fib4_entry);
4759         return err;
4760 }
4761
4762 static void
4763 mlxsw_sp_fib4_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
4764                                 struct mlxsw_sp_fib4_entry *fib4_entry)
4765 {
4766         mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib4_entry->common);
4767         mlxsw_sp_fib4_node_list_remove(fib4_entry);
4768
4769         if (fib4_entry->common.type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP)
4770                 mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, &fib4_entry->common);
4771 }
4772
4773 static void mlxsw_sp_fib4_entry_replace(struct mlxsw_sp *mlxsw_sp,
4774                                         struct mlxsw_sp_fib4_entry *fib4_entry,
4775                                         bool replace)
4776 {
4777         struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node;
4778         struct mlxsw_sp_fib4_entry *replaced;
4779
4780         if (!replace)
4781                 return;
4782
4783         /* We inserted the new entry before replaced one */
4784         replaced = list_next_entry(fib4_entry, common.list);
4785
4786         mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, replaced);
4787         mlxsw_sp_fib4_entry_destroy(mlxsw_sp, replaced);
4788         mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4789 }
4790
4791 static int
4792 mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp,
4793                          const struct fib_entry_notifier_info *fen_info,
4794                          bool replace, bool append)
4795 {
4796         struct mlxsw_sp_fib4_entry *fib4_entry;
4797         struct mlxsw_sp_fib_node *fib_node;
4798         int err;
4799
4800         if (mlxsw_sp->router->aborted)
4801                 return 0;
4802
4803         fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, fen_info->tb_id,
4804                                          &fen_info->dst, sizeof(fen_info->dst),
4805                                          fen_info->dst_len,
4806                                          MLXSW_SP_L3_PROTO_IPV4);
4807         if (IS_ERR(fib_node)) {
4808                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB node\n");
4809                 return PTR_ERR(fib_node);
4810         }
4811
4812         fib4_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info);
4813         if (IS_ERR(fib4_entry)) {
4814                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to create FIB entry\n");
4815                 err = PTR_ERR(fib4_entry);
4816                 goto err_fib4_entry_create;
4817         }
4818
4819         err = mlxsw_sp_fib4_node_entry_link(mlxsw_sp, fib4_entry, replace,
4820                                             append);
4821         if (err) {
4822                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n");
4823                 goto err_fib4_node_entry_link;
4824         }
4825
4826         mlxsw_sp_fib4_entry_replace(mlxsw_sp, fib4_entry, replace);
4827
4828         return 0;
4829
4830 err_fib4_node_entry_link:
4831         mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
4832 err_fib4_entry_create:
4833         mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4834         return err;
4835 }
4836
4837 static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
4838                                      struct fib_entry_notifier_info *fen_info)
4839 {
4840         struct mlxsw_sp_fib4_entry *fib4_entry;
4841         struct mlxsw_sp_fib_node *fib_node;
4842
4843         if (mlxsw_sp->router->aborted)
4844                 return;
4845
4846         fib4_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info);
4847         if (WARN_ON(!fib4_entry))
4848                 return;
4849         fib_node = fib4_entry->common.fib_node;
4850
4851         mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry);
4852         mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
4853         mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4854 }
4855
4856 static bool mlxsw_sp_fib6_rt_should_ignore(const struct fib6_info *rt)
4857 {
4858         /* Packets with link-local destination IP arriving to the router
4859          * are trapped to the CPU, so no need to program specific routes
4860          * for them.
4861          */
4862         if (ipv6_addr_type(&rt->fib6_dst.addr) & IPV6_ADDR_LINKLOCAL)
4863                 return true;
4864
4865         /* Multicast routes aren't supported, so ignore them. Neighbour
4866          * Discovery packets are specifically trapped.
4867          */
4868         if (ipv6_addr_type(&rt->fib6_dst.addr) & IPV6_ADDR_MULTICAST)
4869                 return true;
4870
4871         /* Cloned routes are irrelevant in the forwarding path. */
4872         if (rt->fib6_flags & RTF_CACHE)
4873                 return true;
4874
4875         return false;
4876 }
4877
4878 static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct fib6_info *rt)
4879 {
4880         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4881
4882         mlxsw_sp_rt6 = kzalloc(sizeof(*mlxsw_sp_rt6), GFP_KERNEL);
4883         if (!mlxsw_sp_rt6)
4884                 return ERR_PTR(-ENOMEM);
4885
4886         /* In case of route replace, replaced route is deleted with
4887          * no notification. Take reference to prevent accessing freed
4888          * memory.
4889          */
4890         mlxsw_sp_rt6->rt = rt;
4891         fib6_info_hold(rt);
4892
4893         return mlxsw_sp_rt6;
4894 }
4895
4896 #if IS_ENABLED(CONFIG_IPV6)
4897 static void mlxsw_sp_rt6_release(struct fib6_info *rt)
4898 {
4899         fib6_info_release(rt);
4900 }
4901 #else
4902 static void mlxsw_sp_rt6_release(struct fib6_info *rt)
4903 {
4904 }
4905 #endif
4906
4907 static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
4908 {
4909         mlxsw_sp_rt6_release(mlxsw_sp_rt6->rt);
4910         kfree(mlxsw_sp_rt6);
4911 }
4912
4913 static bool mlxsw_sp_fib6_rt_can_mp(const struct fib6_info *rt)
4914 {
4915         /* RTF_CACHE routes are ignored */
4916         return (rt->fib6_flags & (RTF_GATEWAY | RTF_ADDRCONF)) == RTF_GATEWAY;
4917 }
4918
4919 static struct fib6_info *
4920 mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry)
4921 {
4922         return list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
4923                                 list)->rt;
4924 }
4925
4926 static struct mlxsw_sp_fib6_entry *
4927 mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node,
4928                                  const struct fib6_info *nrt, bool replace)
4929 {
4930         struct mlxsw_sp_fib6_entry *fib6_entry;
4931
4932         if (!mlxsw_sp_fib6_rt_can_mp(nrt) || replace)
4933                 return NULL;
4934
4935         list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
4936                 struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
4937
4938                 /* RT6_TABLE_LOCAL and RT6_TABLE_MAIN share the same
4939                  * virtual router.
4940                  */
4941                 if (rt->fib6_table->tb6_id > nrt->fib6_table->tb6_id)
4942                         continue;
4943                 if (rt->fib6_table->tb6_id != nrt->fib6_table->tb6_id)
4944                         break;
4945                 if (rt->fib6_metric < nrt->fib6_metric)
4946                         continue;
4947                 if (rt->fib6_metric == nrt->fib6_metric &&
4948                     mlxsw_sp_fib6_rt_can_mp(rt))
4949                         return fib6_entry;
4950                 if (rt->fib6_metric > nrt->fib6_metric)
4951                         break;
4952         }
4953
4954         return NULL;
4955 }
4956
4957 static struct mlxsw_sp_rt6 *
4958 mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry,
4959                             const struct fib6_info *rt)
4960 {
4961         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4962
4963         list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
4964                 if (mlxsw_sp_rt6->rt == rt)
4965                         return mlxsw_sp_rt6;
4966         }
4967
4968         return NULL;
4969 }
4970
4971 static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp,
4972                                         const struct fib6_info *rt,
4973                                         enum mlxsw_sp_ipip_type *ret)
4974 {
4975         return rt->fib6_nh.nh_dev &&
4976                mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->fib6_nh.nh_dev, ret);
4977 }
4978
4979 static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp,
4980                                        struct mlxsw_sp_nexthop_group *nh_grp,
4981                                        struct mlxsw_sp_nexthop *nh,
4982                                        const struct fib6_info *rt)
4983 {
4984         const struct mlxsw_sp_ipip_ops *ipip_ops;
4985         struct mlxsw_sp_ipip_entry *ipip_entry;
4986         struct net_device *dev = rt->fib6_nh.nh_dev;
4987         struct mlxsw_sp_rif *rif;
4988         int err;
4989
4990         ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev);
4991         if (ipip_entry) {
4992                 ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
4993                 if (ipip_ops->can_offload(mlxsw_sp, dev,
4994                                           MLXSW_SP_L3_PROTO_IPV6)) {
4995                         nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
4996                         mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry);
4997                         return 0;
4998                 }
4999         }
5000
5001         nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
5002         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
5003         if (!rif)
5004                 return 0;
5005         mlxsw_sp_nexthop_rif_init(nh, rif);
5006
5007         err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
5008         if (err)
5009                 goto err_nexthop_neigh_init;
5010
5011         return 0;
5012
5013 err_nexthop_neigh_init:
5014         mlxsw_sp_nexthop_rif_fini(nh);
5015         return err;
5016 }
5017
/* IPv6 counterpart of the type teardown: simply delegates to the common
 * mlxsw_sp_nexthop_type_fini().
 */
static void mlxsw_sp_nexthop6_type_fini(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_nexthop *nh)
{
	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
}
5023
5024 static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
5025                                   struct mlxsw_sp_nexthop_group *nh_grp,
5026                                   struct mlxsw_sp_nexthop *nh,
5027                                   const struct fib6_info *rt)
5028 {
5029         struct net_device *dev = rt->fib6_nh.nh_dev;
5030
5031         nh->nh_grp = nh_grp;
5032         nh->nh_weight = rt->fib6_nh.nh_weight;
5033         memcpy(&nh->gw_addr, &rt->fib6_nh.nh_gw, sizeof(nh->gw_addr));
5034         mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
5035
5036         list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
5037
5038         if (!dev)
5039                 return 0;
5040         nh->ifindex = dev->ifindex;
5041
5042         return mlxsw_sp_nexthop6_type_init(mlxsw_sp, nh_grp, nh, rt);
5043 }
5044
5045 static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp,
5046                                    struct mlxsw_sp_nexthop *nh)
5047 {
5048         mlxsw_sp_nexthop6_type_fini(mlxsw_sp, nh);
5049         list_del(&nh->router_list_node);
5050         mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
5051 }
5052
5053 static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp,
5054                                     const struct fib6_info *rt)
5055 {
5056         return rt->fib6_flags & RTF_GATEWAY ||
5057                mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL);
5058 }
5059
5060 static struct mlxsw_sp_nexthop_group *
5061 mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp,
5062                                struct mlxsw_sp_fib6_entry *fib6_entry)
5063 {
5064         struct mlxsw_sp_nexthop_group *nh_grp;
5065         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5066         struct mlxsw_sp_nexthop *nh;
5067         int i = 0;
5068         int err;
5069
5070         nh_grp = kzalloc(struct_size(nh_grp, nexthops, fib6_entry->nrt6),
5071                          GFP_KERNEL);
5072         if (!nh_grp)
5073                 return ERR_PTR(-ENOMEM);
5074         INIT_LIST_HEAD(&nh_grp->fib_list);
5075 #if IS_ENABLED(CONFIG_IPV6)
5076         nh_grp->neigh_tbl = &nd_tbl;
5077 #endif
5078         mlxsw_sp_rt6 = list_first_entry(&fib6_entry->rt6_list,
5079                                         struct mlxsw_sp_rt6, list);
5080         nh_grp->gateway = mlxsw_sp_rt6_is_gateway(mlxsw_sp, mlxsw_sp_rt6->rt);
5081         nh_grp->count = fib6_entry->nrt6;
5082         for (i = 0; i < nh_grp->count; i++) {
5083                 struct fib6_info *rt = mlxsw_sp_rt6->rt;
5084
5085                 nh = &nh_grp->nexthops[i];
5086                 err = mlxsw_sp_nexthop6_init(mlxsw_sp, nh_grp, nh, rt);
5087                 if (err)
5088                         goto err_nexthop6_init;
5089                 mlxsw_sp_rt6 = list_next_entry(mlxsw_sp_rt6, list);
5090         }
5091
5092         err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
5093         if (err)
5094                 goto err_nexthop_group_insert;
5095
5096         mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
5097         return nh_grp;
5098
5099 err_nexthop_group_insert:
5100 err_nexthop6_init:
5101         for (i--; i >= 0; i--) {
5102                 nh = &nh_grp->nexthops[i];
5103                 mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
5104         }
5105         kfree(nh_grp);
5106         return ERR_PTR(err);
5107 }
5108
5109 static void
5110 mlxsw_sp_nexthop6_group_destroy(struct mlxsw_sp *mlxsw_sp,
5111                                 struct mlxsw_sp_nexthop_group *nh_grp)
5112 {
5113         struct mlxsw_sp_nexthop *nh;
5114         int i = nh_grp->count;
5115
5116         mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
5117         for (i--; i >= 0; i--) {
5118                 nh = &nh_grp->nexthops[i];
5119                 mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
5120         }
5121         mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
5122         WARN_ON(nh_grp->adj_index_valid);
5123         kfree(nh_grp);
5124 }
5125
5126 static int mlxsw_sp_nexthop6_group_get(struct mlxsw_sp *mlxsw_sp,
5127                                        struct mlxsw_sp_fib6_entry *fib6_entry)
5128 {
5129         struct mlxsw_sp_nexthop_group *nh_grp;
5130
5131         nh_grp = mlxsw_sp_nexthop6_group_lookup(mlxsw_sp, fib6_entry);
5132         if (!nh_grp) {
5133                 nh_grp = mlxsw_sp_nexthop6_group_create(mlxsw_sp, fib6_entry);
5134                 if (IS_ERR(nh_grp))
5135                         return PTR_ERR(nh_grp);
5136         }
5137
5138         list_add_tail(&fib6_entry->common.nexthop_group_node,
5139                       &nh_grp->fib_list);
5140         fib6_entry->common.nh_group = nh_grp;
5141
5142         return 0;
5143 }
5144
5145 static void mlxsw_sp_nexthop6_group_put(struct mlxsw_sp *mlxsw_sp,
5146                                         struct mlxsw_sp_fib_entry *fib_entry)
5147 {
5148         struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
5149
5150         list_del(&fib_entry->nexthop_group_node);
5151         if (!list_empty(&nh_grp->fib_list))
5152                 return;
5153         mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, nh_grp);
5154 }
5155
5156 static int
5157 mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp,
5158                                struct mlxsw_sp_fib6_entry *fib6_entry)
5159 {
5160         struct mlxsw_sp_nexthop_group *old_nh_grp = fib6_entry->common.nh_group;
5161         int err;
5162
5163         fib6_entry->common.nh_group = NULL;
5164         list_del(&fib6_entry->common.nexthop_group_node);
5165
5166         err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
5167         if (err)
5168                 goto err_nexthop6_group_get;
5169
5170         /* In case this entry is offloaded, then the adjacency index
5171          * currently associated with it in the device's table is that
5172          * of the old group. Start using the new one instead.
5173          */
5174         err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
5175         if (err)
5176                 goto err_fib_node_entry_add;
5177
5178         if (list_empty(&old_nh_grp->fib_list))
5179                 mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, old_nh_grp);
5180
5181         return 0;
5182
5183 err_fib_node_entry_add:
5184         mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
5185 err_nexthop6_group_get:
5186         list_add_tail(&fib6_entry->common.nexthop_group_node,
5187                       &old_nh_grp->fib_list);
5188         fib6_entry->common.nh_group = old_nh_grp;
5189         return err;
5190 }
5191
5192 static int
5193 mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
5194                                 struct mlxsw_sp_fib6_entry *fib6_entry,
5195                                 struct fib6_info *rt)
5196 {
5197         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5198         int err;
5199
5200         mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
5201         if (IS_ERR(mlxsw_sp_rt6))
5202                 return PTR_ERR(mlxsw_sp_rt6);
5203
5204         list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
5205         fib6_entry->nrt6++;
5206
5207         err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
5208         if (err)
5209                 goto err_nexthop6_group_update;
5210
5211         return 0;
5212
5213 err_nexthop6_group_update:
5214         fib6_entry->nrt6--;
5215         list_del(&mlxsw_sp_rt6->list);
5216         mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5217         return err;
5218 }
5219
5220 static void
5221 mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp,
5222                                 struct mlxsw_sp_fib6_entry *fib6_entry,
5223                                 struct fib6_info *rt)
5224 {
5225         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5226
5227         mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt);
5228         if (WARN_ON(!mlxsw_sp_rt6))
5229                 return;
5230
5231         fib6_entry->nrt6--;
5232         list_del(&mlxsw_sp_rt6->list);
5233         mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
5234         mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5235 }
5236
5237 static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp,
5238                                          struct mlxsw_sp_fib_entry *fib_entry,
5239                                          const struct fib6_info *rt)
5240 {
5241         /* Packets hitting RTF_REJECT routes need to be discarded by the
5242          * stack. We can rely on their destination device not having a
5243          * RIF (it's the loopback device) and can thus use action type
5244          * local, which will cause them to be trapped with a lower
5245          * priority than packets that need to be locally received.
5246          */
5247         if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
5248                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
5249         else if (rt->fib6_type == RTN_BLACKHOLE)
5250                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE;
5251         else if (rt->fib6_flags & RTF_REJECT)
5252                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
5253         else if (mlxsw_sp_rt6_is_gateway(mlxsw_sp, rt))
5254                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
5255         else
5256                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
5257 }
5258
5259 static void
5260 mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry *fib6_entry)
5261 {
5262         struct mlxsw_sp_rt6 *mlxsw_sp_rt6, *tmp;
5263
5264         list_for_each_entry_safe(mlxsw_sp_rt6, tmp, &fib6_entry->rt6_list,
5265                                  list) {
5266                 fib6_entry->nrt6--;
5267                 list_del(&mlxsw_sp_rt6->list);
5268                 mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5269         }
5270 }
5271
5272 static struct mlxsw_sp_fib6_entry *
5273 mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp,
5274                            struct mlxsw_sp_fib_node *fib_node,
5275                            struct fib6_info *rt)
5276 {
5277         struct mlxsw_sp_fib6_entry *fib6_entry;
5278         struct mlxsw_sp_fib_entry *fib_entry;
5279         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5280         int err;
5281
5282         fib6_entry = kzalloc(sizeof(*fib6_entry), GFP_KERNEL);
5283         if (!fib6_entry)
5284                 return ERR_PTR(-ENOMEM);
5285         fib_entry = &fib6_entry->common;
5286
5287         mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
5288         if (IS_ERR(mlxsw_sp_rt6)) {
5289                 err = PTR_ERR(mlxsw_sp_rt6);
5290                 goto err_rt6_create;
5291         }
5292
5293         mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, mlxsw_sp_rt6->rt);
5294
5295         INIT_LIST_HEAD(&fib6_entry->rt6_list);
5296         list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
5297         fib6_entry->nrt6 = 1;
5298         err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
5299         if (err)
5300                 goto err_nexthop6_group_get;
5301
5302         fib_entry->fib_node = fib_node;
5303
5304         return fib6_entry;
5305
5306 err_nexthop6_group_get:
5307         list_del(&mlxsw_sp_rt6->list);
5308         mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5309 err_rt6_create:
5310         kfree(fib6_entry);
5311         return ERR_PTR(err);
5312 }
5313
5314 static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp,
5315                                         struct mlxsw_sp_fib6_entry *fib6_entry)
5316 {
5317         mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
5318         mlxsw_sp_fib6_entry_rt_destroy_all(fib6_entry);
5319         WARN_ON(fib6_entry->nrt6);
5320         kfree(fib6_entry);
5321 }
5322
5323 static struct mlxsw_sp_fib6_entry *
5324 mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
5325                               const struct fib6_info *nrt, bool replace)
5326 {
5327         struct mlxsw_sp_fib6_entry *fib6_entry, *fallback = NULL;
5328
5329         list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
5330                 struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
5331
5332                 if (rt->fib6_table->tb6_id > nrt->fib6_table->tb6_id)
5333                         continue;
5334                 if (rt->fib6_table->tb6_id != nrt->fib6_table->tb6_id)
5335                         break;
5336                 if (replace && rt->fib6_metric == nrt->fib6_metric) {
5337                         if (mlxsw_sp_fib6_rt_can_mp(rt) ==
5338                             mlxsw_sp_fib6_rt_can_mp(nrt))
5339                                 return fib6_entry;
5340                         if (mlxsw_sp_fib6_rt_can_mp(nrt))
5341                                 fallback = fallback ?: fib6_entry;
5342                 }
5343                 if (rt->fib6_metric > nrt->fib6_metric)
5344                         return fallback ?: fib6_entry;
5345         }
5346
5347         return fallback;
5348 }
5349
5350 static int
5351 mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry,
5352                                bool replace)
5353 {
5354         struct mlxsw_sp_fib_node *fib_node = new6_entry->common.fib_node;
5355         struct fib6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry);
5356         struct mlxsw_sp_fib6_entry *fib6_entry;
5357
5358         fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, replace);
5359
5360         if (replace && WARN_ON(!fib6_entry))
5361                 return -EINVAL;
5362
5363         if (fib6_entry) {
5364                 list_add_tail(&new6_entry->common.list,
5365                               &fib6_entry->common.list);
5366         } else {
5367                 struct mlxsw_sp_fib6_entry *last;
5368
5369                 list_for_each_entry(last, &fib_node->entry_list, common.list) {
5370                         struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(last);
5371
5372                         if (nrt->fib6_table->tb6_id > rt->fib6_table->tb6_id)
5373                                 break;
5374                         fib6_entry = last;
5375                 }
5376
5377                 if (fib6_entry)
5378                         list_add(&new6_entry->common.list,
5379                                  &fib6_entry->common.list);
5380                 else
5381                         list_add(&new6_entry->common.list,
5382                                  &fib_node->entry_list);
5383         }
5384
5385         return 0;
5386 }
5387
5388 static void
5389 mlxsw_sp_fib6_node_list_remove(struct mlxsw_sp_fib6_entry *fib6_entry)
5390 {
5391         list_del(&fib6_entry->common.list);
5392 }
5393
5394 static int mlxsw_sp_fib6_node_entry_link(struct mlxsw_sp *mlxsw_sp,
5395                                          struct mlxsw_sp_fib6_entry *fib6_entry,
5396                                          bool replace)
5397 {
5398         int err;
5399
5400         err = mlxsw_sp_fib6_node_list_insert(fib6_entry, replace);
5401         if (err)
5402                 return err;
5403
5404         err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
5405         if (err)
5406                 goto err_fib_node_entry_add;
5407
5408         return 0;
5409
5410 err_fib_node_entry_add:
5411         mlxsw_sp_fib6_node_list_remove(fib6_entry);
5412         return err;
5413 }
5414
5415 static void
5416 mlxsw_sp_fib6_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
5417                                 struct mlxsw_sp_fib6_entry *fib6_entry)
5418 {
5419         mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib6_entry->common);
5420         mlxsw_sp_fib6_node_list_remove(fib6_entry);
5421 }
5422
5423 static struct mlxsw_sp_fib6_entry *
5424 mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp,
5425                            const struct fib6_info *rt)
5426 {
5427         struct mlxsw_sp_fib6_entry *fib6_entry;
5428         struct mlxsw_sp_fib_node *fib_node;
5429         struct mlxsw_sp_fib *fib;
5430         struct mlxsw_sp_vr *vr;
5431
5432         vr = mlxsw_sp_vr_find(mlxsw_sp, rt->fib6_table->tb6_id);
5433         if (!vr)
5434                 return NULL;
5435         fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV6);
5436
5437         fib_node = mlxsw_sp_fib_node_lookup(fib, &rt->fib6_dst.addr,
5438                                             sizeof(rt->fib6_dst.addr),
5439                                             rt->fib6_dst.plen);
5440         if (!fib_node)
5441                 return NULL;
5442
5443         list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
5444                 struct fib6_info *iter_rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
5445
5446                 if (rt->fib6_table->tb6_id == iter_rt->fib6_table->tb6_id &&
5447                     rt->fib6_metric == iter_rt->fib6_metric &&
5448                     mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt))
5449                         return fib6_entry;
5450         }
5451
5452         return NULL;
5453 }
5454
5455 static void mlxsw_sp_fib6_entry_replace(struct mlxsw_sp *mlxsw_sp,
5456                                         struct mlxsw_sp_fib6_entry *fib6_entry,
5457                                         bool replace)
5458 {
5459         struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;
5460         struct mlxsw_sp_fib6_entry *replaced;
5461
5462         if (!replace)
5463                 return;
5464
5465         replaced = list_next_entry(fib6_entry, common.list);
5466
5467         mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, replaced);
5468         mlxsw_sp_fib6_entry_destroy(mlxsw_sp, replaced);
5469         mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5470 }
5471
5472 static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
5473                                     struct fib6_info *rt, bool replace)
5474 {
5475         struct mlxsw_sp_fib6_entry *fib6_entry;
5476         struct mlxsw_sp_fib_node *fib_node;
5477         int err;
5478
5479         if (mlxsw_sp->router->aborted)
5480                 return 0;
5481
5482         if (rt->fib6_src.plen)
5483                 return -EINVAL;
5484
5485         if (mlxsw_sp_fib6_rt_should_ignore(rt))
5486                 return 0;
5487
5488         fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->fib6_table->tb6_id,
5489                                          &rt->fib6_dst.addr,
5490                                          sizeof(rt->fib6_dst.addr),
5491                                          rt->fib6_dst.plen,
5492                                          MLXSW_SP_L3_PROTO_IPV6);
5493         if (IS_ERR(fib_node))
5494                 return PTR_ERR(fib_node);
5495
5496         /* Before creating a new entry, try to append route to an existing
5497          * multipath entry.
5498          */
5499         fib6_entry = mlxsw_sp_fib6_node_mp_entry_find(fib_node, rt, replace);
5500         if (fib6_entry) {
5501                 err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt);
5502                 if (err)
5503                         goto err_fib6_entry_nexthop_add;
5504                 return 0;
5505         }
5506
5507         fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt);
5508         if (IS_ERR(fib6_entry)) {
5509                 err = PTR_ERR(fib6_entry);
5510                 goto err_fib6_entry_create;
5511         }
5512
5513         err = mlxsw_sp_fib6_node_entry_link(mlxsw_sp, fib6_entry, replace);
5514         if (err)
5515                 goto err_fib6_node_entry_link;
5516
5517         mlxsw_sp_fib6_entry_replace(mlxsw_sp, fib6_entry, replace);
5518
5519         return 0;
5520
5521 err_fib6_node_entry_link:
5522         mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
5523 err_fib6_entry_create:
5524 err_fib6_entry_nexthop_add:
5525         mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5526         return err;
5527 }
5528
5529 static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp,
5530                                      struct fib6_info *rt)
5531 {
5532         struct mlxsw_sp_fib6_entry *fib6_entry;
5533         struct mlxsw_sp_fib_node *fib_node;
5534
5535         if (mlxsw_sp->router->aborted)
5536                 return;
5537
5538         if (mlxsw_sp_fib6_rt_should_ignore(rt))
5539                 return;
5540
5541         fib6_entry = mlxsw_sp_fib6_entry_lookup(mlxsw_sp, rt);
5542         if (WARN_ON(!fib6_entry))
5543                 return;
5544
5545         /* If route is part of a multipath entry, but not the last one
5546          * removed, then only reduce its nexthop group.
5547          */
5548         if (!list_is_singular(&fib6_entry->rt6_list)) {
5549                 mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt);
5550                 return;
5551         }
5552
5553         fib_node = fib6_entry->common.fib_node;
5554
5555         mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry);
5556         mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
5557         mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5558 }
5559
5560 static int __mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp,
5561                                             enum mlxsw_reg_ralxx_protocol proto,
5562                                             u8 tree_id)
5563 {
5564         char ralta_pl[MLXSW_REG_RALTA_LEN];
5565         char ralst_pl[MLXSW_REG_RALST_LEN];
5566         int i, err;
5567
5568         mlxsw_reg_ralta_pack(ralta_pl, true, proto, tree_id);
5569         err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
5570         if (err)
5571                 return err;
5572
5573         mlxsw_reg_ralst_pack(ralst_pl, 0xff, tree_id);
5574         err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
5575         if (err)
5576                 return err;
5577
5578         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
5579                 struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
5580                 char raltb_pl[MLXSW_REG_RALTB_LEN];
5581                 char ralue_pl[MLXSW_REG_RALUE_LEN];
5582
5583                 mlxsw_reg_raltb_pack(raltb_pl, vr->id, proto, tree_id);
5584                 err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb),
5585                                       raltb_pl);
5586                 if (err)
5587                         return err;
5588
5589                 mlxsw_reg_ralue_pack(ralue_pl, proto,
5590                                      MLXSW_REG_RALUE_OP_WRITE_WRITE, vr->id, 0);
5591                 mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
5592                 err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue),
5593                                       ralue_pl);
5594                 if (err)
5595                         return err;
5596         }
5597
5598         return 0;
5599 }
5600
5601 static struct mlxsw_sp_mr_table *
5602 mlxsw_sp_router_fibmr_family_to_table(struct mlxsw_sp_vr *vr, int family)
5603 {
5604         if (family == RTNL_FAMILY_IPMR)
5605                 return vr->mr_table[MLXSW_SP_L3_PROTO_IPV4];
5606         else
5607                 return vr->mr_table[MLXSW_SP_L3_PROTO_IPV6];
5608 }
5609
5610 static int mlxsw_sp_router_fibmr_add(struct mlxsw_sp *mlxsw_sp,
5611                                      struct mfc_entry_notifier_info *men_info,
5612                                      bool replace)
5613 {
5614         struct mlxsw_sp_mr_table *mrt;
5615         struct mlxsw_sp_vr *vr;
5616
5617         if (mlxsw_sp->router->aborted)
5618                 return 0;
5619
5620         vr = mlxsw_sp_vr_get(mlxsw_sp, men_info->tb_id, NULL);
5621         if (IS_ERR(vr))
5622                 return PTR_ERR(vr);
5623
5624         mrt = mlxsw_sp_router_fibmr_family_to_table(vr, men_info->info.family);
5625         return mlxsw_sp_mr_route_add(mrt, men_info->mfc, replace);
5626 }
5627
5628 static void mlxsw_sp_router_fibmr_del(struct mlxsw_sp *mlxsw_sp,
5629                                       struct mfc_entry_notifier_info *men_info)
5630 {
5631         struct mlxsw_sp_mr_table *mrt;
5632         struct mlxsw_sp_vr *vr;
5633
5634         if (mlxsw_sp->router->aborted)
5635                 return;
5636
5637         vr = mlxsw_sp_vr_find(mlxsw_sp, men_info->tb_id);
5638         if (WARN_ON(!vr))
5639                 return;
5640
5641         mrt = mlxsw_sp_router_fibmr_family_to_table(vr, men_info->info.family);
5642         mlxsw_sp_mr_route_del(mrt, men_info->mfc);
5643         mlxsw_sp_vr_put(mlxsw_sp, vr);
5644 }
5645
5646 static int
5647 mlxsw_sp_router_fibmr_vif_add(struct mlxsw_sp *mlxsw_sp,
5648                               struct vif_entry_notifier_info *ven_info)
5649 {
5650         struct mlxsw_sp_mr_table *mrt;
5651         struct mlxsw_sp_rif *rif;
5652         struct mlxsw_sp_vr *vr;
5653
5654         if (mlxsw_sp->router->aborted)
5655                 return 0;
5656
5657         vr = mlxsw_sp_vr_get(mlxsw_sp, ven_info->tb_id, NULL);
5658         if (IS_ERR(vr))
5659                 return PTR_ERR(vr);
5660
5661         mrt = mlxsw_sp_router_fibmr_family_to_table(vr, ven_info->info.family);
5662         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, ven_info->dev);
5663         return mlxsw_sp_mr_vif_add(mrt, ven_info->dev,
5664                                    ven_info->vif_index,
5665                                    ven_info->vif_flags, rif);
5666 }
5667
5668 static void
5669 mlxsw_sp_router_fibmr_vif_del(struct mlxsw_sp *mlxsw_sp,
5670                               struct vif_entry_notifier_info *ven_info)
5671 {
5672         struct mlxsw_sp_mr_table *mrt;
5673         struct mlxsw_sp_vr *vr;
5674
5675         if (mlxsw_sp->router->aborted)
5676                 return;
5677
5678         vr = mlxsw_sp_vr_find(mlxsw_sp, ven_info->tb_id);
5679         if (WARN_ON(!vr))
5680                 return;
5681
5682         mrt = mlxsw_sp_router_fibmr_family_to_table(vr, ven_info->info.family);
5683         mlxsw_sp_mr_vif_del(mrt, ven_info->vif_index);
5684         mlxsw_sp_vr_put(mlxsw_sp, vr);
5685 }
5686
5687 static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
5688 {
5689         enum mlxsw_reg_ralxx_protocol proto = MLXSW_REG_RALXX_PROTOCOL_IPV4;
5690         int err;
5691
5692         err = __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
5693                                                MLXSW_SP_LPM_TREE_MIN);
5694         if (err)
5695                 return err;
5696
5697         /* The multicast router code does not need an abort trap as by default,
5698          * packets that don't match any routes are trapped to the CPU.
5699          */
5700
5701         proto = MLXSW_REG_RALXX_PROTOCOL_IPV6;
5702         return __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
5703                                                 MLXSW_SP_LPM_TREE_MIN + 1);
5704 }
5705
5706 static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp,
5707                                      struct mlxsw_sp_fib_node *fib_node)
5708 {
5709         struct mlxsw_sp_fib4_entry *fib4_entry, *tmp;
5710
5711         list_for_each_entry_safe(fib4_entry, tmp, &fib_node->entry_list,
5712                                  common.list) {
5713                 bool do_break = &tmp->common.list == &fib_node->entry_list;
5714
5715                 mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry);
5716                 mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
5717                 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5718                 /* Break when entry list is empty and node was freed.
5719                  * Otherwise, we'll access freed memory in the next
5720                  * iteration.
5721                  */
5722                 if (do_break)
5723                         break;
5724         }
5725 }
5726
5727 static void mlxsw_sp_fib6_node_flush(struct mlxsw_sp *mlxsw_sp,
5728                                      struct mlxsw_sp_fib_node *fib_node)
5729 {
5730         struct mlxsw_sp_fib6_entry *fib6_entry, *tmp;
5731
5732         list_for_each_entry_safe(fib6_entry, tmp, &fib_node->entry_list,
5733                                  common.list) {
5734                 bool do_break = &tmp->common.list == &fib_node->entry_list;
5735
5736                 mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry);
5737                 mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
5738                 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5739                 if (do_break)
5740                         break;
5741         }
5742 }
5743
5744 static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp,
5745                                     struct mlxsw_sp_fib_node *fib_node)
5746 {
5747         switch (fib_node->fib->proto) {
5748         case MLXSW_SP_L3_PROTO_IPV4:
5749                 mlxsw_sp_fib4_node_flush(mlxsw_sp, fib_node);
5750                 break;
5751         case MLXSW_SP_L3_PROTO_IPV6:
5752                 mlxsw_sp_fib6_node_flush(mlxsw_sp, fib_node);
5753                 break;
5754         }
5755 }
5756
5757 static void mlxsw_sp_vr_fib_flush(struct mlxsw_sp *mlxsw_sp,
5758                                   struct mlxsw_sp_vr *vr,
5759                                   enum mlxsw_sp_l3proto proto)
5760 {
5761         struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
5762         struct mlxsw_sp_fib_node *fib_node, *tmp;
5763
5764         list_for_each_entry_safe(fib_node, tmp, &fib->node_list, list) {
5765                 bool do_break = &tmp->list == &fib->node_list;
5766
5767                 mlxsw_sp_fib_node_flush(mlxsw_sp, fib_node);
5768                 if (do_break)
5769                         break;
5770         }
5771 }
5772
5773 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
5774 {
5775         int i, j;
5776
5777         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
5778                 struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
5779
5780                 if (!mlxsw_sp_vr_is_used(vr))
5781                         continue;
5782
5783                 for (j = 0; j < MLXSW_SP_L3_PROTO_MAX; j++)
5784                         mlxsw_sp_mr_table_flush(vr->mr_table[j]);
5785                 mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
5786
5787                 /* If virtual router was only used for IPv4, then it's no
5788                  * longer used.
5789                  */
5790                 if (!mlxsw_sp_vr_is_used(vr))
5791                         continue;
5792                 mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
5793         }
5794 }
5795
5796 static void mlxsw_sp_router_fib_abort(struct mlxsw_sp *mlxsw_sp)
5797 {
5798         int err;
5799
5800         if (mlxsw_sp->router->aborted)
5801                 return;
5802         dev_warn(mlxsw_sp->bus_info->dev, "FIB abort triggered. Note that FIB entries are no longer being offloaded to this device.\n");
5803         mlxsw_sp_router_fib_flush(mlxsw_sp);
5804         mlxsw_sp->router->aborted = true;
5805         err = mlxsw_sp_router_set_abort_trap(mlxsw_sp);
5806         if (err)
5807                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n");
5808 }
5809
5810 struct mlxsw_sp_fib_event_work {
5811         struct work_struct work;
5812         union {
5813                 struct fib6_entry_notifier_info fen6_info;
5814                 struct fib_entry_notifier_info fen_info;
5815                 struct fib_rule_notifier_info fr_info;
5816                 struct fib_nh_notifier_info fnh_info;
5817                 struct mfc_entry_notifier_info men_info;
5818                 struct vif_entry_notifier_info ven_info;
5819         };
5820         struct mlxsw_sp *mlxsw_sp;
5821         unsigned long event;
5822 };
5823
5824 static void mlxsw_sp_router_fib4_event_work(struct work_struct *work)
5825 {
5826         struct mlxsw_sp_fib_event_work *fib_work =
5827                 container_of(work, struct mlxsw_sp_fib_event_work, work);
5828         struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
5829         bool replace, append;
5830         int err;
5831
5832         /* Protect internal structures from changes */
5833         rtnl_lock();
5834         mlxsw_sp_span_respin(mlxsw_sp);
5835
5836         switch (fib_work->event) {
5837         case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5838         case FIB_EVENT_ENTRY_APPEND: /* fall through */
5839         case FIB_EVENT_ENTRY_ADD:
5840                 replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
5841                 append = fib_work->event == FIB_EVENT_ENTRY_APPEND;
5842                 err = mlxsw_sp_router_fib4_add(mlxsw_sp, &fib_work->fen_info,
5843                                                replace, append);
5844                 if (err)
5845                         mlxsw_sp_router_fib_abort(mlxsw_sp);
5846                 fib_info_put(fib_work->fen_info.fi);
5847                 break;
5848         case FIB_EVENT_ENTRY_DEL:
5849                 mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info);
5850                 fib_info_put(fib_work->fen_info.fi);
5851                 break;
5852         case FIB_EVENT_RULE_ADD:
5853                 /* if we get here, a rule was added that we do not support.
5854                  * just do the fib_abort
5855                  */
5856                 mlxsw_sp_router_fib_abort(mlxsw_sp);
5857                 break;
5858         case FIB_EVENT_NH_ADD: /* fall through */
5859         case FIB_EVENT_NH_DEL:
5860                 mlxsw_sp_nexthop4_event(mlxsw_sp, fib_work->event,
5861                                         fib_work->fnh_info.fib_nh);
5862                 fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
5863                 break;
5864         }
5865         rtnl_unlock();
5866         kfree(fib_work);
5867 }
5868
5869 static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
5870 {
5871         struct mlxsw_sp_fib_event_work *fib_work =
5872                 container_of(work, struct mlxsw_sp_fib_event_work, work);
5873         struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
5874         bool replace;
5875         int err;
5876
5877         rtnl_lock();
5878         mlxsw_sp_span_respin(mlxsw_sp);
5879
5880         switch (fib_work->event) {
5881         case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5882         case FIB_EVENT_ENTRY_APPEND: /* fall through */
5883         case FIB_EVENT_ENTRY_ADD:
5884                 replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
5885                 err = mlxsw_sp_router_fib6_add(mlxsw_sp,
5886                                                fib_work->fen6_info.rt, replace);
5887                 if (err)
5888                         mlxsw_sp_router_fib_abort(mlxsw_sp);
5889                 mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
5890                 break;
5891         case FIB_EVENT_ENTRY_DEL:
5892                 mlxsw_sp_router_fib6_del(mlxsw_sp, fib_work->fen6_info.rt);
5893                 mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
5894                 break;
5895         case FIB_EVENT_RULE_ADD:
5896                 /* if we get here, a rule was added that we do not support.
5897                  * just do the fib_abort
5898                  */
5899                 mlxsw_sp_router_fib_abort(mlxsw_sp);
5900                 break;
5901         }
5902         rtnl_unlock();
5903         kfree(fib_work);
5904 }
5905
5906 static void mlxsw_sp_router_fibmr_event_work(struct work_struct *work)
5907 {
5908         struct mlxsw_sp_fib_event_work *fib_work =
5909                 container_of(work, struct mlxsw_sp_fib_event_work, work);
5910         struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
5911         bool replace;
5912         int err;
5913
5914         rtnl_lock();
5915         switch (fib_work->event) {
5916         case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5917         case FIB_EVENT_ENTRY_ADD:
5918                 replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
5919
5920                 err = mlxsw_sp_router_fibmr_add(mlxsw_sp, &fib_work->men_info,
5921                                                 replace);
5922                 if (err)
5923                         mlxsw_sp_router_fib_abort(mlxsw_sp);
5924                 mr_cache_put(fib_work->men_info.mfc);
5925                 break;
5926         case FIB_EVENT_ENTRY_DEL:
5927                 mlxsw_sp_router_fibmr_del(mlxsw_sp, &fib_work->men_info);
5928                 mr_cache_put(fib_work->men_info.mfc);
5929                 break;
5930         case FIB_EVENT_VIF_ADD:
5931                 err = mlxsw_sp_router_fibmr_vif_add(mlxsw_sp,
5932                                                     &fib_work->ven_info);
5933                 if (err)
5934                         mlxsw_sp_router_fib_abort(mlxsw_sp);
5935                 dev_put(fib_work->ven_info.dev);
5936                 break;
5937         case FIB_EVENT_VIF_DEL:
5938                 mlxsw_sp_router_fibmr_vif_del(mlxsw_sp,
5939                                               &fib_work->ven_info);
5940                 dev_put(fib_work->ven_info.dev);
5941                 break;
5942         case FIB_EVENT_RULE_ADD:
5943                 /* if we get here, a rule was added that we do not support.
5944                  * just do the fib_abort
5945                  */
5946                 mlxsw_sp_router_fib_abort(mlxsw_sp);
5947                 break;
5948         }
5949         rtnl_unlock();
5950         kfree(fib_work);
5951 }
5952
5953 static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work,
5954                                        struct fib_notifier_info *info)
5955 {
5956         struct fib_entry_notifier_info *fen_info;
5957         struct fib_nh_notifier_info *fnh_info;
5958
5959         switch (fib_work->event) {
5960         case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5961         case FIB_EVENT_ENTRY_APPEND: /* fall through */
5962         case FIB_EVENT_ENTRY_ADD: /* fall through */
5963         case FIB_EVENT_ENTRY_DEL:
5964                 fen_info = container_of(info, struct fib_entry_notifier_info,
5965                                         info);
5966                 fib_work->fen_info = *fen_info;
5967                 /* Take reference on fib_info to prevent it from being
5968                  * freed while work is queued. Release it afterwards.
5969                  */
5970                 fib_info_hold(fib_work->fen_info.fi);
5971                 break;
5972         case FIB_EVENT_NH_ADD: /* fall through */
5973         case FIB_EVENT_NH_DEL:
5974                 fnh_info = container_of(info, struct fib_nh_notifier_info,
5975                                         info);
5976                 fib_work->fnh_info = *fnh_info;
5977                 fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
5978                 break;
5979         }
5980 }
5981
5982 static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work,
5983                                        struct fib_notifier_info *info)
5984 {
5985         struct fib6_entry_notifier_info *fen6_info;
5986
5987         switch (fib_work->event) {
5988         case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5989         case FIB_EVENT_ENTRY_APPEND: /* fall through */
5990         case FIB_EVENT_ENTRY_ADD: /* fall through */
5991         case FIB_EVENT_ENTRY_DEL:
5992                 fen6_info = container_of(info, struct fib6_entry_notifier_info,
5993                                          info);
5994                 fib_work->fen6_info = *fen6_info;
5995                 fib6_info_hold(fib_work->fen6_info.rt);
5996                 break;
5997         }
5998 }
5999
6000 static void
6001 mlxsw_sp_router_fibmr_event(struct mlxsw_sp_fib_event_work *fib_work,
6002                             struct fib_notifier_info *info)
6003 {
6004         switch (fib_work->event) {
6005         case FIB_EVENT_ENTRY_REPLACE: /* fall through */
6006         case FIB_EVENT_ENTRY_ADD: /* fall through */
6007         case FIB_EVENT_ENTRY_DEL:
6008                 memcpy(&fib_work->men_info, info, sizeof(fib_work->men_info));
6009                 mr_cache_hold(fib_work->men_info.mfc);
6010                 break;
6011         case FIB_EVENT_VIF_ADD: /* fall through */
6012         case FIB_EVENT_VIF_DEL:
6013                 memcpy(&fib_work->ven_info, info, sizeof(fib_work->ven_info));
6014                 dev_hold(fib_work->ven_info.dev);
6015                 break;
6016         }
6017 }
6018
6019 static int mlxsw_sp_router_fib_rule_event(unsigned long event,
6020                                           struct fib_notifier_info *info,
6021                                           struct mlxsw_sp *mlxsw_sp)
6022 {
6023         struct netlink_ext_ack *extack = info->extack;
6024         struct fib_rule_notifier_info *fr_info;
6025         struct fib_rule *rule;
6026         int err = 0;
6027
6028         /* nothing to do at the moment */
6029         if (event == FIB_EVENT_RULE_DEL)
6030                 return 0;
6031
6032         if (mlxsw_sp->router->aborted)
6033                 return 0;
6034
6035         fr_info = container_of(info, struct fib_rule_notifier_info, info);
6036         rule = fr_info->rule;
6037
6038         switch (info->family) {
6039         case AF_INET:
6040                 if (!fib4_rule_default(rule) && !rule->l3mdev)
6041                         err = -EOPNOTSUPP;
6042                 break;
6043         case AF_INET6:
6044                 if (!fib6_rule_default(rule) && !rule->l3mdev)
6045                         err = -EOPNOTSUPP;
6046                 break;
6047         case RTNL_FAMILY_IPMR:
6048                 if (!ipmr_rule_default(rule) && !rule->l3mdev)
6049                         err = -EOPNOTSUPP;
6050                 break;
6051         case RTNL_FAMILY_IP6MR:
6052                 if (!ip6mr_rule_default(rule) && !rule->l3mdev)
6053                         err = -EOPNOTSUPP;
6054                 break;
6055         }
6056
6057         if (err < 0)
6058                 NL_SET_ERR_MSG_MOD(extack, "FIB rules not supported");
6059
6060         return err;
6061 }
6062
6063 /* Called with rcu_read_lock() */
6064 static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
6065                                      unsigned long event, void *ptr)
6066 {
6067         struct mlxsw_sp_fib_event_work *fib_work;
6068         struct fib_notifier_info *info = ptr;
6069         struct mlxsw_sp_router *router;
6070         int err;
6071
6072         if (!net_eq(info->net, &init_net) ||
6073             (info->family != AF_INET && info->family != AF_INET6 &&
6074              info->family != RTNL_FAMILY_IPMR &&
6075              info->family != RTNL_FAMILY_IP6MR))
6076                 return NOTIFY_DONE;
6077
6078         router = container_of(nb, struct mlxsw_sp_router, fib_nb);
6079
6080         switch (event) {
6081         case FIB_EVENT_RULE_ADD: /* fall through */
6082         case FIB_EVENT_RULE_DEL:
6083                 err = mlxsw_sp_router_fib_rule_event(event, info,
6084                                                      router->mlxsw_sp);
6085                 if (!err || info->extack)
6086                         return notifier_from_errno(err);
6087                 break;
6088         case FIB_EVENT_ENTRY_ADD:
6089                 if (router->aborted) {
6090                         NL_SET_ERR_MSG_MOD(info->extack, "FIB offload was aborted. Not configuring route");
6091                         return notifier_from_errno(-EINVAL);
6092                 }
6093                 break;
6094         }
6095
6096         fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
6097         if (WARN_ON(!fib_work))
6098                 return NOTIFY_BAD;
6099
6100         fib_work->mlxsw_sp = router->mlxsw_sp;
6101         fib_work->event = event;
6102
6103         switch (info->family) {
6104         case AF_INET:
6105                 INIT_WORK(&fib_work->work, mlxsw_sp_router_fib4_event_work);
6106                 mlxsw_sp_router_fib4_event(fib_work, info);
6107                 break;
6108         case AF_INET6:
6109                 INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work);
6110                 mlxsw_sp_router_fib6_event(fib_work, info);
6111                 break;
6112         case RTNL_FAMILY_IP6MR:
6113         case RTNL_FAMILY_IPMR:
6114                 INIT_WORK(&fib_work->work, mlxsw_sp_router_fibmr_event_work);
6115                 mlxsw_sp_router_fibmr_event(fib_work, info);
6116                 break;
6117         }
6118
6119         mlxsw_core_schedule_work(&fib_work->work);
6120
6121         return NOTIFY_DONE;
6122 }
6123
6124 struct mlxsw_sp_rif *
6125 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
6126                          const struct net_device *dev)
6127 {
6128         int i;
6129
6130         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
6131                 if (mlxsw_sp->router->rifs[i] &&
6132                     mlxsw_sp->router->rifs[i]->dev == dev)
6133                         return mlxsw_sp->router->rifs[i];
6134
6135         return NULL;
6136 }
6137
6138 static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif)
6139 {
6140         char ritr_pl[MLXSW_REG_RITR_LEN];
6141         int err;
6142
6143         mlxsw_reg_ritr_rif_pack(ritr_pl, rif);
6144         err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6145         if (err)
6146                 return err;
6147
6148         mlxsw_reg_ritr_enable_set(ritr_pl, false);
6149         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6150 }
6151
6152 static void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
6153                                           struct mlxsw_sp_rif *rif)
6154 {
6155         mlxsw_sp_router_rif_disable(mlxsw_sp, rif->rif_index);
6156         mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp, rif);
6157         mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif);
6158 }
6159
6160 static bool
6161 mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, struct net_device *dev,
6162                            unsigned long event)
6163 {
6164         struct inet6_dev *inet6_dev;
6165         bool addr_list_empty = true;
6166         struct in_device *idev;
6167
6168         switch (event) {
6169         case NETDEV_UP:
6170                 return rif == NULL;
6171         case NETDEV_DOWN:
6172                 idev = __in_dev_get_rtnl(dev);
6173                 if (idev && idev->ifa_list)
6174                         addr_list_empty = false;
6175
6176                 inet6_dev = __in6_dev_get(dev);
6177                 if (addr_list_empty && inet6_dev &&
6178                     !list_empty(&inet6_dev->addr_list))
6179                         addr_list_empty = false;
6180
6181                 /* macvlans do not have a RIF, but rather piggy back on the
6182                  * RIF of their lower device.
6183                  */
6184                 if (netif_is_macvlan(dev) && addr_list_empty)
6185                         return true;
6186
6187                 if (rif && addr_list_empty &&
6188                     !netif_is_l3_slave(rif->dev))
6189                         return true;
6190                 /* It is possible we already removed the RIF ourselves
6191                  * if it was assigned to a netdev that is now a bridge
6192                  * or LAG slave.
6193                  */
6194                 return false;
6195         }
6196
6197         return false;
6198 }
6199
6200 static enum mlxsw_sp_rif_type
6201 mlxsw_sp_dev_rif_type(const struct mlxsw_sp *mlxsw_sp,
6202                       const struct net_device *dev)
6203 {
6204         enum mlxsw_sp_fid_type type;
6205
6206         if (mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL))
6207                 return MLXSW_SP_RIF_TYPE_IPIP_LB;
6208
6209         /* Otherwise RIF type is derived from the type of the underlying FID. */
6210         if (is_vlan_dev(dev) && netif_is_bridge_master(vlan_dev_real_dev(dev)))
6211                 type = MLXSW_SP_FID_TYPE_8021Q;
6212         else if (netif_is_bridge_master(dev) && br_vlan_enabled(dev))
6213                 type = MLXSW_SP_FID_TYPE_8021Q;
6214         else if (netif_is_bridge_master(dev))
6215                 type = MLXSW_SP_FID_TYPE_8021D;
6216         else
6217                 type = MLXSW_SP_FID_TYPE_RFID;
6218
6219         return mlxsw_sp_fid_type_rif_type(mlxsw_sp, type);
6220 }
6221
6222 static int mlxsw_sp_rif_index_alloc(struct mlxsw_sp *mlxsw_sp, u16 *p_rif_index)
6223 {
6224         int i;
6225
6226         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
6227                 if (!mlxsw_sp->router->rifs[i]) {
6228                         *p_rif_index = i;
6229                         return 0;
6230                 }
6231         }
6232
6233         return -ENOBUFS;
6234 }
6235
6236 static struct mlxsw_sp_rif *mlxsw_sp_rif_alloc(size_t rif_size, u16 rif_index,
6237                                                u16 vr_id,
6238                                                struct net_device *l3_dev)
6239 {
6240         struct mlxsw_sp_rif *rif;
6241
6242         rif = kzalloc(rif_size, GFP_KERNEL);
6243         if (!rif)
6244                 return NULL;
6245
6246         INIT_LIST_HEAD(&rif->nexthop_list);
6247         INIT_LIST_HEAD(&rif->neigh_list);
6248         if (l3_dev) {
6249                 ether_addr_copy(rif->addr, l3_dev->dev_addr);
6250                 rif->mtu = l3_dev->mtu;
6251                 rif->dev = l3_dev;
6252         }
6253         rif->vr_id = vr_id;
6254         rif->rif_index = rif_index;
6255
6256         return rif;
6257 }
6258
6259 struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp,
6260                                            u16 rif_index)
6261 {
6262         return mlxsw_sp->router->rifs[rif_index];
6263 }
6264
6265 u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif)
6266 {
6267         return rif->rif_index;
6268 }
6269
6270 u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
6271 {
6272         return lb_rif->common.rif_index;
6273 }
6274
6275 u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
6276 {
6277         u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(lb_rif->common.dev);
6278         struct mlxsw_sp_vr *ul_vr;
6279
6280         ul_vr = mlxsw_sp_vr_get(lb_rif->common.mlxsw_sp, ul_tb_id, NULL);
6281         if (WARN_ON(IS_ERR(ul_vr)))
6282                 return 0;
6283
6284         return ul_vr->id;
6285 }
6286
6287 u16 mlxsw_sp_ipip_lb_ul_rif_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
6288 {
6289         return lb_rif->ul_rif_id;
6290 }
6291
6292 int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif)
6293 {
6294         return rif->dev->ifindex;
6295 }
6296
6297 const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif)
6298 {
6299         return rif->dev;
6300 }
6301
6302 struct mlxsw_sp_fid *mlxsw_sp_rif_fid(const struct mlxsw_sp_rif *rif)
6303 {
6304         return rif->fid;
6305 }
6306
6307 static struct mlxsw_sp_rif *
6308 mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
6309                     const struct mlxsw_sp_rif_params *params,
6310                     struct netlink_ext_ack *extack)
6311 {
6312         u32 tb_id = l3mdev_fib_table(params->dev);
6313         const struct mlxsw_sp_rif_ops *ops;
6314         struct mlxsw_sp_fid *fid = NULL;
6315         enum mlxsw_sp_rif_type type;
6316         struct mlxsw_sp_rif *rif;
6317         struct mlxsw_sp_vr *vr;
6318         u16 rif_index;
6319         int i, err;
6320
6321         type = mlxsw_sp_dev_rif_type(mlxsw_sp, params->dev);
6322         ops = mlxsw_sp->rif_ops_arr[type];
6323
6324         vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN, extack);
6325         if (IS_ERR(vr))
6326                 return ERR_CAST(vr);
6327         vr->rif_count++;
6328
6329         err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index);
6330         if (err) {
6331                 NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces");
6332                 goto err_rif_index_alloc;
6333         }
6334
6335         rif = mlxsw_sp_rif_alloc(ops->rif_size, rif_index, vr->id, params->dev);
6336         if (!rif) {
6337                 err = -ENOMEM;
6338                 goto err_rif_alloc;
6339         }
6340         dev_hold(rif->dev);
6341         mlxsw_sp->router->rifs[rif_index] = rif;
6342         rif->mlxsw_sp = mlxsw_sp;
6343         rif->ops = ops;
6344
6345         if (ops->fid_get) {
6346                 fid = ops->fid_get(rif, extack);
6347                 if (IS_ERR(fid)) {
6348                         err = PTR_ERR(fid);
6349                         goto err_fid_get;
6350                 }
6351                 rif->fid = fid;
6352         }
6353
6354         if (ops->setup)
6355                 ops->setup(rif, params);
6356
6357         err = ops->configure(rif);
6358         if (err)
6359                 goto err_configure;
6360
6361         for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++) {
6362                 err = mlxsw_sp_mr_rif_add(vr->mr_table[i], rif);
6363                 if (err)
6364                         goto err_mr_rif_add;
6365         }
6366
6367         mlxsw_sp_rif_counters_alloc(rif);
6368
6369         return rif;
6370
6371 err_mr_rif_add:
6372         for (i--; i >= 0; i--)
6373                 mlxsw_sp_mr_rif_del(vr->mr_table[i], rif);
6374         ops->deconfigure(rif);
6375 err_configure:
6376         if (fid)
6377                 mlxsw_sp_fid_put(fid);
6378 err_fid_get:
6379         mlxsw_sp->router->rifs[rif_index] = NULL;
6380         dev_put(rif->dev);
6381         kfree(rif);
6382 err_rif_alloc:
6383 err_rif_index_alloc:
6384         vr->rif_count--;
6385         mlxsw_sp_vr_put(mlxsw_sp, vr);
6386         return ERR_PTR(err);
6387 }
6388
6389 static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif)
6390 {
6391         const struct mlxsw_sp_rif_ops *ops = rif->ops;
6392         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6393         struct mlxsw_sp_fid *fid = rif->fid;
6394         struct mlxsw_sp_vr *vr;
6395         int i;
6396
6397         mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif);
6398         vr = &mlxsw_sp->router->vrs[rif->vr_id];
6399
6400         mlxsw_sp_rif_counters_free(rif);
6401         for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++)
6402                 mlxsw_sp_mr_rif_del(vr->mr_table[i], rif);
6403         ops->deconfigure(rif);
6404         if (fid)
6405                 /* Loopback RIFs are not associated with a FID. */
6406                 mlxsw_sp_fid_put(fid);
6407         mlxsw_sp->router->rifs[rif->rif_index] = NULL;
6408         dev_put(rif->dev);
6409         kfree(rif);
6410         vr->rif_count--;
6411         mlxsw_sp_vr_put(mlxsw_sp, vr);
6412 }
6413
/* Destroy the RIF associated with @dev, if one exists; otherwise do nothing. */
void mlxsw_sp_rif_destroy_by_dev(struct mlxsw_sp *mlxsw_sp,
				 struct net_device *dev)
{
	struct mlxsw_sp_rif *rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);

	if (rif)
		mlxsw_sp_rif_destroy(rif);
}
6424
6425 static void
6426 mlxsw_sp_rif_subport_params_init(struct mlxsw_sp_rif_params *params,
6427                                  struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
6428 {
6429         struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
6430
6431         params->vid = mlxsw_sp_port_vlan->vid;
6432         params->lag = mlxsw_sp_port->lagged;
6433         if (params->lag)
6434                 params->lag_id = mlxsw_sp_port->lag_id;
6435         else
6436                 params->system_port = mlxsw_sp_port->local_port;
6437 }
6438
6439 static struct mlxsw_sp_rif_subport *
6440 mlxsw_sp_rif_subport_rif(const struct mlxsw_sp_rif *rif)
6441 {
6442         return container_of(rif, struct mlxsw_sp_rif_subport, common);
6443 }
6444
6445 static struct mlxsw_sp_rif *
6446 mlxsw_sp_rif_subport_get(struct mlxsw_sp *mlxsw_sp,
6447                          const struct mlxsw_sp_rif_params *params,
6448                          struct netlink_ext_ack *extack)
6449 {
6450         struct mlxsw_sp_rif_subport *rif_subport;
6451         struct mlxsw_sp_rif *rif;
6452
6453         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, params->dev);
6454         if (!rif)
6455                 return mlxsw_sp_rif_create(mlxsw_sp, params, extack);
6456
6457         rif_subport = mlxsw_sp_rif_subport_rif(rif);
6458         refcount_inc(&rif_subport->ref_count);
6459         return rif;
6460 }
6461
6462 static void mlxsw_sp_rif_subport_put(struct mlxsw_sp_rif *rif)
6463 {
6464         struct mlxsw_sp_rif_subport *rif_subport;
6465
6466         rif_subport = mlxsw_sp_rif_subport_rif(rif);
6467         if (!refcount_dec_and_test(&rif_subport->ref_count))
6468                 return;
6469
6470         mlxsw_sp_rif_destroy(rif);
6471 }
6472
6473 static int
6474 mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan,
6475                                struct net_device *l3_dev,
6476                                struct netlink_ext_ack *extack)
6477 {
6478         struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
6479         struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
6480         struct mlxsw_sp_rif_params params = {
6481                 .dev = l3_dev,
6482         };
6483         u16 vid = mlxsw_sp_port_vlan->vid;
6484         struct mlxsw_sp_rif *rif;
6485         struct mlxsw_sp_fid *fid;
6486         int err;
6487
6488         mlxsw_sp_rif_subport_params_init(&params, mlxsw_sp_port_vlan);
6489         rif = mlxsw_sp_rif_subport_get(mlxsw_sp, &params, extack);
6490         if (IS_ERR(rif))
6491                 return PTR_ERR(rif);
6492
6493         /* FID was already created, just take a reference */
6494         fid = rif->ops->fid_get(rif, extack);
6495         err = mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port, vid);
6496         if (err)
6497                 goto err_fid_port_vid_map;
6498
6499         err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, false);
6500         if (err)
6501                 goto err_port_vid_learning_set;
6502
6503         err = mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid,
6504                                         BR_STATE_FORWARDING);
6505         if (err)
6506                 goto err_port_vid_stp_set;
6507
6508         mlxsw_sp_port_vlan->fid = fid;
6509
6510         return 0;
6511
6512 err_port_vid_stp_set:
6513         mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
6514 err_port_vid_learning_set:
6515         mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
6516 err_fid_port_vid_map:
6517         mlxsw_sp_fid_put(fid);
6518         mlxsw_sp_rif_subport_put(rif);
6519         return err;
6520 }
6521
6522 void
6523 mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
6524 {
6525         struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
6526         struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid;
6527         struct mlxsw_sp_rif *rif = mlxsw_sp_fid_rif(fid);
6528         u16 vid = mlxsw_sp_port_vlan->vid;
6529
6530         if (WARN_ON(mlxsw_sp_fid_type(fid) != MLXSW_SP_FID_TYPE_RFID))
6531                 return;
6532
6533         mlxsw_sp_port_vlan->fid = NULL;
6534         mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid, BR_STATE_BLOCKING);
6535         mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
6536         mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
6537         mlxsw_sp_fid_put(fid);
6538         mlxsw_sp_rif_subport_put(rif);
6539 }
6540
6541 static int mlxsw_sp_inetaddr_port_vlan_event(struct net_device *l3_dev,
6542                                              struct net_device *port_dev,
6543                                              unsigned long event, u16 vid,
6544                                              struct netlink_ext_ack *extack)
6545 {
6546         struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(port_dev);
6547         struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
6548
6549         mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
6550         if (WARN_ON(!mlxsw_sp_port_vlan))
6551                 return -EINVAL;
6552
6553         switch (event) {
6554         case NETDEV_UP:
6555                 return mlxsw_sp_port_vlan_router_join(mlxsw_sp_port_vlan,
6556                                                       l3_dev, extack);
6557         case NETDEV_DOWN:
6558                 mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
6559                 break;
6560         }
6561
6562         return 0;
6563 }
6564
6565 static int mlxsw_sp_inetaddr_port_event(struct net_device *port_dev,
6566                                         unsigned long event,
6567                                         struct netlink_ext_ack *extack)
6568 {
6569         if (netif_is_bridge_port(port_dev) ||
6570             netif_is_lag_port(port_dev) ||
6571             netif_is_ovs_port(port_dev))
6572                 return 0;
6573
6574         return mlxsw_sp_inetaddr_port_vlan_event(port_dev, port_dev, event,
6575                                                  MLXSW_SP_DEFAULT_VID, extack);
6576 }
6577
6578 static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev,
6579                                          struct net_device *lag_dev,
6580                                          unsigned long event, u16 vid,
6581                                          struct netlink_ext_ack *extack)
6582 {
6583         struct net_device *port_dev;
6584         struct list_head *iter;
6585         int err;
6586
6587         netdev_for_each_lower_dev(lag_dev, port_dev, iter) {
6588                 if (mlxsw_sp_port_dev_check(port_dev)) {
6589                         err = mlxsw_sp_inetaddr_port_vlan_event(l3_dev,
6590                                                                 port_dev,
6591                                                                 event, vid,
6592                                                                 extack);
6593                         if (err)
6594                                 return err;
6595                 }
6596         }
6597
6598         return 0;
6599 }
6600
6601 static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev,
6602                                        unsigned long event,
6603                                        struct netlink_ext_ack *extack)
6604 {
6605         if (netif_is_bridge_port(lag_dev))
6606                 return 0;
6607
6608         return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event,
6609                                              MLXSW_SP_DEFAULT_VID, extack);
6610 }
6611
6612 static int mlxsw_sp_inetaddr_bridge_event(struct mlxsw_sp *mlxsw_sp,
6613                                           struct net_device *l3_dev,
6614                                           unsigned long event,
6615                                           struct netlink_ext_ack *extack)
6616 {
6617         struct mlxsw_sp_rif_params params = {
6618                 .dev = l3_dev,
6619         };
6620         struct mlxsw_sp_rif *rif;
6621
6622         switch (event) {
6623         case NETDEV_UP:
6624                 rif = mlxsw_sp_rif_create(mlxsw_sp, &params, extack);
6625                 if (IS_ERR(rif))
6626                         return PTR_ERR(rif);
6627                 break;
6628         case NETDEV_DOWN:
6629                 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6630                 mlxsw_sp_rif_destroy(rif);
6631                 break;
6632         }
6633
6634         return 0;
6635 }
6636
6637 static int mlxsw_sp_inetaddr_vlan_event(struct mlxsw_sp *mlxsw_sp,
6638                                         struct net_device *vlan_dev,
6639                                         unsigned long event,
6640                                         struct netlink_ext_ack *extack)
6641 {
6642         struct net_device *real_dev = vlan_dev_real_dev(vlan_dev);
6643         u16 vid = vlan_dev_vlan_id(vlan_dev);
6644
6645         if (netif_is_bridge_port(vlan_dev))
6646                 return 0;
6647
6648         if (mlxsw_sp_port_dev_check(real_dev))
6649                 return mlxsw_sp_inetaddr_port_vlan_event(vlan_dev, real_dev,
6650                                                          event, vid, extack);
6651         else if (netif_is_lag_master(real_dev))
6652                 return __mlxsw_sp_inetaddr_lag_event(vlan_dev, real_dev, event,
6653                                                      vid, extack);
6654         else if (netif_is_bridge_master(real_dev) && br_vlan_enabled(real_dev))
6655                 return mlxsw_sp_inetaddr_bridge_event(mlxsw_sp, vlan_dev, event,
6656                                                       extack);
6657
6658         return 0;
6659 }
6660
6661 static bool mlxsw_sp_rif_macvlan_is_vrrp4(const u8 *mac)
6662 {
6663         u8 vrrp4[ETH_ALEN] = { 0x00, 0x00, 0x5e, 0x00, 0x01, 0x00 };
6664         u8 mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 };
6665
6666         return ether_addr_equal_masked(mac, vrrp4, mask);
6667 }
6668
6669 static bool mlxsw_sp_rif_macvlan_is_vrrp6(const u8 *mac)
6670 {
6671         u8 vrrp6[ETH_ALEN] = { 0x00, 0x00, 0x5e, 0x00, 0x02, 0x00 };
6672         u8 mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 };
6673
6674         return ether_addr_equal_masked(mac, vrrp6, mask);
6675 }
6676
6677 static int mlxsw_sp_rif_vrrp_op(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
6678                                 const u8 *mac, bool adding)
6679 {
6680         char ritr_pl[MLXSW_REG_RITR_LEN];
6681         u8 vrrp_id = adding ? mac[5] : 0;
6682         int err;
6683
6684         if (!mlxsw_sp_rif_macvlan_is_vrrp4(mac) &&
6685             !mlxsw_sp_rif_macvlan_is_vrrp6(mac))
6686                 return 0;
6687
6688         mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
6689         err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6690         if (err)
6691                 return err;
6692
6693         if (mlxsw_sp_rif_macvlan_is_vrrp4(mac))
6694                 mlxsw_reg_ritr_if_vrrp_id_ipv4_set(ritr_pl, vrrp_id);
6695         else
6696                 mlxsw_reg_ritr_if_vrrp_id_ipv6_set(ritr_pl, vrrp_id);
6697
6698         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6699 }
6700
6701 static int mlxsw_sp_rif_macvlan_add(struct mlxsw_sp *mlxsw_sp,
6702                                     const struct net_device *macvlan_dev,
6703                                     struct netlink_ext_ack *extack)
6704 {
6705         struct macvlan_dev *vlan = netdev_priv(macvlan_dev);
6706         struct mlxsw_sp_rif *rif;
6707         int err;
6708
6709         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan->lowerdev);
6710         if (!rif) {
6711                 NL_SET_ERR_MSG_MOD(extack, "macvlan is only supported on top of router interfaces");
6712                 return -EOPNOTSUPP;
6713         }
6714
6715         err = mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
6716                                   mlxsw_sp_fid_index(rif->fid), true);
6717         if (err)
6718                 return err;
6719
6720         err = mlxsw_sp_rif_vrrp_op(mlxsw_sp, rif->rif_index,
6721                                    macvlan_dev->dev_addr, true);
6722         if (err)
6723                 goto err_rif_vrrp_add;
6724
6725         /* Make sure the bridge driver does not have this MAC pointing at
6726          * some other port.
6727          */
6728         if (rif->ops->fdb_del)
6729                 rif->ops->fdb_del(rif, macvlan_dev->dev_addr);
6730
6731         return 0;
6732
6733 err_rif_vrrp_add:
6734         mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
6735                             mlxsw_sp_fid_index(rif->fid), false);
6736         return err;
6737 }
6738
6739 void mlxsw_sp_rif_macvlan_del(struct mlxsw_sp *mlxsw_sp,
6740                               const struct net_device *macvlan_dev)
6741 {
6742         struct macvlan_dev *vlan = netdev_priv(macvlan_dev);
6743         struct mlxsw_sp_rif *rif;
6744
6745         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan->lowerdev);
6746         /* If we do not have a RIF, then we already took care of
6747          * removing the macvlan's MAC during RIF deletion.
6748          */
6749         if (!rif)
6750                 return;
6751         mlxsw_sp_rif_vrrp_op(mlxsw_sp, rif->rif_index, macvlan_dev->dev_addr,
6752                              false);
6753         mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
6754                             mlxsw_sp_fid_index(rif->fid), false);
6755 }
6756
6757 static int mlxsw_sp_inetaddr_macvlan_event(struct mlxsw_sp *mlxsw_sp,
6758                                            struct net_device *macvlan_dev,
6759                                            unsigned long event,
6760                                            struct netlink_ext_ack *extack)
6761 {
6762         switch (event) {
6763         case NETDEV_UP:
6764                 return mlxsw_sp_rif_macvlan_add(mlxsw_sp, macvlan_dev, extack);
6765         case NETDEV_DOWN:
6766                 mlxsw_sp_rif_macvlan_del(mlxsw_sp, macvlan_dev);
6767                 break;
6768         }
6769
6770         return 0;
6771 }
6772
6773 static int mlxsw_sp_router_port_check_rif_addr(struct mlxsw_sp *mlxsw_sp,
6774                                                struct net_device *dev,
6775                                                const unsigned char *dev_addr,
6776                                                struct netlink_ext_ack *extack)
6777 {
6778         struct mlxsw_sp_rif *rif;
6779         int i;
6780
6781         /* A RIF is not created for macvlan netdevs. Their MAC is used to
6782          * populate the FDB
6783          */
6784         if (netif_is_macvlan(dev))
6785                 return 0;
6786
6787         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
6788                 rif = mlxsw_sp->router->rifs[i];
6789                 if (rif && rif->dev && rif->dev != dev &&
6790                     !ether_addr_equal_masked(rif->dev->dev_addr, dev_addr,
6791                                              mlxsw_sp->mac_mask)) {
6792                         NL_SET_ERR_MSG_MOD(extack, "All router interface MAC addresses must have the same prefix");
6793                         return -EINVAL;
6794                 }
6795         }
6796
6797         return 0;
6798 }
6799
/* Dispatch an address event to the handler matching the type of @dev.
 * The checks are made in this order: port, LAG master, bridge master, VLAN
 * device, macvlan. Any other netdev type is ignored (returns 0).
 */
static int __mlxsw_sp_inetaddr_event(struct mlxsw_sp *mlxsw_sp,
				     struct net_device *dev,
				     unsigned long event,
				     struct netlink_ext_ack *extack)
{
	if (mlxsw_sp_port_dev_check(dev))
		return mlxsw_sp_inetaddr_port_event(dev, event, extack);

	if (netif_is_lag_master(dev))
		return mlxsw_sp_inetaddr_lag_event(dev, event, extack);

	if (netif_is_bridge_master(dev))
		return mlxsw_sp_inetaddr_bridge_event(mlxsw_sp, dev, event,
						      extack);

	if (is_vlan_dev(dev))
		return mlxsw_sp_inetaddr_vlan_event(mlxsw_sp, dev, event,
						    extack);

	if (netif_is_macvlan(dev))
		return mlxsw_sp_inetaddr_macvlan_event(mlxsw_sp, dev, event,
						       extack);

	return 0;
}
6821
6822 static int mlxsw_sp_inetaddr_event(struct notifier_block *nb,
6823                                    unsigned long event, void *ptr)
6824 {
6825         struct in_ifaddr *ifa = (struct in_ifaddr *) ptr;
6826         struct net_device *dev = ifa->ifa_dev->dev;
6827         struct mlxsw_sp_router *router;
6828         struct mlxsw_sp_rif *rif;
6829         int err = 0;
6830
6831         /* NETDEV_UP event is handled by mlxsw_sp_inetaddr_valid_event */
6832         if (event == NETDEV_UP)
6833                 goto out;
6834
6835         router = container_of(nb, struct mlxsw_sp_router, inetaddr_nb);
6836         rif = mlxsw_sp_rif_find_by_dev(router->mlxsw_sp, dev);
6837         if (!mlxsw_sp_rif_should_config(rif, dev, event))
6838                 goto out;
6839
6840         err = __mlxsw_sp_inetaddr_event(router->mlxsw_sp, dev, event, NULL);
6841 out:
6842         return notifier_from_errno(err);
6843 }
6844
6845 int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused,
6846                                   unsigned long event, void *ptr)
6847 {
6848         struct in_validator_info *ivi = (struct in_validator_info *) ptr;
6849         struct net_device *dev = ivi->ivi_dev->dev;
6850         struct mlxsw_sp *mlxsw_sp;
6851         struct mlxsw_sp_rif *rif;
6852         int err = 0;
6853
6854         mlxsw_sp = mlxsw_sp_lower_get(dev);
6855         if (!mlxsw_sp)
6856                 goto out;
6857
6858         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6859         if (!mlxsw_sp_rif_should_config(rif, dev, event))
6860                 goto out;
6861
6862         err = mlxsw_sp_router_port_check_rif_addr(mlxsw_sp, dev, dev->dev_addr,
6863                                                   ivi->extack);
6864         if (err)
6865                 goto out;
6866
6867         err = __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, ivi->extack);
6868 out:
6869         return notifier_from_errno(err);
6870 }
6871
6872 struct mlxsw_sp_inet6addr_event_work {
6873         struct work_struct work;
6874         struct mlxsw_sp *mlxsw_sp;
6875         struct net_device *dev;
6876         unsigned long event;
6877 };
6878
6879 static void mlxsw_sp_inet6addr_event_work(struct work_struct *work)
6880 {
6881         struct mlxsw_sp_inet6addr_event_work *inet6addr_work =
6882                 container_of(work, struct mlxsw_sp_inet6addr_event_work, work);
6883         struct mlxsw_sp *mlxsw_sp = inet6addr_work->mlxsw_sp;
6884         struct net_device *dev = inet6addr_work->dev;
6885         unsigned long event = inet6addr_work->event;
6886         struct mlxsw_sp_rif *rif;
6887
6888         rtnl_lock();
6889
6890         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6891         if (!mlxsw_sp_rif_should_config(rif, dev, event))
6892                 goto out;
6893
6894         __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, NULL);
6895 out:
6896         rtnl_unlock();
6897         dev_put(dev);
6898         kfree(inet6addr_work);
6899 }
6900
6901 /* Called with rcu_read_lock() */
6902 static int mlxsw_sp_inet6addr_event(struct notifier_block *nb,
6903                                     unsigned long event, void *ptr)
6904 {
6905         struct inet6_ifaddr *if6 = (struct inet6_ifaddr *) ptr;
6906         struct mlxsw_sp_inet6addr_event_work *inet6addr_work;
6907         struct net_device *dev = if6->idev->dev;
6908         struct mlxsw_sp_router *router;
6909
6910         /* NETDEV_UP event is handled by mlxsw_sp_inet6addr_valid_event */
6911         if (event == NETDEV_UP)
6912                 return NOTIFY_DONE;
6913
6914         inet6addr_work = kzalloc(sizeof(*inet6addr_work), GFP_ATOMIC);
6915         if (!inet6addr_work)
6916                 return NOTIFY_BAD;
6917
6918         router = container_of(nb, struct mlxsw_sp_router, inet6addr_nb);
6919         INIT_WORK(&inet6addr_work->work, mlxsw_sp_inet6addr_event_work);
6920         inet6addr_work->mlxsw_sp = router->mlxsw_sp;
6921         inet6addr_work->dev = dev;
6922         inet6addr_work->event = event;
6923         dev_hold(dev);
6924         mlxsw_core_schedule_work(&inet6addr_work->work);
6925
6926         return NOTIFY_DONE;
6927 }
6928
6929 int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused,
6930                                    unsigned long event, void *ptr)
6931 {
6932         struct in6_validator_info *i6vi = (struct in6_validator_info *) ptr;
6933         struct net_device *dev = i6vi->i6vi_dev->dev;
6934         struct mlxsw_sp *mlxsw_sp;
6935         struct mlxsw_sp_rif *rif;
6936         int err = 0;
6937
6938         mlxsw_sp = mlxsw_sp_lower_get(dev);
6939         if (!mlxsw_sp)
6940                 goto out;
6941
6942         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6943         if (!mlxsw_sp_rif_should_config(rif, dev, event))
6944                 goto out;
6945
6946         err = mlxsw_sp_router_port_check_rif_addr(mlxsw_sp, dev, dev->dev_addr,
6947                                                   i6vi->extack);
6948         if (err)
6949                 goto out;
6950
6951         err = __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, i6vi->extack);
6952 out:
6953         return notifier_from_errno(err);
6954 }
6955
6956 static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
6957                              const char *mac, int mtu)
6958 {
6959         char ritr_pl[MLXSW_REG_RITR_LEN];
6960         int err;
6961
6962         mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
6963         err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6964         if (err)
6965                 return err;
6966
6967         mlxsw_reg_ritr_mtu_set(ritr_pl, mtu);
6968         mlxsw_reg_ritr_if_mac_memcpy_to(ritr_pl, mac);
6969         mlxsw_reg_ritr_op_set(ritr_pl, MLXSW_REG_RITR_RIF_CREATE);
6970         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6971 }
6972
6973 static int
6974 mlxsw_sp_router_port_change_event(struct mlxsw_sp *mlxsw_sp,
6975                                   struct mlxsw_sp_rif *rif)
6976 {
6977         struct net_device *dev = rif->dev;
6978         u16 fid_index;
6979         int err;
6980
6981         fid_index = mlxsw_sp_fid_index(rif->fid);
6982
6983         err = mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, false);
6984         if (err)
6985                 return err;
6986
6987         err = mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, dev->dev_addr,
6988                                 dev->mtu);
6989         if (err)
6990                 goto err_rif_edit;
6991
6992         err = mlxsw_sp_rif_fdb_op(mlxsw_sp, dev->dev_addr, fid_index, true);
6993         if (err)
6994                 goto err_rif_fdb_op;
6995
6996         if (rif->mtu != dev->mtu) {
6997                 struct mlxsw_sp_vr *vr;
6998                 int i;
6999
7000                 /* The RIF is relevant only to its mr_table instance, as unlike
7001                  * unicast routing, in multicast routing a RIF cannot be shared
7002                  * between several multicast routing tables.
7003                  */
7004                 vr = &mlxsw_sp->router->vrs[rif->vr_id];
7005                 for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++)
7006                         mlxsw_sp_mr_rif_mtu_update(vr->mr_table[i],
7007                                                    rif, dev->mtu);
7008         }
7009
7010         ether_addr_copy(rif->addr, dev->dev_addr);
7011         rif->mtu = dev->mtu;
7012
7013         netdev_dbg(dev, "Updated RIF=%d\n", rif->rif_index);
7014
7015         return 0;
7016
7017 err_rif_fdb_op:
7018         mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, rif->addr, rif->mtu);
7019 err_rif_edit:
7020         mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, true);
7021         return err;
7022 }
7023
7024 static int mlxsw_sp_router_port_pre_changeaddr_event(struct mlxsw_sp_rif *rif,
7025                             struct netdev_notifier_pre_changeaddr_info *info)
7026 {
7027         struct netlink_ext_ack *extack;
7028
7029         extack = netdev_notifier_info_to_extack(&info->info);
7030         return mlxsw_sp_router_port_check_rif_addr(rif->mlxsw_sp, rif->dev,
7031                                                    info->dev_addr, extack);
7032 }
7033
7034 int mlxsw_sp_netdevice_router_port_event(struct net_device *dev,
7035                                          unsigned long event, void *ptr)
7036 {
7037         struct mlxsw_sp *mlxsw_sp;
7038         struct mlxsw_sp_rif *rif;
7039
7040         mlxsw_sp = mlxsw_sp_lower_get(dev);
7041         if (!mlxsw_sp)
7042                 return 0;
7043
7044         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
7045         if (!rif)
7046                 return 0;
7047
7048         switch (event) {
7049         case NETDEV_CHANGEMTU: /* fall through */
7050         case NETDEV_CHANGEADDR:
7051                 return mlxsw_sp_router_port_change_event(mlxsw_sp, rif);
7052         case NETDEV_PRE_CHANGEADDR:
7053                 return mlxsw_sp_router_port_pre_changeaddr_event(rif, ptr);
7054         }
7055
7056         return 0;
7057 }
7058
7059 static int mlxsw_sp_port_vrf_join(struct mlxsw_sp *mlxsw_sp,
7060                                   struct net_device *l3_dev,
7061                                   struct netlink_ext_ack *extack)
7062 {
7063         struct mlxsw_sp_rif *rif;
7064
7065         /* If netdev is already associated with a RIF, then we need to
7066          * destroy it and create a new one with the new virtual router ID.
7067          */
7068         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
7069         if (rif)
7070                 __mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_DOWN,
7071                                           extack);
7072
7073         return __mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_UP, extack);
7074 }
7075
7076 static void mlxsw_sp_port_vrf_leave(struct mlxsw_sp *mlxsw_sp,
7077                                     struct net_device *l3_dev)
7078 {
7079         struct mlxsw_sp_rif *rif;
7080
7081         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
7082         if (!rif)
7083                 return;
7084         __mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_DOWN, NULL);
7085 }
7086
7087 int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
7088                                  struct netdev_notifier_changeupper_info *info)
7089 {
7090         struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
7091         int err = 0;
7092
7093         /* We do not create a RIF for a macvlan, but only use it to
7094          * direct more MAC addresses to the router.
7095          */
7096         if (!mlxsw_sp || netif_is_macvlan(l3_dev))
7097                 return 0;
7098
7099         switch (event) {
7100         case NETDEV_PRECHANGEUPPER:
7101                 return 0;
7102         case NETDEV_CHANGEUPPER:
7103                 if (info->linking) {
7104                         struct netlink_ext_ack *extack;
7105
7106                         extack = netdev_notifier_info_to_extack(&info->info);
7107                         err = mlxsw_sp_port_vrf_join(mlxsw_sp, l3_dev, extack);
7108                 } else {
7109                         mlxsw_sp_port_vrf_leave(mlxsw_sp, l3_dev);
7110                 }
7111                 break;
7112         }
7113
7114         return err;
7115 }
7116
7117 static int __mlxsw_sp_rif_macvlan_flush(struct net_device *dev, void *data)
7118 {
7119         struct mlxsw_sp_rif *rif = data;
7120
7121         if (!netif_is_macvlan(dev))
7122                 return 0;
7123
7124         return mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
7125                                    mlxsw_sp_fid_index(rif->fid), false);
7126 }
7127
7128 static int mlxsw_sp_rif_macvlan_flush(struct mlxsw_sp_rif *rif)
7129 {
7130         if (!netif_is_macvlan_port(rif->dev))
7131                 return 0;
7132
7133         netdev_warn(rif->dev, "Router interface is deleted. Upper macvlans will not work\n");
7134         return netdev_walk_all_upper_dev_rcu(rif->dev,
7135                                              __mlxsw_sp_rif_macvlan_flush, rif);
7136 }
7137
7138 static void mlxsw_sp_rif_subport_setup(struct mlxsw_sp_rif *rif,
7139                                        const struct mlxsw_sp_rif_params *params)
7140 {
7141         struct mlxsw_sp_rif_subport *rif_subport;
7142
7143         rif_subport = mlxsw_sp_rif_subport_rif(rif);
7144         refcount_set(&rif_subport->ref_count, 1);
7145         rif_subport->vid = params->vid;
7146         rif_subport->lag = params->lag;
7147         if (params->lag)
7148                 rif_subport->lag_id = params->lag_id;
7149         else
7150                 rif_subport->system_port = params->system_port;
7151 }
7152
7153 static int mlxsw_sp_rif_subport_op(struct mlxsw_sp_rif *rif, bool enable)
7154 {
7155         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7156         struct mlxsw_sp_rif_subport *rif_subport;
7157         char ritr_pl[MLXSW_REG_RITR_LEN];
7158
7159         rif_subport = mlxsw_sp_rif_subport_rif(rif);
7160         mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_SP_IF,
7161                             rif->rif_index, rif->vr_id, rif->dev->mtu);
7162         mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
7163         mlxsw_reg_ritr_sp_if_pack(ritr_pl, rif_subport->lag,
7164                                   rif_subport->lag ? rif_subport->lag_id :
7165                                                      rif_subport->system_port,
7166                                   rif_subport->vid);
7167
7168         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
7169 }
7170
7171 static int mlxsw_sp_rif_subport_configure(struct mlxsw_sp_rif *rif)
7172 {
7173         int err;
7174
7175         err = mlxsw_sp_rif_subport_op(rif, true);
7176         if (err)
7177                 return err;
7178
7179         err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7180                                   mlxsw_sp_fid_index(rif->fid), true);
7181         if (err)
7182                 goto err_rif_fdb_op;
7183
7184         mlxsw_sp_fid_rif_set(rif->fid, rif);
7185         return 0;
7186
7187 err_rif_fdb_op:
7188         mlxsw_sp_rif_subport_op(rif, false);
7189         return err;
7190 }
7191
7192 static void mlxsw_sp_rif_subport_deconfigure(struct mlxsw_sp_rif *rif)
7193 {
7194         struct mlxsw_sp_fid *fid = rif->fid;
7195
7196         mlxsw_sp_fid_rif_set(fid, NULL);
7197         mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7198                             mlxsw_sp_fid_index(fid), false);
7199         mlxsw_sp_rif_macvlan_flush(rif);
7200         mlxsw_sp_rif_subport_op(rif, false);
7201 }
7202
7203 static struct mlxsw_sp_fid *
7204 mlxsw_sp_rif_subport_fid_get(struct mlxsw_sp_rif *rif,
7205                              struct netlink_ext_ack *extack)
7206 {
7207         return mlxsw_sp_fid_rfid_get(rif->mlxsw_sp, rif->rif_index);
7208 }
7209
7210 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_subport_ops = {
7211         .type                   = MLXSW_SP_RIF_TYPE_SUBPORT,
7212         .rif_size               = sizeof(struct mlxsw_sp_rif_subport),
7213         .setup                  = mlxsw_sp_rif_subport_setup,
7214         .configure              = mlxsw_sp_rif_subport_configure,
7215         .deconfigure            = mlxsw_sp_rif_subport_deconfigure,
7216         .fid_get                = mlxsw_sp_rif_subport_fid_get,
7217 };
7218
7219 static int mlxsw_sp_rif_vlan_fid_op(struct mlxsw_sp_rif *rif,
7220                                     enum mlxsw_reg_ritr_if_type type,
7221                                     u16 vid_fid, bool enable)
7222 {
7223         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7224         char ritr_pl[MLXSW_REG_RITR_LEN];
7225
7226         mlxsw_reg_ritr_pack(ritr_pl, enable, type, rif->rif_index, rif->vr_id,
7227                             rif->dev->mtu);
7228         mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
7229         mlxsw_reg_ritr_fid_set(ritr_pl, type, vid_fid);
7230
7231         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
7232 }
7233
7234 u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp)
7235 {
7236         return mlxsw_core_max_ports(mlxsw_sp->core) + 1;
7237 }
7238
7239 static int mlxsw_sp_rif_vlan_configure(struct mlxsw_sp_rif *rif)
7240 {
7241         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7242         u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
7243         int err;
7244
7245         err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, true);
7246         if (err)
7247                 return err;
7248
7249         err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7250                                      mlxsw_sp_router_port(mlxsw_sp), true);
7251         if (err)
7252                 goto err_fid_mc_flood_set;
7253
7254         err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7255                                      mlxsw_sp_router_port(mlxsw_sp), true);
7256         if (err)
7257                 goto err_fid_bc_flood_set;
7258
7259         err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7260                                   mlxsw_sp_fid_index(rif->fid), true);
7261         if (err)
7262                 goto err_rif_fdb_op;
7263
7264         mlxsw_sp_fid_rif_set(rif->fid, rif);
7265         return 0;
7266
7267 err_rif_fdb_op:
7268         mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7269                                mlxsw_sp_router_port(mlxsw_sp), false);
7270 err_fid_bc_flood_set:
7271         mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7272                                mlxsw_sp_router_port(mlxsw_sp), false);
7273 err_fid_mc_flood_set:
7274         mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
7275         return err;
7276 }
7277
7278 static void mlxsw_sp_rif_vlan_deconfigure(struct mlxsw_sp_rif *rif)
7279 {
7280         u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
7281         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7282         struct mlxsw_sp_fid *fid = rif->fid;
7283
7284         mlxsw_sp_fid_rif_set(fid, NULL);
7285         mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7286                             mlxsw_sp_fid_index(fid), false);
7287         mlxsw_sp_rif_macvlan_flush(rif);
7288         mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7289                                mlxsw_sp_router_port(mlxsw_sp), false);
7290         mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7291                                mlxsw_sp_router_port(mlxsw_sp), false);
7292         mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
7293 }
7294
7295 static struct mlxsw_sp_fid *
7296 mlxsw_sp_rif_vlan_fid_get(struct mlxsw_sp_rif *rif,
7297                           struct netlink_ext_ack *extack)
7298 {
7299         struct net_device *br_dev = rif->dev;
7300         u16 vid;
7301         int err;
7302
7303         if (is_vlan_dev(rif->dev)) {
7304                 vid = vlan_dev_vlan_id(rif->dev);
7305                 br_dev = vlan_dev_real_dev(rif->dev);
7306                 if (WARN_ON(!netif_is_bridge_master(br_dev)))
7307                         return ERR_PTR(-EINVAL);
7308         } else {
7309                 err = br_vlan_get_pvid(rif->dev, &vid);
7310                 if (err < 0 || !vid) {
7311                         NL_SET_ERR_MSG_MOD(extack, "Couldn't determine bridge PVID");
7312                         return ERR_PTR(-EINVAL);
7313                 }
7314         }
7315
7316         return mlxsw_sp_bridge_fid_get(rif->mlxsw_sp, br_dev, vid, extack);
7317 }
7318
7319 static void mlxsw_sp_rif_vlan_fdb_del(struct mlxsw_sp_rif *rif, const char *mac)
7320 {
7321         u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
7322         struct switchdev_notifier_fdb_info info;
7323         struct net_device *br_dev;
7324         struct net_device *dev;
7325
7326         br_dev = is_vlan_dev(rif->dev) ? vlan_dev_real_dev(rif->dev) : rif->dev;
7327         dev = br_fdb_find_port(br_dev, mac, vid);
7328         if (!dev)
7329                 return;
7330
7331         info.addr = mac;
7332         info.vid = vid;
7333         call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info,
7334                                  NULL);
7335 }
7336
7337 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_vlan_ops = {
7338         .type                   = MLXSW_SP_RIF_TYPE_VLAN,
7339         .rif_size               = sizeof(struct mlxsw_sp_rif),
7340         .configure              = mlxsw_sp_rif_vlan_configure,
7341         .deconfigure            = mlxsw_sp_rif_vlan_deconfigure,
7342         .fid_get                = mlxsw_sp_rif_vlan_fid_get,
7343         .fdb_del                = mlxsw_sp_rif_vlan_fdb_del,
7344 };
7345
7346 static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif)
7347 {
7348         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7349         u16 fid_index = mlxsw_sp_fid_index(rif->fid);
7350         int err;
7351
7352         err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index,
7353                                        true);
7354         if (err)
7355                 return err;
7356
7357         err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7358                                      mlxsw_sp_router_port(mlxsw_sp), true);
7359         if (err)
7360                 goto err_fid_mc_flood_set;
7361
7362         err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7363                                      mlxsw_sp_router_port(mlxsw_sp), true);
7364         if (err)
7365                 goto err_fid_bc_flood_set;
7366
7367         err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7368                                   mlxsw_sp_fid_index(rif->fid), true);
7369         if (err)
7370                 goto err_rif_fdb_op;
7371
7372         mlxsw_sp_fid_rif_set(rif->fid, rif);
7373         return 0;
7374
7375 err_rif_fdb_op:
7376         mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7377                                mlxsw_sp_router_port(mlxsw_sp), false);
7378 err_fid_bc_flood_set:
7379         mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7380                                mlxsw_sp_router_port(mlxsw_sp), false);
7381 err_fid_mc_flood_set:
7382         mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
7383         return err;
7384 }
7385
7386 static void mlxsw_sp_rif_fid_deconfigure(struct mlxsw_sp_rif *rif)
7387 {
7388         u16 fid_index = mlxsw_sp_fid_index(rif->fid);
7389         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7390         struct mlxsw_sp_fid *fid = rif->fid;
7391
7392         mlxsw_sp_fid_rif_set(fid, NULL);
7393         mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7394                             mlxsw_sp_fid_index(fid), false);
7395         mlxsw_sp_rif_macvlan_flush(rif);
7396         mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7397                                mlxsw_sp_router_port(mlxsw_sp), false);
7398         mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7399                                mlxsw_sp_router_port(mlxsw_sp), false);
7400         mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
7401 }
7402
7403 static struct mlxsw_sp_fid *
7404 mlxsw_sp_rif_fid_fid_get(struct mlxsw_sp_rif *rif,
7405                          struct netlink_ext_ack *extack)
7406 {
7407         return mlxsw_sp_bridge_fid_get(rif->mlxsw_sp, rif->dev, 0, extack);
7408 }
7409
7410 static void mlxsw_sp_rif_fid_fdb_del(struct mlxsw_sp_rif *rif, const char *mac)
7411 {
7412         struct switchdev_notifier_fdb_info info;
7413         struct net_device *dev;
7414
7415         dev = br_fdb_find_port(rif->dev, mac, 0);
7416         if (!dev)
7417                 return;
7418
7419         info.addr = mac;
7420         info.vid = 0;
7421         call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info,
7422                                  NULL);
7423 }
7424
7425 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_fid_ops = {
7426         .type                   = MLXSW_SP_RIF_TYPE_FID,
7427         .rif_size               = sizeof(struct mlxsw_sp_rif),
7428         .configure              = mlxsw_sp_rif_fid_configure,
7429         .deconfigure            = mlxsw_sp_rif_fid_deconfigure,
7430         .fid_get                = mlxsw_sp_rif_fid_fid_get,
7431         .fdb_del                = mlxsw_sp_rif_fid_fdb_del,
7432 };
7433
7434 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_vlan_emu_ops = {
7435         .type                   = MLXSW_SP_RIF_TYPE_VLAN,
7436         .rif_size               = sizeof(struct mlxsw_sp_rif),
7437         .configure              = mlxsw_sp_rif_fid_configure,
7438         .deconfigure            = mlxsw_sp_rif_fid_deconfigure,
7439         .fid_get                = mlxsw_sp_rif_vlan_fid_get,
7440         .fdb_del                = mlxsw_sp_rif_vlan_fdb_del,
7441 };
7442
7443 static struct mlxsw_sp_rif_ipip_lb *
7444 mlxsw_sp_rif_ipip_lb_rif(struct mlxsw_sp_rif *rif)
7445 {
7446         return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
7447 }
7448
7449 static void
7450 mlxsw_sp_rif_ipip_lb_setup(struct mlxsw_sp_rif *rif,
7451                            const struct mlxsw_sp_rif_params *params)
7452 {
7453         struct mlxsw_sp_rif_params_ipip_lb *params_lb;
7454         struct mlxsw_sp_rif_ipip_lb *rif_lb;
7455
7456         params_lb = container_of(params, struct mlxsw_sp_rif_params_ipip_lb,
7457                                  common);
7458         rif_lb = mlxsw_sp_rif_ipip_lb_rif(rif);
7459         rif_lb->lb_config = params_lb->lb_config;
7460 }
7461
7462 static int
7463 mlxsw_sp1_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif)
7464 {
7465         struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
7466         u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev);
7467         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7468         struct mlxsw_sp_vr *ul_vr;
7469         int err;
7470
7471         ul_vr = mlxsw_sp_vr_get(mlxsw_sp, ul_tb_id, NULL);
7472         if (IS_ERR(ul_vr))
7473                 return PTR_ERR(ul_vr);
7474
7475         err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr->id, 0, true);
7476         if (err)
7477                 goto err_loopback_op;
7478
7479         lb_rif->ul_vr_id = ul_vr->id;
7480         lb_rif->ul_rif_id = 0;
7481         ++ul_vr->rif_count;
7482         return 0;
7483
7484 err_loopback_op:
7485         mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
7486         return err;
7487 }
7488
7489 static void mlxsw_sp1_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
7490 {
7491         struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
7492         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7493         struct mlxsw_sp_vr *ul_vr;
7494
7495         ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id];
7496         mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr->id, 0, false);
7497
7498         --ul_vr->rif_count;
7499         mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
7500 }
7501
7502 static const struct mlxsw_sp_rif_ops mlxsw_sp1_rif_ipip_lb_ops = {
7503         .type                   = MLXSW_SP_RIF_TYPE_IPIP_LB,
7504         .rif_size               = sizeof(struct mlxsw_sp_rif_ipip_lb),
7505         .setup                  = mlxsw_sp_rif_ipip_lb_setup,
7506         .configure              = mlxsw_sp1_rif_ipip_lb_configure,
7507         .deconfigure            = mlxsw_sp1_rif_ipip_lb_deconfigure,
7508 };
7509
7510 const struct mlxsw_sp_rif_ops *mlxsw_sp1_rif_ops_arr[] = {
7511         [MLXSW_SP_RIF_TYPE_SUBPORT]     = &mlxsw_sp_rif_subport_ops,
7512         [MLXSW_SP_RIF_TYPE_VLAN]        = &mlxsw_sp_rif_vlan_emu_ops,
7513         [MLXSW_SP_RIF_TYPE_FID]         = &mlxsw_sp_rif_fid_ops,
7514         [MLXSW_SP_RIF_TYPE_IPIP_LB]     = &mlxsw_sp1_rif_ipip_lb_ops,
7515 };
7516
7517 static int
7518 mlxsw_sp_rif_ipip_lb_ul_rif_op(struct mlxsw_sp_rif *ul_rif, bool enable)
7519 {
7520         struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp;
7521         char ritr_pl[MLXSW_REG_RITR_LEN];
7522
7523         mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
7524                             ul_rif->rif_index, ul_rif->vr_id, IP_MAX_MTU);
7525         mlxsw_reg_ritr_loopback_protocol_set(ritr_pl,
7526                                              MLXSW_REG_RITR_LOOPBACK_GENERIC);
7527
7528         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
7529 }
7530
7531 static struct mlxsw_sp_rif *
7532 mlxsw_sp_ul_rif_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr,
7533                        struct netlink_ext_ack *extack)
7534 {
7535         struct mlxsw_sp_rif *ul_rif;
7536         u16 rif_index;
7537         int err;
7538
7539         err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index);
7540         if (err) {
7541                 NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces");
7542                 return ERR_PTR(err);
7543         }
7544
7545         ul_rif = mlxsw_sp_rif_alloc(sizeof(*ul_rif), rif_index, vr->id, NULL);
7546         if (!ul_rif)
7547                 return ERR_PTR(-ENOMEM);
7548
7549         mlxsw_sp->router->rifs[rif_index] = ul_rif;
7550         ul_rif->mlxsw_sp = mlxsw_sp;
7551         err = mlxsw_sp_rif_ipip_lb_ul_rif_op(ul_rif, true);
7552         if (err)
7553                 goto ul_rif_op_err;
7554
7555         return ul_rif;
7556
7557 ul_rif_op_err:
7558         mlxsw_sp->router->rifs[rif_index] = NULL;
7559         kfree(ul_rif);
7560         return ERR_PTR(err);
7561 }
7562
7563 static void mlxsw_sp_ul_rif_destroy(struct mlxsw_sp_rif *ul_rif)
7564 {
7565         struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp;
7566
7567         mlxsw_sp_rif_ipip_lb_ul_rif_op(ul_rif, false);
7568         mlxsw_sp->router->rifs[ul_rif->rif_index] = NULL;
7569         kfree(ul_rif);
7570 }
7571
7572 static struct mlxsw_sp_rif *
7573 mlxsw_sp_ul_rif_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
7574                     struct netlink_ext_ack *extack)
7575 {
7576         struct mlxsw_sp_vr *vr;
7577         int err;
7578
7579         vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, extack);
7580         if (IS_ERR(vr))
7581                 return ERR_CAST(vr);
7582
7583         if (refcount_inc_not_zero(&vr->ul_rif_refcnt))
7584                 return vr->ul_rif;
7585
7586         vr->ul_rif = mlxsw_sp_ul_rif_create(mlxsw_sp, vr, extack);
7587         if (IS_ERR(vr->ul_rif)) {
7588                 err = PTR_ERR(vr->ul_rif);
7589                 goto err_ul_rif_create;
7590         }
7591
7592         vr->rif_count++;
7593         refcount_set(&vr->ul_rif_refcnt, 1);
7594
7595         return vr->ul_rif;
7596
7597 err_ul_rif_create:
7598         mlxsw_sp_vr_put(mlxsw_sp, vr);
7599         return ERR_PTR(err);
7600 }
7601
7602 static void mlxsw_sp_ul_rif_put(struct mlxsw_sp_rif *ul_rif)
7603 {
7604         struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp;
7605         struct mlxsw_sp_vr *vr;
7606
7607         vr = &mlxsw_sp->router->vrs[ul_rif->vr_id];
7608
7609         if (!refcount_dec_and_test(&vr->ul_rif_refcnt))
7610                 return;
7611
7612         vr->rif_count--;
7613         mlxsw_sp_ul_rif_destroy(ul_rif);
7614         mlxsw_sp_vr_put(mlxsw_sp, vr);
7615 }
7616
7617 int mlxsw_sp_router_ul_rif_get(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
7618                                u16 *ul_rif_index)
7619 {
7620         struct mlxsw_sp_rif *ul_rif;
7621
7622         ASSERT_RTNL();
7623
7624         ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, NULL);
7625         if (IS_ERR(ul_rif))
7626                 return PTR_ERR(ul_rif);
7627         *ul_rif_index = ul_rif->rif_index;
7628
7629         return 0;
7630 }
7631
7632 void mlxsw_sp_router_ul_rif_put(struct mlxsw_sp *mlxsw_sp, u16 ul_rif_index)
7633 {
7634         struct mlxsw_sp_rif *ul_rif;
7635
7636         ASSERT_RTNL();
7637
7638         ul_rif = mlxsw_sp->router->rifs[ul_rif_index];
7639         if (WARN_ON(!ul_rif))
7640                 return;
7641
7642         mlxsw_sp_ul_rif_put(ul_rif);
7643 }
7644
7645 static int
7646 mlxsw_sp2_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif)
7647 {
7648         struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
7649         u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev);
7650         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7651         struct mlxsw_sp_rif *ul_rif;
7652         int err;
7653
7654         ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, NULL);
7655         if (IS_ERR(ul_rif))
7656                 return PTR_ERR(ul_rif);
7657
7658         err = mlxsw_sp_rif_ipip_lb_op(lb_rif, 0, ul_rif->rif_index, true);
7659         if (err)
7660                 goto err_loopback_op;
7661
7662         lb_rif->ul_vr_id = 0;
7663         lb_rif->ul_rif_id = ul_rif->rif_index;
7664
7665         return 0;
7666
7667 err_loopback_op:
7668         mlxsw_sp_ul_rif_put(ul_rif);
7669         return err;
7670 }
7671
7672 static void mlxsw_sp2_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
7673 {
7674         struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
7675         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7676         struct mlxsw_sp_rif *ul_rif;
7677
7678         ul_rif = mlxsw_sp_rif_by_index(mlxsw_sp, lb_rif->ul_rif_id);
7679         mlxsw_sp_rif_ipip_lb_op(lb_rif, 0, lb_rif->ul_rif_id, false);
7680         mlxsw_sp_ul_rif_put(ul_rif);
7681 }
7682
7683 static const struct mlxsw_sp_rif_ops mlxsw_sp2_rif_ipip_lb_ops = {
7684         .type                   = MLXSW_SP_RIF_TYPE_IPIP_LB,
7685         .rif_size               = sizeof(struct mlxsw_sp_rif_ipip_lb),
7686         .setup                  = mlxsw_sp_rif_ipip_lb_setup,
7687         .configure              = mlxsw_sp2_rif_ipip_lb_configure,
7688         .deconfigure            = mlxsw_sp2_rif_ipip_lb_deconfigure,
7689 };
7690
7691 const struct mlxsw_sp_rif_ops *mlxsw_sp2_rif_ops_arr[] = {
7692         [MLXSW_SP_RIF_TYPE_SUBPORT]     = &mlxsw_sp_rif_subport_ops,
7693         [MLXSW_SP_RIF_TYPE_VLAN]        = &mlxsw_sp_rif_vlan_emu_ops,
7694         [MLXSW_SP_RIF_TYPE_FID]         = &mlxsw_sp_rif_fid_ops,
7695         [MLXSW_SP_RIF_TYPE_IPIP_LB]     = &mlxsw_sp2_rif_ipip_lb_ops,
7696 };
7697
7698 static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp)
7699 {
7700         u64 max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
7701
7702         mlxsw_sp->router->rifs = kcalloc(max_rifs,
7703                                          sizeof(struct mlxsw_sp_rif *),
7704                                          GFP_KERNEL);
7705         if (!mlxsw_sp->router->rifs)
7706                 return -ENOMEM;
7707
7708         return 0;
7709 }
7710
7711 static void mlxsw_sp_rifs_fini(struct mlxsw_sp *mlxsw_sp)
7712 {
7713         int i;
7714
7715         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
7716                 WARN_ON_ONCE(mlxsw_sp->router->rifs[i]);
7717
7718         kfree(mlxsw_sp->router->rifs);
7719 }
7720
7721 static int
7722 mlxsw_sp_ipip_config_tigcr(struct mlxsw_sp *mlxsw_sp)
7723 {
7724         char tigcr_pl[MLXSW_REG_TIGCR_LEN];
7725
7726         mlxsw_reg_tigcr_pack(tigcr_pl, true, 0);
7727         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tigcr), tigcr_pl);
7728 }
7729
7730 static int mlxsw_sp_ipips_init(struct mlxsw_sp *mlxsw_sp)
7731 {
7732         mlxsw_sp->router->ipip_ops_arr = mlxsw_sp_ipip_ops_arr;
7733         INIT_LIST_HEAD(&mlxsw_sp->router->ipip_list);
7734         return mlxsw_sp_ipip_config_tigcr(mlxsw_sp);
7735 }
7736
7737 static void mlxsw_sp_ipips_fini(struct mlxsw_sp *mlxsw_sp)
7738 {
7739         WARN_ON(!list_empty(&mlxsw_sp->router->ipip_list));
7740 }
7741
7742 static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
7743 {
7744         struct mlxsw_sp_router *router;
7745
7746         /* Flush pending FIB notifications and then flush the device's
7747          * table before requesting another dump. The FIB notification
7748          * block is unregistered, so no need to take RTNL.
7749          */
7750         mlxsw_core_flush_owq();
7751         router = container_of(nb, struct mlxsw_sp_router, fib_nb);
7752         mlxsw_sp_router_fib_flush(router->mlxsw_sp);
7753 }
7754
7755 #ifdef CONFIG_IP_ROUTE_MULTIPATH
7756 static void mlxsw_sp_mp_hash_header_set(char *recr2_pl, int header)
7757 {
7758         mlxsw_reg_recr2_outer_header_enables_set(recr2_pl, header, true);
7759 }
7760
7761 static void mlxsw_sp_mp_hash_field_set(char *recr2_pl, int field)
7762 {
7763         mlxsw_reg_recr2_outer_header_fields_enable_set(recr2_pl, field, true);
7764 }
7765
7766 static void mlxsw_sp_mp4_hash_init(char *recr2_pl)
7767 {
7768         bool only_l3 = !init_net.ipv4.sysctl_fib_multipath_hash_policy;
7769
7770         mlxsw_sp_mp_hash_header_set(recr2_pl,
7771                                     MLXSW_REG_RECR2_IPV4_EN_NOT_TCP_NOT_UDP);
7772         mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV4_EN_TCP_UDP);
7773         mlxsw_reg_recr2_ipv4_sip_enable(recr2_pl);
7774         mlxsw_reg_recr2_ipv4_dip_enable(recr2_pl);
7775         if (only_l3)
7776                 return;
7777         mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_EN_IPV4);
7778         mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV4_PROTOCOL);
7779         mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_SPORT);
7780         mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_DPORT);
7781 }
7782
7783 static void mlxsw_sp_mp6_hash_init(char *recr2_pl)
7784 {
7785         bool only_l3 = !ip6_multipath_hash_policy(&init_net);
7786
7787         mlxsw_sp_mp_hash_header_set(recr2_pl,
7788                                     MLXSW_REG_RECR2_IPV6_EN_NOT_TCP_NOT_UDP);
7789         mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV6_EN_TCP_UDP);
7790         mlxsw_reg_recr2_ipv6_sip_enable(recr2_pl);
7791         mlxsw_reg_recr2_ipv6_dip_enable(recr2_pl);
7792         mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_NEXT_HEADER);
7793         if (only_l3) {
7794                 mlxsw_sp_mp_hash_field_set(recr2_pl,
7795                                            MLXSW_REG_RECR2_IPV6_FLOW_LABEL);
7796         } else {
7797                 mlxsw_sp_mp_hash_header_set(recr2_pl,
7798                                             MLXSW_REG_RECR2_TCP_UDP_EN_IPV6);
7799                 mlxsw_sp_mp_hash_field_set(recr2_pl,
7800                                            MLXSW_REG_RECR2_TCP_UDP_SPORT);
7801                 mlxsw_sp_mp_hash_field_set(recr2_pl,
7802                                            MLXSW_REG_RECR2_TCP_UDP_DPORT);
7803         }
7804 }
7805
7806 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
7807 {
7808         char recr2_pl[MLXSW_REG_RECR2_LEN];
7809         u32 seed;
7810
7811         get_random_bytes(&seed, sizeof(seed));
7812         mlxsw_reg_recr2_pack(recr2_pl, seed);
7813         mlxsw_sp_mp4_hash_init(recr2_pl);
7814         mlxsw_sp_mp6_hash_init(recr2_pl);
7815
7816         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(recr2), recr2_pl);
7817 }
7818 #else
/* Without CONFIG_IP_ROUTE_MULTIPATH there is nothing to program. */
static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
{
	return 0;
}
7823 #endif
7824
7825 static int mlxsw_sp_dscp_init(struct mlxsw_sp *mlxsw_sp)
7826 {
7827         char rdpm_pl[MLXSW_REG_RDPM_LEN];
7828         unsigned int i;
7829
7830         MLXSW_REG_ZERO(rdpm, rdpm_pl);
7831
7832         /* HW is determining switch priority based on DSCP-bits, but the
7833          * kernel is still doing that based on the ToS. Since there's a
7834          * mismatch in bits we need to make sure to translate the right
7835          * value ToS would observe, skipping the 2 least-significant ECN bits.
7836          */
7837         for (i = 0; i < MLXSW_REG_RDPM_DSCP_ENTRY_REC_MAX_COUNT; i++)
7838                 mlxsw_reg_rdpm_pack(rdpm_pl, i, rt_tos2priority(i << 2));
7839
7840         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rdpm), rdpm_pl);
7841 }
7842
7843 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
7844 {
7845         bool usp = init_net.ipv4.sysctl_ip_fwd_update_priority;
7846         char rgcr_pl[MLXSW_REG_RGCR_LEN];
7847         u64 max_rifs;
7848         int err;
7849
7850         if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS))
7851                 return -EIO;
7852         max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
7853
7854         mlxsw_reg_rgcr_pack(rgcr_pl, true, true);
7855         mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
7856         mlxsw_reg_rgcr_usp_set(rgcr_pl, usp);
7857         err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
7858         if (err)
7859                 return err;
7860         return 0;
7861 }
7862
7863 static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
7864 {
7865         char rgcr_pl[MLXSW_REG_RGCR_LEN];
7866
7867         mlxsw_reg_rgcr_pack(rgcr_pl, false, false);
7868         mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
7869 }
7870
7871 int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
7872 {
7873         struct mlxsw_sp_router *router;
7874         int err;
7875
7876         router = kzalloc(sizeof(*mlxsw_sp->router), GFP_KERNEL);
7877         if (!router)
7878                 return -ENOMEM;
7879         mlxsw_sp->router = router;
7880         router->mlxsw_sp = mlxsw_sp;
7881
7882         router->inetaddr_nb.notifier_call = mlxsw_sp_inetaddr_event;
7883         err = register_inetaddr_notifier(&router->inetaddr_nb);
7884         if (err)
7885                 goto err_register_inetaddr_notifier;
7886
7887         router->inet6addr_nb.notifier_call = mlxsw_sp_inet6addr_event;
7888         err = register_inet6addr_notifier(&router->inet6addr_nb);
7889         if (err)
7890                 goto err_register_inet6addr_notifier;
7891
7892         INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_neighs_list);
7893         err = __mlxsw_sp_router_init(mlxsw_sp);
7894         if (err)
7895                 goto err_router_init;
7896
7897         err = mlxsw_sp_rifs_init(mlxsw_sp);
7898         if (err)
7899                 goto err_rifs_init;
7900
7901         err = mlxsw_sp_ipips_init(mlxsw_sp);
7902         if (err)
7903                 goto err_ipips_init;
7904
7905         err = rhashtable_init(&mlxsw_sp->router->nexthop_ht,
7906                               &mlxsw_sp_nexthop_ht_params);
7907         if (err)
7908                 goto err_nexthop_ht_init;
7909
7910         err = rhashtable_init(&mlxsw_sp->router->nexthop_group_ht,
7911                               &mlxsw_sp_nexthop_group_ht_params);
7912         if (err)
7913                 goto err_nexthop_group_ht_init;
7914
7915         INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_list);
7916         err = mlxsw_sp_lpm_init(mlxsw_sp);
7917         if (err)
7918                 goto err_lpm_init;
7919
7920         err = mlxsw_sp_mr_init(mlxsw_sp, &mlxsw_sp_mr_tcam_ops);
7921         if (err)
7922                 goto err_mr_init;
7923
7924         err = mlxsw_sp_vrs_init(mlxsw_sp);
7925         if (err)
7926                 goto err_vrs_init;
7927
7928         err = mlxsw_sp_neigh_init(mlxsw_sp);
7929         if (err)
7930                 goto err_neigh_init;
7931
7932         mlxsw_sp->router->netevent_nb.notifier_call =
7933                 mlxsw_sp_router_netevent_event;
7934         err = register_netevent_notifier(&mlxsw_sp->router->netevent_nb);
7935         if (err)
7936                 goto err_register_netevent_notifier;
7937
7938         err = mlxsw_sp_mp_hash_init(mlxsw_sp);
7939         if (err)
7940                 goto err_mp_hash_init;
7941
7942         err = mlxsw_sp_dscp_init(mlxsw_sp);
7943         if (err)
7944                 goto err_dscp_init;
7945
7946         mlxsw_sp->router->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
7947         err = register_fib_notifier(&mlxsw_sp->router->fib_nb,
7948                                     mlxsw_sp_router_fib_dump_flush);
7949         if (err)
7950                 goto err_register_fib_notifier;
7951
7952         return 0;
7953
7954 err_register_fib_notifier:
7955 err_dscp_init:
7956 err_mp_hash_init:
7957         unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
7958 err_register_netevent_notifier:
7959         mlxsw_sp_neigh_fini(mlxsw_sp);
7960 err_neigh_init:
7961         mlxsw_sp_vrs_fini(mlxsw_sp);
7962 err_vrs_init:
7963         mlxsw_sp_mr_fini(mlxsw_sp);
7964 err_mr_init:
7965         mlxsw_sp_lpm_fini(mlxsw_sp);
7966 err_lpm_init:
7967         rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
7968 err_nexthop_group_ht_init:
7969         rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
7970 err_nexthop_ht_init:
7971         mlxsw_sp_ipips_fini(mlxsw_sp);
7972 err_ipips_init:
7973         mlxsw_sp_rifs_fini(mlxsw_sp);
7974 err_rifs_init:
7975         __mlxsw_sp_router_fini(mlxsw_sp);
7976 err_router_init:
7977         unregister_inet6addr_notifier(&router->inet6addr_nb);
7978 err_register_inet6addr_notifier:
7979         unregister_inetaddr_notifier(&router->inetaddr_nb);
7980 err_register_inetaddr_notifier:
7981         kfree(mlxsw_sp->router);
7982         return err;
7983 }
7984
7985 void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
7986 {
7987         unregister_fib_notifier(&mlxsw_sp->router->fib_nb);
7988         unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
7989         mlxsw_sp_neigh_fini(mlxsw_sp);
7990         mlxsw_sp_vrs_fini(mlxsw_sp);
7991         mlxsw_sp_mr_fini(mlxsw_sp);
7992         mlxsw_sp_lpm_fini(mlxsw_sp);
7993         rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
7994         rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
7995         mlxsw_sp_ipips_fini(mlxsw_sp);
7996         mlxsw_sp_rifs_fini(mlxsw_sp);
7997         __mlxsw_sp_router_fini(mlxsw_sp);
7998         unregister_inet6addr_notifier(&mlxsw_sp->router->inet6addr_nb);
7999         unregister_inetaddr_notifier(&mlxsw_sp->router->inetaddr_nb);
8000         kfree(mlxsw_sp->router);
8001 }