asedeno.scripts.mit.edu Git - linux.git/blob - drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
[linux.git] / drivers / net / ethernet / mellanox / mlxsw / spectrum_router.c
1 // SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
2 /* Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved */
3
4 #include <linux/kernel.h>
5 #include <linux/types.h>
6 #include <linux/rhashtable.h>
7 #include <linux/bitops.h>
8 #include <linux/in6.h>
9 #include <linux/notifier.h>
10 #include <linux/inetdevice.h>
11 #include <linux/netdevice.h>
12 #include <linux/if_bridge.h>
13 #include <linux/socket.h>
14 #include <linux/route.h>
15 #include <linux/gcd.h>
16 #include <linux/random.h>
17 #include <linux/if_macvlan.h>
18 #include <linux/refcount.h>
19 #include <net/netevent.h>
20 #include <net/neighbour.h>
21 #include <net/arp.h>
22 #include <net/ip_fib.h>
23 #include <net/ip6_fib.h>
24 #include <net/fib_rules.h>
25 #include <net/ip_tunnels.h>
26 #include <net/l3mdev.h>
27 #include <net/addrconf.h>
28 #include <net/ndisc.h>
29 #include <net/ipv6.h>
30 #include <net/fib_notifier.h>
31 #include <net/switchdev.h>
32
33 #include "spectrum.h"
34 #include "core.h"
35 #include "reg.h"
36 #include "spectrum_cnt.h"
37 #include "spectrum_dpipe.h"
38 #include "spectrum_ipip.h"
39 #include "spectrum_mr.h"
40 #include "spectrum_mr_tcam.h"
41 #include "spectrum_router.h"
42 #include "spectrum_span.h"
43
44 struct mlxsw_sp_fib;
45 struct mlxsw_sp_vr;
46 struct mlxsw_sp_lpm_tree;
47 struct mlxsw_sp_rif_ops;
48
49 struct mlxsw_sp_router {
50         struct mlxsw_sp *mlxsw_sp;
51         struct mlxsw_sp_rif **rifs;
52         struct mlxsw_sp_vr *vrs;
53         struct rhashtable neigh_ht;
54         struct rhashtable nexthop_group_ht;
55         struct rhashtable nexthop_ht;
56         struct list_head nexthop_list;
57         struct {
58                 /* One tree for each protocol: IPv4 and IPv6 */
59                 struct mlxsw_sp_lpm_tree *proto_trees[2];
60                 struct mlxsw_sp_lpm_tree *trees;
61                 unsigned int tree_count;
62         } lpm;
63         struct {
64                 struct delayed_work dw;
65                 unsigned long interval; /* ms */
66         } neighs_update;
67         struct delayed_work nexthop_probe_dw;
68 #define MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL 5000 /* ms */
69         struct list_head nexthop_neighs_list;
70         struct list_head ipip_list;
71         bool aborted;
72         struct notifier_block fib_nb;
73         struct notifier_block netevent_nb;
74         struct notifier_block inetaddr_nb;
75         struct notifier_block inet6addr_nb;
76         const struct mlxsw_sp_rif_ops **rif_ops_arr;
77         const struct mlxsw_sp_ipip_ops **ipip_ops_arr;
78 };
79
80 struct mlxsw_sp_rif {
81         struct list_head nexthop_list;
82         struct list_head neigh_list;
83         struct net_device *dev; /* NULL for underlay RIF */
84         struct mlxsw_sp_fid *fid;
85         unsigned char addr[ETH_ALEN];
86         int mtu;
87         u16 rif_index;
88         u16 vr_id;
89         const struct mlxsw_sp_rif_ops *ops;
90         struct mlxsw_sp *mlxsw_sp;
91
92         unsigned int counter_ingress;
93         bool counter_ingress_valid;
94         unsigned int counter_egress;
95         bool counter_egress_valid;
96 };
97
98 struct mlxsw_sp_rif_params {
99         struct net_device *dev;
100         union {
101                 u16 system_port;
102                 u16 lag_id;
103         };
104         u16 vid;
105         bool lag;
106 };
107
108 struct mlxsw_sp_rif_subport {
109         struct mlxsw_sp_rif common;
110         refcount_t ref_count;
111         union {
112                 u16 system_port;
113                 u16 lag_id;
114         };
115         u16 vid;
116         bool lag;
117 };
118
119 struct mlxsw_sp_rif_ipip_lb {
120         struct mlxsw_sp_rif common;
121         struct mlxsw_sp_rif_ipip_lb_config lb_config;
122         u16 ul_vr_id; /* Reserved for Spectrum-2. */
123         u16 ul_rif_id; /* Reserved for Spectrum. */
124 };
125
126 struct mlxsw_sp_rif_params_ipip_lb {
127         struct mlxsw_sp_rif_params common;
128         struct mlxsw_sp_rif_ipip_lb_config lb_config;
129 };
130
131 struct mlxsw_sp_rif_ops {
132         enum mlxsw_sp_rif_type type;
133         size_t rif_size;
134
135         void (*setup)(struct mlxsw_sp_rif *rif,
136                       const struct mlxsw_sp_rif_params *params);
137         int (*configure)(struct mlxsw_sp_rif *rif);
138         void (*deconfigure)(struct mlxsw_sp_rif *rif);
139         struct mlxsw_sp_fid * (*fid_get)(struct mlxsw_sp_rif *rif,
140                                          struct netlink_ext_ack *extack);
141         void (*fdb_del)(struct mlxsw_sp_rif *rif, const char *mac);
142 };
143
144 static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif);
145 static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree);
146 static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
147                                   struct mlxsw_sp_lpm_tree *lpm_tree);
148 static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
149                                      const struct mlxsw_sp_fib *fib,
150                                      u8 tree_id);
151 static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
152                                        const struct mlxsw_sp_fib *fib);
153
154 static unsigned int *
155 mlxsw_sp_rif_p_counter_get(struct mlxsw_sp_rif *rif,
156                            enum mlxsw_sp_rif_counter_dir dir)
157 {
158         switch (dir) {
159         case MLXSW_SP_RIF_COUNTER_EGRESS:
160                 return &rif->counter_egress;
161         case MLXSW_SP_RIF_COUNTER_INGRESS:
162                 return &rif->counter_ingress;
163         }
164         return NULL;
165 }
166
167 static bool
168 mlxsw_sp_rif_counter_valid_get(struct mlxsw_sp_rif *rif,
169                                enum mlxsw_sp_rif_counter_dir dir)
170 {
171         switch (dir) {
172         case MLXSW_SP_RIF_COUNTER_EGRESS:
173                 return rif->counter_egress_valid;
174         case MLXSW_SP_RIF_COUNTER_INGRESS:
175                 return rif->counter_ingress_valid;
176         }
177         return false;
178 }
179
180 static void
181 mlxsw_sp_rif_counter_valid_set(struct mlxsw_sp_rif *rif,
182                                enum mlxsw_sp_rif_counter_dir dir,
183                                bool valid)
184 {
185         switch (dir) {
186         case MLXSW_SP_RIF_COUNTER_EGRESS:
187                 rif->counter_egress_valid = valid;
188                 break;
189         case MLXSW_SP_RIF_COUNTER_INGRESS:
190                 rif->counter_ingress_valid = valid;
191                 break;
192         }
193 }
194
195 static int mlxsw_sp_rif_counter_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
196                                      unsigned int counter_index, bool enable,
197                                      enum mlxsw_sp_rif_counter_dir dir)
198 {
199         char ritr_pl[MLXSW_REG_RITR_LEN];
200         bool is_egress = false;
201         int err;
202
203         if (dir == MLXSW_SP_RIF_COUNTER_EGRESS)
204                 is_egress = true;
205         mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
206         err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
207         if (err)
208                 return err;
209
210         mlxsw_reg_ritr_counter_pack(ritr_pl, counter_index, enable,
211                                     is_egress);
212         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
213 }
214
215 int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp,
216                                    struct mlxsw_sp_rif *rif,
217                                    enum mlxsw_sp_rif_counter_dir dir, u64 *cnt)
218 {
219         char ricnt_pl[MLXSW_REG_RICNT_LEN];
220         unsigned int *p_counter_index;
221         bool valid;
222         int err;
223
224         valid = mlxsw_sp_rif_counter_valid_get(rif, dir);
225         if (!valid)
226                 return -EINVAL;
227
228         p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
229         if (!p_counter_index)
230                 return -EINVAL;
231         mlxsw_reg_ricnt_pack(ricnt_pl, *p_counter_index,
232                              MLXSW_REG_RICNT_OPCODE_NOP);
233         err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
234         if (err)
235                 return err;
236         *cnt = mlxsw_reg_ricnt_good_unicast_packets_get(ricnt_pl);
237         return 0;
238 }
239
240 static int mlxsw_sp_rif_counter_clear(struct mlxsw_sp *mlxsw_sp,
241                                       unsigned int counter_index)
242 {
243         char ricnt_pl[MLXSW_REG_RICNT_LEN];
244
245         mlxsw_reg_ricnt_pack(ricnt_pl, counter_index,
246                              MLXSW_REG_RICNT_OPCODE_CLEAR);
247         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
248 }
249
250 int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp *mlxsw_sp,
251                                struct mlxsw_sp_rif *rif,
252                                enum mlxsw_sp_rif_counter_dir dir)
253 {
254         unsigned int *p_counter_index;
255         int err;
256
257         p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
258         if (!p_counter_index)
259                 return -EINVAL;
260         err = mlxsw_sp_counter_alloc(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
261                                      p_counter_index);
262         if (err)
263                 return err;
264
265         err = mlxsw_sp_rif_counter_clear(mlxsw_sp, *p_counter_index);
266         if (err)
267                 goto err_counter_clear;
268
269         err = mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
270                                         *p_counter_index, true, dir);
271         if (err)
272                 goto err_counter_edit;
273         mlxsw_sp_rif_counter_valid_set(rif, dir, true);
274         return 0;
275
276 err_counter_edit:
277 err_counter_clear:
278         mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
279                               *p_counter_index);
280         return err;
281 }
282
283 void mlxsw_sp_rif_counter_free(struct mlxsw_sp *mlxsw_sp,
284                                struct mlxsw_sp_rif *rif,
285                                enum mlxsw_sp_rif_counter_dir dir)
286 {
287         unsigned int *p_counter_index;
288
289         if (!mlxsw_sp_rif_counter_valid_get(rif, dir))
290                 return;
291
292         p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
293         if (WARN_ON(!p_counter_index))
294                 return;
295         mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
296                                   *p_counter_index, false, dir);
297         mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
298                               *p_counter_index);
299         mlxsw_sp_rif_counter_valid_set(rif, dir, false);
300 }
301
302 static void mlxsw_sp_rif_counters_alloc(struct mlxsw_sp_rif *rif)
303 {
304         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
305         struct devlink *devlink;
306
307         devlink = priv_to_devlink(mlxsw_sp->core);
308         if (!devlink_dpipe_table_counter_enabled(devlink,
309                                                  MLXSW_SP_DPIPE_TABLE_NAME_ERIF))
310                 return;
311         mlxsw_sp_rif_counter_alloc(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
312 }
313
314 static void mlxsw_sp_rif_counters_free(struct mlxsw_sp_rif *rif)
315 {
316         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
317
318         mlxsw_sp_rif_counter_free(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
319 }
320
321 #define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE + 1)
322
323 struct mlxsw_sp_prefix_usage {
324         DECLARE_BITMAP(b, MLXSW_SP_PREFIX_COUNT);
325 };
326
/* Iterate @prefix over every prefix length whose bit is set in
 * @prefix_usage (a pointer to struct mlxsw_sp_prefix_usage).
 */
#define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage)		\
	for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT)
329
330 static bool
331 mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1,
332                          struct mlxsw_sp_prefix_usage *prefix_usage2)
333 {
334         return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
335 }
336
337 static void
338 mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1,
339                           struct mlxsw_sp_prefix_usage *prefix_usage2)
340 {
341         memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
342 }
343
344 static void
345 mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage,
346                           unsigned char prefix_len)
347 {
348         set_bit(prefix_len, prefix_usage->b);
349 }
350
351 static void
352 mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage,
353                             unsigned char prefix_len)
354 {
355         clear_bit(prefix_len, prefix_usage->b);
356 }
357
358 struct mlxsw_sp_fib_key {
359         unsigned char addr[sizeof(struct in6_addr)];
360         unsigned char prefix_len;
361 };
362
/* Kind of a FIB entry. */
enum mlxsw_sp_fib_entry_type {
	MLXSW_SP_FIB_ENTRY_TYPE_REMOTE,
	MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,
	MLXSW_SP_FIB_ENTRY_TYPE_TRAP,
	MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE,

	/* Special case of local delivery in which the packet is decapsulated
	 * on reception. There is deliberately no matching ENCAP type:
	 * encapsulation is a property of a next hop, not of a FIB entry. (A
	 * REMOTE entry may have several next hops, some of which may be
	 * encapsulating.)
	 */
	MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP,
	MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP,
};
378
379 struct mlxsw_sp_nexthop_group;
380
381 struct mlxsw_sp_fib_node {
382         struct list_head entry_list;
383         struct list_head list;
384         struct rhash_head ht_node;
385         struct mlxsw_sp_fib *fib;
386         struct mlxsw_sp_fib_key key;
387 };
388
389 struct mlxsw_sp_fib_entry_decap {
390         struct mlxsw_sp_ipip_entry *ipip_entry;
391         u32 tunnel_index;
392 };
393
394 struct mlxsw_sp_fib_entry {
395         struct list_head list;
396         struct mlxsw_sp_fib_node *fib_node;
397         enum mlxsw_sp_fib_entry_type type;
398         struct list_head nexthop_group_node;
399         struct mlxsw_sp_nexthop_group *nh_group;
400         struct mlxsw_sp_fib_entry_decap decap; /* Valid for decap entries. */
401 };
402
403 struct mlxsw_sp_fib4_entry {
404         struct mlxsw_sp_fib_entry common;
405         u32 tb_id;
406         u32 prio;
407         u8 tos;
408         u8 type;
409 };
410
411 struct mlxsw_sp_fib6_entry {
412         struct mlxsw_sp_fib_entry common;
413         struct list_head rt6_list;
414         unsigned int nrt6;
415 };
416
417 struct mlxsw_sp_rt6 {
418         struct list_head list;
419         struct fib6_info *rt;
420 };
421
422 struct mlxsw_sp_lpm_tree {
423         u8 id; /* tree ID */
424         unsigned int ref_count;
425         enum mlxsw_sp_l3proto proto;
426         unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT];
427         struct mlxsw_sp_prefix_usage prefix_usage;
428 };
429
430 struct mlxsw_sp_fib {
431         struct rhashtable ht;
432         struct list_head node_list;
433         struct mlxsw_sp_vr *vr;
434         struct mlxsw_sp_lpm_tree *lpm_tree;
435         enum mlxsw_sp_l3proto proto;
436 };
437
438 struct mlxsw_sp_vr {
439         u16 id; /* virtual router ID */
440         u32 tb_id; /* kernel fib table id */
441         unsigned int rif_count;
442         struct mlxsw_sp_fib *fib4;
443         struct mlxsw_sp_fib *fib6;
444         struct mlxsw_sp_mr_table *mr_table[MLXSW_SP_L3_PROTO_MAX];
445         struct mlxsw_sp_rif *ul_rif;
446         refcount_t ul_rif_refcnt;
447 };
448
449 static const struct rhashtable_params mlxsw_sp_fib_ht_params;
450
451 static struct mlxsw_sp_fib *mlxsw_sp_fib_create(struct mlxsw_sp *mlxsw_sp,
452                                                 struct mlxsw_sp_vr *vr,
453                                                 enum mlxsw_sp_l3proto proto)
454 {
455         struct mlxsw_sp_lpm_tree *lpm_tree;
456         struct mlxsw_sp_fib *fib;
457         int err;
458
459         lpm_tree = mlxsw_sp->router->lpm.proto_trees[proto];
460         fib = kzalloc(sizeof(*fib), GFP_KERNEL);
461         if (!fib)
462                 return ERR_PTR(-ENOMEM);
463         err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params);
464         if (err)
465                 goto err_rhashtable_init;
466         INIT_LIST_HEAD(&fib->node_list);
467         fib->proto = proto;
468         fib->vr = vr;
469         fib->lpm_tree = lpm_tree;
470         mlxsw_sp_lpm_tree_hold(lpm_tree);
471         err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, lpm_tree->id);
472         if (err)
473                 goto err_lpm_tree_bind;
474         return fib;
475
476 err_lpm_tree_bind:
477         mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
478 err_rhashtable_init:
479         kfree(fib);
480         return ERR_PTR(err);
481 }
482
483 static void mlxsw_sp_fib_destroy(struct mlxsw_sp *mlxsw_sp,
484                                  struct mlxsw_sp_fib *fib)
485 {
486         mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, fib);
487         mlxsw_sp_lpm_tree_put(mlxsw_sp, fib->lpm_tree);
488         WARN_ON(!list_empty(&fib->node_list));
489         rhashtable_destroy(&fib->ht);
490         kfree(fib);
491 }
492
493 static struct mlxsw_sp_lpm_tree *
494 mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp)
495 {
496         static struct mlxsw_sp_lpm_tree *lpm_tree;
497         int i;
498
499         for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
500                 lpm_tree = &mlxsw_sp->router->lpm.trees[i];
501                 if (lpm_tree->ref_count == 0)
502                         return lpm_tree;
503         }
504         return NULL;
505 }
506
507 static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp,
508                                    struct mlxsw_sp_lpm_tree *lpm_tree)
509 {
510         char ralta_pl[MLXSW_REG_RALTA_LEN];
511
512         mlxsw_reg_ralta_pack(ralta_pl, true,
513                              (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
514                              lpm_tree->id);
515         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
516 }
517
518 static void mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp,
519                                    struct mlxsw_sp_lpm_tree *lpm_tree)
520 {
521         char ralta_pl[MLXSW_REG_RALTA_LEN];
522
523         mlxsw_reg_ralta_pack(ralta_pl, false,
524                              (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
525                              lpm_tree->id);
526         mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
527 }
528
529 static int
530 mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp,
531                                   struct mlxsw_sp_prefix_usage *prefix_usage,
532                                   struct mlxsw_sp_lpm_tree *lpm_tree)
533 {
534         char ralst_pl[MLXSW_REG_RALST_LEN];
535         u8 root_bin = 0;
536         u8 prefix;
537         u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD;
538
539         mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage)
540                 root_bin = prefix;
541
542         mlxsw_reg_ralst_pack(ralst_pl, root_bin, lpm_tree->id);
543         mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) {
544                 if (prefix == 0)
545                         continue;
546                 mlxsw_reg_ralst_bin_pack(ralst_pl, prefix, last_prefix,
547                                          MLXSW_REG_RALST_BIN_NO_CHILD);
548                 last_prefix = prefix;
549         }
550         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
551 }
552
553 static struct mlxsw_sp_lpm_tree *
554 mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
555                          struct mlxsw_sp_prefix_usage *prefix_usage,
556                          enum mlxsw_sp_l3proto proto)
557 {
558         struct mlxsw_sp_lpm_tree *lpm_tree;
559         int err;
560
561         lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp);
562         if (!lpm_tree)
563                 return ERR_PTR(-EBUSY);
564         lpm_tree->proto = proto;
565         err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, lpm_tree);
566         if (err)
567                 return ERR_PTR(err);
568
569         err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, prefix_usage,
570                                                 lpm_tree);
571         if (err)
572                 goto err_left_struct_set;
573         memcpy(&lpm_tree->prefix_usage, prefix_usage,
574                sizeof(lpm_tree->prefix_usage));
575         memset(&lpm_tree->prefix_ref_count, 0,
576                sizeof(lpm_tree->prefix_ref_count));
577         lpm_tree->ref_count = 1;
578         return lpm_tree;
579
580 err_left_struct_set:
581         mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
582         return ERR_PTR(err);
583 }
584
/* Destroy @lpm_tree; this amounts to freeing it through
 * mlxsw_sp_lpm_tree_free().
 */
static void mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp,
				      struct mlxsw_sp_lpm_tree *lpm_tree)
{
	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
}
590
591 static struct mlxsw_sp_lpm_tree *
592 mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
593                       struct mlxsw_sp_prefix_usage *prefix_usage,
594                       enum mlxsw_sp_l3proto proto)
595 {
596         struct mlxsw_sp_lpm_tree *lpm_tree;
597         int i;
598
599         for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
600                 lpm_tree = &mlxsw_sp->router->lpm.trees[i];
601                 if (lpm_tree->ref_count != 0 &&
602                     lpm_tree->proto == proto &&
603                     mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
604                                              prefix_usage)) {
605                         mlxsw_sp_lpm_tree_hold(lpm_tree);
606                         return lpm_tree;
607                 }
608         }
609         return mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage, proto);
610 }
611
612 static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree)
613 {
614         lpm_tree->ref_count++;
615 }
616
617 static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
618                                   struct mlxsw_sp_lpm_tree *lpm_tree)
619 {
620         if (--lpm_tree->ref_count == 0)
621                 mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree);
622 }
623
624 #define MLXSW_SP_LPM_TREE_MIN 1 /* tree 0 is reserved */
625
626 static int mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
627 {
628         struct mlxsw_sp_prefix_usage req_prefix_usage = {{ 0 } };
629         struct mlxsw_sp_lpm_tree *lpm_tree;
630         u64 max_trees;
631         int err, i;
632
633         if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_LPM_TREES))
634                 return -EIO;
635
636         max_trees = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_LPM_TREES);
637         mlxsw_sp->router->lpm.tree_count = max_trees - MLXSW_SP_LPM_TREE_MIN;
638         mlxsw_sp->router->lpm.trees = kcalloc(mlxsw_sp->router->lpm.tree_count,
639                                              sizeof(struct mlxsw_sp_lpm_tree),
640                                              GFP_KERNEL);
641         if (!mlxsw_sp->router->lpm.trees)
642                 return -ENOMEM;
643
644         for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
645                 lpm_tree = &mlxsw_sp->router->lpm.trees[i];
646                 lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN;
647         }
648
649         lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
650                                          MLXSW_SP_L3_PROTO_IPV4);
651         if (IS_ERR(lpm_tree)) {
652                 err = PTR_ERR(lpm_tree);
653                 goto err_ipv4_tree_get;
654         }
655         mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4] = lpm_tree;
656
657         lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
658                                          MLXSW_SP_L3_PROTO_IPV6);
659         if (IS_ERR(lpm_tree)) {
660                 err = PTR_ERR(lpm_tree);
661                 goto err_ipv6_tree_get;
662         }
663         mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6] = lpm_tree;
664
665         return 0;
666
667 err_ipv6_tree_get:
668         lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
669         mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
670 err_ipv4_tree_get:
671         kfree(mlxsw_sp->router->lpm.trees);
672         return err;
673 }
674
675 static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp)
676 {
677         struct mlxsw_sp_lpm_tree *lpm_tree;
678
679         lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6];
680         mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
681
682         lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
683         mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
684
685         kfree(mlxsw_sp->router->lpm.trees);
686 }
687
688 static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr)
689 {
690         return !!vr->fib4 || !!vr->fib6 ||
691                !!vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] ||
692                !!vr->mr_table[MLXSW_SP_L3_PROTO_IPV6];
693 }
694
695 static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
696 {
697         struct mlxsw_sp_vr *vr;
698         int i;
699
700         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
701                 vr = &mlxsw_sp->router->vrs[i];
702                 if (!mlxsw_sp_vr_is_used(vr))
703                         return vr;
704         }
705         return NULL;
706 }
707
708 static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
709                                      const struct mlxsw_sp_fib *fib, u8 tree_id)
710 {
711         char raltb_pl[MLXSW_REG_RALTB_LEN];
712
713         mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
714                              (enum mlxsw_reg_ralxx_protocol) fib->proto,
715                              tree_id);
716         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
717 }
718
719 static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
720                                        const struct mlxsw_sp_fib *fib)
721 {
722         char raltb_pl[MLXSW_REG_RALTB_LEN];
723
724         /* Bind to tree 0 which is default */
725         mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
726                              (enum mlxsw_reg_ralxx_protocol) fib->proto, 0);
727         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
728 }
729
730 static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
731 {
732         /* For our purpose, squash main, default and local tables into one */
733         if (tb_id == RT_TABLE_LOCAL || tb_id == RT_TABLE_DEFAULT)
734                 tb_id = RT_TABLE_MAIN;
735         return tb_id;
736 }
737
738 static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
739                                             u32 tb_id)
740 {
741         struct mlxsw_sp_vr *vr;
742         int i;
743
744         tb_id = mlxsw_sp_fix_tb_id(tb_id);
745
746         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
747                 vr = &mlxsw_sp->router->vrs[i];
748                 if (mlxsw_sp_vr_is_used(vr) && vr->tb_id == tb_id)
749                         return vr;
750         }
751         return NULL;
752 }
753
754 int mlxsw_sp_router_tb_id_vr_id(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
755                                 u16 *vr_id)
756 {
757         struct mlxsw_sp_vr *vr;
758
759         vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
760         if (!vr)
761                 return -ESRCH;
762         *vr_id = vr->id;
763
764         return 0;
765 }
766
767 static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr,
768                                             enum mlxsw_sp_l3proto proto)
769 {
770         switch (proto) {
771         case MLXSW_SP_L3_PROTO_IPV4:
772                 return vr->fib4;
773         case MLXSW_SP_L3_PROTO_IPV6:
774                 return vr->fib6;
775         }
776         return NULL;
777 }
778
779 static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
780                                               u32 tb_id,
781                                               struct netlink_ext_ack *extack)
782 {
783         struct mlxsw_sp_mr_table *mr4_table, *mr6_table;
784         struct mlxsw_sp_fib *fib4;
785         struct mlxsw_sp_fib *fib6;
786         struct mlxsw_sp_vr *vr;
787         int err;
788
789         vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
790         if (!vr) {
791                 NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported virtual routers");
792                 return ERR_PTR(-EBUSY);
793         }
794         fib4 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
795         if (IS_ERR(fib4))
796                 return ERR_CAST(fib4);
797         fib6 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
798         if (IS_ERR(fib6)) {
799                 err = PTR_ERR(fib6);
800                 goto err_fib6_create;
801         }
802         mr4_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
803                                              MLXSW_SP_L3_PROTO_IPV4);
804         if (IS_ERR(mr4_table)) {
805                 err = PTR_ERR(mr4_table);
806                 goto err_mr4_table_create;
807         }
808         mr6_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
809                                              MLXSW_SP_L3_PROTO_IPV6);
810         if (IS_ERR(mr6_table)) {
811                 err = PTR_ERR(mr6_table);
812                 goto err_mr6_table_create;
813         }
814
815         vr->fib4 = fib4;
816         vr->fib6 = fib6;
817         vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] = mr4_table;
818         vr->mr_table[MLXSW_SP_L3_PROTO_IPV6] = mr6_table;
819         vr->tb_id = tb_id;
820         return vr;
821
822 err_mr6_table_create:
823         mlxsw_sp_mr_table_destroy(mr4_table);
824 err_mr4_table_create:
825         mlxsw_sp_fib_destroy(mlxsw_sp, fib6);
826 err_fib6_create:
827         mlxsw_sp_fib_destroy(mlxsw_sp, fib4);
828         return ERR_PTR(err);
829 }
830
831 static void mlxsw_sp_vr_destroy(struct mlxsw_sp *mlxsw_sp,
832                                 struct mlxsw_sp_vr *vr)
833 {
834         mlxsw_sp_mr_table_destroy(vr->mr_table[MLXSW_SP_L3_PROTO_IPV6]);
835         vr->mr_table[MLXSW_SP_L3_PROTO_IPV6] = NULL;
836         mlxsw_sp_mr_table_destroy(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4]);
837         vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] = NULL;
838         mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib6);
839         vr->fib6 = NULL;
840         mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib4);
841         vr->fib4 = NULL;
842 }
843
844 static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
845                                            struct netlink_ext_ack *extack)
846 {
847         struct mlxsw_sp_vr *vr;
848
849         tb_id = mlxsw_sp_fix_tb_id(tb_id);
850         vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
851         if (!vr)
852                 vr = mlxsw_sp_vr_create(mlxsw_sp, tb_id, extack);
853         return vr;
854 }
855
856 static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr)
857 {
858         if (!vr->rif_count && list_empty(&vr->fib4->node_list) &&
859             list_empty(&vr->fib6->node_list) &&
860             mlxsw_sp_mr_table_empty(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4]) &&
861             mlxsw_sp_mr_table_empty(vr->mr_table[MLXSW_SP_L3_PROTO_IPV6]))
862                 mlxsw_sp_vr_destroy(mlxsw_sp, vr);
863 }
864
865 static bool
866 mlxsw_sp_vr_lpm_tree_should_replace(struct mlxsw_sp_vr *vr,
867                                     enum mlxsw_sp_l3proto proto, u8 tree_id)
868 {
869         struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
870
871         if (!mlxsw_sp_vr_is_used(vr))
872                 return false;
873         if (fib->lpm_tree->id == tree_id)
874                 return true;
875         return false;
876 }
877
878 static int mlxsw_sp_vr_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
879                                         struct mlxsw_sp_fib *fib,
880                                         struct mlxsw_sp_lpm_tree *new_tree)
881 {
882         struct mlxsw_sp_lpm_tree *old_tree = fib->lpm_tree;
883         int err;
884
885         fib->lpm_tree = new_tree;
886         mlxsw_sp_lpm_tree_hold(new_tree);
887         err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id);
888         if (err)
889                 goto err_tree_bind;
890         mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
891         return 0;
892
893 err_tree_bind:
894         mlxsw_sp_lpm_tree_put(mlxsw_sp, new_tree);
895         fib->lpm_tree = old_tree;
896         return err;
897 }
898
899 static int mlxsw_sp_vrs_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
900                                          struct mlxsw_sp_fib *fib,
901                                          struct mlxsw_sp_lpm_tree *new_tree)
902 {
903         enum mlxsw_sp_l3proto proto = fib->proto;
904         struct mlxsw_sp_lpm_tree *old_tree;
905         u8 old_id, new_id = new_tree->id;
906         struct mlxsw_sp_vr *vr;
907         int i, err;
908
909         old_tree = mlxsw_sp->router->lpm.proto_trees[proto];
910         old_id = old_tree->id;
911
912         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
913                 vr = &mlxsw_sp->router->vrs[i];
914                 if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, old_id))
915                         continue;
916                 err = mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
917                                                    mlxsw_sp_vr_fib(vr, proto),
918                                                    new_tree);
919                 if (err)
920                         goto err_tree_replace;
921         }
922
923         memcpy(new_tree->prefix_ref_count, old_tree->prefix_ref_count,
924                sizeof(new_tree->prefix_ref_count));
925         mlxsw_sp->router->lpm.proto_trees[proto] = new_tree;
926         mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
927
928         return 0;
929
930 err_tree_replace:
931         for (i--; i >= 0; i--) {
932                 if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, new_id))
933                         continue;
934                 mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
935                                              mlxsw_sp_vr_fib(vr, proto),
936                                              old_tree);
937         }
938         return err;
939 }
940
941 static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
942 {
943         struct mlxsw_sp_vr *vr;
944         u64 max_vrs;
945         int i;
946
947         if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_VRS))
948                 return -EIO;
949
950         max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
951         mlxsw_sp->router->vrs = kcalloc(max_vrs, sizeof(struct mlxsw_sp_vr),
952                                         GFP_KERNEL);
953         if (!mlxsw_sp->router->vrs)
954                 return -ENOMEM;
955
956         for (i = 0; i < max_vrs; i++) {
957                 vr = &mlxsw_sp->router->vrs[i];
958                 vr->id = i;
959         }
960
961         return 0;
962 }
963
964 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp);
965
966 static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
967 {
968         /* At this stage we're guaranteed not to have new incoming
969          * FIB notifications and the work queue is free from FIBs
970          * sitting on top of mlxsw netdevs. However, we can still
971          * have other FIBs queued. Flush the queue before flushing
972          * the device's tables. No need for locks, as we're the only
973          * writer.
974          */
975         mlxsw_core_flush_owq();
976         mlxsw_sp_router_fib_flush(mlxsw_sp);
977         kfree(mlxsw_sp->router->vrs);
978 }
979
980 static struct net_device *
981 __mlxsw_sp_ipip_netdev_ul_dev_get(const struct net_device *ol_dev)
982 {
983         struct ip_tunnel *tun = netdev_priv(ol_dev);
984         struct net *net = dev_net(ol_dev);
985
986         return __dev_get_by_index(net, tun->parms.link);
987 }
988
989 u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev)
990 {
991         struct net_device *d = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
992
993         if (d)
994                 return l3mdev_fib_table(d) ? : RT_TABLE_MAIN;
995         else
996                 return l3mdev_fib_table(ol_dev) ? : RT_TABLE_MAIN;
997 }
998
999 static struct mlxsw_sp_rif *
1000 mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
1001                     const struct mlxsw_sp_rif_params *params,
1002                     struct netlink_ext_ack *extack);
1003
1004 static struct mlxsw_sp_rif_ipip_lb *
1005 mlxsw_sp_ipip_ol_ipip_lb_create(struct mlxsw_sp *mlxsw_sp,
1006                                 enum mlxsw_sp_ipip_type ipipt,
1007                                 struct net_device *ol_dev,
1008                                 struct netlink_ext_ack *extack)
1009 {
1010         struct mlxsw_sp_rif_params_ipip_lb lb_params;
1011         const struct mlxsw_sp_ipip_ops *ipip_ops;
1012         struct mlxsw_sp_rif *rif;
1013
1014         ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
1015         lb_params = (struct mlxsw_sp_rif_params_ipip_lb) {
1016                 .common.dev = ol_dev,
1017                 .common.lag = false,
1018                 .lb_config = ipip_ops->ol_loopback_config(mlxsw_sp, ol_dev),
1019         };
1020
1021         rif = mlxsw_sp_rif_create(mlxsw_sp, &lb_params.common, extack);
1022         if (IS_ERR(rif))
1023                 return ERR_CAST(rif);
1024         return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
1025 }
1026
1027 static struct mlxsw_sp_ipip_entry *
1028 mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp,
1029                           enum mlxsw_sp_ipip_type ipipt,
1030                           struct net_device *ol_dev)
1031 {
1032         const struct mlxsw_sp_ipip_ops *ipip_ops;
1033         struct mlxsw_sp_ipip_entry *ipip_entry;
1034         struct mlxsw_sp_ipip_entry *ret = NULL;
1035
1036         ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
1037         ipip_entry = kzalloc(sizeof(*ipip_entry), GFP_KERNEL);
1038         if (!ipip_entry)
1039                 return ERR_PTR(-ENOMEM);
1040
1041         ipip_entry->ol_lb = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp, ipipt,
1042                                                             ol_dev, NULL);
1043         if (IS_ERR(ipip_entry->ol_lb)) {
1044                 ret = ERR_CAST(ipip_entry->ol_lb);
1045                 goto err_ol_ipip_lb_create;
1046         }
1047
1048         ipip_entry->ipipt = ipipt;
1049         ipip_entry->ol_dev = ol_dev;
1050
1051         switch (ipip_ops->ul_proto) {
1052         case MLXSW_SP_L3_PROTO_IPV4:
1053                 ipip_entry->parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev);
1054                 break;
1055         case MLXSW_SP_L3_PROTO_IPV6:
1056                 WARN_ON(1);
1057                 break;
1058         }
1059
1060         return ipip_entry;
1061
1062 err_ol_ipip_lb_create:
1063         kfree(ipip_entry);
1064         return ret;
1065 }
1066
1067 static void
1068 mlxsw_sp_ipip_entry_dealloc(struct mlxsw_sp_ipip_entry *ipip_entry)
1069 {
1070         mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common);
1071         kfree(ipip_entry);
1072 }
1073
1074 static bool
1075 mlxsw_sp_ipip_entry_saddr_matches(struct mlxsw_sp *mlxsw_sp,
1076                                   const enum mlxsw_sp_l3proto ul_proto,
1077                                   union mlxsw_sp_l3addr saddr,
1078                                   u32 ul_tb_id,
1079                                   struct mlxsw_sp_ipip_entry *ipip_entry)
1080 {
1081         u32 tun_ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
1082         enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
1083         union mlxsw_sp_l3addr tun_saddr;
1084
1085         if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
1086                 return false;
1087
1088         tun_saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ipip_entry->ol_dev);
1089         return tun_ul_tb_id == ul_tb_id &&
1090                mlxsw_sp_l3addr_eq(&tun_saddr, &saddr);
1091 }
1092
1093 static int
1094 mlxsw_sp_fib_entry_decap_init(struct mlxsw_sp *mlxsw_sp,
1095                               struct mlxsw_sp_fib_entry *fib_entry,
1096                               struct mlxsw_sp_ipip_entry *ipip_entry)
1097 {
1098         u32 tunnel_index;
1099         int err;
1100
1101         err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
1102                                   1, &tunnel_index);
1103         if (err)
1104                 return err;
1105
1106         ipip_entry->decap_fib_entry = fib_entry;
1107         fib_entry->decap.ipip_entry = ipip_entry;
1108         fib_entry->decap.tunnel_index = tunnel_index;
1109         return 0;
1110 }
1111
1112 static void mlxsw_sp_fib_entry_decap_fini(struct mlxsw_sp *mlxsw_sp,
1113                                           struct mlxsw_sp_fib_entry *fib_entry)
1114 {
1115         /* Unlink this node from the IPIP entry that it's the decap entry of. */
1116         fib_entry->decap.ipip_entry->decap_fib_entry = NULL;
1117         fib_entry->decap.ipip_entry = NULL;
1118         mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
1119                            1, fib_entry->decap.tunnel_index);
1120 }
1121
1122 static struct mlxsw_sp_fib_node *
1123 mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
1124                          size_t addr_len, unsigned char prefix_len);
1125 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
1126                                      struct mlxsw_sp_fib_entry *fib_entry);
1127
1128 static void
1129 mlxsw_sp_ipip_entry_demote_decap(struct mlxsw_sp *mlxsw_sp,
1130                                  struct mlxsw_sp_ipip_entry *ipip_entry)
1131 {
1132         struct mlxsw_sp_fib_entry *fib_entry = ipip_entry->decap_fib_entry;
1133
1134         mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, fib_entry);
1135         fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1136
1137         mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1138 }
1139
1140 static void
1141 mlxsw_sp_ipip_entry_promote_decap(struct mlxsw_sp *mlxsw_sp,
1142                                   struct mlxsw_sp_ipip_entry *ipip_entry,
1143                                   struct mlxsw_sp_fib_entry *decap_fib_entry)
1144 {
1145         if (mlxsw_sp_fib_entry_decap_init(mlxsw_sp, decap_fib_entry,
1146                                           ipip_entry))
1147                 return;
1148         decap_fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
1149
1150         if (mlxsw_sp_fib_entry_update(mlxsw_sp, decap_fib_entry))
1151                 mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1152 }
1153
1154 static struct mlxsw_sp_fib_entry *
1155 mlxsw_sp_router_ip2me_fib_entry_find(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
1156                                      enum mlxsw_sp_l3proto proto,
1157                                      const union mlxsw_sp_l3addr *addr,
1158                                      enum mlxsw_sp_fib_entry_type type)
1159 {
1160         struct mlxsw_sp_fib_entry *fib_entry;
1161         struct mlxsw_sp_fib_node *fib_node;
1162         unsigned char addr_prefix_len;
1163         struct mlxsw_sp_fib *fib;
1164         struct mlxsw_sp_vr *vr;
1165         const void *addrp;
1166         size_t addr_len;
1167         u32 addr4;
1168
1169         vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
1170         if (!vr)
1171                 return NULL;
1172         fib = mlxsw_sp_vr_fib(vr, proto);
1173
1174         switch (proto) {
1175         case MLXSW_SP_L3_PROTO_IPV4:
1176                 addr4 = be32_to_cpu(addr->addr4);
1177                 addrp = &addr4;
1178                 addr_len = 4;
1179                 addr_prefix_len = 32;
1180                 break;
1181         case MLXSW_SP_L3_PROTO_IPV6: /* fall through */
1182         default:
1183                 WARN_ON(1);
1184                 return NULL;
1185         }
1186
1187         fib_node = mlxsw_sp_fib_node_lookup(fib, addrp, addr_len,
1188                                             addr_prefix_len);
1189         if (!fib_node || list_empty(&fib_node->entry_list))
1190                 return NULL;
1191
1192         fib_entry = list_first_entry(&fib_node->entry_list,
1193                                      struct mlxsw_sp_fib_entry, list);
1194         if (fib_entry->type != type)
1195                 return NULL;
1196
1197         return fib_entry;
1198 }
1199
1200 /* Given an IPIP entry, find the corresponding decap route. */
1201 static struct mlxsw_sp_fib_entry *
1202 mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp,
1203                                struct mlxsw_sp_ipip_entry *ipip_entry)
1204 {
1205         static struct mlxsw_sp_fib_node *fib_node;
1206         const struct mlxsw_sp_ipip_ops *ipip_ops;
1207         struct mlxsw_sp_fib_entry *fib_entry;
1208         unsigned char saddr_prefix_len;
1209         union mlxsw_sp_l3addr saddr;
1210         struct mlxsw_sp_fib *ul_fib;
1211         struct mlxsw_sp_vr *ul_vr;
1212         const void *saddrp;
1213         size_t saddr_len;
1214         u32 ul_tb_id;
1215         u32 saddr4;
1216
1217         ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1218
1219         ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
1220         ul_vr = mlxsw_sp_vr_find(mlxsw_sp, ul_tb_id);
1221         if (!ul_vr)
1222                 return NULL;
1223
1224         ul_fib = mlxsw_sp_vr_fib(ul_vr, ipip_ops->ul_proto);
1225         saddr = mlxsw_sp_ipip_netdev_saddr(ipip_ops->ul_proto,
1226                                            ipip_entry->ol_dev);
1227
1228         switch (ipip_ops->ul_proto) {
1229         case MLXSW_SP_L3_PROTO_IPV4:
1230                 saddr4 = be32_to_cpu(saddr.addr4);
1231                 saddrp = &saddr4;
1232                 saddr_len = 4;
1233                 saddr_prefix_len = 32;
1234                 break;
1235         case MLXSW_SP_L3_PROTO_IPV6:
1236                 WARN_ON(1);
1237                 return NULL;
1238         }
1239
1240         fib_node = mlxsw_sp_fib_node_lookup(ul_fib, saddrp, saddr_len,
1241                                             saddr_prefix_len);
1242         if (!fib_node || list_empty(&fib_node->entry_list))
1243                 return NULL;
1244
1245         fib_entry = list_first_entry(&fib_node->entry_list,
1246                                      struct mlxsw_sp_fib_entry, list);
1247         if (fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP)
1248                 return NULL;
1249
1250         return fib_entry;
1251 }
1252
1253 static struct mlxsw_sp_ipip_entry *
1254 mlxsw_sp_ipip_entry_create(struct mlxsw_sp *mlxsw_sp,
1255                            enum mlxsw_sp_ipip_type ipipt,
1256                            struct net_device *ol_dev)
1257 {
1258         struct mlxsw_sp_ipip_entry *ipip_entry;
1259
1260         ipip_entry = mlxsw_sp_ipip_entry_alloc(mlxsw_sp, ipipt, ol_dev);
1261         if (IS_ERR(ipip_entry))
1262                 return ipip_entry;
1263
1264         list_add_tail(&ipip_entry->ipip_list_node,
1265                       &mlxsw_sp->router->ipip_list);
1266
1267         return ipip_entry;
1268 }
1269
1270 static void
1271 mlxsw_sp_ipip_entry_destroy(struct mlxsw_sp *mlxsw_sp,
1272                             struct mlxsw_sp_ipip_entry *ipip_entry)
1273 {
1274         list_del(&ipip_entry->ipip_list_node);
1275         mlxsw_sp_ipip_entry_dealloc(ipip_entry);
1276 }
1277
1278 static bool
1279 mlxsw_sp_ipip_entry_matches_decap(struct mlxsw_sp *mlxsw_sp,
1280                                   const struct net_device *ul_dev,
1281                                   enum mlxsw_sp_l3proto ul_proto,
1282                                   union mlxsw_sp_l3addr ul_dip,
1283                                   struct mlxsw_sp_ipip_entry *ipip_entry)
1284 {
1285         u32 ul_tb_id = l3mdev_fib_table(ul_dev) ? : RT_TABLE_MAIN;
1286         enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
1287
1288         if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
1289                 return false;
1290
1291         return mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, ul_dip,
1292                                                  ul_tb_id, ipip_entry);
1293 }
1294
1295 /* Given decap parameters, find the corresponding IPIP entry. */
1296 static struct mlxsw_sp_ipip_entry *
1297 mlxsw_sp_ipip_entry_find_by_decap(struct mlxsw_sp *mlxsw_sp,
1298                                   const struct net_device *ul_dev,
1299                                   enum mlxsw_sp_l3proto ul_proto,
1300                                   union mlxsw_sp_l3addr ul_dip)
1301 {
1302         struct mlxsw_sp_ipip_entry *ipip_entry;
1303
1304         list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
1305                             ipip_list_node)
1306                 if (mlxsw_sp_ipip_entry_matches_decap(mlxsw_sp, ul_dev,
1307                                                       ul_proto, ul_dip,
1308                                                       ipip_entry))
1309                         return ipip_entry;
1310
1311         return NULL;
1312 }
1313
1314 static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp,
1315                                       const struct net_device *dev,
1316                                       enum mlxsw_sp_ipip_type *p_type)
1317 {
1318         struct mlxsw_sp_router *router = mlxsw_sp->router;
1319         const struct mlxsw_sp_ipip_ops *ipip_ops;
1320         enum mlxsw_sp_ipip_type ipipt;
1321
1322         for (ipipt = 0; ipipt < MLXSW_SP_IPIP_TYPE_MAX; ++ipipt) {
1323                 ipip_ops = router->ipip_ops_arr[ipipt];
1324                 if (dev->type == ipip_ops->dev_type) {
1325                         if (p_type)
1326                                 *p_type = ipipt;
1327                         return true;
1328                 }
1329         }
1330         return false;
1331 }
1332
1333 bool mlxsw_sp_netdev_is_ipip_ol(const struct mlxsw_sp *mlxsw_sp,
1334                                 const struct net_device *dev)
1335 {
1336         return mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL);
1337 }
1338
1339 static struct mlxsw_sp_ipip_entry *
1340 mlxsw_sp_ipip_entry_find_by_ol_dev(struct mlxsw_sp *mlxsw_sp,
1341                                    const struct net_device *ol_dev)
1342 {
1343         struct mlxsw_sp_ipip_entry *ipip_entry;
1344
1345         list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
1346                             ipip_list_node)
1347                 if (ipip_entry->ol_dev == ol_dev)
1348                         return ipip_entry;
1349
1350         return NULL;
1351 }
1352
1353 static struct mlxsw_sp_ipip_entry *
1354 mlxsw_sp_ipip_entry_find_by_ul_dev(const struct mlxsw_sp *mlxsw_sp,
1355                                    const struct net_device *ul_dev,
1356                                    struct mlxsw_sp_ipip_entry *start)
1357 {
1358         struct mlxsw_sp_ipip_entry *ipip_entry;
1359
1360         ipip_entry = list_prepare_entry(start, &mlxsw_sp->router->ipip_list,
1361                                         ipip_list_node);
1362         list_for_each_entry_continue(ipip_entry, &mlxsw_sp->router->ipip_list,
1363                                      ipip_list_node) {
1364                 struct net_device *ipip_ul_dev =
1365                         __mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);
1366
1367                 if (ipip_ul_dev == ul_dev)
1368                         return ipip_entry;
1369         }
1370
1371         return NULL;
1372 }
1373
1374 bool mlxsw_sp_netdev_is_ipip_ul(const struct mlxsw_sp *mlxsw_sp,
1375                                 const struct net_device *dev)
1376 {
1377         return mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp, dev, NULL);
1378 }
1379
1380 static bool mlxsw_sp_netdevice_ipip_can_offload(struct mlxsw_sp *mlxsw_sp,
1381                                                 const struct net_device *ol_dev,
1382                                                 enum mlxsw_sp_ipip_type ipipt)
1383 {
1384         const struct mlxsw_sp_ipip_ops *ops
1385                 = mlxsw_sp->router->ipip_ops_arr[ipipt];
1386
1387         /* For deciding whether decap should be offloaded, we don't care about
1388          * overlay protocol, so ask whether either one is supported.
1389          */
1390         return ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV4) ||
1391                ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV6);
1392 }
1393
1394 static int mlxsw_sp_netdevice_ipip_ol_reg_event(struct mlxsw_sp *mlxsw_sp,
1395                                                 struct net_device *ol_dev)
1396 {
1397         struct mlxsw_sp_ipip_entry *ipip_entry;
1398         enum mlxsw_sp_l3proto ul_proto;
1399         enum mlxsw_sp_ipip_type ipipt;
1400         union mlxsw_sp_l3addr saddr;
1401         u32 ul_tb_id;
1402
1403         mlxsw_sp_netdev_ipip_type(mlxsw_sp, ol_dev, &ipipt);
1404         if (mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev, ipipt)) {
1405                 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
1406                 ul_proto = mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto;
1407                 saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
1408                 if (!mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
1409                                                           saddr, ul_tb_id,
1410                                                           NULL)) {
1411                         ipip_entry = mlxsw_sp_ipip_entry_create(mlxsw_sp, ipipt,
1412                                                                 ol_dev);
1413                         if (IS_ERR(ipip_entry))
1414                                 return PTR_ERR(ipip_entry);
1415                 }
1416         }
1417
1418         return 0;
1419 }
1420
/* Handle unregistration of overlay device @ol_dev: destroy its IPIP entry,
 * if it has one.
 */
static void mlxsw_sp_netdevice_ipip_ol_unreg_event(struct mlxsw_sp *mlxsw_sp,
                                                   struct net_device *ol_dev)
{
        struct mlxsw_sp_ipip_entry *entry;

        entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
        if (!entry)
                return;

        mlxsw_sp_ipip_entry_destroy(mlxsw_sp, entry);
}
1430
/* The overlay device of @ipip_entry came up: if a suitable decap route
 * exists, promote it to an IPIP decap entry.
 */
static void
mlxsw_sp_ipip_entry_ol_up_event(struct mlxsw_sp *mlxsw_sp,
                                struct mlxsw_sp_ipip_entry *ipip_entry)
{
        struct mlxsw_sp_fib_entry *decap_route;

        decap_route = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp, ipip_entry);
        if (!decap_route)
                return;

        mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry, decap_route);
}
1442
1443 static int
1444 mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif, u16 ul_vr_id,
1445                         u16 ul_rif_id, bool enable)
1446 {
1447         struct mlxsw_sp_rif_ipip_lb_config lb_cf = lb_rif->lb_config;
1448         struct mlxsw_sp_rif *rif = &lb_rif->common;
1449         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
1450         char ritr_pl[MLXSW_REG_RITR_LEN];
1451         u32 saddr4;
1452
1453         switch (lb_cf.ul_protocol) {
1454         case MLXSW_SP_L3_PROTO_IPV4:
1455                 saddr4 = be32_to_cpu(lb_cf.saddr.addr4);
1456                 mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
1457                                     rif->rif_index, rif->vr_id, rif->dev->mtu);
1458                 mlxsw_reg_ritr_loopback_ipip4_pack(ritr_pl, lb_cf.lb_ipipt,
1459                             MLXSW_REG_RITR_LOOPBACK_IPIP_OPTIONS_GRE_KEY_PRESET,
1460                             ul_vr_id, ul_rif_id, saddr4, lb_cf.okey);
1461                 break;
1462
1463         case MLXSW_SP_L3_PROTO_IPV6:
1464                 return -EAFNOSUPPORT;
1465         }
1466
1467         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
1468 }
1469
1470 static int mlxsw_sp_netdevice_ipip_ol_update_mtu(struct mlxsw_sp *mlxsw_sp,
1471                                                  struct net_device *ol_dev)
1472 {
1473         struct mlxsw_sp_ipip_entry *ipip_entry;
1474         struct mlxsw_sp_rif_ipip_lb *lb_rif;
1475         int err = 0;
1476
1477         ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1478         if (ipip_entry) {
1479                 lb_rif = ipip_entry->ol_lb;
1480                 err = mlxsw_sp_rif_ipip_lb_op(lb_rif, lb_rif->ul_vr_id,
1481                                               lb_rif->ul_rif_id, true);
1482                 if (err)
1483                         goto out;
1484                 lb_rif->common.mtu = ol_dev->mtu;
1485         }
1486
1487 out:
1488         return err;
1489 }
1490
/* Overlay device @ol_dev came up: forward the event to its IPIP entry, if it
 * has one.
 */
static void mlxsw_sp_netdevice_ipip_ol_up_event(struct mlxsw_sp *mlxsw_sp,
                                                struct net_device *ol_dev)
{
        struct mlxsw_sp_ipip_entry *entry;

        entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
        if (!entry)
                return;

        mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, entry);
}
1500
1501 static void
1502 mlxsw_sp_ipip_entry_ol_down_event(struct mlxsw_sp *mlxsw_sp,
1503                                   struct mlxsw_sp_ipip_entry *ipip_entry)
1504 {
1505         if (ipip_entry->decap_fib_entry)
1506                 mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1507 }
1508
/* Overlay device @ol_dev went down: forward the event to its IPIP entry, if
 * it has one.
 */
static void mlxsw_sp_netdevice_ipip_ol_down_event(struct mlxsw_sp *mlxsw_sp,
                                                  struct net_device *ol_dev)
{
        struct mlxsw_sp_ipip_entry *entry;

        entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
        if (!entry)
                return;

        mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, entry);
}
1518
1519 static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp,
1520                                          struct mlxsw_sp_rif *old_rif,
1521                                          struct mlxsw_sp_rif *new_rif);
1522 static int
1523 mlxsw_sp_ipip_entry_ol_lb_update(struct mlxsw_sp *mlxsw_sp,
1524                                  struct mlxsw_sp_ipip_entry *ipip_entry,
1525                                  bool keep_encap,
1526                                  struct netlink_ext_ack *extack)
1527 {
1528         struct mlxsw_sp_rif_ipip_lb *old_lb_rif = ipip_entry->ol_lb;
1529         struct mlxsw_sp_rif_ipip_lb *new_lb_rif;
1530
1531         new_lb_rif = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp,
1532                                                      ipip_entry->ipipt,
1533                                                      ipip_entry->ol_dev,
1534                                                      extack);
1535         if (IS_ERR(new_lb_rif))
1536                 return PTR_ERR(new_lb_rif);
1537         ipip_entry->ol_lb = new_lb_rif;
1538
1539         if (keep_encap)
1540                 mlxsw_sp_nexthop_rif_migrate(mlxsw_sp, &old_lb_rif->common,
1541                                              &new_lb_rif->common);
1542
1543         mlxsw_sp_rif_destroy(&old_lb_rif->common);
1544
1545         return 0;
1546 }
1547
1548 static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
1549                                         struct mlxsw_sp_rif *rif);
1550
1551 /**
1552  * Update the offload related to an IPIP entry. This always updates decap, and
1553  * in addition to that it also:
1554  * @recreate_loopback: recreates the associated loopback RIF
1555  * @keep_encap: updates next hops that use the tunnel netdevice. This is only
1556  *              relevant when recreate_loopback is true.
1557  * @update_nexthops: updates next hops, keeping the current loopback RIF. This
1558  *                   is only relevant when recreate_loopback is false.
1559  */
1560 int __mlxsw_sp_ipip_entry_update_tunnel(struct mlxsw_sp *mlxsw_sp,
1561                                         struct mlxsw_sp_ipip_entry *ipip_entry,
1562                                         bool recreate_loopback,
1563                                         bool keep_encap,
1564                                         bool update_nexthops,
1565                                         struct netlink_ext_ack *extack)
1566 {
1567         int err;
1568
1569         /* RIFs can't be edited, so to update loopback, we need to destroy and
1570          * recreate it. That creates a window of opportunity where RALUE and
1571          * RATR registers end up referencing a RIF that's already gone. RATRs
1572          * are handled in mlxsw_sp_ipip_entry_ol_lb_update(), and to take care
1573          * of RALUE, demote the decap route back.
1574          */
1575         if (ipip_entry->decap_fib_entry)
1576                 mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1577
1578         if (recreate_loopback) {
1579                 err = mlxsw_sp_ipip_entry_ol_lb_update(mlxsw_sp, ipip_entry,
1580                                                        keep_encap, extack);
1581                 if (err)
1582                         return err;
1583         } else if (update_nexthops) {
1584                 mlxsw_sp_nexthop_rif_update(mlxsw_sp,
1585                                             &ipip_entry->ol_lb->common);
1586         }
1587
1588         if (ipip_entry->ol_dev->flags & IFF_UP)
1589                 mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);
1590
1591         return 0;
1592 }
1593
1594 static int mlxsw_sp_netdevice_ipip_ol_vrf_event(struct mlxsw_sp *mlxsw_sp,
1595                                                 struct net_device *ol_dev,
1596                                                 struct netlink_ext_ack *extack)
1597 {
1598         struct mlxsw_sp_ipip_entry *ipip_entry =
1599                 mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1600         enum mlxsw_sp_l3proto ul_proto;
1601         union mlxsw_sp_l3addr saddr;
1602         u32 ul_tb_id;
1603
1604         if (!ipip_entry)
1605                 return 0;
1606
1607         /* For flat configuration cases, moving overlay to a different VRF might
1608          * cause local address conflict, and the conflicting tunnels need to be
1609          * demoted.
1610          */
1611         ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
1612         ul_proto = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]->ul_proto;
1613         saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
1614         if (mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
1615                                                  saddr, ul_tb_id,
1616                                                  ipip_entry)) {
1617                 mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1618                 return 0;
1619         }
1620
1621         return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1622                                                    true, false, false, extack);
1623 }
1624
1625 static int
1626 mlxsw_sp_netdevice_ipip_ul_vrf_event(struct mlxsw_sp *mlxsw_sp,
1627                                      struct mlxsw_sp_ipip_entry *ipip_entry,
1628                                      struct net_device *ul_dev,
1629                                      struct netlink_ext_ack *extack)
1630 {
1631         return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1632                                                    true, true, false, extack);
1633 }
1634
1635 static int
1636 mlxsw_sp_netdevice_ipip_ul_up_event(struct mlxsw_sp *mlxsw_sp,
1637                                     struct mlxsw_sp_ipip_entry *ipip_entry,
1638                                     struct net_device *ul_dev)
1639 {
1640         return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1641                                                    false, false, true, NULL);
1642 }
1643
1644 static int
1645 mlxsw_sp_netdevice_ipip_ul_down_event(struct mlxsw_sp *mlxsw_sp,
1646                                       struct mlxsw_sp_ipip_entry *ipip_entry,
1647                                       struct net_device *ul_dev)
1648 {
1649         /* A down underlay device causes encapsulated packets to not be
1650          * forwarded, but decap still works. So refresh next hops without
1651          * touching anything else.
1652          */
1653         return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1654                                                    false, false, true, NULL);
1655 }
1656
1657 static int
1658 mlxsw_sp_netdevice_ipip_ol_change_event(struct mlxsw_sp *mlxsw_sp,
1659                                         struct net_device *ol_dev,
1660                                         struct netlink_ext_ack *extack)
1661 {
1662         const struct mlxsw_sp_ipip_ops *ipip_ops;
1663         struct mlxsw_sp_ipip_entry *ipip_entry;
1664         int err;
1665
1666         ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1667         if (!ipip_entry)
1668                 /* A change might make a tunnel eligible for offloading, but
1669                  * that is currently not implemented. What falls to slow path
1670                  * stays there.
1671                  */
1672                 return 0;
1673
1674         /* A change might make a tunnel not eligible for offloading. */
1675         if (!mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev,
1676                                                  ipip_entry->ipipt)) {
1677                 mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1678                 return 0;
1679         }
1680
1681         ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1682         err = ipip_ops->ol_netdev_change(mlxsw_sp, ipip_entry, extack);
1683         return err;
1684 }
1685
1686 void mlxsw_sp_ipip_entry_demote_tunnel(struct mlxsw_sp *mlxsw_sp,
1687                                        struct mlxsw_sp_ipip_entry *ipip_entry)
1688 {
1689         struct net_device *ol_dev = ipip_entry->ol_dev;
1690
1691         if (ol_dev->flags & IFF_UP)
1692                 mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
1693         mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
1694 }
1695
1696 /* The configuration where several tunnels have the same local address in the
1697  * same underlay table needs special treatment in the HW. That is currently not
1698  * implemented in the driver. This function finds and demotes the first tunnel
1699  * with a given source address, except the one passed in in the argument
1700  * `except'.
1701  */
1702 bool
1703 mlxsw_sp_ipip_demote_tunnel_by_saddr(struct mlxsw_sp *mlxsw_sp,
1704                                      enum mlxsw_sp_l3proto ul_proto,
1705                                      union mlxsw_sp_l3addr saddr,
1706                                      u32 ul_tb_id,
1707                                      const struct mlxsw_sp_ipip_entry *except)
1708 {
1709         struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
1710
1711         list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
1712                                  ipip_list_node) {
1713                 if (ipip_entry != except &&
1714                     mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, saddr,
1715                                                       ul_tb_id, ipip_entry)) {
1716                         mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1717                         return true;
1718                 }
1719         }
1720
1721         return false;
1722 }
1723
1724 static void mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(struct mlxsw_sp *mlxsw_sp,
1725                                                      struct net_device *ul_dev)
1726 {
1727         struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
1728
1729         list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
1730                                  ipip_list_node) {
1731                 struct net_device *ipip_ul_dev =
1732                         __mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);
1733
1734                 if (ipip_ul_dev == ul_dev)
1735                         mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1736         }
1737 }
1738
1739 int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp,
1740                                      struct net_device *ol_dev,
1741                                      unsigned long event,
1742                                      struct netdev_notifier_info *info)
1743 {
1744         struct netdev_notifier_changeupper_info *chup;
1745         struct netlink_ext_ack *extack;
1746
1747         switch (event) {
1748         case NETDEV_REGISTER:
1749                 return mlxsw_sp_netdevice_ipip_ol_reg_event(mlxsw_sp, ol_dev);
1750         case NETDEV_UNREGISTER:
1751                 mlxsw_sp_netdevice_ipip_ol_unreg_event(mlxsw_sp, ol_dev);
1752                 return 0;
1753         case NETDEV_UP:
1754                 mlxsw_sp_netdevice_ipip_ol_up_event(mlxsw_sp, ol_dev);
1755                 return 0;
1756         case NETDEV_DOWN:
1757                 mlxsw_sp_netdevice_ipip_ol_down_event(mlxsw_sp, ol_dev);
1758                 return 0;
1759         case NETDEV_CHANGEUPPER:
1760                 chup = container_of(info, typeof(*chup), info);
1761                 extack = info->extack;
1762                 if (netif_is_l3_master(chup->upper_dev))
1763                         return mlxsw_sp_netdevice_ipip_ol_vrf_event(mlxsw_sp,
1764                                                                     ol_dev,
1765                                                                     extack);
1766                 return 0;
1767         case NETDEV_CHANGE:
1768                 extack = info->extack;
1769                 return mlxsw_sp_netdevice_ipip_ol_change_event(mlxsw_sp,
1770                                                                ol_dev, extack);
1771         case NETDEV_CHANGEMTU:
1772                 return mlxsw_sp_netdevice_ipip_ol_update_mtu(mlxsw_sp, ol_dev);
1773         }
1774         return 0;
1775 }
1776
1777 static int
1778 __mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
1779                                    struct mlxsw_sp_ipip_entry *ipip_entry,
1780                                    struct net_device *ul_dev,
1781                                    unsigned long event,
1782                                    struct netdev_notifier_info *info)
1783 {
1784         struct netdev_notifier_changeupper_info *chup;
1785         struct netlink_ext_ack *extack;
1786
1787         switch (event) {
1788         case NETDEV_CHANGEUPPER:
1789                 chup = container_of(info, typeof(*chup), info);
1790                 extack = info->extack;
1791                 if (netif_is_l3_master(chup->upper_dev))
1792                         return mlxsw_sp_netdevice_ipip_ul_vrf_event(mlxsw_sp,
1793                                                                     ipip_entry,
1794                                                                     ul_dev,
1795                                                                     extack);
1796                 break;
1797
1798         case NETDEV_UP:
1799                 return mlxsw_sp_netdevice_ipip_ul_up_event(mlxsw_sp, ipip_entry,
1800                                                            ul_dev);
1801         case NETDEV_DOWN:
1802                 return mlxsw_sp_netdevice_ipip_ul_down_event(mlxsw_sp,
1803                                                              ipip_entry,
1804                                                              ul_dev);
1805         }
1806         return 0;
1807 }
1808
1809 int
1810 mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
1811                                  struct net_device *ul_dev,
1812                                  unsigned long event,
1813                                  struct netdev_notifier_info *info)
1814 {
1815         struct mlxsw_sp_ipip_entry *ipip_entry = NULL;
1816         int err;
1817
1818         while ((ipip_entry = mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp,
1819                                                                 ul_dev,
1820                                                                 ipip_entry))) {
1821                 err = __mlxsw_sp_netdevice_ipip_ul_event(mlxsw_sp, ipip_entry,
1822                                                          ul_dev, event, info);
1823                 if (err) {
1824                         mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(mlxsw_sp,
1825                                                                  ul_dev);
1826                         return err;
1827                 }
1828         }
1829
1830         return 0;
1831 }
1832
1833 int mlxsw_sp_router_nve_promote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
1834                                       enum mlxsw_sp_l3proto ul_proto,
1835                                       const union mlxsw_sp_l3addr *ul_sip,
1836                                       u32 tunnel_index)
1837 {
1838         enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1839         struct mlxsw_sp_fib_entry *fib_entry;
1840         int err;
1841
1842         /* It is valid to create a tunnel with a local IP and only later
1843          * assign this IP address to a local interface
1844          */
1845         fib_entry = mlxsw_sp_router_ip2me_fib_entry_find(mlxsw_sp, ul_tb_id,
1846                                                          ul_proto, ul_sip,
1847                                                          type);
1848         if (!fib_entry)
1849                 return 0;
1850
1851         fib_entry->decap.tunnel_index = tunnel_index;
1852         fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
1853
1854         err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1855         if (err)
1856                 goto err_fib_entry_update;
1857
1858         return 0;
1859
1860 err_fib_entry_update:
1861         fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1862         mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1863         return err;
1864 }
1865
1866 void mlxsw_sp_router_nve_demote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
1867                                       enum mlxsw_sp_l3proto ul_proto,
1868                                       const union mlxsw_sp_l3addr *ul_sip)
1869 {
1870         enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
1871         struct mlxsw_sp_fib_entry *fib_entry;
1872
1873         fib_entry = mlxsw_sp_router_ip2me_fib_entry_find(mlxsw_sp, ul_tb_id,
1874                                                          ul_proto, ul_sip,
1875                                                          type);
1876         if (!fib_entry)
1877                 return;
1878
1879         fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1880         mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1881 }
1882
/* Hash table key of a neighbour entry: the kernel neighbour pointer. */
struct mlxsw_sp_neigh_key {
	struct neighbour *n;
};
1886
1887 struct mlxsw_sp_neigh_entry {
1888         struct list_head rif_list_node;
1889         struct rhash_head ht_node;
1890         struct mlxsw_sp_neigh_key key;
1891         u16 rif;
1892         bool connected;
1893         unsigned char ha[ETH_ALEN];
1894         struct list_head nexthop_list; /* list of nexthops using
1895                                         * this neigh entry
1896                                         */
1897         struct list_head nexthop_neighs_list_node;
1898         unsigned int counter_index;
1899         bool counter_valid;
1900 };
1901
1902 static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
1903         .key_offset = offsetof(struct mlxsw_sp_neigh_entry, key),
1904         .head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node),
1905         .key_len = sizeof(struct mlxsw_sp_neigh_key),
1906 };
1907
1908 struct mlxsw_sp_neigh_entry *
1909 mlxsw_sp_rif_neigh_next(struct mlxsw_sp_rif *rif,
1910                         struct mlxsw_sp_neigh_entry *neigh_entry)
1911 {
1912         if (!neigh_entry) {
1913                 if (list_empty(&rif->neigh_list))
1914                         return NULL;
1915                 else
1916                         return list_first_entry(&rif->neigh_list,
1917                                                 typeof(*neigh_entry),
1918                                                 rif_list_node);
1919         }
1920         if (list_is_last(&neigh_entry->rif_list_node, &rif->neigh_list))
1921                 return NULL;
1922         return list_next_entry(neigh_entry, rif_list_node);
1923 }
1924
1925 int mlxsw_sp_neigh_entry_type(struct mlxsw_sp_neigh_entry *neigh_entry)
1926 {
1927         return neigh_entry->key.n->tbl->family;
1928 }
1929
1930 unsigned char *
1931 mlxsw_sp_neigh_entry_ha(struct mlxsw_sp_neigh_entry *neigh_entry)
1932 {
1933         return neigh_entry->ha;
1934 }
1935
1936 u32 mlxsw_sp_neigh4_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
1937 {
1938         struct neighbour *n;
1939
1940         n = neigh_entry->key.n;
1941         return ntohl(*((__be32 *) n->primary_key));
1942 }
1943
1944 struct in6_addr *
1945 mlxsw_sp_neigh6_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
1946 {
1947         struct neighbour *n;
1948
1949         n = neigh_entry->key.n;
1950         return (struct in6_addr *) &n->primary_key;
1951 }
1952
1953 int mlxsw_sp_neigh_counter_get(struct mlxsw_sp *mlxsw_sp,
1954                                struct mlxsw_sp_neigh_entry *neigh_entry,
1955                                u64 *p_counter)
1956 {
1957         if (!neigh_entry->counter_valid)
1958                 return -EINVAL;
1959
1960         return mlxsw_sp_flow_counter_get(mlxsw_sp, neigh_entry->counter_index,
1961                                          p_counter, NULL);
1962 }
1963
1964 static struct mlxsw_sp_neigh_entry *
1965 mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n,
1966                            u16 rif)
1967 {
1968         struct mlxsw_sp_neigh_entry *neigh_entry;
1969
1970         neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_KERNEL);
1971         if (!neigh_entry)
1972                 return NULL;
1973
1974         neigh_entry->key.n = n;
1975         neigh_entry->rif = rif;
1976         INIT_LIST_HEAD(&neigh_entry->nexthop_list);
1977
1978         return neigh_entry;
1979 }
1980
/* Release the memory of a neigh entry. */
static void mlxsw_sp_neigh_entry_free(struct mlxsw_sp_neigh_entry *neigh_entry)
{
	kfree(neigh_entry);
}
1985
1986 static int
1987 mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp,
1988                             struct mlxsw_sp_neigh_entry *neigh_entry)
1989 {
1990         return rhashtable_insert_fast(&mlxsw_sp->router->neigh_ht,
1991                                       &neigh_entry->ht_node,
1992                                       mlxsw_sp_neigh_ht_params);
1993 }
1994
1995 static void
1996 mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
1997                             struct mlxsw_sp_neigh_entry *neigh_entry)
1998 {
1999         rhashtable_remove_fast(&mlxsw_sp->router->neigh_ht,
2000                                &neigh_entry->ht_node,
2001                                mlxsw_sp_neigh_ht_params);
2002 }
2003
2004 static bool
2005 mlxsw_sp_neigh_counter_should_alloc(struct mlxsw_sp *mlxsw_sp,
2006                                     struct mlxsw_sp_neigh_entry *neigh_entry)
2007 {
2008         struct devlink *devlink;
2009         const char *table_name;
2010
2011         switch (mlxsw_sp_neigh_entry_type(neigh_entry)) {
2012         case AF_INET:
2013                 table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST4;
2014                 break;
2015         case AF_INET6:
2016                 table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST6;
2017                 break;
2018         default:
2019                 WARN_ON(1);
2020                 return false;
2021         }
2022
2023         devlink = priv_to_devlink(mlxsw_sp->core);
2024         return devlink_dpipe_table_counter_enabled(devlink, table_name);
2025 }
2026
2027 static void
2028 mlxsw_sp_neigh_counter_alloc(struct mlxsw_sp *mlxsw_sp,
2029                              struct mlxsw_sp_neigh_entry *neigh_entry)
2030 {
2031         if (!mlxsw_sp_neigh_counter_should_alloc(mlxsw_sp, neigh_entry))
2032                 return;
2033
2034         if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &neigh_entry->counter_index))
2035                 return;
2036
2037         neigh_entry->counter_valid = true;
2038 }
2039
2040 static void
2041 mlxsw_sp_neigh_counter_free(struct mlxsw_sp *mlxsw_sp,
2042                             struct mlxsw_sp_neigh_entry *neigh_entry)
2043 {
2044         if (!neigh_entry->counter_valid)
2045                 return;
2046         mlxsw_sp_flow_counter_free(mlxsw_sp,
2047                                    neigh_entry->counter_index);
2048         neigh_entry->counter_valid = false;
2049 }
2050
2051 static struct mlxsw_sp_neigh_entry *
2052 mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
2053 {
2054         struct mlxsw_sp_neigh_entry *neigh_entry;
2055         struct mlxsw_sp_rif *rif;
2056         int err;
2057
2058         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
2059         if (!rif)
2060                 return ERR_PTR(-EINVAL);
2061
2062         neigh_entry = mlxsw_sp_neigh_entry_alloc(mlxsw_sp, n, rif->rif_index);
2063         if (!neigh_entry)
2064                 return ERR_PTR(-ENOMEM);
2065
2066         err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
2067         if (err)
2068                 goto err_neigh_entry_insert;
2069
2070         mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
2071         list_add(&neigh_entry->rif_list_node, &rif->neigh_list);
2072
2073         return neigh_entry;
2074
2075 err_neigh_entry_insert:
2076         mlxsw_sp_neigh_entry_free(neigh_entry);
2077         return ERR_PTR(err);
2078 }
2079
2080 static void
2081 mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp *mlxsw_sp,
2082                              struct mlxsw_sp_neigh_entry *neigh_entry)
2083 {
2084         list_del(&neigh_entry->rif_list_node);
2085         mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
2086         mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
2087         mlxsw_sp_neigh_entry_free(neigh_entry);
2088 }
2089
2090 static struct mlxsw_sp_neigh_entry *
2091 mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
2092 {
2093         struct mlxsw_sp_neigh_key key;
2094
2095         key.n = n;
2096         return rhashtable_lookup_fast(&mlxsw_sp->router->neigh_ht,
2097                                       &key, mlxsw_sp_neigh_ht_params);
2098 }
2099
2100 static void
2101 mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp)
2102 {
2103         unsigned long interval;
2104
2105 #if IS_ENABLED(CONFIG_IPV6)
2106         interval = min_t(unsigned long,
2107                          NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME),
2108                          NEIGH_VAR(&nd_tbl.parms, DELAY_PROBE_TIME));
2109 #else
2110         interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
2111 #endif
2112         mlxsw_sp->router->neighs_update.interval = jiffies_to_msecs(interval);
2113 }
2114
2115 static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
2116                                                    char *rauhtd_pl,
2117                                                    int ent_index)
2118 {
2119         struct net_device *dev;
2120         struct neighbour *n;
2121         __be32 dipn;
2122         u32 dip;
2123         u16 rif;
2124
2125         mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip);
2126
2127         if (!mlxsw_sp->router->rifs[rif]) {
2128                 dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
2129                 return;
2130         }
2131
2132         dipn = htonl(dip);
2133         dev = mlxsw_sp->router->rifs[rif]->dev;
2134         n = neigh_lookup(&arp_tbl, &dipn, dev);
2135         if (!n)
2136                 return;
2137
2138         netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip);
2139         neigh_event_send(n, NULL);
2140         neigh_release(n);
2141 }
2142
2143 #if IS_ENABLED(CONFIG_IPV6)
2144 static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2145                                                    char *rauhtd_pl,
2146                                                    int rec_index)
2147 {
2148         struct net_device *dev;
2149         struct neighbour *n;
2150         struct in6_addr dip;
2151         u16 rif;
2152
2153         mlxsw_reg_rauhtd_ent_ipv6_unpack(rauhtd_pl, rec_index, &rif,
2154                                          (char *) &dip);
2155
2156         if (!mlxsw_sp->router->rifs[rif]) {
2157                 dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
2158                 return;
2159         }
2160
2161         dev = mlxsw_sp->router->rifs[rif]->dev;
2162         n = neigh_lookup(&nd_tbl, &dip, dev);
2163         if (!n)
2164                 return;
2165
2166         netdev_dbg(dev, "Updating neighbour with IP=%pI6c\n", &dip);
2167         neigh_event_send(n, NULL);
2168         neigh_release(n);
2169 }
2170 #else
2171 static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2172                                                    char *rauhtd_pl,
2173                                                    int rec_index)
2174 {
2175 }
2176 #endif
2177
2178 static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
2179                                                    char *rauhtd_pl,
2180                                                    int rec_index)
2181 {
2182         u8 num_entries;
2183         int i;
2184
2185         num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
2186                                                                 rec_index);
2187         /* Hardware starts counting at 0, so add 1. */
2188         num_entries++;
2189
2190         /* Each record consists of several neighbour entries. */
2191         for (i = 0; i < num_entries; i++) {
2192                 int ent_index;
2193
2194                 ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i;
2195                 mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl,
2196                                                        ent_index);
2197         }
2198
2199 }
2200
/* Process one IPv6 RAUHTD record. An IPv6 record carries exactly one
 * entry, so the record index doubles as the entry index.
 */
static void mlxsw_sp_router_neigh_rec_ipv6_process(struct mlxsw_sp *mlxsw_sp,
						   char *rauhtd_pl,
						   int rec_index)
{
	mlxsw_sp_router_neigh_ent_ipv6_process(mlxsw_sp, rauhtd_pl, rec_index);
}
2209
2210 static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
2211                                               char *rauhtd_pl, int rec_index)
2212 {
2213         switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) {
2214         case MLXSW_REG_RAUHTD_TYPE_IPV4:
2215                 mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl,
2216                                                        rec_index);
2217                 break;
2218         case MLXSW_REG_RAUHTD_TYPE_IPV6:
2219                 mlxsw_sp_router_neigh_rec_ipv6_process(mlxsw_sp, rauhtd_pl,
2220                                                        rec_index);
2221                 break;
2222         }
2223 }
2224
2225 static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl)
2226 {
2227         u8 num_rec, last_rec_index, num_entries;
2228
2229         num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
2230         last_rec_index = num_rec - 1;
2231
2232         if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM)
2233                 return false;
2234         if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) ==
2235             MLXSW_REG_RAUHTD_TYPE_IPV6)
2236                 return true;
2237
2238         num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
2239                                                                 last_rec_index);
2240         if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC)
2241                 return true;
2242         return false;
2243 }
2244
2245 static int
2246 __mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp,
2247                                        char *rauhtd_pl,
2248                                        enum mlxsw_reg_rauhtd_type type)
2249 {
2250         int i, num_rec;
2251         int err;
2252
2253         /* Make sure the neighbour's netdev isn't removed in the
2254          * process.
2255          */
2256         rtnl_lock();
2257         do {
2258                 mlxsw_reg_rauhtd_pack(rauhtd_pl, type);
2259                 err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
2260                                       rauhtd_pl);
2261                 if (err) {
2262                         dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour table\n");
2263                         break;
2264                 }
2265                 num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
2266                 for (i = 0; i < num_rec; i++)
2267                         mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
2268                                                           i);
2269         } while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
2270         rtnl_unlock();
2271
2272         return err;
2273 }
2274
2275 static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
2276 {
2277         enum mlxsw_reg_rauhtd_type type;
2278         char *rauhtd_pl;
2279         int err;
2280
2281         rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
2282         if (!rauhtd_pl)
2283                 return -ENOMEM;
2284
2285         type = MLXSW_REG_RAUHTD_TYPE_IPV4;
2286         err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
2287         if (err)
2288                 goto out;
2289
2290         type = MLXSW_REG_RAUHTD_TYPE_IPV6;
2291         err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
2292 out:
2293         kfree(rauhtd_pl);
2294         return err;
2295 }
2296
2297 static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp)
2298 {
2299         struct mlxsw_sp_neigh_entry *neigh_entry;
2300
2301         /* Take RTNL mutex here to prevent lists from changes */
2302         rtnl_lock();
2303         list_for_each_entry(neigh_entry, &mlxsw_sp->router->nexthop_neighs_list,
2304                             nexthop_neighs_list_node)
2305                 /* If this neigh have nexthops, make the kernel think this neigh
2306                  * is active regardless of the traffic.
2307                  */
2308                 neigh_event_send(neigh_entry->key.n, NULL);
2309         rtnl_unlock();
2310 }
2311
2312 static void
2313 mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp)
2314 {
2315         unsigned long interval = mlxsw_sp->router->neighs_update.interval;
2316
2317         mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw,
2318                                msecs_to_jiffies(interval));
2319 }
2320
2321 static void mlxsw_sp_router_neighs_update_work(struct work_struct *work)
2322 {
2323         struct mlxsw_sp_router *router;
2324         int err;
2325
2326         router = container_of(work, struct mlxsw_sp_router,
2327                               neighs_update.dw.work);
2328         err = mlxsw_sp_router_neighs_update_rauhtd(router->mlxsw_sp);
2329         if (err)
2330                 dev_err(router->mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity");
2331
2332         mlxsw_sp_router_neighs_update_nh(router->mlxsw_sp);
2333
2334         mlxsw_sp_router_neighs_update_work_schedule(router->mlxsw_sp);
2335 }
2336
2337 static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work)
2338 {
2339         struct mlxsw_sp_neigh_entry *neigh_entry;
2340         struct mlxsw_sp_router *router;
2341
2342         router = container_of(work, struct mlxsw_sp_router,
2343                               nexthop_probe_dw.work);
2344         /* Iterate over nexthop neighbours, find those who are unresolved and
2345          * send arp on them. This solves the chicken-egg problem when
2346          * the nexthop wouldn't get offloaded until the neighbor is resolved
2347          * but it wouldn't get resolved ever in case traffic is flowing in HW
2348          * using different nexthop.
2349          *
2350          * Take RTNL mutex here to prevent lists from changes.
2351          */
2352         rtnl_lock();
2353         list_for_each_entry(neigh_entry, &router->nexthop_neighs_list,
2354                             nexthop_neighs_list_node)
2355                 if (!neigh_entry->connected)
2356                         neigh_event_send(neigh_entry->key.n, NULL);
2357         rtnl_unlock();
2358
2359         mlxsw_core_schedule_dw(&router->nexthop_probe_dw,
2360                                MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL);
2361 }
2362
2363 static void
2364 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
2365                               struct mlxsw_sp_neigh_entry *neigh_entry,
2366                               bool removing);
2367
2368 static enum mlxsw_reg_rauht_op mlxsw_sp_rauht_op(bool adding)
2369 {
2370         return adding ? MLXSW_REG_RAUHT_OP_WRITE_ADD :
2371                         MLXSW_REG_RAUHT_OP_WRITE_DELETE;
2372 }
2373
2374 static int
2375 mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp,
2376                                 struct mlxsw_sp_neigh_entry *neigh_entry,
2377                                 enum mlxsw_reg_rauht_op op)
2378 {
2379         struct neighbour *n = neigh_entry->key.n;
2380         u32 dip = ntohl(*((__be32 *) n->primary_key));
2381         char rauht_pl[MLXSW_REG_RAUHT_LEN];
2382
2383         mlxsw_reg_rauht_pack4(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
2384                               dip);
2385         if (neigh_entry->counter_valid)
2386                 mlxsw_reg_rauht_pack_counter(rauht_pl,
2387                                              neigh_entry->counter_index);
2388         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
2389 }
2390
2391 static int
2392 mlxsw_sp_router_neigh_entry_op6(struct mlxsw_sp *mlxsw_sp,
2393                                 struct mlxsw_sp_neigh_entry *neigh_entry,
2394                                 enum mlxsw_reg_rauht_op op)
2395 {
2396         struct neighbour *n = neigh_entry->key.n;
2397         char rauht_pl[MLXSW_REG_RAUHT_LEN];
2398         const char *dip = n->primary_key;
2399
2400         mlxsw_reg_rauht_pack6(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
2401                               dip);
2402         if (neigh_entry->counter_valid)
2403                 mlxsw_reg_rauht_pack_counter(rauht_pl,
2404                                              neigh_entry->counter_index);
2405         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
2406 }
2407
2408 bool mlxsw_sp_neigh_ipv6_ignore(struct mlxsw_sp_neigh_entry *neigh_entry)
2409 {
2410         struct neighbour *n = neigh_entry->key.n;
2411
2412         /* Packets with a link-local destination address are trapped
2413          * after LPM lookup and never reach the neighbour table, so
2414          * there is no need to program such neighbours to the device.
2415          */
2416         if (ipv6_addr_type((struct in6_addr *) &n->primary_key) &
2417             IPV6_ADDR_LINKLOCAL)
2418                 return true;
2419         return false;
2420 }
2421
2422 static void
2423 mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp,
2424                             struct mlxsw_sp_neigh_entry *neigh_entry,
2425                             bool adding)
2426 {
2427         enum mlxsw_reg_rauht_op op = mlxsw_sp_rauht_op(adding);
2428         int err;
2429
2430         if (!adding && !neigh_entry->connected)
2431                 return;
2432         neigh_entry->connected = adding;
2433         if (neigh_entry->key.n->tbl->family == AF_INET) {
2434                 err = mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry,
2435                                                       op);
2436                 if (err)
2437                         return;
2438         } else if (neigh_entry->key.n->tbl->family == AF_INET6) {
2439                 if (mlxsw_sp_neigh_ipv6_ignore(neigh_entry))
2440                         return;
2441                 err = mlxsw_sp_router_neigh_entry_op6(mlxsw_sp, neigh_entry,
2442                                                       op);
2443                 if (err)
2444                         return;
2445         } else {
2446                 WARN_ON_ONCE(1);
2447                 return;
2448         }
2449
2450         if (adding)
2451                 neigh_entry->key.n->flags |= NTF_OFFLOADED;
2452         else
2453                 neigh_entry->key.n->flags &= ~NTF_OFFLOADED;
2454 }
2455
2456 void
2457 mlxsw_sp_neigh_entry_counter_update(struct mlxsw_sp *mlxsw_sp,
2458                                     struct mlxsw_sp_neigh_entry *neigh_entry,
2459                                     bool adding)
2460 {
2461         if (adding)
2462                 mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
2463         else
2464                 mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
2465         mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, true);
2466 }
2467
/* Context for netevent handling deferred to a work item. It is allocated
 * with GFP_ATOMIC by the notifier path and freed by the work callback.
 */
2468 struct mlxsw_sp_netevent_work {
2469         struct work_struct work; /* embedded work item; callbacks use container_of() on it */
2470         struct mlxsw_sp *mlxsw_sp;
2471         struct neighbour *n; /* set only for neighbour updates; a reference is held until the work runs */
2472 };
2473
2474 static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
2475 {
2476         struct mlxsw_sp_netevent_work *net_work =
2477                 container_of(work, struct mlxsw_sp_netevent_work, work);
2478         struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2479         struct mlxsw_sp_neigh_entry *neigh_entry;
2480         struct neighbour *n = net_work->n;
2481         unsigned char ha[ETH_ALEN];
2482         bool entry_connected;
2483         u8 nud_state, dead;
2484
2485         /* If these parameters are changed after we release the lock,
2486          * then we are guaranteed to receive another event letting us
2487          * know about it.
2488          */
2489         read_lock_bh(&n->lock);
2490         memcpy(ha, n->ha, ETH_ALEN);
2491         nud_state = n->nud_state;
2492         dead = n->dead;
2493         read_unlock_bh(&n->lock);
2494
2495         rtnl_lock();
2496         mlxsw_sp_span_respin(mlxsw_sp);
2497
2498         entry_connected = nud_state & NUD_VALID && !dead;
2499         neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
2500         if (!entry_connected && !neigh_entry)
2501                 goto out;
2502         if (!neigh_entry) {
2503                 neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
2504                 if (IS_ERR(neigh_entry))
2505                         goto out;
2506         }
2507
2508         memcpy(neigh_entry->ha, ha, ETH_ALEN);
2509         mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected);
2510         mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected);
2511
2512         if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
2513                 mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
2514
2515 out:
2516         rtnl_unlock();
2517         neigh_release(n);
2518         kfree(net_work);
2519 }
2520
2521 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp);
2522
2523 static void mlxsw_sp_router_mp_hash_event_work(struct work_struct *work)
2524 {
2525         struct mlxsw_sp_netevent_work *net_work =
2526                 container_of(work, struct mlxsw_sp_netevent_work, work);
2527         struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2528
2529         mlxsw_sp_mp_hash_init(mlxsw_sp);
2530         kfree(net_work);
2531 }
2532
2533 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp);
2534
2535 static void mlxsw_sp_router_update_priority_work(struct work_struct *work)
2536 {
2537         struct mlxsw_sp_netevent_work *net_work =
2538                 container_of(work, struct mlxsw_sp_netevent_work, work);
2539         struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2540
2541         __mlxsw_sp_router_init(mlxsw_sp);
2542         kfree(net_work);
2543 }
2544
2545 static int mlxsw_sp_router_schedule_work(struct net *net,
2546                                          struct notifier_block *nb,
2547                                          void (*cb)(struct work_struct *))
2548 {
2549         struct mlxsw_sp_netevent_work *net_work;
2550         struct mlxsw_sp_router *router;
2551
2552         if (!net_eq(net, &init_net))
2553                 return NOTIFY_DONE;
2554
2555         net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
2556         if (!net_work)
2557                 return NOTIFY_BAD;
2558
2559         router = container_of(nb, struct mlxsw_sp_router, netevent_nb);
2560         INIT_WORK(&net_work->work, cb);
2561         net_work->mlxsw_sp = router->mlxsw_sp;
2562         mlxsw_core_schedule_work(&net_work->work);
2563         return NOTIFY_DONE;
2564 }
2565
2566 static int mlxsw_sp_router_netevent_event(struct notifier_block *nb,
2567                                           unsigned long event, void *ptr)
2568 {
2569         struct mlxsw_sp_netevent_work *net_work;
2570         struct mlxsw_sp_port *mlxsw_sp_port;
2571         struct mlxsw_sp *mlxsw_sp;
2572         unsigned long interval;
2573         struct neigh_parms *p;
2574         struct neighbour *n;
2575
2576         switch (event) {
2577         case NETEVENT_DELAY_PROBE_TIME_UPDATE:
2578                 p = ptr;
2579
2580                 /* We don't care about changes in the default table. */
2581                 if (!p->dev || (p->tbl->family != AF_INET &&
2582                                 p->tbl->family != AF_INET6))
2583                         return NOTIFY_DONE;
2584
2585                 /* We are in atomic context and can't take RTNL mutex,
2586                  * so use RCU variant to walk the device chain.
2587                  */
2588                 mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(p->dev);
2589                 if (!mlxsw_sp_port)
2590                         return NOTIFY_DONE;
2591
2592                 mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
2593                 interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME));
2594                 mlxsw_sp->router->neighs_update.interval = interval;
2595
2596                 mlxsw_sp_port_dev_put(mlxsw_sp_port);
2597                 break;
2598         case NETEVENT_NEIGH_UPDATE:
2599                 n = ptr;
2600
2601                 if (n->tbl->family != AF_INET && n->tbl->family != AF_INET6)
2602                         return NOTIFY_DONE;
2603
2604                 mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(n->dev);
2605                 if (!mlxsw_sp_port)
2606                         return NOTIFY_DONE;
2607
2608                 net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
2609                 if (!net_work) {
2610                         mlxsw_sp_port_dev_put(mlxsw_sp_port);
2611                         return NOTIFY_BAD;
2612                 }
2613
2614                 INIT_WORK(&net_work->work, mlxsw_sp_router_neigh_event_work);
2615                 net_work->mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
2616                 net_work->n = n;
2617
2618                 /* Take a reference to ensure the neighbour won't be
2619                  * destructed until we drop the reference in delayed
2620                  * work.
2621                  */
2622                 neigh_clone(n);
2623                 mlxsw_core_schedule_work(&net_work->work);
2624                 mlxsw_sp_port_dev_put(mlxsw_sp_port);
2625                 break;
2626         case NETEVENT_IPV4_MPATH_HASH_UPDATE:
2627         case NETEVENT_IPV6_MPATH_HASH_UPDATE:
2628                 return mlxsw_sp_router_schedule_work(ptr, nb,
2629                                 mlxsw_sp_router_mp_hash_event_work);
2630
2631         case NETEVENT_IPV4_FWD_UPDATE_PRIORITY_UPDATE:
2632                 return mlxsw_sp_router_schedule_work(ptr, nb,
2633                                 mlxsw_sp_router_update_priority_work);
2634         }
2635
2636         return NOTIFY_DONE;
2637 }
2638
2639 static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp)
2640 {
2641         int err;
2642
2643         err = rhashtable_init(&mlxsw_sp->router->neigh_ht,
2644                               &mlxsw_sp_neigh_ht_params);
2645         if (err)
2646                 return err;
2647
2648         /* Initialize the polling interval according to the default
2649          * table.
2650          */
2651         mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp);
2652
2653         /* Create the delayed works for the activity_update */
2654         INIT_DELAYED_WORK(&mlxsw_sp->router->neighs_update.dw,
2655                           mlxsw_sp_router_neighs_update_work);
2656         INIT_DELAYED_WORK(&mlxsw_sp->router->nexthop_probe_dw,
2657                           mlxsw_sp_router_probe_unresolved_nexthops);
2658         mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw, 0);
2659         mlxsw_core_schedule_dw(&mlxsw_sp->router->nexthop_probe_dw, 0);
2660         return 0;
2661 }
2662
2663 static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
2664 {
2665         cancel_delayed_work_sync(&mlxsw_sp->router->neighs_update.dw);
2666         cancel_delayed_work_sync(&mlxsw_sp->router->nexthop_probe_dw);
2667         rhashtable_destroy(&mlxsw_sp->router->neigh_ht);
2668 }
2669
2670 static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
2671                                          struct mlxsw_sp_rif *rif)
2672 {
2673         struct mlxsw_sp_neigh_entry *neigh_entry, *tmp;
2674
2675         list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list,
2676                                  rif_list_node) {
2677                 mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, false);
2678                 mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
2679         }
2680 }
2681
/* Kind of nexthop; selects which update routine programs its adjacency
 * entries.
 */
2682 enum mlxsw_sp_nexthop_type {
2683         MLXSW_SP_NEXTHOP_TYPE_ETH, /* programmed via mlxsw_sp_nexthop_update() */
2684         MLXSW_SP_NEXTHOP_TYPE_IPIP, /* programmed via mlxsw_sp_nexthop_ipip_update() */
2685 };
2686
/* Key used to store and look up nexthops in the router's nexthop hash
 * table; the whole struct is the rhashtable key.
 */
2687 struct mlxsw_sp_nexthop_key {
2688         struct fib_nh *fib_nh;
2689 };
2690
/* A single nexthop. Nexthops are stored in the trailing nexthops array of
 * their nexthop group and linked on the router's nexthop list.
 */
2691 struct mlxsw_sp_nexthop {
2692         struct list_head neigh_list_node; /* member of neigh entry list */
2693         struct list_head rif_list_node;
2694         struct list_head router_list_node; /* member of router->nexthop_list */
2695         struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group
2696                                                 * this belongs to
2697                                                 */
2698         struct rhash_head ht_node; /* node in router->nexthop_ht */
2699         struct mlxsw_sp_nexthop_key key;
2700         unsigned char gw_addr[sizeof(struct in6_addr)]; /* compared as a struct in6_addr for IPv6 groups */
2701         int ifindex;
2702         int nh_weight;
2703         int norm_nh_weight;
2704         int num_adj_entries; /* number of adjacency entries written for this nexthop */
2705         struct mlxsw_sp_rif *rif;
2706         u8 should_offload:1, /* set indicates this neigh is connected and
2707                               * should be put to KVD linear area of this group.
2708                               */
2709            offloaded:1, /* set in case the neigh is actually put into
2710                          * KVD linear area of this group.
2711                          */
2712            update:1; /* set indicates that MAC of this neigh should be
2713                       * updated in HW
2714                       */
2715         enum mlxsw_sp_nexthop_type type; /* ETH updates read neigh_entry, IPIP updates read ipip_entry */
2716         union {
2717                 struct mlxsw_sp_neigh_entry *neigh_entry;
2718                 struct mlxsw_sp_ipip_entry *ipip_entry;
2719         };
2720         unsigned int counter_index; /* meaningful only while counter_valid is set */
2721         bool counter_valid;
2722 };
2723
2724 struct mlxsw_sp_nexthop_group {
2725         void *priv;
2726         struct rhash_head ht_node;
2727         struct list_head fib_list; /* list of fib entries that use this group */
2728         struct neigh_table *neigh_tbl;
2729         u8 adj_index_valid:1,
2730            gateway:1; /* routes using the group use a gateway */
2731         u32 adj_index;
2732         u16 ecmp_size;
2733         u16 count;
2734         int sum_norm_weight;
2735         struct mlxsw_sp_nexthop nexthops[0];
2736 #define nh_rif  nexthops[0].rif
2737 };
2738
2739 void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp,
2740                                     struct mlxsw_sp_nexthop *nh)
2741 {
2742         struct devlink *devlink;
2743
2744         devlink = priv_to_devlink(mlxsw_sp->core);
2745         if (!devlink_dpipe_table_counter_enabled(devlink,
2746                                                  MLXSW_SP_DPIPE_TABLE_NAME_ADJ))
2747                 return;
2748
2749         if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &nh->counter_index))
2750                 return;
2751
2752         nh->counter_valid = true;
2753 }
2754
2755 void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp,
2756                                    struct mlxsw_sp_nexthop *nh)
2757 {
2758         if (!nh->counter_valid)
2759                 return;
2760         mlxsw_sp_flow_counter_free(mlxsw_sp, nh->counter_index);
2761         nh->counter_valid = false;
2762 }
2763
2764 int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp,
2765                                  struct mlxsw_sp_nexthop *nh, u64 *p_counter)
2766 {
2767         if (!nh->counter_valid)
2768                 return -EINVAL;
2769
2770         return mlxsw_sp_flow_counter_get(mlxsw_sp, nh->counter_index,
2771                                          p_counter, NULL);
2772 }
2773
2774 struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router,
2775                                                struct mlxsw_sp_nexthop *nh)
2776 {
2777         if (!nh) {
2778                 if (list_empty(&router->nexthop_list))
2779                         return NULL;
2780                 else
2781                         return list_first_entry(&router->nexthop_list,
2782                                                 typeof(*nh), router_list_node);
2783         }
2784         if (list_is_last(&nh->router_list_node, &router->nexthop_list))
2785                 return NULL;
2786         return list_next_entry(nh, router_list_node);
2787 }
2788
2789 bool mlxsw_sp_nexthop_offload(struct mlxsw_sp_nexthop *nh)
2790 {
2791         return nh->offloaded;
2792 }
2793
2794 unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh)
2795 {
2796         if (!nh->offloaded)
2797                 return NULL;
2798         return nh->neigh_entry->ha;
2799 }
2800
2801 int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index,
2802                              u32 *p_adj_size, u32 *p_adj_hash_index)
2803 {
2804         struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
2805         u32 adj_hash_index = 0;
2806         int i;
2807
2808         if (!nh->offloaded || !nh_grp->adj_index_valid)
2809                 return -EINVAL;
2810
2811         *p_adj_index = nh_grp->adj_index;
2812         *p_adj_size = nh_grp->ecmp_size;
2813
2814         for (i = 0; i < nh_grp->count; i++) {
2815                 struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];
2816
2817                 if (nh_iter == nh)
2818                         break;
2819                 if (nh_iter->offloaded)
2820                         adj_hash_index += nh_iter->num_adj_entries;
2821         }
2822
2823         *p_adj_hash_index = adj_hash_index;
2824         return 0;
2825 }
2826
2827 struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh)
2828 {
2829         return nh->rif;
2830 }
2831
2832 bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh)
2833 {
2834         struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
2835         int i;
2836
2837         for (i = 0; i < nh_grp->count; i++) {
2838                 struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];
2839
2840                 if (nh_iter->type == MLXSW_SP_NEXTHOP_TYPE_IPIP)
2841                         return true;
2842         }
2843         return false;
2844 }
2845
2846 static struct fib_info *
2847 mlxsw_sp_nexthop4_group_fi(const struct mlxsw_sp_nexthop_group *nh_grp)
2848 {
2849         return nh_grp->priv;
2850 }
2851
/* Lookup key for the nexthop group hash table. @proto selects which union
 * member is valid: fi for IPv4, fib6_entry for IPv6.
 */
2852 struct mlxsw_sp_nexthop_group_cmp_arg {
2853         enum mlxsw_sp_l3proto proto;
2854         union {
2855                 struct fib_info *fi;
2856                 struct mlxsw_sp_fib6_entry *fib6_entry;
2857         };
2858 };
2859
2860 static bool
2861 mlxsw_sp_nexthop6_group_has_nexthop(const struct mlxsw_sp_nexthop_group *nh_grp,
2862                                     const struct in6_addr *gw, int ifindex,
2863                                     int weight)
2864 {
2865         int i;
2866
2867         for (i = 0; i < nh_grp->count; i++) {
2868                 const struct mlxsw_sp_nexthop *nh;
2869
2870                 nh = &nh_grp->nexthops[i];
2871                 if (nh->ifindex == ifindex && nh->nh_weight == weight &&
2872                     ipv6_addr_equal(gw, (struct in6_addr *) nh->gw_addr))
2873                         return true;
2874         }
2875
2876         return false;
2877 }
2878
2879 static bool
2880 mlxsw_sp_nexthop6_group_cmp(const struct mlxsw_sp_nexthop_group *nh_grp,
2881                             const struct mlxsw_sp_fib6_entry *fib6_entry)
2882 {
2883         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
2884
2885         if (nh_grp->count != fib6_entry->nrt6)
2886                 return false;
2887
2888         list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
2889                 struct fib6_nh *fib6_nh = &mlxsw_sp_rt6->rt->fib6_nh;
2890                 struct in6_addr *gw;
2891                 int ifindex, weight;
2892
2893                 ifindex = fib6_nh->fib_nh_dev->ifindex;
2894                 weight = fib6_nh->fib_nh_weight;
2895                 gw = &fib6_nh->fib_nh_gw6;
2896                 if (!mlxsw_sp_nexthop6_group_has_nexthop(nh_grp, gw, ifindex,
2897                                                          weight))
2898                         return false;
2899         }
2900
2901         return true;
2902 }
2903
2904 static int
2905 mlxsw_sp_nexthop_group_cmp(struct rhashtable_compare_arg *arg, const void *ptr)
2906 {
2907         const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = arg->key;
2908         const struct mlxsw_sp_nexthop_group *nh_grp = ptr;
2909
2910         switch (cmp_arg->proto) {
2911         case MLXSW_SP_L3_PROTO_IPV4:
2912                 return cmp_arg->fi != mlxsw_sp_nexthop4_group_fi(nh_grp);
2913         case MLXSW_SP_L3_PROTO_IPV6:
2914                 return !mlxsw_sp_nexthop6_group_cmp(nh_grp,
2915                                                     cmp_arg->fib6_entry);
2916         default:
2917                 WARN_ON(1);
2918                 return 1;
2919         }
2920 }
2921
2922 static int
2923 mlxsw_sp_nexthop_group_type(const struct mlxsw_sp_nexthop_group *nh_grp)
2924 {
2925         return nh_grp->neigh_tbl->family;
2926 }
2927
2928 static u32 mlxsw_sp_nexthop_group_hash_obj(const void *data, u32 len, u32 seed)
2929 {
2930         const struct mlxsw_sp_nexthop_group *nh_grp = data;
2931         const struct mlxsw_sp_nexthop *nh;
2932         struct fib_info *fi;
2933         unsigned int val;
2934         int i;
2935
2936         switch (mlxsw_sp_nexthop_group_type(nh_grp)) {
2937         case AF_INET:
2938                 fi = mlxsw_sp_nexthop4_group_fi(nh_grp);
2939                 return jhash(&fi, sizeof(fi), seed);
2940         case AF_INET6:
2941                 val = nh_grp->count;
2942                 for (i = 0; i < nh_grp->count; i++) {
2943                         nh = &nh_grp->nexthops[i];
2944                         val ^= nh->ifindex;
2945                 }
2946                 return jhash(&val, sizeof(val), seed);
2947         default:
2948                 WARN_ON(1);
2949                 return 0;
2950         }
2951 }
2952
2953 static u32
2954 mlxsw_sp_nexthop6_group_hash(struct mlxsw_sp_fib6_entry *fib6_entry, u32 seed)
2955 {
2956         unsigned int val = fib6_entry->nrt6;
2957         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
2958         struct net_device *dev;
2959
2960         list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
2961                 dev = mlxsw_sp_rt6->rt->fib6_nh.fib_nh_dev;
2962                 val ^= dev->ifindex;
2963         }
2964
2965         return jhash(&val, sizeof(val), seed);
2966 }
2967
2968 static u32
2969 mlxsw_sp_nexthop_group_hash(const void *data, u32 len, u32 seed)
2970 {
2971         const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = data;
2972
2973         switch (cmp_arg->proto) {
2974         case MLXSW_SP_L3_PROTO_IPV4:
2975                 return jhash(&cmp_arg->fi, sizeof(cmp_arg->fi), seed);
2976         case MLXSW_SP_L3_PROTO_IPV6:
2977                 return mlxsw_sp_nexthop6_group_hash(cmp_arg->fib6_entry, seed);
2978         default:
2979                 WARN_ON(1);
2980                 return 0;
2981         }
2982 }
2983
/* rhashtable parameters for the nexthop group table. Custom hash and
 * compare callbacks are supplied instead of a fixed key offset/length:
 * lookup keys are struct mlxsw_sp_nexthop_group_cmp_arg, stored objects are
 * struct mlxsw_sp_nexthop_group.
 */
2984 static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = {
2985         .head_offset = offsetof(struct mlxsw_sp_nexthop_group, ht_node),
2986         .hashfn      = mlxsw_sp_nexthop_group_hash, /* hashes a lookup key */
2987         .obj_hashfn  = mlxsw_sp_nexthop_group_hash_obj, /* hashes a stored group */
2988         .obj_cmpfn   = mlxsw_sp_nexthop_group_cmp, /* returns 0 on match */
2989 };
2990
2991 static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp,
2992                                          struct mlxsw_sp_nexthop_group *nh_grp)
2993 {
2994         if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
2995             !nh_grp->gateway)
2996                 return 0;
2997
2998         return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_group_ht,
2999                                       &nh_grp->ht_node,
3000                                       mlxsw_sp_nexthop_group_ht_params);
3001 }
3002
3003 static void mlxsw_sp_nexthop_group_remove(struct mlxsw_sp *mlxsw_sp,
3004                                           struct mlxsw_sp_nexthop_group *nh_grp)
3005 {
3006         if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
3007             !nh_grp->gateway)
3008                 return;
3009
3010         rhashtable_remove_fast(&mlxsw_sp->router->nexthop_group_ht,
3011                                &nh_grp->ht_node,
3012                                mlxsw_sp_nexthop_group_ht_params);
3013 }
3014
3015 static struct mlxsw_sp_nexthop_group *
3016 mlxsw_sp_nexthop4_group_lookup(struct mlxsw_sp *mlxsw_sp,
3017                                struct fib_info *fi)
3018 {
3019         struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
3020
3021         cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV4;
3022         cmp_arg.fi = fi;
3023         return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
3024                                       &cmp_arg,
3025                                       mlxsw_sp_nexthop_group_ht_params);
3026 }
3027
3028 static struct mlxsw_sp_nexthop_group *
3029 mlxsw_sp_nexthop6_group_lookup(struct mlxsw_sp *mlxsw_sp,
3030                                struct mlxsw_sp_fib6_entry *fib6_entry)
3031 {
3032         struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
3033
3034         cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV6;
3035         cmp_arg.fib6_entry = fib6_entry;
3036         return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
3037                                       &cmp_arg,
3038                                       mlxsw_sp_nexthop_group_ht_params);
3039 }
3040
/* rhashtable parameters for the nexthop table: fixed-size key embedded in
 * struct mlxsw_sp_nexthop (the whole struct mlxsw_sp_nexthop_key).
 */
3041 static const struct rhashtable_params mlxsw_sp_nexthop_ht_params = {
3042         .key_offset = offsetof(struct mlxsw_sp_nexthop, key),
3043         .head_offset = offsetof(struct mlxsw_sp_nexthop, ht_node),
3044         .key_len = sizeof(struct mlxsw_sp_nexthop_key),
3045 };
3046
3047 static int mlxsw_sp_nexthop_insert(struct mlxsw_sp *mlxsw_sp,
3048                                    struct mlxsw_sp_nexthop *nh)
3049 {
3050         return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_ht,
3051                                       &nh->ht_node, mlxsw_sp_nexthop_ht_params);
3052 }
3053
3054 static void mlxsw_sp_nexthop_remove(struct mlxsw_sp *mlxsw_sp,
3055                                     struct mlxsw_sp_nexthop *nh)
3056 {
3057         rhashtable_remove_fast(&mlxsw_sp->router->nexthop_ht, &nh->ht_node,
3058                                mlxsw_sp_nexthop_ht_params);
3059 }
3060
3061 static struct mlxsw_sp_nexthop *
3062 mlxsw_sp_nexthop_lookup(struct mlxsw_sp *mlxsw_sp,
3063                         struct mlxsw_sp_nexthop_key key)
3064 {
3065         return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_ht, &key,
3066                                       mlxsw_sp_nexthop_ht_params);
3067 }
3068
3069 static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp,
3070                                              const struct mlxsw_sp_fib *fib,
3071                                              u32 adj_index, u16 ecmp_size,
3072                                              u32 new_adj_index,
3073                                              u16 new_ecmp_size)
3074 {
3075         char raleu_pl[MLXSW_REG_RALEU_LEN];
3076
3077         mlxsw_reg_raleu_pack(raleu_pl,
3078                              (enum mlxsw_reg_ralxx_protocol) fib->proto,
3079                              fib->vr->id, adj_index, ecmp_size, new_adj_index,
3080                              new_ecmp_size);
3081         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl);
3082 }
3083
3084 static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
3085                                           struct mlxsw_sp_nexthop_group *nh_grp,
3086                                           u32 old_adj_index, u16 old_ecmp_size)
3087 {
3088         struct mlxsw_sp_fib_entry *fib_entry;
3089         struct mlxsw_sp_fib *fib = NULL;
3090         int err;
3091
3092         list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
3093                 if (fib == fib_entry->fib_node->fib)
3094                         continue;
3095                 fib = fib_entry->fib_node->fib;
3096                 err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, fib,
3097                                                         old_adj_index,
3098                                                         old_ecmp_size,
3099                                                         nh_grp->adj_index,
3100                                                         nh_grp->ecmp_size);
3101                 if (err)
3102                         return err;
3103         }
3104         return 0;
3105 }
3106
3107 static int __mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
3108                                      struct mlxsw_sp_nexthop *nh)
3109 {
3110         struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
3111         char ratr_pl[MLXSW_REG_RATR_LEN];
3112
3113         mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
3114                             true, MLXSW_REG_RATR_TYPE_ETHERNET,
3115                             adj_index, neigh_entry->rif);
3116         mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
3117         if (nh->counter_valid)
3118                 mlxsw_reg_ratr_counter_pack(ratr_pl, nh->counter_index, true);
3119         else
3120                 mlxsw_reg_ratr_counter_pack(ratr_pl, 0, false);
3121
3122         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
3123 }
3124
3125 int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
3126                             struct mlxsw_sp_nexthop *nh)
3127 {
3128         int i;
3129
3130         for (i = 0; i < nh->num_adj_entries; i++) {
3131                 int err;
3132
3133                 err = __mlxsw_sp_nexthop_update(mlxsw_sp, adj_index + i, nh);
3134                 if (err)
3135                         return err;
3136         }
3137
3138         return 0;
3139 }
3140
3141 static int __mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
3142                                           u32 adj_index,
3143                                           struct mlxsw_sp_nexthop *nh)
3144 {
3145         const struct mlxsw_sp_ipip_ops *ipip_ops;
3146
3147         ipip_ops = mlxsw_sp->router->ipip_ops_arr[nh->ipip_entry->ipipt];
3148         return ipip_ops->nexthop_update(mlxsw_sp, adj_index, nh->ipip_entry);
3149 }
3150
3151 static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
3152                                         u32 adj_index,
3153                                         struct mlxsw_sp_nexthop *nh)
3154 {
3155         int i;
3156
3157         for (i = 0; i < nh->num_adj_entries; i++) {
3158                 int err;
3159
3160                 err = __mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index + i,
3161                                                      nh);
3162                 if (err)
3163                         return err;
3164         }
3165
3166         return 0;
3167 }
3168
3169 static int
3170 mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
3171                               struct mlxsw_sp_nexthop_group *nh_grp,
3172                               bool reallocate)
3173 {
3174         u32 adj_index = nh_grp->adj_index; /* base */
3175         struct mlxsw_sp_nexthop *nh;
3176         int i;
3177         int err;
3178
3179         for (i = 0; i < nh_grp->count; i++) {
3180                 nh = &nh_grp->nexthops[i];
3181
3182                 if (!nh->should_offload) {
3183                         nh->offloaded = 0;
3184                         continue;
3185                 }
3186
3187                 if (nh->update || reallocate) {
3188                         switch (nh->type) {
3189                         case MLXSW_SP_NEXTHOP_TYPE_ETH:
3190                                 err = mlxsw_sp_nexthop_update
3191                                             (mlxsw_sp, adj_index, nh);
3192                                 break;
3193                         case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3194                                 err = mlxsw_sp_nexthop_ipip_update
3195                                             (mlxsw_sp, adj_index, nh);
3196                                 break;
3197                         }
3198                         if (err)
3199                                 return err;
3200                         nh->update = 0;
3201                         nh->offloaded = 1;
3202                 }
3203                 adj_index += nh->num_adj_entries;
3204         }
3205         return 0;
3206 }
3207
3208 static bool
3209 mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
3210                                  const struct mlxsw_sp_fib_entry *fib_entry);
3211
3212 static int
3213 mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
3214                                     struct mlxsw_sp_nexthop_group *nh_grp)
3215 {
3216         struct mlxsw_sp_fib_entry *fib_entry;
3217         int err;
3218
3219         list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
3220                 if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node,
3221                                                       fib_entry))
3222                         continue;
3223                 err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
3224                 if (err)
3225                         return err;
3226         }
3227         return 0;
3228 }
3229
3230 static void
3231 mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
3232                                    enum mlxsw_reg_ralue_op op, int err);
3233
3234 static void
3235 mlxsw_sp_nexthop_fib_entries_refresh(struct mlxsw_sp_nexthop_group *nh_grp)
3236 {
3237         enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_WRITE;
3238         struct mlxsw_sp_fib_entry *fib_entry;
3239
3240         list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
3241                 if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node,
3242                                                       fib_entry))
3243                         continue;
3244                 mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
3245         }
3246 }
3247
3248 static void mlxsw_sp_adj_grp_size_round_up(u16 *p_adj_grp_size)
3249 {
3250         /* Valid sizes for an adjacency group are:
3251          * 1-64, 512, 1024, 2048 and 4096.
3252          */
3253         if (*p_adj_grp_size <= 64)
3254                 return;
3255         else if (*p_adj_grp_size <= 512)
3256                 *p_adj_grp_size = 512;
3257         else if (*p_adj_grp_size <= 1024)
3258                 *p_adj_grp_size = 1024;
3259         else if (*p_adj_grp_size <= 2048)
3260                 *p_adj_grp_size = 2048;
3261         else
3262                 *p_adj_grp_size = 4096;
3263 }
3264
3265 static void mlxsw_sp_adj_grp_size_round_down(u16 *p_adj_grp_size,
3266                                              unsigned int alloc_size)
3267 {
3268         if (alloc_size >= 4096)
3269                 *p_adj_grp_size = 4096;
3270         else if (alloc_size >= 2048)
3271                 *p_adj_grp_size = 2048;
3272         else if (alloc_size >= 1024)
3273                 *p_adj_grp_size = 1024;
3274         else if (alloc_size >= 512)
3275                 *p_adj_grp_size = 512;
3276 }
3277
3278 static int mlxsw_sp_fix_adj_grp_size(struct mlxsw_sp *mlxsw_sp,
3279                                      u16 *p_adj_grp_size)
3280 {
3281         unsigned int alloc_size;
3282         int err;
3283
3284         /* Round up the requested group size to the next size supported
3285          * by the device and make sure the request can be satisfied.
3286          */
3287         mlxsw_sp_adj_grp_size_round_up(p_adj_grp_size);
3288         err = mlxsw_sp_kvdl_alloc_count_query(mlxsw_sp,
3289                                               MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3290                                               *p_adj_grp_size, &alloc_size);
3291         if (err)
3292                 return err;
3293         /* It is possible the allocation results in more allocated
3294          * entries than requested. Try to use as much of them as
3295          * possible.
3296          */
3297         mlxsw_sp_adj_grp_size_round_down(p_adj_grp_size, alloc_size);
3298
3299         return 0;
3300 }
3301
3302 static void
3303 mlxsw_sp_nexthop_group_normalize(struct mlxsw_sp_nexthop_group *nh_grp)
3304 {
3305         int i, g = 0, sum_norm_weight = 0;
3306         struct mlxsw_sp_nexthop *nh;
3307
3308         for (i = 0; i < nh_grp->count; i++) {
3309                 nh = &nh_grp->nexthops[i];
3310
3311                 if (!nh->should_offload)
3312                         continue;
3313                 if (g > 0)
3314                         g = gcd(nh->nh_weight, g);
3315                 else
3316                         g = nh->nh_weight;
3317         }
3318
3319         for (i = 0; i < nh_grp->count; i++) {
3320                 nh = &nh_grp->nexthops[i];
3321
3322                 if (!nh->should_offload)
3323                         continue;
3324                 nh->norm_nh_weight = nh->nh_weight / g;
3325                 sum_norm_weight += nh->norm_nh_weight;
3326         }
3327
3328         nh_grp->sum_norm_weight = sum_norm_weight;
3329 }
3330
3331 static void
3332 mlxsw_sp_nexthop_group_rebalance(struct mlxsw_sp_nexthop_group *nh_grp)
3333 {
3334         int total = nh_grp->sum_norm_weight;
3335         u16 ecmp_size = nh_grp->ecmp_size;
3336         int i, weight = 0, lower_bound = 0;
3337
3338         for (i = 0; i < nh_grp->count; i++) {
3339                 struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3340                 int upper_bound;
3341
3342                 if (!nh->should_offload)
3343                         continue;
3344                 weight += nh->norm_nh_weight;
3345                 upper_bound = DIV_ROUND_CLOSEST(ecmp_size * weight, total);
3346                 nh->num_adj_entries = upper_bound - lower_bound;
3347                 lower_bound = upper_bound;
3348         }
3349 }
3350
3351 static void
3352 mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
3353                                struct mlxsw_sp_nexthop_group *nh_grp)
3354 {
3355         u16 ecmp_size, old_ecmp_size;
3356         struct mlxsw_sp_nexthop *nh;
3357         bool offload_change = false;
3358         u32 adj_index;
3359         bool old_adj_index_valid;
3360         u32 old_adj_index;
3361         int i;
3362         int err;
3363
3364         if (!nh_grp->gateway) {
3365                 mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3366                 return;
3367         }
3368
3369         for (i = 0; i < nh_grp->count; i++) {
3370                 nh = &nh_grp->nexthops[i];
3371
3372                 if (nh->should_offload != nh->offloaded) {
3373                         offload_change = true;
3374                         if (nh->should_offload)
3375                                 nh->update = 1;
3376                 }
3377         }
3378         if (!offload_change) {
3379                 /* Nothing was added or removed, so no need to reallocate. Just
3380                  * update MAC on existing adjacency indexes.
3381                  */
3382                 err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, false);
3383                 if (err) {
3384                         dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
3385                         goto set_trap;
3386                 }
3387                 return;
3388         }
3389         mlxsw_sp_nexthop_group_normalize(nh_grp);
3390         if (!nh_grp->sum_norm_weight)
3391                 /* No neigh of this group is connected so we just set
3392                  * the trap and let everthing flow through kernel.
3393                  */
3394                 goto set_trap;
3395
3396         ecmp_size = nh_grp->sum_norm_weight;
3397         err = mlxsw_sp_fix_adj_grp_size(mlxsw_sp, &ecmp_size);
3398         if (err)
3399                 /* No valid allocation size available. */
3400                 goto set_trap;
3401
3402         err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3403                                   ecmp_size, &adj_index);
3404         if (err) {
3405                 /* We ran out of KVD linear space, just set the
3406                  * trap and let everything flow through kernel.
3407                  */
3408                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n");
3409                 goto set_trap;
3410         }
3411         old_adj_index_valid = nh_grp->adj_index_valid;
3412         old_adj_index = nh_grp->adj_index;
3413         old_ecmp_size = nh_grp->ecmp_size;
3414         nh_grp->adj_index_valid = 1;
3415         nh_grp->adj_index = adj_index;
3416         nh_grp->ecmp_size = ecmp_size;
3417         mlxsw_sp_nexthop_group_rebalance(nh_grp);
3418         err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, true);
3419         if (err) {
3420                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
3421                 goto set_trap;
3422         }
3423
3424         if (!old_adj_index_valid) {
3425                 /* The trap was set for fib entries, so we have to call
3426                  * fib entry update to unset it and use adjacency index.
3427                  */
3428                 err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3429                 if (err) {
3430                         dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n");
3431                         goto set_trap;
3432                 }
3433                 return;
3434         }
3435
3436         err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
3437                                              old_adj_index, old_ecmp_size);
3438         mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3439                            old_ecmp_size, old_adj_index);
3440         if (err) {
3441                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
3442                 goto set_trap;
3443         }
3444
3445         /* Offload state within the group changed, so update the flags. */
3446         mlxsw_sp_nexthop_fib_entries_refresh(nh_grp);
3447
3448         return;
3449
3450 set_trap:
3451         old_adj_index_valid = nh_grp->adj_index_valid;
3452         nh_grp->adj_index_valid = 0;
3453         for (i = 0; i < nh_grp->count; i++) {
3454                 nh = &nh_grp->nexthops[i];
3455                 nh->offloaded = 0;
3456         }
3457         err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3458         if (err)
3459                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
3460         if (old_adj_index_valid)
3461                 mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3462                                    nh_grp->ecmp_size, nh_grp->adj_index);
3463 }
3464
3465 static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
3466                                             bool removing)
3467 {
3468         if (!removing)
3469                 nh->should_offload = 1;
3470         else
3471                 nh->should_offload = 0;
3472         nh->update = 1;
3473 }
3474
3475 static void
3476 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
3477                               struct mlxsw_sp_neigh_entry *neigh_entry,
3478                               bool removing)
3479 {
3480         struct mlxsw_sp_nexthop *nh;
3481
3482         list_for_each_entry(nh, &neigh_entry->nexthop_list,
3483                             neigh_list_node) {
3484                 __mlxsw_sp_nexthop_neigh_update(nh, removing);
3485                 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3486         }
3487 }
3488
3489 static void mlxsw_sp_nexthop_rif_init(struct mlxsw_sp_nexthop *nh,
3490                                       struct mlxsw_sp_rif *rif)
3491 {
3492         if (nh->rif)
3493                 return;
3494
3495         nh->rif = rif;
3496         list_add(&nh->rif_list_node, &rif->nexthop_list);
3497 }
3498
3499 static void mlxsw_sp_nexthop_rif_fini(struct mlxsw_sp_nexthop *nh)
3500 {
3501         if (!nh->rif)
3502                 return;
3503
3504         list_del(&nh->rif_list_node);
3505         nh->rif = NULL;
3506 }
3507
3508 static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp,
3509                                        struct mlxsw_sp_nexthop *nh)
3510 {
3511         struct mlxsw_sp_neigh_entry *neigh_entry;
3512         struct neighbour *n;
3513         u8 nud_state, dead;
3514         int err;
3515
3516         if (!nh->nh_grp->gateway || nh->neigh_entry)
3517                 return 0;
3518
3519         /* Take a reference of neigh here ensuring that neigh would
3520          * not be destructed before the nexthop entry is finished.
3521          * The reference is taken either in neigh_lookup() or
3522          * in neigh_create() in case n is not found.
3523          */
3524         n = neigh_lookup(nh->nh_grp->neigh_tbl, &nh->gw_addr, nh->rif->dev);
3525         if (!n) {
3526                 n = neigh_create(nh->nh_grp->neigh_tbl, &nh->gw_addr,
3527                                  nh->rif->dev);
3528                 if (IS_ERR(n))
3529                         return PTR_ERR(n);
3530                 neigh_event_send(n, NULL);
3531         }
3532         neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
3533         if (!neigh_entry) {
3534                 neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
3535                 if (IS_ERR(neigh_entry)) {
3536                         err = -EINVAL;
3537                         goto err_neigh_entry_create;
3538                 }
3539         }
3540
3541         /* If that is the first nexthop connected to that neigh, add to
3542          * nexthop_neighs_list
3543          */
3544         if (list_empty(&neigh_entry->nexthop_list))
3545                 list_add_tail(&neigh_entry->nexthop_neighs_list_node,
3546                               &mlxsw_sp->router->nexthop_neighs_list);
3547
3548         nh->neigh_entry = neigh_entry;
3549         list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list);
3550         read_lock_bh(&n->lock);
3551         nud_state = n->nud_state;
3552         dead = n->dead;
3553         read_unlock_bh(&n->lock);
3554         __mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID && !dead));
3555
3556         return 0;
3557
3558 err_neigh_entry_create:
3559         neigh_release(n);
3560         return err;
3561 }
3562
3563 static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp,
3564                                         struct mlxsw_sp_nexthop *nh)
3565 {
3566         struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
3567         struct neighbour *n;
3568
3569         if (!neigh_entry)
3570                 return;
3571         n = neigh_entry->key.n;
3572
3573         __mlxsw_sp_nexthop_neigh_update(nh, true);
3574         list_del(&nh->neigh_list_node);
3575         nh->neigh_entry = NULL;
3576
3577         /* If that is the last nexthop connected to that neigh, remove from
3578          * nexthop_neighs_list
3579          */
3580         if (list_empty(&neigh_entry->nexthop_list))
3581                 list_del(&neigh_entry->nexthop_neighs_list_node);
3582
3583         if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
3584                 mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
3585
3586         neigh_release(n);
3587 }
3588
3589 static bool mlxsw_sp_ipip_netdev_ul_up(struct net_device *ol_dev)
3590 {
3591         struct net_device *ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
3592
3593         return ul_dev ? (ul_dev->flags & IFF_UP) : true;
3594 }
3595
3596 static void mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp,
3597                                        struct mlxsw_sp_nexthop *nh,
3598                                        struct mlxsw_sp_ipip_entry *ipip_entry)
3599 {
3600         bool removing;
3601
3602         if (!nh->nh_grp->gateway || nh->ipip_entry)
3603                 return;
3604
3605         nh->ipip_entry = ipip_entry;
3606         removing = !mlxsw_sp_ipip_netdev_ul_up(ipip_entry->ol_dev);
3607         __mlxsw_sp_nexthop_neigh_update(nh, removing);
3608         mlxsw_sp_nexthop_rif_init(nh, &ipip_entry->ol_lb->common);
3609 }
3610
3611 static void mlxsw_sp_nexthop_ipip_fini(struct mlxsw_sp *mlxsw_sp,
3612                                        struct mlxsw_sp_nexthop *nh)
3613 {
3614         struct mlxsw_sp_ipip_entry *ipip_entry = nh->ipip_entry;
3615
3616         if (!ipip_entry)
3617                 return;
3618
3619         __mlxsw_sp_nexthop_neigh_update(nh, true);
3620         nh->ipip_entry = NULL;
3621 }
3622
3623 static bool mlxsw_sp_nexthop4_ipip_type(const struct mlxsw_sp *mlxsw_sp,
3624                                         const struct fib_nh *fib_nh,
3625                                         enum mlxsw_sp_ipip_type *p_ipipt)
3626 {
3627         struct net_device *dev = fib_nh->fib_nh_dev;
3628
3629         return dev &&
3630                fib_nh->nh_parent->fib_type == RTN_UNICAST &&
3631                mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, p_ipipt);
3632 }
3633
3634 static void mlxsw_sp_nexthop_type_fini(struct mlxsw_sp *mlxsw_sp,
3635                                        struct mlxsw_sp_nexthop *nh)
3636 {
3637         switch (nh->type) {
3638         case MLXSW_SP_NEXTHOP_TYPE_ETH:
3639                 mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
3640                 mlxsw_sp_nexthop_rif_fini(nh);
3641                 break;
3642         case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3643                 mlxsw_sp_nexthop_rif_fini(nh);
3644                 mlxsw_sp_nexthop_ipip_fini(mlxsw_sp, nh);
3645                 break;
3646         }
3647 }
3648
3649 static int mlxsw_sp_nexthop4_type_init(struct mlxsw_sp *mlxsw_sp,
3650                                        struct mlxsw_sp_nexthop *nh,
3651                                        struct fib_nh *fib_nh)
3652 {
3653         const struct mlxsw_sp_ipip_ops *ipip_ops;
3654         struct net_device *dev = fib_nh->fib_nh_dev;
3655         struct mlxsw_sp_ipip_entry *ipip_entry;
3656         struct mlxsw_sp_rif *rif;
3657         int err;
3658
3659         ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev);
3660         if (ipip_entry) {
3661                 ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
3662                 if (ipip_ops->can_offload(mlxsw_sp, dev,
3663                                           MLXSW_SP_L3_PROTO_IPV4)) {
3664                         nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
3665                         mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry);
3666                         return 0;
3667                 }
3668         }
3669
3670         nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
3671         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
3672         if (!rif)
3673                 return 0;
3674
3675         mlxsw_sp_nexthop_rif_init(nh, rif);
3676         err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
3677         if (err)
3678                 goto err_neigh_init;
3679
3680         return 0;
3681
3682 err_neigh_init:
3683         mlxsw_sp_nexthop_rif_fini(nh);
3684         return err;
3685 }
3686
/* IPv4 counterpart of mlxsw_sp_nexthop4_type_init(); simply forwards to
 * the protocol-independent mlxsw_sp_nexthop_type_fini().
 */
static void
mlxsw_sp_nexthop4_type_fini(struct mlxsw_sp *mlxsw_sp,
			    struct mlxsw_sp_nexthop *nh)
{
	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
}
3692
3693 static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp,
3694                                   struct mlxsw_sp_nexthop_group *nh_grp,
3695                                   struct mlxsw_sp_nexthop *nh,
3696                                   struct fib_nh *fib_nh)
3697 {
3698         struct net_device *dev = fib_nh->fib_nh_dev;
3699         struct in_device *in_dev;
3700         int err;
3701
3702         nh->nh_grp = nh_grp;
3703         nh->key.fib_nh = fib_nh;
3704 #ifdef CONFIG_IP_ROUTE_MULTIPATH
3705         nh->nh_weight = fib_nh->fib_nh_weight;
3706 #else
3707         nh->nh_weight = 1;
3708 #endif
3709         memcpy(&nh->gw_addr, &fib_nh->fib_nh_gw4, sizeof(fib_nh->fib_nh_gw4));
3710         err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh);
3711         if (err)
3712                 return err;
3713
3714         mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
3715         list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
3716
3717         if (!dev)
3718                 return 0;
3719
3720         in_dev = __in_dev_get_rtnl(dev);
3721         if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
3722             fib_nh->fib_nh_flags & RTNH_F_LINKDOWN)
3723                 return 0;
3724
3725         err = mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh);
3726         if (err)
3727                 goto err_nexthop_neigh_init;
3728
3729         return 0;
3730
3731 err_nexthop_neigh_init:
3732         mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
3733         return err;
3734 }
3735
3736 static void mlxsw_sp_nexthop4_fini(struct mlxsw_sp *mlxsw_sp,
3737                                    struct mlxsw_sp_nexthop *nh)
3738 {
3739         mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh);
3740         list_del(&nh->router_list_node);
3741         mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
3742         mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
3743 }
3744
3745 static void mlxsw_sp_nexthop4_event(struct mlxsw_sp *mlxsw_sp,
3746                                     unsigned long event, struct fib_nh *fib_nh)
3747 {
3748         struct mlxsw_sp_nexthop_key key;
3749         struct mlxsw_sp_nexthop *nh;
3750
3751         if (mlxsw_sp->router->aborted)
3752                 return;
3753
3754         key.fib_nh = fib_nh;
3755         nh = mlxsw_sp_nexthop_lookup(mlxsw_sp, key);
3756         if (WARN_ON_ONCE(!nh))
3757                 return;
3758
3759         switch (event) {
3760         case FIB_EVENT_NH_ADD:
3761                 mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh);
3762                 break;
3763         case FIB_EVENT_NH_DEL:
3764                 mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh);
3765                 break;
3766         }
3767
3768         mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3769 }
3770
3771 static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
3772                                         struct mlxsw_sp_rif *rif)
3773 {
3774         struct mlxsw_sp_nexthop *nh;
3775         bool removing;
3776
3777         list_for_each_entry(nh, &rif->nexthop_list, rif_list_node) {
3778                 switch (nh->type) {
3779                 case MLXSW_SP_NEXTHOP_TYPE_ETH:
3780                         removing = false;
3781                         break;
3782                 case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3783                         removing = !mlxsw_sp_ipip_netdev_ul_up(rif->dev);
3784                         break;
3785                 default:
3786                         WARN_ON(1);
3787                         continue;
3788                 }
3789
3790                 __mlxsw_sp_nexthop_neigh_update(nh, removing);
3791                 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3792         }
3793 }
3794
3795 static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp,
3796                                          struct mlxsw_sp_rif *old_rif,
3797                                          struct mlxsw_sp_rif *new_rif)
3798 {
3799         struct mlxsw_sp_nexthop *nh;
3800
3801         list_splice_init(&old_rif->nexthop_list, &new_rif->nexthop_list);
3802         list_for_each_entry(nh, &new_rif->nexthop_list, rif_list_node)
3803                 nh->rif = new_rif;
3804         mlxsw_sp_nexthop_rif_update(mlxsw_sp, new_rif);
3805 }
3806
3807 static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
3808                                            struct mlxsw_sp_rif *rif)
3809 {
3810         struct mlxsw_sp_nexthop *nh, *tmp;
3811
3812         list_for_each_entry_safe(nh, tmp, &rif->nexthop_list, rif_list_node) {
3813                 mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
3814                 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3815         }
3816 }
3817
3818 static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp,
3819                                    const struct fib_info *fi)
3820 {
3821         return fi->fib_nh->fib_nh_scope == RT_SCOPE_LINK ||
3822                mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, fi->fib_nh, NULL);
3823 }
3824
3825 static struct mlxsw_sp_nexthop_group *
3826 mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
3827 {
3828         struct mlxsw_sp_nexthop_group *nh_grp;
3829         struct mlxsw_sp_nexthop *nh;
3830         struct fib_nh *fib_nh;
3831         int i;
3832         int err;
3833
3834         nh_grp = kzalloc(struct_size(nh_grp, nexthops, fi->fib_nhs),
3835                          GFP_KERNEL);
3836         if (!nh_grp)
3837                 return ERR_PTR(-ENOMEM);
3838         nh_grp->priv = fi;
3839         INIT_LIST_HEAD(&nh_grp->fib_list);
3840         nh_grp->neigh_tbl = &arp_tbl;
3841
3842         nh_grp->gateway = mlxsw_sp_fi_is_gateway(mlxsw_sp, fi);
3843         nh_grp->count = fi->fib_nhs;
3844         fib_info_hold(fi);
3845         for (i = 0; i < nh_grp->count; i++) {
3846                 nh = &nh_grp->nexthops[i];
3847                 fib_nh = &fi->fib_nh[i];
3848                 err = mlxsw_sp_nexthop4_init(mlxsw_sp, nh_grp, nh, fib_nh);
3849                 if (err)
3850                         goto err_nexthop4_init;
3851         }
3852         err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
3853         if (err)
3854                 goto err_nexthop_group_insert;
3855         mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
3856         return nh_grp;
3857
3858 err_nexthop_group_insert:
3859 err_nexthop4_init:
3860         for (i--; i >= 0; i--) {
3861                 nh = &nh_grp->nexthops[i];
3862                 mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
3863         }
3864         fib_info_put(fi);
3865         kfree(nh_grp);
3866         return ERR_PTR(err);
3867 }
3868
3869 static void
3870 mlxsw_sp_nexthop4_group_destroy(struct mlxsw_sp *mlxsw_sp,
3871                                 struct mlxsw_sp_nexthop_group *nh_grp)
3872 {
3873         struct mlxsw_sp_nexthop *nh;
3874         int i;
3875
3876         mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
3877         for (i = 0; i < nh_grp->count; i++) {
3878                 nh = &nh_grp->nexthops[i];
3879                 mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
3880         }
3881         mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
3882         WARN_ON_ONCE(nh_grp->adj_index_valid);
3883         fib_info_put(mlxsw_sp_nexthop4_group_fi(nh_grp));
3884         kfree(nh_grp);
3885 }
3886
3887 static int mlxsw_sp_nexthop4_group_get(struct mlxsw_sp *mlxsw_sp,
3888                                        struct mlxsw_sp_fib_entry *fib_entry,
3889                                        struct fib_info *fi)
3890 {
3891         struct mlxsw_sp_nexthop_group *nh_grp;
3892
3893         nh_grp = mlxsw_sp_nexthop4_group_lookup(mlxsw_sp, fi);
3894         if (!nh_grp) {
3895                 nh_grp = mlxsw_sp_nexthop4_group_create(mlxsw_sp, fi);
3896                 if (IS_ERR(nh_grp))
3897                         return PTR_ERR(nh_grp);
3898         }
3899         list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list);
3900         fib_entry->nh_group = nh_grp;
3901         return 0;
3902 }
3903
3904 static void mlxsw_sp_nexthop4_group_put(struct mlxsw_sp *mlxsw_sp,
3905                                         struct mlxsw_sp_fib_entry *fib_entry)
3906 {
3907         struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3908
3909         list_del(&fib_entry->nexthop_group_node);
3910         if (!list_empty(&nh_grp->fib_list))
3911                 return;
3912         mlxsw_sp_nexthop4_group_destroy(mlxsw_sp, nh_grp);
3913 }
3914
3915 static bool
3916 mlxsw_sp_fib4_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
3917 {
3918         struct mlxsw_sp_fib4_entry *fib4_entry;
3919
3920         fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
3921                                   common);
3922         return !fib4_entry->tos;
3923 }
3924
3925 static bool
3926 mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
3927 {
3928         struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;
3929
3930         switch (fib_entry->fib_node->fib->proto) {
3931         case MLXSW_SP_L3_PROTO_IPV4:
3932                 if (!mlxsw_sp_fib4_entry_should_offload(fib_entry))
3933                         return false;
3934                 break;
3935         case MLXSW_SP_L3_PROTO_IPV6:
3936                 break;
3937         }
3938
3939         switch (fib_entry->type) {
3940         case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
3941                 return !!nh_group->adj_index_valid;
3942         case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
3943                 return !!nh_group->nh_rif;
3944         case MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE:
3945         case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
3946         case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP:
3947                 return true;
3948         default:
3949                 return false;
3950         }
3951 }
3952
3953 static struct mlxsw_sp_nexthop *
3954 mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
3955                      const struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
3956 {
3957         int i;
3958
3959         for (i = 0; i < nh_grp->count; i++) {
3960                 struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3961                 struct fib6_info *rt = mlxsw_sp_rt6->rt;
3962
3963                 if (nh->rif && nh->rif->dev == rt->fib6_nh.fib_nh_dev &&
3964                     ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr,
3965                                     &rt->fib6_nh.fib_nh_gw6))
3966                         return nh;
3967                 continue;
3968         }
3969
3970         return NULL;
3971 }
3972
3973 static void
3974 mlxsw_sp_fib4_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
3975 {
3976         struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3977         int i;
3978
3979         if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL ||
3980             fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE ||
3981             fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP ||
3982             fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP) {
3983                 nh_grp->nexthops->key.fib_nh->fib_nh_flags |= RTNH_F_OFFLOAD;
3984                 return;
3985         }
3986
3987         for (i = 0; i < nh_grp->count; i++) {
3988                 struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3989
3990                 if (nh->offloaded)
3991                         nh->key.fib_nh->fib_nh_flags |= RTNH_F_OFFLOAD;
3992                 else
3993                         nh->key.fib_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
3994         }
3995 }
3996
3997 static void
3998 mlxsw_sp_fib4_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
3999 {
4000         struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
4001         int i;
4002
4003         if (!list_is_singular(&nh_grp->fib_list))
4004                 return;
4005
4006         for (i = 0; i < nh_grp->count; i++) {
4007                 struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
4008
4009                 nh->key.fib_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
4010         }
4011 }
4012
4013 static void
4014 mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
4015 {
4016         struct mlxsw_sp_fib6_entry *fib6_entry;
4017         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4018
4019         fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
4020                                   common);
4021
4022         if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL ||
4023             fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE) {
4024                 list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
4025                                  list)->rt->fib6_nh.fib_nh_flags |= RTNH_F_OFFLOAD;
4026                 return;
4027         }
4028
4029         list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
4030                 struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
4031                 struct fib6_nh *fib6_nh = &mlxsw_sp_rt6->rt->fib6_nh;
4032                 struct mlxsw_sp_nexthop *nh;
4033
4034                 nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6);
4035                 if (nh && nh->offloaded)
4036                         fib6_nh->fib_nh_flags |= RTNH_F_OFFLOAD;
4037                 else
4038                         fib6_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
4039         }
4040 }
4041
4042 static void
4043 mlxsw_sp_fib6_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
4044 {
4045         struct mlxsw_sp_fib6_entry *fib6_entry;
4046         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4047
4048         fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
4049                                   common);
4050         list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
4051                 struct fib6_info *rt = mlxsw_sp_rt6->rt;
4052
4053                 rt->fib6_nh.fib_nh_flags &= ~RTNH_F_OFFLOAD;
4054         }
4055 }
4056
4057 static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
4058 {
4059         switch (fib_entry->fib_node->fib->proto) {
4060         case MLXSW_SP_L3_PROTO_IPV4:
4061                 mlxsw_sp_fib4_entry_offload_set(fib_entry);
4062                 break;
4063         case MLXSW_SP_L3_PROTO_IPV6:
4064                 mlxsw_sp_fib6_entry_offload_set(fib_entry);
4065                 break;
4066         }
4067 }
4068
4069 static void
4070 mlxsw_sp_fib_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
4071 {
4072         switch (fib_entry->fib_node->fib->proto) {
4073         case MLXSW_SP_L3_PROTO_IPV4:
4074                 mlxsw_sp_fib4_entry_offload_unset(fib_entry);
4075                 break;
4076         case MLXSW_SP_L3_PROTO_IPV6:
4077                 mlxsw_sp_fib6_entry_offload_unset(fib_entry);
4078                 break;
4079         }
4080 }
4081
4082 static void
4083 mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
4084                                    enum mlxsw_reg_ralue_op op, int err)
4085 {
4086         switch (op) {
4087         case MLXSW_REG_RALUE_OP_WRITE_DELETE:
4088                 return mlxsw_sp_fib_entry_offload_unset(fib_entry);
4089         case MLXSW_REG_RALUE_OP_WRITE_WRITE:
4090                 if (err)
4091                         return;
4092                 if (mlxsw_sp_fib_entry_should_offload(fib_entry))
4093                         mlxsw_sp_fib_entry_offload_set(fib_entry);
4094                 else
4095                         mlxsw_sp_fib_entry_offload_unset(fib_entry);
4096                 return;
4097         default:
4098                 return;
4099         }
4100 }
4101
4102 static void
4103 mlxsw_sp_fib_entry_ralue_pack(char *ralue_pl,
4104                               const struct mlxsw_sp_fib_entry *fib_entry,
4105                               enum mlxsw_reg_ralue_op op)
4106 {
4107         struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
4108         enum mlxsw_reg_ralxx_protocol proto;
4109         u32 *p_dip;
4110
4111         proto = (enum mlxsw_reg_ralxx_protocol) fib->proto;
4112
4113         switch (fib->proto) {
4114         case MLXSW_SP_L3_PROTO_IPV4:
4115                 p_dip = (u32 *) fib_entry->fib_node->key.addr;
4116                 mlxsw_reg_ralue_pack4(ralue_pl, proto, op, fib->vr->id,
4117                                       fib_entry->fib_node->key.prefix_len,
4118                                       *p_dip);
4119                 break;
4120         case MLXSW_SP_L3_PROTO_IPV6:
4121                 mlxsw_reg_ralue_pack6(ralue_pl, proto, op, fib->vr->id,
4122                                       fib_entry->fib_node->key.prefix_len,
4123                                       fib_entry->fib_node->key.addr);
4124                 break;
4125         }
4126 }
4127
4128 static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp,
4129                                         struct mlxsw_sp_fib_entry *fib_entry,
4130                                         enum mlxsw_reg_ralue_op op)
4131 {
4132         char ralue_pl[MLXSW_REG_RALUE_LEN];
4133         enum mlxsw_reg_ralue_trap_action trap_action;
4134         u16 trap_id = 0;
4135         u32 adjacency_index = 0;
4136         u16 ecmp_size = 0;
4137
4138         /* In case the nexthop group adjacency index is valid, use it
4139          * with provided ECMP size. Otherwise, setup trap and pass
4140          * traffic to kernel.
4141          */
4142         if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
4143                 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
4144                 adjacency_index = fib_entry->nh_group->adj_index;
4145                 ecmp_size = fib_entry->nh_group->ecmp_size;
4146         } else {
4147                 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
4148                 trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
4149         }
4150
4151         mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4152         mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
4153                                         adjacency_index, ecmp_size);
4154         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4155 }
4156
4157 static int mlxsw_sp_fib_entry_op_local(struct mlxsw_sp *mlxsw_sp,
4158                                        struct mlxsw_sp_fib_entry *fib_entry,
4159                                        enum mlxsw_reg_ralue_op op)
4160 {
4161         struct mlxsw_sp_rif *rif = fib_entry->nh_group->nh_rif;
4162         enum mlxsw_reg_ralue_trap_action trap_action;
4163         char ralue_pl[MLXSW_REG_RALUE_LEN];
4164         u16 trap_id = 0;
4165         u16 rif_index = 0;
4166
4167         if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
4168                 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
4169                 rif_index = rif->rif_index;
4170         } else {
4171                 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
4172                 trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
4173         }
4174
4175         mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4176         mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id,
4177                                        rif_index);
4178         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4179 }
4180
4181 static int mlxsw_sp_fib_entry_op_trap(struct mlxsw_sp *mlxsw_sp,
4182                                       struct mlxsw_sp_fib_entry *fib_entry,
4183                                       enum mlxsw_reg_ralue_op op)
4184 {
4185         char ralue_pl[MLXSW_REG_RALUE_LEN];
4186
4187         mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4188         mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
4189         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4190 }
4191
4192 static int mlxsw_sp_fib_entry_op_blackhole(struct mlxsw_sp *mlxsw_sp,
4193                                            struct mlxsw_sp_fib_entry *fib_entry,
4194                                            enum mlxsw_reg_ralue_op op)
4195 {
4196         enum mlxsw_reg_ralue_trap_action trap_action;
4197         char ralue_pl[MLXSW_REG_RALUE_LEN];
4198
4199         trap_action = MLXSW_REG_RALUE_TRAP_ACTION_DISCARD_ERROR;
4200         mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4201         mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, 0, 0);
4202         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4203 }
4204
4205 static int
4206 mlxsw_sp_fib_entry_op_ipip_decap(struct mlxsw_sp *mlxsw_sp,
4207                                  struct mlxsw_sp_fib_entry *fib_entry,
4208                                  enum mlxsw_reg_ralue_op op)
4209 {
4210         struct mlxsw_sp_ipip_entry *ipip_entry = fib_entry->decap.ipip_entry;
4211         const struct mlxsw_sp_ipip_ops *ipip_ops;
4212
4213         if (WARN_ON(!ipip_entry))
4214                 return -EINVAL;
4215
4216         ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
4217         return ipip_ops->fib_entry_op(mlxsw_sp, ipip_entry, op,
4218                                       fib_entry->decap.tunnel_index);
4219 }
4220
4221 static int mlxsw_sp_fib_entry_op_nve_decap(struct mlxsw_sp *mlxsw_sp,
4222                                            struct mlxsw_sp_fib_entry *fib_entry,
4223                                            enum mlxsw_reg_ralue_op op)
4224 {
4225         char ralue_pl[MLXSW_REG_RALUE_LEN];
4226
4227         mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4228         mlxsw_reg_ralue_act_ip2me_tun_pack(ralue_pl,
4229                                            fib_entry->decap.tunnel_index);
4230         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4231 }
4232
4233 static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
4234                                    struct mlxsw_sp_fib_entry *fib_entry,
4235                                    enum mlxsw_reg_ralue_op op)
4236 {
4237         switch (fib_entry->type) {
4238         case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
4239                 return mlxsw_sp_fib_entry_op_remote(mlxsw_sp, fib_entry, op);
4240         case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
4241                 return mlxsw_sp_fib_entry_op_local(mlxsw_sp, fib_entry, op);
4242         case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
4243                 return mlxsw_sp_fib_entry_op_trap(mlxsw_sp, fib_entry, op);
4244         case MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE:
4245                 return mlxsw_sp_fib_entry_op_blackhole(mlxsw_sp, fib_entry, op);
4246         case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
4247                 return mlxsw_sp_fib_entry_op_ipip_decap(mlxsw_sp,
4248                                                         fib_entry, op);
4249         case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP:
4250                 return mlxsw_sp_fib_entry_op_nve_decap(mlxsw_sp, fib_entry, op);
4251         }
4252         return -EINVAL;
4253 }
4254
4255 static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
4256                                  struct mlxsw_sp_fib_entry *fib_entry,
4257                                  enum mlxsw_reg_ralue_op op)
4258 {
4259         int err = __mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, op);
4260
4261         mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, err);
4262
4263         return err;
4264 }
4265
4266 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
4267                                      struct mlxsw_sp_fib_entry *fib_entry)
4268 {
4269         return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
4270                                      MLXSW_REG_RALUE_OP_WRITE_WRITE);
4271 }
4272
4273 static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp,
4274                                   struct mlxsw_sp_fib_entry *fib_entry)
4275 {
4276         return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
4277                                      MLXSW_REG_RALUE_OP_WRITE_DELETE);
4278 }
4279
4280 static int
4281 mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
4282                              const struct fib_entry_notifier_info *fen_info,
4283                              struct mlxsw_sp_fib_entry *fib_entry)
4284 {
4285         union mlxsw_sp_l3addr dip = { .addr4 = htonl(fen_info->dst) };
4286         u32 tb_id = mlxsw_sp_fix_tb_id(fen_info->tb_id);
4287         struct net_device *dev = fen_info->fi->fib_dev;
4288         struct mlxsw_sp_ipip_entry *ipip_entry;
4289         struct fib_info *fi = fen_info->fi;
4290
4291         switch (fen_info->type) {
4292         case RTN_LOCAL:
4293                 ipip_entry = mlxsw_sp_ipip_entry_find_by_decap(mlxsw_sp, dev,
4294                                                  MLXSW_SP_L3_PROTO_IPV4, dip);
4295                 if (ipip_entry && ipip_entry->ol_dev->flags & IFF_UP) {
4296                         fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
4297                         return mlxsw_sp_fib_entry_decap_init(mlxsw_sp,
4298                                                              fib_entry,
4299                                                              ipip_entry);
4300                 }
4301                 if (mlxsw_sp_nve_ipv4_route_is_decap(mlxsw_sp, tb_id,
4302                                                      dip.addr4)) {
4303                         u32 t_index;
4304
4305                         t_index = mlxsw_sp_nve_decap_tunnel_index_get(mlxsw_sp);
4306                         fib_entry->decap.tunnel_index = t_index;
4307                         fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
4308                         return 0;
4309                 }
4310                 /* fall through */
4311         case RTN_BROADCAST:
4312                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
4313                 return 0;
4314         case RTN_BLACKHOLE:
4315                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE;
4316                 return 0;
4317         case RTN_UNREACHABLE: /* fall through */
4318         case RTN_PROHIBIT:
4319                 /* Packets hitting these routes need to be trapped, but
4320                  * can do so with a lower priority than packets directed
4321                  * at the host, so use action type local instead of trap.
4322                  */
4323                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
4324                 return 0;
4325         case RTN_UNICAST:
4326                 if (mlxsw_sp_fi_is_gateway(mlxsw_sp, fi))
4327                         fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
4328                 else
4329                         fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
4330                 return 0;
4331         default:
4332                 return -EINVAL;
4333         }
4334 }
4335
4336 static struct mlxsw_sp_fib4_entry *
4337 mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp,
4338                            struct mlxsw_sp_fib_node *fib_node,
4339                            const struct fib_entry_notifier_info *fen_info)
4340 {
4341         struct mlxsw_sp_fib4_entry *fib4_entry;
4342         struct mlxsw_sp_fib_entry *fib_entry;
4343         int err;
4344
4345         fib4_entry = kzalloc(sizeof(*fib4_entry), GFP_KERNEL);
4346         if (!fib4_entry)
4347                 return ERR_PTR(-ENOMEM);
4348         fib_entry = &fib4_entry->common;
4349
4350         err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry);
4351         if (err)
4352                 goto err_fib4_entry_type_set;
4353
4354         err = mlxsw_sp_nexthop4_group_get(mlxsw_sp, fib_entry, fen_info->fi);
4355         if (err)
4356                 goto err_nexthop4_group_get;
4357
4358         fib4_entry->prio = fen_info->fi->fib_priority;
4359         fib4_entry->tb_id = fen_info->tb_id;
4360         fib4_entry->type = fen_info->type;
4361         fib4_entry->tos = fen_info->tos;
4362
4363         fib_entry->fib_node = fib_node;
4364
4365         return fib4_entry;
4366
4367 err_nexthop4_group_get:
4368 err_fib4_entry_type_set:
4369         kfree(fib4_entry);
4370         return ERR_PTR(err);
4371 }
4372
4373 static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp,
4374                                         struct mlxsw_sp_fib4_entry *fib4_entry)
4375 {
4376         mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common);
4377         kfree(fib4_entry);
4378 }
4379
4380 static struct mlxsw_sp_fib4_entry *
4381 mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp,
4382                            const struct fib_entry_notifier_info *fen_info)
4383 {
4384         struct mlxsw_sp_fib4_entry *fib4_entry;
4385         struct mlxsw_sp_fib_node *fib_node;
4386         struct mlxsw_sp_fib *fib;
4387         struct mlxsw_sp_vr *vr;
4388
4389         vr = mlxsw_sp_vr_find(mlxsw_sp, fen_info->tb_id);
4390         if (!vr)
4391                 return NULL;
4392         fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV4);
4393
4394         fib_node = mlxsw_sp_fib_node_lookup(fib, &fen_info->dst,
4395                                             sizeof(fen_info->dst),
4396                                             fen_info->dst_len);
4397         if (!fib_node)
4398                 return NULL;
4399
4400         list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) {
4401                 if (fib4_entry->tb_id == fen_info->tb_id &&
4402                     fib4_entry->tos == fen_info->tos &&
4403                     fib4_entry->type == fen_info->type &&
4404                     mlxsw_sp_nexthop4_group_fi(fib4_entry->common.nh_group) ==
4405                     fen_info->fi) {
4406                         return fib4_entry;
4407                 }
4408         }
4409
4410         return NULL;
4411 }
4412
4413 static const struct rhashtable_params mlxsw_sp_fib_ht_params = {
4414         .key_offset = offsetof(struct mlxsw_sp_fib_node, key),
4415         .head_offset = offsetof(struct mlxsw_sp_fib_node, ht_node),
4416         .key_len = sizeof(struct mlxsw_sp_fib_key),
4417         .automatic_shrinking = true,
4418 };
4419
4420 static int mlxsw_sp_fib_node_insert(struct mlxsw_sp_fib *fib,
4421                                     struct mlxsw_sp_fib_node *fib_node)
4422 {
4423         return rhashtable_insert_fast(&fib->ht, &fib_node->ht_node,
4424                                       mlxsw_sp_fib_ht_params);
4425 }
4426
4427 static void mlxsw_sp_fib_node_remove(struct mlxsw_sp_fib *fib,
4428                                      struct mlxsw_sp_fib_node *fib_node)
4429 {
4430         rhashtable_remove_fast(&fib->ht, &fib_node->ht_node,
4431                                mlxsw_sp_fib_ht_params);
4432 }
4433
4434 static struct mlxsw_sp_fib_node *
4435 mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
4436                          size_t addr_len, unsigned char prefix_len)
4437 {
4438         struct mlxsw_sp_fib_key key;
4439
4440         memset(&key, 0, sizeof(key));
4441         memcpy(key.addr, addr, addr_len);
4442         key.prefix_len = prefix_len;
4443         return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params);
4444 }
4445
4446 static struct mlxsw_sp_fib_node *
4447 mlxsw_sp_fib_node_create(struct mlxsw_sp_fib *fib, const void *addr,
4448                          size_t addr_len, unsigned char prefix_len)
4449 {
4450         struct mlxsw_sp_fib_node *fib_node;
4451
4452         fib_node = kzalloc(sizeof(*fib_node), GFP_KERNEL);
4453         if (!fib_node)
4454                 return NULL;
4455
4456         INIT_LIST_HEAD(&fib_node->entry_list);
4457         list_add(&fib_node->list, &fib->node_list);
4458         memcpy(fib_node->key.addr, addr, addr_len);
4459         fib_node->key.prefix_len = prefix_len;
4460
4461         return fib_node;
4462 }
4463
4464 static void mlxsw_sp_fib_node_destroy(struct mlxsw_sp_fib_node *fib_node)
4465 {
4466         list_del(&fib_node->list);
4467         WARN_ON(!list_empty(&fib_node->entry_list));
4468         kfree(fib_node);
4469 }
4470
4471 static bool
4472 mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
4473                                  const struct mlxsw_sp_fib_entry *fib_entry)
4474 {
4475         return list_first_entry(&fib_node->entry_list,
4476                                 struct mlxsw_sp_fib_entry, list) == fib_entry;
4477 }
4478
4479 static int mlxsw_sp_fib_lpm_tree_link(struct mlxsw_sp *mlxsw_sp,
4480                                       struct mlxsw_sp_fib_node *fib_node)
4481 {
4482         struct mlxsw_sp_prefix_usage req_prefix_usage;
4483         struct mlxsw_sp_fib *fib = fib_node->fib;
4484         struct mlxsw_sp_lpm_tree *lpm_tree;
4485         int err;
4486
4487         lpm_tree = mlxsw_sp->router->lpm.proto_trees[fib->proto];
4488         if (lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
4489                 goto out;
4490
4491         mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
4492         mlxsw_sp_prefix_usage_set(&req_prefix_usage, fib_node->key.prefix_len);
4493         lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
4494                                          fib->proto);
4495         if (IS_ERR(lpm_tree))
4496                 return PTR_ERR(lpm_tree);
4497
4498         err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
4499         if (err)
4500                 goto err_lpm_tree_replace;
4501
4502 out:
4503         lpm_tree->prefix_ref_count[fib_node->key.prefix_len]++;
4504         return 0;
4505
4506 err_lpm_tree_replace:
4507         mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
4508         return err;
4509 }
4510
4511 static void mlxsw_sp_fib_lpm_tree_unlink(struct mlxsw_sp *mlxsw_sp,
4512                                          struct mlxsw_sp_fib_node *fib_node)
4513 {
4514         struct mlxsw_sp_lpm_tree *lpm_tree = fib_node->fib->lpm_tree;
4515         struct mlxsw_sp_prefix_usage req_prefix_usage;
4516         struct mlxsw_sp_fib *fib = fib_node->fib;
4517         int err;
4518
4519         if (--lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
4520                 return;
4521         /* Try to construct a new LPM tree from the current prefix usage
4522          * minus the unused one. If we fail, continue using the old one.
4523          */
4524         mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
4525         mlxsw_sp_prefix_usage_clear(&req_prefix_usage,
4526                                     fib_node->key.prefix_len);
4527         lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
4528                                          fib->proto);
4529         if (IS_ERR(lpm_tree))
4530                 return;
4531
4532         err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
4533         if (err)
4534                 goto err_lpm_tree_replace;
4535
4536         return;
4537
4538 err_lpm_tree_replace:
4539         mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
4540 }
4541
4542 static int mlxsw_sp_fib_node_init(struct mlxsw_sp *mlxsw_sp,
4543                                   struct mlxsw_sp_fib_node *fib_node,
4544                                   struct mlxsw_sp_fib *fib)
4545 {
4546         int err;
4547
4548         err = mlxsw_sp_fib_node_insert(fib, fib_node);
4549         if (err)
4550                 return err;
4551         fib_node->fib = fib;
4552
4553         err = mlxsw_sp_fib_lpm_tree_link(mlxsw_sp, fib_node);
4554         if (err)
4555                 goto err_fib_lpm_tree_link;
4556
4557         return 0;
4558
4559 err_fib_lpm_tree_link:
4560         fib_node->fib = NULL;
4561         mlxsw_sp_fib_node_remove(fib, fib_node);
4562         return err;
4563 }
4564
4565 static void mlxsw_sp_fib_node_fini(struct mlxsw_sp *mlxsw_sp,
4566                                    struct mlxsw_sp_fib_node *fib_node)
4567 {
4568         struct mlxsw_sp_fib *fib = fib_node->fib;
4569
4570         mlxsw_sp_fib_lpm_tree_unlink(mlxsw_sp, fib_node);
4571         fib_node->fib = NULL;
4572         mlxsw_sp_fib_node_remove(fib, fib_node);
4573 }
4574
4575 static struct mlxsw_sp_fib_node *
4576 mlxsw_sp_fib_node_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, const void *addr,
4577                       size_t addr_len, unsigned char prefix_len,
4578                       enum mlxsw_sp_l3proto proto)
4579 {
4580         struct mlxsw_sp_fib_node *fib_node;
4581         struct mlxsw_sp_fib *fib;
4582         struct mlxsw_sp_vr *vr;
4583         int err;
4584
4585         vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, NULL);
4586         if (IS_ERR(vr))
4587                 return ERR_CAST(vr);
4588         fib = mlxsw_sp_vr_fib(vr, proto);
4589
4590         fib_node = mlxsw_sp_fib_node_lookup(fib, addr, addr_len, prefix_len);
4591         if (fib_node)
4592                 return fib_node;
4593
4594         fib_node = mlxsw_sp_fib_node_create(fib, addr, addr_len, prefix_len);
4595         if (!fib_node) {
4596                 err = -ENOMEM;
4597                 goto err_fib_node_create;
4598         }
4599
4600         err = mlxsw_sp_fib_node_init(mlxsw_sp, fib_node, fib);
4601         if (err)
4602                 goto err_fib_node_init;
4603
4604         return fib_node;
4605
4606 err_fib_node_init:
4607         mlxsw_sp_fib_node_destroy(fib_node);
4608 err_fib_node_create:
4609         mlxsw_sp_vr_put(mlxsw_sp, vr);
4610         return ERR_PTR(err);
4611 }
4612
4613 static void mlxsw_sp_fib_node_put(struct mlxsw_sp *mlxsw_sp,
4614                                   struct mlxsw_sp_fib_node *fib_node)
4615 {
4616         struct mlxsw_sp_vr *vr = fib_node->fib->vr;
4617
4618         if (!list_empty(&fib_node->entry_list))
4619                 return;
4620         mlxsw_sp_fib_node_fini(mlxsw_sp, fib_node);
4621         mlxsw_sp_fib_node_destroy(fib_node);
4622         mlxsw_sp_vr_put(mlxsw_sp, vr);
4623 }
4624
4625 static struct mlxsw_sp_fib4_entry *
4626 mlxsw_sp_fib4_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
4627                               const struct mlxsw_sp_fib4_entry *new4_entry)
4628 {
4629         struct mlxsw_sp_fib4_entry *fib4_entry;
4630
4631         list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) {
4632                 if (fib4_entry->tb_id > new4_entry->tb_id)
4633                         continue;
4634                 if (fib4_entry->tb_id != new4_entry->tb_id)
4635                         break;
4636                 if (fib4_entry->tos > new4_entry->tos)
4637                         continue;
4638                 if (fib4_entry->prio >= new4_entry->prio ||
4639                     fib4_entry->tos < new4_entry->tos)
4640                         return fib4_entry;
4641         }
4642
4643         return NULL;
4644 }
4645
4646 static int
4647 mlxsw_sp_fib4_node_list_append(struct mlxsw_sp_fib4_entry *fib4_entry,
4648                                struct mlxsw_sp_fib4_entry *new4_entry)
4649 {
4650         struct mlxsw_sp_fib_node *fib_node;
4651
4652         if (WARN_ON(!fib4_entry))
4653                 return -EINVAL;
4654
4655         fib_node = fib4_entry->common.fib_node;
4656         list_for_each_entry_from(fib4_entry, &fib_node->entry_list,
4657                                  common.list) {
4658                 if (fib4_entry->tb_id != new4_entry->tb_id ||
4659                     fib4_entry->tos != new4_entry->tos ||
4660                     fib4_entry->prio != new4_entry->prio)
4661                         break;
4662         }
4663
4664         list_add_tail(&new4_entry->common.list, &fib4_entry->common.list);
4665         return 0;
4666 }
4667
4668 static int
4669 mlxsw_sp_fib4_node_list_insert(struct mlxsw_sp_fib4_entry *new4_entry,
4670                                bool replace, bool append)
4671 {
4672         struct mlxsw_sp_fib_node *fib_node = new4_entry->common.fib_node;
4673         struct mlxsw_sp_fib4_entry *fib4_entry;
4674
4675         fib4_entry = mlxsw_sp_fib4_node_entry_find(fib_node, new4_entry);
4676
4677         if (append)
4678                 return mlxsw_sp_fib4_node_list_append(fib4_entry, new4_entry);
4679         if (replace && WARN_ON(!fib4_entry))
4680                 return -EINVAL;
4681
4682         /* Insert new entry before replaced one, so that we can later
4683          * remove the second.
4684          */
4685         if (fib4_entry) {
4686                 list_add_tail(&new4_entry->common.list,
4687                               &fib4_entry->common.list);
4688         } else {
4689                 struct mlxsw_sp_fib4_entry *last;
4690
4691                 list_for_each_entry(last, &fib_node->entry_list, common.list) {
4692                         if (new4_entry->tb_id > last->tb_id)
4693                                 break;
4694                         fib4_entry = last;
4695                 }
4696
4697                 if (fib4_entry)
4698                         list_add(&new4_entry->common.list,
4699                                  &fib4_entry->common.list);
4700                 else
4701                         list_add(&new4_entry->common.list,
4702                                  &fib_node->entry_list);
4703         }
4704
4705         return 0;
4706 }
4707
4708 static void
4709 mlxsw_sp_fib4_node_list_remove(struct mlxsw_sp_fib4_entry *fib4_entry)
4710 {
4711         list_del(&fib4_entry->common.list);
4712 }
4713
4714 static int mlxsw_sp_fib_node_entry_add(struct mlxsw_sp *mlxsw_sp,
4715                                        struct mlxsw_sp_fib_entry *fib_entry)
4716 {
4717         struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
4718
4719         if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
4720                 return 0;
4721
4722         /* To prevent packet loss, overwrite the previously offloaded
4723          * entry.
4724          */
4725         if (!list_is_singular(&fib_node->entry_list)) {
4726                 enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
4727                 struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
4728
4729                 mlxsw_sp_fib_entry_offload_refresh(n, op, 0);
4730         }
4731
4732         return mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
4733 }
4734
4735 static void mlxsw_sp_fib_node_entry_del(struct mlxsw_sp *mlxsw_sp,
4736                                         struct mlxsw_sp_fib_entry *fib_entry)
4737 {
4738         struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
4739
4740         if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
4741                 return;
4742
4743         /* Promote the next entry by overwriting the deleted entry */
4744         if (!list_is_singular(&fib_node->entry_list)) {
4745                 struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
4746                 enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
4747
4748                 mlxsw_sp_fib_entry_update(mlxsw_sp, n);
4749                 mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
4750                 return;
4751         }
4752
4753         mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
4754 }
4755
4756 static int mlxsw_sp_fib4_node_entry_link(struct mlxsw_sp *mlxsw_sp,
4757                                          struct mlxsw_sp_fib4_entry *fib4_entry,
4758                                          bool replace, bool append)
4759 {
4760         int err;
4761
4762         err = mlxsw_sp_fib4_node_list_insert(fib4_entry, replace, append);
4763         if (err)
4764                 return err;
4765
4766         err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib4_entry->common);
4767         if (err)
4768                 goto err_fib_node_entry_add;
4769
4770         return 0;
4771
4772 err_fib_node_entry_add:
4773         mlxsw_sp_fib4_node_list_remove(fib4_entry);
4774         return err;
4775 }
4776
4777 static void
4778 mlxsw_sp_fib4_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
4779                                 struct mlxsw_sp_fib4_entry *fib4_entry)
4780 {
4781         mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib4_entry->common);
4782         mlxsw_sp_fib4_node_list_remove(fib4_entry);
4783
4784         if (fib4_entry->common.type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP)
4785                 mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, &fib4_entry->common);
4786 }
4787
4788 static void mlxsw_sp_fib4_entry_replace(struct mlxsw_sp *mlxsw_sp,
4789                                         struct mlxsw_sp_fib4_entry *fib4_entry,
4790                                         bool replace)
4791 {
4792         struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node;
4793         struct mlxsw_sp_fib4_entry *replaced;
4794
4795         if (!replace)
4796                 return;
4797
4798         /* We inserted the new entry before replaced one */
4799         replaced = list_next_entry(fib4_entry, common.list);
4800
4801         mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, replaced);
4802         mlxsw_sp_fib4_entry_destroy(mlxsw_sp, replaced);
4803         mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4804 }
4805
4806 static int
4807 mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp,
4808                          const struct fib_entry_notifier_info *fen_info,
4809                          bool replace, bool append)
4810 {
4811         struct mlxsw_sp_fib4_entry *fib4_entry;
4812         struct mlxsw_sp_fib_node *fib_node;
4813         int err;
4814
4815         if (mlxsw_sp->router->aborted)
4816                 return 0;
4817
4818         fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, fen_info->tb_id,
4819                                          &fen_info->dst, sizeof(fen_info->dst),
4820                                          fen_info->dst_len,
4821                                          MLXSW_SP_L3_PROTO_IPV4);
4822         if (IS_ERR(fib_node)) {
4823                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB node\n");
4824                 return PTR_ERR(fib_node);
4825         }
4826
4827         fib4_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info);
4828         if (IS_ERR(fib4_entry)) {
4829                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to create FIB entry\n");
4830                 err = PTR_ERR(fib4_entry);
4831                 goto err_fib4_entry_create;
4832         }
4833
4834         err = mlxsw_sp_fib4_node_entry_link(mlxsw_sp, fib4_entry, replace,
4835                                             append);
4836         if (err) {
4837                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n");
4838                 goto err_fib4_node_entry_link;
4839         }
4840
4841         mlxsw_sp_fib4_entry_replace(mlxsw_sp, fib4_entry, replace);
4842
4843         return 0;
4844
4845 err_fib4_node_entry_link:
4846         mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
4847 err_fib4_entry_create:
4848         mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4849         return err;
4850 }
4851
4852 static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
4853                                      struct fib_entry_notifier_info *fen_info)
4854 {
4855         struct mlxsw_sp_fib4_entry *fib4_entry;
4856         struct mlxsw_sp_fib_node *fib_node;
4857
4858         if (mlxsw_sp->router->aborted)
4859                 return;
4860
4861         fib4_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info);
4862         if (WARN_ON(!fib4_entry))
4863                 return;
4864         fib_node = fib4_entry->common.fib_node;
4865
4866         mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry);
4867         mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
4868         mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4869 }
4870
4871 static bool mlxsw_sp_fib6_rt_should_ignore(const struct fib6_info *rt)
4872 {
4873         /* Packets with link-local destination IP arriving to the router
4874          * are trapped to the CPU, so no need to program specific routes
4875          * for them.
4876          */
4877         if (ipv6_addr_type(&rt->fib6_dst.addr) & IPV6_ADDR_LINKLOCAL)
4878                 return true;
4879
4880         /* Multicast routes aren't supported, so ignore them. Neighbour
4881          * Discovery packets are specifically trapped.
4882          */
4883         if (ipv6_addr_type(&rt->fib6_dst.addr) & IPV6_ADDR_MULTICAST)
4884                 return true;
4885
4886         /* Cloned routes are irrelevant in the forwarding path. */
4887         if (rt->fib6_flags & RTF_CACHE)
4888                 return true;
4889
4890         return false;
4891 }
4892
4893 static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct fib6_info *rt)
4894 {
4895         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4896
4897         mlxsw_sp_rt6 = kzalloc(sizeof(*mlxsw_sp_rt6), GFP_KERNEL);
4898         if (!mlxsw_sp_rt6)
4899                 return ERR_PTR(-ENOMEM);
4900
4901         /* In case of route replace, replaced route is deleted with
4902          * no notification. Take reference to prevent accessing freed
4903          * memory.
4904          */
4905         mlxsw_sp_rt6->rt = rt;
4906         fib6_info_hold(rt);
4907
4908         return mlxsw_sp_rt6;
4909 }
4910
4911 #if IS_ENABLED(CONFIG_IPV6)
4912 static void mlxsw_sp_rt6_release(struct fib6_info *rt)
4913 {
4914         fib6_info_release(rt);
4915 }
4916 #else
4917 static void mlxsw_sp_rt6_release(struct fib6_info *rt)
4918 {
4919 }
4920 #endif
4921
4922 static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
4923 {
4924         mlxsw_sp_rt6_release(mlxsw_sp_rt6->rt);
4925         kfree(mlxsw_sp_rt6);
4926 }
4927
4928 static bool mlxsw_sp_fib6_rt_can_mp(const struct fib6_info *rt)
4929 {
4930         /* RTF_CACHE routes are ignored */
4931         return !(rt->fib6_flags & RTF_ADDRCONF) && rt->fib6_nh.fib_nh_gw_family;
4932 }
4933
4934 static struct fib6_info *
4935 mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry)
4936 {
4937         return list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
4938                                 list)->rt;
4939 }
4940
4941 static struct mlxsw_sp_fib6_entry *
4942 mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node,
4943                                  const struct fib6_info *nrt, bool replace)
4944 {
4945         struct mlxsw_sp_fib6_entry *fib6_entry;
4946
4947         if (!mlxsw_sp_fib6_rt_can_mp(nrt) || replace)
4948                 return NULL;
4949
4950         list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
4951                 struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
4952
4953                 /* RT6_TABLE_LOCAL and RT6_TABLE_MAIN share the same
4954                  * virtual router.
4955                  */
4956                 if (rt->fib6_table->tb6_id > nrt->fib6_table->tb6_id)
4957                         continue;
4958                 if (rt->fib6_table->tb6_id != nrt->fib6_table->tb6_id)
4959                         break;
4960                 if (rt->fib6_metric < nrt->fib6_metric)
4961                         continue;
4962                 if (rt->fib6_metric == nrt->fib6_metric &&
4963                     mlxsw_sp_fib6_rt_can_mp(rt))
4964                         return fib6_entry;
4965                 if (rt->fib6_metric > nrt->fib6_metric)
4966                         break;
4967         }
4968
4969         return NULL;
4970 }
4971
4972 static struct mlxsw_sp_rt6 *
4973 mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry,
4974                             const struct fib6_info *rt)
4975 {
4976         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4977
4978         list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
4979                 if (mlxsw_sp_rt6->rt == rt)
4980                         return mlxsw_sp_rt6;
4981         }
4982
4983         return NULL;
4984 }
4985
4986 static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp,
4987                                         const struct fib6_info *rt,
4988                                         enum mlxsw_sp_ipip_type *ret)
4989 {
4990         return rt->fib6_nh.fib_nh_dev &&
4991                mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->fib6_nh.fib_nh_dev, ret);
4992 }
4993
4994 static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp,
4995                                        struct mlxsw_sp_nexthop_group *nh_grp,
4996                                        struct mlxsw_sp_nexthop *nh,
4997                                        const struct fib6_info *rt)
4998 {
4999         const struct mlxsw_sp_ipip_ops *ipip_ops;
5000         struct mlxsw_sp_ipip_entry *ipip_entry;
5001         struct net_device *dev = rt->fib6_nh.fib_nh_dev;
5002         struct mlxsw_sp_rif *rif;
5003         int err;
5004
5005         ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev);
5006         if (ipip_entry) {
5007                 ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
5008                 if (ipip_ops->can_offload(mlxsw_sp, dev,
5009                                           MLXSW_SP_L3_PROTO_IPV6)) {
5010                         nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
5011                         mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry);
5012                         return 0;
5013                 }
5014         }
5015
5016         nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
5017         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
5018         if (!rif)
5019                 return 0;
5020         mlxsw_sp_nexthop_rif_init(nh, rif);
5021
5022         err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
5023         if (err)
5024                 goto err_nexthop_neigh_init;
5025
5026         return 0;
5027
5028 err_nexthop_neigh_init:
5029         mlxsw_sp_nexthop_rif_fini(nh);
5030         return err;
5031 }
5032
/* IPv6 wrapper that simply forwards to mlxsw_sp_nexthop_type_fini(). */
static void mlxsw_sp_nexthop6_type_fini(struct mlxsw_sp *mlxsw_sp,
                                        struct mlxsw_sp_nexthop *nh)
{
        mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
}
5038
5039 static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
5040                                   struct mlxsw_sp_nexthop_group *nh_grp,
5041                                   struct mlxsw_sp_nexthop *nh,
5042                                   const struct fib6_info *rt)
5043 {
5044         struct net_device *dev = rt->fib6_nh.fib_nh_dev;
5045
5046         nh->nh_grp = nh_grp;
5047         nh->nh_weight = rt->fib6_nh.fib_nh_weight;
5048         memcpy(&nh->gw_addr, &rt->fib6_nh.fib_nh_gw6, sizeof(nh->gw_addr));
5049         mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
5050
5051         list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
5052
5053         if (!dev)
5054                 return 0;
5055         nh->ifindex = dev->ifindex;
5056
5057         return mlxsw_sp_nexthop6_type_init(mlxsw_sp, nh_grp, nh, rt);
5058 }
5059
5060 static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp,
5061                                    struct mlxsw_sp_nexthop *nh)
5062 {
5063         mlxsw_sp_nexthop6_type_fini(mlxsw_sp, nh);
5064         list_del(&nh->router_list_node);
5065         mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
5066 }
5067
5068 static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp,
5069                                     const struct fib6_info *rt)
5070 {
5071         return rt->fib6_nh.fib_nh_gw_family ||
5072                mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL);
5073 }
5074
5075 static struct mlxsw_sp_nexthop_group *
5076 mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp,
5077                                struct mlxsw_sp_fib6_entry *fib6_entry)
5078 {
5079         struct mlxsw_sp_nexthop_group *nh_grp;
5080         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5081         struct mlxsw_sp_nexthop *nh;
5082         int i = 0;
5083         int err;
5084
5085         nh_grp = kzalloc(struct_size(nh_grp, nexthops, fib6_entry->nrt6),
5086                          GFP_KERNEL);
5087         if (!nh_grp)
5088                 return ERR_PTR(-ENOMEM);
5089         INIT_LIST_HEAD(&nh_grp->fib_list);
5090 #if IS_ENABLED(CONFIG_IPV6)
5091         nh_grp->neigh_tbl = &nd_tbl;
5092 #endif
5093         mlxsw_sp_rt6 = list_first_entry(&fib6_entry->rt6_list,
5094                                         struct mlxsw_sp_rt6, list);
5095         nh_grp->gateway = mlxsw_sp_rt6_is_gateway(mlxsw_sp, mlxsw_sp_rt6->rt);
5096         nh_grp->count = fib6_entry->nrt6;
5097         for (i = 0; i < nh_grp->count; i++) {
5098                 struct fib6_info *rt = mlxsw_sp_rt6->rt;
5099
5100                 nh = &nh_grp->nexthops[i];
5101                 err = mlxsw_sp_nexthop6_init(mlxsw_sp, nh_grp, nh, rt);
5102                 if (err)
5103                         goto err_nexthop6_init;
5104                 mlxsw_sp_rt6 = list_next_entry(mlxsw_sp_rt6, list);
5105         }
5106
5107         err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
5108         if (err)
5109                 goto err_nexthop_group_insert;
5110
5111         mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
5112         return nh_grp;
5113
5114 err_nexthop_group_insert:
5115 err_nexthop6_init:
5116         for (i--; i >= 0; i--) {
5117                 nh = &nh_grp->nexthops[i];
5118                 mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
5119         }
5120         kfree(nh_grp);
5121         return ERR_PTR(err);
5122 }
5123
5124 static void
5125 mlxsw_sp_nexthop6_group_destroy(struct mlxsw_sp *mlxsw_sp,
5126                                 struct mlxsw_sp_nexthop_group *nh_grp)
5127 {
5128         struct mlxsw_sp_nexthop *nh;
5129         int i = nh_grp->count;
5130
5131         mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
5132         for (i--; i >= 0; i--) {
5133                 nh = &nh_grp->nexthops[i];
5134                 mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
5135         }
5136         mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
5137         WARN_ON(nh_grp->adj_index_valid);
5138         kfree(nh_grp);
5139 }
5140
5141 static int mlxsw_sp_nexthop6_group_get(struct mlxsw_sp *mlxsw_sp,
5142                                        struct mlxsw_sp_fib6_entry *fib6_entry)
5143 {
5144         struct mlxsw_sp_nexthop_group *nh_grp;
5145
5146         nh_grp = mlxsw_sp_nexthop6_group_lookup(mlxsw_sp, fib6_entry);
5147         if (!nh_grp) {
5148                 nh_grp = mlxsw_sp_nexthop6_group_create(mlxsw_sp, fib6_entry);
5149                 if (IS_ERR(nh_grp))
5150                         return PTR_ERR(nh_grp);
5151         }
5152
5153         list_add_tail(&fib6_entry->common.nexthop_group_node,
5154                       &nh_grp->fib_list);
5155         fib6_entry->common.nh_group = nh_grp;
5156
5157         return 0;
5158 }
5159
5160 static void mlxsw_sp_nexthop6_group_put(struct mlxsw_sp *mlxsw_sp,
5161                                         struct mlxsw_sp_fib_entry *fib_entry)
5162 {
5163         struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
5164
5165         list_del(&fib_entry->nexthop_group_node);
5166         if (!list_empty(&nh_grp->fib_list))
5167                 return;
5168         mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, nh_grp);
5169 }
5170
5171 static int
5172 mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp,
5173                                struct mlxsw_sp_fib6_entry *fib6_entry)
5174 {
5175         struct mlxsw_sp_nexthop_group *old_nh_grp = fib6_entry->common.nh_group;
5176         int err;
5177
5178         fib6_entry->common.nh_group = NULL;
5179         list_del(&fib6_entry->common.nexthop_group_node);
5180
5181         err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
5182         if (err)
5183                 goto err_nexthop6_group_get;
5184
5185         /* In case this entry is offloaded, then the adjacency index
5186          * currently associated with it in the device's table is that
5187          * of the old group. Start using the new one instead.
5188          */
5189         err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
5190         if (err)
5191                 goto err_fib_node_entry_add;
5192
5193         if (list_empty(&old_nh_grp->fib_list))
5194                 mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, old_nh_grp);
5195
5196         return 0;
5197
5198 err_fib_node_entry_add:
5199         mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
5200 err_nexthop6_group_get:
5201         list_add_tail(&fib6_entry->common.nexthop_group_node,
5202                       &old_nh_grp->fib_list);
5203         fib6_entry->common.nh_group = old_nh_grp;
5204         return err;
5205 }
5206
5207 static int
5208 mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
5209                                 struct mlxsw_sp_fib6_entry *fib6_entry,
5210                                 struct fib6_info *rt)
5211 {
5212         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5213         int err;
5214
5215         mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
5216         if (IS_ERR(mlxsw_sp_rt6))
5217                 return PTR_ERR(mlxsw_sp_rt6);
5218
5219         list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
5220         fib6_entry->nrt6++;
5221
5222         err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
5223         if (err)
5224                 goto err_nexthop6_group_update;
5225
5226         return 0;
5227
5228 err_nexthop6_group_update:
5229         fib6_entry->nrt6--;
5230         list_del(&mlxsw_sp_rt6->list);
5231         mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5232         return err;
5233 }
5234
5235 static void
5236 mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp,
5237                                 struct mlxsw_sp_fib6_entry *fib6_entry,
5238                                 struct fib6_info *rt)
5239 {
5240         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5241
5242         mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt);
5243         if (WARN_ON(!mlxsw_sp_rt6))
5244                 return;
5245
5246         fib6_entry->nrt6--;
5247         list_del(&mlxsw_sp_rt6->list);
5248         mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
5249         mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5250 }
5251
5252 static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp,
5253                                          struct mlxsw_sp_fib_entry *fib_entry,
5254                                          const struct fib6_info *rt)
5255 {
5256         /* Packets hitting RTF_REJECT routes need to be discarded by the
5257          * stack. We can rely on their destination device not having a
5258          * RIF (it's the loopback device) and can thus use action type
5259          * local, which will cause them to be trapped with a lower
5260          * priority than packets that need to be locally received.
5261          */
5262         if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
5263                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
5264         else if (rt->fib6_type == RTN_BLACKHOLE)
5265                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE;
5266         else if (rt->fib6_flags & RTF_REJECT)
5267                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
5268         else if (mlxsw_sp_rt6_is_gateway(mlxsw_sp, rt))
5269                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
5270         else
5271                 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
5272 }
5273
5274 static void
5275 mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry *fib6_entry)
5276 {
5277         struct mlxsw_sp_rt6 *mlxsw_sp_rt6, *tmp;
5278
5279         list_for_each_entry_safe(mlxsw_sp_rt6, tmp, &fib6_entry->rt6_list,
5280                                  list) {
5281                 fib6_entry->nrt6--;
5282                 list_del(&mlxsw_sp_rt6->list);
5283                 mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5284         }
5285 }
5286
5287 static struct mlxsw_sp_fib6_entry *
5288 mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp,
5289                            struct mlxsw_sp_fib_node *fib_node,
5290                            struct fib6_info *rt)
5291 {
5292         struct mlxsw_sp_fib6_entry *fib6_entry;
5293         struct mlxsw_sp_fib_entry *fib_entry;
5294         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5295         int err;
5296
5297         fib6_entry = kzalloc(sizeof(*fib6_entry), GFP_KERNEL);
5298         if (!fib6_entry)
5299                 return ERR_PTR(-ENOMEM);
5300         fib_entry = &fib6_entry->common;
5301
5302         mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
5303         if (IS_ERR(mlxsw_sp_rt6)) {
5304                 err = PTR_ERR(mlxsw_sp_rt6);
5305                 goto err_rt6_create;
5306         }
5307
5308         mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, mlxsw_sp_rt6->rt);
5309
5310         INIT_LIST_HEAD(&fib6_entry->rt6_list);
5311         list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
5312         fib6_entry->nrt6 = 1;
5313         err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
5314         if (err)
5315                 goto err_nexthop6_group_get;
5316
5317         fib_entry->fib_node = fib_node;
5318
5319         return fib6_entry;
5320
5321 err_nexthop6_group_get:
5322         list_del(&mlxsw_sp_rt6->list);
5323         mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5324 err_rt6_create:
5325         kfree(fib6_entry);
5326         return ERR_PTR(err);
5327 }
5328
5329 static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp,
5330                                         struct mlxsw_sp_fib6_entry *fib6_entry)
5331 {
5332         mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
5333         mlxsw_sp_fib6_entry_rt_destroy_all(fib6_entry);
5334         WARN_ON(fib6_entry->nrt6);
5335         kfree(fib6_entry);
5336 }
5337
5338 static struct mlxsw_sp_fib6_entry *
5339 mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
5340                               const struct fib6_info *nrt, bool replace)
5341 {
5342         struct mlxsw_sp_fib6_entry *fib6_entry, *fallback = NULL;
5343
5344         list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
5345                 struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
5346
5347                 if (rt->fib6_table->tb6_id > nrt->fib6_table->tb6_id)
5348                         continue;
5349                 if (rt->fib6_table->tb6_id != nrt->fib6_table->tb6_id)
5350                         break;
5351                 if (replace && rt->fib6_metric == nrt->fib6_metric) {
5352                         if (mlxsw_sp_fib6_rt_can_mp(rt) ==
5353                             mlxsw_sp_fib6_rt_can_mp(nrt))
5354                                 return fib6_entry;
5355                         if (mlxsw_sp_fib6_rt_can_mp(nrt))
5356                                 fallback = fallback ?: fib6_entry;
5357                 }
5358                 if (rt->fib6_metric > nrt->fib6_metric)
5359                         return fallback ?: fib6_entry;
5360         }
5361
5362         return fallback;
5363 }
5364
5365 static int
5366 mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry,
5367                                bool replace)
5368 {
5369         struct mlxsw_sp_fib_node *fib_node = new6_entry->common.fib_node;
5370         struct fib6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry);
5371         struct mlxsw_sp_fib6_entry *fib6_entry;
5372
5373         fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, replace);
5374
5375         if (replace && WARN_ON(!fib6_entry))
5376                 return -EINVAL;
5377
5378         if (fib6_entry) {
5379                 list_add_tail(&new6_entry->common.list,
5380                               &fib6_entry->common.list);
5381         } else {
5382                 struct mlxsw_sp_fib6_entry *last;
5383
5384                 list_for_each_entry(last, &fib_node->entry_list, common.list) {
5385                         struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(last);
5386
5387                         if (nrt->fib6_table->tb6_id > rt->fib6_table->tb6_id)
5388                                 break;
5389                         fib6_entry = last;
5390                 }
5391
5392                 if (fib6_entry)
5393                         list_add(&new6_entry->common.list,
5394                                  &fib6_entry->common.list);
5395                 else
5396                         list_add(&new6_entry->common.list,
5397                                  &fib_node->entry_list);
5398         }
5399
5400         return 0;
5401 }
5402
5403 static void
5404 mlxsw_sp_fib6_node_list_remove(struct mlxsw_sp_fib6_entry *fib6_entry)
5405 {
5406         list_del(&fib6_entry->common.list);
5407 }
5408
5409 static int mlxsw_sp_fib6_node_entry_link(struct mlxsw_sp *mlxsw_sp,
5410                                          struct mlxsw_sp_fib6_entry *fib6_entry,
5411                                          bool replace)
5412 {
5413         int err;
5414
5415         err = mlxsw_sp_fib6_node_list_insert(fib6_entry, replace);
5416         if (err)
5417                 return err;
5418
5419         err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
5420         if (err)
5421                 goto err_fib_node_entry_add;
5422
5423         return 0;
5424
5425 err_fib_node_entry_add:
5426         mlxsw_sp_fib6_node_list_remove(fib6_entry);
5427         return err;
5428 }
5429
5430 static void
5431 mlxsw_sp_fib6_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
5432                                 struct mlxsw_sp_fib6_entry *fib6_entry)
5433 {
5434         mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib6_entry->common);
5435         mlxsw_sp_fib6_node_list_remove(fib6_entry);
5436 }
5437
5438 static struct mlxsw_sp_fib6_entry *
5439 mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp,
5440                            const struct fib6_info *rt)
5441 {
5442         struct mlxsw_sp_fib6_entry *fib6_entry;
5443         struct mlxsw_sp_fib_node *fib_node;
5444         struct mlxsw_sp_fib *fib;
5445         struct mlxsw_sp_vr *vr;
5446
5447         vr = mlxsw_sp_vr_find(mlxsw_sp, rt->fib6_table->tb6_id);
5448         if (!vr)
5449                 return NULL;
5450         fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV6);
5451
5452         fib_node = mlxsw_sp_fib_node_lookup(fib, &rt->fib6_dst.addr,
5453                                             sizeof(rt->fib6_dst.addr),
5454                                             rt->fib6_dst.plen);
5455         if (!fib_node)
5456                 return NULL;
5457
5458         list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
5459                 struct fib6_info *iter_rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
5460
5461                 if (rt->fib6_table->tb6_id == iter_rt->fib6_table->tb6_id &&
5462                     rt->fib6_metric == iter_rt->fib6_metric &&
5463                     mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt))
5464                         return fib6_entry;
5465         }
5466
5467         return NULL;
5468 }
5469
5470 static void mlxsw_sp_fib6_entry_replace(struct mlxsw_sp *mlxsw_sp,
5471                                         struct mlxsw_sp_fib6_entry *fib6_entry,
5472                                         bool replace)
5473 {
5474         struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;
5475         struct mlxsw_sp_fib6_entry *replaced;
5476
5477         if (!replace)
5478                 return;
5479
5480         replaced = list_next_entry(fib6_entry, common.list);
5481
5482         mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, replaced);
5483         mlxsw_sp_fib6_entry_destroy(mlxsw_sp, replaced);
5484         mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5485 }
5486
5487 static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
5488                                     struct fib6_info *rt, bool replace)
5489 {
5490         struct mlxsw_sp_fib6_entry *fib6_entry;
5491         struct mlxsw_sp_fib_node *fib_node;
5492         int err;
5493
5494         if (mlxsw_sp->router->aborted)
5495                 return 0;
5496
5497         if (rt->fib6_src.plen)
5498                 return -EINVAL;
5499
5500         if (mlxsw_sp_fib6_rt_should_ignore(rt))
5501                 return 0;
5502
5503         fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->fib6_table->tb6_id,
5504                                          &rt->fib6_dst.addr,
5505                                          sizeof(rt->fib6_dst.addr),
5506                                          rt->fib6_dst.plen,
5507                                          MLXSW_SP_L3_PROTO_IPV6);
5508         if (IS_ERR(fib_node))
5509                 return PTR_ERR(fib_node);
5510
5511         /* Before creating a new entry, try to append route to an existing
5512          * multipath entry.
5513          */
5514         fib6_entry = mlxsw_sp_fib6_node_mp_entry_find(fib_node, rt, replace);
5515         if (fib6_entry) {
5516                 err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt);
5517                 if (err)
5518                         goto err_fib6_entry_nexthop_add;
5519                 return 0;
5520         }
5521
5522         fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt);
5523         if (IS_ERR(fib6_entry)) {
5524                 err = PTR_ERR(fib6_entry);
5525                 goto err_fib6_entry_create;
5526         }
5527
5528         err = mlxsw_sp_fib6_node_entry_link(mlxsw_sp, fib6_entry, replace);
5529         if (err)
5530                 goto err_fib6_node_entry_link;
5531
5532         mlxsw_sp_fib6_entry_replace(mlxsw_sp, fib6_entry, replace);
5533
5534         return 0;
5535
5536 err_fib6_node_entry_link:
5537         mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
5538 err_fib6_entry_create:
5539 err_fib6_entry_nexthop_add:
5540         mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5541         return err;
5542 }
5543
5544 static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp,
5545                                      struct fib6_info *rt)
5546 {
5547         struct mlxsw_sp_fib6_entry *fib6_entry;
5548         struct mlxsw_sp_fib_node *fib_node;
5549
5550         if (mlxsw_sp->router->aborted)
5551                 return;
5552
5553         if (mlxsw_sp_fib6_rt_should_ignore(rt))
5554                 return;
5555
5556         fib6_entry = mlxsw_sp_fib6_entry_lookup(mlxsw_sp, rt);
5557         if (WARN_ON(!fib6_entry))
5558                 return;
5559
5560         /* If route is part of a multipath entry, but not the last one
5561          * removed, then only reduce its nexthop group.
5562          */
5563         if (!list_is_singular(&fib6_entry->rt6_list)) {
5564                 mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt);
5565                 return;
5566         }
5567
5568         fib_node = fib6_entry->common.fib_node;
5569
5570         mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry);
5571         mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
5572         mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5573 }
5574
5575 static int __mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp,
5576                                             enum mlxsw_reg_ralxx_protocol proto,
5577                                             u8 tree_id)
5578 {
5579         char ralta_pl[MLXSW_REG_RALTA_LEN];
5580         char ralst_pl[MLXSW_REG_RALST_LEN];
5581         int i, err;
5582
5583         mlxsw_reg_ralta_pack(ralta_pl, true, proto, tree_id);
5584         err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
5585         if (err)
5586                 return err;
5587
5588         mlxsw_reg_ralst_pack(ralst_pl, 0xff, tree_id);
5589         err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
5590         if (err)
5591                 return err;
5592
5593         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
5594                 struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
5595                 char raltb_pl[MLXSW_REG_RALTB_LEN];
5596                 char ralue_pl[MLXSW_REG_RALUE_LEN];
5597
5598                 mlxsw_reg_raltb_pack(raltb_pl, vr->id, proto, tree_id);
5599                 err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb),
5600                                       raltb_pl);
5601                 if (err)
5602                         return err;
5603
5604                 mlxsw_reg_ralue_pack(ralue_pl, proto,
5605                                      MLXSW_REG_RALUE_OP_WRITE_WRITE, vr->id, 0);
5606                 mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
5607                 err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue),
5608                                       ralue_pl);
5609                 if (err)
5610                         return err;
5611         }
5612
5613         return 0;
5614 }
5615
5616 static struct mlxsw_sp_mr_table *
5617 mlxsw_sp_router_fibmr_family_to_table(struct mlxsw_sp_vr *vr, int family)
5618 {
5619         if (family == RTNL_FAMILY_IPMR)
5620                 return vr->mr_table[MLXSW_SP_L3_PROTO_IPV4];
5621         else
5622                 return vr->mr_table[MLXSW_SP_L3_PROTO_IPV6];
5623 }
5624
5625 static int mlxsw_sp_router_fibmr_add(struct mlxsw_sp *mlxsw_sp,
5626                                      struct mfc_entry_notifier_info *men_info,
5627                                      bool replace)
5628 {
5629         struct mlxsw_sp_mr_table *mrt;
5630         struct mlxsw_sp_vr *vr;
5631
5632         if (mlxsw_sp->router->aborted)
5633                 return 0;
5634
5635         vr = mlxsw_sp_vr_get(mlxsw_sp, men_info->tb_id, NULL);
5636         if (IS_ERR(vr))
5637                 return PTR_ERR(vr);
5638
5639         mrt = mlxsw_sp_router_fibmr_family_to_table(vr, men_info->info.family);
5640         return mlxsw_sp_mr_route_add(mrt, men_info->mfc, replace);
5641 }
5642
5643 static void mlxsw_sp_router_fibmr_del(struct mlxsw_sp *mlxsw_sp,
5644                                       struct mfc_entry_notifier_info *men_info)
5645 {
5646         struct mlxsw_sp_mr_table *mrt;
5647         struct mlxsw_sp_vr *vr;
5648
5649         if (mlxsw_sp->router->aborted)
5650                 return;
5651
5652         vr = mlxsw_sp_vr_find(mlxsw_sp, men_info->tb_id);
5653         if (WARN_ON(!vr))
5654                 return;
5655
5656         mrt = mlxsw_sp_router_fibmr_family_to_table(vr, men_info->info.family);
5657         mlxsw_sp_mr_route_del(mrt, men_info->mfc);
5658         mlxsw_sp_vr_put(mlxsw_sp, vr);
5659 }
5660
5661 static int
5662 mlxsw_sp_router_fibmr_vif_add(struct mlxsw_sp *mlxsw_sp,
5663                               struct vif_entry_notifier_info *ven_info)
5664 {
5665         struct mlxsw_sp_mr_table *mrt;
5666         struct mlxsw_sp_rif *rif;
5667         struct mlxsw_sp_vr *vr;
5668
5669         if (mlxsw_sp->router->aborted)
5670                 return 0;
5671
5672         vr = mlxsw_sp_vr_get(mlxsw_sp, ven_info->tb_id, NULL);
5673         if (IS_ERR(vr))
5674                 return PTR_ERR(vr);
5675
5676         mrt = mlxsw_sp_router_fibmr_family_to_table(vr, ven_info->info.family);
5677         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, ven_info->dev);
5678         return mlxsw_sp_mr_vif_add(mrt, ven_info->dev,
5679                                    ven_info->vif_index,
5680                                    ven_info->vif_flags, rif);
5681 }
5682
5683 static void
5684 mlxsw_sp_router_fibmr_vif_del(struct mlxsw_sp *mlxsw_sp,
5685                               struct vif_entry_notifier_info *ven_info)
5686 {
5687         struct mlxsw_sp_mr_table *mrt;
5688         struct mlxsw_sp_vr *vr;
5689
5690         if (mlxsw_sp->router->aborted)
5691                 return;
5692
5693         vr = mlxsw_sp_vr_find(mlxsw_sp, ven_info->tb_id);
5694         if (WARN_ON(!vr))
5695                 return;
5696
5697         mrt = mlxsw_sp_router_fibmr_family_to_table(vr, ven_info->info.family);
5698         mlxsw_sp_mr_vif_del(mrt, ven_info->vif_index);
5699         mlxsw_sp_vr_put(mlxsw_sp, vr);
5700 }
5701
5702 static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
5703 {
5704         enum mlxsw_reg_ralxx_protocol proto = MLXSW_REG_RALXX_PROTOCOL_IPV4;
5705         int err;
5706
5707         err = __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
5708                                                MLXSW_SP_LPM_TREE_MIN);
5709         if (err)
5710                 return err;
5711
5712         /* The multicast router code does not need an abort trap as by default,
5713          * packets that don't match any routes are trapped to the CPU.
5714          */
5715
5716         proto = MLXSW_REG_RALXX_PROTOCOL_IPV6;
5717         return __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
5718                                                 MLXSW_SP_LPM_TREE_MIN + 1);
5719 }
5720
5721 static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp,
5722                                      struct mlxsw_sp_fib_node *fib_node)
5723 {
5724         struct mlxsw_sp_fib4_entry *fib4_entry, *tmp;
5725
5726         list_for_each_entry_safe(fib4_entry, tmp, &fib_node->entry_list,
5727                                  common.list) {
5728                 bool do_break = &tmp->common.list == &fib_node->entry_list;
5729
5730                 mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry);
5731                 mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
5732                 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5733                 /* Break when entry list is empty and node was freed.
5734                  * Otherwise, we'll access freed memory in the next
5735                  * iteration.
5736                  */
5737                 if (do_break)
5738                         break;
5739         }
5740 }
5741
5742 static void mlxsw_sp_fib6_node_flush(struct mlxsw_sp *mlxsw_sp,
5743                                      struct mlxsw_sp_fib_node *fib_node)
5744 {
5745         struct mlxsw_sp_fib6_entry *fib6_entry, *tmp;
5746
5747         list_for_each_entry_safe(fib6_entry, tmp, &fib_node->entry_list,
5748                                  common.list) {
5749                 bool do_break = &tmp->common.list == &fib_node->entry_list;
5750
5751                 mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry);
5752                 mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
5753                 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5754                 if (do_break)
5755                         break;
5756         }
5757 }
5758
5759 static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp,
5760                                     struct mlxsw_sp_fib_node *fib_node)
5761 {
5762         switch (fib_node->fib->proto) {
5763         case MLXSW_SP_L3_PROTO_IPV4:
5764                 mlxsw_sp_fib4_node_flush(mlxsw_sp, fib_node);
5765                 break;
5766         case MLXSW_SP_L3_PROTO_IPV6:
5767                 mlxsw_sp_fib6_node_flush(mlxsw_sp, fib_node);
5768                 break;
5769         }
5770 }
5771
5772 static void mlxsw_sp_vr_fib_flush(struct mlxsw_sp *mlxsw_sp,
5773                                   struct mlxsw_sp_vr *vr,
5774                                   enum mlxsw_sp_l3proto proto)
5775 {
5776         struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
5777         struct mlxsw_sp_fib_node *fib_node, *tmp;
5778
5779         list_for_each_entry_safe(fib_node, tmp, &fib->node_list, list) {
5780                 bool do_break = &tmp->list == &fib->node_list;
5781
5782                 mlxsw_sp_fib_node_flush(mlxsw_sp, fib_node);
5783                 if (do_break)
5784                         break;
5785         }
5786 }
5787
5788 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
5789 {
5790         int i, j;
5791
5792         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
5793                 struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
5794
5795                 if (!mlxsw_sp_vr_is_used(vr))
5796                         continue;
5797
5798                 for (j = 0; j < MLXSW_SP_L3_PROTO_MAX; j++)
5799                         mlxsw_sp_mr_table_flush(vr->mr_table[j]);
5800                 mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
5801
5802                 /* If virtual router was only used for IPv4, then it's no
5803                  * longer used.
5804                  */
5805                 if (!mlxsw_sp_vr_is_used(vr))
5806                         continue;
5807                 mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
5808         }
5809 }
5810
5811 static void mlxsw_sp_router_fib_abort(struct mlxsw_sp *mlxsw_sp)
5812 {
5813         int err;
5814
5815         if (mlxsw_sp->router->aborted)
5816                 return;
5817         dev_warn(mlxsw_sp->bus_info->dev, "FIB abort triggered. Note that FIB entries are no longer being offloaded to this device.\n");
5818         mlxsw_sp_router_fib_flush(mlxsw_sp);
5819         mlxsw_sp->router->aborted = true;
5820         err = mlxsw_sp_router_set_abort_trap(mlxsw_sp);
5821         if (err)
5822                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n");
5823 }
5824
/* Deferred FIB notification. The notifier callback allocates one of these,
 * copies the event payload into the union and queues @work; the work
 * handler processes the event and frees the item.
 */
struct mlxsw_sp_fib_event_work {
        struct work_struct work;
        /* Snapshot of the notifier payload; which member is valid depends
         * on the notifier family and on @event.
         */
        union {
                struct fib6_entry_notifier_info fen6_info;
                struct fib_entry_notifier_info fen_info;
                struct fib_rule_notifier_info fr_info;
                struct fib_nh_notifier_info fnh_info;
                struct mfc_entry_notifier_info men_info;
                struct vif_entry_notifier_info ven_info;
        };
        struct mlxsw_sp *mlxsw_sp;
        /* FIB_EVENT_* value received by the notifier. */
        unsigned long event;
};
5838
5839 static void mlxsw_sp_router_fib4_event_work(struct work_struct *work)
5840 {
5841         struct mlxsw_sp_fib_event_work *fib_work =
5842                 container_of(work, struct mlxsw_sp_fib_event_work, work);
5843         struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
5844         bool replace, append;
5845         int err;
5846
5847         /* Protect internal structures from changes */
5848         rtnl_lock();
5849         mlxsw_sp_span_respin(mlxsw_sp);
5850
5851         switch (fib_work->event) {
5852         case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5853         case FIB_EVENT_ENTRY_APPEND: /* fall through */
5854         case FIB_EVENT_ENTRY_ADD:
5855                 replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
5856                 append = fib_work->event == FIB_EVENT_ENTRY_APPEND;
5857                 err = mlxsw_sp_router_fib4_add(mlxsw_sp, &fib_work->fen_info,
5858                                                replace, append);
5859                 if (err)
5860                         mlxsw_sp_router_fib_abort(mlxsw_sp);
5861                 fib_info_put(fib_work->fen_info.fi);
5862                 break;
5863         case FIB_EVENT_ENTRY_DEL:
5864                 mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info);
5865                 fib_info_put(fib_work->fen_info.fi);
5866                 break;
5867         case FIB_EVENT_RULE_ADD:
5868                 /* if we get here, a rule was added that we do not support.
5869                  * just do the fib_abort
5870                  */
5871                 mlxsw_sp_router_fib_abort(mlxsw_sp);
5872                 break;
5873         case FIB_EVENT_NH_ADD: /* fall through */
5874         case FIB_EVENT_NH_DEL:
5875                 mlxsw_sp_nexthop4_event(mlxsw_sp, fib_work->event,
5876                                         fib_work->fnh_info.fib_nh);
5877                 fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
5878                 break;
5879         }
5880         rtnl_unlock();
5881         kfree(fib_work);
5882 }
5883
5884 static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
5885 {
5886         struct mlxsw_sp_fib_event_work *fib_work =
5887                 container_of(work, struct mlxsw_sp_fib_event_work, work);
5888         struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
5889         bool replace;
5890         int err;
5891
5892         rtnl_lock();
5893         mlxsw_sp_span_respin(mlxsw_sp);
5894
5895         switch (fib_work->event) {
5896         case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5897         case FIB_EVENT_ENTRY_APPEND: /* fall through */
5898         case FIB_EVENT_ENTRY_ADD:
5899                 replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
5900                 err = mlxsw_sp_router_fib6_add(mlxsw_sp,
5901                                                fib_work->fen6_info.rt, replace);
5902                 if (err)
5903                         mlxsw_sp_router_fib_abort(mlxsw_sp);
5904                 mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
5905                 break;
5906         case FIB_EVENT_ENTRY_DEL:
5907                 mlxsw_sp_router_fib6_del(mlxsw_sp, fib_work->fen6_info.rt);
5908                 mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
5909                 break;
5910         case FIB_EVENT_RULE_ADD:
5911                 /* if we get here, a rule was added that we do not support.
5912                  * just do the fib_abort
5913                  */
5914                 mlxsw_sp_router_fib_abort(mlxsw_sp);
5915                 break;
5916         }
5917         rtnl_unlock();
5918         kfree(fib_work);
5919 }
5920
5921 static void mlxsw_sp_router_fibmr_event_work(struct work_struct *work)
5922 {
5923         struct mlxsw_sp_fib_event_work *fib_work =
5924                 container_of(work, struct mlxsw_sp_fib_event_work, work);
5925         struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
5926         bool replace;
5927         int err;
5928
5929         rtnl_lock();
5930         switch (fib_work->event) {
5931         case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5932         case FIB_EVENT_ENTRY_ADD:
5933                 replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
5934
5935                 err = mlxsw_sp_router_fibmr_add(mlxsw_sp, &fib_work->men_info,
5936                                                 replace);
5937                 if (err)
5938                         mlxsw_sp_router_fib_abort(mlxsw_sp);
5939                 mr_cache_put(fib_work->men_info.mfc);
5940                 break;
5941         case FIB_EVENT_ENTRY_DEL:
5942                 mlxsw_sp_router_fibmr_del(mlxsw_sp, &fib_work->men_info);
5943                 mr_cache_put(fib_work->men_info.mfc);
5944                 break;
5945         case FIB_EVENT_VIF_ADD:
5946                 err = mlxsw_sp_router_fibmr_vif_add(mlxsw_sp,
5947                                                     &fib_work->ven_info);
5948                 if (err)
5949                         mlxsw_sp_router_fib_abort(mlxsw_sp);
5950                 dev_put(fib_work->ven_info.dev);
5951                 break;
5952         case FIB_EVENT_VIF_DEL:
5953                 mlxsw_sp_router_fibmr_vif_del(mlxsw_sp,
5954                                               &fib_work->ven_info);
5955                 dev_put(fib_work->ven_info.dev);
5956                 break;
5957         case FIB_EVENT_RULE_ADD:
5958                 /* if we get here, a rule was added that we do not support.
5959                  * just do the fib_abort
5960                  */
5961                 mlxsw_sp_router_fib_abort(mlxsw_sp);
5962                 break;
5963         }
5964         rtnl_unlock();
5965         kfree(fib_work);
5966 }
5967
5968 static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work,
5969                                        struct fib_notifier_info *info)
5970 {
5971         struct fib_entry_notifier_info *fen_info;
5972         struct fib_nh_notifier_info *fnh_info;
5973
5974         switch (fib_work->event) {
5975         case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5976         case FIB_EVENT_ENTRY_APPEND: /* fall through */
5977         case FIB_EVENT_ENTRY_ADD: /* fall through */
5978         case FIB_EVENT_ENTRY_DEL:
5979                 fen_info = container_of(info, struct fib_entry_notifier_info,
5980                                         info);
5981                 fib_work->fen_info = *fen_info;
5982                 /* Take reference on fib_info to prevent it from being
5983                  * freed while work is queued. Release it afterwards.
5984                  */
5985                 fib_info_hold(fib_work->fen_info.fi);
5986                 break;
5987         case FIB_EVENT_NH_ADD: /* fall through */
5988         case FIB_EVENT_NH_DEL:
5989                 fnh_info = container_of(info, struct fib_nh_notifier_info,
5990                                         info);
5991                 fib_work->fnh_info = *fnh_info;
5992                 fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
5993                 break;
5994         }
5995 }
5996
5997 static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work,
5998                                        struct fib_notifier_info *info)
5999 {
6000         struct fib6_entry_notifier_info *fen6_info;
6001
6002         switch (fib_work->event) {
6003         case FIB_EVENT_ENTRY_REPLACE: /* fall through */
6004         case FIB_EVENT_ENTRY_APPEND: /* fall through */
6005         case FIB_EVENT_ENTRY_ADD: /* fall through */
6006         case FIB_EVENT_ENTRY_DEL:
6007                 fen6_info = container_of(info, struct fib6_entry_notifier_info,
6008                                          info);
6009                 fib_work->fen6_info = *fen6_info;
6010                 fib6_info_hold(fib_work->fen6_info.rt);
6011                 break;
6012         }
6013 }
6014
6015 static void
6016 mlxsw_sp_router_fibmr_event(struct mlxsw_sp_fib_event_work *fib_work,
6017                             struct fib_notifier_info *info)
6018 {
6019         switch (fib_work->event) {
6020         case FIB_EVENT_ENTRY_REPLACE: /* fall through */
6021         case FIB_EVENT_ENTRY_ADD: /* fall through */
6022         case FIB_EVENT_ENTRY_DEL:
6023                 memcpy(&fib_work->men_info, info, sizeof(fib_work->men_info));
6024                 mr_cache_hold(fib_work->men_info.mfc);
6025                 break;
6026         case FIB_EVENT_VIF_ADD: /* fall through */
6027         case FIB_EVENT_VIF_DEL:
6028                 memcpy(&fib_work->ven_info, info, sizeof(fib_work->ven_info));
6029                 dev_hold(fib_work->ven_info.dev);
6030                 break;
6031         }
6032 }
6033
6034 static int mlxsw_sp_router_fib_rule_event(unsigned long event,
6035                                           struct fib_notifier_info *info,
6036                                           struct mlxsw_sp *mlxsw_sp)
6037 {
6038         struct netlink_ext_ack *extack = info->extack;
6039         struct fib_rule_notifier_info *fr_info;
6040         struct fib_rule *rule;
6041         int err = 0;
6042
6043         /* nothing to do at the moment */
6044         if (event == FIB_EVENT_RULE_DEL)
6045                 return 0;
6046
6047         if (mlxsw_sp->router->aborted)
6048                 return 0;
6049
6050         fr_info = container_of(info, struct fib_rule_notifier_info, info);
6051         rule = fr_info->rule;
6052
6053         switch (info->family) {
6054         case AF_INET:
6055                 if (!fib4_rule_default(rule) && !rule->l3mdev)
6056                         err = -EOPNOTSUPP;
6057                 break;
6058         case AF_INET6:
6059                 if (!fib6_rule_default(rule) && !rule->l3mdev)
6060                         err = -EOPNOTSUPP;
6061                 break;
6062         case RTNL_FAMILY_IPMR:
6063                 if (!ipmr_rule_default(rule) && !rule->l3mdev)
6064                         err = -EOPNOTSUPP;
6065                 break;
6066         case RTNL_FAMILY_IP6MR:
6067                 if (!ip6mr_rule_default(rule) && !rule->l3mdev)
6068                         err = -EOPNOTSUPP;
6069                 break;
6070         }
6071
6072         if (err < 0)
6073                 NL_SET_ERR_MSG_MOD(extack, "FIB rules not supported");
6074
6075         return err;
6076 }
6077
6078 /* Called with rcu_read_lock() */
6079 static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
6080                                      unsigned long event, void *ptr)
6081 {
6082         struct mlxsw_sp_fib_event_work *fib_work;
6083         struct fib_notifier_info *info = ptr;
6084         struct mlxsw_sp_router *router;
6085         int err;
6086
6087         if (!net_eq(info->net, &init_net) ||
6088             (info->family != AF_INET && info->family != AF_INET6 &&
6089              info->family != RTNL_FAMILY_IPMR &&
6090              info->family != RTNL_FAMILY_IP6MR))
6091                 return NOTIFY_DONE;
6092
6093         router = container_of(nb, struct mlxsw_sp_router, fib_nb);
6094
6095         switch (event) {
6096         case FIB_EVENT_RULE_ADD: /* fall through */
6097         case FIB_EVENT_RULE_DEL:
6098                 err = mlxsw_sp_router_fib_rule_event(event, info,
6099                                                      router->mlxsw_sp);
6100                 if (!err || info->extack)
6101                         return notifier_from_errno(err);
6102                 break;
6103         case FIB_EVENT_ENTRY_ADD:
6104                 if (router->aborted) {
6105                         NL_SET_ERR_MSG_MOD(info->extack, "FIB offload was aborted. Not configuring route");
6106                         return notifier_from_errno(-EINVAL);
6107                 }
6108                 if (info->family == AF_INET) {
6109                         struct fib_entry_notifier_info *fen_info = ptr;
6110
6111                         if (fen_info->fi->fib_nh_is_v6) {
6112                                 NL_SET_ERR_MSG_MOD(info->extack, "IPv6 gateway with IPv4 route is not supported");
6113                                 return notifier_from_errno(-EINVAL);
6114                         }
6115                 }
6116                 break;
6117         }
6118
6119         fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
6120         if (WARN_ON(!fib_work))
6121                 return NOTIFY_BAD;
6122
6123         fib_work->mlxsw_sp = router->mlxsw_sp;
6124         fib_work->event = event;
6125
6126         switch (info->family) {
6127         case AF_INET:
6128                 INIT_WORK(&fib_work->work, mlxsw_sp_router_fib4_event_work);
6129                 mlxsw_sp_router_fib4_event(fib_work, info);
6130                 break;
6131         case AF_INET6:
6132                 INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work);
6133                 mlxsw_sp_router_fib6_event(fib_work, info);
6134                 break;
6135         case RTNL_FAMILY_IP6MR:
6136         case RTNL_FAMILY_IPMR:
6137                 INIT_WORK(&fib_work->work, mlxsw_sp_router_fibmr_event_work);
6138                 mlxsw_sp_router_fibmr_event(fib_work, info);
6139                 break;
6140         }
6141
6142         mlxsw_core_schedule_work(&fib_work->work);
6143
6144         return NOTIFY_DONE;
6145 }
6146
6147 struct mlxsw_sp_rif *
6148 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
6149                          const struct net_device *dev)
6150 {
6151         int i;
6152
6153         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
6154                 if (mlxsw_sp->router->rifs[i] &&
6155                     mlxsw_sp->router->rifs[i]->dev == dev)
6156                         return mlxsw_sp->router->rifs[i];
6157
6158         return NULL;
6159 }
6160
6161 static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif)
6162 {
6163         char ritr_pl[MLXSW_REG_RITR_LEN];
6164         int err;
6165
6166         mlxsw_reg_ritr_rif_pack(ritr_pl, rif);
6167         err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6168         if (err)
6169                 return err;
6170
6171         mlxsw_reg_ritr_enable_set(ritr_pl, false);
6172         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6173 }
6174
6175 static void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
6176                                           struct mlxsw_sp_rif *rif)
6177 {
6178         mlxsw_sp_router_rif_disable(mlxsw_sp, rif->rif_index);
6179         mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp, rif);
6180         mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif);
6181 }
6182
6183 static bool
6184 mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, struct net_device *dev,
6185                            unsigned long event)
6186 {
6187         struct inet6_dev *inet6_dev;
6188         bool addr_list_empty = true;
6189         struct in_device *idev;
6190
6191         switch (event) {
6192         case NETDEV_UP:
6193                 return rif == NULL;
6194         case NETDEV_DOWN:
6195                 idev = __in_dev_get_rtnl(dev);
6196                 if (idev && idev->ifa_list)
6197                         addr_list_empty = false;
6198
6199                 inet6_dev = __in6_dev_get(dev);
6200                 if (addr_list_empty && inet6_dev &&
6201                     !list_empty(&inet6_dev->addr_list))
6202                         addr_list_empty = false;
6203
6204                 /* macvlans do not have a RIF, but rather piggy back on the
6205                  * RIF of their lower device.
6206                  */
6207                 if (netif_is_macvlan(dev) && addr_list_empty)
6208                         return true;
6209
6210                 if (rif && addr_list_empty &&
6211                     !netif_is_l3_slave(rif->dev))
6212                         return true;
6213                 /* It is possible we already removed the RIF ourselves
6214                  * if it was assigned to a netdev that is now a bridge
6215                  * or LAG slave.
6216                  */
6217                 return false;
6218         }
6219
6220         return false;
6221 }
6222
6223 static enum mlxsw_sp_rif_type
6224 mlxsw_sp_dev_rif_type(const struct mlxsw_sp *mlxsw_sp,
6225                       const struct net_device *dev)
6226 {
6227         enum mlxsw_sp_fid_type type;
6228
6229         if (mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL))
6230                 return MLXSW_SP_RIF_TYPE_IPIP_LB;
6231
6232         /* Otherwise RIF type is derived from the type of the underlying FID. */
6233         if (is_vlan_dev(dev) && netif_is_bridge_master(vlan_dev_real_dev(dev)))
6234                 type = MLXSW_SP_FID_TYPE_8021Q;
6235         else if (netif_is_bridge_master(dev) && br_vlan_enabled(dev))
6236                 type = MLXSW_SP_FID_TYPE_8021Q;
6237         else if (netif_is_bridge_master(dev))
6238                 type = MLXSW_SP_FID_TYPE_8021D;
6239         else
6240                 type = MLXSW_SP_FID_TYPE_RFID;
6241
6242         return mlxsw_sp_fid_type_rif_type(mlxsw_sp, type);
6243 }
6244
6245 static int mlxsw_sp_rif_index_alloc(struct mlxsw_sp *mlxsw_sp, u16 *p_rif_index)
6246 {
6247         int i;
6248
6249         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
6250                 if (!mlxsw_sp->router->rifs[i]) {
6251                         *p_rif_index = i;
6252                         return 0;
6253                 }
6254         }
6255
6256         return -ENOBUFS;
6257 }
6258
6259 static struct mlxsw_sp_rif *mlxsw_sp_rif_alloc(size_t rif_size, u16 rif_index,
6260                                                u16 vr_id,
6261                                                struct net_device *l3_dev)
6262 {
6263         struct mlxsw_sp_rif *rif;
6264
6265         rif = kzalloc(rif_size, GFP_KERNEL);
6266         if (!rif)
6267                 return NULL;
6268
6269         INIT_LIST_HEAD(&rif->nexthop_list);
6270         INIT_LIST_HEAD(&rif->neigh_list);
6271         if (l3_dev) {
6272                 ether_addr_copy(rif->addr, l3_dev->dev_addr);
6273                 rif->mtu = l3_dev->mtu;
6274                 rif->dev = l3_dev;
6275         }
6276         rif->vr_id = vr_id;
6277         rif->rif_index = rif_index;
6278
6279         return rif;
6280 }
6281
6282 struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp,
6283                                            u16 rif_index)
6284 {
6285         return mlxsw_sp->router->rifs[rif_index];
6286 }
6287
6288 u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif)
6289 {
6290         return rif->rif_index;
6291 }
6292
6293 u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
6294 {
6295         return lb_rif->common.rif_index;
6296 }
6297
6298 u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
6299 {
6300         u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(lb_rif->common.dev);
6301         struct mlxsw_sp_vr *ul_vr;
6302
6303         ul_vr = mlxsw_sp_vr_get(lb_rif->common.mlxsw_sp, ul_tb_id, NULL);
6304         if (WARN_ON(IS_ERR(ul_vr)))
6305                 return 0;
6306
6307         return ul_vr->id;
6308 }
6309
6310 u16 mlxsw_sp_ipip_lb_ul_rif_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
6311 {
6312         return lb_rif->ul_rif_id;
6313 }
6314
6315 int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif)
6316 {
6317         return rif->dev->ifindex;
6318 }
6319
6320 const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif)
6321 {
6322         return rif->dev;
6323 }
6324
6325 struct mlxsw_sp_fid *mlxsw_sp_rif_fid(const struct mlxsw_sp_rif *rif)
6326 {
6327         return rif->fid;
6328 }
6329
6330 static struct mlxsw_sp_rif *
6331 mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
6332                     const struct mlxsw_sp_rif_params *params,
6333                     struct netlink_ext_ack *extack)
6334 {
6335         u32 tb_id = l3mdev_fib_table(params->dev);
6336         const struct mlxsw_sp_rif_ops *ops;
6337         struct mlxsw_sp_fid *fid = NULL;
6338         enum mlxsw_sp_rif_type type;
6339         struct mlxsw_sp_rif *rif;
6340         struct mlxsw_sp_vr *vr;
6341         u16 rif_index;
6342         int i, err;
6343
6344         type = mlxsw_sp_dev_rif_type(mlxsw_sp, params->dev);
6345         ops = mlxsw_sp->rif_ops_arr[type];
6346
6347         vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN, extack);
6348         if (IS_ERR(vr))
6349                 return ERR_CAST(vr);
6350         vr->rif_count++;
6351
6352         err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index);
6353         if (err) {
6354                 NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces");
6355                 goto err_rif_index_alloc;
6356         }
6357
6358         rif = mlxsw_sp_rif_alloc(ops->rif_size, rif_index, vr->id, params->dev);
6359         if (!rif) {
6360                 err = -ENOMEM;
6361                 goto err_rif_alloc;
6362         }
6363         dev_hold(rif->dev);
6364         mlxsw_sp->router->rifs[rif_index] = rif;
6365         rif->mlxsw_sp = mlxsw_sp;
6366         rif->ops = ops;
6367
6368         if (ops->fid_get) {
6369                 fid = ops->fid_get(rif, extack);
6370                 if (IS_ERR(fid)) {
6371                         err = PTR_ERR(fid);
6372                         goto err_fid_get;
6373                 }
6374                 rif->fid = fid;
6375         }
6376
6377         if (ops->setup)
6378                 ops->setup(rif, params);
6379
6380         err = ops->configure(rif);
6381         if (err)
6382                 goto err_configure;
6383
6384         for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++) {
6385                 err = mlxsw_sp_mr_rif_add(vr->mr_table[i], rif);
6386                 if (err)
6387                         goto err_mr_rif_add;
6388         }
6389
6390         mlxsw_sp_rif_counters_alloc(rif);
6391
6392         return rif;
6393
6394 err_mr_rif_add:
6395         for (i--; i >= 0; i--)
6396                 mlxsw_sp_mr_rif_del(vr->mr_table[i], rif);
6397         ops->deconfigure(rif);
6398 err_configure:
6399         if (fid)
6400                 mlxsw_sp_fid_put(fid);
6401 err_fid_get:
6402         mlxsw_sp->router->rifs[rif_index] = NULL;
6403         dev_put(rif->dev);
6404         kfree(rif);
6405 err_rif_alloc:
6406 err_rif_index_alloc:
6407         vr->rif_count--;
6408         mlxsw_sp_vr_put(mlxsw_sp, vr);
6409         return ERR_PTR(err);
6410 }
6411
6412 static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif)
6413 {
6414         const struct mlxsw_sp_rif_ops *ops = rif->ops;
6415         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6416         struct mlxsw_sp_fid *fid = rif->fid;
6417         struct mlxsw_sp_vr *vr;
6418         int i;
6419
6420         mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif);
6421         vr = &mlxsw_sp->router->vrs[rif->vr_id];
6422
6423         mlxsw_sp_rif_counters_free(rif);
6424         for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++)
6425                 mlxsw_sp_mr_rif_del(vr->mr_table[i], rif);
6426         ops->deconfigure(rif);
6427         if (fid)
6428                 /* Loopback RIFs are not associated with a FID. */
6429                 mlxsw_sp_fid_put(fid);
6430         mlxsw_sp->router->rifs[rif->rif_index] = NULL;
6431         dev_put(rif->dev);
6432         kfree(rif);
6433         vr->rif_count--;
6434         mlxsw_sp_vr_put(mlxsw_sp, vr);
6435 }
6436
/* Destroy the RIF associated with @dev, if there is one.
 *
 * A netdev without a RIF is silently ignored.
 */
void mlxsw_sp_rif_destroy_by_dev(struct mlxsw_sp *mlxsw_sp,
				 struct net_device *dev)
{
	struct mlxsw_sp_rif *found = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);

	if (found)
		mlxsw_sp_rif_destroy(found);
}
6447
6448 static void
6449 mlxsw_sp_rif_subport_params_init(struct mlxsw_sp_rif_params *params,
6450                                  struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
6451 {
6452         struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
6453
6454         params->vid = mlxsw_sp_port_vlan->vid;
6455         params->lag = mlxsw_sp_port->lagged;
6456         if (params->lag)
6457                 params->lag_id = mlxsw_sp_port->lag_id;
6458         else
6459                 params->system_port = mlxsw_sp_port->local_port;
6460 }
6461
6462 static struct mlxsw_sp_rif_subport *
6463 mlxsw_sp_rif_subport_rif(const struct mlxsw_sp_rif *rif)
6464 {
6465         return container_of(rif, struct mlxsw_sp_rif_subport, common);
6466 }
6467
6468 static struct mlxsw_sp_rif *
6469 mlxsw_sp_rif_subport_get(struct mlxsw_sp *mlxsw_sp,
6470                          const struct mlxsw_sp_rif_params *params,
6471                          struct netlink_ext_ack *extack)
6472 {
6473         struct mlxsw_sp_rif_subport *rif_subport;
6474         struct mlxsw_sp_rif *rif;
6475
6476         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, params->dev);
6477         if (!rif)
6478                 return mlxsw_sp_rif_create(mlxsw_sp, params, extack);
6479
6480         rif_subport = mlxsw_sp_rif_subport_rif(rif);
6481         refcount_inc(&rif_subport->ref_count);
6482         return rif;
6483 }
6484
6485 static void mlxsw_sp_rif_subport_put(struct mlxsw_sp_rif *rif)
6486 {
6487         struct mlxsw_sp_rif_subport *rif_subport;
6488
6489         rif_subport = mlxsw_sp_rif_subport_rif(rif);
6490         if (!refcount_dec_and_test(&rif_subport->ref_count))
6491                 return;
6492
6493         mlxsw_sp_rif_destroy(rif);
6494 }
6495
6496 static int
6497 mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan,
6498                                struct net_device *l3_dev,
6499                                struct netlink_ext_ack *extack)
6500 {
6501         struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
6502         struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
6503         struct mlxsw_sp_rif_params params = {
6504                 .dev = l3_dev,
6505         };
6506         u16 vid = mlxsw_sp_port_vlan->vid;
6507         struct mlxsw_sp_rif *rif;
6508         struct mlxsw_sp_fid *fid;
6509         int err;
6510
6511         mlxsw_sp_rif_subport_params_init(&params, mlxsw_sp_port_vlan);
6512         rif = mlxsw_sp_rif_subport_get(mlxsw_sp, &params, extack);
6513         if (IS_ERR(rif))
6514                 return PTR_ERR(rif);
6515
6516         /* FID was already created, just take a reference */
6517         fid = rif->ops->fid_get(rif, extack);
6518         err = mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port, vid);
6519         if (err)
6520                 goto err_fid_port_vid_map;
6521
6522         err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, false);
6523         if (err)
6524                 goto err_port_vid_learning_set;
6525
6526         err = mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid,
6527                                         BR_STATE_FORWARDING);
6528         if (err)
6529                 goto err_port_vid_stp_set;
6530
6531         mlxsw_sp_port_vlan->fid = fid;
6532
6533         return 0;
6534
6535 err_port_vid_stp_set:
6536         mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
6537 err_port_vid_learning_set:
6538         mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
6539 err_fid_port_vid_map:
6540         mlxsw_sp_fid_put(fid);
6541         mlxsw_sp_rif_subport_put(rif);
6542         return err;
6543 }
6544
6545 void
6546 mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
6547 {
6548         struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
6549         struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid;
6550         struct mlxsw_sp_rif *rif = mlxsw_sp_fid_rif(fid);
6551         u16 vid = mlxsw_sp_port_vlan->vid;
6552
6553         if (WARN_ON(mlxsw_sp_fid_type(fid) != MLXSW_SP_FID_TYPE_RFID))
6554                 return;
6555
6556         mlxsw_sp_port_vlan->fid = NULL;
6557         mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid, BR_STATE_BLOCKING);
6558         mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
6559         mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
6560         mlxsw_sp_fid_put(fid);
6561         mlxsw_sp_rif_subport_put(rif);
6562 }
6563
6564 static int mlxsw_sp_inetaddr_port_vlan_event(struct net_device *l3_dev,
6565                                              struct net_device *port_dev,
6566                                              unsigned long event, u16 vid,
6567                                              struct netlink_ext_ack *extack)
6568 {
6569         struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(port_dev);
6570         struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
6571
6572         mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
6573         if (WARN_ON(!mlxsw_sp_port_vlan))
6574                 return -EINVAL;
6575
6576         switch (event) {
6577         case NETDEV_UP:
6578                 return mlxsw_sp_port_vlan_router_join(mlxsw_sp_port_vlan,
6579                                                       l3_dev, extack);
6580         case NETDEV_DOWN:
6581                 mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
6582                 break;
6583         }
6584
6585         return 0;
6586 }
6587
6588 static int mlxsw_sp_inetaddr_port_event(struct net_device *port_dev,
6589                                         unsigned long event,
6590                                         struct netlink_ext_ack *extack)
6591 {
6592         if (netif_is_bridge_port(port_dev) ||
6593             netif_is_lag_port(port_dev) ||
6594             netif_is_ovs_port(port_dev))
6595                 return 0;
6596
6597         return mlxsw_sp_inetaddr_port_vlan_event(port_dev, port_dev, event,
6598                                                  MLXSW_SP_DEFAULT_VID, extack);
6599 }
6600
6601 static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev,
6602                                          struct net_device *lag_dev,
6603                                          unsigned long event, u16 vid,
6604                                          struct netlink_ext_ack *extack)
6605 {
6606         struct net_device *port_dev;
6607         struct list_head *iter;
6608         int err;
6609
6610         netdev_for_each_lower_dev(lag_dev, port_dev, iter) {
6611                 if (mlxsw_sp_port_dev_check(port_dev)) {
6612                         err = mlxsw_sp_inetaddr_port_vlan_event(l3_dev,
6613                                                                 port_dev,
6614                                                                 event, vid,
6615                                                                 extack);
6616                         if (err)
6617                                 return err;
6618                 }
6619         }
6620
6621         return 0;
6622 }
6623
6624 static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev,
6625                                        unsigned long event,
6626                                        struct netlink_ext_ack *extack)
6627 {
6628         if (netif_is_bridge_port(lag_dev))
6629                 return 0;
6630
6631         return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event,
6632                                              MLXSW_SP_DEFAULT_VID, extack);
6633 }
6634
6635 static int mlxsw_sp_inetaddr_bridge_event(struct mlxsw_sp *mlxsw_sp,
6636                                           struct net_device *l3_dev,
6637                                           unsigned long event,
6638                                           struct netlink_ext_ack *extack)
6639 {
6640         struct mlxsw_sp_rif_params params = {
6641                 .dev = l3_dev,
6642         };
6643         struct mlxsw_sp_rif *rif;
6644
6645         switch (event) {
6646         case NETDEV_UP:
6647                 rif = mlxsw_sp_rif_create(mlxsw_sp, &params, extack);
6648                 if (IS_ERR(rif))
6649                         return PTR_ERR(rif);
6650                 break;
6651         case NETDEV_DOWN:
6652                 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6653                 mlxsw_sp_rif_destroy(rif);
6654                 break;
6655         }
6656
6657         return 0;
6658 }
6659
6660 static int mlxsw_sp_inetaddr_vlan_event(struct mlxsw_sp *mlxsw_sp,
6661                                         struct net_device *vlan_dev,
6662                                         unsigned long event,
6663                                         struct netlink_ext_ack *extack)
6664 {
6665         struct net_device *real_dev = vlan_dev_real_dev(vlan_dev);
6666         u16 vid = vlan_dev_vlan_id(vlan_dev);
6667
6668         if (netif_is_bridge_port(vlan_dev))
6669                 return 0;
6670
6671         if (mlxsw_sp_port_dev_check(real_dev))
6672                 return mlxsw_sp_inetaddr_port_vlan_event(vlan_dev, real_dev,
6673                                                          event, vid, extack);
6674         else if (netif_is_lag_master(real_dev))
6675                 return __mlxsw_sp_inetaddr_lag_event(vlan_dev, real_dev, event,
6676                                                      vid, extack);
6677         else if (netif_is_bridge_master(real_dev) && br_vlan_enabled(real_dev))
6678                 return mlxsw_sp_inetaddr_bridge_event(mlxsw_sp, vlan_dev, event,
6679                                                       extack);
6680
6681         return 0;
6682 }
6683
6684 static bool mlxsw_sp_rif_macvlan_is_vrrp4(const u8 *mac)
6685 {
6686         u8 vrrp4[ETH_ALEN] = { 0x00, 0x00, 0x5e, 0x00, 0x01, 0x00 };
6687         u8 mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 };
6688
6689         return ether_addr_equal_masked(mac, vrrp4, mask);
6690 }
6691
6692 static bool mlxsw_sp_rif_macvlan_is_vrrp6(const u8 *mac)
6693 {
6694         u8 vrrp6[ETH_ALEN] = { 0x00, 0x00, 0x5e, 0x00, 0x02, 0x00 };
6695         u8 mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 };
6696
6697         return ether_addr_equal_masked(mac, vrrp6, mask);
6698 }
6699
6700 static int mlxsw_sp_rif_vrrp_op(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
6701                                 const u8 *mac, bool adding)
6702 {
6703         char ritr_pl[MLXSW_REG_RITR_LEN];
6704         u8 vrrp_id = adding ? mac[5] : 0;
6705         int err;
6706
6707         if (!mlxsw_sp_rif_macvlan_is_vrrp4(mac) &&
6708             !mlxsw_sp_rif_macvlan_is_vrrp6(mac))
6709                 return 0;
6710
6711         mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
6712         err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6713         if (err)
6714                 return err;
6715
6716         if (mlxsw_sp_rif_macvlan_is_vrrp4(mac))
6717                 mlxsw_reg_ritr_if_vrrp_id_ipv4_set(ritr_pl, vrrp_id);
6718         else
6719                 mlxsw_reg_ritr_if_vrrp_id_ipv6_set(ritr_pl, vrrp_id);
6720
6721         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6722 }
6723
6724 static int mlxsw_sp_rif_macvlan_add(struct mlxsw_sp *mlxsw_sp,
6725                                     const struct net_device *macvlan_dev,
6726                                     struct netlink_ext_ack *extack)
6727 {
6728         struct macvlan_dev *vlan = netdev_priv(macvlan_dev);
6729         struct mlxsw_sp_rif *rif;
6730         int err;
6731
6732         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan->lowerdev);
6733         if (!rif) {
6734                 NL_SET_ERR_MSG_MOD(extack, "macvlan is only supported on top of router interfaces");
6735                 return -EOPNOTSUPP;
6736         }
6737
6738         err = mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
6739                                   mlxsw_sp_fid_index(rif->fid), true);
6740         if (err)
6741                 return err;
6742
6743         err = mlxsw_sp_rif_vrrp_op(mlxsw_sp, rif->rif_index,
6744                                    macvlan_dev->dev_addr, true);
6745         if (err)
6746                 goto err_rif_vrrp_add;
6747
6748         /* Make sure the bridge driver does not have this MAC pointing at
6749          * some other port.
6750          */
6751         if (rif->ops->fdb_del)
6752                 rif->ops->fdb_del(rif, macvlan_dev->dev_addr);
6753
6754         return 0;
6755
6756 err_rif_vrrp_add:
6757         mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
6758                             mlxsw_sp_fid_index(rif->fid), false);
6759         return err;
6760 }
6761
6762 void mlxsw_sp_rif_macvlan_del(struct mlxsw_sp *mlxsw_sp,
6763                               const struct net_device *macvlan_dev)
6764 {
6765         struct macvlan_dev *vlan = netdev_priv(macvlan_dev);
6766         struct mlxsw_sp_rif *rif;
6767
6768         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan->lowerdev);
6769         /* If we do not have a RIF, then we already took care of
6770          * removing the macvlan's MAC during RIF deletion.
6771          */
6772         if (!rif)
6773                 return;
6774         mlxsw_sp_rif_vrrp_op(mlxsw_sp, rif->rif_index, macvlan_dev->dev_addr,
6775                              false);
6776         mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
6777                             mlxsw_sp_fid_index(rif->fid), false);
6778 }
6779
6780 static int mlxsw_sp_inetaddr_macvlan_event(struct mlxsw_sp *mlxsw_sp,
6781                                            struct net_device *macvlan_dev,
6782                                            unsigned long event,
6783                                            struct netlink_ext_ack *extack)
6784 {
6785         switch (event) {
6786         case NETDEV_UP:
6787                 return mlxsw_sp_rif_macvlan_add(mlxsw_sp, macvlan_dev, extack);
6788         case NETDEV_DOWN:
6789                 mlxsw_sp_rif_macvlan_del(mlxsw_sp, macvlan_dev);
6790                 break;
6791         }
6792
6793         return 0;
6794 }
6795
6796 static int mlxsw_sp_router_port_check_rif_addr(struct mlxsw_sp *mlxsw_sp,
6797                                                struct net_device *dev,
6798                                                const unsigned char *dev_addr,
6799                                                struct netlink_ext_ack *extack)
6800 {
6801         struct mlxsw_sp_rif *rif;
6802         int i;
6803
6804         /* A RIF is not created for macvlan netdevs. Their MAC is used to
6805          * populate the FDB
6806          */
6807         if (netif_is_macvlan(dev) || netif_is_l3_master(dev))
6808                 return 0;
6809
6810         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
6811                 rif = mlxsw_sp->router->rifs[i];
6812                 if (rif && rif->dev && rif->dev != dev &&
6813                     !ether_addr_equal_masked(rif->dev->dev_addr, dev_addr,
6814                                              mlxsw_sp->mac_mask)) {
6815                         NL_SET_ERR_MSG_MOD(extack, "All router interface MAC addresses must have the same prefix");
6816                         return -EINVAL;
6817                 }
6818         }
6819
6820         return 0;
6821 }
6822
/* Dispatch an address event to the handler matching the type of @dev.
 *
 * The first matching type wins, checked in this order: mlxsw port, LAG
 * master, bridge master, VLAN device, macvlan. Any other netdev type is
 * ignored and 0 is returned.
 */
static int __mlxsw_sp_inetaddr_event(struct mlxsw_sp *mlxsw_sp,
				     struct net_device *dev,
				     unsigned long event,
				     struct netlink_ext_ack *extack)
{
	if (mlxsw_sp_port_dev_check(dev))
		return mlxsw_sp_inetaddr_port_event(dev, event, extack);

	if (netif_is_lag_master(dev))
		return mlxsw_sp_inetaddr_lag_event(dev, event, extack);

	if (netif_is_bridge_master(dev))
		return mlxsw_sp_inetaddr_bridge_event(mlxsw_sp, dev, event,
						      extack);

	if (is_vlan_dev(dev))
		return mlxsw_sp_inetaddr_vlan_event(mlxsw_sp, dev, event,
						    extack);

	if (netif_is_macvlan(dev))
		return mlxsw_sp_inetaddr_macvlan_event(mlxsw_sp, dev, event,
						       extack);

	return 0;
}
6844
6845 static int mlxsw_sp_inetaddr_event(struct notifier_block *nb,
6846                                    unsigned long event, void *ptr)
6847 {
6848         struct in_ifaddr *ifa = (struct in_ifaddr *) ptr;
6849         struct net_device *dev = ifa->ifa_dev->dev;
6850         struct mlxsw_sp_router *router;
6851         struct mlxsw_sp_rif *rif;
6852         int err = 0;
6853
6854         /* NETDEV_UP event is handled by mlxsw_sp_inetaddr_valid_event */
6855         if (event == NETDEV_UP)
6856                 goto out;
6857
6858         router = container_of(nb, struct mlxsw_sp_router, inetaddr_nb);
6859         rif = mlxsw_sp_rif_find_by_dev(router->mlxsw_sp, dev);
6860         if (!mlxsw_sp_rif_should_config(rif, dev, event))
6861                 goto out;
6862
6863         err = __mlxsw_sp_inetaddr_event(router->mlxsw_sp, dev, event, NULL);
6864 out:
6865         return notifier_from_errno(err);
6866 }
6867
6868 int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused,
6869                                   unsigned long event, void *ptr)
6870 {
6871         struct in_validator_info *ivi = (struct in_validator_info *) ptr;
6872         struct net_device *dev = ivi->ivi_dev->dev;
6873         struct mlxsw_sp *mlxsw_sp;
6874         struct mlxsw_sp_rif *rif;
6875         int err = 0;
6876
6877         mlxsw_sp = mlxsw_sp_lower_get(dev);
6878         if (!mlxsw_sp)
6879                 goto out;
6880
6881         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6882         if (!mlxsw_sp_rif_should_config(rif, dev, event))
6883                 goto out;
6884
6885         err = mlxsw_sp_router_port_check_rif_addr(mlxsw_sp, dev, dev->dev_addr,
6886                                                   ivi->extack);
6887         if (err)
6888                 goto out;
6889
6890         err = __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, ivi->extack);
6891 out:
6892         return notifier_from_errno(err);
6893 }
6894
6895 struct mlxsw_sp_inet6addr_event_work {
6896         struct work_struct work;
6897         struct mlxsw_sp *mlxsw_sp;
6898         struct net_device *dev;
6899         unsigned long event;
6900 };
6901
6902 static void mlxsw_sp_inet6addr_event_work(struct work_struct *work)
6903 {
6904         struct mlxsw_sp_inet6addr_event_work *inet6addr_work =
6905                 container_of(work, struct mlxsw_sp_inet6addr_event_work, work);
6906         struct mlxsw_sp *mlxsw_sp = inet6addr_work->mlxsw_sp;
6907         struct net_device *dev = inet6addr_work->dev;
6908         unsigned long event = inet6addr_work->event;
6909         struct mlxsw_sp_rif *rif;
6910
6911         rtnl_lock();
6912
6913         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6914         if (!mlxsw_sp_rif_should_config(rif, dev, event))
6915                 goto out;
6916
6917         __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, NULL);
6918 out:
6919         rtnl_unlock();
6920         dev_put(dev);
6921         kfree(inet6addr_work);
6922 }
6923
6924 /* Called with rcu_read_lock() */
6925 static int mlxsw_sp_inet6addr_event(struct notifier_block *nb,
6926                                     unsigned long event, void *ptr)
6927 {
6928         struct inet6_ifaddr *if6 = (struct inet6_ifaddr *) ptr;
6929         struct mlxsw_sp_inet6addr_event_work *inet6addr_work;
6930         struct net_device *dev = if6->idev->dev;
6931         struct mlxsw_sp_router *router;
6932
6933         /* NETDEV_UP event is handled by mlxsw_sp_inet6addr_valid_event */
6934         if (event == NETDEV_UP)
6935                 return NOTIFY_DONE;
6936
6937         inet6addr_work = kzalloc(sizeof(*inet6addr_work), GFP_ATOMIC);
6938         if (!inet6addr_work)
6939                 return NOTIFY_BAD;
6940
6941         router = container_of(nb, struct mlxsw_sp_router, inet6addr_nb);
6942         INIT_WORK(&inet6addr_work->work, mlxsw_sp_inet6addr_event_work);
6943         inet6addr_work->mlxsw_sp = router->mlxsw_sp;
6944         inet6addr_work->dev = dev;
6945         inet6addr_work->event = event;
6946         dev_hold(dev);
6947         mlxsw_core_schedule_work(&inet6addr_work->work);
6948
6949         return NOTIFY_DONE;
6950 }
6951
6952 int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused,
6953                                    unsigned long event, void *ptr)
6954 {
6955         struct in6_validator_info *i6vi = (struct in6_validator_info *) ptr;
6956         struct net_device *dev = i6vi->i6vi_dev->dev;
6957         struct mlxsw_sp *mlxsw_sp;
6958         struct mlxsw_sp_rif *rif;
6959         int err = 0;
6960
6961         mlxsw_sp = mlxsw_sp_lower_get(dev);
6962         if (!mlxsw_sp)
6963                 goto out;
6964
6965         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6966         if (!mlxsw_sp_rif_should_config(rif, dev, event))
6967                 goto out;
6968
6969         err = mlxsw_sp_router_port_check_rif_addr(mlxsw_sp, dev, dev->dev_addr,
6970                                                   i6vi->extack);
6971         if (err)
6972                 goto out;
6973
6974         err = __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, i6vi->extack);
6975 out:
6976         return notifier_from_errno(err);
6977 }
6978
6979 static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
6980                              const char *mac, int mtu)
6981 {
6982         char ritr_pl[MLXSW_REG_RITR_LEN];
6983         int err;
6984
6985         mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
6986         err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6987         if (err)
6988                 return err;
6989
6990         mlxsw_reg_ritr_mtu_set(ritr_pl, mtu);
6991         mlxsw_reg_ritr_if_mac_memcpy_to(ritr_pl, mac);
6992         mlxsw_reg_ritr_op_set(ritr_pl, MLXSW_REG_RITR_RIF_CREATE);
6993         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6994 }
6995
6996 static int
6997 mlxsw_sp_router_port_change_event(struct mlxsw_sp *mlxsw_sp,
6998                                   struct mlxsw_sp_rif *rif)
6999 {
7000         struct net_device *dev = rif->dev;
7001         u16 fid_index;
7002         int err;
7003
7004         fid_index = mlxsw_sp_fid_index(rif->fid);
7005
7006         err = mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, false);
7007         if (err)
7008                 return err;
7009
7010         err = mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, dev->dev_addr,
7011                                 dev->mtu);
7012         if (err)
7013                 goto err_rif_edit;
7014
7015         err = mlxsw_sp_rif_fdb_op(mlxsw_sp, dev->dev_addr, fid_index, true);
7016         if (err)
7017                 goto err_rif_fdb_op;
7018
7019         if (rif->mtu != dev->mtu) {
7020                 struct mlxsw_sp_vr *vr;
7021                 int i;
7022
7023                 /* The RIF is relevant only to its mr_table instance, as unlike
7024                  * unicast routing, in multicast routing a RIF cannot be shared
7025                  * between several multicast routing tables.
7026                  */
7027                 vr = &mlxsw_sp->router->vrs[rif->vr_id];
7028                 for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++)
7029                         mlxsw_sp_mr_rif_mtu_update(vr->mr_table[i],
7030                                                    rif, dev->mtu);
7031         }
7032
7033         ether_addr_copy(rif->addr, dev->dev_addr);
7034         rif->mtu = dev->mtu;
7035
7036         netdev_dbg(dev, "Updated RIF=%d\n", rif->rif_index);
7037
7038         return 0;
7039
7040 err_rif_fdb_op:
7041         mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, rif->addr, rif->mtu);
7042 err_rif_edit:
7043         mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, true);
7044         return err;
7045 }
7046
7047 static int mlxsw_sp_router_port_pre_changeaddr_event(struct mlxsw_sp_rif *rif,
7048                             struct netdev_notifier_pre_changeaddr_info *info)
7049 {
7050         struct netlink_ext_ack *extack;
7051
7052         extack = netdev_notifier_info_to_extack(&info->info);
7053         return mlxsw_sp_router_port_check_rif_addr(rif->mlxsw_sp, rif->dev,
7054                                                    info->dev_addr, extack);
7055 }
7056
7057 int mlxsw_sp_netdevice_router_port_event(struct net_device *dev,
7058                                          unsigned long event, void *ptr)
7059 {
7060         struct mlxsw_sp *mlxsw_sp;
7061         struct mlxsw_sp_rif *rif;
7062
7063         mlxsw_sp = mlxsw_sp_lower_get(dev);
7064         if (!mlxsw_sp)
7065                 return 0;
7066
7067         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
7068         if (!rif)
7069                 return 0;
7070
7071         switch (event) {
7072         case NETDEV_CHANGEMTU: /* fall through */
7073         case NETDEV_CHANGEADDR:
7074                 return mlxsw_sp_router_port_change_event(mlxsw_sp, rif);
7075         case NETDEV_PRE_CHANGEADDR:
7076                 return mlxsw_sp_router_port_pre_changeaddr_event(rif, ptr);
7077         }
7078
7079         return 0;
7080 }
7081
7082 static int mlxsw_sp_port_vrf_join(struct mlxsw_sp *mlxsw_sp,
7083                                   struct net_device *l3_dev,
7084                                   struct netlink_ext_ack *extack)
7085 {
7086         struct mlxsw_sp_rif *rif;
7087
7088         /* If netdev is already associated with a RIF, then we need to
7089          * destroy it and create a new one with the new virtual router ID.
7090          */
7091         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
7092         if (rif)
7093                 __mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_DOWN,
7094                                           extack);
7095
7096         return __mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_UP, extack);
7097 }
7098
7099 static void mlxsw_sp_port_vrf_leave(struct mlxsw_sp *mlxsw_sp,
7100                                     struct net_device *l3_dev)
7101 {
7102         struct mlxsw_sp_rif *rif;
7103
7104         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
7105         if (!rif)
7106                 return;
7107         __mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_DOWN, NULL);
7108 }
7109
7110 int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
7111                                  struct netdev_notifier_changeupper_info *info)
7112 {
7113         struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
7114         int err = 0;
7115
7116         /* We do not create a RIF for a macvlan, but only use it to
7117          * direct more MAC addresses to the router.
7118          */
7119         if (!mlxsw_sp || netif_is_macvlan(l3_dev))
7120                 return 0;
7121
7122         switch (event) {
7123         case NETDEV_PRECHANGEUPPER:
7124                 return 0;
7125         case NETDEV_CHANGEUPPER:
7126                 if (info->linking) {
7127                         struct netlink_ext_ack *extack;
7128
7129                         extack = netdev_notifier_info_to_extack(&info->info);
7130                         err = mlxsw_sp_port_vrf_join(mlxsw_sp, l3_dev, extack);
7131                 } else {
7132                         mlxsw_sp_port_vrf_leave(mlxsw_sp, l3_dev);
7133                 }
7134                 break;
7135         }
7136
7137         return err;
7138 }
7139
7140 static int __mlxsw_sp_rif_macvlan_flush(struct net_device *dev, void *data)
7141 {
7142         struct mlxsw_sp_rif *rif = data;
7143
7144         if (!netif_is_macvlan(dev))
7145                 return 0;
7146
7147         return mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
7148                                    mlxsw_sp_fid_index(rif->fid), false);
7149 }
7150
7151 static int mlxsw_sp_rif_macvlan_flush(struct mlxsw_sp_rif *rif)
7152 {
7153         if (!netif_is_macvlan_port(rif->dev))
7154                 return 0;
7155
7156         netdev_warn(rif->dev, "Router interface is deleted. Upper macvlans will not work\n");
7157         return netdev_walk_all_upper_dev_rcu(rif->dev,
7158                                              __mlxsw_sp_rif_macvlan_flush, rif);
7159 }
7160
7161 static void mlxsw_sp_rif_subport_setup(struct mlxsw_sp_rif *rif,
7162                                        const struct mlxsw_sp_rif_params *params)
7163 {
7164         struct mlxsw_sp_rif_subport *rif_subport;
7165
7166         rif_subport = mlxsw_sp_rif_subport_rif(rif);
7167         refcount_set(&rif_subport->ref_count, 1);
7168         rif_subport->vid = params->vid;
7169         rif_subport->lag = params->lag;
7170         if (params->lag)
7171                 rif_subport->lag_id = params->lag_id;
7172         else
7173                 rif_subport->system_port = params->system_port;
7174 }
7175
7176 static int mlxsw_sp_rif_subport_op(struct mlxsw_sp_rif *rif, bool enable)
7177 {
7178         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7179         struct mlxsw_sp_rif_subport *rif_subport;
7180         char ritr_pl[MLXSW_REG_RITR_LEN];
7181
7182         rif_subport = mlxsw_sp_rif_subport_rif(rif);
7183         mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_SP_IF,
7184                             rif->rif_index, rif->vr_id, rif->dev->mtu);
7185         mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
7186         mlxsw_reg_ritr_sp_if_pack(ritr_pl, rif_subport->lag,
7187                                   rif_subport->lag ? rif_subport->lag_id :
7188                                                      rif_subport->system_port,
7189                                   rif_subport->vid);
7190
7191         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
7192 }
7193
7194 static int mlxsw_sp_rif_subport_configure(struct mlxsw_sp_rif *rif)
7195 {
7196         int err;
7197
7198         err = mlxsw_sp_rif_subport_op(rif, true);
7199         if (err)
7200                 return err;
7201
7202         err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7203                                   mlxsw_sp_fid_index(rif->fid), true);
7204         if (err)
7205                 goto err_rif_fdb_op;
7206
7207         mlxsw_sp_fid_rif_set(rif->fid, rif);
7208         return 0;
7209
7210 err_rif_fdb_op:
7211         mlxsw_sp_rif_subport_op(rif, false);
7212         return err;
7213 }
7214
7215 static void mlxsw_sp_rif_subport_deconfigure(struct mlxsw_sp_rif *rif)
7216 {
7217         struct mlxsw_sp_fid *fid = rif->fid;
7218
7219         mlxsw_sp_fid_rif_set(fid, NULL);
7220         mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7221                             mlxsw_sp_fid_index(fid), false);
7222         mlxsw_sp_rif_macvlan_flush(rif);
7223         mlxsw_sp_rif_subport_op(rif, false);
7224 }
7225
7226 static struct mlxsw_sp_fid *
7227 mlxsw_sp_rif_subport_fid_get(struct mlxsw_sp_rif *rif,
7228                              struct netlink_ext_ack *extack)
7229 {
7230         return mlxsw_sp_fid_rfid_get(rif->mlxsw_sp, rif->rif_index);
7231 }
7232
7233 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_subport_ops = {
7234         .type                   = MLXSW_SP_RIF_TYPE_SUBPORT,
7235         .rif_size               = sizeof(struct mlxsw_sp_rif_subport),
7236         .setup                  = mlxsw_sp_rif_subport_setup,
7237         .configure              = mlxsw_sp_rif_subport_configure,
7238         .deconfigure            = mlxsw_sp_rif_subport_deconfigure,
7239         .fid_get                = mlxsw_sp_rif_subport_fid_get,
7240 };
7241
7242 static int mlxsw_sp_rif_vlan_fid_op(struct mlxsw_sp_rif *rif,
7243                                     enum mlxsw_reg_ritr_if_type type,
7244                                     u16 vid_fid, bool enable)
7245 {
7246         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7247         char ritr_pl[MLXSW_REG_RITR_LEN];
7248
7249         mlxsw_reg_ritr_pack(ritr_pl, enable, type, rif->rif_index, rif->vr_id,
7250                             rif->dev->mtu);
7251         mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
7252         mlxsw_reg_ritr_fid_set(ritr_pl, type, vid_fid);
7253
7254         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
7255 }
7256
7257 u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp)
7258 {
7259         return mlxsw_core_max_ports(mlxsw_sp->core) + 1;
7260 }
7261
7262 static int mlxsw_sp_rif_vlan_configure(struct mlxsw_sp_rif *rif)
7263 {
7264         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7265         u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
7266         int err;
7267
7268         err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, true);
7269         if (err)
7270                 return err;
7271
7272         err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7273                                      mlxsw_sp_router_port(mlxsw_sp), true);
7274         if (err)
7275                 goto err_fid_mc_flood_set;
7276
7277         err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7278                                      mlxsw_sp_router_port(mlxsw_sp), true);
7279         if (err)
7280                 goto err_fid_bc_flood_set;
7281
7282         err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7283                                   mlxsw_sp_fid_index(rif->fid), true);
7284         if (err)
7285                 goto err_rif_fdb_op;
7286
7287         mlxsw_sp_fid_rif_set(rif->fid, rif);
7288         return 0;
7289
7290 err_rif_fdb_op:
7291         mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7292                                mlxsw_sp_router_port(mlxsw_sp), false);
7293 err_fid_bc_flood_set:
7294         mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7295                                mlxsw_sp_router_port(mlxsw_sp), false);
7296 err_fid_mc_flood_set:
7297         mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
7298         return err;
7299 }
7300
7301 static void mlxsw_sp_rif_vlan_deconfigure(struct mlxsw_sp_rif *rif)
7302 {
7303         u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
7304         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7305         struct mlxsw_sp_fid *fid = rif->fid;
7306
7307         mlxsw_sp_fid_rif_set(fid, NULL);
7308         mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7309                             mlxsw_sp_fid_index(fid), false);
7310         mlxsw_sp_rif_macvlan_flush(rif);
7311         mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7312                                mlxsw_sp_router_port(mlxsw_sp), false);
7313         mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7314                                mlxsw_sp_router_port(mlxsw_sp), false);
7315         mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
7316 }
7317
7318 static struct mlxsw_sp_fid *
7319 mlxsw_sp_rif_vlan_fid_get(struct mlxsw_sp_rif *rif,
7320                           struct netlink_ext_ack *extack)
7321 {
7322         struct net_device *br_dev = rif->dev;
7323         u16 vid;
7324         int err;
7325
7326         if (is_vlan_dev(rif->dev)) {
7327                 vid = vlan_dev_vlan_id(rif->dev);
7328                 br_dev = vlan_dev_real_dev(rif->dev);
7329                 if (WARN_ON(!netif_is_bridge_master(br_dev)))
7330                         return ERR_PTR(-EINVAL);
7331         } else {
7332                 err = br_vlan_get_pvid(rif->dev, &vid);
7333                 if (err < 0 || !vid) {
7334                         NL_SET_ERR_MSG_MOD(extack, "Couldn't determine bridge PVID");
7335                         return ERR_PTR(-EINVAL);
7336                 }
7337         }
7338
7339         return mlxsw_sp_bridge_fid_get(rif->mlxsw_sp, br_dev, vid, extack);
7340 }
7341
7342 static void mlxsw_sp_rif_vlan_fdb_del(struct mlxsw_sp_rif *rif, const char *mac)
7343 {
7344         u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
7345         struct switchdev_notifier_fdb_info info;
7346         struct net_device *br_dev;
7347         struct net_device *dev;
7348
7349         br_dev = is_vlan_dev(rif->dev) ? vlan_dev_real_dev(rif->dev) : rif->dev;
7350         dev = br_fdb_find_port(br_dev, mac, vid);
7351         if (!dev)
7352                 return;
7353
7354         info.addr = mac;
7355         info.vid = vid;
7356         call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info,
7357                                  NULL);
7358 }
7359
7360 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_vlan_ops = {
7361         .type                   = MLXSW_SP_RIF_TYPE_VLAN,
7362         .rif_size               = sizeof(struct mlxsw_sp_rif),
7363         .configure              = mlxsw_sp_rif_vlan_configure,
7364         .deconfigure            = mlxsw_sp_rif_vlan_deconfigure,
7365         .fid_get                = mlxsw_sp_rif_vlan_fid_get,
7366         .fdb_del                = mlxsw_sp_rif_vlan_fdb_del,
7367 };
7368
7369 static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif)
7370 {
7371         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7372         u16 fid_index = mlxsw_sp_fid_index(rif->fid);
7373         int err;
7374
7375         err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index,
7376                                        true);
7377         if (err)
7378                 return err;
7379
7380         err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7381                                      mlxsw_sp_router_port(mlxsw_sp), true);
7382         if (err)
7383                 goto err_fid_mc_flood_set;
7384
7385         err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7386                                      mlxsw_sp_router_port(mlxsw_sp), true);
7387         if (err)
7388                 goto err_fid_bc_flood_set;
7389
7390         err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7391                                   mlxsw_sp_fid_index(rif->fid), true);
7392         if (err)
7393                 goto err_rif_fdb_op;
7394
7395         mlxsw_sp_fid_rif_set(rif->fid, rif);
7396         return 0;
7397
7398 err_rif_fdb_op:
7399         mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7400                                mlxsw_sp_router_port(mlxsw_sp), false);
7401 err_fid_bc_flood_set:
7402         mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7403                                mlxsw_sp_router_port(mlxsw_sp), false);
7404 err_fid_mc_flood_set:
7405         mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
7406         return err;
7407 }
7408
7409 static void mlxsw_sp_rif_fid_deconfigure(struct mlxsw_sp_rif *rif)
7410 {
7411         u16 fid_index = mlxsw_sp_fid_index(rif->fid);
7412         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7413         struct mlxsw_sp_fid *fid = rif->fid;
7414
7415         mlxsw_sp_fid_rif_set(fid, NULL);
7416         mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7417                             mlxsw_sp_fid_index(fid), false);
7418         mlxsw_sp_rif_macvlan_flush(rif);
7419         mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7420                                mlxsw_sp_router_port(mlxsw_sp), false);
7421         mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7422                                mlxsw_sp_router_port(mlxsw_sp), false);
7423         mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
7424 }
7425
7426 static struct mlxsw_sp_fid *
7427 mlxsw_sp_rif_fid_fid_get(struct mlxsw_sp_rif *rif,
7428                          struct netlink_ext_ack *extack)
7429 {
7430         return mlxsw_sp_bridge_fid_get(rif->mlxsw_sp, rif->dev, 0, extack);
7431 }
7432
7433 static void mlxsw_sp_rif_fid_fdb_del(struct mlxsw_sp_rif *rif, const char *mac)
7434 {
7435         struct switchdev_notifier_fdb_info info;
7436         struct net_device *dev;
7437
7438         dev = br_fdb_find_port(rif->dev, mac, 0);
7439         if (!dev)
7440                 return;
7441
7442         info.addr = mac;
7443         info.vid = 0;
7444         call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info,
7445                                  NULL);
7446 }
7447
7448 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_fid_ops = {
7449         .type                   = MLXSW_SP_RIF_TYPE_FID,
7450         .rif_size               = sizeof(struct mlxsw_sp_rif),
7451         .configure              = mlxsw_sp_rif_fid_configure,
7452         .deconfigure            = mlxsw_sp_rif_fid_deconfigure,
7453         .fid_get                = mlxsw_sp_rif_fid_fid_get,
7454         .fdb_del                = mlxsw_sp_rif_fid_fdb_del,
7455 };
7456
7457 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_vlan_emu_ops = {
7458         .type                   = MLXSW_SP_RIF_TYPE_VLAN,
7459         .rif_size               = sizeof(struct mlxsw_sp_rif),
7460         .configure              = mlxsw_sp_rif_fid_configure,
7461         .deconfigure            = mlxsw_sp_rif_fid_deconfigure,
7462         .fid_get                = mlxsw_sp_rif_vlan_fid_get,
7463         .fdb_del                = mlxsw_sp_rif_vlan_fdb_del,
7464 };
7465
7466 static struct mlxsw_sp_rif_ipip_lb *
7467 mlxsw_sp_rif_ipip_lb_rif(struct mlxsw_sp_rif *rif)
7468 {
7469         return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
7470 }
7471
7472 static void
7473 mlxsw_sp_rif_ipip_lb_setup(struct mlxsw_sp_rif *rif,
7474                            const struct mlxsw_sp_rif_params *params)
7475 {
7476         struct mlxsw_sp_rif_params_ipip_lb *params_lb;
7477         struct mlxsw_sp_rif_ipip_lb *rif_lb;
7478
7479         params_lb = container_of(params, struct mlxsw_sp_rif_params_ipip_lb,
7480                                  common);
7481         rif_lb = mlxsw_sp_rif_ipip_lb_rif(rif);
7482         rif_lb->lb_config = params_lb->lb_config;
7483 }
7484
7485 static int
7486 mlxsw_sp1_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif)
7487 {
7488         struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
7489         u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev);
7490         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7491         struct mlxsw_sp_vr *ul_vr;
7492         int err;
7493
7494         ul_vr = mlxsw_sp_vr_get(mlxsw_sp, ul_tb_id, NULL);
7495         if (IS_ERR(ul_vr))
7496                 return PTR_ERR(ul_vr);
7497
7498         err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr->id, 0, true);
7499         if (err)
7500                 goto err_loopback_op;
7501
7502         lb_rif->ul_vr_id = ul_vr->id;
7503         lb_rif->ul_rif_id = 0;
7504         ++ul_vr->rif_count;
7505         return 0;
7506
7507 err_loopback_op:
7508         mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
7509         return err;
7510 }
7511
7512 static void mlxsw_sp1_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
7513 {
7514         struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
7515         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7516         struct mlxsw_sp_vr *ul_vr;
7517
7518         ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id];
7519         mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr->id, 0, false);
7520
7521         --ul_vr->rif_count;
7522         mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
7523 }
7524
7525 static const struct mlxsw_sp_rif_ops mlxsw_sp1_rif_ipip_lb_ops = {
7526         .type                   = MLXSW_SP_RIF_TYPE_IPIP_LB,
7527         .rif_size               = sizeof(struct mlxsw_sp_rif_ipip_lb),
7528         .setup                  = mlxsw_sp_rif_ipip_lb_setup,
7529         .configure              = mlxsw_sp1_rif_ipip_lb_configure,
7530         .deconfigure            = mlxsw_sp1_rif_ipip_lb_deconfigure,
7531 };
7532
7533 const struct mlxsw_sp_rif_ops *mlxsw_sp1_rif_ops_arr[] = {
7534         [MLXSW_SP_RIF_TYPE_SUBPORT]     = &mlxsw_sp_rif_subport_ops,
7535         [MLXSW_SP_RIF_TYPE_VLAN]        = &mlxsw_sp_rif_vlan_emu_ops,
7536         [MLXSW_SP_RIF_TYPE_FID]         = &mlxsw_sp_rif_fid_ops,
7537         [MLXSW_SP_RIF_TYPE_IPIP_LB]     = &mlxsw_sp1_rif_ipip_lb_ops,
7538 };
7539
7540 static int
7541 mlxsw_sp_rif_ipip_lb_ul_rif_op(struct mlxsw_sp_rif *ul_rif, bool enable)
7542 {
7543         struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp;
7544         char ritr_pl[MLXSW_REG_RITR_LEN];
7545
7546         mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
7547                             ul_rif->rif_index, ul_rif->vr_id, IP_MAX_MTU);
7548         mlxsw_reg_ritr_loopback_protocol_set(ritr_pl,
7549                                              MLXSW_REG_RITR_LOOPBACK_GENERIC);
7550
7551         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
7552 }
7553
7554 static struct mlxsw_sp_rif *
7555 mlxsw_sp_ul_rif_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr,
7556                        struct netlink_ext_ack *extack)
7557 {
7558         struct mlxsw_sp_rif *ul_rif;
7559         u16 rif_index;
7560         int err;
7561
7562         err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index);
7563         if (err) {
7564                 NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces");
7565                 return ERR_PTR(err);
7566         }
7567
7568         ul_rif = mlxsw_sp_rif_alloc(sizeof(*ul_rif), rif_index, vr->id, NULL);
7569         if (!ul_rif)
7570                 return ERR_PTR(-ENOMEM);
7571
7572         mlxsw_sp->router->rifs[rif_index] = ul_rif;
7573         ul_rif->mlxsw_sp = mlxsw_sp;
7574         err = mlxsw_sp_rif_ipip_lb_ul_rif_op(ul_rif, true);
7575         if (err)
7576                 goto ul_rif_op_err;
7577
7578         return ul_rif;
7579
7580 ul_rif_op_err:
7581         mlxsw_sp->router->rifs[rif_index] = NULL;
7582         kfree(ul_rif);
7583         return ERR_PTR(err);
7584 }
7585
7586 static void mlxsw_sp_ul_rif_destroy(struct mlxsw_sp_rif *ul_rif)
7587 {
7588         struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp;
7589
7590         mlxsw_sp_rif_ipip_lb_ul_rif_op(ul_rif, false);
7591         mlxsw_sp->router->rifs[ul_rif->rif_index] = NULL;
7592         kfree(ul_rif);
7593 }
7594
7595 static struct mlxsw_sp_rif *
7596 mlxsw_sp_ul_rif_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
7597                     struct netlink_ext_ack *extack)
7598 {
7599         struct mlxsw_sp_vr *vr;
7600         int err;
7601
7602         vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, extack);
7603         if (IS_ERR(vr))
7604                 return ERR_CAST(vr);
7605
7606         if (refcount_inc_not_zero(&vr->ul_rif_refcnt))
7607                 return vr->ul_rif;
7608
7609         vr->ul_rif = mlxsw_sp_ul_rif_create(mlxsw_sp, vr, extack);
7610         if (IS_ERR(vr->ul_rif)) {
7611                 err = PTR_ERR(vr->ul_rif);
7612                 goto err_ul_rif_create;
7613         }
7614
7615         vr->rif_count++;
7616         refcount_set(&vr->ul_rif_refcnt, 1);
7617
7618         return vr->ul_rif;
7619
7620 err_ul_rif_create:
7621         mlxsw_sp_vr_put(mlxsw_sp, vr);
7622         return ERR_PTR(err);
7623 }
7624
7625 static void mlxsw_sp_ul_rif_put(struct mlxsw_sp_rif *ul_rif)
7626 {
7627         struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp;
7628         struct mlxsw_sp_vr *vr;
7629
7630         vr = &mlxsw_sp->router->vrs[ul_rif->vr_id];
7631
7632         if (!refcount_dec_and_test(&vr->ul_rif_refcnt))
7633                 return;
7634
7635         vr->rif_count--;
7636         mlxsw_sp_ul_rif_destroy(ul_rif);
7637         mlxsw_sp_vr_put(mlxsw_sp, vr);
7638 }
7639
7640 int mlxsw_sp_router_ul_rif_get(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
7641                                u16 *ul_rif_index)
7642 {
7643         struct mlxsw_sp_rif *ul_rif;
7644
7645         ASSERT_RTNL();
7646
7647         ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, NULL);
7648         if (IS_ERR(ul_rif))
7649                 return PTR_ERR(ul_rif);
7650         *ul_rif_index = ul_rif->rif_index;
7651
7652         return 0;
7653 }
7654
7655 void mlxsw_sp_router_ul_rif_put(struct mlxsw_sp *mlxsw_sp, u16 ul_rif_index)
7656 {
7657         struct mlxsw_sp_rif *ul_rif;
7658
7659         ASSERT_RTNL();
7660
7661         ul_rif = mlxsw_sp->router->rifs[ul_rif_index];
7662         if (WARN_ON(!ul_rif))
7663                 return;
7664
7665         mlxsw_sp_ul_rif_put(ul_rif);
7666 }
7667
7668 static int
7669 mlxsw_sp2_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif)
7670 {
7671         struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
7672         u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev);
7673         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7674         struct mlxsw_sp_rif *ul_rif;
7675         int err;
7676
7677         ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, NULL);
7678         if (IS_ERR(ul_rif))
7679                 return PTR_ERR(ul_rif);
7680
7681         err = mlxsw_sp_rif_ipip_lb_op(lb_rif, 0, ul_rif->rif_index, true);
7682         if (err)
7683                 goto err_loopback_op;
7684
7685         lb_rif->ul_vr_id = 0;
7686         lb_rif->ul_rif_id = ul_rif->rif_index;
7687
7688         return 0;
7689
7690 err_loopback_op:
7691         mlxsw_sp_ul_rif_put(ul_rif);
7692         return err;
7693 }
7694
7695 static void mlxsw_sp2_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
7696 {
7697         struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
7698         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7699         struct mlxsw_sp_rif *ul_rif;
7700
7701         ul_rif = mlxsw_sp_rif_by_index(mlxsw_sp, lb_rif->ul_rif_id);
7702         mlxsw_sp_rif_ipip_lb_op(lb_rif, 0, lb_rif->ul_rif_id, false);
7703         mlxsw_sp_ul_rif_put(ul_rif);
7704 }
7705
7706 static const struct mlxsw_sp_rif_ops mlxsw_sp2_rif_ipip_lb_ops = {
7707         .type                   = MLXSW_SP_RIF_TYPE_IPIP_LB,
7708         .rif_size               = sizeof(struct mlxsw_sp_rif_ipip_lb),
7709         .setup                  = mlxsw_sp_rif_ipip_lb_setup,
7710         .configure              = mlxsw_sp2_rif_ipip_lb_configure,
7711         .deconfigure            = mlxsw_sp2_rif_ipip_lb_deconfigure,
7712 };
7713
7714 const struct mlxsw_sp_rif_ops *mlxsw_sp2_rif_ops_arr[] = {
7715         [MLXSW_SP_RIF_TYPE_SUBPORT]     = &mlxsw_sp_rif_subport_ops,
7716         [MLXSW_SP_RIF_TYPE_VLAN]        = &mlxsw_sp_rif_vlan_emu_ops,
7717         [MLXSW_SP_RIF_TYPE_FID]         = &mlxsw_sp_rif_fid_ops,
7718         [MLXSW_SP_RIF_TYPE_IPIP_LB]     = &mlxsw_sp2_rif_ipip_lb_ops,
7719 };
7720
7721 static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp)
7722 {
7723         u64 max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
7724
7725         mlxsw_sp->router->rifs = kcalloc(max_rifs,
7726                                          sizeof(struct mlxsw_sp_rif *),
7727                                          GFP_KERNEL);
7728         if (!mlxsw_sp->router->rifs)
7729                 return -ENOMEM;
7730
7731         return 0;
7732 }
7733
7734 static void mlxsw_sp_rifs_fini(struct mlxsw_sp *mlxsw_sp)
7735 {
7736         int i;
7737
7738         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
7739                 WARN_ON_ONCE(mlxsw_sp->router->rifs[i]);
7740
7741         kfree(mlxsw_sp->router->rifs);
7742 }
7743
7744 static int
7745 mlxsw_sp_ipip_config_tigcr(struct mlxsw_sp *mlxsw_sp)
7746 {
7747         char tigcr_pl[MLXSW_REG_TIGCR_LEN];
7748
7749         mlxsw_reg_tigcr_pack(tigcr_pl, true, 0);
7750         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tigcr), tigcr_pl);
7751 }
7752
7753 static int mlxsw_sp_ipips_init(struct mlxsw_sp *mlxsw_sp)
7754 {
7755         mlxsw_sp->router->ipip_ops_arr = mlxsw_sp_ipip_ops_arr;
7756         INIT_LIST_HEAD(&mlxsw_sp->router->ipip_list);
7757         return mlxsw_sp_ipip_config_tigcr(mlxsw_sp);
7758 }
7759
7760 static void mlxsw_sp_ipips_fini(struct mlxsw_sp *mlxsw_sp)
7761 {
7762         WARN_ON(!list_empty(&mlxsw_sp->router->ipip_list));
7763 }
7764
7765 static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
7766 {
7767         struct mlxsw_sp_router *router;
7768
7769         /* Flush pending FIB notifications and then flush the device's
7770          * table before requesting another dump. The FIB notification
7771          * block is unregistered, so no need to take RTNL.
7772          */
7773         mlxsw_core_flush_owq();
7774         router = container_of(nb, struct mlxsw_sp_router, fib_nb);
7775         mlxsw_sp_router_fib_flush(router->mlxsw_sp);
7776 }
7777
7778 #ifdef CONFIG_IP_ROUTE_MULTIPATH
7779 static void mlxsw_sp_mp_hash_header_set(char *recr2_pl, int header)
7780 {
7781         mlxsw_reg_recr2_outer_header_enables_set(recr2_pl, header, true);
7782 }
7783
7784 static void mlxsw_sp_mp_hash_field_set(char *recr2_pl, int field)
7785 {
7786         mlxsw_reg_recr2_outer_header_fields_enable_set(recr2_pl, field, true);
7787 }
7788
7789 static void mlxsw_sp_mp4_hash_init(char *recr2_pl)
7790 {
7791         bool only_l3 = !init_net.ipv4.sysctl_fib_multipath_hash_policy;
7792
7793         mlxsw_sp_mp_hash_header_set(recr2_pl,
7794                                     MLXSW_REG_RECR2_IPV4_EN_NOT_TCP_NOT_UDP);
7795         mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV4_EN_TCP_UDP);
7796         mlxsw_reg_recr2_ipv4_sip_enable(recr2_pl);
7797         mlxsw_reg_recr2_ipv4_dip_enable(recr2_pl);
7798         if (only_l3)
7799                 return;
7800         mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_EN_IPV4);
7801         mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV4_PROTOCOL);
7802         mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_SPORT);
7803         mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_DPORT);
7804 }
7805
7806 static void mlxsw_sp_mp6_hash_init(char *recr2_pl)
7807 {
7808         bool only_l3 = !ip6_multipath_hash_policy(&init_net);
7809
7810         mlxsw_sp_mp_hash_header_set(recr2_pl,
7811                                     MLXSW_REG_RECR2_IPV6_EN_NOT_TCP_NOT_UDP);
7812         mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV6_EN_TCP_UDP);
7813         mlxsw_reg_recr2_ipv6_sip_enable(recr2_pl);
7814         mlxsw_reg_recr2_ipv6_dip_enable(recr2_pl);
7815         mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_NEXT_HEADER);
7816         if (only_l3) {
7817                 mlxsw_sp_mp_hash_field_set(recr2_pl,
7818                                            MLXSW_REG_RECR2_IPV6_FLOW_LABEL);
7819         } else {
7820                 mlxsw_sp_mp_hash_header_set(recr2_pl,
7821                                             MLXSW_REG_RECR2_TCP_UDP_EN_IPV6);
7822                 mlxsw_sp_mp_hash_field_set(recr2_pl,
7823                                            MLXSW_REG_RECR2_TCP_UDP_SPORT);
7824                 mlxsw_sp_mp_hash_field_set(recr2_pl,
7825                                            MLXSW_REG_RECR2_TCP_UDP_DPORT);
7826         }
7827 }
7828
7829 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
7830 {
7831         char recr2_pl[MLXSW_REG_RECR2_LEN];
7832         u32 seed;
7833
7834         get_random_bytes(&seed, sizeof(seed));
7835         mlxsw_reg_recr2_pack(recr2_pl, seed);
7836         mlxsw_sp_mp4_hash_init(recr2_pl);
7837         mlxsw_sp_mp6_hash_init(recr2_pl);
7838
7839         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(recr2), recr2_pl);
7840 }
7841 #else
/* Stub used when CONFIG_IP_ROUTE_MULTIPATH is not set: nothing to
 * program, always succeeds.
 */
static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
{
        return 0;
}
7846 #endif
7847
7848 static int mlxsw_sp_dscp_init(struct mlxsw_sp *mlxsw_sp)
7849 {
7850         char rdpm_pl[MLXSW_REG_RDPM_LEN];
7851         unsigned int i;
7852
7853         MLXSW_REG_ZERO(rdpm, rdpm_pl);
7854
7855         /* HW is determining switch priority based on DSCP-bits, but the
7856          * kernel is still doing that based on the ToS. Since there's a
7857          * mismatch in bits we need to make sure to translate the right
7858          * value ToS would observe, skipping the 2 least-significant ECN bits.
7859          */
7860         for (i = 0; i < MLXSW_REG_RDPM_DSCP_ENTRY_REC_MAX_COUNT; i++)
7861                 mlxsw_reg_rdpm_pack(rdpm_pl, i, rt_tos2priority(i << 2));
7862
7863         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rdpm), rdpm_pl);
7864 }
7865
7866 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
7867 {
7868         bool usp = init_net.ipv4.sysctl_ip_fwd_update_priority;
7869         char rgcr_pl[MLXSW_REG_RGCR_LEN];
7870         u64 max_rifs;
7871         int err;
7872
7873         if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS))
7874                 return -EIO;
7875         max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
7876
7877         mlxsw_reg_rgcr_pack(rgcr_pl, true, true);
7878         mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
7879         mlxsw_reg_rgcr_usp_set(rgcr_pl, usp);
7880         err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
7881         if (err)
7882                 return err;
7883         return 0;
7884 }
7885
7886 static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
7887 {
7888         char rgcr_pl[MLXSW_REG_RGCR_LEN];
7889
7890         mlxsw_reg_rgcr_pack(rgcr_pl, false, false);
7891         mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
7892 }
7893
7894 int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
7895 {
7896         struct mlxsw_sp_router *router;
7897         int err;
7898
7899         router = kzalloc(sizeof(*mlxsw_sp->router), GFP_KERNEL);
7900         if (!router)
7901                 return -ENOMEM;
7902         mlxsw_sp->router = router;
7903         router->mlxsw_sp = mlxsw_sp;
7904
7905         router->inetaddr_nb.notifier_call = mlxsw_sp_inetaddr_event;
7906         err = register_inetaddr_notifier(&router->inetaddr_nb);
7907         if (err)
7908                 goto err_register_inetaddr_notifier;
7909
7910         router->inet6addr_nb.notifier_call = mlxsw_sp_inet6addr_event;
7911         err = register_inet6addr_notifier(&router->inet6addr_nb);
7912         if (err)
7913                 goto err_register_inet6addr_notifier;
7914
7915         INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_neighs_list);
7916         err = __mlxsw_sp_router_init(mlxsw_sp);
7917         if (err)
7918                 goto err_router_init;
7919
7920         err = mlxsw_sp_rifs_init(mlxsw_sp);
7921         if (err)
7922                 goto err_rifs_init;
7923
7924         err = mlxsw_sp_ipips_init(mlxsw_sp);
7925         if (err)
7926                 goto err_ipips_init;
7927
7928         err = rhashtable_init(&mlxsw_sp->router->nexthop_ht,
7929                               &mlxsw_sp_nexthop_ht_params);
7930         if (err)
7931                 goto err_nexthop_ht_init;
7932
7933         err = rhashtable_init(&mlxsw_sp->router->nexthop_group_ht,
7934                               &mlxsw_sp_nexthop_group_ht_params);
7935         if (err)
7936                 goto err_nexthop_group_ht_init;
7937
7938         INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_list);
7939         err = mlxsw_sp_lpm_init(mlxsw_sp);
7940         if (err)
7941                 goto err_lpm_init;
7942
7943         err = mlxsw_sp_mr_init(mlxsw_sp, &mlxsw_sp_mr_tcam_ops);
7944         if (err)
7945                 goto err_mr_init;
7946
7947         err = mlxsw_sp_vrs_init(mlxsw_sp);
7948         if (err)
7949                 goto err_vrs_init;
7950
7951         err = mlxsw_sp_neigh_init(mlxsw_sp);
7952         if (err)
7953                 goto err_neigh_init;
7954
7955         mlxsw_sp->router->netevent_nb.notifier_call =
7956                 mlxsw_sp_router_netevent_event;
7957         err = register_netevent_notifier(&mlxsw_sp->router->netevent_nb);
7958         if (err)
7959                 goto err_register_netevent_notifier;
7960
7961         err = mlxsw_sp_mp_hash_init(mlxsw_sp);
7962         if (err)
7963                 goto err_mp_hash_init;
7964
7965         err = mlxsw_sp_dscp_init(mlxsw_sp);
7966         if (err)
7967                 goto err_dscp_init;
7968
7969         mlxsw_sp->router->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
7970         err = register_fib_notifier(&mlxsw_sp->router->fib_nb,
7971                                     mlxsw_sp_router_fib_dump_flush);
7972         if (err)
7973                 goto err_register_fib_notifier;
7974
7975         return 0;
7976
7977 err_register_fib_notifier:
7978 err_dscp_init:
7979 err_mp_hash_init:
7980         unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
7981 err_register_netevent_notifier:
7982         mlxsw_sp_neigh_fini(mlxsw_sp);
7983 err_neigh_init:
7984         mlxsw_sp_vrs_fini(mlxsw_sp);
7985 err_vrs_init:
7986         mlxsw_sp_mr_fini(mlxsw_sp);
7987 err_mr_init:
7988         mlxsw_sp_lpm_fini(mlxsw_sp);
7989 err_lpm_init:
7990         rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
7991 err_nexthop_group_ht_init:
7992         rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
7993 err_nexthop_ht_init:
7994         mlxsw_sp_ipips_fini(mlxsw_sp);
7995 err_ipips_init:
7996         mlxsw_sp_rifs_fini(mlxsw_sp);
7997 err_rifs_init:
7998         __mlxsw_sp_router_fini(mlxsw_sp);
7999 err_router_init:
8000         unregister_inet6addr_notifier(&router->inet6addr_nb);
8001 err_register_inet6addr_notifier:
8002         unregister_inetaddr_notifier(&router->inetaddr_nb);
8003 err_register_inetaddr_notifier:
8004         kfree(mlxsw_sp->router);
8005         return err;
8006 }
8007
8008 void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
8009 {
8010         unregister_fib_notifier(&mlxsw_sp->router->fib_nb);
8011         unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
8012         mlxsw_sp_neigh_fini(mlxsw_sp);
8013         mlxsw_sp_vrs_fini(mlxsw_sp);
8014         mlxsw_sp_mr_fini(mlxsw_sp);
8015         mlxsw_sp_lpm_fini(mlxsw_sp);
8016         rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
8017         rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
8018         mlxsw_sp_ipips_fini(mlxsw_sp);
8019         mlxsw_sp_rifs_fini(mlxsw_sp);
8020         __mlxsw_sp_router_fini(mlxsw_sp);
8021         unregister_inet6addr_notifier(&mlxsw_sp->router->inet6addr_nb);
8022         unregister_inetaddr_notifier(&mlxsw_sp->router->inetaddr_nb);
8023         kfree(mlxsw_sp->router);
8024 }