// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */

#include <linux/netdevice.h>
#include "lag.h"
#include "lag_mp.h"
#include "mlx5_core.h"
#include "eswitch.h"
#include "lib/mlx5.h"

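/* Multipath LAG ("port affinity") offload: listen to IPv4 FIB notifications
 * and, when a two-nexthop (ECMP) route spans both ports of the same HCA,
 * activate LAG in multipath mode and steer traffic toward whichever
 * nexthops are currently present.
 */
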
static bool mlx5_lag_multipath_check_prereq(struct mlx5_lag *ldev)
{
	if (!ldev->pf[0].dev || !ldev->pf[1].dev)
		return false;

	return mlx5_esw_multipath_prereq(ldev->pf[0].dev, ldev->pf[1].dev);
}

static bool __mlx5_lag_is_multipath(struct mlx5_lag *ldev)
{
	return !!(ldev->flags & MLX5_LAG_FLAG_MULTIPATH);
}

bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	bool res;

	ldev = mlx5_lag_dev_get(dev);
	res = ldev && __mlx5_lag_is_multipath(ldev);

	return res;
}

/**
 * mlx5_lag_set_port_affinity - set LAG port affinity
 * @ldev: lag device
 * @port: affinity to set:
 *        0 - normal affinity, both ports carry traffic.
 *        1 - affinity to port 1.
 *        2 - affinity to port 2.
 */
static void mlx5_lag_set_port_affinity(struct mlx5_lag *ldev, int port)
{
	struct lag_tracker tracker;

	if (!__mlx5_lag_is_multipath(ldev))
		return;

	switch (port) {
	case 0:
		tracker.netdev_state[0].tx_enabled = true;
		tracker.netdev_state[1].tx_enabled = true;
		tracker.netdev_state[0].link_up = true;
		tracker.netdev_state[1].link_up = true;
		break;
	case 1:
		tracker.netdev_state[0].tx_enabled = true;
		tracker.netdev_state[0].link_up = true;
		tracker.netdev_state[1].tx_enabled = false;
		tracker.netdev_state[1].link_up = false;
		break;
	case 2:
		tracker.netdev_state[0].tx_enabled = false;
		tracker.netdev_state[0].link_up = false;
		tracker.netdev_state[1].tx_enabled = true;
		tracker.netdev_state[1].link_up = true;
		break;
	default:
		mlx5_core_warn(ldev->pf[0].dev, "Invalid affinity port %d",
			       port);
		return;
	}

	if (tracker.netdev_state[0].tx_enabled)
		mlx5_notifier_call_chain(ldev->pf[0].dev->priv.events,
					 MLX5_DEV_EVENT_PORT_AFFINITY,
					 (void *)0);

	if (tracker.netdev_state[1].tx_enabled)
		mlx5_notifier_call_chain(ldev->pf[1].dev->priv.events,
					 MLX5_DEV_EVENT_PORT_AFFINITY,
					 (void *)0);

	mlx5_modify_lag(ldev, &tracker);
}

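/* Flush callback handed to register_fib_notifier(); the FIB notifier core
 * calls it to drain queued FIB work, e.g. when the initial dump of existing
 * routes has to be retried.
 */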
static void mlx5_lag_fib_event_flush(struct notifier_block *nb)
{
	struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);
	struct mlx5_lag *ldev = container_of(mp, struct mlx5_lag, lag_mp);

	flush_workqueue(ldev->wq);
}

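/* A FIB event snapshotted for deferred processing on ldev->wq. An event is
 * either a route entry event (fen_info) or a nexthop event (fnh_info), so
 * the two infos can share a union.
 */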
struct mlx5_fib_event_work {
	struct work_struct work;
	struct mlx5_lag *ldev;
	unsigned long event;
	union {
		struct fib_entry_notifier_info fen_info;
		struct fib_nh_notifier_info fnh_info;
	};
};

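/* Worker-side handling of route add/replace/append/del. A single-nexthop
 * route pins affinity to that port; a two-nexthop route across both ports
 * of the HCA activates multipath LAG and balanced affinity; mp->mfi tracks
 * the route currently offloaded.
 */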
static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
				     unsigned long event, struct fib_info *fi)
{
	struct lag_mp *mp = &ldev->lag_mp;

	/* Handle delete event: stop tracking the route */
	if (event == FIB_EVENT_ENTRY_DEL) {
		if (mp->mfi == fi)
			mp->mfi = NULL;
		return;
	}

	/* Handle add/replace event */
	if (fi->fib_nhs == 1) {
		if (__mlx5_lag_is_active(ldev)) {
			struct net_device *nh_dev = fi->fib_nh[0].fib_nh_dev;
			int i = mlx5_lag_dev_get_netdev_idx(ldev, nh_dev);

			/* netdev index is 0-based, ports are 1-based */
			mlx5_lag_set_port_affinity(ldev, ++i);
		}
		return;
	}

	if (fi->fib_nhs != 2)
		return;

	/* Verify next hops are ports of the same HCA */
	if (!(fi->fib_nh[0].fib_nh_dev == ldev->pf[0].netdev &&
	      fi->fib_nh[1].fib_nh_dev == ldev->pf[1].netdev) &&
	    !(fi->fib_nh[0].fib_nh_dev == ldev->pf[1].netdev &&
	      fi->fib_nh[1].fib_nh_dev == ldev->pf[0].netdev)) {
		mlx5_core_warn(ldev->pf[0].dev, "Multipath offload requires two ports of the same HCA\n");
		return;
	}

	/* First time we see a multipath route: activate multipath LAG */
	if (!mp->mfi && !__mlx5_lag_is_active(ldev)) {
		struct lag_tracker tracker;

		tracker = ldev->tracker;
		mlx5_activate_lag(ldev, &tracker, MLX5_LAG_FLAG_MULTIPATH);
	}

	mlx5_lag_set_port_affinity(ldev, 0);
	mp->mfi = fi;
}

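/* Worker-side handling of per-nexthop events on the tracked route: when a
 * nexthop is deleted, pin affinity to the peer port; when the second
 * nexthop comes back, restore balanced affinity.
 */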
static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev,
				       unsigned long event,
				       struct fib_nh *fib_nh,
				       struct fib_info *fi)
{
	struct lag_mp *mp = &ldev->lag_mp;

	/* Check the nh event is related to the tracked route */
	if (!mp->mfi || mp->mfi != fi)
		return;

	/* nh added/removed */
	if (event == FIB_EVENT_NH_DEL) {
		int i = mlx5_lag_dev_get_netdev_idx(ldev, fib_nh->fib_nh_dev);

		if (i >= 0) {
			i = (i + 1) % 2 + 1; /* peer port: idx 0 -> 2, idx 1 -> 1 */
			mlx5_lag_set_port_affinity(ldev, i);
		}
	} else if (event == FIB_EVENT_NH_ADD &&
		   fi->fib_nhs == 2) {
		mlx5_lag_set_port_affinity(ldev, 0);
	}
}

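/* Work item handler: runs the deferred route/nexthop processing under the
 * RTNL lock and drops the fib_info reference taken in mlx5_lag_fib_event().
 */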
static void mlx5_lag_fib_update(struct work_struct *work)
{
	struct mlx5_fib_event_work *fib_work =
		container_of(work, struct mlx5_fib_event_work, work);
	struct mlx5_lag *ldev = fib_work->ldev;
	struct fib_nh *fib_nh;

	/* Protect internal structures from changes */
	rtnl_lock();
	switch (fib_work->event) {
	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
	case FIB_EVENT_ENTRY_APPEND: /* fall through */
	case FIB_EVENT_ENTRY_ADD: /* fall through */
	case FIB_EVENT_ENTRY_DEL:
		mlx5_lag_fib_route_event(ldev, fib_work->event,
					 fib_work->fen_info.fi);
		fib_info_put(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_NH_ADD: /* fall through */
	case FIB_EVENT_NH_DEL:
		fib_nh = fib_work->fnh_info.fib_nh;
		mlx5_lag_fib_nexthop_event(ldev, fib_work->event,
					   fib_work->fnh_info.fib_nh,
					   fib_nh->nh_parent);
		fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
		break;
	}
	rtnl_unlock();
	kfree(fib_work);
}

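/* Allocate and initialize a deferred-work item. GFP_ATOMIC because this is
 * called from the FIB notifier chain, where sleeping is not allowed.
 */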
static struct mlx5_fib_event_work *
mlx5_lag_init_fib_work(struct mlx5_lag *ldev, unsigned long event)
{
	struct mlx5_fib_event_work *fib_work;

	fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
	if (WARN_ON(!fib_work))
		return NULL;

	INIT_WORK(&fib_work->work, mlx5_lag_fib_update);
	fib_work->ldev = ldev;
	fib_work->event = event;

	return fib_work;
}

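/* FIB notifier entry point: filter to IPv4 events that involve this LAG's
 * netdevs, snapshot them into a work item and queue it. A multipath route
 * like the following is what triggers the offload (addresses and device
 * names here are purely illustrative):
 *
 *   ip route add 10.0.0.0/24 \
 *           nexthop via 10.1.1.1 dev enp1s0f0 \
 *           nexthop via 10.2.2.1 dev enp1s0f1
 */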
static int mlx5_lag_fib_event(struct notifier_block *nb,
			      unsigned long event, void *ptr)
{
	struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);
	struct mlx5_lag *ldev = container_of(mp, struct mlx5_lag, lag_mp);
	struct fib_notifier_info *info = ptr;
	struct mlx5_fib_event_work *fib_work;
	struct fib_entry_notifier_info *fen_info;
	struct fib_nh_notifier_info *fnh_info;
	struct fib_info *fi;

	if (info->family != AF_INET)
		return NOTIFY_DONE;

	if (!mlx5_lag_multipath_check_prereq(ldev))
		return NOTIFY_DONE;

	switch (event) {
	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
	case FIB_EVENT_ENTRY_APPEND: /* fall through */
	case FIB_EVENT_ENTRY_ADD: /* fall through */
	case FIB_EVENT_ENTRY_DEL:
		fen_info = container_of(info, struct fib_entry_notifier_info,
					info);
		fi = fen_info->fi;
		if (fi->fib_dev != ldev->pf[0].netdev &&
		    fi->fib_dev != ldev->pf[1].netdev)
			return NOTIFY_DONE;
		fib_work = mlx5_lag_init_fib_work(ldev, event);
		if (!fib_work)
			return NOTIFY_DONE;
		fib_work->fen_info = *fen_info;
		/* Take reference on fib_info to prevent it from being
		 * freed while work is queued. Release it afterwards.
		 */
		fib_info_hold(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_NH_ADD: /* fall through */
	case FIB_EVENT_NH_DEL:
		fnh_info = container_of(info, struct fib_nh_notifier_info,
					info);
		fib_work = mlx5_lag_init_fib_work(ldev, event);
		if (!fib_work)
			return NOTIFY_DONE;
		fib_work->fnh_info = *fnh_info;
		fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
		break;
	default:
		return NOTIFY_DONE;
	}

	queue_work(ldev->wq, &fib_work->work);

	return NOTIFY_DONE;
}

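/* Register the FIB notifier for this LAG device. mp->fib_nb.notifier_call
 * doubles as the "already initialized" flag, keeping init and cleanup
 * idempotent.
 */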
int mlx5_lag_mp_init(struct mlx5_lag *ldev)
{
	struct lag_mp *mp = &ldev->lag_mp;
	int err;

	if (mp->fib_nb.notifier_call)
		return 0;

	mp->fib_nb.notifier_call = mlx5_lag_fib_event;
	err = register_fib_notifier(&mp->fib_nb,
				    mlx5_lag_fib_event_flush);
	if (err)
		mp->fib_nb.notifier_call = NULL;

	return err;
}

void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev)
{
	struct lag_mp *mp = &ldev->lag_mp;

	if (!mp->fib_nb.notifier_call)
		return;

	unregister_fib_notifier(&mp->fib_nb);
	mp->fib_nb.notifier_call = NULL;
}