2 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 #include <generated/utsrelease.h>
34 #include <linux/mlx5/fs.h>
35 #include <net/switchdev.h>
36 #include <net/pkt_cls.h>
37 #include <net/act_api.h>
38 #include <net/netevent.h>
/* Representor queues are intentionally small: at least 2^6 entries, but
 * never below the driver-wide minimum log sizes.
 */
47 #define MLX5E_REP_PARAMS_LOG_SQ_SIZE \
48 max(0x6, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE)
49 #define MLX5E_REP_PARAMS_LOG_RQ_SIZE \
50 max(0x6, MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE)
/* Driver name reported through ethtool -i for representor netdevs. */
52 static const char mlx5e_rep_driver_name[] = "mlx5e_rep";
/* ethtool_ops.get_drvinfo: report the representor driver name and the
 * kernel release string (UTS_RELEASE) as the driver version.
 */
54 static void mlx5e_rep_get_drvinfo(struct net_device *dev,
55 struct ethtool_drvinfo *drvinfo)
57 strlcpy(drvinfo->driver, mlx5e_rep_driver_name,
58 sizeof(drvinfo->driver));
59 strlcpy(drvinfo->version, UTS_RELEASE, sizeof(drvinfo->version));
/* Software counters exposed via ethtool for representors: only the four
 * basic rx/tx packet and byte counters from struct mlx5e_sw_stats.
 */
62 static const struct counter_desc sw_rep_stats_desc[] = {
63 { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_packets) },
64 { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_bytes) },
65 { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_packets) },
66 { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_bytes) },
/* Number of entries in sw_rep_stats_desc above. */
69 #define NUM_VPORT_REP_COUNTERS ARRAY_SIZE(sw_rep_stats_desc)
/* ethtool_ops.get_strings: copy the per-counter format names into the
 * caller-supplied buffer, one ETH_GSTRING_LEN slot per counter.
 */
71 static void mlx5e_rep_get_strings(struct net_device *dev,
72 u32 stringset, uint8_t *data)
78 for (i = 0; i < NUM_VPORT_REP_COUNTERS; i++)
79 strcpy(data + (i * ETH_GSTRING_LEN),
80 sw_rep_stats_desc[i].format);
/* Refresh priv->stats.vf_vport from the e-switch vport counters of the
 * VF this representor fronts. Directions are swapped on purpose: what the
 * VF transmits is what the switch-side representor receives.
 */
85 static void mlx5e_rep_update_hw_counters(struct mlx5e_priv *priv)
87 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
88 struct mlx5e_rep_priv *rpriv = priv->ppriv;
89 struct mlx5_eswitch_rep *rep = rpriv->rep;
90 struct rtnl_link_stats64 *vport_stats;
91 struct ifla_vf_stats vf_stats;
94 err = mlx5_eswitch_get_vport_stats(esw, rep->vport, &vf_stats);
/* Query failure: warn and keep the previously cached counters. */
96 pr_warn("vport %d error %d reading stats\n", rep->vport, err);
100 vport_stats = &priv->stats.vf_vport;
101 /* flip tx/rx as we are reporting the counters for the switch vport */
102 vport_stats->rx_packets = vf_stats.tx_packets;
103 vport_stats->rx_bytes = vf_stats.tx_bytes;
104 vport_stats->tx_packets = vf_stats.rx_packets;
105 vport_stats->tx_bytes = vf_stats.rx_bytes;
/* Recompute the aggregate software counters by summing the per-channel
 * RQ stats and the per-TC SQ stats of every open channel.
 */
108 static void mlx5e_rep_update_sw_counters(struct mlx5e_priv *priv)
110 struct mlx5e_sw_stats *s = &priv->stats.sw;
111 struct mlx5e_rq_stats *rq_stats;
112 struct mlx5e_sq_stats *sq_stats;
/* Start from zero each time; totals are rebuilt from scratch below. */
115 memset(s, 0, sizeof(*s));
116 for (i = 0; i < priv->channels.num; i++) {
117 struct mlx5e_channel *c = priv->channels.c[i];
119 rq_stats = &c->rq.stats;
121 s->rx_packets += rq_stats->packets;
122 s->rx_bytes += rq_stats->bytes;
/* One SQ per traffic class within each channel. */
124 for (j = 0; j < priv->channels.params.num_tc; j++) {
125 sq_stats = &c->sq[j].stats;
127 s->tx_packets += sq_stats->packets;
128 s->tx_bytes += sq_stats->bytes;
/* Profile update_stats callback: refresh both software (per-ring) and
 * hardware (e-switch vport) counters.
 */
133 static void mlx5e_rep_update_stats(struct mlx5e_priv *priv)
135 mlx5e_rep_update_sw_counters(priv);
136 mlx5e_rep_update_hw_counters(priv);
/* ethtool_ops.get_ethtool_stats: refresh the SW counters (only while the
 * device is opened, under the state lock) and copy them out in the same
 * order as sw_rep_stats_desc / get_strings.
 */
139 static void mlx5e_rep_get_ethtool_stats(struct net_device *dev,
140 struct ethtool_stats *stats, u64 *data)
142 struct mlx5e_priv *priv = netdev_priv(dev);
/* state_lock protects priv->channels against concurrent open/close. */
148 mutex_lock(&priv->state_lock);
149 if (test_bit(MLX5E_STATE_OPENED, &priv->state))
150 mlx5e_rep_update_sw_counters(priv);
151 mutex_unlock(&priv->state_lock);
153 for (i = 0; i < NUM_VPORT_REP_COUNTERS; i++)
154 data[i] = MLX5E_READ_CTR64_CPU(&priv->stats.sw,
155 sw_rep_stats_desc, i);
/* ethtool_ops.get_sset_count: number of exposed counters (presumably only
 * for ETH_SS_STATS; the sset dispatch lines are not visible here — TODO
 * confirm against the full source).
 */
158 static int mlx5e_rep_get_sset_count(struct net_device *dev, int sset)
162 return NUM_VPORT_REP_COUNTERS;
/* Minimal ethtool interface for representor netdevs. */
168 static const struct ethtool_ops mlx5e_rep_ethtool_ops = {
169 .get_drvinfo = mlx5e_rep_get_drvinfo,
170 .get_link = ethtool_op_get_link,
171 .get_strings = mlx5e_rep_get_strings,
172 .get_sset_count = mlx5e_rep_get_sset_count,
173 .get_ethtool_stats = mlx5e_rep_get_ethtool_stats,
/* switchdev attr_get: expose the e-switch parent ID (the rep's hw_id) so
 * user space can group all representors under one switch. Fails when the
 * e-switch is not in SR-IOV mode (error path not visible in this view).
 */
176 int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr)
178 struct mlx5e_priv *priv = netdev_priv(dev);
179 struct mlx5e_rep_priv *rpriv = priv->ppriv;
180 struct mlx5_eswitch_rep *rep = rpriv->rep;
181 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
183 if (esw->mode == SRIOV_NONE)
187 case SWITCHDEV_ATTR_ID_PORT_PARENT_ID:
188 attr->u.ppid.id_len = ETH_ALEN;
189 ether_addr_copy(attr->u.ppid.id, rep->hw_id);
/* Tear down all send-to-vport steering rules of this representor and free
 * the tracking entries. No-op unless the e-switch is in offloads mode.
 */
198 static void mlx5e_sqs2vport_stop(struct mlx5_eswitch *esw,
199 struct mlx5_eswitch_rep *rep)
201 struct mlx5e_rep_sq *rep_sq, *tmp;
202 struct mlx5e_rep_priv *rpriv;
204 if (esw->mode != SRIOV_OFFLOADS)
207 rpriv = mlx5e_rep_to_rep_priv(rep);
/* _safe variant: entries are unlinked (and presumably freed) while walking. */
208 list_for_each_entry_safe(rep_sq, tmp, &rpriv->vport_sqs_list, list) {
209 mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule);
210 list_del(&rep_sq->list);
/* Install one e-switch send-to-vport rule per SQ in @sqns_array so traffic
 * sent on the representor's SQs is re-injected toward the VF vport. Each
 * rule is tracked on rpriv->vport_sqs_list; on failure all rules installed
 * so far are rolled back via mlx5e_sqs2vport_stop().
 */
215 static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw,
216 struct mlx5_eswitch_rep *rep,
217 u32 *sqns_array, int sqns_num)
219 struct mlx5_flow_handle *flow_rule;
220 struct mlx5e_rep_priv *rpriv;
221 struct mlx5e_rep_sq *rep_sq;
225 if (esw->mode != SRIOV_OFFLOADS)
228 rpriv = mlx5e_rep_to_rep_priv(rep);
229 for (i = 0; i < sqns_num; i++) {
230 rep_sq = kzalloc(sizeof(*rep_sq), GFP_KERNEL);
236 /* Add re-inject rule to the PF/representor sqs */
237 flow_rule = mlx5_eswitch_add_send_to_vport_rule(esw,
240 if (IS_ERR(flow_rule)) {
241 err = PTR_ERR(flow_rule);
245 rep_sq->send_to_vport_rule = flow_rule;
246 list_add(&rep_sq->list, &rpriv->vport_sqs_list);
/* Error unwind: remove everything added above. */
251 mlx5e_sqs2vport_stop(esw, rep);
/* Collect the SQ numbers of every open channel/TC pair and install the
 * corresponding send-to-vport forwarding rules. Called when the
 * representor netdev is opened (the sqs free / return lines are not
 * visible in this view).
 */
255 int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv)
257 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
258 struct mlx5e_rep_priv *rpriv = priv->ppriv;
259 struct mlx5_eswitch_rep *rep = rpriv->rep;
260 struct mlx5e_channel *c;
261 int n, tc, num_sqs = 0;
/* One slot per (channel, tc) SQ. */
265 sqs = kcalloc(priv->channels.num * priv->channels.params.num_tc, sizeof(*sqs), GFP_KERNEL);
269 for (n = 0; n < priv->channels.num; n++) {
270 c = priv->channels.c[n];
271 for (tc = 0; tc < c->num_tc; tc++)
272 sqs[num_sqs++] = c->sq[tc].sqn;
275 err = mlx5e_sqs2vport_start(esw, rep, sqs, num_sqs);
280 netdev_warn(priv->netdev, "Failed to add SQs FWD rules %d\n", err);
/* Counterpart of mlx5e_add_sqs_fwd_rules(): remove all send-to-vport
 * rules of this representor.
 */
284 void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv)
286 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
287 struct mlx5e_rep_priv *rpriv = priv->ppriv;
288 struct mlx5_eswitch_rep *rep = rpriv->rep;
290 mlx5e_sqs2vport_stop(esw, rep);
/* Initialize the neigh-used-value sampling interval to the smaller of the
 * IPv4 ARP and IPv6 ND delay-probe times (IPv6 falls back to ~0UL when
 * CONFIG_IPV6 is off), and push it to the FC counter sampler.
 */
293 static void mlx5e_rep_neigh_update_init_interval(struct mlx5e_rep_priv *rpriv)
295 #if IS_ENABLED(CONFIG_IPV6)
296 unsigned long ipv6_interval = NEIGH_VAR(&ipv6_stub->nd_tbl->parms,
299 unsigned long ipv6_interval = ~0UL;
301 unsigned long ipv4_interval = NEIGH_VAR(&arp_tbl.parms,
303 struct net_device *netdev = rpriv->netdev;
304 struct mlx5e_priv *priv = netdev_priv(netdev);
306 rpriv->neigh_update.min_interval = min_t(unsigned long, ipv6_interval, ipv4_interval);
307 mlx5_fc_update_sampling_interval(priv->mdev, rpriv->neigh_update.min_interval);
/* (Re)arm the delayed neigh stats work after the current minimal
 * delay-probe interval.
 */
310 void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv)
312 struct mlx5e_rep_priv *rpriv = priv->ppriv;
313 struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
315 mlx5_fc_queue_stats_work(priv->mdev,
316 &neigh_update->neigh_stats_work,
317 neigh_update->min_interval);
/* Periodic work: walk all tracked neigh hash entries and update their
 * "used" value from the flow counters; re-queues itself while the list is
 * non-empty. List locking lines are not visible in this view — presumably
 * protected by RTNL or the rep's encap lock; confirm against full source.
 */
320 static void mlx5e_rep_neigh_stats_work(struct work_struct *work)
322 struct mlx5e_rep_priv *rpriv = container_of(work, struct mlx5e_rep_priv,
323 neigh_update.neigh_stats_work.work);
324 struct net_device *netdev = rpriv->netdev;
325 struct mlx5e_priv *priv = netdev_priv(netdev);
326 struct mlx5e_neigh_hash_entry *nhe;
329 if (!list_empty(&rpriv->neigh_update.neigh_list))
330 mlx5e_rep_queue_neigh_stats_work(priv);
332 list_for_each_entry(nhe, &rpriv->neigh_update.neigh_list, neigh_list)
333 mlx5e_tc_update_neigh_used_value(nhe);
/* Take a reference on a neigh hash entry. */
338 static void mlx5e_rep_neigh_entry_hold(struct mlx5e_neigh_hash_entry *nhe)
340 refcount_inc(&nhe->refcnt);
/* Drop a reference; the entry is freed when the count hits zero (the free
 * call itself is not visible in this view).
 */
343 static void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe)
345 if (refcount_dec_and_test(&nhe->refcnt))
/* React to a neighbour state/address change for one encap entry: tear
 * down offloaded encap flows when the neigh went down or its MAC changed,
 * then re-offload them with the new destination MAC once connected.
 */
349 static void mlx5e_rep_update_flows(struct mlx5e_priv *priv,
350 struct mlx5e_encap_entry *e,
351 bool neigh_connected,
352 unsigned char ha[ETH_ALEN])
354 struct ethhdr *eth = (struct ethhdr *)e->encap_header;
/* Remove flows if the entry was valid but the neigh dropped, or the MAC
 * no longer matches what we encapsulated with. */
358 if ((!neigh_connected && (e->flags & MLX5_ENCAP_ENTRY_VALID)) ||
359 !ether_addr_equal(e->h_dest, ha))
360 mlx5e_tc_encap_flows_del(priv, e);
362 if (neigh_connected && !(e->flags & MLX5_ENCAP_ENTRY_VALID)) {
/* Refresh both the cached dmac and the prebuilt encap header. */
363 ether_addr_copy(e->h_dest, ha);
364 ether_addr_copy(eth->h_dest, ha);
366 mlx5e_tc_encap_flows_add(priv, e);
/* Work item queued from the netevent notifier: snapshot the neighbour's
 * hw address and NUD state under its lock, then walk the entry's encap
 * list and reconcile each offloaded flow set with the new state. Drops
 * the reference taken by the notifier when done.
 */
370 static void mlx5e_rep_neigh_update(struct work_struct *work)
372 struct mlx5e_neigh_hash_entry *nhe =
373 container_of(work, struct mlx5e_neigh_hash_entry, neigh_update_work);
374 struct neighbour *n = nhe->n;
375 struct mlx5e_encap_entry *e;
376 unsigned char ha[ETH_ALEN];
377 struct mlx5e_priv *priv;
378 bool neigh_connected;
379 bool encap_connected;
384 /* If these parameters are changed after we release the lock,
385 * we'll receive another event letting us know about it.
386 * We use this lock to avoid inconsistency between the neigh validity
387 * and it's hw address.
389 read_lock_bh(&n->lock);
390 memcpy(ha, n->ha, ETH_ALEN);
391 nud_state = n->nud_state;
393 read_unlock_bh(&n->lock);
/* Connected only if the neigh resolved and is not marked dead. */
395 neigh_connected = (nud_state & NUD_VALID) && !dead;
397 list_for_each_entry(e, &nhe->encap_list, encap_list) {
398 encap_connected = !!(e->flags & MLX5_ENCAP_ENTRY_VALID);
399 priv = netdev_priv(e->out_dev);
/* Only touch flows whose offload state or dmac is stale. */
401 if (encap_connected != neigh_connected ||
402 !ether_addr_equal(e->h_dest, ha))
403 mlx5e_rep_update_flows(priv, e, neigh_connected, ha);
/* Pairs with mlx5e_rep_neigh_entry_hold() in the netevent handler. */
405 mlx5e_rep_neigh_entry_release(nhe);
/* Forward declaration; defined below, needed by the netevent handler. */
410 static struct mlx5e_neigh_hash_entry *
411 mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv,
412 struct mlx5e_neigh *m_neigh);
/* Netevent notifier (atomic context). Two events are handled:
 *  - NEIGH_UPDATE: look up our hash entry for the neighbour and, if found,
 *    take a reference and queue mlx5e_rep_neigh_update() on priv->wq.
 *  - DELAY_PROBE_TIME_UPDATE: shrink the flow-counter sampling interval if
 *    a tracked device's delay-probe time got smaller.
 */
414 static int mlx5e_rep_netevent_event(struct notifier_block *nb,
415 unsigned long event, void *ptr)
417 struct mlx5e_rep_priv *rpriv = container_of(nb, struct mlx5e_rep_priv,
418 neigh_update.netevent_nb);
419 struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
420 struct net_device *netdev = rpriv->netdev;
421 struct mlx5e_priv *priv = netdev_priv(netdev);
422 struct mlx5e_neigh_hash_entry *nhe = NULL;
423 struct mlx5e_neigh m_neigh = {};
424 struct neigh_parms *p;
429 case NETEVENT_NEIGH_UPDATE:
/* Only ARP and (when enabled) IPv6 ND tables are of interest. */
431 #if IS_ENABLED(CONFIG_IPV6)
432 if (n->tbl != ipv6_stub->nd_tbl && n->tbl != &arp_tbl)
434 if (n->tbl != &arp_tbl)
/* Build the lookup key from the neighbour. */
438 m_neigh.dev = n->dev;
439 m_neigh.family = n->ops->family;
440 memcpy(&m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
442 /* We are in atomic context and can't take RTNL mutex, so use
443 * spin_lock_bh to lookup the neigh table. bh is used since
444 * netevent can be called from a softirq context.
446 spin_lock_bh(&neigh_update->encap_lock);
447 nhe = mlx5e_rep_neigh_entry_lookup(priv, &m_neigh);
449 spin_unlock_bh(&neigh_update->encap_lock);
453 /* This assignment is valid as long as the neigh reference
458 /* Take a reference to ensure the neighbour and mlx5 encap
459 * entry won't be destructed until we drop the reference in
463 mlx5e_rep_neigh_entry_hold(nhe);
/* If the work was already pending, drop the extra reference. */
465 if (!queue_work(priv->wq, &nhe->neigh_update_work)) {
466 mlx5e_rep_neigh_entry_release(nhe);
469 spin_unlock_bh(&neigh_update->encap_lock);
472 case NETEVENT_DELAY_PROBE_TIME_UPDATE:
475 /* We check the device is present since we don't care about
476 * changes in the default table, we only care about changes
477 * done per device delay prob time parameter.
479 #if IS_ENABLED(CONFIG_IPV6)
480 if (!p->dev || (p->tbl != ipv6_stub->nd_tbl && p->tbl != &arp_tbl))
482 if (!p->dev || p->tbl != &arp_tbl)
486 /* We are in atomic context and can't take RTNL mutex,
487 * so use spin_lock_bh to walk the neigh list and look for
488 * the relevant device. bh is used since netevent can be
489 * called from a softirq context.
491 spin_lock_bh(&neigh_update->encap_lock);
492 list_for_each_entry(nhe, &neigh_update->neigh_list, neigh_list) {
493 if (p->dev == nhe->m_neigh.dev) {
498 spin_unlock_bh(&neigh_update->encap_lock);
/* Only ever shrinks: keep the smallest interval seen so far. */
502 neigh_update->min_interval = min_t(unsigned long,
503 NEIGH_VAR(p, DELAY_PROBE_TIME),
504 neigh_update->min_interval);
505 mlx5_fc_update_sampling_interval(priv->mdev,
506 neigh_update->min_interval);
/* rhashtable layout for neigh hash entries, keyed by struct mlx5e_neigh
 * (device, family, destination IP).
 */
512 static const struct rhashtable_params mlx5e_neigh_ht_params = {
513 .head_offset = offsetof(struct mlx5e_neigh_hash_entry, rhash_node),
514 .key_offset = offsetof(struct mlx5e_neigh_hash_entry, m_neigh),
515 .key_len = sizeof(struct mlx5e_neigh),
516 .automatic_shrinking = true,
/* Set up the per-representor neigh update machinery: hash table, list,
 * lock, delayed stats work, initial sampling interval, and the netevent
 * notifier. On notifier registration failure the hash table is destroyed.
 */
519 static int mlx5e_rep_neigh_init(struct mlx5e_rep_priv *rpriv)
521 struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
524 err = rhashtable_init(&neigh_update->neigh_ht, &mlx5e_neigh_ht_params);
528 INIT_LIST_HEAD(&neigh_update->neigh_list);
529 spin_lock_init(&neigh_update->encap_lock);
530 INIT_DELAYED_WORK(&neigh_update->neigh_stats_work,
531 mlx5e_rep_neigh_stats_work);
532 mlx5e_rep_neigh_update_init_interval(rpriv);
534 rpriv->neigh_update.netevent_nb.notifier_call = mlx5e_rep_netevent_event;
535 err = register_netevent_notifier(&rpriv->neigh_update.netevent_nb);
/* Error unwind. */
541 rhashtable_destroy(&neigh_update->neigh_ht);
/* Tear down the neigh update machinery in the reverse order of init:
 * unregister the notifier first so no new work is queued, flush any
 * in-flight neigh update works, cancel the stats work, free the table.
 */
545 static void mlx5e_rep_neigh_cleanup(struct mlx5e_rep_priv *rpriv)
547 struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
548 struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
550 unregister_netevent_notifier(&neigh_update->netevent_nb);
552 flush_workqueue(priv->wq); /* flush neigh update works */
554 cancel_delayed_work_sync(&rpriv->neigh_update.neigh_stats_work);
556 rhashtable_destroy(&neigh_update->neigh_ht);
/* Insert a new neigh hash entry into the rhashtable and, on success, link
 * it on the neigh list walked by the stats work.
 */
559 static int mlx5e_rep_neigh_entry_insert(struct mlx5e_priv *priv,
560 struct mlx5e_neigh_hash_entry *nhe)
562 struct mlx5e_rep_priv *rpriv = priv->ppriv;
565 err = rhashtable_insert_fast(&rpriv->neigh_update.neigh_ht,
567 mlx5e_neigh_ht_params);
571 list_add(&nhe->neigh_list, &rpriv->neigh_update.neigh_list);
/* Unlink the entry from the list and hash table under encap_lock, so the
 * atomic netevent handler can no longer find it.
 */
576 static void mlx5e_rep_neigh_entry_remove(struct mlx5e_priv *priv,
577 struct mlx5e_neigh_hash_entry *nhe)
579 struct mlx5e_rep_priv *rpriv = priv->ppriv;
581 spin_lock_bh(&rpriv->neigh_update.encap_lock);
583 list_del(&nhe->neigh_list);
585 rhashtable_remove_fast(&rpriv->neigh_update.neigh_ht,
587 mlx5e_neigh_ht_params);
588 spin_unlock_bh(&rpriv->neigh_update.encap_lock);
591 /* This function must only be called under RTNL lock or under the
592 * representor's encap_lock in case RTNL mutex can't be held.
/* Returns the hash entry keyed by @m_neigh, or NULL if not tracked. */
594 static struct mlx5e_neigh_hash_entry *
595 mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv,
596 struct mlx5e_neigh *m_neigh)
598 struct mlx5e_rep_priv *rpriv = priv->ppriv;
599 struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
601 return rhashtable_lookup_fast(&neigh_update->neigh_ht, m_neigh,
602 mlx5e_neigh_ht_params);
/* Allocate and initialize a neigh hash entry for the encap entry's
 * neighbour key and insert it into the tracking structures. The entry
 * starts with one reference owned by the creator.
 */
605 static int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv,
606 struct mlx5e_encap_entry *e,
607 struct mlx5e_neigh_hash_entry **nhe)
611 *nhe = kzalloc(sizeof(**nhe), GFP_KERNEL);
615 memcpy(&(*nhe)->m_neigh, &e->m_neigh, sizeof(e->m_neigh));
616 INIT_WORK(&(*nhe)->neigh_update_work, mlx5e_rep_neigh_update);
617 INIT_LIST_HEAD(&(*nhe)->encap_list);
618 refcount_set(&(*nhe)->refcnt, 1);
620 err = mlx5e_rep_neigh_entry_insert(priv, *nhe);
/* Remove the entry from the lookup structures and drop the creator's
 * reference; the struct is actually freed once all other holders release.
 */
630 static void mlx5e_rep_neigh_entry_destroy(struct mlx5e_priv *priv,
631 struct mlx5e_neigh_hash_entry *nhe)
633 /* The neigh hash entry must be removed from the hash table regardless
634 * of the reference count value, so it won't be found by the next
635 * neigh notification call. The neigh hash entry reference count is
636 * incremented only during creation and neigh notification calls and
637 * protects from freeing the nhe struct.
639 mlx5e_rep_neigh_entry_remove(priv, nhe);
640 mlx5e_rep_neigh_entry_release(nhe);
/* Attach an encap entry to the (existing or newly created) neigh hash
 * entry for its neighbour, so neigh events update its offloaded flows.
 */
643 int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv,
644 struct mlx5e_encap_entry *e)
646 struct mlx5e_neigh_hash_entry *nhe;
649 nhe = mlx5e_rep_neigh_entry_lookup(priv, &e->m_neigh);
/* Not tracked yet: create a fresh entry for this neighbour. */
651 err = mlx5e_rep_neigh_entry_create(priv, e, &nhe);
655 list_add(&e->encap_list, &nhe->encap_list);
/* Detach an encap entry from its neigh hash entry; destroy the hash entry
 * once no encap entries reference it any more.
 */
659 void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv,
660 struct mlx5e_encap_entry *e)
662 struct mlx5e_neigh_hash_entry *nhe;
664 list_del(&e->encap_list);
665 nhe = mlx5e_rep_neigh_entry_lookup(priv, &e->m_neigh);
667 if (list_empty(&nhe->encap_list))
668 mlx5e_rep_neigh_entry_destroy(priv, nhe);
/* ndo_open: open the datapath under the state lock, then set the e-switch
 * vport admin state UP; carrier is asserted only if that succeeded.
 */
671 static int mlx5e_rep_open(struct net_device *dev)
673 struct mlx5e_priv *priv = netdev_priv(dev);
674 struct mlx5e_rep_priv *rpriv = priv->ppriv;
675 struct mlx5_eswitch_rep *rep = rpriv->rep;
678 mutex_lock(&priv->state_lock);
679 err = mlx5e_open_locked(dev);
683 if (!mlx5_modify_vport_admin_state(priv->mdev,
684 MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT,
685 rep->vport, MLX5_ESW_VPORT_ADMIN_STATE_UP))
686 netif_carrier_on(dev);
689 mutex_unlock(&priv->state_lock);
/* ndo_stop: set the e-switch vport admin state DOWN first, then close the
 * datapath, all under the state lock.
 */
693 static int mlx5e_rep_close(struct net_device *dev)
695 struct mlx5e_priv *priv = netdev_priv(dev);
696 struct mlx5e_rep_priv *rpriv = priv->ppriv;
697 struct mlx5_eswitch_rep *rep = rpriv->rep;
700 mutex_lock(&priv->state_lock);
701 mlx5_modify_vport_admin_state(priv->mdev,
702 MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT,
703 rep->vport, MLX5_ESW_VPORT_ADMIN_STATE_DOWN);
704 ret = mlx5e_close_locked(dev);
705 mutex_unlock(&priv->state_lock);
/* ndo_get_phys_port_name: name the port after the VF index. vport numbers
 * start at 1 for VFs (0 is the uplink), hence the "- 1".
 */
709 static int mlx5e_rep_get_phys_port_name(struct net_device *dev,
710 char *buf, size_t len)
712 struct mlx5e_priv *priv = netdev_priv(dev);
713 struct mlx5e_rep_priv *rpriv = priv->ppriv;
714 struct mlx5_eswitch_rep *rep = rpriv->rep;
717 ret = snprintf(buf, len, "%d", rep->vport - 1);
/* Dispatch a TC flower offload command (replace/destroy/stats) to the
 * corresponding mlx5e TC handler.
 */
725 mlx5e_rep_setup_tc_cls_flower(struct mlx5e_priv *priv,
726 struct tc_cls_flower_offload *cls_flower)
728 switch (cls_flower->command) {
729 case TC_CLSFLOWER_REPLACE:
730 return mlx5e_configure_flower(priv, cls_flower);
731 case TC_CLSFLOWER_DESTROY:
732 return mlx5e_delete_flower(priv, cls_flower);
733 case TC_CLSFLOWER_STATS:
734 return mlx5e_stats_flower(priv, cls_flower);
/* TC block callback: validate that the request is offloadable on chain 0,
 * then handle flower classifiers.
 */
740 static int mlx5e_rep_setup_tc_cb(enum tc_setup_type type, void *type_data,
743 struct mlx5e_priv *priv = cb_priv;
745 if (!tc_cls_can_offload_and_chain0(priv->netdev, type_data))
749 case TC_SETUP_CLSFLOWER:
750 return mlx5e_rep_setup_tc_cls_flower(priv, type_data);
/* Register/unregister our TC block callback for ingress blocks only. */
756 static int mlx5e_rep_setup_tc_block(struct net_device *dev,
757 struct tc_block_offload *f)
759 struct mlx5e_priv *priv = netdev_priv(dev);
761 if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
764 switch (f->command) {
766 return tcf_block_cb_register(f->block, mlx5e_rep_setup_tc_cb,
768 case TC_BLOCK_UNBIND:
769 tcf_block_cb_unregister(f->block, mlx5e_rep_setup_tc_cb, priv);
/* ndo_setup_tc: only TC_SETUP_BLOCK is supported (dispatch lines for
 * other types are not visible in this view).
 */
776 static int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type,
781 return mlx5e_rep_setup_tc_block(dev, type_data);
/* True when this priv belongs to the uplink (PF) representor: requires
 * vport_group_manager capability, offloads mode, and the uplink vport id.
 */
787 bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv)
789 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
790 struct mlx5e_rep_priv *rpriv = priv->ppriv;
791 struct mlx5_eswitch_rep *rep;
793 if (!MLX5_CAP_GEN(priv->mdev, vport_group_manager))
797 if (esw->mode == SRIOV_OFFLOADS &&
798 rep && rep->vport == FDB_UPLINK_VPORT)
/* True when this priv is a VF representor (has a rep that is not the
 * uplink vport).
 */
804 static bool mlx5e_is_vf_vport_rep(struct mlx5e_priv *priv)
806 struct mlx5e_rep_priv *rpriv = priv->ppriv;
807 struct mlx5_eswitch_rep *rep = rpriv->rep;
809 if (rep && rep->vport != FDB_UPLINK_VPORT)
/* ndo_has_offload_stats: CPU-hit stats are available for both VF and
 * uplink representors.
 */
815 bool mlx5e_has_offload_stats(const struct net_device *dev, int attr_id)
817 struct mlx5e_priv *priv = netdev_priv(dev);
820 case IFLA_OFFLOAD_XSTATS_CPU_HIT:
821 if (mlx5e_is_vf_vport_rep(priv) || mlx5e_is_uplink_rep(priv))
/* Fill an rtnl_link_stats64 from the cached software counters (traffic
 * that hit the CPU rather than being fully offloaded).
 */
829 mlx5e_get_sw_stats64(const struct net_device *dev,
830 struct rtnl_link_stats64 *stats)
832 struct mlx5e_priv *priv = netdev_priv(dev);
833 struct mlx5e_sw_stats *sstats = &priv->stats.sw;
835 stats->rx_packets = sstats->rx_packets;
836 stats->rx_bytes = sstats->rx_bytes;
837 stats->tx_packets = sstats->tx_packets;
838 stats->tx_bytes = sstats->tx_bytes;
840 stats->tx_dropped = sstats->tx_queue_dropped;
/* ndo_get_offload_stats: only IFLA_OFFLOAD_XSTATS_CPU_HIT is supported,
 * served from the software counters.
 */
845 int mlx5e_get_offload_stats(int attr_id, const struct net_device *dev,
849 case IFLA_OFFLOAD_XSTATS_CPU_HIT:
850 return mlx5e_get_sw_stats64(dev, sp);
/* ndo_get_stats64: report the cached (direction-flipped) vport counters
 * maintained by mlx5e_rep_update_hw_counters().
 */
857 mlx5e_rep_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
859 struct mlx5e_priv *priv = netdev_priv(dev);
861 memcpy(stats, &priv->stats.vf_vport, sizeof(*stats));
/* switchdev interface: only the parent-ID attribute getter. */
864 static const struct switchdev_ops mlx5e_rep_switchdev_ops = {
865 .switchdev_port_attr_get = mlx5e_attr_get,
/* net_device_ops for representor netdevs. */
868 static const struct net_device_ops mlx5e_netdev_ops_rep = {
869 .ndo_open = mlx5e_rep_open,
870 .ndo_stop = mlx5e_rep_close,
871 .ndo_start_xmit = mlx5e_xmit,
872 .ndo_get_phys_port_name = mlx5e_rep_get_phys_port_name,
873 .ndo_setup_tc = mlx5e_rep_setup_tc,
874 .ndo_get_stats64 = mlx5e_rep_get_stats,
875 .ndo_has_offload_stats = mlx5e_has_offload_stats,
876 .ndo_get_offload_stats = mlx5e_get_offload_stats,
879 static void mlx5e_build_rep_params(struct mlx5_core_dev *mdev,
880 struct mlx5e_params *params)
882 u8 cq_period_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ?
883 MLX5_CQ_PERIOD_MODE_START_FROM_CQE :
884 MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
886 params->log_sq_size = MLX5E_REP_PARAMS_LOG_SQ_SIZE;
887 params->rq_wq_type = MLX5_WQ_TYPE_LINKED_LIST;
888 params->log_rq_size = MLX5E_REP_PARAMS_LOG_RQ_SIZE;
890 params->rx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation);
891 mlx5e_set_rx_cq_mode_params(params, cq_period_mode);
893 params->tx_max_inline = mlx5e_get_max_inline_cap(mdev);
895 params->lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ;
897 mlx5_query_min_inline(mdev, ¶ms->tx_min_inline_mode);
/* Wire up the representor netdev: ops tables, watchdog, feature flags
 * (VLAN-challenged, TC offload, netns-local), and a random MAC.
 */
900 static void mlx5e_build_rep_netdev(struct net_device *netdev)
902 netdev->netdev_ops = &mlx5e_netdev_ops_rep;
904 netdev->watchdog_timeo = 15 * HZ;
906 netdev->ethtool_ops = &mlx5e_rep_ethtool_ops;
908 netdev->switchdev_ops = &mlx5e_rep_switchdev_ops;
910 netdev->features |= NETIF_F_VLAN_CHALLENGED | NETIF_F_HW_TC | NETIF_F_NETNS_LOCAL;
911 netdev->hw_features |= NETIF_F_HW_TC;
913 eth_hw_addr_random(netdev);
/* Profile init callback: set up priv fields, build the representor
 * parameters and netdev, and initialize timestamping.
 */
916 static void mlx5e_init_rep(struct mlx5_core_dev *mdev,
917 struct net_device *netdev,
918 const struct mlx5e_profile *profile,
921 struct mlx5e_priv *priv = netdev_priv(netdev);
924 priv->netdev = netdev;
925 priv->profile = profile;
928 mutex_init(&priv->state_lock);
930 INIT_DELAYED_WORK(&priv->update_stats_work, mlx5e_update_stats_work);
932 priv->channels.params.num_channels = profile->max_nch(mdev);
934 priv->hard_mtu = MLX5E_ETH_HARD_MTU;
936 mlx5e_build_rep_params(mdev, &priv->channels.params);
937 mlx5e_build_rep_netdev(netdev);
939 mlx5e_timestamp_init(priv);
/* Profile init_rx callback: create direct RQTs and TIRs, install the
 * e-switch vport RX rule steering this vport's traffic to our first
 * direct TIR, then init TC. Unwinds in reverse order on any failure.
 */
942 static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
944 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
945 struct mlx5e_rep_priv *rpriv = priv->ppriv;
946 struct mlx5_eswitch_rep *rep = rpriv->rep;
947 struct mlx5_flow_handle *flow_rule;
950 mlx5e_init_l2_addr(priv);
952 err = mlx5e_create_direct_rqts(priv);
956 err = mlx5e_create_direct_tirs(priv);
958 goto err_destroy_direct_rqts;
960 flow_rule = mlx5_eswitch_create_vport_rx_rule(esw,
962 priv->direct_tir[0].tirn);
963 if (IS_ERR(flow_rule)) {
964 err = PTR_ERR(flow_rule);
965 goto err_destroy_direct_tirs;
967 rpriv->vport_rx_rule = flow_rule;
969 err = mlx5e_tc_init(priv);
971 goto err_del_flow_rule;
/* Error unwind, reverse order of setup. */
976 mlx5_del_flow_rules(rpriv->vport_rx_rule);
977 err_destroy_direct_tirs:
978 mlx5e_destroy_direct_tirs(priv);
979 err_destroy_direct_rqts:
980 mlx5e_destroy_direct_rqts(priv);
/* Profile cleanup_rx callback: tear down in reverse order of init_rx. */
984 static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv)
986 struct mlx5e_rep_priv *rpriv = priv->ppriv;
988 mlx5e_tc_cleanup(priv);
989 mlx5_del_flow_rules(rpriv->vport_rx_rule);
990 mlx5e_destroy_direct_tirs(priv);
991 mlx5e_destroy_direct_rqts(priv);
/* Profile init_tx callback: create the TIS objects; warn on failure. */
994 static int mlx5e_init_rep_tx(struct mlx5e_priv *priv)
998 err = mlx5e_create_tises(priv);
1000 mlx5_core_warn(priv->mdev, "create tises failed, %d\n", err);
/* Representors use a single channel. */
1006 static int mlx5e_get_rep_max_num_channels(struct mlx5_core_dev *mdev)
1008 #define MLX5E_PORT_REPRESENTOR_NCH 1
1009 return MLX5E_PORT_REPRESENTOR_NCH;
/* The mlx5e profile describing representor netdevs. Note cleanup_tx
 * reuses the NIC profile's handler, and carrier updates / striding-RQ
 * completion handling are not used.
 */
1012 static const struct mlx5e_profile mlx5e_rep_profile = {
1013 .init = mlx5e_init_rep,
1014 .init_rx = mlx5e_init_rep_rx,
1015 .cleanup_rx = mlx5e_cleanup_rep_rx,
1016 .init_tx = mlx5e_init_rep_tx,
1017 .cleanup_tx = mlx5e_cleanup_nic_tx,
1018 .update_stats = mlx5e_rep_update_stats,
1019 .max_nch = mlx5e_get_rep_max_num_channels,
1020 .update_carrier = NULL,
1021 .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe_rep,
1022 .rx_handlers.handle_rx_cqe_mpwqe = NULL /* Not supported */,
1026 /* e-Switch vport representors */
/* Load callback for the uplink (PF) representor, which reuses the NIC
 * netdev: install SQ forwarding rules if already open, then init neigh
 * handling; rolls the rules back if neigh init fails.
 */
1029 mlx5e_nic_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
1031 struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep);
1032 struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
1036 if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
1037 err = mlx5e_add_sqs_fwd_rules(priv);
1042 err = mlx5e_rep_neigh_init(rpriv);
1044 goto err_remove_sqs;
/* Error unwind. */
1049 mlx5e_remove_sqs_fwd_rules(priv);
/* Unload callback for the uplink representor: remove SQ forwarding rules,
 * reset the offloaded TC rule state, and tear down neigh handling.
 */
1054 mlx5e_nic_rep_unload(struct mlx5_eswitch_rep *rep)
1056 struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep);
1057 struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
1059 if (test_bit(MLX5E_STATE_OPENED, &priv->state))
1060 mlx5e_remove_sqs_fwd_rules(priv);
1062 /* clean (and re-init) existing uplink offloaded TC rules */
1063 mlx5e_tc_cleanup(priv);
1064 mlx5e_tc_init(priv);
1066 mlx5e_rep_neigh_cleanup(rpriv);
/* Load callback for a VF vport representor: allocate rpriv, create and
 * attach a dedicated mlx5e netdev, init neigh handling, register the
 * egress-device TC callback against the uplink, and finally register the
 * netdev. Each failure unwinds everything done before it.
 */
1070 mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
1072 struct mlx5e_rep_priv *uplink_rpriv;
1073 struct mlx5e_rep_priv *rpriv;
1074 struct net_device *netdev;
1075 struct mlx5e_priv *upriv;
1078 rpriv = kzalloc(sizeof(*rpriv), GFP_KERNEL);
1082 netdev = mlx5e_create_netdev(dev, &mlx5e_rep_profile, rpriv);
1084 pr_warn("Failed to create representor netdev for vport %d\n",
1090 rpriv->netdev = netdev;
1092 rep->rep_if[REP_ETH].priv = rpriv;
1093 INIT_LIST_HEAD(&rpriv->vport_sqs_list);
1095 err = mlx5e_attach_netdev(netdev_priv(netdev));
1097 pr_warn("Failed to attach representor netdev for vport %d\n",
1099 goto err_destroy_netdev;
1102 err = mlx5e_rep_neigh_init(rpriv);
1104 pr_warn("Failed to initialized neighbours handling for vport %d\n",
1106 goto err_detach_netdev;
/* TC rules on the uplink may egress through this rep's netdev. */
1109 uplink_rpriv = mlx5_eswitch_get_uplink_priv(dev->priv.eswitch, REP_ETH);
1110 upriv = netdev_priv(uplink_rpriv->netdev);
1111 err = tc_setup_cb_egdev_register(netdev, mlx5e_setup_tc_block_cb,
1114 goto err_neigh_cleanup;
1116 err = register_netdev(netdev);
1118 pr_warn("Failed to register representor netdev for vport %d\n",
1120 goto err_egdev_cleanup;
/* Error unwind, reverse order of setup. */
1126 tc_setup_cb_egdev_unregister(netdev, mlx5e_setup_tc_block_cb,
1130 mlx5e_rep_neigh_cleanup(rpriv);
1133 mlx5e_detach_netdev(netdev_priv(netdev));
1136 mlx5e_destroy_netdev(netdev_priv(netdev));
/* Unload callback for a VF vport representor: exact reverse of
 * mlx5e_vport_rep_load(). ppriv is saved first since priv is freed with
 * the netdev before the rpriv allocation can be released.
 */
1142 mlx5e_vport_rep_unload(struct mlx5_eswitch_rep *rep)
1144 struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep);
1145 struct net_device *netdev = rpriv->netdev;
1146 struct mlx5e_priv *priv = netdev_priv(netdev);
1147 struct mlx5e_rep_priv *uplink_rpriv;
1148 void *ppriv = priv->ppriv;
1149 struct mlx5e_priv *upriv;
1151 unregister_netdev(netdev);
1152 uplink_rpriv = mlx5_eswitch_get_uplink_priv(priv->mdev->priv.eswitch,
1154 upriv = netdev_priv(uplink_rpriv->netdev);
1155 tc_setup_cb_egdev_unregister(netdev, mlx5e_setup_tc_block_cb,
1157 mlx5e_rep_neigh_cleanup(rpriv);
1158 mlx5e_detach_netdev(priv);
1159 mlx5e_destroy_netdev(priv);
1160 kfree(ppriv); /* mlx5e_rep_priv */
/* Register a representor interface for every possible VF vport (vport 0
 * is the uplink and is handled separately).
 */
1163 static void mlx5e_rep_register_vf_vports(struct mlx5e_priv *priv)
1165 struct mlx5_core_dev *mdev = priv->mdev;
1166 struct mlx5_eswitch *esw = mdev->priv.eswitch;
1167 int total_vfs = MLX5_TOTAL_VPORTS(mdev);
1170 for (vport = 1; vport < total_vfs; vport++) {
1171 struct mlx5_eswitch_rep_if rep_if = {};
1173 rep_if.load = mlx5e_vport_rep_load;
1174 rep_if.unload = mlx5e_vport_rep_unload;
1175 mlx5_eswitch_register_vport_rep(esw, vport, &rep_if, REP_ETH);
/* Unregister the representor interface of every VF vport. */
1179 static void mlx5e_rep_unregister_vf_vports(struct mlx5e_priv *priv)
1181 struct mlx5_core_dev *mdev = priv->mdev;
1182 struct mlx5_eswitch *esw = mdev->priv.eswitch;
1183 int total_vfs = MLX5_TOTAL_VPORTS(mdev);
1186 for (vport = 1; vport < total_vfs; vport++)
1187 mlx5_eswitch_unregister_vport_rep(esw, vport, REP_ETH);
/* Register all representors: the uplink PF (vport 0, backed by the NIC
 * netdev through the nic_rep callbacks) and then every VF vport.
 */
1190 void mlx5e_register_vport_reps(struct mlx5e_priv *priv)
1192 struct mlx5_core_dev *mdev = priv->mdev;
1193 struct mlx5_eswitch *esw = mdev->priv.eswitch;
1194 struct mlx5_eswitch_rep_if rep_if;
1195 struct mlx5e_rep_priv *rpriv;
1197 rpriv = priv->ppriv;
1198 rpriv->netdev = priv->netdev;
1200 rep_if.load = mlx5e_nic_rep_load;
1201 rep_if.unload = mlx5e_nic_rep_unload;
1202 rep_if.priv = rpriv;
1203 INIT_LIST_HEAD(&rpriv->vport_sqs_list);
1204 mlx5_eswitch_register_vport_rep(esw, 0, &rep_if, REP_ETH); /* UPLINK PF vport*/
1206 mlx5e_rep_register_vf_vports(priv); /* VFs vports */
/* Unregister all representors: VF vports first, then the uplink PF. */
1209 void mlx5e_unregister_vport_reps(struct mlx5e_priv *priv)
1211 struct mlx5_core_dev *mdev = priv->mdev;
1212 struct mlx5_eswitch *esw = mdev->priv.eswitch;
1214 mlx5e_rep_unregister_vf_vports(priv); /* VFs vports */
1215 mlx5_eswitch_unregister_vport_rep(esw, 0, REP_ETH); /* UPLINK PF*/
1218 void *mlx5e_alloc_nic_rep_priv(struct mlx5_core_dev *mdev)
1220 struct mlx5_eswitch *esw = mdev->priv.eswitch;
1221 struct mlx5e_rep_priv *rpriv;
1223 rpriv = kzalloc(sizeof(*rpriv), GFP_KERNEL);
1227 rpriv->rep = &esw->offloads.vport_reps[0];