1 /*
2  * Copyright (C) 2005 - 2016 Broadcom
3  * All rights reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License version 2
7  * as published by the Free Software Foundation.  The full GNU General
8  * Public License is included in this distribution in the file called COPYING.
9  *
10  * Contact Information:
11  * linux-drivers@emulex.com
12  *
13  * Emulex
14  * 3333 Susan Street
15  * Costa Mesa, CA 92626
16  */
17
18 #include <linux/prefetch.h>
19 #include <linux/module.h>
20 #include "be.h"
21 #include "be_cmds.h"
22 #include <asm/div64.h>
23 #include <linux/aer.h>
24 #include <linux/if_bridge.h>
25 #include <net/busy_poll.h>
26 #include <net/vxlan.h>
27
28 MODULE_VERSION(DRV_VER);
29 MODULE_DESCRIPTION(DRV_DESC " " DRV_VER);
30 MODULE_AUTHOR("Emulex Corporation");
31 MODULE_LICENSE("GPL");
32
33 /* The num_vfs module param is obsolete.
34  * Use the sysfs method to enable/disable VFs.
35  */
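/* For example, assuming the placeholder PCI function address below is only
 * illustrative:
 *   echo <num_vfs> > /sys/bus/pci/devices/<domain:bus:dev.fn>/sriov_numvfs
 * Writing 0 to sriov_numvfs disables the VFs again.
 */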
36 static unsigned int num_vfs;
37 module_param(num_vfs, uint, 0444);
38 MODULE_PARM_DESC(num_vfs, "Number of PCI VFs to initialize");
39
40 static ushort rx_frag_size = 2048;
41 module_param(rx_frag_size, ushort, 0444);
42 MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data.");
43
44 /* Per-module error detection/recovery workq shared across all functions.
45  * Each function schedules its own work request on this shared workq.
46  */
47 static struct workqueue_struct *be_err_recovery_workq;
48
49 static const struct pci_device_id be_dev_ids[] = {
50 #ifdef CONFIG_BE2NET_BE2
51         { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) },
52         { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID1) },
53 #endif /* CONFIG_BE2NET_BE2 */
54 #ifdef CONFIG_BE2NET_BE3
55         { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID2) },
56         { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID2) },
57 #endif /* CONFIG_BE2NET_BE3 */
58 #ifdef CONFIG_BE2NET_LANCER
59         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID3)},
60         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID4)},
61 #endif /* CONFIG_BE2NET_LANCER */
62 #ifdef CONFIG_BE2NET_SKYHAWK
63         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID5)},
64         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID6)},
65 #endif /* CONFIG_BE2NET_SKYHAWK */
66         { 0 }
67 };
68 MODULE_DEVICE_TABLE(pci, be_dev_ids);
69
70 /* Workqueue used by all functions for deferring cmd calls to the adapter */
71 static struct workqueue_struct *be_wq;
72
73 /* UE Status Low CSR */
74 static const char * const ue_status_low_desc[] = {
75         "CEV",
76         "CTX",
77         "DBUF",
78         "ERX",
79         "Host",
80         "MPU",
81         "NDMA",
82         "PTC ",
83         "RDMA ",
84         "RXF ",
85         "RXIPS ",
86         "RXULP0 ",
87         "RXULP1 ",
88         "RXULP2 ",
89         "TIM ",
90         "TPOST ",
91         "TPRE ",
92         "TXIPS ",
93         "TXULP0 ",
94         "TXULP1 ",
95         "UC ",
96         "WDMA ",
97         "TXULP2 ",
98         "HOST1 ",
99         "P0_OB_LINK ",
100         "P1_OB_LINK ",
101         "HOST_GPIO ",
102         "MBOX ",
103         "ERX2 ",
104         "SPARE ",
105         "JTAG ",
106         "MPU_INTPEND "
107 };
108
109 /* UE Status High CSR */
110 static const char * const ue_status_hi_desc[] = {
111         "LPCMEMHOST",
112         "MGMT_MAC",
113         "PCS0ONLINE",
114         "MPU_IRAM",
115         "PCS1ONLINE",
116         "PCTL0",
117         "PCTL1",
118         "PMEM",
119         "RR",
120         "TXPB",
121         "RXPP",
122         "XAUI",
123         "TXP",
124         "ARM",
125         "IPC",
126         "HOST2",
127         "HOST3",
128         "HOST4",
129         "HOST5",
130         "HOST6",
131         "HOST7",
132         "ECRC",
133         "Poison TLP",
134         "NETC",
135         "PERIPH",
136         "LLTXULP",
137         "D2P",
138         "RCON",
139         "LDMA",
140         "LLTXP",
141         "LLTXPB",
142         "Unknown"
143 };
144
145 #define BE_VF_IF_EN_FLAGS       (BE_IF_FLAGS_UNTAGGED | \
146                                  BE_IF_FLAGS_BROADCAST | \
147                                  BE_IF_FLAGS_MULTICAST | \
148                                  BE_IF_FLAGS_PASS_L3L4_ERRORS)
149
150 static void be_queue_free(struct be_adapter *adapter, struct be_queue_info *q)
151 {
152         struct be_dma_mem *mem = &q->dma_mem;
153
154         if (mem->va) {
155                 dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va,
156                                   mem->dma);
157                 mem->va = NULL;
158         }
159 }
160
161 static int be_queue_alloc(struct be_adapter *adapter, struct be_queue_info *q,
162                           u16 len, u16 entry_size)
163 {
164         struct be_dma_mem *mem = &q->dma_mem;
165
166         memset(q, 0, sizeof(*q));
167         q->len = len;
168         q->entry_size = entry_size;
169         mem->size = len * entry_size;
170         mem->va = dma_alloc_coherent(&adapter->pdev->dev, mem->size,
171                                      &mem->dma, GFP_KERNEL);
172         if (!mem->va)
173                 return -ENOMEM;
174         return 0;
175 }
176
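/* Enable/disable host interrupt generation by toggling the HOSTINTR bit of
 * the MEMBAR control register, accessed via PCI config space.
 */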
177 static void be_reg_intr_set(struct be_adapter *adapter, bool enable)
178 {
179         u32 reg, enabled;
180
181         pci_read_config_dword(adapter->pdev, PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET,
182                               &reg);
183         enabled = reg & MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
184
185         if (!enabled && enable)
186                 reg |= MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
187         else if (enabled && !enable)
188                 reg &= ~MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
189         else
190                 return;
191
192         pci_write_config_dword(adapter->pdev,
193                                PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET, reg);
194 }
195
196 static void be_intr_set(struct be_adapter *adapter, bool enable)
197 {
198         int status = 0;
199
200         /* On Lancer, interrupts can't be controlled via this register */
201         if (lancer_chip(adapter))
202                 return;
203
204         if (be_check_error(adapter, BE_ERROR_EEH))
205                 return;
206
207         status = be_cmd_intr_set(adapter, enable);
208         if (status)
209                 be_reg_intr_set(adapter, enable);
210 }
211
212 static void be_rxq_notify(struct be_adapter *adapter, u16 qid, u16 posted)
213 {
214         u32 val = 0;
215
216         if (be_check_error(adapter, BE_ERROR_HW))
217                 return;
218
219         val |= qid & DB_RQ_RING_ID_MASK;
220         val |= posted << DB_RQ_NUM_POSTED_SHIFT;
221
222         wmb();
223         iowrite32(val, adapter->db + DB_RQ_OFFSET);
224 }
225
226 static void be_txq_notify(struct be_adapter *adapter, struct be_tx_obj *txo,
227                           u16 posted)
228 {
229         u32 val = 0;
230
231         if (be_check_error(adapter, BE_ERROR_HW))
232                 return;
233
234         val |= txo->q.id & DB_TXULP_RING_ID_MASK;
235         val |= (posted & DB_TXULP_NUM_POSTED_MASK) << DB_TXULP_NUM_POSTED_SHIFT;
236
237         wmb();
238         iowrite32(val, adapter->db + txo->db_offset);
239 }
240
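/* Ring the EQ doorbell: 'arm' re-arms the EQ, 'clear_int' clears the
 * interrupt, 'num_popped' acks the number of EQ entries consumed, and
 * 'eq_delay_mult_enc' encodes the interrupt delay multiplier.
 */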
241 static void be_eq_notify(struct be_adapter *adapter, u16 qid,
242                          bool arm, bool clear_int, u16 num_popped,
243                          u32 eq_delay_mult_enc)
244 {
245         u32 val = 0;
246
247         val |= qid & DB_EQ_RING_ID_MASK;
248         val |= ((qid & DB_EQ_RING_ID_EXT_MASK) << DB_EQ_RING_ID_EXT_MASK_SHIFT);
249
250         if (be_check_error(adapter, BE_ERROR_HW))
251                 return;
252
253         if (arm)
254                 val |= 1 << DB_EQ_REARM_SHIFT;
255         if (clear_int)
256                 val |= 1 << DB_EQ_CLR_SHIFT;
257         val |= 1 << DB_EQ_EVNT_SHIFT;
258         val |= num_popped << DB_EQ_NUM_POPPED_SHIFT;
259         val |= eq_delay_mult_enc << DB_EQ_R2I_DLY_SHIFT;
260         iowrite32(val, adapter->db + DB_EQ_OFFSET);
261 }
262
263 void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped)
264 {
265         u32 val = 0;
266
267         val |= qid & DB_CQ_RING_ID_MASK;
268         val |= ((qid & DB_CQ_RING_ID_EXT_MASK) <<
269                         DB_CQ_RING_ID_EXT_MASK_SHIFT);
270
271         if (be_check_error(adapter, BE_ERROR_HW))
272                 return;
273
274         if (arm)
275                 val |= 1 << DB_CQ_REARM_SHIFT;
276         val |= num_popped << DB_CQ_NUM_POPPED_SHIFT;
277         iowrite32(val, adapter->db + DB_CQ_OFFSET);
278 }
279
280 static int be_dev_mac_add(struct be_adapter *adapter, u8 *mac)
281 {
282         int i;
283
284         /* Check if mac has already been added as part of uc-list */
285         for (i = 0; i < adapter->uc_macs; i++) {
286                 if (ether_addr_equal(adapter->uc_list[i].mac, mac)) {
287                         /* mac already added, skip addition */
288                         adapter->pmac_id[0] = adapter->pmac_id[i + 1];
289                         return 0;
290                 }
291         }
292
293         return be_cmd_pmac_add(adapter, mac, adapter->if_handle,
294                                &adapter->pmac_id[0], 0);
295 }
296
297 static void be_dev_mac_del(struct be_adapter *adapter, int pmac_id)
298 {
299         int i;
300
301         /* Skip deletion if the programmed mac is
302          * being used in uc-list
303          */
304         for (i = 0; i < adapter->uc_macs; i++) {
305                 if (adapter->pmac_id[i + 1] == pmac_id)
306                         return;
307         }
308         be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
309 }
310
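/* ndo_set_mac_address handler: programs the new MAC and confirms with the FW
 * that it is active before updating netdev->dev_addr.
 */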
311 static int be_mac_addr_set(struct net_device *netdev, void *p)
312 {
313         struct be_adapter *adapter = netdev_priv(netdev);
314         struct device *dev = &adapter->pdev->dev;
315         struct sockaddr *addr = p;
316         int status;
317         u8 mac[ETH_ALEN];
318         u32 old_pmac_id = adapter->pmac_id[0];
319
320         if (!is_valid_ether_addr(addr->sa_data))
321                 return -EADDRNOTAVAIL;
322
323         /* Proceed further only if the user-provided MAC is different
324          * from the active MAC
325          */
326         if (ether_addr_equal(addr->sa_data, adapter->dev_mac))
327                 return 0;
328
329         /* BE3 VFs without FILTMGMT privilege are not allowed to change their
330          * MAC address
331          */
332         if (BEx_chip(adapter) && be_virtfn(adapter) &&
333             !check_privilege(adapter, BE_PRIV_FILTMGMT))
334                 return -EPERM;
335
336         /* if device is not running, copy MAC to netdev->dev_addr */
337         if (!netif_running(netdev))
338                 goto done;
339
340         /* The PMAC_ADD cmd may fail if the VF doesn't have FILTMGMT
341          * privilege or if the PF did not provision the new MAC address.
342          * On BE3, this cmd will always fail if the VF doesn't have the
343          * FILTMGMT privilege. This failure is OK only if the PF had
344          * programmed the MAC for the VF.
345          */
346         mutex_lock(&adapter->rx_filter_lock);
347         status = be_dev_mac_add(adapter, (u8 *)addr->sa_data);
348         if (!status) {
349
350                 /* Delete the old programmed MAC. This call may fail if the
351                  * old MAC was already deleted by the PF driver.
352                  */
353                 if (adapter->pmac_id[0] != old_pmac_id)
354                         be_dev_mac_del(adapter, old_pmac_id);
355         }
356
357         mutex_unlock(&adapter->rx_filter_lock);
358         /* Decide whether the new MAC was successfully activated only after
359          * querying the FW
360          */
361         status = be_cmd_get_active_mac(adapter, adapter->pmac_id[0], mac,
362                                        adapter->if_handle, true, 0);
363         if (status)
364                 goto err;
365
366         /* The MAC change did not happen, either due to lack of privilege
367          * or because the PF didn't pre-provision the new MAC.
368          */
369         if (!ether_addr_equal(addr->sa_data, mac)) {
370                 status = -EPERM;
371                 goto err;
372         }
373
374         /* Remember currently programmed MAC */
375         ether_addr_copy(adapter->dev_mac, addr->sa_data);
376 done:
377         ether_addr_copy(netdev->dev_addr, addr->sa_data);
378         dev_info(dev, "MAC address changed to %pM\n", addr->sa_data);
379         return 0;
380 err:
381         dev_warn(dev, "MAC address change to %pM failed\n", addr->sa_data);
382         return status;
383 }
384
385 /* BE2 supports only v0 cmd */
386 static void *hw_stats_from_cmd(struct be_adapter *adapter)
387 {
388         if (BE2_chip(adapter)) {
389                 struct be_cmd_resp_get_stats_v0 *cmd = adapter->stats_cmd.va;
390
391                 return &cmd->hw_stats;
392         } else if (BE3_chip(adapter)) {
393                 struct be_cmd_resp_get_stats_v1 *cmd = adapter->stats_cmd.va;
394
395                 return &cmd->hw_stats;
396         } else {
397                 struct be_cmd_resp_get_stats_v2 *cmd = adapter->stats_cmd.va;
398
399                 return &cmd->hw_stats;
400         }
401 }
402
403 /* BE2 supports only v0 cmd */
404 static void *be_erx_stats_from_cmd(struct be_adapter *adapter)
405 {
406         if (BE2_chip(adapter)) {
407                 struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
408
409                 return &hw_stats->erx;
410         } else if (BE3_chip(adapter)) {
411                 struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
412
413                 return &hw_stats->erx;
414         } else {
415                 struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
416
417                 return &hw_stats->erx;
418         }
419 }
420
421 static void populate_be_v0_stats(struct be_adapter *adapter)
422 {
423         struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
424         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
425         struct be_rxf_stats_v0 *rxf_stats = &hw_stats->rxf;
426         struct be_port_rxf_stats_v0 *port_stats =
427                                         &rxf_stats->port[adapter->port_num];
428         struct be_drv_stats *drvs = &adapter->drv_stats;
429
430         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
431         drvs->rx_pause_frames = port_stats->rx_pause_frames;
432         drvs->rx_crc_errors = port_stats->rx_crc_errors;
433         drvs->rx_control_frames = port_stats->rx_control_frames;
434         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
435         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
436         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
437         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
438         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
439         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
440         drvs->rxpp_fifo_overflow_drop = port_stats->rx_fifo_overflow;
441         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
442         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
443         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
444         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
445         drvs->rx_input_fifo_overflow_drop = port_stats->rx_input_fifo_overflow;
446         drvs->rx_dropped_header_too_small =
447                 port_stats->rx_dropped_header_too_small;
448         drvs->rx_address_filtered =
449                                         port_stats->rx_address_filtered +
450                                         port_stats->rx_vlan_filtered;
451         drvs->rx_alignment_symbol_errors =
452                 port_stats->rx_alignment_symbol_errors;
453
454         drvs->tx_pauseframes = port_stats->tx_pauseframes;
455         drvs->tx_controlframes = port_stats->tx_controlframes;
456
457         if (adapter->port_num)
458                 drvs->jabber_events = rxf_stats->port1_jabber_events;
459         else
460                 drvs->jabber_events = rxf_stats->port0_jabber_events;
461         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
462         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
463         drvs->forwarded_packets = rxf_stats->forwarded_packets;
464         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
465         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
466         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
467         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
468 }
469
470 static void populate_be_v1_stats(struct be_adapter *adapter)
471 {
472         struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
473         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
474         struct be_rxf_stats_v1 *rxf_stats = &hw_stats->rxf;
475         struct be_port_rxf_stats_v1 *port_stats =
476                                         &rxf_stats->port[adapter->port_num];
477         struct be_drv_stats *drvs = &adapter->drv_stats;
478
479         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
480         drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
481         drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
482         drvs->rx_pause_frames = port_stats->rx_pause_frames;
483         drvs->rx_crc_errors = port_stats->rx_crc_errors;
484         drvs->rx_control_frames = port_stats->rx_control_frames;
485         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
486         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
487         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
488         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
489         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
490         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
491         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
492         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
493         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
494         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
495         drvs->rx_dropped_header_too_small =
496                 port_stats->rx_dropped_header_too_small;
497         drvs->rx_input_fifo_overflow_drop =
498                 port_stats->rx_input_fifo_overflow_drop;
499         drvs->rx_address_filtered = port_stats->rx_address_filtered;
500         drvs->rx_alignment_symbol_errors =
501                 port_stats->rx_alignment_symbol_errors;
502         drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
503         drvs->tx_pauseframes = port_stats->tx_pauseframes;
504         drvs->tx_controlframes = port_stats->tx_controlframes;
505         drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
506         drvs->jabber_events = port_stats->jabber_events;
507         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
508         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
509         drvs->forwarded_packets = rxf_stats->forwarded_packets;
510         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
511         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
512         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
513         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
514 }
515
516 static void populate_be_v2_stats(struct be_adapter *adapter)
517 {
518         struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
519         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
520         struct be_rxf_stats_v2 *rxf_stats = &hw_stats->rxf;
521         struct be_port_rxf_stats_v2 *port_stats =
522                                         &rxf_stats->port[adapter->port_num];
523         struct be_drv_stats *drvs = &adapter->drv_stats;
524
525         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
526         drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
527         drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
528         drvs->rx_pause_frames = port_stats->rx_pause_frames;
529         drvs->rx_crc_errors = port_stats->rx_crc_errors;
530         drvs->rx_control_frames = port_stats->rx_control_frames;
531         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
532         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
533         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
534         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
535         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
536         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
537         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
538         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
539         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
540         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
541         drvs->rx_dropped_header_too_small =
542                 port_stats->rx_dropped_header_too_small;
543         drvs->rx_input_fifo_overflow_drop =
544                 port_stats->rx_input_fifo_overflow_drop;
545         drvs->rx_address_filtered = port_stats->rx_address_filtered;
546         drvs->rx_alignment_symbol_errors =
547                 port_stats->rx_alignment_symbol_errors;
548         drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
549         drvs->tx_pauseframes = port_stats->tx_pauseframes;
550         drvs->tx_controlframes = port_stats->tx_controlframes;
551         drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
552         drvs->jabber_events = port_stats->jabber_events;
553         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
554         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
555         drvs->forwarded_packets = rxf_stats->forwarded_packets;
556         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
557         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
558         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
559         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
560         if (be_roce_supported(adapter)) {
561                 drvs->rx_roce_bytes_lsd = port_stats->roce_bytes_received_lsd;
562                 drvs->rx_roce_bytes_msd = port_stats->roce_bytes_received_msd;
563                 drvs->rx_roce_frames = port_stats->roce_frames_received;
564                 drvs->roce_drops_crc = port_stats->roce_drops_crc;
565                 drvs->roce_drops_payload_len =
566                         port_stats->roce_drops_payload_len;
567         }
568 }
569
570 static void populate_lancer_stats(struct be_adapter *adapter)
571 {
572         struct be_drv_stats *drvs = &adapter->drv_stats;
573         struct lancer_pport_stats *pport_stats = pport_stats_from_cmd(adapter);
574
575         be_dws_le_to_cpu(pport_stats, sizeof(*pport_stats));
576         drvs->rx_pause_frames = pport_stats->rx_pause_frames_lo;
577         drvs->rx_crc_errors = pport_stats->rx_crc_errors_lo;
578         drvs->rx_control_frames = pport_stats->rx_control_frames_lo;
579         drvs->rx_in_range_errors = pport_stats->rx_in_range_errors;
580         drvs->rx_frame_too_long = pport_stats->rx_frames_too_long_lo;
581         drvs->rx_dropped_runt = pport_stats->rx_dropped_runt;
582         drvs->rx_ip_checksum_errs = pport_stats->rx_ip_checksum_errors;
583         drvs->rx_tcp_checksum_errs = pport_stats->rx_tcp_checksum_errors;
584         drvs->rx_udp_checksum_errs = pport_stats->rx_udp_checksum_errors;
585         drvs->rx_dropped_tcp_length =
586                                 pport_stats->rx_dropped_invalid_tcp_length;
587         drvs->rx_dropped_too_small = pport_stats->rx_dropped_too_small;
588         drvs->rx_dropped_too_short = pport_stats->rx_dropped_too_short;
589         drvs->rx_out_range_errors = pport_stats->rx_out_of_range_errors;
590         drvs->rx_dropped_header_too_small =
591                                 pport_stats->rx_dropped_header_too_small;
592         drvs->rx_input_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
593         drvs->rx_address_filtered =
594                                         pport_stats->rx_address_filtered +
595                                         pport_stats->rx_vlan_filtered;
596         drvs->rx_alignment_symbol_errors = pport_stats->rx_symbol_errors_lo;
597         drvs->rxpp_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
598         drvs->tx_pauseframes = pport_stats->tx_pause_frames_lo;
599         drvs->tx_controlframes = pport_stats->tx_control_frames_lo;
600         drvs->jabber_events = pport_stats->rx_jabbers;
601         drvs->forwarded_packets = pport_stats->num_forwards_lo;
602         drvs->rx_drops_mtu = pport_stats->rx_drops_mtu_lo;
603         drvs->rx_drops_too_many_frags =
604                                 pport_stats->rx_drops_too_many_frags_lo;
605 }
606
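/* Fold a 16-bit HW counter that wraps at 65535 into a 32-bit SW accumulator:
 * the low 16 bits track the latest HW sample and the high 16 bits count the
 * wrap-arounds observed so far.
 */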
607 static void accumulate_16bit_val(u32 *acc, u16 val)
608 {
609 #define lo(x)                   (x & 0xFFFF)
610 #define hi(x)                   (x & 0xFFFF0000)
611         bool wrapped = val < lo(*acc);
612         u32 newacc = hi(*acc) + val;
613
614         if (wrapped)
615                 newacc += 65536;
616         WRITE_ONCE(*acc, newacc);
617 }
618
619 static void populate_erx_stats(struct be_adapter *adapter,
620                                struct be_rx_obj *rxo, u32 erx_stat)
621 {
622         if (!BEx_chip(adapter))
623                 rx_stats(rxo)->rx_drops_no_frags = erx_stat;
624         else
625                 /* The erx HW counter below can actually wrap around after
626                  * 65535; the driver accumulates it into a 32-bit value.
627                  */
628                 accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags,
629                                      (u16)erx_stat);
630 }
631
632 void be_parse_stats(struct be_adapter *adapter)
633 {
634         struct be_erx_stats_v2 *erx = be_erx_stats_from_cmd(adapter);
635         struct be_rx_obj *rxo;
636         int i;
637         u32 erx_stat;
638
639         if (lancer_chip(adapter)) {
640                 populate_lancer_stats(adapter);
641         } else {
642                 if (BE2_chip(adapter))
643                         populate_be_v0_stats(adapter);
644                 else if (BE3_chip(adapter))
645                         /* for BE3 */
646                         populate_be_v1_stats(adapter);
647                 else
648                         populate_be_v2_stats(adapter);
649
650                 /* erx_v2 is longer than v0, v1. use v2 for v0, v1 access */
651                 for_all_rx_queues(adapter, rxo, i) {
652                         erx_stat = erx->rx_drops_no_fragments[rxo->q.id];
653                         populate_erx_stats(adapter, rxo, erx_stat);
654                 }
655         }
656 }
657
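/* ndo_get_stats64 handler: aggregate the per-queue SW counters (under
 * u64_stats sync) and fold in the HW error counters parsed into drv_stats.
 */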
658 static void be_get_stats64(struct net_device *netdev,
659                            struct rtnl_link_stats64 *stats)
660 {
661         struct be_adapter *adapter = netdev_priv(netdev);
662         struct be_drv_stats *drvs = &adapter->drv_stats;
663         struct be_rx_obj *rxo;
664         struct be_tx_obj *txo;
665         u64 pkts, bytes;
666         unsigned int start;
667         int i;
668
669         for_all_rx_queues(adapter, rxo, i) {
670                 const struct be_rx_stats *rx_stats = rx_stats(rxo);
671
672                 do {
673                         start = u64_stats_fetch_begin_irq(&rx_stats->sync);
674                         pkts = rx_stats(rxo)->rx_pkts;
675                         bytes = rx_stats(rxo)->rx_bytes;
676                 } while (u64_stats_fetch_retry_irq(&rx_stats->sync, start));
677                 stats->rx_packets += pkts;
678                 stats->rx_bytes += bytes;
679                 stats->multicast += rx_stats(rxo)->rx_mcast_pkts;
680                 stats->rx_dropped += rx_stats(rxo)->rx_drops_no_skbs +
681                                         rx_stats(rxo)->rx_drops_no_frags;
682         }
683
684         for_all_tx_queues(adapter, txo, i) {
685                 const struct be_tx_stats *tx_stats = tx_stats(txo);
686
687                 do {
688                         start = u64_stats_fetch_begin_irq(&tx_stats->sync);
689                         pkts = tx_stats(txo)->tx_pkts;
690                         bytes = tx_stats(txo)->tx_bytes;
691                 } while (u64_stats_fetch_retry_irq(&tx_stats->sync, start));
692                 stats->tx_packets += pkts;
693                 stats->tx_bytes += bytes;
694         }
695
696         /* bad pkts received */
697         stats->rx_errors = drvs->rx_crc_errors +
698                 drvs->rx_alignment_symbol_errors +
699                 drvs->rx_in_range_errors +
700                 drvs->rx_out_range_errors +
701                 drvs->rx_frame_too_long +
702                 drvs->rx_dropped_too_small +
703                 drvs->rx_dropped_too_short +
704                 drvs->rx_dropped_header_too_small +
705                 drvs->rx_dropped_tcp_length +
706                 drvs->rx_dropped_runt;
707
708         /* detailed rx errors */
709         stats->rx_length_errors = drvs->rx_in_range_errors +
710                 drvs->rx_out_range_errors +
711                 drvs->rx_frame_too_long;
712
713         stats->rx_crc_errors = drvs->rx_crc_errors;
714
715         /* frame alignment errors */
716         stats->rx_frame_errors = drvs->rx_alignment_symbol_errors;
717
718         /* receiver fifo overrun */
719         /* drops_no_pbuf is not per i/f, it's per BE card */
720         stats->rx_fifo_errors = drvs->rxpp_fifo_overflow_drop +
721                                 drvs->rx_input_fifo_overflow_drop +
722                                 drvs->rx_drops_no_pbuf;
723 }
724
725 void be_link_status_update(struct be_adapter *adapter, u8 link_status)
726 {
727         struct net_device *netdev = adapter->netdev;
728
729         if (!(adapter->flags & BE_FLAGS_LINK_STATUS_INIT)) {
730                 netif_carrier_off(netdev);
731                 adapter->flags |= BE_FLAGS_LINK_STATUS_INIT;
732         }
733
734         if (link_status)
735                 netif_carrier_on(netdev);
736         else
737                 netif_carrier_off(netdev);
738
739         netdev_info(netdev, "Link is %s\n", link_status ? "Up" : "Down");
740 }
741
742 static int be_gso_hdr_len(struct sk_buff *skb)
743 {
744         if (skb->encapsulation)
745                 return skb_inner_transport_offset(skb) +
746                        inner_tcp_hdrlen(skb);
747         return skb_transport_offset(skb) + tcp_hdrlen(skb);
748 }
749
750 static void be_tx_stats_update(struct be_tx_obj *txo, struct sk_buff *skb)
751 {
752         struct be_tx_stats *stats = tx_stats(txo);
753         u32 tx_pkts = skb_shinfo(skb)->gso_segs ? : 1;
754         /* Account for headers which get duplicated in TSO pkt */
755         u32 dup_hdr_len = tx_pkts > 1 ? be_gso_hdr_len(skb) * (tx_pkts - 1) : 0;
756
757         u64_stats_update_begin(&stats->sync);
758         stats->tx_reqs++;
759         stats->tx_bytes += skb->len + dup_hdr_len;
760         stats->tx_pkts += tx_pkts;
761         if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL)
762                 stats->tx_vxlan_offload_pkts += tx_pkts;
763         u64_stats_update_end(&stats->sync);
764 }
765
766 /* Returns number of WRBs needed for the skb */
767 static u32 skb_wrb_cnt(struct sk_buff *skb)
768 {
769         /* +1 for the header wrb */
770         return 1 + (skb_headlen(skb) ? 1 : 0) + skb_shinfo(skb)->nr_frags;
771 }
772
773 static inline void wrb_fill(struct be_eth_wrb *wrb, u64 addr, int len)
774 {
775         wrb->frag_pa_hi = cpu_to_le32(upper_32_bits(addr));
776         wrb->frag_pa_lo = cpu_to_le32(lower_32_bits(addr));
777         wrb->frag_len = cpu_to_le32(len & ETH_WRB_FRAG_LEN_MASK);
778         wrb->rsvd0 = 0;
779 }
780
781 /* A dummy wrb is just all zeros. Using a separate routine for dummy-wrb
782  * to avoid the swap and shift/mask operations in wrb_fill().
783  */
784 static inline void wrb_fill_dummy(struct be_eth_wrb *wrb)
785 {
786         wrb->frag_pa_hi = 0;
787         wrb->frag_pa_lo = 0;
788         wrb->frag_len = 0;
789         wrb->rsvd0 = 0;
790 }
791
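/* Return the VLAN tag to place in the TX WRB. If the skb's priority is not
 * in the adapter's available priority bitmap, substitute the recommended
 * priority bits.
 */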
792 static inline u16 be_get_tx_vlan_tag(struct be_adapter *adapter,
793                                      struct sk_buff *skb)
794 {
795         u8 vlan_prio;
796         u16 vlan_tag;
797
798         vlan_tag = skb_vlan_tag_get(skb);
799         vlan_prio = skb_vlan_tag_get_prio(skb);
800         /* If vlan priority provided by OS is NOT in available bmap */
801         if (!(adapter->vlan_prio_bmap & (1 << vlan_prio)))
802                 vlan_tag = (vlan_tag & ~VLAN_PRIO_MASK) |
803                                 adapter->recommended_prio_bits;
804
805         return vlan_tag;
806 }
807
808 /* Used only for IP tunnel packets */
809 static u16 skb_inner_ip_proto(struct sk_buff *skb)
810 {
811         return (inner_ip_hdr(skb)->version == 4) ?
812                 inner_ip_hdr(skb)->protocol : inner_ipv6_hdr(skb)->nexthdr;
813 }
814
815 static u16 skb_ip_proto(struct sk_buff *skb)
816 {
817         return (ip_hdr(skb)->version == 4) ?
818                 ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr;
819 }
820
821 static inline bool be_is_txq_full(struct be_tx_obj *txo)
822 {
823         return atomic_read(&txo->q.used) + BE_MAX_TX_FRAG_COUNT >= txo->q.len;
824 }
825
826 static inline bool be_can_txq_wake(struct be_tx_obj *txo)
827 {
828         return atomic_read(&txo->q.used) < txo->q.len / 2;
829 }
830
831 static inline bool be_is_tx_compl_pending(struct be_tx_obj *txo)
832 {
833         return atomic_read(&txo->q.used) > txo->pend_wrb_cnt;
834 }
835
836 static void be_get_wrb_params_from_skb(struct be_adapter *adapter,
837                                        struct sk_buff *skb,
838                                        struct be_wrb_params *wrb_params)
839 {
840         u16 proto;
841
842         if (skb_is_gso(skb)) {
843                 BE_WRB_F_SET(wrb_params->features, LSO, 1);
844                 wrb_params->lso_mss = skb_shinfo(skb)->gso_size;
845                 if (skb_is_gso_v6(skb) && !lancer_chip(adapter))
846                         BE_WRB_F_SET(wrb_params->features, LSO6, 1);
847         } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
848                 if (skb->encapsulation) {
849                         BE_WRB_F_SET(wrb_params->features, IPCS, 1);
850                         proto = skb_inner_ip_proto(skb);
851                 } else {
852                         proto = skb_ip_proto(skb);
853                 }
854                 if (proto == IPPROTO_TCP)
855                         BE_WRB_F_SET(wrb_params->features, TCPCS, 1);
856                 else if (proto == IPPROTO_UDP)
857                         BE_WRB_F_SET(wrb_params->features, UDPCS, 1);
858         }
859
860         if (skb_vlan_tag_present(skb)) {
861                 BE_WRB_F_SET(wrb_params->features, VLAN, 1);
862                 wrb_params->vlan_tag = be_get_tx_vlan_tag(adapter, skb);
863         }
864
865         BE_WRB_F_SET(wrb_params->features, CRC, 1);
866 }
867
868 static void wrb_fill_hdr(struct be_adapter *adapter,
869                          struct be_eth_hdr_wrb *hdr,
870                          struct be_wrb_params *wrb_params,
871                          struct sk_buff *skb)
872 {
873         memset(hdr, 0, sizeof(*hdr));
874
875         SET_TX_WRB_HDR_BITS(crc, hdr,
876                             BE_WRB_F_GET(wrb_params->features, CRC));
877         SET_TX_WRB_HDR_BITS(ipcs, hdr,
878                             BE_WRB_F_GET(wrb_params->features, IPCS));
879         SET_TX_WRB_HDR_BITS(tcpcs, hdr,
880                             BE_WRB_F_GET(wrb_params->features, TCPCS));
881         SET_TX_WRB_HDR_BITS(udpcs, hdr,
882                             BE_WRB_F_GET(wrb_params->features, UDPCS));
883
884         SET_TX_WRB_HDR_BITS(lso, hdr,
885                             BE_WRB_F_GET(wrb_params->features, LSO));
886         SET_TX_WRB_HDR_BITS(lso6, hdr,
887                             BE_WRB_F_GET(wrb_params->features, LSO6));
888         SET_TX_WRB_HDR_BITS(lso_mss, hdr, wrb_params->lso_mss);
889
890         /* Hack to skip HW VLAN tagging needs evt = 1, compl = 0. When this
891          * hack is not needed, the evt bit is set while ringing DB.
892          */
893         SET_TX_WRB_HDR_BITS(event, hdr,
894                             BE_WRB_F_GET(wrb_params->features, VLAN_SKIP_HW));
895         SET_TX_WRB_HDR_BITS(vlan, hdr,
896                             BE_WRB_F_GET(wrb_params->features, VLAN));
897         SET_TX_WRB_HDR_BITS(vlan_tag, hdr, wrb_params->vlan_tag);
898
899         SET_TX_WRB_HDR_BITS(num_wrb, hdr, skb_wrb_cnt(skb));
900         SET_TX_WRB_HDR_BITS(len, hdr, skb->len);
901         SET_TX_WRB_HDR_BITS(mgmt, hdr,
902                             BE_WRB_F_GET(wrb_params->features, OS2BMC));
903 }
904
905 static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb,
906                           bool unmap_single)
907 {
908         dma_addr_t dma;
909         u32 frag_len = le32_to_cpu(wrb->frag_len);
910
911
912         dma = (u64)le32_to_cpu(wrb->frag_pa_hi) << 32 |
913                 (u64)le32_to_cpu(wrb->frag_pa_lo);
914         if (frag_len) {
915                 if (unmap_single)
916                         dma_unmap_single(dev, dma, frag_len, DMA_TO_DEVICE);
917                 else
918                         dma_unmap_page(dev, dma, frag_len, DMA_TO_DEVICE);
919         }
920 }
921
922 /* Grab a WRB header for xmit */
923 static u32 be_tx_get_wrb_hdr(struct be_tx_obj *txo)
924 {
925         u32 head = txo->q.head;
926
927         queue_head_inc(&txo->q);
928         return head;
929 }
930
931 /* Set up the WRB header for xmit */
932 static void be_tx_setup_wrb_hdr(struct be_adapter *adapter,
933                                 struct be_tx_obj *txo,
934                                 struct be_wrb_params *wrb_params,
935                                 struct sk_buff *skb, u16 head)
936 {
937         u32 num_frags = skb_wrb_cnt(skb);
938         struct be_queue_info *txq = &txo->q;
939         struct be_eth_hdr_wrb *hdr = queue_index_node(txq, head);
940
941         wrb_fill_hdr(adapter, hdr, wrb_params, skb);
942         be_dws_cpu_to_le(hdr, sizeof(*hdr));
943
944         BUG_ON(txo->sent_skb_list[head]);
945         txo->sent_skb_list[head] = skb;
946         txo->last_req_hdr = head;
947         atomic_add(num_frags, &txq->used);
948         txo->last_req_wrb_cnt = num_frags;
949         txo->pend_wrb_cnt += num_frags;
950 }
951
952 /* Setup a WRB fragment (buffer descriptor) for xmit */
953 static void be_tx_setup_wrb_frag(struct be_tx_obj *txo, dma_addr_t busaddr,
954                                  int len)
955 {
956         struct be_eth_wrb *wrb;
957         struct be_queue_info *txq = &txo->q;
958
959         wrb = queue_head_node(txq);
960         wrb_fill(wrb, busaddr, len);
961         queue_head_inc(txq);
962 }
963
964 /* Bring the queue back to the state it was in before be_xmit_enqueue() routine
965  * was invoked. The producer index is restored to the previous packet and the
966  * WRBs of the current packet are unmapped. Invoked to handle tx setup errors.
967  */
968 static void be_xmit_restore(struct be_adapter *adapter,
969                             struct be_tx_obj *txo, u32 head, bool map_single,
970                             u32 copied)
971 {
972         struct device *dev;
973         struct be_eth_wrb *wrb;
974         struct be_queue_info *txq = &txo->q;
975
976         dev = &adapter->pdev->dev;
977         txq->head = head;
978
979         /* skip the first wrb (hdr); it's not mapped */
980         queue_head_inc(txq);
981         while (copied) {
982                 wrb = queue_head_node(txq);
983                 unmap_tx_frag(dev, wrb, map_single);
984                 map_single = false;
985                 copied -= le32_to_cpu(wrb->frag_len);
986                 queue_head_inc(txq);
987         }
988
989         txq->head = head;
990 }
991
992 /* Enqueue the given packet for transmit. This routine allocates WRBs for the
993  * packet, dma maps the packet buffers and sets up the WRBs. Returns the number
994  * of WRBs used up by the packet.
995  */
996 static u32 be_xmit_enqueue(struct be_adapter *adapter, struct be_tx_obj *txo,
997                            struct sk_buff *skb,
998                            struct be_wrb_params *wrb_params)
999 {
1000         u32 i, copied = 0, wrb_cnt = skb_wrb_cnt(skb);
1001         struct device *dev = &adapter->pdev->dev;
1002         bool map_single = false;
1003         u32 head;
1004         dma_addr_t busaddr;
1005         int len;
1006
1007         head = be_tx_get_wrb_hdr(txo);
1008
1009         if (skb->len > skb->data_len) {
1010                 len = skb_headlen(skb);
1011
1012                 busaddr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE);
1013                 if (dma_mapping_error(dev, busaddr))
1014                         goto dma_err;
1015                 map_single = true;
1016                 be_tx_setup_wrb_frag(txo, busaddr, len);
1017                 copied += len;
1018         }
1019
1020         for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1021                 const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
1022                 len = skb_frag_size(frag);
1023
1024                 busaddr = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE);
1025                 if (dma_mapping_error(dev, busaddr))
1026                         goto dma_err;
1027                 be_tx_setup_wrb_frag(txo, busaddr, len);
1028                 copied += len;
1029         }
1030
1031         be_tx_setup_wrb_hdr(adapter, txo, wrb_params, skb, head);
1032
1033         be_tx_stats_update(txo, skb);
1034         return wrb_cnt;
1035
1036 dma_err:
1037         adapter->drv_stats.dma_map_errors++;
1038         be_xmit_restore(adapter, txo, head, map_single, copied);
1039         return 0;
1040 }
1041
1042 static inline int qnq_async_evt_rcvd(struct be_adapter *adapter)
1043 {
1044         return adapter->flags & BE_FLAGS_QNQ_ASYNC_EVT_RCVD;
1045 }
1046
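/* Insert the VLAN tag (and the outer QnQ tag, if configured) directly into
 * the packet data for cases where HW VLAN insertion must be skipped.
 * May return a new skb, or NULL on allocation failure.
 */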
1047 static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter,
1048                                              struct sk_buff *skb,
1049                                              struct be_wrb_params
1050                                              *wrb_params)
1051 {
1052         bool insert_vlan = false;
1053         u16 vlan_tag = 0;
1054
1055         skb = skb_share_check(skb, GFP_ATOMIC);
1056         if (unlikely(!skb))
1057                 return skb;
1058
1059         if (skb_vlan_tag_present(skb)) {
1060                 vlan_tag = be_get_tx_vlan_tag(adapter, skb);
1061                 insert_vlan = true;
1062         }
1063
1064         if (qnq_async_evt_rcvd(adapter) && adapter->pvid) {
1065                 if (!insert_vlan) {
1066                         vlan_tag = adapter->pvid;
1067                         insert_vlan = true;
1068                 }
1069                 /* F/W workaround: setting skip_hw_vlan = 1 informs the F/W to
1070                  * skip VLAN insertion
1071                  */
1072                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1073         }
1074
1075         if (insert_vlan) {
1076                 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1077                                                 vlan_tag);
1078                 if (unlikely(!skb))
1079                         return skb;
1080                 __vlan_hwaccel_clear_tag(skb);
1081         }
1082
1083         /* Insert the outer VLAN, if any */
1084         if (adapter->qnq_vid) {
1085                 vlan_tag = adapter->qnq_vid;
1086                 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1087                                                 vlan_tag);
1088                 if (unlikely(!skb))
1089                         return skb;
1090                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1091         }
1092
1093         return skb;
1094 }
1095
1096 static bool be_ipv6_exthdr_check(struct sk_buff *skb)
1097 {
1098         struct ethhdr *eh = (struct ethhdr *)skb->data;
1099         u16 offset = ETH_HLEN;
1100
1101         if (eh->h_proto == htons(ETH_P_IPV6)) {
1102                 struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + offset);
1103
1104                 offset += sizeof(struct ipv6hdr);
1105                 if (ip6h->nexthdr != NEXTHDR_TCP &&
1106                     ip6h->nexthdr != NEXTHDR_UDP) {
1107                         struct ipv6_opt_hdr *ehdr =
1108                                 (struct ipv6_opt_hdr *)(skb->data + offset);
1109
1110                         /* offending pkt: 2nd byte following IPv6 hdr is 0xff */
1111                         if (ehdr->hdrlen == 0xff)
1112                                 return true;
1113                 }
1114         }
1115         return false;
1116 }
1117
1118 static int be_vlan_tag_tx_chk(struct be_adapter *adapter, struct sk_buff *skb)
1119 {
1120         return skb_vlan_tag_present(skb) || adapter->pvid || adapter->qnq_vid;
1121 }
1122
1123 static int be_ipv6_tx_stall_chk(struct be_adapter *adapter, struct sk_buff *skb)
1124 {
1125         return BE3_chip(adapter) && be_ipv6_exthdr_check(skb);
1126 }
1127
1128 static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter,
1129                                                   struct sk_buff *skb,
1130                                                   struct be_wrb_params
1131                                                   *wrb_params)
1132 {
1133         struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1134         unsigned int eth_hdr_len;
1135         struct iphdr *ip;
1136
1137         /* For padded packets, BE HW modifies the tot_len field in the IP
1138          * header incorrectly when the VLAN tag is inserted by HW.
1139          * For padded packets, Lancer computes an incorrect checksum.
1140          */
1141         eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ?
1142                                                 VLAN_ETH_HLEN : ETH_HLEN;
1143         if (skb->len <= 60 &&
1144             (lancer_chip(adapter) || skb_vlan_tag_present(skb)) &&
1145             is_ipv4_pkt(skb)) {
1146                 ip = (struct iphdr *)ip_hdr(skb);
1147                 pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len));
1148         }
1149
1150         /* If vlan tag is already inlined in the packet, skip HW VLAN
1151          * tagging in pvid-tagging mode
1152          */
1153         if (be_pvid_tagging_enabled(adapter) &&
1154             veh->h_vlan_proto == htons(ETH_P_8021Q))
1155                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1156
1157         /* HW has a bug wherein it will calculate CSUM for VLAN
1158          * pkts even when CSUM offload is disabled.
1159          * Manually insert the VLAN tag in such pkts.
1160          */
1161         if (skb->ip_summed != CHECKSUM_PARTIAL &&
1162             skb_vlan_tag_present(skb)) {
1163                 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1164                 if (unlikely(!skb))
1165                         goto err;
1166         }
1167
1168         /* HW may lock up when VLAN HW tagging is requested on
1169          * certain ipv6 packets. Drop such pkts if the HW workaround to
1170          * skip HW tagging is not enabled by FW.
1171          */
1172         if (unlikely(be_ipv6_tx_stall_chk(adapter, skb) &&
1173                      (adapter->pvid || adapter->qnq_vid) &&
1174                      !qnq_async_evt_rcvd(adapter)))
1175                 goto tx_drop;
1176
1177         /* Manual VLAN tag insertion to prevent:
1178          * ASIC lockup when the ASIC inserts VLAN tag into
1179          * certain ipv6 packets. Insert VLAN tags in driver,
1180          * and set event, completion, vlan bits accordingly
1181          * in the Tx WRB.
1182          */
1183         if (be_ipv6_tx_stall_chk(adapter, skb) &&
1184             be_vlan_tag_tx_chk(adapter, skb)) {
1185                 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1186                 if (unlikely(!skb))
1187                         goto err;
1188         }
1189
1190         return skb;
1191 tx_drop:
1192         dev_kfree_skb_any(skb);
1193 err:
1194         return NULL;
1195 }
1196
1197 static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter,
1198                                            struct sk_buff *skb,
1199                                            struct be_wrb_params *wrb_params)
1200 {
1201         int err;
1202
1203         /* Lancer, SH and BE3 in SRIOV mode have a bug wherein
1204          * packets that are 32 bytes or less may cause a transmit stall
1205          * on that port. The workaround is to pad such packets
1206          * (len <= 32 bytes) to a minimum length of 36 bytes.
1207          */
1208         if (skb->len <= 32) {
1209                 if (skb_put_padto(skb, 36))
1210                         return NULL;
1211         }
1212
1213         if (BEx_chip(adapter) || lancer_chip(adapter)) {
1214                 skb = be_lancer_xmit_workarounds(adapter, skb, wrb_params);
1215                 if (!skb)
1216                         return NULL;
1217         }
1218
1219         /* The stack can send us skbs with length greater than
1220          * what the HW can handle. Trim the extra bytes.
1221          */
1222         WARN_ON_ONCE(skb->len > BE_MAX_GSO_SIZE);
1223         err = pskb_trim(skb, BE_MAX_GSO_SIZE);
1224         WARN_ON(err);
1225
1226         return skb;
1227 }
1228
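/* Ring the TX doorbell for all WRBs enqueued since the last flush. The last
 * request is marked eventable and, on non-Lancer chips, a dummy WRB is added
 * when needed so that an even number of WRBs is notified.
 */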
1229 static void be_xmit_flush(struct be_adapter *adapter, struct be_tx_obj *txo)
1230 {
1231         struct be_queue_info *txq = &txo->q;
1232         struct be_eth_hdr_wrb *hdr = queue_index_node(txq, txo->last_req_hdr);
1233
1234         /* Mark the last request eventable if it hasn't been marked already */
1235         if (!(hdr->dw[2] & cpu_to_le32(TX_HDR_WRB_EVT)))
1236                 hdr->dw[2] |= cpu_to_le32(TX_HDR_WRB_EVT | TX_HDR_WRB_COMPL);
1237
1238         /* compose a dummy wrb if there is an odd number of wrbs to notify */
1239         if (!lancer_chip(adapter) && (txo->pend_wrb_cnt & 1)) {
1240                 wrb_fill_dummy(queue_head_node(txq));
1241                 queue_head_inc(txq);
1242                 atomic_inc(&txq->used);
1243                 txo->pend_wrb_cnt++;
1244                 hdr->dw[2] &= ~cpu_to_le32(TX_HDR_WRB_NUM_MASK <<
1245                                            TX_HDR_WRB_NUM_SHIFT);
1246                 hdr->dw[2] |= cpu_to_le32((txo->last_req_wrb_cnt + 1) <<
1247                                           TX_HDR_WRB_NUM_SHIFT);
1248         }
1249         be_txq_notify(adapter, txo, txo->pend_wrb_cnt);
1250         txo->pend_wrb_cnt = 0;
1251 }
1252
1253 /* OS2BMC related */
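/* When OS2BMC is enabled, selected host TX packets (ARP, DHCP, NetBIOS,
 * IPv6 ND/RA/RAS and unfiltered broadcast/multicast) are enqueued a second
 * time with the mgmt bit set, so a copy reaches the BMC sharing the port,
 * as dictated by the adapter's BMC filter mask.
 */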
1254
1255 #define DHCP_CLIENT_PORT        68
1256 #define DHCP_SERVER_PORT        67
1257 #define NET_BIOS_PORT1          137
1258 #define NET_BIOS_PORT2          138
1259 #define DHCPV6_RAS_PORT         547
1260
1261 #define is_mc_allowed_on_bmc(adapter, eh)       \
1262         (!is_multicast_filt_enabled(adapter) && \
1263          is_multicast_ether_addr(eh->h_dest) && \
1264          !is_broadcast_ether_addr(eh->h_dest))
1265
1266 #define is_bc_allowed_on_bmc(adapter, eh)       \
1267         (!is_broadcast_filt_enabled(adapter) && \
1268          is_broadcast_ether_addr(eh->h_dest))
1269
1270 #define is_arp_allowed_on_bmc(adapter, skb)     \
1271         (is_arp(skb) && is_arp_filt_enabled(adapter))
1272
1273 #define is_arp(skb)     (skb->protocol == htons(ETH_P_ARP))
1274
1275 #define is_arp_filt_enabled(adapter)    \
1276                 (adapter->bmc_filt_mask & (BMC_FILT_BROADCAST_ARP))
1277
1278 #define is_dhcp_client_filt_enabled(adapter)    \
1279                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_CLIENT)
1280
1281 #define is_dhcp_srvr_filt_enabled(adapter)      \
1282                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_SERVER)
1283
1284 #define is_nbios_filt_enabled(adapter)  \
1285                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_NET_BIOS)
1286
1287 #define is_ipv6_na_filt_enabled(adapter)        \
1288                 (adapter->bmc_filt_mask &       \
1289                         BMC_FILT_MULTICAST_IPV6_NEIGH_ADVER)
1290
1291 #define is_ipv6_ra_filt_enabled(adapter)        \
1292                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RA)
1293
1294 #define is_ipv6_ras_filt_enabled(adapter)       \
1295                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RAS)
1296
1297 #define is_broadcast_filt_enabled(adapter)      \
1298                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST)
1299
1300 #define is_multicast_filt_enabled(adapter)      \
1301                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST)
1302
1303 static bool be_send_pkt_to_bmc(struct be_adapter *adapter,
1304                                struct sk_buff **skb)
1305 {
1306         struct ethhdr *eh = (struct ethhdr *)(*skb)->data;
1307         bool os2bmc = false;
1308
1309         if (!be_is_os2bmc_enabled(adapter))
1310                 goto done;
1311
1312         if (!is_multicast_ether_addr(eh->h_dest))
1313                 goto done;
1314
1315         if (is_mc_allowed_on_bmc(adapter, eh) ||
1316             is_bc_allowed_on_bmc(adapter, eh) ||
1317             is_arp_allowed_on_bmc(adapter, (*skb))) {
1318                 os2bmc = true;
1319                 goto done;
1320         }
1321
1322         if ((*skb)->protocol == htons(ETH_P_IPV6)) {
1323                 struct ipv6hdr *hdr = ipv6_hdr((*skb));
1324                 u8 nexthdr = hdr->nexthdr;
1325
1326                 if (nexthdr == IPPROTO_ICMPV6) {
1327                         struct icmp6hdr *icmp6 = icmp6_hdr((*skb));
1328
1329                         switch (icmp6->icmp6_type) {
1330                         case NDISC_ROUTER_ADVERTISEMENT:
1331                                 os2bmc = is_ipv6_ra_filt_enabled(adapter);
1332                                 goto done;
1333                         case NDISC_NEIGHBOUR_ADVERTISEMENT:
1334                                 os2bmc = is_ipv6_na_filt_enabled(adapter);
1335                                 goto done;
1336                         default:
1337                                 break;
1338                         }
1339                 }
1340         }
1341
1342         if (is_udp_pkt((*skb))) {
1343                 struct udphdr *udp = udp_hdr((*skb));
1344
1345                 switch (ntohs(udp->dest)) {
1346                 case DHCP_CLIENT_PORT:
1347                         os2bmc = is_dhcp_client_filt_enabled(adapter);
1348                         goto done;
1349                 case DHCP_SERVER_PORT:
1350                         os2bmc = is_dhcp_srvr_filt_enabled(adapter);
1351                         goto done;
1352                 case NET_BIOS_PORT1:
1353                 case NET_BIOS_PORT2:
1354                         os2bmc = is_nbios_filt_enabled(adapter);
1355                         goto done;
1356                 case DHCPV6_RAS_PORT:
1357                         os2bmc = is_ipv6_ras_filt_enabled(adapter);
1358                         goto done;
1359                 default:
1360                         break;
1361                 }
1362         }
1363 done:
1364         /* For VLAN packets destined to the BMC, the ASIC expects the
1365          * VLAN tag to be inline in the packet.
1366          */
1367         if (os2bmc)
1368                 *skb = be_insert_vlan_in_pkt(adapter, *skb, NULL);
1369
1370         return os2bmc;
1371 }
1372
1373 static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev)
1374 {
1375         struct be_adapter *adapter = netdev_priv(netdev);
1376         u16 q_idx = skb_get_queue_mapping(skb);
1377         struct be_tx_obj *txo = &adapter->tx_obj[q_idx];
1378         struct be_wrb_params wrb_params = { 0 };
1379         bool flush = !skb->xmit_more;
1380         u16 wrb_cnt;
1381
1382         skb = be_xmit_workarounds(adapter, skb, &wrb_params);
1383         if (unlikely(!skb))
1384                 goto drop;
1385
1386         be_get_wrb_params_from_skb(adapter, skb, &wrb_params);
1387
1388         wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1389         if (unlikely(!wrb_cnt)) {
1390                 dev_kfree_skb_any(skb);
1391                 goto drop;
1392         }
1393
1394         /* If os2bmc is enabled and the pkt is destined to the BMC,
1395          * enqueue the pkt a 2nd time with the mgmt bit set.
1396          */
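        /* The same skb is then referenced by two sets of WRBs; skb_get()
         * below takes the extra reference so that each TX completion can
         * drop one.
         */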
1397         if (be_send_pkt_to_bmc(adapter, &skb)) {
1398                 BE_WRB_F_SET(wrb_params.features, OS2BMC, 1);
1399                 wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1400                 if (unlikely(!wrb_cnt))
1401                         goto drop;
1402                 else
1403                         skb_get(skb);
1404         }
1405
1406         if (be_is_txq_full(txo)) {
1407                 netif_stop_subqueue(netdev, q_idx);
1408                 tx_stats(txo)->tx_stops++;
1409         }
1410
1411         if (flush || __netif_subqueue_stopped(netdev, q_idx))
1412                 be_xmit_flush(adapter, txo);
1413
1414         return NETDEV_TX_OK;
1415 drop:
1416         tx_stats(txo)->tx_drv_drops++;
1417         /* Flush the already enqueued tx requests */
1418         if (flush && txo->pend_wrb_cnt)
1419                 be_xmit_flush(adapter, txo);
1420
1421         return NETDEV_TX_OK;
1422 }
1423
1424 static void be_tx_timeout(struct net_device *netdev)
1425 {
1426         struct be_adapter *adapter = netdev_priv(netdev);
1427         struct device *dev = &adapter->pdev->dev;
1428         struct be_tx_obj *txo;
1429         struct sk_buff *skb;
1430         struct tcphdr *tcphdr;
1431         struct udphdr *udphdr;
1432         u32 *entry;
1433         int status;
1434         int i, j;
1435
1436         for_all_tx_queues(adapter, txo, i) {
1437                 dev_info(dev, "TXQ Dump: %d H: %d T: %d used: %d, qid: 0x%x\n",
1438                          i, txo->q.head, txo->q.tail,
1439                          atomic_read(&txo->q.used), txo->q.id);
1440
1441                 entry = txo->q.dma_mem.va;
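                /* Each TX WRB is 16 bytes; dump every non-zero descriptor
                 * as four dwords.
                 */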
1442                 for (j = 0; j < TX_Q_LEN * 4; j += 4) {
1443                         if (entry[j] != 0 || entry[j + 1] != 0 ||
1444                             entry[j + 2] != 0 || entry[j + 3] != 0) {
1445                                 dev_info(dev, "Entry %d 0x%x 0x%x 0x%x 0x%x\n",
1446                                          j, entry[j], entry[j + 1],
1447                                          entry[j + 2], entry[j + 3]);
1448                         }
1449                 }
1450
1451                 entry = txo->cq.dma_mem.va;
1452                 dev_info(dev, "TXCQ Dump: %d  H: %d T: %d used: %d\n",
1453                          i, txo->cq.head, txo->cq.tail,
1454                          atomic_read(&txo->cq.used));
1455                 for (j = 0; j < TX_CQ_LEN * 4; j += 4) {
1456                         if (entry[j] != 0 || entry[j + 1] != 0 ||
1457                             entry[j + 2] != 0 || entry[j + 3] != 0) {
1458                                 dev_info(dev, "Entry %d 0x%x 0x%x 0x%x 0x%x\n",
1459                                          j, entry[j], entry[j + 1],
1460                                          entry[j + 2], entry[j + 3]);
1461                         }
1462                 }
1463
1464                 for (j = 0; j < TX_Q_LEN; j++) {
1465                         if (txo->sent_skb_list[j]) {
1466                                 skb = txo->sent_skb_list[j];
1467                                 if (ip_hdr(skb)->protocol == IPPROTO_TCP) {
1468                                         tcphdr = tcp_hdr(skb);
1469                                         dev_info(dev, "TCP source port %d\n",
1470                                                  ntohs(tcphdr->source));
1471                                         dev_info(dev, "TCP dest port %d\n",
1472                                                  ntohs(tcphdr->dest));
1473                                         dev_info(dev, "TCP sequence num %u\n",
1474                                                  ntohl(tcphdr->seq));
1475                                         dev_info(dev, "TCP ack_seq %u\n",
1476                                                  ntohl(tcphdr->ack_seq));
1477                                 } else if (ip_hdr(skb)->protocol ==
1478                                            IPPROTO_UDP) {
1479                                         udphdr = udp_hdr(skb);
1480                                         dev_info(dev, "UDP source port %d\n",
1481                                                  ntohs(udphdr->source));
1482                                         dev_info(dev, "UDP dest port %d\n",
1483                                                  ntohs(udphdr->dest));
1484                                 }
1485                                 dev_info(dev, "skb[%d] %p len %d proto 0x%x\n",
1486                                          j, skb, skb->len, skb->protocol);
1487                         }
1488                 }
1489         }
1490
1491         if (lancer_chip(adapter)) {
1492                 dev_info(dev, "Initiating reset due to tx timeout\n");
1493                 dev_info(dev, "Resetting adapter\n");
1494                 status = lancer_physdev_ctrl(adapter,
1495                                              PHYSDEV_CONTROL_FW_RESET_MASK);
1496                 if (status)
1497                         dev_err(dev, "Reset failed; reboot the server\n");
1498         }
1499 }
1500
1501 static inline bool be_in_all_promisc(struct be_adapter *adapter)
1502 {
1503         return (adapter->if_flags & BE_IF_FLAGS_ALL_PROMISCUOUS) ==
1504                         BE_IF_FLAGS_ALL_PROMISCUOUS;
1505 }
1506
1507 static int be_set_vlan_promisc(struct be_adapter *adapter)
1508 {
1509         struct device *dev = &adapter->pdev->dev;
1510         int status;
1511
1512         if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS)
1513                 return 0;
1514
1515         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, ON);
1516         if (!status) {
1517                 dev_info(dev, "Enabled VLAN promiscuous mode\n");
1518                 adapter->if_flags |= BE_IF_FLAGS_VLAN_PROMISCUOUS;
1519         } else {
1520                 dev_err(dev, "Failed to enable VLAN promiscuous mode\n");
1521         }
1522         return status;
1523 }
1524
1525 static int be_clear_vlan_promisc(struct be_adapter *adapter)
1526 {
1527         struct device *dev = &adapter->pdev->dev;
1528         int status;
1529
1530         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, OFF);
1531         if (!status) {
1532                 dev_info(dev, "Disabled VLAN promiscuous mode\n");
1533                 adapter->if_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
1534         }
1535         return status;
1536 }
1537
1538 /*
1539  * A max of 64 (BE_NUM_VLANS_SUPPORTED) vlans can be configured in BE.
1540  * If the user configures more, place BE in vlan promiscuous mode.
1541  */
1542 static int be_vid_config(struct be_adapter *adapter)
1543 {
1544         struct device *dev = &adapter->pdev->dev;
1545         u16 vids[BE_NUM_VLANS_SUPPORTED];
1546         u16 num = 0, i = 0;
1547         int status = 0;
1548
1549         /* No need to change the VLAN state if the I/F is in promiscuous */
1550         if (adapter->netdev->flags & IFF_PROMISC)
1551                 return 0;
1552
1553         if (adapter->vlans_added > be_max_vlans(adapter))
1554                 return be_set_vlan_promisc(adapter);
1555
1556         if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
1557                 status = be_clear_vlan_promisc(adapter);
1558                 if (status)
1559                         return status;
1560         }
1561         /* Construct VLAN Table to give to HW */
1562         for_each_set_bit(i, adapter->vids, VLAN_N_VID)
1563                 vids[num++] = cpu_to_le16(i);
1564
1565         status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0);
1566         if (status) {
1567                 dev_err(dev, "Setting HW VLAN filtering failed\n");
1568                 /* Set to VLAN promisc mode as setting VLAN filter failed */
1569                 if (addl_status(status) == MCC_ADDL_STATUS_INSUFFICIENT_VLANS ||
1570                     addl_status(status) ==
1571                                 MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES)
1572                         return be_set_vlan_promisc(adapter);
1573         }
1574         return status;
1575 }
1576
1577 static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1578 {
1579         struct be_adapter *adapter = netdev_priv(netdev);
1580         int status = 0;
1581
1582         mutex_lock(&adapter->rx_filter_lock);
1583
1584         /* Packets with VID 0 are always received by Lancer by default */
1585         if (lancer_chip(adapter) && vid == 0)
1586                 goto done;
1587
1588         if (test_bit(vid, adapter->vids))
1589                 goto done;
1590
1591         set_bit(vid, adapter->vids);
1592         adapter->vlans_added++;
1593
1594         status = be_vid_config(adapter);
1595 done:
1596         mutex_unlock(&adapter->rx_filter_lock);
1597         return status;
1598 }
1599
1600 static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid)
1601 {
1602         struct be_adapter *adapter = netdev_priv(netdev);
1603         int status = 0;
1604
1605         mutex_lock(&adapter->rx_filter_lock);
1606
1607         /* Packets with VID 0 are always received by Lancer by default */
1608         if (lancer_chip(adapter) && vid == 0)
1609                 goto done;
1610
1611         if (!test_bit(vid, adapter->vids))
1612                 goto done;
1613
1614         clear_bit(vid, adapter->vids);
1615         adapter->vlans_added--;
1616
1617         status = be_vid_config(adapter);
1618 done:
1619         mutex_unlock(&adapter->rx_filter_lock);
1620         return status;
1621 }
1622
1623 static void be_set_all_promisc(struct be_adapter *adapter)
1624 {
1625         be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, ON);
1626         adapter->if_flags |= BE_IF_FLAGS_ALL_PROMISCUOUS;
1627 }
1628
1629 static void be_set_mc_promisc(struct be_adapter *adapter)
1630 {
1631         int status;
1632
1633         if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS)
1634                 return;
1635
1636         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MCAST_PROMISCUOUS, ON);
1637         if (!status)
1638                 adapter->if_flags |= BE_IF_FLAGS_MCAST_PROMISCUOUS;
1639 }
1640
1641 static void be_set_uc_promisc(struct be_adapter *adapter)
1642 {
1643         int status;
1644
1645         if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS)
1646                 return;
1647
1648         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, ON);
1649         if (!status)
1650                 adapter->if_flags |= BE_IF_FLAGS_PROMISCUOUS;
1651 }
1652
1653 static void be_clear_uc_promisc(struct be_adapter *adapter)
1654 {
1655         int status;
1656
1657         if (!(adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS))
1658                 return;
1659
1660         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, OFF);
1661         if (!status)
1662                 adapter->if_flags &= ~BE_IF_FLAGS_PROMISCUOUS;
1663 }
1664
1665 /* The two functions below are the callbacks for __dev_mc_sync()/__dev_uc_sync().
1666  * A single callback function is used for both sync and unsync. Addresses are
1667  * not actually added/removed here; the callback merely records that the
1668  * uc/mc list has changed. The entire uc/mc list is programmed in be_set_rx_mode().
1669  */
1670 static int be_uc_list_update(struct net_device *netdev,
1671                              const unsigned char *addr)
1672 {
1673         struct be_adapter *adapter = netdev_priv(netdev);
1674
1675         adapter->update_uc_list = true;
1676         return 0;
1677 }
1678
1679 static int be_mc_list_update(struct net_device *netdev,
1680                              const unsigned char *addr)
1681 {
1682         struct be_adapter *adapter = netdev_priv(netdev);
1683
1684         adapter->update_mc_list = true;
1685         return 0;
1686 }
1687
1688 static void be_set_mc_list(struct be_adapter *adapter)
1689 {
1690         struct net_device *netdev = adapter->netdev;
1691         struct netdev_hw_addr *ha;
1692         bool mc_promisc = false;
1693         int status;
1694
1695         netif_addr_lock_bh(netdev);
1696         __dev_mc_sync(netdev, be_mc_list_update, be_mc_list_update);
1697
1698         if (netdev->flags & IFF_PROMISC) {
1699                 adapter->update_mc_list = false;
1700         } else if (netdev->flags & IFF_ALLMULTI ||
1701                    netdev_mc_count(netdev) > be_max_mc(adapter)) {
1702                 /* Enable multicast promisc if num configured exceeds
1703                  * what we support
1704                  */
1705                 mc_promisc = true;
1706                 adapter->update_mc_list = false;
1707         } else if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS) {
1708                 /* Update mc-list unconditionally if the iface was previously
1709                  * in mc-promisc mode and now is out of that mode.
1710                  */
1711                 adapter->update_mc_list = true;
1712         }
1713
1714         if (adapter->update_mc_list) {
1715                 int i = 0;
1716
1717                 /* cache the mc-list in adapter */
1718                 netdev_for_each_mc_addr(ha, netdev) {
1719                         ether_addr_copy(adapter->mc_list[i].mac, ha->addr);
1720                         i++;
1721                 }
1722                 adapter->mc_count = netdev_mc_count(netdev);
1723         }
1724         netif_addr_unlock_bh(netdev);
1725
1726         if (mc_promisc) {
1727                 be_set_mc_promisc(adapter);
1728         } else if (adapter->update_mc_list) {
1729                 status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON);
1730                 if (!status)
1731                         adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS;
1732                 else
1733                         be_set_mc_promisc(adapter);
1734
1735                 adapter->update_mc_list = false;
1736         }
1737 }
1738
1739 static void be_clear_mc_list(struct be_adapter *adapter)
1740 {
1741         struct net_device *netdev = adapter->netdev;
1742
1743         __dev_mc_unsync(netdev, NULL);
1744         be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, OFF);
1745         adapter->mc_count = 0;
1746 }
1747
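/* Add a UC MAC filter for uc_list[uc_idx]. If the address is the
 * interface's own MAC, reuse the already-programmed pmac_id[0] instead
 * of consuming another MAC filter slot.
 */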
1748 static int be_uc_mac_add(struct be_adapter *adapter, int uc_idx)
1749 {
1750         if (ether_addr_equal(adapter->uc_list[uc_idx].mac, adapter->dev_mac)) {
1751                 adapter->pmac_id[uc_idx + 1] = adapter->pmac_id[0];
1752                 return 0;
1753         }
1754
1755         return be_cmd_pmac_add(adapter, adapter->uc_list[uc_idx].mac,
1756                                adapter->if_handle,
1757                                &adapter->pmac_id[uc_idx + 1], 0);
1758 }
1759
1760 static void be_uc_mac_del(struct be_adapter *adapter, int pmac_id)
1761 {
1762         if (pmac_id == adapter->pmac_id[0])
1763                 return;
1764
1765         be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
1766 }
1767
1768 static void be_set_uc_list(struct be_adapter *adapter)
1769 {
1770         struct net_device *netdev = adapter->netdev;
1771         struct netdev_hw_addr *ha;
1772         bool uc_promisc = false;
1773         int curr_uc_macs = 0, i;
1774
1775         netif_addr_lock_bh(netdev);
1776         __dev_uc_sync(netdev, be_uc_list_update, be_uc_list_update);
1777
1778         if (netdev->flags & IFF_PROMISC) {
1779                 adapter->update_uc_list = false;
1780         } else if (netdev_uc_count(netdev) > (be_max_uc(adapter) - 1)) {
1781                 uc_promisc = true;
1782                 adapter->update_uc_list = false;
1783         } else if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS) {
1784                 /* Update uc-list unconditionally if the iface was previously
1785                  * in uc-promisc mode and now is out of that mode.
1786                  */
1787                 adapter->update_uc_list = true;
1788         }
1789
1790         if (adapter->update_uc_list) {
1791                 /* cache the uc-list in adapter array */
1792                 i = 0;
1793                 netdev_for_each_uc_addr(ha, netdev) {
1794                         ether_addr_copy(adapter->uc_list[i].mac, ha->addr);
1795                         i++;
1796                 }
1797                 curr_uc_macs = netdev_uc_count(netdev);
1798         }
1799         netif_addr_unlock_bh(netdev);
1800
1801         if (uc_promisc) {
1802                 be_set_uc_promisc(adapter);
1803         } else if (adapter->update_uc_list) {
1804                 be_clear_uc_promisc(adapter);
1805
1806                 for (i = 0; i < adapter->uc_macs; i++)
1807                         be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1808
1809                 for (i = 0; i < curr_uc_macs; i++)
1810                         be_uc_mac_add(adapter, i);
1811                 adapter->uc_macs = curr_uc_macs;
1812                 adapter->update_uc_list = false;
1813         }
1814 }
1815
1816 static void be_clear_uc_list(struct be_adapter *adapter)
1817 {
1818         struct net_device *netdev = adapter->netdev;
1819         int i;
1820
1821         __dev_uc_unsync(netdev, NULL);
1822         for (i = 0; i < adapter->uc_macs; i++)
1823                 be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1824
1825         adapter->uc_macs = 0;
1826 }
1827
1828 static void __be_set_rx_mode(struct be_adapter *adapter)
1829 {
1830         struct net_device *netdev = adapter->netdev;
1831
1832         mutex_lock(&adapter->rx_filter_lock);
1833
1834         if (netdev->flags & IFF_PROMISC) {
1835                 if (!be_in_all_promisc(adapter))
1836                         be_set_all_promisc(adapter);
1837         } else if (be_in_all_promisc(adapter)) {
1838                 /* We need to re-program the vlan-list or clear
1839                  * vlan-promisc mode (if needed) when the interface
1840                  * comes out of promisc mode.
1841                  */
1842                 be_vid_config(adapter);
1843         }
1844
1845         be_set_uc_list(adapter);
1846         be_set_mc_list(adapter);
1847
1848         mutex_unlock(&adapter->rx_filter_lock);
1849 }
1850
1851 static void be_work_set_rx_mode(struct work_struct *work)
1852 {
1853         struct be_cmd_work *cmd_work =
1854                                 container_of(work, struct be_cmd_work, work);
1855
1856         __be_set_rx_mode(cmd_work->adapter);
1857         kfree(cmd_work);
1858 }
1859
1860 static int be_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
1861 {
1862         struct be_adapter *adapter = netdev_priv(netdev);
1863         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1864         int status;
1865
1866         if (!sriov_enabled(adapter))
1867                 return -EPERM;
1868
1869         if (!is_valid_ether_addr(mac) || vf >= adapter->num_vfs)
1870                 return -EINVAL;
1871
1872         /* Proceed further only if user provided MAC is different
1873          * from active MAC
1874          */
1875         if (ether_addr_equal(mac, vf_cfg->mac_addr))
1876                 return 0;
1877
1878         if (BEx_chip(adapter)) {
1879                 be_cmd_pmac_del(adapter, vf_cfg->if_handle, vf_cfg->pmac_id,
1880                                 vf + 1);
1881
1882                 status = be_cmd_pmac_add(adapter, mac, vf_cfg->if_handle,
1883                                          &vf_cfg->pmac_id, vf + 1);
1884         } else {
1885                 status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
1886                                         vf + 1);
1887         }
1888
1889         if (status) {
1890                 dev_err(&adapter->pdev->dev, "MAC %pM set on VF %d Failed: %#x",
1891                         mac, vf, status);
1892                 return be_cmd_status(status);
1893         }
1894
1895         ether_addr_copy(vf_cfg->mac_addr, mac);
1896
1897         return 0;
1898 }
1899
1900 static int be_get_vf_config(struct net_device *netdev, int vf,
1901                             struct ifla_vf_info *vi)
1902 {
1903         struct be_adapter *adapter = netdev_priv(netdev);
1904         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1905
1906         if (!sriov_enabled(adapter))
1907                 return -EPERM;
1908
1909         if (vf >= adapter->num_vfs)
1910                 return -EINVAL;
1911
1912         vi->vf = vf;
1913         vi->max_tx_rate = vf_cfg->tx_rate;
1914         vi->min_tx_rate = 0;
1915         vi->vlan = vf_cfg->vlan_tag & VLAN_VID_MASK;
1916         vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT;
1917         memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN);
1918         vi->linkstate = adapter->vf_cfg[vf].plink_tracking;
1919         vi->spoofchk = adapter->vf_cfg[vf].spoofchk;
1920
1921         return 0;
1922 }
1923
1924 static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan)
1925 {
1926         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1927         u16 vids[BE_NUM_VLANS_SUPPORTED];
1928         int vf_if_id = vf_cfg->if_handle;
1929         int status;
1930
1931         /* Enable Transparent VLAN Tagging */
1932         status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0, 0);
1933         if (status)
1934                 return status;
1935
1936         /* Clear pre-programmed VLAN filters on VF if any, if TVT is enabled */
1937         vids[0] = 0;
1938         status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1);
1939         if (!status)
1940                 dev_info(&adapter->pdev->dev,
1941                          "Cleared guest VLANs on VF%d", vf);
1942
1943         /* After TVT is enabled, disallow VFs to program VLAN filters */
1944         if (vf_cfg->privileges & BE_PRIV_FILTMGMT) {
1945                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges &
1946                                                   ~BE_PRIV_FILTMGMT, vf + 1);
1947                 if (!status)
1948                         vf_cfg->privileges &= ~BE_PRIV_FILTMGMT;
1949         }
1950         return 0;
1951 }
1952
1953 static int be_clear_vf_tvt(struct be_adapter *adapter, int vf)
1954 {
1955         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1956         struct device *dev = &adapter->pdev->dev;
1957         int status;
1958
1959         /* Reset Transparent VLAN Tagging. */
1960         status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1,
1961                                        vf_cfg->if_handle, 0, 0);
1962         if (status)
1963                 return status;
1964
1965         /* Allow VFs to program VLAN filtering */
1966         if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
1967                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges |
1968                                                   BE_PRIV_FILTMGMT, vf + 1);
1969                 if (!status) {
1970                         vf_cfg->privileges |= BE_PRIV_FILTMGMT;
1971                         dev_info(dev, "VF%d: FILTMGMT priv enabled", vf);
1972                 }
1973         }
1974
1975         dev_info(dev,
1976                  "Disable/re-enable i/f in VM to clear Transparent VLAN tag");
1977         return 0;
1978 }
1979
1980 static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
1981                           __be16 vlan_proto)
1982 {
1983         struct be_adapter *adapter = netdev_priv(netdev);
1984         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1985         int status;
1986
1987         if (!sriov_enabled(adapter))
1988                 return -EPERM;
1989
1990         if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7)
1991                 return -EINVAL;
1992
1993         if (vlan_proto != htons(ETH_P_8021Q))
1994                 return -EPROTONOSUPPORT;
1995
1996         if (vlan || qos) {
1997                 vlan |= qos << VLAN_PRIO_SHIFT;
1998                 status = be_set_vf_tvt(adapter, vf, vlan);
1999         } else {
2000                 status = be_clear_vf_tvt(adapter, vf);
2001         }
2002
2003         if (status) {
2004                 dev_err(&adapter->pdev->dev,
2005                         "VLAN %d config on VF %d failed : %#x\n", vlan, vf,
2006                         status);
2007                 return be_cmd_status(status);
2008         }
2009
2010         vf_cfg->vlan_tag = vlan;
2011         return 0;
2012 }
2013
2014 static int be_set_vf_tx_rate(struct net_device *netdev, int vf,
2015                              int min_tx_rate, int max_tx_rate)
2016 {
2017         struct be_adapter *adapter = netdev_priv(netdev);
2018         struct device *dev = &adapter->pdev->dev;
2019         int percent_rate, status = 0;
2020         u16 link_speed = 0;
2021         u8 link_status;
2022
2023         if (!sriov_enabled(adapter))
2024                 return -EPERM;
2025
2026         if (vf >= adapter->num_vfs)
2027                 return -EINVAL;
2028
2029         if (min_tx_rate)
2030                 return -EINVAL;
2031
2032         if (!max_tx_rate)
2033                 goto config_qos;
2034
2035         status = be_cmd_link_status_query(adapter, &link_speed,
2036                                           &link_status, 0);
2037         if (status)
2038                 goto err;
2039
2040         if (!link_status) {
2041                 dev_err(dev, "TX-rate setting not allowed when link is down\n");
2042                 status = -ENETDOWN;
2043                 goto err;
2044         }
2045
2046         if (max_tx_rate < 100 || max_tx_rate > link_speed) {
2047                 dev_err(dev, "TX-rate must be between 100 and %d Mbps\n",
2048                         link_speed);
2049                 status = -EINVAL;
2050                 goto err;
2051         }
2052
2053         /* On Skyhawk the QOS setting must be done only as a % value */
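        /* e.g. on a 10000Mbps link percent_rate is 100, so max_tx_rate
         * must be a multiple of 100Mbps.
         */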
2054         percent_rate = link_speed / 100;
2055         if (skyhawk_chip(adapter) && (max_tx_rate % percent_rate)) {
2056                 dev_err(dev, "TX-rate must be a multiple of %d Mbps\n",
2057                         percent_rate);
2058                 status = -EINVAL;
2059                 goto err;
2060         }
2061
2062 config_qos:
2063         status = be_cmd_config_qos(adapter, max_tx_rate, link_speed, vf + 1);
2064         if (status)
2065                 goto err;
2066
2067         adapter->vf_cfg[vf].tx_rate = max_tx_rate;
2068         return 0;
2069
2070 err:
2071         dev_err(dev, "TX-rate setting of %dMbps on VF%d failed\n",
2072                 max_tx_rate, vf);
2073         return be_cmd_status(status);
2074 }
2075
2076 static int be_set_vf_link_state(struct net_device *netdev, int vf,
2077                                 int link_state)
2078 {
2079         struct be_adapter *adapter = netdev_priv(netdev);
2080         int status;
2081
2082         if (!sriov_enabled(adapter))
2083                 return -EPERM;
2084
2085         if (vf >= adapter->num_vfs)
2086                 return -EINVAL;
2087
2088         status = be_cmd_set_logical_link_config(adapter, link_state, vf+1);
2089         if (status) {
2090                 dev_err(&adapter->pdev->dev,
2091                         "Link state change on VF %d failed: %#x\n", vf, status);
2092                 return be_cmd_status(status);
2093         }
2094
2095         adapter->vf_cfg[vf].plink_tracking = link_state;
2096
2097         return 0;
2098 }
2099
2100 static int be_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable)
2101 {
2102         struct be_adapter *adapter = netdev_priv(netdev);
2103         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
2104         u8 spoofchk;
2105         int status;
2106
2107         if (!sriov_enabled(adapter))
2108                 return -EPERM;
2109
2110         if (vf >= adapter->num_vfs)
2111                 return -EINVAL;
2112
2113         if (BEx_chip(adapter))
2114                 return -EOPNOTSUPP;
2115
2116         if (enable == vf_cfg->spoofchk)
2117                 return 0;
2118
2119         spoofchk = enable ? ENABLE_MAC_SPOOFCHK : DISABLE_MAC_SPOOFCHK;
2120
2121         status = be_cmd_set_hsw_config(adapter, 0, vf + 1, vf_cfg->if_handle,
2122                                        0, spoofchk);
2123         if (status) {
2124                 dev_err(&adapter->pdev->dev,
2125                         "Spoofchk change on VF %d failed: %#x\n", vf, status);
2126                 return be_cmd_status(status);
2127         }
2128
2129         vf_cfg->spoofchk = enable;
2130         return 0;
2131 }
2132
2133 static void be_aic_update(struct be_aic_obj *aic, u64 rx_pkts, u64 tx_pkts,
2134                           ulong now)
2135 {
2136         aic->rx_pkts_prev = rx_pkts;
2137         aic->tx_reqs_prev = tx_pkts;
2138         aic->jiffies = now;
2139 }
2140
2141 static int be_get_new_eqd(struct be_eq_obj *eqo)
2142 {
2143         struct be_adapter *adapter = eqo->adapter;
2144         int eqd, start;
2145         struct be_aic_obj *aic;
2146         struct be_rx_obj *rxo;
2147         struct be_tx_obj *txo;
2148         u64 rx_pkts = 0, tx_pkts = 0;
2149         ulong now;
2150         u32 pps, delta;
2151         int i;
2152
2153         aic = &adapter->aic_obj[eqo->idx];
2154         if (!aic->enable) {
2155                 if (aic->jiffies)
2156                         aic->jiffies = 0;
2157                 eqd = aic->et_eqd;
2158                 return eqd;
2159         }
2160
2161         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
2162                 do {
2163                         start = u64_stats_fetch_begin_irq(&rxo->stats.sync);
2164                         rx_pkts += rxo->stats.rx_pkts;
2165                 } while (u64_stats_fetch_retry_irq(&rxo->stats.sync, start));
2166         }
2167
2168         for_all_tx_queues_on_eq(adapter, eqo, txo, i) {
2169                 do {
2170                         start = u64_stats_fetch_begin_irq(&txo->stats.sync);
2171                         tx_pkts += txo->stats.tx_reqs;
2172                 } while (u64_stats_fetch_retry_irq(&txo->stats.sync, start));
2173         }
2174
2175         /* Skip, if wrapped around or first calculation */
2176         now = jiffies;
2177         if (!aic->jiffies || time_before(now, aic->jiffies) ||
2178             rx_pkts < aic->rx_pkts_prev ||
2179             tx_pkts < aic->tx_reqs_prev) {
2180                 be_aic_update(aic, rx_pkts, tx_pkts, now);
2181                 return aic->prev_eqd;
2182         }
2183
2184         delta = jiffies_to_msecs(now - aic->jiffies);
2185         if (delta == 0)
2186                 return aic->prev_eqd;
2187
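        /* Derive the new interrupt delay from the combined RX+TX packet
         * rate: roughly 4 usecs for every 15K pkts/sec. For example, at
         * ~150K pkts/sec eqd = (150000 / 15000) << 2 = 40, which is then
         * clamped to the [min_eqd, max_eqd] range (values below 8 disable
         * the delay).
         */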
2188         pps = (((u32)(rx_pkts - aic->rx_pkts_prev) * 1000) / delta) +
2189                 (((u32)(tx_pkts - aic->tx_reqs_prev) * 1000) / delta);
2190         eqd = (pps / 15000) << 2;
2191
2192         if (eqd < 8)
2193                 eqd = 0;
2194         eqd = min_t(u32, eqd, aic->max_eqd);
2195         eqd = max_t(u32, eqd, aic->min_eqd);
2196
2197         be_aic_update(aic, rx_pkts, tx_pkts, now);
2198
2199         return eqd;
2200 }
2201
2202 /* For Skyhawk-R only */
2203 static u32 be_get_eq_delay_mult_enc(struct be_eq_obj *eqo)
2204 {
2205         struct be_adapter *adapter = eqo->adapter;
2206         struct be_aic_obj *aic = &adapter->aic_obj[eqo->idx];
2207         ulong now = jiffies;
2208         int eqd;
2209         u32 mult_enc;
2210
2211         if (!aic->enable)
2212                 return 0;
2213
2214         if (jiffies_to_msecs(now - aic->jiffies) < 1)
2215                 eqd = aic->prev_eqd;
2216         else
2217                 eqd = be_get_new_eqd(eqo);
2218
2219         if (eqd > 100)
2220                 mult_enc = R2I_DLY_ENC_1;
2221         else if (eqd > 60)
2222                 mult_enc = R2I_DLY_ENC_2;
2223         else if (eqd > 20)
2224                 mult_enc = R2I_DLY_ENC_3;
2225         else
2226                 mult_enc = R2I_DLY_ENC_0;
2227
2228         aic->prev_eqd = eqd;
2229
2230         return mult_enc;
2231 }
2232
2233 void be_eqd_update(struct be_adapter *adapter, bool force_update)
2234 {
2235         struct be_set_eqd set_eqd[MAX_EVT_QS];
2236         struct be_aic_obj *aic;
2237         struct be_eq_obj *eqo;
2238         int i, num = 0, eqd;
2239
2240         for_all_evt_queues(adapter, eqo, i) {
2241                 aic = &adapter->aic_obj[eqo->idx];
2242                 eqd = be_get_new_eqd(eqo);
2243                 if (force_update || eqd != aic->prev_eqd) {
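                        /* Scale eqd into the delay_multiplier passed to
                         * be_cmd_modify_eqd(): eqd * 65 / 100; e.g. an
                         * eqd of 96 yields a multiplier of 62.
                         */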
2244                         set_eqd[num].delay_multiplier = (eqd * 65)/100;
2245                         set_eqd[num].eq_id = eqo->q.id;
2246                         aic->prev_eqd = eqd;
2247                         num++;
2248                 }
2249         }
2250
2251         if (num)
2252                 be_cmd_modify_eqd(adapter, set_eqd, num);
2253 }
2254
2255 static void be_rx_stats_update(struct be_rx_obj *rxo,
2256                                struct be_rx_compl_info *rxcp)
2257 {
2258         struct be_rx_stats *stats = rx_stats(rxo);
2259
2260         u64_stats_update_begin(&stats->sync);
2261         stats->rx_compl++;
2262         stats->rx_bytes += rxcp->pkt_size;
2263         stats->rx_pkts++;
2264         if (rxcp->tunneled)
2265                 stats->rx_vxlan_offload_pkts++;
2266         if (rxcp->pkt_type == BE_MULTICAST_PACKET)
2267                 stats->rx_mcast_pkts++;
2268         if (rxcp->err)
2269                 stats->rx_compl_err++;
2270         u64_stats_update_end(&stats->sync);
2271 }
2272
2273 static inline bool csum_passed(struct be_rx_compl_info *rxcp)
2274 {
2275         /* L4 checksum is not reliable for non TCP/UDP packets.
2276          * Also ignore ipcksm for ipv6 pkts
2277          */
2278         return (rxcp->tcpf || rxcp->udpf) && rxcp->l4_csum &&
2279                 (rxcp->ip_csum || rxcp->ipv6) && !rxcp->err;
2280 }
2281
2282 static struct be_rx_page_info *get_rx_page_info(struct be_rx_obj *rxo)
2283 {
2284         struct be_adapter *adapter = rxo->adapter;
2285         struct be_rx_page_info *rx_page_info;
2286         struct be_queue_info *rxq = &rxo->q;
2287         u32 frag_idx = rxq->tail;
2288
2289         rx_page_info = &rxo->page_info_tbl[frag_idx];
2290         BUG_ON(!rx_page_info->page);
2291
2292         if (rx_page_info->last_frag) {
2293                 dma_unmap_page(&adapter->pdev->dev,
2294                                dma_unmap_addr(rx_page_info, bus),
2295                                adapter->big_page_size, DMA_FROM_DEVICE);
2296                 rx_page_info->last_frag = false;
2297         } else {
2298                 dma_sync_single_for_cpu(&adapter->pdev->dev,
2299                                         dma_unmap_addr(rx_page_info, bus),
2300                                         rx_frag_size, DMA_FROM_DEVICE);
2301         }
2302
2303         queue_tail_inc(rxq);
2304         atomic_dec(&rxq->used);
2305         return rx_page_info;
2306 }
2307
2308 /* Throw away the data in the Rx completion */
2309 static void be_rx_compl_discard(struct be_rx_obj *rxo,
2310                                 struct be_rx_compl_info *rxcp)
2311 {
2312         struct be_rx_page_info *page_info;
2313         u16 i, num_rcvd = rxcp->num_rcvd;
2314
2315         for (i = 0; i < num_rcvd; i++) {
2316                 page_info = get_rx_page_info(rxo);
2317                 put_page(page_info->page);
2318                 memset(page_info, 0, sizeof(*page_info));
2319         }
2320 }
2321
2322 /*
2323  * skb_fill_rx_data forms a complete skb for an ether frame
2324  * indicated by rxcp.
2325  */
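/* For example, a frame no larger than BE_HDR_LEN is copied entirely into
 * the skb's linear data area, while a larger frame gets only its Ethernet
 * header copied and the remaining bytes attached as page fragments.
 */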
2326 static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb,
2327                              struct be_rx_compl_info *rxcp)
2328 {
2329         struct be_rx_page_info *page_info;
2330         u16 i, j;
2331         u16 hdr_len, curr_frag_len, remaining;
2332         u8 *start;
2333
2334         page_info = get_rx_page_info(rxo);
2335         start = page_address(page_info->page) + page_info->page_offset;
2336         prefetch(start);
2337
2338         /* Copy data in the first descriptor of this completion */
2339         curr_frag_len = min(rxcp->pkt_size, rx_frag_size);
2340
2341         skb->len = curr_frag_len;
2342         if (curr_frag_len <= BE_HDR_LEN) { /* tiny packet */
2343                 memcpy(skb->data, start, curr_frag_len);
2344                 /* Complete packet has now been moved to data */
2345                 put_page(page_info->page);
2346                 skb->data_len = 0;
2347                 skb->tail += curr_frag_len;
2348         } else {
2349                 hdr_len = ETH_HLEN;
2350                 memcpy(skb->data, start, hdr_len);
2351                 skb_shinfo(skb)->nr_frags = 1;
2352                 skb_frag_set_page(skb, 0, page_info->page);
2353                 skb_shinfo(skb)->frags[0].page_offset =
2354                                         page_info->page_offset + hdr_len;
2355                 skb_frag_size_set(&skb_shinfo(skb)->frags[0],
2356                                   curr_frag_len - hdr_len);
2357                 skb->data_len = curr_frag_len - hdr_len;
2358                 skb->truesize += rx_frag_size;
2359                 skb->tail += hdr_len;
2360         }
2361         page_info->page = NULL;
2362
2363         if (rxcp->pkt_size <= rx_frag_size) {
2364                 BUG_ON(rxcp->num_rcvd != 1);
2365                 return;
2366         }
2367
2368         /* More frags present for this completion */
2369         remaining = rxcp->pkt_size - curr_frag_len;
2370         for (i = 1, j = 0; i < rxcp->num_rcvd; i++) {
2371                 page_info = get_rx_page_info(rxo);
2372                 curr_frag_len = min(remaining, rx_frag_size);
2373
2374                 /* Coalesce all frags from the same physical page in one slot */
2375                 if (page_info->page_offset == 0) {
2376                         /* Fresh page */
2377                         j++;
2378                         skb_frag_set_page(skb, j, page_info->page);
2379                         skb_shinfo(skb)->frags[j].page_offset =
2380                                                         page_info->page_offset;
2381                         skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2382                         skb_shinfo(skb)->nr_frags++;
2383                 } else {
2384                         put_page(page_info->page);
2385                 }
2386
2387                 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2388                 skb->len += curr_frag_len;
2389                 skb->data_len += curr_frag_len;
2390                 skb->truesize += rx_frag_size;
2391                 remaining -= curr_frag_len;
2392                 page_info->page = NULL;
2393         }
2394         BUG_ON(j > MAX_SKB_FRAGS);
2395 }
2396
2397 /* Process the RX completion indicated by rxcp when GRO is disabled */
2398 static void be_rx_compl_process(struct be_rx_obj *rxo, struct napi_struct *napi,
2399                                 struct be_rx_compl_info *rxcp)
2400 {
2401         struct be_adapter *adapter = rxo->adapter;
2402         struct net_device *netdev = adapter->netdev;
2403         struct sk_buff *skb;
2404
2405         skb = netdev_alloc_skb_ip_align(netdev, BE_RX_SKB_ALLOC_SIZE);
2406         if (unlikely(!skb)) {
2407                 rx_stats(rxo)->rx_drops_no_skbs++;
2408                 be_rx_compl_discard(rxo, rxcp);
2409                 return;
2410         }
2411
2412         skb_fill_rx_data(rxo, skb, rxcp);
2413
2414         if (likely((netdev->features & NETIF_F_RXCSUM) && csum_passed(rxcp)))
2415                 skb->ip_summed = CHECKSUM_UNNECESSARY;
2416         else
2417                 skb_checksum_none_assert(skb);
2418
2419         skb->protocol = eth_type_trans(skb, netdev);
2420         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2421         if (netdev->features & NETIF_F_RXHASH)
2422                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2423
2424         skb->csum_level = rxcp->tunneled;
2425         skb_mark_napi_id(skb, napi);
2426
2427         if (rxcp->vlanf)
2428                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2429
2430         netif_receive_skb(skb);
2431 }
2432
2433 /* Process the RX completion indicated by rxcp when GRO is enabled */
2434 static void be_rx_compl_process_gro(struct be_rx_obj *rxo,
2435                                     struct napi_struct *napi,
2436                                     struct be_rx_compl_info *rxcp)
2437 {
2438         struct be_adapter *adapter = rxo->adapter;
2439         struct be_rx_page_info *page_info;
2440         struct sk_buff *skb = NULL;
2441         u16 remaining, curr_frag_len;
2442         u16 i, j;
2443
2444         skb = napi_get_frags(napi);
2445         if (!skb) {
2446                 be_rx_compl_discard(rxo, rxcp);
2447                 return;
2448         }
2449
2450         remaining = rxcp->pkt_size;
2451         for (i = 0, j = -1; i < rxcp->num_rcvd; i++) {
2452                 page_info = get_rx_page_info(rxo);
2453
2454                 curr_frag_len = min(remaining, rx_frag_size);
2455
2456                 /* Coalesce all frags from the same physical page in one slot */
2457                 if (i == 0 || page_info->page_offset == 0) {
2458                         /* First frag or Fresh page */
2459                         j++;
2460                         skb_frag_set_page(skb, j, page_info->page);
2461                         skb_shinfo(skb)->frags[j].page_offset =
2462                                                         page_info->page_offset;
2463                         skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2464                 } else {
2465                         put_page(page_info->page);
2466                 }
2467                 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2468                 skb->truesize += rx_frag_size;
2469                 remaining -= curr_frag_len;
2470                 memset(page_info, 0, sizeof(*page_info));
2471         }
2472         BUG_ON(j > MAX_SKB_FRAGS);
2473
2474         skb_shinfo(skb)->nr_frags = j + 1;
2475         skb->len = rxcp->pkt_size;
2476         skb->data_len = rxcp->pkt_size;
2477         skb->ip_summed = CHECKSUM_UNNECESSARY;
2478         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2479         if (adapter->netdev->features & NETIF_F_RXHASH)
2480                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2481
2482         skb->csum_level = rxcp->tunneled;
2483
2484         if (rxcp->vlanf)
2485                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2486
2487         napi_gro_frags(napi);
2488 }
2489
2490 static void be_parse_rx_compl_v1(struct be_eth_rx_compl *compl,
2491                                  struct be_rx_compl_info *rxcp)
2492 {
2493         rxcp->pkt_size = GET_RX_COMPL_V1_BITS(pktsize, compl);
2494         rxcp->vlanf = GET_RX_COMPL_V1_BITS(vtp, compl);
2495         rxcp->err = GET_RX_COMPL_V1_BITS(err, compl);
2496         rxcp->tcpf = GET_RX_COMPL_V1_BITS(tcpf, compl);
2497         rxcp->udpf = GET_RX_COMPL_V1_BITS(udpf, compl);
2498         rxcp->ip_csum = GET_RX_COMPL_V1_BITS(ipcksm, compl);
2499         rxcp->l4_csum = GET_RX_COMPL_V1_BITS(l4_cksm, compl);
2500         rxcp->ipv6 = GET_RX_COMPL_V1_BITS(ip_version, compl);
2501         rxcp->num_rcvd = GET_RX_COMPL_V1_BITS(numfrags, compl);
2502         rxcp->pkt_type = GET_RX_COMPL_V1_BITS(cast_enc, compl);
2503         rxcp->rss_hash = GET_RX_COMPL_V1_BITS(rsshash, compl);
2504         if (rxcp->vlanf) {
2505                 rxcp->qnq = GET_RX_COMPL_V1_BITS(qnq, compl);
2506                 rxcp->vlan_tag = GET_RX_COMPL_V1_BITS(vlan_tag, compl);
2507         }
2508         rxcp->port = GET_RX_COMPL_V1_BITS(port, compl);
2509         rxcp->tunneled =
2510                 GET_RX_COMPL_V1_BITS(tunneled, compl);
2511 }
2512
2513 static void be_parse_rx_compl_v0(struct be_eth_rx_compl *compl,
2514                                  struct be_rx_compl_info *rxcp)
2515 {
2516         rxcp->pkt_size = GET_RX_COMPL_V0_BITS(pktsize, compl);
2517         rxcp->vlanf = GET_RX_COMPL_V0_BITS(vtp, compl);
2518         rxcp->err = GET_RX_COMPL_V0_BITS(err, compl);
2519         rxcp->tcpf = GET_RX_COMPL_V0_BITS(tcpf, compl);
2520         rxcp->udpf = GET_RX_COMPL_V0_BITS(udpf, compl);
2521         rxcp->ip_csum = GET_RX_COMPL_V0_BITS(ipcksm, compl);
2522         rxcp->l4_csum = GET_RX_COMPL_V0_BITS(l4_cksm, compl);
2523         rxcp->ipv6 = GET_RX_COMPL_V0_BITS(ip_version, compl);
2524         rxcp->num_rcvd = GET_RX_COMPL_V0_BITS(numfrags, compl);
2525         rxcp->pkt_type = GET_RX_COMPL_V0_BITS(cast_enc, compl);
2526         rxcp->rss_hash = GET_RX_COMPL_V0_BITS(rsshash, compl);
2527         if (rxcp->vlanf) {
2528                 rxcp->qnq = GET_RX_COMPL_V0_BITS(qnq, compl);
2529                 rxcp->vlan_tag = GET_RX_COMPL_V0_BITS(vlan_tag, compl);
2530         }
2531         rxcp->port = GET_RX_COMPL_V0_BITS(port, compl);
2532         rxcp->ip_frag = GET_RX_COMPL_V0_BITS(ip_frag, compl);
2533 }
2534
2535 static struct be_rx_compl_info *be_rx_compl_get(struct be_rx_obj *rxo)
2536 {
2537         struct be_eth_rx_compl *compl = queue_tail_node(&rxo->cq);
2538         struct be_rx_compl_info *rxcp = &rxo->rxcp;
2539         struct be_adapter *adapter = rxo->adapter;
2540
2541         /* For checking the valid bit it is OK to use either definition, as the
2542          * valid bit is at the same position in both v0 and v1 Rx compl */
2543         if (compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] == 0)
2544                 return NULL;
2545
2546         rmb();
2547         be_dws_le_to_cpu(compl, sizeof(*compl));
2548
2549         if (adapter->be3_native)
2550                 be_parse_rx_compl_v1(compl, rxcp);
2551         else
2552                 be_parse_rx_compl_v0(compl, rxcp);
2553
2554         if (rxcp->ip_frag)
2555                 rxcp->l4_csum = 0;
2556
2557         if (rxcp->vlanf) {
2558                 /* In QNQ modes, if qnq bit is not set, then the packet was
2559                  * tagged only with the transparent outer vlan-tag and must
2560                  * not be treated as a vlan packet by host
2561                  */
2562                 if (be_is_qnq_mode(adapter) && !rxcp->qnq)
2563                         rxcp->vlanf = 0;
2564
2565                 if (!lancer_chip(adapter))
2566                         rxcp->vlan_tag = swab16(rxcp->vlan_tag);
2567
2568                 if (adapter->pvid == (rxcp->vlan_tag & VLAN_VID_MASK) &&
2569                     !test_bit(rxcp->vlan_tag, adapter->vids))
2570                         rxcp->vlanf = 0;
2571         }
2572
2573         /* As the compl has been parsed, reset it; we won't touch it again */
2574         compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] = 0;
2575
2576         queue_tail_inc(&rxo->cq);
2577         return rxcp;
2578 }
2579
2580 static inline struct page *be_alloc_pages(u32 size, gfp_t gfp)
2581 {
2582         u32 order = get_order(size);
2583
2584         if (order > 0)
2585                 gfp |= __GFP_COMP;
2586         return alloc_pages(gfp, order);
2587 }
2588
2589 /*
2590  * Allocate a page, split it to fragments of size rx_frag_size and post as
2591  * receive buffers to BE
2592  */
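/* With the default rx_frag_size of 2048, a big page is carved into
 * multiple 2K fragments; only the fragment marked last_frag stores the
 * DMA address of the whole page, so the page is unmapped exactly once
 * when that fragment is consumed.
 */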
2593 static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed)
2594 {
2595         struct be_adapter *adapter = rxo->adapter;
2596         struct be_rx_page_info *page_info = NULL, *prev_page_info = NULL;
2597         struct be_queue_info *rxq = &rxo->q;
2598         struct page *pagep = NULL;
2599         struct device *dev = &adapter->pdev->dev;
2600         struct be_eth_rx_d *rxd;
2601         u64 page_dmaaddr = 0, frag_dmaaddr;
2602         u32 posted, page_offset = 0, notify = 0;
2603
2604         page_info = &rxo->page_info_tbl[rxq->head];
2605         for (posted = 0; posted < frags_needed && !page_info->page; posted++) {
2606                 if (!pagep) {
2607                         pagep = be_alloc_pages(adapter->big_page_size, gfp);
2608                         if (unlikely(!pagep)) {
2609                                 rx_stats(rxo)->rx_post_fail++;
2610                                 break;
2611                         }
2612                         page_dmaaddr = dma_map_page(dev, pagep, 0,
2613                                                     adapter->big_page_size,
2614                                                     DMA_FROM_DEVICE);
2615                         if (dma_mapping_error(dev, page_dmaaddr)) {
2616                                 put_page(pagep);
2617                                 pagep = NULL;
2618                                 adapter->drv_stats.dma_map_errors++;
2619                                 break;
2620                         }
2621                         page_offset = 0;
2622                 } else {
2623                         get_page(pagep);
2624                         page_offset += rx_frag_size;
2625                 }
2626                 page_info->page_offset = page_offset;
2627                 page_info->page = pagep;
2628
2629                 rxd = queue_head_node(rxq);
2630                 frag_dmaaddr = page_dmaaddr + page_info->page_offset;
2631                 rxd->fragpa_lo = cpu_to_le32(frag_dmaaddr & 0xFFFFFFFF);
2632                 rxd->fragpa_hi = cpu_to_le32(upper_32_bits(frag_dmaaddr));
2633
2634                 /* Any space left in the current big page for another frag? */
2635                 if ((page_offset + rx_frag_size + rx_frag_size) >
2636                                         adapter->big_page_size) {
2637                         pagep = NULL;
2638                         page_info->last_frag = true;
2639                         dma_unmap_addr_set(page_info, bus, page_dmaaddr);
2640                 } else {
2641                         dma_unmap_addr_set(page_info, bus, frag_dmaaddr);
2642                 }
2643
2644                 prev_page_info = page_info;
2645                 queue_head_inc(rxq);
2646                 page_info = &rxo->page_info_tbl[rxq->head];
2647         }
2648
2649         /* Mark the last frag of a page when we break out of the above loop
2650          * with no more slots available in the RXQ
2651          */
2652         if (pagep) {
2653                 prev_page_info->last_frag = true;
2654                 dma_unmap_addr_set(prev_page_info, bus, page_dmaaddr);
2655         }
2656
2657         if (posted) {
2658                 atomic_add(posted, &rxq->used);
2659                 if (rxo->rx_post_starved)
2660                         rxo->rx_post_starved = false;
2661                 do {
2662                         notify = min(MAX_NUM_POST_ERX_DB, posted);
2663                         be_rxq_notify(adapter, rxq->id, notify);
2664                         posted -= notify;
2665                 } while (posted);
2666         } else if (atomic_read(&rxq->used) == 0) {
2667                 /* Let be_worker replenish when memory is available */
2668                 rxo->rx_post_starved = true;
2669         }
2670 }
2671
2672 static inline void be_update_tx_err(struct be_tx_obj *txo, u8 status)
2673 {
2674         switch (status) {
2675         case BE_TX_COMP_HDR_PARSE_ERR:
2676                 tx_stats(txo)->tx_hdr_parse_err++;
2677                 break;
2678         case BE_TX_COMP_NDMA_ERR:
2679                 tx_stats(txo)->tx_dma_err++;
2680                 break;
2681         case BE_TX_COMP_ACL_ERR:
2682                 tx_stats(txo)->tx_spoof_check_err++;
2683                 break;
2684         }
2685 }
2686
2687 static inline void lancer_update_tx_err(struct be_tx_obj *txo, u8 status)
2688 {
2689         switch (status) {
2690         case LANCER_TX_COMP_LSO_ERR:
2691                 tx_stats(txo)->tx_tso_err++;
2692                 break;
2693         case LANCER_TX_COMP_HSW_DROP_MAC_ERR:
2694         case LANCER_TX_COMP_HSW_DROP_VLAN_ERR:
2695                 tx_stats(txo)->tx_spoof_check_err++;
2696                 break;
2697         case LANCER_TX_COMP_QINQ_ERR:
2698                 tx_stats(txo)->tx_qinq_err++;
2699                 break;
2700         case LANCER_TX_COMP_PARITY_ERR:
2701                 tx_stats(txo)->tx_internal_parity_err++;
2702                 break;
2703         case LANCER_TX_COMP_DMA_ERR:
2704                 tx_stats(txo)->tx_dma_err++;
2705                 break;
2706         case LANCER_TX_COMP_SGE_ERR:
2707                 tx_stats(txo)->tx_sge_err++;
2708                 break;
2709         }
2710 }
2711
2712 static struct be_tx_compl_info *be_tx_compl_get(struct be_adapter *adapter,
2713                                                 struct be_tx_obj *txo)
2714 {
2715         struct be_queue_info *tx_cq = &txo->cq;
2716         struct be_tx_compl_info *txcp = &txo->txcp;
2717         struct be_eth_tx_compl *compl = queue_tail_node(tx_cq);
2718
2719         if (compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0)
2720                 return NULL;
2721
2722         /* Ensure load ordering of valid bit dword and other dwords below */
2723         rmb();
2724         be_dws_le_to_cpu(compl, sizeof(*compl));
2725
2726         txcp->status = GET_TX_COMPL_BITS(status, compl);
2727         txcp->end_index = GET_TX_COMPL_BITS(wrb_index, compl);
2728
2729         if (txcp->status) {
2730                 if (lancer_chip(adapter)) {
2731                         lancer_update_tx_err(txo, txcp->status);
2732                         /* Reset the adapter in case of TSO,
2733                          * SGE or Parity error
2734                          */
2735                         if (txcp->status == LANCER_TX_COMP_LSO_ERR ||
2736                             txcp->status == LANCER_TX_COMP_PARITY_ERR ||
2737                             txcp->status == LANCER_TX_COMP_SGE_ERR)
2738                                 be_set_error(adapter, BE_ERROR_TX);
2739                 } else {
2740                         be_update_tx_err(txo, txcp->status);
2741                 }
2742         }
2743
2744         if (be_check_error(adapter, BE_ERROR_TX))
2745                 return NULL;
2746
2747         compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0;
2748         queue_tail_inc(tx_cq);
2749         return txcp;
2750 }
2751
2752 static u16 be_tx_compl_process(struct be_adapter *adapter,
2753                                struct be_tx_obj *txo, u16 last_index)
2754 {
2755         struct sk_buff **sent_skbs = txo->sent_skb_list;
2756         struct be_queue_info *txq = &txo->q;
2757         struct sk_buff *skb = NULL;
2758         bool unmap_skb_hdr = false;
2759         struct be_eth_wrb *wrb;
2760         u16 num_wrbs = 0;
2761         u32 frag_index;
2762
2763         do {
2764                 if (sent_skbs[txq->tail]) {
2765                         /* Free skb from prev req */
2766                         if (skb)
2767                                 dev_consume_skb_any(skb);
2768                         skb = sent_skbs[txq->tail];
2769                         sent_skbs[txq->tail] = NULL;
2770                         queue_tail_inc(txq);  /* skip hdr wrb */
2771                         num_wrbs++;
2772                         unmap_skb_hdr = true;
2773                 }
2774                 wrb = queue_tail_node(txq);
2775                 frag_index = txq->tail;
2776                 unmap_tx_frag(&adapter->pdev->dev, wrb,
2777                               (unmap_skb_hdr && skb_headlen(skb)));
2778                 unmap_skb_hdr = false;
2779                 queue_tail_inc(txq);
2780                 num_wrbs++;
2781         } while (frag_index != last_index);
2782         dev_consume_skb_any(skb);
2783
2784         return num_wrbs;
2785 }
2786
2787 /* Return the number of events in the event queue */
2788 static inline int events_get(struct be_eq_obj *eqo)
2789 {
2790         struct be_eq_entry *eqe;
2791         int num = 0;
2792
2793         do {
2794                 eqe = queue_tail_node(&eqo->q);
2795                 if (eqe->evt == 0)
2796                         break;
2797
2798                 rmb();
2799                 eqe->evt = 0;
2800                 num++;
2801                 queue_tail_inc(&eqo->q);
2802         } while (true);
2803
2804         return num;
2805 }
2806
2807 /* Leaves the EQ in a disarmed state */
2808 static void be_eq_clean(struct be_eq_obj *eqo)
2809 {
2810         int num = events_get(eqo);
2811
2812         be_eq_notify(eqo->adapter, eqo->q.id, false, true, num, 0);
2813 }
2814
2815 /* Free posted rx buffers that were not used */
2816 static void be_rxq_clean(struct be_rx_obj *rxo)
2817 {
2818         struct be_queue_info *rxq = &rxo->q;
2819         struct be_rx_page_info *page_info;
2820
2821         while (atomic_read(&rxq->used) > 0) {
2822                 page_info = get_rx_page_info(rxo);
2823                 put_page(page_info->page);
2824                 memset(page_info, 0, sizeof(*page_info));
2825         }
2826         BUG_ON(atomic_read(&rxq->used));
2827         rxq->tail = 0;
2828         rxq->head = 0;
2829 }
2830
2831 static void be_rx_cq_clean(struct be_rx_obj *rxo)
2832 {
2833         struct be_queue_info *rx_cq = &rxo->cq;
2834         struct be_rx_compl_info *rxcp;
2835         struct be_adapter *adapter = rxo->adapter;
2836         int flush_wait = 0;
2837
2838         /* Consume pending rx completions.
2839          * Wait for the flush completion (identified by zero num_rcvd)
2840          * to arrive. Notify CQ even when there are no more CQ entries
2841          * for HW to flush partially coalesced CQ entries.
2842          * In Lancer, there is no need to wait for flush compl.
2843          */
2844         for (;;) {
2845                 rxcp = be_rx_compl_get(rxo);
2846                 if (!rxcp) {
2847                         if (lancer_chip(adapter))
2848                                 break;
2849
2850                         if (flush_wait++ > 50 ||
2851                             be_check_error(adapter,
2852                                            BE_ERROR_HW)) {
2853                                 dev_warn(&adapter->pdev->dev,
2854                                          "did not receive flush compl\n");
2855                                 break;
2856                         }
2857                         be_cq_notify(adapter, rx_cq->id, true, 0);
2858                         mdelay(1);
2859                 } else {
2860                         be_rx_compl_discard(rxo, rxcp);
2861                         be_cq_notify(adapter, rx_cq->id, false, 1);
2862                         if (rxcp->num_rcvd == 0)
2863                                 break;
2864                 }
2865         }
2866
2867         /* After cleanup, leave the CQ in unarmed state */
2868         be_cq_notify(adapter, rx_cq->id, false, 0);
2869 }
2870
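     /* Drain TX completions at teardown time: poll each TXQ until completions
      * stop arriving for ~10ms, then reclaim any wrbs that were queued but
      * never notified to the HW and reset the TXQ indices.
      */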
2871 static void be_tx_compl_clean(struct be_adapter *adapter)
2872 {
2873         struct device *dev = &adapter->pdev->dev;
2874         u16 cmpl = 0, timeo = 0, num_wrbs = 0;
2875         struct be_tx_compl_info *txcp;
2876         struct be_queue_info *txq;
2877         u32 end_idx, notified_idx;
2878         struct be_tx_obj *txo;
2879         int i, pending_txqs;
2880
2881         /* Stop polling for compls when HW has been silent for 10ms */
2882         do {
2883                 pending_txqs = adapter->num_tx_qs;
2884
2885                 for_all_tx_queues(adapter, txo, i) {
2886                         cmpl = 0;
2887                         num_wrbs = 0;
2888                         txq = &txo->q;
2889                         while ((txcp = be_tx_compl_get(adapter, txo))) {
2890                                 num_wrbs +=
2891                                         be_tx_compl_process(adapter, txo,
2892                                                             txcp->end_index);
2893                                 cmpl++;
2894                         }
2895                         if (cmpl) {
2896                                 be_cq_notify(adapter, txo->cq.id, false, cmpl);
2897                                 atomic_sub(num_wrbs, &txq->used);
2898                                 timeo = 0;
2899                         }
2900                         if (!be_is_tx_compl_pending(txo))
2901                                 pending_txqs--;
2902                 }
2903
2904                 if (pending_txqs == 0 || ++timeo > 10 ||
2905                     be_check_error(adapter, BE_ERROR_HW))
2906                         break;
2907
2908                 mdelay(1);
2909         } while (true);
2910
2911         /* Free enqueued TX that was never notified to HW */
2912         for_all_tx_queues(adapter, txo, i) {
2913                 txq = &txo->q;
2914
2915                 if (atomic_read(&txq->used)) {
2916                         dev_info(dev, "txq%d: cleaning %d pending tx-wrbs\n",
2917                                  i, atomic_read(&txq->used));
2918                         notified_idx = txq->tail;
2919                         end_idx = txq->tail;
2920                         index_adv(&end_idx, atomic_read(&txq->used) - 1,
2921                                   txq->len);
2922                         /* Use the tx-compl process logic to handle requests
2923                          * that were not sent to the HW.
2924                          */
2925                         num_wrbs = be_tx_compl_process(adapter, txo, end_idx);
2926                         atomic_sub(num_wrbs, &txq->used);
2927                         BUG_ON(atomic_read(&txq->used));
2928                         txo->pend_wrb_cnt = 0;
2929                         /* Since hw was never notified of these requests,
2930                          * reset TXQ indices
2931                          */
2932                         txq->head = notified_idx;
2933                         txq->tail = notified_idx;
2934                 }
2935         }
2936 }
2937
2938 static void be_evt_queues_destroy(struct be_adapter *adapter)
2939 {
2940         struct be_eq_obj *eqo;
2941         int i;
2942
2943         for_all_evt_queues(adapter, eqo, i) {
2944                 if (eqo->q.created) {
2945                         be_eq_clean(eqo);
2946                         be_cmd_q_destroy(adapter, &eqo->q, QTYPE_EQ);
2947                         netif_napi_del(&eqo->napi);
2948                         free_cpumask_var(eqo->affinity_mask);
2949                 }
2950                 be_queue_free(adapter, &eqo->q);
2951         }
2952 }
2953
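     /* Create enough EQs to service both the RX and TX queues (bounded by the
      * number of available IRQs), and set up a NAPI context and a CPU
      * affinity hint for each EQ.
      */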
2954 static int be_evt_queues_create(struct be_adapter *adapter)
2955 {
2956         struct be_queue_info *eq;
2957         struct be_eq_obj *eqo;
2958         struct be_aic_obj *aic;
2959         int i, rc;
2960
2961         /* need enough EQs to service both RX and TX queues */
2962         adapter->num_evt_qs = min_t(u16, num_irqs(adapter),
2963                                     max(adapter->cfg_num_rx_irqs,
2964                                         adapter->cfg_num_tx_irqs));
2965
2966         for_all_evt_queues(adapter, eqo, i) {
2967                 int numa_node = dev_to_node(&adapter->pdev->dev);
2968
2969                 aic = &adapter->aic_obj[i];
2970                 eqo->adapter = adapter;
2971                 eqo->idx = i;
2972                 aic->max_eqd = BE_MAX_EQD;
2973                 aic->enable = true;
2974
2975                 eq = &eqo->q;
2976                 rc = be_queue_alloc(adapter, eq, EVNT_Q_LEN,
2977                                     sizeof(struct be_eq_entry));
2978                 if (rc)
2979                         return rc;
2980
2981                 rc = be_cmd_eq_create(adapter, eqo);
2982                 if (rc)
2983                         return rc;
2984
2985                 if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
2986                         return -ENOMEM;
2987                 cpumask_set_cpu(cpumask_local_spread(i, numa_node),
2988                                 eqo->affinity_mask);
2989                 netif_napi_add(adapter->netdev, &eqo->napi, be_poll,
2990                                BE_NAPI_WEIGHT);
2991         }
2992         return 0;
2993 }
2994
2995 static void be_mcc_queues_destroy(struct be_adapter *adapter)
2996 {
2997         struct be_queue_info *q;
2998
2999         q = &adapter->mcc_obj.q;
3000         if (q->created)
3001                 be_cmd_q_destroy(adapter, q, QTYPE_MCCQ);
3002         be_queue_free(adapter, q);
3003
3004         q = &adapter->mcc_obj.cq;
3005         if (q->created)
3006                 be_cmd_q_destroy(adapter, q, QTYPE_CQ);
3007         be_queue_free(adapter, q);
3008 }
3009
3010 /* Must be called only after TX qs are created as MCC shares TX EQ */
3011 static int be_mcc_queues_create(struct be_adapter *adapter)
3012 {
3013         struct be_queue_info *q, *cq;
3014
3015         cq = &adapter->mcc_obj.cq;
3016         if (be_queue_alloc(adapter, cq, MCC_CQ_LEN,
3017                            sizeof(struct be_mcc_compl)))
3018                 goto err;
3019
3020         /* Use the default EQ for MCC completions */
3021         if (be_cmd_cq_create(adapter, cq, &mcc_eqo(adapter)->q, true, 0))
3022                 goto mcc_cq_free;
3023
3024         q = &adapter->mcc_obj.q;
3025         if (be_queue_alloc(adapter, q, MCC_Q_LEN, sizeof(struct be_mcc_wrb)))
3026                 goto mcc_cq_destroy;
3027
3028         if (be_cmd_mccq_create(adapter, q, cq))
3029                 goto mcc_q_free;
3030
3031         return 0;
3032
3033 mcc_q_free:
3034         be_queue_free(adapter, q);
3035 mcc_cq_destroy:
3036         be_cmd_q_destroy(adapter, cq, QTYPE_CQ);
3037 mcc_cq_free:
3038         be_queue_free(adapter, cq);
3039 err:
3040         return -1;
3041 }
3042
3043 static void be_tx_queues_destroy(struct be_adapter *adapter)
3044 {
3045         struct be_queue_info *q;
3046         struct be_tx_obj *txo;
3047         u8 i;
3048
3049         for_all_tx_queues(adapter, txo, i) {
3050                 q = &txo->q;
3051                 if (q->created)
3052                         be_cmd_q_destroy(adapter, q, QTYPE_TXQ);
3053                 be_queue_free(adapter, q);
3054
3055                 q = &txo->cq;
3056                 if (q->created)
3057                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
3058                 be_queue_free(adapter, q);
3059         }
3060 }
3061
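     /* Create the TX completion queues and TX queues; when there are fewer
      * EQs than TXQs, the TXQs are spread across the EQs round-robin.
      */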
3062 static int be_tx_qs_create(struct be_adapter *adapter)
3063 {
3064         struct be_queue_info *cq;
3065         struct be_tx_obj *txo;
3066         struct be_eq_obj *eqo;
3067         int status, i;
3068
3069         adapter->num_tx_qs = min(adapter->num_evt_qs, adapter->cfg_num_tx_irqs);
3070
3071         for_all_tx_queues(adapter, txo, i) {
3072                 cq = &txo->cq;
3073                 status = be_queue_alloc(adapter, cq, TX_CQ_LEN,
3074                                         sizeof(struct be_eth_tx_compl));
3075                 if (status)
3076                         return status;
3077
3078                 u64_stats_init(&txo->stats.sync);
3079                 u64_stats_init(&txo->stats.sync_compl);
3080
3081                 /* If num_evt_qs is less than num_tx_qs, then more than
3082                  * one txq shares an eq
3083                  */
3084                 eqo = &adapter->eq_obj[i % adapter->num_evt_qs];
3085                 status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3);
3086                 if (status)
3087                         return status;
3088
3089                 status = be_queue_alloc(adapter, &txo->q, TX_Q_LEN,
3090                                         sizeof(struct be_eth_wrb));
3091                 if (status)
3092                         return status;
3093
3094                 status = be_cmd_txq_create(adapter, txo);
3095                 if (status)
3096                         return status;
3097
3098                 netif_set_xps_queue(adapter->netdev, eqo->affinity_mask,
3099                                     eqo->idx);
3100         }
3101
3102         dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n",
3103                  adapter->num_tx_qs);
3104         return 0;
3105 }
3106
3107 static void be_rx_cqs_destroy(struct be_adapter *adapter)
3108 {
3109         struct be_queue_info *q;
3110         struct be_rx_obj *rxo;
3111         int i;
3112
3113         for_all_rx_queues(adapter, rxo, i) {
3114                 q = &rxo->cq;
3115                 if (q->created)
3116                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
3117                 be_queue_free(adapter, q);
3118         }
3119 }
3120
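     /* Work out how many RX queues to use (RSS rings plus an optional default
      * RXQ) and create a completion queue for each, spreading them across the
      * EQs.
      */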
3121 static int be_rx_cqs_create(struct be_adapter *adapter)
3122 {
3123         struct be_queue_info *eq, *cq;
3124         struct be_rx_obj *rxo;
3125         int rc, i;
3126
3127         adapter->num_rss_qs =
3128                         min(adapter->num_evt_qs, adapter->cfg_num_rx_irqs);
3129
3130         /* We'll use RSS only if at least 2 RSS rings are supported. */
3131         if (adapter->num_rss_qs < 2)
3132                 adapter->num_rss_qs = 0;
3133
3134         adapter->num_rx_qs = adapter->num_rss_qs + adapter->need_def_rxq;
3135
3136         /* When the interface is not capable of RSS rings (and there is no
3137          * need to create a default RXQ) we'll still need one RXQ
3138          */
3139         if (adapter->num_rx_qs == 0)
3140                 adapter->num_rx_qs = 1;
3141
3142         adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE;
3143         for_all_rx_queues(adapter, rxo, i) {
3144                 rxo->adapter = adapter;
3145                 cq = &rxo->cq;
3146                 rc = be_queue_alloc(adapter, cq, RX_CQ_LEN,
3147                                     sizeof(struct be_eth_rx_compl));
3148                 if (rc)
3149                         return rc;
3150
3151                 u64_stats_init(&rxo->stats.sync);
3152                 eq = &adapter->eq_obj[i % adapter->num_evt_qs].q;
3153                 rc = be_cmd_cq_create(adapter, cq, eq, false, 3);
3154                 if (rc)
3155                         return rc;
3156         }
3157
3158         dev_info(&adapter->pdev->dev,
3159                  "created %d RX queue(s)\n", adapter->num_rx_qs);
3160         return 0;
3161 }
3162
3163 static irqreturn_t be_intx(int irq, void *dev)
3164 {
3165         struct be_eq_obj *eqo = dev;
3166         struct be_adapter *adapter = eqo->adapter;
3167         int num_evts = 0;
3168
3169         /* IRQ is not expected when NAPI is scheduled as the EQ
3170          * will not be armed.
3171          * But, this can happen on Lancer INTx where it takes
3172          * a while to de-assert INTx or in BE2 where occasionally
3173          * an interrupt may be raised even when EQ is unarmed.
3174          * If NAPI is already scheduled, then counting & notifying
3175          * events will orphan them.
3176          */
3177         if (napi_schedule_prep(&eqo->napi)) {
3178                 num_evts = events_get(eqo);
3179                 __napi_schedule(&eqo->napi);
3180                 if (num_evts)
3181                         eqo->spurious_intr = 0;
3182         }
3183         be_eq_notify(adapter, eqo->q.id, false, true, num_evts, 0);
3184
3185         /* Return IRQ_HANDLED only for the first spurious intr
3186          * after a valid intr to stop the kernel from branding
3187          * this irq as a bad one!
3188          */
3189         if (num_evts || eqo->spurious_intr++ == 0)
3190                 return IRQ_HANDLED;
3191         else
3192                 return IRQ_NONE;
3193 }
3194
3195 static irqreturn_t be_msix(int irq, void *dev)
3196 {
3197         struct be_eq_obj *eqo = dev;
3198
3199         be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
3200         napi_schedule(&eqo->napi);
3201         return IRQ_HANDLED;
3202 }
3203
3204 static inline bool do_gro(struct be_rx_compl_info *rxcp)
3205 {
3206         return rxcp->tcpf && !rxcp->err && rxcp->l4_csum;
3207 }
3208
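     /* NAPI RX handler for one RX object: consume up to 'budget' completions,
      * pass the frames to the stack (via GRO when possible), re-arm the RX CQ
      * and replenish RX buffers if the queue is running low.
      */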
3209 static int be_process_rx(struct be_rx_obj *rxo, struct napi_struct *napi,
3210                          int budget)
3211 {
3212         struct be_adapter *adapter = rxo->adapter;
3213         struct be_queue_info *rx_cq = &rxo->cq;
3214         struct be_rx_compl_info *rxcp;
3215         u32 work_done;
3216         u32 frags_consumed = 0;
3217
3218         for (work_done = 0; work_done < budget; work_done++) {
3219                 rxcp = be_rx_compl_get(rxo);
3220                 if (!rxcp)
3221                         break;
3222
3223                 /* Is it a flush compl that has no data */
3224                 if (unlikely(rxcp->num_rcvd == 0))
3225                         goto loop_continue;
3226
3227                 /* Discard compl with partial DMA (Lancer B0) */
3228                 if (unlikely(!rxcp->pkt_size)) {
3229                         be_rx_compl_discard(rxo, rxcp);
3230                         goto loop_continue;
3231                 }
3232
3233                 /* On BE drop pkts that arrive due to imperfect filtering in
3234                  * promiscuous mode on some SKUs
3235                  */
3236                 if (unlikely(rxcp->port != adapter->port_num &&
3237                              !lancer_chip(adapter))) {
3238                         be_rx_compl_discard(rxo, rxcp);
3239                         goto loop_continue;
3240                 }
3241
3242                 if (do_gro(rxcp))
3243                         be_rx_compl_process_gro(rxo, napi, rxcp);
3244                 else
3245                         be_rx_compl_process(rxo, napi, rxcp);
3246
3247 loop_continue:
3248                 frags_consumed += rxcp->num_rcvd;
3249                 be_rx_stats_update(rxo, rxcp);
3250         }
3251
3252         if (work_done) {
3253                 be_cq_notify(adapter, rx_cq->id, true, work_done);
3254
3255                 /* When an rx-obj gets into post_starved state, just
3256                  * let be_worker do the posting.
3257                  */
3258                 if (atomic_read(&rxo->q.used) < RX_FRAGS_REFILL_WM &&
3259                     !rxo->rx_post_starved)
3260                         be_post_rx_frags(rxo, GFP_ATOMIC,
3261                                          max_t(u32, MAX_RX_POST,
3262                                                frags_consumed));
3263         }
3264
3265         return work_done;
3266 }
3268
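     /* Reap TX completions for one TX object: free the completed wrbs, re-arm
      * the TX CQ and wake the netdev queue if it was stopped for lack of wrbs.
      */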
3269 static void be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo,
3270                           int idx)
3271 {
3272         int num_wrbs = 0, work_done = 0;
3273         struct be_tx_compl_info *txcp;
3274
3275         while ((txcp = be_tx_compl_get(adapter, txo))) {
3276                 num_wrbs += be_tx_compl_process(adapter, txo, txcp->end_index);
3277                 work_done++;
3278         }
3279
3280         if (work_done) {
3281                 be_cq_notify(adapter, txo->cq.id, true, work_done);
3282                 atomic_sub(num_wrbs, &txo->q.used);
3283
3284                 /* As Tx wrbs have been freed up, wake up the netdev queue
3285                  * if it was stopped due to lack of tx wrbs. */
3286                 if (__netif_subqueue_stopped(adapter->netdev, idx) &&
3287                     be_can_txq_wake(txo)) {
3288                         netif_wake_subqueue(adapter->netdev, idx);
3289                 }
3290
3291                 u64_stats_update_begin(&tx_stats(txo)->sync_compl);
3292                 tx_stats(txo)->tx_compl += work_done;
3293                 u64_stats_update_end(&tx_stats(txo)->sync_compl);
3294         }
3295 }
3296
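     /* NAPI poll handler shared by the TX, RX and MCC queues attached to an
      * EQ: process TX and MCC completions, poll RX up to the budget, then
      * either complete NAPI and re-arm the EQ or stay in polling mode.
      */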
3297 int be_poll(struct napi_struct *napi, int budget)
3298 {
3299         struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3300         struct be_adapter *adapter = eqo->adapter;
3301         int max_work = 0, work, i, num_evts;
3302         struct be_rx_obj *rxo;
3303         struct be_tx_obj *txo;
3304         u32 mult_enc = 0;
3305
3306         num_evts = events_get(eqo);
3307
3308         for_all_tx_queues_on_eq(adapter, eqo, txo, i)
3309                 be_process_tx(adapter, txo, i);
3310
3311         /* This loop will iterate twice for EQ0 in which
3312          * completions of the last RXQ (default one) are also processed.
3313          * For other EQs the loop iterates only once.
3314          */
3315         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3316                 work = be_process_rx(rxo, napi, budget);
3317                 max_work = max(work, max_work);
3318         }
3319
3320         if (is_mcc_eqo(eqo))
3321                 be_process_mcc(adapter);
3322
3323         if (max_work < budget) {
3324                 napi_complete_done(napi, max_work);
3325
3326                 /* Skyhawk EQ_DB has a provision to set the rearm to interrupt
3327                  * delay via a delay multiplier encoding value
3328                  */
3329                 if (skyhawk_chip(adapter))
3330                         mult_enc = be_get_eq_delay_mult_enc(eqo);
3331
3332                 be_eq_notify(adapter, eqo->q.id, true, false, num_evts,
3333                              mult_enc);
3334         } else {
3335                 /* As we'll continue in polling mode, count and clear events */
3336                 be_eq_notify(adapter, eqo->q.id, false, false, num_evts, 0);
3337         }
3338         return max_work;
3339 }
3340
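     /* Check the adapter for unrecoverable errors: the SLIPORT status
      * registers on Lancer, or the UE status registers on other chips, and
      * latch BE_ERROR_UE when a genuine error is found.
      */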
3341 void be_detect_error(struct be_adapter *adapter)
3342 {
3343         u32 ue_lo = 0, ue_hi = 0, ue_lo_mask = 0, ue_hi_mask = 0;
3344         u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0;
3345         struct device *dev = &adapter->pdev->dev;
3346         u16 val;
3347         u32 i;
3348
3349         if (be_check_error(adapter, BE_ERROR_HW))
3350                 return;
3351
3352         if (lancer_chip(adapter)) {
3353                 sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
3354                 if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
3355                         be_set_error(adapter, BE_ERROR_UE);
3356                         sliport_err1 = ioread32(adapter->db +
3357                                                 SLIPORT_ERROR1_OFFSET);
3358                         sliport_err2 = ioread32(adapter->db +
3359                                                 SLIPORT_ERROR2_OFFSET);
3360                         /* Do not log error messages if it's a FW reset */
3361                         if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 &&
3362                             sliport_err2 == SLIPORT_ERROR_FW_RESET2) {
3363                                 dev_info(dev, "Reset is in progress\n");
3364                         } else {
3365                                 dev_err(dev, "Error detected in the card\n");
3366                                 dev_err(dev, "ERR: sliport status 0x%x\n",
3367                                         sliport_status);
3368                                 dev_err(dev, "ERR: sliport error1 0x%x\n",
3369                                         sliport_err1);
3370                                 dev_err(dev, "ERR: sliport error2 0x%x\n",
3371                                         sliport_err2);
3372                         }
3373                 }
3374         } else {
3375                 ue_lo = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_LOW);
3376                 ue_hi = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_HIGH);
3377                 ue_lo_mask = ioread32(adapter->pcicfg +
3378                                       PCICFG_UE_STATUS_LOW_MASK);
3379                 ue_hi_mask = ioread32(adapter->pcicfg +
3380                                       PCICFG_UE_STATUS_HI_MASK);
3381
3382                 ue_lo = (ue_lo & ~ue_lo_mask);
3383                 ue_hi = (ue_hi & ~ue_hi_mask);
3384
3385                 if (ue_lo || ue_hi) {
3386                         /* On certain platforms BE3 hardware can indicate
3387                          * spurious UEs. In case of a UE in the chip,
3388                          * the POST register correctly reports either a
3389                          * FAT_LOG_START state (FW is currently dumping
3390                          * FAT log data) or an ARMFW_UE state. Check for the
3391                          * above states to ascertain if the UE is valid or not.
3392                          */
3393                         if (BE3_chip(adapter)) {
3394                                 val = be_POST_stage_get(adapter);
3395                                 if ((val & POST_STAGE_FAT_LOG_START)
3396                                      != POST_STAGE_FAT_LOG_START &&
3397                                     (val & POST_STAGE_ARMFW_UE)
3398                                      != POST_STAGE_ARMFW_UE &&
3399                                     (val & POST_STAGE_RECOVERABLE_ERR)
3400                                      != POST_STAGE_RECOVERABLE_ERR)
3401                                         return;
3402                         }
3403
3404                         dev_err(dev, "Error detected in the adapter\n");
3405                         be_set_error(adapter, BE_ERROR_UE);
3406
3407                         for (i = 0; ue_lo; ue_lo >>= 1, i++) {
3408                                 if (ue_lo & 1)
3409                                         dev_err(dev, "UE: %s bit set\n",
3410                                                 ue_status_low_desc[i]);
3411                         }
3412                         for (i = 0; ue_hi; ue_hi >>= 1, i++) {
3413                                 if (ue_hi & 1)
3414                                         dev_err(dev, "UE: %s bit set\n",
3415                                                 ue_status_hi_desc[i]);
3416                         }
3417                 }
3418         }
3419 }
3420
3421 static void be_msix_disable(struct be_adapter *adapter)
3422 {
3423         if (msix_enabled(adapter)) {
3424                 pci_disable_msix(adapter->pdev);
3425                 adapter->num_msix_vec = 0;
3426                 adapter->num_msix_roce_vec = 0;
3427         }
3428 }
3429
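     /* Enable MSI-X with enough vectors for NIC (and RoCE, when supported)
      * use. On failure, VFs fail the probe as they cannot use INTx; PFs fall
      * back to INTx.
      */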
3430 static int be_msix_enable(struct be_adapter *adapter)
3431 {
3432         unsigned int i, max_roce_eqs;
3433         struct device *dev = &adapter->pdev->dev;
3434         int num_vec;
3435
3436         /* If RoCE is supported, program the max number of vectors that
3437          * could be used for NIC and RoCE, else, just program the number
3438          * we'll use initially.
3439          */
3440         if (be_roce_supported(adapter)) {
3441                 max_roce_eqs =
3442                         be_max_func_eqs(adapter) - be_max_nic_eqs(adapter);
3443                 max_roce_eqs = min(max_roce_eqs, num_online_cpus());
3444                 num_vec = be_max_any_irqs(adapter) + max_roce_eqs;
3445         } else {
3446                 num_vec = max(adapter->cfg_num_rx_irqs,
3447                               adapter->cfg_num_tx_irqs);
3448         }
3449
3450         for (i = 0; i < num_vec; i++)
3451                 adapter->msix_entries[i].entry = i;
3452
3453         num_vec = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
3454                                         MIN_MSIX_VECTORS, num_vec);
3455         if (num_vec < 0)
3456                 goto fail;
3457
3458         if (be_roce_supported(adapter) && num_vec > MIN_MSIX_VECTORS) {
3459                 adapter->num_msix_roce_vec = num_vec / 2;
3460                 dev_info(dev, "enabled %d MSI-x vector(s) for RoCE\n",
3461                          adapter->num_msix_roce_vec);
3462         }
3463
3464         adapter->num_msix_vec = num_vec - adapter->num_msix_roce_vec;
3465
3466         dev_info(dev, "enabled %d MSI-x vector(s) for NIC\n",
3467                  adapter->num_msix_vec);
3468         return 0;
3469
3470 fail:
3471         dev_warn(dev, "MSIx enable failed\n");
3472
3473         /* INTx is not supported in VFs, so fail probe if enable_msix fails */
3474         if (be_virtfn(adapter))
3475                 return num_vec;
3476         return 0;
3477 }
3478
3479 static inline int be_msix_vec_get(struct be_adapter *adapter,
3480                                   struct be_eq_obj *eqo)
3481 {
3482         return adapter->msix_entries[eqo->msix_idx].vector;
3483 }
3484
3485 static int be_msix_register(struct be_adapter *adapter)
3486 {
3487         struct net_device *netdev = adapter->netdev;
3488         struct be_eq_obj *eqo;
3489         int status, i, vec;
3490
3491         for_all_evt_queues(adapter, eqo, i) {
3492                 sprintf(eqo->desc, "%s-q%d", netdev->name, i);
3493                 vec = be_msix_vec_get(adapter, eqo);
3494                 status = request_irq(vec, be_msix, 0, eqo->desc, eqo);
3495                 if (status)
3496                         goto err_msix;
3497
3498                 irq_set_affinity_hint(vec, eqo->affinity_mask);
3499         }
3500
3501         return 0;
3502 err_msix:
3503         for (i--; i >= 0; i--) {
3504                 eqo = &adapter->eq_obj[i];
3505                 free_irq(be_msix_vec_get(adapter, eqo), eqo);
3506         }
3507         dev_warn(&adapter->pdev->dev, "MSIX Request IRQ failed - err %d\n",
3508                  status);
3509         be_msix_disable(adapter);
3510         return status;
3511 }
3512
3513 static int be_irq_register(struct be_adapter *adapter)
3514 {
3515         struct net_device *netdev = adapter->netdev;
3516         int status;
3517
3518         if (msix_enabled(adapter)) {
3519                 status = be_msix_register(adapter);
3520                 if (status == 0)
3521                         goto done;
3522                 /* INTx is not supported for VF */
3523                 if (be_virtfn(adapter))
3524                         return status;
3525         }
3526
3527         /* INTx: only the first EQ is used */
3528         netdev->irq = adapter->pdev->irq;
3529         status = request_irq(netdev->irq, be_intx, IRQF_SHARED, netdev->name,
3530                              &adapter->eq_obj[0]);
3531         if (status) {
3532                 dev_err(&adapter->pdev->dev,
3533                         "INTx request IRQ failed - err %d\n", status);
3534                 return status;
3535         }
3536 done:
3537         adapter->isr_registered = true;
3538         return 0;
3539 }
3540
3541 static void be_irq_unregister(struct be_adapter *adapter)
3542 {
3543         struct net_device *netdev = adapter->netdev;
3544         struct be_eq_obj *eqo;
3545         int i, vec;
3546
3547         if (!adapter->isr_registered)
3548                 return;
3549
3550         /* INTx */
3551         if (!msix_enabled(adapter)) {
3552                 free_irq(netdev->irq, &adapter->eq_obj[0]);
3553                 goto done;
3554         }
3555
3556         /* MSIx */
3557         for_all_evt_queues(adapter, eqo, i) {
3558                 vec = be_msix_vec_get(adapter, eqo);
3559                 irq_set_affinity_hint(vec, NULL);
3560                 free_irq(vec, eqo);
3561         }
3562
3563 done:
3564         adapter->isr_registered = false;
3565 }
3566
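     /* Destroy the RX queues, reclaim any posted buffers and disable RSS. */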
3567 static void be_rx_qs_destroy(struct be_adapter *adapter)
3568 {
3569         struct rss_info *rss = &adapter->rss_info;
3570         struct be_queue_info *q;
3571         struct be_rx_obj *rxo;
3572         int i;
3573
3574         for_all_rx_queues(adapter, rxo, i) {
3575                 q = &rxo->q;
3576                 if (q->created) {
3577                         /* If RXQs are destroyed while in an "out of buffer"
3578                          * state, there is a possibility of an HW stall on
3579                          * Lancer. So, post 64 buffers to each queue to relieve
3580                          * the "out of buffer" condition.
3581                          * Make sure there's space in the RXQ before posting.
3582                          */
3583                         if (lancer_chip(adapter)) {
3584                                 be_rx_cq_clean(rxo);
3585                                 if (atomic_read(&q->used) == 0)
3586                                         be_post_rx_frags(rxo, GFP_KERNEL,
3587                                                          MAX_RX_POST);
3588                         }
3589
3590                         be_cmd_rxq_destroy(adapter, q);
3591                         be_rx_cq_clean(rxo);
3592                         be_rxq_clean(rxo);
3593                 }
3594                 be_queue_free(adapter, q);
3595         }
3596
3597         if (rss->rss_flags) {
3598                 rss->rss_flags = RSS_ENABLE_NONE;
3599                 be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3600                                   128, rss->rss_hkey);
3601         }
3602 }
3603
3604 static void be_disable_if_filters(struct be_adapter *adapter)
3605 {
3606         /* Don't delete MAC on BE3 VFs without FILTMGMT privilege  */
3607         if (!BEx_chip(adapter) || !be_virtfn(adapter) ||
3608             check_privilege(adapter, BE_PRIV_FILTMGMT)) {
3609                 be_dev_mac_del(adapter, adapter->pmac_id[0]);
3610                 eth_zero_addr(adapter->dev_mac);
3611         }
3612
3613         be_clear_uc_list(adapter);
3614         be_clear_mc_list(adapter);
3615
3616         /* The IFACE flags are enabled in the open path and cleared
3617          * in the close path. When a VF gets detached from the host and
3618          * assigned to a VM the following happens:
3619          *      - VF's IFACE flags get cleared in the detach path
3620          *      - IFACE create is issued by the VF in the attach path
3621          * Due to a bug in the BE3/Skyhawk-R FW
3622          * (Lancer FW doesn't have the bug), the IFACE capability flags
3623          * specified along with the IFACE create cmd issued by a VF are not
3624          * honoured by FW.  As a consequence, if a *new* driver
3625          * (that enables/disables IFACE flags in open/close)
3626          * is loaded in the host and an *old* driver is used by a VM/VF,
3627          * the IFACE gets created *without* the needed flags.
3628          * To avoid this, disable RX-filter flags only for Lancer.
3629          */
3630         if (lancer_chip(adapter)) {
3631                 be_cmd_rx_filter(adapter, BE_IF_ALL_FILT_FLAGS, OFF);
3632                 adapter->if_flags &= ~BE_IF_ALL_FILT_FLAGS;
3633         }
3634 }
3635
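     /* ndo_stop handler: quiesce RX filters, NAPI and MCC processing, drain
      * pending TX completions, destroy the RX queues, clean the EQs and
      * release the IRQs.
      */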
3636 static int be_close(struct net_device *netdev)
3637 {
3638         struct be_adapter *adapter = netdev_priv(netdev);
3639         struct be_eq_obj *eqo;
3640         int i;
3641
3642         /* This protection is needed as be_close() may be called even when the
3643          * adapter is in a cleared state (after an EEH permanent failure)
3644          */
3645         if (!(adapter->flags & BE_FLAGS_SETUP_DONE))
3646                 return 0;
3647
3648         /* Before attempting cleanup ensure all the pending cmds in the
3649          * config_wq have finished execution
3650          */
3651         flush_workqueue(be_wq);
3652
3653         be_disable_if_filters(adapter);
3654
3655         if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
3656                 for_all_evt_queues(adapter, eqo, i) {
3657                         napi_disable(&eqo->napi);
3658                 }
3659                 adapter->flags &= ~BE_FLAGS_NAPI_ENABLED;
3660         }
3661
3662         be_async_mcc_disable(adapter);
3663
3664         /* Wait for all pending tx completions to arrive so that
3665          * all tx skbs are freed.
3666          */
3667         netif_tx_disable(netdev);
3668         be_tx_compl_clean(adapter);
3669
3670         be_rx_qs_destroy(adapter);
3671
3672         for_all_evt_queues(adapter, eqo, i) {
3673                 if (msix_enabled(adapter))
3674                         synchronize_irq(be_msix_vec_get(adapter, eqo));
3675                 else
3676                         synchronize_irq(netdev->irq);
3677                 be_eq_clean(eqo);
3678         }
3679
3680         be_irq_unregister(adapter);
3681
3682         return 0;
3683 }
3684
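     /* Create the RX queues (default RXQ and/or RSS rings), program the RSS
      * indirection table and hash key when multiple rings exist, and post the
      * initial RX buffers.
      */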
3685 static int be_rx_qs_create(struct be_adapter *adapter)
3686 {
3687         struct rss_info *rss = &adapter->rss_info;
3688         u8 rss_key[RSS_HASH_KEY_LEN];
3689         struct be_rx_obj *rxo;
3690         int rc, i, j;
3691
3692         for_all_rx_queues(adapter, rxo, i) {
3693                 rc = be_queue_alloc(adapter, &rxo->q, RX_Q_LEN,
3694                                     sizeof(struct be_eth_rx_d));
3695                 if (rc)
3696                         return rc;
3697         }
3698
3699         if (adapter->need_def_rxq || !adapter->num_rss_qs) {
3700                 rxo = default_rxo(adapter);
3701                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3702                                        rx_frag_size, adapter->if_handle,
3703                                        false, &rxo->rss_id);
3704                 if (rc)
3705                         return rc;
3706         }
3707
3708         for_all_rss_queues(adapter, rxo, i) {
3709                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3710                                        rx_frag_size, adapter->if_handle,
3711                                        true, &rxo->rss_id);
3712                 if (rc)
3713                         return rc;
3714         }
3715
3716         if (be_multi_rxq(adapter)) {
3717                 for (j = 0; j < RSS_INDIR_TABLE_LEN; j += adapter->num_rss_qs) {
3718                         for_all_rss_queues(adapter, rxo, i) {
3719                                 if ((j + i) >= RSS_INDIR_TABLE_LEN)
3720                                         break;
3721                                 rss->rsstable[j + i] = rxo->rss_id;
3722                                 rss->rss_queue[j + i] = i;
3723                         }
3724                 }
3725                 rss->rss_flags = RSS_ENABLE_TCP_IPV4 | RSS_ENABLE_IPV4 |
3726                         RSS_ENABLE_TCP_IPV6 | RSS_ENABLE_IPV6;
3727
3728                 if (!BEx_chip(adapter))
3729                         rss->rss_flags |= RSS_ENABLE_UDP_IPV4 |
3730                                 RSS_ENABLE_UDP_IPV6;
3731
3732                 netdev_rss_key_fill(rss_key, RSS_HASH_KEY_LEN);
3733                 rc = be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3734                                        RSS_INDIR_TABLE_LEN, rss_key);
3735                 if (rc) {
3736                         rss->rss_flags = RSS_ENABLE_NONE;
3737                         return rc;
3738                 }
3739
3740                 memcpy(rss->rss_hkey, rss_key, RSS_HASH_KEY_LEN);
3741         } else {
3742                 /* Disable RSS, if only default RX Q is created */
3743                 rss->rss_flags = RSS_ENABLE_NONE;
3744         }
3746
3747         /* Post 1 less than RXQ-len to avoid head being equal to tail,
3748          * which is a queue empty condition
3749          */
3750         for_all_rx_queues(adapter, rxo, i)
3751                 be_post_rx_frags(rxo, GFP_KERNEL, RX_Q_LEN - 1);
3752
3753         return 0;
3754 }
3755
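     /* Re-program the interface filters in the open path: basic RX filter
      * flags, the device MAC, any configured VLANs and the RX mode.
      */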
3756 static int be_enable_if_filters(struct be_adapter *adapter)
3757 {
3758         int status;
3759
3760         status = be_cmd_rx_filter(adapter, BE_IF_FILT_FLAGS_BASIC, ON);
3761         if (status)
3762                 return status;
3763
3764         /* This condition is usually true, as the ->dev_mac is zeroed.
3765          * But on BE3 VFs the initial MAC is pre-programmed by PF and
3766          * subsequent be_dev_mac_add() can fail (after fresh boot)
3767          */
3768         if (!ether_addr_equal(adapter->dev_mac, adapter->netdev->dev_addr)) {
3769                 int old_pmac_id = -1;
3770
3771                 /* Remember old programmed MAC if any - can happen on BE3 VF */
3772                 if (!is_zero_ether_addr(adapter->dev_mac))
3773                         old_pmac_id = adapter->pmac_id[0];
3774
3775                 status = be_dev_mac_add(adapter, adapter->netdev->dev_addr);
3776                 if (status)
3777                         return status;
3778
3779                 /* Delete the old programmed MAC as we successfully programmed
3780                  * a new MAC
3781                  */
3782                 if (old_pmac_id >= 0 && old_pmac_id != adapter->pmac_id[0])
3783                         be_dev_mac_del(adapter, old_pmac_id);
3784
3785                 ether_addr_copy(adapter->dev_mac, adapter->netdev->dev_addr);
3786         }
3787
3788         if (adapter->vlans_added)
3789                 be_vid_config(adapter);
3790
3791         __be_set_rx_mode(adapter);
3792
3793         return 0;
3794 }
3795
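     /* ndo_open handler: create the RX queues, enable filters, register IRQs,
      * arm the CQs and EQs, enable NAPI and MCC processing, and start the TX
      * queues.
      */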
3796 static int be_open(struct net_device *netdev)
3797 {
3798         struct be_adapter *adapter = netdev_priv(netdev);
3799         struct be_eq_obj *eqo;
3800         struct be_rx_obj *rxo;
3801         struct be_tx_obj *txo;
3802         u8 link_status;
3803         int status, i;
3804
3805         status = be_rx_qs_create(adapter);
3806         if (status)
3807                 goto err;
3808
3809         status = be_enable_if_filters(adapter);
3810         if (status)
3811                 goto err;
3812
3813         status = be_irq_register(adapter);
3814         if (status)
3815                 goto err;
3816
3817         for_all_rx_queues(adapter, rxo, i)
3818                 be_cq_notify(adapter, rxo->cq.id, true, 0);
3819
3820         for_all_tx_queues(adapter, txo, i)
3821                 be_cq_notify(adapter, txo->cq.id, true, 0);
3822
3823         be_async_mcc_enable(adapter);
3824
3825         for_all_evt_queues(adapter, eqo, i) {
3826                 napi_enable(&eqo->napi);
3827                 be_eq_notify(adapter, eqo->q.id, true, true, 0, 0);
3828         }
3829         adapter->flags |= BE_FLAGS_NAPI_ENABLED;
3830
3831         status = be_cmd_link_status_query(adapter, NULL, &link_status, 0);
3832         if (!status)
3833                 be_link_status_update(adapter, link_status);
3834
3835         netif_tx_start_all_queues(netdev);
3836         if (skyhawk_chip(adapter))
3837                 udp_tunnel_get_rx_info(netdev);
3838
3839         return 0;
3840 err:
3841         be_close(adapter->netdev);
3842         return -EIO;
3843 }
3844
3845 static void be_vf_eth_addr_generate(struct be_adapter *adapter, u8 *mac)
3846 {
3847         u32 addr;
3848
3849         addr = jhash(adapter->netdev->dev_addr, ETH_ALEN, 0);
3850
3851         mac[5] = (u8)(addr & 0xFF);
3852         mac[4] = (u8)((addr >> 8) & 0xFF);
3853         mac[3] = (u8)((addr >> 16) & 0xFF);
3854         /* Use the OUI from the current MAC address */
3855         memcpy(mac, adapter->netdev->dev_addr, 3);
3856 }
3857
3858 /*
3859  * Generate a seed MAC address from the PF MAC Address using jhash.
3860  * MAC addresses for VFs are assigned incrementally starting from the seed.
3861  * These addresses are programmed in the ASIC by the PF and the VF driver
3862  * queries for the MAC address during its probe.
3863  */
3864 static int be_vf_eth_addr_config(struct be_adapter *adapter)
3865 {
3866         u32 vf;
3867         int status = 0;
3868         u8 mac[ETH_ALEN];
3869         struct be_vf_cfg *vf_cfg;
3870
3871         be_vf_eth_addr_generate(adapter, mac);
3872
3873         for_all_vfs(adapter, vf_cfg, vf) {
3874                 if (BEx_chip(adapter))
3875                         status = be_cmd_pmac_add(adapter, mac,
3876                                                  vf_cfg->if_handle,
3877                                                  &vf_cfg->pmac_id, vf + 1);
3878                 else
3879                         status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
3880                                                 vf + 1);
3881
3882                 if (status)
3883                         dev_err(&adapter->pdev->dev,
3884                                 "MAC address assignment failed for VF %d\n",
3885                                 vf);
3886                 else
3887                         memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3888
3889                 mac[5] += 1;
3890         }
3891         return status;
3892 }
3893
3894 static int be_vfs_mac_query(struct be_adapter *adapter)
3895 {
3896         int status, vf;
3897         u8 mac[ETH_ALEN];
3898         struct be_vf_cfg *vf_cfg;
3899
3900         for_all_vfs(adapter, vf_cfg, vf) {
3901                 status = be_cmd_get_active_mac(adapter, vf_cfg->pmac_id,
3902                                                mac, vf_cfg->if_handle,
3903                                                false, vf + 1);
3904                 if (status)
3905                         return status;
3906                 memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3907         }
3908         return 0;
3909 }
3910
3911 static void be_vf_clear(struct be_adapter *adapter)
3912 {
3913         struct be_vf_cfg *vf_cfg;
3914         u32 vf;
3915
3916         if (pci_vfs_assigned(adapter->pdev)) {
3917                 dev_warn(&adapter->pdev->dev,
3918                          "VFs are assigned to VMs: not disabling VFs\n");
3919                 goto done;
3920         }
3921
3922         pci_disable_sriov(adapter->pdev);
3923
3924         for_all_vfs(adapter, vf_cfg, vf) {
3925                 if (BEx_chip(adapter))
3926                         be_cmd_pmac_del(adapter, vf_cfg->if_handle,
3927                                         vf_cfg->pmac_id, vf + 1);
3928                 else
3929                         be_cmd_set_mac(adapter, NULL, vf_cfg->if_handle,
3930                                        vf + 1);
3931
3932                 be_cmd_if_destroy(adapter, vf_cfg->if_handle, vf + 1);
3933         }
3934
3935         if (BE3_chip(adapter))
3936                 be_cmd_set_hsw_config(adapter, 0, 0,
3937                                       adapter->if_handle,
3938                                       PORT_FWD_TYPE_PASSTHRU, 0);
3939 done:
3940         kfree(adapter->vf_cfg);
3941         adapter->num_vfs = 0;
3942         adapter->flags &= ~BE_FLAGS_SRIOV_ENABLED;
3943 }
3944
3945 static void be_clear_queues(struct be_adapter *adapter)
3946 {
3947         be_mcc_queues_destroy(adapter);
3948         be_rx_cqs_destroy(adapter);
3949         be_tx_queues_destroy(adapter);
3950         be_evt_queues_destroy(adapter);
3951 }
3952
3953 static void be_cancel_worker(struct be_adapter *adapter)
3954 {
3955         if (adapter->flags & BE_FLAGS_WORKER_SCHEDULED) {
3956                 cancel_delayed_work_sync(&adapter->work);
3957                 adapter->flags &= ~BE_FLAGS_WORKER_SCHEDULED;
3958         }
3959 }
3960
3961 static void be_cancel_err_detection(struct be_adapter *adapter)
3962 {
3963         struct be_error_recovery *err_rec = &adapter->error_recovery;
3964
3965         if (!be_err_recovery_workq)
3966                 return;
3967
3968         if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) {
3969                 cancel_delayed_work_sync(&err_rec->err_detection_work);
3970                 adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED;
3971         }
3972 }
3973
3974 static int be_enable_vxlan_offloads(struct be_adapter *adapter)
3975 {
3976         struct net_device *netdev = adapter->netdev;
3977         struct device *dev = &adapter->pdev->dev;
3978         struct be_vxlan_port *vxlan_port;
3979         __be16 port;
3980         int status;
3981
3982         vxlan_port = list_first_entry(&adapter->vxlan_port_list,
3983                                       struct be_vxlan_port, list);
3984         port = vxlan_port->port;
3985
3986         status = be_cmd_manage_iface(adapter, adapter->if_handle,
3987                                      OP_CONVERT_NORMAL_TO_TUNNEL);
3988         if (status) {
3989                 dev_warn(dev, "Failed to convert normal interface to tunnel\n");
3990                 return status;
3991         }
3992         adapter->flags |= BE_FLAGS_VXLAN_OFFLOADS;
3993
3994         status = be_cmd_set_vxlan_port(adapter, port);
3995         if (status) {
3996                 dev_warn(dev, "Failed to add VxLAN port\n");
3997                 return status;
3998         }
3999         adapter->vxlan_port = port;
4000
4001         netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
4002                                    NETIF_F_TSO | NETIF_F_TSO6 |
4003                                    NETIF_F_GSO_UDP_TUNNEL;
4004
4005         dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n",
4006                  be16_to_cpu(port));
4007         return 0;
4008 }
4009
4010 static void be_disable_vxlan_offloads(struct be_adapter *adapter)
4011 {
4012         struct net_device *netdev = adapter->netdev;
4013
4014         if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS)
4015                 be_cmd_manage_iface(adapter, adapter->if_handle,
4016                                     OP_CONVERT_TUNNEL_TO_NORMAL);
4017
4018         if (adapter->vxlan_port)
4019                 be_cmd_set_vxlan_port(adapter, 0);
4020
4021         adapter->flags &= ~BE_FLAGS_VXLAN_OFFLOADS;
4022         adapter->vxlan_port = 0;
4023
4024         netdev->hw_enc_features = 0;
4025 }
4026
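     /* Compute the share of queue and filter resources (RX/TX queues, RSS,
      * MACs, VLANs, interfaces, MCC queues) to be provisioned for each VF
      * when SR-IOV is enabled.
      */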
4027 static void be_calculate_vf_res(struct be_adapter *adapter, u16 num_vfs,
4028                                 struct be_resources *vft_res)
4029 {
4030         struct be_resources res = adapter->pool_res;
4031         u32 vf_if_cap_flags = res.vf_if_cap_flags;
4032         struct be_resources res_mod = {0};
4033         u16 num_vf_qs = 1;
4034
4035         /* Distribute the queue resources among the PF and its VFs */
4036         if (num_vfs) {
4037                 /* Divide the rx queues evenly among the VFs and the PF, capped
4038                  * at VF-EQ-count. Any remainder queues belong to the PF.
4039                  */
4040                 num_vf_qs = min(SH_VF_MAX_NIC_EQS,
4041                                 res.max_rss_qs / (num_vfs + 1));
4042
4043                 /* Skyhawk-R chip supports only MAX_PORT_RSS_TABLES
4044                  * RSS Tables per port. Provide RSS on VFs, only if number of
4045                  * VFs requested is less than its PF Pool's RSS Tables limit.
4046                  */
4047                 if (num_vfs >= be_max_pf_pool_rss_tables(adapter))
4048                         num_vf_qs = 1;
4049         }
4050
4051         /* Resource with fields set to all '1's by GET_PROFILE_CONFIG cmd,
4052          * which are modifiable using SET_PROFILE_CONFIG cmd.
4053          */
4054         be_cmd_get_profile_config(adapter, &res_mod, NULL, ACTIVE_PROFILE_TYPE,
4055                                   RESOURCE_MODIFIABLE, 0);
4056
4057         /* If RSS IFACE capability flags are modifiable for a VF, set the
4058          * capability flag as valid and set RSS and DEFQ_RSS IFACE flags if
4059          * more than 1 RSSQ is available for a VF.
4060          * Otherwise, provision only 1 queue pair for VF.
4061          */
4062         if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_RSS) {
4063                 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4064                 if (num_vf_qs > 1) {
4065                         vf_if_cap_flags |= BE_IF_FLAGS_RSS;
4066                         if (res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS)
4067                                 vf_if_cap_flags |= BE_IF_FLAGS_DEFQ_RSS;
4068                 } else {
4069                         vf_if_cap_flags &= ~(BE_IF_FLAGS_RSS |
4070                                              BE_IF_FLAGS_DEFQ_RSS);
4071                 }
4072         } else {
4073                 num_vf_qs = 1;
4074         }
4075
4076         if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
4077                 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4078                 vf_if_cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4079         }
4080
4081         vft_res->vf_if_cap_flags = vf_if_cap_flags;
4082         vft_res->max_rx_qs = num_vf_qs;
4083         vft_res->max_rss_qs = num_vf_qs;
4084         vft_res->max_tx_qs = res.max_tx_qs / (num_vfs + 1);
4085         vft_res->max_cq_count = res.max_cq_count / (num_vfs + 1);
4086
4087         /* Distribute unicast MACs, VLANs, IFACE count and MCCQ count equally
4088          * among the PF and its VFs, if the fields are changeable
4089          */
4090         if (res_mod.max_uc_mac == FIELD_MODIFIABLE)
4091                 vft_res->max_uc_mac = res.max_uc_mac / (num_vfs + 1);
4092
4093         if (res_mod.max_vlans == FIELD_MODIFIABLE)
4094                 vft_res->max_vlans = res.max_vlans / (num_vfs + 1);
4095
4096         if (res_mod.max_iface_count == FIELD_MODIFIABLE)
4097                 vft_res->max_iface_count = res.max_iface_count / (num_vfs + 1);
4098
4099         if (res_mod.max_mcc_count == FIELD_MODIFIABLE)
4100                 vft_res->max_mcc_count = res.max_mcc_count / (num_vfs + 1);
4101 }
4102
4103 static void be_if_destroy(struct be_adapter *adapter)
4104 {
4105         be_cmd_if_destroy(adapter, adapter->if_handle, 0);
4106
4107         kfree(adapter->pmac_id);
4108         adapter->pmac_id = NULL;
4109
4110         kfree(adapter->mc_list);
4111         adapter->mc_list = NULL;
4112
4113         kfree(adapter->uc_list);
4114         adapter->uc_list = NULL;
4115 }
4116
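     /* Undo the adapter setup: release the VFs, re-distribute SR-IOV
      * resources (Skyhawk PF only), disable VxLAN offloads, destroy the
      * interface and all queues, and disable MSI-X.
      */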
4117 static int be_clear(struct be_adapter *adapter)
4118 {
4119         struct pci_dev *pdev = adapter->pdev;
4120         struct be_resources vft_res = {0};
4121
4122         be_cancel_worker(adapter);
4123
4124         flush_workqueue(be_wq);
4125
4126         if (sriov_enabled(adapter))
4127                 be_vf_clear(adapter);
4128
4129         /* Re-configure FW to distribute resources evenly across max-supported
4130          * number of VFs, only when VFs are not already enabled.
4131          */
4132         if (skyhawk_chip(adapter) && be_physfn(adapter) &&
4133             !pci_vfs_assigned(pdev)) {
4134                 be_calculate_vf_res(adapter,
4135                                     pci_sriov_get_totalvfs(pdev),
4136                                     &vft_res);
4137                 be_cmd_set_sriov_config(adapter, adapter->pool_res,
4138                                         pci_sriov_get_totalvfs(pdev),
4139                                         &vft_res);
4140         }
4141
4142         be_disable_vxlan_offloads(adapter);
4143
4144         be_if_destroy(adapter);
4145
4146         be_clear_queues(adapter);
4147
4148         be_msix_disable(adapter);
4149         adapter->flags &= ~BE_FLAGS_SETUP_DONE;
4150         return 0;
4151 }
4152
4153 static int be_vfs_if_create(struct be_adapter *adapter)
4154 {
4155         struct be_resources res = {0};
4156         u32 cap_flags, en_flags, vf;
4157         struct be_vf_cfg *vf_cfg;
4158         int status;
4159
4160         /* If a FW profile exists, then cap_flags are updated */
4161         cap_flags = BE_VF_IF_EN_FLAGS;
4162
4163         for_all_vfs(adapter, vf_cfg, vf) {
4164                 if (!BE3_chip(adapter)) {
4165                         status = be_cmd_get_profile_config(adapter, &res, NULL,
4166                                                            ACTIVE_PROFILE_TYPE,
4167                                                            RESOURCE_LIMITS,
4168                                                            vf + 1);
4169                         if (!status) {
4170                                 cap_flags = res.if_cap_flags;
4171                                 /* Prevent VFs from enabling VLAN promiscuous
4172                                  * mode
4173                                  */
4174                                 cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4175                         }
4176                 }
4177
4178                 /* PF should enable IF flags during proxy if_create call */
4179                 en_flags = cap_flags & BE_VF_IF_EN_FLAGS;
4180                 status = be_cmd_if_create(adapter, cap_flags, en_flags,
4181                                           &vf_cfg->if_handle, vf + 1);
4182                 if (status)
4183                         return status;
4184         }
4185
4186         return 0;
4187 }
4188
4189 static int be_vf_setup_init(struct be_adapter *adapter)
4190 {
4191         struct be_vf_cfg *vf_cfg;
4192         int vf;
4193
4194         adapter->vf_cfg = kcalloc(adapter->num_vfs, sizeof(*vf_cfg),
4195                                   GFP_KERNEL);
4196         if (!adapter->vf_cfg)
4197                 return -ENOMEM;
4198
4199         for_all_vfs(adapter, vf_cfg, vf) {
4200                 vf_cfg->if_handle = -1;
4201                 vf_cfg->pmac_id = -1;
4202         }
4203         return 0;
4204 }
4205
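     /* Set up SR-IOV VFs: create (or query) the per-VF interfaces and MAC
      * addresses, grant FILTMGMT privilege where possible, configure QoS,
      * spoof-check and link state, and finally enable SR-IOV on the PCI
      * device.
      */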
4206 static int be_vf_setup(struct be_adapter *adapter)
4207 {
4208         struct device *dev = &adapter->pdev->dev;
4209         struct be_vf_cfg *vf_cfg;
4210         int status, old_vfs, vf;
4211         bool spoofchk;
4212
4213         old_vfs = pci_num_vf(adapter->pdev);
4214
4215         status = be_vf_setup_init(adapter);
4216         if (status)
4217                 goto err;
4218
4219         if (old_vfs) {
4220                 for_all_vfs(adapter, vf_cfg, vf) {
4221                         status = be_cmd_get_if_id(adapter, vf_cfg, vf);
4222                         if (status)
4223                                 goto err;
4224                 }
4225
4226                 status = be_vfs_mac_query(adapter);
4227                 if (status)
4228                         goto err;
4229         } else {
4230                 status = be_vfs_if_create(adapter);
4231                 if (status)
4232                         goto err;
4233
4234                 status = be_vf_eth_addr_config(adapter);
4235                 if (status)
4236                         goto err;
4237         }
4238
4239         for_all_vfs(adapter, vf_cfg, vf) {
4240                 /* Allow VFs to program MAC/VLAN filters */
4241                 status = be_cmd_get_fn_privileges(adapter, &vf_cfg->privileges,
4242                                                   vf + 1);
4243                 if (!status && !(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
4244                         status = be_cmd_set_fn_privileges(adapter,
4245                                                           vf_cfg->privileges |
4246                                                           BE_PRIV_FILTMGMT,
4247                                                           vf + 1);
4248                         if (!status) {
4249                                 vf_cfg->privileges |= BE_PRIV_FILTMGMT;
4250                                 dev_info(dev, "VF%d has FILTMGMT privilege\n",
4251                                          vf);
4252                         }
4253                 }
4254
4255                 /* Allow full available bandwidth */
4256                 if (!old_vfs)
4257                         be_cmd_config_qos(adapter, 0, 0, vf + 1);
4258
4259                 status = be_cmd_get_hsw_config(adapter, NULL, vf + 1,
4260                                                vf_cfg->if_handle, NULL,
4261                                                &spoofchk);
4262                 if (!status)
4263                         vf_cfg->spoofchk = spoofchk;
4264
4265                 if (!old_vfs) {
4266                         be_cmd_enable_vf(adapter, vf + 1);
4267                         be_cmd_set_logical_link_config(adapter,
4268                                                        IFLA_VF_LINK_STATE_AUTO,
4269                                                        vf+1);
4270                 }
4271         }
4272
4273         if (!old_vfs) {
4274                 status = pci_enable_sriov(adapter->pdev, adapter->num_vfs);
4275                 if (status) {
4276                         dev_err(dev, "SRIOV enable failed\n");
4277                         adapter->num_vfs = 0;
4278                         goto err;
4279                 }
4280         }
4281
4282         if (BE3_chip(adapter)) {
4283                 /* On BE3, enable VEB only when SRIOV is enabled */
4284                 status = be_cmd_set_hsw_config(adapter, 0, 0,
4285                                                adapter->if_handle,
4286                                                PORT_FWD_TYPE_VEB, 0);
4287                 if (status)
4288                         goto err;
4289         }
4290
4291         adapter->flags |= BE_FLAGS_SRIOV_ENABLED;
4292         return 0;
4293 err:
4294         dev_err(dev, "VF setup failed\n");
4295         be_vf_clear(adapter);
4296         return status;
4297 }
4298
4299 /* Converting function_mode bits on BE3 to SH mc_type enums */
4300
4301 static u8 be_convert_mc_type(u32 function_mode)
4302 {
4303         if (function_mode & VNIC_MODE && function_mode & QNQ_MODE)
4304                 return vNIC1;
4305         else if (function_mode & QNQ_MODE)
4306                 return FLEX10;
4307         else if (function_mode & VNIC_MODE)
4308                 return vNIC2;
4309         else if (function_mode & UMC_ENABLED)
4310                 return UMC;
4311         else
4312                 return MC_NONE;
4313 }
4314
4315 /* On BE2/BE3 FW does not suggest the supported limits */
4316 static void BEx_get_resources(struct be_adapter *adapter,
4317                               struct be_resources *res)
4318 {
4319         bool use_sriov = adapter->num_vfs ? 1 : 0;
4320
4321         if (be_physfn(adapter))
4322                 res->max_uc_mac = BE_UC_PMAC_COUNT;
4323         else
4324                 res->max_uc_mac = BE_VF_UC_PMAC_COUNT;
4325
4326         adapter->mc_type = be_convert_mc_type(adapter->function_mode);
4327
4328         if (be_is_mc(adapter)) {
4329                 /* Assume that there are 4 channels per port
4330                  * when multi-channel is enabled
4331                  */
4332                 if (be_is_qnq_mode(adapter))
4333                         res->max_vlans = BE_NUM_VLANS_SUPPORTED/8;
4334                 else
4335                         /* In a non-qnq multichannel mode, the pvid
4336                          * takes up one vlan entry
4337                          */
4338                         res->max_vlans = (BE_NUM_VLANS_SUPPORTED / 4) - 1;
4339         } else {
4340                 res->max_vlans = BE_NUM_VLANS_SUPPORTED;
4341         }
4342
4343         res->max_mcast_mac = BE_MAX_MC;
4344
4345         /* 1) For BE3 1Gb ports, FW does not support multiple TXQs
4346          * 2) Create multiple TX rings on a BE3-R multi-channel interface
4347          *    *only* if it is RSS-capable.
4348          */
4349         if (BE2_chip(adapter) || use_sriov ||  (adapter->port_num > 1) ||
4350             be_virtfn(adapter) ||
4351             (be_is_mc(adapter) &&
4352              !(adapter->function_caps & BE_FUNCTION_CAPS_RSS))) {
4353                 res->max_tx_qs = 1;
4354         } else if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) {
4355                 struct be_resources super_nic_res = {0};
4356
4357                 /* On a SuperNIC profile, the driver needs to use the
4358                  * GET_PROFILE_CONFIG cmd to query the per-function TXQ limits
4359                  */
4360                 be_cmd_get_profile_config(adapter, &super_nic_res, NULL,
4361                                           ACTIVE_PROFILE_TYPE, RESOURCE_LIMITS,
4362                                           0);
4363                 /* Some old versions of BE3 FW don't report max_tx_qs value */
4364                 res->max_tx_qs = super_nic_res.max_tx_qs ? : BE3_MAX_TX_QS;
4365         } else {
4366                 res->max_tx_qs = BE3_MAX_TX_QS;
4367         }
4368
4369         if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) &&
4370             !use_sriov && be_physfn(adapter))
4371                 res->max_rss_qs = (adapter->be3_native) ?
4372                                            BE3_MAX_RSS_QS : BE2_MAX_RSS_QS;
4373         res->max_rx_qs = res->max_rss_qs + 1;
4374
4375         if (be_physfn(adapter))
4376                 res->max_evt_qs = (be_max_vfs(adapter) > 0) ?
4377                                         BE3_SRIOV_MAX_EVT_QS : BE3_MAX_EVT_QS;
4378         else
4379                 res->max_evt_qs = 1;
4380
4381         res->if_cap_flags = BE_IF_CAP_FLAGS_WANT;
4382         res->if_cap_flags &= ~BE_IF_FLAGS_DEFQ_RSS;
4383         if (!(adapter->function_caps & BE_FUNCTION_CAPS_RSS))
4384                 res->if_cap_flags &= ~BE_IF_FLAGS_RSS;
4385 }
4386
4387 static void be_setup_init(struct be_adapter *adapter)
4388 {
4389         adapter->vlan_prio_bmap = 0xff;
4390         adapter->phy.link_speed = -1;
4391         adapter->if_handle = -1;
4392         adapter->be3_native = false;
4393         adapter->if_flags = 0;
4394         adapter->phy_state = BE_UNKNOWN_PHY_STATE;
4395         if (be_physfn(adapter))
4396                 adapter->cmd_privileges = MAX_PRIVILEGES;
4397         else
4398                 adapter->cmd_privileges = MIN_PRIVILEGES;
4399 }
4400
4401 /* HW supports only MAX_PORT_RSS_TABLES RSS Policy Tables per port.
4402  * However, this HW limitation is not exposed to the host via any SLI cmd.
4403  * As a result, in the case of SRIOV and in particular multi-partition configs
4404  * the driver needs to calculate a proportional share of RSS Tables per PF-pool
4405  * for distribution between the VFs. This self-imposed limit will determine the
4406  * number of VFs for which RSS can be enabled.
4407  */
4408 static void be_calculate_pf_pool_rss_tables(struct be_adapter *adapter)
4409 {
4410         struct be_port_resources port_res = {0};
4411         u8 rss_tables_on_port;
4412         u16 max_vfs = be_max_vfs(adapter);
4413
4414         be_cmd_get_profile_config(adapter, NULL, &port_res, SAVED_PROFILE_TYPE,
4415                                   RESOURCE_LIMITS, 0);
4416
4417         rss_tables_on_port = MAX_PORT_RSS_TABLES - port_res.nic_pfs;
4418
4419         /* Each PF Pool's RSS Tables limit =
4420          * PF's Max VFs / Total_Max_VFs on Port * RSS Tables on Port
4421          */
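        /* Worked example (hypothetical numbers, for illustration only):
         * with 14 RSS Tables left on the port, 64 max VFs on the port and
         * 32 max VFs in this PF's pool, the pool gets 32 * 14 / 64 = 7 RSS
         * Tables, i.e. RSS can be enabled for at most 7 of this PF's VFs.
         */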
4422         adapter->pool_res.max_rss_tables =
4423                 max_vfs * rss_tables_on_port / port_res.max_vfs;
4424 }
4425
4426 static int be_get_sriov_config(struct be_adapter *adapter)
4427 {
4428         struct be_resources res = {0};
4429         int max_vfs, old_vfs;
4430
4431         be_cmd_get_profile_config(adapter, &res, NULL, ACTIVE_PROFILE_TYPE,
4432                                   RESOURCE_LIMITS, 0);
4433
4434         /* Some old versions of BE3 FW don't report max_vfs value */
4435         if (BE3_chip(adapter) && !res.max_vfs) {
4436                 max_vfs = pci_sriov_get_totalvfs(adapter->pdev);
4437                 res.max_vfs = max_vfs > 0 ? min(MAX_VFS, max_vfs) : 0;
4438         }
4439
4440         adapter->pool_res = res;
4441
4442         /* If the VFs were not disabled during a previous unload of the driver,
4443          * then we cannot rely on the PF POOL limits for the TotalVFs value.
4444          * Instead use the TotalVFs value stored in the pci-dev struct.
4445          */
4446         old_vfs = pci_num_vf(adapter->pdev);
4447         if (old_vfs) {
4448                 dev_info(&adapter->pdev->dev, "%d VFs are already enabled\n",
4449                          old_vfs);
4450
4451                 adapter->pool_res.max_vfs =
4452                         pci_sriov_get_totalvfs(adapter->pdev);
4453                 adapter->num_vfs = old_vfs;
4454         }
4455
4456         if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4457                 be_calculate_pf_pool_rss_tables(adapter);
4458                 dev_info(&adapter->pdev->dev,
4459                          "RSS can be enabled for all VFs if num_vfs <= %d\n",
4460                          be_max_pf_pool_rss_tables(adapter));
4461         }
4462         return 0;
4463 }
4464
4465 static void be_alloc_sriov_res(struct be_adapter *adapter)
4466 {
4467         int old_vfs = pci_num_vf(adapter->pdev);
4468         struct  be_resources vft_res = {0};
4469         int status;
4470
4471         be_get_sriov_config(adapter);
4472
4473         if (!old_vfs)
4474                 pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter));
4475
4476         /* When the HW is in an SRIOV-capable configuration, the PF-pool
4477          * resources are given to the PF during driver load, provided no
4478          * VFs are already enabled. This facility is not available in BE3 FW.
4479          * On the Lancer chip, this is done by the FW itself.
4480          */
4481         if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4482                 be_calculate_vf_res(adapter, 0, &vft_res);
4483                 status = be_cmd_set_sriov_config(adapter, adapter->pool_res, 0,
4484                                                  &vft_res);
4485                 if (status)
4486                         dev_err(&adapter->pdev->dev,
4487                                 "Failed to optimize SRIOV resources\n");
4488         }
4489 }
4490
4491 static int be_get_resources(struct be_adapter *adapter)
4492 {
4493         struct device *dev = &adapter->pdev->dev;
4494         struct be_resources res = {0};
4495         int status;
4496
4497         /* For Lancer, SH etc., read per-function resource limits from FW.
4498          * GET_FUNC_CONFIG returns per-function guaranteed limits.
4499          * GET_PROFILE_CONFIG returns PCI-E related limits and PF-pool limits.
4500          */
4501         if (BEx_chip(adapter)) {
4502                 BEx_get_resources(adapter, &res);
4503         } else {
4504                 status = be_cmd_get_func_config(adapter, &res);
4505                 if (status)
4506                         return status;
4507
4508                 /* If a default RXQ must be created, we'll use up one RSSQ */
4509                 if (res.max_rss_qs && res.max_rss_qs == res.max_rx_qs &&
4510                     !(res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS))
4511                         res.max_rss_qs -= 1;
4512         }
4513
4514         /* If RoCE is supported stash away half the EQs for RoCE */
4515         res.max_nic_evt_qs = be_roce_supported(adapter) ?
4516                                 res.max_evt_qs / 2 : res.max_evt_qs;
4517         adapter->res = res;
4518
4519         /* If FW supports RSS default queue, then skip creating non-RSS
4520          * queue for non-IP traffic.
4521          */
4522         adapter->need_def_rxq = (be_if_cap_flags(adapter) &
4523                                  BE_IF_FLAGS_DEFQ_RSS) ? 0 : 1;
4524
4525         dev_info(dev, "Max: txqs %d, rxqs %d, rss %d, eqs %d, vfs %d\n",
4526                  be_max_txqs(adapter), be_max_rxqs(adapter),
4527                  be_max_rss(adapter), be_max_nic_eqs(adapter),
4528                  be_max_vfs(adapter));
4529         dev_info(dev, "Max: uc-macs %d, mc-macs %d, vlans %d\n",
4530                  be_max_uc(adapter), be_max_mc(adapter),
4531                  be_max_vlans(adapter));
4532
4533         /* Ensure RX and TX queues are created in pairs at init time */
4534         adapter->cfg_num_rx_irqs =
4535                                 min_t(u16, netif_get_num_default_rss_queues(),
4536                                       be_max_qp_irqs(adapter));
4537         adapter->cfg_num_tx_irqs = adapter->cfg_num_rx_irqs;
4538         return 0;
4539 }
4540
4541 static int be_get_config(struct be_adapter *adapter)
4542 {
4543         int status, level;
4544         u16 profile_id;
4545
4546         status = be_cmd_get_cntl_attributes(adapter);
4547         if (status)
4548                 return status;
4549
4550         status = be_cmd_query_fw_cfg(adapter);
4551         if (status)
4552                 return status;
4553
4554         if (!lancer_chip(adapter) && be_physfn(adapter))
4555                 be_cmd_get_fat_dump_len(adapter, &adapter->fat_dump_len);
4556
4557         if (BEx_chip(adapter)) {
4558                 level = be_cmd_get_fw_log_level(adapter);
4559                 adapter->msg_enable =
4560                         level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0;
4561         }
4562
4563         be_cmd_get_acpi_wol_cap(adapter);
4564         pci_enable_wake(adapter->pdev, PCI_D3hot, adapter->wol_en);
4565         pci_enable_wake(adapter->pdev, PCI_D3cold, adapter->wol_en);
4566
4567         be_cmd_query_port_name(adapter);
4568
4569         if (be_physfn(adapter)) {
4570                 status = be_cmd_get_active_profile(adapter, &profile_id);
4571                 if (!status)
4572                         dev_info(&adapter->pdev->dev,
4573                                  "Using profile 0x%x\n", profile_id);
4574         }
4575
4576         return 0;
4577 }
4578
4579 static int be_mac_setup(struct be_adapter *adapter)
4580 {
4581         u8 mac[ETH_ALEN];
4582         int status;
4583
4584         if (is_zero_ether_addr(adapter->netdev->dev_addr)) {
4585                 status = be_cmd_get_perm_mac(adapter, mac);
4586                 if (status)
4587                         return status;
4588
4589                 memcpy(adapter->netdev->dev_addr, mac, ETH_ALEN);
4590                 memcpy(adapter->netdev->perm_addr, mac, ETH_ALEN);
4591
4592                 /* Initial MAC for BE3 VFs is already programmed by PF */
4593                 if (BEx_chip(adapter) && be_virtfn(adapter))
4594                         memcpy(adapter->dev_mac, mac, ETH_ALEN);
4595         }
4596
4597         return 0;
4598 }
4599
4600 static void be_schedule_worker(struct be_adapter *adapter)
4601 {
4602         queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
4603         adapter->flags |= BE_FLAGS_WORKER_SCHEDULED;
4604 }
4605
4606 static void be_destroy_err_recovery_workq(void)
4607 {
4608         if (!be_err_recovery_workq)
4609                 return;
4610
4611         flush_workqueue(be_err_recovery_workq);
4612         destroy_workqueue(be_err_recovery_workq);
4613         be_err_recovery_workq = NULL;
4614 }
4615
4616 static void be_schedule_err_detection(struct be_adapter *adapter, u32 delay)
4617 {
4618         struct be_error_recovery *err_rec = &adapter->error_recovery;
4619
4620         if (!be_err_recovery_workq)
4621                 return;
4622
4623         queue_delayed_work(be_err_recovery_workq, &err_rec->err_detection_work,
4624                            msecs_to_jiffies(delay));
4625         adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED;
4626 }
4627
4628 static int be_setup_queues(struct be_adapter *adapter)
4629 {
4630         struct net_device *netdev = adapter->netdev;
4631         int status;
4632
4633         status = be_evt_queues_create(adapter);
4634         if (status)
4635                 goto err;
4636
4637         status = be_tx_qs_create(adapter);
4638         if (status)
4639                 goto err;
4640
4641         status = be_rx_cqs_create(adapter);
4642         if (status)
4643                 goto err;
4644
4645         status = be_mcc_queues_create(adapter);
4646         if (status)
4647                 goto err;
4648
4649         status = netif_set_real_num_rx_queues(netdev, adapter->num_rx_qs);
4650         if (status)
4651                 goto err;
4652
4653         status = netif_set_real_num_tx_queues(netdev, adapter->num_tx_qs);
4654         if (status)
4655                 goto err;
4656
4657         return 0;
4658 err:
4659         dev_err(&adapter->pdev->dev, "queue_setup failed\n");
4660         return status;
4661 }
4662
4663 static int be_if_create(struct be_adapter *adapter)
4664 {
4665         u32 en_flags = BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS;
4666         u32 cap_flags = be_if_cap_flags(adapter);
4667         int status;
4668
4669         /* alloc required memory for other filtering fields */
4670         adapter->pmac_id = kcalloc(be_max_uc(adapter),
4671                                    sizeof(*adapter->pmac_id), GFP_KERNEL);
4672         if (!adapter->pmac_id)
4673                 return -ENOMEM;
4674
4675         adapter->mc_list = kcalloc(be_max_mc(adapter),
4676                                    sizeof(*adapter->mc_list), GFP_KERNEL);
4677         if (!adapter->mc_list)
4678                 return -ENOMEM;
4679
4680         adapter->uc_list = kcalloc(be_max_uc(adapter),
4681                                    sizeof(*adapter->uc_list), GFP_KERNEL);
4682         if (!adapter->uc_list)
4683                 return -ENOMEM;
4684
4685         if (adapter->cfg_num_rx_irqs == 1)
4686                 cap_flags &= ~(BE_IF_FLAGS_DEFQ_RSS | BE_IF_FLAGS_RSS);
4687
4688         en_flags &= cap_flags;
4689         /* will enable all the needed filter flags in be_open() */
4690         status = be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags,
4691                                   &adapter->if_handle, 0);
4692
4693         if (status)
4694                 return status;
4695
4696         return 0;
4697 }
4698
4699 int be_update_queues(struct be_adapter *adapter)
4700 {
4701         struct net_device *netdev = adapter->netdev;
4702         int status;
4703
4704         if (netif_running(netdev))
4705                 be_close(netdev);
4706
4707         be_cancel_worker(adapter);
4708
4709         /* If any vectors have been shared with RoCE, we cannot re-program
4710          * the MSIx table.
4711          */
4712         if (!adapter->num_msix_roce_vec)
4713                 be_msix_disable(adapter);
4714
4715         be_clear_queues(adapter);
4716         status = be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4717         if (status)
4718                 return status;
4719
4720         if (!msix_enabled(adapter)) {
4721                 status = be_msix_enable(adapter);
4722                 if (status)
4723                         return status;
4724         }
4725
4726         status = be_if_create(adapter);
4727         if (status)
4728                 return status;
4729
4730         status = be_setup_queues(adapter);
4731         if (status)
4732                 return status;
4733
4734         be_schedule_worker(adapter);
4735
4736         /* The IF was destroyed and re-created. We need to clear
4737          * all promiscuous flags valid for the destroyed IF.
4738          * Without this, promisc mode is not restored during
4739          * be_open() because the driver thinks that it is
4740          * already enabled in HW.
4741          */
4742         adapter->if_flags &= ~BE_IF_FLAGS_ALL_PROMISCUOUS;
4743
4744         if (netif_running(netdev))
4745                 status = be_open(netdev);
4746
4747         return status;
4748 }
4749
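/* Parse the leading major number from a FW version string.
 * Illustrative example: fw_major_num("4.9.416.0") returns 4, while a string
 * that does not begin with a decimal number returns 0.
 */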
4750 static inline int fw_major_num(const char *fw_ver)
4751 {
4752         int fw_major = 0, i;
4753
4754         i = sscanf(fw_ver, "%d.", &fw_major);
4755         if (i != 1)
4756                 return 0;
4757
4758         return fw_major;
4759 }
4760
4761 /* If it is error recovery, FLR the PF.
4762  * Otherwise, don't FLR the PF if any VFs are already enabled.
4763  */
4764 static bool be_reset_required(struct be_adapter *adapter)
4765 {
4766         if (be_error_recovering(adapter))
4767                 return true;
4768         else
4769                 return pci_num_vf(adapter->pdev) == 0;
4770 }
4771
4772 /* Wait for the FW to be ready and perform the required initialization */
4773 static int be_func_init(struct be_adapter *adapter)
4774 {
4775         int status;
4776
4777         status = be_fw_wait_ready(adapter);
4778         if (status)
4779                 return status;
4780
4781         /* FW is now ready; clear errors to allow cmds/doorbell */
4782         be_clear_error(adapter, BE_CLEAR_ALL);
4783
4784         if (be_reset_required(adapter)) {
4785                 status = be_cmd_reset_function(adapter);
4786                 if (status)
4787                         return status;
4788
4789                 /* Wait for interrupts to quiesce after an FLR */
4790                 msleep(100);
4791         }
4792
4793         /* Tell FW we're ready to fire cmds */
4794         status = be_cmd_fw_init(adapter);
4795         if (status)
4796                 return status;
4797
4798         /* Allow interrupts for other ULPs running on NIC function */
4799         be_intr_set(adapter, true);
4800
4801         return 0;
4802 }
4803
4804 static int be_setup(struct be_adapter *adapter)
4805 {
4806         struct device *dev = &adapter->pdev->dev;
4807         int status;
4808
4809         status = be_func_init(adapter);
4810         if (status)
4811                 return status;
4812
4813         be_setup_init(adapter);
4814
4815         if (!lancer_chip(adapter))
4816                 be_cmd_req_native_mode(adapter);
4817
4818         /* Invoke this cmd first to get pf_num and vf_num, which are
4819          * needed for issuing profile-related cmds.
4820          */
4821         if (!BEx_chip(adapter)) {
4822                 status = be_cmd_get_func_config(adapter, NULL);
4823                 if (status)
4824                         return status;
4825         }
4826
4827         status = be_get_config(adapter);
4828         if (status)
4829                 goto err;
4830
4831         if (!BE2_chip(adapter) && be_physfn(adapter))
4832                 be_alloc_sriov_res(adapter);
4833
4834         status = be_get_resources(adapter);
4835         if (status)
4836                 goto err;
4837
4838         status = be_msix_enable(adapter);
4839         if (status)
4840                 goto err;
4841
4842         /* will enable all the needed filter flags in be_open() */
4843         status = be_if_create(adapter);
4844         if (status)
4845                 goto err;
4846
4847         /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
4848         rtnl_lock();
4849         status = be_setup_queues(adapter);
4850         rtnl_unlock();
4851         if (status)
4852                 goto err;
4853
4854         be_cmd_get_fn_privileges(adapter, &adapter->cmd_privileges, 0);
4855
4856         status = be_mac_setup(adapter);
4857         if (status)
4858                 goto err;
4859
4860         be_cmd_get_fw_ver(adapter);
4861         dev_info(dev, "FW version is %s\n", adapter->fw_ver);
4862
4863         if (BE2_chip(adapter) && fw_major_num(adapter->fw_ver) < 4) {
4864                 dev_err(dev, "Firmware on card is old(%s), IRQs may not work",
4865                         adapter->fw_ver);
4866                 dev_err(dev, "Please upgrade firmware to version >= 4.0\n");
4867         }
4868
4869         status = be_cmd_set_flow_control(adapter, adapter->tx_fc,
4870                                          adapter->rx_fc);
4871         if (status)
4872                 be_cmd_get_flow_control(adapter, &adapter->tx_fc,
4873                                         &adapter->rx_fc);
4874
4875         dev_info(&adapter->pdev->dev, "HW Flow control - TX:%d RX:%d\n",
4876                  adapter->tx_fc, adapter->rx_fc);
4877
4878         if (be_physfn(adapter))
4879                 be_cmd_set_logical_link_config(adapter,
4880                                                IFLA_VF_LINK_STATE_AUTO, 0);
4881
4882         /* BE3 EVB echoes broadcast/multicast packets back to the PF's vport,
4883          * confusing a Linux bridge or OVS that it might be connected to.
4884          * Set the EVB to PASSTHRU mode, which effectively disables the EVB,
4885          * when SRIOV is not enabled.
4886          */
4887         if (BE3_chip(adapter))
4888                 be_cmd_set_hsw_config(adapter, 0, 0, adapter->if_handle,
4889                                       PORT_FWD_TYPE_PASSTHRU, 0);
4890
4891         if (adapter->num_vfs)
4892                 be_vf_setup(adapter);
4893
4894         status = be_cmd_get_phy_info(adapter);
4895         if (!status && be_pause_supported(adapter))
4896                 adapter->phy.fc_autoneg = 1;
4897
4898         if (be_physfn(adapter) && !lancer_chip(adapter))
4899                 be_cmd_set_features(adapter);
4900
4901         be_schedule_worker(adapter);
4902         adapter->flags |= BE_FLAGS_SETUP_DONE;
4903         return 0;
4904 err:
4905         be_clear(adapter);
4906         return status;
4907 }
4908
4909 #ifdef CONFIG_NET_POLL_CONTROLLER
4910 static void be_netpoll(struct net_device *netdev)
4911 {
4912         struct be_adapter *adapter = netdev_priv(netdev);
4913         struct be_eq_obj *eqo;
4914         int i;
4915
4916         for_all_evt_queues(adapter, eqo, i) {
4917                 be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
4918                 napi_schedule(&eqo->napi);
4919         }
4920 }
4921 #endif
4922
4923 int be_load_fw(struct be_adapter *adapter, u8 *fw_file)
4924 {
4925         const struct firmware *fw;
4926         int status;
4927
4928         if (!netif_running(adapter->netdev)) {
4929                 dev_err(&adapter->pdev->dev,
4930                         "Firmware load not allowed (interface is down)\n");
4931                 return -ENETDOWN;
4932         }
4933
4934         status = request_firmware(&fw, fw_file, &adapter->pdev->dev);
4935         if (status)
4936                 goto fw_exit;
4937
4938         dev_info(&adapter->pdev->dev, "Flashing firmware file %s\n", fw_file);
4939
4940         if (lancer_chip(adapter))
4941                 status = lancer_fw_download(adapter, fw);
4942         else
4943                 status = be_fw_download(adapter, fw);
4944
4945         if (!status)
4946                 be_cmd_get_fw_ver(adapter);
4947
4948 fw_exit:
4949         release_firmware(fw);
4950         return status;
4951 }
4952
4953 static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
4954                                  u16 flags, struct netlink_ext_ack *extack)
4955 {
4956         struct be_adapter *adapter = netdev_priv(dev);
4957         struct nlattr *attr, *br_spec;
4958         int rem;
4959         int status = 0;
4960         u16 mode = 0;
4961
4962         if (!sriov_enabled(adapter))
4963                 return -EOPNOTSUPP;
4964
4965         br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
4966         if (!br_spec)
4967                 return -EINVAL;
4968
4969         nla_for_each_nested(attr, br_spec, rem) {
4970                 if (nla_type(attr) != IFLA_BRIDGE_MODE)
4971                         continue;
4972
4973                 if (nla_len(attr) < sizeof(mode))
4974                         return -EINVAL;
4975
4976                 mode = nla_get_u16(attr);
4977                 if (BE3_chip(adapter) && mode == BRIDGE_MODE_VEPA)
4978                         return -EOPNOTSUPP;
4979
4980                 if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
4981                         return -EINVAL;
4982
4983                 status = be_cmd_set_hsw_config(adapter, 0, 0,
4984                                                adapter->if_handle,
4985                                                mode == BRIDGE_MODE_VEPA ?
4986                                                PORT_FWD_TYPE_VEPA :
4987                                                PORT_FWD_TYPE_VEB, 0);
4988                 if (status)
4989                         goto err;
4990
4991                 dev_info(&adapter->pdev->dev, "enabled switch mode: %s\n",
4992                          mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4993
4994                 return status;
4995         }
4996 err:
4997         dev_err(&adapter->pdev->dev, "Failed to set switch mode %s\n",
4998                 mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4999
5000         return status;
5001 }
5002
5003 static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
5004                                  struct net_device *dev, u32 filter_mask,
5005                                  int nlflags)
5006 {
5007         struct be_adapter *adapter = netdev_priv(dev);
5008         int status = 0;
5009         u8 hsw_mode;
5010
5011         /* BE and Lancer chips support VEB mode only */
5012         if (BEx_chip(adapter) || lancer_chip(adapter)) {
5013                 /* VEB is disabled in non-SR-IOV profiles on BE3/Lancer */
5014                 if (!pci_sriov_get_totalvfs(adapter->pdev))
5015                         return 0;
5016                 hsw_mode = PORT_FWD_TYPE_VEB;
5017         } else {
5018                 status = be_cmd_get_hsw_config(adapter, NULL, 0,
5019                                                adapter->if_handle, &hsw_mode,
5020                                                NULL);
5021                 if (status)
5022                         return 0;
5023
5024                 if (hsw_mode == PORT_FWD_TYPE_PASSTHRU)
5025                         return 0;
5026         }
5027
5028         return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
5029                                        hsw_mode == PORT_FWD_TYPE_VEPA ?
5030                                        BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB,
5031                                        0, 0, nlflags, filter_mask, NULL);
5032 }
5033
5034 static struct be_cmd_work *be_alloc_work(struct be_adapter *adapter,
5035                                          void (*func)(struct work_struct *))
5036 {
5037         struct be_cmd_work *work;
5038
5039         work = kzalloc(sizeof(*work), GFP_ATOMIC);
5040         if (!work) {
5041                 dev_err(&adapter->pdev->dev,
5042                         "be_work memory allocation failed\n");
5043                 return NULL;
5044         }
5045
5046         INIT_WORK(&work->work, func);
5047         work->adapter = adapter;
5048         return work;
5049 }
5050
5051 /* VxLAN offload Notes:
5052  *
5053  * The stack defines tunnel offload flags (hw_enc_features) for IP and doesn't
5054  * distinguish various types of transports (VxLAN, GRE, NVGRE ..). So, offload
5055  * is expected to work across all types of IP tunnels once exported. Skyhawk
5056  * supports offloads for either VxLAN or NVGRE, exclusively. So we export VxLAN
5057  * offloads in hw_enc_features only when a VxLAN port is added. If other (non
5058  * VxLAN) tunnels are configured while VxLAN offloads are enabled, offloads for
5059  * those other tunnels are unexported on the fly through ndo_features_check().
5060  *
5061  * Skyhawk supports VxLAN offloads only for one UDP dport. So, if the stack
5062  * adds more than one port, disable offloads and re-enable them again when
5063  * there's only one port left. We maintain a list of ports for this purpose.
5064  */
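/* Illustrative sequence (hypothetical port numbers): adding UDP port 4789
 * enables VxLAN offloads for that port; adding a second port (say 8472)
 * disables offloads, since Skyhawk supports only one offloaded dport;
 * deleting 8472 leaves a single port on the list, so offloads are re-enabled
 * for 4789. Re-adding a port that is already on the list only bumps its
 * alias count.
 */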
5065 static void be_work_add_vxlan_port(struct work_struct *work)
5066 {
5067         struct be_cmd_work *cmd_work =
5068                                 container_of(work, struct be_cmd_work, work);
5069         struct be_adapter *adapter = cmd_work->adapter;
5070         struct device *dev = &adapter->pdev->dev;
5071         __be16 port = cmd_work->info.vxlan_port;
5072         struct be_vxlan_port *vxlan_port;
5073         int status;
5074
5075         /* Bump up the alias count if it is an existing port */
5076         list_for_each_entry(vxlan_port, &adapter->vxlan_port_list, list) {
5077                 if (vxlan_port->port == port) {
5078                         vxlan_port->port_aliases++;
5079                         goto done;
5080                 }
5081         }
5082
5083         /* Add a new port to our list. We don't need a lock here since port
5084          * add/delete are done only in the context of a single-threaded work
5085          * queue (be_wq).
5086          */
5087         vxlan_port = kzalloc(sizeof(*vxlan_port), GFP_KERNEL);
5088         if (!vxlan_port)
5089                 goto done;
5090
5091         vxlan_port->port = port;
5092         INIT_LIST_HEAD(&vxlan_port->list);
5093         list_add_tail(&vxlan_port->list, &adapter->vxlan_port_list);
5094         adapter->vxlan_port_count++;
5095
5096         if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS) {
5097                 dev_info(dev,
5098                          "Only one UDP port supported for VxLAN offloads\n");
5099                 dev_info(dev, "Disabling VxLAN offloads\n");
5100                 goto err;
5101         }
5102
5103         if (adapter->vxlan_port_count > 1)
5104                 goto done;
5105
5106         status = be_enable_vxlan_offloads(adapter);
5107         if (!status)
5108                 goto done;
5109
5110 err:
5111         be_disable_vxlan_offloads(adapter);
5112 done:
5113         kfree(cmd_work);
5114         return;
5115 }
5116
5117 static void be_work_del_vxlan_port(struct work_struct *work)
5118 {
5119         struct be_cmd_work *cmd_work =
5120                                 container_of(work, struct be_cmd_work, work);
5121         struct be_adapter *adapter = cmd_work->adapter;
5122         __be16 port = cmd_work->info.vxlan_port;
5123         struct be_vxlan_port *vxlan_port;
5124
5125         /* Nothing to be done if a port alias is being deleted */
5126         list_for_each_entry(vxlan_port, &adapter->vxlan_port_list, list) {
5127                 if (vxlan_port->port == port) {
5128                         if (vxlan_port->port_aliases) {
5129                                 vxlan_port->port_aliases--;
5130                                 goto done;
5131                         }
5132                         break;
5133                 }
5134         }
5135
5136         /* No port aliases left; delete the port from the list */
5137         list_del(&vxlan_port->list);
5138         adapter->vxlan_port_count--;
5139
5140         /* Disable VxLAN offload if this is the offloaded port */
5141         if (adapter->vxlan_port == vxlan_port->port) {
5142                 WARN_ON(adapter->vxlan_port_count);
5143                 be_disable_vxlan_offloads(adapter);
5144                 dev_info(&adapter->pdev->dev,
5145                          "Disabled VxLAN offloads for UDP port %d\n",
5146                          be16_to_cpu(port));
5147                 goto out;
5148         }
5149
5150         /* If only 1 port is left, re-enable VxLAN offload */
5151         if (adapter->vxlan_port_count == 1)
5152                 be_enable_vxlan_offloads(adapter);
5153
5154 out:
5155         kfree(vxlan_port);
5156 done:
5157         kfree(cmd_work);
5158 }
5159
5160 static void be_cfg_vxlan_port(struct net_device *netdev,
5161                               struct udp_tunnel_info *ti,
5162                               void (*func)(struct work_struct *))
5163 {
5164         struct be_adapter *adapter = netdev_priv(netdev);
5165         struct be_cmd_work *cmd_work;
5166
5167         if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
5168                 return;
5169
5170         if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter))
5171                 return;
5172
5173         cmd_work = be_alloc_work(adapter, func);
5174         if (cmd_work) {
5175                 cmd_work->info.vxlan_port = ti->port;
5176                 queue_work(be_wq, &cmd_work->work);
5177         }
5178 }
5179
5180 static void be_del_vxlan_port(struct net_device *netdev,
5181                               struct udp_tunnel_info *ti)
5182 {
5183         be_cfg_vxlan_port(netdev, ti, be_work_del_vxlan_port);
5184 }
5185
5186 static void be_add_vxlan_port(struct net_device *netdev,
5187                               struct udp_tunnel_info *ti)
5188 {
5189         be_cfg_vxlan_port(netdev, ti, be_work_add_vxlan_port);
5190 }
5191
5192 static netdev_features_t be_features_check(struct sk_buff *skb,
5193                                            struct net_device *dev,
5194                                            netdev_features_t features)
5195 {
5196         struct be_adapter *adapter = netdev_priv(dev);
5197         u8 l4_hdr = 0;
5198
5199         if (skb_is_gso(skb)) {
5200                 /* IPv6 TSO requests with extension hdrs are a problem
5201                  * for Lancer and BE3 HW. Disable the TSO6 feature.
5202                  */
5203                 if (!skyhawk_chip(adapter) && is_ipv6_ext_hdr(skb))
5204                         features &= ~NETIF_F_TSO6;
5205
5206                 /* Lancer cannot handle a packet with MSS less than 256.
5207                  * It also can't handle a TSO packet with a single segment.
5208                  * Disable GSO support in such cases.
5209                  */
5210                 if (lancer_chip(adapter) &&
5211                     (skb_shinfo(skb)->gso_size < 256 ||
5212                      skb_shinfo(skb)->gso_segs == 1))
5213                         features &= ~NETIF_F_GSO_MASK;
5214         }
5215
5216         /* The code below restricts offload features for some tunneled and
5217          * Q-in-Q packets.
5218          * Offload features for normal (non tunnel) packets are unchanged.
5219          */
5220         features = vlan_features_check(skb, features);
5221         if (!skb->encapsulation ||
5222             !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS))
5223                 return features;
5224
5225         /* It's an encapsulated packet and VxLAN offloads are enabled. We
5226          * should disable tunnel offload features if it's not a VxLAN packet,
5227          * as tunnel offloads have been enabled only for VxLAN. This is done to
5228          * allow other tunneled traffic like GRE to work fine while VxLAN
5229          * offloads are configured in Skyhawk-R.
5230          */
5231         switch (vlan_get_protocol(skb)) {
5232         case htons(ETH_P_IP):
5233                 l4_hdr = ip_hdr(skb)->protocol;
5234                 break;
5235         case htons(ETH_P_IPV6):
5236                 l4_hdr = ipv6_hdr(skb)->nexthdr;
5237                 break;
5238         default:
5239                 return features;
5240         }
5241
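        /* Treat the frame as VxLAN only if it is UDP, carries an inner
         * Ethernet frame (ETH_P_TEB), has exactly a UDP header plus a VxLAN
         * header between the transport and inner MAC headers, and is
         * addressed to the currently offloaded dport.
         */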
5242         if (l4_hdr != IPPROTO_UDP ||
5243             skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
5244             skb->inner_protocol != htons(ETH_P_TEB) ||
5245             skb_inner_mac_header(skb) - skb_transport_header(skb) !=
5246                 sizeof(struct udphdr) + sizeof(struct vxlanhdr) ||
5247             !adapter->vxlan_port ||
5248             udp_hdr(skb)->dest != adapter->vxlan_port)
5249                 return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
5250
5251         return features;
5252 }
5253
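/* Compose the physical port ID from the (1-based) HBA port number followed
 * by the controller serial-number words copied in reverse word order (a
 * summary of the code below).
 */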
5254 static int be_get_phys_port_id(struct net_device *dev,
5255                                struct netdev_phys_item_id *ppid)
5256 {
5257         int i, id_len = CNTL_SERIAL_NUM_WORDS * CNTL_SERIAL_NUM_WORD_SZ + 1;
5258         struct be_adapter *adapter = netdev_priv(dev);
5259         u8 *id;
5260
5261         if (MAX_PHYS_ITEM_ID_LEN < id_len)
5262                 return -ENOSPC;
5263
5264         ppid->id[0] = adapter->hba_port_num + 1;
5265         id = &ppid->id[1];
5266         for (i = CNTL_SERIAL_NUM_WORDS - 1; i >= 0;
5267              i--, id += CNTL_SERIAL_NUM_WORD_SZ)
5268                 memcpy(id, &adapter->serial_num[i], CNTL_SERIAL_NUM_WORD_SZ);
5269
5270         ppid->id_len = id_len;
5271
5272         return 0;
5273 }
5274
5275 static void be_set_rx_mode(struct net_device *dev)
5276 {
5277         struct be_adapter *adapter = netdev_priv(dev);
5278         struct be_cmd_work *work;
5279
5280         work = be_alloc_work(adapter, be_work_set_rx_mode);
5281         if (work)
5282                 queue_work(be_wq, &work->work);
5283 }
5284
5285 static const struct net_device_ops be_netdev_ops = {
5286         .ndo_open               = be_open,
5287         .ndo_stop               = be_close,
5288         .ndo_start_xmit         = be_xmit,
5289         .ndo_set_rx_mode        = be_set_rx_mode,
5290         .ndo_set_mac_address    = be_mac_addr_set,
5291         .ndo_get_stats64        = be_get_stats64,
5292         .ndo_validate_addr      = eth_validate_addr,
5293         .ndo_vlan_rx_add_vid    = be_vlan_add_vid,
5294         .ndo_vlan_rx_kill_vid   = be_vlan_rem_vid,
5295         .ndo_set_vf_mac         = be_set_vf_mac,
5296         .ndo_set_vf_vlan        = be_set_vf_vlan,
5297         .ndo_set_vf_rate        = be_set_vf_tx_rate,
5298         .ndo_get_vf_config      = be_get_vf_config,
5299         .ndo_set_vf_link_state  = be_set_vf_link_state,
5300         .ndo_set_vf_spoofchk    = be_set_vf_spoofchk,
5301         .ndo_tx_timeout         = be_tx_timeout,
5302 #ifdef CONFIG_NET_POLL_CONTROLLER
5303         .ndo_poll_controller    = be_netpoll,
5304 #endif
5305         .ndo_bridge_setlink     = be_ndo_bridge_setlink,
5306         .ndo_bridge_getlink     = be_ndo_bridge_getlink,
5307         .ndo_udp_tunnel_add     = be_add_vxlan_port,
5308         .ndo_udp_tunnel_del     = be_del_vxlan_port,
5309         .ndo_features_check     = be_features_check,
5310         .ndo_get_phys_port_id   = be_get_phys_port_id,
5311 };
5312
5313 static void be_netdev_init(struct net_device *netdev)
5314 {
5315         struct be_adapter *adapter = netdev_priv(netdev);
5316
5317         netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5318                 NETIF_F_GSO_UDP_TUNNEL |
5319                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
5320                 NETIF_F_HW_VLAN_CTAG_TX;
5321         if ((be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS))
5322                 netdev->hw_features |= NETIF_F_RXHASH;
5323
5324         netdev->features |= netdev->hw_features |
5325                 NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER;
5326
5327         netdev->vlan_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5328                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
5329
5330         netdev->priv_flags |= IFF_UNICAST_FLT;
5331
5332         netdev->flags |= IFF_MULTICAST;
5333
5334         netif_set_gso_max_size(netdev, BE_MAX_GSO_SIZE - ETH_HLEN);
5335
5336         netdev->netdev_ops = &be_netdev_ops;
5337
5338         netdev->ethtool_ops = &be_ethtool_ops;
5339
5340         /* MTU range: 256 - 9000 */
5341         netdev->min_mtu = BE_MIN_MTU;
5342         netdev->max_mtu = BE_MAX_MTU;
5343 }
5344
5345 static void be_cleanup(struct be_adapter *adapter)
5346 {
5347         struct net_device *netdev = adapter->netdev;
5348
5349         rtnl_lock();
5350         netif_device_detach(netdev);
5351         if (netif_running(netdev))
5352                 be_close(netdev);
5353         rtnl_unlock();
5354
5355         be_clear(adapter);
5356 }
5357
5358 static int be_resume(struct be_adapter *adapter)
5359 {
5360         struct net_device *netdev = adapter->netdev;
5361         int status;
5362
5363         status = be_setup(adapter);
5364         if (status)
5365                 return status;
5366
5367         rtnl_lock();
5368         if (netif_running(netdev))
5369                 status = be_open(netdev);
5370         rtnl_unlock();
5371
5372         if (status)
5373                 return status;
5374
5375         netif_device_attach(netdev);
5376
5377         return 0;
5378 }
5379
5380 static void be_soft_reset(struct be_adapter *adapter)
5381 {
5382         u32 val;
5383
5384         dev_info(&adapter->pdev->dev, "Initiating chip soft reset\n");
5385         val = ioread32(adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5386         val |= SLIPORT_SOFTRESET_SR_MASK;
5387         iowrite32(val, adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5388 }
5389
5390 static bool be_err_is_recoverable(struct be_adapter *adapter)
5391 {
5392         struct be_error_recovery *err_rec = &adapter->error_recovery;
5393         unsigned long initial_idle_time =
5394                 msecs_to_jiffies(ERR_RECOVERY_IDLE_TIME);
5395         unsigned long recovery_interval =
5396                 msecs_to_jiffies(ERR_RECOVERY_INTERVAL);
5397         u16 ue_err_code;
5398         u32 val;
5399
5400         val = be_POST_stage_get(adapter);
5401         if ((val & POST_STAGE_RECOVERABLE_ERR) != POST_STAGE_RECOVERABLE_ERR)
5402                 return false;
5403         ue_err_code = val & POST_ERR_RECOVERY_CODE_MASK;
5404         if (ue_err_code == 0)
5405                 return false;
5406
5407         dev_err(&adapter->pdev->dev, "Recoverable HW error code: 0x%x\n",
5408                 ue_err_code);
5409
5410         if (time_before_eq(jiffies - err_rec->probe_time, initial_idle_time)) {
5411                 dev_err(&adapter->pdev->dev,
5412                         "Cannot recover within %lu sec from driver load\n",
5413                         jiffies_to_msecs(initial_idle_time) / MSEC_PER_SEC);
5414                 return false;
5415         }
5416
5417         if (err_rec->last_recovery_time && time_before_eq(
5418                 jiffies - err_rec->last_recovery_time, recovery_interval)) {
5419                 dev_err(&adapter->pdev->dev,
5420                         "Cannot recover within %lu sec from last recovery\n",
5421                         jiffies_to_msecs(recovery_interval) / MSEC_PER_SEC);
5422                 return false;
5423         }
5424
5425         if (ue_err_code == err_rec->last_err_code) {
5426                 dev_err(&adapter->pdev->dev,
5427                         "Cannot recover from a consecutive TPE error\n");
5428                 return false;
5429         }
5430
5431         err_rec->last_recovery_time = jiffies;
5432         err_rec->last_err_code = ue_err_code;
5433         return true;
5434 }
5435
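/* TPE recovery state machine for BEx/Skyhawk, summarized from the code
 * below: NONE -> DETECT (wait for the recoverable-error POST stage); on PF0,
 * DETECT -> RESET (issue the chip soft reset once the recovery criteria are
 * met); on other PFs, DETECT -> PRE_POLL (just wait); PRE_POLL -> REINIT,
 * after which the caller re-initializes the function. -EAGAIN is returned
 * while recovery is still in progress.
 */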
5436 static int be_tpe_recover(struct be_adapter *adapter)
5437 {
5438         struct be_error_recovery *err_rec = &adapter->error_recovery;
5439         int status = -EAGAIN;
5440         u32 val;
5441
5442         switch (err_rec->recovery_state) {
5443         case ERR_RECOVERY_ST_NONE:
5444                 err_rec->recovery_state = ERR_RECOVERY_ST_DETECT;
5445                 err_rec->resched_delay = ERR_RECOVERY_UE_DETECT_DURATION;
5446                 break;
5447
5448         case ERR_RECOVERY_ST_DETECT:
5449                 val = be_POST_stage_get(adapter);
5450                 if ((val & POST_STAGE_RECOVERABLE_ERR) !=
5451                     POST_STAGE_RECOVERABLE_ERR) {
5452                         dev_err(&adapter->pdev->dev,
5453                                 "Unrecoverable HW error detected: 0x%x\n", val);
5454                         status = -EINVAL;
5455                         err_rec->resched_delay = 0;
5456                         break;
5457                 }
5458
5459                 dev_err(&adapter->pdev->dev, "Recoverable HW error detected\n");
5460
5461                 /* Only PF0 initiates Chip Soft Reset. But PF0 must wait UE2SR
5462                  * milliseconds before it checks for final error status in
5463                  * SLIPORT_SEMAPHORE to determine if the recovery criteria are met.
5464                  * If they are, PF0 initiates a Soft Reset.
5465                  */
5466                 if (adapter->pf_num == 0) {
5467                         err_rec->recovery_state = ERR_RECOVERY_ST_RESET;
5468                         err_rec->resched_delay = err_rec->ue_to_reset_time -
5469                                         ERR_RECOVERY_UE_DETECT_DURATION;
5470                         break;
5471                 }
5472
5473                 err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5474                 err_rec->resched_delay = err_rec->ue_to_poll_time -
5475                                         ERR_RECOVERY_UE_DETECT_DURATION;
5476                 break;
5477
5478         case ERR_RECOVERY_ST_RESET:
5479                 if (!be_err_is_recoverable(adapter)) {
5480                         dev_err(&adapter->pdev->dev,
5481                                 "Failed to meet recovery criteria\n");
5482                         status = -EIO;
5483                         err_rec->resched_delay = 0;
5484                         break;
5485                 }
5486                 be_soft_reset(adapter);
5487                 err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5488                 err_rec->resched_delay = err_rec->ue_to_poll_time -
5489                                         err_rec->ue_to_reset_time;
5490                 break;
5491
5492         case ERR_RECOVERY_ST_PRE_POLL:
5493                 err_rec->recovery_state = ERR_RECOVERY_ST_REINIT;
5494                 err_rec->resched_delay = 0;
5495                 status = 0;                     /* done */
5496                 break;
5497
5498         default:
5499                 status = -EINVAL;
5500                 err_rec->resched_delay = 0;
5501                 break;
5502         }
5503
5504         return status;
5505 }
5506
5507 static int be_err_recover(struct be_adapter *adapter)
5508 {
5509         int status;
5510
5511         if (!lancer_chip(adapter)) {
5512                 if (!adapter->error_recovery.recovery_supported ||
5513                     adapter->priv_flags & BE_DISABLE_TPE_RECOVERY)
5514                         return -EIO;
5515                 status = be_tpe_recover(adapter);
5516                 if (status)
5517                         goto err;
5518         }
5519
5520         /* Wait for adapter to reach quiescent state before
5521          * destroying queues
5522          */
5523         status = be_fw_wait_ready(adapter);
5524         if (status)
5525                 goto err;
5526
5527         adapter->flags |= BE_FLAGS_TRY_RECOVERY;
5528
5529         be_cleanup(adapter);
5530
5531         status = be_resume(adapter);
5532         if (status)
5533                 goto err;
5534
5535         adapter->flags &= ~BE_FLAGS_TRY_RECOVERY;
5536
5537 err:
5538         return status;
5539 }
5540
5541 static void be_err_detection_task(struct work_struct *work)
5542 {
5543         struct be_error_recovery *err_rec =
5544                         container_of(work, struct be_error_recovery,
5545                                      err_detection_work.work);
5546         struct be_adapter *adapter =
5547                         container_of(err_rec, struct be_adapter,
5548                                      error_recovery);
5549         u32 resched_delay = ERR_RECOVERY_DETECTION_DELAY;
5550         struct device *dev = &adapter->pdev->dev;
5551         int recovery_status;
5552
5553         be_detect_error(adapter);
5554         if (!be_check_error(adapter, BE_ERROR_HW))
5555                 goto reschedule_task;
5556
5557         recovery_status = be_err_recover(adapter);
5558         if (!recovery_status) {
5559                 err_rec->recovery_retries = 0;
5560                 err_rec->recovery_state = ERR_RECOVERY_ST_NONE;
5561                 dev_info(dev, "Adapter recovery successful\n");
5562                 goto reschedule_task;
5563         } else if (!lancer_chip(adapter) && err_rec->resched_delay) {
5564                 /* BEx/SH recovery state machine */
5565                 if (adapter->pf_num == 0 &&
5566                     err_rec->recovery_state > ERR_RECOVERY_ST_DETECT)
5567                         dev_err(&adapter->pdev->dev,
5568                                 "Adapter recovery in progress\n");
5569                 resched_delay = err_rec->resched_delay;
5570                 goto reschedule_task;
5571         } else if (lancer_chip(adapter) && be_virtfn(adapter)) {
5572                 /* For VFs, check every second whether the PF has
5573                  * allocated resources.
5574                  */
5575                 dev_err(dev, "Re-trying adapter recovery\n");
5576                 goto reschedule_task;
5577         } else if (lancer_chip(adapter) && err_rec->recovery_retries++ <
5578                    ERR_RECOVERY_MAX_RETRY_COUNT) {
5579                 /* In case of another error during recovery, it takes 30 sec
5580                  * for the adapter to come out of the error state. Retry error
5581                  * recovery after this time interval.
5582                  */
5583                 dev_err(&adapter->pdev->dev, "Re-trying adapter recovery\n");
5584                 resched_delay = ERR_RECOVERY_RETRY_DELAY;
5585                 goto reschedule_task;
5586         } else {
5587                 dev_err(dev, "Adapter recovery failed\n");
5588                 dev_err(dev, "Please reboot server to recover\n");
5589         }
5590
5591         return;
5592
5593 reschedule_task:
5594         be_schedule_err_detection(adapter, resched_delay);
5595 }
5596
5597 static void be_log_sfp_info(struct be_adapter *adapter)
5598 {
5599         int status;
5600
5601         status = be_cmd_query_sfp_info(adapter);
5602         if (!status) {
5603                 dev_err(&adapter->pdev->dev,
5604                         "Port %c: %s Vendor: %s part no: %s",
5605                         adapter->port_name,
5606                         be_misconfig_evt_port_state[adapter->phy_state],
5607                         adapter->phy.vendor_name,
5608                         adapter->phy.vendor_pn);
5609         }
5610         adapter->flags &= ~BE_FLAGS_PHY_MISCONFIGURED;
5611 }
5612
5613 static void be_worker(struct work_struct *work)
5614 {
5615         struct be_adapter *adapter =
5616                 container_of(work, struct be_adapter, work.work);
5617         struct be_rx_obj *rxo;
5618         int i;
5619
5620         if (be_physfn(adapter) &&
5621             MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0)
5622                 be_cmd_get_die_temperature(adapter);
5623
5624         /* when interrupts are not yet enabled, just reap any pending
5625          * mcc completions
5626          */
5627         if (!netif_running(adapter->netdev)) {
5628                 local_bh_disable();
5629                 be_process_mcc(adapter);
5630                 local_bh_enable();
5631                 goto reschedule;
5632         }
5633
5634         if (!adapter->stats_cmd_sent) {
5635                 if (lancer_chip(adapter))
5636                         lancer_cmd_get_pport_stats(adapter,
5637                                                    &adapter->stats_cmd);
5638                 else
5639                         be_cmd_get_stats(adapter, &adapter->stats_cmd);
5640         }
5641
5642         for_all_rx_queues(adapter, rxo, i) {
5643                 /* Replenish RX-queues starved due to memory
5644                  * allocation failures.
5645                  */
5646                 if (rxo->rx_post_starved)
5647                         be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST);
5648         }
5649
5650         /* EQ-delay update for Skyhawk is done while notifying EQ */
5651         if (!skyhawk_chip(adapter))
5652                 be_eqd_update(adapter, false);
5653
5654         if (adapter->flags & BE_FLAGS_PHY_MISCONFIGURED)
5655                 be_log_sfp_info(adapter);
5656
5657 reschedule:
5658         adapter->work_counter++;
5659         queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
5660 }
5661
5662 static void be_unmap_pci_bars(struct be_adapter *adapter)
5663 {
5664         if (adapter->csr)
5665                 pci_iounmap(adapter->pdev, adapter->csr);
5666         if (adapter->db)
5667                 pci_iounmap(adapter->pdev, adapter->db);
5668         if (adapter->pcicfg && adapter->pcicfg_mapped)
5669                 pci_iounmap(adapter->pdev, adapter->pcicfg);
5670 }
5671
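/* The doorbell region lives in BAR 0 on Lancer and on VFs, and in
 * BAR 4 on BEx/Skyhawk PFs.
 */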
5672 static int db_bar(struct be_adapter *adapter)
5673 {
5674         if (lancer_chip(adapter) || be_virtfn(adapter))
5675                 return 0;
5676         else
5677                 return 4;
5678 }
5679
5680 static int be_roce_map_pci_bars(struct be_adapter *adapter)
5681 {
5682         if (skyhawk_chip(adapter)) {
5683                 adapter->roce_db.size = 4096;
5684                 adapter->roce_db.io_addr = pci_resource_start(adapter->pdev,
5685                                                               db_bar(adapter));
5686                 adapter->roce_db.total_size = pci_resource_len(adapter->pdev,
5687                                                                db_bar(adapter));
5688         }
5689         return 0;
5690 }
5691
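/* Map the BARs needed to drive the adapter: CSR (BEx PF only),
 * doorbell, and PCICFG (mapped on PFs, derived from the doorbell
 * mapping on VFs). Also records the SLI family and VF-ness.
 */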
5692 static int be_map_pci_bars(struct be_adapter *adapter)
5693 {
5694         struct pci_dev *pdev = adapter->pdev;
5695         u8 __iomem *addr;
5696         u32 sli_intf;
5697
5698         pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf);
5699         adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >>
5700                                 SLI_INTF_FAMILY_SHIFT;
5701         adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0;
5702
5703         if (BEx_chip(adapter) && be_physfn(adapter)) {
5704                 adapter->csr = pci_iomap(pdev, 2, 0);
5705                 if (!adapter->csr)
5706                         return -ENOMEM;
5707         }
5708
5709         addr = pci_iomap(pdev, db_bar(adapter), 0);
5710         if (!addr)
5711                 goto pci_map_err;
5712         adapter->db = addr;
5713
5714         if (skyhawk_chip(adapter) || BEx_chip(adapter)) {
5715                 if (be_physfn(adapter)) {
5716                         /* PCICFG is the 2nd BAR in BE2 */
5717                         addr = pci_iomap(pdev, BE2_chip(adapter) ? 1 : 0, 0);
5718                         if (!addr)
5719                                 goto pci_map_err;
5720                         adapter->pcicfg = addr;
5721                         adapter->pcicfg_mapped = true;
5722                 } else {
5723                         adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET;
5724                         adapter->pcicfg_mapped = false;
5725                 }
5726         }
5727
5728         be_roce_map_pci_bars(adapter);
5729         return 0;
5730
5731 pci_map_err:
5732         dev_err(&pdev->dev, "Error in mapping PCI BARs\n");
5733         be_unmap_pci_bars(adapter);
5734         return -ENOMEM;
5735 }
5736
5737 static void be_drv_cleanup(struct be_adapter *adapter)
5738 {
5739         struct be_dma_mem *mem = &adapter->mbox_mem_alloced;
5740         struct device *dev = &adapter->pdev->dev;
5741
5742         if (mem->va)
5743                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5744
5745         mem = &adapter->rx_filter;
5746         if (mem->va)
5747                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5748
5749         mem = &adapter->stats_cmd;
5750         if (mem->va)
5751                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5752 }
5753
5754 /* Allocate and initialize various fields in be_adapter struct */
5755 static int be_drv_init(struct be_adapter *adapter)
5756 {
5757         struct be_dma_mem *mbox_mem_alloc = &adapter->mbox_mem_alloced;
5758         struct be_dma_mem *mbox_mem_align = &adapter->mbox_mem;
5759         struct be_dma_mem *rx_filter = &adapter->rx_filter;
5760         struct be_dma_mem *stats_cmd = &adapter->stats_cmd;
5761         struct device *dev = &adapter->pdev->dev;
5762         int status = 0;
5763
5764         mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16;
5765         mbox_mem_alloc->va = dma_alloc_coherent(dev, mbox_mem_alloc->size,
5766                                                 &mbox_mem_alloc->dma,
5767                                                 GFP_KERNEL);
5768         if (!mbox_mem_alloc->va)
5769                 return -ENOMEM;
5770
5771         mbox_mem_align->size = sizeof(struct be_mcc_mailbox);
5772         mbox_mem_align->va = PTR_ALIGN(mbox_mem_alloc->va, 16);
5773         mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16);
5774
5775         rx_filter->size = sizeof(struct be_cmd_req_rx_filter);
5776         rx_filter->va = dma_alloc_coherent(dev, rx_filter->size,
5777                                            &rx_filter->dma, GFP_KERNEL);
5778         if (!rx_filter->va) {
5779                 status = -ENOMEM;
5780                 goto free_mbox;
5781         }
5782
5783         if (lancer_chip(adapter))
5784                 stats_cmd->size = sizeof(struct lancer_cmd_req_pport_stats);
5785         else if (BE2_chip(adapter))
5786                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v0);
5787         else if (BE3_chip(adapter))
5788                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v1);
5789         else
5790                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v2);
5791         stats_cmd->va = dma_alloc_coherent(dev, stats_cmd->size,
5792                                            &stats_cmd->dma, GFP_KERNEL);
5793         if (!stats_cmd->va) {
5794                 status = -ENOMEM;
5795                 goto free_rx_filter;
5796         }
5797
5798         mutex_init(&adapter->mbox_lock);
5799         mutex_init(&adapter->mcc_lock);
5800         mutex_init(&adapter->rx_filter_lock);
5801         spin_lock_init(&adapter->mcc_cq_lock);
5802         init_completion(&adapter->et_cmd_compl);
5803
5804         pci_save_state(adapter->pdev);
5805
5806         INIT_DELAYED_WORK(&adapter->work, be_worker);
5807
5808         adapter->error_recovery.recovery_state = ERR_RECOVERY_ST_NONE;
5809         adapter->error_recovery.resched_delay = 0;
5810         INIT_DELAYED_WORK(&adapter->error_recovery.err_detection_work,
5811                           be_err_detection_task);
5812
5813         adapter->rx_fc = true;
5814         adapter->tx_fc = true;
5815
5816         /* Must be a power of 2 or else MODULO will BUG_ON */
5817         adapter->be_get_temp_freq = 64;
5818
5819         INIT_LIST_HEAD(&adapter->vxlan_port_list);
5820         return 0;
5821
5822 free_rx_filter:
5823         dma_free_coherent(dev, rx_filter->size, rx_filter->va, rx_filter->dma);
5824 free_mbox:
5825         dma_free_coherent(dev, mbox_mem_alloc->size, mbox_mem_alloc->va,
5826                           mbox_mem_alloc->dma);
5827         return status;
5828 }
5829
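/* PCI remove callback: tears down RoCE, stops error detection,
 * unregisters the netdev and releases all HW and driver resources.
 */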
5830 static void be_remove(struct pci_dev *pdev)
5831 {
5832         struct be_adapter *adapter = pci_get_drvdata(pdev);
5833
5834         if (!adapter)
5835                 return;
5836
5837         be_roce_dev_remove(adapter);
5838         be_intr_set(adapter, false);
5839
5840         be_cancel_err_detection(adapter);
5841
5842         unregister_netdev(adapter->netdev);
5843
5844         be_clear(adapter);
5845
5846         if (!pci_vfs_assigned(adapter->pdev))
5847                 be_cmd_reset_function(adapter);
5848
5849         /* tell fw we're done with firing cmds */
5850         be_cmd_fw_clean(adapter);
5851
5852         be_unmap_pci_bars(adapter);
5853         be_drv_cleanup(adapter);
5854
5855         pci_disable_pcie_error_reporting(pdev);
5856
5857         pci_release_regions(pdev);
5858         pci_disable_device(pdev);
5859
5860         free_netdev(adapter->netdev);
5861 }
5862
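/* hwmon temp1_input attribute: reports the cached on-die temperature
 * in millidegrees Celsius, or -EIO if no valid sample is available.
 */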
5863 static ssize_t be_hwmon_show_temp(struct device *dev,
5864                                   struct device_attribute *dev_attr,
5865                                   char *buf)
5866 {
5867         struct be_adapter *adapter = dev_get_drvdata(dev);
5868
5869         /* Unit: millidegree Celsius */
5870         if (adapter->hwmon_info.be_on_die_temp == BE_INVALID_DIE_TEMP)
5871                 return -EIO;
5872         else
5873                 return sprintf(buf, "%u\n",
5874                                adapter->hwmon_info.be_on_die_temp * 1000);
5875 }
5876
5877 static SENSOR_DEVICE_ATTR(temp1_input, 0444,
5878                           be_hwmon_show_temp, NULL, 1);
5879
5880 static struct attribute *be_hwmon_attrs[] = {
5881         &sensor_dev_attr_temp1_input.dev_attr.attr,
5882         NULL
5883 };
5884
5885 ATTRIBUTE_GROUPS(be_hwmon);
5886
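/* Human-readable name of the multi-channel mode, used in the probe
 * banner.
 */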
5887 static char *mc_name(struct be_adapter *adapter)
5888 {
5889         char *str = ""; /* default */
5890
5891         switch (adapter->mc_type) {
5892         case UMC:
5893                 str = "UMC";
5894                 break;
5895         case FLEX10:
5896                 str = "FLEX10";
5897                 break;
5898         case vNIC1:
5899                 str = "vNIC-1";
5900                 break;
5901         case nPAR:
5902                 str = "nPAR";
5903                 break;
5904         case UFP:
5905                 str = "UFP";
5906                 break;
5907         case vNIC2:
5908                 str = "vNIC-2";
5909                 break;
5910         default:
5911                 str = "";
5912         }
5913
5914         return str;
5915 }
5916
5917 static inline char *func_name(struct be_adapter *adapter)
5918 {
5919         return be_physfn(adapter) ? "PF" : "VF";
5920 }
5921
5922 static inline char *nic_name(struct pci_dev *pdev)
5923 {
5924         switch (pdev->device) {
5925         case OC_DEVICE_ID1:
5926                 return OC_NAME;
5927         case OC_DEVICE_ID2:
5928                 return OC_NAME_BE;
5929         case OC_DEVICE_ID3:
5930         case OC_DEVICE_ID4:
5931                 return OC_NAME_LANCER;
5932         case BE_DEVICE_ID2:
5933                 return BE3_NAME;
5934         case OC_DEVICE_ID5:
5935         case OC_DEVICE_ID6:
5936                 return OC_NAME_SH;
5937         default:
5938                 return BE_NAME;
5939         }
5940 }
5941
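/* PCI probe: enables the device, sets the DMA mask, maps BARs,
 * allocates the netdev and driver state, brings the adapter up and
 * registers it with the networking stack.
 */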
5942 static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id)
5943 {
5944         struct be_adapter *adapter;
5945         struct net_device *netdev;
5946         int status = 0;
5947
5948         dev_info(&pdev->dev, "%s version is %s\n", DRV_NAME, DRV_VER);
5949
5950         status = pci_enable_device(pdev);
5951         if (status)
5952                 goto do_none;
5953
5954         status = pci_request_regions(pdev, DRV_NAME);
5955         if (status)
5956                 goto disable_dev;
5957         pci_set_master(pdev);
5958
5959         netdev = alloc_etherdev_mqs(sizeof(*adapter), MAX_TX_QS, MAX_RX_QS);
5960         if (!netdev) {
5961                 status = -ENOMEM;
5962                 goto rel_reg;
5963         }
5964         adapter = netdev_priv(netdev);
5965         adapter->pdev = pdev;
5966         pci_set_drvdata(pdev, adapter);
5967         adapter->netdev = netdev;
5968         SET_NETDEV_DEV(netdev, &pdev->dev);
5969
5970         status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
5971         if (!status) {
5972                 netdev->features |= NETIF_F_HIGHDMA;
5973         } else {
5974                 status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
5975                 if (status) {
5976                         dev_err(&pdev->dev, "Could not set PCI DMA Mask\n");
5977                         goto free_netdev;
5978                 }
5979         }
5980
5981         status = pci_enable_pcie_error_reporting(pdev);
5982         if (!status)
5983                 dev_info(&pdev->dev, "PCIe error reporting enabled\n");
5984
5985         status = be_map_pci_bars(adapter);
5986         if (status)
5987                 goto free_netdev;
5988
5989         status = be_drv_init(adapter);
5990         if (status)
5991                 goto unmap_bars;
5992
5993         status = be_setup(adapter);
5994         if (status)
5995                 goto drv_cleanup;
5996
5997         be_netdev_init(netdev);
5998         status = register_netdev(netdev);
5999         if (status != 0)
6000                 goto unsetup;
6001
6002         be_roce_dev_add(adapter);
6003
6004         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6005         adapter->error_recovery.probe_time = jiffies;
6006
6007         /* On-die temperature is not supported for VFs */
6008         if (be_physfn(adapter) && IS_ENABLED(CONFIG_BE2NET_HWMON)) {
6009                 adapter->hwmon_info.hwmon_dev =
6010                         devm_hwmon_device_register_with_groups(&pdev->dev,
6011                                                                DRV_NAME,
6012                                                                adapter,
6013                                                                be_hwmon_groups);
6014                 adapter->hwmon_info.be_on_die_temp = BE_INVALID_DIE_TEMP;
6015         }
6016
6017         dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev),
6018                  func_name(adapter), mc_name(adapter), adapter->port_name);
6019
6020         return 0;
6021
6022 unsetup:
6023         be_clear(adapter);
6024 drv_cleanup:
6025         be_drv_cleanup(adapter);
6026 unmap_bars:
6027         be_unmap_pci_bars(adapter);
6028 free_netdev:
6029         free_netdev(netdev);
6030 rel_reg:
6031         pci_release_regions(pdev);
6032 disable_dev:
6033         pci_disable_device(pdev);
6034 do_none:
6035         dev_err(&pdev->dev, "%s initialization failed\n", nic_name(pdev));
6036         return status;
6037 }
6038
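/* Legacy PCI power-management hooks: quiesce and power the device down
 * on suspend; re-enable it and restore adapter state on resume.
 */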
6039 static int be_suspend(struct pci_dev *pdev, pm_message_t state)
6040 {
6041         struct be_adapter *adapter = pci_get_drvdata(pdev);
6042
6043         be_intr_set(adapter, false);
6044         be_cancel_err_detection(adapter);
6045
6046         be_cleanup(adapter);
6047
6048         pci_save_state(pdev);
6049         pci_disable_device(pdev);
6050         pci_set_power_state(pdev, pci_choose_state(pdev, state));
6051         return 0;
6052 }
6053
6054 static int be_pci_resume(struct pci_dev *pdev)
6055 {
6056         struct be_adapter *adapter = pci_get_drvdata(pdev);
6057         int status = 0;
6058
6059         status = pci_enable_device(pdev);
6060         if (status)
6061                 return status;
6062
6063         pci_restore_state(pdev);
6064
6065         status = be_resume(adapter);
6066         if (status)
6067                 return status;
6068
6069         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6070
6071         return 0;
6072 }
6073
6074 /*
6075  * An FLR will stop BE from DMAing any data.
6076  */
6077 static void be_shutdown(struct pci_dev *pdev)
6078 {
6079         struct be_adapter *adapter = pci_get_drvdata(pdev);
6080
6081         if (!adapter)
6082                 return;
6083
6084         be_roce_dev_shutdown(adapter);
6085         cancel_delayed_work_sync(&adapter->work);
6086         be_cancel_err_detection(adapter);
6087
6088         netif_device_detach(adapter->netdev);
6089
6090         be_cmd_reset_function(adapter);
6091
6092         pci_disable_device(pdev);
6093 }
6094
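/* EEH/AER error handlers: quiesce the adapter when an error is
 * detected, re-enable the device and wait for FW readiness on slot
 * reset, then restore normal operation on resume.
 */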
6095 static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev,
6096                                             pci_channel_state_t state)
6097 {
6098         struct be_adapter *adapter = pci_get_drvdata(pdev);
6099
6100         dev_err(&adapter->pdev->dev, "EEH error detected\n");
6101
6102         be_roce_dev_remove(adapter);
6103
6104         if (!be_check_error(adapter, BE_ERROR_EEH)) {
6105                 be_set_error(adapter, BE_ERROR_EEH);
6106
6107                 be_cancel_err_detection(adapter);
6108
6109                 be_cleanup(adapter);
6110         }
6111
6112         if (state == pci_channel_io_perm_failure)
6113                 return PCI_ERS_RESULT_DISCONNECT;
6114
6115         pci_disable_device(pdev);
6116
6117         /* The error could cause the FW to trigger a flash debug dump.
6118          * Resetting the card while the flash dump is in progress can
6119          * prevent it from recovering; wait for the dump to finish.
6120          * Only the first function needs to wait, as the dump is taken
6121          * only once per adapter.
6122          */
6123         if (pdev->devfn == 0)
6124                 ssleep(30);
6125
6126         return PCI_ERS_RESULT_NEED_RESET;
6127 }
6128
6129 static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
6130 {
6131         struct be_adapter *adapter = pci_get_drvdata(pdev);
6132         int status;
6133
6134         dev_info(&adapter->pdev->dev, "EEH reset\n");
6135
6136         status = pci_enable_device(pdev);
6137         if (status)
6138                 return PCI_ERS_RESULT_DISCONNECT;
6139
6140         pci_set_master(pdev);
6141         pci_restore_state(pdev);
6142
6143         /* Check if card is ok and fw is ready */
6144         dev_info(&adapter->pdev->dev,
6145                  "Waiting for FW to be ready after EEH reset\n");
6146         status = be_fw_wait_ready(adapter);
6147         if (status)
6148                 return PCI_ERS_RESULT_DISCONNECT;
6149
6150         be_clear_error(adapter, BE_CLEAR_ALL);
6151         return PCI_ERS_RESULT_RECOVERED;
6152 }
6153
6154 static void be_eeh_resume(struct pci_dev *pdev)
6155 {
6156         int status = 0;
6157         struct be_adapter *adapter = pci_get_drvdata(pdev);
6158
6159         dev_info(&adapter->pdev->dev, "EEH resume\n");
6160
6161         pci_save_state(pdev);
6162
6163         status = be_resume(adapter);
6164         if (status)
6165                 goto err;
6166
6167         be_roce_dev_add(adapter);
6168
6169         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6170         return;
6171 err:
6172         dev_err(&adapter->pdev->dev, "EEH resume failed\n");
6173 }
6174
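/* sriov_configure callback (sysfs sriov_numvfs): enables or disables
 * num_vfs VFs, redistributing PF-pool resources on Skyhawk before the
 * VFs are created.
 */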
6175 static int be_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
6176 {
6177         struct be_adapter *adapter = pci_get_drvdata(pdev);
6178         struct be_resources vft_res = {0};
6179         int status;
6180
6181         if (!num_vfs)
6182                 be_vf_clear(adapter);
6183
6184         adapter->num_vfs = num_vfs;
6185
6186         if (adapter->num_vfs == 0 && pci_vfs_assigned(pdev)) {
6187                 dev_warn(&pdev->dev,
6188                          "Cannot disable VFs while they are assigned\n");
6189                 return -EBUSY;
6190         }
6191
6192         /* When the HW is in an SR-IOV capable configuration, the PF-pool
6193          * resources are distributed equally across the maximum number of
6194          * VFs. The user may request that only a subset of the max VFs be
6195          * enabled; based on num_vfs, redistribute the resources across
6196          * num_vfs so that each VF gets a larger share.
6197          * This facility is not available in BE3 FW. On Lancer chips the
6198          * FW performs this redistribution itself.
6199          */
6200         if (skyhawk_chip(adapter) && !pci_num_vf(pdev)) {
6201                 be_calculate_vf_res(adapter, adapter->num_vfs,
6202                                     &vft_res);
6203                 status = be_cmd_set_sriov_config(adapter, adapter->pool_res,
6204                                                  adapter->num_vfs, &vft_res);
6205                 if (status)
6206                         dev_err(&pdev->dev,
6207                                 "Failed to optimize SR-IOV resources\n");
6208         }
6209
6210         status = be_get_resources(adapter);
6211         if (status)
6212                 return be_cmd_status(status);
6213
6214         /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
6215         rtnl_lock();
6216         status = be_update_queues(adapter);
6217         rtnl_unlock();
6218         if (status)
6219                 return be_cmd_status(status);
6220
6221         if (adapter->num_vfs)
6222                 status = be_vf_setup(adapter);
6223
6224         if (!status)
6225                 return adapter->num_vfs;
6226
6227         return 0;
6228 }
6229
6230 static const struct pci_error_handlers be_eeh_handlers = {
6231         .error_detected = be_eeh_err_detected,
6232         .slot_reset = be_eeh_reset,
6233         .resume = be_eeh_resume,
6234 };
6235
6236 static struct pci_driver be_driver = {
6237         .name = DRV_NAME,
6238         .id_table = be_dev_ids,
6239         .probe = be_probe,
6240         .remove = be_remove,
6241         .suspend = be_suspend,
6242         .resume = be_pci_resume,
6243         .shutdown = be_shutdown,
6244         .sriov_configure = be_pci_sriov_configure,
6245         .err_handler = &be_eeh_handlers
6246 };
6247
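/* Module init: validates module parameters, creates the shared
 * workqueues and registers the PCI driver.
 */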
6248 static int __init be_init_module(void)
6249 {
6250         int status;
6251
6252         if (rx_frag_size != 8192 && rx_frag_size != 4096 &&
6253             rx_frag_size != 2048) {
6254                 pr_warn(DRV_NAME
6255                         " : Module param rx_frag_size must be 2048/4096/8192."
6256                         " Using 2048\n");
6257                 rx_frag_size = 2048;
6258         }
6259
6260         if (num_vfs > 0) {
6261                 pr_info(DRV_NAME " : Module param num_vfs is obsolete.\n");
6262                 pr_info(DRV_NAME " : Use sysfs method to enable VFs\n");
6263         }
6264
6265         be_wq = create_singlethread_workqueue("be_wq");
6266         if (!be_wq) {
6267                 pr_warn(DRV_NAME " : workqueue creation failed\n");
6268                 return -ENOMEM;
6269         }
6270
6271         be_err_recovery_workq =
6272                 create_singlethread_workqueue("be_err_recover");
6273         if (!be_err_recovery_workq)
6274                 pr_warn(DRV_NAME " : Could not create error recovery workqueue\n");
6275
6276         status = pci_register_driver(&be_driver);
6277         if (status) {
6278                 destroy_workqueue(be_wq);
6279                 be_destroy_err_recovery_workq();
6280         }
6281         return status;
6282 }
6283 module_init(be_init_module);
6284
6285 static void __exit be_exit_module(void)
6286 {
6287         pci_unregister_driver(&be_driver);
6288
6289         be_destroy_err_recovery_workq();
6290
6291         if (be_wq)
6292                 destroy_workqueue(be_wq);
6293 }
6294 module_exit(be_exit_module);