1 /*
2  * Copyright (C) 2005 - 2016 Broadcom
3  * All rights reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License version 2
7  * as published by the Free Software Foundation.  The full GNU General
8  * Public License is included in this distribution in the file called COPYING.
9  *
10  * Contact Information:
11  * linux-drivers@emulex.com
12  *
13  * Emulex
14  * 3333 Susan Street
15  * Costa Mesa, CA 92626
16  */
17
18 #include <linux/prefetch.h>
19 #include <linux/module.h>
20 #include "be.h"
21 #include "be_cmds.h"
22 #include <asm/div64.h>
23 #include <linux/aer.h>
24 #include <linux/if_bridge.h>
25 #include <net/busy_poll.h>
26 #include <net/vxlan.h>
27
28 MODULE_VERSION(DRV_VER);
29 MODULE_DESCRIPTION(DRV_DESC " " DRV_VER);
30 MODULE_AUTHOR("Emulex Corporation");
31 MODULE_LICENSE("GPL");
32
33 /* num_vfs module param is obsolete.
34  * Use sysfs method to enable/disable VFs.
35  */
36 static unsigned int num_vfs;
37 module_param(num_vfs, uint, S_IRUGO);
38 MODULE_PARM_DESC(num_vfs, "Number of PCI VFs to initialize");
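/* Illustrative note (not part of the original file): the sysfs method
 * referred to above is the standard PCI sriov_numvfs attribute; the device
 * path and VF count below are only example placeholders.
 *
 *   echo 2 > /sys/bus/pci/devices/0000:04:00.0/sriov_numvfs   # enable 2 VFs
 *   echo 0 > /sys/bus/pci/devices/0000:04:00.0/sriov_numvfs   # disable VFs
 */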
39
40 static ushort rx_frag_size = 2048;
41 module_param(rx_frag_size, ushort, S_IRUGO);
42 MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data.");
43
44 /* Per-module error detection/recovery workq shared across all functions.
45  * Each function schedules its own work request on this shared workq.
46  */
47 static struct workqueue_struct *be_err_recovery_workq;
48
49 static const struct pci_device_id be_dev_ids[] = {
50         { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) },
51         { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID2) },
52         { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID1) },
53         { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID2) },
54         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID3)},
55         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID4)},
56         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID5)},
57         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID6)},
58         { 0 }
59 };
60 MODULE_DEVICE_TABLE(pci, be_dev_ids);
61
62 /* Workqueue used by all functions for deferring cmd calls to the adapter */
63 static struct workqueue_struct *be_wq;
64
65 /* UE Status Low CSR */
66 static const char * const ue_status_low_desc[] = {
67         "CEV",
68         "CTX",
69         "DBUF",
70         "ERX",
71         "Host",
72         "MPU",
73         "NDMA",
74         "PTC ",
75         "RDMA ",
76         "RXF ",
77         "RXIPS ",
78         "RXULP0 ",
79         "RXULP1 ",
80         "RXULP2 ",
81         "TIM ",
82         "TPOST ",
83         "TPRE ",
84         "TXIPS ",
85         "TXULP0 ",
86         "TXULP1 ",
87         "UC ",
88         "WDMA ",
89         "TXULP2 ",
90         "HOST1 ",
91         "P0_OB_LINK ",
92         "P1_OB_LINK ",
93         "HOST_GPIO ",
94         "MBOX ",
95         "ERX2 ",
96         "SPARE ",
97         "JTAG ",
98         "MPU_INTPEND "
99 };
100
101 /* UE Status High CSR */
102 static const char * const ue_status_hi_desc[] = {
103         "LPCMEMHOST",
104         "MGMT_MAC",
105         "PCS0ONLINE",
106         "MPU_IRAM",
107         "PCS1ONLINE",
108         "PCTL0",
109         "PCTL1",
110         "PMEM",
111         "RR",
112         "TXPB",
113         "RXPP",
114         "XAUI",
115         "TXP",
116         "ARM",
117         "IPC",
118         "HOST2",
119         "HOST3",
120         "HOST4",
121         "HOST5",
122         "HOST6",
123         "HOST7",
124         "ECRC",
125         "Poison TLP",
126         "NETC",
127         "PERIPH",
128         "LLTXULP",
129         "D2P",
130         "RCON",
131         "LDMA",
132         "LLTXP",
133         "LLTXPB",
134         "Unknown"
135 };
136
137 #define BE_VF_IF_EN_FLAGS       (BE_IF_FLAGS_UNTAGGED | \
138                                  BE_IF_FLAGS_BROADCAST | \
139                                  BE_IF_FLAGS_MULTICAST | \
140                                  BE_IF_FLAGS_PASS_L3L4_ERRORS)
141
142 static void be_queue_free(struct be_adapter *adapter, struct be_queue_info *q)
143 {
144         struct be_dma_mem *mem = &q->dma_mem;
145
146         if (mem->va) {
147                 dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va,
148                                   mem->dma);
149                 mem->va = NULL;
150         }
151 }
152
153 static int be_queue_alloc(struct be_adapter *adapter, struct be_queue_info *q,
154                           u16 len, u16 entry_size)
155 {
156         struct be_dma_mem *mem = &q->dma_mem;
157
158         memset(q, 0, sizeof(*q));
159         q->len = len;
160         q->entry_size = entry_size;
161         mem->size = len * entry_size;
162         mem->va = dma_zalloc_coherent(&adapter->pdev->dev, mem->size, &mem->dma,
163                                       GFP_KERNEL);
164         if (!mem->va)
165                 return -ENOMEM;
166         return 0;
167 }
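/* Illustrative sizing example (not part of the original file): a ring
 * created with len = 2048 and entry_size = 16 (assuming a 4-dword
 * be_eth_wrb entry) allocates one 2048 * 16 = 32768-byte coherent DMA
 * buffer for the whole ring.
 */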
168
169 static void be_reg_intr_set(struct be_adapter *adapter, bool enable)
170 {
171         u32 reg, enabled;
172
173         pci_read_config_dword(adapter->pdev, PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET,
174                               &reg);
175         enabled = reg & MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
176
177         if (!enabled && enable)
178                 reg |= MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
179         else if (enabled && !enable)
180                 reg &= ~MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
181         else
182                 return;
183
184         pci_write_config_dword(adapter->pdev,
185                                PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET, reg);
186 }
187
188 static void be_intr_set(struct be_adapter *adapter, bool enable)
189 {
190         int status = 0;
191
192         /* On Lancer chips, interrupts can't be controlled via this register */
193         if (lancer_chip(adapter))
194                 return;
195
196         if (be_check_error(adapter, BE_ERROR_EEH))
197                 return;
198
199         status = be_cmd_intr_set(adapter, enable);
200         if (status)
201                 be_reg_intr_set(adapter, enable);
202 }
203
204 static void be_rxq_notify(struct be_adapter *adapter, u16 qid, u16 posted)
205 {
206         u32 val = 0;
207
208         if (be_check_error(adapter, BE_ERROR_HW))
209                 return;
210
211         val |= qid & DB_RQ_RING_ID_MASK;
212         val |= posted << DB_RQ_NUM_POSTED_SHIFT;
213
214         wmb();
215         iowrite32(val, adapter->db + DB_RQ_OFFSET);
216 }
217
218 static void be_txq_notify(struct be_adapter *adapter, struct be_tx_obj *txo,
219                           u16 posted)
220 {
221         u32 val = 0;
222
223         if (be_check_error(adapter, BE_ERROR_HW))
224                 return;
225
226         val |= txo->q.id & DB_TXULP_RING_ID_MASK;
227         val |= (posted & DB_TXULP_NUM_POSTED_MASK) << DB_TXULP_NUM_POSTED_SHIFT;
228
229         wmb();
230         iowrite32(val, adapter->db + txo->db_offset);
231 }
232
233 static void be_eq_notify(struct be_adapter *adapter, u16 qid,
234                          bool arm, bool clear_int, u16 num_popped,
235                          u32 eq_delay_mult_enc)
236 {
237         u32 val = 0;
238
239         val |= qid & DB_EQ_RING_ID_MASK;
240         val |= ((qid & DB_EQ_RING_ID_EXT_MASK) << DB_EQ_RING_ID_EXT_MASK_SHIFT);
241
242         if (be_check_error(adapter, BE_ERROR_HW))
243                 return;
244
245         if (arm)
246                 val |= 1 << DB_EQ_REARM_SHIFT;
247         if (clear_int)
248                 val |= 1 << DB_EQ_CLR_SHIFT;
249         val |= 1 << DB_EQ_EVNT_SHIFT;
250         val |= num_popped << DB_EQ_NUM_POPPED_SHIFT;
251         val |= eq_delay_mult_enc << DB_EQ_R2I_DLY_SHIFT;
252         iowrite32(val, adapter->db + DB_EQ_OFFSET);
253 }
254
255 void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped)
256 {
257         u32 val = 0;
258
259         val |= qid & DB_CQ_RING_ID_MASK;
260         val |= ((qid & DB_CQ_RING_ID_EXT_MASK) <<
261                         DB_CQ_RING_ID_EXT_MASK_SHIFT);
262
263         if (be_check_error(adapter, BE_ERROR_HW))
264                 return;
265
266         if (arm)
267                 val |= 1 << DB_CQ_REARM_SHIFT;
268         val |= num_popped << DB_CQ_NUM_POPPED_SHIFT;
269         iowrite32(val, adapter->db + DB_CQ_OFFSET);
270 }
271
272 static int be_dev_mac_add(struct be_adapter *adapter, u8 *mac)
273 {
274         int i;
275
276         /* Check if mac has already been added as part of uc-list */
277         for (i = 0; i < adapter->uc_macs; i++) {
278                 if (ether_addr_equal(adapter->uc_list[i].mac, mac)) {
279                         /* mac already added, skip addition */
280                         adapter->pmac_id[0] = adapter->pmac_id[i + 1];
281                         return 0;
282                 }
283         }
284
285         return be_cmd_pmac_add(adapter, mac, adapter->if_handle,
286                                &adapter->pmac_id[0], 0);
287 }
288
289 static void be_dev_mac_del(struct be_adapter *adapter, int pmac_id)
290 {
291         int i;
292
293         /* Skip deletion if the programmed mac is
294          * being used in uc-list
295          */
296         for (i = 0; i < adapter->uc_macs; i++) {
297                 if (adapter->pmac_id[i + 1] == pmac_id)
298                         return;
299         }
300         be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
301 }
302
303 static int be_mac_addr_set(struct net_device *netdev, void *p)
304 {
305         struct be_adapter *adapter = netdev_priv(netdev);
306         struct device *dev = &adapter->pdev->dev;
307         struct sockaddr *addr = p;
308         int status;
309         u8 mac[ETH_ALEN];
310         u32 old_pmac_id = adapter->pmac_id[0];
311
312         if (!is_valid_ether_addr(addr->sa_data))
313                 return -EADDRNOTAVAIL;
314
315         /* Proceed further only if the user-provided MAC is different
316          * from the active MAC
317          */
318         if (ether_addr_equal(addr->sa_data, adapter->dev_mac))
319                 return 0;
320
321         /* if device is not running, copy MAC to netdev->dev_addr */
322         if (!netif_running(netdev))
323                 goto done;
324
325         /* The PMAC_ADD cmd may fail if the VF doesn't have FILTMGMT
326          * privilege or if PF did not provision the new MAC address.
327          * On BE3, this cmd will always fail if the VF doesn't have the
328          * FILTMGMT privilege. This failure is OK only if the PF has programmed
329          * the MAC for the VF.
330          */
331         mutex_lock(&adapter->rx_filter_lock);
332         status = be_dev_mac_add(adapter, (u8 *)addr->sa_data);
333         if (!status) {
334
335                 /* Delete the old programmed MAC. This call may fail if the
336                  * old MAC was already deleted by the PF driver.
337                  */
338                 if (adapter->pmac_id[0] != old_pmac_id)
339                         be_dev_mac_del(adapter, old_pmac_id);
340         }
341
342         mutex_unlock(&adapter->rx_filter_lock);
343         /* Decide whether the new MAC was successfully activated only after
344          * querying the FW
345          */
346         status = be_cmd_get_active_mac(adapter, adapter->pmac_id[0], mac,
347                                        adapter->if_handle, true, 0);
348         if (status)
349                 goto err;
350
351         /* The MAC change did not happen, either due to lack of privilege
352          * or because the PF didn't pre-provision the new MAC.
353          */
354         if (!ether_addr_equal(addr->sa_data, mac)) {
355                 status = -EPERM;
356                 goto err;
357         }
358 done:
359         ether_addr_copy(adapter->dev_mac, addr->sa_data);
360         ether_addr_copy(netdev->dev_addr, addr->sa_data);
361         dev_info(dev, "MAC address changed to %pM\n", addr->sa_data);
362         return 0;
363 err:
364         dev_warn(dev, "MAC address change to %pM failed\n", addr->sa_data);
365         return status;
366 }
367
368 /* BE2 supports only v0 cmd */
369 static void *hw_stats_from_cmd(struct be_adapter *adapter)
370 {
371         if (BE2_chip(adapter)) {
372                 struct be_cmd_resp_get_stats_v0 *cmd = adapter->stats_cmd.va;
373
374                 return &cmd->hw_stats;
375         } else if (BE3_chip(adapter)) {
376                 struct be_cmd_resp_get_stats_v1 *cmd = adapter->stats_cmd.va;
377
378                 return &cmd->hw_stats;
379         } else {
380                 struct be_cmd_resp_get_stats_v2 *cmd = adapter->stats_cmd.va;
381
382                 return &cmd->hw_stats;
383         }
384 }
385
386 /* BE2 supports only v0 cmd */
387 static void *be_erx_stats_from_cmd(struct be_adapter *adapter)
388 {
389         if (BE2_chip(adapter)) {
390                 struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
391
392                 return &hw_stats->erx;
393         } else if (BE3_chip(adapter)) {
394                 struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
395
396                 return &hw_stats->erx;
397         } else {
398                 struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
399
400                 return &hw_stats->erx;
401         }
402 }
403
404 static void populate_be_v0_stats(struct be_adapter *adapter)
405 {
406         struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
407         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
408         struct be_rxf_stats_v0 *rxf_stats = &hw_stats->rxf;
409         struct be_port_rxf_stats_v0 *port_stats =
410                                         &rxf_stats->port[adapter->port_num];
411         struct be_drv_stats *drvs = &adapter->drv_stats;
412
413         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
414         drvs->rx_pause_frames = port_stats->rx_pause_frames;
415         drvs->rx_crc_errors = port_stats->rx_crc_errors;
416         drvs->rx_control_frames = port_stats->rx_control_frames;
417         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
418         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
419         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
420         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
421         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
422         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
423         drvs->rxpp_fifo_overflow_drop = port_stats->rx_fifo_overflow;
424         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
425         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
426         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
427         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
428         drvs->rx_input_fifo_overflow_drop = port_stats->rx_input_fifo_overflow;
429         drvs->rx_dropped_header_too_small =
430                 port_stats->rx_dropped_header_too_small;
431         drvs->rx_address_filtered =
432                                         port_stats->rx_address_filtered +
433                                         port_stats->rx_vlan_filtered;
434         drvs->rx_alignment_symbol_errors =
435                 port_stats->rx_alignment_symbol_errors;
436
437         drvs->tx_pauseframes = port_stats->tx_pauseframes;
438         drvs->tx_controlframes = port_stats->tx_controlframes;
439
440         if (adapter->port_num)
441                 drvs->jabber_events = rxf_stats->port1_jabber_events;
442         else
443                 drvs->jabber_events = rxf_stats->port0_jabber_events;
444         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
445         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
446         drvs->forwarded_packets = rxf_stats->forwarded_packets;
447         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
448         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
449         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
450         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
451 }
452
453 static void populate_be_v1_stats(struct be_adapter *adapter)
454 {
455         struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
456         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
457         struct be_rxf_stats_v1 *rxf_stats = &hw_stats->rxf;
458         struct be_port_rxf_stats_v1 *port_stats =
459                                         &rxf_stats->port[adapter->port_num];
460         struct be_drv_stats *drvs = &adapter->drv_stats;
461
462         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
463         drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
464         drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
465         drvs->rx_pause_frames = port_stats->rx_pause_frames;
466         drvs->rx_crc_errors = port_stats->rx_crc_errors;
467         drvs->rx_control_frames = port_stats->rx_control_frames;
468         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
469         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
470         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
471         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
472         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
473         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
474         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
475         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
476         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
477         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
478         drvs->rx_dropped_header_too_small =
479                 port_stats->rx_dropped_header_too_small;
480         drvs->rx_input_fifo_overflow_drop =
481                 port_stats->rx_input_fifo_overflow_drop;
482         drvs->rx_address_filtered = port_stats->rx_address_filtered;
483         drvs->rx_alignment_symbol_errors =
484                 port_stats->rx_alignment_symbol_errors;
485         drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
486         drvs->tx_pauseframes = port_stats->tx_pauseframes;
487         drvs->tx_controlframes = port_stats->tx_controlframes;
488         drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
489         drvs->jabber_events = port_stats->jabber_events;
490         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
491         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
492         drvs->forwarded_packets = rxf_stats->forwarded_packets;
493         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
494         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
495         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
496         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
497 }
498
499 static void populate_be_v2_stats(struct be_adapter *adapter)
500 {
501         struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
502         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
503         struct be_rxf_stats_v2 *rxf_stats = &hw_stats->rxf;
504         struct be_port_rxf_stats_v2 *port_stats =
505                                         &rxf_stats->port[adapter->port_num];
506         struct be_drv_stats *drvs = &adapter->drv_stats;
507
508         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
509         drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
510         drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
511         drvs->rx_pause_frames = port_stats->rx_pause_frames;
512         drvs->rx_crc_errors = port_stats->rx_crc_errors;
513         drvs->rx_control_frames = port_stats->rx_control_frames;
514         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
515         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
516         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
517         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
518         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
519         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
520         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
521         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
522         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
523         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
524         drvs->rx_dropped_header_too_small =
525                 port_stats->rx_dropped_header_too_small;
526         drvs->rx_input_fifo_overflow_drop =
527                 port_stats->rx_input_fifo_overflow_drop;
528         drvs->rx_address_filtered = port_stats->rx_address_filtered;
529         drvs->rx_alignment_symbol_errors =
530                 port_stats->rx_alignment_symbol_errors;
531         drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
532         drvs->tx_pauseframes = port_stats->tx_pauseframes;
533         drvs->tx_controlframes = port_stats->tx_controlframes;
534         drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
535         drvs->jabber_events = port_stats->jabber_events;
536         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
537         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
538         drvs->forwarded_packets = rxf_stats->forwarded_packets;
539         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
540         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
541         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
542         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
543         if (be_roce_supported(adapter)) {
544                 drvs->rx_roce_bytes_lsd = port_stats->roce_bytes_received_lsd;
545                 drvs->rx_roce_bytes_msd = port_stats->roce_bytes_received_msd;
546                 drvs->rx_roce_frames = port_stats->roce_frames_received;
547                 drvs->roce_drops_crc = port_stats->roce_drops_crc;
548                 drvs->roce_drops_payload_len =
549                         port_stats->roce_drops_payload_len;
550         }
551 }
552
553 static void populate_lancer_stats(struct be_adapter *adapter)
554 {
555         struct be_drv_stats *drvs = &adapter->drv_stats;
556         struct lancer_pport_stats *pport_stats = pport_stats_from_cmd(adapter);
557
558         be_dws_le_to_cpu(pport_stats, sizeof(*pport_stats));
559         drvs->rx_pause_frames = pport_stats->rx_pause_frames_lo;
560         drvs->rx_crc_errors = pport_stats->rx_crc_errors_lo;
561         drvs->rx_control_frames = pport_stats->rx_control_frames_lo;
562         drvs->rx_in_range_errors = pport_stats->rx_in_range_errors;
563         drvs->rx_frame_too_long = pport_stats->rx_frames_too_long_lo;
564         drvs->rx_dropped_runt = pport_stats->rx_dropped_runt;
565         drvs->rx_ip_checksum_errs = pport_stats->rx_ip_checksum_errors;
566         drvs->rx_tcp_checksum_errs = pport_stats->rx_tcp_checksum_errors;
567         drvs->rx_udp_checksum_errs = pport_stats->rx_udp_checksum_errors;
568         drvs->rx_dropped_tcp_length =
569                                 pport_stats->rx_dropped_invalid_tcp_length;
570         drvs->rx_dropped_too_small = pport_stats->rx_dropped_too_small;
571         drvs->rx_dropped_too_short = pport_stats->rx_dropped_too_short;
572         drvs->rx_out_range_errors = pport_stats->rx_out_of_range_errors;
573         drvs->rx_dropped_header_too_small =
574                                 pport_stats->rx_dropped_header_too_small;
575         drvs->rx_input_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
576         drvs->rx_address_filtered =
577                                         pport_stats->rx_address_filtered +
578                                         pport_stats->rx_vlan_filtered;
579         drvs->rx_alignment_symbol_errors = pport_stats->rx_symbol_errors_lo;
580         drvs->rxpp_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
581         drvs->tx_pauseframes = pport_stats->tx_pause_frames_lo;
582         drvs->tx_controlframes = pport_stats->tx_control_frames_lo;
583         drvs->jabber_events = pport_stats->rx_jabbers;
584         drvs->forwarded_packets = pport_stats->num_forwards_lo;
585         drvs->rx_drops_mtu = pport_stats->rx_drops_mtu_lo;
586         drvs->rx_drops_too_many_frags =
587                                 pport_stats->rx_drops_too_many_frags_lo;
588 }
589
590 static void accumulate_16bit_val(u32 *acc, u16 val)
591 {
592 #define lo(x)                   (x & 0xFFFF)
593 #define hi(x)                   (x & 0xFFFF0000)
594         bool wrapped = val < lo(*acc);
595         u32 newacc = hi(*acc) + val;
596
597         if (wrapped)
598                 newacc += 65536;
599         ACCESS_ONCE(*acc) = newacc;
600 }
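/* Worked example (illustrative, not part of the original file): with
 * *acc = 0x0001fffe and val = 0x0002, val < lo(*acc), so the 16-bit HW
 * counter is treated as having wrapped; newacc = 0x00010000 + 0x0002 +
 * 65536 = 0x00020002, i.e. the accumulated value keeps increasing
 * monotonically even though the HW counter wrapped past 65535.
 */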
601
602 static void populate_erx_stats(struct be_adapter *adapter,
603                                struct be_rx_obj *rxo, u32 erx_stat)
604 {
605         if (!BEx_chip(adapter))
606                 rx_stats(rxo)->rx_drops_no_frags = erx_stat;
607         else
608                 /* the erx HW counter below can wrap around after
609                  * 65535; the driver accumulates it into a 32-bit value
610                  */
611                 accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags,
612                                      (u16)erx_stat);
613 }
614
615 void be_parse_stats(struct be_adapter *adapter)
616 {
617         struct be_erx_stats_v2 *erx = be_erx_stats_from_cmd(adapter);
618         struct be_rx_obj *rxo;
619         int i;
620         u32 erx_stat;
621
622         if (lancer_chip(adapter)) {
623                 populate_lancer_stats(adapter);
624         } else {
625                 if (BE2_chip(adapter))
626                         populate_be_v0_stats(adapter);
627                 else if (BE3_chip(adapter))
628                         /* for BE3 */
629                         populate_be_v1_stats(adapter);
630                 else
631                         populate_be_v2_stats(adapter);
632
633                 /* erx_v2 is longer than v0, v1. use v2 for v0, v1 access */
634                 for_all_rx_queues(adapter, rxo, i) {
635                         erx_stat = erx->rx_drops_no_fragments[rxo->q.id];
636                         populate_erx_stats(adapter, rxo, erx_stat);
637                 }
638         }
639 }
640
641 static struct rtnl_link_stats64 *be_get_stats64(struct net_device *netdev,
642                                                 struct rtnl_link_stats64 *stats)
643 {
644         struct be_adapter *adapter = netdev_priv(netdev);
645         struct be_drv_stats *drvs = &adapter->drv_stats;
646         struct be_rx_obj *rxo;
647         struct be_tx_obj *txo;
648         u64 pkts, bytes;
649         unsigned int start;
650         int i;
651
652         for_all_rx_queues(adapter, rxo, i) {
653                 const struct be_rx_stats *rx_stats = rx_stats(rxo);
654
655                 do {
656                         start = u64_stats_fetch_begin_irq(&rx_stats->sync);
657                         pkts = rx_stats(rxo)->rx_pkts;
658                         bytes = rx_stats(rxo)->rx_bytes;
659                 } while (u64_stats_fetch_retry_irq(&rx_stats->sync, start));
660                 stats->rx_packets += pkts;
661                 stats->rx_bytes += bytes;
662                 stats->multicast += rx_stats(rxo)->rx_mcast_pkts;
663                 stats->rx_dropped += rx_stats(rxo)->rx_drops_no_skbs +
664                                         rx_stats(rxo)->rx_drops_no_frags;
665         }
666
667         for_all_tx_queues(adapter, txo, i) {
668                 const struct be_tx_stats *tx_stats = tx_stats(txo);
669
670                 do {
671                         start = u64_stats_fetch_begin_irq(&tx_stats->sync);
672                         pkts = tx_stats(txo)->tx_pkts;
673                         bytes = tx_stats(txo)->tx_bytes;
674                 } while (u64_stats_fetch_retry_irq(&tx_stats->sync, start));
675                 stats->tx_packets += pkts;
676                 stats->tx_bytes += bytes;
677         }
678
679         /* bad pkts received */
680         stats->rx_errors = drvs->rx_crc_errors +
681                 drvs->rx_alignment_symbol_errors +
682                 drvs->rx_in_range_errors +
683                 drvs->rx_out_range_errors +
684                 drvs->rx_frame_too_long +
685                 drvs->rx_dropped_too_small +
686                 drvs->rx_dropped_too_short +
687                 drvs->rx_dropped_header_too_small +
688                 drvs->rx_dropped_tcp_length +
689                 drvs->rx_dropped_runt;
690
691         /* detailed rx errors */
692         stats->rx_length_errors = drvs->rx_in_range_errors +
693                 drvs->rx_out_range_errors +
694                 drvs->rx_frame_too_long;
695
696         stats->rx_crc_errors = drvs->rx_crc_errors;
697
698         /* frame alignment errors */
699         stats->rx_frame_errors = drvs->rx_alignment_symbol_errors;
700
701         /* receiver fifo overrun */
702         /* drops_no_pbuf is not per i/f, it's per BE card */
703         stats->rx_fifo_errors = drvs->rxpp_fifo_overflow_drop +
704                                 drvs->rx_input_fifo_overflow_drop +
705                                 drvs->rx_drops_no_pbuf;
706         return stats;
707 }
708
709 void be_link_status_update(struct be_adapter *adapter, u8 link_status)
710 {
711         struct net_device *netdev = adapter->netdev;
712
713         if (!(adapter->flags & BE_FLAGS_LINK_STATUS_INIT)) {
714                 netif_carrier_off(netdev);
715                 adapter->flags |= BE_FLAGS_LINK_STATUS_INIT;
716         }
717
718         if (link_status)
719                 netif_carrier_on(netdev);
720         else
721                 netif_carrier_off(netdev);
722
723         netdev_info(netdev, "Link is %s\n", link_status ? "Up" : "Down");
724 }
725
726 static int be_gso_hdr_len(struct sk_buff *skb)
727 {
728         if (skb->encapsulation)
729                 return skb_inner_transport_offset(skb) +
730                        inner_tcp_hdrlen(skb);
731         return skb_transport_offset(skb) + tcp_hdrlen(skb);
732 }
733
734 static void be_tx_stats_update(struct be_tx_obj *txo, struct sk_buff *skb)
735 {
736         struct be_tx_stats *stats = tx_stats(txo);
737         u32 tx_pkts = skb_shinfo(skb)->gso_segs ? : 1;
738         /* Account for headers which get duplicated in TSO pkt */
739         u32 dup_hdr_len = tx_pkts > 1 ? be_gso_hdr_len(skb) * (tx_pkts - 1) : 0;
740
741         u64_stats_update_begin(&stats->sync);
742         stats->tx_reqs++;
743         stats->tx_bytes += skb->len + dup_hdr_len;
744         stats->tx_pkts += tx_pkts;
745         if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL)
746                 stats->tx_vxlan_offload_pkts += tx_pkts;
747         u64_stats_update_end(&stats->sync);
748 }
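/* Illustrative example (not part of the original file): for a TSO skb with
 * gso_segs = 3 and a 54-byte Ethernet+IP+TCP header, be_gso_hdr_len()
 * returns 54 and dup_hdr_len = 54 * (3 - 1) = 108, so tx_bytes also counts
 * the header bytes the HW replicates for the 2nd and 3rd segments.
 */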
749
750 /* Returns number of WRBs needed for the skb */
751 static u32 skb_wrb_cnt(struct sk_buff *skb)
752 {
753         /* +1 for the header wrb */
754         return 1 + (skb_headlen(skb) ? 1 : 0) + skb_shinfo(skb)->nr_frags;
755 }
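/* Illustrative example (not part of the original file): an skb with linear
 * header data and two page fragments needs 1 (header wrb) + 1 (linear
 * data) + 2 (frags) = 4 WRBs.
 */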
756
757 static inline void wrb_fill(struct be_eth_wrb *wrb, u64 addr, int len)
758 {
759         wrb->frag_pa_hi = cpu_to_le32(upper_32_bits(addr));
760         wrb->frag_pa_lo = cpu_to_le32(lower_32_bits(addr));
761         wrb->frag_len = cpu_to_le32(len & ETH_WRB_FRAG_LEN_MASK);
762         wrb->rsvd0 = 0;
763 }
764
765 /* A dummy wrb is just all zeros. Using a separate routine for dummy-wrb
766  * to avoid the swap and shift/mask operations in wrb_fill().
767  */
768 static inline void wrb_fill_dummy(struct be_eth_wrb *wrb)
769 {
770         wrb->frag_pa_hi = 0;
771         wrb->frag_pa_lo = 0;
772         wrb->frag_len = 0;
773         wrb->rsvd0 = 0;
774 }
775
776 static inline u16 be_get_tx_vlan_tag(struct be_adapter *adapter,
777                                      struct sk_buff *skb)
778 {
779         u8 vlan_prio;
780         u16 vlan_tag;
781
782         vlan_tag = skb_vlan_tag_get(skb);
783         vlan_prio = (vlan_tag & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
784         /* If vlan priority provided by OS is NOT in available bmap */
785         if (!(adapter->vlan_prio_bmap & (1 << vlan_prio)))
786                 vlan_tag = (vlan_tag & ~VLAN_PRIO_MASK) |
787                                 adapter->recommended_prio_bits;
788
789         return vlan_tag;
790 }
791
792 /* Used only for IP tunnel packets */
793 static u16 skb_inner_ip_proto(struct sk_buff *skb)
794 {
795         return (inner_ip_hdr(skb)->version == 4) ?
796                 inner_ip_hdr(skb)->protocol : inner_ipv6_hdr(skb)->nexthdr;
797 }
798
799 static u16 skb_ip_proto(struct sk_buff *skb)
800 {
801         return (ip_hdr(skb)->version == 4) ?
802                 ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr;
803 }
804
805 static inline bool be_is_txq_full(struct be_tx_obj *txo)
806 {
807         return atomic_read(&txo->q.used) + BE_MAX_TX_FRAG_COUNT >= txo->q.len;
808 }
809
810 static inline bool be_can_txq_wake(struct be_tx_obj *txo)
811 {
812         return atomic_read(&txo->q.used) < txo->q.len / 2;
813 }
814
815 static inline bool be_is_tx_compl_pending(struct be_tx_obj *txo)
816 {
817         return atomic_read(&txo->q.used) > txo->pend_wrb_cnt;
818 }
819
820 static void be_get_wrb_params_from_skb(struct be_adapter *adapter,
821                                        struct sk_buff *skb,
822                                        struct be_wrb_params *wrb_params)
823 {
824         u16 proto;
825
826         if (skb_is_gso(skb)) {
827                 BE_WRB_F_SET(wrb_params->features, LSO, 1);
828                 wrb_params->lso_mss = skb_shinfo(skb)->gso_size;
829                 if (skb_is_gso_v6(skb) && !lancer_chip(adapter))
830                         BE_WRB_F_SET(wrb_params->features, LSO6, 1);
831         } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
832                 if (skb->encapsulation) {
833                         BE_WRB_F_SET(wrb_params->features, IPCS, 1);
834                         proto = skb_inner_ip_proto(skb);
835                 } else {
836                         proto = skb_ip_proto(skb);
837                 }
838                 if (proto == IPPROTO_TCP)
839                         BE_WRB_F_SET(wrb_params->features, TCPCS, 1);
840                 else if (proto == IPPROTO_UDP)
841                         BE_WRB_F_SET(wrb_params->features, UDPCS, 1);
842         }
843
844         if (skb_vlan_tag_present(skb)) {
845                 BE_WRB_F_SET(wrb_params->features, VLAN, 1);
846                 wrb_params->vlan_tag = be_get_tx_vlan_tag(adapter, skb);
847         }
848
849         BE_WRB_F_SET(wrb_params->features, CRC, 1);
850 }
851
852 static void wrb_fill_hdr(struct be_adapter *adapter,
853                          struct be_eth_hdr_wrb *hdr,
854                          struct be_wrb_params *wrb_params,
855                          struct sk_buff *skb)
856 {
857         memset(hdr, 0, sizeof(*hdr));
858
859         SET_TX_WRB_HDR_BITS(crc, hdr,
860                             BE_WRB_F_GET(wrb_params->features, CRC));
861         SET_TX_WRB_HDR_BITS(ipcs, hdr,
862                             BE_WRB_F_GET(wrb_params->features, IPCS));
863         SET_TX_WRB_HDR_BITS(tcpcs, hdr,
864                             BE_WRB_F_GET(wrb_params->features, TCPCS));
865         SET_TX_WRB_HDR_BITS(udpcs, hdr,
866                             BE_WRB_F_GET(wrb_params->features, UDPCS));
867
868         SET_TX_WRB_HDR_BITS(lso, hdr,
869                             BE_WRB_F_GET(wrb_params->features, LSO));
870         SET_TX_WRB_HDR_BITS(lso6, hdr,
871                             BE_WRB_F_GET(wrb_params->features, LSO6));
872         SET_TX_WRB_HDR_BITS(lso_mss, hdr, wrb_params->lso_mss);
873
874         /* The hack to skip HW VLAN tagging needs evt = 1, compl = 0. When this
875          * hack is not needed, the evt bit is set while ringing the DB.
876          */
877         SET_TX_WRB_HDR_BITS(event, hdr,
878                             BE_WRB_F_GET(wrb_params->features, VLAN_SKIP_HW));
879         SET_TX_WRB_HDR_BITS(vlan, hdr,
880                             BE_WRB_F_GET(wrb_params->features, VLAN));
881         SET_TX_WRB_HDR_BITS(vlan_tag, hdr, wrb_params->vlan_tag);
882
883         SET_TX_WRB_HDR_BITS(num_wrb, hdr, skb_wrb_cnt(skb));
884         SET_TX_WRB_HDR_BITS(len, hdr, skb->len);
885         SET_TX_WRB_HDR_BITS(mgmt, hdr,
886                             BE_WRB_F_GET(wrb_params->features, OS2BMC));
887 }
888
889 static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb,
890                           bool unmap_single)
891 {
892         dma_addr_t dma;
893         u32 frag_len = le32_to_cpu(wrb->frag_len);
894
895
896         dma = (u64)le32_to_cpu(wrb->frag_pa_hi) << 32 |
897                 (u64)le32_to_cpu(wrb->frag_pa_lo);
898         if (frag_len) {
899                 if (unmap_single)
900                         dma_unmap_single(dev, dma, frag_len, DMA_TO_DEVICE);
901                 else
902                         dma_unmap_page(dev, dma, frag_len, DMA_TO_DEVICE);
903         }
904 }
905
906 /* Grab a WRB header for xmit */
907 static u32 be_tx_get_wrb_hdr(struct be_tx_obj *txo)
908 {
909         u32 head = txo->q.head;
910
911         queue_head_inc(&txo->q);
912         return head;
913 }
914
915 /* Set up the WRB header for xmit */
916 static void be_tx_setup_wrb_hdr(struct be_adapter *adapter,
917                                 struct be_tx_obj *txo,
918                                 struct be_wrb_params *wrb_params,
919                                 struct sk_buff *skb, u16 head)
920 {
921         u32 num_frags = skb_wrb_cnt(skb);
922         struct be_queue_info *txq = &txo->q;
923         struct be_eth_hdr_wrb *hdr = queue_index_node(txq, head);
924
925         wrb_fill_hdr(adapter, hdr, wrb_params, skb);
926         be_dws_cpu_to_le(hdr, sizeof(*hdr));
927
928         BUG_ON(txo->sent_skb_list[head]);
929         txo->sent_skb_list[head] = skb;
930         txo->last_req_hdr = head;
931         atomic_add(num_frags, &txq->used);
932         txo->last_req_wrb_cnt = num_frags;
933         txo->pend_wrb_cnt += num_frags;
934 }
935
936 /* Setup a WRB fragment (buffer descriptor) for xmit */
937 static void be_tx_setup_wrb_frag(struct be_tx_obj *txo, dma_addr_t busaddr,
938                                  int len)
939 {
940         struct be_eth_wrb *wrb;
941         struct be_queue_info *txq = &txo->q;
942
943         wrb = queue_head_node(txq);
944         wrb_fill(wrb, busaddr, len);
945         queue_head_inc(txq);
946 }
947
948 /* Bring the queue back to the state it was in before be_xmit_enqueue() routine
949  * was invoked. The producer index is restored to the previous packet and the
950  * WRBs of the current packet are unmapped. Invoked to handle tx setup errors.
951  */
952 static void be_xmit_restore(struct be_adapter *adapter,
953                             struct be_tx_obj *txo, u32 head, bool map_single,
954                             u32 copied)
955 {
956         struct device *dev;
957         struct be_eth_wrb *wrb;
958         struct be_queue_info *txq = &txo->q;
959
960         dev = &adapter->pdev->dev;
961         txq->head = head;
962
963         /* skip the first wrb (hdr); it's not mapped */
964         queue_head_inc(txq);
965         while (copied) {
966                 wrb = queue_head_node(txq);
967                 unmap_tx_frag(dev, wrb, map_single);
968                 map_single = false;
969                 copied -= le32_to_cpu(wrb->frag_len);
970                 queue_head_inc(txq);
971         }
972
973         txq->head = head;
974 }
975
976 /* Enqueue the given packet for transmit. This routine allocates WRBs for the
977  * packet, dma maps the packet buffers and sets up the WRBs. Returns the number
978  * of WRBs used up by the packet.
979  */
980 static u32 be_xmit_enqueue(struct be_adapter *adapter, struct be_tx_obj *txo,
981                            struct sk_buff *skb,
982                            struct be_wrb_params *wrb_params)
983 {
984         u32 i, copied = 0, wrb_cnt = skb_wrb_cnt(skb);
985         struct device *dev = &adapter->pdev->dev;
986         struct be_queue_info *txq = &txo->q;
987         bool map_single = false;
988         u32 head = txq->head;
989         dma_addr_t busaddr;
990         int len;
991
992         head = be_tx_get_wrb_hdr(txo);
993
994         if (skb->len > skb->data_len) {
995                 len = skb_headlen(skb);
996
997                 busaddr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE);
998                 if (dma_mapping_error(dev, busaddr))
999                         goto dma_err;
1000                 map_single = true;
1001                 be_tx_setup_wrb_frag(txo, busaddr, len);
1002                 copied += len;
1003         }
1004
1005         for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1006                 const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
1007                 len = skb_frag_size(frag);
1008
1009                 busaddr = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE);
1010                 if (dma_mapping_error(dev, busaddr))
1011                         goto dma_err;
1012                 be_tx_setup_wrb_frag(txo, busaddr, len);
1013                 copied += len;
1014         }
1015
1016         be_tx_setup_wrb_hdr(adapter, txo, wrb_params, skb, head);
1017
1018         be_tx_stats_update(txo, skb);
1019         return wrb_cnt;
1020
1021 dma_err:
1022         adapter->drv_stats.dma_map_errors++;
1023         be_xmit_restore(adapter, txo, head, map_single, copied);
1024         return 0;
1025 }
1026
1027 static inline int qnq_async_evt_rcvd(struct be_adapter *adapter)
1028 {
1029         return adapter->flags & BE_FLAGS_QNQ_ASYNC_EVT_RCVD;
1030 }
1031
1032 static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter,
1033                                              struct sk_buff *skb,
1034                                              struct be_wrb_params
1035                                              *wrb_params)
1036 {
1037         u16 vlan_tag = 0;
1038
1039         skb = skb_share_check(skb, GFP_ATOMIC);
1040         if (unlikely(!skb))
1041                 return skb;
1042
1043         if (skb_vlan_tag_present(skb))
1044                 vlan_tag = be_get_tx_vlan_tag(adapter, skb);
1045
1046         if (qnq_async_evt_rcvd(adapter) && adapter->pvid) {
1047                 if (!vlan_tag)
1048                         vlan_tag = adapter->pvid;
1049                 /* f/w workaround: setting skip_hw_vlan = 1 informs the F/W to
1050                  * skip VLAN insertion
1051                  */
1052                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1053         }
1054
1055         if (vlan_tag) {
1056                 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1057                                                 vlan_tag);
1058                 if (unlikely(!skb))
1059                         return skb;
1060                 skb->vlan_tci = 0;
1061         }
1062
1063         /* Insert the outer VLAN, if any */
1064         if (adapter->qnq_vid) {
1065                 vlan_tag = adapter->qnq_vid;
1066                 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1067                                                 vlan_tag);
1068                 if (unlikely(!skb))
1069                         return skb;
1070                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1071         }
1072
1073         return skb;
1074 }
1075
1076 static bool be_ipv6_exthdr_check(struct sk_buff *skb)
1077 {
1078         struct ethhdr *eh = (struct ethhdr *)skb->data;
1079         u16 offset = ETH_HLEN;
1080
1081         if (eh->h_proto == htons(ETH_P_IPV6)) {
1082                 struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + offset);
1083
1084                 offset += sizeof(struct ipv6hdr);
1085                 if (ip6h->nexthdr != NEXTHDR_TCP &&
1086                     ip6h->nexthdr != NEXTHDR_UDP) {
1087                         struct ipv6_opt_hdr *ehdr =
1088                                 (struct ipv6_opt_hdr *)(skb->data + offset);
1089
1090                         /* offending pkt: 2nd byte following IPv6 hdr is 0xff */
1091                         if (ehdr->hdrlen == 0xff)
1092                                 return true;
1093                 }
1094         }
1095         return false;
1096 }
1097
1098 static int be_vlan_tag_tx_chk(struct be_adapter *adapter, struct sk_buff *skb)
1099 {
1100         return skb_vlan_tag_present(skb) || adapter->pvid || adapter->qnq_vid;
1101 }
1102
1103 static int be_ipv6_tx_stall_chk(struct be_adapter *adapter, struct sk_buff *skb)
1104 {
1105         return BE3_chip(adapter) && be_ipv6_exthdr_check(skb);
1106 }
1107
1108 static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter,
1109                                                   struct sk_buff *skb,
1110                                                   struct be_wrb_params
1111                                                   *wrb_params)
1112 {
1113         struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1114         unsigned int eth_hdr_len;
1115         struct iphdr *ip;
1116
1117         /* For padded packets, BE HW modifies tot_len field in IP header
1118          * incorrecly when VLAN tag is inserted by HW.
1119          * For padded packets, Lancer computes incorrect checksum.
1120          */
1121         eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ?
1122                                                 VLAN_ETH_HLEN : ETH_HLEN;
1123         if (skb->len <= 60 &&
1124             (lancer_chip(adapter) || skb_vlan_tag_present(skb)) &&
1125             is_ipv4_pkt(skb)) {
1126                 ip = (struct iphdr *)ip_hdr(skb);
1127                 pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len));
1128         }
1129
1130         /* If vlan tag is already inlined in the packet, skip HW VLAN
1131          * tagging in pvid-tagging mode
1132          */
1133         if (be_pvid_tagging_enabled(adapter) &&
1134             veh->h_vlan_proto == htons(ETH_P_8021Q))
1135                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1136
1137         /* HW has a bug wherein it will calculate CSUM for VLAN
1138          * pkts even though checksum offload is disabled.
1139          * Manually insert VLAN in pkt.
1140          */
1141         if (skb->ip_summed != CHECKSUM_PARTIAL &&
1142             skb_vlan_tag_present(skb)) {
1143                 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1144                 if (unlikely(!skb))
1145                         goto err;
1146         }
1147
1148         /* HW may lock up when VLAN HW tagging is requested on
1149          * certain ipv6 packets. Drop such pkts if the HW workaround to
1150          * skip HW tagging is not enabled by FW.
1151          */
1152         if (unlikely(be_ipv6_tx_stall_chk(adapter, skb) &&
1153                      (adapter->pvid || adapter->qnq_vid) &&
1154                      !qnq_async_evt_rcvd(adapter)))
1155                 goto tx_drop;
1156
1157         /* Manual VLAN tag insertion to prevent:
1158          * ASIC lockup when the ASIC inserts VLAN tag into
1159          * certain ipv6 packets. Insert VLAN tags in driver,
1160          * and set event, completion, vlan bits accordingly
1161          * in the Tx WRB.
1162          */
1163         if (be_ipv6_tx_stall_chk(adapter, skb) &&
1164             be_vlan_tag_tx_chk(adapter, skb)) {
1165                 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1166                 if (unlikely(!skb))
1167                         goto err;
1168         }
1169
1170         return skb;
1171 tx_drop:
1172         dev_kfree_skb_any(skb);
1173 err:
1174         return NULL;
1175 }
1176
1177 static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter,
1178                                            struct sk_buff *skb,
1179                                            struct be_wrb_params *wrb_params)
1180 {
1181         int err;
1182
1183         /* Lancer, SH and BE3 in SRIOV mode have a bug wherein
1184          * packets that are 32 bytes or less may cause a transmit stall
1185          * on that port. The workaround is to pad such packets
1186          * (len <= 32 bytes) to a minimum length of 36 bytes.
1187          */
1188         if (skb->len <= 32) {
1189                 if (skb_put_padto(skb, 36))
1190                         return NULL;
1191         }
1192
1193         if (BEx_chip(adapter) || lancer_chip(adapter)) {
1194                 skb = be_lancer_xmit_workarounds(adapter, skb, wrb_params);
1195                 if (!skb)
1196                         return NULL;
1197         }
1198
1199         /* The stack can send us skbs with length greater than
1200          * what the HW can handle. Trim the extra bytes.
1201          */
1202         WARN_ON_ONCE(skb->len > BE_MAX_GSO_SIZE);
1203         err = pskb_trim(skb, BE_MAX_GSO_SIZE);
1204         WARN_ON(err);
1205
1206         return skb;
1207 }
1208
1209 static void be_xmit_flush(struct be_adapter *adapter, struct be_tx_obj *txo)
1210 {
1211         struct be_queue_info *txq = &txo->q;
1212         struct be_eth_hdr_wrb *hdr = queue_index_node(txq, txo->last_req_hdr);
1213
1214         /* Mark the last request eventable if it hasn't been marked already */
1215         if (!(hdr->dw[2] & cpu_to_le32(TX_HDR_WRB_EVT)))
1216                 hdr->dw[2] |= cpu_to_le32(TX_HDR_WRB_EVT | TX_HDR_WRB_COMPL);
1217
1218         /* compose a dummy wrb if there is an odd number of wrbs to notify */
1219         if (!lancer_chip(adapter) && (txo->pend_wrb_cnt & 1)) {
1220                 wrb_fill_dummy(queue_head_node(txq));
1221                 queue_head_inc(txq);
1222                 atomic_inc(&txq->used);
1223                 txo->pend_wrb_cnt++;
1224                 hdr->dw[2] &= ~cpu_to_le32(TX_HDR_WRB_NUM_MASK <<
1225                                            TX_HDR_WRB_NUM_SHIFT);
1226                 hdr->dw[2] |= cpu_to_le32((txo->last_req_wrb_cnt + 1) <<
1227                                           TX_HDR_WRB_NUM_SHIFT);
1228         }
1229         be_txq_notify(adapter, txo, txo->pend_wrb_cnt);
1230         txo->pend_wrb_cnt = 0;
1231 }
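/* Illustrative note (not part of the original file): the padding above means
 * a non-Lancer chip is never notified of an odd number of pending WRBs;
 * e.g. pend_wrb_cnt = 5 becomes 6, and the num_wrb field of the last
 * header WRB is rewritten to last_req_wrb_cnt + 1 so completion processing
 * accounts for the extra dummy WRB.
 */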
1232
1233 /* OS2BMC related */
1234
1235 #define DHCP_CLIENT_PORT        68
1236 #define DHCP_SERVER_PORT        67
1237 #define NET_BIOS_PORT1          137
1238 #define NET_BIOS_PORT2          138
1239 #define DHCPV6_RAS_PORT         547
1240
1241 #define is_mc_allowed_on_bmc(adapter, eh)       \
1242         (!is_multicast_filt_enabled(adapter) && \
1243          is_multicast_ether_addr(eh->h_dest) && \
1244          !is_broadcast_ether_addr(eh->h_dest))
1245
1246 #define is_bc_allowed_on_bmc(adapter, eh)       \
1247         (!is_broadcast_filt_enabled(adapter) && \
1248          is_broadcast_ether_addr(eh->h_dest))
1249
1250 #define is_arp_allowed_on_bmc(adapter, skb)     \
1251         (is_arp(skb) && is_arp_filt_enabled(adapter))
1252
1253 #define is_broadcast_packet(eh, adapter)        \
1254                 (is_multicast_ether_addr(eh->h_dest) && \
1255                 !compare_ether_addr(eh->h_dest, adapter->netdev->broadcast))
1256
1257 #define is_arp(skb)     (skb->protocol == htons(ETH_P_ARP))
1258
1259 #define is_arp_filt_enabled(adapter)    \
1260                 (adapter->bmc_filt_mask & (BMC_FILT_BROADCAST_ARP))
1261
1262 #define is_dhcp_client_filt_enabled(adapter)    \
1263                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_CLIENT)
1264
1265 #define is_dhcp_srvr_filt_enabled(adapter)      \
1266                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_SERVER)
1267
1268 #define is_nbios_filt_enabled(adapter)  \
1269                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_NET_BIOS)
1270
1271 #define is_ipv6_na_filt_enabled(adapter)        \
1272                 (adapter->bmc_filt_mask &       \
1273                         BMC_FILT_MULTICAST_IPV6_NEIGH_ADVER)
1274
1275 #define is_ipv6_ra_filt_enabled(adapter)        \
1276                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RA)
1277
1278 #define is_ipv6_ras_filt_enabled(adapter)       \
1279                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RAS)
1280
1281 #define is_broadcast_filt_enabled(adapter)      \
1282                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST)
1283
1284 #define is_multicast_filt_enabled(adapter)      \
1285                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST)
1286
1287 static bool be_send_pkt_to_bmc(struct be_adapter *adapter,
1288                                struct sk_buff **skb)
1289 {
1290         struct ethhdr *eh = (struct ethhdr *)(*skb)->data;
1291         bool os2bmc = false;
1292
1293         if (!be_is_os2bmc_enabled(adapter))
1294                 goto done;
1295
1296         if (!is_multicast_ether_addr(eh->h_dest))
1297                 goto done;
1298
1299         if (is_mc_allowed_on_bmc(adapter, eh) ||
1300             is_bc_allowed_on_bmc(adapter, eh) ||
1301             is_arp_allowed_on_bmc(adapter, (*skb))) {
1302                 os2bmc = true;
1303                 goto done;
1304         }
1305
1306         if ((*skb)->protocol == htons(ETH_P_IPV6)) {
1307                 struct ipv6hdr *hdr = ipv6_hdr((*skb));
1308                 u8 nexthdr = hdr->nexthdr;
1309
1310                 if (nexthdr == IPPROTO_ICMPV6) {
1311                         struct icmp6hdr *icmp6 = icmp6_hdr((*skb));
1312
1313                         switch (icmp6->icmp6_type) {
1314                         case NDISC_ROUTER_ADVERTISEMENT:
1315                                 os2bmc = is_ipv6_ra_filt_enabled(adapter);
1316                                 goto done;
1317                         case NDISC_NEIGHBOUR_ADVERTISEMENT:
1318                                 os2bmc = is_ipv6_na_filt_enabled(adapter);
1319                                 goto done;
1320                         default:
1321                                 break;
1322                         }
1323                 }
1324         }
1325
1326         if (is_udp_pkt((*skb))) {
1327                 struct udphdr *udp = udp_hdr((*skb));
1328
1329                 switch (ntohs(udp->dest)) {
1330                 case DHCP_CLIENT_PORT:
1331                         os2bmc = is_dhcp_client_filt_enabled(adapter);
1332                         goto done;
1333                 case DHCP_SERVER_PORT:
1334                         os2bmc = is_dhcp_srvr_filt_enabled(adapter);
1335                         goto done;
1336                 case NET_BIOS_PORT1:
1337                 case NET_BIOS_PORT2:
1338                         os2bmc = is_nbios_filt_enabled(adapter);
1339                         goto done;
1340                 case DHCPV6_RAS_PORT:
1341                         os2bmc = is_ipv6_ras_filt_enabled(adapter);
1342                         goto done;
1343                 default:
1344                         break;
1345                 }
1346         }
1347 done:
1348         /* For VLAN packets destined to the BMC, the ASIC expects
1349          * the VLAN tag to be inline in the packet.
1350          */
1351         if (os2bmc)
1352                 *skb = be_insert_vlan_in_pkt(adapter, *skb, NULL);
1353
1354         return os2bmc;
1355 }
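/* Illustrative example (not part of the original file): with
 * BMC_FILT_BROADCAST_DHCP_SERVER set in adapter->bmc_filt_mask, a broadcast
 * UDP packet to port 67 (DHCP_SERVER_PORT) makes this routine return true,
 * and be_xmit() then enqueues the packet a second time with the mgmt
 * (OS2BMC) bit set in the WRB header.
 */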
1356
1357 static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev)
1358 {
1359         struct be_adapter *adapter = netdev_priv(netdev);
1360         u16 q_idx = skb_get_queue_mapping(skb);
1361         struct be_tx_obj *txo = &adapter->tx_obj[q_idx];
1362         struct be_wrb_params wrb_params = { 0 };
1363         bool flush = !skb->xmit_more;
1364         u16 wrb_cnt;
1365
1366         skb = be_xmit_workarounds(adapter, skb, &wrb_params);
1367         if (unlikely(!skb))
1368                 goto drop;
1369
1370         be_get_wrb_params_from_skb(adapter, skb, &wrb_params);
1371
1372         wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1373         if (unlikely(!wrb_cnt)) {
1374                 dev_kfree_skb_any(skb);
1375                 goto drop;
1376         }
1377
1378         /* If OS2BMC is enabled and the packet is destined to the BMC,
1379          * enqueue the packet a second time with the mgmt bit set.
1380          */
1381         if (be_send_pkt_to_bmc(adapter, &skb)) {
1382                 BE_WRB_F_SET(wrb_params.features, OS2BMC, 1);
1383                 wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1384                 if (unlikely(!wrb_cnt))
1385                         goto drop;
1386                 else
1387                         skb_get(skb);
1388         }
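        /* Note: the skb_get() above takes an extra reference on the skb
         * because it is now referenced by two sets of TX WRBs (the normal
         * copy and the OS2BMC copy); each TX completion drops one reference.
         */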
1389
1390         if (be_is_txq_full(txo)) {
1391                 netif_stop_subqueue(netdev, q_idx);
1392                 tx_stats(txo)->tx_stops++;
1393         }
1394
1395         if (flush || __netif_subqueue_stopped(netdev, q_idx))
1396                 be_xmit_flush(adapter, txo);
1397
1398         return NETDEV_TX_OK;
1399 drop:
1400         tx_stats(txo)->tx_drv_drops++;
1401         /* Flush the already enqueued tx requests */
1402         if (flush && txo->pend_wrb_cnt)
1403                 be_xmit_flush(adapter, txo);
1404
1405         return NETDEV_TX_OK;
1406 }
1407
1408 static inline bool be_in_all_promisc(struct be_adapter *adapter)
1409 {
1410         return (adapter->if_flags & BE_IF_FLAGS_ALL_PROMISCUOUS) ==
1411                         BE_IF_FLAGS_ALL_PROMISCUOUS;
1412 }
1413
1414 static int be_set_vlan_promisc(struct be_adapter *adapter)
1415 {
1416         struct device *dev = &adapter->pdev->dev;
1417         int status;
1418
1419         if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS)
1420                 return 0;
1421
1422         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, ON);
1423         if (!status) {
1424                 dev_info(dev, "Enabled VLAN promiscuous mode\n");
1425                 adapter->if_flags |= BE_IF_FLAGS_VLAN_PROMISCUOUS;
1426         } else {
1427                 dev_err(dev, "Failed to enable VLAN promiscuous mode\n");
1428         }
1429         return status;
1430 }
1431
1432 static int be_clear_vlan_promisc(struct be_adapter *adapter)
1433 {
1434         struct device *dev = &adapter->pdev->dev;
1435         int status;
1436
1437         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, OFF);
1438         if (!status) {
1439                 dev_info(dev, "Disabled VLAN promiscuous mode\n");
1440                 adapter->if_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
1441         }
1442         return status;
1443 }
1444
1445 /*
1446  * A max of 64 (BE_NUM_VLANS_SUPPORTED) vlans can be configured in BE.
1447  * If the user configures more, place BE in vlan promiscuous mode.
1448  */
1449 static int be_vid_config(struct be_adapter *adapter)
1450 {
1451         struct device *dev = &adapter->pdev->dev;
1452         u16 vids[BE_NUM_VLANS_SUPPORTED];
1453         u16 num = 0, i = 0;
1454         int status = 0;
1455
1456         /* No need to change the VLAN state if the I/F is in promiscuous */
1457         if (adapter->netdev->flags & IFF_PROMISC)
1458                 return 0;
1459
1460         if (adapter->vlans_added > be_max_vlans(adapter))
1461                 return be_set_vlan_promisc(adapter);
1462
1463         if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
1464                 status = be_clear_vlan_promisc(adapter);
1465                 if (status)
1466                         return status;
1467         }
1468         /* Construct VLAN Table to give to HW */
1469         for_each_set_bit(i, adapter->vids, VLAN_N_VID)
1470                 vids[num++] = cpu_to_le16(i);
1471
1472         status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0);
1473         if (status) {
1474                 dev_err(dev, "Setting HW VLAN filtering failed\n");
1475                 /* Set to VLAN promisc mode as setting VLAN filter failed */
1476                 if (addl_status(status) == MCC_ADDL_STATUS_INSUFFICIENT_VLANS ||
1477                     addl_status(status) ==
1478                                 MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES)
1479                         return be_set_vlan_promisc(adapter);
1480         }
1481         return status;
1482 }
1483
1484 static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1485 {
1486         struct be_adapter *adapter = netdev_priv(netdev);
1487         int status = 0;
1488
1489         mutex_lock(&adapter->rx_filter_lock);
1490
1491         /* Packets with VID 0 are always received by Lancer by default */
1492         if (lancer_chip(adapter) && vid == 0)
1493                 goto done;
1494
1495         if (test_bit(vid, adapter->vids))
1496                 goto done;
1497
1498         set_bit(vid, adapter->vids);
1499         adapter->vlans_added++;
1500
1501         status = be_vid_config(adapter);
1502 done:
1503         mutex_unlock(&adapter->rx_filter_lock);
1504         return status;
1505 }
1506
1507 static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid)
1508 {
1509         struct be_adapter *adapter = netdev_priv(netdev);
1510         int status = 0;
1511
1512         mutex_lock(&adapter->rx_filter_lock);
1513
1514         /* Packets with VID 0 are always received by Lancer by default */
1515         if (lancer_chip(adapter) && vid == 0)
1516                 goto done;
1517
1518         if (!test_bit(vid, adapter->vids))
1519                 goto done;
1520
1521         clear_bit(vid, adapter->vids);
1522         adapter->vlans_added--;
1523
1524         status = be_vid_config(adapter);
1525 done:
1526         mutex_unlock(&adapter->rx_filter_lock);
1527         return status;
1528 }
1529
1530 static void be_set_all_promisc(struct be_adapter *adapter)
1531 {
1532         be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, ON);
1533         adapter->if_flags |= BE_IF_FLAGS_ALL_PROMISCUOUS;
1534 }
1535
1536 static void be_set_mc_promisc(struct be_adapter *adapter)
1537 {
1538         int status;
1539
1540         if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS)
1541                 return;
1542
1543         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MCAST_PROMISCUOUS, ON);
1544         if (!status)
1545                 adapter->if_flags |= BE_IF_FLAGS_MCAST_PROMISCUOUS;
1546 }
1547
1548 static void be_set_uc_promisc(struct be_adapter *adapter)
1549 {
1550         int status;
1551
1552         if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS)
1553                 return;
1554
1555         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, ON);
1556         if (!status)
1557                 adapter->if_flags |= BE_IF_FLAGS_PROMISCUOUS;
1558 }
1559
1560 static void be_clear_uc_promisc(struct be_adapter *adapter)
1561 {
1562         int status;
1563
1564         if (!(adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS))
1565                 return;
1566
1567         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, OFF);
1568         if (!status)
1569                 adapter->if_flags &= ~BE_IF_FLAGS_PROMISCUOUS;
1570 }
1571
1572 /* The two functions below are the callback args for __dev_mc_sync()/__dev_uc_sync().
1573  * A single callback is used for both sync and unsync. Addresses are not actually
1574  * added/removed through this callback; it is only used to detect changes to the
1575  * uc/mc lists. The entire uc/mc list is programmed in be_set_rx_mode().
1576  */
1577 static int be_uc_list_update(struct net_device *netdev,
1578                              const unsigned char *addr)
1579 {
1580         struct be_adapter *adapter = netdev_priv(netdev);
1581
1582         adapter->update_uc_list = true;
1583         return 0;
1584 }
1585
1586 static int be_mc_list_update(struct net_device *netdev,
1587                              const unsigned char *addr)
1588 {
1589         struct be_adapter *adapter = netdev_priv(netdev);
1590
1591         adapter->update_mc_list = true;
1592         return 0;
1593 }
1594
1595 static void be_set_mc_list(struct be_adapter *adapter)
1596 {
1597         struct net_device *netdev = adapter->netdev;
1598         struct netdev_hw_addr *ha;
1599         bool mc_promisc = false;
1600         int status;
1601
1602         netif_addr_lock_bh(netdev);
1603         __dev_mc_sync(netdev, be_mc_list_update, be_mc_list_update);
1604
1605         if (netdev->flags & IFF_PROMISC) {
1606                 adapter->update_mc_list = false;
1607         } else if (netdev->flags & IFF_ALLMULTI ||
1608                    netdev_mc_count(netdev) > be_max_mc(adapter)) {
1609                 /* Enable multicast promisc if num configured exceeds
1610                  * what we support
1611                  */
1612                 mc_promisc = true;
1613                 adapter->update_mc_list = false;
1614         } else if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS) {
1615                 /* Update mc-list unconditionally if the iface was previously
1616                  * in mc-promisc mode and now is out of that mode.
1617                  */
1618                 adapter->update_mc_list = true;
1619         }
1620
1621         if (adapter->update_mc_list) {
1622                 int i = 0;
1623
1624                 /* cache the mc-list in adapter */
1625                 netdev_for_each_mc_addr(ha, netdev) {
1626                         ether_addr_copy(adapter->mc_list[i].mac, ha->addr);
1627                         i++;
1628                 }
1629                 adapter->mc_count = netdev_mc_count(netdev);
1630         }
1631         netif_addr_unlock_bh(netdev);
1632
1633         if (mc_promisc) {
1634                 be_set_mc_promisc(adapter);
1635         } else if (adapter->update_mc_list) {
1636                 status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON);
1637                 if (!status)
1638                         adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS;
1639                 else
1640                         be_set_mc_promisc(adapter);
1641
1642                 adapter->update_mc_list = false;
1643         }
1644 }
1645
1646 static void be_clear_mc_list(struct be_adapter *adapter)
1647 {
1648         struct net_device *netdev = adapter->netdev;
1649
1650         __dev_mc_unsync(netdev, NULL);
1651         be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, OFF);
1652         adapter->mc_count = 0;
1653 }
1654
1655 static int be_uc_mac_add(struct be_adapter *adapter, int uc_idx)
1656 {
1657         if (ether_addr_equal(adapter->uc_list[uc_idx].mac, adapter->dev_mac)) {
1658                 adapter->pmac_id[uc_idx + 1] = adapter->pmac_id[0];
1659                 return 0;
1660         }
1661
1662         return be_cmd_pmac_add(adapter, adapter->uc_list[uc_idx].mac,
1663                                adapter->if_handle,
1664                                &adapter->pmac_id[uc_idx + 1], 0);
1665 }
1666
1667 static void be_uc_mac_del(struct be_adapter *adapter, int pmac_id)
1668 {
1669         if (pmac_id == adapter->pmac_id[0])
1670                 return;
1671
1672         be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
1673 }
1674
1675 static void be_set_uc_list(struct be_adapter *adapter)
1676 {
1677         struct net_device *netdev = adapter->netdev;
1678         struct netdev_hw_addr *ha;
1679         bool uc_promisc = false;
1680         int curr_uc_macs = 0, i;
1681
1682         netif_addr_lock_bh(netdev);
1683         __dev_uc_sync(netdev, be_uc_list_update, be_uc_list_update);
1684
1685         if (netdev->flags & IFF_PROMISC) {
1686                 adapter->update_uc_list = false;
1687         } else if (netdev_uc_count(netdev) > (be_max_uc(adapter) - 1)) {
1688                 uc_promisc = true;
1689                 adapter->update_uc_list = false;
1690         }  else if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS) {
1691                 /* Update uc-list unconditionally if the iface was previously
1692                  * in uc-promisc mode and now is out of that mode.
1693                  */
1694                 adapter->update_uc_list = true;
1695         }
1696
1697         if (adapter->update_uc_list) {
1698                 /* cache the uc-list in adapter array */
1699                 i = 0;
1700                 netdev_for_each_uc_addr(ha, netdev) {
1701                         ether_addr_copy(adapter->uc_list[i].mac, ha->addr);
1702                         i++;
1703                 }
1704                 curr_uc_macs = netdev_uc_count(netdev);
1705         }
1706         netif_addr_unlock_bh(netdev);
1707
1708         if (uc_promisc) {
1709                 be_set_uc_promisc(adapter);
1710         } else if (adapter->update_uc_list) {
1711                 be_clear_uc_promisc(adapter);
1712
1713                 for (i = 0; i < adapter->uc_macs; i++)
1714                         be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1715
1716                 for (i = 0; i < curr_uc_macs; i++)
1717                         be_uc_mac_add(adapter, i);
1718                 adapter->uc_macs = curr_uc_macs;
1719                 adapter->update_uc_list = false;
1720         }
1721 }
1722
1723 static void be_clear_uc_list(struct be_adapter *adapter)
1724 {
1725         struct net_device *netdev = adapter->netdev;
1726         int i;
1727
1728         __dev_uc_unsync(netdev, NULL);
1729         for (i = 0; i < adapter->uc_macs; i++)
1730                 be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1731
1732         adapter->uc_macs = 0;
1733 }
1734
1735 static void __be_set_rx_mode(struct be_adapter *adapter)
1736 {
1737         struct net_device *netdev = adapter->netdev;
1738
1739         mutex_lock(&adapter->rx_filter_lock);
1740
1741         if (netdev->flags & IFF_PROMISC) {
1742                 if (!be_in_all_promisc(adapter))
1743                         be_set_all_promisc(adapter);
1744         } else if (be_in_all_promisc(adapter)) {
1745                 /* We need to re-program the vlan-list or clear
1746                  * vlan-promisc mode (if needed) when the interface
1747                  * comes out of promisc mode.
1748                  */
1749                 be_vid_config(adapter);
1750         }
1751
1752         be_set_uc_list(adapter);
1753         be_set_mc_list(adapter);
1754
1755         mutex_unlock(&adapter->rx_filter_lock);
1756 }
1757
1758 static void be_work_set_rx_mode(struct work_struct *work)
1759 {
1760         struct be_cmd_work *cmd_work =
1761                                 container_of(work, struct be_cmd_work, work);
1762
1763         __be_set_rx_mode(cmd_work->adapter);
1764         kfree(cmd_work);
1765 }
1766
1767 static int be_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
1768 {
1769         struct be_adapter *adapter = netdev_priv(netdev);
1770         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1771         int status;
1772
1773         if (!sriov_enabled(adapter))
1774                 return -EPERM;
1775
1776         if (!is_valid_ether_addr(mac) || vf >= adapter->num_vfs)
1777                 return -EINVAL;
1778
1779         /* Proceed further only if user provided MAC is different
1780          * from active MAC
1781          */
1782         if (ether_addr_equal(mac, vf_cfg->mac_addr))
1783                 return 0;
1784
1785         if (BEx_chip(adapter)) {
1786                 be_cmd_pmac_del(adapter, vf_cfg->if_handle, vf_cfg->pmac_id,
1787                                 vf + 1);
1788
1789                 status = be_cmd_pmac_add(adapter, mac, vf_cfg->if_handle,
1790                                          &vf_cfg->pmac_id, vf + 1);
1791         } else {
1792                 status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
1793                                         vf + 1);
1794         }
1795
1796         if (status) {
1797                 dev_err(&adapter->pdev->dev, "MAC %pM set on VF %d Failed: %#x",
1798                         mac, vf, status);
1799                 return be_cmd_status(status);
1800         }
1801
1802         ether_addr_copy(vf_cfg->mac_addr, mac);
1803
1804         return 0;
1805 }
1806
1807 static int be_get_vf_config(struct net_device *netdev, int vf,
1808                             struct ifla_vf_info *vi)
1809 {
1810         struct be_adapter *adapter = netdev_priv(netdev);
1811         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1812
1813         if (!sriov_enabled(adapter))
1814                 return -EPERM;
1815
1816         if (vf >= adapter->num_vfs)
1817                 return -EINVAL;
1818
1819         vi->vf = vf;
1820         vi->max_tx_rate = vf_cfg->tx_rate;
1821         vi->min_tx_rate = 0;
1822         vi->vlan = vf_cfg->vlan_tag & VLAN_VID_MASK;
1823         vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT;
1824         memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN);
1825         vi->linkstate = adapter->vf_cfg[vf].plink_tracking;
1826         vi->spoofchk = adapter->vf_cfg[vf].spoofchk;
1827
1828         return 0;
1829 }
1830
1831 static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan)
1832 {
1833         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1834         u16 vids[BE_NUM_VLANS_SUPPORTED];
1835         int vf_if_id = vf_cfg->if_handle;
1836         int status;
1837
1838         /* Enable Transparent VLAN Tagging */
1839         status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0, 0);
1840         if (status)
1841                 return status;
1842
1843         /* Clear any pre-programmed VLAN filters on the VF once TVT is enabled */
1844         vids[0] = 0;
1845         status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1);
1846         if (!status)
1847                 dev_info(&adapter->pdev->dev,
1848                          "Cleared guest VLANs on VF%d", vf);
1849
1850         /* After TVT is enabled, disallow the VF from programming VLAN filters */
1851         if (vf_cfg->privileges & BE_PRIV_FILTMGMT) {
1852                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges &
1853                                                   ~BE_PRIV_FILTMGMT, vf + 1);
1854                 if (!status)
1855                         vf_cfg->privileges &= ~BE_PRIV_FILTMGMT;
1856         }
1857         return 0;
1858 }
1859
1860 static int be_clear_vf_tvt(struct be_adapter *adapter, int vf)
1861 {
1862         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1863         struct device *dev = &adapter->pdev->dev;
1864         int status;
1865
1866         /* Reset Transparent VLAN Tagging. */
1867         status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1,
1868                                        vf_cfg->if_handle, 0, 0);
1869         if (status)
1870                 return status;
1871
1872         /* Allow VFs to program VLAN filtering */
1873         if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
1874                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges |
1875                                                   BE_PRIV_FILTMGMT, vf + 1);
1876                 if (!status) {
1877                         vf_cfg->privileges |= BE_PRIV_FILTMGMT;
1878                         dev_info(dev, "VF%d: FILTMGMT priv enabled", vf);
1879                 }
1880         }
1881
1882         dev_info(dev,
1883                  "Disable/re-enable i/f in VM to clear Transparent VLAN tag");
1884         return 0;
1885 }
1886
1887 static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
1888                           __be16 vlan_proto)
1889 {
1890         struct be_adapter *adapter = netdev_priv(netdev);
1891         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1892         int status;
1893
1894         if (!sriov_enabled(adapter))
1895                 return -EPERM;
1896
1897         if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7)
1898                 return -EINVAL;
1899
1900         if (vlan_proto != htons(ETH_P_8021Q))
1901                 return -EPROTONOSUPPORT;
1902
1903         if (vlan || qos) {
1904                 vlan |= qos << VLAN_PRIO_SHIFT;
1905                 status = be_set_vf_tvt(adapter, vf, vlan);
1906         } else {
1907                 status = be_clear_vf_tvt(adapter, vf);
1908         }
1909
1910         if (status) {
1911                 dev_err(&adapter->pdev->dev,
1912                         "VLAN %d config on VF %d failed : %#x\n", vlan, vf,
1913                         status);
1914                 return be_cmd_status(status);
1915         }
1916
1917         vf_cfg->vlan_tag = vlan;
1918         return 0;
1919 }
1920
1921 static int be_set_vf_tx_rate(struct net_device *netdev, int vf,
1922                              int min_tx_rate, int max_tx_rate)
1923 {
1924         struct be_adapter *adapter = netdev_priv(netdev);
1925         struct device *dev = &adapter->pdev->dev;
1926         int percent_rate, status = 0;
1927         u16 link_speed = 0;
1928         u8 link_status;
1929
1930         if (!sriov_enabled(adapter))
1931                 return -EPERM;
1932
1933         if (vf >= adapter->num_vfs)
1934                 return -EINVAL;
1935
1936         if (min_tx_rate)
1937                 return -EINVAL;
1938
1939         if (!max_tx_rate)
1940                 goto config_qos;
1941
1942         status = be_cmd_link_status_query(adapter, &link_speed,
1943                                           &link_status, 0);
1944         if (status)
1945                 goto err;
1946
1947         if (!link_status) {
1948                 dev_err(dev, "TX-rate setting not allowed when link is down\n");
1949                 status = -ENETDOWN;
1950                 goto err;
1951         }
1952
1953         if (max_tx_rate < 100 || max_tx_rate > link_speed) {
1954                 dev_err(dev, "TX-rate must be between 100 and %d Mbps\n",
1955                         link_speed);
1956                 status = -EINVAL;
1957                 goto err;
1958         }
1959
1960         /* On Skyhawk the QOS setting must be done only as a % value */
1961         percent_rate = link_speed / 100;
1962         if (skyhawk_chip(adapter) && (max_tx_rate % percent_rate)) {
1963                 dev_err(dev, "TX-rate must be a multiple of %d Mbps\n",
1964                         percent_rate);
1965                 status = -EINVAL;
1966                 goto err;
1967         }
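        /* Illustrative example (hypothetical values): on a 10Gbps link,
         * link_speed = 10000 so percent_rate = 100, i.e. Skyhawk accepts
         * only max_tx_rate values that are multiples of 100 Mbps.
         */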
1968
1969 config_qos:
1970         status = be_cmd_config_qos(adapter, max_tx_rate, link_speed, vf + 1);
1971         if (status)
1972                 goto err;
1973
1974         adapter->vf_cfg[vf].tx_rate = max_tx_rate;
1975         return 0;
1976
1977 err:
1978         dev_err(dev, "TX-rate setting of %dMbps on VF%d failed\n",
1979                 max_tx_rate, vf);
1980         return be_cmd_status(status);
1981 }
1982
1983 static int be_set_vf_link_state(struct net_device *netdev, int vf,
1984                                 int link_state)
1985 {
1986         struct be_adapter *adapter = netdev_priv(netdev);
1987         int status;
1988
1989         if (!sriov_enabled(adapter))
1990                 return -EPERM;
1991
1992         if (vf >= adapter->num_vfs)
1993                 return -EINVAL;
1994
1995         status = be_cmd_set_logical_link_config(adapter, link_state, vf+1);
1996         if (status) {
1997                 dev_err(&adapter->pdev->dev,
1998                         "Link state change on VF %d failed: %#x\n", vf, status);
1999                 return be_cmd_status(status);
2000         }
2001
2002         adapter->vf_cfg[vf].plink_tracking = link_state;
2003
2004         return 0;
2005 }
2006
2007 static int be_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable)
2008 {
2009         struct be_adapter *adapter = netdev_priv(netdev);
2010         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
2011         u8 spoofchk;
2012         int status;
2013
2014         if (!sriov_enabled(adapter))
2015                 return -EPERM;
2016
2017         if (vf >= adapter->num_vfs)
2018                 return -EINVAL;
2019
2020         if (BEx_chip(adapter))
2021                 return -EOPNOTSUPP;
2022
2023         if (enable == vf_cfg->spoofchk)
2024                 return 0;
2025
2026         spoofchk = enable ? ENABLE_MAC_SPOOFCHK : DISABLE_MAC_SPOOFCHK;
2027
2028         status = be_cmd_set_hsw_config(adapter, 0, vf + 1, vf_cfg->if_handle,
2029                                        0, spoofchk);
2030         if (status) {
2031                 dev_err(&adapter->pdev->dev,
2032                         "Spoofchk change on VF %d failed: %#x\n", vf, status);
2033                 return be_cmd_status(status);
2034         }
2035
2036         vf_cfg->spoofchk = enable;
2037         return 0;
2038 }
2039
2040 static void be_aic_update(struct be_aic_obj *aic, u64 rx_pkts, u64 tx_pkts,
2041                           ulong now)
2042 {
2043         aic->rx_pkts_prev = rx_pkts;
2044         aic->tx_reqs_prev = tx_pkts;
2045         aic->jiffies = now;
2046 }
2047
2048 static int be_get_new_eqd(struct be_eq_obj *eqo)
2049 {
2050         struct be_adapter *adapter = eqo->adapter;
2051         int eqd, start;
2052         struct be_aic_obj *aic;
2053         struct be_rx_obj *rxo;
2054         struct be_tx_obj *txo;
2055         u64 rx_pkts = 0, tx_pkts = 0;
2056         ulong now;
2057         u32 pps, delta;
2058         int i;
2059
2060         aic = &adapter->aic_obj[eqo->idx];
2061         if (!aic->enable) {
2062                 if (aic->jiffies)
2063                         aic->jiffies = 0;
2064                 eqd = aic->et_eqd;
2065                 return eqd;
2066         }
2067
2068         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
2069                 do {
2070                         start = u64_stats_fetch_begin_irq(&rxo->stats.sync);
2071                         rx_pkts += rxo->stats.rx_pkts;
2072                 } while (u64_stats_fetch_retry_irq(&rxo->stats.sync, start));
2073         }
2074
2075         for_all_tx_queues_on_eq(adapter, eqo, txo, i) {
2076                 do {
2077                         start = u64_stats_fetch_begin_irq(&txo->stats.sync);
2078                         tx_pkts += txo->stats.tx_reqs;
2079                 } while (u64_stats_fetch_retry_irq(&txo->stats.sync, start));
2080         }
2081
2082         /* Skip if the counters wrapped around or this is the first calculation */
2083         now = jiffies;
2084         if (!aic->jiffies || time_before(now, aic->jiffies) ||
2085             rx_pkts < aic->rx_pkts_prev ||
2086             tx_pkts < aic->tx_reqs_prev) {
2087                 be_aic_update(aic, rx_pkts, tx_pkts, now);
2088                 return aic->prev_eqd;
2089         }
2090
2091         delta = jiffies_to_msecs(now - aic->jiffies);
2092         if (delta == 0)
2093                 return aic->prev_eqd;
2094
2095         pps = (((u32)(rx_pkts - aic->rx_pkts_prev) * 1000) / delta) +
2096                 (((u32)(tx_pkts - aic->tx_reqs_prev) * 1000) / delta);
2097         eqd = (pps / 15000) << 2;
2098
2099         if (eqd < 8)
2100                 eqd = 0;
2101         eqd = min_t(u32, eqd, aic->max_eqd);
2102         eqd = max_t(u32, eqd, aic->min_eqd);
2103
2104         be_aic_update(aic, rx_pkts, tx_pkts, now);
2105
2106         return eqd;
2107 }
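/* Illustrative example of the eqd calculation above (hypothetical numbers):
 * with 120000 new rx_pkts and 30000 new tx_reqs over a delta of 1000 ms,
 * pps = 150000, so eqd = (150000 / 15000) << 2 = 40, which is then clamped
 * to the [min_eqd, max_eqd] range configured for this EQ.
 */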
2108
2109 /* For Skyhawk-R only */
2110 static u32 be_get_eq_delay_mult_enc(struct be_eq_obj *eqo)
2111 {
2112         struct be_adapter *adapter = eqo->adapter;
2113         struct be_aic_obj *aic = &adapter->aic_obj[eqo->idx];
2114         ulong now = jiffies;
2115         int eqd;
2116         u32 mult_enc;
2117
2118         if (!aic->enable)
2119                 return 0;
2120
2121         if (jiffies_to_msecs(now - aic->jiffies) < 1)
2122                 eqd = aic->prev_eqd;
2123         else
2124                 eqd = be_get_new_eqd(eqo);
2125
2126         if (eqd > 100)
2127                 mult_enc = R2I_DLY_ENC_1;
2128         else if (eqd > 60)
2129                 mult_enc = R2I_DLY_ENC_2;
2130         else if (eqd > 20)
2131                 mult_enc = R2I_DLY_ENC_3;
2132         else
2133                 mult_enc = R2I_DLY_ENC_0;
2134
2135         aic->prev_eqd = eqd;
2136
2137         return mult_enc;
2138 }
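/* Continuing the example above: an eqd of 40 falls in the (20, 60] range and
 * is encoded as R2I_DLY_ENC_3; values above 100 map to R2I_DLY_ENC_1.
 */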
2139
2140 void be_eqd_update(struct be_adapter *adapter, bool force_update)
2141 {
2142         struct be_set_eqd set_eqd[MAX_EVT_QS];
2143         struct be_aic_obj *aic;
2144         struct be_eq_obj *eqo;
2145         int i, num = 0, eqd;
2146
2147         for_all_evt_queues(adapter, eqo, i) {
2148                 aic = &adapter->aic_obj[eqo->idx];
2149                 eqd = be_get_new_eqd(eqo);
2150                 if (force_update || eqd != aic->prev_eqd) {
2151                         set_eqd[num].delay_multiplier = (eqd * 65)/100;
2152                         set_eqd[num].eq_id = eqo->q.id;
2153                         aic->prev_eqd = eqd;
2154                         num++;
2155                 }
2156         }
2157
2158         if (num)
2159                 be_cmd_modify_eqd(adapter, set_eqd, num);
2160 }
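/* For example (hypothetical), an eqd of 40 is programmed to the adapter as a
 * delay multiplier of (40 * 65) / 100 = 26.
 */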
2161
2162 static void be_rx_stats_update(struct be_rx_obj *rxo,
2163                                struct be_rx_compl_info *rxcp)
2164 {
2165         struct be_rx_stats *stats = rx_stats(rxo);
2166
2167         u64_stats_update_begin(&stats->sync);
2168         stats->rx_compl++;
2169         stats->rx_bytes += rxcp->pkt_size;
2170         stats->rx_pkts++;
2171         if (rxcp->tunneled)
2172                 stats->rx_vxlan_offload_pkts++;
2173         if (rxcp->pkt_type == BE_MULTICAST_PACKET)
2174                 stats->rx_mcast_pkts++;
2175         if (rxcp->err)
2176                 stats->rx_compl_err++;
2177         u64_stats_update_end(&stats->sync);
2178 }
2179
2180 static inline bool csum_passed(struct be_rx_compl_info *rxcp)
2181 {
2182         /* L4 checksum is not reliable for non-TCP/UDP packets.
2183          * Also ignore ipcksm for IPv6 packets.
2184          */
2185         return (rxcp->tcpf || rxcp->udpf) && rxcp->l4_csum &&
2186                 (rxcp->ip_csum || rxcp->ipv6) && !rxcp->err;
2187 }
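/* Note that be_rx_compl_get() clears l4_csum for IP fragments, so fragmented
 * datagrams are not reported as CHECKSUM_UNNECESSARY and the stack verifies
 * the checksum itself.
 */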
2188
2189 static struct be_rx_page_info *get_rx_page_info(struct be_rx_obj *rxo)
2190 {
2191         struct be_adapter *adapter = rxo->adapter;
2192         struct be_rx_page_info *rx_page_info;
2193         struct be_queue_info *rxq = &rxo->q;
2194         u32 frag_idx = rxq->tail;
2195
2196         rx_page_info = &rxo->page_info_tbl[frag_idx];
2197         BUG_ON(!rx_page_info->page);
2198
2199         if (rx_page_info->last_frag) {
2200                 dma_unmap_page(&adapter->pdev->dev,
2201                                dma_unmap_addr(rx_page_info, bus),
2202                                adapter->big_page_size, DMA_FROM_DEVICE);
2203                 rx_page_info->last_frag = false;
2204         } else {
2205                 dma_sync_single_for_cpu(&adapter->pdev->dev,
2206                                         dma_unmap_addr(rx_page_info, bus),
2207                                         rx_frag_size, DMA_FROM_DEVICE);
2208         }
2209
2210         queue_tail_inc(rxq);
2211         atomic_dec(&rxq->used);
2212         return rx_page_info;
2213 }
2214
2215 /* Throw away the data in the Rx completion */
2216 static void be_rx_compl_discard(struct be_rx_obj *rxo,
2217                                 struct be_rx_compl_info *rxcp)
2218 {
2219         struct be_rx_page_info *page_info;
2220         u16 i, num_rcvd = rxcp->num_rcvd;
2221
2222         for (i = 0; i < num_rcvd; i++) {
2223                 page_info = get_rx_page_info(rxo);
2224                 put_page(page_info->page);
2225                 memset(page_info, 0, sizeof(*page_info));
2226         }
2227 }
2228
2229 /*
2230  * skb_fill_rx_data forms a complete skb for an ether frame
2231  * indicated by rxcp.
2232  */
2233 static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb,
2234                              struct be_rx_compl_info *rxcp)
2235 {
2236         struct be_rx_page_info *page_info;
2237         u16 i, j;
2238         u16 hdr_len, curr_frag_len, remaining;
2239         u8 *start;
2240
2241         page_info = get_rx_page_info(rxo);
2242         start = page_address(page_info->page) + page_info->page_offset;
2243         prefetch(start);
2244
2245         /* Copy data in the first descriptor of this completion */
2246         curr_frag_len = min(rxcp->pkt_size, rx_frag_size);
2247
2248         skb->len = curr_frag_len;
2249         if (curr_frag_len <= BE_HDR_LEN) { /* tiny packet */
2250                 memcpy(skb->data, start, curr_frag_len);
2251                 /* Complete packet has now been moved to data */
2252                 put_page(page_info->page);
2253                 skb->data_len = 0;
2254                 skb->tail += curr_frag_len;
2255         } else {
2256                 hdr_len = ETH_HLEN;
2257                 memcpy(skb->data, start, hdr_len);
2258                 skb_shinfo(skb)->nr_frags = 1;
2259                 skb_frag_set_page(skb, 0, page_info->page);
2260                 skb_shinfo(skb)->frags[0].page_offset =
2261                                         page_info->page_offset + hdr_len;
2262                 skb_frag_size_set(&skb_shinfo(skb)->frags[0],
2263                                   curr_frag_len - hdr_len);
2264                 skb->data_len = curr_frag_len - hdr_len;
2265                 skb->truesize += rx_frag_size;
2266                 skb->tail += hdr_len;
2267         }
2268         page_info->page = NULL;
2269
2270         if (rxcp->pkt_size <= rx_frag_size) {
2271                 BUG_ON(rxcp->num_rcvd != 1);
2272                 return;
2273         }
2274
2275         /* More frags present for this completion */
2276         remaining = rxcp->pkt_size - curr_frag_len;
2277         for (i = 1, j = 0; i < rxcp->num_rcvd; i++) {
2278                 page_info = get_rx_page_info(rxo);
2279                 curr_frag_len = min(remaining, rx_frag_size);
2280
2281                 /* Coalesce all frags from the same physical page in one slot */
2282                 if (page_info->page_offset == 0) {
2283                         /* Fresh page */
2284                         j++;
2285                         skb_frag_set_page(skb, j, page_info->page);
2286                         skb_shinfo(skb)->frags[j].page_offset =
2287                                                         page_info->page_offset;
2288                         skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2289                         skb_shinfo(skb)->nr_frags++;
2290                 } else {
2291                         put_page(page_info->page);
2292                 }
2293
2294                 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2295                 skb->len += curr_frag_len;
2296                 skb->data_len += curr_frag_len;
2297                 skb->truesize += rx_frag_size;
2298                 remaining -= curr_frag_len;
2299                 page_info->page = NULL;
2300         }
2301         BUG_ON(j > MAX_SKB_FRAGS);
2302 }
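/* In effect this is a copy-break scheme: frames of at most BE_HDR_LEN bytes
 * are copied entirely into the skb linear area, while larger frames get only
 * the Ethernet header copied and keep their payload in page fragments.
 */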
2303
2304 /* Process the RX completion indicated by rxcp when GRO is disabled */
2305 static void be_rx_compl_process(struct be_rx_obj *rxo, struct napi_struct *napi,
2306                                 struct be_rx_compl_info *rxcp)
2307 {
2308         struct be_adapter *adapter = rxo->adapter;
2309         struct net_device *netdev = adapter->netdev;
2310         struct sk_buff *skb;
2311
2312         skb = netdev_alloc_skb_ip_align(netdev, BE_RX_SKB_ALLOC_SIZE);
2313         if (unlikely(!skb)) {
2314                 rx_stats(rxo)->rx_drops_no_skbs++;
2315                 be_rx_compl_discard(rxo, rxcp);
2316                 return;
2317         }
2318
2319         skb_fill_rx_data(rxo, skb, rxcp);
2320
2321         if (likely((netdev->features & NETIF_F_RXCSUM) && csum_passed(rxcp)))
2322                 skb->ip_summed = CHECKSUM_UNNECESSARY;
2323         else
2324                 skb_checksum_none_assert(skb);
2325
2326         skb->protocol = eth_type_trans(skb, netdev);
2327         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2328         if (netdev->features & NETIF_F_RXHASH)
2329                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2330
2331         skb->csum_level = rxcp->tunneled;
2332         skb_mark_napi_id(skb, napi);
2333
2334         if (rxcp->vlanf)
2335                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2336
2337         netif_receive_skb(skb);
2338 }
2339
2340 /* Process the RX completion indicated by rxcp when GRO is enabled */
2341 static void be_rx_compl_process_gro(struct be_rx_obj *rxo,
2342                                     struct napi_struct *napi,
2343                                     struct be_rx_compl_info *rxcp)
2344 {
2345         struct be_adapter *adapter = rxo->adapter;
2346         struct be_rx_page_info *page_info;
2347         struct sk_buff *skb = NULL;
2348         u16 remaining, curr_frag_len;
2349         u16 i, j;
2350
2351         skb = napi_get_frags(napi);
2352         if (!skb) {
2353                 be_rx_compl_discard(rxo, rxcp);
2354                 return;
2355         }
2356
2357         remaining = rxcp->pkt_size;
2358         for (i = 0, j = -1; i < rxcp->num_rcvd; i++) {
2359                 page_info = get_rx_page_info(rxo);
2360
2361                 curr_frag_len = min(remaining, rx_frag_size);
2362
2363                 /* Coalesce all frags from the same physical page in one slot */
2364                 if (i == 0 || page_info->page_offset == 0) {
2365                         /* First frag or Fresh page */
2366                         j++;
2367                         skb_frag_set_page(skb, j, page_info->page);
2368                         skb_shinfo(skb)->frags[j].page_offset =
2369                                                         page_info->page_offset;
2370                         skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2371                 } else {
2372                         put_page(page_info->page);
2373                 }
2374                 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2375                 skb->truesize += rx_frag_size;
2376                 remaining -= curr_frag_len;
2377                 memset(page_info, 0, sizeof(*page_info));
2378         }
2379         BUG_ON(j > MAX_SKB_FRAGS);
2380
2381         skb_shinfo(skb)->nr_frags = j + 1;
2382         skb->len = rxcp->pkt_size;
2383         skb->data_len = rxcp->pkt_size;
2384         skb->ip_summed = CHECKSUM_UNNECESSARY;
2385         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2386         if (adapter->netdev->features & NETIF_F_RXHASH)
2387                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2388
2389         skb->csum_level = rxcp->tunneled;
2390
2391         if (rxcp->vlanf)
2392                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2393
2394         napi_gro_frags(napi);
2395 }
2396
2397 static void be_parse_rx_compl_v1(struct be_eth_rx_compl *compl,
2398                                  struct be_rx_compl_info *rxcp)
2399 {
2400         rxcp->pkt_size = GET_RX_COMPL_V1_BITS(pktsize, compl);
2401         rxcp->vlanf = GET_RX_COMPL_V1_BITS(vtp, compl);
2402         rxcp->err = GET_RX_COMPL_V1_BITS(err, compl);
2403         rxcp->tcpf = GET_RX_COMPL_V1_BITS(tcpf, compl);
2404         rxcp->udpf = GET_RX_COMPL_V1_BITS(udpf, compl);
2405         rxcp->ip_csum = GET_RX_COMPL_V1_BITS(ipcksm, compl);
2406         rxcp->l4_csum = GET_RX_COMPL_V1_BITS(l4_cksm, compl);
2407         rxcp->ipv6 = GET_RX_COMPL_V1_BITS(ip_version, compl);
2408         rxcp->num_rcvd = GET_RX_COMPL_V1_BITS(numfrags, compl);
2409         rxcp->pkt_type = GET_RX_COMPL_V1_BITS(cast_enc, compl);
2410         rxcp->rss_hash = GET_RX_COMPL_V1_BITS(rsshash, compl);
2411         if (rxcp->vlanf) {
2412                 rxcp->qnq = GET_RX_COMPL_V1_BITS(qnq, compl);
2413                 rxcp->vlan_tag = GET_RX_COMPL_V1_BITS(vlan_tag, compl);
2414         }
2415         rxcp->port = GET_RX_COMPL_V1_BITS(port, compl);
2416         rxcp->tunneled =
2417                 GET_RX_COMPL_V1_BITS(tunneled, compl);
2418 }
2419
2420 static void be_parse_rx_compl_v0(struct be_eth_rx_compl *compl,
2421                                  struct be_rx_compl_info *rxcp)
2422 {
2423         rxcp->pkt_size = GET_RX_COMPL_V0_BITS(pktsize, compl);
2424         rxcp->vlanf = GET_RX_COMPL_V0_BITS(vtp, compl);
2425         rxcp->err = GET_RX_COMPL_V0_BITS(err, compl);
2426         rxcp->tcpf = GET_RX_COMPL_V0_BITS(tcpf, compl);
2427         rxcp->udpf = GET_RX_COMPL_V0_BITS(udpf, compl);
2428         rxcp->ip_csum = GET_RX_COMPL_V0_BITS(ipcksm, compl);
2429         rxcp->l4_csum = GET_RX_COMPL_V0_BITS(l4_cksm, compl);
2430         rxcp->ipv6 = GET_RX_COMPL_V0_BITS(ip_version, compl);
2431         rxcp->num_rcvd = GET_RX_COMPL_V0_BITS(numfrags, compl);
2432         rxcp->pkt_type = GET_RX_COMPL_V0_BITS(cast_enc, compl);
2433         rxcp->rss_hash = GET_RX_COMPL_V0_BITS(rsshash, compl);
2434         if (rxcp->vlanf) {
2435                 rxcp->qnq = GET_RX_COMPL_V0_BITS(qnq, compl);
2436                 rxcp->vlan_tag = GET_RX_COMPL_V0_BITS(vlan_tag, compl);
2437         }
2438         rxcp->port = GET_RX_COMPL_V0_BITS(port, compl);
2439         rxcp->ip_frag = GET_RX_COMPL_V0_BITS(ip_frag, compl);
2440 }
2441
2442 static struct be_rx_compl_info *be_rx_compl_get(struct be_rx_obj *rxo)
2443 {
2444         struct be_eth_rx_compl *compl = queue_tail_node(&rxo->cq);
2445         struct be_rx_compl_info *rxcp = &rxo->rxcp;
2446         struct be_adapter *adapter = rxo->adapter;
2447
2448         /* For checking the valid bit it is Ok to use either definition as the
2449          * valid bit is at the same position in both v0 and v1 Rx compl */
2450         if (compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] == 0)
2451                 return NULL;
2452
2453         rmb();
2454         be_dws_le_to_cpu(compl, sizeof(*compl));
2455
2456         if (adapter->be3_native)
2457                 be_parse_rx_compl_v1(compl, rxcp);
2458         else
2459                 be_parse_rx_compl_v0(compl, rxcp);
2460
2461         if (rxcp->ip_frag)
2462                 rxcp->l4_csum = 0;
2463
2464         if (rxcp->vlanf) {
2465                 /* In QNQ modes, if qnq bit is not set, then the packet was
2466                  * tagged only with the transparent outer vlan-tag and must
2467                  * not be treated as a vlan packet by host
2468                  */
2469                 if (be_is_qnq_mode(adapter) && !rxcp->qnq)
2470                         rxcp->vlanf = 0;
2471
2472                 if (!lancer_chip(adapter))
2473                         rxcp->vlan_tag = swab16(rxcp->vlan_tag);
2474
2475                 if (adapter->pvid == (rxcp->vlan_tag & VLAN_VID_MASK) &&
2476                     !test_bit(rxcp->vlan_tag, adapter->vids))
2477                         rxcp->vlanf = 0;
2478         }
2479
2480         /* As the compl has been parsed, reset it; we won't touch it again */
2481         compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] = 0;
2482
2483         queue_tail_inc(&rxo->cq);
2484         return rxcp;
2485 }
2486
2487 static inline struct page *be_alloc_pages(u32 size, gfp_t gfp)
2488 {
2489         u32 order = get_order(size);
2490
2491         if (order > 0)
2492                 gfp |= __GFP_COMP;
2493         return  alloc_pages(gfp, order);
2494 }
2495
2496 /*
2497  * Allocate a page, split it into fragments of size rx_frag_size and post
2498  * them as receive buffers to BE
2499  */
2500 static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed)
2501 {
2502         struct be_adapter *adapter = rxo->adapter;
2503         struct be_rx_page_info *page_info = NULL, *prev_page_info = NULL;
2504         struct be_queue_info *rxq = &rxo->q;
2505         struct page *pagep = NULL;
2506         struct device *dev = &adapter->pdev->dev;
2507         struct be_eth_rx_d *rxd;
2508         u64 page_dmaaddr = 0, frag_dmaaddr;
2509         u32 posted, page_offset = 0, notify = 0;
2510
2511         page_info = &rxo->page_info_tbl[rxq->head];
2512         for (posted = 0; posted < frags_needed && !page_info->page; posted++) {
2513                 if (!pagep) {
2514                         pagep = be_alloc_pages(adapter->big_page_size, gfp);
2515                         if (unlikely(!pagep)) {
2516                                 rx_stats(rxo)->rx_post_fail++;
2517                                 break;
2518                         }
2519                         page_dmaaddr = dma_map_page(dev, pagep, 0,
2520                                                     adapter->big_page_size,
2521                                                     DMA_FROM_DEVICE);
2522                         if (dma_mapping_error(dev, page_dmaaddr)) {
2523                                 put_page(pagep);
2524                                 pagep = NULL;
2525                                 adapter->drv_stats.dma_map_errors++;
2526                                 break;
2527                         }
2528                         page_offset = 0;
2529                 } else {
2530                         get_page(pagep);
2531                         page_offset += rx_frag_size;
2532                 }
2533                 page_info->page_offset = page_offset;
2534                 page_info->page = pagep;
2535
2536                 rxd = queue_head_node(rxq);
2537                 frag_dmaaddr = page_dmaaddr + page_info->page_offset;
2538                 rxd->fragpa_lo = cpu_to_le32(frag_dmaaddr & 0xFFFFFFFF);
2539                 rxd->fragpa_hi = cpu_to_le32(upper_32_bits(frag_dmaaddr));
2540
2541                 /* Any space left in the current big page for another frag? */
2542                 if ((page_offset + rx_frag_size + rx_frag_size) >
2543                                         adapter->big_page_size) {
2544                         pagep = NULL;
2545                         page_info->last_frag = true;
2546                         dma_unmap_addr_set(page_info, bus, page_dmaaddr);
2547                 } else {
2548                         dma_unmap_addr_set(page_info, bus, frag_dmaaddr);
2549                 }
2550
2551                 prev_page_info = page_info;
2552                 queue_head_inc(rxq);
2553                 page_info = &rxo->page_info_tbl[rxq->head];
2554         }
2555
2556         /* Mark the last frag of a page when we break out of the above loop
2557          * with no more slots available in the RXQ
2558          */
2559         if (pagep) {
2560                 prev_page_info->last_frag = true;
2561                 dma_unmap_addr_set(prev_page_info, bus, page_dmaaddr);
2562         }
2563
2564         if (posted) {
2565                 atomic_add(posted, &rxq->used);
2566                 if (rxo->rx_post_starved)
2567                         rxo->rx_post_starved = false;
2568                 do {
2569                         notify = min(MAX_NUM_POST_ERX_DB, posted);
2570                         be_rxq_notify(adapter, rxq->id, notify);
2571                         posted -= notify;
2572                 } while (posted);
2573         } else if (atomic_read(&rxq->used) == 0) {
2574                 /* Let be_worker replenish when memory is available */
2575                 rxo->rx_post_starved = true;
2576         }
2577 }
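/* Illustrative example (hypothetical sizes): with the default rx_frag_size of
 * 2048 and a big_page_size of 16K, one compound page is DMA-mapped once and
 * carved into 8 fragments; an extra page reference is taken for each fragment
 * after the first, and last_frag marks the fragment whose reclaim unmaps the
 * whole page.
 */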
2578
2579 static struct be_tx_compl_info *be_tx_compl_get(struct be_tx_obj *txo)
2580 {
2581         struct be_queue_info *tx_cq = &txo->cq;
2582         struct be_tx_compl_info *txcp = &txo->txcp;
2583         struct be_eth_tx_compl *compl = queue_tail_node(tx_cq);
2584
2585         if (compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0)
2586                 return NULL;
2587
2588         /* Ensure load ordering of valid bit dword and other dwords below */
2589         rmb();
2590         be_dws_le_to_cpu(compl, sizeof(*compl));
2591
2592         txcp->status = GET_TX_COMPL_BITS(status, compl);
2593         txcp->end_index = GET_TX_COMPL_BITS(wrb_index, compl);
2594
2595         compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0;
2596         queue_tail_inc(tx_cq);
2597         return txcp;
2598 }
2599
2600 static u16 be_tx_compl_process(struct be_adapter *adapter,
2601                                struct be_tx_obj *txo, u16 last_index)
2602 {
2603         struct sk_buff **sent_skbs = txo->sent_skb_list;
2604         struct be_queue_info *txq = &txo->q;
2605         struct sk_buff *skb = NULL;
2606         bool unmap_skb_hdr = false;
2607         struct be_eth_wrb *wrb;
2608         u16 num_wrbs = 0;
2609         u32 frag_index;
2610
2611         do {
2612                 if (sent_skbs[txq->tail]) {
2613                         /* Free skb from prev req */
2614                         if (skb)
2615                                 dev_consume_skb_any(skb);
2616                         skb = sent_skbs[txq->tail];
2617                         sent_skbs[txq->tail] = NULL;
2618                         queue_tail_inc(txq);  /* skip hdr wrb */
2619                         num_wrbs++;
2620                         unmap_skb_hdr = true;
2621                 }
2622                 wrb = queue_tail_node(txq);
2623                 frag_index = txq->tail;
2624                 unmap_tx_frag(&adapter->pdev->dev, wrb,
2625                               (unmap_skb_hdr && skb_headlen(skb)));
2626                 unmap_skb_hdr = false;
2627                 queue_tail_inc(txq);
2628                 num_wrbs++;
2629         } while (frag_index != last_index);
2630         dev_consume_skb_any(skb);
2631
2632         return num_wrbs;
2633 }
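/* num_wrbs counts the header WRB plus every fragment WRB consumed up to
 * last_index; callers subtract it from txq->used to release the ring entries.
 */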
2634
2635 /* Return the number of events in the event queue */
2636 static inline int events_get(struct be_eq_obj *eqo)
2637 {
2638         struct be_eq_entry *eqe;
2639         int num = 0;
2640
2641         do {
2642                 eqe = queue_tail_node(&eqo->q);
2643                 if (eqe->evt == 0)
2644                         break;
2645
2646                 rmb();
2647                 eqe->evt = 0;
2648                 num++;
2649                 queue_tail_inc(&eqo->q);
2650         } while (true);
2651
2652         return num;
2653 }
2654
2655 /* Leaves the EQ in disarmed state */
2656 static void be_eq_clean(struct be_eq_obj *eqo)
2657 {
2658         int num = events_get(eqo);
2659
2660         be_eq_notify(eqo->adapter, eqo->q.id, false, true, num, 0);
2661 }
2662
2663 /* Free posted rx buffers that were not used */
2664 static void be_rxq_clean(struct be_rx_obj *rxo)
2665 {
2666         struct be_queue_info *rxq = &rxo->q;
2667         struct be_rx_page_info *page_info;
2668
2669         while (atomic_read(&rxq->used) > 0) {
2670                 page_info = get_rx_page_info(rxo);
2671                 put_page(page_info->page);
2672                 memset(page_info, 0, sizeof(*page_info));
2673         }
2674         BUG_ON(atomic_read(&rxq->used));
2675         rxq->tail = 0;
2676         rxq->head = 0;
2677 }
2678
2679 static void be_rx_cq_clean(struct be_rx_obj *rxo)
2680 {
2681         struct be_queue_info *rx_cq = &rxo->cq;
2682         struct be_rx_compl_info *rxcp;
2683         struct be_adapter *adapter = rxo->adapter;
2684         int flush_wait = 0;
2685
2686         /* Consume pending rx completions.
2687          * Wait for the flush completion (identified by zero num_rcvd)
2688          * to arrive. Notify CQ even when there are no more CQ entries
2689          * for HW to flush partially coalesced CQ entries.
2690          * In Lancer, there is no need to wait for flush compl.
2691          */
2692         for (;;) {
2693                 rxcp = be_rx_compl_get(rxo);
2694                 if (!rxcp) {
2695                         if (lancer_chip(adapter))
2696                                 break;
2697
2698                         if (flush_wait++ > 50 ||
2699                             be_check_error(adapter,
2700                                            BE_ERROR_HW)) {
2701                                 dev_warn(&adapter->pdev->dev,
2702                                          "did not receive flush compl\n");
2703                                 break;
2704                         }
2705                         be_cq_notify(adapter, rx_cq->id, true, 0);
2706                         mdelay(1);
2707                 } else {
2708                         be_rx_compl_discard(rxo, rxcp);
2709                         be_cq_notify(adapter, rx_cq->id, false, 1);
2710                         if (rxcp->num_rcvd == 0)
2711                                 break;
2712                 }
2713         }
2714
2715         /* After cleanup, leave the CQ in unarmed state */
2716         be_cq_notify(adapter, rx_cq->id, false, 0);
2717 }
2718
2719 static void be_tx_compl_clean(struct be_adapter *adapter)
2720 {
2721         struct device *dev = &adapter->pdev->dev;
2722         u16 cmpl = 0, timeo = 0, num_wrbs = 0;
2723         struct be_tx_compl_info *txcp;
2724         struct be_queue_info *txq;
2725         u32 end_idx, notified_idx;
2726         struct be_tx_obj *txo;
2727         int i, pending_txqs;
2728
2729         /* Stop polling for compls when HW has been silent for 10ms */
2730         do {
2731                 pending_txqs = adapter->num_tx_qs;
2732
2733                 for_all_tx_queues(adapter, txo, i) {
2734                         cmpl = 0;
2735                         num_wrbs = 0;
2736                         txq = &txo->q;
2737                         while ((txcp = be_tx_compl_get(txo))) {
2738                                 num_wrbs +=
2739                                         be_tx_compl_process(adapter, txo,
2740                                                             txcp->end_index);
2741                                 cmpl++;
2742                         }
2743                         if (cmpl) {
2744                                 be_cq_notify(adapter, txo->cq.id, false, cmpl);
2745                                 atomic_sub(num_wrbs, &txq->used);
2746                                 timeo = 0;
2747                         }
2748                         if (!be_is_tx_compl_pending(txo))
2749                                 pending_txqs--;
2750                 }
2751
2752                 if (pending_txqs == 0 || ++timeo > 10 ||
2753                     be_check_error(adapter, BE_ERROR_HW))
2754                         break;
2755
2756                 mdelay(1);
2757         } while (true);
2758
2759         /* Free enqueued TX that was never notified to HW */
2760         for_all_tx_queues(adapter, txo, i) {
2761                 txq = &txo->q;
2762
2763                 if (atomic_read(&txq->used)) {
2764                         dev_info(dev, "txq%d: cleaning %d pending tx-wrbs\n",
2765                                  i, atomic_read(&txq->used));
2766                         notified_idx = txq->tail;
2767                         end_idx = txq->tail;
2768                         index_adv(&end_idx, atomic_read(&txq->used) - 1,
2769                                   txq->len);
2770                         /* Use the tx-compl process logic to handle requests
2771                          * that were not sent to the HW.
2772                          */
2773                         num_wrbs = be_tx_compl_process(adapter, txo, end_idx);
2774                         atomic_sub(num_wrbs, &txq->used);
2775                         BUG_ON(atomic_read(&txq->used));
2776                         txo->pend_wrb_cnt = 0;
2777                         /* Since hw was never notified of these requests,
2778                          * reset TXQ indices
2779                          */
2780                         txq->head = notified_idx;
2781                         txq->tail = notified_idx;
2782                 }
2783         }
2784 }
2785
2786 static void be_evt_queues_destroy(struct be_adapter *adapter)
2787 {
2788         struct be_eq_obj *eqo;
2789         int i;
2790
2791         for_all_evt_queues(adapter, eqo, i) {
2792                 if (eqo->q.created) {
2793                         be_eq_clean(eqo);
2794                         be_cmd_q_destroy(adapter, &eqo->q, QTYPE_EQ);
2795                         netif_napi_del(&eqo->napi);
2796                         free_cpumask_var(eqo->affinity_mask);
2797                 }
2798                 be_queue_free(adapter, &eqo->q);
2799         }
2800 }
2801
2802 static int be_evt_queues_create(struct be_adapter *adapter)
2803 {
2804         struct be_queue_info *eq;
2805         struct be_eq_obj *eqo;
2806         struct be_aic_obj *aic;
2807         int i, rc;
2808
2809         /* need enough EQs to service both RX and TX queues */
2810         adapter->num_evt_qs = min_t(u16, num_irqs(adapter),
2811                                     max(adapter->cfg_num_rx_irqs,
2812                                         adapter->cfg_num_tx_irqs));
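        /* e.g. (hypothetical) with cfg_num_rx_irqs = 8, cfg_num_tx_irqs = 4
         * and num_irqs() = 6, this yields num_evt_qs = min(6, max(8, 4)) = 6.
         */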
2813
2814         for_all_evt_queues(adapter, eqo, i) {
2815                 int numa_node = dev_to_node(&adapter->pdev->dev);
2816
2817                 aic = &adapter->aic_obj[i];
2818                 eqo->adapter = adapter;
2819                 eqo->idx = i;
2820                 aic->max_eqd = BE_MAX_EQD;
2821                 aic->enable = true;
2822
2823                 eq = &eqo->q;
2824                 rc = be_queue_alloc(adapter, eq, EVNT_Q_LEN,
2825                                     sizeof(struct be_eq_entry));
2826                 if (rc)
2827                         return rc;
2828
2829                 rc = be_cmd_eq_create(adapter, eqo);
2830                 if (rc)
2831                         return rc;
2832
2833                 if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
2834                         return -ENOMEM;
2835                 cpumask_set_cpu(cpumask_local_spread(i, numa_node),
2836                                 eqo->affinity_mask);
2837                 netif_napi_add(adapter->netdev, &eqo->napi, be_poll,
2838                                BE_NAPI_WEIGHT);
2839         }
2840         return 0;
2841 }
2842
2843 static void be_mcc_queues_destroy(struct be_adapter *adapter)
2844 {
2845         struct be_queue_info *q;
2846
2847         q = &adapter->mcc_obj.q;
2848         if (q->created)
2849                 be_cmd_q_destroy(adapter, q, QTYPE_MCCQ);
2850         be_queue_free(adapter, q);
2851
2852         q = &adapter->mcc_obj.cq;
2853         if (q->created)
2854                 be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2855         be_queue_free(adapter, q);
2856 }
2857
2858 /* Must be called only after TX qs are created as MCC shares TX EQ */
2859 static int be_mcc_queues_create(struct be_adapter *adapter)
2860 {
2861         struct be_queue_info *q, *cq;
2862
2863         cq = &adapter->mcc_obj.cq;
2864         if (be_queue_alloc(adapter, cq, MCC_CQ_LEN,
2865                            sizeof(struct be_mcc_compl)))
2866                 goto err;
2867
2868         /* Use the default EQ for MCC completions */
2869         if (be_cmd_cq_create(adapter, cq, &mcc_eqo(adapter)->q, true, 0))
2870                 goto mcc_cq_free;
2871
2872         q = &adapter->mcc_obj.q;
2873         if (be_queue_alloc(adapter, q, MCC_Q_LEN, sizeof(struct be_mcc_wrb)))
2874                 goto mcc_cq_destroy;
2875
2876         if (be_cmd_mccq_create(adapter, q, cq))
2877                 goto mcc_q_free;
2878
2879         return 0;
2880
2881 mcc_q_free:
2882         be_queue_free(adapter, q);
2883 mcc_cq_destroy:
2884         be_cmd_q_destroy(adapter, cq, QTYPE_CQ);
2885 mcc_cq_free:
2886         be_queue_free(adapter, cq);
2887 err:
2888         return -1;
2889 }
2890
2891 static void be_tx_queues_destroy(struct be_adapter *adapter)
2892 {
2893         struct be_queue_info *q;
2894         struct be_tx_obj *txo;
2895         u8 i;
2896
2897         for_all_tx_queues(adapter, txo, i) {
2898                 q = &txo->q;
2899                 if (q->created)
2900                         be_cmd_q_destroy(adapter, q, QTYPE_TXQ);
2901                 be_queue_free(adapter, q);
2902
2903                 q = &txo->cq;
2904                 if (q->created)
2905                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2906                 be_queue_free(adapter, q);
2907         }
2908 }
2909
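/* Create the TX queues. Each TXQ gets a completion queue bound to an EQ
 * (EQs are shared round-robin when there are fewer EQs than TXQs) and its
 * XPS map is set to that EQ's CPU affinity mask.
 */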
2910 static int be_tx_qs_create(struct be_adapter *adapter)
2911 {
2912         struct be_queue_info *cq;
2913         struct be_tx_obj *txo;
2914         struct be_eq_obj *eqo;
2915         int status, i;
2916
2917         adapter->num_tx_qs = min(adapter->num_evt_qs, adapter->cfg_num_tx_irqs);
2918
2919         for_all_tx_queues(adapter, txo, i) {
2920                 cq = &txo->cq;
2921                 status = be_queue_alloc(adapter, cq, TX_CQ_LEN,
2922                                         sizeof(struct be_eth_tx_compl));
2923                 if (status)
2924                         return status;
2925
2926                 u64_stats_init(&txo->stats.sync);
2927                 u64_stats_init(&txo->stats.sync_compl);
2928
2929                 /* If num_evt_qs is less than num_tx_qs, then more than
2930                  * one txq shares an eq
2931                  */
2932                 eqo = &adapter->eq_obj[i % adapter->num_evt_qs];
2933                 status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3);
2934                 if (status)
2935                         return status;
2936
2937                 status = be_queue_alloc(adapter, &txo->q, TX_Q_LEN,
2938                                         sizeof(struct be_eth_wrb));
2939                 if (status)
2940                         return status;
2941
2942                 status = be_cmd_txq_create(adapter, txo);
2943                 if (status)
2944                         return status;
2945
2946                 netif_set_xps_queue(adapter->netdev, eqo->affinity_mask,
2947                                     eqo->idx);
2948         }
2949
2950         dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n",
2951                  adapter->num_tx_qs);
2952         return 0;
2953 }
2954
2955 static void be_rx_cqs_destroy(struct be_adapter *adapter)
2956 {
2957         struct be_queue_info *q;
2958         struct be_rx_obj *rxo;
2959         int i;
2960
2961         for_all_rx_queues(adapter, rxo, i) {
2962                 q = &rxo->cq;
2963                 if (q->created)
2964                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2965                 be_queue_free(adapter, q);
2966         }
2967 }
2968
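/* Work out how many RX queues to use and create a completion queue for
 * each. As a hypothetical example: with 8 EQs and cfg_num_rx_irqs = 6,
 * num_rss_qs becomes 6; if fewer than 2 RSS rings are possible, RSS is
 * disabled and a single default RXQ is used instead.
 */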
2969 static int be_rx_cqs_create(struct be_adapter *adapter)
2970 {
2971         struct be_queue_info *eq, *cq;
2972         struct be_rx_obj *rxo;
2973         int rc, i;
2974
2975         adapter->num_rss_qs =
2976                         min(adapter->num_evt_qs, adapter->cfg_num_rx_irqs);
2977
2978         /* We'll use RSS only if at least 2 RSS rings are supported. */
2979         if (adapter->num_rss_qs < 2)
2980                 adapter->num_rss_qs = 0;
2981
2982         adapter->num_rx_qs = adapter->num_rss_qs + adapter->need_def_rxq;
2983
2984         /* When the interface is not capable of RSS rings (and there is no
2985          * need to create a default RXQ) we'll still need one RXQ
2986          */
2987         if (adapter->num_rx_qs == 0)
2988                 adapter->num_rx_qs = 1;
2989
2990         adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE;
2991         for_all_rx_queues(adapter, rxo, i) {
2992                 rxo->adapter = adapter;
2993                 cq = &rxo->cq;
2994                 rc = be_queue_alloc(adapter, cq, RX_CQ_LEN,
2995                                     sizeof(struct be_eth_rx_compl));
2996                 if (rc)
2997                         return rc;
2998
2999                 u64_stats_init(&rxo->stats.sync);
3000                 eq = &adapter->eq_obj[i % adapter->num_evt_qs].q;
3001                 rc = be_cmd_cq_create(adapter, cq, eq, false, 3);
3002                 if (rc)
3003                         return rc;
3004         }
3005
3006         dev_info(&adapter->pdev->dev,
3007                  "created %d RX queue(s)\n", adapter->num_rx_qs);
3008         return 0;
3009 }
3010
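/* INTx interrupt handler. In INTx mode only the first EQ is serviced
 * (see be_irq_register()).
 */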
3011 static irqreturn_t be_intx(int irq, void *dev)
3012 {
3013         struct be_eq_obj *eqo = dev;
3014         struct be_adapter *adapter = eqo->adapter;
3015         int num_evts = 0;
3016
3017         /* IRQ is not expected when NAPI is scheduled as the EQ
3018          * will not be armed.
3019          * But, this can happen on Lancer INTx where it takes
3020          * a while to de-assert INTx or in BE2 where occasionally
3021          * an interrupt may be raised even when EQ is unarmed.
3022          * If NAPI is already scheduled, then counting & notifying
3023          * events will orphan them.
3024          */
3025         if (napi_schedule_prep(&eqo->napi)) {
3026                 num_evts = events_get(eqo);
3027                 __napi_schedule(&eqo->napi);
3028                 if (num_evts)
3029                         eqo->spurious_intr = 0;
3030         }
3031         be_eq_notify(adapter, eqo->q.id, false, true, num_evts, 0);
3032
3033         /* Return IRQ_HANDLED only for the first spurious intr
3034          * after a valid intr to stop the kernel from branding
3035          * this irq as a bad one!
3036          */
3037         if (num_evts || eqo->spurious_intr++ == 0)
3038                 return IRQ_HANDLED;
3039         else
3040                 return IRQ_NONE;
3041 }
3042
3043 static irqreturn_t be_msix(int irq, void *dev)
3044 {
3045         struct be_eq_obj *eqo = dev;
3046
3047         be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
3048         napi_schedule(&eqo->napi);
3049         return IRQ_HANDLED;
3050 }
3051
3052 static inline bool do_gro(struct be_rx_compl_info *rxcp)
3053 {
3054         return rxcp->tcpf && !rxcp->err && rxcp->l4_csum;
3055 }
3056
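/* Process up to @budget RX completions on this RX object: skip flush
 * completions, discard partial-DMA and wrongly-filtered frames, and hand
 * good frames to GRO or the regular receive path. The CQ is then notified
 * and the RXQ is replenished if it is running low on buffers.
 */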
3057 static int be_process_rx(struct be_rx_obj *rxo, struct napi_struct *napi,
3058                          int budget, int polling)
3059 {
3060         struct be_adapter *adapter = rxo->adapter;
3061         struct be_queue_info *rx_cq = &rxo->cq;
3062         struct be_rx_compl_info *rxcp;
3063         u32 work_done;
3064         u32 frags_consumed = 0;
3065
3066         for (work_done = 0; work_done < budget; work_done++) {
3067                 rxcp = be_rx_compl_get(rxo);
3068                 if (!rxcp)
3069                         break;
3070
3071                 /* Is it a flush compl that has no data */
3072                 if (unlikely(rxcp->num_rcvd == 0))
3073                         goto loop_continue;
3074
3075                 /* Discard compl with partial DMA Lancer B0 */
3076                 if (unlikely(!rxcp->pkt_size)) {
3077                         be_rx_compl_discard(rxo, rxcp);
3078                         goto loop_continue;
3079                 }
3080
3081                 /* On BE, drop pkts that arrive due to imperfect filtering in
3082                  * promiscuous mode on some SKUs
3083                  */
3084                 if (unlikely(rxcp->port != adapter->port_num &&
3085                              !lancer_chip(adapter))) {
3086                         be_rx_compl_discard(rxo, rxcp);
3087                         goto loop_continue;
3088                 }
3089
3090                 /* Don't do gro when we're busy_polling */
3091                 if (do_gro(rxcp) && polling != BUSY_POLLING)
3092                         be_rx_compl_process_gro(rxo, napi, rxcp);
3093                 else
3094                         be_rx_compl_process(rxo, napi, rxcp);
3095
3096 loop_continue:
3097                 frags_consumed += rxcp->num_rcvd;
3098                 be_rx_stats_update(rxo, rxcp);
3099         }
3100
3101         if (work_done) {
3102                 be_cq_notify(adapter, rx_cq->id, true, work_done);
3103
3104                 /* When an rx-obj gets into post_starved state, just
3105                  * let be_worker do the posting.
3106                  */
3107                 if (atomic_read(&rxo->q.used) < RX_FRAGS_REFILL_WM &&
3108                     !rxo->rx_post_starved)
3109                         be_post_rx_frags(rxo, GFP_ATOMIC,
3110                                          max_t(u32, MAX_RX_POST,
3111                                                frags_consumed));
3112         }
3113
3114         return work_done;
3115 }
3116
3117 static inline void be_update_tx_err(struct be_tx_obj *txo, u8 status)
3118 {
3119         switch (status) {
3120         case BE_TX_COMP_HDR_PARSE_ERR:
3121                 tx_stats(txo)->tx_hdr_parse_err++;
3122                 break;
3123         case BE_TX_COMP_NDMA_ERR:
3124                 tx_stats(txo)->tx_dma_err++;
3125                 break;
3126         case BE_TX_COMP_ACL_ERR:
3127                 tx_stats(txo)->tx_spoof_check_err++;
3128                 break;
3129         }
3130 }
3131
3132 static inline void lancer_update_tx_err(struct be_tx_obj *txo, u8 status)
3133 {
3134         switch (status) {
3135         case LANCER_TX_COMP_LSO_ERR:
3136                 tx_stats(txo)->tx_tso_err++;
3137                 break;
3138         case LANCER_TX_COMP_HSW_DROP_MAC_ERR:
3139         case LANCER_TX_COMP_HSW_DROP_VLAN_ERR:
3140                 tx_stats(txo)->tx_spoof_check_err++;
3141                 break;
3142         case LANCER_TX_COMP_QINQ_ERR:
3143                 tx_stats(txo)->tx_qinq_err++;
3144                 break;
3145         case LANCER_TX_COMP_PARITY_ERR:
3146                 tx_stats(txo)->tx_internal_parity_err++;
3147                 break;
3148         case LANCER_TX_COMP_DMA_ERR:
3149                 tx_stats(txo)->tx_dma_err++;
3150                 break;
3151         }
3152 }
3153
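/* Reap TX completions for one TX object: free the completed wrbs, update
 * per-error-type stats, notify the CQ and wake the netdev subqueue if it
 * had been stopped for lack of wrbs.
 */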
3154 static void be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo,
3155                           int idx)
3156 {
3157         int num_wrbs = 0, work_done = 0;
3158         struct be_tx_compl_info *txcp;
3159
3160         while ((txcp = be_tx_compl_get(txo))) {
3161                 num_wrbs += be_tx_compl_process(adapter, txo, txcp->end_index);
3162                 work_done++;
3163
3164                 if (txcp->status) {
3165                         if (lancer_chip(adapter))
3166                                 lancer_update_tx_err(txo, txcp->status);
3167                         else
3168                                 be_update_tx_err(txo, txcp->status);
3169                 }
3170         }
3171
3172         if (work_done) {
3173                 be_cq_notify(adapter, txo->cq.id, true, work_done);
3174                 atomic_sub(num_wrbs, &txo->q.used);
3175
3176                 /* As Tx wrbs have been freed up, wake up netdev queue
3177                  * if it was stopped due to lack of tx wrbs.  */
3178                 if (__netif_subqueue_stopped(adapter->netdev, idx) &&
3179                     be_can_txq_wake(txo)) {
3180                         netif_wake_subqueue(adapter->netdev, idx);
3181                 }
3182
3183                 u64_stats_update_begin(&tx_stats(txo)->sync_compl);
3184                 tx_stats(txo)->tx_compl += work_done;
3185                 u64_stats_update_end(&tx_stats(txo)->sync_compl);
3186         }
3187 }
3188
3189 #ifdef CONFIG_NET_RX_BUSY_POLL
3190 static inline bool be_lock_napi(struct be_eq_obj *eqo)
3191 {
3192         bool status = true;
3193
3194         spin_lock(&eqo->lock); /* BH is already disabled */
3195         if (eqo->state & BE_EQ_LOCKED) {
3196                 WARN_ON(eqo->state & BE_EQ_NAPI);
3197                 eqo->state |= BE_EQ_NAPI_YIELD;
3198                 status = false;
3199         } else {
3200                 eqo->state = BE_EQ_NAPI;
3201         }
3202         spin_unlock(&eqo->lock);
3203         return status;
3204 }
3205
3206 static inline void be_unlock_napi(struct be_eq_obj *eqo)
3207 {
3208         spin_lock(&eqo->lock); /* BH is already disabled */
3209
3210         WARN_ON(eqo->state & (BE_EQ_POLL | BE_EQ_NAPI_YIELD));
3211         eqo->state = BE_EQ_IDLE;
3212
3213         spin_unlock(&eqo->lock);
3214 }
3215
3216 static inline bool be_lock_busy_poll(struct be_eq_obj *eqo)
3217 {
3218         bool status = true;
3219
3220         spin_lock_bh(&eqo->lock);
3221         if (eqo->state & BE_EQ_LOCKED) {
3222                 eqo->state |= BE_EQ_POLL_YIELD;
3223                 status = false;
3224         } else {
3225                 eqo->state |= BE_EQ_POLL;
3226         }
3227         spin_unlock_bh(&eqo->lock);
3228         return status;
3229 }
3230
3231 static inline void be_unlock_busy_poll(struct be_eq_obj *eqo)
3232 {
3233         spin_lock_bh(&eqo->lock);
3234
3235         WARN_ON(eqo->state & (BE_EQ_NAPI));
3236         eqo->state = BE_EQ_IDLE;
3237
3238         spin_unlock_bh(&eqo->lock);
3239 }
3240
3241 static inline void be_enable_busy_poll(struct be_eq_obj *eqo)
3242 {
3243         spin_lock_init(&eqo->lock);
3244         eqo->state = BE_EQ_IDLE;
3245 }
3246
3247 static inline void be_disable_busy_poll(struct be_eq_obj *eqo)
3248 {
3249         local_bh_disable();
3250
3251         /* It's enough to just acquire napi lock on the eqo to stop
3252          * be_busy_poll() from processing any queues.
3253          */
3254         while (!be_lock_napi(eqo))
3255                 mdelay(1);
3256
3257         local_bh_enable();
3258 }
3259
3260 #else /* CONFIG_NET_RX_BUSY_POLL */
3261
3262 static inline bool be_lock_napi(struct be_eq_obj *eqo)
3263 {
3264         return true;
3265 }
3266
3267 static inline void be_unlock_napi(struct be_eq_obj *eqo)
3268 {
3269 }
3270
3271 static inline bool be_lock_busy_poll(struct be_eq_obj *eqo)
3272 {
3273         return false;
3274 }
3275
3276 static inline void be_unlock_busy_poll(struct be_eq_obj *eqo)
3277 {
3278 }
3279
3280 static inline void be_enable_busy_poll(struct be_eq_obj *eqo)
3281 {
3282 }
3283
3284 static inline void be_disable_busy_poll(struct be_eq_obj *eqo)
3285 {
3286 }
3287 #endif /* CONFIG_NET_RX_BUSY_POLL */
3288
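/* NAPI poll handler shared by all EQs: reap TX completions, process RX
 * (unless busy-poll currently owns the EQ), service MCC completions on the
 * MCC EQ, and re-arm the EQ only when the budget was not exhausted.
 */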
3289 int be_poll(struct napi_struct *napi, int budget)
3290 {
3291         struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3292         struct be_adapter *adapter = eqo->adapter;
3293         int max_work = 0, work, i, num_evts;
3294         struct be_rx_obj *rxo;
3295         struct be_tx_obj *txo;
3296         u32 mult_enc = 0;
3297
3298         num_evts = events_get(eqo);
3299
3300         for_all_tx_queues_on_eq(adapter, eqo, txo, i)
3301                 be_process_tx(adapter, txo, i);
3302
3303         if (be_lock_napi(eqo)) {
3304                 /* This loop will iterate twice for EQ0 in which
3305                  * completions of the last RXQ (default one) are also processed.
3306                  * For other EQs the loop iterates only once.
3307                  */
3308                 for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3309                         work = be_process_rx(rxo, napi, budget, NAPI_POLLING);
3310                         max_work = max(work, max_work);
3311                 }
3312                 be_unlock_napi(eqo);
3313         } else {
3314                 max_work = budget;
3315         }
3316
3317         if (is_mcc_eqo(eqo))
3318                 be_process_mcc(adapter);
3319
3320         if (max_work < budget) {
3321                 napi_complete(napi);
3322
3323                 /* Skyhawk EQ_DB has a provision to set the re-arm-to-interrupt
3324                  * delay via a delay multiplier encoding value.
3325                  */
3326                 if (skyhawk_chip(adapter))
3327                         mult_enc = be_get_eq_delay_mult_enc(eqo);
3328
3329                 be_eq_notify(adapter, eqo->q.id, true, false, num_evts,
3330                              mult_enc);
3331         } else {
3332                 /* As we'll continue in polling mode, count and clear events */
3333                 be_eq_notify(adapter, eqo->q.id, false, false, num_evts, 0);
3334         }
3335         return max_work;
3336 }
3337
3338 #ifdef CONFIG_NET_RX_BUSY_POLL
3339 static int be_busy_poll(struct napi_struct *napi)
3340 {
3341         struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3342         struct be_adapter *adapter = eqo->adapter;
3343         struct be_rx_obj *rxo;
3344         int i, work = 0;
3345
3346         if (!be_lock_busy_poll(eqo))
3347                 return LL_FLUSH_BUSY;
3348
3349         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3350                 work = be_process_rx(rxo, napi, 4, BUSY_POLLING);
3351                 if (work)
3352                         break;
3353         }
3354
3355         be_unlock_busy_poll(eqo);
3356         return work;
3357 }
3358 #endif
3359
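/* Check the adapter for unrecoverable errors: via the SLIPORT status
 * registers on Lancer, or the UE status CSRs (with masked bits ignored) on
 * other chips. Failing blocks are logged and, where appropriate, the
 * adapter is marked as being in an error state.
 */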
3360 void be_detect_error(struct be_adapter *adapter)
3361 {
3362         u32 ue_lo = 0, ue_hi = 0, ue_lo_mask = 0, ue_hi_mask = 0;
3363         u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0;
3364         u32 i;
3365         struct device *dev = &adapter->pdev->dev;
3366
3367         if (be_check_error(adapter, BE_ERROR_HW))
3368                 return;
3369
3370         if (lancer_chip(adapter)) {
3371                 sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
3372                 if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
3373                         be_set_error(adapter, BE_ERROR_UE);
3374                         sliport_err1 = ioread32(adapter->db +
3375                                                 SLIPORT_ERROR1_OFFSET);
3376                         sliport_err2 = ioread32(adapter->db +
3377                                                 SLIPORT_ERROR2_OFFSET);
3378                         /* Do not log error messages if it's a FW reset */
3379                         if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 &&
3380                             sliport_err2 == SLIPORT_ERROR_FW_RESET2) {
3381                                 dev_info(dev, "Firmware update in progress\n");
3382                         } else {
3383                                 dev_err(dev, "Error detected in the card\n");
3384                                 dev_err(dev, "ERR: sliport status 0x%x\n",
3385                                         sliport_status);
3386                                 dev_err(dev, "ERR: sliport error1 0x%x\n",
3387                                         sliport_err1);
3388                                 dev_err(dev, "ERR: sliport error2 0x%x\n",
3389                                         sliport_err2);
3390                         }
3391                 }
3392         } else {
3393                 ue_lo = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_LOW);
3394                 ue_hi = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_HIGH);
3395                 ue_lo_mask = ioread32(adapter->pcicfg +
3396                                       PCICFG_UE_STATUS_LOW_MASK);
3397                 ue_hi_mask = ioread32(adapter->pcicfg +
3398                                       PCICFG_UE_STATUS_HI_MASK);
3399
3400                 ue_lo = (ue_lo & ~ue_lo_mask);
3401                 ue_hi = (ue_hi & ~ue_hi_mask);
3402
3403                 /* On certain platforms BE hardware can indicate spurious UEs.
3404                  * A real UE will cause the HW to stop working completely on its
3405                  * own; hence hw_error is not set based on UE detection alone.
3406                  */
3407
3408                 if (ue_lo || ue_hi) {
3409                         dev_err(dev, "Error detected in the adapter\n");
3410                         if (skyhawk_chip(adapter))
3411                                 be_set_error(adapter, BE_ERROR_UE);
3412
3413                         for (i = 0; ue_lo; ue_lo >>= 1, i++) {
3414                                 if (ue_lo & 1)
3415                                         dev_err(dev, "UE: %s bit set\n",
3416                                                 ue_status_low_desc[i]);
3417                         }
3418                         for (i = 0; ue_hi; ue_hi >>= 1, i++) {
3419                                 if (ue_hi & 1)
3420                                         dev_err(dev, "UE: %s bit set\n",
3421                                                 ue_status_hi_desc[i]);
3422                         }
3423                 }
3424         }
3425 }
3426
3427 static void be_msix_disable(struct be_adapter *adapter)
3428 {
3429         if (msix_enabled(adapter)) {
3430                 pci_disable_msix(adapter->pdev);
3431                 adapter->num_msix_vec = 0;
3432                 adapter->num_msix_roce_vec = 0;
3433         }
3434 }
3435
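/* Enable MSI-X with as many vectors as the configuration allows (including
 * vectors reserved for RoCE when it is supported). If MSI-X cannot be
 * enabled, the PF falls back to INTx while a VF fails the probe.
 */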
3436 static int be_msix_enable(struct be_adapter *adapter)
3437 {
3438         unsigned int i, max_roce_eqs;
3439         struct device *dev = &adapter->pdev->dev;
3440         int num_vec;
3441
3442         /* If RoCE is supported, program the max number of vectors that
3443          * could be used for NIC and RoCE, else, just program the number
3444          * we'll use initially.
3445          */
3446         if (be_roce_supported(adapter)) {
3447                 max_roce_eqs =
3448                         be_max_func_eqs(adapter) - be_max_nic_eqs(adapter);
3449                 max_roce_eqs = min(max_roce_eqs, num_online_cpus());
3450                 num_vec = be_max_any_irqs(adapter) + max_roce_eqs;
3451         } else {
3452                 num_vec = max(adapter->cfg_num_rx_irqs,
3453                               adapter->cfg_num_tx_irqs);
3454         }
3455
3456         for (i = 0; i < num_vec; i++)
3457                 adapter->msix_entries[i].entry = i;
3458
3459         num_vec = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
3460                                         MIN_MSIX_VECTORS, num_vec);
3461         if (num_vec < 0)
3462                 goto fail;
3463
3464         if (be_roce_supported(adapter) && num_vec > MIN_MSIX_VECTORS) {
3465                 adapter->num_msix_roce_vec = num_vec / 2;
3466                 dev_info(dev, "enabled %d MSI-x vector(s) for RoCE\n",
3467                          adapter->num_msix_roce_vec);
3468         }
3469
3470         adapter->num_msix_vec = num_vec - adapter->num_msix_roce_vec;
3471
3472         dev_info(dev, "enabled %d MSI-x vector(s) for NIC\n",
3473                  adapter->num_msix_vec);
3474         return 0;
3475
3476 fail:
3477         dev_warn(dev, "MSIx enable failed\n");
3478
3479         /* INTx is not supported in VFs, so fail probe if enable_msix fails */
3480         if (be_virtfn(adapter))
3481                 return num_vec;
3482         return 0;
3483 }
3484
3485 static inline int be_msix_vec_get(struct be_adapter *adapter,
3486                                   struct be_eq_obj *eqo)
3487 {
3488         return adapter->msix_entries[eqo->msix_idx].vector;
3489 }
3490
3491 static int be_msix_register(struct be_adapter *adapter)
3492 {
3493         struct net_device *netdev = adapter->netdev;
3494         struct be_eq_obj *eqo;
3495         int status, i, vec;
3496
3497         for_all_evt_queues(adapter, eqo, i) {
3498                 sprintf(eqo->desc, "%s-q%d", netdev->name, i);
3499                 vec = be_msix_vec_get(adapter, eqo);
3500                 status = request_irq(vec, be_msix, 0, eqo->desc, eqo);
3501                 if (status)
3502                         goto err_msix;
3503
3504                 irq_set_affinity_hint(vec, eqo->affinity_mask);
3505         }
3506
3507         return 0;
3508 err_msix:
3509         for (i--; i >= 0; i--) {
3510                 eqo = &adapter->eq_obj[i];
3511                 free_irq(be_msix_vec_get(adapter, eqo), eqo);
3512         }
3513         dev_warn(&adapter->pdev->dev, "MSIX Request IRQ failed - err %d\n",
3514                  status);
3515         be_msix_disable(adapter);
3516         return status;
3517 }
3518
3519 static int be_irq_register(struct be_adapter *adapter)
3520 {
3521         struct net_device *netdev = adapter->netdev;
3522         int status;
3523
3524         if (msix_enabled(adapter)) {
3525                 status = be_msix_register(adapter);
3526                 if (status == 0)
3527                         goto done;
3528                 /* INTx is not supported for VF */
3529                 if (be_virtfn(adapter))
3530                         return status;
3531         }
3532
3533         /* INTx: only the first EQ is used */
3534         netdev->irq = adapter->pdev->irq;
3535         status = request_irq(netdev->irq, be_intx, IRQF_SHARED, netdev->name,
3536                              &adapter->eq_obj[0]);
3537         if (status) {
3538                 dev_err(&adapter->pdev->dev,
3539                         "INTx request IRQ failed - err %d\n", status);
3540                 return status;
3541         }
3542 done:
3543         adapter->isr_registered = true;
3544         return 0;
3545 }
3546
3547 static void be_irq_unregister(struct be_adapter *adapter)
3548 {
3549         struct net_device *netdev = adapter->netdev;
3550         struct be_eq_obj *eqo;
3551         int i, vec;
3552
3553         if (!adapter->isr_registered)
3554                 return;
3555
3556         /* INTx */
3557         if (!msix_enabled(adapter)) {
3558                 free_irq(netdev->irq, &adapter->eq_obj[0]);
3559                 goto done;
3560         }
3561
3562         /* MSIx */
3563         for_all_evt_queues(adapter, eqo, i) {
3564                 vec = be_msix_vec_get(adapter, eqo);
3565                 irq_set_affinity_hint(vec, NULL);
3566                 free_irq(vec, eqo);
3567         }
3568
3569 done:
3570         adapter->isr_registered = false;
3571 }
3572
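/* Destroy all RX queues. On Lancer, buffers are first re-posted to avoid a
 * potential HW stall when an RXQ is destroyed in an out-of-buffer state;
 * RSS is turned off afterwards if it had been enabled.
 */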
3573 static void be_rx_qs_destroy(struct be_adapter *adapter)
3574 {
3575         struct rss_info *rss = &adapter->rss_info;
3576         struct be_queue_info *q;
3577         struct be_rx_obj *rxo;
3578         int i;
3579
3580         for_all_rx_queues(adapter, rxo, i) {
3581                 q = &rxo->q;
3582                 if (q->created) {
3583                         /* If RXQs are destroyed while in an "out of buffer"
3584                          * state, there is a possibility of an HW stall on
3585                          * Lancer. So, post 64 buffers to each queue to relieve
3586                          * the "out of buffer" condition.
3587                          * Make sure there's space in the RXQ before posting.
3588                          */
3589                         if (lancer_chip(adapter)) {
3590                                 be_rx_cq_clean(rxo);
3591                                 if (atomic_read(&q->used) == 0)
3592                                         be_post_rx_frags(rxo, GFP_KERNEL,
3593                                                          MAX_RX_POST);
3594                         }
3595
3596                         be_cmd_rxq_destroy(adapter, q);
3597                         be_rx_cq_clean(rxo);
3598                         be_rxq_clean(rxo);
3599                 }
3600                 be_queue_free(adapter, q);
3601         }
3602
3603         if (rss->rss_flags) {
3604                 rss->rss_flags = RSS_ENABLE_NONE;
3605                 be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3606                                   128, rss->rss_hkey);
3607         }
3608 }
3609
3610 static void be_disable_if_filters(struct be_adapter *adapter)
3611 {
3612         /* Don't delete MAC on BE3 VFs without FILTMGMT privilege  */
3613         if (!BEx_chip(adapter) || !be_virtfn(adapter) ||
3614             check_privilege(adapter, BE_PRIV_FILTMGMT))
3615                 be_dev_mac_del(adapter, adapter->pmac_id[0]);
3616
3617         be_clear_uc_list(adapter);
3618         be_clear_mc_list(adapter);
3619
3620         /* The IFACE flags are enabled in the open path and cleared
3621          * in the close path. When a VF gets detached from the host and
3622          * assigned to a VM the following happens:
3623          *      - VF's IFACE flags get cleared in the detach path
3624          *      - IFACE create is issued by the VF in the attach path
3625          * Due to a bug in the BE3/Skyhawk-R FW
3626          * (Lancer FW doesn't have the bug), the IFACE capability flags
3627          * specified along with the IFACE create cmd issued by a VF are not
3628          * honoured by FW.  As a consequence, if a *new* driver
3629          * (that enables/disables IFACE flags in open/close)
3630          * is loaded in the host and an *old* driver is used by a VM/VF,
3631          * the IFACE gets created *without* the needed flags.
3632          * To avoid this, disable RX-filter flags only for Lancer.
3633          */
3634         if (lancer_chip(adapter)) {
3635                 be_cmd_rx_filter(adapter, BE_IF_ALL_FILT_FLAGS, OFF);
3636                 adapter->if_flags &= ~BE_IF_ALL_FILT_FLAGS;
3637         }
3638 }
3639
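/* ndo_stop handler: quiesce NAPI and MCC processing, drain pending TX
 * completions, destroy the RX queues and release the IRQs. Returns early
 * if setup never completed.
 */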
3640 static int be_close(struct net_device *netdev)
3641 {
3642         struct be_adapter *adapter = netdev_priv(netdev);
3643         struct be_eq_obj *eqo;
3644         int i;
3645
3646         /* This protection is needed as be_close() may be called even when the
3647          * adapter is in cleared state (after eeh perm failure)
3648          */
3649         if (!(adapter->flags & BE_FLAGS_SETUP_DONE))
3650                 return 0;
3651
3652         /* Before attempting cleanup ensure all the pending cmds in the
3653          * config_wq have finished execution
3654          */
3655         flush_workqueue(be_wq);
3656
3657         be_disable_if_filters(adapter);
3658
3659         if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
3660                 for_all_evt_queues(adapter, eqo, i) {
3661                         napi_disable(&eqo->napi);
3662                         be_disable_busy_poll(eqo);
3663                 }
3664                 adapter->flags &= ~BE_FLAGS_NAPI_ENABLED;
3665         }
3666
3667         be_async_mcc_disable(adapter);
3668
3669         /* Wait for all pending tx completions to arrive so that
3670          * all tx skbs are freed.
3671          */
3672         netif_tx_disable(netdev);
3673         be_tx_compl_clean(adapter);
3674
3675         be_rx_qs_destroy(adapter);
3676
3677         for_all_evt_queues(adapter, eqo, i) {
3678                 if (msix_enabled(adapter))
3679                         synchronize_irq(be_msix_vec_get(adapter, eqo));
3680                 else
3681                         synchronize_irq(netdev->irq);
3682                 be_eq_clean(eqo);
3683         }
3684
3685         be_irq_unregister(adapter);
3686
3687         return 0;
3688 }
3689
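/* Create the RX queues (a default RXQ if needed plus the RSS rings) and
 * program RSS. The indirection table simply repeats the ring IDs in order;
 * e.g. with 4 RSS rings it holds the pattern q0, q1, q2, q3 until the table
 * is full. A fresh hash key is generated with netdev_rss_key_fill().
 */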
3690 static int be_rx_qs_create(struct be_adapter *adapter)
3691 {
3692         struct rss_info *rss = &adapter->rss_info;
3693         u8 rss_key[RSS_HASH_KEY_LEN];
3694         struct be_rx_obj *rxo;
3695         int rc, i, j;
3696
3697         for_all_rx_queues(adapter, rxo, i) {
3698                 rc = be_queue_alloc(adapter, &rxo->q, RX_Q_LEN,
3699                                     sizeof(struct be_eth_rx_d));
3700                 if (rc)
3701                         return rc;
3702         }
3703
3704         if (adapter->need_def_rxq || !adapter->num_rss_qs) {
3705                 rxo = default_rxo(adapter);
3706                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3707                                        rx_frag_size, adapter->if_handle,
3708                                        false, &rxo->rss_id);
3709                 if (rc)
3710                         return rc;
3711         }
3712
3713         for_all_rss_queues(adapter, rxo, i) {
3714                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3715                                        rx_frag_size, adapter->if_handle,
3716                                        true, &rxo->rss_id);
3717                 if (rc)
3718                         return rc;
3719         }
3720
3721         if (be_multi_rxq(adapter)) {
3722                 for (j = 0; j < RSS_INDIR_TABLE_LEN; j += adapter->num_rss_qs) {
3723                         for_all_rss_queues(adapter, rxo, i) {
3724                                 if ((j + i) >= RSS_INDIR_TABLE_LEN)
3725                                         break;
3726                                 rss->rsstable[j + i] = rxo->rss_id;
3727                                 rss->rss_queue[j + i] = i;
3728                         }
3729                 }
3730                 rss->rss_flags = RSS_ENABLE_TCP_IPV4 | RSS_ENABLE_IPV4 |
3731                         RSS_ENABLE_TCP_IPV6 | RSS_ENABLE_IPV6;
3732
3733                 if (!BEx_chip(adapter))
3734                         rss->rss_flags |= RSS_ENABLE_UDP_IPV4 |
3735                                 RSS_ENABLE_UDP_IPV6;
3736
3737                 netdev_rss_key_fill(rss_key, RSS_HASH_KEY_LEN);
3738                 rc = be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3739                                        RSS_INDIR_TABLE_LEN, rss_key);
3740                 if (rc) {
3741                         rss->rss_flags = RSS_ENABLE_NONE;
3742                         return rc;
3743                 }
3744
3745                 memcpy(rss->rss_hkey, rss_key, RSS_HASH_KEY_LEN);
3746         } else {
3747                 /* Disable RSS if only the default RXQ is created */
3748                 rss->rss_flags = RSS_ENABLE_NONE;
3749         }
3750
3751
3752         /* Post 1 less than RXQ-len to avoid head being equal to tail,
3753          * which is a queue empty condition
3754          */
3755         for_all_rx_queues(adapter, rxo, i)
3756                 be_post_rx_frags(rxo, GFP_KERNEL, RX_Q_LEN - 1);
3757
3758         return 0;
3759 }
3760
3761 static int be_enable_if_filters(struct be_adapter *adapter)
3762 {
3763         int status;
3764
3765         status = be_cmd_rx_filter(adapter, BE_IF_FILT_FLAGS_BASIC, ON);
3766         if (status)
3767                 return status;
3768
3769         /* For BE3 VFs, the PF programs the initial MAC address */
3770         if (!(BEx_chip(adapter) && be_virtfn(adapter))) {
3771                 status = be_dev_mac_add(adapter, adapter->netdev->dev_addr);
3772                 if (status)
3773                         return status;
3774                 ether_addr_copy(adapter->dev_mac, adapter->netdev->dev_addr);
3775         }
3776
3777         if (adapter->vlans_added)
3778                 be_vid_config(adapter);
3779
3780         __be_set_rx_mode(adapter);
3781
3782         return 0;
3783 }
3784
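/* ndo_open handler: create the RX queues, program the RX filters, register
 * IRQs, arm all CQs/EQs, enable NAPI and start the TX queues. On Skyhawk
 * the stack is also asked to replay the current UDP tunnel (VxLAN) ports.
 */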
3785 static int be_open(struct net_device *netdev)
3786 {
3787         struct be_adapter *adapter = netdev_priv(netdev);
3788         struct be_eq_obj *eqo;
3789         struct be_rx_obj *rxo;
3790         struct be_tx_obj *txo;
3791         u8 link_status;
3792         int status, i;
3793
3794         status = be_rx_qs_create(adapter);
3795         if (status)
3796                 goto err;
3797
3798         status = be_enable_if_filters(adapter);
3799         if (status)
3800                 goto err;
3801
3802         status = be_irq_register(adapter);
3803         if (status)
3804                 goto err;
3805
3806         for_all_rx_queues(adapter, rxo, i)
3807                 be_cq_notify(adapter, rxo->cq.id, true, 0);
3808
3809         for_all_tx_queues(adapter, txo, i)
3810                 be_cq_notify(adapter, txo->cq.id, true, 0);
3811
3812         be_async_mcc_enable(adapter);
3813
3814         for_all_evt_queues(adapter, eqo, i) {
3815                 napi_enable(&eqo->napi);
3816                 be_enable_busy_poll(eqo);
3817                 be_eq_notify(adapter, eqo->q.id, true, true, 0, 0);
3818         }
3819         adapter->flags |= BE_FLAGS_NAPI_ENABLED;
3820
3821         status = be_cmd_link_status_query(adapter, NULL, &link_status, 0);
3822         if (!status)
3823                 be_link_status_update(adapter, link_status);
3824
3825         netif_tx_start_all_queues(netdev);
3826         if (skyhawk_chip(adapter))
3827                 udp_tunnel_get_rx_info(netdev);
3828
3829         return 0;
3830 err:
3831         be_close(adapter->netdev);
3832         return -EIO;
3833 }
3834
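/* Derive a seed MAC for VFs: keep the OUI (first 3 bytes) of the PF's
 * current MAC and fill the last 3 bytes from a jhash of that MAC. As a
 * hypothetical example, PF MAC 00:00:c9:aa:bb:cc with jhash 0x00123456
 * yields the seed 00:00:c9:12:34:56.
 */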
3835 static void be_vf_eth_addr_generate(struct be_adapter *adapter, u8 *mac)
3836 {
3837         u32 addr;
3838
3839         addr = jhash(adapter->netdev->dev_addr, ETH_ALEN, 0);
3840
3841         mac[5] = (u8)(addr & 0xFF);
3842         mac[4] = (u8)((addr >> 8) & 0xFF);
3843         mac[3] = (u8)((addr >> 16) & 0xFF);
3844         /* Use the OUI from the current MAC address */
3845         memcpy(mac, adapter->netdev->dev_addr, 3);
3846 }
3847
3848 /*
3849  * Generate a seed MAC address from the PF MAC Address using jhash.
3850  * MAC addresses for VFs are assigned incrementally starting from the seed.
3851  * These addresses are programmed in the ASIC by the PF and the VF driver
3852  * queries for the MAC address during its probe.
3853  */
3854 static int be_vf_eth_addr_config(struct be_adapter *adapter)
3855 {
3856         u32 vf;
3857         int status = 0;
3858         u8 mac[ETH_ALEN];
3859         struct be_vf_cfg *vf_cfg;
3860
3861         be_vf_eth_addr_generate(adapter, mac);
3862
3863         for_all_vfs(adapter, vf_cfg, vf) {
3864                 if (BEx_chip(adapter))
3865                         status = be_cmd_pmac_add(adapter, mac,
3866                                                  vf_cfg->if_handle,
3867                                                  &vf_cfg->pmac_id, vf + 1);
3868                 else
3869                         status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
3870                                                 vf + 1);
3871
3872                 if (status)
3873                         dev_err(&adapter->pdev->dev,
3874                                 "Mac address assignment failed for VF %d\n",
3875                                 vf);
3876                 else
3877                         memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3878
3879                 mac[5] += 1;
3880         }
3881         return status;
3882 }
3883
3884 static int be_vfs_mac_query(struct be_adapter *adapter)
3885 {
3886         int status, vf;
3887         u8 mac[ETH_ALEN];
3888         struct be_vf_cfg *vf_cfg;
3889
3890         for_all_vfs(adapter, vf_cfg, vf) {
3891                 status = be_cmd_get_active_mac(adapter, vf_cfg->pmac_id,
3892                                                mac, vf_cfg->if_handle,
3893                                                false, vf+1);
3894                 if (status)
3895                         return status;
3896                 memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3897         }
3898         return 0;
3899 }
3900
3901 static void be_vf_clear(struct be_adapter *adapter)
3902 {
3903         struct be_vf_cfg *vf_cfg;
3904         u32 vf;
3905
3906         if (pci_vfs_assigned(adapter->pdev)) {
3907                 dev_warn(&adapter->pdev->dev,
3908                          "VFs are assigned to VMs: not disabling VFs\n");
3909                 goto done;
3910         }
3911
3912         pci_disable_sriov(adapter->pdev);
3913
3914         for_all_vfs(adapter, vf_cfg, vf) {
3915                 if (BEx_chip(adapter))
3916                         be_cmd_pmac_del(adapter, vf_cfg->if_handle,
3917                                         vf_cfg->pmac_id, vf + 1);
3918                 else
3919                         be_cmd_set_mac(adapter, NULL, vf_cfg->if_handle,
3920                                        vf + 1);
3921
3922                 be_cmd_if_destroy(adapter, vf_cfg->if_handle, vf + 1);
3923         }
3924
3925         if (BE3_chip(adapter))
3926                 be_cmd_set_hsw_config(adapter, 0, 0,
3927                                       adapter->if_handle,
3928                                       PORT_FWD_TYPE_PASSTHRU, 0);
3929 done:
3930         kfree(adapter->vf_cfg);
3931         adapter->num_vfs = 0;
3932         adapter->flags &= ~BE_FLAGS_SRIOV_ENABLED;
3933 }
3934
3935 static void be_clear_queues(struct be_adapter *adapter)
3936 {
3937         be_mcc_queues_destroy(adapter);
3938         be_rx_cqs_destroy(adapter);
3939         be_tx_queues_destroy(adapter);
3940         be_evt_queues_destroy(adapter);
3941 }
3942
3943 static void be_cancel_worker(struct be_adapter *adapter)
3944 {
3945         if (adapter->flags & BE_FLAGS_WORKER_SCHEDULED) {
3946                 cancel_delayed_work_sync(&adapter->work);
3947                 adapter->flags &= ~BE_FLAGS_WORKER_SCHEDULED;
3948         }
3949 }
3950
3951 static void be_cancel_err_detection(struct be_adapter *adapter)
3952 {
3953         struct be_error_recovery *err_rec = &adapter->error_recovery;
3954
3955         if (!be_err_recovery_workq)
3956                 return;
3957
3958         if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) {
3959                 cancel_delayed_work_sync(&err_rec->err_detection_work);
3960                 adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED;
3961         }
3962 }
3963
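/* Undo VxLAN offload setup: convert the tunnel interface back to a normal
 * one, clear the VxLAN port in FW and strip the UDP-tunnel GSO features
 * from the netdev.
 */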
3964 static void be_disable_vxlan_offloads(struct be_adapter *adapter)
3965 {
3966         struct net_device *netdev = adapter->netdev;
3967
3968         if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS)
3969                 be_cmd_manage_iface(adapter, adapter->if_handle,
3970                                     OP_CONVERT_TUNNEL_TO_NORMAL);
3971
3972         if (adapter->vxlan_port)
3973                 be_cmd_set_vxlan_port(adapter, 0);
3974
3975         adapter->flags &= ~BE_FLAGS_VXLAN_OFFLOADS;
3976         adapter->vxlan_port = 0;
3977
3978         netdev->hw_enc_features = 0;
3979         netdev->hw_features &= ~(NETIF_F_GSO_UDP_TUNNEL);
3980         netdev->features &= ~(NETIF_F_GSO_UDP_TUNNEL);
3981 }
3982
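/* Compute the per-VF resource template used when redistributing the PF
 * pool among the PF and its VFs. As a hypothetical example: with
 * max_rss_qs = 64 and num_vfs = 7, each function gets 64 / (7 + 1) = 8 RSS
 * queues (subject to the SH_VF_MAX_NIC_EQS cap); MAC, VLAN, IFACE and MCCQ
 * counts are divided the same way when FW reports them as modifiable.
 */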
3983 static void be_calculate_vf_res(struct be_adapter *adapter, u16 num_vfs,
3984                                 struct be_resources *vft_res)
3985 {
3986         struct be_resources res = adapter->pool_res;
3987         u32 vf_if_cap_flags = res.vf_if_cap_flags;
3988         struct be_resources res_mod = {0};
3989         u16 num_vf_qs = 1;
3990
3991         /* Distribute the queue resources among the PF and its VFs */
3992         if (num_vfs) {
3993                 /* Divide the rx queues evenly among the VFs and the PF, capped
3994                  * at VF-EQ-count. Any remainder queues belong to the PF.
3995                  */
3996                 num_vf_qs = min(SH_VF_MAX_NIC_EQS,
3997                                 res.max_rss_qs / (num_vfs + 1));
3998
3999                 /* Skyhawk-R chip supports only MAX_PORT_RSS_TABLES
4000                  * RSS Tables per port. Provide RSS on VFs only if the number of
4001                  * VFs requested is less than its PF pool's RSS Tables limit.
4002                  */
4003                 if (num_vfs >= be_max_pf_pool_rss_tables(adapter))
4004                         num_vf_qs = 1;
4005         }
4006
4007         /* Resource with fields set to all '1's by GET_PROFILE_CONFIG cmd,
4008          * which are modifiable using SET_PROFILE_CONFIG cmd.
4009          */
4010         be_cmd_get_profile_config(adapter, &res_mod, NULL, ACTIVE_PROFILE_TYPE,
4011                                   RESOURCE_MODIFIABLE, 0);
4012
4013         /* If RSS IFACE capability flags are modifiable for a VF, set the
4014          * capability flag as valid and set RSS and DEFQ_RSS IFACE flags if
4015          * more than 1 RSSQ is available for a VF.
4016          * Otherwise, provision only 1 queue pair for VF.
4017          */
4018         if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_RSS) {
4019                 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4020                 if (num_vf_qs > 1) {
4021                         vf_if_cap_flags |= BE_IF_FLAGS_RSS;
4022                         if (res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS)
4023                                 vf_if_cap_flags |= BE_IF_FLAGS_DEFQ_RSS;
4024                 } else {
4025                         vf_if_cap_flags &= ~(BE_IF_FLAGS_RSS |
4026                                              BE_IF_FLAGS_DEFQ_RSS);
4027                 }
4028         } else {
4029                 num_vf_qs = 1;
4030         }
4031
4032         if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
4033                 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4034                 vf_if_cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4035         }
4036
4037         vft_res->vf_if_cap_flags = vf_if_cap_flags;
4038         vft_res->max_rx_qs = num_vf_qs;
4039         vft_res->max_rss_qs = num_vf_qs;
4040         vft_res->max_tx_qs = res.max_tx_qs / (num_vfs + 1);
4041         vft_res->max_cq_count = res.max_cq_count / (num_vfs + 1);
4042
4043         /* Distribute unicast MACs, VLANs, IFACE count and MCCQ count equally
4044          * among the PF and its VFs, if the fields are changeable
4045          */
4046         if (res_mod.max_uc_mac == FIELD_MODIFIABLE)
4047                 vft_res->max_uc_mac = res.max_uc_mac / (num_vfs + 1);
4048
4049         if (res_mod.max_vlans == FIELD_MODIFIABLE)
4050                 vft_res->max_vlans = res.max_vlans / (num_vfs + 1);
4051
4052         if (res_mod.max_iface_count == FIELD_MODIFIABLE)
4053                 vft_res->max_iface_count = res.max_iface_count / (num_vfs + 1);
4054
4055         if (res_mod.max_mcc_count == FIELD_MODIFIABLE)
4056                 vft_res->max_mcc_count = res.max_mcc_count / (num_vfs + 1);
4057 }
4058
4059 static void be_if_destroy(struct be_adapter *adapter)
4060 {
4061         be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4062
4063         kfree(adapter->pmac_id);
4064         adapter->pmac_id = NULL;
4065
4066         kfree(adapter->mc_list);
4067         adapter->mc_list = NULL;
4068
4069         kfree(adapter->uc_list);
4070         adapter->uc_list = NULL;
4071 }
4072
4073 static int be_clear(struct be_adapter *adapter)
4074 {
4075         struct pci_dev *pdev = adapter->pdev;
4076         struct  be_resources vft_res = {0};
4077
4078         be_cancel_worker(adapter);
4079
4080         flush_workqueue(be_wq);
4081
4082         if (sriov_enabled(adapter))
4083                 be_vf_clear(adapter);
4084
4085         /* Re-configure FW to distribute resources evenly across max-supported
4086          * number of VFs, only when VFs are not already enabled.
4087          */
4088         if (skyhawk_chip(adapter) && be_physfn(adapter) &&
4089             !pci_vfs_assigned(pdev)) {
4090                 be_calculate_vf_res(adapter,
4091                                     pci_sriov_get_totalvfs(pdev),
4092                                     &vft_res);
4093                 be_cmd_set_sriov_config(adapter, adapter->pool_res,
4094                                         pci_sriov_get_totalvfs(pdev),
4095                                         &vft_res);
4096         }
4097
4098         be_disable_vxlan_offloads(adapter);
4099
4100         be_if_destroy(adapter);
4101
4102         be_clear_queues(adapter);
4103
4104         be_msix_disable(adapter);
4105         adapter->flags &= ~BE_FLAGS_SETUP_DONE;
4106         return 0;
4107 }
4108
4109 static int be_vfs_if_create(struct be_adapter *adapter)
4110 {
4111         struct be_resources res = {0};
4112         u32 cap_flags, en_flags, vf;
4113         struct be_vf_cfg *vf_cfg;
4114         int status;
4115
4116         /* If a FW profile exists, then cap_flags are updated */
4117         cap_flags = BE_VF_IF_EN_FLAGS;
4118
4119         for_all_vfs(adapter, vf_cfg, vf) {
4120                 if (!BE3_chip(adapter)) {
4121                         status = be_cmd_get_profile_config(adapter, &res, NULL,
4122                                                            ACTIVE_PROFILE_TYPE,
4123                                                            RESOURCE_LIMITS,
4124                                                            vf + 1);
4125                         if (!status) {
4126                                 cap_flags = res.if_cap_flags;
4127                                 /* Prevent VFs from enabling VLAN promiscuous
4128                                  * mode
4129                                  */
4130                                 cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4131                         }
4132                 }
4133
4134                 /* PF should enable IF flags during proxy if_create call */
4135                 en_flags = cap_flags & BE_VF_IF_EN_FLAGS;
4136                 status = be_cmd_if_create(adapter, cap_flags, en_flags,
4137                                           &vf_cfg->if_handle, vf + 1);
4138                 if (status)
4139                         return status;
4140         }
4141
4142         return 0;
4143 }
4144
4145 static int be_vf_setup_init(struct be_adapter *adapter)
4146 {
4147         struct be_vf_cfg *vf_cfg;
4148         int vf;
4149
4150         adapter->vf_cfg = kcalloc(adapter->num_vfs, sizeof(*vf_cfg),
4151                                   GFP_KERNEL);
4152         if (!adapter->vf_cfg)
4153                 return -ENOMEM;
4154
4155         for_all_vfs(adapter, vf_cfg, vf) {
4156                 vf_cfg->if_handle = -1;
4157                 vf_cfg->pmac_id = -1;
4158         }
4159         return 0;
4160 }
4161
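/* Bring up SR-IOV: query existing VF interfaces/MACs or create new ones,
 * grant FILTMGMT privilege where possible, configure QoS, spoof-check and
 * link state, then enable SR-IOV in the PCI layer. On BE3, VEB forwarding
 * is enabled only after SR-IOV is up.
 */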
4162 static int be_vf_setup(struct be_adapter *adapter)
4163 {
4164         struct device *dev = &adapter->pdev->dev;
4165         struct be_vf_cfg *vf_cfg;
4166         int status, old_vfs, vf;
4167         bool spoofchk;
4168
4169         old_vfs = pci_num_vf(adapter->pdev);
4170
4171         status = be_vf_setup_init(adapter);
4172         if (status)
4173                 goto err;
4174
4175         if (old_vfs) {
4176                 for_all_vfs(adapter, vf_cfg, vf) {
4177                         status = be_cmd_get_if_id(adapter, vf_cfg, vf);
4178                         if (status)
4179                                 goto err;
4180                 }
4181
4182                 status = be_vfs_mac_query(adapter);
4183                 if (status)
4184                         goto err;
4185         } else {
4186                 status = be_vfs_if_create(adapter);
4187                 if (status)
4188                         goto err;
4189
4190                 status = be_vf_eth_addr_config(adapter);
4191                 if (status)
4192                         goto err;
4193         }
4194
4195         for_all_vfs(adapter, vf_cfg, vf) {
4196                 /* Allow VFs to program MAC/VLAN filters */
4197                 status = be_cmd_get_fn_privileges(adapter, &vf_cfg->privileges,
4198                                                   vf + 1);
4199                 if (!status && !(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
4200                         status = be_cmd_set_fn_privileges(adapter,
4201                                                           vf_cfg->privileges |
4202                                                           BE_PRIV_FILTMGMT,
4203                                                           vf + 1);
4204                         if (!status) {
4205                                 vf_cfg->privileges |= BE_PRIV_FILTMGMT;
4206                                 dev_info(dev, "VF%d has FILTMGMT privilege\n",
4207                                          vf);
4208                         }
4209                 }
4210
4211                 /* Allow full available bandwidth */
4212                 if (!old_vfs)
4213                         be_cmd_config_qos(adapter, 0, 0, vf + 1);
4214
4215                 status = be_cmd_get_hsw_config(adapter, NULL, vf + 1,
4216                                                vf_cfg->if_handle, NULL,
4217                                                &spoofchk);
4218                 if (!status)
4219                         vf_cfg->spoofchk = spoofchk;
4220
4221                 if (!old_vfs) {
4222                         be_cmd_enable_vf(adapter, vf + 1);
4223                         be_cmd_set_logical_link_config(adapter,
4224                                                        IFLA_VF_LINK_STATE_AUTO,
4225                                                        vf+1);
4226                 }
4227         }
4228
4229         if (!old_vfs) {
4230                 status = pci_enable_sriov(adapter->pdev, adapter->num_vfs);
4231                 if (status) {
4232                         dev_err(dev, "SRIOV enable failed\n");
4233                         adapter->num_vfs = 0;
4234                         goto err;
4235                 }
4236         }
4237
4238         if (BE3_chip(adapter)) {
4239                 /* On BE3, enable VEB only when SRIOV is enabled */
4240                 status = be_cmd_set_hsw_config(adapter, 0, 0,
4241                                                adapter->if_handle,
4242                                                PORT_FWD_TYPE_VEB, 0);
4243                 if (status)
4244                         goto err;
4245         }
4246
4247         adapter->flags |= BE_FLAGS_SRIOV_ENABLED;
4248         return 0;
4249 err:
4250         dev_err(dev, "VF setup failed\n");
4251         be_vf_clear(adapter);
4252         return status;
4253 }
4254
4255 /* Converting function_mode bits on BE3 to SH mc_type enums */
4256
4257 static u8 be_convert_mc_type(u32 function_mode)
4258 {
4259         if (function_mode & VNIC_MODE && function_mode & QNQ_MODE)
4260                 return vNIC1;
4261         else if (function_mode & QNQ_MODE)
4262                 return FLEX10;
4263         else if (function_mode & VNIC_MODE)
4264                 return vNIC2;
4265         else if (function_mode & UMC_ENABLED)
4266                 return UMC;
4267         else
4268                 return MC_NONE;
4269 }
4270
4271 /* On BE2/BE3 FW does not suggest the supported limits */
4272 static void BEx_get_resources(struct be_adapter *adapter,
4273                               struct be_resources *res)
4274 {
4275         bool use_sriov = adapter->num_vfs ? true : false;
4276
4277         if (be_physfn(adapter))
4278                 res->max_uc_mac = BE_UC_PMAC_COUNT;
4279         else
4280                 res->max_uc_mac = BE_VF_UC_PMAC_COUNT;
4281
4282         adapter->mc_type = be_convert_mc_type(adapter->function_mode);
4283
4284         if (be_is_mc(adapter)) {
4285                 /* Assuming that there are 4 channels per port
4286                  * when multi-channel is enabled
4287                  */
4288                 if (be_is_qnq_mode(adapter))
4289                         res->max_vlans = BE_NUM_VLANS_SUPPORTED/8;
4290                 else
4291                         /* In a non-qnq multichannel mode, the pvid
4292                          * takes up one vlan entry
4293                          */
4294                         res->max_vlans = (BE_NUM_VLANS_SUPPORTED / 4) - 1;
4295         } else {
4296                 res->max_vlans = BE_NUM_VLANS_SUPPORTED;
4297         }
4298
4299         res->max_mcast_mac = BE_MAX_MC;
4300
4301         /* 1) For BE3 1Gb ports, FW does not support multiple TXQs
4302          * 2) Create multiple TX rings on a BE3-R multi-channel interface
4303          *    *only* if it is RSS-capable.
4304          */
4305         if (BE2_chip(adapter) || use_sriov || (adapter->port_num > 1) ||
4306             be_virtfn(adapter) ||
4307             (be_is_mc(adapter) &&
4308              !(adapter->function_caps & BE_FUNCTION_CAPS_RSS))) {
4309                 res->max_tx_qs = 1;
4310         } else if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) {
4311                 struct be_resources super_nic_res = {0};
4312
4313                 /* On a SuperNIC profile, the driver needs to use the
4314                  * GET_PROFILE_CONFIG cmd to query the per-function TXQ limits
4315                  */
4316                 be_cmd_get_profile_config(adapter, &super_nic_res, NULL,
4317                                           ACTIVE_PROFILE_TYPE, RESOURCE_LIMITS,
4318                                           0);
4319                 /* Some old versions of BE3 FW don't report max_tx_qs value */
4320                 res->max_tx_qs = super_nic_res.max_tx_qs ? : BE3_MAX_TX_QS;
4321         } else {
4322                 res->max_tx_qs = BE3_MAX_TX_QS;
4323         }
4324
4325         if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) &&
4326             !use_sriov && be_physfn(adapter))
4327                 res->max_rss_qs = (adapter->be3_native) ?
4328                                            BE3_MAX_RSS_QS : BE2_MAX_RSS_QS;
4329         res->max_rx_qs = res->max_rss_qs + 1;
4330
4331         if (be_physfn(adapter))
4332                 res->max_evt_qs = (be_max_vfs(adapter) > 0) ?
4333                                         BE3_SRIOV_MAX_EVT_QS : BE3_MAX_EVT_QS;
4334         else
4335                 res->max_evt_qs = 1;
4336
4337         res->if_cap_flags = BE_IF_CAP_FLAGS_WANT;
4338         res->if_cap_flags &= ~BE_IF_FLAGS_DEFQ_RSS;
4339         if (!(adapter->function_caps & BE_FUNCTION_CAPS_RSS))
4340                 res->if_cap_flags &= ~BE_IF_FLAGS_RSS;
4341 }
4342
4343 static void be_setup_init(struct be_adapter *adapter)
4344 {
4345         adapter->vlan_prio_bmap = 0xff;
4346         adapter->phy.link_speed = -1;
4347         adapter->if_handle = -1;
4348         adapter->be3_native = false;
4349         adapter->if_flags = 0;
4350         adapter->phy_state = BE_UNKNOWN_PHY_STATE;
4351         if (be_physfn(adapter))
4352                 adapter->cmd_privileges = MAX_PRIVILEGES;
4353         else
4354                 adapter->cmd_privileges = MIN_PRIVILEGES;
4355 }
4356
4357 /* HW supports only MAX_PORT_RSS_TABLES RSS Policy Tables per port.
4358  * However, this HW limitation is not exposed to the host via any SLI cmd.
4359  * As a result, in the case of SRIOV and in particular multi-partition configs
4360  * the driver needs to calculate a proportional share of RSS Tables per PF-pool
4361  * for distribution between the VFs. This self-imposed limit will determine the
4362  * number of VFs for which RSS can be enabled.
4363  */
4364 static void be_calculate_pf_pool_rss_tables(struct be_adapter *adapter)
4365 {
4366         struct be_port_resources port_res = {0};
4367         u8 rss_tables_on_port;
4368         u16 max_vfs = be_max_vfs(adapter);
4369
4370         be_cmd_get_profile_config(adapter, NULL, &port_res, SAVED_PROFILE_TYPE,
4371                                   RESOURCE_LIMITS, 0);
4372
4373         rss_tables_on_port = MAX_PORT_RSS_TABLES - port_res.nic_pfs;
4374
4375         /* Each PF Pool's RSS Tables limit =
4376          * PF's Max VFs / Total_Max_VFs on Port * RSS Tables on Port
4377          */
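        /* Worked example (illustrative numbers only): with 30 RSS tables
         * left on the port and this PF allowed 32 of the port's 64 VFs,
         * the pool gets 32 * 30 / 64 = 15 RSS tables.
         */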
4378         adapter->pool_res.max_rss_tables =
4379                 max_vfs * rss_tables_on_port / port_res.max_vfs;
4380 }
4381
4382 static int be_get_sriov_config(struct be_adapter *adapter)
4383 {
4384         struct be_resources res = {0};
4385         int max_vfs, old_vfs;
4386
4387         be_cmd_get_profile_config(adapter, &res, NULL, ACTIVE_PROFILE_TYPE,
4388                                   RESOURCE_LIMITS, 0);
4389
4390         /* Some old versions of BE3 FW don't report max_vfs value */
4391         if (BE3_chip(adapter) && !res.max_vfs) {
4392                 max_vfs = pci_sriov_get_totalvfs(adapter->pdev);
4393                 res.max_vfs = max_vfs > 0 ? min(MAX_VFS, max_vfs) : 0;
4394         }
4395
4396         adapter->pool_res = res;
4397
4398         /* If during previous unload of the driver, the VFs were not disabled,
4399          * then we cannot rely on the PF POOL limits for the TotalVFs value.
4400          * Instead use the TotalVFs value stored in the pci-dev struct.
4401          */
4402         old_vfs = pci_num_vf(adapter->pdev);
4403         if (old_vfs) {
4404                 dev_info(&adapter->pdev->dev, "%d VFs are already enabled\n",
4405                          old_vfs);
4406
4407                 adapter->pool_res.max_vfs =
4408                         pci_sriov_get_totalvfs(adapter->pdev);
4409                 adapter->num_vfs = old_vfs;
4410         }
4411
4412         if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4413                 be_calculate_pf_pool_rss_tables(adapter);
4414                 dev_info(&adapter->pdev->dev,
4415                          "RSS can be enabled for all VFs if num_vfs <= %d\n",
4416                          be_max_pf_pool_rss_tables(adapter));
4417         }
4418         return 0;
4419 }
4420
4421 static void be_alloc_sriov_res(struct be_adapter *adapter)
4422 {
4423         int old_vfs = pci_num_vf(adapter->pdev);
4424         struct  be_resources vft_res = {0};
4425         int status;
4426
4427         be_get_sriov_config(adapter);
4428
4429         if (!old_vfs)
4430                 pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter));
4431
4432         /* When the HW is in SRIOV capable configuration, the PF-pool
4433          * resources are given to PF during driver load, if there are no
4434          * old VFs. This facility is not available in BE3 FW.
4435          * Also, this is done by FW in Lancer chip.
4436          */
4437         if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4438                 be_calculate_vf_res(adapter, 0, &vft_res);
4439                 status = be_cmd_set_sriov_config(adapter, adapter->pool_res, 0,
4440                                                  &vft_res);
4441                 if (status)
4442                         dev_err(&adapter->pdev->dev,
4443                                 "Failed to optimize SRIOV resources\n");
4444         }
4445 }
4446
4447 static int be_get_resources(struct be_adapter *adapter)
4448 {
4449         struct device *dev = &adapter->pdev->dev;
4450         struct be_resources res = {0};
4451         int status;
4452
4453         /* For Lancer, SH etc. read per-function resource limits from FW.
4454          * GET_FUNC_CONFIG returns per-function guaranteed limits.
4455          * GET_PROFILE_CONFIG returns PCI-E related limits and PF-pool limits.
4456          */
4457         if (BEx_chip(adapter)) {
4458                 BEx_get_resources(adapter, &res);
4459         } else {
4460                 status = be_cmd_get_func_config(adapter, &res);
4461                 if (status)
4462                         return status;
4463
4464                 /* If a default RXQ must be created, we'll use up one RSSQ */
4465                 if (res.max_rss_qs && res.max_rss_qs == res.max_rx_qs &&
4466                     !(res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS))
4467                         res.max_rss_qs -= 1;
4468         }
4469
4470         /* If RoCE is supported stash away half the EQs for RoCE */
4471         res.max_nic_evt_qs = be_roce_supported(adapter) ?
4472                                 res.max_evt_qs / 2 : res.max_evt_qs;
4473         adapter->res = res;
4474
4475         /* If FW supports RSS default queue, then skip creating non-RSS
4476          * queue for non-IP traffic.
4477          */
4478         adapter->need_def_rxq = (be_if_cap_flags(adapter) &
4479                                  BE_IF_FLAGS_DEFQ_RSS) ? 0 : 1;
4480
4481         dev_info(dev, "Max: txqs %d, rxqs %d, rss %d, eqs %d, vfs %d\n",
4482                  be_max_txqs(adapter), be_max_rxqs(adapter),
4483                  be_max_rss(adapter), be_max_nic_eqs(adapter),
4484                  be_max_vfs(adapter));
4485         dev_info(dev, "Max: uc-macs %d, mc-macs %d, vlans %d\n",
4486                  be_max_uc(adapter), be_max_mc(adapter),
4487                  be_max_vlans(adapter));
4488
4489         /* Ensure RX and TX queues are created in pairs at init time */
4490         adapter->cfg_num_rx_irqs =
4491                                 min_t(u16, netif_get_num_default_rss_queues(),
4492                                       be_max_qp_irqs(adapter));
4493         adapter->cfg_num_tx_irqs = adapter->cfg_num_rx_irqs;
4494         return 0;
4495 }
4496
4497 static int be_get_config(struct be_adapter *adapter)
4498 {
4499         int status, level;
4500         u16 profile_id;
4501
4502         status = be_cmd_get_cntl_attributes(adapter);
4503         if (status)
4504                 return status;
4505
4506         status = be_cmd_query_fw_cfg(adapter);
4507         if (status)
4508                 return status;
4509
4510         if (!lancer_chip(adapter) && be_physfn(adapter))
4511                 be_cmd_get_fat_dump_len(adapter, &adapter->fat_dump_len);
4512
4513         if (BEx_chip(adapter)) {
4514                 level = be_cmd_get_fw_log_level(adapter);
4515                 adapter->msg_enable =
4516                         level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0;
4517         }
4518
4519         be_cmd_get_acpi_wol_cap(adapter);
4520         pci_enable_wake(adapter->pdev, PCI_D3hot, adapter->wol_en);
4521         pci_enable_wake(adapter->pdev, PCI_D3cold, adapter->wol_en);
4522
4523         be_cmd_query_port_name(adapter);
4524
4525         if (be_physfn(adapter)) {
4526                 status = be_cmd_get_active_profile(adapter, &profile_id);
4527                 if (!status)
4528                         dev_info(&adapter->pdev->dev,
4529                                  "Using profile 0x%x\n", profile_id);
4530         }
4531
4532         return 0;
4533 }
4534
4535 static int be_mac_setup(struct be_adapter *adapter)
4536 {
4537         u8 mac[ETH_ALEN];
4538         int status;
4539
4540         if (is_zero_ether_addr(adapter->netdev->dev_addr)) {
4541                 status = be_cmd_get_perm_mac(adapter, mac);
4542                 if (status)
4543                         return status;
4544
4545                 memcpy(adapter->netdev->dev_addr, mac, ETH_ALEN);
4546                 memcpy(adapter->netdev->perm_addr, mac, ETH_ALEN);
4547         }
4548
4549         return 0;
4550 }
4551
4552 static void be_schedule_worker(struct be_adapter *adapter)
4553 {
4554         queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
4555         adapter->flags |= BE_FLAGS_WORKER_SCHEDULED;
4556 }
4557
4558 static void be_destroy_err_recovery_workq(void)
4559 {
4560         if (!be_err_recovery_workq)
4561                 return;
4562
4563         flush_workqueue(be_err_recovery_workq);
4564         destroy_workqueue(be_err_recovery_workq);
4565         be_err_recovery_workq = NULL;
4566 }
4567
4568 static void be_schedule_err_detection(struct be_adapter *adapter, u32 delay)
4569 {
4570         struct be_error_recovery *err_rec = &adapter->error_recovery;
4571
4572         if (!be_err_recovery_workq)
4573                 return;
4574
4575         queue_delayed_work(be_err_recovery_workq, &err_rec->err_detection_work,
4576                            msecs_to_jiffies(delay));
4577         adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED;
4578 }
4579
4580 static int be_setup_queues(struct be_adapter *adapter)
4581 {
4582         struct net_device *netdev = adapter->netdev;
4583         int status;
4584
4585         status = be_evt_queues_create(adapter);
4586         if (status)
4587                 goto err;
4588
4589         status = be_tx_qs_create(adapter);
4590         if (status)
4591                 goto err;
4592
4593         status = be_rx_cqs_create(adapter);
4594         if (status)
4595                 goto err;
4596
4597         status = be_mcc_queues_create(adapter);
4598         if (status)
4599                 goto err;
4600
4601         status = netif_set_real_num_rx_queues(netdev, adapter->num_rx_qs);
4602         if (status)
4603                 goto err;
4604
4605         status = netif_set_real_num_tx_queues(netdev, adapter->num_tx_qs);
4606         if (status)
4607                 goto err;
4608
4609         return 0;
4610 err:
4611         dev_err(&adapter->pdev->dev, "queue_setup failed\n");
4612         return status;
4613 }
4614
4615 static int be_if_create(struct be_adapter *adapter)
4616 {
4617         u32 en_flags = BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS;
4618         u32 cap_flags = be_if_cap_flags(adapter);
4619         int status;
4620
4621         /* alloc required memory for other filtering fields */
4622         adapter->pmac_id = kcalloc(be_max_uc(adapter),
4623                                    sizeof(*adapter->pmac_id), GFP_KERNEL);
4624         if (!adapter->pmac_id)
4625                 return -ENOMEM;
4626
4627         adapter->mc_list = kcalloc(be_max_mc(adapter),
4628                                    sizeof(*adapter->mc_list), GFP_KERNEL);
4629         if (!adapter->mc_list)
4630                 return -ENOMEM;
4631
4632         adapter->uc_list = kcalloc(be_max_uc(adapter),
4633                                    sizeof(*adapter->uc_list), GFP_KERNEL);
4634         if (!adapter->uc_list)
4635                 return -ENOMEM;
4636
4637         if (adapter->cfg_num_rx_irqs == 1)
4638                 cap_flags &= ~(BE_IF_FLAGS_DEFQ_RSS | BE_IF_FLAGS_RSS);
4639
4640         en_flags &= cap_flags;
4641         /* will enable all the needed filter flags in be_open() */
4642         status = be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags,
4643                                   &adapter->if_handle, 0);
4644
4645         if (status)
4646                 return status;
4647
4648         return 0;
4649 }
4650
4651 int be_update_queues(struct be_adapter *adapter)
4652 {
4653         struct net_device *netdev = adapter->netdev;
4654         int status;
4655
4656         if (netif_running(netdev))
4657                 be_close(netdev);
4658
4659         be_cancel_worker(adapter);
4660
4661         /* If any vectors have been shared with RoCE we cannot re-program
4662          * the MSIx table.
4663          */
4664         if (!adapter->num_msix_roce_vec)
4665                 be_msix_disable(adapter);
4666
4667         be_clear_queues(adapter);
4668         status = be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4669         if (status)
4670                 return status;
4671
4672         if (!msix_enabled(adapter)) {
4673                 status = be_msix_enable(adapter);
4674                 if (status)
4675                         return status;
4676         }
4677
4678         status = be_if_create(adapter);
4679         if (status)
4680                 return status;
4681
4682         status = be_setup_queues(adapter);
4683         if (status)
4684                 return status;
4685
4686         be_schedule_worker(adapter);
4687
4688         if (netif_running(netdev))
4689                 status = be_open(netdev);
4690
4691         return status;
4692 }
4693
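/* Parse the leading (major) number from a dotted FW version string;
 * returns 0 if the version string cannot be parsed.
 */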
4694 static inline int fw_major_num(const char *fw_ver)
4695 {
4696         int fw_major = 0, i;
4697
4698         i = sscanf(fw_ver, "%d.", &fw_major);
4699         if (i != 1)
4700                 return 0;
4701
4702         return fw_major;
4703 }
4704
4705 /* If it is error recovery, FLR the PF
4706  * Else if any VFs are already enabled, don't FLR the PF
4707  */
4708 static bool be_reset_required(struct be_adapter *adapter)
4709 {
4710         if (be_error_recovering(adapter))
4711                 return true;
4712         else
4713                 return pci_num_vf(adapter->pdev) == 0;
4714 }
4715
4716 /* Wait for the FW to be ready and perform the required initialization */
4717 static int be_func_init(struct be_adapter *adapter)
4718 {
4719         int status;
4720
4721         status = be_fw_wait_ready(adapter);
4722         if (status)
4723                 return status;
4724
4725         /* FW is now ready; clear errors to allow cmds/doorbell */
4726         be_clear_error(adapter, BE_CLEAR_ALL);
4727
4728         if (be_reset_required(adapter)) {
4729                 status = be_cmd_reset_function(adapter);
4730                 if (status)
4731                         return status;
4732
4733                 /* Wait for interrupts to quiesce after an FLR */
4734                 msleep(100);
4735         }
4736
4737         /* Tell FW we're ready to fire cmds */
4738         status = be_cmd_fw_init(adapter);
4739         if (status)
4740                 return status;
4741
4742         /* Allow interrupts for other ULPs running on NIC function */
4743         be_intr_set(adapter, true);
4744
4745         return 0;
4746 }
4747
4748 static int be_setup(struct be_adapter *adapter)
4749 {
4750         struct device *dev = &adapter->pdev->dev;
4751         int status;
4752
4753         status = be_func_init(adapter);
4754         if (status)
4755                 return status;
4756
4757         be_setup_init(adapter);
4758
4759         if (!lancer_chip(adapter))
4760                 be_cmd_req_native_mode(adapter);
4761
4762         /* invoke this cmd first to get pf_num and vf_num which are needed
4763          * for issuing profile related cmds
4764          */
4765         if (!BEx_chip(adapter)) {
4766                 status = be_cmd_get_func_config(adapter, NULL);
4767                 if (status)
4768                         return status;
4769         }
4770
4771         status = be_get_config(adapter);
4772         if (status)
4773                 goto err;
4774
4775         if (!BE2_chip(adapter) && be_physfn(adapter))
4776                 be_alloc_sriov_res(adapter);
4777
4778         status = be_get_resources(adapter);
4779         if (status)
4780                 goto err;
4781
4782         status = be_msix_enable(adapter);
4783         if (status)
4784                 goto err;
4785
4786         /* will enable all the needed filter flags in be_open() */
4787         status = be_if_create(adapter);
4788         if (status)
4789                 goto err;
4790
4791         /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
4792         rtnl_lock();
4793         status = be_setup_queues(adapter);
4794         rtnl_unlock();
4795         if (status)
4796                 goto err;
4797
4798         be_cmd_get_fn_privileges(adapter, &adapter->cmd_privileges, 0);
4799
4800         status = be_mac_setup(adapter);
4801         if (status)
4802                 goto err;
4803
4804         be_cmd_get_fw_ver(adapter);
4805         dev_info(dev, "FW version is %s\n", adapter->fw_ver);
4806
4807         if (BE2_chip(adapter) && fw_major_num(adapter->fw_ver) < 4) {
4808                 dev_err(dev, "Firmware on card is old (%s), IRQs may not work\n",
4809                         adapter->fw_ver);
4810                 dev_err(dev, "Please upgrade firmware to version >= 4.0\n");
4811         }
4812
4813         status = be_cmd_set_flow_control(adapter, adapter->tx_fc,
4814                                          adapter->rx_fc);
4815         if (status)
4816                 be_cmd_get_flow_control(adapter, &adapter->tx_fc,
4817                                         &adapter->rx_fc);
4818
4819         dev_info(&adapter->pdev->dev, "HW Flow control - TX:%d RX:%d\n",
4820                  adapter->tx_fc, adapter->rx_fc);
4821
4822         if (be_physfn(adapter))
4823                 be_cmd_set_logical_link_config(adapter,
4824                                                IFLA_VF_LINK_STATE_AUTO, 0);
4825
4826         /* BE3 EVB echoes broadcast/multicast packets back to PF's vport
4827          * confusing a linux bridge or OVS that it might be connected to.
4828          * Set the EVB to PASSTHRU mode which effectively disables the EVB
4829          * when SRIOV is not enabled.
4830          */
4831         if (BE3_chip(adapter))
4832                 be_cmd_set_hsw_config(adapter, 0, 0, adapter->if_handle,
4833                                       PORT_FWD_TYPE_PASSTHRU, 0);
4834
4835         if (adapter->num_vfs)
4836                 be_vf_setup(adapter);
4837
4838         status = be_cmd_get_phy_info(adapter);
4839         if (!status && be_pause_supported(adapter))
4840                 adapter->phy.fc_autoneg = 1;
4841
4842         if (be_physfn(adapter) && !lancer_chip(adapter))
4843                 be_cmd_set_features(adapter);
4844
4845         be_schedule_worker(adapter);
4846         adapter->flags |= BE_FLAGS_SETUP_DONE;
4847         return 0;
4848 err:
4849         be_clear(adapter);
4850         return status;
4851 }
4852
4853 #ifdef CONFIG_NET_POLL_CONTROLLER
4854 static void be_netpoll(struct net_device *netdev)
4855 {
4856         struct be_adapter *adapter = netdev_priv(netdev);
4857         struct be_eq_obj *eqo;
4858         int i;
4859
4860         for_all_evt_queues(adapter, eqo, i) {
4861                 be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
4862                 napi_schedule(&eqo->napi);
4863         }
4864 }
4865 #endif
4866
4867 int be_load_fw(struct be_adapter *adapter, u8 *fw_file)
4868 {
4869         const struct firmware *fw;
4870         int status;
4871
4872         if (!netif_running(adapter->netdev)) {
4873                 dev_err(&adapter->pdev->dev,
4874                         "Firmware load not allowed (interface is down)\n");
4875                 return -ENETDOWN;
4876         }
4877
4878         status = request_firmware(&fw, fw_file, &adapter->pdev->dev);
4879         if (status)
4880                 goto fw_exit;
4881
4882         dev_info(&adapter->pdev->dev, "Flashing firmware file %s\n", fw_file);
4883
4884         if (lancer_chip(adapter))
4885                 status = lancer_fw_download(adapter, fw);
4886         else
4887                 status = be_fw_download(adapter, fw);
4888
4889         if (!status)
4890                 be_cmd_get_fw_ver(adapter);
4891
4892 fw_exit:
4893         release_firmware(fw);
4894         return status;
4895 }
4896
4897 static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
4898                                  u16 flags)
4899 {
4900         struct be_adapter *adapter = netdev_priv(dev);
4901         struct nlattr *attr, *br_spec;
4902         int rem;
4903         int status = 0;
4904         u16 mode = 0;
4905
4906         if (!sriov_enabled(adapter))
4907                 return -EOPNOTSUPP;
4908
4909         br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
4910         if (!br_spec)
4911                 return -EINVAL;
4912
4913         nla_for_each_nested(attr, br_spec, rem) {
4914                 if (nla_type(attr) != IFLA_BRIDGE_MODE)
4915                         continue;
4916
4917                 if (nla_len(attr) < sizeof(mode))
4918                         return -EINVAL;
4919
4920                 mode = nla_get_u16(attr);
4921                 if (BE3_chip(adapter) && mode == BRIDGE_MODE_VEPA)
4922                         return -EOPNOTSUPP;
4923
4924                 if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
4925                         return -EINVAL;
4926
4927                 status = be_cmd_set_hsw_config(adapter, 0, 0,
4928                                                adapter->if_handle,
4929                                                mode == BRIDGE_MODE_VEPA ?
4930                                                PORT_FWD_TYPE_VEPA :
4931                                                PORT_FWD_TYPE_VEB, 0);
4932                 if (status)
4933                         goto err;
4934
4935                 dev_info(&adapter->pdev->dev, "enabled switch mode: %s\n",
4936                          mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4937
4938                 return status;
4939         }
4940 err:
4941         dev_err(&adapter->pdev->dev, "Failed to set switch mode %s\n",
4942                 mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4943
4944         return status;
4945 }
4946
4947 static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
4948                                  struct net_device *dev, u32 filter_mask,
4949                                  int nlflags)
4950 {
4951         struct be_adapter *adapter = netdev_priv(dev);
4952         int status = 0;
4953         u8 hsw_mode;
4954
4955         /* BE and Lancer chips support VEB mode only */
4956         if (BEx_chip(adapter) || lancer_chip(adapter)) {
4957                 /* VEB is disabled in non-SR-IOV profiles on BE3/Lancer */
4958                 if (!pci_sriov_get_totalvfs(adapter->pdev))
4959                         return 0;
4960                 hsw_mode = PORT_FWD_TYPE_VEB;
4961         } else {
4962                 status = be_cmd_get_hsw_config(adapter, NULL, 0,
4963                                                adapter->if_handle, &hsw_mode,
4964                                                NULL);
4965                 if (status)
4966                         return 0;
4967
4968                 if (hsw_mode == PORT_FWD_TYPE_PASSTHRU)
4969                         return 0;
4970         }
4971
4972         return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
4973                                        hsw_mode == PORT_FWD_TYPE_VEPA ?
4974                                        BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB,
4975                                        0, 0, nlflags, filter_mask, NULL);
4976 }
4977
4978 static struct be_cmd_work *be_alloc_work(struct be_adapter *adapter,
4979                                          void (*func)(struct work_struct *))
4980 {
4981         struct be_cmd_work *work;
4982
4983         work = kzalloc(sizeof(*work), GFP_ATOMIC);
4984         if (!work) {
4985                 dev_err(&adapter->pdev->dev,
4986                         "be_work memory allocation failed\n");
4987                 return NULL;
4988         }
4989
4990         INIT_WORK(&work->work, func);
4991         work->adapter = adapter;
4992         return work;
4993 }
4994
4995 /* VxLAN offload Notes:
4996  *
4997  * The stack defines tunnel offload flags (hw_enc_features) for IP and doesn't
4998  * distinguish various types of transports (VxLAN, GRE, NVGRE ..). So, offload
4999  * is expected to work across all types of IP tunnels once exported. Skyhawk
5000  * supports offloads for either VxLAN or NVGRE, exclusively. So we export VxLAN
5001  * offloads in hw_enc_features only when a VxLAN port is added. If other (non
5002  * VxLAN) tunnels are configured while VxLAN offloads are enabled, offloads for
5003  * those other tunnels are unexported on the fly through ndo_features_check().
5004  *
5005  * Skyhawk supports VxLAN offloads only for one UDP dport. So, if the stack
5006  * adds more than one port, disable offloads and don't re-enable them again
5007  * until after all the tunnels are removed.
5008  */
5009 static void be_work_add_vxlan_port(struct work_struct *work)
5010 {
5011         struct be_cmd_work *cmd_work =
5012                                 container_of(work, struct be_cmd_work, work);
5013         struct be_adapter *adapter = cmd_work->adapter;
5014         struct net_device *netdev = adapter->netdev;
5015         struct device *dev = &adapter->pdev->dev;
5016         __be16 port = cmd_work->info.vxlan_port;
5017         int status;
5018
5019         if (adapter->vxlan_port == port && adapter->vxlan_port_count) {
5020                 adapter->vxlan_port_aliases++;
5021                 goto done;
5022         }
5023
5024         if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS) {
5025                 dev_info(dev,
5026                          "Only one UDP port supported for VxLAN offloads\n");
5027                 dev_info(dev, "Disabling VxLAN offloads\n");
5028                 adapter->vxlan_port_count++;
5029                 goto err;
5030         }
5031
5032         if (adapter->vxlan_port_count++ >= 1)
5033                 goto done;
5034
5035         status = be_cmd_manage_iface(adapter, adapter->if_handle,
5036                                      OP_CONVERT_NORMAL_TO_TUNNEL);
5037         if (status) {
5038                 dev_warn(dev, "Failed to convert normal interface to tunnel\n");
5039                 goto err;
5040         }
5041
5042         status = be_cmd_set_vxlan_port(adapter, port);
5043         if (status) {
5044                 dev_warn(dev, "Failed to add VxLAN port\n");
5045                 goto err;
5046         }
5047         adapter->flags |= BE_FLAGS_VXLAN_OFFLOADS;
5048         adapter->vxlan_port = port;
5049
5050         netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
5051                                    NETIF_F_TSO | NETIF_F_TSO6 |
5052                                    NETIF_F_GSO_UDP_TUNNEL;
5053         netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
5054         netdev->features |= NETIF_F_GSO_UDP_TUNNEL;
5055
5056         dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n",
5057                  be16_to_cpu(port));
5058         goto done;
5059 err:
5060         be_disable_vxlan_offloads(adapter);
5061 done:
5062         kfree(cmd_work);
5063 }
5064
5065 static void be_work_del_vxlan_port(struct work_struct *work)
5066 {
5067         struct be_cmd_work *cmd_work =
5068                                 container_of(work, struct be_cmd_work, work);
5069         struct be_adapter *adapter = cmd_work->adapter;
5070         __be16 port = cmd_work->info.vxlan_port;
5071
5072         if (adapter->vxlan_port != port)
5073                 goto done;
5074
5075         if (adapter->vxlan_port_aliases) {
5076                 adapter->vxlan_port_aliases--;
5077                 goto out;
5078         }
5079
5080         be_disable_vxlan_offloads(adapter);
5081
5082         dev_info(&adapter->pdev->dev,
5083                  "Disabled VxLAN offloads for UDP port %d\n",
5084                  be16_to_cpu(port));
5085 done:
5086         adapter->vxlan_port_count--;
5087 out:
5088         kfree(cmd_work);
5089 }
5090
5091 static void be_cfg_vxlan_port(struct net_device *netdev,
5092                               struct udp_tunnel_info *ti,
5093                               void (*func)(struct work_struct *))
5094 {
5095         struct be_adapter *adapter = netdev_priv(netdev);
5096         struct be_cmd_work *cmd_work;
5097
5098         if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
5099                 return;
5100
5101         if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter))
5102                 return;
5103
5104         cmd_work = be_alloc_work(adapter, func);
5105         if (cmd_work) {
5106                 cmd_work->info.vxlan_port = ti->port;
5107                 queue_work(be_wq, &cmd_work->work);
5108         }
5109 }
5110
5111 static void be_del_vxlan_port(struct net_device *netdev,
5112                               struct udp_tunnel_info *ti)
5113 {
5114         be_cfg_vxlan_port(netdev, ti, be_work_del_vxlan_port);
5115 }
5116
5117 static void be_add_vxlan_port(struct net_device *netdev,
5118                               struct udp_tunnel_info *ti)
5119 {
5120         be_cfg_vxlan_port(netdev, ti, be_work_add_vxlan_port);
5121 }
5122
5123 static netdev_features_t be_features_check(struct sk_buff *skb,
5124                                            struct net_device *dev,
5125                                            netdev_features_t features)
5126 {
5127         struct be_adapter *adapter = netdev_priv(dev);
5128         u8 l4_hdr = 0;
5129
5130         /* The code below restricts offload features for some tunneled packets.
5131          * Offload features for normal (non tunnel) packets are unchanged.
5132          */
5133         if (!skb->encapsulation ||
5134             !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS))
5135                 return features;
5136
5137         /* It's an encapsulated packet and VxLAN offloads are enabled. We
5138          * should disable tunnel offload features if it's not a VxLAN packet,
5139          * as tunnel offloads have been enabled only for VxLAN. This is done to
5140          * allow other tunneled traffic like GRE to work fine while VxLAN
5141          * offloads are configured in Skyhawk-R.
5142          */
5143         switch (vlan_get_protocol(skb)) {
5144         case htons(ETH_P_IP):
5145                 l4_hdr = ip_hdr(skb)->protocol;
5146                 break;
5147         case htons(ETH_P_IPV6):
5148                 l4_hdr = ipv6_hdr(skb)->nexthdr;
5149                 break;
5150         default:
5151                 return features;
5152         }
5153
5154         if (l4_hdr != IPPROTO_UDP ||
5155             skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
5156             skb->inner_protocol != htons(ETH_P_TEB) ||
5157             skb_inner_mac_header(skb) - skb_transport_header(skb) !=
5158                 sizeof(struct udphdr) + sizeof(struct vxlanhdr) ||
5159             !adapter->vxlan_port ||
5160             udp_hdr(skb)->dest != adapter->vxlan_port)
5161                 return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
5162
5163         return features;
5164 }
5165
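/* Compose the physical port id from the HBA port number (plus one) followed
 * by the controller serial-number words copied in reverse order.
 */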
5166 static int be_get_phys_port_id(struct net_device *dev,
5167                                struct netdev_phys_item_id *ppid)
5168 {
5169         int i, id_len = CNTL_SERIAL_NUM_WORDS * CNTL_SERIAL_NUM_WORD_SZ + 1;
5170         struct be_adapter *adapter = netdev_priv(dev);
5171         u8 *id;
5172
5173         if (MAX_PHYS_ITEM_ID_LEN < id_len)
5174                 return -ENOSPC;
5175
5176         ppid->id[0] = adapter->hba_port_num + 1;
5177         id = &ppid->id[1];
5178         for (i = CNTL_SERIAL_NUM_WORDS - 1; i >= 0;
5179              i--, id += CNTL_SERIAL_NUM_WORD_SZ)
5180                 memcpy(id, &adapter->serial_num[i], CNTL_SERIAL_NUM_WORD_SZ);
5181
5182         ppid->id_len = id_len;
5183
5184         return 0;
5185 }
5186
5187 static void be_set_rx_mode(struct net_device *dev)
5188 {
5189         struct be_adapter *adapter = netdev_priv(dev);
5190         struct be_cmd_work *work;
5191
5192         work = be_alloc_work(adapter, be_work_set_rx_mode);
5193         if (work)
5194                 queue_work(be_wq, &work->work);
5195 }
5196
5197 static const struct net_device_ops be_netdev_ops = {
5198         .ndo_open               = be_open,
5199         .ndo_stop               = be_close,
5200         .ndo_start_xmit         = be_xmit,
5201         .ndo_set_rx_mode        = be_set_rx_mode,
5202         .ndo_set_mac_address    = be_mac_addr_set,
5203         .ndo_get_stats64        = be_get_stats64,
5204         .ndo_validate_addr      = eth_validate_addr,
5205         .ndo_vlan_rx_add_vid    = be_vlan_add_vid,
5206         .ndo_vlan_rx_kill_vid   = be_vlan_rem_vid,
5207         .ndo_set_vf_mac         = be_set_vf_mac,
5208         .ndo_set_vf_vlan        = be_set_vf_vlan,
5209         .ndo_set_vf_rate        = be_set_vf_tx_rate,
5210         .ndo_get_vf_config      = be_get_vf_config,
5211         .ndo_set_vf_link_state  = be_set_vf_link_state,
5212         .ndo_set_vf_spoofchk    = be_set_vf_spoofchk,
5213 #ifdef CONFIG_NET_POLL_CONTROLLER
5214         .ndo_poll_controller    = be_netpoll,
5215 #endif
5216         .ndo_bridge_setlink     = be_ndo_bridge_setlink,
5217         .ndo_bridge_getlink     = be_ndo_bridge_getlink,
5218 #ifdef CONFIG_NET_RX_BUSY_POLL
5219         .ndo_busy_poll          = be_busy_poll,
5220 #endif
5221         .ndo_udp_tunnel_add     = be_add_vxlan_port,
5222         .ndo_udp_tunnel_del     = be_del_vxlan_port,
5223         .ndo_features_check     = be_features_check,
5224         .ndo_get_phys_port_id   = be_get_phys_port_id,
5225 };
5226
5227 static void be_netdev_init(struct net_device *netdev)
5228 {
5229         struct be_adapter *adapter = netdev_priv(netdev);
5230
5231         netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5232                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
5233                 NETIF_F_HW_VLAN_CTAG_TX;
5234         if ((be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS))
5235                 netdev->hw_features |= NETIF_F_RXHASH;
5236
5237         netdev->features |= netdev->hw_features |
5238                 NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER;
5239
5240         netdev->vlan_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5241                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
5242
5243         netdev->priv_flags |= IFF_UNICAST_FLT;
5244
5245         netdev->flags |= IFF_MULTICAST;
5246
5247         netif_set_gso_max_size(netdev, BE_MAX_GSO_SIZE - ETH_HLEN);
5248
5249         netdev->netdev_ops = &be_netdev_ops;
5250
5251         netdev->ethtool_ops = &be_ethtool_ops;
5252
5253         /* MTU range: 256 - 9000 */
5254         netdev->min_mtu = BE_MIN_MTU;
5255         netdev->max_mtu = BE_MAX_MTU;
5256 }
5257
5258 static void be_cleanup(struct be_adapter *adapter)
5259 {
5260         struct net_device *netdev = adapter->netdev;
5261
5262         rtnl_lock();
5263         netif_device_detach(netdev);
5264         if (netif_running(netdev))
5265                 be_close(netdev);
5266         rtnl_unlock();
5267
5268         be_clear(adapter);
5269 }
5270
5271 static int be_resume(struct be_adapter *adapter)
5272 {
5273         struct net_device *netdev = adapter->netdev;
5274         int status;
5275
5276         status = be_setup(adapter);
5277         if (status)
5278                 return status;
5279
5280         rtnl_lock();
5281         if (netif_running(netdev))
5282                 status = be_open(netdev);
5283         rtnl_unlock();
5284
5285         if (status)
5286                 return status;
5287
5288         netif_device_attach(netdev);
5289
5290         return 0;
5291 }
5292
5293 static void be_soft_reset(struct be_adapter *adapter)
5294 {
5295         u32 val;
5296
5297         dev_info(&adapter->pdev->dev, "Initiating chip soft reset\n");
5298         val = ioread32(adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5299         val |= SLIPORT_SOFTRESET_SR_MASK;
5300         iowrite32(val, adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5301 }
5302
5303 static bool be_err_is_recoverable(struct be_adapter *adapter)
5304 {
5305         struct be_error_recovery *err_rec = &adapter->error_recovery;
5306         unsigned long initial_idle_time =
5307                 msecs_to_jiffies(ERR_RECOVERY_IDLE_TIME);
5308         unsigned long recovery_interval =
5309                 msecs_to_jiffies(ERR_RECOVERY_INTERVAL);
5310         u16 ue_err_code;
5311         u32 val;
5312
5313         val = be_POST_stage_get(adapter);
5314         if ((val & POST_STAGE_RECOVERABLE_ERR) != POST_STAGE_RECOVERABLE_ERR)
5315                 return false;
5316         ue_err_code = val & POST_ERR_RECOVERY_CODE_MASK;
5317         if (ue_err_code == 0)
5318                 return false;
5319
5320         dev_err(&adapter->pdev->dev, "Recoverable HW error code: 0x%x\n",
5321                 ue_err_code);
5322
5323         if (jiffies - err_rec->probe_time <= initial_idle_time) {
5324                 dev_err(&adapter->pdev->dev,
5325                         "Cannot recover within %lu sec from driver load\n",
5326                         jiffies_to_msecs(initial_idle_time) / MSEC_PER_SEC);
5327                 return false;
5328         }
5329
5330         if (err_rec->last_recovery_time &&
5331             (jiffies - err_rec->last_recovery_time <= recovery_interval)) {
5332                 dev_err(&adapter->pdev->dev,
5333                         "Cannot recover within %lu sec from last recovery\n",
5334                         jiffies_to_msecs(recovery_interval) / MSEC_PER_SEC);
5335                 return false;
5336         }
5337
5338         if (ue_err_code == err_rec->last_err_code) {
5339                 dev_err(&adapter->pdev->dev,
5340                         "Cannot recover from a consecutive TPE error\n");
5341                 return false;
5342         }
5343
5344         err_rec->last_recovery_time = jiffies;
5345         err_rec->last_err_code = ue_err_code;
5346         return true;
5347 }
5348
5349 static int be_tpe_recover(struct be_adapter *adapter)
5350 {
5351         struct be_error_recovery *err_rec = &adapter->error_recovery;
5352         int status = -EAGAIN;
5353         u32 val;
5354
5355         switch (err_rec->recovery_state) {
5356         case ERR_RECOVERY_ST_NONE:
5357                 err_rec->recovery_state = ERR_RECOVERY_ST_DETECT;
5358                 err_rec->resched_delay = ERR_RECOVERY_UE_DETECT_DURATION;
5359                 break;
5360
5361         case ERR_RECOVERY_ST_DETECT:
5362                 val = be_POST_stage_get(adapter);
5363                 if ((val & POST_STAGE_RECOVERABLE_ERR) !=
5364                     POST_STAGE_RECOVERABLE_ERR) {
5365                         dev_err(&adapter->pdev->dev,
5366                                 "Unrecoverable HW error detected: 0x%x\n", val);
5367                         status = -EINVAL;
5368                         err_rec->resched_delay = 0;
5369                         break;
5370                 }
5371
5372                 dev_err(&adapter->pdev->dev, "Recoverable HW error detected\n");
5373
5374                 /* Only PF0 initiates Chip Soft Reset. But PF0 must wait UE2SR
5375                  * milliseconds before it checks for final error status in
5376          * SLIPORT_SEMAPHORE to determine if the recovery criteria are met.
5377          * If they are, PF0 initiates a Soft Reset.
5378                  */
5379                 if (adapter->pf_num == 0) {
5380                         err_rec->recovery_state = ERR_RECOVERY_ST_RESET;
5381                         err_rec->resched_delay = err_rec->ue_to_reset_time -
5382                                         ERR_RECOVERY_UE_DETECT_DURATION;
5383                         break;
5384                 }
5385
5386                 err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5387                 err_rec->resched_delay = err_rec->ue_to_poll_time -
5388                                         ERR_RECOVERY_UE_DETECT_DURATION;
5389                 break;
5390
5391         case ERR_RECOVERY_ST_RESET:
5392                 if (!be_err_is_recoverable(adapter)) {
5393                         dev_err(&adapter->pdev->dev,
5394                                 "Failed to meet recovery criteria\n");
5395                         status = -EIO;
5396                         err_rec->resched_delay = 0;
5397                         break;
5398                 }
5399                 be_soft_reset(adapter);
5400                 err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5401                 err_rec->resched_delay = err_rec->ue_to_poll_time -
5402                                         err_rec->ue_to_reset_time;
5403                 break;
5404
5405         case ERR_RECOVERY_ST_PRE_POLL:
5406                 err_rec->recovery_state = ERR_RECOVERY_ST_REINIT;
5407                 err_rec->resched_delay = 0;
5408                 status = 0;                     /* done */
5409                 break;
5410
5411         default:
5412                 status = -EINVAL;
5413                 err_rec->resched_delay = 0;
5414                 break;
5415         }
5416
5417         return status;
5418 }
5419
5420 static int be_err_recover(struct be_adapter *adapter)
5421 {
5422         int status;
5423
5424         if (!lancer_chip(adapter)) {
5425                 if (!adapter->error_recovery.recovery_supported ||
5426                     adapter->priv_flags & BE_DISABLE_TPE_RECOVERY)
5427                         return -EIO;
5428                 status = be_tpe_recover(adapter);
5429                 if (status)
5430                         goto err;
5431         }
5432
5433         /* Wait for adapter to reach quiescent state before
5434          * destroying queues
5435          */
5436         status = be_fw_wait_ready(adapter);
5437         if (status)
5438                 goto err;
5439
5440         adapter->flags |= BE_FLAGS_TRY_RECOVERY;
5441
5442         be_cleanup(adapter);
5443
5444         status = be_resume(adapter);
5445         if (status)
5446                 goto err;
5447
5448         adapter->flags &= ~BE_FLAGS_TRY_RECOVERY;
5449
5450 err:
5451         return status;
5452 }
5453
5454 static void be_err_detection_task(struct work_struct *work)
5455 {
5456         struct be_error_recovery *err_rec =
5457                         container_of(work, struct be_error_recovery,
5458                                      err_detection_work.work);
5459         struct be_adapter *adapter =
5460                         container_of(err_rec, struct be_adapter,
5461                                      error_recovery);
5462         u32 resched_delay = ERR_RECOVERY_DETECTION_DELAY;
5463         struct device *dev = &adapter->pdev->dev;
5464         int recovery_status;
5465
5466         be_detect_error(adapter);
5467         if (!be_check_error(adapter, BE_ERROR_HW))
5468                 goto reschedule_task;
5469
5470         recovery_status = be_err_recover(adapter);
5471         if (!recovery_status) {
5472                 err_rec->recovery_retries = 0;
5473                 err_rec->recovery_state = ERR_RECOVERY_ST_NONE;
5474                 dev_info(dev, "Adapter recovery successful\n");
5475                 goto reschedule_task;
5476         } else if (!lancer_chip(adapter) && err_rec->resched_delay) {
5477                 /* BEx/SH recovery state machine */
5478                 if (adapter->pf_num == 0 &&
5479                     err_rec->recovery_state > ERR_RECOVERY_ST_DETECT)
5480                         dev_err(&adapter->pdev->dev,
5481                                 "Adapter recovery in progress\n");
5482                 resched_delay = err_rec->resched_delay;
5483                 goto reschedule_task;
5484         } else if (lancer_chip(adapter) && be_virtfn(adapter)) {
5485                 /* For VFs, check if PF has allocated resources
5486                  * every second.
5487                  */
5488                 dev_err(dev, "Re-trying adapter recovery\n");
5489                 goto reschedule_task;
5490         } else if (lancer_chip(adapter) && err_rec->recovery_retries++ <
5491                    ERR_RECOVERY_MAX_RETRY_COUNT) {
5492                 /* In case of another error during recovery, it takes 30 sec
5493                  * for adapter to come out of error. Retry error recovery after
5494                  * this time interval.
5495                  */
5496                 dev_err(&adapter->pdev->dev, "Re-trying adapter recovery\n");
5497                 resched_delay = ERR_RECOVERY_RETRY_DELAY;
5498                 goto reschedule_task;
5499         } else {
5500                 dev_err(dev, "Adapter recovery failed\n");
5501                 dev_err(dev, "Please reboot server to recover\n");
5502         }
5503
5504         return;
5505
5506 reschedule_task:
5507         be_schedule_err_detection(adapter, resched_delay);
5508 }
5509
5510 static void be_log_sfp_info(struct be_adapter *adapter)
5511 {
5512         int status;
5513
5514         status = be_cmd_query_sfp_info(adapter);
5515         if (!status) {
5516                 dev_err(&adapter->pdev->dev,
5517                         "Port %c: %s Vendor: %s part no: %s",
5518                         adapter->port_name,
5519                         be_misconfig_evt_port_state[adapter->phy_state],
5520                         adapter->phy.vendor_name,
5521                         adapter->phy.vendor_pn);
5522         }
5523         adapter->flags &= ~BE_FLAGS_PHY_MISCONFIGURED;
5524 }
5525
5526 static void be_worker(struct work_struct *work)
5527 {
5528         struct be_adapter *adapter =
5529                 container_of(work, struct be_adapter, work.work);
5530         struct be_rx_obj *rxo;
5531         int i;
5532
5533         if (be_physfn(adapter) &&
5534             MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0)
5535                 be_cmd_get_die_temperature(adapter);
5536
5537         /* when interrupts are not yet enabled, just reap any pending
5538          * mcc completions
5539          */
5540         if (!netif_running(adapter->netdev)) {
5541                 local_bh_disable();
5542                 be_process_mcc(adapter);
5543                 local_bh_enable();
5544                 goto reschedule;
5545         }
5546
5547         if (!adapter->stats_cmd_sent) {
5548                 if (lancer_chip(adapter))
5549                         lancer_cmd_get_pport_stats(adapter,
5550                                                    &adapter->stats_cmd);
5551                 else
5552                         be_cmd_get_stats(adapter, &adapter->stats_cmd);
5553         }
5554
5555         for_all_rx_queues(adapter, rxo, i) {
5556                 /* Replenish RX-queues starved due to memory
5557                  * allocation failures.
5558                  */
5559                 if (rxo->rx_post_starved)
5560                         be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST);
5561         }
5562
5563         /* EQ-delay update for Skyhawk is done while notifying EQ */
5564         if (!skyhawk_chip(adapter))
5565                 be_eqd_update(adapter, false);
5566
5567         if (adapter->flags & BE_FLAGS_PHY_MISCONFIGURED)
5568                 be_log_sfp_info(adapter);
5569
5570 reschedule:
5571         adapter->work_counter++;
5572         queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
5573 }
5574
5575 static void be_unmap_pci_bars(struct be_adapter *adapter)
5576 {
5577         if (adapter->csr)
5578                 pci_iounmap(adapter->pdev, adapter->csr);
5579         if (adapter->db)
5580                 pci_iounmap(adapter->pdev, adapter->db);
5581         if (adapter->pcicfg && adapter->pcicfg_mapped)
5582                 pci_iounmap(adapter->pdev, adapter->pcicfg);
5583 }
5584
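/* Lancer and VFs expose the doorbell registers in BAR 0; on other
 * chips the PF doorbell BAR is BAR 4.
 */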
5585 static int db_bar(struct be_adapter *adapter)
5586 {
5587         if (lancer_chip(adapter) || be_virtfn(adapter))
5588                 return 0;
5589         else
5590                 return 4;
5591 }
5592
5593 static int be_roce_map_pci_bars(struct be_adapter *adapter)
5594 {
5595         if (skyhawk_chip(adapter)) {
5596                 adapter->roce_db.size = 4096;
5597                 adapter->roce_db.io_addr = pci_resource_start(adapter->pdev,
5598                                                               db_bar(adapter));
5599                 adapter->roce_db.total_size = pci_resource_len(adapter->pdev,
5600                                                                db_bar(adapter));
5601         }
5602         return 0;
5603 }
5604
5605 static int be_map_pci_bars(struct be_adapter *adapter)
5606 {
5607         struct pci_dev *pdev = adapter->pdev;
5608         u8 __iomem *addr;
5609         u32 sli_intf;
5610
5611         pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf);
5612         adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >>
5613                                 SLI_INTF_FAMILY_SHIFT;
5614         adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0;
5615
5616         if (BEx_chip(adapter) && be_physfn(adapter)) {
5617                 adapter->csr = pci_iomap(pdev, 2, 0);
5618                 if (!adapter->csr)
5619                         return -ENOMEM;
5620         }
5621
5622         addr = pci_iomap(pdev, db_bar(adapter), 0);
5623         if (!addr)
5624                 goto pci_map_err;
5625         adapter->db = addr;
5626
5627         if (skyhawk_chip(adapter) || BEx_chip(adapter)) {
5628                 if (be_physfn(adapter)) {
5629                         /* PCICFG is the 2nd BAR in BE2 */
5630                         addr = pci_iomap(pdev, BE2_chip(adapter) ? 1 : 0, 0);
5631                         if (!addr)
5632                                 goto pci_map_err;
5633                         adapter->pcicfg = addr;
5634                         adapter->pcicfg_mapped = true;
5635                 } else {
5636                         adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET;
5637                         adapter->pcicfg_mapped = false;
5638                 }
5639         }
5640
5641         be_roce_map_pci_bars(adapter);
5642         return 0;
5643
5644 pci_map_err:
5645         dev_err(&pdev->dev, "Error in mapping PCI BARs\n");
5646         be_unmap_pci_bars(adapter);
5647         return -ENOMEM;
5648 }
5649
5650 static void be_drv_cleanup(struct be_adapter *adapter)
5651 {
5652         struct be_dma_mem *mem = &adapter->mbox_mem_alloced;
5653         struct device *dev = &adapter->pdev->dev;
5654
5655         if (mem->va)
5656                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5657
5658         mem = &adapter->rx_filter;
5659         if (mem->va)
5660                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5661
5662         mem = &adapter->stats_cmd;
5663         if (mem->va)
5664                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5665 }
5666
5667 /* Allocate and initialize various fields in be_adapter struct */
5668 static int be_drv_init(struct be_adapter *adapter)
5669 {
5670         struct be_dma_mem *mbox_mem_alloc = &adapter->mbox_mem_alloced;
5671         struct be_dma_mem *mbox_mem_align = &adapter->mbox_mem;
5672         struct be_dma_mem *rx_filter = &adapter->rx_filter;
5673         struct be_dma_mem *stats_cmd = &adapter->stats_cmd;
5674         struct device *dev = &adapter->pdev->dev;
5675         int status = 0;
5676
5677         mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16;
5678         mbox_mem_alloc->va = dma_zalloc_coherent(dev, mbox_mem_alloc->size,
5679                                                  &mbox_mem_alloc->dma,
5680                                                  GFP_KERNEL);
5681         if (!mbox_mem_alloc->va)
5682                 return -ENOMEM;
5683
5684         mbox_mem_align->size = sizeof(struct be_mcc_mailbox);
5685         mbox_mem_align->va = PTR_ALIGN(mbox_mem_alloc->va, 16);
5686         mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16);
5687
5688         rx_filter->size = sizeof(struct be_cmd_req_rx_filter);
5689         rx_filter->va = dma_zalloc_coherent(dev, rx_filter->size,
5690                                             &rx_filter->dma, GFP_KERNEL);
5691         if (!rx_filter->va) {
5692                 status = -ENOMEM;
5693                 goto free_mbox;
5694         }
5695
5696         if (lancer_chip(adapter))
5697                 stats_cmd->size = sizeof(struct lancer_cmd_req_pport_stats);
5698         else if (BE2_chip(adapter))
5699                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v0);
5700         else if (BE3_chip(adapter))
5701                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v1);
5702         else
5703                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v2);
5704         stats_cmd->va = dma_zalloc_coherent(dev, stats_cmd->size,
5705                                             &stats_cmd->dma, GFP_KERNEL);
5706         if (!stats_cmd->va) {
5707                 status = -ENOMEM;
5708                 goto free_rx_filter;
5709         }
5710
5711         mutex_init(&adapter->mbox_lock);
5712         mutex_init(&adapter->mcc_lock);
5713         mutex_init(&adapter->rx_filter_lock);
5714         spin_lock_init(&adapter->mcc_cq_lock);
5715         init_completion(&adapter->et_cmd_compl);
5716
5717         pci_save_state(adapter->pdev);
5718
5719         INIT_DELAYED_WORK(&adapter->work, be_worker);
5720
5721         adapter->error_recovery.recovery_state = ERR_RECOVERY_ST_NONE;
5722         adapter->error_recovery.resched_delay = 0;
5723         INIT_DELAYED_WORK(&adapter->error_recovery.err_detection_work,
5724                           be_err_detection_task);
5725
5726         adapter->rx_fc = true;
5727         adapter->tx_fc = true;
5728
5729         /* Must be a power of 2 or else MODULO will BUG_ON */
5730         adapter->be_get_temp_freq = 64;
5731
5732         return 0;
5733
5734 free_rx_filter:
5735         dma_free_coherent(dev, rx_filter->size, rx_filter->va, rx_filter->dma);
5736 free_mbox:
5737         dma_free_coherent(dev, mbox_mem_alloc->size, mbox_mem_alloc->va,
5738                           mbox_mem_alloc->dma);
5739         return status;
5740 }
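
/* The free_rx_filter/free_mbox labels above undo the coherent DMA allocations
 * in the reverse order of acquisition: each failure point jumps to the label
 * that frees everything allocated so far and then falls through the remaining
 * labels. The same unwind pattern is used on a larger scale in be_probe() below.
 */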
5741
5742 static void be_remove(struct pci_dev *pdev)
5743 {
5744         struct be_adapter *adapter = pci_get_drvdata(pdev);
5745
5746         if (!adapter)
5747                 return;
5748
5749         be_roce_dev_remove(adapter);
5750         be_intr_set(adapter, false);
5751
5752         be_cancel_err_detection(adapter);
5753
5754         unregister_netdev(adapter->netdev);
5755
5756         be_clear(adapter);
5757
5758         if (!pci_vfs_assigned(adapter->pdev))
5759                 be_cmd_reset_function(adapter);
5760
5761         /* Tell FW we're done issuing cmds */
5762         be_cmd_fw_clean(adapter);
5763
5764         be_unmap_pci_bars(adapter);
5765         be_drv_cleanup(adapter);
5766
5767         pci_disable_pcie_error_reporting(pdev);
5768
5769         pci_release_regions(pdev);
5770         pci_disable_device(pdev);
5771
5772         free_netdev(adapter->netdev);
5773 }
5774
5775 static ssize_t be_hwmon_show_temp(struct device *dev,
5776                                   struct device_attribute *dev_attr,
5777                                   char *buf)
5778 {
5779         struct be_adapter *adapter = dev_get_drvdata(dev);
5780
5781         /* Unit: millidegree Celsius */
5782         if (adapter->hwmon_info.be_on_die_temp == BE_INVALID_DIE_TEMP)
5783                 return -EIO;
5784         else
5785                 return sprintf(buf, "%u\n",
5786                                adapter->hwmon_info.be_on_die_temp * 1000);
5787 }
5788
5789 static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO,
5790                           be_hwmon_show_temp, NULL, 1);
5791
5792 static struct attribute *be_hwmon_attrs[] = {
5793         &sensor_dev_attr_temp1_input.dev_attr.attr,
5794         NULL
5795 };
5796
5797 ATTRIBUTE_GROUPS(be_hwmon);
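
/* ATTRIBUTE_GROUPS(be_hwmon) generates the sysfs attribute-group plumbing from
 * the be_hwmon_attrs[] array above; a rough sketch of its effect (not a
 * verbatim expansion):
 *
 *      static const struct attribute_group be_hwmon_group = {
 *              .attrs = be_hwmon_attrs,
 *      };
 *      static const struct attribute_group *be_hwmon_groups[] = {
 *              &be_hwmon_group,
 *              NULL,
 *      };
 *
 * be_hwmon_groups is the array later passed to
 * devm_hwmon_device_register_with_groups() in be_probe().
 */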
5798
5799 static char *mc_name(struct be_adapter *adapter)
5800 {
5801         char *str = ""; /* default */
5802
5803         switch (adapter->mc_type) {
5804         case UMC:
5805                 str = "UMC";
5806                 break;
5807         case FLEX10:
5808                 str = "FLEX10";
5809                 break;
5810         case vNIC1:
5811                 str = "vNIC-1";
5812                 break;
5813         case nPAR:
5814                 str = "nPAR";
5815                 break;
5816         case UFP:
5817                 str = "UFP";
5818                 break;
5819         case vNIC2:
5820                 str = "vNIC-2";
5821                 break;
5822         default:
5823                 str = "";
5824         }
5825
5826         return str;
5827 }
5828
5829 static inline char *func_name(struct be_adapter *adapter)
5830 {
5831         return be_physfn(adapter) ? "PF" : "VF";
5832 }
5833
5834 static inline char *nic_name(struct pci_dev *pdev)
5835 {
5836         switch (pdev->device) {
5837         case OC_DEVICE_ID1:
5838                 return OC_NAME;
5839         case OC_DEVICE_ID2:
5840                 return OC_NAME_BE;
5841         case OC_DEVICE_ID3:
5842         case OC_DEVICE_ID4:
5843                 return OC_NAME_LANCER;
5844         case BE_DEVICE_ID2:
5845                 return BE3_NAME;
5846         case OC_DEVICE_ID5:
5847         case OC_DEVICE_ID6:
5848                 return OC_NAME_SH;
5849         default:
5850                 return BE_NAME;
5851         }
5852 }
5853
5854 static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id)
5855 {
5856         struct be_adapter *adapter;
5857         struct net_device *netdev;
5858         int status = 0;
5859
5860         dev_info(&pdev->dev, "%s version is %s\n", DRV_NAME, DRV_VER);
5861
5862         status = pci_enable_device(pdev);
5863         if (status)
5864                 goto do_none;
5865
5866         status = pci_request_regions(pdev, DRV_NAME);
5867         if (status)
5868                 goto disable_dev;
5869         pci_set_master(pdev);
5870
5871         netdev = alloc_etherdev_mqs(sizeof(*adapter), MAX_TX_QS, MAX_RX_QS);
5872         if (!netdev) {
5873                 status = -ENOMEM;
5874                 goto rel_reg;
5875         }
5876         adapter = netdev_priv(netdev);
5877         adapter->pdev = pdev;
5878         pci_set_drvdata(pdev, adapter);
5879         adapter->netdev = netdev;
5880         SET_NETDEV_DEV(netdev, &pdev->dev);
5881
5882         status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
5883         if (!status) {
5884                 netdev->features |= NETIF_F_HIGHDMA;
5885         } else {
5886                 status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
5887                 if (status) {
5888                         dev_err(&pdev->dev, "Could not set PCI DMA Mask\n");
5889                         goto free_netdev;
5890                 }
5891         }
5892
5893         status = pci_enable_pcie_error_reporting(pdev);
5894         if (!status)
5895                 dev_info(&pdev->dev, "PCIe error reporting enabled\n");
5896
5897         status = be_map_pci_bars(adapter);
5898         if (status)
5899                 goto free_netdev;
5900
5901         status = be_drv_init(adapter);
5902         if (status)
5903                 goto unmap_bars;
5904
5905         status = be_setup(adapter);
5906         if (status)
5907                 goto drv_cleanup;
5908
5909         be_netdev_init(netdev);
5910         status = register_netdev(netdev);
5911         if (status != 0)
5912                 goto unsetup;
5913
5914         be_roce_dev_add(adapter);
5915
5916         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5917         adapter->error_recovery.probe_time = jiffies;
5918
5919         /* On-die temperature is not supported on VFs. */
5920         if (be_physfn(adapter) && IS_ENABLED(CONFIG_BE2NET_HWMON)) {
5921                 adapter->hwmon_info.hwmon_dev =
5922                         devm_hwmon_device_register_with_groups(&pdev->dev,
5923                                                                DRV_NAME,
5924                                                                adapter,
5925                                                                be_hwmon_groups);
5926                 adapter->hwmon_info.be_on_die_temp = BE_INVALID_DIE_TEMP;
5927         }
5928
5929         dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev),
5930                  func_name(adapter), mc_name(adapter), adapter->port_name);
5931
5932         return 0;
5933
5934 unsetup:
5935         be_clear(adapter);
5936 drv_cleanup:
5937         be_drv_cleanup(adapter);
5938 unmap_bars:
5939         be_unmap_pci_bars(adapter);
5940 free_netdev:
5941         free_netdev(netdev);
5942 rel_reg:
5943         pci_release_regions(pdev);
5944 disable_dev:
5945         pci_disable_device(pdev);
5946 do_none:
5947         dev_err(&pdev->dev, "%s initialization failed\n", nic_name(pdev));
5948         return status;
5949 }
5950
5951 static int be_suspend(struct pci_dev *pdev, pm_message_t state)
5952 {
5953         struct be_adapter *adapter = pci_get_drvdata(pdev);
5954
5955         be_intr_set(adapter, false);
5956         be_cancel_err_detection(adapter);
5957
5958         be_cleanup(adapter);
5959
5960         pci_save_state(pdev);
5961         pci_disable_device(pdev);
5962         pci_set_power_state(pdev, pci_choose_state(pdev, state));
5963         return 0;
5964 }
5965
5966 static int be_pci_resume(struct pci_dev *pdev)
5967 {
5968         struct be_adapter *adapter = pci_get_drvdata(pdev);
5969         int status = 0;
5970
5971         status = pci_enable_device(pdev);
5972         if (status)
5973                 return status;
5974
5975         pci_restore_state(pdev);
5976
5977         status = be_resume(adapter);
5978         if (status)
5979                 return status;
5980
5981         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5982
5983         return 0;
5984 }
5985
5986 /*
5987  * A function-level reset (FLR) stops the device from DMAing any further data.
5988  */
5989 static void be_shutdown(struct pci_dev *pdev)
5990 {
5991         struct be_adapter *adapter = pci_get_drvdata(pdev);
5992
5993         if (!adapter)
5994                 return;
5995
5996         be_roce_dev_shutdown(adapter);
5997         cancel_delayed_work_sync(&adapter->work);
5998         be_cancel_err_detection(adapter);
5999
6000         netif_device_detach(adapter->netdev);
6001
6002         be_cmd_reset_function(adapter);
6003
6004         pci_disable_device(pdev);
6005 }
6006
6007 static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev,
6008                                             pci_channel_state_t state)
6009 {
6010         struct be_adapter *adapter = pci_get_drvdata(pdev);
6011
6012         dev_err(&adapter->pdev->dev, "EEH error detected\n");
6013
6014         be_roce_dev_remove(adapter);
6015
6016         if (!be_check_error(adapter, BE_ERROR_EEH)) {
6017                 be_set_error(adapter, BE_ERROR_EEH);
6018
6019                 be_cancel_err_detection(adapter);
6020
6021                 be_cleanup(adapter);
6022         }
6023
6024         if (state == pci_channel_io_perm_failure)
6025                 return PCI_ERS_RESULT_DISCONNECT;
6026
6027         pci_disable_device(pdev);
6028
6029         /* The error could cause the FW to trigger a flash debug dump.
6030          * Resetting the card while the flash dump is in progress can
6031          * cause it not to recover; wait for the dump to finish.
6032          * Wait only on the first function, as the wait is needed only
6033          * once per adapter.
6034          */
6035         if (pdev->devfn == 0)
6036                 ssleep(30);
6037
6038         return PCI_ERS_RESULT_NEED_RESET;
6039 }
6040
6041 static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
6042 {
6043         struct be_adapter *adapter = pci_get_drvdata(pdev);
6044         int status;
6045
6046         dev_info(&adapter->pdev->dev, "EEH reset\n");
6047
6048         status = pci_enable_device(pdev);
6049         if (status)
6050                 return PCI_ERS_RESULT_DISCONNECT;
6051
6052         pci_set_master(pdev);
6053         pci_restore_state(pdev);
6054
6055         /* Check if card is ok and fw is ready */
6056         dev_info(&adapter->pdev->dev,
6057                  "Waiting for FW to be ready after EEH reset\n");
6058         status = be_fw_wait_ready(adapter);
6059         if (status)
6060                 return PCI_ERS_RESULT_DISCONNECT;
6061
6062         pci_cleanup_aer_uncorrect_error_status(pdev);
6063         be_clear_error(adapter, BE_CLEAR_ALL);
6064         return PCI_ERS_RESULT_RECOVERED;
6065 }
6066
6067 static void be_eeh_resume(struct pci_dev *pdev)
6068 {
6069         int status = 0;
6070         struct be_adapter *adapter = pci_get_drvdata(pdev);
6071
6072         dev_info(&adapter->pdev->dev, "EEH resume\n");
6073
6074         pci_save_state(pdev);
6075
6076         status = be_resume(adapter);
6077         if (status)
6078                 goto err;
6079
6080         be_roce_dev_add(adapter);
6081
6082         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6083         return;
6084 err:
6085         dev_err(&adapter->pdev->dev, "EEH resume failed\n");
6086 }
6087
6088 static int be_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
6089 {
6090         struct be_adapter *adapter = pci_get_drvdata(pdev);
6091         struct be_resources vft_res = {0};
6092         int status;
6093
6094         if (!num_vfs)
6095                 be_vf_clear(adapter);
6096
6097         adapter->num_vfs = num_vfs;
6098
6099         if (adapter->num_vfs == 0 && pci_vfs_assigned(pdev)) {
6100                 dev_warn(&pdev->dev,
6101                          "Cannot disable VFs while they are assigned\n");
6102                 return -EBUSY;
6103         }
6104
6105         /* When the HW is in an SR-IOV capable configuration, the PF-pool
6106          * resources are distributed equally across the maximum number of VFs.
6107          * The user may request that only a subset of the max VFs be enabled.
6108          * Based on num_vfs, redistribute the resources across only the enabled
6109          * VFs, so that each VF gets a larger share (see the illustrative note
6110          * below). BE3 FW does not provide this facility; on Lancer, the FW
6111          * performs this redistribution itself.
6112          */
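        /* Illustrative example (numbers are hypothetical): if the PF pool
         * offers 64 RX queues and the HW profile allows up to 32 VFs, the
         * default split leaves each VF with 2 queues. If only num_vfs = 4 are
         * enabled, redistributing over 4 VFs lets each enabled VF claim a far
         * larger share (on the order of 16 queues each, minus whatever is
         * reserved for the PF).
         */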
6113         if (skyhawk_chip(adapter) && !pci_num_vf(pdev)) {
6114                 be_calculate_vf_res(adapter, adapter->num_vfs,
6115                                     &vft_res);
6116                 status = be_cmd_set_sriov_config(adapter, adapter->pool_res,
6117                                                  adapter->num_vfs, &vft_res);
6118                 if (status)
6119                         dev_err(&pdev->dev,
6120                                 "Failed to optimize SR-IOV resources\n");
6121         }
6122
6123         status = be_get_resources(adapter);
6124         if (status)
6125                 return be_cmd_status(status);
6126
6127         /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
6128         rtnl_lock();
6129         status = be_update_queues(adapter);
6130         rtnl_unlock();
6131         if (status)
6132                 return be_cmd_status(status);
6133
6134         if (adapter->num_vfs)
6135                 status = be_vf_setup(adapter);
6136
6137         if (!status)
6138                 return adapter->num_vfs;
6139
6140         return 0;
6141 }
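
/* Usage note: the PCI core invokes this .sriov_configure hook when the
 * administrator writes to the function's sriov_numvfs sysfs attribute, e.g.
 * (the device address below is illustrative):
 *
 *      echo 4 > /sys/bus/pci/devices/0000:04:00.0/sriov_numvfs
 *      echo 0 > /sys/bus/pci/devices/0000:04:00.0/sriov_numvfs
 *
 * A positive count enables that many VFs (the "sysfs method" referred to by
 * the obsolete num_vfs module-parameter message); writing 0 disables them,
 * which is refused above while the VFs are still assigned to guests.
 */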
6142
6143 static const struct pci_error_handlers be_eeh_handlers = {
6144         .error_detected = be_eeh_err_detected,
6145         .slot_reset = be_eeh_reset,
6146         .resume = be_eeh_resume,
6147 };
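
/* These callbacks follow the standard PCI error-recovery sequence: the core
 * first calls .error_detected (be_eeh_err_detected), which quiesces the
 * function and asks for a reset; after the link/slot reset it calls
 * .slot_reset (be_eeh_reset), which re-enables the device and waits for FW;
 * if that returns PCI_ERS_RESULT_RECOVERED, .resume (be_eeh_resume) brings
 * the interface back up and re-arms error detection.
 */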
6148
6149 static struct pci_driver be_driver = {
6150         .name = DRV_NAME,
6151         .id_table = be_dev_ids,
6152         .probe = be_probe,
6153         .remove = be_remove,
6154         .suspend = be_suspend,
6155         .resume = be_pci_resume,
6156         .shutdown = be_shutdown,
6157         .sriov_configure = be_pci_sriov_configure,
6158         .err_handler = &be_eeh_handlers
6159 };
6160
6161 static int __init be_init_module(void)
6162 {
6163         int status;
6164
6165         if (rx_frag_size != 8192 && rx_frag_size != 4096 &&
6166             rx_frag_size != 2048) {
6167                 printk(KERN_WARNING DRV_NAME
6168                         " : Module param rx_frag_size must be 2048/4096/8192."
6169                         " Using 2048\n");
6170                 rx_frag_size = 2048;
6171         }
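
        /* The accepted values are exactly the powers of two from 2048 to 8192,
         * so an equivalent check (a sketch, assuming is_power_of_2() from
         * <linux/log2.h>) would be:
         *
         *      if (!is_power_of_2(rx_frag_size) ||
         *          rx_frag_size < 2048 || rx_frag_size > 8192)
         *              rx_frag_size = 2048;
         */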
6172
6173         if (num_vfs > 0) {
6174                 pr_info(DRV_NAME " : Module param num_vfs is obsolete.\n");
6175                 pr_info(DRV_NAME " : Use sysfs method to enable VFs\n");
6176         }
6177
6178         be_wq = create_singlethread_workqueue("be_wq");
6179         if (!be_wq) {
6180                 pr_warn(DRV_NAME " : workqueue creation failed\n");
6181                 return -ENOMEM;
6182         }
6183
6184         be_err_recovery_workq =
6185                 create_singlethread_workqueue("be_err_recover");
6186         if (!be_err_recovery_workq)
6187                 pr_warn(DRV_NAME " : Could not create error recovery workqueue\n");
6188
6189         status = pci_register_driver(&be_driver);
6190         if (status) {
6191                 destroy_workqueue(be_wq);
6192                 be_destroy_err_recovery_workq();
6193         }
6194         return status;
6195 }
6196 module_init(be_init_module);
6197
6198 static void __exit be_exit_module(void)
6199 {
6200         pci_unregister_driver(&be_driver);
6201
6202         be_destroy_err_recovery_workq();
6203
6204         if (be_wq)
6205                 destroy_workqueue(be_wq);
6206 }
6207 module_exit(be_exit_module);