1 /*
2  * Copyright (C) 2005 - 2016 Broadcom
3  * All rights reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License version 2
7  * as published by the Free Software Foundation.  The full GNU General
8  * Public License is included in this distribution in the file called COPYING.
9  *
10  * Contact Information:
11  * linux-drivers@emulex.com
12  *
13  * Emulex
14  * 3333 Susan Street
15  * Costa Mesa, CA 92626
16  */
17
18 #include <linux/prefetch.h>
19 #include <linux/module.h>
20 #include "be.h"
21 #include "be_cmds.h"
22 #include <asm/div64.h>
23 #include <linux/aer.h>
24 #include <linux/if_bridge.h>
25 #include <net/busy_poll.h>
26 #include <net/vxlan.h>
27
28 MODULE_VERSION(DRV_VER);
29 MODULE_DESCRIPTION(DRV_DESC " " DRV_VER);
30 MODULE_AUTHOR("Emulex Corporation");
31 MODULE_LICENSE("GPL");
32
33 /* num_vfs module param is obsolete.
34  * Use sysfs method to enable/disable VFs.
35  */
36 static unsigned int num_vfs;
37 module_param(num_vfs, uint, S_IRUGO);
38 MODULE_PARM_DESC(num_vfs, "Number of PCI VFs to initialize");
39
40 static ushort rx_frag_size = 2048;
41 module_param(rx_frag_size, ushort, S_IRUGO);
42 MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data.");
43
44 /* Per-module error detection/recovery workq shared across all functions.
45  * Each function schedules its own work request on this shared workq.
46  */
47 static struct workqueue_struct *be_err_recovery_workq;
48
49 static const struct pci_device_id be_dev_ids[] = {
50         { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) },
51         { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID2) },
52         { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID1) },
53         { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID2) },
54         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID3)},
55         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID4)},
56         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID5)},
57         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID6)},
58         { 0 }
59 };
60 MODULE_DEVICE_TABLE(pci, be_dev_ids);
61
62 /* Workqueue used by all functions for deferring cmd calls to the adapter */
63 static struct workqueue_struct *be_wq;
64
65 /* UE Status Low CSR */
66 static const char * const ue_status_low_desc[] = {
67         "CEV",
68         "CTX",
69         "DBUF",
70         "ERX",
71         "Host",
72         "MPU",
73         "NDMA",
74         "PTC ",
75         "RDMA ",
76         "RXF ",
77         "RXIPS ",
78         "RXULP0 ",
79         "RXULP1 ",
80         "RXULP2 ",
81         "TIM ",
82         "TPOST ",
83         "TPRE ",
84         "TXIPS ",
85         "TXULP0 ",
86         "TXULP1 ",
87         "UC ",
88         "WDMA ",
89         "TXULP2 ",
90         "HOST1 ",
91         "P0_OB_LINK ",
92         "P1_OB_LINK ",
93         "HOST_GPIO ",
94         "MBOX ",
95         "ERX2 ",
96         "SPARE ",
97         "JTAG ",
98         "MPU_INTPEND "
99 };
100
101 /* UE Status High CSR */
102 static const char * const ue_status_hi_desc[] = {
103         "LPCMEMHOST",
104         "MGMT_MAC",
105         "PCS0ONLINE",
106         "MPU_IRAM",
107         "PCS1ONLINE",
108         "PCTL0",
109         "PCTL1",
110         "PMEM",
111         "RR",
112         "TXPB",
113         "RXPP",
114         "XAUI",
115         "TXP",
116         "ARM",
117         "IPC",
118         "HOST2",
119         "HOST3",
120         "HOST4",
121         "HOST5",
122         "HOST6",
123         "HOST7",
124         "ECRC",
125         "Poison TLP",
126         "NETC",
127         "PERIPH",
128         "LLTXULP",
129         "D2P",
130         "RCON",
131         "LDMA",
132         "LLTXP",
133         "LLTXPB",
134         "Unknown"
135 };
136
137 #define BE_VF_IF_EN_FLAGS       (BE_IF_FLAGS_UNTAGGED | \
138                                  BE_IF_FLAGS_BROADCAST | \
139                                  BE_IF_FLAGS_MULTICAST | \
140                                  BE_IF_FLAGS_PASS_L3L4_ERRORS)
141
142 static void be_queue_free(struct be_adapter *adapter, struct be_queue_info *q)
143 {
144         struct be_dma_mem *mem = &q->dma_mem;
145
146         if (mem->va) {
147                 dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va,
148                                   mem->dma);
149                 mem->va = NULL;
150         }
151 }
152
153 static int be_queue_alloc(struct be_adapter *adapter, struct be_queue_info *q,
154                           u16 len, u16 entry_size)
155 {
156         struct be_dma_mem *mem = &q->dma_mem;
157
158         memset(q, 0, sizeof(*q));
159         q->len = len;
160         q->entry_size = entry_size;
161         mem->size = len * entry_size;
162         mem->va = dma_zalloc_coherent(&adapter->pdev->dev, mem->size, &mem->dma,
163                                       GFP_KERNEL);
164         if (!mem->va)
165                 return -ENOMEM;
166         return 0;
167 }
168
169 static void be_reg_intr_set(struct be_adapter *adapter, bool enable)
170 {
171         u32 reg, enabled;
172
173         pci_read_config_dword(adapter->pdev, PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET,
174                               &reg);
175         enabled = reg & MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
176
177         if (!enabled && enable)
178                 reg |= MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
179         else if (enabled && !enable)
180                 reg &= ~MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
181         else
182                 return;
183
184         pci_write_config_dword(adapter->pdev,
185                                PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET, reg);
186 }
187
188 static void be_intr_set(struct be_adapter *adapter, bool enable)
189 {
190         int status = 0;
191
192         /* On Lancer, interrupts can't be controlled via this register */
193         if (lancer_chip(adapter))
194                 return;
195
196         if (be_check_error(adapter, BE_ERROR_EEH))
197                 return;
198
199         status = be_cmd_intr_set(adapter, enable);
200         if (status)
201                 be_reg_intr_set(adapter, enable);
202 }
203
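/* Doorbell helpers: each notify routine below composes a 32-bit doorbell
 * value (ring id in the low bits, counts and flags in the higher bits per the
 * DB_*_MASK/SHIFT definitions) and writes it to the corresponding offset in
 * the doorbell BAR.  The wmb() in the RQ/TXQ variants orders the descriptor
 * writes in host memory ahead of the doorbell write, so the adapter never
 * sees a doorbell before the descriptors it advertises.
 */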
204 static void be_rxq_notify(struct be_adapter *adapter, u16 qid, u16 posted)
205 {
206         u32 val = 0;
207
208         if (be_check_error(adapter, BE_ERROR_HW))
209                 return;
210
211         val |= qid & DB_RQ_RING_ID_MASK;
212         val |= posted << DB_RQ_NUM_POSTED_SHIFT;
213
214         wmb();
215         iowrite32(val, adapter->db + DB_RQ_OFFSET);
216 }
217
218 static void be_txq_notify(struct be_adapter *adapter, struct be_tx_obj *txo,
219                           u16 posted)
220 {
221         u32 val = 0;
222
223         if (be_check_error(adapter, BE_ERROR_HW))
224                 return;
225
226         val |= txo->q.id & DB_TXULP_RING_ID_MASK;
227         val |= (posted & DB_TXULP_NUM_POSTED_MASK) << DB_TXULP_NUM_POSTED_SHIFT;
228
229         wmb();
230         iowrite32(val, adapter->db + txo->db_offset);
231 }
232
233 static void be_eq_notify(struct be_adapter *adapter, u16 qid,
234                          bool arm, bool clear_int, u16 num_popped,
235                          u32 eq_delay_mult_enc)
236 {
237         u32 val = 0;
238
239         val |= qid & DB_EQ_RING_ID_MASK;
240         val |= ((qid & DB_EQ_RING_ID_EXT_MASK) << DB_EQ_RING_ID_EXT_MASK_SHIFT);
241
242         if (be_check_error(adapter, BE_ERROR_HW))
243                 return;
244
245         if (arm)
246                 val |= 1 << DB_EQ_REARM_SHIFT;
247         if (clear_int)
248                 val |= 1 << DB_EQ_CLR_SHIFT;
249         val |= 1 << DB_EQ_EVNT_SHIFT;
250         val |= num_popped << DB_EQ_NUM_POPPED_SHIFT;
251         val |= eq_delay_mult_enc << DB_EQ_R2I_DLY_SHIFT;
252         iowrite32(val, adapter->db + DB_EQ_OFFSET);
253 }
254
255 void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped)
256 {
257         u32 val = 0;
258
259         val |= qid & DB_CQ_RING_ID_MASK;
260         val |= ((qid & DB_CQ_RING_ID_EXT_MASK) <<
261                         DB_CQ_RING_ID_EXT_MASK_SHIFT);
262
263         if (be_check_error(adapter, BE_ERROR_HW))
264                 return;
265
266         if (arm)
267                 val |= 1 << DB_CQ_REARM_SHIFT;
268         val |= num_popped << DB_CQ_NUM_POPPED_SHIFT;
269         iowrite32(val, adapter->db + DB_CQ_OFFSET);
270 }
271
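/* pmac_id[] layout, as used by the helpers below: pmac_id[0] tracks the
 * pmac-id of the currently programmed primary MAC, while pmac_id[1..uc_macs]
 * track the uc-list entries.  That is why a match in the uc-list lets the
 * primary MAC simply reuse pmac_id[i + 1] instead of issuing another
 * PMAC_ADD command.
 */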
272 static int be_dev_mac_add(struct be_adapter *adapter, u8 *mac)
273 {
274         int i;
275
276         /* Check if mac has already been added as part of uc-list */
277         for (i = 0; i < adapter->uc_macs; i++) {
278                 if (ether_addr_equal(adapter->uc_list[i].mac, mac)) {
279                         /* mac already added, skip addition */
280                         adapter->pmac_id[0] = adapter->pmac_id[i + 1];
281                         return 0;
282                 }
283         }
284
285         return be_cmd_pmac_add(adapter, mac, adapter->if_handle,
286                                &adapter->pmac_id[0], 0);
287 }
288
289 static void be_dev_mac_del(struct be_adapter *adapter, int pmac_id)
290 {
291         int i;
292
293         /* Skip deletion if the programmed mac is
294          * being used in uc-list
295          */
296         for (i = 0; i < adapter->uc_macs; i++) {
297                 if (adapter->pmac_id[i + 1] == pmac_id)
298                         return;
299         }
300         be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
301 }
302
303 static int be_mac_addr_set(struct net_device *netdev, void *p)
304 {
305         struct be_adapter *adapter = netdev_priv(netdev);
306         struct device *dev = &adapter->pdev->dev;
307         struct sockaddr *addr = p;
308         int status;
309         u8 mac[ETH_ALEN];
310         u32 old_pmac_id = adapter->pmac_id[0];
311
312         if (!is_valid_ether_addr(addr->sa_data))
313                 return -EADDRNOTAVAIL;
314
315         /* Proceed further only if the user-provided MAC is different
316          * from the active MAC
317          */
318         if (ether_addr_equal(addr->sa_data, adapter->dev_mac))
319                 return 0;
320
321         /* if device is not running, copy MAC to netdev->dev_addr */
322         if (!netif_running(netdev))
323                 goto done;
324
325         /* The PMAC_ADD cmd may fail if the VF doesn't have FILTMGMT
326          * privilege or if PF did not provision the new MAC address.
327          * On BE3, this cmd will always fail if the VF doesn't have the
328          * FILTMGMT privilege. This failure is OK only if the PF has
329          * programmed the MAC for the VF.
330          */
331         mutex_lock(&adapter->rx_filter_lock);
332         status = be_dev_mac_add(adapter, (u8 *)addr->sa_data);
333         if (!status) {
334
335                 /* Delete the old programmed MAC. This call may fail if the
336                  * old MAC was already deleted by the PF driver.
337                  */
338                 if (adapter->pmac_id[0] != old_pmac_id)
339                         be_dev_mac_del(adapter, old_pmac_id);
340         }
341
342         mutex_unlock(&adapter->rx_filter_lock);
343         /* Decide if the new MAC is successfully activated only after
344          * querying the FW
345          */
346         status = be_cmd_get_active_mac(adapter, adapter->pmac_id[0], mac,
347                                        adapter->if_handle, true, 0);
348         if (status)
349                 goto err;
350
351         /* The MAC change did not happen, either due to lack of privilege
352          * or because the PF didn't pre-provision the new MAC.
353          */
354         if (!ether_addr_equal(addr->sa_data, mac)) {
355                 status = -EPERM;
356                 goto err;
357         }
358 done:
359         ether_addr_copy(adapter->dev_mac, addr->sa_data);
360         ether_addr_copy(netdev->dev_addr, addr->sa_data);
361         dev_info(dev, "MAC address changed to %pM\n", addr->sa_data);
362         return 0;
363 err:
364         dev_warn(dev, "MAC address change to %pM failed\n", addr->sa_data);
365         return status;
366 }
367
368 /* BE2 supports only v0 cmd */
369 static void *hw_stats_from_cmd(struct be_adapter *adapter)
370 {
371         if (BE2_chip(adapter)) {
372                 struct be_cmd_resp_get_stats_v0 *cmd = adapter->stats_cmd.va;
373
374                 return &cmd->hw_stats;
375         } else if (BE3_chip(adapter)) {
376                 struct be_cmd_resp_get_stats_v1 *cmd = adapter->stats_cmd.va;
377
378                 return &cmd->hw_stats;
379         } else {
380                 struct be_cmd_resp_get_stats_v2 *cmd = adapter->stats_cmd.va;
381
382                 return &cmd->hw_stats;
383         }
384 }
385
386 /* BE2 supports only v0 cmd */
387 static void *be_erx_stats_from_cmd(struct be_adapter *adapter)
388 {
389         if (BE2_chip(adapter)) {
390                 struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
391
392                 return &hw_stats->erx;
393         } else if (BE3_chip(adapter)) {
394                 struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
395
396                 return &hw_stats->erx;
397         } else {
398                 struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
399
400                 return &hw_stats->erx;
401         }
402 }
403
404 static void populate_be_v0_stats(struct be_adapter *adapter)
405 {
406         struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
407         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
408         struct be_rxf_stats_v0 *rxf_stats = &hw_stats->rxf;
409         struct be_port_rxf_stats_v0 *port_stats =
410                                         &rxf_stats->port[adapter->port_num];
411         struct be_drv_stats *drvs = &adapter->drv_stats;
412
413         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
414         drvs->rx_pause_frames = port_stats->rx_pause_frames;
415         drvs->rx_crc_errors = port_stats->rx_crc_errors;
416         drvs->rx_control_frames = port_stats->rx_control_frames;
417         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
418         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
419         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
420         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
421         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
422         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
423         drvs->rxpp_fifo_overflow_drop = port_stats->rx_fifo_overflow;
424         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
425         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
426         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
427         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
428         drvs->rx_input_fifo_overflow_drop = port_stats->rx_input_fifo_overflow;
429         drvs->rx_dropped_header_too_small =
430                 port_stats->rx_dropped_header_too_small;
431         drvs->rx_address_filtered =
432                                         port_stats->rx_address_filtered +
433                                         port_stats->rx_vlan_filtered;
434         drvs->rx_alignment_symbol_errors =
435                 port_stats->rx_alignment_symbol_errors;
436
437         drvs->tx_pauseframes = port_stats->tx_pauseframes;
438         drvs->tx_controlframes = port_stats->tx_controlframes;
439
440         if (adapter->port_num)
441                 drvs->jabber_events = rxf_stats->port1_jabber_events;
442         else
443                 drvs->jabber_events = rxf_stats->port0_jabber_events;
444         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
445         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
446         drvs->forwarded_packets = rxf_stats->forwarded_packets;
447         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
448         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
449         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
450         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
451 }
452
453 static void populate_be_v1_stats(struct be_adapter *adapter)
454 {
455         struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
456         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
457         struct be_rxf_stats_v1 *rxf_stats = &hw_stats->rxf;
458         struct be_port_rxf_stats_v1 *port_stats =
459                                         &rxf_stats->port[adapter->port_num];
460         struct be_drv_stats *drvs = &adapter->drv_stats;
461
462         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
463         drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
464         drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
465         drvs->rx_pause_frames = port_stats->rx_pause_frames;
466         drvs->rx_crc_errors = port_stats->rx_crc_errors;
467         drvs->rx_control_frames = port_stats->rx_control_frames;
468         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
469         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
470         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
471         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
472         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
473         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
474         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
475         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
476         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
477         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
478         drvs->rx_dropped_header_too_small =
479                 port_stats->rx_dropped_header_too_small;
480         drvs->rx_input_fifo_overflow_drop =
481                 port_stats->rx_input_fifo_overflow_drop;
482         drvs->rx_address_filtered = port_stats->rx_address_filtered;
483         drvs->rx_alignment_symbol_errors =
484                 port_stats->rx_alignment_symbol_errors;
485         drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
486         drvs->tx_pauseframes = port_stats->tx_pauseframes;
487         drvs->tx_controlframes = port_stats->tx_controlframes;
488         drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
489         drvs->jabber_events = port_stats->jabber_events;
490         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
491         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
492         drvs->forwarded_packets = rxf_stats->forwarded_packets;
493         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
494         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
495         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
496         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
497 }
498
499 static void populate_be_v2_stats(struct be_adapter *adapter)
500 {
501         struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
502         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
503         struct be_rxf_stats_v2 *rxf_stats = &hw_stats->rxf;
504         struct be_port_rxf_stats_v2 *port_stats =
505                                         &rxf_stats->port[adapter->port_num];
506         struct be_drv_stats *drvs = &adapter->drv_stats;
507
508         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
509         drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
510         drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
511         drvs->rx_pause_frames = port_stats->rx_pause_frames;
512         drvs->rx_crc_errors = port_stats->rx_crc_errors;
513         drvs->rx_control_frames = port_stats->rx_control_frames;
514         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
515         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
516         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
517         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
518         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
519         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
520         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
521         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
522         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
523         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
524         drvs->rx_dropped_header_too_small =
525                 port_stats->rx_dropped_header_too_small;
526         drvs->rx_input_fifo_overflow_drop =
527                 port_stats->rx_input_fifo_overflow_drop;
528         drvs->rx_address_filtered = port_stats->rx_address_filtered;
529         drvs->rx_alignment_symbol_errors =
530                 port_stats->rx_alignment_symbol_errors;
531         drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
532         drvs->tx_pauseframes = port_stats->tx_pauseframes;
533         drvs->tx_controlframes = port_stats->tx_controlframes;
534         drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
535         drvs->jabber_events = port_stats->jabber_events;
536         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
537         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
538         drvs->forwarded_packets = rxf_stats->forwarded_packets;
539         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
540         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
541         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
542         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
543         if (be_roce_supported(adapter)) {
544                 drvs->rx_roce_bytes_lsd = port_stats->roce_bytes_received_lsd;
545                 drvs->rx_roce_bytes_msd = port_stats->roce_bytes_received_msd;
546                 drvs->rx_roce_frames = port_stats->roce_frames_received;
547                 drvs->roce_drops_crc = port_stats->roce_drops_crc;
548                 drvs->roce_drops_payload_len =
549                         port_stats->roce_drops_payload_len;
550         }
551 }
552
553 static void populate_lancer_stats(struct be_adapter *adapter)
554 {
555         struct be_drv_stats *drvs = &adapter->drv_stats;
556         struct lancer_pport_stats *pport_stats = pport_stats_from_cmd(adapter);
557
558         be_dws_le_to_cpu(pport_stats, sizeof(*pport_stats));
559         drvs->rx_pause_frames = pport_stats->rx_pause_frames_lo;
560         drvs->rx_crc_errors = pport_stats->rx_crc_errors_lo;
561         drvs->rx_control_frames = pport_stats->rx_control_frames_lo;
562         drvs->rx_in_range_errors = pport_stats->rx_in_range_errors;
563         drvs->rx_frame_too_long = pport_stats->rx_frames_too_long_lo;
564         drvs->rx_dropped_runt = pport_stats->rx_dropped_runt;
565         drvs->rx_ip_checksum_errs = pport_stats->rx_ip_checksum_errors;
566         drvs->rx_tcp_checksum_errs = pport_stats->rx_tcp_checksum_errors;
567         drvs->rx_udp_checksum_errs = pport_stats->rx_udp_checksum_errors;
568         drvs->rx_dropped_tcp_length =
569                                 pport_stats->rx_dropped_invalid_tcp_length;
570         drvs->rx_dropped_too_small = pport_stats->rx_dropped_too_small;
571         drvs->rx_dropped_too_short = pport_stats->rx_dropped_too_short;
572         drvs->rx_out_range_errors = pport_stats->rx_out_of_range_errors;
573         drvs->rx_dropped_header_too_small =
574                                 pport_stats->rx_dropped_header_too_small;
575         drvs->rx_input_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
576         drvs->rx_address_filtered =
577                                         pport_stats->rx_address_filtered +
578                                         pport_stats->rx_vlan_filtered;
579         drvs->rx_alignment_symbol_errors = pport_stats->rx_symbol_errors_lo;
580         drvs->rxpp_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
581         drvs->tx_pauseframes = pport_stats->tx_pause_frames_lo;
582         drvs->tx_controlframes = pport_stats->tx_control_frames_lo;
583         drvs->jabber_events = pport_stats->rx_jabbers;
584         drvs->forwarded_packets = pport_stats->num_forwards_lo;
585         drvs->rx_drops_mtu = pport_stats->rx_drops_mtu_lo;
586         drvs->rx_drops_too_many_frags =
587                                 pport_stats->rx_drops_too_many_frags_lo;
588 }
589
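/* Worked example for accumulate_16bit_val() below: the BEx ERX drop counter
 * is only 16 bits wide, so the driver folds it into a 32-bit accumulator.
 * If *acc == 0x0001fff0 and the HW now reports val == 0x0005, val is smaller
 * than the stored low half, so the HW counter must have wrapped:
 * newacc = 0x00010000 + 0x0005 + 65536 = 0x00020005.
 */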
590 static void accumulate_16bit_val(u32 *acc, u16 val)
591 {
592 #define lo(x)                   (x & 0xFFFF)
593 #define hi(x)                   (x & 0xFFFF0000)
594         bool wrapped = val < lo(*acc);
595         u32 newacc = hi(*acc) + val;
596
597         if (wrapped)
598                 newacc += 65536;
599         ACCESS_ONCE(*acc) = newacc;
600 }
601
602 static void populate_erx_stats(struct be_adapter *adapter,
603                                struct be_rx_obj *rxo, u32 erx_stat)
604 {
605         if (!BEx_chip(adapter))
606                 rx_stats(rxo)->rx_drops_no_frags = erx_stat;
607         else
608                 /* below erx HW counter can actually wrap around after
609                  * 65535. Driver accumulates a 32-bit value
610                  */
611                 accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags,
612                                      (u16)erx_stat);
613 }
614
615 void be_parse_stats(struct be_adapter *adapter)
616 {
617         struct be_erx_stats_v2 *erx = be_erx_stats_from_cmd(adapter);
618         struct be_rx_obj *rxo;
619         int i;
620         u32 erx_stat;
621
622         if (lancer_chip(adapter)) {
623                 populate_lancer_stats(adapter);
624         } else {
625                 if (BE2_chip(adapter))
626                         populate_be_v0_stats(adapter);
627                 else if (BE3_chip(adapter))
628                         /* for BE3 */
629                         populate_be_v1_stats(adapter);
630                 else
631                         populate_be_v2_stats(adapter);
632
633                 /* erx_v2 is longer than v0, v1. use v2 for v0, v1 access */
634                 for_all_rx_queues(adapter, rxo, i) {
635                         erx_stat = erx->rx_drops_no_fragments[rxo->q.id];
636                         populate_erx_stats(adapter, rxo, erx_stat);
637                 }
638         }
639 }
640
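/* be_get_stats64() reads the per-queue 64-bit counters inside the
 * u64_stats_fetch_begin_irq()/u64_stats_fetch_retry_irq() sequence so that,
 * on 32-bit hosts, an update in the datapath cannot be observed half-written;
 * the loop simply retries until a consistent snapshot is obtained.
 */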
641 static struct rtnl_link_stats64 *be_get_stats64(struct net_device *netdev,
642                                                 struct rtnl_link_stats64 *stats)
643 {
644         struct be_adapter *adapter = netdev_priv(netdev);
645         struct be_drv_stats *drvs = &adapter->drv_stats;
646         struct be_rx_obj *rxo;
647         struct be_tx_obj *txo;
648         u64 pkts, bytes;
649         unsigned int start;
650         int i;
651
652         for_all_rx_queues(adapter, rxo, i) {
653                 const struct be_rx_stats *rx_stats = rx_stats(rxo);
654
655                 do {
656                         start = u64_stats_fetch_begin_irq(&rx_stats->sync);
657                         pkts = rx_stats(rxo)->rx_pkts;
658                         bytes = rx_stats(rxo)->rx_bytes;
659                 } while (u64_stats_fetch_retry_irq(&rx_stats->sync, start));
660                 stats->rx_packets += pkts;
661                 stats->rx_bytes += bytes;
662                 stats->multicast += rx_stats(rxo)->rx_mcast_pkts;
663                 stats->rx_dropped += rx_stats(rxo)->rx_drops_no_skbs +
664                                         rx_stats(rxo)->rx_drops_no_frags;
665         }
666
667         for_all_tx_queues(adapter, txo, i) {
668                 const struct be_tx_stats *tx_stats = tx_stats(txo);
669
670                 do {
671                         start = u64_stats_fetch_begin_irq(&tx_stats->sync);
672                         pkts = tx_stats(txo)->tx_pkts;
673                         bytes = tx_stats(txo)->tx_bytes;
674                 } while (u64_stats_fetch_retry_irq(&tx_stats->sync, start));
675                 stats->tx_packets += pkts;
676                 stats->tx_bytes += bytes;
677         }
678
679         /* bad pkts received */
680         stats->rx_errors = drvs->rx_crc_errors +
681                 drvs->rx_alignment_symbol_errors +
682                 drvs->rx_in_range_errors +
683                 drvs->rx_out_range_errors +
684                 drvs->rx_frame_too_long +
685                 drvs->rx_dropped_too_small +
686                 drvs->rx_dropped_too_short +
687                 drvs->rx_dropped_header_too_small +
688                 drvs->rx_dropped_tcp_length +
689                 drvs->rx_dropped_runt;
690
691         /* detailed rx errors */
692         stats->rx_length_errors = drvs->rx_in_range_errors +
693                 drvs->rx_out_range_errors +
694                 drvs->rx_frame_too_long;
695
696         stats->rx_crc_errors = drvs->rx_crc_errors;
697
698         /* frame alignment errors */
699         stats->rx_frame_errors = drvs->rx_alignment_symbol_errors;
700
701         /* receiver fifo overrun */
702         /* drops_no_pbuf is not per i/f, it's per BE card */
703         stats->rx_fifo_errors = drvs->rxpp_fifo_overflow_drop +
704                                 drvs->rx_input_fifo_overflow_drop +
705                                 drvs->rx_drops_no_pbuf;
706         return stats;
707 }
708
709 void be_link_status_update(struct be_adapter *adapter, u8 link_status)
710 {
711         struct net_device *netdev = adapter->netdev;
712
713         if (!(adapter->flags & BE_FLAGS_LINK_STATUS_INIT)) {
714                 netif_carrier_off(netdev);
715                 adapter->flags |= BE_FLAGS_LINK_STATUS_INIT;
716         }
717
718         if (link_status)
719                 netif_carrier_on(netdev);
720         else
721                 netif_carrier_off(netdev);
722
723         netdev_info(netdev, "Link is %s\n", link_status ? "Up" : "Down");
724 }
725
726 static int be_gso_hdr_len(struct sk_buff *skb)
727 {
728         if (skb->encapsulation)
729                 return skb_inner_transport_offset(skb) +
730                        inner_tcp_hdrlen(skb);
731         return skb_transport_offset(skb) + tcp_hdrlen(skb);
732 }
733
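/* Example of the TSO byte accounting done in be_tx_stats_update() below: an
 * skb with gso_segs == 4 and a 66-byte protocol header goes on the wire as
 * 4 segments, each carrying its own copy of the header, but skb->len counts
 * that header only once.  The extra (4 - 1) * 66 bytes are therefore added
 * via dup_hdr_len so tx_bytes reflects what was actually transmitted.
 */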
734 static void be_tx_stats_update(struct be_tx_obj *txo, struct sk_buff *skb)
735 {
736         struct be_tx_stats *stats = tx_stats(txo);
737         u32 tx_pkts = skb_shinfo(skb)->gso_segs ? : 1;
738         /* Account for headers which get duplicated in TSO pkt */
739         u32 dup_hdr_len = tx_pkts > 1 ? be_gso_hdr_len(skb) * (tx_pkts - 1) : 0;
740
741         u64_stats_update_begin(&stats->sync);
742         stats->tx_reqs++;
743         stats->tx_bytes += skb->len + dup_hdr_len;
744         stats->tx_pkts += tx_pkts;
745         if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL)
746                 stats->tx_vxlan_offload_pkts += tx_pkts;
747         u64_stats_update_end(&stats->sync);
748 }
749
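/* Worked example for skb_wrb_cnt(): a purely linear skb needs 2 WRBs
 * (header WRB + linear data); an skb with linear data plus 3 page frags
 * needs 1 + 1 + 3 = 5 WRBs.
 */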
750 /* Returns number of WRBs needed for the skb */
751 static u32 skb_wrb_cnt(struct sk_buff *skb)
752 {
753         /* +1 for the header wrb */
754         return 1 + (skb_headlen(skb) ? 1 : 0) + skb_shinfo(skb)->nr_frags;
755 }
756
757 static inline void wrb_fill(struct be_eth_wrb *wrb, u64 addr, int len)
758 {
759         wrb->frag_pa_hi = cpu_to_le32(upper_32_bits(addr));
760         wrb->frag_pa_lo = cpu_to_le32(lower_32_bits(addr));
761         wrb->frag_len = cpu_to_le32(len & ETH_WRB_FRAG_LEN_MASK);
762         wrb->rsvd0 = 0;
763 }
764
765 /* A dummy wrb is just all zeros. Using a separate routine for dummy-wrb
766  * to avoid the swap and shift/mask operations in wrb_fill().
767  */
768 static inline void wrb_fill_dummy(struct be_eth_wrb *wrb)
769 {
770         wrb->frag_pa_hi = 0;
771         wrb->frag_pa_lo = 0;
772         wrb->frag_len = 0;
773         wrb->rsvd0 = 0;
774 }
775
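/* be_get_tx_vlan_tag() rewrites the 802.1p priority when the stack requests
 * a priority the adapter does not advertise in vlan_prio_bmap.  For example,
 * a tag of 0xa005 carries priority 5; if bit 5 is clear in the bitmap, the
 * PCP field is cleared and replaced with adapter->recommended_prio_bits
 * (which, judging by the OR with no extra shift, is presumably already
 * encoded in the PCP bit positions).
 */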
776 static inline u16 be_get_tx_vlan_tag(struct be_adapter *adapter,
777                                      struct sk_buff *skb)
778 {
779         u8 vlan_prio;
780         u16 vlan_tag;
781
782         vlan_tag = skb_vlan_tag_get(skb);
783         vlan_prio = (vlan_tag & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
784         /* If vlan priority provided by OS is NOT in available bmap */
785         if (!(adapter->vlan_prio_bmap & (1 << vlan_prio)))
786                 vlan_tag = (vlan_tag & ~VLAN_PRIO_MASK) |
787                                 adapter->recommended_prio_bits;
788
789         return vlan_tag;
790 }
791
792 /* Used only for IP tunnel packets */
793 static u16 skb_inner_ip_proto(struct sk_buff *skb)
794 {
795         return (inner_ip_hdr(skb)->version == 4) ?
796                 inner_ip_hdr(skb)->protocol : inner_ipv6_hdr(skb)->nexthdr;
797 }
798
799 static u16 skb_ip_proto(struct sk_buff *skb)
800 {
801         return (ip_hdr(skb)->version == 4) ?
802                 ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr;
803 }
804
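/* TXQ flow-control heuristics used below: the queue is treated as full once
 * there is no longer room for a maximally fragmented skb (BE_MAX_TX_FRAG_COUNT
 * WRBs), and it is woken again only after draining below half capacity, which
 * gives some hysteresis between stop and wake.
 */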
805 static inline bool be_is_txq_full(struct be_tx_obj *txo)
806 {
807         return atomic_read(&txo->q.used) + BE_MAX_TX_FRAG_COUNT >= txo->q.len;
808 }
809
810 static inline bool be_can_txq_wake(struct be_tx_obj *txo)
811 {
812         return atomic_read(&txo->q.used) < txo->q.len / 2;
813 }
814
815 static inline bool be_is_tx_compl_pending(struct be_tx_obj *txo)
816 {
817         return atomic_read(&txo->q.used) > txo->pend_wrb_cnt;
818 }
819
820 static void be_get_wrb_params_from_skb(struct be_adapter *adapter,
821                                        struct sk_buff *skb,
822                                        struct be_wrb_params *wrb_params)
823 {
824         u16 proto;
825
826         if (skb_is_gso(skb)) {
827                 BE_WRB_F_SET(wrb_params->features, LSO, 1);
828                 wrb_params->lso_mss = skb_shinfo(skb)->gso_size;
829                 if (skb_is_gso_v6(skb) && !lancer_chip(adapter))
830                         BE_WRB_F_SET(wrb_params->features, LSO6, 1);
831         } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
832                 if (skb->encapsulation) {
833                         BE_WRB_F_SET(wrb_params->features, IPCS, 1);
834                         proto = skb_inner_ip_proto(skb);
835                 } else {
836                         proto = skb_ip_proto(skb);
837                 }
838                 if (proto == IPPROTO_TCP)
839                         BE_WRB_F_SET(wrb_params->features, TCPCS, 1);
840                 else if (proto == IPPROTO_UDP)
841                         BE_WRB_F_SET(wrb_params->features, UDPCS, 1);
842         }
843
844         if (skb_vlan_tag_present(skb)) {
845                 BE_WRB_F_SET(wrb_params->features, VLAN, 1);
846                 wrb_params->vlan_tag = be_get_tx_vlan_tag(adapter, skb);
847         }
848
849         BE_WRB_F_SET(wrb_params->features, CRC, 1);
850 }
851
852 static void wrb_fill_hdr(struct be_adapter *adapter,
853                          struct be_eth_hdr_wrb *hdr,
854                          struct be_wrb_params *wrb_params,
855                          struct sk_buff *skb)
856 {
857         memset(hdr, 0, sizeof(*hdr));
858
859         SET_TX_WRB_HDR_BITS(crc, hdr,
860                             BE_WRB_F_GET(wrb_params->features, CRC));
861         SET_TX_WRB_HDR_BITS(ipcs, hdr,
862                             BE_WRB_F_GET(wrb_params->features, IPCS));
863         SET_TX_WRB_HDR_BITS(tcpcs, hdr,
864                             BE_WRB_F_GET(wrb_params->features, TCPCS));
865         SET_TX_WRB_HDR_BITS(udpcs, hdr,
866                             BE_WRB_F_GET(wrb_params->features, UDPCS));
867
868         SET_TX_WRB_HDR_BITS(lso, hdr,
869                             BE_WRB_F_GET(wrb_params->features, LSO));
870         SET_TX_WRB_HDR_BITS(lso6, hdr,
871                             BE_WRB_F_GET(wrb_params->features, LSO6));
872         SET_TX_WRB_HDR_BITS(lso_mss, hdr, wrb_params->lso_mss);
873
874         /* Hack to skip HW VLAN tagging needs evt = 1, compl = 0. When this
875          * hack is not needed, the evt bit is set while ringing DB.
876          */
877         SET_TX_WRB_HDR_BITS(event, hdr,
878                             BE_WRB_F_GET(wrb_params->features, VLAN_SKIP_HW));
879         SET_TX_WRB_HDR_BITS(vlan, hdr,
880                             BE_WRB_F_GET(wrb_params->features, VLAN));
881         SET_TX_WRB_HDR_BITS(vlan_tag, hdr, wrb_params->vlan_tag);
882
883         SET_TX_WRB_HDR_BITS(num_wrb, hdr, skb_wrb_cnt(skb));
884         SET_TX_WRB_HDR_BITS(len, hdr, skb->len);
885         SET_TX_WRB_HDR_BITS(mgmt, hdr,
886                             BE_WRB_F_GET(wrb_params->features, OS2BMC));
887 }
888
889 static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb,
890                           bool unmap_single)
891 {
892         dma_addr_t dma;
893         u32 frag_len = le32_to_cpu(wrb->frag_len);
894
895
896         dma = (u64)le32_to_cpu(wrb->frag_pa_hi) << 32 |
897                 (u64)le32_to_cpu(wrb->frag_pa_lo);
898         if (frag_len) {
899                 if (unmap_single)
900                         dma_unmap_single(dev, dma, frag_len, DMA_TO_DEVICE);
901                 else
902                         dma_unmap_page(dev, dma, frag_len, DMA_TO_DEVICE);
903         }
904 }
905
906 /* Grab a WRB header for xmit */
907 static u32 be_tx_get_wrb_hdr(struct be_tx_obj *txo)
908 {
909         u32 head = txo->q.head;
910
911         queue_head_inc(&txo->q);
912         return head;
913 }
914
915 /* Set up the WRB header for xmit */
916 static void be_tx_setup_wrb_hdr(struct be_adapter *adapter,
917                                 struct be_tx_obj *txo,
918                                 struct be_wrb_params *wrb_params,
919                                 struct sk_buff *skb, u16 head)
920 {
921         u32 num_frags = skb_wrb_cnt(skb);
922         struct be_queue_info *txq = &txo->q;
923         struct be_eth_hdr_wrb *hdr = queue_index_node(txq, head);
924
925         wrb_fill_hdr(adapter, hdr, wrb_params, skb);
926         be_dws_cpu_to_le(hdr, sizeof(*hdr));
927
928         BUG_ON(txo->sent_skb_list[head]);
929         txo->sent_skb_list[head] = skb;
930         txo->last_req_hdr = head;
931         atomic_add(num_frags, &txq->used);
932         txo->last_req_wrb_cnt = num_frags;
933         txo->pend_wrb_cnt += num_frags;
934 }
935
936 /* Setup a WRB fragment (buffer descriptor) for xmit */
937 static void be_tx_setup_wrb_frag(struct be_tx_obj *txo, dma_addr_t busaddr,
938                                  int len)
939 {
940         struct be_eth_wrb *wrb;
941         struct be_queue_info *txq = &txo->q;
942
943         wrb = queue_head_node(txq);
944         wrb_fill(wrb, busaddr, len);
945         queue_head_inc(txq);
946 }
947
948 /* Bring the queue back to the state it was in before be_xmit_enqueue() routine
949  * was invoked. The producer index is restored to the previous packet and the
950  * WRBs of the current packet are unmapped. Invoked to handle tx setup errors.
951  */
952 static void be_xmit_restore(struct be_adapter *adapter,
953                             struct be_tx_obj *txo, u32 head, bool map_single,
954                             u32 copied)
955 {
956         struct device *dev;
957         struct be_eth_wrb *wrb;
958         struct be_queue_info *txq = &txo->q;
959
960         dev = &adapter->pdev->dev;
961         txq->head = head;
962
963         /* skip the first wrb (hdr); it's not mapped */
964         queue_head_inc(txq);
965         while (copied) {
966                 wrb = queue_head_node(txq);
967                 unmap_tx_frag(dev, wrb, map_single);
968                 map_single = false;
969                 copied -= le32_to_cpu(wrb->frag_len);
970                 queue_head_inc(txq);
971         }
972
973         txq->head = head;
974 }
975
976 /* Enqueue the given packet for transmit. This routine allocates WRBs for the
977  * packet, dma maps the packet buffers and sets up the WRBs. Returns the number
978  * of WRBs used up by the packet.
979  */
980 static u32 be_xmit_enqueue(struct be_adapter *adapter, struct be_tx_obj *txo,
981                            struct sk_buff *skb,
982                            struct be_wrb_params *wrb_params)
983 {
984         u32 i, copied = 0, wrb_cnt = skb_wrb_cnt(skb);
985         struct device *dev = &adapter->pdev->dev;
986         struct be_queue_info *txq = &txo->q;
987         bool map_single = false;
988         u32 head = txq->head;
989         dma_addr_t busaddr;
990         int len;
991
992         head = be_tx_get_wrb_hdr(txo);
993
994         if (skb->len > skb->data_len) {
995                 len = skb_headlen(skb);
996
997                 busaddr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE);
998                 if (dma_mapping_error(dev, busaddr))
999                         goto dma_err;
1000                 map_single = true;
1001                 be_tx_setup_wrb_frag(txo, busaddr, len);
1002                 copied += len;
1003         }
1004
1005         for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1006                 const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
1007                 len = skb_frag_size(frag);
1008
1009                 busaddr = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE);
1010                 if (dma_mapping_error(dev, busaddr))
1011                         goto dma_err;
1012                 be_tx_setup_wrb_frag(txo, busaddr, len);
1013                 copied += len;
1014         }
1015
1016         be_tx_setup_wrb_hdr(adapter, txo, wrb_params, skb, head);
1017
1018         be_tx_stats_update(txo, skb);
1019         return wrb_cnt;
1020
1021 dma_err:
1022         adapter->drv_stats.dma_map_errors++;
1023         be_xmit_restore(adapter, txo, head, map_single, copied);
1024         return 0;
1025 }
1026
1027 static inline int qnq_async_evt_rcvd(struct be_adapter *adapter)
1028 {
1029         return adapter->flags & BE_FLAGS_QNQ_ASYNC_EVT_RCVD;
1030 }
1031
1032 static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter,
1033                                              struct sk_buff *skb,
1034                                              struct be_wrb_params
1035                                              *wrb_params)
1036 {
1037         u16 vlan_tag = 0;
1038
1039         skb = skb_share_check(skb, GFP_ATOMIC);
1040         if (unlikely(!skb))
1041                 return skb;
1042
1043         if (skb_vlan_tag_present(skb))
1044                 vlan_tag = be_get_tx_vlan_tag(adapter, skb);
1045
1046         if (qnq_async_evt_rcvd(adapter) && adapter->pvid) {
1047                 if (!vlan_tag)
1048                         vlan_tag = adapter->pvid;
1049                 /* f/w workaround: setting skip_hw_vlan = 1 informs the F/W to
1050                  * skip VLAN insertion
1051                  */
1052                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1053         }
1054
1055         if (vlan_tag) {
1056                 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1057                                                 vlan_tag);
1058                 if (unlikely(!skb))
1059                         return skb;
1060                 skb->vlan_tci = 0;
1061         }
1062
1063         /* Insert the outer VLAN, if any */
1064         if (adapter->qnq_vid) {
1065                 vlan_tag = adapter->qnq_vid;
1066                 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1067                                                 vlan_tag);
1068                 if (unlikely(!skb))
1069                         return skb;
1070                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1071         }
1072
1073         return skb;
1074 }
1075
1076 static bool be_ipv6_exthdr_check(struct sk_buff *skb)
1077 {
1078         struct ethhdr *eh = (struct ethhdr *)skb->data;
1079         u16 offset = ETH_HLEN;
1080
1081         if (eh->h_proto == htons(ETH_P_IPV6)) {
1082                 struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + offset);
1083
1084                 offset += sizeof(struct ipv6hdr);
1085                 if (ip6h->nexthdr != NEXTHDR_TCP &&
1086                     ip6h->nexthdr != NEXTHDR_UDP) {
1087                         struct ipv6_opt_hdr *ehdr =
1088                                 (struct ipv6_opt_hdr *)(skb->data + offset);
1089
1090                         /* offending pkt: 2nd byte following IPv6 hdr is 0xff */
1091                         if (ehdr->hdrlen == 0xff)
1092                                 return true;
1093                 }
1094         }
1095         return false;
1096 }
1097
1098 static int be_vlan_tag_tx_chk(struct be_adapter *adapter, struct sk_buff *skb)
1099 {
1100         return skb_vlan_tag_present(skb) || adapter->pvid || adapter->qnq_vid;
1101 }
1102
1103 static int be_ipv6_tx_stall_chk(struct be_adapter *adapter, struct sk_buff *skb)
1104 {
1105         return BE3_chip(adapter) && be_ipv6_exthdr_check(skb);
1106 }
1107
1108 static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter,
1109                                                   struct sk_buff *skb,
1110                                                   struct be_wrb_params
1111                                                   *wrb_params)
1112 {
1113         struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1114         unsigned int eth_hdr_len;
1115         struct iphdr *ip;
1116
1117         /* For padded packets, BE HW modifies tot_len field in IP header
1118          * incorrectly when VLAN tag is inserted by HW.
1119          * For padded packets, Lancer computes incorrect checksum.
1120          */
1121         eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ?
1122                                                 VLAN_ETH_HLEN : ETH_HLEN;
1123         if (skb->len <= 60 &&
1124             (lancer_chip(adapter) || skb_vlan_tag_present(skb)) &&
1125             is_ipv4_pkt(skb)) {
1126                 ip = (struct iphdr *)ip_hdr(skb);
1127                 pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len));
1128         }
1129
1130         /* If vlan tag is already inlined in the packet, skip HW VLAN
1131          * tagging in pvid-tagging mode
1132          */
1133         if (be_pvid_tagging_enabled(adapter) &&
1134             veh->h_vlan_proto == htons(ETH_P_8021Q))
1135                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1136
1137         /* HW has a bug wherein it will calculate CSUM for VLAN
1138          * pkts even though checksum offload is not requested.
1139          * Manually insert VLAN in pkt.
1140          */
1141         if (skb->ip_summed != CHECKSUM_PARTIAL &&
1142             skb_vlan_tag_present(skb)) {
1143                 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1144                 if (unlikely(!skb))
1145                         goto err;
1146         }
1147
1148         /* HW may lockup when VLAN HW tagging is requested on
1149          * certain ipv6 packets. Drop such pkts if the HW workaround to
1150          * skip HW tagging is not enabled by FW.
1151          */
1152         if (unlikely(be_ipv6_tx_stall_chk(adapter, skb) &&
1153                      (adapter->pvid || adapter->qnq_vid) &&
1154                      !qnq_async_evt_rcvd(adapter)))
1155                 goto tx_drop;
1156
1157         /* Manual VLAN tag insertion to prevent:
1158          * ASIC lockup when the ASIC inserts VLAN tag into
1159          * certain ipv6 packets. Insert VLAN tags in driver,
1160          * and set event, completion, vlan bits accordingly
1161          * in the Tx WRB.
1162          */
1163         if (be_ipv6_tx_stall_chk(adapter, skb) &&
1164             be_vlan_tag_tx_chk(adapter, skb)) {
1165                 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1166                 if (unlikely(!skb))
1167                         goto err;
1168         }
1169
1170         return skb;
1171 tx_drop:
1172         dev_kfree_skb_any(skb);
1173 err:
1174         return NULL;
1175 }
1176
1177 static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter,
1178                                            struct sk_buff *skb,
1179                                            struct be_wrb_params *wrb_params)
1180 {
1181         int err;
1182
1183         /* Lancer, SH and BE3 in SRIOV mode have a bug wherein
1184          * packets that are 32 bytes or less may cause a transmit stall
1185          * on that port. The workaround is to pad such packets
1186          * (len <= 32 bytes) to a minimum length of 36 bytes.
1187          */
1188         if (skb->len <= 32) {
1189                 if (skb_put_padto(skb, 36))
1190                         return NULL;
1191         }
1192
1193         if (BEx_chip(adapter) || lancer_chip(adapter)) {
1194                 skb = be_lancer_xmit_workarounds(adapter, skb, wrb_params);
1195                 if (!skb)
1196                         return NULL;
1197         }
1198
1199         /* The stack can send us skbs with length greater than
1200          * what the HW can handle. Trim the extra bytes.
1201          */
1202         WARN_ON_ONCE(skb->len > BE_MAX_GSO_SIZE);
1203         err = pskb_trim(skb, BE_MAX_GSO_SIZE);
1204         WARN_ON(err);
1205
1206         return skb;
1207 }
1208
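/* Example of the dummy-WRB padding in be_xmit_flush() below: on non-Lancer
 * chips, if 3 WRBs are pending, a fourth all-zero WRB is appended, the
 * num_wrb field in the last request's header is bumped from last_req_wrb_cnt
 * to last_req_wrb_cnt + 1, and the doorbell then reports 4 posted WRBs.
 */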
1209 static void be_xmit_flush(struct be_adapter *adapter, struct be_tx_obj *txo)
1210 {
1211         struct be_queue_info *txq = &txo->q;
1212         struct be_eth_hdr_wrb *hdr = queue_index_node(txq, txo->last_req_hdr);
1213
1214         /* Mark the last request eventable if it hasn't been marked already */
1215         if (!(hdr->dw[2] & cpu_to_le32(TX_HDR_WRB_EVT)))
1216                 hdr->dw[2] |= cpu_to_le32(TX_HDR_WRB_EVT | TX_HDR_WRB_COMPL);
1217
1218         /* compose a dummy wrb if there is an odd number of wrbs to notify */
1219         if (!lancer_chip(adapter) && (txo->pend_wrb_cnt & 1)) {
1220                 wrb_fill_dummy(queue_head_node(txq));
1221                 queue_head_inc(txq);
1222                 atomic_inc(&txq->used);
1223                 txo->pend_wrb_cnt++;
1224                 hdr->dw[2] &= ~cpu_to_le32(TX_HDR_WRB_NUM_MASK <<
1225                                            TX_HDR_WRB_NUM_SHIFT);
1226                 hdr->dw[2] |= cpu_to_le32((txo->last_req_wrb_cnt + 1) <<
1227                                           TX_HDR_WRB_NUM_SHIFT);
1228         }
1229         be_txq_notify(adapter, txo, txo->pend_wrb_cnt);
1230         txo->pend_wrb_cnt = 0;
1231 }
1232
1233 /* OS2BMC related */
1234
1235 #define DHCP_CLIENT_PORT        68
1236 #define DHCP_SERVER_PORT        67
1237 #define NET_BIOS_PORT1          137
1238 #define NET_BIOS_PORT2          138
1239 #define DHCPV6_RAS_PORT         547
1240
1241 #define is_mc_allowed_on_bmc(adapter, eh)       \
1242         (!is_multicast_filt_enabled(adapter) && \
1243          is_multicast_ether_addr(eh->h_dest) && \
1244          !is_broadcast_ether_addr(eh->h_dest))
1245
1246 #define is_bc_allowed_on_bmc(adapter, eh)       \
1247         (!is_broadcast_filt_enabled(adapter) && \
1248          is_broadcast_ether_addr(eh->h_dest))
1249
1250 #define is_arp_allowed_on_bmc(adapter, skb)     \
1251         (is_arp(skb) && is_arp_filt_enabled(adapter))
1252
1253 #define is_broadcast_packet(eh, adapter)        \
1254                 (is_multicast_ether_addr(eh->h_dest) && \
1255                 !compare_ether_addr(eh->h_dest, adapter->netdev->broadcast))
1256
1257 #define is_arp(skb)     (skb->protocol == htons(ETH_P_ARP))
1258
1259 #define is_arp_filt_enabled(adapter)    \
1260                 (adapter->bmc_filt_mask & (BMC_FILT_BROADCAST_ARP))
1261
1262 #define is_dhcp_client_filt_enabled(adapter)    \
1263                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_CLIENT)
1264
1265 #define is_dhcp_srvr_filt_enabled(adapter)      \
1266                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_SERVER)
1267
1268 #define is_nbios_filt_enabled(adapter)  \
1269                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_NET_BIOS)
1270
1271 #define is_ipv6_na_filt_enabled(adapter)        \
1272                 (adapter->bmc_filt_mask &       \
1273                         BMC_FILT_MULTICAST_IPV6_NEIGH_ADVER)
1274
1275 #define is_ipv6_ra_filt_enabled(adapter)        \
1276                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RA)
1277
1278 #define is_ipv6_ras_filt_enabled(adapter)       \
1279                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RAS)
1280
1281 #define is_broadcast_filt_enabled(adapter)      \
1282                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST)
1283
1284 #define is_multicast_filt_enabled(adapter)      \
1285                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST)
1286
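/* be_send_pkt_to_bmc() decides whether a copy of this TX packet should also
 * be handed to the BMC (the OS2BMC path).  The decision is driven by
 * adapter->bmc_filt_mask: broadcast/multicast frames, ARP, NetBIOS, DHCP and
 * the IPv6 RA/NA cases are each checked against the corresponding BMC_FILT_*
 * bit via the helper macros above.
 */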
1287 static bool be_send_pkt_to_bmc(struct be_adapter *adapter,
1288                                struct sk_buff **skb)
1289 {
1290         struct ethhdr *eh = (struct ethhdr *)(*skb)->data;
1291         bool os2bmc = false;
1292
1293         if (!be_is_os2bmc_enabled(adapter))
1294                 goto done;
1295
1296         if (!is_multicast_ether_addr(eh->h_dest))
1297                 goto done;
1298
1299         if (is_mc_allowed_on_bmc(adapter, eh) ||
1300             is_bc_allowed_on_bmc(adapter, eh) ||
1301             is_arp_allowed_on_bmc(adapter, (*skb))) {
1302                 os2bmc = true;
1303                 goto done;
1304         }
1305
1306         if ((*skb)->protocol == htons(ETH_P_IPV6)) {
1307                 struct ipv6hdr *hdr = ipv6_hdr((*skb));
1308                 u8 nexthdr = hdr->nexthdr;
1309
1310                 if (nexthdr == IPPROTO_ICMPV6) {
1311                         struct icmp6hdr *icmp6 = icmp6_hdr((*skb));
1312
1313                         switch (icmp6->icmp6_type) {
1314                         case NDISC_ROUTER_ADVERTISEMENT:
1315                                 os2bmc = is_ipv6_ra_filt_enabled(adapter);
1316                                 goto done;
1317                         case NDISC_NEIGHBOUR_ADVERTISEMENT:
1318                                 os2bmc = is_ipv6_na_filt_enabled(adapter);
1319                                 goto done;
1320                         default:
1321                                 break;
1322                         }
1323                 }
1324         }
1325
1326         if (is_udp_pkt((*skb))) {
1327                 struct udphdr *udp = udp_hdr((*skb));
1328
1329                 switch (ntohs(udp->dest)) {
1330                 case DHCP_CLIENT_PORT:
1331                         os2bmc = is_dhcp_client_filt_enabled(adapter);
1332                         goto done;
1333                 case DHCP_SERVER_PORT:
1334                         os2bmc = is_dhcp_srvr_filt_enabled(adapter);
1335                         goto done;
1336                 case NET_BIOS_PORT1:
1337                 case NET_BIOS_PORT2:
1338                         os2bmc = is_nbios_filt_enabled(adapter);
1339                         goto done;
1340                 case DHCPV6_RAS_PORT:
1341                         os2bmc = is_ipv6_ras_filt_enabled(adapter);
1342                         goto done;
1343                 default:
1344                         break;
1345                 }
1346         }
1347 done:
1348         /* For packets over a vlan that are destined to the BMC,
1349          * the asic expects the vlan tag to be inline in the packet.
1350          */
1351         if (os2bmc)
1352                 *skb = be_insert_vlan_in_pkt(adapter, *skb, NULL);
1353
1354         return os2bmc;
1355 }
1356
1357 static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev)
1358 {
1359         struct be_adapter *adapter = netdev_priv(netdev);
1360         u16 q_idx = skb_get_queue_mapping(skb);
1361         struct be_tx_obj *txo = &adapter->tx_obj[q_idx];
1362         struct be_wrb_params wrb_params = { 0 };
1363         bool flush = !skb->xmit_more;
1364         u16 wrb_cnt;
1365
1366         skb = be_xmit_workarounds(adapter, skb, &wrb_params);
1367         if (unlikely(!skb))
1368                 goto drop;
1369
1370         be_get_wrb_params_from_skb(adapter, skb, &wrb_params);
1371
1372         wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1373         if (unlikely(!wrb_cnt)) {
1374                 dev_kfree_skb_any(skb);
1375                 goto drop;
1376         }
1377
1378         /* If OS2BMC is enabled and the pkt is destined to the BMC,
1379          * enqueue the pkt a second time with the mgmt bit set.
1380          */
1381         if (be_send_pkt_to_bmc(adapter, &skb)) {
1382                 BE_WRB_F_SET(wrb_params.features, OS2BMC, 1);
1383                 wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1384                 if (unlikely(!wrb_cnt))
1385                         goto drop;
1386                 else
1387                         skb_get(skb);
1388         }
1389
1390         if (be_is_txq_full(txo)) {
1391                 netif_stop_subqueue(netdev, q_idx);
1392                 tx_stats(txo)->tx_stops++;
1393         }
1394
1395         if (flush || __netif_subqueue_stopped(netdev, q_idx))
1396                 be_xmit_flush(adapter, txo);
1397
1398         return NETDEV_TX_OK;
1399 drop:
1400         tx_stats(txo)->tx_drv_drops++;
1401         /* Flush the already enqueued tx requests */
1402         if (flush && txo->pend_wrb_cnt)
1403                 be_xmit_flush(adapter, txo);
1404
1405         return NETDEV_TX_OK;
1406 }
1407
1408 static inline bool be_in_all_promisc(struct be_adapter *adapter)
1409 {
1410         return (adapter->if_flags & BE_IF_FLAGS_ALL_PROMISCUOUS) ==
1411                         BE_IF_FLAGS_ALL_PROMISCUOUS;
1412 }
1413
1414 static int be_set_vlan_promisc(struct be_adapter *adapter)
1415 {
1416         struct device *dev = &adapter->pdev->dev;
1417         int status;
1418
1419         if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS)
1420                 return 0;
1421
1422         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, ON);
1423         if (!status) {
1424                 dev_info(dev, "Enabled VLAN promiscuous mode\n");
1425                 adapter->if_flags |= BE_IF_FLAGS_VLAN_PROMISCUOUS;
1426         } else {
1427                 dev_err(dev, "Failed to enable VLAN promiscuous mode\n");
1428         }
1429         return status;
1430 }
1431
1432 static int be_clear_vlan_promisc(struct be_adapter *adapter)
1433 {
1434         struct device *dev = &adapter->pdev->dev;
1435         int status;
1436
1437         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, OFF);
1438         if (!status) {
1439                 dev_info(dev, "Disabling VLAN promiscuous mode\n");
1440                 adapter->if_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
1441         }
1442         return status;
1443 }
1444
1445 /*
1446  * A max of 64 (BE_NUM_VLANS_SUPPORTED) vlans can be configured in BE.
1447  * If the user configures more, place BE in vlan promiscuous mode.
1448  */
1449 static int be_vid_config(struct be_adapter *adapter)
1450 {
1451         struct device *dev = &adapter->pdev->dev;
1452         u16 vids[BE_NUM_VLANS_SUPPORTED];
1453         u16 num = 0, i = 0;
1454         int status = 0;
1455
1456         /* No need to change the VLAN state if the I/F is in promiscuous */
1457         if (adapter->netdev->flags & IFF_PROMISC)
1458                 return 0;
1459
1460         if (adapter->vlans_added > be_max_vlans(adapter))
1461                 return be_set_vlan_promisc(adapter);
1462
1463         if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
1464                 status = be_clear_vlan_promisc(adapter);
1465                 if (status)
1466                         return status;
1467         }
1468         /* Construct VLAN Table to give to HW */
1469         for_each_set_bit(i, adapter->vids, VLAN_N_VID)
1470                 vids[num++] = cpu_to_le16(i);
1471
1472         status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0);
1473         if (status) {
1474                 dev_err(dev, "Setting HW VLAN filtering failed\n");
1475                 /* Set to VLAN promisc mode as setting VLAN filter failed */
1476                 if (addl_status(status) == MCC_ADDL_STATUS_INSUFFICIENT_VLANS ||
1477                     addl_status(status) ==
1478                                 MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES)
1479                         return be_set_vlan_promisc(adapter);
1480         }
1481         return status;
1482 }
1483
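/* VLAN filter add/remove handlers: track the VID in adapter->vids under
 * rx_filter_lock and reprogram the HW VLAN table via be_vid_config().
 */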
1484 static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1485 {
1486         struct be_adapter *adapter = netdev_priv(netdev);
1487         int status = 0;
1488
1489         mutex_lock(&adapter->rx_filter_lock);
1490
1491         /* Packets with VID 0 are always received by Lancer by default */
1492         if (lancer_chip(adapter) && vid == 0)
1493                 goto done;
1494
1495         if (test_bit(vid, adapter->vids))
1496                 goto done;
1497
1498         set_bit(vid, adapter->vids);
1499         adapter->vlans_added++;
1500
1501         status = be_vid_config(adapter);
1502 done:
1503         mutex_unlock(&adapter->rx_filter_lock);
1504         return status;
1505 }
1506
1507 static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid)
1508 {
1509         struct be_adapter *adapter = netdev_priv(netdev);
1510         int status = 0;
1511
1512         mutex_lock(&adapter->rx_filter_lock);
1513
1514         /* Packets with VID 0 are always received by Lancer by default */
1515         if (lancer_chip(adapter) && vid == 0)
1516                 goto done;
1517
1518         if (!test_bit(vid, adapter->vids))
1519                 goto done;
1520
1521         clear_bit(vid, adapter->vids);
1522         adapter->vlans_added--;
1523
1524         status = be_vid_config(adapter);
1525 done:
1526         mutex_unlock(&adapter->rx_filter_lock);
1527         return status;
1528 }
1529
1530 static void be_set_all_promisc(struct be_adapter *adapter)
1531 {
1532         be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, ON);
1533         adapter->if_flags |= BE_IF_FLAGS_ALL_PROMISCUOUS;
1534 }
1535
1536 static void be_set_mc_promisc(struct be_adapter *adapter)
1537 {
1538         int status;
1539
1540         if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS)
1541                 return;
1542
1543         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MCAST_PROMISCUOUS, ON);
1544         if (!status)
1545                 adapter->if_flags |= BE_IF_FLAGS_MCAST_PROMISCUOUS;
1546 }
1547
1548 static void be_set_uc_promisc(struct be_adapter *adapter)
1549 {
1550         int status;
1551
1552         if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS)
1553                 return;
1554
1555         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, ON);
1556         if (!status)
1557                 adapter->if_flags |= BE_IF_FLAGS_PROMISCUOUS;
1558 }
1559
1560 static void be_clear_uc_promisc(struct be_adapter *adapter)
1561 {
1562         int status;
1563
1564         if (!(adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS))
1565                 return;
1566
1567         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, OFF);
1568         if (!status)
1569                 adapter->if_flags &= ~BE_IF_FLAGS_PROMISCUOUS;
1570 }
1571
1572 /* The two functions below are the callbacks passed to __dev_mc_sync()/
1573  * __dev_uc_sync(). The same callback is used for both sync and unsync;
1574  * addresses are not actually added or removed here. The callback only flags
1575  * that the uc/mc list changed; the full list is programmed in be_set_rx_mode().
1576  */
1577 static int be_uc_list_update(struct net_device *netdev,
1578                              const unsigned char *addr)
1579 {
1580         struct be_adapter *adapter = netdev_priv(netdev);
1581
1582         adapter->update_uc_list = true;
1583         return 0;
1584 }
1585
1586 static int be_mc_list_update(struct net_device *netdev,
1587                              const unsigned char *addr)
1588 {
1589         struct be_adapter *adapter = netdev_priv(netdev);
1590
1591         adapter->update_mc_list = true;
1592         return 0;
1593 }
1594
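/* Sync the netdev mc-list under netif_addr_lock, cache it in the adapter, and
 * either program the multicast filter or fall back to mc-promisc mode when the
 * list exceeds be_max_mc() or IFF_ALLMULTI is set.
 */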
1595 static void be_set_mc_list(struct be_adapter *adapter)
1596 {
1597         struct net_device *netdev = adapter->netdev;
1598         struct netdev_hw_addr *ha;
1599         bool mc_promisc = false;
1600         int status;
1601
1602         netif_addr_lock_bh(netdev);
1603         __dev_mc_sync(netdev, be_mc_list_update, be_mc_list_update);
1604
1605         if (netdev->flags & IFF_PROMISC) {
1606                 adapter->update_mc_list = false;
1607         } else if (netdev->flags & IFF_ALLMULTI ||
1608                    netdev_mc_count(netdev) > be_max_mc(adapter)) {
1609                 /* Enable multicast promisc if num configured exceeds
1610                  * what we support
1611                  */
1612                 mc_promisc = true;
1613                 adapter->update_mc_list = false;
1614         } else if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS) {
1615                 /* Update mc-list unconditionally if the iface was previously
1616                  * in mc-promisc mode and now is out of that mode.
1617                  */
1618                 adapter->update_mc_list = true;
1619         }
1620
1621         if (adapter->update_mc_list) {
1622                 int i = 0;
1623
1624                 /* cache the mc-list in adapter */
1625                 netdev_for_each_mc_addr(ha, netdev) {
1626                         ether_addr_copy(adapter->mc_list[i].mac, ha->addr);
1627                         i++;
1628                 }
1629                 adapter->mc_count = netdev_mc_count(netdev);
1630         }
1631         netif_addr_unlock_bh(netdev);
1632
1633         if (mc_promisc) {
1634                 be_set_mc_promisc(adapter);
1635         } else if (adapter->update_mc_list) {
1636                 status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON);
1637                 if (!status)
1638                         adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS;
1639                 else
1640                         be_set_mc_promisc(adapter);
1641
1642                 adapter->update_mc_list = false;
1643         }
1644 }
1645
1646 static void be_clear_mc_list(struct be_adapter *adapter)
1647 {
1648         struct net_device *netdev = adapter->netdev;
1649
1650         __dev_mc_unsync(netdev, NULL);
1651         be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, OFF);
1652         adapter->mc_count = 0;
1653 }
1654
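/* pmac_id[0] holds the pmac-id of the primary MAC; uc-list entry i uses
 * pmac_id[i + 1]. If the uc address equals dev_mac, reuse the primary
 * pmac-id instead of programming a duplicate entry.
 */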
1655 static int be_uc_mac_add(struct be_adapter *adapter, int uc_idx)
1656 {
1657         if (ether_addr_equal(adapter->uc_list[uc_idx].mac, adapter->dev_mac)) {
1658                 adapter->pmac_id[uc_idx + 1] = adapter->pmac_id[0];
1659                 return 0;
1660         }
1661
1662         return be_cmd_pmac_add(adapter, adapter->uc_list[uc_idx].mac,
1663                                adapter->if_handle,
1664                                &adapter->pmac_id[uc_idx + 1], 0);
1665 }
1666
1667 static void be_uc_mac_del(struct be_adapter *adapter, int pmac_id)
1668 {
1669         if (pmac_id == adapter->pmac_id[0])
1670                 return;
1671
1672         be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
1673 }
1674
1675 static void be_set_uc_list(struct be_adapter *adapter)
1676 {
1677         struct net_device *netdev = adapter->netdev;
1678         struct netdev_hw_addr *ha;
1679         bool uc_promisc = false;
1680         int curr_uc_macs = 0, i;
1681
1682         netif_addr_lock_bh(netdev);
1683         __dev_uc_sync(netdev, be_uc_list_update, be_uc_list_update);
1684
1685         if (netdev->flags & IFF_PROMISC) {
1686                 adapter->update_uc_list = false;
1687         } else if (netdev_uc_count(netdev) > (be_max_uc(adapter) - 1)) {
1688                 uc_promisc = true;
1689                 adapter->update_uc_list = false;
1690         }  else if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS) {
1691                 /* Update uc-list unconditionally if the iface was previously
1692                  * in uc-promisc mode and now is out of that mode.
1693                  */
1694                 adapter->update_uc_list = true;
1695         }
1696
1697         if (adapter->update_uc_list) {
1698                 /* cache the uc-list in adapter array */
1699                 i = 0;
1700                 netdev_for_each_uc_addr(ha, netdev) {
1701                         ether_addr_copy(adapter->uc_list[i].mac, ha->addr);
1702                         i++;
1703                 }
1704                 curr_uc_macs = netdev_uc_count(netdev);
1705         }
1706         netif_addr_unlock_bh(netdev);
1707
1708         if (uc_promisc) {
1709                 be_set_uc_promisc(adapter);
1710         } else if (adapter->update_uc_list) {
1711                 be_clear_uc_promisc(adapter);
1712
1713                 for (i = 0; i < adapter->uc_macs; i++)
1714                         be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1715
1716                 for (i = 0; i < curr_uc_macs; i++)
1717                         be_uc_mac_add(adapter, i);
1718                 adapter->uc_macs = curr_uc_macs;
1719                 adapter->update_uc_list = false;
1720         }
1721 }
1722
1723 static void be_clear_uc_list(struct be_adapter *adapter)
1724 {
1725         struct net_device *netdev = adapter->netdev;
1726         int i;
1727
1728         __dev_uc_unsync(netdev, NULL);
1729         for (i = 0; i < adapter->uc_macs; i++)
1730                 be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1731
1732         adapter->uc_macs = 0;
1733 }
1734
1735 static void __be_set_rx_mode(struct be_adapter *adapter)
1736 {
1737         struct net_device *netdev = adapter->netdev;
1738
1739         mutex_lock(&adapter->rx_filter_lock);
1740
1741         if (netdev->flags & IFF_PROMISC) {
1742                 if (!be_in_all_promisc(adapter))
1743                         be_set_all_promisc(adapter);
1744         } else if (be_in_all_promisc(adapter)) {
1745                 /* We need to re-program the vlan-list or clear
1746                  * vlan-promisc mode (if needed) when the interface
1747                  * comes out of promisc mode.
1748                  */
1749                 be_vid_config(adapter);
1750         }
1751
1752         be_set_uc_list(adapter);
1753         be_set_mc_list(adapter);
1754
1755         mutex_unlock(&adapter->rx_filter_lock);
1756 }
1757
1758 static void be_work_set_rx_mode(struct work_struct *work)
1759 {
1760         struct be_cmd_work *cmd_work =
1761                                 container_of(work, struct be_cmd_work, work);
1762
1763         __be_set_rx_mode(cmd_work->adapter);
1764         kfree(cmd_work);
1765 }
1766
1767 static int be_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
1768 {
1769         struct be_adapter *adapter = netdev_priv(netdev);
1770         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1771         int status;
1772
1773         if (!sriov_enabled(adapter))
1774                 return -EPERM;
1775
1776         if (!is_valid_ether_addr(mac) || vf >= adapter->num_vfs)
1777                 return -EINVAL;
1778
1779         /* Proceed further only if the user-provided MAC is different
1780          * from the active MAC
1781          */
1782         if (ether_addr_equal(mac, vf_cfg->mac_addr))
1783                 return 0;
1784
1785         if (BEx_chip(adapter)) {
1786                 be_cmd_pmac_del(adapter, vf_cfg->if_handle, vf_cfg->pmac_id,
1787                                 vf + 1);
1788
1789                 status = be_cmd_pmac_add(adapter, mac, vf_cfg->if_handle,
1790                                          &vf_cfg->pmac_id, vf + 1);
1791         } else {
1792                 status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
1793                                         vf + 1);
1794         }
1795
1796         if (status) {
1797                 dev_err(&adapter->pdev->dev, "MAC %pM set on VF %d Failed: %#x",
1798                         mac, vf, status);
1799                 return be_cmd_status(status);
1800         }
1801
1802         ether_addr_copy(vf_cfg->mac_addr, mac);
1803
1804         return 0;
1805 }
1806
1807 static int be_get_vf_config(struct net_device *netdev, int vf,
1808                             struct ifla_vf_info *vi)
1809 {
1810         struct be_adapter *adapter = netdev_priv(netdev);
1811         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1812
1813         if (!sriov_enabled(adapter))
1814                 return -EPERM;
1815
1816         if (vf >= adapter->num_vfs)
1817                 return -EINVAL;
1818
1819         vi->vf = vf;
1820         vi->max_tx_rate = vf_cfg->tx_rate;
1821         vi->min_tx_rate = 0;
1822         vi->vlan = vf_cfg->vlan_tag & VLAN_VID_MASK;
1823         vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT;
1824         memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN);
1825         vi->linkstate = adapter->vf_cfg[vf].plink_tracking;
1826         vi->spoofchk = adapter->vf_cfg[vf].spoofchk;
1827
1828         return 0;
1829 }
1830
1831 static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan)
1832 {
1833         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1834         u16 vids[BE_NUM_VLANS_SUPPORTED];
1835         int vf_if_id = vf_cfg->if_handle;
1836         int status;
1837
1838         /* Enable Transparent VLAN Tagging */
1839         status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0, 0);
1840         if (status)
1841                 return status;
1842
1843         /* Clear any pre-programmed VLAN filters on the VF now that TVT is enabled */
1844         vids[0] = 0;
1845         status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1);
1846         if (!status)
1847                 dev_info(&adapter->pdev->dev,
1848                          "Cleared guest VLANs on VF%d", vf);
1849
1850         /* After TVT is enabled, disallow VFs from programming VLAN filters */
1851         if (vf_cfg->privileges & BE_PRIV_FILTMGMT) {
1852                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges &
1853                                                   ~BE_PRIV_FILTMGMT, vf + 1);
1854                 if (!status)
1855                         vf_cfg->privileges &= ~BE_PRIV_FILTMGMT;
1856         }
1857         return 0;
1858 }
1859
1860 static int be_clear_vf_tvt(struct be_adapter *adapter, int vf)
1861 {
1862         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1863         struct device *dev = &adapter->pdev->dev;
1864         int status;
1865
1866         /* Reset Transparent VLAN Tagging. */
1867         status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1,
1868                                        vf_cfg->if_handle, 0, 0);
1869         if (status)
1870                 return status;
1871
1872         /* Allow VFs to program VLAN filtering */
1873         if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
1874                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges |
1875                                                   BE_PRIV_FILTMGMT, vf + 1);
1876                 if (!status) {
1877                         vf_cfg->privileges |= BE_PRIV_FILTMGMT;
1878                         dev_info(dev, "VF%d: FILTMGMT priv enabled", vf);
1879                 }
1880         }
1881
1882         dev_info(dev,
1883                  "Disable/re-enable i/f in VM to clear Transparent VLAN tag");
1884         return 0;
1885 }
1886
1887 static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
1888                           __be16 vlan_proto)
1889 {
1890         struct be_adapter *adapter = netdev_priv(netdev);
1891         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1892         int status;
1893
1894         if (!sriov_enabled(adapter))
1895                 return -EPERM;
1896
1897         if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7)
1898                 return -EINVAL;
1899
1900         if (vlan_proto != htons(ETH_P_8021Q))
1901                 return -EPROTONOSUPPORT;
1902
1903         if (vlan || qos) {
1904                 vlan |= qos << VLAN_PRIO_SHIFT;
1905                 status = be_set_vf_tvt(adapter, vf, vlan);
1906         } else {
1907                 status = be_clear_vf_tvt(adapter, vf);
1908         }
1909
1910         if (status) {
1911                 dev_err(&adapter->pdev->dev,
1912                         "VLAN %d config on VF %d failed : %#x\n", vlan, vf,
1913                         status);
1914                 return be_cmd_status(status);
1915         }
1916
1917         vf_cfg->vlan_tag = vlan;
1918         return 0;
1919 }
1920
1921 static int be_set_vf_tx_rate(struct net_device *netdev, int vf,
1922                              int min_tx_rate, int max_tx_rate)
1923 {
1924         struct be_adapter *adapter = netdev_priv(netdev);
1925         struct device *dev = &adapter->pdev->dev;
1926         int percent_rate, status = 0;
1927         u16 link_speed = 0;
1928         u8 link_status;
1929
1930         if (!sriov_enabled(adapter))
1931                 return -EPERM;
1932
1933         if (vf >= adapter->num_vfs)
1934                 return -EINVAL;
1935
1936         if (min_tx_rate)
1937                 return -EINVAL;
1938
1939         if (!max_tx_rate)
1940                 goto config_qos;
1941
1942         status = be_cmd_link_status_query(adapter, &link_speed,
1943                                           &link_status, 0);
1944         if (status)
1945                 goto err;
1946
1947         if (!link_status) {
1948                 dev_err(dev, "TX-rate setting not allowed when link is down\n");
1949                 status = -ENETDOWN;
1950                 goto err;
1951         }
1952
1953         if (max_tx_rate < 100 || max_tx_rate > link_speed) {
1954                 dev_err(dev, "TX-rate must be between 100 and %d Mbps\n",
1955                         link_speed);
1956                 status = -EINVAL;
1957                 goto err;
1958         }
1959
1960         /* On Skyhawk, the QoS setting must be done only as a % of the link speed */
1961         percent_rate = link_speed / 100;
1962         if (skyhawk_chip(adapter) && (max_tx_rate % percent_rate)) {
1963                 dev_err(dev, "TX-rate must be a multiple of %d Mbps\n",
1964                         percent_rate);
1965                 status = -EINVAL;
1966                 goto err;
1967         }
1968
1969 config_qos:
1970         status = be_cmd_config_qos(adapter, max_tx_rate, link_speed, vf + 1);
1971         if (status)
1972                 goto err;
1973
1974         adapter->vf_cfg[vf].tx_rate = max_tx_rate;
1975         return 0;
1976
1977 err:
1978         dev_err(dev, "TX-rate setting of %dMbps on VF%d failed\n",
1979                 max_tx_rate, vf);
1980         return be_cmd_status(status);
1981 }
1982
1983 static int be_set_vf_link_state(struct net_device *netdev, int vf,
1984                                 int link_state)
1985 {
1986         struct be_adapter *adapter = netdev_priv(netdev);
1987         int status;
1988
1989         if (!sriov_enabled(adapter))
1990                 return -EPERM;
1991
1992         if (vf >= adapter->num_vfs)
1993                 return -EINVAL;
1994
1995         status = be_cmd_set_logical_link_config(adapter, link_state, vf+1);
1996         if (status) {
1997                 dev_err(&adapter->pdev->dev,
1998                         "Link state change on VF %d failed: %#x\n", vf, status);
1999                 return be_cmd_status(status);
2000         }
2001
2002         adapter->vf_cfg[vf].plink_tracking = link_state;
2003
2004         return 0;
2005 }
2006
2007 static int be_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable)
2008 {
2009         struct be_adapter *adapter = netdev_priv(netdev);
2010         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
2011         u8 spoofchk;
2012         int status;
2013
2014         if (!sriov_enabled(adapter))
2015                 return -EPERM;
2016
2017         if (vf >= adapter->num_vfs)
2018                 return -EINVAL;
2019
2020         if (BEx_chip(adapter))
2021                 return -EOPNOTSUPP;
2022
2023         if (enable == vf_cfg->spoofchk)
2024                 return 0;
2025
2026         spoofchk = enable ? ENABLE_MAC_SPOOFCHK : DISABLE_MAC_SPOOFCHK;
2027
2028         status = be_cmd_set_hsw_config(adapter, 0, vf + 1, vf_cfg->if_handle,
2029                                        0, spoofchk);
2030         if (status) {
2031                 dev_err(&adapter->pdev->dev,
2032                         "Spoofchk change on VF %d failed: %#x\n", vf, status);
2033                 return be_cmd_status(status);
2034         }
2035
2036         vf_cfg->spoofchk = enable;
2037         return 0;
2038 }
2039
2040 static void be_aic_update(struct be_aic_obj *aic, u64 rx_pkts, u64 tx_pkts,
2041                           ulong now)
2042 {
2043         aic->rx_pkts_prev = rx_pkts;
2044         aic->tx_reqs_prev = tx_pkts;
2045         aic->jiffies = now;
2046 }
2047
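/* Compute a new EQ delay (adaptive interrupt coalescing) from the RX/TX packet
 * rate seen on this EQ since the last update. When AIC is disabled the fixed
 * aic->et_eqd value is returned; on counter wrap-around or a zero time delta
 * the previous delay is kept.
 */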
2048 static int be_get_new_eqd(struct be_eq_obj *eqo)
2049 {
2050         struct be_adapter *adapter = eqo->adapter;
2051         int eqd, start;
2052         struct be_aic_obj *aic;
2053         struct be_rx_obj *rxo;
2054         struct be_tx_obj *txo;
2055         u64 rx_pkts = 0, tx_pkts = 0;
2056         ulong now;
2057         u32 pps, delta;
2058         int i;
2059
2060         aic = &adapter->aic_obj[eqo->idx];
2061         if (!aic->enable) {
2062                 if (aic->jiffies)
2063                         aic->jiffies = 0;
2064                 eqd = aic->et_eqd;
2065                 return eqd;
2066         }
2067
2068         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
2069                 do {
2070                         start = u64_stats_fetch_begin_irq(&rxo->stats.sync);
2071                         rx_pkts += rxo->stats.rx_pkts;
2072                 } while (u64_stats_fetch_retry_irq(&rxo->stats.sync, start));
2073         }
2074
2075         for_all_tx_queues_on_eq(adapter, eqo, txo, i) {
2076                 do {
2077                         start = u64_stats_fetch_begin_irq(&txo->stats.sync);
2078                         tx_pkts += txo->stats.tx_reqs;
2079                 } while (u64_stats_fetch_retry_irq(&txo->stats.sync, start));
2080         }
2081
2082         /* Skip if the counters wrapped around or this is the first calculation */
2083         now = jiffies;
2084         if (!aic->jiffies || time_before(now, aic->jiffies) ||
2085             rx_pkts < aic->rx_pkts_prev ||
2086             tx_pkts < aic->tx_reqs_prev) {
2087                 be_aic_update(aic, rx_pkts, tx_pkts, now);
2088                 return aic->prev_eqd;
2089         }
2090
2091         delta = jiffies_to_msecs(now - aic->jiffies);
2092         if (delta == 0)
2093                 return aic->prev_eqd;
2094
2095         pps = (((u32)(rx_pkts - aic->rx_pkts_prev) * 1000) / delta) +
2096                 (((u32)(tx_pkts - aic->tx_reqs_prev) * 1000) / delta);
2097         eqd = (pps / 15000) << 2;
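        /* e.g. pps = 480000 gives eqd = (480000 / 15000) << 2 = 128; rates
         * below 30000 pkts/s (eqd < 8) disable the delay, and the result is
         * clamped to the [min_eqd, max_eqd] range below.
         */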
2098
2099         if (eqd < 8)
2100                 eqd = 0;
2101         eqd = min_t(u32, eqd, aic->max_eqd);
2102         eqd = max_t(u32, eqd, aic->min_eqd);
2103
2104         be_aic_update(aic, rx_pkts, tx_pkts, now);
2105
2106         return eqd;
2107 }
2108
2109 /* For Skyhawk-R only */
2110 static u32 be_get_eq_delay_mult_enc(struct be_eq_obj *eqo)
2111 {
2112         struct be_adapter *adapter = eqo->adapter;
2113         struct be_aic_obj *aic = &adapter->aic_obj[eqo->idx];
2114         ulong now = jiffies;
2115         int eqd;
2116         u32 mult_enc;
2117
2118         if (!aic->enable)
2119                 return 0;
2120
2121         if (jiffies_to_msecs(now - aic->jiffies) < 1)
2122                 eqd = aic->prev_eqd;
2123         else
2124                 eqd = be_get_new_eqd(eqo);
2125
2126         if (eqd > 100)
2127                 mult_enc = R2I_DLY_ENC_1;
2128         else if (eqd > 60)
2129                 mult_enc = R2I_DLY_ENC_2;
2130         else if (eqd > 20)
2131                 mult_enc = R2I_DLY_ENC_3;
2132         else
2133                 mult_enc = R2I_DLY_ENC_0;
2134
2135         aic->prev_eqd = eqd;
2136
2137         return mult_enc;
2138 }
2139
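/* Recompute the delay for every EQ and, for those whose value changed (or
 * unconditionally when force_update is set), push the new values to the FW in
 * one be_cmd_modify_eqd() call.
 */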
2140 void be_eqd_update(struct be_adapter *adapter, bool force_update)
2141 {
2142         struct be_set_eqd set_eqd[MAX_EVT_QS];
2143         struct be_aic_obj *aic;
2144         struct be_eq_obj *eqo;
2145         int i, num = 0, eqd;
2146
2147         for_all_evt_queues(adapter, eqo, i) {
2148                 aic = &adapter->aic_obj[eqo->idx];
2149                 eqd = be_get_new_eqd(eqo);
2150                 if (force_update || eqd != aic->prev_eqd) {
2151                         set_eqd[num].delay_multiplier = (eqd * 65)/100;
2152                         set_eqd[num].eq_id = eqo->q.id;
2153                         aic->prev_eqd = eqd;
2154                         num++;
2155                 }
2156         }
2157
2158         if (num)
2159                 be_cmd_modify_eqd(adapter, set_eqd, num);
2160 }
2161
2162 static void be_rx_stats_update(struct be_rx_obj *rxo,
2163                                struct be_rx_compl_info *rxcp)
2164 {
2165         struct be_rx_stats *stats = rx_stats(rxo);
2166
2167         u64_stats_update_begin(&stats->sync);
2168         stats->rx_compl++;
2169         stats->rx_bytes += rxcp->pkt_size;
2170         stats->rx_pkts++;
2171         if (rxcp->tunneled)
2172                 stats->rx_vxlan_offload_pkts++;
2173         if (rxcp->pkt_type == BE_MULTICAST_PACKET)
2174                 stats->rx_mcast_pkts++;
2175         if (rxcp->err)
2176                 stats->rx_compl_err++;
2177         u64_stats_update_end(&stats->sync);
2178 }
2179
2180 static inline bool csum_passed(struct be_rx_compl_info *rxcp)
2181 {
2182         /* The L4 checksum is not reliable for non-TCP/UDP packets.
2183          * Also ignore ipcksm for IPv6 pkts.
2184          */
2185         return (rxcp->tcpf || rxcp->udpf) && rxcp->l4_csum &&
2186                 (rxcp->ip_csum || rxcp->ipv6) && !rxcp->err;
2187 }
2188
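/* Pop the page_info at the RXQ tail. The last fragment of a big page unmaps
 * the whole page; other fragments are only synced for CPU access.
 */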
2189 static struct be_rx_page_info *get_rx_page_info(struct be_rx_obj *rxo)
2190 {
2191         struct be_adapter *adapter = rxo->adapter;
2192         struct be_rx_page_info *rx_page_info;
2193         struct be_queue_info *rxq = &rxo->q;
2194         u32 frag_idx = rxq->tail;
2195
2196         rx_page_info = &rxo->page_info_tbl[frag_idx];
2197         BUG_ON(!rx_page_info->page);
2198
2199         if (rx_page_info->last_frag) {
2200                 dma_unmap_page(&adapter->pdev->dev,
2201                                dma_unmap_addr(rx_page_info, bus),
2202                                adapter->big_page_size, DMA_FROM_DEVICE);
2203                 rx_page_info->last_frag = false;
2204         } else {
2205                 dma_sync_single_for_cpu(&adapter->pdev->dev,
2206                                         dma_unmap_addr(rx_page_info, bus),
2207                                         rx_frag_size, DMA_FROM_DEVICE);
2208         }
2209
2210         queue_tail_inc(rxq);
2211         atomic_dec(&rxq->used);
2212         return rx_page_info;
2213 }
2214
2215 /* Throw away the data in the Rx completion */
2216 static void be_rx_compl_discard(struct be_rx_obj *rxo,
2217                                 struct be_rx_compl_info *rxcp)
2218 {
2219         struct be_rx_page_info *page_info;
2220         u16 i, num_rcvd = rxcp->num_rcvd;
2221
2222         for (i = 0; i < num_rcvd; i++) {
2223                 page_info = get_rx_page_info(rxo);
2224                 put_page(page_info->page);
2225                 memset(page_info, 0, sizeof(*page_info));
2226         }
2227 }
2228
2229 /*
2230  * skb_fill_rx_data forms a complete skb for an ether frame
2231  * indicated by rxcp.
2232  */
2233 static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb,
2234                              struct be_rx_compl_info *rxcp)
2235 {
2236         struct be_rx_page_info *page_info;
2237         u16 i, j;
2238         u16 hdr_len, curr_frag_len, remaining;
2239         u8 *start;
2240
2241         page_info = get_rx_page_info(rxo);
2242         start = page_address(page_info->page) + page_info->page_offset;
2243         prefetch(start);
2244
2245         /* Copy data in the first descriptor of this completion */
2246         curr_frag_len = min(rxcp->pkt_size, rx_frag_size);
2247
2248         skb->len = curr_frag_len;
2249         if (curr_frag_len <= BE_HDR_LEN) { /* tiny packet */
2250                 memcpy(skb->data, start, curr_frag_len);
2251                 /* Complete packet has now been moved to data */
2252                 put_page(page_info->page);
2253                 skb->data_len = 0;
2254                 skb->tail += curr_frag_len;
2255         } else {
2256                 hdr_len = ETH_HLEN;
2257                 memcpy(skb->data, start, hdr_len);
2258                 skb_shinfo(skb)->nr_frags = 1;
2259                 skb_frag_set_page(skb, 0, page_info->page);
2260                 skb_shinfo(skb)->frags[0].page_offset =
2261                                         page_info->page_offset + hdr_len;
2262                 skb_frag_size_set(&skb_shinfo(skb)->frags[0],
2263                                   curr_frag_len - hdr_len);
2264                 skb->data_len = curr_frag_len - hdr_len;
2265                 skb->truesize += rx_frag_size;
2266                 skb->tail += hdr_len;
2267         }
2268         page_info->page = NULL;
2269
2270         if (rxcp->pkt_size <= rx_frag_size) {
2271                 BUG_ON(rxcp->num_rcvd != 1);
2272                 return;
2273         }
2274
2275         /* More frags present for this completion */
2276         remaining = rxcp->pkt_size - curr_frag_len;
2277         for (i = 1, j = 0; i < rxcp->num_rcvd; i++) {
2278                 page_info = get_rx_page_info(rxo);
2279                 curr_frag_len = min(remaining, rx_frag_size);
2280
2281                 /* Coalesce all frags from the same physical page in one slot */
2282                 if (page_info->page_offset == 0) {
2283                         /* Fresh page */
2284                         j++;
2285                         skb_frag_set_page(skb, j, page_info->page);
2286                         skb_shinfo(skb)->frags[j].page_offset =
2287                                                         page_info->page_offset;
2288                         skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2289                         skb_shinfo(skb)->nr_frags++;
2290                 } else {
2291                         put_page(page_info->page);
2292                 }
2293
2294                 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2295                 skb->len += curr_frag_len;
2296                 skb->data_len += curr_frag_len;
2297                 skb->truesize += rx_frag_size;
2298                 remaining -= curr_frag_len;
2299                 page_info->page = NULL;
2300         }
2301         BUG_ON(j > MAX_SKB_FRAGS);
2302 }
2303
2304 /* Process the RX completion indicated by rxcp when GRO is disabled */
2305 static void be_rx_compl_process(struct be_rx_obj *rxo, struct napi_struct *napi,
2306                                 struct be_rx_compl_info *rxcp)
2307 {
2308         struct be_adapter *adapter = rxo->adapter;
2309         struct net_device *netdev = adapter->netdev;
2310         struct sk_buff *skb;
2311
2312         skb = netdev_alloc_skb_ip_align(netdev, BE_RX_SKB_ALLOC_SIZE);
2313         if (unlikely(!skb)) {
2314                 rx_stats(rxo)->rx_drops_no_skbs++;
2315                 be_rx_compl_discard(rxo, rxcp);
2316                 return;
2317         }
2318
2319         skb_fill_rx_data(rxo, skb, rxcp);
2320
2321         if (likely((netdev->features & NETIF_F_RXCSUM) && csum_passed(rxcp)))
2322                 skb->ip_summed = CHECKSUM_UNNECESSARY;
2323         else
2324                 skb_checksum_none_assert(skb);
2325
2326         skb->protocol = eth_type_trans(skb, netdev);
2327         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2328         if (netdev->features & NETIF_F_RXHASH)
2329                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2330
2331         skb->csum_level = rxcp->tunneled;
2332         skb_mark_napi_id(skb, napi);
2333
2334         if (rxcp->vlanf)
2335                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2336
2337         netif_receive_skb(skb);
2338 }
2339
2340 /* Process the RX completion indicated by rxcp when GRO is enabled */
2341 static void be_rx_compl_process_gro(struct be_rx_obj *rxo,
2342                                     struct napi_struct *napi,
2343                                     struct be_rx_compl_info *rxcp)
2344 {
2345         struct be_adapter *adapter = rxo->adapter;
2346         struct be_rx_page_info *page_info;
2347         struct sk_buff *skb = NULL;
2348         u16 remaining, curr_frag_len;
2349         u16 i, j;
2350
2351         skb = napi_get_frags(napi);
2352         if (!skb) {
2353                 be_rx_compl_discard(rxo, rxcp);
2354                 return;
2355         }
2356
2357         remaining = rxcp->pkt_size;
2358         for (i = 0, j = -1; i < rxcp->num_rcvd; i++) {
2359                 page_info = get_rx_page_info(rxo);
2360
2361                 curr_frag_len = min(remaining, rx_frag_size);
2362
2363                 /* Coalesce all frags from the same physical page in one slot */
2364                 if (i == 0 || page_info->page_offset == 0) {
2365                         /* First frag or Fresh page */
2366                         j++;
2367                         skb_frag_set_page(skb, j, page_info->page);
2368                         skb_shinfo(skb)->frags[j].page_offset =
2369                                                         page_info->page_offset;
2370                         skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2371                 } else {
2372                         put_page(page_info->page);
2373                 }
2374                 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2375                 skb->truesize += rx_frag_size;
2376                 remaining -= curr_frag_len;
2377                 memset(page_info, 0, sizeof(*page_info));
2378         }
2379         BUG_ON(j > MAX_SKB_FRAGS);
2380
2381         skb_shinfo(skb)->nr_frags = j + 1;
2382         skb->len = rxcp->pkt_size;
2383         skb->data_len = rxcp->pkt_size;
2384         skb->ip_summed = CHECKSUM_UNNECESSARY;
2385         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2386         if (adapter->netdev->features & NETIF_F_RXHASH)
2387                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2388
2389         skb->csum_level = rxcp->tunneled;
2390
2391         if (rxcp->vlanf)
2392                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2393
2394         napi_gro_frags(napi);
2395 }
2396
2397 static void be_parse_rx_compl_v1(struct be_eth_rx_compl *compl,
2398                                  struct be_rx_compl_info *rxcp)
2399 {
2400         rxcp->pkt_size = GET_RX_COMPL_V1_BITS(pktsize, compl);
2401         rxcp->vlanf = GET_RX_COMPL_V1_BITS(vtp, compl);
2402         rxcp->err = GET_RX_COMPL_V1_BITS(err, compl);
2403         rxcp->tcpf = GET_RX_COMPL_V1_BITS(tcpf, compl);
2404         rxcp->udpf = GET_RX_COMPL_V1_BITS(udpf, compl);
2405         rxcp->ip_csum = GET_RX_COMPL_V1_BITS(ipcksm, compl);
2406         rxcp->l4_csum = GET_RX_COMPL_V1_BITS(l4_cksm, compl);
2407         rxcp->ipv6 = GET_RX_COMPL_V1_BITS(ip_version, compl);
2408         rxcp->num_rcvd = GET_RX_COMPL_V1_BITS(numfrags, compl);
2409         rxcp->pkt_type = GET_RX_COMPL_V1_BITS(cast_enc, compl);
2410         rxcp->rss_hash = GET_RX_COMPL_V1_BITS(rsshash, compl);
2411         if (rxcp->vlanf) {
2412                 rxcp->qnq = GET_RX_COMPL_V1_BITS(qnq, compl);
2413                 rxcp->vlan_tag = GET_RX_COMPL_V1_BITS(vlan_tag, compl);
2414         }
2415         rxcp->port = GET_RX_COMPL_V1_BITS(port, compl);
2416         rxcp->tunneled =
2417                 GET_RX_COMPL_V1_BITS(tunneled, compl);
2418 }
2419
2420 static void be_parse_rx_compl_v0(struct be_eth_rx_compl *compl,
2421                                  struct be_rx_compl_info *rxcp)
2422 {
2423         rxcp->pkt_size = GET_RX_COMPL_V0_BITS(pktsize, compl);
2424         rxcp->vlanf = GET_RX_COMPL_V0_BITS(vtp, compl);
2425         rxcp->err = GET_RX_COMPL_V0_BITS(err, compl);
2426         rxcp->tcpf = GET_RX_COMPL_V0_BITS(tcpf, compl);
2427         rxcp->udpf = GET_RX_COMPL_V0_BITS(udpf, compl);
2428         rxcp->ip_csum = GET_RX_COMPL_V0_BITS(ipcksm, compl);
2429         rxcp->l4_csum = GET_RX_COMPL_V0_BITS(l4_cksm, compl);
2430         rxcp->ipv6 = GET_RX_COMPL_V0_BITS(ip_version, compl);
2431         rxcp->num_rcvd = GET_RX_COMPL_V0_BITS(numfrags, compl);
2432         rxcp->pkt_type = GET_RX_COMPL_V0_BITS(cast_enc, compl);
2433         rxcp->rss_hash = GET_RX_COMPL_V0_BITS(rsshash, compl);
2434         if (rxcp->vlanf) {
2435                 rxcp->qnq = GET_RX_COMPL_V0_BITS(qnq, compl);
2436                 rxcp->vlan_tag = GET_RX_COMPL_V0_BITS(vlan_tag, compl);
2437         }
2438         rxcp->port = GET_RX_COMPL_V0_BITS(port, compl);
2439         rxcp->ip_frag = GET_RX_COMPL_V0_BITS(ip_frag, compl);
2440 }
2441
2442 static struct be_rx_compl_info *be_rx_compl_get(struct be_rx_obj *rxo)
2443 {
2444         struct be_eth_rx_compl *compl = queue_tail_node(&rxo->cq);
2445         struct be_rx_compl_info *rxcp = &rxo->rxcp;
2446         struct be_adapter *adapter = rxo->adapter;
2447
2448         /* For checking the valid bit, it is OK to use either definition, as the
2449          * valid bit is at the same position in both v0 and v1 Rx compls */
2450         if (compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] == 0)
2451                 return NULL;
2452
2453         rmb();
2454         be_dws_le_to_cpu(compl, sizeof(*compl));
2455
2456         if (adapter->be3_native)
2457                 be_parse_rx_compl_v1(compl, rxcp);
2458         else
2459                 be_parse_rx_compl_v0(compl, rxcp);
2460
2461         if (rxcp->ip_frag)
2462                 rxcp->l4_csum = 0;
2463
2464         if (rxcp->vlanf) {
2465                 /* In QNQ modes, if qnq bit is not set, then the packet was
2466                  * tagged only with the transparent outer vlan-tag and must
2467                  * not be treated as a vlan packet by host
2468                  */
2469                 if (be_is_qnq_mode(adapter) && !rxcp->qnq)
2470                         rxcp->vlanf = 0;
2471
2472                 if (!lancer_chip(adapter))
2473                         rxcp->vlan_tag = swab16(rxcp->vlan_tag);
2474
2475                 if (adapter->pvid == (rxcp->vlan_tag & VLAN_VID_MASK) &&
2476                     !test_bit(rxcp->vlan_tag, adapter->vids))
2477                         rxcp->vlanf = 0;
2478         }
2479
2480         /* As the compl has been parsed, reset it; we won't touch it again */
2481         compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] = 0;
2482
2483         queue_tail_inc(&rxo->cq);
2484         return rxcp;
2485 }
2486
2487 static inline struct page *be_alloc_pages(u32 size, gfp_t gfp)
2488 {
2489         u32 order = get_order(size);
2490
2491         if (order > 0)
2492                 gfp |= __GFP_COMP;
2493         return  alloc_pages(gfp, order);
2494 }
2495
2496 /*
2497  * Allocate a page, split it into fragments of size rx_frag_size and post them
2498  * as receive buffers to BE
2499  */
2500 static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed)
2501 {
2502         struct be_adapter *adapter = rxo->adapter;
2503         struct be_rx_page_info *page_info = NULL, *prev_page_info = NULL;
2504         struct be_queue_info *rxq = &rxo->q;
2505         struct page *pagep = NULL;
2506         struct device *dev = &adapter->pdev->dev;
2507         struct be_eth_rx_d *rxd;
2508         u64 page_dmaaddr = 0, frag_dmaaddr;
2509         u32 posted, page_offset = 0, notify = 0;
2510
2511         page_info = &rxo->page_info_tbl[rxq->head];
2512         for (posted = 0; posted < frags_needed && !page_info->page; posted++) {
2513                 if (!pagep) {
2514                         pagep = be_alloc_pages(adapter->big_page_size, gfp);
2515                         if (unlikely(!pagep)) {
2516                                 rx_stats(rxo)->rx_post_fail++;
2517                                 break;
2518                         }
2519                         page_dmaaddr = dma_map_page(dev, pagep, 0,
2520                                                     adapter->big_page_size,
2521                                                     DMA_FROM_DEVICE);
2522                         if (dma_mapping_error(dev, page_dmaaddr)) {
2523                                 put_page(pagep);
2524                                 pagep = NULL;
2525                                 adapter->drv_stats.dma_map_errors++;
2526                                 break;
2527                         }
2528                         page_offset = 0;
2529                 } else {
2530                         get_page(pagep);
2531                         page_offset += rx_frag_size;
2532                 }
2533                 page_info->page_offset = page_offset;
2534                 page_info->page = pagep;
2535
2536                 rxd = queue_head_node(rxq);
2537                 frag_dmaaddr = page_dmaaddr + page_info->page_offset;
2538                 rxd->fragpa_lo = cpu_to_le32(frag_dmaaddr & 0xFFFFFFFF);
2539                 rxd->fragpa_hi = cpu_to_le32(upper_32_bits(frag_dmaaddr));
2540
2541                 /* Any space left in the current big page for another frag? */
2542                 if ((page_offset + rx_frag_size + rx_frag_size) >
2543                                         adapter->big_page_size) {
2544                         pagep = NULL;
2545                         page_info->last_frag = true;
2546                         dma_unmap_addr_set(page_info, bus, page_dmaaddr);
2547                 } else {
2548                         dma_unmap_addr_set(page_info, bus, frag_dmaaddr);
2549                 }
2550
2551                 prev_page_info = page_info;
2552                 queue_head_inc(rxq);
2553                 page_info = &rxo->page_info_tbl[rxq->head];
2554         }
2555
2556         /* Mark the last frag of a page when we break out of the above loop
2557          * with no more slots available in the RXQ
2558          */
2559         if (pagep) {
2560                 prev_page_info->last_frag = true;
2561                 dma_unmap_addr_set(prev_page_info, bus, page_dmaaddr);
2562         }
2563
2564         if (posted) {
2565                 atomic_add(posted, &rxq->used);
2566                 if (rxo->rx_post_starved)
2567                         rxo->rx_post_starved = false;
2568                 do {
2569                         notify = min(MAX_NUM_POST_ERX_DB, posted);
2570                         be_rxq_notify(adapter, rxq->id, notify);
2571                         posted -= notify;
2572                 } while (posted);
2573         } else if (atomic_read(&rxq->used) == 0) {
2574                 /* Let be_worker replenish when memory is available */
2575                 rxo->rx_post_starved = true;
2576         }
2577 }
2578
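/* Return the next valid TX completion from the CQ (or NULL), after converting
 * it to CPU endianness and invalidating the entry for reuse.
 */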
2579 static struct be_tx_compl_info *be_tx_compl_get(struct be_tx_obj *txo)
2580 {
2581         struct be_queue_info *tx_cq = &txo->cq;
2582         struct be_tx_compl_info *txcp = &txo->txcp;
2583         struct be_eth_tx_compl *compl = queue_tail_node(tx_cq);
2584
2585         if (compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0)
2586                 return NULL;
2587
2588         /* Ensure load ordering of valid bit dword and other dwords below */
2589         rmb();
2590         be_dws_le_to_cpu(compl, sizeof(*compl));
2591
2592         txcp->status = GET_TX_COMPL_BITS(status, compl);
2593         txcp->end_index = GET_TX_COMPL_BITS(wrb_index, compl);
2594
2595         compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0;
2596         queue_tail_inc(tx_cq);
2597         return txcp;
2598 }
2599
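/* Walk the TXQ from its tail up to last_index, unmapping the hdr and frag
 * WRBs and freeing the completed skbs; returns the number of WRBs reclaimed.
 */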
2600 static u16 be_tx_compl_process(struct be_adapter *adapter,
2601                                struct be_tx_obj *txo, u16 last_index)
2602 {
2603         struct sk_buff **sent_skbs = txo->sent_skb_list;
2604         struct be_queue_info *txq = &txo->q;
2605         struct sk_buff *skb = NULL;
2606         bool unmap_skb_hdr = false;
2607         struct be_eth_wrb *wrb;
2608         u16 num_wrbs = 0;
2609         u32 frag_index;
2610
2611         do {
2612                 if (sent_skbs[txq->tail]) {
2613                         /* Free skb from prev req */
2614                         if (skb)
2615                                 dev_consume_skb_any(skb);
2616                         skb = sent_skbs[txq->tail];
2617                         sent_skbs[txq->tail] = NULL;
2618                         queue_tail_inc(txq);  /* skip hdr wrb */
2619                         num_wrbs++;
2620                         unmap_skb_hdr = true;
2621                 }
2622                 wrb = queue_tail_node(txq);
2623                 frag_index = txq->tail;
2624                 unmap_tx_frag(&adapter->pdev->dev, wrb,
2625                               (unmap_skb_hdr && skb_headlen(skb)));
2626                 unmap_skb_hdr = false;
2627                 queue_tail_inc(txq);
2628                 num_wrbs++;
2629         } while (frag_index != last_index);
2630         dev_consume_skb_any(skb);
2631
2632         return num_wrbs;
2633 }
2634
2635 /* Return the number of events in the event queue */
2636 static inline int events_get(struct be_eq_obj *eqo)
2637 {
2638         struct be_eq_entry *eqe;
2639         int num = 0;
2640
2641         do {
2642                 eqe = queue_tail_node(&eqo->q);
2643                 if (eqe->evt == 0)
2644                         break;
2645
2646                 rmb();
2647                 eqe->evt = 0;
2648                 num++;
2649                 queue_tail_inc(&eqo->q);
2650         } while (true);
2651
2652         return num;
2653 }
2654
2655 /* Leaves the EQ in a disarmed state */
2656 static void be_eq_clean(struct be_eq_obj *eqo)
2657 {
2658         int num = events_get(eqo);
2659
2660         be_eq_notify(eqo->adapter, eqo->q.id, false, true, num, 0);
2661 }
2662
2663 /* Free posted rx buffers that were not used */
2664 static void be_rxq_clean(struct be_rx_obj *rxo)
2665 {
2666         struct be_queue_info *rxq = &rxo->q;
2667         struct be_rx_page_info *page_info;
2668
2669         while (atomic_read(&rxq->used) > 0) {
2670                 page_info = get_rx_page_info(rxo);
2671                 put_page(page_info->page);
2672                 memset(page_info, 0, sizeof(*page_info));
2673         }
2674         BUG_ON(atomic_read(&rxq->used));
2675         rxq->tail = 0;
2676         rxq->head = 0;
2677 }
2678
2679 static void be_rx_cq_clean(struct be_rx_obj *rxo)
2680 {
2681         struct be_queue_info *rx_cq = &rxo->cq;
2682         struct be_rx_compl_info *rxcp;
2683         struct be_adapter *adapter = rxo->adapter;
2684         int flush_wait = 0;
2685
2686         /* Consume pending rx completions.
2687          * Wait for the flush completion (identified by zero num_rcvd)
2688          * to arrive. Notify the CQ even when there are no more CQ entries,
2689          * so that HW can flush any partially coalesced CQ entries.
2690          * On Lancer, there is no need to wait for the flush compl.
2691          */
2692         for (;;) {
2693                 rxcp = be_rx_compl_get(rxo);
2694                 if (!rxcp) {
2695                         if (lancer_chip(adapter))
2696                                 break;
2697
2698                         if (flush_wait++ > 50 ||
2699                             be_check_error(adapter,
2700                                            BE_ERROR_HW)) {
2701                                 dev_warn(&adapter->pdev->dev,
2702                                          "did not receive flush compl\n");
2703                                 break;
2704                         }
2705                         be_cq_notify(adapter, rx_cq->id, true, 0);
2706                         mdelay(1);
2707                 } else {
2708                         be_rx_compl_discard(rxo, rxcp);
2709                         be_cq_notify(adapter, rx_cq->id, false, 1);
2710                         if (rxcp->num_rcvd == 0)
2711                                 break;
2712                 }
2713         }
2714
2715         /* After cleanup, leave the CQ in unarmed state */
2716         be_cq_notify(adapter, rx_cq->id, false, 0);
2717 }
2718
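/* Drain TX completions at teardown: poll all TX CQs until the HW has been
 * silent for ~10ms, then reclaim any WRBs that were enqueued but never
 * notified to the HW and reset the TXQ indices.
 */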
2719 static void be_tx_compl_clean(struct be_adapter *adapter)
2720 {
2721         struct device *dev = &adapter->pdev->dev;
2722         u16 cmpl = 0, timeo = 0, num_wrbs = 0;
2723         struct be_tx_compl_info *txcp;
2724         struct be_queue_info *txq;
2725         u32 end_idx, notified_idx;
2726         struct be_tx_obj *txo;
2727         int i, pending_txqs;
2728
2729         /* Stop polling for compls when HW has been silent for 10ms */
2730         do {
2731                 pending_txqs = adapter->num_tx_qs;
2732
2733                 for_all_tx_queues(adapter, txo, i) {
2734                         cmpl = 0;
2735                         num_wrbs = 0;
2736                         txq = &txo->q;
2737                         while ((txcp = be_tx_compl_get(txo))) {
2738                                 num_wrbs +=
2739                                         be_tx_compl_process(adapter, txo,
2740                                                             txcp->end_index);
2741                                 cmpl++;
2742                         }
2743                         if (cmpl) {
2744                                 be_cq_notify(adapter, txo->cq.id, false, cmpl);
2745                                 atomic_sub(num_wrbs, &txq->used);
2746                                 timeo = 0;
2747                         }
2748                         if (!be_is_tx_compl_pending(txo))
2749                                 pending_txqs--;
2750                 }
2751
2752                 if (pending_txqs == 0 || ++timeo > 10 ||
2753                     be_check_error(adapter, BE_ERROR_HW))
2754                         break;
2755
2756                 mdelay(1);
2757         } while (true);
2758
2759         /* Free enqueued TX that was never notified to HW */
2760         for_all_tx_queues(adapter, txo, i) {
2761                 txq = &txo->q;
2762
2763                 if (atomic_read(&txq->used)) {
2764                         dev_info(dev, "txq%d: cleaning %d pending tx-wrbs\n",
2765                                  i, atomic_read(&txq->used));
2766                         notified_idx = txq->tail;
2767                         end_idx = txq->tail;
2768                         index_adv(&end_idx, atomic_read(&txq->used) - 1,
2769                                   txq->len);
2770                         /* Use the tx-compl process logic to handle requests
2771                          * that were not sent to the HW.
2772                          */
2773                         num_wrbs = be_tx_compl_process(adapter, txo, end_idx);
2774                         atomic_sub(num_wrbs, &txq->used);
2775                         BUG_ON(atomic_read(&txq->used));
2776                         txo->pend_wrb_cnt = 0;
2777                         /* Since hw was never notified of these requests,
2778                          * reset TXQ indices
2779                          */
2780                         txq->head = notified_idx;
2781                         txq->tail = notified_idx;
2782                 }
2783         }
2784 }
2785
2786 static void be_evt_queues_destroy(struct be_adapter *adapter)
2787 {
2788         struct be_eq_obj *eqo;
2789         int i;
2790
2791         for_all_evt_queues(adapter, eqo, i) {
2792                 if (eqo->q.created) {
2793                         be_eq_clean(eqo);
2794                         be_cmd_q_destroy(adapter, &eqo->q, QTYPE_EQ);
2795                         netif_napi_del(&eqo->napi);
2796                         free_cpumask_var(eqo->affinity_mask);
2797                 }
2798                 be_queue_free(adapter, &eqo->q);
2799         }
2800 }
2801
2802 static int be_evt_queues_create(struct be_adapter *adapter)
2803 {
2804         struct be_queue_info *eq;
2805         struct be_eq_obj *eqo;
2806         struct be_aic_obj *aic;
2807         int i, rc;
2808
2809         /* need enough EQs to service both RX and TX queues */
2810         adapter->num_evt_qs = min_t(u16, num_irqs(adapter),
2811                                     max(adapter->cfg_num_rx_irqs,
2812                                         adapter->cfg_num_tx_irqs));
2813
2814         for_all_evt_queues(adapter, eqo, i) {
2815                 int numa_node = dev_to_node(&adapter->pdev->dev);
2816
2817                 aic = &adapter->aic_obj[i];
2818                 eqo->adapter = adapter;
2819                 eqo->idx = i;
2820                 aic->max_eqd = BE_MAX_EQD;
2821                 aic->enable = true;
2822
2823                 eq = &eqo->q;
2824                 rc = be_queue_alloc(adapter, eq, EVNT_Q_LEN,
2825                                     sizeof(struct be_eq_entry));
2826                 if (rc)
2827                         return rc;
2828
2829                 rc = be_cmd_eq_create(adapter, eqo);
2830                 if (rc)
2831                         return rc;
2832
2833                 if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
2834                         return -ENOMEM;
2835                 cpumask_set_cpu(cpumask_local_spread(i, numa_node),
2836                                 eqo->affinity_mask);
2837                 netif_napi_add(adapter->netdev, &eqo->napi, be_poll,
2838                                BE_NAPI_WEIGHT);
2839         }
2840         return 0;
2841 }
2842
2843 static void be_mcc_queues_destroy(struct be_adapter *adapter)
2844 {
2845         struct be_queue_info *q;
2846
2847         q = &adapter->mcc_obj.q;
2848         if (q->created)
2849                 be_cmd_q_destroy(adapter, q, QTYPE_MCCQ);
2850         be_queue_free(adapter, q);
2851
2852         q = &adapter->mcc_obj.cq;
2853         if (q->created)
2854                 be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2855         be_queue_free(adapter, q);
2856 }
2857
2858 /* Must be called only after TX qs are created as MCC shares TX EQ */
2859 static int be_mcc_queues_create(struct be_adapter *adapter)
2860 {
2861         struct be_queue_info *q, *cq;
2862
2863         cq = &adapter->mcc_obj.cq;
2864         if (be_queue_alloc(adapter, cq, MCC_CQ_LEN,
2865                            sizeof(struct be_mcc_compl)))
2866                 goto err;
2867
2868         /* Use the default EQ for MCC completions */
2869         if (be_cmd_cq_create(adapter, cq, &mcc_eqo(adapter)->q, true, 0))
2870                 goto mcc_cq_free;
2871
2872         q = &adapter->mcc_obj.q;
2873         if (be_queue_alloc(adapter, q, MCC_Q_LEN, sizeof(struct be_mcc_wrb)))
2874                 goto mcc_cq_destroy;
2875
2876         if (be_cmd_mccq_create(adapter, q, cq))
2877                 goto mcc_q_free;
2878
2879         return 0;
2880
2881 mcc_q_free:
2882         be_queue_free(adapter, q);
2883 mcc_cq_destroy:
2884         be_cmd_q_destroy(adapter, cq, QTYPE_CQ);
2885 mcc_cq_free:
2886         be_queue_free(adapter, cq);
2887 err:
2888         return -1;
2889 }
2890
2891 static void be_tx_queues_destroy(struct be_adapter *adapter)
2892 {
2893         struct be_queue_info *q;
2894         struct be_tx_obj *txo;
2895         u8 i;
2896
2897         for_all_tx_queues(adapter, txo, i) {
2898                 q = &txo->q;
2899                 if (q->created)
2900                         be_cmd_q_destroy(adapter, q, QTYPE_TXQ);
2901                 be_queue_free(adapter, q);
2902
2903                 q = &txo->cq;
2904                 if (q->created)
2905                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2906                 be_queue_free(adapter, q);
2907         }
2908 }
2909
2910 static int be_tx_qs_create(struct be_adapter *adapter)
2911 {
2912         struct be_queue_info *cq;
2913         struct be_tx_obj *txo;
2914         struct be_eq_obj *eqo;
2915         int status, i;
2916
2917         adapter->num_tx_qs = min(adapter->num_evt_qs, adapter->cfg_num_tx_irqs);
2918
2919         for_all_tx_queues(adapter, txo, i) {
2920                 cq = &txo->cq;
2921                 status = be_queue_alloc(adapter, cq, TX_CQ_LEN,
2922                                         sizeof(struct be_eth_tx_compl));
2923                 if (status)
2924                         return status;
2925
2926                 u64_stats_init(&txo->stats.sync);
2927                 u64_stats_init(&txo->stats.sync_compl);
2928
2929                 /* If num_evt_qs is less than num_tx_qs, then more than
2930                  * one txq shares an eq
2931                  */
2932                 eqo = &adapter->eq_obj[i % adapter->num_evt_qs];
2933                 status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3);
2934                 if (status)
2935                         return status;
2936
2937                 status = be_queue_alloc(adapter, &txo->q, TX_Q_LEN,
2938                                         sizeof(struct be_eth_wrb));
2939                 if (status)
2940                         return status;
2941
2942                 status = be_cmd_txq_create(adapter, txo);
2943                 if (status)
2944                         return status;
2945
2946                 netif_set_xps_queue(adapter->netdev, eqo->affinity_mask,
2947                                     eqo->idx);
2948         }
2949
2950         dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n",
2951                  adapter->num_tx_qs);
2952         return 0;
2953 }
2954
2955 static void be_rx_cqs_destroy(struct be_adapter *adapter)
2956 {
2957         struct be_queue_info *q;
2958         struct be_rx_obj *rxo;
2959         int i;
2960
2961         for_all_rx_queues(adapter, rxo, i) {
2962                 q = &rxo->cq;
2963                 if (q->created)
2964                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2965                 be_queue_free(adapter, q);
2966         }
2967 }
2968
2969 static int be_rx_cqs_create(struct be_adapter *adapter)
2970 {
2971         struct be_queue_info *eq, *cq;
2972         struct be_rx_obj *rxo;
2973         int rc, i;
2974
2975         adapter->num_rss_qs =
2976                         min(adapter->num_evt_qs, adapter->cfg_num_rx_irqs);
2977
2978         /* We'll use RSS only if at least 2 RSS rings are supported. */
2979         if (adapter->num_rss_qs < 2)
2980                 adapter->num_rss_qs = 0;
2981
2982         adapter->num_rx_qs = adapter->num_rss_qs + adapter->need_def_rxq;
2983
2984         /* When the interface is not capable of RSS rings (and there is no
2985          * need to create a default RXQ) we'll still need one RXQ
2986          */
2987         if (adapter->num_rx_qs == 0)
2988                 adapter->num_rx_qs = 1;
2989
2990         adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE;
2991         for_all_rx_queues(adapter, rxo, i) {
2992                 rxo->adapter = adapter;
2993                 cq = &rxo->cq;
2994                 rc = be_queue_alloc(adapter, cq, RX_CQ_LEN,
2995                                     sizeof(struct be_eth_rx_compl));
2996                 if (rc)
2997                         return rc;
2998
2999                 u64_stats_init(&rxo->stats.sync);
3000                 eq = &adapter->eq_obj[i % adapter->num_evt_qs].q;
3001                 rc = be_cmd_cq_create(adapter, cq, eq, false, 3);
3002                 if (rc)
3003                         return rc;
3004         }
3005
3006         dev_info(&adapter->pdev->dev,
3007                  "created %d RX queue(s)\n", adapter->num_rx_qs);
3008         return 0;
3009 }
3010
3011 static irqreturn_t be_intx(int irq, void *dev)
3012 {
3013         struct be_eq_obj *eqo = dev;
3014         struct be_adapter *adapter = eqo->adapter;
3015         int num_evts = 0;
3016
3017         /* IRQ is not expected when NAPI is scheduled as the EQ
3018          * will not be armed.
3019          * But, this can happen on Lancer INTx where it takes
3020          * a while to de-assert INTx or in BE2 where occasionally
3021          * an interrupt may be raised even when EQ is unarmed.
3022          * If NAPI is already scheduled, then counting & notifying
3023          * events will orphan them.
3024          */
3025         if (napi_schedule_prep(&eqo->napi)) {
3026                 num_evts = events_get(eqo);
3027                 __napi_schedule(&eqo->napi);
3028                 if (num_evts)
3029                         eqo->spurious_intr = 0;
3030         }
3031         be_eq_notify(adapter, eqo->q.id, false, true, num_evts, 0);
3032
3033         /* Return IRQ_HANDLED only for the first spurious intr
3034          * after a valid intr to stop the kernel from branding
3035          * this irq as a bad one!
3036          */
3037         if (num_evts || eqo->spurious_intr++ == 0)
3038                 return IRQ_HANDLED;
3039         else
3040                 return IRQ_NONE;
3041 }
3042
3043 static irqreturn_t be_msix(int irq, void *dev)
3044 {
3045         struct be_eq_obj *eqo = dev;
3046
3047         be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
3048         napi_schedule(&eqo->napi);
3049         return IRQ_HANDLED;
3050 }
3051
3052 static inline bool do_gro(struct be_rx_compl_info *rxcp)
3053 {
3054         return (rxcp->tcpf && !rxcp->err && rxcp->l4_csum) ? true : false;
3055 }
3056
3057 static int be_process_rx(struct be_rx_obj *rxo, struct napi_struct *napi,
3058                          int budget, int polling)
3059 {
3060         struct be_adapter *adapter = rxo->adapter;
3061         struct be_queue_info *rx_cq = &rxo->cq;
3062         struct be_rx_compl_info *rxcp;
3063         u32 work_done;
3064         u32 frags_consumed = 0;
3065
3066         for (work_done = 0; work_done < budget; work_done++) {
3067                 rxcp = be_rx_compl_get(rxo);
3068                 if (!rxcp)
3069                         break;
3070
3071                 /* Is it a flush compl that has no data */
3072                 if (unlikely(rxcp->num_rcvd == 0))
3073                         goto loop_continue;
3074
3075                 /* Discard compl with partial DMA Lancer B0 */
3076                 if (unlikely(!rxcp->pkt_size)) {
3077                         be_rx_compl_discard(rxo, rxcp);
3078                         goto loop_continue;
3079                 }
3080
3081                 /* On BE drop pkts that arrive due to imperfect filtering in
3082                  * promiscuous mode on some SKUs
3083                  */
3084                 if (unlikely(rxcp->port != adapter->port_num &&
3085                              !lancer_chip(adapter))) {
3086                         be_rx_compl_discard(rxo, rxcp);
3087                         goto loop_continue;
3088                 }
3089
3090                 /* Don't do gro when we're busy_polling */
3091                 if (do_gro(rxcp) && polling != BUSY_POLLING)
3092                         be_rx_compl_process_gro(rxo, napi, rxcp);
3093                 else
3094                         be_rx_compl_process(rxo, napi, rxcp);
3095
3096 loop_continue:
3097                 frags_consumed += rxcp->num_rcvd;
3098                 be_rx_stats_update(rxo, rxcp);
3099         }
3100
3101         if (work_done) {
3102                 be_cq_notify(adapter, rx_cq->id, true, work_done);
3103
3104                 /* When an rx-obj gets into post_starved state, just
3105                  * let be_worker do the posting.
3106                  */
3107                 if (atomic_read(&rxo->q.used) < RX_FRAGS_REFILL_WM &&
3108                     !rxo->rx_post_starved)
3109                         be_post_rx_frags(rxo, GFP_ATOMIC,
3110                                          max_t(u32, MAX_RX_POST,
3111                                                frags_consumed));
3112         }
3113
3114         return work_done;
3115 }
3116
3117 static inline void be_update_tx_err(struct be_tx_obj *txo, u8 status)
3118 {
3119         switch (status) {
3120         case BE_TX_COMP_HDR_PARSE_ERR:
3121                 tx_stats(txo)->tx_hdr_parse_err++;
3122                 break;
3123         case BE_TX_COMP_NDMA_ERR:
3124                 tx_stats(txo)->tx_dma_err++;
3125                 break;
3126         case BE_TX_COMP_ACL_ERR:
3127                 tx_stats(txo)->tx_spoof_check_err++;
3128                 break;
3129         }
3130 }
3131
3132 static inline void lancer_update_tx_err(struct be_tx_obj *txo, u8 status)
3133 {
3134         switch (status) {
3135         case LANCER_TX_COMP_LSO_ERR:
3136                 tx_stats(txo)->tx_tso_err++;
3137                 break;
3138         case LANCER_TX_COMP_HSW_DROP_MAC_ERR:
3139         case LANCER_TX_COMP_HSW_DROP_VLAN_ERR:
3140                 tx_stats(txo)->tx_spoof_check_err++;
3141                 break;
3142         case LANCER_TX_COMP_QINQ_ERR:
3143                 tx_stats(txo)->tx_qinq_err++;
3144                 break;
3145         case LANCER_TX_COMP_PARITY_ERR:
3146                 tx_stats(txo)->tx_internal_parity_err++;
3147                 break;
3148         case LANCER_TX_COMP_DMA_ERR:
3149                 tx_stats(txo)->tx_dma_err++;
3150                 break;
3151         }
3152 }
3153
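/* Reap TX completions on one TX queue from NAPI context: free the completed
 * wrbs, account any per-completion error status, and wake the netdev
 * sub-queue if it was stopped for lack of wrbs.
 */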
3154 static void be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo,
3155                           int idx)
3156 {
3157         int num_wrbs = 0, work_done = 0;
3158         struct be_tx_compl_info *txcp;
3159
3160         while ((txcp = be_tx_compl_get(txo))) {
3161                 num_wrbs += be_tx_compl_process(adapter, txo, txcp->end_index);
3162                 work_done++;
3163
3164                 if (txcp->status) {
3165                         if (lancer_chip(adapter))
3166                                 lancer_update_tx_err(txo, txcp->status);
3167                         else
3168                                 be_update_tx_err(txo, txcp->status);
3169                 }
3170         }
3171
3172         if (work_done) {
3173                 be_cq_notify(adapter, txo->cq.id, true, work_done);
3174                 atomic_sub(num_wrbs, &txo->q.used);
3175
3176                 /* As Tx wrbs have been freed up, wake up netdev queue
3177                  * if it was stopped due to lack of tx wrbs.  */
3178                 if (__netif_subqueue_stopped(adapter->netdev, idx) &&
3179                     be_can_txq_wake(txo)) {
3180                         netif_wake_subqueue(adapter->netdev, idx);
3181                 }
3182
3183                 u64_stats_update_begin(&tx_stats(txo)->sync_compl);
3184                 tx_stats(txo)->tx_compl += work_done;
3185                 u64_stats_update_end(&tx_stats(txo)->sync_compl);
3186         }
3187 }
3188
3189 #ifdef CONFIG_NET_RX_BUSY_POLL
3190 static inline bool be_lock_napi(struct be_eq_obj *eqo)
3191 {
3192         bool status = true;
3193
3194         spin_lock(&eqo->lock); /* BH is already disabled */
3195         if (eqo->state & BE_EQ_LOCKED) {
3196                 WARN_ON(eqo->state & BE_EQ_NAPI);
3197                 eqo->state |= BE_EQ_NAPI_YIELD;
3198                 status = false;
3199         } else {
3200                 eqo->state = BE_EQ_NAPI;
3201         }
3202         spin_unlock(&eqo->lock);
3203         return status;
3204 }
3205
3206 static inline void be_unlock_napi(struct be_eq_obj *eqo)
3207 {
3208         spin_lock(&eqo->lock); /* BH is already disabled */
3209
3210         WARN_ON(eqo->state & (BE_EQ_POLL | BE_EQ_NAPI_YIELD));
3211         eqo->state = BE_EQ_IDLE;
3212
3213         spin_unlock(&eqo->lock);
3214 }
3215
3216 static inline bool be_lock_busy_poll(struct be_eq_obj *eqo)
3217 {
3218         bool status = true;
3219
3220         spin_lock_bh(&eqo->lock);
3221         if (eqo->state & BE_EQ_LOCKED) {
3222                 eqo->state |= BE_EQ_POLL_YIELD;
3223                 status = false;
3224         } else {
3225                 eqo->state |= BE_EQ_POLL;
3226         }
3227         spin_unlock_bh(&eqo->lock);
3228         return status;
3229 }
3230
3231 static inline void be_unlock_busy_poll(struct be_eq_obj *eqo)
3232 {
3233         spin_lock_bh(&eqo->lock);
3234
3235         WARN_ON(eqo->state & (BE_EQ_NAPI));
3236         eqo->state = BE_EQ_IDLE;
3237
3238         spin_unlock_bh(&eqo->lock);
3239 }
3240
3241 static inline void be_enable_busy_poll(struct be_eq_obj *eqo)
3242 {
3243         spin_lock_init(&eqo->lock);
3244         eqo->state = BE_EQ_IDLE;
3245 }
3246
3247 static inline void be_disable_busy_poll(struct be_eq_obj *eqo)
3248 {
3249         local_bh_disable();
3250
3251         /* It's enough to just acquire napi lock on the eqo to stop
3252          * be_busy_poll() from processing any queues.
3253          */
3254         while (!be_lock_napi(eqo))
3255                 mdelay(1);
3256
3257         local_bh_enable();
3258 }
3259
3260 #else /* CONFIG_NET_RX_BUSY_POLL */
3261
3262 static inline bool be_lock_napi(struct be_eq_obj *eqo)
3263 {
3264         return true;
3265 }
3266
3267 static inline void be_unlock_napi(struct be_eq_obj *eqo)
3268 {
3269 }
3270
3271 static inline bool be_lock_busy_poll(struct be_eq_obj *eqo)
3272 {
3273         return false;
3274 }
3275
3276 static inline void be_unlock_busy_poll(struct be_eq_obj *eqo)
3277 {
3278 }
3279
3280 static inline void be_enable_busy_poll(struct be_eq_obj *eqo)
3281 {
3282 }
3283
3284 static inline void be_disable_busy_poll(struct be_eq_obj *eqo)
3285 {
3286 }
3287 #endif /* CONFIG_NET_RX_BUSY_POLL */
3288
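/* NAPI poll handler shared by all EQs: reap TX completions for the TX queues
 * on this EQ, process RX within the given budget, service the MCC queue when
 * this is the MCC EQ, and re-arm the EQ only if all work fit in the budget.
 */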
3289 int be_poll(struct napi_struct *napi, int budget)
3290 {
3291         struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3292         struct be_adapter *adapter = eqo->adapter;
3293         int max_work = 0, work, i, num_evts;
3294         struct be_rx_obj *rxo;
3295         struct be_tx_obj *txo;
3296         u32 mult_enc = 0;
3297
3298         num_evts = events_get(eqo);
3299
3300         for_all_tx_queues_on_eq(adapter, eqo, txo, i)
3301                 be_process_tx(adapter, txo, i);
3302
3303         if (be_lock_napi(eqo)) {
3304                 /* This loop will iterate twice for EQ0 in which
3305                  * completions of the last RXQ (default one) are also processed
3306                  * For other EQs the loop iterates only once
3307                  */
3308                 for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3309                         work = be_process_rx(rxo, napi, budget, NAPI_POLLING);
3310                         max_work = max(work, max_work);
3311                 }
3312                 be_unlock_napi(eqo);
3313         } else {
3314                 max_work = budget;
3315         }
3316
3317         if (is_mcc_eqo(eqo))
3318                 be_process_mcc(adapter);
3319
3320         if (max_work < budget) {
3321                 napi_complete(napi);
3322
3323                 /* Skyhawk EQ_DB has a provision to set the re-arm-to-interrupt
3324                  * delay via a delay multiplier encoding value
3325                  */
3326                 if (skyhawk_chip(adapter))
3327                         mult_enc = be_get_eq_delay_mult_enc(eqo);
3328
3329                 be_eq_notify(adapter, eqo->q.id, true, false, num_evts,
3330                              mult_enc);
3331         } else {
3332                 /* As we'll continue in polling mode, count and clear events */
3333                 be_eq_notify(adapter, eqo->q.id, false, false, num_evts, 0);
3334         }
3335         return max_work;
3336 }
3337
3338 #ifdef CONFIG_NET_RX_BUSY_POLL
3339 static int be_busy_poll(struct napi_struct *napi)
3340 {
3341         struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3342         struct be_adapter *adapter = eqo->adapter;
3343         struct be_rx_obj *rxo;
3344         int i, work = 0;
3345
3346         if (!be_lock_busy_poll(eqo))
3347                 return LL_FLUSH_BUSY;
3348
3349         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3350                 work = be_process_rx(rxo, napi, 4, BUSY_POLLING);
3351                 if (work)
3352                         break;
3353         }
3354
3355         be_unlock_busy_poll(eqo);
3356         return work;
3357 }
3358 #endif
3359
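/* Poll the adapter's error registers: the SLIPORT status/error registers on
 * Lancer, or the UE (unrecoverable error) status CSRs on other chips, and
 * log/flag any error condition that is found.
 */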
3360 void be_detect_error(struct be_adapter *adapter)
3361 {
3362         u32 ue_lo = 0, ue_hi = 0, ue_lo_mask = 0, ue_hi_mask = 0;
3363         u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0;
3364         u32 i;
3365         struct device *dev = &adapter->pdev->dev;
3366
3367         if (be_check_error(adapter, BE_ERROR_HW))
3368                 return;
3369
3370         if (lancer_chip(adapter)) {
3371                 sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
3372                 if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
3373                         be_set_error(adapter, BE_ERROR_UE);
3374                         sliport_err1 = ioread32(adapter->db +
3375                                                 SLIPORT_ERROR1_OFFSET);
3376                         sliport_err2 = ioread32(adapter->db +
3377                                                 SLIPORT_ERROR2_OFFSET);
3378                         /* Do not log error messages if it's a FW reset */
3379                         if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 &&
3380                             sliport_err2 == SLIPORT_ERROR_FW_RESET2) {
3381                                 dev_info(dev, "Firmware update in progress\n");
3382                         } else {
3383                                 dev_err(dev, "Error detected in the card\n");
3384                                 dev_err(dev, "ERR: sliport status 0x%x\n",
3385                                         sliport_status);
3386                                 dev_err(dev, "ERR: sliport error1 0x%x\n",
3387                                         sliport_err1);
3388                                 dev_err(dev, "ERR: sliport error2 0x%x\n",
3389                                         sliport_err2);
3390                         }
3391                 }
3392         } else {
3393                 ue_lo = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_LOW);
3394                 ue_hi = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_HIGH);
3395                 ue_lo_mask = ioread32(adapter->pcicfg +
3396                                       PCICFG_UE_STATUS_LOW_MASK);
3397                 ue_hi_mask = ioread32(adapter->pcicfg +
3398                                       PCICFG_UE_STATUS_HI_MASK);
3399
3400                 ue_lo = (ue_lo & ~ue_lo_mask);
3401                 ue_hi = (ue_hi & ~ue_hi_mask);
3402
3403                 /* On certain platforms BE hardware can indicate spurious UEs.
3404                  * In case of a real UE, let the HW stop working completely on
3405                  * its own; hence hw_error is not set here on UE detection.
3406                  */
3407
3408                 if (ue_lo || ue_hi) {
3409                         dev_err(dev, "Error detected in the adapter");
3410                         if (skyhawk_chip(adapter))
3411                                 be_set_error(adapter, BE_ERROR_UE);
3412
3413                         for (i = 0; ue_lo; ue_lo >>= 1, i++) {
3414                                 if (ue_lo & 1)
3415                                         dev_err(dev, "UE: %s bit set\n",
3416                                                 ue_status_low_desc[i]);
3417                         }
3418                         for (i = 0; ue_hi; ue_hi >>= 1, i++) {
3419                                 if (ue_hi & 1)
3420                                         dev_err(dev, "UE: %s bit set\n",
3421                                                 ue_status_hi_desc[i]);
3422                         }
3423                 }
3424         }
3425 }
3426
3427 static void be_msix_disable(struct be_adapter *adapter)
3428 {
3429         if (msix_enabled(adapter)) {
3430                 pci_disable_msix(adapter->pdev);
3431                 adapter->num_msix_vec = 0;
3432                 adapter->num_msix_roce_vec = 0;
3433         }
3434 }
3435
3436 static int be_msix_enable(struct be_adapter *adapter)
3437 {
3438         unsigned int i, max_roce_eqs;
3439         struct device *dev = &adapter->pdev->dev;
3440         int num_vec;
3441
3442         /* If RoCE is supported, program the max number of vectors that
3443          * could be used for NIC and RoCE; otherwise, just program the number
3444          * we'll use initially.
3445          */
3446         if (be_roce_supported(adapter)) {
3447                 max_roce_eqs =
3448                         be_max_func_eqs(adapter) - be_max_nic_eqs(adapter);
3449                 max_roce_eqs = min(max_roce_eqs, num_online_cpus());
3450                 num_vec = be_max_any_irqs(adapter) + max_roce_eqs;
3451         } else {
3452                 num_vec = max(adapter->cfg_num_rx_irqs,
3453                               adapter->cfg_num_tx_irqs);
3454         }
3455
3456         for (i = 0; i < num_vec; i++)
3457                 adapter->msix_entries[i].entry = i;
3458
3459         num_vec = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
3460                                         MIN_MSIX_VECTORS, num_vec);
3461         if (num_vec < 0)
3462                 goto fail;
3463
3464         if (be_roce_supported(adapter) && num_vec > MIN_MSIX_VECTORS) {
3465                 adapter->num_msix_roce_vec = num_vec / 2;
3466                 dev_info(dev, "enabled %d MSI-x vector(s) for RoCE\n",
3467                          adapter->num_msix_roce_vec);
3468         }
3469
3470         adapter->num_msix_vec = num_vec - adapter->num_msix_roce_vec;
3471
3472         dev_info(dev, "enabled %d MSI-x vector(s) for NIC\n",
3473                  adapter->num_msix_vec);
3474         return 0;
3475
3476 fail:
3477         dev_warn(dev, "MSIx enable failed\n");
3478
3479         /* INTx is not supported in VFs, so fail probe if enable_msix fails */
3480         if (be_virtfn(adapter))
3481                 return num_vec;
3482         return 0;
3483 }
3484
3485 static inline int be_msix_vec_get(struct be_adapter *adapter,
3486                                   struct be_eq_obj *eqo)
3487 {
3488         return adapter->msix_entries[eqo->msix_idx].vector;
3489 }
3490
3491 static int be_msix_register(struct be_adapter *adapter)
3492 {
3493         struct net_device *netdev = adapter->netdev;
3494         struct be_eq_obj *eqo;
3495         int status, i, vec;
3496
3497         for_all_evt_queues(adapter, eqo, i) {
3498                 sprintf(eqo->desc, "%s-q%d", netdev->name, i);
3499                 vec = be_msix_vec_get(adapter, eqo);
3500                 status = request_irq(vec, be_msix, 0, eqo->desc, eqo);
3501                 if (status)
3502                         goto err_msix;
3503
3504                 irq_set_affinity_hint(vec, eqo->affinity_mask);
3505         }
3506
3507         return 0;
3508 err_msix:
3509         for (i--; i >= 0; i--) {
3510                 eqo = &adapter->eq_obj[i];
3511                 free_irq(be_msix_vec_get(adapter, eqo), eqo);
3512         }
3513         dev_warn(&adapter->pdev->dev, "MSIX Request IRQ failed - err %d\n",
3514                  status);
3515         be_msix_disable(adapter);
3516         return status;
3517 }
3518
3519 static int be_irq_register(struct be_adapter *adapter)
3520 {
3521         struct net_device *netdev = adapter->netdev;
3522         int status;
3523
3524         if (msix_enabled(adapter)) {
3525                 status = be_msix_register(adapter);
3526                 if (status == 0)
3527                         goto done;
3528                 /* INTx is not supported for VF */
3529                 if (be_virtfn(adapter))
3530                         return status;
3531         }
3532
3533         /* INTx: only the first EQ is used */
3534         netdev->irq = adapter->pdev->irq;
3535         status = request_irq(netdev->irq, be_intx, IRQF_SHARED, netdev->name,
3536                              &adapter->eq_obj[0]);
3537         if (status) {
3538                 dev_err(&adapter->pdev->dev,
3539                         "INTx request IRQ failed - err %d\n", status);
3540                 return status;
3541         }
3542 done:
3543         adapter->isr_registered = true;
3544         return 0;
3545 }
3546
3547 static void be_irq_unregister(struct be_adapter *adapter)
3548 {
3549         struct net_device *netdev = adapter->netdev;
3550         struct be_eq_obj *eqo;
3551         int i, vec;
3552
3553         if (!adapter->isr_registered)
3554                 return;
3555
3556         /* INTx */
3557         if (!msix_enabled(adapter)) {
3558                 free_irq(netdev->irq, &adapter->eq_obj[0]);
3559                 goto done;
3560         }
3561
3562         /* MSIx */
3563         for_all_evt_queues(adapter, eqo, i) {
3564                 vec = be_msix_vec_get(adapter, eqo);
3565                 irq_set_affinity_hint(vec, NULL);
3566                 free_irq(vec, eqo);
3567         }
3568
3569 done:
3570         adapter->isr_registered = false;
3571 }
3572
3573 static void be_rx_qs_destroy(struct be_adapter *adapter)
3574 {
3575         struct rss_info *rss = &adapter->rss_info;
3576         struct be_queue_info *q;
3577         struct be_rx_obj *rxo;
3578         int i;
3579
3580         for_all_rx_queues(adapter, rxo, i) {
3581                 q = &rxo->q;
3582                 if (q->created) {
3583                         /* If RXQs are destroyed while in an "out of buffer"
3584                          * state, there is a possibility of an HW stall on
3585                          * Lancer. So, post 64 buffers to each queue to relieve
3586                          * the "out of buffer" condition.
3587                          * Make sure there's space in the RXQ before posting.
3588                          */
3589                         if (lancer_chip(adapter)) {
3590                                 be_rx_cq_clean(rxo);
3591                                 if (atomic_read(&q->used) == 0)
3592                                         be_post_rx_frags(rxo, GFP_KERNEL,
3593                                                          MAX_RX_POST);
3594                         }
3595
3596                         be_cmd_rxq_destroy(adapter, q);
3597                         be_rx_cq_clean(rxo);
3598                         be_rxq_clean(rxo);
3599                 }
3600                 be_queue_free(adapter, q);
3601         }
3602
3603         if (rss->rss_flags) {
3604                 rss->rss_flags = RSS_ENABLE_NONE;
3605                 be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3606                                   128, rss->rss_hkey);
3607         }
3608 }
3609
3610 static void be_disable_if_filters(struct be_adapter *adapter)
3611 {
3612         be_dev_mac_del(adapter, adapter->pmac_id[0]);
3613         be_clear_uc_list(adapter);
3614         be_clear_mc_list(adapter);
3615
3616         /* The IFACE flags are enabled in the open path and cleared
3617          * in the close path. When a VF gets detached from the host and
3618          * assigned to a VM the following happens:
3619          *      - VF's IFACE flags get cleared in the detach path
3620          *      - IFACE create is issued by the VF in the attach path
3621          * Due to a bug in the BE3/Skyhawk-R FW
3622          * (Lancer FW doesn't have the bug), the IFACE capability flags
3623          * specified along with the IFACE create cmd issued by a VF are not
3624          * honoured by FW.  As a consequence, if a *new* driver
3625          * (that enables/disables IFACE flags in open/close)
3626          * is loaded in the host and an *old* driver is used by a VM/VF,
3627          * the IFACE gets created *without* the needed flags.
3628          * To avoid this, disable RX-filter flags only for Lancer.
3629          */
3630         if (lancer_chip(adapter)) {
3631                 be_cmd_rx_filter(adapter, BE_IF_ALL_FILT_FLAGS, OFF);
3632                 adapter->if_flags &= ~BE_IF_ALL_FILT_FLAGS;
3633         }
3634 }
3635
3636 static int be_close(struct net_device *netdev)
3637 {
3638         struct be_adapter *adapter = netdev_priv(netdev);
3639         struct be_eq_obj *eqo;
3640         int i;
3641
3642         /* This protection is needed as be_close() may be called even when the
3643          * adapter is in cleared state (after eeh perm failure)
3644          */
3645         if (!(adapter->flags & BE_FLAGS_SETUP_DONE))
3646                 return 0;
3647
3648         /* Before attempting cleanup ensure all the pending cmds in the
3649          * config_wq have finished execution
3650          */
3651         flush_workqueue(be_wq);
3652
3653         be_disable_if_filters(adapter);
3654
3655         if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
3656                 for_all_evt_queues(adapter, eqo, i) {
3657                         napi_disable(&eqo->napi);
3658                         be_disable_busy_poll(eqo);
3659                 }
3660                 adapter->flags &= ~BE_FLAGS_NAPI_ENABLED;
3661         }
3662
3663         be_async_mcc_disable(adapter);
3664
3665         /* Wait for all pending tx completions to arrive so that
3666          * all tx skbs are freed.
3667          */
3668         netif_tx_disable(netdev);
3669         be_tx_compl_clean(adapter);
3670
3671         be_rx_qs_destroy(adapter);
3672
3673         for_all_evt_queues(adapter, eqo, i) {
3674                 if (msix_enabled(adapter))
3675                         synchronize_irq(be_msix_vec_get(adapter, eqo));
3676                 else
3677                         synchronize_irq(netdev->irq);
3678                 be_eq_clean(eqo);
3679         }
3680
3681         be_irq_unregister(adapter);
3682
3683         return 0;
3684 }
3685
3686 static int be_rx_qs_create(struct be_adapter *adapter)
3687 {
3688         struct rss_info *rss = &adapter->rss_info;
3689         u8 rss_key[RSS_HASH_KEY_LEN];
3690         struct be_rx_obj *rxo;
3691         int rc, i, j;
3692
3693         for_all_rx_queues(adapter, rxo, i) {
3694                 rc = be_queue_alloc(adapter, &rxo->q, RX_Q_LEN,
3695                                     sizeof(struct be_eth_rx_d));
3696                 if (rc)
3697                         return rc;
3698         }
3699
3700         if (adapter->need_def_rxq || !adapter->num_rss_qs) {
3701                 rxo = default_rxo(adapter);
3702                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3703                                        rx_frag_size, adapter->if_handle,
3704                                        false, &rxo->rss_id);
3705                 if (rc)
3706                         return rc;
3707         }
3708
3709         for_all_rss_queues(adapter, rxo, i) {
3710                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3711                                        rx_frag_size, adapter->if_handle,
3712                                        true, &rxo->rss_id);
3713                 if (rc)
3714                         return rc;
3715         }
3716
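        /* With multiple RX rings, fill the RSS indirection table round-robin
         * across the RSS rings and program the RSS hash types and key; with
         * only the default ring, leave RSS disabled.
         */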
3717         if (be_multi_rxq(adapter)) {
3718                 for (j = 0; j < RSS_INDIR_TABLE_LEN; j += adapter->num_rss_qs) {
3719                         for_all_rss_queues(adapter, rxo, i) {
3720                                 if ((j + i) >= RSS_INDIR_TABLE_LEN)
3721                                         break;
3722                                 rss->rsstable[j + i] = rxo->rss_id;
3723                                 rss->rss_queue[j + i] = i;
3724                         }
3725                 }
3726                 rss->rss_flags = RSS_ENABLE_TCP_IPV4 | RSS_ENABLE_IPV4 |
3727                         RSS_ENABLE_TCP_IPV6 | RSS_ENABLE_IPV6;
3728
3729                 if (!BEx_chip(adapter))
3730                         rss->rss_flags |= RSS_ENABLE_UDP_IPV4 |
3731                                 RSS_ENABLE_UDP_IPV6;
3732
3733                 netdev_rss_key_fill(rss_key, RSS_HASH_KEY_LEN);
3734                 rc = be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3735                                        RSS_INDIR_TABLE_LEN, rss_key);
3736                 if (rc) {
3737                         rss->rss_flags = RSS_ENABLE_NONE;
3738                         return rc;
3739                 }
3740
3741                 memcpy(rss->rss_hkey, rss_key, RSS_HASH_KEY_LEN);
3742         } else {
3743                 /* Disable RSS, if only default RX Q is created */
3744                 rss->rss_flags = RSS_ENABLE_NONE;
3745         }
3746
3747
3748         /* Post 1 less than RXQ-len to avoid head being equal to tail,
3749          * which is a queue empty condition
3750          */
3751         for_all_rx_queues(adapter, rxo, i)
3752                 be_post_rx_frags(rxo, GFP_KERNEL, RX_Q_LEN - 1);
3753
3754         return 0;
3755 }
3756
3757 static int be_enable_if_filters(struct be_adapter *adapter)
3758 {
3759         int status;
3760
3761         status = be_cmd_rx_filter(adapter, BE_IF_FILT_FLAGS_BASIC, ON);
3762         if (status)
3763                 return status;
3764
3765         /* For BE3 VFs, the PF programs the initial MAC address */
3766         if (!(BEx_chip(adapter) && be_virtfn(adapter))) {
3767                 status = be_dev_mac_add(adapter, adapter->netdev->dev_addr);
3768                 if (status)
3769                         return status;
3770                 ether_addr_copy(adapter->dev_mac, adapter->netdev->dev_addr);
3771         }
3772
3773         if (adapter->vlans_added)
3774                 be_vid_config(adapter);
3775
3776         __be_set_rx_mode(adapter);
3777
3778         return 0;
3779 }
3780
3781 static int be_open(struct net_device *netdev)
3782 {
3783         struct be_adapter *adapter = netdev_priv(netdev);
3784         struct be_eq_obj *eqo;
3785         struct be_rx_obj *rxo;
3786         struct be_tx_obj *txo;
3787         u8 link_status;
3788         int status, i;
3789
3790         status = be_rx_qs_create(adapter);
3791         if (status)
3792                 goto err;
3793
3794         status = be_enable_if_filters(adapter);
3795         if (status)
3796                 goto err;
3797
3798         status = be_irq_register(adapter);
3799         if (status)
3800                 goto err;
3801
3802         for_all_rx_queues(adapter, rxo, i)
3803                 be_cq_notify(adapter, rxo->cq.id, true, 0);
3804
3805         for_all_tx_queues(adapter, txo, i)
3806                 be_cq_notify(adapter, txo->cq.id, true, 0);
3807
3808         be_async_mcc_enable(adapter);
3809
3810         for_all_evt_queues(adapter, eqo, i) {
3811                 napi_enable(&eqo->napi);
3812                 be_enable_busy_poll(eqo);
3813                 be_eq_notify(adapter, eqo->q.id, true, true, 0, 0);
3814         }
3815         adapter->flags |= BE_FLAGS_NAPI_ENABLED;
3816
3817         status = be_cmd_link_status_query(adapter, NULL, &link_status, 0);
3818         if (!status)
3819                 be_link_status_update(adapter, link_status);
3820
3821         netif_tx_start_all_queues(netdev);
3822         if (skyhawk_chip(adapter))
3823                 udp_tunnel_get_rx_info(netdev);
3824
3825         return 0;
3826 err:
3827         be_close(adapter->netdev);
3828         return -EIO;
3829 }
3830
3831 static void be_vf_eth_addr_generate(struct be_adapter *adapter, u8 *mac)
3832 {
3833         u32 addr;
3834
3835         addr = jhash(adapter->netdev->dev_addr, ETH_ALEN, 0);
3836
3837         mac[5] = (u8)(addr & 0xFF);
3838         mac[4] = (u8)((addr >> 8) & 0xFF);
3839         mac[3] = (u8)((addr >> 16) & 0xFF);
3840         /* Use the OUI from the current MAC address */
3841         memcpy(mac, adapter->netdev->dev_addr, 3);
3842 }
3843
3844 /*
3845  * Generate a seed MAC address from the PF MAC Address using jhash.
3846  * MAC addresses for VFs are assigned incrementally starting from the seed.
3847  * These addresses are programmed in the ASIC by the PF and the VF driver
3848  * queries for the MAC address during its probe.
3849  */
3850 static int be_vf_eth_addr_config(struct be_adapter *adapter)
3851 {
3852         u32 vf;
3853         int status = 0;
3854         u8 mac[ETH_ALEN];
3855         struct be_vf_cfg *vf_cfg;
3856
3857         be_vf_eth_addr_generate(adapter, mac);
3858
3859         for_all_vfs(adapter, vf_cfg, vf) {
3860                 if (BEx_chip(adapter))
3861                         status = be_cmd_pmac_add(adapter, mac,
3862                                                  vf_cfg->if_handle,
3863                                                  &vf_cfg->pmac_id, vf + 1);
3864                 else
3865                         status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
3866                                                 vf + 1);
3867
3868                 if (status)
3869                         dev_err(&adapter->pdev->dev,
3870                                 "Mac address assignment failed for VF %d\n",
3871                                 vf);
3872                 else
3873                         memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3874
3875                 mac[5] += 1;
3876         }
3877         return status;
3878 }
3879
3880 static int be_vfs_mac_query(struct be_adapter *adapter)
3881 {
3882         int status, vf;
3883         u8 mac[ETH_ALEN];
3884         struct be_vf_cfg *vf_cfg;
3885
3886         for_all_vfs(adapter, vf_cfg, vf) {
3887                 status = be_cmd_get_active_mac(adapter, vf_cfg->pmac_id,
3888                                                mac, vf_cfg->if_handle,
3889                                                false, vf+1);
3890                 if (status)
3891                         return status;
3892                 memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3893         }
3894         return 0;
3895 }
3896
3897 static void be_vf_clear(struct be_adapter *adapter)
3898 {
3899         struct be_vf_cfg *vf_cfg;
3900         u32 vf;
3901
3902         if (pci_vfs_assigned(adapter->pdev)) {
3903                 dev_warn(&adapter->pdev->dev,
3904                          "VFs are assigned to VMs: not disabling VFs\n");
3905                 goto done;
3906         }
3907
3908         pci_disable_sriov(adapter->pdev);
3909
3910         for_all_vfs(adapter, vf_cfg, vf) {
3911                 if (BEx_chip(adapter))
3912                         be_cmd_pmac_del(adapter, vf_cfg->if_handle,
3913                                         vf_cfg->pmac_id, vf + 1);
3914                 else
3915                         be_cmd_set_mac(adapter, NULL, vf_cfg->if_handle,
3916                                        vf + 1);
3917
3918                 be_cmd_if_destroy(adapter, vf_cfg->if_handle, vf + 1);
3919         }
3920
3921         if (BE3_chip(adapter))
3922                 be_cmd_set_hsw_config(adapter, 0, 0,
3923                                       adapter->if_handle,
3924                                       PORT_FWD_TYPE_PASSTHRU, 0);
3925 done:
3926         kfree(adapter->vf_cfg);
3927         adapter->num_vfs = 0;
3928         adapter->flags &= ~BE_FLAGS_SRIOV_ENABLED;
3929 }
3930
3931 static void be_clear_queues(struct be_adapter *adapter)
3932 {
3933         be_mcc_queues_destroy(adapter);
3934         be_rx_cqs_destroy(adapter);
3935         be_tx_queues_destroy(adapter);
3936         be_evt_queues_destroy(adapter);
3937 }
3938
3939 static void be_cancel_worker(struct be_adapter *adapter)
3940 {
3941         if (adapter->flags & BE_FLAGS_WORKER_SCHEDULED) {
3942                 cancel_delayed_work_sync(&adapter->work);
3943                 adapter->flags &= ~BE_FLAGS_WORKER_SCHEDULED;
3944         }
3945 }
3946
3947 static void be_cancel_err_detection(struct be_adapter *adapter)
3948 {
3949         struct be_error_recovery *err_rec = &adapter->error_recovery;
3950
3951         if (!be_err_recovery_workq)
3952                 return;
3953
3954         if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) {
3955                 cancel_delayed_work_sync(&err_rec->err_detection_work);
3956                 adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED;
3957         }
3958 }
3959
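/* Undo VxLAN offload setup: convert the tunnel interface back to a normal
 * one, clear the VxLAN UDP port in FW and drop the tunnel offload features
 * from the netdev.
 */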
3960 static void be_disable_vxlan_offloads(struct be_adapter *adapter)
3961 {
3962         struct net_device *netdev = adapter->netdev;
3963
3964         if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS)
3965                 be_cmd_manage_iface(adapter, adapter->if_handle,
3966                                     OP_CONVERT_TUNNEL_TO_NORMAL);
3967
3968         if (adapter->vxlan_port)
3969                 be_cmd_set_vxlan_port(adapter, 0);
3970
3971         adapter->flags &= ~BE_FLAGS_VXLAN_OFFLOADS;
3972         adapter->vxlan_port = 0;
3973
3974         netdev->hw_enc_features = 0;
3975         netdev->hw_features &= ~(NETIF_F_GSO_UDP_TUNNEL);
3976         netdev->features &= ~(NETIF_F_GSO_UDP_TUNNEL);
3977 }
3978
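/* Compute each VF's share of queue and filter resources (vft_res) used when
 * distributing the PF pool among num_vfs VFs; RSS is offered to VFs only
 * when more than one queue pair can be given to each VF.
 */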
3979 static void be_calculate_vf_res(struct be_adapter *adapter, u16 num_vfs,
3980                                 struct be_resources *vft_res)
3981 {
3982         struct be_resources res = adapter->pool_res;
3983         u32 vf_if_cap_flags = res.vf_if_cap_flags;
3984         struct be_resources res_mod = {0};
3985         u16 num_vf_qs = 1;
3986
3987         /* Distribute the queue resources among the PF and its VFs */
3988         if (num_vfs) {
3989                 /* Divide the rx queues evenly among the VFs and the PF, capped
3990                  * at VF-EQ-count. Any remainder queues belong to the PF.
3991                  */
3992                 num_vf_qs = min(SH_VF_MAX_NIC_EQS,
3993                                 res.max_rss_qs / (num_vfs + 1));
3994
3995                 /* Skyhawk-R chip supports only MAX_PORT_RSS_TABLES
3996                  * RSS Tables per port. Provide RSS on VFs, only if number of
3997                  * VFs requested is less than its PF Pool's RSS Tables limit.
3998                  */
3999                 if (num_vfs >= be_max_pf_pool_rss_tables(adapter))
4000                         num_vf_qs = 1;
4001         }
4002
4003         /* Fields that the GET_PROFILE_CONFIG cmd returns as all '1's are the
4004          * ones that are modifiable using the SET_PROFILE_CONFIG cmd.
4005          */
4006         be_cmd_get_profile_config(adapter, &res_mod, NULL, ACTIVE_PROFILE_TYPE,
4007                                   RESOURCE_MODIFIABLE, 0);
4008
4009         /* If RSS IFACE capability flags are modifiable for a VF, set the
4010          * capability flag as valid and set RSS and DEFQ_RSS IFACE flags if
4011          * more than 1 RSSQ is available for a VF.
4012          * Otherwise, provision only 1 queue pair for VF.
4013          */
4014         if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_RSS) {
4015                 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4016                 if (num_vf_qs > 1) {
4017                         vf_if_cap_flags |= BE_IF_FLAGS_RSS;
4018                         if (res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS)
4019                                 vf_if_cap_flags |= BE_IF_FLAGS_DEFQ_RSS;
4020                 } else {
4021                         vf_if_cap_flags &= ~(BE_IF_FLAGS_RSS |
4022                                              BE_IF_FLAGS_DEFQ_RSS);
4023                 }
4024         } else {
4025                 num_vf_qs = 1;
4026         }
4027
4028         if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
4029                 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4030                 vf_if_cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4031         }
4032
4033         vft_res->vf_if_cap_flags = vf_if_cap_flags;
4034         vft_res->max_rx_qs = num_vf_qs;
4035         vft_res->max_rss_qs = num_vf_qs;
4036         vft_res->max_tx_qs = res.max_tx_qs / (num_vfs + 1);
4037         vft_res->max_cq_count = res.max_cq_count / (num_vfs + 1);
4038
4039         /* Distribute unicast MACs, VLANs, IFACE count and MCCQ count equally
4040          * among the PF and its VFs, if the fields are changeable
4041          */
4042         if (res_mod.max_uc_mac == FIELD_MODIFIABLE)
4043                 vft_res->max_uc_mac = res.max_uc_mac / (num_vfs + 1);
4044
4045         if (res_mod.max_vlans == FIELD_MODIFIABLE)
4046                 vft_res->max_vlans = res.max_vlans / (num_vfs + 1);
4047
4048         if (res_mod.max_iface_count == FIELD_MODIFIABLE)
4049                 vft_res->max_iface_count = res.max_iface_count / (num_vfs + 1);
4050
4051         if (res_mod.max_mcc_count == FIELD_MODIFIABLE)
4052                 vft_res->max_mcc_count = res.max_mcc_count / (num_vfs + 1);
4053 }
4054
4055 static void be_if_destroy(struct be_adapter *adapter)
4056 {
4057         be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4058
4059         kfree(adapter->pmac_id);
4060         adapter->pmac_id = NULL;
4061
4062         kfree(adapter->mc_list);
4063         adapter->mc_list = NULL;
4064
4065         kfree(adapter->uc_list);
4066         adapter->uc_list = NULL;
4067 }
4068
4069 static int be_clear(struct be_adapter *adapter)
4070 {
4071         struct pci_dev *pdev = adapter->pdev;
4072         struct  be_resources vft_res = {0};
4073
4074         be_cancel_worker(adapter);
4075
4076         flush_workqueue(be_wq);
4077
4078         if (sriov_enabled(adapter))
4079                 be_vf_clear(adapter);
4080
4081         /* Re-configure FW to distribute resources evenly across max-supported
4082          * number of VFs, only when VFs are not already enabled.
4083          */
4084         if (skyhawk_chip(adapter) && be_physfn(adapter) &&
4085             !pci_vfs_assigned(pdev)) {
4086                 be_calculate_vf_res(adapter,
4087                                     pci_sriov_get_totalvfs(pdev),
4088                                     &vft_res);
4089                 be_cmd_set_sriov_config(adapter, adapter->pool_res,
4090                                         pci_sriov_get_totalvfs(pdev),
4091                                         &vft_res);
4092         }
4093
4094         be_disable_vxlan_offloads(adapter);
4095
4096         be_if_destroy(adapter);
4097
4098         be_clear_queues(adapter);
4099
4100         be_msix_disable(adapter);
4101         adapter->flags &= ~BE_FLAGS_SETUP_DONE;
4102         return 0;
4103 }
4104
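/* Create an IFACE for each VF via proxy if_create calls issued by the PF,
 * limiting the capability flags to what the active profile allows and
 * keeping VLAN promiscuous mode disabled for VFs.
 */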
4105 static int be_vfs_if_create(struct be_adapter *adapter)
4106 {
4107         struct be_resources res = {0};
4108         u32 cap_flags, en_flags, vf;
4109         struct be_vf_cfg *vf_cfg;
4110         int status;
4111
4112         /* If a FW profile exists, then cap_flags are updated */
4113         cap_flags = BE_VF_IF_EN_FLAGS;
4114
4115         for_all_vfs(adapter, vf_cfg, vf) {
4116                 if (!BE3_chip(adapter)) {
4117                         status = be_cmd_get_profile_config(adapter, &res, NULL,
4118                                                            ACTIVE_PROFILE_TYPE,
4119                                                            RESOURCE_LIMITS,
4120                                                            vf + 1);
4121                         if (!status) {
4122                                 cap_flags = res.if_cap_flags;
4123                                 /* Prevent VFs from enabling VLAN promiscuous
4124                                  * mode
4125                                  */
4126                                 cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4127                         }
4128                 }
4129
4130                 /* PF should enable IF flags during proxy if_create call */
4131                 en_flags = cap_flags & BE_VF_IF_EN_FLAGS;
4132                 status = be_cmd_if_create(adapter, cap_flags, en_flags,
4133                                           &vf_cfg->if_handle, vf + 1);
4134                 if (status)
4135                         return status;
4136         }
4137
4138         return 0;
4139 }
4140
4141 static int be_vf_setup_init(struct be_adapter *adapter)
4142 {
4143         struct be_vf_cfg *vf_cfg;
4144         int vf;
4145
4146         adapter->vf_cfg = kcalloc(adapter->num_vfs, sizeof(*vf_cfg),
4147                                   GFP_KERNEL);
4148         if (!adapter->vf_cfg)
4149                 return -ENOMEM;
4150
4151         for_all_vfs(adapter, vf_cfg, vf) {
4152                 vf_cfg->if_handle = -1;
4153                 vf_cfg->pmac_id = -1;
4154         }
4155         return 0;
4156 }
4157
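/* Provision SR-IOV VFs: create (or re-discover) per-VF interfaces and MAC
 * addresses, grant the FILTMGMT privilege where missing, apply QoS,
 * link-state and spoof-check defaults, and enable SR-IOV in the PCI layer
 * when VFs were not already enabled.
 */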
4158 static int be_vf_setup(struct be_adapter *adapter)
4159 {
4160         struct device *dev = &adapter->pdev->dev;
4161         struct be_vf_cfg *vf_cfg;
4162         int status, old_vfs, vf;
4163         bool spoofchk;
4164
4165         old_vfs = pci_num_vf(adapter->pdev);
4166
4167         status = be_vf_setup_init(adapter);
4168         if (status)
4169                 goto err;
4170
4171         if (old_vfs) {
4172                 for_all_vfs(adapter, vf_cfg, vf) {
4173                         status = be_cmd_get_if_id(adapter, vf_cfg, vf);
4174                         if (status)
4175                                 goto err;
4176                 }
4177
4178                 status = be_vfs_mac_query(adapter);
4179                 if (status)
4180                         goto err;
4181         } else {
4182                 status = be_vfs_if_create(adapter);
4183                 if (status)
4184                         goto err;
4185
4186                 status = be_vf_eth_addr_config(adapter);
4187                 if (status)
4188                         goto err;
4189         }
4190
4191         for_all_vfs(adapter, vf_cfg, vf) {
4192                 /* Allow VFs to program MAC/VLAN filters */
4193                 status = be_cmd_get_fn_privileges(adapter, &vf_cfg->privileges,
4194                                                   vf + 1);
4195                 if (!status && !(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
4196                         status = be_cmd_set_fn_privileges(adapter,
4197                                                           vf_cfg->privileges |
4198                                                           BE_PRIV_FILTMGMT,
4199                                                           vf + 1);
4200                         if (!status) {
4201                                 vf_cfg->privileges |= BE_PRIV_FILTMGMT;
4202                                 dev_info(dev, "VF%d has FILTMGMT privilege\n",
4203                                          vf);
4204                         }
4205                 }
4206
4207                 /* Allow full available bandwidth */
4208                 if (!old_vfs)
4209                         be_cmd_config_qos(adapter, 0, 0, vf + 1);
4210
4211                 status = be_cmd_get_hsw_config(adapter, NULL, vf + 1,
4212                                                vf_cfg->if_handle, NULL,
4213                                                &spoofchk);
4214                 if (!status)
4215                         vf_cfg->spoofchk = spoofchk;
4216
4217                 if (!old_vfs) {
4218                         be_cmd_enable_vf(adapter, vf + 1);
4219                         be_cmd_set_logical_link_config(adapter,
4220                                                        IFLA_VF_LINK_STATE_AUTO,
4221                                                        vf+1);
4222                 }
4223         }
4224
4225         if (!old_vfs) {
4226                 status = pci_enable_sriov(adapter->pdev, adapter->num_vfs);
4227                 if (status) {
4228                         dev_err(dev, "SRIOV enable failed\n");
4229                         adapter->num_vfs = 0;
4230                         goto err;
4231                 }
4232         }
4233
4234         if (BE3_chip(adapter)) {
4235                 /* On BE3, enable VEB only when SRIOV is enabled */
4236                 status = be_cmd_set_hsw_config(adapter, 0, 0,
4237                                                adapter->if_handle,
4238                                                PORT_FWD_TYPE_VEB, 0);
4239                 if (status)
4240                         goto err;
4241         }
4242
4243         adapter->flags |= BE_FLAGS_SRIOV_ENABLED;
4244         return 0;
4245 err:
4246         dev_err(dev, "VF setup failed\n");
4247         be_vf_clear(adapter);
4248         return status;
4249 }
4250
4251 /* Converting function_mode bits on BE3 to SH mc_type enums */
4252
4253 static u8 be_convert_mc_type(u32 function_mode)
4254 {
4255         if (function_mode & VNIC_MODE && function_mode & QNQ_MODE)
4256                 return vNIC1;
4257         else if (function_mode & QNQ_MODE)
4258                 return FLEX10;
4259         else if (function_mode & VNIC_MODE)
4260                 return vNIC2;
4261         else if (function_mode & UMC_ENABLED)
4262                 return UMC;
4263         else
4264                 return MC_NONE;
4265 }
4266
4267 /* On BE2/BE3 the FW does not report the supported resource limits */
4268 static void BEx_get_resources(struct be_adapter *adapter,
4269                               struct be_resources *res)
4270 {
4271         bool use_sriov = adapter->num_vfs ? 1 : 0;
4272
4273         if (be_physfn(adapter))
4274                 res->max_uc_mac = BE_UC_PMAC_COUNT;
4275         else
4276                 res->max_uc_mac = BE_VF_UC_PMAC_COUNT;
4277
4278         adapter->mc_type = be_convert_mc_type(adapter->function_mode);
4279
4280         if (be_is_mc(adapter)) {
4281                 /* Assuming that there are 4 channels per port,
4282                  * when multi-channel is enabled
4283                  */
4284                 if (be_is_qnq_mode(adapter))
4285                         res->max_vlans = BE_NUM_VLANS_SUPPORTED/8;
4286                 else
4287                         /* In a non-qnq multichannel mode, the pvid
4288                          * takes up one vlan entry
4289                          */
4290                         res->max_vlans = (BE_NUM_VLANS_SUPPORTED / 4) - 1;
4291         } else {
4292                 res->max_vlans = BE_NUM_VLANS_SUPPORTED;
4293         }
4294
4295         res->max_mcast_mac = BE_MAX_MC;
4296
4297         /* 1) For BE3 1Gb ports, FW does not support multiple TXQs
4298          * 2) Create multiple TX rings on a BE3-R multi-channel interface
4299          *    *only* if it is RSS-capable.
4300          */
4301         if (BE2_chip(adapter) || use_sriov || (adapter->port_num > 1) ||
4302             be_virtfn(adapter) ||
4303             (be_is_mc(adapter) &&
4304              !(adapter->function_caps & BE_FUNCTION_CAPS_RSS))) {
4305                 res->max_tx_qs = 1;
4306         } else if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) {
4307                 struct be_resources super_nic_res = {0};
4308
4309                 /* On a SuperNIC profile, the driver needs to use the
4310                  * GET_PROFILE_CONFIG cmd to query the per-function TXQ limits
4311                  */
4312                 be_cmd_get_profile_config(adapter, &super_nic_res, NULL,
4313                                           ACTIVE_PROFILE_TYPE, RESOURCE_LIMITS,
4314                                           0);
4315                 /* Some old versions of BE3 FW don't report max_tx_qs value */
4316                 res->max_tx_qs = super_nic_res.max_tx_qs ? : BE3_MAX_TX_QS;
4317         } else {
4318                 res->max_tx_qs = BE3_MAX_TX_QS;
4319         }
4320
4321         if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) &&
4322             !use_sriov && be_physfn(adapter))
4323                 res->max_rss_qs = (adapter->be3_native) ?
4324                                            BE3_MAX_RSS_QS : BE2_MAX_RSS_QS;
4325         res->max_rx_qs = res->max_rss_qs + 1;
4326
4327         if (be_physfn(adapter))
4328                 res->max_evt_qs = (be_max_vfs(adapter) > 0) ?
4329                                         BE3_SRIOV_MAX_EVT_QS : BE3_MAX_EVT_QS;
4330         else
4331                 res->max_evt_qs = 1;
4332
4333         res->if_cap_flags = BE_IF_CAP_FLAGS_WANT;
4334         res->if_cap_flags &= ~BE_IF_FLAGS_DEFQ_RSS;
4335         if (!(adapter->function_caps & BE_FUNCTION_CAPS_RSS))
4336                 res->if_cap_flags &= ~BE_IF_FLAGS_RSS;
4337 }
4338
4339 static void be_setup_init(struct be_adapter *adapter)
4340 {
4341         adapter->vlan_prio_bmap = 0xff;
4342         adapter->phy.link_speed = -1;
4343         adapter->if_handle = -1;
4344         adapter->be3_native = false;
4345         adapter->if_flags = 0;
4346         adapter->phy_state = BE_UNKNOWN_PHY_STATE;
4347         if (be_physfn(adapter))
4348                 adapter->cmd_privileges = MAX_PRIVILEGES;
4349         else
4350                 adapter->cmd_privileges = MIN_PRIVILEGES;
4351 }
4352
4353 /* HW supports only MAX_PORT_RSS_TABLES RSS Policy Tables per port.
4354  * However, this HW limitation is not exposed to the host via any SLI cmd.
4355  * As a result, in the case of SRIOV, and in particular in multi-partition configs,
4356  * the driver needs to calculate a proportional share of RSS Tables per PF-pool
4357  * for distribution between the VFs. This self-imposed limit determines the
4358  * number of VFs for which RSS can be enabled.
4359  */
4360 static void be_calculate_pf_pool_rss_tables(struct be_adapter *adapter)
4361 {
4362         struct be_port_resources port_res = {0};
4363         u8 rss_tables_on_port;
4364         u16 max_vfs = be_max_vfs(adapter);
4365
4366         be_cmd_get_profile_config(adapter, NULL, &port_res, SAVED_PROFILE_TYPE,
4367                                   RESOURCE_LIMITS, 0);
4368
4369         rss_tables_on_port = MAX_PORT_RSS_TABLES - port_res.nic_pfs;
4370
4371         /* Each PF Pool's RSS Tables limit =
4372          * PF's Max VFs / Total_Max_VFs on Port * RSS Tables on Port
4373          */
4374         adapter->pool_res.max_rss_tables =
4375                 max_vfs * rss_tables_on_port / port_res.max_vfs;
4376 }
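
/* Worked example of the proportional-share formula above (all numbers are
 * purely illustrative, not taken from any particular adapter): assume
 * MAX_PORT_RSS_TABLES is 15 and the port reports nic_pfs = 3 and
 * max_vfs = 64, so rss_tables_on_port = 15 - 3 = 12.  A PF whose own
 * be_max_vfs() is 32 then gets pool_res.max_rss_tables = 32 * 12 / 64 = 6,
 * i.e. RSS can be enabled for at most 6 of that PF's VFs.
 */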
4377
4378 static int be_get_sriov_config(struct be_adapter *adapter)
4379 {
4380         struct be_resources res = {0};
4381         int max_vfs, old_vfs;
4382
4383         be_cmd_get_profile_config(adapter, &res, NULL, ACTIVE_PROFILE_TYPE,
4384                                   RESOURCE_LIMITS, 0);
4385
4386         /* Some old versions of BE3 FW don't report max_vfs value */
4387         if (BE3_chip(adapter) && !res.max_vfs) {
4388                 max_vfs = pci_sriov_get_totalvfs(adapter->pdev);
4389                 res.max_vfs = max_vfs > 0 ? min(MAX_VFS, max_vfs) : 0;
4390         }
4391
4392         adapter->pool_res = res;
4393
4394         /* If during previous unload of the driver, the VFs were not disabled,
4395          * then we cannot rely on the PF POOL limits for the TotalVFs value.
4396          * Instead use the TotalVFs value stored in the pci-dev struct.
4397          */
4398         old_vfs = pci_num_vf(adapter->pdev);
4399         if (old_vfs) {
4400                 dev_info(&adapter->pdev->dev, "%d VFs are already enabled\n",
4401                          old_vfs);
4402
4403                 adapter->pool_res.max_vfs =
4404                         pci_sriov_get_totalvfs(adapter->pdev);
4405                 adapter->num_vfs = old_vfs;
4406         }
4407
4408         if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4409                 be_calculate_pf_pool_rss_tables(adapter);
4410                 dev_info(&adapter->pdev->dev,
4411                          "RSS can be enabled for all VFs if num_vfs <= %d\n",
4412                          be_max_pf_pool_rss_tables(adapter));
4413         }
4414         return 0;
4415 }
4416
4417 static void be_alloc_sriov_res(struct be_adapter *adapter)
4418 {
4419         int old_vfs = pci_num_vf(adapter->pdev);
4420         struct be_resources vft_res = {0};
4421         int status;
4422
4423         be_get_sriov_config(adapter);
4424
4425         if (!old_vfs)
4426                 pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter));
4427
4428         /* When the HW is in an SRIOV capable configuration, the PF-pool
4429          * resources are given to the PF during driver load, if there are no
4430          * old VFs. This facility is not available in BE3 FW.
4431          * Also, this is done by FW in Lancer chip.
4432          */
4433         if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4434                 be_calculate_vf_res(adapter, 0, &vft_res);
4435                 status = be_cmd_set_sriov_config(adapter, adapter->pool_res, 0,
4436                                                  &vft_res);
4437                 if (status)
4438                         dev_err(&adapter->pdev->dev,
4439                                 "Failed to optimize SRIOV resources\n");
4440         }
4441 }
4442
4443 static int be_get_resources(struct be_adapter *adapter)
4444 {
4445         struct device *dev = &adapter->pdev->dev;
4446         struct be_resources res = {0};
4447         int status;
4448
4449         /* For Lancer, SH etc. read per-function resource limits from FW.
4450          * GET_FUNC_CONFIG returns per-function guaranteed limits.
4451          * GET_PROFILE_CONFIG returns PCI-E related limits and PF-pool limits.
4452          */
4453         if (BEx_chip(adapter)) {
4454                 BEx_get_resources(adapter, &res);
4455         } else {
4456                 status = be_cmd_get_func_config(adapter, &res);
4457                 if (status)
4458                         return status;
4459
4460                 /* If a default RXQ must be created, we'll use up one RSSQ */
4461                 if (res.max_rss_qs && res.max_rss_qs == res.max_rx_qs &&
4462                     !(res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS))
4463                         res.max_rss_qs -= 1;
4464         }
4465
4466         /* If RoCE is supported stash away half the EQs for RoCE */
4467         res.max_nic_evt_qs = be_roce_supported(adapter) ?
4468                                 res.max_evt_qs / 2 : res.max_evt_qs;
4469         adapter->res = res;
4470
4471         /* If FW supports RSS default queue, then skip creating a non-RSS
4472          * queue for non-IP traffic.
4473          */
4474         adapter->need_def_rxq = (be_if_cap_flags(adapter) &
4475                                  BE_IF_FLAGS_DEFQ_RSS) ? 0 : 1;
4476
4477         dev_info(dev, "Max: txqs %d, rxqs %d, rss %d, eqs %d, vfs %d\n",
4478                  be_max_txqs(adapter), be_max_rxqs(adapter),
4479                  be_max_rss(adapter), be_max_nic_eqs(adapter),
4480                  be_max_vfs(adapter));
4481         dev_info(dev, "Max: uc-macs %d, mc-macs %d, vlans %d\n",
4482                  be_max_uc(adapter), be_max_mc(adapter),
4483                  be_max_vlans(adapter));
4484
4485         /* Ensure RX and TX queues are created in pairs at init time */
4486         adapter->cfg_num_rx_irqs =
4487                                 min_t(u16, netif_get_num_default_rss_queues(),
4488                                       be_max_qp_irqs(adapter));
4489         adapter->cfg_num_tx_irqs = adapter->cfg_num_rx_irqs;
4490         return 0;
4491 }
4492
4493 static int be_get_config(struct be_adapter *adapter)
4494 {
4495         int status, level;
4496         u16 profile_id;
4497
4498         status = be_cmd_get_cntl_attributes(adapter);
4499         if (status)
4500                 return status;
4501
4502         status = be_cmd_query_fw_cfg(adapter);
4503         if (status)
4504                 return status;
4505
4506         if (!lancer_chip(adapter) && be_physfn(adapter))
4507                 be_cmd_get_fat_dump_len(adapter, &adapter->fat_dump_len);
4508
4509         if (BEx_chip(adapter)) {
4510                 level = be_cmd_get_fw_log_level(adapter);
4511                 adapter->msg_enable =
4512                         level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0;
4513         }
4514
4515         be_cmd_get_acpi_wol_cap(adapter);
4516         pci_enable_wake(adapter->pdev, PCI_D3hot, adapter->wol_en);
4517         pci_enable_wake(adapter->pdev, PCI_D3cold, adapter->wol_en);
4518
4519         be_cmd_query_port_name(adapter);
4520
4521         if (be_physfn(adapter)) {
4522                 status = be_cmd_get_active_profile(adapter, &profile_id);
4523                 if (!status)
4524                         dev_info(&adapter->pdev->dev,
4525                                  "Using profile 0x%x\n", profile_id);
4526         }
4527
4528         return 0;
4529 }
4530
4531 static int be_mac_setup(struct be_adapter *adapter)
4532 {
4533         u8 mac[ETH_ALEN];
4534         int status;
4535
4536         if (is_zero_ether_addr(adapter->netdev->dev_addr)) {
4537                 status = be_cmd_get_perm_mac(adapter, mac);
4538                 if (status)
4539                         return status;
4540
4541                 memcpy(adapter->netdev->dev_addr, mac, ETH_ALEN);
4542                 memcpy(adapter->netdev->perm_addr, mac, ETH_ALEN);
4543         }
4544
4545         return 0;
4546 }
4547
4548 static void be_schedule_worker(struct be_adapter *adapter)
4549 {
4550         queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
4551         adapter->flags |= BE_FLAGS_WORKER_SCHEDULED;
4552 }
4553
4554 static void be_destroy_err_recovery_workq(void)
4555 {
4556         if (!be_err_recovery_workq)
4557                 return;
4558
4559         flush_workqueue(be_err_recovery_workq);
4560         destroy_workqueue(be_err_recovery_workq);
4561         be_err_recovery_workq = NULL;
4562 }
4563
4564 static void be_schedule_err_detection(struct be_adapter *adapter, u32 delay)
4565 {
4566         struct be_error_recovery *err_rec = &adapter->error_recovery;
4567
4568         if (!be_err_recovery_workq)
4569                 return;
4570
4571         queue_delayed_work(be_err_recovery_workq, &err_rec->err_detection_work,
4572                            msecs_to_jiffies(delay));
4573         adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED;
4574 }
4575
4576 static int be_setup_queues(struct be_adapter *adapter)
4577 {
4578         struct net_device *netdev = adapter->netdev;
4579         int status;
4580
4581         status = be_evt_queues_create(adapter);
4582         if (status)
4583                 goto err;
4584
4585         status = be_tx_qs_create(adapter);
4586         if (status)
4587                 goto err;
4588
4589         status = be_rx_cqs_create(adapter);
4590         if (status)
4591                 goto err;
4592
4593         status = be_mcc_queues_create(adapter);
4594         if (status)
4595                 goto err;
4596
4597         status = netif_set_real_num_rx_queues(netdev, adapter->num_rx_qs);
4598         if (status)
4599                 goto err;
4600
4601         status = netif_set_real_num_tx_queues(netdev, adapter->num_tx_qs);
4602         if (status)
4603                 goto err;
4604
4605         return 0;
4606 err:
4607         dev_err(&adapter->pdev->dev, "queue_setup failed\n");
4608         return status;
4609 }
4610
4611 static int be_if_create(struct be_adapter *adapter)
4612 {
4613         u32 en_flags = BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS;
4614         u32 cap_flags = be_if_cap_flags(adapter);
4615         int status;
4616
4617         /* alloc required memory for other filtering fields */
4618         adapter->pmac_id = kcalloc(be_max_uc(adapter),
4619                                    sizeof(*adapter->pmac_id), GFP_KERNEL);
4620         if (!adapter->pmac_id)
4621                 return -ENOMEM;
4622
4623         adapter->mc_list = kcalloc(be_max_mc(adapter),
4624                                    sizeof(*adapter->mc_list), GFP_KERNEL);
4625         if (!adapter->mc_list)
4626                 return -ENOMEM;
4627
4628         adapter->uc_list = kcalloc(be_max_uc(adapter),
4629                                    sizeof(*adapter->uc_list), GFP_KERNEL);
4630         if (!adapter->uc_list)
4631                 return -ENOMEM;
4632
4633         if (adapter->cfg_num_rx_irqs == 1)
4634                 cap_flags &= ~(BE_IF_FLAGS_DEFQ_RSS | BE_IF_FLAGS_RSS);
4635
4636         en_flags &= cap_flags;
4637         /* will enable all the needed filter flags in be_open() */
4638         status = be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags,
4639                                   &adapter->if_handle, 0);
4640
4641         if (status)
4642                 return status;
4643
4644         return 0;
4645 }
4646
4647 int be_update_queues(struct be_adapter *adapter)
4648 {
4649         struct net_device *netdev = adapter->netdev;
4650         int status;
4651
4652         if (netif_running(netdev))
4653                 be_close(netdev);
4654
4655         be_cancel_worker(adapter);
4656
4657         /* If any vectors have been shared with RoCE, we cannot re-program
4658          * the MSIx table.
4659          */
4660         if (!adapter->num_msix_roce_vec)
4661                 be_msix_disable(adapter);
4662
4663         be_clear_queues(adapter);
4664         status = be_cmd_if_destroy(adapter, adapter->if_handle, 0);
4665         if (status)
4666                 return status;
4667
4668         if (!msix_enabled(adapter)) {
4669                 status = be_msix_enable(adapter);
4670                 if (status)
4671                         return status;
4672         }
4673
4674         status = be_if_create(adapter);
4675         if (status)
4676                 return status;
4677
4678         status = be_setup_queues(adapter);
4679         if (status)
4680                 return status;
4681
4682         be_schedule_worker(adapter);
4683
4684         if (netif_running(netdev))
4685                 status = be_open(netdev);
4686
4687         return status;
4688 }
4689
4690 static inline int fw_major_num(const char *fw_ver)
4691 {
4692         int fw_major = 0, i;
4693
4694         i = sscanf(fw_ver, "%d.", &fw_major);
4695         if (i != 1)
4696                 return 0;
4697
4698         return fw_major;
4699 }
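
/* Example (illustrative): fw_major_num("4.6.62.0") parses and returns 4,
 * while a version string that does not start with a number fails the sscanf()
 * and yields 0.  So in be_setup() below, the "old firmware" warning on BE2
 * chips fires whenever the major version is below 4 or cannot be parsed.
 */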
4700
4701 /* If it is error recovery, FLR the PF
4702  * Else, if any VFs are already enabled, don't FLR the PF
4703  */
4704 static bool be_reset_required(struct be_adapter *adapter)
4705 {
4706         if (be_error_recovering(adapter))
4707                 return true;
4708         else
4709                 return pci_num_vf(adapter->pdev) == 0;
4710 }
4711
4712 /* Wait for the FW to be ready and perform the required initialization */
4713 static int be_func_init(struct be_adapter *adapter)
4714 {
4715         int status;
4716
4717         status = be_fw_wait_ready(adapter);
4718         if (status)
4719                 return status;
4720
4721         /* FW is now ready; clear errors to allow cmds/doorbell */
4722         be_clear_error(adapter, BE_CLEAR_ALL);
4723
4724         if (be_reset_required(adapter)) {
4725                 status = be_cmd_reset_function(adapter);
4726                 if (status)
4727                         return status;
4728
4729                 /* Wait for interrupts to quiesce after an FLR */
4730                 msleep(100);
4731         }
4732
4733         /* Tell FW we're ready to fire cmds */
4734         status = be_cmd_fw_init(adapter);
4735         if (status)
4736                 return status;
4737
4738         /* Allow interrupts for other ULPs running on NIC function */
4739         be_intr_set(adapter, true);
4740
4741         return 0;
4742 }
4743
4744 static int be_setup(struct be_adapter *adapter)
4745 {
4746         struct device *dev = &adapter->pdev->dev;
4747         int status;
4748
4749         status = be_func_init(adapter);
4750         if (status)
4751                 return status;
4752
4753         be_setup_init(adapter);
4754
4755         if (!lancer_chip(adapter))
4756                 be_cmd_req_native_mode(adapter);
4757
4758         /* invoke this cmd first to get pf_num and vf_num, which are needed
4759          * for issuing profile-related cmds
4760          */
4761         if (!BEx_chip(adapter)) {
4762                 status = be_cmd_get_func_config(adapter, NULL);
4763                 if (status)
4764                         return status;
4765         }
4766
4767         status = be_get_config(adapter);
4768         if (status)
4769                 goto err;
4770
4771         if (!BE2_chip(adapter) && be_physfn(adapter))
4772                 be_alloc_sriov_res(adapter);
4773
4774         status = be_get_resources(adapter);
4775         if (status)
4776                 goto err;
4777
4778         status = be_msix_enable(adapter);
4779         if (status)
4780                 goto err;
4781
4782         /* will enable all the needed filter flags in be_open() */
4783         status = be_if_create(adapter);
4784         if (status)
4785                 goto err;
4786
4787         /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
4788         rtnl_lock();
4789         status = be_setup_queues(adapter);
4790         rtnl_unlock();
4791         if (status)
4792                 goto err;
4793
4794         be_cmd_get_fn_privileges(adapter, &adapter->cmd_privileges, 0);
4795
4796         status = be_mac_setup(adapter);
4797         if (status)
4798                 goto err;
4799
4800         be_cmd_get_fw_ver(adapter);
4801         dev_info(dev, "FW version is %s\n", adapter->fw_ver);
4802
4803         if (BE2_chip(adapter) && fw_major_num(adapter->fw_ver) < 4) {
4804                 dev_err(dev, "Firmware on card is old (%s), IRQs may not work\n",
4805                         adapter->fw_ver);
4806                 dev_err(dev, "Please upgrade firmware to version >= 4.0\n");
4807         }
4808
4809         status = be_cmd_set_flow_control(adapter, adapter->tx_fc,
4810                                          adapter->rx_fc);
4811         if (status)
4812                 be_cmd_get_flow_control(adapter, &adapter->tx_fc,
4813                                         &adapter->rx_fc);
4814
4815         dev_info(&adapter->pdev->dev, "HW Flow control - TX:%d RX:%d\n",
4816                  adapter->tx_fc, adapter->rx_fc);
4817
4818         if (be_physfn(adapter))
4819                 be_cmd_set_logical_link_config(adapter,
4820                                                IFLA_VF_LINK_STATE_AUTO, 0);
4821
4822         /* BE3 EVB echoes broadcast/multicast packets back to the PF's vport,
4823          * confusing a Linux bridge or OVS that it might be connected to.
4824          * Set the EVB to PASSTHRU mode, which effectively disables the EVB
4825          * when SRIOV is not enabled.
4826          */
4827         if (BE3_chip(adapter))
4828                 be_cmd_set_hsw_config(adapter, 0, 0, adapter->if_handle,
4829                                       PORT_FWD_TYPE_PASSTHRU, 0);
4830
4831         if (adapter->num_vfs)
4832                 be_vf_setup(adapter);
4833
4834         status = be_cmd_get_phy_info(adapter);
4835         if (!status && be_pause_supported(adapter))
4836                 adapter->phy.fc_autoneg = 1;
4837
4838         if (be_physfn(adapter) && !lancer_chip(adapter))
4839                 be_cmd_set_features(adapter);
4840
4841         be_schedule_worker(adapter);
4842         adapter->flags |= BE_FLAGS_SETUP_DONE;
4843         return 0;
4844 err:
4845         be_clear(adapter);
4846         return status;
4847 }
4848
4849 #ifdef CONFIG_NET_POLL_CONTROLLER
4850 static void be_netpoll(struct net_device *netdev)
4851 {
4852         struct be_adapter *adapter = netdev_priv(netdev);
4853         struct be_eq_obj *eqo;
4854         int i;
4855
4856         for_all_evt_queues(adapter, eqo, i) {
4857                 be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
4858                 napi_schedule(&eqo->napi);
4859         }
4860 }
4861 #endif
4862
4863 int be_load_fw(struct be_adapter *adapter, u8 *fw_file)
4864 {
4865         const struct firmware *fw;
4866         int status;
4867
4868         if (!netif_running(adapter->netdev)) {
4869                 dev_err(&adapter->pdev->dev,
4870                         "Firmware load not allowed (interface is down)\n");
4871                 return -ENETDOWN;
4872         }
4873
4874         status = request_firmware(&fw, fw_file, &adapter->pdev->dev);
4875         if (status)
4876                 goto fw_exit;
4877
4878         dev_info(&adapter->pdev->dev, "Flashing firmware file %s\n", fw_file);
4879
4880         if (lancer_chip(adapter))
4881                 status = lancer_fw_download(adapter, fw);
4882         else
4883                 status = be_fw_download(adapter, fw);
4884
4885         if (!status)
4886                 be_cmd_get_fw_ver(adapter);
4887
4888 fw_exit:
4889         release_firmware(fw);
4890         return status;
4891 }
4892
4893 static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
4894                                  u16 flags)
4895 {
4896         struct be_adapter *adapter = netdev_priv(dev);
4897         struct nlattr *attr, *br_spec;
4898         int rem;
4899         int status = 0;
4900         u16 mode = 0;
4901
4902         if (!sriov_enabled(adapter))
4903                 return -EOPNOTSUPP;
4904
4905         br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
4906         if (!br_spec)
4907                 return -EINVAL;
4908
4909         nla_for_each_nested(attr, br_spec, rem) {
4910                 if (nla_type(attr) != IFLA_BRIDGE_MODE)
4911                         continue;
4912
4913                 if (nla_len(attr) < sizeof(mode))
4914                         return -EINVAL;
4915
4916                 mode = nla_get_u16(attr);
4917                 if (BE3_chip(adapter) && mode == BRIDGE_MODE_VEPA)
4918                         return -EOPNOTSUPP;
4919
4920                 if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
4921                         return -EINVAL;
4922
4923                 status = be_cmd_set_hsw_config(adapter, 0, 0,
4924                                                adapter->if_handle,
4925                                                mode == BRIDGE_MODE_VEPA ?
4926                                                PORT_FWD_TYPE_VEPA :
4927                                                PORT_FWD_TYPE_VEB, 0);
4928                 if (status)
4929                         goto err;
4930
4931                 dev_info(&adapter->pdev->dev, "enabled switch mode: %s\n",
4932                          mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4933
4934                 return status;
4935         }
4936 err:
4937         dev_err(&adapter->pdev->dev, "Failed to set switch mode %s\n",
4938                 mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4939
4940         return status;
4941 }
4942
4943 static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
4944                                  struct net_device *dev, u32 filter_mask,
4945                                  int nlflags)
4946 {
4947         struct be_adapter *adapter = netdev_priv(dev);
4948         int status = 0;
4949         u8 hsw_mode;
4950
4951         /* BE and Lancer chips support VEB mode only */
4952         if (BEx_chip(adapter) || lancer_chip(adapter)) {
4953                 /* VEB is disabled in non-SR-IOV profiles on BE3/Lancer */
4954                 if (!pci_sriov_get_totalvfs(adapter->pdev))
4955                         return 0;
4956                 hsw_mode = PORT_FWD_TYPE_VEB;
4957         } else {
4958                 status = be_cmd_get_hsw_config(adapter, NULL, 0,
4959                                                adapter->if_handle, &hsw_mode,
4960                                                NULL);
4961                 if (status)
4962                         return 0;
4963
4964                 if (hsw_mode == PORT_FWD_TYPE_PASSTHRU)
4965                         return 0;
4966         }
4967
4968         return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
4969                                        hsw_mode == PORT_FWD_TYPE_VEPA ?
4970                                        BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB,
4971                                        0, 0, nlflags, filter_mask, NULL);
4972 }
4973
4974 static struct be_cmd_work *be_alloc_work(struct be_adapter *adapter,
4975                                          void (*func)(struct work_struct *))
4976 {
4977         struct be_cmd_work *work;
4978
4979         work = kzalloc(sizeof(*work), GFP_ATOMIC);
4980         if (!work) {
4981                 dev_err(&adapter->pdev->dev,
4982                         "be_work memory allocation failed\n");
4983                 return NULL;
4984         }
4985
4986         INIT_WORK(&work->work, func);
4987         work->adapter = adapter;
4988         return work;
4989 }
4990
4991 /* VxLAN offload Notes:
4992  *
4993  * The stack defines tunnel offload flags (hw_enc_features) for IP and doesn't
4994  * distinguish various types of transports (VxLAN, GRE, NVGRE ..). So, offload
4995  * is expected to work across all types of IP tunnels once exported. Skyhawk
4996  * supports offloads for either VxLAN or NVGRE, exclusively. So we export VxLAN
4997  * offloads in hw_enc_features only when a VxLAN port is added. If other
4998  * (non-VxLAN) tunnels are configured while VxLAN offloads are enabled, offloads for
4999  * those other tunnels are unexported on the fly through ndo_features_check().
5000  *
5001  * Skyhawk supports VxLAN offloads only for one UDP dport. So, if the stack
5002  * adds more than one port, disable offloads and don't re-enable them
5003  * until all the tunnels are removed.
5004  */
5005 static void be_work_add_vxlan_port(struct work_struct *work)
5006 {
5007         struct be_cmd_work *cmd_work =
5008                                 container_of(work, struct be_cmd_work, work);
5009         struct be_adapter *adapter = cmd_work->adapter;
5010         struct net_device *netdev = adapter->netdev;
5011         struct device *dev = &adapter->pdev->dev;
5012         __be16 port = cmd_work->info.vxlan_port;
5013         int status;
5014
5015         if (adapter->vxlan_port == port && adapter->vxlan_port_count) {
5016                 adapter->vxlan_port_aliases++;
5017                 goto done;
5018         }
5019
5020         if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS) {
5021                 dev_info(dev,
5022                          "Only one UDP port supported for VxLAN offloads\n");
5023                 dev_info(dev, "Disabling VxLAN offloads\n");
5024                 adapter->vxlan_port_count++;
5025                 goto err;
5026         }
5027
5028         if (adapter->vxlan_port_count++ >= 1)
5029                 goto done;
5030
5031         status = be_cmd_manage_iface(adapter, adapter->if_handle,
5032                                      OP_CONVERT_NORMAL_TO_TUNNEL);
5033         if (status) {
5034                 dev_warn(dev, "Failed to convert normal interface to tunnel\n");
5035                 goto err;
5036         }
5037
5038         status = be_cmd_set_vxlan_port(adapter, port);
5039         if (status) {
5040                 dev_warn(dev, "Failed to add VxLAN port\n");
5041                 goto err;
5042         }
5043         adapter->flags |= BE_FLAGS_VXLAN_OFFLOADS;
5044         adapter->vxlan_port = port;
5045
5046         netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
5047                                    NETIF_F_TSO | NETIF_F_TSO6 |
5048                                    NETIF_F_GSO_UDP_TUNNEL;
5049         netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
5050         netdev->features |= NETIF_F_GSO_UDP_TUNNEL;
5051
5052         dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n",
5053                  be16_to_cpu(port));
5054         goto done;
5055 err:
5056         be_disable_vxlan_offloads(adapter);
5057 done:
5058         kfree(cmd_work);
5059 }
5060
5061 static void be_work_del_vxlan_port(struct work_struct *work)
5062 {
5063         struct be_cmd_work *cmd_work =
5064                                 container_of(work, struct be_cmd_work, work);
5065         struct be_adapter *adapter = cmd_work->adapter;
5066         __be16 port = cmd_work->info.vxlan_port;
5067
5068         if (adapter->vxlan_port != port)
5069                 goto done;
5070
5071         if (adapter->vxlan_port_aliases) {
5072                 adapter->vxlan_port_aliases--;
5073                 goto out;
5074         }
5075
5076         be_disable_vxlan_offloads(adapter);
5077
5078         dev_info(&adapter->pdev->dev,
5079                  "Disabled VxLAN offloads for UDP port %d\n",
5080                  be16_to_cpu(port));
5081 done:
5082         adapter->vxlan_port_count--;
5083 out:
5084         kfree(cmd_work);
5085 }
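
/* Bookkeeping sketch for the two work handlers above (a summary, not extra
 * behaviour): the first port added converts the interface to tunnel mode and
 * programs the port in FW; adding the same port again only bumps
 * vxlan_port_aliases; adding a different port while offloads are on tears the
 * offloads down, and vxlan_port_count keeps counting outstanding adds so the
 * offloads stay disabled until every port has been deleted again.
 */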
5086
5087 static void be_cfg_vxlan_port(struct net_device *netdev,
5088                               struct udp_tunnel_info *ti,
5089                               void (*func)(struct work_struct *))
5090 {
5091         struct be_adapter *adapter = netdev_priv(netdev);
5092         struct be_cmd_work *cmd_work;
5093
5094         if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
5095                 return;
5096
5097         if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter))
5098                 return;
5099
5100         cmd_work = be_alloc_work(adapter, func);
5101         if (cmd_work) {
5102                 cmd_work->info.vxlan_port = ti->port;
5103                 queue_work(be_wq, &cmd_work->work);
5104         }
5105 }
5106
5107 static void be_del_vxlan_port(struct net_device *netdev,
5108                               struct udp_tunnel_info *ti)
5109 {
5110         be_cfg_vxlan_port(netdev, ti, be_work_del_vxlan_port);
5111 }
5112
5113 static void be_add_vxlan_port(struct net_device *netdev,
5114                               struct udp_tunnel_info *ti)
5115 {
5116         be_cfg_vxlan_port(netdev, ti, be_work_add_vxlan_port);
5117 }
5118
5119 static netdev_features_t be_features_check(struct sk_buff *skb,
5120                                            struct net_device *dev,
5121                                            netdev_features_t features)
5122 {
5123         struct be_adapter *adapter = netdev_priv(dev);
5124         u8 l4_hdr = 0;
5125
5126         /* The code below restricts offload features for some tunneled packets.
5127          * Offload features for normal (non-tunnel) packets are unchanged.
5128          */
5129         if (!skb->encapsulation ||
5130             !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS))
5131                 return features;
5132
5133         /* It's an encapsulated packet and VxLAN offloads are enabled. We
5134          * should disable tunnel offload features if it's not a VxLAN packet,
5135          * as tunnel offloads have been enabled only for VxLAN. This is done to
5136          * allow other tunneled traffic like GRE to work fine while VxLAN
5137          * offloads are configured in Skyhawk-R.
5138          */
5139         switch (vlan_get_protocol(skb)) {
5140         case htons(ETH_P_IP):
5141                 l4_hdr = ip_hdr(skb)->protocol;
5142                 break;
5143         case htons(ETH_P_IPV6):
5144                 l4_hdr = ipv6_hdr(skb)->nexthdr;
5145                 break;
5146         default:
5147                 return features;
5148         }
5149
5150         if (l4_hdr != IPPROTO_UDP ||
5151             skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
5152             skb->inner_protocol != htons(ETH_P_TEB) ||
5153             skb_inner_mac_header(skb) - skb_transport_header(skb) !=
5154                 sizeof(struct udphdr) + sizeof(struct vxlanhdr) ||
5155             !adapter->vxlan_port ||
5156             udp_hdr(skb)->dest != adapter->vxlan_port)
5157                 return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
5158
5159         return features;
5160 }
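
/* For reference, the checks above accept the standard VxLAN encapsulation
 * only: an outer IPv4/IPv6 header, a UDP header whose destination port
 * matches adapter->vxlan_port, an 8-byte VxLAN header, and an inner Ethernet
 * frame (ETH_P_TEB).  Any other encapsulated skb has its checksum and GSO
 * offload bits stripped here.
 */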
5161
5162 static int be_get_phys_port_id(struct net_device *dev,
5163                                struct netdev_phys_item_id *ppid)
5164 {
5165         int i, id_len = CNTL_SERIAL_NUM_WORDS * CNTL_SERIAL_NUM_WORD_SZ + 1;
5166         struct be_adapter *adapter = netdev_priv(dev);
5167         u8 *id;
5168
5169         if (MAX_PHYS_ITEM_ID_LEN < id_len)
5170                 return -ENOSPC;
5171
5172         ppid->id[0] = adapter->hba_port_num + 1;
5173         id = &ppid->id[1];
5174         for (i = CNTL_SERIAL_NUM_WORDS - 1; i >= 0;
5175              i--, id += CNTL_SERIAL_NUM_WORD_SZ)
5176                 memcpy(id, &adapter->serial_num[i], CNTL_SERIAL_NUM_WORD_SZ);
5177
5178         ppid->id_len = id_len;
5179
5180         return 0;
5181 }
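
/* Layout of the id built above (descriptive note): byte 0 carries the 1-based
 * HBA port number and the remaining bytes carry the controller serial number
 * words copied in reverse word order, giving a port-unique id of
 * CNTL_SERIAL_NUM_WORDS * CNTL_SERIAL_NUM_WORD_SZ + 1 bytes.
 */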
5182
5183 static void be_set_rx_mode(struct net_device *dev)
5184 {
5185         struct be_adapter *adapter = netdev_priv(dev);
5186         struct be_cmd_work *work;
5187
5188         work = be_alloc_work(adapter, be_work_set_rx_mode);
5189         if (work)
5190                 queue_work(be_wq, &work->work);
5191 }
5192
5193 static const struct net_device_ops be_netdev_ops = {
5194         .ndo_open               = be_open,
5195         .ndo_stop               = be_close,
5196         .ndo_start_xmit         = be_xmit,
5197         .ndo_set_rx_mode        = be_set_rx_mode,
5198         .ndo_set_mac_address    = be_mac_addr_set,
5199         .ndo_get_stats64        = be_get_stats64,
5200         .ndo_validate_addr      = eth_validate_addr,
5201         .ndo_vlan_rx_add_vid    = be_vlan_add_vid,
5202         .ndo_vlan_rx_kill_vid   = be_vlan_rem_vid,
5203         .ndo_set_vf_mac         = be_set_vf_mac,
5204         .ndo_set_vf_vlan        = be_set_vf_vlan,
5205         .ndo_set_vf_rate        = be_set_vf_tx_rate,
5206         .ndo_get_vf_config      = be_get_vf_config,
5207         .ndo_set_vf_link_state  = be_set_vf_link_state,
5208         .ndo_set_vf_spoofchk    = be_set_vf_spoofchk,
5209 #ifdef CONFIG_NET_POLL_CONTROLLER
5210         .ndo_poll_controller    = be_netpoll,
5211 #endif
5212         .ndo_bridge_setlink     = be_ndo_bridge_setlink,
5213         .ndo_bridge_getlink     = be_ndo_bridge_getlink,
5214 #ifdef CONFIG_NET_RX_BUSY_POLL
5215         .ndo_busy_poll          = be_busy_poll,
5216 #endif
5217         .ndo_udp_tunnel_add     = be_add_vxlan_port,
5218         .ndo_udp_tunnel_del     = be_del_vxlan_port,
5219         .ndo_features_check     = be_features_check,
5220         .ndo_get_phys_port_id   = be_get_phys_port_id,
5221 };
5222
5223 static void be_netdev_init(struct net_device *netdev)
5224 {
5225         struct be_adapter *adapter = netdev_priv(netdev);
5226
5227         netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5228                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
5229                 NETIF_F_HW_VLAN_CTAG_TX;
5230         if ((be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS))
5231                 netdev->hw_features |= NETIF_F_RXHASH;
5232
5233         netdev->features |= netdev->hw_features |
5234                 NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER;
5235
5236         netdev->vlan_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5237                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
5238
5239         netdev->priv_flags |= IFF_UNICAST_FLT;
5240
5241         netdev->flags |= IFF_MULTICAST;
5242
5243         netif_set_gso_max_size(netdev, BE_MAX_GSO_SIZE - ETH_HLEN);
5244
5245         netdev->netdev_ops = &be_netdev_ops;
5246
5247         netdev->ethtool_ops = &be_ethtool_ops;
5248
5249         /* MTU range: 256 - 9000 */
5250         netdev->min_mtu = BE_MIN_MTU;
5251         netdev->max_mtu = BE_MAX_MTU;
5252 }
5253
5254 static void be_cleanup(struct be_adapter *adapter)
5255 {
5256         struct net_device *netdev = adapter->netdev;
5257
5258         rtnl_lock();
5259         netif_device_detach(netdev);
5260         if (netif_running(netdev))
5261                 be_close(netdev);
5262         rtnl_unlock();
5263
5264         be_clear(adapter);
5265 }
5266
5267 static int be_resume(struct be_adapter *adapter)
5268 {
5269         struct net_device *netdev = adapter->netdev;
5270         int status;
5271
5272         status = be_setup(adapter);
5273         if (status)
5274                 return status;
5275
5276         rtnl_lock();
5277         if (netif_running(netdev))
5278                 status = be_open(netdev);
5279         rtnl_unlock();
5280
5281         if (status)
5282                 return status;
5283
5284         netif_device_attach(netdev);
5285
5286         return 0;
5287 }
5288
5289 static void be_soft_reset(struct be_adapter *adapter)
5290 {
5291         u32 val;
5292
5293         dev_info(&adapter->pdev->dev, "Initiating chip soft reset\n");
5294         val = ioread32(adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5295         val |= SLIPORT_SOFTRESET_SR_MASK;
5296         iowrite32(val, adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5297 }
5298
5299 static bool be_err_is_recoverable(struct be_adapter *adapter)
5300 {
5301         struct be_error_recovery *err_rec = &adapter->error_recovery;
5302         unsigned long initial_idle_time =
5303                 msecs_to_jiffies(ERR_RECOVERY_IDLE_TIME);
5304         unsigned long recovery_interval =
5305                 msecs_to_jiffies(ERR_RECOVERY_INTERVAL);
5306         u16 ue_err_code;
5307         u32 val;
5308
5309         val = be_POST_stage_get(adapter);
5310         if ((val & POST_STAGE_RECOVERABLE_ERR) != POST_STAGE_RECOVERABLE_ERR)
5311                 return false;
5312         ue_err_code = val & POST_ERR_RECOVERY_CODE_MASK;
5313         if (ue_err_code == 0)
5314                 return false;
5315
5316         dev_err(&adapter->pdev->dev, "Recoverable HW error code: 0x%x\n",
5317                 ue_err_code);
5318
5319         if (jiffies - err_rec->probe_time <= initial_idle_time) {
5320                 dev_err(&adapter->pdev->dev,
5321                         "Cannot recover within %lu sec from driver load\n",
5322                         jiffies_to_msecs(initial_idle_time) / MSEC_PER_SEC);
5323                 return false;
5324         }
5325
5326         if (err_rec->last_recovery_time &&
5327             (jiffies - err_rec->last_recovery_time <= recovery_interval)) {
5328                 dev_err(&adapter->pdev->dev,
5329                         "Cannot recover within %lu sec from last recovery\n",
5330                         jiffies_to_msecs(recovery_interval) / MSEC_PER_SEC);
5331                 return false;
5332         }
5333
5334         if (ue_err_code == err_rec->last_err_code) {
5335                 dev_err(&adapter->pdev->dev,
5336                         "Cannot recover from a consecutive TPE error\n");
5337                 return false;
5338         }
5339
5340         err_rec->last_recovery_time = jiffies;
5341         err_rec->last_err_code = ue_err_code;
5342         return true;
5343 }
5344
5345 static int be_tpe_recover(struct be_adapter *adapter)
5346 {
5347         struct be_error_recovery *err_rec = &adapter->error_recovery;
5348         int status = -EAGAIN;
5349         u32 val;
5350
5351         switch (err_rec->recovery_state) {
5352         case ERR_RECOVERY_ST_NONE:
5353                 err_rec->recovery_state = ERR_RECOVERY_ST_DETECT;
5354                 err_rec->resched_delay = ERR_RECOVERY_UE_DETECT_DURATION;
5355                 break;
5356
5357         case ERR_RECOVERY_ST_DETECT:
5358                 val = be_POST_stage_get(adapter);
5359                 if ((val & POST_STAGE_RECOVERABLE_ERR) !=
5360                     POST_STAGE_RECOVERABLE_ERR) {
5361                         dev_err(&adapter->pdev->dev,
5362                                 "Unrecoverable HW error detected: 0x%x\n", val);
5363                         status = -EINVAL;
5364                         err_rec->resched_delay = 0;
5365                         break;
5366                 }
5367
5368                 dev_err(&adapter->pdev->dev, "Recoverable HW error detected\n");
5369
5370                 /* Only PF0 initiates Chip Soft Reset. But PF0 must wait UE2SR
5371                  * milliseconds before it checks for final error status in
5372                  * SLIPORT_SEMAPHORE to determine if the recovery criteria are met.
5373                  * If it does, then PF0 initiates a Soft Reset.
5374                  */
5375                 if (adapter->pf_num == 0) {
5376                         err_rec->recovery_state = ERR_RECOVERY_ST_RESET;
5377                         err_rec->resched_delay = err_rec->ue_to_reset_time -
5378                                         ERR_RECOVERY_UE_DETECT_DURATION;
5379                         break;
5380                 }
5381
5382                 err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5383                 err_rec->resched_delay = err_rec->ue_to_poll_time -
5384                                         ERR_RECOVERY_UE_DETECT_DURATION;
5385                 break;
5386
5387         case ERR_RECOVERY_ST_RESET:
5388                 if (!be_err_is_recoverable(adapter)) {
5389                         dev_err(&adapter->pdev->dev,
5390                                 "Failed to meet recovery criteria\n");
5391                         status = -EIO;
5392                         err_rec->resched_delay = 0;
5393                         break;
5394                 }
5395                 be_soft_reset(adapter);
5396                 err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5397                 err_rec->resched_delay = err_rec->ue_to_poll_time -
5398                                         err_rec->ue_to_reset_time;
5399                 break;
5400
5401         case ERR_RECOVERY_ST_PRE_POLL:
5402                 err_rec->recovery_state = ERR_RECOVERY_ST_REINIT;
5403                 err_rec->resched_delay = 0;
5404                 status = 0;                     /* done */
5405                 break;
5406
5407         default:
5408                 status = -EINVAL;
5409                 err_rec->resched_delay = 0;
5410                 break;
5411         }
5412
5413         return status;
5414 }
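
/* State walk of be_tpe_recover() (a sketch of the code above): NONE ->
 * DETECT -> RESET (PF0 only) -> PRE_POLL -> REINIT.  Every intermediate step
 * returns -EAGAIN with a non-zero resched_delay so that
 * be_err_detection_task() re-runs after that delay; only the PRE_POLL step
 * returns 0, letting be_err_recover() continue with the actual re-init.
 */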
5415
5416 static int be_err_recover(struct be_adapter *adapter)
5417 {
5418         int status;
5419
5420         if (!lancer_chip(adapter)) {
5421                 if (!adapter->error_recovery.recovery_supported ||
5422                     adapter->priv_flags & BE_DISABLE_TPE_RECOVERY)
5423                         return -EIO;
5424                 status = be_tpe_recover(adapter);
5425                 if (status)
5426                         goto err;
5427         }
5428
5429         /* Wait for adapter to reach quiescent state before
5430          * destroying queues
5431          */
5432         status = be_fw_wait_ready(adapter);
5433         if (status)
5434                 goto err;
5435
5436         adapter->flags |= BE_FLAGS_TRY_RECOVERY;
5437
5438         be_cleanup(adapter);
5439
5440         status = be_resume(adapter);
5441         if (status)
5442                 goto err;
5443
5444         adapter->flags &= ~BE_FLAGS_TRY_RECOVERY;
5445
5446 err:
5447         return status;
5448 }
5449
5450 static void be_err_detection_task(struct work_struct *work)
5451 {
5452         struct be_error_recovery *err_rec =
5453                         container_of(work, struct be_error_recovery,
5454                                      err_detection_work.work);
5455         struct be_adapter *adapter =
5456                         container_of(err_rec, struct be_adapter,
5457                                      error_recovery);
5458         u32 resched_delay = ERR_RECOVERY_DETECTION_DELAY;
5459         struct device *dev = &adapter->pdev->dev;
5460         int recovery_status;
5461
5462         be_detect_error(adapter);
5463         if (!be_check_error(adapter, BE_ERROR_HW))
5464                 goto reschedule_task;
5465
5466         recovery_status = be_err_recover(adapter);
5467         if (!recovery_status) {
5468                 err_rec->recovery_retries = 0;
5469                 err_rec->recovery_state = ERR_RECOVERY_ST_NONE;
5470                 dev_info(dev, "Adapter recovery successful\n");
5471                 goto reschedule_task;
5472         } else if (!lancer_chip(adapter) && err_rec->resched_delay) {
5473                 /* BEx/SH recovery state machine */
5474                 if (adapter->pf_num == 0 &&
5475                     err_rec->recovery_state > ERR_RECOVERY_ST_DETECT)
5476                         dev_err(&adapter->pdev->dev,
5477                                 "Adapter recovery in progress\n");
5478                 resched_delay = err_rec->resched_delay;
5479                 goto reschedule_task;
5480         } else if (lancer_chip(adapter) && be_virtfn(adapter)) {
5481                 /* For VFs, check every second whether the PF has
5482                  * allocated resources.
5483                  */
5484                 dev_err(dev, "Re-trying adapter recovery\n");
5485                 goto reschedule_task;
5486         } else if (lancer_chip(adapter) && err_rec->recovery_retries++ <
5487                    ERR_RECOVERY_MAX_RETRY_COUNT) {
5488                 /* In case of another error during recovery, it takes 30 sec
5489                  * for the adapter to come out of error. Retry error recovery after
5490                  * this time interval.
5491                  */
5492                 dev_err(&adapter->pdev->dev, "Re-trying adapter recovery\n");
5493                 resched_delay = ERR_RECOVERY_RETRY_DELAY;
5494                 goto reschedule_task;
5495         } else {
5496                 dev_err(dev, "Adapter recovery failed\n");
5497                 dev_err(dev, "Please reboot server to recover\n");
5498         }
5499
5500         return;
5501
5502 reschedule_task:
5503         be_schedule_err_detection(adapter, resched_delay);
5504 }
5505
5506 static void be_log_sfp_info(struct be_adapter *adapter)
5507 {
5508         int status;
5509
5510         status = be_cmd_query_sfp_info(adapter);
5511         if (!status) {
5512                 dev_err(&adapter->pdev->dev,
5513                         "Port %c: %s Vendor: %s part no: %s",
5514                         adapter->port_name,
5515                         be_misconfig_evt_port_state[adapter->phy_state],
5516                         adapter->phy.vendor_name,
5517                         adapter->phy.vendor_pn);
5518         }
5519         adapter->flags &= ~BE_FLAGS_PHY_MISCONFIGURED;
5520 }
5521
5522 static void be_worker(struct work_struct *work)
5523 {
5524         struct be_adapter *adapter =
5525                 container_of(work, struct be_adapter, work.work);
5526         struct be_rx_obj *rxo;
5527         int i;
5528
5529         if (be_physfn(adapter) &&
5530             MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0)
5531                 be_cmd_get_die_temperature(adapter);
5532
5533         /* when interrupts are not yet enabled, just reap any pending
5534          * mcc completions
5535          */
5536         if (!netif_running(adapter->netdev)) {
5537                 local_bh_disable();
5538                 be_process_mcc(adapter);
5539                 local_bh_enable();
5540                 goto reschedule;
5541         }
5542
5543         if (!adapter->stats_cmd_sent) {
5544                 if (lancer_chip(adapter))
5545                         lancer_cmd_get_pport_stats(adapter,
5546                                                    &adapter->stats_cmd);
5547                 else
5548                         be_cmd_get_stats(adapter, &adapter->stats_cmd);
5549         }
5550
5551         for_all_rx_queues(adapter, rxo, i) {
5552                 /* Replenish RX-queues starved due to memory
5553                  * allocation failures.
5554                  */
5555                 if (rxo->rx_post_starved)
5556                         be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST);
5557         }
5558
5559         /* EQ-delay update for Skyhawk is done while notifying EQ */
5560         if (!skyhawk_chip(adapter))
5561                 be_eqd_update(adapter, false);
5562
5563         if (adapter->flags & BE_FLAGS_PHY_MISCONFIGURED)
5564                 be_log_sfp_info(adapter);
5565
5566 reschedule:
5567         adapter->work_counter++;
5568         queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
5569 }
5570
5571 static void be_unmap_pci_bars(struct be_adapter *adapter)
5572 {
5573         if (adapter->csr)
5574                 pci_iounmap(adapter->pdev, adapter->csr);
5575         if (adapter->db)
5576                 pci_iounmap(adapter->pdev, adapter->db);
5577         if (adapter->pcicfg && adapter->pcicfg_mapped)
5578                 pci_iounmap(adapter->pdev, adapter->pcicfg);
5579 }
5580
5581 static int db_bar(struct be_adapter *adapter)
5582 {
5583         if (lancer_chip(adapter) || be_virtfn(adapter))
5584                 return 0;
5585         else
5586                 return 4;
5587 }
5588
5589 static int be_roce_map_pci_bars(struct be_adapter *adapter)
5590 {
5591         if (skyhawk_chip(adapter)) {
5592                 adapter->roce_db.size = 4096;
5593                 adapter->roce_db.io_addr = pci_resource_start(adapter->pdev,
5594                                                               db_bar(adapter));
5595                 adapter->roce_db.total_size = pci_resource_len(adapter->pdev,
5596                                                                db_bar(adapter));
5597         }
5598         return 0;
5599 }
5600
5601 static int be_map_pci_bars(struct be_adapter *adapter)
5602 {
5603         struct pci_dev *pdev = adapter->pdev;
5604         u8 __iomem *addr;
5605         u32 sli_intf;
5606
5607         pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf);
5608         adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >>
5609                                 SLI_INTF_FAMILY_SHIFT;
5610         adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0;
5611
5612         if (BEx_chip(adapter) && be_physfn(adapter)) {
5613                 adapter->csr = pci_iomap(pdev, 2, 0);
5614                 if (!adapter->csr)
5615                         return -ENOMEM;
5616         }
5617
5618         addr = pci_iomap(pdev, db_bar(adapter), 0);
5619         if (!addr)
5620                 goto pci_map_err;
5621         adapter->db = addr;
5622
5623         if (skyhawk_chip(adapter) || BEx_chip(adapter)) {
5624                 if (be_physfn(adapter)) {
5625                         /* PCICFG is the 2nd BAR in BE2 */
5626                         addr = pci_iomap(pdev, BE2_chip(adapter) ? 1 : 0, 0);
5627                         if (!addr)
5628                                 goto pci_map_err;
5629                         adapter->pcicfg = addr;
5630                         adapter->pcicfg_mapped = true;
5631                 } else {
5632                         adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET;
5633                         adapter->pcicfg_mapped = false;
5634                 }
5635         }
5636
5637         be_roce_map_pci_bars(adapter);
5638         return 0;
5639
5640 pci_map_err:
5641         dev_err(&pdev->dev, "Error in mapping PCI BARs\n");
5642         be_unmap_pci_bars(adapter);
5643         return -ENOMEM;
5644 }
5645
5646 static void be_drv_cleanup(struct be_adapter *adapter)
5647 {
5648         struct be_dma_mem *mem = &adapter->mbox_mem_alloced;
5649         struct device *dev = &adapter->pdev->dev;
5650
5651         if (mem->va)
5652                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5653
5654         mem = &adapter->rx_filter;
5655         if (mem->va)
5656                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5657
5658         mem = &adapter->stats_cmd;
5659         if (mem->va)
5660                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5661 }
5662
5663 /* Allocate and initialize various fields in be_adapter struct */
5664 static int be_drv_init(struct be_adapter *adapter)
5665 {
5666         struct be_dma_mem *mbox_mem_alloc = &adapter->mbox_mem_alloced;
5667         struct be_dma_mem *mbox_mem_align = &adapter->mbox_mem;
5668         struct be_dma_mem *rx_filter = &adapter->rx_filter;
5669         struct be_dma_mem *stats_cmd = &adapter->stats_cmd;
5670         struct device *dev = &adapter->pdev->dev;
5671         int status = 0;
5672
5673         mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16;
5674         mbox_mem_alloc->va = dma_zalloc_coherent(dev, mbox_mem_alloc->size,
5675                                                  &mbox_mem_alloc->dma,
5676                                                  GFP_KERNEL);
5677         if (!mbox_mem_alloc->va)
5678                 return -ENOMEM;
5679
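        /* The +16 bytes over-allocated above let us carve out a 16-byte
         * aligned view of the mailbox here (both the CPU address and the DMA
         * address are aligned the same way).
         */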
5680         mbox_mem_align->size = sizeof(struct be_mcc_mailbox);
5681         mbox_mem_align->va = PTR_ALIGN(mbox_mem_alloc->va, 16);
5682         mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16);
5683
5684         rx_filter->size = sizeof(struct be_cmd_req_rx_filter);
5685         rx_filter->va = dma_zalloc_coherent(dev, rx_filter->size,
5686                                             &rx_filter->dma, GFP_KERNEL);
5687         if (!rx_filter->va) {
5688                 status = -ENOMEM;
5689                 goto free_mbox;
5690         }
5691
5692         if (lancer_chip(adapter))
5693                 stats_cmd->size = sizeof(struct lancer_cmd_req_pport_stats);
5694         else if (BE2_chip(adapter))
5695                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v0);
5696         else if (BE3_chip(adapter))
5697                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v1);
5698         else
5699                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v2);
5700         stats_cmd->va = dma_zalloc_coherent(dev, stats_cmd->size,
5701                                             &stats_cmd->dma, GFP_KERNEL);
5702         if (!stats_cmd->va) {
5703                 status = -ENOMEM;
5704                 goto free_rx_filter;
5705         }
5706
5707         mutex_init(&adapter->mbox_lock);
5708         mutex_init(&adapter->mcc_lock);
5709         mutex_init(&adapter->rx_filter_lock);
5710         spin_lock_init(&adapter->mcc_cq_lock);
5711         init_completion(&adapter->et_cmd_compl);
5712
5713         pci_save_state(adapter->pdev);
5714
5715         INIT_DELAYED_WORK(&adapter->work, be_worker);
5716
5717         adapter->error_recovery.recovery_state = ERR_RECOVERY_ST_NONE;
5718         adapter->error_recovery.resched_delay = 0;
5719         INIT_DELAYED_WORK(&adapter->error_recovery.err_detection_work,
5720                           be_err_detection_task);
5721
5722         adapter->rx_fc = true;
5723         adapter->tx_fc = true;
5724
5725         /* Must be a power of 2, or else the MODULO macro will BUG_ON */
5726         adapter->be_get_temp_freq = 64;
5727
5728         return 0;
5729
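/* Error unwind: free the DMA buffers in the reverse order of allocation */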
5730 free_rx_filter:
5731         dma_free_coherent(dev, rx_filter->size, rx_filter->va, rx_filter->dma);
5732 free_mbox:
5733         dma_free_coherent(dev, mbox_mem_alloc->size, mbox_mem_alloc->va,
5734                           mbox_mem_alloc->dma);
5735         return status;
5736 }
5737
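/* Note (not in the original source): PCI remove tears down in roughly the
 * reverse order of be_probe() - detach RoCE, stop error detection,
 * unregister the netdev, clear HW resources, release the PCI BARs and
 * regions, and finally free the netdev.
 */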
5738 static void be_remove(struct pci_dev *pdev)
5739 {
5740         struct be_adapter *adapter = pci_get_drvdata(pdev);
5741
5742         if (!adapter)
5743                 return;
5744
5745         be_roce_dev_remove(adapter);
5746         be_intr_set(adapter, false);
5747
5748         be_cancel_err_detection(adapter);
5749
5750         unregister_netdev(adapter->netdev);
5751
5752         be_clear(adapter);
5753
5754         if (!pci_vfs_assigned(adapter->pdev))
5755                 be_cmd_reset_function(adapter);
5756
5757         /* Tell the FW we're done issuing cmds */
5758         be_cmd_fw_clean(adapter);
5759
5760         be_unmap_pci_bars(adapter);
5761         be_drv_cleanup(adapter);
5762
5763         pci_disable_pcie_error_reporting(pdev);
5764
5765         pci_release_regions(pdev);
5766         pci_disable_device(pdev);
5767
5768         free_netdev(adapter->netdev);
5769 }
5770
5771 static ssize_t be_hwmon_show_temp(struct device *dev,
5772                                   struct device_attribute *dev_attr,
5773                                   char *buf)
5774 {
5775         struct be_adapter *adapter = dev_get_drvdata(dev);
5776
5777         /* Unit: millidegree Celsius */
5778         if (adapter->hwmon_info.be_on_die_temp == BE_INVALID_DIE_TEMP)
5779                 return -EIO;
5780         else
5781                 return sprintf(buf, "%u\n",
5782                                adapter->hwmon_info.be_on_die_temp * 1000);
5783 }
5784
5785 static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO,
5786                           be_hwmon_show_temp, NULL, 1);
5787
5788 static struct attribute *be_hwmon_attrs[] = {
5789         &sensor_dev_attr_temp1_input.dev_attr.attr,
5790         NULL
5791 };
5792
5793 ATTRIBUTE_GROUPS(be_hwmon);
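/* Note (not in the original source): ATTRIBUTE_GROUPS(be_hwmon) generates
 * the be_hwmon_groups array passed to
 * devm_hwmon_device_register_with_groups() in be_probe(), exposing the
 * on-die temperature as the standard hwmon temp1_input attribute in
 * millidegrees Celsius.
 */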
5794
5795 static char *mc_name(struct be_adapter *adapter)
5796 {
5797         char *str = ""; /* default */
5798
5799         switch (adapter->mc_type) {
5800         case UMC:
5801                 str = "UMC";
5802                 break;
5803         case FLEX10:
5804                 str = "FLEX10";
5805                 break;
5806         case vNIC1:
5807                 str = "vNIC-1";
5808                 break;
5809         case nPAR:
5810                 str = "nPAR";
5811                 break;
5812         case UFP:
5813                 str = "UFP";
5814                 break;
5815         case vNIC2:
5816                 str = "vNIC-2";
5817                 break;
5818         default:
5819                 str = "";
5820         }
5821
5822         return str;
5823 }
5824
5825 static inline char *func_name(struct be_adapter *adapter)
5826 {
5827         return be_physfn(adapter) ? "PF" : "VF";
5828 }
5829
5830 static inline char *nic_name(struct pci_dev *pdev)
5831 {
5832         switch (pdev->device) {
5833         case OC_DEVICE_ID1:
5834                 return OC_NAME;
5835         case OC_DEVICE_ID2:
5836                 return OC_NAME_BE;
5837         case OC_DEVICE_ID3:
5838         case OC_DEVICE_ID4:
5839                 return OC_NAME_LANCER;
5840         case BE_DEVICE_ID2:
5841                 return BE3_NAME;
5842         case OC_DEVICE_ID5:
5843         case OC_DEVICE_ID6:
5844                 return OC_NAME_SH;
5845         default:
5846                 return BE_NAME;
5847         }
5848 }
5849
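/* Note (not in the original source): PCI probe enables the device and
 * requests its regions, allocates the netdev and per-function DMA/command
 * buffers (be_drv_init), configures the HW via be_setup(), registers the
 * netdev and RoCE interface, then starts periodic error detection and, on
 * PFs, hwmon registration. Each failure path unwinds the completed steps
 * through the labels at the end of the function.
 */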
5850 static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id)
5851 {
5852         struct be_adapter *adapter;
5853         struct net_device *netdev;
5854         int status = 0;
5855
5856         dev_info(&pdev->dev, "%s version is %s\n", DRV_NAME, DRV_VER);
5857
5858         status = pci_enable_device(pdev);
5859         if (status)
5860                 goto do_none;
5861
5862         status = pci_request_regions(pdev, DRV_NAME);
5863         if (status)
5864                 goto disable_dev;
5865         pci_set_master(pdev);
5866
5867         netdev = alloc_etherdev_mqs(sizeof(*adapter), MAX_TX_QS, MAX_RX_QS);
5868         if (!netdev) {
5869                 status = -ENOMEM;
5870                 goto rel_reg;
5871         }
5872         adapter = netdev_priv(netdev);
5873         adapter->pdev = pdev;
5874         pci_set_drvdata(pdev, adapter);
5875         adapter->netdev = netdev;
5876         SET_NETDEV_DEV(netdev, &pdev->dev);
5877
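        /* Prefer 64-bit DMA addressing; NETIF_F_HIGHDMA tells the stack the
         * device can DMA to/from high memory. Fall back to a 32-bit mask if
         * the platform cannot support 64-bit DMA.
         */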
5878         status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
5879         if (!status) {
5880                 netdev->features |= NETIF_F_HIGHDMA;
5881         } else {
5882                 status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
5883                 if (status) {
5884                         dev_err(&pdev->dev, "Could not set PCI DMA Mask\n");
5885                         goto free_netdev;
5886                 }
5887         }
5888
5889         status = pci_enable_pcie_error_reporting(pdev);
5890         if (!status)
5891                 dev_info(&pdev->dev, "PCIe error reporting enabled\n");
5892
5893         status = be_map_pci_bars(adapter);
5894         if (status)
5895                 goto free_netdev;
5896
5897         status = be_drv_init(adapter);
5898         if (status)
5899                 goto unmap_bars;
5900
5901         status = be_setup(adapter);
5902         if (status)
5903                 goto drv_cleanup;
5904
5905         be_netdev_init(netdev);
5906         status = register_netdev(netdev);
5907         if (status != 0)
5908                 goto unsetup;
5909
5910         be_roce_dev_add(adapter);
5911
5912         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5913         adapter->error_recovery.probe_time = jiffies;
5914
5915         /* On-die temperature is not supported for VFs. */
5916         if (be_physfn(adapter) && IS_ENABLED(CONFIG_BE2NET_HWMON)) {
5917                 adapter->hwmon_info.hwmon_dev =
5918                         devm_hwmon_device_register_with_groups(&pdev->dev,
5919                                                                DRV_NAME,
5920                                                                adapter,
5921                                                                be_hwmon_groups);
5922                 adapter->hwmon_info.be_on_die_temp = BE_INVALID_DIE_TEMP;
5923         }
5924
5925         dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev),
5926                  func_name(adapter), mc_name(adapter), adapter->port_name);
5927
5928         return 0;
5929
5930 unsetup:
5931         be_clear(adapter);
5932 drv_cleanup:
5933         be_drv_cleanup(adapter);
5934 unmap_bars:
5935         be_unmap_pci_bars(adapter);
5936 free_netdev:
5937         free_netdev(netdev);
5938 rel_reg:
5939         pci_release_regions(pdev);
5940 disable_dev:
5941         pci_disable_device(pdev);
5942 do_none:
5943         dev_err(&pdev->dev, "%s initialization failed\n", nic_name(pdev));
5944         return status;
5945 }
5946
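/* Note (not in the original source): legacy PCI PM callbacks
 * (pci_driver .suspend/.resume). On suspend the function is quiesced and
 * PCI state saved before the device is powered down; on resume the device
 * is re-enabled, state restored, the function brought back up via
 * be_resume() and error detection re-armed.
 */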
5947 static int be_suspend(struct pci_dev *pdev, pm_message_t state)
5948 {
5949         struct be_adapter *adapter = pci_get_drvdata(pdev);
5950
5951         be_intr_set(adapter, false);
5952         be_cancel_err_detection(adapter);
5953
5954         be_cleanup(adapter);
5955
5956         pci_save_state(pdev);
5957         pci_disable_device(pdev);
5958         pci_set_power_state(pdev, pci_choose_state(pdev, state));
5959         return 0;
5960 }
5961
5962 static int be_pci_resume(struct pci_dev *pdev)
5963 {
5964         struct be_adapter *adapter = pci_get_drvdata(pdev);
5965         int status = 0;
5966
5967         status = pci_enable_device(pdev);
5968         if (status)
5969                 return status;
5970
5971         pci_restore_state(pdev);
5972
5973         status = be_resume(adapter);
5974         if (status)
5975                 return status;
5976
5977         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5978
5979         return 0;
5980 }
5981
5982 /*
5983  * An FLR will stop BE from DMAing any data.
5984  */
5985 static void be_shutdown(struct pci_dev *pdev)
5986 {
5987         struct be_adapter *adapter = pci_get_drvdata(pdev);
5988
5989         if (!adapter)
5990                 return;
5991
5992         be_roce_dev_shutdown(adapter);
5993         cancel_delayed_work_sync(&adapter->work);
5994         be_cancel_err_detection(adapter);
5995
5996         netif_device_detach(adapter->netdev);
5997
5998         be_cmd_reset_function(adapter);
5999
6000         pci_disable_device(pdev);
6001 }
6002
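/* Note (not in the original source): PCI/EEH error recovery flow. The PCI
 * core calls error_detected() when an I/O channel failure is reported,
 * slot_reset() after the link/slot has been reset, and resume() once normal
 * operation can restart. Returning PCI_ERS_RESULT_NEED_RESET from
 * error_detected() asks the core to perform the reset;
 * PCI_ERS_RESULT_DISCONNECT gives up on the device.
 */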
6003 static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev,
6004                                             pci_channel_state_t state)
6005 {
6006         struct be_adapter *adapter = pci_get_drvdata(pdev);
6007
6008         dev_err(&adapter->pdev->dev, "EEH error detected\n");
6009
6010         be_roce_dev_remove(adapter);
6011
6012         if (!be_check_error(adapter, BE_ERROR_EEH)) {
6013                 be_set_error(adapter, BE_ERROR_EEH);
6014
6015                 be_cancel_err_detection(adapter);
6016
6017                 be_cleanup(adapter);
6018         }
6019
6020         if (state == pci_channel_io_perm_failure)
6021                 return PCI_ERS_RESULT_DISCONNECT;
6022
6023         pci_disable_device(pdev);
6024
6025         /* The error could cause the FW to trigger a flash debug dump.
6026          * Resetting the card while a flash dump is in progress can
6027          * prevent it from recovering; wait for the dump to finish.
6028          * Wait only on the first function, since the dump is needed
6029          * only once per adapter.
6030          */
6031         if (pdev->devfn == 0)
6032                 ssleep(30);
6033
6034         return PCI_ERS_RESULT_NEED_RESET;
6035 }
6036
6037 static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
6038 {
6039         struct be_adapter *adapter = pci_get_drvdata(pdev);
6040         int status;
6041
6042         dev_info(&adapter->pdev->dev, "EEH reset\n");
6043
6044         status = pci_enable_device(pdev);
6045         if (status)
6046                 return PCI_ERS_RESULT_DISCONNECT;
6047
6048         pci_set_master(pdev);
6049         pci_restore_state(pdev);
6050
6051         /* Check that the card is OK and the FW is ready */
6052         dev_info(&adapter->pdev->dev,
6053                  "Waiting for FW to be ready after EEH reset\n");
6054         status = be_fw_wait_ready(adapter);
6055         if (status)
6056                 return PCI_ERS_RESULT_DISCONNECT;
6057
6058         pci_cleanup_aer_uncorrect_error_status(pdev);
6059         be_clear_error(adapter, BE_CLEAR_ALL);
6060         return PCI_ERS_RESULT_RECOVERED;
6061 }
6062
6063 static void be_eeh_resume(struct pci_dev *pdev)
6064 {
6065         int status = 0;
6066         struct be_adapter *adapter = pci_get_drvdata(pdev);
6067
6068         dev_info(&adapter->pdev->dev, "EEH resume\n");
6069
6070         pci_save_state(pdev);
6071
6072         status = be_resume(adapter);
6073         if (status)
6074                 goto err;
6075
6076         be_roce_dev_add(adapter);
6077
6078         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6079         return;
6080 err:
6081         dev_err(&adapter->pdev->dev, "EEH resume failed\n");
6082 }
6083
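/* Note (not in the original source): sriov_configure hook, invoked by the
 * PCI core when the admin writes to the sysfs "sriov_numvfs" file. It must
 * return the number of VFs actually enabled, or a negative errno on
 * failure.
 */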
6084 static int be_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
6085 {
6086         struct be_adapter *adapter = pci_get_drvdata(pdev);
6087         struct be_resources vft_res = {0};
6088         int status;
6089
6090         if (!num_vfs)
6091                 be_vf_clear(adapter);
6092
6093         adapter->num_vfs = num_vfs;
6094
6095         if (adapter->num_vfs == 0 && pci_vfs_assigned(pdev)) {
6096                 dev_warn(&pdev->dev,
6097                          "Cannot disable VFs while they are assigned\n");
6098                 return -EBUSY;
6099         }
6100
6101         /* When the HW is in an SR-IOV capable configuration, the PF-pool
6102          * resources are distributed equally across the maximum number of
6103          * VFs. The user may request that only a subset of the max VFs be
6104          * enabled. Based on num_vfs, redistribute the resources across
6105          * num_vfs so that each VF gets a larger share of the resources.
6106          * This facility is not available in BE3 FW.
6107          * On Lancer chips this redistribution is done by the FW itself.
6108          */
6109         if (skyhawk_chip(adapter) && !pci_num_vf(pdev)) {
6110                 be_calculate_vf_res(adapter, adapter->num_vfs,
6111                                     &vft_res);
6112                 status = be_cmd_set_sriov_config(adapter, adapter->pool_res,
6113                                                  adapter->num_vfs, &vft_res);
6114                 if (status)
6115                         dev_err(&pdev->dev,
6116                                 "Failed to optimize SR-IOV resources\n");
6117         }
6118
6119         status = be_get_resources(adapter);
6120         if (status)
6121                 return be_cmd_status(status);
6122
6123         /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
6124         rtnl_lock();
6125         status = be_update_queues(adapter);
6126         rtnl_unlock();
6127         if (status)
6128                 return be_cmd_status(status);
6129
6130         if (adapter->num_vfs)
6131                 status = be_vf_setup(adapter);
6132
6133         if (!status)
6134                 return adapter->num_vfs;
6135
6136         return 0;
6137 }
6138
6139 static const struct pci_error_handlers be_eeh_handlers = {
6140         .error_detected = be_eeh_err_detected,
6141         .slot_reset = be_eeh_reset,
6142         .resume = be_eeh_resume,
6143 };
6144
6145 static struct pci_driver be_driver = {
6146         .name = DRV_NAME,
6147         .id_table = be_dev_ids,
6148         .probe = be_probe,
6149         .remove = be_remove,
6150         .suspend = be_suspend,
6151         .resume = be_pci_resume,
6152         .shutdown = be_shutdown,
6153         .sriov_configure = be_pci_sriov_configure,
6154         .err_handler = &be_eeh_handlers
6155 };
6156
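/* Note (not in the original source): module init validates the module
 * parameters, creates the shared command and error-recovery workqueues,
 * then registers the PCI driver. Failure to create the error-recovery
 * workqueue is only warned about, while failure to create be_wq aborts
 * module load.
 */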
6157 static int __init be_init_module(void)
6158 {
6159         int status;
6160
6161         if (rx_frag_size != 8192 && rx_frag_size != 4096 &&
6162             rx_frag_size != 2048) {
6163                 printk(KERN_WARNING DRV_NAME
6164                         " : Module param rx_frag_size must be 2048/4096/8192."
6165                         " Using 2048\n");
6166                 rx_frag_size = 2048;
6167         }
6168
6169         if (num_vfs > 0) {
6170                 pr_info(DRV_NAME " : Module param num_vfs is obsolete.\n");
6171                 pr_info(DRV_NAME " : Use sysfs method to enable VFs\n");
6172         }
6173
6174         be_wq = create_singlethread_workqueue("be_wq");
6175         if (!be_wq) {
6176                 pr_warn(DRV_NAME " : workqueue creation failed\n");
6177                 return -ENOMEM;
6178         }
6179
6180         be_err_recovery_workq =
6181                 create_singlethread_workqueue("be_err_recover");
6182         if (!be_err_recovery_workq)
6183                 pr_warn(DRV_NAME " : Could not create error recovery workqueue\n");
6184
6185         status = pci_register_driver(&be_driver);
6186         if (status) {
6187                 destroy_workqueue(be_wq);
6188                 be_destroy_err_recovery_workq();
6189         }
6190         return status;
6191 }
6192 module_init(be_init_module);
6193
6194 static void __exit be_exit_module(void)
6195 {
6196         pci_unregister_driver(&be_driver);
6197
6198         be_destroy_err_recovery_workq();
6199
6200         if (be_wq)
6201                 destroy_workqueue(be_wq);
6202 }
6203 module_exit(be_exit_module);