]> asedeno.scripts.mit.edu Git - linux.git/blob - drivers/net/ethernet/emulex/benet/be_main.c
net: make ndo_get_stats64 a void function
[linux.git] / drivers / net / ethernet / emulex / benet / be_main.c
1 /*
2  * Copyright (C) 2005 - 2016 Broadcom
3  * All rights reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License version 2
7  * as published by the Free Software Foundation.  The full GNU General
8  * Public License is included in this distribution in the file called COPYING.
9  *
10  * Contact Information:
11  * linux-drivers@emulex.com
12  *
13  * Emulex
14  * 3333 Susan Street
15  * Costa Mesa, CA 92626
16  */
17
18 #include <linux/prefetch.h>
19 #include <linux/module.h>
20 #include "be.h"
21 #include "be_cmds.h"
22 #include <asm/div64.h>
23 #include <linux/aer.h>
24 #include <linux/if_bridge.h>
25 #include <net/busy_poll.h>
26 #include <net/vxlan.h>
27
28 MODULE_VERSION(DRV_VER);
29 MODULE_DESCRIPTION(DRV_DESC " " DRV_VER);
30 MODULE_AUTHOR("Emulex Corporation");
31 MODULE_LICENSE("GPL");
32
33 /* num_vfs module param is obsolete.
34  * Use sysfs method to enable/disable VFs.
35  */
36 static unsigned int num_vfs;
37 module_param(num_vfs, uint, S_IRUGO);
38 MODULE_PARM_DESC(num_vfs, "Number of PCI VFs to initialize");
39
40 static ushort rx_frag_size = 2048;
41 module_param(rx_frag_size, ushort, S_IRUGO);
42 MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data.");
43
44 /* Per-module error detection/recovery workq shared across all functions.
45  * Each function schedules its own work request on this shared workq.
46  */
47 static struct workqueue_struct *be_err_recovery_workq;
48
49 static const struct pci_device_id be_dev_ids[] = {
50         { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) },
51         { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID2) },
52         { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID1) },
53         { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID2) },
54         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID3)},
55         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID4)},
56         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID5)},
57         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID6)},
58         { 0 }
59 };
60 MODULE_DEVICE_TABLE(pci, be_dev_ids);
61
62 /* Workqueue used by all functions for defering cmd calls to the adapter */
63 static struct workqueue_struct *be_wq;
64
65 /* UE Status Low CSR */
66 static const char * const ue_status_low_desc[] = {
67         "CEV",
68         "CTX",
69         "DBUF",
70         "ERX",
71         "Host",
72         "MPU",
73         "NDMA",
74         "PTC ",
75         "RDMA ",
76         "RXF ",
77         "RXIPS ",
78         "RXULP0 ",
79         "RXULP1 ",
80         "RXULP2 ",
81         "TIM ",
82         "TPOST ",
83         "TPRE ",
84         "TXIPS ",
85         "TXULP0 ",
86         "TXULP1 ",
87         "UC ",
88         "WDMA ",
89         "TXULP2 ",
90         "HOST1 ",
91         "P0_OB_LINK ",
92         "P1_OB_LINK ",
93         "HOST_GPIO ",
94         "MBOX ",
95         "ERX2 ",
96         "SPARE ",
97         "JTAG ",
98         "MPU_INTPEND "
99 };
100
101 /* UE Status High CSR */
102 static const char * const ue_status_hi_desc[] = {
103         "LPCMEMHOST",
104         "MGMT_MAC",
105         "PCS0ONLINE",
106         "MPU_IRAM",
107         "PCS1ONLINE",
108         "PCTL0",
109         "PCTL1",
110         "PMEM",
111         "RR",
112         "TXPB",
113         "RXPP",
114         "XAUI",
115         "TXP",
116         "ARM",
117         "IPC",
118         "HOST2",
119         "HOST3",
120         "HOST4",
121         "HOST5",
122         "HOST6",
123         "HOST7",
124         "ECRC",
125         "Poison TLP",
126         "NETC",
127         "PERIPH",
128         "LLTXULP",
129         "D2P",
130         "RCON",
131         "LDMA",
132         "LLTXP",
133         "LLTXPB",
134         "Unknown"
135 };
136
137 #define BE_VF_IF_EN_FLAGS       (BE_IF_FLAGS_UNTAGGED | \
138                                  BE_IF_FLAGS_BROADCAST | \
139                                  BE_IF_FLAGS_MULTICAST | \
140                                  BE_IF_FLAGS_PASS_L3L4_ERRORS)
141
142 static void be_queue_free(struct be_adapter *adapter, struct be_queue_info *q)
143 {
144         struct be_dma_mem *mem = &q->dma_mem;
145
146         if (mem->va) {
147                 dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va,
148                                   mem->dma);
149                 mem->va = NULL;
150         }
151 }
152
153 static int be_queue_alloc(struct be_adapter *adapter, struct be_queue_info *q,
154                           u16 len, u16 entry_size)
155 {
156         struct be_dma_mem *mem = &q->dma_mem;
157
158         memset(q, 0, sizeof(*q));
159         q->len = len;
160         q->entry_size = entry_size;
161         mem->size = len * entry_size;
162         mem->va = dma_zalloc_coherent(&adapter->pdev->dev, mem->size, &mem->dma,
163                                       GFP_KERNEL);
164         if (!mem->va)
165                 return -ENOMEM;
166         return 0;
167 }
168
169 static void be_reg_intr_set(struct be_adapter *adapter, bool enable)
170 {
171         u32 reg, enabled;
172
173         pci_read_config_dword(adapter->pdev, PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET,
174                               &reg);
175         enabled = reg & MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
176
177         if (!enabled && enable)
178                 reg |= MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
179         else if (enabled && !enable)
180                 reg &= ~MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
181         else
182                 return;
183
184         pci_write_config_dword(adapter->pdev,
185                                PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET, reg);
186 }
187
188 static void be_intr_set(struct be_adapter *adapter, bool enable)
189 {
190         int status = 0;
191
192         /* On lancer interrupts can't be controlled via this register */
193         if (lancer_chip(adapter))
194                 return;
195
196         if (be_check_error(adapter, BE_ERROR_EEH))
197                 return;
198
199         status = be_cmd_intr_set(adapter, enable);
200         if (status)
201                 be_reg_intr_set(adapter, enable);
202 }
203
204 static void be_rxq_notify(struct be_adapter *adapter, u16 qid, u16 posted)
205 {
206         u32 val = 0;
207
208         if (be_check_error(adapter, BE_ERROR_HW))
209                 return;
210
211         val |= qid & DB_RQ_RING_ID_MASK;
212         val |= posted << DB_RQ_NUM_POSTED_SHIFT;
213
214         wmb();
215         iowrite32(val, adapter->db + DB_RQ_OFFSET);
216 }
217
218 static void be_txq_notify(struct be_adapter *adapter, struct be_tx_obj *txo,
219                           u16 posted)
220 {
221         u32 val = 0;
222
223         if (be_check_error(adapter, BE_ERROR_HW))
224                 return;
225
226         val |= txo->q.id & DB_TXULP_RING_ID_MASK;
227         val |= (posted & DB_TXULP_NUM_POSTED_MASK) << DB_TXULP_NUM_POSTED_SHIFT;
228
229         wmb();
230         iowrite32(val, adapter->db + txo->db_offset);
231 }
232
233 static void be_eq_notify(struct be_adapter *adapter, u16 qid,
234                          bool arm, bool clear_int, u16 num_popped,
235                          u32 eq_delay_mult_enc)
236 {
237         u32 val = 0;
238
239         val |= qid & DB_EQ_RING_ID_MASK;
240         val |= ((qid & DB_EQ_RING_ID_EXT_MASK) << DB_EQ_RING_ID_EXT_MASK_SHIFT);
241
242         if (be_check_error(adapter, BE_ERROR_HW))
243                 return;
244
245         if (arm)
246                 val |= 1 << DB_EQ_REARM_SHIFT;
247         if (clear_int)
248                 val |= 1 << DB_EQ_CLR_SHIFT;
249         val |= 1 << DB_EQ_EVNT_SHIFT;
250         val |= num_popped << DB_EQ_NUM_POPPED_SHIFT;
251         val |= eq_delay_mult_enc << DB_EQ_R2I_DLY_SHIFT;
252         iowrite32(val, adapter->db + DB_EQ_OFFSET);
253 }
254
255 void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped)
256 {
257         u32 val = 0;
258
259         val |= qid & DB_CQ_RING_ID_MASK;
260         val |= ((qid & DB_CQ_RING_ID_EXT_MASK) <<
261                         DB_CQ_RING_ID_EXT_MASK_SHIFT);
262
263         if (be_check_error(adapter, BE_ERROR_HW))
264                 return;
265
266         if (arm)
267                 val |= 1 << DB_CQ_REARM_SHIFT;
268         val |= num_popped << DB_CQ_NUM_POPPED_SHIFT;
269         iowrite32(val, adapter->db + DB_CQ_OFFSET);
270 }
271
272 static int be_dev_mac_add(struct be_adapter *adapter, u8 *mac)
273 {
274         int i;
275
276         /* Check if mac has already been added as part of uc-list */
277         for (i = 0; i < adapter->uc_macs; i++) {
278                 if (ether_addr_equal((u8 *)&adapter->uc_list[i * ETH_ALEN],
279                                      mac)) {
280                         /* mac already added, skip addition */
281                         adapter->pmac_id[0] = adapter->pmac_id[i + 1];
282                         return 0;
283                 }
284         }
285
286         return be_cmd_pmac_add(adapter, mac, adapter->if_handle,
287                                &adapter->pmac_id[0], 0);
288 }
289
290 static void be_dev_mac_del(struct be_adapter *adapter, int pmac_id)
291 {
292         int i;
293
294         /* Skip deletion if the programmed mac is
295          * being used in uc-list
296          */
297         for (i = 0; i < adapter->uc_macs; i++) {
298                 if (adapter->pmac_id[i + 1] == pmac_id)
299                         return;
300         }
301         be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
302 }
303
304 static int be_mac_addr_set(struct net_device *netdev, void *p)
305 {
306         struct be_adapter *adapter = netdev_priv(netdev);
307         struct device *dev = &adapter->pdev->dev;
308         struct sockaddr *addr = p;
309         int status;
310         u8 mac[ETH_ALEN];
311         u32 old_pmac_id = adapter->pmac_id[0];
312
313         if (!is_valid_ether_addr(addr->sa_data))
314                 return -EADDRNOTAVAIL;
315
316         /* Proceed further only if, User provided MAC is different
317          * from active MAC
318          */
319         if (ether_addr_equal(addr->sa_data, adapter->dev_mac))
320                 return 0;
321
322         /* if device is not running, copy MAC to netdev->dev_addr */
323         if (!netif_running(netdev))
324                 goto done;
325
326         /* The PMAC_ADD cmd may fail if the VF doesn't have FILTMGMT
327          * privilege or if PF did not provision the new MAC address.
328          * On BE3, this cmd will always fail if the VF doesn't have the
329          * FILTMGMT privilege. This failure is OK, only if the PF programmed
330          * the MAC for the VF.
331          */
332         mutex_lock(&adapter->rx_filter_lock);
333         status = be_dev_mac_add(adapter, (u8 *)addr->sa_data);
334         if (!status) {
335
336                 /* Delete the old programmed MAC. This call may fail if the
337                  * old MAC was already deleted by the PF driver.
338                  */
339                 if (adapter->pmac_id[0] != old_pmac_id)
340                         be_dev_mac_del(adapter, old_pmac_id);
341         }
342
343         mutex_unlock(&adapter->rx_filter_lock);
344         /* Decide if the new MAC is successfully activated only after
345          * querying the FW
346          */
347         status = be_cmd_get_active_mac(adapter, adapter->pmac_id[0], mac,
348                                        adapter->if_handle, true, 0);
349         if (status)
350                 goto err;
351
352         /* The MAC change did not happen, either due to lack of privilege
353          * or PF didn't pre-provision.
354          */
355         if (!ether_addr_equal(addr->sa_data, mac)) {
356                 status = -EPERM;
357                 goto err;
358         }
359 done:
360         ether_addr_copy(adapter->dev_mac, addr->sa_data);
361         ether_addr_copy(netdev->dev_addr, addr->sa_data);
362         dev_info(dev, "MAC address changed to %pM\n", addr->sa_data);
363         return 0;
364 err:
365         dev_warn(dev, "MAC address change to %pM failed\n", addr->sa_data);
366         return status;
367 }
368
369 /* BE2 supports only v0 cmd */
370 static void *hw_stats_from_cmd(struct be_adapter *adapter)
371 {
372         if (BE2_chip(adapter)) {
373                 struct be_cmd_resp_get_stats_v0 *cmd = adapter->stats_cmd.va;
374
375                 return &cmd->hw_stats;
376         } else if (BE3_chip(adapter)) {
377                 struct be_cmd_resp_get_stats_v1 *cmd = adapter->stats_cmd.va;
378
379                 return &cmd->hw_stats;
380         } else {
381                 struct be_cmd_resp_get_stats_v2 *cmd = adapter->stats_cmd.va;
382
383                 return &cmd->hw_stats;
384         }
385 }
386
387 /* BE2 supports only v0 cmd */
388 static void *be_erx_stats_from_cmd(struct be_adapter *adapter)
389 {
390         if (BE2_chip(adapter)) {
391                 struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
392
393                 return &hw_stats->erx;
394         } else if (BE3_chip(adapter)) {
395                 struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
396
397                 return &hw_stats->erx;
398         } else {
399                 struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
400
401                 return &hw_stats->erx;
402         }
403 }
404
405 static void populate_be_v0_stats(struct be_adapter *adapter)
406 {
407         struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
408         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
409         struct be_rxf_stats_v0 *rxf_stats = &hw_stats->rxf;
410         struct be_port_rxf_stats_v0 *port_stats =
411                                         &rxf_stats->port[adapter->port_num];
412         struct be_drv_stats *drvs = &adapter->drv_stats;
413
414         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
415         drvs->rx_pause_frames = port_stats->rx_pause_frames;
416         drvs->rx_crc_errors = port_stats->rx_crc_errors;
417         drvs->rx_control_frames = port_stats->rx_control_frames;
418         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
419         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
420         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
421         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
422         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
423         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
424         drvs->rxpp_fifo_overflow_drop = port_stats->rx_fifo_overflow;
425         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
426         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
427         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
428         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
429         drvs->rx_input_fifo_overflow_drop = port_stats->rx_input_fifo_overflow;
430         drvs->rx_dropped_header_too_small =
431                 port_stats->rx_dropped_header_too_small;
432         drvs->rx_address_filtered =
433                                         port_stats->rx_address_filtered +
434                                         port_stats->rx_vlan_filtered;
435         drvs->rx_alignment_symbol_errors =
436                 port_stats->rx_alignment_symbol_errors;
437
438         drvs->tx_pauseframes = port_stats->tx_pauseframes;
439         drvs->tx_controlframes = port_stats->tx_controlframes;
440
441         if (adapter->port_num)
442                 drvs->jabber_events = rxf_stats->port1_jabber_events;
443         else
444                 drvs->jabber_events = rxf_stats->port0_jabber_events;
445         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
446         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
447         drvs->forwarded_packets = rxf_stats->forwarded_packets;
448         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
449         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
450         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
451         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
452 }
453
454 static void populate_be_v1_stats(struct be_adapter *adapter)
455 {
456         struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
457         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
458         struct be_rxf_stats_v1 *rxf_stats = &hw_stats->rxf;
459         struct be_port_rxf_stats_v1 *port_stats =
460                                         &rxf_stats->port[adapter->port_num];
461         struct be_drv_stats *drvs = &adapter->drv_stats;
462
463         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
464         drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
465         drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
466         drvs->rx_pause_frames = port_stats->rx_pause_frames;
467         drvs->rx_crc_errors = port_stats->rx_crc_errors;
468         drvs->rx_control_frames = port_stats->rx_control_frames;
469         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
470         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
471         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
472         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
473         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
474         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
475         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
476         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
477         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
478         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
479         drvs->rx_dropped_header_too_small =
480                 port_stats->rx_dropped_header_too_small;
481         drvs->rx_input_fifo_overflow_drop =
482                 port_stats->rx_input_fifo_overflow_drop;
483         drvs->rx_address_filtered = port_stats->rx_address_filtered;
484         drvs->rx_alignment_symbol_errors =
485                 port_stats->rx_alignment_symbol_errors;
486         drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
487         drvs->tx_pauseframes = port_stats->tx_pauseframes;
488         drvs->tx_controlframes = port_stats->tx_controlframes;
489         drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
490         drvs->jabber_events = port_stats->jabber_events;
491         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
492         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
493         drvs->forwarded_packets = rxf_stats->forwarded_packets;
494         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
495         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
496         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
497         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
498 }
499
500 static void populate_be_v2_stats(struct be_adapter *adapter)
501 {
502         struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
503         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
504         struct be_rxf_stats_v2 *rxf_stats = &hw_stats->rxf;
505         struct be_port_rxf_stats_v2 *port_stats =
506                                         &rxf_stats->port[adapter->port_num];
507         struct be_drv_stats *drvs = &adapter->drv_stats;
508
509         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
510         drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
511         drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
512         drvs->rx_pause_frames = port_stats->rx_pause_frames;
513         drvs->rx_crc_errors = port_stats->rx_crc_errors;
514         drvs->rx_control_frames = port_stats->rx_control_frames;
515         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
516         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
517         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
518         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
519         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
520         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
521         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
522         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
523         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
524         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
525         drvs->rx_dropped_header_too_small =
526                 port_stats->rx_dropped_header_too_small;
527         drvs->rx_input_fifo_overflow_drop =
528                 port_stats->rx_input_fifo_overflow_drop;
529         drvs->rx_address_filtered = port_stats->rx_address_filtered;
530         drvs->rx_alignment_symbol_errors =
531                 port_stats->rx_alignment_symbol_errors;
532         drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
533         drvs->tx_pauseframes = port_stats->tx_pauseframes;
534         drvs->tx_controlframes = port_stats->tx_controlframes;
535         drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
536         drvs->jabber_events = port_stats->jabber_events;
537         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
538         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
539         drvs->forwarded_packets = rxf_stats->forwarded_packets;
540         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
541         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
542         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
543         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
544         if (be_roce_supported(adapter)) {
545                 drvs->rx_roce_bytes_lsd = port_stats->roce_bytes_received_lsd;
546                 drvs->rx_roce_bytes_msd = port_stats->roce_bytes_received_msd;
547                 drvs->rx_roce_frames = port_stats->roce_frames_received;
548                 drvs->roce_drops_crc = port_stats->roce_drops_crc;
549                 drvs->roce_drops_payload_len =
550                         port_stats->roce_drops_payload_len;
551         }
552 }
553
554 static void populate_lancer_stats(struct be_adapter *adapter)
555 {
556         struct be_drv_stats *drvs = &adapter->drv_stats;
557         struct lancer_pport_stats *pport_stats = pport_stats_from_cmd(adapter);
558
559         be_dws_le_to_cpu(pport_stats, sizeof(*pport_stats));
560         drvs->rx_pause_frames = pport_stats->rx_pause_frames_lo;
561         drvs->rx_crc_errors = pport_stats->rx_crc_errors_lo;
562         drvs->rx_control_frames = pport_stats->rx_control_frames_lo;
563         drvs->rx_in_range_errors = pport_stats->rx_in_range_errors;
564         drvs->rx_frame_too_long = pport_stats->rx_frames_too_long_lo;
565         drvs->rx_dropped_runt = pport_stats->rx_dropped_runt;
566         drvs->rx_ip_checksum_errs = pport_stats->rx_ip_checksum_errors;
567         drvs->rx_tcp_checksum_errs = pport_stats->rx_tcp_checksum_errors;
568         drvs->rx_udp_checksum_errs = pport_stats->rx_udp_checksum_errors;
569         drvs->rx_dropped_tcp_length =
570                                 pport_stats->rx_dropped_invalid_tcp_length;
571         drvs->rx_dropped_too_small = pport_stats->rx_dropped_too_small;
572         drvs->rx_dropped_too_short = pport_stats->rx_dropped_too_short;
573         drvs->rx_out_range_errors = pport_stats->rx_out_of_range_errors;
574         drvs->rx_dropped_header_too_small =
575                                 pport_stats->rx_dropped_header_too_small;
576         drvs->rx_input_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
577         drvs->rx_address_filtered =
578                                         pport_stats->rx_address_filtered +
579                                         pport_stats->rx_vlan_filtered;
580         drvs->rx_alignment_symbol_errors = pport_stats->rx_symbol_errors_lo;
581         drvs->rxpp_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
582         drvs->tx_pauseframes = pport_stats->tx_pause_frames_lo;
583         drvs->tx_controlframes = pport_stats->tx_control_frames_lo;
584         drvs->jabber_events = pport_stats->rx_jabbers;
585         drvs->forwarded_packets = pport_stats->num_forwards_lo;
586         drvs->rx_drops_mtu = pport_stats->rx_drops_mtu_lo;
587         drvs->rx_drops_too_many_frags =
588                                 pport_stats->rx_drops_too_many_frags_lo;
589 }
590
591 static void accumulate_16bit_val(u32 *acc, u16 val)
592 {
593 #define lo(x)                   (x & 0xFFFF)
594 #define hi(x)                   (x & 0xFFFF0000)
595         bool wrapped = val < lo(*acc);
596         u32 newacc = hi(*acc) + val;
597
598         if (wrapped)
599                 newacc += 65536;
600         ACCESS_ONCE(*acc) = newacc;
601 }
602
603 static void populate_erx_stats(struct be_adapter *adapter,
604                                struct be_rx_obj *rxo, u32 erx_stat)
605 {
606         if (!BEx_chip(adapter))
607                 rx_stats(rxo)->rx_drops_no_frags = erx_stat;
608         else
609                 /* below erx HW counter can actually wrap around after
610                  * 65535. Driver accumulates a 32-bit value
611                  */
612                 accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags,
613                                      (u16)erx_stat);
614 }
615
616 void be_parse_stats(struct be_adapter *adapter)
617 {
618         struct be_erx_stats_v2 *erx = be_erx_stats_from_cmd(adapter);
619         struct be_rx_obj *rxo;
620         int i;
621         u32 erx_stat;
622
623         if (lancer_chip(adapter)) {
624                 populate_lancer_stats(adapter);
625         } else {
626                 if (BE2_chip(adapter))
627                         populate_be_v0_stats(adapter);
628                 else if (BE3_chip(adapter))
629                         /* for BE3 */
630                         populate_be_v1_stats(adapter);
631                 else
632                         populate_be_v2_stats(adapter);
633
634                 /* erx_v2 is longer than v0, v1. use v2 for v0, v1 access */
635                 for_all_rx_queues(adapter, rxo, i) {
636                         erx_stat = erx->rx_drops_no_fragments[rxo->q.id];
637                         populate_erx_stats(adapter, rxo, erx_stat);
638                 }
639         }
640 }
641
642 static void be_get_stats64(struct net_device *netdev,
643                            struct rtnl_link_stats64 *stats)
644 {
645         struct be_adapter *adapter = netdev_priv(netdev);
646         struct be_drv_stats *drvs = &adapter->drv_stats;
647         struct be_rx_obj *rxo;
648         struct be_tx_obj *txo;
649         u64 pkts, bytes;
650         unsigned int start;
651         int i;
652
653         for_all_rx_queues(adapter, rxo, i) {
654                 const struct be_rx_stats *rx_stats = rx_stats(rxo);
655
656                 do {
657                         start = u64_stats_fetch_begin_irq(&rx_stats->sync);
658                         pkts = rx_stats(rxo)->rx_pkts;
659                         bytes = rx_stats(rxo)->rx_bytes;
660                 } while (u64_stats_fetch_retry_irq(&rx_stats->sync, start));
661                 stats->rx_packets += pkts;
662                 stats->rx_bytes += bytes;
663                 stats->multicast += rx_stats(rxo)->rx_mcast_pkts;
664                 stats->rx_dropped += rx_stats(rxo)->rx_drops_no_skbs +
665                                         rx_stats(rxo)->rx_drops_no_frags;
666         }
667
668         for_all_tx_queues(adapter, txo, i) {
669                 const struct be_tx_stats *tx_stats = tx_stats(txo);
670
671                 do {
672                         start = u64_stats_fetch_begin_irq(&tx_stats->sync);
673                         pkts = tx_stats(txo)->tx_pkts;
674                         bytes = tx_stats(txo)->tx_bytes;
675                 } while (u64_stats_fetch_retry_irq(&tx_stats->sync, start));
676                 stats->tx_packets += pkts;
677                 stats->tx_bytes += bytes;
678         }
679
680         /* bad pkts received */
681         stats->rx_errors = drvs->rx_crc_errors +
682                 drvs->rx_alignment_symbol_errors +
683                 drvs->rx_in_range_errors +
684                 drvs->rx_out_range_errors +
685                 drvs->rx_frame_too_long +
686                 drvs->rx_dropped_too_small +
687                 drvs->rx_dropped_too_short +
688                 drvs->rx_dropped_header_too_small +
689                 drvs->rx_dropped_tcp_length +
690                 drvs->rx_dropped_runt;
691
692         /* detailed rx errors */
693         stats->rx_length_errors = drvs->rx_in_range_errors +
694                 drvs->rx_out_range_errors +
695                 drvs->rx_frame_too_long;
696
697         stats->rx_crc_errors = drvs->rx_crc_errors;
698
699         /* frame alignment errors */
700         stats->rx_frame_errors = drvs->rx_alignment_symbol_errors;
701
702         /* receiver fifo overrun */
703         /* drops_no_pbuf is no per i/f, it's per BE card */
704         stats->rx_fifo_errors = drvs->rxpp_fifo_overflow_drop +
705                                 drvs->rx_input_fifo_overflow_drop +
706                                 drvs->rx_drops_no_pbuf;
707 }
708
709 void be_link_status_update(struct be_adapter *adapter, u8 link_status)
710 {
711         struct net_device *netdev = adapter->netdev;
712
713         if (!(adapter->flags & BE_FLAGS_LINK_STATUS_INIT)) {
714                 netif_carrier_off(netdev);
715                 adapter->flags |= BE_FLAGS_LINK_STATUS_INIT;
716         }
717
718         if (link_status)
719                 netif_carrier_on(netdev);
720         else
721                 netif_carrier_off(netdev);
722
723         netdev_info(netdev, "Link is %s\n", link_status ? "Up" : "Down");
724 }
725
726 static int be_gso_hdr_len(struct sk_buff *skb)
727 {
728         if (skb->encapsulation)
729                 return skb_inner_transport_offset(skb) +
730                        inner_tcp_hdrlen(skb);
731         return skb_transport_offset(skb) + tcp_hdrlen(skb);
732 }
733
734 static void be_tx_stats_update(struct be_tx_obj *txo, struct sk_buff *skb)
735 {
736         struct be_tx_stats *stats = tx_stats(txo);
737         u32 tx_pkts = skb_shinfo(skb)->gso_segs ? : 1;
738         /* Account for headers which get duplicated in TSO pkt */
739         u32 dup_hdr_len = tx_pkts > 1 ? be_gso_hdr_len(skb) * (tx_pkts - 1) : 0;
740
741         u64_stats_update_begin(&stats->sync);
742         stats->tx_reqs++;
743         stats->tx_bytes += skb->len + dup_hdr_len;
744         stats->tx_pkts += tx_pkts;
745         if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL)
746                 stats->tx_vxlan_offload_pkts += tx_pkts;
747         u64_stats_update_end(&stats->sync);
748 }
749
750 /* Returns number of WRBs needed for the skb */
751 static u32 skb_wrb_cnt(struct sk_buff *skb)
752 {
753         /* +1 for the header wrb */
754         return 1 + (skb_headlen(skb) ? 1 : 0) + skb_shinfo(skb)->nr_frags;
755 }
756
757 static inline void wrb_fill(struct be_eth_wrb *wrb, u64 addr, int len)
758 {
759         wrb->frag_pa_hi = cpu_to_le32(upper_32_bits(addr));
760         wrb->frag_pa_lo = cpu_to_le32(lower_32_bits(addr));
761         wrb->frag_len = cpu_to_le32(len & ETH_WRB_FRAG_LEN_MASK);
762         wrb->rsvd0 = 0;
763 }
764
765 /* A dummy wrb is just all zeros. Using a separate routine for dummy-wrb
766  * to avoid the swap and shift/mask operations in wrb_fill().
767  */
768 static inline void wrb_fill_dummy(struct be_eth_wrb *wrb)
769 {
770         wrb->frag_pa_hi = 0;
771         wrb->frag_pa_lo = 0;
772         wrb->frag_len = 0;
773         wrb->rsvd0 = 0;
774 }
775
776 static inline u16 be_get_tx_vlan_tag(struct be_adapter *adapter,
777                                      struct sk_buff *skb)
778 {
779         u8 vlan_prio;
780         u16 vlan_tag;
781
782         vlan_tag = skb_vlan_tag_get(skb);
783         vlan_prio = (vlan_tag & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
784         /* If vlan priority provided by OS is NOT in available bmap */
785         if (!(adapter->vlan_prio_bmap & (1 << vlan_prio)))
786                 vlan_tag = (vlan_tag & ~VLAN_PRIO_MASK) |
787                                 adapter->recommended_prio_bits;
788
789         return vlan_tag;
790 }
791
792 /* Used only for IP tunnel packets */
793 static u16 skb_inner_ip_proto(struct sk_buff *skb)
794 {
795         return (inner_ip_hdr(skb)->version == 4) ?
796                 inner_ip_hdr(skb)->protocol : inner_ipv6_hdr(skb)->nexthdr;
797 }
798
799 static u16 skb_ip_proto(struct sk_buff *skb)
800 {
801         return (ip_hdr(skb)->version == 4) ?
802                 ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr;
803 }
804
805 static inline bool be_is_txq_full(struct be_tx_obj *txo)
806 {
807         return atomic_read(&txo->q.used) + BE_MAX_TX_FRAG_COUNT >= txo->q.len;
808 }
809
810 static inline bool be_can_txq_wake(struct be_tx_obj *txo)
811 {
812         return atomic_read(&txo->q.used) < txo->q.len / 2;
813 }
814
815 static inline bool be_is_tx_compl_pending(struct be_tx_obj *txo)
816 {
817         return atomic_read(&txo->q.used) > txo->pend_wrb_cnt;
818 }
819
820 static void be_get_wrb_params_from_skb(struct be_adapter *adapter,
821                                        struct sk_buff *skb,
822                                        struct be_wrb_params *wrb_params)
823 {
824         u16 proto;
825
826         if (skb_is_gso(skb)) {
827                 BE_WRB_F_SET(wrb_params->features, LSO, 1);
828                 wrb_params->lso_mss = skb_shinfo(skb)->gso_size;
829                 if (skb_is_gso_v6(skb) && !lancer_chip(adapter))
830                         BE_WRB_F_SET(wrb_params->features, LSO6, 1);
831         } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
832                 if (skb->encapsulation) {
833                         BE_WRB_F_SET(wrb_params->features, IPCS, 1);
834                         proto = skb_inner_ip_proto(skb);
835                 } else {
836                         proto = skb_ip_proto(skb);
837                 }
838                 if (proto == IPPROTO_TCP)
839                         BE_WRB_F_SET(wrb_params->features, TCPCS, 1);
840                 else if (proto == IPPROTO_UDP)
841                         BE_WRB_F_SET(wrb_params->features, UDPCS, 1);
842         }
843
844         if (skb_vlan_tag_present(skb)) {
845                 BE_WRB_F_SET(wrb_params->features, VLAN, 1);
846                 wrb_params->vlan_tag = be_get_tx_vlan_tag(adapter, skb);
847         }
848
849         BE_WRB_F_SET(wrb_params->features, CRC, 1);
850 }
851
852 static void wrb_fill_hdr(struct be_adapter *adapter,
853                          struct be_eth_hdr_wrb *hdr,
854                          struct be_wrb_params *wrb_params,
855                          struct sk_buff *skb)
856 {
857         memset(hdr, 0, sizeof(*hdr));
858
859         SET_TX_WRB_HDR_BITS(crc, hdr,
860                             BE_WRB_F_GET(wrb_params->features, CRC));
861         SET_TX_WRB_HDR_BITS(ipcs, hdr,
862                             BE_WRB_F_GET(wrb_params->features, IPCS));
863         SET_TX_WRB_HDR_BITS(tcpcs, hdr,
864                             BE_WRB_F_GET(wrb_params->features, TCPCS));
865         SET_TX_WRB_HDR_BITS(udpcs, hdr,
866                             BE_WRB_F_GET(wrb_params->features, UDPCS));
867
868         SET_TX_WRB_HDR_BITS(lso, hdr,
869                             BE_WRB_F_GET(wrb_params->features, LSO));
870         SET_TX_WRB_HDR_BITS(lso6, hdr,
871                             BE_WRB_F_GET(wrb_params->features, LSO6));
872         SET_TX_WRB_HDR_BITS(lso_mss, hdr, wrb_params->lso_mss);
873
874         /* Hack to skip HW VLAN tagging needs evt = 1, compl = 0. When this
875          * hack is not needed, the evt bit is set while ringing DB.
876          */
877         SET_TX_WRB_HDR_BITS(event, hdr,
878                             BE_WRB_F_GET(wrb_params->features, VLAN_SKIP_HW));
879         SET_TX_WRB_HDR_BITS(vlan, hdr,
880                             BE_WRB_F_GET(wrb_params->features, VLAN));
881         SET_TX_WRB_HDR_BITS(vlan_tag, hdr, wrb_params->vlan_tag);
882
883         SET_TX_WRB_HDR_BITS(num_wrb, hdr, skb_wrb_cnt(skb));
884         SET_TX_WRB_HDR_BITS(len, hdr, skb->len);
885         SET_TX_WRB_HDR_BITS(mgmt, hdr,
886                             BE_WRB_F_GET(wrb_params->features, OS2BMC));
887 }
888
889 static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb,
890                           bool unmap_single)
891 {
892         dma_addr_t dma;
893         u32 frag_len = le32_to_cpu(wrb->frag_len);
894
895
896         dma = (u64)le32_to_cpu(wrb->frag_pa_hi) << 32 |
897                 (u64)le32_to_cpu(wrb->frag_pa_lo);
898         if (frag_len) {
899                 if (unmap_single)
900                         dma_unmap_single(dev, dma, frag_len, DMA_TO_DEVICE);
901                 else
902                         dma_unmap_page(dev, dma, frag_len, DMA_TO_DEVICE);
903         }
904 }
905
906 /* Grab a WRB header for xmit */
907 static u32 be_tx_get_wrb_hdr(struct be_tx_obj *txo)
908 {
909         u32 head = txo->q.head;
910
911         queue_head_inc(&txo->q);
912         return head;
913 }
914
915 /* Set up the WRB header for xmit */
916 static void be_tx_setup_wrb_hdr(struct be_adapter *adapter,
917                                 struct be_tx_obj *txo,
918                                 struct be_wrb_params *wrb_params,
919                                 struct sk_buff *skb, u16 head)
920 {
921         u32 num_frags = skb_wrb_cnt(skb);
922         struct be_queue_info *txq = &txo->q;
923         struct be_eth_hdr_wrb *hdr = queue_index_node(txq, head);
924
925         wrb_fill_hdr(adapter, hdr, wrb_params, skb);
926         be_dws_cpu_to_le(hdr, sizeof(*hdr));
927
928         BUG_ON(txo->sent_skb_list[head]);
929         txo->sent_skb_list[head] = skb;
930         txo->last_req_hdr = head;
931         atomic_add(num_frags, &txq->used);
932         txo->last_req_wrb_cnt = num_frags;
933         txo->pend_wrb_cnt += num_frags;
934 }
935
936 /* Setup a WRB fragment (buffer descriptor) for xmit */
937 static void be_tx_setup_wrb_frag(struct be_tx_obj *txo, dma_addr_t busaddr,
938                                  int len)
939 {
940         struct be_eth_wrb *wrb;
941         struct be_queue_info *txq = &txo->q;
942
943         wrb = queue_head_node(txq);
944         wrb_fill(wrb, busaddr, len);
945         queue_head_inc(txq);
946 }
947
948 /* Bring the queue back to the state it was in before be_xmit_enqueue() routine
949  * was invoked. The producer index is restored to the previous packet and the
950  * WRBs of the current packet are unmapped. Invoked to handle tx setup errors.
951  */
952 static void be_xmit_restore(struct be_adapter *adapter,
953                             struct be_tx_obj *txo, u32 head, bool map_single,
954                             u32 copied)
955 {
956         struct device *dev;
957         struct be_eth_wrb *wrb;
958         struct be_queue_info *txq = &txo->q;
959
960         dev = &adapter->pdev->dev;
961         txq->head = head;
962
963         /* skip the first wrb (hdr); it's not mapped */
964         queue_head_inc(txq);
965         while (copied) {
966                 wrb = queue_head_node(txq);
967                 unmap_tx_frag(dev, wrb, map_single);
968                 map_single = false;
969                 copied -= le32_to_cpu(wrb->frag_len);
970                 queue_head_inc(txq);
971         }
972
973         txq->head = head;
974 }
975
976 /* Enqueue the given packet for transmit. This routine allocates WRBs for the
977  * packet, dma maps the packet buffers and sets up the WRBs. Returns the number
978  * of WRBs used up by the packet.
979  */
980 static u32 be_xmit_enqueue(struct be_adapter *adapter, struct be_tx_obj *txo,
981                            struct sk_buff *skb,
982                            struct be_wrb_params *wrb_params)
983 {
984         u32 i, copied = 0, wrb_cnt = skb_wrb_cnt(skb);
985         struct device *dev = &adapter->pdev->dev;
986         struct be_queue_info *txq = &txo->q;
987         bool map_single = false;
988         u32 head = txq->head;
989         dma_addr_t busaddr;
990         int len;
991
992         head = be_tx_get_wrb_hdr(txo);
993
994         if (skb->len > skb->data_len) {
995                 len = skb_headlen(skb);
996
997                 busaddr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE);
998                 if (dma_mapping_error(dev, busaddr))
999                         goto dma_err;
1000                 map_single = true;
1001                 be_tx_setup_wrb_frag(txo, busaddr, len);
1002                 copied += len;
1003         }
1004
1005         for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1006                 const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
1007                 len = skb_frag_size(frag);
1008
1009                 busaddr = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE);
1010                 if (dma_mapping_error(dev, busaddr))
1011                         goto dma_err;
1012                 be_tx_setup_wrb_frag(txo, busaddr, len);
1013                 copied += len;
1014         }
1015
1016         be_tx_setup_wrb_hdr(adapter, txo, wrb_params, skb, head);
1017
1018         be_tx_stats_update(txo, skb);
1019         return wrb_cnt;
1020
1021 dma_err:
1022         adapter->drv_stats.dma_map_errors++;
1023         be_xmit_restore(adapter, txo, head, map_single, copied);
1024         return 0;
1025 }
1026
1027 static inline int qnq_async_evt_rcvd(struct be_adapter *adapter)
1028 {
1029         return adapter->flags & BE_FLAGS_QNQ_ASYNC_EVT_RCVD;
1030 }
1031
1032 static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter,
1033                                              struct sk_buff *skb,
1034                                              struct be_wrb_params
1035                                              *wrb_params)
1036 {
1037         u16 vlan_tag = 0;
1038
1039         skb = skb_share_check(skb, GFP_ATOMIC);
1040         if (unlikely(!skb))
1041                 return skb;
1042
1043         if (skb_vlan_tag_present(skb))
1044                 vlan_tag = be_get_tx_vlan_tag(adapter, skb);
1045
1046         if (qnq_async_evt_rcvd(adapter) && adapter->pvid) {
1047                 if (!vlan_tag)
1048                         vlan_tag = adapter->pvid;
1049                 /* f/w workaround to set skip_hw_vlan = 1, informs the F/W to
1050                  * skip VLAN insertion
1051                  */
1052                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1053         }
1054
1055         if (vlan_tag) {
1056                 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1057                                                 vlan_tag);
1058                 if (unlikely(!skb))
1059                         return skb;
1060                 skb->vlan_tci = 0;
1061         }
1062
1063         /* Insert the outer VLAN, if any */
1064         if (adapter->qnq_vid) {
1065                 vlan_tag = adapter->qnq_vid;
1066                 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1067                                                 vlan_tag);
1068                 if (unlikely(!skb))
1069                         return skb;
1070                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1071         }
1072
1073         return skb;
1074 }
1075
1076 static bool be_ipv6_exthdr_check(struct sk_buff *skb)
1077 {
1078         struct ethhdr *eh = (struct ethhdr *)skb->data;
1079         u16 offset = ETH_HLEN;
1080
1081         if (eh->h_proto == htons(ETH_P_IPV6)) {
1082                 struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + offset);
1083
1084                 offset += sizeof(struct ipv6hdr);
1085                 if (ip6h->nexthdr != NEXTHDR_TCP &&
1086                     ip6h->nexthdr != NEXTHDR_UDP) {
1087                         struct ipv6_opt_hdr *ehdr =
1088                                 (struct ipv6_opt_hdr *)(skb->data + offset);
1089
1090                         /* offending pkt: 2nd byte following IPv6 hdr is 0xff */
1091                         if (ehdr->hdrlen == 0xff)
1092                                 return true;
1093                 }
1094         }
1095         return false;
1096 }
1097
1098 static int be_vlan_tag_tx_chk(struct be_adapter *adapter, struct sk_buff *skb)
1099 {
1100         return skb_vlan_tag_present(skb) || adapter->pvid || adapter->qnq_vid;
1101 }
1102
1103 static int be_ipv6_tx_stall_chk(struct be_adapter *adapter, struct sk_buff *skb)
1104 {
1105         return BE3_chip(adapter) && be_ipv6_exthdr_check(skb);
1106 }
1107
1108 static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter,
1109                                                   struct sk_buff *skb,
1110                                                   struct be_wrb_params
1111                                                   *wrb_params)
1112 {
1113         struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1114         unsigned int eth_hdr_len;
1115         struct iphdr *ip;
1116
1117         /* For padded packets, BE HW modifies tot_len field in IP header
1118          * incorrecly when VLAN tag is inserted by HW.
1119          * For padded packets, Lancer computes incorrect checksum.
1120          */
1121         eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ?
1122                                                 VLAN_ETH_HLEN : ETH_HLEN;
1123         if (skb->len <= 60 &&
1124             (lancer_chip(adapter) || skb_vlan_tag_present(skb)) &&
1125             is_ipv4_pkt(skb)) {
1126                 ip = (struct iphdr *)ip_hdr(skb);
1127                 pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len));
1128         }
1129
1130         /* If vlan tag is already inlined in the packet, skip HW VLAN
1131          * tagging in pvid-tagging mode
1132          */
1133         if (be_pvid_tagging_enabled(adapter) &&
1134             veh->h_vlan_proto == htons(ETH_P_8021Q))
1135                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1136
1137         /* HW has a bug wherein it will calculate CSUM for VLAN
1138          * pkts even though it is disabled.
1139          * Manually insert VLAN in pkt.
1140          */
1141         if (skb->ip_summed != CHECKSUM_PARTIAL &&
1142             skb_vlan_tag_present(skb)) {
1143                 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1144                 if (unlikely(!skb))
1145                         goto err;
1146         }
1147
1148         /* HW may lockup when VLAN HW tagging is requested on
1149          * certain ipv6 packets. Drop such pkts if the HW workaround to
1150          * skip HW tagging is not enabled by FW.
1151          */
1152         if (unlikely(be_ipv6_tx_stall_chk(adapter, skb) &&
1153                      (adapter->pvid || adapter->qnq_vid) &&
1154                      !qnq_async_evt_rcvd(adapter)))
1155                 goto tx_drop;
1156
1157         /* Manual VLAN tag insertion to prevent:
1158          * ASIC lockup when the ASIC inserts VLAN tag into
1159          * certain ipv6 packets. Insert VLAN tags in driver,
1160          * and set event, completion, vlan bits accordingly
1161          * in the Tx WRB.
1162          */
1163         if (be_ipv6_tx_stall_chk(adapter, skb) &&
1164             be_vlan_tag_tx_chk(adapter, skb)) {
1165                 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1166                 if (unlikely(!skb))
1167                         goto err;
1168         }
1169
1170         return skb;
1171 tx_drop:
1172         dev_kfree_skb_any(skb);
1173 err:
1174         return NULL;
1175 }
1176
1177 static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter,
1178                                            struct sk_buff *skb,
1179                                            struct be_wrb_params *wrb_params)
1180 {
1181         int err;
1182
1183         /* Lancer, SH and BE3 in SRIOV mode have a bug wherein
1184          * packets that are 32b or less may cause a transmit stall
1185          * on that port. The workaround is to pad such packets
1186          * (len <= 32 bytes) to a minimum length of 36b.
1187          */
1188         if (skb->len <= 32) {
1189                 if (skb_put_padto(skb, 36))
1190                         return NULL;
1191         }
1192
1193         if (BEx_chip(adapter) || lancer_chip(adapter)) {
1194                 skb = be_lancer_xmit_workarounds(adapter, skb, wrb_params);
1195                 if (!skb)
1196                         return NULL;
1197         }
1198
1199         /* The stack can send us skbs with length greater than
1200          * what the HW can handle. Trim the extra bytes.
1201          */
1202         WARN_ON_ONCE(skb->len > BE_MAX_GSO_SIZE);
1203         err = pskb_trim(skb, BE_MAX_GSO_SIZE);
1204         WARN_ON(err);
1205
1206         return skb;
1207 }
1208
1209 static void be_xmit_flush(struct be_adapter *adapter, struct be_tx_obj *txo)
1210 {
1211         struct be_queue_info *txq = &txo->q;
1212         struct be_eth_hdr_wrb *hdr = queue_index_node(txq, txo->last_req_hdr);
1213
1214         /* Mark the last request eventable if it hasn't been marked already */
1215         if (!(hdr->dw[2] & cpu_to_le32(TX_HDR_WRB_EVT)))
1216                 hdr->dw[2] |= cpu_to_le32(TX_HDR_WRB_EVT | TX_HDR_WRB_COMPL);
1217
1218         /* compose a dummy wrb if there are odd set of wrbs to notify */
1219         if (!lancer_chip(adapter) && (txo->pend_wrb_cnt & 1)) {
1220                 wrb_fill_dummy(queue_head_node(txq));
1221                 queue_head_inc(txq);
1222                 atomic_inc(&txq->used);
1223                 txo->pend_wrb_cnt++;
1224                 hdr->dw[2] &= ~cpu_to_le32(TX_HDR_WRB_NUM_MASK <<
1225                                            TX_HDR_WRB_NUM_SHIFT);
1226                 hdr->dw[2] |= cpu_to_le32((txo->last_req_wrb_cnt + 1) <<
1227                                           TX_HDR_WRB_NUM_SHIFT);
1228         }
1229         be_txq_notify(adapter, txo, txo->pend_wrb_cnt);
1230         txo->pend_wrb_cnt = 0;
1231 }
1232
1233 /* OS2BMC related */
1234
1235 #define DHCP_CLIENT_PORT        68
1236 #define DHCP_SERVER_PORT        67
1237 #define NET_BIOS_PORT1          137
1238 #define NET_BIOS_PORT2          138
1239 #define DHCPV6_RAS_PORT         547
1240
1241 #define is_mc_allowed_on_bmc(adapter, eh)       \
1242         (!is_multicast_filt_enabled(adapter) && \
1243          is_multicast_ether_addr(eh->h_dest) && \
1244          !is_broadcast_ether_addr(eh->h_dest))
1245
1246 #define is_bc_allowed_on_bmc(adapter, eh)       \
1247         (!is_broadcast_filt_enabled(adapter) && \
1248          is_broadcast_ether_addr(eh->h_dest))
1249
1250 #define is_arp_allowed_on_bmc(adapter, skb)     \
1251         (is_arp(skb) && is_arp_filt_enabled(adapter))
1252
1253 #define is_broadcast_packet(eh, adapter)        \
1254                 (is_multicast_ether_addr(eh->h_dest) && \
1255                 !compare_ether_addr(eh->h_dest, adapter->netdev->broadcast))
1256
1257 #define is_arp(skb)     (skb->protocol == htons(ETH_P_ARP))
1258
1259 #define is_arp_filt_enabled(adapter)    \
1260                 (adapter->bmc_filt_mask & (BMC_FILT_BROADCAST_ARP))
1261
1262 #define is_dhcp_client_filt_enabled(adapter)    \
1263                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_CLIENT)
1264
1265 #define is_dhcp_srvr_filt_enabled(adapter)      \
1266                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_SERVER)
1267
1268 #define is_nbios_filt_enabled(adapter)  \
1269                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_NET_BIOS)
1270
1271 #define is_ipv6_na_filt_enabled(adapter)        \
1272                 (adapter->bmc_filt_mask &       \
1273                         BMC_FILT_MULTICAST_IPV6_NEIGH_ADVER)
1274
1275 #define is_ipv6_ra_filt_enabled(adapter)        \
1276                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RA)
1277
1278 #define is_ipv6_ras_filt_enabled(adapter)       \
1279                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RAS)
1280
1281 #define is_broadcast_filt_enabled(adapter)      \
1282                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST)
1283
1284 #define is_multicast_filt_enabled(adapter)      \
1285                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST)
1286
1287 static bool be_send_pkt_to_bmc(struct be_adapter *adapter,
1288                                struct sk_buff **skb)
1289 {
1290         struct ethhdr *eh = (struct ethhdr *)(*skb)->data;
1291         bool os2bmc = false;
1292
1293         if (!be_is_os2bmc_enabled(adapter))
1294                 goto done;
1295
1296         if (!is_multicast_ether_addr(eh->h_dest))
1297                 goto done;
1298
1299         if (is_mc_allowed_on_bmc(adapter, eh) ||
1300             is_bc_allowed_on_bmc(adapter, eh) ||
1301             is_arp_allowed_on_bmc(adapter, (*skb))) {
1302                 os2bmc = true;
1303                 goto done;
1304         }
1305
1306         if ((*skb)->protocol == htons(ETH_P_IPV6)) {
1307                 struct ipv6hdr *hdr = ipv6_hdr((*skb));
1308                 u8 nexthdr = hdr->nexthdr;
1309
1310                 if (nexthdr == IPPROTO_ICMPV6) {
1311                         struct icmp6hdr *icmp6 = icmp6_hdr((*skb));
1312
1313                         switch (icmp6->icmp6_type) {
1314                         case NDISC_ROUTER_ADVERTISEMENT:
1315                                 os2bmc = is_ipv6_ra_filt_enabled(adapter);
1316                                 goto done;
1317                         case NDISC_NEIGHBOUR_ADVERTISEMENT:
1318                                 os2bmc = is_ipv6_na_filt_enabled(adapter);
1319                                 goto done;
1320                         default:
1321                                 break;
1322                         }
1323                 }
1324         }
1325
1326         if (is_udp_pkt((*skb))) {
1327                 struct udphdr *udp = udp_hdr((*skb));
1328
1329                 switch (ntohs(udp->dest)) {
1330                 case DHCP_CLIENT_PORT:
1331                         os2bmc = is_dhcp_client_filt_enabled(adapter);
1332                         goto done;
1333                 case DHCP_SERVER_PORT:
1334                         os2bmc = is_dhcp_srvr_filt_enabled(adapter);
1335                         goto done;
1336                 case NET_BIOS_PORT1:
1337                 case NET_BIOS_PORT2:
1338                         os2bmc = is_nbios_filt_enabled(adapter);
1339                         goto done;
1340                 case DHCPV6_RAS_PORT:
1341                         os2bmc = is_ipv6_ras_filt_enabled(adapter);
1342                         goto done;
1343                 default:
1344                         break;
1345                 }
1346         }
1347 done:
1348         /* For packets over a vlan, which are destined
1349          * to BMC, asic expects the vlan to be inline in the packet.
1350          */
1351         if (os2bmc)
1352                 *skb = be_insert_vlan_in_pkt(adapter, *skb, NULL);
1353
1354         return os2bmc;
1355 }
1356
1357 static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev)
1358 {
1359         struct be_adapter *adapter = netdev_priv(netdev);
1360         u16 q_idx = skb_get_queue_mapping(skb);
1361         struct be_tx_obj *txo = &adapter->tx_obj[q_idx];
1362         struct be_wrb_params wrb_params = { 0 };
1363         bool flush = !skb->xmit_more;
1364         u16 wrb_cnt;
1365
1366         skb = be_xmit_workarounds(adapter, skb, &wrb_params);
1367         if (unlikely(!skb))
1368                 goto drop;
1369
1370         be_get_wrb_params_from_skb(adapter, skb, &wrb_params);
1371
1372         wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1373         if (unlikely(!wrb_cnt)) {
1374                 dev_kfree_skb_any(skb);
1375                 goto drop;
1376         }
1377
1378         /* if os2bmc is enabled and if the pkt is destined to bmc,
1379          * enqueue the pkt a 2nd time with mgmt bit set.
1380          */
1381         if (be_send_pkt_to_bmc(adapter, &skb)) {
1382                 BE_WRB_F_SET(wrb_params.features, OS2BMC, 1);
1383                 wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1384                 if (unlikely(!wrb_cnt))
1385                         goto drop;
1386                 else
1387                         skb_get(skb);
1388         }
1389
1390         if (be_is_txq_full(txo)) {
1391                 netif_stop_subqueue(netdev, q_idx);
1392                 tx_stats(txo)->tx_stops++;
1393         }
1394
1395         if (flush || __netif_subqueue_stopped(netdev, q_idx))
1396                 be_xmit_flush(adapter, txo);
1397
1398         return NETDEV_TX_OK;
1399 drop:
1400         tx_stats(txo)->tx_drv_drops++;
1401         /* Flush the already enqueued tx requests */
1402         if (flush && txo->pend_wrb_cnt)
1403                 be_xmit_flush(adapter, txo);
1404
1405         return NETDEV_TX_OK;
1406 }
1407
1408 static inline bool be_in_all_promisc(struct be_adapter *adapter)
1409 {
1410         return (adapter->if_flags & BE_IF_FLAGS_ALL_PROMISCUOUS) ==
1411                         BE_IF_FLAGS_ALL_PROMISCUOUS;
1412 }
1413
1414 static int be_set_vlan_promisc(struct be_adapter *adapter)
1415 {
1416         struct device *dev = &adapter->pdev->dev;
1417         int status;
1418
1419         if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS)
1420                 return 0;
1421
1422         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, ON);
1423         if (!status) {
1424                 dev_info(dev, "Enabled VLAN promiscuous mode\n");
1425                 adapter->if_flags |= BE_IF_FLAGS_VLAN_PROMISCUOUS;
1426         } else {
1427                 dev_err(dev, "Failed to enable VLAN promiscuous mode\n");
1428         }
1429         return status;
1430 }
1431
1432 static int be_clear_vlan_promisc(struct be_adapter *adapter)
1433 {
1434         struct device *dev = &adapter->pdev->dev;
1435         int status;
1436
1437         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, OFF);
1438         if (!status) {
1439                 dev_info(dev, "Disabling VLAN promiscuous mode\n");
1440                 adapter->if_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
1441         }
1442         return status;
1443 }
1444
1445 /*
1446  * A max of 64 (BE_NUM_VLANS_SUPPORTED) vlans can be configured in BE.
1447  * If the user configures more, place BE in vlan promiscuous mode.
1448  */
1449 static int be_vid_config(struct be_adapter *adapter)
1450 {
1451         struct device *dev = &adapter->pdev->dev;
1452         u16 vids[BE_NUM_VLANS_SUPPORTED];
1453         u16 num = 0, i = 0;
1454         int status = 0;
1455
1456         /* No need to change the VLAN state if the I/F is in promiscuous */
1457         if (adapter->netdev->flags & IFF_PROMISC)
1458                 return 0;
1459
1460         if (adapter->vlans_added > be_max_vlans(adapter))
1461                 return be_set_vlan_promisc(adapter);
1462
1463         if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
1464                 status = be_clear_vlan_promisc(adapter);
1465                 if (status)
1466                         return status;
1467         }
1468         /* Construct VLAN Table to give to HW */
1469         for_each_set_bit(i, adapter->vids, VLAN_N_VID)
1470                 vids[num++] = cpu_to_le16(i);
1471
1472         status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0);
1473         if (status) {
1474                 dev_err(dev, "Setting HW VLAN filtering failed\n");
1475                 /* Set to VLAN promisc mode as setting VLAN filter failed */
1476                 if (addl_status(status) == MCC_ADDL_STATUS_INSUFFICIENT_VLANS ||
1477                     addl_status(status) ==
1478                                 MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES)
1479                         return be_set_vlan_promisc(adapter);
1480         }
1481         return status;
1482 }
1483
1484 static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1485 {
1486         struct be_adapter *adapter = netdev_priv(netdev);
1487         int status = 0;
1488
1489         mutex_lock(&adapter->rx_filter_lock);
1490
1491         /* Packets with VID 0 are always received by Lancer by default */
1492         if (lancer_chip(adapter) && vid == 0)
1493                 goto done;
1494
1495         if (test_bit(vid, adapter->vids))
1496                 goto done;
1497
1498         set_bit(vid, adapter->vids);
1499         adapter->vlans_added++;
1500
1501         status = be_vid_config(adapter);
1502 done:
1503         mutex_unlock(&adapter->rx_filter_lock);
1504         return status;
1505 }
1506
1507 static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid)
1508 {
1509         struct be_adapter *adapter = netdev_priv(netdev);
1510         int status = 0;
1511
1512         mutex_lock(&adapter->rx_filter_lock);
1513
1514         /* Packets with VID 0 are always received by Lancer by default */
1515         if (lancer_chip(adapter) && vid == 0)
1516                 goto done;
1517
1518         if (!test_bit(vid, adapter->vids))
1519                 goto done;
1520
1521         clear_bit(vid, adapter->vids);
1522         adapter->vlans_added--;
1523
1524         status = be_vid_config(adapter);
1525 done:
1526         mutex_unlock(&adapter->rx_filter_lock);
1527         return status;
1528 }
1529
1530 static void be_set_all_promisc(struct be_adapter *adapter)
1531 {
1532         be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, ON);
1533         adapter->if_flags |= BE_IF_FLAGS_ALL_PROMISCUOUS;
1534 }
1535
1536 static void be_set_mc_promisc(struct be_adapter *adapter)
1537 {
1538         int status;
1539
1540         if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS)
1541                 return;
1542
1543         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MCAST_PROMISCUOUS, ON);
1544         if (!status)
1545                 adapter->if_flags |= BE_IF_FLAGS_MCAST_PROMISCUOUS;
1546 }
1547
1548 static void be_set_uc_promisc(struct be_adapter *adapter)
1549 {
1550         int status;
1551
1552         if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS)
1553                 return;
1554
1555         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, ON);
1556         if (!status)
1557                 adapter->if_flags |= BE_IF_FLAGS_PROMISCUOUS;
1558 }
1559
1560 static void be_clear_uc_promisc(struct be_adapter *adapter)
1561 {
1562         int status;
1563
1564         if (!(adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS))
1565                 return;
1566
1567         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, OFF);
1568         if (!status)
1569                 adapter->if_flags &= ~BE_IF_FLAGS_PROMISCUOUS;
1570 }
1571
1572 /* The below 2 functions are the callback args for __dev_mc_sync/dev_uc_sync().
1573  * We use a single callback function for both sync and unsync. We really don't
1574  * add/remove addresses through this callback. But, we use it to detect changes
1575  * to the uc/mc lists. The entire uc/mc list is programmed in be_set_rx_mode().
1576  */
1577 static int be_uc_list_update(struct net_device *netdev,
1578                              const unsigned char *addr)
1579 {
1580         struct be_adapter *adapter = netdev_priv(netdev);
1581
1582         adapter->update_uc_list = true;
1583         return 0;
1584 }
1585
1586 static int be_mc_list_update(struct net_device *netdev,
1587                              const unsigned char *addr)
1588 {
1589         struct be_adapter *adapter = netdev_priv(netdev);
1590
1591         adapter->update_mc_list = true;
1592         return 0;
1593 }
1594
1595 static void be_set_mc_list(struct be_adapter *adapter)
1596 {
1597         struct net_device *netdev = adapter->netdev;
1598         struct netdev_hw_addr *ha;
1599         bool mc_promisc = false;
1600         int status;
1601
1602         netif_addr_lock_bh(netdev);
1603         __dev_mc_sync(netdev, be_mc_list_update, be_mc_list_update);
1604
1605         if (netdev->flags & IFF_PROMISC) {
1606                 adapter->update_mc_list = false;
1607         } else if (netdev->flags & IFF_ALLMULTI ||
1608                    netdev_mc_count(netdev) > be_max_mc(adapter)) {
1609                 /* Enable multicast promisc if num configured exceeds
1610                  * what we support
1611                  */
1612                 mc_promisc = true;
1613                 adapter->update_mc_list = false;
1614         } else if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS) {
1615                 /* Update mc-list unconditionally if the iface was previously
1616                  * in mc-promisc mode and now is out of that mode.
1617                  */
1618                 adapter->update_mc_list = true;
1619         }
1620
1621         if (adapter->update_mc_list) {
1622                 int i = 0;
1623
1624                 /* cache the mc-list in adapter */
1625                 netdev_for_each_mc_addr(ha, netdev) {
1626                         ether_addr_copy(adapter->mc_list[i].mac, ha->addr);
1627                         i++;
1628                 }
1629                 adapter->mc_count = netdev_mc_count(netdev);
1630         }
1631         netif_addr_unlock_bh(netdev);
1632
1633         if (mc_promisc) {
1634                 be_set_mc_promisc(adapter);
1635         } else if (adapter->update_mc_list) {
1636                 status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON);
1637                 if (!status)
1638                         adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS;
1639                 else
1640                         be_set_mc_promisc(adapter);
1641
1642                 adapter->update_mc_list = false;
1643         }
1644 }
1645
1646 static void be_clear_mc_list(struct be_adapter *adapter)
1647 {
1648         struct net_device *netdev = adapter->netdev;
1649
1650         __dev_mc_unsync(netdev, NULL);
1651         be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, OFF);
1652         adapter->mc_count = 0;
1653 }
1654
1655 static int be_uc_mac_add(struct be_adapter *adapter, int uc_idx)
1656 {
1657         if (ether_addr_equal((u8 *)&adapter->uc_list[uc_idx * ETH_ALEN],
1658                              adapter->dev_mac)) {
1659                 adapter->pmac_id[uc_idx + 1] = adapter->pmac_id[0];
1660                 return 0;
1661         }
1662
1663         return be_cmd_pmac_add(adapter,
1664                                (u8 *)&adapter->uc_list[uc_idx * ETH_ALEN],
1665                                adapter->if_handle,
1666                                &adapter->pmac_id[uc_idx + 1], 0);
1667 }
1668
1669 static void be_uc_mac_del(struct be_adapter *adapter, int pmac_id)
1670 {
1671         if (pmac_id == adapter->pmac_id[0])
1672                 return;
1673
1674         be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
1675 }
1676
1677 static void be_set_uc_list(struct be_adapter *adapter)
1678 {
1679         struct net_device *netdev = adapter->netdev;
1680         struct netdev_hw_addr *ha;
1681         bool uc_promisc = false;
1682         int curr_uc_macs = 0, i;
1683
1684         netif_addr_lock_bh(netdev);
1685         __dev_uc_sync(netdev, be_uc_list_update, be_uc_list_update);
1686
1687         if (netdev->flags & IFF_PROMISC) {
1688                 adapter->update_uc_list = false;
1689         } else if (netdev_uc_count(netdev) > (be_max_uc(adapter) - 1)) {
1690                 uc_promisc = true;
1691                 adapter->update_uc_list = false;
1692         }  else if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS) {
1693                 /* Update uc-list unconditionally if the iface was previously
1694                  * in uc-promisc mode and now is out of that mode.
1695                  */
1696                 adapter->update_uc_list = true;
1697         }
1698
1699         if (adapter->update_uc_list) {
1700                 i = 1; /* First slot is claimed by the Primary MAC */
1701
1702                 /* cache the uc-list in adapter array */
1703                 netdev_for_each_uc_addr(ha, netdev) {
1704                         ether_addr_copy(adapter->uc_list[i].mac, ha->addr);
1705                         i++;
1706                 }
1707                 curr_uc_macs = netdev_uc_count(netdev);
1708         }
1709         netif_addr_unlock_bh(netdev);
1710
1711         if (uc_promisc) {
1712                 be_set_uc_promisc(adapter);
1713         } else if (adapter->update_uc_list) {
1714                 be_clear_uc_promisc(adapter);
1715
1716                 for (i = 0; i < adapter->uc_macs; i++)
1717                         be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1718
1719                 for (i = 0; i < curr_uc_macs; i++)
1720                         be_uc_mac_add(adapter, i);
1721                 adapter->uc_macs = curr_uc_macs;
1722                 adapter->update_uc_list = false;
1723         }
1724 }
1725
1726 static void be_clear_uc_list(struct be_adapter *adapter)
1727 {
1728         struct net_device *netdev = adapter->netdev;
1729         int i;
1730
1731         __dev_uc_unsync(netdev, NULL);
1732         for (i = 0; i < adapter->uc_macs; i++)
1733                 be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1734
1735         adapter->uc_macs = 0;
1736 }
1737
1738 static void __be_set_rx_mode(struct be_adapter *adapter)
1739 {
1740         struct net_device *netdev = adapter->netdev;
1741
1742         mutex_lock(&adapter->rx_filter_lock);
1743
1744         if (netdev->flags & IFF_PROMISC) {
1745                 if (!be_in_all_promisc(adapter))
1746                         be_set_all_promisc(adapter);
1747         } else if (be_in_all_promisc(adapter)) {
1748                 /* We need to re-program the vlan-list or clear
1749                  * vlan-promisc mode (if needed) when the interface
1750                  * comes out of promisc mode.
1751                  */
1752                 be_vid_config(adapter);
1753         }
1754
1755         be_set_uc_list(adapter);
1756         be_set_mc_list(adapter);
1757
1758         mutex_unlock(&adapter->rx_filter_lock);
1759 }
1760
1761 static void be_work_set_rx_mode(struct work_struct *work)
1762 {
1763         struct be_cmd_work *cmd_work =
1764                                 container_of(work, struct be_cmd_work, work);
1765
1766         __be_set_rx_mode(cmd_work->adapter);
1767         kfree(cmd_work);
1768 }
1769
1770 static int be_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
1771 {
1772         struct be_adapter *adapter = netdev_priv(netdev);
1773         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1774         int status;
1775
1776         if (!sriov_enabled(adapter))
1777                 return -EPERM;
1778
1779         if (!is_valid_ether_addr(mac) || vf >= adapter->num_vfs)
1780                 return -EINVAL;
1781
1782         /* Proceed further only if user provided MAC is different
1783          * from active MAC
1784          */
1785         if (ether_addr_equal(mac, vf_cfg->mac_addr))
1786                 return 0;
1787
1788         if (BEx_chip(adapter)) {
1789                 be_cmd_pmac_del(adapter, vf_cfg->if_handle, vf_cfg->pmac_id,
1790                                 vf + 1);
1791
1792                 status = be_cmd_pmac_add(adapter, mac, vf_cfg->if_handle,
1793                                          &vf_cfg->pmac_id, vf + 1);
1794         } else {
1795                 status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
1796                                         vf + 1);
1797         }
1798
1799         if (status) {
1800                 dev_err(&adapter->pdev->dev, "MAC %pM set on VF %d Failed: %#x",
1801                         mac, vf, status);
1802                 return be_cmd_status(status);
1803         }
1804
1805         ether_addr_copy(vf_cfg->mac_addr, mac);
1806
1807         return 0;
1808 }
1809
1810 static int be_get_vf_config(struct net_device *netdev, int vf,
1811                             struct ifla_vf_info *vi)
1812 {
1813         struct be_adapter *adapter = netdev_priv(netdev);
1814         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1815
1816         if (!sriov_enabled(adapter))
1817                 return -EPERM;
1818
1819         if (vf >= adapter->num_vfs)
1820                 return -EINVAL;
1821
1822         vi->vf = vf;
1823         vi->max_tx_rate = vf_cfg->tx_rate;
1824         vi->min_tx_rate = 0;
1825         vi->vlan = vf_cfg->vlan_tag & VLAN_VID_MASK;
1826         vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT;
1827         memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN);
1828         vi->linkstate = adapter->vf_cfg[vf].plink_tracking;
1829         vi->spoofchk = adapter->vf_cfg[vf].spoofchk;
1830
1831         return 0;
1832 }
1833
1834 static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan)
1835 {
1836         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1837         u16 vids[BE_NUM_VLANS_SUPPORTED];
1838         int vf_if_id = vf_cfg->if_handle;
1839         int status;
1840
1841         /* Enable Transparent VLAN Tagging */
1842         status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0, 0);
1843         if (status)
1844                 return status;
1845
1846         /* Clear pre-programmed VLAN filters on VF if any, if TVT is enabled */
1847         vids[0] = 0;
1848         status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1);
1849         if (!status)
1850                 dev_info(&adapter->pdev->dev,
1851                          "Cleared guest VLANs on VF%d", vf);
1852
1853         /* After TVT is enabled, disallow VFs to program VLAN filters */
1854         if (vf_cfg->privileges & BE_PRIV_FILTMGMT) {
1855                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges &
1856                                                   ~BE_PRIV_FILTMGMT, vf + 1);
1857                 if (!status)
1858                         vf_cfg->privileges &= ~BE_PRIV_FILTMGMT;
1859         }
1860         return 0;
1861 }
1862
1863 static int be_clear_vf_tvt(struct be_adapter *adapter, int vf)
1864 {
1865         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1866         struct device *dev = &adapter->pdev->dev;
1867         int status;
1868
1869         /* Reset Transparent VLAN Tagging. */
1870         status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1,
1871                                        vf_cfg->if_handle, 0, 0);
1872         if (status)
1873                 return status;
1874
1875         /* Allow VFs to program VLAN filtering */
1876         if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
1877                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges |
1878                                                   BE_PRIV_FILTMGMT, vf + 1);
1879                 if (!status) {
1880                         vf_cfg->privileges |= BE_PRIV_FILTMGMT;
1881                         dev_info(dev, "VF%d: FILTMGMT priv enabled", vf);
1882                 }
1883         }
1884
1885         dev_info(dev,
1886                  "Disable/re-enable i/f in VM to clear Transparent VLAN tag");
1887         return 0;
1888 }
1889
1890 static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
1891                           __be16 vlan_proto)
1892 {
1893         struct be_adapter *adapter = netdev_priv(netdev);
1894         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1895         int status;
1896
1897         if (!sriov_enabled(adapter))
1898                 return -EPERM;
1899
1900         if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7)
1901                 return -EINVAL;
1902
1903         if (vlan_proto != htons(ETH_P_8021Q))
1904                 return -EPROTONOSUPPORT;
1905
1906         if (vlan || qos) {
1907                 vlan |= qos << VLAN_PRIO_SHIFT;
1908                 status = be_set_vf_tvt(adapter, vf, vlan);
1909         } else {
1910                 status = be_clear_vf_tvt(adapter, vf);
1911         }
1912
1913         if (status) {
1914                 dev_err(&adapter->pdev->dev,
1915                         "VLAN %d config on VF %d failed : %#x\n", vlan, vf,
1916                         status);
1917                 return be_cmd_status(status);
1918         }
1919
1920         vf_cfg->vlan_tag = vlan;
1921         return 0;
1922 }
1923
1924 static int be_set_vf_tx_rate(struct net_device *netdev, int vf,
1925                              int min_tx_rate, int max_tx_rate)
1926 {
1927         struct be_adapter *adapter = netdev_priv(netdev);
1928         struct device *dev = &adapter->pdev->dev;
1929         int percent_rate, status = 0;
1930         u16 link_speed = 0;
1931         u8 link_status;
1932
1933         if (!sriov_enabled(adapter))
1934                 return -EPERM;
1935
1936         if (vf >= adapter->num_vfs)
1937                 return -EINVAL;
1938
1939         if (min_tx_rate)
1940                 return -EINVAL;
1941
1942         if (!max_tx_rate)
1943                 goto config_qos;
1944
1945         status = be_cmd_link_status_query(adapter, &link_speed,
1946                                           &link_status, 0);
1947         if (status)
1948                 goto err;
1949
1950         if (!link_status) {
1951                 dev_err(dev, "TX-rate setting not allowed when link is down\n");
1952                 status = -ENETDOWN;
1953                 goto err;
1954         }
1955
1956         if (max_tx_rate < 100 || max_tx_rate > link_speed) {
1957                 dev_err(dev, "TX-rate must be between 100 and %d Mbps\n",
1958                         link_speed);
1959                 status = -EINVAL;
1960                 goto err;
1961         }
1962
1963         /* On Skyhawk the QOS setting must be done only as a % value */
1964         percent_rate = link_speed / 100;
1965         if (skyhawk_chip(adapter) && (max_tx_rate % percent_rate)) {
1966                 dev_err(dev, "TX-rate must be a multiple of %d Mbps\n",
1967                         percent_rate);
1968                 status = -EINVAL;
1969                 goto err;
1970         }
1971
1972 config_qos:
1973         status = be_cmd_config_qos(adapter, max_tx_rate, link_speed, vf + 1);
1974         if (status)
1975                 goto err;
1976
1977         adapter->vf_cfg[vf].tx_rate = max_tx_rate;
1978         return 0;
1979
1980 err:
1981         dev_err(dev, "TX-rate setting of %dMbps on VF%d failed\n",
1982                 max_tx_rate, vf);
1983         return be_cmd_status(status);
1984 }
1985
1986 static int be_set_vf_link_state(struct net_device *netdev, int vf,
1987                                 int link_state)
1988 {
1989         struct be_adapter *adapter = netdev_priv(netdev);
1990         int status;
1991
1992         if (!sriov_enabled(adapter))
1993                 return -EPERM;
1994
1995         if (vf >= adapter->num_vfs)
1996                 return -EINVAL;
1997
1998         status = be_cmd_set_logical_link_config(adapter, link_state, vf+1);
1999         if (status) {
2000                 dev_err(&adapter->pdev->dev,
2001                         "Link state change on VF %d failed: %#x\n", vf, status);
2002                 return be_cmd_status(status);
2003         }
2004
2005         adapter->vf_cfg[vf].plink_tracking = link_state;
2006
2007         return 0;
2008 }
2009
2010 static int be_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable)
2011 {
2012         struct be_adapter *adapter = netdev_priv(netdev);
2013         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
2014         u8 spoofchk;
2015         int status;
2016
2017         if (!sriov_enabled(adapter))
2018                 return -EPERM;
2019
2020         if (vf >= adapter->num_vfs)
2021                 return -EINVAL;
2022
2023         if (BEx_chip(adapter))
2024                 return -EOPNOTSUPP;
2025
2026         if (enable == vf_cfg->spoofchk)
2027                 return 0;
2028
2029         spoofchk = enable ? ENABLE_MAC_SPOOFCHK : DISABLE_MAC_SPOOFCHK;
2030
2031         status = be_cmd_set_hsw_config(adapter, 0, vf + 1, vf_cfg->if_handle,
2032                                        0, spoofchk);
2033         if (status) {
2034                 dev_err(&adapter->pdev->dev,
2035                         "Spoofchk change on VF %d failed: %#x\n", vf, status);
2036                 return be_cmd_status(status);
2037         }
2038
2039         vf_cfg->spoofchk = enable;
2040         return 0;
2041 }
2042
2043 static void be_aic_update(struct be_aic_obj *aic, u64 rx_pkts, u64 tx_pkts,
2044                           ulong now)
2045 {
2046         aic->rx_pkts_prev = rx_pkts;
2047         aic->tx_reqs_prev = tx_pkts;
2048         aic->jiffies = now;
2049 }
2050
2051 static int be_get_new_eqd(struct be_eq_obj *eqo)
2052 {
2053         struct be_adapter *adapter = eqo->adapter;
2054         int eqd, start;
2055         struct be_aic_obj *aic;
2056         struct be_rx_obj *rxo;
2057         struct be_tx_obj *txo;
2058         u64 rx_pkts = 0, tx_pkts = 0;
2059         ulong now;
2060         u32 pps, delta;
2061         int i;
2062
2063         aic = &adapter->aic_obj[eqo->idx];
2064         if (!aic->enable) {
2065                 if (aic->jiffies)
2066                         aic->jiffies = 0;
2067                 eqd = aic->et_eqd;
2068                 return eqd;
2069         }
2070
2071         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
2072                 do {
2073                         start = u64_stats_fetch_begin_irq(&rxo->stats.sync);
2074                         rx_pkts += rxo->stats.rx_pkts;
2075                 } while (u64_stats_fetch_retry_irq(&rxo->stats.sync, start));
2076         }
2077
2078         for_all_tx_queues_on_eq(adapter, eqo, txo, i) {
2079                 do {
2080                         start = u64_stats_fetch_begin_irq(&txo->stats.sync);
2081                         tx_pkts += txo->stats.tx_reqs;
2082                 } while (u64_stats_fetch_retry_irq(&txo->stats.sync, start));
2083         }
2084
2085         /* Skip, if wrapped around or first calculation */
2086         now = jiffies;
2087         if (!aic->jiffies || time_before(now, aic->jiffies) ||
2088             rx_pkts < aic->rx_pkts_prev ||
2089             tx_pkts < aic->tx_reqs_prev) {
2090                 be_aic_update(aic, rx_pkts, tx_pkts, now);
2091                 return aic->prev_eqd;
2092         }
2093
2094         delta = jiffies_to_msecs(now - aic->jiffies);
2095         if (delta == 0)
2096                 return aic->prev_eqd;
2097
2098         pps = (((u32)(rx_pkts - aic->rx_pkts_prev) * 1000) / delta) +
2099                 (((u32)(tx_pkts - aic->tx_reqs_prev) * 1000) / delta);
2100         eqd = (pps / 15000) << 2;
2101
2102         if (eqd < 8)
2103                 eqd = 0;
2104         eqd = min_t(u32, eqd, aic->max_eqd);
2105         eqd = max_t(u32, eqd, aic->min_eqd);
2106
2107         be_aic_update(aic, rx_pkts, tx_pkts, now);
2108
2109         return eqd;
2110 }
2111
2112 /* For Skyhawk-R only */
2113 static u32 be_get_eq_delay_mult_enc(struct be_eq_obj *eqo)
2114 {
2115         struct be_adapter *adapter = eqo->adapter;
2116         struct be_aic_obj *aic = &adapter->aic_obj[eqo->idx];
2117         ulong now = jiffies;
2118         int eqd;
2119         u32 mult_enc;
2120
2121         if (!aic->enable)
2122                 return 0;
2123
2124         if (jiffies_to_msecs(now - aic->jiffies) < 1)
2125                 eqd = aic->prev_eqd;
2126         else
2127                 eqd = be_get_new_eqd(eqo);
2128
2129         if (eqd > 100)
2130                 mult_enc = R2I_DLY_ENC_1;
2131         else if (eqd > 60)
2132                 mult_enc = R2I_DLY_ENC_2;
2133         else if (eqd > 20)
2134                 mult_enc = R2I_DLY_ENC_3;
2135         else
2136                 mult_enc = R2I_DLY_ENC_0;
2137
2138         aic->prev_eqd = eqd;
2139
2140         return mult_enc;
2141 }
2142
2143 void be_eqd_update(struct be_adapter *adapter, bool force_update)
2144 {
2145         struct be_set_eqd set_eqd[MAX_EVT_QS];
2146         struct be_aic_obj *aic;
2147         struct be_eq_obj *eqo;
2148         int i, num = 0, eqd;
2149
2150         for_all_evt_queues(adapter, eqo, i) {
2151                 aic = &adapter->aic_obj[eqo->idx];
2152                 eqd = be_get_new_eqd(eqo);
2153                 if (force_update || eqd != aic->prev_eqd) {
2154                         set_eqd[num].delay_multiplier = (eqd * 65)/100;
2155                         set_eqd[num].eq_id = eqo->q.id;
2156                         aic->prev_eqd = eqd;
2157                         num++;
2158                 }
2159         }
2160
2161         if (num)
2162                 be_cmd_modify_eqd(adapter, set_eqd, num);
2163 }
2164
2165 static void be_rx_stats_update(struct be_rx_obj *rxo,
2166                                struct be_rx_compl_info *rxcp)
2167 {
2168         struct be_rx_stats *stats = rx_stats(rxo);
2169
2170         u64_stats_update_begin(&stats->sync);
2171         stats->rx_compl++;
2172         stats->rx_bytes += rxcp->pkt_size;
2173         stats->rx_pkts++;
2174         if (rxcp->tunneled)
2175                 stats->rx_vxlan_offload_pkts++;
2176         if (rxcp->pkt_type == BE_MULTICAST_PACKET)
2177                 stats->rx_mcast_pkts++;
2178         if (rxcp->err)
2179                 stats->rx_compl_err++;
2180         u64_stats_update_end(&stats->sync);
2181 }
2182
2183 static inline bool csum_passed(struct be_rx_compl_info *rxcp)
2184 {
2185         /* L4 checksum is not reliable for non TCP/UDP packets.
2186          * Also ignore ipcksm for ipv6 pkts
2187          */
2188         return (rxcp->tcpf || rxcp->udpf) && rxcp->l4_csum &&
2189                 (rxcp->ip_csum || rxcp->ipv6) && !rxcp->err;
2190 }
2191
2192 static struct be_rx_page_info *get_rx_page_info(struct be_rx_obj *rxo)
2193 {
2194         struct be_adapter *adapter = rxo->adapter;
2195         struct be_rx_page_info *rx_page_info;
2196         struct be_queue_info *rxq = &rxo->q;
2197         u32 frag_idx = rxq->tail;
2198
2199         rx_page_info = &rxo->page_info_tbl[frag_idx];
2200         BUG_ON(!rx_page_info->page);
2201
2202         if (rx_page_info->last_frag) {
2203                 dma_unmap_page(&adapter->pdev->dev,
2204                                dma_unmap_addr(rx_page_info, bus),
2205                                adapter->big_page_size, DMA_FROM_DEVICE);
2206                 rx_page_info->last_frag = false;
2207         } else {
2208                 dma_sync_single_for_cpu(&adapter->pdev->dev,
2209                                         dma_unmap_addr(rx_page_info, bus),
2210                                         rx_frag_size, DMA_FROM_DEVICE);
2211         }
2212
2213         queue_tail_inc(rxq);
2214         atomic_dec(&rxq->used);
2215         return rx_page_info;
2216 }
2217
2218 /* Throwaway the data in the Rx completion */
2219 static void be_rx_compl_discard(struct be_rx_obj *rxo,
2220                                 struct be_rx_compl_info *rxcp)
2221 {
2222         struct be_rx_page_info *page_info;
2223         u16 i, num_rcvd = rxcp->num_rcvd;
2224
2225         for (i = 0; i < num_rcvd; i++) {
2226                 page_info = get_rx_page_info(rxo);
2227                 put_page(page_info->page);
2228                 memset(page_info, 0, sizeof(*page_info));
2229         }
2230 }
2231
2232 /*
2233  * skb_fill_rx_data forms a complete skb for an ether frame
2234  * indicated by rxcp.
2235  */
2236 static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb,
2237                              struct be_rx_compl_info *rxcp)
2238 {
2239         struct be_rx_page_info *page_info;
2240         u16 i, j;
2241         u16 hdr_len, curr_frag_len, remaining;
2242         u8 *start;
2243
2244         page_info = get_rx_page_info(rxo);
2245         start = page_address(page_info->page) + page_info->page_offset;
2246         prefetch(start);
2247
2248         /* Copy data in the first descriptor of this completion */
2249         curr_frag_len = min(rxcp->pkt_size, rx_frag_size);
2250
2251         skb->len = curr_frag_len;
2252         if (curr_frag_len <= BE_HDR_LEN) { /* tiny packet */
2253                 memcpy(skb->data, start, curr_frag_len);
2254                 /* Complete packet has now been moved to data */
2255                 put_page(page_info->page);
2256                 skb->data_len = 0;
2257                 skb->tail += curr_frag_len;
2258         } else {
2259                 hdr_len = ETH_HLEN;
2260                 memcpy(skb->data, start, hdr_len);
2261                 skb_shinfo(skb)->nr_frags = 1;
2262                 skb_frag_set_page(skb, 0, page_info->page);
2263                 skb_shinfo(skb)->frags[0].page_offset =
2264                                         page_info->page_offset + hdr_len;
2265                 skb_frag_size_set(&skb_shinfo(skb)->frags[0],
2266                                   curr_frag_len - hdr_len);
2267                 skb->data_len = curr_frag_len - hdr_len;
2268                 skb->truesize += rx_frag_size;
2269                 skb->tail += hdr_len;
2270         }
2271         page_info->page = NULL;
2272
2273         if (rxcp->pkt_size <= rx_frag_size) {
2274                 BUG_ON(rxcp->num_rcvd != 1);
2275                 return;
2276         }
2277
2278         /* More frags present for this completion */
2279         remaining = rxcp->pkt_size - curr_frag_len;
2280         for (i = 1, j = 0; i < rxcp->num_rcvd; i++) {
2281                 page_info = get_rx_page_info(rxo);
2282                 curr_frag_len = min(remaining, rx_frag_size);
2283
2284                 /* Coalesce all frags from the same physical page in one slot */
2285                 if (page_info->page_offset == 0) {
2286                         /* Fresh page */
2287                         j++;
2288                         skb_frag_set_page(skb, j, page_info->page);
2289                         skb_shinfo(skb)->frags[j].page_offset =
2290                                                         page_info->page_offset;
2291                         skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2292                         skb_shinfo(skb)->nr_frags++;
2293                 } else {
2294                         put_page(page_info->page);
2295                 }
2296
2297                 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2298                 skb->len += curr_frag_len;
2299                 skb->data_len += curr_frag_len;
2300                 skb->truesize += rx_frag_size;
2301                 remaining -= curr_frag_len;
2302                 page_info->page = NULL;
2303         }
2304         BUG_ON(j > MAX_SKB_FRAGS);
2305 }
2306
2307 /* Process the RX completion indicated by rxcp when GRO is disabled */
2308 static void be_rx_compl_process(struct be_rx_obj *rxo, struct napi_struct *napi,
2309                                 struct be_rx_compl_info *rxcp)
2310 {
2311         struct be_adapter *adapter = rxo->adapter;
2312         struct net_device *netdev = adapter->netdev;
2313         struct sk_buff *skb;
2314
2315         skb = netdev_alloc_skb_ip_align(netdev, BE_RX_SKB_ALLOC_SIZE);
2316         if (unlikely(!skb)) {
2317                 rx_stats(rxo)->rx_drops_no_skbs++;
2318                 be_rx_compl_discard(rxo, rxcp);
2319                 return;
2320         }
2321
2322         skb_fill_rx_data(rxo, skb, rxcp);
2323
2324         if (likely((netdev->features & NETIF_F_RXCSUM) && csum_passed(rxcp)))
2325                 skb->ip_summed = CHECKSUM_UNNECESSARY;
2326         else
2327                 skb_checksum_none_assert(skb);
2328
2329         skb->protocol = eth_type_trans(skb, netdev);
2330         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2331         if (netdev->features & NETIF_F_RXHASH)
2332                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2333
2334         skb->csum_level = rxcp->tunneled;
2335         skb_mark_napi_id(skb, napi);
2336
2337         if (rxcp->vlanf)
2338                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2339
2340         netif_receive_skb(skb);
2341 }
2342
2343 /* Process the RX completion indicated by rxcp when GRO is enabled */
2344 static void be_rx_compl_process_gro(struct be_rx_obj *rxo,
2345                                     struct napi_struct *napi,
2346                                     struct be_rx_compl_info *rxcp)
2347 {
2348         struct be_adapter *adapter = rxo->adapter;
2349         struct be_rx_page_info *page_info;
2350         struct sk_buff *skb = NULL;
2351         u16 remaining, curr_frag_len;
2352         u16 i, j;
2353
2354         skb = napi_get_frags(napi);
2355         if (!skb) {
2356                 be_rx_compl_discard(rxo, rxcp);
2357                 return;
2358         }
2359
2360         remaining = rxcp->pkt_size;
2361         for (i = 0, j = -1; i < rxcp->num_rcvd; i++) {
2362                 page_info = get_rx_page_info(rxo);
2363
2364                 curr_frag_len = min(remaining, rx_frag_size);
2365
2366                 /* Coalesce all frags from the same physical page in one slot */
2367                 if (i == 0 || page_info->page_offset == 0) {
2368                         /* First frag or Fresh page */
2369                         j++;
2370                         skb_frag_set_page(skb, j, page_info->page);
2371                         skb_shinfo(skb)->frags[j].page_offset =
2372                                                         page_info->page_offset;
2373                         skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2374                 } else {
2375                         put_page(page_info->page);
2376                 }
2377                 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2378                 skb->truesize += rx_frag_size;
2379                 remaining -= curr_frag_len;
2380                 memset(page_info, 0, sizeof(*page_info));
2381         }
2382         BUG_ON(j > MAX_SKB_FRAGS);
2383
2384         skb_shinfo(skb)->nr_frags = j + 1;
2385         skb->len = rxcp->pkt_size;
2386         skb->data_len = rxcp->pkt_size;
2387         skb->ip_summed = CHECKSUM_UNNECESSARY;
2388         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2389         if (adapter->netdev->features & NETIF_F_RXHASH)
2390                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2391
2392         skb->csum_level = rxcp->tunneled;
2393
2394         if (rxcp->vlanf)
2395                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2396
2397         napi_gro_frags(napi);
2398 }
2399
2400 static void be_parse_rx_compl_v1(struct be_eth_rx_compl *compl,
2401                                  struct be_rx_compl_info *rxcp)
2402 {
2403         rxcp->pkt_size = GET_RX_COMPL_V1_BITS(pktsize, compl);
2404         rxcp->vlanf = GET_RX_COMPL_V1_BITS(vtp, compl);
2405         rxcp->err = GET_RX_COMPL_V1_BITS(err, compl);
2406         rxcp->tcpf = GET_RX_COMPL_V1_BITS(tcpf, compl);
2407         rxcp->udpf = GET_RX_COMPL_V1_BITS(udpf, compl);
2408         rxcp->ip_csum = GET_RX_COMPL_V1_BITS(ipcksm, compl);
2409         rxcp->l4_csum = GET_RX_COMPL_V1_BITS(l4_cksm, compl);
2410         rxcp->ipv6 = GET_RX_COMPL_V1_BITS(ip_version, compl);
2411         rxcp->num_rcvd = GET_RX_COMPL_V1_BITS(numfrags, compl);
2412         rxcp->pkt_type = GET_RX_COMPL_V1_BITS(cast_enc, compl);
2413         rxcp->rss_hash = GET_RX_COMPL_V1_BITS(rsshash, compl);
2414         if (rxcp->vlanf) {
2415                 rxcp->qnq = GET_RX_COMPL_V1_BITS(qnq, compl);
2416                 rxcp->vlan_tag = GET_RX_COMPL_V1_BITS(vlan_tag, compl);
2417         }
2418         rxcp->port = GET_RX_COMPL_V1_BITS(port, compl);
2419         rxcp->tunneled =
2420                 GET_RX_COMPL_V1_BITS(tunneled, compl);
2421 }
2422
2423 static void be_parse_rx_compl_v0(struct be_eth_rx_compl *compl,
2424                                  struct be_rx_compl_info *rxcp)
2425 {
2426         rxcp->pkt_size = GET_RX_COMPL_V0_BITS(pktsize, compl);
2427         rxcp->vlanf = GET_RX_COMPL_V0_BITS(vtp, compl);
2428         rxcp->err = GET_RX_COMPL_V0_BITS(err, compl);
2429         rxcp->tcpf = GET_RX_COMPL_V0_BITS(tcpf, compl);
2430         rxcp->udpf = GET_RX_COMPL_V0_BITS(udpf, compl);
2431         rxcp->ip_csum = GET_RX_COMPL_V0_BITS(ipcksm, compl);
2432         rxcp->l4_csum = GET_RX_COMPL_V0_BITS(l4_cksm, compl);
2433         rxcp->ipv6 = GET_RX_COMPL_V0_BITS(ip_version, compl);
2434         rxcp->num_rcvd = GET_RX_COMPL_V0_BITS(numfrags, compl);
2435         rxcp->pkt_type = GET_RX_COMPL_V0_BITS(cast_enc, compl);
2436         rxcp->rss_hash = GET_RX_COMPL_V0_BITS(rsshash, compl);
2437         if (rxcp->vlanf) {
2438                 rxcp->qnq = GET_RX_COMPL_V0_BITS(qnq, compl);
2439                 rxcp->vlan_tag = GET_RX_COMPL_V0_BITS(vlan_tag, compl);
2440         }
2441         rxcp->port = GET_RX_COMPL_V0_BITS(port, compl);
2442         rxcp->ip_frag = GET_RX_COMPL_V0_BITS(ip_frag, compl);
2443 }
2444
2445 static struct be_rx_compl_info *be_rx_compl_get(struct be_rx_obj *rxo)
2446 {
2447         struct be_eth_rx_compl *compl = queue_tail_node(&rxo->cq);
2448         struct be_rx_compl_info *rxcp = &rxo->rxcp;
2449         struct be_adapter *adapter = rxo->adapter;
2450
2451         /* For checking the valid bit it is Ok to use either definition as the
2452          * valid bit is at the same position in both v0 and v1 Rx compl */
2453         if (compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] == 0)
2454                 return NULL;
2455
2456         rmb();
2457         be_dws_le_to_cpu(compl, sizeof(*compl));
2458
2459         if (adapter->be3_native)
2460                 be_parse_rx_compl_v1(compl, rxcp);
2461         else
2462                 be_parse_rx_compl_v0(compl, rxcp);
2463
2464         if (rxcp->ip_frag)
2465                 rxcp->l4_csum = 0;
2466
2467         if (rxcp->vlanf) {
2468                 /* In QNQ modes, if qnq bit is not set, then the packet was
2469                  * tagged only with the transparent outer vlan-tag and must
2470                  * not be treated as a vlan packet by host
2471                  */
2472                 if (be_is_qnq_mode(adapter) && !rxcp->qnq)
2473                         rxcp->vlanf = 0;
2474
2475                 if (!lancer_chip(adapter))
2476                         rxcp->vlan_tag = swab16(rxcp->vlan_tag);
2477
2478                 if (adapter->pvid == (rxcp->vlan_tag & VLAN_VID_MASK) &&
2479                     !test_bit(rxcp->vlan_tag, adapter->vids))
2480                         rxcp->vlanf = 0;
2481         }
2482
2483         /* As the compl has been parsed, reset it; we wont touch it again */
2484         compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] = 0;
2485
2486         queue_tail_inc(&rxo->cq);
2487         return rxcp;
2488 }
2489
2490 static inline struct page *be_alloc_pages(u32 size, gfp_t gfp)
2491 {
2492         u32 order = get_order(size);
2493
2494         if (order > 0)
2495                 gfp |= __GFP_COMP;
2496         return  alloc_pages(gfp, order);
2497 }
2498
2499 /*
2500  * Allocate a page, split it to fragments of size rx_frag_size and post as
2501  * receive buffers to BE
2502  */
2503 static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed)
2504 {
2505         struct be_adapter *adapter = rxo->adapter;
2506         struct be_rx_page_info *page_info = NULL, *prev_page_info = NULL;
2507         struct be_queue_info *rxq = &rxo->q;
2508         struct page *pagep = NULL;
2509         struct device *dev = &adapter->pdev->dev;
2510         struct be_eth_rx_d *rxd;
2511         u64 page_dmaaddr = 0, frag_dmaaddr;
2512         u32 posted, page_offset = 0, notify = 0;
2513
2514         page_info = &rxo->page_info_tbl[rxq->head];
2515         for (posted = 0; posted < frags_needed && !page_info->page; posted++) {
2516                 if (!pagep) {
2517                         pagep = be_alloc_pages(adapter->big_page_size, gfp);
2518                         if (unlikely(!pagep)) {
2519                                 rx_stats(rxo)->rx_post_fail++;
2520                                 break;
2521                         }
2522                         page_dmaaddr = dma_map_page(dev, pagep, 0,
2523                                                     adapter->big_page_size,
2524                                                     DMA_FROM_DEVICE);
2525                         if (dma_mapping_error(dev, page_dmaaddr)) {
2526                                 put_page(pagep);
2527                                 pagep = NULL;
2528                                 adapter->drv_stats.dma_map_errors++;
2529                                 break;
2530                         }
2531                         page_offset = 0;
2532                 } else {
2533                         get_page(pagep);
2534                         page_offset += rx_frag_size;
2535                 }
2536                 page_info->page_offset = page_offset;
2537                 page_info->page = pagep;
2538
2539                 rxd = queue_head_node(rxq);
2540                 frag_dmaaddr = page_dmaaddr + page_info->page_offset;
2541                 rxd->fragpa_lo = cpu_to_le32(frag_dmaaddr & 0xFFFFFFFF);
2542                 rxd->fragpa_hi = cpu_to_le32(upper_32_bits(frag_dmaaddr));
2543
2544                 /* Any space left in the current big page for another frag? */
2545                 if ((page_offset + rx_frag_size + rx_frag_size) >
2546                                         adapter->big_page_size) {
2547                         pagep = NULL;
2548                         page_info->last_frag = true;
2549                         dma_unmap_addr_set(page_info, bus, page_dmaaddr);
2550                 } else {
2551                         dma_unmap_addr_set(page_info, bus, frag_dmaaddr);
2552                 }
2553
2554                 prev_page_info = page_info;
2555                 queue_head_inc(rxq);
2556                 page_info = &rxo->page_info_tbl[rxq->head];
2557         }
2558
2559         /* Mark the last frag of a page when we break out of the above loop
2560          * with no more slots available in the RXQ
2561          */
2562         if (pagep) {
2563                 prev_page_info->last_frag = true;
2564                 dma_unmap_addr_set(prev_page_info, bus, page_dmaaddr);
2565         }
2566
2567         if (posted) {
2568                 atomic_add(posted, &rxq->used);
2569                 if (rxo->rx_post_starved)
2570                         rxo->rx_post_starved = false;
2571                 do {
2572                         notify = min(MAX_NUM_POST_ERX_DB, posted);
2573                         be_rxq_notify(adapter, rxq->id, notify);
2574                         posted -= notify;
2575                 } while (posted);
2576         } else if (atomic_read(&rxq->used) == 0) {
2577                 /* Let be_worker replenish when memory is available */
2578                 rxo->rx_post_starved = true;
2579         }
2580 }
2581
2582 static struct be_tx_compl_info *be_tx_compl_get(struct be_tx_obj *txo)
2583 {
2584         struct be_queue_info *tx_cq = &txo->cq;
2585         struct be_tx_compl_info *txcp = &txo->txcp;
2586         struct be_eth_tx_compl *compl = queue_tail_node(tx_cq);
2587
2588         if (compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0)
2589                 return NULL;
2590
2591         /* Ensure load ordering of valid bit dword and other dwords below */
2592         rmb();
2593         be_dws_le_to_cpu(compl, sizeof(*compl));
2594
2595         txcp->status = GET_TX_COMPL_BITS(status, compl);
2596         txcp->end_index = GET_TX_COMPL_BITS(wrb_index, compl);
2597
2598         compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0;
2599         queue_tail_inc(tx_cq);
2600         return txcp;
2601 }
2602
2603 static u16 be_tx_compl_process(struct be_adapter *adapter,
2604                                struct be_tx_obj *txo, u16 last_index)
2605 {
2606         struct sk_buff **sent_skbs = txo->sent_skb_list;
2607         struct be_queue_info *txq = &txo->q;
2608         struct sk_buff *skb = NULL;
2609         bool unmap_skb_hdr = false;
2610         struct be_eth_wrb *wrb;
2611         u16 num_wrbs = 0;
2612         u32 frag_index;
2613
2614         do {
2615                 if (sent_skbs[txq->tail]) {
2616                         /* Free skb from prev req */
2617                         if (skb)
2618                                 dev_consume_skb_any(skb);
2619                         skb = sent_skbs[txq->tail];
2620                         sent_skbs[txq->tail] = NULL;
2621                         queue_tail_inc(txq);  /* skip hdr wrb */
2622                         num_wrbs++;
2623                         unmap_skb_hdr = true;
2624                 }
2625                 wrb = queue_tail_node(txq);
2626                 frag_index = txq->tail;
2627                 unmap_tx_frag(&adapter->pdev->dev, wrb,
2628                               (unmap_skb_hdr && skb_headlen(skb)));
2629                 unmap_skb_hdr = false;
2630                 queue_tail_inc(txq);
2631                 num_wrbs++;
2632         } while (frag_index != last_index);
2633         dev_consume_skb_any(skb);
2634
2635         return num_wrbs;
2636 }
2637
2638 /* Return the number of events in the event queue */
2639 static inline int events_get(struct be_eq_obj *eqo)
2640 {
2641         struct be_eq_entry *eqe;
2642         int num = 0;
2643
2644         do {
2645                 eqe = queue_tail_node(&eqo->q);
2646                 if (eqe->evt == 0)
2647                         break;
2648
2649                 rmb();
2650                 eqe->evt = 0;
2651                 num++;
2652                 queue_tail_inc(&eqo->q);
2653         } while (true);
2654
2655         return num;
2656 }
2657
2658 /* Leaves the EQ is disarmed state */
2659 static void be_eq_clean(struct be_eq_obj *eqo)
2660 {
2661         int num = events_get(eqo);
2662
2663         be_eq_notify(eqo->adapter, eqo->q.id, false, true, num, 0);
2664 }
2665
2666 /* Free posted rx buffers that were not used */
2667 static void be_rxq_clean(struct be_rx_obj *rxo)
2668 {
2669         struct be_queue_info *rxq = &rxo->q;
2670         struct be_rx_page_info *page_info;
2671
2672         while (atomic_read(&rxq->used) > 0) {
2673                 page_info = get_rx_page_info(rxo);
2674                 put_page(page_info->page);
2675                 memset(page_info, 0, sizeof(*page_info));
2676         }
2677         BUG_ON(atomic_read(&rxq->used));
2678         rxq->tail = 0;
2679         rxq->head = 0;
2680 }
2681
2682 static void be_rx_cq_clean(struct be_rx_obj *rxo)
2683 {
2684         struct be_queue_info *rx_cq = &rxo->cq;
2685         struct be_rx_compl_info *rxcp;
2686         struct be_adapter *adapter = rxo->adapter;
2687         int flush_wait = 0;
2688
2689         /* Consume pending rx completions.
2690          * Wait for the flush completion (identified by zero num_rcvd)
2691          * to arrive. Notify CQ even when there are no more CQ entries
2692          * for HW to flush partially coalesced CQ entries.
2693          * In Lancer, there is no need to wait for flush compl.
2694          */
2695         for (;;) {
2696                 rxcp = be_rx_compl_get(rxo);
2697                 if (!rxcp) {
2698                         if (lancer_chip(adapter))
2699                                 break;
2700
2701                         if (flush_wait++ > 50 ||
2702                             be_check_error(adapter,
2703                                            BE_ERROR_HW)) {
2704                                 dev_warn(&adapter->pdev->dev,
2705                                          "did not receive flush compl\n");
2706                                 break;
2707                         }
2708                         be_cq_notify(adapter, rx_cq->id, true, 0);
2709                         mdelay(1);
2710                 } else {
2711                         be_rx_compl_discard(rxo, rxcp);
2712                         be_cq_notify(adapter, rx_cq->id, false, 1);
2713                         if (rxcp->num_rcvd == 0)
2714                                 break;
2715                 }
2716         }
2717
2718         /* After cleanup, leave the CQ in unarmed state */
2719         be_cq_notify(adapter, rx_cq->id, false, 0);
2720 }
2721
2722 static void be_tx_compl_clean(struct be_adapter *adapter)
2723 {
2724         struct device *dev = &adapter->pdev->dev;
2725         u16 cmpl = 0, timeo = 0, num_wrbs = 0;
2726         struct be_tx_compl_info *txcp;
2727         struct be_queue_info *txq;
2728         u32 end_idx, notified_idx;
2729         struct be_tx_obj *txo;
2730         int i, pending_txqs;
2731
2732         /* Stop polling for compls when HW has been silent for 10ms */
2733         do {
2734                 pending_txqs = adapter->num_tx_qs;
2735
2736                 for_all_tx_queues(adapter, txo, i) {
2737                         cmpl = 0;
2738                         num_wrbs = 0;
2739                         txq = &txo->q;
2740                         while ((txcp = be_tx_compl_get(txo))) {
2741                                 num_wrbs +=
2742                                         be_tx_compl_process(adapter, txo,
2743                                                             txcp->end_index);
2744                                 cmpl++;
2745                         }
2746                         if (cmpl) {
2747                                 be_cq_notify(adapter, txo->cq.id, false, cmpl);
2748                                 atomic_sub(num_wrbs, &txq->used);
2749                                 timeo = 0;
2750                         }
2751                         if (!be_is_tx_compl_pending(txo))
2752                                 pending_txqs--;
2753                 }
2754
2755                 if (pending_txqs == 0 || ++timeo > 10 ||
2756                     be_check_error(adapter, BE_ERROR_HW))
2757                         break;
2758
2759                 mdelay(1);
2760         } while (true);
2761
2762         /* Free enqueued TX that was never notified to HW */
2763         for_all_tx_queues(adapter, txo, i) {
2764                 txq = &txo->q;
2765
2766                 if (atomic_read(&txq->used)) {
2767                         dev_info(dev, "txq%d: cleaning %d pending tx-wrbs\n",
2768                                  i, atomic_read(&txq->used));
2769                         notified_idx = txq->tail;
2770                         end_idx = txq->tail;
2771                         index_adv(&end_idx, atomic_read(&txq->used) - 1,
2772                                   txq->len);
2773                         /* Use the tx-compl process logic to handle requests
2774                          * that were not sent to the HW.
2775                          */
2776                         num_wrbs = be_tx_compl_process(adapter, txo, end_idx);
2777                         atomic_sub(num_wrbs, &txq->used);
2778                         BUG_ON(atomic_read(&txq->used));
2779                         txo->pend_wrb_cnt = 0;
2780                         /* Since hw was never notified of these requests,
2781                          * reset TXQ indices
2782                          */
2783                         txq->head = notified_idx;
2784                         txq->tail = notified_idx;
2785                 }
2786         }
2787 }
2788
2789 static void be_evt_queues_destroy(struct be_adapter *adapter)
2790 {
2791         struct be_eq_obj *eqo;
2792         int i;
2793
2794         for_all_evt_queues(adapter, eqo, i) {
2795                 if (eqo->q.created) {
2796                         be_eq_clean(eqo);
2797                         be_cmd_q_destroy(adapter, &eqo->q, QTYPE_EQ);
2798                         netif_napi_del(&eqo->napi);
2799                         free_cpumask_var(eqo->affinity_mask);
2800                 }
2801                 be_queue_free(adapter, &eqo->q);
2802         }
2803 }
2804
2805 static int be_evt_queues_create(struct be_adapter *adapter)
2806 {
2807         struct be_queue_info *eq;
2808         struct be_eq_obj *eqo;
2809         struct be_aic_obj *aic;
2810         int i, rc;
2811
2812         /* need enough EQs to service both RX and TX queues */
2813         adapter->num_evt_qs = min_t(u16, num_irqs(adapter),
2814                                     max(adapter->cfg_num_rx_irqs,
2815                                         adapter->cfg_num_tx_irqs));
2816
2817         for_all_evt_queues(adapter, eqo, i) {
2818                 int numa_node = dev_to_node(&adapter->pdev->dev);
2819
2820                 aic = &adapter->aic_obj[i];
2821                 eqo->adapter = adapter;
2822                 eqo->idx = i;
2823                 aic->max_eqd = BE_MAX_EQD;
2824                 aic->enable = true;
2825
2826                 eq = &eqo->q;
2827                 rc = be_queue_alloc(adapter, eq, EVNT_Q_LEN,
2828                                     sizeof(struct be_eq_entry));
2829                 if (rc)
2830                         return rc;
2831
2832                 rc = be_cmd_eq_create(adapter, eqo);
2833                 if (rc)
2834                         return rc;
2835
2836                 if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
2837                         return -ENOMEM;
2838                 cpumask_set_cpu(cpumask_local_spread(i, numa_node),
2839                                 eqo->affinity_mask);
2840                 netif_napi_add(adapter->netdev, &eqo->napi, be_poll,
2841                                BE_NAPI_WEIGHT);
2842         }
2843         return 0;
2844 }
2845
2846 static void be_mcc_queues_destroy(struct be_adapter *adapter)
2847 {
2848         struct be_queue_info *q;
2849
2850         q = &adapter->mcc_obj.q;
2851         if (q->created)
2852                 be_cmd_q_destroy(adapter, q, QTYPE_MCCQ);
2853         be_queue_free(adapter, q);
2854
2855         q = &adapter->mcc_obj.cq;
2856         if (q->created)
2857                 be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2858         be_queue_free(adapter, q);
2859 }
2860
2861 /* Must be called only after TX qs are created as MCC shares TX EQ */
2862 static int be_mcc_queues_create(struct be_adapter *adapter)
2863 {
2864         struct be_queue_info *q, *cq;
2865
2866         cq = &adapter->mcc_obj.cq;
2867         if (be_queue_alloc(adapter, cq, MCC_CQ_LEN,
2868                            sizeof(struct be_mcc_compl)))
2869                 goto err;
2870
2871         /* Use the default EQ for MCC completions */
2872         if (be_cmd_cq_create(adapter, cq, &mcc_eqo(adapter)->q, true, 0))
2873                 goto mcc_cq_free;
2874
2875         q = &adapter->mcc_obj.q;
2876         if (be_queue_alloc(adapter, q, MCC_Q_LEN, sizeof(struct be_mcc_wrb)))
2877                 goto mcc_cq_destroy;
2878
2879         if (be_cmd_mccq_create(adapter, q, cq))
2880                 goto mcc_q_free;
2881
2882         return 0;
2883
2884 mcc_q_free:
2885         be_queue_free(adapter, q);
2886 mcc_cq_destroy:
2887         be_cmd_q_destroy(adapter, cq, QTYPE_CQ);
2888 mcc_cq_free:
2889         be_queue_free(adapter, cq);
2890 err:
2891         return -1;
2892 }
2893
2894 static void be_tx_queues_destroy(struct be_adapter *adapter)
2895 {
2896         struct be_queue_info *q;
2897         struct be_tx_obj *txo;
2898         u8 i;
2899
2900         for_all_tx_queues(adapter, txo, i) {
2901                 q = &txo->q;
2902                 if (q->created)
2903                         be_cmd_q_destroy(adapter, q, QTYPE_TXQ);
2904                 be_queue_free(adapter, q);
2905
2906                 q = &txo->cq;
2907                 if (q->created)
2908                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2909                 be_queue_free(adapter, q);
2910         }
2911 }
2912
2913 static int be_tx_qs_create(struct be_adapter *adapter)
2914 {
2915         struct be_queue_info *cq;
2916         struct be_tx_obj *txo;
2917         struct be_eq_obj *eqo;
2918         int status, i;
2919
2920         adapter->num_tx_qs = min(adapter->num_evt_qs, adapter->cfg_num_tx_irqs);
2921
2922         for_all_tx_queues(adapter, txo, i) {
2923                 cq = &txo->cq;
2924                 status = be_queue_alloc(adapter, cq, TX_CQ_LEN,
2925                                         sizeof(struct be_eth_tx_compl));
2926                 if (status)
2927                         return status;
2928
2929                 u64_stats_init(&txo->stats.sync);
2930                 u64_stats_init(&txo->stats.sync_compl);
2931
2932                 /* If num_evt_qs is less than num_tx_qs, then more than
2933                  * one txq share an eq
2934                  */
2935                 eqo = &adapter->eq_obj[i % adapter->num_evt_qs];
2936                 status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3);
2937                 if (status)
2938                         return status;
2939
2940                 status = be_queue_alloc(adapter, &txo->q, TX_Q_LEN,
2941                                         sizeof(struct be_eth_wrb));
2942                 if (status)
2943                         return status;
2944
2945                 status = be_cmd_txq_create(adapter, txo);
2946                 if (status)
2947                         return status;
2948
2949                 netif_set_xps_queue(adapter->netdev, eqo->affinity_mask,
2950                                     eqo->idx);
2951         }
2952
2953         dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n",
2954                  adapter->num_tx_qs);
2955         return 0;
2956 }
2957
2958 static void be_rx_cqs_destroy(struct be_adapter *adapter)
2959 {
2960         struct be_queue_info *q;
2961         struct be_rx_obj *rxo;
2962         int i;
2963
2964         for_all_rx_queues(adapter, rxo, i) {
2965                 q = &rxo->cq;
2966                 if (q->created)
2967                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2968                 be_queue_free(adapter, q);
2969         }
2970 }
2971
2972 static int be_rx_cqs_create(struct be_adapter *adapter)
2973 {
2974         struct be_queue_info *eq, *cq;
2975         struct be_rx_obj *rxo;
2976         int rc, i;
2977
2978         adapter->num_rss_qs =
2979                         min(adapter->num_evt_qs, adapter->cfg_num_rx_irqs);
2980
2981         /* We'll use RSS only if atleast 2 RSS rings are supported. */
2982         if (adapter->num_rss_qs < 2)
2983                 adapter->num_rss_qs = 0;
2984
2985         adapter->num_rx_qs = adapter->num_rss_qs + adapter->need_def_rxq;
2986
2987         /* When the interface is not capable of RSS rings (and there is no
2988          * need to create a default RXQ) we'll still need one RXQ
2989          */
2990         if (adapter->num_rx_qs == 0)
2991                 adapter->num_rx_qs = 1;
2992
2993         adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE;
2994         for_all_rx_queues(adapter, rxo, i) {
2995                 rxo->adapter = adapter;
2996                 cq = &rxo->cq;
2997                 rc = be_queue_alloc(adapter, cq, RX_CQ_LEN,
2998                                     sizeof(struct be_eth_rx_compl));
2999                 if (rc)
3000                         return rc;
3001
3002                 u64_stats_init(&rxo->stats.sync);
3003                 eq = &adapter->eq_obj[i % adapter->num_evt_qs].q;
3004                 rc = be_cmd_cq_create(adapter, cq, eq, false, 3);
3005                 if (rc)
3006                         return rc;
3007         }
3008
3009         dev_info(&adapter->pdev->dev,
3010                  "created %d RX queue(s)\n", adapter->num_rx_qs);
3011         return 0;
3012 }
3013
3014 static irqreturn_t be_intx(int irq, void *dev)
3015 {
3016         struct be_eq_obj *eqo = dev;
3017         struct be_adapter *adapter = eqo->adapter;
3018         int num_evts = 0;
3019
3020         /* IRQ is not expected when NAPI is scheduled as the EQ
3021          * will not be armed.
3022          * But, this can happen on Lancer INTx where it takes
3023          * a while to de-assert INTx or in BE2 where occasionaly
3024          * an interrupt may be raised even when EQ is unarmed.
3025          * If NAPI is already scheduled, then counting & notifying
3026          * events will orphan them.
3027          */
3028         if (napi_schedule_prep(&eqo->napi)) {
3029                 num_evts = events_get(eqo);
3030                 __napi_schedule(&eqo->napi);
3031                 if (num_evts)
3032                         eqo->spurious_intr = 0;
3033         }
3034         be_eq_notify(adapter, eqo->q.id, false, true, num_evts, 0);
3035
3036         /* Return IRQ_HANDLED only for the the first spurious intr
3037          * after a valid intr to stop the kernel from branding
3038          * this irq as a bad one!
3039          */
3040         if (num_evts || eqo->spurious_intr++ == 0)
3041                 return IRQ_HANDLED;
3042         else
3043                 return IRQ_NONE;
3044 }
3045
3046 static irqreturn_t be_msix(int irq, void *dev)
3047 {
3048         struct be_eq_obj *eqo = dev;
3049
3050         be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
3051         napi_schedule(&eqo->napi);
3052         return IRQ_HANDLED;
3053 }
3054
3055 static inline bool do_gro(struct be_rx_compl_info *rxcp)
3056 {
3057         return (rxcp->tcpf && !rxcp->err && rxcp->l4_csum) ? true : false;
3058 }
3059
3060 static int be_process_rx(struct be_rx_obj *rxo, struct napi_struct *napi,
3061                          int budget, int polling)
3062 {
3063         struct be_adapter *adapter = rxo->adapter;
3064         struct be_queue_info *rx_cq = &rxo->cq;
3065         struct be_rx_compl_info *rxcp;
3066         u32 work_done;
3067         u32 frags_consumed = 0;
3068
3069         for (work_done = 0; work_done < budget; work_done++) {
3070                 rxcp = be_rx_compl_get(rxo);
3071                 if (!rxcp)
3072                         break;
3073
3074                 /* Is it a flush compl that has no data */
3075                 if (unlikely(rxcp->num_rcvd == 0))
3076                         goto loop_continue;
3077
3078                 /* Discard compl with partial DMA Lancer B0 */
3079                 if (unlikely(!rxcp->pkt_size)) {
3080                         be_rx_compl_discard(rxo, rxcp);
3081                         goto loop_continue;
3082                 }
3083
3084                 /* On BE drop pkts that arrive due to imperfect filtering in
3085                  * promiscuous mode on some skews
3086                  */
3087                 if (unlikely(rxcp->port != adapter->port_num &&
3088                              !lancer_chip(adapter))) {
3089                         be_rx_compl_discard(rxo, rxcp);
3090                         goto loop_continue;
3091                 }
3092
3093                 /* Don't do gro when we're busy_polling */
3094                 if (do_gro(rxcp) && polling != BUSY_POLLING)
3095                         be_rx_compl_process_gro(rxo, napi, rxcp);
3096                 else
3097                         be_rx_compl_process(rxo, napi, rxcp);
3098
3099 loop_continue:
3100                 frags_consumed += rxcp->num_rcvd;
3101                 be_rx_stats_update(rxo, rxcp);
3102         }
3103
3104         if (work_done) {
3105                 be_cq_notify(adapter, rx_cq->id, true, work_done);
3106
3107                 /* When an rx-obj gets into post_starved state, just
3108                  * let be_worker do the posting.
3109                  */
3110                 if (atomic_read(&rxo->q.used) < RX_FRAGS_REFILL_WM &&
3111                     !rxo->rx_post_starved)
3112                         be_post_rx_frags(rxo, GFP_ATOMIC,
3113                                          max_t(u32, MAX_RX_POST,
3114                                                frags_consumed));
3115         }
3116
3117         return work_done;
3118 }
3119
3120 static inline void be_update_tx_err(struct be_tx_obj *txo, u8 status)
3121 {
3122         switch (status) {
3123         case BE_TX_COMP_HDR_PARSE_ERR:
3124                 tx_stats(txo)->tx_hdr_parse_err++;
3125                 break;
3126         case BE_TX_COMP_NDMA_ERR:
3127                 tx_stats(txo)->tx_dma_err++;
3128                 break;
3129         case BE_TX_COMP_ACL_ERR:
3130                 tx_stats(txo)->tx_spoof_check_err++;
3131                 break;
3132         }
3133 }
3134
3135 static inline void lancer_update_tx_err(struct be_tx_obj *txo, u8 status)
3136 {
3137         switch (status) {
3138         case LANCER_TX_COMP_LSO_ERR:
3139                 tx_stats(txo)->tx_tso_err++;
3140                 break;
3141         case LANCER_TX_COMP_HSW_DROP_MAC_ERR:
3142         case LANCER_TX_COMP_HSW_DROP_VLAN_ERR:
3143                 tx_stats(txo)->tx_spoof_check_err++;
3144                 break;
3145         case LANCER_TX_COMP_QINQ_ERR:
3146                 tx_stats(txo)->tx_qinq_err++;
3147                 break;
3148         case LANCER_TX_COMP_PARITY_ERR:
3149                 tx_stats(txo)->tx_internal_parity_err++;
3150                 break;
3151         case LANCER_TX_COMP_DMA_ERR:
3152                 tx_stats(txo)->tx_dma_err++;
3153                 break;
3154         }
3155 }
3156
3157 static void be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo,
3158                           int idx)
3159 {
3160         int num_wrbs = 0, work_done = 0;
3161         struct be_tx_compl_info *txcp;
3162
3163         while ((txcp = be_tx_compl_get(txo))) {
3164                 num_wrbs += be_tx_compl_process(adapter, txo, txcp->end_index);
3165                 work_done++;
3166
3167                 if (txcp->status) {
3168                         if (lancer_chip(adapter))
3169                                 lancer_update_tx_err(txo, txcp->status);
3170                         else
3171                                 be_update_tx_err(txo, txcp->status);
3172                 }
3173         }
3174
3175         if (work_done) {
3176                 be_cq_notify(adapter, txo->cq.id, true, work_done);
3177                 atomic_sub(num_wrbs, &txo->q.used);
3178
3179                 /* As Tx wrbs have been freed up, wake up netdev queue
3180                  * if it was stopped due to lack of tx wrbs.  */
3181                 if (__netif_subqueue_stopped(adapter->netdev, idx) &&
3182                     be_can_txq_wake(txo)) {
3183                         netif_wake_subqueue(adapter->netdev, idx);
3184                 }
3185
3186                 u64_stats_update_begin(&tx_stats(txo)->sync_compl);
3187                 tx_stats(txo)->tx_compl += work_done;
3188                 u64_stats_update_end(&tx_stats(txo)->sync_compl);
3189         }
3190 }
3191
3192 #ifdef CONFIG_NET_RX_BUSY_POLL
3193 static inline bool be_lock_napi(struct be_eq_obj *eqo)
3194 {
3195         bool status = true;
3196
3197         spin_lock(&eqo->lock); /* BH is already disabled */
3198         if (eqo->state & BE_EQ_LOCKED) {
3199                 WARN_ON(eqo->state & BE_EQ_NAPI);
3200                 eqo->state |= BE_EQ_NAPI_YIELD;
3201                 status = false;
3202         } else {
3203                 eqo->state = BE_EQ_NAPI;
3204         }
3205         spin_unlock(&eqo->lock);
3206         return status;
3207 }
3208
3209 static inline void be_unlock_napi(struct be_eq_obj *eqo)
3210 {
3211         spin_lock(&eqo->lock); /* BH is already disabled */
3212
3213         WARN_ON(eqo->state & (BE_EQ_POLL | BE_EQ_NAPI_YIELD));
3214         eqo->state = BE_EQ_IDLE;
3215
3216         spin_unlock(&eqo->lock);
3217 }
3218
3219 static inline bool be_lock_busy_poll(struct be_eq_obj *eqo)
3220 {
3221         bool status = true;
3222
3223         spin_lock_bh(&eqo->lock);
3224         if (eqo->state & BE_EQ_LOCKED) {
3225                 eqo->state |= BE_EQ_POLL_YIELD;
3226                 status = false;
3227         } else {
3228                 eqo->state |= BE_EQ_POLL;
3229         }
3230         spin_unlock_bh(&eqo->lock);
3231         return status;
3232 }
3233
3234 static inline void be_unlock_busy_poll(struct be_eq_obj *eqo)
3235 {
3236         spin_lock_bh(&eqo->lock);
3237
3238         WARN_ON(eqo->state & (BE_EQ_NAPI));
3239         eqo->state = BE_EQ_IDLE;
3240
3241         spin_unlock_bh(&eqo->lock);
3242 }
3243
3244 static inline void be_enable_busy_poll(struct be_eq_obj *eqo)
3245 {
3246         spin_lock_init(&eqo->lock);
3247         eqo->state = BE_EQ_IDLE;
3248 }
3249
3250 static inline void be_disable_busy_poll(struct be_eq_obj *eqo)
3251 {
3252         local_bh_disable();
3253
3254         /* It's enough to just acquire napi lock on the eqo to stop
3255          * be_busy_poll() from processing any queueus.
3256          */
3257         while (!be_lock_napi(eqo))
3258                 mdelay(1);
3259
3260         local_bh_enable();
3261 }
3262
3263 #else /* CONFIG_NET_RX_BUSY_POLL */
3264
3265 static inline bool be_lock_napi(struct be_eq_obj *eqo)
3266 {
3267         return true;
3268 }
3269
3270 static inline void be_unlock_napi(struct be_eq_obj *eqo)
3271 {
3272 }
3273
3274 static inline bool be_lock_busy_poll(struct be_eq_obj *eqo)
3275 {
3276         return false;
3277 }
3278
3279 static inline void be_unlock_busy_poll(struct be_eq_obj *eqo)
3280 {
3281 }
3282
3283 static inline void be_enable_busy_poll(struct be_eq_obj *eqo)
3284 {
3285 }
3286
3287 static inline void be_disable_busy_poll(struct be_eq_obj *eqo)
3288 {
3289 }
3290 #endif /* CONFIG_NET_RX_BUSY_POLL */
3291
3292 int be_poll(struct napi_struct *napi, int budget)
3293 {
3294         struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3295         struct be_adapter *adapter = eqo->adapter;
3296         int max_work = 0, work, i, num_evts;
3297         struct be_rx_obj *rxo;
3298         struct be_tx_obj *txo;
3299         u32 mult_enc = 0;
3300
3301         num_evts = events_get(eqo);
3302
3303         for_all_tx_queues_on_eq(adapter, eqo, txo, i)
3304                 be_process_tx(adapter, txo, i);
3305
3306         if (be_lock_napi(eqo)) {
3307                 /* This loop will iterate twice for EQ0 in which
3308                  * completions of the last RXQ (default one) are also processed
3309                  * For other EQs the loop iterates only once
3310                  */
3311                 for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3312                         work = be_process_rx(rxo, napi, budget, NAPI_POLLING);
3313                         max_work = max(work, max_work);
3314                 }
3315                 be_unlock_napi(eqo);
3316         } else {
3317                 max_work = budget;
3318         }
3319
3320         if (is_mcc_eqo(eqo))
3321                 be_process_mcc(adapter);
3322
3323         if (max_work < budget) {
3324                 napi_complete(napi);
3325
3326                 /* Skyhawk EQ_DB has a provision to set the rearm to interrupt
3327                  * delay via a delay multiplier encoding value
3328                  */
3329                 if (skyhawk_chip(adapter))
3330                         mult_enc = be_get_eq_delay_mult_enc(eqo);
3331
3332                 be_eq_notify(adapter, eqo->q.id, true, false, num_evts,
3333                              mult_enc);
3334         } else {
3335                 /* As we'll continue in polling mode, count and clear events */
3336                 be_eq_notify(adapter, eqo->q.id, false, false, num_evts, 0);
3337         }
3338         return max_work;
3339 }
3340
3341 #ifdef CONFIG_NET_RX_BUSY_POLL
3342 static int be_busy_poll(struct napi_struct *napi)
3343 {
3344         struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3345         struct be_adapter *adapter = eqo->adapter;
3346         struct be_rx_obj *rxo;
3347         int i, work = 0;
3348
3349         if (!be_lock_busy_poll(eqo))
3350                 return LL_FLUSH_BUSY;
3351
3352         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3353                 work = be_process_rx(rxo, napi, 4, BUSY_POLLING);
3354                 if (work)
3355                         break;
3356         }
3357
3358         be_unlock_busy_poll(eqo);
3359         return work;
3360 }
3361 #endif
3362
3363 void be_detect_error(struct be_adapter *adapter)
3364 {
3365         u32 ue_lo = 0, ue_hi = 0, ue_lo_mask = 0, ue_hi_mask = 0;
3366         u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0;
3367         u32 i;
3368         struct device *dev = &adapter->pdev->dev;
3369
3370         if (be_check_error(adapter, BE_ERROR_HW))
3371                 return;
3372
3373         if (lancer_chip(adapter)) {
3374                 sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
3375                 if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
3376                         be_set_error(adapter, BE_ERROR_UE);
3377                         sliport_err1 = ioread32(adapter->db +
3378                                                 SLIPORT_ERROR1_OFFSET);
3379                         sliport_err2 = ioread32(adapter->db +
3380                                                 SLIPORT_ERROR2_OFFSET);
3381                         /* Do not log error messages if its a FW reset */
3382                         if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 &&
3383                             sliport_err2 == SLIPORT_ERROR_FW_RESET2) {
3384                                 dev_info(dev, "Firmware update in progress\n");
3385                         } else {
3386                                 dev_err(dev, "Error detected in the card\n");
3387                                 dev_err(dev, "ERR: sliport status 0x%x\n",
3388                                         sliport_status);
3389                                 dev_err(dev, "ERR: sliport error1 0x%x\n",
3390                                         sliport_err1);
3391                                 dev_err(dev, "ERR: sliport error2 0x%x\n",
3392                                         sliport_err2);
3393                         }
3394                 }
3395         } else {
3396                 ue_lo = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_LOW);
3397                 ue_hi = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_HIGH);
3398                 ue_lo_mask = ioread32(adapter->pcicfg +
3399                                       PCICFG_UE_STATUS_LOW_MASK);
3400                 ue_hi_mask = ioread32(adapter->pcicfg +
3401                                       PCICFG_UE_STATUS_HI_MASK);
3402
3403                 ue_lo = (ue_lo & ~ue_lo_mask);
3404                 ue_hi = (ue_hi & ~ue_hi_mask);
3405
3406                 /* On certain platforms BE hardware can indicate spurious UEs.
3407                  * Allow HW to stop working completely in case of a real UE.
3408                  * Hence not setting the hw_error for UE detection.
3409                  */
3410
3411                 if (ue_lo || ue_hi) {
3412                         dev_err(dev, "Error detected in the adapter");
3413                         if (skyhawk_chip(adapter))
3414                                 be_set_error(adapter, BE_ERROR_UE);
3415
3416                         for (i = 0; ue_lo; ue_lo >>= 1, i++) {
3417                                 if (ue_lo & 1)
3418                                         dev_err(dev, "UE: %s bit set\n",
3419                                                 ue_status_low_desc[i]);
3420                         }
3421                         for (i = 0; ue_hi; ue_hi >>= 1, i++) {
3422                                 if (ue_hi & 1)
3423                                         dev_err(dev, "UE: %s bit set\n",
3424                                                 ue_status_hi_desc[i]);
3425                         }
3426                 }
3427         }
3428 }
3429
3430 static void be_msix_disable(struct be_adapter *adapter)
3431 {
3432         if (msix_enabled(adapter)) {
3433                 pci_disable_msix(adapter->pdev);
3434                 adapter->num_msix_vec = 0;
3435                 adapter->num_msix_roce_vec = 0;
3436         }
3437 }
3438
3439 static int be_msix_enable(struct be_adapter *adapter)
3440 {
3441         unsigned int i, max_roce_eqs;
3442         struct device *dev = &adapter->pdev->dev;
3443         int num_vec;
3444
3445         /* If RoCE is supported, program the max number of vectors that
3446          * could be used for NIC and RoCE, else, just program the number
3447          * we'll use initially.
3448          */
3449         if (be_roce_supported(adapter)) {
3450                 max_roce_eqs =
3451                         be_max_func_eqs(adapter) - be_max_nic_eqs(adapter);
3452                 max_roce_eqs = min(max_roce_eqs, num_online_cpus());
3453                 num_vec = be_max_any_irqs(adapter) + max_roce_eqs;
3454         } else {
3455                 num_vec = max(adapter->cfg_num_rx_irqs,
3456                               adapter->cfg_num_tx_irqs);
3457         }
3458
3459         for (i = 0; i < num_vec; i++)
3460                 adapter->msix_entries[i].entry = i;
3461
3462         num_vec = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
3463                                         MIN_MSIX_VECTORS, num_vec);
3464         if (num_vec < 0)
3465                 goto fail;
3466
3467         if (be_roce_supported(adapter) && num_vec > MIN_MSIX_VECTORS) {
3468                 adapter->num_msix_roce_vec = num_vec / 2;
3469                 dev_info(dev, "enabled %d MSI-x vector(s) for RoCE\n",
3470                          adapter->num_msix_roce_vec);
3471         }
3472
3473         adapter->num_msix_vec = num_vec - adapter->num_msix_roce_vec;
3474
3475         dev_info(dev, "enabled %d MSI-x vector(s) for NIC\n",
3476                  adapter->num_msix_vec);
3477         return 0;
3478
3479 fail:
3480         dev_warn(dev, "MSIx enable failed\n");
3481
3482         /* INTx is not supported in VFs, so fail probe if enable_msix fails */
3483         if (be_virtfn(adapter))
3484                 return num_vec;
3485         return 0;
3486 }
3487
3488 static inline int be_msix_vec_get(struct be_adapter *adapter,
3489                                   struct be_eq_obj *eqo)
3490 {
3491         return adapter->msix_entries[eqo->msix_idx].vector;
3492 }
3493
3494 static int be_msix_register(struct be_adapter *adapter)
3495 {
3496         struct net_device *netdev = adapter->netdev;
3497         struct be_eq_obj *eqo;
3498         int status, i, vec;
3499
3500         for_all_evt_queues(adapter, eqo, i) {
3501                 sprintf(eqo->desc, "%s-q%d", netdev->name, i);
3502                 vec = be_msix_vec_get(adapter, eqo);
3503                 status = request_irq(vec, be_msix, 0, eqo->desc, eqo);
3504                 if (status)
3505                         goto err_msix;
3506
3507                 irq_set_affinity_hint(vec, eqo->affinity_mask);
3508         }
3509
3510         return 0;
3511 err_msix:
3512         for (i--; i >= 0; i--) {
3513                 eqo = &adapter->eq_obj[i];
3514                 free_irq(be_msix_vec_get(adapter, eqo), eqo);
3515         }
3516         dev_warn(&adapter->pdev->dev, "MSIX Request IRQ failed - err %d\n",
3517                  status);
3518         be_msix_disable(adapter);
3519         return status;
3520 }
3521
3522 static int be_irq_register(struct be_adapter *adapter)
3523 {
3524         struct net_device *netdev = adapter->netdev;
3525         int status;
3526
3527         if (msix_enabled(adapter)) {
3528                 status = be_msix_register(adapter);
3529                 if (status == 0)
3530                         goto done;
3531                 /* INTx is not supported for VF */
3532                 if (be_virtfn(adapter))
3533                         return status;
3534         }
3535
3536         /* INTx: only the first EQ is used */
3537         netdev->irq = adapter->pdev->irq;
3538         status = request_irq(netdev->irq, be_intx, IRQF_SHARED, netdev->name,
3539                              &adapter->eq_obj[0]);
3540         if (status) {
3541                 dev_err(&adapter->pdev->dev,
3542                         "INTx request IRQ failed - err %d\n", status);
3543                 return status;
3544         }
3545 done:
3546         adapter->isr_registered = true;
3547         return 0;
3548 }
3549
3550 static void be_irq_unregister(struct be_adapter *adapter)
3551 {
3552         struct net_device *netdev = adapter->netdev;
3553         struct be_eq_obj *eqo;
3554         int i, vec;
3555
3556         if (!adapter->isr_registered)
3557                 return;
3558
3559         /* INTx */
3560         if (!msix_enabled(adapter)) {
3561                 free_irq(netdev->irq, &adapter->eq_obj[0]);
3562                 goto done;
3563         }
3564
3565         /* MSIx */
3566         for_all_evt_queues(adapter, eqo, i) {
3567                 vec = be_msix_vec_get(adapter, eqo);
3568                 irq_set_affinity_hint(vec, NULL);
3569                 free_irq(vec, eqo);
3570         }
3571
3572 done:
3573         adapter->isr_registered = false;
3574 }
3575
3576 static void be_rx_qs_destroy(struct be_adapter *adapter)
3577 {
3578         struct rss_info *rss = &adapter->rss_info;
3579         struct be_queue_info *q;
3580         struct be_rx_obj *rxo;
3581         int i;
3582
3583         for_all_rx_queues(adapter, rxo, i) {
3584                 q = &rxo->q;
3585                 if (q->created) {
3586                         /* If RXQs are destroyed while in an "out of buffer"
3587                          * state, there is a possibility of an HW stall on
3588                          * Lancer. So, post 64 buffers to each queue to relieve
3589                          * the "out of buffer" condition.
3590                          * Make sure there's space in the RXQ before posting.
3591                          */
3592                         if (lancer_chip(adapter)) {
3593                                 be_rx_cq_clean(rxo);
3594                                 if (atomic_read(&q->used) == 0)
3595                                         be_post_rx_frags(rxo, GFP_KERNEL,
3596                                                          MAX_RX_POST);
3597                         }
3598
3599                         be_cmd_rxq_destroy(adapter, q);
3600                         be_rx_cq_clean(rxo);
3601                         be_rxq_clean(rxo);
3602                 }
3603                 be_queue_free(adapter, q);
3604         }
3605
3606         if (rss->rss_flags) {
3607                 rss->rss_flags = RSS_ENABLE_NONE;
3608                 be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3609                                   128, rss->rss_hkey);
3610         }
3611 }
3612
3613 static void be_disable_if_filters(struct be_adapter *adapter)
3614 {
3615         be_dev_mac_del(adapter, adapter->pmac_id[0]);
3616         be_clear_uc_list(adapter);
3617         be_clear_mc_list(adapter);
3618
3619         /* The IFACE flags are enabled in the open path and cleared
3620          * in the close path. When a VF gets detached from the host and
3621          * assigned to a VM the following happens:
3622          *      - VF's IFACE flags get cleared in the detach path
3623          *      - IFACE create is issued by the VF in the attach path
3624          * Due to a bug in the BE3/Skyhawk-R FW
3625          * (Lancer FW doesn't have the bug), the IFACE capability flags
3626          * specified along with the IFACE create cmd issued by a VF are not
3627          * honoured by FW.  As a consequence, if a *new* driver
3628          * (that enables/disables IFACE flags in open/close)
3629          * is loaded in the host and an *old* driver is * used by a VM/VF,
3630          * the IFACE gets created *without* the needed flags.
3631          * To avoid this, disable RX-filter flags only for Lancer.
3632          */
3633         if (lancer_chip(adapter)) {
3634                 be_cmd_rx_filter(adapter, BE_IF_ALL_FILT_FLAGS, OFF);
3635                 adapter->if_flags &= ~BE_IF_ALL_FILT_FLAGS;
3636         }
3637 }
3638
3639 static int be_close(struct net_device *netdev)
3640 {
3641         struct be_adapter *adapter = netdev_priv(netdev);
3642         struct be_eq_obj *eqo;
3643         int i;
3644
3645         /* This protection is needed as be_close() may be called even when the
3646          * adapter is in cleared state (after eeh perm failure)
3647          */
3648         if (!(adapter->flags & BE_FLAGS_SETUP_DONE))
3649                 return 0;
3650
3651         /* Before attempting cleanup ensure all the pending cmds in the
3652          * config_wq have finished execution
3653          */
3654         flush_workqueue(be_wq);
3655
3656         be_disable_if_filters(adapter);
3657
3658         if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
3659                 for_all_evt_queues(adapter, eqo, i) {
3660                         napi_disable(&eqo->napi);
3661                         be_disable_busy_poll(eqo);
3662                 }
3663                 adapter->flags &= ~BE_FLAGS_NAPI_ENABLED;
3664         }
3665
3666         be_async_mcc_disable(adapter);
3667
3668         /* Wait for all pending tx completions to arrive so that
3669          * all tx skbs are freed.
3670          */
3671         netif_tx_disable(netdev);
3672         be_tx_compl_clean(adapter);
3673
3674         be_rx_qs_destroy(adapter);
3675
3676         for_all_evt_queues(adapter, eqo, i) {
3677                 if (msix_enabled(adapter))
3678                         synchronize_irq(be_msix_vec_get(adapter, eqo));
3679                 else
3680                         synchronize_irq(netdev->irq);
3681                 be_eq_clean(eqo);
3682         }
3683
3684         be_irq_unregister(adapter);
3685
3686         return 0;
3687 }
3688
3689 static int be_rx_qs_create(struct be_adapter *adapter)
3690 {
3691         struct rss_info *rss = &adapter->rss_info;
3692         u8 rss_key[RSS_HASH_KEY_LEN];
3693         struct be_rx_obj *rxo;
3694         int rc, i, j;
3695
3696         for_all_rx_queues(adapter, rxo, i) {
3697                 rc = be_queue_alloc(adapter, &rxo->q, RX_Q_LEN,
3698                                     sizeof(struct be_eth_rx_d));
3699                 if (rc)
3700                         return rc;
3701         }
3702
3703         if (adapter->need_def_rxq || !adapter->num_rss_qs) {
3704                 rxo = default_rxo(adapter);
3705                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3706                                        rx_frag_size, adapter->if_handle,
3707                                        false, &rxo->rss_id);
3708                 if (rc)
3709                         return rc;
3710         }
3711
3712         for_all_rss_queues(adapter, rxo, i) {
3713                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3714                                        rx_frag_size, adapter->if_handle,
3715                                        true, &rxo->rss_id);
3716                 if (rc)
3717                         return rc;
3718         }
3719
3720         if (be_multi_rxq(adapter)) {
3721                 for (j = 0; j < RSS_INDIR_TABLE_LEN; j += adapter->num_rss_qs) {
3722                         for_all_rss_queues(adapter, rxo, i) {
3723                                 if ((j + i) >= RSS_INDIR_TABLE_LEN)
3724                                         break;
3725                                 rss->rsstable[j + i] = rxo->rss_id;
3726                                 rss->rss_queue[j + i] = i;
3727                         }
3728                 }
3729                 rss->rss_flags = RSS_ENABLE_TCP_IPV4 | RSS_ENABLE_IPV4 |
3730                         RSS_ENABLE_TCP_IPV6 | RSS_ENABLE_IPV6;
3731
3732                 if (!BEx_chip(adapter))
3733                         rss->rss_flags |= RSS_ENABLE_UDP_IPV4 |
3734                                 RSS_ENABLE_UDP_IPV6;
3735
3736                 netdev_rss_key_fill(rss_key, RSS_HASH_KEY_LEN);
3737                 rc = be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3738                                        RSS_INDIR_TABLE_LEN, rss_key);
3739                 if (rc) {
3740                         rss->rss_flags = RSS_ENABLE_NONE;
3741                         return rc;
3742                 }
3743
3744                 memcpy(rss->rss_hkey, rss_key, RSS_HASH_KEY_LEN);
3745         } else {
3746                 /* Disable RSS, if only default RX Q is created */
3747                 rss->rss_flags = RSS_ENABLE_NONE;
3748         }
3749
3750
3751         /* Post 1 less than RXQ-len to avoid head being equal to tail,
3752          * which is a queue empty condition
3753          */
3754         for_all_rx_queues(adapter, rxo, i)
3755                 be_post_rx_frags(rxo, GFP_KERNEL, RX_Q_LEN - 1);
3756
3757         return 0;
3758 }
3759
3760 static int be_enable_if_filters(struct be_adapter *adapter)
3761 {
3762         int status;
3763
3764         status = be_cmd_rx_filter(adapter, BE_IF_FILT_FLAGS_BASIC, ON);
3765         if (status)
3766                 return status;
3767
3768         /* For BE3 VFs, the PF programs the initial MAC address */
3769         if (!(BEx_chip(adapter) && be_virtfn(adapter))) {
3770                 status = be_dev_mac_add(adapter, adapter->netdev->dev_addr);
3771                 if (status)
3772                         return status;
3773                 ether_addr_copy(adapter->dev_mac, adapter->netdev->dev_addr);
3774         }
3775
3776         if (adapter->vlans_added)
3777                 be_vid_config(adapter);
3778
3779         __be_set_rx_mode(adapter);
3780
3781         return 0;
3782 }
3783
3784 static int be_open(struct net_device *netdev)
3785 {
3786         struct be_adapter *adapter = netdev_priv(netdev);
3787         struct be_eq_obj *eqo;
3788         struct be_rx_obj *rxo;
3789         struct be_tx_obj *txo;
3790         u8 link_status;
3791         int status, i;
3792
3793         status = be_rx_qs_create(adapter);
3794         if (status)
3795                 goto err;
3796
3797         status = be_enable_if_filters(adapter);
3798         if (status)
3799                 goto err;
3800
3801         status = be_irq_register(adapter);
3802         if (status)
3803                 goto err;
3804
3805         for_all_rx_queues(adapter, rxo, i)
3806                 be_cq_notify(adapter, rxo->cq.id, true, 0);
3807
3808         for_all_tx_queues(adapter, txo, i)
3809                 be_cq_notify(adapter, txo->cq.id, true, 0);
3810
3811         be_async_mcc_enable(adapter);
3812
3813         for_all_evt_queues(adapter, eqo, i) {
3814                 napi_enable(&eqo->napi);
3815                 be_enable_busy_poll(eqo);
3816                 be_eq_notify(adapter, eqo->q.id, true, true, 0, 0);
3817         }
3818         adapter->flags |= BE_FLAGS_NAPI_ENABLED;
3819
3820         status = be_cmd_link_status_query(adapter, NULL, &link_status, 0);
3821         if (!status)
3822                 be_link_status_update(adapter, link_status);
3823
3824         netif_tx_start_all_queues(netdev);
3825         if (skyhawk_chip(adapter))
3826                 udp_tunnel_get_rx_info(netdev);
3827
3828         return 0;
3829 err:
3830         be_close(adapter->netdev);
3831         return -EIO;
3832 }
3833
3834 static void be_vf_eth_addr_generate(struct be_adapter *adapter, u8 *mac)
3835 {
3836         u32 addr;
3837
3838         addr = jhash(adapter->netdev->dev_addr, ETH_ALEN, 0);
3839
3840         mac[5] = (u8)(addr & 0xFF);
3841         mac[4] = (u8)((addr >> 8) & 0xFF);
3842         mac[3] = (u8)((addr >> 16) & 0xFF);
3843         /* Use the OUI from the current MAC address */
3844         memcpy(mac, adapter->netdev->dev_addr, 3);
3845 }
3846
3847 /*
3848  * Generate a seed MAC address from the PF MAC Address using jhash.
3849  * MAC Address for VFs are assigned incrementally starting from the seed.
3850  * These addresses are programmed in the ASIC by the PF and the VF driver
3851  * queries for the MAC address during its probe.
3852  */
3853 static int be_vf_eth_addr_config(struct be_adapter *adapter)
3854 {
3855         u32 vf;
3856         int status = 0;
3857         u8 mac[ETH_ALEN];
3858         struct be_vf_cfg *vf_cfg;
3859
3860         be_vf_eth_addr_generate(adapter, mac);
3861
3862         for_all_vfs(adapter, vf_cfg, vf) {
3863                 if (BEx_chip(adapter))
3864                         status = be_cmd_pmac_add(adapter, mac,
3865                                                  vf_cfg->if_handle,
3866                                                  &vf_cfg->pmac_id, vf + 1);
3867                 else
3868                         status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
3869                                                 vf + 1);
3870
3871                 if (status)
3872                         dev_err(&adapter->pdev->dev,
3873                                 "Mac address assignment failed for VF %d\n",
3874                                 vf);
3875                 else
3876                         memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3877
3878                 mac[5] += 1;
3879         }
3880         return status;
3881 }
3882
3883 static int be_vfs_mac_query(struct be_adapter *adapter)
3884 {
3885         int status, vf;
3886         u8 mac[ETH_ALEN];
3887         struct be_vf_cfg *vf_cfg;
3888
3889         for_all_vfs(adapter, vf_cfg, vf) {
3890                 status = be_cmd_get_active_mac(adapter, vf_cfg->pmac_id,
3891                                                mac, vf_cfg->if_handle,
3892                                                false, vf+1);
3893                 if (status)
3894                         return status;
3895                 memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3896         }
3897         return 0;
3898 }
3899
3900 static void be_vf_clear(struct be_adapter *adapter)
3901 {
3902         struct be_vf_cfg *vf_cfg;
3903         u32 vf;
3904
3905         if (pci_vfs_assigned(adapter->pdev)) {
3906                 dev_warn(&adapter->pdev->dev,
3907                          "VFs are assigned to VMs: not disabling VFs\n");
3908                 goto done;
3909         }
3910
3911         pci_disable_sriov(adapter->pdev);
3912
3913         for_all_vfs(adapter, vf_cfg, vf) {
3914                 if (BEx_chip(adapter))
3915                         be_cmd_pmac_del(adapter, vf_cfg->if_handle,
3916                                         vf_cfg->pmac_id, vf + 1);
3917                 else
3918                         be_cmd_set_mac(adapter, NULL, vf_cfg->if_handle,
3919                                        vf + 1);
3920
3921                 be_cmd_if_destroy(adapter, vf_cfg->if_handle, vf + 1);
3922         }
3923
3924         if (BE3_chip(adapter))
3925                 be_cmd_set_hsw_config(adapter, 0, 0,
3926                                       adapter->if_handle,
3927                                       PORT_FWD_TYPE_PASSTHRU, 0);
3928 done:
3929         kfree(adapter->vf_cfg);
3930         adapter->num_vfs = 0;
3931         adapter->flags &= ~BE_FLAGS_SRIOV_ENABLED;
3932 }
3933
3934 static void be_clear_queues(struct be_adapter *adapter)
3935 {
3936         be_mcc_queues_destroy(adapter);
3937         be_rx_cqs_destroy(adapter);
3938         be_tx_queues_destroy(adapter);
3939         be_evt_queues_destroy(adapter);
3940 }
3941
3942 static void be_cancel_worker(struct be_adapter *adapter)
3943 {
3944         if (adapter->flags & BE_FLAGS_WORKER_SCHEDULED) {
3945                 cancel_delayed_work_sync(&adapter->work);
3946                 adapter->flags &= ~BE_FLAGS_WORKER_SCHEDULED;
3947         }
3948 }
3949
3950 static void be_cancel_err_detection(struct be_adapter *adapter)
3951 {
3952         struct be_error_recovery *err_rec = &adapter->error_recovery;
3953
3954         if (!be_err_recovery_workq)
3955                 return;
3956
3957         if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) {
3958                 cancel_delayed_work_sync(&err_rec->err_detection_work);
3959                 adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED;
3960         }
3961 }
3962
3963 static void be_disable_vxlan_offloads(struct be_adapter *adapter)
3964 {
3965         struct net_device *netdev = adapter->netdev;
3966
3967         if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS)
3968                 be_cmd_manage_iface(adapter, adapter->if_handle,
3969                                     OP_CONVERT_TUNNEL_TO_NORMAL);
3970
3971         if (adapter->vxlan_port)
3972                 be_cmd_set_vxlan_port(adapter, 0);
3973
3974         adapter->flags &= ~BE_FLAGS_VXLAN_OFFLOADS;
3975         adapter->vxlan_port = 0;
3976
3977         netdev->hw_enc_features = 0;
3978         netdev->hw_features &= ~(NETIF_F_GSO_UDP_TUNNEL);
3979         netdev->features &= ~(NETIF_F_GSO_UDP_TUNNEL);
3980 }
3981
3982 static void be_calculate_vf_res(struct be_adapter *adapter, u16 num_vfs,
3983                                 struct be_resources *vft_res)
3984 {
3985         struct be_resources res = adapter->pool_res;
3986         u32 vf_if_cap_flags = res.vf_if_cap_flags;
3987         struct be_resources res_mod = {0};
3988         u16 num_vf_qs = 1;
3989
3990         /* Distribute the queue resources among the PF and it's VFs */
3991         if (num_vfs) {
3992                 /* Divide the rx queues evenly among the VFs and the PF, capped
3993                  * at VF-EQ-count. Any remainder queues belong to the PF.
3994                  */
3995                 num_vf_qs = min(SH_VF_MAX_NIC_EQS,
3996                                 res.max_rss_qs / (num_vfs + 1));
3997
3998                 /* Skyhawk-R chip supports only MAX_PORT_RSS_TABLES
3999                  * RSS Tables per port. Provide RSS on VFs, only if number of
4000                  * VFs requested is less than it's PF Pool's RSS Tables limit.
4001                  */
4002                 if (num_vfs >= be_max_pf_pool_rss_tables(adapter))
4003                         num_vf_qs = 1;
4004         }
4005
4006         /* Resource with fields set to all '1's by GET_PROFILE_CONFIG cmd,
4007          * which are modifiable using SET_PROFILE_CONFIG cmd.
4008          */
4009         be_cmd_get_profile_config(adapter, &res_mod, NULL, ACTIVE_PROFILE_TYPE,
4010                                   RESOURCE_MODIFIABLE, 0);
4011
4012         /* If RSS IFACE capability flags are modifiable for a VF, set the
4013          * capability flag as valid and set RSS and DEFQ_RSS IFACE flags if
4014          * more than 1 RSSQ is available for a VF.
4015          * Otherwise, provision only 1 queue pair for VF.
4016          */
4017         if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_RSS) {
4018                 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4019                 if (num_vf_qs > 1) {
4020                         vf_if_cap_flags |= BE_IF_FLAGS_RSS;
4021                         if (res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS)
4022                                 vf_if_cap_flags |= BE_IF_FLAGS_DEFQ_RSS;
4023                 } else {
4024                         vf_if_cap_flags &= ~(BE_IF_FLAGS_RSS |
4025                                              BE_IF_FLAGS_DEFQ_RSS);
4026                 }
4027         } else {
4028                 num_vf_qs = 1;
4029         }
4030
4031         if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
4032                 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4033                 vf_if_cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4034         }
4035
4036         vft_res->vf_if_cap_flags = vf_if_cap_flags;
4037         vft_res->max_rx_qs = num_vf_qs;
4038         vft_res->max_rss_qs = num_vf_qs;
4039         vft_res->max_tx_qs = res.max_tx_qs / (num_vfs + 1);
4040         vft_res->max_cq_count = res.max_cq_count / (num_vfs + 1);
4041
4042         /* Distribute unicast MACs, VLANs, IFACE count and MCCQ count equally
4043          * among the PF and it's VFs, if the fields are changeable
4044          */
4045         if (res_mod.max_uc_mac == FIELD_MODIFIABLE)
4046                 vft_res->max_uc_mac = res.max_uc_mac / (num_vfs + 1);
4047
4048         if (res_mod.max_vlans == FIELD_MODIFIABLE)
4049                 vft_res->max_vlans = res.max_vlans / (num_vfs + 1);
4050
4051         if (res_mod.max_iface_count == FIELD_MODIFIABLE)
4052                 vft_res->max_iface_count = res.max_iface_count / (num_vfs + 1);
4053
4054         if (res_mod.max_mcc_count == FIELD_MODIFIABLE)
4055                 vft_res->max_mcc_count = res.max_mcc_count / (num_vfs + 1);
4056 }
4057
4058 static void be_if_destroy(struct be_adapter *adapter)
4059 {
4060         be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4061
4062         kfree(adapter->pmac_id);
4063         adapter->pmac_id = NULL;
4064
4065         kfree(adapter->mc_list);
4066         adapter->mc_list = NULL;
4067
4068         kfree(adapter->uc_list);
4069         adapter->uc_list = NULL;
4070 }
4071
4072 static int be_clear(struct be_adapter *adapter)
4073 {
4074         struct pci_dev *pdev = adapter->pdev;
4075         struct  be_resources vft_res = {0};
4076
4077         be_cancel_worker(adapter);
4078
4079         flush_workqueue(be_wq);
4080
4081         if (sriov_enabled(adapter))
4082                 be_vf_clear(adapter);
4083
4084         /* Re-configure FW to distribute resources evenly across max-supported
4085          * number of VFs, only when VFs are not already enabled.
4086          */
4087         if (skyhawk_chip(adapter) && be_physfn(adapter) &&
4088             !pci_vfs_assigned(pdev)) {
4089                 be_calculate_vf_res(adapter,
4090                                     pci_sriov_get_totalvfs(pdev),
4091                                     &vft_res);
4092                 be_cmd_set_sriov_config(adapter, adapter->pool_res,
4093                                         pci_sriov_get_totalvfs(pdev),
4094                                         &vft_res);
4095         }
4096
4097         be_disable_vxlan_offloads(adapter);
4098
4099         be_if_destroy(adapter);
4100
4101         be_clear_queues(adapter);
4102
4103         be_msix_disable(adapter);
4104         adapter->flags &= ~BE_FLAGS_SETUP_DONE;
4105         return 0;
4106 }
4107
4108 static int be_vfs_if_create(struct be_adapter *adapter)
4109 {
4110         struct be_resources res = {0};
4111         u32 cap_flags, en_flags, vf;
4112         struct be_vf_cfg *vf_cfg;
4113         int status;
4114
4115         /* If a FW profile exists, then cap_flags are updated */
4116         cap_flags = BE_VF_IF_EN_FLAGS;
4117
4118         for_all_vfs(adapter, vf_cfg, vf) {
4119                 if (!BE3_chip(adapter)) {
4120                         status = be_cmd_get_profile_config(adapter, &res, NULL,
4121                                                            ACTIVE_PROFILE_TYPE,
4122                                                            RESOURCE_LIMITS,
4123                                                            vf + 1);
4124                         if (!status) {
4125                                 cap_flags = res.if_cap_flags;
4126                                 /* Prevent VFs from enabling VLAN promiscuous
4127                                  * mode
4128                                  */
4129                                 cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4130                         }
4131                 }
4132
4133                 /* PF should enable IF flags during proxy if_create call */
4134                 en_flags = cap_flags & BE_VF_IF_EN_FLAGS;
4135                 status = be_cmd_if_create(adapter, cap_flags, en_flags,
4136                                           &vf_cfg->if_handle, vf + 1);
4137                 if (status)
4138                         return status;
4139         }
4140
4141         return 0;
4142 }
4143
4144 static int be_vf_setup_init(struct be_adapter *adapter)
4145 {
4146         struct be_vf_cfg *vf_cfg;
4147         int vf;
4148
4149         adapter->vf_cfg = kcalloc(adapter->num_vfs, sizeof(*vf_cfg),
4150                                   GFP_KERNEL);
4151         if (!adapter->vf_cfg)
4152                 return -ENOMEM;
4153
4154         for_all_vfs(adapter, vf_cfg, vf) {
4155                 vf_cfg->if_handle = -1;
4156                 vf_cfg->pmac_id = -1;
4157         }
4158         return 0;
4159 }
4160
4161 static int be_vf_setup(struct be_adapter *adapter)
4162 {
4163         struct device *dev = &adapter->pdev->dev;
4164         struct be_vf_cfg *vf_cfg;
4165         int status, old_vfs, vf;
4166         bool spoofchk;
4167
4168         old_vfs = pci_num_vf(adapter->pdev);
4169
4170         status = be_vf_setup_init(adapter);
4171         if (status)
4172                 goto err;
4173
4174         if (old_vfs) {
4175                 for_all_vfs(adapter, vf_cfg, vf) {
4176                         status = be_cmd_get_if_id(adapter, vf_cfg, vf);
4177                         if (status)
4178                                 goto err;
4179                 }
4180
4181                 status = be_vfs_mac_query(adapter);
4182                 if (status)
4183                         goto err;
4184         } else {
4185                 status = be_vfs_if_create(adapter);
4186                 if (status)
4187                         goto err;
4188
4189                 status = be_vf_eth_addr_config(adapter);
4190                 if (status)
4191                         goto err;
4192         }
4193
4194         for_all_vfs(adapter, vf_cfg, vf) {
4195                 /* Allow VFs to programs MAC/VLAN filters */
4196                 status = be_cmd_get_fn_privileges(adapter, &vf_cfg->privileges,
4197                                                   vf + 1);
4198                 if (!status && !(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
4199                         status = be_cmd_set_fn_privileges(adapter,
4200                                                           vf_cfg->privileges |
4201                                                           BE_PRIV_FILTMGMT,
4202                                                           vf + 1);
4203                         if (!status) {
4204                                 vf_cfg->privileges |= BE_PRIV_FILTMGMT;
4205                                 dev_info(dev, "VF%d has FILTMGMT privilege\n",
4206                                          vf);
4207                         }
4208                 }
4209
4210                 /* Allow full available bandwidth */
4211                 if (!old_vfs)
4212                         be_cmd_config_qos(adapter, 0, 0, vf + 1);
4213
4214                 status = be_cmd_get_hsw_config(adapter, NULL, vf + 1,
4215                                                vf_cfg->if_handle, NULL,
4216                                                &spoofchk);
4217                 if (!status)
4218                         vf_cfg->spoofchk = spoofchk;
4219
4220                 if (!old_vfs) {
4221                         be_cmd_enable_vf(adapter, vf + 1);
4222                         be_cmd_set_logical_link_config(adapter,
4223                                                        IFLA_VF_LINK_STATE_AUTO,
4224                                                        vf+1);
4225                 }
4226         }
4227
4228         if (!old_vfs) {
4229                 status = pci_enable_sriov(adapter->pdev, adapter->num_vfs);
4230                 if (status) {
4231                         dev_err(dev, "SRIOV enable failed\n");
4232                         adapter->num_vfs = 0;
4233                         goto err;
4234                 }
4235         }
4236
4237         if (BE3_chip(adapter)) {
4238                 /* On BE3, enable VEB only when SRIOV is enabled */
4239                 status = be_cmd_set_hsw_config(adapter, 0, 0,
4240                                                adapter->if_handle,
4241                                                PORT_FWD_TYPE_VEB, 0);
4242                 if (status)
4243                         goto err;
4244         }
4245
4246         adapter->flags |= BE_FLAGS_SRIOV_ENABLED;
4247         return 0;
4248 err:
4249         dev_err(dev, "VF setup failed\n");
4250         be_vf_clear(adapter);
4251         return status;
4252 }
4253
4254 /* Converting function_mode bits on BE3 to SH mc_type enums */
4255
4256 static u8 be_convert_mc_type(u32 function_mode)
4257 {
4258         if (function_mode & VNIC_MODE && function_mode & QNQ_MODE)
4259                 return vNIC1;
4260         else if (function_mode & QNQ_MODE)
4261                 return FLEX10;
4262         else if (function_mode & VNIC_MODE)
4263                 return vNIC2;
4264         else if (function_mode & UMC_ENABLED)
4265                 return UMC;
4266         else
4267                 return MC_NONE;
4268 }
4269
4270 /* On BE2/BE3 FW does not suggest the supported limits */
4271 static void BEx_get_resources(struct be_adapter *adapter,
4272                               struct be_resources *res)
4273 {
4274         bool use_sriov = adapter->num_vfs ? 1 : 0;
4275
4276         if (be_physfn(adapter))
4277                 res->max_uc_mac = BE_UC_PMAC_COUNT;
4278         else
4279                 res->max_uc_mac = BE_VF_UC_PMAC_COUNT;
4280
4281         adapter->mc_type = be_convert_mc_type(adapter->function_mode);
4282
4283         if (be_is_mc(adapter)) {
4284                 /* Assuming that there are 4 channels per port,
4285                  * when multi-channel is enabled
4286                  */
4287                 if (be_is_qnq_mode(adapter))
4288                         res->max_vlans = BE_NUM_VLANS_SUPPORTED/8;
4289                 else
4290                         /* In a non-qnq multichannel mode, the pvid
4291                          * takes up one vlan entry
4292                          */
4293                         res->max_vlans = (BE_NUM_VLANS_SUPPORTED / 4) - 1;
4294         } else {
4295                 res->max_vlans = BE_NUM_VLANS_SUPPORTED;
4296         }
4297
4298         res->max_mcast_mac = BE_MAX_MC;
4299
4300         /* 1) For BE3 1Gb ports, FW does not support multiple TXQs
4301          * 2) Create multiple TX rings on a BE3-R multi-channel interface
4302          *    *only* if it is RSS-capable.
4303          */
4304         if (BE2_chip(adapter) || use_sriov ||  (adapter->port_num > 1) ||
4305             be_virtfn(adapter) ||
4306             (be_is_mc(adapter) &&
4307              !(adapter->function_caps & BE_FUNCTION_CAPS_RSS))) {
4308                 res->max_tx_qs = 1;
4309         } else if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) {
4310                 struct be_resources super_nic_res = {0};
4311
4312                 /* On a SuperNIC profile, the driver needs to use the
4313                  * GET_PROFILE_CONFIG cmd to query the per-function TXQ limits
4314                  */
4315                 be_cmd_get_profile_config(adapter, &super_nic_res, NULL,
4316                                           ACTIVE_PROFILE_TYPE, RESOURCE_LIMITS,
4317                                           0);
4318                 /* Some old versions of BE3 FW don't report max_tx_qs value */
4319                 res->max_tx_qs = super_nic_res.max_tx_qs ? : BE3_MAX_TX_QS;
4320         } else {
4321                 res->max_tx_qs = BE3_MAX_TX_QS;
4322         }
4323
4324         if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) &&
4325             !use_sriov && be_physfn(adapter))
4326                 res->max_rss_qs = (adapter->be3_native) ?
4327                                            BE3_MAX_RSS_QS : BE2_MAX_RSS_QS;
4328         res->max_rx_qs = res->max_rss_qs + 1;
4329
4330         if (be_physfn(adapter))
4331                 res->max_evt_qs = (be_max_vfs(adapter) > 0) ?
4332                                         BE3_SRIOV_MAX_EVT_QS : BE3_MAX_EVT_QS;
4333         else
4334                 res->max_evt_qs = 1;
4335
4336         res->if_cap_flags = BE_IF_CAP_FLAGS_WANT;
4337         res->if_cap_flags &= ~BE_IF_FLAGS_DEFQ_RSS;
4338         if (!(adapter->function_caps & BE_FUNCTION_CAPS_RSS))
4339                 res->if_cap_flags &= ~BE_IF_FLAGS_RSS;
4340 }
4341
4342 static void be_setup_init(struct be_adapter *adapter)
4343 {
4344         adapter->vlan_prio_bmap = 0xff;
4345         adapter->phy.link_speed = -1;
4346         adapter->if_handle = -1;
4347         adapter->be3_native = false;
4348         adapter->if_flags = 0;
4349         adapter->phy_state = BE_UNKNOWN_PHY_STATE;
4350         if (be_physfn(adapter))
4351                 adapter->cmd_privileges = MAX_PRIVILEGES;
4352         else
4353                 adapter->cmd_privileges = MIN_PRIVILEGES;
4354 }
4355
4356 /* HW supports only MAX_PORT_RSS_TABLES RSS Policy Tables per port.
4357  * However, this HW limitation is not exposed to the host via any SLI cmd.
4358  * As a result, in the case of SRIOV and in particular multi-partition configs
4359  * the driver needs to calcuate a proportional share of RSS Tables per PF-pool
4360  * for distribution between the VFs. This self-imposed limit will determine the
4361  * no: of VFs for which RSS can be enabled.
4362  */
4363 static void be_calculate_pf_pool_rss_tables(struct be_adapter *adapter)
4364 {
4365         struct be_port_resources port_res = {0};
4366         u8 rss_tables_on_port;
4367         u16 max_vfs = be_max_vfs(adapter);
4368
4369         be_cmd_get_profile_config(adapter, NULL, &port_res, SAVED_PROFILE_TYPE,
4370                                   RESOURCE_LIMITS, 0);
4371
4372         rss_tables_on_port = MAX_PORT_RSS_TABLES - port_res.nic_pfs;
4373
4374         /* Each PF Pool's RSS Tables limit =
4375          * PF's Max VFs / Total_Max_VFs on Port * RSS Tables on Port
4376          */
4377         adapter->pool_res.max_rss_tables =
4378                 max_vfs * rss_tables_on_port / port_res.max_vfs;
4379 }
4380
4381 static int be_get_sriov_config(struct be_adapter *adapter)
4382 {
4383         struct be_resources res = {0};
4384         int max_vfs, old_vfs;
4385
4386         be_cmd_get_profile_config(adapter, &res, NULL, ACTIVE_PROFILE_TYPE,
4387                                   RESOURCE_LIMITS, 0);
4388
4389         /* Some old versions of BE3 FW don't report max_vfs value */
4390         if (BE3_chip(adapter) && !res.max_vfs) {
4391                 max_vfs = pci_sriov_get_totalvfs(adapter->pdev);
4392                 res.max_vfs = max_vfs > 0 ? min(MAX_VFS, max_vfs) : 0;
4393         }
4394
4395         adapter->pool_res = res;
4396
4397         /* If during previous unload of the driver, the VFs were not disabled,
4398          * then we cannot rely on the PF POOL limits for the TotalVFs value.
4399          * Instead use the TotalVFs value stored in the pci-dev struct.
4400          */
4401         old_vfs = pci_num_vf(adapter->pdev);
4402         if (old_vfs) {
4403                 dev_info(&adapter->pdev->dev, "%d VFs are already enabled\n",
4404                          old_vfs);
4405
4406                 adapter->pool_res.max_vfs =
4407                         pci_sriov_get_totalvfs(adapter->pdev);
4408                 adapter->num_vfs = old_vfs;
4409         }
4410
4411         if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4412                 be_calculate_pf_pool_rss_tables(adapter);
4413                 dev_info(&adapter->pdev->dev,
4414                          "RSS can be enabled for all VFs if num_vfs <= %d\n",
4415                          be_max_pf_pool_rss_tables(adapter));
4416         }
4417         return 0;
4418 }
4419
4420 static void be_alloc_sriov_res(struct be_adapter *adapter)
4421 {
4422         int old_vfs = pci_num_vf(adapter->pdev);
4423         struct  be_resources vft_res = {0};
4424         int status;
4425
4426         be_get_sriov_config(adapter);
4427
4428         if (!old_vfs)
4429                 pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter));
4430
4431         /* When the HW is in SRIOV capable configuration, the PF-pool
4432          * resources are given to PF during driver load, if there are no
4433          * old VFs. This facility is not available in BE3 FW.
4434          * Also, this is done by FW in Lancer chip.
4435          */
4436         if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4437                 be_calculate_vf_res(adapter, 0, &vft_res);
4438                 status = be_cmd_set_sriov_config(adapter, adapter->pool_res, 0,
4439                                                  &vft_res);
4440                 if (status)
4441                         dev_err(&adapter->pdev->dev,
4442                                 "Failed to optimize SRIOV resources\n");
4443         }
4444 }
4445
4446 static int be_get_resources(struct be_adapter *adapter)
4447 {
4448         struct device *dev = &adapter->pdev->dev;
4449         struct be_resources res = {0};
4450         int status;
4451
4452         /* For Lancer, SH etc read per-function resource limits from FW.
4453          * GET_FUNC_CONFIG returns per function guaranteed limits.
4454          * GET_PROFILE_CONFIG returns PCI-E related limits PF-pool limits
4455          */
4456         if (BEx_chip(adapter)) {
4457                 BEx_get_resources(adapter, &res);
4458         } else {
4459                 status = be_cmd_get_func_config(adapter, &res);
4460                 if (status)
4461                         return status;
4462
4463                 /* If a deafault RXQ must be created, we'll use up one RSSQ*/
4464                 if (res.max_rss_qs && res.max_rss_qs == res.max_rx_qs &&
4465                     !(res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS))
4466                         res.max_rss_qs -= 1;
4467         }
4468
4469         /* If RoCE is supported stash away half the EQs for RoCE */
4470         res.max_nic_evt_qs = be_roce_supported(adapter) ?
4471                                 res.max_evt_qs / 2 : res.max_evt_qs;
4472         adapter->res = res;
4473
4474         /* If FW supports RSS default queue, then skip creating non-RSS
4475          * queue for non-IP traffic.
4476          */
4477         adapter->need_def_rxq = (be_if_cap_flags(adapter) &
4478                                  BE_IF_FLAGS_DEFQ_RSS) ? 0 : 1;
4479
4480         dev_info(dev, "Max: txqs %d, rxqs %d, rss %d, eqs %d, vfs %d\n",
4481                  be_max_txqs(adapter), be_max_rxqs(adapter),
4482                  be_max_rss(adapter), be_max_nic_eqs(adapter),
4483                  be_max_vfs(adapter));
4484         dev_info(dev, "Max: uc-macs %d, mc-macs %d, vlans %d\n",
4485                  be_max_uc(adapter), be_max_mc(adapter),
4486                  be_max_vlans(adapter));
4487
4488         /* Ensure RX and TX queues are created in pairs at init time */
4489         adapter->cfg_num_rx_irqs =
4490                                 min_t(u16, netif_get_num_default_rss_queues(),
4491                                       be_max_qp_irqs(adapter));
4492         adapter->cfg_num_tx_irqs = adapter->cfg_num_rx_irqs;
4493         return 0;
4494 }
4495
4496 static int be_get_config(struct be_adapter *adapter)
4497 {
4498         int status, level;
4499         u16 profile_id;
4500
4501         status = be_cmd_get_cntl_attributes(adapter);
4502         if (status)
4503                 return status;
4504
4505         status = be_cmd_query_fw_cfg(adapter);
4506         if (status)
4507                 return status;
4508
4509         if (!lancer_chip(adapter) && be_physfn(adapter))
4510                 be_cmd_get_fat_dump_len(adapter, &adapter->fat_dump_len);
4511
4512         if (BEx_chip(adapter)) {
4513                 level = be_cmd_get_fw_log_level(adapter);
4514                 adapter->msg_enable =
4515                         level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0;
4516         }
4517
4518         be_cmd_get_acpi_wol_cap(adapter);
4519         pci_enable_wake(adapter->pdev, PCI_D3hot, adapter->wol_en);
4520         pci_enable_wake(adapter->pdev, PCI_D3cold, adapter->wol_en);
4521
4522         be_cmd_query_port_name(adapter);
4523
4524         if (be_physfn(adapter)) {
4525                 status = be_cmd_get_active_profile(adapter, &profile_id);
4526                 if (!status)
4527                         dev_info(&adapter->pdev->dev,
4528                                  "Using profile 0x%x\n", profile_id);
4529         }
4530
4531         return 0;
4532 }
4533
4534 static int be_mac_setup(struct be_adapter *adapter)
4535 {
4536         u8 mac[ETH_ALEN];
4537         int status;
4538
4539         if (is_zero_ether_addr(adapter->netdev->dev_addr)) {
4540                 status = be_cmd_get_perm_mac(adapter, mac);
4541                 if (status)
4542                         return status;
4543
4544                 memcpy(adapter->netdev->dev_addr, mac, ETH_ALEN);
4545                 memcpy(adapter->netdev->perm_addr, mac, ETH_ALEN);
4546         }
4547
4548         return 0;
4549 }
4550
4551 static void be_schedule_worker(struct be_adapter *adapter)
4552 {
4553         queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
4554         adapter->flags |= BE_FLAGS_WORKER_SCHEDULED;
4555 }
4556
4557 static void be_destroy_err_recovery_workq(void)
4558 {
4559         if (!be_err_recovery_workq)
4560                 return;
4561
4562         flush_workqueue(be_err_recovery_workq);
4563         destroy_workqueue(be_err_recovery_workq);
4564         be_err_recovery_workq = NULL;
4565 }
4566
4567 static void be_schedule_err_detection(struct be_adapter *adapter, u32 delay)
4568 {
4569         struct be_error_recovery *err_rec = &adapter->error_recovery;
4570
4571         if (!be_err_recovery_workq)
4572                 return;
4573
4574         queue_delayed_work(be_err_recovery_workq, &err_rec->err_detection_work,
4575                            msecs_to_jiffies(delay));
4576         adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED;
4577 }
4578
4579 static int be_setup_queues(struct be_adapter *adapter)
4580 {
4581         struct net_device *netdev = adapter->netdev;
4582         int status;
4583
4584         status = be_evt_queues_create(adapter);
4585         if (status)
4586                 goto err;
4587
4588         status = be_tx_qs_create(adapter);
4589         if (status)
4590                 goto err;
4591
4592         status = be_rx_cqs_create(adapter);
4593         if (status)
4594                 goto err;
4595
4596         status = be_mcc_queues_create(adapter);
4597         if (status)
4598                 goto err;
4599
4600         status = netif_set_real_num_rx_queues(netdev, adapter->num_rx_qs);
4601         if (status)
4602                 goto err;
4603
4604         status = netif_set_real_num_tx_queues(netdev, adapter->num_tx_qs);
4605         if (status)
4606                 goto err;
4607
4608         return 0;
4609 err:
4610         dev_err(&adapter->pdev->dev, "queue_setup failed\n");
4611         return status;
4612 }
4613
4614 static int be_if_create(struct be_adapter *adapter)
4615 {
4616         u32 en_flags = BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS;
4617         u32 cap_flags = be_if_cap_flags(adapter);
4618         int status;
4619
4620         /* alloc required memory for other filtering fields */
4621         adapter->pmac_id = kcalloc(be_max_uc(adapter),
4622                                    sizeof(*adapter->pmac_id), GFP_KERNEL);
4623         if (!adapter->pmac_id)
4624                 return -ENOMEM;
4625
4626         adapter->mc_list = kcalloc(be_max_mc(adapter),
4627                                    sizeof(*adapter->mc_list), GFP_KERNEL);
4628         if (!adapter->mc_list)
4629                 return -ENOMEM;
4630
4631         adapter->uc_list = kcalloc(be_max_uc(adapter),
4632                                    sizeof(*adapter->uc_list), GFP_KERNEL);
4633         if (!adapter->uc_list)
4634                 return -ENOMEM;
4635
4636         if (adapter->cfg_num_rx_irqs == 1)
4637                 cap_flags &= ~(BE_IF_FLAGS_DEFQ_RSS | BE_IF_FLAGS_RSS);
4638
4639         en_flags &= cap_flags;
4640         /* will enable all the needed filter flags in be_open() */
4641         status = be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags,
4642                                   &adapter->if_handle, 0);
4643
4644         if (status)
4645                 return status;
4646
4647         return 0;
4648 }
4649
4650 int be_update_queues(struct be_adapter *adapter)
4651 {
4652         struct net_device *netdev = adapter->netdev;
4653         int status;
4654
4655         if (netif_running(netdev))
4656                 be_close(netdev);
4657
4658         be_cancel_worker(adapter);
4659
4660         /* If any vectors have been shared with RoCE we cannot re-program
4661          * the MSIx table.
4662          */
4663         if (!adapter->num_msix_roce_vec)
4664                 be_msix_disable(adapter);
4665
4666         be_clear_queues(adapter);
4667         status = be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4668         if (status)
4669                 return status;
4670
4671         if (!msix_enabled(adapter)) {
4672                 status = be_msix_enable(adapter);
4673                 if (status)
4674                         return status;
4675         }
4676
4677         status = be_if_create(adapter);
4678         if (status)
4679                 return status;
4680
4681         status = be_setup_queues(adapter);
4682         if (status)
4683                 return status;
4684
4685         be_schedule_worker(adapter);
4686
4687         if (netif_running(netdev))
4688                 status = be_open(netdev);
4689
4690         return status;
4691 }
4692
4693 static inline int fw_major_num(const char *fw_ver)
4694 {
4695         int fw_major = 0, i;
4696
4697         i = sscanf(fw_ver, "%d.", &fw_major);
4698         if (i != 1)
4699                 return 0;
4700
4701         return fw_major;
4702 }
4703
4704 /* If it is error recovery, FLR the PF
4705  * Else if any VFs are already enabled don't FLR the PF
4706  */
4707 static bool be_reset_required(struct be_adapter *adapter)
4708 {
4709         if (be_error_recovering(adapter))
4710                 return true;
4711         else
4712                 return pci_num_vf(adapter->pdev) == 0;
4713 }
4714
4715 /* Wait for the FW to be ready and perform the required initialization */
4716 static int be_func_init(struct be_adapter *adapter)
4717 {
4718         int status;
4719
4720         status = be_fw_wait_ready(adapter);
4721         if (status)
4722                 return status;
4723
4724         /* FW is now ready; clear errors to allow cmds/doorbell */
4725         be_clear_error(adapter, BE_CLEAR_ALL);
4726
4727         if (be_reset_required(adapter)) {
4728                 status = be_cmd_reset_function(adapter);
4729                 if (status)
4730                         return status;
4731
4732                 /* Wait for interrupts to quiesce after an FLR */
4733                 msleep(100);
4734         }
4735
4736         /* Tell FW we're ready to fire cmds */
4737         status = be_cmd_fw_init(adapter);
4738         if (status)
4739                 return status;
4740
4741         /* Allow interrupts for other ULPs running on NIC function */
4742         be_intr_set(adapter, true);
4743
4744         return 0;
4745 }
4746
4747 static int be_setup(struct be_adapter *adapter)
4748 {
4749         struct device *dev = &adapter->pdev->dev;
4750         int status;
4751
4752         status = be_func_init(adapter);
4753         if (status)
4754                 return status;
4755
4756         be_setup_init(adapter);
4757
4758         if (!lancer_chip(adapter))
4759                 be_cmd_req_native_mode(adapter);
4760
4761         /* invoke this cmd first to get pf_num and vf_num which are needed
4762          * for issuing profile related cmds
4763          */
4764         if (!BEx_chip(adapter)) {
4765                 status = be_cmd_get_func_config(adapter, NULL);
4766                 if (status)
4767                         return status;
4768         }
4769
4770         status = be_get_config(adapter);
4771         if (status)
4772                 goto err;
4773
4774         if (!BE2_chip(adapter) && be_physfn(adapter))
4775                 be_alloc_sriov_res(adapter);
4776
4777         status = be_get_resources(adapter);
4778         if (status)
4779                 goto err;
4780
4781         status = be_msix_enable(adapter);
4782         if (status)
4783                 goto err;
4784
4785         /* will enable all the needed filter flags in be_open() */
4786         status = be_if_create(adapter);
4787         if (status)
4788                 goto err;
4789
4790         /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
4791         rtnl_lock();
4792         status = be_setup_queues(adapter);
4793         rtnl_unlock();
4794         if (status)
4795                 goto err;
4796
4797         be_cmd_get_fn_privileges(adapter, &adapter->cmd_privileges, 0);
4798
4799         status = be_mac_setup(adapter);
4800         if (status)
4801                 goto err;
4802
4803         be_cmd_get_fw_ver(adapter);
4804         dev_info(dev, "FW version is %s\n", adapter->fw_ver);
4805
4806         if (BE2_chip(adapter) && fw_major_num(adapter->fw_ver) < 4) {
4807                 dev_err(dev, "Firmware on card is old(%s), IRQs may not work",
4808                         adapter->fw_ver);
4809                 dev_err(dev, "Please upgrade firmware to version >= 4.0\n");
4810         }
4811
4812         status = be_cmd_set_flow_control(adapter, adapter->tx_fc,
4813                                          adapter->rx_fc);
4814         if (status)
4815                 be_cmd_get_flow_control(adapter, &adapter->tx_fc,
4816                                         &adapter->rx_fc);
4817
4818         dev_info(&adapter->pdev->dev, "HW Flow control - TX:%d RX:%d\n",
4819                  adapter->tx_fc, adapter->rx_fc);
4820
4821         if (be_physfn(adapter))
4822                 be_cmd_set_logical_link_config(adapter,
4823                                                IFLA_VF_LINK_STATE_AUTO, 0);
4824
4825         /* BE3 EVB echoes broadcast/multicast packets back to PF's vport
4826          * confusing a linux bridge or OVS that it might be connected to.
4827          * Set the EVB to PASSTHRU mode which effectively disables the EVB
4828          * when SRIOV is not enabled.
4829          */
4830         if (BE3_chip(adapter))
4831                 be_cmd_set_hsw_config(adapter, 0, 0, adapter->if_handle,
4832                                       PORT_FWD_TYPE_PASSTHRU, 0);
4833
4834         if (adapter->num_vfs)
4835                 be_vf_setup(adapter);
4836
4837         status = be_cmd_get_phy_info(adapter);
4838         if (!status && be_pause_supported(adapter))
4839                 adapter->phy.fc_autoneg = 1;
4840
4841         if (be_physfn(adapter) && !lancer_chip(adapter))
4842                 be_cmd_set_features(adapter);
4843
4844         be_schedule_worker(adapter);
4845         adapter->flags |= BE_FLAGS_SETUP_DONE;
4846         return 0;
4847 err:
4848         be_clear(adapter);
4849         return status;
4850 }
4851
4852 #ifdef CONFIG_NET_POLL_CONTROLLER
4853 static void be_netpoll(struct net_device *netdev)
4854 {
4855         struct be_adapter *adapter = netdev_priv(netdev);
4856         struct be_eq_obj *eqo;
4857         int i;
4858
4859         for_all_evt_queues(adapter, eqo, i) {
4860                 be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
4861                 napi_schedule(&eqo->napi);
4862         }
4863 }
4864 #endif
4865
4866 int be_load_fw(struct be_adapter *adapter, u8 *fw_file)
4867 {
4868         const struct firmware *fw;
4869         int status;
4870
4871         if (!netif_running(adapter->netdev)) {
4872                 dev_err(&adapter->pdev->dev,
4873                         "Firmware load not allowed (interface is down)\n");
4874                 return -ENETDOWN;
4875         }
4876
4877         status = request_firmware(&fw, fw_file, &adapter->pdev->dev);
4878         if (status)
4879                 goto fw_exit;
4880
4881         dev_info(&adapter->pdev->dev, "Flashing firmware file %s\n", fw_file);
4882
4883         if (lancer_chip(adapter))
4884                 status = lancer_fw_download(adapter, fw);
4885         else
4886                 status = be_fw_download(adapter, fw);
4887
4888         if (!status)
4889                 be_cmd_get_fw_ver(adapter);
4890
4891 fw_exit:
4892         release_firmware(fw);
4893         return status;
4894 }
4895
4896 static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
4897                                  u16 flags)
4898 {
4899         struct be_adapter *adapter = netdev_priv(dev);
4900         struct nlattr *attr, *br_spec;
4901         int rem;
4902         int status = 0;
4903         u16 mode = 0;
4904
4905         if (!sriov_enabled(adapter))
4906                 return -EOPNOTSUPP;
4907
4908         br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
4909         if (!br_spec)
4910                 return -EINVAL;
4911
4912         nla_for_each_nested(attr, br_spec, rem) {
4913                 if (nla_type(attr) != IFLA_BRIDGE_MODE)
4914                         continue;
4915
4916                 if (nla_len(attr) < sizeof(mode))
4917                         return -EINVAL;
4918
4919                 mode = nla_get_u16(attr);
4920                 if (BE3_chip(adapter) && mode == BRIDGE_MODE_VEPA)
4921                         return -EOPNOTSUPP;
4922
4923                 if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
4924                         return -EINVAL;
4925
4926                 status = be_cmd_set_hsw_config(adapter, 0, 0,
4927                                                adapter->if_handle,
4928                                                mode == BRIDGE_MODE_VEPA ?
4929                                                PORT_FWD_TYPE_VEPA :
4930                                                PORT_FWD_TYPE_VEB, 0);
4931                 if (status)
4932                         goto err;
4933
4934                 dev_info(&adapter->pdev->dev, "enabled switch mode: %s\n",
4935                          mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4936
4937                 return status;
4938         }
4939 err:
4940         dev_err(&adapter->pdev->dev, "Failed to set switch mode %s\n",
4941                 mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4942
4943         return status;
4944 }
4945
4946 static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
4947                                  struct net_device *dev, u32 filter_mask,
4948                                  int nlflags)
4949 {
4950         struct be_adapter *adapter = netdev_priv(dev);
4951         int status = 0;
4952         u8 hsw_mode;
4953
4954         /* BE and Lancer chips support VEB mode only */
4955         if (BEx_chip(adapter) || lancer_chip(adapter)) {
4956                 /* VEB is disabled in non-SR-IOV profiles on BE3/Lancer */
4957                 if (!pci_sriov_get_totalvfs(adapter->pdev))
4958                         return 0;
4959                 hsw_mode = PORT_FWD_TYPE_VEB;
4960         } else {
4961                 status = be_cmd_get_hsw_config(adapter, NULL, 0,
4962                                                adapter->if_handle, &hsw_mode,
4963                                                NULL);
4964                 if (status)
4965                         return 0;
4966
4967                 if (hsw_mode == PORT_FWD_TYPE_PASSTHRU)
4968                         return 0;
4969         }
4970
4971         return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
4972                                        hsw_mode == PORT_FWD_TYPE_VEPA ?
4973                                        BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB,
4974                                        0, 0, nlflags, filter_mask, NULL);
4975 }
4976
4977 static struct be_cmd_work *be_alloc_work(struct be_adapter *adapter,
4978                                          void (*func)(struct work_struct *))
4979 {
4980         struct be_cmd_work *work;
4981
4982         work = kzalloc(sizeof(*work), GFP_ATOMIC);
4983         if (!work) {
4984                 dev_err(&adapter->pdev->dev,
4985                         "be_work memory allocation failed\n");
4986                 return NULL;
4987         }
4988
4989         INIT_WORK(&work->work, func);
4990         work->adapter = adapter;
4991         return work;
4992 }
4993
4994 /* VxLAN offload Notes:
4995  *
4996  * The stack defines tunnel offload flags (hw_enc_features) for IP and doesn't
4997  * distinguish various types of transports (VxLAN, GRE, NVGRE ..). So, offload
4998  * is expected to work across all types of IP tunnels once exported. Skyhawk
4999  * supports offloads for either VxLAN or NVGRE, exclusively. So we export VxLAN
5000  * offloads in hw_enc_features only when a VxLAN port is added. If other (non
5001  * VxLAN) tunnels are configured while VxLAN offloads are enabled, offloads for
5002  * those other tunnels are unexported on the fly through ndo_features_check().
5003  *
5004  * Skyhawk supports VxLAN offloads only for one UDP dport. So, if the stack
5005  * adds more than one port, disable offloads and don't re-enable them again
5006  * until after all the tunnels are removed.
5007  */
5008 static void be_work_add_vxlan_port(struct work_struct *work)
5009 {
5010         struct be_cmd_work *cmd_work =
5011                                 container_of(work, struct be_cmd_work, work);
5012         struct be_adapter *adapter = cmd_work->adapter;
5013         struct net_device *netdev = adapter->netdev;
5014         struct device *dev = &adapter->pdev->dev;
5015         __be16 port = cmd_work->info.vxlan_port;
5016         int status;
5017
5018         if (adapter->vxlan_port == port && adapter->vxlan_port_count) {
5019                 adapter->vxlan_port_aliases++;
5020                 goto done;
5021         }
5022
5023         if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS) {
5024                 dev_info(dev,
5025                          "Only one UDP port supported for VxLAN offloads\n");
5026                 dev_info(dev, "Disabling VxLAN offloads\n");
5027                 adapter->vxlan_port_count++;
5028                 goto err;
5029         }
5030
5031         if (adapter->vxlan_port_count++ >= 1)
5032                 goto done;
5033
5034         status = be_cmd_manage_iface(adapter, adapter->if_handle,
5035                                      OP_CONVERT_NORMAL_TO_TUNNEL);
5036         if (status) {
5037                 dev_warn(dev, "Failed to convert normal interface to tunnel\n");
5038                 goto err;
5039         }
5040
5041         status = be_cmd_set_vxlan_port(adapter, port);
5042         if (status) {
5043                 dev_warn(dev, "Failed to add VxLAN port\n");
5044                 goto err;
5045         }
5046         adapter->flags |= BE_FLAGS_VXLAN_OFFLOADS;
5047         adapter->vxlan_port = port;
5048
5049         netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
5050                                    NETIF_F_TSO | NETIF_F_TSO6 |
5051                                    NETIF_F_GSO_UDP_TUNNEL;
5052         netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
5053         netdev->features |= NETIF_F_GSO_UDP_TUNNEL;
5054
5055         dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n",
5056                  be16_to_cpu(port));
5057         goto done;
5058 err:
5059         be_disable_vxlan_offloads(adapter);
5060 done:
5061         kfree(cmd_work);
5062 }
5063
5064 static void be_work_del_vxlan_port(struct work_struct *work)
5065 {
5066         struct be_cmd_work *cmd_work =
5067                                 container_of(work, struct be_cmd_work, work);
5068         struct be_adapter *adapter = cmd_work->adapter;
5069         __be16 port = cmd_work->info.vxlan_port;
5070
5071         if (adapter->vxlan_port != port)
5072                 goto done;
5073
5074         if (adapter->vxlan_port_aliases) {
5075                 adapter->vxlan_port_aliases--;
5076                 goto out;
5077         }
5078
5079         be_disable_vxlan_offloads(adapter);
5080
5081         dev_info(&adapter->pdev->dev,
5082                  "Disabled VxLAN offloads for UDP port %d\n",
5083                  be16_to_cpu(port));
5084 done:
5085         adapter->vxlan_port_count--;
5086 out:
5087         kfree(cmd_work);
5088 }
5089
5090 static void be_cfg_vxlan_port(struct net_device *netdev,
5091                               struct udp_tunnel_info *ti,
5092                               void (*func)(struct work_struct *))
5093 {
5094         struct be_adapter *adapter = netdev_priv(netdev);
5095         struct be_cmd_work *cmd_work;
5096
5097         if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
5098                 return;
5099
5100         if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter))
5101                 return;
5102
5103         cmd_work = be_alloc_work(adapter, func);
5104         if (cmd_work) {
5105                 cmd_work->info.vxlan_port = ti->port;
5106                 queue_work(be_wq, &cmd_work->work);
5107         }
5108 }
5109
5110 static void be_del_vxlan_port(struct net_device *netdev,
5111                               struct udp_tunnel_info *ti)
5112 {
5113         be_cfg_vxlan_port(netdev, ti, be_work_del_vxlan_port);
5114 }
5115
5116 static void be_add_vxlan_port(struct net_device *netdev,
5117                               struct udp_tunnel_info *ti)
5118 {
5119         be_cfg_vxlan_port(netdev, ti, be_work_add_vxlan_port);
5120 }
5121
5122 static netdev_features_t be_features_check(struct sk_buff *skb,
5123                                            struct net_device *dev,
5124                                            netdev_features_t features)
5125 {
5126         struct be_adapter *adapter = netdev_priv(dev);
5127         u8 l4_hdr = 0;
5128
5129         /* The code below restricts offload features for some tunneled packets.
5130          * Offload features for normal (non tunnel) packets are unchanged.
5131          */
5132         if (!skb->encapsulation ||
5133             !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS))
5134                 return features;
5135
5136         /* It's an encapsulated packet and VxLAN offloads are enabled. We
5137          * should disable tunnel offload features if it's not a VxLAN packet,
5138          * as tunnel offloads have been enabled only for VxLAN. This is done to
5139          * allow other tunneled traffic like GRE work fine while VxLAN
5140          * offloads are configured in Skyhawk-R.
5141          */
5142         switch (vlan_get_protocol(skb)) {
5143         case htons(ETH_P_IP):
5144                 l4_hdr = ip_hdr(skb)->protocol;
5145                 break;
5146         case htons(ETH_P_IPV6):
5147                 l4_hdr = ipv6_hdr(skb)->nexthdr;
5148                 break;
5149         default:
5150                 return features;
5151         }
5152
5153         if (l4_hdr != IPPROTO_UDP ||
5154             skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
5155             skb->inner_protocol != htons(ETH_P_TEB) ||
5156             skb_inner_mac_header(skb) - skb_transport_header(skb) !=
5157                 sizeof(struct udphdr) + sizeof(struct vxlanhdr) ||
5158             !adapter->vxlan_port ||
5159             udp_hdr(skb)->dest != adapter->vxlan_port)
5160                 return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
5161
5162         return features;
5163 }
5164
5165 static int be_get_phys_port_id(struct net_device *dev,
5166                                struct netdev_phys_item_id *ppid)
5167 {
5168         int i, id_len = CNTL_SERIAL_NUM_WORDS * CNTL_SERIAL_NUM_WORD_SZ + 1;
5169         struct be_adapter *adapter = netdev_priv(dev);
5170         u8 *id;
5171
5172         if (MAX_PHYS_ITEM_ID_LEN < id_len)
5173                 return -ENOSPC;
5174
5175         ppid->id[0] = adapter->hba_port_num + 1;
5176         id = &ppid->id[1];
5177         for (i = CNTL_SERIAL_NUM_WORDS - 1; i >= 0;
5178              i--, id += CNTL_SERIAL_NUM_WORD_SZ)
5179                 memcpy(id, &adapter->serial_num[i], CNTL_SERIAL_NUM_WORD_SZ);
5180
5181         ppid->id_len = id_len;
5182
5183         return 0;
5184 }
5185
5186 static void be_set_rx_mode(struct net_device *dev)
5187 {
5188         struct be_adapter *adapter = netdev_priv(dev);
5189         struct be_cmd_work *work;
5190
5191         work = be_alloc_work(adapter, be_work_set_rx_mode);
5192         if (work)
5193                 queue_work(be_wq, &work->work);
5194 }
5195
5196 static const struct net_device_ops be_netdev_ops = {
5197         .ndo_open               = be_open,
5198         .ndo_stop               = be_close,
5199         .ndo_start_xmit         = be_xmit,
5200         .ndo_set_rx_mode        = be_set_rx_mode,
5201         .ndo_set_mac_address    = be_mac_addr_set,
5202         .ndo_get_stats64        = be_get_stats64,
5203         .ndo_validate_addr      = eth_validate_addr,
5204         .ndo_vlan_rx_add_vid    = be_vlan_add_vid,
5205         .ndo_vlan_rx_kill_vid   = be_vlan_rem_vid,
5206         .ndo_set_vf_mac         = be_set_vf_mac,
5207         .ndo_set_vf_vlan        = be_set_vf_vlan,
5208         .ndo_set_vf_rate        = be_set_vf_tx_rate,
5209         .ndo_get_vf_config      = be_get_vf_config,
5210         .ndo_set_vf_link_state  = be_set_vf_link_state,
5211         .ndo_set_vf_spoofchk    = be_set_vf_spoofchk,
5212 #ifdef CONFIG_NET_POLL_CONTROLLER
5213         .ndo_poll_controller    = be_netpoll,
5214 #endif
5215         .ndo_bridge_setlink     = be_ndo_bridge_setlink,
5216         .ndo_bridge_getlink     = be_ndo_bridge_getlink,
5217 #ifdef CONFIG_NET_RX_BUSY_POLL
5218         .ndo_busy_poll          = be_busy_poll,
5219 #endif
5220         .ndo_udp_tunnel_add     = be_add_vxlan_port,
5221         .ndo_udp_tunnel_del     = be_del_vxlan_port,
5222         .ndo_features_check     = be_features_check,
5223         .ndo_get_phys_port_id   = be_get_phys_port_id,
5224 };
5225
5226 static void be_netdev_init(struct net_device *netdev)
5227 {
5228         struct be_adapter *adapter = netdev_priv(netdev);
5229
5230         netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5231                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
5232                 NETIF_F_HW_VLAN_CTAG_TX;
5233         if ((be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS))
5234                 netdev->hw_features |= NETIF_F_RXHASH;
5235
5236         netdev->features |= netdev->hw_features |
5237                 NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER;
5238
5239         netdev->vlan_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5240                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
5241
5242         netdev->priv_flags |= IFF_UNICAST_FLT;
5243
5244         netdev->flags |= IFF_MULTICAST;
5245
5246         netif_set_gso_max_size(netdev, BE_MAX_GSO_SIZE - ETH_HLEN);
5247
5248         netdev->netdev_ops = &be_netdev_ops;
5249
5250         netdev->ethtool_ops = &be_ethtool_ops;
5251
5252         /* MTU range: 256 - 9000 */
5253         netdev->min_mtu = BE_MIN_MTU;
5254         netdev->max_mtu = BE_MAX_MTU;
5255 }
5256
5257 static void be_cleanup(struct be_adapter *adapter)
5258 {
5259         struct net_device *netdev = adapter->netdev;
5260
5261         rtnl_lock();
5262         netif_device_detach(netdev);
5263         if (netif_running(netdev))
5264                 be_close(netdev);
5265         rtnl_unlock();
5266
5267         be_clear(adapter);
5268 }
5269
5270 static int be_resume(struct be_adapter *adapter)
5271 {
5272         struct net_device *netdev = adapter->netdev;
5273         int status;
5274
5275         status = be_setup(adapter);
5276         if (status)
5277                 return status;
5278
5279         rtnl_lock();
5280         if (netif_running(netdev))
5281                 status = be_open(netdev);
5282         rtnl_unlock();
5283
5284         if (status)
5285                 return status;
5286
5287         netif_device_attach(netdev);
5288
5289         return 0;
5290 }
5291
5292 static void be_soft_reset(struct be_adapter *adapter)
5293 {
5294         u32 val;
5295
5296         dev_info(&adapter->pdev->dev, "Initiating chip soft reset\n");
5297         val = ioread32(adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5298         val |= SLIPORT_SOFTRESET_SR_MASK;
5299         iowrite32(val, adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5300 }
5301
5302 static bool be_err_is_recoverable(struct be_adapter *adapter)
5303 {
5304         struct be_error_recovery *err_rec = &adapter->error_recovery;
5305         unsigned long initial_idle_time =
5306                 msecs_to_jiffies(ERR_RECOVERY_IDLE_TIME);
5307         unsigned long recovery_interval =
5308                 msecs_to_jiffies(ERR_RECOVERY_INTERVAL);
5309         u16 ue_err_code;
5310         u32 val;
5311
5312         val = be_POST_stage_get(adapter);
5313         if ((val & POST_STAGE_RECOVERABLE_ERR) != POST_STAGE_RECOVERABLE_ERR)
5314                 return false;
5315         ue_err_code = val & POST_ERR_RECOVERY_CODE_MASK;
5316         if (ue_err_code == 0)
5317                 return false;
5318
5319         dev_err(&adapter->pdev->dev, "Recoverable HW error code: 0x%x\n",
5320                 ue_err_code);
5321
5322         if (jiffies - err_rec->probe_time <= initial_idle_time) {
5323                 dev_err(&adapter->pdev->dev,
5324                         "Cannot recover within %lu sec from driver load\n",
5325                         jiffies_to_msecs(initial_idle_time) / MSEC_PER_SEC);
5326                 return false;
5327         }
5328
5329         if (err_rec->last_recovery_time &&
5330             (jiffies - err_rec->last_recovery_time <= recovery_interval)) {
5331                 dev_err(&adapter->pdev->dev,
5332                         "Cannot recover within %lu sec from last recovery\n",
5333                         jiffies_to_msecs(recovery_interval) / MSEC_PER_SEC);
5334                 return false;
5335         }
5336
5337         if (ue_err_code == err_rec->last_err_code) {
5338                 dev_err(&adapter->pdev->dev,
5339                         "Cannot recover from a consecutive TPE error\n");
5340                 return false;
5341         }
5342
5343         err_rec->last_recovery_time = jiffies;
5344         err_rec->last_err_code = ue_err_code;
5345         return true;
5346 }
5347
5348 static int be_tpe_recover(struct be_adapter *adapter)
5349 {
5350         struct be_error_recovery *err_rec = &adapter->error_recovery;
5351         int status = -EAGAIN;
5352         u32 val;
5353
5354         switch (err_rec->recovery_state) {
5355         case ERR_RECOVERY_ST_NONE:
5356                 err_rec->recovery_state = ERR_RECOVERY_ST_DETECT;
5357                 err_rec->resched_delay = ERR_RECOVERY_UE_DETECT_DURATION;
5358                 break;
5359
5360         case ERR_RECOVERY_ST_DETECT:
5361                 val = be_POST_stage_get(adapter);
5362                 if ((val & POST_STAGE_RECOVERABLE_ERR) !=
5363                     POST_STAGE_RECOVERABLE_ERR) {
5364                         dev_err(&adapter->pdev->dev,
5365                                 "Unrecoverable HW error detected: 0x%x\n", val);
5366                         status = -EINVAL;
5367                         err_rec->resched_delay = 0;
5368                         break;
5369                 }
5370
5371                 dev_err(&adapter->pdev->dev, "Recoverable HW error detected\n");
5372
5373                 /* Only PF0 initiates Chip Soft Reset. But PF0 must wait UE2SR
5374                  * milliseconds before it checks for final error status in
5375                  * SLIPORT_SEMAPHORE to determine if recovery criteria is met.
5376                  * If it does, then PF0 initiates a Soft Reset.
5377                  */
5378                 if (adapter->pf_num == 0) {
5379                         err_rec->recovery_state = ERR_RECOVERY_ST_RESET;
5380                         err_rec->resched_delay = err_rec->ue_to_reset_time -
5381                                         ERR_RECOVERY_UE_DETECT_DURATION;
5382                         break;
5383                 }
5384
5385                 err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5386                 err_rec->resched_delay = err_rec->ue_to_poll_time -
5387                                         ERR_RECOVERY_UE_DETECT_DURATION;
5388                 break;
5389
5390         case ERR_RECOVERY_ST_RESET:
5391                 if (!be_err_is_recoverable(adapter)) {
5392                         dev_err(&adapter->pdev->dev,
5393                                 "Failed to meet recovery criteria\n");
5394                         status = -EIO;
5395                         err_rec->resched_delay = 0;
5396                         break;
5397                 }
5398                 be_soft_reset(adapter);
5399                 err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5400                 err_rec->resched_delay = err_rec->ue_to_poll_time -
5401                                         err_rec->ue_to_reset_time;
5402                 break;
5403
5404         case ERR_RECOVERY_ST_PRE_POLL:
5405                 err_rec->recovery_state = ERR_RECOVERY_ST_REINIT;
5406                 err_rec->resched_delay = 0;
5407                 status = 0;                     /* done */
5408                 break;
5409
5410         default:
5411                 status = -EINVAL;
5412                 err_rec->resched_delay = 0;
5413                 break;
5414         }
5415
5416         return status;
5417 }
5418
5419 static int be_err_recover(struct be_adapter *adapter)
5420 {
5421         int status;
5422
5423         if (!lancer_chip(adapter)) {
5424                 if (!adapter->error_recovery.recovery_supported ||
5425                     adapter->priv_flags & BE_DISABLE_TPE_RECOVERY)
5426                         return -EIO;
5427                 status = be_tpe_recover(adapter);
5428                 if (status)
5429                         goto err;
5430         }
5431
5432         /* Wait for adapter to reach quiescent state before
5433          * destroying queues
5434          */
5435         status = be_fw_wait_ready(adapter);
5436         if (status)
5437                 goto err;
5438
5439         adapter->flags |= BE_FLAGS_TRY_RECOVERY;
5440
5441         be_cleanup(adapter);
5442
5443         status = be_resume(adapter);
5444         if (status)
5445                 goto err;
5446
5447         adapter->flags &= ~BE_FLAGS_TRY_RECOVERY;
5448
5449 err:
5450         return status;
5451 }
5452
5453 static void be_err_detection_task(struct work_struct *work)
5454 {
5455         struct be_error_recovery *err_rec =
5456                         container_of(work, struct be_error_recovery,
5457                                      err_detection_work.work);
5458         struct be_adapter *adapter =
5459                         container_of(err_rec, struct be_adapter,
5460                                      error_recovery);
5461         u32 resched_delay = ERR_RECOVERY_DETECTION_DELAY;
5462         struct device *dev = &adapter->pdev->dev;
5463         int recovery_status;
5464
5465         be_detect_error(adapter);
5466         if (!be_check_error(adapter, BE_ERROR_HW))
5467                 goto reschedule_task;
5468
5469         recovery_status = be_err_recover(adapter);
5470         if (!recovery_status) {
5471                 err_rec->recovery_retries = 0;
5472                 err_rec->recovery_state = ERR_RECOVERY_ST_NONE;
5473                 dev_info(dev, "Adapter recovery successful\n");
5474                 goto reschedule_task;
5475         } else if (!lancer_chip(adapter) && err_rec->resched_delay) {
5476                 /* BEx/SH recovery state machine */
5477                 if (adapter->pf_num == 0 &&
5478                     err_rec->recovery_state > ERR_RECOVERY_ST_DETECT)
5479                         dev_err(&adapter->pdev->dev,
5480                                 "Adapter recovery in progress\n");
5481                 resched_delay = err_rec->resched_delay;
5482                 goto reschedule_task;
5483         } else if (lancer_chip(adapter) && be_virtfn(adapter)) {
5484                 /* For VFs, check if PF have allocated resources
5485                  * every second.
5486                  */
5487                 dev_err(dev, "Re-trying adapter recovery\n");
5488                 goto reschedule_task;
5489         } else if (lancer_chip(adapter) && err_rec->recovery_retries++ <
5490                    ERR_RECOVERY_MAX_RETRY_COUNT) {
5491                 /* In case of another error during recovery, it takes 30 sec
5492                  * for adapter to come out of error. Retry error recovery after
5493                  * this time interval.
5494                  */
5495                 dev_err(&adapter->pdev->dev, "Re-trying adapter recovery\n");
5496                 resched_delay = ERR_RECOVERY_RETRY_DELAY;
5497                 goto reschedule_task;
5498         } else {
5499                 dev_err(dev, "Adapter recovery failed\n");
5500                 dev_err(dev, "Please reboot server to recover\n");
5501         }
5502
5503         return;
5504
5505 reschedule_task:
5506         be_schedule_err_detection(adapter, resched_delay);
5507 }
5508
5509 static void be_log_sfp_info(struct be_adapter *adapter)
5510 {
5511         int status;
5512
5513         status = be_cmd_query_sfp_info(adapter);
5514         if (!status) {
5515                 dev_err(&adapter->pdev->dev,
5516                         "Port %c: %s Vendor: %s part no: %s",
5517                         adapter->port_name,
5518                         be_misconfig_evt_port_state[adapter->phy_state],
5519                         adapter->phy.vendor_name,
5520                         adapter->phy.vendor_pn);
5521         }
5522         adapter->flags &= ~BE_FLAGS_PHY_MISCONFIGURED;
5523 }
5524
5525 static void be_worker(struct work_struct *work)
5526 {
5527         struct be_adapter *adapter =
5528                 container_of(work, struct be_adapter, work.work);
5529         struct be_rx_obj *rxo;
5530         int i;
5531
5532         if (be_physfn(adapter) &&
5533             MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0)
5534                 be_cmd_get_die_temperature(adapter);
5535
5536         /* when interrupts are not yet enabled, just reap any pending
5537          * mcc completions
5538          */
5539         if (!netif_running(adapter->netdev)) {
5540                 local_bh_disable();
5541                 be_process_mcc(adapter);
5542                 local_bh_enable();
5543                 goto reschedule;
5544         }
5545
5546         if (!adapter->stats_cmd_sent) {
5547                 if (lancer_chip(adapter))
5548                         lancer_cmd_get_pport_stats(adapter,
5549                                                    &adapter->stats_cmd);
5550                 else
5551                         be_cmd_get_stats(adapter, &adapter->stats_cmd);
5552         }
5553
5554         for_all_rx_queues(adapter, rxo, i) {
5555                 /* Replenish RX-queues starved due to memory
5556                  * allocation failures.
5557                  */
5558                 if (rxo->rx_post_starved)
5559                         be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST);
5560         }
5561
5562         /* EQ-delay update for Skyhawk is done while notifying EQ */
5563         if (!skyhawk_chip(adapter))
5564                 be_eqd_update(adapter, false);
5565
5566         if (adapter->flags & BE_FLAGS_PHY_MISCONFIGURED)
5567                 be_log_sfp_info(adapter);
5568
5569 reschedule:
5570         adapter->work_counter++;
5571         queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
5572 }
5573
5574 static void be_unmap_pci_bars(struct be_adapter *adapter)
5575 {
5576         if (adapter->csr)
5577                 pci_iounmap(adapter->pdev, adapter->csr);
5578         if (adapter->db)
5579                 pci_iounmap(adapter->pdev, adapter->db);
5580         if (adapter->pcicfg && adapter->pcicfg_mapped)
5581                 pci_iounmap(adapter->pdev, adapter->pcicfg);
5582 }
5583
5584 static int db_bar(struct be_adapter *adapter)
5585 {
5586         if (lancer_chip(adapter) || be_virtfn(adapter))
5587                 return 0;
5588         else
5589                 return 4;
5590 }
5591
5592 static int be_roce_map_pci_bars(struct be_adapter *adapter)
5593 {
5594         if (skyhawk_chip(adapter)) {
5595                 adapter->roce_db.size = 4096;
5596                 adapter->roce_db.io_addr = pci_resource_start(adapter->pdev,
5597                                                               db_bar(adapter));
5598                 adapter->roce_db.total_size = pci_resource_len(adapter->pdev,
5599                                                                db_bar(adapter));
5600         }
5601         return 0;
5602 }
5603
5604 static int be_map_pci_bars(struct be_adapter *adapter)
5605 {
5606         struct pci_dev *pdev = adapter->pdev;
5607         u8 __iomem *addr;
5608         u32 sli_intf;
5609
5610         pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf);
5611         adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >>
5612                                 SLI_INTF_FAMILY_SHIFT;
5613         adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0;
5614
5615         if (BEx_chip(adapter) && be_physfn(adapter)) {
5616                 adapter->csr = pci_iomap(pdev, 2, 0);
5617                 if (!adapter->csr)
5618                         return -ENOMEM;
5619         }
5620
5621         addr = pci_iomap(pdev, db_bar(adapter), 0);
5622         if (!addr)
5623                 goto pci_map_err;
5624         adapter->db = addr;
5625
5626         if (skyhawk_chip(adapter) || BEx_chip(adapter)) {
5627                 if (be_physfn(adapter)) {
5628                         /* PCICFG is the 2nd BAR in BE2 */
5629                         addr = pci_iomap(pdev, BE2_chip(adapter) ? 1 : 0, 0);
5630                         if (!addr)
5631                                 goto pci_map_err;
5632                         adapter->pcicfg = addr;
5633                         adapter->pcicfg_mapped = true;
5634                 } else {
5635                         adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET;
5636                         adapter->pcicfg_mapped = false;
5637                 }
5638         }
5639
5640         be_roce_map_pci_bars(adapter);
5641         return 0;
5642
5643 pci_map_err:
5644         dev_err(&pdev->dev, "Error in mapping PCI BARs\n");
5645         be_unmap_pci_bars(adapter);
5646         return -ENOMEM;
5647 }
5648
5649 static void be_drv_cleanup(struct be_adapter *adapter)
5650 {
5651         struct be_dma_mem *mem = &adapter->mbox_mem_alloced;
5652         struct device *dev = &adapter->pdev->dev;
5653
5654         if (mem->va)
5655                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5656
5657         mem = &adapter->rx_filter;
5658         if (mem->va)
5659                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5660
5661         mem = &adapter->stats_cmd;
5662         if (mem->va)
5663                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5664 }
5665
5666 /* Allocate and initialize various fields in be_adapter struct */
5667 static int be_drv_init(struct be_adapter *adapter)
5668 {
5669         struct be_dma_mem *mbox_mem_alloc = &adapter->mbox_mem_alloced;
5670         struct be_dma_mem *mbox_mem_align = &adapter->mbox_mem;
5671         struct be_dma_mem *rx_filter = &adapter->rx_filter;
5672         struct be_dma_mem *stats_cmd = &adapter->stats_cmd;
5673         struct device *dev = &adapter->pdev->dev;
5674         int status = 0;
5675
5676         mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16;
5677         mbox_mem_alloc->va = dma_zalloc_coherent(dev, mbox_mem_alloc->size,
5678                                                  &mbox_mem_alloc->dma,
5679                                                  GFP_KERNEL);
5680         if (!mbox_mem_alloc->va)
5681                 return -ENOMEM;
5682
5683         mbox_mem_align->size = sizeof(struct be_mcc_mailbox);
5684         mbox_mem_align->va = PTR_ALIGN(mbox_mem_alloc->va, 16);
5685         mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16);
5686
5687         rx_filter->size = sizeof(struct be_cmd_req_rx_filter);
5688         rx_filter->va = dma_zalloc_coherent(dev, rx_filter->size,
5689                                             &rx_filter->dma, GFP_KERNEL);
5690         if (!rx_filter->va) {
5691                 status = -ENOMEM;
5692                 goto free_mbox;
5693         }
5694
5695         if (lancer_chip(adapter))
5696                 stats_cmd->size = sizeof(struct lancer_cmd_req_pport_stats);
5697         else if (BE2_chip(adapter))
5698                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v0);
5699         else if (BE3_chip(adapter))
5700                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v1);
5701         else
5702                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v2);
5703         stats_cmd->va = dma_zalloc_coherent(dev, stats_cmd->size,
5704                                             &stats_cmd->dma, GFP_KERNEL);
5705         if (!stats_cmd->va) {
5706                 status = -ENOMEM;
5707                 goto free_rx_filter;
5708         }
5709
5710         mutex_init(&adapter->mbox_lock);
5711         mutex_init(&adapter->mcc_lock);
5712         mutex_init(&adapter->rx_filter_lock);
5713         spin_lock_init(&adapter->mcc_cq_lock);
5714         init_completion(&adapter->et_cmd_compl);
5715
5716         pci_save_state(adapter->pdev);
5717
5718         INIT_DELAYED_WORK(&adapter->work, be_worker);
5719
5720         adapter->error_recovery.recovery_state = ERR_RECOVERY_ST_NONE;
5721         adapter->error_recovery.resched_delay = 0;
5722         INIT_DELAYED_WORK(&adapter->error_recovery.err_detection_work,
5723                           be_err_detection_task);
5724
5725         adapter->rx_fc = true;
5726         adapter->tx_fc = true;
5727
5728         /* Must be a power of 2 or else MODULO will BUG_ON */
5729         adapter->be_get_temp_freq = 64;
5730
5731         return 0;
5732
5733 free_rx_filter:
5734         dma_free_coherent(dev, rx_filter->size, rx_filter->va, rx_filter->dma);
5735 free_mbox:
5736         dma_free_coherent(dev, mbox_mem_alloc->size, mbox_mem_alloc->va,
5737                           mbox_mem_alloc->dma);
5738         return status;
5739 }
5740
5741 static void be_remove(struct pci_dev *pdev)
5742 {
5743         struct be_adapter *adapter = pci_get_drvdata(pdev);
5744
5745         if (!adapter)
5746                 return;
5747
5748         be_roce_dev_remove(adapter);
5749         be_intr_set(adapter, false);
5750
5751         be_cancel_err_detection(adapter);
5752
5753         unregister_netdev(adapter->netdev);
5754
5755         be_clear(adapter);
5756
5757         if (!pci_vfs_assigned(adapter->pdev))
5758                 be_cmd_reset_function(adapter);
5759
5760         /* tell fw we're done with firing cmds */
5761         be_cmd_fw_clean(adapter);
5762
5763         be_unmap_pci_bars(adapter);
5764         be_drv_cleanup(adapter);
5765
5766         pci_disable_pcie_error_reporting(pdev);
5767
5768         pci_release_regions(pdev);
5769         pci_disable_device(pdev);
5770
5771         free_netdev(adapter->netdev);
5772 }
5773
5774 static ssize_t be_hwmon_show_temp(struct device *dev,
5775                                   struct device_attribute *dev_attr,
5776                                   char *buf)
5777 {
5778         struct be_adapter *adapter = dev_get_drvdata(dev);
5779
5780         /* Unit: millidegree Celsius */
5781         if (adapter->hwmon_info.be_on_die_temp == BE_INVALID_DIE_TEMP)
5782                 return -EIO;
5783         else
5784                 return sprintf(buf, "%u\n",
5785                                adapter->hwmon_info.be_on_die_temp * 1000);
5786 }
5787
5788 static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO,
5789                           be_hwmon_show_temp, NULL, 1);
5790
5791 static struct attribute *be_hwmon_attrs[] = {
5792         &sensor_dev_attr_temp1_input.dev_attr.attr,
5793         NULL
5794 };
5795
5796 ATTRIBUTE_GROUPS(be_hwmon);
5797
5798 static char *mc_name(struct be_adapter *adapter)
5799 {
5800         char *str = ""; /* default */
5801
5802         switch (adapter->mc_type) {
5803         case UMC:
5804                 str = "UMC";
5805                 break;
5806         case FLEX10:
5807                 str = "FLEX10";
5808                 break;
5809         case vNIC1:
5810                 str = "vNIC-1";
5811                 break;
5812         case nPAR:
5813                 str = "nPAR";
5814                 break;
5815         case UFP:
5816                 str = "UFP";
5817                 break;
5818         case vNIC2:
5819                 str = "vNIC-2";
5820                 break;
5821         default:
5822                 str = "";
5823         }
5824
5825         return str;
5826 }
5827
5828 static inline char *func_name(struct be_adapter *adapter)
5829 {
5830         return be_physfn(adapter) ? "PF" : "VF";
5831 }
5832
5833 static inline char *nic_name(struct pci_dev *pdev)
5834 {
5835         switch (pdev->device) {
5836         case OC_DEVICE_ID1:
5837                 return OC_NAME;
5838         case OC_DEVICE_ID2:
5839                 return OC_NAME_BE;
5840         case OC_DEVICE_ID3:
5841         case OC_DEVICE_ID4:
5842                 return OC_NAME_LANCER;
5843         case BE_DEVICE_ID2:
5844                 return BE3_NAME;
5845         case OC_DEVICE_ID5:
5846         case OC_DEVICE_ID6:
5847                 return OC_NAME_SH;
5848         default:
5849                 return BE_NAME;
5850         }
5851 }
5852
5853 static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id)
5854 {
5855         struct be_adapter *adapter;
5856         struct net_device *netdev;
5857         int status = 0;
5858
5859         dev_info(&pdev->dev, "%s version is %s\n", DRV_NAME, DRV_VER);
5860
5861         status = pci_enable_device(pdev);
5862         if (status)
5863                 goto do_none;
5864
5865         status = pci_request_regions(pdev, DRV_NAME);
5866         if (status)
5867                 goto disable_dev;
5868         pci_set_master(pdev);
5869
5870         netdev = alloc_etherdev_mqs(sizeof(*adapter), MAX_TX_QS, MAX_RX_QS);
5871         if (!netdev) {
5872                 status = -ENOMEM;
5873                 goto rel_reg;
5874         }
5875         adapter = netdev_priv(netdev);
5876         adapter->pdev = pdev;
5877         pci_set_drvdata(pdev, adapter);
5878         adapter->netdev = netdev;
5879         SET_NETDEV_DEV(netdev, &pdev->dev);
5880
5881         status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
5882         if (!status) {
5883                 netdev->features |= NETIF_F_HIGHDMA;
5884         } else {
5885                 status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
5886                 if (status) {
5887                         dev_err(&pdev->dev, "Could not set PCI DMA Mask\n");
5888                         goto free_netdev;
5889                 }
5890         }
5891
5892         status = pci_enable_pcie_error_reporting(pdev);
5893         if (!status)
5894                 dev_info(&pdev->dev, "PCIe error reporting enabled\n");
5895
5896         status = be_map_pci_bars(adapter);
5897         if (status)
5898                 goto free_netdev;
5899
5900         status = be_drv_init(adapter);
5901         if (status)
5902                 goto unmap_bars;
5903
5904         status = be_setup(adapter);
5905         if (status)
5906                 goto drv_cleanup;
5907
5908         be_netdev_init(netdev);
5909         status = register_netdev(netdev);
5910         if (status != 0)
5911                 goto unsetup;
5912
5913         be_roce_dev_add(adapter);
5914
5915         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5916         adapter->error_recovery.probe_time = jiffies;
5917
5918         /* On Die temperature not supported for VF. */
5919         if (be_physfn(adapter) && IS_ENABLED(CONFIG_BE2NET_HWMON)) {
5920                 adapter->hwmon_info.hwmon_dev =
5921                         devm_hwmon_device_register_with_groups(&pdev->dev,
5922                                                                DRV_NAME,
5923                                                                adapter,
5924                                                                be_hwmon_groups);
5925                 adapter->hwmon_info.be_on_die_temp = BE_INVALID_DIE_TEMP;
5926         }
5927
5928         dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev),
5929                  func_name(adapter), mc_name(adapter), adapter->port_name);
5930
5931         return 0;
5932
5933 unsetup:
5934         be_clear(adapter);
5935 drv_cleanup:
5936         be_drv_cleanup(adapter);
5937 unmap_bars:
5938         be_unmap_pci_bars(adapter);
5939 free_netdev:
5940         free_netdev(netdev);
5941 rel_reg:
5942         pci_release_regions(pdev);
5943 disable_dev:
5944         pci_disable_device(pdev);
5945 do_none:
5946         dev_err(&pdev->dev, "%s initialization failed\n", nic_name(pdev));
5947         return status;
5948 }
5949
5950 static int be_suspend(struct pci_dev *pdev, pm_message_t state)
5951 {
5952         struct be_adapter *adapter = pci_get_drvdata(pdev);
5953
5954         be_intr_set(adapter, false);
5955         be_cancel_err_detection(adapter);
5956
5957         be_cleanup(adapter);
5958
5959         pci_save_state(pdev);
5960         pci_disable_device(pdev);
5961         pci_set_power_state(pdev, pci_choose_state(pdev, state));
5962         return 0;
5963 }
5964
5965 static int be_pci_resume(struct pci_dev *pdev)
5966 {
5967         struct be_adapter *adapter = pci_get_drvdata(pdev);
5968         int status = 0;
5969
5970         status = pci_enable_device(pdev);
5971         if (status)
5972                 return status;
5973
5974         pci_restore_state(pdev);
5975
5976         status = be_resume(adapter);
5977         if (status)
5978                 return status;
5979
5980         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5981
5982         return 0;
5983 }
5984
5985 /*
5986  * An FLR will stop BE from DMAing any data.
5987  */
5988 static void be_shutdown(struct pci_dev *pdev)
5989 {
5990         struct be_adapter *adapter = pci_get_drvdata(pdev);
5991
5992         if (!adapter)
5993                 return;
5994
5995         be_roce_dev_shutdown(adapter);
5996         cancel_delayed_work_sync(&adapter->work);
5997         be_cancel_err_detection(adapter);
5998
5999         netif_device_detach(adapter->netdev);
6000
6001         be_cmd_reset_function(adapter);
6002
6003         pci_disable_device(pdev);
6004 }
6005
6006 static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev,
6007                                             pci_channel_state_t state)
6008 {
6009         struct be_adapter *adapter = pci_get_drvdata(pdev);
6010
6011         dev_err(&adapter->pdev->dev, "EEH error detected\n");
6012
6013         be_roce_dev_remove(adapter);
6014
6015         if (!be_check_error(adapter, BE_ERROR_EEH)) {
6016                 be_set_error(adapter, BE_ERROR_EEH);
6017
6018                 be_cancel_err_detection(adapter);
6019
6020                 be_cleanup(adapter);
6021         }
6022
6023         if (state == pci_channel_io_perm_failure)
6024                 return PCI_ERS_RESULT_DISCONNECT;
6025
6026         pci_disable_device(pdev);
6027
6028         /* The error could cause the FW to trigger a flash debug dump.
6029          * Resetting the card while flash dump is in progress
6030          * can cause it not to recover; wait for it to finish.
6031          * Wait only for first function as it is needed only once per
6032          * adapter.
6033          */
6034         if (pdev->devfn == 0)
6035                 ssleep(30);
6036
6037         return PCI_ERS_RESULT_NEED_RESET;
6038 }
6039
6040 static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
6041 {
6042         struct be_adapter *adapter = pci_get_drvdata(pdev);
6043         int status;
6044
6045         dev_info(&adapter->pdev->dev, "EEH reset\n");
6046
6047         status = pci_enable_device(pdev);
6048         if (status)
6049                 return PCI_ERS_RESULT_DISCONNECT;
6050
6051         pci_set_master(pdev);
6052         pci_restore_state(pdev);
6053
6054         /* Check if card is ok and fw is ready */
6055         dev_info(&adapter->pdev->dev,
6056                  "Waiting for FW to be ready after EEH reset\n");
6057         status = be_fw_wait_ready(adapter);
6058         if (status)
6059                 return PCI_ERS_RESULT_DISCONNECT;
6060
6061         pci_cleanup_aer_uncorrect_error_status(pdev);
6062         be_clear_error(adapter, BE_CLEAR_ALL);
6063         return PCI_ERS_RESULT_RECOVERED;
6064 }
6065
6066 static void be_eeh_resume(struct pci_dev *pdev)
6067 {
6068         int status = 0;
6069         struct be_adapter *adapter = pci_get_drvdata(pdev);
6070
6071         dev_info(&adapter->pdev->dev, "EEH resume\n");
6072
6073         pci_save_state(pdev);
6074
6075         status = be_resume(adapter);
6076         if (status)
6077                 goto err;
6078
6079         be_roce_dev_add(adapter);
6080
6081         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6082         return;
6083 err:
6084         dev_err(&adapter->pdev->dev, "EEH resume failed\n");
6085 }
6086
6087 static int be_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
6088 {
6089         struct be_adapter *adapter = pci_get_drvdata(pdev);
6090         struct be_resources vft_res = {0};
6091         int status;
6092
6093         if (!num_vfs)
6094                 be_vf_clear(adapter);
6095
6096         adapter->num_vfs = num_vfs;
6097
6098         if (adapter->num_vfs == 0 && pci_vfs_assigned(pdev)) {
6099                 dev_warn(&pdev->dev,
6100                          "Cannot disable VFs while they are assigned\n");
6101                 return -EBUSY;
6102         }
6103
6104         /* When the HW is in SRIOV capable configuration, the PF-pool resources
6105          * are equally distributed across the max-number of VFs. The user may
6106          * request only a subset of the max-vfs to be enabled.
6107          * Based on num_vfs, redistribute the resources across num_vfs so that
6108          * each VF will have access to more number of resources.
6109          * This facility is not available in BE3 FW.
6110          * Also, this is done by FW in Lancer chip.
6111          */
6112         if (skyhawk_chip(adapter) && !pci_num_vf(pdev)) {
6113                 be_calculate_vf_res(adapter, adapter->num_vfs,
6114                                     &vft_res);
6115                 status = be_cmd_set_sriov_config(adapter, adapter->pool_res,
6116                                                  adapter->num_vfs, &vft_res);
6117                 if (status)
6118                         dev_err(&pdev->dev,
6119                                 "Failed to optimize SR-IOV resources\n");
6120         }
6121
6122         status = be_get_resources(adapter);
6123         if (status)
6124                 return be_cmd_status(status);
6125
6126         /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
6127         rtnl_lock();
6128         status = be_update_queues(adapter);
6129         rtnl_unlock();
6130         if (status)
6131                 return be_cmd_status(status);
6132
6133         if (adapter->num_vfs)
6134                 status = be_vf_setup(adapter);
6135
6136         if (!status)
6137                 return adapter->num_vfs;
6138
6139         return 0;
6140 }
6141
6142 static const struct pci_error_handlers be_eeh_handlers = {
6143         .error_detected = be_eeh_err_detected,
6144         .slot_reset = be_eeh_reset,
6145         .resume = be_eeh_resume,
6146 };
6147
6148 static struct pci_driver be_driver = {
6149         .name = DRV_NAME,
6150         .id_table = be_dev_ids,
6151         .probe = be_probe,
6152         .remove = be_remove,
6153         .suspend = be_suspend,
6154         .resume = be_pci_resume,
6155         .shutdown = be_shutdown,
6156         .sriov_configure = be_pci_sriov_configure,
6157         .err_handler = &be_eeh_handlers
6158 };
6159
6160 static int __init be_init_module(void)
6161 {
6162         int status;
6163
6164         if (rx_frag_size != 8192 && rx_frag_size != 4096 &&
6165             rx_frag_size != 2048) {
6166                 printk(KERN_WARNING DRV_NAME
6167                         " : Module param rx_frag_size must be 2048/4096/8192."
6168                         " Using 2048\n");
6169                 rx_frag_size = 2048;
6170         }
6171
6172         if (num_vfs > 0) {
6173                 pr_info(DRV_NAME " : Module param num_vfs is obsolete.");
6174                 pr_info(DRV_NAME " : Use sysfs method to enable VFs\n");
6175         }
6176
6177         be_wq = create_singlethread_workqueue("be_wq");
6178         if (!be_wq) {
6179                 pr_warn(DRV_NAME "workqueue creation failed\n");
6180                 return -1;
6181         }
6182
6183         be_err_recovery_workq =
6184                 create_singlethread_workqueue("be_err_recover");
6185         if (!be_err_recovery_workq)
6186                 pr_warn(DRV_NAME "Could not create error recovery workqueue\n");
6187
6188         status = pci_register_driver(&be_driver);
6189         if (status) {
6190                 destroy_workqueue(be_wq);
6191                 be_destroy_err_recovery_workq();
6192         }
6193         return status;
6194 }
6195 module_init(be_init_module);
6196
6197 static void __exit be_exit_module(void)
6198 {
6199         pci_unregister_driver(&be_driver);
6200
6201         be_destroy_err_recovery_workq();
6202
6203         if (be_wq)
6204                 destroy_workqueue(be_wq);
6205 }
6206 module_exit(be_exit_module);