1 /*
2  * Copyright (C) 2005 - 2016 Broadcom
3  * All rights reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License version 2
7  * as published by the Free Software Foundation.  The full GNU General
8  * Public License is included in this distribution in the file called COPYING.
9  *
10  * Contact Information:
11  * linux-drivers@emulex.com
12  *
13  * Emulex
14  * 3333 Susan Street
15  * Costa Mesa, CA 92626
16  */
17
18 #include <linux/prefetch.h>
19 #include <linux/module.h>
20 #include "be.h"
21 #include "be_cmds.h"
22 #include <asm/div64.h>
23 #include <linux/aer.h>
24 #include <linux/if_bridge.h>
25 #include <net/busy_poll.h>
26 #include <net/vxlan.h>
27
28 MODULE_VERSION(DRV_VER);
29 MODULE_DESCRIPTION(DRV_DESC " " DRV_VER);
30 MODULE_AUTHOR("Emulex Corporation");
31 MODULE_LICENSE("GPL");
32
33 /* num_vfs module param is obsolete.
34  * Use sysfs method to enable/disable VFs.
35  */
36 static unsigned int num_vfs;
37 module_param(num_vfs, uint, S_IRUGO);
38 MODULE_PARM_DESC(num_vfs, "Number of PCI VFs to initialize");
39
40 static ushort rx_frag_size = 2048;
41 module_param(rx_frag_size, ushort, S_IRUGO);
42 MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data.");
43
44 /* Per-module error detection/recovery workq shared across all functions.
45  * Each function schedules its own work request on this shared workq.
46  */
47 static struct workqueue_struct *be_err_recovery_workq;
48
49 static const struct pci_device_id be_dev_ids[] = {
50         { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) },
51         { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID2) },
52         { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID1) },
53         { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID2) },
54         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID3)},
55         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID4)},
56         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID5)},
57         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID6)},
58         { 0 }
59 };
60 MODULE_DEVICE_TABLE(pci, be_dev_ids);
61
62 /* Workqueue used by all functions for deferring cmd calls to the adapter */
63 static struct workqueue_struct *be_wq;
64
65 /* UE Status Low CSR */
66 static const char * const ue_status_low_desc[] = {
67         "CEV",
68         "CTX",
69         "DBUF",
70         "ERX",
71         "Host",
72         "MPU",
73         "NDMA",
74         "PTC ",
75         "RDMA ",
76         "RXF ",
77         "RXIPS ",
78         "RXULP0 ",
79         "RXULP1 ",
80         "RXULP2 ",
81         "TIM ",
82         "TPOST ",
83         "TPRE ",
84         "TXIPS ",
85         "TXULP0 ",
86         "TXULP1 ",
87         "UC ",
88         "WDMA ",
89         "TXULP2 ",
90         "HOST1 ",
91         "P0_OB_LINK ",
92         "P1_OB_LINK ",
93         "HOST_GPIO ",
94         "MBOX ",
95         "ERX2 ",
96         "SPARE ",
97         "JTAG ",
98         "MPU_INTPEND "
99 };
100
101 /* UE Status High CSR */
102 static const char * const ue_status_hi_desc[] = {
103         "LPCMEMHOST",
104         "MGMT_MAC",
105         "PCS0ONLINE",
106         "MPU_IRAM",
107         "PCS1ONLINE",
108         "PCTL0",
109         "PCTL1",
110         "PMEM",
111         "RR",
112         "TXPB",
113         "RXPP",
114         "XAUI",
115         "TXP",
116         "ARM",
117         "IPC",
118         "HOST2",
119         "HOST3",
120         "HOST4",
121         "HOST5",
122         "HOST6",
123         "HOST7",
124         "ECRC",
125         "Poison TLP",
126         "NETC",
127         "PERIPH",
128         "LLTXULP",
129         "D2P",
130         "RCON",
131         "LDMA",
132         "LLTXP",
133         "LLTXPB",
134         "Unknown"
135 };
136
137 #define BE_VF_IF_EN_FLAGS       (BE_IF_FLAGS_UNTAGGED | \
138                                  BE_IF_FLAGS_BROADCAST | \
139                                  BE_IF_FLAGS_MULTICAST | \
140                                  BE_IF_FLAGS_PASS_L3L4_ERRORS)
141
142 static void be_queue_free(struct be_adapter *adapter, struct be_queue_info *q)
143 {
144         struct be_dma_mem *mem = &q->dma_mem;
145
146         if (mem->va) {
147                 dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va,
148                                   mem->dma);
149                 mem->va = NULL;
150         }
151 }
152
153 static int be_queue_alloc(struct be_adapter *adapter, struct be_queue_info *q,
154                           u16 len, u16 entry_size)
155 {
156         struct be_dma_mem *mem = &q->dma_mem;
157
158         memset(q, 0, sizeof(*q));
159         q->len = len;
160         q->entry_size = entry_size;
161         mem->size = len * entry_size;
162         mem->va = dma_zalloc_coherent(&adapter->pdev->dev, mem->size, &mem->dma,
163                                       GFP_KERNEL);
164         if (!mem->va)
165                 return -ENOMEM;
166         return 0;
167 }
168
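/* Enable/disable host interrupt generation by toggling the HOSTINTR bit of
 * the MEMBAR interrupt-control register through PCI config space. Used as a
 * fallback when the FW command to control interrupts fails.
 */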
169 static void be_reg_intr_set(struct be_adapter *adapter, bool enable)
170 {
171         u32 reg, enabled;
172
173         pci_read_config_dword(adapter->pdev, PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET,
174                               &reg);
175         enabled = reg & MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
176
177         if (!enabled && enable)
178                 reg |= MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
179         else if (enabled && !enable)
180                 reg &= ~MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
181         else
182                 return;
183
184         pci_write_config_dword(adapter->pdev,
185                                PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET, reg);
186 }
187
188 static void be_intr_set(struct be_adapter *adapter, bool enable)
189 {
190         int status = 0;
191
192         /* On Lancer, interrupts can't be controlled via this register */
193         if (lancer_chip(adapter))
194                 return;
195
196         if (be_check_error(adapter, BE_ERROR_EEH))
197                 return;
198
199         status = be_cmd_intr_set(adapter, enable);
200         if (status)
201                 be_reg_intr_set(adapter, enable);
202 }
203
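/* Notify the HW of newly posted RX buffer descriptors by ringing the RQ
 * doorbell for queue 'qid'.
 */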
204 static void be_rxq_notify(struct be_adapter *adapter, u16 qid, u16 posted)
205 {
206         u32 val = 0;
207
208         if (be_check_error(adapter, BE_ERROR_HW))
209                 return;
210
211         val |= qid & DB_RQ_RING_ID_MASK;
212         val |= posted << DB_RQ_NUM_POSTED_SHIFT;
213
214         wmb();
215         iowrite32(val, adapter->db + DB_RQ_OFFSET);
216 }
217
218 static void be_txq_notify(struct be_adapter *adapter, struct be_tx_obj *txo,
219                           u16 posted)
220 {
221         u32 val = 0;
222
223         if (be_check_error(adapter, BE_ERROR_HW))
224                 return;
225
226         val |= txo->q.id & DB_TXULP_RING_ID_MASK;
227         val |= (posted & DB_TXULP_NUM_POSTED_MASK) << DB_TXULP_NUM_POSTED_SHIFT;
228
229         wmb();
230         iowrite32(val, adapter->db + txo->db_offset);
231 }
232
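/* Ring the EQ doorbell: optionally re-arm the EQ and clear the interrupt,
 * and report the number of EQ entries processed along with the encoded
 * interrupt-delay multiplier.
 */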
233 static void be_eq_notify(struct be_adapter *adapter, u16 qid,
234                          bool arm, bool clear_int, u16 num_popped,
235                          u32 eq_delay_mult_enc)
236 {
237         u32 val = 0;
238
239         val |= qid & DB_EQ_RING_ID_MASK;
240         val |= ((qid & DB_EQ_RING_ID_EXT_MASK) << DB_EQ_RING_ID_EXT_MASK_SHIFT);
241
242         if (be_check_error(adapter, BE_ERROR_HW))
243                 return;
244
245         if (arm)
246                 val |= 1 << DB_EQ_REARM_SHIFT;
247         if (clear_int)
248                 val |= 1 << DB_EQ_CLR_SHIFT;
249         val |= 1 << DB_EQ_EVNT_SHIFT;
250         val |= num_popped << DB_EQ_NUM_POPPED_SHIFT;
251         val |= eq_delay_mult_enc << DB_EQ_R2I_DLY_SHIFT;
252         iowrite32(val, adapter->db + DB_EQ_OFFSET);
253 }
254
255 void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped)
256 {
257         u32 val = 0;
258
259         val |= qid & DB_CQ_RING_ID_MASK;
260         val |= ((qid & DB_CQ_RING_ID_EXT_MASK) <<
261                         DB_CQ_RING_ID_EXT_MASK_SHIFT);
262
263         if (be_check_error(adapter, BE_ERROR_HW))
264                 return;
265
266         if (arm)
267                 val |= 1 << DB_CQ_REARM_SHIFT;
268         val |= num_popped << DB_CQ_NUM_POPPED_SHIFT;
269         iowrite32(val, adapter->db + DB_CQ_OFFSET);
270 }
271
272 static int be_dev_mac_add(struct be_adapter *adapter, u8 *mac)
273 {
274         int i;
275
276         /* Check if mac has already been added as part of uc-list */
277         for (i = 0; i < adapter->uc_macs; i++) {
278                 if (ether_addr_equal(adapter->uc_list[i].mac, mac)) {
279                         /* mac already added, skip addition */
280                         adapter->pmac_id[0] = adapter->pmac_id[i + 1];
281                         return 0;
282                 }
283         }
284
285         return be_cmd_pmac_add(adapter, mac, adapter->if_handle,
286                                &adapter->pmac_id[0], 0);
287 }
288
289 static void be_dev_mac_del(struct be_adapter *adapter, int pmac_id)
290 {
291         int i;
292
293         /* Skip deletion if the programmed mac is
294          * being used in uc-list
295          */
296         for (i = 0; i < adapter->uc_macs; i++) {
297                 if (adapter->pmac_id[i + 1] == pmac_id)
298                         return;
299         }
300         be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
301 }
302
303 static int be_mac_addr_set(struct net_device *netdev, void *p)
304 {
305         struct be_adapter *adapter = netdev_priv(netdev);
306         struct device *dev = &adapter->pdev->dev;
307         struct sockaddr *addr = p;
308         int status;
309         u8 mac[ETH_ALEN];
310         u32 old_pmac_id = adapter->pmac_id[0];
311
312         if (!is_valid_ether_addr(addr->sa_data))
313                 return -EADDRNOTAVAIL;
314
315         /* Proceed further only if the user-provided MAC is different
316          * from the active MAC
317          */
318         if (ether_addr_equal(addr->sa_data, adapter->dev_mac))
319                 return 0;
320
321         /* if device is not running, copy MAC to netdev->dev_addr */
322         if (!netif_running(netdev))
323                 goto done;
324
325         /* The PMAC_ADD cmd may fail if the VF doesn't have FILTMGMT
326          * privilege or if PF did not provision the new MAC address.
327          * On BE3, this cmd will always fail if the VF doesn't have the
328          * FILTMGMT privilege. This failure is OK only if the PF programmed
329          * the MAC for the VF.
330          */
331         mutex_lock(&adapter->rx_filter_lock);
332         status = be_dev_mac_add(adapter, (u8 *)addr->sa_data);
333         if (!status) {
334
335                 /* Delete the old programmed MAC. This call may fail if the
336                  * old MAC was already deleted by the PF driver.
337                  */
338                 if (adapter->pmac_id[0] != old_pmac_id)
339                         be_dev_mac_del(adapter, old_pmac_id);
340         }
341
342         mutex_unlock(&adapter->rx_filter_lock);
343         /* Decide if the new MAC is successfully activated only after
344          * querying the FW
345          */
346         status = be_cmd_get_active_mac(adapter, adapter->pmac_id[0], mac,
347                                        adapter->if_handle, true, 0);
348         if (status)
349                 goto err;
350
351         /* The MAC change did not happen, either due to lack of privilege
352          * or PF didn't pre-provision.
353          */
354         if (!ether_addr_equal(addr->sa_data, mac)) {
355                 status = -EPERM;
356                 goto err;
357         }
358 done:
359         ether_addr_copy(adapter->dev_mac, addr->sa_data);
360         ether_addr_copy(netdev->dev_addr, addr->sa_data);
361         dev_info(dev, "MAC address changed to %pM\n", addr->sa_data);
362         return 0;
363 err:
364         dev_warn(dev, "MAC address change to %pM failed\n", addr->sa_data);
365         return status;
366 }
367
368 /* BE2 supports only v0 cmd */
369 static void *hw_stats_from_cmd(struct be_adapter *adapter)
370 {
371         if (BE2_chip(adapter)) {
372                 struct be_cmd_resp_get_stats_v0 *cmd = adapter->stats_cmd.va;
373
374                 return &cmd->hw_stats;
375         } else if (BE3_chip(adapter)) {
376                 struct be_cmd_resp_get_stats_v1 *cmd = adapter->stats_cmd.va;
377
378                 return &cmd->hw_stats;
379         } else {
380                 struct be_cmd_resp_get_stats_v2 *cmd = adapter->stats_cmd.va;
381
382                 return &cmd->hw_stats;
383         }
384 }
385
386 /* BE2 supports only v0 cmd */
387 static void *be_erx_stats_from_cmd(struct be_adapter *adapter)
388 {
389         if (BE2_chip(adapter)) {
390                 struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
391
392                 return &hw_stats->erx;
393         } else if (BE3_chip(adapter)) {
394                 struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
395
396                 return &hw_stats->erx;
397         } else {
398                 struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
399
400                 return &hw_stats->erx;
401         }
402 }
403
404 static void populate_be_v0_stats(struct be_adapter *adapter)
405 {
406         struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
407         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
408         struct be_rxf_stats_v0 *rxf_stats = &hw_stats->rxf;
409         struct be_port_rxf_stats_v0 *port_stats =
410                                         &rxf_stats->port[adapter->port_num];
411         struct be_drv_stats *drvs = &adapter->drv_stats;
412
413         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
414         drvs->rx_pause_frames = port_stats->rx_pause_frames;
415         drvs->rx_crc_errors = port_stats->rx_crc_errors;
416         drvs->rx_control_frames = port_stats->rx_control_frames;
417         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
418         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
419         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
420         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
421         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
422         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
423         drvs->rxpp_fifo_overflow_drop = port_stats->rx_fifo_overflow;
424         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
425         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
426         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
427         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
428         drvs->rx_input_fifo_overflow_drop = port_stats->rx_input_fifo_overflow;
429         drvs->rx_dropped_header_too_small =
430                 port_stats->rx_dropped_header_too_small;
431         drvs->rx_address_filtered =
432                                         port_stats->rx_address_filtered +
433                                         port_stats->rx_vlan_filtered;
434         drvs->rx_alignment_symbol_errors =
435                 port_stats->rx_alignment_symbol_errors;
436
437         drvs->tx_pauseframes = port_stats->tx_pauseframes;
438         drvs->tx_controlframes = port_stats->tx_controlframes;
439
440         if (adapter->port_num)
441                 drvs->jabber_events = rxf_stats->port1_jabber_events;
442         else
443                 drvs->jabber_events = rxf_stats->port0_jabber_events;
444         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
445         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
446         drvs->forwarded_packets = rxf_stats->forwarded_packets;
447         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
448         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
449         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
450         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
451 }
452
453 static void populate_be_v1_stats(struct be_adapter *adapter)
454 {
455         struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
456         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
457         struct be_rxf_stats_v1 *rxf_stats = &hw_stats->rxf;
458         struct be_port_rxf_stats_v1 *port_stats =
459                                         &rxf_stats->port[adapter->port_num];
460         struct be_drv_stats *drvs = &adapter->drv_stats;
461
462         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
463         drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
464         drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
465         drvs->rx_pause_frames = port_stats->rx_pause_frames;
466         drvs->rx_crc_errors = port_stats->rx_crc_errors;
467         drvs->rx_control_frames = port_stats->rx_control_frames;
468         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
469         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
470         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
471         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
472         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
473         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
474         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
475         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
476         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
477         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
478         drvs->rx_dropped_header_too_small =
479                 port_stats->rx_dropped_header_too_small;
480         drvs->rx_input_fifo_overflow_drop =
481                 port_stats->rx_input_fifo_overflow_drop;
482         drvs->rx_address_filtered = port_stats->rx_address_filtered;
483         drvs->rx_alignment_symbol_errors =
484                 port_stats->rx_alignment_symbol_errors;
485         drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
486         drvs->tx_pauseframes = port_stats->tx_pauseframes;
487         drvs->tx_controlframes = port_stats->tx_controlframes;
488         drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
489         drvs->jabber_events = port_stats->jabber_events;
490         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
491         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
492         drvs->forwarded_packets = rxf_stats->forwarded_packets;
493         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
494         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
495         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
496         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
497 }
498
499 static void populate_be_v2_stats(struct be_adapter *adapter)
500 {
501         struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
502         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
503         struct be_rxf_stats_v2 *rxf_stats = &hw_stats->rxf;
504         struct be_port_rxf_stats_v2 *port_stats =
505                                         &rxf_stats->port[adapter->port_num];
506         struct be_drv_stats *drvs = &adapter->drv_stats;
507
508         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
509         drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
510         drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
511         drvs->rx_pause_frames = port_stats->rx_pause_frames;
512         drvs->rx_crc_errors = port_stats->rx_crc_errors;
513         drvs->rx_control_frames = port_stats->rx_control_frames;
514         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
515         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
516         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
517         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
518         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
519         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
520         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
521         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
522         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
523         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
524         drvs->rx_dropped_header_too_small =
525                 port_stats->rx_dropped_header_too_small;
526         drvs->rx_input_fifo_overflow_drop =
527                 port_stats->rx_input_fifo_overflow_drop;
528         drvs->rx_address_filtered = port_stats->rx_address_filtered;
529         drvs->rx_alignment_symbol_errors =
530                 port_stats->rx_alignment_symbol_errors;
531         drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
532         drvs->tx_pauseframes = port_stats->tx_pauseframes;
533         drvs->tx_controlframes = port_stats->tx_controlframes;
534         drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
535         drvs->jabber_events = port_stats->jabber_events;
536         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
537         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
538         drvs->forwarded_packets = rxf_stats->forwarded_packets;
539         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
540         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
541         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
542         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
543         if (be_roce_supported(adapter)) {
544                 drvs->rx_roce_bytes_lsd = port_stats->roce_bytes_received_lsd;
545                 drvs->rx_roce_bytes_msd = port_stats->roce_bytes_received_msd;
546                 drvs->rx_roce_frames = port_stats->roce_frames_received;
547                 drvs->roce_drops_crc = port_stats->roce_drops_crc;
548                 drvs->roce_drops_payload_len =
549                         port_stats->roce_drops_payload_len;
550         }
551 }
552
553 static void populate_lancer_stats(struct be_adapter *adapter)
554 {
555         struct be_drv_stats *drvs = &adapter->drv_stats;
556         struct lancer_pport_stats *pport_stats = pport_stats_from_cmd(adapter);
557
558         be_dws_le_to_cpu(pport_stats, sizeof(*pport_stats));
559         drvs->rx_pause_frames = pport_stats->rx_pause_frames_lo;
560         drvs->rx_crc_errors = pport_stats->rx_crc_errors_lo;
561         drvs->rx_control_frames = pport_stats->rx_control_frames_lo;
562         drvs->rx_in_range_errors = pport_stats->rx_in_range_errors;
563         drvs->rx_frame_too_long = pport_stats->rx_frames_too_long_lo;
564         drvs->rx_dropped_runt = pport_stats->rx_dropped_runt;
565         drvs->rx_ip_checksum_errs = pport_stats->rx_ip_checksum_errors;
566         drvs->rx_tcp_checksum_errs = pport_stats->rx_tcp_checksum_errors;
567         drvs->rx_udp_checksum_errs = pport_stats->rx_udp_checksum_errors;
568         drvs->rx_dropped_tcp_length =
569                                 pport_stats->rx_dropped_invalid_tcp_length;
570         drvs->rx_dropped_too_small = pport_stats->rx_dropped_too_small;
571         drvs->rx_dropped_too_short = pport_stats->rx_dropped_too_short;
572         drvs->rx_out_range_errors = pport_stats->rx_out_of_range_errors;
573         drvs->rx_dropped_header_too_small =
574                                 pport_stats->rx_dropped_header_too_small;
575         drvs->rx_input_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
576         drvs->rx_address_filtered =
577                                         pport_stats->rx_address_filtered +
578                                         pport_stats->rx_vlan_filtered;
579         drvs->rx_alignment_symbol_errors = pport_stats->rx_symbol_errors_lo;
580         drvs->rxpp_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
581         drvs->tx_pauseframes = pport_stats->tx_pause_frames_lo;
582         drvs->tx_controlframes = pport_stats->tx_control_frames_lo;
583         drvs->jabber_events = pport_stats->rx_jabbers;
584         drvs->forwarded_packets = pport_stats->num_forwards_lo;
585         drvs->rx_drops_mtu = pport_stats->rx_drops_mtu_lo;
586         drvs->rx_drops_too_many_frags =
587                                 pport_stats->rx_drops_too_many_frags_lo;
588 }
589
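/* Fold the latest reading of a 16-bit HW counter into a 32-bit accumulator:
 * the low 16 bits track the raw counter and the high 16 bits count wraps.
 * E.g. if the accumulator holds 0x0001FFFB and the new raw reading is 0x0005,
 * a wrap is detected and the result becomes 0x00020005.
 */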
590 static void accumulate_16bit_val(u32 *acc, u16 val)
591 {
592 #define lo(x)                   (x & 0xFFFF)
593 #define hi(x)                   (x & 0xFFFF0000)
594         bool wrapped = val < lo(*acc);
595         u32 newacc = hi(*acc) + val;
596
597         if (wrapped)
598                 newacc += 65536;
599         ACCESS_ONCE(*acc) = newacc;
600 }
601
602 static void populate_erx_stats(struct be_adapter *adapter,
603                                struct be_rx_obj *rxo, u32 erx_stat)
604 {
605         if (!BEx_chip(adapter))
606                 rx_stats(rxo)->rx_drops_no_frags = erx_stat;
607         else
608                 /* below erx HW counter can actually wrap around after
609                  * 65535. The driver accumulates it into a 32-bit value
610                  */
611                 accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags,
612                                      (u16)erx_stat);
613 }
614
615 void be_parse_stats(struct be_adapter *adapter)
616 {
617         struct be_erx_stats_v2 *erx = be_erx_stats_from_cmd(adapter);
618         struct be_rx_obj *rxo;
619         int i;
620         u32 erx_stat;
621
622         if (lancer_chip(adapter)) {
623                 populate_lancer_stats(adapter);
624         } else {
625                 if (BE2_chip(adapter))
626                         populate_be_v0_stats(adapter);
627                 else if (BE3_chip(adapter))
628                         /* for BE3 */
629                         populate_be_v1_stats(adapter);
630                 else
631                         populate_be_v2_stats(adapter);
632
633                 /* erx_v2 is longer than v0, v1. use v2 for v0, v1 access */
634                 for_all_rx_queues(adapter, rxo, i) {
635                         erx_stat = erx->rx_drops_no_fragments[rxo->q.id];
636                         populate_erx_stats(adapter, rxo, erx_stat);
637                 }
638         }
639 }
640
641 static void be_get_stats64(struct net_device *netdev,
642                            struct rtnl_link_stats64 *stats)
643 {
644         struct be_adapter *adapter = netdev_priv(netdev);
645         struct be_drv_stats *drvs = &adapter->drv_stats;
646         struct be_rx_obj *rxo;
647         struct be_tx_obj *txo;
648         u64 pkts, bytes;
649         unsigned int start;
650         int i;
651
652         for_all_rx_queues(adapter, rxo, i) {
653                 const struct be_rx_stats *rx_stats = rx_stats(rxo);
654
655                 do {
656                         start = u64_stats_fetch_begin_irq(&rx_stats->sync);
657                         pkts = rx_stats(rxo)->rx_pkts;
658                         bytes = rx_stats(rxo)->rx_bytes;
659                 } while (u64_stats_fetch_retry_irq(&rx_stats->sync, start));
660                 stats->rx_packets += pkts;
661                 stats->rx_bytes += bytes;
662                 stats->multicast += rx_stats(rxo)->rx_mcast_pkts;
663                 stats->rx_dropped += rx_stats(rxo)->rx_drops_no_skbs +
664                                         rx_stats(rxo)->rx_drops_no_frags;
665         }
666
667         for_all_tx_queues(adapter, txo, i) {
668                 const struct be_tx_stats *tx_stats = tx_stats(txo);
669
670                 do {
671                         start = u64_stats_fetch_begin_irq(&tx_stats->sync);
672                         pkts = tx_stats(txo)->tx_pkts;
673                         bytes = tx_stats(txo)->tx_bytes;
674                 } while (u64_stats_fetch_retry_irq(&tx_stats->sync, start));
675                 stats->tx_packets += pkts;
676                 stats->tx_bytes += bytes;
677         }
678
679         /* bad pkts received */
680         stats->rx_errors = drvs->rx_crc_errors +
681                 drvs->rx_alignment_symbol_errors +
682                 drvs->rx_in_range_errors +
683                 drvs->rx_out_range_errors +
684                 drvs->rx_frame_too_long +
685                 drvs->rx_dropped_too_small +
686                 drvs->rx_dropped_too_short +
687                 drvs->rx_dropped_header_too_small +
688                 drvs->rx_dropped_tcp_length +
689                 drvs->rx_dropped_runt;
690
691         /* detailed rx errors */
692         stats->rx_length_errors = drvs->rx_in_range_errors +
693                 drvs->rx_out_range_errors +
694                 drvs->rx_frame_too_long;
695
696         stats->rx_crc_errors = drvs->rx_crc_errors;
697
698         /* frame alignment errors */
699         stats->rx_frame_errors = drvs->rx_alignment_symbol_errors;
700
701         /* receiver fifo overrun */
702         /* drops_no_pbuf is not per i/f, it's per BE card */
703         stats->rx_fifo_errors = drvs->rxpp_fifo_overflow_drop +
704                                 drvs->rx_input_fifo_overflow_drop +
705                                 drvs->rx_drops_no_pbuf;
706 }
707
708 void be_link_status_update(struct be_adapter *adapter, u8 link_status)
709 {
710         struct net_device *netdev = adapter->netdev;
711
712         if (!(adapter->flags & BE_FLAGS_LINK_STATUS_INIT)) {
713                 netif_carrier_off(netdev);
714                 adapter->flags |= BE_FLAGS_LINK_STATUS_INIT;
715         }
716
717         if (link_status)
718                 netif_carrier_on(netdev);
719         else
720                 netif_carrier_off(netdev);
721
722         netdev_info(netdev, "Link is %s\n", link_status ? "Up" : "Down");
723 }
724
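/* Return the length of the protocol headers that get replicated in every
 * segment of a GSO packet (using the inner headers for encapsulated traffic).
 */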
725 static int be_gso_hdr_len(struct sk_buff *skb)
726 {
727         if (skb->encapsulation)
728                 return skb_inner_transport_offset(skb) +
729                        inner_tcp_hdrlen(skb);
730         return skb_transport_offset(skb) + tcp_hdrlen(skb);
731 }
732
733 static void be_tx_stats_update(struct be_tx_obj *txo, struct sk_buff *skb)
734 {
735         struct be_tx_stats *stats = tx_stats(txo);
736         u32 tx_pkts = skb_shinfo(skb)->gso_segs ? : 1;
737         /* Account for headers which get duplicated in TSO pkt */
738         u32 dup_hdr_len = tx_pkts > 1 ? be_gso_hdr_len(skb) * (tx_pkts - 1) : 0;
739
740         u64_stats_update_begin(&stats->sync);
741         stats->tx_reqs++;
742         stats->tx_bytes += skb->len + dup_hdr_len;
743         stats->tx_pkts += tx_pkts;
744         if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL)
745                 stats->tx_vxlan_offload_pkts += tx_pkts;
746         u64_stats_update_end(&stats->sync);
747 }
748
749 /* Returns number of WRBs needed for the skb */
750 static u32 skb_wrb_cnt(struct sk_buff *skb)
751 {
752         /* +1 for the header wrb */
753         return 1 + (skb_headlen(skb) ? 1 : 0) + skb_shinfo(skb)->nr_frags;
754 }
755
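/* Fill a TX WRB (work request block) with the DMA address and length of one
 * buffer fragment.
 */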
756 static inline void wrb_fill(struct be_eth_wrb *wrb, u64 addr, int len)
757 {
758         wrb->frag_pa_hi = cpu_to_le32(upper_32_bits(addr));
759         wrb->frag_pa_lo = cpu_to_le32(lower_32_bits(addr));
760         wrb->frag_len = cpu_to_le32(len & ETH_WRB_FRAG_LEN_MASK);
761         wrb->rsvd0 = 0;
762 }
763
764 /* A dummy wrb is just all zeros. Using a separate routine for dummy-wrb
765  * to avoid the swap and shift/mask operations in wrb_fill().
766  */
767 static inline void wrb_fill_dummy(struct be_eth_wrb *wrb)
768 {
769         wrb->frag_pa_hi = 0;
770         wrb->frag_pa_lo = 0;
771         wrb->frag_len = 0;
772         wrb->rsvd0 = 0;
773 }
774
775 static inline u16 be_get_tx_vlan_tag(struct be_adapter *adapter,
776                                      struct sk_buff *skb)
777 {
778         u8 vlan_prio;
779         u16 vlan_tag;
780
781         vlan_tag = skb_vlan_tag_get(skb);
782         vlan_prio = (vlan_tag & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
783         /* If vlan priority provided by OS is NOT in available bmap */
784         if (!(adapter->vlan_prio_bmap & (1 << vlan_prio)))
785                 vlan_tag = (vlan_tag & ~VLAN_PRIO_MASK) |
786                                 adapter->recommended_prio_bits;
787
788         return vlan_tag;
789 }
790
791 /* Used only for IP tunnel packets */
792 static u16 skb_inner_ip_proto(struct sk_buff *skb)
793 {
794         return (inner_ip_hdr(skb)->version == 4) ?
795                 inner_ip_hdr(skb)->protocol : inner_ipv6_hdr(skb)->nexthdr;
796 }
797
798 static u16 skb_ip_proto(struct sk_buff *skb)
799 {
800         return (ip_hdr(skb)->version == 4) ?
801                 ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr;
802 }
803
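/* TX queue occupancy helpers: the queue is "full" when it cannot hold another
 * maximally-fragmented skb, may be woken once it drains below half capacity,
 * and has completions pending while the queue holds more WRBs than are still
 * waiting to be posted.
 */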
804 static inline bool be_is_txq_full(struct be_tx_obj *txo)
805 {
806         return atomic_read(&txo->q.used) + BE_MAX_TX_FRAG_COUNT >= txo->q.len;
807 }
808
809 static inline bool be_can_txq_wake(struct be_tx_obj *txo)
810 {
811         return atomic_read(&txo->q.used) < txo->q.len / 2;
812 }
813
814 static inline bool be_is_tx_compl_pending(struct be_tx_obj *txo)
815 {
816         return atomic_read(&txo->q.used) > txo->pend_wrb_cnt;
817 }
818
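/* Derive the WRB feature flags for this skb: LSO/LSO6, IP/TCP/UDP checksum
 * offload and VLAN tagging.
 */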
819 static void be_get_wrb_params_from_skb(struct be_adapter *adapter,
820                                        struct sk_buff *skb,
821                                        struct be_wrb_params *wrb_params)
822 {
823         u16 proto;
824
825         if (skb_is_gso(skb)) {
826                 BE_WRB_F_SET(wrb_params->features, LSO, 1);
827                 wrb_params->lso_mss = skb_shinfo(skb)->gso_size;
828                 if (skb_is_gso_v6(skb) && !lancer_chip(adapter))
829                         BE_WRB_F_SET(wrb_params->features, LSO6, 1);
830         } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
831                 if (skb->encapsulation) {
832                         BE_WRB_F_SET(wrb_params->features, IPCS, 1);
833                         proto = skb_inner_ip_proto(skb);
834                 } else {
835                         proto = skb_ip_proto(skb);
836                 }
837                 if (proto == IPPROTO_TCP)
838                         BE_WRB_F_SET(wrb_params->features, TCPCS, 1);
839                 else if (proto == IPPROTO_UDP)
840                         BE_WRB_F_SET(wrb_params->features, UDPCS, 1);
841         }
842
843         if (skb_vlan_tag_present(skb)) {
844                 BE_WRB_F_SET(wrb_params->features, VLAN, 1);
845                 wrb_params->vlan_tag = be_get_tx_vlan_tag(adapter, skb);
846         }
847
848         BE_WRB_F_SET(wrb_params->features, CRC, 1);
849 }
850
851 static void wrb_fill_hdr(struct be_adapter *adapter,
852                          struct be_eth_hdr_wrb *hdr,
853                          struct be_wrb_params *wrb_params,
854                          struct sk_buff *skb)
855 {
856         memset(hdr, 0, sizeof(*hdr));
857
858         SET_TX_WRB_HDR_BITS(crc, hdr,
859                             BE_WRB_F_GET(wrb_params->features, CRC));
860         SET_TX_WRB_HDR_BITS(ipcs, hdr,
861                             BE_WRB_F_GET(wrb_params->features, IPCS));
862         SET_TX_WRB_HDR_BITS(tcpcs, hdr,
863                             BE_WRB_F_GET(wrb_params->features, TCPCS));
864         SET_TX_WRB_HDR_BITS(udpcs, hdr,
865                             BE_WRB_F_GET(wrb_params->features, UDPCS));
866
867         SET_TX_WRB_HDR_BITS(lso, hdr,
868                             BE_WRB_F_GET(wrb_params->features, LSO));
869         SET_TX_WRB_HDR_BITS(lso6, hdr,
870                             BE_WRB_F_GET(wrb_params->features, LSO6));
871         SET_TX_WRB_HDR_BITS(lso_mss, hdr, wrb_params->lso_mss);
872
873         /* Hack to skip HW VLAN tagging needs evt = 1, compl = 0. When this
874          * hack is not needed, the evt bit is set while ringing DB.
875          */
876         SET_TX_WRB_HDR_BITS(event, hdr,
877                             BE_WRB_F_GET(wrb_params->features, VLAN_SKIP_HW));
878         SET_TX_WRB_HDR_BITS(vlan, hdr,
879                             BE_WRB_F_GET(wrb_params->features, VLAN));
880         SET_TX_WRB_HDR_BITS(vlan_tag, hdr, wrb_params->vlan_tag);
881
882         SET_TX_WRB_HDR_BITS(num_wrb, hdr, skb_wrb_cnt(skb));
883         SET_TX_WRB_HDR_BITS(len, hdr, skb->len);
884         SET_TX_WRB_HDR_BITS(mgmt, hdr,
885                             BE_WRB_F_GET(wrb_params->features, OS2BMC));
886 }
887
888 static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb,
889                           bool unmap_single)
890 {
891         dma_addr_t dma;
892         u32 frag_len = le32_to_cpu(wrb->frag_len);
893
895         dma = (u64)le32_to_cpu(wrb->frag_pa_hi) << 32 |
896                 (u64)le32_to_cpu(wrb->frag_pa_lo);
897         if (frag_len) {
898                 if (unmap_single)
899                         dma_unmap_single(dev, dma, frag_len, DMA_TO_DEVICE);
900                 else
901                         dma_unmap_page(dev, dma, frag_len, DMA_TO_DEVICE);
902         }
903 }
904
905 /* Grab a WRB header for xmit */
906 static u32 be_tx_get_wrb_hdr(struct be_tx_obj *txo)
907 {
908         u32 head = txo->q.head;
909
910         queue_head_inc(&txo->q);
911         return head;
912 }
913
914 /* Set up the WRB header for xmit */
915 static void be_tx_setup_wrb_hdr(struct be_adapter *adapter,
916                                 struct be_tx_obj *txo,
917                                 struct be_wrb_params *wrb_params,
918                                 struct sk_buff *skb, u16 head)
919 {
920         u32 num_frags = skb_wrb_cnt(skb);
921         struct be_queue_info *txq = &txo->q;
922         struct be_eth_hdr_wrb *hdr = queue_index_node(txq, head);
923
924         wrb_fill_hdr(adapter, hdr, wrb_params, skb);
925         be_dws_cpu_to_le(hdr, sizeof(*hdr));
926
927         BUG_ON(txo->sent_skb_list[head]);
928         txo->sent_skb_list[head] = skb;
929         txo->last_req_hdr = head;
930         atomic_add(num_frags, &txq->used);
931         txo->last_req_wrb_cnt = num_frags;
932         txo->pend_wrb_cnt += num_frags;
933 }
934
935 /* Setup a WRB fragment (buffer descriptor) for xmit */
936 static void be_tx_setup_wrb_frag(struct be_tx_obj *txo, dma_addr_t busaddr,
937                                  int len)
938 {
939         struct be_eth_wrb *wrb;
940         struct be_queue_info *txq = &txo->q;
941
942         wrb = queue_head_node(txq);
943         wrb_fill(wrb, busaddr, len);
944         queue_head_inc(txq);
945 }
946
947 /* Bring the queue back to the state it was in before be_xmit_enqueue() routine
948  * was invoked. The producer index is restored to the previous packet and the
949  * WRBs of the current packet are unmapped. Invoked to handle tx setup errors.
950  */
951 static void be_xmit_restore(struct be_adapter *adapter,
952                             struct be_tx_obj *txo, u32 head, bool map_single,
953                             u32 copied)
954 {
955         struct device *dev;
956         struct be_eth_wrb *wrb;
957         struct be_queue_info *txq = &txo->q;
958
959         dev = &adapter->pdev->dev;
960         txq->head = head;
961
962         /* skip the first wrb (hdr); it's not mapped */
963         queue_head_inc(txq);
964         while (copied) {
965                 wrb = queue_head_node(txq);
966                 unmap_tx_frag(dev, wrb, map_single);
967                 map_single = false;
968                 copied -= le32_to_cpu(wrb->frag_len);
969                 queue_head_inc(txq);
970         }
971
972         txq->head = head;
973 }
974
975 /* Enqueue the given packet for transmit. This routine allocates WRBs for the
976  * packet, dma maps the packet buffers and sets up the WRBs. Returns the number
977  * of WRBs used up by the packet.
978  */
979 static u32 be_xmit_enqueue(struct be_adapter *adapter, struct be_tx_obj *txo,
980                            struct sk_buff *skb,
981                            struct be_wrb_params *wrb_params)
982 {
983         u32 i, copied = 0, wrb_cnt = skb_wrb_cnt(skb);
984         struct device *dev = &adapter->pdev->dev;
985         struct be_queue_info *txq = &txo->q;
986         bool map_single = false;
987         u32 head = txq->head;
988         dma_addr_t busaddr;
989         int len;
990
991         head = be_tx_get_wrb_hdr(txo);
992
993         if (skb->len > skb->data_len) {
994                 len = skb_headlen(skb);
995
996                 busaddr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE);
997                 if (dma_mapping_error(dev, busaddr))
998                         goto dma_err;
999                 map_single = true;
1000                 be_tx_setup_wrb_frag(txo, busaddr, len);
1001                 copied += len;
1002         }
1003
1004         for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1005                 const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
1006                 len = skb_frag_size(frag);
1007
1008                 busaddr = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE);
1009                 if (dma_mapping_error(dev, busaddr))
1010                         goto dma_err;
1011                 be_tx_setup_wrb_frag(txo, busaddr, len);
1012                 copied += len;
1013         }
1014
1015         be_tx_setup_wrb_hdr(adapter, txo, wrb_params, skb, head);
1016
1017         be_tx_stats_update(txo, skb);
1018         return wrb_cnt;
1019
1020 dma_err:
1021         adapter->drv_stats.dma_map_errors++;
1022         be_xmit_restore(adapter, txo, head, map_single, copied);
1023         return 0;
1024 }
1025
1026 static inline int qnq_async_evt_rcvd(struct be_adapter *adapter)
1027 {
1028         return adapter->flags & BE_FLAGS_QNQ_ASYNC_EVT_RCVD;
1029 }
1030
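/* Insert the VLAN tag (and the outer QnQ VLAN, if configured) into the packet
 * data itself; used when HW VLAN tagging must be skipped.
 */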
1031 static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter,
1032                                              struct sk_buff *skb,
1033                                              struct be_wrb_params
1034                                              *wrb_params)
1035 {
1036         u16 vlan_tag = 0;
1037
1038         skb = skb_share_check(skb, GFP_ATOMIC);
1039         if (unlikely(!skb))
1040                 return skb;
1041
1042         if (skb_vlan_tag_present(skb))
1043                 vlan_tag = be_get_tx_vlan_tag(adapter, skb);
1044
1045         if (qnq_async_evt_rcvd(adapter) && adapter->pvid) {
1046                 if (!vlan_tag)
1047                         vlan_tag = adapter->pvid;
1048                 /* f/w workaround: setting skip_hw_vlan = 1 informs the F/W to
1049                  * skip VLAN insertion
1050                  */
1051                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1052         }
1053
1054         if (vlan_tag) {
1055                 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1056                                                 vlan_tag);
1057                 if (unlikely(!skb))
1058                         return skb;
1059                 skb->vlan_tci = 0;
1060         }
1061
1062         /* Insert the outer VLAN, if any */
1063         if (adapter->qnq_vid) {
1064                 vlan_tag = adapter->qnq_vid;
1065                 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1066                                                 vlan_tag);
1067                 if (unlikely(!skb))
1068                         return skb;
1069                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1070         }
1071
1072         return skb;
1073 }
1074
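/* Detect the IPv6 extension-header pattern (an option header whose hdrlen
 * byte is 0xff) that the BE3 TX-stall workaround must handle.
 */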
1075 static bool be_ipv6_exthdr_check(struct sk_buff *skb)
1076 {
1077         struct ethhdr *eh = (struct ethhdr *)skb->data;
1078         u16 offset = ETH_HLEN;
1079
1080         if (eh->h_proto == htons(ETH_P_IPV6)) {
1081                 struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + offset);
1082
1083                 offset += sizeof(struct ipv6hdr);
1084                 if (ip6h->nexthdr != NEXTHDR_TCP &&
1085                     ip6h->nexthdr != NEXTHDR_UDP) {
1086                         struct ipv6_opt_hdr *ehdr =
1087                                 (struct ipv6_opt_hdr *)(skb->data + offset);
1088
1089                         /* offending pkt: 2nd byte following IPv6 hdr is 0xff */
1090                         if (ehdr->hdrlen == 0xff)
1091                                 return true;
1092                 }
1093         }
1094         return false;
1095 }
1096
1097 static int be_vlan_tag_tx_chk(struct be_adapter *adapter, struct sk_buff *skb)
1098 {
1099         return skb_vlan_tag_present(skb) || adapter->pvid || adapter->qnq_vid;
1100 }
1101
1102 static int be_ipv6_tx_stall_chk(struct be_adapter *adapter, struct sk_buff *skb)
1103 {
1104         return BE3_chip(adapter) && be_ipv6_exthdr_check(skb);
1105 }
1106
1107 static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter,
1108                                                   struct sk_buff *skb,
1109                                                   struct be_wrb_params
1110                                                   *wrb_params)
1111 {
1112         struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1113         unsigned int eth_hdr_len;
1114         struct iphdr *ip;
1115
1116         /* For padded packets, BE HW modifies tot_len field in IP header
1117          * incorrectly when VLAN tag is inserted by HW.
1118          * For padded packets, Lancer computes incorrect checksum.
1119          */
1120         eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ?
1121                                                 VLAN_ETH_HLEN : ETH_HLEN;
1122         if (skb->len <= 60 &&
1123             (lancer_chip(adapter) || skb_vlan_tag_present(skb)) &&
1124             is_ipv4_pkt(skb)) {
1125                 ip = (struct iphdr *)ip_hdr(skb);
1126                 pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len));
1127         }
1128
1129         /* If vlan tag is already inlined in the packet, skip HW VLAN
1130          * tagging in pvid-tagging mode
1131          */
1132         if (be_pvid_tagging_enabled(adapter) &&
1133             veh->h_vlan_proto == htons(ETH_P_8021Q))
1134                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1135
1136         /* HW has a bug wherein it will calculate CSUM for VLAN
1137          * pkts even though checksum offload is disabled.
1138          * Manually insert the VLAN in the pkt.
1139          */
1140         if (skb->ip_summed != CHECKSUM_PARTIAL &&
1141             skb_vlan_tag_present(skb)) {
1142                 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1143                 if (unlikely(!skb))
1144                         goto err;
1145         }
1146
1147         /* HW may lock up when VLAN HW tagging is requested on
1148          * certain ipv6 packets. Drop such pkts if the HW workaround to
1149          * skip HW tagging is not enabled by FW.
1150          */
1151         if (unlikely(be_ipv6_tx_stall_chk(adapter, skb) &&
1152                      (adapter->pvid || adapter->qnq_vid) &&
1153                      !qnq_async_evt_rcvd(adapter)))
1154                 goto tx_drop;
1155
1156         /* Manually insert the VLAN tag to prevent an ASIC lockup that
1157          * can occur when the ASIC inserts a VLAN tag into certain ipv6
1158          * packets. Insert VLAN tags in the driver, and set the event,
1159          * completion and vlan bits accordingly
1160          * in the Tx WRB.
1161          */
1162         if (be_ipv6_tx_stall_chk(adapter, skb) &&
1163             be_vlan_tag_tx_chk(adapter, skb)) {
1164                 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1165                 if (unlikely(!skb))
1166                         goto err;
1167         }
1168
1169         return skb;
1170 tx_drop:
1171         dev_kfree_skb_any(skb);
1172 err:
1173         return NULL;
1174 }
1175
1176 static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter,
1177                                            struct sk_buff *skb,
1178                                            struct be_wrb_params *wrb_params)
1179 {
1180         int err;
1181
1182         /* Lancer, SH and BE3 in SRIOV mode have a bug wherein
1183          * packets that are 32 bytes or less may cause a transmit stall
1184          * on that port. The workaround is to pad such packets
1185          * (len <= 32 bytes) to a minimum length of 36 bytes.
1186          */
1187         if (skb->len <= 32) {
1188                 if (skb_put_padto(skb, 36))
1189                         return NULL;
1190         }
1191
1192         if (BEx_chip(adapter) || lancer_chip(adapter)) {
1193                 skb = be_lancer_xmit_workarounds(adapter, skb, wrb_params);
1194                 if (!skb)
1195                         return NULL;
1196         }
1197
1198         /* The stack can send us skbs with length greater than
1199          * what the HW can handle. Trim the extra bytes.
1200          */
1201         WARN_ON_ONCE(skb->len > BE_MAX_GSO_SIZE);
1202         err = pskb_trim(skb, BE_MAX_GSO_SIZE);
1203         WARN_ON(err);
1204
1205         return skb;
1206 }
1207
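/* Post all pending WRBs to the HW by ringing the TX doorbell. The last
 * request is made eventable, and on non-Lancer chips a dummy WRB is added
 * when the pending WRB count is odd.
 */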
1208 static void be_xmit_flush(struct be_adapter *adapter, struct be_tx_obj *txo)
1209 {
1210         struct be_queue_info *txq = &txo->q;
1211         struct be_eth_hdr_wrb *hdr = queue_index_node(txq, txo->last_req_hdr);
1212
1213         /* Mark the last request eventable if it hasn't been marked already */
1214         if (!(hdr->dw[2] & cpu_to_le32(TX_HDR_WRB_EVT)))
1215                 hdr->dw[2] |= cpu_to_le32(TX_HDR_WRB_EVT | TX_HDR_WRB_COMPL);
1216
1217         /* compose a dummy wrb if there is an odd number of wrbs to notify */
1218         if (!lancer_chip(adapter) && (txo->pend_wrb_cnt & 1)) {
1219                 wrb_fill_dummy(queue_head_node(txq));
1220                 queue_head_inc(txq);
1221                 atomic_inc(&txq->used);
1222                 txo->pend_wrb_cnt++;
1223                 hdr->dw[2] &= ~cpu_to_le32(TX_HDR_WRB_NUM_MASK <<
1224                                            TX_HDR_WRB_NUM_SHIFT);
1225                 hdr->dw[2] |= cpu_to_le32((txo->last_req_wrb_cnt + 1) <<
1226                                           TX_HDR_WRB_NUM_SHIFT);
1227         }
1228         be_txq_notify(adapter, txo, txo->pend_wrb_cnt);
1229         txo->pend_wrb_cnt = 0;
1230 }
1231
1232 /* OS2BMC related */
1233
1234 #define DHCP_CLIENT_PORT        68
1235 #define DHCP_SERVER_PORT        67
1236 #define NET_BIOS_PORT1          137
1237 #define NET_BIOS_PORT2          138
1238 #define DHCPV6_RAS_PORT         547
1239
1240 #define is_mc_allowed_on_bmc(adapter, eh)       \
1241         (!is_multicast_filt_enabled(adapter) && \
1242          is_multicast_ether_addr(eh->h_dest) && \
1243          !is_broadcast_ether_addr(eh->h_dest))
1244
1245 #define is_bc_allowed_on_bmc(adapter, eh)       \
1246         (!is_broadcast_filt_enabled(adapter) && \
1247          is_broadcast_ether_addr(eh->h_dest))
1248
1249 #define is_arp_allowed_on_bmc(adapter, skb)     \
1250         (is_arp(skb) && is_arp_filt_enabled(adapter))
1251
1252 #define is_broadcast_packet(eh, adapter)        \
1253                 (is_multicast_ether_addr(eh->h_dest) && \
1254                 !compare_ether_addr(eh->h_dest, adapter->netdev->broadcast))
1255
1256 #define is_arp(skb)     (skb->protocol == htons(ETH_P_ARP))
1257
1258 #define is_arp_filt_enabled(adapter)    \
1259                 (adapter->bmc_filt_mask & (BMC_FILT_BROADCAST_ARP))
1260
1261 #define is_dhcp_client_filt_enabled(adapter)    \
1262                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_CLIENT)
1263
1264 #define is_dhcp_srvr_filt_enabled(adapter)      \
1265                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_SERVER)
1266
1267 #define is_nbios_filt_enabled(adapter)  \
1268                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_NET_BIOS)
1269
1270 #define is_ipv6_na_filt_enabled(adapter)        \
1271                 (adapter->bmc_filt_mask &       \
1272                         BMC_FILT_MULTICAST_IPV6_NEIGH_ADVER)
1273
1274 #define is_ipv6_ra_filt_enabled(adapter)        \
1275                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RA)
1276
1277 #define is_ipv6_ras_filt_enabled(adapter)       \
1278                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RAS)
1279
1280 #define is_broadcast_filt_enabled(adapter)      \
1281                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST)
1282
1283 #define is_multicast_filt_enabled(adapter)      \
1284                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST)
1285
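/* Decide whether a copy of this TX packet must also be delivered to the BMC
 * (OS2BMC), based on the configured BMC filter mask for broadcast, multicast,
 * ARP, ICMPv6 RA/NA, DHCP and NetBIOS traffic.
 */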
1286 static bool be_send_pkt_to_bmc(struct be_adapter *adapter,
1287                                struct sk_buff **skb)
1288 {
1289         struct ethhdr *eh = (struct ethhdr *)(*skb)->data;
1290         bool os2bmc = false;
1291
1292         if (!be_is_os2bmc_enabled(adapter))
1293                 goto done;
1294
1295         if (!is_multicast_ether_addr(eh->h_dest))
1296                 goto done;
1297
1298         if (is_mc_allowed_on_bmc(adapter, eh) ||
1299             is_bc_allowed_on_bmc(adapter, eh) ||
1300             is_arp_allowed_on_bmc(adapter, (*skb))) {
1301                 os2bmc = true;
1302                 goto done;
1303         }
1304
1305         if ((*skb)->protocol == htons(ETH_P_IPV6)) {
1306                 struct ipv6hdr *hdr = ipv6_hdr((*skb));
1307                 u8 nexthdr = hdr->nexthdr;
1308
1309                 if (nexthdr == IPPROTO_ICMPV6) {
1310                         struct icmp6hdr *icmp6 = icmp6_hdr((*skb));
1311
1312                         switch (icmp6->icmp6_type) {
1313                         case NDISC_ROUTER_ADVERTISEMENT:
1314                                 os2bmc = is_ipv6_ra_filt_enabled(adapter);
1315                                 goto done;
1316                         case NDISC_NEIGHBOUR_ADVERTISEMENT:
1317                                 os2bmc = is_ipv6_na_filt_enabled(adapter);
1318                                 goto done;
1319                         default:
1320                                 break;
1321                         }
1322                 }
1323         }
1324
1325         if (is_udp_pkt((*skb))) {
1326                 struct udphdr *udp = udp_hdr((*skb));
1327
1328                 switch (ntohs(udp->dest)) {
1329                 case DHCP_CLIENT_PORT:
1330                         os2bmc = is_dhcp_client_filt_enabled(adapter);
1331                         goto done;
1332                 case DHCP_SERVER_PORT:
1333                         os2bmc = is_dhcp_srvr_filt_enabled(adapter);
1334                         goto done;
1335                 case NET_BIOS_PORT1:
1336                 case NET_BIOS_PORT2:
1337                         os2bmc = is_nbios_filt_enabled(adapter);
1338                         goto done;
1339                 case DHCPV6_RAS_PORT:
1340                         os2bmc = is_ipv6_ras_filt_enabled(adapter);
1341                         goto done;
1342                 default:
1343                         break;
1344                 }
1345         }
1346 done:
1347         /* For VLAN packets destined to the BMC, the ASIC expects
1348          * the VLAN tag to be inline in the packet.
1349          */
1350         if (os2bmc)
1351                 *skb = be_insert_vlan_in_pkt(adapter, *skb, NULL);
1352
1353         return os2bmc;
1354 }
1355
1356 static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev)
1357 {
1358         struct be_adapter *adapter = netdev_priv(netdev);
1359         u16 q_idx = skb_get_queue_mapping(skb);
1360         struct be_tx_obj *txo = &adapter->tx_obj[q_idx];
1361         struct be_wrb_params wrb_params = { 0 };
1362         bool flush = !skb->xmit_more;
1363         u16 wrb_cnt;
1364
1365         skb = be_xmit_workarounds(adapter, skb, &wrb_params);
1366         if (unlikely(!skb))
1367                 goto drop;
1368
1369         be_get_wrb_params_from_skb(adapter, skb, &wrb_params);
1370
1371         wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1372         if (unlikely(!wrb_cnt)) {
1373                 dev_kfree_skb_any(skb);
1374                 goto drop;
1375         }
1376
1377         /* If OS2BMC is enabled and the packet is destined to the BMC,
1378          * enqueue the packet a second time with the mgmt bit set.
1379          */
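             /* Note (descriptive, derived from the code below): on a successful
              * second enqueue, skb_get() takes an extra reference because the
              * same skb is now tracked by two WRB chains; each TX completion
              * then drops one reference.
              */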
1380         if (be_send_pkt_to_bmc(adapter, &skb)) {
1381                 BE_WRB_F_SET(wrb_params.features, OS2BMC, 1);
1382                 wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1383                 if (unlikely(!wrb_cnt))
1384                         goto drop;
1385                 else
1386                         skb_get(skb);
1387         }
1388
1389         if (be_is_txq_full(txo)) {
1390                 netif_stop_subqueue(netdev, q_idx);
1391                 tx_stats(txo)->tx_stops++;
1392         }
1393
1394         if (flush || __netif_subqueue_stopped(netdev, q_idx))
1395                 be_xmit_flush(adapter, txo);
1396
1397         return NETDEV_TX_OK;
1398 drop:
1399         tx_stats(txo)->tx_drv_drops++;
1400         /* Flush the already enqueued tx requests */
1401         if (flush && txo->pend_wrb_cnt)
1402                 be_xmit_flush(adapter, txo);
1403
1404         return NETDEV_TX_OK;
1405 }
1406
1407 static inline bool be_in_all_promisc(struct be_adapter *adapter)
1408 {
1409         return (adapter->if_flags & BE_IF_FLAGS_ALL_PROMISCUOUS) ==
1410                         BE_IF_FLAGS_ALL_PROMISCUOUS;
1411 }
1412
1413 static int be_set_vlan_promisc(struct be_adapter *adapter)
1414 {
1415         struct device *dev = &adapter->pdev->dev;
1416         int status;
1417
1418         if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS)
1419                 return 0;
1420
1421         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, ON);
1422         if (!status) {
1423                 dev_info(dev, "Enabled VLAN promiscuous mode\n");
1424                 adapter->if_flags |= BE_IF_FLAGS_VLAN_PROMISCUOUS;
1425         } else {
1426                 dev_err(dev, "Failed to enable VLAN promiscuous mode\n");
1427         }
1428         return status;
1429 }
1430
1431 static int be_clear_vlan_promisc(struct be_adapter *adapter)
1432 {
1433         struct device *dev = &adapter->pdev->dev;
1434         int status;
1435
1436         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, OFF);
1437         if (!status) {
1438                 dev_info(dev, "Disabled VLAN promiscuous mode\n");
1439                 adapter->if_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
1440         }
1441         return status;
1442 }
1443
1444 /*
1445  * A max of 64 (BE_NUM_VLANS_SUPPORTED) vlans can be configured in BE.
1446  * If the user configures more, place BE in vlan promiscuous mode.
1447  */
1448 static int be_vid_config(struct be_adapter *adapter)
1449 {
1450         struct device *dev = &adapter->pdev->dev;
1451         u16 vids[BE_NUM_VLANS_SUPPORTED];
1452         u16 num = 0, i = 0;
1453         int status = 0;
1454
1455         /* No need to change the VLAN state if the I/F is in promiscuous mode */
1456         if (adapter->netdev->flags & IFF_PROMISC)
1457                 return 0;
1458
1459         if (adapter->vlans_added > be_max_vlans(adapter))
1460                 return be_set_vlan_promisc(adapter);
1461
1462         if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
1463                 status = be_clear_vlan_promisc(adapter);
1464                 if (status)
1465                         return status;
1466         }
1467         /* Construct VLAN Table to give to HW */
1468         for_each_set_bit(i, adapter->vids, VLAN_N_VID)
1469                 vids[num++] = cpu_to_le16(i);
1470
1471         status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0);
1472         if (status) {
1473                 dev_err(dev, "Setting HW VLAN filtering failed\n");
1474                 /* Set to VLAN promisc mode as setting VLAN filter failed */
1475                 if (addl_status(status) == MCC_ADDL_STATUS_INSUFFICIENT_VLANS ||
1476                     addl_status(status) ==
1477                                 MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES)
1478                         return be_set_vlan_promisc(adapter);
1479         }
1480         return status;
1481 }
1482
1483 static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1484 {
1485         struct be_adapter *adapter = netdev_priv(netdev);
1486         int status = 0;
1487
1488         mutex_lock(&adapter->rx_filter_lock);
1489
1490         /* Packets with VID 0 are always received by Lancer by default */
1491         if (lancer_chip(adapter) && vid == 0)
1492                 goto done;
1493
1494         if (test_bit(vid, adapter->vids))
1495                 goto done;
1496
1497         set_bit(vid, adapter->vids);
1498         adapter->vlans_added++;
1499
1500         status = be_vid_config(adapter);
1501 done:
1502         mutex_unlock(&adapter->rx_filter_lock);
1503         return status;
1504 }
1505
1506 static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid)
1507 {
1508         struct be_adapter *adapter = netdev_priv(netdev);
1509         int status = 0;
1510
1511         mutex_lock(&adapter->rx_filter_lock);
1512
1513         /* Packets with VID 0 are always received by Lancer by default */
1514         if (lancer_chip(adapter) && vid == 0)
1515                 goto done;
1516
1517         if (!test_bit(vid, adapter->vids))
1518                 goto done;
1519
1520         clear_bit(vid, adapter->vids);
1521         adapter->vlans_added--;
1522
1523         status = be_vid_config(adapter);
1524 done:
1525         mutex_unlock(&adapter->rx_filter_lock);
1526         return status;
1527 }
1528
1529 static void be_set_all_promisc(struct be_adapter *adapter)
1530 {
1531         be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, ON);
1532         adapter->if_flags |= BE_IF_FLAGS_ALL_PROMISCUOUS;
1533 }
1534
1535 static void be_set_mc_promisc(struct be_adapter *adapter)
1536 {
1537         int status;
1538
1539         if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS)
1540                 return;
1541
1542         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MCAST_PROMISCUOUS, ON);
1543         if (!status)
1544                 adapter->if_flags |= BE_IF_FLAGS_MCAST_PROMISCUOUS;
1545 }
1546
1547 static void be_set_uc_promisc(struct be_adapter *adapter)
1548 {
1549         int status;
1550
1551         if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS)
1552                 return;
1553
1554         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, ON);
1555         if (!status)
1556                 adapter->if_flags |= BE_IF_FLAGS_PROMISCUOUS;
1557 }
1558
1559 static void be_clear_uc_promisc(struct be_adapter *adapter)
1560 {
1561         int status;
1562
1563         if (!(adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS))
1564                 return;
1565
1566         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, OFF);
1567         if (!status)
1568                 adapter->if_flags &= ~BE_IF_FLAGS_PROMISCUOUS;
1569 }
1570
1571 /* The two functions below are the callback arguments for __dev_mc_sync() and
1572  * __dev_uc_sync(). The same callback serves both sync and unsync; it does not
1573  * actually add or remove addresses, it only detects changes to the uc/mc
1574  * lists. The entire uc/mc list is programmed in be_set_rx_mode().
1575  */
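     /* __dev_uc_sync()/__dev_mc_sync() invoke these callbacks once for each
      * address that was added or removed, so each call simply flags that the
      * corresponding list needs to be reprogrammed.
      */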
1576 static int be_uc_list_update(struct net_device *netdev,
1577                              const unsigned char *addr)
1578 {
1579         struct be_adapter *adapter = netdev_priv(netdev);
1580
1581         adapter->update_uc_list = true;
1582         return 0;
1583 }
1584
1585 static int be_mc_list_update(struct net_device *netdev,
1586                              const unsigned char *addr)
1587 {
1588         struct be_adapter *adapter = netdev_priv(netdev);
1589
1590         adapter->update_mc_list = true;
1591         return 0;
1592 }
1593
1594 static void be_set_mc_list(struct be_adapter *adapter)
1595 {
1596         struct net_device *netdev = adapter->netdev;
1597         struct netdev_hw_addr *ha;
1598         bool mc_promisc = false;
1599         int status;
1600
1601         netif_addr_lock_bh(netdev);
1602         __dev_mc_sync(netdev, be_mc_list_update, be_mc_list_update);
1603
1604         if (netdev->flags & IFF_PROMISC) {
1605                 adapter->update_mc_list = false;
1606         } else if (netdev->flags & IFF_ALLMULTI ||
1607                    netdev_mc_count(netdev) > be_max_mc(adapter)) {
1608                 /* Enable multicast promisc if the number of addresses
1609                  * configured exceeds what we support
1610                  */
1611                 mc_promisc = true;
1612                 adapter->update_mc_list = false;
1613         } else if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS) {
1614                 /* Update mc-list unconditionally if the iface was previously
1615                  * in mc-promisc mode and now is out of that mode.
1616                  */
1617                 adapter->update_mc_list = true;
1618         }
1619
1620         if (adapter->update_mc_list) {
1621                 int i = 0;
1622
1623                 /* cache the mc-list in adapter */
1624                 netdev_for_each_mc_addr(ha, netdev) {
1625                         ether_addr_copy(adapter->mc_list[i].mac, ha->addr);
1626                         i++;
1627                 }
1628                 adapter->mc_count = netdev_mc_count(netdev);
1629         }
1630         netif_addr_unlock_bh(netdev);
1631
1632         if (mc_promisc) {
1633                 be_set_mc_promisc(adapter);
1634         } else if (adapter->update_mc_list) {
1635                 status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON);
1636                 if (!status)
1637                         adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS;
1638                 else
1639                         be_set_mc_promisc(adapter);
1640
1641                 adapter->update_mc_list = false;
1642         }
1643 }
1644
1645 static void be_clear_mc_list(struct be_adapter *adapter)
1646 {
1647         struct net_device *netdev = adapter->netdev;
1648
1649         __dev_mc_unsync(netdev, NULL);
1650         be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, OFF);
1651         adapter->mc_count = 0;
1652 }
1653
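     /* If the uc-list entry matches the function's own MAC (dev_mac), reuse
      * the primary pmac_id (pmac_id[0]) instead of consuming another MAC
      * filter; be_uc_mac_del() below correspondingly never deletes that
      * shared id.
      */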
1654 static int be_uc_mac_add(struct be_adapter *adapter, int uc_idx)
1655 {
1656         if (ether_addr_equal(adapter->uc_list[uc_idx].mac, adapter->dev_mac)) {
1657                 adapter->pmac_id[uc_idx + 1] = adapter->pmac_id[0];
1658                 return 0;
1659         }
1660
1661         return be_cmd_pmac_add(adapter, adapter->uc_list[uc_idx].mac,
1662                                adapter->if_handle,
1663                                &adapter->pmac_id[uc_idx + 1], 0);
1664 }
1665
1666 static void be_uc_mac_del(struct be_adapter *adapter, int pmac_id)
1667 {
1668         if (pmac_id == adapter->pmac_id[0])
1669                 return;
1670
1671         be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
1672 }
1673
1674 static void be_set_uc_list(struct be_adapter *adapter)
1675 {
1676         struct net_device *netdev = adapter->netdev;
1677         struct netdev_hw_addr *ha;
1678         bool uc_promisc = false;
1679         int curr_uc_macs = 0, i;
1680
1681         netif_addr_lock_bh(netdev);
1682         __dev_uc_sync(netdev, be_uc_list_update, be_uc_list_update);
1683
1684         if (netdev->flags & IFF_PROMISC) {
1685                 adapter->update_uc_list = false;
1686         } else if (netdev_uc_count(netdev) > (be_max_uc(adapter) - 1)) {
1687                 uc_promisc = true;
1688                 adapter->update_uc_list = false;
1689         }  else if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS) {
1690                 /* Update uc-list unconditionally if the iface was previously
1691                  * in uc-promisc mode and now is out of that mode.
1692                  */
1693                 adapter->update_uc_list = true;
1694         }
1695
1696         if (adapter->update_uc_list) {
1697                 /* cache the uc-list in adapter array */
1698                 i = 0;
1699                 netdev_for_each_uc_addr(ha, netdev) {
1700                         ether_addr_copy(adapter->uc_list[i].mac, ha->addr);
1701                         i++;
1702                 }
1703                 curr_uc_macs = netdev_uc_count(netdev);
1704         }
1705         netif_addr_unlock_bh(netdev);
1706
1707         if (uc_promisc) {
1708                 be_set_uc_promisc(adapter);
1709         } else if (adapter->update_uc_list) {
1710                 be_clear_uc_promisc(adapter);
1711
1712                 for (i = 0; i < adapter->uc_macs; i++)
1713                         be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1714
1715                 for (i = 0; i < curr_uc_macs; i++)
1716                         be_uc_mac_add(adapter, i);
1717                 adapter->uc_macs = curr_uc_macs;
1718                 adapter->update_uc_list = false;
1719         }
1720 }
1721
1722 static void be_clear_uc_list(struct be_adapter *adapter)
1723 {
1724         struct net_device *netdev = adapter->netdev;
1725         int i;
1726
1727         __dev_uc_unsync(netdev, NULL);
1728         for (i = 0; i < adapter->uc_macs; i++)
1729                 be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1730
1731         adapter->uc_macs = 0;
1732 }
1733
1734 static void __be_set_rx_mode(struct be_adapter *adapter)
1735 {
1736         struct net_device *netdev = adapter->netdev;
1737
1738         mutex_lock(&adapter->rx_filter_lock);
1739
1740         if (netdev->flags & IFF_PROMISC) {
1741                 if (!be_in_all_promisc(adapter))
1742                         be_set_all_promisc(adapter);
1743         } else if (be_in_all_promisc(adapter)) {
1744                 /* We need to re-program the vlan-list or clear
1745                  * vlan-promisc mode (if needed) when the interface
1746                  * comes out of promisc mode.
1747                  */
1748                 be_vid_config(adapter);
1749         }
1750
1751         be_set_uc_list(adapter);
1752         be_set_mc_list(adapter);
1753
1754         mutex_unlock(&adapter->rx_filter_lock);
1755 }
1756
1757 static void be_work_set_rx_mode(struct work_struct *work)
1758 {
1759         struct be_cmd_work *cmd_work =
1760                                 container_of(work, struct be_cmd_work, work);
1761
1762         __be_set_rx_mode(cmd_work->adapter);
1763         kfree(cmd_work);
1764 }
1765
1766 static int be_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
1767 {
1768         struct be_adapter *adapter = netdev_priv(netdev);
1769         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1770         int status;
1771
1772         if (!sriov_enabled(adapter))
1773                 return -EPERM;
1774
1775         if (!is_valid_ether_addr(mac) || vf >= adapter->num_vfs)
1776                 return -EINVAL;
1777
1778         /* Proceed further only if the user-provided MAC is different
1779          * from the active MAC
1780          */
1781         if (ether_addr_equal(mac, vf_cfg->mac_addr))
1782                 return 0;
1783
1784         if (BEx_chip(adapter)) {
1785                 be_cmd_pmac_del(adapter, vf_cfg->if_handle, vf_cfg->pmac_id,
1786                                 vf + 1);
1787
1788                 status = be_cmd_pmac_add(adapter, mac, vf_cfg->if_handle,
1789                                          &vf_cfg->pmac_id, vf + 1);
1790         } else {
1791                 status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
1792                                         vf + 1);
1793         }
1794
1795         if (status) {
1796                 dev_err(&adapter->pdev->dev, "MAC %pM set on VF %d failed: %#x\n",
1797                         mac, vf, status);
1798                 return be_cmd_status(status);
1799         }
1800
1801         ether_addr_copy(vf_cfg->mac_addr, mac);
1802
1803         return 0;
1804 }
1805
1806 static int be_get_vf_config(struct net_device *netdev, int vf,
1807                             struct ifla_vf_info *vi)
1808 {
1809         struct be_adapter *adapter = netdev_priv(netdev);
1810         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1811
1812         if (!sriov_enabled(adapter))
1813                 return -EPERM;
1814
1815         if (vf >= adapter->num_vfs)
1816                 return -EINVAL;
1817
1818         vi->vf = vf;
1819         vi->max_tx_rate = vf_cfg->tx_rate;
1820         vi->min_tx_rate = 0;
1821         vi->vlan = vf_cfg->vlan_tag & VLAN_VID_MASK;
1822         vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT;
1823         memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN);
1824         vi->linkstate = adapter->vf_cfg[vf].plink_tracking;
1825         vi->spoofchk = adapter->vf_cfg[vf].spoofchk;
1826
1827         return 0;
1828 }
1829
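     /* Transparent VLAN Tagging (TVT) makes the adapter apply the
      * admin-assigned VLAN tag to the VF's traffic (the usual ndo_set_vf_vlan
      * semantics). While TVT is on, the VF's FILTMGMT privilege is revoked so
      * it cannot program its own VLAN filters; be_clear_vf_tvt() restores it.
      */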
1830 static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan)
1831 {
1832         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1833         u16 vids[BE_NUM_VLANS_SUPPORTED];
1834         int vf_if_id = vf_cfg->if_handle;
1835         int status;
1836
1837         /* Enable Transparent VLAN Tagging */
1838         status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0, 0);
1839         if (status)
1840                 return status;
1841
1842         /* Clear any pre-programmed VLAN filters on the VF now that TVT is enabled */
1843         vids[0] = 0;
1844         status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1);
1845         if (!status)
1846                 dev_info(&adapter->pdev->dev,
1847                          "Cleared guest VLANs on VF%d", vf);
1848
1849         /* After TVT is enabled, disallow the VF from programming VLAN filters */
1850         if (vf_cfg->privileges & BE_PRIV_FILTMGMT) {
1851                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges &
1852                                                   ~BE_PRIV_FILTMGMT, vf + 1);
1853                 if (!status)
1854                         vf_cfg->privileges &= ~BE_PRIV_FILTMGMT;
1855         }
1856         return 0;
1857 }
1858
1859 static int be_clear_vf_tvt(struct be_adapter *adapter, int vf)
1860 {
1861         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1862         struct device *dev = &adapter->pdev->dev;
1863         int status;
1864
1865         /* Reset Transparent VLAN Tagging. */
1866         status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1,
1867                                        vf_cfg->if_handle, 0, 0);
1868         if (status)
1869                 return status;
1870
1871         /* Allow the VF to program VLAN filters */
1872         if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
1873                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges |
1874                                                   BE_PRIV_FILTMGMT, vf + 1);
1875                 if (!status) {
1876                         vf_cfg->privileges |= BE_PRIV_FILTMGMT;
1877                         dev_info(dev, "VF%d: FILTMGMT priv enabled", vf);
1878                 }
1879         }
1880
1881         dev_info(dev,
1882                  "Disable/re-enable i/f in VM to clear Transparent VLAN tag");
1883         return 0;
1884 }
1885
1886 static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
1887                           __be16 vlan_proto)
1888 {
1889         struct be_adapter *adapter = netdev_priv(netdev);
1890         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1891         int status;
1892
1893         if (!sriov_enabled(adapter))
1894                 return -EPERM;
1895
1896         if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7)
1897                 return -EINVAL;
1898
1899         if (vlan_proto != htons(ETH_P_8021Q))
1900                 return -EPROTONOSUPPORT;
1901
1902         if (vlan || qos) {
1903                 vlan |= qos << VLAN_PRIO_SHIFT;
1904                 status = be_set_vf_tvt(adapter, vf, vlan);
1905         } else {
1906                 status = be_clear_vf_tvt(adapter, vf);
1907         }
1908
1909         if (status) {
1910                 dev_err(&adapter->pdev->dev,
1911                         "VLAN %d config on VF %d failed: %#x\n", vlan, vf,
1912                         status);
1913                 return be_cmd_status(status);
1914         }
1915
1916         vf_cfg->vlan_tag = vlan;
1917         return 0;
1918 }
1919
1920 static int be_set_vf_tx_rate(struct net_device *netdev, int vf,
1921                              int min_tx_rate, int max_tx_rate)
1922 {
1923         struct be_adapter *adapter = netdev_priv(netdev);
1924         struct device *dev = &adapter->pdev->dev;
1925         int percent_rate, status = 0;
1926         u16 link_speed = 0;
1927         u8 link_status;
1928
1929         if (!sriov_enabled(adapter))
1930                 return -EPERM;
1931
1932         if (vf >= adapter->num_vfs)
1933                 return -EINVAL;
1934
1935         if (min_tx_rate)
1936                 return -EINVAL;
1937
1938         if (!max_tx_rate)
1939                 goto config_qos;
1940
1941         status = be_cmd_link_status_query(adapter, &link_speed,
1942                                           &link_status, 0);
1943         if (status)
1944                 goto err;
1945
1946         if (!link_status) {
1947                 dev_err(dev, "TX-rate setting not allowed when link is down\n");
1948                 status = -ENETDOWN;
1949                 goto err;
1950         }
1951
1952         if (max_tx_rate < 100 || max_tx_rate > link_speed) {
1953                 dev_err(dev, "TX-rate must be between 100 and %d Mbps\n",
1954                         link_speed);
1955                 status = -EINVAL;
1956                 goto err;
1957         }
1958
1959         /* On Skyhawk the QOS setting must be done only as a % value */
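             /* For example, on a 10000 Mbps link percent_rate is 100, so
              * max_tx_rate must be a multiple of 100 Mbps, i.e. a whole
              * percentage of the link speed.
              */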
1960         percent_rate = link_speed / 100;
1961         if (skyhawk_chip(adapter) && (max_tx_rate % percent_rate)) {
1962                 dev_err(dev, "TX-rate must be a multiple of %d Mbps\n",
1963                         percent_rate);
1964                 status = -EINVAL;
1965                 goto err;
1966         }
1967
1968 config_qos:
1969         status = be_cmd_config_qos(adapter, max_tx_rate, link_speed, vf + 1);
1970         if (status)
1971                 goto err;
1972
1973         adapter->vf_cfg[vf].tx_rate = max_tx_rate;
1974         return 0;
1975
1976 err:
1977         dev_err(dev, "TX-rate setting of %dMbps on VF%d failed\n",
1978                 max_tx_rate, vf);
1979         return be_cmd_status(status);
1980 }
1981
1982 static int be_set_vf_link_state(struct net_device *netdev, int vf,
1983                                 int link_state)
1984 {
1985         struct be_adapter *adapter = netdev_priv(netdev);
1986         int status;
1987
1988         if (!sriov_enabled(adapter))
1989                 return -EPERM;
1990
1991         if (vf >= adapter->num_vfs)
1992                 return -EINVAL;
1993
1994         status = be_cmd_set_logical_link_config(adapter, link_state, vf+1);
1995         if (status) {
1996                 dev_err(&adapter->pdev->dev,
1997                         "Link state change on VF %d failed: %#x\n", vf, status);
1998                 return be_cmd_status(status);
1999         }
2000
2001         adapter->vf_cfg[vf].plink_tracking = link_state;
2002
2003         return 0;
2004 }
2005
2006 static int be_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable)
2007 {
2008         struct be_adapter *adapter = netdev_priv(netdev);
2009         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
2010         u8 spoofchk;
2011         int status;
2012
2013         if (!sriov_enabled(adapter))
2014                 return -EPERM;
2015
2016         if (vf >= adapter->num_vfs)
2017                 return -EINVAL;
2018
2019         if (BEx_chip(adapter))
2020                 return -EOPNOTSUPP;
2021
2022         if (enable == vf_cfg->spoofchk)
2023                 return 0;
2024
2025         spoofchk = enable ? ENABLE_MAC_SPOOFCHK : DISABLE_MAC_SPOOFCHK;
2026
2027         status = be_cmd_set_hsw_config(adapter, 0, vf + 1, vf_cfg->if_handle,
2028                                        0, spoofchk);
2029         if (status) {
2030                 dev_err(&adapter->pdev->dev,
2031                         "Spoofchk change on VF %d failed: %#x\n", vf, status);
2032                 return be_cmd_status(status);
2033         }
2034
2035         vf_cfg->spoofchk = enable;
2036         return 0;
2037 }
2038
2039 static void be_aic_update(struct be_aic_obj *aic, u64 rx_pkts, u64 tx_pkts,
2040                           ulong now)
2041 {
2042         aic->rx_pkts_prev = rx_pkts;
2043         aic->tx_reqs_prev = tx_pkts;
2044         aic->jiffies = now;
2045 }
2046
2047 static int be_get_new_eqd(struct be_eq_obj *eqo)
2048 {
2049         struct be_adapter *adapter = eqo->adapter;
2050         int eqd, start;
2051         struct be_aic_obj *aic;
2052         struct be_rx_obj *rxo;
2053         struct be_tx_obj *txo;
2054         u64 rx_pkts = 0, tx_pkts = 0;
2055         ulong now;
2056         u32 pps, delta;
2057         int i;
2058
2059         aic = &adapter->aic_obj[eqo->idx];
2060         if (!aic->enable) {
2061                 if (aic->jiffies)
2062                         aic->jiffies = 0;
2063                 eqd = aic->et_eqd;
2064                 return eqd;
2065         }
2066
2067         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
2068                 do {
2069                         start = u64_stats_fetch_begin_irq(&rxo->stats.sync);
2070                         rx_pkts += rxo->stats.rx_pkts;
2071                 } while (u64_stats_fetch_retry_irq(&rxo->stats.sync, start));
2072         }
2073
2074         for_all_tx_queues_on_eq(adapter, eqo, txo, i) {
2075                 do {
2076                         start = u64_stats_fetch_begin_irq(&txo->stats.sync);
2077                         tx_pkts += txo->stats.tx_reqs;
2078                 } while (u64_stats_fetch_retry_irq(&txo->stats.sync, start));
2079         }
2080
2081         /* Skip if this is the first calculation or the counters wrapped around */
2082         now = jiffies;
2083         if (!aic->jiffies || time_before(now, aic->jiffies) ||
2084             rx_pkts < aic->rx_pkts_prev ||
2085             tx_pkts < aic->tx_reqs_prev) {
2086                 be_aic_update(aic, rx_pkts, tx_pkts, now);
2087                 return aic->prev_eqd;
2088         }
2089
2090         delta = jiffies_to_msecs(now - aic->jiffies);
2091         if (delta == 0)
2092                 return aic->prev_eqd;
2093
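             /* Derive the new delay from the combined RX+TX packet rate. For
              * example, at 150,000 pkts/s, eqd = (150000 / 15000) << 2 = 40.
              * Rates below 30,000 pkts/s compute to eqd < 8 and are forced to
              * 0 (no delay); the result is clamped to [min_eqd, max_eqd].
              */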
2094         pps = (((u32)(rx_pkts - aic->rx_pkts_prev) * 1000) / delta) +
2095                 (((u32)(tx_pkts - aic->tx_reqs_prev) * 1000) / delta);
2096         eqd = (pps / 15000) << 2;
2097
2098         if (eqd < 8)
2099                 eqd = 0;
2100         eqd = min_t(u32, eqd, aic->max_eqd);
2101         eqd = max_t(u32, eqd, aic->min_eqd);
2102
2103         be_aic_update(aic, rx_pkts, tx_pkts, now);
2104
2105         return eqd;
2106 }
2107
2108 /* For Skyhawk-R only */
2109 static u32 be_get_eq_delay_mult_enc(struct be_eq_obj *eqo)
2110 {
2111         struct be_adapter *adapter = eqo->adapter;
2112         struct be_aic_obj *aic = &adapter->aic_obj[eqo->idx];
2113         ulong now = jiffies;
2114         int eqd;
2115         u32 mult_enc;
2116
2117         if (!aic->enable)
2118                 return 0;
2119
2120         if (jiffies_to_msecs(now - aic->jiffies) < 1)
2121                 eqd = aic->prev_eqd;
2122         else
2123                 eqd = be_get_new_eqd(eqo);
2124
2125         if (eqd > 100)
2126                 mult_enc = R2I_DLY_ENC_1;
2127         else if (eqd > 60)
2128                 mult_enc = R2I_DLY_ENC_2;
2129         else if (eqd > 20)
2130                 mult_enc = R2I_DLY_ENC_3;
2131         else
2132                 mult_enc = R2I_DLY_ENC_0;
2133
2134         aic->prev_eqd = eqd;
2135
2136         return mult_enc;
2137 }
2138
2139 void be_eqd_update(struct be_adapter *adapter, bool force_update)
2140 {
2141         struct be_set_eqd set_eqd[MAX_EVT_QS];
2142         struct be_aic_obj *aic;
2143         struct be_eq_obj *eqo;
2144         int i, num = 0, eqd;
2145
2146         for_all_evt_queues(adapter, eqo, i) {
2147                 aic = &adapter->aic_obj[eqo->idx];
2148                 eqd = be_get_new_eqd(eqo);
2149                 if (force_update || eqd != aic->prev_eqd) {
2150                         set_eqd[num].delay_multiplier = (eqd * 65)/100;
2151                         set_eqd[num].eq_id = eqo->q.id;
2152                         aic->prev_eqd = eqd;
2153                         num++;
2154                 }
2155         }
2156
2157         if (num)
2158                 be_cmd_modify_eqd(adapter, set_eqd, num);
2159 }
2160
2161 static void be_rx_stats_update(struct be_rx_obj *rxo,
2162                                struct be_rx_compl_info *rxcp)
2163 {
2164         struct be_rx_stats *stats = rx_stats(rxo);
2165
2166         u64_stats_update_begin(&stats->sync);
2167         stats->rx_compl++;
2168         stats->rx_bytes += rxcp->pkt_size;
2169         stats->rx_pkts++;
2170         if (rxcp->tunneled)
2171                 stats->rx_vxlan_offload_pkts++;
2172         if (rxcp->pkt_type == BE_MULTICAST_PACKET)
2173                 stats->rx_mcast_pkts++;
2174         if (rxcp->err)
2175                 stats->rx_compl_err++;
2176         u64_stats_update_end(&stats->sync);
2177 }
2178
2179 static inline bool csum_passed(struct be_rx_compl_info *rxcp)
2180 {
2181         /* The L4 checksum is not reliable for non-TCP/UDP packets.
2182          * Also ignore ipcksm for IPv6 packets.
2183          */
2184         return (rxcp->tcpf || rxcp->udpf) && rxcp->l4_csum &&
2185                 (rxcp->ip_csum || rxcp->ipv6) && !rxcp->err;
2186 }
2187
2188 static struct be_rx_page_info *get_rx_page_info(struct be_rx_obj *rxo)
2189 {
2190         struct be_adapter *adapter = rxo->adapter;
2191         struct be_rx_page_info *rx_page_info;
2192         struct be_queue_info *rxq = &rxo->q;
2193         u32 frag_idx = rxq->tail;
2194
2195         rx_page_info = &rxo->page_info_tbl[frag_idx];
2196         BUG_ON(!rx_page_info->page);
2197
2198         if (rx_page_info->last_frag) {
2199                 dma_unmap_page(&adapter->pdev->dev,
2200                                dma_unmap_addr(rx_page_info, bus),
2201                                adapter->big_page_size, DMA_FROM_DEVICE);
2202                 rx_page_info->last_frag = false;
2203         } else {
2204                 dma_sync_single_for_cpu(&adapter->pdev->dev,
2205                                         dma_unmap_addr(rx_page_info, bus),
2206                                         rx_frag_size, DMA_FROM_DEVICE);
2207         }
2208
2209         queue_tail_inc(rxq);
2210         atomic_dec(&rxq->used);
2211         return rx_page_info;
2212 }
2213
2214 /* Throw away the data in the Rx completion */
2215 static void be_rx_compl_discard(struct be_rx_obj *rxo,
2216                                 struct be_rx_compl_info *rxcp)
2217 {
2218         struct be_rx_page_info *page_info;
2219         u16 i, num_rcvd = rxcp->num_rcvd;
2220
2221         for (i = 0; i < num_rcvd; i++) {
2222                 page_info = get_rx_page_info(rxo);
2223                 put_page(page_info->page);
2224                 memset(page_info, 0, sizeof(*page_info));
2225         }
2226 }
2227
2228 /*
2229  * skb_fill_rx_data forms a complete skb for the Ethernet frame
2230  * indicated by rxcp.
2231  */
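     /* Tiny frames (up to BE_HDR_LEN) are copied entirely into the skb linear
      * area and the page reference is dropped; for larger frames only the
      * Ethernet header is copied and the remaining data is attached as page
      * fragments.
      */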
2232 static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb,
2233                              struct be_rx_compl_info *rxcp)
2234 {
2235         struct be_rx_page_info *page_info;
2236         u16 i, j;
2237         u16 hdr_len, curr_frag_len, remaining;
2238         u8 *start;
2239
2240         page_info = get_rx_page_info(rxo);
2241         start = page_address(page_info->page) + page_info->page_offset;
2242         prefetch(start);
2243
2244         /* Copy data in the first descriptor of this completion */
2245         curr_frag_len = min(rxcp->pkt_size, rx_frag_size);
2246
2247         skb->len = curr_frag_len;
2248         if (curr_frag_len <= BE_HDR_LEN) { /* tiny packet */
2249                 memcpy(skb->data, start, curr_frag_len);
2250                 /* Complete packet has now been moved to data */
2251                 put_page(page_info->page);
2252                 skb->data_len = 0;
2253                 skb->tail += curr_frag_len;
2254         } else {
2255                 hdr_len = ETH_HLEN;
2256                 memcpy(skb->data, start, hdr_len);
2257                 skb_shinfo(skb)->nr_frags = 1;
2258                 skb_frag_set_page(skb, 0, page_info->page);
2259                 skb_shinfo(skb)->frags[0].page_offset =
2260                                         page_info->page_offset + hdr_len;
2261                 skb_frag_size_set(&skb_shinfo(skb)->frags[0],
2262                                   curr_frag_len - hdr_len);
2263                 skb->data_len = curr_frag_len - hdr_len;
2264                 skb->truesize += rx_frag_size;
2265                 skb->tail += hdr_len;
2266         }
2267         page_info->page = NULL;
2268
2269         if (rxcp->pkt_size <= rx_frag_size) {
2270                 BUG_ON(rxcp->num_rcvd != 1);
2271                 return;
2272         }
2273
2274         /* More frags present for this completion */
2275         remaining = rxcp->pkt_size - curr_frag_len;
2276         for (i = 1, j = 0; i < rxcp->num_rcvd; i++) {
2277                 page_info = get_rx_page_info(rxo);
2278                 curr_frag_len = min(remaining, rx_frag_size);
2279
2280                 /* Coalesce all frags from the same physical page in one slot */
2281                 if (page_info->page_offset == 0) {
2282                         /* Fresh page */
2283                         j++;
2284                         skb_frag_set_page(skb, j, page_info->page);
2285                         skb_shinfo(skb)->frags[j].page_offset =
2286                                                         page_info->page_offset;
2287                         skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2288                         skb_shinfo(skb)->nr_frags++;
2289                 } else {
2290                         put_page(page_info->page);
2291                 }
2292
2293                 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2294                 skb->len += curr_frag_len;
2295                 skb->data_len += curr_frag_len;
2296                 skb->truesize += rx_frag_size;
2297                 remaining -= curr_frag_len;
2298                 page_info->page = NULL;
2299         }
2300         BUG_ON(j > MAX_SKB_FRAGS);
2301 }
2302
2303 /* Process the RX completion indicated by rxcp when GRO is disabled */
2304 static void be_rx_compl_process(struct be_rx_obj *rxo, struct napi_struct *napi,
2305                                 struct be_rx_compl_info *rxcp)
2306 {
2307         struct be_adapter *adapter = rxo->adapter;
2308         struct net_device *netdev = adapter->netdev;
2309         struct sk_buff *skb;
2310
2311         skb = netdev_alloc_skb_ip_align(netdev, BE_RX_SKB_ALLOC_SIZE);
2312         if (unlikely(!skb)) {
2313                 rx_stats(rxo)->rx_drops_no_skbs++;
2314                 be_rx_compl_discard(rxo, rxcp);
2315                 return;
2316         }
2317
2318         skb_fill_rx_data(rxo, skb, rxcp);
2319
2320         if (likely((netdev->features & NETIF_F_RXCSUM) && csum_passed(rxcp)))
2321                 skb->ip_summed = CHECKSUM_UNNECESSARY;
2322         else
2323                 skb_checksum_none_assert(skb);
2324
2325         skb->protocol = eth_type_trans(skb, netdev);
2326         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2327         if (netdev->features & NETIF_F_RXHASH)
2328                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2329
2330         skb->csum_level = rxcp->tunneled;
2331         skb_mark_napi_id(skb, napi);
2332
2333         if (rxcp->vlanf)
2334                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2335
2336         netif_receive_skb(skb);
2337 }
2338
2339 /* Process the RX completion indicated by rxcp when GRO is enabled */
2340 static void be_rx_compl_process_gro(struct be_rx_obj *rxo,
2341                                     struct napi_struct *napi,
2342                                     struct be_rx_compl_info *rxcp)
2343 {
2344         struct be_adapter *adapter = rxo->adapter;
2345         struct be_rx_page_info *page_info;
2346         struct sk_buff *skb = NULL;
2347         u16 remaining, curr_frag_len;
2348         u16 i, j;
2349
2350         skb = napi_get_frags(napi);
2351         if (!skb) {
2352                 be_rx_compl_discard(rxo, rxcp);
2353                 return;
2354         }
2355
2356         remaining = rxcp->pkt_size;
2357         for (i = 0, j = -1; i < rxcp->num_rcvd; i++) {
2358                 page_info = get_rx_page_info(rxo);
2359
2360                 curr_frag_len = min(remaining, rx_frag_size);
2361
2362                 /* Coalesce all frags from the same physical page in one slot */
2363                 if (i == 0 || page_info->page_offset == 0) {
2364                         /* First frag or Fresh page */
2365                         j++;
2366                         skb_frag_set_page(skb, j, page_info->page);
2367                         skb_shinfo(skb)->frags[j].page_offset =
2368                                                         page_info->page_offset;
2369                         skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2370                 } else {
2371                         put_page(page_info->page);
2372                 }
2373                 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2374                 skb->truesize += rx_frag_size;
2375                 remaining -= curr_frag_len;
2376                 memset(page_info, 0, sizeof(*page_info));
2377         }
2378         BUG_ON(j > MAX_SKB_FRAGS);
2379
2380         skb_shinfo(skb)->nr_frags = j + 1;
2381         skb->len = rxcp->pkt_size;
2382         skb->data_len = rxcp->pkt_size;
2383         skb->ip_summed = CHECKSUM_UNNECESSARY;
2384         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2385         if (adapter->netdev->features & NETIF_F_RXHASH)
2386                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2387
2388         skb->csum_level = rxcp->tunneled;
2389
2390         if (rxcp->vlanf)
2391                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2392
2393         napi_gro_frags(napi);
2394 }
2395
2396 static void be_parse_rx_compl_v1(struct be_eth_rx_compl *compl,
2397                                  struct be_rx_compl_info *rxcp)
2398 {
2399         rxcp->pkt_size = GET_RX_COMPL_V1_BITS(pktsize, compl);
2400         rxcp->vlanf = GET_RX_COMPL_V1_BITS(vtp, compl);
2401         rxcp->err = GET_RX_COMPL_V1_BITS(err, compl);
2402         rxcp->tcpf = GET_RX_COMPL_V1_BITS(tcpf, compl);
2403         rxcp->udpf = GET_RX_COMPL_V1_BITS(udpf, compl);
2404         rxcp->ip_csum = GET_RX_COMPL_V1_BITS(ipcksm, compl);
2405         rxcp->l4_csum = GET_RX_COMPL_V1_BITS(l4_cksm, compl);
2406         rxcp->ipv6 = GET_RX_COMPL_V1_BITS(ip_version, compl);
2407         rxcp->num_rcvd = GET_RX_COMPL_V1_BITS(numfrags, compl);
2408         rxcp->pkt_type = GET_RX_COMPL_V1_BITS(cast_enc, compl);
2409         rxcp->rss_hash = GET_RX_COMPL_V1_BITS(rsshash, compl);
2410         if (rxcp->vlanf) {
2411                 rxcp->qnq = GET_RX_COMPL_V1_BITS(qnq, compl);
2412                 rxcp->vlan_tag = GET_RX_COMPL_V1_BITS(vlan_tag, compl);
2413         }
2414         rxcp->port = GET_RX_COMPL_V1_BITS(port, compl);
2415         rxcp->tunneled =
2416                 GET_RX_COMPL_V1_BITS(tunneled, compl);
2417 }
2418
2419 static void be_parse_rx_compl_v0(struct be_eth_rx_compl *compl,
2420                                  struct be_rx_compl_info *rxcp)
2421 {
2422         rxcp->pkt_size = GET_RX_COMPL_V0_BITS(pktsize, compl);
2423         rxcp->vlanf = GET_RX_COMPL_V0_BITS(vtp, compl);
2424         rxcp->err = GET_RX_COMPL_V0_BITS(err, compl);
2425         rxcp->tcpf = GET_RX_COMPL_V0_BITS(tcpf, compl);
2426         rxcp->udpf = GET_RX_COMPL_V0_BITS(udpf, compl);
2427         rxcp->ip_csum = GET_RX_COMPL_V0_BITS(ipcksm, compl);
2428         rxcp->l4_csum = GET_RX_COMPL_V0_BITS(l4_cksm, compl);
2429         rxcp->ipv6 = GET_RX_COMPL_V0_BITS(ip_version, compl);
2430         rxcp->num_rcvd = GET_RX_COMPL_V0_BITS(numfrags, compl);
2431         rxcp->pkt_type = GET_RX_COMPL_V0_BITS(cast_enc, compl);
2432         rxcp->rss_hash = GET_RX_COMPL_V0_BITS(rsshash, compl);
2433         if (rxcp->vlanf) {
2434                 rxcp->qnq = GET_RX_COMPL_V0_BITS(qnq, compl);
2435                 rxcp->vlan_tag = GET_RX_COMPL_V0_BITS(vlan_tag, compl);
2436         }
2437         rxcp->port = GET_RX_COMPL_V0_BITS(port, compl);
2438         rxcp->ip_frag = GET_RX_COMPL_V0_BITS(ip_frag, compl);
2439 }
2440
2441 static struct be_rx_compl_info *be_rx_compl_get(struct be_rx_obj *rxo)
2442 {
2443         struct be_eth_rx_compl *compl = queue_tail_node(&rxo->cq);
2444         struct be_rx_compl_info *rxcp = &rxo->rxcp;
2445         struct be_adapter *adapter = rxo->adapter;
2446
2447         /* For checking the valid bit it is OK to use either definition, as the
2448          * valid bit is at the same position in both v0 and v1 Rx compls */
2449         if (compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] == 0)
2450                 return NULL;
2451
2452         rmb();
2453         be_dws_le_to_cpu(compl, sizeof(*compl));
2454
2455         if (adapter->be3_native)
2456                 be_parse_rx_compl_v1(compl, rxcp);
2457         else
2458                 be_parse_rx_compl_v0(compl, rxcp);
2459
2460         if (rxcp->ip_frag)
2461                 rxcp->l4_csum = 0;
2462
2463         if (rxcp->vlanf) {
2464                 /* In QNQ modes, if qnq bit is not set, then the packet was
2465                  * tagged only with the transparent outer vlan-tag and must
2466                  * not be treated as a vlan packet by the host
2467                  */
2468                 if (be_is_qnq_mode(adapter) && !rxcp->qnq)
2469                         rxcp->vlanf = 0;
2470
2471                 if (!lancer_chip(adapter))
2472                         rxcp->vlan_tag = swab16(rxcp->vlan_tag);
2473
2474                 if (adapter->pvid == (rxcp->vlan_tag & VLAN_VID_MASK) &&
2475                     !test_bit(rxcp->vlan_tag, adapter->vids))
2476                         rxcp->vlanf = 0;
2477         }
2478
2479         /* As the compl has been parsed, reset it; we won't touch it again */
2480         compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] = 0;
2481
2482         queue_tail_inc(&rxo->cq);
2483         return rxcp;
2484 }
2485
2486 static inline struct page *be_alloc_pages(u32 size, gfp_t gfp)
2487 {
2488         u32 order = get_order(size);
2489
2490         if (order > 0)
2491                 gfp |= __GFP_COMP;
2492         return  alloc_pages(gfp, order);
2493 }
2494
2495 /*
2496  * Allocate a page, split it into fragments of size rx_frag_size and post
2497  * them as receive buffers to BE
2498  */
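     /* Each compound page of adapter->big_page_size bytes is carved into up
      * to big_page_size / rx_frag_size fragments. Only the page_info entry
      * for the last fragment carved from a page keeps the page-level DMA
      * address and has last_frag set, so get_rx_page_info() unmaps the whole
      * page exactly once, when that last fragment is consumed.
      */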
2499 static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed)
2500 {
2501         struct be_adapter *adapter = rxo->adapter;
2502         struct be_rx_page_info *page_info = NULL, *prev_page_info = NULL;
2503         struct be_queue_info *rxq = &rxo->q;
2504         struct page *pagep = NULL;
2505         struct device *dev = &adapter->pdev->dev;
2506         struct be_eth_rx_d *rxd;
2507         u64 page_dmaaddr = 0, frag_dmaaddr;
2508         u32 posted, page_offset = 0, notify = 0;
2509
2510         page_info = &rxo->page_info_tbl[rxq->head];
2511         for (posted = 0; posted < frags_needed && !page_info->page; posted++) {
2512                 if (!pagep) {
2513                         pagep = be_alloc_pages(adapter->big_page_size, gfp);
2514                         if (unlikely(!pagep)) {
2515                                 rx_stats(rxo)->rx_post_fail++;
2516                                 break;
2517                         }
2518                         page_dmaaddr = dma_map_page(dev, pagep, 0,
2519                                                     adapter->big_page_size,
2520                                                     DMA_FROM_DEVICE);
2521                         if (dma_mapping_error(dev, page_dmaaddr)) {
2522                                 put_page(pagep);
2523                                 pagep = NULL;
2524                                 adapter->drv_stats.dma_map_errors++;
2525                                 break;
2526                         }
2527                         page_offset = 0;
2528                 } else {
2529                         get_page(pagep);
2530                         page_offset += rx_frag_size;
2531                 }
2532                 page_info->page_offset = page_offset;
2533                 page_info->page = pagep;
2534
2535                 rxd = queue_head_node(rxq);
2536                 frag_dmaaddr = page_dmaaddr + page_info->page_offset;
2537                 rxd->fragpa_lo = cpu_to_le32(frag_dmaaddr & 0xFFFFFFFF);
2538                 rxd->fragpa_hi = cpu_to_le32(upper_32_bits(frag_dmaaddr));
2539
2540                 /* Any space left in the current big page for another frag? */
2541                 if ((page_offset + rx_frag_size + rx_frag_size) >
2542                                         adapter->big_page_size) {
2543                         pagep = NULL;
2544                         page_info->last_frag = true;
2545                         dma_unmap_addr_set(page_info, bus, page_dmaaddr);
2546                 } else {
2547                         dma_unmap_addr_set(page_info, bus, frag_dmaaddr);
2548                 }
2549
2550                 prev_page_info = page_info;
2551                 queue_head_inc(rxq);
2552                 page_info = &rxo->page_info_tbl[rxq->head];
2553         }
2554
2555         /* Mark the last frag of a page when we break out of the above loop
2556          * with no more slots available in the RXQ
2557          */
2558         if (pagep) {
2559                 prev_page_info->last_frag = true;
2560                 dma_unmap_addr_set(prev_page_info, bus, page_dmaaddr);
2561         }
2562
2563         if (posted) {
2564                 atomic_add(posted, &rxq->used);
2565                 if (rxo->rx_post_starved)
2566                         rxo->rx_post_starved = false;
2567                 do {
2568                         notify = min(MAX_NUM_POST_ERX_DB, posted);
2569                         be_rxq_notify(adapter, rxq->id, notify);
2570                         posted -= notify;
2571                 } while (posted);
2572         } else if (atomic_read(&rxq->used) == 0) {
2573                 /* Let be_worker replenish when memory is available */
2574                 rxo->rx_post_starved = true;
2575         }
2576 }
2577
2578 static struct be_tx_compl_info *be_tx_compl_get(struct be_tx_obj *txo)
2579 {
2580         struct be_queue_info *tx_cq = &txo->cq;
2581         struct be_tx_compl_info *txcp = &txo->txcp;
2582         struct be_eth_tx_compl *compl = queue_tail_node(tx_cq);
2583
2584         if (compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0)
2585                 return NULL;
2586
2587         /* Ensure load ordering of valid bit dword and other dwords below */
2588         rmb();
2589         be_dws_le_to_cpu(compl, sizeof(*compl));
2590
2591         txcp->status = GET_TX_COMPL_BITS(status, compl);
2592         txcp->end_index = GET_TX_COMPL_BITS(wrb_index, compl);
2593
2594         compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0;
2595         queue_tail_inc(tx_cq);
2596         return txcp;
2597 }
2598
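     /* Reclaim the WRBs of the TX requests completed up to and including
      * last_index: unmap each fragment, free the finished skbs and return the
      * number of WRBs processed so the caller can decrement txq->used.
      */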
2599 static u16 be_tx_compl_process(struct be_adapter *adapter,
2600                                struct be_tx_obj *txo, u16 last_index)
2601 {
2602         struct sk_buff **sent_skbs = txo->sent_skb_list;
2603         struct be_queue_info *txq = &txo->q;
2604         struct sk_buff *skb = NULL;
2605         bool unmap_skb_hdr = false;
2606         struct be_eth_wrb *wrb;
2607         u16 num_wrbs = 0;
2608         u32 frag_index;
2609
2610         do {
2611                 if (sent_skbs[txq->tail]) {
2612                         /* Free skb from prev req */
2613                         if (skb)
2614                                 dev_consume_skb_any(skb);
2615                         skb = sent_skbs[txq->tail];
2616                         sent_skbs[txq->tail] = NULL;
2617                         queue_tail_inc(txq);  /* skip hdr wrb */
2618                         num_wrbs++;
2619                         unmap_skb_hdr = true;
2620                 }
2621                 wrb = queue_tail_node(txq);
2622                 frag_index = txq->tail;
2623                 unmap_tx_frag(&adapter->pdev->dev, wrb,
2624                               (unmap_skb_hdr && skb_headlen(skb)));
2625                 unmap_skb_hdr = false;
2626                 queue_tail_inc(txq);
2627                 num_wrbs++;
2628         } while (frag_index != last_index);
2629         dev_consume_skb_any(skb);
2630
2631         return num_wrbs;
2632 }
2633
2634 /* Return the number of events in the event queue */
2635 static inline int events_get(struct be_eq_obj *eqo)
2636 {
2637         struct be_eq_entry *eqe;
2638         int num = 0;
2639
2640         do {
2641                 eqe = queue_tail_node(&eqo->q);
2642                 if (eqe->evt == 0)
2643                         break;
2644
2645                 rmb();
2646                 eqe->evt = 0;
2647                 num++;
2648                 queue_tail_inc(&eqo->q);
2649         } while (true);
2650
2651         return num;
2652 }
2653
2654 /* Leaves the EQ in a disarmed state */
2655 static void be_eq_clean(struct be_eq_obj *eqo)
2656 {
2657         int num = events_get(eqo);
2658
2659         be_eq_notify(eqo->adapter, eqo->q.id, false, true, num, 0);
2660 }
2661
2662 /* Free posted rx buffers that were not used */
2663 static void be_rxq_clean(struct be_rx_obj *rxo)
2664 {
2665         struct be_queue_info *rxq = &rxo->q;
2666         struct be_rx_page_info *page_info;
2667
2668         while (atomic_read(&rxq->used) > 0) {
2669                 page_info = get_rx_page_info(rxo);
2670                 put_page(page_info->page);
2671                 memset(page_info, 0, sizeof(*page_info));
2672         }
2673         BUG_ON(atomic_read(&rxq->used));
2674         rxq->tail = 0;
2675         rxq->head = 0;
2676 }
2677
2678 static void be_rx_cq_clean(struct be_rx_obj *rxo)
2679 {
2680         struct be_queue_info *rx_cq = &rxo->cq;
2681         struct be_rx_compl_info *rxcp;
2682         struct be_adapter *adapter = rxo->adapter;
2683         int flush_wait = 0;
2684
2685         /* Consume pending rx completions.
2686          * Wait for the flush completion (identified by zero num_rcvd)
2687          * to arrive. Notify CQ even when there are no more CQ entries
2688          * for HW to flush partially coalesced CQ entries.
2689          * In Lancer, there is no need to wait for flush compl.
2690          */
2691         for (;;) {
2692                 rxcp = be_rx_compl_get(rxo);
2693                 if (!rxcp) {
2694                         if (lancer_chip(adapter))
2695                                 break;
2696
2697                         if (flush_wait++ > 50 ||
2698                             be_check_error(adapter,
2699                                            BE_ERROR_HW)) {
2700                                 dev_warn(&adapter->pdev->dev,
2701                                          "did not receive flush compl\n");
2702                                 break;
2703                         }
2704                         be_cq_notify(adapter, rx_cq->id, true, 0);
2705                         mdelay(1);
2706                 } else {
2707                         be_rx_compl_discard(rxo, rxcp);
2708                         be_cq_notify(adapter, rx_cq->id, false, 1);
2709                         if (rxcp->num_rcvd == 0)
2710                                 break;
2711                 }
2712         }
2713
2714         /* After cleanup, leave the CQ in unarmed state */
2715         be_cq_notify(adapter, rx_cq->id, false, 0);
2716 }
2717
2718 static void be_tx_compl_clean(struct be_adapter *adapter)
2719 {
2720         struct device *dev = &adapter->pdev->dev;
2721         u16 cmpl = 0, timeo = 0, num_wrbs = 0;
2722         struct be_tx_compl_info *txcp;
2723         struct be_queue_info *txq;
2724         u32 end_idx, notified_idx;
2725         struct be_tx_obj *txo;
2726         int i, pending_txqs;
2727
2728         /* Stop polling for compls when HW has been silent for 10ms */
2729         do {
2730                 pending_txqs = adapter->num_tx_qs;
2731
2732                 for_all_tx_queues(adapter, txo, i) {
2733                         cmpl = 0;
2734                         num_wrbs = 0;
2735                         txq = &txo->q;
2736                         while ((txcp = be_tx_compl_get(txo))) {
2737                                 num_wrbs +=
2738                                         be_tx_compl_process(adapter, txo,
2739                                                             txcp->end_index);
2740                                 cmpl++;
2741                         }
2742                         if (cmpl) {
2743                                 be_cq_notify(adapter, txo->cq.id, false, cmpl);
2744                                 atomic_sub(num_wrbs, &txq->used);
2745                                 timeo = 0;
2746                         }
2747                         if (!be_is_tx_compl_pending(txo))
2748                                 pending_txqs--;
2749                 }
2750
2751                 if (pending_txqs == 0 || ++timeo > 10 ||
2752                     be_check_error(adapter, BE_ERROR_HW))
2753                         break;
2754
2755                 mdelay(1);
2756         } while (true);
2757
2758         /* Free enqueued TX that was never notified to HW */
2759         for_all_tx_queues(adapter, txo, i) {
2760                 txq = &txo->q;
2761
2762                 if (atomic_read(&txq->used)) {
2763                         dev_info(dev, "txq%d: cleaning %d pending tx-wrbs\n",
2764                                  i, atomic_read(&txq->used));
2765                         notified_idx = txq->tail;
2766                         end_idx = txq->tail;
2767                         index_adv(&end_idx, atomic_read(&txq->used) - 1,
2768                                   txq->len);
2769                         /* Use the tx-compl process logic to handle requests
2770                          * that were not sent to the HW.
2771                          */
2772                         num_wrbs = be_tx_compl_process(adapter, txo, end_idx);
2773                         atomic_sub(num_wrbs, &txq->used);
2774                         BUG_ON(atomic_read(&txq->used));
2775                         txo->pend_wrb_cnt = 0;
2776                         /* Since hw was never notified of these requests,
2777                          * reset TXQ indices
2778                          */
2779                         txq->head = notified_idx;
2780                         txq->tail = notified_idx;
2781                 }
2782         }
2783 }
2784
2785 static void be_evt_queues_destroy(struct be_adapter *adapter)
2786 {
2787         struct be_eq_obj *eqo;
2788         int i;
2789
2790         for_all_evt_queues(adapter, eqo, i) {
2791                 if (eqo->q.created) {
2792                         be_eq_clean(eqo);
2793                         be_cmd_q_destroy(adapter, &eqo->q, QTYPE_EQ);
2794                         netif_napi_del(&eqo->napi);
2795                         free_cpumask_var(eqo->affinity_mask);
2796                 }
2797                 be_queue_free(adapter, &eqo->q);
2798         }
2799 }
2800
2801 static int be_evt_queues_create(struct be_adapter *adapter)
2802 {
2803         struct be_queue_info *eq;
2804         struct be_eq_obj *eqo;
2805         struct be_aic_obj *aic;
2806         int i, rc;
2807
2808         /* need enough EQs to service both RX and TX queues */
2809         adapter->num_evt_qs = min_t(u16, num_irqs(adapter),
2810                                     max(adapter->cfg_num_rx_irqs,
2811                                         adapter->cfg_num_tx_irqs));
2812
2813         for_all_evt_queues(adapter, eqo, i) {
2814                 int numa_node = dev_to_node(&adapter->pdev->dev);
2815
2816                 aic = &adapter->aic_obj[i];
2817                 eqo->adapter = adapter;
2818                 eqo->idx = i;
2819                 aic->max_eqd = BE_MAX_EQD;
2820                 aic->enable = true;
2821
2822                 eq = &eqo->q;
2823                 rc = be_queue_alloc(adapter, eq, EVNT_Q_LEN,
2824                                     sizeof(struct be_eq_entry));
2825                 if (rc)
2826                         return rc;
2827
2828                 rc = be_cmd_eq_create(adapter, eqo);
2829                 if (rc)
2830                         return rc;
2831
2832                 if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
2833                         return -ENOMEM;
2834                 cpumask_set_cpu(cpumask_local_spread(i, numa_node),
2835                                 eqo->affinity_mask);
2836                 netif_napi_add(adapter->netdev, &eqo->napi, be_poll,
2837                                BE_NAPI_WEIGHT);
2838         }
2839         return 0;
2840 }
2841
2842 static void be_mcc_queues_destroy(struct be_adapter *adapter)
2843 {
2844         struct be_queue_info *q;
2845
2846         q = &adapter->mcc_obj.q;
2847         if (q->created)
2848                 be_cmd_q_destroy(adapter, q, QTYPE_MCCQ);
2849         be_queue_free(adapter, q);
2850
2851         q = &adapter->mcc_obj.cq;
2852         if (q->created)
2853                 be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2854         be_queue_free(adapter, q);
2855 }
2856
2857 /* Must be called only after TX qs are created as MCC shares TX EQ */
2858 static int be_mcc_queues_create(struct be_adapter *adapter)
2859 {
2860         struct be_queue_info *q, *cq;
2861
2862         cq = &adapter->mcc_obj.cq;
2863         if (be_queue_alloc(adapter, cq, MCC_CQ_LEN,
2864                            sizeof(struct be_mcc_compl)))
2865                 goto err;
2866
2867         /* Use the default EQ for MCC completions */
2868         if (be_cmd_cq_create(adapter, cq, &mcc_eqo(adapter)->q, true, 0))
2869                 goto mcc_cq_free;
2870
2871         q = &adapter->mcc_obj.q;
2872         if (be_queue_alloc(adapter, q, MCC_Q_LEN, sizeof(struct be_mcc_wrb)))
2873                 goto mcc_cq_destroy;
2874
2875         if (be_cmd_mccq_create(adapter, q, cq))
2876                 goto mcc_q_free;
2877
2878         return 0;
2879
2880 mcc_q_free:
2881         be_queue_free(adapter, q);
2882 mcc_cq_destroy:
2883         be_cmd_q_destroy(adapter, cq, QTYPE_CQ);
2884 mcc_cq_free:
2885         be_queue_free(adapter, cq);
2886 err:
2887         return -1;
2888 }
2889
2890 static void be_tx_queues_destroy(struct be_adapter *adapter)
2891 {
2892         struct be_queue_info *q;
2893         struct be_tx_obj *txo;
2894         u8 i;
2895
2896         for_all_tx_queues(adapter, txo, i) {
2897                 q = &txo->q;
2898                 if (q->created)
2899                         be_cmd_q_destroy(adapter, q, QTYPE_TXQ);
2900                 be_queue_free(adapter, q);
2901
2902                 q = &txo->cq;
2903                 if (q->created)
2904                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2905                 be_queue_free(adapter, q);
2906         }
2907 }
2908
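/* Creates one (CQ, TXQ) pair per TX IRQ granted to this function. TX CQs are
 * bound to EQs round-robin (txq i uses EQ i % num_evt_qs), so several TXQs
 * may share one EQ when fewer EQs than TXQs exist. XPS is then programmed
 * from each EQ's affinity mask so transmit CPUs line up with completion CPUs.
 */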
2909 static int be_tx_qs_create(struct be_adapter *adapter)
2910 {
2911         struct be_queue_info *cq;
2912         struct be_tx_obj *txo;
2913         struct be_eq_obj *eqo;
2914         int status, i;
2915
2916         adapter->num_tx_qs = min(adapter->num_evt_qs, adapter->cfg_num_tx_irqs);
2917
2918         for_all_tx_queues(adapter, txo, i) {
2919                 cq = &txo->cq;
2920                 status = be_queue_alloc(adapter, cq, TX_CQ_LEN,
2921                                         sizeof(struct be_eth_tx_compl));
2922                 if (status)
2923                         return status;
2924
2925                 u64_stats_init(&txo->stats.sync);
2926                 u64_stats_init(&txo->stats.sync_compl);
2927
2928                 /* If num_evt_qs is less than num_tx_qs, then more than
2929                  * one txq shares an eq
2930                  */
2931                 eqo = &adapter->eq_obj[i % adapter->num_evt_qs];
2932                 status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3);
2933                 if (status)
2934                         return status;
2935
2936                 status = be_queue_alloc(adapter, &txo->q, TX_Q_LEN,
2937                                         sizeof(struct be_eth_wrb));
2938                 if (status)
2939                         return status;
2940
2941                 status = be_cmd_txq_create(adapter, txo);
2942                 if (status)
2943                         return status;
2944
2945                 netif_set_xps_queue(adapter->netdev, eqo->affinity_mask,
2946                                     eqo->idx);
2947         }
2948
2949         dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n",
2950                  adapter->num_tx_qs);
2951         return 0;
2952 }
2953
2954 static void be_rx_cqs_destroy(struct be_adapter *adapter)
2955 {
2956         struct be_queue_info *q;
2957         struct be_rx_obj *rxo;
2958         int i;
2959
2960         for_all_rx_queues(adapter, rxo, i) {
2961                 q = &rxo->cq;
2962                 if (q->created)
2963                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2964                 be_queue_free(adapter, q);
2965         }
2966 }
2967
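/* RX CQ provisioning: the number of RSS rings is limited by both the EQ
 * count and the configured RX IRQs; RSS is used only when at least 2 such
 * rings are available, and a default (non-RSS) RXQ is added when required.
 * With neither, a single RXQ is still created so RX can function.
 */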
2968 static int be_rx_cqs_create(struct be_adapter *adapter)
2969 {
2970         struct be_queue_info *eq, *cq;
2971         struct be_rx_obj *rxo;
2972         int rc, i;
2973
2974         adapter->num_rss_qs =
2975                         min(adapter->num_evt_qs, adapter->cfg_num_rx_irqs);
2976
2977         /* We'll use RSS only if at least 2 RSS rings are supported. */
2978         if (adapter->num_rss_qs < 2)
2979                 adapter->num_rss_qs = 0;
2980
2981         adapter->num_rx_qs = adapter->num_rss_qs + adapter->need_def_rxq;
2982
2983         /* When the interface is not capable of RSS rings (and there is no
2984          * need to create a default RXQ) we'll still need one RXQ
2985          */
2986         if (adapter->num_rx_qs == 0)
2987                 adapter->num_rx_qs = 1;
2988
2989         adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE;
2990         for_all_rx_queues(adapter, rxo, i) {
2991                 rxo->adapter = adapter;
2992                 cq = &rxo->cq;
2993                 rc = be_queue_alloc(adapter, cq, RX_CQ_LEN,
2994                                     sizeof(struct be_eth_rx_compl));
2995                 if (rc)
2996                         return rc;
2997
2998                 u64_stats_init(&rxo->stats.sync);
2999                 eq = &adapter->eq_obj[i % adapter->num_evt_qs].q;
3000                 rc = be_cmd_cq_create(adapter, cq, eq, false, 3);
3001                 if (rc)
3002                         return rc;
3003         }
3004
3005         dev_info(&adapter->pdev->dev,
3006                  "created %d RX queue(s)\n", adapter->num_rx_qs);
3007         return 0;
3008 }
3009
3010 static irqreturn_t be_intx(int irq, void *dev)
3011 {
3012         struct be_eq_obj *eqo = dev;
3013         struct be_adapter *adapter = eqo->adapter;
3014         int num_evts = 0;
3015
3016         /* IRQ is not expected when NAPI is scheduled as the EQ
3017          * will not be armed.
3018          * But, this can happen on Lancer INTx where it takes
3019          * a while to de-assert INTx or in BE2 where occasionally
3020          * an interrupt may be raised even when EQ is unarmed.
3021          * If NAPI is already scheduled, then counting & notifying
3022          * events will orphan them.
3023          */
3024         if (napi_schedule_prep(&eqo->napi)) {
3025                 num_evts = events_get(eqo);
3026                 __napi_schedule(&eqo->napi);
3027                 if (num_evts)
3028                         eqo->spurious_intr = 0;
3029         }
3030         be_eq_notify(adapter, eqo->q.id, false, true, num_evts, 0);
3031
3032         /* Return IRQ_HANDLED only for the first spurious intr
3033          * after a valid intr to stop the kernel from branding
3034          * this irq as a bad one!
3035          */
3036         if (num_evts || eqo->spurious_intr++ == 0)
3037                 return IRQ_HANDLED;
3038         else
3039                 return IRQ_NONE;
3040 }
3041
3042 static irqreturn_t be_msix(int irq, void *dev)
3043 {
3044         struct be_eq_obj *eqo = dev;
3045
3046         be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
3047         napi_schedule(&eqo->napi);
3048         return IRQ_HANDLED;
3049 }
3050
3051 static inline bool do_gro(struct be_rx_compl_info *rxcp)
3052 {
3053         return rxcp->tcpf && !rxcp->err && rxcp->l4_csum;
3054 }
3055
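/* NAPI/busy-poll RX processing: consumes up to @budget completions, drops
 * flush, partial-DMA and mis-steered completions, hands the rest to GRO or
 * the regular receive path, then notifies the CQ and replenishes RX
 * fragments unless the ring is in the post_starved state (be_worker refills
 * it in that case).
 */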
3056 static int be_process_rx(struct be_rx_obj *rxo, struct napi_struct *napi,
3057                          int budget, int polling)
3058 {
3059         struct be_adapter *adapter = rxo->adapter;
3060         struct be_queue_info *rx_cq = &rxo->cq;
3061         struct be_rx_compl_info *rxcp;
3062         u32 work_done;
3063         u32 frags_consumed = 0;
3064
3065         for (work_done = 0; work_done < budget; work_done++) {
3066                 rxcp = be_rx_compl_get(rxo);
3067                 if (!rxcp)
3068                         break;
3069
3070                 /* Is it a flush compl that has no data */
3071                 if (unlikely(rxcp->num_rcvd == 0))
3072                         goto loop_continue;
3073
3074                 /* Discard compl with partial DMA Lancer B0 */
3075                 if (unlikely(!rxcp->pkt_size)) {
3076                         be_rx_compl_discard(rxo, rxcp);
3077                         goto loop_continue;
3078                 }
3079
3080                 /* On BE drop pkts that arrive due to imperfect filtering in
3081          * promiscuous mode on some SKUs
3082                  */
3083                 if (unlikely(rxcp->port != adapter->port_num &&
3084                              !lancer_chip(adapter))) {
3085                         be_rx_compl_discard(rxo, rxcp);
3086                         goto loop_continue;
3087                 }
3088
3089                 /* Don't do gro when we're busy_polling */
3090                 if (do_gro(rxcp) && polling != BUSY_POLLING)
3091                         be_rx_compl_process_gro(rxo, napi, rxcp);
3092                 else
3093                         be_rx_compl_process(rxo, napi, rxcp);
3094
3095 loop_continue:
3096                 frags_consumed += rxcp->num_rcvd;
3097                 be_rx_stats_update(rxo, rxcp);
3098         }
3099
3100         if (work_done) {
3101                 be_cq_notify(adapter, rx_cq->id, true, work_done);
3102
3103                 /* When an rx-obj gets into post_starved state, just
3104                  * let be_worker do the posting.
3105                  */
3106                 if (atomic_read(&rxo->q.used) < RX_FRAGS_REFILL_WM &&
3107                     !rxo->rx_post_starved)
3108                         be_post_rx_frags(rxo, GFP_ATOMIC,
3109                                          max_t(u32, MAX_RX_POST,
3110                                                frags_consumed));
3111         }
3112
3113         return work_done;
3114 }
3115
3116 static inline void be_update_tx_err(struct be_tx_obj *txo, u8 status)
3117 {
3118         switch (status) {
3119         case BE_TX_COMP_HDR_PARSE_ERR:
3120                 tx_stats(txo)->tx_hdr_parse_err++;
3121                 break;
3122         case BE_TX_COMP_NDMA_ERR:
3123                 tx_stats(txo)->tx_dma_err++;
3124                 break;
3125         case BE_TX_COMP_ACL_ERR:
3126                 tx_stats(txo)->tx_spoof_check_err++;
3127                 break;
3128         }
3129 }
3130
3131 static inline void lancer_update_tx_err(struct be_tx_obj *txo, u8 status)
3132 {
3133         switch (status) {
3134         case LANCER_TX_COMP_LSO_ERR:
3135                 tx_stats(txo)->tx_tso_err++;
3136                 break;
3137         case LANCER_TX_COMP_HSW_DROP_MAC_ERR:
3138         case LANCER_TX_COMP_HSW_DROP_VLAN_ERR:
3139                 tx_stats(txo)->tx_spoof_check_err++;
3140                 break;
3141         case LANCER_TX_COMP_QINQ_ERR:
3142                 tx_stats(txo)->tx_qinq_err++;
3143                 break;
3144         case LANCER_TX_COMP_PARITY_ERR:
3145                 tx_stats(txo)->tx_internal_parity_err++;
3146                 break;
3147         case LANCER_TX_COMP_DMA_ERR:
3148                 tx_stats(txo)->tx_dma_err++;
3149                 break;
3150         }
3151 }
3152
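/* Reaps TX completions for one TXQ: frees the wrbs covered by each
 * completion, records per-error-type stats, re-arms the CQ and wakes the
 * netdev sub-queue if it was stopped for lack of wrbs.
 */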
3153 static void be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo,
3154                           int idx)
3155 {
3156         int num_wrbs = 0, work_done = 0;
3157         struct be_tx_compl_info *txcp;
3158
3159         while ((txcp = be_tx_compl_get(txo))) {
3160                 num_wrbs += be_tx_compl_process(adapter, txo, txcp->end_index);
3161                 work_done++;
3162
3163                 if (txcp->status) {
3164                         if (lancer_chip(adapter))
3165                                 lancer_update_tx_err(txo, txcp->status);
3166                         else
3167                                 be_update_tx_err(txo, txcp->status);
3168                 }
3169         }
3170
3171         if (work_done) {
3172                 be_cq_notify(adapter, txo->cq.id, true, work_done);
3173                 atomic_sub(num_wrbs, &txo->q.used);
3174
3175                 /* As Tx wrbs have been freed up, wake up netdev queue
3176                  * if it was stopped due to lack of tx wrbs.  */
3177                 if (__netif_subqueue_stopped(adapter->netdev, idx) &&
3178                     be_can_txq_wake(txo)) {
3179                         netif_wake_subqueue(adapter->netdev, idx);
3180                 }
3181
3182                 u64_stats_update_begin(&tx_stats(txo)->sync_compl);
3183                 tx_stats(txo)->tx_compl += work_done;
3184                 u64_stats_update_end(&tx_stats(txo)->sync_compl);
3185         }
3186 }
3187
3188 #ifdef CONFIG_NET_RX_BUSY_POLL
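/* eqo->state implements a small lock protocol between the NAPI poll path
 * and the busy-poll path: whichever side finds the EQ already locked records
 * a *_YIELD hint and backs off instead of spinning, so the two paths never
 * process the same RX queues concurrently.
 */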
3189 static inline bool be_lock_napi(struct be_eq_obj *eqo)
3190 {
3191         bool status = true;
3192
3193         spin_lock(&eqo->lock); /* BH is already disabled */
3194         if (eqo->state & BE_EQ_LOCKED) {
3195                 WARN_ON(eqo->state & BE_EQ_NAPI);
3196                 eqo->state |= BE_EQ_NAPI_YIELD;
3197                 status = false;
3198         } else {
3199                 eqo->state = BE_EQ_NAPI;
3200         }
3201         spin_unlock(&eqo->lock);
3202         return status;
3203 }
3204
3205 static inline void be_unlock_napi(struct be_eq_obj *eqo)
3206 {
3207         spin_lock(&eqo->lock); /* BH is already disabled */
3208
3209         WARN_ON(eqo->state & (BE_EQ_POLL | BE_EQ_NAPI_YIELD));
3210         eqo->state = BE_EQ_IDLE;
3211
3212         spin_unlock(&eqo->lock);
3213 }
3214
3215 static inline bool be_lock_busy_poll(struct be_eq_obj *eqo)
3216 {
3217         bool status = true;
3218
3219         spin_lock_bh(&eqo->lock);
3220         if (eqo->state & BE_EQ_LOCKED) {
3221                 eqo->state |= BE_EQ_POLL_YIELD;
3222                 status = false;
3223         } else {
3224                 eqo->state |= BE_EQ_POLL;
3225         }
3226         spin_unlock_bh(&eqo->lock);
3227         return status;
3228 }
3229
3230 static inline void be_unlock_busy_poll(struct be_eq_obj *eqo)
3231 {
3232         spin_lock_bh(&eqo->lock);
3233
3234         WARN_ON(eqo->state & (BE_EQ_NAPI));
3235         eqo->state = BE_EQ_IDLE;
3236
3237         spin_unlock_bh(&eqo->lock);
3238 }
3239
3240 static inline void be_enable_busy_poll(struct be_eq_obj *eqo)
3241 {
3242         spin_lock_init(&eqo->lock);
3243         eqo->state = BE_EQ_IDLE;
3244 }
3245
3246 static inline void be_disable_busy_poll(struct be_eq_obj *eqo)
3247 {
3248         local_bh_disable();
3249
3250         /* It's enough to just acquire napi lock on the eqo to stop
3251          * be_busy_poll() from processing any queues.
3252          */
3253         while (!be_lock_napi(eqo))
3254                 mdelay(1);
3255
3256         local_bh_enable();
3257 }
3258
3259 #else /* CONFIG_NET_RX_BUSY_POLL */
3260
3261 static inline bool be_lock_napi(struct be_eq_obj *eqo)
3262 {
3263         return true;
3264 }
3265
3266 static inline void be_unlock_napi(struct be_eq_obj *eqo)
3267 {
3268 }
3269
3270 static inline bool be_lock_busy_poll(struct be_eq_obj *eqo)
3271 {
3272         return false;
3273 }
3274
3275 static inline void be_unlock_busy_poll(struct be_eq_obj *eqo)
3276 {
3277 }
3278
3279 static inline void be_enable_busy_poll(struct be_eq_obj *eqo)
3280 {
3281 }
3282
3283 static inline void be_disable_busy_poll(struct be_eq_obj *eqo)
3284 {
3285 }
3286 #endif /* CONFIG_NET_RX_BUSY_POLL */
3287
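/* Main NAPI handler shared by all EQs: TX completions are reaped first, then
 * the RX queues mapped to this EQ are polled under the napi/busy-poll lock,
 * and the MCC queue is serviced on its EQ. The EQ is re-armed only when the
 * budget was not exhausted; otherwise events are just counted and cleared so
 * polling continues.
 */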
3288 int be_poll(struct napi_struct *napi, int budget)
3289 {
3290         struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3291         struct be_adapter *adapter = eqo->adapter;
3292         int max_work = 0, work, i, num_evts;
3293         struct be_rx_obj *rxo;
3294         struct be_tx_obj *txo;
3295         u32 mult_enc = 0;
3296
3297         num_evts = events_get(eqo);
3298
3299         for_all_tx_queues_on_eq(adapter, eqo, txo, i)
3300                 be_process_tx(adapter, txo, i);
3301
3302         if (be_lock_napi(eqo)) {
3303                 /* This loop will iterate twice for EQ0 in which
3304                  * completions of the last RXQ (default one) are also processed.
3305                  * For other EQs the loop iterates only once.
3306                  */
3307                 for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3308                         work = be_process_rx(rxo, napi, budget, NAPI_POLLING);
3309                         max_work = max(work, max_work);
3310                 }
3311                 be_unlock_napi(eqo);
3312         } else {
3313                 max_work = budget;
3314         }
3315
3316         if (is_mcc_eqo(eqo))
3317                 be_process_mcc(adapter);
3318
3319         if (max_work < budget) {
3320                 napi_complete(napi);
3321
3322                 /* Skyhawk EQ_DB has a provision to set the rearm to interrupt
3323                  * delay via a delay multiplier encoding value
3324                  */
3325                 if (skyhawk_chip(adapter))
3326                         mult_enc = be_get_eq_delay_mult_enc(eqo);
3327
3328                 be_eq_notify(adapter, eqo->q.id, true, false, num_evts,
3329                              mult_enc);
3330         } else {
3331                 /* As we'll continue in polling mode, count and clear events */
3332                 be_eq_notify(adapter, eqo->q.id, false, false, num_evts, 0);
3333         }
3334         return max_work;
3335 }
3336
3337 #ifdef CONFIG_NET_RX_BUSY_POLL
3338 static int be_busy_poll(struct napi_struct *napi)
3339 {
3340         struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3341         struct be_adapter *adapter = eqo->adapter;
3342         struct be_rx_obj *rxo;
3343         int i, work = 0;
3344
3345         if (!be_lock_busy_poll(eqo))
3346                 return LL_FLUSH_BUSY;
3347
3348         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3349                 work = be_process_rx(rxo, napi, 4, BUSY_POLLING);
3350                 if (work)
3351                         break;
3352         }
3353
3354         be_unlock_busy_poll(eqo);
3355         return work;
3356 }
3357 #endif
3358
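/* Hardware error detection: Lancer exposes errors via the SLIPORT status and
 * error registers (a firmware-reset signature is reported only as an info
 * message), while BE/Skyhawk report Unrecoverable Errors through the UE
 * status CSRs, masked by the corresponding UE mask registers before being
 * decoded below.
 */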
3359 void be_detect_error(struct be_adapter *adapter)
3360 {
3361         u32 ue_lo = 0, ue_hi = 0, ue_lo_mask = 0, ue_hi_mask = 0;
3362         u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0;
3363         u32 i;
3364         struct device *dev = &adapter->pdev->dev;
3365
3366         if (be_check_error(adapter, BE_ERROR_HW))
3367                 return;
3368
3369         if (lancer_chip(adapter)) {
3370                 sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
3371                 if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
3372                         be_set_error(adapter, BE_ERROR_UE);
3373                         sliport_err1 = ioread32(adapter->db +
3374                                                 SLIPORT_ERROR1_OFFSET);
3375                         sliport_err2 = ioread32(adapter->db +
3376                                                 SLIPORT_ERROR2_OFFSET);
3377                         /* Do not log error messages if it's a FW reset */
3378                         if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 &&
3379                             sliport_err2 == SLIPORT_ERROR_FW_RESET2) {
3380                                 dev_info(dev, "Firmware update in progress\n");
3381                         } else {
3382                                 dev_err(dev, "Error detected in the card\n");
3383                                 dev_err(dev, "ERR: sliport status 0x%x\n",
3384                                         sliport_status);
3385                                 dev_err(dev, "ERR: sliport error1 0x%x\n",
3386                                         sliport_err1);
3387                                 dev_err(dev, "ERR: sliport error2 0x%x\n",
3388                                         sliport_err2);
3389                         }
3390                 }
3391         } else {
3392                 ue_lo = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_LOW);
3393                 ue_hi = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_HIGH);
3394                 ue_lo_mask = ioread32(adapter->pcicfg +
3395                                       PCICFG_UE_STATUS_LOW_MASK);
3396                 ue_hi_mask = ioread32(adapter->pcicfg +
3397                                       PCICFG_UE_STATUS_HI_MASK);
3398
3399                 ue_lo = (ue_lo & ~ue_lo_mask);
3400                 ue_hi = (ue_hi & ~ue_hi_mask);
3401
3402                 /* On certain platforms BE hardware can indicate spurious UEs.
3403                  * In case of a real UE the HW will stop working anyway, so
3404                  * don't set the hw_error state based on UE detection alone.
3405                  */
3406
3407                 if (ue_lo || ue_hi) {
3408                         dev_err(dev, "Error detected in the adapter\n");
3409                         if (skyhawk_chip(adapter))
3410                                 be_set_error(adapter, BE_ERROR_UE);
3411
3412                         for (i = 0; ue_lo; ue_lo >>= 1, i++) {
3413                                 if (ue_lo & 1)
3414                                         dev_err(dev, "UE: %s bit set\n",
3415                                                 ue_status_low_desc[i]);
3416                         }
3417                         for (i = 0; ue_hi; ue_hi >>= 1, i++) {
3418                                 if (ue_hi & 1)
3419                                         dev_err(dev, "UE: %s bit set\n",
3420                                                 ue_status_hi_desc[i]);
3421                         }
3422                 }
3423         }
3424 }
3425
3426 static void be_msix_disable(struct be_adapter *adapter)
3427 {
3428         if (msix_enabled(adapter)) {
3429                 pci_disable_msix(adapter->pdev);
3430                 adapter->num_msix_vec = 0;
3431                 adapter->num_msix_roce_vec = 0;
3432         }
3433 }
3434
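/* MSI-X sizing: with RoCE supported, the request covers the NIC IRQs plus
 * the RoCE EQs (capped at the number of online CPUs); if more than the
 * minimum number of vectors is granted, half are reserved for RoCE and the
 * rest drive the NIC EQs.
 */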
3435 static int be_msix_enable(struct be_adapter *adapter)
3436 {
3437         unsigned int i, max_roce_eqs;
3438         struct device *dev = &adapter->pdev->dev;
3439         int num_vec;
3440
3441         /* If RoCE is supported, program the max number of vectors that
3442          * could be used for NIC and RoCE, else, just program the number
3443          * we'll use initially.
3444          */
3445         if (be_roce_supported(adapter)) {
3446                 max_roce_eqs =
3447                         be_max_func_eqs(adapter) - be_max_nic_eqs(adapter);
3448                 max_roce_eqs = min(max_roce_eqs, num_online_cpus());
3449                 num_vec = be_max_any_irqs(adapter) + max_roce_eqs;
3450         } else {
3451                 num_vec = max(adapter->cfg_num_rx_irqs,
3452                               adapter->cfg_num_tx_irqs);
3453         }
3454
3455         for (i = 0; i < num_vec; i++)
3456                 adapter->msix_entries[i].entry = i;
3457
3458         num_vec = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
3459                                         MIN_MSIX_VECTORS, num_vec);
3460         if (num_vec < 0)
3461                 goto fail;
3462
3463         if (be_roce_supported(adapter) && num_vec > MIN_MSIX_VECTORS) {
3464                 adapter->num_msix_roce_vec = num_vec / 2;
3465                 dev_info(dev, "enabled %d MSI-x vector(s) for RoCE\n",
3466                          adapter->num_msix_roce_vec);
3467         }
3468
3469         adapter->num_msix_vec = num_vec - adapter->num_msix_roce_vec;
3470
3471         dev_info(dev, "enabled %d MSI-x vector(s) for NIC\n",
3472                  adapter->num_msix_vec);
3473         return 0;
3474
3475 fail:
3476         dev_warn(dev, "MSIx enable failed\n");
3477
3478         /* INTx is not supported in VFs, so fail probe if enable_msix fails */
3479         if (be_virtfn(adapter))
3480                 return num_vec;
3481         return 0;
3482 }
3483
3484 static inline int be_msix_vec_get(struct be_adapter *adapter,
3485                                   struct be_eq_obj *eqo)
3486 {
3487         return adapter->msix_entries[eqo->msix_idx].vector;
3488 }
3489
3490 static int be_msix_register(struct be_adapter *adapter)
3491 {
3492         struct net_device *netdev = adapter->netdev;
3493         struct be_eq_obj *eqo;
3494         int status, i, vec;
3495
3496         for_all_evt_queues(adapter, eqo, i) {
3497                 sprintf(eqo->desc, "%s-q%d", netdev->name, i);
3498                 vec = be_msix_vec_get(adapter, eqo);
3499                 status = request_irq(vec, be_msix, 0, eqo->desc, eqo);
3500                 if (status)
3501                         goto err_msix;
3502
3503                 irq_set_affinity_hint(vec, eqo->affinity_mask);
3504         }
3505
3506         return 0;
3507 err_msix:
3508         for (i--; i >= 0; i--) {
3509                 eqo = &adapter->eq_obj[i];
3510                 free_irq(be_msix_vec_get(adapter, eqo), eqo);
3511         }
3512         dev_warn(&adapter->pdev->dev, "MSIX Request IRQ failed - err %d\n",
3513                  status);
3514         be_msix_disable(adapter);
3515         return status;
3516 }
3517
3518 static int be_irq_register(struct be_adapter *adapter)
3519 {
3520         struct net_device *netdev = adapter->netdev;
3521         int status;
3522
3523         if (msix_enabled(adapter)) {
3524                 status = be_msix_register(adapter);
3525                 if (status == 0)
3526                         goto done;
3527                 /* INTx is not supported for VF */
3528                 if (be_virtfn(adapter))
3529                         return status;
3530         }
3531
3532         /* INTx: only the first EQ is used */
3533         netdev->irq = adapter->pdev->irq;
3534         status = request_irq(netdev->irq, be_intx, IRQF_SHARED, netdev->name,
3535                              &adapter->eq_obj[0]);
3536         if (status) {
3537                 dev_err(&adapter->pdev->dev,
3538                         "INTx request IRQ failed - err %d\n", status);
3539                 return status;
3540         }
3541 done:
3542         adapter->isr_registered = true;
3543         return 0;
3544 }
3545
3546 static void be_irq_unregister(struct be_adapter *adapter)
3547 {
3548         struct net_device *netdev = adapter->netdev;
3549         struct be_eq_obj *eqo;
3550         int i, vec;
3551
3552         if (!adapter->isr_registered)
3553                 return;
3554
3555         /* INTx */
3556         if (!msix_enabled(adapter)) {
3557                 free_irq(netdev->irq, &adapter->eq_obj[0]);
3558                 goto done;
3559         }
3560
3561         /* MSIx */
3562         for_all_evt_queues(adapter, eqo, i) {
3563                 vec = be_msix_vec_get(adapter, eqo);
3564                 irq_set_affinity_hint(vec, NULL);
3565                 free_irq(vec, eqo);
3566         }
3567
3568 done:
3569         adapter->isr_registered = false;
3570 }
3571
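/* Tears down the RXQs, their CQs and the RSS state. Note the Lancer-specific
 * workaround inside: rings found empty are first replenished so the destroy
 * does not hit the "out of buffer" HW stall condition.
 */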
3572 static void be_rx_qs_destroy(struct be_adapter *adapter)
3573 {
3574         struct rss_info *rss = &adapter->rss_info;
3575         struct be_queue_info *q;
3576         struct be_rx_obj *rxo;
3577         int i;
3578
3579         for_all_rx_queues(adapter, rxo, i) {
3580                 q = &rxo->q;
3581                 if (q->created) {
3582                         /* If RXQs are destroyed while in an "out of buffer"
3583                          * state, there is a possibility of an HW stall on
3584                          * Lancer. So, post 64 buffers to each queue to relieve
3585                          * the "out of buffer" condition.
3586                          * Make sure there's space in the RXQ before posting.
3587                          */
3588                         if (lancer_chip(adapter)) {
3589                                 be_rx_cq_clean(rxo);
3590                                 if (atomic_read(&q->used) == 0)
3591                                         be_post_rx_frags(rxo, GFP_KERNEL,
3592                                                          MAX_RX_POST);
3593                         }
3594
3595                         be_cmd_rxq_destroy(adapter, q);
3596                         be_rx_cq_clean(rxo);
3597                         be_rxq_clean(rxo);
3598                 }
3599                 be_queue_free(adapter, q);
3600         }
3601
3602         if (rss->rss_flags) {
3603                 rss->rss_flags = RSS_ENABLE_NONE;
3604                 be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3605                                   128, rss->rss_hkey);
3606         }
3607 }
3608
3609 static void be_disable_if_filters(struct be_adapter *adapter)
3610 {
3611         be_dev_mac_del(adapter, adapter->pmac_id[0]);
3612         be_clear_uc_list(adapter);
3613         be_clear_mc_list(adapter);
3614
3615         /* The IFACE flags are enabled in the open path and cleared
3616          * in the close path. When a VF gets detached from the host and
3617          * assigned to a VM the following happens:
3618          *      - VF's IFACE flags get cleared in the detach path
3619          *      - IFACE create is issued by the VF in the attach path
3620          * Due to a bug in the BE3/Skyhawk-R FW
3621          * (Lancer FW doesn't have the bug), the IFACE capability flags
3622          * specified along with the IFACE create cmd issued by a VF are not
3623          * honoured by FW.  As a consequence, if a *new* driver
3624          * (that enables/disables IFACE flags in open/close)
3625          * is loaded in the host and an *old* driver is used by a VM/VF,
3626          * the IFACE gets created *without* the needed flags.
3627          * To avoid this, disable RX-filter flags only for Lancer.
3628          */
3629         if (lancer_chip(adapter)) {
3630                 be_cmd_rx_filter(adapter, BE_IF_ALL_FILT_FLAGS, OFF);
3631                 adapter->if_flags &= ~BE_IF_ALL_FILT_FLAGS;
3632         }
3633 }
3634
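/* ndo_stop path: flushes pending config work, disables RX filters, stops
 * NAPI/busy-poll and async MCC processing, drains TX so all pending skbs are
 * freed, destroys the RX queues and finally synchronizes and unregisters the
 * IRQs.
 */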
3635 static int be_close(struct net_device *netdev)
3636 {
3637         struct be_adapter *adapter = netdev_priv(netdev);
3638         struct be_eq_obj *eqo;
3639         int i;
3640
3641         /* This protection is needed as be_close() may be called even when the
3642          * adapter is in cleared state (after eeh perm failure)
3643          */
3644         if (!(adapter->flags & BE_FLAGS_SETUP_DONE))
3645                 return 0;
3646
3647         /* Before attempting cleanup ensure all the pending cmds in the
3648          * config_wq have finished execution
3649          */
3650         flush_workqueue(be_wq);
3651
3652         be_disable_if_filters(adapter);
3653
3654         if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
3655                 for_all_evt_queues(adapter, eqo, i) {
3656                         napi_disable(&eqo->napi);
3657                         be_disable_busy_poll(eqo);
3658                 }
3659                 adapter->flags &= ~BE_FLAGS_NAPI_ENABLED;
3660         }
3661
3662         be_async_mcc_disable(adapter);
3663
3664         /* Wait for all pending tx completions to arrive so that
3665          * all tx skbs are freed.
3666          */
3667         netif_tx_disable(netdev);
3668         be_tx_compl_clean(adapter);
3669
3670         be_rx_qs_destroy(adapter);
3671
3672         for_all_evt_queues(adapter, eqo, i) {
3673                 if (msix_enabled(adapter))
3674                         synchronize_irq(be_msix_vec_get(adapter, eqo));
3675                 else
3676                         synchronize_irq(netdev->irq);
3677                 be_eq_clean(eqo);
3678         }
3679
3680         be_irq_unregister(adapter);
3681
3682         return 0;
3683 }
3684
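/* Allocates and creates the RX rings: a default RXQ when needed, then the
 * RSS rings. When more than one RX queue exists, the RSS indirection table
 * is filled round-robin with the rings' rss_ids and a fresh hash key is
 * programmed; otherwise RSS stays disabled. Each ring is then primed with
 * RX_Q_LEN - 1 buffers (one slot is left free to distinguish a full queue
 * from an empty one).
 */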
3685 static int be_rx_qs_create(struct be_adapter *adapter)
3686 {
3687         struct rss_info *rss = &adapter->rss_info;
3688         u8 rss_key[RSS_HASH_KEY_LEN];
3689         struct be_rx_obj *rxo;
3690         int rc, i, j;
3691
3692         for_all_rx_queues(adapter, rxo, i) {
3693                 rc = be_queue_alloc(adapter, &rxo->q, RX_Q_LEN,
3694                                     sizeof(struct be_eth_rx_d));
3695                 if (rc)
3696                         return rc;
3697         }
3698
3699         if (adapter->need_def_rxq || !adapter->num_rss_qs) {
3700                 rxo = default_rxo(adapter);
3701                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3702                                        rx_frag_size, adapter->if_handle,
3703                                        false, &rxo->rss_id);
3704                 if (rc)
3705                         return rc;
3706         }
3707
3708         for_all_rss_queues(adapter, rxo, i) {
3709                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3710                                        rx_frag_size, adapter->if_handle,
3711                                        true, &rxo->rss_id);
3712                 if (rc)
3713                         return rc;
3714         }
3715
3716         if (be_multi_rxq(adapter)) {
3717                 for (j = 0; j < RSS_INDIR_TABLE_LEN; j += adapter->num_rss_qs) {
3718                         for_all_rss_queues(adapter, rxo, i) {
3719                                 if ((j + i) >= RSS_INDIR_TABLE_LEN)
3720                                         break;
3721                                 rss->rsstable[j + i] = rxo->rss_id;
3722                                 rss->rss_queue[j + i] = i;
3723                         }
3724                 }
3725                 rss->rss_flags = RSS_ENABLE_TCP_IPV4 | RSS_ENABLE_IPV4 |
3726                         RSS_ENABLE_TCP_IPV6 | RSS_ENABLE_IPV6;
3727
3728                 if (!BEx_chip(adapter))
3729                         rss->rss_flags |= RSS_ENABLE_UDP_IPV4 |
3730                                 RSS_ENABLE_UDP_IPV6;
3731
3732                 netdev_rss_key_fill(rss_key, RSS_HASH_KEY_LEN);
3733                 rc = be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3734                                        RSS_INDIR_TABLE_LEN, rss_key);
3735                 if (rc) {
3736                         rss->rss_flags = RSS_ENABLE_NONE;
3737                         return rc;
3738                 }
3739
3740                 memcpy(rss->rss_hkey, rss_key, RSS_HASH_KEY_LEN);
3741         } else {
3742                 /* Disable RSS, if only default RX Q is created */
3743                 rss->rss_flags = RSS_ENABLE_NONE;
3744         }
3745
3746
3747         /* Post 1 less than RXQ-len to avoid head being equal to tail,
3748          * which is a queue empty condition
3749          */
3750         for_all_rx_queues(adapter, rxo, i)
3751                 be_post_rx_frags(rxo, GFP_KERNEL, RX_Q_LEN - 1);
3752
3753         return 0;
3754 }
3755
3756 static int be_enable_if_filters(struct be_adapter *adapter)
3757 {
3758         int status;
3759
3760         status = be_cmd_rx_filter(adapter, BE_IF_FILT_FLAGS_BASIC, ON);
3761         if (status)
3762                 return status;
3763
3764         /* For BE3 VFs, the PF programs the initial MAC address */
3765         if (!(BEx_chip(adapter) && be_virtfn(adapter))) {
3766                 status = be_dev_mac_add(adapter, adapter->netdev->dev_addr);
3767                 if (status)
3768                         return status;
3769                 ether_addr_copy(adapter->dev_mac, adapter->netdev->dev_addr);
3770         }
3771
3772         if (adapter->vlans_added)
3773                 be_vid_config(adapter);
3774
3775         __be_set_rx_mode(adapter);
3776
3777         return 0;
3778 }
3779
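/* ndo_open path: creates the RX rings and enables RX filters, registers
 * IRQs, arms all RX/TX CQs and EQs, enables NAPI and async MCC processing,
 * reports the initial link state and, on Skyhawk, re-queries UDP tunnel
 * ports for VxLAN offload.
 */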
3780 static int be_open(struct net_device *netdev)
3781 {
3782         struct be_adapter *adapter = netdev_priv(netdev);
3783         struct be_eq_obj *eqo;
3784         struct be_rx_obj *rxo;
3785         struct be_tx_obj *txo;
3786         u8 link_status;
3787         int status, i;
3788
3789         status = be_rx_qs_create(adapter);
3790         if (status)
3791                 goto err;
3792
3793         status = be_enable_if_filters(adapter);
3794         if (status)
3795                 goto err;
3796
3797         status = be_irq_register(adapter);
3798         if (status)
3799                 goto err;
3800
3801         for_all_rx_queues(adapter, rxo, i)
3802                 be_cq_notify(adapter, rxo->cq.id, true, 0);
3803
3804         for_all_tx_queues(adapter, txo, i)
3805                 be_cq_notify(adapter, txo->cq.id, true, 0);
3806
3807         be_async_mcc_enable(adapter);
3808
3809         for_all_evt_queues(adapter, eqo, i) {
3810                 napi_enable(&eqo->napi);
3811                 be_enable_busy_poll(eqo);
3812                 be_eq_notify(adapter, eqo->q.id, true, true, 0, 0);
3813         }
3814         adapter->flags |= BE_FLAGS_NAPI_ENABLED;
3815
3816         status = be_cmd_link_status_query(adapter, NULL, &link_status, 0);
3817         if (!status)
3818                 be_link_status_update(adapter, link_status);
3819
3820         netif_tx_start_all_queues(netdev);
3821         if (skyhawk_chip(adapter))
3822                 udp_tunnel_get_rx_info(netdev);
3823
3824         return 0;
3825 err:
3826         be_close(adapter->netdev);
3827         return -EIO;
3828 }
3829
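/* Builds a stable seed MAC for VFs: the low 3 bytes come from a jhash of the
 * PF's MAC address and the OUI (top 3 bytes) is copied from the PF's MAC, so
 * generated addresses stay within the same vendor prefix.
 */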
3830 static void be_vf_eth_addr_generate(struct be_adapter *adapter, u8 *mac)
3831 {
3832         u32 addr;
3833
3834         addr = jhash(adapter->netdev->dev_addr, ETH_ALEN, 0);
3835
3836         mac[5] = (u8)(addr & 0xFF);
3837         mac[4] = (u8)((addr >> 8) & 0xFF);
3838         mac[3] = (u8)((addr >> 16) & 0xFF);
3839         /* Use the OUI from the current MAC address */
3840         memcpy(mac, adapter->netdev->dev_addr, 3);
3841 }
3842
3843 /*
3844  * Generate a seed MAC address from the PF MAC Address using jhash.
3845  * MAC addresses for VFs are assigned incrementally starting from the seed.
3846  * These addresses are programmed in the ASIC by the PF and the VF driver
3847  * queries for the MAC address during its probe.
3848  */
3849 static int be_vf_eth_addr_config(struct be_adapter *adapter)
3850 {
3851         u32 vf;
3852         int status = 0;
3853         u8 mac[ETH_ALEN];
3854         struct be_vf_cfg *vf_cfg;
3855
3856         be_vf_eth_addr_generate(adapter, mac);
3857
3858         for_all_vfs(adapter, vf_cfg, vf) {
3859                 if (BEx_chip(adapter))
3860                         status = be_cmd_pmac_add(adapter, mac,
3861                                                  vf_cfg->if_handle,
3862                                                  &vf_cfg->pmac_id, vf + 1);
3863                 else
3864                         status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
3865                                                 vf + 1);
3866
3867                 if (status)
3868                         dev_err(&adapter->pdev->dev,
3869                                 "Mac address assignment failed for VF %d\n",
3870                                 vf);
3871                 else
3872                         memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3873
3874                 mac[5] += 1;
3875         }
3876         return status;
3877 }
3878
3879 static int be_vfs_mac_query(struct be_adapter *adapter)
3880 {
3881         int status, vf;
3882         u8 mac[ETH_ALEN];
3883         struct be_vf_cfg *vf_cfg;
3884
3885         for_all_vfs(adapter, vf_cfg, vf) {
3886                 status = be_cmd_get_active_mac(adapter, vf_cfg->pmac_id,
3887                                                mac, vf_cfg->if_handle,
3888                                                false, vf + 1);
3889                 if (status)
3890                         return status;
3891                 memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3892         }
3893         return 0;
3894 }
3895
3896 static void be_vf_clear(struct be_adapter *adapter)
3897 {
3898         struct be_vf_cfg *vf_cfg;
3899         u32 vf;
3900
3901         if (pci_vfs_assigned(adapter->pdev)) {
3902                 dev_warn(&adapter->pdev->dev,
3903                          "VFs are assigned to VMs: not disabling VFs\n");
3904                 goto done;
3905         }
3906
3907         pci_disable_sriov(adapter->pdev);
3908
3909         for_all_vfs(adapter, vf_cfg, vf) {
3910                 if (BEx_chip(adapter))
3911                         be_cmd_pmac_del(adapter, vf_cfg->if_handle,
3912                                         vf_cfg->pmac_id, vf + 1);
3913                 else
3914                         be_cmd_set_mac(adapter, NULL, vf_cfg->if_handle,
3915                                        vf + 1);
3916
3917                 be_cmd_if_destroy(adapter, vf_cfg->if_handle, vf + 1);
3918         }
3919
3920         if (BE3_chip(adapter))
3921                 be_cmd_set_hsw_config(adapter, 0, 0,
3922                                       adapter->if_handle,
3923                                       PORT_FWD_TYPE_PASSTHRU, 0);
3924 done:
3925         kfree(adapter->vf_cfg);
3926         adapter->num_vfs = 0;
3927         adapter->flags &= ~BE_FLAGS_SRIOV_ENABLED;
3928 }
3929
3930 static void be_clear_queues(struct be_adapter *adapter)
3931 {
3932         be_mcc_queues_destroy(adapter);
3933         be_rx_cqs_destroy(adapter);
3934         be_tx_queues_destroy(adapter);
3935         be_evt_queues_destroy(adapter);
3936 }
3937
3938 static void be_cancel_worker(struct be_adapter *adapter)
3939 {
3940         if (adapter->flags & BE_FLAGS_WORKER_SCHEDULED) {
3941                 cancel_delayed_work_sync(&adapter->work);
3942                 adapter->flags &= ~BE_FLAGS_WORKER_SCHEDULED;
3943         }
3944 }
3945
3946 static void be_cancel_err_detection(struct be_adapter *adapter)
3947 {
3948         struct be_error_recovery *err_rec = &adapter->error_recovery;
3949
3950         if (!be_err_recovery_workq)
3951                 return;
3952
3953         if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) {
3954                 cancel_delayed_work_sync(&err_rec->err_detection_work);
3955                 adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED;
3956         }
3957 }
3958
3959 static void be_disable_vxlan_offloads(struct be_adapter *adapter)
3960 {
3961         struct net_device *netdev = adapter->netdev;
3962
3963         if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS)
3964                 be_cmd_manage_iface(adapter, adapter->if_handle,
3965                                     OP_CONVERT_TUNNEL_TO_NORMAL);
3966
3967         if (adapter->vxlan_port)
3968                 be_cmd_set_vxlan_port(adapter, 0);
3969
3970         adapter->flags &= ~BE_FLAGS_VXLAN_OFFLOADS;
3971         adapter->vxlan_port = 0;
3972
3973         netdev->hw_enc_features = 0;
3974         netdev->hw_features &= ~(NETIF_F_GSO_UDP_TUNNEL);
3975         netdev->features &= ~(NETIF_F_GSO_UDP_TUNNEL);
3976 }
3977
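/* Splits the PF pool's resources between the PF and @num_vfs VFs. Queue
 * counts are divided across (num_vfs + 1) functions and capped by the VF EQ
 * limit; RSS is offered to VFs only when each one can get more than a single
 * queue pair and the IFACE RSS capability is modifiable in the profile.
 */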
3978 static void be_calculate_vf_res(struct be_adapter *adapter, u16 num_vfs,
3979                                 struct be_resources *vft_res)
3980 {
3981         struct be_resources res = adapter->pool_res;
3982         u32 vf_if_cap_flags = res.vf_if_cap_flags;
3983         struct be_resources res_mod = {0};
3984         u16 num_vf_qs = 1;
3985
3986         /* Distribute the queue resources among the PF and its VFs */
3987         if (num_vfs) {
3988                 /* Divide the rx queues evenly among the VFs and the PF, capped
3989                  * at VF-EQ-count. Any remainder queues belong to the PF.
3990                  */
3991                 num_vf_qs = min(SH_VF_MAX_NIC_EQS,
3992                                 res.max_rss_qs / (num_vfs + 1));
3993
3994                 /* Skyhawk-R chip supports only MAX_PORT_RSS_TABLES
3995                  * RSS Tables per port. Provide RSS on VFs, only if number of
3996          * VFs requested is less than its PF pool's RSS Tables limit.
3997                  */
3998                 if (num_vfs >= be_max_pf_pool_rss_tables(adapter))
3999                         num_vf_qs = 1;
4000         }
4001
4002         /* The GET_PROFILE_CONFIG cmd (RESOURCE_MODIFIABLE query) reports all '1's
4003          * in the fields that can be changed with the SET_PROFILE_CONFIG cmd.
4004          */
4005         be_cmd_get_profile_config(adapter, &res_mod, NULL, ACTIVE_PROFILE_TYPE,
4006                                   RESOURCE_MODIFIABLE, 0);
4007
4008         /* If RSS IFACE capability flags are modifiable for a VF, set the
4009          * capability flag as valid and set RSS and DEFQ_RSS IFACE flags if
4010          * more than 1 RSSQ is available for a VF.
4011          * Otherwise, provision only 1 queue pair for VF.
4012          */
4013         if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_RSS) {
4014                 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4015                 if (num_vf_qs > 1) {
4016                         vf_if_cap_flags |= BE_IF_FLAGS_RSS;
4017                         if (res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS)
4018                                 vf_if_cap_flags |= BE_IF_FLAGS_DEFQ_RSS;
4019                 } else {
4020                         vf_if_cap_flags &= ~(BE_IF_FLAGS_RSS |
4021                                              BE_IF_FLAGS_DEFQ_RSS);
4022                 }
4023         } else {
4024                 num_vf_qs = 1;
4025         }
4026
4027         if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
4028                 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4029                 vf_if_cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4030         }
4031
4032         vft_res->vf_if_cap_flags = vf_if_cap_flags;
4033         vft_res->max_rx_qs = num_vf_qs;
4034         vft_res->max_rss_qs = num_vf_qs;
4035         vft_res->max_tx_qs = res.max_tx_qs / (num_vfs + 1);
4036         vft_res->max_cq_count = res.max_cq_count / (num_vfs + 1);
4037
4038         /* Distribute unicast MACs, VLANs, IFACE count and MCCQ count equally
4039          * among the PF and its VFs, if the fields are changeable
4040          */
4041         if (res_mod.max_uc_mac == FIELD_MODIFIABLE)
4042                 vft_res->max_uc_mac = res.max_uc_mac / (num_vfs + 1);
4043
4044         if (res_mod.max_vlans == FIELD_MODIFIABLE)
4045                 vft_res->max_vlans = res.max_vlans / (num_vfs + 1);
4046
4047         if (res_mod.max_iface_count == FIELD_MODIFIABLE)
4048                 vft_res->max_iface_count = res.max_iface_count / (num_vfs + 1);
4049
4050         if (res_mod.max_mcc_count == FIELD_MODIFIABLE)
4051                 vft_res->max_mcc_count = res.max_mcc_count / (num_vfs + 1);
4052 }
4053
4054 static void be_if_destroy(struct be_adapter *adapter)
4055 {
4056         be_cmd_if_destroy(adapter, adapter->if_handle, 0);
4057
4058         kfree(adapter->pmac_id);
4059         adapter->pmac_id = NULL;
4060
4061         kfree(adapter->mc_list);
4062         adapter->mc_list = NULL;
4063
4064         kfree(adapter->uc_list);
4065         adapter->uc_list = NULL;
4066 }
4067
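/* Undoes be_setup(): stops the worker, clears VFs, rebalances SR-IOV
 * resources across the maximum supported VFs (Skyhawk PF only, and only when
 * no VFs are currently assigned), drops VxLAN offloads, destroys the IFACE
 * and all queues, and disables MSI-X.
 */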
4068 static int be_clear(struct be_adapter *adapter)
4069 {
4070         struct pci_dev *pdev = adapter->pdev;
4071         struct be_resources vft_res = {0};
4072
4073         be_cancel_worker(adapter);
4074
4075         flush_workqueue(be_wq);
4076
4077         if (sriov_enabled(adapter))
4078                 be_vf_clear(adapter);
4079
4080         /* Re-configure FW to distribute resources evenly across max-supported
4081          * number of VFs, only when VFs are not already enabled.
4082          */
4083         if (skyhawk_chip(adapter) && be_physfn(adapter) &&
4084             !pci_vfs_assigned(pdev)) {
4085                 be_calculate_vf_res(adapter,
4086                                     pci_sriov_get_totalvfs(pdev),
4087                                     &vft_res);
4088                 be_cmd_set_sriov_config(adapter, adapter->pool_res,
4089                                         pci_sriov_get_totalvfs(pdev),
4090                                         &vft_res);
4091         }
4092
4093         be_disable_vxlan_offloads(adapter);
4094
4095         be_if_destroy(adapter);
4096
4097         be_clear_queues(adapter);
4098
4099         be_msix_disable(adapter);
4100         adapter->flags &= ~BE_FLAGS_SETUP_DONE;
4101         return 0;
4102 }
4103
4104 static int be_vfs_if_create(struct be_adapter *adapter)
4105 {
4106         struct be_resources res = {0};
4107         u32 cap_flags, en_flags, vf;
4108         struct be_vf_cfg *vf_cfg;
4109         int status;
4110
4111         /* If a FW profile exists, then cap_flags are updated */
4112         cap_flags = BE_VF_IF_EN_FLAGS;
4113
4114         for_all_vfs(adapter, vf_cfg, vf) {
4115                 if (!BE3_chip(adapter)) {
4116                         status = be_cmd_get_profile_config(adapter, &res, NULL,
4117                                                            ACTIVE_PROFILE_TYPE,
4118                                                            RESOURCE_LIMITS,
4119                                                            vf + 1);
4120                         if (!status) {
4121                                 cap_flags = res.if_cap_flags;
4122                                 /* Prevent VFs from enabling VLAN promiscuous
4123                                  * mode
4124                                  */
4125                                 cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4126                         }
4127                 }
4128
4129                 /* PF should enable IF flags during proxy if_create call */
4130                 en_flags = cap_flags & BE_VF_IF_EN_FLAGS;
4131                 status = be_cmd_if_create(adapter, cap_flags, en_flags,
4132                                           &vf_cfg->if_handle, vf + 1);
4133                 if (status)
4134                         return status;
4135         }
4136
4137         return 0;
4138 }
4139
4140 static int be_vf_setup_init(struct be_adapter *adapter)
4141 {
4142         struct be_vf_cfg *vf_cfg;
4143         int vf;
4144
4145         adapter->vf_cfg = kcalloc(adapter->num_vfs, sizeof(*vf_cfg),
4146                                   GFP_KERNEL);
4147         if (!adapter->vf_cfg)
4148                 return -ENOMEM;
4149
4150         for_all_vfs(adapter, vf_cfg, vf) {
4151                 vf_cfg->if_handle = -1;
4152                 vf_cfg->pmac_id = -1;
4153         }
4154         return 0;
4155 }
4156
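/* Brings up SR-IOV: reuses the existing VF IFACEs and MACs when VFs were
 * already enabled (old_vfs), otherwise creates per-VF interfaces and programs
 * seed MACs. It then grants FILTMGMT privilege, applies QoS and link-state
 * defaults, and finally enables SR-IOV in the PCI layer (BE3 additionally
 * switches the hsw port forwarding to VEB mode).
 */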
4157 static int be_vf_setup(struct be_adapter *adapter)
4158 {
4159         struct device *dev = &adapter->pdev->dev;
4160         struct be_vf_cfg *vf_cfg;
4161         int status, old_vfs, vf;
4162         bool spoofchk;
4163
4164         old_vfs = pci_num_vf(adapter->pdev);
4165
4166         status = be_vf_setup_init(adapter);
4167         if (status)
4168                 goto err;
4169
4170         if (old_vfs) {
4171                 for_all_vfs(adapter, vf_cfg, vf) {
4172                         status = be_cmd_get_if_id(adapter, vf_cfg, vf);
4173                         if (status)
4174                                 goto err;
4175                 }
4176
4177                 status = be_vfs_mac_query(adapter);
4178                 if (status)
4179                         goto err;
4180         } else {
4181                 status = be_vfs_if_create(adapter);
4182                 if (status)
4183                         goto err;
4184
4185                 status = be_vf_eth_addr_config(adapter);
4186                 if (status)
4187                         goto err;
4188         }
4189
4190         for_all_vfs(adapter, vf_cfg, vf) {
4191                 /* Allow VFs to program MAC/VLAN filters */
4192                 status = be_cmd_get_fn_privileges(adapter, &vf_cfg->privileges,
4193                                                   vf + 1);
4194                 if (!status && !(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
4195                         status = be_cmd_set_fn_privileges(adapter,
4196                                                           vf_cfg->privileges |
4197                                                           BE_PRIV_FILTMGMT,
4198                                                           vf + 1);
4199                         if (!status) {
4200                                 vf_cfg->privileges |= BE_PRIV_FILTMGMT;
4201                                 dev_info(dev, "VF%d has FILTMGMT privilege\n",
4202                                          vf);
4203                         }
4204                 }
4205
4206                 /* Allow full available bandwidth */
4207                 if (!old_vfs)
4208                         be_cmd_config_qos(adapter, 0, 0, vf + 1);
4209
4210                 status = be_cmd_get_hsw_config(adapter, NULL, vf + 1,
4211                                                vf_cfg->if_handle, NULL,
4212                                                &spoofchk);
4213                 if (!status)
4214                         vf_cfg->spoofchk = spoofchk;
4215
4216                 if (!old_vfs) {
4217                         be_cmd_enable_vf(adapter, vf + 1);
4218                         be_cmd_set_logical_link_config(adapter,
4219                                                        IFLA_VF_LINK_STATE_AUTO,
4220                                                        vf + 1);
4221                 }
4222         }
4223
4224         if (!old_vfs) {
4225                 status = pci_enable_sriov(adapter->pdev, adapter->num_vfs);
4226                 if (status) {
4227                         dev_err(dev, "SRIOV enable failed\n");
4228                         adapter->num_vfs = 0;
4229                         goto err;
4230                 }
4231         }
4232
4233         if (BE3_chip(adapter)) {
4234                 /* On BE3, enable VEB only when SRIOV is enabled */
4235                 status = be_cmd_set_hsw_config(adapter, 0, 0,
4236                                                adapter->if_handle,
4237                                                PORT_FWD_TYPE_VEB, 0);
4238                 if (status)
4239                         goto err;
4240         }
4241
4242         adapter->flags |= BE_FLAGS_SRIOV_ENABLED;
4243         return 0;
4244 err:
4245         dev_err(dev, "VF setup failed\n");
4246         be_vf_clear(adapter);
4247         return status;
4248 }
4249
4250 /* Converting function_mode bits on BE3 to SH mc_type enums */
4251
4252 static u8 be_convert_mc_type(u32 function_mode)
4253 {
4254         if (function_mode & VNIC_MODE && function_mode & QNQ_MODE)
4255                 return vNIC1;
4256         else if (function_mode & QNQ_MODE)
4257                 return FLEX10;
4258         else if (function_mode & VNIC_MODE)
4259                 return vNIC2;
4260         else if (function_mode & UMC_ENABLED)
4261                 return UMC;
4262         else
4263                 return MC_NONE;
4264 }
4265
4266 /* On BE2/BE3 FW does not suggest the supported limits */
4267 static void BEx_get_resources(struct be_adapter *adapter,
4268                               struct be_resources *res)
4269 {
4270         bool use_sriov = adapter->num_vfs ? 1 : 0;
4271
4272         if (be_physfn(adapter))
4273                 res->max_uc_mac = BE_UC_PMAC_COUNT;
4274         else
4275                 res->max_uc_mac = BE_VF_UC_PMAC_COUNT;
4276
4277         adapter->mc_type = be_convert_mc_type(adapter->function_mode);
4278
4279         if (be_is_mc(adapter)) {
4280                 /* Assuming that there are 4 channels per port,
4281                  * when multi-channel is enabled
4282                  */
4283                 if (be_is_qnq_mode(adapter))
4284                         res->max_vlans = BE_NUM_VLANS_SUPPORTED/8;
4285                 else
4286                         /* In a non-qnq multichannel mode, the pvid
4287                          * takes up one vlan entry
4288                          */
4289                         res->max_vlans = (BE_NUM_VLANS_SUPPORTED / 4) - 1;
4290         } else {
4291                 res->max_vlans = BE_NUM_VLANS_SUPPORTED;
4292         }
4293
4294         res->max_mcast_mac = BE_MAX_MC;
4295
4296         /* 1) For BE3 1Gb ports, FW does not support multiple TXQs
4297          * 2) Create multiple TX rings on a BE3-R multi-channel interface
4298          *    *only* if it is RSS-capable.
4299          */
4300         if (BE2_chip(adapter) || use_sriov || (adapter->port_num > 1) ||
4301             be_virtfn(adapter) ||
4302             (be_is_mc(adapter) &&
4303              !(adapter->function_caps & BE_FUNCTION_CAPS_RSS))) {
4304                 res->max_tx_qs = 1;
4305         } else if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) {
4306                 struct be_resources super_nic_res = {0};
4307
4308                 /* On a SuperNIC profile, the driver needs to use the
4309                  * GET_PROFILE_CONFIG cmd to query the per-function TXQ limits
4310                  */
4311                 be_cmd_get_profile_config(adapter, &super_nic_res, NULL,
4312                                           ACTIVE_PROFILE_TYPE, RESOURCE_LIMITS,
4313                                           0);
4314                 /* Some old versions of BE3 FW don't report max_tx_qs value */
4315                 res->max_tx_qs = super_nic_res.max_tx_qs ? : BE3_MAX_TX_QS;
4316         } else {
4317                 res->max_tx_qs = BE3_MAX_TX_QS;
4318         }
4319
4320         if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) &&
4321             !use_sriov && be_physfn(adapter))
4322                 res->max_rss_qs = (adapter->be3_native) ?
4323                                            BE3_MAX_RSS_QS : BE2_MAX_RSS_QS;
4324         res->max_rx_qs = res->max_rss_qs + 1;
4325
4326         if (be_physfn(adapter))
4327                 res->max_evt_qs = (be_max_vfs(adapter) > 0) ?
4328                                         BE3_SRIOV_MAX_EVT_QS : BE3_MAX_EVT_QS;
4329         else
4330                 res->max_evt_qs = 1;
4331
4332         res->if_cap_flags = BE_IF_CAP_FLAGS_WANT;
4333         res->if_cap_flags &= ~BE_IF_FLAGS_DEFQ_RSS;
4334         if (!(adapter->function_caps & BE_FUNCTION_CAPS_RSS))
4335                 res->if_cap_flags &= ~BE_IF_FLAGS_RSS;
4336 }
4337
4338 static void be_setup_init(struct be_adapter *adapter)
4339 {
4340         adapter->vlan_prio_bmap = 0xff;
4341         adapter->phy.link_speed = -1;
4342         adapter->if_handle = -1;
4343         adapter->be3_native = false;
4344         adapter->if_flags = 0;
4345         adapter->phy_state = BE_UNKNOWN_PHY_STATE;
4346         if (be_physfn(adapter))
4347                 adapter->cmd_privileges = MAX_PRIVILEGES;
4348         else
4349                 adapter->cmd_privileges = MIN_PRIVILEGES;
4350 }
4351
4352 /* HW supports only MAX_PORT_RSS_TABLES RSS Policy Tables per port.
4353  * However, this HW limitation is not exposed to the host via any SLI cmd.
4354  * As a result, in the case of SRIOV and in particular multi-partition configs
4355  * the driver needs to calculate a proportional share of RSS Tables per
4356  * PF-pool for distribution between the VFs. This self-imposed limit will
4357  * determine the number of VFs for which RSS can be enabled.
4358  */
4359 static void be_calculate_pf_pool_rss_tables(struct be_adapter *adapter)
4360 {
4361         struct be_port_resources port_res = {0};
4362         u8 rss_tables_on_port;
4363         u16 max_vfs = be_max_vfs(adapter);
4364
4365         be_cmd_get_profile_config(adapter, NULL, &port_res, SAVED_PROFILE_TYPE,
4366                                   RESOURCE_LIMITS, 0);
4367
4368         rss_tables_on_port = MAX_PORT_RSS_TABLES - port_res.nic_pfs;
4369
4370         /* Each PF Pool's RSS Tables limit =
4371          * PF's Max VFs / Total_Max_VFs on Port * RSS Tables on Port
4372          */
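        /* Example with purely illustrative numbers: if 15 RSS tables remain
         * on the port, this PF may own up to 32 VFs and the port supports 64
         * VFs in total, the PF-pool is limited to 32 * 15 / 64 = 7 tables.
         */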
4373         adapter->pool_res.max_rss_tables =
4374                 max_vfs * rss_tables_on_port / port_res.max_vfs;
4375 }
4376
4377 static int be_get_sriov_config(struct be_adapter *adapter)
4378 {
4379         struct be_resources res = {0};
4380         int max_vfs, old_vfs;
4381
4382         be_cmd_get_profile_config(adapter, &res, NULL, ACTIVE_PROFILE_TYPE,
4383                                   RESOURCE_LIMITS, 0);
4384
4385         /* Some old versions of BE3 FW don't report max_vfs value */
4386         if (BE3_chip(adapter) && !res.max_vfs) {
4387                 max_vfs = pci_sriov_get_totalvfs(adapter->pdev);
4388                 res.max_vfs = max_vfs > 0 ? min(MAX_VFS, max_vfs) : 0;
4389         }
4390
4391         adapter->pool_res = res;
4392
4393         /* If during previous unload of the driver, the VFs were not disabled,
4394          * then we cannot rely on the PF POOL limits for the TotalVFs value.
4395          * Instead use the TotalVFs value stored in the pci-dev struct.
4396          */
4397         old_vfs = pci_num_vf(adapter->pdev);
4398         if (old_vfs) {
4399                 dev_info(&adapter->pdev->dev, "%d VFs are already enabled\n",
4400                          old_vfs);
4401
4402                 adapter->pool_res.max_vfs =
4403                         pci_sriov_get_totalvfs(adapter->pdev);
4404                 adapter->num_vfs = old_vfs;
4405         }
4406
4407         if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4408                 be_calculate_pf_pool_rss_tables(adapter);
4409                 dev_info(&adapter->pdev->dev,
4410                          "RSS can be enabled for all VFs if num_vfs <= %d\n",
4411                          be_max_pf_pool_rss_tables(adapter));
4412         }
4413         return 0;
4414 }
4415
4416 static void be_alloc_sriov_res(struct be_adapter *adapter)
4417 {
4418         int old_vfs = pci_num_vf(adapter->pdev);
4419         struct be_resources vft_res = {0};
4420         int status;
4421
4422         be_get_sriov_config(adapter);
4423
4424         if (!old_vfs)
4425                 pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter));
4426
4427         /* When the HW is in an SRIOV-capable configuration, the PF-pool
4428          * resources are given to the PF during driver load, provided there
4429          * are no old VFs. This facility is not available in BE3 FW.
4430          * On Lancer, this is done by the FW itself.
4431          */
4432         if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4433                 be_calculate_vf_res(adapter, 0, &vft_res);
4434                 status = be_cmd_set_sriov_config(adapter, adapter->pool_res, 0,
4435                                                  &vft_res);
4436                 if (status)
4437                         dev_err(&adapter->pdev->dev,
4438                                 "Failed to optimize SRIOV resources\n");
4439         }
4440 }
4441
4442 static int be_get_resources(struct be_adapter *adapter)
4443 {
4444         struct device *dev = &adapter->pdev->dev;
4445         struct be_resources res = {0};
4446         int status;
4447
4448         /* For Lancer, SH etc. read per-function resource limits from FW.
4449          * GET_FUNC_CONFIG returns per-function guaranteed limits.
4450          * GET_PROFILE_CONFIG returns PCI-E related limits and PF-pool limits.
4451          */
4452         if (BEx_chip(adapter)) {
4453                 BEx_get_resources(adapter, &res);
4454         } else {
4455                 status = be_cmd_get_func_config(adapter, &res);
4456                 if (status)
4457                         return status;
4458
4459                 /* If a default RXQ must be created, we'll use up one RSSQ */
4460                 if (res.max_rss_qs && res.max_rss_qs == res.max_rx_qs &&
4461                     !(res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS))
4462                         res.max_rss_qs -= 1;
4463         }
4464
4465         /* If RoCE is supported stash away half the EQs for RoCE */
4466         res.max_nic_evt_qs = be_roce_supported(adapter) ?
4467                                 res.max_evt_qs / 2 : res.max_evt_qs;
4468         adapter->res = res;
4469
4470         /* If FW supports RSS default queue, then skip creating non-RSS
4471          * queue for non-IP traffic.
4472          */
4473         adapter->need_def_rxq = (be_if_cap_flags(adapter) &
4474                                  BE_IF_FLAGS_DEFQ_RSS) ? 0 : 1;
4475
4476         dev_info(dev, "Max: txqs %d, rxqs %d, rss %d, eqs %d, vfs %d\n",
4477                  be_max_txqs(adapter), be_max_rxqs(adapter),
4478                  be_max_rss(adapter), be_max_nic_eqs(adapter),
4479                  be_max_vfs(adapter));
4480         dev_info(dev, "Max: uc-macs %d, mc-macs %d, vlans %d\n",
4481                  be_max_uc(adapter), be_max_mc(adapter),
4482                  be_max_vlans(adapter));
4483
4484         /* Ensure RX and TX queues are created in pairs at init time */
4485         adapter->cfg_num_rx_irqs =
4486                                 min_t(u16, netif_get_num_default_rss_queues(),
4487                                       be_max_qp_irqs(adapter));
4488         adapter->cfg_num_tx_irqs = adapter->cfg_num_rx_irqs;
4489         return 0;
4490 }
4491
4492 static int be_get_config(struct be_adapter *adapter)
4493 {
4494         int status, level;
4495         u16 profile_id;
4496
4497         status = be_cmd_get_cntl_attributes(adapter);
4498         if (status)
4499                 return status;
4500
4501         status = be_cmd_query_fw_cfg(adapter);
4502         if (status)
4503                 return status;
4504
4505         if (!lancer_chip(adapter) && be_physfn(adapter))
4506                 be_cmd_get_fat_dump_len(adapter, &adapter->fat_dump_len);
4507
4508         if (BEx_chip(adapter)) {
4509                 level = be_cmd_get_fw_log_level(adapter);
4510                 adapter->msg_enable =
4511                         level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0;
4512         }
4513
4514         be_cmd_get_acpi_wol_cap(adapter);
4515         pci_enable_wake(adapter->pdev, PCI_D3hot, adapter->wol_en);
4516         pci_enable_wake(adapter->pdev, PCI_D3cold, adapter->wol_en);
4517
4518         be_cmd_query_port_name(adapter);
4519
4520         if (be_physfn(adapter)) {
4521                 status = be_cmd_get_active_profile(adapter, &profile_id);
4522                 if (!status)
4523                         dev_info(&adapter->pdev->dev,
4524                                  "Using profile 0x%x\n", profile_id);
4525         }
4526
4527         return 0;
4528 }
4529
4530 static int be_mac_setup(struct be_adapter *adapter)
4531 {
4532         u8 mac[ETH_ALEN];
4533         int status;
4534
4535         if (is_zero_ether_addr(adapter->netdev->dev_addr)) {
4536                 status = be_cmd_get_perm_mac(adapter, mac);
4537                 if (status)
4538                         return status;
4539
4540                 memcpy(adapter->netdev->dev_addr, mac, ETH_ALEN);
4541                 memcpy(adapter->netdev->perm_addr, mac, ETH_ALEN);
4542         }
4543
4544         return 0;
4545 }
4546
4547 static void be_schedule_worker(struct be_adapter *adapter)
4548 {
4549         queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
4550         adapter->flags |= BE_FLAGS_WORKER_SCHEDULED;
4551 }
4552
4553 static void be_destroy_err_recovery_workq(void)
4554 {
4555         if (!be_err_recovery_workq)
4556                 return;
4557
4558         flush_workqueue(be_err_recovery_workq);
4559         destroy_workqueue(be_err_recovery_workq);
4560         be_err_recovery_workq = NULL;
4561 }
4562
4563 static void be_schedule_err_detection(struct be_adapter *adapter, u32 delay)
4564 {
4565         struct be_error_recovery *err_rec = &adapter->error_recovery;
4566
4567         if (!be_err_recovery_workq)
4568                 return;
4569
4570         queue_delayed_work(be_err_recovery_workq, &err_rec->err_detection_work,
4571                            msecs_to_jiffies(delay));
4572         adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED;
4573 }
4574
4575 static int be_setup_queues(struct be_adapter *adapter)
4576 {
4577         struct net_device *netdev = adapter->netdev;
4578         int status;
4579
4580         status = be_evt_queues_create(adapter);
4581         if (status)
4582                 goto err;
4583
4584         status = be_tx_qs_create(adapter);
4585         if (status)
4586                 goto err;
4587
4588         status = be_rx_cqs_create(adapter);
4589         if (status)
4590                 goto err;
4591
4592         status = be_mcc_queues_create(adapter);
4593         if (status)
4594                 goto err;
4595
4596         status = netif_set_real_num_rx_queues(netdev, adapter->num_rx_qs);
4597         if (status)
4598                 goto err;
4599
4600         status = netif_set_real_num_tx_queues(netdev, adapter->num_tx_qs);
4601         if (status)
4602                 goto err;
4603
4604         return 0;
4605 err:
4606         dev_err(&adapter->pdev->dev, "queue_setup failed\n");
4607         return status;
4608 }
4609
4610 static int be_if_create(struct be_adapter *adapter)
4611 {
4612         u32 en_flags = BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS;
4613         u32 cap_flags = be_if_cap_flags(adapter);
4614         int status;
4615
4616         /* alloc required memory for other filtering fields */
4617         adapter->pmac_id = kcalloc(be_max_uc(adapter),
4618                                    sizeof(*adapter->pmac_id), GFP_KERNEL);
4619         if (!adapter->pmac_id)
4620                 return -ENOMEM;
4621
4622         adapter->mc_list = kcalloc(be_max_mc(adapter),
4623                                    sizeof(*adapter->mc_list), GFP_KERNEL);
4624         if (!adapter->mc_list)
4625                 return -ENOMEM;
4626
4627         adapter->uc_list = kcalloc(be_max_uc(adapter),
4628                                    sizeof(*adapter->uc_list), GFP_KERNEL);
4629         if (!adapter->uc_list)
4630                 return -ENOMEM;
4631
4632         if (adapter->cfg_num_rx_irqs == 1)
4633                 cap_flags &= ~(BE_IF_FLAGS_DEFQ_RSS | BE_IF_FLAGS_RSS);
4634
4635         en_flags &= cap_flags;
4636         /* will enable all the needed filter flags in be_open() */
4637         status = be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags,
4638                                   &adapter->if_handle, 0);
4639
4640         if (status)
4641                 return status;
4642
4643         return 0;
4644 }
4645
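/* Destroys and recreates the interface and all queues so that a changed
 * queue/IRQ configuration (e.g. a new channel count) takes effect. The MSI-X
 * table is re-programmed only when no vectors are shared with RoCE.
 */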
4646 int be_update_queues(struct be_adapter *adapter)
4647 {
4648         struct net_device *netdev = adapter->netdev;
4649         int status;
4650
4651         if (netif_running(netdev))
4652                 be_close(netdev);
4653
4654         be_cancel_worker(adapter);
4655
4656         /* If any vectors have been shared with RoCE we cannot re-program
4657          * the MSIx table.
4658          */
4659         if (!adapter->num_msix_roce_vec)
4660                 be_msix_disable(adapter);
4661
4662         be_clear_queues(adapter);
4663         status = be_cmd_if_destroy(adapter, adapter->if_handle, 0);
4664         if (status)
4665                 return status;
4666
4667         if (!msix_enabled(adapter)) {
4668                 status = be_msix_enable(adapter);
4669                 if (status)
4670                         return status;
4671         }
4672
4673         status = be_if_create(adapter);
4674         if (status)
4675                 return status;
4676
4677         status = be_setup_queues(adapter);
4678         if (status)
4679                 return status;
4680
4681         be_schedule_worker(adapter);
4682
4683         if (netif_running(netdev))
4684                 status = be_open(netdev);
4685
4686         return status;
4687 }
4688
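/* Returns the major number of a FW version string, e.g. 4 for an
 * (illustrative) "4.0.335.0"; returns 0 if no leading integer can be parsed.
 */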
4689 static inline int fw_major_num(const char *fw_ver)
4690 {
4691         int fw_major = 0, i;
4692
4693         i = sscanf(fw_ver, "%d.", &fw_major);
4694         if (i != 1)
4695                 return 0;
4696
4697         return fw_major;
4698 }
4699
4700 /* If it is error recovery, FLR the PF
4701  * Else, if any VFs are already enabled, don't FLR the PF
4702  */
4703 static bool be_reset_required(struct be_adapter *adapter)
4704 {
4705         if (be_error_recovering(adapter))
4706                 return true;
4707         else
4708                 return pci_num_vf(adapter->pdev) == 0;
4709 }
4710
4711 /* Wait for the FW to be ready and perform the required initialization */
4712 static int be_func_init(struct be_adapter *adapter)
4713 {
4714         int status;
4715
4716         status = be_fw_wait_ready(adapter);
4717         if (status)
4718                 return status;
4719
4720         /* FW is now ready; clear errors to allow cmds/doorbell */
4721         be_clear_error(adapter, BE_CLEAR_ALL);
4722
4723         if (be_reset_required(adapter)) {
4724                 status = be_cmd_reset_function(adapter);
4725                 if (status)
4726                         return status;
4727
4728                 /* Wait for interrupts to quiesce after an FLR */
4729                 msleep(100);
4730         }
4731
4732         /* Tell FW we're ready to fire cmds */
4733         status = be_cmd_fw_init(adapter);
4734         if (status)
4735                 return status;
4736
4737         /* Allow interrupts for other ULPs running on NIC function */
4738         be_intr_set(adapter, true);
4739
4740         return 0;
4741 }
4742
4743 static int be_setup(struct be_adapter *adapter)
4744 {
4745         struct device *dev = &adapter->pdev->dev;
4746         int status;
4747
4748         status = be_func_init(adapter);
4749         if (status)
4750                 return status;
4751
4752         be_setup_init(adapter);
4753
4754         if (!lancer_chip(adapter))
4755                 be_cmd_req_native_mode(adapter);
4756
4757         /* Invoke this cmd first to get pf_num and vf_num, which are needed
4758          * for issuing profile-related cmds
4759          */
4760         if (!BEx_chip(adapter)) {
4761                 status = be_cmd_get_func_config(adapter, NULL);
4762                 if (status)
4763                         return status;
4764         }
4765
4766         status = be_get_config(adapter);
4767         if (status)
4768                 goto err;
4769
4770         if (!BE2_chip(adapter) && be_physfn(adapter))
4771                 be_alloc_sriov_res(adapter);
4772
4773         status = be_get_resources(adapter);
4774         if (status)
4775                 goto err;
4776
4777         status = be_msix_enable(adapter);
4778         if (status)
4779                 goto err;
4780
4781         /* will enable all the needed filter flags in be_open() */
4782         status = be_if_create(adapter);
4783         if (status)
4784                 goto err;
4785
4786         /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
4787         rtnl_lock();
4788         status = be_setup_queues(adapter);
4789         rtnl_unlock();
4790         if (status)
4791                 goto err;
4792
4793         be_cmd_get_fn_privileges(adapter, &adapter->cmd_privileges, 0);
4794
4795         status = be_mac_setup(adapter);
4796         if (status)
4797                 goto err;
4798
4799         be_cmd_get_fw_ver(adapter);
4800         dev_info(dev, "FW version is %s\n", adapter->fw_ver);
4801
4802         if (BE2_chip(adapter) && fw_major_num(adapter->fw_ver) < 4) {
4803                 dev_err(dev, "Firmware on card is old (%s), IRQs may not work\n",
4804                         adapter->fw_ver);
4805                 dev_err(dev, "Please upgrade firmware to version >= 4.0\n");
4806         }
4807
4808         status = be_cmd_set_flow_control(adapter, adapter->tx_fc,
4809                                          adapter->rx_fc);
4810         if (status)
4811                 be_cmd_get_flow_control(adapter, &adapter->tx_fc,
4812                                         &adapter->rx_fc);
4813
4814         dev_info(&adapter->pdev->dev, "HW Flow control - TX:%d RX:%d\n",
4815                  adapter->tx_fc, adapter->rx_fc);
4816
4817         if (be_physfn(adapter))
4818                 be_cmd_set_logical_link_config(adapter,
4819                                                IFLA_VF_LINK_STATE_AUTO, 0);
4820
4821         /* The BE3 EVB echoes broadcast/multicast packets back to the PF's
4822          * vport, confusing any Linux bridge or OVS it might be connected to.
4823          * Set the EVB to PASSTHRU mode, which effectively disables it,
4824          * when SRIOV is not enabled.
4825          */
4826         if (BE3_chip(adapter))
4827                 be_cmd_set_hsw_config(adapter, 0, 0, adapter->if_handle,
4828                                       PORT_FWD_TYPE_PASSTHRU, 0);
4829
4830         if (adapter->num_vfs)
4831                 be_vf_setup(adapter);
4832
4833         status = be_cmd_get_phy_info(adapter);
4834         if (!status && be_pause_supported(adapter))
4835                 adapter->phy.fc_autoneg = 1;
4836
4837         if (be_physfn(adapter) && !lancer_chip(adapter))
4838                 be_cmd_set_features(adapter);
4839
4840         be_schedule_worker(adapter);
4841         adapter->flags |= BE_FLAGS_SETUP_DONE;
4842         return 0;
4843 err:
4844         be_clear(adapter);
4845         return status;
4846 }
4847
4848 #ifdef CONFIG_NET_POLL_CONTROLLER
4849 static void be_netpoll(struct net_device *netdev)
4850 {
4851         struct be_adapter *adapter = netdev_priv(netdev);
4852         struct be_eq_obj *eqo;
4853         int i;
4854
4855         for_all_evt_queues(adapter, eqo, i) {
4856                 be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
4857                 napi_schedule(&eqo->napi);
4858         }
4859 }
4860 #endif
4861
4862 int be_load_fw(struct be_adapter *adapter, u8 *fw_file)
4863 {
4864         const struct firmware *fw;
4865         int status;
4866
4867         if (!netif_running(adapter->netdev)) {
4868                 dev_err(&adapter->pdev->dev,
4869                         "Firmware load not allowed (interface is down)\n");
4870                 return -ENETDOWN;
4871         }
4872
4873         status = request_firmware(&fw, fw_file, &adapter->pdev->dev);
4874         if (status)
4875                 goto fw_exit;
4876
4877         dev_info(&adapter->pdev->dev, "Flashing firmware file %s\n", fw_file);
4878
4879         if (lancer_chip(adapter))
4880                 status = lancer_fw_download(adapter, fw);
4881         else
4882                 status = be_fw_download(adapter, fw);
4883
4884         if (!status)
4885                 be_cmd_get_fw_ver(adapter);
4886
4887 fw_exit:
4888         release_firmware(fw);
4889         return status;
4890 }
4891
4892 static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
4893                                  u16 flags)
4894 {
4895         struct be_adapter *adapter = netdev_priv(dev);
4896         struct nlattr *attr, *br_spec;
4897         int rem;
4898         int status = 0;
4899         u16 mode = 0;
4900
4901         if (!sriov_enabled(adapter))
4902                 return -EOPNOTSUPP;
4903
4904         br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
4905         if (!br_spec)
4906                 return -EINVAL;
4907
4908         nla_for_each_nested(attr, br_spec, rem) {
4909                 if (nla_type(attr) != IFLA_BRIDGE_MODE)
4910                         continue;
4911
4912                 if (nla_len(attr) < sizeof(mode))
4913                         return -EINVAL;
4914
4915                 mode = nla_get_u16(attr);
4916                 if (BE3_chip(adapter) && mode == BRIDGE_MODE_VEPA)
4917                         return -EOPNOTSUPP;
4918
4919                 if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
4920                         return -EINVAL;
4921
4922                 status = be_cmd_set_hsw_config(adapter, 0, 0,
4923                                                adapter->if_handle,
4924                                                mode == BRIDGE_MODE_VEPA ?
4925                                                PORT_FWD_TYPE_VEPA :
4926                                                PORT_FWD_TYPE_VEB, 0);
4927                 if (status)
4928                         goto err;
4929
4930                 dev_info(&adapter->pdev->dev, "enabled switch mode: %s\n",
4931                          mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4932
4933                 return status;
4934         }
4935 err:
4936         dev_err(&adapter->pdev->dev, "Failed to set switch mode %s\n",
4937                 mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4938
4939         return status;
4940 }
4941
4942 static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
4943                                  struct net_device *dev, u32 filter_mask,
4944                                  int nlflags)
4945 {
4946         struct be_adapter *adapter = netdev_priv(dev);
4947         int status = 0;
4948         u8 hsw_mode;
4949
4950         /* BE and Lancer chips support VEB mode only */
4951         if (BEx_chip(adapter) || lancer_chip(adapter)) {
4952                 /* VEB is disabled in non-SR-IOV profiles on BE3/Lancer */
4953                 if (!pci_sriov_get_totalvfs(adapter->pdev))
4954                         return 0;
4955                 hsw_mode = PORT_FWD_TYPE_VEB;
4956         } else {
4957                 status = be_cmd_get_hsw_config(adapter, NULL, 0,
4958                                                adapter->if_handle, &hsw_mode,
4959                                                NULL);
4960                 if (status)
4961                         return 0;
4962
4963                 if (hsw_mode == PORT_FWD_TYPE_PASSTHRU)
4964                         return 0;
4965         }
4966
4967         return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
4968                                        hsw_mode == PORT_FWD_TYPE_VEPA ?
4969                                        BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB,
4970                                        0, 0, nlflags, filter_mask, NULL);
4971 }
4972
4973 static struct be_cmd_work *be_alloc_work(struct be_adapter *adapter,
4974                                          void (*func)(struct work_struct *))
4975 {
4976         struct be_cmd_work *work;
4977
4978         work = kzalloc(sizeof(*work), GFP_ATOMIC);
4979         if (!work) {
4980                 dev_err(&adapter->pdev->dev,
4981                         "be_work memory allocation failed\n");
4982                 return NULL;
4983         }
4984
4985         INIT_WORK(&work->work, func);
4986         work->adapter = adapter;
4987         return work;
4988 }
4989
4990 /* VxLAN offload Notes:
4991  *
4992  * The stack defines tunnel offload flags (hw_enc_features) for IP and doesn't
4993  * distinguish various types of transports (VxLAN, GRE, NVGRE ..). So, offload
4994  * is expected to work across all types of IP tunnels once exported. Skyhawk
4995  * supports offloads for either VxLAN or NVGRE, exclusively. So we export VxLAN
4996  * offloads in hw_enc_features only when a VxLAN port is added. If other (non
4997  * VxLAN) tunnels are configured while VxLAN offloads are enabled, offloads for
4998  * those other tunnels are unexported on the fly through ndo_features_check().
4999  *
5000  * Skyhawk supports VxLAN offloads only for one UDP dport. So, if the stack
5001  * adds more than one port, disable offloads and don't re-enable them again
5002  * until after all the tunnels are removed.
5003  */
5004 static void be_work_add_vxlan_port(struct work_struct *work)
5005 {
5006         struct be_cmd_work *cmd_work =
5007                                 container_of(work, struct be_cmd_work, work);
5008         struct be_adapter *adapter = cmd_work->adapter;
5009         struct net_device *netdev = adapter->netdev;
5010         struct device *dev = &adapter->pdev->dev;
5011         __be16 port = cmd_work->info.vxlan_port;
5012         int status;
5013
5014         if (adapter->vxlan_port == port && adapter->vxlan_port_count) {
5015                 adapter->vxlan_port_aliases++;
5016                 goto done;
5017         }
5018
5019         if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS) {
5020                 dev_info(dev,
5021                          "Only one UDP port supported for VxLAN offloads\n");
5022                 dev_info(dev, "Disabling VxLAN offloads\n");
5023                 adapter->vxlan_port_count++;
5024                 goto err;
5025         }
5026
5027         if (adapter->vxlan_port_count++ >= 1)
5028                 goto done;
5029
5030         status = be_cmd_manage_iface(adapter, adapter->if_handle,
5031                                      OP_CONVERT_NORMAL_TO_TUNNEL);
5032         if (status) {
5033                 dev_warn(dev, "Failed to convert normal interface to tunnel\n");
5034                 goto err;
5035         }
5036
5037         status = be_cmd_set_vxlan_port(adapter, port);
5038         if (status) {
5039                 dev_warn(dev, "Failed to add VxLAN port\n");
5040                 goto err;
5041         }
5042         adapter->flags |= BE_FLAGS_VXLAN_OFFLOADS;
5043         adapter->vxlan_port = port;
5044
5045         netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
5046                                    NETIF_F_TSO | NETIF_F_TSO6 |
5047                                    NETIF_F_GSO_UDP_TUNNEL;
5048         netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
5049         netdev->features |= NETIF_F_GSO_UDP_TUNNEL;
5050
5051         dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n",
5052                  be16_to_cpu(port));
5053         goto done;
5054 err:
5055         be_disable_vxlan_offloads(adapter);
5056 done:
5057         kfree(cmd_work);
5058 }
5059
5060 static void be_work_del_vxlan_port(struct work_struct *work)
5061 {
5062         struct be_cmd_work *cmd_work =
5063                                 container_of(work, struct be_cmd_work, work);
5064         struct be_adapter *adapter = cmd_work->adapter;
5065         __be16 port = cmd_work->info.vxlan_port;
5066
5067         if (adapter->vxlan_port != port)
5068                 goto done;
5069
5070         if (adapter->vxlan_port_aliases) {
5071                 adapter->vxlan_port_aliases--;
5072                 goto out;
5073         }
5074
5075         be_disable_vxlan_offloads(adapter);
5076
5077         dev_info(&adapter->pdev->dev,
5078                  "Disabled VxLAN offloads for UDP port %d\n",
5079                  be16_to_cpu(port));
5080 done:
5081         adapter->vxlan_port_count--;
5082 out:
5083         kfree(cmd_work);
5084 }
5085
5086 static void be_cfg_vxlan_port(struct net_device *netdev,
5087                               struct udp_tunnel_info *ti,
5088                               void (*func)(struct work_struct *))
5089 {
5090         struct be_adapter *adapter = netdev_priv(netdev);
5091         struct be_cmd_work *cmd_work;
5092
5093         if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
5094                 return;
5095
5096         if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter))
5097                 return;
5098
5099         cmd_work = be_alloc_work(adapter, func);
5100         if (cmd_work) {
5101                 cmd_work->info.vxlan_port = ti->port;
5102                 queue_work(be_wq, &cmd_work->work);
5103         }
5104 }
5105
5106 static void be_del_vxlan_port(struct net_device *netdev,
5107                               struct udp_tunnel_info *ti)
5108 {
5109         be_cfg_vxlan_port(netdev, ti, be_work_del_vxlan_port);
5110 }
5111
5112 static void be_add_vxlan_port(struct net_device *netdev,
5113                               struct udp_tunnel_info *ti)
5114 {
5115         be_cfg_vxlan_port(netdev, ti, be_work_add_vxlan_port);
5116 }
5117
5118 static netdev_features_t be_features_check(struct sk_buff *skb,
5119                                            struct net_device *dev,
5120                                            netdev_features_t features)
5121 {
5122         struct be_adapter *adapter = netdev_priv(dev);
5123         u8 l4_hdr = 0;
5124
5125         /* The code below restricts offload features for some tunneled packets.
5126          * Offload features for normal (non tunnel) packets are unchanged.
5127          */
5128         if (!skb->encapsulation ||
5129             !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS))
5130                 return features;
5131
5132         /* It's an encapsulated packet and VxLAN offloads are enabled. We
5133          * should disable tunnel offload features if it's not a VxLAN packet,
5134          * as tunnel offloads have been enabled only for VxLAN. This is done to
5135          * allow other tunneled traffic like GRE work fine while VxLAN
5136          * offloads are configured in Skyhawk-R.
5137          */
5138         switch (vlan_get_protocol(skb)) {
5139         case htons(ETH_P_IP):
5140                 l4_hdr = ip_hdr(skb)->protocol;
5141                 break;
5142         case htons(ETH_P_IPV6):
5143                 l4_hdr = ipv6_hdr(skb)->nexthdr;
5144                 break;
5145         default:
5146                 return features;
5147         }
5148
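        /* Offloads are retained only for a well-formed VxLAN frame: UDP
         * carrying an inner Ethernet frame (ETH_P_TEB), exactly one UDP plus
         * VxLAN header between the transport and inner MAC headers, and a
         * destination port matching the single offloaded VxLAN port.
         * Everything else falls back to software checksumming/GSO.
         */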
5149         if (l4_hdr != IPPROTO_UDP ||
5150             skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
5151             skb->inner_protocol != htons(ETH_P_TEB) ||
5152             skb_inner_mac_header(skb) - skb_transport_header(skb) !=
5153                 sizeof(struct udphdr) + sizeof(struct vxlanhdr) ||
5154             !adapter->vxlan_port ||
5155             udp_hdr(skb)->dest != adapter->vxlan_port)
5156                 return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
5157
5158         return features;
5159 }
5160
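/* The physical port id is built from byte 0 = 1-based HBA port number,
 * followed by the controller serial number words copied in reverse word
 * order, which yields an id unique to each port of the adapter.
 */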
5161 static int be_get_phys_port_id(struct net_device *dev,
5162                                struct netdev_phys_item_id *ppid)
5163 {
5164         int i, id_len = CNTL_SERIAL_NUM_WORDS * CNTL_SERIAL_NUM_WORD_SZ + 1;
5165         struct be_adapter *adapter = netdev_priv(dev);
5166         u8 *id;
5167
5168         if (MAX_PHYS_ITEM_ID_LEN < id_len)
5169                 return -ENOSPC;
5170
5171         ppid->id[0] = adapter->hba_port_num + 1;
5172         id = &ppid->id[1];
5173         for (i = CNTL_SERIAL_NUM_WORDS - 1; i >= 0;
5174              i--, id += CNTL_SERIAL_NUM_WORD_SZ)
5175                 memcpy(id, &adapter->serial_num[i], CNTL_SERIAL_NUM_WORD_SZ);
5176
5177         ppid->id_len = id_len;
5178
5179         return 0;
5180 }
5181
5182 static void be_set_rx_mode(struct net_device *dev)
5183 {
5184         struct be_adapter *adapter = netdev_priv(dev);
5185         struct be_cmd_work *work;
5186
5187         work = be_alloc_work(adapter, be_work_set_rx_mode);
5188         if (work)
5189                 queue_work(be_wq, &work->work);
5190 }
5191
5192 static const struct net_device_ops be_netdev_ops = {
5193         .ndo_open               = be_open,
5194         .ndo_stop               = be_close,
5195         .ndo_start_xmit         = be_xmit,
5196         .ndo_set_rx_mode        = be_set_rx_mode,
5197         .ndo_set_mac_address    = be_mac_addr_set,
5198         .ndo_get_stats64        = be_get_stats64,
5199         .ndo_validate_addr      = eth_validate_addr,
5200         .ndo_vlan_rx_add_vid    = be_vlan_add_vid,
5201         .ndo_vlan_rx_kill_vid   = be_vlan_rem_vid,
5202         .ndo_set_vf_mac         = be_set_vf_mac,
5203         .ndo_set_vf_vlan        = be_set_vf_vlan,
5204         .ndo_set_vf_rate        = be_set_vf_tx_rate,
5205         .ndo_get_vf_config      = be_get_vf_config,
5206         .ndo_set_vf_link_state  = be_set_vf_link_state,
5207         .ndo_set_vf_spoofchk    = be_set_vf_spoofchk,
5208 #ifdef CONFIG_NET_POLL_CONTROLLER
5209         .ndo_poll_controller    = be_netpoll,
5210 #endif
5211         .ndo_bridge_setlink     = be_ndo_bridge_setlink,
5212         .ndo_bridge_getlink     = be_ndo_bridge_getlink,
5213 #ifdef CONFIG_NET_RX_BUSY_POLL
5214         .ndo_busy_poll          = be_busy_poll,
5215 #endif
5216         .ndo_udp_tunnel_add     = be_add_vxlan_port,
5217         .ndo_udp_tunnel_del     = be_del_vxlan_port,
5218         .ndo_features_check     = be_features_check,
5219         .ndo_get_phys_port_id   = be_get_phys_port_id,
5220 };
5221
5222 static void be_netdev_init(struct net_device *netdev)
5223 {
5224         struct be_adapter *adapter = netdev_priv(netdev);
5225
5226         netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5227                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
5228                 NETIF_F_HW_VLAN_CTAG_TX;
5229         if ((be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS))
5230                 netdev->hw_features |= NETIF_F_RXHASH;
5231
5232         netdev->features |= netdev->hw_features |
5233                 NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER;
5234
5235         netdev->vlan_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5236                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
5237
5238         netdev->priv_flags |= IFF_UNICAST_FLT;
5239
5240         netdev->flags |= IFF_MULTICAST;
5241
5242         netif_set_gso_max_size(netdev, BE_MAX_GSO_SIZE - ETH_HLEN);
5243
5244         netdev->netdev_ops = &be_netdev_ops;
5245
5246         netdev->ethtool_ops = &be_ethtool_ops;
5247
5248         /* MTU range: 256 - 9000 */
5249         netdev->min_mtu = BE_MIN_MTU;
5250         netdev->max_mtu = BE_MAX_MTU;
5251 }
5252
5253 static void be_cleanup(struct be_adapter *adapter)
5254 {
5255         struct net_device *netdev = adapter->netdev;
5256
5257         rtnl_lock();
5258         netif_device_detach(netdev);
5259         if (netif_running(netdev))
5260                 be_close(netdev);
5261         rtnl_unlock();
5262
5263         be_clear(adapter);
5264 }
5265
5266 static int be_resume(struct be_adapter *adapter)
5267 {
5268         struct net_device *netdev = adapter->netdev;
5269         int status;
5270
5271         status = be_setup(adapter);
5272         if (status)
5273                 return status;
5274
5275         rtnl_lock();
5276         if (netif_running(netdev))
5277                 status = be_open(netdev);
5278         rtnl_unlock();
5279
5280         if (status)
5281                 return status;
5282
5283         netif_device_attach(netdev);
5284
5285         return 0;
5286 }
5287
5288 static void be_soft_reset(struct be_adapter *adapter)
5289 {
5290         u32 val;
5291
5292         dev_info(&adapter->pdev->dev, "Initiating chip soft reset\n");
5293         val = ioread32(adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5294         val |= SLIPORT_SOFTRESET_SR_MASK;
5295         iowrite32(val, adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5296 }
5297
5298 static bool be_err_is_recoverable(struct be_adapter *adapter)
5299 {
5300         struct be_error_recovery *err_rec = &adapter->error_recovery;
5301         unsigned long initial_idle_time =
5302                 msecs_to_jiffies(ERR_RECOVERY_IDLE_TIME);
5303         unsigned long recovery_interval =
5304                 msecs_to_jiffies(ERR_RECOVERY_INTERVAL);
5305         u16 ue_err_code;
5306         u32 val;
5307
5308         val = be_POST_stage_get(adapter);
5309         if ((val & POST_STAGE_RECOVERABLE_ERR) != POST_STAGE_RECOVERABLE_ERR)
5310                 return false;
5311         ue_err_code = val & POST_ERR_RECOVERY_CODE_MASK;
5312         if (ue_err_code == 0)
5313                 return false;
5314
5315         dev_err(&adapter->pdev->dev, "Recoverable HW error code: 0x%x\n",
5316                 ue_err_code);
5317
5318         if (jiffies - err_rec->probe_time <= initial_idle_time) {
5319                 dev_err(&adapter->pdev->dev,
5320                         "Cannot recover within %lu sec from driver load\n",
5321                         jiffies_to_msecs(initial_idle_time) / MSEC_PER_SEC);
5322                 return false;
5323         }
5324
5325         if (err_rec->last_recovery_time &&
5326             (jiffies - err_rec->last_recovery_time <= recovery_interval)) {
5327                 dev_err(&adapter->pdev->dev,
5328                         "Cannot recover within %lu sec from last recovery\n",
5329                         jiffies_to_msecs(recovery_interval) / MSEC_PER_SEC);
5330                 return false;
5331         }
5332
5333         if (ue_err_code == err_rec->last_err_code) {
5334                 dev_err(&adapter->pdev->dev,
5335                         "Cannot recover from a consecutive TPE error\n");
5336                 return false;
5337         }
5338
5339         err_rec->last_recovery_time = jiffies;
5340         err_rec->last_err_code = ue_err_code;
5341         return true;
5342 }
5343
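/* Advances the BEx/SH TPE recovery state machine by one step:
 * NONE -> DETECT -> (RESET, PF0 only) -> PRE_POLL -> REINIT.
 * -EAGAIN with a non-zero resched_delay asks the caller to re-run after that
 * many milliseconds; 0 means the caller may proceed with re-initialization.
 */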
5344 static int be_tpe_recover(struct be_adapter *adapter)
5345 {
5346         struct be_error_recovery *err_rec = &adapter->error_recovery;
5347         int status = -EAGAIN;
5348         u32 val;
5349
5350         switch (err_rec->recovery_state) {
5351         case ERR_RECOVERY_ST_NONE:
5352                 err_rec->recovery_state = ERR_RECOVERY_ST_DETECT;
5353                 err_rec->resched_delay = ERR_RECOVERY_UE_DETECT_DURATION;
5354                 break;
5355
5356         case ERR_RECOVERY_ST_DETECT:
5357                 val = be_POST_stage_get(adapter);
5358                 if ((val & POST_STAGE_RECOVERABLE_ERR) !=
5359                     POST_STAGE_RECOVERABLE_ERR) {
5360                         dev_err(&adapter->pdev->dev,
5361                                 "Unrecoverable HW error detected: 0x%x\n", val);
5362                         status = -EINVAL;
5363                         err_rec->resched_delay = 0;
5364                         break;
5365                 }
5366
5367                 dev_err(&adapter->pdev->dev, "Recoverable HW error detected\n");
5368
5369                 /* Only PF0 initiates Chip Soft Reset. But PF0 must wait UE2SR
5370                  * milliseconds before it checks for final error status in
5371                  * SLIPORT_SEMAPHORE to determine if the recovery criteria are
5372                  * met. If they are, PF0 initiates a Soft Reset.
5373                  */
5374                 if (adapter->pf_num == 0) {
5375                         err_rec->recovery_state = ERR_RECOVERY_ST_RESET;
5376                         err_rec->resched_delay = err_rec->ue_to_reset_time -
5377                                         ERR_RECOVERY_UE_DETECT_DURATION;
5378                         break;
5379                 }
5380
5381                 err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5382                 err_rec->resched_delay = err_rec->ue_to_poll_time -
5383                                         ERR_RECOVERY_UE_DETECT_DURATION;
5384                 break;
5385
5386         case ERR_RECOVERY_ST_RESET:
5387                 if (!be_err_is_recoverable(adapter)) {
5388                         dev_err(&adapter->pdev->dev,
5389                                 "Failed to meet recovery criteria\n");
5390                         status = -EIO;
5391                         err_rec->resched_delay = 0;
5392                         break;
5393                 }
5394                 be_soft_reset(adapter);
5395                 err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5396                 err_rec->resched_delay = err_rec->ue_to_poll_time -
5397                                         err_rec->ue_to_reset_time;
5398                 break;
5399
5400         case ERR_RECOVERY_ST_PRE_POLL:
5401                 err_rec->recovery_state = ERR_RECOVERY_ST_REINIT;
5402                 err_rec->resched_delay = 0;
5403                 status = 0;                     /* done */
5404                 break;
5405
5406         default:
5407                 status = -EINVAL;
5408                 err_rec->resched_delay = 0;
5409                 break;
5410         }
5411
5412         return status;
5413 }
5414
5415 static int be_err_recover(struct be_adapter *adapter)
5416 {
5417         int status;
5418
5419         if (!lancer_chip(adapter)) {
5420                 if (!adapter->error_recovery.recovery_supported ||
5421                     adapter->priv_flags & BE_DISABLE_TPE_RECOVERY)
5422                         return -EIO;
5423                 status = be_tpe_recover(adapter);
5424                 if (status)
5425                         goto err;
5426         }
5427
5428         /* Wait for adapter to reach quiescent state before
5429          * destroying queues
5430          */
5431         status = be_fw_wait_ready(adapter);
5432         if (status)
5433                 goto err;
5434
5435         adapter->flags |= BE_FLAGS_TRY_RECOVERY;
5436
5437         be_cleanup(adapter);
5438
5439         status = be_resume(adapter);
5440         if (status)
5441                 goto err;
5442
5443         adapter->flags &= ~BE_FLAGS_TRY_RECOVERY;
5444
5445 err:
5446         return status;
5447 }
5448
5449 static void be_err_detection_task(struct work_struct *work)
5450 {
5451         struct be_error_recovery *err_rec =
5452                         container_of(work, struct be_error_recovery,
5453                                      err_detection_work.work);
5454         struct be_adapter *adapter =
5455                         container_of(err_rec, struct be_adapter,
5456                                      error_recovery);
5457         u32 resched_delay = ERR_RECOVERY_DETECTION_DELAY;
5458         struct device *dev = &adapter->pdev->dev;
5459         int recovery_status;
5460
5461         be_detect_error(adapter);
5462         if (!be_check_error(adapter, BE_ERROR_HW))
5463                 goto reschedule_task;
5464
5465         recovery_status = be_err_recover(adapter);
5466         if (!recovery_status) {
5467                 err_rec->recovery_retries = 0;
5468                 err_rec->recovery_state = ERR_RECOVERY_ST_NONE;
5469                 dev_info(dev, "Adapter recovery successful\n");
5470                 goto reschedule_task;
5471         } else if (!lancer_chip(adapter) && err_rec->resched_delay) {
5472                 /* BEx/SH recovery state machine */
5473                 if (adapter->pf_num == 0 &&
5474                     err_rec->recovery_state > ERR_RECOVERY_ST_DETECT)
5475                         dev_err(&adapter->pdev->dev,
5476                                 "Adapter recovery in progress\n");
5477                 resched_delay = err_rec->resched_delay;
5478                 goto reschedule_task;
5479         } else if (lancer_chip(adapter) && be_virtfn(adapter)) {
5480                 /* For VFs, check every second whether the PF has
5481                  * allocated resources.
5482                  */
5483                 dev_err(dev, "Re-trying adapter recovery\n");
5484                 goto reschedule_task;
5485         } else if (lancer_chip(adapter) && err_rec->recovery_retries++ <
5486                    ERR_RECOVERY_MAX_RETRY_COUNT) {
5487                 /* In case of another error during recovery, it takes 30 sec
5488                  * for adapter to come out of error. Retry error recovery after
5489                  * this time interval.
5490                  */
5491                 dev_err(&adapter->pdev->dev, "Re-trying adapter recovery\n");
5492                 resched_delay = ERR_RECOVERY_RETRY_DELAY;
5493                 goto reschedule_task;
5494         } else {
5495                 dev_err(dev, "Adapter recovery failed\n");
5496                 dev_err(dev, "Please reboot server to recover\n");
5497         }
5498
5499         return;
5500
5501 reschedule_task:
5502         be_schedule_err_detection(adapter, resched_delay);
5503 }
5504
5505 static void be_log_sfp_info(struct be_adapter *adapter)
5506 {
5507         int status;
5508
5509         status = be_cmd_query_sfp_info(adapter);
5510         if (!status) {
5511                 dev_err(&adapter->pdev->dev,
5512                         "Port %c: %s Vendor: %s part no: %s",
5513                         adapter->port_name,
5514                         be_misconfig_evt_port_state[adapter->phy_state],
5515                         adapter->phy.vendor_name,
5516                         adapter->phy.vendor_pn);
5517         }
5518         adapter->flags &= ~BE_FLAGS_PHY_MISCONFIGURED;
5519 }
5520
5521 static void be_worker(struct work_struct *work)
5522 {
5523         struct be_adapter *adapter =
5524                 container_of(work, struct be_adapter, work.work);
5525         struct be_rx_obj *rxo;
5526         int i;
5527
5528         if (be_physfn(adapter) &&
5529             MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0)
5530                 be_cmd_get_die_temperature(adapter);
5531
5532         /* when interrupts are not yet enabled, just reap any pending
5533          * mcc completions
5534          */
5535         if (!netif_running(adapter->netdev)) {
5536                 local_bh_disable();
5537                 be_process_mcc(adapter);
5538                 local_bh_enable();
5539                 goto reschedule;
5540         }
5541
5542         if (!adapter->stats_cmd_sent) {
5543                 if (lancer_chip(adapter))
5544                         lancer_cmd_get_pport_stats(adapter,
5545                                                    &adapter->stats_cmd);
5546                 else
5547                         be_cmd_get_stats(adapter, &adapter->stats_cmd);
5548         }
5549
5550         for_all_rx_queues(adapter, rxo, i) {
5551                 /* Replenish RX-queues starved due to memory
5552                  * allocation failures.
5553                  */
5554                 if (rxo->rx_post_starved)
5555                         be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST);
5556         }
5557
5558         /* EQ-delay update for Skyhawk is done while notifying EQ */
5559         if (!skyhawk_chip(adapter))
5560                 be_eqd_update(adapter, false);
5561
5562         if (adapter->flags & BE_FLAGS_PHY_MISCONFIGURED)
5563                 be_log_sfp_info(adapter);
5564
5565 reschedule:
5566         adapter->work_counter++;
5567         queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
5568 }
5569
5570 static void be_unmap_pci_bars(struct be_adapter *adapter)
5571 {
5572         if (adapter->csr)
5573                 pci_iounmap(adapter->pdev, adapter->csr);
5574         if (adapter->db)
5575                 pci_iounmap(adapter->pdev, adapter->db);
5576         if (adapter->pcicfg && adapter->pcicfg_mapped)
5577                 pci_iounmap(adapter->pdev, adapter->pcicfg);
5578 }
5579
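/* Doorbell registers live in BAR 0 on Lancer and on virtual functions, and in
 * BAR 4 on all other physical functions.
 */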
5580 static int db_bar(struct be_adapter *adapter)
5581 {
5582         if (lancer_chip(adapter) || be_virtfn(adapter))
5583                 return 0;
5584         else
5585                 return 4;
5586 }
5587
5588 static int be_roce_map_pci_bars(struct be_adapter *adapter)
5589 {
5590         if (skyhawk_chip(adapter)) {
5591                 adapter->roce_db.size = 4096;
5592                 adapter->roce_db.io_addr = pci_resource_start(adapter->pdev,
5593                                                               db_bar(adapter));
5594                 adapter->roce_db.total_size = pci_resource_len(adapter->pdev,
5595                                                                db_bar(adapter));
5596         }
5597         return 0;
5598 }
5599
5600 static int be_map_pci_bars(struct be_adapter *adapter)
5601 {
5602         struct pci_dev *pdev = adapter->pdev;
5603         u8 __iomem *addr;
5604         u32 sli_intf;
5605
5606         pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf);
5607         adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >>
5608                                 SLI_INTF_FAMILY_SHIFT;
5609         adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0;
5610
5611         if (BEx_chip(adapter) && be_physfn(adapter)) {
5612                 adapter->csr = pci_iomap(pdev, 2, 0);
5613                 if (!adapter->csr)
5614                         return -ENOMEM;
5615         }
5616
5617         addr = pci_iomap(pdev, db_bar(adapter), 0);
5618         if (!addr)
5619                 goto pci_map_err;
5620         adapter->db = addr;
5621
5622         if (skyhawk_chip(adapter) || BEx_chip(adapter)) {
5623                 if (be_physfn(adapter)) {
5624                         /* PCICFG is the 2nd BAR in BE2 */
5625                         addr = pci_iomap(pdev, BE2_chip(adapter) ? 1 : 0, 0);
5626                         if (!addr)
5627                                 goto pci_map_err;
5628                         adapter->pcicfg = addr;
5629                         adapter->pcicfg_mapped = true;
5630                 } else {
5631                         adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET;
5632                         adapter->pcicfg_mapped = false;
5633                 }
5634         }
5635
5636         be_roce_map_pci_bars(adapter);
5637         return 0;
5638
5639 pci_map_err:
5640         dev_err(&pdev->dev, "Error in mapping PCI BARs\n");
5641         be_unmap_pci_bars(adapter);
5642         return -ENOMEM;
5643 }
5644
5645 static void be_drv_cleanup(struct be_adapter *adapter)
5646 {
5647         struct be_dma_mem *mem = &adapter->mbox_mem_alloced;
5648         struct device *dev = &adapter->pdev->dev;
5649
5650         if (mem->va)
5651                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5652
5653         mem = &adapter->rx_filter;
5654         if (mem->va)
5655                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5656
5657         mem = &adapter->stats_cmd;
5658         if (mem->va)
5659                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5660 }
5661
5662 /* Allocate and initialize various fields in be_adapter struct */
5663 static int be_drv_init(struct be_adapter *adapter)
5664 {
5665         struct be_dma_mem *mbox_mem_alloc = &adapter->mbox_mem_alloced;
5666         struct be_dma_mem *mbox_mem_align = &adapter->mbox_mem;
5667         struct be_dma_mem *rx_filter = &adapter->rx_filter;
5668         struct be_dma_mem *stats_cmd = &adapter->stats_cmd;
5669         struct device *dev = &adapter->pdev->dev;
5670         int status = 0;
5671
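        /* The mailbox is over-allocated by 16 bytes so that the 16-byte
         * aligned view (mbox_mem_align) is guaranteed to fit inside the
         * allocation; only the aligned view is handed to the adapter.
         */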
5672         mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16;
5673         mbox_mem_alloc->va = dma_zalloc_coherent(dev, mbox_mem_alloc->size,
5674                                                  &mbox_mem_alloc->dma,
5675                                                  GFP_KERNEL);
5676         if (!mbox_mem_alloc->va)
5677                 return -ENOMEM;
5678
5679         mbox_mem_align->size = sizeof(struct be_mcc_mailbox);
5680         mbox_mem_align->va = PTR_ALIGN(mbox_mem_alloc->va, 16);
5681         mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16);
5682
5683         rx_filter->size = sizeof(struct be_cmd_req_rx_filter);
5684         rx_filter->va = dma_zalloc_coherent(dev, rx_filter->size,
5685                                             &rx_filter->dma, GFP_KERNEL);
5686         if (!rx_filter->va) {
5687                 status = -ENOMEM;
5688                 goto free_mbox;
5689         }
5690
5691         if (lancer_chip(adapter))
5692                 stats_cmd->size = sizeof(struct lancer_cmd_req_pport_stats);
5693         else if (BE2_chip(adapter))
5694                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v0);
5695         else if (BE3_chip(adapter))
5696                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v1);
5697         else
5698                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v2);
5699         stats_cmd->va = dma_zalloc_coherent(dev, stats_cmd->size,
5700                                             &stats_cmd->dma, GFP_KERNEL);
5701         if (!stats_cmd->va) {
5702                 status = -ENOMEM;
5703                 goto free_rx_filter;
5704         }
5705
5706         mutex_init(&adapter->mbox_lock);
5707         mutex_init(&adapter->mcc_lock);
5708         mutex_init(&adapter->rx_filter_lock);
5709         spin_lock_init(&adapter->mcc_cq_lock);
5710         init_completion(&adapter->et_cmd_compl);
5711
5712         pci_save_state(adapter->pdev);
5713
5714         INIT_DELAYED_WORK(&adapter->work, be_worker);
5715
5716         adapter->error_recovery.recovery_state = ERR_RECOVERY_ST_NONE;
5717         adapter->error_recovery.resched_delay = 0;
5718         INIT_DELAYED_WORK(&adapter->error_recovery.err_detection_work,
5719                           be_err_detection_task);
5720
5721         adapter->rx_fc = true;
5722         adapter->tx_fc = true;
5723
5724         /* Must be a power of 2 or else MODULO will BUG_ON */
5725         adapter->be_get_temp_freq = 64;
5726
5727         return 0;
5728
5729 free_rx_filter:
5730         dma_free_coherent(dev, rx_filter->size, rx_filter->va, rx_filter->dma);
5731 free_mbox:
5732         dma_free_coherent(dev, mbox_mem_alloc->size, mbox_mem_alloc->va,
5733                           mbox_mem_alloc->dma);
5734         return status;
5735 }
5736
5737 static void be_remove(struct pci_dev *pdev)
5738 {
5739         struct be_adapter *adapter = pci_get_drvdata(pdev);
5740
5741         if (!adapter)
5742                 return;
5743
5744         be_roce_dev_remove(adapter);
5745         be_intr_set(adapter, false);
5746
5747         be_cancel_err_detection(adapter);
5748
5749         unregister_netdev(adapter->netdev);
5750
5751         be_clear(adapter);
5752
5753         if (!pci_vfs_assigned(adapter->pdev))
5754                 be_cmd_reset_function(adapter);
5755
5756         /* tell fw we're done with firing cmds */
5757         be_cmd_fw_clean(adapter);
5758
5759         be_unmap_pci_bars(adapter);
5760         be_drv_cleanup(adapter);
5761
5762         pci_disable_pcie_error_reporting(pdev);
5763
5764         pci_release_regions(pdev);
5765         pci_disable_device(pdev);
5766
5767         free_netdev(adapter->netdev);
5768 }
5769
5770 static ssize_t be_hwmon_show_temp(struct device *dev,
5771                                   struct device_attribute *dev_attr,
5772                                   char *buf)
5773 {
5774         struct be_adapter *adapter = dev_get_drvdata(dev);
5775
5776         /* Unit: millidegree Celsius */
5777         if (adapter->hwmon_info.be_on_die_temp == BE_INVALID_DIE_TEMP)
5778                 return -EIO;
5779         else
5780                 return sprintf(buf, "%u\n",
5781                                adapter->hwmon_info.be_on_die_temp * 1000);
5782 }
5783
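/* Expose the on-die temperature as the standard read-only hwmon
 * "temp1_input" attribute; the value is reported in millidegrees Celsius.
 */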
5784 static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO,
5785                           be_hwmon_show_temp, NULL, 1);
5786
5787 static struct attribute *be_hwmon_attrs[] = {
5788         &sensor_dev_attr_temp1_input.dev_attr.attr,
5789         NULL
5790 };
5791
5792 ATTRIBUTE_GROUPS(be_hwmon);
5793
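/* Return a human-readable name for the multi-channel mode this function is
 * provisioned in, if any; an empty string is returned when the function is
 * not in a multi-channel configuration.
 */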
5794 static char *mc_name(struct be_adapter *adapter)
5795 {
5796         char *str = ""; /* default */
5797
5798         switch (adapter->mc_type) {
5799         case UMC:
5800                 str = "UMC";
5801                 break;
5802         case FLEX10:
5803                 str = "FLEX10";
5804                 break;
5805         case vNIC1:
5806                 str = "vNIC-1";
5807                 break;
5808         case nPAR:
5809                 str = "nPAR";
5810                 break;
5811         case UFP:
5812                 str = "UFP";
5813                 break;
5814         case vNIC2:
5815                 str = "vNIC-2";
5816                 break;
5817         default:
5818                 str = "";
5819         }
5820
5821         return str;
5822 }
5823
5824 static inline char *func_name(struct be_adapter *adapter)
5825 {
5826         return be_physfn(adapter) ? "PF" : "VF";
5827 }
5828
5829 static inline char *nic_name(struct pci_dev *pdev)
5830 {
5831         switch (pdev->device) {
5832         case OC_DEVICE_ID1:
5833                 return OC_NAME;
5834         case OC_DEVICE_ID2:
5835                 return OC_NAME_BE;
5836         case OC_DEVICE_ID3:
5837         case OC_DEVICE_ID4:
5838                 return OC_NAME_LANCER;
5839         case BE_DEVICE_ID2:
5840                 return BE3_NAME;
5841         case OC_DEVICE_ID5:
5842         case OC_DEVICE_ID6:
5843                 return OC_NAME_SH;
5844         default:
5845                 return BE_NAME;
5846         }
5847 }
5848
5849 static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id)
5850 {
5851         struct be_adapter *adapter;
5852         struct net_device *netdev;
5853         int status = 0;
5854
5855         dev_info(&pdev->dev, "%s version is %s\n", DRV_NAME, DRV_VER);
5856
5857         status = pci_enable_device(pdev);
5858         if (status)
5859                 goto do_none;
5860
5861         status = pci_request_regions(pdev, DRV_NAME);
5862         if (status)
5863                 goto disable_dev;
5864         pci_set_master(pdev);
5865
5866         netdev = alloc_etherdev_mqs(sizeof(*adapter), MAX_TX_QS, MAX_RX_QS);
5867         if (!netdev) {
5868                 status = -ENOMEM;
5869                 goto rel_reg;
5870         }
5871         adapter = netdev_priv(netdev);
5872         adapter->pdev = pdev;
5873         pci_set_drvdata(pdev, adapter);
5874         adapter->netdev = netdev;
5875         SET_NETDEV_DEV(netdev, &pdev->dev);
5876
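        /* Prefer a 64-bit DMA mask and advertise NETIF_F_HIGHDMA only when it
         * is accepted; otherwise fall back to a 32-bit mask and fail the probe
         * if even that cannot be set.
         */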
5877         status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
5878         if (!status) {
5879                 netdev->features |= NETIF_F_HIGHDMA;
5880         } else {
5881                 status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
5882                 if (status) {
5883                         dev_err(&pdev->dev, "Could not set PCI DMA Mask\n");
5884                         goto free_netdev;
5885                 }
5886         }
5887
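        /* Enabling PCIe AER reporting is best-effort: failure is not treated
         * as fatal, it only means the "error reporting enabled" message below
         * is not printed.
         */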
5888         status = pci_enable_pcie_error_reporting(pdev);
5889         if (!status)
5890                 dev_info(&pdev->dev, "PCIe error reporting enabled\n");
5891
5892         status = be_map_pci_bars(adapter);
5893         if (status)
5894                 goto free_netdev;
5895
5896         status = be_drv_init(adapter);
5897         if (status)
5898                 goto unmap_bars;
5899
5900         status = be_setup(adapter);
5901         if (status)
5902                 goto drv_cleanup;
5903
5904         be_netdev_init(netdev);
5905         status = register_netdev(netdev);
5906         if (status != 0)
5907                 goto unsetup;
5908
5909         be_roce_dev_add(adapter);
5910
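        /* Start the periodic error-detection work after an initial delay and
         * record the probe time, which the error-recovery logic presumably
         * uses to bound how soon and how often recovery is retried.
         */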
5911         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5912         adapter->error_recovery.probe_time = jiffies;
5913
5914         /* On-die temperature is not supported for VFs. */
5915         if (be_physfn(adapter) && IS_ENABLED(CONFIG_BE2NET_HWMON)) {
5916                 adapter->hwmon_info.hwmon_dev =
5917                         devm_hwmon_device_register_with_groups(&pdev->dev,
5918                                                                DRV_NAME,
5919                                                                adapter,
5920                                                                be_hwmon_groups);
5921                 adapter->hwmon_info.be_on_die_temp = BE_INVALID_DIE_TEMP;
5922         }
5923
5924         dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev),
5925                  func_name(adapter), mc_name(adapter), adapter->port_name);
5926
5927         return 0;
5928
5929 unsetup:
5930         be_clear(adapter);
5931 drv_cleanup:
5932         be_drv_cleanup(adapter);
5933 unmap_bars:
5934         be_unmap_pci_bars(adapter);
5935 free_netdev:
5936         free_netdev(netdev);
5937 rel_reg:
5938         pci_release_regions(pdev);
5939 disable_dev:
5940         pci_disable_device(pdev);
5941 do_none:
5942         dev_err(&pdev->dev, "%s initialization failed\n", nic_name(pdev));
5943         return status;
5944 }
5945
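/* Legacy PCI power-management callbacks, wired up through the .suspend and
 * .resume members of struct pci_driver below.
 */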
5946 static int be_suspend(struct pci_dev *pdev, pm_message_t state)
5947 {
5948         struct be_adapter *adapter = pci_get_drvdata(pdev);
5949
5950         be_intr_set(adapter, false);
5951         be_cancel_err_detection(adapter);
5952
5953         be_cleanup(adapter);
5954
5955         pci_save_state(pdev);
5956         pci_disable_device(pdev);
5957         pci_set_power_state(pdev, pci_choose_state(pdev, state));
5958         return 0;
5959 }
5960
5961 static int be_pci_resume(struct pci_dev *pdev)
5962 {
5963         struct be_adapter *adapter = pci_get_drvdata(pdev);
5964         int status = 0;
5965
5966         status = pci_enable_device(pdev);
5967         if (status)
5968                 return status;
5969
5970         pci_restore_state(pdev);
5971
5972         status = be_resume(adapter);
5973         if (status)
5974                 return status;
5975
5976         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5977
5978         return 0;
5979 }
5980
5981 /*
5982  * An FLR (function-level reset) will stop BE from DMAing any data.
5983  */
5984 static void be_shutdown(struct pci_dev *pdev)
5985 {
5986         struct be_adapter *adapter = pci_get_drvdata(pdev);
5987
5988         if (!adapter)
5989                 return;
5990
5991         be_roce_dev_shutdown(adapter);
5992         cancel_delayed_work_sync(&adapter->work);
5993         be_cancel_err_detection(adapter);
5994
5995         netif_device_detach(adapter->netdev);
5996
5997         be_cmd_reset_function(adapter);
5998
5999         pci_disable_device(pdev);
6000 }
6001
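/* EEH (PCI error recovery) callbacks: error_detected quiesces the device,
 * slot_reset re-enables it and waits for FW readiness, and resume restores
 * normal operation. They are hooked up via be_eeh_handlers below.
 */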
6002 static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev,
6003                                             pci_channel_state_t state)
6004 {
6005         struct be_adapter *adapter = pci_get_drvdata(pdev);
6006
6007         dev_err(&adapter->pdev->dev, "EEH error detected\n");
6008
6009         be_roce_dev_remove(adapter);
6010
6011         if (!be_check_error(adapter, BE_ERROR_EEH)) {
6012                 be_set_error(adapter, BE_ERROR_EEH);
6013
6014                 be_cancel_err_detection(adapter);
6015
6016                 be_cleanup(adapter);
6017         }
6018
6019         if (state == pci_channel_io_perm_failure)
6020                 return PCI_ERS_RESULT_DISCONNECT;
6021
6022         pci_disable_device(pdev);
6023
6024         /* The error could cause the FW to trigger a flash debug dump.
6025          * Resetting the card while flash dump is in progress
6026          * can cause it not to recover; wait for it to finish.
6027          * Wait only on the first function, as the wait is needed only
6028          * once per adapter.
6029          */
6030         if (pdev->devfn == 0)
6031                 ssleep(30);
6032
6033         return PCI_ERS_RESULT_NEED_RESET;
6034 }
6035
6036 static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
6037 {
6038         struct be_adapter *adapter = pci_get_drvdata(pdev);
6039         int status;
6040
6041         dev_info(&adapter->pdev->dev, "EEH reset\n");
6042
6043         status = pci_enable_device(pdev);
6044         if (status)
6045                 return PCI_ERS_RESULT_DISCONNECT;
6046
6047         pci_set_master(pdev);
6048         pci_restore_state(pdev);
6049
6050         /* Check if card is ok and fw is ready */
6051         dev_info(&adapter->pdev->dev,
6052                  "Waiting for FW to be ready after EEH reset\n");
6053         status = be_fw_wait_ready(adapter);
6054         if (status)
6055                 return PCI_ERS_RESULT_DISCONNECT;
6056
6057         pci_cleanup_aer_uncorrect_error_status(pdev);
6058         be_clear_error(adapter, BE_CLEAR_ALL);
6059         return PCI_ERS_RESULT_RECOVERED;
6060 }
6061
6062 static void be_eeh_resume(struct pci_dev *pdev)
6063 {
6064         int status = 0;
6065         struct be_adapter *adapter = pci_get_drvdata(pdev);
6066
6067         dev_info(&adapter->pdev->dev, "EEH resume\n");
6068
6069         pci_save_state(pdev);
6070
6071         status = be_resume(adapter);
6072         if (status)
6073                 goto err;
6074
6075         be_roce_dev_add(adapter);
6076
6077         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6078         return;
6079 err:
6080         dev_err(&adapter->pdev->dev, "EEH resume failed\n");
6081 }
6082
6083 static int be_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
6084 {
6085         struct be_adapter *adapter = pci_get_drvdata(pdev);
6086         struct be_resources vft_res = {0};
6087         int status;
6088
6089         if (!num_vfs)
6090                 be_vf_clear(adapter);
6091
6092         adapter->num_vfs = num_vfs;
6093
6094         if (adapter->num_vfs == 0 && pci_vfs_assigned(pdev)) {
6095                 dev_warn(&pdev->dev,
6096                          "Cannot disable VFs while they are assigned\n");
6097                 return -EBUSY;
6098         }
6099
6100         /* When the HW is in an SR-IOV capable configuration, the PF-pool
6101          * resources are distributed equally across the maximum number of VFs.
6102          * The user may request that only a subset of the max VFs be enabled.
6103          * Based on num_vfs, redistribute the resources across only the
6104          * requested VFs so that each VF gets a larger share of resources.
6105          * This facility is not available in BE3 FW; on Lancer chips the FW
6106          * performs this redistribution itself.
6107          */
6108         if (skyhawk_chip(adapter) && !pci_num_vf(pdev)) {
6109                 be_calculate_vf_res(adapter, adapter->num_vfs,
6110                                     &vft_res);
6111                 status = be_cmd_set_sriov_config(adapter, adapter->pool_res,
6112                                                  adapter->num_vfs, &vft_res);
6113                 if (status)
6114                         dev_err(&pdev->dev,
6115                                 "Failed to optimize SR-IOV resources\n");
6116         }
6117
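        /* Re-query the (possibly redistributed) resource limits and resize
         * the queues accordingly before creating the requested VFs.
         */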
6118         status = be_get_resources(adapter);
6119         if (status)
6120                 return be_cmd_status(status);
6121
6122         /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
6123         rtnl_lock();
6124         status = be_update_queues(adapter);
6125         rtnl_unlock();
6126         if (status)
6127                 return be_cmd_status(status);
6128
6129         if (adapter->num_vfs)
6130                 status = be_vf_setup(adapter);
6131
6132         if (!status)
6133                 return adapter->num_vfs;
6134
6135         return 0;
6136 }
6137
6138 static const struct pci_error_handlers be_eeh_handlers = {
6139         .error_detected = be_eeh_err_detected,
6140         .slot_reset = be_eeh_reset,
6141         .resume = be_eeh_resume,
6142 };
6143
6144 static struct pci_driver be_driver = {
6145         .name = DRV_NAME,
6146         .id_table = be_dev_ids,
6147         .probe = be_probe,
6148         .remove = be_remove,
6149         .suspend = be_suspend,
6150         .resume = be_pci_resume,
6151         .shutdown = be_shutdown,
6152         .sriov_configure = be_pci_sriov_configure,
6153         .err_handler = &be_eeh_handlers
6154 };
6155
6156 static int __init be_init_module(void)
6157 {
6158         int status;
6159
6160         if (rx_frag_size != 8192 && rx_frag_size != 4096 &&
6161             rx_frag_size != 2048) {
6162                 printk(KERN_WARNING DRV_NAME
6163                         " : Module param rx_frag_size must be 2048/4096/8192."
6164                         " Using 2048\n");
6165                 rx_frag_size = 2048;
6166         }
6167
6168         if (num_vfs > 0) {
6169                 pr_info(DRV_NAME " : Module param num_vfs is obsolete.\n");
6170                 pr_info(DRV_NAME " : Use sysfs method to enable VFs\n");
6171         }
6172
6173         be_wq = create_singlethread_workqueue("be_wq");
6174         if (!be_wq) {
6175                 pr_warn(DRV_NAME " : workqueue creation failed\n");
6176                 return -ENOMEM;
6177         }
6178
6179         be_err_recovery_workq =
6180                 create_singlethread_workqueue("be_err_recover");
6181         if (!be_err_recovery_workq)
6182                 pr_warn(DRV_NAME " : Could not create error recovery workqueue\n");
6183
6184         status = pci_register_driver(&be_driver);
6185         if (status) {
6186                 destroy_workqueue(be_wq);
6187                 be_destroy_err_recovery_workq();
6188         }
6189         return status;
6190 }
6191 module_init(be_init_module);
6192
6193 static void __exit be_exit_module(void)
6194 {
6195         pci_unregister_driver(&be_driver);
6196
6197         be_destroy_err_recovery_workq();
6198
6199         if (be_wq)
6200                 destroy_workqueue(be_wq);
6201 }
6202 module_exit(be_exit_module);